xref: /illumos-gate/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifndef _SYS_IB_MGT_IBCM_IBCM_IMPL_H
27 #define	_SYS_IB_MGT_IBCM_IBCM_IMPL_H
28 
29 /*
30  * ibcm_impl.h
31  *
32  * This file contains all of the internal data structures and
33  * definitions for IBCM.
34  *
35  * The general state transition processing of CM is achieved by the
36  * following callgraph:
37  *
38  * CM INIT : Register for hca attach and detach callbacks, and other asyncs
39  *
40  * On new HCA attach:	Register with IBMF on all ports of upcoming HCA
41  *			Specify CM callback and callback "per HCA arg"
42  *			Register with SA, allocate AVL trees etc.
43  *
44  * IBMF Callback
45  *  	Validate combination of method and attribute Id in the generic MAD hdr
46  *	-> Call CM Connection state transition function based on attribute ID
47  *	    Create/lookup/delete CM state structure and save it into avl tree
48  *	    Handle duplicate messages and MRA to adjust timers etc.
49  *	    Handle stale connections
50  *	    Allocate reply MADs
51  *		-> Call CM QP/EEC state transition function based on CM message
52  *		     Change QP/EEC state  (to enable recvQ posting by client)
53  *		     Call Client/Server handler callback function
54  *		     Modify QP/EEC attributes
55  *		     Optionally fill up some fields of response MAD
56  *	    Post reply MADs
57  *	    Store reply MADs and reply MAD address, if necessary
58  *	    Initialize timeouts for the message
59  *	    Change CM state
60  *	    Deallocate reply MADs
61  *
62  * NOTES:
63  * 	o There are *NO* explicit CM allocation and deallocation routines for
64  *	CM MADs and state data structures
65  *	o CM timeouts are scheduled using timeout(9f), and cancelled using
66  *	untimeout(9f)
67  *	o svc_id allocation scheme
68  *	A new counter for svcid is maintained in ibcm_hca_info_t
69  *	which is used to allocate svcid. The svcids are incremented
70  *	sequentially and allocated (with wrap around on overflow) with
71  *	these considerations:
72  *		The WellKnown service id's and locally allocated svcid's
73  *		could be maintained in separate lists, thus allowing the
74  *		lists to be kept apart and sorted easily.
75  *		The insertions are done at the end of the list
76  *	o reqid allocation scheme
77  *	The list is a sorted one (as reqid's are allocated sequentially).
78  *	If there is a code required for wrap around, it would search for
79  *	a reqid from the head of the list.
80  *	The insertions are always done at the end of the lists
81  *	o XXX svc_id allocation scheme and req_id allocation scheme will
82  *	be revisited.
83  */
84 
85 #include <sys/sysmacros.h>
86 #include <sys/systm.h>
87 #include <sys/kmem.h>
88 #include <sys/modctl.h>
89 #include <sys/avl.h>
90 #include <sys/taskq.h>
91 #include <sys/vmem.h>
92 #include <sys/note.h>
93 #include <sys/t_lock.h>
94 
95 #include <sys/ib/ibtl/ibvti.h>
96 #include <sys/ib/ibtl/impl/ibtl_cm.h>
97 #include <sys/ib/ibtl/impl/ibtl_util.h>
98 #include <sys/ib/mgt/ibmf/ibmf.h>
99 #include <sys/ib/mgt/ibcm/ibcm_trace.h>
100 #include <inet/ip.h>
101 
102 #ifdef __cplusplus
103 extern "C" {
104 #endif
105 
106 _NOTE(SCHEME_PROTECTS_DATA("Private", sa_service_record_s))
107 _NOTE(SCHEME_PROTECTS_DATA("Exclusive access to ibmf msg buf based on state",
108 ib_mad_hdr_t))
109 _NOTE(SCHEME_PROTECTS_DATA("Exclusive access to ibmf msg buf based on state",
110 _ibmf_msg))
111 
112 /*
113  * Defines for all CM state machine states, as defined in
114  * section 12.9.7. IBCM_REJ_SENT is a state not defined in
115  * the spec and is added for implementation purposes.
116  */
typedef enum ibcm_conn_state_e {
	/* Entry points of the state machine */
	IBCM_STATE_IDLE				= 0,
	IBCM_STATE_LISTEN			= 1,

	/* Connection establishment in progress */
	IBCM_STATE_REQ_SENT			= 2,
	IBCM_STATE_REQ_RCVD			= 3,
	IBCM_STATE_REP_SENT			= 4,
	IBCM_STATE_REP_RCVD			= 5,
	IBCM_STATE_REP_WAIT			= 6,
	IBCM_STATE_MRA_SENT			= 7,
	IBCM_STATE_MRA_REP_SENT			= 8,
	IBCM_STATE_MRA_REP_RCVD			= 9,

	/* Connection establishment failed */
	IBCM_STATE_TIMED_OUT			= 10,
	IBCM_STATE_ABORTED			= 11,
	IBCM_STATE_REJ_SENT			= 12,

	/* Connection is up */
	IBCM_STATE_TRANSIENT_ESTABLISHED	= 13,
	IBCM_STATE_ESTABLISHED			= 14,

	/* Connection teardown in progress */
	IBCM_STATE_TRANSIENT_DREQ_SENT		= 15,
	IBCM_STATE_DREQ_SENT			= 16,
	IBCM_STATE_DREQ_RCVD			= 17,
	IBCM_STATE_DREP_RCVD			= 18,
	IBCM_STATE_TIMEWAIT			= 19,

	/* UD (SIDR) states */
	IBCM_STATE_SIDR_REQ_SENT		= 20,
	IBCM_STATE_SIDR_REQ_RCVD		= 21,
	IBCM_STATE_SIDR_REP_SENT		= 22,
	IBCM_STATE_SIDR_REP_RCVD		= 23,

	/* Shared by RC and UD while the state resource is being deleted */
	IBCM_STATE_DELETE			= 24
} ibcm_conn_state_t;
157 
158 /* Defines the AP states for LAP/APR */
typedef enum ibcm_ap_state_e {
	IBCM_AP_STATE_IDLE		= 0,
	IBCM_AP_STATE_LAP_SENT		= 1,
	IBCM_AP_STATE_LAP_RCVD		= 2,
	IBCM_AP_STATE_APR_RCVD		= 3,
	IBCM_AP_STATE_MRA_LAP_RCVD	= 4,
	IBCM_AP_STATE_MRA_LAP_SENT	= 5,
	IBCM_AP_STATE_TIMED_OUT		= 6
} ibcm_ap_state_t;
168 
169 /*
170  * Defines for the CM event types/MAD attribute IDs
171  */
typedef enum ibcm_event_type_e {
	/* 0x00 - 0x0A: the 11 events on which IBMF calls back into CM */
	IBCM_INCOMING_REQ		= 0x00,
	IBCM_INCOMING_MRA		= 0x01,
	IBCM_INCOMING_REJ		= 0x02,
	IBCM_INCOMING_REP		= 0x03,
	IBCM_INCOMING_RTU		= 0x04,
	IBCM_INCOMING_DREQ		= 0x05,
	IBCM_INCOMING_DREP		= 0x06,
	IBCM_INCOMING_SIDR_REQ		= 0x07,
	IBCM_INCOMING_SIDR_REP		= 0x08,
	IBCM_INCOMING_LAP		= 0x09,
	IBCM_INCOMING_APR		= 0x0A,

	/* REQ Sent on active CM side */
	IBCM_OUTGOING_REQ		= 0x0B,

	/* lookup by remote HCA and remote comid */
	IBCM_INCOMING_REQ_STALE		= 0x0C,

	/* lookup by passive HCA and QPN */
	IBCM_INCOMING_REP_STALE		= 0x0D,

	/* lookup by remote com id */
	IBCM_INCOMING_REJ_RCOMID	= 0x0E
} ibcm_event_type_t;
190 
191 /*
192  * IBMF calls back into CM on only the first 11 events defined in
193  * ibcm_event_type_t. CM has pre-defined functions for these 11 events
194  *
195  */
196 #define	IBCM_MAX_EVENTS		11
197 
198 /*
199  * CM message attribute IDs begin at this "base ID". The first 11 event types
200  * in ibcm_event_type_t are CM protocol messages that are posted to IBMF by
201  * adding the "base_id" to the respective event type value. By subtracting
202  * the "base_id" in IBMF callback in CM MAD, the message type is gotten back
203  */
204 #define	IBCM_ATTR_BASE_ID		0x10
205 
/* NOTE(review): presumably the upper bound on CM MAD retries -- confirm */
#define	IBCM_MAX_RETRY_CNT		15
/*
 * NOTE(review): 4 + 28 = 32; these look like the bit widths of two parts of
 * a 32-bit field -- confirm against the users of these constants.
 */
#define	IBCM_ATTRID_FIELD_SIZE		4
#define	IBCM_TRANID_PRIV_FIELD_SIZE	28

#define	IBCM_RNR_RETRY_CNT_MASK		0x7	/* 3 bits */
/* Largest value that fits in IBCM_RNR_RETRY_CNT_MASK */
#define	IBCM_MAX_RNR_RETRY_CNT		7

/* Initial values for the com id, req id and service id spaces */
#define	IBCM_INITIAL_COMID		1
#define	IBCM_INITIAL_REQID		1
#define	IBCM_INITIAL_SID		1
216 
217 /*
218  * Maximum number of com ids / req ids that can be active at any given time
219  * MUST ENSURE THAT (INITIAL ID + MAX IDS -1), for any of the IDs does not
220  * exceed the max 32 bit
221  */
222 
/* An hca can have max of 2^24 -2  RC connections */
#define	IBCM_MAX_COMIDS		(0x01000000 - 2)
#define	IBCM_MAX_REQIDS		0xFFFFFFFF	/* 2^32 - 1 */
#define	IBCM_MAX_LOCAL_SIDS	0xFFFFFFFF	/* 2^32 - 1 */
#define	IBCM_MAX_IP_SIDS	0xFFFF		/* 2^16 - 1 */

typedef uint32_t ib_com_id_t;	/* CM Communication ID */
230 
231 /*
232  * Defines the CM Mode of operation for a connection
233  */
typedef enum ibcm_mode_e {
	/* Active side CM */
	IBCM_ACTIVE_MODE	= 1,

	/* Passive side CM */
	IBCM_PASSIVE_MODE	= 2
} ibcm_mode_t;
238 
239 
240 /* different IBCM return values */
typedef enum ibcm_status_e {
	/* good status */
	IBCM_SUCCESS		= 0,

	/* statep lookup outcomes: found existing, created new, found none */
	IBCM_LOOKUP_EXISTS	= 1,
	IBCM_LOOKUP_NEW		= 2,
	IBCM_LOOKUP_FAIL	= 3,

	/* CM QP state change sent a REJ, REP or RTU msg */
	IBCM_SEND_REJ		= 4,
	IBCM_SEND_REP		= 5,
	IBCM_SEND_RTU		= 6,

	/* CM to send APR MAD as response */
	IBCM_SEND_APR		= 7,

	/* client's UD handler returned this */
	IBCM_SEND_SIDR_REP	= 8,

	/* client's handler returned this */
	IBCM_DEFER		= 9,

	/* generic IBCM failure */
	IBCM_FAILURE		= 10
} ibcm_status_t;
254 
255 /*
256  * Struct definition for addressing information that CM maintains for
257  * each of the incoming MADs
258  */
typedef	struct	ibcm_mad_addr {
	/*
	 * Addressing information for one MAD: the IBMF address (and GRH
	 * fields when grh_exists is set), the IBMF handle, and the CM QP
	 * entry the MAD was received on or is to be sent on.
	 */
	ibmf_global_addr_info_t	grh_hdr;	/* GRH related fields of MAD */
	ibmf_addr_info_t	rcvd_addr;	/* Outgoing/Incoming MAD addr */
	ibmf_handle_t		ibmf_hdl;	/* IBMF handle */
	boolean_t		grh_exists;	/* TRUE if grh exists */
	uint8_t			port_num;	/* port number */
	struct ibcm_qp_list_s	*cm_qp_entry;	/* IBMF hdl on which MAD rcvd */
						/* or on which MAD shall be */
						/* sent out */
} ibcm_mad_addr_t;
269 
270 _NOTE(READ_ONLY_DATA(ibcm_mad_addr))
271 
/*
 * Sizes of a CM MAD, of its generic MAD header, and of what remains of the
 * MAD once the header is taken off.
 */
#define	IBCM_MAD_SIZE		0x100			/* size of MAD */
#define	IBCM_MAD_HDR_SIZE	sizeof (ib_mad_hdr_t)	/* size of MAD HDR */
/*
 * The expansion is parenthesized so that the subtraction is evaluated as a
 * unit when the macro appears inside a larger expression
 * (e.g. "2 * IBCM_MSG_SIZE" or "x - IBCM_MSG_SIZE").
 */
#define	IBCM_MSG_SIZE		(IBCM_MAD_SIZE - IBCM_MAD_HDR_SIZE)
275 
typedef enum ibcm_abort_flag_e {
	/* no abort flag is set */
	IBCM_ABORT_INIT		= 0,

	/* client requested connection abort */
	IBCM_ABORT_CLIENT	= 1,

	/* REJ received with timeout reason */
	IBCM_ABORT_REJ		= 2
} ibcm_abort_flag_t;
281 
typedef	enum ibcm_isync_e {
	/* Block cm operation */
	IBCM_BLOCK	= 0,

	/* Unblock cm operation */
	IBCM_UNBLOCK	= 1,

	/* fail cm operation */
	IBCM_FAIL	= 2
} ibcm_isync_t;
287 
288 /*
289  * Define a connection state structure, used by the IBTF CM
290  * to maintain state about connected QPs.
291  *
292  * mode			: CM connection mode active/passive
293  * state		: CM connection state
294  * ap_state		: CM AP Internal state to manage LAP/APR state machine
295  * state_mutex		: lock for this structure
296  * channel		: Channel associated with this RC state structure
297  * ref_cnt		: Number of active threads that may reference this
298  *			  state structure
299  * svcid		: Service ID
300  * cm_handler		: Client handler callback address
301  * stored_reply_addr	: Address for replying using the stored mad
302  * hcap			: A pointer to the HCA's entry
303  * stored_msg		: Stores the response REP/REJ/RTU MAD
304  * mra_msg		: Stores the response MRA MAD
305  * dreq_msg		: Stores the DREQ MAD
306  * drep_msg		: Stores the DREP MAD
307  * lapr_msg		: Stores the LAP/APR MAD
308  *			  detect duplicate LAP messages
309  * local_comid  	: Local communication id
310  * local_hca_guid	: Local HCA GUID
311  * local_qpn		: Local QPN
312  *
313  * remote_comid 	: Remote communication id
314  * remote_hca_guid	: Remote HCA GUID
315  * remote_qpn		: Remote QPN
316  *
317  * timerid		: Timer id for the timeout either for re-sending the
318  *			  stored mad or deleting the stored mad
319  *			  Ex: A REJ/RTU response for an incoming REP
320  *			      A REP response to an incoming REQ
321  *			      An outgoing REQ on active connection side
322  * timer_value		: Time for any of the above timers in HZ
323  * pkt_life_time	: pkt life time from source to destination
324  * remote_ack_delay	: Remote hca's ack delay in clock_t
325  * rc_alt_pkt_lt	: Life time for new ALT path specified in LAP
326  * stale_clock		: clock used to detect stale vs duplicate REQs
327  * timer_stored_state	: state of connection for timeout() validation
328  * timer_stored_ap_state: CM ap_state for timeout validation
 * remaining_retry_cnt	: Remaining count for retries ie., posting stored MADs
 * max_cm_retries	: Max retry count for sending a REQ/REP/DREQ
 * delete_mra_msg	: Set to TRUE for deletion, if MRA re-send in progress
 * resend_mad		: B_TRUE, if REQ/REP/RTU/REJ MAD re-send is in progress
 * resend_mra_mad	: B_TRUE, if a MRA mad re-send is in progress
334  * cep_retry_cnt	: Retry count for CEP.
335  * stale		: B_TRUE, if connection has become stale
336  * blocking_done	: B_TRUE, if cv_signal been issued to block_client_cv
337  * clnt_hdl		: Clnt_hdl passed in ibt_open_channel
338  * return_data		: RC return args, valid for blocking
339  *			  ibt_open_channel
340  * drep_priv_data;	: The pointer to client specified outgoing private
341  *			  data, from close channel API call
342  * drep_priv_data_len   : The length of DREP private data that client would
343  *			  like to be returned from close channel API call
344  * delete_state_data	: B_TRUE, if CM decides to delete state data, but
345  *			  there is some thread that could access state data
346  *
347  * avl_active_link	: For inserting this state-data into active AVL tree
348  * avl_passive_link	: For inserting this state-data into passive AVL tree
349  * Note : All timer values that are of type "clock_t" below are in usecs
350  */
/*
 * Per-connection CM state.  Most fields are described in the block comment
 * that precedes this definition; the lint annotations that follow it record
 * which fields are protected by state_mutex and which are read-only.
 */
typedef struct ibcm_state_data_s {
	/* for AVL tree */
	avl_node_t		avl_active_link;
	avl_node_t		avl_passive_link;
	/*
	 * NOTE(review): not described in the header comment; presumably the
	 * link for the passive-side tree keyed by remote comid -- confirm.
	 */
	avl_node_t		avl_passive_comid_link;

	/* remote stuff */
	ib_guid_t		remote_hca_guid;
	ib_com_id_t		remote_comid;
	ib_qpn_t		remote_qpn;

	/* local stuff */
	ib_com_id_t		local_comid;
	ib_qpn_t		local_qpn;
	ib_guid_t		local_hca_guid;

	ibcm_mode_t		mode;
	ibcm_conn_state_t	state;
	ibcm_ap_state_t		ap_state;
	kmutex_t		state_mutex;
	ibt_channel_hdl_t	channel;	/* save a copy */

	/* ref_cnt so others cannot delete a statep that may be referenced */
	int			ref_cnt;

	ib_svc_id_t		svcid;
	ibt_cm_handler_t	cm_handler;

	ibcm_mad_addr_t		stored_reply_addr;

	struct ibcm_hca_info_s *hcap;

	ibmf_msg_t		*stored_msg;
	ibmf_msg_t		*mra_msg;
	ibmf_msg_t		*dreq_msg;
	ibmf_msg_t		*drep_msg;
	ibmf_msg_t		*lapr_msg;

	void			*defer_cm_msg;

	/* timeout related stuff */
	timeout_id_t		timerid;
	clock_t			timer_value;
	clock_t			pkt_life_time;
	clock_t			remote_ack_delay;
	clock_t			rc_alt_pkt_lt;

	hrtime_t		stale_clock;
	/* NOTE(review): post_time and mra_time are undocumented -- confirm */
	hrtime_t		post_time;
	hrtime_t		mra_time;

	ibcm_conn_state_t	timer_stored_state;
	ibcm_ap_state_t		timer_stored_ap_state;
	uint8_t			remaining_retry_cnt;
	uint8_t			max_cm_retries;
	uint8_t			cm_retries;

	uint8_t			drep_in_progress;

	/* some cep stuff, stored here temporarily during connection est  */
	uint8_t			cep_retry_cnt:3;	/* 3-bit bit-field */
	ibt_srate_t		local_srate;
	ibt_srate_t		local_alt_srate;
	ib_pkey_t		pkey;
	uint8_t			prim_port;
	uint8_t			alt_port;
	uint32_t		starting_psn;
	ib_path_bits_t		prim_src_path_bits;
	ib_path_bits_t		alt_src_path_bits;

	boolean_t		delete_mra_msg;
	boolean_t		stale;
	boolean_t		delete_state_data;
	boolean_t		skip_rtr;

	boolean_t		open_done;
	boolean_t		close_done;
	boolean_t		ap_done;

	/* IBCM_*_POST_BUSY bits, set while the matching MAD post is busy */
	uint8_t			send_mad_flags;
	uint8_t			close_flow;
	uint8_t			open_flow;
	ibcm_abort_flag_t	abort_flag;

	struct ibcm_state_data_s	*timeout_next;

	ibcm_conn_state_t	timedout_state;

	ibcm_isync_t		cep_in_rts;
	ibcm_isync_t		clnt_proceed;
	ibcm_isync_t		close_nocb_state;

	/* Clients' information */
	void			*state_cm_private;

	/* pointer to service info */
	struct ibcm_svc_info_s  *state_svc_infop;

	kcondvar_t		block_client_cv;
	kcondvar_t		block_mad_cv;

	/* Data for recycle function */
	struct ibcm_taskq_recycle_arg_s	*recycle_arg;

	/* Return data pointers in various cm api calls */
	ibt_rc_returns_t	*open_return_data;
	ibt_ap_returns_t	*ap_return_data;
	uint8_t			*close_ret_priv_data;
	ibt_priv_data_len_t	*close_ret_priv_data_len;
	uint8_t			*close_ret_status;

	/* for queuing of open_rc_channel requests */
	struct ibcm_state_data_s	*open_link;
	/* for queuing of non-blocking close_rc_channel requests */
	struct ibcm_state_data_s	*close_link;

	struct ibcm_conn_trace_s	*conn_trace;

	/* For ibt_ofuvcm_get_req_data() */
	void			*req_msgp;

} ibcm_state_data_t;
473 
474 _NOTE(MUTEX_PROTECTS_DATA(ibcm_state_data_s::state_mutex,
475     ibcm_state_data_s::{state ref_cnt timer_stored_state timer_value
476     timer_stored_ap_state remaining_retry_cnt clnt_proceed cep_in_rts
477     close_nocb_state block_client_cv block_mad_cv timedout_state cm_handler
478     abort_flag mra_msg}))
479 
480 _NOTE(READ_ONLY_DATA(ibcm_state_data_s::{mode channel svcid hcap
481     local_comid local_hca_guid local_qpn remote_comid remote_hca_guid
482     remote_qpn pkt_life_time remote_ack_delay rc_alt_pkt_lt stored_reply_addr
483     max_cm_retries cep_retry_cnt local_srate local_alt_srate pkey
484     prim_port alt_port starting_psn state_svc_infop avl_active_link
485     avl_passive_link avl_passive_comid_link defer_cm_msg recycle_arg
486     conn_trace}))
487 
488 _NOTE(SCHEME_PROTECTS_DATA("Serailized access by block_client_cv",
489     ibcm_state_data_s::{open_return_data ap_return_data close_ret_priv_data
490     close_ret_priv_data_len close_ret_status}))
491 
492 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_state_data_s::{timedout_state
493     cm_handler mra_msg abort_flag}))
494 
495 /*
496  * Definitions for send mad flags. Respective bits in send_mad_flags or
497  * ud_send_mad_flags are set to 1, during MAD transmission, and reset in
498  * ibmf send completion callback or on completion of a blocking ibmf mad post.
499  */
/* Each flag is a distinct single bit, so several can be set at once. */
#define	IBCM_REP_POST_BUSY	1	/* REP post in progress */
#define	IBCM_REJ_POST_BUSY	2	/* REJ post in progress */
#define	IBCM_RTU_POST_BUSY	4	/* RTU post in progress */
#define	IBCM_MRA_POST_BUSY	8	/* MRA post in progress */
#define	IBCM_DREP_POST_BUSY	16	/* DREP post in progress */
#define	IBCM_SREP_POST_BUSY	32	/* SIDR REP post in progress */

/* MADs that are retransmitted only because of a timeout */
#define	IBCM_REQ_POST_BUSY	64	/* REQ post in progress */
509 
510 
/*
 * Incr/Decr ref_cnt by 1.
 *
 * IBCM_REF_CNT_DECR() also passes the state structure to ibcm_add_tlist()
 * when the count reaches zero and delete_state_data is B_TRUE, and then
 * asserts that the count has not gone negative.
 *
 * Both macros parenthesize their argument.  IBCM_REF_CNT_DECR() expands to a
 * single statement, so it is safe as the body of an unbraced if/else; the
 * caller supplies the terminating semicolon.  The argument is evaluated more
 * than once and must not have side effects.
 */
#define	IBCM_REF_CNT_INCR(s)	((s)->ref_cnt++)
#define	IBCM_REF_CNT_DECR(s)	\
	do { \
		if ((--((s)->ref_cnt) == 0) && \
		    ((s)->delete_state_data == B_TRUE)) { \
			ibcm_add_tlist(s); \
		} \
		ASSERT((s)->ref_cnt >= 0); \
	} while (0)
518 
/*
 * This macro evaluates to true when the channel handle is NULL.
 * The argument is parenthesized so that any expression may be passed.
 */
#define	IBCM_INVALID_CHANNEL(chan)	((chan) == NULL)
523 
524 /*
525  * The next macros are used to get/set the statep from the QP
526  * handles, using the CM private data. These call into IBTL.
527  * The WAIT and RELEASE macros deal with related issues that
528  * require use of the same lock within IBTL.
529  */
/*
 * IBCM_GET_CHAN_PRIVATE(ch, s):   s = ibtl_cm_get_chan_private(ch), or NULL
 *				    when ch is NULL.
 * IBCM_SET_CHAN_PRIVATE(ch, s):   ibtl_cm_set_chan_private(ch, s) when ch is
 *				    not NULL; otherwise nothing.
 * IBCM_RELEASE_CHAN_PRIVATE(ch):  ibtl_cm_release_chan_private(ch) when ch is
 *				    not NULL; otherwise nothing.
 * IBCM_WAIT_CHAN_PRIVATE(ch):	    ibtl_cm_wait_chan_private(ch), with no
 *				    NULL check.
 *
 * Each macro expands to exactly one statement without a trailing semicolon,
 * so it can be used as the body of an unbraced if/else; the caller supplies
 * the semicolon.  "ch" may be evaluated more than once.
 */
#define	IBCM_GET_CHAN_PRIVATE(ch, s) \
	do { \
		if ((ch) != NULL) { \
			(s) = ibtl_cm_get_chan_private(ch); \
		} else { \
			(s) = NULL; \
		} \
	} while (0)

#define	IBCM_SET_CHAN_PRIVATE(ch, s) \
	do { \
		if ((ch) != NULL) { \
			ibtl_cm_set_chan_private(ch, (void *)(s)); \
		} \
	} while (0)

#define	IBCM_RELEASE_CHAN_PRIVATE(ch) \
	do { \
		if ((ch) != NULL) { \
			ibtl_cm_release_chan_private(ch); \
		} \
	} while (0)

#define	IBCM_WAIT_CHAN_PRIVATE(ch) \
	ibtl_cm_wait_chan_private(ch)
548 
/*
 * Timer wrappers: call timeout() with ibcm_timeout_cb (RC) or
 * ibcm_sidr_timeout_cb (UD), passing the first argument through and the
 * second argument converted by drv_usectohz().  Keeping the call behind a
 * macro makes a later switch to realtime_timeout easy.
 */
#define	IBCM_TIMEOUT(statep, usecs)	\
	timeout(ibcm_timeout_cb, statep, drv_usectohz(usecs))
#define	IBCM_UD_TIMEOUT(ud_statep, usecs)	\
	timeout(ibcm_sidr_timeout_cb, ud_statep, drv_usectohz(usecs))
554 
/*
 * Declarations only; the functions are defined outside this header.
 * NOTE(review): judging by the names these are the open/close queueing,
 * close and LAP/APR enter/exit, and timeout-list helpers -- confirm against
 * the definitions.
 */
extern void ibcm_open_enqueue(ibcm_state_data_t *statep);
extern void ibcm_open_done(ibcm_state_data_t *statep);
extern void ibcm_close_enqueue(ibcm_state_data_t *statep);
extern void ibcm_close_done(ibcm_state_data_t *statep, int send_done);
extern void ibcm_close_enter(void);
extern void ibcm_close_exit(void);
extern void ibcm_lapr_enter(void);
extern void ibcm_lapr_exit(void);
extern void ibcm_check_for_opens(void);
extern void ibcm_check_for_async_close(void);
extern void ibcm_close_start(ibcm_state_data_t *statep);
extern void ibcm_run_tlist_thread(void);
567 
568 /*
569  * Structures & defines for SIDR
570  */
571 
572 /*
573  * Define a connection state structure, used for SIDR REQ and REP
574  * (ibcm_ud_state_data_t - struct for SIDR connection)
575  *
576  * ud_state: 		CM connection state (See ibcm_conn_state_t)
577  * ud_req_id:		Request ID
 * ud_svc_id:		Service ID
 * ud_state_mutex:	lock for this structure
580  *
581  * ud_max_cm_retries:	Max retry count for sending a SIDR REQ
582  * ud_ref_cnt:		State ref count for not deleting accidentally
 * ud_remaining_retry_cnt: Remaining count for retries ie., posting
584  *			stored MADs
585  * ud_cm_handler:	Server's handler callback address
586  *
587  * ud_nextp:		CM link for IBTF list
588  * ud_hcap:		A pointer to the HCA's entry
589  *
590  * ud_timerid:		Timer id for the timeout either for re-sending the
591  *			stored mad or deleting the stored mad
592  *			Ex: A SIDR REP response for an incoming SIDR REQ
593  *			An outgoing SIDR REQ on active connection side
594  * ud_timer_value:	Time for any of the above timers in HZ
595  * ud_pkt_life_time:	pkt life time from source to destination
596  * ud_stored_reply_addr: Address for replying using the stored mad
597  *
598  * ud_sidr_req_lid:	SIDR REQ sender's port LID
599  * ud_sidr_req_gid:	SIDR REQ sender's port GID
600  * ud_grh_exists:	TRUE if GRH exists in the incoming SIDR REQ
601  *
602  * ud_passive_qpn:	QPN allocated by server for a SIDR REQ
 * ud_passive_qp_qkey:	QPN's QKEY allocated by server
604  *
605  * ud_block_client_cv:	CV condition variable on which ibt_ud_get_dqpn() waits,
606  *			if called in blocking mode.
607  * ud_return_data:	UD return args, valid for blocking ibt_ud_get_dqpn
608  * ud_timer_stored_state: State stored for timeout handling
609  * ud_blocking_done	: Tells if cv_wait is needed or not. To handle the
610  *			  case where a cv_signal is received prior to its
611  *			  cv_wait().
612  * Note : All timer values that are of type "clock_t" below are in usec
613  */
/*
 * Per-request SIDR (UD) state.  Most fields are described in the block
 * comment that precedes this definition; the lint annotations that follow it
 * record which fields are protected by ud_state_mutex.
 */
typedef struct ibcm_ud_state_data_s {
	kmutex_t		ud_state_mutex;
	ibcm_conn_state_t	ud_state;
	ibcm_mode_t		ud_mode;	/* active or passive side */

	int			ud_ref_cnt;

	uint32_t		ud_req_id;
	ib_svc_id_t		ud_svc_id;

	uint8_t			ud_max_cm_retries;
	uint8_t			ud_remaining_retry_cnt;
	ibt_cm_ud_handler_t	ud_cm_handler;

	struct ibcm_ud_state_data_s	*ud_nextp;
	struct ibcm_hca_info_s *ud_hcap;

	/* timeout related stuff */
	timeout_id_t		ud_timerid;
	clock_t			ud_timer_value;
	clock_t			ud_pkt_life_time;
	ibcm_mad_addr_t		ud_stored_reply_addr;
	ibmf_msg_t		*ud_stored_msg;


	/* SIDR REQ side related */
	ib_lid_t		ud_sidr_req_lid;
	ib_gid_t		ud_sidr_req_gid;
	boolean_t		ud_grh_exists;

	/* Stored values on server/SIDR REP side for re-transmits */
	ib_qpn_t		ud_passive_qpn;
	ib_qkey_t		ud_passive_qp_qkey;

	/* Clients' information */
	void			*ud_state_cm_private;

	struct ibcm_ud_state_data_s	*ud_timeout_next;
	boolean_t		ud_delete_state_data;
	boolean_t		ud_blocking_done;

	/* IBCM_*_POST_BUSY bits, set while the matching MAD post is busy */
	uint8_t			ud_send_mad_flags;

	ibcm_isync_t		ud_clnt_proceed;

	/* The following fields are not used by server side connection */
	kcondvar_t		ud_block_client_cv;
	ibt_ud_returns_t	*ud_return_data;
	ibcm_conn_state_t	ud_timer_stored_state;
} ibcm_ud_state_data_t;
664 
665 _NOTE(MUTEX_PROTECTS_DATA(ibcm_ud_state_data_s::ud_state_mutex,
666     ibcm_ud_state_data_s::{ud_state ud_ref_cnt ud_timerid
667     ud_delete_state_data ud_blocking_done ud_send_mad_flags ud_clnt_proceed
668     ud_timer_stored_state ud_send_mad_flags ud_clnt_proceed
669     ud_block_client_cv ud_timer_value ud_remaining_retry_cnt}))
670 
671 _NOTE(READ_ONLY_DATA(ibcm_ud_state_data_s::{ud_mode ud_req_id ud_svc_id
672     ud_max_cm_retries ud_pkt_life_time ud_stored_reply_addr ud_stored_msg
673     ud_sidr_req_lid ud_sidr_req_gid ud_grh_exists ud_passive_qpn
674     ud_passive_qp_qkey ud_state_cm_private ud_stored_reply_addr ud_stored_msg}))
675 
676 _NOTE(SCHEME_PROTECTS_DATA("Serailized access by ud_block_client_cv",
677     ibcm_ud_state_data_s::{ud_return_data}))
678 
679 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_ud_state_data_s::{ud_cm_handler}))
680 
681 /*
682  * Structure used to specify the SIDR search parameters
683  */
typedef struct ibcm_sidr_srch_s {
	/*
	 * NOTE(review): the fields parallel ud_sidr_req_lid, ud_sidr_req_gid,
	 * ud_grh_exists, ud_req_id and ud_mode of ibcm_ud_state_data_t;
	 * presumably a search compares them pairwise -- confirm.
	 */
	ib_lid_t		srch_lid;
	ib_gid_t		srch_gid;
	boolean_t		srch_grh_exists;
	uint32_t		srch_req_id;
	ibcm_mode_t		srch_mode;
} ibcm_sidr_srch_t;
691 
692 _NOTE(READ_ONLY_DATA(ibcm_sidr_srch_s))
693 
/*
 * Incr/Decr ud_ref_cnt by 1
 *
 * IBCM_UD_REF_CNT_DECR() also passes the UD state structure to
 * ibcm_add_ud_tlist() when the count reaches zero and ud_delete_state_data
 * is B_TRUE, and then asserts that the count has not gone negative.
 *
 * Both macros parenthesize their argument.  IBCM_UD_REF_CNT_DECR() expands
 * to a single statement, so it is safe as the body of an unbraced if/else;
 * the caller supplies the terminating semicolon.  The argument is evaluated
 * more than once and must not have side effects.
 */
#define	IBCM_UD_REF_CNT_INCR(s)	((s)->ud_ref_cnt++)
#define	IBCM_UD_REF_CNT_DECR(s)	\
	do { \
		if ((--((s)->ud_ref_cnt) == 0) && \
		    ((s)->ud_delete_state_data == B_TRUE)) { \
			ibcm_add_ud_tlist(s); \
		} \
		ASSERT((s)->ud_ref_cnt >= 0); \
	} while (0)
703 
704 /*
705  * Structure to store the Service Registration and Service Bind entries.
706  *
707  * Well known service id's are unique on a given HCA, but can be registered
708  * only at some GID's. Hence can be multiple GID's per Service ID. For each
709  * such GID and PKEY combination registered, there will be an ibcm_svc_info_t
710  * entry in the CM global service list.
711  *
712  * Annex A of the spec constrains that there shall be one service provider per
713  * service id, which implies same svc_rc_handler for all such entries
714  * There can be multiple transport types (svc_tran_type) per Service ID. For
715  * each such transport type, there will be an ibcm_svc_info_t entry in the
716  * CM global service list and cm handler can be different
717  *
718  * For locally allocated service id's (maintained by OS), there can be only
719  * one GID, where the service can be registered
720  *
721  * svc_id:		Service ID
722  * svc_num_sids:	Number (Range) of service-ids supported
723  * svc_flags:		Service flags specified at registration time
724  * svc_link:		Global AVL tree of ibcm_svc_info_t structs
725  * svc_rc_handler:	Server handler for RC (only one is valid at a time)
726  * svc_ud_handler:	Server handler for UD (only one is valid at a time)
727  * svc_ref_cnt:		Reference count
728  * svc_to_delete:	If 1, then the entry is marked to be deleted
729  *
730  * sbind_gid:		GID
731  * sbind_pkey:		P_Key
732  * sbind_lease:		Service Lease
733  * sbind_name:		Service Name
734  */
typedef struct ibcm_svc_info_s {
	avl_node_t		svc_link;	/* global AVL tree linkage */
	/*
	 * NOTE(review): not described in the header comment; presumably the
	 * head of the ibcm_svc_bind_s list chained via sbind_link -- confirm.
	 */
	struct ibcm_svc_bind_s	*svc_bind_list;
	ibt_cm_handler_t	svc_rc_handler;	/* server handler for RC */
	ibt_cm_ud_handler_t	svc_ud_handler;	/* server handler for UD */
	int			svc_ref_cnt;	/* see IBCM_SVC_INCR/DECR */
	int			svc_to_delete;	/* 1 if marked for deletion */
	ib_svc_id_t		svc_id;
	int			svc_num_sids;	/* number (range) of sids */
	ibt_service_flags_t	svc_flags;	/* from registration time */
} ibcm_svc_info_t;
746 
typedef struct ibcm_svc_bind_s {
	struct ibcm_svc_bind_s	*sbind_link;	/* next binding in the list */
	void			*sbind_cm_private;
	ib_gid_t		sbind_gid;	/* GID */
	ib_guid_t		sbind_hcaguid;
	uint64_t		sbind_key[2];
				/* sbind_data is assumed to be 8-byte aligned */
	uint8_t			sbind_data[IB_SVC_DATA_LEN]; /* ServiceData */
	uint32_t		sbind_lease;	/* Service Lease */
	ib_pkey_t		sbind_pkey;	/* P_Key */
	uint8_t			sbind_port;
	uint8_t			sbind_rewrite_state; /* IBCM_REWRITE_* value */
	char			sbind_name[IB_SVC_NAME_LEN]; /* Service Name */
} ibcm_svc_bind_t;
761 
762 /*
763  * Service records may be lost by the SM/SA (reboot, change in who
764  * is the master, etc.).  When any of the above occurs, a PORT_UP
765  * async event is supposed to occur, at which point we mark all of
766  * our service record information as stale (REWRITE_NEEDED), and
767  * subsequently make the necessary sa_update calls to get the
768  * SM/SA in sync with all the service records we previously wrote.
769  *
770  * Values for sbind_rewrite_state follow.  This field is protected by
771  * ibcm_svc_info_lock.  ibt_unbind_service has to wait until a service
772  * binding is either idle or needed, sleeping on ibcm_svc_info_cv if
773  * busy (rewrite in progress).
774  */
#define	IBCM_REWRITE_IDLE	0	/* no rewrite pending */
#define	IBCM_REWRITE_NEEDED	1	/* record is stale, rewrite needed */
#define	IBCM_REWRITE_BUSY	2	/* rewrite in progress */
778 
/*
 * Names one port of one HCA by HCA GUID and port number; a pointer to this
 * structure is the argument of ibcm_service_record_rewrite_task().
 */
typedef struct ibcm_port_up_s {
	ib_guid_t	pup_hca_guid;
	uint8_t		pup_port;
} ibcm_port_up_t;

/* arg is a pointer to ibcm_port_up_t */
extern void ibcm_service_record_rewrite_task(void *);
786 
/*
 * Incr/Decr svc_ref_cnt by 1.
 *
 * IBCM_SVC_DECR() also broadcasts on ibcm_svc_info_cv when the count reaches
 * zero and svc_to_delete is set, and then asserts that the count has not
 * gone negative.
 *
 * Both macros parenthesize their argument.  IBCM_SVC_DECR() expands to a
 * single statement, so it is safe as the body of an unbraced if/else; the
 * caller supplies the terminating semicolon.  The argument is evaluated more
 * than once and must not have side effects.
 */
#define	IBCM_SVC_INCR(svcinfop) ((svcinfop)->svc_ref_cnt++)
#define	IBCM_SVC_DECR(svcinfop) \
	do { \
		if (--((svcinfop)->svc_ref_cnt) == 0 && \
		    (svcinfop)->svc_to_delete) \
			cv_broadcast(&ibcm_svc_info_cv); \
		ASSERT((svcinfop)->svc_ref_cnt >= 0); \
	} while (0)
793 
794 _NOTE(READ_ONLY_DATA(ibcm_svc_info_s::{svc_rc_handler svc_ud_handler svc_id
795     svc_num_sids svc_flags}))
796 
797 _NOTE(READ_ONLY_DATA(ibcm_svc_bind_s::{sbind_cm_private sbind_gid sbind_hcaguid
798     sbind_key sbind_data sbind_lease sbind_pkey sbind_port sbind_name}))
799 
800 /* for avl tree search */
typedef struct ibcm_svc_lookup_s {
	/*
	 * NOTE(review): presumably mirrors svc_id and svc_num_sids of
	 * ibcm_svc_info_t as the AVL search key -- confirm.
	 */
	ib_svc_id_t	sid;
	int		num_sids;
} ibcm_svc_lookup_t;
805 
/* Singly linked list node holding one ibt_clnt_hdl_t. */
typedef struct ibcm_ar_ref_s {
	struct ibcm_ar_ref_s	*ar_ref_link;	/* next node */
	ibt_clnt_hdl_t		ar_ibt_hdl;
} ibcm_ar_ref_t;
810 
typedef struct ibcm_ar_s {
	ibt_ar_t		ar;
	int			ar_flags;	/* IBCM_AR_* value, see below */
	int			ar_waiters;	/* # of waiters */
	kcondvar_t		ar_cv;
	uint8_t			ar_port;
	uint8_t			ar_rewrite_state; /* see sbind_rewrite_state */
	ibcm_ar_ref_t		*ar_ibt_hdl_list;
	struct ibcm_ar_s	*ar_link;
	sa_service_record_t	*ar_srv_recp;
	ibmf_saa_handle_t	ar_saa_handle;
	struct ibcm_hca_info_s	*ar_hcap;
} ibcm_ar_t;

/* ar_flags */
#define	IBCM_AR_SUCCESS		0
#define	IBCM_AR_FAILED		1
#define	IBCM_AR_INITING		2
829 
830 
831 /*
832  * These flags are used for adding (if an entry does not exist) or
833  * for just looking one up
834  */
typedef enum ibcm_lookup_flag_e {
	/* just lookup */
	IBCM_FLAG_LOOKUP		= 0,

	/* just add */
	IBCM_FLAG_ADD			= 1,

	/* lookup first. add if lookup failed */
	IBCM_FLAG_LOOKUP_AND_ADD	= 2
} ibcm_lookup_flag_t;
841 
typedef enum ibcm_finit_state_e {
	/* CM's init is not yet completed */
	IBCM_FINIT_INIT		= 0,

	/* CM not in either init or fini */
	IBCM_FINIT_IDLE		= 1,

	/* CM busy either in init or fini */
	IBCM_FINIT_BUSY		= 2,

	/* Init failed */
	IBCM_FINIT_FAIL		= 3,

	/* Fini has succeeded */
	IBCM_FINIT_SUCCESS	= 4
} ibcm_finit_state_t;
849 
850 /*
851  * Identifies HCA's state. Used in the definition of ibcm_hca_info_t
852  * If HCA is in ACTIVE state only does CM allow any MAD processing.
853  */
typedef enum ibcm_hca_state_e {
	IBCM_HCA_INIT		= 0,
	IBCM_HCA_ACTIVE		= 1,	/* the only state allowing MADs */
	IBCM_HCA_NOT_ACTIVE	= 2
} ibcm_hca_state_t;
859 
860 /* QP information per pkey, stored in port information */
typedef struct ibcm_qp_list_s {
	ib_pkey_t		qp_pkey;	/* pkey this entry is for */
	ibmf_qp_handle_t	qp_cm;		/* IBMF QP handle */
	uint32_t		qp_ref_cnt;
	struct ibcm_port_info_s *qp_port;	/* owning port information */
	struct ibcm_qp_list_s	*qp_next;	/* next entry in the list */
} ibcm_qp_list_t;
868 
869 _NOTE(READ_ONLY_DATA(ibcm_qp_list_s::{qp_pkey qp_cm qp_port qp_next}))
870 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_qp_list_s))
871 
872 /*
873  * port information per HCA
874  * port_ibmf_hdl	- contains IBMF handle for that port if valid
875  *			  otherwise is NULL
876  * port_ibmf_saa_hdl	- contains SA Access handle for that port if valid
877  *			  otherwise is NULL
 * port_qp1		- QP entry embedded in the port structure
 * port_qplist		- pointer to a list of further ibcm_qp_list_t entries
 * port_hcap		- pointer to the ibcm_hca_info_s for this port
878  */
879 typedef struct ibcm_port_info_s {
880 	ibmf_handle_t		port_ibmf_hdl;
881 	ibmf_saa_handle_t	port_ibmf_saa_hdl;
882 	ib_gid_t		port_sgid0;
883 	uint8_t			port_event_status;
884 	uint8_t			port_saa_open_in_progress;
885 	uint8_t			port_num;
886 	ibmf_register_info_t	port_ibmf_reg;
887 	ibmf_impl_caps_t	port_ibmf_caps;
888 	ibcm_qp_list_t		port_qp1;
889 	ibcm_qp_list_t		*port_qplist;
890 	struct ibcm_hca_info_s	*port_hcap;
891 } ibcm_port_info_t;
892 
893 _NOTE(READ_ONLY_DATA(ibcm_port_info_s::{port_num port_ibmf_caps port_qp1
894     port_hcap}))
895 
896 /*
 * Value that tells the timeout list processing thread to exit.
 * The literal is written in octal (01), which equals decimal 1.
 */
897 #define	IBCM_TIMEOUT_THREAD_EXIT	01
898 
899 /*
900  * IBCM code relies on AVL routines already in kernel for faster lookups.
901  * AVL was chosen over the mod hashing mechanism because of the latter's
902  * internal limitations in the kernel (no support for over 100,000 keys).
903  *
904  * IBCM uses two AVL trees on the passive side and one on active side per HCA.
905  * The two trees are needed on the passive side because the tree lookup
906  * criteria change based on the type of message being processed. On the
907  * passive side the lookup is based on remote_qpn and remote_hca_guid only for
908  * an incoming REQ message; for all other messages it is based on remote_comid.
909  * On active side the lookup criteria remains static based upon local_comid.
910  *
911  * AVL tree insertions are done by grabbing the writer lock (hca_state_rwlock)
912  * and lookups are done by grabbing the reader lock.
913  */
914 
915 /*
916  * CM's per HCA data structure.
917  *
918  * One such entry is added/removed on hca attach/detach notifications to CM
919  * respectively.
920  *
921  * Comids are used for all connections. Req ids are used for SIDR REQ and
922  * SIDR REP messages.  These are  simple counters that wrap around INT_MAX.
923  * NOTE: The starting value for comid, per HCA, is 2.
 * NOTE(review): the structure below holds vmem arenas (hca_comid_arena and
 * hca_reqid_arena) for these ids and has no hca_next_comid/hca_next_reqid
 * counters, so the "simple counters" wording above may be stale - confirm.
924  *
925  * hca_state:		HCA's current state (ibcm_hca_state_t) - one of
926  *				IBCM_HCA_INIT, IBCM_HCA_ACTIVE,
 *				IBCM_HCA_NOT_ACTIVE
927  * hca_guid:            Active HCA guid
928  * hca_caps:		HCA capability mask
 * hca_vendor_id:	Vendor id (24-bit field), see IBCM_IS_HCA_TAVOR
 * hca_device_id:	Device id, see IBCM_IS_HCA_TAVOR
929  * hca_ack_delay:	HCA ack delay
930  * hca_max_rdma_in_qp:	Max RDMA in Reads
931  * hca_max_rdma_out_qp:	Max RDMA out Reads
 * hca_comid_arena:	Arena for com ids
 * hca_reqid_arena:	Arena for req ids
932  * hca_active_tree:	This tree is used for lookups on Active/Passive side
933  *			CM based on communication id ONLY.
934  * hca_passive_tree:	This tree is used to lookup/create ibcm_state_data_t on
935  *			Passive Side CM based on remote_qpn and remote_hca_guid.
936  * hca_passive_comid_tree:
937  *			This tree is used to lookup/create ibcm_state_data_t on
938  *			Passive Side CM based on remote_comid and
939  *			remote_hca_guid.
940  * hca_state_rwlock:	reader/writer Lock for the hca entry
941  *				for hca_active_tree
942  *				for hca_passive_tree
943  *				for hca_passive_comid_tree
944  * hca_sidr_list:	List for UD side
945  * hca_sidr_list_lock:	List lock for UD side
946  *				for hca_sidr_list
950  * hca_next:		Pointer to the next HCA
951  * hca_svc_cnt:		A count of services registered on this hca
952  * hca_acc_cnt:		A count of active references to this ibcm_hca_info_t
953  * hca_res_cnt:		A count of client's active resources on this hca
954  * hca_num_ports:	Number of ports that this HCA has
955  * hca_port_info:	Per port information (IBMF/SA access handles etc.)
956  *
957  * Note : The global mutex ibcm_global_hca_mutex declared in CM is used for
958  * accesses to the following fields :
959  * hca_acc_cnt, hca_res_cnt, hca_svc_cnt, hca_state
960  */
961 typedef struct ibcm_hca_info_s {
962 	ibcm_hca_state_t	hca_state;		/* Is HCA attached? */
963 	ib_guid_t		hca_guid;		/* HCA's guid value */
964 	ibt_hca_flags_t		hca_caps;		/* HCA capabilities */
965 	uint32_t		hca_vendor_id:24;
966 	uint16_t		hca_device_id;
967 	ib_time_t		hca_ack_delay;		/* HCA ack delay */
968 	uint8_t			hca_max_rdma_in_qp;	/* Max RDMA in Reads */
969 	uint8_t			hca_max_rdma_out_qp;	/* Max RDMA out Reads */
970 	vmem_t			*hca_comid_arena;	/* arena for com ids */
971 	vmem_t			*hca_reqid_arena;	/* arena for req ids */
972 	avl_tree_t		hca_active_tree;	/* active node tree */
973 	avl_tree_t		hca_passive_tree;	/* passive node tree */
974 	avl_tree_t		hca_passive_comid_tree;	/* passive comid tree */
975 	krwlock_t		hca_state_rwlock;	/* per HCA lock */
976 	ibcm_ud_state_data_t	*hca_sidr_list;		/* SIDR state list */
977 	krwlock_t		hca_sidr_list_lock;
978 
979 	struct ibcm_hca_info_s	*hca_next;		/* Next HCA entry */
980 
981 	int			hca_svc_cnt;		/* # of */
982 							/* services allocated */
983 	int			hca_acc_cnt;		/* active references */
984 	int			hca_res_cnt;		/* total resources */
985 	uint8_t			hca_num_ports;		/* #ports on this HCA */
986 	ibcm_port_info_t	hca_port_info[1];	/* Per portinfo array */
987 } ibcm_hca_info_t;
988 
989 _NOTE(RWLOCK_PROTECTS_DATA(ibcm_hca_info_s::hca_state_rwlock,
990     ibcm_hca_info_s::{hca_active_tree hca_passive_tree hca_passive_comid_tree}))
991 
992 _NOTE(SCHEME_PROTECTS_DATA("hca_sidr_list_lock protects hca_sidr_list",
993     ibcm_hca_info_s::{hca_sidr_list}))
994 
995 _NOTE(READ_ONLY_DATA(ibcm_hca_info_s::{hca_guid hca_caps hca_ack_delay
996     hca_max_rdma_in_qp hca_max_rdma_out_qp hca_comid_arena hca_reqid_arena
997     hca_passive_tree hca_active_tree hca_passive_comid_tree hca_num_ports }))
998 
999 /*
 * Are we on Tavor HCA?  True when hcap's hca_device_id is 0x5a44 and its
 * hca_vendor_id is 0x15b3.  hcap is evaluated twice.
 */
1000 #define	IBCM_IS_HCA_TAVOR(hcap)	\
1001 	(((hcap)->hca_device_id == 0x5a44) && ((hcap)->hca_vendor_id == 0x15b3))
1002 
1003 /*
1004  * Called to ensure that the HCA is in the "attached" state and is willing
1005  * to process connections etc.: true only when hca_state is IBCM_HCA_ACTIVE.
1006  */
1007 #define	IBCM_ACCESS_HCA_OK(s)	((s)->hca_state == IBCM_HCA_ACTIVE)
1008 
1009 /*
1010  * Passive AVL tree lookup info  (for hca_passive_tree)
1011  * CM needs this structure because passive tree lookups are keyed on
1012  * both QPN and HCA GUID.
1013  */
1014 typedef	struct ibcm_passive_node_info_s {
1015 	ib_qpn_t	info_qpn;
1016 	ib_guid_t	info_hca_guid;
1017 } ibcm_passive_node_info_t;
1018 
1019 /*
1020  * Passive Com ID AVL tree lookup info  (for hca_passive_comid_tree)
1021  * CM needs this structure because passive comid tree lookups are keyed on
1022  * both Remote Com ID and Remote HCA GUID.
1023  */
1024 typedef struct ibcm_passive_comid_node_info_s {
1025 	ib_com_id_t	info_comid;
1026 	ib_guid_t	info_hca_guid;
1027 } ibcm_passive_comid_node_info_t;
1028 
1029 /*
 * CM proceed task args structure definition.
 * The tst union carries either the "rc" member (an ibcm_state_data_t plus
 * reply event data) or the "ud" member (an ibcm_ud_state_data_t plus QPN,
 * QKey and redirect info).
 */
1030 typedef struct ibcm_proceed_targs_s {
1031 	ibt_cm_event_type_t	event;
1032 	ibt_cm_status_t		status;
1033 	union tst_t {
1034 		struct rc_s {
1035 			ibcm_state_data_t	*statep;
1036 			ibt_cm_proceed_reply_t	rc_cm_event_data;
1037 		} rc;
1038 		struct ud_s {
1039 			ibcm_ud_state_data_t	*ud_statep;
1040 			ib_qpn_t		ud_qpn;
1041 			ib_qkey_t		ud_qkey;
1042 			ibt_redirect_info_t	ud_redirect_info;
1043 		} ud;
1044 	} tst;
1045 	ibt_priv_data_len_t	priv_data_len;	/* presumably # of valid */
						/* bytes in priv_data; confirm */
1046 	/* keep priv_data as the last field */
1047 	uint8_t			priv_data[IBT_MAX_PRIV_DATA_SZ];
1048 } ibcm_proceed_targs_t;
1049 
1050 _NOTE(READ_ONLY_DATA(ibcm_proceed_targs_s))
1051 
1052 
1053 /*
1054  * Function prototypes for the AVL tree compare routines; one per tree
 * kind (active, passive, passive comid).  Each takes two const void *
 * arguments and returns an int.
1055  */
1056 int	ibcm_active_node_compare(const void *, const void *);
1057 int	ibcm_passive_node_compare(const void *, const void *);
1058 int	ibcm_passive_comid_node_compare(const void *, const void *);
1059 
1060 /*
1061  * Function prototype to allocate IBMF/SA_ACCESS handles for one port of
 * an HCA; the port is identified by port_index.
1062  */
1063 ibt_status_t	ibcm_hca_reinit_port(ibcm_hca_info_t *hca_p,
1064 		    uint8_t port_index);
1065 
1066 /*
 * Function prototypes to manage CM's IBMF QPs (ibcm_qp_list_t entries):
 * find one by HCA, port number and pkey, release one, free one, or free
 * all QPs of a port.
 */
1067 
1068 ibcm_qp_list_t *ibcm_find_qp(ibcm_hca_info_t *hcap, int port_no,
1069 		    ib_pkey_t pkey);
1070 
1071 void		ibcm_release_qp(ibcm_qp_list_t *cm_qp_entry);
1072 
1073 ibcm_status_t	ibcm_free_qp(ibcm_qp_list_t *cm_qp_entry);
1074 
1075 ibcm_status_t	ibcm_free_allqps(ibcm_hca_info_t *hcap, int port_no);
1076 
1077 /*
1078  * Function prototypes to allocate and free outgoing CM messages.
 * Both take the IBMF handle and a pointer to the ibmf_msg_t pointer;
 * the allocator additionally takes the MAD method.
1079  */
1080 ibt_status_t
1081 ibcm_alloc_out_msg(ibmf_handle_t ibmf_handle, ibmf_msg_t **ibmf_msgpp,
1082     uint8_t method);
1083 ibcm_status_t
1084 ibcm_free_out_msg(ibmf_handle_t ibmf_handle, ibmf_msg_t **ibmf_msgpp);
1085 
1086 /*
1087  * Definition for CM state transition processing function.
 * The ibcm_process_*_msg() prototypes later in this file share this
 * signature.
1088  */
1089 typedef void (*ibcm_state_handler_t)(ibcm_hca_info_t *hcap,
1090 		uint8_t *cm_input_mad, ibcm_mad_addr_t *cm_mad_addr);
1091 
1092 /*
1093  * CM REQ Message structure
1094  *
1095  * Request for communication.
1096  *
1097  * Things of interest are:-
1098  * ib_qpn_t cannot be used - it is typecast to uint32_t but is 24 bits
1099  * ib_eecn_t cannot be used - it is typecast to uint32_t but is 24 bits
1100  *
1101  * (See Table 85 REQ Message Contents - chapter 12 in IB Spec v1.0a)
1102  *
1103  */
1104 typedef struct ibcm_req_msg_s {
1105 	ib_com_id_t	req_local_comm_id;	/* Local communication id */
1106 						/* 32 bits */
1107 	uint32_t	req_rsvd1;		/* Reserved1 - 32 bits */
1108 	ib_svc_id_t	req_svc_id;		/* Service Id - 64 bits */
1109 	ib_guid_t	req_local_ca_guid;	/* Local CA GUID - 64 bits */
1110 	uint32_t	req_rsvd1p;		/* Reserved1+ - 32 bits */
1111 	ib_qkey_t	req_local_qkey;		/* Local Q_KEY - 32 bits */
1112 	uint32_t	req_local_qpn_plus;	/* QPN_24 RESP_RSRC_8 */
1113 						/* local side QPN - 24 bits */
1114 						/* Offered responder */
1115 						/* resources - 8 bits */
1116 	uint32_t	req_local_eec_no_plus;	/* LOCAL_EECN_24 INIT_DEPTH_8 */
1117 						/* Local side EECN - 24 bits */
1118 						/* Offered initiator */
1119 						/* depth - 8 bits */
1120 	uint32_t	req_remote_eecn_plus;	/* REM_EECN_24 TO_5 TT_2 EE_1 */
1121 						/* Remote side EECN - 24 bits */
1122 						/* Remote CM timeout - 5 bits */
1123 						/* Transport srvtype - 2 bits */
1124 						/* End-to-End flow - 1 bit */
1125 	uint32_t	req_starting_psn_plus;	/* START_PSN_24 TO_5 RETRY_3 */
1126 						/* Starting PSN - 24 bits */
1127 						/* Local CM timeout - 5 bits */
1128 						/* Retry count - 3 bits */
1129 	ib_pkey_t	req_part_key;		/* Partition key - 16 bits */
1130 	uint8_t		req_mtu_plus;		/* PATH_MTU_4 RDC_1 RNR_3 */
1131 						/* Path Pkt MTU - 4 bits */
1132 						/* Does RDC exist? - 1 bit */
1133 						/* RNR retry count - 3 bits */
1134 	uint8_t		req_max_cm_retries_plus; /* MAX_CM_RET_4 SRQ_1 RSV_3 */
1135 						/* Max CM retries - 4 bits */
1136 						/* SRQ Exists - 1 bit */
1137 						/* Reserved2 - 3 bits */
1138 	ib_lid_t	req_primary_l_port_lid;	/* Primary local port LID */
1139 	ib_lid_t	req_primary_r_port_lid;	/* Primary Remote port LID */
1140 	ib_gid_t	req_primary_l_port_gid;	/* Primary local port GID */
1141 	ib_gid_t	req_primary_r_port_gid;	/* Primary remote port GID */
1142 	uint32_t	req_primary_flow_label_plus; /* FLOW_20 RSV_6 SRATE_6 */
1143 						/* Prim. flow label - 20 bits */
1144 						/* Reserved3 - 6 bits */
1145 						/* Primary rate - 6 bits */
1146 	uint8_t		req_primary_traffic_class;
1147 						/* Primary Traffic class */
1148 	uint8_t		req_primary_hop_limit;	/* Prim Hop Limit */
1149 	uint8_t		req_primary_sl_plus;	/* PRIMARY_SL_4 LOCAL_1 RSV_3 */
1150 						/* Primary SL - 4 bits */
1151 						/* Prim. subnet local - 1 bit */
1152 						/* Reserved4 - 3 bits */
1153 	uint8_t		req_primary_localtime_plus; /* LOCAL_TO_5 RSV_3 */
1154 						/* Primary local */
1155 						/* timeout - 5 bits */
1156 						/* Reserved5 - 3 bits */
1157 	ib_lid_t	req_alt_l_port_lid;	/* Alt local port LID */
1158 	ib_lid_t	req_alt_r_port_lid;	/* Alt Remote port LID */
1159 	/* Note: req_alt_l_port_gid/req_alt_r_port_gid are not 8-byte aligned */
1160 	uint8_t		req_alt_l_port_gid[16];	/* Alt local port GID */
1161 	uint8_t		req_alt_r_port_gid[16];	/* Alt remote port GID */
1162 	uint32_t	req_alt_flow_label_plus; /* ALT_FLOW_20 RSV_6 ARATE_6 */
1163 						/* Alt flow label - 20 bits */
1164 						/* Reserved6 - 6 bits */
1165 						/* Alternate rate - 6 bits */
1166 	uint8_t		req_alt_traffic_class;	/* Alt traffic class */
1167 	uint8_t		req_alt_hop_limit;	/* Alt hop limit */
1168 	uint8_t		req_alt_sl_plus;	/* ALT_SL_4 A_LOCAL_1 RSV_3 */
1169 						/* Alternate SL - 4 bits */
1170 						/* Alt subnet local - 1 bit */
1171 						/* Reserved7 - 3 bits */
1172 	uint8_t		req_alt_localtime_plus;	/* ALT_LOCAL_ACK_TO_5 RSV_3 */
1173 						/* Alt Local ACK */
1174 						/* timeout - 5 bits */
1175 						/* Reserved8 - 3 bits */
1176 	uint8_t		req_private_data[IBT_REQ_PRIV_DATA_SZ];
1177 						/* Private data */
1178 } ibcm_req_msg_t;
1179 
1180 
1181 /*
1182  * The following set of defines are short-cuts to CEP_PATH or GRH info.
 * Each macro in this group and the two groups below expands to a plain
 * member-access expression on its argument (no enclosing parentheses).
1183  */
1184 #define	IBCM_PRIM_CEP_PATH(s)	(s)->oc_path->pi_prim_cep_path
1185 #define	IBCM_PRIM_ADDS_VECT(s)	(s)->oc_path->pi_prim_cep_path.cep_adds_vect
1186 
1187 #define	IBCM_ALT_CEP_PATH(s)	(s)->oc_path->pi_alt_cep_path
1188 #define	IBCM_ALT_ADDS_VECT(s)	(s)->oc_path->pi_alt_cep_path.cep_adds_vect
1189 
1190 #define	IBCM_UD_CEP_PATH(s)	(s)->us_path_info->ai_cep_path
1191 #define	IBCM_UD_ADDS_VECT(s)	(s)->us_path_info->ai_cep_path.cep_adds_vect
1192 
1193 /*
1194  * The following set of defines are short-cuts to ibt_cm_event_t
 * (the argument is the event object itself, not a pointer to it)
1195  */
1196 #define	IBCM_EVT_REQ(e)		(e).cm_event.req
1197 #define	IBCM_EVT_REP(e)		(e).cm_event.rep
1198 
1199 /*
1200  * The following set of defines are short-cuts to qp_attrs or qp_info
 * (IBCM_QP_* go through a qp_info member; IBCM_QPINFO* start at
 * qp_transport directly)
1201  */
1202 #define	IBCM_QP_RC(q)		(q).qp_info.qp_transport.rc
1203 #define	IBCM_QP_UD(q)		(q).qp_info.qp_transport.ud
1204 #define	IBCM_QP_UC(q)		(q).qp_info.qp_transport.uc
1205 
1206 #define	IBCM_QPINFO(q)		(q).qp_transport
1207 #define	IBCM_QPINFO_RC(q)	(q).qp_transport.rc
1208 #define	IBCM_QPINFO_RC_PATH(q)	(q).qp_transport.rc.rc_path
1209 #define	IBCM_QPINFO_UC(q)	(q).qp_transport.uc
1210 #define	IBCM_QPINFO_UC_PATH(q)	(q).qp_transport.uc.uc_path
1211 #define	IBCM_QPINFO_UD(q)	(q).qp_transport.ud
1212 
1213 
/*
 * The following set of defines are short-cuts to RC and SIDR MAD HDRs.
 *
 * IBCM_OUT_* access the send buffers (im_msgbufs_send) of an IBMF message
 * and IBCM_IN_* access its receive buffers (im_msgbufs_recv):
 *	*_MADP	- the im_bufs_mad_hdr member
 *	*_HDRP	- the same member cast to ib_mad_hdr_t *
 *	*_MSGP	- the im_bufs_cl_data member
 *
 * The msgp argument is parenthesized in every expansion so that an
 * expression argument (e.g. "p + 1") binds correctly before "->".
 */

#define	IBCM_OUT_MADP(msgp)	((msgp)->im_msgbufs_send.im_bufs_mad_hdr)
#define	IBCM_OUT_HDRP(msgp)	((ib_mad_hdr_t *)IBCM_OUT_MADP(msgp))
#define	IBCM_OUT_MSGP(msgp)	((msgp)->im_msgbufs_send.im_bufs_cl_data)

#define	IBCM_IN_MADP(msgp)	((msgp)->im_msgbufs_recv.im_bufs_mad_hdr)
#define	IBCM_IN_HDRP(msgp)	((ib_mad_hdr_t *)IBCM_IN_MADP(msgp))
#define	IBCM_IN_MSGP(msgp)	((msgp)->im_msgbufs_recv.im_bufs_cl_data)

/*
 * Address of the first private data byte of the REJ message held in the
 * send buffer of statep->stored_msg.  The whole expansion is parenthesized
 * so that a postfix operator applied to the macro acts on the resulting
 * pointer.
 *
 * NOTE(review): the msgp argument is not used; the expansion reads
 * "statep" from the caller's scope.  Left as is because callers are not
 * visible from this header - confirm before switching it to (msgp).
 */
#define	IBCM_REJ_PRIV(msgp)	(&(((ibcm_rej_msg_t *) \
	IBCM_OUT_MSGP(statep->stored_msg))->rej_private_data[0]))
1226 /*
1227  * CM MRA Message structure
1228  *
1229  * Message Receipt Acknowledgement (MRA).
1230  *
1231  * NOTE: IB hosts and targets are required to be able to receive and
1232  * act upon an MRA, but the ability to send an MRA is optional.
1233  */
1234 typedef struct ibcm_mra_msg_s {
1235 	ib_com_id_t	mra_local_comm_id;	/* Local communication id */
1236 	ib_com_id_t	mra_remote_comm_id;	/* Remote communication id */
1237 	uint8_t		mra_message_type_plus;	/* MSG_TYPE_2 RSV_6 */
						/* Message Type - 2 bits */
1238 						/* Reserved1 - 6 bits */
1239 	uint8_t		mra_service_timeout_plus; /* SVC_TO_5 RSV_3 */
1240 						/* Service timeout - 5 bits */
1241 						/* Reserved2 - 3 bits */
1242 	uint8_t		mra_private_data[IBT_MRA_PRIV_DATA_SZ];
1243 						/* Private data */
1244 } ibcm_mra_msg_t;
1245 
1246 /*
1247  * CM REJ Message structure
1248  * REJ indicates that the sender will not continue through the communication
1249  * establishment sequence and gives the reason why it will not.
1250  *
1251  * NOTE: See ibt_cm_reason_t in common/sys/ib/ib_cm.h for complete list
1252  * of rejection reasons supported.
1253  */
1254 typedef struct ibcm_rej_msg_s {
1255 	ib_com_id_t	rej_local_comm_id;	/* Local communication id */
1256 	ib_com_id_t	rej_remote_comm_id;	/* Remote communication id */
1257 	uint8_t		rej_msg_type_plus;	/* REJ_MSG_TYPE_2 RSV_6 */
1258 						/* Msg being REJed - 2 bits */
1259 						/* Reserved1 - 6 bits */
1260 	uint8_t		rej_reject_info_len_plus; /* REJ_INFO_LEN_7 RSV_1 */
1261 						/* Rej. Info Length - 7 bits */
1262 						/* Reserved2 - 1 bit */
1263 	uint16_t	rej_rejection_reason;	/* Reject err code - 16 bits */
1264 	uint8_t		rej_addl_rej_info[IBT_CM_ADDL_REJ_LEN];
1265 						/* Additional Reject Info */
1266 	uint8_t		rej_private_data[IBT_REJ_PRIV_DATA_SZ];
1267 						/* Private data */
1268 } ibcm_rej_msg_t;
1269 
1270 /*
1271  * CM REP Message structure
1272  *
1273  * REP is returned in response to REQ, indicating that the respondent
1274  * accepts the Service-ID, proposed primary port, and any parameters
1275  * specified in the PrivateData of the REQ.
 *
 * rep_local_ca_guid is declared as a byte array rather than ib_guid_t.
1276  */
1277 typedef struct ibcm_rep_msg_s {
1278 	ib_com_id_t	rep_local_comm_id;	/* Local communication id */
1279 	ib_com_id_t	rep_remote_comm_id;	/* Remote communication id */
1280 	ib_qkey_t	rep_local_qkey;		/* Local Q_KEY */
1281 	uint32_t	rep_local_qpn_plus;	/* LOCAL_QPN_24 RSV_8 */
1282 						/* Local side QPN - 24 bits */
1283 						/* Reserved1 - 8 bits */
1284 	uint32_t	rep_local_eecn_plus;	/* LOCAL_EECN_24 RSV_8 */
1285 						/* Local side EECN - 24 bits */
1286 						/* Reserved2 - 8 bits */
1287 	uint32_t	rep_starting_psn_plus;	/* STARTING_PSN_24 RSV_8 */
1288 						/* Starting PSN - 24 bits */
1289 						/* Reserved3 - 8 bits */
1290 	uint8_t		rep_resp_resources;	/* Responder resources 8 bits */
1291 	uint8_t		rep_initiator_depth;	/* Initiator depth - 8 bits */
1292 	uint8_t		rep_target_delay_plus;	/* TGT_ACK_DLY_5 FAIL_2 EE_1 */
1293 						/* Target ACK delay - 5 bits */
1294 						/* Failover accepted - 2 bits */
1295 						/* End-to-End flow control - */
1296 						/* 1 bit */
1297 	uint8_t		rep_rnr_retry_cnt_plus;	/* RNR_COUNT_3 SRQ_1 RSV_4 */
1298 						/* RNR retry count - 3 bits */
1299 						/* SRQ Exists - 1 bit */
1300 						/* Reserved4 - 4 bits */
1301 	uint8_t		rep_local_ca_guid[8];	/* Local CA GUID - 64 bits */
1302 	uint8_t		rep_private_data[IBT_REP_PRIV_DATA_SZ];
1303 						/* Private data */
1304 } ibcm_rep_msg_t;
1305 
1306 
1307 /*
1308  * CM RTU Message structure
1309  *
1310  * RTU indicates that the connection is established, and that the
1311  * recipient may begin transmitting.  The message carries only the two
 * communication ids and private data.
1312  */
1313 typedef struct ibcm_rtu_msg_s {
1314 	ib_com_id_t	rtu_local_comm_id;	/* Local communication id */
1315 	ib_com_id_t	rtu_remote_comm_id;	/* Remote communication id */
1316 	uint8_t		rtu_private_data[IBT_RTU_PRIV_DATA_SZ];
1317 						/* Private data */
1318 } ibcm_rtu_msg_t;
1319 
1320 
1321 /*
1322  * CM DREQ Message structure
1323  *
1324  * DREQ is sent to initiate the connection release sequence.
1325  */
1326 typedef struct ibcm_dreq_msg_s {
1327 	ib_com_id_t	dreq_local_comm_id;	/* Local communication id */
1328 	ib_com_id_t	dreq_remote_comm_id;	/* Remote communication id */
1329 	uint32_t	dreq_remote_qpn_eecn_plus; /* REM_QPN_EECN_24 RSV_8 */
1330 						/* Remote QPN/EECN - 24 bits */
1331 						/* Reserved - 8 bits */
1332 	uint8_t		dreq_private_data[IBT_DREQ_PRIV_DATA_SZ];
1333 						/* Private data */
1334 } ibcm_dreq_msg_t;
1335 
1336 
1337 /*
1338  * CM DREP Message structure
1339  *
1340  * DREP is sent in response to DREQ, and signifies that the sender has
1341  * received DREQ.  The message carries only the two communication ids
 * and private data.
1342  */
1343 typedef struct ibcm_drep_msg_s {
1344 	ib_com_id_t	drep_local_comm_id;	/* Local communication id */
1345 	ib_com_id_t	drep_remote_comm_id;	/* Remote communication id */
1346 	uint8_t		drep_private_data[IBT_DREP_PRIV_DATA_SZ];
1347 						/* Private data */
1348 } ibcm_drep_msg_t;
1349 
1350 
1351 /*
1352  * CM LAP Message structure
1353  *
1354  * NOTE: LAP and APR messages are optional. These are needed if CM
1355  * accepts REQ messages and agrees to perform Automatic Path Migration.
1356  *
1357  * This message is used to change the alternate path information for a
1358  * specific connection.
1359  */
1360 typedef struct ibcm_lap_msg_s {
1361 	ib_com_id_t	lap_local_comm_id;	/* Local communication id */
1362 	ib_com_id_t	lap_remote_comm_id;	/* Remote communication id */
1363 	uint32_t	lap_rsvd1;		/* Reserved - 32 bits */
1364 	uint32_t	lap_remote_qpn_eecn_plus; /* REM_EECN_24 TO_5 RSV_3 */
1365 						/* Remote QPN/EECN - 24 bits */
1366 						/* Remote CM response */
1367 						/* timeout - 5 bits */
1368 						/* Reserved1 - 3 bits */
1369 	uint32_t	lap_rsvd2;		/* Reserved2 - 32 bits */
1370 	ib_lid_t	lap_alt_l_port_lid;	/* Alt local port LID */
1371 	ib_lid_t	lap_alt_r_port_lid;	/* Alt Remote port LID */
1372 	ib_gid_t	lap_alt_l_port_gid;	/* Alt local port GID */
1373 	ib_gid_t	lap_alt_r_port_gid;	/* Alt remote port GID */
1374 	uint32_t	lap_alt_flow_label_plus; /* ALT_FLOW_20 RSV_4 TCL_8 */
1375 						/* Alt flow label - 20 bits */
1376 						/* Reserved3 - 4 bits */
1377 						/* Alt traffic class - 8 bits */
1378 	uint8_t		lap_alt_hop_limit;	/* Alt hop limit */
1379 	uint8_t		lap_alt_srate_plus;	/* RSV_2 ALT_SRATE_6 */
						/* Reserved4 - 2 bits */
1380 						/* Alt. static rate - 6 bits */
1381 	uint8_t		lap_alt_sl_plus;	/* ALT_SL_4 A_LOCAL_1 RSV_3 */
1382 						/* Alternate SL - 4 bits */
1383 						/* Alt subnet local - 1 bit */
1384 						/* Reserved5 - 3 bits */
1385 	uint8_t		lap_alt_local_acktime_plus; /* ALT_TO_5 RSV_3 */
1386 						/* Alt Local ACK */
1387 						/* timeout - 5 bits */
1388 						/* Reserved6 - 3 bits */
1389 	uint8_t		lap_private_data[IBT_LAP_PRIV_DATA_SZ];
1390 						/* Private data */
1391 } ibcm_lap_msg_t;
1392 
1393 
1394 /*
1395  * CM APR Message structure
1396  *
1397  * APR is sent in response to a LAP request. MRA may be sent to allow
1398  * processing of the LAP.
1399  */
1400 typedef struct ibcm_apr_msg_s {
1401 	ib_com_id_t	apr_local_comm_id;	/* Local communication id */
1402 	ib_com_id_t	apr_remote_comm_id;	/* Remote communication id */
1403 	uint8_t		apr_addl_info_len;	/* Add'l Info Len - 8 bits */
1404 	uint8_t		apr_ap_status;		/* AP status - 8 bits */
1405 	uint16_t	apr_rsvd1;		/* Reserved1 - 16 bits */
1406 	uint8_t		apr_addl_info[IBT_CM_APR_ADDL_LEN];
1407 						/* Additional Information; */
						/* see apr_addl_info_len */
1408 	uint8_t		apr_private_data[IBT_APR_PRIV_DATA_SZ];
1409 						/* Private data */
1410 } ibcm_apr_msg_t;
1411 
1412 
1413 /*
1414  * CM SIDR_REQ Message structure
1415  *
1416  * NOTE: SIDR_REQ and SIDR_REP messages are conditionally required.
1417  * These are needed if non-management services are provided on the Channel
1418  * Adapter other than fixed QPNs. Management services include those
1419  * provided through Subnet Manager Packets or General Management Packets.
1420  *
1421  * SIDR_REQ requests that the recipient return the information necessary
1422  * to communicate via UD messages with the entity specified by
1423  * SIDR_REQ:ServiceID
1424  */
1425 typedef struct ibcm_sidr_req_msg_s {
1426 	uint32_t	sidr_req_request_id;		/* Request id */
1427 	ib_pkey_t	sidr_req_pkey;			/* P_Key */
1428 	uint8_t		sidr_req_reserved[2];		/* Reserved */
1429 	ib_svc_id_t	sidr_req_service_id;		/* Service Id */
1430 	uint8_t		sidr_req_private_data[IBT_SIDR_REQ_PRIV_DATA_SZ];
1431 							/* Private Data */
1432 } ibcm_sidr_req_msg_t;
1433 
1434 
1435 /*
1436  * CM SIDR_REP Message structure
1437  *
1438  * SIDR_REP returns the information necessary to communicate via UD
1439  * messages with the entity specified by SIDR_REQ:ServiceID
1440  */
1441 typedef struct ibcm_sidr_rep_msg_s {
1442 	uint32_t	sidr_rep_request_id;		/* Request id */
1443 	uint8_t		sidr_rep_rep_status;		/* Status */
1444 	uint8_t		sidr_rep_add_info_len;		/* Length of Add Info */
1445 	uint8_t		sidr_rep_reserved1[2];		/* Reserved */
1446 	uint32_t	sidr_rep_qpn_plus;		/* QPN_24 RSV_8 */
1447 	/* since the 64-bit SID is not aligned, treat it as a byte array */
1448 	uint8_t		sidr_rep_service_id[8];		/* Service Id */
1449 	ib_qkey_t	sidr_rep_qkey;			/* Q_KEY */
1450 	uint8_t		sidr_rep_class_port_info[IBT_CM_SIDR_CP_LEN];
1451 							/* Class Port Info */
1452 							/* aka add'l info; see */
							/* sidr_rep_add_info_len */
1453 	uint8_t		sidr_rep_private_data[IBT_SIDR_REP_PRIV_DATA_SZ];
1454 							/* Private data */
1455 } ibcm_sidr_rep_msg_t;
1456 
/*
 * CM ClassPortInfo message structure
 *
 * Holds the class/base versions and capability mask, followed by one group
 * of Redirect* fields and one group of Trap* fields (GID split into hi/lo
 * 64-bit halves, LID, P_Key, Q_Key and packed *_plus words).
 */
1457 typedef struct ibcm_classportinfo_msg_s {
1458 	uint8_t		BaseVersion;		/* ver. of MAD base format */
1459 	uint8_t		ClassVersion;		/* ver. of MAD class format */
1460 	uint16_t	CapabilityMask;		/* capabilities of this class */
1461 	uint32_t	RespTimeValue_plus;	/* reserved : 27 bits */
1462 						/* resptime value : 5 bits */
1463 	uint64_t	RedirectGID_hi;		/* dest gid of redirect msgs */
1464 	uint64_t	RedirectGID_lo;		/* dest gid of redirect msgs */
1465 	uint32_t	RedirectTC_plus;	/* traffic class: 8 bits */
1466 						/* SL: 4 bits */
1467 						/* Flow label: 20 bits */
1468 	ib_lid_t	RedirectLID;		/* dlid for class services */
1469 	ib_pkey_t	RedirectP_Key;		/* p_key for class services */
1470 	uint32_t	RedirectQP_plus;	/* Reserved: 8 bits */
1471 						/* QPN: 24 bits */
1472 	ib_qkey_t	RedirectQ_Key;		/* q_key for class services */
1473 	uint64_t	TrapGID_hi;		/* dest gid of trap msgs */
1474 	uint64_t	TrapGID_lo;		/* dest gid of trap msgs */
1475 	uint32_t	TrapTC_plus;		/* Trap traffic class, etc., */
1476 	ib_lid_t	TrapLID;		/* dlid for traps */
1477 	ib_pkey_t	TrapP_Key;		/* p_key for traps */
1478 	uint32_t	TrapHL_plus;		/* Trap hop limit,etc., */
1479 	ib_qkey_t	TrapQ_Key;		/* q_key for traps */
1480 } ibcm_classportinfo_msg_t;
1481 
1482 /*
 * All message structures are read-only on the receiving side; the
 * annotations below declare that for each CM message type defined above.
 */
1483 _NOTE(READ_ONLY_DATA(ibcm_req_msg_s))
1484 _NOTE(READ_ONLY_DATA(ibcm_rep_msg_s))
1485 _NOTE(READ_ONLY_DATA(ibcm_mra_msg_s))
1486 _NOTE(READ_ONLY_DATA(ibcm_rej_msg_s))
1487 _NOTE(READ_ONLY_DATA(ibcm_lap_msg_s))
1488 _NOTE(READ_ONLY_DATA(ibcm_apr_msg_s))
1489 _NOTE(READ_ONLY_DATA(ibcm_sidr_req_msg_s))
1490 _NOTE(READ_ONLY_DATA(ibcm_sidr_rep_msg_s))
1491 _NOTE(READ_ONLY_DATA(ibcm_rtu_msg_s))
1492 _NOTE(READ_ONLY_DATA(ibcm_dreq_msg_s))
1493 _NOTE(READ_ONLY_DATA(ibcm_drep_msg_s))
1494 _NOTE(READ_ONLY_DATA(ibcm_classportinfo_msg_s))
1495 
1496 /* Prototype definitions for CM implementation functions */
1497 
1498 /*
1499  * The callback from IBMF to CM. This routine calls one of the CM
1500  * state processing functions depending upon mesg/attribute id
1501  *
1502  * ibmf_handle	: IBMF handle on which CM MAD was received
1503  * msgp		: MAD packet (IBMF message)
1504  * args		: IBMF receive mad callback arg
1505  */
1506 void	ibcm_recv_cb(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, void *args);
1507 
1508 /*
1509  * Prototypes for CM state transition handling functions
1510  */
1511 
1512 /*
1513  * The following are the CM state processing functions called on an
1514  * incoming REQ/REP/RTU/MRA/REJ/DREQ/DREP on active/passive sides
1515  * (Also handled are LAP, APR, SIDR_REP and SIDR_REQ)
1516  * The brief description of these functions
1517  *	Search based on CM message fields in CM's HCA entry.
1518  *	Create/Delete state structures based on incoming message
1519  *	Handle duplicate messages and state transitions
1520  *	Set and Cancel timeouts
1521  *	Handle stale connections
1522  *	Change CM connection state
1523  *	Call CM CEP state transition functions to update CEP state
1524  *	and set CEP attributes
1525  *
1526  * INPUTS:
1527  *	hcap:		- IBMF callback argument (CM's per HCA entry)
1528  *	cm_input_mad:	- pointer to the incoming MAD
1529  *	cm_mad_addr	- CM MAD address
1530  *
1531  * The state transition processing is specified in different functions based
1532  * on incoming message type rather than as one function because the CM
1533  * processing is different for each of them.
1534  *
1535  * A global call table is initialized with these function addresses
1536  * (is defined in ibcm_impl.c), and invoked from ibcm_recv_cb
1537  * (IBMF's recv callback to CM) based on mesg/attribute id.
1538  */
1539 void	ibcm_process_req_msg(ibcm_hca_info_t *hcap, uint8_t *cm_input_mad,
1540 	    ibcm_mad_addr_t *cm_mad_addr);
1541 void	ibcm_process_rep_msg(ibcm_hca_info_t *hcap, uint8_t *cm_input_mad,
1542 	    ibcm_mad_addr_t *cm_mad_addr);
1543 void	ibcm_process_rtu_msg(ibcm_hca_info_t *hcap, uint8_t *cm_input_mad,
1544 	    ibcm_mad_addr_t *cm_mad_addr);
1545 void	ibcm_process_dreq_msg(ibcm_hca_info_t *hcap, uint8_t *cm_input_mad,
1546 	    ibcm_mad_addr_t *cm_mad_addr);
1547 void	ibcm_process_drep_msg(ibcm_hca_info_t *hcap, uint8_t *cm_input_mad,
1548 	    ibcm_mad_addr_t *cm_mad_addr);
1549 void	ibcm_process_rej_msg(ibcm_hca_info_t *hcap, uint8_t *cm_input_mad,
1550 	    ibcm_mad_addr_t *cm_mad_addr);
1551 void	ibcm_process_mra_msg(ibcm_hca_info_t *hcap, uint8_t *cm_input_mad,
1552 	    ibcm_mad_addr_t *cm_mad_addr);
1553 void	ibcm_process_apr_msg(ibcm_hca_info_t *hcap, uint8_t *cm_input_mad,
1554 	    ibcm_mad_addr_t *cm_mad_addr);
1555 void	ibcm_process_lap_msg(ibcm_hca_info_t *hcap, uint8_t *cm_input_mad,
1556 	    ibcm_mad_addr_t *cm_mad_addr);
1557 void	ibcm_process_sidr_req_msg(ibcm_hca_info_t *hcap,
1558 	    uint8_t *cm_input_mad, ibcm_mad_addr_t *cm_mad_addr);
1559 void	ibcm_process_sidr_rep_msg(ibcm_hca_info_t *hcap,
1560 	    uint8_t *cm_input_mad, ibcm_mad_addr_t *cm_mad_addr);
1561 
/*
 * Error codes for CM "proceed" processing.
 * IBCM_PROCEED_INVALID_NONE (0) is the no-error value; the remaining values
 * name what was found invalid (event, event state, private data size, LAP).
 */
1562 typedef enum ibcm_proceed_error_e {
1563 	IBCM_PROCEED_INVALID_NONE	= 0,
1564 	IBCM_PROCEED_INVALID_EVENT,
1565 	IBCM_PROCEED_INVALID_EVENT_STATE,
1566 	IBCM_PROCEED_INVALID_PRIV_SZ,
1567 	IBCM_PROCEED_INVALID_LAP
1568 } ibcm_proceed_error_t;
1569 
1570 /*
 * Encapsulates the information that the client returns from a CM callback:
 * the reply event data plus a private data buffer and its length.
 */
1571 typedef struct ibcm_clnt_reply_info_s {
1572 	ibt_cm_proceed_reply_t	*reply_event;
1573 	void			*priv_data;
1574 	ibt_priv_data_len_t	priv_data_len;
1575 } ibcm_clnt_reply_info_t;
1576 
1577 /*
 * Encapsulates the information that a UD client returns from a CM callback:
 * QPN, QKey, redirect info, plus a private data buffer and its length.
 */
1578 typedef struct ibcm_ud_clnt_reply_info_s {
1579 	ib_qpn_t		ud_qpn;
1580 	ib_qkey_t		ud_qkey;
1581 	ibt_redirect_info_t	*redirect_infop;
1582 	void			*priv_data;
1583 	ibt_priv_data_len_t	priv_data_len;
1584 } ibcm_ud_clnt_reply_info_t;
1585 
1586 /*
1587  * Prototypes for CM CEP state transition handling functions. These are
1588  * called from CM connection state transition handling functions.
1589  *
1590  * The brief description of these functions :
1591  *	Validate CEP related attributes in the messages
1592  *	Change CEP state
1593  *	Set CEP attributes (modify CEP)
1594  *	Call client/server callback handlers
1595  *	Fill up the response MADs
1596  *
1597  * The arguments are :
1598  *	statep:		Connection state structure
1599  *	cm_req/rep/rtu/rej msg : Received CM message
1600  *	cm_output_mad	: The response CM MAD with some of the fields filled in
1601  *			  The cm output mad is allocated by CM state transition
1602  *			  functions and has generic MAD header
1603  *			  Certain fields like com id, etc., are filled by CM
1604  *			  connection state transition functions that are above
 *
 * NOTE(review): none of the prototypes below takes a cm_output_mad
 * parameter; the description above may be stale - confirm.
1605  */
1606 
1607 /* QP state transition function called for an incoming REQ on passive side */
1608 ibcm_status_t	ibcm_cep_state_req(ibcm_state_data_t *statep,
1609 		    ibcm_req_msg_t *cm_req_msg, ibt_cm_reason_t *reason,
1610 		    uint8_t *arej_info_len);
1611 
1612 /* Processes QP state machine based on return values from cm handler */
1613 ibcm_status_t	ibcm_process_cep_req_cm_hdlr(ibcm_state_data_t *statep,
1614 		    ibt_cm_status_t cb_status,
1615 		    ibcm_clnt_reply_info_t *clnt_info,
1616 		    ibt_cm_reason_t *reject_reason, uint8_t *arej_len,
1617 		    ibcm_req_msg_t *cm_req_msgp);
1618 
1619 /* Processes CM state machine based on return values from ibcm_cep_state_req */
1620 void		ibcm_handle_cep_req_response(ibcm_state_data_t *statep,
1621 		    ibcm_status_t response, ibt_cm_reason_t reject_reason,
1622 		    uint8_t arej_info_len);
1623 
1624 /* QP state transition function called for an incoming REP on active side */
1625 ibcm_status_t	ibcm_cep_state_rep(ibcm_state_data_t *statep,
1626 		    ibcm_rep_msg_t *cm_rep_msg, ibt_cm_reason_t *reason,
1627 		    uint8_t *arej_info_len);
1628 
1629 /* Processes QP state machine based on return values from cm handler */
1630 ibcm_status_t	ibcm_process_cep_rep_cm_hdlr(ibcm_state_data_t *statep,
1631 		    ibt_cm_status_t cb_status,
1632 		    ibcm_clnt_reply_info_t *clnt_info,
1633 		    ibt_cm_reason_t *reject_reason, uint8_t *arej_len,
1634 		    ibcm_rep_msg_t *cm_rep_msgp);
1635 
1636 /* Processes CM state machine based on return values from ibcm_cep_state_rep */
1637 void		ibcm_handle_cep_rep_response(ibcm_state_data_t *statep,
1638 		    ibcm_status_t response, ibt_cm_reason_t reject_reason,
1639 		    uint8_t arej_info_len, ibcm_rep_msg_t *rep_msgp);
1640 
1641 /* QP state transition function called for an incoming RTU on passive side */
1642 void	ibcm_cep_state_rtu(ibcm_state_data_t *statep,
1643 	    ibcm_rtu_msg_t *cm_rtu_msg);
1644 
1645 /* QP state transition func called for an incoming REJ on active/passive side */
1646 void	ibcm_cep_state_rej(ibcm_state_data_t *statep,
1647 	    ibcm_rej_msg_t *cm_rej_msg, ibcm_conn_state_t rej_state);
1648 
1649 /* QP state transition func for an incoming REJ on active side in est state */
1650 void	ibcm_cep_state_rej_est(ibcm_state_data_t *statep);
1651 
1652 /*
1653  * QP state transition function called for an outgoing RTU on active side,
1654  * after setting CEP to RTS state active/passive side
1655  */
1656 void	ibcm_cep_send_rtu(ibcm_state_data_t *statep);
1658 
1659 /* QP state transition function called for an incoming LAP */
1660 ibcm_status_t	ibcm_cep_state_lap(ibcm_state_data_t *statep,
1661 		    ibcm_lap_msg_t *lap_msg, ibcm_apr_msg_t *apr_msg);
1662 
1663 /* Processes QP state machine based on return value from cm handler for LAP */
1664 void		ibcm_process_cep_lap_cm_hdlr(ibcm_state_data_t *statep,
1665 		    ibt_cm_status_t cb_status,
1666 		    ibcm_clnt_reply_info_t *clnt_info,
1667 		    ibcm_lap_msg_t *lap_msg, ibcm_apr_msg_t *apr_msg);
1668 
1669 void		ibcm_post_apr_mad(ibcm_state_data_t *statep);
1670 
1671 void		ibcm_cep_state_apr(ibcm_state_data_t *statep,
1672 		    ibcm_lap_msg_t *lap_msg, ibcm_apr_msg_t *apr_msg);
1673 
1674 /* Processes CM state machine based on return value from cm handler */
1675 void		ibcm_handle_cep_dreq_response(ibcm_state_data_t *statep,
1676 		    void *priv_data, ibt_priv_data_len_t  priv_data_len);
1677 
1678 /* Processes CM UD state machine based on return values from cm handler */
1679 void		ibcm_process_sidr_req_cm_hdlr(ibcm_ud_state_data_t *ud_statep,
1680 		    ibt_cm_status_t cb_status,
1681 		    ibcm_ud_clnt_reply_info_t *ud_clnt_info,
1682 		    ibt_sidr_status_t *sidr_status,
1683 		    ibcm_sidr_rep_msg_t *sidr_repp);
1684 
1685 void		ibcm_proceed_via_taskq(void *targs);
1686 void		ibcm_ud_proceed_via_taskq(void *targs);
1687 
1688 /*
1689  * Builds the reply MAD address based on "incoming mad addr" that is
1690  * supplied to it as an arg.
1691  *	Swaps the source and destination lids in ibmf_addr_info_t
1692  *	Swaps the source and destination gids in ib_grh_t
1693  *
1694  * INPUTS:
1695  *	incoming_cm_mad_addr	- Address information in the incoming MAD
1696  *	reply_cm_mad_addr	- Derived address for the reply MAD
1697  *				  The reply MAD address is derived from the
1698  *				  address information of the incoming CM MAD
1699  */
1700 void	ibcm_build_reply_mad_addr(ibcm_mad_addr_t *incoming_cm_mad_addr,
1701 	    ibcm_mad_addr_t *reply_cm_mad_addr);
1702 
1703 /*  Posts RC CM MAD using IBMF */
1704 void	ibcm_post_rc_mad(ibcm_state_data_t *statep, ibmf_msg_t *msgp,
1705 	    ibmf_msg_cb_t post_cb, void *args);
1706 
1707 /*  Posts UD CM MAD using IBMF */
1708 void	ibcm_post_ud_mad(ibcm_ud_state_data_t *ud_statep, ibmf_msg_t *msgp,
1709 	    ibmf_msg_cb_t ud_post_cb, void *args);
1710 
1711 /*  Posts CM MAD using IBMF */
1712 ibt_status_t	ibcm_post_mad(ibmf_msg_t *msgp, ibcm_mad_addr_t *cm_mad_addr,
1713 	    ibmf_msg_cb_t post_cb, void *args);
1714 
1715 /* Post REJ MAD */
1716 void	ibcm_post_rej_mad(ibcm_state_data_t *statep, ibt_cm_reason_t reason,
1717 	    int who, void *addl_rej_info, uint8_t arej_info_len);
1718 
1719 /* Post REP MAD */
1720 void	ibcm_post_rep_mad(ibcm_state_data_t *statep);
1721 
1722 /* Post RTU MAD */
1723 ibcm_status_t	ibcm_post_rtu_mad(ibcm_state_data_t *statep);
1724 
1725 /* Post DREQ MAD */
1726 void	ibcm_post_dreq_mad(void *statep);
1727 
1728 /* Post LAP MAD */
1729 void	ibcm_post_lap_mad(ibcm_state_data_t *statep);
1730 
1731 
1732 /*
1733  * Posts CM SIDR MAD using IBMF in blocking mode
1734  *
1735  * INPUTS:
1736  *	ud_statep:	UD statep which is posting the mad
1737  *	cm_mad_addr:	Address information for the MAD to be posted
1738  *	status:		SIDR status
1739  */
1740 void	ibcm_post_sidr_rep_mad(ibcm_ud_state_data_t *ud_statep,
1741 	    ibt_sidr_status_t status);
1742 
1743 /* prototypes to resend RC mad and UD MAD */
1744 void	ibcm_resend_rep_mad(ibcm_state_data_t *statep);
1745 void	ibcm_resend_rtu_mad(ibcm_state_data_t *statep);
1746 void	ibcm_resend_rej_mad(ibcm_state_data_t *statep);
1747 void	ibcm_resend_mra_mad(ibcm_state_data_t *statep);
1748 void	ibcm_resend_srep_mad(ibcm_ud_state_data_t *statep);
1749 
1750 
1751 /* Helper function used in connection abort processing */
1752 void	ibcm_process_abort(ibcm_state_data_t	*statep);
1753 
1754 /*
1755  * Prototypes for CM functions that look up a connection state structure
1756  */
1757 
1758 /*
1759  * ibcm_lookup_msg:
1760  *
1761  * Retrieves an existing state structure or creates a new one if none found.
1762  * This function is used during passive side of connection establishment for
1763  * INCOMING REQ/REJ/RTU/MRA
1764  * This function is used during active side of connection establishment for
1765  * INCOMING REP/REJ/MRA
1766  * This function is used during active side of connection establishment for
1767  * an outgoing REQ.
1768  *
1769  * NOTE: IBCM_LOOKUP_FAIL is only returned if a new entry wasn't created and
1770  * a match wasn't found.
1771  *
1772  * Arguments are:-
1773  *	ibcm_event_type_t	- what type of message
1774  *				  incoming REQ, REP, REJ, MRA, RTU, DREQ, DREP
1775  *	local_comid		- ONLY *NOT* valid for incoming REQ.
1776  *					needed for others
1777  *	remote_qpn		- Remote CM's QP number
1778  *	remote_hca_guid		- ONLY VALID FOR incoming REQ.
1779  *				  Ignored for others
1780  *	hcap			- HCA entry table pointer
1781  *	statep			- "return"ed state pointer
1782  *
1783  * Return Values:
1784  *	IBCM_LOOKUP_NEW		- new statep allocated
1785  *	IBCM_LOOKUP_EXISTS	- found an existing entry
1786  *	IBCM_LOOKUP_FAIL	- failed to find an entry
1787  *	IBCM_MEMORY_FAILURE	- failed to get memory
1788  *					iff flags != IBT_CHAN_BLOCKING
1789  */
1790 ibcm_status_t	ibcm_lookup_msg(ibcm_event_type_t event_type,
1791 		    ib_com_id_t local_comid, ib_qpn_t remote_qpn,
1792 		    ib_guid_t remote_hca_guid, ibcm_hca_info_t *hcap,
1793 		    ibcm_state_data_t **statep);
1794 
1795 
1796 /*
1797  * Routines for CM SIDR state structure list manipulation
1798  * Wherever possible, the list routines of ibtl are used
1799  * for list manipulation
1800  */
1801 
1802 /*
1803  * Finds an entry based on lid, gid and grh exists fields
1804  * srch_param:	Search key; presumably carries the LID, GID and "GRH
1805  *		exists" (TRUE if GRH exists) fields of the incoming
1806  *		SIDR REQ -- confirm against ibcm_sidr_srch_t
1807  * hcap:	CM State HCA entry ptr to search for SIDR state structure
1808  * statep:	Returns a valid state structure, if one exists based
1809  *		on lid, gid and grh_exists fields
1810  * flag:	whether to just look OR to look and add if it doesn't exist.
1811  */
1812 ibcm_status_t		ibcm_find_sidr_entry(ibcm_sidr_srch_t *srch_param,
1813 			    ibcm_hca_info_t *hcap,
1814 			    ibcm_ud_state_data_t **statep,
1815 			    ibcm_lookup_flag_t flag);
1816 
1817 ibcm_ud_state_data_t	*ibcm_add_sidr_entry(ibcm_sidr_srch_t *srch_param,
1818 			    ibcm_hca_info_t *hcap);
1819 
1820 /*
1821  * Deletes a given state structure, from both hca state and passive trees
1822  * If ref cnt is zero, deallocates all buffers and memory of state data
1823  */
1824 void	ibcm_delete_state_data(ibcm_state_data_t *statep);
1825 
1826 /*
1827  * Deallocates all the buffers and memory of state data.
1828  * This function must be called, only when ref_cnt is zero.
1829  */
1830 void	ibcm_dealloc_state_data(ibcm_state_data_t *statep);
1831 
1832 /*
1833  * Deletes a given UD state structure, from SIDR list.
1834  * The routine acquires and releases the SIDR list lock.
1835  */
1836 void	ibcm_delete_ud_state_data(ibcm_ud_state_data_t *statep);
1837 void	ibcm_dealloc_ud_state_data(ibcm_ud_state_data_t *statep);
1838 
1839 /*
1840  * Service ID entry create and lookup functions
1841  */
1842 
1843 /*
1844  * Adds/looks-up an ibcm_svc_info_t entry in the CM's global table.
1845  * This global table is defined in ibcm_impl.c.
1846  *
1847  * svc_info_list_lock must be held for RW_READER by caller of
1848  * ibcm_find_svc_entry().
1849  *
1850  * Arguments are:-
1851  *	sid		- service id
1852  *	num_sids	- Number (Range) of service-ids
1853  *
1854  * Return values:
1855  *	Pointer to ibcm_svc_info_t on success, otherwise NULL.
1856  */
1857 int ibcm_svc_compare(const void *p1, const void *p2);
1858 ibcm_svc_info_t *ibcm_create_svc_entry(ib_svc_id_t sid, int num_sids);
1859 ibcm_svc_info_t *ibcm_find_svc_entry(ib_svc_id_t sid);
1860 
1861 /*
1862  * The following are the function prototypes for various id initialization,
1863  * allocation, free and destroy operations. The cm id allocations are based
1864  * on vmem operations
1865  * The service id's are maintained globally per host
1866  * The com id and req id's are maintained per hca
1867  * To maintain compatibility with intel, service ids are allocated on a 32 bit
1868  * range, though spec has 64 bit range for service id's
1869  */
1870 ibcm_status_t	ibcm_init_ids();
1871 void		ibcm_fini_ids();
1872 
1873 ibcm_status_t	ibcm_init_hca_ids(ibcm_hca_info_t *hcap);
1874 void		ibcm_fini_hca_ids(ibcm_hca_info_t *hcap);
1875 
1876 ibcm_status_t	ibcm_alloc_comid(ibcm_hca_info_t *hcap, ib_com_id_t *comid);
1877 void		ibcm_free_comid(ibcm_hca_info_t *hcap, ib_com_id_t comid);
1878 
1879 ibcm_status_t	ibcm_alloc_reqid(ibcm_hca_info_t *hcap, uint32_t *reqid);
1880 void		ibcm_free_reqid(ibcm_hca_info_t *hcap, uint32_t reqid);
1881 
1882 ib_svc_id_t	ibcm_alloc_local_sids(int num_sids);
1883 void		ibcm_free_local_sids(ib_svc_id_t service_id, int num_sids);
1884 
1885 ib_svc_id_t	ibcm_alloc_ip_sid();
1886 void		ibcm_free_ip_sid(ib_svc_id_t sid);
1887 
1888 uint64_t	ibcm_generate_tranid(uint8_t event, uint32_t id,
1889 		    uint32_t cm_tran_priv);
1890 
1891 void		ibcm_decode_tranid(uint64_t tran_id, uint32_t *cm_tran_priv);
1892 
1893 ibcm_status_t	ibcm_ar_init(void);
1894 ibcm_status_t	ibcm_ar_fini(void);
1895 
1896 /* IP Addressing API debugging */
1897 extern int ibcm_printip;	/* set to 1 to enable IBTF DPRINTFs */
1898 extern void ibcm_ip_print(char *label, ibt_ip_addr_t *ipa);
1899 
/*
 * IBCM_PRINT_IP:
 *	Calls ibcm_ip_print(LABEL, IP_ADDR) only when ibcm_printip is non-zero.
 *
 *	The body is wrapped in do { } while (0) so the expansion is exactly one
 *	statement that consumes the caller's trailing semicolon.  A bare
 *	"if { }" expansion breaks when the macro is the body of an unbraced
 *	if that has an else branch.
 */
#define	IBCM_PRINT_IP(LABEL, IP_ADDR)				\
	do {							\
		if (ibcm_printip) {				\
			ibcm_ip_print((LABEL), (IP_ADDR));	\
		}						\
	} while (0)
1904 /*
1905  * These functions are called to do timeout processing from CM connection
1906  * state transitions. (Also for SIDR REQ and SIDR REP processing)
1907  *
1908  * Brief description :
1909  *	If retry count is below max retry value, then post the stored response
1910  *	MAD using IBMF in blocking mode, adjusts remaining retry counters.
1911  *	If retry counter reaches max value, then retry failure handling is
1912  *	done here
1913  *
1914  *	CM will ensure that the state data structure of the associated
1915  *	timeout is valid when this timeout function is called.
1916  *	(See timer_stored_state in ibcm_state_data_t and
1917  *	ud_timer_stored_state in ibcm_ud_state_data_t)
1918  */
1919 void	ibcm_timeout_cb(void *arg);
1920 void	ibcm_sidr_timeout_cb(void *arg);
1921 
1922 /*
1923  * function prototypes for IBMF send completion callbacks on non-blocking
1924  * MAD posts
1925  */
1926 void	ibcm_post_req_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
1927 	    void *args);
1928 void	ibcm_post_rep_wait_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
1929 	    void *args);	/* MRA Rcvd on active side */
1930 void	ibcm_post_rep_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
1931 	    void *args);
1932 void	ibcm_resend_post_rep_complete(ibmf_handle_t ibmf_handle,
1933 	    ibmf_msg_t *msgp, void *args);
1934 void	ibcm_post_mra_rep_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
1935 	    void *args);	/* MRA Rcvd on passive side */
1936 void	ibcm_post_rej_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
1937 	    void *args);
1938 void	ibcm_post_dreq_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
1939 	    void *args);
1940 void	ibcm_post_drep_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
1941 	    void *args);
1942 void	ibcm_post_lap_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
1943 	    void *args);
1944 void	ibcm_post_apr_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
1945 	    void *args);
1946 void	ibcm_post_stored_apr_complete(ibmf_handle_t ibmf_handle,
1947 	    ibmf_msg_t *msgp, void *args);
1948 void	ibcm_post_mra_lap_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
1949 	    void *args);	/* MRA Rcvd for LAP on active side */
1950 void	ibcm_post_mra_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
1951 	    void *args);	/* for MRA sender */
1952 void	ibcm_post_rtu_complete(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp,
1953 	    void *args);
1954 
1955 void	ibcm_post_sidr_req_complete(ibmf_handle_t ibmf_handle,
1956 	    ibmf_msg_t *msgp, void *args);
1957 
1958 /*
1959  * ibcm_find_hca_entry:
1960  *	Given a HCA's GUID find out ibcm_hca_info_t entry for that HCA
1961  *	This entry can be then used to access AVL tree/SIDR list etc.
1962  *
1963  *	NOTE: This entry is not removed from the "ibcm_hca_listp".
1964  *	And this function is called with ibcm_hca_list_mutex mutex held.
1965  *
1966  * INPUTS:
1967  *	hca_guid	- HCA's guid
1968  *
1969  * RETURN VALUE:
1970  *	hcap		- if a match is found, else NULL
1971  */
1972 ibcm_hca_info_t	*ibcm_find_hca_entry(ib_guid_t hca_guid);
1973 ibcm_hca_info_t	*ibcm_find_hcap_entry(ib_guid_t hca_guid);
1974 void ibcm_delete_hca_entry(ibcm_hca_info_t *hcap);
1975 
1976 /* Routines that manage the hca's temporary access count */
1977 ibcm_status_t ibcm_inc_hca_acc_cnt(ibcm_hca_info_t *hca);
1978 void ibcm_dec_hca_acc_cnt(ibcm_hca_info_t *hca);
1979 
1980 /* Routines that manage the hca's resource count */
1981 void ibcm_inc_hca_res_cnt(ibcm_hca_info_t *hca);
1982 void ibcm_dec_hca_res_cnt(ibcm_hca_info_t *hca);
1983 
1984 /* Routines that manage the hca's service count */
1985 void ibcm_inc_hca_svc_cnt(ibcm_hca_info_t *hca);
1986 void ibcm_dec_hca_svc_cnt(ibcm_hca_info_t *hca);
1987 
1988 /* Routine to fetch the saa_handle */
1989 ibmf_saa_handle_t ibcm_get_saa_handle(ibcm_hca_info_t *hcap, uint8_t port);
1990 
1991 /* Allow some flow control of RC connection initiations */
1992 void ibcm_flow_inc(void);
1993 void ibcm_flow_dec(hrtime_t delta, char *mad_type);
1994 
1995 /* Allow some flow control of SA requests */
1996 void ibcm_sa_access_enter(void);
1997 void ibcm_sa_access_exit(void);
1998 
1999 /*
2000  * ibcm_cep_to_error_state:
2001  *	Helper function to transition a CEP to ERROR state
2002  *
2003  *	NOTE: This function checks if ch_qp is valid or ch_eec and calls
2004  *	into IBTL to transition the CEP.
2005  *
2006  * INPUTS:
2007  *	statep	- Connection state pointer
2008  *
2009  * RETURN VALUE:
2010  *	IBT_SUCCESS	- if CEP transition succeeded; else error
2011  */
2012 ibt_status_t	ibcm_cep_to_error_state(ibcm_state_data_t *statep);
2013 
2014 /*
2015  * Processes the pending stateps in a linked list. The operations are to
2016  * invoke a cm handler or delete statep
2017  * When the above operations are required on statep from a timeout handler,
2018  * they are linked for later processing by an independent thread
2019  */
/* Takes no arguments; (void) makes this a checked prototype. */
void	ibcm_process_tlist(void);
2021 /* Links RC stateps to an RC timeout processing list */
2022 void	ibcm_add_tlist(ibcm_state_data_t *statep);
2023 
2024 /* Links SIDR/UD stateps to an SIDR/UD timeout processing list */
2025 void	ibcm_add_ud_tlist(ibcm_ud_state_data_t *ud_statep);
2026 
2027 /*
2028  * This call either aborts a pending or completes an in-progress LAP/APR
2029  * operation
2030  */
2031 void	ibcm_sync_lapr_idle(ibcm_state_data_t	*statep);
2032 
2033 void	ibcm_process_rc_recycle(void *recycle_arg);
2034 
2035 /*
2036  * Helper function to handle endianness in case of Service Data.
2037  * Used by ibt_bind_service() and ibt_get_paths().
2038  */
2039 void ibcm_swizzle_from_srv(ibt_srv_data_t *sb_data, uint8_t *service_bytes);
2040 void ibcm_swizzle_to_srv(uint8_t *service_bytes, ibt_srv_data_t *sb_data);
2041 
2042 /* Misc ibcm global variables */
2043 extern char			cmlog[];
2044 extern ibt_clnt_hdl_t		ibcm_ibt_handle;
2045 extern taskq_t			*ibcm_taskq;
2046 extern ibcm_state_handler_t	ibcm_sm_funcs_tbl[];
2047 extern uint8_t			ibcm_timeout_list_flags;
2048 extern ibcm_classportinfo_msg_t	ibcm_clpinfo;
2049 
2050 /* Global lists */
2051 extern avl_tree_t	ibcm_svc_avl_tree;	/* global service id tree */
2052 extern ibcm_state_data_t	*ibcm_timeout_list_hdr, *ibcm_timeout_list_tail;
2053 extern ibcm_ud_state_data_t	*ibcm_ud_timeout_list_hdr,
2054 				*ibcm_ud_timeout_list_tail;
2055 /* Default global retry counts */
2056 extern uint8_t		ibcm_max_retries;
2057 extern uint32_t		ibcm_max_sa_retries;
2058 extern int		ibcm_sa_timeout_delay;	/* in ticks */
2059 
2060 /* Various default global timers */
2061 extern ibt_rnr_nak_time_t	ibcm_default_rnr_nak_time;
2062 
2063 extern clock_t		ibcm_local_processing_time;	/* usecs */
2064 extern clock_t		ibcm_remote_response_time;
2065 extern ib_time_t	ibcm_max_sidr_rep_proctime;
2066 extern ib_time_t	ibcm_max_sidr_rep_store_time;
2067 extern uint32_t		ibcm_adj_btime;
2068 extern uint32_t		ibcm_sw_delay;
2069 
2070 extern ib_time_t	ibcm_max_ib_pkt_lt;
2071 extern ib_time_t	ibcm_max_ib_mad_pkt_lt;
2072 
2073 /* Global locks */
2074 extern kmutex_t		ibcm_svc_info_lock;
2075 extern kmutex_t		ibcm_mcglist_lock;
2076 extern kmutex_t		ibcm_global_hca_lock;
2077 extern kmutex_t		ibcm_qp_list_lock;
2078 extern kmutex_t		ibcm_timeout_list_lock;
2079 extern kmutex_t		ibcm_recv_mutex;
2080 
2081 /* Global cond variables */
2082 extern kcondvar_t	ibcm_global_hca_cv;
2083 extern kcondvar_t	ibcm_svc_info_cv;
2084 extern kcondvar_t	ibcm_timeout_list_cv;
2085 extern kcondvar_t	ibcm_timeout_thread_done_cv;
2086 
2087 _NOTE(LOCK_ORDER(ibcm_state_data_s::state_mutex ibcm_timeout_list_lock))
2088 _NOTE(LOCK_ORDER(ibcm_ud_state_data_s::ud_state_mutex ibcm_timeout_list_lock))
2089 _NOTE(LOCK_ORDER(ibcm_hca_info_s::hca_state_rwlock
2090     ibcm_state_data_s::state_mutex))
2091 _NOTE(LOCK_ORDER(ibcm_hca_info_s::hca_sidr_list_lock
2092     ibcm_ud_state_data_s::ud_state_mutex))
2093 
2094 _NOTE(READ_ONLY_DATA(ibcm_local_processing_time ibcm_remote_response_time
2095     ibcm_max_sidr_rep_proctime ibcm_max_sidr_rep_store_time ibcm_adj_btime
2096     ibcm_sw_delay ibcm_max_retries ibcm_max_sa_retries))
2097 
2098 /*
2099  * miscellaneous defines for retries, times etc.
2100  */
2101 #define	IBCM_MAX_RETRIES		11	/* Max CM retries for a msg */
2102 #define	IBCM_LOCAL_RESPONSE_TIME	300000	/* Local CM processing time */
2103 						/* in usecs */
2104 #define	IBCM_REMOTE_RESPONSE_TIME	300000	/* Remote CM response time  */
2105 						/* in usecs */
2106 #define	IBCM_MAX_SIDR_PROCESS_TIME	16	/* Time to process SIDR REP */
2107 #define	IBCM_MAX_SIDR_PKT_LIFE_TIME	9	/* Approx pkt lt for UD srver */
2108 
2109 #define	IBCM_MAX_IB_PKT_LT		20	/* 4 second */
2110 #define	IBCM_MAX_IB_MAD_PKT_LT		18	/* 1 second */
2111 
2112 #define	IBCM_MAX_SA_RETRIES		0	/* Max CM retry for SA update */
2113 
2114 /* versions for CM MADs */
2115 #define	IBCM_MAD_BASE_VERSION		1
2116 #define	IBCM_MAD_CLASS_VERSION		2
2117 
2118 /* for Class_Port_Info stuff - see section 16.7.3.1 in Vol1 IB Spec */
2119 #define	IBCM_CPINFO_CAP_RC		0x0200	/* RC is supported */
2120 #define	IBCM_CPINFO_CAP_RD		0x0400	/* RD is supported */
2121 #define	IBCM_CPINFO_CAP_RAW		0x0800	/* Raw Datagrams supported */
2122 #define	IBCM_CPINFO_CAP_UC		0x1000	/* UC supported */
2123 #define	IBCM_CPINFO_CAP_SIDR		0x2000	/* SIDR supported */
2124 
2125 #define	IBCM_V4_PART_OF_V6(v6)	v6.s6_addr32[3]
2126 /* RDMA CM IP Service's Private Data Format. */
2127 #ifdef _BIG_ENDIAN
2128 typedef struct ibcm_ip_pvtdata_s {
2129 	uint8_t		ip_MajV:4,
2130 			ip_MinV:4;
2131 	uint8_t		ip_ipv:4,
2132 			ip_rsvd:4;	/* 0-3: rsvd, 4-7: ipv */
2133 	uint16_t	ip_srcport;	/* Source Port */
2134 	in6_addr_t	ip_srcip;	/* Source IP address. */
2135 	in6_addr_t	ip_dstip;	/* Remote IP address. */
2136 #define	ip_srcv4	IBCM_V4_PART_OF_V6(ip_srcip)
2137 #define	ip_dstv4	IBCM_V4_PART_OF_V6(ip_dstip)
2138 #define	ip_srcv6	ip_srcip
2139 #define	ip_dstv6	ip_dstip
2140 } ibcm_ip_pvtdata_t;
2141 #else
2142 typedef struct ibcm_ip_pvtdata_s {
2143 	uint8_t		ip_MinV:4,
2144 			ip_MajV:4;
2145 	uint8_t		ip_rsvd:4,
2146 			ip_ipv:4;	/* 0-3: rsvd, 4-7: ipv */
2147 	uint16_t	ip_srcport;	/* Source Port */
2148 	in6_addr_t	ip_srcip;	/* Source IP address. */
2149 	in6_addr_t	ip_dstip;	/* Remote IP address. */
2150 #define	ip_srcv4	IBCM_V4_PART_OF_V6(ip_srcip)
2151 #define	ip_dstv4	IBCM_V4_PART_OF_V6(ip_dstip)
2152 #define	ip_srcv6	ip_srcip
2153 #define	ip_dstv6	ip_dstip
2154 } ibcm_ip_pvtdata_t;
2155 #endif
2156 
2157 /*
2158  * for debug purposes
2159  */
#ifdef	DEBUG
extern	int ibcm_test_mode;

void	ibcm_query_qp(ibmf_handle_t ibmf_hdl, ibmf_qp_handle_t ibmf_qp);
void	ibcm_dump_raw_message(uchar_t *);
void	ibcm_dump_srvrec(sa_service_record_t *);
void	ibcm_dump_pathrec(sa_path_record_t *);
void	ibcm_dump_noderec(sa_node_record_t *);

void	ibcm_query_classport_info(ibt_channel_hdl_t channel);

/* DEBUG: the dump macros are plain aliases for the dump routines */
#define	IBCM_DUMP_RAW_MSG	ibcm_dump_raw_message
#define	IBCM_DUMP_SERVICE_REC	ibcm_dump_srvrec
#define	IBCM_DUMP_PATH_REC	ibcm_dump_pathrec
#define	IBCM_DUMP_NODE_REC	ibcm_dump_noderec
#else
/*
 * non-DEBUG: the dump macros compile to nothing.
 *
 * "0 && (arg)" keeps the argument syntax- and type-checked but never
 * evaluates it; the (void) cast makes the expansion a complete, self
 * contained expression instead of a dangling "0 &&" that leaves an
 * unused value and relies on whatever tokens follow it.
 */
#define	IBCM_DUMP_RAW_MSG(msg)		((void)(0 && (msg)))
#define	IBCM_DUMP_SERVICE_REC(rec)	((void)(0 && (rec)))
#define	IBCM_DUMP_PATH_REC(rec)		((void)(0 && (rec)))
#define	IBCM_DUMP_NODE_REC(rec)		((void)(0 && (rec)))
#endif
2181 
2182 ibt_status_t ibcm_ibmf_analyze_error(int ibmf_status);
2183 
2184 ibt_status_t ibcm_contact_sa_access(ibmf_saa_handle_t saa_handle,
2185     ibmf_saa_access_args_t *access_args, size_t *length, void **results_p);
2186 
2187 ibt_status_t	ibcm_ibtl_node_info(ib_guid_t, uint8_t, ib_lid_t,
2188     ibt_node_info_t *node_info);
2189 
2190 void ibcm_path_cache_init(void);
2191 void ibcm_path_cache_fini(void);
2192 void ibcm_path_cache_purge(void);
2193 
2194 #ifdef	__cplusplus
2195 }
2196 #endif
2197 
2198 
2199 #endif /* _SYS_IB_MGT_IBCM_IBCM_IMPL_H */
2200