/*
 * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

/* Declare the licence under which this file is distributed */
FILE_LICENCE ( GPL2_OR_LATER );

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <byteswap.h>
#include <errno.h>
#include <ipxe/infiniband.h>
#include <ipxe/ib_mi.h>
#include <ipxe/ib_pathrec.h>
#include <ipxe/ib_cm.h>

/**
 * @file
 *
 * Infiniband communication management
 *
 */

/** List of connections */
static LIST_HEAD ( ib_cm_conns );

44 * Find connection by local communication ID
46 * @v local_id Local communication ID
47 * @ret conn Connection, or NULL
49 static struct ib_connection * ib_cm_find ( uint32_t local_id ) {
50 struct ib_connection *conn;
52 list_for_each_entry ( conn, &ib_cm_conns, list ) {
53 if ( conn->local_id == local_id )
60 * Send "ready to use" response
62 * @v ibdev Infiniband device
63 * @v mi Management interface
64 * @v av Address vector
65 * @v local_id Local communication ID
66 * @v remote_id Remote communication ID
67 * @ret rc Return status code
69 static int ib_cm_send_rtu ( struct ib_device *ibdev,
70 struct ib_mad_interface *mi,
71 struct ib_address_vector *av,
72 uint32_t local_id, uint32_t remote_id ) {
74 struct ib_cm_ready_to_use *rtu = &mad.cm.cm_data.ready_to_use;
77 /* Construct "ready to use" response */
78 memset ( &mad, 0, sizeof ( mad ) );
79 mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
80 mad.hdr.class_version = IB_CM_CLASS_VERSION;
81 mad.hdr.method = IB_MGMT_METHOD_SEND;
82 mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
83 rtu->local_id = htonl ( local_id );
84 rtu->remote_id = htonl ( remote_id );
85 if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){
86 DBG ( "CM could not send RTU: %s\n", strerror ( rc ) );
94 * Handle duplicate connection replies
96 * @v ibdev Infiniband device
97 * @v mi Management interface
99 * @v av Source address vector
100 * @ret rc Return status code
102 * If a "ready to use" MAD is lost, the peer may resend the connection
103 * reply. We have to respond to these with duplicate "ready to use"
104 * MADs, otherwise the peer may time out and drop the connection.
106 static void ib_cm_recv_rep ( struct ib_device *ibdev,
107 struct ib_mad_interface *mi,
109 struct ib_address_vector *av ) {
110 struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
111 struct ib_connection *conn;
112 uint32_t local_id = ntohl ( rep->remote_id );
115 /* Identify connection */
116 conn = ib_cm_find ( local_id );
118 /* Try to send "ready to use" reply */
119 if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id,
120 conn->remote_id ) ) != 0 ) {
121 /* Ignore errors; the remote end will retry */
124 DBG ( "CM unidentified connection %08x\n", local_id );
129 * Send reply to disconnection request
131 * @v ibdev Infiniband device
132 * @v mi Management interface
133 * @v av Address vector
134 * @v local_id Local communication ID
135 * @v remote_id Remote communication ID
136 * @ret rc Return status code
138 static int ib_cm_send_drep ( struct ib_device *ibdev,
139 struct ib_mad_interface *mi,
140 struct ib_address_vector *av,
141 uint32_t local_id, uint32_t remote_id ) {
143 struct ib_cm_disconnect_reply *drep = &mad.cm.cm_data.disconnect_reply;
146 /* Construct reply to disconnection request */
147 memset ( &mad, 0, sizeof ( mad ) );
148 mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
149 mad.hdr.class_version = IB_CM_CLASS_VERSION;
150 mad.hdr.method = IB_MGMT_METHOD_SEND;
151 mad.hdr.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REPLY );
152 drep->local_id = htonl ( local_id );
153 drep->remote_id = htonl ( remote_id );
154 if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){
155 DBG ( "CM could not send DREP: %s\n", strerror ( rc ) );
163 * Handle disconnection requests
165 * @v ibdev Infiniband device
166 * @v mi Management interface
167 * @v mad Received MAD
168 * @v av Source address vector
169 * @ret rc Return status code
171 static void ib_cm_recv_dreq ( struct ib_device *ibdev,
172 struct ib_mad_interface *mi,
174 struct ib_address_vector *av ) {
175 struct ib_cm_disconnect_request *dreq =
176 &mad->cm.cm_data.disconnect_request;
177 struct ib_connection *conn;
178 uint32_t local_id = ntohl ( dreq->remote_id );
179 uint32_t remote_id = ntohl ( dreq->local_id );
182 /* Identify connection */
183 conn = ib_cm_find ( local_id );
185 /* Notify upper layer */
186 conn->op->changed ( ibdev, conn->qp, conn, -ENOTCONN,
188 sizeof ( dreq->private_data ) );
190 DBG ( "CM unidentified connection %08x\n", local_id );
194 if ( ( rc = ib_cm_send_drep ( ibdev, mi, av, local_id,
195 remote_id ) ) != 0 ) {
196 /* Ignore errors; the remote end will retry */
200 /** Communication management agents */
201 struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
203 .mgmt_class = IB_MGMT_CLASS_CM,
204 .class_version = IB_CM_CLASS_VERSION,
205 .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
206 .handle = ib_cm_recv_rep,
209 .mgmt_class = IB_MGMT_CLASS_CM,
210 .class_version = IB_CM_CLASS_VERSION,
211 .attr_id = htons ( IB_CM_ATTR_DISCONNECT_REQUEST ),
212 .handle = ib_cm_recv_dreq,
217 * Convert connection rejection reason to return status code
219 * @v reason Rejection reason (in network byte order)
220 * @ret rc Return status code
222 static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
224 case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) :
226 case htons ( IB_CM_REJECT_STALE_CONN ) :
228 case htons ( IB_CM_REJECT_CONSUMER ) :
236 * Handle connection request transaction completion
238 * @v ibdev Infiniband device
239 * @v mi Management interface
240 * @v madx Management transaction
242 * @v mad Received MAD (or NULL on error)
243 * @v av Source address vector (or NULL on error)
245 static void ib_cm_req_complete ( struct ib_device *ibdev,
246 struct ib_mad_interface *mi,
247 struct ib_mad_transaction *madx,
248 int rc, union ib_mad *mad,
249 struct ib_address_vector *av ) {
250 struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
251 struct ib_queue_pair *qp = conn->qp;
252 struct ib_cm_common *common = &mad->cm.cm_data.common;
253 struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
254 struct ib_cm_connect_reject *rej = &mad->cm.cm_data.connect_reject;
255 void *private_data = NULL;
256 size_t private_data_len = 0;
258 /* Report failures */
259 if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
262 DBGC ( conn, "CM %p connection request failed: %s\n",
263 conn, strerror ( rc ) );
267 /* Record remote communication ID */
268 conn->remote_id = ntohl ( common->local_id );
270 /* Handle response */
271 switch ( mad->hdr.attr_id ) {
273 case htons ( IB_CM_ATTR_CONNECT_REPLY ) :
275 qp->av.qpn = ( ntohl ( rep->local_qpn ) >> 8 );
276 qp->send.psn = ( ntohl ( rep->starting_psn ) >> 8 );
277 private_data = &rep->private_data;
278 private_data_len = sizeof ( rep->private_data );
279 DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n",
280 conn, qp->av.qpn, qp->send.psn );
282 /* Modify queue pair */
283 if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
284 DBGC ( conn, "CM %p could not modify queue pair: %s\n",
285 conn, strerror ( rc ) );
289 /* Send "ready to use" reply */
290 if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id,
291 conn->remote_id ) ) != 0 ) {
292 /* Treat as non-fatal */
297 case htons ( IB_CM_ATTR_CONNECT_REJECT ) :
299 DBGC ( conn, "CM %p connection rejected (reason %d)\n",
300 conn, ntohs ( rej->reason ) );
301 /* Private data is valid only for a Consumer Reject */
302 if ( rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
303 private_data = &rej->private_data;
304 private_data_len = sizeof ( rej->private_data );
306 rc = ib_cm_rejection_reason_to_rc ( rej->reason );
310 DBGC ( conn, "CM %p unexpected response (attribute %04x)\n",
311 conn, ntohs ( mad->hdr.attr_id ) );
317 /* Destroy the completed transaction */
318 ib_destroy_madx ( ibdev, ibdev->gsi, madx );
321 /* Hand off to the upper completion handler */
322 conn->op->changed ( ibdev, qp, conn, rc, private_data,
326 /** Connection request operations */
327 static struct ib_mad_transaction_operations ib_cm_req_op = {
328 .complete = ib_cm_req_complete,
332 * Handle connection path transaction completion
334 * @v ibdev Infiniband device
337 * @v av Address vector, or NULL on error
339 static void ib_cm_path_complete ( struct ib_device *ibdev,
340 struct ib_path *path, int rc,
341 struct ib_address_vector *av ) {
342 struct ib_connection *conn = ib_path_get_ownerdata ( path );
343 struct ib_queue_pair *qp = conn->qp;
345 struct ib_cm_connect_request *req = &mad.cm.cm_data.connect_request;
346 size_t private_data_len;
348 /* Report failures */
350 DBGC ( conn, "CM %p path lookup failed: %s\n",
351 conn, strerror ( rc ) );
352 conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
356 /* Update queue pair peer path */
357 memcpy ( &qp->av, av, sizeof ( qp->av ) );
359 /* Construct connection request */
360 memset ( &mad, 0, sizeof ( mad ) );
361 mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
362 mad.hdr.class_version = IB_CM_CLASS_VERSION;
363 mad.hdr.method = IB_MGMT_METHOD_SEND;
364 mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
365 req->local_id = htonl ( conn->local_id );
366 memcpy ( &req->service_id, &conn->service_id,
367 sizeof ( req->service_id ) );
368 memcpy ( &req->local_ca, &ibdev->node_guid, sizeof ( req->local_ca ) );
369 req->local_qpn__responder_resources = htonl ( ( qp->qpn << 8 ) | 1 );
370 req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
371 req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
372 htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
374 req->starting_psn__local_timeout__retry_count =
375 htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
377 req->pkey = htons ( ibdev->pkey );
378 req->payload_mtu__rdc_exists__rnr_retry =
379 ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
380 req->max_cm_retries__srq = ( ( 0x0f << 4 ) | ( 0 << 3 ) );
381 req->primary.local_lid = htons ( ibdev->lid );
382 req->primary.remote_lid = htons ( conn->qp->av.lid );
383 memcpy ( &req->primary.local_gid, &ibdev->gid,
384 sizeof ( req->primary.local_gid ) );
385 memcpy ( &req->primary.remote_gid, &conn->qp->av.gid,
386 sizeof ( req->primary.remote_gid ) );
387 req->primary.flow_label__rate =
388 htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
389 req->primary.hop_limit = 0;
390 req->primary.sl__subnet_local =
391 ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
392 req->primary.local_ack_timeout = ( 0x13 << 3 );
393 private_data_len = conn->private_data_len;
394 if ( private_data_len > sizeof ( req->private_data ) )
395 private_data_len = sizeof ( req->private_data );
396 memcpy ( &req->private_data, &conn->private_data, private_data_len );
398 /* Create connection request */
399 av->qpn = IB_QPN_GSI;
400 av->qkey = IB_QKEY_GSI;
401 conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
403 if ( ! conn->madx ) {
404 DBGC ( conn, "CM %p could not create connection request\n",
406 conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
409 ib_madx_set_ownerdata ( conn->madx, conn );
412 /* Destroy the completed transaction */
413 ib_destroy_path ( ibdev, path );
417 /** Connection path operations */
418 static struct ib_path_operations ib_cm_path_op = {
419 .complete = ib_cm_path_complete,
423 * Create connection to remote QP
425 * @v ibdev Infiniband device
428 * @v service_id Target service ID
429 * @v private_data Connection request private data
430 * @v private_data_len Length of connection request private data
431 * @v op Connection operations
432 * @ret conn Connection
434 struct ib_connection *
435 ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
436 union ib_gid *dgid, union ib_guid *service_id,
437 void *private_data, size_t private_data_len,
438 struct ib_connection_operations *op ) {
439 struct ib_connection *conn;
441 /* Allocate and initialise request */
442 conn = zalloc ( sizeof ( *conn ) + private_data_len );
447 memset ( &qp->av, 0, sizeof ( qp->av ) );
448 qp->av.gid_present = 1;
449 memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
450 conn->local_id = random();
451 memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
453 conn->private_data_len = private_data_len;
454 memcpy ( &conn->private_data, private_data, private_data_len );
457 conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
459 goto err_create_path;
460 ib_path_set_ownerdata ( conn->path, conn );
462 /* Add to list of connections */
463 list_add ( &conn->list, &ib_cm_conns );
465 DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n",
466 conn, ibdev, qp->qpn );
467 DBGC ( conn, "CM %p connecting to " IB_GID_FMT " " IB_GUID_FMT "\n",
468 conn, IB_GID_ARGS ( dgid ), IB_GUID_ARGS ( service_id ) );
472 ib_destroy_path ( ibdev, conn->path );
480 * Destroy connection to remote QP
482 * @v ibdev Infiniband device
486 void ib_destroy_conn ( struct ib_device *ibdev,
487 struct ib_queue_pair *qp __unused,
488 struct ib_connection *conn ) {
490 list_del ( &conn->list );
492 ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
494 ib_destroy_path ( ibdev, conn->path );