/*
 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
FILE_LICENCE ( GPL2_OR_LATER );
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <byteswap.h>
#include <errno.h>
#include <ipxe/errortab.h>
#include <ipxe/malloc.h>
#include <ipxe/if_arp.h>
#include <ipxe/if_ether.h>
#include <ipxe/ethernet.h>
#include <ipxe/iobuf.h>
#include <ipxe/netdevice.h>
#include <ipxe/infiniband.h>
#include <ipxe/ib_pathrec.h>
#include <ipxe/ib_mcast.h>
#include <ipxe/retry.h>
#include <ipxe/ipoib.h>
/** Number of IPoIB send work queue entries */
#define IPOIB_NUM_SEND_WQES 2

/** Number of IPoIB receive work queue entries */
#define IPOIB_NUM_RECV_WQES 4

/** Number of IPoIB completion entries */
#define IPOIB_NUM_CQES 8
56 /** An IPoIB device */
59 struct net_device *netdev;
60 /** Underlying Infiniband device */
61 struct ib_device *ibdev;
62 /** Completion queue */
63 struct ib_completion_queue *cq;
65 struct ib_queue_pair *qp;
69 struct ipoib_mac broadcast;
70 /** Joined to IPv4 broadcast multicast group
72 * This flag indicates whether or not we have initiated the
73 * join to the IPv4 broadcast multicast group.
76 /** IPv4 broadcast multicast group membership */
77 struct ib_mc_membership broadcast_membership;
79 struct list_head peers;
82 /** Broadcast IPoIB address */
83 static struct ipoib_mac ipoib_broadcast = {
84 .flags__qpn = htonl ( IB_QPN_BROADCAST ),
85 .gid.bytes = { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
86 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff },
/** Link status for "broadcast join in progress" */
#define EINPROGRESS_JOINING __einfo_error ( EINFO_EINPROGRESS_JOINING )
#define EINFO_EINPROGRESS_JOINING __einfo_uniqify \
	( EINFO_EINPROGRESS, 0x01, "Joining" )
94 /** Human-readable message for the link status */
95 struct errortab ipoib_errors[] __errortab = {
96 __einfo_errortab ( EINFO_EINPROGRESS_JOINING ),
/****************************************************************************
 *
 * IPoIB REMAC cache
 *
 ****************************************************************************
 */
106 /** An IPoIB REMAC cache entry */
108 /** List of REMAC cache entries */
109 struct list_head list;
110 /** Remote Ethermet MAC */
111 struct ipoib_remac remac;
113 struct ipoib_mac mac;
117 * Find IPoIB MAC from REMAC
119 * @v ipoib IPoIB device
120 * @v remac Remote Ethernet MAC
121 * @ret mac IPoIB MAC (or NULL if not found)
123 static struct ipoib_mac * ipoib_find_remac ( struct ipoib_device *ipoib,
124 const struct ipoib_remac *remac ) {
125 struct ipoib_peer *peer;
127 /* Check for broadcast REMAC */
128 if ( is_broadcast_ether_addr ( remac ) )
129 return &ipoib->broadcast;
131 /* Try to find via REMAC cache */
132 list_for_each_entry ( peer, &ipoib->peers, list ) {
133 if ( memcmp ( remac, &peer->remac,
134 sizeof ( peer->remac ) ) == 0 ) {
135 /* Move peer to start of list */
136 list_del ( &peer->list );
137 list_add ( &peer->list, &ipoib->peers );
142 DBGC ( ipoib, "IPoIB %p unknown REMAC %s\n",
143 ipoib, eth_ntoa ( remac ) );
148 * Add IPoIB MAC to REMAC cache
150 * @v ipoib IPoIB device
151 * @v remac Remote Ethernet MAC
153 * @ret rc Return status code
155 static int ipoib_map_remac ( struct ipoib_device *ipoib,
156 const struct ipoib_remac *remac,
157 const struct ipoib_mac *mac ) {
158 struct ipoib_peer *peer;
160 /* Check for existing entry in REMAC cache */
161 list_for_each_entry ( peer, &ipoib->peers, list ) {
162 if ( memcmp ( remac, &peer->remac,
163 sizeof ( peer->remac ) ) == 0 ) {
164 /* Move peer to start of list */
165 list_del ( &peer->list );
166 list_add ( &peer->list, &ipoib->peers );
168 memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
173 /* Create new entry */
174 peer = malloc ( sizeof ( *peer ) );
177 memcpy ( &peer->remac, remac, sizeof ( peer->remac ) );
178 memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
179 list_add ( &peer->list, &ipoib->peers );
187 * @v ipoib IPoIB device
189 static void ipoib_flush_remac ( struct ipoib_device *ipoib ) {
190 struct ipoib_peer *peer;
191 struct ipoib_peer *tmp;
193 list_for_each_entry_safe ( peer, tmp, &ipoib->peers, list ) {
194 list_del ( &peer->list );
200 * Discard some entries from the REMAC cache
202 * @ret discarded Number of cached items discarded
204 static unsigned int ipoib_discard_remac ( void ) {
205 struct ib_device *ibdev;
206 struct ipoib_device *ipoib;
207 struct ipoib_peer *peer;
208 unsigned int discarded = 0;
210 /* Try to discard one cache entry for each IPoIB device */
211 for_each_ibdev ( ibdev ) {
212 ipoib = ib_get_ownerdata ( ibdev );
213 list_for_each_entry_reverse ( peer, &ipoib->peers, list ) {
214 list_del ( &peer->list );
224 /** IPoIB cache discarder */
225 struct cache_discarder ipoib_discarder __cache_discarder ( CACHE_NORMAL ) = {
226 .discard = ipoib_discard_remac,
/****************************************************************************
 *
 * IPoIB link layer
 *
 ****************************************************************************
 */
237 * Initialise IPoIB link-layer address
239 * @v hw_addr Hardware address
240 * @v ll_addr Link-layer address
242 static void ipoib_init_addr ( const void *hw_addr, void *ll_addr ) {
243 const uint8_t *guid = hw_addr;
244 uint8_t *eth_addr = ll_addr;
245 uint8_t guid_mask = IPOIB_GUID_MASK;
248 /* Extract bytes from GUID according to mask */
249 for ( i = 0 ; i < 8 ; i++, guid++, guid_mask <<= 1 ) {
250 if ( guid_mask & 0x80 )
251 *(eth_addr++) = *guid;
255 /** IPoIB protocol */
256 struct ll_protocol ipoib_protocol __ll_protocol = {
258 .ll_proto = htons ( ARPHRD_ETHER ),
259 .hw_addr_len = sizeof ( union ib_guid ),
260 .ll_addr_len = ETH_ALEN,
261 .ll_header_len = ETH_HLEN,
264 .init_addr = ipoib_init_addr,
266 .mc_hash = eth_mc_hash,
267 .eth_addr = eth_eth_addr,
269 .flags = LL_NAME_ONLY,
273 * Allocate IPoIB device
275 * @v priv_size Size of driver private data
276 * @ret netdev Network device, or NULL
278 struct net_device * alloc_ipoibdev ( size_t priv_size ) {
279 struct net_device *netdev;
281 netdev = alloc_netdev ( priv_size );
283 netdev->ll_protocol = &ipoib_protocol;
284 netdev->ll_broadcast = eth_broadcast;
285 netdev->max_pkt_len = IB_MAX_PAYLOAD_SIZE;
/****************************************************************************
 *
 * IPoIB translation layer
 *
 ****************************************************************************
 */
298 * Translate transmitted ARP packet
300 * @v netdev Network device
301 * @v iobuf Packet to be transmitted (with no link-layer headers)
302 * @ret rc Return status code
304 static int ipoib_translate_tx_arp ( struct net_device *netdev,
305 struct io_buffer *iobuf ) {
306 struct ipoib_device *ipoib = netdev->priv;
307 struct arphdr *arphdr = iobuf->data;
308 struct ipoib_mac *target_ha = NULL;
312 /* Do nothing unless ARP contains eIPoIB link-layer addresses */
313 if ( arphdr->ar_hln != ETH_ALEN )
316 /* Fail unless we have room to expand packet */
317 if ( iob_tailroom ( iobuf ) < ( 2 * ( sizeof ( ipoib->mac ) -
319 DBGC ( ipoib, "IPoIB %p insufficient space in TX ARP\n",
324 /* Look up REMAC, if applicable */
325 if ( arphdr->ar_op == ARPOP_REPLY ) {
326 target_ha = ipoib_find_remac ( ipoib, arp_target_pa ( arphdr ));
331 /* Construct new packet */
332 iob_put ( iobuf, ( 2 * ( sizeof ( ipoib->mac ) - ETH_ALEN ) ) );
333 sender_pa = arp_sender_pa ( arphdr );
334 target_pa = arp_target_pa ( arphdr );
335 arphdr->ar_hrd = htons ( ARPHRD_INFINIBAND );
336 arphdr->ar_hln = sizeof ( ipoib->mac );
337 memcpy ( arp_target_pa ( arphdr ), target_pa, arphdr->ar_pln );
338 memcpy ( arp_sender_pa ( arphdr ), sender_pa, arphdr->ar_pln );
339 memcpy ( arp_sender_ha ( arphdr ), &ipoib->mac, sizeof ( ipoib->mac ) );
340 memset ( arp_target_ha ( arphdr ), 0, sizeof ( ipoib->mac ) );
342 memcpy ( arp_target_ha ( arphdr ), target_ha,
343 sizeof ( *target_ha ) );
350 * Translate transmitted packet
352 * @v netdev Network device
353 * @v iobuf Packet to be transmitted (with no link-layer headers)
354 * @v net_proto Network-layer protocol (in network byte order)
355 * @ret rc Return status code
357 static int ipoib_translate_tx ( struct net_device *netdev,
358 struct io_buffer *iobuf, uint16_t net_proto ) {
360 switch ( net_proto ) {
361 case htons ( ETH_P_ARP ) :
362 return ipoib_translate_tx_arp ( netdev, iobuf );
363 case htons ( ETH_P_IP ) :
364 /* No translation needed */
367 /* Cannot handle other traffic via eIPoIB */
373 * Translate received ARP packet
375 * @v netdev Network device
376 * @v iobuf Received packet (with no link-layer headers)
377 * @v remac Constructed Remote Ethernet MAC
378 * @ret rc Return status code
380 static int ipoib_translate_rx_arp ( struct net_device *netdev,
381 struct io_buffer *iobuf,
382 struct ipoib_remac *remac ) {
383 struct ipoib_device *ipoib = netdev->priv;
384 struct arphdr *arphdr = iobuf->data;
389 /* Do nothing unless ARP contains IPoIB link-layer addresses */
390 if ( arphdr->ar_hln != sizeof ( ipoib->mac ) )
393 /* Create REMAC cache entry */
394 if ( ( rc = ipoib_map_remac ( ipoib, remac,
395 arp_sender_ha ( arphdr ) ) ) != 0 ) {
396 DBGC ( ipoib, "IPoIB %p could not map REMAC: %s\n",
397 ipoib, strerror ( rc ) );
401 /* Construct new packet */
402 sender_pa = arp_sender_pa ( arphdr );
403 target_pa = arp_target_pa ( arphdr );
404 arphdr->ar_hrd = htons ( ARPHRD_ETHER );
405 arphdr->ar_hln = ETH_ALEN;
406 memcpy ( arp_sender_pa ( arphdr ), sender_pa, arphdr->ar_pln );
407 memcpy ( arp_target_pa ( arphdr ), target_pa, arphdr->ar_pln );
408 memcpy ( arp_sender_ha ( arphdr ), remac, ETH_ALEN );
409 memset ( arp_target_ha ( arphdr ), 0, ETH_ALEN );
410 if ( arphdr->ar_op == ARPOP_REPLY ) {
411 /* Assume received replies were directed to us */
412 memcpy ( arp_target_ha ( arphdr ), netdev->ll_addr, ETH_ALEN );
414 iob_unput ( iobuf, ( 2 * ( sizeof ( ipoib->mac ) - ETH_ALEN ) ) );
420 * Translate received packet
422 * @v netdev Network device
423 * @v iobuf Received packet (with no link-layer headers)
424 * @v remac Constructed Remote Ethernet MAC
425 * @v net_proto Network-layer protocol (in network byte order)
426 * @ret rc Return status code
428 static int ipoib_translate_rx ( struct net_device *netdev,
429 struct io_buffer *iobuf,
430 struct ipoib_remac *remac,
431 uint16_t net_proto ) {
433 switch ( net_proto ) {
434 case htons ( ETH_P_ARP ) :
435 return ipoib_translate_rx_arp ( netdev, iobuf, remac );
436 case htons ( ETH_P_IP ) :
437 /* No translation needed */
440 /* Cannot handle other traffic via eIPoIB */
/****************************************************************************
 *
 * IPoIB network device
 *
 ****************************************************************************
 */
453 * Transmit packet via IPoIB network device
455 * @v netdev Network device
456 * @v iobuf I/O buffer
457 * @ret rc Return status code
459 static int ipoib_transmit ( struct net_device *netdev,
460 struct io_buffer *iobuf ) {
461 struct ipoib_device *ipoib = netdev->priv;
462 struct ib_device *ibdev = ipoib->ibdev;
463 struct ethhdr *ethhdr;
464 struct ipoib_hdr *ipoib_hdr;
465 struct ipoib_mac *mac;
466 struct ib_address_vector dest;
471 if ( iob_len ( iobuf ) < sizeof ( *ethhdr ) ) {
472 DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
476 /* Attempting transmission while link is down will put the
477 * queue pair into an error state, so don't try it.
479 if ( ! ib_link_ok ( ibdev ) )
482 /* Strip eIPoIB header */
483 ethhdr = iobuf->data;
484 net_proto = ethhdr->h_protocol;
485 iob_pull ( iobuf, sizeof ( *ethhdr ) );
487 /* Identify destination address */
488 mac = ipoib_find_remac ( ipoib, ( ( void *) ethhdr->h_dest ) );
492 /* Translate packet if applicable */
493 if ( ( rc = ipoib_translate_tx ( netdev, iobuf, net_proto ) ) != 0 )
496 /* Prepend real IPoIB header */
497 ipoib_hdr = iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
498 ipoib_hdr->proto = net_proto;
499 ipoib_hdr->reserved = 0;
501 /* Construct address vector */
502 memset ( &dest, 0, sizeof ( dest ) );
503 dest.qpn = ( ntohl ( mac->flags__qpn ) & IB_QPN_MASK );
504 dest.gid_present = 1;
505 memcpy ( &dest.gid, &mac->gid, sizeof ( dest.gid ) );
506 if ( ( rc = ib_resolve_path ( ibdev, &dest ) ) != 0 ) {
507 /* Path not resolved yet */
511 return ib_post_send ( ibdev, ipoib->qp, &dest, iobuf );
515 * Handle IPoIB send completion
517 * @v ibdev Infiniband device
519 * @v iobuf I/O buffer
520 * @v rc Completion status code
522 static void ipoib_complete_send ( struct ib_device *ibdev __unused,
523 struct ib_queue_pair *qp,
524 struct io_buffer *iobuf, int rc ) {
525 struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
527 netdev_tx_complete_err ( ipoib->netdev, iobuf, rc );
531 * Handle IPoIB receive completion
533 * @v ibdev Infiniband device
535 * @v dest Destination address vector, or NULL
536 * @v source Source address vector, or NULL
537 * @v iobuf I/O buffer
538 * @v rc Completion status code
540 static void ipoib_complete_recv ( struct ib_device *ibdev __unused,
541 struct ib_queue_pair *qp,
542 struct ib_address_vector *dest,
543 struct ib_address_vector *source,
544 struct io_buffer *iobuf, int rc ) {
545 struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
546 struct net_device *netdev = ipoib->netdev;
547 struct ipoib_hdr *ipoib_hdr;
548 struct ethhdr *ethhdr;
549 struct ipoib_remac remac;
554 netdev_rx_err ( netdev, iobuf, rc );
559 if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) {
560 DBGC ( ipoib, "IPoIB %p received packet too short to "
561 "contain IPoIB header\n", ipoib );
562 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
563 netdev_rx_err ( netdev, iobuf, -EIO );
567 DBGC ( ipoib, "IPoIB %p received packet without address "
569 netdev_rx_err ( netdev, iobuf, -ENOTTY );
573 /* Strip real IPoIB header */
574 ipoib_hdr = iobuf->data;
575 net_proto = ipoib_hdr->proto;
576 iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
578 /* Construct source address from remote QPN and LID */
579 remac.qpn = htonl ( source->qpn | EIPOIB_QPN_LA );
580 remac.lid = htons ( source->lid );
582 /* Translate packet if applicable */
583 if ( ( rc = ipoib_translate_rx ( netdev, iobuf, &remac,
584 net_proto ) ) != 0 ) {
585 netdev_rx_err ( netdev, iobuf, rc );
589 /* Prepend eIPoIB header */
590 ethhdr = iob_push ( iobuf, sizeof ( *ethhdr ) );
591 memcpy ( ðhdr->h_source, &remac, sizeof ( ethhdr->h_source ) );
592 ethhdr->h_protocol = net_proto;
594 /* Construct destination address */
595 if ( dest->gid_present && ( memcmp ( &dest->gid, &ipoib->broadcast.gid,
596 sizeof ( dest->gid ) ) == 0 ) ) {
597 /* Broadcast GID; use the Ethernet broadcast address */
598 memcpy ( ðhdr->h_dest, eth_broadcast,
599 sizeof ( ethhdr->h_dest ) );
601 /* Assume destination address is local Ethernet MAC */
602 memcpy ( ðhdr->h_dest, netdev->ll_addr,
603 sizeof ( ethhdr->h_dest ) );
606 /* Hand off to network layer */
607 netdev_rx ( netdev, iobuf );
610 /** IPoIB completion operations */
611 static struct ib_completion_queue_operations ipoib_cq_op = {
612 .complete_send = ipoib_complete_send,
613 .complete_recv = ipoib_complete_recv,
617 * Allocate IPoIB receive I/O buffer
619 * @v len Length of buffer
620 * @ret iobuf I/O buffer, or NULL
622 * Some Infiniband hardware requires 2kB alignment of receive buffers
623 * and provides no way to disable header separation. The result is
624 * that there are only four bytes of link-layer header (the real IPoIB
625 * header) before the payload. This is not sufficient space to insert
626 * an eIPoIB link-layer pseudo-header.
628 * We therefore allocate I/O buffers offset to start slightly before
629 * the natural alignment boundary, in order to allow sufficient space.
631 static struct io_buffer * ipoib_alloc_iob ( size_t len ) {
632 struct io_buffer *iobuf;
635 /* Calculate additional length required at start of buffer */
636 reserve_len = ( sizeof ( struct ethhdr ) -
637 sizeof ( struct ipoib_hdr ) );
639 /* Allocate buffer */
640 iobuf = alloc_iob_raw ( ( len + reserve_len ), len, -reserve_len );
642 iob_reserve ( iobuf, reserve_len );
647 /** IPoIB queue pair operations */
648 static struct ib_queue_pair_operations ipoib_qp_op = {
649 .alloc_iob = ipoib_alloc_iob,
653 * Poll IPoIB network device
655 * @v netdev Network device
657 static void ipoib_poll ( struct net_device *netdev ) {
658 struct ipoib_device *ipoib = netdev->priv;
659 struct ib_device *ibdev = ipoib->ibdev;
661 /* Poll Infiniband device */
662 ib_poll_eq ( ibdev );
664 /* Poll the retry timers (required for IPoIB multicast join) */
669 * Handle IPv4 broadcast multicast group join completion
671 * @v ibdev Infiniband device
673 * @v membership Multicast group membership
675 * @v mad Response MAD (or NULL on error)
677 void ipoib_join_complete ( struct ib_device *ibdev __unused,
678 struct ib_queue_pair *qp __unused,
679 struct ib_mc_membership *membership, int rc,
680 union ib_mad *mad __unused ) {
681 struct ipoib_device *ipoib = container_of ( membership,
682 struct ipoib_device, broadcast_membership );
684 /* Record join status as link status */
685 netdev_link_err ( ipoib->netdev, rc );
689 * Join IPv4 broadcast multicast group
691 * @v ipoib IPoIB device
692 * @ret rc Return status code
694 static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
697 if ( ( rc = ib_mcast_join ( ipoib->ibdev, ipoib->qp,
698 &ipoib->broadcast_membership,
699 &ipoib->broadcast.gid,
700 ipoib_join_complete ) ) != 0 ) {
701 DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
702 ipoib, strerror ( rc ) );
705 ipoib->broadcast_joined = 1;
711 * Leave IPv4 broadcast multicast group
713 * @v ipoib IPoIB device
715 static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
717 if ( ipoib->broadcast_joined ) {
718 ib_mcast_leave ( ipoib->ibdev, ipoib->qp,
719 &ipoib->broadcast_membership );
720 ipoib->broadcast_joined = 0;
725 * Handle link status change
727 * @v ibdev Infiniband device
729 static void ipoib_link_state_changed ( struct ib_device *ibdev ) {
730 struct net_device *netdev = ib_get_ownerdata ( ibdev );
731 struct ipoib_device *ipoib = netdev->priv;
734 /* Leave existing broadcast group */
735 ipoib_leave_broadcast_group ( ipoib );
737 /* Update MAC address based on potentially-new GID prefix */
738 memcpy ( &ipoib->mac.gid.s.prefix, &ibdev->gid.s.prefix,
739 sizeof ( ipoib->mac.gid.s.prefix ) );
741 /* Update broadcast GID based on potentially-new partition key */
742 ipoib->broadcast.gid.words[2] =
743 htons ( ibdev->pkey | IB_PKEY_FULL );
745 /* Set net device link state to reflect Infiniband link state */
746 rc = ib_link_rc ( ibdev );
747 netdev_link_err ( netdev, ( rc ? rc : -EINPROGRESS_JOINING ) );
749 /* Join new broadcast group */
750 if ( ib_is_open ( ibdev ) && ib_link_ok ( ibdev ) &&
751 ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) ) {
752 DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
753 "%s\n", ipoib, strerror ( rc ) );
754 netdev_link_err ( netdev, rc );
760 * Open IPoIB network device
762 * @v netdev Network device
763 * @ret rc Return status code
765 static int ipoib_open ( struct net_device *netdev ) {
766 struct ipoib_device *ipoib = netdev->priv;
767 struct ib_device *ibdev = ipoib->ibdev;
771 if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
772 DBGC ( ipoib, "IPoIB %p could not open device: %s\n",
773 ipoib, strerror ( rc ) );
777 /* Allocate completion queue */
778 ipoib->cq = ib_create_cq ( ibdev, IPOIB_NUM_CQES, &ipoib_cq_op );
780 DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
786 /* Allocate queue pair */
787 ipoib->qp = ib_create_qp ( ibdev, IB_QPT_UD, IPOIB_NUM_SEND_WQES,
788 ipoib->cq, IPOIB_NUM_RECV_WQES, ipoib->cq,
791 DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n",
796 ib_qp_set_ownerdata ( ipoib->qp, ipoib );
798 /* Update MAC address with QPN */
799 ipoib->mac.flags__qpn = htonl ( ipoib->qp->qpn );
801 /* Fill receive rings */
802 ib_refill_recv ( ibdev, ipoib->qp );
804 /* Fake a link status change to join the broadcast group */
805 ipoib_link_state_changed ( ibdev );
809 ib_destroy_qp ( ibdev, ipoib->qp );
811 ib_destroy_cq ( ibdev, ipoib->cq );
819 * Close IPoIB network device
821 * @v netdev Network device
823 static void ipoib_close ( struct net_device *netdev ) {
824 struct ipoib_device *ipoib = netdev->priv;
825 struct ib_device *ibdev = ipoib->ibdev;
827 /* Flush REMAC cache */
828 ipoib_flush_remac ( ipoib );
830 /* Leave broadcast group */
831 ipoib_leave_broadcast_group ( ipoib );
833 /* Remove QPN from MAC address */
834 ipoib->mac.flags__qpn = 0;
836 /* Tear down the queues */
837 ib_destroy_qp ( ibdev, ipoib->qp );
838 ib_destroy_cq ( ibdev, ipoib->cq );
840 /* Close IB device */
844 /** IPoIB network device operations */
845 static struct net_device_operations ipoib_operations = {
847 .close = ipoib_close,
848 .transmit = ipoib_transmit,
855 * @v ibdev Infiniband device
856 * @ret rc Return status code
858 static int ipoib_probe ( struct ib_device *ibdev ) {
859 struct net_device *netdev;
860 struct ipoib_device *ipoib;
863 /* Allocate network device */
864 netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
867 netdev_init ( netdev, &ipoib_operations );
868 ipoib = netdev->priv;
869 ib_set_ownerdata ( ibdev, netdev );
870 netdev->dev = ibdev->dev;
871 memset ( ipoib, 0, sizeof ( *ipoib ) );
872 ipoib->netdev = netdev;
873 ipoib->ibdev = ibdev;
874 INIT_LIST_HEAD ( &ipoib->peers );
876 /* Extract hardware address */
877 memcpy ( netdev->hw_addr, &ibdev->gid.s.guid,
878 sizeof ( ibdev->gid.s.guid ) );
880 /* Set local MAC address */
881 memcpy ( &ipoib->mac.gid.s.guid, &ibdev->gid.s.guid,
882 sizeof ( ipoib->mac.gid.s.guid ) );
884 /* Set default broadcast MAC address */
885 memcpy ( &ipoib->broadcast, &ipoib_broadcast,
886 sizeof ( ipoib->broadcast ) );
888 /* Register network device */
889 if ( ( rc = register_netdev ( netdev ) ) != 0 )
890 goto err_register_netdev;
895 netdev_nullify ( netdev );
896 netdev_put ( netdev );
/**
 * Remove IPoIB device
 *
 * @v ibdev		Infiniband device
 */
static void ipoib_remove ( struct ib_device *ibdev ) {
	struct net_device *netdev = ib_get_ownerdata ( ibdev );

	unregister_netdev ( netdev );
	netdev_nullify ( netdev );
	netdev_put ( netdev );
}
914 struct ib_driver ipoib_driver __ib_driver = {
916 .probe = ipoib_probe,
917 .notify = ipoib_link_state_changed,
918 .remove = ipoib_remove,