1 /************************************************* -*- linux-c -*-
2 * Myricom 10Gb Network Interface Card Software
3 * Copyright 2009, Myricom, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 ****************************************************************/
20 FILE_LICENCE ( GPL2_ONLY );
23 * Author: Glenn Brown <glenn@myri.com>
27 * General Theory of Operation
29 * This is a minimal Myricom 10 gigabit Ethernet driver for network
34 * myri10ge_pci_probe() is called by iPXE during initialization.
35 * Minimal NIC initialization is performed to minimize resources
36 * consumed when the driver is resident but unused.
40 * myri10ge_net_open() is called by iPXE before attempting to network
41 * boot from the card. Packet buffers are allocated and the NIC
42 * interface is initialized.
46 * myri10ge_net_transmit() enqueues frames for transmission by writing
47 * descriptors to the NIC's tx ring. For simplicity and to avoid
48 * copies, we always have the NIC DMA up the packet. The sent I/O
49 * buffer is released once the NIC signals myri10ge_interrupt_handler()
50 * that the send has completed.
54 * Receives are posted to the NIC's receive ring. The NIC fills a
55 * DMAable receive_completion ring with completion notifications.
56 * myri10ge_net_poll() polls for these receive notifications, posts
57 * replacement receive buffers to the NIC, and passes received frames
62 * This driver supports NonVolatile Storage (nvs) in the NIC EEPROM.
63 * If the last EEPROM block is not otherwise filled, we tell
64 * iPXE it may store NonVolatile Options (nvo) there.
69 * - DBG() is for any errors, i.e. failed alloc_iob(), malloc_dma(),
70 * TX overflow, corrupted packets, ...
71 * - DBG2() is for successful events, like packet received,
72 * packet transmitted, and other general notifications.
73 * - DBGP() prints the name of each called function on entry
80 #include <ipxe/ethernet.h>
81 #include <ipxe/if_ether.h>
82 #include <ipxe/iobuf.h>
83 #include <ipxe/malloc.h>
84 #include <ipxe/netdevice.h>
88 #include <ipxe/timer.h>
90 #include "myri10ge_mcp.h"
92 /****************************************************************
93 * Forward declarations
94 ****************************************************************/
96 /* PCI driver entry points */
98 static int myri10ge_pci_probe ( struct pci_device* );
99 static void myri10ge_pci_remove ( struct pci_device* );
101 /* Network device operations */
103 static void myri10ge_net_close ( struct net_device* );
104 static void myri10ge_net_irq ( struct net_device*, int enable );
105 static int myri10ge_net_open ( struct net_device* );
106 static void myri10ge_net_poll ( struct net_device* );
107 static int myri10ge_net_transmit ( struct net_device*, struct io_buffer* );
109 /****************************************************************
111 ****************************************************************/
113 /* Maximum ring indices, used to wrap ring indices. These must be 2**N-1. */
115 #define MYRI10GE_TRANSMIT_WRAP 1U
116 #define MYRI10GE_RECEIVE_WRAP 7U
117 #define MYRI10GE_RECEIVE_COMPLETION_WRAP 31U
119 /****************************************************************
120 * Driver internal data types.
121 ****************************************************************/
123 /* Structure holding all DMA buffers for a NIC, which we will
124 allocate as contiguous read/write DMAable memory when the NIC is
127 struct myri10ge_dma_buffers
129 /* The NIC DMAs receive completion notifications into this ring */
131 mcp_slot_t receive_completion[1+MYRI10GE_RECEIVE_COMPLETION_WRAP];
133 /* Interrupt details are DMAd here before interrupting. */
135 mcp_irq_data_t irq_data; /* 64B */
137 /* NIC command completion status is DMAd here. */
139 mcp_cmd_response_t command_response; /* 8B */
142 struct myri10ge_private
144 /* Interrupt support */
146 uint32 *irq_claim; /* in NIC SRAM */
147 uint32 *irq_deassert; /* in NIC SRAM */
151 struct myri10ge_dma_buffers *dma;
156 * The counts here are uint32 for easy comparison with
157 * priv->dma->irq_data.send_done_count and with each other.
160 mcp_kreq_ether_send_t *transmit_ring; /* in NIC SRAM */
161 uint32 transmit_ring_wrap;
162 uint32 transmits_posted;
163 uint32 transmits_done;
164 struct io_buffer *transmit_iob[1 + MYRI10GE_TRANSMIT_WRAP];
170 mcp_kreq_ether_recv_t *receive_post_ring; /* in NIC SRAM */
171 unsigned int receive_post_ring_wrap;
172 unsigned int receives_posted;
173 unsigned int receives_done;
174 struct io_buffer *receive_iob[1 + MYRI10GE_RECEIVE_WRAP];
176 /* Address for writing commands to the firmware.
177 BEWARE: the value must be written 32 bits at a time. */
182 * Nonvolatile Storage for configuration options.
185 struct nvs_device nvs;
186 struct nvo_block nvo;
187 unsigned int nvo_registered;
189 /* Cached PCI capability locations. */
194 /****************************************************************
195 * Driver internal functions.
196 ****************************************************************/
198 /* Print ring status when debugging. Use this only after a printed
201 #define DBG2_RINGS( priv ) \
202 DBG2 ( "tx %x/%x rx %x/%x in %s() \n", \
203 ( priv ) ->transmits_done, ( priv ) -> transmits_posted, \
204 ( priv ) ->receives_done, ( priv ) -> receives_posted, \
208 * Return a pointer to the driver private data for a network device.
210 * @v nd Network device created by this driver.
211 * @ret priv The corresponding driver private data.
213 static inline struct myri10ge_private *myri10ge_priv ( struct net_device *nd )
215 /* Our private data always follows the network device in memory,
216 since we use alloc_netdev() to allocate the storage. */
218 return ( struct myri10ge_private * ) ( nd + 1 );
222 * Convert a Myri10ge driver private data pointer to a netdev pointer.
224 * @v p Myri10ge device private data.
225 * @ret r The corresponding network device.
227 static inline struct net_device *myri10ge_netdev ( struct myri10ge_private *p )
229 return ( ( struct net_device * ) p ) - 1;
233 * Convert a network device pointer to a PCI device pointer.
235 * @v netdev A Network Device.
236 * @ret r The corresponding PCI device.
238 static inline struct pci_device *myri10ge_pcidev ( struct net_device *netdev )
240 return container_of (netdev->dev, struct pci_device, dev);
244 * Pass a receive buffer to the NIC to be filled.
246 * @v priv The network device to receive the buffer.
247 * @v iob The I/O buffer to fill.
249 * Receive buffers are filled in FIFO order.
251 static void myri10ge_post_receive ( struct myri10ge_private *priv,
252 struct io_buffer *iob )
254 unsigned int receives_posted;
255 mcp_kreq_ether_recv_t *request;
257 /* Record the posted I/O buffer, to be passed to netdev_rx() on
260 receives_posted = priv->receives_posted;
261 priv->receive_iob[receives_posted & MYRI10GE_RECEIVE_WRAP] = iob;
263 /* Post the receive. */
265 request = &priv->receive_post_ring[receives_posted
266 & priv->receive_post_ring_wrap];
267 request->addr_high = 0;
269 request->addr_low = htonl ( virt_to_bus ( iob->data ) );
270 priv->receives_posted = ++receives_posted;
274 * Execute a command on the NIC.
276 * @v priv NIC to perform the command.
277 * @v cmd The command to perform.
278 * @v data I/O copy buffer for parameters/results
279 * @ret rc 0 on success, else an error code.
281 static int myri10ge_command ( struct myri10ge_private *priv,
288 unsigned int slept_ms;
289 volatile mcp_cmd_response_t *response;
291 DBGP ( "myri10ge_command ( ,%d, ) \n", cmd );
292 command = priv->command;
293 response = &priv->dma->command_response;
295 /* Mark the command as incomplete. */
297 response->result = 0xFFFFFFFF;
299 /* Pass the command to the NIC. */
301 command->cmd = htonl ( cmd );
302 command->data0 = htonl ( data[0] );
303 command->data1 = htonl ( data[1] );
304 command->data2 = htonl ( data[2] );
305 command->response_addr.high = 0;
306 command->response_addr.low
307 = htonl ( virt_to_bus ( &priv->dma->command_response ) );
308 for ( i=0; i<9; i++ )
313 /* Wait up to 2 seconds for a response. */
315 for ( slept_ms=0; slept_ms<2000; slept_ms++ ) {
316 result = response->result;
318 data[0] = ntohl ( response->data );
320 } else if ( result != 0xFFFFFFFF ) {
321 DBG ( "cmd%d:0x%x\n",
323 ntohl ( response->result ) );
329 DBG ( "cmd%d:timed out\n", cmd );
334 * Handle any pending interrupt.
336 * @v netdev Device being polled for interrupts.
338 * This is called periodically to let the driver check for interrupts.
340 static void myri10ge_interrupt_handler ( struct net_device *netdev )
342 struct myri10ge_private *priv;
343 mcp_irq_data_t *irq_data;
346 priv = myri10ge_priv ( netdev );
347 irq_data = &priv->dma->irq_data;
349 /* Return if there was no interrupt. */
352 valid = irq_data->valid;
357 /* Tell the NIC to deassert the interrupt and clear
360 *priv->irq_deassert = 0; /* any value is OK. */
363 /* Handle any new receives. */
367 /* Pass the receive interrupt token back to the NIC. */
370 *priv->irq_claim = htonl ( 3 );
374 /* Handle any sent packet by freeing its I/O buffer, now that
375 we know it has been DMAd. */
378 unsigned int nic_done_count;
381 nic_done_count = ntohl ( priv->dma->irq_data.send_done_count );
382 while ( priv->transmits_done != nic_done_count ) {
383 struct io_buffer *iob;
385 iob = priv->transmit_iob [priv->transmits_done
386 & MYRI10GE_TRANSMIT_WRAP];
388 netdev_tx_complete ( netdev, iob );
389 ++priv->transmits_done;
393 /* Record any statistics update. */
395 if ( irq_data->stats_updated ) {
397 /* Update the link status. */
400 if ( ntohl ( irq_data->link_up ) == MXGEFW_LINK_UP )
401 netdev_link_up ( netdev );
403 netdev_link_down ( netdev );
405 /* Ignore all error counters from the NIC. */
408 /* Wait for the interrupt to be deasserted, as indicated by
409 irq_data->valid, which is set by the NIC after the deassert. */
414 } while ( irq_data->valid );
416 /* Claim the interrupt to enable future interrupt generation. */
419 * ( priv->irq_claim + 1 ) = htonl ( 3 );
423 /* Constants for reading the STRING_SPECS via the Myricom
424 Vendor Specific PCI configuration space capability. */
426 #define VS_EEPROM_READ_ADDR ( vs + 0x04 )
427 #define VS_EEPROM_READ_DATA ( vs + 0x08 )
428 #define VS_EEPROM_WRITE ( vs + 0x0C )
429 #define VS_ADDR ( vs + 0x18 )
430 #define VS_DATA ( vs + 0x14 )
431 #define VS_MODE ( vs + 0x10 )
432 #define VS_MODE_READ32 0x3
433 #define VS_MODE_LOCATE 0x8
434 #define VS_LOCATE_STRING_SPECS 0x3
435 #define VS_MODE_EEPROM_STREAM_WRITE 0xB
438 * Read MAC address from its 'string specs' via the vendor-specific
439 * capability. (This capability allows NIC SRAM and ROM to be read
440 * before it is mapped.)
443 * @v vs Offset of the PCI Vendor-Specific Capability.
444 * @v mac Buffer to store the MAC address.
445 * @ret rc Returns 0 on success, else an error code.
/* Read the string specs through the vendor-specific capability
   registers (VS_MODE, VS_ADDR, VS_DATA) into a 256-byte stack buffer,
   then walk the NUL-separated entries looking for one that begins
   with MAC= and parse six hexadecimal fields from it into mac[]. */
447 static int mac_address_from_string_specs ( struct pci_device *pci,
449 uint8 mac[ETH_ALEN] )
451 char string_specs[256];
453 char *to = string_specs;
458 /* Locate the String specs in LANai SRAM. */
460 pci_write_config_byte ( pci, VS_MODE, VS_MODE_LOCATE );
461 pci_write_config_dword ( pci, VS_ADDR, VS_LOCATE_STRING_SPECS );
462 pci_read_config_dword ( pci, VS_ADDR, &addr );
463 pci_read_config_dword ( pci, VS_DATA, &len );
464 DBG2 ( "ss@%x,%x\n", addr, len );
466 /* Copy in the string specs. Use 32-bit reads for performance. */
/* NOTE(review): this test rejects both an oversized length and a
   length that is not a multiple of 4, but the message only mentions
   the first case. A length equal to the buffer size is accepted,
   which leaves no spare byte for a terminating NUL; confirm the
   specs always carry their own terminator before the string
   print and strtoul calls below rely on one. */
468 if ( len > sizeof ( string_specs ) || ( len & 3 ) ) {
469 pci_write_config_byte ( pci, VS_MODE, 0 );
470 DBG ( "SS too big\n" );
474 pci_write_config_byte ( pci, VS_MODE, VS_MODE_READ32 );
/* NOTE(review): the locate step above writes VS_ADDR with a dword
   write, whereas this one uses a byte write, presumably passing
   only the low byte of addr. Confirm this is what the hardware
   expects. */
478 pci_write_config_byte ( pci, VS_ADDR, addr );
479 pci_read_config_dword ( pci, VS_DATA, &tmp );
481 memcpy ( to, &tmp, 4 );
486 pci_write_config_byte ( pci, VS_MODE, 0 );
488 /* Parse the string specs. */
490 DBG2 ( "STRING_SPECS:\n" );
492 limit = string_specs + sizeof ( string_specs );
/* NOTE(review): the loop condition dereferences ptr before it
   compares ptr against limit. Testing the bound first would avoid
   reading one byte past the buffer when no empty string terminates
   the list. */
493 while ( *ptr != '\0' && ptr < limit ) {
494 DBG2 ( "%s\n", ptr );
495 if ( memcmp ( ptr, "MAC=", 4 ) == 0 ) {
499 for ( i=0; i<6; i++ ) {
500 if ( ( ptr + 2 ) > limit ) {
501 DBG ( "bad MAC addr\n" );
504 mac[i] = strtoul ( ptr, &ptr, 16 );
/* Skip to the byte after the next NUL, i.e. the next entry. */
510 while ( ptr < limit && *ptr++ );
513 /* Verify we parsed all we need. */
516 DBG ( "no MAC addr\n" );
520 DBG2 ( "MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
521 mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] );
526 /****************************************************************
527 * NonVolatile Storage support
528 ****************************************************************/
531 * Fill a buffer with data read from nonvolatile storage.
533 * @v nvs The NonVolatile Storage device to be read.
534 * @v addr The first NonVolatile Storage address to be read.
535 * @v _buf Pointer to the data buffer to be filled.
536 * @v len The number of bytes to copy.
537 * @ret rc 0 on success, else nonzero.
539 static int myri10ge_nvs_read ( struct nvs_device *nvs,
544 struct myri10ge_private *priv =
545 container_of (nvs, struct myri10ge_private, nvs);
546 struct pci_device *pci = myri10ge_pcidev ( myri10ge_netdev ( priv ) );
547 unsigned int vs = priv->pci_cap_vs;
548 unsigned char *buf = (unsigned char *) _buf;
552 DBGP ( "myri10ge_nvs_read\n" );
554 /* Issue the first read address. */
556 pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 3, addr>>16 );
557 pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 2, addr>>8 );
558 pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 1, addr );
561 /* Issue all the reads, and harvest the results every 4th issue. */
563 for ( i=0; i<len; ++i,addr++ ) {
565 /* Issue the next read address, updating only the
566 bytes that need updating. We always update the
567 LSB, which triggers the read. */
569 if ( ( addr & 0xff ) == 0 ) {
570 if ( ( addr & 0xffff ) == 0 ) {
571 pci_write_config_byte ( pci,
572 VS_EEPROM_READ_ADDR + 3,
575 pci_write_config_byte ( pci,
576 VS_EEPROM_READ_ADDR + 2,
579 pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 1, addr );
581 /* If 4 data bytes are available, read them with a single read. */
583 if ( ( i & 3 ) == 3 ) {
584 pci_read_config_dword ( pci,
587 for ( j=0; j<4; j++ ) {
594 /* Harvest any remaining results. */
596 if ( ( i & 3 ) != 0 ) {
597 pci_read_config_dword ( pci, VS_EEPROM_READ_DATA, &data );
598 for ( j=1; j<=(i&3); j++ ) {
604 DBGP_HDA ( addr - len, _buf, len );
609 * Write a buffer into nonvolatile storage.
611 * @v nvs The NonVolatile Storage device to be written.
612 * @v address The NonVolatile Storage address to be written.
613 * @v _buf Pointer to the data to be written.
614 * @v len Length of the buffer to be written.
615 * @ret rc 0 on success, else nonzero.
617 static int myri10ge_nvs_write ( struct nvs_device *nvs,
622 struct myri10ge_private *priv =
623 container_of (nvs, struct myri10ge_private, nvs);
624 struct pci_device *pci = myri10ge_pcidev ( myri10ge_netdev ( priv ) );
625 unsigned int vs = priv->pci_cap_vs;
626 const unsigned char *buf = (const unsigned char *)_buf;
630 DBGP ( "nvs_write " );
631 DBGP_HDA ( addr, _buf, len );
633 /* Start erase of the NonVolatile Options block. */
636 pci_write_config_dword ( pci, VS_EEPROM_WRITE, ( addr << 8 ) | 0xff );
638 /* Wait for erase to complete. */
641 pci_read_config_byte ( pci, VS_EEPROM_READ_DATA, &verify );
642 while ( verify != 0xff ) {
643 pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 1, addr );
644 pci_read_config_byte ( pci, VS_EEPROM_READ_DATA, &verify );
647 /* Write the data one byte at a time. */
650 pci_write_config_byte ( pci, VS_MODE, VS_MODE_EEPROM_STREAM_WRITE );
651 pci_write_config_dword ( pci, VS_ADDR, addr );
652 for (i=0; i<len; i++, addr++)
653 pci_write_config_byte ( pci, VS_DATA, buf[i] );
654 pci_write_config_dword ( pci, VS_ADDR, 0xffffffff );
655 pci_write_config_byte ( pci, VS_MODE, 0 );
662 * Initialize NonVolatile storage support for a device.
664 * @v priv Device private data for the device.
665 * @ret rc 0 on success, else an error code.
/* Read the EEPROM header and the MCP2 length, and, when the last
   EEPROM segment is not overlapped by MCP2, fill in priv->nvs and
   register priv->nvo with the settings of the network device. */
668 static int myri10ge_nv_init ( struct myri10ge_private *priv )
671 struct myri10ge_eeprom_header
675 uint32 eeprom_segment_len;
681 unsigned int nvo_fragment_pos;
683 DBGP ( "myri10ge_nv_init\n" );
685 /* Read the EEPROM header, and byteswap the fields we will use.
686 This is safe even though priv->nvs is not yet initialized. */
688 rc = myri10ge_nvs_read ( &priv->nvs, 0, &hdr, sizeof ( hdr ) );
690 DBG ( "EEPROM header unreadable\n" );
693 hdr.eeprom_len = ntohl ( hdr.eeprom_len );
694 hdr.eeprom_segment_len = ntohl ( hdr.eeprom_segment_len );
695 hdr.mcp2_offset = ntohl ( hdr.mcp2_offset );
696 hdr.version = ntohl ( hdr.version );
697 DBG2 ( "eelen:%xh seglen:%xh mcp2@%xh ver%d\n", hdr.eeprom_len,
698 hdr.eeprom_segment_len, hdr.mcp2_offset, hdr.version );
700 /* If the firmware does not support EEPROM writes, simply return. */
702 if ( hdr.version < 1 ) {
703 DBG ( "No EEPROM write support\n" );
707 /* Read the length of MCP2. */
/* NOTE(review): rc is assigned from this second read, but the
   next visible statement byteswaps and uses mcp2_len without
   looking at rc. Confirm whether a failure check is intended. */
709 rc = myri10ge_nvs_read ( &priv->nvs, hdr.mcp2_offset, &mcp2_len, 4 );
710 mcp2_len = ntohl ( mcp2_len );
711 DBG2 ( "mcp2len:%xh\n", mcp2_len );
713 /* Determine the position of the NonVolatile Options fragment and
714 simply return if it overlaps other data. */
/* NOTE(review): the subtraction and the addition below operate on
   values taken straight from the EEPROM. With presumably unsigned
   operands, a segment length larger than the EEPROM length, or a
   very large mcp2_len, would wrap instead of being rejected.
   Confirm the header contents are trusted. */
716 nvo_fragment_pos = hdr.eeprom_len - hdr.eeprom_segment_len;
717 if ( hdr.mcp2_offset + mcp2_len > nvo_fragment_pos ) {
718 DBG ( "EEPROM full\n" );
722 /* Initialize NonVolatile Storage state. */
724 priv->nvs.word_len_log2 = 0;
725 priv->nvs.size = hdr.eeprom_len;
726 priv->nvs.block_size = hdr.eeprom_segment_len;
727 priv->nvs.read = myri10ge_nvs_read;
728 priv->nvs.write = myri10ge_nvs_write;
730 /* Register the NonVolatile Options storage. */
732 nvo_init ( &priv->nvo,
734 nvo_fragment_pos, 0x200,
736 & myri10ge_netdev (priv) -> refcnt );
737 rc = register_nvo ( &priv->nvo,
738 netdev_settings ( myri10ge_netdev ( priv ) ) );
/* NOTE(review): unlike the other DBG messages in this function,
   this one has no trailing newline. */
740 DBG ("register_nvo failed");
/* Remember the registration so myri10ge_nv_fini() knows to undo it. */
744 priv->nvo_registered = 1;
745 DBG2 ( "NVO supported\n" );
750 myri10ge_nv_fini ( struct myri10ge_private *priv )
752 /* Simply return if nonvolatile access is not supported. */
754 if ( 0 == priv->nvo_registered )
757 unregister_nvo ( &priv->nvo );
760 /****************************************************************
761 * iPXE PCI Device Driver API functions
762 ****************************************************************/
765 * Initialize the PCI device.
767 * @v pci The device's associated pci_device structure.
768 * @v id The PCI device + vendor id.
769 * @ret rc Returns zero if successfully initialized.
771 * This function is called very early on, while iPXE is initializing.
772 * This is an iPXE PCI Device Driver API function.
774 static int myri10ge_pci_probe ( struct pci_device *pci )
776 static struct net_device_operations myri10ge_operations = {
777 .open = myri10ge_net_open,
778 .close = myri10ge_net_close,
779 .transmit = myri10ge_net_transmit,
780 .poll = myri10ge_net_poll,
781 .irq = myri10ge_net_irq
786 struct net_device *netdev;
787 struct myri10ge_private *priv;
789 DBGP ( "myri10ge_pci_probe: " );
791 netdev = alloc_etherdev ( sizeof ( *priv ) );
794 dbg = "alloc_etherdev";
795 goto abort_with_nothing;
798 netdev_init ( netdev, &myri10ge_operations );
799 priv = myri10ge_priv ( netdev );
801 pci_set_drvdata ( pci, netdev );
802 netdev->dev = &pci->dev;
804 /* Make sure interrupts are disabled. */
806 myri10ge_net_irq ( netdev, 0 );
808 /* Find the PCI Vendor-Specific capability. */
810 priv->pci_cap_vs = pci_find_capability ( pci , PCI_CAP_ID_VNDR );
811 if ( 0 == priv->pci_cap_vs ) {
814 goto abort_with_netdev_init;
817 /* Read the NIC HW address. */
819 rc = mac_address_from_string_specs ( pci,
824 goto abort_with_netdev_init;
828 /* Enable bus master, etc. */
830 adjust_pci_device ( pci );
833 /* Register the initialized network device. */
835 rc = register_netdev ( netdev );
837 dbg = "register_netdev";
838 goto abort_with_netdev_init;
841 /* Initialize NonVolatile Storage support. */
843 rc = myri10ge_nv_init ( priv );
845 dbg = "myri10ge_nv_init";
846 goto abort_with_registered_netdev;
853 abort_with_registered_netdev:
854 unregister_netdev ( netdev );
855 abort_with_netdev_init:
856 netdev_nullify ( netdev );
857 netdev_put ( netdev );
859 DBG ( "%s:%s\n", dbg, strerror ( rc ) );
864 * Remove a device from the PCI device list.
866 * @v pci PCI device to remove.
868 * This is a PCI Device Driver API function.
870 static void myri10ge_pci_remove ( struct pci_device *pci )
872 struct net_device *netdev;
874 DBGP ( "myri10ge_pci_remove\n" );
875 netdev = pci_get_drvdata ( pci );
877 myri10ge_nv_fini ( myri10ge_priv ( netdev ) );
878 unregister_netdev ( netdev );
879 netdev_nullify ( netdev );
880 netdev_put ( netdev );
883 /****************************************************************
884 * iPXE Network Device Driver Operations
885 ****************************************************************/
888 * Close a network device.
890 * @v netdev Device to close.
892 * This is an iPXE Network Device Driver API function.
/* Shut the interface down: mask the interrupt, send the firmware a
   reset command, free every receive buffer still posted, release the
   DMA buffers and clear the private state. */
894 static void myri10ge_net_close ( struct net_device *netdev )
896 struct myri10ge_private *priv;
899 DBGP ( "myri10ge_net_close\n" );
900 priv = myri10ge_priv ( netdev );
902 /* disable interrupts */
904 myri10ge_net_irq ( netdev, 0 );
906 /* Reset the NIC interface, so we won't get any more events from
909 myri10ge_command ( priv, MXGEFW_CMD_RESET, data );
911 /* Free receive buffers that were never filled. */
913 while ( priv->receives_done != priv->receives_posted ) {
914 free_iob ( priv->receive_iob[priv->receives_done
915 & MYRI10GE_RECEIVE_WRAP] );
916 ++priv->receives_done;
919 /* Release DMAable memory. */
921 free_dma ( priv->dma, sizeof ( *priv->dma ) );
923 /* Erase all state from the open. */
/* NOTE(review): this clears the whole private structure, not only
   the fields written by open. The structure also holds nvs, nvo,
   nvo_registered and the cached pci_cap_vs, which are written by
   myri10ge_pci_probe() and myri10ge_nv_init(). myri10ge_nv_fini()
   tests nvo_registered and the nvs accessors read pci_cap_vs, so
   confirm that wiping them on every close is intended. */
925 memset ( priv, 0, sizeof ( *priv ) );
931 * Enable or disable IRQ masking.
933 * @v netdev Device to control.
934 * @v enable Zero to mask off IRQ, non-zero to enable IRQ.
936 * This is an iPXE Network Driver API function.
938 static void myri10ge_net_irq ( struct net_device *netdev, int enable )
940 struct pci_device *pci_dev;
943 DBGP ( "myri10ge_net_irq\n" );
944 pci_dev = ( struct pci_device * ) netdev->dev;
946 /* Adjust the Interrupt Disable bit in the Command register of the
949 pci_read_config_word ( pci_dev, PCI_COMMAND, &val );
951 val &= ~PCI_COMMAND_INTX_DISABLE;
953 val |= PCI_COMMAND_INTX_DISABLE;
954 pci_write_config_word ( pci_dev, PCI_COMMAND, val );
958 * Opens a network device.
960 * @v netdev Device to be opened.
961 * @ret rc Non-zero if failed to open.
963 * This enables tx and rx on the device.
964 * This is an iPXE Network Device Driver API function.
966 static int myri10ge_net_open ( struct net_device *netdev )
968 const char *dbg; /* printed upon error return */
970 struct io_buffer *iob;
971 struct myri10ge_private *priv;
973 struct pci_device *pci_dev;
976 DBGP ( "myri10ge_net_open\n" );
977 priv = myri10ge_priv ( netdev );
978 pci_dev = ( struct pci_device * ) netdev->dev;
979 membase = phys_to_virt ( pci_dev->membase );
981 /* Compute address for passing commands to the firmware. */
983 priv->command = membase + MXGEFW_ETH_CMD;
985 /* Ensure interrupts are disabled. */
987 myri10ge_net_irq ( netdev, 0 );
989 /* Allocate cleared DMAable buffers. */
991 priv->dma = malloc_dma ( sizeof ( *priv->dma ) , 128 );
995 goto abort_with_nothing;
997 memset ( priv->dma, 0, sizeof ( *priv->dma ) );
999 /* Simplify following code. */
1001 #define TRY( prefix, base, suffix ) do { \
1002 rc = myri10ge_command ( priv, \
1010 goto abort_with_dma; \
1014 /* Send a reset command to the card to see if it is alive,
1015 and to reset its queue state. */
1017 TRY ( CMD_, RESET , );
1019 /* Set the interrupt queue size. */
1021 data[0] = ( (uint32_t)( sizeof ( priv->dma->receive_completion ) )
1022 | MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK );
1023 TRY ( CMD_SET_ , INTRQ_SIZE , );
1025 /* Set the interrupt queue DMA address. */
1027 data[0] = virt_to_bus ( &priv->dma->receive_completion );
1029 TRY ( CMD_SET_, INTRQ_DMA, );
1031 /* Get the NIC interrupt claim address. */
1033 TRY ( CMD_GET_, IRQ_ACK, _OFFSET );
1034 priv->irq_claim = membase + data[0];
1036 /* Get the NIC interrupt deassert address. */
1038 TRY ( CMD_GET_, IRQ_DEASSERT, _OFFSET );
1039 priv->irq_deassert = membase + data[0];
1041 /* Disable interrupt coalescing, which is inappropriate for the
1042 minimal buffering we provide. */
1044 TRY ( CMD_GET_, INTR_COAL, _DELAY_OFFSET );
1045 * ( ( uint32 * ) ( membase + data[0] ) ) = 0;
1047 /* Set the NIC mac address. */
1049 data[0] = ( netdev->ll_addr[0] << 24
1050 | netdev->ll_addr[1] << 16
1051 | netdev->ll_addr[2] << 8
1052 | netdev->ll_addr[3] );
1053 data[1] = ( ( netdev->ll_addr[4] << 8 )
1054 | netdev->ll_addr[5] );
1055 TRY ( SET_ , MAC_ADDRESS , );
1057 /* Enable multicast receives, because some iPXE clients don't work
1058 without multicast. */
1060 TRY ( ENABLE_ , ALLMULTI , );
1062 /* Disable Ethernet flow control, so the NIC cannot deadlock the
1063 network under any circumstances. */
1065 TRY ( DISABLE_ , FLOW , _CONTROL );
1067 /* Compute transmit ring sizes. */
1069 data[0] = 0; /* slice 0 */
1070 TRY ( CMD_GET_, SEND_RING, _SIZE );
1071 priv->transmit_ring_wrap
1072 = data[0] / sizeof ( mcp_kreq_ether_send_t ) - 1;
1073 if ( priv->transmit_ring_wrap
1074 & ( priv->transmit_ring_wrap + 1 ) ) {
1077 goto abort_with_dma;
1080 /* Compute receive ring sizes. */
1082 data[0] = 0; /* slice 0 */
1083 TRY ( CMD_GET_ , RX_RING , _SIZE );
1084 priv->receive_post_ring_wrap = data[0] / sizeof ( mcp_dma_addr_t ) - 1;
1085 if ( priv->receive_post_ring_wrap
1086 & ( priv->receive_post_ring_wrap + 1 ) ) {
1089 goto abort_with_dma;
1092 /* Get NIC transmit ring address. */
1094 data[0] = 0; /* slice 0. */
1095 TRY ( CMD_GET_, SEND, _OFFSET );
1096 priv->transmit_ring = membase + data[0];
1098 /* Get the NIC receive ring address. */
1100 data[0] = 0; /* slice 0. */
1101 TRY ( CMD_GET_, SMALL_RX, _OFFSET );
1102 priv->receive_post_ring = membase + data[0];
1104 /* Set the Nic MTU. */
1106 data[0] = ETH_FRAME_LEN;
1107 TRY ( CMD_SET_, MTU, );
1109 /* Tell the NIC our buffer sizes. ( We use only small buffers, so we
1110 set both buffer sizes to the same value, which will force all
1111 received frames to use small buffers. ) */
1113 data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
1114 TRY ( CMD_SET_, SMALL_BUFFER, _SIZE );
1115 data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
1116 TRY ( CMD_SET_, BIG_BUFFER, _SIZE );
1118 /* Tell firmware where to DMA IRQ data */
1120 data[0] = virt_to_bus ( &priv->dma->irq_data );
1122 data[2] = sizeof ( priv->dma->irq_data );
1123 TRY ( CMD_SET_, STATS_DMA_V2, );
1125 /* Post receives. */
1127 while ( priv->receives_posted <= MYRI10GE_RECEIVE_WRAP ) {
1129 /* Reserve 2 extra bytes at the start of packets, since
1130 the firmware always skips the first 2 bytes of the buffer
1131 so TCP headers will be aligned. */
1133 iob = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
1137 goto abort_with_receives_posted;
1139 iob_reserve ( iob, MXGEFW_PAD );
1140 myri10ge_post_receive ( priv, iob );
1143 /* Bring up the link. */
1145 TRY ( CMD_, ETHERNET_UP, );
1147 DBG2_RINGS ( priv );
1150 abort_with_receives_posted:
1151 while ( priv->receives_posted-- )
1152 free_iob ( priv->receive_iob[priv->receives_posted] );
1154 /* Because the link is not up, we don't have to reset the NIC here. */
1155 free_dma ( priv->dma, sizeof ( *priv->dma ) );
1157 /* Erase all signs of the failed open. */
1158 memset ( priv, 0, sizeof ( *priv ) );
1159 DBG ( "%s: %s\n", dbg, strerror ( rc ) );
1164 * This function allows a driver to process events during operation.
1166 * @v netdev Device being polled.
1168 * This is called periodically by iPXE to let the driver check the status of
1169 * transmitted packets and to allow the driver to check for received packets.
1170 * This is an iPXE Network Device Driver API function.
1172 static void myri10ge_net_poll ( struct net_device *netdev )
1174 struct io_buffer *iob;
1175 struct io_buffer *replacement;
1176 struct myri10ge_dma_buffers *dma;
1177 struct myri10ge_private *priv;
1178 unsigned int length;
1179 unsigned int orig_receives_posted;
1181 DBGP ( "myri10ge_net_poll\n" );
1182 priv = myri10ge_priv ( netdev );
1185 /* Process any pending interrupt. */
1187 myri10ge_interrupt_handler ( netdev );
1189 /* Pass up received frames, but limit ourselves to receives posted
1190 before this function was called, so we cannot livelock if
1191 receives are arriving faster than we process them. */
1193 orig_receives_posted = priv->receives_posted;
1194 while ( priv->receives_done != orig_receives_posted ) {
1196 /* Stop if there is no pending receive. */
1198 length = ntohs ( dma->receive_completion
1199 [priv->receives_done
1200 & MYRI10GE_RECEIVE_COMPLETION_WRAP]
1205 /* Allocate a replacement buffer. If none is available,
1206 stop passing up packets until a buffer is available.
1208 Reserve 2 extra bytes at the start of packets, since
1209 the firmware always skips the first 2 bytes of the buffer
1210 so TCP headers will be aligned. */
1212 replacement = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
1213 if ( !replacement ) {
1214 DBG ( "NO RX BUF\n" );
1217 iob_reserve ( replacement, MXGEFW_PAD );
1219 /* Pass up the received frame. */
1221 iob = priv->receive_iob[priv->receives_done
1222 & MYRI10GE_RECEIVE_WRAP];
1223 iob_put ( iob, length );
1224 netdev_rx ( netdev, iob );
1226 /* We have consumed the packet, so clear the receive
1229 dma->receive_completion [priv->receives_done
1230 & MYRI10GE_RECEIVE_COMPLETION_WRAP]
1234 /* Replace the passed-up I/O buffer. */
1236 myri10ge_post_receive ( priv, replacement );
1237 ++priv->receives_done;
1238 DBG2_RINGS ( priv );
1243 * This transmits a packet.
1245 * @v netdev Device to transmit from.
1246 * @v iobuf Data to transmit.
1247 * @ret rc Non-zero if failed to transmit.
1249 * This is an iPXE Network Driver API function.
1251 static int myri10ge_net_transmit ( struct net_device *netdev,
1252 struct io_buffer *iobuf )
1254 mcp_kreq_ether_send_t *kreq;
1256 struct myri10ge_private *priv;
1257 uint32 transmits_posted;
1259 DBGP ( "myri10ge_net_transmit\n" );
1260 priv = myri10ge_priv ( netdev );
1262 /* Confirm space in the send ring. */
1264 transmits_posted = priv->transmits_posted;
1265 if ( transmits_posted - priv->transmits_done
1266 > MYRI10GE_TRANSMIT_WRAP ) {
1267 DBG ( "TX ring full\n" );
1271 DBG2 ( "TX %p+%zd ", iobuf->data, iob_len ( iobuf ) );
1272 DBG2_HD ( iobuf->data, 14 );
1274 /* Record the packet being transmitted, so we can later report
1277 priv->transmit_iob[transmits_posted & MYRI10GE_TRANSMIT_WRAP] = iobuf;
1279 /* Copy and pad undersized frames, because the NIC does not pad,
1280 and we would rather copy small frames than do a gather. */
1282 len = iob_len ( iobuf );
1283 if ( len < ETH_ZLEN ) {
1284 iob_pad ( iobuf, ETH_ZLEN );
1288 /* Enqueue the packet by writing a descriptor to the NIC.
1289 This is a bit tricky because the HW requires 32-bit writes,
1290 but the structure has smaller fields. */
1292 kreq = &priv->transmit_ring[transmits_posted
1293 & priv->transmit_ring_wrap];
1294 kreq->addr_high = 0;
1295 kreq->addr_low = htonl ( virt_to_bus ( iobuf->data ) );
1296 ( ( uint32 * ) kreq ) [2] = htonl (
1297 0x0000 << 16 /* pseudo_header_offset */
1298 | ( len & 0xFFFF ) /* length */
1301 ( ( uint32 * ) kreq ) [3] = htonl (
1302 0x00 << 24 /* pad */
1303 | 0x01 << 16 /* rdma_count */
1304 | 0x00 << 8 /* cksum_offset */
1305 | ( MXGEFW_FLAGS_SMALL
1306 | MXGEFW_FLAGS_FIRST
1307 | MXGEFW_FLAGS_NO_TSO ) /* flags */
1311 /* Mark the slot as consumed and return. */
1313 priv->transmits_posted = ++transmits_posted;
1314 DBG2_RINGS ( priv );
1318 static struct pci_device_id myri10ge_nics[] = {
1319 /* Each of these macros must be a single line to satisfy a script. */
1320 PCI_ROM ( 0x14c1, 0x0008, "myri10ge", "Myricom 10Gb Ethernet Adapter", 0 ) ,
1323 struct pci_driver myri10ge_driver __pci_driver = {
1324 .ids = myri10ge_nics,
1325 .id_count = ( sizeof ( myri10ge_nics ) / sizeof ( myri10ge_nics[0] ) ) ,
1326 .probe = myri10ge_pci_probe,
1327 .remove = myri10ge_pci_remove