These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
kvmfornfv.git: kernel/drivers/staging/unisys/visornic/visornic_main.c
1 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
2  * All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  * NON INFRINGEMENT.  See the GNU General Public License for more
12  * details.
13  */
14
15 /* This driver lives in a spar partition, and registers to ethernet io
16  * channels from the visorbus driver. It creates netdev devices and
17  * forwards transmit to the IO channel and accepts rcvs from the IO
18  * Partition via the IO channel.
19  */
20
21 #include <linux/debugfs.h>
22 #include <linux/etherdevice.h>
23 #include <linux/netdevice.h>
24 #include <linux/kthread.h>
25 #include <linux/skbuff.h>
26 #include <linux/rtnetlink.h>
27
28 #include "visorbus.h"
29 #include "iochannel.h"
30
31 #define VISORNIC_INFINITE_RSP_WAIT 0
32 #define VISORNICSOPENMAX 32
33 #define MAXDEVICES     16384
34
35 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
36  *         = 163840 bytes
37  */
38 #define MAX_BUF 163840
39
40 static int visornic_probe(struct visor_device *dev);
41 static void visornic_remove(struct visor_device *dev);
42 static int visornic_pause(struct visor_device *dev,
43                           visorbus_state_complete_func complete_func);
44 static int visornic_resume(struct visor_device *dev,
45                            visorbus_state_complete_func complete_func);
46
47 /* DEBUGFS declarations */
48 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
49                                  size_t len, loff_t *offset);
50 static ssize_t enable_ints_write(struct file *file, const char __user *buf,
51                                  size_t len, loff_t *ppos);
52 static struct dentry *visornic_debugfs_dir;
53 static const struct file_operations debugfs_info_fops = {
54         .read = info_debugfs_read,
55 };
56
57 static const struct file_operations debugfs_enable_ints_fops = {
58         .write = enable_ints_write,
59 };
60
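/* workqueue used to run visornic_timeout_reset() after a transmit timeout */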
61 static struct workqueue_struct *visornic_timeout_reset_workqueue;
62
63 /* GUIDS for director channel type supported by this driver.  */
64 static struct visor_channeltype_descriptor visornic_channel_types[] = {
65         /* Note that the only channel type we expect to be reported by the
66          * bus driver is the SPAR_VNIC channel.
67          */
68         { SPAR_VNIC_CHANNEL_PROTOCOL_UUID, "ultravnic" },
69         { NULL_UUID_LE, NULL }
70 };
71 MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
72 /*
73  * FIXME XXX: This next line of code must be fixed and removed before
74  * acceptance into the 'normal' part of the kernel.  It is only here as a place
75  * holder to get module autoloading functionality working for visorbus.  Code
76  * must be added to scripts/mod/file2alias.c, etc., to get this working
77  * properly.
78  */
79 MODULE_ALIAS("visorbus:" SPAR_VNIC_CHANNEL_PROTOCOL_UUID_STR);
80
81 /* This is used to tell the visor bus driver which types of visor devices
82  * we support, and what functions to call when a visor device that we support
83  * is attached or removed.
84  */
85 static struct visor_driver visornic_driver = {
86         .name = "visornic",
87         .version = "1.0.0.0",
88         .vertag = NULL,
89         .owner = THIS_MODULE,
90         .channel_types = visornic_channel_types,
91         .probe = visornic_probe,
92         .remove = visornic_remove,
93         .pause = visornic_pause,
94         .resume = visornic_resume,
95         .channel_interrupt = NULL,
96 };
97
98 struct chanstat {
99         unsigned long got_rcv;
100         unsigned long got_enbdisack;
101         unsigned long got_xmit_done;
102         unsigned long xmit_fail;
103         unsigned long sent_enbdis;
104         unsigned long sent_promisc;
105         unsigned long sent_post;
106         unsigned long sent_post_failed;
107         unsigned long sent_xmit;
108         unsigned long reject_count;
109         unsigned long extra_rcvbufs_sent;
110 };
111
112 struct visornic_devdata {
113         unsigned short enabled;         /* 0 disabled 1 enabled to receive */
114         unsigned short enab_dis_acked;  /* NET_RCV_ENABLE/DISABLE acked by
115                                          * IOPART
116                                          */
117         struct visor_device *dev;
118         struct net_device *netdev;
119         struct net_device_stats net_stats;
120         atomic_t interrupt_rcvd;
121         wait_queue_head_t rsp_queue;
122         struct sk_buff **rcvbuf;
123         u64 incarnation_id;             /* lets IOPART know about re-birth */
124         unsigned short old_flags;       /* flags as they were prior to
125                                          * set_multicast_list
126                                          */
127         atomic_t usage;                 /* count of users */
128         int num_rcv_bufs;               /* indicates how many rcv buffers
129                                          * the vnic will post
130                                          */
131         int num_rcv_bufs_could_not_alloc;
132         atomic_t num_rcvbuf_in_iovm;
133         unsigned long alloc_failed_in_if_needed_cnt;
134         unsigned long alloc_failed_in_repost_rtn_cnt;
135         unsigned long max_outstanding_net_xmits; /* absolute max number of
136                                                   * outstanding xmits - should
137                                                   * never hit this
138                                                   */
139         unsigned long upper_threshold_net_xmits;  /* high water mark for
140                                                    * calling netif_stop_queue()
141                                                    */
142         unsigned long lower_threshold_net_xmits; /* low water mark for calling
143                                                   * netif_wake_queue()
144                                                   */
145         struct sk_buff_head xmitbufhead; /* xmitbufhead is the head of the
146                                           * xmit buffer list that have been
147                                           * sent to the IOPART end
148                                           */
149         visorbus_state_complete_func server_down_complete_func;
150         struct work_struct timeout_reset;
151         struct uiscmdrsp *cmdrsp_rcv;    /* cmdrsp_rcv is used for
152                                           * posting/unposting rcv buffers
153                                           */
154         struct uiscmdrsp *xmit_cmdrsp;   /* used to issue NET_XMIT - there is
155                                           * never more than one xmit in
156                                           * progress at a time
157                                           */
158         bool server_down;                /* IOPART is down */
159         bool server_change_state;        /* Processing SERVER_CHANGESTATE msg */
160         bool going_away;                 /* device is being torn down */
161         struct dentry *eth_debugfs_dir;
162         u64 interrupts_rcvd;
163         u64 interrupts_notme;
164         u64 interrupts_disabled;
165         u64 busy_cnt;
166         spinlock_t priv_lock;  /* spinlock to access devdata structures */
167
168         /* flow control counter */
169         u64 flow_control_upper_hits;
170         u64 flow_control_lower_hits;
171
172         /* debug counters */
173         unsigned long n_rcv0;                   /* # rcvs of 0 buffers */
174         unsigned long n_rcv1;                   /* # rcvs of 1 buffers */
175         unsigned long n_rcv2;                   /* # rcvs of 2 buffers */
176         unsigned long n_rcvx;                   /* # rcvs of >2 buffers */
177         unsigned long found_repost_rcvbuf_cnt;  /* # times we found the
178                                                  *   rcv buf to repost
179                                                  */
180         unsigned long repost_found_skb_cnt;     /* # times found the skb */
181         unsigned long n_repost_deficit;         /* # times we couldn't find
182                                                  *   all of the rcv buffers
183                                                  */
184         unsigned long bad_rcv_buf;              /* # times we neglected to
185                                                  * free the rcv skb because
186                                                  * we didn't know where it
187                                                  * came from
188                                                  */
189         unsigned long n_rcv_packets_not_accepted;/* # bogus rcv packets */
190
191         int queuefullmsg_logged;
192         struct chanstat chstat;
193         struct timer_list irq_poll_timer;
194         struct napi_struct napi;
195         struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
196 };
197
198 static int visornic_poll(struct napi_struct *napi, int budget);
199 static void poll_for_irq(unsigned long v);
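/* This driver gets no real interrupts from the IO partition
 * (channel_interrupt is NULL); irq_poll_timer periodically runs
 * poll_for_irq() to check the channel and drive the NAPI poll routine,
 * visornic_poll().
 */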
200
201 /**
202  *      visor_copy_fragsinfo_from_skb - copy fragment info from an skb
203  *      @skb: skbuff that we are pulling the frags from
204  *      @firstfraglen: length of first fragment in skb
205  *      @frags_max: max len of frags array
206  *      @frags: frags array filled in on output
207  *
208  *      Copy the fragment list in the SKB to a phys_info
209  *      array that the IOPART understands.
210  *      Return value indicates number of entries filled in frags
211  *      Negative values indicate an error.
212  */
213 static int
214 visor_copy_fragsinfo_from_skb(struct sk_buff *skb, unsigned int firstfraglen,
215                               unsigned int frags_max,
216                               struct phys_info frags[])
217 {
218         unsigned int count = 0, ii, size, offset = 0, numfrags;
219         unsigned int total_count;
220
221         numfrags = skb_shinfo(skb)->nr_frags;
222
223         /*
224          * Compute the number of fragments this skb has, and if it's more than
225          * the frag array can hold, linearize the skb
226          */
227         total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
228         if (firstfraglen % PI_PAGE_SIZE)
229                 total_count++;
230
231         if (total_count > frags_max) {
232                 if (skb_linearize(skb))
233                         return -EINVAL;
234                 numfrags = skb_shinfo(skb)->nr_frags;
235                 firstfraglen = 0;
236         }
237
238         while (firstfraglen) {
239                 if (count == frags_max)
240                         return -EINVAL;
241
242                 frags[count].pi_pfn =
243                         page_to_pfn(virt_to_page(skb->data + offset));
244                 frags[count].pi_off =
245                         (unsigned long)(skb->data + offset) & PI_PAGE_MASK;
246                 size = min_t(unsigned int, firstfraglen,
247                              PI_PAGE_SIZE - frags[count].pi_off);
248
249                 /* can take smallest of firstfraglen (what's left) OR
250                  * bytes left in the page
251                  */
252                 frags[count].pi_len = size;
253                 firstfraglen -= size;
254                 offset += size;
255                 count++;
256         }
257         if (numfrags) {
258                 if ((count + numfrags) > frags_max)
259                         return -EINVAL;
260
261                 for (ii = 0; ii < numfrags; ii++) {
262                         count = add_physinfo_entries(page_to_pfn(
263                                 skb_frag_page(&skb_shinfo(skb)->frags[ii])),
264                                               skb_shinfo(skb)->frags[ii].
265                                               page_offset,
266                                               skb_shinfo(skb)->frags[ii].
267                                               size, count, frags_max, frags);
268                         /*
269                          * add_physinfo_entries only returns
270                          * zero if the frags array is out of room
271                          * That should never happen because we
272                          * fail above, if count+numfrags > frags_max.
273                          * Given that there's no recovery mechanism from putting
274                          * half a packet in the I/O channel, panic here as this
275                          * should never happen
276                          */
277                         BUG_ON(!count);
278                 }
279         }
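        /* if the skb has a frag_list, recurse into each chained skb and
         * append its fragment info after the entries filled in so far
         */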
280         if (skb_shinfo(skb)->frag_list) {
281                 struct sk_buff *skbinlist;
282                 int c;
283
284                 for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
285                      skbinlist = skbinlist->next) {
286                         c = visor_copy_fragsinfo_from_skb(skbinlist,
287                                                           skbinlist->len -
288                                                           skbinlist->data_len,
289                                                           frags_max - count,
290                                                           &frags[count]);
291                         if (c < 0)
292                                 return c;
293                         count += c;
294                 }
295         }
296         return count;
297 }
298
299 static ssize_t enable_ints_write(struct file *file,
300                                  const char __user *buffer,
301                                  size_t count, loff_t *ppos)
302 {
303         /*
304          * Don't want to break ABI here by having a debugfs
305          * file that no longer exists or is no longer writable, so
306          * let's just make this a vestigial function
307          */
308         return count;
309 }
310
311 /**
312  *      visornic_serverdown_complete - IOPART went down, need to pause
313  *                                     device
314  *      @devdata: visornic device that needs to be paused
315  *
316  *      The IO partition has gone down and we need to do some cleanup
317  *      for when it comes back. Treat the IO partition as the link
318  *      being down.
319  *      Returns void.
320  */
321 static void
322 visornic_serverdown_complete(struct visornic_devdata *devdata)
323 {
324         struct net_device *netdev;
325
326         netdev = devdata->netdev;
327
328         /* Stop polling for interrupts */
329         del_timer_sync(&devdata->irq_poll_timer);
330
331         rtnl_lock();
332         dev_close(netdev);
333         rtnl_unlock();
334
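        /* the IO partition no longer holds our rcv buffers or outstanding
         * xmits, so reset the related counters
         */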
335         atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
336         devdata->chstat.sent_xmit = 0;
337         devdata->chstat.got_xmit_done = 0;
338
339         if (devdata->server_down_complete_func)
340                 (*devdata->server_down_complete_func)(devdata->dev, 0);
341
342         devdata->server_down = true;
343         devdata->server_change_state = false;
344         devdata->server_down_complete_func = NULL;
345 }
346
347 /**
348  *      visornic_serverdown - Command has notified us that IOPART is down
349  *      @devdata: device that is being managed by IOPART
350  *
351  *      Schedule the work needed to handle the server down request. Make
352  *      sure we haven't already handled the server change state event.
353  *      Returns 0 if we scheduled the work, negative error code on failure.
354  */
355 static int
356 visornic_serverdown(struct visornic_devdata *devdata,
357                     visorbus_state_complete_func complete_func)
358 {
359         unsigned long flags;
360
361         spin_lock_irqsave(&devdata->priv_lock, flags);
362         if (!devdata->server_down && !devdata->server_change_state) {
363                 if (devdata->going_away) {
364                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
365                         dev_dbg(&devdata->dev->device,
366                                 "%s aborting because device removal pending\n",
367                                 __func__);
368                         return -ENODEV;
369                 }
370                 devdata->server_change_state = true;
371                 devdata->server_down_complete_func = complete_func;
372                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
373                 visornic_serverdown_complete(devdata);
374         } else if (devdata->server_change_state) {
375                 dev_dbg(&devdata->dev->device, "%s changing state\n",
376                         __func__);
377                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
378                 return -EINVAL;
379         } else
380                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
381         return 0;
382 }
383
384 /**
385  *      alloc_rcv_buf   - alloc rcv buffer to be given to the IO Partition.
386  *      @netdev: network adapter the rcv bufs are attached to.
387  *
388  *      Create an sk_buff (rcv_buf) that will be passed to the IO Partition
389  *      so that it can write rcv data into our memory space.
390  *      Return pointer to sk_buff
391  */
392 static struct sk_buff *
393 alloc_rcv_buf(struct net_device *netdev)
394 {
395         struct sk_buff *skb;
396
397         /* NOTE: the first fragment in each rcv buffer is pointed to by
398          * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
399          * in length, so the firstfrag is large enough to hold 1514.
400          */
401         skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
402         if (!skb)
403                 return NULL;
404         skb->dev = netdev;
405         skb->len = RCVPOST_BUF_SIZE;
406         /* current value of mtu doesn't come into play here; large
407          * packets will just end up using multiple rcv buffers all of
408          * same size
409          */
410         skb->data_len = 0;      /* alloc_skb already zeroes it out;
411                                  * set it here for clarity.
412                                  */
413         return skb;
414 }
415
416 /**
417  *      post_skb        - post a skb to the IO Partition.
418  *      @cmdrsp: cmdrsp packet to be sent to the IO Partition
419  *      @devdata: visornic_devdata to post the skb to
420  *      @skb: skb to give to the IO partition
421  *
422  *      Send the skb to the IO Partition.
423  *      Returns void
424  */
425 static inline void
426 post_skb(struct uiscmdrsp *cmdrsp,
427          struct visornic_devdata *devdata, struct sk_buff *skb)
428 {
429         cmdrsp->net.buf = skb;
430         cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
431         cmdrsp->net.rcvpost.frag.pi_off =
432                 (unsigned long)skb->data & PI_PAGE_MASK;
433         cmdrsp->net.rcvpost.frag.pi_len = skb->len;
434         cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
435
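        /* only post the buffer if it fits entirely within one page;
         * otherwise it is not handed to the IO partition
         */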
436         if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) <= PI_PAGE_SIZE) {
437                 cmdrsp->net.type = NET_RCV_POST;
438                 cmdrsp->cmdtype = CMD_NET_TYPE;
439                 if (visorchannel_signalinsert(devdata->dev->visorchannel,
440                                           IOCHAN_TO_IOPART,
441                                           cmdrsp)) {
442                         atomic_inc(&devdata->num_rcvbuf_in_iovm);
443                         devdata->chstat.sent_post++;
444                 } else {
445                         devdata->chstat.sent_post_failed++;
446                 }
447         }
448 }
449
450 /**
451  *      send_enbdis     - send NET_RCV_ENBDIS to IO Partition
452  *      @netdev: netdevice we are enabling/disabling, used as context
453  *               return value
454  *      @state: enable = 1/disable = 0
455  *      @devdata: visornic device we are enabling/disabling
456  *
457  *      Send the enable/disable message to the IO Partition.
458  *      Returns void
459  */
460 static void
461 send_enbdis(struct net_device *netdev, int state,
462             struct visornic_devdata *devdata)
463 {
464         devdata->cmdrsp_rcv->net.enbdis.enable = state;
465         devdata->cmdrsp_rcv->net.enbdis.context = netdev;
466         devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
467         devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
468         if (visorchannel_signalinsert(devdata->dev->visorchannel,
469                                   IOCHAN_TO_IOPART,
470                                   devdata->cmdrsp_rcv))
471                 devdata->chstat.sent_enbdis++;
472 }
473
474 /**
475  *      visornic_disable_with_timeout - Disable network adapter
476  *      @netdev: netdevice to disable
477  *      @timeout: timeout to wait for disable
478  *
479  *      Disable the network adapter and inform the IO Partition that we
480  *      are disabled, reclaim memory from rcv bufs.
481  *      Returns 0 on success, negative for failure of IO Partition
482  *      responding.
483  *
484  */
485 static int
486 visornic_disable_with_timeout(struct net_device *netdev, const int timeout)
487 {
488         struct visornic_devdata *devdata = netdev_priv(netdev);
489         int i;
490         unsigned long flags;
491         int wait = 0;
492
493         /* send a msg telling the other end we are stopping incoming pkts */
494         spin_lock_irqsave(&devdata->priv_lock, flags);
495         devdata->enabled = 0;
496         devdata->enab_dis_acked = 0; /* must wait for ack */
497         spin_unlock_irqrestore(&devdata->priv_lock, flags);
498
499         /* send disable and wait for ack -- don't hold lock when sending
500          * disable because if the queue is full, insert might sleep.
501          */
502         send_enbdis(netdev, 0, devdata);
503
504         /* wait for ack to arrive before we try to free rcv buffers
505          * NOTE: the other end automatically unposts the rcv buffers
506          * when it gets a disable.
507          */
508         spin_lock_irqsave(&devdata->priv_lock, flags);
509         while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
510                (wait < timeout)) {
511                 if (devdata->enab_dis_acked)
512                         break;
513                 if (devdata->server_down || devdata->server_change_state) {
514                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
515                         dev_dbg(&netdev->dev, "%s server went away\n",
516                                 __func__);
517                         return -EIO;
518                 }
519                 set_current_state(TASK_INTERRUPTIBLE);
520                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
521                 wait += schedule_timeout(msecs_to_jiffies(10));
522                 spin_lock_irqsave(&devdata->priv_lock, flags);
523         }
524
525         /* Wait for usage to go to 1 (no other users) before freeing
526          * rcv buffers
527          */
528         if (atomic_read(&devdata->usage) > 1) {
529                 while (1) {
530                         set_current_state(TASK_INTERRUPTIBLE);
531                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
532                         schedule_timeout(msecs_to_jiffies(10));
533                         spin_lock_irqsave(&devdata->priv_lock, flags);
534                         if (atomic_read(&devdata->usage))
535                                 break;
536                 }
537         }
538         /* we've set enabled to 0, so we can give up the lock. */
539         spin_unlock_irqrestore(&devdata->priv_lock, flags);
540
541         /* stop the transmit queue so nothing more can be transmitted */
542         netif_stop_queue(netdev);
543
544         napi_disable(&devdata->napi);
545
546         skb_queue_purge(&devdata->xmitbufhead);
547
548         /* Free rcv buffers - other end has automatically unposted them on
549          * disable
550          */
551         for (i = 0; i < devdata->num_rcv_bufs; i++) {
552                 if (devdata->rcvbuf[i]) {
553                         kfree_skb(devdata->rcvbuf[i]);
554                         devdata->rcvbuf[i] = NULL;
555                 }
556         }
557
558         return 0;
559 }
560
561 /**
562  *      init_rcv_bufs  -- initialize receive bufs and send them to the IO Part
563  *      @netdev: struct netdevice
564  *      @devdata: visornic_devdata
565  *
566  *      Allocate rcv buffers and post them to the IO Partition.
567  *      Return 0 for success, and negative for failure.
568  */
569 static int
570 init_rcv_bufs(struct net_device *netdev, struct visornic_devdata *devdata)
571 {
572         int i, count;
573
574         /* allocate fixed number of receive buffers to post to uisnic
575          * post receive buffers after we've allocated a required amount
576          */
577         for (i = 0; i < devdata->num_rcv_bufs; i++) {
578                 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
579                 if (!devdata->rcvbuf[i])
580                         break; /* if we failed to allocate one let us stop */
581         }
582         if (i == 0) /* couldn't even allocate one -- bail out */
583                 return -ENOMEM;
584         count = i;
585
586         /* Ensure we can alloc 2/3rd of the requested number of buffers.
587          * 2/3 is an arbitrary choice; used also in ndis init.c
588          */
589         if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
590                 /* free receive buffers we did alloc and then bail out */
591                 for (i = 0; i < count; i++) {
592                         kfree_skb(devdata->rcvbuf[i]);
593                         devdata->rcvbuf[i] = NULL;
594                 }
595                 return -ENOMEM;
596         }
597
598         /* post receive buffers to receive incoming input - without holding
599          * lock - we've not enabled nor started the queue so there shouldn't
600          * be any rcv or xmit activity
601          */
602         for (i = 0; i < count; i++)
603                 post_skb(devdata->cmdrsp_rcv, devdata, devdata->rcvbuf[i]);
604
605         return 0;
606 }
607
608 /**
609  *      visornic_enable_with_timeout    - send enable to IO Part
610  *      @netdev: struct net_device
611  *      @timeout: Time to wait for the ACK from the enable
612  *
613  *      Sends enable to IOVM, inits, and posts receive buffers to IOVM.
614  *      The timeout is defined in msecs (timeout of 0 specifies infinite wait).
615  *      Return 0 for success, negative for failure.
616  */
617 static int
618 visornic_enable_with_timeout(struct net_device *netdev, const int timeout)
619 {
620         int i;
621         struct visornic_devdata *devdata = netdev_priv(netdev);
622         unsigned long flags;
623         int wait = 0;
624
625         /* NOTE: the other end automatically unposts the rcv buffers when it
626          * gets a disable.
627          */
628         i = init_rcv_bufs(netdev, devdata);
629         if (i < 0) {
630                 dev_err(&netdev->dev,
631                         "%s failed to init rcv bufs (%d)\n", __func__, i);
632                 return i;
633         }
634
635         spin_lock_irqsave(&devdata->priv_lock, flags);
636         devdata->enabled = 1;
637         devdata->enab_dis_acked = 0;
638
639         /* now we're ready, let's send an ENB to uisnic but until we get
640          * an ACK back from uisnic, we'll drop the packets
641          */
642         devdata->n_rcv_packets_not_accepted = 0;
643         spin_unlock_irqrestore(&devdata->priv_lock, flags);
644
645         /* send enable and wait for ack -- don't hold lock when sending enable
646          * because if the queue is full, insert might sleep.
647          */
648         napi_enable(&devdata->napi);
649         send_enbdis(netdev, 1, devdata);
650
651         spin_lock_irqsave(&devdata->priv_lock, flags);
652         while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
653                (wait < timeout)) {
654                 if (devdata->enab_dis_acked)
655                         break;
656                 if (devdata->server_down || devdata->server_change_state) {
657                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
658                         dev_dbg(&netdev->dev, "%s server went away\n",
659                                 __func__);
660                         return -EIO;
661                 }
662                 set_current_state(TASK_INTERRUPTIBLE);
663                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
664                 wait += schedule_timeout(msecs_to_jiffies(10));
665                 spin_lock_irqsave(&devdata->priv_lock, flags);
666         }
667
668         spin_unlock_irqrestore(&devdata->priv_lock, flags);
669
670         if (!devdata->enab_dis_acked) {
671                 dev_err(&netdev->dev, "%s missing ACK\n", __func__);
672                 return -EIO;
673         }
674
675         netif_start_queue(netdev);
676
677         return 0;
678 }
679
680 /**
681  *      visornic_timeout_reset  - handle xmit timeout resets
682  *      @work: work item that scheduled the work
683  *
684  *      Transmit timeouts are typically handled by resetting the
685  *      device. For our virtual NIC we will send a Disable and Enable
686  *      to the IOVM. If it doesn't respond, we will trigger a serverdown.
687  */
688 static void
689 visornic_timeout_reset(struct work_struct *work)
690 {
691         struct visornic_devdata *devdata;
692         struct net_device *netdev;
693         int response = 0;
694
695         devdata = container_of(work, struct visornic_devdata, timeout_reset);
696         netdev = devdata->netdev;
697
698         rtnl_lock();
699         if (!netif_running(netdev)) {
700                 rtnl_unlock();
701                 return;
702         }
703
704         response = visornic_disable_with_timeout(netdev,
705                                                  VISORNIC_INFINITE_RSP_WAIT);
706         if (response)
707                 goto call_serverdown;
708
709         response = visornic_enable_with_timeout(netdev,
710                                                 VISORNIC_INFINITE_RSP_WAIT);
711         if (response)
712                 goto call_serverdown;
713
714         rtnl_unlock();
715
716         return;
717
718 call_serverdown:
719         visornic_serverdown(devdata, NULL);
720         rtnl_unlock();
721 }
722
723 /**
724  *      visornic_open - Enable the visornic device and mark the queue started
725  *      @netdev: netdevice to start
726  *
727  *      Enable the device and start the transmit queue.
728  *      Return 0 for success
729  */
730 static int
731 visornic_open(struct net_device *netdev)
732 {
733         visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
734
735         return 0;
736 }
737
738 /**
739  *      visornic_close - Disables the visornic device and stops the queues
740  *      @netdev: netdevice to stop
741  *
742  *      Disable the device and stop the transmit queue.
743  *      Return 0 for success
744  */
745 static int
746 visornic_close(struct net_device *netdev)
747 {
748         visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
749
750         return 0;
751 }
752
753 /**
754  *      devdata_xmits_outstanding - compute outstanding xmits
755  *      @devdata: visornic_devdata for device
756  *
757  *      Return value is the number of outstanding xmits.
758  */
759 static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
760 {
761         if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
762                 return devdata->chstat.sent_xmit -
763                         devdata->chstat.got_xmit_done;
764         else
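                /* sent_xmit wrapped around; account for the wrap */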
765                 return (ULONG_MAX - devdata->chstat.got_xmit_done
766                         + devdata->chstat.sent_xmit + 1);
767 }
768
769 /**
770  *      vnic_hit_high_watermark
771  *      @devdata: indicates visornic device we are checking
772  *      @high_watermark: max num of unacked xmits we will tolerate,
773  *                       before we will start throttling
774  *
775  *      Returns true iff the number of unacked xmits sent to
776  *      the IO partition is >= high_watermark.
777  */
778 static inline bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
779                                            ulong high_watermark)
780 {
781         return (devdata_xmits_outstanding(devdata) >= high_watermark);
782 }
783
784 /**
785  *      vnic_hit_low_watermark
786  *      @devdata: indicates visornic device we are checking
787  *      @low_watermark: we will wait until the num of unacked xmits
788  *                      drops to this value or lower before we start
789  *                      transmitting again
790  *
791  *      Returns true iff the number of unacked xmits sent to
792  *      the IO partition is <= low_watermark.
793  */
794 static inline bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
795                                           ulong low_watermark)
796 {
797         return (devdata_xmits_outstanding(devdata) <= low_watermark);
798 }
799
800 /**
801  *      visornic_xmit - send a packet to the IO Partition
802  *      @skb: Packet to be sent
803  *      @netdev: net device the packet is being sent from
804  *
805  *      Convert the skb to a cmdrsp so the IO Partition can understand it.
806  *      Send the XMIT command to the IO Partition for processing. This
807  *      function is protected from concurrent calls by a spinlock xmit_lock
808  *      in the net_device struct, but as soon as the function returns it
809  *      can be called again.
810  *      Returns NETDEV_TX_OK.
811  */
812 static int
813 visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
814 {
815         struct visornic_devdata *devdata;
816         int len, firstfraglen, padlen;
817         struct uiscmdrsp *cmdrsp = NULL;
818         unsigned long flags;
819
820         devdata = netdev_priv(netdev);
821         spin_lock_irqsave(&devdata->priv_lock, flags);
822
823         if (netif_queue_stopped(netdev) || devdata->server_down ||
824             devdata->server_change_state) {
825                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
826                 devdata->busy_cnt++;
827                 dev_dbg(&netdev->dev,
828                         "%s busy - queue stopped\n", __func__);
829                 kfree_skb(skb);
830                 return NETDEV_TX_OK;
831         }
832
833         /* sk_buff struct is used to host network data throughout all the
834          * linux network subsystems
835          */
836         len = skb->len;
837
838         /* skb->len is the FULL length of data (including fragmentary portion)
839          * skb->data_len is the length of the fragment portion in frags
840          * skb->len - skb->data_len is size of the 1st fragment in skb->data
841          * calculate the length of the first fragment that skb->data is
842          * pointing to
843          */
844         firstfraglen = skb->len - skb->data_len;
845         if (firstfraglen < ETH_HEADER_SIZE) {
846                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
847                 devdata->busy_cnt++;
848                 dev_err(&netdev->dev,
849                         "%s busy - first frag too small (%d)\n",
850                         __func__, firstfraglen);
851                 kfree_skb(skb);
852                 return NETDEV_TX_OK;
853         }
854
855         if ((len < ETH_MIN_PACKET_SIZE) &&
856             ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
857                 /* pad the packet out to minimum size */
858                 padlen = ETH_MIN_PACKET_SIZE - len;
859                 memset(&skb->data[len], 0, padlen);
860                 skb->tail += padlen;
861                 skb->len += padlen;
862                 len += padlen;
863                 firstfraglen += padlen;
864         }
865
866         cmdrsp = devdata->xmit_cmdrsp;
867         /* clear cmdrsp */
868         memset(cmdrsp, 0, SIZEOF_CMDRSP);
869         cmdrsp->net.type = NET_XMIT;
870         cmdrsp->cmdtype = CMD_NET_TYPE;
871
872         /* save the pointer to skb -- we'll need it for completion */
873         cmdrsp->net.buf = skb;
874
875         if (vnic_hit_high_watermark(devdata,
876                                     devdata->max_outstanding_net_xmits)) {
877                 /* too many NET_XMITs queued over to IOVM - need to wait
878                  */
879                 devdata->chstat.reject_count++;
880                 if (!devdata->queuefullmsg_logged &&
881                     ((devdata->chstat.reject_count & 0x3ff) == 1))
882                         devdata->queuefullmsg_logged = 1;
883                 netif_stop_queue(netdev);
884                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
885                 devdata->busy_cnt++;
886                 dev_dbg(&netdev->dev,
887                         "%s busy - waiting for iovm to catch up\n",
888                         __func__);
889                 kfree_skb(skb);
890                 return NETDEV_TX_OK;
891         }
892         if (devdata->queuefullmsg_logged)
893                 devdata->queuefullmsg_logged = 0;
894
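        /* when the stack marks the checksum as already handled
         * (CHECKSUM_UNNECESSARY), pass the header offsets and checksum
         * value along to the IO partition
         */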
895         if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
896                 cmdrsp->net.xmt.lincsum.valid = 1;
897                 cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
898                 if (skb_transport_header(skb) > skb->data) {
899                         cmdrsp->net.xmt.lincsum.hrawoff =
900                                 skb_transport_header(skb) - skb->data;
901                                 cmdrsp->net.xmt.lincsum.hrawoffv = 1;
902                 }
903                 if (skb_network_header(skb) > skb->data) {
904                         cmdrsp->net.xmt.lincsum.nhrawoff =
905                                 skb_network_header(skb) - skb->data;
906                         cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
907                 }
908                 cmdrsp->net.xmt.lincsum.csum = skb->csum;
909         } else {
910                 cmdrsp->net.xmt.lincsum.valid = 0;
911         }
912
913         /* save off the length of the entire data packet */
914         cmdrsp->net.xmt.len = len;
915
916         /* copy ethernet header from first frag into cmdrsp
917          * - everything else will be passed in frags & DMA'ed
918          */
919         memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HEADER_SIZE);
920         /* copy frags info - from skb->data we need to only provide access
921          * beyond eth header
922          */
923         cmdrsp->net.xmt.num_frags =
924                 visor_copy_fragsinfo_from_skb(skb, firstfraglen,
925                                               MAX_PHYS_INFO,
926                                               cmdrsp->net.xmt.frags);
927         if (cmdrsp->net.xmt.num_frags < 0) {
928                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
929                 devdata->busy_cnt++;
930                 dev_err(&netdev->dev,
931                         "%s busy - copy frags failed\n", __func__);
932                 kfree_skb(skb);
933                 return NETDEV_TX_OK;
934         }
935
936         if (!visorchannel_signalinsert(devdata->dev->visorchannel,
937                                        IOCHAN_TO_IOPART, cmdrsp)) {
938                 netif_stop_queue(netdev);
939                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
940                 devdata->busy_cnt++;
941                 dev_dbg(&netdev->dev,
942                         "%s busy - signalinsert failed\n", __func__);
943                 kfree_skb(skb);
944                 return NETDEV_TX_OK;
945         }
946
947         /* Track the skbs that have been sent to the IOVM for XMIT */
948         skb_queue_head(&devdata->xmitbufhead, skb);
949
950         /* update xmt stats */
951         devdata->net_stats.tx_packets++;
952         devdata->net_stats.tx_bytes += skb->len;
953         devdata->chstat.sent_xmit++;
954
955         /* check to see if we have hit the high watermark for
956          * netif_stop_queue()
957          */
958         if (vnic_hit_high_watermark(devdata,
959                                     devdata->upper_threshold_net_xmits)) {
960                 /* too many NET_XMITs queued over to IOVM - need to wait */
961                 netif_stop_queue(netdev); /* calling stop queue - call
962                                            * netif_wake_queue() after lower
963                                            * threshold
964                                            */
965                 dev_dbg(&netdev->dev,
966                         "%s busy - invoking iovm flow control\n",
967                         __func__);
968                 devdata->flow_control_upper_hits++;
969         }
970         spin_unlock_irqrestore(&devdata->priv_lock, flags);
971
972         /* skb will be freed when we get back NET_XMIT_DONE */
973         return NETDEV_TX_OK;
974 }
975
976 /**
977  *      visornic_get_stats - returns net_stats of the visornic device
978  *      @netdev: netdevice
979  *
980  *      Returns the net_device_stats for the device
981  */
982 static struct net_device_stats *
983 visornic_get_stats(struct net_device *netdev)
984 {
985         struct visornic_devdata *devdata = netdev_priv(netdev);
986
987         return &devdata->net_stats;
988 }
989
990 /**
991  *      visornic_change_mtu - changes mtu of device.
992  *      @netdev: netdevice
993  *      @new_mtu: value of new mtu
994  *
995  *      MTU cannot be changed by system, must be changed via
996  *      CONTROLVM message. All vnics and pnics in a switch have
997  *      to have the same MTU for everything to work.
998  *      Currently not supported.
999  *      Returns -EINVAL
1000  */
1001 static int
1002 visornic_change_mtu(struct net_device *netdev, int new_mtu)
1003 {
1004         return -EINVAL;
1005 }
1006
1007 /**
1008  *      visornic_set_multi - changes the rx filtering of the device
1009  *      @netdev: netdevice
1010  *
1011  *      Only flag we support currently is IFF_PROMISC
1012  *      Returns void
1013  */
1014 static void
1015 visornic_set_multi(struct net_device *netdev)
1016 {
1017         struct uiscmdrsp *cmdrsp;
1018         struct visornic_devdata *devdata = netdev_priv(netdev);
1019
1020         /* any filtering changes */
1021         if (devdata->old_flags != netdev->flags) {
1022                 if ((netdev->flags & IFF_PROMISC) !=
1023                     (devdata->old_flags & IFF_PROMISC)) {
1024                         cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1025                         if (!cmdrsp)
1026                                 return;
1027                         cmdrsp->cmdtype = CMD_NET_TYPE;
1028                         cmdrsp->net.type = NET_RCV_PROMISC;
1029                         cmdrsp->net.enbdis.context = netdev;
1030                         cmdrsp->net.enbdis.enable =
1031                                 (netdev->flags & IFF_PROMISC);
1032                         visorchannel_signalinsert(devdata->dev->visorchannel,
1033                                                   IOCHAN_TO_IOPART,
1034                                                   cmdrsp);
1035                         kfree(cmdrsp);
1036                 }
1037                 devdata->old_flags = netdev->flags;
1038         }
1039 }
1040
1041 /**
1042  *      visornic_xmit_timeout - request to timeout the xmit
1043  *      @netdev: netdevice that timed out
1044  *
1045  *      Queue the work and return. Make sure we have not already
1046  *      been informed that the IO Partition is gone; if it is gone,
1047  *      we will already timeout the xmits.
1048  */
1049 static void
1050 visornic_xmit_timeout(struct net_device *netdev)
1051 {
1052         struct visornic_devdata *devdata = netdev_priv(netdev);
1053         unsigned long flags;
1054
1055         spin_lock_irqsave(&devdata->priv_lock, flags);
1056         if (devdata->going_away) {
1057                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1058                 dev_dbg(&devdata->dev->device,
1059                         "%s aborting because device removal pending\n",
1060                         __func__);
1061                 return;
1062         }
1063
1064         /* Ensure that a ServerDown message hasn't been received */
1065         if (!devdata->enabled ||
1066             (devdata->server_down && !devdata->server_change_state)) {
1067                 dev_dbg(&netdev->dev, "%s no processing\n",
1068                         __func__);
1069                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1070                 return;
1071         }
1072         queue_work(visornic_timeout_reset_workqueue, &devdata->timeout_reset);
1073         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1074 }
1075
1076 /**
1077  *      repost_return   - repost rcv bufs that have come back
1078  *      @cmdrsp: io channel command struct to post
1079  *      @devdata: visornic devdata for the device
1080  *      @skb: skb
1081  *      @netdev: netdevice
1082  *
1083  *      Repost rcv buffers that have been returned to us when
1084  *      we are finished with them.
1085  *      Returns 0 for success, negative errno on error.
1086  */
1087 static inline int
1088 repost_return(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
1089               struct sk_buff *skb, struct net_device *netdev)
1090 {
1091         struct net_pkt_rcv copy;
1092         int i = 0, cc, numreposted;
1093         int found_skb = 0;
1094         int status = 0;
1095
1096         copy = cmdrsp->net.rcv;
1097         switch (copy.numrcvbufs) {
1098         case 0:
1099                 devdata->n_rcv0++;
1100                 break;
1101         case 1:
1102                 devdata->n_rcv1++;
1103                 break;
1104         case 2:
1105                 devdata->n_rcv2++;
1106                 break;
1107         default:
1108                 devdata->n_rcvx++;
1109                 break;
1110         }
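        /* for each rcv buffer handed back by the IO partition, find its
         * slot in rcvbuf[], allocate a replacement buffer for that slot,
         * and post it
         */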
1111         for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1112                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1113                         if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1114                                 continue;
1115
1116                         if ((skb) && devdata->rcvbuf[i] == skb) {
1117                                 devdata->found_repost_rcvbuf_cnt++;
1118                                 found_skb = 1;
1119                                 devdata->repost_found_skb_cnt++;
1120                         }
1121                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1122                         if (!devdata->rcvbuf[i]) {
1123                                 devdata->num_rcv_bufs_could_not_alloc++;
1124                                 devdata->alloc_failed_in_repost_rtn_cnt++;
1125                                 status = -ENOMEM;
1126                                 break;
1127                         }
1128                         post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1129                         numreposted++;
1130                         break;
1131                 }
1132         }
1133         if (numreposted != copy.numrcvbufs) {
1134                 devdata->n_repost_deficit++;
1135                 status = -EINVAL;
1136         }
1137         if (skb) {
1138                 if (found_skb) {
1139                         kfree_skb(skb);
1140                 } else {
1141                         status = -EINVAL;
1142                         devdata->bad_rcv_buf++;
1143                 }
1144         }
1145         return status;
1146 }
1147
1148 /**
1149  *      visornic_rx - Handle receive packets coming back from IO Part
1150  *      @cmdrsp: Receive packet returned from IO Part
1151  *
1152  *      Got a receive packet back from the IO Part, handle it and send
1153  *      it up the stack.
1154  *      Returns the number of packets delivered up the stack.
1155  */
1156 static int
1157 visornic_rx(struct uiscmdrsp *cmdrsp)
1158 {
1159         struct visornic_devdata *devdata;
1160         struct sk_buff *skb, *prev, *curr;
1161         struct net_device *netdev;
1162         int cc, currsize, off;
1163         struct ethhdr *eth;
1164         unsigned long flags;
1165         int rx_count = 0;
1166
1167         /* post new rcv buf to the other end using the cmdrsp we have at hand;
1168          * post it without holding lock - but we'll use the signal lock to
1169          * synchronize the queue insert. The cmdrsp that contains the net.rcv
1170          * is the one we are using to repost, so copy the info we need from it.
1171          */
1172         skb = cmdrsp->net.buf;
1173         netdev = skb->dev;
1174
1175         devdata = netdev_priv(netdev);
1176
1177         spin_lock_irqsave(&devdata->priv_lock, flags);
1178         atomic_dec(&devdata->num_rcvbuf_in_iovm);
1179
1180         /* set length to how much was ACTUALLY received -
1181          * NOTE: rcv_done_len includes actual length of data rcvd
1182          * including ethhdr
1183          */
1184         skb->len = cmdrsp->net.rcv.rcv_done_len;
1185
1186         /* update rcv stats - call it with priv_lock held */
1187         devdata->net_stats.rx_packets++;
1188         devdata->net_stats.rx_bytes += skb->len;
1189
1190         /* test enabled while holding lock */
1191         if (!(devdata->enabled && devdata->enab_dis_acked)) {
1192                 /* don't process it unless we're in enable mode and until
1193                  * we've gotten an ACK saying the other end got our RCV enable
1194                  */
1195                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1196                 repost_return(cmdrsp, devdata, skb, netdev);
1197                 return rx_count;
1198         }
1199
1200         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1201
1202         /* when skb was allocated, skb->dev, skb->data, skb->len and
1203          * skb->data_len were setup. AND, data has already put into the
1204          * skb (both first frag and in frags pages)
1205          * NOTE: firstfragslen is the amount of data in skb->data and that
1206          * which is not in nr_frags or frag_list. This is now simply
1207          * RCVPOST_BUF_SIZE. bump tail to show how much data is in
1208          * firstfrag & set data_len to show rest see if we have to chain
1209          * frag_list.
1210          */
1211         if (skb->len > RCVPOST_BUF_SIZE) {      /* do PRECAUTIONARY check */
1212                 if (cmdrsp->net.rcv.numrcvbufs < 2) {
1213                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1214                                 dev_err(&devdata->netdev->dev,
1215                                         "repost_return failed");
1216                         return rx_count;
1217                 }
1218                 /* length rcvd is greater than firstfrag in this skb rcv buf  */
1219                 skb->tail += RCVPOST_BUF_SIZE;  /* amount in skb->data */
1220                 skb->data_len = skb->len - RCVPOST_BUF_SIZE;    /* amount that
1221                                                                    will be in
1222                                                                    frag_list */
1223         } else {
1224                 /* data fits in this skb - no chaining - do
1225                  * PRECAUTIONARY check
1226                  */
1227                 if (cmdrsp->net.rcv.numrcvbufs != 1) {  /* should be 1 */
1228                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1229                                 dev_err(&devdata->netdev->dev,
1230                                         "repost_return failed");
1231                         return rx_count;
1232                 }
1233                 skb->tail += skb->len;
1234                 skb->data_len = 0;      /* nothing rcvd in frag_list */
1235         }
1236         off = skb_tail_pointer(skb) - skb->data;
1237
1238         /* off is the amount we bumped tail by in the head skb.
1239          * It is used to calculate the size of each chained skb below,
1240          * and also to index into bufline to continue the copy
1241          * (for chansocktwopc).
1242          * If necessary, chain the rcv skbs together.
1243          * NOTE: index 0 has the same as cmdrsp->net.rcv.skb; we need to
1244          * chain the rest to that one.
1245          * - do PRECAUTIONARY check
1246          */
1247         if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1248                 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1249                         dev_err(&devdata->netdev->dev, "repost_return failed");
1250                 return rx_count;
1251         }
1252
1253         if (cmdrsp->net.rcv.numrcvbufs > 1) {
1254                 /* chain the various rcv buffers into the skb's frag_list. */
1255                 /* Note: off was initialized above  */
1256                 for (cc = 1, prev = NULL;
1257                      cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1258                         curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1259                         curr->next = NULL;
1260                         if (!prev)      /* start of list- set head */
1261                                 skb_shinfo(skb)->frag_list = curr;
1262                         else
1263                                 prev->next = curr;
1264                         prev = curr;
1265
1266                         /* should we set skb->len and skb->data_len for each
1267                          * buffer being chained??? can't hurt!
1268                          */
1269                         currsize = min(skb->len - off,
1270                                        (unsigned int)RCVPOST_BUF_SIZE);
1271                         curr->len = currsize;
1272                         curr->tail += currsize;
1273                         curr->data_len = 0;
1274                         off += currsize;
1275                 }
1276                 /* assert skb->len == off */
1277                 if (skb->len != off) {
1278                         netdev_err(devdata->netdev,
1279                                    "something wrong; skb->len:%d != off:%d\n",
1280                                    skb->len, off);
1281                 }
1282         }
1283
1284         /* set up packet's protocol type using ethernet header - this
1285          * sets up skb->pkt_type & it also PULLS out the eth header
1286          */
1287         skb->protocol = eth_type_trans(skb, netdev);
1288
1289         eth = eth_hdr(skb);
1290
1291         skb->csum = 0;
1292         skb->ip_summed = CHECKSUM_NONE;
1293
1294         do {
1295                 if (netdev->flags & IFF_PROMISC)
1296                         break;  /* accept all packets */
1297                 if (skb->pkt_type == PACKET_BROADCAST) {
1298                         if (netdev->flags & IFF_BROADCAST)
1299                                 break;  /* accept all broadcast packets */
1300                 } else if (skb->pkt_type == PACKET_MULTICAST) {
1301                         if ((netdev->flags & IFF_MULTICAST) &&
1302                             (netdev_mc_count(netdev))) {
1303                                 struct netdev_hw_addr *ha;
1304                                 int found_mc = 0;
1305
1306                                 /* only accept multicast packets that we can
1307                                  * find in our multicast address list
1308                                  */
1309                                 netdev_for_each_mc_addr(ha, netdev) {
1310                                         if (ether_addr_equal(eth->h_dest,
1311                                                              ha->addr)) {
1312                                                 found_mc = 1;
1313                                                 break;
1314                                         }
1315                                 }
1316                                 if (found_mc)
1317                                         break;  /* accept packet, dest
1318                                                    matches a multicast
1319                                                    address */
1320                         }
1321                 } else if (skb->pkt_type == PACKET_HOST) {
1322                         break;  /* accept packet; h_dest matches the
1323                                  * vnic mac address */
1324                 } else if (skb->pkt_type == PACKET_OTHERHOST) {
1325                         /* something is not right */
1326                         dev_err(&devdata->netdev->dev,
1327                                 "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1328                                 netdev->name, eth->h_dest, netdev->dev_addr);
1329                 }
1330                 /* drop packet - don't forward it up to OS */
1331                 devdata->n_rcv_packets_not_accepted++;
1332                 repost_return(cmdrsp, devdata, skb, netdev);
1333                 return rx_count;
1334         } while (0);
1335
1336         rx_count++;
1337         netif_receive_skb(skb);
1338         /* netif_receive_skb() returns a value, but in practice most
1339          * drivers ignore it
1340          */
1341
1342         skb = NULL;
1343         /*
1344          * Whether the packet got dropped or handled, the skb is freed by
1345          * kernel code, so we shouldn't free it; we should, however, repost
1346          * a new rcv buffer.
1347          */
1348         repost_return(cmdrsp, devdata, skb, netdev);
1349         return rx_count;
1350 }
1351
1352 /**
1353  *      devdata_initialize      - Initialize devdata structure
1354  *      @devdata: visornic_devdata structure to initialize
1355  *      @dev: visor_device it belongs to
1356  *
1357  *      Set up initial values for the visornic based on channel and default
1358  *      values.
1359  *      Returns a pointer to the devdata if successful, else NULL
1360  */
1361 static struct visornic_devdata *
1362 devdata_initialize(struct visornic_devdata *devdata, struct visor_device *dev)
1363 {
1364         if (!devdata)
1365                 return NULL;
1366         memset(devdata, 0, sizeof(*devdata));
1367         devdata->dev = dev;
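        /* the 64-bit jiffies value presumably serves as a unique id for
         * this incarnation of the device
         */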
1368         devdata->incarnation_id = get_jiffies_64();
1369         return devdata;
1370 }
1371
1372 /**
1373  *      devdata_release - Frees up references in devdata
1374  *      @devdata: struct to clean up
1375  *
1376  *      Frees up references in devdata.
1377  *      Returns void
1378  */
1379 static void devdata_release(struct visornic_devdata *devdata)
1380 {
1381         kfree(devdata->rcvbuf);
1382         kfree(devdata->cmdrsp_rcv);
1383         kfree(devdata->xmit_cmdrsp);
1384 }
1385
1386 static const struct net_device_ops visornic_dev_ops = {
1387         .ndo_open = visornic_open,
1388         .ndo_stop = visornic_close,
1389         .ndo_start_xmit = visornic_xmit,
1390         .ndo_get_stats = visornic_get_stats,
1391         .ndo_change_mtu = visornic_change_mtu,
1392         .ndo_tx_timeout = visornic_xmit_timeout,
1393         .ndo_set_rx_mode = visornic_set_multi,
1394 };
1395
1396 /* DebugFS code */
1397 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1398                                  size_t len, loff_t *offset)
1399 {
1400         ssize_t bytes_read = 0;
1401         int str_pos = 0;
1402         struct visornic_devdata *devdata;
1403         struct net_device *dev;
1404         char *vbuf;
1405
1406         if (len > MAX_BUF)
1407                 len = MAX_BUF;
1408         vbuf = kzalloc(len, GFP_KERNEL);
1409         if (!vbuf)
1410                 return -ENOMEM;
1411
1412         /* for each vnic channel
1413          * dump out channel specific data
1414          */
1415         rcu_read_lock();
1416         for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1417                 /*
1418                  * Only consider visornic netdevs whose tx queue is stopped
1419                  */
1420                 if ((dev->netdev_ops != &visornic_dev_ops) ||
1421                     (!netif_queue_stopped(dev)))
1422                         continue;
1423
1424                 devdata = netdev_priv(dev);
1425                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1426                                      "netdev = %s (0x%p), MAC Addr %pM\n",
1427                                      dev->name,
1428                                      dev,
1429                                      dev->dev_addr);
1430                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1431                                      "VisorNic Dev Info = 0x%p\n", devdata);
1432                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1433                                      " num_rcv_bufs = %d\n",
1434                                      devdata->num_rcv_bufs);
1435                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1436                                      " max_outstanding_net_xmits = %lu\n",
1437                                      devdata->max_outstanding_net_xmits);
1438                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1439                                      " upper_threshold_net_xmits = %lu\n",
1440                                      devdata->upper_threshold_net_xmits);
1441                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1442                                      " lower_threshold_net_xmits = %lu\n",
1443                                      devdata->lower_threshold_net_xmits);
1444                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1445                                      " queuefullmsg_logged = %d\n",
1446                                      devdata->queuefullmsg_logged);
1447                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1448                                      " chstat.got_rcv = %lu\n",
1449                                      devdata->chstat.got_rcv);
1450                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1451                                      " chstat.got_enbdisack = %lu\n",
1452                                      devdata->chstat.got_enbdisack);
1453                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1454                                      " chstat.got_xmit_done = %lu\n",
1455                                      devdata->chstat.got_xmit_done);
1456                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1457                                      " chstat.xmit_fail = %lu\n",
1458                                      devdata->chstat.xmit_fail);
1459                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1460                                      " chstat.sent_enbdis = %lu\n",
1461                                      devdata->chstat.sent_enbdis);
1462                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1463                                      " chstat.sent_promisc = %lu\n",
1464                                      devdata->chstat.sent_promisc);
1465                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1466                                      " chstat.sent_post = %lu\n",
1467                                      devdata->chstat.sent_post);
1468                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1469                                      " chstat.sent_post_failed = %lu\n",
1470                                      devdata->chstat.sent_post_failed);
1471                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1472                                      " chstat.sent_xmit = %lu\n",
1473                                      devdata->chstat.sent_xmit);
1474                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1475                                      " chstat.reject_count = %lu\n",
1476                                      devdata->chstat.reject_count);
1477                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1478                                      " chstat.extra_rcvbufs_sent = %lu\n",
1479                                      devdata->chstat.extra_rcvbufs_sent);
1480                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1481                                      " n_rcv0 = %lu\n", devdata->n_rcv0);
1482                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1483                                      " n_rcv1 = %lu\n", devdata->n_rcv1);
1484                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1485                                      " n_rcv2 = %lu\n", devdata->n_rcv2);
1486                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1487                                      " n_rcvx = %lu\n", devdata->n_rcvx);
1488                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1489                                      " num_rcvbuf_in_iovm = %d\n",
1490                                      atomic_read(&devdata->num_rcvbuf_in_iovm));
1491                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1492                                      " alloc_failed_in_if_needed_cnt = %lu\n",
1493                                      devdata->alloc_failed_in_if_needed_cnt);
1494                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1495                                      " alloc_failed_in_repost_rtn_cnt = %lu\n",
1496                                      devdata->alloc_failed_in_repost_rtn_cnt);
1497                 /* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1498                  *                   " inner_loop_limit_reached_cnt = %lu\n",
1499                  *                   devdata->inner_loop_limit_reached_cnt);
1500                  */
1501                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1502                                      " found_repost_rcvbuf_cnt = %lu\n",
1503                                      devdata->found_repost_rcvbuf_cnt);
1504                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1505                                      " repost_found_skb_cnt = %lu\n",
1506                                      devdata->repost_found_skb_cnt);
1507                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1508                                      " n_repost_deficit = %lu\n",
1509                                      devdata->n_repost_deficit);
1510                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1511                                      " bad_rcv_buf = %lu\n",
1512                                      devdata->bad_rcv_buf);
1513                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1514                                      " n_rcv_packets_not_accepted = %lu\n",
1515                                      devdata->n_rcv_packets_not_accepted);
1516                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1517                                      " interrupts_rcvd = %llu\n",
1518                                      devdata->interrupts_rcvd);
1519                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1520                                      " interrupts_notme = %llu\n",
1521                                      devdata->interrupts_notme);
1522                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1523                                      " interrupts_disabled = %llu\n",
1524                                      devdata->interrupts_disabled);
1525                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1526                                      " busy_cnt = %llu\n",
1527                                      devdata->busy_cnt);
1528                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1529                                      " flow_control_upper_hits = %llu\n",
1530                                      devdata->flow_control_upper_hits);
1531                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1532                                      " flow_control_lower_hits = %llu\n",
1533                                      devdata->flow_control_lower_hits);
1534                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1535                                      " netif_queue = %s\n",
1536                                      netif_queue_stopped(devdata->netdev) ?
1537                                      "stopped" : "running");
1538                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1539                                      " xmits_outstanding = %lu\n",
1540                                      devdata_xmits_outstanding(devdata));
1541         }
1542         rcu_read_unlock();
1543         bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1544         kfree(vbuf);
1545         return bytes_read;
1546 }
1547
1548 /**
1549  *      send_rcv_posts_if_needed - send receive buffers to the IO Partition
1550  *      @devdata: visornic device
1551  *
1552  *      Send receive buffers to the IO Partition.
1553  *      Returns void
1554  */
1555 static void
1556 send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1557 {
1558         int i;
1559         struct net_device *netdev;
1560         struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1561         int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1562
1563         /* don't do this until vnic is marked ready */
1564         if (!(devdata->enabled && devdata->enab_dis_acked))
1565                 return;
1566
1567         netdev = devdata->netdev;
1568         rcv_bufs_allocated = 0;
1569         /* this code tries to prevent getting stuck here forever,
1570          * but still retries if it can't allocate them all this time.
1571          */
1572         cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1573         while (cur_num_rcv_bufs_to_alloc > 0) {
1574                 cur_num_rcv_bufs_to_alloc--;
1575                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1576                         if (devdata->rcvbuf[i])
1577                                 continue;
1578                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1579                         if (!devdata->rcvbuf[i]) {
1580                                 devdata->alloc_failed_in_if_needed_cnt++;
1581                                 break;
1582                         }
1583                         rcv_bufs_allocated++;
1584                         post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1585                         devdata->chstat.extra_rcvbufs_sent++;
1586                 }
1587         }
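        /* account for the buffers we managed to repost; any remaining
         * deficit is retried the next time we are called
         */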
1588         devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1589 }
1590
1591 /**
1592  *      drain_resp_queue  - drains and ignores all messages from the resp queue
1593  *      @cmdrsp: io channel command response message
1594  *      @devdata: visornic device to drain
1595  */
1596 static void
1597 drain_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata)
1598 {
1599         while (visorchannel_signalremove(devdata->dev->visorchannel,
1600                                          IOCHAN_FROM_IOPART,
1601                                          cmdrsp))
1602                 ;
1603 }
1604
1605 /**
1606  *      service_resp_queue      - drains the response queue
1607  *      @cmdrsp: io channel command response message
1608  *      @devdata: visornic device to drain
1609  *
1610  *      Drain the response queue of any responses from the IO partition.
1611  *      Process the responses as we get them.
1612  *      Returns when the response queue is empty.
1613  */
1614 static void
1615 service_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
1616                    int *rx_work_done)
1617 {
1618         unsigned long flags;
1619         struct net_device *netdev;
1620
1621         /* TODO: CLIENT ACQUIRE -- don't really need this at the moment */
1622
1623         for (;;) {
1624                 if (!visorchannel_signalremove(devdata->dev->visorchannel,
1625                                                IOCHAN_FROM_IOPART,
1626                                                cmdrsp))
1627                         break; /* queue empty */
1628
1629                 switch (cmdrsp->net.type) {
1630                 case NET_RCV:
1631                         devdata->chstat.got_rcv++;
1632                         /* process incoming packet */
1633                         *rx_work_done += visornic_rx(cmdrsp);
1634                         break;
1635                 case NET_XMIT_DONE:
1636                         spin_lock_irqsave(&devdata->priv_lock, flags);
1637                         devdata->chstat.got_xmit_done++;
1638                         if (cmdrsp->net.xmtdone.xmt_done_result)
1639                                 devdata->chstat.xmit_fail++;
1640                         /* only call queue wake if we stopped it */
1641                         netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1642                         /* ASSERT netdev == vnicinfo->netdev; */
1643                         if ((netdev == devdata->netdev) &&
1644                             netif_queue_stopped(netdev)) {
1645                                 /* check to see if we have crossed
1646                                  * the lower watermark for
1647                                  * netif_wake_queue()
1648                                  */
1649                                 if (vnic_hit_low_watermark(devdata,
1650                                         devdata->lower_threshold_net_xmits)) {
1651                                         /* enough NET_XMITs completed
1652                                          * so can restart netif queue
1653                                          */
1654                                         netif_wake_queue(netdev);
1655                                         devdata->flow_control_lower_hits++;
1656                                 }
1657                         }
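                        /* the xmit is complete; unlink the skb from our list
                         * of outstanding xmits and free it
                         */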
1658                         skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1659                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1660                         kfree_skb(cmdrsp->net.buf);
1661                         break;
1662                 case NET_RCV_ENBDIS_ACK:
1663                         devdata->chstat.got_enbdisack++;
1664                         netdev = (struct net_device *)
1665                                  cmdrsp->net.enbdis.context;
1666                         spin_lock_irqsave(&devdata->priv_lock, flags);
1667                         devdata->enab_dis_acked = 1;
1668                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1669
1670                         if (devdata->server_down &&
1671                             devdata->server_change_state) {
1672                                 /* Inform Linux that the link is up */
1673                                 devdata->server_down = false;
1674                                 devdata->server_change_state = false;
1675                                 netif_wake_queue(netdev);
1676                                 netif_carrier_on(netdev);
1677                         }
1678                         break;
1679                 case NET_CONNECT_STATUS:
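                        /* the IO partition is reporting a link state change */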
1680                         netdev = devdata->netdev;
1681                         if (cmdrsp->net.enbdis.enable == 1) {
1682                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1683                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1684                                 spin_unlock_irqrestore(&devdata->priv_lock,
1685                                                        flags);
1686                                 netif_wake_queue(netdev);
1687                                 netif_carrier_on(netdev);
1688                         } else {
1689                                 netif_stop_queue(netdev);
1690                                 netif_carrier_off(netdev);
1691                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1692                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1693                                 spin_unlock_irqrestore(&devdata->priv_lock,
1694                                                        flags);
1695                         }
1696                         break;
1697                 default:
1698                         break;
1699                 }
1700                 /* cmdrsp is now available for reuse  */
1701         }
1702 }
1703
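/**
 *      visornic_poll   - NAPI poll routine
 *      @napi: napi structure embedded in our devdata
 *      @budget: max number of packets we may process in this call
 *
 *      Reposts receive buffers if needed, then drains the response queue,
 *      counting received packets against @budget.  If fewer than @budget
 *      packets were processed, the poll is completed so the poll timer can
 *      reschedule us later.
 *      Returns the number of packets processed.
 */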
1704 static int visornic_poll(struct napi_struct *napi, int budget)
1705 {
1706         struct visornic_devdata *devdata = container_of(napi,
1707                                                         struct visornic_devdata,
1708                                                         napi);
1709         int rx_count = 0;
1710
1711         send_rcv_posts_if_needed(devdata);
1712         service_resp_queue(devdata->cmdrsp, devdata, &rx_count);
1713
1714         /*
1715          * If there aren't any more packets to receive,
1716          * complete the poll
1717          */
1718         if (rx_count < budget)
1719                 napi_complete(napi);
1720
1721         return rx_count;
1722 }
1723
1724 /**
1725  *      poll_for_irq    - Checks the status of the response queue.
1726  *      @v: void pointer to the visornic devdata
1727  *
1728  *      Timer function that periodically checks the response queue and
1729  *      schedules the NAPI poll to drain it if needed.
1730  *      Returns void.
1731  */
1732 static void
1733 poll_for_irq(unsigned long v)
1734 {
1735         struct visornic_devdata *devdata = (struct visornic_devdata *)v;
1736
1737         if (!visorchannel_signalempty(
1738                                    devdata->dev->visorchannel,
1739                                    IOCHAN_FROM_IOPART))
1740                 napi_schedule(&devdata->napi);
1741
1742         atomic_set(&devdata->interrupt_rcvd, 0);
1743
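        /* re-arm ourselves; this poll timer stands in for a real
         * channel interrupt
         */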
1744         mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
1745
1746 }
1747
1748 /**
1749  *      visornic_probe  - probe function for visornic devices
1750  *      @dev: The visor device discovered
1751  *
1752  *      Called when visorbus discovers a visornic device on its
1753  *      bus. It creates a new visornic ethernet adapter.
1754  *      Returns 0 or negative for error.
1755  */
1756 static int visornic_probe(struct visor_device *dev)
1757 {
1758         struct visornic_devdata *devdata = NULL;
1759         struct net_device *netdev = NULL;
1760         int err;
1761         int channel_offset = 0;
1762         u64 features;
1763
1764         netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1765         if (!netdev) {
1766                 dev_err(&dev->device,
1767                         "%s alloc_etherdev failed\n", __func__);
1768                 return -ENOMEM;
1769         }
1770
1771         netdev->netdev_ops = &visornic_dev_ops;
1772         netdev->watchdog_timeo = (5 * HZ);
1773         SET_NETDEV_DEV(netdev, &dev->device);
1774
1775         /* Get MAC address from channel and read it into the device. */
1776         netdev->addr_len = ETH_ALEN;
1777         channel_offset = offsetof(struct spar_io_channel_protocol,
1778                                   vnic.macaddr);
1779         err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1780                                     ETH_ALEN);
1781         if (err < 0) {
1782                 dev_err(&dev->device,
1783                         "%s failed to get mac addr from chan (%d)\n",
1784                         __func__, err);
1785                 goto cleanup_netdev;
1786         }
1787
1788         devdata = devdata_initialize(netdev_priv(netdev), dev);
1789         if (!devdata) {
1790                 dev_err(&dev->device,
1791                         "%s devdata_initialize failed\n", __func__);
1792                 err = -ENOMEM;
1793                 goto cleanup_netdev;
1794         }
1795         /* don't trust messages lying around in the channel */
1796         drain_resp_queue(devdata->cmdrsp, devdata);
1797
1798         devdata->netdev = netdev;
1799         dev_set_drvdata(&dev->device, devdata);
1800         init_waitqueue_head(&devdata->rsp_queue);
1801         spin_lock_init(&devdata->priv_lock);
1802         devdata->enabled = 0; /* not yet */
1803         atomic_set(&devdata->usage, 1);
1804
1805         /* Setup rcv bufs */
1806         channel_offset = offsetof(struct spar_io_channel_protocol,
1807                                   vnic.num_rcv_bufs);
1808         err = visorbus_read_channel(dev, channel_offset,
1809                                     &devdata->num_rcv_bufs, 4);
1810         if (err) {
1811                 dev_err(&dev->device,
1812                         "%s failed to get #rcv bufs from chan (%d)\n",
1813                         __func__, err);
1814                 goto cleanup_netdev;
1815         }
1816
1817         devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
1818                                   sizeof(struct sk_buff *), GFP_KERNEL);
1819         if (!devdata->rcvbuf) {
1820                 err = -ENOMEM;
1821                 goto cleanup_rcvbuf;
1822         }
1823
1824         /* set the net_xmit outstanding threshold */
1825         /* always leave two slots open, but keep a minimum of 3 */
1826         /* note that max_outstanding_net_xmits must be > 0 */
1827         devdata->max_outstanding_net_xmits =
1828                 max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1829         devdata->upper_threshold_net_xmits =
1830                 max_t(unsigned long,
1831                       2, (devdata->max_outstanding_net_xmits - 1));
1832         devdata->lower_threshold_net_xmits =
1833                 max_t(unsigned long,
1834                       1, (devdata->max_outstanding_net_xmits / 2));
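        /*
         * Worked example (hypothetical sizing): with num_rcv_bufs = 64 the
         * formulas above give max_outstanding_net_xmits = 19 (64/3 - 2),
         * upper_threshold_net_xmits = 18 and lower_threshold_net_xmits = 9,
         * so the xmit queue is presumably stopped once about 18 sends are
         * outstanding and woken again (see the NET_XMIT_DONE handling) once
         * completions bring the count back under 9.
         */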
1835
1836         skb_queue_head_init(&devdata->xmitbufhead);
1837
1838         /* create a cmdrsp we can use to post and unpost rcv buffers */
1839         devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1840         if (!devdata->cmdrsp_rcv) {
1841                 err = -ENOMEM;
1842                 goto cleanup_cmdrsp_rcv;
1843         }
1844         devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1845         if (!devdata->xmit_cmdrsp) {
1846                 err = -ENOMEM;
1847                 goto cleanup_xmit_cmdrsp;
1848         }
1849         INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1850         devdata->server_down = false;
1851         devdata->server_change_state = false;
1852
1853         /* set the default mtu */
1854         channel_offset = offsetof(struct spar_io_channel_protocol,
1855                                   vnic.mtu);
1856         err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1857         if (err) {
1858                 dev_err(&dev->device,
1859                         "%s failed to get mtu from chan (%d)\n",
1860                         __func__, err);
1861                 goto cleanup_xmit_cmdrsp;
1862         }
1863
1864         /* TODO: Setup Interrupt information */
1865         /* Set up NAPI and the poll timer to get responses */
1866         netif_napi_add(netdev, &devdata->napi, visornic_poll, 64);
1867
1868         setup_timer(&devdata->irq_poll_timer, poll_for_irq,
1869                     (unsigned long)devdata);
1870         /*
1871          * Note: This timer has to start running before the device is
1872          * used, because the napi routine is responsible for setting
1873          * enab_dis_acked
1874          */
1875         mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
1876
1877         channel_offset = offsetof(struct spar_io_channel_protocol,
1878                                   channel_header.features);
1879         err = visorbus_read_channel(dev, channel_offset, &features, 8);
1880         if (err) {
1881                 dev_err(&dev->device,
1882                         "%s failed to get features from chan (%d)\n",
1883                         __func__, err);
1884                 goto cleanup_napi_add;
1885         }
1886
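        /* tell the IO partition that we will poll the channel rather than
         * take real interrupts, and that we support enhanced rcvbuf checking
         */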
1887         features |= ULTRA_IO_CHANNEL_IS_POLLING;
1888         features |= ULTRA_IO_DRIVER_SUPPORTS_ENHANCED_RCVBUF_CHECKING;
1889         err = visorbus_write_channel(dev, channel_offset, &features, 8);
1890         if (err) {
1891                 dev_err(&dev->device,
1892                         "%s failed to set features in chan (%d)\n",
1893                         __func__, err);
1894                 goto cleanup_napi_add;
1895         }
1896
1897         err = register_netdev(netdev);
1898         if (err) {
1899                 dev_err(&dev->device,
1900                         "%s register_netdev failed (%d)\n", __func__, err);
1901                 goto cleanup_napi_add;
1902         }
1903
1904         /* create debugfs directories */
1905         devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1906                                                       visornic_debugfs_dir);
1907         if (!devdata->eth_debugfs_dir) {
1908                 dev_err(&dev->device,
1909                         "%s debugfs_create_dir %s failed\n",
1910                         __func__, netdev->name);
1911                 err = -ENOMEM;
1912                 goto cleanup_register_netdev;
1913         }
1914
1915         dev_info(&dev->device, "%s success netdev=%s\n",
1916                  __func__, netdev->name);
1917         return 0;
1918
1919 cleanup_register_netdev:
1920         unregister_netdev(netdev);
1921
1922 cleanup_napi_add:
1923         del_timer_sync(&devdata->irq_poll_timer);
1924         netif_napi_del(&devdata->napi);
1925
1926 cleanup_xmit_cmdrsp:
1927         kfree(devdata->xmit_cmdrsp);
1928
1929 cleanup_cmdrsp_rcv:
1930         kfree(devdata->cmdrsp_rcv);
1931
1932 cleanup_rcvbuf:
1933         kfree(devdata->rcvbuf);
1934
1935 cleanup_netdev:
1936         free_netdev(netdev);
1937         return err;
1938 }
1939
1940 /**
1941  *      host_side_disappeared   - IO part is gone.
1942  *      @devdata: device object
1943  *
1944  *      IO partition servicing this device is gone, do cleanup
1945  *      Returns void.
1946  */
1947 static void host_side_disappeared(struct visornic_devdata *devdata)
1948 {
1949         unsigned long flags;
1950
1951         spin_lock_irqsave(&devdata->priv_lock, flags);
1952         devdata->dev = NULL;   /* indicate device destroyed */
1953         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1954 }
1955
1956 /**
1957  *      visornic_remove         - Called when visornic dev goes away
1958  *      @dev: visornic device that is being removed
1959  *
1960  *      Called when DEVICE_DESTROY gets called to remove device.
1961  *      Returns void
1962  */
1963 static void visornic_remove(struct visor_device *dev)
1964 {
1965         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
1966         struct net_device *netdev;
1967         unsigned long flags;
1968
1969         if (!devdata) {
1970                 dev_err(&dev->device, "%s no devdata\n", __func__);
1971                 return;
1972         }
1973         spin_lock_irqsave(&devdata->priv_lock, flags);
1974         if (devdata->going_away) {
1975                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1976                 dev_err(&dev->device, "%s already being removed\n", __func__);
1977                 return;
1978         }
1979         devdata->going_away = true;
1980         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1981         netdev = devdata->netdev;
1982         if (!netdev) {
1983                 dev_err(&dev->device, "%s no net device\n", __func__);
1984                 return;
1985         }
1986
1987         /* going_away prevents new items being added to the workqueues */
1988         flush_workqueue(visornic_timeout_reset_workqueue);
1989
1990         debugfs_remove_recursive(devdata->eth_debugfs_dir);
1991
1992         unregister_netdev(netdev);  /* this will call visornic_close() */
1993
1994         del_timer_sync(&devdata->irq_poll_timer);
1995         netif_napi_del(&devdata->napi);
1996
1997         dev_set_drvdata(&dev->device, NULL);
1998         host_side_disappeared(devdata);
1999         devdata_release(devdata);
2000         free_netdev(netdev);
2001 }
2002
2003 /**
2004  *      visornic_pause          - Called when IO Part disappears
2005  *      @dev: visornic device that is being serviced
2006  *      @complete_func: call when finished.
2007  *
2008  *      Called when the IO Partition has gone down. Need to free
2009  *      up resources and wait for IO partition to come back. Mark
2010  *      link as down and don't attempt any DMA. When we have freed
2011  *      memory call the complete_func so that Command knows we are
2012  *      done. If we don't call complete_func, IO part will never
2013  *      come back.
2014  *      Returns 0 for success.
2015  */
2016 static int visornic_pause(struct visor_device *dev,
2017                           visorbus_state_complete_func complete_func)
2018 {
2019         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2020
2021         visornic_serverdown(devdata, complete_func);
2022         return 0;
2023 }
2024
2025 /**
2026  *      visornic_resume         - Called when IO part has recovered
2027  *      @dev: visornic device that is being serviced
2028  *      @complete_func: call when finished
2029  *
2030  *      Called when the IO partition has recovered. Reestablish
2031  *      connection to the IO part and set the link up. Okay to do
2032  *      DMA again.
2033  *      Returns 0 for success.
2034  */
2035 static int visornic_resume(struct visor_device *dev,
2036                            visorbus_state_complete_func complete_func)
2037 {
2038         struct visornic_devdata *devdata;
2039         struct net_device *netdev;
2040         unsigned long flags;
2041
2042         devdata = dev_get_drvdata(&dev->device);
2043         if (!devdata) {
2044                 dev_err(&dev->device, "%s no devdata\n", __func__);
2045                 return -EINVAL;
2046         }
2047
2048         netdev = devdata->netdev;
2049
2050         spin_lock_irqsave(&devdata->priv_lock, flags);
2051         if (devdata->server_change_state) {
2052                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2053                 dev_err(&dev->device, "%s server already changing state\n",
2054                         __func__);
2055                 return -EINVAL;
2056         }
2057         if (!devdata->server_down) {
2058                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2059                 dev_err(&dev->device, "%s server not down\n", __func__);
2060                 complete_func(dev, 0);
2061                 return 0;
2062         }
2063         devdata->server_change_state = true;
2064         spin_unlock_irqrestore(&devdata->priv_lock, flags);
2065
2066         /* Must transition channel to ATTACHED state BEFORE
2067          * we can start using the device again.
2068          * TODO: State transitions
2069          */
2070         mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
2071
2072         init_rcv_bufs(netdev, devdata);
2073
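        /* dev_open() must be called with the rtnl lock held */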
2074         rtnl_lock();
2075         dev_open(netdev);
2076         rtnl_unlock();
2077
2078         complete_func(dev, 0);
2079         return 0;
2080 }
2081
2082 /**
2083  *      visornic_init   - Init function
2084  *
2085  *      Init function for the visornic driver. Do initial driver setup
2086  *      and wait for devices.
2087  *      Returns 0 for success, negative for error.
2088  */
2089 static int visornic_init(void)
2090 {
2091         struct dentry *ret;
2092         int err = -ENOMEM;
2093
2094         visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2095         if (!visornic_debugfs_dir)
2096                 return err;
2097
2098         ret = debugfs_create_file("info", S_IRUSR, visornic_debugfs_dir, NULL,
2099                                   &debugfs_info_fops);
2100         if (!ret)
2101                 goto cleanup_debugfs;
2102         ret = debugfs_create_file("enable_ints", S_IWUSR, visornic_debugfs_dir,
2103                                   NULL, &debugfs_enable_ints_fops);
2104         if (!ret)
2105                 goto cleanup_debugfs;
2106
2107         /* create workqueue for tx timeout reset */
2108         visornic_timeout_reset_workqueue =
2109                 create_singlethread_workqueue("visornic_timeout_reset");
2110         if (!visornic_timeout_reset_workqueue)
2111                 goto cleanup_workqueue;
2112
2113         err = visorbus_register_visor_driver(&visornic_driver);
2114         if (!err)
2115                 return 0;
2116
2117 cleanup_workqueue:
2118         if (visornic_timeout_reset_workqueue) {
2119                 flush_workqueue(visornic_timeout_reset_workqueue);
2120                 destroy_workqueue(visornic_timeout_reset_workqueue);
2121         }
2122 cleanup_debugfs:
2123         debugfs_remove_recursive(visornic_debugfs_dir);
2124
2125         return err;
2126 }
2127
2128 /**
2129  *      visornic_cleanup        - driver exit routine
2130  *
2131  *      Unregister driver from the bus and free up memory.
2132  */
2133 static void visornic_cleanup(void)
2134 {
2135         visorbus_unregister_visor_driver(&visornic_driver);
2136
2137         if (visornic_timeout_reset_workqueue) {
2138                 flush_workqueue(visornic_timeout_reset_workqueue);
2139                 destroy_workqueue(visornic_timeout_reset_workqueue);
2140         }
2141         debugfs_remove_recursive(visornic_debugfs_dir);
2142 }
2143
2144 module_init(visornic_init);
2145 module_exit(visornic_cleanup);
2146
2147 MODULE_AUTHOR("Unisys");
2148 MODULE_LICENSE("GPL");
2149 MODULE_DESCRIPTION("sPAR nic driver for sparlinux: ver 1.0.0.0");
2150 MODULE_VERSION("1.0.0.0");