kernel/drivers/hv/ring_buffer.c
/*
 *
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hyperv.h>
#include <linux/uio.h>

#include "hyperv_vmbus.h"

void hv_begin_read(struct hv_ring_buffer_info *rbi)
{
        rbi->ring_buffer->interrupt_mask = 1;
        mb();
}

u32 hv_end_read(struct hv_ring_buffer_info *rbi)
{
        u32 read;
        u32 write;

        rbi->ring_buffer->interrupt_mask = 0;
        mb();

        /*
         * Now check to see if the ring buffer is still empty.
         * If it is not, we raced and we need to process new
         * incoming messages.
         */
        hv_get_ringbuffer_availbytes(rbi, &read, &write);

        return read;
}
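
/*
 * Illustrative sketch (not part of this driver): how a channel drain loop
 * might bracket itself with hv_begin_read()/hv_end_read().  The return
 * value of hv_end_read() is re-checked so that data which raced in while
 * the interrupt mask was set is not left unprocessed.  The 'drain'
 * callback is hypothetical and stands in for the real packet handler.
 */
static void __maybe_unused example_drain_loop(struct hv_ring_buffer_info *rbi,
                                void (*drain)(struct hv_ring_buffer_info *))
{
        for (;;) {
                hv_begin_read(rbi);     /* mask host interrupts while draining */
                drain(rbi);             /* consume everything currently readable */

                /*
                 * hv_end_read() clears the mask and reports how many bytes
                 * are still readable; a non-zero value means new data
                 * arrived during the drain, so go around again.
                 */
                if (hv_end_read(rbi) == 0)
                        break;
        }
}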

/*
 * When we write to the ring buffer, check if the host needs to
 * be signaled. Here are the details of this protocol:
 *
 *      1. The host guarantees that while it is draining the
 *         ring buffer, it will set the interrupt_mask to
 *         indicate it does not need to be interrupted when
 *         new data is placed.
 *
 *      2. The host guarantees that it will completely drain
 *         the ring buffer before exiting the read loop. Further,
 *         once the ring buffer is empty, it will clear the
 *         interrupt_mask and re-check to see if new data has
 *         arrived.
 */

static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi)
{
        mb();
        if (rbi->ring_buffer->interrupt_mask)
                return false;

        /* check interrupt_mask before read_index */
        rmb();
        /*
         * This is the only case we need to signal: when the
         * ring transitions from being empty to non-empty.
         */
        if (old_write == rbi->ring_buffer->read_index)
                return true;

        return false;
}
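
/*
 * Illustrative sketch (not part of this driver): a sender only signals the
 * host when hv_ringbuffer_write() reports, through its 'signal' out
 * parameter, that the ring went from empty to non-empty.  notify_host()
 * is a hypothetical stand-in for the driver's real signalling routine.
 */
static int __maybe_unused example_send(struct hv_ring_buffer_info *outring,
                                struct kvec *kv_list, u32 kv_count,
                                void (*notify_host)(void))
{
        bool signal = false;
        int ret;

        ret = hv_ringbuffer_write(outring, kv_list, kv_count, &signal);
        if (ret == 0 && signal)
                notify_host();  /* ring transitioned from empty to non-empty */

        return ret;
}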

/*
 * To optimize the flow management on the send-side,
 * when the sender is blocked because of lack of
 * sufficient space in the ring buffer, the consumer
 * of the ring buffer can potentially signal the producer.
 * This is controlled by the following parameters:
 *
 * 1. pending_send_sz: This is the size in bytes that the
 *    producer is trying to send.
 * 2. The feature bit feat_pending_send_sz is set to indicate whether
 *    the consumer of the ring will signal when the ring
 *    state transitions from being full to a state where
 *    there is room for the producer to send the pending packet.
 */

static bool hv_need_to_signal_on_read(u32 old_rd,
                                         struct hv_ring_buffer_info *rbi)
{
        u32 prev_write_sz;
        u32 cur_write_sz;
        u32 r_size;
        u32 write_loc = rbi->ring_buffer->write_index;
        u32 read_loc = rbi->ring_buffer->read_index;
        u32 pending_sz = rbi->ring_buffer->pending_send_sz;

        /*
         * If the other end is not blocked on write, don't bother.
         */
        if (pending_sz == 0)
                return false;

        r_size = rbi->ring_datasize;
        cur_write_sz = write_loc >= read_loc ? r_size - (write_loc - read_loc) :
                        read_loc - write_loc;

        prev_write_sz = write_loc >= old_rd ? r_size - (write_loc - old_rd) :
                        old_rd - write_loc;

        if ((prev_write_sz < pending_sz) && (cur_write_sz >= pending_sz))
                return true;

        return false;
}
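
/*
 * Worked example with illustrative numbers: ring_datasize = 4096,
 * write_index = 3000, pending_send_sz = 2048.  Before the read the
 * read_index was 500, so the producer saw 4096 - (3000 - 500) = 1596
 * free bytes and blocked.  If the read advances read_index to 1500,
 * the free space becomes 4096 - (3000 - 1500) = 2596 >= 2048: the ring
 * has just crossed the pending_send_sz threshold, so the blocked
 * producer must be signalled to retry its send.
 */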

/*
 * hv_get_next_write_location()
 *
 * Get the next write location for the specified ring buffer
 *
 */
static inline u32
hv_get_next_write_location(struct hv_ring_buffer_info *ring_info)
{
        u32 next = ring_info->ring_buffer->write_index;

        return next;
}

/*
 * hv_set_next_write_location()
 *
 * Set the next write location for the specified ring buffer
 *
 */
static inline void
hv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
                     u32 next_write_location)
{
        ring_info->ring_buffer->write_index = next_write_location;
}

/*
 * hv_get_next_read_location()
 *
 * Get the next read location for the specified ring buffer
 */
static inline u32
hv_get_next_read_location(struct hv_ring_buffer_info *ring_info)
{
        u32 next = ring_info->ring_buffer->read_index;

        return next;
}

/*
 * hv_get_next_readlocation_withoffset()
 *
 * Get the next read location + offset for the specified ring buffer.
 * This allows the caller to skip ahead by 'offset' bytes from the
 * current read location.
 */
static inline u32
hv_get_next_readlocation_withoffset(struct hv_ring_buffer_info *ring_info,
                                 u32 offset)
{
        u32 next = ring_info->ring_buffer->read_index;

        next += offset;
        next %= ring_info->ring_datasize;

        return next;
}

/*
 *
 * hv_set_next_read_location()
 *
 * Set the next read location for the specified ring buffer
 *
 */
static inline void
hv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
                    u32 next_read_location)
{
        ring_info->ring_buffer->read_index = next_read_location;
}


/*
 *
 * hv_get_ring_buffer()
 *
 * Get the start of the ring buffer
 */
static inline void *
hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info)
{
        return (void *)ring_info->ring_buffer->buffer;
}


/*
 *
 * hv_get_ring_buffersize()
 *
 * Get the size of the ring buffer
 */
static inline u32
hv_get_ring_buffersize(struct hv_ring_buffer_info *ring_info)
{
        return ring_info->ring_datasize;
}

/*
 *
 * hv_get_ring_bufferindices()
 *
 * Get the read and write indices, packed as a u64, for the specified
 * ring buffer. (Only the write index is filled in, in the upper 32 bits.)
 *
 */
static inline u64
hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
{
        return (u64)ring_info->ring_buffer->write_index << 32;
}

/*
 *
 * hv_copyfrom_ringbuffer()
 *
 * Helper routine to copy data out of the ring buffer into dest.
 * Assumes there is enough data available. Handles wrap-around on the
 * source (ring buffer) side only.
 *
 */
static u32 hv_copyfrom_ringbuffer(
        struct hv_ring_buffer_info      *ring_info,
        void                            *dest,
        u32                             destlen,
        u32                             start_read_offset)
{
        void *ring_buffer = hv_get_ring_buffer(ring_info);
        u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);

        u32 frag_len;

        /* wrap-around detected at the src */
        if (destlen > ring_buffer_size - start_read_offset) {
                frag_len = ring_buffer_size - start_read_offset;

                memcpy(dest, ring_buffer + start_read_offset, frag_len);
                memcpy(dest + frag_len, ring_buffer, destlen - frag_len);
        } else
                memcpy(dest, ring_buffer + start_read_offset, destlen);

        start_read_offset += destlen;
        start_read_offset %= ring_buffer_size;

        return start_read_offset;
}
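
/*
 * Worked example with illustrative numbers: ring_buffer_size = 4096,
 * start_read_offset = 4000, destlen = 200.  The copy wraps around:
 * frag_len = 4096 - 4000 = 96 bytes come from offset 4000, the remaining
 * 104 bytes from offset 0, and the returned offset is
 * (4000 + 200) % 4096 = 104.
 */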

/*
 *
 * hv_copyto_ringbuffer()
 *
 * Helper routine to copy data from src into the ring buffer.
 * Assumes there is enough room. Handles wrap-around on the
 * destination (ring buffer) side only.
 *
 */
static u32 hv_copyto_ringbuffer(
        struct hv_ring_buffer_info      *ring_info,
        u32                             start_write_offset,
        void                            *src,
        u32                             srclen)
{
        void *ring_buffer = hv_get_ring_buffer(ring_info);
        u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
        u32 frag_len;

        /* wrap-around detected! */
        if (srclen > ring_buffer_size - start_write_offset) {
                frag_len = ring_buffer_size - start_write_offset;
                memcpy(ring_buffer + start_write_offset, src, frag_len);
                memcpy(ring_buffer, src + frag_len, srclen - frag_len);
        } else
                memcpy(ring_buffer + start_write_offset, src, srclen);

        start_write_offset += srclen;
        start_write_offset %= ring_buffer_size;

        return start_write_offset;
}

/*
 *
 * hv_ringbuffer_get_debuginfo()
 *
 * Get various debug metrics for the specified ring buffer
 *
 */
void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
                            struct hv_ring_buffer_debug_info *debug_info)
{
        u32 bytes_avail_towrite;
        u32 bytes_avail_toread;

        if (ring_info->ring_buffer) {
                hv_get_ringbuffer_availbytes(ring_info,
                                        &bytes_avail_toread,
                                        &bytes_avail_towrite);

                debug_info->bytes_avail_toread = bytes_avail_toread;
                debug_info->bytes_avail_towrite = bytes_avail_towrite;
                debug_info->current_read_index =
                        ring_info->ring_buffer->read_index;
                debug_info->current_write_index =
                        ring_info->ring_buffer->write_index;
                debug_info->current_interrupt_mask =
                        ring_info->ring_buffer->interrupt_mask;
        }
}

/*
 *
 * hv_ringbuffer_init()
 *
 * Initialize the ring buffer
 *
 */
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
                   void *buffer, u32 buflen)
{
        if (sizeof(struct hv_ring_buffer) != PAGE_SIZE)
                return -EINVAL;

        memset(ring_info, 0, sizeof(struct hv_ring_buffer_info));

        ring_info->ring_buffer = (struct hv_ring_buffer *)buffer;
        ring_info->ring_buffer->read_index =
                ring_info->ring_buffer->write_index = 0;

        /*
         * Set the feature bit for enabling flow control.
         */
        ring_info->ring_buffer->feature_bits.value = 1;

        ring_info->ring_size = buflen;
        ring_info->ring_datasize = buflen - sizeof(struct hv_ring_buffer);

        spin_lock_init(&ring_info->ring_lock);

        return 0;
}
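
/*
 * Illustrative sketch (not part of this driver): a caller hands
 * hv_ringbuffer_init() a page-aligned buffer that is a multiple of
 * PAGE_SIZE.  The first page holds the struct hv_ring_buffer control area
 * (read/write indices, interrupt_mask, feature bits), which is why only
 * buflen - sizeof(struct hv_ring_buffer) bytes are usable as data.
 */
static int __maybe_unused example_ring_setup(struct hv_ring_buffer_info *info,
                                u32 pages)
{
        int order = get_order(pages * PAGE_SIZE);
        void *buf;
        int ret;

        buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
        if (!buf)
                return -ENOMEM;

        ret = hv_ringbuffer_init(info, buf, pages * PAGE_SIZE);
        if (ret)
                free_pages((unsigned long)buf, order);

        return ret;
}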

/*
 *
 * hv_ringbuffer_cleanup()
 *
 * Clean up the ring buffer
 *
 */
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
{
}

/*
 *
 * hv_ringbuffer_write()
 *
 * Write to the ring buffer
 *
 */
int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
                    struct kvec *kv_list, u32 kv_count, bool *signal)
{
        int i = 0;
        u32 bytes_avail_towrite;
        u32 bytes_avail_toread;
        u32 totalbytes_towrite = 0;

        u32 next_write_location;
        u32 old_write;
        u64 prev_indices = 0;
        unsigned long flags;

        for (i = 0; i < kv_count; i++)
                totalbytes_towrite += kv_list[i].iov_len;

        totalbytes_towrite += sizeof(u64);

        spin_lock_irqsave(&outring_info->ring_lock, flags);

        hv_get_ringbuffer_availbytes(outring_info,
                                &bytes_avail_toread,
                                &bytes_avail_towrite);

        /*
         * If there is only room for exactly this packet, treat the ring as
         * full. Otherwise, after the write, read index == write index and
         * the ring buffer would look empty on the next pass.
         */
        if (bytes_avail_towrite <= totalbytes_towrite) {
                spin_unlock_irqrestore(&outring_info->ring_lock, flags);
                return -EAGAIN;
        }

        /* Write to the ring buffer */
        next_write_location = hv_get_next_write_location(outring_info);

        old_write = next_write_location;

        for (i = 0; i < kv_count; i++) {
                next_write_location = hv_copyto_ringbuffer(outring_info,
                                                     next_write_location,
                                                     kv_list[i].iov_base,
                                                     kv_list[i].iov_len);
        }

        /* Set previous packet start */
        prev_indices = hv_get_ring_bufferindices(outring_info);

        next_write_location = hv_copyto_ringbuffer(outring_info,
                                             next_write_location,
                                             &prev_indices,
                                             sizeof(u64));

        /* Issue a full memory barrier before updating the write index */
        mb();

        /* Now, update the write location */
        hv_set_next_write_location(outring_info, next_write_location);

        spin_unlock_irqrestore(&outring_info->ring_lock, flags);

        *signal = hv_need_to_signal(old_write, outring_info);
        return 0;
}
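
/*
 * Illustrative sketch (not part of this driver): sending one packet built
 * from a header fragment and a payload fragment.  hv_ringbuffer_write()
 * copies the kvec fragments back to back, appends its own 8-byte index
 * trailer, and returns -EAGAIN unless the ring has strictly more free
 * space than the packet needs.  struct example_pkt_header is hypothetical.
 */
struct example_pkt_header {
        u32 type;
        u32 len;
};

static int __maybe_unused example_write_packet(
        struct hv_ring_buffer_info *outring,
        struct example_pkt_header *hdr,
        void *payload, u32 payload_len, bool *signal)
{
        struct kvec kv[2];

        kv[0].iov_base = hdr;
        kv[0].iov_len  = sizeof(*hdr);
        kv[1].iov_base = payload;
        kv[1].iov_len  = payload_len;

        return hv_ringbuffer_write(outring, kv, 2, signal);
}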

/*
 *
 * hv_ringbuffer_peek()
 *
 * Read without advancing the read index
 *
 */
int hv_ringbuffer_peek(struct hv_ring_buffer_info *Inring_info,
                   void *Buffer, u32 buflen)
{
        u32 bytes_avail_towrite;
        u32 bytes_avail_toread;
        u32 next_read_location = 0;
        unsigned long flags;

        spin_lock_irqsave(&Inring_info->ring_lock, flags);

        hv_get_ringbuffer_availbytes(Inring_info,
                                &bytes_avail_toread,
                                &bytes_avail_towrite);

        /* Make sure there is something to read */
        if (bytes_avail_toread < buflen) {
                spin_unlock_irqrestore(&Inring_info->ring_lock, flags);
                return -EAGAIN;
        }

        /* Convert to byte offset */
        next_read_location = hv_get_next_read_location(Inring_info);

        next_read_location = hv_copyfrom_ringbuffer(Inring_info,
                                                Buffer,
                                                buflen,
                                                next_read_location);

        spin_unlock_irqrestore(&Inring_info->ring_lock, flags);

        return 0;
}

/*
 *
 * hv_ringbuffer_read()
 *
 * Read and advance the read index
 *
 */
int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer,
                   u32 buflen, u32 offset, bool *signal)
{
        u32 bytes_avail_towrite;
        u32 bytes_avail_toread;
        u32 next_read_location = 0;
        u64 prev_indices = 0;
        unsigned long flags;
        u32 old_read;

        if (buflen <= 0)
                return -EINVAL;

        spin_lock_irqsave(&inring_info->ring_lock, flags);

        hv_get_ringbuffer_availbytes(inring_info,
                                &bytes_avail_toread,
                                &bytes_avail_towrite);

        old_read = bytes_avail_toread;

        /* Make sure there is something to read */
        if (bytes_avail_toread < buflen) {
                spin_unlock_irqrestore(&inring_info->ring_lock, flags);

                return -EAGAIN;
        }

        next_read_location =
                hv_get_next_readlocation_withoffset(inring_info, offset);

        next_read_location = hv_copyfrom_ringbuffer(inring_info,
                                                buffer,
                                                buflen,
                                                next_read_location);

        next_read_location = hv_copyfrom_ringbuffer(inring_info,
                                                &prev_indices,
                                                sizeof(u64),
                                                next_read_location);

        /*
         * Make sure all reads are done before we update the read index,
         * since the writer may start writing to the read area once the
         * read index is updated.
         */
        mb();

        /* Update the read index */
        hv_set_next_read_location(inring_info, next_read_location);

        spin_unlock_irqrestore(&inring_info->ring_lock, flags);

        *signal = hv_need_to_signal_on_read(old_read, inring_info);

        return 0;
}
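
/*
 * Illustrative sketch (not part of this driver): the usual consumption
 * pattern is to peek a fixed-size descriptor first (hv_ringbuffer_peek()
 * does not advance the read index), size the real read from it, and then
 * call hv_ringbuffer_read() with an offset that skips the descriptor.
 * hv_ringbuffer_read() consumes the 8-byte index trailer internally before
 * updating the read index.  struct example_desc and its fields are
 * hypothetical stand-ins for the real packet descriptor.
 */
struct example_desc {
        u32 total_len;  /* descriptor + payload, in bytes */
        u32 hdr_len;    /* descriptor size, in bytes */
};

static int __maybe_unused example_read_packet(
        struct hv_ring_buffer_info *inring,
        void *buf, u32 buflen, bool *signal)
{
        struct example_desc desc;
        u32 payload_len;
        int ret;

        ret = hv_ringbuffer_peek(inring, &desc, sizeof(desc));
        if (ret)
                return ret;     /* -EAGAIN: nothing to read yet */

        payload_len = desc.total_len - desc.hdr_len;
        if (payload_len > buflen)
                return -ENOBUFS;

        /* Skip the descriptor and copy only the payload. */
        return hv_ringbuffer_read(inring, buf, payload_len, desc.hdr_len,
                                  signal);
}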