Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / net / sunrpc / xprtrdma / svc_rdma_marshal.c
1 /*
2  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the BSD-type
8  * license below:
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  *      Redistributions of source code must retain the above copyright
15  *      notice, this list of conditions and the following disclaimer.
16  *
17  *      Redistributions in binary form must reproduce the above
18  *      copyright notice, this list of conditions and the following
19  *      disclaimer in the documentation and/or other materials provided
20  *      with the distribution.
21  *
22  *      Neither the name of the Network Appliance, Inc. nor the names of
23  *      its contributors may be used to endorse or promote products
24  *      derived from this software without specific prior written
25  *      permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38  *
39  * Author: Tom Tucker <tom@opengridcomputing.com>
40  */
41
42 #include <linux/sunrpc/xdr.h>
43 #include <linux/sunrpc/debug.h>
44 #include <asm/unaligned.h>
45 #include <linux/sunrpc/rpc_rdma.h>
46 #include <linux/sunrpc/svc_rdma.h>
47
48 #define RPCDBG_FACILITY RPCDBG_SVCXPRT
49
50 /*
51  * Decodes a read chunk list. The expected format is as follows:
52  *    descrim  : xdr_one
53  *    position : u32 offset into XDR stream
54  *    handle   : u32 RKEY
55  *    . . .
56  *  end-of-list: xdr_zero
57  */
58 static u32 *decode_read_list(u32 *va, u32 *vaend)
59 {
60         struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
61
62         while (ch->rc_discrim != xdr_zero) {
63                 if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
64                     (unsigned long)vaend) {
65                         dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
66                         return NULL;
67                 }
68                 ch++;
69         }
70         return (u32 *)&ch->rc_position;
71 }
72
73 /*
74  * Decodes a write chunk list. The expected format is as follows:
75  *    descrim  : xdr_one
76  *    nchunks  : <count>
77  *       handle   : u32 RKEY              ---+
78  *       length   : u32 <len of segment>     |
79  *       offset   : remove va                + <count>
80  *       . . .                               |
81  *                                        ---+
82  */
83 static u32 *decode_write_list(u32 *va, u32 *vaend)
84 {
85         unsigned long start, end;
86         int nchunks;
87
88         struct rpcrdma_write_array *ary =
89                 (struct rpcrdma_write_array *)va;
90
91         /* Check for not write-array */
92         if (ary->wc_discrim == xdr_zero)
93                 return (u32 *)&ary->wc_nchunks;
94
95         if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
96             (unsigned long)vaend) {
97                 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
98                 return NULL;
99         }
100         nchunks = ntohl(ary->wc_nchunks);
101
102         start = (unsigned long)&ary->wc_array[0];
103         end = (unsigned long)vaend;
104         if (nchunks < 0 ||
105             nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
106             (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
107                 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
108                         ary, nchunks, vaend);
109                 return NULL;
110         }
111         /*
112          * rs_length is the 2nd 4B field in wc_target and taking its
113          * address skips the list terminator
114          */
115         return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length;
116 }
117
118 static u32 *decode_reply_array(u32 *va, u32 *vaend)
119 {
120         unsigned long start, end;
121         int nchunks;
122         struct rpcrdma_write_array *ary =
123                 (struct rpcrdma_write_array *)va;
124
125         /* Check for no reply-array */
126         if (ary->wc_discrim == xdr_zero)
127                 return (u32 *)&ary->wc_nchunks;
128
129         if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
130             (unsigned long)vaend) {
131                 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
132                 return NULL;
133         }
134         nchunks = ntohl(ary->wc_nchunks);
135
136         start = (unsigned long)&ary->wc_array[0];
137         end = (unsigned long)vaend;
138         if (nchunks < 0 ||
139             nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
140             (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
141                 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
142                         ary, nchunks, vaend);
143                 return NULL;
144         }
145         return (u32 *)&ary->wc_array[nchunks];
146 }
147
148 int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
149                             struct svc_rqst *rqstp)
150 {
151         struct rpcrdma_msg *rmsgp = NULL;
152         u32 *va;
153         u32 *vaend;
154         u32 hdr_len;
155
156         rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
157
158         /* Verify that there's enough bytes for header + something */
159         if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
160                 dprintk("svcrdma: header too short = %d\n",
161                         rqstp->rq_arg.len);
162                 return -EINVAL;
163         }
164
165         /* Decode the header */
166         rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
167         rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
168         rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
169         rmsgp->rm_type = ntohl(rmsgp->rm_type);
170
171         if (rmsgp->rm_vers != RPCRDMA_VERSION)
172                 return -ENOSYS;
173
174         /* Pull in the extra for the padded case and bump our pointer */
175         if (rmsgp->rm_type == RDMA_MSGP) {
176                 int hdrlen;
177                 rmsgp->rm_body.rm_padded.rm_align =
178                         ntohl(rmsgp->rm_body.rm_padded.rm_align);
179                 rmsgp->rm_body.rm_padded.rm_thresh =
180                         ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
181
182                 va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
183                 rqstp->rq_arg.head[0].iov_base = va;
184                 hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
185                 rqstp->rq_arg.head[0].iov_len -= hdrlen;
186                 if (hdrlen > rqstp->rq_arg.len)
187                         return -EINVAL;
188                 return hdrlen;
189         }
190
191         /* The chunk list may contain either a read chunk list or a write
192          * chunk list and a reply chunk list.
193          */
194         va = &rmsgp->rm_body.rm_chunks[0];
195         vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
196         va = decode_read_list(va, vaend);
197         if (!va)
198                 return -EINVAL;
199         va = decode_write_list(va, vaend);
200         if (!va)
201                 return -EINVAL;
202         va = decode_reply_array(va, vaend);
203         if (!va)
204                 return -EINVAL;
205
206         rqstp->rq_arg.head[0].iov_base = va;
207         hdr_len = (unsigned long)va - (unsigned long)rmsgp;
208         rqstp->rq_arg.head[0].iov_len -= hdr_len;
209
210         *rdma_req = rmsgp;
211         return hdr_len;
212 }
213
214 int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
215 {
216         struct rpcrdma_msg *rmsgp = NULL;
217         struct rpcrdma_read_chunk *ch;
218         struct rpcrdma_write_array *ary;
219         u32 *va;
220         u32 hdrlen;
221
222         dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
223                 rqstp);
224         rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
225
226         /* Pull in the extra for the padded case and bump our pointer */
227         if (rmsgp->rm_type == RDMA_MSGP) {
228                 va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
229                 rqstp->rq_arg.head[0].iov_base = va;
230                 hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
231                 rqstp->rq_arg.head[0].iov_len -= hdrlen;
232                 return hdrlen;
233         }
234
235         /*
236          * Skip all chunks to find RPC msg. These were previously processed
237          */
238         va = &rmsgp->rm_body.rm_chunks[0];
239
240         /* Skip read-list */
241         for (ch = (struct rpcrdma_read_chunk *)va;
242              ch->rc_discrim != xdr_zero; ch++);
243         va = (u32 *)&ch->rc_position;
244
245         /* Skip write-list */
246         ary = (struct rpcrdma_write_array *)va;
247         if (ary->wc_discrim == xdr_zero)
248                 va = (u32 *)&ary->wc_nchunks;
249         else
250                 /*
251                  * rs_length is the 2nd 4B field in wc_target and taking its
252                  * address skips the list terminator
253                  */
254                 va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
255
256         /* Skip reply-array */
257         ary = (struct rpcrdma_write_array *)va;
258         if (ary->wc_discrim == xdr_zero)
259                 va = (u32 *)&ary->wc_nchunks;
260         else
261                 va = (u32 *)&ary->wc_array[ary->wc_nchunks];
262
263         rqstp->rq_arg.head[0].iov_base = va;
264         hdrlen = (unsigned long)va - (unsigned long)rmsgp;
265         rqstp->rq_arg.head[0].iov_len -= hdrlen;
266
267         return hdrlen;
268 }
269
270 int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
271                               struct rpcrdma_msg *rmsgp,
272                               enum rpcrdma_errcode err, u32 *va)
273 {
274         u32 *startp = va;
275
276         *va++ = htonl(rmsgp->rm_xid);
277         *va++ = htonl(rmsgp->rm_vers);
278         *va++ = htonl(xprt->sc_max_requests);
279         *va++ = htonl(RDMA_ERROR);
280         *va++ = htonl(err);
281         if (err == ERR_VERS) {
282                 *va++ = htonl(RPCRDMA_VERSION);
283                 *va++ = htonl(RPCRDMA_VERSION);
284         }
285
286         return (int)((unsigned long)va - (unsigned long)startp);
287 }
288
289 int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
290 {
291         struct rpcrdma_write_array *wr_ary;
292
293         /* There is no read-list in a reply */
294
295         /* skip write list */
296         wr_ary = (struct rpcrdma_write_array *)
297                 &rmsgp->rm_body.rm_chunks[1];
298         if (wr_ary->wc_discrim)
299                 wr_ary = (struct rpcrdma_write_array *)
300                         &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
301                         wc_target.rs_length;
302         else
303                 wr_ary = (struct rpcrdma_write_array *)
304                         &wr_ary->wc_nchunks;
305
306         /* skip reply array */
307         if (wr_ary->wc_discrim)
308                 wr_ary = (struct rpcrdma_write_array *)
309                         &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
310         else
311                 wr_ary = (struct rpcrdma_write_array *)
312                         &wr_ary->wc_nchunks;
313
314         return (unsigned long) wr_ary - (unsigned long) rmsgp;
315 }
316
317 void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
318 {
319         struct rpcrdma_write_array *ary;
320
321         /* no read-list */
322         rmsgp->rm_body.rm_chunks[0] = xdr_zero;
323
324         /* write-array discrim */
325         ary = (struct rpcrdma_write_array *)
326                 &rmsgp->rm_body.rm_chunks[1];
327         ary->wc_discrim = xdr_one;
328         ary->wc_nchunks = htonl(chunks);
329
330         /* write-list terminator */
331         ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
332
333         /* reply-array discriminator */
334         ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
335 }
336
337 void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
338                                  int chunks)
339 {
340         ary->wc_discrim = xdr_one;
341         ary->wc_nchunks = htonl(chunks);
342 }
343
344 void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
345                                      int chunk_no,
346                                      __be32 rs_handle,
347                                      __be64 rs_offset,
348                                      u32 write_len)
349 {
350         struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
351         seg->rs_handle = rs_handle;
352         seg->rs_offset = rs_offset;
353         seg->rs_length = htonl(write_len);
354 }
355
356 void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
357                                   struct rpcrdma_msg *rdma_argp,
358                                   struct rpcrdma_msg *rdma_resp,
359                                   enum rpcrdma_proc rdma_type)
360 {
361         rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
362         rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
363         rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
364         rdma_resp->rm_type = htonl(rdma_type);
365
366         /* Encode <nul> chunks lists */
367         rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
368         rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
369         rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
370 }