Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / fs / nfs / blocklayout / dev.c
1 /*
2  * Copyright (c) 2014 Christoph Hellwig.
3  */
4 #include <linux/sunrpc/svc.h>
5 #include <linux/blkdev.h>
6 #include <linux/nfs4.h>
7 #include <linux/nfs_fs.h>
8 #include <linux/nfs_xdr.h>
9
10 #include "blocklayout.h"
11
12 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
13
14 static void
15 bl_free_device(struct pnfs_block_dev *dev)
16 {
17         if (dev->nr_children) {
18                 int i;
19
20                 for (i = 0; i < dev->nr_children; i++)
21                         bl_free_device(&dev->children[i]);
22                 kfree(dev->children);
23         } else {
24                 if (dev->bdev)
25                         blkdev_put(dev->bdev, FMODE_READ);
26         }
27 }
28
29 void
30 bl_free_deviceid_node(struct nfs4_deviceid_node *d)
31 {
32         struct pnfs_block_dev *dev =
33                 container_of(d, struct pnfs_block_dev, node);
34
35         bl_free_device(dev);
36         kfree_rcu(dev, node.rcu);
37 }
38
39 static int
40 nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
41 {
42         __be32 *p;
43         int i;
44
45         p = xdr_inline_decode(xdr, 4);
46         if (!p)
47                 return -EIO;
48         b->type = be32_to_cpup(p++);
49
50         switch (b->type) {
51         case PNFS_BLOCK_VOLUME_SIMPLE:
52                 p = xdr_inline_decode(xdr, 4);
53                 if (!p)
54                         return -EIO;
55                 b->simple.nr_sigs = be32_to_cpup(p++);
56                 if (!b->simple.nr_sigs) {
57                         dprintk("no signature\n");
58                         return -EIO;
59                 }
60
61                 b->simple.len = 4 + 4;
62                 for (i = 0; i < b->simple.nr_sigs; i++) {
63                         p = xdr_inline_decode(xdr, 8 + 4);
64                         if (!p)
65                                 return -EIO;
66                         p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
67                         b->simple.sigs[i].sig_len = be32_to_cpup(p++);
68
69                         p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
70                         if (!p)
71                                 return -EIO;
72                         memcpy(&b->simple.sigs[i].sig, p,
73                                 b->simple.sigs[i].sig_len);
74
75                         b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len;
76                 }
77                 break;
78         case PNFS_BLOCK_VOLUME_SLICE:
79                 p = xdr_inline_decode(xdr, 8 + 8 + 4);
80                 if (!p)
81                         return -EIO;
82                 p = xdr_decode_hyper(p, &b->slice.start);
83                 p = xdr_decode_hyper(p, &b->slice.len);
84                 b->slice.volume = be32_to_cpup(p++);
85                 break;
86         case PNFS_BLOCK_VOLUME_CONCAT:
87                 p = xdr_inline_decode(xdr, 4);
88                 if (!p)
89                         return -EIO;
90                 b->concat.volumes_count = be32_to_cpup(p++);
91
92                 p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
93                 if (!p)
94                         return -EIO;
95                 for (i = 0; i < b->concat.volumes_count; i++)
96                         b->concat.volumes[i] = be32_to_cpup(p++);
97                 break;
98         case PNFS_BLOCK_VOLUME_STRIPE:
99                 p = xdr_inline_decode(xdr, 8 + 4);
100                 if (!p)
101                         return -EIO;
102                 p = xdr_decode_hyper(p, &b->stripe.chunk_size);
103                 b->stripe.volumes_count = be32_to_cpup(p++);
104
105                 p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
106                 if (!p)
107                         return -EIO;
108                 for (i = 0; i < b->stripe.volumes_count; i++)
109                         b->stripe.volumes[i] = be32_to_cpup(p++);
110                 break;
111         default:
112                 dprintk("unknown volume type!\n");
113                 return -EIO;
114         }
115
116         return 0;
117 }
118
119 static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
120                 struct pnfs_block_dev_map *map)
121 {
122         map->start = dev->start;
123         map->len = dev->len;
124         map->disk_offset = dev->disk_offset;
125         map->bdev = dev->bdev;
126         return true;
127 }
128
129 static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
130                 struct pnfs_block_dev_map *map)
131 {
132         int i;
133
134         for (i = 0; i < dev->nr_children; i++) {
135                 struct pnfs_block_dev *child = &dev->children[i];
136
137                 if (child->start > offset ||
138                     child->start + child->len <= offset)
139                         continue;
140
141                 child->map(child, offset - child->start, map);
142                 return true;
143         }
144
145         dprintk("%s: ran off loop!\n", __func__);
146         return false;
147 }
148
149 static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
150                 struct pnfs_block_dev_map *map)
151 {
152         struct pnfs_block_dev *child;
153         u64 chunk;
154         u32 chunk_idx;
155         u64 disk_offset;
156
157         chunk = div_u64(offset, dev->chunk_size);
158         div_u64_rem(chunk, dev->nr_children, &chunk_idx);
159
160         if (chunk_idx > dev->nr_children) {
161                 dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
162                         __func__, chunk_idx, offset, dev->chunk_size);
163                 /* error, should not happen */
164                 return false;
165         }
166
167         /* truncate offset to the beginning of the stripe */
168         offset = chunk * dev->chunk_size;
169
170         /* disk offset of the stripe */
171         disk_offset = div_u64(offset, dev->nr_children);
172
173         child = &dev->children[chunk_idx];
174         child->map(child, disk_offset, map);
175
176         map->start += offset;
177         map->disk_offset += disk_offset;
178         map->len = dev->chunk_size;
179         return true;
180 }
181
182 static int
183 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
184                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
185
186
187 static int
188 bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
189                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
190 {
191         struct pnfs_block_volume *v = &volumes[idx];
192         dev_t dev;
193
194         dev = bl_resolve_deviceid(server, v, gfp_mask);
195         if (!dev)
196                 return -EIO;
197
198         d->bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL);
199         if (IS_ERR(d->bdev)) {
200                 printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
201                         MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
202                 return PTR_ERR(d->bdev);
203         }
204
205
206         d->len = i_size_read(d->bdev->bd_inode);
207         d->map = bl_map_simple;
208
209         printk(KERN_INFO "pNFS: using block device %s\n",
210                 d->bdev->bd_disk->disk_name);
211         return 0;
212 }
213
214 static int
215 bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
216                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
217 {
218         struct pnfs_block_volume *v = &volumes[idx];
219         int ret;
220
221         ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
222         if (ret)
223                 return ret;
224
225         d->disk_offset = v->slice.start;
226         d->len = v->slice.len;
227         return 0;
228 }
229
230 static int
231 bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
232                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
233 {
234         struct pnfs_block_volume *v = &volumes[idx];
235         u64 len = 0;
236         int ret, i;
237
238         d->children = kcalloc(v->concat.volumes_count,
239                         sizeof(struct pnfs_block_dev), GFP_KERNEL);
240         if (!d->children)
241                 return -ENOMEM;
242
243         for (i = 0; i < v->concat.volumes_count; i++) {
244                 ret = bl_parse_deviceid(server, &d->children[i],
245                                 volumes, v->concat.volumes[i], gfp_mask);
246                 if (ret)
247                         return ret;
248
249                 d->nr_children++;
250                 d->children[i].start += len;
251                 len += d->children[i].len;
252         }
253
254         d->len = len;
255         d->map = bl_map_concat;
256         return 0;
257 }
258
259 static int
260 bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
261                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
262 {
263         struct pnfs_block_volume *v = &volumes[idx];
264         u64 len = 0;
265         int ret, i;
266
267         d->children = kcalloc(v->stripe.volumes_count,
268                         sizeof(struct pnfs_block_dev), GFP_KERNEL);
269         if (!d->children)
270                 return -ENOMEM;
271
272         for (i = 0; i < v->stripe.volumes_count; i++) {
273                 ret = bl_parse_deviceid(server, &d->children[i],
274                                 volumes, v->stripe.volumes[i], gfp_mask);
275                 if (ret)
276                         return ret;
277
278                 d->nr_children++;
279                 len += d->children[i].len;
280         }
281
282         d->len = len;
283         d->chunk_size = v->stripe.chunk_size;
284         d->map = bl_map_stripe;
285         return 0;
286 }
287
288 static int
289 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
290                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
291 {
292         switch (volumes[idx].type) {
293         case PNFS_BLOCK_VOLUME_SIMPLE:
294                 return bl_parse_simple(server, d, volumes, idx, gfp_mask);
295         case PNFS_BLOCK_VOLUME_SLICE:
296                 return bl_parse_slice(server, d, volumes, idx, gfp_mask);
297         case PNFS_BLOCK_VOLUME_CONCAT:
298                 return bl_parse_concat(server, d, volumes, idx, gfp_mask);
299         case PNFS_BLOCK_VOLUME_STRIPE:
300                 return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
301         default:
302                 dprintk("unsupported volume type: %d\n", volumes[idx].type);
303                 return -EIO;
304         }
305 }
306
307 struct nfs4_deviceid_node *
308 bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
309                 gfp_t gfp_mask)
310 {
311         struct nfs4_deviceid_node *node = NULL;
312         struct pnfs_block_volume *volumes;
313         struct pnfs_block_dev *top;
314         struct xdr_stream xdr;
315         struct xdr_buf buf;
316         struct page *scratch;
317         int nr_volumes, ret, i;
318         __be32 *p;
319
320         scratch = alloc_page(gfp_mask);
321         if (!scratch)
322                 goto out;
323
324         xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
325         xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
326
327         p = xdr_inline_decode(&xdr, sizeof(__be32));
328         if (!p)
329                 goto out_free_scratch;
330         nr_volumes = be32_to_cpup(p++);
331
332         volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
333                           gfp_mask);
334         if (!volumes)
335                 goto out_free_scratch;
336
337         for (i = 0; i < nr_volumes; i++) {
338                 ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
339                 if (ret < 0)
340                         goto out_free_volumes;
341         }
342
343         top = kzalloc(sizeof(*top), gfp_mask);
344         if (!top)
345                 goto out_free_volumes;
346
347         ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
348         if (ret) {
349                 bl_free_device(top);
350                 kfree(top);
351                 goto out_free_volumes;
352         }
353
354         node = &top->node;
355         nfs4_init_deviceid_node(node, server, &pdev->dev_id);
356
357 out_free_volumes:
358         kfree(volumes);
359 out_free_scratch:
360         __free_page(scratch);
361 out:
362         return node;
363 }