These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / fs / nfs / blocklayout / dev.c
1 /*
2  * Copyright (c) 2014 Christoph Hellwig.
3  */
4 #include <linux/sunrpc/svc.h>
5 #include <linux/blkdev.h>
6 #include <linux/nfs4.h>
7 #include <linux/nfs_fs.h>
8 #include <linux/nfs_xdr.h>
9
10 #include "blocklayout.h"
11
12 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
13
14 static void
15 bl_free_device(struct pnfs_block_dev *dev)
16 {
17         if (dev->nr_children) {
18                 int i;
19
20                 for (i = 0; i < dev->nr_children; i++)
21                         bl_free_device(&dev->children[i]);
22                 kfree(dev->children);
23         } else {
24                 if (dev->bdev)
25                         blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
26         }
27 }
28
29 void
30 bl_free_deviceid_node(struct nfs4_deviceid_node *d)
31 {
32         struct pnfs_block_dev *dev =
33                 container_of(d, struct pnfs_block_dev, node);
34
35         bl_free_device(dev);
36         kfree_rcu(dev, node.rcu);
37 }
38
39 static int
40 nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
41 {
42         __be32 *p;
43         int i;
44
45         p = xdr_inline_decode(xdr, 4);
46         if (!p)
47                 return -EIO;
48         b->type = be32_to_cpup(p++);
49
50         switch (b->type) {
51         case PNFS_BLOCK_VOLUME_SIMPLE:
52                 p = xdr_inline_decode(xdr, 4);
53                 if (!p)
54                         return -EIO;
55                 b->simple.nr_sigs = be32_to_cpup(p++);
56                 if (!b->simple.nr_sigs) {
57                         dprintk("no signature\n");
58                         return -EIO;
59                 }
60
61                 b->simple.len = 4 + 4;
62                 for (i = 0; i < b->simple.nr_sigs; i++) {
63                         p = xdr_inline_decode(xdr, 8 + 4);
64                         if (!p)
65                                 return -EIO;
66                         p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
67                         b->simple.sigs[i].sig_len = be32_to_cpup(p++);
68                         if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) {
69                                 pr_info("signature too long: %d\n",
70                                         b->simple.sigs[i].sig_len);
71                                 return -EIO;
72                         }
73
74                         p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
75                         if (!p)
76                                 return -EIO;
77                         memcpy(&b->simple.sigs[i].sig, p,
78                                 b->simple.sigs[i].sig_len);
79
80                         b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len;
81                 }
82                 break;
83         case PNFS_BLOCK_VOLUME_SLICE:
84                 p = xdr_inline_decode(xdr, 8 + 8 + 4);
85                 if (!p)
86                         return -EIO;
87                 p = xdr_decode_hyper(p, &b->slice.start);
88                 p = xdr_decode_hyper(p, &b->slice.len);
89                 b->slice.volume = be32_to_cpup(p++);
90                 break;
91         case PNFS_BLOCK_VOLUME_CONCAT:
92                 p = xdr_inline_decode(xdr, 4);
93                 if (!p)
94                         return -EIO;
95                 b->concat.volumes_count = be32_to_cpup(p++);
96
97                 p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
98                 if (!p)
99                         return -EIO;
100                 for (i = 0; i < b->concat.volumes_count; i++)
101                         b->concat.volumes[i] = be32_to_cpup(p++);
102                 break;
103         case PNFS_BLOCK_VOLUME_STRIPE:
104                 p = xdr_inline_decode(xdr, 8 + 4);
105                 if (!p)
106                         return -EIO;
107                 p = xdr_decode_hyper(p, &b->stripe.chunk_size);
108                 b->stripe.volumes_count = be32_to_cpup(p++);
109
110                 p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
111                 if (!p)
112                         return -EIO;
113                 for (i = 0; i < b->stripe.volumes_count; i++)
114                         b->stripe.volumes[i] = be32_to_cpup(p++);
115                 break;
116         default:
117                 dprintk("unknown volume type!\n");
118                 return -EIO;
119         }
120
121         return 0;
122 }
123
124 static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
125                 struct pnfs_block_dev_map *map)
126 {
127         map->start = dev->start;
128         map->len = dev->len;
129         map->disk_offset = dev->disk_offset;
130         map->bdev = dev->bdev;
131         return true;
132 }
133
134 static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
135                 struct pnfs_block_dev_map *map)
136 {
137         int i;
138
139         for (i = 0; i < dev->nr_children; i++) {
140                 struct pnfs_block_dev *child = &dev->children[i];
141
142                 if (child->start > offset ||
143                     child->start + child->len <= offset)
144                         continue;
145
146                 child->map(child, offset - child->start, map);
147                 return true;
148         }
149
150         dprintk("%s: ran off loop!\n", __func__);
151         return false;
152 }
153
154 static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
155                 struct pnfs_block_dev_map *map)
156 {
157         struct pnfs_block_dev *child;
158         u64 chunk;
159         u32 chunk_idx;
160         u64 disk_offset;
161
162         chunk = div_u64(offset, dev->chunk_size);
163         div_u64_rem(chunk, dev->nr_children, &chunk_idx);
164
165         if (chunk_idx > dev->nr_children) {
166                 dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
167                         __func__, chunk_idx, offset, dev->chunk_size);
168                 /* error, should not happen */
169                 return false;
170         }
171
172         /* truncate offset to the beginning of the stripe */
173         offset = chunk * dev->chunk_size;
174
175         /* disk offset of the stripe */
176         disk_offset = div_u64(offset, dev->nr_children);
177
178         child = &dev->children[chunk_idx];
179         child->map(child, disk_offset, map);
180
181         map->start += offset;
182         map->disk_offset += disk_offset;
183         map->len = dev->chunk_size;
184         return true;
185 }
186
187 static int
188 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
189                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
190
191
192 static int
193 bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
194                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
195 {
196         struct pnfs_block_volume *v = &volumes[idx];
197         dev_t dev;
198
199         dev = bl_resolve_deviceid(server, v, gfp_mask);
200         if (!dev)
201                 return -EIO;
202
203         d->bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
204         if (IS_ERR(d->bdev)) {
205                 printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
206                         MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
207                 return PTR_ERR(d->bdev);
208         }
209
210
211         d->len = i_size_read(d->bdev->bd_inode);
212         d->map = bl_map_simple;
213
214         printk(KERN_INFO "pNFS: using block device %s\n",
215                 d->bdev->bd_disk->disk_name);
216         return 0;
217 }
218
219 static int
220 bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
221                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
222 {
223         struct pnfs_block_volume *v = &volumes[idx];
224         int ret;
225
226         ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
227         if (ret)
228                 return ret;
229
230         d->disk_offset = v->slice.start;
231         d->len = v->slice.len;
232         return 0;
233 }
234
235 static int
236 bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
237                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
238 {
239         struct pnfs_block_volume *v = &volumes[idx];
240         u64 len = 0;
241         int ret, i;
242
243         d->children = kcalloc(v->concat.volumes_count,
244                         sizeof(struct pnfs_block_dev), GFP_KERNEL);
245         if (!d->children)
246                 return -ENOMEM;
247
248         for (i = 0; i < v->concat.volumes_count; i++) {
249                 ret = bl_parse_deviceid(server, &d->children[i],
250                                 volumes, v->concat.volumes[i], gfp_mask);
251                 if (ret)
252                         return ret;
253
254                 d->nr_children++;
255                 d->children[i].start += len;
256                 len += d->children[i].len;
257         }
258
259         d->len = len;
260         d->map = bl_map_concat;
261         return 0;
262 }
263
264 static int
265 bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
266                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
267 {
268         struct pnfs_block_volume *v = &volumes[idx];
269         u64 len = 0;
270         int ret, i;
271
272         d->children = kcalloc(v->stripe.volumes_count,
273                         sizeof(struct pnfs_block_dev), GFP_KERNEL);
274         if (!d->children)
275                 return -ENOMEM;
276
277         for (i = 0; i < v->stripe.volumes_count; i++) {
278                 ret = bl_parse_deviceid(server, &d->children[i],
279                                 volumes, v->stripe.volumes[i], gfp_mask);
280                 if (ret)
281                         return ret;
282
283                 d->nr_children++;
284                 len += d->children[i].len;
285         }
286
287         d->len = len;
288         d->chunk_size = v->stripe.chunk_size;
289         d->map = bl_map_stripe;
290         return 0;
291 }
292
293 static int
294 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
295                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
296 {
297         switch (volumes[idx].type) {
298         case PNFS_BLOCK_VOLUME_SIMPLE:
299                 return bl_parse_simple(server, d, volumes, idx, gfp_mask);
300         case PNFS_BLOCK_VOLUME_SLICE:
301                 return bl_parse_slice(server, d, volumes, idx, gfp_mask);
302         case PNFS_BLOCK_VOLUME_CONCAT:
303                 return bl_parse_concat(server, d, volumes, idx, gfp_mask);
304         case PNFS_BLOCK_VOLUME_STRIPE:
305                 return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
306         default:
307                 dprintk("unsupported volume type: %d\n", volumes[idx].type);
308                 return -EIO;
309         }
310 }
311
312 struct nfs4_deviceid_node *
313 bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
314                 gfp_t gfp_mask)
315 {
316         struct nfs4_deviceid_node *node = NULL;
317         struct pnfs_block_volume *volumes;
318         struct pnfs_block_dev *top;
319         struct xdr_stream xdr;
320         struct xdr_buf buf;
321         struct page *scratch;
322         int nr_volumes, ret, i;
323         __be32 *p;
324
325         scratch = alloc_page(gfp_mask);
326         if (!scratch)
327                 goto out;
328
329         xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
330         xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
331
332         p = xdr_inline_decode(&xdr, sizeof(__be32));
333         if (!p)
334                 goto out_free_scratch;
335         nr_volumes = be32_to_cpup(p++);
336
337         volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
338                           gfp_mask);
339         if (!volumes)
340                 goto out_free_scratch;
341
342         for (i = 0; i < nr_volumes; i++) {
343                 ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
344                 if (ret < 0)
345                         goto out_free_volumes;
346         }
347
348         top = kzalloc(sizeof(*top), gfp_mask);
349         if (!top)
350                 goto out_free_volumes;
351
352         ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
353         if (ret) {
354                 bl_free_device(top);
355                 kfree(top);
356                 goto out_free_volumes;
357         }
358
359         node = &top->node;
360         nfs4_init_deviceid_node(node, server, &pdev->dev_id);
361
362 out_free_volumes:
363         kfree(volumes);
364 out_free_scratch:
365         __free_page(scratch);
366 out:
367         return node;
368 }