These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / drivers / staging / lustre / lustre / lov / lov_pack.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lov/lov_pack.c
37  *
38  * (Un)packing of OST/MDS requests
39  *
40  * Author: Andreas Dilger <adilger@clusterfs.com>
41  */
42
43 #define DEBUG_SUBSYSTEM S_LOV
44
45 #include "../include/lustre_net.h"
46 #include "../include/obd.h"
47 #include "../include/obd_class.h"
48 #include "../include/obd_support.h"
49 #include "../include/lustre/lustre_user.h"
50
51 #include "lov_internal.h"
52
53 void lov_dump_lmm_common(int level, void *lmmp)
54 {
55         struct lov_mds_md *lmm = lmmp;
56         struct ost_id   oi;
57
58         lmm_oi_le_to_cpu(&oi, &lmm->lmm_oi);
59         CDEBUG(level, "objid "DOSTID", magic 0x%08x, pattern %#x\n",
60                POSTID(&oi), le32_to_cpu(lmm->lmm_magic),
61                le32_to_cpu(lmm->lmm_pattern));
62         CDEBUG(level, "stripe_size %u, stripe_count %u, layout_gen %u\n",
63                le32_to_cpu(lmm->lmm_stripe_size),
64                le16_to_cpu(lmm->lmm_stripe_count),
65                le16_to_cpu(lmm->lmm_layout_gen));
66 }
67
68 static void lov_dump_lmm_objects(int level, struct lov_ost_data *lod,
69                                  int stripe_count)
70 {
71         int i;
72
73         if (stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
74                 CDEBUG(level, "bad stripe_count %u > max_stripe_count %u\n",
75                        stripe_count, LOV_V1_INSANE_STRIPE_COUNT);
76                 return;
77         }
78
79         for (i = 0; i < stripe_count; ++i, ++lod) {
80                 struct ost_id   oi;
81
82                 ostid_le_to_cpu(&lod->l_ost_oi, &oi);
83                 CDEBUG(level, "stripe %u idx %u subobj "DOSTID"\n", i,
84                        le32_to_cpu(lod->l_ost_idx), POSTID(&oi));
85         }
86 }
87
88 void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm)
89 {
90         lov_dump_lmm_common(level, lmm);
91         lov_dump_lmm_objects(level, lmm->lmm_objects,
92                              le16_to_cpu(lmm->lmm_stripe_count));
93 }
94
95 void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm)
96 {
97         lov_dump_lmm_common(level, lmm);
98         CDEBUG(level, "pool_name "LOV_POOLNAMEF"\n", lmm->lmm_pool_name);
99         lov_dump_lmm_objects(level, lmm->lmm_objects,
100                              le16_to_cpu(lmm->lmm_stripe_count));
101 }
102
103 /* Pack LOV object metadata for disk storage.  It is packed in LE byte
104  * order and is opaque to the networking layer.
105  *
106  * XXX In the future, this will be enhanced to get the EA size from the
107  *     underlying OSC device(s) to get their EA sizes so we can stack
108  *     LOVs properly.  For now lov_mds_md_size() just assumes one u64
109  *     per stripe.
110  */
111 int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
112                struct lov_stripe_md *lsm)
113 {
114         struct obd_device *obd = class_exp2obd(exp);
115         struct lov_obd *lov = &obd->u.lov;
116         struct lov_mds_md_v1 *lmmv1;
117         struct lov_mds_md_v3 *lmmv3;
118         __u16 stripe_count;
119         struct lov_ost_data_v1 *lmm_objects;
120         int lmm_size, lmm_magic;
121         int i;
122         int cplen = 0;
123
124         if (lsm) {
125                 lmm_magic = lsm->lsm_magic;
126         } else {
127                 if (lmmp && *lmmp)
128                         lmm_magic = le32_to_cpu((*lmmp)->lmm_magic);
129                 else
130                         /* lsm == NULL and lmmp == NULL */
131                         lmm_magic = LOV_MAGIC;
132         }
133
134         if ((lmm_magic != LOV_MAGIC_V1) &&
135             (lmm_magic != LOV_MAGIC_V3)) {
136                 CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
137                         lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
138                 return -EINVAL;
139
140         }
141
142         if (lsm) {
143                 /* If we are just sizing the EA, limit the stripe count
144                  * to the actual number of OSTs in this filesystem. */
145                 if (!lmmp) {
146                         stripe_count = lov_get_stripecnt(lov, lmm_magic,
147                                                         lsm->lsm_stripe_count);
148                         lsm->lsm_stripe_count = stripe_count;
149                 } else if (!lsm_is_released(lsm)) {
150                         stripe_count = lsm->lsm_stripe_count;
151                 } else {
152                         stripe_count = 0;
153                 }
154         } else {
155                 /* No need to allocate more than maximum supported stripes.
156                  * Anyway, this is pretty inaccurate since ld_tgt_count now
157                  * represents max index and we should rely on the actual number
158                  * of OSTs instead */
159                 stripe_count = lov_mds_md_max_stripe_count(
160                         lov->lov_ocd.ocd_max_easize, lmm_magic);
161
162                 if (stripe_count > lov->desc.ld_tgt_count)
163                         stripe_count = lov->desc.ld_tgt_count;
164         }
165
166         /* XXX LOV STACKING call into osc for sizes */
167         lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
168
169         if (!lmmp)
170                 return lmm_size;
171
172         if (*lmmp && !lsm) {
173                 stripe_count = le16_to_cpu((*lmmp)->lmm_stripe_count);
174                 lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
175                 kvfree(*lmmp);
176                 *lmmp = NULL;
177                 return 0;
178         }
179
180         if (!*lmmp) {
181                 *lmmp = libcfs_kvzalloc(lmm_size, GFP_NOFS);
182                 if (!*lmmp)
183                         return -ENOMEM;
184         }
185
186         CDEBUG(D_INFO, "lov_packmd: LOV_MAGIC 0x%08X, lmm_size = %d \n",
187                lmm_magic, lmm_size);
188
189         lmmv1 = *lmmp;
190         lmmv3 = (struct lov_mds_md_v3 *)*lmmp;
191         if (lmm_magic == LOV_MAGIC_V3)
192                 lmmv3->lmm_magic = cpu_to_le32(LOV_MAGIC_V3);
193         else
194                 lmmv1->lmm_magic = cpu_to_le32(LOV_MAGIC_V1);
195
196         if (!lsm)
197                 return lmm_size;
198
199         /* lmmv1 and lmmv3 point to the same struct and have the
200          * same first fields
201          */
202         lmm_oi_cpu_to_le(&lmmv1->lmm_oi, &lsm->lsm_oi);
203         lmmv1->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size);
204         lmmv1->lmm_stripe_count = cpu_to_le16(stripe_count);
205         lmmv1->lmm_pattern = cpu_to_le32(lsm->lsm_pattern);
206         lmmv1->lmm_layout_gen = cpu_to_le16(lsm->lsm_layout_gen);
207         if (lsm->lsm_magic == LOV_MAGIC_V3) {
208                 cplen = strlcpy(lmmv3->lmm_pool_name, lsm->lsm_pool_name,
209                                 sizeof(lmmv3->lmm_pool_name));
210                 if (cplen >= sizeof(lmmv3->lmm_pool_name))
211                         return -E2BIG;
212                 lmm_objects = lmmv3->lmm_objects;
213         } else {
214                 lmm_objects = lmmv1->lmm_objects;
215         }
216
217         for (i = 0; i < stripe_count; i++) {
218                 struct lov_oinfo *loi = lsm->lsm_oinfo[i];
219                 /* XXX LOV STACKING call down to osc_packmd() to do packing */
220                 LASSERTF(ostid_id(&loi->loi_oi) != 0, "lmm_oi "DOSTID
221                          " stripe %u/%u idx %u\n", POSTID(&lmmv1->lmm_oi),
222                          i, stripe_count, loi->loi_ost_idx);
223                 ostid_cpu_to_le(&loi->loi_oi, &lmm_objects[i].l_ost_oi);
224                 lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen);
225                 lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx);
226         }
227
228         return lmm_size;
229 }
230
231 /* Find the max stripecount we should use */
232 __u16 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u16 stripe_count)
233 {
234         __u32 max_stripes = LOV_MAX_STRIPE_COUNT_OLD;
235
236         if (!stripe_count)
237                 stripe_count = lov->desc.ld_default_stripe_count;
238         if (stripe_count > lov->desc.ld_active_tgt_count)
239                 stripe_count = lov->desc.ld_active_tgt_count;
240         if (!stripe_count)
241                 stripe_count = 1;
242
243         /* stripe count is based on whether ldiskfs can handle
244          * larger EA sizes */
245         if (lov->lov_ocd.ocd_connect_flags & OBD_CONNECT_MAX_EASIZE &&
246             lov->lov_ocd.ocd_max_easize)
247                 max_stripes = lov_mds_md_max_stripe_count(
248                         lov->lov_ocd.ocd_max_easize, magic);
249
250         if (stripe_count > max_stripes)
251                 stripe_count = max_stripes;
252
253         return stripe_count;
254 }
255
256 static int lov_verify_lmm(void *lmm, int lmm_bytes, __u16 *stripe_count)
257 {
258         int rc;
259
260         if (lsm_op_find(le32_to_cpu(*(__u32 *)lmm)) == NULL) {
261                 char *buffer;
262                 int sz;
263
264                 CERROR("bad disk LOV MAGIC: 0x%08X; dumping LMM (size=%d):\n",
265                        le32_to_cpu(*(__u32 *)lmm), lmm_bytes);
266                 sz = lmm_bytes * 2 + 1;
267                 buffer = libcfs_kvzalloc(sz, GFP_NOFS);
268                 if (buffer != NULL) {
269                         int i;
270
271                         for (i = 0; i < lmm_bytes; i++)
272                                 sprintf(buffer+2*i, "%.2X", ((char *)lmm)[i]);
273                         buffer[sz - 1] = '\0';
274                         CERROR("%s\n", buffer);
275                         kvfree(buffer);
276                 }
277                 return -EINVAL;
278         }
279         rc = lsm_op_find(le32_to_cpu(*(__u32 *)lmm))->lsm_lmm_verify(lmm,
280                                      lmm_bytes, stripe_count);
281         return rc;
282 }
283
284 int lov_alloc_memmd(struct lov_stripe_md **lsmp, __u16 stripe_count,
285                     int pattern, int magic)
286 {
287         int i, lsm_size;
288
289         CDEBUG(D_INFO, "alloc lsm, stripe_count %d\n", stripe_count);
290
291         *lsmp = lsm_alloc_plain(stripe_count, &lsm_size);
292         if (!*lsmp) {
293                 CERROR("can't allocate lsmp stripe_count %d\n", stripe_count);
294                 return -ENOMEM;
295         }
296
297         atomic_set(&(*lsmp)->lsm_refc, 1);
298         spin_lock_init(&(*lsmp)->lsm_lock);
299         (*lsmp)->lsm_magic = magic;
300         (*lsmp)->lsm_stripe_count = stripe_count;
301         (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
302         (*lsmp)->lsm_pattern = pattern;
303         (*lsmp)->lsm_pool_name[0] = '\0';
304         (*lsmp)->lsm_layout_gen = 0;
305         if (stripe_count > 0)
306                 (*lsmp)->lsm_oinfo[0]->loi_ost_idx = ~0;
307
308         for (i = 0; i < stripe_count; i++)
309                 loi_init((*lsmp)->lsm_oinfo[i]);
310
311         return lsm_size;
312 }
313
314 int lov_free_memmd(struct lov_stripe_md **lsmp)
315 {
316         struct lov_stripe_md *lsm = *lsmp;
317         int refc;
318
319         *lsmp = NULL;
320         LASSERT(atomic_read(&lsm->lsm_refc) > 0);
321         refc = atomic_dec_return(&lsm->lsm_refc);
322         if (refc == 0) {
323                 LASSERT(lsm_op_find(lsm->lsm_magic) != NULL);
324                 lsm_op_find(lsm->lsm_magic)->lsm_free(lsm);
325         }
326         return refc;
327 }
328
329 /* Unpack LOV object metadata from disk storage.  It is packed in LE byte
330  * order and is opaque to the networking layer.
331  */
332 int lov_unpackmd(struct obd_export *exp,  struct lov_stripe_md **lsmp,
333                  struct lov_mds_md *lmm, int lmm_bytes)
334 {
335         struct obd_device *obd = class_exp2obd(exp);
336         struct lov_obd *lov = &obd->u.lov;
337         int rc = 0, lsm_size;
338         __u16 stripe_count;
339         __u32 magic;
340         __u32 pattern;
341
342         /* If passed an MDS struct use values from there, otherwise defaults */
343         if (lmm) {
344                 rc = lov_verify_lmm(lmm, lmm_bytes, &stripe_count);
345                 if (rc)
346                         return rc;
347                 magic = le32_to_cpu(lmm->lmm_magic);
348                 pattern = le32_to_cpu(lmm->lmm_pattern);
349         } else {
350                 magic = LOV_MAGIC;
351                 stripe_count = lov_get_stripecnt(lov, magic, 0);
352                 pattern = LOV_PATTERN_RAID0;
353         }
354
355         /* If we aren't passed an lsmp struct, we just want the size */
356         if (!lsmp) {
357                 /* XXX LOV STACKING call into osc for sizes */
358                 LBUG();
359                 return lov_stripe_md_size(stripe_count);
360         }
361         /* If we are passed an allocated struct but nothing to unpack, free */
362         if (*lsmp && !lmm) {
363                 lov_free_memmd(lsmp);
364                 return 0;
365         }
366
367         lsm_size = lov_alloc_memmd(lsmp, stripe_count, pattern, magic);
368         if (lsm_size < 0)
369                 return lsm_size;
370
371         /* If we are passed a pointer but nothing to unpack, we only alloc */
372         if (!lmm)
373                 return lsm_size;
374
375         LASSERT(lsm_op_find(magic) != NULL);
376         rc = lsm_op_find(magic)->lsm_unpackmd(lov, *lsmp, lmm);
377         if (rc) {
378                 lov_free_memmd(lsmp);
379                 return rc;
380         }
381
382         return lsm_size;
383 }
384
385 /* Retrieve object striping information.
386  *
387  * @lump is a pointer to an in-core struct with lmm_ost_count indicating
388  * the maximum number of OST indices which will fit in the user buffer.
389  * lmm_magic must be LOV_USER_MAGIC.
390  */
391 int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
392                   struct lov_user_md *lump)
393 {
394         /*
395          * XXX huge struct allocated on stack.
396          */
397         /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
398         struct lov_user_md_v3 lum;
399         struct lov_mds_md *lmmk = NULL;
400         int rc, lmm_size;
401         int lum_size;
402         mm_segment_t seg;
403
404         if (!lsm)
405                 return -ENODATA;
406
407         /*
408          * "Switch to kernel segment" to allow copying from kernel space by
409          * copy_{to,from}_user().
410          */
411         seg = get_fs();
412         set_fs(KERNEL_DS);
413
414         /* we only need the header part from user space to get lmm_magic and
415          * lmm_stripe_count, (the header part is common to v1 and v3) */
416         lum_size = sizeof(struct lov_user_md_v1);
417         if (copy_from_user(&lum, lump, lum_size)) {
418                 rc = -EFAULT;
419                 goto out_set;
420         } else if ((lum.lmm_magic != LOV_USER_MAGIC) &&
421                  (lum.lmm_magic != LOV_USER_MAGIC_V3)) {
422                 rc = -EINVAL;
423                 goto out_set;
424         }
425
426         if (lum.lmm_stripe_count &&
427             (lum.lmm_stripe_count < lsm->lsm_stripe_count)) {
428                 /* Return right size of stripe to user */
429                 lum.lmm_stripe_count = lsm->lsm_stripe_count;
430                 rc = copy_to_user(lump, &lum, lum_size);
431                 rc = -EOVERFLOW;
432                 goto out_set;
433         }
434         rc = lov_packmd(exp, &lmmk, lsm);
435         if (rc < 0)
436                 goto out_set;
437         lmm_size = rc;
438         rc = 0;
439
440         /* FIXME: Bug 1185 - copy fields properly when structs change */
441         /* struct lov_user_md_v3 and struct lov_mds_md_v3 must be the same */
442         CLASSERT(sizeof(lum) == sizeof(struct lov_mds_md_v3));
443         CLASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lmmk->lmm_objects[0]));
444
445         if ((cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) &&
446             ((lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) ||
447             (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)))) {
448                 lustre_swab_lov_mds_md(lmmk);
449                 lustre_swab_lov_user_md_objects(
450                                 (struct lov_user_ost_data *)lmmk->lmm_objects,
451                                 lmmk->lmm_stripe_count);
452         }
453         if (lum.lmm_magic == LOV_USER_MAGIC) {
454                 /* User request for v1, we need skip lmm_pool_name */
455                 if (lmmk->lmm_magic == LOV_MAGIC_V3) {
456                         memmove((char *)(&lmmk->lmm_stripe_count) +
457                                 sizeof(lmmk->lmm_stripe_count),
458                                 ((struct lov_mds_md_v3 *)lmmk)->lmm_objects,
459                                 lmmk->lmm_stripe_count *
460                                 sizeof(struct lov_ost_data_v1));
461                         lmm_size -= LOV_MAXPOOLNAME;
462                 }
463         } else {
464                 /* if v3 we just have to update the lum_size */
465                 lum_size = sizeof(struct lov_user_md_v3);
466         }
467
468         /* User wasn't expecting this many OST entries */
469         if (lum.lmm_stripe_count == 0)
470                 lmm_size = lum_size;
471         else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
472                 rc = -EOVERFLOW;
473                 goto out_set;
474         }
475         /*
476          * Have a difference between lov_mds_md & lov_user_md.
477          * So we have to re-order the data before copy to user.
478          */
479         lum.lmm_stripe_count = lmmk->lmm_stripe_count;
480         lum.lmm_layout_gen = lmmk->lmm_layout_gen;
481         ((struct lov_user_md *)lmmk)->lmm_layout_gen = lum.lmm_layout_gen;
482         ((struct lov_user_md *)lmmk)->lmm_stripe_count = lum.lmm_stripe_count;
483         if (copy_to_user(lump, lmmk, lmm_size))
484                 rc = -EFAULT;
485
486         obd_free_diskmd(exp, &lmmk);
487 out_set:
488         set_fs(seg);
489         return rc;
490 }