[Fuel-plugin] Install kernel in post-deployment.
[kvmfornfv.git] / kernel / drivers / nvme / host / lightnvm.c
1 /*
2  * nvme-lightnvm.c - LightNVM NVMe device
3  *
4  * Copyright (C) 2014-2015 IT University of Copenhagen
5  * Initial release: Matias Bjorling <mb@lightnvm.io>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License version
9  * 2 as published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; see the file COPYING.  If not, write to
18  * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
19  * USA.
20  *
21  */
22
23 #include "nvme.h"
24
25 #include <linux/nvme.h>
26 #include <linux/bitops.h>
27 #include <linux/lightnvm.h>
28 #include <linux/vmalloc.h>
29
/*
 * Admin opcodes used by the LightNVM admin commands built in this file,
 * listed in ascending opcode order.
 */
enum nvme_nvm_admin_opcode {
	nvme_nvm_admin_identity		= 0xe2,	/* see nvme_nvm_identity() */
	nvme_nvm_admin_get_l2p_tbl	= 0xea,	/* see nvme_nvm_get_l2p_tbl() */
	nvme_nvm_admin_set_bb_tbl	= 0xf1,	/* see nvme_nvm_set_bb_tbl() */
	nvme_nvm_admin_get_bb_tbl	= 0xf2,	/* see nvme_nvm_get_bb_tbl() */
};
36
37 struct nvme_nvm_hb_rw {
38         __u8                    opcode;
39         __u8                    flags;
40         __u16                   command_id;
41         __le32                  nsid;
42         __u64                   rsvd2;
43         __le64                  metadata;
44         __le64                  prp1;
45         __le64                  prp2;
46         __le64                  spba;
47         __le16                  length;
48         __le16                  control;
49         __le32                  dsmgmt;
50         __le64                  slba;
51 };
52
53 struct nvme_nvm_ph_rw {
54         __u8                    opcode;
55         __u8                    flags;
56         __u16                   command_id;
57         __le32                  nsid;
58         __u64                   rsvd2;
59         __le64                  metadata;
60         __le64                  prp1;
61         __le64                  prp2;
62         __le64                  spba;
63         __le16                  length;
64         __le16                  control;
65         __le32                  dsmgmt;
66         __le64                  resv;
67 };
68
69 struct nvme_nvm_identity {
70         __u8                    opcode;
71         __u8                    flags;
72         __u16                   command_id;
73         __le32                  nsid;
74         __u64                   rsvd[2];
75         __le64                  prp1;
76         __le64                  prp2;
77         __le32                  chnl_off;
78         __u32                   rsvd11[5];
79 };
80
81 struct nvme_nvm_l2ptbl {
82         __u8                    opcode;
83         __u8                    flags;
84         __u16                   command_id;
85         __le32                  nsid;
86         __le32                  cdw2[4];
87         __le64                  prp1;
88         __le64                  prp2;
89         __le64                  slba;
90         __le32                  nlb;
91         __le16                  cdw14[6];
92 };
93
94 struct nvme_nvm_getbbtbl {
95         __u8                    opcode;
96         __u8                    flags;
97         __u16                   command_id;
98         __le32                  nsid;
99         __u64                   rsvd[2];
100         __le64                  prp1;
101         __le64                  prp2;
102         __le64                  spba;
103         __u32                   rsvd4[4];
104 };
105
106 struct nvme_nvm_setbbtbl {
107         __u8                    opcode;
108         __u8                    flags;
109         __u16                   command_id;
110         __le32                  nsid;
111         __le64                  rsvd[2];
112         __le64                  prp1;
113         __le64                  prp2;
114         __le64                  spba;
115         __le16                  nlb;
116         __u8                    value;
117         __u8                    rsvd3;
118         __u32                   rsvd4[3];
119 };
120
121 struct nvme_nvm_erase_blk {
122         __u8                    opcode;
123         __u8                    flags;
124         __u16                   command_id;
125         __le32                  nsid;
126         __u64                   rsvd[2];
127         __le64                  prp1;
128         __le64                  prp2;
129         __le64                  spba;
130         __le16                  length;
131         __le16                  control;
132         __le32                  dsmgmt;
133         __le64                  resv;
134 };
135
136 struct nvme_nvm_command {
137         union {
138                 struct nvme_common_command common;
139                 struct nvme_nvm_identity identity;
140                 struct nvme_nvm_hb_rw hb_rw;
141                 struct nvme_nvm_ph_rw ph_rw;
142                 struct nvme_nvm_l2ptbl l2p;
143                 struct nvme_nvm_getbbtbl get_bb;
144                 struct nvme_nvm_setbbtbl set_bb;
145                 struct nvme_nvm_erase_blk erase;
146         };
147 };
148
149 struct nvme_nvm_id_group {
150         __u8                    mtype;
151         __u8                    fmtype;
152         __le16                  res16;
153         __u8                    num_ch;
154         __u8                    num_lun;
155         __u8                    num_pln;
156         __u8                    rsvd1;
157         __le16                  num_blk;
158         __le16                  num_pg;
159         __le16                  fpg_sz;
160         __le16                  csecs;
161         __le16                  sos;
162         __le16                  rsvd2;
163         __le32                  trdt;
164         __le32                  trdm;
165         __le32                  tprt;
166         __le32                  tprm;
167         __le32                  tbet;
168         __le32                  tbem;
169         __le32                  mpos;
170         __le32                  mccap;
171         __le16                  cpar;
172         __u8                    reserved[906];
173 } __packed;
174
175 struct nvme_nvm_addr_format {
176         __u8                    ch_offset;
177         __u8                    ch_len;
178         __u8                    lun_offset;
179         __u8                    lun_len;
180         __u8                    pln_offset;
181         __u8                    pln_len;
182         __u8                    blk_offset;
183         __u8                    blk_len;
184         __u8                    pg_offset;
185         __u8                    pg_len;
186         __u8                    sect_offset;
187         __u8                    sect_len;
188         __u8                    res[4];
189 } __packed;
190
191 struct nvme_nvm_id {
192         __u8                    ver_id;
193         __u8                    vmnt;
194         __u8                    cgrps;
195         __u8                    res;
196         __le32                  cap;
197         __le32                  dom;
198         struct nvme_nvm_addr_format ppaf;
199         __u8                    resv[228];
200         struct nvme_nvm_id_group groups[4];
201 } __packed;
202
203 struct nvme_nvm_bb_tbl {
204         __u8    tblid[4];
205         __le16  verid;
206         __le16  revid;
207         __le32  rvsd1;
208         __le32  tblks;
209         __le32  tfact;
210         __le32  tgrown;
211         __le32  tdresv;
212         __le32  thresv;
213         __le32  rsvd2[8];
214         __u8    blk[0];
215 };
216
217 /*
218  * Check we didn't inadvertently grow the command struct
219  */
220 static inline void _nvme_nvm_check_size(void)
221 {
222         BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
223         BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64);
224         BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
225         BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
226         BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
227         BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
228         BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
229         BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
230         BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128);
231         BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
232         BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 512);
233 }
234
235 static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
236 {
237         struct nvme_nvm_id_group *src;
238         struct nvm_id_group *dst;
239         int i, end;
240
241         end = min_t(u32, 4, nvm_id->cgrps);
242
243         for (i = 0; i < end; i++) {
244                 src = &nvme_nvm_id->groups[i];
245                 dst = &nvm_id->groups[i];
246
247                 dst->mtype = src->mtype;
248                 dst->fmtype = src->fmtype;
249                 dst->num_ch = src->num_ch;
250                 dst->num_lun = src->num_lun;
251                 dst->num_pln = src->num_pln;
252
253                 dst->num_pg = le16_to_cpu(src->num_pg);
254                 dst->num_blk = le16_to_cpu(src->num_blk);
255                 dst->fpg_sz = le16_to_cpu(src->fpg_sz);
256                 dst->csecs = le16_to_cpu(src->csecs);
257                 dst->sos = le16_to_cpu(src->sos);
258
259                 dst->trdt = le32_to_cpu(src->trdt);
260                 dst->trdm = le32_to_cpu(src->trdm);
261                 dst->tprt = le32_to_cpu(src->tprt);
262                 dst->tprm = le32_to_cpu(src->tprm);
263                 dst->tbet = le32_to_cpu(src->tbet);
264                 dst->tbem = le32_to_cpu(src->tbem);
265                 dst->mpos = le32_to_cpu(src->mpos);
266                 dst->mccap = le32_to_cpu(src->mccap);
267
268                 dst->cpar = le16_to_cpu(src->cpar);
269         }
270
271         return 0;
272 }
273
274 static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
275 {
276         struct nvme_ns *ns = nvmdev->q->queuedata;
277         struct nvme_dev *dev = ns->dev;
278         struct nvme_nvm_id *nvme_nvm_id;
279         struct nvme_nvm_command c = {};
280         int ret;
281
282         c.identity.opcode = nvme_nvm_admin_identity;
283         c.identity.nsid = cpu_to_le32(ns->ns_id);
284         c.identity.chnl_off = 0;
285
286         nvme_nvm_id = kmalloc(sizeof(struct nvme_nvm_id), GFP_KERNEL);
287         if (!nvme_nvm_id)
288                 return -ENOMEM;
289
290         ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
291                                 nvme_nvm_id, sizeof(struct nvme_nvm_id));
292         if (ret) {
293                 ret = -EIO;
294                 goto out;
295         }
296
297         nvm_id->ver_id = nvme_nvm_id->ver_id;
298         nvm_id->vmnt = nvme_nvm_id->vmnt;
299         nvm_id->cgrps = nvme_nvm_id->cgrps;
300         nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
301         nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
302         memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
303                                         sizeof(struct nvme_nvm_addr_format));
304
305         ret = init_grps(nvm_id, nvme_nvm_id);
306 out:
307         kfree(nvme_nvm_id);
308         return ret;
309 }
310
311 static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
312                                 nvm_l2p_update_fn *update_l2p, void *priv)
313 {
314         struct nvme_ns *ns = nvmdev->q->queuedata;
315         struct nvme_dev *dev = ns->dev;
316         struct nvme_nvm_command c = {};
317         u32 len = queue_max_hw_sectors(dev->admin_q) << 9;
318         u32 nlb_pr_rq = len / sizeof(u64);
319         u64 cmd_slba = slba;
320         void *entries;
321         int ret = 0;
322
323         c.l2p.opcode = nvme_nvm_admin_get_l2p_tbl;
324         c.l2p.nsid = cpu_to_le32(ns->ns_id);
325         entries = kmalloc(len, GFP_KERNEL);
326         if (!entries)
327                 return -ENOMEM;
328
329         while (nlb) {
330                 u32 cmd_nlb = min(nlb_pr_rq, nlb);
331
332                 c.l2p.slba = cpu_to_le64(cmd_slba);
333                 c.l2p.nlb = cpu_to_le32(cmd_nlb);
334
335                 ret = nvme_submit_sync_cmd(dev->admin_q,
336                                 (struct nvme_command *)&c, entries, len);
337                 if (ret) {
338                         dev_err(dev->dev, "L2P table transfer failed (%d)\n",
339                                                                         ret);
340                         ret = -EIO;
341                         goto out;
342                 }
343
344                 if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) {
345                         ret = -EINTR;
346                         goto out;
347                 }
348
349                 cmd_slba += cmd_nlb;
350                 nlb -= cmd_nlb;
351         }
352
353 out:
354         kfree(entries);
355         return ret;
356 }
357
358 static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
359                                 int nr_blocks, nvm_bb_update_fn *update_bbtbl,
360                                 void *priv)
361 {
362         struct request_queue *q = nvmdev->q;
363         struct nvme_ns *ns = q->queuedata;
364         struct nvme_dev *dev = ns->dev;
365         struct nvme_nvm_command c = {};
366         struct nvme_nvm_bb_tbl *bb_tbl;
367         int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks;
368         int ret = 0;
369
370         c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
371         c.get_bb.nsid = cpu_to_le32(ns->ns_id);
372         c.get_bb.spba = cpu_to_le64(ppa.ppa);
373
374         bb_tbl = kzalloc(tblsz, GFP_KERNEL);
375         if (!bb_tbl)
376                 return -ENOMEM;
377
378         ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
379                                                                 bb_tbl, tblsz);
380         if (ret) {
381                 dev_err(dev->dev, "get bad block table failed (%d)\n", ret);
382                 ret = -EIO;
383                 goto out;
384         }
385
386         if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
387                 bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
388                 dev_err(dev->dev, "bbt format mismatch\n");
389                 ret = -EINVAL;
390                 goto out;
391         }
392
393         if (le16_to_cpu(bb_tbl->verid) != 1) {
394                 ret = -EINVAL;
395                 dev_err(dev->dev, "bbt version not supported\n");
396                 goto out;
397         }
398
399         if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) {
400                 ret = -EINVAL;
401                 dev_err(dev->dev, "bbt unsuspected blocks returned (%u!=%u)",
402                                         le32_to_cpu(bb_tbl->tblks), nr_blocks);
403                 goto out;
404         }
405
406         ppa = dev_to_generic_addr(nvmdev, ppa);
407         ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv);
408         if (ret) {
409                 ret = -EINTR;
410                 goto out;
411         }
412
413 out:
414         kfree(bb_tbl);
415         return ret;
416 }
417
418 static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct nvm_rq *rqd,
419                                                                 int type)
420 {
421         struct nvme_ns *ns = nvmdev->q->queuedata;
422         struct nvme_dev *dev = ns->dev;
423         struct nvme_nvm_command c = {};
424         int ret = 0;
425
426         c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl;
427         c.set_bb.nsid = cpu_to_le32(ns->ns_id);
428         c.set_bb.spba = cpu_to_le64(rqd->ppa_addr.ppa);
429         c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1);
430         c.set_bb.value = type;
431
432         ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
433                                                                 NULL, 0);
434         if (ret)
435                 dev_err(dev->dev, "set bad block table failed (%d)\n", ret);
436         return ret;
437 }
438
439 static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
440                                 struct nvme_ns *ns, struct nvme_nvm_command *c)
441 {
442         c->ph_rw.opcode = rqd->opcode;
443         c->ph_rw.nsid = cpu_to_le32(ns->ns_id);
444         c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa);
445         c->ph_rw.control = cpu_to_le16(rqd->flags);
446         c->ph_rw.length = cpu_to_le16(rqd->nr_pages - 1);
447
448         if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD)
449                 c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns,
450                                                 rqd->bio->bi_iter.bi_sector));
451 }
452
453 static void nvme_nvm_end_io(struct request *rq, int error)
454 {
455         struct nvm_rq *rqd = rq->end_io_data;
456         struct nvm_dev *dev = rqd->dev;
457
458         if (dev->mt && dev->mt->end_io(rqd, error))
459                 pr_err("nvme: err status: %x result: %lx\n",
460                                 rq->errors, (unsigned long)rq->special);
461
462         kfree(rq->cmd);
463         blk_mq_free_request(rq);
464 }
465
466 static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
467 {
468         struct request_queue *q = dev->q;
469         struct nvme_ns *ns = q->queuedata;
470         struct request *rq;
471         struct bio *bio = rqd->bio;
472         struct nvme_nvm_command *cmd;
473
474         rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
475         if (IS_ERR(rq))
476                 return -ENOMEM;
477
478         cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
479         if (!cmd) {
480                 blk_mq_free_request(rq);
481                 return -ENOMEM;
482         }
483
484         rq->cmd_type = REQ_TYPE_DRV_PRIV;
485         rq->ioprio = bio_prio(bio);
486
487         if (bio_has_data(bio))
488                 rq->nr_phys_segments = bio_phys_segments(q, bio);
489
490         rq->__data_len = bio->bi_iter.bi_size;
491         rq->bio = rq->biotail = bio;
492
493         nvme_nvm_rqtocmd(rq, rqd, ns, cmd);
494
495         rq->cmd = (unsigned char *)cmd;
496         rq->cmd_len = sizeof(struct nvme_nvm_command);
497         rq->special = (void *)0;
498
499         rq->end_io_data = rqd;
500
501         blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io);
502
503         return 0;
504 }
505
506 static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd)
507 {
508         struct request_queue *q = dev->q;
509         struct nvme_ns *ns = q->queuedata;
510         struct nvme_nvm_command c = {};
511
512         c.erase.opcode = NVM_OP_ERASE;
513         c.erase.nsid = cpu_to_le32(ns->ns_id);
514         c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa);
515         c.erase.length = cpu_to_le16(rqd->nr_pages - 1);
516
517         return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0);
518 }
519
520 static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
521 {
522         struct nvme_ns *ns = nvmdev->q->queuedata;
523         struct nvme_dev *dev = ns->dev;
524
525         return dma_pool_create(name, dev->dev, PAGE_SIZE, PAGE_SIZE, 0);
526 }
527
/* Destroy a pool previously returned by nvme_nvm_create_dma_pool(). */
static void nvme_nvm_destroy_dma_pool(void *pool)
{
	dma_pool_destroy((struct dma_pool *)pool);
}
534
535 static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
536                                     gfp_t mem_flags, dma_addr_t *dma_handler)
537 {
538         return dma_pool_alloc(pool, mem_flags, dma_handler);
539 }
540
541 static void nvme_nvm_dev_dma_free(void *pool, void *ppa_list,
542                                                         dma_addr_t dma_handler)
543 {
544         dma_pool_free(pool, ppa_list, dma_handler);
545 }
546
547 static struct nvm_dev_ops nvme_nvm_dev_ops = {
548         .identity               = nvme_nvm_identity,
549
550         .get_l2p_tbl            = nvme_nvm_get_l2p_tbl,
551
552         .get_bb_tbl             = nvme_nvm_get_bb_tbl,
553         .set_bb_tbl             = nvme_nvm_set_bb_tbl,
554
555         .submit_io              = nvme_nvm_submit_io,
556         .erase_block            = nvme_nvm_erase_block,
557
558         .create_dma_pool        = nvme_nvm_create_dma_pool,
559         .destroy_dma_pool       = nvme_nvm_destroy_dma_pool,
560         .dev_dma_alloc          = nvme_nvm_dev_dma_alloc,
561         .dev_dma_free           = nvme_nvm_dev_dma_free,
562
563         .max_phys_sect          = 64,
564 };
565
566 int nvme_nvm_register(struct request_queue *q, char *disk_name)
567 {
568         return nvm_register(q, disk_name, &nvme_nvm_dev_ops);
569 }
570
/*
 * Unregister the device known to the LightNVM core as @disk_name.
 * @q is accepted for symmetry with nvme_nvm_register() but is not used.
 */
void nvme_nvm_unregister(struct request_queue *q, char *disk_name)
{
	char *name = disk_name;

	nvm_unregister(name);
}
575
/*
 * PCI IDs checked by nvme_nvm_ns_supported().
 * TODO: move to a shared header once they are needed in more than one place.
 */
#define PCI_VENDOR_ID_CNEX		0x1d1d
#define PCI_DEVICE_ID_CNEX_WL		0x2807
#define PCI_DEVICE_ID_CNEX_QEMU		0x1f1f
580
581 int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
582 {
583         struct nvme_dev *dev = ns->dev;
584         struct pci_dev *pdev = to_pci_dev(dev->dev);
585
586         /* QEMU NVMe simulator - PCI ID + Vendor specific bit */
587         if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
588                                 pdev->device == PCI_DEVICE_ID_CNEX_QEMU &&
589                                                         id->vs[0] == 0x1)
590                 return 1;
591
592         /* CNEX Labs - PCI ID + Vendor specific bit */
593         if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
594                                 pdev->device == PCI_DEVICE_ID_CNEX_WL &&
595                                                         id->vs[0] == 0x1)
596                 return 1;
597
598         return 0;
599 }