blk.c 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. /*
  2. * NVDIMM Block Window Driver
  3. * Copyright (c) 2014, Intel Corporation.
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms and conditions of the GNU General Public License,
  7. * version 2, as published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope it will be useful, but WITHOUT
  10. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12. * more details.
  13. */
  14. #include <linux/blkdev.h>
  15. #include <linux/fs.h>
  16. #include <linux/genhd.h>
  17. #include <linux/module.h>
  18. #include <linux/moduleparam.h>
  19. #include <linux/nd.h>
  20. #include <linux/sizes.h>
  21. #include "nd.h"
/* Per-namespace driver state for an NVDIMM block-window (BLK) device. */
struct nd_blk_device {
	struct request_queue *queue;	/* bio-based queue, no request_fn */
	struct gendisk *disk;
	struct nd_namespace_blk *nsblk;	/* backing BLK-mode namespace */
	struct nd_blk_region *ndbr;	/* region supplying the ->do_io() op */
	size_t disk_size;		/* raw namespace capacity in bytes */
	u32 sector_size;		/* logical block size exposed: 512 or 4096 */
	u32 internal_lbasize;		/* nsblk->lbasize rounded up to
					 * INT_LBASIZE_ALIGNMENT (data + metadata) */
};
/* Dynamically assigned block major number, set in nd_blk_init(). */
static int nd_blk_major;
  32. static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev)
  33. {
  34. return blk_dev->nsblk->lbasize - blk_dev->sector_size;
  35. }
  36. static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk,
  37. resource_size_t ns_offset, unsigned int len)
  38. {
  39. int i;
  40. for (i = 0; i < nsblk->num_resources; i++) {
  41. if (ns_offset < resource_size(nsblk->res[i])) {
  42. if (ns_offset + len > resource_size(nsblk->res[i])) {
  43. dev_WARN_ONCE(&nsblk->common.dev, 1,
  44. "illegal request\n");
  45. return SIZE_MAX;
  46. }
  47. return nsblk->res[i]->start + ns_offset;
  48. }
  49. ns_offset -= resource_size(nsblk->res[i]);
  50. }
  51. dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n");
  52. return SIZE_MAX;
  53. }
#ifdef CONFIG_BLK_DEV_INTEGRITY
/*
 * Transfer the integrity metadata for logical block @lba to/from the
 * bio's integrity payload.  The metadata occupies the tail of each
 * internal LBA, immediately after the @sector_size data bytes — hence
 * the "+ blk_dev->sector_size" in the namespace-offset computation.
 *
 * Returns 0 on success, -EIO if the offset cannot be mapped, or the
 * error from ->do_io().
 */
static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
		struct bio_integrity_payload *bip, u64 lba,
		int rw)
{
	unsigned int len = nd_blk_meta_size(blk_dev);
	resource_size_t dev_offset, ns_offset;
	struct nd_namespace_blk *nsblk;
	struct nd_blk_region *ndbr;
	int err = 0;

	nsblk = blk_dev->nsblk;
	ndbr = blk_dev->ndbr;
	/* metadata sits past the data portion of this internal LBA */
	ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size;
	dev_offset = to_dev_offset(nsblk, ns_offset, len);
	if (dev_offset == SIZE_MAX)
		return -EIO;

	/* copy the metadata one integrity bvec at a time */
	while (len) {
		unsigned int cur_len;
		struct bio_vec bv;
		void *iobuf;

		bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
		/*
		 * The 'bv' obtained from bvec_iter_bvec has its .bv_len and
		 * .bv_offset already adjusted for iter->bi_bvec_done, and we
		 * can use those directly
		 */
		cur_len = min(len, bv.bv_len);
		iobuf = kmap_atomic(bv.bv_page);
		err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset,
				cur_len, rw);
		kunmap_atomic(iobuf);
		if (err)
			return err;

		len -= cur_len;
		dev_offset += cur_len;
		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
	}

	return err;
}

#else /* CONFIG_BLK_DEV_INTEGRITY */
/* No integrity support configured: metadata transfer is a no-op. */
static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
		struct bio_integrity_payload *bip, u64 lba,
		int rw)
{
	return 0;
}
#endif
/*
 * Perform the data (and, when @bip is set, metadata) transfer for a
 * single bvec starting at @sector.  The data portion of each logical
 * block is read/written via the region's ->do_io(), then its integrity
 * metadata via nd_blk_rw_integrity().
 *
 * Returns 0 on success, -EIO if an offset cannot be mapped, or the
 * error from the underlying I/O routine.
 */
static int nd_blk_do_bvec(struct nd_blk_device *blk_dev,
		struct bio_integrity_payload *bip, struct page *page,
		unsigned int len, unsigned int off, int rw,
		sector_t sector)
{
	struct nd_blk_region *ndbr = blk_dev->ndbr;
	resource_size_t dev_offset, ns_offset;
	int err = 0;
	void *iobuf;
	u64 lba;

	while (len) {
		unsigned int cur_len;

		/*
		 * If we don't have an integrity payload, we don't have to
		 * split the bvec into sectors, as this would cause unnecessary
		 * Block Window setup/move steps. the do_io routine is capable
		 * of handling len <= PAGE_SIZE.
		 */
		cur_len = bip ? min(len, blk_dev->sector_size) : len;

		/* map the 512-byte sector index to an internal LBA */
		lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size);
		ns_offset = lba * blk_dev->internal_lbasize;
		dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len);
		if (dev_offset == SIZE_MAX)
			return -EIO;

		iobuf = kmap_atomic(page);
		err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw);
		kunmap_atomic(iobuf);
		if (err)
			return err;

		if (bip) {
			err = nd_blk_rw_integrity(blk_dev, bip, lba, rw);
			if (err)
				return err;
		}
		len -= cur_len;
		off += cur_len;
		/* advance by one logical block's worth of 512-byte sectors */
		sector += blk_dev->sector_size >> SECTOR_SHIFT;
	}

	return err;
}
/*
 * bio-based make_request entry point: split the bio into segments and
 * hand each to nd_blk_do_bvec().  On error the bio's bi_error is set
 * and the loop stops; bio_endio() is always called before returning.
 */
static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
{
	struct block_device *bdev = bio->bi_bdev;
	struct gendisk *disk = bdev->bd_disk;
	struct bio_integrity_payload *bip;
	struct nd_blk_device *blk_dev;
	struct bvec_iter iter;
	unsigned long start;
	struct bio_vec bvec;
	int err = 0, rw;
	bool do_acct;

	/*
	 * bio_integrity_enabled also checks if the bio already has an
	 * integrity payload attached. If it does, we *don't* do a
	 * bio_integrity_prep here - the payload has been generated by
	 * another kernel subsystem, and we just pass it through.
	 */
	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
		bio->bi_error = -EIO;
		goto out;
	}

	bip = bio_integrity(bio);
	blk_dev = disk->private_data;
	rw = bio_data_dir(bio);
	do_acct = nd_iostat_start(bio, &start);
	bio_for_each_segment(bvec, bio, iter) {
		unsigned int len = bvec.bv_len;

		/* each segment fits in one page by block-layer contract */
		BUG_ON(len > PAGE_SIZE);
		err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len,
				bvec.bv_offset, rw, iter.bi_sector);
		if (err) {
			dev_info(&blk_dev->nsblk->common.dev,
					"io error in %s sector %lld, len %d,\n",
					(rw == READ) ? "READ" : "WRITE",
					(unsigned long long) iter.bi_sector, len);
			bio->bi_error = err;
			break;
		}
	}
	if (do_acct)
		nd_iostat_end(bio, start);

 out:
	bio_endio(bio);
	return BLK_QC_T_NONE;
}
  186. static int nd_blk_rw_bytes(struct nd_namespace_common *ndns,
  187. resource_size_t offset, void *iobuf, size_t n, int rw)
  188. {
  189. struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim);
  190. struct nd_namespace_blk *nsblk = blk_dev->nsblk;
  191. struct nd_blk_region *ndbr = blk_dev->ndbr;
  192. resource_size_t dev_offset;
  193. dev_offset = to_dev_offset(nsblk, offset, n);
  194. if (unlikely(offset + n > blk_dev->disk_size)) {
  195. dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
  196. return -EFAULT;
  197. }
  198. if (dev_offset == SIZE_MAX)
  199. return -EIO;
  200. return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw);
  201. }
/* Minimal fops: bio submission goes through nd_blk_make_request(). */
static const struct block_device_operations nd_blk_fops = {
	.owner = THIS_MODULE,
	.revalidate_disk = nvdimm_revalidate_disk,
};
/*
 * Allocate the queue and gendisk for @ndns and register the disk.
 *
 * The disk is added with capacity 0 first, then integrity (if the
 * namespace carries per-sector metadata) is configured, and only then
 * is the real capacity set — this prevents I/O before the integrity
 * profile is in place.  Returns 0 on success or a negative errno,
 * unwinding the queue/disk on failure.
 */
static int nd_blk_attach_disk(struct nd_namespace_common *ndns,
		struct nd_blk_device *blk_dev)
{
	resource_size_t available_disk_size;
	struct gendisk *disk;
	u64 internal_nlba;

	/* usable capacity = whole internal LBAs times the exposed sector */
	internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize);
	available_disk_size = internal_nlba * blk_dev->sector_size;

	blk_dev->queue = blk_alloc_queue(GFP_KERNEL);
	if (!blk_dev->queue)
		return -ENOMEM;

	blk_queue_make_request(blk_dev->queue, nd_blk_make_request);
	blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX);
	blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY);
	blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size);
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue);

	disk = blk_dev->disk = alloc_disk(0);
	if (!disk) {
		blk_cleanup_queue(blk_dev->queue);
		return -ENOMEM;
	}

	disk->driverfs_dev	= &ndns->dev;
	disk->major		= nd_blk_major;
	disk->first_minor	= 0;
	disk->fops		= &nd_blk_fops;
	disk->private_data	= blk_dev;
	disk->queue		= blk_dev->queue;
	disk->flags		= GENHD_FL_EXT_DEVT;
	nvdimm_namespace_disk_name(ndns, disk->disk_name);
	/* register at zero capacity until integrity is configured */
	set_capacity(disk, 0);
	add_disk(disk);

	if (nd_blk_meta_size(blk_dev)) {
		int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev));

		if (rc) {
			del_gendisk(disk);
			put_disk(disk);
			blk_cleanup_queue(blk_dev->queue);
			return rc;
		}
	}

	set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
	revalidate_disk(disk);
	return 0;
}
/*
 * Bus probe: bind either a BTT stacked on this namespace or a raw BLK
 * disk.  If nd_btt_probe() succeeds, a btt device was registered and
 * this probe deliberately fails with -ENXIO ("we'll come back as
 * btt-blk").  Returns 0 on success or a negative errno; blk_dev is
 * freed on failure.
 */
static int nd_blk_probe(struct device *dev)
{
	struct nd_namespace_common *ndns;
	struct nd_namespace_blk *nsblk;
	struct nd_blk_device *blk_dev;
	int rc;

	ndns = nvdimm_namespace_common_probe(dev);
	if (IS_ERR(ndns))
		return PTR_ERR(ndns);

	blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL);
	if (!blk_dev)
		return -ENOMEM;

	nsblk = to_nd_namespace_blk(&ndns->dev);
	blk_dev->disk_size = nvdimm_namespace_capacity(ndns);
	blk_dev->ndbr = to_nd_blk_region(dev->parent);
	blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev);
	/* pad each LBA so data+metadata start on an aligned boundary */
	blk_dev->internal_lbasize = roundup(nsblk->lbasize,
			INT_LBASIZE_ALIGNMENT);
	/* expose 4K sectors when the namespace lbasize allows, else 512 */
	blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512);
	dev_set_drvdata(dev, blk_dev);

	ndns->rw_bytes = nd_blk_rw_bytes;
	if (is_nd_btt(dev))
		rc = nvdimm_namespace_attach_btt(ndns);
	else if (nd_btt_probe(ndns, blk_dev) == 0) {
		/* we'll come back as btt-blk */
		rc = -ENXIO;
	} else
		rc = nd_blk_attach_disk(ndns, blk_dev);
	if (rc)
		kfree(blk_dev);
	return rc;
}
  282. static void nd_blk_detach_disk(struct nd_blk_device *blk_dev)
  283. {
  284. del_gendisk(blk_dev->disk);
  285. put_disk(blk_dev->disk);
  286. blk_cleanup_queue(blk_dev->queue);
  287. }
/*
 * Bus remove: detach whichever personality was bound in nd_blk_probe()
 * (BTT or raw disk) and free the per-namespace state.
 */
static int nd_blk_remove(struct device *dev)
{
	struct nd_blk_device *blk_dev = dev_get_drvdata(dev);

	if (is_nd_btt(dev))
		nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
	else
		nd_blk_detach_disk(blk_dev);
	kfree(blk_dev);
	return 0;
}
/* nd-bus driver matching BLK-mode namespaces (and their BTT claims). */
static struct nd_device_driver nd_blk_driver = {
	.probe = nd_blk_probe,
	.remove = nd_blk_remove,
	.drv = {
		.name = "nd_blk",
	},
	.type = ND_DRIVER_NAMESPACE_BLK,
};
  306. static int __init nd_blk_init(void)
  307. {
  308. int rc;
  309. rc = register_blkdev(0, "nd_blk");
  310. if (rc < 0)
  311. return rc;
  312. nd_blk_major = rc;
  313. rc = nd_driver_register(&nd_blk_driver);
  314. if (rc < 0)
  315. unregister_blkdev(nd_blk_major, "nd_blk");
  316. return rc;
  317. }
/* Module exit: unwind nd_blk_init() in reverse order. */
static void __exit nd_blk_exit(void)
{
	driver_unregister(&nd_blk_driver.drv);
	unregister_blkdev(nd_blk_major, "nd_blk");
}
  323. MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
  324. MODULE_LICENSE("GPL v2");
  325. MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK);
  326. module_init(nd_blk_init);
  327. module_exit(nd_blk_exit);