virtio_pci_common.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576
  1. /*
  2. * Virtio PCI driver - common functionality for all device versions
  3. *
  4. * This module allows virtio devices to be used over a virtual PCI device.
  5. * This can be used with QEMU based VMMs like KVM or Xen.
  6. *
  7. * Copyright IBM Corp. 2007
  8. * Copyright Red Hat, Inc. 2014
  9. *
  10. * Authors:
  11. * Anthony Liguori <aliguori@us.ibm.com>
  12. * Rusty Russell <rusty@rustcorp.com.au>
  13. * Michael S. Tsirkin <mst@redhat.com>
  14. *
  15. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  16. * See the COPYING file in the top-level directory.
  17. *
  18. */
  19. #include "virtio_pci_common.h"
  20. static bool force_legacy = false;
  21. #if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY)
  22. module_param(force_legacy, bool, 0444);
  23. MODULE_PARM_DESC(force_legacy,
  24. "Force legacy mode for transitional virtio 1 devices");
  25. #endif
  26. /* wait for pending irq handlers */
  27. void vp_synchronize_vectors(struct virtio_device *vdev)
  28. {
  29. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  30. int i;
  31. if (vp_dev->intx_enabled)
  32. synchronize_irq(vp_dev->pci_dev->irq);
  33. for (i = 0; i < vp_dev->msix_vectors; ++i)
  34. synchronize_irq(vp_dev->msix_entries[i].vector);
  35. }
  36. /* the notify function used when creating a virt queue */
  37. bool vp_notify(struct virtqueue *vq)
  38. {
  39. /* we write the queue's selector into the notification register to
  40. * signal the other end */
  41. iowrite16(vq->index, (void __iomem *)vq->priv);
  42. return true;
  43. }
  44. /* Handle a configuration change: Tell driver if it wants to know. */
  45. static irqreturn_t vp_config_changed(int irq, void *opaque)
  46. {
  47. struct virtio_pci_device *vp_dev = opaque;
  48. virtio_config_changed(&vp_dev->vdev);
  49. return IRQ_HANDLED;
  50. }
  51. /* Notify all virtqueues on an interrupt. */
  52. static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
  53. {
  54. struct virtio_pci_device *vp_dev = opaque;
  55. struct virtio_pci_vq_info *info;
  56. irqreturn_t ret = IRQ_NONE;
  57. unsigned long flags;
  58. spin_lock_irqsave(&vp_dev->lock, flags);
  59. list_for_each_entry(info, &vp_dev->virtqueues, node) {
  60. if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
  61. ret = IRQ_HANDLED;
  62. }
  63. spin_unlock_irqrestore(&vp_dev->lock, flags);
  64. return ret;
  65. }
  66. /* A small wrapper to also acknowledge the interrupt when it's handled.
  67. * I really need an EIO hook for the vring so I can ack the interrupt once we
  68. * know that we'll be handling the IRQ but before we invoke the callback since
  69. * the callback may notify the host which results in the host attempting to
  70. * raise an interrupt that we would then mask once we acknowledged the
  71. * interrupt. */
  72. static irqreturn_t vp_interrupt(int irq, void *opaque)
  73. {
  74. struct virtio_pci_device *vp_dev = opaque;
  75. u8 isr;
  76. /* reading the ISR has the effect of also clearing it so it's very
  77. * important to save off the value. */
  78. isr = ioread8(vp_dev->isr);
  79. /* It's definitely not us if the ISR was not high */
  80. if (!isr)
  81. return IRQ_NONE;
  82. /* Configuration change? Tell driver if it wants to know. */
  83. if (isr & VIRTIO_PCI_ISR_CONFIG)
  84. vp_config_changed(irq, opaque);
  85. return vp_vring_interrupt(irq, opaque);
  86. }
  87. static void vp_free_vectors(struct virtio_device *vdev)
  88. {
  89. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  90. int i;
  91. if (vp_dev->intx_enabled) {
  92. free_irq(vp_dev->pci_dev->irq, vp_dev);
  93. vp_dev->intx_enabled = 0;
  94. }
  95. for (i = 0; i < vp_dev->msix_used_vectors; ++i)
  96. free_irq(vp_dev->msix_entries[i].vector, vp_dev);
  97. for (i = 0; i < vp_dev->msix_vectors; i++)
  98. if (vp_dev->msix_affinity_masks[i])
  99. free_cpumask_var(vp_dev->msix_affinity_masks[i]);
  100. if (vp_dev->msix_enabled) {
  101. /* Disable the vector used for configuration */
  102. vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);
  103. pci_disable_msix(vp_dev->pci_dev);
  104. vp_dev->msix_enabled = 0;
  105. }
  106. vp_dev->msix_vectors = 0;
  107. vp_dev->msix_used_vectors = 0;
  108. kfree(vp_dev->msix_names);
  109. vp_dev->msix_names = NULL;
  110. kfree(vp_dev->msix_entries);
  111. vp_dev->msix_entries = NULL;
  112. kfree(vp_dev->msix_affinity_masks);
  113. vp_dev->msix_affinity_masks = NULL;
  114. }
  115. static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
  116. bool per_vq_vectors)
  117. {
  118. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  119. const char *name = dev_name(&vp_dev->vdev.dev);
  120. unsigned i, v;
  121. int err = -ENOMEM;
  122. vp_dev->msix_vectors = nvectors;
  123. vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
  124. GFP_KERNEL);
  125. if (!vp_dev->msix_entries)
  126. goto error;
  127. vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
  128. GFP_KERNEL);
  129. if (!vp_dev->msix_names)
  130. goto error;
  131. vp_dev->msix_affinity_masks
  132. = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
  133. GFP_KERNEL);
  134. if (!vp_dev->msix_affinity_masks)
  135. goto error;
  136. for (i = 0; i < nvectors; ++i)
  137. if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
  138. GFP_KERNEL))
  139. goto error;
  140. for (i = 0; i < nvectors; ++i)
  141. vp_dev->msix_entries[i].entry = i;
  142. err = pci_enable_msix_exact(vp_dev->pci_dev,
  143. vp_dev->msix_entries, nvectors);
  144. if (err)
  145. goto error;
  146. vp_dev->msix_enabled = 1;
  147. /* Set the vector used for configuration */
  148. v = vp_dev->msix_used_vectors;
  149. snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
  150. "%s-config", name);
  151. err = request_irq(vp_dev->msix_entries[v].vector,
  152. vp_config_changed, 0, vp_dev->msix_names[v],
  153. vp_dev);
  154. if (err)
  155. goto error;
  156. ++vp_dev->msix_used_vectors;
  157. v = vp_dev->config_vector(vp_dev, v);
  158. /* Verify we had enough resources to assign the vector */
  159. if (v == VIRTIO_MSI_NO_VECTOR) {
  160. err = -EBUSY;
  161. goto error;
  162. }
  163. if (!per_vq_vectors) {
  164. /* Shared vector for all VQs */
  165. v = vp_dev->msix_used_vectors;
  166. snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
  167. "%s-virtqueues", name);
  168. err = request_irq(vp_dev->msix_entries[v].vector,
  169. vp_vring_interrupt, 0, vp_dev->msix_names[v],
  170. vp_dev);
  171. if (err)
  172. goto error;
  173. ++vp_dev->msix_used_vectors;
  174. }
  175. return 0;
  176. error:
  177. vp_free_vectors(vdev);
  178. return err;
  179. }
  180. static int vp_request_intx(struct virtio_device *vdev)
  181. {
  182. int err;
  183. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  184. err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
  185. IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
  186. if (!err)
  187. vp_dev->intx_enabled = 1;
  188. return err;
  189. }
  190. static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
  191. void (*callback)(struct virtqueue *vq),
  192. const char *name,
  193. u16 msix_vec)
  194. {
  195. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  196. struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
  197. struct virtqueue *vq;
  198. unsigned long flags;
  199. /* fill out our structure that represents an active queue */
  200. if (!info)
  201. return ERR_PTR(-ENOMEM);
  202. vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, msix_vec);
  203. if (IS_ERR(vq))
  204. goto out_info;
  205. info->vq = vq;
  206. if (callback) {
  207. spin_lock_irqsave(&vp_dev->lock, flags);
  208. list_add(&info->node, &vp_dev->virtqueues);
  209. spin_unlock_irqrestore(&vp_dev->lock, flags);
  210. } else {
  211. INIT_LIST_HEAD(&info->node);
  212. }
  213. vp_dev->vqs[index] = info;
  214. return vq;
  215. out_info:
  216. kfree(info);
  217. return vq;
  218. }
  219. static void vp_del_vq(struct virtqueue *vq)
  220. {
  221. struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
  222. struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
  223. unsigned long flags;
  224. spin_lock_irqsave(&vp_dev->lock, flags);
  225. list_del(&info->node);
  226. spin_unlock_irqrestore(&vp_dev->lock, flags);
  227. vp_dev->del_vq(info);
  228. kfree(info);
  229. }
  230. /* the config->del_vqs() implementation */
  231. void vp_del_vqs(struct virtio_device *vdev)
  232. {
  233. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  234. struct virtqueue *vq, *n;
  235. struct virtio_pci_vq_info *info;
  236. list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
  237. info = vp_dev->vqs[vq->index];
  238. if (vp_dev->per_vq_vectors &&
  239. info->msix_vector != VIRTIO_MSI_NO_VECTOR)
  240. free_irq(vp_dev->msix_entries[info->msix_vector].vector,
  241. vq);
  242. vp_del_vq(vq);
  243. }
  244. vp_dev->per_vq_vectors = false;
  245. vp_free_vectors(vdev);
  246. kfree(vp_dev->vqs);
  247. vp_dev->vqs = NULL;
  248. }
  249. static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
  250. struct virtqueue *vqs[],
  251. vq_callback_t *callbacks[],
  252. const char *names[],
  253. bool use_msix,
  254. bool per_vq_vectors)
  255. {
  256. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  257. u16 msix_vec;
  258. int i, err, nvectors, allocated_vectors;
  259. vp_dev->vqs = kmalloc(nvqs * sizeof *vp_dev->vqs, GFP_KERNEL);
  260. if (!vp_dev->vqs)
  261. return -ENOMEM;
  262. if (!use_msix) {
  263. /* Old style: one normal interrupt for change and all vqs. */
  264. err = vp_request_intx(vdev);
  265. if (err)
  266. goto error_find;
  267. } else {
  268. if (per_vq_vectors) {
  269. /* Best option: one for change interrupt, one per vq. */
  270. nvectors = 1;
  271. for (i = 0; i < nvqs; ++i)
  272. if (callbacks[i])
  273. ++nvectors;
  274. } else {
  275. /* Second best: one for change, shared for all vqs. */
  276. nvectors = 2;
  277. }
  278. err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
  279. if (err)
  280. goto error_find;
  281. }
  282. vp_dev->per_vq_vectors = per_vq_vectors;
  283. allocated_vectors = vp_dev->msix_used_vectors;
  284. for (i = 0; i < nvqs; ++i) {
  285. if (!names[i]) {
  286. vqs[i] = NULL;
  287. continue;
  288. } else if (!callbacks[i] || !vp_dev->msix_enabled)
  289. msix_vec = VIRTIO_MSI_NO_VECTOR;
  290. else if (vp_dev->per_vq_vectors)
  291. msix_vec = allocated_vectors++;
  292. else
  293. msix_vec = VP_MSIX_VQ_VECTOR;
  294. vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
  295. if (IS_ERR(vqs[i])) {
  296. err = PTR_ERR(vqs[i]);
  297. goto error_find;
  298. }
  299. if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
  300. continue;
  301. /* allocate per-vq irq if available and necessary */
  302. snprintf(vp_dev->msix_names[msix_vec],
  303. sizeof *vp_dev->msix_names,
  304. "%s-%s",
  305. dev_name(&vp_dev->vdev.dev), names[i]);
  306. err = request_irq(vp_dev->msix_entries[msix_vec].vector,
  307. vring_interrupt, 0,
  308. vp_dev->msix_names[msix_vec],
  309. vqs[i]);
  310. if (err) {
  311. vp_del_vq(vqs[i]);
  312. goto error_find;
  313. }
  314. }
  315. return 0;
  316. error_find:
  317. vp_del_vqs(vdev);
  318. return err;
  319. }
  320. /* the config->find_vqs() implementation */
  321. int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
  322. struct virtqueue *vqs[],
  323. vq_callback_t *callbacks[],
  324. const char *names[])
  325. {
  326. int err;
  327. /* Try MSI-X with one vector per queue. */
  328. err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
  329. if (!err)
  330. return 0;
  331. /* Fallback: MSI-X with one vector for config, one shared for queues. */
  332. err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
  333. true, false);
  334. if (!err)
  335. return 0;
  336. /* Finally fall back to regular interrupts. */
  337. return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
  338. false, false);
  339. }
  340. const char *vp_bus_name(struct virtio_device *vdev)
  341. {
  342. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  343. return pci_name(vp_dev->pci_dev);
  344. }
  345. /* Setup the affinity for a virtqueue:
  346. * - force the affinity for per vq vector
  347. * - OR over all affinities for shared MSI
  348. * - ignore the affinity request if we're using INTX
  349. */
  350. int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
  351. {
  352. struct virtio_device *vdev = vq->vdev;
  353. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  354. struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
  355. struct cpumask *mask;
  356. unsigned int irq;
  357. if (!vq->callback)
  358. return -EINVAL;
  359. if (vp_dev->msix_enabled) {
  360. mask = vp_dev->msix_affinity_masks[info->msix_vector];
  361. irq = vp_dev->msix_entries[info->msix_vector].vector;
  362. if (cpu == -1)
  363. irq_set_affinity_hint(irq, NULL);
  364. else {
  365. cpumask_clear(mask);
  366. cpumask_set_cpu(cpu, mask);
  367. irq_set_affinity_hint(irq, mask);
  368. }
  369. }
  370. return 0;
  371. }
  372. #ifdef CONFIG_PM_SLEEP
  373. static int virtio_pci_freeze(struct device *dev)
  374. {
  375. struct pci_dev *pci_dev = to_pci_dev(dev);
  376. struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
  377. int ret;
  378. ret = virtio_device_freeze(&vp_dev->vdev);
  379. if (!ret)
  380. pci_disable_device(pci_dev);
  381. return ret;
  382. }
  383. static int virtio_pci_restore(struct device *dev)
  384. {
  385. struct pci_dev *pci_dev = to_pci_dev(dev);
  386. struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
  387. int ret;
  388. ret = pci_enable_device(pci_dev);
  389. if (ret)
  390. return ret;
  391. pci_set_master(pci_dev);
  392. return virtio_device_restore(&vp_dev->vdev);
  393. }
  394. static const struct dev_pm_ops virtio_pci_pm_ops = {
  395. SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
  396. };
  397. #endif
  398. /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
  399. static const struct pci_device_id virtio_pci_id_table[] = {
  400. { PCI_DEVICE(0x1af4, PCI_ANY_ID) },
  401. { 0 }
  402. };
  403. MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
  /*
   * Release callback of the embedded struct device.
   *
   * As struct device is a kobject, the memory (which includes the reference
   * counter itself) must not be freed before this callback runs; this is the
   * place where the virtio_pci_device is finally freed.
   */
  static void virtio_pci_release_dev(struct device *_d)
  {
      struct virtio_pci_device *vp = to_vp_device(dev_to_virtio(_d));

      kfree(vp);
  }
  413. static int virtio_pci_probe(struct pci_dev *pci_dev,
  414. const struct pci_device_id *id)
  415. {
  416. struct virtio_pci_device *vp_dev;
  417. int rc;
  418. /* allocate our structure and fill it out */
  419. vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
  420. if (!vp_dev)
  421. return -ENOMEM;
  422. pci_set_drvdata(pci_dev, vp_dev);
  423. vp_dev->vdev.dev.parent = &pci_dev->dev;
  424. vp_dev->vdev.dev.release = virtio_pci_release_dev;
  425. vp_dev->pci_dev = pci_dev;
  426. INIT_LIST_HEAD(&vp_dev->virtqueues);
  427. spin_lock_init(&vp_dev->lock);
  428. /* enable the device */
  429. rc = pci_enable_device(pci_dev);
  430. if (rc)
  431. goto err_enable_device;
  432. if (force_legacy) {
  433. rc = virtio_pci_legacy_probe(vp_dev);
  434. /* Also try modern mode if we can't map BAR0 (no IO space). */
  435. if (rc == -ENODEV || rc == -ENOMEM)
  436. rc = virtio_pci_modern_probe(vp_dev);
  437. if (rc)
  438. goto err_probe;
  439. } else {
  440. rc = virtio_pci_modern_probe(vp_dev);
  441. if (rc == -ENODEV)
  442. rc = virtio_pci_legacy_probe(vp_dev);
  443. if (rc)
  444. goto err_probe;
  445. }
  446. pci_set_master(pci_dev);
  447. rc = register_virtio_device(&vp_dev->vdev);
  448. if (rc)
  449. goto err_register;
  450. return 0;
  451. err_register:
  452. if (vp_dev->ioaddr)
  453. virtio_pci_legacy_remove(vp_dev);
  454. else
  455. virtio_pci_modern_remove(vp_dev);
  456. err_probe:
  457. pci_disable_device(pci_dev);
  458. err_enable_device:
  459. kfree(vp_dev);
  460. return rc;
  461. }
  462. static void virtio_pci_remove(struct pci_dev *pci_dev)
  463. {
  464. struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
  465. struct device *dev = get_device(&vp_dev->vdev.dev);
  466. unregister_virtio_device(&vp_dev->vdev);
  467. if (vp_dev->ioaddr)
  468. virtio_pci_legacy_remove(vp_dev);
  469. else
  470. virtio_pci_modern_remove(vp_dev);
  471. pci_disable_device(pci_dev);
  472. put_device(dev);
  473. }
  474. static struct pci_driver virtio_pci_driver = {
  475. .name = "virtio-pci",
  476. .id_table = virtio_pci_id_table,
  477. .probe = virtio_pci_probe,
  478. .remove = virtio_pci_remove,
  479. #ifdef CONFIG_PM_SLEEP
  480. .driver.pm = &virtio_pci_pm_ops,
  481. #endif
  482. };
  483. module_pci_driver(virtio_pci_driver);
  484. MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
  485. MODULE_DESCRIPTION("virtio-pci");
  486. MODULE_LICENSE("GPL");
  487. MODULE_VERSION("1");