vringh.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037
  1. /*
  2. * Helpers for the host side of a virtio ring.
  3. *
  4. * Since these may be in userspace, we use (inline) accessors.
  5. */
  6. #include <linux/module.h>
  7. #include <linux/vringh.h>
  8. #include <linux/virtio_ring.h>
  9. #include <linux/kernel.h>
  10. #include <linux/ratelimit.h>
  11. #include <linux/uaccess.h>
  12. #include <linux/slab.h>
  13. #include <linux/export.h>
  14. #include <uapi/linux/virtio_config.h>
  15. static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
  16. {
  17. static DEFINE_RATELIMIT_STATE(vringh_rs,
  18. DEFAULT_RATELIMIT_INTERVAL,
  19. DEFAULT_RATELIMIT_BURST);
  20. if (__ratelimit(&vringh_rs)) {
  21. va_list ap;
  22. va_start(ap, fmt);
  23. printk(KERN_NOTICE "vringh:");
  24. vprintk(fmt, ap);
  25. va_end(ap);
  26. }
  27. }
  28. /* Returns vring->num if empty, -ve on error. */
  29. static inline int __vringh_get_head(const struct vringh *vrh,
  30. int (*getu16)(const struct vringh *vrh,
  31. u16 *val, const __virtio16 *p),
  32. u16 *last_avail_idx)
  33. {
  34. u16 avail_idx, i, head;
  35. int err;
  36. err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
  37. if (err) {
  38. vringh_bad("Failed to access avail idx at %p",
  39. &vrh->vring.avail->idx);
  40. return err;
  41. }
  42. if (*last_avail_idx == avail_idx)
  43. return vrh->vring.num;
  44. /* Only get avail ring entries after they have been exposed by guest. */
  45. virtio_rmb(vrh->weak_barriers);
  46. i = *last_avail_idx & (vrh->vring.num - 1);
  47. err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
  48. if (err) {
  49. vringh_bad("Failed to read head: idx %d address %p",
  50. *last_avail_idx, &vrh->vring.avail->ring[i]);
  51. return err;
  52. }
  53. if (head >= vrh->vring.num) {
  54. vringh_bad("Guest says index %u > %u is available",
  55. head, vrh->vring.num);
  56. return -EINVAL;
  57. }
  58. (*last_avail_idx)++;
  59. return head;
  60. }
  61. /* Copy some bytes to/from the iovec. Returns num copied. */
  62. static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov,
  63. void *ptr, size_t len,
  64. int (*xfer)(void *addr, void *ptr,
  65. size_t len))
  66. {
  67. int err, done = 0;
  68. while (len && iov->i < iov->used) {
  69. size_t partlen;
  70. partlen = min(iov->iov[iov->i].iov_len, len);
  71. err = xfer(iov->iov[iov->i].iov_base, ptr, partlen);
  72. if (err)
  73. return err;
  74. done += partlen;
  75. len -= partlen;
  76. ptr += partlen;
  77. iov->consumed += partlen;
  78. iov->iov[iov->i].iov_len -= partlen;
  79. iov->iov[iov->i].iov_base += partlen;
  80. if (!iov->iov[iov->i].iov_len) {
  81. /* Fix up old iov element then increment. */
  82. iov->iov[iov->i].iov_len = iov->consumed;
  83. iov->iov[iov->i].iov_base -= iov->consumed;
  84. iov->consumed = 0;
  85. iov->i++;
  86. }
  87. }
  88. return done;
  89. }
  90. /* May reduce *len if range is shorter. */
  91. static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
  92. struct vringh_range *range,
  93. bool (*getrange)(struct vringh *,
  94. u64, struct vringh_range *))
  95. {
  96. if (addr < range->start || addr > range->end_incl) {
  97. if (!getrange(vrh, addr, range))
  98. return false;
  99. }
  100. BUG_ON(addr < range->start || addr > range->end_incl);
  101. /* To end of memory? */
  102. if (unlikely(addr + *len == 0)) {
  103. if (range->end_incl == -1ULL)
  104. return true;
  105. goto truncate;
  106. }
  107. /* Otherwise, don't wrap. */
  108. if (addr + *len < addr) {
  109. vringh_bad("Wrapping descriptor %zu@0x%llx",
  110. *len, (unsigned long long)addr);
  111. return false;
  112. }
  113. if (unlikely(addr + *len - 1 > range->end_incl))
  114. goto truncate;
  115. return true;
  116. truncate:
  117. *len = range->end_incl + 1 - addr;
  118. return true;
  119. }
  120. static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
  121. struct vringh_range *range,
  122. bool (*getrange)(struct vringh *,
  123. u64, struct vringh_range *))
  124. {
  125. return true;
  126. }
  127. /* No reason for this code to be inline. */
  128. static int move_to_indirect(const struct vringh *vrh,
  129. int *up_next, u16 *i, void *addr,
  130. const struct vring_desc *desc,
  131. struct vring_desc **descs, int *desc_max)
  132. {
  133. u32 len;
  134. /* Indirect tables can't have indirect. */
  135. if (*up_next != -1) {
  136. vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
  137. return -EINVAL;
  138. }
  139. len = vringh32_to_cpu(vrh, desc->len);
  140. if (unlikely(len % sizeof(struct vring_desc))) {
  141. vringh_bad("Strange indirect len %u", desc->len);
  142. return -EINVAL;
  143. }
  144. /* We will check this when we follow it! */
  145. if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
  146. *up_next = vringh16_to_cpu(vrh, desc->next);
  147. else
  148. *up_next = -2;
  149. *descs = addr;
  150. *desc_max = len / sizeof(struct vring_desc);
  151. /* Now, start at the first indirect. */
  152. *i = 0;
  153. return 0;
  154. }
  155. static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
  156. {
  157. struct kvec *new;
  158. unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;
  159. if (new_num < 8)
  160. new_num = 8;
  161. flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
  162. if (flag)
  163. new = krealloc(iov->iov, new_num * sizeof(struct iovec), gfp);
  164. else {
  165. new = kmalloc(new_num * sizeof(struct iovec), gfp);
  166. if (new) {
  167. memcpy(new, iov->iov,
  168. iov->max_num * sizeof(struct iovec));
  169. flag = VRINGH_IOV_ALLOCATED;
  170. }
  171. }
  172. if (!new)
  173. return -ENOMEM;
  174. iov->iov = new;
  175. iov->max_num = (new_num | flag);
  176. return 0;
  177. }
  178. static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
  179. struct vring_desc **descs, int *desc_max)
  180. {
  181. u16 i = *up_next;
  182. *up_next = -1;
  183. *descs = vrh->vring.desc;
  184. *desc_max = vrh->vring.num;
  185. return i;
  186. }
  187. static int slow_copy(struct vringh *vrh, void *dst, const void *src,
  188. bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
  189. struct vringh_range *range,
  190. bool (*getrange)(struct vringh *vrh,
  191. u64,
  192. struct vringh_range *)),
  193. bool (*getrange)(struct vringh *vrh,
  194. u64 addr,
  195. struct vringh_range *r),
  196. struct vringh_range *range,
  197. int (*copy)(void *dst, const void *src, size_t len))
  198. {
  199. size_t part, len = sizeof(struct vring_desc);
  200. do {
  201. u64 addr;
  202. int err;
  203. part = len;
  204. addr = (u64)(unsigned long)src - range->offset;
  205. if (!rcheck(vrh, addr, &part, range, getrange))
  206. return -EINVAL;
  207. err = copy(dst, src, part);
  208. if (err)
  209. return err;
  210. dst += part;
  211. src += part;
  212. len -= part;
  213. } while (len);
  214. return 0;
  215. }
  216. static inline int
  217. __vringh_iov(struct vringh *vrh, u16 i,
  218. struct vringh_kiov *riov,
  219. struct vringh_kiov *wiov,
  220. bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
  221. struct vringh_range *range,
  222. bool (*getrange)(struct vringh *, u64,
  223. struct vringh_range *)),
  224. bool (*getrange)(struct vringh *, u64, struct vringh_range *),
  225. gfp_t gfp,
  226. int (*copy)(void *dst, const void *src, size_t len))
  227. {
  228. int err, count = 0, up_next, desc_max;
  229. struct vring_desc desc, *descs;
  230. struct vringh_range range = { -1ULL, 0 }, slowrange;
  231. bool slow = false;
  232. /* We start traversing vring's descriptor table. */
  233. descs = vrh->vring.desc;
  234. desc_max = vrh->vring.num;
  235. up_next = -1;
  236. if (riov)
  237. riov->i = riov->used = 0;
  238. else if (wiov)
  239. wiov->i = wiov->used = 0;
  240. else
  241. /* You must want something! */
  242. BUG();
  243. for (;;) {
  244. void *addr;
  245. struct vringh_kiov *iov;
  246. size_t len;
  247. if (unlikely(slow))
  248. err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
  249. &slowrange, copy);
  250. else
  251. err = copy(&desc, &descs[i], sizeof(desc));
  252. if (unlikely(err))
  253. goto fail;
  254. if (unlikely(desc.flags &
  255. cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
  256. u64 a = vringh64_to_cpu(vrh, desc.addr);
  257. /* Make sure it's OK, and get offset. */
  258. len = vringh32_to_cpu(vrh, desc.len);
  259. if (!rcheck(vrh, a, &len, &range, getrange)) {
  260. err = -EINVAL;
  261. goto fail;
  262. }
  263. if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
  264. slow = true;
  265. /* We need to save this range to use offset */
  266. slowrange = range;
  267. }
  268. addr = (void *)(long)(a + range.offset);
  269. err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
  270. &descs, &desc_max);
  271. if (err)
  272. goto fail;
  273. continue;
  274. }
  275. if (count++ == vrh->vring.num) {
  276. vringh_bad("Descriptor loop in %p", descs);
  277. err = -ELOOP;
  278. goto fail;
  279. }
  280. if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
  281. iov = wiov;
  282. else {
  283. iov = riov;
  284. if (unlikely(wiov && wiov->i)) {
  285. vringh_bad("Readable desc %p after writable",
  286. &descs[i]);
  287. err = -EINVAL;
  288. goto fail;
  289. }
  290. }
  291. if (!iov) {
  292. vringh_bad("Unexpected %s desc",
  293. !wiov ? "writable" : "readable");
  294. err = -EPROTO;
  295. goto fail;
  296. }
  297. again:
  298. /* Make sure it's OK, and get offset. */
  299. len = vringh32_to_cpu(vrh, desc.len);
  300. if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
  301. getrange)) {
  302. err = -EINVAL;
  303. goto fail;
  304. }
  305. addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
  306. range.offset);
  307. if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
  308. err = resize_iovec(iov, gfp);
  309. if (err)
  310. goto fail;
  311. }
  312. iov->iov[iov->used].iov_base = addr;
  313. iov->iov[iov->used].iov_len = len;
  314. iov->used++;
  315. if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
  316. desc.len = cpu_to_vringh32(vrh,
  317. vringh32_to_cpu(vrh, desc.len) - len);
  318. desc.addr = cpu_to_vringh64(vrh,
  319. vringh64_to_cpu(vrh, desc.addr) + len);
  320. goto again;
  321. }
  322. if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
  323. i = vringh16_to_cpu(vrh, desc.next);
  324. } else {
  325. /* Just in case we need to finish traversing above. */
  326. if (unlikely(up_next > 0)) {
  327. i = return_from_indirect(vrh, &up_next,
  328. &descs, &desc_max);
  329. slow = false;
  330. } else
  331. break;
  332. }
  333. if (i >= desc_max) {
  334. vringh_bad("Chained index %u > %u", i, desc_max);
  335. err = -EINVAL;
  336. goto fail;
  337. }
  338. }
  339. return 0;
  340. fail:
  341. return err;
  342. }
  343. static inline int __vringh_complete(struct vringh *vrh,
  344. const struct vring_used_elem *used,
  345. unsigned int num_used,
  346. int (*putu16)(const struct vringh *vrh,
  347. __virtio16 *p, u16 val),
  348. int (*putused)(struct vring_used_elem *dst,
  349. const struct vring_used_elem
  350. *src, unsigned num))
  351. {
  352. struct vring_used *used_ring;
  353. int err;
  354. u16 used_idx, off;
  355. used_ring = vrh->vring.used;
  356. used_idx = vrh->last_used_idx + vrh->completed;
  357. off = used_idx % vrh->vring.num;
  358. /* Compiler knows num_used == 1 sometimes, hence extra check */
  359. if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
  360. u16 part = vrh->vring.num - off;
  361. err = putused(&used_ring->ring[off], used, part);
  362. if (!err)
  363. err = putused(&used_ring->ring[0], used + part,
  364. num_used - part);
  365. } else
  366. err = putused(&used_ring->ring[off], used, num_used);
  367. if (err) {
  368. vringh_bad("Failed to write %u used entries %u at %p",
  369. num_used, off, &used_ring->ring[off]);
  370. return err;
  371. }
  372. /* Make sure buffer is written before we update index. */
  373. virtio_wmb(vrh->weak_barriers);
  374. err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
  375. if (err) {
  376. vringh_bad("Failed to update used index at %p",
  377. &vrh->vring.used->idx);
  378. return err;
  379. }
  380. vrh->completed += num_used;
  381. return 0;
  382. }
  383. static inline int __vringh_need_notify(struct vringh *vrh,
  384. int (*getu16)(const struct vringh *vrh,
  385. u16 *val,
  386. const __virtio16 *p))
  387. {
  388. bool notify;
  389. u16 used_event;
  390. int err;
  391. /* Flush out used index update. This is paired with the
  392. * barrier that the Guest executes when enabling
  393. * interrupts. */
  394. virtio_mb(vrh->weak_barriers);
  395. /* Old-style, without event indices. */
  396. if (!vrh->event_indices) {
  397. u16 flags;
  398. err = getu16(vrh, &flags, &vrh->vring.avail->flags);
  399. if (err) {
  400. vringh_bad("Failed to get flags at %p",
  401. &vrh->vring.avail->flags);
  402. return err;
  403. }
  404. return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
  405. }
  406. /* Modern: we know when other side wants to know. */
  407. err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
  408. if (err) {
  409. vringh_bad("Failed to get used event idx at %p",
  410. &vring_used_event(&vrh->vring));
  411. return err;
  412. }
  413. /* Just in case we added so many that we wrap. */
  414. if (unlikely(vrh->completed > 0xffff))
  415. notify = true;
  416. else
  417. notify = vring_need_event(used_event,
  418. vrh->last_used_idx + vrh->completed,
  419. vrh->last_used_idx);
  420. vrh->last_used_idx += vrh->completed;
  421. vrh->completed = 0;
  422. return notify;
  423. }
  424. static inline bool __vringh_notify_enable(struct vringh *vrh,
  425. int (*getu16)(const struct vringh *vrh,
  426. u16 *val, const __virtio16 *p),
  427. int (*putu16)(const struct vringh *vrh,
  428. __virtio16 *p, u16 val))
  429. {
  430. u16 avail;
  431. if (!vrh->event_indices) {
  432. /* Old-school; update flags. */
  433. if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
  434. vringh_bad("Clearing used flags %p",
  435. &vrh->vring.used->flags);
  436. return true;
  437. }
  438. } else {
  439. if (putu16(vrh, &vring_avail_event(&vrh->vring),
  440. vrh->last_avail_idx) != 0) {
  441. vringh_bad("Updating avail event index %p",
  442. &vring_avail_event(&vrh->vring));
  443. return true;
  444. }
  445. }
  446. /* They could have slipped one in as we were doing that: make
  447. * sure it's written, then check again. */
  448. virtio_mb(vrh->weak_barriers);
  449. if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
  450. vringh_bad("Failed to check avail idx at %p",
  451. &vrh->vring.avail->idx);
  452. return true;
  453. }
  454. /* This is unlikely, so we just leave notifications enabled
  455. * (if we're using event_indices, we'll only get one
  456. * notification anyway). */
  457. return avail == vrh->last_avail_idx;
  458. }
  459. static inline void __vringh_notify_disable(struct vringh *vrh,
  460. int (*putu16)(const struct vringh *vrh,
  461. __virtio16 *p, u16 val))
  462. {
  463. if (!vrh->event_indices) {
  464. /* Old-school; update flags. */
  465. if (putu16(vrh, &vrh->vring.used->flags,
  466. VRING_USED_F_NO_NOTIFY)) {
  467. vringh_bad("Setting used flags %p",
  468. &vrh->vring.used->flags);
  469. }
  470. }
  471. }
  472. /* Userspace access helpers: in this case, addresses are really userspace. */
  473. static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
  474. {
  475. __virtio16 v = 0;
  476. int rc = get_user(v, (__force __virtio16 __user *)p);
  477. *val = vringh16_to_cpu(vrh, v);
  478. return rc;
  479. }
  480. static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
  481. {
  482. __virtio16 v = cpu_to_vringh16(vrh, val);
  483. return put_user(v, (__force __virtio16 __user *)p);
  484. }
  485. static inline int copydesc_user(void *dst, const void *src, size_t len)
  486. {
  487. return copy_from_user(dst, (__force void __user *)src, len) ?
  488. -EFAULT : 0;
  489. }
  490. static inline int putused_user(struct vring_used_elem *dst,
  491. const struct vring_used_elem *src,
  492. unsigned int num)
  493. {
  494. return copy_to_user((__force void __user *)dst, src,
  495. sizeof(*dst) * num) ? -EFAULT : 0;
  496. }
  497. static inline int xfer_from_user(void *src, void *dst, size_t len)
  498. {
  499. return copy_from_user(dst, (__force void __user *)src, len) ?
  500. -EFAULT : 0;
  501. }
  502. static inline int xfer_to_user(void *dst, void *src, size_t len)
  503. {
  504. return copy_to_user((__force void __user *)dst, src, len) ?
  505. -EFAULT : 0;
  506. }
  507. /**
  508. * vringh_init_user - initialize a vringh for a userspace vring.
  509. * @vrh: the vringh to initialize.
  510. * @features: the feature bits for this ring.
  511. * @num: the number of elements.
  512. * @weak_barriers: true if we only need memory barriers, not I/O.
  513. * @desc: the userpace descriptor pointer.
  514. * @avail: the userpace avail pointer.
  515. * @used: the userpace used pointer.
  516. *
  517. * Returns an error if num is invalid: you should check pointers
  518. * yourself!
  519. */
  520. int vringh_init_user(struct vringh *vrh, u64 features,
  521. unsigned int num, bool weak_barriers,
  522. struct vring_desc __user *desc,
  523. struct vring_avail __user *avail,
  524. struct vring_used __user *used)
  525. {
  526. /* Sane power of 2 please! */
  527. if (!num || num > 0xffff || (num & (num - 1))) {
  528. vringh_bad("Bad ring size %u", num);
  529. return -EINVAL;
  530. }
  531. vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
  532. vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
  533. vrh->weak_barriers = weak_barriers;
  534. vrh->completed = 0;
  535. vrh->last_avail_idx = 0;
  536. vrh->last_used_idx = 0;
  537. vrh->vring.num = num;
  538. /* vring expects kernel addresses, but only used via accessors. */
  539. vrh->vring.desc = (__force struct vring_desc *)desc;
  540. vrh->vring.avail = (__force struct vring_avail *)avail;
  541. vrh->vring.used = (__force struct vring_used *)used;
  542. return 0;
  543. }
  544. EXPORT_SYMBOL(vringh_init_user);
  545. /**
  546. * vringh_getdesc_user - get next available descriptor from userspace ring.
  547. * @vrh: the userspace vring.
  548. * @riov: where to put the readable descriptors (or NULL)
  549. * @wiov: where to put the writable descriptors (or NULL)
  550. * @getrange: function to call to check ranges.
  551. * @head: head index we received, for passing to vringh_complete_user().
  552. *
  553. * Returns 0 if there was no descriptor, 1 if there was, or -errno.
  554. *
  555. * Note that on error return, you can tell the difference between an
  556. * invalid ring and a single invalid descriptor: in the former case,
  557. * *head will be vrh->vring.num. You may be able to ignore an invalid
  558. * descriptor, but there's not much you can do with an invalid ring.
  559. *
  560. * Note that you may need to clean up riov and wiov, even on error!
  561. */
  562. int vringh_getdesc_user(struct vringh *vrh,
  563. struct vringh_iov *riov,
  564. struct vringh_iov *wiov,
  565. bool (*getrange)(struct vringh *vrh,
  566. u64 addr, struct vringh_range *r),
  567. u16 *head)
  568. {
  569. int err;
  570. *head = vrh->vring.num;
  571. err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
  572. if (err < 0)
  573. return err;
  574. /* Empty... */
  575. if (err == vrh->vring.num)
  576. return 0;
  577. /* We need the layouts to be the identical for this to work */
  578. BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
  579. BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
  580. offsetof(struct vringh_iov, iov));
  581. BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
  582. offsetof(struct vringh_iov, i));
  583. BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
  584. offsetof(struct vringh_iov, used));
  585. BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
  586. offsetof(struct vringh_iov, max_num));
  587. BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
  588. BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
  589. offsetof(struct kvec, iov_base));
  590. BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
  591. offsetof(struct kvec, iov_len));
  592. BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
  593. != sizeof(((struct kvec *)NULL)->iov_base));
  594. BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
  595. != sizeof(((struct kvec *)NULL)->iov_len));
  596. *head = err;
  597. err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
  598. (struct vringh_kiov *)wiov,
  599. range_check, getrange, GFP_KERNEL, copydesc_user);
  600. if (err)
  601. return err;
  602. return 1;
  603. }
  604. EXPORT_SYMBOL(vringh_getdesc_user);
  605. /**
  606. * vringh_iov_pull_user - copy bytes from vring_iov.
  607. * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
  608. * @dst: the place to copy.
  609. * @len: the maximum length to copy.
  610. *
  611. * Returns the bytes copied <= len or a negative errno.
  612. */
  613. ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
  614. {
  615. return vringh_iov_xfer((struct vringh_kiov *)riov,
  616. dst, len, xfer_from_user);
  617. }
  618. EXPORT_SYMBOL(vringh_iov_pull_user);
  619. /**
  620. * vringh_iov_push_user - copy bytes into vring_iov.
  621. * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
  622. * @dst: the place to copy.
  623. * @len: the maximum length to copy.
  624. *
  625. * Returns the bytes copied <= len or a negative errno.
  626. */
  627. ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
  628. const void *src, size_t len)
  629. {
  630. return vringh_iov_xfer((struct vringh_kiov *)wiov,
  631. (void *)src, len, xfer_to_user);
  632. }
  633. EXPORT_SYMBOL(vringh_iov_push_user);
  634. /**
  635. * vringh_abandon_user - we've decided not to handle the descriptor(s).
  636. * @vrh: the vring.
  637. * @num: the number of descriptors to put back (ie. num
  638. * vringh_get_user() to undo).
  639. *
  640. * The next vringh_get_user() will return the old descriptor(s) again.
  641. */
  642. void vringh_abandon_user(struct vringh *vrh, unsigned int num)
  643. {
  644. /* We only update vring_avail_event(vr) when we want to be notified,
  645. * so we haven't changed that yet. */
  646. vrh->last_avail_idx -= num;
  647. }
  648. EXPORT_SYMBOL(vringh_abandon_user);
  649. /**
  650. * vringh_complete_user - we've finished with descriptor, publish it.
  651. * @vrh: the vring.
  652. * @head: the head as filled in by vringh_getdesc_user.
  653. * @len: the length of data we have written.
  654. *
  655. * You should check vringh_need_notify_user() after one or more calls
  656. * to this function.
  657. */
  658. int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
  659. {
  660. struct vring_used_elem used;
  661. used.id = cpu_to_vringh32(vrh, head);
  662. used.len = cpu_to_vringh32(vrh, len);
  663. return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
  664. }
  665. EXPORT_SYMBOL(vringh_complete_user);
  666. /**
  667. * vringh_complete_multi_user - we've finished with many descriptors.
  668. * @vrh: the vring.
  669. * @used: the head, length pairs.
  670. * @num_used: the number of used elements.
  671. *
  672. * You should check vringh_need_notify_user() after one or more calls
  673. * to this function.
  674. */
  675. int vringh_complete_multi_user(struct vringh *vrh,
  676. const struct vring_used_elem used[],
  677. unsigned num_used)
  678. {
  679. return __vringh_complete(vrh, used, num_used,
  680. putu16_user, putused_user);
  681. }
  682. EXPORT_SYMBOL(vringh_complete_multi_user);
  683. /**
  684. * vringh_notify_enable_user - we want to know if something changes.
  685. * @vrh: the vring.
  686. *
  687. * This always enables notifications, but returns false if there are
  688. * now more buffers available in the vring.
  689. */
  690. bool vringh_notify_enable_user(struct vringh *vrh)
  691. {
  692. return __vringh_notify_enable(vrh, getu16_user, putu16_user);
  693. }
  694. EXPORT_SYMBOL(vringh_notify_enable_user);
  695. /**
  696. * vringh_notify_disable_user - don't tell us if something changes.
  697. * @vrh: the vring.
  698. *
  699. * This is our normal running state: we disable and then only enable when
  700. * we're going to sleep.
  701. */
  702. void vringh_notify_disable_user(struct vringh *vrh)
  703. {
  704. __vringh_notify_disable(vrh, putu16_user);
  705. }
  706. EXPORT_SYMBOL(vringh_notify_disable_user);
  707. /**
  708. * vringh_need_notify_user - must we tell the other side about used buffers?
  709. * @vrh: the vring we've called vringh_complete_user() on.
  710. *
  711. * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
  712. */
  713. int vringh_need_notify_user(struct vringh *vrh)
  714. {
  715. return __vringh_need_notify(vrh, getu16_user);
  716. }
  717. EXPORT_SYMBOL(vringh_need_notify_user);
  718. /* Kernelspace access helpers. */
  719. static inline int getu16_kern(const struct vringh *vrh,
  720. u16 *val, const __virtio16 *p)
  721. {
  722. *val = vringh16_to_cpu(vrh, ACCESS_ONCE(*p));
  723. return 0;
  724. }
  725. static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
  726. {
  727. ACCESS_ONCE(*p) = cpu_to_vringh16(vrh, val);
  728. return 0;
  729. }
  /*
   * Descriptor fetch for kernel rings: a plain memory copy of @len bytes
   * from @src to @dst.  Cannot fail, so it always returns 0.
   */
  static inline int copydesc_kern(void *dst, const void *src, size_t len)
  {
      (void)memcpy(dst, src, len);

      return 0;
  }
  735. static inline int putused_kern(struct vring_used_elem *dst,
  736. const struct vring_used_elem *src,
  737. unsigned int num)
  738. {
  739. memcpy(dst, src, num * sizeof(*dst));
  740. return 0;
  741. }
  /*
   * Transfer callback for kernel rings.  Note the argument order: the FIRST
   * argument is the source and the second the destination, i.e. when called
   * by vringh_iov_xfer() as (iov_base, ptr) it copies out of the ring buffer.
   * Always returns 0.
   */
  static inline int xfer_kern(void *src, void *dst, size_t len)
  {
      (void)memcpy(dst, src, len);

      return 0;
  }
  747. /**
  748. * vringh_init_kern - initialize a vringh for a kernelspace vring.
  749. * @vrh: the vringh to initialize.
  750. * @features: the feature bits for this ring.
  751. * @num: the number of elements.
  752. * @weak_barriers: true if we only need memory barriers, not I/O.
  753. * @desc: the userpace descriptor pointer.
  754. * @avail: the userpace avail pointer.
  755. * @used: the userpace used pointer.
  756. *
  757. * Returns an error if num is invalid.
  758. */
  759. int vringh_init_kern(struct vringh *vrh, u64 features,
  760. unsigned int num, bool weak_barriers,
  761. struct vring_desc *desc,
  762. struct vring_avail *avail,
  763. struct vring_used *used)
  764. {
  765. /* Sane power of 2 please! */
  766. if (!num || num > 0xffff || (num & (num - 1))) {
  767. vringh_bad("Bad ring size %u", num);
  768. return -EINVAL;
  769. }
  770. vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
  771. vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
  772. vrh->weak_barriers = weak_barriers;
  773. vrh->completed = 0;
  774. vrh->last_avail_idx = 0;
  775. vrh->last_used_idx = 0;
  776. vrh->vring.num = num;
  777. vrh->vring.desc = desc;
  778. vrh->vring.avail = avail;
  779. vrh->vring.used = used;
  780. return 0;
  781. }
  782. EXPORT_SYMBOL(vringh_init_kern);
  783. /**
  784. * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
  785. * @vrh: the kernelspace vring.
  786. * @riov: where to put the readable descriptors (or NULL)
  787. * @wiov: where to put the writable descriptors (or NULL)
  788. * @head: head index we received, for passing to vringh_complete_kern().
  789. * @gfp: flags for allocating larger riov/wiov.
  790. *
  791. * Returns 0 if there was no descriptor, 1 if there was, or -errno.
  792. *
  793. * Note that on error return, you can tell the difference between an
  794. * invalid ring and a single invalid descriptor: in the former case,
  795. * *head will be vrh->vring.num. You may be able to ignore an invalid
  796. * descriptor, but there's not much you can do with an invalid ring.
  797. *
  798. * Note that you may need to clean up riov and wiov, even on error!
  799. */
  800. int vringh_getdesc_kern(struct vringh *vrh,
  801. struct vringh_kiov *riov,
  802. struct vringh_kiov *wiov,
  803. u16 *head,
  804. gfp_t gfp)
  805. {
  806. int err;
  807. err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
  808. if (err < 0)
  809. return err;
  810. /* Empty... */
  811. if (err == vrh->vring.num)
  812. return 0;
  813. *head = err;
  814. err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
  815. gfp, copydesc_kern);
  816. if (err)
  817. return err;
  818. return 1;
  819. }
  820. EXPORT_SYMBOL(vringh_getdesc_kern);
  821. /**
  822. * vringh_iov_pull_kern - copy bytes from vring_iov.
  823. * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
  824. * @dst: the place to copy.
  825. * @len: the maximum length to copy.
  826. *
  827. * Returns the bytes copied <= len or a negative errno.
  828. */
  829. ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
  830. {
  831. return vringh_iov_xfer(riov, dst, len, xfer_kern);
  832. }
  833. EXPORT_SYMBOL(vringh_iov_pull_kern);
  /*
   * Transfer callback for pushing into a kernel ring buffer.
   * vringh_iov_xfer() invokes its callback as (iov_base, caller pointer), so
   * for a push the first argument is the destination.
   */
  static inline int kern_xfer(void *dst, void *src, size_t len)
  {
      memcpy(dst, src, len);
      return 0;
  }

  /**
   * vringh_iov_push_kern - copy bytes into vring_iov.
   * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
   * @src: the place to copy from.
   * @len: the maximum length to copy.
   *
   * Returns the bytes copied <= len or a negative errno.
   */
  ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
                               const void *src, size_t len)
  {
      /*
       * Must copy FROM @src INTO the ring buffers; a (src, dst)-ordered
       * callback here would overwrite the caller's source buffer instead.
       */
      return vringh_iov_xfer(wiov, (void *)src, len, kern_xfer);
  }
  EXPORT_SYMBOL(vringh_iov_push_kern);
  848. /**
  849. * vringh_abandon_kern - we've decided not to handle the descriptor(s).
  850. * @vrh: the vring.
  851. * @num: the number of descriptors to put back (ie. num
  852. * vringh_get_kern() to undo).
  853. *
  854. * The next vringh_get_kern() will return the old descriptor(s) again.
  855. */
  856. void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
  857. {
  858. /* We only update vring_avail_event(vr) when we want to be notified,
  859. * so we haven't changed that yet. */
  860. vrh->last_avail_idx -= num;
  861. }
  862. EXPORT_SYMBOL(vringh_abandon_kern);
  863. /**
  864. * vringh_complete_kern - we've finished with descriptor, publish it.
  865. * @vrh: the vring.
  866. * @head: the head as filled in by vringh_getdesc_kern.
  867. * @len: the length of data we have written.
  868. *
  869. * You should check vringh_need_notify_kern() after one or more calls
  870. * to this function.
  871. */
  872. int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
  873. {
  874. struct vring_used_elem used;
  875. used.id = cpu_to_vringh32(vrh, head);
  876. used.len = cpu_to_vringh32(vrh, len);
  877. return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
  878. }
  879. EXPORT_SYMBOL(vringh_complete_kern);
  880. /**
  881. * vringh_notify_enable_kern - we want to know if something changes.
  882. * @vrh: the vring.
  883. *
  884. * This always enables notifications, but returns false if there are
  885. * now more buffers available in the vring.
  886. */
  887. bool vringh_notify_enable_kern(struct vringh *vrh)
  888. {
  889. return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
  890. }
  891. EXPORT_SYMBOL(vringh_notify_enable_kern);
  892. /**
  893. * vringh_notify_disable_kern - don't tell us if something changes.
  894. * @vrh: the vring.
  895. *
  896. * This is our normal running state: we disable and then only enable when
  897. * we're going to sleep.
  898. */
  899. void vringh_notify_disable_kern(struct vringh *vrh)
  900. {
  901. __vringh_notify_disable(vrh, putu16_kern);
  902. }
  903. EXPORT_SYMBOL(vringh_notify_disable_kern);
  904. /**
  905. * vringh_need_notify_kern - must we tell the other side about used buffers?
  906. * @vrh: the vring we've called vringh_complete_kern() on.
  907. *
  908. * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
  909. */
  910. int vringh_need_notify_kern(struct vringh *vrh)
  911. {
  912. return __vringh_need_notify(vrh, getu16_kern);
  913. }
  914. EXPORT_SYMBOL(vringh_need_notify_kern);
  915. MODULE_LICENSE("GPL");