ring_buffer.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558
  1. /*
  2. *
  3. * Copyright (c) 2009, Microsoft Corporation.
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms and conditions of the GNU General Public License,
  7. * version 2, as published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope it will be useful, but WITHOUT
  10. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12. * more details.
  13. *
  14. * You should have received a copy of the GNU General Public License along with
  15. * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  16. * Place - Suite 330, Boston, MA 02111-1307 USA.
  17. *
  18. * Authors:
  19. * Haiyang Zhang <haiyangz@microsoft.com>
  20. * Hank Janssen <hjanssen@microsoft.com>
  21. * K. Y. Srinivasan <kys@microsoft.com>
  22. *
  23. */
  24. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  25. #include <linux/kernel.h>
  26. #include <linux/mm.h>
  27. #include <linux/hyperv.h>
  28. #include <linux/uio.h>
  29. #include "hyperv_vmbus.h"
  30. void hv_begin_read(struct hv_ring_buffer_info *rbi)
  31. {
  32. rbi->ring_buffer->interrupt_mask = 1;
  33. mb();
  34. }
  35. u32 hv_end_read(struct hv_ring_buffer_info *rbi)
  36. {
  37. u32 read;
  38. u32 write;
  39. rbi->ring_buffer->interrupt_mask = 0;
  40. mb();
  41. /*
  42. * Now check to see if the ring buffer is still empty.
  43. * If it is not, we raced and we need to process new
  44. * incoming messages.
  45. */
  46. hv_get_ringbuffer_availbytes(rbi, &read, &write);
  47. return read;
  48. }
  49. /*
  50. * When we write to the ring buffer, check if the host needs to
  51. * be signaled. Here is the details of this protocol:
  52. *
  53. * 1. The host guarantees that while it is draining the
  54. * ring buffer, it will set the interrupt_mask to
  55. * indicate it does not need to be interrupted when
  56. * new data is placed.
  57. *
  58. * 2. The host guarantees that it will completely drain
  59. * the ring buffer before exiting the read loop. Further,
  60. * once the ring buffer is empty, it will clear the
  61. * interrupt_mask and re-check to see if new data has
  62. * arrived.
  63. */
  64. static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi)
  65. {
  66. mb();
  67. if (rbi->ring_buffer->interrupt_mask)
  68. return false;
  69. /* check interrupt_mask before read_index */
  70. rmb();
  71. /*
  72. * This is the only case we need to signal when the
  73. * ring transitions from being empty to non-empty.
  74. */
  75. if (old_write == rbi->ring_buffer->read_index)
  76. return true;
  77. return false;
  78. }
  79. /*
  80. * To optimize the flow management on the send-side,
  81. * when the sender is blocked because of lack of
  82. * sufficient space in the ring buffer, potential the
  83. * consumer of the ring buffer can signal the producer.
  84. * This is controlled by the following parameters:
  85. *
  86. * 1. pending_send_sz: This is the size in bytes that the
  87. * producer is trying to send.
  88. * 2. The feature bit feat_pending_send_sz set to indicate if
  89. * the consumer of the ring will signal when the ring
  90. * state transitions from being full to a state where
  91. * there is room for the producer to send the pending packet.
  92. */
  93. static bool hv_need_to_signal_on_read(u32 prev_write_sz,
  94. struct hv_ring_buffer_info *rbi)
  95. {
  96. u32 cur_write_sz;
  97. u32 r_size;
  98. u32 write_loc = rbi->ring_buffer->write_index;
  99. u32 read_loc = rbi->ring_buffer->read_index;
  100. u32 pending_sz = rbi->ring_buffer->pending_send_sz;
  101. /*
  102. * If the other end is not blocked on write don't bother.
  103. */
  104. if (pending_sz == 0)
  105. return false;
  106. r_size = rbi->ring_datasize;
  107. cur_write_sz = write_loc >= read_loc ? r_size - (write_loc - read_loc) :
  108. read_loc - write_loc;
  109. if ((prev_write_sz < pending_sz) && (cur_write_sz >= pending_sz))
  110. return true;
  111. return false;
  112. }
  113. /*
  114. * hv_get_next_write_location()
  115. *
  116. * Get the next write location for the specified ring buffer
  117. *
  118. */
  119. static inline u32
  120. hv_get_next_write_location(struct hv_ring_buffer_info *ring_info)
  121. {
  122. u32 next = ring_info->ring_buffer->write_index;
  123. return next;
  124. }
  125. /*
  126. * hv_set_next_write_location()
  127. *
  128. * Set the next write location for the specified ring buffer
  129. *
  130. */
  131. static inline void
  132. hv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
  133. u32 next_write_location)
  134. {
  135. ring_info->ring_buffer->write_index = next_write_location;
  136. }
  137. /*
  138. * hv_get_next_read_location()
  139. *
  140. * Get the next read location for the specified ring buffer
  141. */
  142. static inline u32
  143. hv_get_next_read_location(struct hv_ring_buffer_info *ring_info)
  144. {
  145. u32 next = ring_info->ring_buffer->read_index;
  146. return next;
  147. }
  148. /*
  149. * hv_get_next_readlocation_withoffset()
  150. *
  151. * Get the next read location + offset for the specified ring buffer.
  152. * This allows the caller to skip
  153. */
  154. static inline u32
  155. hv_get_next_readlocation_withoffset(struct hv_ring_buffer_info *ring_info,
  156. u32 offset)
  157. {
  158. u32 next = ring_info->ring_buffer->read_index;
  159. next += offset;
  160. next %= ring_info->ring_datasize;
  161. return next;
  162. }
  163. /*
  164. *
  165. * hv_set_next_read_location()
  166. *
  167. * Set the next read location for the specified ring buffer
  168. *
  169. */
  170. static inline void
  171. hv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
  172. u32 next_read_location)
  173. {
  174. ring_info->ring_buffer->read_index = next_read_location;
  175. }
  176. /*
  177. *
  178. * hv_get_ring_buffer()
  179. *
  180. * Get the start of the ring buffer
  181. */
  182. static inline void *
  183. hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info)
  184. {
  185. return (void *)ring_info->ring_buffer->buffer;
  186. }
  187. /*
  188. *
  189. * hv_get_ring_buffersize()
  190. *
  191. * Get the size of the ring buffer
  192. */
  193. static inline u32
  194. hv_get_ring_buffersize(struct hv_ring_buffer_info *ring_info)
  195. {
  196. return ring_info->ring_datasize;
  197. }
  198. /*
  199. *
  200. * hv_get_ring_bufferindices()
  201. *
  202. * Get the read and write indices as u64 of the specified ring buffer
  203. *
  204. */
  205. static inline u64
  206. hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
  207. {
  208. return (u64)ring_info->ring_buffer->write_index << 32;
  209. }
  210. /*
  211. *
  212. * hv_copyfrom_ringbuffer()
  213. *
  214. * Helper routine to copy to source from ring buffer.
  215. * Assume there is enough room. Handles wrap-around in src case only!!
  216. *
  217. */
  218. static u32 hv_copyfrom_ringbuffer(
  219. struct hv_ring_buffer_info *ring_info,
  220. void *dest,
  221. u32 destlen,
  222. u32 start_read_offset)
  223. {
  224. void *ring_buffer = hv_get_ring_buffer(ring_info);
  225. u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
  226. u32 frag_len;
  227. /* wrap-around detected at the src */
  228. if (destlen > ring_buffer_size - start_read_offset) {
  229. frag_len = ring_buffer_size - start_read_offset;
  230. memcpy(dest, ring_buffer + start_read_offset, frag_len);
  231. memcpy(dest + frag_len, ring_buffer, destlen - frag_len);
  232. } else
  233. memcpy(dest, ring_buffer + start_read_offset, destlen);
  234. start_read_offset += destlen;
  235. start_read_offset %= ring_buffer_size;
  236. return start_read_offset;
  237. }
  238. /*
  239. *
  240. * hv_copyto_ringbuffer()
  241. *
  242. * Helper routine to copy from source to ring buffer.
  243. * Assume there is enough room. Handles wrap-around in dest case only!!
  244. *
  245. */
  246. static u32 hv_copyto_ringbuffer(
  247. struct hv_ring_buffer_info *ring_info,
  248. u32 start_write_offset,
  249. void *src,
  250. u32 srclen)
  251. {
  252. void *ring_buffer = hv_get_ring_buffer(ring_info);
  253. u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
  254. u32 frag_len;
  255. /* wrap-around detected! */
  256. if (srclen > ring_buffer_size - start_write_offset) {
  257. frag_len = ring_buffer_size - start_write_offset;
  258. memcpy(ring_buffer + start_write_offset, src, frag_len);
  259. memcpy(ring_buffer, src + frag_len, srclen - frag_len);
  260. } else
  261. memcpy(ring_buffer + start_write_offset, src, srclen);
  262. start_write_offset += srclen;
  263. start_write_offset %= ring_buffer_size;
  264. return start_write_offset;
  265. }
  266. /*
  267. *
  268. * hv_ringbuffer_get_debuginfo()
  269. *
  270. * Get various debug metrics for the specified ring buffer
  271. *
  272. */
  273. void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
  274. struct hv_ring_buffer_debug_info *debug_info)
  275. {
  276. u32 bytes_avail_towrite;
  277. u32 bytes_avail_toread;
  278. if (ring_info->ring_buffer) {
  279. hv_get_ringbuffer_availbytes(ring_info,
  280. &bytes_avail_toread,
  281. &bytes_avail_towrite);
  282. debug_info->bytes_avail_toread = bytes_avail_toread;
  283. debug_info->bytes_avail_towrite = bytes_avail_towrite;
  284. debug_info->current_read_index =
  285. ring_info->ring_buffer->read_index;
  286. debug_info->current_write_index =
  287. ring_info->ring_buffer->write_index;
  288. debug_info->current_interrupt_mask =
  289. ring_info->ring_buffer->interrupt_mask;
  290. }
  291. }
  292. /*
  293. *
  294. * hv_ringbuffer_init()
  295. *
  296. *Initialize the ring buffer
  297. *
  298. */
  299. int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
  300. void *buffer, u32 buflen)
  301. {
  302. if (sizeof(struct hv_ring_buffer) != PAGE_SIZE)
  303. return -EINVAL;
  304. memset(ring_info, 0, sizeof(struct hv_ring_buffer_info));
  305. ring_info->ring_buffer = (struct hv_ring_buffer *)buffer;
  306. ring_info->ring_buffer->read_index =
  307. ring_info->ring_buffer->write_index = 0;
  308. /*
  309. * Set the feature bit for enabling flow control.
  310. */
  311. ring_info->ring_buffer->feature_bits.value = 1;
  312. ring_info->ring_size = buflen;
  313. ring_info->ring_datasize = buflen - sizeof(struct hv_ring_buffer);
  314. spin_lock_init(&ring_info->ring_lock);
  315. return 0;
  316. }
  317. /*
  318. *
  319. * hv_ringbuffer_cleanup()
  320. *
  321. * Cleanup the ring buffer
  322. *
  323. */
  324. void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
  325. {
  326. }
  327. /*
  328. *
  329. * hv_ringbuffer_write()
  330. *
  331. * Write to the ring buffer
  332. *
  333. */
  334. int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
  335. struct kvec *kv_list, u32 kv_count, bool *signal)
  336. {
  337. int i = 0;
  338. u32 bytes_avail_towrite;
  339. u32 bytes_avail_toread;
  340. u32 totalbytes_towrite = 0;
  341. u32 next_write_location;
  342. u32 old_write;
  343. u64 prev_indices = 0;
  344. unsigned long flags;
  345. for (i = 0; i < kv_count; i++)
  346. totalbytes_towrite += kv_list[i].iov_len;
  347. totalbytes_towrite += sizeof(u64);
  348. spin_lock_irqsave(&outring_info->ring_lock, flags);
  349. hv_get_ringbuffer_availbytes(outring_info,
  350. &bytes_avail_toread,
  351. &bytes_avail_towrite);
  352. /* If there is only room for the packet, assume it is full. */
  353. /* Otherwise, the next time around, we think the ring buffer */
  354. /* is empty since the read index == write index */
  355. if (bytes_avail_towrite <= totalbytes_towrite) {
  356. spin_unlock_irqrestore(&outring_info->ring_lock, flags);
  357. return -EAGAIN;
  358. }
  359. /* Write to the ring buffer */
  360. next_write_location = hv_get_next_write_location(outring_info);
  361. old_write = next_write_location;
  362. for (i = 0; i < kv_count; i++) {
  363. next_write_location = hv_copyto_ringbuffer(outring_info,
  364. next_write_location,
  365. kv_list[i].iov_base,
  366. kv_list[i].iov_len);
  367. }
  368. /* Set previous packet start */
  369. prev_indices = hv_get_ring_bufferindices(outring_info);
  370. next_write_location = hv_copyto_ringbuffer(outring_info,
  371. next_write_location,
  372. &prev_indices,
  373. sizeof(u64));
  374. /* Issue a full memory barrier before updating the write index */
  375. mb();
  376. /* Now, update the write location */
  377. hv_set_next_write_location(outring_info, next_write_location);
  378. spin_unlock_irqrestore(&outring_info->ring_lock, flags);
  379. *signal = hv_need_to_signal(old_write, outring_info);
  380. return 0;
  381. }
  382. /*
  383. *
  384. * hv_ringbuffer_peek()
  385. *
  386. * Read without advancing the read index
  387. *
  388. */
  389. int hv_ringbuffer_peek(struct hv_ring_buffer_info *Inring_info,
  390. void *Buffer, u32 buflen)
  391. {
  392. u32 bytes_avail_towrite;
  393. u32 bytes_avail_toread;
  394. u32 next_read_location = 0;
  395. unsigned long flags;
  396. spin_lock_irqsave(&Inring_info->ring_lock, flags);
  397. hv_get_ringbuffer_availbytes(Inring_info,
  398. &bytes_avail_toread,
  399. &bytes_avail_towrite);
  400. /* Make sure there is something to read */
  401. if (bytes_avail_toread < buflen) {
  402. spin_unlock_irqrestore(&Inring_info->ring_lock, flags);
  403. return -EAGAIN;
  404. }
  405. /* Convert to byte offset */
  406. next_read_location = hv_get_next_read_location(Inring_info);
  407. next_read_location = hv_copyfrom_ringbuffer(Inring_info,
  408. Buffer,
  409. buflen,
  410. next_read_location);
  411. spin_unlock_irqrestore(&Inring_info->ring_lock, flags);
  412. return 0;
  413. }
  414. /*
  415. *
  416. * hv_ringbuffer_read()
  417. *
  418. * Read and advance the read index
  419. *
  420. */
  421. int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer,
  422. u32 buflen, u32 offset, bool *signal)
  423. {
  424. u32 bytes_avail_towrite;
  425. u32 bytes_avail_toread;
  426. u32 next_read_location = 0;
  427. u64 prev_indices = 0;
  428. unsigned long flags;
  429. if (buflen <= 0)
  430. return -EINVAL;
  431. spin_lock_irqsave(&inring_info->ring_lock, flags);
  432. hv_get_ringbuffer_availbytes(inring_info,
  433. &bytes_avail_toread,
  434. &bytes_avail_towrite);
  435. /* Make sure there is something to read */
  436. if (bytes_avail_toread < buflen) {
  437. spin_unlock_irqrestore(&inring_info->ring_lock, flags);
  438. return -EAGAIN;
  439. }
  440. next_read_location =
  441. hv_get_next_readlocation_withoffset(inring_info, offset);
  442. next_read_location = hv_copyfrom_ringbuffer(inring_info,
  443. buffer,
  444. buflen,
  445. next_read_location);
  446. next_read_location = hv_copyfrom_ringbuffer(inring_info,
  447. &prev_indices,
  448. sizeof(u64),
  449. next_read_location);
  450. /* Make sure all reads are done before we update the read index since */
  451. /* the writer may start writing to the read area once the read index */
  452. /*is updated */
  453. mb();
  454. /* Update the read index */
  455. hv_set_next_read_location(inring_info, next_read_location);
  456. spin_unlock_irqrestore(&inring_info->ring_lock, flags);
  457. *signal = hv_need_to_signal_on_read(bytes_avail_towrite, inring_info);
  458. return 0;
  459. }