/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Dave Airlie
 */
#include <linux/seq_file.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

/*
 * Fences
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization.  When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed.  Whether
 * we use a scratch register or memory location depends on the asic
 * and whether writeback is enabled.
 */
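
/*
 * A minimal usage sketch (illustrative only, not taken from the original
 * source): callers typically emit a fence right after their ring commands
 * and wait on it later, roughly:
 *
 *	struct radeon_fence *fence;
 *	int r = radeon_fence_emit(rdev, &fence, ring);   (under the ring lock)
 *	if (!r) {
 *		r = radeon_fence_wait(fence, false);
 *		radeon_fence_unref(&fence);
 *	}
 *
 * radeon_fence_emit() assumes the ring emission mutex is held; see the
 * comment in that function below.
 */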

/**
 * radeon_fence_write - write a fence value
 *
 * @rdev: radeon_device pointer
 * @seq: sequence number to write
 * @ring: ring index the fence is associated with
 *
 * Writes a fence value to memory or a scratch register (all asics).
 */
static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];

	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		if (drv->cpu_addr) {
			*drv->cpu_addr = cpu_to_le32(seq);
		}
	} else {
		WREG32(drv->scratch_reg, seq);
	}
}

/**
 * radeon_fence_read - read a fence value
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Reads a fence value from memory or a scratch register (all asics).
 * Returns the value of the fence read from memory or register.
 */
static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	u32 seq = 0;

	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		if (drv->cpu_addr) {
			seq = le32_to_cpu(*drv->cpu_addr);
		} else {
			seq = lower_32_bits(atomic64_read(&drv->last_seq));
		}
	} else {
		seq = RREG32(drv->scratch_reg);
	}
	return seq;
}

/**
 * radeon_fence_schedule_check - schedule lockup check
 *
 * @rdev: radeon_device pointer
 * @ring: ring index we should work with
 *
 * Queues a delayed work item to check for lockups.
 */
static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
{
	/*
	 * Do not reset the timer here with mod_delayed_work,
	 * this can livelock in an interaction with TTM delayed destroy.
	 */
	queue_delayed_work(system_power_efficient_wq,
			   &rdev->fence_drv[ring].lockup_work,
			   RADEON_FENCE_JIFFIES_TIMEOUT);
}

/**
 * radeon_fence_emit - emit a fence on the requested ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 * @ring: ring index the fence is associated with
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int radeon_fence_emit(struct radeon_device *rdev,
		      struct radeon_fence **fence,
		      int ring)
{
	u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];

	/* we are protected by the ring emission mutex */
	*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
	if ((*fence) == NULL) {
		return -ENOMEM;
	}
	(*fence)->rdev = rdev;
	(*fence)->seq = seq;
	(*fence)->ring = ring;
	(*fence)->is_vm_update = false;
	fence_init(&(*fence)->base, &radeon_fence_ops,
		   &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
	radeon_fence_ring_emit(rdev, ring, *fence);
	trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
	radeon_fence_schedule_check(rdev, ring);
	return 0;
}

/**
 * radeon_fence_check_signaled - callback from fence_queue
 *
 * this function is called with fence_queue lock held, which is also used
 * for the fence locking itself, so unlocked variants are used for
 * fence_signal, and remove_wait_queue.
 */
static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
{
	struct radeon_fence *fence;
	u64 seq;

	fence = container_of(wait, struct radeon_fence, fence_wake);

	/*
	 * We cannot use radeon_fence_process here because we're already
	 * in the waitqueue, in a call from wake_up_all.
	 */
	seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
	if (seq >= fence->seq) {
		int ret = fence_signal_locked(&fence->base);

		if (!ret)
			FENCE_TRACE(&fence->base, "signaled from irq context\n");
		else
			FENCE_TRACE(&fence->base, "was already signaled\n");

		radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
		__remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
		fence_put(&fence->base);
	} else
		FENCE_TRACE(&fence->base, "pending\n");
	return 0;
}

/**
 * radeon_fence_activity - check for fence activity
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and calculates the last
 * signaled fence value. Returns true if activity occurred
 * on the ring, and the fence_queue should be woken up.
 */
static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	/* Note there is a scenario here for an infinite loop but it's
	 * very unlikely to happen. For it to happen, the current polling
	 * process needs to be interrupted by another process, and that
	 * other process needs to update last_seq between the atomic read
	 * and the xchg of the current process.
	 *
	 * Moreover, for this to turn into an infinite loop, new fences
	 * need to be signaled continuously, i.e. radeon_fence_read needs
	 * to return a different value each time for both the currently
	 * polling process and the other process that updates last_seq
	 * between the atomic read and xchg of the current process. And the
	 * value the other process sets as last_seq must be higher than
	 * the seq value we just read. Which means that the current process
	 * needs to be interrupted after radeon_fence_read and before the
	 * atomic xchg.
	 *
	 * To be even more safe we count the number of times we loop and
	 * bail out after 10 loops, just accepting the fact that we might
	 * have temporarily set last_seq not to the true last signaled
	 * seq but to an older one.
	 */
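	/* Worked example (illustrative only, not from the original source):
	 * suppose last_seq == 0x1fffffff0, last_emitted == 0x200000010 and
	 * the 32-bit hardware value reads back 0x00000005.  ORing in the
	 * upper bits of last_seq gives 0x100000005, which is below last_seq,
	 * so a wrap is assumed and the upper bits of last_emitted are used
	 * instead, yielding 0x200000005.  The result is only kept if it
	 * falls in the window (last_seq, last_emitted].
	 */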
	last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
	do {
		last_emitted = rdev->fence_drv[ring].sync_seq[ring];
		seq = radeon_fence_read(rdev, ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop over, we don't want to return without
		 * checking if a fence is signaled, as it means that the
		 * seq we just read is different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We looped over too many times; leave with the
			 * fact that we might have set an older fence
			 * seq than the current real last seq signaled
			 * by the hw.
			 */
			break;
		}
	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);

	if (seq < last_emitted)
		radeon_fence_schedule_check(rdev, ring);

	return wake;
}

/**
 * radeon_fence_check_lockup - check for hardware lockup
 *
 * @work: delayed work item
 *
 * Checks for fence activity and if there is none probe
 * the hardware if a lockup occurred.
 */
static void radeon_fence_check_lockup(struct work_struct *work)
{
	struct radeon_fence_driver *fence_drv;
	struct radeon_device *rdev;
	int ring;

	fence_drv = container_of(work, struct radeon_fence_driver,
				 lockup_work.work);
	rdev = fence_drv->rdev;
	ring = fence_drv - &rdev->fence_drv[0];

	if (!down_read_trylock(&rdev->exclusive_lock)) {
		/* just reschedule the check if a reset is going on */
		radeon_fence_schedule_check(rdev, ring);
		return;
	}

	if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
		unsigned long irqflags;

		fence_drv->delayed_irq = false;
		spin_lock_irqsave(&rdev->irq.lock, irqflags);
		radeon_irq_set(rdev);
		spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
	}

	if (radeon_fence_activity(rdev, ring))
		wake_up_all(&rdev->fence_queue);

	else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
		/* good news we believe it's a lockup */
		dev_warn(rdev->dev, "GPU lockup (current fence id "
			 "0x%016llx last fence id 0x%016llx on ring %d)\n",
			 (uint64_t)atomic64_read(&fence_drv->last_seq),
			 fence_drv->sync_seq[ring], ring);

		/* remember that we need a reset */
		rdev->needs_reset = true;
		wake_up_all(&rdev->fence_queue);
	}
	up_read(&rdev->exclusive_lock);
}

/**
 * radeon_fence_process - process a fence
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and wakes the fence queue
 * if the sequence number has increased (all asics).
 */
void radeon_fence_process(struct radeon_device *rdev, int ring)
{
	if (radeon_fence_activity(rdev, ring))
		wake_up_all(&rdev->fence_queue);
}

/**
 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence number
 * @ring: ring index the fence is associated with
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if the fence has signaled (current fence value
 * is >= requested value) or false if it has not (current fence
 * value is < the requested value).  Helper function for
 * radeon_fence_signaled().
 */
static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
				      u64 seq, unsigned ring)
{
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	/* poll new last sequence at least once */
	radeon_fence_process(rdev, ring);
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	return false;
}

static bool radeon_fence_is_signaled(struct fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;
	unsigned ring = fence->ring;
	u64 seq = fence->seq;

	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}

	if (down_read_trylock(&rdev->exclusive_lock)) {
		radeon_fence_process(rdev, ring);
		up_read(&rdev->exclusive_lock);

		if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
			return true;
		}
	}
	return false;
}

/**
 * radeon_fence_enable_signaling - enable signalling on fence
 * @f: fence
 *
 * This function is called with fence_queue lock held, and adds a callback
 * to fence_queue that checks if this fence is signaled, and if so it
 * signals the fence and removes itself.
 */
static bool radeon_fence_enable_signaling(struct fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;

	if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
		return false;

	if (down_read_trylock(&rdev->exclusive_lock)) {
		radeon_irq_kms_sw_irq_get(rdev, fence->ring);

		if (radeon_fence_activity(rdev, fence->ring))
			wake_up_all_locked(&rdev->fence_queue);

		/* did fence get signaled after we enabled the sw irq? */
		if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
			radeon_irq_kms_sw_irq_put(rdev, fence->ring);
			up_read(&rdev->exclusive_lock);
			return false;
		}

		up_read(&rdev->exclusive_lock);
	} else {
		/* we're probably in a lockup, let's not fiddle too much */
		if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
			rdev->fence_drv[fence->ring].delayed_irq = true;
		radeon_fence_schedule_check(rdev, fence->ring);
	}

	fence->fence_wake.flags = 0;
	fence->fence_wake.private = NULL;
	fence->fence_wake.func = radeon_fence_check_signaled;
	__add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
	fence_get(f);

	FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
	return true;
}

/**
 * radeon_fence_signaled - check if a fence has signaled
 *
 * @fence: radeon fence object
 *
 * Check if the requested fence has signaled (all asics).
 * Returns true if the fence has signaled or false if it has not.
 */
bool radeon_fence_signaled(struct radeon_fence *fence)
{
	if (!fence)
		return true;

	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
		int ret;

		ret = fence_signal(&fence->base);
		if (!ret)
			FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
		return true;
	}
	return false;
}

/**
 * radeon_fence_any_seq_signaled - check if any sequence number is signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence numbers
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if any has signaled (current value is >= requested value)
 * or false if it has not. Helper function for radeon_fence_wait_seq.
 */
static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
{
	unsigned i;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
			return true;
	}
	return false;
}

/**
 * radeon_fence_wait_seq_timeout - wait for specific sequence numbers
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptible sleep
 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics).  Sequence number array is indexed by ring id.
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number.  Helper function
 * for radeon_fence_wait_*().
 * Returns remaining time if the sequence number has passed, 0 when
 * the wait timed out, or an error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected.
 */
static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
					  u64 *target_seq, bool intr,
					  long timeout)
{
	long r;
	int i;

	if (radeon_fence_any_seq_signaled(rdev, target_seq))
		return timeout;

	/* enable IRQs and tracing */
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!target_seq[i])
			continue;

		trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
		radeon_irq_kms_sw_irq_get(rdev, i);
	}

	if (intr) {
		r = wait_event_interruptible_timeout(rdev->fence_queue, (
			radeon_fence_any_seq_signaled(rdev, target_seq)
			 || rdev->needs_reset), timeout);
	} else {
		r = wait_event_timeout(rdev->fence_queue, (
			radeon_fence_any_seq_signaled(rdev, target_seq)
			 || rdev->needs_reset), timeout);
	}

	if (rdev->needs_reset)
		r = -EDEADLK;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!target_seq[i])
			continue;

		radeon_irq_kms_sw_irq_put(rdev, i);
		trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
	}

	return r;
}

/**
 * radeon_fence_wait - wait for a fence to signal
 *
 * @fence: radeon fence object
 * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns 0 if the fence has passed, error for all other cases.
 */
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	/*
	 * This function should not be called on !radeon fences.
	 * If this is the case, it would mean this function can
	 * also be called on radeon fences belonging to another card.
	 * exclusive_lock is not held in that case.
	 */
	if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
		return fence_wait(&fence->base, intr);

	seq[fence->ring] = fence->seq;
	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}

	r = fence_signal(&fence->base);
	if (!r)
		FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
	return 0;
}

/**
 * radeon_fence_wait_any - wait for a fence to signal on any ring
 *
 * @rdev: radeon device pointer
 * @fences: radeon fence object(s)
 * @intr: use interruptible sleep
 *
 * Wait for any requested fence to signal (all asics).  Fence
 * array is indexed by ring id.  @intr selects whether to use
 * interruptible (true) or non-interruptible (false) sleep when
 * waiting for the fences. Used by the suballocator.
 * Returns 0 if any fence has passed, error for all other cases.
 */
int radeon_fence_wait_any(struct radeon_device *rdev,
			  struct radeon_fence **fences,
			  bool intr)
{
	uint64_t seq[RADEON_NUM_RINGS];
	unsigned i, num_rings = 0;
	long r;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		seq[i] = 0;

		if (!fences[i]) {
			continue;
		}

		seq[i] = fences[i]->seq;
		++num_rings;
	}

	/* nothing to wait for ? */
	if (num_rings == 0)
		return -ENOENT;

	r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}
	return 0;
}

/**
 * radeon_fence_wait_next - wait for the next fence to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for the next fence on the requested ring to signal (all asics).
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
	if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
		/* nothing to wait for, last_seq is already
		   the last emitted fence */
		return -ENOENT;
	}
	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0)
		return r;
	return 0;
}

/**
 * radeon_fence_wait_empty - wait for all fences to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
	if (!seq[ring])
		return 0;

	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		if (r == -EDEADLK)
			return -EDEADLK;

		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
			ring, r);
	}
	return 0;
}

/**
 * radeon_fence_ref - take a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
 */
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
	fence_get(&fence->base);
	return fence;
}

/**
 * radeon_fence_unref - remove a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Remove a reference on a fence (all asics).
 */
void radeon_fence_unref(struct radeon_fence **fence)
{
	struct radeon_fence *tmp = *fence;

	*fence = NULL;
	if (tmp) {
		fence_put(&tmp->base);
	}
}

/**
 * radeon_fence_count_emitted - get the count of emitted fences
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring.  Used by
 * the dynpm code to track ring activity.
 */
unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
{
	uint64_t emitted;

	/* We are not protected by ring lock when reading the last sequence
	 * but it's ok to report slightly wrong fence count here.
	 */
	radeon_fence_process(rdev, ring);
	emitted = rdev->fence_drv[ring].sync_seq[ring]
		- atomic64_read(&rdev->fence_drv[ring].last_seq);
	/* to avoid a 32-bit wraparound */
	if (emitted > 0x10000000) {
		emitted = 0x10000000;
	}
	return (unsigned)emitted;
}

/**
 * radeon_fence_need_sync - do we need a semaphore
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Check if the fence needs to be synced against another ring
 * (all asics).  If so, we need to emit a semaphore.
 * Returns true if we need to sync with another ring, false if
 * not.
 */
bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *fdrv;

	if (!fence) {
		return false;
	}

	if (fence->ring == dst_ring) {
		return false;
	}

	/* we are protected by the ring mutex */
	fdrv = &fence->rdev->fence_drv[dst_ring];
	if (fence->seq <= fdrv->sync_seq[fence->ring]) {
		return false;
	}

	return true;
}

/**
 * radeon_fence_note_sync - record the sync point
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Note the sequence number at which point the fence will
 * be synced with the requested ring (all asics).
 */
void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *dst, *src;
	unsigned i;

	if (!fence) {
		return;
	}

	if (fence->ring == dst_ring) {
		return;
	}

	/* we are protected by the ring mutex */
	src = &fence->rdev->fence_drv[fence->ring];
	dst = &fence->rdev->fence_drv[dst_ring];
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (i == dst_ring) {
			continue;
		}
		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
	}
}

/**
 * radeon_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
 */
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
{
	uint64_t index;
	int r;

	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
	if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
		rdev->fence_drv[ring].scratch_reg = 0;
		if (ring != R600_RING_TYPE_UVD_INDEX) {
			index = R600_WB_EVENT_OFFSET + ring * 4;
			rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
			rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
							 index;
		} else {
			/* put fence directly behind firmware */
			index = ALIGN(rdev->uvd_fw->size, 8);
			rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
			rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
		}
	} else {
		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
		if (r) {
			dev_err(rdev->dev, "fence failed to get scratch register\n");
			return r;
		}
		index = RADEON_WB_SCRATCH_OFFSET +
			rdev->fence_drv[ring].scratch_reg -
			rdev->scratch.reg_base;
		rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
		rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
	}
	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
	rdev->fence_drv[ring].initialized = true;
	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
	return 0;
}

/**
 * radeon_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for radeon_fence_driver_init().
 */
static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
{
	int i;

	rdev->fence_drv[ring].scratch_reg = -1;
	rdev->fence_drv[ring].cpu_addr = NULL;
	rdev->fence_drv[ring].gpu_addr = 0;
	for (i = 0; i < RADEON_NUM_RINGS; ++i)
		rdev->fence_drv[ring].sync_seq[i] = 0;
	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
	rdev->fence_drv[ring].initialized = false;
	INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
			  radeon_fence_check_lockup);
	rdev->fence_drv[ring].rdev = rdev;
}

/**
 * radeon_fence_driver_init - init the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * radeon_fence_driver_start_ring().
 * Returns 0 for success.
 */
int radeon_fence_driver_init(struct radeon_device *rdev)
{
	int ring;

	init_waitqueue_head(&rdev->fence_queue);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		radeon_fence_driver_init_ring(rdev, ring);
	}
	if (radeon_debugfs_fence_init(rdev)) {
		dev_err(rdev->dev, "fence debugfs file creation failed\n");
	}
	return 0;
}

/**
 * radeon_fence_driver_fini - tear down the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
 */
void radeon_fence_driver_fini(struct radeon_device *rdev)
{
	int ring, r;

	mutex_lock(&rdev->ring_lock);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		if (!rdev->fence_drv[ring].initialized)
			continue;
		r = radeon_fence_wait_empty(rdev, ring);
		if (r) {
			/* no need to trigger GPU reset as we are unloading */
			radeon_fence_driver_force_completion(rdev, ring);
		}
		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
		wake_up_all(&rdev->fence_queue);
		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
		rdev->fence_drv[ring].initialized = false;
	}
	mutex_unlock(&rdev->ring_lock);
}

/**
 * radeon_fence_driver_force_completion - force all fence waiters to complete
 *
 * @rdev: radeon device pointer
 * @ring: the ring to complete
 *
 * In case of GPU reset failure make sure no process keeps waiting on a fence
 * that will never complete.
 */
void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
{
	if (rdev->fence_drv[ring].initialized) {
		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
	}
}

/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	int i, j;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!rdev->fence_drv[i].initialized)
			continue;

		radeon_fence_process(rdev, i);

		seq_printf(m, "--- ring %d ---\n", i);
		seq_printf(m, "Last signaled fence 0x%016llx\n",
			   (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
		seq_printf(m, "Last emitted 0x%016llx\n",
			   rdev->fence_drv[i].sync_seq[i]);

		for (j = 0; j < RADEON_NUM_RINGS; ++j) {
			if (i != j && rdev->fence_drv[j].initialized)
				seq_printf(m, "Last sync to ring %d 0x%016llx\n",
					   j, rdev->fence_drv[i].sync_seq[j]);
		}
	}
	return 0;
}

/**
 * radeon_debugfs_gpu_reset - manually trigger a gpu reset
 *
 * Manually trigger a gpu reset at the next fence wait.
 */
static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;

	down_read(&rdev->exclusive_lock);
	seq_printf(m, "%d\n", rdev->needs_reset);
	rdev->needs_reset = true;
	wake_up_all(&rdev->fence_queue);
	up_read(&rdev->exclusive_lock);

	return 0;
}

static struct drm_info_list radeon_debugfs_fence_list[] = {
	{"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
	{"radeon_gpu_reset", &radeon_debugfs_gpu_reset, 0, NULL}
};
#endif

int radeon_debugfs_fence_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 2);
#else
	return 0;
#endif
}

static const char *radeon_fence_get_driver_name(struct fence *fence)
{
	return "radeon";
}

static const char *radeon_fence_get_timeline_name(struct fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);

	switch (fence->ring) {
	case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
	case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
	case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
	case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
	case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
	case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
	case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
	case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
	default: WARN_ON_ONCE(1); return "radeon.unk";
	}
}

static inline bool radeon_test_signaled(struct radeon_fence *fence)
{
	return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
}

struct radeon_wait_cb {
	struct fence_cb base;
	struct task_struct *task;
};

static void
radeon_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
{
	struct radeon_wait_cb *wait =
		container_of(cb, struct radeon_wait_cb, base);

	wake_up_process(wait->task);
}

static signed long radeon_fence_default_wait(struct fence *f, bool intr,
					     signed long t)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;
	struct radeon_wait_cb cb;

	cb.task = current;

	if (fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
		return t;

	while (t > 0) {
		if (intr)
			set_current_state(TASK_INTERRUPTIBLE);
		else
			set_current_state(TASK_UNINTERRUPTIBLE);

		/*
		 * radeon_test_signaled must be called after
		 * set_current_state to prevent a race with wake_up_process
		 */
		if (radeon_test_signaled(fence))
			break;

		if (rdev->needs_reset) {
			t = -EDEADLK;
			break;
		}

		t = schedule_timeout(t);

		if (t > 0 && intr && signal_pending(current))
			t = -ERESTARTSYS;
	}

	__set_current_state(TASK_RUNNING);
	fence_remove_callback(f, &cb.base);

	return t;
}

const struct fence_ops radeon_fence_ops = {
	.get_driver_name = radeon_fence_get_driver_name,
	.get_timeline_name = radeon_fence_get_timeline_name,
	.enable_signaling = radeon_fence_enable_signaling,
	.signaled = radeon_fence_is_signaled,
	.wait = radeon_fence_default_wait,
	.release = NULL,
};