/*
 * Copyright 2007-2008 Analog Devices Inc.
 *	Philippe Gerum <rpm@xenomai.org>
 *
 * Licensed under the GPL-2 or later.
 */

#include <linux/linkage.h>
#include <asm/blackfin.h>
#include <asm/cache.h>
#include <asm/asm-offsets.h>
#include <asm/rwlock.h>
#include <asm/cplb.h>

.text
.macro coreslot_loadaddr reg:req
	\reg\().l = _corelock;
	\reg\().h = _corelock;
.endm

.macro safe_testset addr:req, scratch:req
#if ANOMALY_05000477
	cli \scratch;
	testset (\addr);
	sti \scratch;
#else
	testset (\addr);
#endif
.endm
/*
 * r0 = address of atomic data to flush and invalidate (32bit).
 *
 * Clear interrupts and return the old mask.
 * We assume that no atomic data can span cachelines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock;
	SSYNC(r2);
	jump .Lretry_corelock
.Ldone_corelock:
	p0 = r1;
	/* flush core internal write buffer before invalidate dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	rts;
ENDPROC(_get_core_lock)
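
/*
 * For reference, the lock/unlock pair in rough C (a sketch only;
 * helpers such as core_testset() and cacheline_flushinv() are
 * hypothetical stand-ins for the testset/flushinv sequences above,
 * not real kernel APIs):
 *
 *	unsigned long get_core_lock(void *datap)
 *	{
 *		unsigned long flags = cli();	// mask IRQs, keep old mask
 *		while (!core_testset(&corelock))// spin on the shared slot
 *			ssync();
 *		cacheline_flushinv(datap);	// flush+invalidate the line
 *		return flags;
 *	}
 *
 *	void put_core_lock(unsigned long flags)
 *	{
 *		corelock = 0;			// release the slot
 *		ssync();
 *		sti(flags);			// restore the IRQ mask
 *	}
 */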
/*
 * r0 = address of atomic data in uncacheable memory region (32bit).
 *
 * Clear interrupts and return the old mask.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock_noflush)
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock_noflush:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock_noflush;
	SSYNC(r2);
	jump .Lretry_corelock_noflush
.Ldone_corelock_noflush:
	/*
	 * SMP kgdb runs into a dead loop without a NOP here, when one core
	 * single-steps over get_core_lock_noflush and the other executes
	 * get_core_lock as a slave node.
	 */
	nop;
	CSYNC(r2);
	rts;
ENDPROC(_get_core_lock_noflush)
/*
 * r0 = interrupt mask to restore.
 * r1 = address of atomic data to flush and invalidate (32bit).
 *
 * Interrupts are masked on entry (see _get_core_lock).
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
	/* Write-through cache assumed, so no flush needed here. */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;
	SSYNC(r2);
	sti r0;
	rts;
ENDPROC(_put_core_lock)
#ifdef __ARCH_SYNC_CORE_DCACHE

ENTRY(___raw_smp_mark_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Set bit of other cores in barrier mask. Don't change current core bit.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	r7 = ~r6;
	cc = r5 == 0;
	if cc jump 1f;
	r7 = r7 | r6;
1:
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_mark_barrier_asm)
ENTRY(___raw_smp_check_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Clear current core bit in barrier mask if it is set.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	cc = r5 == 0;
	if cc jump 1f;
	r6 = ~r6;
	r7 = r7 & r6;
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;

	/*
	 * Invalidate the entire D-cache of current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;
	jump 2f;
1:
	call _put_core_lock;
2:
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_check_barrier_asm)
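
/*
 * The two routines above maintain a per-core "needs resync" mask.
 * In rough C (a sketch; this_cpu() is a hypothetical stand-in for
 * GET_CPUID, and the core lock held around each body is omitted):
 *
 *	void smp_mark_barrier(void)
 *	{
 *		unsigned m = 1 << this_cpu();
 *		// Flag every other core, preserving our own bit.
 *		barrier_mask = ~m | (barrier_mask & m);
 *	}
 *
 *	void smp_check_barrier(void)
 *	{
 *		unsigned m = 1 << this_cpu();
 *		if (barrier_mask & m) {
 *			barrier_mask &= ~m;	// acknowledge the flag
 *			resync_core_dcache();	// invalidate our D-cache
 *		}
 *	}
 */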
/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_start_lock_coherent:
	[--sp] = rets;
	[--sp] = ( r7:6 );
	r7 = r0;
	p1 = r1;

	/*
	 * Determine whether the atomic data was previously
	 * owned by another CPU (=r6).
	 */
	GET_CPUID(p0, r2);
	r1 = 1;
	r1 <<= r2;
	r2 = ~r1;
	r1 = [p1];
	r1 >>= 28;	/* CPU fingerprints are stored in the high nibble. */
	r6 = r1 & r2;
	r1 = [p1];
	r1 <<= 4;
	r1 >>= 4;
	[p1] = r1;

	/*
	 * Release the core lock now, but keep IRQs disabled while we are
	 * performing the remaining housekeeping chores for the current CPU.
	 */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;

	/*
	 * If another CPU has owned the same atomic section before us,
	 * then our D-cached copy of the shared data protected by the
	 * current spin/write_lock may be obsolete.
	 */
	cc = r6 == 0;
	if cc jump .Lcache_synced

	/*
	 * Invalidate the entire D-cache of the current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;

.Lcache_synced:
	SSYNC(r2);
	sti r7;
	( r7:6 ) = [sp++];
	rets = [sp++];
	rts

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_end_lock_coherent:
	p1 = r1;
	GET_CPUID(p0, r2);
	r2 += 28;
	r1 = 1;
	r1 <<= r2;
	r2 = [p1];
	r2 = r1 | r2;
	[p1] = r2;
	r1 = p1;
	jump _put_core_lock;

#endif /* __ARCH_SYNC_CORE_DCACHE */
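
/*
 * The fingerprint scheme above, in rough C (a sketch; this_cpu() is a
 * hypothetical stand-in for GET_CPUID):
 *
 *	// _end_lock_coherent: stamp our CPU bit into the lock word's
 *	// high nibble when unlocking.
 *	lock |= 1 << (28 + this_cpu());
 *
 *	// _start_lock_coherent: when locking, check whether any *other*
 *	// CPU's fingerprint is present; if so, our cached view of the
 *	// protected data may be stale.
 *	other = (lock >> 28) & ~(1 << this_cpu());
 *	lock &= 0x0fffffff;		// clear all fingerprints
 *	if (other)
 *		resync_core_dcache();
 */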
/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r3 = [p1];
	cc = bittst( r3, 0 );
	r3 = cc;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_spin_is_locked_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
	p1 = r0;
	[--sp] = rets;
.Lretry_spinlock:
	call _get_core_lock;
	r1 = p1;
	r2 = [p1];
	cc = bittst( r2, 0 );
	if cc jump .Lbusy_spinlock
#ifdef __ARCH_SYNC_CORE_DCACHE
	r3 = p1;
	bitset ( r2, 0 ); /* Raise the lock bit. */
	[p1] = r2;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lbusy_spinlock:
	/* We don't touch the atomic area if busy, so that the flush
	   will behave like a nop in _put_core_lock. */
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	jump .Lretry_spinlock
ENDPROC(___raw_spin_lock_asm)
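
/*
 * ___raw_spin_lock_asm in rough C (a sketch; get_core_lock()/
 * put_core_lock() mirror the helpers above, and the
 * __ARCH_SYNC_CORE_DCACHE bookkeeping is left out):
 *
 *	void raw_spin_lock(volatile unsigned *lock)
 *	{
 *		unsigned long flags;
 *
 *		for (;;) {
 *			flags = get_core_lock(lock);
 *			if (!(*lock & 1)) {
 *				*lock = 1;		// bit 0 = locked
 *				put_core_lock(flags);
 *				return;
 *			}
 *			put_core_lock(flags);		// back off, retry
 *		}
 *	}
 */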
/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = p1;
	r3 = [p1];
	cc = bittst( r3, 0 );
	if cc jump .Lfailed_trylock
#ifdef __ARCH_SYNC_CORE_DCACHE
	bitset ( r3, 0 ); /* Raise the lock bit. */
	[p1] = r3;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	r0 = 1;
	rets = [sp++];
	rts;
.Lfailed_trylock:
	call _put_core_lock;
	r0 = 0;
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_trylock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	bitclr ( r2, 0 );
	[p1] = r2;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_unlock_asm)
/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
.Lrdlock_try:
	r1 = [p1];
	r1 += -1;
	[p1] = r1;
	cc = r1 < 0;
	if cc jump .Lrdlock_failed
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lrdlock_failed:
	r1 += 1;
	[p1] = r1;
.Lrdlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 < 2;
	if cc jump .Lrdlock_wait;
	jump .Lrdlock_try
ENDPROC(___raw_read_lock_asm)
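
/*
 * The rwlocks below use the classic RW_LOCK_BIAS counting scheme.
 * Roughly, in C (a sketch; the core lock held around each step is
 * omitted):
 *
 *	lock starts at RW_LOCK_BIAS;
 *
 *	read_lock:    if (--lock < 0) { ++lock; wait until lock >= 2; retry }
 *	read_unlock:  ++lock;
 *	write_lock:   if (lock - RW_LOCK_BIAS == 0) lock = 0;
 *	              else wait until lock == RW_LOCK_BIAS, then retry;
 *	write_unlock: lock += RW_LOCK_BIAS;
 *
 * i.e. each reader subtracts one, a writer claims the whole bias, and
 * a negative count means a writer holds the lock.
 */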
/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 <= 0;
	if cc jump .Lfailed_tryrdlock;
	r1 += -1;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;
.Lfailed_tryrdlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_read_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Note: Processing controlled by a reader lock should not have
 * any side-effect on cache issues with the other core, so we
 * just release the core lock and exit (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 += 1;
	[p1] = r1;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_unlock_asm)
/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
.Lwrlock_try:
	r1 = [p1];
	r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r2 = r1;
	r2 <<= 4;
	r2 >>= 4;
	cc = r2 == 0;
#else
	cc = r1 == 0;
#endif
	if !cc jump .Lwrlock_wait
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lwrlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 <<= 4;
	r1 >>= 4;
#endif
	cc = r1 == r3;
	if !cc jump .Lwrlock_wait;
	jump .Lwrlock_try
ENDPROC(___raw_write_lock_asm)
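
/*
 * Note on the __ARCH_SYNC_CORE_DCACHE variant above: the CPU
 * fingerprints live in the high nibble of the lock word, so the count
 * is compared with the top 4 bits masked off (the "<<= 4; >>= 4"
 * pairs); stale fingerprints then never keep a writer from seeing the
 * lock as free. In rough C:
 *
 *	if (((lock - RW_LOCK_BIAS) & 0x0fffffff) == 0)
 *		lock -= RW_LOCK_BIAS;	// claim, keeping the fingerprints
 */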
/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r2.l = lo(RW_LOCK_BIAS);
	r2.h = hi(RW_LOCK_BIAS);
	cc = r1 == r2;
	if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 >>= 28;
	r1 <<= 28;
#else
	r1 = 0;
#endif
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_trywrlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_write_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 = r1 + r3;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_write_unlock_asm)
/*
 * r0 = ptr
 * r1 = value
 *
 * ADD a signed value to a 32bit word and return the new value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_add_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r3 + r2;
	[p1] = r3;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_add_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * AND the mask bits from a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_and_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 & r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_and_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * OR the mask bits into a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_or_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 | r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_or_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * XOR the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 ^ r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xor_asm)
/*
 * r0 = ptr
 * r1 = mask
 *
 * Perform a logical AND between the mask bits and a 32bit word, and
 * return the masked value. We need this on this architecture in
 * order to invalidate the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
	p1 = r0;
	r3 = r1;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	/* flush core internal write buffer before invalidate dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	r0 = r0 & r3;
	rts;
ENDPROC(___raw_atomic_test_asm)
/*
 * r0 = ptr
 * r1 = value
 *
 * Swap *ptr with value and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
#define __do_xchg(src, dst)		\
	p1 = r0;			\
	r3 = r1;			\
	[--sp] = rets;			\
	call _get_core_lock;		\
	r2 = src;			\
	dst = r3;			\
	r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_xchg_1_asm)
	__do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

ENTRY(___raw_xchg_2_asm)
	__do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

ENTRY(___raw_xchg_4_asm)
	__do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)
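
/*
 * __do_xchg in rough C (a sketch; the core lock is what serializes
 * the read-modify-write across both cores):
 *
 *	T raw_xchg(volatile T *ptr, T value)
 *	{
 *		unsigned long flags = get_core_lock(ptr);
 *		T old = *ptr;
 *		*ptr = value;
 *		put_core_lock(flags);
 *		return old;
 *	}
 */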
/*
 * r0 = ptr
 * r1 = new
 * r2 = old
 *
 * Swap *ptr with new if *ptr == old and return the previous *ptr
 * value atomically.
 *
 * Clobbers: r3:0, p1:0
 */
#define __do_cmpxchg(src, dst)		\
	[--sp] = rets;			\
	[--sp] = r4;			\
	p1 = r0;			\
	r3 = r1;			\
	r4 = r2;			\
	call _get_core_lock;		\
	r2 = src;			\
	cc = r2 == r4;			\
	if !cc jump 1f;			\
	dst = r3;			\
1:	r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	r4 = [sp++];			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_cmpxchg_1_asm)
	__do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

ENTRY(___raw_cmpxchg_2_asm)
	__do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

ENTRY(___raw_cmpxchg_4_asm)
	__do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)
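
/*
 * __do_cmpxchg in rough C (a sketch, same conventions as the xchg
 * sketch above):
 *
 *	T raw_cmpxchg(volatile T *ptr, T new, T old)
 *	{
 *		unsigned long flags = get_core_lock(ptr);
 *		T prev = *ptr;
 *		if (prev == old)
 *			*ptr = new;
 *		put_core_lock(flags);
 *		return prev;
 *	}
 */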
/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Set a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_or_asm
ENDPROC(___raw_bit_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Clear a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
	r2 = 1;
	r2 <<= r1;
	r1 = ~r2;
	jump ___raw_atomic_and_asm
ENDPROC(___raw_bit_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Toggle a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_xor_asm
ENDPROC(___raw_bit_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-set a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_set_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_set_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_set_asm)
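
/*
 * The test_* variants layer on the plain bit ops, which return the
 * old 32bit word. In rough C (a sketch):
 *
 *	int raw_bit_test_set(volatile unsigned *ptr, int nr)
 *	{
 *		unsigned old = raw_bit_set(ptr, nr);	// old word value
 *		return (old >> nr) & 1;			// old bit value
 *	}
 *
 * test_clear and test_toggle below follow the same pattern around
 * ___raw_bit_clear_asm and ___raw_bit_toggle_asm.
 */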
/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-clear a bit in a 32bit word and return the old bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_clear_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-toggle a bit in a 32bit word,
 * and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_toggle_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.
 * We need this on this architecture in order to invalidate
 * the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_test_asm
ENDPROC(___raw_bit_test_asm)

/*
 * r0 = ptr
 *
 * Fetch and return an uncached 32bit value.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
	p1 = r0;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	/* flush core internal write buffer before invalidate dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	rts;
ENDPROC(___raw_uncached_fetch_asm)