/*
 * Intel I/OAT DMA Linux driver
 * Copyright(c) 2004 - 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/prefetch.h>
#include "../dmaengine.h"
#include "registers.h"
#include "hw.h"
#include "dma.h"

#define MAX_SCF 1024

/* provide a lookup table for setting the source address in the base or
 * extended descriptor of an xor or pq descriptor
 */
static const u8 xor_idx_to_desc = 0xe0;
static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
static const u8 pq_idx_to_desc = 0xf8;
static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
                                       2, 2, 2, 2, 2, 2, 2 };
static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
                                        0, 1, 2, 3, 4, 5, 6 };

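/* note on the tables above: xor_idx_to_desc and pq_idx_to_desc are
 * bitmaps, not arrays.  Bit 'idx' selects whether source 'idx' lives in
 * the base descriptor (bit clear) or the extended descriptor (bit set),
 * e.g. xor_idx_to_desc = 0xe0 keeps sources 0-4 in the base xor
 * descriptor and pushes sources 5-7 into the extension.  The matching
 * *_idx_to_field[] entry then gives the raw field slot within that
 * descriptor.  The pq16 tables are plain per-index arrays because the
 * 16-source format spreads sources across three blocks: the base pq
 * descriptor plus the two 64-byte halves of the super extended
 * descriptor (sed).
 */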
static void xor_set_src(struct ioat_raw_descriptor *descs[2],
                        dma_addr_t addr, u32 offset, int idx)
{
        struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];

        raw->field[xor_idx_to_field[idx]] = addr + offset;
}

static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
{
        struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

        return raw->field[pq_idx_to_field[idx]];
}

static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
{
        struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];

        return raw->field[pq16_idx_to_field[idx]];
}

static void pq_set_src(struct ioat_raw_descriptor *descs[2],
                       dma_addr_t addr, u32 offset, u8 coef, int idx)
{
        struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
        struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

        raw->field[pq_idx_to_field[idx]] = addr + offset;
        pq->coef[idx] = coef;
}

static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
                         dma_addr_t addr, u32 offset, u8 coef, unsigned idx)
{
        struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
        struct ioat_pq16a_descriptor *pq16 =
                (struct ioat_pq16a_descriptor *)desc[1];
        struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];

        raw->field[pq16_idx_to_field[idx]] = addr + offset;

        if (idx < 8)
                pq->coef[idx] = coef;
        else
                pq16->coef[idx - 8] = coef;
}

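/* allocate a super extended descriptor (sed) entry: the bookkeeping
 * struct comes from ioat_sed_cache, the hardware-visible source block
 * from the per-device dma_pool selected by hw_pool.  GFP_ATOMIC is used
 * because prep callbacks may run in atomic context.
 */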
static struct ioat_sed_ent *
ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool)
{
        struct ioat_sed_ent *sed;
        gfp_t flags = __GFP_ZERO | GFP_ATOMIC;

        sed = kmem_cache_alloc(ioat_sed_cache, flags);
        if (!sed)
                return NULL;

        sed->hw_pool = hw_pool;
        sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool],
                                 flags, &sed->dma);
        if (!sed->hw) {
                kmem_cache_free(ioat_sed_cache, sed);
                return NULL;
        }

        return sed;
}

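/* prepare a memcpy transaction: the copy is split into ring descriptors
 * of at most 1 << xfercap_log bytes each, and only the last descriptor
 * carries the interrupt/fence/completion-write bits.  The channel prep
 * lock taken in ioat_check_space_lock() is deliberately left held (see
 * the comment at the end) and is dropped again at submit time.
 *
 * A dmaengine client normally reaches this through the generic
 * wrappers, roughly (sketch, error handling omitted):
 *
 *      tx = dmaengine_prep_dma_memcpy(chan, dst, src, len,
 *                                     DMA_PREP_INTERRUPT);
 *      cookie = dmaengine_submit(tx);
 *      dma_async_issue_pending(chan);
 */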
struct dma_async_tx_descriptor *
ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
                          dma_addr_t dma_src, size_t len, unsigned long flags)
{
        struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
        struct ioat_dma_descriptor *hw;
        struct ioat_ring_ent *desc;
        dma_addr_t dst = dma_dest;
        dma_addr_t src = dma_src;
        size_t total_len = len;
        int num_descs, idx, i;

        if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
                return NULL;

        num_descs = ioat_xferlen_to_descs(ioat_chan, len);
        if (likely(num_descs) &&
            ioat_check_space_lock(ioat_chan, num_descs) == 0)
                idx = ioat_chan->head;
        else
                return NULL;
        i = 0;
        do {
                size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log);

                desc = ioat_get_ring_ent(ioat_chan, idx + i);
                hw = desc->hw;

                hw->size = copy;
                hw->ctl = 0;
                hw->src_addr = src;
                hw->dst_addr = dst;

                len -= copy;
                dst += copy;
                src += copy;
                dump_desc_dbg(ioat_chan, desc);
        } while (++i < num_descs);

        desc->txd.flags = flags;
        desc->len = total_len;
        hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
        hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
        hw->ctl_f.compl_write = 1;
        dump_desc_dbg(ioat_chan, desc);
        /* we leave the channel locked to ensure in order submission */

        return &desc->txd;
}

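/* common xor/xor_val preparation: the base xor descriptor holds up to 5
 * sources, so more than 5 sources consumes a base + extended descriptor
 * pair per transfer chunk.  A trailing null (legacy) descriptor carries
 * the completion write so all completion writes arrive in order (see
 * the comment below).
 */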
static struct dma_async_tx_descriptor *
__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
                     dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
                     size_t len, unsigned long flags)
{
        struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
        struct ioat_ring_ent *compl_desc;
        struct ioat_ring_ent *desc;
        struct ioat_ring_ent *ext;
        size_t total_len = len;
        struct ioat_xor_descriptor *xor;
        struct ioat_xor_ext_descriptor *xor_ex = NULL;
        struct ioat_dma_descriptor *hw;
        int num_descs, with_ext, idx, i;
        u32 offset = 0;
        u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;

        BUG_ON(src_cnt < 2);

        num_descs = ioat_xferlen_to_descs(ioat_chan, len);
        /* we need 2x the number of descriptors to cover greater than 5
         * sources
         */
        if (src_cnt > 5) {
                with_ext = 1;
                num_descs *= 2;
        } else
                with_ext = 0;

        /* completion writes from the raid engine may pass completion
         * writes from the legacy engine, so we need one extra null
         * (legacy) descriptor to ensure all completion writes arrive in
         * order.
         */
        if (likely(num_descs) &&
            ioat_check_space_lock(ioat_chan, num_descs+1) == 0)
                idx = ioat_chan->head;
        else
                return NULL;
        i = 0;
        do {
                struct ioat_raw_descriptor *descs[2];
                size_t xfer_size = min_t(size_t,
                                         len, 1 << ioat_chan->xfercap_log);
                int s;

                desc = ioat_get_ring_ent(ioat_chan, idx + i);
                xor = desc->xor;

                /* save a branch by unconditionally retrieving the
                 * extended descriptor xor_set_src() knows to not write
                 * to it in the single descriptor case
                 */
                ext = ioat_get_ring_ent(ioat_chan, idx + i + 1);
                xor_ex = ext->xor_ex;

                descs[0] = (struct ioat_raw_descriptor *) xor;
                descs[1] = (struct ioat_raw_descriptor *) xor_ex;
                for (s = 0; s < src_cnt; s++)
                        xor_set_src(descs, src[s], offset, s);
                xor->size = xfer_size;
                xor->dst_addr = dest + offset;
                xor->ctl = 0;
                xor->ctl_f.op = op;
                xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);

                len -= xfer_size;
                offset += xfer_size;
                dump_desc_dbg(ioat_chan, desc);
        } while ((i += 1 + with_ext) < num_descs);

        /* last xor descriptor carries the unmap parameters and fence bit */
        desc->txd.flags = flags;
        desc->len = total_len;
        if (result)
                desc->result = result;
        xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

        /* completion descriptor carries interrupt bit */
        compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
        compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
        hw = compl_desc->hw;
        hw->ctl = 0;
        hw->ctl_f.null = 1;
        hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
        hw->ctl_f.compl_write = 1;
        hw->size = NULL_DESC_BUFFER_SIZE;
        dump_desc_dbg(ioat_chan, compl_desc);

        /* we leave the channel locked to ensure in order submission */
        return &compl_desc->txd;
}

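/* dmaengine-facing xor entry points.  ioat_prep_xor_val() hands src[0]
 * to the engine as the destination operand and the remaining
 * src_cnt - 1 entries as sources; since the cleanup path only sets
 * result bits on failure, *result is cleared up front.
 */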
struct dma_async_tx_descriptor *
ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
              unsigned int src_cnt, size_t len, unsigned long flags)
{
        struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

        if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
                return NULL;

        return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
}

struct dma_async_tx_descriptor *
ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
                  unsigned int src_cnt, size_t len,
                  enum sum_check_flags *result, unsigned long flags)
{
        struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

        if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
                return NULL;

        /* the cleanup routine only sets bits on validate failure, it
         * does not clear bits on validate success... so clear it here
         */
        *result = 0;

        return __ioat_prep_xor_lock(chan, result, src[0], &src[1],
                                    src_cnt - 1, len, flags);
}

static void
dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc,
                 struct ioat_ring_ent *ext)
{
        struct device *dev = to_dev(ioat_chan);
        struct ioat_pq_descriptor *pq = desc->pq;
        struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
        struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
        int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
        int i;

        dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
                " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
                " src_cnt: %d)\n",
                desc_id(desc), (unsigned long long) desc->txd.phys,
                (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
                desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op,
                pq->ctl_f.int_en, pq->ctl_f.compl_write,
                pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
                pq->ctl_f.src_cnt);
        for (i = 0; i < src_cnt; i++)
                dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
                        (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
        dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
        dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
        dev_dbg(dev, "\tNEXT: %#llx\n", pq->next);
}

static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan,
                               struct ioat_ring_ent *desc)
{
        struct device *dev = to_dev(ioat_chan);
        struct ioat_pq_descriptor *pq = desc->pq;
        struct ioat_raw_descriptor *descs[] = { (void *)pq,
                                                (void *)pq,
                                                (void *)pq };
        int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
        int i;

        if (desc->sed) {
                descs[1] = (void *)desc->sed->hw;
                descs[2] = (void *)desc->sed->hw + 64;
        }

        dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
                " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
                " src_cnt: %d)\n",
                desc_id(desc), (unsigned long long) desc->txd.phys,
                (unsigned long long) pq->next,
                desc->txd.flags, pq->size, pq->ctl,
                pq->ctl_f.op, pq->ctl_f.int_en,
                pq->ctl_f.compl_write,
                pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
                pq->ctl_f.src_cnt);
        for (i = 0; i < src_cnt; i++) {
                dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
                        (unsigned long long) pq16_get_src(descs, i),
                        pq->coef[i]);
        }
        dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
        dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
}

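/* common p+q (raid6) preparation for up to 8 sources: the base pq
 * descriptor holds 3 sources, so larger source counts (including the
 * implied sources added for the DMA_PREP_CONTINUE cases) use a base +
 * extended descriptor pair per chunk.  On pre-3.3 hardware (cb32) a
 * trailing null descriptor carries the completion write, mirroring the
 * xor path; newer hardware completes directly from the pq descriptor.
 */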
static struct dma_async_tx_descriptor *
__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
                    const dma_addr_t *dst, const dma_addr_t *src,
                    unsigned int src_cnt, const unsigned char *scf,
                    size_t len, unsigned long flags)
{
        struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
        struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
        struct ioat_ring_ent *compl_desc;
        struct ioat_ring_ent *desc;
        struct ioat_ring_ent *ext;
        size_t total_len = len;
        struct ioat_pq_descriptor *pq;
        struct ioat_pq_ext_descriptor *pq_ex = NULL;
        struct ioat_dma_descriptor *hw;
        u32 offset = 0;
        u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
        int i, s, idx, with_ext, num_descs;
        int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0;

        dev_dbg(to_dev(ioat_chan), "%s\n", __func__);
        /* the engine requires at least two sources (we provide
         * at least 1 implied source in the DMA_PREP_CONTINUE case)
         */
        BUG_ON(src_cnt + dmaf_continue(flags) < 2);

        num_descs = ioat_xferlen_to_descs(ioat_chan, len);
        /* we need 2x the number of descriptors to cover greater than 3
         * sources (we need 1 extra source in the q-only continuation
         * case and 3 extra sources in the p+q continuation case.
         */
        if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
            (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
                with_ext = 1;
                num_descs *= 2;
        } else
                with_ext = 0;

        /* completion writes from the raid engine may pass completion
         * writes from the legacy engine, so we need one extra null
         * (legacy) descriptor to ensure all completion writes arrive in
         * order.
         */
        if (likely(num_descs) &&
            ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0)
                idx = ioat_chan->head;
        else
                return NULL;
        i = 0;
        do {
                struct ioat_raw_descriptor *descs[2];
                size_t xfer_size = min_t(size_t, len,
                                         1 << ioat_chan->xfercap_log);

                desc = ioat_get_ring_ent(ioat_chan, idx + i);
                pq = desc->pq;

                /* save a branch by unconditionally retrieving the
                 * extended descriptor pq_set_src() knows to not write
                 * to it in the single descriptor case
                 */
                ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext);
                pq_ex = ext->pq_ex;

                descs[0] = (struct ioat_raw_descriptor *) pq;
                descs[1] = (struct ioat_raw_descriptor *) pq_ex;

                for (s = 0; s < src_cnt; s++)
                        pq_set_src(descs, src[s], offset, scf[s], s);

                /* see the comment for dma_maxpq in include/linux/dmaengine.h */
                if (dmaf_p_disabled_continue(flags))
                        pq_set_src(descs, dst[1], offset, 1, s++);
                else if (dmaf_continue(flags)) {
                        pq_set_src(descs, dst[0], offset, 0, s++);
                        pq_set_src(descs, dst[1], offset, 1, s++);
                        pq_set_src(descs, dst[1], offset, 0, s++);
                }
                pq->size = xfer_size;
                pq->p_addr = dst[0] + offset;
                pq->q_addr = dst[1] + offset;
                pq->ctl = 0;
                pq->ctl_f.op = op;
                /* we turn on descriptor write back error status */
                if (ioat_dma->cap & IOAT_CAP_DWBES)
                        pq->ctl_f.wb_en = result ? 1 : 0;
                pq->ctl_f.src_cnt = src_cnt_to_hw(s);
                pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
                pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

                len -= xfer_size;
                offset += xfer_size;
        } while ((i += 1 + with_ext) < num_descs);

        /* last pq descriptor carries the unmap parameters and fence bit */
        desc->txd.flags = flags;
        desc->len = total_len;
        if (result)
                desc->result = result;
        pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
        dump_pq_desc_dbg(ioat_chan, desc, ext);

        if (!cb32) {
                pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
                pq->ctl_f.compl_write = 1;
                compl_desc = desc;
        } else {
                /* completion descriptor carries interrupt bit */
                compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
                compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
                hw = compl_desc->hw;
                hw->ctl = 0;
                hw->ctl_f.null = 1;
                hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
                hw->ctl_f.compl_write = 1;
                hw->size = NULL_DESC_BUFFER_SIZE;
                dump_desc_dbg(ioat_chan, compl_desc);
        }

        /* we leave the channel locked to ensure in order submission */
        return &compl_desc->txd;
}

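/* p+q preparation for the 9-16 source descriptor format (cb3.3+): the
 * extra source addresses live in a super extended descriptor (sed)
 * allocated per ring entry, with descs[1] and descs[2] pointing at its
 * two 64-byte halves.  No trailing null descriptor is needed since this
 * format has no completion write hw bug (see below).
 */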
static struct dma_async_tx_descriptor *
__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
                      const dma_addr_t *dst, const dma_addr_t *src,
                      unsigned int src_cnt, const unsigned char *scf,
                      size_t len, unsigned long flags)
{
        struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
        struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
        struct ioat_ring_ent *desc;
        size_t total_len = len;
        struct ioat_pq_descriptor *pq;
        u32 offset = 0;
        u8 op;
        int i, s, idx, num_descs;

        /* this function is only called with 9-16 sources */
        op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;

        dev_dbg(to_dev(ioat_chan), "%s\n", __func__);

        num_descs = ioat_xferlen_to_descs(ioat_chan, len);

        /*
         * 16 source pq is only available on cb3.3 and has no completion
         * write hw bug.
         */
        if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0)
                idx = ioat_chan->head;
        else
                return NULL;

        i = 0;
        do {
                struct ioat_raw_descriptor *descs[4];
                size_t xfer_size = min_t(size_t, len,
                                         1 << ioat_chan->xfercap_log);

                desc = ioat_get_ring_ent(ioat_chan, idx + i);
                pq = desc->pq;

                descs[0] = (struct ioat_raw_descriptor *) pq;

                desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3);
                if (!desc->sed) {
                        dev_err(to_dev(ioat_chan),
                                "%s: no free sed entries\n", __func__);
                        return NULL;
                }

                pq->sed_addr = desc->sed->dma;
                desc->sed->parent = desc;

                descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
                descs[2] = (void *)descs[1] + 64;

                for (s = 0; s < src_cnt; s++)
                        pq16_set_src(descs, src[s], offset, scf[s], s);

                /* see the comment for dma_maxpq in include/linux/dmaengine.h */
                if (dmaf_p_disabled_continue(flags))
                        pq16_set_src(descs, dst[1], offset, 1, s++);
                else if (dmaf_continue(flags)) {
                        pq16_set_src(descs, dst[0], offset, 0, s++);
                        pq16_set_src(descs, dst[1], offset, 1, s++);
                        pq16_set_src(descs, dst[1], offset, 0, s++);
                }

                pq->size = xfer_size;
                pq->p_addr = dst[0] + offset;
                pq->q_addr = dst[1] + offset;
                pq->ctl = 0;
                pq->ctl_f.op = op;
                pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
                /* we turn on descriptor write back error status */
                if (ioat_dma->cap & IOAT_CAP_DWBES)
                        pq->ctl_f.wb_en = result ? 1 : 0;
                pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
                pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

                len -= xfer_size;
                offset += xfer_size;
        } while (++i < num_descs);

        /* last pq descriptor carries the unmap parameters and fence bit */
        desc->txd.flags = flags;
        desc->len = total_len;
        if (result)
                desc->result = result;
        pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

        /* with cb3.3 we should be able to do completion w/o a null desc */
        pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
        pq->ctl_f.compl_write = 1;

        dump_pq16_desc_dbg(ioat_chan, desc);

        /* we leave the channel locked to ensure in order submission */
        return &desc->txd;
}

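/* account for the implied sources added in the DMA_PREP_CONTINUE cases
 * (see the comment for dma_maxpq in include/linux/dmaengine.h): +1 for
 * a q-only continuation, +3 for a p+q continuation.  The result decides
 * whether the 8-source or the 16-source descriptor format is used.
 */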
static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
{
        if (dmaf_p_disabled_continue(flags))
                return src_cnt + 1;
        else if (dmaf_continue(flags))
                return src_cnt + 3;
        else
                return src_cnt;
}

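/* dmaengine-facing p+q entry points.  When one of the two results is
 * disabled, the other address is copied into its slot so the hardware
 * always sees a valid address, and the single source multiply used by
 * the raid6 recovery path is widened to the two sources the engine
 * requires.
 */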
struct dma_async_tx_descriptor *
ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
             unsigned int src_cnt, const unsigned char *scf, size_t len,
             unsigned long flags)
{
        struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

        if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
                return NULL;

        /* specify valid address for disabled result */
        if (flags & DMA_PREP_PQ_DISABLE_P)
                dst[0] = dst[1];
        if (flags & DMA_PREP_PQ_DISABLE_Q)
                dst[1] = dst[0];

        /* handle the single source multiply case from the raid6
         * recovery path
         */
        if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
                dma_addr_t single_source[2];
                unsigned char single_source_coef[2];

                BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
                single_source[0] = src[0];
                single_source[1] = src[0];
                single_source_coef[0] = scf[0];
                single_source_coef[1] = 0;

                return src_cnt_flags(src_cnt, flags) > 8 ?
                        __ioat_prep_pq16_lock(chan, NULL, dst, single_source,
                                              2, single_source_coef, len,
                                              flags) :
                        __ioat_prep_pq_lock(chan, NULL, dst, single_source, 2,
                                            single_source_coef, len, flags);

        } else {
                return src_cnt_flags(src_cnt, flags) > 8 ?
                        __ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
                                              scf, len, flags) :
                        __ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt,
                                            scf, len, flags);
        }
}

struct dma_async_tx_descriptor *
ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
                 unsigned int src_cnt, const unsigned char *scf, size_t len,
                 enum sum_check_flags *pqres, unsigned long flags)
{
        struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

        if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
                return NULL;

        /* specify valid address for disabled result */
        if (flags & DMA_PREP_PQ_DISABLE_P)
                pq[0] = pq[1];
        if (flags & DMA_PREP_PQ_DISABLE_Q)
                pq[1] = pq[0];

        /* the cleanup routine only sets bits on validate failure, it
         * does not clear bits on validate success... so clear it here
         */
        *pqres = 0;

        return src_cnt_flags(src_cnt, flags) > 8 ?
                __ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
                                      flags) :
                __ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
                                    flags);
}

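/* xor and xor_val offloads routed through the pq engine: q is disabled
 * and all coefficients are zeroed, so the p result (the plain xor of
 * the sources) stands in for the xor destination.  Source counts above
 * MAX_SCF cannot be described by the on-stack coefficient array and are
 * rejected.
 */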
struct dma_async_tx_descriptor *
ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
                unsigned int src_cnt, size_t len, unsigned long flags)
{
        unsigned char scf[MAX_SCF];
        dma_addr_t pq[2];
        struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

        if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
                return NULL;

        if (src_cnt > MAX_SCF)
                return NULL;

        memset(scf, 0, src_cnt);
        pq[0] = dst;
        flags |= DMA_PREP_PQ_DISABLE_Q;
        pq[1] = dst; /* specify valid address for disabled result */

        return src_cnt_flags(src_cnt, flags) > 8 ?
                __ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
                                      flags) :
                __ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
                                    flags);
}

struct dma_async_tx_descriptor *
ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
                    unsigned int src_cnt, size_t len,
                    enum sum_check_flags *result, unsigned long flags)
{
        unsigned char scf[MAX_SCF];
        dma_addr_t pq[2];
        struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

        if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
                return NULL;

        if (src_cnt > MAX_SCF)
                return NULL;

        /* the cleanup routine only sets bits on validate failure, it
         * does not clear bits on validate success... so clear it here
         */
        *result = 0;

        memset(scf, 0, src_cnt);
        pq[0] = src[0];
        flags |= DMA_PREP_PQ_DISABLE_Q;
        pq[1] = pq[0]; /* specify valid address for disabled result */

        return src_cnt_flags(src_cnt, flags) > 8 ?
                __ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
                                      scf, len, flags) :
                __ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
                                    scf, len, flags);
}

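/* prepare a pure interrupt transaction: a single null descriptor with
 * int_en and compl_write set and no data movement.  As with the other
 * prep routines the channel stays locked until submit.
 */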
struct dma_async_tx_descriptor *
ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
{
        struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
        struct ioat_ring_ent *desc;
        struct ioat_dma_descriptor *hw;

        if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
                return NULL;

        if (ioat_check_space_lock(ioat_chan, 1) == 0)
                desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head);
        else
                return NULL;

        hw = desc->hw;
        hw->ctl = 0;
        hw->ctl_f.null = 1;
        hw->ctl_f.int_en = 1;
        hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
        hw->ctl_f.compl_write = 1;
        hw->size = NULL_DESC_BUFFER_SIZE;
        hw->src_addr = 0;
        hw->dst_addr = 0;

        desc->txd.flags = flags;
        desc->len = 1;

        dump_desc_dbg(ioat_chan, desc);

        /* we leave the channel locked to ensure in order submission */
        return &desc->txd;
}