/* kfd_dbgdev.c */
  1. /*
  2. * Copyright 2014 Advanced Micro Devices, Inc.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. * OTHER DEALINGS IN THE SOFTWARE.
  21. *
  22. */
  23. #include <linux/types.h>
  24. #include <linux/kernel.h>
  25. #include <linux/log2.h>
  26. #include <linux/sched.h>
  27. #include <linux/slab.h>
  28. #include <linux/mutex.h>
  29. #include <linux/device.h>
  30. #include "kfd_pm4_headers.h"
  31. #include "kfd_pm4_headers_diq.h"
  32. #include "kfd_kernel_queue.h"
  33. #include "kfd_priv.h"
  34. #include "kfd_pm4_opcodes.h"
  35. #include "cik_regs.h"
  36. #include "kfd_dbgmgr.h"
  37. #include "kfd_dbgdev.h"
  38. #include "kfd_device_queue_manager.h"
  39. #include "../../radeon/cik_reg.h"
/* Clear all TCP address-watch points on @dev via the kgd interface. */
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	BUG_ON(!dev || !dev->kfd2kgd);
	dev->kfd2kgd->address_watch_disable(dev->kgd);
}
/*
 * dbgdev_diq_submit_ib - submit an indirect buffer (IB) through the DIQ
 * and block until the CP signals completion.
 *
 * @dbgdev: debug device; must have a DIQ kernel queue (dbgdev->kq)
 * @pasid: PASID the IB is executed under
 * @vmid0_address: GPU address of the IB contents
 * @packet_buff: CPU pointer to the IB contents (only validated here)
 * @size_in_bytes: size of the IB, in bytes
 *
 * Builds an INDIRECT_BUFFER_PASID packet followed by a RELEASE_MEM
 * packet on the DIQ ring.  A GART-allocated sync variable is written by
 * the CP once the IB has been consumed; we poll it with a 1.5 s timeout.
 *
 * Return: 0 on success, negative status on allocation/submit/timeout
 * failure.
 */
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				unsigned int pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;
	unsigned int *ib_packet_buff;
	int status;

	BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);

	kq = dbgdev->kq;

	/* PQ space for one IB packet plus one release-mem sync packet */
	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer from DIQ
	 * The receive packet buff will be sitting on the Indirect Buffer
	 * and in the PQ we put the IB packet + sync packet(s).
	 */
	status = kq->ops.acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status != 0) {
		pr_err("amdkfd: acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	largep = (union ULARGE_INTEGER *) &vmid0_address;

	/* IB base address is stored shifted right by 2 (dword aligned) */
	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	/* control flags (bits 23 and 31) + IB size in dwords (low 20 bits) */
	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / sizeof(uint32_t)) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * for now we use release mem for GPU-CPU synchronization
	 * Consider WaitRegMem + WriteData as a better alternative
	 * we get a GART allocations ( gpu/cpu mapping),
	 * for the sync variable, and wait until:
	 * (a) Sync with HW
	 * (b) Sync var is written by CP to mem.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);
	if (status != 0) {
		pr_err("amdkfd: Failed to allocate GART memory\n");
		/* un-acquire the PQ space reserved above */
		kq->ops.rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) /
					sizeof(unsigned int) - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	addr.quad_part = mem_obj->gpu_addr;

	/* sync variable address, low part stored shifted right by 2 */
	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	/* value the CP writes into rm_state once the IB has completed */
	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq->ops.submit_packet(kq);

	/* Wait till CP writes sync code: */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
/*
 * No-DIQ registration: nothing to set up on the device; just make sure
 * the DIQ kernel-queue pointer is cleared so it will never be used.
 */
static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
{
	BUG_ON(!dbgdev);

	/*
	 * no action is needed in this case,
	 * just make sure diq will not be used
	 */
	dbgdev->kq = NULL;
	return 0;
}
  144. static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
  145. {
  146. struct queue_properties properties;
  147. unsigned int qid;
  148. struct kernel_queue *kq = NULL;
  149. int status;
  150. BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
  151. status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
  152. &properties, 0, KFD_QUEUE_TYPE_DIQ,
  153. &qid);
  154. if (status) {
  155. pr_err("amdkfd: Failed to create DIQ\n");
  156. return status;
  157. }
  158. pr_debug("DIQ Created with queue id: %d\n", qid);
  159. kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
  160. if (kq == NULL) {
  161. pr_err("amdkfd: Error getting DIQ\n");
  162. pqm_destroy_queue(dbgdev->pqm, qid);
  163. return -EFAULT;
  164. }
  165. dbgdev->kq = kq;
  166. return status;
  167. }
/*
 * No-DIQ unregistration: disable any active address-watch points
 * directly through the kgd interface.  Always succeeds.
 */
static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
{
	BUG_ON(!dbgdev || !dbgdev->dev);

	/* disable watch address */
	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
	return 0;
}
/*
 * DIQ unregistration: destroy the DIQ and drop our cached kernel-queue
 * pointer.  Returns the pqm_destroy_queue() status.
 */
static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
{
	/* todo - disable address watch */
	int status;

	BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);

	status = pqm_destroy_queue(dbgdev->pqm,
			dbgdev->kq->queue->properties.queue_id);
	dbgdev->kq = NULL;

	return status;
}
  185. static void dbgdev_address_watch_set_registers(
  186. const struct dbg_address_watch_info *adw_info,
  187. union TCP_WATCH_ADDR_H_BITS *addrHi,
  188. union TCP_WATCH_ADDR_L_BITS *addrLo,
  189. union TCP_WATCH_CNTL_BITS *cntl,
  190. unsigned int index, unsigned int vmid)
  191. {
  192. union ULARGE_INTEGER addr;
  193. BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);
  194. addr.quad_part = 0;
  195. addrHi->u32All = 0;
  196. addrLo->u32All = 0;
  197. cntl->u32All = 0;
  198. if (adw_info->watch_mask != NULL)
  199. cntl->bitfields.mask =
  200. (uint32_t) (adw_info->watch_mask[index] &
  201. ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
  202. else
  203. cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
  204. addr.quad_part = (unsigned long long) adw_info->watch_address[index];
  205. addrHi->bitfields.addr = addr.u.high_part &
  206. ADDRESS_WATCH_REG_ADDHIGH_MASK;
  207. addrLo->bitfields.addr =
  208. (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
  209. cntl->bitfields.mode = adw_info->watch_mode[index];
  210. cntl->bitfields.vmid = (uint32_t) vmid;
  211. /* for now assume it is an ATC address */
  212. cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
  213. pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
  214. pr_debug("\t\t%20s %08x\n", "set reg add high :",
  215. addrHi->bitfields.addr);
  216. pr_debug("\t\t%20s %08x\n", "set reg add low :",
  217. addrLo->bitfields.addr);
  218. }
  219. static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
  220. struct dbg_address_watch_info *adw_info)
  221. {
  222. union TCP_WATCH_ADDR_H_BITS addrHi;
  223. union TCP_WATCH_ADDR_L_BITS addrLo;
  224. union TCP_WATCH_CNTL_BITS cntl;
  225. struct kfd_process_device *pdd;
  226. unsigned int i;
  227. BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
  228. /* taking the vmid for that process on the safe way using pdd */
  229. pdd = kfd_get_process_device_data(dbgdev->dev,
  230. adw_info->process);
  231. if (!pdd) {
  232. pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
  233. return -EFAULT;
  234. }
  235. addrHi.u32All = 0;
  236. addrLo.u32All = 0;
  237. cntl.u32All = 0;
  238. if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
  239. (adw_info->num_watch_points == 0)) {
  240. pr_err("amdkfd: num_watch_points is invalid\n");
  241. return -EINVAL;
  242. }
  243. if ((adw_info->watch_mode == NULL) ||
  244. (adw_info->watch_address == NULL)) {
  245. pr_err("amdkfd: adw_info fields are not valid\n");
  246. return -EINVAL;
  247. }
  248. for (i = 0 ; i < adw_info->num_watch_points ; i++) {
  249. dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
  250. &cntl, i, pdd->qpd.vmid);
  251. pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
  252. pr_debug("\t\t%20s %08x\n", "register index :", i);
  253. pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
  254. pr_debug("\t\t%20s %08x\n", "Address Low is :",
  255. addrLo.bitfields.addr);
  256. pr_debug("\t\t%20s %08x\n", "Address high is :",
  257. addrHi.bitfields.addr);
  258. pr_debug("\t\t%20s %08x\n", "Address high is :",
  259. addrHi.bitfields.addr);
  260. pr_debug("\t\t%20s %08x\n", "Control Mask is :",
  261. cntl.bitfields.mask);
  262. pr_debug("\t\t%20s %08x\n", "Control Mode is :",
  263. cntl.bitfields.mode);
  264. pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
  265. cntl.bitfields.vmid);
  266. pr_debug("\t\t%20s %08x\n", "Control atc is :",
  267. cntl.bitfields.atc);
  268. pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
  269. pdd->dev->kfd2kgd->address_watch_execute(
  270. dbgdev->dev->kgd,
  271. i,
  272. cntl.u32All,
  273. addrHi.u32All,
  274. addrLo.u32All);
  275. }
  276. return 0;
  277. }
/*
 * Program address-watch points through the Debug Interface Queue: for
 * each watch point an IB containing four SET_CONFIG_REG packets is
 * built (CNTL without the valid bit, ADDR_HI, ADDR_LO, then CNTL with
 * the valid bit) and submitted via dbgdev_diq_submit_ib().
 *
 * Return: 0 on success, -EINVAL on invalid watch-point parameters,
 * otherwise the allocation/submit status.
 */
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
				struct dbg_address_watch_info *adw_info)
{
	struct pm4__set_config_reg *packets_vec;
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_mem_obj *mem_obj;
	unsigned int aw_reg_add_dword;
	uint32_t *packet_buff_uint;
	unsigned int i;
	int status;
	/* four SET_CONFIG_REG packets per IB: CNTL, ADDR_HI, ADDR_LO, CNTL */
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
	/* we do not control the vmid in DIQ mode, just a place holder */
	unsigned int vmid = 0;

	BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("amdkfd: num_watch_points is invalid\n");
		return -EINVAL;
	}

	if ((NULL == adw_info->watch_mode) ||
			(NULL == adw_info->watch_address)) {
		pr_err("amdkfd: adw_info fields are not valid\n");
		return -EINVAL;
	}

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
	if (status != 0) {
		pr_err("amdkfd: Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);

	/*
	 * Packets 0 and 3 both target the CNTL register; for these the CP
	 * patches the process VMID into the written value (insert_vmid=1)
	 * at bit offset ADDRESS_WATCH_CNTL_OFFSET.  Packets 1 and 2 write
	 * the HI/LO address registers verbatim.
	 */
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[0].bitfields2.insert_vmid = 1;
	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[1].bitfields2.insert_vmid = 0;
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[3].bitfields2.insert_vmid = 1;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info,
						&addrHi,
						&addrLo,
						&cntl,
						i,
						vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
		pr_debug("\t\t%20s %p\n", "Add ptr is :",
				adw_info->watch_address);
		pr_debug("\t\t%20s %08llx\n", "Add is :",
				adw_info->watch_address[i]);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		/* packet 0: CNTL (valid bit still clear at this point) */
		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
						dbgdev->dev->kgd,
						i,
						ADDRESS_WATCH_REG_CNTL);
		/* byte offset -> dword offset, then rebase to config space */
		aw_reg_add_dword /= sizeof(uint32_t);
		packets_vec[0].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[0].reg_data[0] = cntl.u32All;

		/* packet 1: ADDR_HI */
		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
						dbgdev->dev->kgd,
						i,
						ADDRESS_WATCH_REG_ADDR_HI);
		aw_reg_add_dword /= sizeof(uint32_t);
		packets_vec[1].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[1].reg_data[0] = addrHi.u32All;

		/* packet 2: ADDR_LO */
		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
						dbgdev->dev->kgd,
						i,
						ADDRESS_WATCH_REG_ADDR_LO);
		aw_reg_add_dword /= sizeof(uint32_t);
		packets_vec[2].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[2].reg_data[0] = addrLo.u32All;

		/* enable watch flag if address is not zero*/
		if (adw_info->watch_address[i] > 0)
			cntl.bitfields.valid = 1;
		else
			cntl.bitfields.valid = 0;

		/* packet 3: final CNTL write that (re-)arms the watch point */
		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
						dbgdev->dev->kgd,
						i,
						ADDRESS_WATCH_REG_CNTL);
		aw_reg_add_dword /= sizeof(uint32_t);
		packets_vec[3].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[3].reg_data[0] = cntl.u32All;

		status = dbgdev_diq_submit_ib(
				dbgdev,
				adw_info->process->pasid,
				mem_obj->gpu_addr,
				packet_buff_uint,
				ib_size);

		if (status != 0) {
			pr_err("amdkfd: Failed to submit IB to DIQ\n");
			break;
		}
	}

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
	return status;
}
  409. static int dbgdev_wave_control_set_registers(
  410. struct dbg_wave_control_info *wac_info,
  411. union SQ_CMD_BITS *in_reg_sq_cmd,
  412. union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
  413. {
  414. int status = 0;
  415. union SQ_CMD_BITS reg_sq_cmd;
  416. union GRBM_GFX_INDEX_BITS reg_gfx_index;
  417. struct HsaDbgWaveMsgAMDGen2 *pMsg;
  418. BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);
  419. reg_sq_cmd.u32All = 0;
  420. reg_gfx_index.u32All = 0;
  421. pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
  422. switch (wac_info->mode) {
  423. /* Send command to single wave */
  424. case HSA_DBG_WAVEMODE_SINGLE:
  425. /*
  426. * Limit access to the process waves only,
  427. * by setting vmid check
  428. */
  429. reg_sq_cmd.bits.check_vmid = 1;
  430. reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
  431. reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
  432. reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
  433. reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
  434. reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
  435. reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
  436. break;
  437. /* Send command to all waves with matching VMID */
  438. case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
  439. reg_gfx_index.bits.sh_broadcast_writes = 1;
  440. reg_gfx_index.bits.se_broadcast_writes = 1;
  441. reg_gfx_index.bits.instance_broadcast_writes = 1;
  442. reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
  443. break;
  444. /* Send command to all CU waves with matching VMID */
  445. case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
  446. reg_sq_cmd.bits.check_vmid = 1;
  447. reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
  448. reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
  449. reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
  450. reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
  451. break;
  452. default:
  453. return -EINVAL;
  454. }
  455. switch (wac_info->operand) {
  456. case HSA_DBG_WAVEOP_HALT:
  457. reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
  458. break;
  459. case HSA_DBG_WAVEOP_RESUME:
  460. reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
  461. break;
  462. case HSA_DBG_WAVEOP_KILL:
  463. reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
  464. break;
  465. case HSA_DBG_WAVEOP_DEBUG:
  466. reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
  467. break;
  468. case HSA_DBG_WAVEOP_TRAP:
  469. if (wac_info->trapId < MAX_TRAPID) {
  470. reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
  471. reg_sq_cmd.bits.trap_id = wac_info->trapId;
  472. } else {
  473. status = -EINVAL;
  474. }
  475. break;
  476. default:
  477. status = -EINVAL;
  478. break;
  479. }
  480. if (status == 0) {
  481. *in_reg_sq_cmd = reg_sq_cmd;
  482. *in_reg_gfx_index = reg_gfx_index;
  483. }
  484. return status;
  485. }
/*
 * Issue a wave-control command through the Debug Interface Queue: build
 * a three-packet IB (select SE/SH/CU via GRBM_GFX_INDEX, write SQ_CMD
 * with the VMID patched in by the CP, then restore GRBM_GFX_INDEX to
 * full broadcast) and submit it with dbgdev_diq_submit_ib().
 *
 * Return: 0 on success, negative status otherwise.
 */
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	BUG_ON(!dbgdev || !wac_info);

	reg_sq_cmd.u32All = 0;

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
			&reg_gfx_index);
	if (status) {
		pr_err("amdkfd: Failed to set wave control registers\n");
		return status;
	}

	/* we do not control the VMID in DIQ,so reset it to a known value */
	reg_sq_cmd.bits.vm_id = 0;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: N/A\n");

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
	if (status != 0) {
		pr_err("amdkfd: Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;

	/* packet 0: program GRBM_GFX_INDEX (user-config register space) */
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
				USERCONFIG_REG_BASE;
	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	/*
	 * packet 1: write SQ_CMD; the CP patches the process VMID into the
	 * value at bit offset SQ_CMD_VMID_OFFSET (insert_vmid=1).
	 */
	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
						AMD_CONFIG_REG_BASE;
	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register */
	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;

	/* packet 2: reuse packet 0's header, write broadcast gfx index */
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
				USERCONFIG_REG_BASE;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
			dbgdev,
			wac_info->process->pasid,
			mem_obj->gpu_addr,
			packet_buff_uint,
			ib_size);

	if (status != 0)
		pr_err("amdkfd: Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
/*
 * Issue a wave-control command by writing the registers directly
 * through the kgd interface (no-DIQ path).  The process VMID is
 * patched into SQ_CMD from the process's per-device data.
 *
 * Return: 0/status from wave_control_execute(), -EFAULT if no pdd,
 * or the dbgdev_wave_control_set_registers() error.
 */
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);

	reg_sq_cmd.u32All = 0;

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
	if (!pdd) {
		pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
			&reg_gfx_index);
	if (status) {
		pr_err("amdkfd: Failed to set wave control registers\n");
		return status;
	}

	/* for non DIQ we need to patch the VMID: */
	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}
  629. int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
  630. {
  631. int status = 0;
  632. unsigned int vmid;
  633. union SQ_CMD_BITS reg_sq_cmd;
  634. union GRBM_GFX_INDEX_BITS reg_gfx_index;
  635. struct kfd_process_device *pdd;
  636. struct dbg_wave_control_info wac_info;
  637. int temp;
  638. int first_vmid_to_scan = 8;
  639. int last_vmid_to_scan = 15;
  640. first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
  641. temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
  642. last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
  643. reg_sq_cmd.u32All = 0;
  644. status = 0;
  645. wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
  646. wac_info.operand = HSA_DBG_WAVEOP_KILL;
  647. pr_debug("Killing all process wavefronts\n");
  648. /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
  649. * ATC_VMID15_PASID_MAPPING
  650. * to check which VMID the current process is mapped to. */
  651. for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
  652. if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
  653. (dev->kgd, vmid)) {
  654. if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
  655. (dev->kgd, vmid) == p->pasid) {
  656. pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
  657. vmid, p->pasid);
  658. break;
  659. }
  660. }
  661. }
  662. if (vmid > last_vmid_to_scan) {
  663. pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid);
  664. return -EFAULT;
  665. }
  666. /* taking the VMID for that process on the safe way using PDD */
  667. pdd = kfd_get_process_device_data(dev, p);
  668. if (!pdd)
  669. return -EFAULT;
  670. status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
  671. &reg_gfx_index);
  672. if (status != 0)
  673. return -EINVAL;
  674. /* for non DIQ we need to patch the VMID: */
  675. reg_sq_cmd.bits.vm_id = vmid;
  676. dev->kfd2kgd->wave_control_execute(dev->kgd,
  677. reg_gfx_index.u32All,
  678. reg_sq_cmd.u32All);
  679. return 0;
  680. }
  681. void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
  682. enum DBGDEV_TYPE type)
  683. {
  684. BUG_ON(!pdbgdev || !pdev);
  685. pdbgdev->dev = pdev;
  686. pdbgdev->kq = NULL;
  687. pdbgdev->type = type;
  688. pdbgdev->pqm = NULL;
  689. switch (type) {
  690. case DBGDEV_TYPE_NODIQ:
  691. pdbgdev->dbgdev_register = dbgdev_register_nodiq;
  692. pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
  693. pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
  694. pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
  695. break;
  696. case DBGDEV_TYPE_DIQ:
  697. default:
  698. pdbgdev->dbgdev_register = dbgdev_register_diq;
  699. pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
  700. pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
  701. pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
  702. break;
  703. }
  704. }