stat-shadow.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. #include <stdio.h>
  2. #include "evsel.h"
  3. #include "stat.h"
  4. #include "color.h"
  5. enum {
  6. CTX_BIT_USER = 1 << 0,
  7. CTX_BIT_KERNEL = 1 << 1,
  8. CTX_BIT_HV = 1 << 2,
  9. CTX_BIT_HOST = 1 << 3,
  10. CTX_BIT_IDLE = 1 << 4,
  11. CTX_BIT_MAX = 1 << 5,
  12. };
  13. #define NUM_CTX CTX_BIT_MAX
  14. static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
  15. static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
  16. static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
  17. static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
  18. static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
  19. static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
  20. static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
  21. static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
  22. static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
  23. static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
  24. static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
  25. static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
  26. static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
  27. static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
  28. struct stats walltime_nsecs_stats;
  29. static int evsel_context(struct perf_evsel *evsel)
  30. {
  31. int ctx = 0;
  32. if (evsel->attr.exclude_kernel)
  33. ctx |= CTX_BIT_KERNEL;
  34. if (evsel->attr.exclude_user)
  35. ctx |= CTX_BIT_USER;
  36. if (evsel->attr.exclude_hv)
  37. ctx |= CTX_BIT_HV;
  38. if (evsel->attr.exclude_host)
  39. ctx |= CTX_BIT_HOST;
  40. if (evsel->attr.exclude_idle)
  41. ctx |= CTX_BIT_IDLE;
  42. return ctx;
  43. }
  44. void perf_stat__reset_shadow_stats(void)
  45. {
  46. memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
  47. memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
  48. memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
  49. memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
  50. memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
  51. memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
  52. memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
  53. memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
  54. memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
  55. memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
  56. memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
  57. memset(runtime_cycles_in_tx_stats, 0,
  58. sizeof(runtime_cycles_in_tx_stats));
  59. memset(runtime_transaction_stats, 0,
  60. sizeof(runtime_transaction_stats));
  61. memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
  62. memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
  63. }
  64. /*
  65. * Update various tracking values we maintain to print
  66. * more semantic information such as miss/hit ratios,
  67. * instruction rates, etc:
  68. */
  69. void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
  70. int cpu)
  71. {
  72. int ctx = evsel_context(counter);
  73. if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
  74. update_stats(&runtime_nsecs_stats[cpu], count[0]);
  75. else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
  76. update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
  77. else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
  78. update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
  79. else if (perf_stat_evsel__is(counter, TRANSACTION_START))
  80. update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
  81. else if (perf_stat_evsel__is(counter, ELISION_START))
  82. update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
  83. else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
  84. update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
  85. else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
  86. update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
  87. else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
  88. update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
  89. else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
  90. update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
  91. else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
  92. update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
  93. else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
  94. update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
  95. else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
  96. update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
  97. else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
  98. update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
  99. else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
  100. update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
  101. }
  102. /* used for get_ratio_color() */
  103. enum grc_type {
  104. GRC_STALLED_CYCLES_FE,
  105. GRC_STALLED_CYCLES_BE,
  106. GRC_CACHE_MISSES,
  107. GRC_MAX_NR
  108. };
  109. static const char *get_ratio_color(enum grc_type type, double ratio)
  110. {
  111. static const double grc_table[GRC_MAX_NR][3] = {
  112. [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
  113. [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
  114. [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
  115. };
  116. const char *color = PERF_COLOR_NORMAL;
  117. if (ratio > grc_table[type][0])
  118. color = PERF_COLOR_RED;
  119. else if (ratio > grc_table[type][1])
  120. color = PERF_COLOR_MAGENTA;
  121. else if (ratio > grc_table[type][2])
  122. color = PERF_COLOR_YELLOW;
  123. return color;
  124. }
  125. static void print_stalled_cycles_frontend(FILE *out, int cpu,
  126. struct perf_evsel *evsel
  127. __maybe_unused, double avg)
  128. {
  129. double total, ratio = 0.0;
  130. const char *color;
  131. int ctx = evsel_context(evsel);
  132. total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
  133. if (total)
  134. ratio = avg / total * 100.0;
  135. color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
  136. fprintf(out, " # ");
  137. color_fprintf(out, color, "%6.2f%%", ratio);
  138. fprintf(out, " frontend cycles idle ");
  139. }
  140. static void print_stalled_cycles_backend(FILE *out, int cpu,
  141. struct perf_evsel *evsel
  142. __maybe_unused, double avg)
  143. {
  144. double total, ratio = 0.0;
  145. const char *color;
  146. int ctx = evsel_context(evsel);
  147. total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
  148. if (total)
  149. ratio = avg / total * 100.0;
  150. color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
  151. fprintf(out, " # ");
  152. color_fprintf(out, color, "%6.2f%%", ratio);
  153. fprintf(out, " backend cycles idle ");
  154. }
  155. static void print_branch_misses(FILE *out, int cpu,
  156. struct perf_evsel *evsel __maybe_unused,
  157. double avg)
  158. {
  159. double total, ratio = 0.0;
  160. const char *color;
  161. int ctx = evsel_context(evsel);
  162. total = avg_stats(&runtime_branches_stats[ctx][cpu]);
  163. if (total)
  164. ratio = avg / total * 100.0;
  165. color = get_ratio_color(GRC_CACHE_MISSES, ratio);
  166. fprintf(out, " # ");
  167. color_fprintf(out, color, "%6.2f%%", ratio);
  168. fprintf(out, " of all branches ");
  169. }
  170. static void print_l1_dcache_misses(FILE *out, int cpu,
  171. struct perf_evsel *evsel __maybe_unused,
  172. double avg)
  173. {
  174. double total, ratio = 0.0;
  175. const char *color;
  176. int ctx = evsel_context(evsel);
  177. total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
  178. if (total)
  179. ratio = avg / total * 100.0;
  180. color = get_ratio_color(GRC_CACHE_MISSES, ratio);
  181. fprintf(out, " # ");
  182. color_fprintf(out, color, "%6.2f%%", ratio);
  183. fprintf(out, " of all L1-dcache hits ");
  184. }
  185. static void print_l1_icache_misses(FILE *out, int cpu,
  186. struct perf_evsel *evsel __maybe_unused,
  187. double avg)
  188. {
  189. double total, ratio = 0.0;
  190. const char *color;
  191. int ctx = evsel_context(evsel);
  192. total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
  193. if (total)
  194. ratio = avg / total * 100.0;
  195. color = get_ratio_color(GRC_CACHE_MISSES, ratio);
  196. fprintf(out, " # ");
  197. color_fprintf(out, color, "%6.2f%%", ratio);
  198. fprintf(out, " of all L1-icache hits ");
  199. }
  200. static void print_dtlb_cache_misses(FILE *out, int cpu,
  201. struct perf_evsel *evsel __maybe_unused,
  202. double avg)
  203. {
  204. double total, ratio = 0.0;
  205. const char *color;
  206. int ctx = evsel_context(evsel);
  207. total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
  208. if (total)
  209. ratio = avg / total * 100.0;
  210. color = get_ratio_color(GRC_CACHE_MISSES, ratio);
  211. fprintf(out, " # ");
  212. color_fprintf(out, color, "%6.2f%%", ratio);
  213. fprintf(out, " of all dTLB cache hits ");
  214. }
  215. static void print_itlb_cache_misses(FILE *out, int cpu,
  216. struct perf_evsel *evsel __maybe_unused,
  217. double avg)
  218. {
  219. double total, ratio = 0.0;
  220. const char *color;
  221. int ctx = evsel_context(evsel);
  222. total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
  223. if (total)
  224. ratio = avg / total * 100.0;
  225. color = get_ratio_color(GRC_CACHE_MISSES, ratio);
  226. fprintf(out, " # ");
  227. color_fprintf(out, color, "%6.2f%%", ratio);
  228. fprintf(out, " of all iTLB cache hits ");
  229. }
  230. static void print_ll_cache_misses(FILE *out, int cpu,
  231. struct perf_evsel *evsel __maybe_unused,
  232. double avg)
  233. {
  234. double total, ratio = 0.0;
  235. const char *color;
  236. int ctx = evsel_context(evsel);
  237. total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
  238. if (total)
  239. ratio = avg / total * 100.0;
  240. color = get_ratio_color(GRC_CACHE_MISSES, ratio);
  241. fprintf(out, " # ");
  242. color_fprintf(out, color, "%6.2f%%", ratio);
  243. fprintf(out, " of all LL-cache hits ");
  244. }
  245. void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
  246. double avg, int cpu, enum aggr_mode aggr)
  247. {
  248. double total, ratio = 0.0, total2;
  249. int ctx = evsel_context(evsel);
  250. if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
  251. total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
  252. if (total) {
  253. ratio = avg / total;
  254. fprintf(out, " # %5.2f insns per cycle ", ratio);
  255. } else {
  256. fprintf(out, " ");
  257. }
  258. total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
  259. total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
  260. if (total && avg) {
  261. ratio = total / avg;
  262. fprintf(out, "\n");
  263. if (aggr == AGGR_NONE)
  264. fprintf(out, " ");
  265. fprintf(out, " # %5.2f stalled cycles per insn", ratio);
  266. }
  267. } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
  268. runtime_branches_stats[ctx][cpu].n != 0) {
  269. print_branch_misses(out, cpu, evsel, avg);
  270. } else if (
  271. evsel->attr.type == PERF_TYPE_HW_CACHE &&
  272. evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
  273. ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  274. ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
  275. runtime_l1_dcache_stats[ctx][cpu].n != 0) {
  276. print_l1_dcache_misses(out, cpu, evsel, avg);
  277. } else if (
  278. evsel->attr.type == PERF_TYPE_HW_CACHE &&
  279. evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
  280. ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  281. ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
  282. runtime_l1_icache_stats[ctx][cpu].n != 0) {
  283. print_l1_icache_misses(out, cpu, evsel, avg);
  284. } else if (
  285. evsel->attr.type == PERF_TYPE_HW_CACHE &&
  286. evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
  287. ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  288. ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
  289. runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
  290. print_dtlb_cache_misses(out, cpu, evsel, avg);
  291. } else if (
  292. evsel->attr.type == PERF_TYPE_HW_CACHE &&
  293. evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
  294. ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  295. ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
  296. runtime_itlb_cache_stats[ctx][cpu].n != 0) {
  297. print_itlb_cache_misses(out, cpu, evsel, avg);
  298. } else if (
  299. evsel->attr.type == PERF_TYPE_HW_CACHE &&
  300. evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
  301. ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  302. ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
  303. runtime_ll_cache_stats[ctx][cpu].n != 0) {
  304. print_ll_cache_misses(out, cpu, evsel, avg);
  305. } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
  306. runtime_cacherefs_stats[ctx][cpu].n != 0) {
  307. total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
  308. if (total)
  309. ratio = avg * 100 / total;
  310. fprintf(out, " # %8.3f %% of all cache refs ", ratio);
  311. } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
  312. print_stalled_cycles_frontend(out, cpu, evsel, avg);
  313. } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
  314. print_stalled_cycles_backend(out, cpu, evsel, avg);
  315. } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
  316. total = avg_stats(&runtime_nsecs_stats[cpu]);
  317. if (total) {
  318. ratio = avg / total;
  319. fprintf(out, " # %8.3f GHz ", ratio);
  320. } else {
  321. fprintf(out, " ");
  322. }
  323. } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
  324. total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
  325. if (total)
  326. fprintf(out,
  327. " # %5.2f%% transactional cycles ",
  328. 100.0 * (avg / total));
  329. } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
  330. total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
  331. total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
  332. if (total2 < avg)
  333. total2 = avg;
  334. if (total)
  335. fprintf(out,
  336. " # %5.2f%% aborted cycles ",
  337. 100.0 * ((total2-avg) / total));
  338. } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
  339. runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
  340. total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
  341. if (avg)
  342. ratio = total / avg;
  343. fprintf(out, " # %8.0f cycles / transaction ", ratio);
  344. } else if (perf_stat_evsel__is(evsel, ELISION_START) &&
  345. runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
  346. total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
  347. if (avg)
  348. ratio = total / avg;
  349. fprintf(out, " # %8.0f cycles / elision ", ratio);
  350. } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) {
  351. if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
  352. fprintf(out, " # %8.3f CPUs utilized ", avg / ratio);
  353. else
  354. fprintf(out, " ");
  355. } else if (runtime_nsecs_stats[cpu].n != 0) {
  356. char unit = 'M';
  357. total = avg_stats(&runtime_nsecs_stats[cpu]);
  358. if (total)
  359. ratio = 1000.0 * avg / total;
  360. if (ratio < 0.001) {
  361. ratio *= 1000;
  362. unit = 'K';
  363. }
  364. fprintf(out, " # %8.3f %c/sec ", ratio, unit);
  365. } else {
  366. fprintf(out, " ");
  367. }
  368. }