trace-pagealloc-postprocess.pl 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. #!/usr/bin/perl
  2. # This is a POC (proof of concept or piece of crap, take your pick) for reading the
  3. # text representation of trace output related to page allocation. It makes an attempt
  4. # to extract some high-level information on what is going on. The accuracy of the parser
  5. # may vary considerably
  6. #
  7. # Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe
  8. # other options
  9. # --prepend-parent Report on the parent proc and PID
  10. # --read-procstat If the trace lacks process info, get it from /proc
  11. # --ignore-pid Aggregate processes of the same name together
  12. #
  13. # Copyright (c) IBM Corporation 2009
  14. # Author: Mel Gorman <mel@csn.ul.ie>
  15. use strict;
  16. use Getopt::Long;
  17. # Tracepoint events
  18. use constant MM_PAGE_ALLOC => 1;
  19. use constant MM_PAGE_FREE => 2;
  20. use constant MM_PAGE_FREE_BATCHED => 3;
  21. use constant MM_PAGE_PCPU_DRAIN => 4;
  22. use constant MM_PAGE_ALLOC_ZONE_LOCKED => 5;
  23. use constant MM_PAGE_ALLOC_EXTFRAG => 6;
  24. use constant EVENT_UNKNOWN => 7;
  25. # Constants used to track state
  26. use constant STATE_PCPU_PAGES_DRAINED => 8;
  27. use constant STATE_PCPU_PAGES_REFILLED => 9;
  28. # High-level events extrapolated from tracepoints
  29. use constant HIGH_PCPU_DRAINS => 10;
  30. use constant HIGH_PCPU_REFILLS => 11;
  31. use constant HIGH_EXT_FRAGMENT => 12;
  32. use constant HIGH_EXT_FRAGMENT_SEVERE => 13;
  33. use constant HIGH_EXT_FRAGMENT_MODERATE => 14;
  34. use constant HIGH_EXT_FRAGMENT_CHANGED => 15;
  35. my %perprocesspid;
  36. my %perprocess;
  37. my $opt_ignorepid;
  38. my $opt_read_procstat;
  39. my $opt_prepend_parent;
  40. # Catch sigint and exit on request
  41. my $sigint_report = 0;
  42. my $sigint_exit = 0;
  43. my $sigint_pending = 0;
  44. my $sigint_received = 0;
  45. sub sigint_handler {
  46. my $current_time = time;
  47. if ($current_time - 2 > $sigint_received) {
  48. print "SIGINT received, report pending. Hit ctrl-c again to exit\n";
  49. $sigint_report = 1;
  50. } else {
  51. if (!$sigint_exit) {
  52. print "Second SIGINT received quickly, exiting\n";
  53. }
  54. $sigint_exit++;
  55. }
  56. if ($sigint_exit > 3) {
  57. print "Many SIGINTs received, exiting now without report\n";
  58. exit;
  59. }
  60. $sigint_received = $current_time;
  61. $sigint_pending = 1;
  62. }
  63. $SIG{INT} = "sigint_handler";
  64. # Parse command line options
  65. GetOptions(
  66. 'ignore-pid' => \$opt_ignorepid,
  67. 'read-procstat' => \$opt_read_procstat,
  68. 'prepend-parent' => \$opt_prepend_parent,
  69. );
  70. # Defaults for dynamically discovered regex's
  71. my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])';
  72. # Dyanically discovered regex
  73. my $regex_fragdetails;
  74. # Static regex used. Specified like this for readability and for use with /o
  75. # (process_pid) (cpus ) ( time ) (tpoint ) (details)
  76. my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)';
  77. my $regex_statname = '[-0-9]*\s\((.*)\).*';
  78. my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*';
  79. sub generate_traceevent_regex {
  80. my $event = shift;
  81. my $default = shift;
  82. my $regex;
  83. # Read the event format or use the default
  84. if (!open (FORMAT, "/sys/kernel/debug/tracing/events/$event/format")) {
  85. $regex = $default;
  86. } else {
  87. my $line;
  88. while (!eof(FORMAT)) {
  89. $line = <FORMAT>;
  90. if ($line =~ /^print fmt:\s"(.*)",.*/) {
  91. $regex = $1;
  92. $regex =~ s/%p/\([0-9a-f]*\)/g;
  93. $regex =~ s/%d/\([-0-9]*\)/g;
  94. $regex =~ s/%lu/\([0-9]*\)/g;
  95. }
  96. }
  97. }
  98. # Verify fields are in the right order
  99. my $tuple;
  100. foreach $tuple (split /\s/, $regex) {
  101. my ($key, $value) = split(/=/, $tuple);
  102. my $expected = shift;
  103. if ($key ne $expected) {
  104. print("WARNING: Format not as expected '$key' != '$expected'");
  105. $regex =~ s/$key=\((.*)\)/$key=$1/;
  106. }
  107. }
  108. if (defined shift) {
  109. die("Fewer fields than expected in format");
  110. }
  111. return $regex;
  112. }
  113. $regex_fragdetails = generate_traceevent_regex("kmem/mm_page_alloc_extfrag",
  114. $regex_fragdetails_default,
  115. "page", "pfn",
  116. "alloc_order", "fallback_order", "pageblock_order",
  117. "alloc_migratetype", "fallback_migratetype",
  118. "fragmenting", "change_ownership");
  119. sub read_statline($) {
  120. my $pid = $_[0];
  121. my $statline;
  122. if (open(STAT, "/proc/$pid/stat")) {
  123. $statline = <STAT>;
  124. close(STAT);
  125. }
  126. if ($statline eq '') {
  127. $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0";
  128. }
  129. return $statline;
  130. }
  131. sub guess_process_pid($$) {
  132. my $pid = $_[0];
  133. my $statline = $_[1];
  134. if ($pid == 0) {
  135. return "swapper-0";
  136. }
  137. if ($statline !~ /$regex_statname/o) {
  138. die("Failed to math stat line for process name :: $statline");
  139. }
  140. return "$1-$pid";
  141. }
  142. sub parent_info($$) {
  143. my $pid = $_[0];
  144. my $statline = $_[1];
  145. my $ppid;
  146. if ($pid == 0) {
  147. return "NOPARENT-0";
  148. }
  149. if ($statline !~ /$regex_statppid/o) {
  150. die("Failed to match stat line process ppid:: $statline");
  151. }
  152. # Read the ppid stat line
  153. $ppid = $1;
  154. return guess_process_pid($ppid, read_statline($ppid));
  155. }
  156. sub process_events {
  157. my $traceevent;
  158. my $process_pid;
  159. my $cpus;
  160. my $timestamp;
  161. my $tracepoint;
  162. my $details;
  163. my $statline;
  164. # Read each line of the event log
  165. EVENT_PROCESS:
  166. while ($traceevent = <STDIN>) {
  167. if ($traceevent =~ /$regex_traceevent/o) {
  168. $process_pid = $1;
  169. $tracepoint = $4;
  170. if ($opt_read_procstat || $opt_prepend_parent) {
  171. $process_pid =~ /(.*)-([0-9]*)$/;
  172. my $process = $1;
  173. my $pid = $2;
  174. $statline = read_statline($pid);
  175. if ($opt_read_procstat && $process eq '') {
  176. $process_pid = guess_process_pid($pid, $statline);
  177. }
  178. if ($opt_prepend_parent) {
  179. $process_pid = parent_info($pid, $statline) . " :: $process_pid";
  180. }
  181. }
  182. # Unnecessary in this script. Uncomment if required
  183. # $cpus = $2;
  184. # $timestamp = $3;
  185. } else {
  186. next;
  187. }
  188. # Perl Switch() sucks majorly
  189. if ($tracepoint eq "mm_page_alloc") {
  190. $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++;
  191. } elsif ($tracepoint eq "mm_page_free") {
  192. $perprocesspid{$process_pid}->{MM_PAGE_FREE}++
  193. } elsif ($tracepoint eq "mm_page_free_batched") {
  194. $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}++;
  195. } elsif ($tracepoint eq "mm_page_pcpu_drain") {
  196. $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++;
  197. $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++;
  198. } elsif ($tracepoint eq "mm_page_alloc_zone_locked") {
  199. $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++;
  200. $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++;
  201. } elsif ($tracepoint eq "mm_page_alloc_extfrag") {
  202. # Extract the details of the event now
  203. $details = $5;
  204. my ($page, $pfn);
  205. my ($alloc_order, $fallback_order, $pageblock_order);
  206. my ($alloc_migratetype, $fallback_migratetype);
  207. my ($fragmenting, $change_ownership);
  208. if ($details !~ /$regex_fragdetails/o) {
  209. print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n";
  210. next;
  211. }
  212. $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++;
  213. $page = $1;
  214. $pfn = $2;
  215. $alloc_order = $3;
  216. $fallback_order = $4;
  217. $pageblock_order = $5;
  218. $alloc_migratetype = $6;
  219. $fallback_migratetype = $7;
  220. $fragmenting = $8;
  221. $change_ownership = $9;
  222. if ($fragmenting) {
  223. $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++;
  224. if ($fallback_order <= 3) {
  225. $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++;
  226. } else {
  227. $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++;
  228. }
  229. }
  230. if ($change_ownership) {
  231. $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++;
  232. }
  233. } else {
  234. $perprocesspid{$process_pid}->{EVENT_UNKNOWN}++;
  235. }
  236. # Catch a full pcpu drain event
  237. if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} &&
  238. $tracepoint ne "mm_page_pcpu_drain") {
  239. $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++;
  240. $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
  241. }
  242. # Catch a full pcpu refill event
  243. if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} &&
  244. $tracepoint ne "mm_page_alloc_zone_locked") {
  245. $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++;
  246. $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
  247. }
  248. if ($sigint_pending) {
  249. last EVENT_PROCESS;
  250. }
  251. }
  252. }
  253. sub dump_stats {
  254. my $hashref = shift;
  255. my %stats = %$hashref;
  256. # Dump per-process stats
  257. my $process_pid;
  258. my $max_strlen = 0;
  259. # Get the maximum process name
  260. foreach $process_pid (keys %perprocesspid) {
  261. my $len = length($process_pid);
  262. if ($len > $max_strlen) {
  263. $max_strlen = $len;
  264. }
  265. }
  266. $max_strlen += 2;
  267. printf("\n");
  268. printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
  269. "Process", "Pages", "Pages", "Pages", "Pages", "PCPU", "PCPU", "PCPU", "Fragment", "Fragment", "MigType", "Fragment", "Fragment", "Unknown");
  270. printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
  271. "details", "allocd", "allocd", "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing", "Changed", "Severe", "Moderate", "");
  272. printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
  273. "", "", "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", "");
  274. foreach $process_pid (keys %stats) {
  275. # Dump final aggregates
  276. if ($stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED}) {
  277. $stats{$process_pid}->{HIGH_PCPU_DRAINS}++;
  278. $stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
  279. }
  280. if ($stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED}) {
  281. $stats{$process_pid}->{HIGH_PCPU_REFILLS}++;
  282. $stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
  283. }
  284. printf("%-" . $max_strlen . "s %8d %10d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d\n",
  285. $process_pid,
  286. $stats{$process_pid}->{MM_PAGE_ALLOC},
  287. $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED},
  288. $stats{$process_pid}->{MM_PAGE_FREE},
  289. $stats{$process_pid}->{MM_PAGE_FREE_BATCHED},
  290. $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN},
  291. $stats{$process_pid}->{HIGH_PCPU_DRAINS},
  292. $stats{$process_pid}->{HIGH_PCPU_REFILLS},
  293. $stats{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG},
  294. $stats{$process_pid}->{HIGH_EXT_FRAG},
  295. $stats{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED},
  296. $stats{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE},
  297. $stats{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE},
  298. $stats{$process_pid}->{EVENT_UNKNOWN});
  299. }
  300. }
  301. sub aggregate_perprocesspid() {
  302. my $process_pid;
  303. my $process;
  304. undef %perprocess;
  305. foreach $process_pid (keys %perprocesspid) {
  306. $process = $process_pid;
  307. $process =~ s/-([0-9])*$//;
  308. if ($process eq '') {
  309. $process = "NO_PROCESS_NAME";
  310. }
  311. $perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC};
  312. $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED};
  313. $perprocess{$process}->{MM_PAGE_FREE} += $perprocesspid{$process_pid}->{MM_PAGE_FREE};
  314. $perprocess{$process}->{MM_PAGE_FREE_BATCHED} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED};
  315. $perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN};
  316. $perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS};
  317. $perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS};
  318. $perprocess{$process}->{MM_PAGE_ALLOC_EXTFRAG} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG};
  319. $perprocess{$process}->{HIGH_EXT_FRAG} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAG};
  320. $perprocess{$process}->{HIGH_EXT_FRAGMENT_CHANGED} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED};
  321. $perprocess{$process}->{HIGH_EXT_FRAGMENT_SEVERE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE};
  322. $perprocess{$process}->{HIGH_EXT_FRAGMENT_MODERATE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE};
  323. $perprocess{$process}->{EVENT_UNKNOWN} += $perprocesspid{$process_pid}->{EVENT_UNKNOWN};
  324. }
  325. }
  326. sub report() {
  327. if (!$opt_ignorepid) {
  328. dump_stats(\%perprocesspid);
  329. } else {
  330. aggregate_perprocesspid();
  331. dump_stats(\%perprocess);
  332. }
  333. }
  334. # Process events or signals until neither is available
  335. sub signal_loop() {
  336. my $sigint_processed;
  337. do {
  338. $sigint_processed = 0;
  339. process_events();
  340. # Handle pending signals if any
  341. if ($sigint_pending) {
  342. my $current_time = time;
  343. if ($sigint_exit) {
  344. print "Received exit signal\n";
  345. $sigint_pending = 0;
  346. }
  347. if ($sigint_report) {
  348. if ($current_time >= $sigint_received + 2) {
  349. report();
  350. $sigint_report = 0;
  351. $sigint_pending = 0;
  352. $sigint_processed = 1;
  353. }
  354. }
  355. }
  356. } while ($sigint_pending || $sigint_processed);
  357. }
  358. signal_loop();
  359. report();