/*
 * kernel/time/timer_stats.c
 *
 * Collect timer usage statistics.
 *
 * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *
 * timer_stats is based on timer_top, a similar facility that was part of
 * Con Kolivas' dyntick patch set. It was developed by Daniel Petrini at the
 * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
 * on dynamic allocation of the statistics entries and linear-search-based
 * lookup combined with a global lock, rather than the static array, hash
 * and per-CPU locking which is used by timer_stats. It was written for the
 * pre-hrtimer kernel code and therefore did not take hrtimers into account.
 * Nevertheless it provided the base for the timer_stats implementation and
 * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
 * for this effort.
 *
 * timer_top.c is
 *	Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
 *	Written by Daniel Petrini <d.pensator@gmail.com>
 *	timer_top.c was released under the GNU General Public License version 2
 *
 * We export the addresses and call counts of the timer functions being
 * invoked, plus the pid and cmdline of the owner process, where applicable.
 *
 * Start/stop data collection:
 * # echo [1|0] >/proc/timer_stats
 *
 * Display the information collected so far:
 * # cat /proc/timer_stats
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
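
/*
 * For illustration, "cat /proc/timer_stats" produces output of roughly
 * the following shape, per the seq_printf() formats in tstats_show()
 * below. The counts, PIDs and symbol names here are hypothetical and
 * depend entirely on the running system; deferrable timers carry a
 * trailing 'D' after the event count:
 *
 *   Timer Stats Version: v0.3
 *   Sample period: 3.888 s
 *   Collection: active
 *    255,     0 swapper          hrtimer_start_range_ns (tick_sched_timer)
 *     50D,  2948 kworker/0:1      queue_delayed_work_on (delayed_work_timer_fn)
 *   305 total events, 78.446 events/sec
 */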

#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>

#include <asm/uaccess.h>

/*
 * This is our basic unit of interest: a timer expiry event identified
 * by the timer, its start/expire functions and the PID of the task that
 * started the timer. We count the number of times an event happens:
 */
struct entry {
	/*
	 * Hash list:
	 */
	struct entry		*next;

	/*
	 * Hash keys:
	 */
	void			*timer;
	void			*start_func;
	void			*expire_func;
	pid_t			pid;

	/*
	 * Number of timeout events:
	 */
	unsigned long		count;
	u32			flags;

	/*
	 * We save the command-line string to preserve
	 * this information past task exit:
	 */
	char			comm[TASK_COMM_LEN + 1];

} ____cacheline_aligned_in_smp;
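
/*
 * Each entry is cacheline aligned (____cacheline_aligned_in_smp above)
 * so that CPUs bumping the counters of adjacent entries in the static
 * array do not false-share a cacheline.
 */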

/*
 * Spinlock protecting the tables - not taken during lookup:
 */
static DEFINE_RAW_SPINLOCK(table_lock);

/*
 * Per-CPU lookup locks for fast hash lookup:
 */
static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);

/*
 * Mutex to serialize state changes with show-stats activities:
 */
static DEFINE_MUTEX(show_mutex);

/*
 * Collection status, active/inactive:
 */
int __read_mostly timer_stats_active;

/*
 * Beginning/end timestamps of measurement:
 */
static ktime_t time_start, time_stop;

/*
 * tstat entry structs only get allocated while collection is
 * active and never freed during that time - this simplifies
 * things quite a bit.
 *
 * They get freed when a new collection period is started.
 */
#define MAX_ENTRIES_BITS	10
#define MAX_ENTRIES		(1UL << MAX_ENTRIES_BITS)

static unsigned long nr_entries;
static struct entry entries[MAX_ENTRIES];

static atomic_t overflow_count;

/*
 * The entries are in a hash-table, for fast lookup:
 */
#define TSTAT_HASH_BITS		(MAX_ENTRIES_BITS - 1)
#define TSTAT_HASH_SIZE		(1UL << TSTAT_HASH_BITS)
#define TSTAT_HASH_MASK		(TSTAT_HASH_SIZE - 1)
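
/*
 * The hash function below XOR-folds the four lookup keys and masks the
 * result down to TSTAT_HASH_BITS bits; colliding entries are chained
 * through entry->next:
 */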
#define __tstat_hashfn(entry)						\
	(((unsigned long)(entry)->timer       ^			\
	  (unsigned long)(entry)->start_func  ^			\
	  (unsigned long)(entry)->expire_func ^			\
	  (unsigned long)(entry)->pid) & TSTAT_HASH_MASK)

#define tstat_hashentry(entry)	(tstat_hash_table + __tstat_hashfn(entry))

static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;

static void reset_entries(void)
{
	nr_entries = 0;
	memset(entries, 0, sizeof(entries));
	memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
	atomic_set(&overflow_count, 0);
}
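
/*
 * Grab the next free slot in the static entries[] array. Called with
 * table_lock held; returns NULL once all MAX_ENTRIES slots are in use,
 * in which case the update path accounts the event in overflow_count:
 */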
static struct entry *alloc_entry(void)
{
	if (nr_entries >= MAX_ENTRIES)
		return NULL;

	return entries + nr_entries++;
}

static int match_entries(struct entry *entry1, struct entry *entry2)
{
	return entry1->timer == entry2->timer &&
	       entry1->start_func == entry2->start_func &&
	       entry1->expire_func == entry2->expire_func &&
	       entry1->pid == entry2->pid;
}

/*
 * Look up whether an entry matching this item is present
 * in the hash already. Must be called with irqs off and the
 * lookup lock held:
 */
static struct entry *tstat_lookup(struct entry *entry, char *comm)
{
	struct entry **head, *curr, *prev;

	head = tstat_hashentry(entry);
	curr = *head;

	/*
	 * The fastpath is when the entry is already hashed,
	 * we do this with the lookup lock held, but with the
	 * table lock not held:
	 */
	while (curr) {
		if (match_entries(curr, entry))
			return curr;

		curr = curr->next;
	}
	/*
	 * Slowpath: allocate, set up and link a new hash entry:
	 */
	prev = NULL;
	curr = *head;

	raw_spin_lock(&table_lock);
	/*
	 * Make sure we have not raced with another CPU:
	 */
	while (curr) {
		if (match_entries(curr, entry))
			goto out_unlock;

		prev = curr;
		curr = curr->next;
	}

	curr = alloc_entry();
	if (curr) {
		*curr = *entry;
		curr->count = 0;
		curr->next = NULL;
		memcpy(curr->comm, comm, TASK_COMM_LEN);

		smp_mb(); /* Ensure that curr is initialized before insert */

		if (prev)
			prev->next = curr;
		else
			*head = curr;
	}
out_unlock:
	raw_spin_unlock(&table_lock);

	return curr;
}

/**
 * timer_stats_update_stats - Update the statistics for a timer.
 * @timer:	pointer to either a timer_list or a hrtimer
 * @pid:	the pid of the task which set up the timer
 * @startf:	pointer to the function which did the timer setup
 * @timerf:	pointer to the timer callback function of the timer
 * @comm:	name of the process which set up the timer
 * @tflags:	The flags field of the timer
 *
 * When the timer is already registered, then the event counter is
 * incremented. Otherwise the timer is registered in a free slot.
 */
void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
			      void *timerf, char *comm, u32 tflags)
{
	/*
	 * It doesn't matter which lock we take:
	 */
	raw_spinlock_t *lock;
	struct entry *entry, input;
	unsigned long flags;

	if (likely(!timer_stats_active))
		return;

	lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id());

	input.timer = timer;
	input.start_func = startf;
	input.expire_func = timerf;
	input.pid = pid;
	input.flags = tflags;

	raw_spin_lock_irqsave(lock, flags);
	if (!timer_stats_active)
		goto out_unlock;

	entry = tstat_lookup(&input, comm);
	if (likely(entry))
		entry->count++;
	else
		atomic_inc(&overflow_count);

out_unlock:
	raw_spin_unlock_irqrestore(lock, flags);
}
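
/*
 * Note: in kernels that carry this facility, the timer wheel and
 * hrtimer expiry paths call into here via the
 * timer_stats_account_timer() / timer_stats_account_hrtimer()
 * wrappers, which pass along the start site and owner recorded when
 * the timer was armed.
 */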

static void print_name_offset(struct seq_file *m, unsigned long addr)
{
	char symname[KSYM_NAME_LEN];

	if (lookup_symbol_name(addr, symname) < 0)
		seq_printf(m, "<%p>", (void *)addr);
	else
		seq_printf(m, "%s", symname);
}

static int tstats_show(struct seq_file *m, void *v)
{
	struct timespec period;
	struct entry *entry;
	unsigned long ms;
	long events = 0;
	ktime_t time;
	int i;

	mutex_lock(&show_mutex);
	/*
	 * If still active then calculate up to now:
	 */
	if (timer_stats_active)
		time_stop = ktime_get();

	time = ktime_sub(time_stop, time_start);

	period = ktime_to_timespec(time);
	ms = period.tv_nsec / 1000000;

	seq_puts(m, "Timer Stats Version: v0.3\n");
	seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
	if (atomic_read(&overflow_count))
		seq_printf(m, "Overflow: %d entries\n",
			   atomic_read(&overflow_count));
	seq_printf(m, "Collection: %s\n",
		   timer_stats_active ? "active" : "inactive");

	for (i = 0; i < nr_entries; i++) {
		entry = entries + i;
		if (entry->flags & TIMER_DEFERRABLE) {
			seq_printf(m, "%4luD, %5d %-16s ",
				   entry->count, entry->pid, entry->comm);
		} else {
			seq_printf(m, " %4lu, %5d %-16s ",
				   entry->count, entry->pid, entry->comm);
		}

		print_name_offset(m, (unsigned long)entry->start_func);
		seq_puts(m, " (");
		print_name_offset(m, (unsigned long)entry->expire_func);
		seq_puts(m, ")\n");

		events += entry->count;
	}

	ms += period.tv_sec * 1000;
	if (!ms)
		ms = 1;

	if (events && period.tv_sec)
		seq_printf(m, "%ld total events, %ld.%03ld events/sec\n",
			   events, events * 1000 / ms,
			   (events * 1000000 / ms) % 1000);
	else
		seq_printf(m, "%ld total events\n", events);

	mutex_unlock(&show_mutex);

	return 0;
}

/*
 * After a state change, make sure all concurrent lookup/update
 * activities have stopped - taking and dropping each per-CPU lookup
 * lock once guarantees that no CPU is still inside an update, since
 * updates run with that lock held and interrupts disabled:
 */
static void sync_access(void)
{
	unsigned long flags;
	int cpu;

	for_each_online_cpu(cpu) {
		raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);

		raw_spin_lock_irqsave(lock, flags);
		/* nothing */
		raw_spin_unlock_irqrestore(lock, flags);
	}
}
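
/*
 * Control writes must be exactly two bytes: '0' or '1' followed by one
 * more byte (typically the newline that "echo 0 >/proc/timer_stats"
 * appends); anything else is rejected with -EINVAL:
 */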
static ssize_t tstats_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *offs)
{
	char ctl[2];

	if (count != 2 || *offs)
		return -EINVAL;

	if (copy_from_user(ctl, buf, count))
		return -EFAULT;

	mutex_lock(&show_mutex);
	switch (ctl[0]) {
	case '0':
		if (timer_stats_active) {
			timer_stats_active = 0;
			time_stop = ktime_get();
			sync_access();
		}
		break;
	case '1':
		if (!timer_stats_active) {
			reset_entries();
			time_start = ktime_get();
			smp_mb();
			timer_stats_active = 1;
		}
		break;
	default:
		count = -EINVAL;
	}
	mutex_unlock(&show_mutex);

	return count;
}

static int tstats_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, tstats_show, NULL);
}

static const struct file_operations tstats_fops = {
	.open		= tstats_open,
	.read		= seq_read,
	.write		= tstats_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

void __init init_timer_stats(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
}

static int __init init_tstats_procfs(void)
{
	struct proc_dir_entry *pe;

	pe = proc_create("timer_stats", 0644, NULL, &tstats_fops);
	if (!pe)
		return -ENOMEM;
	return 0;
}
__initcall(init_tstats_procfs);