astcanary.c 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. /*
  2. * Asterisk -- An open source telephony toolkit.
  3. *
  4. * Copyright (C) 2007, Digium, Inc.
  5. *
  6. * Tilghman Lesher <tlesher AT digium DOT com>
  7. *
  8. * See http://www.asterisk.org for more information about
  9. * the Asterisk project. Please do not directly contact
  10. * any of the maintainers of this project for assistance;
  11. * the project provides a web site, mailing lists and IRC
  12. * channels for your use.
  13. *
  14. * This program is free software, distributed under the terms of
  15. * the GNU General Public License Version 2. See the LICENSE file
  16. * at the top of the source tree.
  17. */
  18. /*** MODULEINFO
  19. <support_level>core</support_level>
  20. ***/
  21. #include <sys/types.h>
  22. #include <sys/stat.h>
  23. #include <sys/time.h>
  24. #include <sys/resource.h>
  25. #include <utime.h>
  26. #include <fcntl.h>
  27. #include <unistd.h>
  28. #include <stdlib.h>
  29. #include <string.h>
  30. #include <stdio.h>
  31. /*!\brief
  32. * At one time, canaries were carried along with coal miners down
  33. * into a mine. Their purpose was to alert the miners when they
  34. * had drilled into a pocket of methane gas or another noxious
  35. * substance. The canary, being the most sensitive animal, would
  36. * immediately fall over. Seeing this, the miners could take
  37. * action to escape the mine, seeing an imminent danger.
  38. *
  39. * This process serves a similar purpose, though with the realtime
  40. * priority being the reason. When a thread starts running away
  41. * with the processor, it is typically difficult to tell what
  42. * thread caused the problem, as the machine acts as if it is
  43. * locked up (in fact, what has happened is that Asterisk runs at
  44. * a higher priority than even the login shell, so the runaway
  45. * thread hogs all available CPU time).
  46. *
  47. * If that happens, this canary process will cease to get any
  48. * process time, which we can monitor with a realtime thread in
  49. * Asterisk. Should that happen, that monitoring thread may take
  50. * immediate action to slow down Asterisk to regular priority,
  51. * thus allowing an administrator to log in to the system and
  52. * restart Asterisk or perhaps take another course of action
  53. * (such as retrieving a backtrace to let the developers know
  54. * what precisely went wrong).
  55. *
  56. * Note that according to POSIX.1, all threads inside a single
  57. * process must share the same priority, so when the monitoring
  58. * thread deprioritizes itself, it deprioritizes all threads at
  59. * the same time. This is also why this canary must exist as a
  60. * completely separate process and not simply as a thread within
  61. * Asterisk itself.
  62. *
  63. * Quote:
  64. * "The nice value set with setpriority() shall be applied to the
  65. * process. If the process is multi-threaded, the nice value shall
  66. * affect all system scope threads in the process."
  67. *
  68. * Source:
  69. * http://www.opengroup.org/onlinepubs/000095399/functions/setpriority.html
  70. *
  71. * In answer to the question, what aren't system scope threads, the
  72. * answer is, in Asterisk, nothing. Process scope threads are the
  73. * alternative, but they aren't supported in Linux.
  74. */
  /*!\brief
   * Human-readable text that main() writes into the monitor file whenever
   * it has to (re)create that file.
   *
   * The adjacent string literals below are concatenated by the compiler into
   * a single NUL-terminated string made of two paragraphs separated by one
   * blank line.
   */
  static const char explanation[] =
      /* Paragraph 1: what the file is and why it must be left in place. */
      "This file is created when Asterisk is run with a realtime priority (-p). It\n"
      "must continue to exist, and the astcanary process must be allowed to continue\n"
      "running, or else the Asterisk process will, within a short period of time,\n"
      "slow itself down to regular priority.\n"
      "\n"
      /* Paragraph 2: the technical background. */
      "The technical explanation for this file is to provide an assurance to Asterisk\n"
      "that there are no threads that have gone into runaway mode, thus hogging the\n"
      "CPU, and making the Asterisk machine seem to be unresponsive. When that\n"
      "happens, the astcanary process will be unable to update the timestamp on this\n"
      "file, and Asterisk will notice within 120 seconds and react. Slowing the\n"
      "Asterisk process down to regular priority will permit an administrator to\n"
      "intervene, thus avoiding a need to reboot the entire machine.\n";
  87. int main(int argc, char *argv[])
  88. {
  89. int fd;
  90. pid_t parent;
  91. if (argc < 3) {
  92. fprintf(stderr, "Usage: %s <monitor-filename> <ppid>\n", argv[0]);
  93. exit(1);
  94. }
  95. /* Run at normal priority */
  96. setpriority(PRIO_PROCESS, 0, 0);
  97. /*!\note
  98. * See http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap03.html#tag_03_265
  99. * for a justification of this approach. The PPID after the creator dies in Linux and
  100. * most other Unix-like systems will be 1, but this is not strictly the case. The POSIX
  101. * specification allows it to be an implementation-defined system process. However, it
  102. * most certainly will not be the original parent PID, which makes the following code
  103. * POSIX-compliant.
  104. */
  105. for (parent = atoi(argv[2]); parent == getppid() ;) {
  106. /* Update the modification times (checked from Asterisk) */
  107. if (utime(argv[1], NULL)) {
  108. /* Recreate the file if it doesn't exist */
  109. if ((fd = open(argv[1], O_RDWR | O_TRUNC | O_CREAT, 0777)) > -1) {
  110. if (write(fd, explanation, strlen(explanation)) < 0) {
  111. exit(1);
  112. }
  113. close(fd);
  114. } else {
  115. exit(1);
  116. }
  117. continue;
  118. }
  119. /* Run occasionally */
  120. sleep(5);
  121. }
  122. /* Exit when the parent dies */
  123. return 0;
  124. }