res_corosync.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208
  1. /*
  2. * Asterisk -- An open source telephony toolkit.
  3. *
  4. * Copyright (C) 2007, Digium, Inc.
  5. * Copyright (C) 2012, Russell Bryant
  6. *
  7. * Russell Bryant <russell@russellbryant.net>
  8. *
  9. * See http://www.asterisk.org for more information about
  10. * the Asterisk project. Please do not directly contact
  11. * any of the maintainers of this project for assistance;
  12. * the project provides a web site, mailing lists and IRC
  13. * channels for your use.
  14. *
  15. * This program is free software, distributed under the terms of
  16. * the GNU General Public License Version 2. See the LICENSE file
  17. * at the top of the source tree.
  18. */
  19. /*!
  20. * \file
  21. * \author Russell Bryant <russell@russellbryant.net>
  22. *
  23. * This module is based on and replaces the previous res_ais module.
  24. */
  25. /*** MODULEINFO
  26. <depend>corosync</depend>
  27. <defaultenabled>no</defaultenabled>
  28. <support_level>extended</support_level>
  29. ***/
  30. #include "asterisk.h"
  31. ASTERISK_FILE_VERSION(__FILE__, "$Revision$");
  32. #include <corosync/cpg.h>
  33. #include <corosync/cfg.h>
  34. #include "asterisk/module.h"
  35. #include "asterisk/logger.h"
  36. #include "asterisk/poll-compat.h"
  37. #include "asterisk/config.h"
  38. #include "asterisk/event.h"
  39. #include "asterisk/cli.h"
  40. #include "asterisk/devicestate.h"
  41. #include "asterisk/app.h"
  42. #include "asterisk/stasis.h"
  43. #include "asterisk/stasis_message_router.h"
  44. #include "asterisk/stasis_system.h"
  45. AST_RWLOCK_DEFINE_STATIC(event_types_lock);
  46. static void publish_mwi_to_stasis(struct ast_event *event);
  47. static void publish_device_state_to_stasis(struct ast_event *event);
  48. static void publish_cluster_discovery_to_stasis(struct ast_event *event);
  49. /*! \brief All the nodes that we're aware of */
  50. static struct ao2_container *nodes;
  51. /*! \brief The internal topic used for message forwarding and pings */
  52. static struct stasis_topic *corosync_aggregate_topic;
  53. /*! \brief Our \ref stasis message router */
  54. static struct stasis_message_router *stasis_router;
/*!
 * \brief Internal accessor for our topic
 *
 * Exists so the aggregate topic can be referenced through a function
 * pointer (see the \c topic_fn member of the event type table).
 *
 * \return The current value of \c corosync_aggregate_topic.
 */
static struct stasis_topic *corosync_topic(void)
{
	return corosync_aggregate_topic;
}
/*!
 * \brief A cluster node tracked in the \c nodes container
 *
 * Objects are hashed and compared on \c id only (see
 * corosync_node_hash_fn() and corosync_node_cmp_fn()).
 */
struct corosync_node {
	/*! The corosync ID */
	int id;
	/*! The Asterisk EID */
	struct ast_eid eid;
	/*! The IP address of the node */
	struct ast_sockaddr addr;
};
/*! \brief Corosync ipc dispatch/request and reply size (8192 * 128 bytes = 1 MiB) */
#define COROSYNC_IPC_BUFFER_SIZE (8192 * 128)
/*!
 * \brief Version of pthread_create to ensure stack is large enough
 *
 * Requests the normal background stack size plus room for three
 * COROSYNC_IPC_BUFFER_SIZE buffers. The start routine expression (\a c) is
 * also stringified and passed along as the final argument.
 */
#define corosync_pthread_create_background(a, b, c, d) \
	ast_pthread_create_stack(a, b, c, d, \
		(AST_BACKGROUND_STACKSIZE + (3 * COROSYNC_IPC_BUFFER_SIZE)), \
		__FILE__, __FUNCTION__, __LINE__, #c)
  75. static struct corosync_node *corosync_node_alloc(struct ast_event *event)
  76. {
  77. struct corosync_node *node;
  78. node = ao2_alloc_options(sizeof(*node), NULL, AO2_ALLOC_OPT_LOCK_NOLOCK);
  79. if (!node) {
  80. return NULL;
  81. }
  82. memcpy(&node->eid, (struct ast_eid *)ast_event_get_ie_raw(event, AST_EVENT_IE_EID), sizeof(node->eid));
  83. node->id = ast_event_get_ie_uint(event, AST_EVENT_IE_NODE_ID);
  84. ast_sockaddr_parse(&node->addr, ast_event_get_ie_str(event, AST_EVENT_IE_LOCAL_ADDR), PARSE_PORT_IGNORE);
  85. return node;
  86. }
  87. static int corosync_node_hash_fn(const void *obj, const int flags)
  88. {
  89. const struct corosync_node *node;
  90. const int *id;
  91. switch (flags & OBJ_SEARCH_MASK) {
  92. case OBJ_SEARCH_KEY:
  93. id = obj;
  94. break;
  95. case OBJ_SEARCH_OBJECT:
  96. node = obj;
  97. id = &node->id;
  98. break;
  99. default:
  100. ast_assert(0);
  101. return 0;
  102. }
  103. return *id;
  104. }
  105. static int corosync_node_cmp_fn(void *obj, void *arg, int flags)
  106. {
  107. struct corosync_node *left = obj;
  108. struct corosync_node *right = arg;
  109. const int *id = arg;
  110. int cmp;
  111. switch (flags & OBJ_SEARCH_MASK) {
  112. case OBJ_SEARCH_OBJECT:
  113. id = &right->id;
  114. /* Fall through */
  115. case OBJ_SEARCH_KEY:
  116. cmp = (left->id == *id);
  117. break;
  118. case OBJ_SEARCH_PARTIAL_KEY:
  119. cmp = (left->id == right->id);
  120. break;
  121. default:
  122. /* Sort can only work on something with a full or partial key. */
  123. ast_assert(0);
  124. cmp = 1;
  125. break;
  126. }
  127. return cmp ? CMP_MATCH : 0;
  128. }
/*!
 * \brief A payload wrapper around a corosync ping event
 *
 * The wrapper owns \c event: corosync_ping_payload_dtor() releases it with
 * ast_free() when the wrapper is destroyed.
 */
struct corosync_ping_payload {
	/*! The corosync ping event being passed over \ref stasis */
	struct ast_event *event;
};
  134. /*! \brief Destructor for the \ref corosync_ping_payload wrapper object */
  135. static void corosync_ping_payload_dtor(void *obj)
  136. {
  137. struct corosync_ping_payload *payload = obj;
  138. ast_free(payload->event);
  139. }
  140. /*! \brief Convert a Corosync PING to a \ref ast_event */
  141. static struct ast_event *corosync_ping_to_event(struct stasis_message *message)
  142. {
  143. struct corosync_ping_payload *payload;
  144. struct ast_event *event;
  145. struct ast_eid *event_eid;
  146. if (!message) {
  147. return NULL;
  148. }
  149. payload = stasis_message_data(message);
  150. if (!payload->event) {
  151. return NULL;
  152. }
  153. event_eid = (struct ast_eid *)ast_event_get_ie_raw(payload->event, AST_EVENT_IE_EID);
  154. event = ast_event_new(AST_EVENT_PING,
  155. AST_EVENT_IE_EID, AST_EVENT_IE_PLTYPE_RAW, event_eid, sizeof(*event_eid),
  156. AST_EVENT_IE_END);
  157. return event;
  158. }
/*!
 * \brief Define the file-local stasis message type for corosync pings
 *
 * Provides \c corosync_ping_message_type(), with corosync_ping_to_event()
 * registered as the type's \c to_event conversion.
 */
STASIS_MESSAGE_TYPE_DEFN_LOCAL(corosync_ping_message_type,
	.to_event = corosync_ping_to_event, );
  161. /*! \brief Publish a Corosync ping to \ref stasis */
  162. static void publish_corosync_ping_to_stasis(struct ast_event *event)
  163. {
  164. struct corosync_ping_payload *payload;
  165. struct stasis_message *message;
  166. ast_assert(ast_event_get_type(event) == AST_EVENT_PING);
  167. ast_assert(event != NULL);
  168. if (!corosync_ping_message_type()) {
  169. return;
  170. }
  171. payload = ao2_t_alloc(sizeof(*payload), corosync_ping_payload_dtor, "Create ping payload");
  172. if (!payload) {
  173. return;
  174. }
  175. payload->event = event;
  176. message = stasis_message_create(corosync_ping_message_type(), payload);
  177. if (!message) {
  178. ao2_t_ref(payload, -1, "Destroy payload on off nominal");
  179. return;
  180. }
  181. stasis_publish(corosync_topic(), message);
  182. ao2_t_ref(payload, -1, "Hand ref to stasis");
  183. ao2_t_ref(message, -1, "Hand ref to stasis");
  184. }
/*!
 * \brief Per event type configuration and dispatch table
 *
 * Indexed by \c ast_event_type. Entries that are not listed are zero
 * filled, so their flags are off and their function pointers are NULL.
 * Readers in this file take \c event_types_lock before looking at it.
 */
static struct {
	/*! Name of the event type, as printed by "corosync show config" */
	const char *name;
	/*! Stasis forward for this type (managed outside the code shown here) */
	struct stasis_forward *sub;
	/*! Non-zero when local events of this type are sent to the cluster */
	unsigned char publish;
	/*! NOTE(review): presumably the value of \c publish when unconfigured - confirm */
	unsigned char publish_default;
	/*! Non-zero when events of this type received from the cluster are accepted */
	unsigned char subscribe;
	/*! NOTE(review): presumably the value of \c subscribe when unconfigured - confirm */
	unsigned char subscribe_default;
	/*! Accessor for the stasis topic carrying this event type */
	struct stasis_topic *(* topic_fn)(void);
	/*! Accessor for the stasis cache dumped to newly joined nodes; may be NULL */
	struct stasis_cache *(* cache_fn)(void);
	/*! Accessor for the stasis message type used when dumping the cache */
	struct stasis_message_type *(* message_type_fn)(void);
	/*! Handler that republishes an event received from the cluster into stasis */
	void (* publish_to_stasis)(struct ast_event *);
} event_types[] = {
	[AST_EVENT_MWI] = { .name = "mwi",
		.topic_fn = ast_mwi_topic_all,
		.cache_fn = ast_mwi_state_cache,
		.message_type_fn = ast_mwi_state_type,
		.publish_to_stasis = publish_mwi_to_stasis, },
	[AST_EVENT_DEVICE_STATE_CHANGE] = { .name = "device_state",
		.topic_fn = ast_device_state_topic_all,
		.cache_fn = ast_device_state_cache,
		.message_type_fn = ast_device_state_message_type,
		.publish_to_stasis = publish_device_state_to_stasis, },
	/* Pings have no cache, so nothing is replayed for them on a join. */
	[AST_EVENT_PING] = { .name = "ping",
		.publish_default = 1,
		.subscribe_default = 1,
		.topic_fn = corosync_topic,
		.message_type_fn = corosync_ping_message_type,
		.publish_to_stasis = publish_corosync_ping_to_stasis, },
	/* Cluster discovery likewise has no cache to replay. */
	[AST_EVENT_CLUSTER_DISCOVERY] = { .name = "cluster_discovery",
		.publish_default = 1,
		.subscribe_default = 1,
		.topic_fn = ast_system_topic,
		.message_type_fn = ast_cluster_discovery_type,
		.publish_to_stasis = publish_cluster_discovery_to_stasis, },
};
/*! \brief State for the thread that services the corosync descriptors */
static struct {
	/*! Thread handle; AST_PTHREADT_NULL until a thread is started */
	pthread_t id;
	/*! Pipe whose read end ([0]) is polled by the dispatch thread; -1 when closed */
	int alert_pipe[2];
	/*! Set to make the dispatch loop exit */
	unsigned int stop:1;
} dispatch_thread = {
	.id = AST_PTHREADT_NULL,
	.alert_pipe = { -1, -1 },
};
/*! \brief Handle for the corosync closed process group (CPG) connection */
static cpg_handle_t cpg_handle;
/*! \brief Handle for the corosync configuration (CFG) connection */
static corosync_cfg_handle_t cfg_handle;
#ifdef HAVE_COROSYNC_CFG_STATE_TRACK
static void cfg_state_track_cb(
	corosync_cfg_state_notification_buffer_t *notification_buffer,
	cs_error_t error);
#endif /* HAVE_COROSYNC_CFG_STATE_TRACK */
static void cfg_shutdown_cb(corosync_cfg_handle_t cfg_handle,
	corosync_cfg_shutdown_flags_t flags);
/*!
 * \brief Callbacks handed to corosync_cfg_initialize()
 *
 * The state tracking callback is only registered when the corosync
 * library in use provides it.
 */
static corosync_cfg_callbacks_t cfg_callbacks = {
#ifdef HAVE_COROSYNC_CFG_STATE_TRACK
	.corosync_cfg_state_track_callback = cfg_state_track_cb,
#endif /* HAVE_COROSYNC_CFG_STATE_TRACK */
	.corosync_cfg_shutdown_callback = cfg_shutdown_cb,
};
  243. /*! \brief Publish cluster discovery to \ref stasis */
  244. static void publish_cluster_discovery_to_stasis_full(struct corosync_node *node, int joined)
  245. {
  246. struct ast_json *json;
  247. struct ast_json_payload *payload;
  248. struct stasis_message *message;
  249. char eid[18];
  250. const char *addr;
  251. ast_eid_to_str(eid, sizeof(eid), &node->eid);
  252. addr = ast_sockaddr_stringify_addr(&node->addr);
  253. ast_log(AST_LOG_NOTICE, "Node %u (%s) at %s %s the cluster\n",
  254. node->id,
  255. eid,
  256. addr,
  257. joined ? "joined" : "left");
  258. json = ast_json_pack("{s: s, s: i, s: s, s: i}",
  259. "address", addr,
  260. "node_id", node->id,
  261. "eid", eid,
  262. "joined", joined);
  263. if (!json) {
  264. return;
  265. }
  266. payload = ast_json_payload_create(json);
  267. if (!payload) {
  268. ast_json_unref(json);
  269. return;
  270. }
  271. message = stasis_message_create(ast_cluster_discovery_type(), payload);
  272. if (!message) {
  273. ast_json_unref(json);
  274. ao2_ref(payload, -1);
  275. return;
  276. }
  277. stasis_publish(ast_system_topic(), message);
  278. ast_json_unref(json);
  279. ao2_ref(payload, -1);
  280. ao2_ref(message, -1);
  281. }
  282. static void send_cluster_notify(void);
  283. /*! \brief Publish a received cluster discovery \ref ast_event to \ref stasis */
  284. static void publish_cluster_discovery_to_stasis(struct ast_event *event)
  285. {
  286. struct corosync_node *node;
  287. int id = ast_event_get_ie_uint(event, AST_EVENT_IE_NODE_ID);
  288. struct ast_eid *event_eid;
  289. ast_assert(ast_event_get_type(event) == AST_EVENT_CLUSTER_DISCOVERY);
  290. event_eid = (struct ast_eid *)ast_event_get_ie_raw(event, AST_EVENT_IE_EID);
  291. if (!ast_eid_cmp(&ast_eid_default, event_eid)) {
  292. /* Don't feed events back in that originated locally. */
  293. return;
  294. }
  295. ao2_lock(nodes);
  296. node = ao2_find(nodes, &id, OBJ_SEARCH_KEY | OBJ_NOLOCK);
  297. if (node) {
  298. /* We already know about this node */
  299. ao2_unlock(nodes);
  300. ao2_ref(node, -1);
  301. return;
  302. }
  303. node = corosync_node_alloc(event);
  304. if (!node) {
  305. ao2_unlock(nodes);
  306. return;
  307. }
  308. ao2_link_flags(nodes, node, OBJ_NOLOCK);
  309. ao2_unlock(nodes);
  310. publish_cluster_discovery_to_stasis_full(node, 1);
  311. ao2_ref(node, -1);
  312. /*
  313. * When we get news that someone else has joined, we need to let them
  314. * know we exist as well.
  315. */
  316. send_cluster_notify();
  317. }
  318. /*! \brief Publish a received MWI \ref ast_event to \ref stasis */
  319. static void publish_mwi_to_stasis(struct ast_event *event)
  320. {
  321. const char *mailbox;
  322. const char *context;
  323. unsigned int new_msgs;
  324. unsigned int old_msgs;
  325. struct ast_eid *event_eid;
  326. ast_assert(ast_event_get_type(event) == AST_EVENT_MWI);
  327. mailbox = ast_event_get_ie_str(event, AST_EVENT_IE_MAILBOX);
  328. context = ast_event_get_ie_str(event, AST_EVENT_IE_CONTEXT);
  329. new_msgs = ast_event_get_ie_uint(event, AST_EVENT_IE_NEWMSGS);
  330. old_msgs = ast_event_get_ie_uint(event, AST_EVENT_IE_OLDMSGS);
  331. event_eid = (struct ast_eid *)ast_event_get_ie_raw(event, AST_EVENT_IE_EID);
  332. if (ast_strlen_zero(mailbox) || ast_strlen_zero(context)) {
  333. return;
  334. }
  335. if (new_msgs > INT_MAX) {
  336. new_msgs = INT_MAX;
  337. }
  338. if (old_msgs > INT_MAX) {
  339. old_msgs = INT_MAX;
  340. }
  341. if (ast_publish_mwi_state_full(mailbox, context, (int)new_msgs,
  342. (int)old_msgs, NULL, event_eid)) {
  343. char eid[18];
  344. ast_eid_to_str(eid, sizeof(eid), event_eid);
  345. ast_log(LOG_WARNING, "Failed to publish MWI message for %s@%s from %s\n",
  346. mailbox, context, eid);
  347. }
  348. }
  349. /*! \brief Publish a received device state \ref ast_event to \ref stasis */
  350. static void publish_device_state_to_stasis(struct ast_event *event)
  351. {
  352. const char *device;
  353. enum ast_device_state state;
  354. unsigned int cachable;
  355. struct ast_eid *event_eid;
  356. ast_assert(ast_event_get_type(event) == AST_EVENT_DEVICE_STATE_CHANGE);
  357. device = ast_event_get_ie_str(event, AST_EVENT_IE_DEVICE);
  358. state = ast_event_get_ie_uint(event, AST_EVENT_IE_STATE);
  359. cachable = ast_event_get_ie_uint(event, AST_EVENT_IE_CACHABLE);
  360. event_eid = (struct ast_eid *)ast_event_get_ie_raw(event, AST_EVENT_IE_EID);
  361. if (ast_strlen_zero(device)) {
  362. return;
  363. }
  364. if (ast_publish_device_state_full(device, state, cachable, event_eid)) {
  365. char eid[18];
  366. ast_eid_to_str(eid, sizeof(eid), event_eid);
  367. ast_log(LOG_WARNING, "Failed to publish device state message for %s from %s\n",
  368. device, eid);
  369. }
  370. }
static void cpg_deliver_cb(cpg_handle_t handle, const struct cpg_name *group_name,
	uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len);
static void cpg_confchg_cb(cpg_handle_t handle, const struct cpg_name *group_name,
	const struct cpg_address *member_list, size_t member_list_entries,
	const struct cpg_address *left_list, size_t left_list_entries,
	const struct cpg_address *joined_list, size_t joined_list_entries);
/*!
 * \brief Callbacks handed to cpg_initialize()
 *
 * Message delivery goes to cpg_deliver_cb() and group membership changes
 * go to cpg_confchg_cb().
 */
static cpg_callbacks_t cpg_callbacks = {
	.cpg_deliver_fn = cpg_deliver_cb,
	.cpg_confchg_fn = cpg_confchg_cb,
};
#ifdef HAVE_COROSYNC_CFG_STATE_TRACK
/*!
 * \brief CFG state tracking callback
 *
 * Intentionally empty; it only exists so that a callback can be
 * registered in \c cfg_callbacks.
 */
static void cfg_state_track_cb(
	corosync_cfg_state_notification_buffer_t *notification_buffer,
	cs_error_t error)
{
}
#endif /* HAVE_COROSYNC_CFG_STATE_TRACK */
/*!
 * \brief CFG shutdown callback
 *
 * Intentionally empty; no action is taken on a shutdown notification.
 */
static void cfg_shutdown_cb(corosync_cfg_handle_t cfg_handle,
	corosync_cfg_shutdown_flags_t flags)
{
}
  392. static void cpg_deliver_cb(cpg_handle_t handle, const struct cpg_name *group_name,
  393. uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
  394. {
  395. struct ast_event *event;
  396. void (*publish_handler)(struct ast_event *) = NULL;
  397. enum ast_event_type event_type;
  398. if (msg_len < ast_event_minimum_length()) {
  399. ast_debug(1, "Ignoring event that's too small. %u < %u\n",
  400. (unsigned int) msg_len,
  401. (unsigned int) ast_event_minimum_length());
  402. return;
  403. }
  404. if (!ast_eid_cmp(&ast_eid_default, ast_event_get_ie_raw(msg, AST_EVENT_IE_EID))) {
  405. /* Don't feed events back in that originated locally. */
  406. return;
  407. }
  408. event_type = ast_event_get_type(msg);
  409. if (event_type > AST_EVENT_TOTAL) {
  410. /* Egads, we don't support this */
  411. return;
  412. }
  413. ast_rwlock_rdlock(&event_types_lock);
  414. publish_handler = event_types[event_type].publish_to_stasis;
  415. if (!event_types[event_type].subscribe || !publish_handler) {
  416. /* We are not configured to subscribe to these events or
  417. we have no way to publish it internally. */
  418. ast_rwlock_unlock(&event_types_lock);
  419. return;
  420. }
  421. ast_rwlock_unlock(&event_types_lock);
  422. if (!(event = ast_malloc(msg_len))) {
  423. return;
  424. }
  425. memcpy(event, msg, msg_len);
  426. if (event_type == AST_EVENT_PING) {
  427. const struct ast_eid *eid;
  428. char buf[128] = "";
  429. eid = ast_event_get_ie_raw(event, AST_EVENT_IE_EID);
  430. ast_eid_to_str(buf, sizeof(buf), (struct ast_eid *) eid);
  431. ast_log(LOG_NOTICE, "Got event PING from server with EID: '%s'\n", buf);
  432. }
  433. ast_debug(5, "Publishing event %s (%u) to stasis\n",
  434. ast_event_get_type_name(event), event_type);
  435. publish_handler(event);
  436. }
  437. static void publish_event_to_corosync(struct ast_event *event)
  438. {
  439. cs_error_t cs_err;
  440. struct iovec iov;
  441. iov.iov_base = (void *)event;
  442. iov.iov_len = ast_event_get_size(event);
  443. ast_debug(5, "Publishing event %s (%u) to corosync\n",
  444. ast_event_get_type_name(event), ast_event_get_type(event));
  445. /* The stasis subscription will only exist if we are configured to publish
  446. * these events, so just send away. */
  447. if ((cs_err = cpg_mcast_joined(cpg_handle, CPG_TYPE_FIFO, &iov, 1)) != CS_OK) {
  448. ast_log(LOG_WARNING, "CPG mcast failed (%u) for event %s (%u)\n",
  449. cs_err, ast_event_get_type_name(event), ast_event_get_type(event));
  450. }
  451. }
  452. static void publish_to_corosync(struct stasis_message *message)
  453. {
  454. struct ast_event *event;
  455. event = stasis_message_to_event(message);
  456. if (!event) {
  457. return;
  458. }
  459. if (ast_eid_cmp(&ast_eid_default, ast_event_get_ie_raw(event, AST_EVENT_IE_EID))) {
  460. /* If the event didn't originate from this server, don't send it back out. */
  461. ast_event_destroy(event);
  462. return;
  463. }
  464. if (ast_event_get_type(event) == AST_EVENT_PING) {
  465. const struct ast_eid *eid;
  466. char buf[128] = "";
  467. eid = ast_event_get_ie_raw(event, AST_EVENT_IE_EID);
  468. ast_eid_to_str(buf, sizeof(buf), (struct ast_eid *) eid);
  469. ast_log(LOG_NOTICE, "Sending event PING from this server with EID: '%s'\n", buf);
  470. }
  471. publish_event_to_corosync(event);
  472. }
  473. static void stasis_message_cb(void *data, struct stasis_subscription *sub, struct stasis_message *message)
  474. {
  475. if (!message) {
  476. return;
  477. }
  478. publish_to_corosync(message);
  479. }
  480. static int dump_cache_cb(void *obj, void *arg, int flags)
  481. {
  482. struct stasis_message *message = obj;
  483. if (!message) {
  484. return 0;
  485. }
  486. publish_to_corosync(message);
  487. return 0;
  488. }
  489. static void cpg_confchg_cb(cpg_handle_t handle, const struct cpg_name *group_name,
  490. const struct cpg_address *member_list, size_t member_list_entries,
  491. const struct cpg_address *left_list, size_t left_list_entries,
  492. const struct cpg_address *joined_list, size_t joined_list_entries)
  493. {
  494. unsigned int i;
  495. for (i = 0; i < left_list_entries; i++) {
  496. const struct cpg_address *cpg_node = &left_list[i];
  497. struct corosync_node* node;
  498. node = ao2_find(nodes, &cpg_node->nodeid, OBJ_UNLINK | OBJ_SEARCH_KEY);
  499. if (!node) {
  500. continue;
  501. }
  502. publish_cluster_discovery_to_stasis_full(node, 0);
  503. ao2_ref(node, -1);
  504. }
  505. /* If any new nodes have joined, dump our cache of events we are publishing
  506. * that originated from this server. */
  507. if (!joined_list_entries) {
  508. return;
  509. }
  510. for (i = 0; i < ARRAY_LEN(event_types); i++) {
  511. struct ao2_container *messages;
  512. ast_rwlock_rdlock(&event_types_lock);
  513. if (!event_types[i].publish) {
  514. ast_rwlock_unlock(&event_types_lock);
  515. continue;
  516. }
  517. if (!event_types[i].cache_fn || !event_types[i].message_type_fn) {
  518. ast_rwlock_unlock(&event_types_lock);
  519. continue;
  520. }
  521. messages = stasis_cache_dump_by_eid(event_types[i].cache_fn(),
  522. event_types[i].message_type_fn(),
  523. &ast_eid_default);
  524. ast_rwlock_unlock(&event_types_lock);
  525. ao2_callback(messages, OBJ_NODATA, dump_cache_cb, NULL);
  526. ao2_t_ref(messages, -1, "Dispose of dumped cache");
  527. }
  528. }
  529. /*! \brief Informs the cluster of our EID and our IP addresses */
  530. static void send_cluster_notify(void)
  531. {
  532. struct ast_event *event;
  533. unsigned int node_id;
  534. cs_error_t cs_err;
  535. corosync_cfg_node_address_t corosync_addr;
  536. int num_addrs = 0;
  537. struct sockaddr *sa;
  538. size_t sa_len;
  539. char buf[128];
  540. int res;
  541. if ((cs_err = corosync_cfg_local_get(cfg_handle, &node_id)) != CS_OK) {
  542. ast_log(LOG_WARNING, "Failed to extract Corosync node ID for this node. Not informing cluster of existance.\n");
  543. return;
  544. }
  545. if (((cs_err = corosync_cfg_get_node_addrs(cfg_handle, node_id, 1, &num_addrs, &corosync_addr)) != CS_OK) || (num_addrs < 1)) {
  546. ast_log(LOG_WARNING, "Failed to get local Corosync address. Not informing cluster of existance.\n");
  547. return;
  548. }
  549. sa = (struct sockaddr *)corosync_addr.address;
  550. sa_len = (size_t)corosync_addr.address_length;
  551. if ((res = getnameinfo(sa, sa_len, buf, sizeof(buf), NULL, 0, NI_NUMERICHOST))) {
  552. ast_log(LOG_WARNING, "Failed to determine name of local Corosync address: %s (%d). Not informing cluster of existance.\n",
  553. gai_strerror(res), res);
  554. return;
  555. }
  556. event = ast_event_new(AST_EVENT_CLUSTER_DISCOVERY,
  557. AST_EVENT_IE_NODE_ID, AST_EVENT_IE_PLTYPE_UINT, node_id,
  558. AST_EVENT_IE_LOCAL_ADDR, AST_EVENT_IE_PLTYPE_STR, buf,
  559. AST_EVENT_IE_END);
  560. publish_event_to_corosync(event);
  561. ast_free(event);
  562. }
  563. static void *dispatch_thread_handler(void *data)
  564. {
  565. cs_error_t cs_err;
  566. struct pollfd pfd[3] = {
  567. { .events = POLLIN, },
  568. { .events = POLLIN, },
  569. { .events = POLLIN, },
  570. };
  571. if ((cs_err = cpg_fd_get(cpg_handle, &pfd[0].fd)) != CS_OK) {
  572. ast_log(LOG_ERROR, "Failed to get CPG fd. This module is now broken.\n");
  573. return NULL;
  574. }
  575. if ((cs_err = corosync_cfg_fd_get(cfg_handle, &pfd[1].fd)) != CS_OK) {
  576. ast_log(LOG_ERROR, "Failed to get CFG fd. This module is now broken.\n");
  577. return NULL;
  578. }
  579. pfd[2].fd = dispatch_thread.alert_pipe[0];
  580. send_cluster_notify();
  581. while (!dispatch_thread.stop) {
  582. int res;
  583. cs_err = CS_OK;
  584. pfd[0].revents = 0;
  585. pfd[1].revents = 0;
  586. pfd[2].revents = 0;
  587. res = ast_poll(pfd, ARRAY_LEN(pfd), -1);
  588. if (res == -1 && errno != EINTR && errno != EAGAIN) {
  589. ast_log(LOG_ERROR, "poll() error: %s (%d)\n", strerror(errno), errno);
  590. continue;
  591. }
  592. if (pfd[0].revents & POLLIN) {
  593. if ((cs_err = cpg_dispatch(cpg_handle, CS_DISPATCH_ALL)) != CS_OK) {
  594. ast_log(LOG_WARNING, "Failed CPG dispatch: %u\n", cs_err);
  595. }
  596. }
  597. if (pfd[1].revents & POLLIN) {
  598. if ((cs_err = corosync_cfg_dispatch(cfg_handle, CS_DISPATCH_ALL)) != CS_OK) {
  599. ast_log(LOG_WARNING, "Failed CFG dispatch: %u\n", cs_err);
  600. }
  601. }
  602. if (cs_err == CS_ERR_LIBRARY || cs_err == CS_ERR_BAD_HANDLE) {
  603. struct cpg_name name;
  604. /* If corosync gets restarted out from under Asterisk, try to recover. */
  605. ast_log(LOG_NOTICE, "Attempting to recover from corosync failure.\n");
  606. if ((cs_err = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks)) != CS_OK) {
  607. ast_log(LOG_ERROR, "Failed to initialize cfg (%d)\n", (int) cs_err);
  608. sleep(5);
  609. continue;
  610. }
  611. if ((cs_err = cpg_initialize(&cpg_handle, &cpg_callbacks) != CS_OK)) {
  612. ast_log(LOG_ERROR, "Failed to initialize cpg (%d)\n", (int) cs_err);
  613. sleep(5);
  614. continue;
  615. }
  616. if ((cs_err = cpg_fd_get(cpg_handle, &pfd[0].fd)) != CS_OK) {
  617. ast_log(LOG_ERROR, "Failed to get CPG fd.\n");
  618. sleep(5);
  619. continue;
  620. }
  621. if ((cs_err = corosync_cfg_fd_get(cfg_handle, &pfd[1].fd)) != CS_OK) {
  622. ast_log(LOG_ERROR, "Failed to get CFG fd.\n");
  623. sleep(5);
  624. continue;
  625. }
  626. ast_copy_string(name.value, "asterisk", sizeof(name.value));
  627. name.length = strlen(name.value);
  628. if ((cs_err = cpg_join(cpg_handle, &name)) != CS_OK) {
  629. ast_log(LOG_ERROR, "Failed to join cpg (%d)\n", (int) cs_err);
  630. sleep(5);
  631. continue;
  632. }
  633. ast_log(LOG_NOTICE, "Corosync recovery complete.\n");
  634. send_cluster_notify();
  635. }
  636. }
  637. return NULL;
  638. }
  639. static char *corosync_show_members(struct ast_cli_entry *e, int cmd, struct ast_cli_args *a)
  640. {
  641. cs_error_t cs_err;
  642. cpg_iteration_handle_t cpg_iter;
  643. struct cpg_iteration_description_t cpg_desc;
  644. unsigned int i;
  645. switch (cmd) {
  646. case CLI_INIT:
  647. e->command = "corosync show members";
  648. e->usage =
  649. "Usage: corosync show members\n"
  650. " Show corosync cluster members\n";
  651. return NULL;
  652. case CLI_GENERATE:
  653. return NULL; /* no completion */
  654. }
  655. if (a->argc != e->args) {
  656. return CLI_SHOWUSAGE;
  657. }
  658. cs_err = cpg_iteration_initialize(cpg_handle, CPG_ITERATION_ALL, NULL, &cpg_iter);
  659. if (cs_err != CS_OK) {
  660. ast_cli(a->fd, "Failed to initialize CPG iterator.\n");
  661. return CLI_FAILURE;
  662. }
  663. ast_cli(a->fd, "\n"
  664. "=============================================================\n"
  665. "=== Cluster members =========================================\n"
  666. "=============================================================\n"
  667. "===\n");
  668. for (i = 1, cs_err = cpg_iteration_next(cpg_iter, &cpg_desc);
  669. cs_err == CS_OK;
  670. cs_err = cpg_iteration_next(cpg_iter, &cpg_desc), i++) {
  671. #ifdef HAVE_COROSYNC_CFG_STATE_TRACK
  672. corosync_cfg_node_address_t addrs[8];
  673. int num_addrs = 0;
  674. unsigned int j;
  675. #endif
  676. ast_cli(a->fd, "=== Node %u\n", i);
  677. ast_cli(a->fd, "=== --> Group: %s\n", cpg_desc.group.value);
  678. #ifdef HAVE_COROSYNC_CFG_STATE_TRACK
  679. /*
  680. * Corosync 2.x cfg lib needs to allocate 1M on stack after calling
  681. * corosync_cfg_get_node_addrs. netconsole thread has allocated only 0.5M
  682. * resulting in crash.
  683. */
  684. cs_err = corosync_cfg_get_node_addrs(cfg_handle, cpg_desc.nodeid,
  685. ARRAY_LEN(addrs), &num_addrs, addrs);
  686. if (cs_err != CS_OK) {
  687. ast_log(LOG_WARNING, "Failed to get node addresses\n");
  688. continue;
  689. }
  690. for (j = 0; j < num_addrs; j++) {
  691. struct sockaddr *sa = (struct sockaddr *) addrs[j].address;
  692. size_t sa_len = (size_t) addrs[j].address_length;
  693. char buf[128];
  694. getnameinfo(sa, sa_len, buf, sizeof(buf), NULL, 0, NI_NUMERICHOST);
  695. ast_cli(a->fd, "=== --> Address %u: %s\n", j + 1, buf);
  696. }
  697. #else
  698. ast_cli(a->fd, "=== --> Nodeid: %"PRIu32"\n", cpg_desc.nodeid);
  699. #endif
  700. }
  701. ast_cli(a->fd, "===\n"
  702. "=============================================================\n"
  703. "\n");
  704. cpg_iteration_finalize(cpg_iter);
  705. return CLI_SUCCESS;
  706. }
  707. static char *corosync_ping(struct ast_cli_entry *e, int cmd, struct ast_cli_args *a)
  708. {
  709. struct ast_event *event;
  710. switch (cmd) {
  711. case CLI_INIT:
  712. e->command = "corosync ping";
  713. e->usage =
  714. "Usage: corosync ping\n"
  715. " Send a test ping to the cluster.\n"
  716. "A NOTICE will be in the log for every ping received\n"
  717. "on a server.\n If you send a ping, you should see a NOTICE\n"
  718. "in the log for every server in the cluster.\n";
  719. return NULL;
  720. case CLI_GENERATE:
  721. return NULL; /* no completion */
  722. }
  723. if (a->argc != e->args) {
  724. return CLI_SHOWUSAGE;
  725. }
  726. event = ast_event_new(AST_EVENT_PING, AST_EVENT_IE_END);
  727. if (!event) {
  728. return CLI_FAILURE;
  729. }
  730. ast_rwlock_rdlock(&event_types_lock);
  731. event_types[AST_EVENT_PING].publish_to_stasis(event);
  732. ast_rwlock_unlock(&event_types_lock);
  733. return CLI_SUCCESS;
  734. }
  735. static char *corosync_show_config(struct ast_cli_entry *e, int cmd, struct ast_cli_args *a)
  736. {
  737. unsigned int i;
  738. switch (cmd) {
  739. case CLI_INIT:
  740. e->command = "corosync show config";
  741. e->usage =
  742. "Usage: corosync show config\n"
  743. " Show configuration loaded from res_corosync.conf\n";
  744. return NULL;
  745. case CLI_GENERATE:
  746. return NULL; /* no completion */
  747. }
  748. if (a->argc != e->args) {
  749. return CLI_SHOWUSAGE;
  750. }
  751. ast_cli(a->fd, "\n"
  752. "=============================================================\n"
  753. "=== res_corosync config =====================================\n"
  754. "=============================================================\n"
  755. "===\n");
  756. ast_rwlock_rdlock(&event_types_lock);
  757. for (i = 0; i < ARRAY_LEN(event_types); i++) {
  758. if (event_types[i].publish) {
  759. ast_cli(a->fd, "=== ==> Publishing Event Type: %s\n",
  760. event_types[i].name);
  761. }
  762. if (event_types[i].subscribe) {
  763. ast_cli(a->fd, "=== ==> Subscribing to Event Type: %s\n",
  764. event_types[i].name);
  765. }
  766. }
  767. ast_rwlock_unlock(&event_types_lock);
  768. ast_cli(a->fd, "===\n"
  769. "=============================================================\n"
  770. "\n");
  771. return CLI_SUCCESS;
  772. }
  773. static struct ast_cli_entry corosync_cli[] = {
  774. AST_CLI_DEFINE(corosync_show_config, "Show configuration"),
  775. AST_CLI_DEFINE(corosync_show_members, "Show cluster members"),
  776. AST_CLI_DEFINE(corosync_ping, "Send a test ping to the cluster"),
  777. };
  /*
   * Selector passed as the pubsub argument of set_event(): chooses whether
   * the publish flag or the subscribe flag of an event type is switched on.
   */
  enum {
      PUBLISH = 0,
      SUBSCRIBE = 1
  };
  782. static int set_event(const char *event_type, int pubsub)
  783. {
  784. unsigned int i;
  785. for (i = 0; i < ARRAY_LEN(event_types); i++) {
  786. if (!event_types[i].name || strcasecmp(event_type, event_types[i].name)) {
  787. continue;
  788. }
  789. switch (pubsub) {
  790. case PUBLISH:
  791. event_types[i].publish = 1;
  792. break;
  793. case SUBSCRIBE:
  794. event_types[i].subscribe = 1;
  795. break;
  796. }
  797. break;
  798. }
  799. return (i == ARRAY_LEN(event_types)) ? -1 : 0;
  800. }
  801. static int load_general_config(struct ast_config *cfg)
  802. {
  803. struct ast_variable *v;
  804. int res = 0;
  805. unsigned int i;
  806. ast_rwlock_wrlock(&event_types_lock);
  807. for (i = 0; i < ARRAY_LEN(event_types); i++) {
  808. event_types[i].publish = event_types[i].publish_default;
  809. event_types[i].subscribe = event_types[i].subscribe_default;
  810. }
  811. for (v = ast_variable_browse(cfg, "general"); v && !res; v = v->next) {
  812. if (!strcasecmp(v->name, "publish_event")) {
  813. res = set_event(v->value, PUBLISH);
  814. } else if (!strcasecmp(v->name, "subscribe_event")) {
  815. res = set_event(v->value, SUBSCRIBE);
  816. } else {
  817. ast_log(LOG_WARNING, "Unknown option '%s'\n", v->name);
  818. }
  819. }
  820. for (i = 0; i < ARRAY_LEN(event_types); i++) {
  821. if (event_types[i].publish && !event_types[i].sub) {
  822. event_types[i].sub = stasis_forward_all(event_types[i].topic_fn(),
  823. corosync_topic());
  824. stasis_message_router_add(stasis_router,
  825. event_types[i].message_type_fn(),
  826. stasis_message_cb,
  827. NULL);
  828. } else if (!event_types[i].publish && event_types[i].sub) {
  829. event_types[i].sub = stasis_forward_cancel(event_types[i].sub);
  830. stasis_message_router_remove(stasis_router,
  831. event_types[i].message_type_fn());
  832. }
  833. }
  834. ast_rwlock_unlock(&event_types_lock);
  835. return res;
  836. }
  837. static int load_config(unsigned int reload)
  838. {
  839. static const char filename[] = "res_corosync.conf";
  840. struct ast_config *cfg;
  841. const char *cat = NULL;
  842. struct ast_flags config_flags = { 0 };
  843. int res = 0;
  844. cfg = ast_config_load(filename, config_flags);
  845. if (cfg == CONFIG_STATUS_FILEMISSING || cfg == CONFIG_STATUS_FILEINVALID) {
  846. return -1;
  847. }
  848. while ((cat = ast_category_browse(cfg, cat))) {
  849. if (!strcasecmp(cat, "general")) {
  850. res = load_general_config(cfg);
  851. } else {
  852. ast_log(LOG_WARNING, "Unknown configuration section '%s'\n", cat);
  853. }
  854. }
  855. ast_config_destroy(cfg);
  856. return res;
  857. }
  858. static void cleanup_module(void)
  859. {
  860. cs_error_t cs_err;
  861. unsigned int i;
  862. if (stasis_router) {
  863. /* Unsubscribe all topic forwards and cancel all message routes */
  864. ast_rwlock_wrlock(&event_types_lock);
  865. for (i = 0; i < ARRAY_LEN(event_types); i++) {
  866. if (event_types[i].sub) {
  867. event_types[i].sub = stasis_forward_cancel(event_types[i].sub);
  868. stasis_message_router_remove(stasis_router,
  869. event_types[i].message_type_fn());
  870. }
  871. event_types[i].publish = 0;
  872. event_types[i].subscribe = 0;
  873. }
  874. ast_rwlock_unlock(&event_types_lock);
  875. stasis_message_router_unsubscribe_and_join(stasis_router);
  876. stasis_router = NULL;
  877. }
  878. if (corosync_aggregate_topic) {
  879. ao2_t_ref(corosync_aggregate_topic, -1, "Dispose of topic on cleanup");
  880. corosync_aggregate_topic = NULL;
  881. }
  882. STASIS_MESSAGE_TYPE_CLEANUP(corosync_ping_message_type);
  883. if (dispatch_thread.id != AST_PTHREADT_NULL) {
  884. char meepmeep = 'x';
  885. dispatch_thread.stop = 1;
  886. if (ast_carefulwrite(dispatch_thread.alert_pipe[1], &meepmeep, 1,
  887. 5000) == -1) {
  888. ast_log(LOG_ERROR, "Failed to write to pipe: %s (%d)\n",
  889. strerror(errno), errno);
  890. }
  891. pthread_join(dispatch_thread.id, NULL);
  892. }
  893. if (dispatch_thread.alert_pipe[0] != -1) {
  894. close(dispatch_thread.alert_pipe[0]);
  895. dispatch_thread.alert_pipe[0] = -1;
  896. }
  897. if (dispatch_thread.alert_pipe[1] != -1) {
  898. close(dispatch_thread.alert_pipe[1]);
  899. dispatch_thread.alert_pipe[1] = -1;
  900. }
  901. if (cpg_handle && (cs_err = cpg_finalize(cpg_handle)) != CS_OK) {
  902. ast_log(LOG_ERROR, "Failed to finalize cpg (%d)\n", (int) cs_err);
  903. }
  904. cpg_handle = 0;
  905. if (cfg_handle && (cs_err = corosync_cfg_finalize(cfg_handle)) != CS_OK) {
  906. ast_log(LOG_ERROR, "Failed to finalize cfg (%d)\n", (int) cs_err);
  907. }
  908. cfg_handle = 0;
  909. ao2_cleanup(nodes);
  910. nodes = NULL;
  911. }
  912. static int load_module(void)
  913. {
  914. cs_error_t cs_err;
  915. struct cpg_name name;
  916. if (ast_eid_is_empty(&ast_eid_default)) {
  917. ast_log(LOG_ERROR, "Entity ID is not set.\n");
  918. return AST_MODULE_LOAD_DECLINE;
  919. }
  920. nodes = ao2_container_alloc_hash(AO2_ALLOC_OPT_LOCK_MUTEX, 0, 23,
  921. corosync_node_hash_fn, NULL, corosync_node_cmp_fn);
  922. if (!nodes) {
  923. goto failed;
  924. }
  925. corosync_aggregate_topic = stasis_topic_create("corosync_aggregate_topic");
  926. if (!corosync_aggregate_topic) {
  927. ast_log(AST_LOG_ERROR, "Failed to create stasis topic for corosync\n");
  928. goto failed;
  929. }
  930. stasis_router = stasis_message_router_create(corosync_aggregate_topic);
  931. if (!stasis_router) {
  932. ast_log(AST_LOG_ERROR, "Failed to create message router for corosync topic\n");
  933. goto failed;
  934. }
  935. if (STASIS_MESSAGE_TYPE_INIT(corosync_ping_message_type) != 0) {
  936. ast_log(AST_LOG_ERROR, "Failed to initialize corosync ping message type\n");
  937. goto failed;
  938. }
  939. if (load_config(0)) {
  940. /* simply not configured is not a fatal error */
  941. goto failed;
  942. }
  943. if ((cs_err = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks)) != CS_OK) {
  944. ast_log(LOG_ERROR, "Failed to initialize cfg: (%d)\n", (int) cs_err);
  945. goto failed;
  946. }
  947. if ((cs_err = cpg_initialize(&cpg_handle, &cpg_callbacks)) != CS_OK) {
  948. ast_log(LOG_ERROR, "Failed to initialize cpg: (%d)\n", (int) cs_err);
  949. goto failed;
  950. }
  951. ast_copy_string(name.value, "asterisk", sizeof(name.value));
  952. name.length = strlen(name.value);
  953. if ((cs_err = cpg_join(cpg_handle, &name)) != CS_OK) {
  954. ast_log(LOG_ERROR, "Failed to join: (%d)\n", (int) cs_err);
  955. goto failed;
  956. }
  957. if (pipe(dispatch_thread.alert_pipe) == -1) {
  958. ast_log(LOG_ERROR, "Failed to create alert pipe: %s (%d)\n",
  959. strerror(errno), errno);
  960. goto failed;
  961. }
  962. if (corosync_pthread_create_background(&dispatch_thread.id, NULL,
  963. dispatch_thread_handler, NULL)) {
  964. ast_log(LOG_ERROR, "Error starting CPG dispatch thread.\n");
  965. goto failed;
  966. }
  967. ast_cli_register_multiple(corosync_cli, ARRAY_LEN(corosync_cli));
  968. return AST_MODULE_LOAD_SUCCESS;
  969. failed:
  970. cleanup_module();
  971. return AST_MODULE_LOAD_DECLINE;
  972. }
  973. static int unload_module(void)
  974. {
  975. ast_cli_unregister_multiple(corosync_cli, ARRAY_LEN(corosync_cli));
  976. cleanup_module();
  977. return 0;
  978. }
  979. AST_MODULE_INFO_STANDARD_EXTENDED(ASTERISK_GPL_KEY, "Corosync");