ip_vs_proto_sctp.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597
  1. #include <linux/kernel.h>
  2. #include <linux/ip.h>
  3. #include <linux/sctp.h>
  4. #include <net/ip.h>
  5. #include <net/ip6_checksum.h>
  6. #include <linux/netfilter.h>
  7. #include <linux/netfilter_ipv4.h>
  8. #include <net/sctp/checksum.h>
  9. #include <net/ip_vs.h>
  10. static int
  11. sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
  12. struct ip_vs_proto_data *pd,
  13. int *verdict, struct ip_vs_conn **cpp,
  14. struct ip_vs_iphdr *iph)
  15. {
  16. struct ip_vs_service *svc;
  17. sctp_chunkhdr_t _schunkh, *sch;
  18. sctp_sctphdr_t *sh, _sctph;
  19. __be16 _ports[2], *ports = NULL;
  20. if (likely(!ip_vs_iph_icmp(iph))) {
  21. sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
  22. if (sh) {
  23. sch = skb_header_pointer(
  24. skb, iph->len + sizeof(sctp_sctphdr_t),
  25. sizeof(_schunkh), &_schunkh);
  26. if (sch && (sch->type == SCTP_CID_INIT ||
  27. sysctl_sloppy_sctp(ipvs)))
  28. ports = &sh->source;
  29. }
  30. } else {
  31. ports = skb_header_pointer(
  32. skb, iph->len, sizeof(_ports), &_ports);
  33. }
  34. if (!ports) {
  35. *verdict = NF_DROP;
  36. return 0;
  37. }
  38. rcu_read_lock();
  39. if (likely(!ip_vs_iph_inverse(iph)))
  40. svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
  41. &iph->daddr, ports[1]);
  42. else
  43. svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
  44. &iph->saddr, ports[0]);
  45. if (svc) {
  46. int ignored;
  47. if (ip_vs_todrop(ipvs)) {
  48. /*
  49. * It seems that we are very loaded.
  50. * We have to drop this packet :(
  51. */
  52. rcu_read_unlock();
  53. *verdict = NF_DROP;
  54. return 0;
  55. }
  56. /*
  57. * Let the virtual server select a real server for the
  58. * incoming connection, and create a connection entry.
  59. */
  60. *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
  61. if (!*cpp && ignored <= 0) {
  62. if (!ignored)
  63. *verdict = ip_vs_leave(svc, skb, pd, iph);
  64. else
  65. *verdict = NF_DROP;
  66. rcu_read_unlock();
  67. return 0;
  68. }
  69. }
  70. rcu_read_unlock();
  71. /* NF_ACCEPT */
  72. return 1;
  73. }
/*
 * Recompute the SCTP checksum of @skb from offset @sctphoff, store it in
 * the header @sctph and mark the skb as CHECKSUM_UNNECESSARY.
 */
static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
			  unsigned int sctphoff)
{
	sctph->checksum = sctp_compute_cksum(skb, sctphoff);
	skb->ip_summed = CHECKSUM_UNNECESSARY;
}
  80. static int
  81. sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
  82. struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
  83. {
  84. sctp_sctphdr_t *sctph;
  85. unsigned int sctphoff = iph->len;
  86. bool payload_csum = false;
  87. #ifdef CONFIG_IP_VS_IPV6
  88. if (cp->af == AF_INET6 && iph->fragoffs)
  89. return 1;
  90. #endif
  91. /* csum_check requires unshared skb */
  92. if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
  93. return 0;
  94. if (unlikely(cp->app != NULL)) {
  95. int ret;
  96. /* Some checks before mangling */
  97. if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
  98. return 0;
  99. /* Call application helper if needed */
  100. ret = ip_vs_app_pkt_out(cp, skb);
  101. if (ret == 0)
  102. return 0;
  103. /* ret=2: csum update is needed after payload mangling */
  104. if (ret == 2)
  105. payload_csum = true;
  106. }
  107. sctph = (void *) skb_network_header(skb) + sctphoff;
  108. /* Only update csum if we really have to */
  109. if (sctph->source != cp->vport || payload_csum ||
  110. skb->ip_summed == CHECKSUM_PARTIAL) {
  111. sctph->source = cp->vport;
  112. sctp_nat_csum(skb, sctph, sctphoff);
  113. } else {
  114. skb->ip_summed = CHECKSUM_UNNECESSARY;
  115. }
  116. return 1;
  117. }
  118. static int
  119. sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
  120. struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
  121. {
  122. sctp_sctphdr_t *sctph;
  123. unsigned int sctphoff = iph->len;
  124. bool payload_csum = false;
  125. #ifdef CONFIG_IP_VS_IPV6
  126. if (cp->af == AF_INET6 && iph->fragoffs)
  127. return 1;
  128. #endif
  129. /* csum_check requires unshared skb */
  130. if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
  131. return 0;
  132. if (unlikely(cp->app != NULL)) {
  133. int ret;
  134. /* Some checks before mangling */
  135. if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
  136. return 0;
  137. /* Call application helper if needed */
  138. ret = ip_vs_app_pkt_in(cp, skb);
  139. if (ret == 0)
  140. return 0;
  141. /* ret=2: csum update is needed after payload mangling */
  142. if (ret == 2)
  143. payload_csum = true;
  144. }
  145. sctph = (void *) skb_network_header(skb) + sctphoff;
  146. /* Only update csum if we really have to */
  147. if (sctph->dest != cp->dport || payload_csum ||
  148. (skb->ip_summed == CHECKSUM_PARTIAL &&
  149. !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) {
  150. sctph->dest = cp->dport;
  151. sctp_nat_csum(skb, sctph, sctphoff);
  152. } else if (skb->ip_summed != CHECKSUM_PARTIAL) {
  153. skb->ip_summed = CHECKSUM_UNNECESSARY;
  154. }
  155. return 1;
  156. }
  157. static int
  158. sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
  159. {
  160. unsigned int sctphoff;
  161. struct sctphdr *sh, _sctph;
  162. __le32 cmp, val;
  163. #ifdef CONFIG_IP_VS_IPV6
  164. if (af == AF_INET6)
  165. sctphoff = sizeof(struct ipv6hdr);
  166. else
  167. #endif
  168. sctphoff = ip_hdrlen(skb);
  169. sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph);
  170. if (sh == NULL)
  171. return 0;
  172. cmp = sh->checksum;
  173. val = sctp_compute_cksum(skb, sctphoff);
  174. if (val != cmp) {
  175. /* CRC failure, dump it. */
  176. IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
  177. "Failed checksum for");
  178. return 0;
  179. }
  180. return 1;
  181. }
/*
 * Events that drive the SCTP state table. Chunk types are translated to
 * these values through sctp_events[]; the value is used as the second
 * index of sctp_states[][][].
 */
enum ipvs_sctp_event_t {
	IP_VS_SCTP_DATA = 0,	/* DATA, SACK, HEARTBEATs */
	IP_VS_SCTP_INIT,
	IP_VS_SCTP_INIT_ACK,
	IP_VS_SCTP_COOKIE_ECHO,
	IP_VS_SCTP_COOKIE_ACK,
	IP_VS_SCTP_SHUTDOWN,
	IP_VS_SCTP_SHUTDOWN_ACK,
	IP_VS_SCTP_SHUTDOWN_COMPLETE,
	IP_VS_SCTP_ERROR,
	IP_VS_SCTP_ABORT,
	IP_VS_SCTP_EVENT_LAST	/* number of events; sizes sctp_states */
};
  195. /* RFC 2960, 3.2 Chunk Field Descriptions */
  196. static __u8 sctp_events[] = {
  197. [SCTP_CID_DATA] = IP_VS_SCTP_DATA,
  198. [SCTP_CID_INIT] = IP_VS_SCTP_INIT,
  199. [SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK,
  200. [SCTP_CID_SACK] = IP_VS_SCTP_DATA,
  201. [SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA,
  202. [SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA,
  203. [SCTP_CID_ABORT] = IP_VS_SCTP_ABORT,
  204. [SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN,
  205. [SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK,
  206. [SCTP_CID_ERROR] = IP_VS_SCTP_ERROR,
  207. [SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO,
  208. [SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK,
  209. [SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA,
  210. [SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA,
  211. [SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE,
  212. };
  213. /* SCTP States:
  214. * See RFC 2960, 4. SCTP Association State Diagram
  215. *
  216. * New states (not in diagram):
  217. * - INIT1 state: use shorter timeout for dropped INIT packets
  218. * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
  219. * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
  220. *
  221. * The states are as seen in real server. In the diagram, INIT1, INIT,
  222. * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state.
  223. *
  224. * States as per packets from client (C) and server (S):
  225. *
  226. * Setup of client connection:
  227. * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
  228. * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
  229. * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
  230. * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
  231. *
  232. * Setup of server connection:
  233. * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
  234. * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
  235. * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
  236. */
/* Two-letter aliases for the state constants, used by the table below */
#define sNO IP_VS_SCTP_S_NONE
#define sI1 IP_VS_SCTP_S_INIT1
#define sIN IP_VS_SCTP_S_INIT
#define sCS IP_VS_SCTP_S_COOKIE_SENT
#define sCR IP_VS_SCTP_S_COOKIE_REPLIED
#define sCW IP_VS_SCTP_S_COOKIE_WAIT
#define sCO IP_VS_SCTP_S_COOKIE
#define sCE IP_VS_SCTP_S_COOKIE_ECHOED
#define sES IP_VS_SCTP_S_ESTABLISHED
#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
#define sRJ IP_VS_SCTP_S_REJECTED
#define sCL IP_VS_SCTP_S_CLOSED

/*
 * State transition table, indexed as
 *   sctp_states[direction][event][current state]
 * and yielding the next state. Each row is one event (in the order of
 * enum ipvs_sctp_event_t), each column one current state (see the column
 * header comments).
 */
static const __u8 sctp_states
	[IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
	{ /* INPUT */
/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
	},
	{ /* OUTPUT */
/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
	},
	{ /* INPUT-ONLY */
/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
	},
};
/* Timeout used for most transient states, expressed as a multiple of HZ */
#define IP_VS_SCTP_MAX_RTO ((60 + 1) * HZ)

/*
 * Timeout table[state]
 *
 * Default per-state timeouts. __ip_vs_sctp_init() passes this table to
 * ip_vs_create_timeout_table() to build the per-netns copy, and
 * set_sctp_state() falls back to it directly when it has no protocol data.
 */
static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
	[IP_VS_SCTP_S_NONE] = 2 * HZ,
	[IP_VS_SCTP_S_INIT1] = (0 + 3 + 1) * HZ,
	[IP_VS_SCTP_S_INIT] = IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_SENT] = IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_REPLIED] = IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_WAIT] = IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE] = IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_ECHOED] = IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ,
	[IP_VS_SCTP_S_SHUTDOWN_SENT] = IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_REJECTED] = (0 + 3 + 1) * HZ,
	[IP_VS_SCTP_S_CLOSED] = IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_LAST] = 2 * HZ,
};
  312. static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
  313. [IP_VS_SCTP_S_NONE] = "NONE",
  314. [IP_VS_SCTP_S_INIT1] = "INIT1",
  315. [IP_VS_SCTP_S_INIT] = "INIT",
  316. [IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT",
  317. [IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED",
  318. [IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT",
  319. [IP_VS_SCTP_S_COOKIE] = "COOKIE",
  320. [IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED",
  321. [IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED",
  322. [IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT",
  323. [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED",
  324. [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT",
  325. [IP_VS_SCTP_S_REJECTED] = "REJECTED",
  326. [IP_VS_SCTP_S_CLOSED] = "CLOSED",
  327. [IP_VS_SCTP_S_LAST] = "BUG!",
  328. };
  329. static const char *sctp_state_name(int state)
  330. {
  331. if (state >= IP_VS_SCTP_S_LAST)
  332. return "ERR!";
  333. if (sctp_state_name_table[state])
  334. return sctp_state_name_table[state];
  335. return "?";
  336. }
/*
 * Advance the state machine of connection @cp according to the first
 * chunk of the SCTP packet in @skb, seen in direction @direction, and
 * reload cp->timeout for the resulting state.
 *
 * Nothing is changed when the first chunk header cannot be read. When
 * the state changes and the connection has a destination, the
 * destination's active/inactive connection counters are moved to match
 * whether the new state is ESTABLISHED.
 */
static inline void
set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
		int direction, const struct sk_buff *skb)
{
	sctp_chunkhdr_t _sctpch, *sch;
	unsigned char chunk_type;
	int event, next_state;
	int ihl, cofs;

#ifdef CONFIG_IP_VS_IPV6
	ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
#else
	ihl = ip_hdrlen(skb);
#endif

	/* Offset of the first chunk: right behind the SCTP common header */
	cofs = ihl + sizeof(sctp_sctphdr_t);
	sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
	if (sch == NULL)
		return;

	/* Remember the type now: _sctpch is reused for the next chunk below */
	chunk_type = sch->type;
	/*
	 * Section 3: Multiple chunks can be bundled into one SCTP packet
	 * up to the MTU size, except for the INIT, INIT ACK, and
	 * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
	 * any other chunk in a packet.
	 *
	 * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
	 * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
	 * bundled with an ABORT, but they MUST be placed before the ABORT
	 * in the SCTP packet or they will be ignored by the receiver.
	 */
	if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
	    (sch->type == SCTP_CID_COOKIE_ACK)) {
		int clen = ntohs(sch->length);

		/* Peek at the chunk that follows; an ABORT there wins */
		if (clen >= sizeof(sctp_chunkhdr_t)) {
			sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
						 sizeof(_sctpch), &_sctpch);
			if (sch && sch->type == SCTP_CID_ABORT)
				chunk_type = sch->type;
		}
	}

	/* Chunk types beyond the table are treated like DATA */
	event = (chunk_type < sizeof(sctp_events)) ?
		sctp_events[chunk_type] : IP_VS_SCTP_DATA;

	/* Update direction to INPUT_ONLY if necessary
	 * or delete NO_OUTPUT flag if output packet detected
	 */
	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
		if (direction == IP_VS_DIR_OUTPUT)
			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
		else
			direction = IP_VS_DIR_INPUT_ONLY;
	}

	next_state = sctp_states[direction][event][cp->state];

	if (next_state != cp->state) {
		struct ip_vs_dest *dest = cp->dest;

		/* NOTE(review): pd is dereferenced here but only checked
		 * against NULL further down - confirm callers never pass
		 * NULL or that the check below is vestigial.
		 */
		IP_VS_DBG_BUF(8, "%s %s %s:%d->"
				"%s:%d state: %s->%s conn->refcnt:%d\n",
				pd->pp->name,
				((direction == IP_VS_DIR_OUTPUT) ?
				 "output " : "input "),
				IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
				ntohs(cp->dport),
				IP_VS_DBG_ADDR(cp->af, &cp->caddr),
				ntohs(cp->cport),
				sctp_state_name(cp->state),
				sctp_state_name(next_state),
				atomic_read(&cp->refcnt));
		if (dest) {
			/* Keep the destination's counters in step with
			 * whether this connection is ESTABLISHED.
			 */
			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
				(next_state != IP_VS_SCTP_S_ESTABLISHED)) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
				cp->flags |= IP_VS_CONN_F_INACTIVE;
			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
				   (next_state == IP_VS_SCTP_S_ESTABLISHED)) {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
			}
		}
	}
	/* Store the new state and pick its timeout in one go */
	if (likely(pd))
		cp->timeout = pd->timeout_table[cp->state = next_state];
	else	/* What to do ? */
		cp->timeout = sctp_timeouts[cp->state = next_state];
}
/*
 * State-transition callback for SCTP: runs set_sctp_state() for @cp
 * while holding cp->lock with bottom halves disabled.
 */
static void
sctp_state_transition(struct ip_vs_conn *cp, int direction,
		const struct sk_buff *skb, struct ip_vs_proto_data *pd)
{
	spin_lock_bh(&cp->lock);
	set_sctp_state(pd, cp, direction, skb);
	spin_unlock_bh(&cp->lock);
}
  429. static inline __u16 sctp_app_hashkey(__be16 port)
  430. {
  431. return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
  432. & SCTP_APP_TAB_MASK;
  433. }
  434. static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
  435. {
  436. struct ip_vs_app *i;
  437. __u16 hash;
  438. __be16 port = inc->port;
  439. int ret = 0;
  440. struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
  441. hash = sctp_app_hashkey(port);
  442. list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
  443. if (i->port == port) {
  444. ret = -EEXIST;
  445. goto out;
  446. }
  447. }
  448. list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
  449. atomic_inc(&pd->appcnt);
  450. out:
  451. return ret;
  452. }
/*
 * Remove application incarnation @inc from the SCTP application table
 * and decrement the protocol's application counter.
 *
 * NOTE(review): the result of ip_vs_proto_data_get() is used without a
 * NULL check - presumably it cannot fail once the protocol is
 * registered; confirm.
 */
static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);

	atomic_dec(&pd->appcnt);
	list_del_rcu(&inc->p_list);
}
  459. static int sctp_app_conn_bind(struct ip_vs_conn *cp)
  460. {
  461. struct netns_ipvs *ipvs = cp->ipvs;
  462. int hash;
  463. struct ip_vs_app *inc;
  464. int result = 0;
  465. /* Default binding: bind app only for NAT */
  466. if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
  467. return 0;
  468. /* Lookup application incarnations and bind the right one */
  469. hash = sctp_app_hashkey(cp->vport);
  470. rcu_read_lock();
  471. list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
  472. if (inc->port == cp->vport) {
  473. if (unlikely(!ip_vs_app_inc_get(inc)))
  474. break;
  475. rcu_read_unlock();
  476. IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
  477. "%s:%u to app %s on port %u\n",
  478. __func__,
  479. IP_VS_DBG_ADDR(cp->af, &cp->caddr),
  480. ntohs(cp->cport),
  481. IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
  482. ntohs(cp->vport),
  483. inc->name, ntohs(inc->port));
  484. cp->app = inc;
  485. if (inc->init_conn)
  486. result = inc->init_conn(inc, cp);
  487. goto out;
  488. }
  489. }
  490. rcu_read_unlock();
  491. out:
  492. return result;
  493. }
  494. /* ---------------------------------------------
  495. * timeouts is netns related now.
  496. * ---------------------------------------------
  497. */
  498. static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
  499. {
  500. ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
  501. pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
  502. sizeof(sctp_timeouts));
  503. if (!pd->timeout_table)
  504. return -ENOMEM;
  505. return 0;
  506. }
/*
 * Per-netns teardown: free the timeout table that __ip_vs_sctp_init()
 * stored in pd->timeout_table. @ipvs is unused.
 */
static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
	kfree(pd->timeout_table);
}
/*
 * SCTP protocol descriptor: ties the handlers defined in this file to
 * the generic IPVS protocol interface. Connection lookup and packet
 * debugging use the generic ip_vs_* helpers; there are no global
 * init/exit hooks and no timeout_change hook.
 */
struct ip_vs_protocol ip_vs_protocol_sctp = {
	.name = "SCTP",
	.protocol = IPPROTO_SCTP,
	.num_states = IP_VS_SCTP_S_LAST,
	.dont_defrag = 0,
	.init = NULL,
	.exit = NULL,
	.init_netns = __ip_vs_sctp_init,
	.exit_netns = __ip_vs_sctp_exit,
	.register_app = sctp_register_app,
	.unregister_app = sctp_unregister_app,
	.conn_schedule = sctp_conn_schedule,
	.conn_in_get = ip_vs_conn_in_get_proto,
	.conn_out_get = ip_vs_conn_out_get_proto,
	.snat_handler = sctp_snat_handler,
	.dnat_handler = sctp_dnat_handler,
	.csum_check = sctp_csum_check,
	.state_name = sctp_state_name,
	.state_transition = sctp_state_transition,
	.app_conn_bind = sctp_app_conn_bind,
	.debug_packet = ip_vs_tcpudp_debug_packet,
	.timeout_change = NULL,
};
  531. .debug_packet = ip_vs_tcpudp_debug_packet,
  532. .timeout_change = NULL,
  533. };