ip_vs_nfct.c 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. /*
  2. * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
  3. *
  4. * Portions Copyright (C) 2001-2002
  5. * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
  6. *
  7. * Portions Copyright (C) 2003-2010
  8. * Julian Anastasov
  9. *
  10. *
  11. * This code is free software; you can redistribute it and/or modify
  12. * it under the terms of the GNU General Public License as published by
  13. * the Free Software Foundation; either version 2 of the License, or
  14. * (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with this program; if not, see <http://www.gnu.org/licenses/>.
  23. *
  24. *
  25. * Authors:
  26. * Ben North <ben@redfrontdoor.org>
  27. * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
  28. * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match
  29. *
  30. *
  31. * Current status:
  32. *
  33. * - provide conntrack confirmation for new and related connections, so
  34. * that their proper conntrack state is visible in all hooks
  35. * - support for all forwarding methods, not only NAT
  36. * - FTP support (NAT), ability to support other NAT apps with expectations
  37. * - to correctly create expectations for related NAT connections the proper
  38. * NF conntrack support must be already installed, eg. ip_vs_ftp requires
  39. * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables
  40. * NAT rules are needed)
  41. * - alter reply for NAT when forwarding packet in original direction:
  42. * conntrack from client in NEW or RELATED (Passive FTP DATA) state or
  43. * when RELATED conntrack is created from real server (Active FTP DATA)
  44. * - if iptables_nat is not loaded the Passive FTP will not work (the
  45. * PASV response can not be NAT-ed) but Active FTP should work
  46. *
  47. */
  48. #define KMSG_COMPONENT "IPVS"
  49. #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  50. #include <linux/module.h>
  51. #include <linux/types.h>
  52. #include <linux/kernel.h>
  53. #include <linux/errno.h>
  54. #include <linux/compiler.h>
  55. #include <linux/vmalloc.h>
  56. #include <linux/skbuff.h>
  57. #include <net/ip.h>
  58. #include <linux/netfilter.h>
  59. #include <linux/netfilter_ipv4.h>
  60. #include <net/ip_vs.h>
  61. #include <net/netfilter/nf_conntrack_core.h>
  62. #include <net/netfilter/nf_conntrack_expect.h>
  63. #include <net/netfilter/nf_conntrack_seqadj.h>
  64. #include <net/netfilter/nf_conntrack_helper.h>
  65. #include <net/netfilter/nf_conntrack_zones.h>
  66. #define FMT_TUPLE "%pI4:%u->%pI4:%u/%u"
  67. #define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \
  68. &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
  69. (T)->dst.protonum
  70. #define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
  71. #define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \
  72. &((C)->vaddr.ip), ntohs((C)->vport), \
  73. &((C)->daddr.ip), ntohs((C)->dport), \
  74. (C)->protocol, (C)->state
  75. void
  76. ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
  77. {
  78. enum ip_conntrack_info ctinfo;
  79. struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
  80. struct nf_conntrack_tuple new_tuple;
  81. if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) ||
  82. nf_ct_is_dying(ct))
  83. return;
  84. /* Never alter conntrack for non-NAT conns */
  85. if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
  86. return;
  87. /* Alter reply only in original direction */
  88. if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
  89. return;
  90. /* Applications may adjust TCP seqs */
  91. if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP &&
  92. !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct))
  93. return;
  94. /*
  95. * The connection is not yet in the hashtable, so we update it.
  96. * CIP->VIP will remain the same, so leave the tuple in
  97. * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
  98. * real-server we will see RIP->DIP.
  99. */
  100. new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
  101. /*
  102. * This will also take care of UDP and other protocols.
  103. */
  104. if (outin) {
  105. new_tuple.src.u3 = cp->daddr;
  106. if (new_tuple.dst.protonum != IPPROTO_ICMP &&
  107. new_tuple.dst.protonum != IPPROTO_ICMPV6)
  108. new_tuple.src.u.tcp.port = cp->dport;
  109. } else {
  110. new_tuple.dst.u3 = cp->vaddr;
  111. if (new_tuple.dst.protonum != IPPROTO_ICMP &&
  112. new_tuple.dst.protonum != IPPROTO_ICMPV6)
  113. new_tuple.dst.u.tcp.port = cp->vport;
  114. }
  115. IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
  116. "ctinfo=%d, old reply=" FMT_TUPLE
  117. ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n",
  118. __func__, ct, ct->status, ctinfo,
  119. ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple),
  120. ARG_TUPLE(&new_tuple), ARG_CONN(cp));
  121. nf_conntrack_alter_reply(ct, &new_tuple);
  122. }
  123. int ip_vs_confirm_conntrack(struct sk_buff *skb)
  124. {
  125. return nf_conntrack_confirm(skb);
  126. }
  127. /*
  128. * Called from init_conntrack() as expectfn handler.
  129. */
  130. static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
  131. struct nf_conntrack_expect *exp)
  132. {
  133. struct nf_conntrack_tuple *orig, new_reply;
  134. struct ip_vs_conn *cp;
  135. struct ip_vs_conn_param p;
  136. struct net *net = nf_ct_net(ct);
  137. if (exp->tuple.src.l3num != PF_INET)
  138. return;
  139. /*
  140. * We assume that no NF locks are held before this callback.
  141. * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
  142. * expectations even if they use wildcard values, now we provide the
  143. * actual values from the newly created original conntrack direction.
  144. * The conntrack is confirmed when packet reaches IPVS hooks.
  145. */
  146. /* RS->CLIENT */
  147. orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
  148. ip_vs_conn_fill_param(net_ipvs(net), exp->tuple.src.l3num, orig->dst.protonum,
  149. &orig->src.u3, orig->src.u.tcp.port,
  150. &orig->dst.u3, orig->dst.u.tcp.port, &p);
  151. cp = ip_vs_conn_out_get(&p);
  152. if (cp) {
  153. /* Change reply CLIENT->RS to CLIENT->VS */
  154. new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
  155. IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
  156. FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
  157. __func__, ct, ct->status,
  158. ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
  159. ARG_CONN(cp));
  160. new_reply.dst.u3 = cp->vaddr;
  161. new_reply.dst.u.tcp.port = cp->vport;
  162. IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
  163. ", inout cp=" FMT_CONN "\n",
  164. __func__, ct,
  165. ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
  166. ARG_CONN(cp));
  167. goto alter;
  168. }
  169. /* CLIENT->VS */
  170. cp = ip_vs_conn_in_get(&p);
  171. if (cp) {
  172. /* Change reply VS->CLIENT to RS->CLIENT */
  173. new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
  174. IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
  175. FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
  176. __func__, ct, ct->status,
  177. ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
  178. ARG_CONN(cp));
  179. new_reply.src.u3 = cp->daddr;
  180. new_reply.src.u.tcp.port = cp->dport;
  181. IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", "
  182. FMT_TUPLE ", outin cp=" FMT_CONN "\n",
  183. __func__, ct,
  184. ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
  185. ARG_CONN(cp));
  186. goto alter;
  187. }
  188. IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
  189. " - unknown expect\n",
  190. __func__, ct, ct->status, ARG_TUPLE(orig));
  191. return;
  192. alter:
  193. /* Never alter conntrack for non-NAT conns */
  194. if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
  195. nf_conntrack_alter_reply(ct, &new_reply);
  196. ip_vs_conn_put(cp);
  197. return;
  198. }
  199. /*
  200. * Create NF conntrack expectation with wildcard (optional) source port.
  201. * Then the default callback function will alter the reply and will confirm
  202. * the conntrack entry when the first packet comes.
  203. * Use port 0 to expect connection from any port.
  204. */
  205. void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
  206. struct ip_vs_conn *cp, u_int8_t proto,
  207. const __be16 port, int from_rs)
  208. {
  209. struct nf_conntrack_expect *exp;
  210. if (ct == NULL || nf_ct_is_untracked(ct))
  211. return;
  212. exp = nf_ct_expect_alloc(ct);
  213. if (!exp)
  214. return;
  215. nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
  216. from_rs ? &cp->daddr : &cp->caddr,
  217. from_rs ? &cp->caddr : &cp->vaddr,
  218. proto, port ? &port : NULL,
  219. from_rs ? &cp->cport : &cp->vport);
  220. exp->expectfn = ip_vs_nfct_expect_callback;
  221. IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
  222. __func__, ct, ARG_TUPLE(&exp->tuple));
  223. nf_ct_expect_related(exp);
  224. nf_ct_expect_put(exp);
  225. }
  226. EXPORT_SYMBOL(ip_vs_nfct_expect_related);
  227. /*
  228. * Our connection was terminated, try to drop the conntrack immediately
  229. */
  230. void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
  231. {
  232. struct nf_conntrack_tuple_hash *h;
  233. struct nf_conn *ct;
  234. struct nf_conntrack_tuple tuple;
  235. if (!cp->cport)
  236. return;
  237. tuple = (struct nf_conntrack_tuple) {
  238. .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
  239. tuple.src.u3 = cp->caddr;
  240. tuple.src.u.all = cp->cport;
  241. tuple.src.l3num = cp->af;
  242. tuple.dst.u3 = cp->vaddr;
  243. tuple.dst.u.all = cp->vport;
  244. IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
  245. " for conn " FMT_CONN "\n",
  246. __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
  247. h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
  248. if (h) {
  249. ct = nf_ct_tuplehash_to_ctrack(h);
  250. /* Show what happens instead of calling nf_ct_kill() */
  251. if (del_timer(&ct->timeout)) {
  252. IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
  253. FMT_TUPLE "\n",
  254. __func__, ct, ARG_TUPLE(&tuple));
  255. if (ct->timeout.function)
  256. ct->timeout.function(ct->timeout.data);
  257. } else {
  258. IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
  259. FMT_TUPLE "\n",
  260. __func__, ct, ARG_TUPLE(&tuple));
  261. }
  262. nf_ct_put(ct);
  263. } else {
  264. IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
  265. __func__, ARG_TUPLE(&tuple));
  266. }
  267. }