nf_conntrack_core.c

  1. /* Connection state tracking for netfilter. This is separated from,
  2. but required by, the NAT layer; it can also be used by an iptables
  3. extension. */
  4. /* (C) 1999-2001 Paul `Rusty' Russell
  5. * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
  6. * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
  7. * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU General Public License version 2 as
  11. * published by the Free Software Foundation.
  12. */
  13. #include <linux/types.h>
  14. #include <linux/netfilter.h>
  15. #include <linux/module.h>
  16. #include <linux/sched.h>
  17. #include <linux/skbuff.h>
  18. #include <linux/proc_fs.h>
  19. #include <linux/vmalloc.h>
  20. #include <linux/stddef.h>
  21. #include <linux/slab.h>
  22. #include <linux/random.h>
  23. #include <linux/jhash.h>
  24. #include <linux/err.h>
  25. #include <linux/percpu.h>
  26. #include <linux/moduleparam.h>
  27. #include <linux/notifier.h>
  28. #include <linux/kernel.h>
  29. #include <linux/netdevice.h>
  30. #include <linux/socket.h>
  31. #include <linux/mm.h>
  32. #include <linux/nsproxy.h>
  33. #include <linux/rculist_nulls.h>
  34. #include <net/netfilter/nf_conntrack.h>
  35. #include <net/netfilter/nf_conntrack_l3proto.h>
  36. #include <net/netfilter/nf_conntrack_l4proto.h>
  37. #include <net/netfilter/nf_conntrack_expect.h>
  38. #include <net/netfilter/nf_conntrack_helper.h>
  39. #include <net/netfilter/nf_conntrack_seqadj.h>
  40. #include <net/netfilter/nf_conntrack_core.h>
  41. #include <net/netfilter/nf_conntrack_extend.h>
  42. #include <net/netfilter/nf_conntrack_acct.h>
  43. #include <net/netfilter/nf_conntrack_ecache.h>
  44. #include <net/netfilter/nf_conntrack_zones.h>
  45. #include <net/netfilter/nf_conntrack_timestamp.h>
  46. #include <net/netfilter/nf_conntrack_timeout.h>
  47. #include <net/netfilter/nf_conntrack_labels.h>
  48. #include <net/netfilter/nf_conntrack_synproxy.h>
  49. #include <net/netfilter/nf_nat.h>
  50. #include <net/netfilter/nf_nat_core.h>
  51. #include <net/netfilter/nf_nat_helper.h>
  52. #define NF_CONNTRACK_VERSION "0.5.0"
  53. int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
  54. enum nf_nat_manip_type manip,
  55. const struct nlattr *attr) __read_mostly;
  56. EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
  57. __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
  58. EXPORT_SYMBOL_GPL(nf_conntrack_locks);
  59. __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
  60. EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
  61. static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
  62. {
  63. h1 %= CONNTRACK_LOCKS;
  64. h2 %= CONNTRACK_LOCKS;
  65. spin_unlock(&nf_conntrack_locks[h1]);
  66. if (h1 != h2)
  67. spin_unlock(&nf_conntrack_locks[h2]);
  68. }
  69. /* return true if we need to recompute hashes (in case hash table was resized) */
  70. static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
  71. unsigned int h2, unsigned int sequence)
  72. {
  73. h1 %= CONNTRACK_LOCKS;
  74. h2 %= CONNTRACK_LOCKS;
  75. if (h1 <= h2) {
  76. spin_lock(&nf_conntrack_locks[h1]);
  77. if (h1 != h2)
  78. spin_lock_nested(&nf_conntrack_locks[h2],
  79. SINGLE_DEPTH_NESTING);
  80. } else {
  81. spin_lock(&nf_conntrack_locks[h2]);
  82. spin_lock_nested(&nf_conntrack_locks[h1],
  83. SINGLE_DEPTH_NESTING);
  84. }
  85. if (read_seqcount_retry(&net->ct.generation, sequence)) {
  86. nf_conntrack_double_unlock(h1, h2);
  87. return true;
  88. }
  89. return false;
  90. }
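/* Usage sketch (added for illustration, not in the original file): callers
 * pair the two helpers above with the net->ct.generation seqcount so that a
 * concurrent hash resize is detected and the bucket indices are recomputed:
 *
 *     do {
 *             sequence = read_seqcount_begin(&net->ct.generation);
 *             hash = hash_conntrack(net, &orig_tuple);
 *             reply_hash = hash_conntrack(net, &reply_tuple);
 *     } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 *
 * Locking the lower bucket index first (and taking the second lock with
 * spin_lock_nested()) keeps the acquisition order consistent, so two CPUs
 * grabbing the same pair of buckets cannot deadlock.
 */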
  91. static void nf_conntrack_all_lock(void)
  92. {
  93. int i;
  94. for (i = 0; i < CONNTRACK_LOCKS; i++)
  95. spin_lock_nested(&nf_conntrack_locks[i], i);
  96. }
  97. static void nf_conntrack_all_unlock(void)
  98. {
  99. int i;
  100. for (i = 0; i < CONNTRACK_LOCKS; i++)
  101. spin_unlock(&nf_conntrack_locks[i]);
  102. }
  103. unsigned int nf_conntrack_htable_size __read_mostly;
  104. EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
  105. unsigned int nf_conntrack_max __read_mostly;
  106. EXPORT_SYMBOL_GPL(nf_conntrack_max);
  107. DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
  108. EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
  109. unsigned int nf_conntrack_hash_rnd __read_mostly;
  110. EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
  111. static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple)
  112. {
  113. unsigned int n;
  114. /* The direction must be ignored, so we hash everything up to the
  115. * destination ports (which is a multiple of 4) and treat the last
  116. * three bytes manually.
  117. */
  118. n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
  119. return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^
  120. (((__force __u16)tuple->dst.u.all << 16) |
  121. tuple->dst.protonum));
  122. }
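/* Note (illustrative, based on the tuple layout): sizeof(tuple->src) plus
 * sizeof(tuple->dst.u3) covers the source part of the tuple and the
 * destination address, i.e. only whole 32-bit words, which is what jhash2()
 * operates on.  The remaining three interesting bytes -- the 16-bit
 * destination port (dst.u.all) and the 8-bit protocol number -- are folded
 * into the hash seed instead, and dst.dir is deliberately excluded so the
 * hash depends only on addresses, ports and protocol, not on which direction
 * slot the tuple occupies.
 */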
  123. static u32 __hash_bucket(u32 hash, unsigned int size)
  124. {
  125. return reciprocal_scale(hash, size);
  126. }
  127. static u32 hash_bucket(u32 hash, const struct net *net)
  128. {
  129. return __hash_bucket(hash, net->ct.htable_size);
  130. }
  131. static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
  132. unsigned int size)
  133. {
  134. return __hash_bucket(hash_conntrack_raw(tuple), size);
  135. }
  136. static inline u_int32_t hash_conntrack(const struct net *net,
  137. const struct nf_conntrack_tuple *tuple)
  138. {
  139. return __hash_conntrack(tuple, net->ct.htable_size);
  140. }
  141. bool
  142. nf_ct_get_tuple(const struct sk_buff *skb,
  143. unsigned int nhoff,
  144. unsigned int dataoff,
  145. u_int16_t l3num,
  146. u_int8_t protonum,
  147. struct net *net,
  148. struct nf_conntrack_tuple *tuple,
  149. const struct nf_conntrack_l3proto *l3proto,
  150. const struct nf_conntrack_l4proto *l4proto)
  151. {
  152. memset(tuple, 0, sizeof(*tuple));
  153. tuple->src.l3num = l3num;
  154. if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
  155. return false;
  156. tuple->dst.protonum = protonum;
  157. tuple->dst.dir = IP_CT_DIR_ORIGINAL;
  158. return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
  159. }
  160. EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
  161. bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
  162. u_int16_t l3num,
  163. struct net *net, struct nf_conntrack_tuple *tuple)
  164. {
  165. struct nf_conntrack_l3proto *l3proto;
  166. struct nf_conntrack_l4proto *l4proto;
  167. unsigned int protoff;
  168. u_int8_t protonum;
  169. int ret;
  170. rcu_read_lock();
  171. l3proto = __nf_ct_l3proto_find(l3num);
  172. ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
  173. if (ret != NF_ACCEPT) {
  174. rcu_read_unlock();
  175. return false;
  176. }
  177. l4proto = __nf_ct_l4proto_find(l3num, protonum);
  178. ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
  179. l3proto, l4proto);
  180. rcu_read_unlock();
  181. return ret;
  182. }
  183. EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
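/* Usage sketch (hypothetical caller, added for illustration): extracting the
 * original-direction tuple of an IPv4 packet whose network header starts at
 * skb_network_offset():
 *
 *     struct nf_conntrack_tuple tuple;
 *
 *     if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), NFPROTO_IPV4,
 *                            net, &tuple))
 *             return;    // not trackable
 *
 * The l3/l4 protocol handlers are looked up under RCU inside the helper.
 */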
  184. bool
  185. nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
  186. const struct nf_conntrack_tuple *orig,
  187. const struct nf_conntrack_l3proto *l3proto,
  188. const struct nf_conntrack_l4proto *l4proto)
  189. {
  190. memset(inverse, 0, sizeof(*inverse));
  191. inverse->src.l3num = orig->src.l3num;
  192. if (l3proto->invert_tuple(inverse, orig) == 0)
  193. return false;
  194. inverse->dst.dir = !orig->dst.dir;
  195. inverse->dst.protonum = orig->dst.protonum;
  196. return l4proto->invert_tuple(inverse, orig);
  197. }
  198. EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
  199. static void
  200. clean_from_lists(struct nf_conn *ct)
  201. {
  202. pr_debug("clean_from_lists(%p)\n", ct);
  203. hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
  204. hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
  205. /* Destroy all pending expectations */
  206. nf_ct_remove_expectations(ct);
  207. }
  208. /* must be called with local_bh_disable */
  209. static void nf_ct_add_to_dying_list(struct nf_conn *ct)
  210. {
  211. struct ct_pcpu *pcpu;
  212. /* add this conntrack to the (per cpu) dying list */
  213. ct->cpu = smp_processor_id();
  214. pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
  215. spin_lock(&pcpu->lock);
  216. hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
  217. &pcpu->dying);
  218. spin_unlock(&pcpu->lock);
  219. }
  220. /* must be called with local_bh_disable */
  221. static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
  222. {
  223. struct ct_pcpu *pcpu;
  224. /* add this conntrack to the (per cpu) unconfirmed list */
  225. ct->cpu = smp_processor_id();
  226. pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
  227. spin_lock(&pcpu->lock);
  228. hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
  229. &pcpu->unconfirmed);
  230. spin_unlock(&pcpu->lock);
  231. }
  232. /* must be called with local_bh_disable */
  233. static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
  234. {
  235. struct ct_pcpu *pcpu;
   236. /* We overload the first tuple to link into the unconfirmed or dying list. */
  237. pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
  238. spin_lock(&pcpu->lock);
  239. BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
  240. hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
  241. spin_unlock(&pcpu->lock);
  242. }
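/* Overview (summary comment, added for clarity): a conntrack normally moves
 *
 *     per-cpu unconfirmed list -> main hash table -> per-cpu dying list
 *
 * (allocation, __nf_conntrack_confirm(), deletion).  The three helpers above
 * manage the per-cpu lists by reusing the ORIGINAL-direction hnnode, which is
 * safe because an entry is never on a per-cpu list and in the main hash at
 * the same time.
 */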
  243. /* Released via destroy_conntrack() */
  244. struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
  245. const struct nf_conntrack_zone *zone,
  246. gfp_t flags)
  247. {
  248. struct nf_conn *tmpl;
  249. tmpl = kzalloc(sizeof(*tmpl), flags);
  250. if (tmpl == NULL)
  251. return NULL;
  252. tmpl->status = IPS_TEMPLATE;
  253. write_pnet(&tmpl->ct_net, net);
  254. if (nf_ct_zone_add(tmpl, flags, zone) < 0)
  255. goto out_free;
  256. atomic_set(&tmpl->ct_general.use, 0);
  257. return tmpl;
  258. out_free:
  259. kfree(tmpl);
  260. return NULL;
  261. }
  262. EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
  263. void nf_ct_tmpl_free(struct nf_conn *tmpl)
  264. {
  265. nf_ct_ext_destroy(tmpl);
  266. nf_ct_ext_free(tmpl);
  267. kfree(tmpl);
  268. }
  269. EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
  270. static void
  271. destroy_conntrack(struct nf_conntrack *nfct)
  272. {
  273. struct nf_conn *ct = (struct nf_conn *)nfct;
  274. struct net *net = nf_ct_net(ct);
  275. struct nf_conntrack_l4proto *l4proto;
  276. pr_debug("destroy_conntrack(%p)\n", ct);
  277. NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
  278. NF_CT_ASSERT(!timer_pending(&ct->timeout));
  279. if (unlikely(nf_ct_is_template(ct))) {
  280. nf_ct_tmpl_free(ct);
  281. return;
  282. }
  283. rcu_read_lock();
  284. l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
  285. if (l4proto && l4proto->destroy)
  286. l4proto->destroy(ct);
  287. rcu_read_unlock();
  288. local_bh_disable();
  289. /* Expectations will have been removed in clean_from_lists,
  290. * except TFTP can create an expectation on the first packet,
  291. * before connection is in the list, so we need to clean here,
  292. * too.
  293. */
  294. nf_ct_remove_expectations(ct);
  295. nf_ct_del_from_dying_or_unconfirmed_list(ct);
  296. NF_CT_STAT_INC(net, delete);
  297. local_bh_enable();
  298. if (ct->master)
  299. nf_ct_put(ct->master);
  300. pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
  301. nf_conntrack_free(ct);
  302. }
  303. static void nf_ct_delete_from_lists(struct nf_conn *ct)
  304. {
  305. struct net *net = nf_ct_net(ct);
  306. unsigned int hash, reply_hash;
  307. unsigned int sequence;
  308. nf_ct_helper_destroy(ct);
  309. local_bh_disable();
  310. do {
  311. sequence = read_seqcount_begin(&net->ct.generation);
  312. hash = hash_conntrack(net,
  313. &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
  314. reply_hash = hash_conntrack(net,
  315. &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
  316. } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
  317. clean_from_lists(ct);
  318. nf_conntrack_double_unlock(hash, reply_hash);
  319. nf_ct_add_to_dying_list(ct);
  320. NF_CT_STAT_INC(net, delete_list);
  321. local_bh_enable();
  322. }
  323. bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
  324. {
  325. struct nf_conn_tstamp *tstamp;
  326. tstamp = nf_conn_tstamp_find(ct);
  327. if (tstamp && tstamp->stop == 0)
  328. tstamp->stop = ktime_get_real_ns();
  329. if (nf_ct_is_dying(ct))
  330. goto delete;
  331. if (nf_conntrack_event_report(IPCT_DESTROY, ct,
  332. portid, report) < 0) {
  333. /* destroy event was not delivered */
  334. nf_ct_delete_from_lists(ct);
  335. nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
  336. return false;
  337. }
  338. nf_conntrack_ecache_work(nf_ct_net(ct));
  339. set_bit(IPS_DYING_BIT, &ct->status);
  340. delete:
  341. nf_ct_delete_from_lists(ct);
  342. nf_ct_put(ct);
  343. return true;
  344. }
  345. EXPORT_SYMBOL_GPL(nf_ct_delete);
  346. static void death_by_timeout(unsigned long ul_conntrack)
  347. {
  348. nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
  349. }
  350. static inline bool
  351. nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
  352. const struct nf_conntrack_tuple *tuple,
  353. const struct nf_conntrack_zone *zone)
  354. {
  355. struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
   356. /* A conntrack can be recreated with an equal tuple,
  357. * so we need to check that the conntrack is confirmed
  358. */
  359. return nf_ct_tuple_equal(tuple, &h->tuple) &&
  360. nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
  361. nf_ct_is_confirmed(ct);
  362. }
  363. /*
   364. * Warning:
   365. * - The caller must take a reference on the returned object
   366. * and recheck nf_ct_tuple_equal(tuple, &h->tuple).
  367. */
  368. static struct nf_conntrack_tuple_hash *
  369. ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
  370. const struct nf_conntrack_tuple *tuple, u32 hash)
  371. {
  372. struct nf_conntrack_tuple_hash *h;
  373. struct hlist_nulls_node *n;
  374. unsigned int bucket = hash_bucket(hash, net);
  375. /* Disable BHs the entire time since we normally need to disable them
  376. * at least once for the stats anyway.
  377. */
  378. local_bh_disable();
  379. begin:
  380. hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
  381. if (nf_ct_key_equal(h, tuple, zone)) {
  382. NF_CT_STAT_INC(net, found);
  383. local_bh_enable();
  384. return h;
  385. }
  386. NF_CT_STAT_INC(net, searched);
  387. }
  388. /*
  389. * if the nulls value we got at the end of this lookup is
  390. * not the expected one, we must restart lookup.
  391. * We probably met an item that was moved to another chain.
  392. */
  393. if (get_nulls_value(n) != bucket) {
  394. NF_CT_STAT_INC(net, search_restart);
  395. goto begin;
  396. }
  397. local_bh_enable();
  398. return NULL;
  399. }
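/* Note (summary of the lockless lookup contract): the conntrack slab is
 * created with SLAB_DESTROY_BY_RCU, so an entry found here may be freed and
 * recycled for a different connection while we are still looking at it.
 * That is why the caller must take a reference with atomic_inc_not_zero()
 * and re-check nf_ct_key_equal() afterwards, as __nf_conntrack_find_get()
 * below does.
 */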
  400. /* Find a connection corresponding to a tuple. */
  401. static struct nf_conntrack_tuple_hash *
  402. __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
  403. const struct nf_conntrack_tuple *tuple, u32 hash)
  404. {
  405. struct nf_conntrack_tuple_hash *h;
  406. struct nf_conn *ct;
  407. rcu_read_lock();
  408. begin:
  409. h = ____nf_conntrack_find(net, zone, tuple, hash);
  410. if (h) {
  411. ct = nf_ct_tuplehash_to_ctrack(h);
  412. if (unlikely(nf_ct_is_dying(ct) ||
  413. !atomic_inc_not_zero(&ct->ct_general.use)))
  414. h = NULL;
  415. else {
  416. if (unlikely(!nf_ct_key_equal(h, tuple, zone))) {
  417. nf_ct_put(ct);
  418. goto begin;
  419. }
  420. }
  421. }
  422. rcu_read_unlock();
  423. return h;
  424. }
  425. struct nf_conntrack_tuple_hash *
  426. nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
  427. const struct nf_conntrack_tuple *tuple)
  428. {
  429. return __nf_conntrack_find_get(net, zone, tuple,
  430. hash_conntrack_raw(tuple));
  431. }
  432. EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
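/* Usage sketch (hypothetical helper, added for illustration; assumes the
 * default zone object nf_ct_zone_dflt from nf_conntrack_zones.h): check
 * whether a tuple is already tracked and drop the reference right away:
 *
 *     static bool example_tuple_is_tracked(struct net *net,
 *                                          const struct nf_conntrack_tuple *tuple)
 *     {
 *             struct nf_conntrack_tuple_hash *h;
 *
 *             h = nf_conntrack_find_get(net, &nf_ct_zone_dflt, tuple);
 *             if (!h)
 *                     return false;
 *             nf_ct_put(nf_ct_tuplehash_to_ctrack(h));
 *             return true;
 *     }
 */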
  433. static void __nf_conntrack_hash_insert(struct nf_conn *ct,
  434. unsigned int hash,
  435. unsigned int reply_hash)
  436. {
  437. struct net *net = nf_ct_net(ct);
  438. hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
  439. &net->ct.hash[hash]);
  440. hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
  441. &net->ct.hash[reply_hash]);
  442. }
  443. int
  444. nf_conntrack_hash_check_insert(struct nf_conn *ct)
  445. {
  446. const struct nf_conntrack_zone *zone;
  447. struct net *net = nf_ct_net(ct);
  448. unsigned int hash, reply_hash;
  449. struct nf_conntrack_tuple_hash *h;
  450. struct hlist_nulls_node *n;
  451. unsigned int sequence;
  452. zone = nf_ct_zone(ct);
  453. local_bh_disable();
  454. do {
  455. sequence = read_seqcount_begin(&net->ct.generation);
  456. hash = hash_conntrack(net,
  457. &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
  458. reply_hash = hash_conntrack(net,
  459. &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
  460. } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
  461. /* See if there's one in the list already, including reverse */
  462. hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
  463. if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
  464. &h->tuple) &&
  465. nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
  466. NF_CT_DIRECTION(h)))
  467. goto out;
  468. hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
  469. if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
  470. &h->tuple) &&
  471. nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
  472. NF_CT_DIRECTION(h)))
  473. goto out;
  474. add_timer(&ct->timeout);
  475. smp_wmb();
  476. /* The caller holds a reference to this object */
  477. atomic_set(&ct->ct_general.use, 2);
  478. __nf_conntrack_hash_insert(ct, hash, reply_hash);
  479. nf_conntrack_double_unlock(hash, reply_hash);
  480. NF_CT_STAT_INC(net, insert);
  481. local_bh_enable();
  482. return 0;
  483. out:
  484. nf_conntrack_double_unlock(hash, reply_hash);
  485. NF_CT_STAT_INC(net, insert_failed);
  486. local_bh_enable();
  487. return -EEXIST;
  488. }
  489. EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
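/* Note: the refcount is set to 2 above because two references exist once the
 * insert succeeds -- the hash table's own reference and the one already held
 * by the caller (typically ctnetlink creating an entry from user space).
 */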
  490. /* Confirm a connection given skb; places it in hash table */
  491. int
  492. __nf_conntrack_confirm(struct sk_buff *skb)
  493. {
  494. const struct nf_conntrack_zone *zone;
  495. unsigned int hash, reply_hash;
  496. struct nf_conntrack_tuple_hash *h;
  497. struct nf_conn *ct;
  498. struct nf_conn_help *help;
  499. struct nf_conn_tstamp *tstamp;
  500. struct hlist_nulls_node *n;
  501. enum ip_conntrack_info ctinfo;
  502. struct net *net;
  503. unsigned int sequence;
  504. ct = nf_ct_get(skb, &ctinfo);
  505. net = nf_ct_net(ct);
   506. /* ipt_REJECT uses nf_conntrack_attach to attach related
   507. ICMP/TCP RST packets in the other direction. The actual packet
   508. that created the connection will be IP_CT_NEW or, for an
   509. expected connection, IP_CT_RELATED. */
  510. if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
  511. return NF_ACCEPT;
  512. zone = nf_ct_zone(ct);
  513. local_bh_disable();
  514. do {
  515. sequence = read_seqcount_begin(&net->ct.generation);
  516. /* reuse the hash saved before */
  517. hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
  518. hash = hash_bucket(hash, net);
  519. reply_hash = hash_conntrack(net,
  520. &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
  521. } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
  522. /* We're not in hash table, and we refuse to set up related
  523. * connections for unconfirmed conns. But packet copies and
  524. * REJECT will give spurious warnings here.
  525. */
  526. /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
  527. /* No external references means no one else could have
  528. * confirmed us.
  529. */
  530. NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
  531. pr_debug("Confirming conntrack %p\n", ct);
  532. /* We have to check the DYING flag after unlink to prevent
  533. * a race against nf_ct_get_next_corpse() possibly called from
  534. * user context, else we insert an already 'dead' hash, blocking
  535. * further use of that particular connection -JM.
  536. */
  537. nf_ct_del_from_dying_or_unconfirmed_list(ct);
  538. if (unlikely(nf_ct_is_dying(ct)))
  539. goto out;
  540. /* See if there's one in the list already, including reverse:
  541. NAT could have grabbed it without realizing, since we're
  542. not in the hash. If there is, we lost race. */
  543. hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
  544. if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
  545. &h->tuple) &&
  546. nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
  547. NF_CT_DIRECTION(h)))
  548. goto out;
  549. hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
  550. if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
  551. &h->tuple) &&
  552. nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
  553. NF_CT_DIRECTION(h)))
  554. goto out;
  555. /* Timer relative to confirmation time, not original
  556. setting time, otherwise we'd get timer wrap in
  557. weird delay cases. */
  558. ct->timeout.expires += jiffies;
  559. add_timer(&ct->timeout);
  560. atomic_inc(&ct->ct_general.use);
  561. ct->status |= IPS_CONFIRMED;
  562. /* set conntrack timestamp, if enabled. */
  563. tstamp = nf_conn_tstamp_find(ct);
  564. if (tstamp) {
  565. if (skb->tstamp.tv64 == 0)
  566. __net_timestamp(skb);
  567. tstamp->start = ktime_to_ns(skb->tstamp);
  568. }
  569. /* Since the lookup is lockless, hash insertion must be done after
  570. * starting the timer and setting the CONFIRMED bit. The RCU barriers
  571. * guarantee that no other CPU can find the conntrack before the above
  572. * stores are visible.
  573. */
  574. __nf_conntrack_hash_insert(ct, hash, reply_hash);
  575. nf_conntrack_double_unlock(hash, reply_hash);
  576. NF_CT_STAT_INC(net, insert);
  577. local_bh_enable();
  578. help = nfct_help(ct);
  579. if (help && help->helper)
  580. nf_conntrack_event_cache(IPCT_HELPER, ct);
  581. nf_conntrack_event_cache(master_ct(ct) ?
  582. IPCT_RELATED : IPCT_NEW, ct);
  583. return NF_ACCEPT;
  584. out:
  585. nf_ct_add_to_dying_list(ct);
  586. nf_conntrack_double_unlock(hash, reply_hash);
  587. NF_CT_STAT_INC(net, insert_failed);
  588. local_bh_enable();
  589. return NF_DROP;
  590. }
  591. EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
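/* Note: the "hash saved before" reused above is the raw tuple hash that
 * __nf_conntrack_alloc() stashes in the REPLY-direction hnnode.pprev; only
 * the bucket index is computed here, inside the seqcount retry loop, so a
 * hash table resize between allocation and confirmation is still handled
 * correctly.
 */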
   592. /* Returns true if a connection corresponds to the tuple (required
   593. for NAT). */
  594. int
  595. nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
  596. const struct nf_conn *ignored_conntrack)
  597. {
  598. struct net *net = nf_ct_net(ignored_conntrack);
  599. const struct nf_conntrack_zone *zone;
  600. struct nf_conntrack_tuple_hash *h;
  601. struct hlist_nulls_node *n;
  602. struct nf_conn *ct;
  603. unsigned int hash;
  604. zone = nf_ct_zone(ignored_conntrack);
  605. hash = hash_conntrack(net, tuple);
  606. /* Disable BHs the entire time since we need to disable them at
  607. * least once for the stats anyway.
  608. */
  609. rcu_read_lock_bh();
  610. begin:
  611. hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
  612. ct = nf_ct_tuplehash_to_ctrack(h);
  613. if (ct != ignored_conntrack &&
  614. nf_ct_tuple_equal(tuple, &h->tuple) &&
  615. nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) {
  616. NF_CT_STAT_INC(net, found);
  617. rcu_read_unlock_bh();
  618. return 1;
  619. }
  620. NF_CT_STAT_INC(net, searched);
  621. }
  622. if (get_nulls_value(n) != hash) {
  623. NF_CT_STAT_INC(net, search_restart);
  624. goto begin;
  625. }
  626. rcu_read_unlock_bh();
  627. return 0;
  628. }
  629. EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
  630. #define NF_CT_EVICTION_RANGE 8
  631. /* There's a small race here where we may free a just-assured
  632. connection. Too bad: we're in trouble anyway. */
  633. static noinline int early_drop(struct net *net, unsigned int _hash)
  634. {
  635. /* Use oldest entry, which is roughly LRU */
  636. struct nf_conntrack_tuple_hash *h;
  637. struct nf_conn *ct = NULL, *tmp;
  638. struct hlist_nulls_node *n;
  639. unsigned int i = 0, cnt = 0;
  640. int dropped = 0;
  641. unsigned int hash, sequence;
  642. spinlock_t *lockp;
  643. local_bh_disable();
  644. restart:
  645. sequence = read_seqcount_begin(&net->ct.generation);
  646. hash = hash_bucket(_hash, net);
  647. for (; i < net->ct.htable_size; i++) {
  648. lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
  649. spin_lock(lockp);
  650. if (read_seqcount_retry(&net->ct.generation, sequence)) {
  651. spin_unlock(lockp);
  652. goto restart;
  653. }
  654. hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
  655. hnnode) {
  656. tmp = nf_ct_tuplehash_to_ctrack(h);
  657. if (!test_bit(IPS_ASSURED_BIT, &tmp->status) &&
  658. !nf_ct_is_dying(tmp) &&
  659. atomic_inc_not_zero(&tmp->ct_general.use)) {
  660. ct = tmp;
  661. break;
  662. }
  663. cnt++;
  664. }
  665. hash = (hash + 1) % net->ct.htable_size;
  666. spin_unlock(lockp);
  667. if (ct || cnt >= NF_CT_EVICTION_RANGE)
  668. break;
  669. }
  670. local_bh_enable();
  671. if (!ct)
  672. return dropped;
  673. if (del_timer(&ct->timeout)) {
  674. if (nf_ct_delete(ct, 0, 0)) {
  675. dropped = 1;
  676. NF_CT_STAT_INC_ATOMIC(net, early_drop);
  677. }
  678. }
  679. nf_ct_put(ct);
  680. return dropped;
  681. }
  682. void init_nf_conntrack_hash_rnd(void)
  683. {
  684. unsigned int rand;
  685. /*
   686. * Why not initialize nf_conntrack_hash_rnd in an init() function?
   687. * Because there isn't enough entropy while the system is initializing,
   688. * so we initialize it as late as possible.
  689. */
  690. do {
  691. get_random_bytes(&rand, sizeof(rand));
  692. } while (!rand);
  693. cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
  694. }
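/* Note: cmpxchg() only installs the new seed if nf_conntrack_hash_rnd is
 * still zero, so concurrent first-time callers end up agreeing on a single
 * value and later calls are no-ops.
 */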
  695. static struct nf_conn *
  696. __nf_conntrack_alloc(struct net *net,
  697. const struct nf_conntrack_zone *zone,
  698. const struct nf_conntrack_tuple *orig,
  699. const struct nf_conntrack_tuple *repl,
  700. gfp_t gfp, u32 hash)
  701. {
  702. struct nf_conn *ct;
  703. if (unlikely(!nf_conntrack_hash_rnd)) {
  704. init_nf_conntrack_hash_rnd();
  705. /* recompute the hash as nf_conntrack_hash_rnd is initialized */
  706. hash = hash_conntrack_raw(orig);
  707. }
  708. /* We don't want any race condition at early drop stage */
  709. atomic_inc(&net->ct.count);
  710. if (nf_conntrack_max &&
  711. unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
  712. if (!early_drop(net, hash)) {
  713. atomic_dec(&net->ct.count);
  714. net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
  715. return ERR_PTR(-ENOMEM);
  716. }
  717. }
  718. /*
  719. * Do not use kmem_cache_zalloc(), as this cache uses
  720. * SLAB_DESTROY_BY_RCU.
  721. */
  722. ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
  723. if (ct == NULL)
  724. goto out;
  725. spin_lock_init(&ct->lock);
  726. ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
  727. ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
  728. ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
  729. /* save hash for reusing when confirming */
  730. *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
  731. ct->status = 0;
  732. /* Don't set timer yet: wait for confirmation */
  733. setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
  734. write_pnet(&ct->ct_net, net);
  735. memset(&ct->__nfct_init_offset[0], 0,
  736. offsetof(struct nf_conn, proto) -
  737. offsetof(struct nf_conn, __nfct_init_offset[0]));
  738. if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0)
  739. goto out_free;
  740. /* Because we use RCU lookups, we set ct_general.use to zero before
  741. * this is inserted in any list.
  742. */
  743. atomic_set(&ct->ct_general.use, 0);
  744. return ct;
  745. out_free:
  746. kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
  747. out:
  748. atomic_dec(&net->ct.count);
  749. return ERR_PTR(-ENOMEM);
  750. }
  751. struct nf_conn *nf_conntrack_alloc(struct net *net,
  752. const struct nf_conntrack_zone *zone,
  753. const struct nf_conntrack_tuple *orig,
  754. const struct nf_conntrack_tuple *repl,
  755. gfp_t gfp)
  756. {
  757. return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
  758. }
  759. EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
  760. void nf_conntrack_free(struct nf_conn *ct)
  761. {
  762. struct net *net = nf_ct_net(ct);
  763. /* A freed object has refcnt == 0, that's
  764. * the golden rule for SLAB_DESTROY_BY_RCU
  765. */
  766. NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
  767. nf_ct_ext_destroy(ct);
  768. nf_ct_ext_free(ct);
  769. kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
  770. smp_mb__before_atomic();
  771. atomic_dec(&net->ct.count);
  772. }
  773. EXPORT_SYMBOL_GPL(nf_conntrack_free);
  774. /* Allocate a new conntrack: we return -ENOMEM if classification
  775. failed due to stress. Otherwise it really is unclassifiable. */
  776. static struct nf_conntrack_tuple_hash *
  777. init_conntrack(struct net *net, struct nf_conn *tmpl,
  778. const struct nf_conntrack_tuple *tuple,
  779. struct nf_conntrack_l3proto *l3proto,
  780. struct nf_conntrack_l4proto *l4proto,
  781. struct sk_buff *skb,
  782. unsigned int dataoff, u32 hash)
  783. {
  784. struct nf_conn *ct;
  785. struct nf_conn_help *help;
  786. struct nf_conntrack_tuple repl_tuple;
  787. struct nf_conntrack_ecache *ecache;
  788. struct nf_conntrack_expect *exp = NULL;
  789. const struct nf_conntrack_zone *zone;
  790. struct nf_conn_timeout *timeout_ext;
  791. struct nf_conntrack_zone tmp;
  792. unsigned int *timeouts;
  793. if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
  794. pr_debug("Can't invert tuple.\n");
  795. return NULL;
  796. }
  797. zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
  798. ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
  799. hash);
  800. if (IS_ERR(ct))
  801. return (struct nf_conntrack_tuple_hash *)ct;
  802. if (tmpl && nfct_synproxy(tmpl)) {
  803. nfct_seqadj_ext_add(ct);
  804. nfct_synproxy_ext_add(ct);
  805. }
  806. timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
  807. if (timeout_ext) {
  808. timeouts = nf_ct_timeout_data(timeout_ext);
  809. if (unlikely(!timeouts))
  810. timeouts = l4proto->get_timeouts(net);
  811. } else {
  812. timeouts = l4proto->get_timeouts(net);
  813. }
  814. if (!l4proto->new(ct, skb, dataoff, timeouts)) {
  815. nf_conntrack_free(ct);
  816. pr_debug("init conntrack: can't track with proto module\n");
  817. return NULL;
  818. }
  819. if (timeout_ext)
  820. nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
  821. GFP_ATOMIC);
  822. nf_ct_acct_ext_add(ct, GFP_ATOMIC);
  823. nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
  824. nf_ct_labels_ext_add(ct);
  825. ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
  826. nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
  827. ecache ? ecache->expmask : 0,
  828. GFP_ATOMIC);
  829. local_bh_disable();
  830. if (net->ct.expect_count) {
  831. spin_lock(&nf_conntrack_expect_lock);
  832. exp = nf_ct_find_expectation(net, zone, tuple);
  833. if (exp) {
  834. pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
  835. ct, exp);
  836. /* Welcome, Mr. Bond. We've been expecting you... */
  837. __set_bit(IPS_EXPECTED_BIT, &ct->status);
  838. /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
  839. ct->master = exp->master;
  840. if (exp->helper) {
  841. help = nf_ct_helper_ext_add(ct, exp->helper,
  842. GFP_ATOMIC);
  843. if (help)
  844. rcu_assign_pointer(help->helper, exp->helper);
  845. }
  846. #ifdef CONFIG_NF_CONNTRACK_MARK
  847. ct->mark = exp->master->mark;
  848. #endif
  849. #ifdef CONFIG_NF_CONNTRACK_SECMARK
  850. ct->secmark = exp->master->secmark;
  851. #endif
  852. NF_CT_STAT_INC(net, expect_new);
  853. }
  854. spin_unlock(&nf_conntrack_expect_lock);
  855. }
  856. if (!exp) {
  857. __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
  858. NF_CT_STAT_INC(net, new);
  859. }
   860. /* It is now added to the unconfirmed list; bump the refcount. */
  861. nf_conntrack_get(&ct->ct_general);
  862. nf_ct_add_to_unconfirmed_list(ct);
  863. local_bh_enable();
  864. if (exp) {
  865. if (exp->expectfn)
  866. exp->expectfn(ct, exp);
  867. nf_ct_expect_put(exp);
  868. }
  869. return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
  870. }
  871. /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
  872. static inline struct nf_conn *
  873. resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
  874. struct sk_buff *skb,
  875. unsigned int dataoff,
  876. u_int16_t l3num,
  877. u_int8_t protonum,
  878. struct nf_conntrack_l3proto *l3proto,
  879. struct nf_conntrack_l4proto *l4proto,
  880. int *set_reply,
  881. enum ip_conntrack_info *ctinfo)
  882. {
  883. const struct nf_conntrack_zone *zone;
  884. struct nf_conntrack_tuple tuple;
  885. struct nf_conntrack_tuple_hash *h;
  886. struct nf_conntrack_zone tmp;
  887. struct nf_conn *ct;
  888. u32 hash;
  889. if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
  890. dataoff, l3num, protonum, net, &tuple, l3proto,
  891. l4proto)) {
  892. pr_debug("resolve_normal_ct: Can't get tuple\n");
  893. return NULL;
  894. }
  895. /* look for tuple match */
  896. zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
  897. hash = hash_conntrack_raw(&tuple);
  898. h = __nf_conntrack_find_get(net, zone, &tuple, hash);
  899. if (!h) {
  900. h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
  901. skb, dataoff, hash);
  902. if (!h)
  903. return NULL;
  904. if (IS_ERR(h))
  905. return (void *)h;
  906. }
  907. ct = nf_ct_tuplehash_to_ctrack(h);
  908. /* It exists; we have (non-exclusive) reference. */
  909. if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
  910. *ctinfo = IP_CT_ESTABLISHED_REPLY;
  911. /* Please set reply bit if this packet OK */
  912. *set_reply = 1;
  913. } else {
  914. /* Once we've had two way comms, always ESTABLISHED. */
  915. if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
  916. pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
  917. *ctinfo = IP_CT_ESTABLISHED;
  918. } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
  919. pr_debug("nf_conntrack_in: related packet for %p\n",
  920. ct);
  921. *ctinfo = IP_CT_RELATED;
  922. } else {
  923. pr_debug("nf_conntrack_in: new packet for %p\n", ct);
  924. *ctinfo = IP_CT_NEW;
  925. }
  926. *set_reply = 0;
  927. }
  928. skb->nfct = &ct->ct_general;
  929. skb->nfctinfo = *ctinfo;
  930. return ct;
  931. }
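/* Summary (added for clarity) of the ctinfo values chosen above:
 *
 *     reply direction                        -> IP_CT_ESTABLISHED_REPLY
 *     original direction, reply already seen -> IP_CT_ESTABLISHED
 *     original direction, expected conntrack -> IP_CT_RELATED
 *     original direction, otherwise          -> IP_CT_NEW
 */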
  932. unsigned int
  933. nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
  934. struct sk_buff *skb)
  935. {
  936. struct nf_conn *ct, *tmpl = NULL;
  937. enum ip_conntrack_info ctinfo;
  938. struct nf_conntrack_l3proto *l3proto;
  939. struct nf_conntrack_l4proto *l4proto;
  940. unsigned int *timeouts;
  941. unsigned int dataoff;
  942. u_int8_t protonum;
  943. int set_reply = 0;
  944. int ret;
  945. if (skb->nfct) {
  946. /* Previously seen (loopback or untracked)? Ignore. */
  947. tmpl = (struct nf_conn *)skb->nfct;
  948. if (!nf_ct_is_template(tmpl)) {
  949. NF_CT_STAT_INC_ATOMIC(net, ignore);
  950. return NF_ACCEPT;
  951. }
  952. skb->nfct = NULL;
  953. }
  954. /* rcu_read_lock()ed by nf_hook_slow */
  955. l3proto = __nf_ct_l3proto_find(pf);
  956. ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
  957. &dataoff, &protonum);
  958. if (ret <= 0) {
  959. pr_debug("not prepared to track yet or error occurred\n");
  960. NF_CT_STAT_INC_ATOMIC(net, error);
  961. NF_CT_STAT_INC_ATOMIC(net, invalid);
  962. ret = -ret;
  963. goto out;
  964. }
  965. l4proto = __nf_ct_l4proto_find(pf, protonum);
   966. /* It may be a special packet: error, unclean...
   967. * The inverse of the return code tells the netfilter
   968. * core what to do with the packet. */
  969. if (l4proto->error != NULL) {
  970. ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,
  971. pf, hooknum);
  972. if (ret <= 0) {
  973. NF_CT_STAT_INC_ATOMIC(net, error);
  974. NF_CT_STAT_INC_ATOMIC(net, invalid);
  975. ret = -ret;
  976. goto out;
  977. }
  978. /* ICMP[v6] protocol trackers may assign one conntrack. */
  979. if (skb->nfct)
  980. goto out;
  981. }
  982. ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
  983. l3proto, l4proto, &set_reply, &ctinfo);
  984. if (!ct) {
  985. /* Not valid part of a connection */
  986. NF_CT_STAT_INC_ATOMIC(net, invalid);
  987. ret = NF_ACCEPT;
  988. goto out;
  989. }
  990. if (IS_ERR(ct)) {
  991. /* Too stressed to deal. */
  992. NF_CT_STAT_INC_ATOMIC(net, drop);
  993. ret = NF_DROP;
  994. goto out;
  995. }
  996. NF_CT_ASSERT(skb->nfct);
  997. /* Decide what timeout policy we want to apply to this flow. */
  998. timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
  999. ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
  1000. if (ret <= 0) {
  1001. /* Invalid: inverse of the return code tells
  1002. * the netfilter core what to do */
  1003. pr_debug("nf_conntrack_in: Can't track with proto module\n");
  1004. nf_conntrack_put(skb->nfct);
  1005. skb->nfct = NULL;
  1006. NF_CT_STAT_INC_ATOMIC(net, invalid);
  1007. if (ret == -NF_DROP)
  1008. NF_CT_STAT_INC_ATOMIC(net, drop);
  1009. ret = -ret;
  1010. goto out;
  1011. }
  1012. if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
  1013. nf_conntrack_event_cache(IPCT_REPLY, ct);
  1014. out:
  1015. if (tmpl) {
  1016. /* Special case: we have to repeat this hook, assign the
  1017. * template again to this packet. We assume that this packet
  1018. * has no conntrack assigned. This is used by nf_ct_tcp. */
  1019. if (ret == NF_REPEAT)
  1020. skb->nfct = (struct nf_conntrack *)tmpl;
  1021. else
  1022. nf_ct_put(tmpl);
  1023. }
  1024. return ret;
  1025. }
  1026. EXPORT_SYMBOL_GPL(nf_conntrack_in);
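/* Usage sketch (hypothetical hook function, added for illustration): the
 * per-family conntrack modules call nf_conntrack_in() from their netfilter
 * hooks, roughly along these lines:
 *
 *     static unsigned int example_ipv4_conntrack_in(void *priv,
 *                                                   struct sk_buff *skb,
 *                                                   const struct nf_hook_state *state)
 *     {
 *             return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
 *     }
 */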
  1027. bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
  1028. const struct nf_conntrack_tuple *orig)
  1029. {
  1030. bool ret;
  1031. rcu_read_lock();
  1032. ret = nf_ct_invert_tuple(inverse, orig,
  1033. __nf_ct_l3proto_find(orig->src.l3num),
  1034. __nf_ct_l4proto_find(orig->src.l3num,
  1035. orig->dst.protonum));
  1036. rcu_read_unlock();
  1037. return ret;
  1038. }
  1039. EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
  1040. /* Alter reply tuple (maybe alter helper). This is for NAT, and is
  1041. implicitly racy: see __nf_conntrack_confirm */
  1042. void nf_conntrack_alter_reply(struct nf_conn *ct,
  1043. const struct nf_conntrack_tuple *newreply)
  1044. {
  1045. struct nf_conn_help *help = nfct_help(ct);
  1046. /* Should be unconfirmed, so not in hash table yet */
  1047. NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
  1048. pr_debug("Altering reply tuple of %p to ", ct);
  1049. nf_ct_dump_tuple(newreply);
  1050. ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
  1051. if (ct->master || (help && !hlist_empty(&help->expectations)))
  1052. return;
  1053. rcu_read_lock();
  1054. __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
  1055. rcu_read_unlock();
  1056. }
  1057. EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
  1058. /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
  1059. void __nf_ct_refresh_acct(struct nf_conn *ct,
  1060. enum ip_conntrack_info ctinfo,
  1061. const struct sk_buff *skb,
  1062. unsigned long extra_jiffies,
  1063. int do_acct)
  1064. {
  1065. NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
  1066. NF_CT_ASSERT(skb);
  1067. /* Only update if this is not a fixed timeout */
  1068. if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
  1069. goto acct;
  1070. /* If not in hash table, timer will not be active yet */
  1071. if (!nf_ct_is_confirmed(ct)) {
  1072. ct->timeout.expires = extra_jiffies;
  1073. } else {
  1074. unsigned long newtime = jiffies + extra_jiffies;
  1075. /* Only update the timeout if the new timeout is at least
  1076. HZ jiffies from the old timeout. Need del_timer for race
  1077. avoidance (may already be dying). */
  1078. if (newtime - ct->timeout.expires >= HZ)
  1079. mod_timer_pending(&ct->timeout, newtime);
  1080. }
  1081. acct:
  1082. if (do_acct) {
  1083. struct nf_conn_acct *acct;
  1084. acct = nf_conn_acct_find(ct);
  1085. if (acct) {
  1086. struct nf_conn_counter *counter = acct->counter;
  1087. atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
  1088. atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes);
  1089. }
  1090. }
  1091. }
  1092. EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
  1093. bool __nf_ct_kill_acct(struct nf_conn *ct,
  1094. enum ip_conntrack_info ctinfo,
  1095. const struct sk_buff *skb,
  1096. int do_acct)
  1097. {
  1098. if (do_acct) {
  1099. struct nf_conn_acct *acct;
  1100. acct = nf_conn_acct_find(ct);
  1101. if (acct) {
  1102. struct nf_conn_counter *counter = acct->counter;
  1103. atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
  1104. atomic64_add(skb->len - skb_network_offset(skb),
  1105. &counter[CTINFO2DIR(ctinfo)].bytes);
  1106. }
  1107. }
  1108. if (del_timer(&ct->timeout)) {
  1109. ct->timeout.function((unsigned long)ct);
  1110. return true;
  1111. }
  1112. return false;
  1113. }
  1114. EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
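/* Note: most callers use the nf_ct_kill() and nf_ct_kill_acct() inline
 * wrappers from <net/netfilter/nf_conntrack.h>, which invoke the function
 * above with do_acct set to 0 and 1 respectively.
 */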
  1115. #ifdef CONFIG_NF_CONNTRACK_ZONES
  1116. static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
  1117. .len = sizeof(struct nf_conntrack_zone),
  1118. .align = __alignof__(struct nf_conntrack_zone),
  1119. .id = NF_CT_EXT_ZONE,
  1120. };
  1121. #endif
  1122. #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
  1123. #include <linux/netfilter/nfnetlink.h>
  1124. #include <linux/netfilter/nfnetlink_conntrack.h>
  1125. #include <linux/mutex.h>
   1126. /* Generic function for tcp/udp/sctp/dccp and the like. This needs to be
   1127. * in nf_conntrack_core, since we don't want the protocols to autoload
  1128. * or depend on ctnetlink */
  1129. int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
  1130. const struct nf_conntrack_tuple *tuple)
  1131. {
  1132. if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) ||
  1133. nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port))
  1134. goto nla_put_failure;
  1135. return 0;
  1136. nla_put_failure:
  1137. return -1;
  1138. }
  1139. EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr);
  1140. const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
  1141. [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 },
  1142. [CTA_PROTO_DST_PORT] = { .type = NLA_U16 },
  1143. };
  1144. EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);
  1145. int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
  1146. struct nf_conntrack_tuple *t)
  1147. {
  1148. if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
  1149. return -EINVAL;
  1150. t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
  1151. t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
  1152. return 0;
  1153. }
  1154. EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
  1155. int nf_ct_port_nlattr_tuple_size(void)
  1156. {
  1157. return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
  1158. }
  1159. EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
  1160. #endif
  1161. /* Used by ipt_REJECT and ip6t_REJECT. */
  1162. static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
  1163. {
  1164. struct nf_conn *ct;
  1165. enum ip_conntrack_info ctinfo;
  1166. /* This ICMP is in reverse direction to the packet which caused it */
  1167. ct = nf_ct_get(skb, &ctinfo);
  1168. if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
  1169. ctinfo = IP_CT_RELATED_REPLY;
  1170. else
  1171. ctinfo = IP_CT_RELATED;
  1172. /* Attach to new skbuff, and increment count */
  1173. nskb->nfct = &ct->ct_general;
  1174. nskb->nfctinfo = ctinfo;
  1175. nf_conntrack_get(nskb->nfct);
  1176. }
  1177. /* Bring out ya dead! */
  1178. static struct nf_conn *
  1179. get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
  1180. void *data, unsigned int *bucket)
  1181. {
  1182. struct nf_conntrack_tuple_hash *h;
  1183. struct nf_conn *ct;
  1184. struct hlist_nulls_node *n;
  1185. int cpu;
  1186. spinlock_t *lockp;
  1187. for (; *bucket < net->ct.htable_size; (*bucket)++) {
  1188. lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
  1189. local_bh_disable();
  1190. spin_lock(lockp);
  1191. if (*bucket < net->ct.htable_size) {
  1192. hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
  1193. if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
  1194. continue;
  1195. ct = nf_ct_tuplehash_to_ctrack(h);
  1196. if (iter(ct, data))
  1197. goto found;
  1198. }
  1199. }
  1200. spin_unlock(lockp);
  1201. local_bh_enable();
  1202. }
  1203. for_each_possible_cpu(cpu) {
  1204. struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
  1205. spin_lock_bh(&pcpu->lock);
  1206. hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
  1207. ct = nf_ct_tuplehash_to_ctrack(h);
  1208. if (iter(ct, data))
  1209. set_bit(IPS_DYING_BIT, &ct->status);
  1210. }
  1211. spin_unlock_bh(&pcpu->lock);
  1212. }
  1213. return NULL;
  1214. found:
  1215. atomic_inc(&ct->ct_general.use);
  1216. spin_unlock(lockp);
  1217. local_bh_enable();
  1218. return ct;
  1219. }
  1220. void nf_ct_iterate_cleanup(struct net *net,
  1221. int (*iter)(struct nf_conn *i, void *data),
  1222. void *data, u32 portid, int report)
  1223. {
  1224. struct nf_conn *ct;
  1225. unsigned int bucket = 0;
  1226. while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
   1227. /* Time to push up daisies... */
  1228. if (del_timer(&ct->timeout))
  1229. nf_ct_delete(ct, portid, report);
  1230. /* ... else the timer will get him soon. */
  1231. nf_ct_put(ct);
  1232. }
  1233. }
  1234. EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
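/* Usage sketch (hypothetical callback, added for illustration; requires
 * CONFIG_NF_CONNTRACK_MARK for ct->mark): kill every conntrack in a netns
 * whose mark matches a given value:
 *
 *     static int example_kill_by_mark(struct nf_conn *ct, void *data)
 *     {
 *             return ct->mark == *(u32 *)data;
 *     }
 *
 *     nf_ct_iterate_cleanup(net, example_kill_by_mark, &mark, 0, 0);
 */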
  1235. static int kill_all(struct nf_conn *i, void *data)
  1236. {
  1237. return 1;
  1238. }
  1239. void nf_ct_free_hashtable(void *hash, unsigned int size)
  1240. {
  1241. if (is_vmalloc_addr(hash))
  1242. vfree(hash);
  1243. else
  1244. free_pages((unsigned long)hash,
  1245. get_order(sizeof(struct hlist_head) * size));
  1246. }
  1247. EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
  1248. static int untrack_refs(void)
  1249. {
  1250. int cnt = 0, cpu;
  1251. for_each_possible_cpu(cpu) {
  1252. struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
  1253. cnt += atomic_read(&ct->ct_general.use) - 1;
  1254. }
  1255. return cnt;
  1256. }
  1257. void nf_conntrack_cleanup_start(void)
  1258. {
  1259. RCU_INIT_POINTER(ip_ct_attach, NULL);
  1260. }
  1261. void nf_conntrack_cleanup_end(void)
  1262. {
  1263. RCU_INIT_POINTER(nf_ct_destroy, NULL);
  1264. while (untrack_refs() > 0)
  1265. schedule();
  1266. #ifdef CONFIG_NF_CONNTRACK_ZONES
  1267. nf_ct_extend_unregister(&nf_ct_zone_extend);
  1268. #endif
  1269. nf_conntrack_proto_fini();
  1270. nf_conntrack_seqadj_fini();
  1271. nf_conntrack_labels_fini();
  1272. nf_conntrack_helper_fini();
  1273. nf_conntrack_timeout_fini();
  1274. nf_conntrack_ecache_fini();
  1275. nf_conntrack_tstamp_fini();
  1276. nf_conntrack_acct_fini();
  1277. nf_conntrack_expect_fini();
  1278. }
  1279. /*
  1280. * Mishearing the voices in his head, our hero wonders how he's
  1281. * supposed to kill the mall.
  1282. */
  1283. void nf_conntrack_cleanup_net(struct net *net)
  1284. {
  1285. LIST_HEAD(single);
  1286. list_add(&net->exit_list, &single);
  1287. nf_conntrack_cleanup_net_list(&single);
  1288. }
  1289. void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
  1290. {
  1291. int busy;
  1292. struct net *net;
  1293. /*
  1294. * This makes sure all current packets have passed through
   1295. * the netfilter framework. Roll on, two-stage module
  1296. * delete...
  1297. */
  1298. synchronize_net();
  1299. i_see_dead_people:
  1300. busy = 0;
  1301. list_for_each_entry(net, net_exit_list, exit_list) {
  1302. nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
  1303. if (atomic_read(&net->ct.count) != 0)
  1304. busy = 1;
  1305. }
  1306. if (busy) {
  1307. schedule();
  1308. goto i_see_dead_people;
  1309. }
  1310. list_for_each_entry(net, net_exit_list, exit_list) {
  1311. nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
  1312. nf_conntrack_proto_pernet_fini(net);
  1313. nf_conntrack_helper_pernet_fini(net);
  1314. nf_conntrack_ecache_pernet_fini(net);
  1315. nf_conntrack_tstamp_pernet_fini(net);
  1316. nf_conntrack_acct_pernet_fini(net);
  1317. nf_conntrack_expect_pernet_fini(net);
  1318. kmem_cache_destroy(net->ct.nf_conntrack_cachep);
  1319. kfree(net->ct.slabname);
  1320. free_percpu(net->ct.stat);
  1321. free_percpu(net->ct.pcpu_lists);
  1322. }
  1323. }
  1324. void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
  1325. {
  1326. struct hlist_nulls_head *hash;
  1327. unsigned int nr_slots, i;
  1328. size_t sz;
  1329. BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
  1330. nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
  1331. sz = nr_slots * sizeof(struct hlist_nulls_head);
  1332. hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
  1333. get_order(sz));
  1334. if (!hash)
  1335. hash = vzalloc(sz);
  1336. if (hash && nulls)
  1337. for (i = 0; i < nr_slots; i++)
  1338. INIT_HLIST_NULLS_HEAD(&hash[i], i);
  1339. return hash;
  1340. }
  1341. EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
{
	int i, bucket, rc;
	unsigned int hashsize, old_size;
	struct hlist_nulls_head *hash, *old_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	if (current->nsproxy->net_ns != &init_net)
		return -EOPNOTSUPP;

	/* On boot, we can set this without any fancy locking. */
	if (!nf_conntrack_htable_size)
		return param_set_uint(val, kp);

	rc = kstrtouint(val, 0, &hashsize);
	if (rc)
		return rc;
	if (!hashsize)
		return -EINVAL;

	hash = nf_ct_alloc_hashtable(&hashsize, 1);
	if (!hash)
		return -ENOMEM;

	local_bh_disable();
	nf_conntrack_all_lock();
	write_seqcount_begin(&init_net.ct.generation);

	/* Lookups in the old hash might happen in parallel, which means we
	 * might get false negatives during connection lookup. New connections
	 * created because of a false negative won't make it into the hash
	 * though since that required taking the locks.
	 */
	for (i = 0; i < init_net.ct.htable_size; i++) {
		while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
			h = hlist_nulls_entry(init_net.ct.hash[i].first,
					      struct nf_conntrack_tuple_hash, hnnode);
			ct = nf_ct_tuplehash_to_ctrack(h);
			hlist_nulls_del_rcu(&h->hnnode);
			bucket = __hash_conntrack(&h->tuple, hashsize);
			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
		}
	}
	old_size = init_net.ct.htable_size;
	old_hash = init_net.ct.hash;

	init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
	init_net.ct.hash = hash;

	write_seqcount_end(&init_net.ct.generation);
	nf_conntrack_all_unlock();
	local_bh_enable();

	nf_ct_free_hashtable(old_hash, old_size);
	return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);

module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
		  &nf_conntrack_htable_size, 0600);

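/* Typical runtime usage (assuming the module is built as nf_conntrack and
 * sysfs is mounted in the usual place):
 *
 *	echo 65536 > /sys/module/nf_conntrack/parameters/hashsize
 */

/* OR the given status bits into every per-cpu untracked conntrack entry,
 * e.g. to mark them all as confirmed at init time.
 */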
void nf_ct_untracked_status_or(unsigned long bits)
{
	int cpu;

	for_each_possible_cpu(cpu)
		per_cpu(nf_conntrack_untracked, cpu).status |= bits;
}
EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);

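/* One-time, module-wide initialisation: size the hash table, register the
 * extension subsystems (expectations, accounting, timestamps, events,
 * timeouts, helpers, labels, seqadj, zones, protocols) and set up the
 * per-cpu untracked entries. Errors unwind in reverse registration order.
 */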
int nf_conntrack_init_start(void)
{
	int max_factor = 8;
	int i, ret, cpu;

	for (i = 0; i < CONNTRACK_LOCKS; i++)
		spin_lock_init(&nf_conntrack_locks[i]);

	if (!nf_conntrack_htable_size) {
		/* Idea from tcp.c: use 1/16384 of memory.
		 * On i386: 32MB machine has 512 buckets.
		 * >= 1GB machines have 16384 buckets.
		 * >= 4GB machines have 65536 buckets.
		 */
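		/* Worked example (assuming 4 KiB pages and 4-byte pointers,
		 * i.e. i386): a 32 MB machine has totalram_pages = 8192, so
		 * (8192 << 12) / 16384 / sizeof(struct hlist_head)
		 *   = 33554432 / 16384 / 4 = 512 buckets,
		 * and with max_factor = 4 below, nf_conntrack_max = 2048.
		 */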
		nf_conntrack_htable_size
			= (((totalram_pages << PAGE_SHIFT) / 16384)
			   / sizeof(struct hlist_head));
		if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
			nf_conntrack_htable_size = 65536;
		else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
			nf_conntrack_htable_size = 16384;
		if (nf_conntrack_htable_size < 32)
			nf_conntrack_htable_size = 32;

		/* Use a max. factor of four by default to get the same max as
		 * with the old struct list_heads. When a table size is given
		 * we use the old value of 8 to avoid reducing the max.
		 * entries. */
		max_factor = 4;
	}
	nf_conntrack_max = max_factor * nf_conntrack_htable_size;

	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
	       nf_conntrack_max);

	ret = nf_conntrack_expect_init();
	if (ret < 0)
		goto err_expect;

	ret = nf_conntrack_acct_init();
	if (ret < 0)
		goto err_acct;

	ret = nf_conntrack_tstamp_init();
	if (ret < 0)
		goto err_tstamp;

	ret = nf_conntrack_ecache_init();
	if (ret < 0)
		goto err_ecache;

	ret = nf_conntrack_timeout_init();
	if (ret < 0)
		goto err_timeout;

	ret = nf_conntrack_helper_init();
	if (ret < 0)
		goto err_helper;

	ret = nf_conntrack_labels_init();
	if (ret < 0)
		goto err_labels;

	ret = nf_conntrack_seqadj_init();
	if (ret < 0)
		goto err_seqadj;

#ifdef CONFIG_NF_CONNTRACK_ZONES
	ret = nf_ct_extend_register(&nf_ct_zone_extend);
	if (ret < 0)
		goto err_extend;
#endif
	ret = nf_conntrack_proto_init();
	if (ret < 0)
		goto err_proto;

	/* Set up fake conntrack: to never be deleted, not in any hashes */
	for_each_possible_cpu(cpu) {
		struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
		write_pnet(&ct->ct_net, &init_net);
		atomic_set(&ct->ct_general.use, 1);
	}
	/* - and make it look like a confirmed connection */
	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
	return 0;

err_proto:
#ifdef CONFIG_NF_CONNTRACK_ZONES
	nf_ct_extend_unregister(&nf_ct_zone_extend);
err_extend:
#endif
	nf_conntrack_seqadj_fini();
err_seqadj:
	nf_conntrack_labels_fini();
err_labels:
	nf_conntrack_helper_fini();
err_helper:
	nf_conntrack_timeout_fini();
err_timeout:
	nf_conntrack_ecache_fini();
err_ecache:
	nf_conntrack_tstamp_fini();
err_tstamp:
	nf_conntrack_acct_fini();
err_acct:
	nf_conntrack_expect_fini();
err_expect:
	return ret;
}

void nf_conntrack_init_end(void)
{
	/* For use by REJECT target */
	RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
	RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
}

/*
 * We need to use special "null" values, not used in hash table
 */
#define UNCONFIRMED_NULLS_VAL	((1<<30)+0)
#define DYING_NULLS_VAL		((1<<30)+1)
#define TEMPLATE_NULLS_VAL	((1<<30)+2)

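/* Per-netns initialisation: set up the per-cpu unconfirmed/dying lists,
 * statistics, a dedicated nf_conn slab cache (named uniquely per netns)
 * and the hash table, then run the per-netns init of each subsystem.
 * The error path releases everything in reverse order.
 */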
int nf_conntrack_init_net(struct net *net)
{
	static atomic64_t unique_id;
	int ret = -ENOMEM;
	int cpu;

	atomic_set(&net->ct.count, 0);
	seqcount_init(&net->ct.generation);

	net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
	if (!net->ct.pcpu_lists)
		goto err_stat;

	for_each_possible_cpu(cpu) {
		struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);

		spin_lock_init(&pcpu->lock);
		INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
		INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
	}

	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
	if (!net->ct.stat)
		goto err_pcpu_lists;

	net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu",
				     (u64)atomic64_inc_return(&unique_id));
	if (!net->ct.slabname)
		goto err_slabname;

	net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
							sizeof(struct nf_conn), 0,
							SLAB_DESTROY_BY_RCU, NULL);
	if (!net->ct.nf_conntrack_cachep) {
		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
		goto err_cache;
	}

	net->ct.htable_size = nf_conntrack_htable_size;
	net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
	if (!net->ct.hash) {
		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
		goto err_hash;
	}

	ret = nf_conntrack_expect_pernet_init(net);
	if (ret < 0)
		goto err_expect;
	ret = nf_conntrack_acct_pernet_init(net);
	if (ret < 0)
		goto err_acct;
	ret = nf_conntrack_tstamp_pernet_init(net);
	if (ret < 0)
		goto err_tstamp;
	ret = nf_conntrack_ecache_pernet_init(net);
	if (ret < 0)
		goto err_ecache;
	ret = nf_conntrack_helper_pernet_init(net);
	if (ret < 0)
		goto err_helper;
	ret = nf_conntrack_proto_pernet_init(net);
	if (ret < 0)
		goto err_proto;
	return 0;

err_proto:
	nf_conntrack_helper_pernet_fini(net);
err_helper:
	nf_conntrack_ecache_pernet_fini(net);
err_ecache:
	nf_conntrack_tstamp_pernet_fini(net);
err_tstamp:
	nf_conntrack_acct_pernet_fini(net);
err_acct:
	nf_conntrack_expect_pernet_fini(net);
err_expect:
	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
err_hash:
	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
err_cache:
	kfree(net->ct.slabname);
err_slabname:
	free_percpu(net->ct.stat);
err_pcpu_lists:
	free_percpu(net->ct.pcpu_lists);
err_stat:
	return ret;
}