ip_set_hash_gen.h

/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#ifndef _IP_SET_HASH_GEN_H
#define _IP_SET_HASH_GEN_H

#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/types.h>
#include <linux/netfilter/ipset/ip_set_timeout.h>

#define __ipset_dereference_protected(p, c)	rcu_dereference_protected(p, c)
#define ipset_dereference_protected(p, set) \
	__ipset_dereference_protected(p, spin_is_locked(&(set)->lock))

#define rcu_dereference_bh_nfnl(p)	rcu_dereference_bh_check(p, 1)

/* Hashing which uses arrays to resolve clashing. The hash table is resized
 * (doubled) when searching becomes too long.
 * Internally jhash is used with the assumption that the size of the
 * stored data is a multiple of sizeof(u32).
 *
 * Readers and resizing
 *
 * Resizing can be triggered by userspace command only, and those
 * are serialized by the nfnl mutex. During resizing the set is
 * read-locked, so the only possible concurrent operations are
 * the kernel side readers. Those must be protected by proper RCU locking.
 */
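
/* Illustrative reader-side sketch (not part of the original header): kernel
 * side readers walk the table under RCU BH protection, while a resize may
 * swap h->table underneath them.
 *
 *	rcu_read_lock_bh();
 *	t = rcu_dereference_bh(h->table);
 *	n = rcu_dereference_bh(hbucket(t, key));
 *	... lookup in bucket n ...
 *	rcu_read_unlock_bh();
 */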
/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE			4
/* Max number of elements to store in an array block */
#define AHASH_MAX_SIZE			(3 * AHASH_INIT_SIZE)
/* Max number of elements in the array block when tuned */
#define AHASH_MAX_TUNED			64

/* Max number of elements that can be tuned */
#ifdef IP_SET_HASH_WITH_MULTI
#define AHASH_MAX(h)			((h)->ahash_max)

static inline u8
tune_ahash_max(u8 curr, u32 multi)
{
	u32 n;

	if (multi < curr)
		return curr;

	n = curr + AHASH_INIT_SIZE;
	/* Currently, at listing, one hash bucket must fit into a message.
	 * Therefore we have a hard limit here.
	 */
	return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
}

#define TUNE_AHASH_MAX(h, multi) \
	((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
#else
#define AHASH_MAX(h)			AHASH_MAX_SIZE
#define TUNE_AHASH_MAX(h, multi)
#endif
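
/* Worked example (illustrative only, assuming ahash_max starts at
 * AHASH_MAX_SIZE == 12): a lookup reporting multi >= 12 makes
 * TUNE_AHASH_MAX() raise ahash_max to 16, then 20, and so on in
 * AHASH_INIT_SIZE steps up to the AHASH_MAX_TUNED == 64 ceiling;
 * multi < curr leaves the limit untouched.
 */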
/* A hash bucket */
struct hbucket {
	struct rcu_head rcu;	/* for call_rcu_bh */
	/* Which positions are used in the array */
	DECLARE_BITMAP(used, AHASH_MAX_TUNED);
	u8 size;		/* size of the array */
	u8 pos;			/* position of the first free entry */
	unsigned char value[0]	/* the array of the values */
		__aligned(__alignof__(u64));
};

/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
	atomic_t ref;		/* References for resizing */
	atomic_t uref;		/* References for dumping */
	u8 htable_bits;		/* size of hash table == 2^htable_bits */
	struct hbucket __rcu *bucket[0]; /* hashtable buckets */
};
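
/* Illustrative sizing (not part of the original header): both structures
 * end in a flexible array, so allocations are open-coded throughout, e.g.
 *
 *	n = kzalloc(sizeof(struct hbucket) + size * dsize, GFP_ATOMIC);
 *
 * A bucket with size == AHASH_INIT_SIZE (4) and dsize == 16 thus takes the
 * header plus 64 bytes of element storage; the table itself is allocated
 * with htable_size(htable_bits) below.
 */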
#define hbucket(h, i)		((h)->bucket[i])

#ifndef IPSET_NET_COUNT
#define IPSET_NET_COUNT		1
#endif

/* Book-keeping of the prefixes added to the set */
struct net_prefixes {
	u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */
	u8 cidr[IPSET_NET_COUNT];  /* the cidr value */
};
/* Compute the hash table size */
static size_t
htable_size(u8 hbits)
{
	size_t hsize;

	/* We must fit both into u32 in jhash and size_t */
	if (hbits > 31)
		return 0;
	hsize = jhash_size(hbits);
	if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
	    < hsize)
		return 0;

	return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
}

/* Compute htable_bits from the user input parameter hashsize */
static u8
htable_bits(u32 hashsize)
{
	/* Assume that hashsize == 2^htable_bits */
	u8 bits = fls(hashsize - 1);

	if (jhash_size(bits) != hashsize)
		/* Round up to the first 2^n value */
		bits = fls(hashsize);

	return bits;
}
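
/* Worked example (illustrative only): hashsize = 1000 gives
 * fls(999) = 10, and since jhash_size(10) = 1024 != 1000 the size is
 * rounded up with bits = fls(1000) = 10, i.e. a 1024-bucket table.
 * A power of two such as hashsize = 2048 maps directly to bits = 11.
 */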
#ifdef IP_SET_HASH_WITH_NETS
#if IPSET_NET_COUNT > 1
#define __CIDR(cidr, i)		(cidr[i])
#else
#define __CIDR(cidr, i)		(cidr)
#endif

/* cidr + 1 is stored in net_prefixes to support /0 */
#define NCIDR_PUT(cidr)		((cidr) + 1)
#define NCIDR_GET(cidr)		((cidr) - 1)

#ifdef IP_SET_HASH_WITH_NETS_PACKED
/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */
#define DCIDR_PUT(cidr)		((cidr) - 1)
#define DCIDR_GET(cidr, i)	(__CIDR(cidr, i) + 1)
#else
#define DCIDR_PUT(cidr)		(cidr)
#define DCIDR_GET(cidr, i)	__CIDR(cidr, i)
#endif

#define INIT_CIDR(cidr, host_mask)	\
	DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))

#define SET_HOST_MASK(family)	(family == AF_INET ? 32 : 128)

#ifdef IP_SET_HASH_WITH_NET0
/* cidr from 0 to SET_HOST_MASK() value and c = cidr + 1 */
#define NLEN(family)		(SET_HOST_MASK(family) + 1)
#define CIDR_POS(c)		((c) - 1)
#else
/* cidr from 1 to SET_HOST_MASK() value and c = cidr + 1 */
#define NLEN(family)		SET_HOST_MASK(family)
#define CIDR_POS(c)		((c) - 2)
#endif

#else
#define NLEN(family)		0
#endif /* IP_SET_HASH_WITH_NETS */
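
/* Worked example (illustrative only): a /24 network is book-kept as
 * NCIDR_PUT(24) == 25 in net_prefixes, so even /0 (stored as 1) stays
 * distinguishable from an empty slot (0). With the packed variant the
 * data entry stores DCIDR_PUT(24) == 23 and DCIDR_GET() restores 24.
 */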
#endif /* _IP_SET_HASH_GEN_H */

/* Family dependent templates */
#undef ahash_data
#undef mtype_data_equal
#undef mtype_do_data_match
#undef mtype_data_set_flags
#undef mtype_data_reset_elem
#undef mtype_data_reset_flags
#undef mtype_data_netmask
#undef mtype_data_list
#undef mtype_data_next
#undef mtype_elem

#undef mtype_ahash_destroy
#undef mtype_ext_cleanup
#undef mtype_add_cidr
#undef mtype_del_cidr
#undef mtype_ahash_memsize
#undef mtype_flush
#undef mtype_destroy
#undef mtype_same_set
#undef mtype_kadt
#undef mtype_uadt
#undef mtype

#undef mtype_add
#undef mtype_del
#undef mtype_test_cidrs
#undef mtype_test
#undef mtype_uref
#undef mtype_expire
#undef mtype_resize
#undef mtype_head
#undef mtype_list
#undef mtype_gc
#undef mtype_gc_init
#undef mtype_variant
#undef mtype_data_match

#undef HKEY
#define mtype_data_equal	IPSET_TOKEN(MTYPE, _data_equal)
#ifdef IP_SET_HASH_WITH_NETS
#define mtype_do_data_match	IPSET_TOKEN(MTYPE, _do_data_match)
#else
#define mtype_do_data_match(d)	1
#endif
#define mtype_data_set_flags	IPSET_TOKEN(MTYPE, _data_set_flags)
#define mtype_data_reset_elem	IPSET_TOKEN(MTYPE, _data_reset_elem)
#define mtype_data_reset_flags	IPSET_TOKEN(MTYPE, _data_reset_flags)
#define mtype_data_netmask	IPSET_TOKEN(MTYPE, _data_netmask)
#define mtype_data_list		IPSET_TOKEN(MTYPE, _data_list)
#define mtype_data_next		IPSET_TOKEN(MTYPE, _data_next)
#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)

#define mtype_ahash_destroy	IPSET_TOKEN(MTYPE, _ahash_destroy)
#define mtype_ext_cleanup	IPSET_TOKEN(MTYPE, _ext_cleanup)
#define mtype_add_cidr		IPSET_TOKEN(MTYPE, _add_cidr)
#define mtype_del_cidr		IPSET_TOKEN(MTYPE, _del_cidr)
#define mtype_ahash_memsize	IPSET_TOKEN(MTYPE, _ahash_memsize)
#define mtype_flush		IPSET_TOKEN(MTYPE, _flush)
#define mtype_destroy		IPSET_TOKEN(MTYPE, _destroy)
#define mtype_same_set		IPSET_TOKEN(MTYPE, _same_set)
#define mtype_kadt		IPSET_TOKEN(MTYPE, _kadt)
#define mtype_uadt		IPSET_TOKEN(MTYPE, _uadt)
#define mtype			MTYPE

#define mtype_add		IPSET_TOKEN(MTYPE, _add)
#define mtype_del		IPSET_TOKEN(MTYPE, _del)
#define mtype_test_cidrs	IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test		IPSET_TOKEN(MTYPE, _test)
#define mtype_uref		IPSET_TOKEN(MTYPE, _uref)
#define mtype_expire		IPSET_TOKEN(MTYPE, _expire)
#define mtype_resize		IPSET_TOKEN(MTYPE, _resize)
#define mtype_head		IPSET_TOKEN(MTYPE, _head)
#define mtype_list		IPSET_TOKEN(MTYPE, _list)
#define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init		IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_variant		IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match	IPSET_TOKEN(MTYPE, _data_match)

#ifndef MTYPE
#error "MTYPE is not defined!"
#endif

#ifndef HOST_MASK
#error "HOST_MASK is not defined!"
#endif

#ifndef HKEY_DATALEN
#define HKEY_DATALEN		sizeof(struct mtype_elem)
#endif

#define HKEY(data, initval, htable_bits)			\
	(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval)	\
	 & jhash_mask(htable_bits))
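
/* Worked example (illustrative only): with htable_bits = 10 the mask is
 * jhash_mask(10) == 0x3ff, so HKEY() hashes HKEY_DATALEN / 4 u32 words of
 * the element with jhash2() and keeps the low 10 bits as the bucket index.
 */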
#ifndef htype
#ifndef HTYPE
#error "HTYPE is not defined!"
#endif /* HTYPE */
#define htype			HTYPE

/* The generic hash structure */
struct htype {
	struct htable __rcu *table; /* the hash table */
	u32 maxelem;		/* max elements in the hash */
	u32 elements;		/* current number of elements (vs timeout) */
	u32 initval;		/* random jhash init value */
#ifdef IP_SET_HASH_WITH_MARKMASK
	u32 markmask;		/* markmask value for mark mask to store */
#endif
	struct timer_list gc;	/* garbage collection when timeout enabled */
	struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_MULTI
	u8 ahash_max;		/* max elements in an array block */
#endif
#ifdef IP_SET_HASH_WITH_NETMASK
	u8 netmask;		/* netmask value for subnets to store */
#endif
#ifdef IP_SET_HASH_WITH_NETS
	struct net_prefixes nets[0]; /* book-keeping of prefixes */
#endif
};
#endif /* htype */
#ifdef IP_SET_HASH_WITH_NETS
/* Network cidr size book keeping when the hash stores different
 * sized networks. cidr == real cidr + 1 to support /0.
 */
static void
mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
{
	int i, j;

	/* Add in increasing prefix order, so larger cidr first */
	for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) {
		if (j != -1) {
			continue;
		} else if (h->nets[i].cidr[n] < cidr) {
			j = i;
		} else if (h->nets[i].cidr[n] == cidr) {
			h->nets[CIDR_POS(cidr)].nets[n]++;
			return;
		}
	}
	if (j != -1) {
		for (; i > j; i--)
			h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
	}
	h->nets[i].cidr[n] = cidr;
	h->nets[CIDR_POS(cidr)].nets[n] = 1;
}

static void
mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
{
	u8 i, j, net_end = nets_length - 1;

	for (i = 0; i < nets_length; i++) {
		if (h->nets[i].cidr[n] != cidr)
			continue;
		h->nets[CIDR_POS(cidr)].nets[n]--;
		if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
			return;
		for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
			h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
		h->nets[j].cidr[n] = 0;
		return;
	}
}
#endif
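
/* Worked example (illustrative only): starting from an empty nets[] and
 * adding a /16 then a /24 (stored as 17 and 25), the cidr array is kept in
 * decreasing order, {25, 17, 0, ...}, while the per-cidr element counters
 * live at CIDR_POS(25) and CIDR_POS(17). Deleting the last /24 element
 * shifts the remaining cidr entries back down.
 */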
/* Calculate the actual memory size of the set data */
static size_t
mtype_ahash_memsize(const struct htype *h, const struct htable *t,
		    u8 nets_length, size_t dsize)
{
	u32 i;
	struct hbucket *n;
	size_t memsize = sizeof(*h) + sizeof(*t);

#ifdef IP_SET_HASH_WITH_NETS
	memsize += sizeof(struct net_prefixes) * nets_length;
#endif
	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = rcu_dereference_bh(hbucket(t, i));
		if (!n)
			continue;
		memsize += sizeof(struct hbucket) + n->size * dsize;
	}

	return memsize;
}
/* Get the ith element from the array block n */
#define ahash_data(n, i, dsize)	\
	((struct mtype_elem *)((n)->value + ((i) * (dsize))))

static void
mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
{
	int i;

	for (i = 0; i < n->pos; i++)
		if (test_bit(i, n->used))
			ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
}
/* Flush a hash type of set: destroy all elements */
static void
mtype_flush(struct ip_set *set)
{
	struct htype *h = set->data;
	struct htable *t;
	struct hbucket *n;
	u32 i;

	t = ipset_dereference_protected(h->table, set);
	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = __ipset_dereference_protected(hbucket(t, i), 1);
		if (!n)
			continue;
		if (set->extensions & IPSET_EXT_DESTROY)
			mtype_ext_cleanup(set, n);
		/* FIXME: use slab cache */
		rcu_assign_pointer(hbucket(t, i), NULL);
		kfree_rcu(n, rcu);
	}
#ifdef IP_SET_HASH_WITH_NETS
	memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family));
#endif
	h->elements = 0;
}
/* Destroy the hashtable part of the set */
static void
mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
{
	struct hbucket *n;
	u32 i;

	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = __ipset_dereference_protected(hbucket(t, i), 1);
		if (!n)
			continue;
		if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
			mtype_ext_cleanup(set, n);
		/* FIXME: use slab cache */
		kfree(n);
	}

	ip_set_free(t);
}

/* Destroy a hash type of set */
static void
mtype_destroy(struct ip_set *set)
{
	struct htype *h = set->data;

	if (SET_WITH_TIMEOUT(set))
		del_timer_sync(&h->gc);

	mtype_ahash_destroy(set,
			    __ipset_dereference_protected(h->table, 1), true);
	kfree(h);

	set->data = NULL;
}
static void
mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
{
	struct htype *h = set->data;

	init_timer(&h->gc);
	h->gc.data = (unsigned long)set;
	h->gc.function = gc;
	h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
	add_timer(&h->gc);
	pr_debug("gc initialized, run in every %u\n",
		 IPSET_GC_PERIOD(set->timeout));
}

static bool
mtype_same_set(const struct ip_set *a, const struct ip_set *b)
{
	const struct htype *x = a->data;
	const struct htype *y = b->data;

	/* Resizing changes htable_bits, so we ignore it */
	return x->maxelem == y->maxelem &&
	       a->timeout == b->timeout &&
#ifdef IP_SET_HASH_WITH_NETMASK
	       x->netmask == y->netmask &&
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	       x->markmask == y->markmask &&
#endif
	       a->extensions == b->extensions;
}
/* Delete expired elements from the hashtable */
static void
mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
{
	struct htable *t;
	struct hbucket *n, *tmp;
	struct mtype_elem *data;
	u32 i, j, d;
#ifdef IP_SET_HASH_WITH_NETS
	u8 k;
#endif

	t = ipset_dereference_protected(h->table, set);
	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = __ipset_dereference_protected(hbucket(t, i), 1);
		if (!n)
			continue;
		for (j = 0, d = 0; j < n->pos; j++) {
			if (!test_bit(j, n->used)) {
				d++;
				continue;
			}
			data = ahash_data(n, j, dsize);
			if (ip_set_timeout_expired(ext_timeout(data, set))) {
				pr_debug("expired %u/%u\n", i, j);
				clear_bit(j, n->used);
				smp_mb__after_atomic();
#ifdef IP_SET_HASH_WITH_NETS
				for (k = 0; k < IPSET_NET_COUNT; k++)
					mtype_del_cidr(h,
						NCIDR_PUT(DCIDR_GET(data->cidr,
								    k)),
						nets_length, k);
#endif
				ip_set_ext_destroy(set, data);
				h->elements--;
				d++;
			}
		}
		if (d >= AHASH_INIT_SIZE) {
			if (d >= n->size) {
				rcu_assign_pointer(hbucket(t, i), NULL);
				kfree_rcu(n, rcu);
				continue;
			}
			tmp = kzalloc(sizeof(*tmp) +
				      (n->size - AHASH_INIT_SIZE) * dsize,
				      GFP_ATOMIC);
			if (!tmp)
				/* Still try to delete expired elements */
				continue;
			tmp->size = n->size - AHASH_INIT_SIZE;
			for (j = 0, d = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, dsize);
				memcpy(tmp->value + d * dsize, data, dsize);
				set_bit(d, tmp->used);
				d++;
			}
			tmp->pos = d;
			rcu_assign_pointer(hbucket(t, i), tmp);
			kfree_rcu(n, rcu);
		}
	}
}
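
/* Shrink example (illustrative only): if at least AHASH_INIT_SIZE (4)
 * slots of a bucket are free after expiry, the bucket is reallocated one
 * step smaller, e.g. a size-12 bucket with 5 expired entries is copied
 * into a size-8 bucket with the survivors compacted to the front.
 */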
static void
mtype_gc(unsigned long ul_set)
{
	struct ip_set *set = (struct ip_set *)ul_set;
	struct htype *h = set->data;

	pr_debug("called\n");
	spin_lock_bh(&set->lock);
	mtype_expire(set, h, NLEN(set->family), set->dsize);
	spin_unlock_bh(&set->lock);

	h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
	add_timer(&h->gc);
}
/* Resize a hash: create a new hash table with doubled hashsize and insert
 * the elements into it. Repeat until we succeed or fail due to memory
 * pressure.
 */
static int
mtype_resize(struct ip_set *set, bool retried)
{
	struct htype *h = set->data;
	struct htable *t, *orig;
	u8 htable_bits;
	size_t dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
	u8 flags;
	struct mtype_elem *tmp;
#endif
	struct mtype_elem *data;
	struct mtype_elem *d;
	struct hbucket *n, *m;
	u32 i, j, key;
	int ret;

#ifdef IP_SET_HASH_WITH_NETS
	tmp = kmalloc(dsize, GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;
#endif
	rcu_read_lock_bh();
	orig = rcu_dereference_bh_nfnl(h->table);
	htable_bits = orig->htable_bits;
	rcu_read_unlock_bh();

retry:
	ret = 0;
	htable_bits++;
	if (!htable_bits) {
		/* In case we have plenty of memory :-) */
		pr_warn("Cannot increase the hashsize of set %s further\n",
			set->name);
		ret = -IPSET_ERR_HASH_FULL;
		goto out;
	}
	t = ip_set_alloc(htable_size(htable_bits));
	if (!t) {
		ret = -ENOMEM;
		goto out;
	}
	t->htable_bits = htable_bits;

	spin_lock_bh(&set->lock);
	orig = __ipset_dereference_protected(h->table, 1);
	/* There can't be another parallel resizing, but dumping is possible */
	atomic_set(&orig->ref, 1);
	atomic_inc(&orig->uref);
	pr_debug("attempt to resize set %s from %u to %u, t %p\n",
		 set->name, orig->htable_bits, htable_bits, orig);
	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
		n = __ipset_dereference_protected(hbucket(orig, i), 1);
		if (!n)
			continue;
		for (j = 0; j < n->pos; j++) {
			if (!test_bit(j, n->used))
				continue;
			data = ahash_data(n, j, dsize);
#ifdef IP_SET_HASH_WITH_NETS
			/* We have readers running parallel with us,
			 * so the live data cannot be modified.
			 */
			flags = 0;
			memcpy(tmp, data, dsize);
			data = tmp;
			mtype_data_reset_flags(data, &flags);
#endif
			key = HKEY(data, h->initval, htable_bits);
			m = __ipset_dereference_protected(hbucket(t, key), 1);
			if (!m) {
				m = kzalloc(sizeof(*m) +
					    AHASH_INIT_SIZE * dsize,
					    GFP_ATOMIC);
				if (!m) {
					ret = -ENOMEM;
					goto cleanup;
				}
				m->size = AHASH_INIT_SIZE;
				RCU_INIT_POINTER(hbucket(t, key), m);
			} else if (m->pos >= m->size) {
				struct hbucket *ht;

				if (m->size >= AHASH_MAX(h)) {
					ret = -EAGAIN;
				} else {
					ht = kzalloc(sizeof(*ht) +
						(m->size + AHASH_INIT_SIZE)
						* dsize,
						GFP_ATOMIC);
					if (!ht)
						ret = -ENOMEM;
				}
				if (ret < 0)
					goto cleanup;
				memcpy(ht, m, sizeof(struct hbucket) +
					      m->size * dsize);
				ht->size = m->size + AHASH_INIT_SIZE;
				kfree(m);
				m = ht;
				RCU_INIT_POINTER(hbucket(t, key), ht);
			}
			d = ahash_data(m, m->pos, dsize);
			memcpy(d, data, dsize);
			set_bit(m->pos++, m->used);
#ifdef IP_SET_HASH_WITH_NETS
			mtype_data_reset_flags(d, &flags);
#endif
		}
	}
	rcu_assign_pointer(h->table, t);

	spin_unlock_bh(&set->lock);

	/* Give time to other readers of the set */
	synchronize_rcu_bh();

	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
		 orig->htable_bits, orig, t->htable_bits, t);
	/* If there's nobody else dumping the table, destroy it */
	if (atomic_dec_and_test(&orig->uref)) {
		pr_debug("Table destroy by resize %p\n", orig);
		mtype_ahash_destroy(set, orig, false);
	}

out:
#ifdef IP_SET_HASH_WITH_NETS
	kfree(tmp);
#endif
	return ret;

cleanup:
	atomic_set(&orig->ref, 0);
	atomic_dec(&orig->uref);
	spin_unlock_bh(&set->lock);
	mtype_ahash_destroy(set, t, false);
	if (ret == -EAGAIN)
		goto retry;
	goto out;
}
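
/* Note on the retry path (illustrative summary, not part of the original
 * header): -EAGAIN from an over-full destination bucket tears down the
 * half-built table and restarts the whole copy with htable_bits + 1, so a
 * pathological key distribution keeps doubling the table until either the
 * copy fits or an allocation fails.
 */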
/* Add an element to a hash and update the internal counters on success,
 * otherwise report the proper error code.
 */
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t;
	const struct mtype_elem *d = value;
	struct mtype_elem *data;
	struct hbucket *n, *old = ERR_PTR(-ENOENT);
	int i, j = -1;
	bool flag_exist = flags & IPSET_FLAG_EXIST;
	bool deleted = false, forceadd = false, reuse = false;
	u32 key, multi = 0;

	if (h->elements >= h->maxelem) {
		if (SET_WITH_TIMEOUT(set))
			/* FIXME: when set is full, we slow down here */
			mtype_expire(set, h, NLEN(set->family), set->dsize);
		if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set))
			forceadd = true;
	}

	t = ipset_dereference_protected(h->table, set);
	key = HKEY(value, h->initval, t->htable_bits);
	n = __ipset_dereference_protected(hbucket(t, key), 1);
	if (!n) {
		if (forceadd) {
			if (net_ratelimit())
				pr_warn("Set %s is full, maxelem %u reached\n",
					set->name, h->maxelem);
			return -IPSET_ERR_HASH_FULL;
		} else if (h->elements >= h->maxelem) {
			goto set_full;
		}
		old = NULL;
		n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
			    GFP_ATOMIC);
		if (!n)
			return -ENOMEM;
		n->size = AHASH_INIT_SIZE;
		goto copy_elem;
	}
	for (i = 0; i < n->pos; i++) {
		if (!test_bit(i, n->used)) {
			/* Reuse first deleted entry */
			if (j == -1) {
				deleted = reuse = true;
				j = i;
			}
			continue;
		}
		data = ahash_data(n, i, set->dsize);
		if (mtype_data_equal(data, d, &multi)) {
			if (flag_exist ||
			    (SET_WITH_TIMEOUT(set) &&
			     ip_set_timeout_expired(ext_timeout(data, set)))) {
				/* Just the extensions could be overwritten */
				j = i;
				goto overwrite_extensions;
			}
			return -IPSET_ERR_EXIST;
		}
		/* Reuse first timed out entry */
		if (SET_WITH_TIMEOUT(set) &&
		    ip_set_timeout_expired(ext_timeout(data, set)) &&
		    j == -1) {
			j = i;
			reuse = true;
		}
	}
	if (reuse || forceadd) {
		data = ahash_data(n, j, set->dsize);
		if (!deleted) {
#ifdef IP_SET_HASH_WITH_NETS
			for (i = 0; i < IPSET_NET_COUNT; i++)
				mtype_del_cidr(h,
					NCIDR_PUT(DCIDR_GET(data->cidr, i)),
					NLEN(set->family), i);
#endif
			ip_set_ext_destroy(set, data);
			h->elements--;
		}
		goto copy_data;
	}
	if (h->elements >= h->maxelem)
		goto set_full;
	/* Create a new slot */
	if (n->pos >= n->size) {
		TUNE_AHASH_MAX(h, multi);
		if (n->size >= AHASH_MAX(h)) {
			/* Trigger rehashing */
			mtype_data_next(&h->next, d);
			return -EAGAIN;
		}
		old = n;
		n = kzalloc(sizeof(*n) +
			    (old->size + AHASH_INIT_SIZE) * set->dsize,
			    GFP_ATOMIC);
		if (!n)
			return -ENOMEM;
		memcpy(n, old, sizeof(struct hbucket) +
		       old->size * set->dsize);
		n->size = old->size + AHASH_INIT_SIZE;
	}

copy_elem:
	j = n->pos++;
	data = ahash_data(n, j, set->dsize);
copy_data:
	h->elements++;
#ifdef IP_SET_HASH_WITH_NETS
	for (i = 0; i < IPSET_NET_COUNT; i++)
		mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)),
			       NLEN(set->family), i);
#endif
	memcpy(data, d, sizeof(struct mtype_elem));
overwrite_extensions:
#ifdef IP_SET_HASH_WITH_NETS
	mtype_data_set_flags(data, flags);
#endif
	if (SET_WITH_COUNTER(set))
		ip_set_init_counter(ext_counter(data, set), ext);
	if (SET_WITH_COMMENT(set))
		ip_set_init_comment(ext_comment(data, set), ext);
	if (SET_WITH_SKBINFO(set))
		ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
	/* Must come last for the case when timed out entry is reused */
	if (SET_WITH_TIMEOUT(set))
		ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
	smp_mb__before_atomic();
	set_bit(j, n->used);
	if (old != ERR_PTR(-ENOENT)) {
		rcu_assign_pointer(hbucket(t, key), n);
		if (old)
			kfree_rcu(old, rcu);
	}

	return 0;

set_full:
	if (net_ratelimit())
		pr_warn("Set %s is full, maxelem %u reached\n",
			set->name, h->maxelem);
	return -IPSET_ERR_HASH_FULL;
}
/* Delete an element from the hash and free up space if possible.
 */
static int
mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t;
	const struct mtype_elem *d = value;
	struct mtype_elem *data;
	struct hbucket *n;
	int i, j, k, ret = -IPSET_ERR_EXIST;
	u32 key, multi = 0;
	size_t dsize = set->dsize;

	t = ipset_dereference_protected(h->table, set);
	key = HKEY(value, h->initval, t->htable_bits);
	n = __ipset_dereference_protected(hbucket(t, key), 1);
	if (!n)
		goto out;
	for (i = 0, k = 0; i < n->pos; i++) {
		if (!test_bit(i, n->used)) {
			k++;
			continue;
		}
		data = ahash_data(n, i, dsize);
		if (!mtype_data_equal(data, d, &multi))
			continue;
		if (SET_WITH_TIMEOUT(set) &&
		    ip_set_timeout_expired(ext_timeout(data, set)))
			goto out;

		ret = 0;
		clear_bit(i, n->used);
		smp_mb__after_atomic();
		if (i + 1 == n->pos)
			n->pos--;
		h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
		for (j = 0; j < IPSET_NET_COUNT; j++)
			mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
				       NLEN(set->family), j);
#endif
		ip_set_ext_destroy(set, data);

		for (; i < n->pos; i++) {
			if (!test_bit(i, n->used))
				k++;
		}
		if (n->pos == 0 && k == 0) {
			rcu_assign_pointer(hbucket(t, key), NULL);
			kfree_rcu(n, rcu);
		} else if (k >= AHASH_INIT_SIZE) {
			struct hbucket *tmp = kzalloc(sizeof(*tmp) +
					(n->size - AHASH_INIT_SIZE) * dsize,
					GFP_ATOMIC);
			if (!tmp)
				goto out;
			tmp->size = n->size - AHASH_INIT_SIZE;
			for (j = 0, k = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, dsize);
				memcpy(tmp->value + k * dsize, data, dsize);
				/* Mark the slot the entry was copied to (k),
				 * not its old position (j).
				 */
				set_bit(k, tmp->used);
				k++;
			}
			tmp->pos = k;
			rcu_assign_pointer(hbucket(t, key), tmp);
			kfree_rcu(n, rcu);
		}
		goto out;
	}

out:
	return ret;
}
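
/* Compaction example (illustrative only): deleting from a size-8 bucket
 * that ends up with 4 unused slots reallocates it at size 4 and copies the
 * survivors to slots 0..k-1; the used bitmap must therefore be set at the
 * destination index k, not at the source index j as the original code did.
 */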
static inline int
mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
		 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
{
	if (SET_WITH_COUNTER(set))
		ip_set_update_counter(ext_counter(data, set),
				      ext, mext, flags);
	if (SET_WITH_SKBINFO(set))
		ip_set_get_skbinfo(ext_skbinfo(data, set),
				   ext, mext, flags);
	return mtype_do_data_match(data);
}
#ifdef IP_SET_HASH_WITH_NETS
/* Special test function which takes into account the different network
 * sizes added to the set
 */
static int
mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
		 const struct ip_set_ext *ext,
		 struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t = rcu_dereference_bh(h->table);
	struct hbucket *n;
	struct mtype_elem *data;
#if IPSET_NET_COUNT == 2
	struct mtype_elem orig = *d;
	int i, j = 0, k;
#else
	int i, j = 0;
#endif
	u32 key, multi = 0;
	u8 nets_length = NLEN(set->family);

	pr_debug("test by nets\n");
	for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) {
#if IPSET_NET_COUNT == 2
		mtype_data_reset_elem(d, &orig);
		mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
		for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi;
		     k++) {
			mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
					   true);
#else
		mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
#endif
		key = HKEY(d, h->initval, t->htable_bits);
		n = rcu_dereference_bh(hbucket(t, key));
		if (!n)
			continue;
		for (i = 0; i < n->pos; i++) {
			if (!test_bit(i, n->used))
				continue;
			data = ahash_data(n, i, set->dsize);
			if (!mtype_data_equal(data, d, &multi))
				continue;
			if (SET_WITH_TIMEOUT(set)) {
				if (!ip_set_timeout_expired(
						ext_timeout(data, set)))
					return mtype_data_match(data, ext,
								mext, set,
								flags);
#ifdef IP_SET_HASH_WITH_MULTI
				multi = 0;
#endif
			} else
				return mtype_data_match(data, ext,
							mext, set, flags);
		}
#if IPSET_NET_COUNT == 2
		}
#endif
	}
	return 0;
}
#endif
/* Test whether the element is added to the set */
static int
mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	   struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t;
	struct mtype_elem *d = value;
	struct hbucket *n;
	struct mtype_elem *data;
	int i, ret = 0;
	u32 key, multi = 0;

	t = rcu_dereference_bh(h->table);
#ifdef IP_SET_HASH_WITH_NETS
	/* If we test an IP address and not a network address,
	 * try all possible network sizes
	 */
	for (i = 0; i < IPSET_NET_COUNT; i++)
		if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family))
			break;
	if (i == IPSET_NET_COUNT) {
		ret = mtype_test_cidrs(set, d, ext, mext, flags);
		goto out;
	}
#endif
	key = HKEY(d, h->initval, t->htable_bits);
	n = rcu_dereference_bh(hbucket(t, key));
	if (!n) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < n->pos; i++) {
		if (!test_bit(i, n->used))
			continue;
		data = ahash_data(n, i, set->dsize);
		if (mtype_data_equal(data, d, &multi) &&
		    !(SET_WITH_TIMEOUT(set) &&
		      ip_set_timeout_expired(ext_timeout(data, set)))) {
			ret = mtype_data_match(data, ext, mext, set, flags);
			goto out;
		}
	}
out:
	return ret;
}
/* Reply a HEADER request: fill out the header part of the set */
static int
mtype_head(struct ip_set *set, struct sk_buff *skb)
{
	const struct htype *h = set->data;
	const struct htable *t;
	struct nlattr *nested;
	size_t memsize;
	u8 htable_bits;

	rcu_read_lock_bh();
	t = rcu_dereference_bh_nfnl(h->table);
	memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize);
	htable_bits = t->htable_bits;
	rcu_read_unlock_bh();

	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
	if (!nested)
		goto nla_put_failure;
	if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
			  htonl(jhash_size(htable_bits))) ||
	    nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
		goto nla_put_failure;
#ifdef IP_SET_HASH_WITH_NETMASK
	if (h->netmask != HOST_MASK &&
	    nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
		goto nla_put_failure;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
		goto nla_put_failure;
#endif
	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
		goto nla_put_failure;
	if (unlikely(ip_set_put_flags(skb, set)))
		goto nla_put_failure;
	ipset_nest_end(skb, nested);

	return 0;
nla_put_failure:
	return -EMSGSIZE;
}
/* Make it possible to run dumping in parallel with resizing */
static void
mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
{
	struct htype *h = set->data;
	struct htable *t;

	if (start) {
		rcu_read_lock_bh();
		t = rcu_dereference_bh_nfnl(h->table);
		atomic_inc(&t->uref);
		cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
		rcu_read_unlock_bh();
	} else if (cb->args[IPSET_CB_PRIVATE]) {
		t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
		if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
			/* Resizing didn't destroy the hash table */
			pr_debug("Table destroy by dump: %p\n", t);
			mtype_ahash_destroy(set, t, false);
		}
		cb->args[IPSET_CB_PRIVATE] = 0;
	}
}
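
/* Lifetime sketch (illustrative only): a dump pins the table via t->uref
 * at start; a concurrent resize marks the old table with t->ref instead of
 * freeing it, so whichever side drops the last uref destroys the table.
 */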
/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
mtype_list(const struct ip_set *set,
	   struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct htable *t;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct mtype_elem *e;
	u32 first = cb->args[IPSET_CB_ARG0];
	/* We assume that one hash bucket fits into one page */
	void *incomplete;
	int i, ret = 0;

	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;

	pr_debug("list hash set %s\n", set->name);
	t = (const struct htable *)cb->args[IPSET_CB_PRIVATE];
	/* Expire may replace a hbucket with another one */
	rcu_read_lock();
	for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
	     cb->args[IPSET_CB_ARG0]++) {
		incomplete = skb_tail_pointer(skb);
		n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
		pr_debug("cb->arg bucket: %lu, t %p n %p\n",
			 cb->args[IPSET_CB_ARG0], t, n);
		if (!n)
			continue;
		for (i = 0; i < n->pos; i++) {
			if (!test_bit(i, n->used))
				continue;
			e = ahash_data(n, i, set->dsize);
			if (SET_WITH_TIMEOUT(set) &&
			    ip_set_timeout_expired(ext_timeout(e, set)))
				continue;
			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
				 cb->args[IPSET_CB_ARG0], n, i, e);
			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[IPSET_CB_ARG0] == first) {
					nla_nest_cancel(skb, atd);
					ret = -EMSGSIZE;
					goto out;
				}
				goto nla_put_failure;
			}
			if (mtype_data_list(skb, e))
				goto nla_put_failure;
			if (ip_set_put_extensions(skb, set, e, true))
				goto nla_put_failure;
			ipset_nest_end(skb, nested);
		}
	}
	ipset_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[IPSET_CB_ARG0] = 0;
	goto out;

nla_put_failure:
	nlmsg_trim(skb, incomplete);
	if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
		pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
			set->name);
		cb->args[IPSET_CB_ARG0] = 0;
		ret = -EMSGSIZE;
	} else {
		ipset_nest_end(skb, atd);
	}
out:
	rcu_read_unlock();
	return ret;
}
static int
IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
			  const struct xt_action_param *par,
			  enum ipset_adt adt, struct ip_set_adt_opt *opt);

static int
IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
			  enum ipset_adt adt, u32 *lineno, u32 flags,
			  bool retried);

static const struct ip_set_type_variant mtype_variant = {
	.kadt	= mtype_kadt,
	.uadt	= mtype_uadt,
	.adt	= {
		[IPSET_ADD] = mtype_add,
		[IPSET_DEL] = mtype_del,
		[IPSET_TEST] = mtype_test,
	},
	.destroy = mtype_destroy,
	.flush	= mtype_flush,
	.head	= mtype_head,
	.list	= mtype_list,
	.uref	= mtype_uref,
	.resize	= mtype_resize,
	.same_set = mtype_same_set,
};
#ifdef IP_SET_EMIT_CREATE
static int
IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
			    struct nlattr *tb[], u32 flags)
{
	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
#ifdef IP_SET_HASH_WITH_MARKMASK
	u32 markmask;
#endif
	u8 hbits;
#ifdef IP_SET_HASH_WITH_NETMASK
	u8 netmask;
#endif
	size_t hsize;
	struct htype *h;
	struct htable *t;

#ifndef IP_SET_PROTO_UNDEF
	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
		return -IPSET_ERR_INVALID_FAMILY;
#endif

#ifdef IP_SET_HASH_WITH_MARKMASK
	markmask = 0xffffffff;
#endif
#ifdef IP_SET_HASH_WITH_NETMASK
	netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
	pr_debug("Create set %s with family %s\n",
		 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
#endif

	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
		return -IPSET_ERR_PROTOCOL;
#ifdef IP_SET_HASH_WITH_MARKMASK
	/* Separated condition in order to avoid directive in argument list */
	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
		return -IPSET_ERR_PROTOCOL;
#endif

	if (tb[IPSET_ATTR_HASHSIZE]) {
		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
		if (hashsize < IPSET_MIMINAL_HASHSIZE)
			hashsize = IPSET_MIMINAL_HASHSIZE;
	}

	if (tb[IPSET_ATTR_MAXELEM])
		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);

#ifdef IP_SET_HASH_WITH_NETMASK
	if (tb[IPSET_ATTR_NETMASK]) {
		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);

		if ((set->family == NFPROTO_IPV4 && netmask > 32) ||
		    (set->family == NFPROTO_IPV6 && netmask > 128) ||
		    netmask == 0)
			return -IPSET_ERR_INVALID_NETMASK;
	}
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	if (tb[IPSET_ATTR_MARKMASK]) {
		markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));

		if (markmask == 0)
			return -IPSET_ERR_INVALID_MARKMASK;
	}
#endif

	hsize = sizeof(*h);
#ifdef IP_SET_HASH_WITH_NETS
	hsize += sizeof(struct net_prefixes) * NLEN(set->family);
#endif
	h = kzalloc(hsize, GFP_KERNEL);
	if (!h)
		return -ENOMEM;

	h->maxelem = maxelem;
#ifdef IP_SET_HASH_WITH_NETMASK
	h->netmask = netmask;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	h->markmask = markmask;
#endif
	get_random_bytes(&h->initval, sizeof(h->initval));
	set->timeout = IPSET_NO_TIMEOUT;

	hbits = htable_bits(hashsize);
	hsize = htable_size(hbits);
	if (hsize == 0) {
		kfree(h);
		return -ENOMEM;
	}
	t = ip_set_alloc(hsize);
	if (!t) {
		kfree(h);
		return -ENOMEM;
	}
	t->htable_bits = hbits;
	rcu_assign_pointer(h->table, t);

	set->data = h;
#ifndef IP_SET_PROTO_UNDEF
	if (set->family == NFPROTO_IPV4) {
#endif
		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
		set->dsize = ip_set_elem_len(set, tb,
			sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
			__alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
#ifndef IP_SET_PROTO_UNDEF
	} else {
		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
		set->dsize = ip_set_elem_len(set, tb,
			sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
			__alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
	}
#endif
	if (tb[IPSET_ATTR_TIMEOUT]) {
		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
#ifndef IP_SET_PROTO_UNDEF
		if (set->family == NFPROTO_IPV4)
#endif
			IPSET_TOKEN(HTYPE, 4_gc_init)(set,
				IPSET_TOKEN(HTYPE, 4_gc));
#ifndef IP_SET_PROTO_UNDEF
		else
			IPSET_TOKEN(HTYPE, 6_gc_init)(set,
				IPSET_TOKEN(HTYPE, 6_gc));
#endif
	}
	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
		 set->name, jhash_size(t->htable_bits),
		 t->htable_bits, h->maxelem, set->data, t);

	return 0;
}
#endif /* IP_SET_EMIT_CREATE */

#undef HKEY_DATALEN