nfnetlink_acct.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544
  1. /*
  2. * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
  3. * (C) 2011 Intra2net AG <http://www.intra2net.com>
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License version 2 as
  7. * published by the Free Software Foundation (or any later at your option).
  8. */
  9. #include <linux/init.h>
  10. #include <linux/module.h>
  11. #include <linux/kernel.h>
  12. #include <linux/skbuff.h>
  13. #include <linux/atomic.h>
  14. #include <linux/netlink.h>
  15. #include <linux/rculist.h>
  16. #include <linux/slab.h>
  17. #include <linux/types.h>
  18. #include <linux/errno.h>
  19. #include <net/netlink.h>
  20. #include <net/sock.h>
  21. #include <linux/netfilter.h>
  22. #include <linux/netfilter/nfnetlink.h>
  23. #include <linux/netfilter/nfnetlink_acct.h>
/* Module metadata: license, author and one-line description. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
  27. struct nf_acct {
  28. atomic64_t pkts;
  29. atomic64_t bytes;
  30. unsigned long flags;
  31. struct list_head head;
  32. atomic_t refcnt;
  33. char name[NFACCT_NAME_MAX];
  34. struct rcu_head rcu_head;
  35. char data[0];
  36. };
/*
 * Dump filter supplied by userspace: an object is reported in a dump only
 * when (object flags & mask) == value.
 */
struct nfacct_filter {
	u32 value;	/* expected result of flags & mask */
	u32 mask;	/* flag bits to look at */
};
/* Either quota mode; object creation rejects requests that set both. */
#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
/* Bit index (not mask) used with the bitops on nf_acct.flags. */
#define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */
  43. static int
  44. nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
  45. const struct nlmsghdr *nlh, const struct nlattr * const tb[])
  46. {
  47. struct nf_acct *nfacct, *matching = NULL;
  48. struct net *net = sock_net(nfnl);
  49. char *acct_name;
  50. unsigned int size = 0;
  51. u32 flags = 0;
  52. if (!tb[NFACCT_NAME])
  53. return -EINVAL;
  54. acct_name = nla_data(tb[NFACCT_NAME]);
  55. if (strlen(acct_name) == 0)
  56. return -EINVAL;
  57. list_for_each_entry(nfacct, &net->nfnl_acct_list, head) {
  58. if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
  59. continue;
  60. if (nlh->nlmsg_flags & NLM_F_EXCL)
  61. return -EEXIST;
  62. matching = nfacct;
  63. break;
  64. }
  65. if (matching) {
  66. if (nlh->nlmsg_flags & NLM_F_REPLACE) {
  67. /* reset counters if you request a replacement. */
  68. atomic64_set(&matching->pkts, 0);
  69. atomic64_set(&matching->bytes, 0);
  70. smp_mb__before_atomic();
  71. /* reset overquota flag if quota is enabled. */
  72. if ((matching->flags & NFACCT_F_QUOTA))
  73. clear_bit(NFACCT_OVERQUOTA_BIT,
  74. &matching->flags);
  75. return 0;
  76. }
  77. return -EBUSY;
  78. }
  79. if (tb[NFACCT_FLAGS]) {
  80. flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS]));
  81. if (flags & ~NFACCT_F_QUOTA)
  82. return -EOPNOTSUPP;
  83. if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA)
  84. return -EINVAL;
  85. if (flags & NFACCT_F_OVERQUOTA)
  86. return -EINVAL;
  87. size += sizeof(u64);
  88. }
  89. nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL);
  90. if (nfacct == NULL)
  91. return -ENOMEM;
  92. if (flags & NFACCT_F_QUOTA) {
  93. u64 *quota = (u64 *)nfacct->data;
  94. *quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA]));
  95. nfacct->flags = flags;
  96. }
  97. strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
  98. if (tb[NFACCT_BYTES]) {
  99. atomic64_set(&nfacct->bytes,
  100. be64_to_cpu(nla_get_be64(tb[NFACCT_BYTES])));
  101. }
  102. if (tb[NFACCT_PKTS]) {
  103. atomic64_set(&nfacct->pkts,
  104. be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
  105. }
  106. atomic_set(&nfacct->refcnt, 1);
  107. list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list);
  108. return 0;
  109. }
  110. static int
  111. nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
  112. int event, struct nf_acct *acct)
  113. {
  114. struct nlmsghdr *nlh;
  115. struct nfgenmsg *nfmsg;
  116. unsigned int flags = portid ? NLM_F_MULTI : 0;
  117. u64 pkts, bytes;
  118. u32 old_flags;
  119. event |= NFNL_SUBSYS_ACCT << 8;
  120. nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
  121. if (nlh == NULL)
  122. goto nlmsg_failure;
  123. nfmsg = nlmsg_data(nlh);
  124. nfmsg->nfgen_family = AF_UNSPEC;
  125. nfmsg->version = NFNETLINK_V0;
  126. nfmsg->res_id = 0;
  127. if (nla_put_string(skb, NFACCT_NAME, acct->name))
  128. goto nla_put_failure;
  129. old_flags = acct->flags;
  130. if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
  131. pkts = atomic64_xchg(&acct->pkts, 0);
  132. bytes = atomic64_xchg(&acct->bytes, 0);
  133. smp_mb__before_atomic();
  134. if (acct->flags & NFACCT_F_QUOTA)
  135. clear_bit(NFACCT_OVERQUOTA_BIT, &acct->flags);
  136. } else {
  137. pkts = atomic64_read(&acct->pkts);
  138. bytes = atomic64_read(&acct->bytes);
  139. }
  140. if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts)) ||
  141. nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) ||
  142. nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
  143. goto nla_put_failure;
  144. if (acct->flags & NFACCT_F_QUOTA) {
  145. u64 *quota = (u64 *)acct->data;
  146. if (nla_put_be32(skb, NFACCT_FLAGS, htonl(old_flags)) ||
  147. nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota)))
  148. goto nla_put_failure;
  149. }
  150. nlmsg_end(skb, nlh);
  151. return skb->len;
  152. nlmsg_failure:
  153. nla_put_failure:
  154. nlmsg_cancel(skb, nlh);
  155. return -1;
  156. }
  157. static int
  158. nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
  159. {
  160. struct net *net = sock_net(skb->sk);
  161. struct nf_acct *cur, *last;
  162. const struct nfacct_filter *filter = cb->data;
  163. if (cb->args[2])
  164. return 0;
  165. last = (struct nf_acct *)cb->args[1];
  166. if (cb->args[1])
  167. cb->args[1] = 0;
  168. rcu_read_lock();
  169. list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) {
  170. if (last) {
  171. if (cur != last)
  172. continue;
  173. last = NULL;
  174. }
  175. if (filter && (cur->flags & filter->mask) != filter->value)
  176. continue;
  177. if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid,
  178. cb->nlh->nlmsg_seq,
  179. NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
  180. NFNL_MSG_ACCT_NEW, cur) < 0) {
  181. cb->args[1] = (unsigned long)cur;
  182. break;
  183. }
  184. }
  185. if (!cb->args[1])
  186. cb->args[2] = 1;
  187. rcu_read_unlock();
  188. return skb->len;
  189. }
  190. static int nfnl_acct_done(struct netlink_callback *cb)
  191. {
  192. kfree(cb->data);
  193. return 0;
  194. }
/* Validation policy for the attributes nested inside NFACCT_FILTER. */
static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = {
	[NFACCT_FILTER_MASK] = { .type = NLA_U32 },
	[NFACCT_FILTER_VALUE] = { .type = NLA_U32 },
};
  199. static struct nfacct_filter *
  200. nfacct_filter_alloc(const struct nlattr * const attr)
  201. {
  202. struct nfacct_filter *filter;
  203. struct nlattr *tb[NFACCT_FILTER_MAX + 1];
  204. int err;
  205. err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy);
  206. if (err < 0)
  207. return ERR_PTR(err);
  208. if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE])
  209. return ERR_PTR(-EINVAL);
  210. filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL);
  211. if (!filter)
  212. return ERR_PTR(-ENOMEM);
  213. filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK]));
  214. filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE]));
  215. return filter;
  216. }
  217. static int
  218. nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
  219. const struct nlmsghdr *nlh, const struct nlattr * const tb[])
  220. {
  221. struct net *net = sock_net(nfnl);
  222. int ret = -ENOENT;
  223. struct nf_acct *cur;
  224. char *acct_name;
  225. if (nlh->nlmsg_flags & NLM_F_DUMP) {
  226. struct netlink_dump_control c = {
  227. .dump = nfnl_acct_dump,
  228. .done = nfnl_acct_done,
  229. };
  230. if (tb[NFACCT_FILTER]) {
  231. struct nfacct_filter *filter;
  232. filter = nfacct_filter_alloc(tb[NFACCT_FILTER]);
  233. if (IS_ERR(filter))
  234. return PTR_ERR(filter);
  235. c.data = filter;
  236. }
  237. return netlink_dump_start(nfnl, skb, nlh, &c);
  238. }
  239. if (!tb[NFACCT_NAME])
  240. return -EINVAL;
  241. acct_name = nla_data(tb[NFACCT_NAME]);
  242. list_for_each_entry(cur, &net->nfnl_acct_list, head) {
  243. struct sk_buff *skb2;
  244. if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
  245. continue;
  246. skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
  247. if (skb2 == NULL) {
  248. ret = -ENOMEM;
  249. break;
  250. }
  251. ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid,
  252. nlh->nlmsg_seq,
  253. NFNL_MSG_TYPE(nlh->nlmsg_type),
  254. NFNL_MSG_ACCT_NEW, cur);
  255. if (ret <= 0) {
  256. kfree_skb(skb2);
  257. break;
  258. }
  259. ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
  260. MSG_DONTWAIT);
  261. if (ret > 0)
  262. ret = 0;
  263. /* this avoids a loop in nfnetlink. */
  264. return ret == -EAGAIN ? -ENOBUFS : ret;
  265. }
  266. return ret;
  267. }
  268. /* try to delete object, fail if it is still in use. */
  269. static int nfnl_acct_try_del(struct nf_acct *cur)
  270. {
  271. int ret = 0;
  272. /* we want to avoid races with nfnl_acct_find_get. */
  273. if (atomic_dec_and_test(&cur->refcnt)) {
  274. /* We are protected by nfnl mutex. */
  275. list_del_rcu(&cur->head);
  276. kfree_rcu(cur, rcu_head);
  277. } else {
  278. /* still in use, restore reference counter. */
  279. atomic_inc(&cur->refcnt);
  280. ret = -EBUSY;
  281. }
  282. return ret;
  283. }
  284. static int
  285. nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
  286. const struct nlmsghdr *nlh, const struct nlattr * const tb[])
  287. {
  288. struct net *net = sock_net(nfnl);
  289. char *acct_name;
  290. struct nf_acct *cur;
  291. int ret = -ENOENT;
  292. if (!tb[NFACCT_NAME]) {
  293. list_for_each_entry(cur, &net->nfnl_acct_list, head)
  294. nfnl_acct_try_del(cur);
  295. return 0;
  296. }
  297. acct_name = nla_data(tb[NFACCT_NAME]);
  298. list_for_each_entry(cur, &net->nfnl_acct_list, head) {
  299. if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
  300. continue;
  301. ret = nfnl_acct_try_del(cur);
  302. if (ret < 0)
  303. return ret;
  304. break;
  305. }
  306. return ret;
  307. }
/*
 * Validation policy for the top-level NFACCT_* attributes. The name is a
 * NUL-terminated string of at most NFACCT_NAME_MAX - 1 characters.
 */
static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
	[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
	[NFACCT_BYTES] = { .type = NLA_U64 },
	[NFACCT_PKTS] = { .type = NLA_U64 },
	[NFACCT_FLAGS] = { .type = NLA_U32 },
	[NFACCT_QUOTA] = { .type = NLA_U64 },
	[NFACCT_FILTER] = {.type = NLA_NESTED },
};
/*
 * Message-type dispatch table. GET and GET_CTRZERO share one handler; every
 * entry uses the same attribute policy.
 */
static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
	[NFNL_MSG_ACCT_NEW] = { .call = nfnl_acct_new,
	.attr_count = NFACCT_MAX,
	.policy = nfnl_acct_policy },
	[NFNL_MSG_ACCT_GET] = { .call = nfnl_acct_get,
	.attr_count = NFACCT_MAX,
	.policy = nfnl_acct_policy },
	[NFNL_MSG_ACCT_GET_CTRZERO] = { .call = nfnl_acct_get,
	.attr_count = NFACCT_MAX,
	.policy = nfnl_acct_policy },
	[NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del,
	.attr_count = NFACCT_MAX,
	.policy = nfnl_acct_policy },
};
/* Subsystem descriptor registered with nfnetlink in nfnl_acct_init(). */
static const struct nfnetlink_subsystem nfnl_acct_subsys = {
	.name = "acct",
	.subsys_id = NFNL_SUBSYS_ACCT,
	.cb_count = NFNL_MSG_ACCT_MAX,
	.cb = nfnl_acct_cb,
};
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
  337. struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name)
  338. {
  339. struct nf_acct *cur, *acct = NULL;
  340. rcu_read_lock();
  341. list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) {
  342. if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
  343. continue;
  344. if (!try_module_get(THIS_MODULE))
  345. goto err;
  346. if (!atomic_inc_not_zero(&cur->refcnt)) {
  347. module_put(THIS_MODULE);
  348. goto err;
  349. }
  350. acct = cur;
  351. break;
  352. }
  353. err:
  354. rcu_read_unlock();
  355. return acct;
  356. }
  357. EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
  358. void nfnl_acct_put(struct nf_acct *acct)
  359. {
  360. if (atomic_dec_and_test(&acct->refcnt))
  361. kfree_rcu(acct, rcu_head);
  362. module_put(THIS_MODULE);
  363. }
  364. EXPORT_SYMBOL_GPL(nfnl_acct_put);
  365. void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
  366. {
  367. atomic64_inc(&nfacct->pkts);
  368. atomic64_add(skb->len, &nfacct->bytes);
  369. }
  370. EXPORT_SYMBOL_GPL(nfnl_acct_update);
  371. static void nfnl_overquota_report(struct nf_acct *nfacct)
  372. {
  373. int ret;
  374. struct sk_buff *skb;
  375. skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
  376. if (skb == NULL)
  377. return;
  378. ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0,
  379. nfacct);
  380. if (ret <= 0) {
  381. kfree_skb(skb);
  382. return;
  383. }
  384. netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
  385. GFP_ATOMIC);
  386. }
  387. int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
  388. {
  389. u64 now;
  390. u64 *quota;
  391. int ret = NFACCT_UNDERQUOTA;
  392. /* no place here if we don't have a quota */
  393. if (!(nfacct->flags & NFACCT_F_QUOTA))
  394. return NFACCT_NO_QUOTA;
  395. quota = (u64 *)nfacct->data;
  396. now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ?
  397. atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes);
  398. ret = now > *quota;
  399. if (now >= *quota &&
  400. !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) {
  401. nfnl_overquota_report(nfacct);
  402. }
  403. return ret;
  404. }
  405. EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
  406. static int __net_init nfnl_acct_net_init(struct net *net)
  407. {
  408. INIT_LIST_HEAD(&net->nfnl_acct_list);
  409. return 0;
  410. }
  411. static void __net_exit nfnl_acct_net_exit(struct net *net)
  412. {
  413. struct nf_acct *cur, *tmp;
  414. list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) {
  415. list_del_rcu(&cur->head);
  416. if (atomic_dec_and_test(&cur->refcnt))
  417. kfree_rcu(cur, rcu_head);
  418. }
  419. }
/* Per-network-namespace init/exit hooks for the accounting list. */
static struct pernet_operations nfnl_acct_ops = {
	.init = nfnl_acct_net_init,
	.exit = nfnl_acct_net_exit,
};
  424. static int __init nfnl_acct_init(void)
  425. {
  426. int ret;
  427. ret = register_pernet_subsys(&nfnl_acct_ops);
  428. if (ret < 0) {
  429. pr_err("nfnl_acct_init: failed to register pernet ops\n");
  430. goto err_out;
  431. }
  432. pr_info("nfnl_acct: registering with nfnetlink.\n");
  433. ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
  434. if (ret < 0) {
  435. pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
  436. goto cleanup_pernet;
  437. }
  438. return 0;
  439. cleanup_pernet:
  440. unregister_pernet_subsys(&nfnl_acct_ops);
  441. err_out:
  442. return ret;
  443. }
/*
 * Module exit: undo nfnl_acct_init() in reverse order -- unregister from
 * nfnetlink first, then drop the per-netns operations.
 */
static void __exit nfnl_acct_exit(void)
{
	pr_info("nfnl_acct: unregistering from nfnetlink.\n");
	nfnetlink_subsys_unregister(&nfnl_acct_subsys);
	unregister_pernet_subsys(&nfnl_acct_ops);
}
/* Module entry and exit points. */
module_init(nfnl_acct_init);
module_exit(nfnl_acct_exit);