ipt_CLUSTERIP.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809
  1. /* Cluster IP hashmark target
  2. * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
  3. * based on ideas of Fabio Olive Leite <olive@unixforge.org>
  4. *
  5. * Development of this code funded by SuSE Linux AG, http://www.suse.com/
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License version 2 as
  9. * published by the Free Software Foundation.
  10. *
  11. */
  12. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  13. #include <linux/module.h>
  14. #include <linux/proc_fs.h>
  15. #include <linux/jhash.h>
  16. #include <linux/bitops.h>
  17. #include <linux/skbuff.h>
  18. #include <linux/slab.h>
  19. #include <linux/ip.h>
  20. #include <linux/tcp.h>
  21. #include <linux/udp.h>
  22. #include <linux/icmp.h>
  23. #include <linux/if_arp.h>
  24. #include <linux/seq_file.h>
  25. #include <linux/netfilter_arp.h>
  26. #include <linux/netfilter/x_tables.h>
  27. #include <linux/netfilter_ipv4/ip_tables.h>
  28. #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
  29. #include <net/netfilter/nf_conntrack.h>
  30. #include <net/net_namespace.h>
  31. #include <net/netns/generic.h>
  32. #include <net/checksum.h>
  33. #include <net/ip.h>
/* Version string printed by the module load and unload log messages. */
#define CLUSTERIP_VERSION "0.8"
/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
/*
 * State of one cluster IP address. Instances are linked into the
 * per-namespace list (struct clusterip_net) and looked up by address.
 *
 * Two counters are kept: "refcount" controls the lifetime of the memory,
 * "entries" counts the rules using the config and controls when it is
 * unlinked from the list (see clusterip_config_entry_put()).
 */
struct clusterip_config {
	struct list_head list;			/* list of all configs */
	atomic_t refcount;			/* reference count */
	atomic_t entries;			/* number of entries/rules
						 * referencing us */
	__be32 clusterip;			/* the IP address */
	u_int8_t clustermac[ETH_ALEN];		/* the MAC address */
	struct net_device *dev;			/* device */
	u_int16_t num_total_nodes;		/* total number of nodes */
	unsigned long local_nodes;		/* node number array; used as a
						 * bitmask, bit (n - 1) stands
						 * for node number n */
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *pde;		/* proc dir entry */
#endif
	enum clusterip_hashmode hash_mode;	/* which hashing mode */
	u_int32_t hash_initval;			/* hash initialization */
	struct rcu_head rcu;			/* handed to call_rcu_bh() when
						 * the config is freed */
};
#ifdef CONFIG_PROC_FS
/* Forward declaration; the table is defined after the proc handlers below. */
static const struct file_operations clusterip_proc_fops;
#endif
/* Key passed to net_generic() to fetch the per-namespace struct clusterip_net. */
static int clusterip_net_id __read_mostly;
/* Per network namespace state, obtained through net_generic(). */
struct clusterip_net {
	/* all struct clusterip_config instances of this namespace */
	struct list_head configs;
	/* lock protects the configs list */
	spinlock_t lock;
#ifdef CONFIG_PROC_FS
	/* the "ipt_CLUSTERIP" directory holding one file per cluster IP */
	struct proc_dir_entry *procdir;
#endif
};
  67. static inline void
  68. clusterip_config_get(struct clusterip_config *c)
  69. {
  70. atomic_inc(&c->refcount);
  71. }
  72. static void clusterip_config_rcu_free(struct rcu_head *head)
  73. {
  74. kfree(container_of(head, struct clusterip_config, rcu));
  75. }
  76. static inline void
  77. clusterip_config_put(struct clusterip_config *c)
  78. {
  79. if (atomic_dec_and_test(&c->refcount))
  80. call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
  81. }
  82. /* decrease the count of entries using/referencing this config. If last
  83. * entry(rule) is removed, remove the config from lists, but don't free it
  84. * yet, since proc-files could still be holding references */
  85. static inline void
  86. clusterip_config_entry_put(struct clusterip_config *c)
  87. {
  88. struct net *net = dev_net(c->dev);
  89. struct clusterip_net *cn = net_generic(net, clusterip_net_id);
  90. local_bh_disable();
  91. if (atomic_dec_and_lock(&c->entries, &cn->lock)) {
  92. list_del_rcu(&c->list);
  93. spin_unlock(&cn->lock);
  94. local_bh_enable();
  95. dev_mc_del(c->dev, c->clustermac);
  96. dev_put(c->dev);
  97. /* In case anyone still accesses the file, the open/close
  98. * functions are also incrementing the refcount on their own,
  99. * so it's safe to remove the entry even if it's in use. */
  100. #ifdef CONFIG_PROC_FS
  101. proc_remove(c->pde);
  102. #endif
  103. return;
  104. }
  105. local_bh_enable();
  106. }
  107. static struct clusterip_config *
  108. __clusterip_config_find(struct net *net, __be32 clusterip)
  109. {
  110. struct clusterip_config *c;
  111. struct clusterip_net *cn = net_generic(net, clusterip_net_id);
  112. list_for_each_entry_rcu(c, &cn->configs, list) {
  113. if (c->clusterip == clusterip)
  114. return c;
  115. }
  116. return NULL;
  117. }
  118. static inline struct clusterip_config *
  119. clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
  120. {
  121. struct clusterip_config *c;
  122. rcu_read_lock_bh();
  123. c = __clusterip_config_find(net, clusterip);
  124. if (c) {
  125. if (unlikely(!atomic_inc_not_zero(&c->refcount)))
  126. c = NULL;
  127. else if (entry)
  128. atomic_inc(&c->entries);
  129. }
  130. rcu_read_unlock_bh();
  131. return c;
  132. }
  133. static void
  134. clusterip_config_init_nodelist(struct clusterip_config *c,
  135. const struct ipt_clusterip_tgt_info *i)
  136. {
  137. int n;
  138. for (n = 0; n < i->num_local_nodes; n++)
  139. set_bit(i->local_nodes[n] - 1, &c->local_nodes);
  140. }
  141. static struct clusterip_config *
  142. clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
  143. struct net_device *dev)
  144. {
  145. struct clusterip_config *c;
  146. struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id);
  147. c = kzalloc(sizeof(*c), GFP_ATOMIC);
  148. if (!c)
  149. return NULL;
  150. c->dev = dev;
  151. c->clusterip = ip;
  152. memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
  153. c->num_total_nodes = i->num_total_nodes;
  154. clusterip_config_init_nodelist(c, i);
  155. c->hash_mode = i->hash_mode;
  156. c->hash_initval = i->hash_initval;
  157. atomic_set(&c->refcount, 1);
  158. atomic_set(&c->entries, 1);
  159. #ifdef CONFIG_PROC_FS
  160. {
  161. char buffer[16];
  162. /* create proc dir entry */
  163. sprintf(buffer, "%pI4", &ip);
  164. c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR,
  165. cn->procdir,
  166. &clusterip_proc_fops, c);
  167. if (!c->pde) {
  168. kfree(c);
  169. return NULL;
  170. }
  171. }
  172. #endif
  173. spin_lock_bh(&cn->lock);
  174. list_add_rcu(&c->list, &cn->configs);
  175. spin_unlock_bh(&cn->lock);
  176. return c;
  177. }
  178. #ifdef CONFIG_PROC_FS
  179. static int
  180. clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
  181. {
  182. if (nodenum == 0 ||
  183. nodenum > c->num_total_nodes)
  184. return 1;
  185. /* check if we already have this number in our bitfield */
  186. if (test_and_set_bit(nodenum - 1, &c->local_nodes))
  187. return 1;
  188. return 0;
  189. }
  190. static bool
  191. clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
  192. {
  193. if (nodenum == 0 ||
  194. nodenum > c->num_total_nodes)
  195. return true;
  196. if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
  197. return false;
  198. return true;
  199. }
  200. #endif
  201. static inline u_int32_t
  202. clusterip_hashfn(const struct sk_buff *skb,
  203. const struct clusterip_config *config)
  204. {
  205. const struct iphdr *iph = ip_hdr(skb);
  206. unsigned long hashval;
  207. u_int16_t sport = 0, dport = 0;
  208. int poff;
  209. poff = proto_ports_offset(iph->protocol);
  210. if (poff >= 0) {
  211. const u_int16_t *ports;
  212. u16 _ports[2];
  213. ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
  214. if (ports) {
  215. sport = ports[0];
  216. dport = ports[1];
  217. }
  218. } else {
  219. net_info_ratelimited("unknown protocol %u\n", iph->protocol);
  220. }
  221. switch (config->hash_mode) {
  222. case CLUSTERIP_HASHMODE_SIP:
  223. hashval = jhash_1word(ntohl(iph->saddr),
  224. config->hash_initval);
  225. break;
  226. case CLUSTERIP_HASHMODE_SIP_SPT:
  227. hashval = jhash_2words(ntohl(iph->saddr), sport,
  228. config->hash_initval);
  229. break;
  230. case CLUSTERIP_HASHMODE_SIP_SPT_DPT:
  231. hashval = jhash_3words(ntohl(iph->saddr), sport, dport,
  232. config->hash_initval);
  233. break;
  234. default:
  235. /* to make gcc happy */
  236. hashval = 0;
  237. /* This cannot happen, unless the check function wasn't called
  238. * at rule load time */
  239. pr_info("unknown mode %u\n", config->hash_mode);
  240. BUG();
  241. break;
  242. }
  243. /* node numbers are 1..n, not 0..n */
  244. return reciprocal_scale(hashval, config->num_total_nodes) + 1;
  245. }
  246. static inline int
  247. clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
  248. {
  249. return test_bit(hash - 1, &config->local_nodes);
  250. }
  251. /***********************************************************************
  252. * IPTABLES TARGET
  253. ***********************************************************************/
  254. static unsigned int
  255. clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
  256. {
  257. const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
  258. struct nf_conn *ct;
  259. enum ip_conntrack_info ctinfo;
  260. u_int32_t hash;
  261. /* don't need to clusterip_config_get() here, since refcount
  262. * is only decremented by destroy() - and ip_tables guarantees
  263. * that the ->target() function isn't called after ->destroy() */
  264. ct = nf_ct_get(skb, &ctinfo);
  265. if (ct == NULL)
  266. return NF_DROP;
  267. /* special case: ICMP error handling. conntrack distinguishes between
  268. * error messages (RELATED) and information requests (see below) */
  269. if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
  270. (ctinfo == IP_CT_RELATED ||
  271. ctinfo == IP_CT_RELATED_REPLY))
  272. return XT_CONTINUE;
  273. /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
  274. * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
  275. * on, which all have an ID field [relevant for hashing]. */
  276. hash = clusterip_hashfn(skb, cipinfo->config);
  277. switch (ctinfo) {
  278. case IP_CT_NEW:
  279. ct->mark = hash;
  280. break;
  281. case IP_CT_RELATED:
  282. case IP_CT_RELATED_REPLY:
  283. /* FIXME: we don't handle expectations at the moment.
  284. * They can arrive on a different node than
  285. * the master connection (e.g. FTP passive mode) */
  286. case IP_CT_ESTABLISHED:
  287. case IP_CT_ESTABLISHED_REPLY:
  288. break;
  289. default: /* Prevent gcc warnings */
  290. break;
  291. }
  292. #ifdef DEBUG
  293. nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
  294. #endif
  295. pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
  296. if (!clusterip_responsible(cipinfo->config, hash)) {
  297. pr_debug("not responsible\n");
  298. return NF_DROP;
  299. }
  300. pr_debug("responsible\n");
  301. /* despite being received via linklayer multicast, this is
  302. * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
  303. skb->pkt_type = PACKET_HOST;
  304. return XT_CONTINUE;
  305. }
  306. static int clusterip_tg_check(const struct xt_tgchk_param *par)
  307. {
  308. struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
  309. const struct ipt_entry *e = par->entryinfo;
  310. struct clusterip_config *config;
  311. int ret, i;
  312. if (par->nft_compat) {
  313. pr_err("cannot use CLUSTERIP target from nftables compat\n");
  314. return -EOPNOTSUPP;
  315. }
  316. if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
  317. cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
  318. cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
  319. pr_info("unknown mode %u\n", cipinfo->hash_mode);
  320. return -EINVAL;
  321. }
  322. if (e->ip.dmsk.s_addr != htonl(0xffffffff) ||
  323. e->ip.dst.s_addr == 0) {
  324. pr_info("Please specify destination IP\n");
  325. return -EINVAL;
  326. }
  327. if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) {
  328. pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes);
  329. return -EINVAL;
  330. }
  331. for (i = 0; i < cipinfo->num_local_nodes; i++) {
  332. if (cipinfo->local_nodes[i] - 1 >=
  333. sizeof(config->local_nodes) * 8) {
  334. pr_info("bad local_nodes[%d] %u\n",
  335. i, cipinfo->local_nodes[i]);
  336. return -EINVAL;
  337. }
  338. }
  339. config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
  340. if (!config) {
  341. if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
  342. pr_info("no config found for %pI4, need 'new'\n",
  343. &e->ip.dst.s_addr);
  344. return -EINVAL;
  345. } else {
  346. struct net_device *dev;
  347. if (e->ip.iniface[0] == '\0') {
  348. pr_info("Please specify an interface name\n");
  349. return -EINVAL;
  350. }
  351. dev = dev_get_by_name(par->net, e->ip.iniface);
  352. if (!dev) {
  353. pr_info("no such interface %s\n",
  354. e->ip.iniface);
  355. return -ENOENT;
  356. }
  357. config = clusterip_config_init(cipinfo,
  358. e->ip.dst.s_addr, dev);
  359. if (!config) {
  360. dev_put(dev);
  361. return -ENOMEM;
  362. }
  363. dev_mc_add(config->dev, config->clustermac);
  364. }
  365. }
  366. cipinfo->config = config;
  367. ret = nf_ct_l3proto_try_module_get(par->family);
  368. if (ret < 0)
  369. pr_info("cannot load conntrack support for proto=%u\n",
  370. par->family);
  371. if (!par->net->xt.clusterip_deprecated_warning) {
  372. pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
  373. "use xt_cluster instead\n");
  374. par->net->xt.clusterip_deprecated_warning = true;
  375. }
  376. return ret;
  377. }
  378. /* drop reference count of cluster config when rule is deleted */
  379. static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
  380. {
  381. const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
  382. /* if no more entries are referencing the config, remove it
  383. * from the list and destroy the proc entry */
  384. clusterip_config_entry_put(cipinfo->config);
  385. clusterip_config_put(cipinfo->config);
  386. nf_ct_l3proto_module_put(par->family);
  387. }
  388. #ifdef CONFIG_COMPAT
/*
 * Compat variant of the target info in which the config pointer is a
 * compat_uptr_t. In this file it is only used for its size (.compatsize of
 * clusterip_tg_reg).
 * NOTE(review): presumably mirrors struct ipt_clusterip_tgt_info field for
 * field -- confirm against ipt_CLUSTERIP.h.
 */
struct compat_ipt_clusterip_tgt_info
{
	u_int32_t flags;
	u_int8_t clustermac[6];
	u_int16_t num_total_nodes;
	u_int16_t num_local_nodes;
	u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
	u_int32_t hash_mode;
	u_int32_t hash_initval;
	compat_uptr_t config;
};
  400. #endif /* CONFIG_COMPAT */
/*
 * Xtables registration record for the IPv4 "CLUSTERIP" target; registered
 * in clusterip_tg_init() and unregistered in clusterip_tg_exit().
 */
static struct xt_target clusterip_tg_reg __read_mostly = {
	.name		= "CLUSTERIP",
	.family		= NFPROTO_IPV4,
	.target		= clusterip_tg,		/* per-packet decision */
	.checkentry	= clusterip_tg_check,	/* rule validation at load */
	.destroy	= clusterip_tg_destroy,	/* rule deletion */
	.targetsize	= sizeof(struct ipt_clusterip_tgt_info),
#ifdef CONFIG_COMPAT
	.compatsize	= sizeof(struct compat_ipt_clusterip_tgt_info),
#endif /* CONFIG_COMPAT */
	.me		= THIS_MODULE
};
  413. /***********************************************************************
  414. * ARP MANGLING CODE
  415. ***********************************************************************/
/*
 * Address block that follows struct arphdr in an ARP packet.
 * hardcoded for 48bit ethernet and 32bit ipv4 addresses
 */
struct arp_payload {
	u_int8_t src_hw[ETH_ALEN];	/* sender hardware address */
	__be32 src_ip;			/* sender IP address */
	u_int8_t dst_hw[ETH_ALEN];	/* target hardware address */
	__be32 dst_ip;			/* target IP address */
} __packed;
  423. #ifdef DEBUG
  424. static void arp_print(struct arp_payload *payload)
  425. {
  426. #define HBUFFERLEN 30
  427. char hbuffer[HBUFFERLEN];
  428. int j, k;
  429. for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < ETH_ALEN; j++) {
  430. hbuffer[k++] = hex_asc_hi(payload->src_hw[j]);
  431. hbuffer[k++] = hex_asc_lo(payload->src_hw[j]);
  432. hbuffer[k++] = ':';
  433. }
  434. hbuffer[--k] = '\0';
  435. pr_debug("src %pI4@%s, dst %pI4\n",
  436. &payload->src_ip, hbuffer, &payload->dst_ip);
  437. }
  438. #endif
  439. static unsigned int
  440. arp_mangle(void *priv,
  441. struct sk_buff *skb,
  442. const struct nf_hook_state *state)
  443. {
  444. struct arphdr *arp = arp_hdr(skb);
  445. struct arp_payload *payload;
  446. struct clusterip_config *c;
  447. struct net *net = state->net;
  448. /* we don't care about non-ethernet and non-ipv4 ARP */
  449. if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
  450. arp->ar_pro != htons(ETH_P_IP) ||
  451. arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
  452. return NF_ACCEPT;
  453. /* we only want to mangle arp requests and replies */
  454. if (arp->ar_op != htons(ARPOP_REPLY) &&
  455. arp->ar_op != htons(ARPOP_REQUEST))
  456. return NF_ACCEPT;
  457. payload = (void *)(arp+1);
  458. /* if there is no clusterip configuration for the arp reply's
  459. * source ip, we don't want to mangle it */
  460. c = clusterip_config_find_get(net, payload->src_ip, 0);
  461. if (!c)
  462. return NF_ACCEPT;
  463. /* normally the linux kernel always replies to arp queries of
  464. * addresses on different interfacs. However, in the CLUSTERIP case
  465. * this wouldn't work, since we didn't subscribe the mcast group on
  466. * other interfaces */
  467. if (c->dev != state->out) {
  468. pr_debug("not mangling arp reply on different "
  469. "interface: cip'%s'-skb'%s'\n",
  470. c->dev->name, state->out->name);
  471. clusterip_config_put(c);
  472. return NF_ACCEPT;
  473. }
  474. /* mangle reply hardware address */
  475. memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
  476. #ifdef DEBUG
  477. pr_debug("mangled arp reply: ");
  478. arp_print(payload);
  479. #endif
  480. clusterip_config_put(c);
  481. return NF_ACCEPT;
  482. }
/* Hook description that runs arp_mangle() on outgoing ARP packets. */
static struct nf_hook_ops cip_arp_ops __read_mostly = {
	.hook = arp_mangle,
	.pf = NFPROTO_ARP,
	.hooknum = NF_ARP_OUT,
	.priority = -1
};
  489. /***********************************************************************
  490. * PROC DIR HANDLING
  491. ***********************************************************************/
  492. #ifdef CONFIG_PROC_FS
/*
 * Iterator state of the proc seq_file; allocated in clusterip_seq_start()
 * and freed in clusterip_seq_next() or clusterip_seq_stop().
 */
struct clusterip_seq_position {
	unsigned int pos;	/* position */
	unsigned int weight;	/* number of bits set == size */
	unsigned int bit;	/* current bit; 1-based, as returned by ffs() */
	unsigned long val;	/* current value: bits not yet visited */
};
  499. static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
  500. {
  501. struct clusterip_config *c = s->private;
  502. unsigned int weight;
  503. u_int32_t local_nodes;
  504. struct clusterip_seq_position *idx;
  505. /* FIXME: possible race */
  506. local_nodes = c->local_nodes;
  507. weight = hweight32(local_nodes);
  508. if (*pos >= weight)
  509. return NULL;
  510. idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
  511. if (!idx)
  512. return ERR_PTR(-ENOMEM);
  513. idx->pos = *pos;
  514. idx->weight = weight;
  515. idx->bit = ffs(local_nodes);
  516. idx->val = local_nodes;
  517. clear_bit(idx->bit - 1, &idx->val);
  518. return idx;
  519. }
  520. static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
  521. {
  522. struct clusterip_seq_position *idx = v;
  523. *pos = ++idx->pos;
  524. if (*pos >= idx->weight) {
  525. kfree(v);
  526. return NULL;
  527. }
  528. idx->bit = ffs(idx->val);
  529. clear_bit(idx->bit - 1, &idx->val);
  530. return idx;
  531. }
  532. static void clusterip_seq_stop(struct seq_file *s, void *v)
  533. {
  534. if (!IS_ERR(v))
  535. kfree(v);
  536. }
  537. static int clusterip_seq_show(struct seq_file *s, void *v)
  538. {
  539. struct clusterip_seq_position *idx = v;
  540. if (idx->pos != 0)
  541. seq_putc(s, ',');
  542. seq_printf(s, "%u", idx->bit);
  543. if (idx->pos == idx->weight - 1)
  544. seq_putc(s, '\n');
  545. return 0;
  546. }
/* seq_file iterator used by clusterip_proc_open() to list local nodes. */
static const struct seq_operations clusterip_seq_ops = {
	.start = clusterip_seq_start,
	.next = clusterip_seq_next,
	.stop = clusterip_seq_stop,
	.show = clusterip_seq_show,
};
  553. static int clusterip_proc_open(struct inode *inode, struct file *file)
  554. {
  555. int ret = seq_open(file, &clusterip_seq_ops);
  556. if (!ret) {
  557. struct seq_file *sf = file->private_data;
  558. struct clusterip_config *c = PDE_DATA(inode);
  559. sf->private = c;
  560. clusterip_config_get(c);
  561. }
  562. return ret;
  563. }
  564. static int clusterip_proc_release(struct inode *inode, struct file *file)
  565. {
  566. struct clusterip_config *c = PDE_DATA(inode);
  567. int ret;
  568. ret = seq_release(inode, file);
  569. if (!ret)
  570. clusterip_config_put(c);
  571. return ret;
  572. }
  573. static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
  574. size_t size, loff_t *ofs)
  575. {
  576. struct clusterip_config *c = PDE_DATA(file_inode(file));
  577. #define PROC_WRITELEN 10
  578. char buffer[PROC_WRITELEN+1];
  579. unsigned long nodenum;
  580. int rc;
  581. if (size > PROC_WRITELEN)
  582. return -EIO;
  583. if (copy_from_user(buffer, input, size))
  584. return -EFAULT;
  585. buffer[size] = 0;
  586. if (*buffer == '+') {
  587. rc = kstrtoul(buffer+1, 10, &nodenum);
  588. if (rc)
  589. return rc;
  590. if (clusterip_add_node(c, nodenum))
  591. return -ENOMEM;
  592. } else if (*buffer == '-') {
  593. rc = kstrtoul(buffer+1, 10, &nodenum);
  594. if (rc)
  595. return rc;
  596. if (clusterip_del_node(c, nodenum))
  597. return -ENOENT;
  598. } else
  599. return -EIO;
  600. return size;
  601. }
/*
 * File operations of the per cluster IP proc file: reading goes through the
 * seq_file helpers, writing through clusterip_proc_write().
 */
static const struct file_operations clusterip_proc_fops = {
	.owner = THIS_MODULE,
	.open = clusterip_proc_open,
	.read = seq_read,
	.write = clusterip_proc_write,
	.llseek = seq_lseek,
	.release = clusterip_proc_release,
};
  610. #endif /* CONFIG_PROC_FS */
  611. static int clusterip_net_init(struct net *net)
  612. {
  613. struct clusterip_net *cn = net_generic(net, clusterip_net_id);
  614. INIT_LIST_HEAD(&cn->configs);
  615. spin_lock_init(&cn->lock);
  616. #ifdef CONFIG_PROC_FS
  617. cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
  618. if (!cn->procdir) {
  619. pr_err("Unable to proc dir entry\n");
  620. return -ENOMEM;
  621. }
  622. #endif /* CONFIG_PROC_FS */
  623. return 0;
  624. }
  625. static void clusterip_net_exit(struct net *net)
  626. {
  627. #ifdef CONFIG_PROC_FS
  628. struct clusterip_net *cn = net_generic(net, clusterip_net_id);
  629. proc_remove(cn->procdir);
  630. #endif
  631. }
/*
 * Per-namespace operations. .id and .size describe the per-namespace
 * struct clusterip_net that the rest of the file fetches with
 * net_generic(net, clusterip_net_id).
 */
static struct pernet_operations clusterip_net_ops = {
	.init = clusterip_net_init,
	.exit = clusterip_net_exit,
	.id   = &clusterip_net_id,
	.size = sizeof(struct clusterip_net),
};
  638. static int __init clusterip_tg_init(void)
  639. {
  640. int ret;
  641. ret = register_pernet_subsys(&clusterip_net_ops);
  642. if (ret < 0)
  643. return ret;
  644. ret = xt_register_target(&clusterip_tg_reg);
  645. if (ret < 0)
  646. goto cleanup_subsys;
  647. ret = nf_register_hook(&cip_arp_ops);
  648. if (ret < 0)
  649. goto cleanup_target;
  650. pr_info("ClusterIP Version %s loaded successfully\n",
  651. CLUSTERIP_VERSION);
  652. return 0;
  653. cleanup_target:
  654. xt_unregister_target(&clusterip_tg_reg);
  655. cleanup_subsys:
  656. unregister_pernet_subsys(&clusterip_net_ops);
  657. return ret;
  658. }
/*
 * Module exit: unregister everything in the reverse order of
 * clusterip_tg_init(), then wait for pending RCU callbacks.
 */
static void __exit clusterip_tg_exit(void)
{
	pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
	nf_unregister_hook(&cip_arp_ops);
	xt_unregister_target(&clusterip_tg_reg);
	unregister_pernet_subsys(&clusterip_net_ops);
	/* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
	rcu_barrier_bh();
}
/* Module entry and exit points. */
module_init(clusterip_tg_init);
module_exit(clusterip_tg_exit);