ip_vs_dh.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. /*
  2. * IPVS: Destination Hashing scheduling module
  3. *
  4. * Authors: Wensong Zhang <wensong@gnuchina.org>
  5. *
  6. * Inspired by the consistent hashing scheduler patch from
  7. * Thomas Proell <proellt@gmx.de>
  8. *
  9. * This program is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU General Public License
  11. * as published by the Free Software Foundation; either version
  12. * 2 of the License, or (at your option) any later version.
  13. *
  14. * Changes:
  15. *
  16. */
  17. /*
  18. * The dh algorithm is to select server by the hash key of destination IP
  19. * address. The pseudo code is as follows:
  20. *
  21. * n <- servernode[dest_ip];
  22. * if (n is dead) OR
  23. * (n is overloaded) OR (n.weight <= 0) then
  24. * return NULL;
  25. *
  26. * return n;
  27. *
  28. * Note that servernode is a 256-bucket hash table that maps the hash
  29. * index derived from packet destination IP address to the current server
  30. * array. If the dh scheduler is used in cache cluster, it is good to
  31. * combine it with cache_bypass feature. When the statically assigned
  32. * server is dead or overloaded, the load balancer can bypass the cache
  33. * server and send requests to the original server directly.
  34. *
  35. */
  36. #define KMSG_COMPONENT "IPVS"
  37. #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  38. #include <linux/ip.h>
  39. #include <linux/slab.h>
  40. #include <linux/module.h>
  41. #include <linux/kernel.h>
  42. #include <linux/skbuff.h>
  43. #include <net/ip_vs.h>
  44. /*
  45. * IPVS DH bucket
  46. */
  47. struct ip_vs_dh_bucket {
  48. struct ip_vs_dest __rcu *dest; /* real server (cache) */
  49. };
  /*
   *      Dimensions of the IPVS DH hash table.
   *
   *      CONFIG_IP_VS_DH_TAB_BITS may be supplied from outside; when it is not,
   *      it falls back to 8 bits, i.e. a table of 256 buckets.
   */
  #ifndef CONFIG_IP_VS_DH_TAB_BITS
  #define CONFIG_IP_VS_DH_TAB_BITS        8
  #endif

  /* number of index bits, number of buckets, and the mask selecting an index */
  #define IP_VS_DH_TAB_BITS               CONFIG_IP_VS_DH_TAB_BITS
  #define IP_VS_DH_TAB_SIZE               (1 << IP_VS_DH_TAB_BITS)
  #define IP_VS_DH_TAB_MASK               (IP_VS_DH_TAB_SIZE - 1)
  59. struct ip_vs_dh_state {
  60. struct ip_vs_dh_bucket buckets[IP_VS_DH_TAB_SIZE];
  61. struct rcu_head rcu_head;
  62. };
  63. /*
  64. * Returns hash value for IPVS DH entry
  65. */
  66. static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr)
  67. {
  68. __be32 addr_fold = addr->ip;
  69. #ifdef CONFIG_IP_VS_IPV6
  70. if (af == AF_INET6)
  71. addr_fold = addr->ip6[0]^addr->ip6[1]^
  72. addr->ip6[2]^addr->ip6[3];
  73. #endif
  74. return (ntohl(addr_fold)*2654435761UL) & IP_VS_DH_TAB_MASK;
  75. }
  76. /*
  77. * Get ip_vs_dest associated with supplied parameters.
  78. */
  79. static inline struct ip_vs_dest *
  80. ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
  81. {
  82. return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
  83. }
  84. /*
  85. * Assign all the hash buckets of the specified table with the service.
  86. */
  87. static int
  88. ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
  89. {
  90. int i;
  91. struct ip_vs_dh_bucket *b;
  92. struct list_head *p;
  93. struct ip_vs_dest *dest;
  94. bool empty;
  95. b = &s->buckets[0];
  96. p = &svc->destinations;
  97. empty = list_empty(p);
  98. for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
  99. dest = rcu_dereference_protected(b->dest, 1);
  100. if (dest)
  101. ip_vs_dest_put(dest);
  102. if (empty)
  103. RCU_INIT_POINTER(b->dest, NULL);
  104. else {
  105. if (p == &svc->destinations)
  106. p = p->next;
  107. dest = list_entry(p, struct ip_vs_dest, n_list);
  108. ip_vs_dest_hold(dest);
  109. RCU_INIT_POINTER(b->dest, dest);
  110. p = p->next;
  111. }
  112. b++;
  113. }
  114. return 0;
  115. }
  116. /*
  117. * Flush all the hash buckets of the specified table.
  118. */
  119. static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
  120. {
  121. int i;
  122. struct ip_vs_dh_bucket *b;
  123. struct ip_vs_dest *dest;
  124. b = &s->buckets[0];
  125. for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
  126. dest = rcu_dereference_protected(b->dest, 1);
  127. if (dest) {
  128. ip_vs_dest_put(dest);
  129. RCU_INIT_POINTER(b->dest, NULL);
  130. }
  131. b++;
  132. }
  133. }
  134. static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
  135. {
  136. struct ip_vs_dh_state *s;
  137. /* allocate the DH table for this service */
  138. s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
  139. if (s == NULL)
  140. return -ENOMEM;
  141. svc->sched_data = s;
  142. IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
  143. "current service\n",
  144. sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
  145. /* assign the hash buckets with current dests */
  146. ip_vs_dh_reassign(s, svc);
  147. return 0;
  148. }
  149. static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
  150. {
  151. struct ip_vs_dh_state *s = svc->sched_data;
  152. /* got to clean up hash buckets here */
  153. ip_vs_dh_flush(s);
  154. /* release the table itself */
  155. kfree_rcu(s, rcu_head);
  156. IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
  157. sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
  158. }
  159. static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
  160. struct ip_vs_dest *dest)
  161. {
  162. struct ip_vs_dh_state *s = svc->sched_data;
  163. /* assign the hash buckets with the updated service */
  164. ip_vs_dh_reassign(s, svc);
  165. return 0;
  166. }
  167. /*
  168. * If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
  169. * consider that the server is overloaded here.
  170. */
  171. static inline int is_overloaded(struct ip_vs_dest *dest)
  172. {
  173. return dest->flags & IP_VS_DEST_F_OVERLOAD;
  174. }
  175. /*
  176. * Destination hashing scheduling
  177. */
  178. static struct ip_vs_dest *
  179. ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
  180. struct ip_vs_iphdr *iph)
  181. {
  182. struct ip_vs_dest *dest;
  183. struct ip_vs_dh_state *s;
  184. IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
  185. s = (struct ip_vs_dh_state *) svc->sched_data;
  186. dest = ip_vs_dh_get(svc->af, s, &iph->daddr);
  187. if (!dest
  188. || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
  189. || atomic_read(&dest->weight) <= 0
  190. || is_overloaded(dest)) {
  191. ip_vs_scheduler_err(svc, "no destination available");
  192. return NULL;
  193. }
  194. IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
  195. IP_VS_DBG_ADDR(svc->af, &iph->daddr),
  196. IP_VS_DBG_ADDR(dest->af, &dest->addr),
  197. ntohs(dest->port));
  198. return dest;
  199. }
  200. /*
  201. * IPVS DH Scheduler structure
  202. */
  203. static struct ip_vs_scheduler ip_vs_dh_scheduler =
  204. {
  205. .name = "dh",
  206. .refcnt = ATOMIC_INIT(0),
  207. .module = THIS_MODULE,
  208. .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
  209. .init_service = ip_vs_dh_init_svc,
  210. .done_service = ip_vs_dh_done_svc,
  211. .add_dest = ip_vs_dh_dest_changed,
  212. .del_dest = ip_vs_dh_dest_changed,
  213. .schedule = ip_vs_dh_schedule,
  214. };
  215. static int __init ip_vs_dh_init(void)
  216. {
  217. return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
  218. }
  219. static void __exit ip_vs_dh_cleanup(void)
  220. {
  221. unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
  222. synchronize_rcu();
  223. }
  224. module_init(ip_vs_dh_init);
  225. module_exit(ip_vs_dh_cleanup);
  226. MODULE_LICENSE("GPL");