/* ip_vs_ctl.c — IPVS control plane (viewer line-number residue removed) */
  1. /*
  2. * IPVS An implementation of the IP virtual server support for the
  3. * LINUX operating system. IPVS is now implemented as a module
  4. * over the NetFilter framework. IPVS can be used to build a
  5. * high-performance and highly available server based on a
  6. * cluster of servers.
  7. *
  8. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  9. * Peter Kese <peter.kese@ijs.si>
  10. * Julian Anastasov <ja@ssi.bg>
  11. *
  12. * This program is free software; you can redistribute it and/or
  13. * modify it under the terms of the GNU General Public License
  14. * as published by the Free Software Foundation; either version
  15. * 2 of the License, or (at your option) any later version.
  16. *
  17. * Changes:
  18. *
  19. */
  20. #define KMSG_COMPONENT "IPVS"
  21. #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  22. #include <linux/module.h>
  23. #include <linux/init.h>
  24. #include <linux/types.h>
  25. #include <linux/capability.h>
  26. #include <linux/fs.h>
  27. #include <linux/sysctl.h>
  28. #include <linux/proc_fs.h>
  29. #include <linux/workqueue.h>
  30. #include <linux/swap.h>
  31. #include <linux/seq_file.h>
  32. #include <linux/slab.h>
  33. #include <linux/netfilter.h>
  34. #include <linux/netfilter_ipv4.h>
  35. #include <linux/mutex.h>
  36. #include <net/net_namespace.h>
  37. #include <linux/nsproxy.h>
  38. #include <net/ip.h>
  39. #ifdef CONFIG_IP_VS_IPV6
  40. #include <net/ipv6.h>
  41. #include <net/ip6_route.h>
  42. #endif
  43. #include <net/route.h>
  44. #include <net/sock.h>
  45. #include <net/genetlink.h>
  46. #include <asm/uaccess.h>
  47. #include <net/ip_vs.h>
  48. /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
  49. static DEFINE_MUTEX(__ip_vs_mutex);
  50. /* sysctl variables */
  51. #ifdef CONFIG_IP_VS_DEBUG
  52. static int sysctl_ip_vs_debug_level = 0;
  53. int ip_vs_get_debug_level(void)
  54. {
  55. return sysctl_ip_vs_debug_level;
  56. }
  57. #endif
  58. /* Protos */
  59. static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
  60. #ifdef CONFIG_IP_VS_IPV6
  61. /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
  62. static bool __ip_vs_addr_is_local_v6(struct net *net,
  63. const struct in6_addr *addr)
  64. {
  65. struct flowi6 fl6 = {
  66. .daddr = *addr,
  67. };
  68. struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
  69. bool is_local;
  70. is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
  71. dst_release(dst);
  72. return is_local;
  73. }
  74. #endif
  75. #ifdef CONFIG_SYSCTL
  76. /*
  77. * update_defense_level is called from keventd and from sysctl,
  78. * so it needs to protect itself from softirqs
  79. */
  80. static void update_defense_level(struct netns_ipvs *ipvs)
  81. {
  82. struct sysinfo i;
  83. static int old_secure_tcp = 0;
  84. int availmem;
  85. int nomem;
  86. int to_change = -1;
  87. /* we only count free and buffered memory (in pages) */
  88. si_meminfo(&i);
  89. availmem = i.freeram + i.bufferram;
  90. /* however in linux 2.5 the i.bufferram is total page cache size,
  91. we need adjust it */
  92. /* si_swapinfo(&i); */
  93. /* availmem = availmem - (i.totalswap - i.freeswap); */
  94. nomem = (availmem < ipvs->sysctl_amemthresh);
  95. local_bh_disable();
  96. /* drop_entry */
  97. spin_lock(&ipvs->dropentry_lock);
  98. switch (ipvs->sysctl_drop_entry) {
  99. case 0:
  100. atomic_set(&ipvs->dropentry, 0);
  101. break;
  102. case 1:
  103. if (nomem) {
  104. atomic_set(&ipvs->dropentry, 1);
  105. ipvs->sysctl_drop_entry = 2;
  106. } else {
  107. atomic_set(&ipvs->dropentry, 0);
  108. }
  109. break;
  110. case 2:
  111. if (nomem) {
  112. atomic_set(&ipvs->dropentry, 1);
  113. } else {
  114. atomic_set(&ipvs->dropentry, 0);
  115. ipvs->sysctl_drop_entry = 1;
  116. };
  117. break;
  118. case 3:
  119. atomic_set(&ipvs->dropentry, 1);
  120. break;
  121. }
  122. spin_unlock(&ipvs->dropentry_lock);
  123. /* drop_packet */
  124. spin_lock(&ipvs->droppacket_lock);
  125. switch (ipvs->sysctl_drop_packet) {
  126. case 0:
  127. ipvs->drop_rate = 0;
  128. break;
  129. case 1:
  130. if (nomem) {
  131. ipvs->drop_rate = ipvs->drop_counter
  132. = ipvs->sysctl_amemthresh /
  133. (ipvs->sysctl_amemthresh-availmem);
  134. ipvs->sysctl_drop_packet = 2;
  135. } else {
  136. ipvs->drop_rate = 0;
  137. }
  138. break;
  139. case 2:
  140. if (nomem) {
  141. ipvs->drop_rate = ipvs->drop_counter
  142. = ipvs->sysctl_amemthresh /
  143. (ipvs->sysctl_amemthresh-availmem);
  144. } else {
  145. ipvs->drop_rate = 0;
  146. ipvs->sysctl_drop_packet = 1;
  147. }
  148. break;
  149. case 3:
  150. ipvs->drop_rate = ipvs->sysctl_am_droprate;
  151. break;
  152. }
  153. spin_unlock(&ipvs->droppacket_lock);
  154. /* secure_tcp */
  155. spin_lock(&ipvs->securetcp_lock);
  156. switch (ipvs->sysctl_secure_tcp) {
  157. case 0:
  158. if (old_secure_tcp >= 2)
  159. to_change = 0;
  160. break;
  161. case 1:
  162. if (nomem) {
  163. if (old_secure_tcp < 2)
  164. to_change = 1;
  165. ipvs->sysctl_secure_tcp = 2;
  166. } else {
  167. if (old_secure_tcp >= 2)
  168. to_change = 0;
  169. }
  170. break;
  171. case 2:
  172. if (nomem) {
  173. if (old_secure_tcp < 2)
  174. to_change = 1;
  175. } else {
  176. if (old_secure_tcp >= 2)
  177. to_change = 0;
  178. ipvs->sysctl_secure_tcp = 1;
  179. }
  180. break;
  181. case 3:
  182. if (old_secure_tcp < 2)
  183. to_change = 1;
  184. break;
  185. }
  186. old_secure_tcp = ipvs->sysctl_secure_tcp;
  187. if (to_change >= 0)
  188. ip_vs_protocol_timeout_change(ipvs,
  189. ipvs->sysctl_secure_tcp > 1);
  190. spin_unlock(&ipvs->securetcp_lock);
  191. local_bh_enable();
  192. }
  193. /*
  194. * Timer for checking the defense
  195. */
  196. #define DEFENSE_TIMER_PERIOD 1*HZ
  197. static void defense_work_handler(struct work_struct *work)
  198. {
  199. struct netns_ipvs *ipvs =
  200. container_of(work, struct netns_ipvs, defense_work.work);
  201. update_defense_level(ipvs);
  202. if (atomic_read(&ipvs->dropentry))
  203. ip_vs_random_dropentry(ipvs);
  204. schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
  205. }
  206. #endif
/* Take a reference on the IPVS module so it cannot be unloaded while in
 * use.  Returns non-zero on success, 0 if the module is going away.
 */
int
ip_vs_use_count_inc(void)
{
	return try_module_get(THIS_MODULE);
}
/* Drop the module reference taken by ip_vs_use_count_inc(). */
void
ip_vs_use_count_dec(void)
{
	module_put(THIS_MODULE);
}
  217. /*
  218. * Hash table: for virtual service lookups
  219. */
  220. #define IP_VS_SVC_TAB_BITS 8
  221. #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
  222. #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
  223. /* the service table hashed by <protocol, addr, port> */
  224. static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
  225. /* the service table hashed by fwmark */
  226. static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
  227. /*
  228. * Returns hash value for virtual service
  229. */
  230. static inline unsigned int
  231. ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
  232. const union nf_inet_addr *addr, __be16 port)
  233. {
  234. register unsigned int porth = ntohs(port);
  235. __be32 addr_fold = addr->ip;
  236. __u32 ahash;
  237. #ifdef CONFIG_IP_VS_IPV6
  238. if (af == AF_INET6)
  239. addr_fold = addr->ip6[0]^addr->ip6[1]^
  240. addr->ip6[2]^addr->ip6[3];
  241. #endif
  242. ahash = ntohl(addr_fold);
  243. ahash ^= ((size_t) ipvs >> 8);
  244. return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
  245. IP_VS_SVC_TAB_MASK;
  246. }
  247. /*
  248. * Returns hash value of fwmark for virtual service lookup
  249. */
  250. static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark)
  251. {
  252. return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
  253. }
  254. /*
  255. * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
  256. * or in the ip_vs_svc_fwm_table by fwmark.
  257. * Should be called with locked tables.
  258. */
  259. static int ip_vs_svc_hash(struct ip_vs_service *svc)
  260. {
  261. unsigned int hash;
  262. if (svc->flags & IP_VS_SVC_F_HASHED) {
  263. pr_err("%s(): request for already hashed, called from %pF\n",
  264. __func__, __builtin_return_address(0));
  265. return 0;
  266. }
  267. if (svc->fwmark == 0) {
  268. /*
  269. * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
  270. */
  271. hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
  272. &svc->addr, svc->port);
  273. hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
  274. } else {
  275. /*
  276. * Hash it by fwmark in svc_fwm_table
  277. */
  278. hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark);
  279. hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
  280. }
  281. svc->flags |= IP_VS_SVC_F_HASHED;
  282. /* increase its refcnt because it is referenced by the svc table */
  283. atomic_inc(&svc->refcnt);
  284. return 1;
  285. }
  286. /*
  287. * Unhashes a service from svc_table / svc_fwm_table.
  288. * Should be called with locked tables.
  289. */
  290. static int ip_vs_svc_unhash(struct ip_vs_service *svc)
  291. {
  292. if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
  293. pr_err("%s(): request for unhash flagged, called from %pF\n",
  294. __func__, __builtin_return_address(0));
  295. return 0;
  296. }
  297. if (svc->fwmark == 0) {
  298. /* Remove it from the svc_table table */
  299. hlist_del_rcu(&svc->s_list);
  300. } else {
  301. /* Remove it from the svc_fwm_table table */
  302. hlist_del_rcu(&svc->f_list);
  303. }
  304. svc->flags &= ~IP_VS_SVC_F_HASHED;
  305. atomic_dec(&svc->refcnt);
  306. return 1;
  307. }
  308. /*
  309. * Get service by {netns, proto,addr,port} in the service table.
  310. */
  311. static inline struct ip_vs_service *
  312. __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
  313. const union nf_inet_addr *vaddr, __be16 vport)
  314. {
  315. unsigned int hash;
  316. struct ip_vs_service *svc;
  317. /* Check for "full" addressed entries */
  318. hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport);
  319. hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
  320. if ((svc->af == af)
  321. && ip_vs_addr_equal(af, &svc->addr, vaddr)
  322. && (svc->port == vport)
  323. && (svc->protocol == protocol)
  324. && (svc->ipvs == ipvs)) {
  325. /* HIT */
  326. return svc;
  327. }
  328. }
  329. return NULL;
  330. }
  331. /*
  332. * Get service by {fwmark} in the service table.
  333. */
  334. static inline struct ip_vs_service *
  335. __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
  336. {
  337. unsigned int hash;
  338. struct ip_vs_service *svc;
  339. /* Check for fwmark addressed entries */
  340. hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark);
  341. hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
  342. if (svc->fwmark == fwmark && svc->af == af
  343. && (svc->ipvs == ipvs)) {
  344. /* HIT */
  345. return svc;
  346. }
  347. }
  348. return NULL;
  349. }
/* Find service, called under RCU lock.
 *
 * Lookup order:
 *   1. fwmark table (when fwmark != 0)
 *   2. exact <protocol,addr,port> entry
 *   3. FTP control-port (FTPPORT) fallback, so FTP data connections can
 *      be matched to an existing FTP virtual service
 *   4. catch-all port-zero service
 * Returns the matching service or NULL; no reference is taken.
 */
struct ip_vs_service *
ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
		   const union nf_inet_addr *vaddr, __be16 vport)
{
	struct ip_vs_service *svc;

	/*
	 *	Check the table hashed by fwmark first
	 */
	if (fwmark) {
		svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
		if (svc)
			goto out;
	}

	/*
	 *	Check the table hashed by <protocol,addr,port>
	 *	for "full" addressed entries
	 */
	svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);

	/* counters avoid pointless fallback lookups when no such
	 * special services are configured */
	if (svc == NULL
	    && protocol == IPPROTO_TCP
	    && atomic_read(&ipvs->ftpsvc_counter)
	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
		/*
		 * Check if ftp service entry exists, the packet
		 * might belong to FTP data connections.
		 */
		svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
	}

	if (svc == NULL
	    && atomic_read(&ipvs->nullsvc_counter)) {
		/*
		 * Check if the catch-all port (port zero) exists
		 */
		svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0);
	}

 out:
	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
		      fwmark, ip_vs_proto_name(protocol),
		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
		      svc ? "hit" : "not hit");

	return svc;
}
/* Bind a destination to a service: take a service reference, then publish
 * the pointer so RCU readers see a fully-referenced service.
 */
static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
	atomic_inc(&svc->refcnt);
	rcu_assign_pointer(dest->svc, svc);
}
/* Free a service and its per-CPU stats.  Caller must hold the last
 * reference; no RCU grace period is observed here.
 */
static void ip_vs_service_free(struct ip_vs_service *svc)
{
	free_percpu(svc->stats.cpustats);
	kfree(svc);
}
  404. static void ip_vs_service_rcu_free(struct rcu_head *head)
  405. {
  406. struct ip_vs_service *svc;
  407. svc = container_of(head, struct ip_vs_service, rcu_head);
  408. ip_vs_service_free(svc);
  409. }
  410. static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
  411. {
  412. if (atomic_dec_and_test(&svc->refcnt)) {
  413. IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
  414. svc->fwmark,
  415. IP_VS_DBG_ADDR(svc->af, &svc->addr),
  416. ntohs(svc->port));
  417. if (do_delay)
  418. call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
  419. else
  420. ip_vs_service_free(svc);
  421. }
  422. }
  423. /*
  424. * Returns hash value for real service
  425. */
  426. static inline unsigned int ip_vs_rs_hashkey(int af,
  427. const union nf_inet_addr *addr,
  428. __be16 port)
  429. {
  430. register unsigned int porth = ntohs(port);
  431. __be32 addr_fold = addr->ip;
  432. #ifdef CONFIG_IP_VS_IPV6
  433. if (af == AF_INET6)
  434. addr_fold = addr->ip6[0]^addr->ip6[1]^
  435. addr->ip6[2]^addr->ip6[3];
  436. #endif
  437. return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
  438. & IP_VS_RTAB_MASK;
  439. }
  440. /* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
  441. static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
  442. {
  443. unsigned int hash;
  444. if (dest->in_rs_table)
  445. return;
  446. /*
  447. * Hash by proto,addr,port,
  448. * which are the parameters of the real service.
  449. */
  450. hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
  451. hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
  452. dest->in_rs_table = 1;
  453. }
  454. /* Unhash ip_vs_dest from rs_table. */
  455. static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
  456. {
  457. /*
  458. * Remove it from the rs_table table.
  459. */
  460. if (dest->in_rs_table) {
  461. hlist_del_rcu(&dest->d_list);
  462. dest->in_rs_table = 0;
  463. }
  464. }
  465. /* Check if real service by <proto,addr,port> is present */
  466. bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
  467. const union nf_inet_addr *daddr, __be16 dport)
  468. {
  469. unsigned int hash;
  470. struct ip_vs_dest *dest;
  471. /* Check for "full" addressed entries */
  472. hash = ip_vs_rs_hashkey(af, daddr, dport);
  473. rcu_read_lock();
  474. hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
  475. if (dest->port == dport &&
  476. dest->af == af &&
  477. ip_vs_addr_equal(af, &dest->addr, daddr) &&
  478. (dest->protocol == protocol || dest->vfwmark)) {
  479. /* HIT */
  480. rcu_read_unlock();
  481. return true;
  482. }
  483. }
  484. rcu_read_unlock();
  485. return false;
  486. }
  487. /* Lookup destination by {addr,port} in the given service
  488. * Called under RCU lock.
  489. */
  490. static struct ip_vs_dest *
  491. ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
  492. const union nf_inet_addr *daddr, __be16 dport)
  493. {
  494. struct ip_vs_dest *dest;
  495. /*
  496. * Find the destination for the given service
  497. */
  498. list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
  499. if ((dest->af == dest_af) &&
  500. ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
  501. (dest->port == dport)) {
  502. /* HIT */
  503. return dest;
  504. }
  505. }
  506. return NULL;
  507. }
/*
 *	Find destination by {daddr,dport,vaddr,protocol}
 *	Created to be used in ip_vs_process_message() in
 *	the backup synchronization daemon. It finds the
 *	destination to be bound to the received connection
 *	on the backup.
 *	Called under RCU lock, no refcnt is returned.
 */
struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
				   const union nf_inet_addr *daddr,
				   __be16 dport,
				   const union nf_inet_addr *vaddr,
				   __be16 vport, __u16 protocol, __u32 fwmark,
				   __u32 flags)
{
	struct ip_vs_dest *dest;
	struct ip_vs_service *svc;
	__be16 port = dport;

	svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport);
	if (!svc)
		return NULL;
	/* For non-masquerading fwmark services try the wildcard
	 * port 0 first. */
	if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
		port = 0;
	dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
	if (!dest)
		/* port ^ dport yields "the other" value: dport when port
		 * was 0, and 0 when port == dport — so the second lookup
		 * always tries the alternative port. */
		dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
	return dest;
}
/* RCU callback: free a dest_dst wrapper once all RCU readers are done */
void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
{
	struct ip_vs_dest_dst *dest_dst = container_of(head,
						       struct ip_vs_dest_dst,
						       rcu_head);

	/* drop the cached route before freeing its wrapper */
	dst_release(dest_dst->dst_cache);
	kfree(dest_dst);
}
/* Release dest_dst and dst_cache for dest in user context.
 * Unpublishes the pointer first, then defers the free via call_rcu()
 * so concurrent RCU readers of dest->dest_dst stay safe.
 */
static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
{
	struct ip_vs_dest_dst *old;

	old = rcu_dereference_protected(dest->dest_dst, 1);
	if (old) {
		RCU_INIT_POINTER(dest->dest_dst, NULL);
		call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
	}
}
  554. /*
  555. * Lookup dest by {svc,addr,port} in the destination trash.
  556. * The destination trash is used to hold the destinations that are removed
  557. * from the service table but are still referenced by some conn entries.
  558. * The reason to add the destination trash is when the dest is temporary
  559. * down (either by administrator or by monitor program), the dest can be
  560. * picked back from the trash, the remaining connections to the dest can
  561. * continue, and the counting information of the dest is also useful for
  562. * scheduling.
  563. */
  564. static struct ip_vs_dest *
  565. ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
  566. const union nf_inet_addr *daddr, __be16 dport)
  567. {
  568. struct ip_vs_dest *dest;
  569. struct netns_ipvs *ipvs = svc->ipvs;
  570. /*
  571. * Find the destination in trash
  572. */
  573. spin_lock_bh(&ipvs->dest_trash_lock);
  574. list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
  575. IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
  576. "dest->refcnt=%d\n",
  577. dest->vfwmark,
  578. IP_VS_DBG_ADDR(dest->af, &dest->addr),
  579. ntohs(dest->port),
  580. atomic_read(&dest->refcnt));
  581. if (dest->af == dest_af &&
  582. ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
  583. dest->port == dport &&
  584. dest->vfwmark == svc->fwmark &&
  585. dest->protocol == svc->protocol &&
  586. (svc->fwmark ||
  587. (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
  588. dest->vport == svc->port))) {
  589. /* HIT */
  590. list_del(&dest->t_list);
  591. ip_vs_dest_hold(dest);
  592. goto out;
  593. }
  594. }
  595. dest = NULL;
  596. out:
  597. spin_unlock_bh(&ipvs->dest_trash_lock);
  598. return dest;
  599. }
/* Final teardown of a destination: drop its service binding, cached
 * route, per-cpu stats, and then the dest itself.
 */
static void ip_vs_dest_free(struct ip_vs_dest *dest)
{
	struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);

	__ip_vs_dst_cache_reset(dest);
	__ip_vs_svc_put(svc, false);
	free_percpu(dest->stats.cpustats);
	ip_vs_dest_put_and_free(dest);
}
  608. /*
  609. * Clean up all the destinations in the trash
  610. * Called by the ip_vs_control_cleanup()
  611. *
  612. * When the ip_vs_control_clearup is activated by ipvs module exit,
  613. * the service tables must have been flushed and all the connections
  614. * are expired, and the refcnt of each destination in the trash must
  615. * be 0, so we simply release them here.
  616. */
  617. static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs)
  618. {
  619. struct ip_vs_dest *dest, *nxt;
  620. del_timer_sync(&ipvs->dest_trash_timer);
  621. /* No need to use dest_trash_lock */
  622. list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
  623. list_del(&dest->t_list);
  624. ip_vs_dest_free(dest);
  625. }
  626. }
/* Snapshot src counters into dst, relative to the last zeroing point
 * (kstats0 holds the counter values captured by ip_vs_zero_stats).
 */
static void
ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
{
#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c

	spin_lock_bh(&src->lock);

	IP_VS_SHOW_STATS_COUNTER(conns);
	IP_VS_SHOW_STATS_COUNTER(inpkts);
	IP_VS_SHOW_STATS_COUNTER(outpkts);
	IP_VS_SHOW_STATS_COUNTER(inbytes);
	IP_VS_SHOW_STATS_COUNTER(outbytes);

	/* rate estimates are read under the same lock */
	ip_vs_read_estimator(dst, src);

	spin_unlock_bh(&src->lock);
}
/* Convert 64-bit kernel stats to the legacy userspace layout.
 * Packet/connection counters and rates are truncated to u32;
 * the byte counters are copied without a truncating cast.
 */
static void
ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
{
	dst->conns = (u32)src->conns;
	dst->inpkts = (u32)src->inpkts;
	dst->outpkts = (u32)src->outpkts;
	dst->inbytes = src->inbytes;
	dst->outbytes = src->outbytes;
	dst->cps = (u32)src->cps;
	dst->inpps = (u32)src->inpps;
	dst->outpps = (u32)src->outpps;
	dst->inbps = (u32)src->inbps;
	dst->outbps = (u32)src->outbps;
}
/* Reset the visible counters of a stats block without destroying the
 * raw totals: the current values become the new zero point (kstats0).
 */
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
	spin_lock_bh(&stats->lock);

	/* get current counters as zero point, rates are zeroed */

#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c

	IP_VS_ZERO_STATS_COUNTER(conns);
	IP_VS_ZERO_STATS_COUNTER(inpkts);
	IP_VS_ZERO_STATS_COUNTER(outpkts);
	IP_VS_ZERO_STATS_COUNTER(inbytes);
	IP_VS_ZERO_STATS_COUNTER(outbytes);

	ip_vs_zero_estimator(stats);

	spin_unlock_bh(&stats->lock);
}
  668. /*
  669. * Update a destination in the given service
  670. */
  671. static void
  672. __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
  673. struct ip_vs_dest_user_kern *udest, int add)
  674. {
  675. struct netns_ipvs *ipvs = svc->ipvs;
  676. struct ip_vs_service *old_svc;
  677. struct ip_vs_scheduler *sched;
  678. int conn_flags;
  679. /* We cannot modify an address and change the address family */
  680. BUG_ON(!add && udest->af != dest->af);
  681. if (add && udest->af != svc->af)
  682. ipvs->mixed_address_family_dests++;
  683. /* set the weight and the flags */
  684. atomic_set(&dest->weight, udest->weight);
  685. conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
  686. conn_flags |= IP_VS_CONN_F_INACTIVE;
  687. /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
  688. if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
  689. conn_flags |= IP_VS_CONN_F_NOOUTPUT;
  690. } else {
  691. /*
  692. * Put the real service in rs_table if not present.
  693. * For now only for NAT!
  694. */
  695. ip_vs_rs_hash(ipvs, dest);
  696. }
  697. atomic_set(&dest->conn_flags, conn_flags);
  698. /* bind the service */
  699. old_svc = rcu_dereference_protected(dest->svc, 1);
  700. if (!old_svc) {
  701. __ip_vs_bind_svc(dest, svc);
  702. } else {
  703. if (old_svc != svc) {
  704. ip_vs_zero_stats(&dest->stats);
  705. __ip_vs_bind_svc(dest, svc);
  706. __ip_vs_svc_put(old_svc, true);
  707. }
  708. }
  709. /* set the dest status flags */
  710. dest->flags |= IP_VS_DEST_F_AVAILABLE;
  711. if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
  712. dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
  713. dest->u_threshold = udest->u_threshold;
  714. dest->l_threshold = udest->l_threshold;
  715. dest->af = udest->af;
  716. spin_lock_bh(&dest->dst_lock);
  717. __ip_vs_dst_cache_reset(dest);
  718. spin_unlock_bh(&dest->dst_lock);
  719. if (add) {
  720. ip_vs_start_estimator(svc->ipvs, &dest->stats);
  721. list_add_rcu(&dest->n_list, &svc->destinations);
  722. svc->num_dests++;
  723. sched = rcu_dereference_protected(svc->scheduler, 1);
  724. if (sched && sched->add_dest)
  725. sched->add_dest(svc, dest);
  726. } else {
  727. sched = rcu_dereference_protected(svc->scheduler, 1);
  728. if (sched && sched->upd_dest)
  729. sched->upd_dest(svc, dest);
  730. }
  731. }
  732. /*
  733. * Create a destination for the given service
  734. */
  735. static int
  736. ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
  737. struct ip_vs_dest **dest_p)
  738. {
  739. struct ip_vs_dest *dest;
  740. unsigned int atype, i;
  741. EnterFunction(2);
  742. #ifdef CONFIG_IP_VS_IPV6
  743. if (udest->af == AF_INET6) {
  744. atype = ipv6_addr_type(&udest->addr.in6);
  745. if ((!(atype & IPV6_ADDR_UNICAST) ||
  746. atype & IPV6_ADDR_LINKLOCAL) &&
  747. !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
  748. return -EINVAL;
  749. } else
  750. #endif
  751. {
  752. atype = inet_addr_type(svc->ipvs->net, udest->addr.ip);
  753. if (atype != RTN_LOCAL && atype != RTN_UNICAST)
  754. return -EINVAL;
  755. }
  756. dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
  757. if (dest == NULL)
  758. return -ENOMEM;
  759. dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
  760. if (!dest->stats.cpustats)
  761. goto err_alloc;
  762. for_each_possible_cpu(i) {
  763. struct ip_vs_cpu_stats *ip_vs_dest_stats;
  764. ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i);
  765. u64_stats_init(&ip_vs_dest_stats->syncp);
  766. }
  767. dest->af = udest->af;
  768. dest->protocol = svc->protocol;
  769. dest->vaddr = svc->addr;
  770. dest->vport = svc->port;
  771. dest->vfwmark = svc->fwmark;
  772. ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
  773. dest->port = udest->port;
  774. atomic_set(&dest->activeconns, 0);
  775. atomic_set(&dest->inactconns, 0);
  776. atomic_set(&dest->persistconns, 0);
  777. atomic_set(&dest->refcnt, 1);
  778. INIT_HLIST_NODE(&dest->d_list);
  779. spin_lock_init(&dest->dst_lock);
  780. spin_lock_init(&dest->stats.lock);
  781. __ip_vs_update_dest(svc, dest, udest, 1);
  782. *dest_p = dest;
  783. LeaveFunction(2);
  784. return 0;
  785. err_alloc:
  786. kfree(dest);
  787. return -ENOMEM;
  788. }
  789. /*
  790. * Add a destination into an existing service
  791. */
  792. static int
  793. ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
  794. {
  795. struct ip_vs_dest *dest;
  796. union nf_inet_addr daddr;
  797. __be16 dport = udest->port;
  798. int ret;
  799. EnterFunction(2);
  800. if (udest->weight < 0) {
  801. pr_err("%s(): server weight less than zero\n", __func__);
  802. return -ERANGE;
  803. }
  804. if (udest->l_threshold > udest->u_threshold) {
  805. pr_err("%s(): lower threshold is higher than upper threshold\n",
  806. __func__);
  807. return -ERANGE;
  808. }
  809. ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
  810. /* We use function that requires RCU lock */
  811. rcu_read_lock();
  812. dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
  813. rcu_read_unlock();
  814. if (dest != NULL) {
  815. IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
  816. return -EEXIST;
  817. }
  818. /*
  819. * Check if the dest already exists in the trash and
  820. * is from the same service
  821. */
  822. dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
  823. if (dest != NULL) {
  824. IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
  825. "dest->refcnt=%d, service %u/%s:%u\n",
  826. IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
  827. atomic_read(&dest->refcnt),
  828. dest->vfwmark,
  829. IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
  830. ntohs(dest->vport));
  831. __ip_vs_update_dest(svc, dest, udest, 1);
  832. ret = 0;
  833. } else {
  834. /*
  835. * Allocate and initialize the dest structure
  836. */
  837. ret = ip_vs_new_dest(svc, udest, &dest);
  838. }
  839. LeaveFunction(2);
  840. return ret;
  841. }
  842. /*
  843. * Edit a destination in the given service
  844. */
  845. static int
  846. ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
  847. {
  848. struct ip_vs_dest *dest;
  849. union nf_inet_addr daddr;
  850. __be16 dport = udest->port;
  851. EnterFunction(2);
  852. if (udest->weight < 0) {
  853. pr_err("%s(): server weight less than zero\n", __func__);
  854. return -ERANGE;
  855. }
  856. if (udest->l_threshold > udest->u_threshold) {
  857. pr_err("%s(): lower threshold is higher than upper threshold\n",
  858. __func__);
  859. return -ERANGE;
  860. }
  861. ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
  862. /* We use function that requires RCU lock */
  863. rcu_read_lock();
  864. dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
  865. rcu_read_unlock();
  866. if (dest == NULL) {
  867. IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
  868. return -ENOENT;
  869. }
  870. __ip_vs_update_dest(svc, dest, udest, 0);
  871. LeaveFunction(2);
  872. return 0;
  873. }
  874. /*
  875. * Delete a destination (must be already unlinked from the service)
  876. */
  877. static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
  878. bool cleanup)
  879. {
  880. ip_vs_stop_estimator(ipvs, &dest->stats);
  881. /*
  882. * Remove it from the d-linked list with the real services.
  883. */
  884. ip_vs_rs_unhash(dest);
  885. spin_lock_bh(&ipvs->dest_trash_lock);
  886. IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
  887. IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
  888. atomic_read(&dest->refcnt));
  889. if (list_empty(&ipvs->dest_trash) && !cleanup)
  890. mod_timer(&ipvs->dest_trash_timer,
  891. jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
  892. /* dest lives in trash without reference */
  893. list_add(&dest->t_list, &ipvs->dest_trash);
  894. dest->idle_start = 0;
  895. spin_unlock_bh(&ipvs->dest_trash_lock);
  896. ip_vs_dest_put(dest);
  897. }
  898. /*
  899. * Unlink a destination from the given service
  900. */
  901. static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
  902. struct ip_vs_dest *dest,
  903. int svcupd)
  904. {
  905. dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
  906. /*
  907. * Remove it from the d-linked destination list.
  908. */
  909. list_del_rcu(&dest->n_list);
  910. svc->num_dests--;
  911. if (dest->af != svc->af)
  912. svc->ipvs->mixed_address_family_dests--;
  913. if (svcupd) {
  914. struct ip_vs_scheduler *sched;
  915. sched = rcu_dereference_protected(svc->scheduler, 1);
  916. if (sched && sched->del_dest)
  917. sched->del_dest(svc, dest);
  918. }
  919. }
  920. /*
  921. * Delete a destination server in the given service
  922. */
  923. static int
  924. ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
  925. {
  926. struct ip_vs_dest *dest;
  927. __be16 dport = udest->port;
  928. EnterFunction(2);
  929. /* We use function that requires RCU lock */
  930. rcu_read_lock();
  931. dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
  932. rcu_read_unlock();
  933. if (dest == NULL) {
  934. IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
  935. return -ENOENT;
  936. }
  937. /*
  938. * Unlink dest from the service
  939. */
  940. __ip_vs_unlink_dest(svc, dest, 1);
  941. /*
  942. * Delete the destination
  943. */
  944. __ip_vs_del_dest(svc->ipvs, dest, false);
  945. LeaveFunction(2);
  946. return 0;
  947. }
/* Trash expiry timer: free dests that stayed unreferenced long enough.
 * Two-phase per entry: the first unreferenced sighting stamps
 * idle_start; a later run frees it once IP_VS_DEST_TRASH_PERIOD passed.
 */
static void ip_vs_dest_trash_expire(unsigned long data)
{
	struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
	struct ip_vs_dest *dest, *next;
	unsigned long now = jiffies;

	spin_lock(&ipvs->dest_trash_lock);
	list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
		/* still referenced by connections: keep it around */
		if (atomic_read(&dest->refcnt) > 0)
			continue;
		if (dest->idle_start) {
			/* not idle for a full period yet */
			if (time_before(now, dest->idle_start +
					     IP_VS_DEST_TRASH_PERIOD))
				continue;
		} else {
			/* start the idle clock; max with 1UL so the stamp
			 * can never be 0, which means "not started"
			 */
			dest->idle_start = max(1UL, now);
			continue;
		}
		IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port));
		list_del(&dest->t_list);
		ip_vs_dest_free(dest);
	}
	/* re-arm while entries remain */
	if (!list_empty(&ipvs->dest_trash))
		mod_timer(&ipvs->dest_trash_timer,
			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
	spin_unlock(&ipvs->dest_trash_lock);
}
  977. /*
  978. * Add a service into the service hash table
  979. */
  980. static int
  981. ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
  982. struct ip_vs_service **svc_p)
  983. {
  984. int ret = 0, i;
  985. struct ip_vs_scheduler *sched = NULL;
  986. struct ip_vs_pe *pe = NULL;
  987. struct ip_vs_service *svc = NULL;
  988. /* increase the module use count */
  989. ip_vs_use_count_inc();
  990. /* Lookup the scheduler by 'u->sched_name' */
  991. if (strcmp(u->sched_name, "none")) {
  992. sched = ip_vs_scheduler_get(u->sched_name);
  993. if (!sched) {
  994. pr_info("Scheduler module ip_vs_%s not found\n",
  995. u->sched_name);
  996. ret = -ENOENT;
  997. goto out_err;
  998. }
  999. }
  1000. if (u->pe_name && *u->pe_name) {
  1001. pe = ip_vs_pe_getbyname(u->pe_name);
  1002. if (pe == NULL) {
  1003. pr_info("persistence engine module ip_vs_pe_%s "
  1004. "not found\n", u->pe_name);
  1005. ret = -ENOENT;
  1006. goto out_err;
  1007. }
  1008. }
  1009. #ifdef CONFIG_IP_VS_IPV6
  1010. if (u->af == AF_INET6) {
  1011. __u32 plen = (__force __u32) u->netmask;
  1012. if (plen < 1 || plen > 128) {
  1013. ret = -EINVAL;
  1014. goto out_err;
  1015. }
  1016. }
  1017. #endif
  1018. svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
  1019. if (svc == NULL) {
  1020. IP_VS_DBG(1, "%s(): no memory\n", __func__);
  1021. ret = -ENOMEM;
  1022. goto out_err;
  1023. }
  1024. svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
  1025. if (!svc->stats.cpustats) {
  1026. ret = -ENOMEM;
  1027. goto out_err;
  1028. }
  1029. for_each_possible_cpu(i) {
  1030. struct ip_vs_cpu_stats *ip_vs_stats;
  1031. ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
  1032. u64_stats_init(&ip_vs_stats->syncp);
  1033. }
  1034. /* I'm the first user of the service */
  1035. atomic_set(&svc->refcnt, 0);
  1036. svc->af = u->af;
  1037. svc->protocol = u->protocol;
  1038. ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
  1039. svc->port = u->port;
  1040. svc->fwmark = u->fwmark;
  1041. svc->flags = u->flags;
  1042. svc->timeout = u->timeout * HZ;
  1043. svc->netmask = u->netmask;
  1044. svc->ipvs = ipvs;
  1045. INIT_LIST_HEAD(&svc->destinations);
  1046. spin_lock_init(&svc->sched_lock);
  1047. spin_lock_init(&svc->stats.lock);
  1048. /* Bind the scheduler */
  1049. if (sched) {
  1050. ret = ip_vs_bind_scheduler(svc, sched);
  1051. if (ret)
  1052. goto out_err;
  1053. sched = NULL;
  1054. }
  1055. /* Bind the ct retriever */
  1056. RCU_INIT_POINTER(svc->pe, pe);
  1057. pe = NULL;
  1058. /* Update the virtual service counters */
  1059. if (svc->port == FTPPORT)
  1060. atomic_inc(&ipvs->ftpsvc_counter);
  1061. else if (svc->port == 0)
  1062. atomic_inc(&ipvs->nullsvc_counter);
  1063. ip_vs_start_estimator(ipvs, &svc->stats);
  1064. /* Count only IPv4 services for old get/setsockopt interface */
  1065. if (svc->af == AF_INET)
  1066. ipvs->num_services++;
  1067. /* Hash the service into the service table */
  1068. ip_vs_svc_hash(svc);
  1069. *svc_p = svc;
  1070. /* Now there is a service - full throttle */
  1071. ipvs->enable = 1;
  1072. return 0;
  1073. out_err:
  1074. if (svc != NULL) {
  1075. ip_vs_unbind_scheduler(svc, sched);
  1076. ip_vs_service_free(svc);
  1077. }
  1078. ip_vs_scheduler_put(sched);
  1079. ip_vs_pe_put(pe);
  1080. /* decrease the module use count */
  1081. ip_vs_use_count_dec();
  1082. return ret;
  1083. }
  1084. /*
  1085. * Edit a service and bind it with a new scheduler
  1086. */
  1087. static int
  1088. ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
  1089. {
  1090. struct ip_vs_scheduler *sched = NULL, *old_sched;
  1091. struct ip_vs_pe *pe = NULL, *old_pe = NULL;
  1092. int ret = 0;
  1093. /*
  1094. * Lookup the scheduler, by 'u->sched_name'
  1095. */
  1096. if (strcmp(u->sched_name, "none")) {
  1097. sched = ip_vs_scheduler_get(u->sched_name);
  1098. if (!sched) {
  1099. pr_info("Scheduler module ip_vs_%s not found\n",
  1100. u->sched_name);
  1101. return -ENOENT;
  1102. }
  1103. }
  1104. old_sched = sched;
  1105. if (u->pe_name && *u->pe_name) {
  1106. pe = ip_vs_pe_getbyname(u->pe_name);
  1107. if (pe == NULL) {
  1108. pr_info("persistence engine module ip_vs_pe_%s "
  1109. "not found\n", u->pe_name);
  1110. ret = -ENOENT;
  1111. goto out;
  1112. }
  1113. old_pe = pe;
  1114. }
  1115. #ifdef CONFIG_IP_VS_IPV6
  1116. if (u->af == AF_INET6) {
  1117. __u32 plen = (__force __u32) u->netmask;
  1118. if (plen < 1 || plen > 128) {
  1119. ret = -EINVAL;
  1120. goto out;
  1121. }
  1122. }
  1123. #endif
  1124. old_sched = rcu_dereference_protected(svc->scheduler, 1);
  1125. if (sched != old_sched) {
  1126. if (old_sched) {
  1127. ip_vs_unbind_scheduler(svc, old_sched);
  1128. RCU_INIT_POINTER(svc->scheduler, NULL);
  1129. /* Wait all svc->sched_data users */
  1130. synchronize_rcu();
  1131. }
  1132. /* Bind the new scheduler */
  1133. if (sched) {
  1134. ret = ip_vs_bind_scheduler(svc, sched);
  1135. if (ret) {
  1136. ip_vs_scheduler_put(sched);
  1137. goto out;
  1138. }
  1139. }
  1140. }
  1141. /*
  1142. * Set the flags and timeout value
  1143. */
  1144. svc->flags = u->flags | IP_VS_SVC_F_HASHED;
  1145. svc->timeout = u->timeout * HZ;
  1146. svc->netmask = u->netmask;
  1147. old_pe = rcu_dereference_protected(svc->pe, 1);
  1148. if (pe != old_pe)
  1149. rcu_assign_pointer(svc->pe, pe);
  1150. out:
  1151. ip_vs_scheduler_put(old_sched);
  1152. ip_vs_pe_put(old_pe);
  1153. return ret;
  1154. }
  1155. /*
  1156. * Delete a service from the service list
  1157. * - The service must be unlinked, unlocked and not referenced!
  1158. * - We are called under _bh lock
  1159. */
  1160. static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
  1161. {
  1162. struct ip_vs_dest *dest, *nxt;
  1163. struct ip_vs_scheduler *old_sched;
  1164. struct ip_vs_pe *old_pe;
  1165. struct netns_ipvs *ipvs = svc->ipvs;
  1166. pr_info("%s: enter\n", __func__);
  1167. /* Count only IPv4 services for old get/setsockopt interface */
  1168. if (svc->af == AF_INET)
  1169. ipvs->num_services--;
  1170. ip_vs_stop_estimator(svc->ipvs, &svc->stats);
  1171. /* Unbind scheduler */
  1172. old_sched = rcu_dereference_protected(svc->scheduler, 1);
  1173. ip_vs_unbind_scheduler(svc, old_sched);
  1174. ip_vs_scheduler_put(old_sched);
  1175. /* Unbind persistence engine, keep svc->pe */
  1176. old_pe = rcu_dereference_protected(svc->pe, 1);
  1177. ip_vs_pe_put(old_pe);
  1178. /*
  1179. * Unlink the whole destination list
  1180. */
  1181. list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
  1182. __ip_vs_unlink_dest(svc, dest, 0);
  1183. __ip_vs_del_dest(svc->ipvs, dest, cleanup);
  1184. }
  1185. /*
  1186. * Update the virtual service counters
  1187. */
  1188. if (svc->port == FTPPORT)
  1189. atomic_dec(&ipvs->ftpsvc_counter);
  1190. else if (svc->port == 0)
  1191. atomic_dec(&ipvs->nullsvc_counter);
  1192. /*
  1193. * Free the service if nobody refers to it
  1194. */
  1195. __ip_vs_svc_put(svc, true);
  1196. /* decrease the module use count */
  1197. ip_vs_use_count_dec();
  1198. }
  1199. /*
  1200. * Unlink a service from list and try to delete it if its refcnt reached 0
  1201. */
  1202. static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
  1203. {
  1204. /* Hold svc to avoid double release from dest_trash */
  1205. atomic_inc(&svc->refcnt);
  1206. /*
  1207. * Unhash it from the service table
  1208. */
  1209. ip_vs_svc_unhash(svc);
  1210. __ip_vs_del_service(svc, cleanup);
  1211. }
  1212. /*
  1213. * Delete a service from the service list
  1214. */
  1215. static int ip_vs_del_service(struct ip_vs_service *svc)
  1216. {
  1217. if (svc == NULL)
  1218. return -EEXIST;
  1219. ip_vs_unlink_service(svc, false);
  1220. return 0;
  1221. }
  1222. /*
  1223. * Flush all the virtual services
  1224. */
  1225. static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
  1226. {
  1227. int idx;
  1228. struct ip_vs_service *svc;
  1229. struct hlist_node *n;
  1230. /*
  1231. * Flush the service table hashed by <netns,protocol,addr,port>
  1232. */
  1233. for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
  1234. hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
  1235. s_list) {
  1236. if (svc->ipvs == ipvs)
  1237. ip_vs_unlink_service(svc, cleanup);
  1238. }
  1239. }
  1240. /*
  1241. * Flush the service table hashed by fwmark
  1242. */
  1243. for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
  1244. hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
  1245. f_list) {
  1246. if (svc->ipvs == ipvs)
  1247. ip_vs_unlink_service(svc, cleanup);
  1248. }
  1249. }
  1250. return 0;
  1251. }
  1252. /*
  1253. * Delete service by {netns} in the service table.
  1254. * Called by __ip_vs_cleanup()
  1255. */
  1256. void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
  1257. {
  1258. EnterFunction(2);
  1259. /* Check for "full" addressed entries */
  1260. mutex_lock(&__ip_vs_mutex);
  1261. ip_vs_flush(ipvs, true);
  1262. mutex_unlock(&__ip_vs_mutex);
  1263. LeaveFunction(2);
  1264. }
/* Put all references for device (dst_cache).
 * Drops the dest's cached route if it points at @dev, so the device
 * can go away without dangling references.
 */
static inline void
ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
{
	struct ip_vs_dest_dst *dest_dst;

	spin_lock_bh(&dest->dst_lock);
	dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
	if (dest_dst && dest_dst->dst_cache->dev == dev) {
		IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
			      dev->name,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port),
			      atomic_read(&dest->refcnt));
		__ip_vs_dst_cache_reset(dest);
	}
	spin_unlock_bh(&dest->dst_lock);
}
/* Netdev event receiver
 * Currently only NETDEV_DOWN is handled to release refs to cached dsts
 *
 * Walks every service (both hash tables) of the device's netns plus
 * the dest trash, and drops cached routes that point at the device.
 */
static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	unsigned int idx;

	if (event != NETDEV_DOWN || !ipvs)
		return NOTIFY_DONE;
	IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
	EnterFunction(2);
	mutex_lock(&__ip_vs_mutex);
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (svc->ipvs == ipvs) {
				list_for_each_entry(dest, &svc->destinations,
						    n_list) {
					ip_vs_forget_dev(dest, dev);
				}
			}
		}

		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (svc->ipvs == ipvs) {
				list_for_each_entry(dest, &svc->destinations,
						    n_list) {
					ip_vs_forget_dev(dest, dev);
				}
			}
		}
	}

	/* trashed dests may still hold cached routes too */
	spin_lock_bh(&ipvs->dest_trash_lock);
	list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
		ip_vs_forget_dev(dest, dev);
	}
	spin_unlock_bh(&ipvs->dest_trash_lock);
	mutex_unlock(&__ip_vs_mutex);
	LeaveFunction(2);
	return NOTIFY_DONE;
}
  1326. /*
  1327. * Zero counters in a service or all services
  1328. */
  1329. static int ip_vs_zero_service(struct ip_vs_service *svc)
  1330. {
  1331. struct ip_vs_dest *dest;
  1332. list_for_each_entry(dest, &svc->destinations, n_list) {
  1333. ip_vs_zero_stats(&dest->stats);
  1334. }
  1335. ip_vs_zero_stats(&svc->stats);
  1336. return 0;
  1337. }
/* Zero the stats of every service owned by @ipvs (both hash tables)
 * plus the netns-wide totals. Always returns 0.
 */
static int ip_vs_zero_all(struct netns_ipvs *ipvs)
{
	int idx;
	struct ip_vs_service *svc;

	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (svc->ipvs == ipvs)
				ip_vs_zero_service(svc);
		}
	}

	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (svc->ipvs == ipvs)
				ip_vs_zero_service(svc);
		}
	}

	ip_vs_zero_stats(&ipvs->tot_stats);
	return 0;
}
#ifdef CONFIG_SYSCTL

/* Integer bounds for sysctl range checking — presumably wired up as
 * ctl_table .extra1/.extra2 entries later in the file; the uses are
 * not visible in this chunk (NOTE: confirm against the full table).
 */
static int zero;
static int three = 3;
  1360. static int
  1361. proc_do_defense_mode(struct ctl_table *table, int write,
  1362. void __user *buffer, size_t *lenp, loff_t *ppos)
  1363. {
  1364. struct netns_ipvs *ipvs = table->extra2;
  1365. int *valp = table->data;
  1366. int val = *valp;
  1367. int rc;
  1368. rc = proc_dointvec(table, write, buffer, lenp, ppos);
  1369. if (write && (*valp != val)) {
  1370. if ((*valp < 0) || (*valp > 3)) {
  1371. /* Restore the correct value */
  1372. *valp = val;
  1373. } else {
  1374. update_defense_level(ipvs);
  1375. }
  1376. }
  1377. return rc;
  1378. }
/* Sysctl handler for the sync_threshold pair [threshold, period].
 * Rejects negative values and threshold >= period (a zero period
 * disables that check) by restoring the previous pair.
 */
static int
proc_do_sync_threshold(struct ctl_table *table, int write,
		       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = table->data;
	int val[2];
	int rc;

	/* backup the value first */
	memcpy(val, valp, sizeof(val));

	rc = proc_dointvec(table, write, buffer, lenp, ppos);
	if (write && (valp[0] < 0 || valp[1] < 0 ||
	    (valp[0] >= valp[1] && valp[1]))) {
		/* Restore the correct value */
		memcpy(valp, val, sizeof(val));
	}
	return rc;
}
  1396. static int
  1397. proc_do_sync_mode(struct ctl_table *table, int write,
  1398. void __user *buffer, size_t *lenp, loff_t *ppos)
  1399. {
  1400. int *valp = table->data;
  1401. int val = *valp;
  1402. int rc;
  1403. rc = proc_dointvec(table, write, buffer, lenp, ppos);
  1404. if (write && (*valp != val)) {
  1405. if ((*valp < 0) || (*valp > 1)) {
  1406. /* Restore the correct value */
  1407. *valp = val;
  1408. }
  1409. }
  1410. return rc;
  1411. }
  1412. static int
  1413. proc_do_sync_ports(struct ctl_table *table, int write,
  1414. void __user *buffer, size_t *lenp, loff_t *ppos)
  1415. {
  1416. int *valp = table->data;
  1417. int val = *valp;
  1418. int rc;
  1419. rc = proc_dointvec(table, write, buffer, lenp, ppos);
  1420. if (write && (*valp != val)) {
  1421. if (*valp < 1 || !is_power_of_2(*valp)) {
  1422. /* Restore the correct value */
  1423. *valp = val;
  1424. }
  1425. }
  1426. return rc;
  1427. }
  1428. /*
  1429. * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
  1430. * Do not change order or insert new entries without
  1431. * align with netns init in ip_vs_control_net_init()
  1432. */
  1433. static struct ctl_table vs_vars[] = {
  1434. {
  1435. .procname = "amemthresh",
  1436. .maxlen = sizeof(int),
  1437. .mode = 0644,
  1438. .proc_handler = proc_dointvec,
  1439. },
  1440. {
  1441. .procname = "am_droprate",
  1442. .maxlen = sizeof(int),
  1443. .mode = 0644,
  1444. .proc_handler = proc_dointvec,
  1445. },
  1446. {
  1447. .procname = "drop_entry",
  1448. .maxlen = sizeof(int),
  1449. .mode = 0644,
  1450. .proc_handler = proc_do_defense_mode,
  1451. },
  1452. {
  1453. .procname = "drop_packet",
  1454. .maxlen = sizeof(int),
  1455. .mode = 0644,
  1456. .proc_handler = proc_do_defense_mode,
  1457. },
  1458. #ifdef CONFIG_IP_VS_NFCT
  1459. {
  1460. .procname = "conntrack",
  1461. .maxlen = sizeof(int),
  1462. .mode = 0644,
  1463. .proc_handler = &proc_dointvec,
  1464. },
  1465. #endif
  1466. {
  1467. .procname = "secure_tcp",
  1468. .maxlen = sizeof(int),
  1469. .mode = 0644,
  1470. .proc_handler = proc_do_defense_mode,
  1471. },
  1472. {
  1473. .procname = "snat_reroute",
  1474. .maxlen = sizeof(int),
  1475. .mode = 0644,
  1476. .proc_handler = &proc_dointvec,
  1477. },
  1478. {
  1479. .procname = "sync_version",
  1480. .maxlen = sizeof(int),
  1481. .mode = 0644,
  1482. .proc_handler = &proc_do_sync_mode,
  1483. },
  1484. {
  1485. .procname = "sync_ports",
  1486. .maxlen = sizeof(int),
  1487. .mode = 0644,
  1488. .proc_handler = &proc_do_sync_ports,
  1489. },
  1490. {
  1491. .procname = "sync_persist_mode",
  1492. .maxlen = sizeof(int),
  1493. .mode = 0644,
  1494. .proc_handler = proc_dointvec,
  1495. },
  1496. {
  1497. .procname = "sync_qlen_max",
  1498. .maxlen = sizeof(unsigned long),
  1499. .mode = 0644,
  1500. .proc_handler = proc_doulongvec_minmax,
  1501. },
  1502. {
  1503. .procname = "sync_sock_size",
  1504. .maxlen = sizeof(int),
  1505. .mode = 0644,
  1506. .proc_handler = proc_dointvec,
  1507. },
  1508. {
  1509. .procname = "cache_bypass",
  1510. .maxlen = sizeof(int),
  1511. .mode = 0644,
  1512. .proc_handler = proc_dointvec,
  1513. },
  1514. {
  1515. .procname = "expire_nodest_conn",
  1516. .maxlen = sizeof(int),
  1517. .mode = 0644,
  1518. .proc_handler = proc_dointvec,
  1519. },
  1520. {
  1521. .procname = "sloppy_tcp",
  1522. .maxlen = sizeof(int),
  1523. .mode = 0644,
  1524. .proc_handler = proc_dointvec,
  1525. },
  1526. {
  1527. .procname = "sloppy_sctp",
  1528. .maxlen = sizeof(int),
  1529. .mode = 0644,
  1530. .proc_handler = proc_dointvec,
  1531. },
  1532. {
  1533. .procname = "expire_quiescent_template",
  1534. .maxlen = sizeof(int),
  1535. .mode = 0644,
  1536. .proc_handler = proc_dointvec,
  1537. },
  1538. {
  1539. .procname = "sync_threshold",
  1540. .maxlen =
  1541. sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
  1542. .mode = 0644,
  1543. .proc_handler = proc_do_sync_threshold,
  1544. },
  1545. {
  1546. .procname = "sync_refresh_period",
  1547. .maxlen = sizeof(int),
  1548. .mode = 0644,
  1549. .proc_handler = proc_dointvec_jiffies,
  1550. },
  1551. {
  1552. .procname = "sync_retries",
  1553. .maxlen = sizeof(int),
  1554. .mode = 0644,
  1555. .proc_handler = proc_dointvec_minmax,
  1556. .extra1 = &zero,
  1557. .extra2 = &three,
  1558. },
  1559. {
  1560. .procname = "nat_icmp_send",
  1561. .maxlen = sizeof(int),
  1562. .mode = 0644,
  1563. .proc_handler = proc_dointvec,
  1564. },
  1565. {
  1566. .procname = "pmtu_disc",
  1567. .maxlen = sizeof(int),
  1568. .mode = 0644,
  1569. .proc_handler = proc_dointvec,
  1570. },
  1571. {
  1572. .procname = "backup_only",
  1573. .maxlen = sizeof(int),
  1574. .mode = 0644,
  1575. .proc_handler = proc_dointvec,
  1576. },
  1577. {
  1578. .procname = "conn_reuse_mode",
  1579. .maxlen = sizeof(int),
  1580. .mode = 0644,
  1581. .proc_handler = proc_dointvec,
  1582. },
  1583. {
  1584. .procname = "schedule_icmp",
  1585. .maxlen = sizeof(int),
  1586. .mode = 0644,
  1587. .proc_handler = proc_dointvec,
  1588. },
  1589. {
  1590. .procname = "ignore_tunneled",
  1591. .maxlen = sizeof(int),
  1592. .mode = 0644,
  1593. .proc_handler = proc_dointvec,
  1594. },
  1595. #ifdef CONFIG_IP_VS_DEBUG
  1596. {
  1597. .procname = "debug_level",
  1598. .data = &sysctl_ip_vs_debug_level,
  1599. .maxlen = sizeof(int),
  1600. .mode = 0644,
  1601. .proc_handler = proc_dointvec,
  1602. },
  1603. #endif
  1604. { }
  1605. };
  1606. #endif
  1607. #ifdef CONFIG_PROC_FS
/* Iterator state for the /proc/net/ip_vs seq_file walk: which of the two
 * service hash tables we are in and the current bucket index. */
struct ip_vs_iter {
	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
	struct hlist_head *table;  /* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;                /* current hash bucket in 'table' */
};
  1613. /*
  1614. * Write the contents of the VS rule table to a PROCfs file.
  1615. * (It is kept just for backward compatibility)
  1616. */
  1617. static inline const char *ip_vs_fwd_name(unsigned int flags)
  1618. {
  1619. switch (flags & IP_VS_CONN_F_FWD_MASK) {
  1620. case IP_VS_CONN_F_LOCALNODE:
  1621. return "Local";
  1622. case IP_VS_CONN_F_TUNNEL:
  1623. return "Tunnel";
  1624. case IP_VS_CONN_F_DROUTE:
  1625. return "Route";
  1626. default:
  1627. return "Masq";
  1628. }
  1629. }
  1630. /* Get the Nth entry in the two lists */
/*
 * Return the pos-th service belonging to this netns, scanning first the
 * <protocol,addr,port> hash table and then the fwmark table, and record
 * the table/bucket reached in the iterator.  Caller holds rcu_read_lock.
 * Returns NULL when pos is past the last service.
 */
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_iter *iter = seq->private;
	int idx;
	struct ip_vs_service *svc;

	/* look in hash by protocol */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
			/* only count entries of this netns */
			if ((svc->ipvs == ipvs) && pos-- == 0) {
				iter->table = ip_vs_svc_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	/* keep looking in fwmark */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
					 f_list) {
			if ((svc->ipvs == ipvs) && pos-- == 0) {
				iter->table = ip_vs_svc_fwm_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	return NULL;
}
  1661. static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
  1662. __acquires(RCU)
  1663. {
  1664. rcu_read_lock();
  1665. return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
  1666. }
/*
 * seq_file ->next: advance to the following service.  Continues within the
 * current bucket, then scans later buckets, and when the protocol-hashed
 * table is exhausted falls through to the fwmark table.  Returns NULL at
 * the end.  Runs under the rcu_read_lock taken in ->start.
 */
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct hlist_node *e;
	struct ip_vs_iter *iter;
	struct ip_vs_service *svc;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_info_array(seq,0);

	svc = v;
	iter = seq->private;

	if (iter->table == ip_vs_svc_table) {
		/* next service in table hashed by protocol */
		e = rcu_dereference(hlist_next_rcu(&svc->s_list));
		if (e)
			return hlist_entry(e, struct ip_vs_service, s_list);

		/* bucket exhausted: first entry of the next non-empty bucket */
		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
			hlist_for_each_entry_rcu(svc,
						 &ip_vs_svc_table[iter->bucket],
						 s_list) {
				return svc;
			}
		}

		/* protocol table done: restart scan in the fwmark table */
		iter->table = ip_vs_svc_fwm_table;
		iter->bucket = -1;
		goto scan_fwmark;
	}

	/* next service in hashed by fwmark */
	e = rcu_dereference(hlist_next_rcu(&svc->f_list));
	if (e)
		return hlist_entry(e, struct ip_vs_service, f_list);

 scan_fwmark:
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		hlist_for_each_entry_rcu(svc,
					 &ip_vs_svc_fwm_table[iter->bucket],
					 f_list)
			return svc;
	}

	return NULL;
}
/* seq_file ->stop: drop the RCU read lock taken in ip_vs_info_seq_start. */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
/*
 * seq_file ->show: print the header for SEQ_START_TOKEN, otherwise one
 * service line (address or FWM form) followed by one line per destination.
 * Output format is a legacy ABI consumed by ipvsadm - do not change it.
 */
static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_printf(seq,
			"IP Virtual Server version %d.%d.%d (size=%d)\n",
			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
		seq_puts(seq,
			 "Prot LocalAddress:Port Scheduler Flags\n");
		seq_puts(seq,
			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
	} else {
		struct net *net = seq_file_net(seq);
		struct netns_ipvs *ipvs = net_ipvs(net);
		const struct ip_vs_service *svc = v;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_dest *dest;
		struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
		char *sched_name = sched ? sched->name : "none";

		/* iterator may hand us services of other namespaces; skip them */
		if (svc->ipvs != ipvs)
			return 0;
		if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
			if (svc->af == AF_INET6)
				seq_printf(seq, "%s [%pI6]:%04X %s ",
					   ip_vs_proto_name(svc->protocol),
					   &svc->addr.in6,
					   ntohs(svc->port),
					   sched_name);
			else
#endif
				seq_printf(seq, "%s %08X:%04X %s %s ",
					   ip_vs_proto_name(svc->protocol),
					   ntohl(svc->addr.ip),
					   ntohs(svc->port),
					   sched_name,
					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		} else {
			seq_printf(seq, "FWM  %08X %s %s",
				   svc->fwmark, sched_name,
				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		}

		/* timeout is kept in jiffies internally; shown raw here */
		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
			seq_printf(seq, "persistent %d %08X\n",
				svc->timeout,
				ntohl(svc->netmask));
		else
			seq_putc(seq, '\n');

		list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
#ifdef CONFIG_IP_VS_IPV6
			if (dest->af == AF_INET6)
				seq_printf(seq,
					   "  -> [%pI6]:%04X"
					   "      %-7s %-6d %-10d %-10d\n",
					   &dest->addr.in6,
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));
			else
#endif
				seq_printf(seq,
					   "  -> %08X:%04X      "
					   "%-7s %-6d %-10d %-10d\n",
					   ntohl(dest->addr.ip),
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));
		}
	}
	return 0;
}
/* seq_file operations for /proc/net/ip_vs */
static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
/* open() for /proc/net/ip_vs: netns-aware seq_file with ip_vs_iter state. */
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip_vs_info_seq_ops,
			sizeof(struct ip_vs_iter));
}
/* file_operations for /proc/net/ip_vs */
static const struct file_operations ip_vs_info_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip_vs_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
  1803. static int ip_vs_stats_show(struct seq_file *seq, void *v)
  1804. {
  1805. struct net *net = seq_file_single_net(seq);
  1806. struct ip_vs_kstats show;
  1807. /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
  1808. seq_puts(seq,
  1809. " Total Incoming Outgoing Incoming Outgoing\n");
  1810. seq_printf(seq,
  1811. " Conns Packets Packets Bytes Bytes\n");
  1812. ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
  1813. seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
  1814. (unsigned long long)show.conns,
  1815. (unsigned long long)show.inpkts,
  1816. (unsigned long long)show.outpkts,
  1817. (unsigned long long)show.inbytes,
  1818. (unsigned long long)show.outbytes);
  1819. /* 01234567 01234567 01234567 0123456701234567 0123456701234567*/
  1820. seq_puts(seq,
  1821. " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
  1822. seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
  1823. (unsigned long long)show.cps,
  1824. (unsigned long long)show.inpps,
  1825. (unsigned long long)show.outpps,
  1826. (unsigned long long)show.inbps,
  1827. (unsigned long long)show.outbps);
  1828. return 0;
  1829. }
/* open() for /proc/net/ip_vs_stats (single-shot, netns aware). */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ip_vs_stats_show);
}
/* file_operations for /proc/net/ip_vs_stats */
static const struct file_operations ip_vs_stats_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,
};
  1841. static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
  1842. {
  1843. struct net *net = seq_file_single_net(seq);
  1844. struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
  1845. struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
  1846. struct ip_vs_kstats kstats;
  1847. int i;
  1848. /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
  1849. seq_puts(seq,
  1850. " Total Incoming Outgoing Incoming Outgoing\n");
  1851. seq_printf(seq,
  1852. "CPU Conns Packets Packets Bytes Bytes\n");
  1853. for_each_possible_cpu(i) {
  1854. struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
  1855. unsigned int start;
  1856. u64 conns, inpkts, outpkts, inbytes, outbytes;
  1857. do {
  1858. start = u64_stats_fetch_begin_irq(&u->syncp);
  1859. conns = u->cnt.conns;
  1860. inpkts = u->cnt.inpkts;
  1861. outpkts = u->cnt.outpkts;
  1862. inbytes = u->cnt.inbytes;
  1863. outbytes = u->cnt.outbytes;
  1864. } while (u64_stats_fetch_retry_irq(&u->syncp, start));
  1865. seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
  1866. i, (u64)conns, (u64)inpkts,
  1867. (u64)outpkts, (u64)inbytes,
  1868. (u64)outbytes);
  1869. }
  1870. ip_vs_copy_stats(&kstats, tot_stats);
  1871. seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n",
  1872. (unsigned long long)kstats.conns,
  1873. (unsigned long long)kstats.inpkts,
  1874. (unsigned long long)kstats.outpkts,
  1875. (unsigned long long)kstats.inbytes,
  1876. (unsigned long long)kstats.outbytes);
  1877. /* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
  1878. seq_puts(seq,
  1879. " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
  1880. seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n",
  1881. kstats.cps,
  1882. kstats.inpps,
  1883. kstats.outpps,
  1884. kstats.inbps,
  1885. kstats.outbps);
  1886. return 0;
  1887. }
/* open() for /proc/net/ip_vs_stats_percpu (single-shot, netns aware). */
static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ip_vs_stats_percpu_show);
}
/* file_operations for /proc/net/ip_vs_stats_percpu */
static const struct file_operations ip_vs_stats_percpu_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_percpu_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,
};
  1899. #endif
  1900. /*
  1901. * Set timeout values for tcp tcpfin udp in the timeout_table.
  1902. */
  1903. static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
  1904. {
  1905. #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
  1906. struct ip_vs_proto_data *pd;
  1907. #endif
  1908. IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
  1909. u->tcp_timeout,
  1910. u->tcp_fin_timeout,
  1911. u->udp_timeout);
  1912. #ifdef CONFIG_IP_VS_PROTO_TCP
  1913. if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) ||
  1914. u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) {
  1915. return -EINVAL;
  1916. }
  1917. #endif
  1918. #ifdef CONFIG_IP_VS_PROTO_UDP
  1919. if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ))
  1920. return -EINVAL;
  1921. #endif
  1922. #ifdef CONFIG_IP_VS_PROTO_TCP
  1923. if (u->tcp_timeout) {
  1924. pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
  1925. pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
  1926. = u->tcp_timeout * HZ;
  1927. }
  1928. if (u->tcp_fin_timeout) {
  1929. pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
  1930. pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
  1931. = u->tcp_fin_timeout * HZ;
  1932. }
  1933. #endif
  1934. #ifdef CONFIG_IP_VS_PROTO_UDP
  1935. if (u->udp_timeout) {
  1936. pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
  1937. pd->timeout_table[IP_VS_UDP_S_NORMAL]
  1938. = u->udp_timeout * HZ;
  1939. }
  1940. #endif
  1941. return 0;
  1942. }
  1943. #define CMDID(cmd) (cmd - IP_VS_BASE_CTL)
/* Wire layout for the *DEST sockopt commands: a service selector
 * immediately followed by the destination parameters. */
struct ip_vs_svcdest_user {
	struct ip_vs_service_user	s;
	struct ip_vs_dest_user		d;
};
/* Exact byte length expected from userspace for each IP_VS_SO_SET_*
 * command, indexed by CMDID().  Commands not listed (e.g. FLUSH)
 * default to 0, i.e. no argument. */
static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = {
	[CMDID(IP_VS_SO_SET_ADD)]         = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_EDIT)]        = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_DEL)]         = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_ADDDEST)]     = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_DELDEST)]     = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_EDITDEST)]    = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_TIMEOUT)]     = sizeof(struct ip_vs_timeout_user),
	[CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user),
	[CMDID(IP_VS_SO_SET_STOPDAEMON)]  = sizeof(struct ip_vs_daemon_user),
	[CMDID(IP_VS_SO_SET_ZERO)]        = sizeof(struct ip_vs_service_user),
};
/* Union of every SET argument type; its size bounds the on-stack copy
 * buffer in do_ip_vs_set_ctl(). */
union ip_vs_set_arglen {
	struct ip_vs_service_user	field_IP_VS_SO_SET_ADD;
	struct ip_vs_service_user	field_IP_VS_SO_SET_EDIT;
	struct ip_vs_service_user	field_IP_VS_SO_SET_DEL;
	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_ADDDEST;
	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_DELDEST;
	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_EDITDEST;
	struct ip_vs_timeout_user	field_IP_VS_SO_SET_TIMEOUT;
	struct ip_vs_daemon_user	field_IP_VS_SO_SET_STARTDAEMON;
	struct ip_vs_daemon_user	field_IP_VS_SO_SET_STOPDAEMON;
	struct ip_vs_service_user	field_IP_VS_SO_SET_ZERO;
};

#define MAX_SET_ARGLEN	sizeof(union ip_vs_set_arglen)
  1973. static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
  1974. struct ip_vs_service_user *usvc_compat)
  1975. {
  1976. memset(usvc, 0, sizeof(*usvc));
  1977. usvc->af = AF_INET;
  1978. usvc->protocol = usvc_compat->protocol;
  1979. usvc->addr.ip = usvc_compat->addr;
  1980. usvc->port = usvc_compat->port;
  1981. usvc->fwmark = usvc_compat->fwmark;
  1982. /* Deep copy of sched_name is not needed here */
  1983. usvc->sched_name = usvc_compat->sched_name;
  1984. usvc->flags = usvc_compat->flags;
  1985. usvc->timeout = usvc_compat->timeout;
  1986. usvc->netmask = usvc_compat->netmask;
  1987. }
  1988. static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
  1989. struct ip_vs_dest_user *udest_compat)
  1990. {
  1991. memset(udest, 0, sizeof(*udest));
  1992. udest->addr.ip = udest_compat->addr;
  1993. udest->port = udest_compat->port;
  1994. udest->conn_flags = udest_compat->conn_flags;
  1995. udest->weight = udest_compat->weight;
  1996. udest->u_threshold = udest_compat->u_threshold;
  1997. udest->l_threshold = udest_compat->l_threshold;
  1998. udest->af = AF_INET;
  1999. }
/*
 * setsockopt() entry for all IP_VS_SO_SET_* commands.
 *
 * Validates capability and argument length, copies the argument to an
 * on-stack buffer, then dispatches: daemon start/stop under sync_mutex,
 * everything else under __ip_vs_mutex.  The module use count is held
 * across the whole operation.  Returns 0 or a negative errno.
 */
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	struct net *net = sock_net(sk);
	int ret;
	unsigned char arg[MAX_SET_ARGLEN];
	struct ip_vs_service_user *usvc_compat;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_service *svc;
	struct ip_vs_dest_user *udest_compat;
	struct ip_vs_dest_user_kern udest;
	struct netns_ipvs *ipvs = net_ipvs(net);

	/* arglen table entries are unsigned char, so args must fit */
	BUILD_BUG_ON(sizeof(arg) > 255);

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
		return -EINVAL;

	/* exact length match required; prevents short/oversized copies */
	if (len != set_arglen[CMDID(cmd)]) {
		IP_VS_DBG(1, "set_ctl: len %u != %u\n",
			  len, set_arglen[CMDID(cmd)]);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, len) != 0)
		return -EFAULT;

	/* increase the module use count */
	ip_vs_use_count_inc();

	/* Handle daemons since they have another lock */
	if (cmd == IP_VS_SO_SET_STARTDAEMON ||
	    cmd == IP_VS_SO_SET_STOPDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;

		if (cmd == IP_VS_SO_SET_STARTDAEMON) {
			struct ipvs_sync_daemon_cfg cfg;

			memset(&cfg, 0, sizeof(cfg));
			ret = -EINVAL;
			/* reject empty or unterminated interface names */
			if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
				    sizeof(cfg.mcast_ifn)) <= 0)
				goto out_dec;
			cfg.syncid = dm->syncid;
			ret = start_sync_thread(ipvs, &cfg, dm->state);
		} else {
			mutex_lock(&ipvs->sync_mutex);
			ret = stop_sync_thread(ipvs, dm->state);
			mutex_unlock(&ipvs->sync_mutex);
		}
		goto out_dec;
	}

	mutex_lock(&__ip_vs_mutex);
	if (cmd == IP_VS_SO_SET_FLUSH) {
		/* Flush the virtual service */
		ret = ip_vs_flush(ipvs, false);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
		/* Set timeout values for (tcp tcpfin udp) */
		ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
		goto out_unlock;
	}

	usvc_compat = (struct ip_vs_service_user *)arg;
	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);

	/* We only use the new structs internally, so copy userspace compat
	 * structs to extended internal versions */
	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
	ip_vs_copy_udest_compat(&udest, udest_compat);

	if (cmd == IP_VS_SO_SET_ZERO) {
		/* if no service address is set, zero counters in all */
		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
			ret = ip_vs_zero_all(ipvs);
			goto out_unlock;
		}
	}

	/* sched_name must be NUL-terminated within the compat buffer */
	if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
	    strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
	    IP_VS_SCHEDNAME_MAXLEN) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
	    usvc.protocol != IPPROTO_SCTP) {
		pr_err("set_ctl: invalid protocol: %d %pI4:%d\n",
		       usvc.protocol, &usvc.addr.ip,
		       ntohs(usvc.port));
		ret = -EFAULT;
		goto out_unlock;
	}

	/* Lookup the exact service by <protocol, addr, port> or fwmark */
	rcu_read_lock();
	if (usvc.fwmark == 0)
		svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
					   &usvc.addr, usvc.port);
	else
		svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
	rcu_read_unlock();
	/* every command except ADD requires an existing, matching service */
	if (cmd != IP_VS_SO_SET_ADD
	    && (svc == NULL || svc->protocol != usvc.protocol)) {
		ret = -ESRCH;
		goto out_unlock;
	}

	switch (cmd) {
	case IP_VS_SO_SET_ADD:
		if (svc != NULL)
			ret = -EEXIST;
		else
			ret = ip_vs_add_service(ipvs, &usvc, &svc);
		break;
	case IP_VS_SO_SET_EDIT:
		ret = ip_vs_edit_service(svc, &usvc);
		break;
	case IP_VS_SO_SET_DEL:
		ret = ip_vs_del_service(svc);
		if (!ret)
			goto out_unlock;
		break;
	case IP_VS_SO_SET_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	case IP_VS_SO_SET_ADDDEST:
		ret = ip_vs_add_dest(svc, &udest);
		break;
	case IP_VS_SO_SET_EDITDEST:
		ret = ip_vs_edit_dest(svc, &udest);
		break;
	case IP_VS_SO_SET_DELDEST:
		ret = ip_vs_del_dest(svc, &udest);
		break;
	default:
		ret = -EINVAL;
	}

  out_unlock:
	mutex_unlock(&__ip_vs_mutex);
  out_dec:
	/* decrease the module use count */
	ip_vs_use_count_dec();
	return ret;
}
/*
 * Fill a userspace-visible ip_vs_service_entry from a kernel service.
 * Caller must hold __ip_vs_mutex (scheduler deref is done with that as
 * the protection, hence rcu_dereference_protected).  Timeout is
 * converted from jiffies to seconds for userspace.
 */
static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
	struct ip_vs_scheduler *sched;
	struct ip_vs_kstats kstats;
	char *sched_name;

	sched = rcu_dereference_protected(src->scheduler, 1);
	sched_name = sched ? sched->name : "none";
	dst->protocol = src->protocol;
	dst->addr = src->addr.ip;
	dst->port = src->port;
	dst->fwmark = src->fwmark;
	strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
	dst->flags = src->flags;
	dst->timeout = src->timeout / HZ;
	dst->netmask = src->netmask;
	dst->num_dests = src->num_dests;
	ip_vs_copy_stats(&kstats, &src->stats);
	ip_vs_export_stats_user(&dst->stats, &kstats);
}
/*
 * Copy up to get->num_services IPv4 service entries of this netns to
 * userspace, walking the protocol-hashed table first and then the fwmark
 * table (same order as /proc output).  Caller holds __ip_vs_mutex.
 * Returns 0 or -EFAULT.
 */
static inline int
__ip_vs_get_service_entries(struct netns_ipvs *ipvs,
			    const struct ip_vs_get_services *get,
			    struct ip_vs_get_services __user *uptr)
{
	int idx, count=0;
	struct ip_vs_service *svc;
	struct ip_vs_service_entry entry;
	int ret = 0;

	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			/* Only expose IPv4 entries to old interface */
			if (svc->af != AF_INET || (svc->ipvs != ipvs))
				continue;

			/* stop once the user-supplied buffer is full */
			if (count >= get->num_services)
				goto out;
			memset(&entry, 0, sizeof(entry));
			ip_vs_copy_service(&entry, svc);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				goto out;
			}
			count++;
		}
	}

	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			/* Only expose IPv4 entries to old interface */
			if (svc->af != AF_INET || (svc->ipvs != ipvs))
				continue;

			if (count >= get->num_services)
				goto out;
			memset(&entry, 0, sizeof(entry));
			ip_vs_copy_service(&entry, svc);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				goto out;
			}
			count++;
		}
	}
out:
	return ret;
}
/*
 * Look up the service selected by 'get' (fwmark or <proto,addr,port>)
 * and copy up to get->num_dests of its destinations to userspace.
 * Only same-address-family (IPv4) dests are exposed via this legacy
 * interface.  Caller holds __ip_vs_mutex.  Returns 0, -ESRCH or -EFAULT.
 */
static inline int
__ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get,
			 struct ip_vs_get_dests __user *uptr)
{
	struct ip_vs_service *svc;
	union nf_inet_addr addr = { .ip = get->addr };
	int ret = 0;

	rcu_read_lock();
	if (get->fwmark)
		svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark);
	else
		svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr,
					   get->port);
	rcu_read_unlock();

	if (svc) {
		int count = 0;
		struct ip_vs_dest *dest;
		struct ip_vs_dest_entry entry;
		struct ip_vs_kstats kstats;

		memset(&entry, 0, sizeof(entry));
		list_for_each_entry(dest, &svc->destinations, n_list) {
			if (count >= get->num_dests)
				break;

			/* Cannot expose heterogeneous members via sockopt
			 * interface
			 */
			if (dest->af != svc->af)
				continue;

			entry.addr = dest->addr.ip;
			entry.port = dest->port;
			entry.conn_flags = atomic_read(&dest->conn_flags);
			entry.weight = atomic_read(&dest->weight);
			entry.u_threshold = dest->u_threshold;
			entry.l_threshold = dest->l_threshold;
			entry.activeconns = atomic_read(&dest->activeconns);
			entry.inactconns = atomic_read(&dest->inactconns);
			entry.persistconns = atomic_read(&dest->persistconns);
			ip_vs_copy_stats(&kstats, &dest->stats);
			ip_vs_export_stats_user(&entry.stats, &kstats);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				break;
			}
			count++;
		}
	} else
		ret = -ESRCH;
	return ret;
}
/*
 * Report the current tcp/tcpfin/udp timeouts to userspace, converted
 * from jiffies to seconds.  Timeouts for protocols compiled out stay 0
 * thanks to the initial memset.
 */
static inline void
__ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
	struct ip_vs_proto_data *pd;
#endif

	memset(u, 0, sizeof (*u));

#ifdef CONFIG_IP_VS_PROTO_TCP
	pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
	u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
	u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
	pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
	u->udp_timeout =
			pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
}
/* Minimum byte length userspace must provide for each IP_VS_SO_GET_*
 * command, indexed by CMDID(). */
static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = {
	[CMDID(IP_VS_SO_GET_VERSION)]  = 64,
	[CMDID(IP_VS_SO_GET_INFO)]     = sizeof(struct ip_vs_getinfo),
	[CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services),
	[CMDID(IP_VS_SO_GET_SERVICE)]  = sizeof(struct ip_vs_service_entry),
	[CMDID(IP_VS_SO_GET_DESTS)]    = sizeof(struct ip_vs_get_dests),
	[CMDID(IP_VS_SO_GET_TIMEOUT)]  = sizeof(struct ip_vs_timeout_user),
	[CMDID(IP_VS_SO_GET_DAEMON)]   = 2 * sizeof(struct ip_vs_daemon_user),
};
/* Union of every GET argument type; its size bounds the on-stack copy
 * buffer in do_ip_vs_get_ctl(). */
union ip_vs_get_arglen {
	char field_IP_VS_SO_GET_VERSION[64];
	struct ip_vs_getinfo field_IP_VS_SO_GET_INFO;
	struct ip_vs_get_services field_IP_VS_SO_GET_SERVICES;
	struct ip_vs_service_entry field_IP_VS_SO_GET_SERVICE;
	struct ip_vs_get_dests field_IP_VS_SO_GET_DESTS;
	struct ip_vs_timeout_user field_IP_VS_SO_GET_TIMEOUT;
	struct ip_vs_daemon_user field_IP_VS_SO_GET_DAEMON[2];
};

#define MAX_GET_ARGLEN	sizeof(union ip_vs_get_arglen)
/*
 * getsockopt() entry for all IP_VS_SO_GET_* commands.
 *
 * Copies the request header to an on-stack buffer, handles DAEMON under
 * sync_mutex, everything else under __ip_vs_mutex, and copies results
 * back to 'user'.  *len is updated only for GET_VERSION.  Returns 0 or
 * a negative errno.
 */
static int
do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	unsigned char arg[MAX_GET_ARGLEN];
	int ret = 0;
	unsigned int copylen;
	struct net *net = sock_net(sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	BUG_ON(!net);
	/* arglen table entries are unsigned char, so args must fit */
	BUILD_BUG_ON(sizeof(arg) > 255);

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
		return -EINVAL;

	copylen = get_arglen[CMDID(cmd)];
	if (*len < (int) copylen) {
		IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, copylen) != 0)
		return -EFAULT;
	/*
	 * Handle daemons first since it has its own locking
	 */
	if (cmd == IP_VS_SO_GET_DAEMON) {
		struct ip_vs_daemon_user d[2];

		/* zero both slots; unset daemon roles read back as zeros */
		memset(&d, 0, sizeof(d));
		mutex_lock(&ipvs->sync_mutex);
		if (ipvs->sync_state & IP_VS_STATE_MASTER) {
			d[0].state = IP_VS_STATE_MASTER;
			strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
				sizeof(d[0].mcast_ifn));
			d[0].syncid = ipvs->mcfg.syncid;
		}
		if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
			d[1].state = IP_VS_STATE_BACKUP;
			strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
				sizeof(d[1].mcast_ifn));
			d[1].syncid = ipvs->bcfg.syncid;
		}
		if (copy_to_user(user, &d, sizeof(d)) != 0)
			ret = -EFAULT;
		mutex_unlock(&ipvs->sync_mutex);
		return ret;
	}

	mutex_lock(&__ip_vs_mutex);
	switch (cmd) {
	case IP_VS_SO_GET_VERSION:
	{
		char buf[64];

		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
			ret = -EFAULT;
			goto out;
		}
		*len = strlen(buf)+1;
	}
	break;

	case IP_VS_SO_GET_INFO:
	{
		/* NOTE(review): 'info' is not memset before copy_to_user;
		 * if struct ip_vs_getinfo ever has padding this could leak
		 * stack bytes - confirm the struct is padding-free. */
		struct ip_vs_getinfo info;
		info.version = IP_VS_VERSION_CODE;
		info.size = ip_vs_conn_tab_size;
		info.num_services = ipvs->num_services;
		if (copy_to_user(user, &info, sizeof(info)) != 0)
			ret = -EFAULT;
	}
	break;

	case IP_VS_SO_GET_SERVICES:
	{
		struct ip_vs_get_services *get;
		int size;

		get = (struct ip_vs_get_services *)arg;
		/* NOTE(review): 'size' is a user-influenced int product;
		 * a huge num_services could overflow it - the subsequent
		 * *len != size check limits exploitability, but verify. */
		size = sizeof(*get) +
			sizeof(struct ip_vs_service_entry) * get->num_services;
		if (*len != size) {
			pr_err("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_service_entries(ipvs, get, user);
	}
	break;

	case IP_VS_SO_GET_SERVICE:
	{
		struct ip_vs_service_entry *entry;
		struct ip_vs_service *svc;
		union nf_inet_addr addr;

		entry = (struct ip_vs_service_entry *)arg;
		addr.ip = entry->addr;
		rcu_read_lock();
		if (entry->fwmark)
			svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark);
		else
			svc = __ip_vs_service_find(ipvs, AF_INET,
						   entry->protocol, &addr,
						   entry->port);
		rcu_read_unlock();
		if (svc) {
			ip_vs_copy_service(entry, svc);
			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
				ret = -EFAULT;
		} else
			ret = -ESRCH;
	}
	break;

	case IP_VS_SO_GET_DESTS:
	{
		struct ip_vs_get_dests *get;
		int size;

		get = (struct ip_vs_get_dests *)arg;
		/* NOTE(review): same user-influenced int product as
		 * GET_SERVICES above - verify overflow behaviour. */
		size = sizeof(*get) +
			sizeof(struct ip_vs_dest_entry) * get->num_dests;
		if (*len != size) {
			pr_err("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_dest_entries(ipvs, get, user);
	}
	break;

	case IP_VS_SO_GET_TIMEOUT:
	{
		struct ip_vs_timeout_user t;

		__ip_vs_get_timeouts(ipvs, &t);
		if (copy_to_user(user, &t, sizeof(t)) != 0)
			ret = -EFAULT;
	}
	break;

	default:
		ret = -EINVAL;
	}

out:
	mutex_unlock(&__ip_vs_mutex);
	return ret;
}
/*
 * Registration table for the legacy sockopt control interface:
 * set/getsockopt commands in [IP_VS_BASE_CTL, IP_VS_SO_*_MAX] are
 * dispatched to do_ip_vs_set_ctl()/do_ip_vs_get_ctl().
 */
static struct nf_sockopt_ops ip_vs_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IP_VS_BASE_CTL,
	.set_optmax	= IP_VS_SO_SET_MAX+1,	/* +1: optmax bound is exclusive */
	.set		= do_ip_vs_set_ctl,
	.get_optmin	= IP_VS_BASE_CTL,
	.get_optmax	= IP_VS_SO_GET_MAX+1,
	.get		= do_ip_vs_get_ctl,
	.owner		= THIS_MODULE,
};
/*
 * Generic Netlink interface
 */

/* IPVS genetlink family */
static struct genl_family ip_vs_genl_family = {
	.id		= GENL_ID_GENERATE,	/* dynamic family id assigned at register time */
	.hdrsize	= 0,			/* no family-specific header */
	.name		= IPVS_GENL_NAME,
	.version	= IPVS_GENL_VERSION,
	.maxattr	= IPVS_CMD_MAX,
	.netnsok	= true, /* Make ipvsadm to work on netns */
};
/* Policy used for first-level command attributes */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
	/* nested containers, validated by their own policies below */
	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
	/* global protocol timeouts, in seconds (see ip_vs_genl_set_config) */
	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
};
/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
	/* NLA_NUL_STRING .len excludes the terminating NUL, hence the -1
	 * so the string fits an IP_VS_IFNAME_MAXLEN buffer
	 */
	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_IFNAME_MAXLEN - 1 },
	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_SYNC_MAXLEN]	= { .type = NLA_U16 },
	[IPVS_DAEMON_ATTR_MCAST_GROUP]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_MCAST_GROUP6]	= { .len = sizeof(struct in6_addr) },
	[IPVS_DAEMON_ATTR_MCAST_PORT]	= { .type = NLA_U16 },
	[IPVS_DAEMON_ATTR_MCAST_TTL]	= { .type = NLA_U8 },
};
/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
	/* NLA_NUL_STRING .len excludes the NUL terminator */
	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_SCHEDNAME_MAXLEN - 1 },
	/* NOTE(review): no -1 here, unlike SCHED_NAME — permits a PE name
	 * one byte longer than IP_VS_PENAME_MAXLEN-1; presumably consumers
	 * truncate safely, but confirm against ip_vs_pe lookup code
	 */
	[IPVS_SVC_ATTR_PE_NAME]		= { .type = NLA_NUL_STRING,
					    .len = IP_VS_PENAME_MAXLEN },
	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
					    .len = sizeof(struct ip_vs_flags) },
	[IPVS_SVC_ATTR_TIMEOUT]	= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_NETMASK]	= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
};
/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_WEIGHT]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
	/* read-only counters; accepted here so dumps can be replayed */
	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
	[IPVS_DEST_ATTR_ADDR_FAMILY]	= { .type = NLA_U16 },
};
/* Fill a nested attribute @container_type with the legacy 32-bit view
 * of @kstats: conn/packet counters and rates are truncated to u32 for
 * old userspace; only the byte counters stay 64-bit.
 * Returns 0, or -EMSGSIZE with the nest cancelled.
 */
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
				 struct ip_vs_kstats *kstats)
{
	struct nlattr *nl_stats = nla_nest_start(skb, container_type);

	if (!nl_stats)
		return -EMSGSIZE;

	if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
	    nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
	    nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
		goto nla_put_failure;
	nla_nest_end(skb, nl_stats);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_stats);
	return -EMSGSIZE;
}
/* Fill a nested attribute @container_type with the full 64-bit view of
 * @kstats (companion to ip_vs_genl_fill_stats(), without truncation).
 * Returns 0, or -EMSGSIZE with the nest cancelled.
 */
static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
				   struct ip_vs_kstats *kstats)
{
	struct nlattr *nl_stats = nla_nest_start(skb, container_type);

	if (!nl_stats)
		return -EMSGSIZE;

	if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) ||
	    nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) ||
	    nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) ||
	    nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
	    nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
	    nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) ||
	    nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) ||
	    nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) ||
	    nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) ||
	    nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps))
		goto nla_put_failure;
	nla_nest_end(skb, nl_stats);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_stats);
	return -EMSGSIZE;
}
/* Serialize @svc into a nested IPVS_CMD_ATTR_SERVICE attribute:
 * identity (fwmark, or protocol/addr/port), scheduler/PE names, flags,
 * timeout, netmask, plus both 32-bit and 64-bit stats containers.
 * Returns 0, or -EMSGSIZE with the whole nest cancelled.
 */
static int ip_vs_genl_fill_service(struct sk_buff *skb,
				   struct ip_vs_service *svc)
{
	struct ip_vs_scheduler *sched;
	struct ip_vs_pe *pe;
	struct nlattr *nl_service;
	/* mask = ~0: report every flag bit as significant */
	struct ip_vs_flags flags = { .flags = svc->flags,
				     .mask = ~0 };
	struct ip_vs_kstats kstats;
	char *sched_name;

	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
	if (!nl_service)
		return -EMSGSIZE;

	if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
		goto nla_put_failure;
	if (svc->fwmark) {
		/* fwmark-based service: the mark alone identifies it */
		if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
			goto nla_put_failure;
	} else {
		if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
		    nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
		    nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
			goto nla_put_failure;
	}

	/* caller holds __ip_vs_mutex, so plain deref of the RCU pointers
	 * is safe here (hence the "1" protection argument)
	 */
	sched = rcu_dereference_protected(svc->scheduler, 1);
	sched_name = sched ? sched->name : "none";
	pe = rcu_dereference_protected(svc->pe, 1);
	if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
	    (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
	    nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
	    nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
	    nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
		goto nla_put_failure;
	ip_vs_copy_stats(&kstats, &svc->stats);
	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
		goto nla_put_failure;
	if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
		goto nla_put_failure;

	nla_nest_end(skb, nl_service);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_service);
	return -EMSGSIZE;
}
  2590. static int ip_vs_genl_dump_service(struct sk_buff *skb,
  2591. struct ip_vs_service *svc,
  2592. struct netlink_callback *cb)
  2593. {
  2594. void *hdr;
  2595. hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
  2596. &ip_vs_genl_family, NLM_F_MULTI,
  2597. IPVS_CMD_NEW_SERVICE);
  2598. if (!hdr)
  2599. return -EMSGSIZE;
  2600. if (ip_vs_genl_fill_service(skb, svc) < 0)
  2601. goto nla_put_failure;
  2602. genlmsg_end(skb, hdr);
  2603. return 0;
  2604. nla_put_failure:
  2605. genlmsg_cancel(skb, hdr);
  2606. return -EMSGSIZE;
  2607. }
/* Netlink dump callback for IPVS_CMD_GET_SERVICE: walks the
 * address-hashed table, then the fwmark-hashed table, emitting one
 * message per service that belongs to this netns.  cb->args[0] is the
 * resume index carried across dump invocations.
 */
static int ip_vs_genl_dump_services(struct sk_buff *skb,
				    struct netlink_callback *cb)
{
	int idx = 0, i;
	int start = cb->args[0];
	struct ip_vs_service *svc;
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&__ip_vs_mutex);
	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
			/* skip entries already dumped or from other netns */
			if (++idx <= start || (svc->ipvs != ipvs))
				continue;
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
				/* skb full: back up so this entry is
				 * retried on the next invocation
				 */
				idx--;
				goto nla_put_failure;
			}
		}
	}

	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
			if (++idx <= start || (svc->ipvs != ipvs))
				continue;
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
				idx--;
				goto nla_put_failure;
			}
		}
	}

nla_put_failure:
	mutex_unlock(&__ip_vs_mutex);
	cb->args[0] = idx;

	return skb->len;
}
/* Parse the nested IPVS_CMD_ATTR_SERVICE attribute @nla into @usvc and
 * look up an existing matching service, returned via @ret_svc (NULL if
 * none).  With @full_entry set, the scheduler name, flags, timeout and
 * netmask attributes become mandatory as well (PE name stays optional).
 * Returns 0 on success or a negative errno.
 */
static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
				    struct ip_vs_service_user_kern *usvc,
				    struct nlattr *nla, int full_entry,
				    struct ip_vs_service **ret_svc)
{
	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
	struct ip_vs_service *svc;

	/* Parse mandatory identifying service fields first */
	if (nla == NULL ||
	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
		return -EINVAL;

	nla_af		= attrs[IPVS_SVC_ATTR_AF];
	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];

	/* identity is either fwmark alone or the (protocol, addr, port)
	 * triple; AF is required in both cases
	 */
	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
		return -EINVAL;

	memset(usvc, 0, sizeof(*usvc));

	usvc->af = nla_get_u16(nla_af);
#ifdef CONFIG_IP_VS_IPV6
	if (usvc->af != AF_INET && usvc->af != AF_INET6)
#else
	if (usvc->af != AF_INET)
#endif
		return -EAFNOSUPPORT;

	if (nla_fwmark) {
		/* fwmark services carry no real protocol; TCP is the
		 * value used here by convention
		 */
		usvc->protocol = IPPROTO_TCP;
		usvc->fwmark = nla_get_u32(nla_fwmark);
	} else {
		usvc->protocol = nla_get_u16(nla_protocol);
		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
		usvc->port = nla_get_be16(nla_port);
		usvc->fwmark = 0;
	}

	rcu_read_lock();
	if (usvc->fwmark)
		svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
	else
		svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
					   &usvc->addr, usvc->port);
	rcu_read_unlock();
	*ret_svc = svc;

	/* If a full entry was requested, check for the additional fields */
	if (full_entry) {
		struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
			      *nla_netmask;
		struct ip_vs_flags flags;

		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
		nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];

		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
			return -EINVAL;

		nla_memcpy(&flags, nla_flags, sizeof(flags));

		/* prefill flags from service if it already exists */
		if (svc)
			usvc->flags = svc->flags;
		/* set new flags from userland */
		usvc->flags = (usvc->flags & ~flags.mask) |
			      (flags.flags & flags.mask);
		/* sched_name/pe_name point into the nlattr payload; they
		 * are only valid while the request message is alive
		 */
		usvc->sched_name = nla_data(nla_sched);
		usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
		usvc->timeout = nla_get_u32(nla_timeout);
		usvc->netmask = nla_get_be32(nla_netmask);
	}

	return 0;
}
  2712. static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
  2713. struct nlattr *nla)
  2714. {
  2715. struct ip_vs_service_user_kern usvc;
  2716. struct ip_vs_service *svc;
  2717. int ret;
  2718. ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc);
  2719. return ret ? ERR_PTR(ret) : svc;
  2720. }
/* Serialize @dest into a nested IPVS_CMD_ATTR_DEST attribute:
 * address/port, forwarding method, weight, thresholds, connection
 * counters, address family, and both stats containers.
 * Returns 0, or -EMSGSIZE with the nest cancelled.
 */
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
{
	struct nlattr *nl_dest;
	struct ip_vs_kstats kstats;

	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
	if (!nl_dest)
		return -EMSGSIZE;

	if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
	    nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
	    /* only the forwarding-method bits of conn_flags are exported */
	    nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
			(atomic_read(&dest->conn_flags) &
			 IP_VS_CONN_F_FWD_MASK)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
			atomic_read(&dest->weight)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
			atomic_read(&dest->activeconns)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
			atomic_read(&dest->inactconns)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
			atomic_read(&dest->persistconns)) ||
	    nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
		goto nla_put_failure;
	ip_vs_copy_stats(&kstats, &dest->stats);
	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
		goto nla_put_failure;
	if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
		goto nla_put_failure;

	nla_nest_end(skb, nl_dest);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_dest);
	return -EMSGSIZE;
}
  2756. static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
  2757. struct netlink_callback *cb)
  2758. {
  2759. void *hdr;
  2760. hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
  2761. &ip_vs_genl_family, NLM_F_MULTI,
  2762. IPVS_CMD_NEW_DEST);
  2763. if (!hdr)
  2764. return -EMSGSIZE;
  2765. if (ip_vs_genl_fill_dest(skb, dest) < 0)
  2766. goto nla_put_failure;
  2767. genlmsg_end(skb, hdr);
  2768. return 0;
  2769. nla_put_failure:
  2770. genlmsg_cancel(skb, hdr);
  2771. return -EMSGSIZE;
  2772. }
/* Netlink dump callback for IPVS_CMD_GET_DEST: emit one message per
 * destination of the service named in the request.  cb->args[0] is the
 * resume index.  Parse/lookup failures are not reported as errors; the
 * dump simply ends (skb->len is returned on all paths, as dump
 * callbacks require).
 */
static int ip_vs_genl_dump_dests(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	int idx = 0;
	int start = cb->args[0];
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&__ip_vs_mutex);

	/* Try to find the service for which to dump destinations */
	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
		goto out_err;

	svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
	if (IS_ERR(svc) || svc == NULL)
		goto out_err;

	/* Dump the destinations */
	list_for_each_entry(dest, &svc->destinations, n_list) {
		/* skip entries already dumped on a previous invocation */
		if (++idx <= start)
			continue;
		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
			/* skb full: back up so this entry is retried */
			idx--;
			goto nla_put_failure;
		}
	}

nla_put_failure:
	cb->args[0] = idx;

out_err:
	mutex_unlock(&__ip_vs_mutex);

	return skb->len;
}
/* Parse the nested IPVS_CMD_ATTR_DEST attribute @nla into @udest.
 * Address and port are always mandatory; with @full_entry set, the
 * forwarding method, weight and both thresholds are required too.
 * Returns 0 on success or -EINVAL.
 */
static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
				 struct nlattr *nla, int full_entry)
{
	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
	struct nlattr *nla_addr, *nla_port;
	struct nlattr *nla_addr_family;

	/* Parse mandatory identifying destination fields first */
	if (nla == NULL ||
	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
		return -EINVAL;

	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
	nla_addr_family	= attrs[IPVS_DEST_ATTR_ADDR_FAMILY];

	if (!(nla_addr && nla_port))
		return -EINVAL;

	memset(udest, 0, sizeof(*udest));

	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
	udest->port = nla_get_be16(nla_port);

	/* af == 0 means "not specified"; the caller substitutes the
	 * service's family in that case (see ip_vs_genl_set_cmd)
	 */
	if (nla_addr_family)
		udest->af = nla_get_u16(nla_addr_family);
	else
		udest->af = 0;

	/* If a full entry was requested, check for the additional fields */
	if (full_entry) {
		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
			      *nla_l_thresh;

		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];

		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
			return -EINVAL;

		/* only the forwarding-method bits are taken from userspace */
		udest->conn_flags = nla_get_u32(nla_fwd)
				    & IP_VS_CONN_F_FWD_MASK;
		udest->weight = nla_get_u32(nla_weight);
		udest->u_threshold = nla_get_u32(nla_u_thresh);
		udest->l_threshold = nla_get_u32(nla_l_thresh);
	}

	return 0;
}
/* Serialize one sync daemon (@state plus its config @c) into a nested
 * IPVS_CMD_ATTR_DAEMON attribute.  The multicast group attribute is
 * chosen by the configured address family; when mcast_af is unset,
 * no group attribute is emitted.
 * Returns 0, or -EMSGSIZE with the nest cancelled.
 */
static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
				  struct ipvs_sync_daemon_cfg *c)
{
	struct nlattr *nl_daemon;

	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
	if (!nl_daemon)
		return -EMSGSIZE;

	if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
	    nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
	    nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
	    nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
	    nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
	    nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
		goto nla_put_failure;
#ifdef CONFIG_IP_VS_IPV6
	if (c->mcast_af == AF_INET6) {
		if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
				     &c->mcast_group.in6))
			goto nla_put_failure;
	} else
#endif
		if (c->mcast_af == AF_INET &&
		    nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
				    c->mcast_group.ip))
			goto nla_put_failure;
	nla_nest_end(skb, nl_daemon);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_daemon);
	return -EMSGSIZE;
}
  2877. static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
  2878. struct ipvs_sync_daemon_cfg *c,
  2879. struct netlink_callback *cb)
  2880. {
  2881. void *hdr;
  2882. hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
  2883. &ip_vs_genl_family, NLM_F_MULTI,
  2884. IPVS_CMD_NEW_DAEMON);
  2885. if (!hdr)
  2886. return -EMSGSIZE;
  2887. if (ip_vs_genl_fill_daemon(skb, state, c))
  2888. goto nla_put_failure;
  2889. genlmsg_end(skb, hdr);
  2890. return 0;
  2891. nla_put_failure:
  2892. genlmsg_cancel(skb, hdr);
  2893. return -EMSGSIZE;
  2894. }
/* Netlink dump callback for IPVS_CMD_GET_DAEMON: emit at most two
 * messages, one for the master daemon and one for the backup daemon,
 * when each is running.  cb->args[0]/args[1] record which of the two
 * has already been sent, so a resumed dump does not repeat them.
 */
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&ipvs->sync_mutex);
	if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
					   &ipvs->mcfg, cb) < 0)
			goto nla_put_failure;

		cb->args[0] = 1;
	}

	if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
					   &ipvs->bcfg, cb) < 0)
			goto nla_put_failure;

		cb->args[1] = 1;
	}

nla_put_failure:
	mutex_unlock(&ipvs->sync_mutex);

	return skb->len;
}
/* IPVS_CMD_NEW_DAEMON handler: build a sync-daemon config from the
 * parsed daemon attributes and start the sync thread.  STATE,
 * MCAST_IFN and SYNC_ID are mandatory; sync packet length, multicast
 * group (v4 or v6), port and TTL are optional overrides on the
 * zeroed defaults.  Returns 0 or a negative errno.
 */
static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
	struct ipvs_sync_daemon_cfg c;
	struct nlattr *a;
	int ret;

	memset(&c, 0, sizeof(c));
	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
		return -EINVAL;
	strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
		sizeof(c.mcast_ifn));
	c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);

	a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
	if (a)
		c.sync_maxlen = nla_get_u16(a);

	a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
	if (a) {
		c.mcast_af = AF_INET;
		c.mcast_group.ip = nla_get_in_addr(a);
		/* a unicast group address makes no sense for sync traffic */
		if (!ipv4_is_multicast(c.mcast_group.ip))
			return -EINVAL;
	} else {
		a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
		if (a) {
#ifdef CONFIG_IP_VS_IPV6
			int addr_type;

			c.mcast_af = AF_INET6;
			c.mcast_group.in6 = nla_get_in6_addr(a);
			addr_type = ipv6_addr_type(&c.mcast_group.in6);
			if (!(addr_type & IPV6_ADDR_MULTICAST))
				return -EINVAL;
#else
			return -EAFNOSUPPORT;
#endif
		}
	}

	a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
	if (a)
		c.mcast_port = nla_get_u16(a);

	a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
	if (a)
		c.mcast_ttl = nla_get_u8(a);

	/* The synchronization protocol is incompatible with mixed family
	 * services
	 */
	if (ipvs->mixed_address_family_dests > 0)
		return -EINVAL;

	/* NOTE(review): no sync_mutex taken here, unlike the del path —
	 * presumably start_sync_thread() handles its own locking; confirm
	 */
	ret = start_sync_thread(ipvs, &c,
				nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
	return ret;
}
  2969. static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
  2970. {
  2971. int ret;
  2972. if (!attrs[IPVS_DAEMON_ATTR_STATE])
  2973. return -EINVAL;
  2974. mutex_lock(&ipvs->sync_mutex);
  2975. ret = stop_sync_thread(ipvs,
  2976. nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
  2977. mutex_unlock(&ipvs->sync_mutex);
  2978. return ret;
  2979. }
  2980. static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
  2981. {
  2982. struct ip_vs_timeout_user t;
  2983. __ip_vs_get_timeouts(ipvs, &t);
  2984. if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
  2985. t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
  2986. if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
  2987. t.tcp_fin_timeout =
  2988. nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
  2989. if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
  2990. t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
  2991. return ip_vs_set_timeout(ipvs, &t);
  2992. }
/* doit handler for IPVS_CMD_NEW_DAEMON / IPVS_CMD_DEL_DAEMON: validate
 * and re-parse the nested daemon attribute against ip_vs_daemon_policy,
 * then dispatch to the new/del helper.  Returns 0 or a negative errno
 * (-EINVAL for any other command or a missing/invalid daemon attr).
 */
static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
{
	int ret = -EINVAL, cmd;
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	cmd = info->genlhdr->cmd;

	if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];

		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
				     info->attrs[IPVS_CMD_ATTR_DAEMON],
				     ip_vs_daemon_policy))
			goto out;

		if (cmd == IPVS_CMD_NEW_DAEMON)
			ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
		else
			ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
	}

out:
	return ret;
}
/* doit handler for all state-changing genetlink commands except the
 * daemon ones: flush, set-config, zero, and the service/destination
 * add/edit/delete commands.  Runs with __ip_vs_mutex held for the
 * whole operation.  Returns 0 or a negative errno.
 */
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct ip_vs_service *svc = NULL;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_dest_user_kern udest;
	int ret = 0, cmd;
	int need_full_svc = 0, need_full_dest = 0;
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	cmd = info->genlhdr->cmd;

	mutex_lock(&__ip_vs_mutex);

	/* commands that need no service argument are handled first */
	if (cmd == IPVS_CMD_FLUSH) {
		ret = ip_vs_flush(ipvs, false);
		goto out;
	} else if (cmd == IPVS_CMD_SET_CONFIG) {
		ret = ip_vs_genl_set_config(ipvs, info->attrs);
		goto out;
	} else if (cmd == IPVS_CMD_ZERO &&
		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
		/* ZERO without a service attribute zeroes everything */
		ret = ip_vs_zero_all(ipvs);
		goto out;
	}

	/* All following commands require a service argument, so check if we
	 * received a valid one. We need a full service specification when
	 * adding / editing a service. Only identifying members otherwise. */
	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
		need_full_svc = 1;

	ret = ip_vs_genl_parse_service(ipvs, &usvc,
				       info->attrs[IPVS_CMD_ATTR_SERVICE],
				       need_full_svc, &svc);
	if (ret)
		goto out;

	/* Unless we're adding a new service, the service must already exist */
	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
		ret = -ESRCH;
		goto out;
	}

	/* Destination commands require a valid destination argument. For
	 * adding / editing a destination, we need a full destination
	 * specification. */
	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
	    cmd == IPVS_CMD_DEL_DEST) {
		if (cmd != IPVS_CMD_DEL_DEST)
			need_full_dest = 1;

		ret = ip_vs_genl_parse_dest(&udest,
					    info->attrs[IPVS_CMD_ATTR_DEST],
					    need_full_dest);
		if (ret)
			goto out;

		/* Old protocols did not allow the user to specify address
		 * family, so we set it to zero instead.  We also didn't
		 * allow heterogeneous pools in the old code, so it's safe
		 * to assume that this will have the same address family as
		 * the service.
		 */
		if (udest.af == 0)
			udest.af = svc->af;

		if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
			/* The synchronization protocol is incompatible
			 * with mixed family services
			 */
			if (ipvs->sync_state) {
				ret = -EINVAL;
				goto out;
			}

			/* Which connection types do we support? */
			switch (udest.conn_flags) {
			case IP_VS_CONN_F_TUNNEL:
				/* We are able to forward this */
				break;
			default:
				ret = -EINVAL;
				goto out;
			}
		}
	}

	switch (cmd) {
	case IPVS_CMD_NEW_SERVICE:
		if (svc == NULL)
			ret = ip_vs_add_service(ipvs, &usvc, &svc);
		else
			ret = -EEXIST;
		break;
	case IPVS_CMD_SET_SERVICE:
		ret = ip_vs_edit_service(svc, &usvc);
		break;
	case IPVS_CMD_DEL_SERVICE:
		ret = ip_vs_del_service(svc);
		/* do not use svc, it can be freed */
		break;
	case IPVS_CMD_NEW_DEST:
		ret = ip_vs_add_dest(svc, &udest);
		break;
	case IPVS_CMD_SET_DEST:
		ret = ip_vs_edit_dest(svc, &udest);
		break;
	case IPVS_CMD_DEL_DEST:
		ret = ip_vs_del_dest(svc, &udest);
		break;
	case IPVS_CMD_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	default:
		ret = -EINVAL;
	}

out:
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}
/* doit handler for the non-dump get commands (GET_SERVICE, GET_INFO,
 * GET_CONFIG): build a single reply message with the matching SET_/NEW_
 * command code and send it back to the requester.  On any failure the
 * allocated reply skb is freed (out_err path).  Returns 0 or a
 * negative errno.
 */
static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *msg;
	void *reply;
	int ret, cmd, reply_cmd;
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	cmd = info->genlhdr->cmd;

	/* each GET command is answered with its paired reply command */
	if (cmd == IPVS_CMD_GET_SERVICE)
		reply_cmd = IPVS_CMD_NEW_SERVICE;
	else if (cmd == IPVS_CMD_GET_INFO)
		reply_cmd = IPVS_CMD_SET_INFO;
	else if (cmd == IPVS_CMD_GET_CONFIG)
		reply_cmd = IPVS_CMD_SET_CONFIG;
	else {
		pr_err("unknown Generic Netlink command\n");
		return -EINVAL;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	mutex_lock(&__ip_vs_mutex);

	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
	if (reply == NULL)
		goto nla_put_failure;

	switch (cmd) {
	case IPVS_CMD_GET_SERVICE:
	{
		struct ip_vs_service *svc;

		svc = ip_vs_genl_find_service(ipvs,
					      info->attrs[IPVS_CMD_ATTR_SERVICE]);
		if (IS_ERR(svc)) {
			ret = PTR_ERR(svc);
			goto out_err;
		} else if (svc) {
			ret = ip_vs_genl_fill_service(msg, svc);
			if (ret)
				goto nla_put_failure;
		} else {
			ret = -ESRCH;
			goto out_err;
		}

		break;
	}

	case IPVS_CMD_GET_CONFIG:
	{
		struct ip_vs_timeout_user t;

		__ip_vs_get_timeouts(ipvs, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
		if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
				t.tcp_timeout) ||
		    nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
				t.tcp_fin_timeout))
			goto nla_put_failure;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
		if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
			goto nla_put_failure;
#endif

		break;
	}

	case IPVS_CMD_GET_INFO:
		if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
				IP_VS_VERSION_CODE) ||
		    nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
				ip_vs_conn_tab_size))
			goto nla_put_failure;
		break;
	}

	genlmsg_end(msg, reply);
	ret = genlmsg_reply(msg, info);
	goto out;

nla_put_failure:
	pr_err("not enough space in Netlink message\n");
	ret = -EMSGSIZE;

out_err:
	nlmsg_free(msg);

out:
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}
/*
 * Generic Netlink operation table for the IPVS control interface.
 *
 * Every command requires CAP_NET_ADMIN (GENL_ADMIN_PERM).  All state-
 * changing service/dest commands funnel into ip_vs_genl_set_cmd(), the
 * daemon start/stop commands into ip_vs_genl_set_daemon(), and the
 * read-only queries into ip_vs_genl_get_cmd() or the dump callbacks.
 */
static const struct genl_ops ip_vs_genl_ops[] = {
	/* Service add/modify/delete (validated by ip_vs_cmd_policy). */
	{
		.cmd = IPVS_CMD_NEW_SERVICE,
		.flags = GENL_ADMIN_PERM,
		.policy = ip_vs_cmd_policy,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_SET_SERVICE,
		.flags = GENL_ADMIN_PERM,
		.policy = ip_vs_cmd_policy,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_DEL_SERVICE,
		.flags = GENL_ADMIN_PERM,
		.policy = ip_vs_cmd_policy,
		.doit = ip_vs_genl_set_cmd,
	},
	/* Single-service query plus full-table dump. */
	{
		.cmd = IPVS_CMD_GET_SERVICE,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_get_cmd,
		.dumpit = ip_vs_genl_dump_services,
		.policy = ip_vs_cmd_policy,
	},
	/* Real-server (destination) add/modify/delete. */
	{
		.cmd = IPVS_CMD_NEW_DEST,
		.flags = GENL_ADMIN_PERM,
		.policy = ip_vs_cmd_policy,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_SET_DEST,
		.flags = GENL_ADMIN_PERM,
		.policy = ip_vs_cmd_policy,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_DEL_DEST,
		.flags = GENL_ADMIN_PERM,
		.policy = ip_vs_cmd_policy,
		.doit = ip_vs_genl_set_cmd,
	},
	/* Destination listing is dump-only (no .doit handler). */
	{
		.cmd = IPVS_CMD_GET_DEST,
		.flags = GENL_ADMIN_PERM,
		.policy = ip_vs_cmd_policy,
		.dumpit = ip_vs_genl_dump_dests,
	},
	/* Sync-daemon start/stop. */
	{
		.cmd = IPVS_CMD_NEW_DAEMON,
		.flags = GENL_ADMIN_PERM,
		.policy = ip_vs_cmd_policy,
		.doit = ip_vs_genl_set_daemon,
	},
	{
		.cmd = IPVS_CMD_DEL_DAEMON,
		.flags = GENL_ADMIN_PERM,
		.policy = ip_vs_cmd_policy,
		.doit = ip_vs_genl_set_daemon,
	},
	/* Dump-only; note no .policy here (command takes no request
	 * attributes that need top-level validation). */
	{
		.cmd = IPVS_CMD_GET_DAEMON,
		.flags = GENL_ADMIN_PERM,
		.dumpit = ip_vs_genl_dump_daemons,
	},
	/* Global timeout configuration get/set and version info. */
	{
		.cmd = IPVS_CMD_SET_CONFIG,
		.flags = GENL_ADMIN_PERM,
		.policy = ip_vs_cmd_policy,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_GET_CONFIG,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_get_cmd,
	},
	{
		.cmd = IPVS_CMD_GET_INFO,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_get_cmd,
	},
	/* Zero counters for one service (or all) and flush everything. */
	{
		.cmd = IPVS_CMD_ZERO,
		.flags = GENL_ADMIN_PERM,
		.policy = ip_vs_cmd_policy,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_FLUSH,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
};
/* Register the IPVS generic netlink family with its operation table.
 * Returns 0 on success or a negative errno from the genl core. */
static int __init ip_vs_genl_register(void)
{
	return genl_register_family_with_ops(&ip_vs_genl_family,
					     ip_vs_genl_ops);
}
/* Unregister the IPVS generic netlink family (ops go away with it). */
static void ip_vs_genl_unregister(void)
{
	genl_unregister_family(&ip_vs_genl_family);
}
  3308. /* End of Generic Netlink interface definitions */
/*
 * per netns init/exit func.
 */
  3312. #ifdef CONFIG_SYSCTL
/*
 * Set up the per-netns sysctl table under net/ipv4/vs, initialize the
 * tunable defaults and start the estimator plus the periodic defense
 * work.  Returns 0 on success or -ENOMEM.
 */
static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
{
	struct net *net = ipvs->net;
	int idx;
	struct ctl_table *tbl;

	atomic_set(&ipvs->dropentry, 0);
	spin_lock_init(&ipvs->dropentry_lock);
	spin_lock_init(&ipvs->droppacket_lock);
	spin_lock_init(&ipvs->securetcp_lock);

	if (!net_eq(net, &init_net)) {
		/* Non-init namespaces get a private copy of the template
		 * so the .data pointers patched below stay per-netns. */
		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
		if (tbl == NULL)
			return -ENOMEM;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	} else
		tbl = vs_vars;
	/* Initialize sysctl defaults */
	for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
		/* Defense-mode handlers need the ipvs context in extra2. */
		if (tbl[idx].proc_handler == proc_do_defense_mode)
			tbl[idx].extra2 = ipvs;
	}
	/* NOTE: the tbl[idx++] assignments below must follow the exact
	 * entry order of vs_vars; adding an entry there requires a
	 * matching change here. */
	idx = 0;
	ipvs->sysctl_amemthresh = 1024;
	tbl[idx++].data = &ipvs->sysctl_amemthresh;
	ipvs->sysctl_am_droprate = 10;
	tbl[idx++].data = &ipvs->sysctl_am_droprate;
	tbl[idx++].data = &ipvs->sysctl_drop_entry;
	tbl[idx++].data = &ipvs->sysctl_drop_packet;
#ifdef CONFIG_IP_VS_NFCT
	tbl[idx++].data = &ipvs->sysctl_conntrack;
#endif
	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
	ipvs->sysctl_snat_reroute = 1;
	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
	ipvs->sysctl_sync_ver = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ver;
	ipvs->sysctl_sync_ports = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ports;
	tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
	/* Cap the sync backlog relative to available buffer pages. */
	ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
	tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
	ipvs->sysctl_sync_sock_size = 0;
	tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
	tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
	tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
	/* sync_threshold is a two-element vector: threshold and period. */
	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
	tbl[idx].data = &ipvs->sysctl_sync_threshold;
	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
	ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
	tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
	ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
	tbl[idx++].data = &ipvs->sysctl_sync_retries;
	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
	ipvs->sysctl_pmtu_disc = 1;
	tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
	tbl[idx++].data = &ipvs->sysctl_backup_only;
	ipvs->sysctl_conn_reuse_mode = 1;
	tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
	tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
	tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;

	ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
	if (ipvs->sysctl_hdr == NULL) {
		/* Only free what we kmemdup'ed; vs_vars itself is static. */
		if (!net_eq(net, &init_net))
			kfree(tbl);
		return -ENOMEM;
	}

	ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
	ipvs->sysctl_tbl = tbl;
	/* Schedule defense work */
	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);

	return 0;
}
  3392. static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
  3393. {
  3394. struct net *net = ipvs->net;
  3395. cancel_delayed_work_sync(&ipvs->defense_work);
  3396. cancel_work_sync(&ipvs->defense_work.work);
  3397. unregister_net_sysctl_table(ipvs->sysctl_hdr);
  3398. ip_vs_stop_estimator(ipvs, &ipvs->tot_stats);
  3399. if (!net_eq(net, &init_net))
  3400. kfree(ipvs->sysctl_tbl);
  3401. }
  3402. #else
/* !CONFIG_SYSCTL: no-op stubs so callers need no conditional code. */
static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; }
static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { }
  3405. #endif
/* Netdevice notifier forwarding device events to ip_vs_dst_event();
 * registered in ip_vs_control_init() below. */
static struct notifier_block ip_vs_dst_notifier = {
	.notifier_call = ip_vs_dst_event,
};
  3409. int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
  3410. {
  3411. int i, idx;
  3412. /* Initialize rs_table */
  3413. for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
  3414. INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
  3415. INIT_LIST_HEAD(&ipvs->dest_trash);
  3416. spin_lock_init(&ipvs->dest_trash_lock);
  3417. setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
  3418. (unsigned long) ipvs);
  3419. atomic_set(&ipvs->ftpsvc_counter, 0);
  3420. atomic_set(&ipvs->nullsvc_counter, 0);
  3421. /* procfs stats */
  3422. ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
  3423. if (!ipvs->tot_stats.cpustats)
  3424. return -ENOMEM;
  3425. for_each_possible_cpu(i) {
  3426. struct ip_vs_cpu_stats *ipvs_tot_stats;
  3427. ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i);
  3428. u64_stats_init(&ipvs_tot_stats->syncp);
  3429. }
  3430. spin_lock_init(&ipvs->tot_stats.lock);
  3431. proc_create("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_fops);
  3432. proc_create("ip_vs_stats", 0, ipvs->net->proc_net, &ip_vs_stats_fops);
  3433. proc_create("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
  3434. &ip_vs_stats_percpu_fops);
  3435. if (ip_vs_control_net_init_sysctl(ipvs))
  3436. goto err;
  3437. return 0;
  3438. err:
  3439. free_percpu(ipvs->tot_stats.cpustats);
  3440. return -ENOMEM;
  3441. }
/*
 * Per-netns teardown, reversing ip_vs_control_net_init(): drain the
 * destination trash, tear down sysctl/estimator state, remove the
 * /proc entries and free the per-cpu stats.
 */
void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
{
	ip_vs_trash_cleanup(ipvs);
	ip_vs_control_net_cleanup_sysctl(ipvs);
	remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
	remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
	remove_proc_entry("ip_vs", ipvs->net->proc_net);
	free_percpu(ipvs->tot_stats.cpustats);
}
  3451. int __init ip_vs_register_nl_ioctl(void)
  3452. {
  3453. int ret;
  3454. ret = nf_register_sockopt(&ip_vs_sockopts);
  3455. if (ret) {
  3456. pr_err("cannot register sockopt.\n");
  3457. goto err_sock;
  3458. }
  3459. ret = ip_vs_genl_register();
  3460. if (ret) {
  3461. pr_err("cannot register Generic Netlink interface.\n");
  3462. goto err_genl;
  3463. }
  3464. return 0;
  3465. err_genl:
  3466. nf_unregister_sockopt(&ip_vs_sockopts);
  3467. err_sock:
  3468. return ret;
  3469. }
/* Undo ip_vs_register_nl_ioctl() in reverse registration order. */
void ip_vs_unregister_nl_ioctl(void)
{
	ip_vs_genl_unregister();
	nf_unregister_sockopt(&ip_vs_sockopts);
}
  3475. int __init ip_vs_control_init(void)
  3476. {
  3477. int idx;
  3478. int ret;
  3479. EnterFunction(2);
  3480. /* Initialize svc_table, ip_vs_svc_fwm_table */
  3481. for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
  3482. INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
  3483. INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
  3484. }
  3485. smp_wmb(); /* Do we really need it now ? */
  3486. ret = register_netdevice_notifier(&ip_vs_dst_notifier);
  3487. if (ret < 0)
  3488. return ret;
  3489. LeaveFunction(2);
  3490. return 0;
  3491. }
/* Module-wide control-plane cleanup: drop the netdevice notifier. */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	unregister_netdevice_notifier(&ip_vs_dst_notifier);
	LeaveFunction(2);
}