nf_conntrack_proto_tcp.c 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743
  1. /* (C) 1999-2001 Paul `Rusty' Russell
  2. * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  3. * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
  4. * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2 as
  8. * published by the Free Software Foundation.
  9. */
  10. #include <linux/types.h>
  11. #include <linux/timer.h>
  12. #include <linux/module.h>
  13. #include <linux/in.h>
  14. #include <linux/tcp.h>
  15. #include <linux/spinlock.h>
  16. #include <linux/skbuff.h>
  17. #include <linux/ipv6.h>
  18. #include <net/ip6_checksum.h>
  19. #include <asm/unaligned.h>
  20. #include <net/tcp.h>
  21. #include <linux/netfilter.h>
  22. #include <linux/netfilter_ipv4.h>
  23. #include <linux/netfilter_ipv6.h>
  24. #include <net/netfilter/nf_conntrack.h>
  25. #include <net/netfilter/nf_conntrack_l4proto.h>
  26. #include <net/netfilter/nf_conntrack_ecache.h>
  27. #include <net/netfilter/nf_conntrack_seqadj.h>
  28. #include <net/netfilter/nf_conntrack_synproxy.h>
  29. #include <net/netfilter/nf_log.h>
  30. #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
  31. #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
  32. /* "Be conservative in what you do,
  33. be liberal in what you accept from others."
  34. If it's non-zero, we mark only out of window RST segments as INVALID. */
  35. static int nf_ct_tcp_be_liberal __read_mostly = 0;
  36. /* If it is set to zero, we disable picking up already established
  37. connections. */
  38. static int nf_ct_tcp_loose __read_mostly = 1;
  39. /* Max number of the retransmitted packets without receiving an (acceptable)
  40. ACK from the destination. If this number is reached, a shorter timer
  41. will be started. */
  42. static int nf_ct_tcp_max_retrans __read_mostly = 3;
  43. /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
  44. closely. They're more complex. --RR */
  45. static const char *const tcp_conntrack_names[] = {
  46. "NONE",
  47. "SYN_SENT",
  48. "SYN_RECV",
  49. "ESTABLISHED",
  50. "FIN_WAIT",
  51. "CLOSE_WAIT",
  52. "LAST_ACK",
  53. "TIME_WAIT",
  54. "CLOSE",
  55. "SYN_SENT2",
  56. };
/*
 * Postfix time-unit helpers: written after a number ("2 MINS") they expand
 * to a multiplication chain that ends in HZ.
 */
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS

/* Default timeout for each TCP conntrack state, expressed via HZ. */
static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
	[TCP_CONNTRACK_SYN_SENT] = 2 MINS,
	[TCP_CONNTRACK_SYN_RECV] = 60 SECS,
	[TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
	[TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
	[TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
	[TCP_CONNTRACK_LAST_ACK] = 30 SECS,
	[TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
	[TCP_CONNTRACK_CLOSE] = 10 SECS,
	[TCP_CONNTRACK_SYN_SENT2] = 2 MINS,
	/* RFC1122 says the R2 limit should be at least 100 seconds.
	   Linux uses 15 packets as limit, which corresponds
	   to ~13-30min depending on RTO. */
	[TCP_CONNTRACK_RETRANS] = 5 MINS,
	[TCP_CONNTRACK_UNACK] = 5 MINS,
};
/*
 * Two-letter shorthands for the conntrack TCP states, used to keep the
 * state transition table below readable.
 */
#define sNO TCP_CONNTRACK_NONE
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sS2 TCP_CONNTRACK_SYN_SENT2
/* Pseudo-states used only as table results: "invalid" and "ignore". */
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE
/*
 * What TCP flags are set from RST/SYN/FIN/ACK.
 *
 * The enumerators are listed in the same order as the rows of each
 * direction in the tcp_conntracks[] table (syn, synack, fin, ack, rst,
 * none), so their order must not be changed independently of that table.
 */
enum tcp_bit_set {
	TCP_SYN_SET,
	TCP_SYNACK_SET,
	TCP_FIN_SET,
	TCP_ACK_SET,
	TCP_RST_SET,
	TCP_NONE_SET,
};
/*
 * The TCP state transition table needs a few words...
 *
 * We are the man in the middle. All the packets go through us
 * but might get lost in transit to the destination.
 * It is assumed that the destinations can't receive segments
 * we haven't seen.
 *
 * The checked segment is in window, but our windows are *not*
 * equivalent with the ones of the sender/receiver. We always
 * try to guess the state of the current sender.
 *
 * The meanings of the states are:
 *
 * NONE:	initial state
 * SYN_SENT:	SYN-only packet seen
 * SYN_SENT2:	SYN-only packet seen from reply dir, simultaneous open
 * SYN_RECV:	SYN-ACK packet seen
 * ESTABLISHED:	ACK packet seen
 * FIN_WAIT:	FIN packet seen
 * CLOSE_WAIT:	ACK seen (after FIN)
 * LAST_ACK:	FIN seen (after FIN)
 * TIME_WAIT:	last ACK seen
 * CLOSE:	closed connection (RST)
 *
 * Packets marked as IGNORED (sIG):
 *	if they may be either invalid or valid
 *	and the receiver may send back a connection
 *	closing RST or a SYN/ACK.
 *
 * Packets marked as INVALID (sIV):
 *	if we regard them as truly invalid packets
 *
 * Table layout: the first index is the packet direction (ORIGINAL, then
 * REPLY), the second is the flag class of the packet (rows in the order
 * syn, synack, fin, ack, rst, none) and the third is the current state
 * (columns in the order given by the header comment above each row).
 * The entry is the resulting state, or sIV / sIG.
 */
static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
	{
/* ORIGINAL */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
/*
 *	sNO -> sSS	Initialize a new connection
 *	sSS -> sSS	Retransmitted SYN
 *	sS2 -> sS2	Late retransmitted SYN
 *	sSR -> sIG
 *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
 *			are errors. Receiver will reply with RST
 *			and close the connection.
 *			Or we are not in sync and hold a dead connection.
 *	sFW -> sIG
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sSS	Reopened connection (RFC 1122).
 *	sCL -> sSS
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
/*
 *	sNO -> sIV	Too late and no reason to do anything
 *	sSS -> sIV	Client can't send SYN and then SYN/ACK
 *	sS2 -> sSR	SYN/ACK sent to SYN2 in simultaneous open
 *	sSR -> sSR	Late retransmitted SYN/ACK in simultaneous open
 *	sES -> sIV	Invalid SYN/ACK packets sent by the client
 *	sFW -> sIV
 *	sCW -> sIV
 *	sLA -> sIV
 *	sTW -> sIV
 *	sCL -> sIV
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sNO -> sIV	Too late and no reason to do anything...
 *	sSS -> sIV	Client might not send FIN in this state:
 *			we enforce waiting for a SYN/ACK reply first.
 *	sS2 -> sIV
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions, waiting for
 *			the last ACK.
 *			Might be a retransmitted FIN as well...
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
 *	sTW -> sTW
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
 *	sNO -> sES	Assumed.
 *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
 *	sS2 -> sIV
 *	sSR -> sES	Established state is reached.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected (RFC5961 challenged)
 *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	},
	{
/* REPLY */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*syn*/	   { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
/*
 *	sNO -> sIV	Never reached.
 *	sSS -> sS2	Simultaneous open
 *	sS2 -> sS2	Retransmitted simultaneous SYN
 *	sSR -> sIV	Invalid SYN packets sent by the server
 *	sES -> sIV
 *	sFW -> sIV
 *	sCW -> sIV
 *	sLA -> sIV
 *	sTW -> sSS	Reopened connection, but server may have switched role
 *	sCL -> sIV
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
/*
 *	sSS -> sSR	Standard open.
 *	sS2 -> sSR	Simultaneous open
 *	sSR -> sIG	Retransmitted SYN/ACK, ignore it.
 *	sES -> sIG	Late retransmitted SYN/ACK?
 *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sIG
 *	sCL -> sIG
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sSS -> sIV	Server might not send FIN in this state.
 *	sS2 -> sIV
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions.
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN.
 *	sTW -> sTW
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
/*
 *	sSS -> sIG	Might be a half-open connection.
 *	sS2 -> sIG
 *	sSR -> sSR	Might answer late resent SYN.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected (RFC5961 challenged)
 *	sTW -> sTW	Retransmitted last ACK.
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	}
};
  260. static inline struct nf_tcp_net *tcp_pernet(struct net *net)
  261. {
  262. return &net->ct.nf_ct_proto.tcp;
  263. }
  264. static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
  265. struct net *net, struct nf_conntrack_tuple *tuple)
  266. {
  267. const struct tcphdr *hp;
  268. struct tcphdr _hdr;
  269. /* Actually only need first 8 bytes. */
  270. hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
  271. if (hp == NULL)
  272. return false;
  273. tuple->src.u.tcp.port = hp->source;
  274. tuple->dst.u.tcp.port = hp->dest;
  275. return true;
  276. }
/*
 * Build the TCP part of the inverse of @orig in @tuple: the source port of
 * @tuple becomes the destination port of @orig and vice versa.
 * Always returns true.
 */
static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
			     const struct nf_conntrack_tuple *orig)
{
	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
	return true;
}
  284. /* Print out the per-protocol part of the tuple. */
  285. static void tcp_print_tuple(struct seq_file *s,
  286. const struct nf_conntrack_tuple *tuple)
  287. {
  288. seq_printf(s, "sport=%hu dport=%hu ",
  289. ntohs(tuple->src.u.tcp.port),
  290. ntohs(tuple->dst.u.tcp.port));
  291. }
  292. /* Print out the private part of the conntrack. */
  293. static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
  294. {
  295. enum tcp_conntrack state;
  296. spin_lock_bh(&ct->lock);
  297. state = ct->proto.tcp.state;
  298. spin_unlock_bh(&ct->lock);
  299. seq_printf(s, "%s ", tcp_conntrack_names[state]);
  300. }
  301. static unsigned int get_conntrack_index(const struct tcphdr *tcph)
  302. {
  303. if (tcph->rst) return TCP_RST_SET;
  304. else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
  305. else if (tcph->fin) return TCP_FIN_SET;
  306. else if (tcph->ack) return TCP_ACK_SET;
  307. else return TCP_NONE_SET;
  308. }
/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
   in IP Filter' by Guido van Rooij.

   http://www.sane.nl/events/sane2000/papers.html
   http://www.darkart.com/mirrors/www.obfuscation.org/ipf/

   The boundaries and the conditions are changed according to RFC793:
   the packet must intersect the window (i.e. segments may be
   after the right or before the left edge) and thus receivers may ACK
   segments after the right edge of the window.

	td_maxend = max(sack + max(win,1)) seen in reply packets
	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
	td_maxwin += seq + len - sender.td_maxend
			if seq + len > sender.td_maxend
	td_end    = max(seq + len) seen in sent packets

   I.   Upper bound for valid data:	seq <= sender.td_maxend
   II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
   III. Upper bound for valid (s)ack:	sack <= receiver.td_end
   IV.  Lower bound for valid (s)ack:	sack >= receiver.td_end - MAXACKWINDOW

   where sack is the highest right edge of sack block found in the packet
   or ack in the case of packet without SACK option.

   The upper bound limit for a valid (s)ack is not ignored -
   we don't have to deal with fragments.
*/
  331. static inline __u32 segment_seq_plus_len(__u32 seq,
  332. size_t len,
  333. unsigned int dataoff,
  334. const struct tcphdr *tcph)
  335. {
  336. /* XXX Should I use payload length field in IP/IPv6 header ?
  337. * - YK */
  338. return (seq + len - dataoff - tcph->doff*4
  339. + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
  340. }
/* Fixme: what about big packets? */
#define MAXACKWINCONST			66000
/*
 * The larger of (sender)->td_maxwin and MAXACKWINCONST.
 * Note: the argument is evaluated more than once.
 */
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
					      : MAXACKWINCONST)
  346. /*
  347. * Simplified tcp_parse_options routine from tcp_input.c
  348. */
  349. static void tcp_options(const struct sk_buff *skb,
  350. unsigned int dataoff,
  351. const struct tcphdr *tcph,
  352. struct ip_ct_tcp_state *state)
  353. {
  354. unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
  355. const unsigned char *ptr;
  356. int length = (tcph->doff*4) - sizeof(struct tcphdr);
  357. if (!length)
  358. return;
  359. ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
  360. length, buff);
  361. BUG_ON(ptr == NULL);
  362. state->td_scale =
  363. state->flags = 0;
  364. while (length > 0) {
  365. int opcode=*ptr++;
  366. int opsize;
  367. switch (opcode) {
  368. case TCPOPT_EOL:
  369. return;
  370. case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
  371. length--;
  372. continue;
  373. default:
  374. if (length < 2)
  375. return;
  376. opsize=*ptr++;
  377. if (opsize < 2) /* "silly options" */
  378. return;
  379. if (opsize > length)
  380. return; /* don't parse partial options */
  381. if (opcode == TCPOPT_SACK_PERM
  382. && opsize == TCPOLEN_SACK_PERM)
  383. state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
  384. else if (opcode == TCPOPT_WINDOW
  385. && opsize == TCPOLEN_WINDOW) {
  386. state->td_scale = *(u_int8_t *)ptr;
  387. if (state->td_scale > 14) {
  388. /* See RFC1323 */
  389. state->td_scale = 14;
  390. }
  391. state->flags |=
  392. IP_CT_TCP_FLAG_WINDOW_SCALE;
  393. }
  394. ptr += opsize - 2;
  395. length -= opsize;
  396. }
  397. }
  398. }
  399. static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
  400. const struct tcphdr *tcph, __u32 *sack)
  401. {
  402. unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
  403. const unsigned char *ptr;
  404. int length = (tcph->doff*4) - sizeof(struct tcphdr);
  405. __u32 tmp;
  406. if (!length)
  407. return;
  408. ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
  409. length, buff);
  410. BUG_ON(ptr == NULL);
  411. /* Fast path for timestamp-only option */
  412. if (length == TCPOLEN_TSTAMP_ALIGNED
  413. && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
  414. | (TCPOPT_NOP << 16)
  415. | (TCPOPT_TIMESTAMP << 8)
  416. | TCPOLEN_TIMESTAMP))
  417. return;
  418. while (length > 0) {
  419. int opcode = *ptr++;
  420. int opsize, i;
  421. switch (opcode) {
  422. case TCPOPT_EOL:
  423. return;
  424. case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
  425. length--;
  426. continue;
  427. default:
  428. if (length < 2)
  429. return;
  430. opsize = *ptr++;
  431. if (opsize < 2) /* "silly options" */
  432. return;
  433. if (opsize > length)
  434. return; /* don't parse partial options */
  435. if (opcode == TCPOPT_SACK
  436. && opsize >= (TCPOLEN_SACK_BASE
  437. + TCPOLEN_SACK_PERBLOCK)
  438. && !((opsize - TCPOLEN_SACK_BASE)
  439. % TCPOLEN_SACK_PERBLOCK)) {
  440. for (i = 0;
  441. i < (opsize - TCPOLEN_SACK_BASE);
  442. i += TCPOLEN_SACK_PERBLOCK) {
  443. tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
  444. if (after(tmp, *sack))
  445. *sack = tmp;
  446. }
  447. return;
  448. }
  449. ptr += opsize - 2;
  450. length -= opsize;
  451. }
  452. }
  453. }
/*
 * Check one TCP segment against the window tracking data of a connection
 * and update that data.
 *
 * @ct:      conntrack entry the packet belongs to
 * @state:   TCP tracking state; seen[dir] is treated as the sender of this
 *           segment and seen[!dir] as the receiver
 * @dir:     direction of the packet
 * @index:   flag class of the packet (enum tcp_bit_set); only
 *           TCP_ACK_SET segments take part in retransmission counting
 * @skb:     the packet
 * @dataoff: offset of the TCP header within @skb
 * @tcph:    the TCP header
 * @pf:      protocol family, passed through to the invalid-packet log
 *
 * Returns true when the segment satisfies the four boundary checks I-IV
 * (see the pr_debug below), when the sender is flagged liberal or the
 * per-net tcp_be_liberal setting is non-zero, or early for a SYN without
 * ACK that initializes a previously unseen sender.  Returns false
 * otherwise, optionally logging which bound was violated.
 */
static bool tcp_in_window(const struct nf_conn *ct,
			  struct ip_ct_tcp *state,
			  enum ip_conntrack_dir dir,
			  unsigned int index,
			  const struct sk_buff *skb,
			  unsigned int dataoff,
			  const struct tcphdr *tcph,
			  u_int8_t pf)
{
	struct net *net = nf_ct_net(ct);
	struct nf_tcp_net *tn = tcp_pernet(net);
	struct ip_ct_tcp_state *sender = &state->seen[dir];
	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
	__u32 seq, ack, sack, end, win, swin;
	s32 receiver_offset;
	bool res, in_recv_win;

	/*
	 * Get the required data from the packet.
	 */
	seq = ntohl(tcph->seq);
	ack = sack = ntohl(tcph->ack_seq);
	win = ntohs(tcph->window);
	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);

	/* Only look for SACK blocks if the other side offered SACK. */
	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
		tcp_sack(skb, dataoff, tcph, &sack);

	/* Take into account NAT sequence number mangling */
	receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
	ack -= receiver_offset;
	sack -= receiver_offset;
	pr_debug("tcp_in_window: START\n");
	pr_debug("tcp_in_window: ");
	nf_ct_dump_tuple(tuple);
	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);

	/* td_maxwin == 0 marks a direction that has not been initialized yet. */
	if (sender->td_maxwin == 0) {
		/*
		 * Initialize sender data.
		 */
		if (tcph->syn) {
			/*
			 * SYN-ACK in reply to a SYN
			 * or SYN from reply direction in simultaneous open.
			 */
			sender->td_end =
			sender->td_maxend = end;
			sender->td_maxwin = (win == 0 ? 1 : win);

			tcp_options(skb, dataoff, tcph, sender);
			/*
			 * RFC 1323:
			 * Both sides must send the Window Scale option
			 * to enable window scaling in either direction.
			 */
			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
				sender->td_scale =
				receiver->td_scale = 0;
			if (!tcph->ack)
				/* Simultaneous open */
				return true;
		} else {
			/*
			 * We are in the middle of a connection,
			 * its history is lost for us.
			 * Let's try to use the data from the packet.
			 */
			sender->td_end = end;
			swin = win << sender->td_scale;
			sender->td_maxwin = (swin == 0 ? 1 : swin);
			sender->td_maxend = end + sender->td_maxwin;
			/*
			 * We haven't seen traffic in the other direction yet
			 * but we have to tweak window tracking to pass III
			 * and IV until that happens.
			 */
			if (receiver->td_maxwin == 0)
				receiver->td_end = receiver->td_maxend = sack;
		}
	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
		     && dir == IP_CT_DIR_ORIGINAL)
		   || (state->state == TCP_CONNTRACK_SYN_RECV
		     && dir == IP_CT_DIR_REPLY))
		   && after(end, sender->td_end)) {
		/*
		 * RFC 793: "if a TCP is reinitialized ... then it need
		 * not wait at all; it must only be sure to use sequence
		 * numbers larger than those recently used."
		 */
		sender->td_end =
		sender->td_maxend = end;
		sender->td_maxwin = (win == 0 ? 1 : win);

		tcp_options(skb, dataoff, tcph, sender);
	}

	if (!(tcph->ack)) {
		/*
		 * If there is no ACK, just pretend it was set and OK.
		 */
		ack = sack = receiver->td_end;
	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
		    (TCP_FLAG_ACK|TCP_FLAG_RST))
		   && (ack == 0)) {
		/*
		 * Broken TCP stacks, that set ACK in RST packets as well
		 * with zero ack value.
		 */
		ack = sack = receiver->td_end;
	}

	if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
		/*
		 * RST sent answering SYN.
		 */
		seq = end = sender->td_end;

	pr_debug("tcp_in_window: ");
	nf_ct_dump_tuple(tuple);
	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);

	/* Is the ending sequence in the receive window (if available)? */
	in_recv_win = !receiver->td_maxwin ||
		      after(end, sender->td_end - receiver->td_maxwin - 1);

	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
		 before(seq, sender->td_maxend + 1),
		 (in_recv_win ? 1 : 0),
		 before(sack, receiver->td_end + 1),
		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));

	/* Checks I-IV, in the same order as printed above. */
	if (before(seq, sender->td_maxend + 1) &&
	    in_recv_win &&
	    before(sack, receiver->td_end + 1) &&
	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
		/*
		 * Take into account window scaling (RFC 1323).
		 */
		if (!tcph->syn)
			win <<= sender->td_scale;

		/*
		 * Update sender data.
		 */
		swin = win + (sack - ack);
		if (sender->td_maxwin < swin)
			sender->td_maxwin = swin;
		if (after(end, sender->td_end)) {
			sender->td_end = end;
			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
		}
		/* Track the highest ACK value seen from this sender. */
		if (tcph->ack) {
			if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
				sender->td_maxack = ack;
				sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
			} else if (after(ack, sender->td_maxack))
				sender->td_maxack = ack;
		}

		/*
		 * Update receiver data.
		 */
		if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
			receiver->td_maxwin += end - sender->td_maxend;
		if (after(sack + win, receiver->td_maxend - 1)) {
			receiver->td_maxend = sack + win;
			if (win == 0)
				receiver->td_maxend++;
		}
		/* Everything the receiver sent so far has now been ACKed. */
		if (ack == receiver->td_end)
			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;

		/*
		 * Check retransmissions.
		 */
		if (index == TCP_ACK_SET) {
			/* Same direction and same seq/ack/end/win as the last ACK. */
			if (state->last_dir == dir
			    && state->last_seq == seq
			    && state->last_ack == ack
			    && state->last_end == end
			    && state->last_win == win)
				state->retrans++;
			else {
				state->last_dir = dir;
				state->last_seq = seq;
				state->last_ack = ack;
				state->last_end = end;
				state->last_win = win;
				state->retrans = 0;
			}
		}
		res = true;
	} else {
		res = false;
		/* Liberal mode accepts the segment despite the failed checks. */
		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
		    tn->tcp_be_liberal)
			res = true;
		/* The nested conditional picks the message for the first failed check. */
		if (!res && LOG_INVALID(net, IPPROTO_TCP))
			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
			"nf_ct_tcp: %s ",
			before(seq, sender->td_maxend + 1) ?
			in_recv_win ?
			before(sack, receiver->td_end + 1) ?
			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
			: "ACK is under the lower bound (possible overly delayed ACK)"
			: "ACK is over the upper bound (ACKed data not seen yet)"
			: "SEQ is under the lower bound (already ACKed data retransmitted)"
			: "SEQ is over the upper bound (over the window of the receiver)");
	}

	pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
		 "receiver end=%u maxend=%u maxwin=%u\n",
		 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);

	return res;
}
/*
 * table of valid flag combinations - PUSH, ECE and CWR are always valid
 *
 * Indexed by the FIN/SYN/RST/ACK/URG bits of the TCP flag byte; an entry
 * of 1 marks an accepted combination, every combination not listed is 0.
 */
static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
				 TCPHDR_URG) + 1] =
{
	[TCPHDR_SYN] = 1,
	[TCPHDR_SYN|TCPHDR_URG] = 1,
	[TCPHDR_SYN|TCPHDR_ACK] = 1,
	[TCPHDR_RST] = 1,
	[TCPHDR_RST|TCPHDR_ACK] = 1,
	[TCPHDR_FIN|TCPHDR_ACK] = 1,
	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1,
	[TCPHDR_ACK] = 1,
	[TCPHDR_ACK|TCPHDR_URG] = 1,
};
  685. /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
  686. static int tcp_error(struct net *net, struct nf_conn *tmpl,
  687. struct sk_buff *skb,
  688. unsigned int dataoff,
  689. enum ip_conntrack_info *ctinfo,
  690. u_int8_t pf,
  691. unsigned int hooknum)
  692. {
  693. const struct tcphdr *th;
  694. struct tcphdr _tcph;
  695. unsigned int tcplen = skb->len - dataoff;
  696. u_int8_t tcpflags;
  697. /* Smaller that minimal TCP header? */
  698. th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
  699. if (th == NULL) {
  700. if (LOG_INVALID(net, IPPROTO_TCP))
  701. nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
  702. "nf_ct_tcp: short packet ");
  703. return -NF_ACCEPT;
  704. }
  705. /* Not whole TCP header or malformed packet */
  706. if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
  707. if (LOG_INVALID(net, IPPROTO_TCP))
  708. nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
  709. "nf_ct_tcp: truncated/malformed packet ");
  710. return -NF_ACCEPT;
  711. }
  712. /* Checksum invalid? Ignore.
  713. * We skip checking packets on the outgoing path
  714. * because the checksum is assumed to be correct.
  715. */
  716. /* FIXME: Source route IP option packets --RR */
  717. if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
  718. nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
  719. if (LOG_INVALID(net, IPPROTO_TCP))
  720. nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
  721. "nf_ct_tcp: bad TCP checksum ");
  722. return -NF_ACCEPT;
  723. }
  724. /* Check TCP flags. */
  725. tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
  726. if (!tcp_valid_flags[tcpflags]) {
  727. if (LOG_INVALID(net, IPPROTO_TCP))
  728. nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
  729. "nf_ct_tcp: invalid TCP flag combination ");
  730. return -NF_ACCEPT;
  731. }
  732. return NF_ACCEPT;
  733. }
  734. static unsigned int *tcp_get_timeouts(struct net *net)
  735. {
  736. return tcp_pernet(net)->timeouts;
  737. }
  738. /* Returns verdict for packet, or -1 for invalid. */
  739. static int tcp_packet(struct nf_conn *ct,
  740. const struct sk_buff *skb,
  741. unsigned int dataoff,
  742. enum ip_conntrack_info ctinfo,
  743. u_int8_t pf,
  744. unsigned int hooknum,
  745. unsigned int *timeouts)
  746. {
  747. struct net *net = nf_ct_net(ct);
  748. struct nf_tcp_net *tn = tcp_pernet(net);
  749. struct nf_conntrack_tuple *tuple;
  750. enum tcp_conntrack new_state, old_state;
  751. enum ip_conntrack_dir dir;
  752. const struct tcphdr *th;
  753. struct tcphdr _tcph;
  754. unsigned long timeout;
  755. unsigned int index;
  756. th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
  757. BUG_ON(th == NULL);
  758. spin_lock_bh(&ct->lock);
  759. old_state = ct->proto.tcp.state;
  760. dir = CTINFO2DIR(ctinfo);
  761. index = get_conntrack_index(th);
  762. new_state = tcp_conntracks[dir][index][old_state];
  763. tuple = &ct->tuplehash[dir].tuple;
  764. switch (new_state) {
  765. case TCP_CONNTRACK_SYN_SENT:
  766. if (old_state < TCP_CONNTRACK_TIME_WAIT)
  767. break;
  768. /* RFC 1122: "When a connection is closed actively,
  769. * it MUST linger in TIME-WAIT state for a time 2xMSL
  770. * (Maximum Segment Lifetime). However, it MAY accept
  771. * a new SYN from the remote TCP to reopen the connection
  772. * directly from TIME-WAIT state, if..."
  773. * We ignore the conditions because we are in the
  774. * TIME-WAIT state anyway.
  775. *
  776. * Handle aborted connections: we and the server
  777. * think there is an existing connection but the client
  778. * aborts it and starts a new one.
  779. */
  780. if (((ct->proto.tcp.seen[dir].flags
  781. | ct->proto.tcp.seen[!dir].flags)
  782. & IP_CT_TCP_FLAG_CLOSE_INIT)
  783. || (ct->proto.tcp.last_dir == dir
  784. && ct->proto.tcp.last_index == TCP_RST_SET)) {
  785. /* Attempt to reopen a closed/aborted connection.
  786. * Delete this connection and look up again. */
  787. spin_unlock_bh(&ct->lock);
  788. /* Only repeat if we can actually remove the timer.
  789. * Destruction may already be in progress in process
  790. * context and we must give it a chance to terminate.
  791. */
  792. if (nf_ct_kill(ct))
  793. return -NF_REPEAT;
  794. return NF_DROP;
  795. }
  796. /* Fall through */
  797. case TCP_CONNTRACK_IGNORE:
  798. /* Ignored packets:
  799. *
  800. * Our connection entry may be out of sync, so ignore
  801. * packets which may signal the real connection between
  802. * the client and the server.
  803. *
  804. * a) SYN in ORIGINAL
  805. * b) SYN/ACK in REPLY
  806. * c) ACK in reply direction after initial SYN in original.
  807. *
  808. * If the ignored packet is invalid, the receiver will send
  809. * a RST we'll catch below.
  810. */
  811. if (index == TCP_SYNACK_SET
  812. && ct->proto.tcp.last_index == TCP_SYN_SET
  813. && ct->proto.tcp.last_dir != dir
  814. && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
  815. /* b) This SYN/ACK acknowledges a SYN that we earlier
  816. * ignored as invalid. This means that the client and
  817. * the server are both in sync, while the firewall is
  818. * not. We get in sync from the previously annotated
  819. * values.
  820. */
  821. old_state = TCP_CONNTRACK_SYN_SENT;
  822. new_state = TCP_CONNTRACK_SYN_RECV;
  823. ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
  824. ct->proto.tcp.last_end;
  825. ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
  826. ct->proto.tcp.last_end;
  827. ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
  828. ct->proto.tcp.last_win == 0 ?
  829. 1 : ct->proto.tcp.last_win;
  830. ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
  831. ct->proto.tcp.last_wscale;
  832. ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
  833. ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
  834. ct->proto.tcp.last_flags;
  835. memset(&ct->proto.tcp.seen[dir], 0,
  836. sizeof(struct ip_ct_tcp_state));
  837. break;
  838. }
  839. ct->proto.tcp.last_index = index;
  840. ct->proto.tcp.last_dir = dir;
  841. ct->proto.tcp.last_seq = ntohl(th->seq);
  842. ct->proto.tcp.last_end =
  843. segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
  844. ct->proto.tcp.last_win = ntohs(th->window);
  845. /* a) This is a SYN in ORIGINAL. The client and the server
  846. * may be in sync but we are not. In that case, we annotate
  847. * the TCP options and let the packet go through. If it is a
  848. * valid SYN packet, the server will reply with a SYN/ACK, and
  849. * then we'll get in sync. Otherwise, the server potentially
  850. * responds with a challenge ACK if implementing RFC5961.
  851. */
  852. if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
  853. struct ip_ct_tcp_state seen = {};
  854. ct->proto.tcp.last_flags =
  855. ct->proto.tcp.last_wscale = 0;
  856. tcp_options(skb, dataoff, th, &seen);
  857. if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
  858. ct->proto.tcp.last_flags |=
  859. IP_CT_TCP_FLAG_WINDOW_SCALE;
  860. ct->proto.tcp.last_wscale = seen.td_scale;
  861. }
  862. if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
  863. ct->proto.tcp.last_flags |=
  864. IP_CT_TCP_FLAG_SACK_PERM;
  865. }
  866. /* Mark the potential for RFC5961 challenge ACK,
  867. * this pose a special problem for LAST_ACK state
  868. * as ACK is intrepretated as ACKing last FIN.
  869. */
  870. if (old_state == TCP_CONNTRACK_LAST_ACK)
  871. ct->proto.tcp.last_flags |=
  872. IP_CT_EXP_CHALLENGE_ACK;
  873. }
  874. spin_unlock_bh(&ct->lock);
  875. if (LOG_INVALID(net, IPPROTO_TCP))
  876. nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
  877. "nf_ct_tcp: invalid packet ignored in "
  878. "state %s ", tcp_conntrack_names[old_state]);
  879. return NF_ACCEPT;
  880. case TCP_CONNTRACK_MAX:
  881. /* Special case for SYN proxy: when the SYN to the server or
  882. * the SYN/ACK from the server is lost, the client may transmit
  883. * a keep-alive packet while in SYN_SENT state. This needs to
  884. * be associated with the original conntrack entry in order to
  885. * generate a new SYN with the correct sequence number.
  886. */
  887. if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
  888. index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
  889. ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
  890. ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
  891. pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
  892. spin_unlock_bh(&ct->lock);
  893. return NF_ACCEPT;
  894. }
  895. /* Invalid packet */
  896. pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
  897. dir, get_conntrack_index(th), old_state);
  898. spin_unlock_bh(&ct->lock);
  899. if (LOG_INVALID(net, IPPROTO_TCP))
  900. nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
  901. "nf_ct_tcp: invalid state ");
  902. return -NF_ACCEPT;
  903. case TCP_CONNTRACK_TIME_WAIT:
  904. /* RFC5961 compliance cause stack to send "challenge-ACK"
  905. * e.g. in response to spurious SYNs. Conntrack MUST
  906. * not believe this ACK is acking last FIN.
  907. */
  908. if (old_state == TCP_CONNTRACK_LAST_ACK &&
  909. index == TCP_ACK_SET &&
  910. ct->proto.tcp.last_dir != dir &&
  911. ct->proto.tcp.last_index == TCP_SYN_SET &&
  912. (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
  913. /* Detected RFC5961 challenge ACK */
  914. ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
  915. spin_unlock_bh(&ct->lock);
  916. if (LOG_INVALID(net, IPPROTO_TCP))
  917. nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
  918. "nf_ct_tcp: challenge-ACK ignored ");
  919. return NF_ACCEPT; /* Don't change state */
  920. }
  921. break;
  922. case TCP_CONNTRACK_CLOSE:
  923. if (index == TCP_RST_SET
  924. && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
  925. && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
  926. /* Invalid RST */
  927. spin_unlock_bh(&ct->lock);
  928. if (LOG_INVALID(net, IPPROTO_TCP))
  929. nf_log_packet(net, pf, 0, skb, NULL, NULL,
  930. NULL, "nf_ct_tcp: invalid RST ");
  931. return -NF_ACCEPT;
  932. }
  933. if (index == TCP_RST_SET
  934. && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
  935. && ct->proto.tcp.last_index == TCP_SYN_SET)
  936. || (!test_bit(IPS_ASSURED_BIT, &ct->status)
  937. && ct->proto.tcp.last_index == TCP_ACK_SET))
  938. && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
  939. /* RST sent to invalid SYN or ACK we had let through
  940. * at a) and c) above:
  941. *
  942. * a) SYN was in window then
  943. * c) we hold a half-open connection.
  944. *
  945. * Delete our connection entry.
  946. * We skip window checking, because packet might ACK
  947. * segments we ignored. */
  948. goto in_window;
  949. }
  950. /* Just fall through */
  951. default:
  952. /* Keep compilers happy. */
  953. break;
  954. }
  955. if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
  956. skb, dataoff, th, pf)) {
  957. spin_unlock_bh(&ct->lock);
  958. return -NF_ACCEPT;
  959. }
  960. in_window:
  961. /* From now on we have got in-window packets */
  962. ct->proto.tcp.last_index = index;
  963. ct->proto.tcp.last_dir = dir;
  964. pr_debug("tcp_conntracks: ");
  965. nf_ct_dump_tuple(tuple);
  966. pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
  967. (th->syn ? 1 : 0), (th->ack ? 1 : 0),
  968. (th->fin ? 1 : 0), (th->rst ? 1 : 0),
  969. old_state, new_state);
  970. ct->proto.tcp.state = new_state;
  971. if (old_state != new_state
  972. && new_state == TCP_CONNTRACK_FIN_WAIT)
  973. ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
  974. if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
  975. timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
  976. timeout = timeouts[TCP_CONNTRACK_RETRANS];
  977. else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
  978. IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
  979. timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
  980. timeout = timeouts[TCP_CONNTRACK_UNACK];
  981. else
  982. timeout = timeouts[new_state];
  983. spin_unlock_bh(&ct->lock);
  984. if (new_state != old_state)
  985. nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
  986. if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
  987. /* If only reply is a RST, we can consider ourselves not to
  988. have an established connection: this is a fairly common
  989. problem case, so we can delete the conntrack
  990. immediately. --RR */
  991. if (th->rst) {
  992. nf_ct_kill_acct(ct, ctinfo, skb);
  993. return NF_ACCEPT;
  994. }
  995. /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
  996. * pickup with loose=1. Avoid large ESTABLISHED timeout.
  997. */
  998. if (new_state == TCP_CONNTRACK_ESTABLISHED &&
  999. timeout > timeouts[TCP_CONNTRACK_UNACK])
  1000. timeout = timeouts[TCP_CONNTRACK_UNACK];
  1001. } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
  1002. && (old_state == TCP_CONNTRACK_SYN_RECV
  1003. || old_state == TCP_CONNTRACK_ESTABLISHED)
  1004. && new_state == TCP_CONNTRACK_ESTABLISHED) {
  1005. /* Set ASSURED if we see see valid ack in ESTABLISHED
  1006. after SYN_RECV or a valid answer for a picked up
  1007. connection. */
  1008. set_bit(IPS_ASSURED_BIT, &ct->status);
  1009. nf_conntrack_event_cache(IPCT_ASSURED, ct);
  1010. }
  1011. nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
  1012. return NF_ACCEPT;
  1013. }
  1014. /* Called when a new connection for this protocol found. */
  1015. static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
  1016. unsigned int dataoff, unsigned int *timeouts)
  1017. {
  1018. enum tcp_conntrack new_state;
  1019. const struct tcphdr *th;
  1020. struct tcphdr _tcph;
  1021. struct net *net = nf_ct_net(ct);
  1022. struct nf_tcp_net *tn = tcp_pernet(net);
  1023. const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
  1024. const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
  1025. th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
  1026. BUG_ON(th == NULL);
  1027. /* Don't need lock here: this conntrack not in circulation yet */
  1028. new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
  1029. /* Invalid: delete conntrack */
  1030. if (new_state >= TCP_CONNTRACK_MAX) {
  1031. pr_debug("nf_ct_tcp: invalid new deleting.\n");
  1032. return false;
  1033. }
  1034. if (new_state == TCP_CONNTRACK_SYN_SENT) {
  1035. memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
  1036. /* SYN packet */
  1037. ct->proto.tcp.seen[0].td_end =
  1038. segment_seq_plus_len(ntohl(th->seq), skb->len,
  1039. dataoff, th);
  1040. ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
  1041. if (ct->proto.tcp.seen[0].td_maxwin == 0)
  1042. ct->proto.tcp.seen[0].td_maxwin = 1;
  1043. ct->proto.tcp.seen[0].td_maxend =
  1044. ct->proto.tcp.seen[0].td_end;
  1045. tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
  1046. } else if (tn->tcp_loose == 0) {
  1047. /* Don't try to pick up connections. */
  1048. return false;
  1049. } else {
  1050. memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
  1051. /*
  1052. * We are in the middle of a connection,
  1053. * its history is lost for us.
  1054. * Let's try to use the data from the packet.
  1055. */
  1056. ct->proto.tcp.seen[0].td_end =
  1057. segment_seq_plus_len(ntohl(th->seq), skb->len,
  1058. dataoff, th);
  1059. ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
  1060. if (ct->proto.tcp.seen[0].td_maxwin == 0)
  1061. ct->proto.tcp.seen[0].td_maxwin = 1;
  1062. ct->proto.tcp.seen[0].td_maxend =
  1063. ct->proto.tcp.seen[0].td_end +
  1064. ct->proto.tcp.seen[0].td_maxwin;
  1065. /* We assume SACK and liberal window checking to handle
  1066. * window scaling */
  1067. ct->proto.tcp.seen[0].flags =
  1068. ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
  1069. IP_CT_TCP_FLAG_BE_LIBERAL;
  1070. }
  1071. /* tcp_packet will set them */
  1072. ct->proto.tcp.last_index = TCP_NONE_SET;
  1073. pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
  1074. "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
  1075. sender->td_end, sender->td_maxend, sender->td_maxwin,
  1076. sender->td_scale,
  1077. receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
  1078. receiver->td_scale);
  1079. return true;
  1080. }
  1081. #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
  1082. #include <linux/netfilter/nfnetlink.h>
  1083. #include <linux/netfilter/nfnetlink_conntrack.h>
  1084. static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
  1085. struct nf_conn *ct)
  1086. {
  1087. struct nlattr *nest_parms;
  1088. struct nf_ct_tcp_flags tmp = {};
  1089. spin_lock_bh(&ct->lock);
  1090. nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
  1091. if (!nest_parms)
  1092. goto nla_put_failure;
  1093. if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
  1094. nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
  1095. ct->proto.tcp.seen[0].td_scale) ||
  1096. nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
  1097. ct->proto.tcp.seen[1].td_scale))
  1098. goto nla_put_failure;
  1099. tmp.flags = ct->proto.tcp.seen[0].flags;
  1100. if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
  1101. sizeof(struct nf_ct_tcp_flags), &tmp))
  1102. goto nla_put_failure;
  1103. tmp.flags = ct->proto.tcp.seen[1].flags;
  1104. if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
  1105. sizeof(struct nf_ct_tcp_flags), &tmp))
  1106. goto nla_put_failure;
  1107. spin_unlock_bh(&ct->lock);
  1108. nla_nest_end(skb, nest_parms);
  1109. return 0;
  1110. nla_put_failure:
  1111. spin_unlock_bh(&ct->lock);
  1112. return -1;
  1113. }
  1114. static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
  1115. [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
  1116. [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
  1117. [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
  1118. [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
  1119. [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
  1120. };
  1121. static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
  1122. {
  1123. struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
  1124. struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
  1125. int err;
  1126. /* updates could not contain anything about the private
  1127. * protocol info, in that case skip the parsing */
  1128. if (!pattr)
  1129. return 0;
  1130. err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
  1131. if (err < 0)
  1132. return err;
  1133. if (tb[CTA_PROTOINFO_TCP_STATE] &&
  1134. nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
  1135. return -EINVAL;
  1136. spin_lock_bh(&ct->lock);
  1137. if (tb[CTA_PROTOINFO_TCP_STATE])
  1138. ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
  1139. if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
  1140. struct nf_ct_tcp_flags *attr =
  1141. nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
  1142. ct->proto.tcp.seen[0].flags &= ~attr->mask;
  1143. ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
  1144. }
  1145. if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
  1146. struct nf_ct_tcp_flags *attr =
  1147. nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
  1148. ct->proto.tcp.seen[1].flags &= ~attr->mask;
  1149. ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
  1150. }
  1151. if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
  1152. tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
  1153. ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
  1154. ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
  1155. ct->proto.tcp.seen[0].td_scale =
  1156. nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
  1157. ct->proto.tcp.seen[1].td_scale =
  1158. nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
  1159. }
  1160. spin_unlock_bh(&ct->lock);
  1161. return 0;
  1162. }
  1163. static int tcp_nlattr_size(void)
  1164. {
  1165. return nla_total_size(0) /* CTA_PROTOINFO_TCP */
  1166. + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
  1167. }
  1168. static int tcp_nlattr_tuple_size(void)
  1169. {
  1170. return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
  1171. }
  1172. #endif
  1173. #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
  1174. #include <linux/netfilter/nfnetlink.h>
  1175. #include <linux/netfilter/nfnetlink_cttimeout.h>
  1176. static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
  1177. struct net *net, void *data)
  1178. {
  1179. unsigned int *timeouts = data;
  1180. struct nf_tcp_net *tn = tcp_pernet(net);
  1181. int i;
  1182. /* set default TCP timeouts. */
  1183. for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
  1184. timeouts[i] = tn->timeouts[i];
  1185. if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
  1186. timeouts[TCP_CONNTRACK_SYN_SENT] =
  1187. ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
  1188. }
  1189. if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
  1190. timeouts[TCP_CONNTRACK_SYN_RECV] =
  1191. ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
  1192. }
  1193. if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
  1194. timeouts[TCP_CONNTRACK_ESTABLISHED] =
  1195. ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
  1196. }
  1197. if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
  1198. timeouts[TCP_CONNTRACK_FIN_WAIT] =
  1199. ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
  1200. }
  1201. if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
  1202. timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
  1203. ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
  1204. }
  1205. if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
  1206. timeouts[TCP_CONNTRACK_LAST_ACK] =
  1207. ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
  1208. }
  1209. if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
  1210. timeouts[TCP_CONNTRACK_TIME_WAIT] =
  1211. ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
  1212. }
  1213. if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
  1214. timeouts[TCP_CONNTRACK_CLOSE] =
  1215. ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
  1216. }
  1217. if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
  1218. timeouts[TCP_CONNTRACK_SYN_SENT2] =
  1219. ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
  1220. }
  1221. if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
  1222. timeouts[TCP_CONNTRACK_RETRANS] =
  1223. ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
  1224. }
  1225. if (tb[CTA_TIMEOUT_TCP_UNACK]) {
  1226. timeouts[TCP_CONNTRACK_UNACK] =
  1227. ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
  1228. }
  1229. return 0;
  1230. }
  1231. static int
  1232. tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
  1233. {
  1234. const unsigned int *timeouts = data;
  1235. if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
  1236. htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
  1237. nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
  1238. htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
  1239. nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
  1240. htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
  1241. nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
  1242. htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
  1243. nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
  1244. htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
  1245. nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
  1246. htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
  1247. nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
  1248. htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
  1249. nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
  1250. htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
  1251. nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
  1252. htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
  1253. nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
  1254. htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
  1255. nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
  1256. htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
  1257. goto nla_put_failure;
  1258. return 0;
  1259. nla_put_failure:
  1260. return -ENOSPC;
  1261. }
  1262. static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
  1263. [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NLA_U32 },
  1264. [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NLA_U32 },
  1265. [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NLA_U32 },
  1266. [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NLA_U32 },
  1267. [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NLA_U32 },
  1268. [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NLA_U32 },
  1269. [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 },
  1270. [CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 },
  1271. [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 },
  1272. [CTA_TIMEOUT_TCP_RETRANS] = { .type = NLA_U32 },
  1273. [CTA_TIMEOUT_TCP_UNACK] = { .type = NLA_U32 },
  1274. };
  1275. #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
  1276. #ifdef CONFIG_SYSCTL
  1277. static struct ctl_table tcp_sysctl_table[] = {
  1278. {
  1279. .procname = "nf_conntrack_tcp_timeout_syn_sent",
  1280. .maxlen = sizeof(unsigned int),
  1281. .mode = 0644,
  1282. .proc_handler = proc_dointvec_jiffies,
  1283. },
  1284. {
  1285. .procname = "nf_conntrack_tcp_timeout_syn_recv",
  1286. .maxlen = sizeof(unsigned int),
  1287. .mode = 0644,
  1288. .proc_handler = proc_dointvec_jiffies,
  1289. },
  1290. {
  1291. .procname = "nf_conntrack_tcp_timeout_established",
  1292. .maxlen = sizeof(unsigned int),
  1293. .mode = 0644,
  1294. .proc_handler = proc_dointvec_jiffies,
  1295. },
  1296. {
  1297. .procname = "nf_conntrack_tcp_timeout_fin_wait",
  1298. .maxlen = sizeof(unsigned int),
  1299. .mode = 0644,
  1300. .proc_handler = proc_dointvec_jiffies,
  1301. },
  1302. {
  1303. .procname = "nf_conntrack_tcp_timeout_close_wait",
  1304. .maxlen = sizeof(unsigned int),
  1305. .mode = 0644,
  1306. .proc_handler = proc_dointvec_jiffies,
  1307. },
  1308. {
  1309. .procname = "nf_conntrack_tcp_timeout_last_ack",
  1310. .maxlen = sizeof(unsigned int),
  1311. .mode = 0644,
  1312. .proc_handler = proc_dointvec_jiffies,
  1313. },
  1314. {
  1315. .procname = "nf_conntrack_tcp_timeout_time_wait",
  1316. .maxlen = sizeof(unsigned int),
  1317. .mode = 0644,
  1318. .proc_handler = proc_dointvec_jiffies,
  1319. },
  1320. {
  1321. .procname = "nf_conntrack_tcp_timeout_close",
  1322. .maxlen = sizeof(unsigned int),
  1323. .mode = 0644,
  1324. .proc_handler = proc_dointvec_jiffies,
  1325. },
  1326. {
  1327. .procname = "nf_conntrack_tcp_timeout_max_retrans",
  1328. .maxlen = sizeof(unsigned int),
  1329. .mode = 0644,
  1330. .proc_handler = proc_dointvec_jiffies,
  1331. },
  1332. {
  1333. .procname = "nf_conntrack_tcp_timeout_unacknowledged",
  1334. .maxlen = sizeof(unsigned int),
  1335. .mode = 0644,
  1336. .proc_handler = proc_dointvec_jiffies,
  1337. },
  1338. {
  1339. .procname = "nf_conntrack_tcp_loose",
  1340. .maxlen = sizeof(unsigned int),
  1341. .mode = 0644,
  1342. .proc_handler = proc_dointvec,
  1343. },
  1344. {
  1345. .procname = "nf_conntrack_tcp_be_liberal",
  1346. .maxlen = sizeof(unsigned int),
  1347. .mode = 0644,
  1348. .proc_handler = proc_dointvec,
  1349. },
  1350. {
  1351. .procname = "nf_conntrack_tcp_max_retrans",
  1352. .maxlen = sizeof(unsigned int),
  1353. .mode = 0644,
  1354. .proc_handler = proc_dointvec,
  1355. },
  1356. { }
  1357. };
  1358. #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
  1359. static struct ctl_table tcp_compat_sysctl_table[] = {
  1360. {
  1361. .procname = "ip_conntrack_tcp_timeout_syn_sent",
  1362. .maxlen = sizeof(unsigned int),
  1363. .mode = 0644,
  1364. .proc_handler = proc_dointvec_jiffies,
  1365. },
  1366. {
  1367. .procname = "ip_conntrack_tcp_timeout_syn_sent2",
  1368. .maxlen = sizeof(unsigned int),
  1369. .mode = 0644,
  1370. .proc_handler = proc_dointvec_jiffies,
  1371. },
  1372. {
  1373. .procname = "ip_conntrack_tcp_timeout_syn_recv",
  1374. .maxlen = sizeof(unsigned int),
  1375. .mode = 0644,
  1376. .proc_handler = proc_dointvec_jiffies,
  1377. },
  1378. {
  1379. .procname = "ip_conntrack_tcp_timeout_established",
  1380. .maxlen = sizeof(unsigned int),
  1381. .mode = 0644,
  1382. .proc_handler = proc_dointvec_jiffies,
  1383. },
  1384. {
  1385. .procname = "ip_conntrack_tcp_timeout_fin_wait",
  1386. .maxlen = sizeof(unsigned int),
  1387. .mode = 0644,
  1388. .proc_handler = proc_dointvec_jiffies,
  1389. },
  1390. {
  1391. .procname = "ip_conntrack_tcp_timeout_close_wait",
  1392. .maxlen = sizeof(unsigned int),
  1393. .mode = 0644,
  1394. .proc_handler = proc_dointvec_jiffies,
  1395. },
  1396. {
  1397. .procname = "ip_conntrack_tcp_timeout_last_ack",
  1398. .maxlen = sizeof(unsigned int),
  1399. .mode = 0644,
  1400. .proc_handler = proc_dointvec_jiffies,
  1401. },
  1402. {
  1403. .procname = "ip_conntrack_tcp_timeout_time_wait",
  1404. .maxlen = sizeof(unsigned int),
  1405. .mode = 0644,
  1406. .proc_handler = proc_dointvec_jiffies,
  1407. },
  1408. {
  1409. .procname = "ip_conntrack_tcp_timeout_close",
  1410. .maxlen = sizeof(unsigned int),
  1411. .mode = 0644,
  1412. .proc_handler = proc_dointvec_jiffies,
  1413. },
  1414. {
  1415. .procname = "ip_conntrack_tcp_timeout_max_retrans",
  1416. .maxlen = sizeof(unsigned int),
  1417. .mode = 0644,
  1418. .proc_handler = proc_dointvec_jiffies,
  1419. },
  1420. {
  1421. .procname = "ip_conntrack_tcp_loose",
  1422. .maxlen = sizeof(unsigned int),
  1423. .mode = 0644,
  1424. .proc_handler = proc_dointvec,
  1425. },
  1426. {
  1427. .procname = "ip_conntrack_tcp_be_liberal",
  1428. .maxlen = sizeof(unsigned int),
  1429. .mode = 0644,
  1430. .proc_handler = proc_dointvec,
  1431. },
  1432. {
  1433. .procname = "ip_conntrack_tcp_max_retrans",
  1434. .maxlen = sizeof(unsigned int),
  1435. .mode = 0644,
  1436. .proc_handler = proc_dointvec,
  1437. },
  1438. { }
  1439. };
  1440. #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
  1441. #endif /* CONFIG_SYSCTL */
  1442. static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
  1443. struct nf_tcp_net *tn)
  1444. {
  1445. #ifdef CONFIG_SYSCTL
  1446. if (pn->ctl_table)
  1447. return 0;
  1448. pn->ctl_table = kmemdup(tcp_sysctl_table,
  1449. sizeof(tcp_sysctl_table),
  1450. GFP_KERNEL);
  1451. if (!pn->ctl_table)
  1452. return -ENOMEM;
  1453. pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
  1454. pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
  1455. pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
  1456. pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
  1457. pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
  1458. pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
  1459. pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
  1460. pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
  1461. pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
  1462. pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];
  1463. pn->ctl_table[10].data = &tn->tcp_loose;
  1464. pn->ctl_table[11].data = &tn->tcp_be_liberal;
  1465. pn->ctl_table[12].data = &tn->tcp_max_retrans;
  1466. #endif
  1467. return 0;
  1468. }
  /*
   * Duplicate tcp_compat_sysctl_table into pn->ctl_compat_table and point the
   * .data member of each duplicated entry at the corresponding field of @tn.
   *
   * The work is compiled in only when both CONFIG_SYSCTL and
   * CONFIG_NF_CONNTRACK_PROC_COMPAT are defined; otherwise this is a no-op.
   *
   * Returns 0 on success (or when compiled out) and -ENOMEM when kmemdup()
   * fails.
   */
  static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
                                             struct nf_tcp_net *tn)
  {
  #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
      /* Entry N of the duplicated table receives targets[N] as its .data. */
      void *const targets[] = {
          &tn->timeouts[TCP_CONNTRACK_SYN_SENT],      /* [0]  */
          &tn->timeouts[TCP_CONNTRACK_SYN_SENT2],     /* [1]  */
          &tn->timeouts[TCP_CONNTRACK_SYN_RECV],      /* [2]  */
          &tn->timeouts[TCP_CONNTRACK_ESTABLISHED],   /* [3]  */
          &tn->timeouts[TCP_CONNTRACK_FIN_WAIT],      /* [4]  */
          &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT],    /* [5]  */
          &tn->timeouts[TCP_CONNTRACK_LAST_ACK],      /* [6]  */
          &tn->timeouts[TCP_CONNTRACK_TIME_WAIT],     /* [7]  */
          &tn->timeouts[TCP_CONNTRACK_CLOSE],         /* [8]  */
          &tn->timeouts[TCP_CONNTRACK_RETRANS],       /* [9]  */
          &tn->tcp_loose,                             /* [10] */
          &tn->tcp_be_liberal,                        /* [11] */
          &tn->tcp_max_retrans,                       /* [12] */
      };
      unsigned int slot;

      pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
                                     sizeof(tcp_compat_sysctl_table),
                                     GFP_KERNEL);
      if (!pn->ctl_compat_table)
          return -ENOMEM;

      for (slot = 0; slot < sizeof(targets) / sizeof(targets[0]); slot++)
          pn->ctl_compat_table[slot].data = targets[slot];
  #endif
      return 0;
  }
  1496. static int tcp_init_net(struct net *net, u_int16_t proto)
  1497. {
  1498. int ret;
  1499. struct nf_tcp_net *tn = tcp_pernet(net);
  1500. struct nf_proto_net *pn = &tn->pn;
  1501. if (!pn->users) {
  1502. int i;
  1503. for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
  1504. tn->timeouts[i] = tcp_timeouts[i];
  1505. tn->tcp_loose = nf_ct_tcp_loose;
  1506. tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
  1507. tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
  1508. }
  1509. if (proto == AF_INET) {
  1510. ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
  1511. if (ret < 0)
  1512. return ret;
  1513. ret = tcp_kmemdup_sysctl_table(pn, tn);
  1514. if (ret < 0)
  1515. nf_ct_kfree_compat_sysctl_table(pn);
  1516. } else
  1517. ret = tcp_kmemdup_sysctl_table(pn, tn);
  1518. return ret;
  1519. }
  1520. static struct nf_proto_net *tcp_get_net_proto(struct net *net)
  1521. {
  1522. return &net->ct.nf_ct_proto.tcp.pn;
  1523. }
  1524. struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
  1525. {
  1526. .l3proto = PF_INET,
  1527. .l4proto = IPPROTO_TCP,
  1528. .name = "tcp",
  1529. .pkt_to_tuple = tcp_pkt_to_tuple,
  1530. .invert_tuple = tcp_invert_tuple,
  1531. .print_tuple = tcp_print_tuple,
  1532. .print_conntrack = tcp_print_conntrack,
  1533. .packet = tcp_packet,
  1534. .get_timeouts = tcp_get_timeouts,
  1535. .new = tcp_new,
  1536. .error = tcp_error,
  1537. #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
  1538. .to_nlattr = tcp_to_nlattr,
  1539. .nlattr_size = tcp_nlattr_size,
  1540. .from_nlattr = nlattr_to_tcp,
  1541. .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
  1542. .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
  1543. .nlattr_tuple_size = tcp_nlattr_tuple_size,
  1544. .nla_policy = nf_ct_port_nla_policy,
  1545. #endif
  1546. #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
  1547. .ctnl_timeout = {
  1548. .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
  1549. .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
  1550. .nlattr_max = CTA_TIMEOUT_TCP_MAX,
  1551. .obj_size = sizeof(unsigned int) *
  1552. TCP_CONNTRACK_TIMEOUT_MAX,
  1553. .nla_policy = tcp_timeout_nla_policy,
  1554. },
  1555. #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
  1556. .init_net = tcp_init_net,
  1557. .get_net_proto = tcp_get_net_proto,
  1558. };
  1559. EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
  1560. struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
  1561. {
  1562. .l3proto = PF_INET6,
  1563. .l4proto = IPPROTO_TCP,
  1564. .name = "tcp",
  1565. .pkt_to_tuple = tcp_pkt_to_tuple,
  1566. .invert_tuple = tcp_invert_tuple,
  1567. .print_tuple = tcp_print_tuple,
  1568. .print_conntrack = tcp_print_conntrack,
  1569. .packet = tcp_packet,
  1570. .get_timeouts = tcp_get_timeouts,
  1571. .new = tcp_new,
  1572. .error = tcp_error,
  1573. #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
  1574. .to_nlattr = tcp_to_nlattr,
  1575. .nlattr_size = tcp_nlattr_size,
  1576. .from_nlattr = nlattr_to_tcp,
  1577. .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
  1578. .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
  1579. .nlattr_tuple_size = tcp_nlattr_tuple_size,
  1580. .nla_policy = nf_ct_port_nla_policy,
  1581. #endif
  1582. #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
  1583. .ctnl_timeout = {
  1584. .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
  1585. .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
  1586. .nlattr_max = CTA_TIMEOUT_TCP_MAX,
  1587. .obj_size = sizeof(unsigned int) *
  1588. TCP_CONNTRACK_TIMEOUT_MAX,
  1589. .nla_policy = tcp_timeout_nla_policy,
  1590. },
  1591. #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
  1592. .init_net = tcp_init_net,
  1593. .get_net_proto = tcp_get_net_proto,
  1594. };
  1595. EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);