inet_lro.c 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. /*
  2. * linux/net/ipv4/inet_lro.c
  3. *
  4. * Large Receive Offload (ipv4 / tcp)
  5. *
  6. * (C) Copyright IBM Corp. 2007
  7. *
  8. * Authors:
  9. * Jan-Bernd Themann <themann@de.ibm.com>
  10. * Christoph Raisch <raisch@de.ibm.com>
  11. *
  12. *
  13. * This program is free software; you can redistribute it and/or modify
  14. * it under the terms of the GNU General Public License as published by
  15. * the Free Software Foundation; either version 2, or (at your option)
  16. * any later version.
  17. *
  18. * This program is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. * GNU General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU General Public License
  24. * along with this program; if not, write to the Free Software
  25. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  26. */
  27. #include <linux/module.h>
  28. #include <linux/if_vlan.h>
  29. #include <linux/inet_lro.h>
  30. #include <net/checksum.h>
  31. MODULE_LICENSE("GPL");
  32. MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
  33. MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
  34. #define TCP_HDR_LEN(tcph) (tcph->doff << 2)
  35. #define IP_HDR_LEN(iph) (iph->ihl << 2)
  36. #define TCP_PAYLOAD_LENGTH(iph, tcph) \
  37. (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))
  38. #define IPH_LEN_WO_OPTIONS 5
  39. #define TCPH_LEN_WO_OPTIONS 5
  40. #define TCPH_LEN_W_TIMESTAMP 8
  41. #define LRO_MAX_PG_HLEN 64
  42. #define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; }
  43. /*
  44. * Basic tcp checks whether packet is suitable for LRO
  45. */
  46. static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
  47. int len, const struct net_lro_desc *lro_desc)
  48. {
  49. /* check ip header: don't aggregate padded frames */
  50. if (ntohs(iph->tot_len) != len)
  51. return -1;
  52. if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
  53. return -1;
  54. if (iph->ihl != IPH_LEN_WO_OPTIONS)
  55. return -1;
  56. if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
  57. tcph->rst || tcph->syn || tcph->fin)
  58. return -1;
  59. if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
  60. return -1;
  61. if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
  62. tcph->doff != TCPH_LEN_W_TIMESTAMP)
  63. return -1;
  64. /* check tcp options (only timestamp allowed) */
  65. if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
  66. __be32 *topt = (__be32 *)(tcph + 1);
  67. if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
  68. | (TCPOPT_TIMESTAMP << 8)
  69. | TCPOLEN_TIMESTAMP))
  70. return -1;
  71. /* timestamp should be in right order */
  72. topt++;
  73. if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
  74. ntohl(*topt)))
  75. return -1;
  76. /* timestamp reply should not be zero */
  77. topt++;
  78. if (*topt == 0)
  79. return -1;
  80. }
  81. return 0;
  82. }
  83. static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
  84. {
  85. struct iphdr *iph = lro_desc->iph;
  86. struct tcphdr *tcph = lro_desc->tcph;
  87. __be32 *p;
  88. __wsum tcp_hdr_csum;
  89. tcph->ack_seq = lro_desc->tcp_ack;
  90. tcph->window = lro_desc->tcp_window;
  91. if (lro_desc->tcp_saw_tstamp) {
  92. p = (__be32 *)(tcph + 1);
  93. *(p+2) = lro_desc->tcp_rcv_tsecr;
  94. }
  95. csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
  96. iph->tot_len = htons(lro_desc->ip_tot_len);
  97. tcph->check = 0;
  98. tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
  99. lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
  100. tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
  101. lro_desc->ip_tot_len -
  102. IP_HDR_LEN(iph), IPPROTO_TCP,
  103. lro_desc->data_csum);
  104. }
  105. static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
  106. {
  107. __wsum tcp_csum;
  108. __wsum tcp_hdr_csum;
  109. __wsum tcp_ps_hdr_csum;
  110. tcp_csum = ~csum_unfold(tcph->check);
  111. tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
  112. tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
  113. len + TCP_HDR_LEN(tcph),
  114. IPPROTO_TCP, 0);
  115. return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
  116. tcp_ps_hdr_csum);
  117. }
  118. static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
  119. struct iphdr *iph, struct tcphdr *tcph)
  120. {
  121. int nr_frags;
  122. __be32 *ptr;
  123. u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
  124. nr_frags = skb_shinfo(skb)->nr_frags;
  125. lro_desc->parent = skb;
  126. lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
  127. lro_desc->iph = iph;
  128. lro_desc->tcph = tcph;
  129. lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
  130. lro_desc->tcp_ack = tcph->ack_seq;
  131. lro_desc->tcp_window = tcph->window;
  132. lro_desc->pkt_aggr_cnt = 1;
  133. lro_desc->ip_tot_len = ntohs(iph->tot_len);
  134. if (tcph->doff == 8) {
  135. ptr = (__be32 *)(tcph+1);
  136. lro_desc->tcp_saw_tstamp = 1;
  137. lro_desc->tcp_rcv_tsval = *(ptr+1);
  138. lro_desc->tcp_rcv_tsecr = *(ptr+2);
  139. }
  140. lro_desc->mss = tcp_data_len;
  141. lro_desc->active = 1;
  142. lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
  143. tcp_data_len);
  144. }
  145. static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
  146. {
  147. memset(lro_desc, 0, sizeof(struct net_lro_desc));
  148. }
  149. static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
  150. struct tcphdr *tcph, int tcp_data_len)
  151. {
  152. struct sk_buff *parent = lro_desc->parent;
  153. __be32 *topt;
  154. lro_desc->pkt_aggr_cnt++;
  155. lro_desc->ip_tot_len += tcp_data_len;
  156. lro_desc->tcp_next_seq += tcp_data_len;
  157. lro_desc->tcp_window = tcph->window;
  158. lro_desc->tcp_ack = tcph->ack_seq;
  159. /* don't update tcp_rcv_tsval, would not work with PAWS */
  160. if (lro_desc->tcp_saw_tstamp) {
  161. topt = (__be32 *) (tcph + 1);
  162. lro_desc->tcp_rcv_tsecr = *(topt + 2);
  163. }
  164. lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
  165. lro_tcp_data_csum(iph, tcph,
  166. tcp_data_len),
  167. parent->len);
  168. parent->len += tcp_data_len;
  169. parent->data_len += tcp_data_len;
  170. if (tcp_data_len > lro_desc->mss)
  171. lro_desc->mss = tcp_data_len;
  172. }
  173. static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
  174. struct iphdr *iph, struct tcphdr *tcph)
  175. {
  176. struct sk_buff *parent = lro_desc->parent;
  177. int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
  178. lro_add_common(lro_desc, iph, tcph, tcp_data_len);
  179. skb_pull(skb, (skb->len - tcp_data_len));
  180. parent->truesize += skb->truesize;
  181. if (lro_desc->last_skb)
  182. lro_desc->last_skb->next = skb;
  183. else
  184. skb_shinfo(parent)->frag_list = skb;
  185. lro_desc->last_skb = skb;
  186. }
  187. static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
  188. struct iphdr *iph,
  189. struct tcphdr *tcph)
  190. {
  191. if ((lro_desc->iph->saddr != iph->saddr) ||
  192. (lro_desc->iph->daddr != iph->daddr) ||
  193. (lro_desc->tcph->source != tcph->source) ||
  194. (lro_desc->tcph->dest != tcph->dest))
  195. return -1;
  196. return 0;
  197. }
  198. static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
  199. struct net_lro_desc *lro_arr,
  200. struct iphdr *iph,
  201. struct tcphdr *tcph)
  202. {
  203. struct net_lro_desc *lro_desc = NULL;
  204. struct net_lro_desc *tmp;
  205. int max_desc = lro_mgr->max_desc;
  206. int i;
  207. for (i = 0; i < max_desc; i++) {
  208. tmp = &lro_arr[i];
  209. if (tmp->active)
  210. if (!lro_check_tcp_conn(tmp, iph, tcph)) {
  211. lro_desc = tmp;
  212. goto out;
  213. }
  214. }
  215. for (i = 0; i < max_desc; i++) {
  216. if (!lro_arr[i].active) {
  217. lro_desc = &lro_arr[i];
  218. goto out;
  219. }
  220. }
  221. LRO_INC_STATS(lro_mgr, no_desc);
  222. out:
  223. return lro_desc;
  224. }
  225. static void lro_flush(struct net_lro_mgr *lro_mgr,
  226. struct net_lro_desc *lro_desc)
  227. {
  228. if (lro_desc->pkt_aggr_cnt > 1)
  229. lro_update_tcp_ip_header(lro_desc);
  230. skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
  231. if (lro_mgr->features & LRO_F_NAPI)
  232. netif_receive_skb(lro_desc->parent);
  233. else
  234. netif_rx(lro_desc->parent);
  235. LRO_INC_STATS(lro_mgr, flushed);
  236. lro_clear_desc(lro_desc);
  237. }
  238. static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
  239. void *priv)
  240. {
  241. struct net_lro_desc *lro_desc;
  242. struct iphdr *iph;
  243. struct tcphdr *tcph;
  244. u64 flags;
  245. int vlan_hdr_len = 0;
  246. if (!lro_mgr->get_skb_header ||
  247. lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
  248. &flags, priv))
  249. goto out;
  250. if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
  251. goto out;
  252. lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
  253. if (!lro_desc)
  254. goto out;
  255. if ((skb->protocol == htons(ETH_P_8021Q)) &&
  256. !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
  257. vlan_hdr_len = VLAN_HLEN;
  258. if (!lro_desc->active) { /* start new lro session */
  259. if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
  260. goto out;
  261. skb->ip_summed = lro_mgr->ip_summed_aggr;
  262. lro_init_desc(lro_desc, skb, iph, tcph);
  263. LRO_INC_STATS(lro_mgr, aggregated);
  264. return 0;
  265. }
  266. if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
  267. goto out2;
  268. if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
  269. goto out2;
  270. lro_add_packet(lro_desc, skb, iph, tcph);
  271. LRO_INC_STATS(lro_mgr, aggregated);
  272. if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
  273. lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
  274. lro_flush(lro_mgr, lro_desc);
  275. return 0;
  276. out2: /* send aggregated SKBs to stack */
  277. lro_flush(lro_mgr, lro_desc);
  278. out:
  279. return 1;
  280. }
  281. void lro_receive_skb(struct net_lro_mgr *lro_mgr,
  282. struct sk_buff *skb,
  283. void *priv)
  284. {
  285. if (__lro_proc_skb(lro_mgr, skb, priv)) {
  286. if (lro_mgr->features & LRO_F_NAPI)
  287. netif_receive_skb(skb);
  288. else
  289. netif_rx(skb);
  290. }
  291. }
  292. EXPORT_SYMBOL(lro_receive_skb);
  293. void lro_flush_all(struct net_lro_mgr *lro_mgr)
  294. {
  295. int i;
  296. struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
  297. for (i = 0; i < lro_mgr->max_desc; i++) {
  298. if (lro_desc[i].active)
  299. lro_flush(lro_mgr, &lro_desc[i]);
  300. }
  301. }
  302. EXPORT_SYMBOL(lro_flush_all);