公开学习文档

公开学习文档


IP收包

<h2>概述</h2> <p>调用树:</p> <pre><code class="language-c">ip_rcv // 走 NF_INET_PRE_ROUTING 钩子 ip_rcv_finish // 没有路由,则查路由;查到路由后设置 input = ip_local_deliver dst_input // 即 dst-&gt;input(skb) ip_local_deliver // 分片重组;走 NF_INET_LOCAL_IN 钩子 ip_local_deliver_finish // 根据传输层不同协议调用对应接口,对于 TCP,则是 tcp_v4_rcv</code></pre> <p>IP 协议在 ptype_base 中注册的处理函数是 <code>ip_rcv</code>。</p> <p>1、检测到为 IP 类型时,进入 <code>ip_rcv</code> 2、先检测和设置一些字段 3、走 PRE_ROUTING 钩子 4、查找路由,根据路由结果设置处理函数,并调用。对于到本机的 IP 包,则是 <code>ip_local_deliver</code> 5、走 LOCAL_IN 钩子,并检测包协议类型,如 TCP 或 UDP,再调用对应的处理函数。对于 TCP 包,则是 <code>tcp_v4_rcv</code></p> <h2>分析</h2> <pre><code class="language-c">// file: net/ipv4/ip_input.c /* * Main IP Receive routine. */ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { const struct iphdr *iph; u32 len; /* When the interface is in promisc. mode, drop all the crap * that it receives, do not try to analyse it. */ if (skb-&gt;pkt_type == PACKET_OTHERHOST) goto drop; IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb-&gt;len); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { // 注意这里会保证 skb 是独有的,因为如果不是独有,就会 clone 一份出来 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto out; } if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto inhdr_error; iph = ip_hdr(skb); /* * RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum. * * Is the datagram acceptable? * * 1. Length at least the size of an ip header * 2. Version of 4 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] * 4. 
Doesn't have a bogus length */ if (iph-&gt;ihl &lt; 5 || iph-&gt;version != 4) goto inhdr_error; if (!pskb_may_pull(skb, iph-&gt;ihl*4)) goto inhdr_error; iph = ip_hdr(skb); if (unlikely(ip_fast_csum((u8 *)iph, iph-&gt;ihl))) goto csum_error; len = ntohs(iph-&gt;tot_len); if (skb-&gt;len &lt; len) { IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len &lt; (iph-&gt;ihl*4)) goto inhdr_error; /* Our transport medium may have padded the buffer out. Now we know it * is IP we can trim to the true length of the frame. * Note this now means skb-&gt;len holds ntohs(iph-&gt;tot_len). */ if (pskb_trim_rcsum(skb, len)) { IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto drop; } skb-&gt;transport_header = skb-&gt;network_header + iph-&gt;ihl*4; /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); /* Must drop socket now because of tproxy. */ skb_orphan(skb); return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish); // 意思是:先依次执行对应的钩子函数,如果都返回 1,则最后执行 ip_rcv_finish 函数 csum_error: IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_CSUMERRORS); inhdr_error: IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); drop: kfree_skb(skb); out: return NET_RX_DROP; } </code></pre> <p>&gt; skb_pull 解释:<a href="https://blog.csdn.net/qq_24521983/article/details/71423264">https://blog.csdn.net/qq_24521983/article/details/71423264</a> &gt; 其它:<a href="https://blog.csdn.net/rikeyone/article/details/108610841">https://blog.csdn.net/rikeyone/article/details/108610841</a></p> <p>netfilter 框架:</p> <pre><code class="language-c">// file: include/linux/netfilter.h static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sk_buff *)) { return NF_HOOK_THRESH(pf, hook, skb, in, out, okfn, INT_MIN); } /* Activate hook; either okfn or kfree_skb called, 
unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with the consequences). Returns -ERRNO if packet dropped. Zero means queued, stolen or accepted. */ /* RR: &gt; I don't want nf_hook to return anything because people might forget &gt; about async and trust the return value to mean &quot;packet was ok&quot;. AK: Just document it clearly, then you can expect some sense from kernel coders :) */ static inline int NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sk_buff *), int thresh) { int ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, thresh); if (ret == 1) ret = okfn(skb); return ret; } </code></pre> <p>继续 IP 收包:</p> <pre><code class="language-c">// file: net/ipv4/ip_input.c static int ip_rcv_finish(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; if (sysctl_ip_early_demux &amp;&amp; !skb_dst(skb) &amp;&amp; skb-&gt;sk == NULL) { const struct net_protocol *ipprot; int protocol = iph-&gt;protocol; ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot &amp;&amp; ipprot-&gt;early_demux) { ipprot-&gt;early_demux(skb); /* must reload iph, skb-&gt;head might have changed */ iph = ip_hdr(skb); } } /* * Initialise the virtual path cache for the packet. It describes * how the packet travels inside Linux networking. 
*/ if (!skb_dst(skb)) { // 没有路由,则查路由。查到路由后,下一步的处理函数存放在 dst-&gt;input 中 int err = ip_route_input_noref(skb, iph-&gt;daddr, iph-&gt;saddr, iph-&gt;tos, skb-&gt;dev); if (unlikely(err)) { if (err == -EXDEV) NET_INC_STATS_BH(dev_net(skb-&gt;dev), LINUX_MIB_IPRPFILTER); goto drop; } } #ifdef CONFIG_IP_ROUTE_CLASSID if (unlikely(skb_dst(skb)-&gt;tclassid)) { struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); u32 idx = skb_dst(skb)-&gt;tclassid; st[idx&amp;0xFF].o_packets++; st[idx&amp;0xFF].o_bytes += skb-&gt;len; st[(idx&gt;&gt;16)&amp;0xFF].i_packets++; st[(idx&gt;&gt;16)&amp;0xFF].i_bytes += skb-&gt;len; } #endif if (iph-&gt;ihl &gt; 5 &amp;&amp; ip_rcv_options(skb)) goto drop; rt = skb_rtable(skb); if (rt-&gt;rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS_BH(dev_net(rt-&gt;dst.dev), IPSTATS_MIB_INMCAST, skb-&gt;len); } else if (rt-&gt;rt_type == RTN_BROADCAST) IP_UPD_PO_STATS_BH(dev_net(rt-&gt;dst.dev), IPSTATS_MIB_INBCAST, skb-&gt;len); return dst_input(skb); // 即 dst-&gt;input(skb) drop: kfree_skb(skb); return NET_RX_DROP; }</code></pre> <p>路由匹配这里暂不分析,有时间可以搜索资料研究。</p> <p>&gt; 关于路由匹配那一段,可参考:<a href="https://blog.csdn.net/Megahertz66/article/details/110239947">https://blog.csdn.net/Megahertz66/article/details/110239947</a></p> <p>这里经过查路由后,得到的 input 函数是 <code>ip_local_deliver</code>,继续分析:</p> <pre><code class="language-c">// file: net/ipv4/ip_input.c /* * Deliver IP Packets to the higher protocol layers. */ int ip_local_deliver(struct sk_buff *skb) { /* * Reassemble IP fragments. 
*/ if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) return 0; } return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, skb, skb-&gt;dev, NULL, ip_local_deliver_finish); // 走钩子,最后调用 ip_local_deliver_finish } static int ip_local_deliver_finish(struct sk_buff *skb) { struct net *net = dev_net(skb-&gt;dev); __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); { int protocol = ip_hdr(skb)-&gt;protocol; const struct net_protocol *ipprot; int raw; resubmit: raw = raw_local_deliver(skb, protocol); ipprot = rcu_dereference(inet_protos[protocol]); // 按协议号取对应的协议处理结构(如 TCP、UDP) if (ipprot != NULL) { int ret; if (!ipprot-&gt;no_policy) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { kfree_skb(skb); goto out; } nf_reset(skb); } ret = ipprot-&gt;handler(skb); // 对于 TCP,则是 tcp_v4_rcv if (ret &lt; 0) { protocol = -ret; goto resubmit; } IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); } else { if (!raw) { if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } kfree_skb(skb); } else { IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); consume_skb(skb); } } } out: rcu_read_unlock(); return 0; }</code></pre>

页面列表

ITEM_HTML