公开学习文档

公开学习文档


邻居子系统

<h2>概述</h2>
<p>调用树:</p>
<pre><code class="language-c">dst_neigh_output
    neigh_resolve_output    // 可能发送 ARP 请求;设置 MAC 头
        dev_queue_xmit      // 网络子系统层</code></pre>
<h2>分析</h2>
<p>在向外发送 IP 数据包时,查找下一跳的邻居项,函数如下:</p>
<pre><code class="language-c">// file: net/ipv4/arp.c
struct neigh_table arp_tbl = {
    .family = AF_INET,
    .key_len = 4,
    .hash = arp_hash,
    .constructor = arp_constructor,
    .proxy_redo = parp_redo,
    .id = &quot;arp_cache&quot;,
    .parms = {
        .tbl = &amp;arp_tbl,
        .base_reachable_time = 30 * HZ,
        .retrans_time = 1 * HZ,
        .gc_staletime = 60 * HZ,
        .reachable_time = 30 * HZ,
        .delay_probe_time = 5 * HZ,
        .queue_len_bytes = 64*1024,
        .ucast_probes = 3,
        .mcast_probes = 3,
        .anycast_delay = 1 * HZ,
        .proxy_delay = (8 * HZ) / 10,
        .proxy_qlen = 64,
        .locktime = 1 * HZ,
    },
    .gc_interval = 30 * HZ,
    .gc_thresh1 = 128,
    .gc_thresh2 = 512,
    .gc_thresh3 = 1024,
};
EXPORT_SYMBOL(arp_tbl);

// file: include/net/arp.h
static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key)
{
    struct neigh_hash_table *nht = rcu_dereference_bh(arp_tbl.nht); // arp_tbl 是全局变量
    struct neighbour *n;
    u32 hash_val;

    hash_val = arp_hashfn(key, dev, nht-&gt;hash_rnd[0]) &gt;&gt; (32 - nht-&gt;hash_shift);
    for (n = rcu_dereference_bh(nht-&gt;hash_buckets[hash_val]);
         n != NULL;
         n = rcu_dereference_bh(n-&gt;next)) {
        if (n-&gt;dev == dev &amp;&amp; *(u32 *)n-&gt;primary_key == key) // 在 hash 槽中查找,判断项就是 2 个参数,其中 dev 为根据路由表查找的出接口
            return n;
    }

    return NULL;
}
</code></pre>
<p>如果找不到,则创建一个邻居项:</p>
<pre><code class="language-c">// file: net/core/neighbour.c
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
                                 struct net_device *dev, bool want_ref)
{
    u32 hash_val;
    int key_len = tbl-&gt;key_len;
    int error;
    struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev); // 申请邻居表项
    struct neigh_hash_table *nht;

    if (!n) {
        rc = ERR_PTR(-ENOBUFS);
        goto out;
    }

    memcpy(n-&gt;primary_key, pkey, key_len); // 赋值 2 个参数
    n-&gt;dev = dev;
    dev_hold(dev);

    /* Protocol specific setup. */
    if (tbl-&gt;constructor &amp;&amp; (error = tbl-&gt;constructor(n)) &lt; 0) {
        rc = ERR_PTR(error);
        goto out_neigh_release;
    }

    if (dev-&gt;netdev_ops-&gt;ndo_neigh_construct) {
        error = dev-&gt;netdev_ops-&gt;ndo_neigh_construct(n);
        if (error &lt; 0) {
            rc = ERR_PTR(error);
            goto out_neigh_release;
        }
    }

    /* Device specific setup. */
    if (n-&gt;parms-&gt;neigh_setup &amp;&amp;
        (error = n-&gt;parms-&gt;neigh_setup(n)) &lt; 0) {
        rc = ERR_PTR(error);
        goto out_neigh_release;
    }

    n-&gt;confirmed = jiffies - (n-&gt;parms-&gt;base_reachable_time &lt;&lt; 1);

    write_lock_bh(&amp;tbl-&gt;lock); // tbl 即是全局变量 arp_tbl,所以这个锁的粒度好大
    nht = rcu_dereference_protected(tbl-&gt;nht,
                                    lockdep_is_held(&amp;tbl-&gt;lock));

    if (atomic_read(&amp;tbl-&gt;entries) &gt; (1 &lt;&lt; nht-&gt;hash_shift))
        nht = neigh_hash_grow(tbl, nht-&gt;hash_shift + 1);

    hash_val = tbl-&gt;hash(pkey, dev, nht-&gt;hash_rnd) &gt;&gt; (32 - nht-&gt;hash_shift); // hash 值

    if (n-&gt;parms-&gt;dead) {
        rc = ERR_PTR(-EINVAL);
        goto out_tbl_unlock;
    }

    for (n1 = rcu_dereference_protected(nht-&gt;hash_buckets[hash_val],
                                        lockdep_is_held(&amp;tbl-&gt;lock));
         n1 != NULL;
         n1 = rcu_dereference_protected(n1-&gt;next,
                                        lockdep_is_held(&amp;tbl-&gt;lock))) {
        if (dev == n1-&gt;dev &amp;&amp; !memcmp(n1-&gt;primary_key, pkey, key_len)) { // 找到了?
            if (want_ref)
                neigh_hold(n1);
            rc = n1;
            goto out_tbl_unlock;
        }
    }

    n-&gt;dead = 0;
    if (want_ref)
        neigh_hold(n);
    rcu_assign_pointer(n-&gt;next,
                       rcu_dereference_protected(nht-&gt;hash_buckets[hash_val],
                                                 lockdep_is_held(&amp;tbl-&gt;lock)));
    rcu_assign_pointer(nht-&gt;hash_buckets[hash_val], n); // 新申请的项放到队头
    write_unlock_bh(&amp;tbl-&gt;lock);
    neigh_dbg(2, &quot;neigh %p is created\n&quot;, n);
    rc = n;
out:
    return rc;
out_tbl_unlock:
    write_unlock_bh(&amp;tbl-&gt;lock);
out_neigh_release:
    neigh_release(n);
    goto out;
}
EXPORT_SYMBOL(__neigh_create);

// file: include/net/dst.h
static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
                                   struct sk_buff *skb)
{
    const struct hh_cache *hh;

    if (dst-&gt;pending_confirm) {
        unsigned long now = jiffies;

        dst-&gt;pending_confirm = 0;
        /* avoid dirtying neighbour */
        if (n-&gt;confirmed != now)
            n-&gt;confirmed = now;
    }

    hh = &amp;n-&gt;hh;
    if ((n-&gt;nud_state &amp; NUD_CONNECTED) &amp;&amp; hh-&gt;hh_len)
        return neigh_hh_output(hh, skb); // 已经有 MAC
    else
        return n-&gt;output(n, skb); // 没有 MAC
}</code></pre>
<p>n-&gt;output 实际指向的是 <code>neigh_resolve_output</code>(待分析流程),继续分析:</p>
<pre><code class="language-c">// file: net/core/neighbour.c
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
    struct dst_entry *dst = skb_dst(skb);
    int rc = 0;

    if (!dst)
        goto discard;

    if (!neigh_event_send(neigh, skb)) { // 这里可能触发发送 arp 请求?
        int err;
        struct net_device *dev = neigh-&gt;dev;
        unsigned int seq;

        if (dev-&gt;header_ops-&gt;cache &amp;&amp; !neigh-&gt;hh.hh_len)
            neigh_hh_init(neigh, dst);

        do {
            __skb_pull(skb, skb_network_offset(skb));
            seq = read_seqbegin(&amp;neigh-&gt;ha_lock);
            err = dev_hard_header(skb, dev, ntohs(skb-&gt;protocol),
                                  neigh-&gt;ha, NULL, skb-&gt;len); // 设置 MAC 头。neigh-&gt;ha 是 MAC 地址
        } while (read_seqretry(&amp;neigh-&gt;ha_lock, seq));

        if (err &gt;= 0)
            rc = dev_queue_xmit(skb); // 通过网络设备子系统发送
        else
            goto out_kfree_skb;
    }
out:
    return rc;
discard:
    neigh_dbg(1, &quot;%s: dst=%p neigh=%p\n&quot;, __func__, dst, neigh);
out_kfree_skb:
    rc = -EINVAL;
    kfree_skb(skb);
    goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
</code></pre>

页面列表

ITEM_HTML