邻居子系统
<h2>概述</h2>
<p>调用树:</p>
<pre><code class="language-c">dst_neigh_output
neigh_resolve_output // 可能发送 ARP 请求;设置 MAC 头
dev_queue_xmit // 网络子系统层</code></pre>
<h2>分析</h2>
<p>在向外发送 IP 数据包时,查找下一跳的邻居项,函数如下:</p>
<pre><code class="language-c">// file: net/ipv4/arp.c
/*
 * Global neighbour table for IPv4 (family AF_INET, 4-byte keys).
 * It supplies the table's hash, constructor and proxy_redo callbacks, the
 * default per-neighbour parameters (.parms) and the gc_* settings.
 * Time values are written as multiples of HZ (presumably ticks per second,
 * so N * HZ reads as N seconds — confirm).
 */
struct neigh_table arp_tbl = {
.family = AF_INET,
.key_len = 4, // key size in bytes
.hash = arp_hash, // hash callback stored in the table
.constructor = arp_constructor, // protocol-specific constructor callback
.proxy_redo = parp_redo,
.id = &quot;arp_cache&quot;,
.parms = {
.tbl = &amp;arp_tbl, // back-pointer to this table
.base_reachable_time = 30 * HZ,
.retrans_time = 1 * HZ,
.gc_staletime = 60 * HZ,
.reachable_time = 30 * HZ,
.delay_probe_time = 5 * HZ,
.queue_len_bytes = 64*1024,
.ucast_probes = 3,
.mcast_probes = 3,
.anycast_delay = 1 * HZ,
.proxy_delay = (8 * HZ) / 10,
.proxy_qlen = 64,
.locktime = 1 * HZ,
},
// NOTE(review): gc_* presumably tune garbage collection of entries
// (interval and entry-count thresholds) — confirm against neighbour.c
.gc_interval = 30 * HZ,
.gc_thresh1 = 128,
.gc_thresh2 = 512,
.gc_thresh3 = 1024,
};
EXPORT_SYMBOL(arp_tbl);
// file: include/net/arp.h (path as in the upstream kernel tree — confirm for your version)
/*
 * Look up the neighbour entry matching (dev, key) in the global arp_tbl.
 *
 * dev: device the entry must belong to.
 * key: 32-bit key compared against the entry's primary_key.
 *
 * Returns the matching entry, or NULL when the bucket chain holds no match.
 * No reference is taken on the returned entry (neigh_hold() is not called).
 * NOTE(review): pointers are read with rcu_dereference_bh(), so the caller
 * presumably must be inside an RCU-bh read-side section — confirm.
 */
static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key)
{
struct neigh_hash_table *nht = rcu_dereference_bh(arp_tbl.nht); // arp_tbl is a global variable
struct neighbour *n;
u32 hash_val;
// keep only the top hash_shift bits of the 32-bit hash as the bucket index
hash_val = arp_hashfn(key, dev, nht-&gt;hash_rnd[0]) &gt;&gt; (32 - nht-&gt;hash_shift);
for (n = rcu_dereference_bh(nht-&gt;hash_buckets[hash_val]);
n != NULL;
n = rcu_dereference_bh(n-&gt;next)) {
if (n-&gt;dev == dev &amp;&amp; *(u32 *)n-&gt;primary_key == key) // walk the hash bucket; the match criteria are exactly the two arguments, where dev is the egress interface found by the routing-table lookup
return n;
}
return NULL;
}
</code></pre>
<p>如果找不到,则创建一个邻居项:</p>
<pre><code class="language-c">// file: net/core/neighbour.c
/*
 * Create a neighbour entry for (pkey, dev) and insert it into tbl's hash table.
 *
 * tbl:      table to insert into; supplies key_len, the hash function and
 *           the optional protocol constructor.
 * pkey:     key bytes; tbl-&gt;key_len bytes are copied into the new entry.
 * dev:      device the entry belongs to; a reference is taken with dev_hold().
 * want_ref: when true, neigh_hold() is called on the entry that is returned.
 *
 * Returns the entry that ends up in the table: the newly inserted one, or an
 * already-present entry with the same dev and key (the new allocation is then
 * released). On failure returns an ERR_PTR(): -ENOBUFS if allocation fails,
 * the negative code from a failing setup hook, or -EINVAL if the entry's
 * parms are marked dead.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref)
{
u32 hash_val;
int key_len = tbl-&gt;key_len;
int error;
struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev); // allocate a neighbour entry
struct neigh_hash_table *nht;
if (!n) {
rc = ERR_PTR(-ENOBUFS);
goto out;
}
memcpy(n-&gt;primary_key, pkey, key_len); // store the two lookup keys: key bytes and device
n-&gt;dev = dev;
dev_hold(dev);
/* Protocol specific setup. */
if (tbl-&gt;constructor &amp;&amp; (error = tbl-&gt;constructor(n)) &lt; 0) {
rc = ERR_PTR(error);
goto out_neigh_release;
}
/* Optional driver hook supplied through netdev_ops. */
if (dev-&gt;netdev_ops-&gt;ndo_neigh_construct) {
error = dev-&gt;netdev_ops-&gt;ndo_neigh_construct(n);
if (error &lt; 0) {
rc = ERR_PTR(error);
goto out_neigh_release;
}
}
/* Device specific setup. */
if (n-&gt;parms-&gt;neigh_setup &amp;&amp;
(error = n-&gt;parms-&gt;neigh_setup(n)) &lt; 0) {
rc = ERR_PTR(error);
goto out_neigh_release;
}
// back-date the confirmation timestamp by twice base_reachable_time
// (presumably so a fresh entry is not treated as recently confirmed — confirm)
n-&gt;confirmed = jiffies - (n-&gt;parms-&gt;base_reachable_time &lt;&lt; 1);
write_lock_bh(&amp;tbl-&gt;lock); // for ARP, tbl is the global arp_tbl, so this lock is table-wide (coarse-grained)
nht = rcu_dereference_protected(tbl-&gt;nht,
lockdep_is_held(&amp;tbl-&gt;lock));
// grow the hash table once the entry count exceeds the number of buckets
if (atomic_read(&amp;tbl-&gt;entries) &gt; (1 &lt;&lt; nht-&gt;hash_shift))
nht = neigh_hash_grow(tbl, nht-&gt;hash_shift + 1);
hash_val = tbl-&gt;hash(pkey, dev, nht-&gt;hash_rnd) &gt;&gt; (32 - nht-&gt;hash_shift); // bucket index from the hash value
if (n-&gt;parms-&gt;dead) {
rc = ERR_PTR(-EINVAL);
goto out_tbl_unlock;
}
// re-check under the write lock: look for an existing entry with the same key
for (n1 = rcu_dereference_protected(nht-&gt;hash_buckets[hash_val],
lockdep_is_held(&amp;tbl-&gt;lock));
n1 != NULL;
n1 = rcu_dereference_protected(n1-&gt;next,
lockdep_is_held(&amp;tbl-&gt;lock))) {
if (dev == n1-&gt;dev &amp;&amp; !memcmp(n1-&gt;primary_key, pkey, key_len)) { // an entry for the same (dev, key) already exists
if (want_ref)
neigh_hold(n1);
rc = n1;
goto out_tbl_unlock; // return the existing entry; the new one is released below
}
}
n-&gt;dead = 0;
if (want_ref)
neigh_hold(n);
// link the new entry in front of the current chain, then publish it
rcu_assign_pointer(n-&gt;next,
rcu_dereference_protected(nht-&gt;hash_buckets[hash_val],
lockdep_is_held(&amp;tbl-&gt;lock)));
rcu_assign_pointer(nht-&gt;hash_buckets[hash_val], n); // the new entry becomes the head of the bucket chain
write_unlock_bh(&amp;tbl-&gt;lock);
neigh_dbg(2, &quot;neigh %p is created\n&quot;, n);
rc = n;
out:
return rc;
/* Error/duplicate exits fall through: unlock, release the new entry, return rc. */
out_tbl_unlock:
write_unlock_bh(&amp;tbl-&gt;lock);
out_neigh_release:
neigh_release(n);
goto out;
}
EXPORT_SYMBOL(__neigh_create);
// file: include/net/dst.h
/*
 * Send skb to neighbour n on behalf of route entry dst.
 *
 * If dst has a pending confirmation, it is cleared and n's confirmed
 * timestamp is refreshed to the current jiffies. The packet then goes out
 * through the cached hardware header when the neighbour is in a
 * NUD_CONNECTED state and a header is cached (hh_len is non-zero);
 * otherwise it goes through the neighbour's output callback.
 *
 * Returns whatever the chosen output path returns.
 */
static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
struct sk_buff *skb)
{
const struct hh_cache *hh;
if (dst-&gt;pending_confirm) {
unsigned long now = jiffies;
dst-&gt;pending_confirm = 0;
/* avoid dirtying neighbour */
if (n-&gt;confirmed != now)
n-&gt;confirmed = now;
}
hh = &amp;n-&gt;hh;
if ((n-&gt;nud_state &amp; NUD_CONNECTED) &amp;&amp; hh-&gt;hh_len)
return neigh_hh_output(hh, skb); // MAC already known: use the cached hardware header
else
return n-&gt;output(n, skb); // no MAC yet (or no cached header): use the neighbour's output callback
}</code></pre>
<p><code>n-&gt;output</code> 实际指向的是 <code>neigh_resolve_output</code>(待分析流程),继续分析:</p>
<pre><code class="language-c">// file: net/core/neighbour.c
/*
 * Output path for a neighbour whose link-layer address may still need
 * resolving.
 *
 * neigh: neighbour entry for the next hop.
 * skb:   packet to send; it must carry a dst entry.
 *
 * When neigh_event_send() returns 0, the link-layer header is built from
 * neigh-&gt;ha and the packet is passed to dev_queue_xmit(), whose result is
 * returned. When it returns non-zero, nothing is transmitted here and 0 is
 * returned (presumably the skb was taken over by the neighbour code —
 * confirm). If skb has no dst, or building the header fails, the skb is
 * freed and -EINVAL is returned.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
int rc = 0;
if (!dst)
goto discard;
if (!neigh_event_send(neigh, skb)) { // may trigger sending an ARP request (unverified here; neigh_event_send is not shown)
int err;
struct net_device *dev = neigh-&gt;dev;
unsigned int seq;
// initialise the cached hardware header if the device supports caching and none is cached yet
if (dev-&gt;header_ops-&gt;cache &amp;&amp; !neigh-&gt;hh.hh_len)
neigh_hh_init(neigh, dst);
// seqlock read loop: rebuild the header if neigh-&gt;ha_lock changed while it was being read
do {
__skb_pull(skb, skb_network_offset(skb));
seq = read_seqbegin(&amp;neigh-&gt;ha_lock);
err = dev_hard_header(skb, dev, ntohs(skb-&gt;protocol),
neigh-&gt;ha, NULL, skb-&gt;len); // build the link-layer (MAC) header; neigh-&gt;ha is the MAC address
} while (read_seqretry(&amp;neigh-&gt;ha_lock, seq));
if (err &gt;= 0)
rc = dev_queue_xmit(skb); // hand off to the network device layer for transmission
else
goto out_kfree_skb;
}
out:
return rc;
/* Failure exits: discard logs first, then both free the skb and return -EINVAL. */
discard:
neigh_dbg(1, &quot;%s: dst=%p neigh=%p\n&quot;, __func__, dst, neigh);
out_kfree_skb:
rc = -EINVAL;
kfree_skb(skb);
goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
</code></pre>