GSO机制
<h2>概述</h2>
<h2>分析</h2>
<p>TCP 判断 skb 是否可追加数据时,是通过 size_goal 判断的:</p>
<pre><code class="language-c">mss_now = tcp_send_mss(sk, &amp;size_goal, flags); // 关注 size_goal</code></pre>
<p>继续:</p>
<pre><code class="language-c">// file: net/ipv4/tcp.c
/* Return the current MSS for sk and store the transmit size goal in *size_goal. */
static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
{
int mss_now;
mss_now = tcp_current_mss(sk);
/* The third argument is large_allowed: it is false when MSG_OOB is set in flags. */
*size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags &amp; MSG_OOB));
return mss_now;
}
/*
 * Compute the transmit size goal for sk.
 *
 * The goal starts at mss_now. When large_allowed is set and sk_can_gso(sk)
 * holds, a larger goal is derived from sk_pacing_rate, capped by
 * sk_gso_max_size minus the header lengths and by tcp_bound_to_half_wnd(),
 * and then rounded down to a whole number of mss_now-sized segments
 * (at most sk_gso_max_segs). The segment count is cached in
 * tp-&gt;xmit_size_goal_segs so that later calls can skip the division.
 *
 * Returns the larger of the computed goal and mss_now.
 */
static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
int large_allowed)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 xmit_size_goal, old_size_goal;
xmit_size_goal = mss_now;
if (large_allowed &amp;&amp; sk_can_gso(sk)) { // GSO is supported
u32 gso_size, hlen;
/* Maybe we should/could use sk-&gt;sk_prot-&gt;max_header here ? */
/* hlen is the sum of the three header-length fields below. */
hlen = inet_csk(sk)-&gt;icsk_af_ops-&gt;net_header_len +
inet_csk(sk)-&gt;icsk_ext_hdr_len +
tp-&gt;tcp_header_len;
/* Goal is to send at least one packet per ms,
 * not one big TSO packet every 100 ms.
 * This preserves ACK clocking and is consistent
 * with tcp_tso_should_defer() heuristic.
 */
gso_size = sk-&gt;sk_pacing_rate / (2 * MSEC_PER_SEC);
/* Never go below sysctl_tcp_min_tso_segs full-sized segments. */
gso_size = max_t(u32, gso_size,
sysctl_tcp_min_tso_segs * mss_now);
xmit_size_goal = min_t(u32, gso_size,
sk-&gt;sk_gso_max_size - 1 - hlen); // compare against the GSO size limit
xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); // window limit
/* We try hard to avoid divides here */
old_size_goal = tp-&gt;xmit_size_goal_segs * mss_now;
/* Reuse the cached goal when it is at most the new goal and less than one mss_now below it. */
if (likely(old_size_goal &lt;= xmit_size_goal &amp;&amp;
old_size_goal + mss_now &gt; xmit_size_goal)) { // old and new values are close
xmit_size_goal = old_size_goal;
} else {
/* Recompute and cache the segment count, then round the goal to a multiple of mss_now. */
tp-&gt;xmit_size_goal_segs =
min_t(u16, xmit_size_goal / mss_now,
sk-&gt;sk_gso_max_segs);
xmit_size_goal = tp-&gt;xmit_size_goal_segs * mss_now;
}
}
return max(xmit_size_goal, mss_now);
}
</code></pre>
<h2>是否支持 GSO</h2>
<pre><code class="language-c">// file: include/net/sock.h
/* Return true when the socket's route capabilities include its GSO type (see net_gso_ok()). */
static inline bool sk_can_gso(const struct sock *sk)
{
return net_gso_ok(sk-&gt;sk_route_caps, sk-&gt;sk_gso_type); // judging from the code, for TCPv4 sk-&gt;sk_gso_type = SKB_GSO_TCPV4
}
// file: include/linux/netdevice.h
/*
 * Return true when every feature bit corresponding to gso_type is set in
 * features. The feature bits are gso_type shifted left by NETIF_F_GSO_SHIFT.
 */
static inline bool net_gso_ok(netdev_features_t features, int gso_type)
{
netdev_features_t feature = gso_type &lt;&lt; NETIF_F_GSO_SHIFT;
/* check flags correspondence */
/* Compile-time checks: each SKB_GSO_* value must equal its NETIF_F_* flag shifted right by NETIF_F_GSO_SHIFT. */
BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO &gt;&gt; NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO &gt;&gt; NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST &gt;&gt; NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN &gt;&gt; NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 &gt;&gt; NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO &gt;&gt; NETIF_F_GSO_SHIFT));
return (features &amp; feature) == feature;
}
</code></pre>
<p>在哪里设置上面的值呢?</p>
<p>在 connect 和 accept 时,会设置:</p>
<pre><code class="language-c">int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
/* Excerpt: only the GSO-related statements are shown; everything else is elided. */
// ...
/* Mark the socket's GSO type as TCPv4, then set up its route capabilities via sk_setup_caps(). */
sk-&gt;sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(sk, &amp;rt-&gt;dst);
// ...
}
/*
 * Excerpt: only the GSO-related statements are shown; everything else is elided.
 * The statements shown operate on newsk, not on the sk passed in.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
{
// ...
/* Mark the new socket's GSO type as TCPv4. */
newsk-&gt;sk_gso_type = SKB_GSO_TCPV4;
inet_sk_rx_dst_set(newsk, skb);
// ...
/* Set up the new socket's route capabilities from dst. */
sk_setup_caps(newsk, dst);
// ...
}
// file: net/core/sock.c
/*
 * Attach dst to sk and derive sk-&gt;sk_route_caps from the features of the
 * dst's device. If the resulting capabilities allow GSO for this socket,
 * either strip the GSO bits (when dst-&gt;header_len is non-zero) or enable
 * SG and HW checksum and copy the device's GSO size and segment limits.
 */
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
__sk_dst_set(sk, dst);
sk-&gt;sk_route_caps = dst-&gt;dev-&gt;features; // primary source of the capabilities
if (sk-&gt;sk_route_caps &amp; NETIF_F_GSO)
/* NOTE(review): presumably the set of GSO types the stack can segment in software; confirm against the netdev features header. */
sk-&gt;sk_route_caps |= NETIF_F_GSO_SOFTWARE; // what is this flag for?
sk-&gt;sk_route_caps &amp;= ~sk-&gt;sk_route_nocaps; // remove the capabilities listed in sk_route_nocaps
if (sk_can_gso(sk)) {
if (dst-&gt;header_len) { // (quoted note) dst-&gt;header_len is non-zero only when IPsec is in use; TSO cannot be used in that case
sk-&gt;sk_route_caps &amp;= ~NETIF_F_GSO_MASK;
} else {
sk-&gt;sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
/* Record the device's GSO limits on the socket. */
sk-&gt;sk_gso_max_size = dst-&gt;dev-&gt;gso_max_size;
sk-&gt;sk_gso_max_segs = dst-&gt;dev-&gt;gso_max_segs;
}
}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);</code></pre>
<h2>TCP发包时skb申请数据大小分析</h2>
<p>这个 size 是选择分配 skb 时线性内存的大小。</p>
<pre><code class="language-c">// file: net/ipv4/tcp.c
/*
 * Choose the size used for the linear area when allocating an skb for sk
 * (per the surrounding notes).
 *
 * Starts from tp-&gt;mss_cache. When sg is true:
 *  - if the socket can do GSO, use SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
 *  - otherwise, if the MSS lies between pgbreak and
 *    pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE inclusive, use pgbreak.
 * When sg is false the MSS is returned unchanged.
 */
static inline int select_size(const struct sock *sk, bool sg)
{
const struct tcp_sock *tp = tcp_sk(sk);
int tmp = tp-&gt;mss_cache;
if (sg) { // (quoted note) NETIF_F_SG: the device supports scatter/gather DMA transmit of skb fragments (goes together with NETIF_F_GSO; skb_shinfo(skb)-&gt;nr_frags is non-zero)
if (sk_can_gso(sk)) { // i.e. net_gso_ok(sk-&gt;sk_route_caps, sk-&gt;sk_gso_type): checks whether sk_route_caps contains sk_gso_type
/* Small frames wont use a full page:
 * Payload will immediately follow tcp header.
 */
tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); // 2048 - MAX_TCP_HEADER - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) // is it really this small?
} else {
int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); // PAGE_SIZE - ??? // less than 4 KiB?
if (tmp &gt;= pgbreak &amp;&amp;
tmp &lt;= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
tmp = pgbreak;
}
}
return tmp;
}
</code></pre>