RingBuffer内存回收
<h2>概述</h2>
<p>调用树:</p>
<pre><code class="language-c">NET_RX_SOFTIRQ 软中断
igb_poll // 负责回收发包 ringbuffer,并处理收包
igb_clean_tx_irq
dev_kfree_skb_any // 释放 skb(这是释放克隆的 skb,原始的要等 ACK 才能释放);解除 DMA 映射</code></pre>
<p>数据发送完成后,还需要回收发送过程中占用的内存。发送完成时,网卡会触发一个硬中断(与收到数据包时触发的硬中断相同),该硬中断进一步触发 NET_RX_SOFTIRQ 软中断(注意:即使是发送完成,触发的也是 NET_RX_SOFTIRQ),内存的回收就在这个软中断的处理过程中完成。</p>
<h2>分析</h2>
<p>处理 NET_RX_SOFTIRQ 软中断时,会调用 <code>igb_poll</code>,这部分调用逻辑与收包时相同,可参见前文的分析。</p>
<pre><code class="language-c">
// file: igb_main.c
/**
* igb_poll - NAPI Rx polling callback
* @napi: napi polling structure
* @budget: count of how many packets we should handle
*
* Cleans the Tx ring first (when this vector has one) and then the Rx ring
* (when it has one). If either clean reports unfinished work, @budget is
* returned so that polling continues. Otherwise napi_complete() and
* igb_ring_irq_enable() are called and 0 is returned.
**/
static int igb_poll(struct napi_struct *napi, int budget)
{
/* recover the q_vector that embeds this napi structure */
struct igb_q_vector *q_vector = container_of(napi,
struct igb_q_vector,
napi);
bool clean_complete = true;
#ifdef CONFIG_IGB_DCA
if (q_vector-&gt;adapter-&gt;flags &amp; IGB_FLAG_DCA_ENABLED)
igb_update_dca(q_vector);
#endif
if (q_vector-&gt;tx.ring)
clean_complete = igb_clean_tx_irq(q_vector); // reclaim the transmit ring buffer
if (q_vector-&gt;rx.ring)
clean_complete &amp;= igb_clean_rx_irq(q_vector, budget); // process received packets
/* If all work not completed, return budget and keep polling */
if (!clean_complete)
return budget;
/* If not enough Rx work done, exit the polling mode */
napi_complete(napi);
igb_ring_irq_enable(q_vector);
return 0;
}
/**
* igb_clean_tx_irq - Reclaim resources after transmit completes
* @q_vector: pointer to q_vector containing needed info
*
* Starting at the ring's next_to_clean index, walks the Tx buffers packet by
* packet. For each packet whose eop_desc (taken from next_to_watch) has the
* DD status bit set, the skb is freed and the DMA mappings of all of its
* descriptors are removed. At most tx.work_limit packets are handled per
* call. Afterwards the byte and packet counters are updated, an optional
* Tx hang check runs, and a stopped subqueue may be woken.
*
* returns true if ring is completely cleaned; true is also returned when
* the adapter is flagged __IGB_DOWN or when a Tx hang has been detected
**/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
{
struct igb_adapter *adapter = q_vector-&gt;adapter;
struct igb_ring *tx_ring = q_vector-&gt;tx.ring;
struct igb_tx_buffer *tx_buffer;
union e1000_adv_tx_desc *tx_desc;
unsigned int total_bytes = 0, total_packets = 0;
unsigned int budget = q_vector-&gt;tx.work_limit;
unsigned int i = tx_ring-&gt;next_to_clean;
/* nothing to clean while the adapter is down */
if (test_bit(__IGB_DOWN, &amp;adapter-&gt;state))
return true;
tx_buffer = &amp;tx_ring-&gt;tx_buffer_info[i];
tx_desc = IGB_TX_DESC(tx_ring, i);
/* bias i by minus count (unsigned wrap-around) so that stepping past the
* last ring entry shows up as i reaching zero in the checks below
*/
i -= tx_ring-&gt;count;
do {
union e1000_adv_tx_desc *eop_desc = tx_buffer-&gt;next_to_watch;
/* if next_to_watch is not set then there is no work pending */
if (!eop_desc)
break;
/* prevent any other reads prior to eop_desc */
read_barrier_depends();
/* if DD is not set pending work has not been completed */
if (!(eop_desc-&gt;wb.status &amp; cpu_to_le32(E1000_TXD_STAT_DD)))
break;
/* clear next_to_watch to prevent false hangs */
tx_buffer-&gt;next_to_watch = NULL;
/* update the statistics for this packet */
total_bytes += tx_buffer-&gt;bytecount;
total_packets += tx_buffer-&gt;gso_segs;
// Freeing the skb: is it really released here, or does that have to wait for the ACK?
// Answer: when a segment moves from the transport layer to the network layer (see
// tcp_transmit_skb), every skb is cloned; the clone is what gets freed here, while
// the original waits for the ACK.
/* free the skb */
dev_kfree_skb_any(tx_buffer-&gt;skb);
/* unmap skb header data */
dma_unmap_single(tx_ring-&gt;dev,
dma_unmap_addr(tx_buffer, dma),
dma_unmap_len(tx_buffer, len),
DMA_TO_DEVICE);
/* clear tx_buffer data */
tx_buffer-&gt;skb = NULL;
dma_unmap_len_set(tx_buffer, len, 0);
/* clear last DMA location and unmap remaining buffers */
while (tx_desc != eop_desc) {
tx_buffer++;
tx_desc++;
i++;
/* biased index hit zero: wrap back to the start of the ring */
if (unlikely(!i)) {
i -= tx_ring-&gt;count;
tx_buffer = tx_ring-&gt;tx_buffer_info;
tx_desc = IGB_TX_DESC(tx_ring, 0);
}
/* unmap any remaining paged data */
if (dma_unmap_len(tx_buffer, len)) {
dma_unmap_page(tx_ring-&gt;dev,
dma_unmap_addr(tx_buffer, dma),
dma_unmap_len(tx_buffer, len),
DMA_TO_DEVICE);
dma_unmap_len_set(tx_buffer, len, 0);
}
}
/* move us one more past the eop_desc for start of next pkt */
tx_buffer++;
tx_desc++;
i++;
/* same wrap handling as inside the inner loop */
if (unlikely(!i)) {
i -= tx_ring-&gt;count;
tx_buffer = tx_ring-&gt;tx_buffer_info;
tx_desc = IGB_TX_DESC(tx_ring, 0);
}
/* issue prefetch for next Tx descriptor */
prefetch(tx_desc);
/* update budget accounting */
budget--;
} while (likely(budget));
/* report the completed packets and bytes for this Tx queue */
netdev_tx_completed_queue(txring_txq(tx_ring),
total_packets, total_bytes);
/* remove the bias again to obtain a real ring index */
i += tx_ring-&gt;count;
tx_ring-&gt;next_to_clean = i;
/* ring statistics are updated inside the u64_stats begin/end pair */
u64_stats_update_begin(&amp;tx_ring-&gt;tx_syncp);
tx_ring-&gt;tx_stats.bytes += total_bytes;
tx_ring-&gt;tx_stats.packets += total_packets;
u64_stats_update_end(&amp;tx_ring-&gt;tx_syncp);
q_vector-&gt;tx.total_bytes += total_bytes;
q_vector-&gt;tx.total_packets += total_packets;
if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &amp;tx_ring-&gt;flags)) {
struct e1000_hw *hw = &amp;adapter-&gt;hw;
/* Detect a transmit hang in hardware, this serializes the
* check with the clearing of time_stamp and movement of i
*/
clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &amp;tx_ring-&gt;flags);
/* hang: the buffer at the new clean position still has work pending,
* its time_stamp is more than tx_timeout_factor * HZ jiffies old, and
* the STATUS register does not have TXOFF set
*/
if (tx_buffer-&gt;next_to_watch &amp;&amp;
time_after(jiffies, tx_buffer-&gt;time_stamp +
(adapter-&gt;tx_timeout_factor * HZ)) &amp;&amp;
!(rd32(E1000_STATUS) &amp; E1000_STATUS_TXOFF)) {
/* detected Tx unit hang */
dev_err(tx_ring-&gt;dev,
&quot;Detected Tx Unit Hang\n&quot;
&quot; Tx Queue &lt;%d&gt;\n&quot;
&quot; TDH &lt;%x&gt;\n&quot;
&quot; TDT &lt;%x&gt;\n&quot;
&quot; next_to_use &lt;%x&gt;\n&quot;
&quot; next_to_clean &lt;%x&gt;\n&quot;
&quot;buffer_info[next_to_clean]\n&quot;
&quot; time_stamp &lt;%lx&gt;\n&quot;
&quot; next_to_watch &lt;%p&gt;\n&quot;
&quot; jiffies &lt;%lx&gt;\n&quot;
&quot; desc.status &lt;%x&gt;\n&quot;,
tx_ring-&gt;queue_index,
rd32(E1000_TDH(tx_ring-&gt;reg_idx)),
readl(tx_ring-&gt;tail),
tx_ring-&gt;next_to_use,
tx_ring-&gt;next_to_clean,
tx_buffer-&gt;time_stamp,
tx_buffer-&gt;next_to_watch,
jiffies,
tx_buffer-&gt;next_to_watch-&gt;wb.status);
netif_stop_subqueue(tx_ring-&gt;netdev,
tx_ring-&gt;queue_index);
/* we are about to reset, no point in enabling stuff */
return true;
}
}
/* the subqueue is only woken once at least this many descriptors are unused */
#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
if (unlikely(total_packets &amp;&amp;
netif_carrier_ok(tx_ring-&gt;netdev) &amp;&amp;
igb_desc_unused(tx_ring) &gt;= TX_WAKE_THRESHOLD)) {
/* Make sure that anybody stopping the queue after this
* sees the new next_to_clean.
*/
smp_mb();
/* wake only if the subqueue is stopped and the adapter is not down */
if (__netif_subqueue_stopped(tx_ring-&gt;netdev,
tx_ring-&gt;queue_index) &amp;&amp;
!(test_bit(__IGB_DOWN, &amp;adapter-&gt;state))) {
netif_wake_subqueue(tx_ring-&gt;netdev,
tx_ring-&gt;queue_index);
u64_stats_update_begin(&amp;tx_ring-&gt;tx_syncp);
tx_ring-&gt;tx_stats.restart_queue++;
u64_stats_update_end(&amp;tx_ring-&gt;tx_syncp);
}
}
/* true when budget is left over, i.e. the loop stopped because no further
* completed work was found rather than because the work limit ran out
*/
return !!budget;
}
</code></pre>
<p>主要工作就是释放 skb、解除 DMA 映射等。需要注意的是,这里释放的只是克隆出来的 skb;原始的 skb 并没有被删除,要等收到对端的 ACK 之后才会真正释放。</p>