公开学习文档

公开学习文档


RingBuffer内存回收

<h2>概述</h2>
<p>调用树:</p>
<pre><code class="language-c">NET_RX_SOFTIRQ 软件中断
  igb_poll                // 负责回收发包 ringbuffer,并处理收包
    igb_clean_tx_irq
      dev_kfree_skb_any   // 释放 skb(这是释放克隆的 skb,原始的要等 ACK 才能释放);解除 DMA 映射</code></pre>
<p>当数据发送完以后,还需要清理内存。当发送完成时,网卡会触发一个硬中断(和收到数据包时一样),进一步触发 NET_RX_SOFTIRQ 软中断,其中会清理内存。</p>
<h2>分析</h2>
<p>在 NET_RX_SOFTIRQ 处理时,会调用 <code>igb_poll</code> 来处理,具体逻辑与收包一样,可见前文分析。</p>
<pre><code class="language-c">// file: igb_main.c
/**
 *  igb_poll - NAPI Rx polling callback
 *  @napi: napi polling structure
 *  @budget: count of how many packets we should handle
 **/
static int igb_poll(struct napi_struct *napi, int budget)
{
	struct igb_q_vector *q_vector = container_of(napi,
						     struct igb_q_vector,
						     napi);
	bool clean_complete = true;

#ifdef CONFIG_IGB_DCA
	if (q_vector-&gt;adapter-&gt;flags &amp; IGB_FLAG_DCA_ENABLED)
		igb_update_dca(q_vector);
#endif
	if (q_vector-&gt;tx.ring)
		clean_complete = igb_clean_tx_irq(q_vector); // 这里是回收发包 RingBuffer

	if (q_vector-&gt;rx.ring)
		clean_complete &amp;= igb_clean_rx_irq(q_vector, budget); // 这里是处理收包的

	/* If all work not completed, return budget and keep polling */
	if (!clean_complete)
		return budget;

	/* If not enough Rx work done, exit the polling mode */
	napi_complete(napi);
	igb_ring_irq_enable(q_vector);

	return 0;
}

/**
 *  igb_clean_tx_irq - Reclaim resources after transmit completes
 *  @q_vector: pointer to q_vector containing needed info
 *
 *  returns true if ring is completely cleaned
 **/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector-&gt;adapter;
	struct igb_ring *tx_ring = q_vector-&gt;tx.ring;
	struct igb_tx_buffer *tx_buffer;
	union e1000_adv_tx_desc *tx_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = q_vector-&gt;tx.work_limit;
	unsigned int i = tx_ring-&gt;next_to_clean;

	if (test_bit(__IGB_DOWN, &amp;adapter-&gt;state))
		return true;

	tx_buffer = &amp;tx_ring-&gt;tx_buffer_info[i];
	tx_desc = IGB_TX_DESC(tx_ring, i);
	i -= tx_ring-&gt;count;

	do {
		union e1000_adv_tx_desc *eop_desc = tx_buffer-&gt;next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		read_barrier_depends();

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc-&gt;wb.status &amp; cpu_to_le32(E1000_TXD_STAT_DD)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buffer-&gt;next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buffer-&gt;bytecount;
		total_packets += tx_buffer-&gt;gso_segs;

		// 释放 skb,实际上是否会真正释放呢?是否要等待 ACK 确认呢?
		// 答:从传输层进入到网络层时(详见 tcp_transmit_skb 函数),每个 skb 都会克隆出一份;
		//     这里释放的是克隆出来的那份,原始 skb 要等收到 ACK 应答后才释放
		/* free the skb */
		dev_kfree_skb_any(tx_buffer-&gt;skb);

		/* unmap skb header data */
		dma_unmap_single(tx_ring-&gt;dev,
				 dma_unmap_addr(tx_buffer, dma),
				 dma_unmap_len(tx_buffer, len),
				 DMA_TO_DEVICE);

		/* clear tx_buffer data */
		tx_buffer-&gt;skb = NULL;
		dma_unmap_len_set(tx_buffer, len, 0);

		/* clear last DMA location and unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buffer++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring-&gt;count;
				tx_buffer = tx_ring-&gt;tx_buffer_info;
				tx_desc = IGB_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buffer, len)) {
				dma_unmap_page(tx_ring-&gt;dev,
					       dma_unmap_addr(tx_buffer, dma),
					       dma_unmap_len(tx_buffer, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buffer, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring-&gt;count;
			tx_buffer = tx_ring-&gt;tx_buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, 0);
		}

		/* issue prefetch for next Tx descriptor */
		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);
	i += tx_ring-&gt;count;
	tx_ring-&gt;next_to_clean = i;
	u64_stats_update_begin(&amp;tx_ring-&gt;tx_syncp);
	tx_ring-&gt;tx_stats.bytes += total_bytes;
	tx_ring-&gt;tx_stats.packets += total_packets;
	u64_stats_update_end(&amp;tx_ring-&gt;tx_syncp);
	q_vector-&gt;tx.total_bytes += total_bytes;
	q_vector-&gt;tx.total_packets += total_packets;

	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &amp;tx_ring-&gt;flags)) {
		struct e1000_hw *hw = &amp;adapter-&gt;hw;

		/* Detect a transmit hang in hardware, this serializes the
		 * check with the clearing of time_stamp and movement of i */
		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &amp;tx_ring-&gt;flags);
		if (tx_buffer-&gt;next_to_watch &amp;&amp;
		    time_after(jiffies, tx_buffer-&gt;time_stamp +
			       (adapter-&gt;tx_timeout_factor * HZ)) &amp;&amp;
		    !(rd32(E1000_STATUS) &amp; E1000_STATUS_TXOFF)) {

			/* detected Tx unit hang */
			dev_err(tx_ring-&gt;dev,
				&quot;Detected Tx Unit Hang\n&quot;
				&quot; Tx Queue &lt;%d&gt;\n&quot;
				&quot; TDH &lt;%x&gt;\n&quot;
				&quot; TDT &lt;%x&gt;\n&quot;
				&quot; next_to_use &lt;%x&gt;\n&quot;
				&quot; next_to_clean &lt;%x&gt;\n&quot;
				&quot;buffer_info[next_to_clean]\n&quot;
				&quot; time_stamp &lt;%lx&gt;\n&quot;
				&quot; next_to_watch &lt;%p&gt;\n&quot;
				&quot; jiffies &lt;%lx&gt;\n&quot;
				&quot; desc.status &lt;%x&gt;\n&quot;,
				tx_ring-&gt;queue_index,
				rd32(E1000_TDH(tx_ring-&gt;reg_idx)),
				readl(tx_ring-&gt;tail),
				tx_ring-&gt;next_to_use,
				tx_ring-&gt;next_to_clean,
				tx_buffer-&gt;time_stamp,
				tx_buffer-&gt;next_to_watch,
				jiffies,
				tx_buffer-&gt;next_to_watch-&gt;wb.status);
			netif_stop_subqueue(tx_ring-&gt;netdev,
					    tx_ring-&gt;queue_index);

			/* we are about to reset, no point in enabling stuff */
			return true;
		}
	}

#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
	if (unlikely(total_packets &amp;&amp;
		     netif_carrier_ok(tx_ring-&gt;netdev) &amp;&amp;
		     igb_desc_unused(tx_ring) &gt;= TX_WAKE_THRESHOLD)) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring-&gt;netdev,
					     tx_ring-&gt;queue_index) &amp;&amp;
		    !(test_bit(__IGB_DOWN, &amp;adapter-&gt;state))) {
			netif_wake_subqueue(tx_ring-&gt;netdev,
					    tx_ring-&gt;queue_index);

			u64_stats_update_begin(&amp;tx_ring-&gt;tx_syncp);
			tx_ring-&gt;tx_stats.restart_queue++;
			u64_stats_update_end(&amp;tx_ring-&gt;tx_syncp);
		}
	}

	return !!budget;
}
</code></pre>
<p>主要就是清理 skb,解除 DMA 映射,等等。但这里释放的只是克隆出来的 skb,原始 skb 并没有被删除,因为要等收到 ACK 后才会真正释放。</p>

页面列表

ITEM_HTML