softirq
<h2>Overview</h2>
<p>ksoftirqd is a set of kernel threads (one per CPU) that loop continuously; whenever pending softirqs are detected (via each CPU's pending bitmap), the corresponding softirq handlers are executed.
For networking, the softirqs involved are:
1. NET_TX_SOFTIRQ -> net_tx_action (registered by net/core/dev.c: net_dev_init())
2. NET_RX_SOFTIRQ -> net_rx_action (registered in the same place)</p>
<p>> Some references: <a href="https://zhuanlan.zhihu.com/p/80680484">https://zhuanlan.zhihu.com/p/80680484</a>
> <a href="https://zhuanlan.zhihu.com/p/88883239">https://zhuanlan.zhihu.com/p/88883239</a></p>
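<p>For context, that registration is one line per softirq: <code>net_dev_init()</code> calls <code>open_softirq</code>, which just stores the handler into the <code>softirq_vec</code> array. Abridged from kernels of this era (exact code varies by version):</p>
<pre><code class="language-c">// file: net/core/dev.c (abridged)
static int __init net_dev_init(void)
{
	/* ... per-CPU softnet_data setup elided ... */
	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
	/* ... */
	return 0;
}

// file: kernel/softirq.c
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action; // record the handler in the per-softirq vector
}
</code></pre>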
<h2>Creating ksoftirqd</h2>
<p>At boot, <code>spawn_ksoftirqd</code> is called to create one ksoftirqd thread per CPU (ncpu threads in total):</p>
<pre><code class="language-c">// file: kernel/softirq.c
static struct smp_hotplug_thread softirq_threads = {
	.store			= &amp;ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd, // the main work function
	.thread_comm		= &quot;ksoftirqd/%u&quot;,
};

static __init int spawn_ksoftirqd(void)
{
	register_cpu_notifier(&amp;cpu_nfb);

	BUG_ON(smpboot_register_percpu_thread(&amp;softirq_threads)); // threads are created here

	return 0;
}
early_initcall(spawn_ksoftirqd);
</code></pre>
<p>Call chain: <code>smpboot_register_percpu_thread -&gt; __smpboot_create_thread</code>. The function the created thread actually runs is <code>smpboot_thread_fn</code>, which is a loop that repeatedly decides whether to execute the real work.</p>
<pre><code class="language-c">// file: kernel/smpboot.c
static int
__smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
{
	struct task_struct *tsk = *per_cpu_ptr(ht-&gt;store, cpu);
	struct smpboot_thread_data *td;

	if (tsk)
		return 0;

	td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu));
	if (!td)
		return -ENOMEM;
	td-&gt;cpu = cpu;
	td-&gt;ht = ht;

	tsk = kthread_create_on_cpu(smpboot_thread_fn, td, cpu,
				    ht-&gt;thread_comm); // the thread's main function is smpboot_thread_fn, which loops
	if (IS_ERR(tsk)) {
		kfree(td);
		return PTR_ERR(tsk);
	}
	get_task_struct(tsk);
	*per_cpu_ptr(ht-&gt;store, cpu) = tsk;
	if (ht-&gt;create) {
		/*
		 * Make sure that the task has actually scheduled out
		 * into park position, before calling the create
		 * callback. At least the migration thread callback
		 * requires that the task is off the runqueue.
		 */
		if (!wait_task_inactive(tsk, TASK_PARKED))
			WARN_ON(1);
		else
			ht-&gt;create(cpu);
	}
	return 0;
}

/**
 * smpboot_thread_fn - percpu hotplug thread loop function
 * @data: thread data pointer
 *
 * Checks for thread stop and park conditions. Calls the necessary
 * setup, cleanup, park and unpark functions for the registered
 * thread.
 *
 * Returns 1 when the thread should exit, 0 otherwise.
 */
static int smpboot_thread_fn(void *data)
{
	struct smpboot_thread_data *td = data;
	struct smp_hotplug_thread *ht = td-&gt;ht;

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		preempt_disable();
		if (kthread_should_stop()) {
			set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht-&gt;cleanup)
				ht-&gt;cleanup(td-&gt;cpu, cpu_online(td-&gt;cpu));
			kfree(td);
			return 0;
		}

		if (kthread_should_park()) {
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht-&gt;park &amp;&amp; td-&gt;status == HP_THREAD_ACTIVE) {
				BUG_ON(td-&gt;cpu != smp_processor_id());
				ht-&gt;park(td-&gt;cpu);
				td-&gt;status = HP_THREAD_PARKED;
			}
			kthread_parkme();
			/* We might have been woken for stop */
			continue;
		}

		BUG_ON(td-&gt;cpu != smp_processor_id());

		/* Check for state change setup */
		switch (td-&gt;status) {
		case HP_THREAD_NONE:
			preempt_enable();
			if (ht-&gt;setup)
				ht-&gt;setup(td-&gt;cpu);
			td-&gt;status = HP_THREAD_ACTIVE;
			preempt_disable();
			break;
		case HP_THREAD_PARKED:
			preempt_enable();
			if (ht-&gt;unpark)
				ht-&gt;unpark(td-&gt;cpu);
			td-&gt;status = HP_THREAD_ACTIVE;
			preempt_disable();
			break;
		}

		// the core logic is here
		if (!ht-&gt;thread_should_run(td-&gt;cpu)) { // the thread_should_run callback
			preempt_enable();
			schedule();
		} else {
			set_current_state(TASK_RUNNING);
			preempt_enable();
			ht-&gt;thread_fn(td-&gt;cpu); // the thread_fn callback
		}
	}
}</code></pre>
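<p>Note the <code>.store</code> field used above: for softirq it points at the per-CPU <code>ksoftirqd</code> task pointer, so the assignment <code>*per_cpu_ptr(ht-&gt;store, cpu) = tsk</code> is what makes each thread findable later (e.g. by <code>wakeup_softirqd</code>). In kernels of this era it is declared as:</p>
<pre><code class="language-c">// file: kernel/softirq.c
DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
</code></pre>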
<h2>Run condition</h2>
<p>ksoftirqd runs when the per-CPU <code>__softirq_pending</code> bitmap is non-zero:</p>
<pre><code class="language-c">// file: kernel/softirq.c
static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}
</code></pre>
<pre><code class="language-c">// file: include/linux/irq_cpustat.h
/*
 * Simple wrappers reducing source bloat. Define all irq_stat fields
 * here, even ones that are arch dependent. That way we get common
 * definitions instead of differing sets for each arch.
 */
#ifndef __ARCH_IRQ_STAT
extern irq_cpustat_t irq_stat[];		/* defined in asm/hardirq.h */
#define __IRQ_STAT(cpu, member)	(irq_stat[cpu].member) // the per-CPU __softirq_pending field lives here
#endif

/* arch independent irq_stat fields */
#define local_softirq_pending() \
	__IRQ_STAT(smp_processor_id(), __softirq_pending)</code></pre>
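<p>Each bit of the bitmap corresponds to one softirq number. For reference, the softirq numbers in kernels of this era (the exact list varies by version):</p>
<pre><code class="language-c">// file: include/linux/interrupt.h
enum
{
	HI_SOFTIRQ=0,
	TIMER_SOFTIRQ,
	NET_TX_SOFTIRQ,		// bit 2: network transmit
	NET_RX_SOFTIRQ,		// bit 3: network receive
	BLOCK_SOFTIRQ,
	BLOCK_IOPOLL_SOFTIRQ,
	TASKLET_SOFTIRQ,
	SCHED_SOFTIRQ,
	HRTIMER_SOFTIRQ,
	RCU_SOFTIRQ,	/* Preferable RCU should always be the last softirq */

	NR_SOFTIRQS
};
</code></pre>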
<p>So when is the pending bitmap set? One of the entry points is:</p>
<pre><code class="language-c">// file: kernel/softirq.c
void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

void __raise_softirq_irqoff(unsigned int nr)
{
	trace_softirq_raise(nr);
	or_softirq_pending(1UL &lt;&lt; nr); // set the bit in the pending bitmap
}
</code></pre>
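<p><code>wakeup_softirqd</code> does exactly what its name says: it wakes the per-CPU ksoftirqd task created earlier (again quoted from kernels of this era):</p>
<pre><code class="language-c">// file: kernel/softirq.c
static void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	if (tsk &amp;&amp; tsk-&gt;state != TASK_RUNNING)
		wake_up_process(tsk);
}
</code></pre>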
<h2>Main work</h2>
<p>The actual work done by ksoftirqd is as follows:</p>
<pre><code class="language-c">// file: kernel/softirq.c
static void run_ksoftirqd(unsigned int cpu)
{
	local_irq_disable();
	if (local_softirq_pending()) { // the run condition
		__do_softirq(); // the actual work
		local_irq_enable();
		cond_resched();

		preempt_disable();
		rcu_note_context_switch(cpu);
		preempt_enable();

		return;
	}
	local_irq_enable();
}

/*
 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 * but break the loop if need_resched() is set or after 2 ms.
 * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this method.
 *
 * These limits have been established via experimentation.
 * The two things to balance is latency against fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10

// Note: this does not necessarily run inside ksoftirqd (see the irq_exit() sketch below)
asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	int cpu;
	unsigned long old_flags = current-&gt;flags;
	int max_restart = MAX_SOFTIRQ_RESTART;

	/*
	 * Mask out PF_MEMALLOC as current task context is borrowed for the
	 * softirq. A softirq handler such as network RX might set PF_MEMALLOC
	 * again if the socket is related to swap
	 */
	current-&gt;flags &amp;= ~PF_MEMALLOC;

	pending = local_softirq_pending();
	account_irq_enter_time(current);

	__local_bh_disable((unsigned long)__builtin_return_address(0),
			   SOFTIRQ_OFFSET);
	lockdep_softirq_enter();

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec; // the array of registered softirq handlers

	do {
		if (pending &amp; 1) {
			unsigned int vec_nr = h - softirq_vec;
			int prev_count = preempt_count();

			kstat_incr_softirqs_this_cpu(vec_nr);

			trace_softirq_entry(vec_nr);
			h-&gt;action(h); // invoke the handler
			trace_softirq_exit(vec_nr);
			if (unlikely(prev_count != preempt_count())) {
				printk(KERN_ERR &quot;huh, entered softirq %u %s %p&quot;
				       &quot;with preempt_count %08x,&quot;
				       &quot; exited with %08x?\n&quot;, vec_nr,
				       softirq_to_name[vec_nr], h-&gt;action,
				       prev_count, preempt_count());
				preempt_count() = prev_count;
			}

			rcu_bh_qs(cpu);
		}
		h++;
		pending &gt;&gt;= 1; // shift to the next softirq bit
	} while (pending);

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) &amp;&amp; !need_resched() &amp;&amp;
		    --max_restart)
			goto restart;

		wakeup_softirqd();
	}

	lockdep_softirq_exit();

	account_irq_exit_time(current);
	__local_bh_enable(SOFTIRQ_OFFSET);
	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
}</code></pre>
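<p>As the comment on <code>__do_softirq</code> notes, it does not only run inside ksoftirqd: the other major entry point is hard-interrupt exit. Abridged from kernels of this era (details vary by version and architecture):</p>
<pre><code class="language-c">// file: kernel/softirq.c (abridged)
/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
	account_irq_exit_time(current);
	trace_hardirq_exit();
	sub_preempt_count(HARDIRQ_OFFSET);
	if (!in_interrupt() &amp;&amp; local_softirq_pending())
		invoke_softirq(); // ends up in __do_softirq(), still in hardirq-exit context

	/* ... tick / RCU bookkeeping elided ... */
}
</code></pre>
<p>So a softirq raised from a hard interrupt handler is normally processed right at <code>irq_exit()</code>; ksoftirqd is the fallback for softirqs raised from process context, and for the case where <code>__do_softirq</code> gives up after <code>MAX_SOFTIRQ_TIME</code> / <code>MAX_SOFTIRQ_RESTART</code> and calls <code>wakeup_softirqd()</code>.</p>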