公开学习文档

公开学习文档


linux-x86中断

<h2>概述</h2> <h2>中断布局</h2> <p>CPU 收到的中断向量定义于 <a href="https://code.woboq.org/linux/linux/arch/x86/include/asm/irq_vectors.h.html"><code>irq_vectors.h</code></a>。下面先整理该头文件注释的要点,它详细描述了 IRQ 向量的基本布局:</p> <ul> <li> <p>每个 CPU 的 IDT 拥有 256 个表项,即能处理 256 个中断向量,向量总数定义为 <code>NR_VECTORS</code></p> </li> <li> <p>CPU 处理的中断向量分为几类</p> <ul> <li>0 到 31 号向量为系统陷阱(trap)和异常,这些是硬件固定定义的事件,无法屏蔽,必须进行处理</li> <li>32 到 127 号向量为设备中断</li> <li>128 号向量(0x80)即我们常说的 int 0x80 系统调用中断</li> <li>129 至 <code>INVALIDATE_TLB_VECTOR_START</code>-1 号向量(204 除外)也用于设备中断</li> <li><code>INVALIDATE_TLB_VECTOR_START</code> 至 255 号向量作为特殊中断</li> </ul> </li> <li>64 位架构下每个 CPU 有独立的 IDT 表,而 32 位则共享一张表</li> </ul> <p>头文件定义如下:</p> <pre><code class="language-c">// file: arch/x86/include/asm/irq_vectors.h #ifndef _ASM_X86_IRQ_VECTORS_H #define _ASM_X86_IRQ_VECTORS_H #include &amp;lt;linux/threads.h&amp;gt; /* * Linux IRQ vector layout. * * There are 256 IDT entries (per CPU - each entry is 8 bytes) which can * be defined by Linux. They are used as a jump table by the CPU when a * given vector is triggered - by a CPU-external, CPU-internal or * software-triggered event. * * Linux sets the kernel code address each entry jumps to early during * bootup, and never changes them. This is the general layout of the * IDT entries: * * Vectors 0 ... 31 : system traps and exceptions - hardcoded events * Vectors 32 ... 127 : device interrupts * Vector 128 : legacy int80 syscall interface * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts * * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. * * This file enumerates the exact layout of them: */ #define NMI_VECTOR 0x02 #define MCE_VECTOR 0x12 // 省略... </code></pre> <h2>初始化</h2> <pre><code class="language-c">// file: init/main.c asmlinkage void __init start_kernel(void) { // ... trap_init(); // ... init_IRQ(); // ... 
} // file: arch/x86/kernel/traps.c void __init trap_init(void) { int i; #ifdef CONFIG_EISA void __iomem *p = early_ioremap(0x0FFFD9, 4); if (readl(p) == 'E' + ('I'&amp;lt;&amp;lt;8) + ('S'&amp;lt;&amp;lt;16) + ('A'&amp;lt;&amp;lt;24)) EISA_bus = 1; early_iounmap(p, 4); #endif set_intr_gate(X86_TRAP_DE, &amp;amp;divide_error); set_intr_gate_ist(X86_TRAP_NMI, &amp;amp;nmi, NMI_STACK); /* int4 can be called from all */ set_system_intr_gate(X86_TRAP_OF, &amp;amp;overflow); set_intr_gate(X86_TRAP_BR, &amp;amp;bounds); set_intr_gate(X86_TRAP_UD, &amp;amp;invalid_op); set_intr_gate(X86_TRAP_NM, &amp;amp;device_not_available); #ifdef CONFIG_X86_32 set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS); #else set_intr_gate_ist(X86_TRAP_DF, &amp;amp;double_fault, DOUBLEFAULT_STACK); #endif set_intr_gate(X86_TRAP_OLD_MF, &amp;amp;coprocessor_segment_overrun); set_intr_gate(X86_TRAP_TS, &amp;amp;invalid_TSS); set_intr_gate(X86_TRAP_NP, &amp;amp;segment_not_present); set_intr_gate(X86_TRAP_SS, stack_segment); set_intr_gate(X86_TRAP_GP, &amp;amp;general_protection); set_intr_gate(X86_TRAP_SPURIOUS, &amp;amp;spurious_interrupt_bug); set_intr_gate(X86_TRAP_MF, &amp;amp;coprocessor_error); set_intr_gate(X86_TRAP_AC, &amp;amp;alignment_check); #ifdef CONFIG_X86_MCE set_intr_gate_ist(X86_TRAP_MC, &amp;amp;machine_check, MCE_STACK); #endif set_intr_gate(X86_TRAP_XF, &amp;amp;simd_coprocessor_error); /* Reserve all the builtin and the syscall vector: */ for (i = 0; i &amp;lt; FIRST_EXTERNAL_VECTOR; i++) // 保留 0 - 0x20 中断向量 set_bit(i, used_vectors); #ifdef CONFIG_IA32_EMULATION set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif #ifdef CONFIG_X86_32 set_system_trap_gate(SYSCALL_VECTOR, &amp;amp;system_call); set_bit(SYSCALL_VECTOR, used_vectors); #endif /* * Set the IDT descriptor to a fixed read-only location, so that the * &amp;quot;sidt&amp;quot; instruction will not leak the location of the kernel, and * to defend the IDT 
against arbitrary memory write vulnerabilities. * It will be reloaded in cpu_init() */ __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); idt_descr.address = fix_to_virt(FIX_RO_IDT); /* * Should be a barrier for any external CPU state: */ cpu_init(); x86_init.irqs.trap_init(); #ifdef CONFIG_X86_64 memcpy(&amp;amp;nmi_idt_table, &amp;amp;idt_table, IDT_ENTRIES * 16); set_nmi_gate(X86_TRAP_DB, &amp;amp;debug); set_nmi_gate(X86_TRAP_BP, &amp;amp;int3); #endif } // file: arch/x86/kernel/irqinit.c void __init init_IRQ(void) { int i; /* * We probably need a better place for this, but it works for * now ... */ x86_add_irq_domains(); /* * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. * If these IRQ's are handled by legacy interrupt-controllers like PIC, * then this configuration will likely be static after the boot. If * these IRQ's are handled by more mordern controllers like IO-APIC, * then this vector space can be freed and re-used dynamically as the * irq's migrate etc. */ for (i = 0; i &amp;lt; legacy_pic-&amp;gt;nr_legacy_irqs; i++) per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; // cpu0 的特殊操作:见英文注释说明 x86_init.irqs.intr_init(); } </code></pre> <h2>intr_init 来源</h2> <pre><code class="language-c">// file: arch/x86/kernel/x86_init.c /* * The platform setup functions are preset with the default functions * for standard PC hardware. 
*/ struct x86_init_ops x86_init __initdata = { .resources = { .probe_roms = probe_roms, .reserve_resources = reserve_standard_io_resources, .memory_setup = default_machine_specific_memory_setup, }, .mpparse = { .mpc_record = x86_init_uint_noop, .setup_ioapic_ids = x86_init_noop, .mpc_apic_id = default_mpc_apic_id, .smp_read_mpc_oem = default_smp_read_mpc_oem, .mpc_oem_bus_info = default_mpc_oem_bus_info, .find_smp_config = default_find_smp_config, .get_smp_config = default_get_smp_config, }, .irqs = { .pre_vector_init = init_ISA_irqs, .intr_init = native_init_IRQ, // 此函数 .trap_init = x86_init_noop, }, .oem = { .arch_setup = x86_init_noop, .banner = default_banner, }, .paging = { .pagetable_init = native_pagetable_init, }, .timers = { .setup_percpu_clockev = setup_boot_APIC_clock, .tsc_pre_init = x86_init_noop, .timer_init = hpet_time_init, .wallclock_init = x86_init_noop, }, .iommu = { .iommu_init = iommu_init_noop, }, .pci = { .init = x86_default_pci_init, .init_irq = x86_default_pci_init_irq, .fixup_irqs = x86_default_pci_fixup_irqs, }, }; </code></pre> <h2>x86 初始化 irq</h2> <pre><code class="language-c">// file: arch/x86/kernel/irqinit.c void __init native_init_IRQ(void) { int i; /* Execute any quirks before the call gates are initialised: */ x86_init.irqs.pre_vector_init(); apic_intr_init(); /* * Cover the whole vector space, no vector can escape * us. (some of these will be overridden and become * 'special' SMP interrupts) */ i = FIRST_EXTERNAL_VECTOR; // 0x20 for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); // 从 0x20 号中断向量开始,设置所有未标记的 vector(摘自网络:used_vectors 中没有标记为 1 的,都是设备中断的部分)。问题:每个 CPU 都这样设置吗? 
} if (!acpi_ioapic &amp;amp;&amp;amp; !of_ioapic) setup_irq(2, &amp;amp;irq2); #ifdef CONFIG_X86_32 irq_ctx_init(smp_processor_id()); #endif } </code></pre> <p><code>interrupt</code> 数组定义在 <code>arch/x86/kernel/entry_64.S</code> 中,继续分析:</p> <pre><code class="language-c">// file: arch/x86/kernel/entry_64.S /* * Build the entry stubs and pointer table with some assembler magic. * We pack 7 stubs into a single 32-byte chunk, which will fit in a * single cache line on all modern x86 implementations. */ .section .init.rodata,&amp;quot;a&amp;quot; ENTRY(interrupt) .section .entry.text .p2align 5 .p2align CONFIG_X86_L1_CACHE_SHIFT ENTRY(irq_entries_start) INTR_FRAME vector=FIRST_EXTERNAL_VECTOR .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 .balign 32 .rept 7 .if vector &amp;lt; NR_VECTORS .if vector &amp;lt;&amp;gt; FIRST_EXTERNAL_VECTOR CFI_ADJUST_CFA_OFFSET -8 .endif 1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ .if ((vector-FIRST_EXTERNAL_VECTOR)%7) &amp;lt;&amp;gt; 6 jmp 2f .endif .previous .quad 1b .section .entry.text vector=vector+1 .endif .endr 2: jmp common_interrupt // 跳转 .endr CFI_ENDPROC END(irq_entries_start) .previous END(interrupt) .previous</code></pre> <p>上面这段代码的解析摘自网络,如下图所示:</p> <p><img src="https://www.showdoc.com.cn/server/api/attachment/visitFile?sign=0bffa63a505c8f346b5e94e9a46ef076&amp;amp;file=file.png" alt="interrupt 入口桩代码解析图" /></p> <blockquote> <p>参考文档:<a href="https://zhuanlan.zhihu.com/p/542455357">https://zhuanlan.zhihu.com/p/542455357</a></p> </blockquote> <p>继续看 <code>common_interrupt</code>:</p> <pre><code class="language-c">// file: arch/x86/kernel/entry_64.S /* * Interrupt entry/exit should be protected against kprobes */ .pushsection .kprobes.text, &amp;quot;ax&amp;quot; /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. 
*/ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: XCPT_FRAME ASM_CLAC addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ // 执行 do_IRQ /* 0(%rsp): old_rsp-ARGOFFSET */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF decl PER_CPU_VAR(irq_count) /* Restore saved previous stack */ popq %rsi CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */ leaq ARGOFFSET-RBP(%rsi), %rsp CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET exit_intr: GET_THREAD_INFO(%rcx) testl $3,CS-ARGOFFSET(%rsp) je retint_kernel /* Interrupt came from user space */ /* * Has a correct top of stack, but a partial stack frame * %rcx: thread info. Interrupts off. */ retint_with_reschedule: movl $_TIF_WORK_MASK,%edi retint_check: LOCKDEP_SYS_EXIT_IRQ movl TI_flags(%rcx),%edx andl %edi,%edx CFI_REMEMBER_STATE jnz retint_careful retint_swapgs: /* return to user-space */ /* * The iretq could re-enable interrupts: */ DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_IRETQ SWAPGS jmp restore_args retint_restore_args: /* return to kernel space */ DISABLE_INTERRUPTS(CLBR_ANY) /* * The iretq could re-enable interrupts: */ TRACE_IRQS_IRETQ restore_args: RESTORE_ARGS 1,8,1 irq_return: INTERRUPT_RETURN</code></pre> <h2>do_IRQ</h2> <pre><code class="language-c">// file: arch/x86/kernel/irq.c /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific * handlers). 
*/ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); /* high bit used in ret_from_ code */ unsigned vector = ~regs-&amp;gt;orig_ax; // 中断向量号 unsigned irq; irq_enter(); exit_idle(); // 转换成 irq 号。因为 vector 是每个 CPU 的,但 irq 是全局的,所以需要转换。 // 在系统初始化的时候,我们会调用 __assign_irq_vector(),将虚拟中断信号 irq 分配到某个 CPU 上的中断向量 irq = __this_cpu_read(vector_irq[vector]); if (!handle_irq(irq, regs)) { ack_APIC_irq(); if (printk_ratelimit()) pr_emerg(&amp;quot;%s: %d.%d No irq handler for vector (irq %d)\n&amp;quot;, __func__, smp_processor_id(), vector, irq); } irq_exit(); set_irq_regs(old_regs); return 1; } // file: arch/x86/kernel/irq_64.c bool handle_irq(unsigned irq, struct pt_regs *regs) { struct irq_desc *desc; stack_overflow_check(regs); desc = irq_to_desc(irq); // 获取中断描述 if (unlikely(!desc)) return false; generic_handle_irq_desc(irq, desc); return true; } // file: include/linux/irqdesc.h /* * Architectures call this to let the generic IRQ layer * handle an interrupt. If the descriptor is attached to an * irqchip-style controller then we call the -&amp;gt;handle_irq() handler, * and it calls __do_IRQ() if it's attached to an irqtype-style controller. 
*/ static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) { desc-&amp;gt;handle_irq(irq, desc); }</code></pre> <p>这里的 <code>handle_irq</code> 是注册到 <code>irq_desc</code> 上的流控处理函数(如 <code>kernel/irq/chip.c</code> 中的 <code>handle_edge_irq</code>、<code>handle_level_irq</code>、<code>handle_fasteoi_irq</code>),它们会经由 <code>handle_irq_event</code> 最终调用 <code>handle_irq_event_percpu</code>,继续分析:</p> <pre><code class="language-c">// file: kernel/irq/handle.c irqreturn_t handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) { irqreturn_t retval = IRQ_NONE; unsigned int flags = 0, irq = desc-&amp;gt;irq_data.irq; do { irqreturn_t res; trace_irq_handler_entry(irq, action); res = action-&amp;gt;handler(irq, action-&amp;gt;dev_id); // 注册进来的 action 函数 trace_irq_handler_exit(irq, action, res); if (WARN_ONCE(!irqs_disabled(),&amp;quot;irq %u handler %pF enabled interrupts\n&amp;quot;, irq, action-&amp;gt;handler)) local_irq_disable(); switch (res) { case IRQ_WAKE_THREAD: /* * Catch drivers which return WAKE_THREAD but * did not set up a thread function */ if (unlikely(!action-&amp;gt;thread_fn)) { warn_no_thread(irq, action); break; } irq_wake_thread(desc, action); /* Fall through to add to randomness */ case IRQ_HANDLED: flags |= action-&amp;gt;flags; break; default: break; } retval |= res; action = action-&amp;gt;next; // 下一个 action } while (action); add_interrupt_randomness(irq, flags); if (!noirqdebug) note_interrupt(irq, desc, retval); return retval; }</code></pre>

页面列表

ITEM_HTML