公开学习文档

公开学习文档


负载

<h2>概述</h2> <p>应用层是从 <code>/proc/stat</code> 读取统计数据的,这些数据是在时钟 TICK 中断中更新的,本质上属于高频抽样。</p> <p><code>nice</code> 本质上属于 <code>user</code>,只是单独将 nice &gt; 0 的情况拿出来统计,可以通过 top 来查看;<code>iowait</code> 本质上属于 <code>idle</code>,只是单独将有 iowait 的情况拿出来统计;<code>sys</code> 就是系统态,但除去了硬中断和软中断的场景,如系统调用、内核线程等。</p> <h2>分析(基于4.19)</h2> <pre><code class="language-c">// file: fs/proc/stat.c static int __init proc_stat_init(void) { proc_create(&quot;stat&quot;, 0, NULL, &amp;proc_stat_operations); return 0; } static const struct file_operations proc_stat_operations = { .open = stat_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; static int stat_open(struct inode *inode, struct file *file) { unsigned int size = 1024 + 128 * num_online_cpus(); /* minimum size to display an interrupt count : 2 bytes */ size += 2 * nr_irqs; return single_open_size(file, show_stat, NULL, size); // show_stat } static int show_stat(struct seq_file *p, void *v) { int i, j; u64 user, nice, system, idle, iowait, irq, softirq, steal; u64 guest, guest_nice; u64 sum = 0; u64 sum_softirq = 0; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; struct timespec64 boottime; user = nice = system = idle = iowait = irq = softirq = steal = 0; guest = guest_nice = 0; getboottime64(&amp;boottime); for_each_possible_cpu(i) { user += kcpustat_cpu(i).cpustat[CPUTIME_USER]; // 取变量 kcpustat_cpu(i).cpustat 的值 nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE]; system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]; idle += get_idle_time(i); iowait += get_iowait_time(i); irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ]; softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]; steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; sum += kstat_cpu_irqs_sum(i); sum += arch_irq_stat_cpu(i); for (j = 0; j &lt; NR_SOFTIRQS; j++) { unsigned int softirq_stat = kstat_softirqs_cpu(j, i); per_softirq_sums[j] += 
softirq_stat; sum_softirq += softirq_stat; } } sum += arch_irq_stat(); seq_put_decimal_ull(p, &quot;cpu &quot;, nsec_to_clock_t(user)); // 首先是所有 CPU 的占用情况。转换成节拍数并打印出来 seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(nice)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(system)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(idle)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(iowait)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(irq)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(softirq)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(steal)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(guest)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(guest_nice)); seq_putc(p, '\n'); for_each_online_cpu(i) { /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ user = kcpustat_cpu(i).cpustat[CPUTIME_USER]; nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE]; system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]; idle = get_idle_time(i); iowait = get_iowait_time(i); irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ]; softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]; steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; seq_printf(p, &quot;cpu%d&quot;, i); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(user)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(nice)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(system)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(idle)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(iowait)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(irq)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(softirq)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(steal)); 
seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(guest)); seq_put_decimal_ull(p, &quot; &quot;, nsec_to_clock_t(guest_nice)); seq_putc(p, '\n'); } seq_put_decimal_ull(p, &quot;intr &quot;, (unsigned long long)sum); /* sum again ? it could be updated? */ for_each_irq_nr(j) seq_put_decimal_ull(p, &quot; &quot;, kstat_irqs_usr(j)); seq_printf(p, &quot;\nctxt %llu\n&quot; &quot;btime %llu\n&quot; &quot;processes %lu\n&quot; &quot;procs_running %lu\n&quot; &quot;procs_blocked %lu\n&quot;, nr_context_switches(), (unsigned long long)boottime.tv_sec, total_forks, nr_running(), nr_iowait()); seq_put_decimal_ull(p, &quot;softirq &quot;, (unsigned long long)sum_softirq); for (i = 0; i &lt; NR_SOFTIRQS; i++) seq_put_decimal_ull(p, &quot; &quot;, per_softirq_sums[i]); seq_putc(p, '\n'); return 0; }</code></pre> <p>那么 <code>kcpustat_cpu(i).cpustat</code> 里的值是什么时候更新的呢?</p> <h2>kcpustat_cpu(i).cpustat 更新</h2> <p>时钟 TICK 中断时,会调用 <code>update_process_times</code>。</p> <pre><code class="language-c">// file: kernel/time/timer.c /* * Called from the timer interrupt handler to charge one tick to the current * process. user_tick is 1 if the tick is user time, 0 for system. */ void update_process_times(int user_tick) { struct task_struct *p = current; /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); // 继续 run_local_timers(); rcu_check_callbacks(user_tick); #ifdef CONFIG_IRQ_WORK if (in_irq()) irq_work_tick(); #endif scheduler_tick(); if (IS_ENABLED(CONFIG_POSIX_TIMERS)) run_posix_cpu_timers(p); } // file: kernel/sched/cputime.c /* * Account a single tick of CPU time. 
* @p: the process that the CPU time gets accounted to * @user_tick: indicates if the tick is a user or a system tick */ void account_process_tick(struct task_struct *p, int user_tick) { u64 cputime, steal; struct rq *rq = this_rq(); if (vtime_accounting_cpu_enabled()) return; if (sched_clock_irqtime) { irqtime_account_process_tick(p, user_tick, rq, 1); return; } cputime = TICK_NSEC; // 每个 TICK 的时间,单位是:纳秒 steal = steal_account_process_time(ULONG_MAX); // ? if (steal &gt;= cputime) return; cputime -= steal; if (user_tick) account_user_time(p, cputime); // 1. 用户态 else if ((p != rq-&gt;idle) || (irq_count() != HARDIRQ_OFFSET)) account_system_time(p, HARDIRQ_OFFSET, cputime); // 2. 内核态。宏定义 #define HARDIRQ_OFFSET (1UL &lt;&lt; HARDIRQ_SHIFT) else account_idle_time(cputime); // 3. 空闲时间 } /* * Account user CPU time to a process. * @p: the process that the CPU time gets accounted to * @cputime: the CPU time spent in user space since the last update */ void account_user_time(struct task_struct *p, u64 cputime) { int index; /* Add user time to process. */ p-&gt;utime += cputime; account_group_user_time(p, cputime); index = (task_nice(p) &gt; 0) ? CPUTIME_NICE : CPUTIME_USER; // user 分为这 2 个。注意:nice 值越大,优先级越低 /* Add user time to cpustat. */ task_group_account_field(p, index, cputime); // 这里统计到 kernel_cpustat.cpustat 中 /* Account for user time used */ acct_account_cputime(p); } static inline void task_group_account_field(struct task_struct *p, int index, u64 tmp) { /* * Since all updates are sure to touch the root cgroup, we * get ourselves ahead and touch it first. If the root cgroup * is the only cgroup, then nothing else should be necessary. * */ __this_cpu_add(kernel_cpustat.cpustat[index], tmp); // 统计到 kernel_cpustat.cpustat 中 cgroup_account_cputime_field(p, index, tmp); } /* * Account system CPU time to a process. 
* @p: the process that the CPU time gets accounted to * @hardirq_offset: the offset to subtract from hardirq_count() * @cputime: the CPU time spent in kernel space since the last update */ void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime) { int index; if ((p-&gt;flags &amp; PF_VCPU) &amp;&amp; (irq_count() - hardirq_offset == 0)) { // (preempt_count() &amp; (HARDIRQ_MASK | SOFTIRQ_MASK | NMI_MASK)) account_guest_time(p, cputime); return; } if (hardirq_count() - hardirq_offset) // (preempt_count() &amp; HARDIRQ_MASK)。会有硬中断嵌套吗? index = CPUTIME_IRQ; else if (in_serving_softirq()) // (softirq_count() &amp; SOFTIRQ_OFFSET) index = CPUTIME_SOFTIRQ; else index = CPUTIME_SYSTEM; // sys:内核态 - 硬中断 - 软中断:系统调用 account_system_index_time(p, cputime, index); } /* * Account system CPU time to a process and desired cpustat field * @p: the process that the CPU time gets accounted to * @cputime: the CPU time spent in kernel space since the last update * @index: pointer to cpustat field that has to be updated */ void account_system_index_time(struct task_struct *p, u64 cputime, enum cpu_usage_stat index) { /* Add system time to process. */ p-&gt;stime += cputime; account_group_system_time(p, cputime); /* Add system time to cpustat. */ task_group_account_field(p, index, cputime); // 添加 /* Account for system time used */ acct_account_cputime(p); } /* * Account for idle time. * @cputime: the CPU time spent in idle wait */ void account_idle_time(u64 cputime) { u64 *cpustat = kcpustat_this_cpu-&gt;cpustat; struct rq *rq = this_rq(); if (atomic_read(&amp;rq-&gt;nr_iowait) &gt; 0) cpustat[CPUTIME_IOWAIT] += cputime; // iowait else cpustat[CPUTIME_IDLE] += cputime; // idle }</code></pre>

页面列表

ITEM_HTML