进程
<h2>基础</h2>
<p>pid: 线程 ID(每个线程各不相同)<br>
tgid: 线程组 ID,即通常所说的进程 ID(同一进程内的所有线程共享)</p>
<p><code>getpid()</code> 获取的是 tgid,即每个线程调用获取的结果是一样的:都是进程 ID</p>
<h2>fork</h2>
<pre><code class="language-c">// file: kernel/fork.c
#ifdef __ARCH_WANT_SYS_FORK
/*
 * fork() system call entry point (takes no arguments).
 *
 * With CONFIG_MMU it builds a kernel_clone_args whose only explicitly
 * initialised field is exit_signal and hands it to kernel_clone().
 * Without CONFIG_MMU it returns -EINVAL.
 */
SYSCALL_DEFINE0(fork)
{
#ifdef CONFIG_MMU
struct kernel_clone_args args = {
.exit_signal = SIGCHLD, // the only field set explicitly; every other field (including flags) is zero-initialised
};
return kernel_clone(&amp;args);
#else
/* can not support in nommu mode */
return -EINVAL;
#endif
}
#endif
/*
* Ok, this is the main fork-routine.
*
* It copies the process, and if successful kick-starts
* it and waits for it to finish using the VM if required.
*
* args-&gt;exit_signal is expected to be checked for sanity by the caller.
*/
/*
 * Summary of the steps visible below:
 *   1. reject CLONE_PIDFD + CLONE_PARENT_SETTID pointing at the same location;
 *   2. pick the ptrace event (if any) to report;
 *   3. build the new task with copy_process();
 *   4. publish the new pid number, wake the task, and for CLONE_VFORK
 *      wait via wait_for_vfork_done().
 *
 * Returns the value of pid_vnr() for the new task on success, -EINVAL for
 * the flag conflict above, or PTR_ERR() of copy_process()'s result.
 */
pid_t kernel_clone(struct kernel_clone_args *args)
{
u64 clone_flags = args-&gt;flags;
struct completion vfork;
struct pid *pid;
struct task_struct *p;
int trace = 0;
pid_t nr;
/*
* For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument
* to return the pidfd. Hence, CLONE_PIDFD and CLONE_PARENT_SETTID are
* mutually exclusive. With clone3() CLONE_PIDFD has grown a separate
* field in struct clone_args and it still doesn't make sense to have
* them both point at the same memory location. Performing this check
* here has the advantage that we don't need to have a separate helper
* to check for legacy clone().
*/
if ((args-&gt;flags &amp; CLONE_PIDFD) &amp;&amp;
(args-&gt;flags &amp; CLONE_PARENT_SETTID) &amp;&amp;
(args-&gt;pidfd == args-&gt;parent_tid))
return -EINVAL;
/*
* Determine whether and which event to report to ptracer. When
* called from kernel_thread or CLONE_UNTRACED is explicitly
* requested, no event is reported; otherwise, report if the event
* for the type of forking is enabled.
*/
if (!(clone_flags &amp; CLONE_UNTRACED)) {
if (clone_flags &amp; CLONE_VFORK)
trace = PTRACE_EVENT_VFORK;
else if (args-&gt;exit_signal != SIGCHLD)
trace = PTRACE_EVENT_CLONE;
else
trace = PTRACE_EVENT_FORK;
if (likely(!ptrace_event_enabled(current, trace)))
trace = 0;
}
p = copy_process(NULL, trace, NUMA_NO_NODE, args); // the main logic lives here
add_latent_entropy();
if (IS_ERR(p))
return PTR_ERR(p);
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.
*/
trace_sched_process_fork(current, p);
pid = get_task_pid(p, PIDTYPE_PID);
nr = pid_vnr(pid);
/* nr is both written to args-&gt;parent_tid (when requested) and returned below */
if (clone_flags &amp; CLONE_PARENT_SETTID)
put_user(nr, args-&gt;parent_tid);
if (clone_flags &amp; CLONE_VFORK) {
p-&gt;vfork_done = &amp;vfork;
init_completion(&amp;vfork);
get_task_struct(p);
}
if (IS_ENABLED(CONFIG_LRU_GEN) &amp;&amp; !(clone_flags &amp; CLONE_VM)) {
/* lock the task to synchronize with memcg migration */
task_lock(p);
lru_gen_add_mm(p-&gt;mm);
task_unlock(p);
}
wake_up_new_task(p); // put the new task on the run queue
/* forking complete and child started to run, tell ptracer */
if (unlikely(trace))
ptrace_event_pid(trace, pid);
if (clone_flags &amp; CLONE_VFORK) {
if (!wait_for_vfork_done(p, &amp;vfork))
ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
}
put_pid(pid);
return nr;
}
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
*
* It copies the registers, and all the appropriate
* parts of the process environment (as per the clone
* flags). The actual kick-off is left to the caller.
*/
/*
 * NOTE: this is an abridged excerpt; the "// ..." markers stand for code
 * that has been left out. As shown, each copy_*() result is assigned to
 * retval and overwritten by the next call without being checked.
 */
static __latent_entropy struct task_struct *copy_process(
struct pid *pid,
int trace,
int node,
struct kernel_clone_args *args)
{
int pidfd = -1, retval;
struct task_struct *p;
struct multiprocess_signals delayed;
struct file *pidfile = NULL;
const u64 clone_flags = args-&gt;flags;
struct nsproxy *nsp = current-&gt;nsproxy;
// ...
p = dup_task_struct(current, node); // duplicate the current task's task_struct
// copy the various resources
retval = copy_files(clone_flags, p); // files is newly allocated, but the entries of files-&gt;fdt-&gt;fd appear to be the parent's file objects, copied over directly after bumping their reference counts
retval = copy_fs(clone_flags, p); // copies the parent's root and pwd
retval = copy_mm(clone_flags, p);
retval = copy_namespaces(clone_flags, p);
// Allocate a pid and set the process number. Note: pid is a struct, not a number; pid_nr() below is what extracts the numeric value
pid = alloc_pid(p-&gt;nsproxy-&gt;pid_ns_for_children, args-&gt;set_tid,
args-&gt;set_tid_size); // radix tree
p-&gt;pid = pid_nr(pid);
// With CLONE_THREAD the new task joins the caller's thread group (same tgid and group leader); otherwise it leads its own group and tgid equals its own pid
if (clone_flags &amp; CLONE_THREAD) {
p-&gt;group_leader = current-&gt;group_leader;
p-&gt;tgid = current-&gt;tgid;
} else {
p-&gt;group_leader = p;
p-&gt;tgid = p-&gt;pid;
}
// ...
}</code></pre>
<h2>线程创建</h2>
<p>调用链:<code>pthread_create -&gt; __pthread_create_2_1 -&gt; create_thread -&gt; do_clone -&gt; clone</code>。其中 <code>create_thread</code> 负责设置各种 clone flag。</p>
<p>系统调用:</p>
<pre><code class="language-c">// file: kernel/fork.c
/*
 * clone() system call entry point (parameter list abridged with "...").
 *
 * Splits the low 32 bits of clone_flags into the flag bits and the
 * CSIGNAL bits (used as exit_signal), fills in the remaining
 * kernel_clone_args fields from the syscall arguments, and calls
 * kernel_clone().
 */
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, ...)
{
struct kernel_clone_args args = {
.flags = (lower_32_bits(clone_flags) &amp; ~CSIGNAL), // set flags: low 32 bits with the CSIGNAL bits masked off
.pidfd = parent_tidptr,
.child_tid = child_tidptr,
.parent_tid = parent_tidptr,
.exit_signal = (lower_32_bits(clone_flags) &amp; CSIGNAL),
.stack = newsp,
.tls = tls,
};
return kernel_clone(&amp;args); // same entry point as fork
}</code></pre>
<p>例如,设置了 CLONE_FILES 标记时,新的 task_struct 对象会直接复用父进程的 files 指针,与老进程完全共用同一份打开文件表;而 fork 不带该标记,会为子进程新申请一份 files。创建进程与创建线程的差异,正是由这些 clone 标记决定的。</p>