epoll-创建
<h2>概述</h2>
<p>1、<code>epoll_create</code> 申请一个 eventpoll 对象 ep,为其创建对应的 file 并与新分配的 fd 关联起来,同时把该 file 的私有数据(<code>private_data</code>)设置为 ep。</p>
<h2>参考文档</h2>
<p><a href="https://zhuanlan.zhihu.com/p/147549069">https://zhuanlan.zhihu.com/p/147549069</a>
<a href="https://cloud.tencent.com/developer/article/1401558">https://cloud.tencent.com/developer/article/1401558</a></p>
<h2>分析</h2>
<p>应用层通常的使用方式如下:</p>
<pre><code class="language-c">cfd1 = accept(xxx); // first connection fd to be monitored
cfd2 = accept(xxx); // second connection fd to be monitored
efd = epoll_create(max_size); // create the epoll instance and get its fd
epoll_ctl(efd, EPOLL_CTL_ADD, cfd1, ...); // add cfd1 to the epoll instance
epoll_ctl(efd, EPOLL_CTL_ADD, cfd2, ...); // add cfd2 to the epoll instance
epoll_wait(efd, ...); // wait on the epoll instance</code></pre>
<p><code>epoll_create</code> 系统调用(下面给出的是 <code>epoll_create1</code> 的实现,<code>epoll_create</code> 在检查 size 参数后走的是同一套逻辑):</p>
<pre><code class="language-c">// file: fs/eventpoll.c
/*
* Open an eventpoll file descriptor.
*
* flags may only contain EPOLL_CLOEXEC; any other bit yields -EINVAL.
*
* Steps, in order:
*   1. allocate the eventpoll object with ep_alloc();
*   2. reserve an unused fd;
*   3. obtain an anonymous-inode file whose private data is the
*      eventpoll object;
*   4. record the file in the eventpoll object and install it on the fd.
*
* Returns the new fd on success, or the negative error code of the step
* that failed. On failure, whatever was acquired earlier is released
* through the out_free_fd / out_free_ep labels.
*/
SYSCALL_DEFINE1(epoll_create1, int, flags)
{
int error, fd;
struct eventpoll *ep = NULL;
struct file *file;
/* Check the EPOLL_* constant for consistency. */
BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
if (flags &amp; ~EPOLL_CLOEXEC)
return -EINVAL;
/*
* Create the internal data structure (&quot;struct eventpoll&quot;).
*/
error = ep_alloc(&amp;ep); // the eventpoll object
if (error &lt; 0)
return error;
/*
* Creates all the items needed to setup an eventpoll file. That is,
* a file structure and a free file descriptor.
*/
fd = get_unused_fd_flags(O_RDWR | (flags &amp; O_CLOEXEC)); // find an unused fd in the current process
if (fd &lt; 0) {
error = fd;
goto out_free_ep;
}
// Obtain a struct file from the anonymous-inode facility,
// with file-&gt;f_op = &amp;eventpoll_fops
// and file-&gt;private_data = ep.
// NOTE(review): the original note said an inode is allocated here;
// anon_inode_getfile() is documented as attaching files to a shared
// anonymous inode - confirm against the kernel version being quoted.
file = anon_inode_getfile(&quot;[eventpoll]&quot;, &amp;eventpoll_fops, ep,
O_RDWR | (flags &amp; O_CLOEXEC)); // ep is passed in as the file's private data
if (IS_ERR(file)) {
error = PTR_ERR(file);
goto out_free_fd;
}
// Back-pointer from the eventpoll object to its file.
ep-&gt;file = file;
fd_install(fd, file); // bind the fd to the file
return fd;
// Error unwinding: release in the reverse order of acquisition.
out_free_fd:
put_unused_fd(fd);
out_free_ep:
ep_free(ep);
return error;
}
/*
* This structure is stored inside the &quot;private_data&quot; member of the file
* structure and represents the main data structure for the eventpoll
* interface.
*/
struct eventpoll {
/* Protect the access to this structure */
spinlock_t lock;
/*
* This mutex is used to ensure that files are not removed
* while epoll is using them. This is held during the event
* collection loop, the file cleanup path, the epoll file exit
* code and the ctl operations.
*/
struct mutex mtx;
/* Wait queue used by sys_epoll_wait() */
wait_queue_head_t wq; // wait queue used by epoll_wait
/* Wait queue used by file-&gt;poll() */
wait_queue_head_t poll_wait;
/* List of ready file descriptors */
struct list_head rdllist; // list of ready descriptors
/* RB tree root used to store monitored fd structs */
struct rb_root rbr; // red-black tree holding every fd added for monitoring
/*
* This is a single linked list that chains all the &quot;struct epitem&quot; that
* happened while transferring ready events to userspace w/out
* holding -&gt;lock.
*/
struct epitem *ovflist;
/* wakeup_source used when ep_scan_ready_list is running */
struct wakeup_source *ws;
/* The user that created the eventpoll descriptor */
struct user_struct *user;
/* The file this eventpoll object is attached to */
struct file *file;
/* used to optimize loop detection check */
int visited;
struct list_head visited_list_link;
};
/*
* Allocate and initialise a struct eventpoll.
*
* Obtains the current user with get_current_user(), zero-allocates the
* object, then initialises its spinlock, mutex, both wait queues, the
* ready list and the RB-tree root, sets ovflist to EP_UNACTIVE_PTR and
* records the user.
*
* @pep: out parameter; receives the new object on success and is left
*       untouched on failure.
*
* Returns 0 on success. If the allocation fails, the user obtained
* above is released with free_uid() and -ENOMEM is returned.
*/
static int ep_alloc(struct eventpoll **pep)
{
int error;
struct user_struct *user;
struct eventpoll *ep;
user = get_current_user();
/* Preset the error code for the only failure path below. */
error = -ENOMEM;
ep = kzalloc(sizeof(*ep), GFP_KERNEL);
if (unlikely(!ep))
goto free_uid;
spin_lock_init(&amp;ep-&gt;lock);
mutex_init(&amp;ep-&gt;mtx);
init_waitqueue_head(&amp;ep-&gt;wq);
init_waitqueue_head(&amp;ep-&gt;poll_wait);
INIT_LIST_HEAD(&amp;ep-&gt;rdllist);
ep-&gt;rbr = RB_ROOT;
ep-&gt;ovflist = EP_UNACTIVE_PTR;
ep-&gt;user = user;
*pep = ep;
return 0;
free_uid:
free_uid(user);
return error;
}</code></pre>