socket-file上的f_op
<h2>问题</h2>
<p>socket 对应的 struct file* 上的 f_op-&gt;poll 函数是什么呢?</p>
<h2>分析</h2>
<p>需要从 socket 创建的时候说起:</p>
<pre><code class="language-c">// file: net/socket.c
/*
* Excerpt of the socket() system call entry point; unrelated lines are
* elided with "// ...".
*
* Visible steps: create the socket with sock_create(), then hand it to
* sock_map_fd() to obtain a file descriptor. A negative result from either
* call jumps to the corresponding error label.
*/
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
// ...
retval = sock_create(family, type, protocol, &amp;sock);
if (retval &lt; 0)
goto out;
retval = sock_map_fd(sock, flags &amp; (O_CLOEXEC | O_NONBLOCK)); // Turn the socket object into an fd; the private_data of the resulting struct file is the struct socket *
if (retval &lt; 0)
goto out_release;
// ...
}
/*
* Bind a socket to a new file descriptor.
*
* Reserves an unused fd, asks sock_alloc_file() for a struct file wrapping
* the socket, and installs that file into the reserved fd slot.
*
* Returns the fd on success. Returns the negative value from
* get_unused_fd_flags() if no fd could be reserved, or PTR_ERR() of the
* failed file allocation after giving the reserved fd back.
*/
static int sock_map_fd(struct socket *sock, int flags)
{
struct file *newfile;
int fd = get_unused_fd_flags(flags);
if (unlikely(fd &lt; 0))
return fd;
newfile = sock_alloc_file(sock, flags, NULL); // Continue into sock_alloc_file()
if (likely(!IS_ERR(newfile))) {
fd_install(fd, newfile);
return fd;
}
/* File allocation failed: release the fd reserved above. */
put_unused_fd(fd);
return PTR_ERR(newfile);
}
/*
* Obtains the first available file descriptor and sets it up for use.
*
* These functions create file structures and maps them to fd space
* of the current process. On success it returns file descriptor
* and file struct implicitly stored in sock-&gt;file.
* Note that another thread may close file descriptor before we return
* from this function. We use the fact that now we do not refer
* to socket after mapping. If one day we will need it, this
* function will increment ref. count on file by 1.
*
* In any case returned fd MAY BE not valid!
* This race condition is unavoidable
* with shared fd spaces, we cannot solve it inside kernel,
* but we take care of internal coherence yet.
*
* NOTE(review): as shown here, sock_alloc_file() itself does not allocate
* a file descriptor; its caller sock_map_fd() does that. This function
* builds a pseudo dentry on sock_mnt, allocates a struct file bound to
* socket_file_ops, and links the socket and the file to each other
* (sock-&gt;file and file-&gt;private_data).
*
* Returns the new struct file, ERR_PTR(-ENOMEM) if the dentry cannot be
* allocated, or the error pointer returned by alloc_file().
*/
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
struct qstr name = { .name = &quot;&quot; };
struct path path;
struct file *file;
/*
* Name of the pseudo dentry: dname when given, otherwise the name of
* sk_prot_creator when sock-&gt;sk is set; otherwise it stays empty.
*/
if (dname) {
name.name = dname;
name.len = strlen(name.name);
} else if (sock-&gt;sk) {
name.name = sock-&gt;sk-&gt;sk_prot_creator-&gt;name;
name.len = strlen(name.name);
}
path.dentry = d_alloc_pseudo(sock_mnt-&gt;mnt_sb, &amp;name);
if (unlikely(!path.dentry))
return ERR_PTR(-ENOMEM);
path.mnt = mntget(sock_mnt);
d_instantiate(path.dentry, SOCK_INODE(sock));
/* The socket inode gets the same operations table as the file. */
SOCK_INODE(sock)-&gt;i_fop = &amp;socket_file_ops;
file = alloc_file(&amp;path, FMODE_READ | FMODE_WRITE,
&amp;socket_file_ops); // Note: this is where file-&gt;f_op is set to &amp;socket_file_ops
if (unlikely(IS_ERR(file))) {
/* drop dentry, keep inode */
ihold(path.dentry-&gt;d_inode);
path_put(&amp;path);
return file;
}
sock-&gt;file = file;
file-&gt;f_flags = O_RDWR | (flags &amp; O_NONBLOCK);
file-&gt;private_data = sock; // Private data: back-pointer to the socket (similar to how epoll uses private_data)
return file;
}
EXPORT_SYMBOL(sock_alloc_file);
// file: fs/file_table.c
/**
* alloc_file - allocate and initialize a 'struct file'
* @path: the path (dentry and vfsmount) of the new file; copied into
*        file-&gt;f_path
* @mode: the mode with which the new file will be opened
* @fop: the 'struct file_operations' for the new file
*
* Use this instead of get_empty_filp() to get a new
* 'struct file'. Do so because of the same initialization
* pitfalls reasons listed for init_file(). This is a
* preferred interface to using init_file().
*
* If all the callers of init_file() are eliminated, its
* code should be moved into this function.
*
* Returns the initialized file, or the error pointer from
* get_empty_filp() if that call fails.
*/
struct file *alloc_file(struct path *path, fmode_t mode,
const struct file_operations *fop)
{
struct file *file;
file = get_empty_filp();
if (IS_ERR(file))
return file;
file-&gt;f_path = *path;
file-&gt;f_inode = path-&gt;dentry-&gt;d_inode;
file-&gt;f_mapping = path-&gt;dentry-&gt;d_inode-&gt;i_mapping;
file-&gt;f_mode = mode;
file-&gt;f_op = fop; // The field in question: f_op is taken directly from the fop argument
/*
* These mounts don't really matter in practice
* for r/o bind mounts. They aren't userspace-
* visible. We do this for consistency, and so
* that we can do debugging checks at __fput()
*/
if ((mode &amp; FMODE_WRITE) &amp;&amp; !special_file(path-&gt;dentry-&gt;d_inode-&gt;i_mode)) {
file_take_write(file);
WARN_ON(mnt_clone_write(path-&gt;mnt));
}
/* Opened with FMODE_READ but without FMODE_WRITE: call i_readcount_inc() on the inode. */
if ((mode &amp; (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_inc(path-&gt;dentry-&gt;d_inode);
return file;
}
EXPORT_SYMBOL(alloc_file);
</code></pre>
<p>而 <code>socket_file_ops</code> 定义如下:</p>
<pre><code class="language-c">// file: net/socket.c
/*
* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
* in the operation structures but are done directly via the socketcall() multiplexor.
*
* This table is what sock_alloc_file() passes to alloc_file(), so it becomes
* the f_op of every struct file created for a socket.
*/
static const struct file_operations socket_file_ops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.aio_read = sock_aio_read,
.aio_write = sock_aio_write,
.poll = sock_poll, // This is the callback that epoll ends up using
.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_sock_ioctl,
#endif
.mmap = sock_mmap,
.open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
.splice_read = sock_splice_read,
};</code></pre>
<p>可以看到,socket 对应的 <code>struct file</code> 上的 <code>f_op-&gt;poll</code>,就是 <code>sock_poll</code> 函数。</p>