最大文件描述符限制
<h2>概述</h2>
<p>1、系统整体:fs.file-max 表示整体系统可打开的最大文件数,但不限制 root 用户(准确地说,是拥有 CAP_SYS_ADMIN 能力的进程)
2、单进程: fs.nr_open 和 soft nofile 都是针对单个进程进行限制。fs.nr_open 是系统级,所有进程都受限制;而 soft nofile 是用户级,可以自由配置</p>
<p><img src="https://www.showdoc.com.cn/server/api/attachment/visitFile?sign=18e915f6a1aabe8091a36cd841769651&amp;file=file.png" alt="" /></p>
<p>3、如果增大 soft nofile,那么 hard nofile 也需要一起调整:soft 不能超过 hard,实际生效值会取两者中的较小值
4、hard nofile 不要超过 fs.nr_open,否则设置该限制时会失败(内核返回 EPERM),可能导致用户无法登录
5、涉及的配置文件:<code>/etc/security/limits.conf</code> 和 <code>/etc/sysctl.conf</code></p>
<h2>分析</h2>
<p>以创建 socket 时调用 <code>sock_map_fd</code> 为例:</p>
<pre><code class="language-c">// file: net/socket.c
/*
 * Bind a socket to a newly reserved file descriptor.
 * Reserves an unused fd, allocates the struct file for the socket and
 * installs it into the fd. Returns the fd on success, or a negative
 * error code if either the fd or the file could not be allocated.
 */
static int sock_map_fd(struct socket *sock, int flags)
{
struct file *newfile;
int fd = get_unused_fd_flags(flags); // two limits are enforced on this path: soft nofile and fs.nr_open
if (unlikely(fd &lt; 0))
return fd;
newfile = sock_alloc_file(sock, flags, NULL); // the system-wide fs.file-max is checked on this path (in get_empty_filp)
if (likely(!IS_ERR(newfile))) {
fd_install(fd, newfile);
return fd;
}
put_unused_fd(fd); // file allocation failed: give the reserved fd back
return PTR_ERR(newfile);
}</code></pre>
<pre><code class="language-c">// file: fs/file.c
/*
 * Reserve an unused fd in the current task's files table.
 * The search starts at 0 and the RLIMIT_NOFILE soft limit is passed as
 * the exclusive upper bound, so the returned fd is always below it.
 */
int get_unused_fd_flags(unsigned flags)
{
return __alloc_fd(current-&gt;files, 0, rlimit(RLIMIT_NOFILE), flags); // RLIMIT_NOFILE = 7
}
EXPORT_SYMBOL(get_unused_fd_flags);
/* Return the soft value of resource limit `limit` for the current task. */
static inline unsigned long rlimit(unsigned int limit)
{
return task_rlimit(current, limit);
}
/* Read the soft value (rlim_cur) of resource limit `limit` for task `tsk`. */
static inline unsigned long task_rlimit(const struct task_struct *tsk,
unsigned int limit)
{
return ACCESS_ONCE(tsk-&gt;signal-&gt;rlim[limit].rlim_cur); // rlim_cur corresponds to "soft nofile" in limits.conf; for reference, rlim_max corresponds to "hard nofile"
}</code></pre>
<p>继续看 <code>__alloc_fd</code>:</p>
<pre><code class="language-c">// file: fs/file.c
/*
* allocate a file descriptor, mark it busy.
*
* The candidate fd is the larger of start and the table's next_fd hint,
* advanced to the next clear bit in open_fds when it lies inside the
* current table. end is an exclusive upper bound on the fd.
*
* Returns the allocated fd on success, -EMFILE when the candidate
* reaches end, or the negative error returned by expand_files().
* files' file_lock is taken on entry and released before returning.
*/
int __alloc_fd(struct files_struct *files,
unsigned start, unsigned end, unsigned flags)
{
unsigned int fd;
int error;
struct fdtable *fdt;
spin_lock(&amp;files-&gt;file_lock);
repeat:
fdt = files_fdtable(files);
fd = start;
if (fd &lt; files-&gt;next_fd)
fd = files-&gt;next_fd;
if (fd &lt; fdt-&gt;max_fds)
fd = find_next_zero_bit(fdt-&gt;open_fds, fdt-&gt;max_fds, fd);
/*
* N.B. For clone tasks sharing a files structure, this test
* will limit the total number of files that can be opened.
*/
error = -EMFILE;
if (fd &gt;= end) // limit exceeded: fail. For get_unused_fd_flags() the bound is rlimit(RLIMIT_NOFILE), i.e. "soft nofile" in limits.conf
goto out;
error = expand_files(files, fd); // bounded by sysctl_nr_open, i.e. fs.nr_open
if (error &lt; 0)
goto out;
/*
* If we needed to expand the fs array we
* might have blocked - try again.
*/
if (error)
goto repeat;
// only advance the next_fd hint when the search did not start beyond it
if (start &lt;= files-&gt;next_fd)
files-&gt;next_fd = fd + 1;
__set_open_fd(fd, fdt);
if (flags &amp; O_CLOEXEC)
__set_close_on_exec(fd, fdt);
else
__clear_close_on_exec(fd, fdt);
error = fd;
#if 1
/* Sanity check */
if (rcu_dereference_raw(fdt-&gt;fd[fd]) != NULL) {
printk(KERN_WARNING &quot;alloc_fd: slot %d not NULL!\n&quot;, fd);
rcu_assign_pointer(fdt-&gt;fd[fd], NULL);
}
#endif
out:
spin_unlock(&amp;files-&gt;file_lock);
return error;
}
/*
* Expand files.
* This function will expand the file structures, if the requested size exceeds
* the current capacity and there is room for expansion.
* Return &lt;0 error code on error; 0 when nothing done; 1 when files were
* expanded and execution may have blocked.
* The files-&gt;file_lock should be held on entry, and will be held on exit.
*/
static int expand_files(struct files_struct *files, int nr)
{
struct fdtable *fdt;
fdt = files_fdtable(files);
/* Do we need to expand? */
if (nr &lt; fdt-&gt;max_fds)
return 0;
/* Can we expand? */
if (nr &gt;= sysctl_nr_open) // enforce the per-process ceiling, i.e. fs.nr_open
return -EMFILE;
/* All good, so we try */
return expand_fdtable(files, nr);
}</code></pre>
<p>再看 <code>sock_alloc_file</code>:</p>
<pre><code class="language-c">// file: net/socket.c
/*
 * Abbreviated excerpt: only the call that allocates the struct file for
 * the socket is shown; declarations and the rest of the body are elided.
 */
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
file = alloc_file(&amp;path, FMODE_READ | FMODE_WRITE,
&amp;socket_file_ops);
// ...
}
// file: fs/file_table.c
/*
 * Abbreviated excerpt: obtains an empty struct file from get_empty_filp()
 * and returns its error pointer unchanged on failure; the remainder of the
 * body is elided.
 */
struct file *alloc_file(struct path *path, fmode_t mode,
const struct file_operations *fop)
{
struct file *file;
file = get_empty_filp();
if (IS_ERR(file))
return file;
// ...
}
struct file *get_empty_filp(void)
{
const struct cred *cred = current_cred();
static long old_max;
struct file *f;
int error;
/*
* Privileged users can go above max_files
*/
if (get_nr_files() &gt;= files_stat.max_files &amp;&amp; !capable(CAP_SYS_ADMIN)) { // files_stat.max_files 即 fs.file-max,表示整个系统可打开的最大文件数。注意 root 不受限制
/*
* percpu_counters are inaccurate. Do an expensive check before
* we go and fail.
*/
if (percpu_counter_sum_positive(&amp;nr_files) &gt;= files_stat.max_files)
goto over;
}
// ...</code></pre>