公开学习文档

公开学习文档


命名空间

<h2>概述</h2> <h2>分析</h2> <pre><code class="language-c">// file: include/linux/sched.h struct task_struct { /* namespaces */ struct nsproxy *nsproxy; // ... } // file: include/linux/nsproxy.h /* * A structure to contain pointers to all per-process * namespaces - fs (mount), uts, network, sysvipc, etc. * * 'count' is the number of tasks holding a reference. * The count for each namespace, then, will be the number * of nsproxies pointing to it, not the number of tasks. * * The nsproxy is shared by tasks which share all namespaces. * As soon as a single namespace is cloned or unshared, the * nsproxy is copied. */ struct nsproxy { atomic_t count; struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns; struct net *net_ns; // 网络命名空间 };</code></pre> <p>继续:</p> <pre><code class="language-c">// file: include/net/net_namespace.h struct net { atomic_t passive; /* To decided when the network * namespace should be freed. */ atomic_t count; /* To decided when the network * namespace should be shut down. 
*/ #ifdef NETNS_REFCNT_DEBUG atomic_t use_count; /* To track references we * destroy on demand */ #endif spinlock_t rules_mod_lock; struct list_head list; /* list of network namespaces */ struct list_head cleanup_list; /* namespaces on death row */ struct list_head exit_list; /* Use only net_mutex */ struct user_namespace *user_ns; /* Owning user namespace */ unsigned int proc_inum; struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; #ifdef CONFIG_SYSCTL struct ctl_table_set sysctls; #endif struct sock *rtnl; /* rtnetlink socket */ struct sock *genl_sock; struct list_head dev_base_head; struct hlist_head *dev_name_head; struct hlist_head *dev_index_head; unsigned int dev_base_seq; /* protected by rtnl_mutex */ int ifindex; /* core fib_rules */ struct list_head rules_ops; struct net_device *loopback_dev; /* The loopback */ // 每个 net 中都有一个回环设备 struct netns_core core; struct netns_mib mib; struct netns_packet packet; struct netns_unix unx; struct netns_ipv4 ipv4; // 路由表、netfilter 都在这里 #if IS_ENABLED(CONFIG_IPV6) struct netns_ipv6 ipv6; #endif #if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE) struct netns_sctp sctp; #endif #if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) struct netns_dccp dccp; #endif #ifdef CONFIG_NETFILTER struct netns_nf nf; struct netns_xt xt; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag nf_frag; #endif struct sock *nfnl; struct sock *nfnl_stash; #endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; #endif struct net_generic __rcu *gen; /* Note : following structs are cache line aligned */ #ifdef CONFIG_XFRM struct netns_xfrm xfrm; #endif struct netns_ipvs *ipvs; struct sock *diag_nlsk; atomic_t rt_genid; }; // file: include/net/netns/ipv4.h struct netns_ipv4 { #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; struct ctl_table_header *frags_hdr; struct ctl_table_header 
*ipv4_hdr; struct ctl_table_header *route_hdr; struct ctl_table_header *xfrm4_hdr; #endif struct ipv4_devconf *devconf_all; struct ipv4_devconf *devconf_dflt; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; struct fib_table *fib_local; // 路由表 struct fib_table *fib_main; struct fib_table *fib_default; #endif #ifdef CONFIG_IP_ROUTE_CLASSID int fib_num_tclassid_users; #endif struct hlist_head *fib_table_hash; struct sock *fibnl; struct sock **icmp_sk; struct inet_peer_base *peers; struct tcpm_hash_bucket *tcp_metrics_hash; unsigned int tcp_metrics_hash_log; struct sock * __percpu *tcp_sk; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; // netfilter struct xt_table *iptable_mangle; struct xt_table *iptable_raw; struct xt_table *arptable_filter; #ifdef CONFIG_SECURITY struct xt_table *iptable_security; #endif struct xt_table *nat_table; #endif int sysctl_icmp_echo_ignore_all; int sysctl_icmp_echo_ignore_broadcasts; int sysctl_icmp_ignore_bogus_error_responses; int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; int sysctl_icmp_errors_use_inbound_ifaddr; int sysctl_tcp_ecn; kgid_t sysctl_ping_group_range[2]; long sysctl_tcp_mem[3]; // 内核参数 atomic_t dev_addr_genid; #ifdef CONFIG_IP_MROUTE #ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES struct mr_table *mrt; #else struct list_head mr_tables; struct fib_rules_ops *mr_rules_ops; #endif #endif }; </code></pre> <h2>默认命名空间初始化</h2> <pre><code class="language-c">// file: init/init_task.c /* Initial task structure */ struct task_struct init_task = INIT_TASK(init_task); EXPORT_SYMBOL(init_task); // file: include/linux/init_task.h /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. 
Base=0, limit=0x1fffff (=2MB) */ #define INIT_TASK(tsk) \ { \ .state = 0, \ .stack = &amp;init_thread_info, \ .usage = ATOMIC_INIT(2), \ .flags = PF_KTHREAD, \ .prio = MAX_PRIO-20, \ .static_prio = MAX_PRIO-20, \ .normal_prio = MAX_PRIO-20, \ .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ .nr_cpus_allowed= NR_CPUS, \ .mm = NULL, \ .active_mm = &amp;init_mm, \ .se = { \ .group_node = LIST_HEAD_INIT(tsk.se.group_node), \ }, \ .rt = { \ .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ .time_slice = RR_TIMESLICE, \ }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ INIT_PUSHABLE_TASKS(tsk) \ INIT_CGROUP_SCHED(tsk) \ .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ .real_parent = &amp;tsk, \ .parent = &amp;tsk, \ .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &amp;tsk, \ RCU_POINTER_INITIALIZER(real_cred, &amp;init_cred), \ RCU_POINTER_INITIALIZER(cred, &amp;init_cred), \ .comm = INIT_TASK_COMM, \ .thread = INIT_THREAD, \ .fs = &amp;init_fs, \ .files = &amp;init_files, \ .signal = &amp;init_signals, \ .sighand = &amp;init_sighand, \ .nsproxy = &amp;init_nsproxy, \ // 命名空间赋值为 init_nsproxy .pending = { \ .list = LIST_HEAD_INIT(tsk.pending.list), \ .signal = {{0}}}, \ .blocked = {{0}}, \ .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ .timer_slack_ns = 50000, /* 50 usec default slack */ \ .pids = { \ [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \ }, \ .thread_group = LIST_HEAD_INIT(tsk.thread_group), \ .thread_node = LIST_HEAD_INIT(init_signals.thread_head), \ INIT_IDS \ INIT_PERF_EVENTS(tsk) \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ 
INIT_CPUSET_SEQ \ INIT_VTIME(tsk) \ } // file: kernel/nsproxy.c struct nsproxy init_nsproxy = { .count = ATOMIC_INIT(1), .uts_ns = &amp;init_uts_ns, #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) .ipc_ns = &amp;init_ipc_ns, #endif .mnt_ns = NULL, .pid_ns = &amp;init_pid_ns, #ifdef CONFIG_NET .net_ns = &amp;init_net, // 初始化的网络命名空间 #endif }; </code></pre> <h2>init_net 初始化</h2> <pre><code class="language-c">// file: net/core/net_namespace.c struct net init_net = { .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), }; EXPORT_SYMBOL(init_net); static int __init net_ns_init(void) { struct net_generic *ng; #ifdef CONFIG_NET_NS net_cachep = kmem_cache_create(&quot;net_namespace&quot;, sizeof(struct net), SMP_CACHE_BYTES, SLAB_PANIC, NULL); /* Create workqueue for cleanup */ netns_wq = create_singlethread_workqueue(&quot;netns&quot;); if (!netns_wq) panic(&quot;Could not create netns workq&quot;); #endif ng = net_alloc_generic(); if (!ng) panic(&quot;Could not allocate generic netns&quot;); rcu_assign_pointer(init_net.gen, ng); mutex_lock(&amp;net_mutex); if (setup_net(&amp;init_net, &amp;init_user_ns)) // 初始化 panic(&quot;Could not setup the initial network namespace&quot;); rtnl_lock(); list_add_tail_rcu(&amp;init_net.list, &amp;net_namespace_list); // 加入到全局变量 net_namespace_list 中 rtnl_unlock(); mutex_unlock(&amp;net_mutex); register_pernet_subsys(&amp;net_ns_ops); // 将 net_ns_ops 注册到 pernet_list 中 return 0; } /* * setup_net runs the initializers for the network namespace object. 
*/ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { /* Must be called with net_mutex held */ const struct pernet_operations *ops, *saved_ops; int error = 0; LIST_HEAD(net_exit_list); atomic_set(&amp;net-&gt;count, 1); atomic_set(&amp;net-&gt;passive, 1); net-&gt;dev_base_seq = 1; net-&gt;user_ns = user_ns; #ifdef NETNS_REFCNT_DEBUG atomic_set(&amp;net-&gt;use_count, 0); #endif list_for_each_entry(ops, &amp;pernet_list, list) { // 调用每一个子系统的初始化函数。其中 pernet_list 是全局变量 error = ops_init(ops, net); if (error &lt; 0) goto out_undo; } out: return error; out_undo: /* Walk through the list backwards calling the exit functions * for the pernet modules whose init functions did not fail. */ list_add(&amp;net-&gt;exit_list, &amp;net_exit_list); saved_ops = ops; list_for_each_entry_continue_reverse(ops, &amp;pernet_list, list) ops_exit_list(ops, &amp;net_exit_list); ops = saved_ops; list_for_each_entry_continue_reverse(ops, &amp;pernet_list, list) ops_free_list(ops, &amp;net_exit_list); rcu_barrier(); goto out; }</code></pre> <h2>子系统 pernet ops</h2> <p>以路由为例:</p> <pre><code class="language-c">// file: net/ipv4/fib_frontend.c static struct pernet_operations fib_net_ops = { .init = fib_net_init, .exit = fib_net_exit, }; void __init ip_fib_init(void) { fib_trie_init(); register_pernet_subsys(&amp;fib_net_ops); // 注册到 pernet_list 中 register_netdevice_notifier(&amp;fib_netdev_notifier); register_inetaddr_notifier(&amp;fib_inetaddr_notifier); rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); } </code></pre>

页面列表

ITEM_HTML