We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
我们常说内核里面默认有raw, mangle, filter, nat这几张表,这些表其实在内核的结构体里面有定义,如下:
// net 结构体的定义 struct net { //....... struct list_head list; /* list of network namespaces */ struct netns_core core; struct netns_mib mib; struct netns_packet packet; struct netns_unix unx; struct netns_ipv4 ipv4; //....... }; // netns_ipv4里面为几张表预留了位置 struct netns_ipv4 { ...... #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; struct xt_table *iptable_mangle; struct xt_table *iptable_raw; struct xt_table *arptable_filter; #ifdef CONFIG_SECURITY struct xt_table *iptable_security; #endif struct xt_table *nat_table; struct hlist_head *nat_bysource; unsigned int nat_htable_size; int nat_vmalloced; #endif ...... }; #endif
这几张表在系统初始化的时候会注册上去,每个表里面会注册netfilter框架自己的hook点和回调函数,例如nat表会在NF_INET_PRE_ROUTING这个HOOK点注册nf_nat_in函数。而NF_INET_PRE_ROUTING这个HOOK点的回调在ip_rcv和ip_rcv_finish之间通过NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish);来调用。用户通过iptables命令配置的匹配条件和动作又是在哪儿被调用的呢?netfilter自己定义的HOOK回调里面会视情况调用ipt_do_table,例如在nat表的nf_nat_in回调里,只有新建的连接才会走ipt_do_table,已经建立的连接不会再走,所以在nat表里面无法做数据包统计这样的操作。ipt_do_table才是真正执行用户设置的匹配条件和target动作的地方。
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish);
以filter表为例,filter表的代码在net/ipv4/netfilter/iptable_filter.c里面,首先iptable_filter_init把自己这个子系统的ops注册在net里面,net初始化(如何初始化还待查清)时会调用下面的回调,这个回调里面把table注册在了ipv4.iptable_filter
/*
 * Per-namespace init callback of the filter table.
 *
 * Builds the initial rule blob for packet_filter, registers the table for
 * @net and stores the resulting table pointer in net->ipv4.iptable_filter.
 *
 * Returns 0 on success, -ENOMEM if the initial table cannot be allocated,
 * or the error encoded in the pointer returned by ipt_register_table().
 */
static int __net_init iptable_filter_net_init(struct net *net)
{
	struct ipt_replace *repl;

	repl = ipt_alloc_initial_table(&packet_filter);
	if (repl == NULL)
		return -ENOMEM;
	/* Entry 1 is the FORWARD hook */
	/*
	 * NOTE(review): `forward` is defined outside this excerpt; the
	 * expression presumably stores it as the default FORWARD verdict in
	 * the negative "-verdict - 1" encoding -- confirm.
	 */
	((struct ipt_standard *)repl->entries)[1].target.verdict =
		-forward - 1;

	/* This is where struct net gets tied to the filter table. */
	net->ipv4.iptable_filter =
		ipt_register_table(net, &packet_filter, repl);
	/* The replace blob is freed here whether or not registration worked. */
	kfree(repl);
	if (IS_ERR(net->ipv4.iptable_filter))
		return PTR_ERR(net->ipv4.iptable_filter);
	return 0;
}
在iptable_filter模块初始化的回调里面通过xt_hook_link(&packet_filter, iptable_filter_hook);来注册HOOK,其中packet_filter是表的定义(它的valid_hooks字段指明了要挂接的HOOK点),而iptable_filter_hook则是回调,代码如下:
xt_hook_link(&packet_filter, iptable_filter_hook);
#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_FILTER, }; static unsigned int iptable_filter_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net; if (hook == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; net = dev_net((in != NULL) ? in : out); // 直接调用ipt_do_table, 所以在filter表添加自己的规则可以确保每个包都能进来 return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter); } struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) { unsigned int hook_mask = table->valid_hooks; uint8_t i, num_hooks = hweight32(hook_mask); // HOOK点个数,valid_hooks有几位置1 uint8_t hooknum; struct nf_hook_ops *ops; int ret; ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); //初始化ops结构 for (i = 0, hooknum = 0; i < num_hooks && hook_mask != 0; hook_mask >>= 1, ++hooknum) { if (!(hook_mask & 1)) continue; ops[i].hook = fn; ops[i].owner = table->me; ops[i].pf = table->af; ops[i].hooknum = hooknum; ops[i].priority = table->priority; ++i; } //注册 ret = nf_register_hooks(ops, num_hooks); if (ret < 0) { kfree(ops); return ERR_PTR(ret); } return ops; }
nf_register_hooks里面会多次调用nf_register_hook来注册多个hook,nf_register_hook源码如下:
int nf_register_hook(struct nf_hook_ops *reg) { struct nf_hook_ops *elem; int err; err = mutex_lock_interruptible(&nf_hook_mutex); if (err < 0) return err; // 遍历nf_hooks全局数组 list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) { if (reg->priority < elem->priority) break; } // 加入链表 list_add_rcu(®->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); return 0; }
nf_hooks是一个链表数组,定义为struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS],即每一个元素代表着某个协议的某个钩子点的注册钩子链表。
struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]
以IPV4的PREROUTING钩子点为例,前面讲过,钩子调用是通过NF_HOOK这个宏实现,类似的还有一个宏叫NF_HOOK_COND。下面直接看NF_HOOK实现的主要函数nf_hook_slow,如下:
/* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. */ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int hook_thresh) { struct list_head *elem; unsigned int verdict; int ret = 0; /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); // 找到了对应协议对应钩子点的链表头 elem = &nf_hooks[pf][hook]; next_hook: verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev, outdev, &elem, okfn, hook_thresh); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; // 继续调ip_rcv_finish } else if (verdict == NF_DROP) { kfree_skb(skb); ret = -EPERM; // ip_rcv之后就不掉ip_rcv_finish了 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS)) goto next_hook; } rcu_read_unlock(); return ret; } unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, unsigned int hook, const struct net_device *indev, const struct net_device *outdev, struct list_head **i, int (*okfn)(struct sk_buff *), int hook_thresh) { unsigned int verdict; /* * The caller must not block between calls to this * function because of risk of continuing from deleted element. */ list_for_each_continue_rcu(*i, head) { struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; if (hook_thresh > elem->priority) continue; /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ // 执行回调 verdict = elem->hook(hook, skb, indev, outdev, okfn); // 这里可以看出来如果一个表里accept,会继续走下一个表 if (verdict != NF_ACCEPT) { if (verdict != NF_REPEAT) return verdict; *i = (*i)->prev; } } return NF_ACCEPT; }
The text was updated successfully, but these errors were encountered:
No branches or pull requests
整体框架
我们常说内核里面默认有raw, mangle, filter, nat这几张表,这些表其实在内核的结构体里面有定义,如下:
这几张表在系统初始化的时候会注册上去,每个表里面会注册netfilter框架自己的hook点和回调函数,例如nat表会在NF_INET_PRE_ROUTING这个HOOK点注册nf_nat_in函数。而NF_INET_PRE_ROUTING这个HOOK点的回调在ip_rcv和ip_rcv_finish之间通过
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish);
来调用。用户通过iptables命令配置的匹配条件和动作又是在哪儿被调用的呢?
netfilter自己定义的HOOK回调里面会视情况调用ipt_do_table,例如在nat表的nf_nat_in回调里,只有新建的连接才会走ipt_do_table,已经建立的连接不会再走,所以在nat表里面无法做数据包统计这样的操作。ipt_do_table才是真正执行用户设置的匹配条件和target动作的地方。
表的注册
以filter表为例,filter表的代码在net/ipv4/netfilter/iptable_filter.c里面,首先iptable_filter_init把自己这个子系统的ops注册在net里面,net初始化(如何初始化还待查清)时会调用下面的回调,这个回调里面把table注册在了ipv4.iptable_filter
HOOK点的注册
在iptable_filter模块初始化的回调里面
xt_hook_link(&packet_filter, iptable_filter_hook);
来注册HOOK,其中packet_filter是定义的HOOK点,而iptable_filter_hook则是回调,代码如下:nf_register_hooks里面会多次调用nf_register_hook来注册多个hook,nf_register_hook源码如下:
nf_hooks是一个链表数组,定义为
struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]
,即每一个元素代表着某个协议的某个钩子点的注册钩子链表。
HOOK点的调用
以IPV4的PREROUTING钩子点为例,前面讲过,钩子调用是通过NF_HOOK这个宏实现,类似的还有一个宏叫NF_HOOK_COND。下面直接看NF_HOOK实现的主要函数nf_hook_slow,如下:
The text was updated successfully, but these errors were encountered: