linux netfilter框架代码分析 #12

leon0625 · 2018-05-10T13:43:01Z

整体框架

　　我们常说内核里默认有raw、mangle、filter、nat这几张表，这些表其实在内核的结构体里面有定义，如下：

/*
 * Excerpt of the definition of struct net.
 * Members omitted by the article are marked with "//.......".
 */
struct net {
        //.......
	struct list_head	list;		/* list of network namespaces */
	struct netns_core	core;
	struct netns_mib	mib;
	struct netns_packet	packet;
	struct netns_unix	unx;
	struct netns_ipv4	ipv4;		/* IPv4 state; see struct netns_ipv4 for the table pointers */
       //.......
};
/*
 * Excerpt of struct netns_ipv4: it reserves a pointer slot for each of the
 * built-in tables.  Lines consisting only of "......" mark members that
 * the article left out.
 */
struct netns_ipv4 {
......
#ifdef CONFIG_NETFILTER
	struct xt_table		*iptable_filter;	/* assigned in iptable_filter_net_init() */
	struct xt_table		*iptable_mangle;
	struct xt_table		*iptable_raw;
	struct xt_table		*arptable_filter;
#ifdef CONFIG_SECURITY
	struct xt_table		*iptable_security;	/* only present with CONFIG_SECURITY */
#endif
	struct xt_table		*nat_table;
	struct hlist_head	*nat_bysource;
	unsigned int		nat_htable_size;
	int			nat_vmalloced;
#endif
......
};
#endif

　　这几张表在系统初始化的时候会注册上去，每个表里面会注册netfilter框架自己的hook点和回调函数，例如nat表会在NF_INET_PRE_ROUTING这个HOOK点注册nf_nat_in函数。而NF_INET_PRE_ROUTING这个HOOK点的回调在ip_rcv和ip_rcv_finish之间通过NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish);来调用。
　　那么，用户通过iptables命令配置的匹配条件和动作（target）又是在哪儿被调用的呢？
　　netfilter自己定义的HOOK回调里面会视情况调用ipt_do_table，例如在nat表的nf_nat_in回调里，只有当数据包属于一条新连接时才会走ipt_do_table，已经建立的连接就不会走，所以在nat表里面无法做数据包统计这样的操作，而ipt_do_table就是真正执行用户设置的匹配条件和target动作的地方。

表的注册

　　以filter表为例，filter表的代码在net/ipv4/netfilter/iptable_filter.c里面，首先iptable_filter_init把自己这个子系统的ops注册在net里面，net初始化（如何初始化还待查清）时会调用下面的回调，这个回调里面把table注册在了ipv4.iptable_filter

/*
 * Per-net init callback for the IPv4 filter table.
 *
 * Builds the initial rule set from packet_filter, patches the verdict of
 * entry 1 (the FORWARD hook) from "forward" (defined outside this
 * excerpt), registers the table and stores the result in
 * net->ipv4.iptable_filter.
 *
 * Returns 0 on success, -ENOMEM if the initial table cannot be allocated,
 * or the error carried by the pointer that ipt_register_table() returned.
 */
static int __net_init iptable_filter_net_init(struct net *net)
{
	struct ipt_replace *initial = ipt_alloc_initial_table(&packet_filter);
	struct ipt_standard *rules;

	if (!initial)
		return -ENOMEM;

	/* Entry 1 is the FORWARD hook */
	rules = (struct ipt_standard *)initial->entries;
	rules[1].target.verdict = -forward - 1;

	/* This is where the net structure gets linked to the filter table. */
	net->ipv4.iptable_filter = ipt_register_table(net, &packet_filter,
						      initial);

	/* The template is freed whether or not registration succeeded. */
	kfree(initial);

	return IS_ERR(net->ipv4.iptable_filter) ?
	       PTR_ERR(net->ipv4.iptable_filter) : 0;
}

HOOK点的注册

　　在iptable_filter模块初始化的回调里面，通过xt_hook_link(&packet_filter, iptable_filter_hook);来注册HOOK，其中packet_filter是表的定义（它的valid_hooks字段指明了要挂接的HOOK点），而iptable_filter_hook则是回调，代码如下：

/*
 * Bitmask of the hook points used by the filter table: one bit each for
 * NF_INET_LOCAL_IN, NF_INET_FORWARD and NF_INET_LOCAL_OUT.
 */
#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
			    (1 << NF_INET_FORWARD) | \
			    (1 << NF_INET_LOCAL_OUT))

/*
 * Static description of the IPv4 "filter" table.  xt_hook_link() reads
 * valid_hooks, me, af and priority from this structure to fill in one
 * nf_hook_ops entry per bit set in valid_hooks.
 */
static const struct xt_table packet_filter = {
	.name		= "filter",
	.valid_hooks	= FILTER_VALID_HOOKS,	/* LOCAL_IN, FORWARD, LOCAL_OUT */
	.me		= THIS_MODULE,
	.af		= NFPROTO_IPV4,
	.priority	= NF_IP_PRI_FILTER,
};

static unsigned int
iptable_filter_hook(unsigned int hook, struct sk_buff *skb,
		    const struct net_device *in, const struct net_device *out,
		    int (*okfn)(struct sk_buff *))
{
	const struct net *net;

	if (hook == NF_INET_LOCAL_OUT &&
	    (skb->len < sizeof(struct iphdr) ||
	     ip_hdrlen(skb) < sizeof(struct iphdr)))
		/* root is playing with raw sockets. */
		return NF_ACCEPT;

	net = dev_net((in != NULL) ? in : out);
        // 直接调用ipt_do_table, 所以在filter表添加自己的规则可以确保每个包都能进来
	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter);
}

/*
 * Build and register one nf_hook_ops entry per hook point of a table.
 *
 * @table: table description; valid_hooks, me, af and priority are read.
 * @fn:    callback installed for every hook point.
 *
 * Returns the array of registered ops on success, ERR_PTR(-ENOMEM) if the
 * array cannot be allocated, or ERR_PTR(err) with the negative value
 * returned by nf_register_hooks() (the array is freed in that case).
 */
struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
{
    unsigned int pending = table->valid_hooks;
    /* Number of hook points == number of bits set in valid_hooks. */
    uint8_t count = hweight32(pending);
    uint8_t filled = 0;
    uint8_t bit;
    struct nf_hook_ops *ops;
    int err;

    ops = kmalloc(sizeof(*ops) * count, GFP_KERNEL);
    if (!ops)
        return ERR_PTR(-ENOMEM);

    /* Fill one slot for each set bit; the bit position is the hook number. */
    for (bit = 0; filled < count && pending != 0; pending >>= 1, ++bit) {
        struct nf_hook_ops *slot;

        if ((pending & 1) == 0)
            continue;

        slot = &ops[filled];
        slot->hook     = fn;
        slot->owner    = table->me;
        slot->pf       = table->af;
        slot->hooknum  = bit;
        slot->priority = table->priority;
        ++filled;
    }

    /* Register all entries in one go; on failure nothing is kept. */
    err = nf_register_hooks(ops, count);
    if (err < 0) {
        kfree(ops);
        return ERR_PTR(err);
    }

    return ops;
}

　　nf_register_hooks里面会多次调用nf_register_hook来注册多个hook，nf_register_hook源码如下：

/*
 * Register a single netfilter hook.
 *
 * Takes nf_hook_mutex (interruptibly), walks the list
 * nf_hooks[reg->pf][reg->hooknum] up to the first entry whose priority is
 * greater than reg->priority, and links reg in immediately before that
 * entry.  Entries with a priority equal to reg->priority are therefore
 * passed over, so reg ends up after them.
 *
 * Returns 0 on success, or the negative value returned by
 * mutex_lock_interruptible() if the lock could not be taken.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
    struct nf_hook_ops *elem;
    int err;

    err = mutex_lock_interruptible(&nf_hook_mutex);
    if (err < 0)
        return err;
    /* Walk the hook list for this protocol/hook point in the global nf_hooks array. */
    list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
        if (reg->priority < elem->priority)
            break;
    }
    /*
     * Insert reg in front of the entry the loop stopped on.
     * NOTE(review): when the loop runs to completion, elem->list is
     * presumably the list head itself (usual list_for_each_entry
     * behaviour), which makes this an append at the tail - confirm
     * against linux/list.h.
     */
    list_add_rcu(&reg->list, elem->list.prev);
    mutex_unlock(&nf_hook_mutex);
    return 0;
}

　　nf_hooks是一个链表数组，定义为struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]，即每一个元素代表着某个协议的某个钩子点的注册钩子链表。

HOOK点的调用

　　以IPV4的PREROUTING钩子点为例，前面讲过，钩子调用是通过NF_HOOK这个宏实现，类似的还有一个宏叫NF_HOOK_COND。下面直接看NF_HOOK实现的主要函数nf_hook_slow，如下：

/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise. */
/*
 * Runs the hooks registered for (pf, hook) on skb under rcu_read_lock()
 * and turns the resulting verdict into the return value above:
 *   - NF_ACCEPT / NF_STOP: return 1.
 *   - NF_DROP: the skb is freed here and -EPERM is returned.
 *   - NF_QUEUE: the skb is handed to nf_queue(); if nf_queue() returns 0
 *     the walk resumes from the current position (elem), otherwise 0 is
 *     returned.
 * Any other verdict leaves the return value at 0.
 */
int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
         struct net_device *indev,
         struct net_device *outdev,
         int (*okfn)(struct sk_buff *),
         int hook_thresh)
{
    struct list_head *elem;
    unsigned int verdict;
    int ret = 0;

    /* We may already have this, but read-locks nest anyway */
    rcu_read_lock();

    /* Start from the list head for this protocol and hook point. */
    elem = &nf_hooks[pf][hook];
next_hook:
    verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
                 outdev, &elem, okfn, hook_thresh);
    if (verdict == NF_ACCEPT || verdict == NF_STOP) {
        ret = 1;    /* caller goes on to run okfn (ip_rcv_finish in the PRE_ROUTING example) */
    } else if (verdict == NF_DROP) {
        kfree_skb(skb);
        ret = -EPERM;  /* packet dropped: the caller (ip_rcv) will not call ip_rcv_finish */
    } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
        /* The bits above NF_VERDICT_BITS are passed on to nf_queue(). */
        if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
                  verdict >> NF_VERDICT_BITS))
            goto next_hook;
    }
    rcu_read_unlock();
    return ret;
}

/*
 * Run the hooks on the list "head", continuing after the position *i.
 *
 * Hooks whose priority is below hook_thresh are skipped.  Each remaining
 * hook is called in turn:
 *   - NF_ACCEPT: move on to the next hook.
 *   - NF_REPEAT: step the cursor back so the same hook is called again.
 *   - anything else: return that verdict at once; *i is left on the hook
 *     that produced it.
 *
 * Returns NF_ACCEPT when the end of the list is reached.
 */
unsigned int nf_iterate(struct list_head *head,
            struct sk_buff *skb,
            unsigned int hook,
            const struct net_device *indev,
            const struct net_device *outdev,
            struct list_head **i,
            int (*okfn)(struct sk_buff *),
            int hook_thresh)
{
    unsigned int verdict;

    /*
     * The caller must not block between calls to this
     * function because of risk of continuing from deleted element.
     */
    list_for_each_continue_rcu(*i, head) {
        /*
         * NOTE(review): this cast treats the list node address as the
         * nf_hook_ops address, which presumably relies on "list" being
         * the first member of struct nf_hook_ops - confirm against the
         * struct definition.
         */
        struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

        if (hook_thresh > elem->priority)
            continue;

        /* Optimization: we don't need to hold module
           reference here, since function can't sleep. --RR */
        /* Invoke the registered callback. */
        verdict = elem->hook(hook, skb, indev, outdev, okfn);
        /* An NF_ACCEPT from one hook (table) does not end the walk: the next one is run. */
        if (verdict != NF_ACCEPT) {
            if (verdict != NF_REPEAT)
                return verdict;
            /* NF_REPEAT: rewind one node so the loop advances onto this hook again. */
            *i = (*i)->prev;
        }
    }
    return NF_ACCEPT;
}

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

linux netfilter框架代码分析 #12

linux netfilter框架代码分析 #12

leon0625 commented May 10, 2018 •

edited

Loading

linux netfilter框架代码分析 #12

linux netfilter框架代码分析 #12

Comments

leon0625 commented May 10, 2018 • edited Loading

整体框架

表的注册

HOOK点的注册

HOOK点的调用

leon0625 commented May 10, 2018 •

edited

Loading