Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

linux netfilter框架代码分析 #12

Open
leon0625 opened this issue May 10, 2018 · 0 comments
Open

linux netfilter框架代码分析 #12

leon0625 opened this issue May 10, 2018 · 0 comments

Comments

@leon0625
Copy link
Owner

leon0625 commented May 10, 2018

整体框架

  我们常说的在内核里面默认有raw, mangle, filter, nat表,这些表其实在内核的结构体里面有定义,如下:

// Definition of struct net (excerpt; only the members relevant here are shown)
struct net {
        // ... (members omitted from this excerpt)
	struct list_head	list;		/* list of network namespaces */
	struct netns_core	core;
	struct netns_mib	mib;
	struct netns_packet	packet;
	struct netns_unix	unx;
	struct netns_ipv4	ipv4;		/* IPv4 state; struct netns_ipv4 holds the table pointers */
       // ... (members omitted from this excerpt)
};
/*
 * struct netns_ipv4 (excerpt) reserves one pointer slot for each of the
 * built-in tables. Members outside the netfilter section are omitted;
 * the omissions are marked with comments so the excerpt stays valid C.
 *
 * NOTE(review): the original excerpt ended with an extra "#endif" that has
 * no matching #if/#ifdef in the visible text (presumably the header's
 * include guard -- confirm). It is dropped here to keep the conditionals
 * balanced.
 */
struct netns_ipv4 {
	/* ... (members omitted from this excerpt) ... */
#ifdef CONFIG_NETFILTER
	struct xt_table		*iptable_filter;
	struct xt_table		*iptable_mangle;
	struct xt_table		*iptable_raw;
	struct xt_table		*arptable_filter;
#ifdef CONFIG_SECURITY
	struct xt_table		*iptable_security;
#endif
	struct xt_table		*nat_table;
	struct hlist_head	*nat_bysource;
	unsigned int		nat_htable_size;
	int			nat_vmalloced;
#endif
	/* ... (members omitted from this excerpt) ... */
};

  这几张表在系统初始化的时候会注册上去,每个表里面会注册netfilter框架自己的hook点和回调函数,例如nat表会在NF_INET_PRE_ROUTING这个HOOK点注册nf_nat_in函数。而NF_INET_PRE_ROUTING这个HOOK点的回调在ip_rcv和ip_rcv_finish之间通过NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish);来调用。
  用户自己通过iptables命令添加的匹配条件和动作又是在哪儿被调用的呢?
  netfilter自己定义的HOOK回调里面会视情况调用ipt_do_table,例如nat表的nf_nat_in回调,只有当数据包属于一条新连接时才会走ipt_do_table,已经建立的连接就不会走,所以在nat表里面无法做数据包统计这样的操作。ipt_do_table才是真正执行用户设置的匹配条件和target动作的地方。

表的注册

  以filter表为例,filter表的代码在net/ipv4/netfilter/iptable_filter.c里面,首先iptable_filter_init把自己这个子系统的ops注册在net里面,net初始化(如何初始化还待查清)时会调用下面的回调,这个回调里面把table注册在了ipv4.iptable_filter

/*
 * Per-namespace init callback for the filter table.
 *
 * Allocates the initial ruleset from packet_filter, patches the verdict of
 * entry 1 (the FORWARD hook), registers the table and stores the result in
 * net->ipv4.iptable_filter, which ties the namespace to its filter table.
 *
 * Returns 0 on success, -ENOMEM when the initial table cannot be allocated,
 * or the error carried by the pointer returned from ipt_register_table().
 */
static int __net_init iptable_filter_net_init(struct net *net)
{
	struct ipt_replace *initial;
	struct ipt_standard *std_entries;

	initial = ipt_alloc_initial_table(&packet_filter);
	if (!initial)
		return -ENOMEM;

	/*
	 * Entry 1 is the FORWARD hook.
	 * NOTE(review): 'forward' is defined outside this excerpt; presumably
	 * the configured default FORWARD policy -- confirm.
	 */
	std_entries = (struct ipt_standard *)initial->entries;
	std_entries[1].target.verdict = -forward - 1;

	/* The namespace now points at its registered filter table. */
	net->ipv4.iptable_filter = ipt_register_table(net, &packet_filter,
						      initial);
	/* The replace blob is freed whether or not registration succeeded. */
	kfree(initial);

	if (!IS_ERR(net->ipv4.iptable_filter))
		return 0;
	return PTR_ERR(net->ipv4.iptable_filter);
}

HOOK点的注册

  在iptable_filter模块初始化的回调里面通过xt_hook_link(&packet_filter, iptable_filter_hook);来注册HOOK,其中packet_filter是表的定义(它的valid_hooks字段指明了要挂接的HOOK点),而iptable_filter_hook则是回调,代码如下:

/* Bitmask of the hook points used by the filter table: one bit per hook. */
#define FILTER_VALID_HOOKS			\
	((1 << NF_INET_LOCAL_IN) |		\
	 (1 << NF_INET_FORWARD) |		\
	 (1 << NF_INET_LOCAL_OUT))

/*
 * Static description of the IPv4 "filter" table. Its valid_hooks mask is
 * what xt_hook_link() walks to create one hook registration per set bit.
 */
static const struct xt_table packet_filter = {
	.name		= "filter",
	.af		= NFPROTO_IPV4,
	.me		= THIS_MODULE,
	.valid_hooks	= FILTER_VALID_HOOKS,
	.priority	= NF_IP_PRI_FILTER,
};

static unsigned int
iptable_filter_hook(unsigned int hook, struct sk_buff *skb,
		    const struct net_device *in, const struct net_device *out,
		    int (*okfn)(struct sk_buff *))
{
	const struct net *net;

	if (hook == NF_INET_LOCAL_OUT &&
	    (skb->len < sizeof(struct iphdr) ||
	     ip_hdrlen(skb) < sizeof(struct iphdr)))
		/* root is playing with raw sockets. */
		return NF_ACCEPT;

	net = dev_net((in != NULL) ? in : out);
        // 直接调用ipt_do_table, 所以在filter表添加自己的规则可以确保每个包都能进来
	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter);
}

/*
 * Create and register one nf_hook_ops per bit set in table->valid_hooks,
 * each of them pointing at the callback fn.
 *
 * Returns the allocated ops array on success, ERR_PTR(-ENOMEM) when the
 * allocation fails, or ERR_PTR() of the negative value returned by
 * nf_register_hooks() (the array is freed in that case).
 */
struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
{
    unsigned int pending = table->valid_hooks;
    /* Number of hook points == number of bits set in valid_hooks. */
    uint8_t total = hweight32(pending);
    uint8_t filled = 0;
    uint8_t bit = 0;
    struct nf_hook_ops *ops;
    int err;

    ops = kmalloc(sizeof(*ops) * total, GFP_KERNEL);
    if (!ops)
        return ERR_PTR(-ENOMEM);

    /* Walk the mask from bit 0 upwards, filling one slot per set bit. */
    while (filled < total && pending != 0) {
        if (pending & 1) {
            struct nf_hook_ops *slot = &ops[filled];

            slot->hook     = fn;
            slot->owner    = table->me;
            slot->pf       = table->af;
            slot->hooknum  = bit;
            slot->priority = table->priority;
            ++filled;
        }
        pending >>= 1;
        ++bit;
    }

    /* Register all of the prepared entries in one call. */
    err = nf_register_hooks(ops, total);
    if (err < 0) {
        kfree(ops);
        return ERR_PTR(err);
    }

    return ops;
}

  nf_register_hooks里面会多次调用nf_register_hook来注册多个hook,nf_register_hook源码如下:

/*
 * Insert reg into the hook list selected by reg->pf and reg->hooknum,
 * in front of the first registered entry whose priority is greater than
 * reg->priority.
 *
 * Returns 0 on success, or the negative value from
 * mutex_lock_interruptible() if taking nf_hook_mutex fails.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
    struct nf_hook_ops *pos;
    int ret = mutex_lock_interruptible(&nf_hook_mutex);

    if (ret < 0)
        return ret;

    /* Scan this protocol/hook slot of the global nf_hooks array. */
    list_for_each_entry(pos, &nf_hooks[reg->pf][reg->hooknum], list) {
        if (pos->priority > reg->priority)
            break;
    }

    /* Link the new entry in right after the predecessor of pos. */
    list_add_rcu(&reg->list, pos->list.prev);

    mutex_unlock(&nf_hook_mutex);
    return 0;
}

  nf_hooks是一个链表数组,定义为struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS],即每一个元素代表着某个协议的某个钩子点的注册钩子链表。

HOOK点的调用

  以IPV4的PREROUTING钩子点为例,前面讲过,钩子调用是通过NF_HOOK这个宏实现,类似的还有一个宏叫NF_HOOK_COND。下面直接看NF_HOOK实现的主要函数nf_hook_slow,如下:

/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise.
 *
 * Runs the hooks registered in nf_hooks[pf][hook] on skb via nf_iterate()
 * and translates the resulting verdict into the return value above.
 * hook_thresh is passed through to nf_iterate(), which skips hooks whose
 * priority is below it. */
int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
         struct net_device *indev,
         struct net_device *outdev,
         int (*okfn)(struct sk_buff *),
         int hook_thresh)
{
    struct list_head *elem;
    unsigned int verdict;
    int ret = 0;

    /* We may already have this, but read-locks nest anyway */
    rcu_read_lock();

    // Start from the list head for this protocol / hook point
    elem = &nf_hooks[pf][hook];
next_hook:
    // elem is passed by address so nf_iterate() can record where it stopped
    verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
                 outdev, &elem, okfn, hook_thresh);
    if (verdict == NF_ACCEPT || verdict == NF_STOP) {
        ret = 1;    // caller goes on to run okfn (ip_rcv_finish in the PRE_ROUTING example)
    } else if (verdict == NF_DROP) {
        kfree_skb(skb);
        ret = -EPERM;  // packet freed; the caller does not run okfn (no ip_rcv_finish after ip_rcv)
    } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
        // The bits above NF_VERDICT_BITS are handed to nf_queue() as its last
        // argument; when nf_queue() returns 0, iteration resumes from elem
        if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
                  verdict >> NF_VERDICT_BITS))
            goto next_hook;
    }
    rcu_read_unlock();
    return ret;
}

/*
 * Walk the hook list 'head', continuing after the position stored in *i,
 * and call each registered hook on skb.
 *
 * Hooks whose priority is below hook_thresh are skipped. The first verdict
 * that is neither NF_ACCEPT nor NF_REPEAT is returned immediately, with *i
 * left at the hook that produced it. NF_ACCEPT is returned when the loop
 * runs to completion.
 */
unsigned int nf_iterate(struct list_head *head,
            struct sk_buff *skb,
            unsigned int hook,
            const struct net_device *indev,
            const struct net_device *outdev,
            struct list_head **i,
            int (*okfn)(struct sk_buff *),
            int hook_thresh)
{
    unsigned int verdict;

    /*
     * The caller must not block between calls to this
     * function because of risk of continuing from deleted element.
     */
    list_for_each_continue_rcu(*i, head) {
        /* NOTE(review): this cast assumes 'list' is the first member of
           struct nf_hook_ops -- confirm against the struct definition. */
        struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

        if (hook_thresh > elem->priority)
            continue;

        /* Optimization: we don't need to hold module
           reference here, since function can't sleep. --RR */
        // Invoke the registered callback
        verdict = elem->hook(hook, skb, indev, outdev, okfn);
        // NF_ACCEPT from one hook (table) simply moves on to the next hook in the list
        if (verdict != NF_ACCEPT) {
            if (verdict != NF_REPEAT)
                return verdict;
            // NF_REPEAT: step the cursor back so the next loop step lands on this hook again
            *i = (*i)->prev;
        }
    }
    return NF_ACCEPT;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant