iptable_nat_table_init函数通过调用ipt_register_table完成NAT表的注册和钩子函数的注册;该流程与iptable_filter表初始化时的函数调用流程一致,此处不再重复分析,详情请参考iptable_filter表注册部分的分析。
/*
 * Per-network-namespace initialisation of the IPv4 nat table.
 *
 * Registers nf_nat_ipv4_table together with the nf_nat_ipv4_ops hook
 * functions and stores the resulting table in net->ipv4.nat_table.
 *
 * Returns 0 when the table already exists or registration succeeds,
 * -ENOMEM when the initial table cannot be allocated, otherwise the
 * value returned by ipt_register_table().
 */
static int __net_init iptable_nat_table_init(struct net *net)
{
	struct ipt_replace *repl;
	int ret;

	/* The nat table has already been initialised for this namespace. */
	if (net->ipv4.nat_table)
		return 0;

	/* Allocate the initial table that is handed to the registration. */
	repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
	if (repl == NULL)
		return -ENOMEM;

	/* Register the table and its hook functions. */
	ret = ipt_register_table(net, &nf_nat_ipv4_table, repl,
				 nf_nat_ipv4_ops, &net->ipv4.nat_table);

	/* The initial table is released whether or not registration worked. */
	kfree(repl);
	return ret;
}
nf_nat_ipv4_ops是NAT相关钩子函数的数组,其调用顺序和钩子点见下面注释;其中filter工作在DNAT和SNAT之间;
这几个钩子函数都会调用nf_nat_ipv4_fn来完成NAT转换,本部分最后统一分析该函数;
/*
 * NAT hook functions registered together with the nat table.
 *
 * Ordering is DNAT -> filter -> SNAT:
 *   delivered locally: PRE_ROUTING (DNAT) -> LOCAL_IN (SNAT)
 *   forwarded:         PRE_ROUTING (DNAT) -> POST_ROUTING (SNAT)
 *   locally generated: LOCAL_OUT (DNAT)   -> POST_ROUTING (SNAT)
 */
static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
	/* Before packet filtering, change destination */
	{
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_NAT_DST,	/* DNAT */
		.hook		= iptable_nat_ipv4_in,
	},
	/* After packet filtering, change source */
	{
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP_PRI_NAT_SRC,	/* SNAT */
		.hook		= iptable_nat_ipv4_out,
	},
	/* Before packet filtering, change destination */
	{
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_NAT_DST,	/* DNAT */
		.hook		= iptable_nat_ipv4_local_fn,
	},
	/* After packet filtering, change source */
	{
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_NAT_SRC,	/* SNAT */
		.hook		= iptable_nat_ipv4_fn,
	},
};
函数工作在PRE_ROUTING钩子点,进行DNAT转换;
/*
 * PRE_ROUTING hook (DNAT): hands the packet to nf_nat_ipv4_in() with
 * iptable_nat_do_chain as the rule-matching callback and returns its
 * verdict unchanged.
 */
static unsigned int
iptable_nat_ipv4_in(void *priv, struct sk_buff *skb,
		    const struct nf_hook_state *state)
{
	unsigned int verdict;

	verdict = nf_nat_ipv4_in(priv, skb, state, iptable_nat_do_chain);
	return verdict;
}
nf_nat_ipv4_in函数在进行DNAT转换之前记录了目的地址,在进行转换之后,如果目的地址发生了改变,则需要释放skb中的路由缓存;NAT转换过程调用nf_nat_ipv4_fn完成,步骤见下面的该函数分析;
/* PRE_ROUTING, DNAT */
unsigned int
nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
{
unsigned int ret;
/* 获取目的地址 */
__be32 daddr = ip_hdr(skb)->daddr;
/\* DNAT转换 \*/
ret = nf\_nat\_ipv4\_fn(priv, skb, state, do\_chain);
/\* 转换之后,目的地址发生变化,释放路由缓存 \*/
if (ret != NF\_DROP && ret != NF\_STOLEN &&
daddr != ip\_hdr(skb)->daddr)
skb\_dst\_drop(skb);
return ret;
}
函数工作在LOCAL_IN钩子点,进行SNAT转换;NAT转换过程调用nf_nat_ipv4_fn完成,步骤见下面的该函数分析;
/*
 * LOCAL_IN hook (SNAT): hands the packet straight to nf_nat_ipv4_fn()
 * with iptable_nat_do_chain as the rule-matching callback and returns
 * its verdict unchanged.
 */
static unsigned int
iptable_nat_ipv4_fn(void *priv, struct sk_buff *skb,
		    const struct nf_hook_state *state)
{
	unsigned int verdict;

	verdict = nf_nat_ipv4_fn(priv, skb, state, iptable_nat_do_chain);
	return verdict;
}
函数工作在LOCAL_OUT钩子点,进行DNAT转换;
/*
 * LOCAL_OUT hook (DNAT): hands the packet to nf_nat_ipv4_local_fn()
 * with iptable_nat_do_chain as the rule-matching callback and returns
 * its verdict unchanged.
 */
static unsigned int
iptable_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
			  const struct nf_hook_state *state)
{
	unsigned int verdict;

	verdict = nf_nat_ipv4_local_fn(priv, skb, state, iptable_nat_do_chain);
	return verdict;
}
nf_nat_ipv4_local_fn函数在进行DNAT转换之后,如果地址发生变化,则需要重新进行路由查找;NAT转换过程调用nf_nat_ipv4_fn完成,步骤见下面的该函数分析;
unsigned int
nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
{
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
unsigned int ret;
int err;
/\* root is playing with raw sockets. \*/
if (skb->len < sizeof(struct iphdr) ||
ip\_hdrlen(skb) < sizeof(struct iphdr))
return NF\_ACCEPT;
/\* DNAT转换 \*/
ret = nf\_nat\_ipv4\_fn(priv, skb, state, do\_chain);
/\* 转换成功 \*/
if (ret != NF\_DROP && ret != NF\_STOLEN &&
(ct = nf\_ct\_get(skb, &ctinfo)) != NULL) {
enum ip\_conntrack\_dir dir = CTINFO2DIR(ctinfo);
/\* ip地址发生变化 \*/
if (ct->tuplehash\[dir\].tuple.dst.u3.ip !=
ct->tuplehash\[!dir\].tuple.src.u3.ip) {
/\* 重新查路由 \*/
err = ip\_route\_me\_harder(state->net, skb, RTN\_UNSPEC);
if (err < )
ret = NF\_DROP\_ERR(err);
}
#ifdef CONFIG_XFRM
else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
ct->tuplehash[dir].tuple.dst.u.all !=
ct->tuplehash[!dir].tuple.src.u.all) {
err = nf_xfrm_me_harder(state->net, skb, AF_INET);
if (err < )
ret = NF_DROP_ERR(err);
}
#endif
}
return ret;
}
函数工作在POST_ROUTING钩子点,进行SNAT转换;
/*
 * POST_ROUTING hook (SNAT): hands the packet to nf_nat_ipv4_out() with
 * iptable_nat_do_chain as the rule-matching callback and returns its
 * verdict unchanged.
 */
static unsigned int
iptable_nat_ipv4_out(void *priv, struct sk_buff *skb,
		     const struct nf_hook_state *state)
{
	unsigned int verdict;

	verdict = nf_nat_ipv4_out(priv, skb, state, iptable_nat_do_chain);
	return verdict;
}
unsigned int
nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
{
#ifdef CONFIG_XFRM
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
int err;
#endif
unsigned int ret;
/\* root is playing with raw sockets. \*/
if (skb->len < sizeof(struct iphdr) ||
ip\_hdrlen(skb) < sizeof(struct iphdr))
return NF\_ACCEPT;
/\* SNAT转换 \*/
ret = nf\_nat\_ipv4\_fn(priv, skb, state, do\_chain);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if ((ct->tuplehash\[dir\].tuple.src.u3.ip !=
ct->tuplehash\[!dir\].tuple.dst.u3.ip) ||
(ct->tuplehash\[dir\].tuple.dst.protonum != IPPROTO\_ICMP &&
ct->tuplehash\[dir\].tuple.src.u.all !=
ct->tuplehash\[!dir\].tuple.dst.u.all)) {
err = nf\_xfrm\_me\_harder(state->net, skb, AF\_INET);
if (err < )
ret = NF\_DROP\_ERR(err);
}
}
#endif
return ret;
}
nf_nat_ipv4_fn完成具体的SNAT或者DNAT的转换流程,上面的四个钩子函数都会调用该函数;
unsigned int
nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
struct nf_conn_nat *nat;
/* maniptype == SRC for postrouting. */
/* 获取是进行DNAT还是SNAT,其中PRE_ROUTING和LOCAL_OUT进行DNAT,LOCAL_IN和POST_ROUTING进行SNAT */
enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
/\* 获取skb关联的连接跟踪sf\_conn \*/
ct = nf\_ct\_get(skb, &ctinfo);
/\* Can't track? It's not due to stress, or conntrack would
\* have dropped it. Hence it's the user's responsibilty to
\* packet filter it out, or implement conntrack/NAT for that
\* protocol. 8) --RR
\*/
/\* 没有,返回accpet \*/
if (!ct)
return NF\_ACCEPT;
/\* 获取NAT扩展 \*/
nat = nfct\_nat(ct);
/\* 判断连接跟踪状态 \*/
switch (ctinfo) {
/\* 关联连接(或者icmp错误)或者关联连接的应答 \*/
case IP\_CT\_RELATED:
case IP\_CT\_RELATED\_REPLY:
/\* icmp协议的NAT操作 \*/
if (ip\_hdr(skb)->protocol == IPPROTO\_ICMP) {
if (!nf\_nat\_icmp\_reply\_translation(skb, ct, ctinfo,
state->hook))
return NF\_DROP;
else
return NF\_ACCEPT;
}
/\* Fall thru... (Only ICMPs can be IP\_CT\_IS\_REPLY) \*/
case IP\_CT\_NEW:
/\* Seen it before? This can happen for loopback, retrans,
\* or local packets.
\*/
/\* 尚未进行过NAT转换 \*/
if (!nf\_nat\_initialized(ct, maniptype)) {
unsigned int ret;
/\* 进行规则匹配 \*/
ret = do\_chain(priv, skb, state, ct);
if (ret != NF\_ACCEPT)
return ret;
/\* 打NAT转换标记 \*/
if (nf\_nat\_initialized(ct, HOOK2MANIP(state->hook)))
break;
/\* 连接跟踪进行NAT \*/
ret = nf\_nat\_alloc\_null\_binding(ct, state->hook);
if (ret != NF\_ACCEPT)
return ret;
}
/\* 进行过NAT转换 \*/
else {
pr\_debug("Already setup manip %s for ct %p\\n",
maniptype == NF\_NAT\_MANIP\_SRC ? "SRC" : "DST",
ct);
/\* 出接口发生改变 \*/
if (nf\_nat\_oif\_changed(state->hook, ctinfo, nat,
state->out))
goto oif\_changed;
}
break;
default:
/\* ESTABLISHED \*/
NF\_CT\_ASSERT(ctinfo == IP\_CT\_ESTABLISHED ||
ctinfo == IP\_CT\_ESTABLISHED\_REPLY);
/\* 出接口发生改变 \*/
if (nf\_nat\_oif\_changed(state->hook, ctinfo, nat, state->out))
goto oif\_changed;
}
/\* skb数据包进行NAT转换修改 \*/
return nf\_nat\_packet(ct, ctinfo, state->hook, skb);
oif_changed:
nf_ct_kill_acct(ct, ctinfo, skb);
return NF_DROP;
}
unsigned int
nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
{
return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum));
}
static unsigned int
__nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
{
/* Force range to this IP; let proto decide mapping for
* per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
* Use reply in case it's already been mangled (eg local packet).
*/
/* 使用应答方向的ip地址,LOCAL_OUT会先经过mangle,可能改变了 */
union nf_inet_addr ip =
(manip == NF_NAT_MANIP_SRC ?
ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
/\* 设置range \*/
struct nf\_nat\_range range = {
.flags = NF\_NAT\_RANGE\_MAP\_IPS,
.min\_addr = ip,
.max\_addr = ip,
};
/\* 进行NAT转换 \*/
return nf\_nat\_setup\_info(ct, &range, manip);
}
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype)
{
struct nf_conntrack_tuple curr_tuple, new_tuple;
/\* Can't setup nat info for confirmed ct. \*/
/\* 已经确认的,返回accpet \*/
if (nf\_ct\_is\_confirmed(ct))
return NF\_ACCEPT;
NF\_CT\_ASSERT(maniptype == NF\_NAT\_MANIP\_SRC ||
maniptype == NF\_NAT\_MANIP\_DST);
BUG\_ON(nf\_nat\_initialized(ct, maniptype));
/\* What we've got will look like inverse of reply. Normally
\* this is what is in the conntrack, except for prior
\* manipulations (future optimization: if num\_manips == 0,
\* orig\_tp = ct->tuplehash\[IP\_CT\_DIR\_ORIGINAL\].tuple)
\*/
/\* 从应答tuple反向得到当前tuple \*/
nf\_ct\_invert\_tuplepr(&curr\_tuple,
&ct->tuplehash\[IP\_CT\_DIR\_REPLY\].tuple);
/\* 根据当前tuple和range得到NAT转换之后的的tuple \*/
get\_unique\_tuple(&new\_tuple, &curr\_tuple, range, ct, maniptype);
/\* NAT转换之后和之前的tuple不同 \*/
if (!nf\_ct\_tuple\_equal(&new\_tuple, &curr\_tuple)) {
struct nf\_conntrack\_tuple reply;
/\* Alter conntrack table so will recognize replies. \*/
/\* 通过新tuple得到reply\_tuple \*/
nf\_ct\_invert\_tuplepr(&reply, &new\_tuple);
/\* 加入到reply hash \*/
nf\_conntrack\_alter\_reply(ct, &reply);
/\* 此时tuple类似如下 \*/
/\*
//内网10.1通过100.1访问200.1,经过SNAT之后得到tuple
tuple SNAT(10.1->200.1, 200.1->100.1)
//外网300.1通过100.1访问20.1,经过DNAT之后,得到tuple
tuple DNAT(300.1->100.1, 20.1->300.1)
\*/
/\* Non-atomic: we own this at the moment. \*/
/\* 更新状态需要做NAT \*/
if (maniptype == NF\_NAT\_MANIP\_SRC)
ct->status |= IPS\_SRC\_NAT;
else
ct->status |= IPS\_DST\_NAT;
/\* 扩展项的调整 \*/
if (nfct\_help(ct))
if (!nfct\_seqadj\_ext\_add(ct))
return NF\_DROP;
}
/\* SNAT \*/
if (maniptype == NF\_NAT\_MANIP\_SRC) {
struct nf\_nat\_conn\_key key = {
.net = nf\_ct\_net(ct),
.tuple = &ct->tuplehash\[IP\_CT\_DIR\_ORIGINAL\].tuple,
.zone = nf\_ct\_zone(ct),
};
int err;
/\* 加入到nf\_nat\_bysource\_table \*/
err = rhltable\_insert\_key(&nf\_nat\_bysource\_table,
&key,
&ct->nat\_bysource,
nf\_nat\_bysource\_params);
if (err)
return NF\_DROP;
}
/\* It's done. \*/
/\* NAT转换完成 \*/
if (maniptype == NF\_NAT\_MANIP\_DST)
ct->status |= IPS\_DST\_NAT\_DONE;
else
ct->status |= IPS\_SRC\_NAT\_DONE;
return NF\_ACCEPT;
}
/* 根据orig_tuple和range得到NAT转换之后的tuple */
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig_tuple,
const struct nf_nat_range *range,
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
const struct nf_conntrack_zone *zone;
const struct nf_nat_l3proto *l3proto;
const struct nf_nat_l4proto *l4proto;
struct net *net = nf_ct_net(ct);
zone = nf\_ct\_zone(ct);
rcu\_read\_lock();
/\* 查找l3proto和l4proto \*/
l3proto = \_\_nf\_nat\_l3proto\_find(orig\_tuple->src.l3num);
l4proto = \_\_nf\_nat\_l4proto\_find(orig\_tuple->src.l3num,
orig\_tuple->dst.protonum);
/\* 1) If this srcip/proto/src-proto-part is currently mapped,
\* and that same mapping gives a unique tuple within the given
\* range, use that.
\*
\* This is only required for source (ie. NAT/masq) mappings.
\* So far, we don't do local source mappings, so multiple
\* manips not an issue.
\*/
/\* SNAT && 没有打RANDOM\_ALL标记 \*/
if (maniptype == NF\_NAT\_MANIP\_SRC &&
!(range->flags & NF\_NAT\_RANGE\_PROTO\_RANDOM\_ALL)) {
/\* try the original tuple first \*/
/\* 查看orig\_tuple是否满足范围要求 \*/
if (in\_range(l3proto, l4proto, orig\_tuple, range)) {
/\* tuple尚未被使用 \*/
if (!nf\_nat\_used\_tuple(orig\_tuple, ct)) {
/\* 使用原tuple \*/
\*tuple = \*orig\_tuple;
goto out;
}
}
/\* ori\_range不满足要求,则从bysource\_table中查找一个满足范围的tuple \*/
else if (find\_appropriate\_src(net, zone, l3proto, l4proto,
orig\_tuple, tuple, range)) {
pr\_debug("get\_unique\_tuple: Found current src map\\n");
/\* tuple尚未被使用 \*/
if (!nf\_nat\_used\_tuple(tuple, ct))
goto out;
}
}
/\* 从给定range中选择一个最少使用的组合 \*/
/\* 2) Select the least-used IP/proto combination in the given range \*/
\*tuple = \*orig\_tuple;
find\_best\_ips\_proto(zone, tuple, range, ct, maniptype);
/\* 3) The per-protocol part of the manip is made to map into
\* the range to make a unique tuple.
\*/
/\* Only bother mapping if it's not already in range and unique \*/
/\* 没有打RANDOM\_ALL标记 \*/
if (!(range->flags & NF\_NAT\_RANGE\_PROTO\_RANDOM\_ALL)) {
/\* 有SPECIFIED标记,对端口号进行检查 \*/
if (range->flags & NF\_NAT\_RANGE\_PROTO\_SPECIFIED) {
/\* 端口号已经在范围之内&&(端口最小最大范围相等||tuple没有使用) \*/
if (l4proto->in\_range(tuple, maniptype,
&range->min\_proto,
&range->max\_proto) &&
(range->min\_proto.all == range->max\_proto.all ||
!nf\_nat\_used\_tuple(tuple, ct)))
goto out;
}
/\* 没有SPECIFIED标记,端口号不变,tuple没有被使用 \*/
else if (!nf\_nat\_used\_tuple(tuple, ct)) {
goto out;
}
}
/\* Last change: get protocol to try to obtain unique tuple. \*/
/\* 随机选择端口号 \*/
l4proto->unique\_tuple(l3proto, tuple, range, maniptype, ct);
out:
rcu_read_unlock();
}
unsigned int nf_nat_packet(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int hooknum,
struct sk_buff *skb)
{
const struct nf_nat_l3proto *l3proto;
const struct nf_nat_l4proto *l4proto;
/* 获取方向 */
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
/* 获取进行SNAT还是DNAT */
enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
/\* 设置NAT标记 \*/
if (mtype == NF\_NAT\_MANIP\_SRC)
statusbit = IPS\_SRC\_NAT;
else
statusbit = IPS\_DST\_NAT;
/\* Invert if this is reply dir. \*/
/\* 应答方向需要取反 \*/
if (dir == IP\_CT\_DIR\_REPLY)
statusbit ^= IPS\_NAT\_MASK;
/\* Non-atomic: these bits don't change. \*/
/\* 需要做NAT \*/
if (ct->status & statusbit) {
struct nf\_conntrack\_tuple target;
/\* We are aiming to look like inverse of other direction. \*/
/\* 获取目标tuple \*/
/\*
//内网10.1通过100.1访问200.1,经过SNAT之后得到tuple
tuple SNAT(10.1->200.1, 200.1->100.1)
//外网300.1通过100.1访问20.1,经过DNAT之后,得到tuple
tuple DNAT(300.1->100.1, 20.1->300.1)
\*/
nf\_ct\_invert\_tuplepr(&target, &ct->tuplehash\[!dir\].tuple);
/\* 获取l3proto,l4proto \*/
l3proto = \_\_nf\_nat\_l3proto\_find(target.src.l3num);
l4proto = \_\_nf\_nat\_l4proto\_find(target.src.l3num,
target.dst.protonum);
/\* 将ip地址和端口的NAT转换结果写入skb \*/
if (!l3proto->manip\_pkt(skb, , l4proto, &target, mtype))
return NF\_DROP;
}
return NF\_ACCEPT;
}
手机扫一扫
移动阅读更方便
你可能感兴趣的文章