ip_nat_core.c
/* NAT for netfilter; shared with compatibility layer. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/skbuff.h>
#include <linux/netfilter_ipv4.h>
#include <linux/vmalloc.h>
#include <net/checksum.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/tcp.h>  /* For tcp_prot in getorigdst */
#include <linux/icmp.h>
#include <linux/udp.h>

#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)

#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_protocol.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/listhelp.h>

#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

DECLARE_RWLOCK(ip_nat_lock);
DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);

/* Calculated at init based on memory size */
static unsigned int ip_nat_htable_size;

static struct list_head *bysource;
static struct list_head *byipsproto;
LIST_HEAD(protos);
LIST_HEAD(helpers);

extern struct ip_nat_protocol unknown_nat_protocol;

/* We keep extra hashes for each conntrack, for fast searching. */
static inline size_t
hash_by_ipsproto(u_int32_t src, u_int32_t dst, u_int16_t proto)
{
        /* Modified src and dst, to ensure we don't create two
           identical streams. */
        return (src + dst + proto) % ip_nat_htable_size;
}

static inline size_t
hash_by_src(const struct ip_conntrack_manip *manip, u_int16_t proto)
{
        /* Original src, to ensure we map it consistently if poss. */
        return (manip->ip + manip->u.all + proto) % ip_nat_htable_size;
}

/* No one is using the conntrack by the time this is called. */
static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
{
        struct ip_nat_info *info = &conn->nat.info;
        unsigned int hs, hp;

        if (!info->initialized)
                return;

        IP_NF_ASSERT(info->bysource.conntrack);
        IP_NF_ASSERT(info->byipsproto.conntrack);

        hs = hash_by_src(&conn->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src,
                         conn->tuplehash[IP_CT_DIR_ORIGINAL]
                         .tuple.dst.protonum);

        hp = hash_by_ipsproto(conn->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
                              conn->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
                              conn->tuplehash[IP_CT_DIR_REPLY]
                              .tuple.dst.protonum);

        WRITE_LOCK(&ip_nat_lock);
        LIST_DELETE(&bysource[hs], &info->bysource);
        LIST_DELETE(&byipsproto[hp], &info->byipsproto);
        WRITE_UNLOCK(&ip_nat_lock);
}
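/*
 * Illustrative sketch (not part of the original file): a standalone
 * user-space demonstration of the two bucket computations above.  Both
 * hashes simply sum their inputs modulo the table size.  HTABLE_SIZE and
 * the demo_* names are assumptions for this demo; the kernel derives
 * ip_nat_htable_size from available memory at init time.
 */
#include <stdint.h>
#include <stdio.h>

#define HTABLE_SIZE 512                 /* assumed; kernel scales this to RAM */

/* Mirrors hash_by_ipsproto(): reply-direction src/dst ips + protocol. */
static size_t demo_hash_by_ipsproto(uint32_t src, uint32_t dst, uint16_t proto)
{
        return (src + dst + proto) % HTABLE_SIZE;
}

/* Mirrors hash_by_src(): original source ip + port (or id) + protocol. */
static size_t demo_hash_by_src(uint32_t ip, uint16_t port, uint16_t proto)
{
        return (ip + port + proto) % HTABLE_SIZE;
}

int main(void)
{
        uint32_t src = 0xC0A80164;      /* 192.168.1.100 */
        uint32_t dst = 0x08080808;      /* 8.8.8.8 */

        printf("bysource bucket:   %zu\n", demo_hash_by_src(src, 54321, 6));
        printf("byipsproto bucket: %zu\n", demo_hash_by_ipsproto(src, dst, 6));
        return 0;
}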
/* We do checksum mangling, so if they were wrong before they're still
 * wrong.  Also works for incomplete packets (eg. ICMP dest
 * unreachables.) */
u_int16_t
ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
{
        u_int32_t diffs[] = { oldvalinv, newval };
        return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
                                      oldcheck^0xFFFF));
}

static inline int cmp_proto(const struct ip_nat_protocol *i, int proto)
{
        return i->protonum == proto;
}

struct ip_nat_protocol *
find_nat_proto(u_int16_t protonum)
{
        struct ip_nat_protocol *i;

        MUST_BE_READ_LOCKED(&ip_nat_lock);
        i = LIST_FIND(&protos, cmp_proto, struct ip_nat_protocol *, protonum);
        if (!i)
                i = &unknown_nat_protocol;
        return i;
}

/* Is this tuple already taken? (not by us) */
int
ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
                  const struct ip_conntrack *ignored_conntrack)
{
        /* Conntrack tracking doesn't keep track of outgoing tuples; only
           incoming ones.  NAT means they don't have a fixed mapping, so
           we invert the tuple and look for the incoming reply.

           We could keep a separate hash if this proves too slow. */
        struct ip_conntrack_tuple reply;

        invert_tuplepr(&reply, tuple);
        return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
}

/* Does tuple + the source manip come within the range mr */
static int
in_range(const struct ip_conntrack_tuple *tuple,
         const struct ip_conntrack_manip *manip,
         const struct ip_nat_multi_range *mr)
{
        struct ip_nat_protocol *proto = find_nat_proto(tuple->dst.protonum);
        unsigned int i;
        struct ip_conntrack_tuple newtuple = { *manip, tuple->dst };

        for (i = 0; i < mr->rangesize; i++) {
                /* If we are allowed to map IPs, then we must be in the
                   range specified, otherwise we must be unchanged. */
                if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) {
                        if (ntohl(newtuple.src.ip) < ntohl(mr->range[i].min_ip)
                            || (ntohl(newtuple.src.ip)
                                > ntohl(mr->range[i].max_ip)))
                                continue;
                } else {
                        if (newtuple.src.ip != tuple->src.ip)
                                continue;
                }

                if (!(mr->range[i].flags & IP_NAT_RANGE_PROTO_SPECIFIED)
                    || proto->in_range(&newtuple, IP_NAT_MANIP_SRC,
                                       &mr->range[i].min, &mr->range[i].max))
                        return 1;
        }
        return 0;
}

static inline int
src_cmp(const struct ip_nat_hash *i,
        const struct ip_conntrack_tuple *tuple,
        const struct ip_nat_multi_range *mr)
{
        return (i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
                == tuple->dst.protonum
                && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
                == tuple->src.ip
                && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
                == tuple->src.u.all
                && in_range(tuple,
                            &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
                            .tuple.src,
                            mr));
}

/* Only called for SRC manip */
static struct ip_conntrack_manip *
find_appropriate_src(const struct ip_conntrack_tuple *tuple,
                     const struct ip_nat_multi_range *mr)
{
        unsigned int h = hash_by_src(&tuple->src, tuple->dst.protonum);
        struct ip_nat_hash *i;

        MUST_BE_READ_LOCKED(&ip_nat_lock);
        i = LIST_FIND(&bysource[h], src_cmp, struct ip_nat_hash *, tuple, mr);
        if (i)
                return &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src;
        else
                return NULL;
}

#ifdef CONFIG_IP_NF_NAT_LOCAL
/* If it's really a local destination manip, it may need to do a source
   manip too. */
static int
do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp)
{
        struct flowi fl = { .nl_u = { .ip4_u = { .daddr = var_ip } } };
        struct rtable *rt;

        /* FIXME: IPTOS_TOS(iph->tos) --RR */
        if (ip_route_output_key(&rt, &fl) != 0) {
                DEBUGP("do_extra_mangle: Can't get route to %u.%u.%u.%u\n",
                       NIPQUAD(var_ip));
                return 0;
        }

        *other_ipp = rt->rt_src;
        ip_rt_put(rt);
        return 1;
}
#endif
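/*
 * Illustrative sketch (not part of the original file): ip_nat_cheat_check()
 * above is the RFC 1624 incremental-update trick -- when a 32-bit field
 * changes, the 16-bit ones'-complement checksum can be fixed up from the
 * inverted old value and the new value alone, with no full recompute.
 * This standalone user-space program (all names are hypothetical demo
 * code, not kernel API) verifies the identity on a toy packet; byte-order
 * details are elided for clarity.
 */
#include <stdint.h>
#include <stdio.h>

/* Fold a 32-bit ones'-complement accumulator down to 16 bits. */
static uint16_t fold32(uint32_t sum)
{
        while (sum >> 16)
                sum = (sum & 0xFFFF) + (sum >> 16);
        return (uint16_t)sum;
}

/* Add a 32-bit word into a 16-bit ones'-complement sum. */
static uint16_t csum_add32(uint16_t sum, uint32_t val)
{
        uint32_t acc = sum;

        acc += val & 0xFFFF;
        acc += val >> 16;
        return fold32(acc);
}

/* Same contract as ip_nat_cheat_check(): the caller passes ~oldval. */
static uint16_t demo_cheat_check(uint32_t oldvalinv, uint32_t newval,
                                 uint16_t oldcheck)
{
        uint16_t sum = (uint16_t)~oldcheck;     /* undo final complement */

        sum = csum_add32(sum, oldvalinv);       /* "subtract" the old value */
        sum = csum_add32(sum, newval);          /* add the new value */
        return (uint16_t)~sum;
}

int main(void)
{
        uint32_t pkt[4] = { 0xC0A80001, 0x08080808, 0x12345678, 0x0000ABCD };
        uint32_t newval = 0x0A000001;           /* NAT rewrites the address */
        uint16_t sum = 0, check, incr, full;
        int i;

        for (i = 0; i < 4; i++)                 /* full checksum, old packet */
                sum = csum_add32(sum, pkt[i]);
        check = (uint16_t)~sum;

        incr = demo_cheat_check(~pkt[0], newval, check);

        pkt[0] = newval;                        /* full checksum, new packet */
        for (sum = 0, i = 0; i < 4; i++)
                sum = csum_add32(sum, pkt[i]);
        full = (uint16_t)~sum;

        printf("incremental %04x, recomputed %04x: %s\n", incr, full,
               incr == full ? "match" : "MISMATCH");
        return 0;
}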
/* Simple way to iterate through all. */
static inline int fake_cmp(const struct ip_nat_hash *i,
                           u_int32_t src, u_int32_t dst, u_int16_t protonum,
                           unsigned int *score,
                           const struct ip_conntrack *conntrack)
{
        /* Compare backwards: we're dealing with OUTGOING tuples, and
           inside the conntrack is the REPLY tuple.  Don't count this
           conntrack. */
        if (i->conntrack != conntrack
            && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip == dst
            && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip == src
            && (i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum
                == protonum))
                (*score)++;
        return 0;
}

static inline unsigned int
count_maps(u_int32_t src, u_int32_t dst, u_int16_t protonum,
           const struct ip_conntrack *conntrack)
{
        unsigned int score = 0;
        unsigned int h;

        MUST_BE_READ_LOCKED(&ip_nat_lock);
        h = hash_by_ipsproto(src, dst, protonum);
        LIST_FIND(&byipsproto[h], fake_cmp, struct ip_nat_hash *,
                  src, dst, protonum, &score, conntrack);

        return score;
}

/* For [FUTURE] fragmentation handling, we want the least-used
   src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
   if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
   1-65535, we don't do pro-rata allocation based on ports; we choose
   the ip with the lowest src-ip/dst-ip/proto usage.

   If an allocation then fails (eg. all 6 ports used in the 1.2.3.4
   range), we eliminate that and try again.  This is not the most
   efficient approach, but if you're worried about that, don't hand us
   ranges you don't really have. */
static struct ip_nat_range *
find_best_ips_proto(struct ip_conntrack_tuple *tuple,
                    const struct ip_nat_multi_range *mr,
                    const struct ip_conntrack *conntrack,
                    unsigned int hooknum)
{
        unsigned int i;
        struct {
                const struct ip_nat_range *range;
                unsigned int score;
                struct ip_conntrack_tuple tuple;
        } best = { NULL, 0xFFFFFFFF };
        u_int32_t *var_ipp, *other_ipp, saved_ip, orig_dstip;
        static unsigned int randomness;

        if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) {
                var_ipp = &tuple->src.ip;
                saved_ip = tuple->dst.ip;
                other_ipp = &tuple->dst.ip;
        } else {
                var_ipp = &tuple->dst.ip;
                saved_ip = tuple->src.ip;
                other_ipp = &tuple->src.ip;
        }
        /* Don't do do_extra_mangle unless necessary (overrides
           explicit socket bindings, for example) */
        orig_dstip = tuple->dst.ip;

        IP_NF_ASSERT(mr->rangesize >= 1);
        for (i = 0; i < mr->rangesize; i++) {
                /* Host order */
                u_int32_t minip, maxip, j;

                /* Don't do ranges which are already eliminated. */
                if (mr->range[i].flags & IP_NAT_RANGE_FULL) {
                        continue;
                }

                if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) {
                        minip = ntohl(mr->range[i].min_ip);
                        maxip = ntohl(mr->range[i].max_ip);
                } else
                        minip = maxip = ntohl(*var_ipp);

                randomness++;
                for (j = 0; j < maxip - minip + 1; j++) {
                        unsigned int score;

                        *var_ipp = htonl(minip + (randomness + j)
                                         % (maxip - minip + 1));

                        /* Reset the other ip in case it was mangled by
                         * do_extra_mangle last time. */
                        *other_ipp = saved_ip;

#ifdef CONFIG_IP_NF_NAT_LOCAL
                        if (hooknum == NF_IP_LOCAL_OUT
                            && *var_ipp != orig_dstip
                            && !do_extra_mangle(*var_ipp, other_ipp)) {
                                DEBUGP("Range %u %u.%u.%u.%u rt failed!\n",
                                       i, NIPQUAD(*var_ipp));
                                /* Can't route?  This whole range part is
                                 * probably screwed, but keep trying
                                 * anyway. */
                                continue;
                        }
#endif

                        /* Count how many others map onto this. */
                        score = count_maps(tuple->src.ip, tuple->dst.ip,
                                           tuple->dst.protonum, conntrack);
                        if (score < best.score) {
                                /* Optimization: doesn't get any better than
                                   this. */
                                if (score == 0)
                                        return (struct ip_nat_range *)
                                                &mr->range[i];
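/*
 * Illustrative sketch (not part of the original file; the listing above
 * breaks off inside find_best_ips_proto(), whose remaining lines record
 * the lowest-scoring candidate and return its range).  This standalone
 * user-space program shows the selection idea: try each candidate
 * address, count existing (src, dst, proto) mappings the way count_maps()
 * does, keep the least-used one, and return early on a score of zero.
 * The demo_map table and all demo_* names are assumptions for the demo.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct demo_map {
        uint32_t src, dst;
        uint16_t proto;
};

/* Counterpart of count_maps(): a linear scan instead of a hash lookup. */
static unsigned int demo_count(const struct demo_map *maps, size_t n,
                               uint32_t src, uint32_t dst, uint16_t proto)
{
        unsigned int score = 0;
        size_t i;

        for (i = 0; i < n; i++)
                if (maps[i].src == src && maps[i].dst == dst
                    && maps[i].proto == proto)
                        score++;
        return score;
}

/* Pick the least-used source address in [minip, maxip] (host order). */
static uint32_t demo_pick_src(const struct demo_map *maps, size_t n,
                              uint32_t minip, uint32_t maxip,
                              uint32_t dst, uint16_t proto)
{
        uint32_t ip, best_ip = minip;
        unsigned int best_score = 0xFFFFFFFF;

        for (ip = minip; ip <= maxip; ip++) {
                unsigned int score = demo_count(maps, n, ip, dst, proto);

                if (score == 0)         /* can't do better than unused */
                        return ip;
                if (score < best_score) {
                        best_score = score;
                        best_ip = ip;
                }
        }
        return best_ip;
}

int main(void)
{
        /* Two flows already mapped to .4, one to .5, none to .6. */
        struct demo_map maps[] = {
                { 0x01020304, 0x08080808, 6 },
                { 0x01020304, 0x08080808, 6 },
                { 0x01020305, 0x08080808, 6 },
        };
        uint32_t ip = demo_pick_src(maps, 3, 0x01020304, 0x01020306,
                                    0x08080808, 6);

        printf("picked %u.%u.%u.%u\n", ip >> 24, (ip >> 16) & 0xFF,
               (ip >> 8) & 0xFF, ip & 0xFF);    /* expect 1.2.3.6 */
        return 0;
}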