/* * * Masquerading functionality * * Copyright (c) 1994 Pauline Middelink * * $Id: ip_masq.c,v 1.34 1999/03/17 01:53:51 davem Exp $ * * * See ip_fw.c for original log * * Fixes: * Juan Jose Ciarlante : Modularized application masquerading (see ip_masq_app.c) * Juan Jose Ciarlante : New struct ip_masq_seq that holds output/input delta seq. * Juan Jose Ciarlante : Added hashed lookup by proto,maddr,mport and proto,saddr,sport * Juan Jose Ciarlante : Fixed deadlock if free ports get exhausted * Juan Jose Ciarlante : Added NO_ADDR status flag. * Richard Lynch : Added IP Autoforward * Nigel Metheringham : Added ICMP handling for demasquerade * Nigel Metheringham : Checksum checking of masqueraded data * Nigel Metheringham : Better handling of timeouts of TCP conns * Delian Delchev : Added support for ICMP requests and replys * Nigel Metheringham : ICMP in ICMP handling, tidy ups, bug fixes, made ICMP optional * Juan Jose Ciarlante : re-assign maddr if no packet received from outside * Juan Jose Ciarlante : ported to 2.1 tree * Juan Jose Ciarlante : reworked control connections * Steven Clarke : Added Port Forwarding * Juan Jose Ciarlante : Just ONE ip_masq_new (!) 
* Juan Jose Ciarlante : IP masq modules support * Juan Jose Ciarlante : don't go into search loop if mport specified * Juan Jose Ciarlante : locking * Steven Clarke : IP_MASQ_S_xx state design * Juan Jose Ciarlante : IP_MASQ_S state implementation * Juan Jose Ciarlante : xx_get() clears timer, _put() inserts it * Juan Jose Ciarlante : create /proc/net/ip_masq/ * Juan Jose Ciarlante : reworked checksums (save payload csum if possible) * Juan Jose Ciarlante : added missing ip_fw_masquerade checksum * Juan Jose Ciarlante : csum savings * Juan Jose Ciarlante : added user-space tunnel creation/del, etc * Juan Jose Ciarlante : (last) moved to ip_masq_user runtime module * Juan Jose Ciarlante : user timeout handling again * Juan Jose Ciarlante : make new modules support optional * Juan Jose Ciarlante : u-space context => locks reworked * Juan Jose Ciarlante : fixed stupid SMP locking bug * Juan Jose Ciarlante : fixed "tap"ing in demasq path by copy-on-w * Juan Jose Ciarlante : make masq_proto_doff() robust against fake sized/corrupted packets * Kai Bankett : do not toss other IP protos in proto_doff() * Dan Kegel : pointed correct NAT behavior for UDP streams * */ #include #include #ifdef CONFIG_KMOD #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CONFIG_IP_MASQUERADE_MOD #include #endif #include #include #include int sysctl_ip_masq_debug = 0; /* * Exported wrapper */ int ip_masq_get_debug_level(void) { return sysctl_ip_masq_debug; } struct ip_masq_hook *ip_masq_user_hook = NULL; /* * Timeout table[state] */ /* static int masq_timeout_table[IP_MASQ_S_LAST+1] = { */ static struct ip_masq_timeout_table masq_timeout_table = { ATOMIC_INIT(0), /* refcnt */ 0, /* scale */ { 30*60*HZ, /* IP_MASQ_S_NONE, */ 15*60*HZ, /* IP_MASQ_S_ESTABLISHED, */ 2*60*HZ, /* IP_MASQ_S_SYN_SENT, */ 1*60*HZ, /* IP_MASQ_S_SYN_RECV, */ 2*60*HZ, /* IP_MASQ_S_FIN_WAIT, */ 
2*60*HZ, /* IP_MASQ_S_TIME_WAIT, */ 10*HZ, /* IP_MASQ_S_CLOSE, */ 60*HZ, /* IP_MASQ_S_CLOSE_WAIT, */ 30*HZ, /* IP_MASQ_S_LAST_ACK, */ 2*60*HZ, /* IP_MASQ_S_LISTEN, */ 5*60*HZ, /* IP_MASQ_S_UDP, */ 1*60*HZ, /* IP_MASQ_S_ICMP, */ 2*HZ,/* IP_MASQ_S_LAST */ }, /* timeout */ }; #define MASQUERADE_EXPIRE_RETRY masq_timeout_table.timeout[IP_MASQ_S_TIME_WAIT] static const char * state_name_table[IP_MASQ_S_LAST+1] = { "NONE", /* IP_MASQ_S_NONE, */ "ESTABLISHED", /* IP_MASQ_S_ESTABLISHED, */ "SYN_SENT", /* IP_MASQ_S_SYN_SENT, */ "SYN_RECV", /* IP_MASQ_S_SYN_RECV, */ "FIN_WAIT", /* IP_MASQ_S_FIN_WAIT, */ "TIME_WAIT", /* IP_MASQ_S_TIME_WAIT, */ "CLOSE", /* IP_MASQ_S_CLOSE, */ "CLOSE_WAIT", /* IP_MASQ_S_CLOSE_WAIT, */ "LAST_ACK", /* IP_MASQ_S_LAST_ACK, */ "LISTEN", /* IP_MASQ_S_LISTEN, */ "UDP", /* IP_MASQ_S_UDP, */ "ICMP", /* IP_MASQ_S_ICMP, */ "BUG!", /* IP_MASQ_S_LAST */ }; #define mNO IP_MASQ_S_NONE #define mES IP_MASQ_S_ESTABLISHED #define mSS IP_MASQ_S_SYN_SENT #define mSR IP_MASQ_S_SYN_RECV #define mFW IP_MASQ_S_FIN_WAIT #define mTW IP_MASQ_S_TIME_WAIT #define mCL IP_MASQ_S_CLOSE #define mCW IP_MASQ_S_CLOSE_WAIT #define mLA IP_MASQ_S_LAST_ACK #define mLI IP_MASQ_S_LISTEN struct masq_tcp_states_t { int next_state[IP_MASQ_S_LAST]; /* should be _LAST_TCP */ }; const char * ip_masq_state_name(int state) { if (state >= IP_MASQ_S_LAST) return "ERR!"; return state_name_table[state]; } struct masq_tcp_states_t masq_tcp_states [] = { /* INPUT */ /* mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI */ /*syn*/ {{mSR, mES, mES, mSR, mSR, mSR, mSR, mSR, mSR, mSR }}, /*fin*/ {{mCL, mCW, mSS, mTW, mTW, mTW, mCL, mCW, mLA, mLI }}, /*ack*/ {{mCL, mES, mSS, mSR, mFW, mTW, mCL, mCW, mCL, mLI }}, /*rst*/ {{mCL, mCL, mCL, mSR, mCL, mCL, mCL, mCL, mLA, mLI }}, /* OUTPUT */ /* mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI */ /*syn*/ {{mSS, mES, mSS, mES, mSS, mSS, mSS, mSS, mSS, mLI }}, /*fin*/ {{mTW, mFW, mSS, mTW, mFW, mTW, mCL, mTW, mLA, mLI }}, /*ack*/ {{mES, mES, mSS, mSR, mFW, mTW, 
mCL, mCW, mLA, mES }}, /*rst*/ {{mCL, mCL, mSS, mCL, mCL, mTW, mCL, mCL, mCL, mCL }}, }; static __inline__ int masq_tcp_state_idx(struct tcphdr *th, int output) { /* * [0-3]: input states, [4-7]: output. */ if (output) output=4; if (th->rst) return output+3; if (th->syn) return output+0; if (th->fin) return output+1; if (th->ack) return output+2; return -1; } static int masq_set_state_timeout(struct ip_masq *ms, int state) { struct ip_masq_timeout_table *mstim = ms->timeout_table; int scale; /* * Use default timeout table if no specific for this entry */ if (!mstim) mstim = &masq_timeout_table; ms->timeout = mstim->timeout[ms->state=state]; scale = mstim->scale; if (scale<0) ms->timeout >>= -scale; else if (scale > 0) ms->timeout <<= scale; return state; } static int masq_tcp_state(struct ip_masq *ms, int output, struct tcphdr *th) { int state_idx; int new_state = IP_MASQ_S_CLOSE; if ((state_idx = masq_tcp_state_idx(th, output)) < 0) { IP_MASQ_DEBUG(1, "masq_state_idx(%d)=%d!!!\n", output, state_idx); goto tcp_state_out; } new_state = masq_tcp_states[state_idx].next_state[ms->state]; tcp_state_out: if (new_state!=ms->state) IP_MASQ_DEBUG(1, "%s %s [%c%c%c%c] %08lX:%04X-%08lX:%04X state: %s->%s\n", masq_proto_name(ms->protocol), output? "output" : "input ", th->syn? 'S' : '.', th->fin? 'F' : '.', th->ack? 'A' : '.', th->rst? 'R' : '.', ntohl(ms->saddr), ntohs(ms->sport), ntohl(ms->daddr), ntohs(ms->dport), ip_masq_state_name(ms->state), ip_masq_state_name(new_state)); return masq_set_state_timeout(ms, new_state); } /* * Handle state transitions */ static int masq_set_state(struct ip_masq *ms, int output, struct iphdr *iph, void *tp) { switch (iph->protocol) { case IPPROTO_ICMP: return masq_set_state_timeout(ms, IP_MASQ_S_ICMP); case IPPROTO_UDP: return masq_set_state_timeout(ms, IP_MASQ_S_UDP); case IPPROTO_TCP: return masq_tcp_state(ms, output, tp); } return -1; } /* * Set LISTEN timeout. 
(ip_masq_put will setup timer) */ int ip_masq_listen(struct ip_masq *ms) { masq_set_state_timeout(ms, IP_MASQ_S_LISTEN); return ms->timeout; } /* * Dynamic address rewriting */ extern int sysctl_ip_dynaddr; /* * Lookup lock */ rwlock_t __ip_masq_lock = RW_LOCK_UNLOCKED; /* * Implement IP packet masquerading */ /* * Converts an ICMP reply code into the equivalent request code */ static __inline__ const __u8 icmp_type_request(__u8 type) { switch (type) { case ICMP_ECHOREPLY: return ICMP_ECHO; break; case ICMP_TIMESTAMPREPLY: return ICMP_TIMESTAMP; break; case ICMP_INFO_REPLY: return ICMP_INFO_REQUEST; break; case ICMP_ADDRESSREPLY: return ICMP_ADDRESS; break; default: return (255); break; } } /* * Helper macros - attempt to make code clearer! */ /* ID used in ICMP lookups */ #define icmp_id(icmph) ((icmph->un).echo.id) /* (port) hash value using in ICMP lookups for requests */ #define icmp_hv_req(icmph) ((__u16)(icmph->code+(__u16)(icmph->type<<8))) /* (port) hash value using in ICMP lookups for replies */ #define icmp_hv_rep(icmph) ((__u16)(icmph->code+(__u16)(icmp_type_request(icmph->type)<<8))) /* * Last masq_port number in use. * Will cycle in MASQ_PORT boundaries. */ static __u16 masq_port = PORT_MASQ_BEGIN; #ifdef __SMP__ static spinlock_t masq_port_lock = SPIN_LOCK_UNLOCKED; #endif /* * free ports counters (UDP & TCP) * * Their value is _less_ or _equal_ to actual free ports: * same masq port, diff masq addr (firewall iface address) allocated * entries are accounted but their actually don't eat a more than 1 port. * * Greater values could lower MASQ_EXPIRATION setting as a way to * manage 'masq_entries resource'. * * By default we will reuse masq.port iff (output) connection * (5-upla) if not duplicated. * This may break midentd and others ... 
*/

/*
 *	Multiplier applied to the size of the masq port range when
 *	initializing the free-port counters: 1 when ports are never
 *	reused (CONFIG_IP_MASQ_NREUSE), 10 otherwise.
 */
#ifdef CONFIG_IP_MASQ_NREUSE
#define PORT_MASQ_MUL 1
#else
#define PORT_MASQ_MUL 10
#endif

/*
 *	Per-protocol free port counters.
 *	At the moment the slot order is hard-coded and must stay in sync
 *	with masq_proto_num(), whose result is used as the index.
 */
atomic_t ip_masq_free_ports[3] = {
	ATOMIC_INIT((PORT_MASQ_END-PORT_MASQ_BEGIN) * PORT_MASQ_MUL),/* UDP */
	ATOMIC_INIT((PORT_MASQ_END-PORT_MASQ_BEGIN) * PORT_MASQ_MUL),/* TCP */
	ATOMIC_INIT((PORT_MASQ_END-PORT_MASQ_BEGIN) * PORT_MASQ_MUL),/* ICMP */
};

/*
 *	Counts entries that have been requested with specific mport.
 *	Used for incoming packets to "relax" input rule (port in MASQ range).
 */
atomic_t mport_count = ATOMIC_INIT(0);

/* Symbols made available to other modules */
EXPORT_SYMBOL(ip_masq_get_debug_level);
EXPORT_SYMBOL(ip_masq_new);
EXPORT_SYMBOL(ip_masq_listen);
EXPORT_SYMBOL(ip_masq_free_ports);
EXPORT_SYMBOL(ip_masq_out_get);
EXPORT_SYMBOL(ip_masq_in_get);
EXPORT_SYMBOL(ip_masq_put);
EXPORT_SYMBOL(ip_masq_control_add);
EXPORT_SYMBOL(ip_masq_control_del);
EXPORT_SYMBOL(ip_masq_control_get);
EXPORT_SYMBOL(ip_masq_user_hook);
EXPORT_SYMBOL(ip_masq_m_tab);
EXPORT_SYMBOL(ip_masq_state_name);
EXPORT_SYMBOL(ip_masq_select_addr);
EXPORT_SYMBOL(__ip_masq_lock);

/*
 *	2 ip_masq hash tables: for input and output pkts lookups.
 *	ip_masq_m_tab is keyed on proto,m{addr,port} and
 *	ip_masq_s_tab on proto,s{addr,port}.
 */

struct ip_masq *ip_masq_m_tab[IP_MASQ_TAB_SIZE];
struct ip_masq *ip_masq_s_tab[IP_MASQ_TAB_SIZE];

/*
 * timeouts
 *	(disabled: the fixed timeout structure below is compiled out)
 */

#if 000 /* FIXED timeout handling */
static struct ip_fw_masq ip_masq_dummy = {
	MASQUERADE_EXPIRE_TCP,
	MASQUERADE_EXPIRE_TCP_FIN,
	MASQUERADE_EXPIRE_UDP
};

EXPORT_SYMBOL(ip_masq_expire);
struct ip_fw_masq *ip_masq_expire = &ip_masq_dummy;
#endif

/*
 *	These flags enable non-strict d{addr,port} checks.
 *	Given that both (in/out) lookup tables are hashed
 *	by m{addr,port} and s{addr,port} this is quite easy:
 *	an entry carrying one of these flags matches any
 *	destination address (resp. port) during lookup.
 */

#define MASQ_DADDR_PASS	(IP_MASQ_F_NO_DADDR|IP_MASQ_F_DLOOSE)
#define MASQ_DPORT_PASS	(IP_MASQ_F_NO_DPORT|IP_MASQ_F_DLOOSE)

/*
 *	By default enable dest loose semantics
 */
#define CONFIG_IP_MASQ_LOOSE_DEFAULT 1


/*
 * 	Set masq expiration (deletion) and adds timer,
 *	if timeout==0 cancel expiration.
* Warning: it does not check/delete previous timer! */ static void __ip_masq_set_expire(struct ip_masq *ms, unsigned long tout) { if (tout) { ms->timer.expires = jiffies+tout; add_timer(&ms->timer); } else { del_timer(&ms->timer); } } /* * Returns hash value */ static __inline__ unsigned ip_masq_hash_key(unsigned proto, __u32 addr, __u16 port) { return (proto^ntohl(addr)^ntohs(port)) & (IP_MASQ_TAB_SIZE-1); } /* * Hashes ip_masq by its proto,addrs,ports. * should be called with locked tables. * returns bool success. */ static int ip_masq_hash(struct ip_masq *ms) { unsigned hash; if (ms->flags & IP_MASQ_F_HASHED) { IP_MASQ_ERR( "ip_masq_hash(): request for already hashed, called from %p\n", __builtin_return_address(0)); return 0; } /* * Hash by proto,m{addr,port} */ hash = ip_masq_hash_key(ms->protocol, ms->maddr, ms->mport); ms->m_link = ip_masq_m_tab[hash]; atomic_inc(&ms->refcnt); ip_masq_m_tab[hash] = ms; /* * Hash by proto,s{addr,port} */ hash = ip_masq_hash_key(ms->protocol, ms->saddr, ms->sport); ms->s_link = ip_masq_s_tab[hash]; atomic_inc(&ms->refcnt); ip_masq_s_tab[hash] = ms; ms->flags |= IP_MASQ_F_HASHED; return 1; } /* * UNhashes ip_masq from ip_masq_[ms]_tables. * should be called with locked tables. * returns bool success. 
*/
static int ip_masq_unhash(struct ip_masq *ms)
{
	struct ip_masq **link;

	if (!(ms->flags & IP_MASQ_F_HASHED)) {
		IP_MASQ_ERR( "ip_masq_unhash(): request for unhash flagged, called from %p\n",
				__builtin_return_address(0));
		return 0;
	}

	/*
	 *	"m" table: walk the bucket until the link pointing at ms
	 *	(or the end of the chain) is reached, then splice ms out
	 *	and drop the reference that link was holding.
	 */
	link = &ip_masq_m_tab[ip_masq_hash_key(ms->protocol, ms->maddr, ms->mport)];
	while (*link && *link != ms)
		link = &(*link)->m_link;
	if (*link) {
		atomic_dec(&ms->refcnt);
		*link = ms->m_link;
	}

	/*
	 *	"s" table: same procedure on the s_link chain.
	 */
	link = &ip_masq_s_tab[ip_masq_hash_key(ms->protocol, ms->saddr, ms->sport)];
	while (*link && *link != ms)
		link = &(*link)->s_link;
	if (*link) {
		atomic_dec(&ms->refcnt);
		*link = ms->s_link;
	}

	ms->flags &= ~IP_MASQ_F_HASHED;
	return 1;
}

/*
 *	Returns ip_masq associated with supplied parameters, either
 *	broken out of the ip/tcp headers or directly supplied for those
 *	pathological protocols with address/port in the data stream
 *	(ftp, irc).  addresses and ports are in network order.
 *	called for pkts coming from OUTside-to-INside the firewall.
 *
 *	s_addr, s_port: pkt source address (foreign host)
 *	d_addr, d_port: pkt dest address (firewall)
 *
 * 	NB. Cannot check destination address, just for the incoming port.
 * 	reason: archie.doc.ac.uk has 6 interfaces, you send to
 * 	phoenix and get a reply from any other interface(==dst)!
*
 * 	[Only for UDP] - AC
 *
 *	Caller must lock tables
 */
static struct ip_masq * __ip_masq_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
{
	struct ip_masq *ms;
	unsigned bucket = ip_masq_hash_key(protocol, d_addr, d_port);

	for (ms = ip_masq_m_tab[bucket]; ms != NULL; ms = ms->m_link) {
		/* protocol and masq (firewall side) endpoint must match exactly */
		if (protocol != ms->protocol)
			continue;
		if (d_addr != ms->maddr || d_port != ms->mport)
			continue;
		/* remote endpoint must match unless the entry is flagged loose */
		if (s_addr != ms->daddr && !(ms->flags & MASQ_DADDR_PASS))
			continue;
		if (s_port != ms->dport && !(ms->flags & MASQ_DPORT_PASS))
			continue;

		IP_MASQ_DEBUG(2, "look/in %d %08X:%04hX->%08X:%04hX OK\n",
		       protocol,
		       s_addr,
		       s_port,
		       d_addr,
		       d_port);
		/* the caller receives its own reference on the entry */
		atomic_inc(&ms->refcnt);
		return ms;
	}

	IP_MASQ_DEBUG(2, "look/in %d %08X:%04hX->%08X:%04hX fail\n",
	       protocol,
	       s_addr,
	       s_port,
	       d_addr,
	       d_port);
	return NULL;
}

/*
 *	Returns ip_masq associated with supplied parameters, either
 *	broken out of the ip/tcp headers or directly supplied for those
 *	pathological protocols with address/port in the data stream
 *	(ftp, irc).  addresses and ports are in network order.
 *	called for pkts coming from inside-to-OUTside the firewall.
 *
 *	Normally we know the source address and port but for some protocols
 *	(e.g. ftp PASV) we do not know the source port initially.  Alas the
 *	hash is keyed on source port so if the first lookup fails then try again
 *	with a zero port, this time only looking at entries marked "no source
 *	port".
* * Caller must lock tables */ static struct ip_masq * __ip_masq_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) { unsigned hash; struct ip_masq *ms = NULL; /* * Check for "full" addressed entries */ hash = ip_masq_hash_key(protocol, s_addr, s_port); for(ms = ip_masq_s_tab[hash]; ms ; ms = ms->s_link) { if (protocol == ms->protocol && s_addr == ms->saddr && s_port == ms->sport && (d_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) && (d_port==ms->dport || ms->flags & MASQ_DPORT_PASS) ) { IP_MASQ_DEBUG(2, "lk/out1 %d %08X:%04hX->%08X:%04hX OK\n", protocol, s_addr, s_port, d_addr, d_port); atomic_inc(&ms->refcnt); goto out; } } /* * Check for NO_SPORT entries */ hash = ip_masq_hash_key(protocol, s_addr, 0); for(ms = ip_masq_s_tab[hash]; ms ; ms = ms->s_link) { if (ms->flags & IP_MASQ_F_NO_SPORT && protocol == ms->protocol && s_addr == ms->saddr && (d_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) && (d_port==ms->dport || ms->flags & MASQ_DPORT_PASS) ) { IP_MASQ_DEBUG(2, "lk/out2 %d %08X:%04hX->%08X:%04hX OK\n", protocol, s_addr, s_port, d_addr, d_port); atomic_inc(&ms->refcnt); goto out; } } IP_MASQ_DEBUG(2, "lk/out1 %d %08X:%04hX->%08X:%04hX fail\n", protocol, s_addr, s_port, d_addr, d_port); out: return ms; } #ifdef CONFIG_IP_MASQ_NREUSE /* * Returns ip_masq for given proto,m_addr,m_port. * called by allocation routine to find an unused m_port. 
* * Caller must lock tables */ static struct ip_masq * __ip_masq_getbym(int protocol, __u32 m_addr, __u16 m_port) { unsigned hash; struct ip_masq *ms = NULL; hash = ip_masq_hash_key(protocol, m_addr, m_port); for(ms = ip_masq_m_tab[hash]; ms ; ms = ms->m_link) { if ( protocol==ms->protocol && (m_addr==ms->maddr && m_port==ms->mport)) { atomic_inc(&ms->refcnt); goto out; } } out: return ms; } #endif struct ip_masq * ip_masq_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) { struct ip_masq *ms; read_lock(&__ip_masq_lock); ms = __ip_masq_out_get(protocol, s_addr, s_port, d_addr, d_port); read_unlock(&__ip_masq_lock); if (ms) __ip_masq_set_expire(ms, 0); return ms; } struct ip_masq * ip_masq_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) { struct ip_masq *ms; read_lock(&__ip_masq_lock); ms = __ip_masq_in_get(protocol, s_addr, s_port, d_addr, d_port); read_unlock(&__ip_masq_lock); if (ms) __ip_masq_set_expire(ms, 0); return ms; } static __inline__ void __ip_masq_put(struct ip_masq *ms) { atomic_dec(&ms->refcnt); } void ip_masq_put(struct ip_masq *ms) { /* * Decrement refcnt */ __ip_masq_put(ms); /* * if refcnt==2 (2 hashes) */ if (atomic_read(&ms->refcnt)==2) { __ip_masq_set_expire(ms, ms->timeout); } else { IP_MASQ_DEBUG(0, "did not set timer with refcnt=%d, called from %p\n", atomic_read(&ms->refcnt), __builtin_return_address(0)); } } static void masq_expire(unsigned long data) { struct ip_masq *ms = (struct ip_masq *)data; ms->timeout = MASQUERADE_EXPIRE_RETRY; /* * hey, I'm using it */ atomic_inc(&ms->refcnt); IP_MASQ_DEBUG(1, "Masqueraded %s %08lX:%04X expired\n", masq_proto_name(ms->protocol), ntohl(ms->saddr),ntohs(ms->sport)); write_lock(&__ip_masq_lock); #if 0000 /* * Already locked, do bounce ... */ if (ip_masq_nlocks(&__ip_masq_lock) != 1) { goto masq_expire_later; } #endif /* * do I control anybody? */ if (atomic_read(&ms->n_control)) goto masq_expire_later; /* * does anybody controls me? 
*/ if (ms->control) ip_masq_control_del(ms); if (ip_masq_unhash(ms)) { if (ms->flags&IP_MASQ_F_MPORT) { atomic_dec(&mport_count); } else { atomic_inc(ip_masq_free_ports + masq_proto_num(ms->protocol)); } ip_masq_unbind_app(ms); } /* * refcnt==1 implies I'm the only one referrer */ if (atomic_read(&ms->refcnt) == 1) { kfree_s(ms,sizeof(*ms)); MOD_DEC_USE_COUNT; goto masq_expire_out; } masq_expire_later: IP_MASQ_DEBUG(0, "masq_expire delayed: %s %08lX:%04X->%08lX:%04X masq.refcnt-1=%d masq.n_control=%d\n", masq_proto_name(ms->protocol), ntohl(ms->saddr), ntohs(ms->sport), ntohl(ms->daddr), ntohs(ms->dport), atomic_read(&ms->refcnt)-1, atomic_read(&ms->n_control)); ip_masq_put(ms); masq_expire_out: write_unlock(&__ip_masq_lock); } static __u16 get_next_mport(void) { __u16 mport; spin_lock_irq(&masq_port_lock); /* * Try the next available port number */ mport = htons(masq_port++); if (masq_port==PORT_MASQ_END) masq_port = PORT_MASQ_BEGIN; spin_unlock_irq(&masq_port_lock); return mport; } /* * Create a new masquerade list entry, also allocate an * unused mport, keeping the portnumber between the * given boundaries MASQ_BEGIN and MASQ_END. * * Be careful, it can be called from u-space */ struct ip_masq * ip_masq_new(int proto, __u32 maddr, __u16 mport, __u32 saddr, __u16 sport, __u32 daddr, __u16 dport, unsigned mflags) { struct ip_masq *ms, *mst; int ports_tried; atomic_t *free_ports_p = NULL; static int n_fails = 0; int prio; if (masq_proto_num(proto)!=-1 && mport == 0) { free_ports_p = ip_masq_free_ports + masq_proto_num(proto); if (atomic_read(free_ports_p) == 0) { if (++n_fails < 5) IP_MASQ_ERR( "ip_masq_new(proto=%s): no free ports.\n", masq_proto_name(proto)); return NULL; } } prio = (mflags&IP_MASQ_F_USER) ? 
GFP_KERNEL : GFP_ATOMIC; ms = (struct ip_masq *) kmalloc(sizeof(struct ip_masq), prio); if (ms == NULL) { if (++n_fails < 5) IP_MASQ_ERR("ip_masq_new(proto=%s): no memory available.\n", masq_proto_name(proto)); return NULL; } MOD_INC_USE_COUNT; memset(ms, 0, sizeof(*ms)); init_timer(&ms->timer); ms->timer.data = (unsigned long)ms; ms->timer.function = masq_expire; ms->protocol = proto; ms->saddr = saddr; ms->sport = sport; ms->daddr = daddr; ms->dport = dport; ms->flags = mflags; ms->app_data = NULL; ms->control = NULL; atomic_set(&ms->n_control,0); atomic_set(&ms->refcnt,0); if (proto == IPPROTO_UDP && !mport) #ifdef CONFIG_IP_MASQ_LOOSE_DEFAULT /* * Flag this tunnel as "dest loose" * */ ms->flags |= IP_MASQ_F_DLOOSE; #else ms->flags |= IP_MASQ_F_NO_DADDR; #endif /* get masq address from rif */ ms->maddr = maddr; /* * This flag will allow masq. addr (ms->maddr) * to follow forwarding interface address. */ ms->flags |= IP_MASQ_F_NO_REPLY; /* * We want a specific mport. Be careful. */ if (masq_proto_num(proto) == -1 || mport) { ms->mport = mport; /* * Check 5-upla uniqueness */ if (mflags & IP_MASQ_F_USER) write_lock_bh(&__ip_masq_lock); else write_lock(&__ip_masq_lock); mst = __ip_masq_in_get(proto, daddr, dport, maddr, mport); if (mst==NULL) { ms->flags |= IP_MASQ_F_MPORT; atomic_inc(&mport_count); ip_masq_hash(ms); if (mflags & IP_MASQ_F_USER) write_unlock_bh(&__ip_masq_lock); else write_unlock(&__ip_masq_lock); ip_masq_bind_app(ms); atomic_inc(&ms->refcnt); masq_set_state_timeout(ms, IP_MASQ_S_NONE); return ms; } if (mflags & IP_MASQ_F_USER) write_unlock_bh(&__ip_masq_lock); else write_unlock(&__ip_masq_lock); __ip_masq_put(mst); IP_MASQ_ERR( "Already used connection: %s, %d.%d.%d.%d:%d => %d.%d.%d.%d:%d, called from %p\n", masq_proto_name(proto), NIPQUAD(maddr), ntohs(mport), NIPQUAD(daddr), ntohs(dport), __builtin_return_address(0)); goto mport_nono; } for (ports_tried = 0; (atomic_read(free_ports_p) && (ports_tried <= (PORT_MASQ_END - PORT_MASQ_BEGIN))); 
ports_tried++){ mport = ms->mport = get_next_mport(); /* * lookup to find out if this connection is used. */ if (mflags & IP_MASQ_F_USER) write_lock_bh(&__ip_masq_lock); else write_lock(&__ip_masq_lock); #ifdef CONFIG_IP_MASQ_NREUSE mst = __ip_masq_getbym(proto, maddr, mport); #else mst = __ip_masq_in_get(proto, daddr, dport, maddr, mport); #endif if (mst == NULL) { if (atomic_read(free_ports_p) == 0) { if (mflags & IP_MASQ_F_USER) write_unlock_bh(&__ip_masq_lock); else write_unlock(&__ip_masq_lock); break; } atomic_dec(free_ports_p); ip_masq_hash(ms); if (mflags & IP_MASQ_F_USER) write_unlock_bh(&__ip_masq_lock); else write_unlock(&__ip_masq_lock); ip_masq_bind_app(ms); n_fails = 0; atomic_inc(&ms->refcnt); masq_set_state_timeout(ms, IP_MASQ_S_NONE); return ms; } if (mflags & IP_MASQ_F_USER) write_unlock_bh(&__ip_masq_lock); else write_unlock(&__ip_masq_lock); __ip_masq_put(mst); } if (++n_fails < 5) IP_MASQ_ERR( "ip_masq_new(proto=%s): could not get free masq entry (free=%d).\n", masq_proto_name(ms->protocol), atomic_read(free_ports_p)); mport_nono: kfree_s(ms, sizeof(*ms)); MOD_DEC_USE_COUNT; return NULL; } /* * Get transport protocol data offset, check against size * return: * 0 if other IP proto * -1 if error */ static __inline__ int proto_doff(unsigned proto, char *th, unsigned size) { int ret = -1; switch (proto) { case IPPROTO_ICMP: if (size >= sizeof(struct icmphdr)) ret = sizeof(struct icmphdr); break; case IPPROTO_UDP: if (size >= sizeof(struct udphdr)) ret = sizeof(struct udphdr); break; case IPPROTO_TCP: /* * Is this case, this check _also_ avoids * touching an invalid pointer if * size is invalid */ if (size >= sizeof(struct tcphdr)) { ret = ((struct tcphdr*)th)->doff << 2; if (ret > size) { ret = -1 ; } } break; default: /* Other proto: nothing to say, by now :) */ ret = 0; } if (ret < 0) IP_MASQ_DEBUG(0, "mess proto_doff for proto=%d, size =%d\n", proto, size); return ret; } int ip_fw_masquerade(struct sk_buff **skb_p, __u32 maddr) { struct sk_buff 
*skb = *skb_p; struct iphdr *iph = skb->nh.iph; union ip_masq_tphdr h; struct ip_masq *ms; int size; /* * doff holds transport protocol data offset * csum holds its checksum * csum_ok says if csum is valid */ int doff = 0; int csum = 0; int csum_ok = 0; /* * We can only masquerade protocols with ports... and hack some ICMPs */ h.raw = (char*) iph + iph->ihl * 4; size = ntohs(iph->tot_len) - (iph->ihl * 4); doff = proto_doff(iph->protocol, h.raw, size); if (doff <= 0) { /* * Output path: do not pass other IP protos nor * invalid packets. */ return -1; } switch (iph->protocol) { case IPPROTO_ICMP: return(ip_fw_masq_icmp(skb_p, maddr)); case IPPROTO_UDP: if (h.uh->check == 0) /* No UDP checksum */ break; case IPPROTO_TCP: /* Make sure packet is in the masq range */ IP_MASQ_DEBUG(3, "O-pkt: %s size=%d\n", masq_proto_name(iph->protocol), size); #ifdef CONFIG_IP_MASQ_DEBUG if (ip_masq_get_debug_level() > 3) { skb->ip_summed = CHECKSUM_NONE; } #endif /* Check that the checksum is OK */ switch (skb->ip_summed) { case CHECKSUM_NONE: { csum = csum_partial(h.raw + doff, size - doff, 0); IP_MASQ_DEBUG(3, "O-pkt: %s I-datacsum=%d\n", masq_proto_name(iph->protocol), csum); skb->csum = csum_partial(h.raw , doff, csum); } case CHECKSUM_HW: if (csum_tcpudp_magic(iph->saddr, iph->daddr, size, iph->protocol, skb->csum)) { IP_MASQ_DEBUG(0, "Outgoing failed %s checksum from %d.%d.%d.%d (size=%d)!\n", masq_proto_name(iph->protocol), NIPQUAD(iph->saddr), size); return -1; } default: /* CHECKSUM_UNNECESSARY */ } break; default: return -1; } /* * Now hunt the list to see if we have an old entry */ /* h.raw = (char*) iph + iph->ihl * 4; */ IP_MASQ_DEBUG(2, "Outgoing %s %08lX:%04X -> %08lX:%04X\n", masq_proto_name(iph->protocol), ntohl(iph->saddr), ntohs(h.portp[0]), ntohl(iph->daddr), ntohs(h.portp[1])); ms = ip_masq_out_get_iph(iph); if (ms!=NULL) { /* * If sysctl !=0 and no pkt has been received yet * in this tunnel and routing iface address has changed... * "You are welcome, diald". 
*/ if ( sysctl_ip_dynaddr && ms->flags & IP_MASQ_F_NO_REPLY && maddr != ms->maddr) { if (sysctl_ip_dynaddr > 1) { IP_MASQ_INFO( "ip_fw_masquerade(): change masq.addr from %d.%d.%d.%d to %d.%d.%d.%d\n", NIPQUAD(ms->maddr),NIPQUAD(maddr)); } write_lock(&__ip_masq_lock); ip_masq_unhash(ms); ms->maddr = maddr; ip_masq_hash(ms); write_unlock(&__ip_masq_lock); } /* * Set sport if not defined yet (e.g. ftp PASV). Because * masq entries are hashed on sport, unhash with old value * and hash with new. */ if ( ms->flags & IP_MASQ_F_NO_SPORT && ms->protocol == IPPROTO_TCP ) { ms->flags &= ~IP_MASQ_F_NO_SPORT; write_lock(&__ip_masq_lock); ip_masq_unhash(ms); ms->sport = h.portp[0]; ip_masq_hash(ms); /* hash on new sport */ write_unlock(&__ip_masq_lock); IP_MASQ_DEBUG(1, "ip_fw_masquerade(): filled sport=%d\n", ntohs(ms->sport)); } if (ms->flags & IP_MASQ_F_DLOOSE) { /* * update dest loose values */ ms->dport = h.portp[1]; ms->daddr = iph->daddr; } } else { /* * Nope, not found, create a new entry for it */ #ifdef CONFIG_IP_MASQUERADE_MOD if (!(ms = ip_masq_mod_out_create(skb, iph, maddr))) #endif ms = ip_masq_new(iph->protocol, maddr, 0, iph->saddr, h.portp[0], iph->daddr, h.portp[1], 0); if (ms == NULL) return -1; } /* * Call module's output update hook */ #ifdef CONFIG_IP_MASQUERADE_MOD ip_masq_mod_out_update(skb, iph, ms); #endif /* * Change the fragments origin */ size = skb->len - (h.raw - skb->nh.raw); /* * Set iph addr and port from ip_masq obj. */ iph->saddr = ms->maddr; h.portp[0] = ms->mport; /* * Invalidate csum saving if tunnel has masq helper */ if (ms->app) csum_ok = 0; /* * Attempt ip_masq_app call. * will fix ip_masq and iph seq stuff */ if (ip_masq_app_pkt_out(ms, skb_p, maddr) != 0) { /* * skb has possibly changed, update pointers. 
*/ skb = *skb_p; iph = skb->nh.iph; h.raw = (char*) iph + iph->ihl *4; size = skb->len - (h.raw - skb->nh.raw); /* doff should have not changed */ } /* * Adjust packet accordingly to protocol */ /* * Transport's payload partial csum */ if (!csum_ok) { csum = csum_partial(h.raw + doff, size - doff, 0); } skb->csum = csum; IP_MASQ_DEBUG(3, "O-pkt: %s size=%d O-datacsum=%d\n", masq_proto_name(iph->protocol), size, csum); /* * Protocol csum */ switch (iph->protocol) { case IPPROTO_TCP: h.th->check = 0; h.th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, size, iph->protocol, csum_partial(h.raw , doff, csum)); IP_MASQ_DEBUG(3, "O-pkt: %s O-csum=%d (+%d)\n", masq_proto_name(iph->protocol), h.th->check, (char*) & (h.th->check) - (char*) h.raw); break; case IPPROTO_UDP: h.uh->check = 0; h.uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, size, iph->protocol, csum_partial(h.raw , doff, csum)); if (h.uh->check == 0) h.uh->check = 0xFFFF; IP_MASQ_DEBUG(3, "O-pkt: %s O-csum=%d (+%d)\n", masq_proto_name(iph->protocol), h.uh->check, (char*) &(h.uh->check)- (char*) h.raw); break; } ip_send_check(iph); IP_MASQ_DEBUG(2, "O-routed from %08lX:%04X with masq.addr %08lX\n", ntohl(ms->maddr),ntohs(ms->mport),ntohl(maddr)); masq_set_state(ms, 1, iph, h.portp); ip_masq_put(ms); return 0; } /* * Restore original addresses and ports in the original IP * datagram if the failing packet has been [de]masqueraded. * This is ugly in the extreme. We no longer have the original * packet so we have to reconstruct it from the failing packet * plus data in the masq tables. The resulting "original data" * should be good enough to tell the sender which session to * throttle. Relies on far too much knowledge of masq internals, * there ought to be a better way - KAO 990303. * * Moved here from icmp.c - JJC. * Already known: type == ICMP_DEST_UNREACH, IPSKB_MASQUERADED * skb->nh.iph points to original header. 
*
 *	Must try both OUT and IN tables; we could add a flag
 *	ala IPSKB_MASQUERADED to avoid 2nd tables lookup, but this is VERY
 *	unlike because routing makes mtu decision before reaching
 *	ip_fw_masquerade().
 *
 */
int ip_fw_unmasq_icmp(struct sk_buff *skb) {
	struct iphdr *iph = skb->nh.iph;
	/* the two 16-bit ports sit right after the IP header */
	__u16 *portp = (__u16 *)((char *)iph + iph->ihl*4);
	struct ip_masq *hit;

	/*
	 *	Always called from _bh context: use read_[un]lock()
	 */

	/*
	 *	First try the "out" table, for a packet that has bounced:
	 *		out->in(frag_needed!)->OUT[icmp]
	 *
	 *	iph->daddr is IN host
	 *	iph->saddr is OUT host
	 */
	read_lock(&__ip_masq_lock);
	hit = __ip_masq_out_get(iph->protocol,
			iph->daddr, portp[1],
			iph->saddr, portp[0]);
	read_unlock(&__ip_masq_lock);

	if (hit != NULL) {
		IP_MASQ_DEBUG(1, "Incoming frag_need rewrited from %d.%d.%d.%d to %d.%d.%d.%d\n",
			NIPQUAD(iph->daddr), NIPQUAD(hit->maddr));
		/* destination side goes back to the masq address/port */
		iph->daddr = hit->maddr;
		portp[1] = hit->mport;
		__ip_masq_put(hit);
		return 1;
	}

	/*
	 *	Then the "in" table
	 *		in->out(frag_needed!)->IN[icmp]
	 *
	 *	iph->daddr is OUT host
	 *	iph->saddr is MASQ host
	 *
	 */
	read_lock(&__ip_masq_lock);
	hit = __ip_masq_in_get(iph->protocol,
			iph->daddr, portp[1],
			iph->saddr, portp[0]);
	read_unlock(&__ip_masq_lock);

	if (hit == NULL)
		return 0;

	IP_MASQ_DEBUG(1, "Outgoing frag_need rewrited from %d.%d.%d.%d to %d.%d.%d.%d\n",
		NIPQUAD(iph->saddr), NIPQUAD(hit->saddr));
	/* source side goes back to the tunnel's source address/port */
	iph->saddr = hit->saddr;
	portp[0] = hit->sport;
	__ip_masq_put(hit);
	return 1;
}

/*
 *	Handle ICMP messages in forward direction.
 *	Find any that might be relevant, check against existing connections,
 *	forward to masqueraded host if relevant.
* Currently handles error types - unreachable, quench, ttl exceeded */ int ip_fw_masq_icmp(struct sk_buff **skb_p, __u32 maddr) { struct sk_buff *skb = *skb_p; struct iphdr *iph = skb->nh.iph; struct icmphdr *icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2)); struct iphdr *ciph; /* The ip header contained within the ICMP */ __u16 *pptr; /* port numbers from TCP/UDP contained header */ struct ip_masq *ms; unsigned short len = ntohs(iph->tot_len) - (iph->ihl * 4); IP_MASQ_DEBUG(2, "Incoming forward ICMP (%d,%d) %lX -> %lX\n", icmph->type, ntohs(icmp_id(icmph)), ntohl(iph->saddr), ntohl(iph->daddr)); #ifdef CONFIG_IP_MASQUERADE_ICMP if ((icmph->type == ICMP_ECHO ) || (icmph->type == ICMP_TIMESTAMP ) || (icmph->type == ICMP_INFO_REQUEST ) || (icmph->type == ICMP_ADDRESS )) { IP_MASQ_DEBUG(2, "icmp request rcv %lX->%lX id %d type %d\n", ntohl(iph->saddr), ntohl(iph->daddr), ntohs(icmp_id(icmph)), icmph->type); ms = ip_masq_out_get(iph->protocol, iph->saddr, icmp_id(icmph), iph->daddr, icmp_hv_req(icmph)); if (ms == NULL) { ms = ip_masq_new(iph->protocol, maddr, 0, iph->saddr, icmp_id(icmph), iph->daddr, icmp_hv_req(icmph), 0); if (ms == NULL) return (-1); IP_MASQ_DEBUG(1, "Created new icmp entry\n"); } /* Rewrite source address */ /* * If sysctl !=0 and no pkt has been received yet * in this tunnel and routing iface address has changed... * "You are welcome, diald". 
*/ if ( sysctl_ip_dynaddr && ms->flags & IP_MASQ_F_NO_REPLY && maddr != ms->maddr) { if (sysctl_ip_dynaddr > 1) { IP_MASQ_INFO( "ip_fw_masq_icmp(): change masq.addr %d.%d.%d.%d to %d.%d.%d.%d", NIPQUAD(ms->maddr), NIPQUAD(maddr)); } write_lock(&__ip_masq_lock); ip_masq_unhash(ms); ms->maddr = maddr; ip_masq_hash(ms); write_unlock(&__ip_masq_lock); } iph->saddr = ms->maddr; ip_send_check(iph); /* Rewrite port (id) */ (icmph->un).echo.id = ms->mport; icmph->checksum = 0; icmph->checksum = ip_compute_csum((unsigned char *)icmph, len); IP_MASQ_DEBUG(2, "icmp request rwt %lX->%lX id %d type %d\n", ntohl(iph->saddr), ntohl(iph->daddr), ntohs(icmp_id(icmph)), icmph->type); masq_set_state(ms, 1, iph, icmph); ip_masq_put(ms); return 1; } #endif /* * Work through seeing if this is for us. * These checks are supposed to be in an order that * means easy things are checked first to speed up * processing.... however this means that some * packets will manage to get a long way down this * stack and then be rejected, but thats life */ if ((icmph->type != ICMP_DEST_UNREACH) && (icmph->type != ICMP_SOURCE_QUENCH) && (icmph->type != ICMP_TIME_EXCEEDED)) return 0; /* Now find the contained IP header */ ciph = (struct iphdr *) (icmph + 1); #ifdef CONFIG_IP_MASQUERADE_ICMP if (ciph->protocol == IPPROTO_ICMP) { /* * This section handles ICMP errors for ICMP packets */ struct icmphdr *cicmph = (struct icmphdr *)((char *)ciph + (ciph->ihl<<2)); IP_MASQ_DEBUG(2, "fw icmp/icmp rcv %lX->%lX id %d type %d\n", ntohl(ciph->saddr), ntohl(ciph->daddr), ntohs(icmp_id(cicmph)), cicmph->type); read_lock(&__ip_masq_lock); ms = __ip_masq_out_get(ciph->protocol, ciph->daddr, icmp_id(cicmph), ciph->saddr, icmp_hv_rep(cicmph)); read_unlock(&__ip_masq_lock); if (ms == NULL) return 0; /* Now we do real damage to this packet...! 
*/ /* First change the source IP address, and recalc checksum */ iph->saddr = ms->maddr; ip_send_check(iph); /* Now change the *dest* address in the contained IP */ ciph->daddr = ms->maddr; __ip_masq_put(ms); ip_send_check(ciph); /* Change the ID to the masqed one! */ (cicmph->un).echo.id = ms->mport; /* And finally the ICMP checksum */ icmph->checksum = 0; icmph->checksum = ip_compute_csum((unsigned char *) icmph, len); IP_MASQ_DEBUG(2, "fw icmp/icmp rwt %lX->%lX id %d type %d\n", ntohl(ciph->saddr), ntohl(ciph->daddr), ntohs(icmp_id(cicmph)), cicmph->type); return 1; } #endif /* CONFIG_IP_MASQUERADE_ICMP */ /* We are only interested ICMPs generated from TCP or UDP packets */ if ((ciph->protocol != IPPROTO_UDP) && (ciph->protocol != IPPROTO_TCP)) return 0; /* * Find the ports involved - this packet was * incoming so the ports are right way round * (but reversed relative to outer IP header!) */ pptr = (__u16 *)&(((char *)ciph)[ciph->ihl*4]); #if 0 if (ntohs(pptr[1]) < PORT_MASQ_BEGIN || ntohs(pptr[1]) > PORT_MASQ_END) return 0; #endif /* Ensure the checksum is correct */ if (ip_compute_csum((unsigned char *) icmph, len)) { /* Failed checksum! */ IP_MASQ_DEBUG(0, "forward ICMP: failed checksum from %d.%d.%d.%d!\n", NIPQUAD(iph->saddr)); return(-1); } IP_MASQ_DEBUG(2, "Handling forward ICMP for %08lX:%04X -> %08lX:%04X\n", ntohl(ciph->saddr), ntohs(pptr[0]), ntohl(ciph->daddr), ntohs(pptr[1])); #if 0 /* This is pretty much what __ip_masq_in_get_iph() does */ ms = __ip_masq_in_get(ciph->protocol, ciph->saddr, pptr[0], ciph->daddr, pptr[1]); #endif read_lock(&__ip_masq_lock); ms = __ip_masq_out_get(ciph->protocol, ciph->daddr, pptr[1], ciph->saddr, pptr[0]); read_unlock(&__ip_masq_lock); if (ms == NULL) return 0; /* Now we do real damage to this packet...! 
*/ /* First change the source IP address, and recalc checksum */ iph->saddr = ms->maddr; ip_send_check(iph); /* Now change the *dest* address in the contained IP */ ciph->daddr = ms->maddr; ip_send_check(ciph); /* the TCP/UDP dest port - cannot redo check */ pptr[1] = ms->mport; __ip_masq_put(ms); /* And finally the ICMP checksum */ icmph->checksum = 0; icmph->checksum = ip_compute_csum((unsigned char *) icmph, len); IP_MASQ_DEBUG(2, "Rewrote forward ICMP to %08lX:%04X -> %08lX:%04X\n", ntohl(ciph->saddr), ntohs(pptr[0]), ntohl(ciph->daddr), ntohs(pptr[1])); return 1; } /* * Own skb_cow() beast, tweaked for rewriting commonly * used pointers in masq code */ static struct sk_buff * masq_skb_cow(struct sk_buff **skb_p, struct iphdr **iph_p, unsigned char **t_p) { struct sk_buff *skb=(*skb_p); if (skb_cloned(skb)) { skb = skb_copy(skb, GFP_ATOMIC); if (skb) { /* * skb changed, update other pointers */ struct iphdr *iph = skb->nh.iph; kfree_skb(*skb_p); *skb_p = skb; *iph_p = iph; *t_p = (char*) iph + iph->ihl * 4; } } return skb; } /* * Handle ICMP messages in reverse (demasquerade) direction. * Find any that might be relevant, check against existing connections, * forward to masqueraded host if relevant. 
* Currently handles error types - unreachable, quench, ttl exceeded */ int ip_fw_demasq_icmp(struct sk_buff **skb_p) { struct sk_buff *skb = *skb_p; struct iphdr *iph = skb->nh.iph; struct icmphdr *icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2)); struct iphdr *ciph; /* The ip header contained within the ICMP */ __u16 *pptr; /* port numbers from TCP/UDP contained header */ struct ip_masq *ms; unsigned short len = ntohs(iph->tot_len) - (iph->ihl * 4); IP_MASQ_DEBUG(2, "icmp in/rev (%d,%d) %lX -> %lX\n", icmph->type, ntohs(icmp_id(icmph)), ntohl(iph->saddr), ntohl(iph->daddr)); #ifdef CONFIG_IP_MASQUERADE_ICMP if ((icmph->type == ICMP_ECHOREPLY) || (icmph->type == ICMP_TIMESTAMPREPLY) || (icmph->type == ICMP_INFO_REPLY) || (icmph->type == ICMP_ADDRESSREPLY)) { IP_MASQ_DEBUG(2, "icmp reply rcv %lX->%lX id %d type %d, req %d\n", ntohl(iph->saddr), ntohl(iph->daddr), ntohs(icmp_id(icmph)), icmph->type, icmp_type_request(icmph->type)); ms = ip_masq_in_get(iph->protocol, iph->saddr, icmp_hv_rep(icmph), iph->daddr, icmp_id(icmph)); if (ms == NULL) return 0; /* * got reply, so clear flag */ ms->flags &= ~IP_MASQ_F_NO_REPLY; if ((skb=masq_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) { ip_masq_put(ms); return -1; } /* Reset source address */ iph->daddr = ms->saddr; /* Redo IP header checksum */ ip_send_check(iph); /* Set ID to fake port number */ (icmph->un).echo.id = ms->sport; /* Reset ICMP checksum and set expiry */ icmph->checksum=0; icmph->checksum=ip_compute_csum((unsigned char *)icmph,len); IP_MASQ_DEBUG(2, "icmp reply rwt %lX->%lX id %d type %d\n", ntohl(iph->saddr), ntohl(iph->daddr), ntohs(icmp_id(icmph)), icmph->type); masq_set_state(ms, 0, iph, icmph); ip_masq_put(ms); return 1; } else { #endif if ((icmph->type != ICMP_DEST_UNREACH) && (icmph->type != ICMP_SOURCE_QUENCH) && (icmph->type != ICMP_TIME_EXCEEDED)) return 0; #ifdef CONFIG_IP_MASQUERADE_ICMP } #endif /* * If we get here we have an ICMP error of one of the above 3 types * Now find the 
contained IP header */ ciph = (struct iphdr *) (icmph + 1); #ifdef CONFIG_IP_MASQUERADE_ICMP if (ciph->protocol == IPPROTO_ICMP) { /* * This section handles ICMP errors for ICMP packets * * First get a new ICMP header structure out of the IP packet */ struct icmphdr *cicmph = (struct icmphdr *)((char *)ciph + (ciph->ihl<<2)); IP_MASQ_DEBUG(2, "rv icmp/icmp rcv %lX->%lX id %d type %d\n", ntohl(ciph->saddr), ntohl(ciph->daddr), ntohs(icmp_id(cicmph)), cicmph->type); read_lock(&__ip_masq_lock); ms = __ip_masq_in_get(ciph->protocol, ciph->daddr, icmp_hv_req(cicmph), ciph->saddr, icmp_id(cicmph)); read_unlock(&__ip_masq_lock); if (ms == NULL) return 0; if ((skb=masq_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) { __ip_masq_put(ms); return -1; } ciph = (struct iphdr *) (icmph + 1); cicmph = (struct icmphdr *)((char *)ciph + (ciph->ihl<<2)); /* Now we do real damage to this packet...! */ /* First change the dest IP address, and recalc checksum */ iph->daddr = ms->saddr; ip_send_check(iph); /* Now change the *source* address in the contained IP */ ciph->saddr = ms->saddr; ip_send_check(ciph); /* Change the ID to the original one! 
*/ (cicmph->un).echo.id = ms->sport; __ip_masq_put(ms); /* And finally the ICMP checksum */ icmph->checksum = 0; icmph->checksum = ip_compute_csum((unsigned char *) icmph, len); IP_MASQ_DEBUG(2, "rv icmp/icmp rwt %lX->%lX id %d type %d\n", ntohl(ciph->saddr), ntohl(ciph->daddr), ntohs(icmp_id(cicmph)), cicmph->type); return 1; } #endif /* CONFIG_IP_MASQUERADE_ICMP */ /* We are only interested ICMPs generated from TCP or UDP packets */ if ((ciph->protocol != IPPROTO_UDP) && (ciph->protocol != IPPROTO_TCP)) return 0; /* * Find the ports involved - remember this packet was * *outgoing* so the ports are reversed (and addresses) */ pptr = (__u16 *)&(((char *)ciph)[ciph->ihl*4]); if (ntohs(pptr[0]) < PORT_MASQ_BEGIN || ntohs(pptr[0]) > PORT_MASQ_END) return 0; /* Ensure the checksum is correct */ if (ip_compute_csum((unsigned char *) icmph, len)) { /* Failed checksum! */ IP_MASQ_ERR( "reverse ICMP: failed checksum from %d.%d.%d.%d!\n", NIPQUAD(iph->saddr)); return(-1); } IP_MASQ_DEBUG(2, "Handling reverse ICMP for %08lX:%04X -> %08lX:%04X\n", ntohl(ciph->saddr), ntohs(pptr[0]), ntohl(ciph->daddr), ntohs(pptr[1])); /* This is pretty much what __ip_masq_in_get_iph() does, except params are wrong way round */ read_lock(&__ip_masq_lock); ms = __ip_masq_in_get(ciph->protocol, ciph->daddr, pptr[1], ciph->saddr, pptr[0]); read_unlock(&__ip_masq_lock); if (ms == NULL) return 0; if ((skb=masq_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) { __ip_masq_put(ms); return -1; } ciph = (struct iphdr *) (icmph + 1); pptr = (__u16 *)&(((char *)ciph)[ciph->ihl*4]); /* Now we do real damage to this packet...! 
*/ /* First change the dest IP address, and recalc checksum */ iph->daddr = ms->saddr; ip_send_check(iph); /* Now change the *source* address in the contained IP */ ciph->saddr = ms->saddr; ip_send_check(ciph); /* the TCP/UDP source port - cannot redo check */ pptr[0] = ms->sport; __ip_masq_put(ms); /* And finally the ICMP checksum */ icmph->checksum = 0; icmph->checksum = ip_compute_csum((unsigned char *) icmph, len); IP_MASQ_DEBUG(2, "Rewrote reverse ICMP to %08lX:%04X -> %08lX:%04X\n", ntohl(ciph->saddr), ntohs(pptr[0]), ntohl(ciph->daddr), ntohs(pptr[1])); return 1; } /* * Check if it's an masqueraded port, look it up, * and send it on its way... * * Better not have many hosts using the designated portrange * as 'normal' ports, or you'll be spending many time in * this function. */ int ip_fw_demasquerade(struct sk_buff **skb_p) { struct sk_buff *skb = *skb_p; struct iphdr *iph = skb->nh.iph; union ip_masq_tphdr h; struct ip_masq *ms; unsigned short size; int doff = 0; int csum = 0; int csum_ok = 0; __u32 maddr; /* * Big tappo: only PACKET_HOST (nor loopback neither mcasts) * ... don't know why 1st test DOES NOT include 2nd (?) */ if (skb->pkt_type != PACKET_HOST || skb->dev == &loopback_dev) { IP_MASQ_DEBUG(2, "ip_fw_demasquerade(): packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", skb->pkt_type, iph->protocol, NIPQUAD(iph->daddr)); return 0; } h.raw = (char*) iph + iph->ihl * 4; /* * IP payload size */ size = ntohs(iph->tot_len) - (iph->ihl * 4); doff = proto_doff(iph->protocol, h.raw, size); switch (doff) { case 0: /* * Input path: other IP protos Ok, will * reach local sockets path. */ return 0; case -1: IP_MASQ_DEBUG(0, "I-pkt invalid packet data size\n"); return -1; } maddr = iph->daddr; switch (iph->protocol) { case IPPROTO_ICMP: return(ip_fw_demasq_icmp(skb_p)); case IPPROTO_TCP: case IPPROTO_UDP: /* * Make sure packet is in the masq range * ... or some mod-ule relaxes input range * ... 
or there is still some `special' mport opened */ if ((ntohs(h.portp[1]) < PORT_MASQ_BEGIN || ntohs(h.portp[1]) > PORT_MASQ_END) #ifdef CONFIG_IP_MASQUERADE_MOD && (ip_masq_mod_in_rule(skb, iph) != 1) #endif && atomic_read(&mport_count) == 0 ) return 0; /* Check that the checksum is OK */ if ((iph->protocol == IPPROTO_UDP) && (h.uh->check == 0)) /* No UDP checksum */ break; #ifdef CONFIG_IP_MASQ_DEBUG if (ip_masq_get_debug_level() > 3) { skb->ip_summed = CHECKSUM_NONE; } #endif switch (skb->ip_summed) { case CHECKSUM_NONE: csum = csum_partial(h.raw + doff, size - doff, 0); csum_ok++; skb->csum = csum_partial(h.raw , doff, csum); case CHECKSUM_HW: if (csum_tcpudp_magic(iph->saddr, iph->daddr, size, iph->protocol, skb->csum)) { IP_MASQ_DEBUG(0, "Incoming failed %s checksum from %d.%d.%d.%d (size=%d)!\n", masq_proto_name(iph->protocol), NIPQUAD(iph->saddr), size); return -1; } default: /* CHECKSUM_UNNECESSARY */ } break; default: return 0; } IP_MASQ_DEBUG(2, "Incoming %s %08lX:%04X -> %08lX:%04X\n", masq_proto_name(iph->protocol), ntohl(iph->saddr), ntohs(h.portp[0]), ntohl(iph->daddr), ntohs(h.portp[1])); /* * reroute to original host:port if found... 
*/ ms = ip_masq_in_get_iph(iph); /* * Give additional modules a chance to create an entry */ #ifdef CONFIG_IP_MASQUERADE_MOD if (!ms) ms = ip_masq_mod_in_create(skb, iph, maddr); /* * Call module's input update hook */ ip_masq_mod_in_update(skb, iph, ms); #endif if (ms != NULL) { /* * got reply, so clear flag */ ms->flags &= ~IP_MASQ_F_NO_REPLY; /* * Set daddr,dport if not defined yet * and tunnel is not setup as "dest loose" */ if (ms->flags & IP_MASQ_F_DLOOSE) { /* * update dest loose values */ ms->dport = h.portp[0]; ms->daddr = iph->saddr; } else { if ( ms->flags & IP_MASQ_F_NO_DPORT ) { /* && ms->protocol == IPPROTO_TCP ) { */ ms->flags &= ~IP_MASQ_F_NO_DPORT; ms->dport = h.portp[0]; IP_MASQ_DEBUG(1, "ip_fw_demasquerade(): filled dport=%d\n", ntohs(ms->dport)); } if (ms->flags & IP_MASQ_F_NO_DADDR ) { /* && ms->protocol == IPPROTO_TCP) { */ ms->flags &= ~IP_MASQ_F_NO_DADDR; ms->daddr = iph->saddr; IP_MASQ_DEBUG(1, "ip_fw_demasquerade(): filled daddr=%lX\n", ntohl(ms->daddr)); } } if ((skb=masq_skb_cow(skb_p, &iph, &h.raw)) == NULL) { ip_masq_put(ms); return -1; } iph->daddr = ms->saddr; h.portp[1] = ms->sport; /* * Invalidate csum saving if tunnel has masq helper */ if (ms->app) csum_ok = 0; /* * Attempt ip_masq_app call. * will fix ip_masq and iph ack_seq stuff */ if (ip_masq_app_pkt_in(ms, skb_p, maddr) != 0) { /* * skb has changed, update pointers. */ skb = *skb_p; iph = skb->nh.iph; h.raw = (char*) iph + iph->ihl*4; size = ntohs(iph->tot_len) - (iph->ihl * 4); } /* * Yug! 
adjust UDP/TCP checksums */ /* * Transport's payload partial csum */ if (!csum_ok) { csum = csum_partial(h.raw + doff, size - doff, 0); } skb->csum = csum; /* * Protocol csum */ switch (iph->protocol) { case IPPROTO_TCP: h.th->check = 0; h.th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, size, iph->protocol, csum_partial(h.raw , doff, csum)); break; case IPPROTO_UDP: h.uh->check = 0; h.uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, size, iph->protocol, csum_partial(h.raw , doff, csum)); if (h.uh->check == 0) h.uh->check = 0xFFFF; break; } ip_send_check(iph); IP_MASQ_DEBUG(2, "I-routed to %08lX:%04X\n",ntohl(iph->daddr),ntohs(h.portp[1])); masq_set_state (ms, 0, iph, h.portp); ip_masq_put(ms); return 1; } /* sorry, all this trouble for a no-hit :) */ return 0; } void ip_masq_control_add(struct ip_masq *ms, struct ip_masq* ctl_ms) { if (ms->control) { IP_MASQ_ERR( "request control ADD for already controlled: %d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", NIPQUAD(ms->saddr),ntohs(ms->sport), NIPQUAD(ms->daddr),ntohs(ms->dport)); ip_masq_control_del(ms); } IP_MASQ_DEBUG(1, "ADDing control for: ms.dst=%d.%d.%d.%d:%d ctl_ms.dst=%d.%d.%d.%d:%d\n", NIPQUAD(ms->daddr),ntohs(ms->dport), NIPQUAD(ctl_ms->daddr),ntohs(ctl_ms->dport)); ms->control = ctl_ms; atomic_inc(&ctl_ms->n_control); } void ip_masq_control_del(struct ip_masq *ms) { struct ip_masq *ctl_ms = ms->control; if (!ctl_ms) { IP_MASQ_ERR( "request control DEL for uncontrolled: %d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", NIPQUAD(ms->saddr),ntohs(ms->sport), NIPQUAD(ms->daddr),ntohs(ms->dport)); return; } IP_MASQ_DEBUG(1, "DELeting control for: ms.dst=%d.%d.%d.%d:%d ctl_ms.dst=%d.%d.%d.%d:%d\n", NIPQUAD(ms->daddr),ntohs(ms->dport), NIPQUAD(ctl_ms->daddr),ntohs(ctl_ms->dport)); ms->control = NULL; if (atomic_read(&ctl_ms->n_control) == 0) { IP_MASQ_ERR( "BUG control DEL with n=0 : %d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", NIPQUAD(ms->saddr),ntohs(ms->sport), NIPQUAD(ms->daddr),ntohs(ms->dport)); return; } 
atomic_dec(&ctl_ms->n_control); } struct ip_masq * ip_masq_control_get(struct ip_masq *ms) { return ms->control; } #ifdef CONFIG_PROC_FS /* * /proc/net entries * From userspace */ static int ip_msqhst_procinfo(char *buffer, char **start, off_t offset, int length, int unused) { off_t pos=0, begin; struct ip_masq *ms; char temp[129]; int idx = 0; int len=0; if (offset < 128) { sprintf(temp, "Prc FromIP FPrt ToIP TPrt Masq Init-seq Delta PDelta Expires (free=%d,%d,%d)", atomic_read(ip_masq_free_ports), atomic_read(ip_masq_free_ports+1), atomic_read(ip_masq_free_ports+2)); len = sprintf(buffer, "%-127s\n", temp); } pos = 128; for(idx = 0; idx < IP_MASQ_TAB_SIZE; idx++) { /* * Lock is actually only need in next loop * we are called from uspace: must stop bh. */ read_lock_bh(&__ip_masq_lock); for(ms = ip_masq_m_tab[idx]; ms ; ms = ms->m_link) { pos += 128; if (pos <= offset) { len = 0; continue; } /* * We have locked the tables, no need to del/add timers * nor cli() 8) */ sprintf(temp,"%s %08lX:%04X %08lX:%04X %04X %08X %6d %6d %7lu", masq_proto_name(ms->protocol), ntohl(ms->saddr), ntohs(ms->sport), ntohl(ms->daddr), ntohs(ms->dport), ntohs(ms->mport), ms->out_seq.init_seq, ms->out_seq.delta, ms->out_seq.previous_delta, ms->timer.expires-jiffies); len += sprintf(buffer+len, "%-127s\n", temp); if(len >= length) { read_unlock_bh(&__ip_masq_lock); goto done; } } read_unlock_bh(&__ip_masq_lock); } done: begin = len - (pos - offset); *start = buffer + begin; len -= begin; if(len>length) len = length; return len; } #endif /* * Timeouts handling by ipfwadm/ipchains * From ip_fw.c */ int ip_fw_masq_timeouts(void *m, int len) { struct ip_fw_masq *masq; int ret = EINVAL; if (len != sizeof(struct ip_fw_masq)) { IP_MASQ_DEBUG(1, "ip_fw_masq_timeouts: length %d, expected %d\n", len, sizeof(struct ip_fw_masq)); } else { masq = (struct ip_fw_masq *)m; if (masq->tcp_timeout) masq_timeout_table.timeout[IP_MASQ_S_ESTABLISHED] = masq->tcp_timeout; if (masq->tcp_fin_timeout) 
masq_timeout_table.timeout[IP_MASQ_S_FIN_WAIT] = masq->tcp_fin_timeout; if (masq->udp_timeout) masq_timeout_table.timeout[IP_MASQ_S_UDP] = masq->udp_timeout; ret = 0; } return ret; } /* * Module autoloading stuff */ static int ip_masq_user_check_hook(void) { #ifdef CONFIG_KMOD if (ip_masq_user_hook == NULL) { IP_MASQ_DEBUG(1, "About to request \"ip_masq_user\" module\n"); request_module("ip_masq_user"); } #endif /* CONFIG_KMOD */ return (ip_masq_user_hook != NULL); } /* * user module hook- info */ static int ip_masq_user_info(char *buffer, char **start, off_t offset, int len, int *eof, void *data) { int ret = -ENOPKG; if (ip_masq_user_check_hook()) { ret = ip_masq_user_hook->info(buffer, start, offset, len, (int) data); } return ret; } /* * user module hook- entry mgmt */ static int ip_masq_user_ctl(int optname, void *arg, int arglen) { int ret = -ENOPKG; if (ip_masq_user_check_hook()) { ret = ip_masq_user_hook->ctl(optname, arg, arglen); } return ret; } /* * Control from ip_sockglue * MAIN ENTRY point from userspace (apart from /proc *info entries) * Returns errno */ int ip_masq_uctl(int optname, char * optval , int optlen) { struct ip_masq_ctl masq_ctl; int ret = -EINVAL; if(optlen>sizeof(masq_ctl)) return -EINVAL; if(copy_from_user(&masq_ctl,optval,optlen)) return -EFAULT; IP_MASQ_DEBUG(1,"ip_masq_ctl(optname=%d, optlen=%d, target=%d, cmd=%d)\n", optname, optlen, masq_ctl.m_target, masq_ctl.m_cmd); switch (masq_ctl.m_target) { case IP_MASQ_TARGET_USER: ret = ip_masq_user_ctl(optname, &masq_ctl, optlen); break; #ifdef CONFIG_IP_MASQUERADE_MOD case IP_MASQ_TARGET_MOD: ret = ip_masq_mod_ctl(optname, &masq_ctl, optlen); break; #endif } /* * If ret>0, copy to user space */ if (ret > 0 && ret <= sizeof (masq_ctl)) { if (copy_to_user(optval, &masq_ctl, ret) ) return -EFAULT; ret = 0; } return ret; } #ifdef CONFIG_PROC_FS static struct proc_dir_entry *proc_net_ip_masq = NULL; #ifdef MODULE static void ip_masq_proc_count(struct inode *inode, int fill) { if (fill) 
MOD_INC_USE_COUNT; else MOD_DEC_USE_COUNT; } #endif int ip_masq_proc_register(struct proc_dir_entry *ent) { if (!proc_net_ip_masq) return -1; IP_MASQ_DEBUG(1, "registering \"/proc/net/ip_masq/%s\" entry\n", ent->name); return proc_register(proc_net_ip_masq, ent); } void ip_masq_proc_unregister(struct proc_dir_entry *ent) { if (!proc_net_ip_masq) return; IP_MASQ_DEBUG(1, "unregistering \"/proc/net/ip_masq/%s\" entry\n", ent->name); proc_unregister(proc_net_ip_masq, ent->low_ino); } __initfunc(static void masq_proc_init(void)) { IP_MASQ_DEBUG(1,"registering /proc/net/ip_masq\n"); if (!proc_net_ip_masq) { struct proc_dir_entry *ent; ent = create_proc_entry("net/ip_masq", S_IFDIR, 0); if (ent) { #ifdef MODULE ent->fill_inode = ip_masq_proc_count; #endif proc_net_ip_masq = ent; } else { IP_MASQ_ERR("Could not create \"/proc/net/ip_masq\" entry\n"); } } } #endif /* CONFIG_PROC_FS */ /* * Wrapper over inet_select_addr() */ u32 ip_masq_select_addr(struct device *dev, u32 dst, int scope) { return inet_select_addr(dev, dst, scope); } /* * Initialize ip masquerading */ __initfunc(int ip_masq_init(void)) { #ifdef CONFIG_PROC_FS proc_net_register(&(struct proc_dir_entry) { PROC_NET_IPMSQHST, 13, "ip_masquerade", S_IFREG | S_IRUGO, 1, 0, 0, 0, &proc_net_inode_operations, ip_msqhst_procinfo }); masq_proc_init(); ip_masq_proc_register(&(struct proc_dir_entry) { 0, 3, "tcp", S_IFREG | S_IRUGO, 1, 0, 0, 0, &proc_net_inode_operations, NULL, /* get_info */ NULL, /* fill_inode */ NULL, NULL, NULL, (char *) IPPROTO_TCP, ip_masq_user_info }); ip_masq_proc_register(&(struct proc_dir_entry) { 0, 3, "udp", S_IFREG | S_IRUGO, 1, 0, 0, 0, &proc_net_inode_operations, NULL, /* get_info */ NULL, /* fill_inode */ NULL, NULL, NULL, (char *) IPPROTO_UDP, ip_masq_user_info }); ip_masq_proc_register(&(struct proc_dir_entry) { 0, 4, "icmp", S_IFREG | S_IRUGO, 1, 0, 0, 0, &proc_net_inode_operations, NULL, /* get_info */ NULL, /* fill_inode */ NULL, NULL, NULL, (char *) IPPROTO_ICMP, ip_masq_user_info 
}); #endif #ifdef CONFIG_IP_MASQUERADE_IPAUTOFW ip_autofw_init(); #endif #ifdef CONFIG_IP_MASQUERADE_IPPORTFW ip_portfw_init(); #endif #ifdef CONFIG_IP_MASQUERADE_MFW ip_mfw_init(); #endif ip_masq_app_init(); return 0; }