diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1998-08-25 09:12:35 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 1998-08-25 09:12:35 +0000 |
commit | c7fc24dc4420057f103afe8fc64524ebc25c5d37 (patch) | |
tree | 3682407a599b8f9f03fc096298134cafba1c9b2f /net | |
parent | 1d793fade8b063fde3cf275bf1a5c2d381292cd9 (diff) |
o Merge with Linux 2.1.116.
o New Newport console code.
o New G364 console code.
Diffstat (limited to 'net')
114 files changed, 4646 insertions, 4040 deletions
diff --git a/net/802/Makefile b/net/802/Makefile index 12db50d50..cdfafc343 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -23,12 +23,17 @@ endif ifdef CONFIG_TR O_OBJS += tr.o + SNAP=y endif ifdef CONFIG_FDDI O_OBJS += fddi.o endif +ifdef CONFIG_HIPPI +O_OBJS += hippi.o +endif + ifdef CONFIG_IPX SNAP=y endif @@ -38,7 +43,7 @@ ifdef CONFIG_ATALK endif ifeq ($(SNAP),y) -OX_OBJS += p8022.o psnap.o p8022tr.o +OX_OBJS += p8022.o psnap.o endif diff --git a/net/802/hippi.c b/net/802/hippi.c new file mode 100644 index 000000000..b8890647e --- /dev/null +++ b/net/802/hippi.c @@ -0,0 +1,161 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * HIPPI-type device handling. + * + * Version: @(#)hippi.c 1.0.0 05/29/97 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * Florian La Roche, <rzsfl@rz.uni-sb.de> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Jes Sorensen, <Jes.Sorensen@cern.ch> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/hippidevice.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <net/arp.h> +#include <net/sock.h> +#include <asm/checksum.h> +#include <asm/segment.h> +#include <asm/system.h> + +/* + * hippi_net_init() + * + * Do nothing, this is just to pursuade the stupid linker to behave. 
+ */ + +void hippi_net_init(void) +{ + return; +} + +/* + * Create the HIPPI MAC header for an arbitrary protocol layer + * + * saddr=NULL means use device source address + * daddr=NULL means leave destination address (eg unresolved arp) + */ + +int hippi_header(struct sk_buff *skb, struct device *dev, + unsigned short type, void *daddr, void *saddr, + unsigned len) +{ + struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN); + + if (!len){ + len = skb->len - HIPPI_HLEN; + printk("hippi_header(): length not supplied\n"); + } + + /* + * Due to the stupidity of the little endian byte-order we + * have to set the fp field this way. + */ + hip->fp.fixed = __constant_htonl(0x04800018); + hip->fp.d2_size = htonl(len + 8); + hip->le.fc = 0; + hip->le.double_wide = 0; /* only HIPPI 800 for the time being */ + hip->le.message_type = 0; /* Data PDU */ + + hip->le.dest_addr_type = 2; /* 12 bit SC address */ + hip->le.src_addr_type = 2; /* 12 bit SC address */ + +#if 1 + if (saddr) + { + printk("existing saddr - this should not happen, configure ARP please!\n"); + memcpy(hip->le.src_switch_addr, saddr + 3, 3); + }else + memcpy(hip->le.src_switch_addr, dev->dev_addr + 3, 3); + + memset(&hip->le.reserved, 0, 16); +#endif + + hip->snap.dsap = HIPPI_EXTENDED_SAP; + hip->snap.ssap = HIPPI_EXTENDED_SAP; + hip->snap.ctrl = HIPPI_UI_CMD; + hip->snap.oui[0] = 0x00; + hip->snap.oui[1] = 0x00; + hip->snap.oui[2] = 0x00; + hip->snap.ethertype = htons(type); + + if (daddr) + { + memcpy(hip->le.dest_switch_addr, daddr + 3, 3); + memcpy(&skb->private.ifield, daddr + 2, 4); + return(HIPPI_HLEN); + } + return -HIPPI_HLEN; +} + + +/* + * Rebuild the HIPPI MAC header. This is called after an ARP has + * completed on this sk_buff. We now let ARP fill in the other fields. 
+ */ + +int hippi_rebuild_header(struct sk_buff *skb) +{ + struct hippi_hdr *hip = (struct hippi_hdr *)skb->data; + + /* + * Only IP is currently supported + */ + + if(hip->snap.ethertype != __constant_htons(ETH_P_IP)) + { + printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",skb->dev->name,ntohs(hip->snap.ethertype)); + return 0; + } + + /* + * We don't support dynamic ARP on HIPPI, but we use the ARP + * static ARP tables to hold the I-FIELDs. + */ + return arp_find(hip->le.daddr, skb); +} + + +/* + * Determine the packet's protocol ID. + */ + +unsigned short hippi_type_trans(struct sk_buff *skb, struct device *dev) +{ + struct hippi_hdr *hip; + + hip = (struct hippi_hdr *) skb->data; + + /* + * This is actually wrong ... question is if we really should + * set the raw address here. + */ + skb->mac.raw = skb->data; + skb_pull(skb, HIPPI_HLEN); + + /* + * No fancy promisc stuff here now. + */ + + return hip->snap.ethertype; +} diff --git a/net/802/p8022.c b/net/802/p8022.c index b4a9b43f9..70bc2162c 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -1,6 +1,6 @@ /* - * NET3: Support for 802.2 demultiplexing off ethernet (Token ring - * is kept seperate see p8022tr.c) + * NET3: Support for 802.2 demultiplexing off Ethernet (Token ring + * is kept separate see p8022tr.c) * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version diff --git a/net/802/p8022tr.c b/net/802/p8022tr.c deleted file mode 100644 index b895c9343..000000000 --- a/net/802/p8022tr.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * NET3: Handling for token ring frames that are not IP. IP is hooked - * early in the token ring support code. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <net/datalink.h> -#include <linux/mm.h> -#include <linux/in.h> -#include <linux/init.h> -#include <net/p8022tr.h> - -#define SNAP_HEADER_LEN 8 - -static struct datalink_proto *p8022tr_list = NULL; - -/* - * We don't handle the loopback SAP stuff, the extended - * 802.2 command set, multicast SAP identifiers and non UI - * frames. We have the absolute minimum needed for IPX, - * IP and Appletalk phase 2. See the llc_* routines for support - * to handle the fun stuff. - * - * We assume the list will be very short (at the moment its normally - * one or two entries). - */ - -static struct datalink_proto *find_8022tr_client(unsigned char type) -{ - struct datalink_proto *proto; - - for (proto = p8022tr_list; - ((proto != NULL) && (*(proto->type) != type)); - proto = proto->next) - ; - - return proto; -} - -int p8022tr_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) -{ - struct datalink_proto *proto; - - proto = find_8022tr_client(*(skb->h.raw)); - if (proto != NULL) { - skb->h.raw += 3; - skb_pull(skb,3); - return proto->rcvfunc(skb, dev, pt); - } - - skb->sk = NULL; - kfree_skb(skb); - return 0; -} - -static void p8022tr_datalink_header(struct datalink_proto *dl, - struct sk_buff *skb, unsigned char *dest_node) -{ - struct device *dev = skb->dev; - unsigned char *rawp; - unsigned char *olddata; - unsigned char *newdata; - - rawp = skb_push(skb,3); - *rawp++ = dl->type[0]; - *rawp++ = dl->type[0]; - *rawp = 0x03; /* UI */ - dev->hard_header(skb, dev, ETH_P_802_3, dest_node, NULL, skb->len); - olddata = skb->data; - newdata = skb_pull(skb, SNAP_HEADER_LEN); - memmove(newdata, olddata, 
dev->hard_header_len - SNAP_HEADER_LEN); -} - -static struct packet_type p8022tr_packet_type = -{ - 0, - NULL, /* All devices */ - p8022tr_rcv, - NULL, - NULL, -}; - - -EXPORT_SYMBOL(register_8022tr_client); -EXPORT_SYMBOL(unregister_8022tr_client); - -__initfunc(void p8022tr_proto_init(struct net_proto *pro)) -{ - p8022tr_packet_type.type=htons(ETH_P_TR_802_2); - dev_add_pack(&p8022tr_packet_type); -} - -struct datalink_proto *register_8022tr_client(unsigned char type, - int (*rcvfunc)(struct sk_buff *, struct device *, struct packet_type *)) -{ - struct datalink_proto *proto; - - if (find_8022tr_client(type) != NULL) - return NULL; - - proto = (struct datalink_proto *) kmalloc(sizeof(*proto), GFP_ATOMIC); - if (proto != NULL) { - proto->type[0] = type; - proto->type_len = 1; - proto->rcvfunc = rcvfunc; - proto->header_length = 3; - proto->datalink_header = p8022tr_datalink_header; - proto->string_name = "802.2TR"; - proto->next = p8022tr_list; - p8022tr_list = proto; - } - - return proto; -} - -void unregister_8022tr_client(unsigned char type) -{ - struct datalink_proto *tmp, **clients = &p8022tr_list; - unsigned long flags; - - save_flags(flags); - cli(); - - while ((tmp = *clients) != NULL) - { - if (tmp->type[0] == type) { - *clients = tmp->next; - kfree_s(tmp, sizeof(struct datalink_proto)); - break; - } else { - clients = &tmp->next; - } - } - - restore_flags(flags); -} diff --git a/net/802/psnap.c b/net/802/psnap.c index 24e7f2bd0..8c077d46f 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -60,9 +60,11 @@ int snap_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) */ skb->h.raw += 5; + skb->nh.raw += 5; skb_pull(skb,5); if (psnap_packet_type.type == 0) psnap_packet_type.type=htons(ETH_P_SNAP); + return proto->rcvfunc(skb, dev, &psnap_packet_type); } skb->sk = NULL; @@ -95,7 +97,7 @@ __initfunc(void snap_proto_init(struct net_proto *pro)) } /* - * Register SNAP clients. We don't yet use this for IP or IPX. + * Register SNAP clients. 
We don't yet use this for IP. */ struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct device *, struct packet_type *)) diff --git a/net/802/tr.c b/net/802/tr.c index 3550b81ed..f708fe881 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -9,6 +9,9 @@ * Fixes: 3 Feb 97 Paul Norton <pnorton@cts.com> Minor routing fixes. * Added rif table to /proc/net/tr_rif and rif timeout to * /proc/sys/net/token-ring/rif_timeout. + * 22 Jun 98 Paul Norton <p.norton@computer.org> Rearranged + * tr_header and tr_type_trans to handle passing IPX SNAP and + * 802.2 through the correct layers. Eliminated tr_reformat. * */ @@ -84,9 +87,30 @@ int sysctl_tr_rif_timeout = RIF_TIMEOUT; int tr_header(struct sk_buff *skb, struct device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) { + struct trh_hdr *trh; + int hdr_len; - struct trh_hdr *trh=(struct trh_hdr *)skb_push(skb,dev->hard_header_len); - struct trllc *trllc=(struct trllc *)(trh+1); + /* + * Add the 802.2 SNAP header if IP as the IPv4 code calls + * dev->hard_header directly. + */ + if (type == ETH_P_IP || type == ETH_P_ARP) + { + struct trllc *trllc=(struct trllc *)(trh+1); + + hdr_len = sizeof(struct trh_hdr) + sizeof(struct trllc); + trh = (struct trh_hdr *)skb_push(skb, hdr_len); + trllc = (struct trllc *)(trh+1); + trllc->dsap = trllc->ssap = EXTENDED_SAP; + trllc->llc = UI_CMD; + trllc->protid[0] = trllc->protid[1] = trllc->protid[2] = 0x00; + trllc->ethertype = htons(type); + } + else + { + hdr_len = sizeof(struct trh_hdr); + trh = (struct trh_hdr *)skb_push(skb, hdr_len); + } trh->ac=AC; trh->fc=LLC_FRAME; @@ -94,18 +118,7 @@ int tr_header(struct sk_buff *skb, struct device *dev, unsigned short type, if(saddr) memcpy(trh->saddr,saddr,dev->addr_len); else - memset(trh->saddr,0,dev->addr_len); /* Adapter fills in address */ - - /* - * This is the stuff needed for IP encoding - IP over 802.2 - * with SNAP. 
- */ - - trllc->dsap=trllc->ssap=EXTENDED_SAP; - trllc->llc=UI_CMD; - - trllc->protid[0]=trllc->protid[1]=trllc->protid[2]=0x00; - trllc->ethertype=htons(type); + memcpy(trh->saddr,dev->dev_addr,dev->addr_len); /* * Build the destination and then source route the frame @@ -115,10 +128,10 @@ int tr_header(struct sk_buff *skb, struct device *dev, unsigned short type, { memcpy(trh->daddr,daddr,dev->addr_len); tr_source_route(skb,trh,dev); - return(dev->hard_header_len); + return(hdr_len); } - return -dev->hard_header_len; + return -hdr_len; } /* @@ -161,12 +174,18 @@ unsigned short tr_type_trans(struct sk_buff *skb, struct device *dev) { struct trh_hdr *trh=(struct trh_hdr *)skb->data; - struct trllc *trllc=(struct trllc *)(skb->data+sizeof(struct trh_hdr)); + struct trllc *trllc; + unsigned riflen=0; skb->mac.raw = skb->data; - skb_pull(skb,dev->hard_header_len); - + if(trh->saddr[0] & TR_RII) + riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8; + + trllc = (struct trllc *)(skb->data+sizeof(struct trh_hdr)-TR_MAXRIFLEN+riflen); + + skb_pull(skb,sizeof(struct trh_hdr)-TR_MAXRIFLEN+riflen); + tr_add_rif_info(trh, dev); if(*trh->daddr & 1) @@ -183,38 +202,20 @@ unsigned short tr_type_trans(struct sk_buff *skb, struct device *dev) skb->pkt_type=PACKET_OTHERHOST; } - return trllc->ethertype; -} - -/* - * Reformat the headers to make a "standard" frame. This is done - * in-place in the sk_buff. - */ - -void tr_reformat(struct sk_buff *skb, unsigned int hdr_len) -{ - struct trllc *llc = (struct trllc *)(skb->data+hdr_len); - struct device *dev = skb->dev; - unsigned char *olddata = skb->data; - int slack; + /* + * Strip the SNAP header from ARP packets since we don't + * pass them through to the 802.2/SNAP layers. 
+ */ - if (llc->dsap == 0xAA && llc->ssap == 0xAA) + if (trllc->dsap == EXTENDED_SAP && + (trllc->ethertype == ntohs(ETH_P_IP) || + trllc->ethertype == ntohs(ETH_P_ARP))) { - slack = sizeof(struct trh_hdr) - hdr_len; - skb_push(skb, slack); - memmove(skb->data, olddata, hdr_len); - memset(skb->data+hdr_len, 0, slack); + skb_pull(skb, sizeof(struct trllc)); + return trllc->ethertype; } - else - { - struct trllc *local_llc; - slack = sizeof(struct trh_hdr) - hdr_len + sizeof(struct trllc); - skb_push(skb, slack); - memmove(skb->data, olddata, hdr_len); - memset(skb->data+hdr_len, 0, slack); - local_llc = (struct trllc *)(skb->data+dev->hard_header_len); - local_llc->ethertype = htons(ETH_P_TR_802_2); - } + + return ntohs(ETH_P_802_2); } /* @@ -350,22 +351,23 @@ printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", return; } + memcpy(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN); + memcpy(&(entry->iface[0]),dev->name,5); + entry->next=rif_table[hash]; + entry->last_used=jiffies; + rif_table[hash]=entry; + if (rii_p) { entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK); memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short)); entry->local_ring = 0; + trh->saddr[0]|=TR_RII; /* put the routing indicator back for tcpdump */ } else { entry->local_ring = 1; } - - memcpy(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN); - memcpy(&(entry->iface[0]),dev->name,5); - entry->next=rif_table[hash]; - entry->last_used=jiffies; - rif_table[hash]=entry; } else /* Y. 
Tahara added */ { @@ -447,7 +449,7 @@ int rif_get_info(char *buffer,char **start, off_t offset, int length, int dummy) rif_cache entry; size=sprintf(buffer, - "if TR address TTL rcf routing segments\n\n"); + "if TR address TTL rcf routing segments\n"); pos+=size; len+=size; diff --git a/net/Changes b/net/Changes index b6e1d5ea3..6ff0219cc 100644 --- a/net/Changes +++ b/net/Changes @@ -1,446 +1,26 @@ +--------------- Things That Need Doing Before 2.2 ------------------ -Ongoing things. - -0.0 ---- -Initial patches to catch up with things we want to add. - -o Merged in the Jorge Cwik fast checksum. [TESTED] -o Added Arnt Gulbrandsen's fast UDP build. [TESTED] -o Pauline Middelinks masquerade patch [TESTED] - - -0.1 ---- - -o Remove excess transmit request calls. [TESTED] -o Set type before calling netif_rx() [TESTED] -o Inline sock_rcv_skb [TESTED] -o Cache last socket for UDP [TESTED] -o Cache last socket for TCP [TESTED] -o Routing cache (only in ip_build_header so far) [TESTED] -------->>>>> ALPHA 001 <<<<<---------- -o eql load balancing driver. [TESTED] -o Token ring drivers. [TESTED] -o IPIP and tunnels [TESTED] -o Fix ethernet/token ring promisc broadcast error [TESTED] - (pkt_type set to OTHERHOST in error). -o Fixed bug in the routing caches [TESTED] -o Protocol header cache support [TESTED] -o Fixed ip_build_xmit loopback bugs [TESTED] -o Fixes for SIOCGSTAMP on SOCK_PACKET [TESTED] -o Perfect hash on net_bh(). [TESTED] -o Sonix ISDN driver. [SEPARATED/SENT] -o Use ip_build_xmit for raw sockets [TESTED] -o 3c501 fixed for speed [TESTED] -------->>>>> ALPHA 002 <<<<<-------- -o PLIP, PPP and de bugs fixed [TESTED] -o Merged in G4KLX AX.25 state machine, with KA9Q - donated headers to get BSD free AX.25 done. 
[TESTED] -o Massive loopback device bug fixed [TESTED] -------->>>>> ALPHA 003 <<<<<---------- -o Revised code layout [TESTED] -o More bug fixes (traceroute etc) [TESTED] -------->>>>> ALPHA 004 <<<<<---------- -o IP build xmit fragment fixes [TESTED] -o TCP SWS probe fix [TESTED] -o Appletalk DDP [TESTED] -o IP firewall bug fixed [TESTED] -o IP masquerade ftp port spoof [TESTED] -o gcc 2.6.3 -O3 fix for checksum assembler [TESTED] -o /proc support shows both timers [TESTED] -o TCP irtt support [TESTED] -o RTF_REJECT routing support [TESTED] -o Fixed 0 length fragment bug [TESTED] -o Fixed overlapping reasm bug [TESTED] -o Newest AX.25 code from John Naylor [TESTED] -o NetROM from John Naylor [TESTED] -o Routerless DDP fixes from Wesley [TESTED] - -------->>>>> ALPHA 005 <<<<<---------- - -o Several compile and bugfixes from Jakko [TESTED] -o Connect fix from Matt Day (+ fix to fix) [TESTED] -o RTT, memory leak and other netrom/ax.25 cures - -- John Naylor [TESTED] -o IP source route via broadcast now illegal [TESTED] - -------->>>>> ALPHA 006 <<<<<---------- - -o Yet more NetROM/AX.25 improvements [TESTED] - -- John Naylor -o Fixed a _stupid_ appletalk bug [TESTED] -o Missing include [TESTED] - -- Lots of people -o Can drop all source routes [TESTED] -o Printing fixes for ip_fw [TESTED] -o UDP checksum fix (Gerhard) [TESTED] -o Newer 3c505 driver from Juha Laiho [IN] -o Security fix to axassociate [TESTED] -o Loopback driver debugged (fixes named) [TESTED] -o SCC driver from Joerg Reuter [TESTED] -o IP Firewall accounting zero bug [TESTED] - -////////////////////////////1.3.0/////////////////////////// - - -o Merged loadable firewall code [TESTED] -o New buffers used totally non optimally [TESTED] -o Fast ip_forwarding (needs changing) [NOW INCLUDED IN 1.3.15] -o Fixed connection hang bug in new SWS code [TESTED] -o Buffer management hack putting skbuff control - after data in the frame because kmalloc is - totally cache non-optimal [TESTED] -o Faster checksum 
[Tom May] [TESTED] -o Appletalk router fixes [Michael Callahan] [TESTED] -o TCP state error fixes [Mark Tamsky] [TESTED] -o Verify area fixes [Heiko Eissfeldt] [TESTED] -o Routes use metric field [John Naylor] [TESTED] -o Major AX.25/NetROM fixes [John Nalor] [TESTED] - -------->>>>> NET3 030 <<<<<---------- - -o Long word align ethernet IP headers (64byte align for pentium) [TESTED] - (less helpful than I'd have liked) -o Fixed variable length header support to really work [TESTED] -o Mend appletalk/ipx partially [TESTED] -o Start playing with input checksum & copy [TESTED] -o Fixed PPP and other oddments [TESTED] -o Mended IPIP [TESTED] - -------->>>>> 1.3.7 <<<<<---------- - -o Checksum bug fixed [TESTED] -o Lance driver panic cured [TESTED] -o DEC ALPHA stuff (Linus) [ASK HIM NOT ME] -o Always try to keep output packet order - (eg for vat and BSD fast path tcp) [TESTED] -o Copy the mac pointer in skb_clone [TESTED] -o Fix tcpdump panic [TESTED] -o Fix dev_alloc_skb NULL deref bug [TESTED] -o Fix Security error in SIGURG stuff [TESTED] -o Missing 15 byte slack on ip_loopback [TESTED] - -------->>>>> 1.3.8 <<<<<---------- - -o UDP snmp count fixed [TESTED] -o IP snmp out count fixed [TESTED] -o First bit of Dave Bonn's fast forwarding [TESTED/NOW WORKS] -o Fix leaks and double free in firewalling [TESTED] -o Fix memory scribble in ip_build_xmit [TESTED] -o Do fast cases of ip_build_xmit first - slows fragmented I/O down, speeds up smaller - packets. UDP send ttcp can now touch 7.5Mbyte/sec - with nothing else going on. 
UDP recv is slower 8( [TESTED] -o Fixed and enabled ethernet header caches [TESTED] -o Removed junk from igmp [TESTED] -o Obscure UDP/copy&sum bug fix [TESTED] -o Fixed multicast [TESTED] -o TCP does rerouting for most cases [TESTED] - -------->>>>> 1.3.14 <<<<<---------- - -o IPX works [TESTED] -o NetROM works [TESTED] -o AX.25 works [TESTED] -o Most modules need recompiling even though they - load OK [BLAME LINUS] -o Appletalk works nicely [CHECKED] -o Fast IP forwarding part 1 works [CHECKED] - -------->>>>> 1.3.15 <<<<<--------- -o Mike Shaver has started RFC1122 verification [DONE] -o Minor bug fixes [TESTED] - -------->>>> 1.3.16 <<<-------- - -o Missing patches for device change in TCP [TESTED] -o Device locking [TESTED] -o Infinite slip devices [TESTED] -o New AF_UNIX sockets [TESTED] -o Sendmsg/recvmsg (for some stuff only) [TESTED] -o Device unload loopholes fixed [TESTED] -o Extra firewall abilities [TESTED] -o Appletalk node probe bug fix [TESTED] - -------->>>> 1.3.18 <<<<--------- - -o AF_UNIX debugging [TESTED] -o TCP explode on SWS bug fix [TESTED] -o John Naylor's ARP hwtype fix [TESTED] -o Datagram select matches BSD semantics [TESTED] - --------->>>>> 1.3.21 <<<<<--------- - -o AF_UNIX fixes and 4K limiter [TESTED] -o Sendmsg/recvmsg for AX.25/Appletalk [TESTED] -o Datagram generic iovec support [TESTED] -o Misc minor bug fixes [TESTED] - --------->>>>> 1.3.22 <<<<<------- - -o Device lock against page fault [TESTED] -o IP_HDRINCL [TESTED] -o IP firewalling spoofing protection [TESTED] -o IGMP bug fixes and workarounds [TESTED] -o IFF_ALLMULTI protocol layer support [TESTED] -o First parts of IP multicast routing code [TESTED] -o Generate BSD ENETDOWN errors [TESTED] -o Clean device unload bug<Walter Wolfgang> [TESTED] - --------->>>>> 1.3.24 <<<<<------- - -o Missing IGMP includes fixes [TESTED] -o Smarter buffer use options for sockets [TESTED] -o AF_UNIX smarter buffer driving [TESTED] -o AF_UNIX full BSD semantics on STREAM writes [TESTED] -o 
IOVEC's support repeated calls to copy more [TESTED] -o Zero fragment 'solaris nfs' bug fixed <Werner> [TESTED] -o NetROM supports sendmsg/recvmsg [TESTED] -o Sendmsg verify_iovec bugfix [TESTED] -o ARP PERM is really permanent now <Craig> [TESTED] -o IPIP tunnels work fully we hope [UMM...] -o Closing socket change (Marc Tamsky) [TESTED] -o RFC1122 verification of tcp.c <Mike Shaver> [DONE] - --------->>>>> 1.3.26 <<<<<------- - -o Rewrote ICMP completely [TESTED] -o Moved most IP addresses to __u32 [TESTED] -o Cleaned up ICMP reporting [TESTED] -o Tidied remove_sock [TESTED] -o Added memory allocation type to ip_build_xmit [TESTED] -o Cleaned up af_inet to use inet_error [TESTED] -o Named firewall returns [TESTED] -o Added firewall output checks to ip_build_xmit [TESTED] -o Multicast router downs VIF's when their - physical interface is dropped [TESTED] -o Reformatted ipv4/protocol.c, dropped frag field [TESTED] -o Fixed MSS for TCP [TESTED] -o Dropped sock_awaitconn [TESTED] -o Added ip_forward to ksyms for IPIP etc [TESTED] -o Appletalk TIOCINQ/TIOCOUTQ bug fix [TESTED] -o Rewrote the IFF_UP/IFF_DOWN handling code [TESTED] - --------->>>>> 1.3.29 <<<<<------- - -o Major AX.25/NetROM fixes [John Naylor] [TESTED] -o Error in ip_mr ioctls fixed [Michael Chastain] [TESTED] -o TCP cache zap bugs hopefully fixed [CLOSE BUT NO COOKIE] -o Length checks in udp/raw sending [Craig Metz] [TESTED] - --------->>>>> 1.3.31 <<<<<<------- - -o IP_OPTIONS [A.N.Kuznetsov] [TESTED] -o TCP cache zap more fixes [TESTED] -o Most of the IP multicast routing cache added [TESTED - WORK NEEDED] -o Kernel/user communication module (not used yet) [TESTED] - --------->>>>> 1.3.31 <<<<<<------- - -o IFF_ALLMULTI support for 3c501,3c509,8390 and - tulip(SMC etherpower) boards [TESTED] - --------->>>>> 1.3.33 <<<<<<-------- - -o IFF_ALLMULTI causes an address check on ether [TESTED] -o Added multicast ability readme file [TESTED] -o Assorted driver/multicast fixes [TESTED] -o IP routing change 
errors resemble BSD more [TESTED/MORE TO COME] -o IP port masquerading fixes [TESTED] - --------->>>>> 1.3.35 <<<<<<-------- - -o Appletalk data now in the protinfo union [TESTED] -o BSD style bind to broadcast address supported [TESTED] -o Standard loadable firewall chains [TESTED] -o IPFW uses the firewall chains for firewall but - not yet acct/masquerade [TESTED] -o Firewall chain hooks in all other protocols [TESTED] -o Sendmsg/recvmsg for IPX. [TESTED] -o IPX uses sock_alloc_send_skb [TESTED] -o Recvmsg for all IP, sendmsg for TCP [TESTED] - (nearly ready to go all *msg()) - --------->>>>> 1.3.42 <<<<<<-------- - -o ip udp/raw nonblock bug fixed [TESTED] -o ICMP lockup fix [TESTED] -o Fundamental operations now only sendmsg/recvmsg [TESTED] -o bind() for SOCK_PACKET [IN] -o set_mac_addr fixed up [TESTED] -o BSD SIOCSIFADDR, AF_UNSPEC behaviour [TESTED] -o Updated this list [OK] -o Massive ARP/cache/routing rewrite [ANK] [TESTED] -o AX.25 connect return fixed in using sock_error [TESTED] -o Proper netlink device major(36) [TESTED] -o First parts of the SKIP support [IN, not useful] -o TCP ICMP (SOSS should work again) [TESTED] -o IPFW support for TOS changing (Al Longyear) [TESTED] -o DECNET PPP test code [Steve] [IN] -o NFS root [Miguel/Gero] [TESTED] -o Path MTU discovery [ANK] [TESTED] - --------->>>>> 1.3.44 <<<<<<-------- - -o NFS root/ FPU clash fixed [TESTED] -o ARP lock bug fixed [TESTED] -o SO_BSDCOMPAT option(libbsd/ibcs2 ought to set) [SEMIDONE] -o Changed to new set_multicast_list() [TESTED] -o ARP ioctl() call fixes [Bernd] [TESTED] -o Fixes to the name set functions (maybe fixes - netrom) [Steve] [TESTED] -o Packet protocol labelling (not IPX yet) [TESTED] -o Faster buffer copy/clone [Linus] [TESTED] - --------->>>>> 1.3.46 <<<<<<-------- - -o AX.25/NetROM fixes/changes [John Naylor] [TESTED] -o Further attempts to fix the IPX memory bug [NOW FIXED] -o ARP fixes (Assorted) [TESTED] -o Driver fixes for multicast lists [TESTED] - --------->>>>> 
1.3.48 <<<<<<-------- - -o IPalias [TESTED] - --------->>>>> 1.3.50 <<<<<<-------- - -o TCP soft error support [TESTED] -o Further 3c501 tweaking [TESTED] -o Still trying to make IPX work right [TESTED] -o Trap faulty boxes sending IGMP using 0.0.0.0 [TESTED] -o Only allow SMBFS selection with IP configured [TESTED] -o Packetwin driver [Craig] [IN] -o Net alias changes [Juan] [TESTED] - --------->>>>> 1.3.53 <<<<<<-------- - -o Missing htons() in ip_build_xmit [Mike Kilburn] [TESTED] -o Missing protocol type set in appletalk [TESTED] -o Net alias changes/documentation [Juan Ciarlante][TESTED] -o Set protocol type on IPX sends [Various] [TESTED] -o Lance driver packet length sanity check [TESTED] - --------->>>>> 1.3.60 <<<<<<-------- - -o Fixed NFS notice message [IN] -o Extra ETH_P_xx types [IN] -o Added skb_copy [IN] -o AX.25 unlock bug fix [Joerg] [IN] -o Disabled buggy kerneld support [IN] -o Removed dev_rint [IN] -o Fixed bind checking [IN] -o ARP changes [Bernd] [IN] -o IPX memory leak fix [Martin] [IN] -o Net builds without /proc [Paul] [IN] -o IP multicast races removed [IN] -o Device queue length limits and packet discarder [IN] - ----------- Things I thought Linus had for a while and not merged ---------------- - - ----------- Things pending from other people ------------- - -o Improved IPX support for lwared. -o Decnet pre pre pre pre pre Alpha 0.0. -o Chase Donald for new drivers, get people to sort out what net - drivers should cease to be 'Alpha'. -o IPX PPP support -o IP multicast bug fixes - ----------- Things pending for me to merge -------------- - -o AF_UNIX garbage collect code -o Faster closedown option for heavy use sites (me) -o Tom May's insw_and_checksum() -o SPARC patches [Dave] [partly in] - ---------------- Things That Need Doing Before 1.4 ------------------ - -o Clean up RAW AX.25 sockets. 
[Sorted by skb_clone change] -o Finish IPIP bug fixes [Done hopefully] -o Multicast routing [Nearly right] -o PPP/IPX -o IPX for Lwared -o SKIP [Available in user mode] -o AX.25/NetROM locking changes o insw_and_csum -o AF_UNIX fd passing -------------------------- Bugs to fix ------------------------------ -o signal interrupting a unix domain connect can occasionally hang the - machine ?? -o TCP socket cache gets things wrong very very occasionally under high - load. [TRYING THINGS] -o AX.25/NetROM needs more locking. -o NFS flow control is needed with the new multirequest NFS support. -o Need to be able to turn off the intelligent arp refreshing as it's not so - hot over AX.25 and upsets some people with very dumb ISDN bridges. -o Matti Arnio's TCP problem. o Should unix domain connect never block ? -o Sort out kerneld getting things right. - -0.2 ---- -o Fast checksum/copy on outgoing TCP -o Add tty support to sonix driver. -o PPP for Sonix ISDN. o Screend loadable firewall module -o AXIP [AVAILABLE IN USER MODE] -o Finish merging the bridge code [LEAVE POST 1.4] -o Finish 802.2 Class I code to be compliant to the oddities of 802.2 -o Tidy BPQ support to use a bpqip tunnel device -o Kill off old ip_queue_xmit/ip_send stuff. +o Fix merging the bridge code o Remove kernel RARP and replace with user mode daemon. -o Throw out existing firewall ioctl()'s and use a single table load. -o SPARC merge - -0.3 ---- -o 802.2 Class 2 services (eg netbios). +o Merge ARM half word trap fixes for ethernet headers +o Stop route addition to downed interfaces +o Make sure route add window functionality is back or documented + equivalences are clear +o Merge ATM +o Merge IRDA Possible projects for victim^H^H^H^H^Holunteers - -2. Verifying all the error returns match the BSD ones (grotty job I -wouldn't wish on anyone). - -3. 'Fast Retransmit'. This is a TCP algorithm extension BSD uses. If -you see about 3 acks in a row that are for the same 'old' value. 
You resend -the frame following the ack. (The assumption being that the acks are -because a single frame in the data stream has been lost). Given a -mathematician with some queue theory you can show this allows you to -lose one frame per window full of data without measurable speed loss. -[done] - -4. RFC1323. These are the extensions for very fast nets. -RFC1323 will be useful for Linux talking to systems over 100Mb/sec -ethernet and over ATM as it allows large windows and protects from some -potential high speed TCP problems. -[In progress] - -6. Delayed ack. This is mostly supported but not actually set up and -used yet. Basically ack frames are held back 1/10th of a second in the hope -that two acks can be merged into one or for interactive use the ack can -piggyback on the next character typed (great improvement on 2400 baud -modems). Johannes Stille did some work on this about 0.99.13 but it never -got merged in. [Pedro Roque] [Done, but needs fixing] - -7. One on my tempting project list. Add an extra (unofficial - but so -is SLIP6) SLIP mode that does packet data compression [maybe use the code -from term]. - 9. Implementing streams. Not as a blind slow SYS5.4 style copy but actually working out how to do it so it runs like greased lightning. Quite a big problem. [See the LiS project] -10. Frame Relay/WAN/ISDN drivers [I'm working on the sonix EuroISDN board -driver but that's for an internal project and its general release is still -a maybe (so is finishing it ;))][Jim Freeman is working on Frame Relay as is -Mike McLagan][Fritz Elfert is doing the isdn4linux kit]. - 11. IP over SCSI. [worked on] 14. Bidirectional PLIP. Also PLIP for the newer style parallel ports. @@ -449,22 +29,13 @@ Mike McLagan][Fritz Elfert is doing the isdn4linux kit]. rumour is microsoft are phasing out netbeui for netbios/IP. Microsoft have gone for netbios/funny-ipx-variant it seems in Win95, but TCP is selectable. -16. X.25. 
This is one for a real head case with far too much time on -their hands. [Provisionally taken] - 17. PPP multilink. Another nasty job. -[In progress] - -18. Implement swIPe under Linux. -[Reportedly in progress] 19. IPv4 IP-AH and IP-ESP. -[Taken] -20. SKIP IP security using ENskip-0.10 - started -[Abandoned] +20. (userspace) GUI interface to the bandwidth allocators so mere + mortals can do this -21. T/TCP support. BTW: Don't let the magic words 'kernel programming' worry you. Its like DOS - you make a mistake you have to reboot. You do at least get dumps and a diff --git a/net/Config.in b/net/Config.in index 62dfd430f..f1ed3f79d 100644 --- a/net/Config.in +++ b/net/Config.in @@ -56,7 +56,7 @@ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then if [ "$CONFIG_NET_SCHED" = "y" ]; then source net/sched/Config.in fi - bool 'Network code profiler' CONFIG_NET_PROFILE +# bool 'Network code profiler' CONFIG_NET_PROFILE fi fi endmenu diff --git a/net/Makefile b/net/Makefile index 0f32c8397..0bbf171fa 100644 --- a/net/Makefile +++ b/net/Makefile @@ -9,7 +9,8 @@ MOD_SUB_DIRS := ipv4 ALL_SUB_DIRS := 802 ax25 bridge core ethernet ipv4 ipv6 ipx unix appletalk \ - netrom rose lapb x25 wanrouter netlink sched packet sunrpc #decnet + netrom rose lapb x25 wanrouter netlink sched packet sunrpc \ + econet #decnet SUB_DIRS := core ethernet sched MOD_LIST_NAME := NET_MISC_MODULES @@ -140,6 +141,14 @@ else endif endif +ifeq ($(CONFIG_ECONET),y) +SUB_DIRS += econet +else + ifeq ($(CONFIG_ECONET),m) + MOD_SUB_DIRS += econet + endif +endif + # We must attach netsyms.o to socket.o, as otherwise there is nothing # to pull the object file from the archive. 
diff --git a/net/README b/net/README index 8f63441fa..9281cc13d 100644 --- a/net/README +++ b/net/README @@ -4,7 +4,7 @@ Maintainers and developers for networking code sections Code Section Bug Report Contact -------------------+------------------------------------------- 802 [other ] alan@lxorguk.ukuu.org.uk - [token ring ] pnorton@cts.com + [token ring ] p.norton@computer.org appletalk Jay.Schulist@spacs.k12.wi.us ax25 g4klx@g4klx.demon.co.uk core alan@lxorguk.ukuu.org.uk @@ -12,7 +12,7 @@ decnet SteveW@ACM.org ethernet alan@lxorguk.ukuu.org.uk ipv4 davem@caip.rutgers.edu,Eric.Schenk@dna.lth.se ipv6 davem@caip.rutgers.edu,Eric.Schenk@dna.lth.se -ipx alan@lxorguk.ukuu.org.uk,greg@caldera.com +ipx/spx Jay.Schulist@spacs.k12.wi.us lapb g4klx@g4klx.demon.co.uk netrom g4klx@g4klx.demon.co.uk rose g4klx@g4klx.demon.co.uk diff --git a/net/TUNABLE b/net/TUNABLE index 2e5cc1b6e..db1bda57b 100644 --- a/net/TUNABLE +++ b/net/TUNABLE @@ -10,7 +10,7 @@ NUM_PROTO Maximum loadable address family, will need recompile MAX_LINKS Maximum number of netlink minor devices. (1-32) MAX_QBYTES Size of a netlink device queue (tunable) RIF_TABLE_SIZE Token ring RIF cache size (tunable) -AARP_HASH_SIZE Size of appletalk hash table (tunable) +AARP_HASH_SIZE Size of Appletalk hash table (tunable) AX25_DEF_T1 AX.25 parameters. These are all tunable via AX25_DEF_T2 SIOCAX25SETPARMS AX25_DEF_T3 T1-T3,N2 have the meanings in the specification diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 511c65970..89ce0b56d 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -1,6 +1,6 @@ /* - * AARP: An implementation of the Appletalk aarp protocol for - * ethernet 'ELAP'. + * AARP: An implementation of the AppleTalk AARP protocol for + * Ethernet 'ELAP'. * * Alan Cox <Alan.Cox@linux.org> * @@ -20,7 +20,10 @@ * * * References: - * Inside Appletalk (2nd Ed). + * Inside AppleTalk (2nd Ed). 
+ * Fixes: + * Jaume Grau - flush caches on AARP_PROBE + * */ #include <linux/config.h> @@ -430,7 +433,7 @@ int aarp_send_ddp(struct device *dev,struct sk_buff *skb, struct at_addr *sa, vo skb->nh.raw=skb->data; /* - * Check for localtalk first + * Check for LocalTalk first */ @@ -645,7 +648,7 @@ static void aarp_resolved(struct aarp_entry **list, struct aarp_entry *a, int ha /* * This is called by the SNAP driver whenever we see an AARP SNAP - * frame. We currently only support ethernet. + * frame. We currently only support Ethernet. */ static int aarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) { @@ -658,7 +661,7 @@ static int aarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type /* - * We only do ethernet SNAP AARP + * We only do Ethernet SNAP AARP */ if(dev->type!=ARPHRD_ETHER) @@ -773,6 +776,21 @@ static int aarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type sa.s_node=ea->pa_dst_node; sa.s_net=ea->pa_dst_net; + if(ea->function==AARP_PROBE) + { + /* A probe implies someone trying to get an + address. So as a precaution flush any + entries we have for this address */ + struct aarp_entry *a=aarp_find_entry( + resolved[sa.s_node%(AARP_HASH_SIZE-1)], + skb->dev, + &sa); + /* Make it expire next tick - that avoids us + getting into a probe/flush/learn/probe/flush/learn + cycle during probing of a slow to respond host addr */ + if(a!=NULL) + a->expires_at=jiffies-1; + } if(sa.s_node!=ma->s_node) break; if(sa.s_net && ma->s_net && sa.s_net!=ma->s_net) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b85835f47..331f3eb7b 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1,6 +1,6 @@ /* - * DDP: An implementation of the Appletalk DDP protocol for - * ethernet 'ELAP'. + * DDP: An implementation of the AppleTalk DDP protocol for + * Ethernet 'ELAP'. * * Alan Cox <Alan.Cox@linux.org> * @@ -18,17 +18,17 @@ * Alan Cox : Added firewall hooks. 
* Alan Cox : Supports new ARPHRD_LOOPBACK * Christer Weinigel : Routing and /proc fixes. - * Bradford Johnson : Localtalk. + * Bradford Johnson : LocalTalk. * Tom Dyas : Module support. * Alan Cox : Hooks for PPP (based on the - * localtalk hook). + * LocalTalk hook). * Alan Cox : Posix bits * Alan Cox/Mike Freeman : Possible fix to NBP problems * Bradford Johnson : IP-over-DDP (experimental) * Jay Schulist : Moved IP-over-DDP to its own * driver file. (ipddp.c & ipddp.h) * Jay Schulist : Made work as module with - * Appletalk drivers, cleaned it. + * AppleTalk drivers, cleaned it. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -197,7 +197,7 @@ int atalk_get_info(char *buffer, char **start, off_t offset, int length, int dum off_t begin=0; /* - * Output the appletalk data for the /proc virtual fs. + * Output the AppleTalk data for the /proc filesystem. */ len += sprintf(buffer,"Type local_addr remote_addr tx_queue rx_queue st uid\n"); @@ -241,7 +241,7 @@ int atalk_get_info(char *buffer, char **start, off_t offset, int length, int dum /**************************************************************************\ * * -* Routing tables for the Appletalk socket layer. * +* Routing tables for the AppleTalk socket layer. * * * \**************************************************************************/ @@ -250,11 +250,11 @@ static struct atalk_iface *atalk_iface_list = NULL; static struct atalk_route atrtr_default; /* For probing devices or in a routerless network */ /* - * Appletalk interface control + * AppleTalk interface control */ /* - * Drop a device. Doesn't drop any of its routes - that is the the callers + * Drop a device. Doesn't drop any of its routes - that is the caller's * problem. Called when we down the interface or delete the address. 
*/ static void atif_drop_device(struct device *dev) @@ -476,7 +476,7 @@ static struct atalk_iface *atalk_find_interface(int net, int node) /* - * Find a route for an appletalk packet. This ought to get cached in + * Find a route for an AppleTalk packet. This ought to get cached in * the socket (later on...). We know about host routes and the fact * that a route must be direct to broadcast. */ @@ -504,7 +504,7 @@ static struct atalk_route *atrtr_find(struct at_addr *target) /* - * Given an appletalk network find the device to use. This can be + * Given an AppleTalk network, find the device to use. This can be * a simple lookup. */ struct device *atrtr_get_dev(struct at_addr *sa) @@ -732,8 +732,8 @@ int atif_ioctl(int cmd, void *arg) nr=(struct netrange *)&sa->sat_zero[0]; /* - * Phase 1 is fine on Localtalk but we don't do - * Ethertalk phase 1. Anyone wanting to add it go ahead. + * Phase 1 is fine on LocalTalk but we don't do + * EtherTalk phase 1. Anyone wanting to add it go ahead. */ if(dev->type == ARPHRD_ETHER && nr->nr_phase != 2) return (-EPROTONOSUPPORT); @@ -947,7 +947,7 @@ int atalk_rt_get_info(char *buffer, char **start, off_t offset, int length, int /**************************************************************************\ * * * Handling for system calls applied via the various interfaces to an * -* Appletalk socket object. * +* AppleTalk socket object. * * * \**************************************************************************/ @@ -990,7 +990,7 @@ static int atalk_create(struct socket *sock, int protocol) { struct sock *sk; - sk = sk_alloc(AF_APPLETALK, GFP_KERNEL, 1); + sk = sk_alloc(PF_APPLETALK, GFP_KERNEL, 1); if(sk == NULL) return (-ENOMEM); @@ -1201,7 +1201,7 @@ static int atalk_accept(struct socket *sock, struct socket *newsock, int flags) } /* - * Find the name of an appletalk socket. Just copy the right + * Find the name of an AppleTalk socket. Just copy the right * fields into the sockaddr. 
*/ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, @@ -1254,6 +1254,7 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type struct atalk_iface *atif; struct sockaddr_at tosat; int origlen; + struct ddpebits ddphv; /* Size check */ if(skb->len < sizeof(*ddp)) @@ -1272,7 +1273,7 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type * run until we put it back) */ - *((__u16 *)ddp) = ntohs(*((__u16 *)ddp)); + *((__u16 *)&ddphv) = ntohs(*((__u16 *)ddp)); /* * Trim buffer in case of stray trailing data @@ -1280,7 +1281,7 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type origlen = skb->len; - skb_trim(skb, min(skb->len, ddp->deh_len)); + skb_trim(skb, min(skb->len, ddphv.deh_len)); /* * Size check to see if ddp->deh_len was crap @@ -1297,14 +1298,14 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type * Any checksums. Note we don't do htons() on this == is assumed to be * valid for net byte orders all over the networking code... */ - if(ddp->deh_sum && atalk_checksum(ddp, ddp->deh_len) != ddp->deh_sum) + if(ddp->deh_sum && atalk_checksum(ddp, ddphv.deh_len) != ddp->deh_sum) { - /* Not a valid appletalk frame - dustbin time */ + /* Not a valid AppleTalk frame - dustbin time */ kfree_skb(skb); return (0); } - if(call_in_firewall(AF_APPLETALK, skb->dev, ddp, NULL,&skb)!=FW_ACCEPT) + if(call_in_firewall(PF_APPLETALK, skb->dev, ddp, NULL,&skb)!=FW_ACCEPT) { kfree_skb(skb); return (0); @@ -1318,7 +1319,7 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type atif = atalk_find_interface(ddp->deh_dnet, ddp->deh_dnode); /* - * Not ours, so we route the packet via the correct Appletalk interface. + * Not ours, so we route the packet via the correct AppleTalk interface. 
*/ if(atif == NULL) { @@ -1338,7 +1339,7 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type /* * Check firewall allows this routing */ - if(call_fw_firewall(AF_APPLETALK, skb->dev, ddp, NULL, &skb) != FW_ACCEPT) + if(call_fw_firewall(PF_APPLETALK, skb->dev, ddp, NULL, &skb) != FW_ACCEPT) { kfree_skb(skb); return (0); @@ -1349,12 +1350,12 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type /* Route the packet */ rt = atrtr_find(&ta); - if(rt == NULL || ddp->deh_hops == DDP_MAXHOPS) + if(rt == NULL || ddphv.deh_hops == DDP_MAXHOPS) { kfree_skb(skb); return (0); } - ddp->deh_hops++; + ddphv.deh_hops++; /* * Route goes through another gateway, so @@ -1368,16 +1369,16 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type /* Fix up skb->len field */ skb_trim(skb, min(origlen, rt->dev->hard_header_len + - ddp_dl->header_length + ddp->deh_len)); + ddp_dl->header_length + ddphv.deh_len)); /* Mend the byte order */ - *((__u16 *)ddp) = ntohs(*((__u16 *)ddp)); + *((__u16 *)ddp) = ntohs(*((__u16 *)&ddphv)); /* * Send the buffer onwards * * Now we must always be careful. If it's come from - * localtalk to ethertalk it might not fit + * LocalTalk to EtherTalk it might not fit * * Order matters here: If a packet has to be copied * to make a new headroom (rare hopefully) then it @@ -1452,16 +1453,13 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type skb->sk = sock; if(sock_queue_rcv_skb(sock, skb) < 0) - { - skb->sk = NULL; kfree_skb(skb); - } return (0); } /* - * Receive a localtalk frame. We make some demands on the caller here. + * Receive a LocalTalk frame. We make some demands on the caller here. * Caller must provide enough headroom on the packet to pull the short * header and append a long one. 
*/ @@ -1611,8 +1609,8 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, int len, skb = sock_alloc_send_skb(sk, size, 0, flags&MSG_DONTWAIT, &err); if(skb == NULL) return (err); - - skb->sk = sk; + + skb->sk = sk; skb_reserve(skb, ddp_dl->header_length); skb_reserve(skb, dev->hard_header_len); @@ -1652,7 +1650,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, int len, else ddp->deh_sum = atalk_checksum(ddp, len + sizeof(*ddp)); - if(call_out_firewall(AF_APPLETALK, skb->dev, ddp, NULL, &skb) != FW_ACCEPT) + if(call_out_firewall(PF_APPLETALK, skb->dev, ddp, NULL, &skb) != FW_ACCEPT) { kfree_skb(skb); return (-EPERM); @@ -1714,18 +1712,22 @@ static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, int size, struct sock *sk=sock->sk; struct sockaddr_at *sat=(struct sockaddr_at *)msg->msg_name; struct ddpehdr *ddp = NULL; + struct ddpebits ddphv; int copied = 0; struct sk_buff *skb; int err = 0; + skb = skb_recv_datagram(sk,flags&~MSG_DONTWAIT,flags&MSG_DONTWAIT,&err); if(skb == NULL) return (err); ddp = (struct ddpehdr *)(skb->h.raw); + *((__u16 *)&ddphv) = ntohs(*((__u16 *)ddp)); + if(sk->type == SOCK_RAW) { - copied = ddp->deh_len; + copied = ddphv.deh_len; if(copied > size) { copied = size; @@ -1736,7 +1738,7 @@ static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, int size, } else { - copied = ddp->deh_len - sizeof(*ddp); + copied = ddphv.deh_len - sizeof(*ddp); if(copied > size) { copied = size; @@ -1768,7 +1770,7 @@ static int atalk_shutdown(struct socket *sk,int how) } /* - * Appletalk ioctl calls. + * AppleTalk ioctl calls. 
*/ static int atalk_ioctl(struct socket *sock,unsigned int cmd, unsigned long arg) { @@ -1859,13 +1861,13 @@ static int atalk_ioctl(struct socket *sock,unsigned int cmd, unsigned long arg) static struct net_proto_family atalk_family_ops= { - AF_APPLETALK, + PF_APPLETALK, atalk_create }; static struct proto_ops atalk_dgram_ops= { - AF_APPLETALK, + PF_APPLETALK, sock_no_dup, atalk_release, @@ -1913,7 +1915,7 @@ struct packet_type ppptalk_packet_type= static char ddp_snap_id[] = {0x08, 0x00, 0x07, 0x80, 0x9B}; /* - * Export symbols for use by drivers when Appletalk is a module. + * Export symbols for use by drivers when AppleTalk is a module. */ EXPORT_SYMBOL(aarp_send_ddp); EXPORT_SYMBOL(atrtr_get_dev); @@ -1972,7 +1974,7 @@ __initfunc(void atalk_proto_init(struct net_proto *pro)) atalk_register_sysctl(); #endif /* CONFIG_SYSCTL */ - printk(KERN_INFO "Appletalk 0.18 for Linux NET3.037\n"); + printk(KERN_INFO "AppleTalk 0.18 for Linux NET3.037\n"); } #ifdef MODULE @@ -1989,10 +1991,10 @@ int init_module(void) * Use counts are incremented/decremented when * sockets are created/deleted. * - * Appletalk interfaces are not incremented untill atalkd is run + * AppleTalk interfaces are not incremented untill atalkd is run * and are only decremented when they are downed. * - * Ergo, before the appletalk module can be removed, all Appletalk + * Ergo, before the AppleTalk module can be removed, all AppleTalk * sockets be closed from user space. 
*/ @@ -2014,7 +2016,7 @@ void cleanup_module(void) dev_remove_pack(&ltalk_packet_type); dev_remove_pack(&ppptalk_packet_type); unregister_snap_client(ddp_snap_id); - sock_unregister(atalk_family_ops.family); + sock_unregister(PF_APPLETALK); return; } diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index 776902889..8d9fe232e 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -1,5 +1,5 @@ /* -*- linux-c -*- - * sysctl_net_atalk.c: sysctl interface to net Appletalk subsystem. + * sysctl_net_atalk.c: sysctl interface to net AppleTalk subsystem. * * Begun April 1, 1996, Mike Shaver. * Added /proc/sys/net/atalk directory entry (empty =) ). [MS] diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index d8160d1ec..71999a416 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1,5 +1,5 @@ /* - * AX.25 release 037 + * AX.25 release 038 * * This code REQUIRES 2.1.15 or higher/ NET3.038 * @@ -80,7 +80,7 @@ * datagrams per socket. * AX.25 033 Jonathan(G4KLX) Removed auto-router. * Hans(PE1AYX) Converted to Module. - * Joerg(DL1BKE) Moved BPQ Ethernet to seperate driver. + * Joerg(DL1BKE) Moved BPQ Ethernet to separate driver. * AX.25 034 Jonathan(G4KLX) 2.1 changes * Alan(GW4PTS) Small POSIXisations * AX.25 035 Alan(GW4PTS) Started fixing to the new @@ -91,8 +91,10 @@ * Jonathan(G4KLX) Support for packet forwarding. * AX.25 036 Jonathan(G4KLX) Major restructuring. * Joerg(DL1BKE) Fixed DAMA Slave. - * Jonathan(G4KLX) Fix widlcard listen parameter setting. + * Jonathan(G4KLX) Fix wildcard listen parameter setting. * AX.25 037 Jonathan(G4KLX) New timer architecture. + * AX.25 038 Matthias(DG2FEF) Small fixes to the syscall interface to make kernel + * independent of AX25_MAX_DIGIS used by applications.
*/ #include <linux/config.h> @@ -449,8 +451,10 @@ void ax25_destroy_socket(ax25_cb *ax25) /* Not static as it's used by the timer static int ax25_ctl_ioctl(const unsigned int cmd, void *arg) { struct ax25_ctl_struct ax25_ctl; + ax25_digi digi; ax25_dev *ax25_dev; ax25_cb *ax25; + unsigned int k; if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl))) return -EFAULT; @@ -458,7 +462,11 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void *arg) if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL) return -ENODEV; - if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, NULL, ax25_dev->dev)) == NULL) + digi.ndigi = ax25_ctl.digi_count; + for (k = 0; k < digi.ndigi; k++) + digi.calls[k] = ax25_ctl.digi_addr[k]; + + if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL) return -ENOTCONN; switch (ax25_ctl.cmd) { @@ -787,13 +795,13 @@ int ax25_create(struct socket *sock, int protocol) switch (sock->type) { case SOCK_DGRAM: - if (protocol == 0 || protocol == AF_AX25) + if (protocol == 0 || protocol == PF_AX25) protocol = AX25_P_TEXT; break; case SOCK_SEQPACKET: switch (protocol) { case 0: - case AF_AX25: /* For CLX */ + case PF_AX25: /* For CLX */ protocol = AX25_P_TEXT; break; case AX25_P_SEGMENT: @@ -828,7 +836,7 @@ int ax25_create(struct socket *sock, int protocol) return -ESOCKTNOSUPPORT; } - if ((sk = sk_alloc(AF_AX25, GFP_ATOMIC, 1)) == NULL) + if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, 1)) == NULL) return -ENOMEM; if ((ax25 = ax25_create_cb()) == NULL) { @@ -854,7 +862,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) struct sock *sk; ax25_cb *ax25; - if ((sk = sk_alloc(AF_AX25, GFP_ATOMIC, 1)) == NULL) + if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, 1)) == NULL) return NULL; if ((ax25 = ax25_create_cb()) == NULL) { @@ -1001,7 +1009,10 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (sk->zapped == 0) return -EINVAL; - if (addr_len != 
sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) + if (addr_len < sizeof(struct sockaddr_ax25) || addr_len > sizeof(struct full_sockaddr_ax25)) + return -EINVAL; + + if (addr_len < (addr->fsa_ax25.sax25_ndigis * sizeof(ax25_address) + sizeof(struct sockaddr_ax25))) return -EINVAL; if (addr->fsa_ax25.sax25_family != AF_AX25) @@ -1018,7 +1029,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) SOCK_DEBUG(sk, "AX25: source address set to %s\n", ax2asc(&sk->protinfo.ax25->source_addr)); - if (addr_len == sizeof(struct full_sockaddr_ax25) && addr->fsa_ax25.sax25_ndigis == 1) { + if (addr_len > sizeof(struct sockaddr_ax25) && addr->fsa_ax25.sax25_ndigis == 1) { if (ax25cmp(&addr->fsa_digipeater[0], &null_ax25_address) == 0) { ax25_dev = NULL; SOCK_DEBUG(sk, "AX25: bound to any device\n"); @@ -1057,14 +1068,20 @@ static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le ax25_digi *digi = NULL; int ct = 0, err; - if (sk->state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { - sock->state = SS_CONNECTED; - return 0; /* Connect completed during a ERESTARTSYS event */ - } + /* deal with restarts */ + if (sock->state == SS_CONNECTING) { + switch (sk->state) { + case TCP_SYN_SENT: /* still trying */ + return -EINPROGRESS; - if (sk->state == TCP_CLOSE && sock->state == SS_CONNECTING) { - sock->state = SS_UNCONNECTED; - return -ECONNREFUSED; + case TCP_ESTABLISHED: /* connection established */ + sock->state = SS_CONNECTED; + return 0; + + case TCP_CLOSE: /* connection refused */ + sock->state = SS_UNCONNECTED; + return -ECONNREFUSED; + } } if (sk->state == TCP_ESTABLISHED && sk->type == SOCK_SEQPACKET) @@ -1073,7 +1090,10 @@ static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le sk->state = TCP_CLOSE; sock->state = SS_UNCONNECTED; - if (addr_len != sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) + /* + * some sanity checks. 
code further down depends on this + */ + if (addr_len < sizeof(struct sockaddr_ax25) || addr_len > sizeof(struct full_sockaddr_ax25)) return -EINVAL; if (fsa->fsa_ax25.sax25_family != AF_AX25) @@ -1087,7 +1107,7 @@ static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le /* * Handle digi-peaters to be used. */ - if (addr_len == sizeof(struct full_sockaddr_ax25) && fsa->fsa_ax25.sax25_ndigis != 0) { + if (addr_len > sizeof(struct sockaddr_ax25) && fsa->fsa_ax25.sax25_ndigis != 0) { /* Valid number of digipeaters ? */ if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) return -EINVAL; @@ -1202,10 +1222,17 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) struct sock *newsk; struct sk_buff *skb; - if (newsock->sk != NULL) - ax25_destroy_socket(newsock->sk->protinfo.ax25); + if (sock->state != SS_UNCONNECTED) + return -EINVAL; - newsock->sk = NULL; + /* + * sys_accept has already allocated a struct sock. we need to free it, + * since we want to use the one provided by ax25_make_new. 
+ */ + if (newsock->sk != NULL) { + sk_free(newsock->sk); + newsock->sk = NULL; + } if ((sk = sock->sk) == NULL) return -EINVAL; @@ -1245,43 +1272,43 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) skb->sk = NULL; kfree_skb(skb); sk->ack_backlog--; - newsock->sk = newsk; + newsock->sk = newsk; + newsock->state = SS_CONNECTED; return 0; } static int ax25_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { - struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr; struct sock *sk = sock->sk; unsigned char ndigi, i; + struct full_sockaddr_ax25 fsa; if (peer != 0) { if (sk->state != TCP_ESTABLISHED) return -ENOTCONN; - sax->fsa_ax25.sax25_family = AF_AX25; - sax->fsa_ax25.sax25_call = sk->protinfo.ax25->dest_addr; - sax->fsa_ax25.sax25_ndigis = 0; - *uaddr_len = sizeof(struct full_sockaddr_ax25); + fsa.fsa_ax25.sax25_family = AF_AX25; + fsa.fsa_ax25.sax25_call = sk->protinfo.ax25->dest_addr; + fsa.fsa_ax25.sax25_ndigis = 0; - if (sk->protinfo.ax25->digipeat != NULL) { - ndigi = sk->protinfo.ax25->digipeat->ndigi; - sax->fsa_ax25.sax25_ndigis = ndigi; - for (i = 0; i < ndigi; i++) - sax->fsa_digipeater[i] = sk->protinfo.ax25->digipeat->calls[i]; - } + ndigi = sk->protinfo.ax25->digipeat->ndigi; + fsa.fsa_ax25.sax25_ndigis = ndigi; + for (i = 0; i < ndigi; i++) + fsa.fsa_digipeater[i] = sk->protinfo.ax25->digipeat->calls[i]; } else { - sax->fsa_ax25.sax25_family = AF_AX25; - sax->fsa_ax25.sax25_call = sk->protinfo.ax25->source_addr; - sax->fsa_ax25.sax25_ndigis = 1; - *uaddr_len = sizeof(struct full_sockaddr_ax25); - - if (sk->protinfo.ax25->ax25_dev != NULL) - memcpy(&sax->fsa_digipeater[0], sk->protinfo.ax25->ax25_dev->dev->dev_addr, AX25_ADDR_LEN); - else - sax->fsa_digipeater[0] = null_ax25_address; + fsa.fsa_ax25.sax25_family = AF_AX25; + fsa.fsa_ax25.sax25_call = sk->protinfo.ax25->source_addr; + fsa.fsa_ax25.sax25_ndigis = 1; + if (sk->protinfo.ax25->ax25_dev != NULL) { + 
memcpy(&fsa.fsa_digipeater[0], sk->protinfo.ax25->ax25_dev->dev->dev_addr, AX25_ADDR_LEN); + } else { + fsa.fsa_digipeater[0] = null_ax25_address; + } } + if (*uaddr_len > sizeof (struct full_sockaddr_ax25)) + *uaddr_len = sizeof (struct full_sockaddr_ax25); + memcpy(uaddr, &fsa, *uaddr_len); return 0; } @@ -1315,11 +1342,14 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct return -ENETUNREACH; if (usax != NULL) { - if (addr_len != sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) - return -EINVAL; if (usax->sax25_family != AF_AX25) return -EINVAL; - if (addr_len == sizeof(struct full_sockaddr_ax25) && usax->sax25_ndigis != 0) { + if (addr_len < sizeof(struct sockaddr_ax25) || addr_len > sizeof(struct full_sockaddr_ax25)) + return -EINVAL; + if (addr_len < (usax->sax25_ndigis * AX25_ADDR_LEN + sizeof(struct sockaddr_ax25))) + return -EINVAL; + + if (addr_len > sizeof(struct sockaddr_ax25) && usax->sax25_ndigis != 0) { int ct = 0; struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax; @@ -1375,6 +1405,7 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct /* User data follows immediately after the AX.25 data */ memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + skb->nh.raw = skb->data; /* Add the PID if one is not supplied by the user in the skb */ if (!sk->protinfo.ax25->pidincl) { @@ -1425,7 +1456,6 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags, struct scm_cookie *scm) { struct sock *sk = sock->sk; - struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name; int copied; struct sk_buff *skb; int er; @@ -1453,13 +1483,13 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, int size, int f } skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - - if (sax != NULL) { + + if (msg->msg_namelen != 0) { + struct 
sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name; ax25_digi digi; ax25_address dest; - int dama; - ax25_addr_parse(skb->data, skb->len, NULL, &dest, &digi, NULL, &dama); + ax25_addr_parse(skb->mac.raw+1, skb->data-skb->mac.raw-1, NULL, &dest, &digi, NULL, NULL); sax->sax25_family = AF_AX25; /* We set this correctly, even though we may not let the @@ -1469,18 +1499,15 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, int size, int f sax->sax25_call = dest; if (sax->sax25_ndigis != 0) { - int ct = 0; + int ct; struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)sax; - while (ct < digi.ndigi) { + for (ct = 0; ct < digi.ndigi; ct++) fsa->fsa_digipeater[ct] = digi.calls[ct]; - ct++; - } } + msg->msg_namelen = sizeof(struct full_sockaddr_ax25); } - msg->msg_namelen = sizeof(struct full_sockaddr_ax25); - skb_free_datagram(sk, skb); return copied; @@ -1614,56 +1641,53 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) static int ax25_get_info(char *buffer, char **start, off_t offset, int length, int dummy) { ax25_cb *ax25; - const char *devname; - char callbuf[15]; + int k; int len = 0; off_t pos = 0; off_t begin = 0; cli(); - len += sprintf(buffer, "dest_addr src_addr dev st vs vr va t1 t2 t3 idle n2 rtt wnd paclen Snd-Q Rcv-Q inode\n"); - + /* + * New format: + * magic dev src_addr dest_addr,digi1,digi2,.. st vs vr va t1 t1 t2 t2 t3 t3 idle idle n2 n2 rtt window paclen Snd-Q Rcv-Q inode + */ + for (ax25 = ax25_list; ax25 != NULL; ax25 = ax25->next) { - if (ax25->ax25_dev == NULL) - devname = "???"; - else - devname = ax25->ax25_dev->dev->name; - - len += sprintf(buffer + len, "%-9s ", - ax2asc(&ax25->dest_addr)); - - sprintf(callbuf, "%s%c", ax2asc(&ax25->source_addr), - (ax25->iamdigi) ? 
'*' : ' '); - - len += sprintf(buffer + len, "%-10s %-4s %2d %3d %3d %3d %3lu/%03lu %2lu/%02lu %3lu/%03lu %3lu/%03lu %2d/%02d %3lu %3d %5d", - callbuf, - devname, + len += sprintf(buffer+len, "%8.8lx %s %s%s ", + (long) ax25, + ax25->ax25_dev == NULL? "???" : ax25->ax25_dev->dev->name, + ax2asc(&ax25->source_addr), + ax25->iamdigi? "*":""); + + len += sprintf(buffer+len, "%s", ax2asc(&ax25->dest_addr)); + + for (k=0; (ax25->digipeat != NULL) && (k < ax25->digipeat->ndigi); k++) { + len += sprintf(buffer+len, ",%s%s", + ax2asc(&ax25->digipeat->calls[k]), + ax25->digipeat->repeated[k]? "*":""); + } + + len += sprintf(buffer+len, " %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %d %d", ax25->state, - ax25->vs, - ax25->vr, - ax25->va, - ax25_display_timer(&ax25->t1timer) / HZ, - ax25->t1 / HZ, - ax25_display_timer(&ax25->t2timer) / HZ, - ax25->t2 / HZ, - ax25_display_timer(&ax25->t3timer) / HZ, - ax25->t3 / HZ, - ax25_display_timer(&ax25->idletimer) / (60 * HZ), + ax25->vs, ax25->vr, ax25->va, + ax25_display_timer(&ax25->t1timer) / HZ, ax25->t1 / HZ, + ax25_display_timer(&ax25->t2timer) / HZ, ax25->t2 / HZ, + ax25_display_timer(&ax25->t3timer) / HZ, ax25->t3 / HZ, + ax25_display_timer(&ax25->idletimer) / (60 * HZ), ax25->idle / (60 * HZ), - ax25->n2count, - ax25->n2, + ax25->n2count, ax25->n2, ax25->rtt / HZ, ax25->window, ax25->paclen); if (ax25->sk != NULL) { - len += sprintf(buffer + len, " %5d %5d %ld\n", + len += sprintf(buffer + len, " %d %d %ld\n", atomic_read(&ax25->sk->wmem_alloc), atomic_read(&ax25->sk->rmem_alloc), ax25->sk->socket != NULL ? 
ax25->sk->socket->inode->i_ino : 0L); } else { - len += sprintf(buffer + len, "\n"); + len += sprintf(buffer + len, " * * *\n"); } pos = begin + len; @@ -1689,12 +1713,12 @@ static int ax25_get_info(char *buffer, char **start, off_t offset, int length, i static struct net_proto_family ax25_family_ops = { - AF_AX25, + PF_AX25, ax25_create }; static struct proto_ops ax25_proto_ops = { - AF_AX25, + PF_AX25, sock_no_dup, ax25_release, @@ -1820,7 +1844,7 @@ void cleanup_module(void) ax25_packet_type.type = htons(ETH_P_AX25); dev_remove_pack(&ax25_packet_type); - sock_unregister(AF_AX25); + sock_unregister(PF_AX25); } #endif diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c index 5daf92fa5..1b0f9da67 100644 --- a/net/ax25/ax25_addr.c +++ b/net/ax25/ax25_addr.c @@ -165,19 +165,23 @@ unsigned char *ax25_addr_parse(unsigned char *buf, int len, ax25_address *src, a if (len < 14) return NULL; - *flags = 0; - - if (buf[6] & AX25_CBIT) - *flags = AX25_COMMAND; - if (buf[13] & AX25_CBIT) - *flags = AX25_RESPONSE; + if (flags != NULL) { + *flags = 0; + + if (buf[6] & AX25_CBIT) + *flags = AX25_COMMAND; + if (buf[13] & AX25_CBIT) + *flags = AX25_RESPONSE; + } if (dama != NULL) *dama = ~buf[13] & AX25_DAMA_FLAG; /* Copy to, from */ - memcpy(dest, buf + 0, AX25_ADDR_LEN); - memcpy(src, buf + 7, AX25_ADDR_LEN); + if (dest != NULL) + memcpy(dest, buf + 0, AX25_ADDR_LEN); + if (src != NULL) + memcpy(src, buf + 7, AX25_ADDR_LEN); buf += 2 * AX25_ADDR_LEN; len -= 2 * AX25_ADDR_LEN; diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index 8e330af23..beac955e5 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -228,6 +228,8 @@ static void ax25_send_iframe(ax25_cb *ax25, struct sk_buff *skb, int poll_bit) if (skb == NULL) return; + skb->nh.raw = skb->data; + if (ax25->modulus == AX25_MODULUS) { frame = skb_push(skb, 1); diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 0dedcc88e..a7f6ce317 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ 
-35,7 +35,7 @@ * "SIOCAX25OPTRT" to set IP mode and a 'permanent' flag * on routes. * AX.25 033 Jonathan(G4KLX) Remove auto-router. - * Joerg(DL1BKE) Moved BPQ Ethernet driver to seperate device. + * Joerg(DL1BKE) Moved BPQ Ethernet driver to separate device. * AX.25 035 Frederic(F1OAT) Support for pseudo-digipeating. * Jonathan(G4KLX) Support for packet forwarding. */ diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c index 7b7d437e8..1e27580cf 100644 --- a/net/ax25/ax25_std_in.c +++ b/net/ax25/ax25_std_in.c @@ -335,11 +335,11 @@ static int ax25_std_state4_machine(ax25_cb *ax25, struct sk_buff *skb, int frame ax25->condition |= AX25_COND_PEER_RX_BUSY; if (type == AX25_RESPONSE && pf) { ax25_stop_t1timer(ax25); + ax25->n2count = 0; if (ax25_validate_nr(ax25, nr)) { ax25_frames_acked(ax25, nr); if (ax25->vs == ax25->va) { ax25_start_t3timer(ax25); - ax25->n2count = 0; ax25->state = AX25_STATE_3; } else { ax25_requeue_frames(ax25); @@ -364,11 +364,11 @@ static int ax25_std_state4_machine(ax25_cb *ax25, struct sk_buff *skb, int frame ax25->condition &= ~AX25_COND_PEER_RX_BUSY; if (pf && type == AX25_RESPONSE) { ax25_stop_t1timer(ax25); + ax25->n2count = 0; if (ax25_validate_nr(ax25, nr)) { ax25_frames_acked(ax25, nr); if (ax25->vs == ax25->va) { ax25_start_t3timer(ax25); - ax25->n2count = 0; ax25->state = AX25_STATE_3; } else { ax25_requeue_frames(ax25); diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 948ff4719..cb3e1ba4b 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -197,6 +197,8 @@ void ax25_send_control(ax25_cb *ax25, int frametype, int poll_bit, int type) skb_reserve(skb, AX25_BPQ_HEADER_LEN + ax25_addr_size(ax25->digipeat)); + skb->nh.raw = skb->data; + /* Assume a response - address structure for DTE */ if (ax25->modulus == AX25_MODULUS) { dptr = skb_put(skb, 1); diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c index 8a384b58b..ed6fd7fc2 100644 --- a/net/ax25/ax25_timer.c +++ b/net/ax25/ax25_timer.c @@ -18,7 
+18,7 @@ * AX.25 032 Joerg(DL1BKE) Fixed DAMA timeout bug * AX.25 033 Jonathan(G4KLX) Modularisation functions. * AX.25 035 Frederic(F1OAT) Support for pseudo-digipeating. - * AX.25 036 Jonathan(G4KLX) Split Standard and DAMA code into seperate files. + * AX.25 036 Jonathan(G4KLX) Split Standard and DAMA code into separate files. * Joerg(DL1BKE) Fixed DAMA Slave. We are *required* to start with * standard AX.25 mode. * AX.25 037 Jonathan(G4KLX) New timer architecture. diff --git a/net/core/dev.c b/net/core/dev.c index 69315d948..bd414c794 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -740,21 +740,20 @@ static inline void handle_bridge(struct sk_buff *skb, unsigned short type) * recovering the MAC header first. */ - int offset=skb->data-skb->mac.raw; - cli(); + int offset; + + skb=skb_clone(skb, GFP_ATOMIC); + if(skb==NULL) + return; + + offset=skb->data-skb->mac.raw; skb_push(skb,offset); /* Put header back on for bridge */ + if(br_receive_frame(skb)) - { - sti(); return; - } - /* - * Pull the MAC header off for the copy going to - * the upper layers. - */ - skb_pull(skb,offset); - sti(); + kfree_skb(skb, FREE_READ); } + return; } #endif @@ -809,7 +808,7 @@ void net_bh(void) while (!skb_queue_empty(&backlog)) { - struct sk_buff * skb = backlog.next; + struct sk_buff * skb; /* Give chance to other bottom halves to run */ if (jiffies - start_time > 1) @@ -818,9 +817,7 @@ void net_bh(void) /* * We have a packet. 
Therefore the queue has shrunk */ - cli(); - __skb_unlink(skb, &backlog); - sti(); + skb = skb_dequeue(&backlog); #ifdef CONFIG_CPU_IS_SLOW if (ave_busy > 128*16) { @@ -1097,7 +1094,7 @@ static int sprintf_stats(char *buffer, struct device *dev) int size; if (stats) - size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %8lu %4lu %4lu %4lu %5lu %4lu %4lu\n", + size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, @@ -1325,7 +1322,7 @@ int dev_change_flags(struct device *dev, unsigned flags) dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_RUNNING|IFF_NOARP| IFF_SLAVE|IFF_MASTER| IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) | - (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC)); + (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI)); /* * Load in the correct multicast list now the flags have changed. @@ -1346,13 +1343,11 @@ int dev_change_flags(struct device *dev, unsigned flags) if (ret == 0) dev_mc_upload(dev); - } + } if (dev->flags&IFF_UP && - ((old_flags^dev->flags)&~(IFF_UP|IFF_RUNNING|IFF_PROMISC|IFF_VOLATILE))) { - printk(KERN_DEBUG "SIFFL %s(%s)\n", dev->name, current->comm); + ((old_flags^dev->flags)&~(IFF_UP|IFF_RUNNING|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE))) notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); - } if ((flags^dev->gflags)&IFF_PROMISC) { int inc = (flags&IFF_PROMISC) ? +1 : -1; @@ -1360,6 +1355,16 @@ int dev_change_flags(struct device *dev, unsigned flags) dev_set_promiscuity(dev, inc); } + /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI + is important. Some (broken) drivers set IFF_PROMISC, when + IFF_ALLMULTI is requested not asking us and not reporting. + */ + if ((flags^dev->gflags)&IFF_ALLMULTI) { + int inc = (flags&IFF_ALLMULTI) ? 
+1 : -1; + dev->gflags ^= IFF_ALLMULTI; + dev_set_allmulti(dev, inc); + } + return ret; } @@ -1378,7 +1383,8 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) switch(cmd) { case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = (dev->flags&~IFF_PROMISC)|(dev->gflags&IFF_PROMISC); + ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI)) + |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI)); return 0; case SIOCSIFFLAGS: /* Set interface flags */ @@ -1660,6 +1666,7 @@ static int dev_boot_phase = 1; int register_netdevice(struct device *dev) { struct device *d, **dp; +printk("register_netdevice #1\n"); if (dev_boot_phase) { printk(KERN_INFO "early initialization of device %s is deferred\n", dev->name); @@ -1673,27 +1680,32 @@ int register_netdevice(struct device *dev) *dp = dev; return 0; } +printk("register_netdevice #2\n"); dev->iflink = -1; /* Init, if this function is available */ if (dev->init && dev->init(dev) != 0) return -EIO; +printk("register_netdevice #3\n"); /* Check for existence, and append to tail of chain */ for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) { if (d == dev || strcmp(d->name, dev->name) == 0) return -EEXIST; } +printk("register_netdevice #4\n"); dev->next = NULL; dev_init_scheduler(dev); dev->ifindex = dev_new_index(); if (dev->iflink == -1) dev->iflink = dev->ifindex; *dp = dev; +printk("register_netdevice #5\n"); /* Notify protocols, that a new device appeared. 
*/ notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); +printk("register_netdevice #6\n"); return 0; } diff --git a/net/core/profile.c b/net/core/profile.c index 54fc57662..fc7464b7a 100644 --- a/net/core/profile.c +++ b/net/core/profile.c @@ -13,6 +13,7 @@ #include <linux/inet.h> #include <net/checksum.h> +#include <asm/processor.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -276,8 +277,8 @@ __initfunc(int net_profile_init(void)) printk("Evaluating net profiler cost ..."); #if CPU == 586 || CPU == 686 - if (!(boot_cpu_data.x86_capability & 16)) { - panic("Sorry, you CPU does not support tsc. I am dying...\n"); + if (!(boot_cpu_data.x86_capability & X86_FEATURE_TSC)) { + printk(KERN_ERR "Sorry, your CPU does not support TSC. Net profiler disabled.\n"); return -1; } #endif diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4bbe84cac..cd8030c5d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -246,7 +246,7 @@ int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) s_idx = 1; for (idx=1; idx<NPROTO; idx++) { int type = cb->nlh->nlmsg_type-RTM_BASE; - if (idx < s_idx || idx == AF_PACKET) + if (idx < s_idx || idx == PF_PACKET) continue; if (rtnetlink_links[idx] == NULL || rtnetlink_links[idx][type].dumpit == NULL) @@ -336,7 +336,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) link_tab = rtnetlink_links[family]; if (link_tab == NULL) - link_tab = rtnetlink_links[AF_UNSPEC]; + link_tab = rtnetlink_links[PF_UNSPEC]; link = &link_tab[type]; sz_idx = type>>2; @@ -348,8 +348,10 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) } if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { + int rlen; + if (link->dumpit == NULL) - link = &(rtnetlink_links[AF_UNSPEC][type]); + link = &(rtnetlink_links[PF_UNSPEC][type]); if (link->dumpit == NULL) goto err_inval; @@ -364,7 +366,10 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) atomic_dec(&rtnl_rlockct); 
return -1; } - skb_pull(skb, NLMSG_ALIGN(nlh->nlmsg_len)); + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); return -1; } @@ -398,7 +403,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) } if (link->doit == NULL) - link = &(rtnetlink_links[AF_UNSPEC][type]); + link = &(rtnetlink_links[PF_UNSPEC][type]); if (link->doit == NULL) goto err_inval; err = link->doit(skb, nlh, (void *)&rta); @@ -538,8 +543,8 @@ __initfunc(void rtnetlink_init(void)) if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); register_netdevice_notifier(&rtnetlink_dev_notifier); - rtnetlink_links[AF_UNSPEC] = link_rtnetlink_table; - rtnetlink_links[AF_PACKET] = link_rtnetlink_table; + rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table; + rtnetlink_links[PF_PACKET] = link_rtnetlink_table; } diff --git a/net/core/scm.c b/net/core/scm.c index dd19cf5e0..3e4469f29 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -50,7 +50,7 @@ static __inline__ int scm_check_creds(struct ucred *creds) creds->uid == current->suid) || capable(CAP_SETUID)) && ((creds->gid == current->gid || creds->gid == current->egid || creds->gid == current->sgid) || capable(CAP_SETGID))) { - return 0; + return 0; } return -EPERM; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index abad1e217..c218233d4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -77,11 +77,22 @@ extern atomic_t ip_frag_mem; static kmem_cache_t *skbuff_head_cache; /* - * Strings we don't want inline's duplicating + * Keep out-of-line to prevent kernel bloat. + * __builtin_return_address is not used because it is not always + * reliable. */ - -const char skb_push_errstr[]="skpush:under: %p:%d"; -const char skb_put_errstr[] ="skput:over: %p:%d"; + +void skb_over_panic(struct sk_buff *skb, int sz, void *here) +{ + panic("skput:over: %p:%d put:%d dev:%s", + here, skb->len, sz, skb->dev ? 
skb->dev->name : "<NULL>"); +} + +void skb_under_panic(struct sk_buff *skb, int sz, void *here) +{ + panic("skput:under: %p:%d put:%d dev:%s", + here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); +} void show_net_buffers(void) { @@ -179,6 +190,9 @@ static inline void skb_headerinit(void *p, kmem_cache_t *cache, skb->ip_summed = 0; skb->security = 0; /* By default packets are insecure */ skb->dst = NULL; +#ifdef CONFIG_IP_FIREWALL_CHAINS + skb->fwmark = 0; +#endif memset(skb->cb, 0, sizeof(skb->cb)); skb->priority = 0; } diff --git a/net/core/sock.c b/net/core/sock.c index 428b4052c..07d125462 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -105,6 +105,7 @@ #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/poll.h> +#include <linux/init.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -206,18 +207,14 @@ int sock_setsockopt(struct socket *sock, int level, int optname, sk->broadcast=valbool; break; case SO_SNDBUF: - /* - * The spec isnt clear if ENOBUFS or EINVAL - * is best - */ - - /* printk(KERN_DEBUG "setting SO_SNDBUF %d\n", val); */ + /* Don't error on this BSD doesn't and if you think + about it this is right. Otherwise apps have to + play 'guess the biggest size' games. RCVBUF/SNDBUF + are treated in BSD as hints */ + if (val > sysctl_wmem_max) - return -EINVAL; + val = sysctl_wmem_max; - /* FIXME: the tcp code should be made to work even - * with small sndbuf values. - */ sk->sndbuf = max(val*2,2048); /* @@ -228,10 +225,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname, break; case SO_RCVBUF: - /* printk(KERN_DEBUG "setting SO_RCVBUF %d\n", val); */ - + /* Don't error on this BSD doesn't and if you think + about it this is right. Otherwise apps have to + play 'guess the biggest size' games. RCVBUF/SNDBUF + are treated in BSD as hints */ + if (val > sysctl_rmem_max) - return -EINVAL; + val = sysctl_rmem_max; /* FIXME: is this lower bound the right one? 
*/ sk->rcvbuf = max(val*2,256); @@ -480,8 +480,8 @@ struct sock *sk_alloc(int family, int priority, int zero_it) { struct sock *sk = kmem_cache_alloc(sk_cachep, priority); - if(sk && zero_it) { - memset(sk, 0, sizeof(struct sock)); + if(sk) { + if (zero_it) memset(sk, 0, sizeof(struct sock)); sk->family = family; } @@ -496,10 +496,11 @@ void sk_free(struct sock *sk) kmem_cache_free(sk_cachep, sk); } -void sk_init(void) +__initfunc(void sk_init(void)) { sk_cachep = kmem_cache_create("sock", sizeof(struct sock), 0, SLAB_HWCACHE_ALIGN, 0, 0); + } /* @@ -542,8 +543,8 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int atomic_add(skb->truesize, &sk->wmem_alloc); skb->destructor = sock_wfree; skb->sk = sk; + return skb; } - return skb; } return NULL; } @@ -556,23 +557,26 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int atomic_add(skb->truesize, &sk->rmem_alloc); skb->destructor = sock_rfree; skb->sk = sk; + return skb; } - return skb; } return NULL; } void *sock_kmalloc(struct sock *sk, int size, int priority) { - void *mem = NULL; if (atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) { + void *mem; /* First do the add, to avoid the race if kmalloc * might sleep. 
*/ atomic_add(size, &sk->omem_alloc); mem = kmalloc(size, priority); + if (mem) + return mem; + atomic_sub(size, &sk->omem_alloc); } - return mem; + return NULL; } void sock_kfree_s(struct sock *sk, void *mem, int size) @@ -880,7 +884,7 @@ int sock_no_getname(struct socket *sock, struct sockaddr *saddr, unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt) { - return -EOPNOTSUPP; + return 0; } int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) diff --git a/net/econet/econet.c b/net/econet/econet.c index 9bfbfd921..92bdc4c97 100644 --- a/net/econet/econet.c +++ b/net/econet/econet.c @@ -34,6 +34,7 @@ #include <linux/inet.h> #include <linux/etherdevice.h> #include <linux/if_arp.h> +#include <linux/wireless.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/inet_common.h> @@ -48,6 +49,8 @@ static struct proto_ops econet_ops; static struct sock *econet_sklist; +static spinlock_t aun_queue_lock; + #ifdef CONFIG_ECONET_AUNUDP static struct socket *udpsock; #define AUN_PORT 0x8000 @@ -343,7 +346,7 @@ static int econet_sendmsg(struct socket *sock, struct msghdr *msg, int len, eb->cookie = saddr->cookie; eb->sec = *saddr; - eb->sent - ec_tx_done; + eb->sent = ec_tx_done; if (dev->hard_header) { int res; @@ -557,7 +560,7 @@ static int econet_create(struct socket *sock, int protocol) MOD_INC_USE_COUNT; err = -ENOBUFS; - sk = sk_alloc(AF_ECONET, GFP_KERNEL, 1); + sk = sk_alloc(PF_ECONET, GFP_KERNEL, 1); if (sk == NULL) goto out; @@ -570,7 +573,7 @@ static int econet_create(struct socket *sock, int protocol) goto out_free; memset(sk->protinfo.af_econet, 0, sizeof(struct econet_opt)); sk->zapped=0; - sk->family = AF_ECONET; + sk->family = PF_ECONET; sk->num = protocol; sklist_insert_socket(&econet_sklist, sk); @@ -727,19 +730,19 @@ static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg } static struct net_proto_family econet_family_ops = { - AF_ECONET, + PF_ECONET, econet_create }; 
static struct proto_ops econet_ops = { - AF_ECONET, + PF_ECONET, sock_no_dup, econet_release, econet_bind, sock_no_connect, - NULL, - NULL, + sock_no_socketpair, + sock_no_accept, econet_getname, datagram_poll, econet_ioctl, @@ -965,7 +968,6 @@ static void aun_data_available(struct sock *sk, int slen) * drop the packet. */ -static spinlock_t aun_queue_lock; static void ab_cleanup(unsigned long h) { @@ -1008,7 +1010,7 @@ __initfunc(static int aun_udp_initialise(void)) /* We can count ourselves lucky Acorn machines are too dim to speak IPv6. :-) */ - if ((error = sock_create(AF_INET, SOCK_DGRAM, 0, &udpsock)) < 0) + if ((error = sock_create(PF_INET, SOCK_DGRAM, 0, &udpsock)) < 0) { printk("AUN: socket error %d\n", -error); return error; diff --git a/net/ipv4/Config.in b/net/ipv4/Config.in index dbace1d3b..4b83152f0 100644 --- a/net/ipv4/Config.in +++ b/net/ipv4/Config.in @@ -31,12 +31,10 @@ if [ "$CONFIG_FIREWALL" = "y" ]; then define_bool CONFIG_NETLINK_DEV y fi fi - bool 'IP: firewall packet logging' CONFIG_IP_FIREWALL_VERBOSE bool 'IP: transparent proxy support' CONFIG_IP_TRANSPARENT_PROXY bool 'IP: always defragment' CONFIG_IP_ALWAYS_DEFRAG fi fi -bool 'IP: accounting' CONFIG_IP_ACCT if [ "$CONFIG_IP_FIREWALL" = "y" ]; then bool 'IP: masquerading' CONFIG_IP_MASQUERADE if [ "$CONFIG_IP_MASQUERADE" != "n" ]; then diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 10f4c5e7c..2a519f346 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -12,13 +12,17 @@ IPV4_OBJS := utils.o route.o proc.o timer.o protocol.o \ ip_input.o ip_fragment.o ip_forward.o ip_options.o \ ip_output.o ip_sockglue.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o\ - raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o ip_fw.o \ + raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ sysctl_net_ipv4.o fib_frontend.o fib_semantics.o fib_hash.o IPV4X_OBJS := MOD_LIST_NAME := IPV4_MODULES M_OBJS := +ifeq ($(CONFIG_IP_FIREWALL),y) +IPV4_OBJS += ip_fw.o +endif + ifeq 
($(CONFIG_IP_MULTIPLE_TABLES),y) IPV4_OBJS += fib_rules.o endif diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a54ae8a95..18c31f5c3 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -3,9 +3,9 @@ * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * - * AF_INET protocol family socket handler. + * PF_INET protocol family socket handler. * - * Version: $Id: af_inet.c,v 1.6 1998/03/17 22:18:20 ralf Exp $ + * Version: $Id: af_inet.c,v 1.74 1998/05/08 21:06:24 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -330,23 +330,23 @@ static int inet_create(struct socket *sock, int protocol) /* Compatibility */ if (sock->type == SOCK_PACKET) { static int warned; - if (net_families[AF_PACKET]==NULL) + if (net_families[PF_PACKET]==NULL) { #if defined(CONFIG_KMOD) && defined(CONFIG_PACKET_MODULE) char module_name[30]; - sprintf(module_name,"net-pf-%d", AF_PACKET); + sprintf(module_name,"net-pf-%d", PF_PACKET); request_module(module_name); - if (net_families[AF_PACKET] == NULL) + if (net_families[PF_PACKET] == NULL) #endif return -ESOCKTNOSUPPORT; } if (!warned++) - printk(KERN_INFO "%s uses obsolete (AF_INET,SOCK_PACKET)\n", current->comm); - return net_families[AF_PACKET]->create(sock, protocol); + printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm); + return net_families[PF_PACKET]->create(sock, protocol); } sock->state = SS_UNCONNECTED; - sk = sk_alloc(AF_INET, GFP_KERNEL, 1); + sk = sk_alloc(PF_INET, GFP_KERNEL, 1); if (sk == NULL) goto do_oom; @@ -398,7 +398,7 @@ static int inet_create(struct socket *sock, int protocol) #ifdef CONFIG_TCP_NAGLE_OFF sk->nonagle = 1; #endif - sk->family = AF_INET; + sk->family = PF_INET; sk->protocol = protocol; sk->prot = prot; @@ -958,7 +958,7 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } struct proto_ops inet_stream_ops = 
{ - AF_INET, + PF_INET, sock_no_dup, inet_release, @@ -979,7 +979,7 @@ struct proto_ops inet_stream_ops = { }; struct proto_ops inet_dgram_ops = { - AF_INET, + PF_INET, sock_no_dup, inet_release, @@ -1000,7 +1000,7 @@ struct proto_ops inet_dgram_ops = { }; struct net_proto_family inet_family_ops = { - AF_INET, + PF_INET, inet_create }; @@ -1129,9 +1129,10 @@ __initfunc(void inet_proto_init(struct net_proto *pro)) /* * Set the firewalling up */ -#if defined(CONFIG_IP_ACCT)||defined(CONFIG_IP_FIREWALL) +#if defined(CONFIG_IP_FIREWALL) ip_fw_init(); #endif + #ifdef CONFIG_IP_MASQUERADE ip_masq_init(); #endif diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index dd7ce9e0f..e6e272b0e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1,6 +1,6 @@ /* linux/net/inet/arp.c * - * Version: $Id: arp.c,v 1.5 1998/03/17 22:18:21 ralf Exp $ + * Version: $Id: arp.c,v 1.67 1998/06/19 13:22:31 davem Exp $ * * Copyright (C) 1994 by Florian La Roche * @@ -15,7 +15,7 @@ * 2 of the License, or (at your option) any later version. 
* * Fixes: - * Alan Cox : Removed the ethernet assumptions in + * Alan Cox : Removed the Ethernet assumptions in * Florian's code * Alan Cox : Fixed some small errors in the ARP * logic @@ -135,8 +135,8 @@ static struct neigh_ops arp_generic_ops = arp_error_report, neigh_resolve_output, neigh_connected_output, - ip_acct_output, - ip_acct_output + dev_queue_xmit, + dev_queue_xmit }; static struct neigh_ops arp_hh_ops = @@ -147,8 +147,8 @@ static struct neigh_ops arp_hh_ops = arp_error_report, neigh_resolve_output, neigh_resolve_output, - ip_acct_output, - ip_acct_output + dev_queue_xmit, + dev_queue_xmit }; static struct neigh_ops arp_direct_ops = @@ -157,13 +157,13 @@ static struct neigh_ops arp_direct_ops = NULL, NULL, NULL, - ip_acct_output, - ip_acct_output, - ip_acct_output, - ip_acct_output + dev_queue_xmit, + dev_queue_xmit, + dev_queue_xmit, + dev_queue_xmit }; -#if defined(CONFIG_AX25) || defined(CONFIG_AX25) || \ +#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) || \ defined(CONFIG_SHAPER) || defined(CONFIG_SHAPER_MODULE) struct neigh_ops arp_broken_ops = { @@ -173,8 +173,8 @@ struct neigh_ops arp_broken_ops = arp_error_report, neigh_compat_output, neigh_compat_output, - ip_acct_output, - ip_acct_output, + dev_queue_xmit, + dev_queue_xmit, }; #endif @@ -230,7 +230,7 @@ static int arp_constructor(struct neighbour *neigh) neigh->ops = &arp_direct_ops; neigh->output = neigh->ops->queue_xmit; } else { - /* Good devices (checked by reading texts, but only ethernet is + /* Good devices (checked by reading texts, but only Ethernet is tested) ARPHRD_ETHER: (ethernet, apfddi) @@ -240,7 +240,7 @@ static int arp_constructor(struct neighbour *neigh) ARPHRD_ARCNET: etc. etc. etc. - ARPHRD_IPDDP will also work, if author repaires it. + ARPHRD_IPDDP will also work, if author repairs it. I did not it, because this driver does not work even in old paradigm. 
*/ @@ -261,7 +261,7 @@ static int arp_constructor(struct neighbour *neigh) default: break; case ARPHRD_ROSE: -#if defined(CONFIG_AX25) || defined(CONFIG_AX25) +#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) case ARPHRD_AX25: #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) case ARPHRD_NETROM: @@ -1099,7 +1099,7 @@ __initfunc(void arp_init (void)) #ifdef CONFIG_AX25_MODULE /* - * ax25 -> ascii conversion + * ax25 -> ASCII conversion */ char *ax2asc(ax25_address *a) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 47b5ef25c..18293338e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1,7 +1,7 @@ /* * NET3 IP device support routines. * - * Version: $Id: devinet.c,v 1.4 1998/03/17 22:18:21 ralf Exp $ + * Version: $Id: devinet.c,v 1.22 1998/05/08 21:06:26 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -1014,10 +1014,10 @@ __initfunc(void inet_del_bootp_addr(struct device *dev)) __initfunc(void devinet_init(void)) { - register_gifconf(AF_INET, inet_gifconf); + register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); #ifdef CONFIG_RTNETLINK - rtnetlink_links[AF_INET] = inet_rtnetlink_table; + rtnetlink_links[PF_INET] = inet_rtnetlink_table; #endif #ifdef CONFIG_SYSCTL devinet_sysctl.sysctl_header = diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 24f3052fe..d9a150218 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -5,7 +5,7 @@ * * IPv4 Forwarding Information Base: FIB frontend. 
* - * Version: $Id: fib_frontend.c,v 1.9 1998/03/08 20:52:36 davem Exp $ + * Version: $Id: fib_frontend.c,v 1.11 1998/06/11 03:15:40 davem Exp $ * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * @@ -271,6 +271,8 @@ int ip_rt_ioctl(unsigned int cmd, void *arg) if (tb) err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); } + if (rta.rta_mx) + kfree(rta.rta_mx); } rtnl_unlock(); return err; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 4b89ab676..3e13671a2 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -5,7 +5,7 @@ * * IPv4 FIB: lookup engine and maintenance routines. * - * Version: $Id: fib_hash.c,v 1.3 1998/03/08 05:56:16 davem Exp $ + * Version: $Id: fib_hash.c,v 1.4 1998/07/15 05:05:08 davem Exp $ * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * @@ -274,7 +274,7 @@ fn_hash_lookup(struct fib_table *tb, const struct rt_key *key, struct fib_result #endif ) { if (matched) - return 1; + break; continue; } matched = 1; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 107f07791..5537016d2 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -5,7 +5,7 @@ * * IPv4 Forwarding Information Base: semantics. 
* - * Version: $Id: fib_semantics.c,v 1.8 1998/04/28 06:21:58 davem Exp $ + * Version: $Id: fib_semantics.c,v 1.9 1998/06/11 03:15:41 davem Exp $ * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * @@ -866,8 +866,36 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, *rta->rta_mtu = r->rt_mtu; } #else - if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) - printk(KERN_DEBUG "SIOCRT*: mtu/window/irtt are not implemnted.\n"); + if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) { + struct rtattr *rec; + struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL); + if (mx == NULL) + return -ENOMEM; + rta->rta_mx = mx; + mx->rta_type = RTA_METRICS; + mx->rta_len = RTA_LENGTH(0); + if (r->rt_flags&RTF_MTU) { + rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); + rec->rta_type = RTAX_MTU; + rec->rta_len = RTA_LENGTH(4); + mx->rta_len += RTA_LENGTH(4); + *(u32*)RTA_DATA(rec) = r->rt_mtu; + } + if (r->rt_flags&RTF_WINDOW) { + rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); + rec->rta_type = RTAX_WINDOW; + rec->rta_len = RTA_LENGTH(4); + mx->rta_len += RTA_LENGTH(4); + *(u32*)RTA_DATA(rec) = r->rt_window; + } + if (r->rt_flags&RTF_IRTT) { + rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); + rec->rta_type = RTAX_RTT; + rec->rta_len = RTA_LENGTH(4); + mx->rta_len += RTA_LENGTH(4); + *(u32*)RTA_DATA(rec) = r->rt_irtt; + } + } #endif return 0; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 7ce08cdd4..4e947337a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -3,7 +3,7 @@ * * Alan Cox, <alan@cymru.net> * - * Version: $Id: icmp.c,v 1.5 1998/03/17 22:18:23 ralf Exp $ + * Version: $Id: icmp.c,v 1.44 1998/06/16 04:38:27 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -539,7 +539,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, unsigned long info) */ saddr = iph->daddr; - if (!(rt->rt_flags&RTCF_LOCAL)) + if (!(rt->rt_flags & 
RTCF_LOCAL)) saddr = 0; tos = icmp_pointers[type].error ? diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d3414a0fe..74757adf8 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -8,7 +8,7 @@ * the older version didn't come out right using gcc 2.5.8, the newer one * seems to fall out with gcc 2.6.2. * - * Version: $Id: igmp.c,v 1.4 1998/03/17 22:18:24 ralf Exp $ + * Version: $Id: igmp.c,v 1.26 1998/03/08 05:56:19 davem Exp $ * * Authors: * Alan Cox <Alan.Cox@linux.org> diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 8df8414cd..e136a16ca 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -5,7 +5,7 @@ * * The IP forwarding functionality. * - * Version: $Id: ip_forward.c,v 1.4 1998/03/17 22:18:25 ralf Exp $ + * Version: $Id: ip_forward.c,v 1.40 1998/03/08 05:56:20 davem Exp $ * * Authors: see ip.c * diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1641e5c3d..9641aaae3 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -5,7 +5,7 @@ * * The IP fragmentation functionality. * - * Version: $Id: ip_fragment.c,v 1.36 1998/04/18 02:13:07 davem Exp $ + * Version: $Id: ip_fragment.c,v 1.38 1998/06/16 04:38:29 davem Exp $ * * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox <Alan.Cox@linux.org> @@ -16,6 +16,7 @@ * Andi Kleen : Add sysctls. * xxxx : Overlapfrag bug. * Ultima : ip_expire() kernel panic. + * Bill Hawes : Frag accounting and evictor fixes. */ #include <linux/types.h> @@ -76,8 +77,6 @@ struct ipq *ipq_hash[IPQ_HASHSZ]; atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ -char *in_ntoa(__u32 in); - /* Memory Tracking Functions. 
*/ extern __inline__ void frag_kfree_skb(struct sk_buff *skb) { @@ -88,12 +87,12 @@ extern __inline__ void frag_kfree_skb(struct sk_buff *skb) extern __inline__ void frag_kfree_s(void *ptr, int len) { atomic_sub(len, &ip_frag_mem); - kfree_s(ptr,len); + kfree(ptr); } extern __inline__ void *frag_kmalloc(int size, int pri) { - void *vp=kmalloc(size,pri); + void *vp = kmalloc(size, pri); if(!vp) return NULL; @@ -108,10 +107,8 @@ static struct ipfrag *ip_frag_create(int offset, int end, struct ipfrag *fp; fp = (struct ipfrag *) frag_kmalloc(sizeof(struct ipfrag), GFP_ATOMIC); - if (fp == NULL) { - NETDEBUG(printk(KERN_ERR "IP: frag_create: no memory left !\n")); - return(NULL); - } + if (fp == NULL) + goto out_nomem; /* Fill in the structure. */ fp->offset = offset; @@ -125,6 +122,10 @@ static struct ipfrag *ip_frag_create(int offset, int end, atomic_add(skb->truesize, &ip_frag_mem); return(fp); + +out_nomem: + NETDEBUG(printk(KERN_ERR "IP: frag_create: no memory left !\n")); + return(NULL); } /* Find the correct entry in the "incomplete datagrams" queue for @@ -139,7 +140,7 @@ static inline struct ipq *ip_find(struct iphdr *iph, struct dst_entry *dst) unsigned int hash = ipqhashfn(id, saddr, daddr, protocol); struct ipq *qp; - start_bh_atomic(); + /* Always, we are in a BH context, so no locking. -DaveM */ for(qp = ipq_hash[hash]; qp; qp = qp->next) { if(qp->iph->id == id && qp->iph->saddr == saddr && @@ -149,13 +150,15 @@ static inline struct ipq *ip_find(struct iphdr *iph, struct dst_entry *dst) break; } } - end_bh_atomic(); return qp; } /* Remove an entry from the "incomplete datagrams" queue, either * because we completed, reassembled and processed it, or because * it timed out. + * + * This is called _only_ from BH contexts, on packet reception + * processing and from frag queue expiration timers. 
-DaveM */ static void ip_free(struct ipq *qp) { @@ -165,11 +168,9 @@ static void ip_free(struct ipq *qp) del_timer(&qp->timer); /* Remove this entry from the "incomplete datagrams" queue. */ - start_bh_atomic(); if(qp->next) qp->next->pprev = qp->pprev; *qp->pprev = qp->next; - end_bh_atomic(); /* Release all fragment data. */ fp = qp->fragments; @@ -188,7 +189,9 @@ static void ip_free(struct ipq *qp) frag_kfree_s(qp, sizeof(struct ipq)); } -/* Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ +/* + * Oops, a fragment queue timed out. Kill it and send an ICMP reply. + */ static void ip_expire(unsigned long arg) { struct ipq *qp = (struct ipq *) arg; @@ -198,15 +201,15 @@ static void ip_expire(unsigned long arg) #ifdef IP_EXPIRE_DEBUG printk("warning: possible ip-expire attack\n"); #endif - ip_free(qp); - return; + goto out; } /* Send an ICMP "Fragment Reassembly Timeout" message. */ ip_statistics.IpReasmTimeout++; ip_statistics.IpReasmFails++; - icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); + icmp_send(qp->fragments->skb, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); +out: /* Nuke the fragment queue. */ ip_free(qp); } @@ -216,17 +219,30 @@ static void ip_expire(unsigned long arg) */ static void ip_evictor(void) { - while(atomic_read(&ip_frag_mem)>sysctl_ipfrag_low_thresh) { - int i; - - /* FIXME: Make LRU queue of frag heads. -DaveM */ - for(i = 0; i < IPQ_HASHSZ; i++) - if(ipq_hash[i]) - break; - if(i >= IPQ_HASHSZ) - panic("ip_evictor: memcount"); - ip_free(ipq_hash[i]); + int i, progress; + +restart: + progress = 0; + /* FIXME: Make LRU queue of frag heads. -DaveM */ + for (i = 0; i < IPQ_HASHSZ; i++) { + struct ipq *qp; + if (atomic_read(&ip_frag_mem) <= sysctl_ipfrag_low_thresh) + return; + /* We are in a BH context, so these queue + * accesses are safe. 
-DaveM + */ + qp = ipq_hash[i]; + if (qp) { + /* find the oldest queue for this hash bucket */ + while (qp->next) + qp = qp->next; + ip_free(qp); + progress = 1; + } } + if (progress) + goto restart; + panic("ip_evictor: memcount"); } /* Add an entry to the 'ipq' queue for a newly received IP datagram. @@ -241,20 +257,15 @@ static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph) int ihlen; qp = (struct ipq *) frag_kmalloc(sizeof(struct ipq), GFP_ATOMIC); - if (qp == NULL) { - NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n")); - return(NULL); - } + if (qp == NULL) + goto out_nomem; /* Allocate memory for the IP header (plus 8 octets for ICMP). */ ihlen = iph->ihl * 4; qp->iph = (struct iphdr *) frag_kmalloc(64 + 8, GFP_ATOMIC); - if (qp->iph == NULL) { - NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n")); - frag_kfree_s(qp, sizeof(struct ipq)); - return NULL; - } + if (qp->iph == NULL) + goto out_free; memcpy(qp->iph, iph, ihlen + 8); qp->len = 0; @@ -262,24 +273,28 @@ static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph) qp->fragments = NULL; qp->dev = skb->dev; - /* Start a timer for this entry. */ + /* Initialize a timer for this entry. */ init_timer(&qp->timer); - qp->timer.expires = jiffies + sysctl_ipfrag_time; /* about 30 seconds */ - qp->timer.data = (unsigned long) qp; /* pointer to queue */ - qp->timer.function = ip_expire; /* expire function */ - add_timer(&qp->timer); + qp->timer.expires = 0; /* (to be set later) */ + qp->timer.data = (unsigned long) qp; /* pointer to queue */ + qp->timer.function = ip_expire; /* expire function */ /* Add this entry to the queue. */ hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); - start_bh_atomic(); + /* We are in a BH context, no locking necessary. 
-DaveM */ if((qp->next = ipq_hash[hash]) != NULL) qp->next->pprev = &qp->next; ipq_hash[hash] = qp; qp->pprev = &ipq_hash[hash]; - end_bh_atomic(); return qp; + +out_free: + frag_kfree_s(qp, sizeof(struct ipq)); +out_nomem: + NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n")); + return(NULL); } /* See if a fragment queue is complete. */ @@ -323,24 +338,16 @@ static struct sk_buff *ip_glue(struct ipq *qp) /* Allocate a new buffer for the datagram. */ len = qp->ihlen + qp->len; - if(len>65535) { - if (net_ratelimit()) - printk(KERN_INFO "Oversized IP packet from %d.%d.%d.%d.\n", NIPQUAD(qp->iph->saddr)); - ip_statistics.IpReasmFails++; - ip_free(qp); - return NULL; - } + if(len > 65535) + goto out_oversize; - if ((skb = dev_alloc_skb(len)) == NULL) { - ip_statistics.IpReasmFails++; - NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing queue %p\n", qp)); - ip_free(qp); - return NULL; - } + skb = dev_alloc_skb(len); + if (!skb) + goto out_nomem; /* Fill in the basic details. */ skb->mac.raw = ptr = skb->data; - skb->nh.iph = iph = (struct iphdr*)skb_put(skb,len); + skb->nh.iph = iph = (struct iphdr *) skb_put(skb, len); /* Copy the original IP headers into the new buffer. */ memcpy(ptr, qp->iph, qp->ihlen); @@ -350,14 +357,8 @@ static struct sk_buff *ip_glue(struct ipq *qp) fp = qp->fragments; count = qp->ihlen; while(fp) { - if (fp->len < 0 || count+fp->len > skb->len) { - NETDEBUG(printk(KERN_ERR "Invalid fragment list: " - "Fragment over size.\n")); - ip_free(qp); - kfree_skb(skb); - ip_statistics.IpReasmFails++; - return NULL; - } + if ((fp->len < 0) || ((count + fp->len) > skb->len)) + goto out_invalid; memcpy((ptr + fp->offset), fp->ptr, fp->len); if (count == qp->ihlen) { skb->dst = dst_clone(fp->skb->dst); @@ -369,26 +370,50 @@ static struct sk_buff *ip_glue(struct ipq *qp) skb->pkt_type = qp->fragments->skb->pkt_type; skb->protocol = qp->fragments->skb->protocol; - /* We glued together all fragments, so remove the queue entry. 
*/ - ip_free(qp); + /* + * Clearly bogus, because security markings of the individual + * fragments should have been checked for consistency before + * gluing, and intermediate coalescing of fragments may have + * taken place in ip_defrag() before ip_glue() ever got called. + * If we're not going to do the consistency checking, we might + * as well take the value associated with the first fragment. + * --rct + */ + skb->security = qp->fragments->skb->security; /* Done with all fragments. Fixup the new IP header. */ iph = skb->nh.iph; iph->frag_off = 0; iph->tot_len = htons(count); - ip_statistics.IpReasmOKs++; return skb; + +out_invalid: + NETDEBUG(printk(KERN_ERR + "Invalid fragment list: Fragment over size.\n")); + kfree_skb(skb); + goto out_fail; +out_nomem: + NETDEBUG(printk(KERN_ERR + "IP: queue_glue: no memory for gluing queue %p\n", + qp)); + goto out_fail; +out_oversize: + if (net_ratelimit()) + printk(KERN_INFO + "Oversized IP packet from %d.%d.%d.%d.\n", + NIPQUAD(qp->iph->saddr)); +out_fail: + ip_statistics.IpReasmFails++; + return NULL; } /* Process an incoming IP datagram fragment. */ struct sk_buff *ip_defrag(struct sk_buff *skb) { struct iphdr *iph = skb->nh.iph; - struct ipfrag *prev, *next, *tmp; - struct ipfrag *tfp; + struct ipfrag *prev, *next, *tmp, *tfp; struct ipq *qp; - struct sk_buff *skb2; unsigned char *ptr; int flags, offset; int i, ihl, end; @@ -396,65 +421,58 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) ip_statistics.IpReasmReqds++; /* Start by cleaning up the memory. */ - if(atomic_read(&ip_frag_mem)>sysctl_ipfrag_high_thresh) + if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh) ip_evictor(); - /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ + /* + * Look for the entry for this IP datagram in the + * "incomplete datagrams" queue. If found, the + * timer is removed. + */ qp = ip_find(iph, skb->dst); /* Is this a non-fragmented datagram? 
*/ offset = ntohs(iph->frag_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; - if (((flags & IP_MF) == 0) && (offset == 0)) { - if (qp != NULL) { - /* Fragmented frame replaced by full unfragmented copy. */ - ip_free(qp); - } - return skb; - } offset <<= 3; /* offset is in 8-byte chunks */ ihl = iph->ihl * 4; - /* If the queue already existed, keep restarting its timer as long - * as we still are receiving fragments. Otherwise, create a fresh - * queue entry. + /* + * Check whether to create a fresh queue entry. If the + * queue already exists, its timer will be restarted as + * long as we continue to receive fragments. */ if (qp) { /* ANK. If the first fragment is received, * we should remember the correct IP header (with options) */ if (offset == 0) { + /* Fragmented frame replaced by unfragmented copy? */ + if ((flags & IP_MF) == 0) + goto out_freequeue; qp->ihlen = ihl; - memcpy(qp->iph, iph, ihl+8); + memcpy(qp->iph, iph, (ihl + 8)); } - /* about 30 seconds */ - mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time); } else { + /* Fragmented frame replaced by unfragmented copy? */ + if ((offset == 0) && ((flags & IP_MF) == 0)) + goto out_skb; + /* If we failed to create it, then discard the frame. */ - if ((qp = ip_create(skb, iph)) == NULL) { - kfree_skb(skb); - ip_statistics.IpReasmFails++; - return NULL; - } + qp = ip_create(skb, iph); + if (!qp) + goto out_freeskb; } /* Attempt to construct an oversize packet. */ - if(ntohs(iph->tot_len)+(int)offset>65535) { - if (net_ratelimit()) - printk(KERN_INFO "Oversized packet received from %d.%d.%d.%d\n", NIPQUAD(iph->saddr)); - frag_kfree_skb(skb); - ip_statistics.IpReasmFails++; - return NULL; - } + if((ntohs(iph->tot_len) + ((int) offset)) > 65535) + goto out_oversize; /* Determine the position of this fragment. */ end = offset + ntohs(iph->tot_len) - ihl; - /* Point into the IP datagram 'data' part. */ - ptr = skb->data + ihl; - /* Is this the final fragment? 
*/ if ((flags & IP_MF) == 0) qp->len = end; @@ -470,11 +488,14 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) prev = next; } + /* Point into the IP datagram 'data' part. */ + ptr = skb->data + ihl; + /* We found where to put this one. Check for overlap with * preceding fragment, and, if needed, align things so that * any overlaps are eliminated. */ - if (prev != NULL && offset < prev->end) { + if ((prev != NULL) && (offset < prev->end)) { i = prev->end - offset; offset += i; /* ptr into datagram */ ptr += i; /* ptr into fragment data */ @@ -483,14 +504,14 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) /* Look for overlap with succeeding segments. * If we can merge fragments, do it. */ - for(tmp=next; tmp != NULL; tmp = tfp) { + for (tmp = next; tmp != NULL; tmp = tfp) { tfp = tmp->next; if (tmp->offset >= end) - break; /* no overlaps at all */ + break; /* no overlaps at all */ - i = end - next->offset; /* overlap is 'i' bytes */ - tmp->len -= i; /* so reduce size of */ - tmp->offset += i; /* next fragment */ + i = end - next->offset; /* overlap is 'i' bytes */ + tmp->len -= i; /* so reduce size of */ + tmp->offset += i; /* next fragment */ tmp->ptr += i; /* If we get a frag size of <= 0, remove it and the packet @@ -513,15 +534,15 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) } } - /* Insert this fragment in the chain of fragments. */ - tfp = NULL; + /* + * Create a fragment to hold this skb. + * No memory to save the fragment? throw the lot ... + */ tfp = ip_frag_create(offset, end, skb, ptr); + if (!tfp) + goto out_freeskb; - /* No memory to save the fragment - so throw the lot. */ - if (!tfp) { - frag_kfree_skb(skb); - return NULL; - } + /* Insert this fragment in the chain of fragments. */ tfp->prev = prev; tfp->next = next; if (prev != NULL) @@ -538,8 +559,34 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) */ if (ip_done(qp)) { /* Glue together the fragments. */ - skb2 = ip_glue(qp); - return(skb2); + skb = ip_glue(qp); + /* Free the queue entry. 
*/ +out_freequeue: + ip_free(qp); +out_skb: + return skb; } + + /* + * The queue is still active ... reset its timer. + */ +out_timer: + mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time); /* ~ 30 seconds */ +out: return NULL; + + /* + * Error exits ... we need to reset the timer if there's a queue. + */ +out_oversize: + if (net_ratelimit()) + printk(KERN_INFO "Oversized packet received from %d.%d.%d.%d\n", + NIPQUAD(iph->saddr)); + /* the skb isn't in a fragment, so fall through to free it */ +out_freeskb: + kfree_skb(skb); + ip_statistics.IpReasmFails++; + if (qp) + goto out_timer; + goto out; } diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c index 745d07cb4..57e7761e3 100644 --- a/net/ipv4/ip_fw.c +++ b/net/ipv4/ip_fw.c @@ -1,75 +1,44 @@ /* - * IP firewalling code. This is taken from 4.4BSD. Please note the - * copyright message below. As per the GPL it must be maintained - * and the licenses thus do not conflict. While this port is subject - * to the GPL I also place my modifications under the original - * license in recognition of the original copyright. - * -- Alan Cox. - * - * $Id: ip_fw.c,v 1.35 1998/04/30 16:29:51 freitag Exp $ - * - * Ported from BSD to Linux, - * Alan Cox 22/Nov/1994. - * Zeroing /proc and other additions - * Jos Vos 4/Feb/1995. - * Merged and included the FreeBSD-Current changes at Ugen's request - * (but hey it's a lot cleaner now). Ugen would prefer in some ways - * we waited for his final product but since Linux 1.2.0 is about to - * appear it's not practical - Read: It works, it's not clean but please - * don't consider it to be his standard of finished work. - * Alan Cox 12/Feb/1995 - * Porting bidirectional entries from BSD, fixing accounting issues, - * adding struct ip_fwpkt for checking packets with interface address - * Jos Vos 5/Mar/1995. - * Established connections (ACK check), ACK check on bidirectional rules, - * ICMP type check. - * Wilfred Mollenvanger 7/7/1995. - * TCP attack protection. 
- * Alan Cox 25/8/95, based on information from bugtraq. - * ICMP type printk, IP_FW_F_APPEND - * Bernd Eckenfels 1996-01-31 - * Split blocking chain into input and output chains, add new "insert" and - * "append" commands to replace semi-intelligent "add" command, let "delete". - * only delete the first matching entry, use 0xFFFF (0xFF) as ports (ICMP - * types) when counting packets being 2nd and further fragments. - * Jos Vos <jos@xos.nl> 8/2/1996. - * Add support for matching on device names. - * Jos Vos <jos@xos.nl> 15/2/1996. - * Transparent proxying support. - * Willy Konynenberg <willy@xos.nl> 10/5/96. - * Make separate accounting on incoming and outgoing packets possible. - * Jos Vos <jos@xos.nl> 18/5/1996. - * Added trap out of bad frames. - * Alan Cox <alan@cymru.net> 17/11/1996 - * - * - * Masquerading functionality - * - * Copyright (c) 1994 Pauline Middelink - * - * The pieces which added masquerading functionality are totally - * my responsibility and have nothing to with the original authors - * copyright or doing. - * - * Parts distributed under GPL. - * - * Fixes: - * Pauline Middelink : Added masquerading. - * Alan Cox : Fixed an error in the merge. - * Thomas Quinot : Fixed port spoofing. - * Alan Cox : Cleaned up retransmits in spoofing. - * Alan Cox : Cleaned up length setting. - * Wouter Gadeyne : Fixed masquerading support of ftp PORT commands - * - * Juan Jose Ciarlante : Masquerading code moved to ip_masq.c - * Andi Kleen : Print frag_offsets and the ip flags properly. - * - * All the real work was done by ..... + * This code is heavily based on the code on the old ip_fw.c code; see below for + * copyrights and attributions of the old code. This code is basically GPL. * + * 15-Aug-1997: Major changes to allow graphs for firewall rules. + * Paul Russell <Paul.Russell@rustcorp.com.au> and + * Michael Neuling <Michael.Neuling@rustcorp.com.au> + * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP). 
+ * Added explicit RETURN from chains. + * Removed TOS mangling (done in ipchains 1.0.1). + * Fixed read & reset bug by reworking proc handling. + * Paul Russell <Paul.Russell@rustcorp.com.au> + * 28-Sep-1997: Added packet marking for net sched code. + * Removed fw_via comparisons: all done on device name now, + * similar to changes in ip_fw.c in DaveM's CVS970924 tree. + * Paul Russell <Paul.Russell@rustcorp.com.au> + * 2-Nov-1997: Moved types across to __u16, etc. + * Added inverse flags. + * Fixed fragment bug (in args to port_match). + * Changed mark to only one flag (MARKABS). + * 21-Nov-1997: Added ability to test ICMP code. + * 19-Jan-1998: Added wildcard interfaces. + * 6-Feb-1998: Merged 2.0 and 2.1 versions. + * Initialised ip_masq for 2.0.x version. + * Added explicit NETLINK option for 2.1.x version. + * Added packet and byte counters for policy matches. + * 26-Feb-1998: Fixed race conditions, added SMP support. + * 18-Mar-1998: Fix SMP, fix race condition fix. + * 1-May-1998: Remove caching of device pointer. + * 12-May-1998: Allow tiny fragment case for TCP/UDP. + * 15-May-1998: Treat short packets as fragments, don't just block. */ - /* + * + * The original Linux port was done by Alan Cox, with changes/fixes from + * Pauline Middelink, Jos Vos, Thomas Quinot, Wouter Gadeyne, Juan + * Jose Ciarlante, Bernd Eckenfels, Keith Owens and others. + * + * Copyright from the original FreeBSD version follows: + * * Copyright (c) 1993 Daniel Boulet * Copyright (c) 1994 Ugen J.S.Antsilevich * @@ -80,17 +49,18 @@ * Obviously, it would be nice if you gave credit where credit is due * but requiring it would be too onerous. * - * This software is provided ``AS IS'' without any warranties of any kind. - */ + * This software is provided ``AS IS'' without any warranties of any kind.
*/ + #include <linux/config.h> + #include <asm/uaccess.h> #include <asm/system.h> #include <linux/types.h> -#include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> #include <linux/errno.h> +#include <linux/config.h> #include <linux/socket.h> #include <linux/sockios.h> @@ -107,9 +77,9 @@ #include <net/sock.h> #include <net/icmp.h> #include <linux/netlink.h> +#include <linux/init.h> #include <linux/firewall.h> #include <linux/ip_fw.h> -#include <linux/init.h> #ifdef CONFIG_IP_MASQUERADE #include <net/ip_masq.h> @@ -119,223 +89,502 @@ #include <linux/proc_fs.h> #include <linux/stat.h> +/* Understanding locking in this code: (thanks to Alan Cox for using + * little words to explain this to me). -- PR + * + * In UP, there can be two packets traversing the chains: + * 1) A packet from the current userspace context + * 2) A packet off the bh handlers (timer or net). + * + * For SMP (kernel v2.1+), multiply this by # CPUs. + * + * [Note that this in not correct for 2.2 - because the socket code always + * uses lock_kernel() to serialize, and bottom halves (timers and net_bhs) + * only run on one CPU at a time. This will probably change for 2.3. + * It is still good to use spinlocks because that avoids the global cli() + * for updating the tables, which is rather costly in SMP kernels -AK] + * + * This means counters and backchains can get corrupted if no precautions + * are taken. + * + * To actually alter a chain on UP, we need only do a cli(), as this will + * stop a bh handler firing, as we are in the current userspace context + * (coming from a setsockopt()). + * + * On SMP, we need a write_lock_irqsave(), which is a simple cli() in + * UP. + * + * For backchains and counters, we use an array, indexed by + * [smp_processor_id()*2 + !in_interrupt()]; the array is of size + * [smp_num_cpus*2]. For v2.0, smp_num_cpus is effectively 1. 
So, + * confident of uniqueness, we modify counters even though we only + * have a read lock (to read the counters, you need a write lock, + * though). */ + +/* Why I didn't use straight locking... -- PR + * + * The backchains can be separated out of the ip_chains structure, and + * allocated as needed inside ip_fw_check(). + * + * The counters, however, can't. Trying to lock these means blocking + * interrupts every time we want to access them. This would suck HARD + * performance-wise. Not locking them leads to possible corruption, + * made worse on 32-bit machines (counters are 64-bit). */ + +/*#define DEBUG_IP_FIREWALL*/ +/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ +/*#define DEBUG_IP_FIREWALL_USER*/ +/*#define DEBUG_IP_FIREWALL_LOCKING*/ + +#ifdef CONFIG_IP_FIREWALL_NETLINK +static struct sock *ipfwsk; +#endif + +#define SLOT_NUMBER() (smp_processor_id()*2 + !in_interrupt()) +#define NUM_SLOTS (smp_num_cpus*2) + +#define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \ + + NUM_SLOTS*sizeof(struct ip_reent)) +#define SIZEOF_STRUCT_IP_FW_KERNEL (sizeof(struct ip_fwkernel) \ + + NUM_SLOTS*sizeof(struct ip_counters)) + +#ifdef DEBUG_IP_FIREWALL_LOCKING +static unsigned int fwc_rlocks, fwc_wlocks; +#define FWC_DEBUG_LOCK(d) \ +do { \ + FWC_DONT_HAVE_LOCK(d); \ + d |= (1 << SLOT_NUMBER()); \ +} while (0) + +#define FWC_DEBUG_UNLOCK(d) \ +do { \ + FWC_HAVE_LOCK(d); \ + d &= ~(1 << SLOT_NUMBER()); \ +} while (0) + +#define FWC_DONT_HAVE_LOCK(d) \ +do { \ + if ((d) & (1 << SLOT_NUMBER())) \ + printk("%s:%i: Got lock on %i already!\n", \ + __FILE__, __LINE__, SLOT_NUMBER()); \ +} while(0) + +#define FWC_HAVE_LOCK(d) \ +do { \ + if (!((d) & (1 << SLOT_NUMBER()))) \ + printk("%s:%i:No lock on %i!\n", \ + __FILE__, __LINE__, SLOT_NUMBER()); \ +} while (0) + +#else +#define FWC_DEBUG_LOCK(d) do { } while(0) +#define FWC_DEBUG_UNLOCK(d) do { } while(0) +#define FWC_DONT_HAVE_LOCK(d) do { } while(0) +#define FWC_HAVE_LOCK(d) do { } while(0) +#endif 
/*DEBUG_IP_FIREWALL_LOCKING*/ + +#define FWC_READ_LOCK(l) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(l); } while (0) +#define FWC_WRITE_LOCK(l) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(l); } while (0) +#define FWC_READ_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(l,f); } while (0) +#define FWC_WRITE_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(l,f); } while (0) +#define FWC_READ_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(l); } while (0) +#define FWC_WRITE_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(l); } while (0) +#define FWC_READ_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(l,f); } while (0) +#define FWC_WRITE_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(l,f); } while (0) + +struct ip_chain; + +struct ip_counters +{ + __u64 pcnt, bcnt; /* Packet and byte counters */ +}; + +struct ip_fwkernel +{ + struct ip_fw ipfw; + struct ip_fwkernel *next; /* where to go next if current + * rule doesn't match */ + struct ip_chain *branch; /* which branch to jump to if + * current rule matches */ + int simplebranch; /* Use this if branch == NULL */ + struct ip_counters counters[0]; /* Actually several of these */ +}; + +struct ip_reent +{ + struct ip_chain *prevchain; /* Pointer to referencing chain */ + struct ip_fwkernel *prevrule; /* Pointer to referencing rule */ + struct ip_counters counters; +}; + +struct ip_chain +{ + ip_chainlabel label; /* Defines the label for each block */ + struct ip_chain *next; /* Pointer to next block */ + struct ip_fwkernel *chain; /* Pointer to first rule in block */ + __u32 refcount; /* Number of references to block */ + int policy; /* Default rule for chain.
Only * + * used in built in chains */ + struct ip_reent reent[0]; /* Actually several of these */ +}; + /* * Implement IP packet firewall */ #ifdef DEBUG_IP_FIREWALL -#define dprintf1(a) printk(a) -#define dprintf2(a1,a2) printk(a1,a2) -#define dprintf3(a1,a2,a3) printk(a1,a2,a3) -#define dprintf4(a1,a2,a3,a4) printk(a1,a2,a3,a4) +#define dprintf(format, args...) printk(format , ## args) #else -#define dprintf1(a) -#define dprintf2(a1,a2) -#define dprintf3(a1,a2,a3) -#define dprintf4(a1,a2,a3,a4) +#define dprintf(format, args...) #endif -#define print_ip(a) printk("%ld.%ld.%ld.%ld",(ntohl(a)>>24)&0xFF,\ - (ntohl(a)>>16)&0xFF,\ - (ntohl(a)>>8)&0xFF,\ - (ntohl(a))&0xFF); - -#ifdef DEBUG_IP_FIREWALL -#define dprint_ip(a) print_ip(a) +#ifdef DEBUG_IP_FIREWALL_USER +#define duprintf(format, args...) printk(format , ## args) #else -#define dprint_ip(a) +#define duprintf(format, args...) #endif -#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL) - -struct ip_fw *ip_fw_fwd_chain; -struct ip_fw *ip_fw_in_chain; -struct ip_fw *ip_fw_out_chain; -struct ip_fw *ip_acct_chain; -struct ip_fw *ip_masq_chain; +/* Lock around ip_fw_chains linked list structure */ +spinlock_t ip_fw_lock = SPIN_LOCK_UNLOCKED; -static struct ip_fw **chains[] = - {&ip_fw_fwd_chain, &ip_fw_in_chain, &ip_fw_out_chain, &ip_acct_chain, - &ip_masq_chain - }; -#endif /* CONFIG_IP_ACCT || CONFIG_IP_FIREWALL */ - -#ifdef CONFIG_IP_FIREWALL -int ip_fw_fwd_policy=IP_FW_F_ACCEPT; -int ip_fw_in_policy=IP_FW_F_ACCEPT; -int ip_fw_out_policy=IP_FW_F_ACCEPT; +/* Head of linked list of fw rules */ +static struct ip_chain *ip_fw_chains; -static int *policies[] = - {&ip_fw_fwd_policy, &ip_fw_in_policy, &ip_fw_out_policy}; +#define IP_FW_INPUT_CHAIN ip_fw_chains +#define IP_FW_FORWARD_CHAIN (ip_fw_chains->next) +#define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next) -#endif +/* Returns 1 if the port is matched by the range, 0 otherwise */ +extern inline int port_match(__u16 min, __u16 max, __u16 port, + int frag, 
int invert) +{ + if (frag) /* Fragments fail ANY port test. */ + return (min == 0 && max == 0xFFFF); + else return (port >= min && port <= max) ^ invert; +} -#ifdef CONFIG_IP_FIREWALL_NETLINK -struct sock *ipfwsk; -#endif +/* Returns whether matches rule or not. */ +static int ip_rule_match(struct ip_fwkernel *f, + const char *ifname, + struct iphdr *ip, + char tcpsyn, + __u16 src_port, __u16 dst_port, + char isfrag) +{ +#define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg)) + /* + * This is a bit simpler as we don't have to walk + * an interface chain as you do in BSD - same logic + * however. + */ -/* - * Returns 1 if the port is matched by the vector, 0 otherwise - */ + if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr, + IP_FW_INV_SRCIP) + || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr, + IP_FW_INV_DSTIP)) { + dprintf("Source or dest mismatch.\n"); + + dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr, + f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr, + f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : ""); + dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr, + f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr, + f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : ""); + return 0; + } -extern inline int port_match(unsigned short *portptr,int nports,unsigned short port,int range_flag) -{ - if (!nports) - return 1; - if ( range_flag ) - { - if ( portptr[0] <= port && port <= portptr[1] ) - { - return( 1 ); - } - nports -= 2; - portptr += 2; + /* + * Look for a VIA device match + */ + if (f->ipfw.fw_flg & IP_FW_F_WILDIF) { + if (FWINV(strncmp(ifname, f->ipfw.fw_vianame, + strlen(f->ipfw.fw_vianame)) != 0, + IP_FW_INV_VIA)) { + dprintf("Wildcard interface mismatch.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_VIA ? 
" (INV)" : ""); + return 0; /* Mismatch */ + } } - while ( nports-- > 0 ) - { - if ( *portptr++ == port ) - { - return( 1 ); - } + else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0, + IP_FW_INV_VIA)) { + dprintf("Interface name does not match.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_VIA + ? " (INV)" : ""); + return 0; /* Mismatch */ } - return(0); -} -#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL) + /* + * Ok the chain addresses match. + */ + + /* If we have a fragment rule but the packet is not a fragment + * the we return zero */ + if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) { + dprintf("Fragment rule but not fragment.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : ""); + return 0; + } -#ifdef CONFIG_IP_FIREWALL_VERBOSE + /* Fragment NEVER passes a SYN test, even an inverted one. */ + if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN) + || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) { + dprintf("Rule requires SYN and packet has no SYN.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : ""); + return 0; + } -/* - * VERY ugly piece of code which actually makes kernel printf for - * matching packets. - */ + if (f->ipfw.fw_proto) { + /* + * Specific firewall - packet's protocol + * must match firewall's. + */ -static char *chain_name(struct ip_fw *chain, int mode) -{ - switch (mode) { - case IP_FW_MODE_ACCT_IN: return "acct in"; - case IP_FW_MODE_ACCT_OUT: return "acct out"; - default: - if (chain == ip_fw_fwd_chain) - return "fw-fwd"; - else if (chain == ip_fw_in_chain) - return "fw-in"; - else - return "fw-out"; + if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) { + dprintf("Packet protocol %hi does not match %hi.%s\n", + ip->protocol, f->ipfw.fw_proto, + f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":""); + return 0; + } + + /* For non TCP/UDP/ICMP, port range is max anyway. 
*/ + if (!port_match(f->ipfw.fw_spts[0], + f->ipfw.fw_spts[1], + src_port, isfrag, + !!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT)) + || !port_match(f->ipfw.fw_dpts[0], + f->ipfw.fw_dpts[1], + dst_port, isfrag, + !!(f->ipfw.fw_invflg + &IP_FW_INV_DSTPT))) { + dprintf("Port match failed.\n"); + return 0; + } } + + dprintf("Match succeeded.\n"); + return 1; } -static char *rule_name(struct ip_fw *f, int mode, char *buf) +static const char *branchname(struct ip_chain *branch,int simplebranch) { - if (mode == IP_FW_MODE_ACCT_IN || mode == IP_FW_MODE_ACCT_OUT) - return ""; - - if(f->fw_flg&IP_FW_F_ACCEPT) { - if(f->fw_flg&IP_FW_F_REDIR) { - sprintf(buf, "acc/r%d ", f->fw_pts[f->fw_nsp+f->fw_ndp]); - return buf; - } else if(f->fw_flg&IP_FW_F_MASQ) - return "acc/masq "; - else - return "acc "; - } else if(f->fw_flg&IP_FW_F_ICMPRPL) { - return "rej "; - } else { - return "deny "; + if (branch) + return branch->label; + switch (simplebranch) + { + case FW_BLOCK: return IP_FW_LABEL_BLOCK; + case FW_ACCEPT: return IP_FW_LABEL_ACCEPT; + case FW_REJECT: return IP_FW_LABEL_REJECT; + case FW_REDIRECT: return IP_FW_LABEL_REDIRECT; + case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE; + case FW_SKIP: return "-"; + case FW_SKIP+1: return IP_FW_LABEL_RETURN; + default: + return "UNKNOWN"; } } -static void print_packet(struct iphdr *ip, - u16 src_port, u16 dst_port, u16 icmp_type, - char *chain, char *rule, char *devname) +/* + * VERY ugly piece of code which actually + * makes kernel printf for matching packets... 
+ */ +static void dump_packet(const struct iphdr *ip, + const char *ifname, + struct ip_fwkernel *f, + const ip_chainlabel chainlabel, + __u16 src_port, + __u16 dst_port) { __u32 *opt = (__u32 *) (ip + 1); int opti; - __u16 foff = ntohs(ip->frag_off); - - printk(KERN_INFO "IP %s %s%s", chain, rule, devname); - - switch(ip->protocol) + + if (f) { - case IPPROTO_TCP: - printk(" TCP "); - break; - case IPPROTO_UDP: - printk(" UDP "); - break; - case IPPROTO_ICMP: - printk(" ICMP/%d ", icmp_type); - break; - default: - printk(" PROTO=%d ", ip->protocol); - break; + printk(KERN_INFO "Packet log: %s ",chainlabel); + + printk("%s ",branchname(f->branch,f->simplebranch)); + if (f->simplebranch==FW_REDIRECT) + printk("%d ",f->ipfw.fw_redirpt); } - print_ip(ip->saddr); - if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) - printk(":%hu", src_port); - printk(" "); - print_ip(ip->daddr); - if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) - printk(":%hu", dst_port); - printk(" L=%hu S=0x%2.2hX I=%hu FO=0x%4.4hX T=%hu", + + printk("%s PROTO=%d %ld.%ld.%ld.%ld:%hu %ld.%ld.%ld.%ld:%hu" + " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", + ifname, ip->protocol, + (ntohl(ip->saddr)>>24)&0xFF, + (ntohl(ip->saddr)>>16)&0xFF, + (ntohl(ip->saddr)>>8)&0xFF, + (ntohl(ip->saddr))&0xFF, + src_port, + (ntohl(ip->daddr)>>24)&0xFF, + (ntohl(ip->daddr)>>16)&0xFF, + (ntohl(ip->daddr)>>8)&0xFF, + (ntohl(ip->daddr))&0xFF, + dst_port, ntohs(ip->tot_len), ip->tos, ntohs(ip->id), - foff & IP_OFFSET, ip->ttl); - if (foff & IP_DF) printk(" DF=1"); - if (foff & IP_MF) printk(" MF=1"); + ntohs(ip->frag_off), ip->ttl); + for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) printk(" O=0x%8.8X", *opt++); - printk("\n"); + printk("\n"); } + +/* function for checking chain labels for user space. 
Makes sure that + * there are no special characters in the string */ +static int check_label(ip_chainlabel label) +{ + unsigned int i; + + for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1 && label[i]; i++) + if (label[i] <= ' ') + return 0; + if (i == IP_FW_MAX_LABEL_LENGTH+1) + return 0; + return 1; +} + +/* This function returns a pointer to the first chain with a label + * that matches the one given. */ +static struct ip_chain *find_label(ip_chainlabel label) +{ + struct ip_chain *tmp; + FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks); + for (tmp = ip_fw_chains; tmp; tmp = tmp->next) + if (strcmp(tmp->label,label) == 0) + break; + return tmp; +} + +/* This function returns a boolean which when true sets answer to one + of the FW_*. */ +static int find_special(ip_chainlabel label, int *answer) +{ + if (label[0] == '\0') { + *answer = FW_SKIP; /* => pass-through rule */ + return 1; + } else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) { + *answer = FW_ACCEPT; + return 1; + } else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) { + *answer = FW_BLOCK; + return 1; + } else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) { + *answer = FW_REJECT; + return 1; +#ifdef CONFIG_IP_TRANSPARENT_PROXY + } else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) { + *answer = FW_REDIRECT; + return 1; +#endif +#ifdef CONFIG_IP_MASQUERADE + } else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) { + *answer = FW_MASQUERADE; + return 1; #endif + } else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) { + *answer = FW_SKIP+1; + return 1; + } else { + return 0; + } +} + +/* This function cleans up the prevchain and prevrule. If the verbose + * flag is set then the names of the chains will be printed as it + * cleans up.
*/ +static void cleanup(struct ip_chain *chain, + const int verbose, + unsigned int slot) +{ + struct ip_chain *tmpchain = chain->reent[slot].prevchain; + if (verbose) + printk(KERN_ERR "Chain backtrace: "); + while (tmpchain) { + if (verbose) + printk("%s<-",chain->label); + chain->reent[slot].prevchain = NULL; + chain = tmpchain; + tmpchain = chain->reent[slot].prevchain; + } + if (verbose) + printk("%s\n",chain->label); +} + +static inline void +ip_fw_domatch(struct ip_fwkernel *f, + struct iphdr *ip, + const char *rif, + const ip_chainlabel label, + struct sk_buff *skb, + unsigned int slot, + __u16 src_port, __u16 dst_port) +{ + f->counters[slot].bcnt+=ntohs(ip->tot_len); + f->counters[slot].pcnt++; + if (f->ipfw.fw_flg & IP_FW_F_PRN) { + dump_packet(ip,rif,f,label,src_port,dst_port); + } + ip->tos = (ip->tos & f->ipfw.fw_tosand) ^ f->ipfw.fw_tosxor; + +/* This functionality is useless in stock 2.0.x series, but we don't + * discard the mark thing altogether, to avoid breaking ipchains (and, + * more importantly, the ipfwadm wrapper) --PR */ + if (f->ipfw.fw_flg & IP_FW_F_MARKABS) + skb->fwmark = f->ipfw.fw_mark; + else + skb->fwmark+=f->ipfw.fw_mark; +#ifdef CONFIG_IP_FIREWALL_NETLINK + if (f->ipfw.fw_flg & IP_FW_F_NETLINK) { + size_t len = min(f->ipfw.fw_outputsize, ntohs(ip->tot_len)) + + sizeof(skb->fwmark) + IFNAMSIZ; + struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC); + + duprintf("Sending packet out NETLINK (length = %u).\n", + (unsigned int)len); + if (outskb) { + /* Prepend mark & interface */ + skb_put(outskb, len); + *((__u32 *)outskb->data) = skb->fwmark; + strcpy(outskb->data+sizeof(__u32), rif); + memcpy(outskb->data+sizeof(__u32)+IFNAMSIZ, ip, + len-(sizeof(__u32)+IFNAMSIZ)); + netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_KERNEL); + } + else duprintf("netlink post failed - alloc_skb failed!\n"); + } +#endif +} /* * Returns one of the generic firewall policies, like FW_ACCEPT. - * Also does accounting so you can feed it the accounting chain. 
* - * The modes is either IP_FW_MODE_FW (normal firewall mode), - * IP_FW_MODE_ACCT_IN or IP_FW_MODE_ACCT_OUT (accounting mode, - * steps through the entire chain and handles fragments - * differently), or IP_FW_MODE_CHK (handles user-level check, - * counters are not updated). + * The testing is either false for normal firewall mode or true for + * user checking mode (counters are not updated, TOS & mark not done). */ - - -int ip_fw_chk(struct iphdr *ip, struct device *rif, __u16 *redirport, struct ip_fw *chain, int policy, int mode) +static int +ip_fw_check(struct iphdr *ip, + const char *rif, + __u16 *redirport, + struct ip_chain *chain, + struct sk_buff *skb, + unsigned int slot, + int testing) { - struct ip_fw *f; struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl); struct udphdr *udp=(struct udphdr *)((__u32 *)ip+ip->ihl); struct icmphdr *icmp=(struct icmphdr *)((__u32 *)ip+ip->ihl); __u32 src, dst; - __u16 src_port=0xFFFF, dst_port=0xFFFF, icmp_type=0xFF; - unsigned short f_prt=0, prt; - char notcpsyn=0, notcpack=0, match; - unsigned short offset; - int answer; - unsigned char tosand, tosxor; - - /* - * If the chain is empty follow policy. The BSD one - * accepts anything giving you a time window while - * flushing and rebuilding the tables. - */ - - src = ip->saddr; - dst = ip->daddr; - - /* - * This way we handle fragmented packets. - * we ignore all fragments but the first one - * so the whole packet can't be reassembled. - * This way we relay on the full info which - * stored only in first packet. - * - * Note that this theoretically allows partial packet - * spoofing. Not very dangerous but paranoid people may - * wish to play with this. It also allows the so called - * "fragment bomb" denial of service attack on some types - * of system. 
- */ + __u16 src_port = 0xFFFF, dst_port = 0xFFFF; + char tcpsyn=0; + __u16 offset; + unsigned char oldtos; + struct ip_fwkernel *f; + int ret = FW_SKIP+2; + + /* We handle fragments by dealing with the first fragment as + * if it was a normal packet. All other fragments are treated + * normally, except that they will NEVER match rules that ask + * things we don't know, ie. tcp syn flag or ports). If the + * rule is also a fragment-specific rule, non-fragments won't + * match it. */ offset = ntohs(ip->frag_off) & IP_OFFSET; @@ -346,933 +595,1085 @@ int ip_fw_chk(struct iphdr *ip, struct device *rif, __u16 *redirport, struct ip_ * checks. */ - if (offset == 1 && ip->protocol == IPPROTO_TCP) - return FW_BLOCK; - - if (offset!=0 && !(mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT)) && - (ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP || - ip->protocol == IPPROTO_ICMP)) - return FW_ACCEPT; - - /* - * Header fragment for TCP is too small to check the bits. - */ - - if(ip->protocol==IPPROTO_TCP && (ip->ihl<<2)+16 > ntohs(ip->tot_len)) + if (offset == 1 && ip->protocol == IPPROTO_TCP) { + if (!testing && net_ratelimit()) { + printk("Suspect TCP fragment.\n"); + dump_packet(ip,rif,NULL,NULL,0,0); + } return FW_BLOCK; - - /* - * Too short. - * - * But only too short for a packet with ports... + } + + /* If we can't investigate ports, treat as fragment. It's + * either a trucated whole packet, or a truncated first + * fragment, or a TCP first fragment of length 8-15, in which + * case the above rule stops reassembly. 
*/ - - else if((ntohs(ip->tot_len)<8+(ip->ihl<<2))&&(ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP)) - return FW_BLOCK; - + if (offset == 0) { + unsigned int size_req; + switch (ip->protocol) { + case IPPROTO_TCP: + /* Don't care about things past flags word */ + size_req = 16; + break; + + case IPPROTO_UDP: + case IPPROTO_ICMP: + size_req = 8; + break; + + default: + size_req = 0; + } + offset = (ntohs(ip->tot_len) < (ip->ihl<<2)+size_req); + } + src = ip->saddr; dst = ip->daddr; - + oldtos = ip->tos; + /* * If we got interface from which packet came - * we can use the address directly. This is unlike - * 4.4BSD derived systems that have an address chain - * per device. We have a device per address with dummy - * devices instead. + * we can use the address directly. Linux 2.1 now uses address + * chains per device too, but unlike BSD we first check if the + * incoming packet matches a device address and the routing + * table before calling the firewall. */ - dprintf1("Packet "); + dprintf("Packet "); switch(ip->protocol) { case IPPROTO_TCP: - dprintf1("TCP "); - /* ports stay 0xFFFF if it is not the first fragment */ + dprintf("TCP "); if (!offset) { src_port=ntohs(tcp->source); dst_port=ntohs(tcp->dest); - if(!tcp->ack && !tcp->rst) - /* We do NOT have ACK, value TRUE */ - notcpack=1; - if(!tcp->syn || !notcpack) - /* We do NOT have SYN, value TRUE */ - notcpsyn=1; + + /* Connection initilisation can only + * be made when the syn bit is set and + * neither of the ack or reset is + * set. 
*/ + if(tcp->syn && !(tcp->ack || tcp->rst)) + tcpsyn=1; } - prt=IP_FW_F_TCP; break; case IPPROTO_UDP: - dprintf1("UDP "); - /* ports stay 0xFFFF if it is not the first fragment */ + dprintf("UDP "); if (!offset) { src_port=ntohs(udp->source); dst_port=ntohs(udp->dest); } - prt=IP_FW_F_UDP; break; case IPPROTO_ICMP: - /* icmp_type stays 255 if it is not the first fragment */ - if (!offset) - icmp_type=(__u16)(icmp->type); - dprintf2("ICMP:%d ",icmp_type); - prt=IP_FW_F_ICMP; + if (!offset) { + src_port=(__u16)icmp->type; + dst_port=(__u16)icmp->code; + } + dprintf("ICMP "); break; default: - dprintf2("p=%d ",ip->protocol); - prt=IP_FW_F_ALL; + dprintf("p=%d ",ip->protocol); break; } #ifdef DEBUG_IP_FIREWALL - dprint_ip(ip->saddr); + print_ip(ip->saddr); - if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP) - /* This will print 65535 when it is not the first fragment! */ - dprintf2(":%d ", src_port); - dprint_ip(ip->daddr); - if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP) - /* This will print 65535 when it is not the first fragment! */ - dprintf2(":%d ",dst_port); - dprintf1("\n"); -#endif - - for (f=chain;f;f=f->fw_next) - { - /* - * This is a bit simpler as we don't have to walk - * an interface chain as you do in BSD - same logic - * however. - */ - - /* - * Match can become 0x01 (a "normal" match was found), - * 0x02 (a reverse match was found), and 0x03 (the - * IP addresses match in both directions). - * Now we know in which direction(s) we should look - * for a match for the TCP/UDP ports. Both directions - * might match (e.g., when both addresses are on the - * same network for which an address/mask is given), but - * the ports might only match in one direction. - * This was obviously wrong in the original BSD code. 
- */ - match = 0x00; - - if ((src&f->fw_smsk.s_addr)==f->fw_src.s_addr - && (dst&f->fw_dmsk.s_addr)==f->fw_dst.s_addr) - /* normal direction */ - match |= 0x01; - - if ((f->fw_flg & IP_FW_F_BIDIR) && - (dst&f->fw_smsk.s_addr)==f->fw_src.s_addr - && (src&f->fw_dmsk.s_addr)==f->fw_dst.s_addr) - /* reverse direction */ - match |= 0x02; - - if (!match) - continue; + if (offset) + dprintf(":fragment (%i) ", ((int)offset)<<2); + else if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP + || ip->protocol==IPPROTO_ICMP) + dprintf(":%hu:%hu", src_port, dst_port); + dprintf("\n"); +#endif - /* - * Look for a VIA device match - */ - if(f->fw_viadev) - { - if(rif!=f->fw_viadev) - continue; /* Mismatch */ + if (!testing) FWC_READ_LOCK(&ip_fw_lock); + else FWC_HAVE_LOCK(fwc_rlocks); + + f = chain->chain; + do { + for (; f; f = f->next) { + if (ip_rule_match(f,rif,ip, + tcpsyn,src_port,dst_port,offset)) { + if (!testing) + ip_fw_domatch(f, ip, rif, chain->label, skb, + slot, src_port,dst_port); + break; + } } - - /* This looks stupid, because we scan almost static - list, searching for static key. However, this way seems - to be only reasonable way of handling fw_via rules - (btw bsd makes the same thing). - - It will not affect performance if you will follow - the following simple rules: - - - if inteface is aliased, ALWAYS specify fw_viadev, - so that previous check will guarantee, that we will - not waste time when packet arrive on another interface. - - - avoid using fw_via.s_addr if fw_via.s_addr is owned - by an aliased interface. 
- - --ANK - */ - if (f->fw_via.s_addr && rif) { - struct in_ifaddr *ifa; - - if (rif->ip_ptr == NULL) - continue; /* Mismatch */ - - for (ifa = ((struct in_device*)(rif->ip_ptr))->ifa_list; - ifa; ifa = ifa->ifa_next) { - if (ifa->ifa_local == f->fw_via.s_addr) - goto ifa_ok; + if (f) { + if (f->branch) { + /* Do sanity check to see if we have + * already set prevchain and if so we + * must be in a loop */ + if (f->branch->reent[slot].prevchain) { + if (!testing) { + printk(KERN_ERR + "IP firewall: " + "Loop detected " + "at `%s'.\n", + f->branch->label); + cleanup(chain, 1, slot); + ret = FW_BLOCK; + } else { + cleanup(chain, 0, slot); + ret = FW_SKIP+1; + } + } + else { + f->branch->reent[slot].prevchain + = chain; + f->branch->reent[slot].prevrule + = f->next; + chain = f->branch; + f = chain->chain; + } + } + else if (f->simplebranch == FW_SKIP) + f = f->next; + else if (f->simplebranch == FW_SKIP+1) { + /* Just like falling off the chain */ + goto fall_off_chain; + } + else { + cleanup(chain, 0, slot); + ret = f->simplebranch; + } + } /* f == NULL */ + else { + fall_off_chain: + if (chain->reent[slot].prevchain) { + struct ip_chain *tmp = chain; + f = chain->reent[slot].prevrule; + chain = chain->reent[slot].prevchain; + tmp->reent[slot].prevchain = NULL; + } + else { + ret = chain->policy; + if (!testing) { + chain->reent[slot].counters.pcnt++; + chain->reent[slot].counters.bcnt + += ntohs(ip->tot_len); + } } - continue; /* Mismatch */ - - ifa_ok: } + } while (ret == FW_SKIP+2); - /* - * Ok the chain addresses match. - */ + if (!testing) FWC_READ_UNLOCK(&ip_fw_lock); -#ifdef CONFIG_IP_ACCT - /* - * See if we're in accounting mode and only want to - * count incoming or outgoing packets. - */ + /* Recalculate checksum if not going to reject, and TOS changed. 
*/ + if (ip->tos != oldtos + && ret != FW_REJECT && ret != FW_BLOCK + && !testing) + ip_send_check(ip); - if (mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT) && - ((mode == IP_FW_MODE_ACCT_IN && f->fw_flg&IP_FW_F_ACCTOUT) || - (mode == IP_FW_MODE_ACCT_OUT && f->fw_flg&IP_FW_F_ACCTIN))) - continue; +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (ret == FW_REDIRECT && redirport) { + if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) { + /* Wildcard redirection. + * Note that redirport will become + * 0xFFFF for non-TCP/UDP packets. + */ + *redirport = htons(dst_port); + } + } +#endif +#ifdef DEBUG_ALLOW_ALL + return (testing ? ret : FW_ACCEPT); +#else + return ret; #endif - /* - * For all non-TCP packets and/or non-first fragments, - * notcpsyn and notcpack will always be FALSE, - * so the IP_FW_F_TCPSYN and IP_FW_F_TCPACK flags - * are actually ignored for these packets. - */ - - if((f->fw_flg&IP_FW_F_TCPSYN) && notcpsyn) - continue; +} - if((f->fw_flg&IP_FW_F_TCPACK) && notcpack) - continue; +/* Must have write lock & interrupts off for any of these */ - f_prt=f->fw_flg&IP_FW_F_KIND; - if (f_prt!=IP_FW_F_ALL) - { - /* - * Specific firewall - packet's protocol - * must match firewall's. - */ +/* This function sets all the byte counters in a chain to zero. The + * input is a pointer to the chain required for zeroing */ +static int zero_fw_chain(struct ip_chain *chainptr) +{ + struct ip_fwkernel *i; - if(prt!=f_prt) - continue; - - if((prt==IP_FW_F_ICMP && - ! 
port_match(&f->fw_pts[0], f->fw_nsp, - icmp_type,f->fw_flg&IP_FW_F_SRNG)) || - !(prt==IP_FW_F_ICMP || ((match & 0x01) && - port_match(&f->fw_pts[0], f->fw_nsp, src_port, - f->fw_flg&IP_FW_F_SRNG) && - port_match(&f->fw_pts[f->fw_nsp], f->fw_ndp, dst_port, - f->fw_flg&IP_FW_F_DRNG)) || ((match & 0x02) && - port_match(&f->fw_pts[0], f->fw_nsp, dst_port, - f->fw_flg&IP_FW_F_SRNG) && - port_match(&f->fw_pts[f->fw_nsp], f->fw_ndp, src_port, - f->fw_flg&IP_FW_F_DRNG)))) - { - continue; - } - } + FWC_HAVE_LOCK(fwc_wlocks); + for (i = chainptr->chain; i; i = i->next) + memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS); + return 0; +} -#ifdef CONFIG_IP_FIREWALL_VERBOSE - if (f->fw_flg & IP_FW_F_PRN) - { - char buf[16]; +static int clear_fw_chain(struct ip_chain *chainptr) +{ + struct ip_fwkernel *i= chainptr->chain; - print_packet(ip, src_port, dst_port, icmp_type, - chain_name(chain, mode), - rule_name(f, mode, buf), - rif ? rif->name : "-"); - } -#endif - if (mode != IP_FW_MODE_CHK) { - f->fw_bcnt+=ntohs(ip->tot_len); - f->fw_pcnt++; - } - if (!(mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT))) - break; - } /* Loop */ + FWC_HAVE_LOCK(fwc_wlocks); + chainptr->chain=NULL; + + while (i) { + struct ip_fwkernel *tmp = i->next; + if (i->branch) + i->branch->refcount--; + kfree(i); + i = tmp; + } + return 0; +} + +static int replace_in_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl, + __u32 position) +{ + struct ip_fwkernel *f = chainptr->chain; - if (!(mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT))) { + FWC_HAVE_LOCK(fwc_wlocks); - /* - * We rely on policy defined in the rejecting entry or, if no match - * was found, we rely on the general policy variable for this type - * of firewall. 
- */ + while (--position && f != NULL) f = f->next; + if (f == NULL) + return EINVAL; + + if (f->branch) f->branch->refcount--; + if (frwl->branch) frwl->branch->refcount++; - if (f!=NULL) { - policy=f->fw_flg; - tosand=f->fw_tosand; - tosxor=f->fw_tosxor; - } else { - tosand=0xFF; - tosxor=0x00; - } + frwl->next = f->next; + memcpy(f,frwl,sizeof(struct ip_fwkernel)); + kfree(frwl); + return 0; +} - if (policy&IP_FW_F_ACCEPT) { - /* Adjust priority and recompute checksum */ - __u8 old_tos = ip->tos; - ip->tos = (old_tos & tosand) ^ tosxor; - if (ip->tos != old_tos) - ip_send_check(ip); -#ifdef CONFIG_IP_TRANSPARENT_PROXY - if (policy&IP_FW_F_REDIR) { - if (redirport) - if ((*redirport = htons(f->fw_pts[f->fw_nsp+f->fw_ndp])) == 0) { - /* Wildcard redirection. - * Note that redirport will become - * 0xFFFF for non-TCP/UDP packets. - */ - *redirport = htons(dst_port); - } - answer = FW_REDIRECT; - } else -#endif -#ifdef CONFIG_IP_MASQUERADE - if (policy&IP_FW_F_MASQ) - answer = FW_MASQUERADE; - else -#endif - answer = FW_ACCEPT; - - } else if(policy&IP_FW_F_ICMPRPL) - answer = FW_REJECT; - else - answer = FW_BLOCK; +static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule) +{ + struct ip_fwkernel *i; -#ifdef CONFIG_IP_FIREWALL_NETLINK - if((policy&IP_FW_F_PRN) && (answer == FW_REJECT || answer == FW_BLOCK)) - { - struct sk_buff *skb=alloc_skb(128, GFP_ATOMIC); - if(skb) - { - int len=min(128,ntohs(ip->tot_len)); - skb_put(skb,len); - memcpy(skb->data,ip,len); - if(netlink_post(NETLINK_FIREWALL, skb)) - kfree_skb(skb); - } - } -#endif - return answer; - } else - /* we're doing accounting, always ok */ + FWC_HAVE_LOCK(fwc_wlocks); + /* Special case if no rules already present */ + if (chainptr->chain == NULL) { + + /* If pointer writes are atomic then turning off + * interupts is not necessary. 
*/ + chainptr->chain = rule; + if (rule->branch) rule->branch->refcount++; return 0; -} + } + /* Find the rule before the end of the chain */ + for (i = chainptr->chain; i->next; i = i->next); + i->next = rule; + if (rule->branch) rule->branch->refcount++; + return 0; +} -static void zero_fw_chain(struct ip_fw *chainptr) +/* This function inserts a rule at the position of position in the + * chain refenced by chainptr. If position is 1 then this rule will + * become the new rule one. */ +static int insert_in_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl, + __u32 position) { - struct ip_fw *ctmp=chainptr; - while(ctmp) - { - ctmp->fw_pcnt=0L; - ctmp->fw_bcnt=0L; - ctmp=ctmp->fw_next; + struct ip_fwkernel *f = chainptr->chain; + + FWC_HAVE_LOCK(fwc_wlocks); + /* special case if the position is number 1 */ + if (position == 1) { + frwl->next = chainptr->chain; + if (frwl->branch) frwl->branch->refcount++; + chainptr->chain = frwl; + return 0; } + position--; + while (--position && f != NULL) f = f->next; + if (f == NULL) + return EINVAL; + if (frwl->branch) frwl->branch->refcount++; + frwl->next = f->next; + + f->next = frwl; + return 0; } -static void free_fw_chain(struct ip_fw *volatile* chainptr) +/* This function deletes the a rule from a given rulenum and chain. + * With rulenum = 1 is the first rule is deleted. 
*/ + +static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum) { - unsigned long flags; - save_flags(flags); - cli(); - while ( *chainptr != NULL ) - { - struct ip_fw *ftmp; - ftmp = *chainptr; - *chainptr = ftmp->fw_next; - kfree_s(ftmp,sizeof(*ftmp)); + struct ip_fwkernel *i=chainptr->chain,*tmp; + + FWC_HAVE_LOCK(fwc_wlocks); + + if (!chainptr->chain) + return ENOENT; + + /* Need a special case for the first rule */ + if (rulenum == 1) { + /* store temp to allow for freeing up of memory */ + tmp = chainptr->chain; + if (chainptr->chain->branch) chainptr->chain->branch->refcount--; + chainptr->chain = chainptr->chain->next; + kfree(tmp); /* free memory that is now unused */ + } else { + rulenum--; + while (--rulenum && i->next ) i = i->next; + if (!i->next) + return ENOENT; + tmp = i->next; + if (i->next->branch) + i->next->branch->refcount--; + i->next = i->next->next; + kfree(tmp); } - restore_flags(flags); + return 0; } -/* Volatiles to keep some of the compiler versions amused */ -static int insert_in_chain(struct ip_fw *volatile* chainptr, struct ip_fw *frwl,int len) +/* This function deletes the a rule from a given rule and chain. + * The rule that is deleted is the first occursance of that rule. */ +static int del_rule_from_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl) { - struct ip_fw *ftmp; - unsigned long flags; - - save_flags(flags); + struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ; + int was_found; - ftmp = kmalloc( sizeof(struct ip_fw), GFP_ATOMIC ); - if ( ftmp == NULL ) - { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: malloc said no\n"); + FWC_HAVE_LOCK(fwc_wlocks); + + /* Sure, we should compare marks, but since the `ipfwadm' + * script uses it for an unholy hack... well, life is easier + * this way. We also mask it out of the flags word. 
--PR */ + for (ltmp=NULL, was_found=0; + !was_found && ftmp != NULL; + ltmp = ftmp,ftmp = ftmp->next) { + if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr + || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr + || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr + || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr +#if 0 + || ftmp->ipfw.fw_flg!=frwl->ipfw.fw_flg +#else + || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS) + != (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS)) #endif - return( ENOMEM ); - } + || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg + || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto +#if 0 + || ftmp->ipfw.fw_mark!=frwl->ipfw.fw_mark +#endif + || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt + || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0] + || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1] + || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0] + || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1] + || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) { + duprintf("del_rule_from_chain: mismatch:" + "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u " + "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u " + "mark:%u/%u " + "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu " + "outputsize:%hu-%hu\n", + ftmp->ipfw.fw_src.s_addr, + frwl->ipfw.fw_src.s_addr, + ftmp->ipfw.fw_dst.s_addr, + frwl->ipfw.fw_dst.s_addr, + ftmp->ipfw.fw_smsk.s_addr, + frwl->ipfw.fw_smsk.s_addr, + ftmp->ipfw.fw_dmsk.s_addr, + frwl->ipfw.fw_dmsk.s_addr, + ftmp->ipfw.fw_flg, + frwl->ipfw.fw_flg, + ftmp->ipfw.fw_invflg, + frwl->ipfw.fw_invflg, + ftmp->ipfw.fw_proto, + frwl->ipfw.fw_proto, + ftmp->ipfw.fw_mark, + frwl->ipfw.fw_mark, + ftmp->ipfw.fw_spts[0], + frwl->ipfw.fw_spts[0], + ftmp->ipfw.fw_spts[1], + frwl->ipfw.fw_spts[1], + ftmp->ipfw.fw_dpts[0], + frwl->ipfw.fw_dpts[0], + ftmp->ipfw.fw_dpts[1], + frwl->ipfw.fw_dpts[1], + ftmp->ipfw.fw_outputsize, + frwl->ipfw.fw_outputsize); + continue; + } - memcpy(ftmp, frwl, len); - /* - * Allow the more recent "minimise cost" flag to be - * set. 
[Rob van Nieuwkerk] - */ - ftmp->fw_tosand |= 0x01; - ftmp->fw_tosxor &= 0xFE; - ftmp->fw_pcnt=0L; - ftmp->fw_bcnt=0L; - - cli(); - - if ((ftmp->fw_vianame)[0]) { - if (!(ftmp->fw_viadev = dev_get(ftmp->fw_vianame))) - ftmp->fw_viadev = (struct device *) -1; - } else - ftmp->fw_viadev = NULL; - - ftmp->fw_next = *chainptr; - *chainptr=ftmp; - restore_flags(flags); - return(0); + if (strncmp(ftmp->ipfw.fw_vianame, + frwl->ipfw.fw_vianame, + IFNAMSIZ)) { + duprintf("del_rule_from_chain: if mismatch: %s/%s\n", + ftmp->ipfw.fw_vianame, + frwl->ipfw.fw_vianame); + continue; + } + if (ftmp->branch != frwl->branch) { + duprintf("del_rule_from_chain: branch mismatch: " + "%s/%s\n", + ftmp->branch?ftmp->branch->label:"(null)", + frwl->branch?frwl->branch->label:"(null)"); + continue; + } + if (ftmp->branch == NULL + && ftmp->simplebranch != frwl->simplebranch) { + duprintf("del_rule_from_chain: simplebranch mismatch: " + "%i/%i\n", + ftmp->simplebranch, frwl->simplebranch); + continue; + } + was_found = 1; + if (ftmp->branch) + ftmp->branch->refcount--; + if (ltmp) + ltmp->next = ftmp->next; + else + chainptr->chain = ftmp->next; + kfree(ftmp); + break; + } + + if (was_found) + return 0; + else { + duprintf("del_rule_from_chain: no matching rule found\n"); + return EINVAL; + } } -static int append_to_chain(struct ip_fw *volatile* chainptr, struct ip_fw *frwl,int len) +/* This function takes the label of a chain and deletes the first + * chain with that name. No special cases required for the built in + * chains as they have their refcount initilised to 1 so that they are + * never deleted. 
*/ +static int del_chain(ip_chainlabel label) { - struct ip_fw *ftmp; - struct ip_fw *chtmp=NULL; - struct ip_fw *volatile chtmp_prev=NULL; - unsigned long flags; - - save_flags(flags); + struct ip_chain *tmp,*tmp2; - ftmp = kmalloc( sizeof(struct ip_fw), GFP_ATOMIC ); - if ( ftmp == NULL ) - { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: malloc said no\n"); -#endif - return( ENOMEM ); - } + FWC_HAVE_LOCK(fwc_wlocks); + /* Corner case: return EBUSY not ENOENT for first elem ("input") */ + if (strcmp(label, ip_fw_chains->label) == 0) + return EBUSY; - memcpy(ftmp, frwl, len); - /* - * Allow the more recent "minimise cost" flag to be - * set. [Rob van Nieuwkerk] - */ - ftmp->fw_tosand |= 0x01; - ftmp->fw_tosxor &= 0xFE; - ftmp->fw_pcnt=0L; - ftmp->fw_bcnt=0L; - - ftmp->fw_next = NULL; + for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next) + if(strcmp(tmp->next->label,label) == 0) + break; - cli(); + tmp2 = tmp->next; + if (!tmp2) + return ENOENT; - if ((ftmp->fw_vianame)[0]) { - if (!(ftmp->fw_viadev = dev_get(ftmp->fw_vianame))) - ftmp->fw_viadev = (struct device *) -1; - } else - ftmp->fw_viadev = NULL; + if (tmp2->refcount) + return EBUSY; - chtmp_prev=NULL; - for (chtmp=*chainptr;chtmp!=NULL;chtmp=chtmp->fw_next) - chtmp_prev=chtmp; + if (tmp2->chain) + return ENOTEMPTY; - if (chtmp_prev) - chtmp_prev->fw_next=ftmp; - else - *chainptr=ftmp; - restore_flags(flags); - return(0); + tmp->next = tmp2->next; + kfree(tmp2); + return 0; } -static int del_from_chain(struct ip_fw *volatile*chainptr, struct ip_fw *frwl) +/* This is a function to initilise a chain. Built in rules start with + * refcount = 1 so that they cannot be deleted. User defined rules + * start with refcount = 0 so they can be deleted. 
*/ +static struct ip_chain *ip_init_chain(ip_chainlabel name, + __u32 ref, + int policy) { - struct ip_fw *ftmp,*ltmp; - unsigned short tport1,tport2,tmpnum; - char matches,was_found; - unsigned long flags; - - save_flags(flags); - cli(); - - ftmp=*chainptr; - - if ( ftmp == NULL ) - { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: chain is empty\n"); -#endif - restore_flags(flags); - return( EINVAL ); + unsigned int i; + struct ip_chain *label + = kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL); + if (label == NULL) + panic("Can't kmalloc for firewall chains.\n"); + strcpy(label->label,name); + label->next = NULL; + label->chain = NULL; + label->refcount = ref; + label->policy = policy; + for (i = 0; i < smp_num_cpus*2; i++) { + label->reent[i].counters.pcnt = label->reent[i].counters.bcnt + = 0; + label->reent[i].prevchain = NULL; + label->reent[i].prevrule = NULL; } - ltmp=NULL; - was_found=0; + return label; +} - while( !was_found && ftmp != NULL ) - { - matches=1; - if (ftmp->fw_src.s_addr!=frwl->fw_src.s_addr - || ftmp->fw_dst.s_addr!=frwl->fw_dst.s_addr - || ftmp->fw_smsk.s_addr!=frwl->fw_smsk.s_addr - || ftmp->fw_dmsk.s_addr!=frwl->fw_dmsk.s_addr - || ftmp->fw_via.s_addr!=frwl->fw_via.s_addr - || ftmp->fw_flg!=frwl->fw_flg) - matches=0; - - tport1=ftmp->fw_nsp+ftmp->fw_ndp; - tport2=frwl->fw_nsp+frwl->fw_ndp; - if (tport1!=tport2) - matches=0; - else if (tport1!=0) - { - for (tmpnum=0;tmpnum < tport1 && tmpnum < IP_FW_MAX_PORTS;tmpnum++) - if (ftmp->fw_pts[tmpnum]!=frwl->fw_pts[tmpnum]) - matches=0; - } - if (strncmp(ftmp->fw_vianame, frwl->fw_vianame, IFNAMSIZ)) - matches=0; - if(matches) - { - was_found=1; - if (ltmp) - { - ltmp->fw_next=ftmp->fw_next; - kfree_s(ftmp,sizeof(*ftmp)); - ftmp=ltmp->fw_next; - } - else - { - *chainptr=ftmp->fw_next; - kfree_s(ftmp,sizeof(*ftmp)); - ftmp=*chainptr; - } - } - else - { - ltmp = ftmp; - ftmp = ftmp->fw_next; - } - } - restore_flags(flags); - if (was_found) - return 0; - else - return(EINVAL); +/* This is a function 
for reating a new chain. The chains is not + * created if a chain of the same name already exists */ +static int create_chain(ip_chainlabel label) +{ + struct ip_chain *tmp; + + FWC_HAVE_LOCK(fwc_wlocks); + for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next) + if (strcmp(tmp->label,label) == 0) + return EEXIST; + + if (strcmp(tmp->label,label) == 0) + return EEXIST; + + tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is + * zero since this is a + * user defined chain * + * and therefore can be + * deleted */ + return 0; } -#endif /* CONFIG_IP_ACCT || CONFIG_IP_FIREWALL */ +/* This function simply changes the policy on one of the built in + * chains. checking must be done before this is call to ensure that + * chainptr is pointing to one of the three possible chains */ +static int change_policy(struct ip_chain *chainptr, int policy) +{ + FWC_HAVE_LOCK(fwc_wlocks); + chainptr->policy = policy; + return 0; +} -struct ip_fw *check_ipfw_struct(struct ip_fw *frwl, int len) +/* This function takes an ip_fwuser and converts it to a ip_fwkernel. It also + * performs some checks in the structure. */ +static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno) { + struct ip_fwkernel *fwkern; - if ( len != sizeof(struct ip_fw) ) - { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: len=%d, want %d\n",len, sizeof(struct ip_fw)); -#endif - return(NULL); + if ( (fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0 ) { + duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n", + fwuser->ipfw.fw_flg); + *errno = EINVAL; + return NULL; } - if ( (frwl->fw_flg & ~IP_FW_F_MASK) != 0 ) - { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: undefined flag bits set (flags=%x)\n", - frwl->fw_flg); -#endif - return(NULL); +#ifdef DEBUG_IP_FIREWALL_USER + /* These are sanity checks that don't really matter. + * We can get rid of these once testing is complete. 
+ */ + if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN) + && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO) + || fwuser->ipfw.fw_proto != IPPROTO_TCP)) { + duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n"); + *errno = EINVAL; + return NULL; } -#ifndef CONFIG_IP_TRANSPARENT_PROXY - if (frwl->fw_flg & IP_FW_F_REDIR) { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: unsupported flag IP_FW_F_REDIR\n"); -#endif - return(NULL); + if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0 + && fwuser->ipfw.fw_redirpt != 0) { + duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n"); + *errno = EINVAL; + return NULL; } -#endif -#ifndef CONFIG_IP_MASQUERADE - if (frwl->fw_flg & IP_FW_F_MASQ) { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: unsupported flag IP_FW_F_MASQ\n"); -#endif - return(NULL); + if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG) + && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)) + || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN) + && (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) { + duprintf("convert_ipfw: Can't have INV flag if flag unset!\n"); + *errno = EINVAL; + return NULL; } -#endif - if ( (frwl->fw_flg & IP_FW_F_SRNG) && frwl->fw_nsp < 2 ) - { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: src range set but fw_nsp=%d\n", - frwl->fw_nsp); -#endif - return(NULL); + if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT) + && fwuser->ipfw.fw_spts[0] == 0 + && fwuser->ipfw.fw_spts[1] == 0xFFFF) + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT) + && fwuser->ipfw.fw_dpts[0] == 0 + && fwuser->ipfw.fw_dpts[1] == 0xFFFF) + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA) + && (fwuser->ipfw.fw_vianame)[0] == '\0') + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP) + && fwuser->ipfw.fw_smsk.s_addr == 0) + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP) + && fwuser->ipfw.fw_dmsk.s_addr == 0)) { + duprintf("convert_ipfw: INV flag makes rule unmatchable!\n"); + *errno = EINVAL; + return NULL; } - if ( (frwl->fw_flg & IP_FW_F_DRNG) && frwl->fw_ndp < 2 ) - { -#ifdef DEBUG_IP_FIREWALL - 
printk("ip_fw_ctl: dst range set but fw_ndp=%d\n", - frwl->fw_ndp); -#endif - return(NULL); + if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG) + && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG) + && (fwuser->ipfw.fw_spts[0] != 0 + || fwuser->ipfw.fw_spts[1] != 0xFFFF + || fwuser->ipfw.fw_dpts[0] != 0 + || fwuser->ipfw.fw_dpts[1] != 0xFFFF + || (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) { + duprintf("convert_ipfw: Can't test ports or SYN with frag!\n"); + *errno = EINVAL; + return NULL; } - - if ( frwl->fw_nsp + frwl->fw_ndp > (frwl->fw_flg & IP_FW_F_REDIR ? IP_FW_MAX_PORTS - 1 : IP_FW_MAX_PORTS) ) - { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: too many ports (%d+%d)\n", - frwl->fw_nsp,frwl->fw_ndp); #endif - return(NULL); - } - - return frwl; -} - + if ((fwuser->ipfw.fw_spts[0] != 0 + || fwuser->ipfw.fw_spts[1] != 0xFFFF + || fwuser->ipfw.fw_dpts[0] != 0 + || fwuser->ipfw.fw_dpts[1] != 0xFFFF) + && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO) + || (fwuser->ipfw.fw_proto != IPPROTO_TCP + && fwuser->ipfw.fw_proto != IPPROTO_UDP + && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) { + duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n"); + *errno = EINVAL; + return NULL; + } - -#ifdef CONFIG_IP_ACCT - -int ip_acct_ctl(int stage, void *m, int len) -{ - if ( stage == IP_ACCT_FLUSH ) - { - free_fw_chain(&ip_acct_chain); - return(0); - } - if ( stage == IP_ACCT_ZERO ) - { - zero_fw_chain(ip_acct_chain); - return(0); + fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_KERNEL); + if (!fwkern) { + duprintf("convert_ipfw: kmalloc failed!\n"); + *errno = ENOMEM; + return NULL; } - if ( stage == IP_ACCT_INSERT || stage == IP_ACCT_APPEND || - stage == IP_ACCT_DELETE ) - { - struct ip_fw *frwl; + memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw)); + + if (!find_special(fwuser->label, &fwkern->simplebranch)) { + fwkern->branch = find_label(fwuser->label); + if (!fwkern->branch) { + duprintf("convert_ipfw: chain doesn't exist `%s'.\n", + fwuser->label); + kfree(fwkern); + *errno = 
ENOENT; + return NULL; + } else if (fwkern->branch == IP_FW_INPUT_CHAIN + || fwkern->branch == IP_FW_FORWARD_CHAIN + || fwkern->branch == IP_FW_OUTPUT_CHAIN) { + duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n", + fwuser->label); + kfree(fwkern); + *errno = ENOENT; + return NULL; + } + } else + fwkern->branch = NULL; + memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS); - if (!(frwl=check_ipfw_struct(m,len))) - return (EINVAL); + /* Handle empty vianame by making it a wildcard */ + if ((fwkern->ipfw.fw_vianame)[0] == '\0') + fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF; - switch (stage) - { - case IP_ACCT_INSERT: - return( insert_in_chain(&ip_acct_chain,frwl,len)); - case IP_ACCT_APPEND: - return( append_to_chain(&ip_acct_chain,frwl,len)); - case IP_ACCT_DELETE: - return( del_from_chain(&ip_acct_chain,frwl)); - default: - /* - * Should be panic but... (Why ??? - AC) - */ -#ifdef DEBUG_IP_FIREWALL - printk("ip_acct_ctl: unknown request %d\n",stage); -#endif - return(EINVAL); - } - } -#ifdef DEBUG_IP_FIREWALL - printk("ip_acct_ctl: unknown request %d\n",stage); -#endif - return(EINVAL); + fwkern->next = NULL; + return fwkern; } -#endif -#ifdef CONFIG_IP_FIREWALL -int ip_fw_ctl(int stage, void *m, int len) +int ip_fw_ctl(int cmd, void *m, int len) { - int cmd, fwtype; - - cmd = stage & IP_FW_COMMAND; - fwtype = (stage & IP_FW_TYPE) >> IP_FW_SHIFT; + int ret; + struct ip_chain *chain; + unsigned long flags; - if ( cmd == IP_FW_FLUSH ) - { - free_fw_chain(chains[fwtype]); - return(0); - } + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); - if ( cmd == IP_FW_ZERO ) - { - zero_fw_chain(*chains[fwtype]); - return(0); - } + switch (cmd) { + case IP_FW_FLUSH: + if (len != sizeof(ip_chainlabel) || !check_label(m)) + ret = EINVAL; + else if ((chain = find_label(m)) == NULL) + ret = ENOENT; + else ret = clear_fw_chain(chain); + break; - if ( cmd == IP_FW_POLICY ) - { - int *tmp_policy_ptr; - tmp_policy_ptr=(int *)m; - *policies[fwtype] = *tmp_policy_ptr; - return 
0; - } + case IP_FW_ZERO: + if (len != sizeof(ip_chainlabel) || !check_label(m)) + ret = EINVAL; + else if ((chain = find_label(m)) == NULL) + ret = ENOENT; + else ret = zero_fw_chain(chain); + break; - if ( cmd == IP_FW_CHECK ) - { - struct device *viadev; - struct ip_fwpkt *ipfwp; + case IP_FW_CHECK: { + struct ip_fwtest *new = m; struct iphdr *ip; - if ( len != sizeof(struct ip_fwpkt) ) - { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: length=%d, expected %d\n", - len, sizeof(struct ip_fwpkt)); -#endif - return( EINVAL ); - } - - ipfwp = (struct ip_fwpkt *)m; - ip = &(ipfwp->fwp_iph); - - if ( !(viadev = dev_get(ipfwp->fwp_vianame)) ) { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: invalid device \"%s\"\n", ipfwp->fwp_vianame); -#endif - return(EINVAL); - } else if ( ip->ihl != sizeof(struct iphdr) / sizeof(int)) { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: ip->ihl=%d, want %d\n",ip->ihl, - sizeof(struct iphdr)/sizeof(int)); -#endif - return(EINVAL); - } - - switch (ip_fw_chk(ip, viadev, NULL, *chains[fwtype], - *policies[fwtype], IP_FW_MODE_CHK)) - { - case FW_ACCEPT: - return(0); - case FW_REDIRECT: - return(ECONNABORTED); - case FW_MASQUERADE: - return(ECONNRESET); - case FW_REJECT: - return(ECONNREFUSED); - default: /* FW_BLOCK */ - return(ETIMEDOUT); + /* Don't need write lock. 
*/ + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + + if (len != sizeof(struct ip_fwtest) || !check_label(m)) + return EINVAL; + + /* Need readlock to do find_label */ + FWC_READ_LOCK(&ip_fw_lock); + + if ((chain = find_label(new->fwt_label)) == NULL) + ret = ENOENT; + else { + ip = &(new->fwt_packet.fwp_iph); + + if (ip->ihl != sizeof(struct iphdr) / sizeof(int)) { + duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n", + ip->ihl, + sizeof(struct iphdr) / sizeof(int)); + ret = EINVAL; + } + else { + ret = ip_fw_check(ip, new->fwt_packet.fwp_vianame, + NULL, chain, + NULL, SLOT_NUMBER(), 1); + switch (ret) { + case FW_ACCEPT: + ret = 0; break; + case FW_REDIRECT: + ret = ECONNABORTED; break; + case FW_MASQUERADE: + ret = ECONNRESET; break; + case FW_REJECT: + ret = ECONNREFUSED; break; + /* Hack to help diag; these only get + returned when testing. */ + case FW_SKIP+1: + ret = ELOOP; break; + case FW_SKIP: + ret = ENFILE; break; + default: /* FW_BLOCK */ + ret = ETIMEDOUT; break; + } + } } + FWC_READ_UNLOCK(&ip_fw_lock); + return ret; } - if ( cmd == IP_FW_MASQ_TIMEOUTS ) - { + case IP_FW_MASQ_TIMEOUTS: { #ifdef CONFIG_IP_MASQUERADE struct ip_fw_masq *masq; - if ( len != sizeof(struct ip_fw_masq) ) - { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl (masq): length %d, expected %d\n", + if (len != sizeof(struct ip_fw_masq)) { + duprintf("ip_fw_ctl (masq): length %d, expected %d\n", len, sizeof(struct ip_fw_masq)); - -#endif - return( EINVAL ); + ret = EINVAL; } - - masq = (struct ip_fw_masq *) m; - - if (masq->tcp_timeout) - { - ip_masq_expire->tcp_timeout = masq->tcp_timeout; - } - - if (masq->tcp_fin_timeout) - { - ip_masq_expire->tcp_fin_timeout = masq->tcp_fin_timeout; - } - - if (masq->udp_timeout) - { - ip_masq_expire->udp_timeout = masq->udp_timeout; + else { + masq = (struct ip_fw_masq *)m; + if (masq->tcp_timeout) + ip_masq_expire->tcp_timeout + = masq->tcp_timeout; + + if (masq->tcp_fin_timeout) + ip_masq_expire->tcp_fin_timeout + = masq->tcp_fin_timeout; + + if 
(masq->udp_timeout) + ip_masq_expire->udp_timeout + = masq->udp_timeout; + ret = 0; } - - return 0; #else - return( EINVAL ); + ret = EINVAL; #endif } + break; + + case IP_FW_REPLACE: { + struct ip_fwkernel *ip_fwkern; + struct ip_fwnew *new = m; + + if (len != sizeof(struct ip_fwnew) + || !check_label(new->fwn_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwn_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret)) + != NULL) + ret = replace_in_chain(chain, ip_fwkern, + new->fwn_rulenum); + } + break; + + case IP_FW_APPEND: { + struct ip_fwchange *new = m; + struct ip_fwkernel *ip_fwkern; + + if (len != sizeof(struct ip_fwchange) + || !check_label(new->fwc_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwc_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret)) + != NULL) + ret = append_to_chain(chain, ip_fwkern); + } + break; + + case IP_FW_INSERT: { + struct ip_fwkernel *ip_fwkern; + struct ip_fwnew *new = m; + + if (len != sizeof(struct ip_fwnew) + || !check_label(new->fwn_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwn_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret)) + != NULL) + ret = insert_in_chain(chain, ip_fwkern, + new->fwn_rulenum); + } + break; + + case IP_FW_DELETE: { + struct ip_fwchange *new = m; + struct ip_fwkernel *ip_fwkern; + + if (len != sizeof(struct ip_fwchange) + || !check_label(new->fwc_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwc_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret)) + != NULL) + ret = del_rule_from_chain(chain, ip_fwkern); + } + break; -/* - * Here we really working hard-adding new elements - * to blocking/forwarding chains or deleting 'em - */ + case IP_FW_DELETE_NUM: { + struct ip_fwdelnum *new = m; - if ( cmd == IP_FW_INSERT || cmd == IP_FW_APPEND || cmd == IP_FW_DELETE ) - { - struct ip_fw *frwl; - int 
fwtype; + if (len != sizeof(struct ip_fwdelnum) + || !check_label(new->fwd_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwd_label)) == NULL) + ret = ENOENT; + else ret = del_num_from_chain(chain, new->fwd_rulenum); + } + break; - frwl=check_ipfw_struct(m,len); - if (frwl==NULL) - return (EINVAL); - fwtype = (stage & IP_FW_TYPE) >> IP_FW_SHIFT; - - switch (cmd) - { - case IP_FW_INSERT: - return(insert_in_chain(chains[fwtype],frwl,len)); - case IP_FW_APPEND: - return(append_to_chain(chains[fwtype],frwl,len)); - case IP_FW_DELETE: - return(del_from_chain(chains[fwtype],frwl)); + case IP_FW_CREATECHAIN: { + if (len != sizeof(ip_chainlabel)) { + duprintf("create_chain: bad size %i\n", len); + ret = EINVAL; + } + else ret = create_chain(m); + } + break; + + case IP_FW_DELETECHAIN: { + if (len != sizeof(ip_chainlabel)) { + duprintf("delete_chain: bad size %i\n", len); + ret = EINVAL; + } + else ret = del_chain(m); + } + break; + + case IP_FW_POLICY: { + struct ip_fwpolicy *new = m; + + if (len != sizeof(struct ip_fwpolicy) + || !check_label(new->fwp_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwp_label)) == NULL) + ret = ENOENT; + else if (chain != IP_FW_INPUT_CHAIN + && chain != IP_FW_FORWARD_CHAIN + && chain != IP_FW_OUTPUT_CHAIN) { + duprintf("change_policy: can't change policy on user" + " defined chain.\n"); + ret = EINVAL; + } + else { + int pol = FW_SKIP; + find_special(new->fwp_policy, &pol); + + switch(pol) { + case FW_MASQUERADE: + if (chain != IP_FW_FORWARD_CHAIN) { + ret = EINVAL; + break; + } + /* Fall thru... */ + case FW_BLOCK: + case FW_ACCEPT: + case FW_REJECT: + ret = change_policy(chain, pol); + break; default: - /* - * Should be panic but... (Why are BSD people panic obsessed ??) 
- */ -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: unknown request %d\n",stage); -#endif - return(EINVAL); + duprintf("change_policy: bad policy `%s'\n", + new->fwp_policy); + ret = EINVAL; + } } - } + break; + + } + default: + duprintf("ip_fw_ctl: unknown request %d\n",cmd); + ret = EINVAL; + } -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: unknown request %d\n",stage); -#endif - return(EINVAL); + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + return ret; } -#endif /* CONFIG_IP_FIREWALL */ -#ifdef CONFIG_PROC_FS -#if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT) +/* Returns bytes used - doesn't NUL terminate */ +static int dump_rule(char *buffer, + const char *chainlabel, + const struct ip_fwkernel *rule) +{ + int len; + unsigned int i; + __u64 packets = 0, bytes = 0; + + FWC_HAVE_LOCK(fwc_wlocks); + for (i = 0; i < NUM_SLOTS; i++) { + packets += rule->counters[i].pcnt; + bytes += rule->counters[i].bcnt; + } -static int ip_chain_procinfo(int stage, char *buffer, char **start, + len=sprintf(buffer, + "%9s " /* Chain name */ + "%08lX/%08lX->%08lX/%08lX " /* Source & Destination IPs */ + "%.16s " /* Interface */ + "%hX %hX " /* fw_flg and fw_invflg fields */ + "%hu " /* Protocol */ + "%-9u %-9u %-9u %-9u " /* Packet & byte counters */ + "%hu-%hu %hu-%hu " /* Source & Dest port ranges */ + "A%02X X%02X " /* TOS and and xor masks */ + "%08X " /* Redirection port */ + "%u " /* fw_mark field */ + "%hu " /* output size */ + "%9s\n", /* Target */ + chainlabel, + ntohl(rule->ipfw.fw_src.s_addr), + ntohl(rule->ipfw.fw_smsk.s_addr), + ntohl(rule->ipfw.fw_dst.s_addr), + ntohl(rule->ipfw.fw_dmsk.s_addr), + (rule->ipfw.fw_vianame)[0] ? 
rule->ipfw.fw_vianame : "-", + rule->ipfw.fw_flg, + rule->ipfw.fw_invflg, + rule->ipfw.fw_proto, + (__u32)(packets >> 32), (__u32)packets, + (__u32)(bytes >> 32), (__u32)bytes, + rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1], + rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1], + rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor, + rule->ipfw.fw_redirpt, + rule->ipfw.fw_mark, + rule->ipfw.fw_outputsize, + branchname(rule->branch,rule->simplebranch)); + + duprintf("dump_rule: %i bytes done.\n", len); + return len; +} + +/* File offset is actually in records, not bytes. */ +static int ip_chain_procinfo(char *buffer, char **start, off_t offset, int length, int reset) { - off_t pos=0, begin=0; - struct ip_fw *i; + struct ip_chain *i; + struct ip_fwkernel *j = ip_fw_chains->chain; unsigned long flags; - int len, p; + int len = 0; int last_len = 0; - + off_t upto = 0; + + duprintf("Offset starts at %lu\n", offset); + duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains); + + /* Need a write lock to lock out ``readers'' which update counters. */ + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + for (i = ip_fw_chains; i; i = i->next) { + for (j = i->chain; j; j = j->next) { + if (upto == offset) break; + duprintf("Skipping rule in chain `%s'\n", + i->label); + upto++; + } + if (upto == offset) break; + } - switch(stage) - { -#ifdef CONFIG_IP_FIREWALL - case IP_FW_IN: - i = ip_fw_in_chain; - len=sprintf(buffer, "IP firewall input rules, default %d\n", - ip_fw_in_policy); - break; - case IP_FW_OUT: - i = ip_fw_out_chain; - len=sprintf(buffer, "IP firewall output rules, default %d\n", - ip_fw_out_policy); - break; - case IP_FW_FWD: - i = ip_fw_fwd_chain; - len=sprintf(buffer, "IP firewall forward rules, default %d\n", - ip_fw_fwd_policy); - break; -#endif -#ifdef CONFIG_IP_ACCT - case IP_FW_ACCT: - i = ip_acct_chain; - len=sprintf(buffer,"IP accounting rules\n"); - break; -#endif - default: - /* this should never be reached, but safety first... 
*/ - i = NULL; - len=0; - break; + /* Don't init j first time, or once i = NULL */ + for (; i; (void)((i = i->next) && (j = i->chain))) { + duprintf("Dumping chain `%s'\n", i->label); + for (; j; j = j->next, upto++, last_len = len) + { + len += dump_rule(buffer+len, i->label, j); + if (len > length) { + duprintf("Dumped to %i (past %i). " + "Moving back to %i.\n", + len, length, last_len); + len = last_len; + goto outside; + } + else if (reset) + memset(j->counters, 0, + sizeof(struct ip_counters)*NUM_SLOTS); + } } +outside: + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + buffer[len] = '\0'; + + duprintf("ip_chain_procinfo: Length = %i (of %i). Offset = %li.\n", + len, length, upto); + /* `start' hack - see fs/proc/generic.c line ~165 */ + *start=(char *)((unsigned int)upto-offset); + return len; +} - save_flags(flags); - cli(); - - while(i!=NULL) +static int ip_chain_name_procinfo(char *buffer, char **start, + off_t offset, int length, int reset) +{ + struct ip_chain *i; + int len = 0,last_len = 0; + off_t pos = 0,begin = 0; + unsigned long flags; + + /* Need a write lock to lock out ``readers'' which update counters. */ + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + for (i = ip_fw_chains; i; i = i->next) { - len+=sprintf(buffer+len,"%08lX/%08lX->%08lX/%08lX %.16s %08lX %X ", - ntohl(i->fw_src.s_addr),ntohl(i->fw_smsk.s_addr), - ntohl(i->fw_dst.s_addr),ntohl(i->fw_dmsk.s_addr), - (i->fw_vianame)[0] ? 
i->fw_vianame : "-", - ntohl(i->fw_via.s_addr),i->fw_flg); - /* 10 is enough for a 32 bit box but the counters are 64bit on - the Alpha and Ultrapenguin */ - len+=sprintf(buffer+len,"%u %u %-20lu %-20lu", - i->fw_nsp,i->fw_ndp, i->fw_pcnt,i->fw_bcnt); - for (p = 0; p < IP_FW_MAX_PORTS; p++) - len+=sprintf(buffer+len, " %u", i->fw_pts[p]); - len+=sprintf(buffer+len, " A%02X X%02X", i->fw_tosand, i->fw_tosxor); - buffer[len++]='\n'; - buffer[len]='\0'; + unsigned int j; + __u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0; + + for (j = 0; j < NUM_SLOTS; j++) { + packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF; + packetsHi += ((i->reent[j].counters.pcnt >> 32) + & 0xFFFFFFFF); + bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF; + bytesHi += ((i->reent[j].counters.bcnt >> 32) + & 0xFFFFFFFF); + } + + /* print the label and the policy */ + len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n", + i->label,branchname(NULL, i->policy),i->refcount, + packetsHi, packetsLo, bytesHi, bytesLo); pos=begin+len; - if(pos<offset) - { + if(pos<offset) { len=0; begin=pos; } - else if(pos>offset+length) - { + else if(pos>offset+length) { len = last_len; break; } - else if(reset) - { - /* This needs to be done at this specific place! 
*/ - i->fw_pcnt=0L; - i->fw_bcnt=0L; - } + last_len = len; - i=i->fw_next; } - restore_flags(flags); - *start=buffer+(offset-begin); + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + + *start = buffer+(offset-begin); len-=(offset-begin); if(len>length) - len=length; + len=length; return len; } -#endif - -#ifdef CONFIG_IP_ACCT - -static int ip_acct_procinfo(char *buffer, char **start, off_t offset, - int length, int reset) -{ - return ip_chain_procinfo(IP_FW_ACCT, buffer,start, offset,length, - reset); -} - -#endif - -#ifdef CONFIG_IP_FIREWALL - -static int ip_fw_in_procinfo(char *buffer, char **start, off_t offset, - int length, int reset) -{ - return ip_chain_procinfo(IP_FW_IN, buffer,start,offset,length, - reset); -} - -static int ip_fw_out_procinfo(char *buffer, char **start, off_t offset, - int length, int reset) -{ - return ip_chain_procinfo(IP_FW_OUT, buffer,start,offset,length, - reset); -} - -static int ip_fw_fwd_procinfo(char *buffer, char **start, off_t offset, - int length, int reset) -{ - return ip_chain_procinfo(IP_FW_FWD, buffer,start,offset,length, - reset); -} -#endif -#endif - -#ifdef CONFIG_IP_FIREWALL /* * Interface to the generic firewall chains. 
*/ - -int ipfw_input_check(struct firewall_ops *this, int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **pskb) +int ipfw_input_check(struct firewall_ops *this, int pf, struct device *dev, + void *phdr, void *arg, struct sk_buff **pskb) { - return ip_fw_chk(phdr, dev, arg, ip_fw_in_chain, ip_fw_in_policy, IP_FW_MODE_FW); + return ip_fw_check(phdr, dev->name, + arg, IP_FW_INPUT_CHAIN, *pskb, SLOT_NUMBER(), 0); } -int ipfw_output_check(struct firewall_ops *this, int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **pskb) +int ipfw_output_check(struct firewall_ops *this, int pf, struct device *dev, + void *phdr, void *arg, struct sk_buff **pskb) { - return ip_fw_chk(phdr, dev, arg, ip_fw_out_chain, ip_fw_out_policy, IP_FW_MODE_FW); + return ip_fw_check(phdr, dev->name, + arg, IP_FW_OUTPUT_CHAIN, *pskb, SLOT_NUMBER(), 0); } -int ipfw_forward_check(struct firewall_ops *this, int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **pskb) +int ipfw_forward_check(struct firewall_ops *this, int pf, struct device *dev, + void *phdr, void *arg, struct sk_buff **pskb) { - return ip_fw_chk(phdr, dev, arg, ip_fw_fwd_chain, ip_fw_fwd_policy, IP_FW_MODE_FW); + return ip_fw_check(phdr, dev->name, + arg, IP_FW_FORWARD_CHAIN, *pskb, SLOT_NUMBER(), 0); } - + struct firewall_ops ipfw_ops= { NULL, @@ -1283,106 +1684,45 @@ struct firewall_ops ipfw_ops= 0 /* We don't even allow a fall through so we are last */ }; -#endif - -#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL) - -int ipfw_device_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct device *dev=ptr; - char *devname = dev->name; - unsigned long flags; - struct ip_fw *fw; - int chn; - - save_flags(flags); - cli(); - - if (event == NETDEV_UP) { - for (chn = 0; chn < IP_FW_CHAINS; chn++) - for (fw = *chains[chn]; fw; fw = fw->fw_next) - if ((fw->fw_vianame)[0] && !strncmp(devname, - fw->fw_vianame, IFNAMSIZ)) - fw->fw_viadev = dev; - } else if (event == 
NETDEV_DOWN) { - for (chn = 0; chn < IP_FW_CHAINS; chn++) - for (fw = *chains[chn]; fw; fw = fw->fw_next) - /* we could compare just the pointers ... */ - if ((fw->fw_vianame)[0] && !strncmp(devname, - fw->fw_vianame, IFNAMSIZ)) - fw->fw_viadev = (struct device *) -1; - } - - restore_flags(flags); - return NOTIFY_DONE; -} - -static struct notifier_block ipfw_dev_notifier={ - ipfw_device_event, - NULL, - 0 +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry proc_net_ipfwchains_chain = { + PROC_NET_IPFW_CHAINS, sizeof(IP_FW_PROC_CHAINS)-1, + IP_FW_PROC_CHAINS, S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0, + 0, &proc_net_inode_operations, ip_chain_procinfo }; -#endif - -#ifdef CONFIG_PROC_FS -#ifdef CONFIG_IP_ACCT -static struct proc_dir_entry proc_net_ipacct = { - PROC_NET_IPACCT, 7, "ip_acct", - S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0, - 0, &proc_net_inode_operations, - ip_acct_procinfo +static struct proc_dir_entry proc_net_ipfwchains_chainnames = { + PROC_NET_IPFW_CHAIN_NAMES, sizeof(IP_FW_PROC_CHAIN_NAMES)-1, + IP_FW_PROC_CHAIN_NAMES, S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0, + 0, &proc_net_inode_operations, ip_chain_name_procinfo }; -#endif -#endif -#ifdef CONFIG_IP_FIREWALL -#ifdef CONFIG_PROC_FS -static struct proc_dir_entry proc_net_ipfwin = { - PROC_NET_IPFWIN, 8, "ip_input", - S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0, - 0, &proc_net_inode_operations, - ip_fw_in_procinfo -}; -static struct proc_dir_entry proc_net_ipfwout = { - PROC_NET_IPFWOUT, 9, "ip_output", - S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0, - 0, &proc_net_inode_operations, - ip_fw_out_procinfo -}; -static struct proc_dir_entry proc_net_ipfwfwd = { - PROC_NET_IPFWFWD, 10, "ip_forward", - S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0, - 0, &proc_net_inode_operations, - ip_fw_fwd_procinfo -}; #endif -#endif - __initfunc(void ip_fw_init(void)) { -#ifdef CONFIG_PROC_FS -#ifdef CONFIG_IP_ACCT - proc_net_register(&proc_net_ipacct); +#ifdef DEBUG_IP_FIRWALL_LOCKING + fwc_wlocks = fwc_rlocks = 0; #endif -#endif -#ifdef 
CONFIG_IP_FIREWALL + + IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT); + IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT); + IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT); if(register_firewall(PF_INET,&ipfw_ops)<0) panic("Unable to register IP firewall.\n"); + #ifdef CONFIG_PROC_FS - proc_net_register(&proc_net_ipfwin); - proc_net_register(&proc_net_ipfwout); - proc_net_register(&proc_net_ipfwfwd); -#endif + proc_net_register(&proc_net_ipfwchains_chain); + proc_net_register(&proc_net_ipfwchains_chainnames); #endif -#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL) - /* Register for device up/down reports */ - register_netdevice_notifier(&ipfw_dev_notifier); -#endif #ifdef CONFIG_IP_FIREWALL_NETLINK ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL); + if (ipfwsk == NULL) + panic("ip_fw_init: cannot initialize netlink\n"); +#endif +#if defined(DEBUG_IP_FIREWALL) || defined(DEBUG_IP_FIREWALL_USER) + printk("Firewall graphs enabled! Untested kernel coming thru. \n"); #endif } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index fa8208959..f56a90332 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -5,7 +5,7 @@ * * The Internet Protocol (IP) module. * - * Version: $Id: ip_input.c,v 1.3 1998/03/17 22:18:26 ralf Exp $ + * Version: $Id: ip_input.c,v 1.31 1998/05/17 02:19:15 freitag Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -97,7 +97,6 @@ * Alan Cox : Multicast routing hooks * Jos Vos : Do accounting *before* call_in_firewall * Willy Konynenberg : Transparent proxying support - * Mike McLagan : Routing by source * * * @@ -403,15 +402,6 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) ip_statistics.IpInReceives++; /* - * Account for the packet (even if the packet is - * not accepted by the firewall!). 
- */ - -#ifdef CONFIG_IP_ACCT - ip_fw_chk(iph,dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_IN); -#endif - - /* * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. * * Is the datagram acceptable? @@ -479,6 +469,15 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) /* * See if the firewall wants to dispose of the packet. + * + * Note: the current standard firewall code expects that the + * destination address was already checked against the interface + * address lists. + * + * If this code is ever moved in front of ip_route_input() you need + * to fix the fw code [moving it might be a good idea anyways, + * so that we can firewall against potentially bugs in the options + * or routing code] */ #ifdef CONFIG_FIREWALL diff --git a/net/ipv4/ip_masq.c b/net/ipv4/ip_masq.c index cf92b1638..2a6093583 100644 --- a/net/ipv4/ip_masq.c +++ b/net/ipv4/ip_masq.c @@ -1528,7 +1528,7 @@ int ip_fw_demasquerade(struct sk_buff **skb_p) if (csum_tcpudp_magic(iph->saddr, iph->daddr, len, iph->protocol, skb->csum)) { - IP_MASQ_WARNING( "failed TCP/UDP checksum from %d.%d.%d.%d!\n", + IP_MASQ_DEBUG(2, "failed TCP/UDP checksum from %d.%d.%d.%d!\n", NIPQUAD(iph->saddr)); return -1; } diff --git a/net/ipv4/ip_nat_dumb.c b/net/ipv4/ip_nat_dumb.c index def66858c..07a7afc23 100644 --- a/net/ipv4/ip_nat_dumb.c +++ b/net/ipv4/ip_nat_dumb.c @@ -5,7 +5,7 @@ * * Dumb Network Address Translation. 
* - * Version: $Id: ip_nat_dumb.c,v 1.2 1997/12/16 05:37:40 ralf Exp $ + * Version: $Id: ip_nat_dumb.c,v 1.3 1998/03/15 03:31:44 davem Exp $ * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index d78cc1ff0..3e3674ef7 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -5,7 +5,7 @@ * * The options processing module for ip.c * - * Version: $Id: ip_options.c,v 1.3 1998/03/17 22:18:28 ralf Exp $ + * Version: $Id: ip_options.c,v 1.13 1998/02/12 07:43:12 davem Exp $ * * Authors: A.N.Kuznetsov * diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7a1c141bb..0527c1b0b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -5,7 +5,7 @@ * * The Internet Protocol (IP) output module. * - * Version: $Id: ip_output.c,v 1.5 1998/03/17 22:18:29 ralf Exp $ + * Version: $Id: ip_output.c,v 1.59 1998/07/15 05:05:15 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -99,6 +99,7 @@ void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, { struct rtable *rt = (struct rtable *)skb->dst; struct iphdr *iph; + struct device *dev; /* Build the IP header. */ if (opt) @@ -126,10 +127,19 @@ void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, ip_options_build(skb, opt, daddr, rt, 0); } + dev = rt->u.dst.dev; + + if (call_out_firewall(PF_INET, dev, iph, NULL, &skb) < FW_ACCEPT) + goto drop; + ip_send_check(iph); /* Send it out. 
*/ skb->dst->output(skb); + return; + +drop: + kfree_skb(skb); } int __ip_finish_output(struct sk_buff *skb) @@ -218,21 +228,6 @@ int ip_output(struct sk_buff *skb) return ip_finish_output(skb); } -#ifdef CONFIG_IP_ACCT -int ip_acct_output(struct sk_buff *skb) -{ - /* - * Count mapping we shortcut - */ - - ip_fw_chk(skb->nh.iph, skb->dev, NULL, ip_acct_chain, 0, IP_FW_MODE_ACCT_OUT); - - dev_queue_xmit(skb); - - return 0; -} -#endif - /* Queues a packet to be sent, and starts the transmitter if necessary. * This routine also needs to put in the total length and compute the * checksum. We use to do this in two stages, ip_build_header() then @@ -819,7 +814,7 @@ void ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) * will inherit fixed options. */ if (offset == 0) - ip_options_fragment(skb2); + ip_options_fragment(skb); /* * Added AC : If we are fragmenting a fragment that's not the diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b31a1d3a0..8f712c801 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -5,7 +5,7 @@ * * The IP to API glue. 
* - * Version: $Id: ip_sockglue.c,v 1.5 1998/03/17 22:18:29 ralf Exp $ + * Version: $Id: ip_sockglue.c,v 1.36 1998/07/15 05:05:06 davem Exp $ * * Authors: see ip.c * @@ -39,6 +39,8 @@ #include <asm/uaccess.h> +#define MAX(a,b) ((a)>(b)?(a):(b)) + #define IP_CMSG_PKTINFO 1 #define IP_CMSG_TTL 2 #define IP_CMSG_TOS 4 @@ -68,17 +70,11 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb) { - if (IPCB(skb)->opt.optlen == 0) - return; - put_cmsg(msg, SOL_IP, IP_TTL, 1, &skb->nh.iph->ttl); } static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb) { - if (IPCB(skb)->opt.optlen == 0) - return; - put_cmsg(msg, SOL_IP, IP_TOS, 1, &skb->nh.iph->tos); } @@ -227,8 +223,8 @@ int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct s int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { int val=0,err; -#if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT) - struct ip_fw tmp_fw; +#if defined(CONFIG_IP_FIREWALL) + char tmp_fw[MAX(sizeof(struct ip_fwtest),sizeof(struct ip_fwnew))]; #endif #ifdef CONFIG_IP_MASQUERADE char masq_ctl[IP_FW_MASQCTL_MAX]; @@ -387,7 +383,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt } if (!mreq.imr_ifindex) { - if (!mreq.imr_address.s_addr == INADDR_ANY) { + if (mreq.imr_address.s_addr == INADDR_ANY) { sk->ip_mc_index = 0; sk->ip_mc_addr = 0; return 0; @@ -432,28 +428,18 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt return ip_ra_control(sk, val ? 
1 : 0, NULL); #ifdef CONFIG_IP_FIREWALL - case IP_FW_INSERT_IN: - case IP_FW_INSERT_OUT: - case IP_FW_INSERT_FWD: - case IP_FW_APPEND_IN: - case IP_FW_APPEND_OUT: - case IP_FW_APPEND_FWD: - case IP_FW_DELETE_IN: - case IP_FW_DELETE_OUT: - case IP_FW_DELETE_FWD: - case IP_FW_CHECK_IN: - case IP_FW_CHECK_OUT: - case IP_FW_CHECK_FWD: - case IP_FW_FLUSH_IN: - case IP_FW_FLUSH_OUT: - case IP_FW_FLUSH_FWD: - case IP_FW_ZERO_IN: - case IP_FW_ZERO_OUT: - case IP_FW_ZERO_FWD: - case IP_FW_POLICY_IN: - case IP_FW_POLICY_OUT: - case IP_FW_POLICY_FWD: case IP_FW_MASQ_TIMEOUTS: + case IP_FW_APPEND: + case IP_FW_REPLACE: + case IP_FW_DELETE: + case IP_FW_DELETE_NUM: + case IP_FW_INSERT: + case IP_FW_FLUSH: + case IP_FW_ZERO: + case IP_FW_CHECK: + case IP_FW_CREATECHAIN: + case IP_FW_DELETECHAIN: + case IP_FW_POLICY: if(!capable(CAP_NET_ADMIN)) return -EACCES; if(optlen>sizeof(tmp_fw) || optlen<1) @@ -462,8 +448,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt return -EFAULT; err=ip_fw_ctl(optname, &tmp_fw,optlen); return -err; /* -0 is 0 after all */ - -#endif +#endif /* CONFIG_IP_FIREWALL */ #ifdef CONFIG_IP_MASQUERADE case IP_FW_MASQ_ADD: case IP_FW_MASQ_DEL: @@ -478,21 +463,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt return -err; /* -0 is 0 after all */ #endif -#ifdef CONFIG_IP_ACCT - case IP_ACCT_INSERT: - case IP_ACCT_APPEND: - case IP_ACCT_DELETE: - case IP_ACCT_FLUSH: - case IP_ACCT_ZERO: - if(!capable(CAP_NET_ADMIN)) - return -EACCES; - if(optlen>sizeof(tmp_fw) || optlen<1) - return -EINVAL; - if(copy_from_user(&tmp_fw, optval,optlen)) - return -EFAULT; - err=ip_acct_ctl(optname, &tmp_fw,optlen); - return -err; /* -0 is 0 after all */ -#endif default: return(-ENOPROTOOPT); } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 1e44ae8aa..76372b4ab 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1,5 +1,5 @@ /* - * $Id: ipconfig.c,v 1.11 1998/02/12 07:43:16 davem Exp $ + * 
$Id: ipconfig.c,v 1.15 1998/06/19 13:22:33 davem Exp $ * * Automatic Configuration of IP -- use BOOTP or RARP or user-supplied * information to configure own IP address and routes. @@ -248,6 +248,11 @@ __initfunc(int ic_setup_routes(void)) __initfunc(int ic_defaults(void)) { + /* + * At this point we have no userspace running so need not + * claim locks on system_utsname + */ + if (!ic_host_name_set) strcpy(system_utsname.nodename, in_ntoa(ic_myaddr)); @@ -318,7 +323,7 @@ ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)) if (rarp->ar_op != htons(ARPOP_RREPLY)) goto drop; - /* If it's not ethernet, delete it. */ + /* If it's not Ethernet, delete it. */ if (rarp->ar_pro != htons(ETH_P_IP)) goto drop; @@ -552,7 +557,7 @@ __initfunc(static int ic_udp_open(struct socket **sock)) { int err; - if ((err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_UDP, sock)) < 0) + if ((err = sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, sock)) < 0) printk(KERN_ERR "BOOTP: Cannot open UDP socket!\n"); return err; } @@ -1072,8 +1077,8 @@ __initfunc(int ip_auto_config(void)) } /* - * Decode any IP configuration options in the "ipconfig" kernel command - * line parameter. It consists of option fields separated by colons in + * Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel + * command line parameter. It consists of option fields separated by colons in * the following order: * * <client-ip>:<server-ip>:<gw-ip>:<netmask>:<host name>:<device>:<bootp|rarp> diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ce071d406..d0b3b5ff2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -1,7 +1,7 @@ /* * Linux NET3: IP/IP protocol decoder. 
* - * Version: $Id: ipip.c,v 1.5 1998/03/17 22:18:30 ralf Exp $ + * Version: $Id: ipip.c,v 1.22 1998/03/08 05:56:27 davem Exp $ * * Authors: * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d3c7503df..29fd4b3ad 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -9,7 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: ipmr.c,v 1.4 1998/03/17 22:18:31 ralf Exp $ + * Version: $Id: ipmr.c,v 1.35 1998/05/13 06:23:24 davem Exp $ * * Fixes: * Michael Chastain : Incorrect size of copying. @@ -321,7 +321,7 @@ static void ipmr_cache_delete(struct mfc_cache *cache) nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT; - netlink_unicast(rtnl, skb, NETLINK_CB(skb).pid, MSG_DONTWAIT); + netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); } else #endif kfree_skb(skb); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index fc6176c0f..b6e06242f 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -7,7 +7,7 @@ * PROC file system. It is mainly used for debugging and * statistics. * - * Version: $Id: proc.c,v 1.4 1997/12/16 05:37:43 ralf Exp $ + * Version: $Id: proc.c,v 1.30 1998/04/16 16:29:05 freitag Exp $ * * Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Gerald J. 
Heim, <heim@peanuts.informatik.uni-tuebingen.de> diff --git a/net/ipv4/rarp.c b/net/ipv4/rarp.c index e1eba43c5..7f7c7e3f2 100644 --- a/net/ipv4/rarp.c +++ b/net/ipv4/rarp.c @@ -3,11 +3,11 @@ * Copyright (C) 1994 by Ross Martin * Based on linux/net/inet/arp.c, Copyright (C) 1994 by Florian La Roche * - * $Id: rarp.c,v 1.4 1998/03/17 22:18:31 ralf Exp $ + * $Id: rarp.c,v 1.25 1998/06/19 13:22:34 davem Exp $ * * This module implements the Reverse Address Resolution Protocol * (RARP, RFC 903), which is used to convert low level addresses such - * as ethernet addresses into high level addresses such as IP addresses. + * as Ethernet addresses into high level addresses such as IP addresses. * The most common use of RARP is as a means for a diskless workstation * to discover its IP address during a network boot. * @@ -19,7 +19,7 @@ *** unless you have all the rest to boot the box from it. ** * - * Currently, only ethernet address -> IP address is likely to work. + * Currently, only Ethernet address -> IP address is likely to work. * (Is RARP ever used for anything else?) * * This code is free software; you can redistribute it and/or diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a73f12a49..8d8bdab97 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -5,7 +5,7 @@ * * RAW - implementation of IP "raw" sockets. * - * Version: $Id: raw.c,v 1.4 1998/03/17 22:18:32 ralf Exp $ + * Version: $Id: raw.c,v 1.36 1998/05/08 21:06:29 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7d21af4a8..e10f65c68 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -5,7 +5,7 @@ * * ROUTE - implementation of the IP router. * - * Version: $Id: route.c,v 1.5 1998/03/17 22:18:32 ralf Exp $ + * Version: $Id: route.c,v 1.54 1998/07/15 05:05:22 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -144,9 +144,9 @@ struct dst_ops ipv4_dst_ops = }; __u8 ip_tos2prio[16] = { - TC_PRIO_FILLER, TC_PRIO_BESTEFFORT, TC_PRIO_FILLER, + TC_PRIO_BESTEFFORT, TC_PRIO_FILLER, TC_PRIO_BULK, TC_PRIO_FILLER, @@ -221,7 +221,7 @@ static int rt_cache_get_info(char *buffer, char **start, off_t offset, int lengt r->u.dst.window, (int)r->u.dst.rtt, r->key.tos, r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, - r->u.dst.hh ? (r->u.dst.hh->hh_output == ip_acct_output) : 0, + r->u.dst.hh ? (r->u.dst.hh->hh_output == dev_queue_xmit) : 0, r->rt_spec_dst); sprintf(buffer+len,"%-127s\n",temp); len += 128; @@ -816,7 +816,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res) rt->u.dst.window= 0; rt->u.dst.rtt = TCP_TIMEOUT_INIT; } -#ifdef CONFIG_NET_CLS_ROUTE +#if defined(CONFIG_NET_CLS_ROUTE) && defined(CONFIG_IP_MULTIPLE_TABLES) if (rt->u.dst.tclassid == 0) rt->u.dst.tclassid = fib_rules_tclass(res); #endif @@ -1095,11 +1095,12 @@ local_input: rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->u.dst.input= ip_local_deliver; + rth->rt_flags = flags|RTCF_LOCAL; if (res.type == RTN_UNREACHABLE) { rth->u.dst.input= ip_error; - rth->u.dst.error= err; + rth->u.dst.error= -err; + rth->rt_flags &= ~RTCF_LOCAL; } - rth->rt_flags = flags|RTCF_LOCAL; rth->rt_type = res.type; skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth); return 0; @@ -1165,7 +1166,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, } /* Multicast recognition logic is moved from route cache to here. - The problem was that too many ethernet cards have broken/missing + The problem was that too many Ethernet cards have broken/missing hardware multicast filters :-( As result the host on multicasting network acquires a lot of useless route cache entries, sort of SDR messages from all the world. Now we try to get rid of them. 
@@ -1495,7 +1496,7 @@ static int rt_fill_info(struct sk_buff *skb, pid_t pid, u32 seq, int event, int nlh->nlmsg_flags = nowait ? NLM_F_MULTI : 0; r->rtm_family = AF_INET; r->rtm_dst_len = 32; - r->rtm_src_len = 32; + r->rtm_src_len = 0; r->rtm_tos = rt->key.tos; r->rtm_table = RT_TABLE_MAIN; r->rtm_type = rt->rt_type; @@ -1508,9 +1509,16 @@ static int rt_fill_info(struct sk_buff *skb, pid_t pid, u32 seq, int event, int o = skb->tail; #endif RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst); - RTA_PUT(skb, RTA_SRC, 4, &rt->rt_src); + if (rt->key.src) { + r->rtm_src_len = 32; + RTA_PUT(skb, RTA_SRC, 4, &rt->key.src); + } if (rt->u.dst.dev) RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); + if (rt->key.iif) + RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); + else if (rt->rt_src != rt->key.src) + RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src); if (rt->rt_dst != rt->rt_gateway) RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway); #ifdef CONFIG_RTNL_OLD_IFINFO @@ -1532,7 +1540,6 @@ static int rt_fill_info(struct sk_buff *skb, pid_t pid, u32 seq, int event, int if (mx->rta_len == RTA_LENGTH(0)) skb_trim(skb, (u8*)mx - skb->data); #endif - RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); ci.rta_lastuse = jiffies - rt->u.dst.lastuse; ci.rta_used = atomic_read(&rt->u.dst.refcnt); ci.rta_clntref = atomic_read(&rt->u.dst.use); @@ -1637,7 +1644,7 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - NETLINK_CB(skb).pid = NETLINK_CB(in_skb).pid; + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0); if (err == 0) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index a3e3be0f1..c3e219d46 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -9,7 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
* - * $Id: syncookies.c,v 1.5 1998/04/03 09:49:46 freitag Exp $ + * $Id: syncookies.c,v 1.6 1998/06/10 07:29:22 davem Exp $ * * Missing: IPv6 support. */ @@ -26,104 +26,74 @@ extern int sysctl_tcp_syncookies; static unsigned long tcp_lastsynq_overflow; /* - * This table has to be sorted. Only 8 entries are allowed and the - * last entry has to be duplicated. + * This table has to be sorted and terminated with (__u16)-1. * XXX generate a better table. * Unresolved Issues: HIPPI with a 64k MSS is not well supported. */ static __u16 const msstab[] = { - 64, - 256, - 512, - 536, - 1024, - 1440, - 1460, - 4312, - 4312 + 64-1, + 256-1, + 512-1, + 536-1, + 1024-1, + 1440-1, + 1460-1, + 4312-1, + (__u16)-1 }; - -static __u32 make_syncookie(struct sk_buff *skb, __u32 counter, __u32 seq) -{ - __u32 z; - - z = secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr, - skb->h.th->source, skb->h.th->dest, - seq, - counter); - -#if 0 - printk(KERN_DEBUG - "msc: z=%u,cnt=%u,seq=%u,sadr=%u,dadr=%u,sp=%u,dp=%u\n", - z,counter,seq, - skb->nh.iph->saddr,skb->nh.iph->daddr, - ntohs(skb->h.th->source), ntohs(skb->h.th->dest)); -#endif - - return z; -} +/* The number doesn't include the -1 terminator */ +#define NUM_MSS (sizeof(msstab)/sizeof(msstab[0]) - 1) /* - * Generate a syncookie. + * Generate a syncookie. mssp points to the mss, which is returned + * rounded down to the value encoded in the cookie. */ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) { - int i; - __u32 isn; - const __u16 mss = *mssp, *w; + int mssind; + const __u16 mss = *mssp; tcp_lastsynq_overflow = jiffies; - - isn = make_syncookie(skb, (jiffies/HZ) >> 6, ntohl(skb->h.th->seq)); - - /* XXX sort msstab[] by probability? */ - w = msstab; - for (i = 0; i < 8; i++) - if (mss >= *w && mss < *++w) - goto found; - i--; -found: - *mssp = w[-1]; + /* XXX sort msstab[] by probability? Binary search? 
*/ + for (mssind = 0; mss > msstab[mssind+1]; mssind++) + ; + *mssp = msstab[mssind]+1; net_statistics.SyncookiesSent++; - isn |= i; - return isn; + return secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr, + skb->h.th->source, skb->h.th->dest, + ntohl(skb->h.th->seq), + jiffies / (HZ*60), mssind); } -/* This value should be dependent on TCP_TIMEOUT_INIT and - * sysctl_tcp_retries1. It's a rather complicated formula - * (exponential backoff) to compute at runtime so it's currently hardcoded - * here. +/* + * This (misnamed) value is the age of syncookie which is permitted. + * Its ideal value should be dependent on TCP_TIMEOUT_INIT and + * sysctl_tcp_retries1. It's a rather complicated formula (exponential + * backoff) to compute at runtime so it's currently hardcoded here. */ #define COUNTER_TRIES 4 - /* * Check if a ack sequence number is a valid syncookie. + * Return the decoded mss if it is, or 0 if not. */ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) { - int mssind; - int i; - __u32 counter; __u32 seq; + __u32 mssind; - if ((jiffies - tcp_lastsynq_overflow) > TCP_TIMEOUT_INIT - && tcp_lastsynq_overflow) { + if ((jiffies - tcp_lastsynq_overflow) > TCP_TIMEOUT_INIT) return 0; - } - - mssind = cookie & 7; - cookie &= ~7; - counter = (jiffies/HZ)>>6; seq = ntohl(skb->h.th->seq)-1; - for (i = 0; i < COUNTER_TRIES; i++) - if (make_syncookie(skb, counter-i, seq) == cookie) - return msstab[mssind]; + mssind = check_tcp_syn_cookie(cookie, + skb->nh.iph->saddr, skb->nh.iph->daddr, + skb->h.th->source, skb->h.th->dest, + seq, jiffies/(HZ*60), COUNTER_TRIES); - return 0; + return mssind < NUM_MSS ? msstab[mssind]+1 : 0; } extern struct or_calltable or_ipv4; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 92b980b55..65bc5f0fc 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1,7 +1,7 @@ /* * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem. 
* - * $Id: sysctl_net_ipv4.c,v 1.7 1998/03/17 22:18:33 ralf Exp $ + * $Id: sysctl_net_ipv4.c,v 1.34 1998/04/11 09:38:26 freitag Exp $ * * Begun April 1, 1996, Mike Shaver. * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS] diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fd4284af9..3d6f188e7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.114 1998/04/26 01:11:33 davem Exp $ + * Version: $Id: tcp.c,v 1.116 1998/07/26 03:06:54 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -580,7 +580,7 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait) /* Always wake the user up when an error occurred */ if (sock_wspace(sk) >= space || sk->err) mask |= POLLOUT | POLLWRNORM; - if (tp->urg_data) + if (tp->urg_data & URG_VALID) mask |= POLLPRI; } return mask; @@ -1069,6 +1069,13 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, add_wait_queue(sk->sleep, &wait); lock_sock(sk); + + /* + * BUG BUG BUG + * This violates 1003.1g compliance. We must wait for + * data to exist even if we read none! + */ + while (len > 0) { struct sk_buff * skb; u32 offset; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7a0a40aeb..a4ad2dc3c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.116 1998/05/02 14:50:11 davem Exp $ + * Version: $Id: tcp_input.c,v 1.121 1998/07/15 04:39:12 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -156,8 +156,8 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt) } } -/* Calculate rto without backoff. This is the second half of Van Jacobsons - * routine refered to above. +/* Calculate rto without backoff. 
This is the second half of Van Jacobson's + * routine referred to above. */ static __inline__ void tcp_set_rto(struct tcp_opt *tp) @@ -186,13 +186,21 @@ static __inline__ void tcp_bound_rto(struct tcp_opt *tp) } /* WARNING: this must not be called if tp->saw_timestamp was false. */ -extern __inline__ void tcp_replace_ts_recent(struct tcp_opt *tp, __u32 end_seq) +extern __inline__ void tcp_replace_ts_recent(struct sock *sk, struct tcp_opt *tp, + __u32 start_seq, __u32 end_seq) { /* From draft-ietf-tcplw-high-performance: the correct * test is last_ack_sent <= end_seq. * (RFC1323 stated last_ack_sent < end_seq.) + * + * HOWEVER: The current check contradicts the draft statements. + * It has been done for good reasons. + * The implemented check improves security and eliminates + * unnecessary RTT overestimation. + * 1998/06/27 Andrey V. Savochkin <saw@msu.ru> */ - if (!before(end_seq, tp->last_ack_sent)) { + if (!before(end_seq, tp->last_ack_sent - sk->rcvbuf) && + !after(start_seq, tp->rcv_wup + tp->rcv_wnd)) { /* PAWS bug workaround wrt. ACK frames, the PAWS discard * extra check below makes sure this can only happen * for pure ACK frames. -DaveM @@ -593,7 +601,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack, * connection startup slow start one packet too * quickly. This is severely frowned upon behavior. 
*/ - if(sacked & TCPCB_SACKED_RETRANS && tp->retrans_out) + if((sacked & TCPCB_SACKED_RETRANS) && tp->retrans_out) tp->retrans_out--; if(!(scb->flags & TCPCB_FLAG_SYN)) { acked |= FLAG_DATA_ACKED; @@ -968,7 +976,7 @@ void tcp_time_wait(struct sock *sk) tw->af_specific = sk->tp_pinfo.af_tcp.af_specific; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - if(tw->family == AF_INET6) { + if(tw->family == PF_INET6) { memcpy(&tw->v6_daddr, &sk->net_pinfo.af_inet6.daddr, sizeof(struct in6_addr)); @@ -1657,7 +1665,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, goto discard; } } - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->end_seq); + tcp_replace_ts_recent(sk, tp, + TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq); } } @@ -1686,8 +1696,13 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, } } else if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una) { /* Bulk data transfer: receiver */ - if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) + if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) { + /* We must send an ACK for zero window probes. */ + if (!before(TCP_SKB_CB(skb)->seq, + tp->rcv_wup + tp->rcv_wnd)) + tcp_send_ack(sk); goto discard; + } skb_pull(skb,th->doff*4); @@ -1714,15 +1729,21 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, } if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { - if (!th->rst) { - if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - SOCK_DEBUG(sk, "seq:%d end:%d wup:%d wnd:%d\n", - TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, - tp->rcv_wup, tp->rcv_wnd); - } - tcp_send_ack(sk); + /* RFC793, page 37: "In all states except SYN-SENT, all reset + * (RST) segments are validated by checking their SEQ-fields." + * And page 69: "If an incoming segment is not acceptable, + * an acknowledgment should be sent in reply (unless the RST bit + * is set, if so drop the segment and return)". 
+ */ + if (th->rst) goto discard; + if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { + SOCK_DEBUG(sk, "seq:%d end:%d wup:%d wnd:%d\n", + TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, + tp->rcv_wup, tp->rcv_wnd); } + tcp_send_ack(sk); + goto discard; } if(th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) { @@ -2020,7 +2041,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; } } - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->end_seq); + tcp_replace_ts_recent(sk, tp, + TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq); } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 328cc9389..e0ecdbfa5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.145 1998/05/02 12:47:13 davem Exp $ + * Version: $Id: tcp_ipv4.c,v 1.150 1998/07/28 17:45:07 freitag Exp $ * * IPv4 specific functions * @@ -690,7 +690,7 @@ static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, /* - * This routine does path mtu discovery as defined in RFC1197. + * This routine does path mtu discovery as defined in RFC1191. 
*/ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip) { @@ -771,10 +771,11 @@ void tcp_v4_err(struct sk_buff *skb, unsigned char *dp, int len) tp = &sk->tp_pinfo.af_tcp; seq = ntohl(th->seq); if (sk->state != TCP_LISTEN && - !between(seq, tp->snd_una, max(tp->snd_una+32768,tp->snd_nxt))) { + !between(seq, tp->snd_una-16384, max(tp->snd_una+32768,tp->snd_nxt))) { if (net_ratelimit()) - printk(KERN_DEBUG "icmp packet outside the tcp window:" - " s:%d %u,%u,%u\n", + printk(KERN_WARNING + "icmp packet outside the tcp window:" + " state:%d seq:%u win:%u,%u\n", (int)sk->state, seq, tp->snd_una, tp->snd_nxt); return; } @@ -1033,7 +1034,14 @@ tcp_v4_save_options(struct sock *sk, struct sk_buff *skb, return dopt; } -int sysctl_max_syn_backlog = 1024; +/* + * Maximum number of SYN_RECV sockets in queue per LISTEN socket. + * One SYN_RECV socket costs about 80bytes on a 32bit machine. + * It would be better to replace it with a global counter for all sockets + * but then some measure against one socket starving all other sockets + * would be needed. 
+ */ +int sysctl_max_syn_backlog = 128; struct or_calltable or_ipv4 = { tcp_v4_send_synack, @@ -1155,7 +1163,7 @@ drop: struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, struct sk_buff *skb, int snd_mss) { - struct sock *newsk = sk_alloc(AF_INET, GFP_ATOMIC, 0); + struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, 0); if(newsk != NULL) { struct tcp_opt *newtp; @@ -1325,11 +1333,6 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (!newsk) goto exit; - if (newsk->rcvbuf < (3 * newsk->mtu)) - newsk->rcvbuf = min ((3 * newsk->mtu), sysctl_rmem_max); - if (newsk->sndbuf < (3 * newsk->mtu)) - newsk->sndbuf = min ((3 * newsk->mtu), sysctl_wmem_max); - sk->tp_pinfo.af_tcp.syn_backlog--; sk->ack_backlog++; @@ -1346,6 +1349,11 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->opt = req->af.v4_req.opt; newsk->mtu = mtu; + if (newsk->rcvbuf < (3 * newsk->mtu)) + newsk->rcvbuf = min ((3 * newsk->mtu), sysctl_rmem_max); + if (newsk->sndbuf < (3 * newsk->mtu)) + newsk->sndbuf = min ((3 * newsk->mtu), sysctl_wmem_max); + tcp_v4_hash(newsk); add_to_prot_sklist(newsk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 482ca262c..84535341f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_output.c,v 1.87 1998/04/26 01:11:35 davem Exp $ + * Version: $Id: tcp_output.c,v 1.92 1998/06/19 13:22:44 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -166,10 +166,10 @@ void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue) } } -/* Function to create two new tcp segments. Shrinks the given segment +/* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the - * packet to the list. 
This won't be called frenquently, I hope... - * Remember, these are still header-less SKB's at this point. + * packet to the list. This won't be called frequently, I hope. + * Remember, these are still headerless SKBs at this point. */ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) { @@ -256,7 +256,7 @@ void tcp_write_xmit(struct sock *sk) * * a) following SWS avoidance [and Nagle algorithm] * b) not exceeding our congestion window. - * c) not retransmiting [Nagle] + * c) not retransmitting [Nagle] */ while((skb = tp->send_head) && tcp_snd_test(sk, skb)) { if (skb->len > mss_now) { @@ -288,14 +288,14 @@ void tcp_write_xmit(struct sock *sk) * 2. We limit memory per socket * * RFC 1122: - * "the suggested [SWS] avoidance algoritm for the receiver is to keep + * "the suggested [SWS] avoidance algorithm for the receiver is to keep * RECV.NEXT + RCV.WIN fixed until: * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)" * * i.e. don't raise the right edge of the window until you can raise * it at least MSS bytes. * - * Unfortunately, the recomended algorithm breaks header prediction, + * Unfortunately, the recommended algorithm breaks header prediction, * since header prediction assumes th->window stays fixed. * * Strictly speaking, keeping th->window fixed violates the receiver @@ -331,16 +331,18 @@ void tcp_write_xmit(struct sock *sk) * * Note, we don't "adjust" for TIMESTAMP or SACK option bytes. */ -u32 __tcp_select_window(struct sock *sk) +u32 __tcp_select_window(struct sock *sk, u32 cur_win) { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; unsigned int mss = sk->mss; - unsigned int free_space; - u32 window, cur_win; + int free_space; + u32 window; + /* Sometimes free_space can be < 0. 
*/ free_space = (sk->rcvbuf - atomic_read(&sk->rmem_alloc)) / 2; if (tp->window_clamp) { - free_space = min(tp->window_clamp, free_space); + if (free_space > ((int) tp->window_clamp)) + free_space = tp->window_clamp; mss = min(tp->window_clamp, mss); } else { printk("tcp_select_window: tp->window_clamp == 0.\n"); @@ -351,8 +353,7 @@ u32 __tcp_select_window(struct sock *sk) printk("tcp_select_window: sk->mss fell to 0.\n"); } - cur_win = tcp_receive_window(tp); - if (free_space < sk->rcvbuf/4 && free_space < mss/2) { + if ((free_space < (sk->rcvbuf/4)) && (free_space < ((int) (mss/2)))) { window = 0; } else { /* Get the largest window that is a nice multiple of mss. @@ -364,8 +365,9 @@ u32 __tcp_select_window(struct sock *sk) * is too small. */ window = tp->rcv_wnd; - if ((window <= (free_space - mss)) || (window > free_space)) - window = (free_space/mss)*mss; + if ((((int) window) <= (free_space - ((int) mss))) || + (((int) window) > free_space)) + window = (((unsigned int) free_space)/mss)*mss; } return window; } @@ -415,8 +417,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m } /* Update sequence range on original skb. */ - TCP_SKB_CB(skb)->end_seq += - TCP_SKB_CB(next_skb)->end_seq - TCP_SKB_CB(next_skb)->seq; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; /* Merge over control information. */ flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ @@ -453,7 +454,9 @@ void tcp_simple_retransmit(struct sock *sk) * and not use it for RTT calculation in the absence of * the timestamp option. */ - for (skb = skb_peek(&sk->write_queue); skb != tp->send_head; + for (skb = skb_peek(&sk->write_queue); + ((skb != tp->send_head) && + (skb != (struct sk_buff *)&sk->write_queue)); skb = skb->next) if (skb->len > mss) tcp_retransmit_skb(sk, skb); @@ -471,7 +474,7 @@ static __inline__ void update_retrans_head(struct sock *sk) /* This retransmits one SKB. 
Policy decisions and retransmit queue * state updates are done by the caller. Returns non-zero if an - * error occured which prevented the send. + * error occurred which prevented the send. */ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) { @@ -502,7 +505,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) tp->retrans_out++; /* Make a copy, if the first transmission SKB clone we made - * is still in somebodies hands, else make a clone. + * is still in somebody's hands, else make a clone. */ TCP_SKB_CB(skb)->when = jiffies; if(skb_cloned(skb)) @@ -536,6 +539,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (tp->retrans_head == tp->send_head) tp->retrans_head = NULL; + /* Each time, advance the retrans_head if we got + * a packet out or we skipped one because it was + * SACK'd. -DaveM + */ while ((skb = tp->retrans_head) != NULL) { /* If it has been ack'd by a SACK block, we don't * retransmit it. @@ -544,14 +551,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk) /* Send it out, punt if error occurred. */ if(tcp_retransmit_skb(sk, skb)) break; + + update_retrans_head(sk); /* Stop retransmitting if we've hit the congestion * window limit. */ if (tp->retrans_out >= (tp->snd_cwnd >> TCP_CWND_SHIFT)) break; + } else { + update_retrans_head(sk); } - update_retrans_head(sk); } } @@ -732,8 +742,6 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, mss = min(mss, sk->user_mss); if (req->tstamp_ok) mss -= TCPOLEN_TSTAMP_ALIGNED; - else - req->mss += TCPOLEN_TSTAMP_ALIGNED; /* Don't offer more than they did. * This way we don't have to memorize who said what. 
@@ -819,7 +827,7 @@ void tcp_connect(struct sock *sk, struct sk_buff *buff, int mss) mss = min(mss, sk->user_mss); if (mss < 1) { - printk(KERN_DEBUG "intial sk->mss below 1\n"); + printk(KERN_DEBUG "initial sk->mss below 1\n"); mss = 1; /* Sanity limit */ } diff --git a/net/ipv4/timer.c b/net/ipv4/timer.c index 5c5e5eeb3..a0501bd19 100644 --- a/net/ipv4/timer.c +++ b/net/ipv4/timer.c @@ -5,7 +5,7 @@ * * TIMER - implementation of software timers for IP. * - * Version: $Id: timer.c,v 1.2 1997/12/16 05:37:48 ralf Exp $ + * Version: $Id: timer.c,v 1.11 1998/03/19 08:34:06 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d42dcc3d4..7e2c7bfa6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -5,7 +5,7 @@ * * The User Datagram Protocol (UDP). * - * Version: $Id: udp.c,v 1.4 1998/03/17 22:18:36 ralf Exp $ + * Version: $Id: udp.c,v 1.57 1998/05/14 06:32:44 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -57,6 +57,8 @@ * Andi Kleen : Some cleanups, cache destination entry * for connect. * Vitaly E. Lavrov : Transparent proxy revived after year coma. + * Melvin Smith : Check msg_name not msg_namelen in sendto(), + * return ENOTCONN for unconnected sockets (POSIX) * * * This program is free software; you can redistribute it and/or @@ -657,7 +659,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len) * Get and verify the address. 
*/ - if (msg->msg_namelen) { + if (msg->msg_name) { struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; if (msg->msg_namelen < sizeof(*usin)) return(-EINVAL); @@ -684,7 +686,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len) */ } else { if (sk->state != TCP_ESTABLISHED) - return -EINVAL; + return -ENOTCONN; ufh.daddr = sk->daddr; ufh.uh.dest = sk->dport; diff --git a/net/ipv4/utils.c b/net/ipv4/utils.c index 3e638d6c8..ce74ade2a 100644 --- a/net/ipv4/utils.c +++ b/net/ipv4/utils.c @@ -6,7 +6,7 @@ * Various kernel-resident INET utility functions; mainly * for format conversion and debugging output. * - * Version: $Id: utils.c,v 1.3 1997/12/16 05:37:49 ralf Exp $ + * Version: $Id: utils.c,v 1.6 1997/12/13 21:53:03 kuznet Exp $ * * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5571c04c7..329807093 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: addrconf.c,v 1.38 1998/03/20 09:12:14 davem Exp $ + * $Id: addrconf.c,v 1.43 1998/07/15 05:05:32 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -53,6 +53,7 @@ #include <linux/rtnetlink.h> #include <asm/uaccess.h> +#include <asm/delay.h> /* Set to 3 to get tracing... */ #define ACONF_DEBUG 2 @@ -1033,7 +1034,7 @@ static void addrconf_dev_config(struct device *dev) struct inet6_dev * idev; if (dev->type != ARPHRD_ETHER) { - /* Alas, we support only ethernet autoconfiguration. */ + /* Alas, we support only Ethernet autoconfiguration. 
*/ return; } @@ -1157,13 +1158,6 @@ static int addrconf_ifdown(struct device *dev, int how) start_bh_atomic(); - /* Discard multicast list */ - - if (how == 1) - ipv6_mc_destroy_dev(idev); - else - ipv6_mc_down(idev); - /* Discard address list */ idev->addr_list = NULL; @@ -1187,6 +1181,13 @@ static int addrconf_ifdown(struct device *dev, int how) } } + /* Discard multicast list */ + + if (how == 1) + ipv6_mc_destroy_dev(idev); + else + ipv6_mc_down(idev); + /* Delete device from device hash table (if unregistered) */ if (how == 1) { @@ -1608,7 +1609,7 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_MAX-RTM_BASE+1] = { inet6_rtm_newroute, NULL, }, { inet6_rtm_delroute, NULL, }, - { NULL, inet6_dump_fib, }, + { inet6_rtm_getroute, inet6_dump_fib, }, { NULL, NULL, }, }; #endif @@ -1808,7 +1809,7 @@ __initfunc(void addrconf_init(void)) addr_chk_timer.expires = jiffies + ADDR_CHECK_FREQUENCY; add_timer(&addr_chk_timer); #ifdef CONFIG_RTNETLINK - rtnetlink_links[AF_INET6] = inet6_rtnetlink_table; + rtnetlink_links[PF_INET6] = inet6_rtnetlink_table; #endif #ifdef CONFIG_SYSCTL addrconf_sysctl.sysctl_header = @@ -1825,7 +1826,7 @@ void addrconf_cleanup(void) int i; #ifdef CONFIG_RTNETLINK - rtnetlink_links[AF_INET6] = NULL; + rtnetlink_links[PF_INET6] = NULL; #endif #ifdef CONFIG_SYSCTL addrconf_sysctl_unregister(&ipv6_devconf_dflt); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c1b2e9d14..051f9a28e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1,5 +1,5 @@ /* - * AF_INET6 socket family + * PF_INET6 socket protocol family * Linux INET6 implementation * * Authors: @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/af_inet.c * - * $Id: af_inet6.c,v 1.30 1998/03/25 00:23:05 davem Exp $ + * $Id: af_inet6.c,v 1.36 1998/06/10 07:29:25 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -66,12 +66,17 @@ extern int udp6_get_info(char *, char **, off_t, 
int, int); extern int afinet6_get_info(char *, char **, off_t, int, int); #endif +#ifdef CONFIG_SYSCTL +extern void ipv6_sysctl_register(void); +extern void ipv6_sysctl_unregister(void); +#endif + static int inet6_create(struct socket *sock, int protocol) { struct sock *sk; struct proto *prot; - sk = sk_alloc(AF_INET6, GFP_KERNEL, 1); + sk = sk_alloc(PF_INET6, GFP_KERNEL, 1); if (sk == NULL) goto do_oom; @@ -105,7 +110,7 @@ static int inet6_create(struct socket *sock, int protocol) sk->destruct = NULL; sk->zapped = 0; - sk->family = AF_INET6; + sk->family = PF_INET6; sk->protocol = protocol; sk->prot = prot; @@ -336,7 +341,7 @@ static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } struct proto_ops inet6_stream_ops = { - AF_INET6, + PF_INET6, sock_no_dup, inet6_release, @@ -357,7 +362,7 @@ struct proto_ops inet6_stream_ops = { }; struct proto_ops inet6_dgram_ops = { - AF_INET6, + PF_INET6, sock_no_dup, inet6_release, @@ -378,7 +383,7 @@ struct proto_ops inet6_dgram_ops = { }; struct net_proto_family inet6_family_ops = { - AF_INET6, + PF_INET6, inet6_create }; @@ -412,10 +417,16 @@ static struct proc_dir_entry proc_net_sockstat6 = { #ifdef MODULE int ipv6_unload(void) { - return 0; + /* We keep internally 3 raw sockets */ + return __this_module.usecount - 3; } #endif +#if defined(MODULE) && defined(CONFIG_SYSCTL) +extern void ipv6_sysctl_register(void); +extern void ipv6_sysctl_unregister(void); +#endif + #ifdef MODULE int init_module(void) #else @@ -423,6 +434,7 @@ __initfunc(void inet6_proto_init(struct net_proto *pro)) #endif { struct sk_buff *dummy_skb; + int err; #ifdef MODULE if (!mod_member_present(&__this_module, can_unload)) @@ -443,8 +455,6 @@ __initfunc(void inet6_proto_init(struct net_proto *pro)) #endif } - (void) sock_register(&inet6_family_ops); - /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance @@ -452,19 +462,26 @@ __initfunc(void 
inet6_proto_init(struct net_proto *pro)) * able to communicate via both network protocols. */ - ipv6_init(); - - icmpv6_init(&inet6_family_ops); - - addrconf_init(); - - sit_init(); - - /* init v6 transport protocols */ - +#if defined(MODULE) && defined(CONFIG_SYSCTL) + ipv6_sysctl_register(); +#endif + err = icmpv6_init(&inet6_family_ops); + if (err) + goto icmp_fail; + err = ndisc_init(&inet6_family_ops); + if (err) + goto ndisc_fail; + err = igmp6_init(&inet6_family_ops); + if (err) + goto igmp_fail; + ipv6_netdev_notif_init(); + ipv6_packet_init(); + ip6_route_init(); + addrconf_init(); + sit_init(); + + /* Init v6 transport protocols. */ udpv6_init(); - /* add /proc entries here */ - tcpv6_init(); /* Create /proc/foo6 entries. */ @@ -475,22 +492,52 @@ __initfunc(void inet6_proto_init(struct net_proto *pro)) proc_net_register(&proc_net_sockstat6); #endif + /* Now the userspace is allowed to create INET6 sockets. */ + (void) sock_register(&inet6_family_ops); + #ifdef MODULE return 0; +#else + return; +#endif + +igmp_fail: + ndisc_cleanup(); +ndisc_fail: + icmpv6_cleanup(); +icmp_fail: +#if defined(MODULE) && defined(CONFIG_SYSCTL) + ipv6_sysctl_unregister(); +#endif +#ifdef MODULE + return err; +#else + return; #endif } #ifdef MODULE void cleanup_module(void) { - sit_cleanup(); - ipv6_cleanup(); - sock_unregister(AF_INET6); + /* First of all disallow new sockets creation. */ + sock_unregister(PF_INET6); #ifdef CONFIG_PROC_FS proc_net_unregister(proc_net_raw6.low_ino); proc_net_unregister(proc_net_tcp6.low_ino); proc_net_unregister(proc_net_udp6.low_ino); proc_net_unregister(proc_net_sockstat6.low_ino); #endif + /* Cleanup code parts. 
*/ + sit_cleanup(); + ipv6_netdev_notif_cleanup(); + addrconf_cleanup(); + ip6_route_cleanup(); + ipv6_packet_cleanup(); + igmp6_cleanup(); + ndisc_cleanup(); + icmpv6_cleanup(); +#ifdef CONFIG_SYSCTL + ipv6_sysctl_unregister(); +#endif } #endif /* MODULE */ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 104895936..c3b6f7b6b 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: icmp.c,v 1.17 1998/05/01 10:31:41 davem Exp $ + * $Id: icmp.c,v 1.18 1998/05/07 15:42:59 davem Exp $ * * Based on net/ipv4/icmp.c * @@ -62,8 +62,7 @@ * ICMP socket for flow control. */ -struct inode icmpv6_inode; -struct socket *icmpv6_socket=&icmpv6_inode.u.socket_i; +struct socket *icmpv6_socket; int icmpv6_rcv(struct sk_buff *skb, struct device *dev, struct in6_addr *saddr, struct in6_addr *daddr, @@ -557,19 +556,23 @@ __initfunc(int icmpv6_init(struct net_proto_family *ops)) struct sock *sk; int err; - icmpv6_inode.i_mode = S_IFSOCK; - icmpv6_inode.i_sock = 1; - icmpv6_inode.i_uid = 0; - icmpv6_inode.i_gid = 0; - - icmpv6_socket->inode = &icmpv6_inode; - icmpv6_socket->state = SS_UNCONNECTED; - icmpv6_socket->type=SOCK_RAW; - - if((err=ops->create(icmpv6_socket, IPPROTO_ICMPV6))<0) { - printk(KERN_DEBUG + icmpv6_socket = sock_alloc(); + if (icmpv6_socket == NULL) { + printk(KERN_ERR "Failed to create the ICMP6 control socket.\n"); - return 1; + return -1; + } + icmpv6_socket->inode->i_uid = 0; + icmpv6_socket->inode->i_gid = 0; + icmpv6_socket->type = SOCK_RAW; + + if ((err = ops->create(icmpv6_socket, IPPROTO_ICMPV6)) < 0) { + printk(KERN_ERR + "Failed to initialize the ICMP6 control socket (err %d).\n", + err); + sock_release(icmpv6_socket); + icmpv6_socket = NULL; /* for safety */ + return err; } sk = icmpv6_socket->sk; @@ -578,18 +581,14 @@ __initfunc(int icmpv6_init(struct net_proto_family *ops)) inet6_add_protocol(&icmpv6_protocol); - ndisc_init(ops); - igmp6_init(ops); - return 0; + return 0; } void 
icmpv6_cleanup(void) { + sock_release(icmpv6_socket); + icmpv6_socket = NULL; /* For safety. */ inet6_del_protocol(&icmpv6_protocol); -#if 0 - ndisc_cleanup(); -#endif - igmp6_cleanup(); } static struct icmp6_err { diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 693caaf3b..e7e12e3ae 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: ip6_fib.c,v 1.13 1998/04/28 06:22:03 davem Exp $ + * $Id: ip6_fib.c,v 1.14 1998/05/07 15:43:03 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -1077,3 +1077,10 @@ void fib6_run_gc(unsigned long dummy) ip6_fib_timer.expires = 0; } } + +#ifdef MODULE +void fib6_gc_cleanup(void) +{ + del_timer(&ip6_fib_timer); +} +#endif diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5f024dddb..6ab4d2c08 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -6,7 +6,7 @@ * Pedro Roque <roque@di.fc.ul.pt> * Ian P. 
Morris <I.P.Morris@soton.ac.uk> * - * $Id: ip6_input.c,v 1.9 1998/04/30 16:24:24 freitag Exp $ + * $Id: ip6_input.c,v 1.10 1998/07/15 05:05:34 davem Exp $ * * Based in linux/net/ipv4/ip_input.c * @@ -65,11 +65,6 @@ struct hdrtype_proc { /* New header structures */ -struct ipv6_tlvtype { - u8 type; - u8 len; -}; - struct tlvtype_proc { u8 type; int (*func) (struct sk_buff *, struct device *dev, __u8 *ptr, @@ -82,7 +77,7 @@ struct tlvtype_proc { {255, NULL} }; -static int ip6_dstopt_unknown(struct sk_buff *skb, struct ipv6_tlvtype *hdr) +int ip6_dstopt_unknown(struct sk_buff *skb, struct ipv6_tlvtype *hdr) { struct in6_addr *daddr; int pos; @@ -91,7 +86,7 @@ static int ip6_dstopt_unknown(struct sk_buff *skb, struct ipv6_tlvtype *hdr) * unkown destination option type */ - pos = (__u8 *) skb->h.raw - (__u8 *) skb->nh.raw; + pos = (__u8 *) hdr - (__u8 *) skb->nh.raw; /* I think this is correct please check - IPM */ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index eb3984f55..aa13c2074 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: ip6_output.c,v 1.12 1998/04/11 22:11:06 davem Exp $ + * $Id: ip6_output.c,v 1.13 1998/07/15 05:05:38 davem Exp $ * * Based on linux/net/ipv4/ip_output.c * @@ -32,6 +32,7 @@ #include <net/protocol.h> #include <net/ip6_route.h> #include <net/addrconf.h> +#include <net/rawv6.h> static u32 ipv6_fragmentation_id = 1; @@ -519,25 +520,104 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, return err; } +int ip6_call_ra_chain(struct sk_buff *skb, int sel) +{ + struct ip6_ra_chain *ra; + struct sock *last = NULL; + + for (ra = ip6_ra_chain; ra; ra = ra->next) { + struct sock *sk = ra->sk; + if (sk && ra->sel == sel) { + if (last) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) { + skb2->sk = last; + rawv6_rcv(skb2, skb2->dev, &skb2->nh.ipv6h->saddr, + &skb2->nh.ipv6h->daddr, NULL, skb2->len); + } + } + 
last = sk; + } + } + + if (last) { + skb->sk = last; + rawv6_rcv(skb, skb->dev, &skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, NULL, skb->len); + return 1; + } + return 0; +} + int ip6_forward(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct ipv6hdr *hdr = skb->nh.ipv6h; int size; - if (ipv6_devconf.forwarding == 0) { - kfree_skb(skb); - return -EINVAL; - } + if (ipv6_devconf.forwarding == 0) + goto drop; /* * check hop-by-hop options present */ -#if 0 - if (hdr->nexthdr == NEXTHDR_HOP) - { + /* + * Note, that NEXTHDR_HOP header must be checked + * always at the most beginning of ipv6_rcv. + * The result should be saved somewhere, but + * we do not it for now. Alas. Let's do it here. --ANK + * + * Second note: we DO NOT make any processing on + * RA packets, pushing them to user level AS IS + * without ane WARRANTY that application will able + * to interpret them. The reson is that we + * cannot make anything clever here. + * + * We are not end-node, so that if packet contains + * AH/ESP, we cannot make anything. + * Defragmentation also would be mistake, RA packets + * cannot be fragmented, because there is no warranty + * that different fragments will go along one path. 
--ANK + */ + if (hdr->nexthdr == NEXTHDR_HOP) { + int ra_value = -1; + u8 *ptr = (u8*)(skb->nh.ipv6h+1); + int len = (ptr[1]+1)<<3; + + if (len + sizeof(struct ipv6hdr) > skb->len) + goto drop; + + ptr += 2; + len -= 2; + while (len > 0) { + u8 *opt; + int optlen; + + if (ptr[0] == 0) { + len--; + ptr++; + continue; + } + opt = ptr; + optlen = ptr[1]+1; + + len -= optlen; + ptr += optlen; + if (len < 0) + goto drop; + + if (opt[0] == 20) { + /* Router Alert as of draft-ietf-ipngwg-ipv6router-alert-04 */ + if (optlen < 4) + goto drop; + ra_value = opt[2] + (opt[3]<<8); + } else if (!ip6_dstopt_unknown(skb, (struct ipv6_tlvtype*)opt)) + goto drop; + } + if (ra_value>=0 && ip6_call_ra_chain(skb, ra_value)) + return 0; } -#endif + /* * check and decrement ttl */ @@ -589,4 +669,8 @@ int ip6_forward(struct sk_buff *skb) dst->output(skb); return 0; + +drop: + kfree_skb(skb); + return -EINVAL; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index ebd3365cd..b31c07c00 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -7,7 +7,7 @@ * * Based on linux/net/ipv4/ip_sockglue.c * - * $Id: ipv6_sockglue.c,v 1.19 1998/04/30 16:24:26 freitag Exp $ + * $Id: ipv6_sockglue.c,v 1.22 1998/07/15 05:05:39 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -67,6 +67,45 @@ static struct notifier_block ipv6_dev_notf = { 0 }; +struct ip6_ra_chain *ip6_ra_chain; + +int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) +{ + struct ip6_ra_chain *ra, *new_ra, **rap; + + /* RA packet may be delivered ONLY to IPPROTO_RAW socket */ + if (sk->type != SOCK_RAW || sk->num != IPPROTO_RAW) + return -EINVAL; + + new_ra = (sel>=0) ? 
kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; + + for (rap = &ip6_ra_chain; (ra=*rap) != NULL; rap = &ra->next) { + if (ra->sk == sk) { + if (sel>=0) { + if (new_ra) + kfree(new_ra); + return -EADDRINUSE; + } + *rap = ra->next; + if (ra->destructor) + ra->destructor(sk); + kfree(ra); + return 0; + } + } + if (new_ra == NULL) + return -ENOBUFS; + new_ra->sk = sk; + new_ra->sel = sel; + new_ra->destructor = destructor; + start_bh_atomic(); + new_ra->next = ra; + *rap = new_ra; + end_bh_atomic(); + return 0; +} + + int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { @@ -74,6 +113,9 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval, int val, err; int retv = -ENOPROTOOPT; + if(level==SOL_IP && sk->type != SOCK_RAW) + return udp_prot.setsockopt(sk, level, optname, optval, optlen); + if(level!=SOL_IPV6) goto out; @@ -110,7 +152,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval, sk->prot = &tcp_prot; tp->af_specific = &ipv4_specific; sk->socket->ops = &inet_stream_ops; - sk->family = AF_INET; + sk->family = PF_INET; } else { sk->prot = &udp_prot; sk->socket->ops = &inet_dgram_ops; @@ -197,7 +239,11 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval, retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); else retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); + break; } + case IPV6_ROUTER_ALERT: + retv = ip6_ra_control(sk, val, NULL); + break; }; out: @@ -207,7 +253,11 @@ out: int ipv6_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen) { - return 0; + if(level==SOL_IP && sk->type != SOCK_RAW) + return udp_prot.getsockopt(sk, level, optname, optval, optlen); + if(level!=SOL_IPV6) + return -ENOPROTOOPT; + return -EINVAL; } #if defined(MODULE) && defined(CONFIG_SYSCTL) @@ -220,31 +270,24 @@ extern void ipv6_sysctl_register(void); extern void ipv6_sysctl_unregister(void); #endif 
-__initfunc(void ipv6_init(void)) +__initfunc(void ipv6_packet_init(void)) { dev_add_pack(&ipv6_packet_type); +} -#if defined(MODULE) && defined(CONFIG_SYSCTL) - ipv6_sysctl_register(); -#endif - +__initfunc(void ipv6_netdev_notif_init(void)) +{ register_netdevice_notifier(&ipv6_dev_notf); - - ip6_route_init(); } #ifdef MODULE -void ipv6_cleanup(void) +void ipv6_packet_cleanup(void) { - unregister_netdevice_notifier(&ipv6_dev_notf); dev_remove_pack(&ipv6_packet_type); -#ifdef CONFIG_SYSCTL - ipv6_sysctl_unregister(); -#endif - ip6_route_cleanup(); - icmpv6_cleanup(); - addrconf_cleanup(); } -#endif - +void ipv6_netdev_notif_cleanup(void) +{ + unregister_netdevice_notifier(&ipv6_dev_notf); +} +#endif diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 0e10dcf0b..c50f37fcf 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: mcast.c,v 1.15 1998/04/30 16:24:28 freitag Exp $ + * $Id: mcast.c,v 1.16 1998/05/07 15:43:10 davem Exp $ * * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c * @@ -52,8 +52,7 @@ #define MDBG(x) #endif -static struct inode igmp6_inode; -static struct socket *igmp6_socket=&igmp6_inode.u.socket_i; +static struct socket *igmp6_socket; static void igmp6_join_group(struct ifmcaddr6 *ma); static void igmp6_leave_group(struct ifmcaddr6 *ma); @@ -598,7 +597,7 @@ done: } #endif -__initfunc(void igmp6_init(struct net_proto_family *ops)) +__initfunc(int igmp6_init(struct net_proto_family *ops)) { #ifdef CONFIG_PROC_FS struct proc_dir_entry *ent; @@ -606,18 +605,24 @@ __initfunc(void igmp6_init(struct net_proto_family *ops)) struct sock *sk; int err; - igmp6_inode.i_mode = S_IFSOCK; - igmp6_inode.i_sock = 1; - igmp6_inode.i_uid = 0; - igmp6_inode.i_gid = 0; - - igmp6_socket->inode = &igmp6_inode; - igmp6_socket->state = SS_UNCONNECTED; + igmp6_socket = sock_alloc(); + if (igmp6_socket == NULL) { + printk(KERN_ERR + "Failed to create the IGMP6 control socket.\n"); + return -1; + 
} + igmp6_socket->inode->i_uid = 0; + igmp6_socket->inode->i_gid = 0; igmp6_socket->type = SOCK_RAW; - if((err=ops->create(igmp6_socket, IPPROTO_ICMPV6))<0) + if((err = ops->create(igmp6_socket, IPPROTO_ICMPV6)) < 0) { printk(KERN_DEBUG - "Failed to create the IGMP6 control socket.\n"); + "Failed to initialize the IGMP6 control socket (err %d).\n", + err); + sock_release(igmp6_socket); + igmp6_socket = NULL; /* For safety. */ + return err; + } sk = igmp6_socket->sk; sk->allocation = GFP_ATOMIC; @@ -628,11 +633,17 @@ __initfunc(void igmp6_init(struct net_proto_family *ops)) ent = create_proc_entry("net/igmp6", 0, 0); ent->read_proc = igmp6_read_proc; #endif + + return 0; } +#ifdef MODULE void igmp6_cleanup(void) { + sock_release(igmp6_socket); + igmp6_socket = NULL; /* for safety */ #ifdef CONFIG_PROC_FS - remove_proc_entry("net/igmp6", 0); + remove_proc_entry("net/igmp6", 0); #endif } +#endif diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index e69d90332..26e42a1ed 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -74,8 +74,7 @@ #include <net/checksum.h> #include <linux/proc_fs.h> -static struct inode ndisc_inode; -static struct socket *ndisc_socket=&ndisc_inode.u.socket_i; +static struct socket *ndisc_socket; static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); @@ -1134,23 +1133,29 @@ struct proc_dir_entry ndisc_proc_entry = -__initfunc(void ndisc_init(struct net_proto_family *ops)) +__initfunc(int ndisc_init(struct net_proto_family *ops)) { struct sock *sk; int err; - ndisc_inode.i_mode = S_IFSOCK; - ndisc_inode.i_sock = 1; - ndisc_inode.i_uid = 0; - ndisc_inode.i_gid = 0; - - ndisc_socket->inode = &ndisc_inode; - ndisc_socket->state = SS_UNCONNECTED; - ndisc_socket->type = SOCK_RAW; + ndisc_socket = sock_alloc(); + if (ndisc_socket == NULL) { + printk(KERN_ERR + "Failed to create the NDISC control socket.\n"); + return -1; + } + ndisc_socket->inode->i_uid = 0; + 
ndisc_socket->inode->i_gid = 0; + ndisc_socket->type = SOCK_RAW; - if((err=ops->create(ndisc_socket, IPPROTO_ICMPV6))<0) + if((err = ops->create(ndisc_socket, IPPROTO_ICMPV6)) < 0) { printk(KERN_DEBUG - "Failed to create the NDISC control socket.\n"); + "Failed to initializee the NDISC control socket (err %d).\n", + err); + sock_release(ndisc_socket); + ndisc_socket = NULL; /* For safety. */ + return err; + } sk = ndisc_socket->sk; sk->allocation = GFP_ATOMIC; @@ -1174,9 +1179,10 @@ __initfunc(void ndisc_init(struct net_proto_family *ops)) #ifdef CONFIG_SYSCTL neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6"); #endif + + return 0; } -#ifdef MODULE void ndisc_cleanup(void) { #ifdef CONFIG_PROC_FS @@ -1185,5 +1191,6 @@ void ndisc_cleanup(void) #endif #endif neigh_table_clear(&nd_tbl); + sock_release(ndisc_socket); + ndisc_socket = NULL; /* For safety. */ } -#endif diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 3ec242adb..8a5ae0654 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -3,9 +3,9 @@ * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * - * AF_INET6 protocol dispatch tables. + * PF_INET6 protocol dispatch tables. 
* - * Version: $Id: protocol.c,v 1.5 1997/03/18 18:24:44 davem Exp $ + * Version: $Id: protocol.c,v 1.6 1998/05/03 14:31:09 alan Exp $ * * Authors: Pedro Roque <roque@di.fc.ul.pt> * diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 7429a9210..659ec59cc 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/raw.c * - * $Id: raw.c,v 1.19 1998/03/20 09:12:20 davem Exp $ + * $Id: raw.c,v 1.20 1998/07/15 05:05:41 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -596,6 +596,8 @@ static void rawv6_close(struct sock *sk, unsigned long timeout) { sk->state = TCP_CLOSE; ipv6_sock_mc_close(sk); + if (sk->num == IPPROTO_RAW) + ip6_ra_control(sk, -1, NULL); sk->dead = 1; destroy_sock(sk); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3baa41007..9d159fe36 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: route.c,v 1.28 1998/04/28 06:22:04 davem Exp $ + * $Id: route.c,v 1.32 1998/07/25 23:28:52 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -1722,7 +1722,6 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) return err; } - struct rt6_rtnl_dump_arg { struct sk_buff *skb; @@ -1733,6 +1732,9 @@ struct rt6_rtnl_dump_arg }; static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, + struct in6_addr *dst, + struct in6_addr *src, + int iif, int type, pid_t pid, u32 seq) { struct rtmsg *rtm; @@ -1777,10 +1779,23 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, #ifdef CONFIG_RTNL_OLD_IFINFO o = skb->tail; #endif - if (rtm->rtm_dst_len) + if (dst) { + RTA_PUT(skb, RTA_DST, 16, dst); + rtm->rtm_dst_len = 128; + } else if (rtm->rtm_dst_len) RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); - if (rtm->rtm_src_len) + if (src) 
{ + RTA_PUT(skb, RTA_SRC, 16, src); + rtm->rtm_src_len = 128; + } else if (rtm->rtm_src_len) RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); + if (iif) + RTA_PUT(skb, RTA_IIF, 4, &iif); + else if (dst) { + struct inet6_ifaddr *ifp = ipv6_get_saddr(&rt->u.dst, dst); + if (ifp) + RTA_PUT(skb, RTA_PREFSRC, 16, &ifp->addr); + } #ifdef CONFIG_RTNL_OLD_IFINFO if (rt->u.dst.pmtu) RTA_PUT(skb, RTA_MTU, sizeof(unsigned), &rt->u.dst.pmtu); @@ -1842,7 +1857,7 @@ static void rt6_dump_node(struct fib6_node *fn, void *p_arg) arg->count++; continue; } - if (rt6_fill_node(arg->skb, rt, RTM_NEWROUTE, + if (rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq) <= 0) { arg->stop = 1; break; @@ -1870,6 +1885,68 @@ int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) +{ + struct rtattr **rta = arg; + int iif = 0; + int err; + struct sk_buff *skb; + struct flowi fl; + struct rt6_info *rt; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + return -ENOBUFS; + + /* Reserve room for dummy headers, this skb can pass + through good chunk of routing engine. 
+ */ + skb->mac.raw = skb->data; + skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); + + fl.proto = 0; + fl.nl_u.ip6_u.daddr = NULL; + fl.nl_u.ip6_u.saddr = NULL; + fl.uli_u.icmpt.type = 0; + fl.uli_u.icmpt.code = 0; + if (rta[RTA_SRC-1]) + fl.nl_u.ip6_u.saddr = (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]); + if (rta[RTA_DST-1]) + fl.nl_u.ip6_u.daddr = (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]); + + if (rta[RTA_IIF-1]) + memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + + if (iif) { + struct device *dev; + dev = dev_get_by_index(iif); + if (!dev) + return -ENODEV; + } + + fl.oif = 0; + if (rta[RTA_OIF-1]) + memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); + + rt = (struct rt6_info*)ip6_route_output(NULL, &fl); + + skb->dst = &rt->u.dst; + + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + err = rt6_fill_node(skb, rt, + fl.nl_u.ip6_u.daddr, + fl.nl_u.ip6_u.saddr, + iif, + RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq); + if (err < 0) + return -EMSGSIZE; + + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + if (err < 0) + return err; + return 0; +} + void inet6_rt_notify(int event, struct rt6_info *rt) { struct sk_buff *skb; @@ -1880,7 +1957,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt) netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS); return; } - if (rt6_fill_node(skb, rt, event, 0, 0) < 0) { + if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0) < 0) { kfree_skb(skb); netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL); return; @@ -2173,5 +2250,6 @@ void ip6_route_cleanup(void) netlink_detach(NETLINK_ROUTE6); #endif rt6_ifdown(NULL); + fib6_gc_cleanup(); } #endif /* MODULE */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0a4a95c7c..5fa45dce5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: tcp_ipv6.c,v 1.80 1998/05/02 12:47:15 davem Exp $ + * $Id: tcp_ipv6.c,v 1.82 1998/06/11 03:15:52 davem Exp $ * * 
Based on: * linux/net/ipv4/tcp.c @@ -64,9 +64,7 @@ static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport, { int hashent = (lport ^ fport); - hashent ^= (laddr->s6_addr32[0] ^ laddr->s6_addr32[1]); - hashent ^= (faddr->s6_addr32[0] ^ faddr->s6_addr32[1]); - hashent ^= (faddr->s6_addr32[2] ^ faddr->s6_addr32[3]); + hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); return (hashent & ((TCP_HTABLE_SIZE/2) - 1)); } @@ -145,6 +143,13 @@ go_like_smoke: static void tcp_v6_hash(struct sock *sk) { + /* Well, I know that it is ugly... + All this ->prot, ->af_specific etc. need LARGE cleanup --ANK + */ + if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) { + tcp_prot.hash(sk); + return; + } if(sk->state != TCP_CLOSE) { struct sock **skp; @@ -213,7 +218,7 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor hiscore=0; sk = tcp_listening_hash[tcp_lhashfn(hnum)]; for(; sk; sk = sk->next) { - if((sk->num == hnum) && (sk->family == AF_INET6)) { + if((sk->num == hnum) && (sk->family == PF_INET6)) { struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; score = 1; @@ -272,7 +277,7 @@ static inline struct sock *__tcp_v6_lookup(struct tcphdr *th, /* Must check for a TIME_WAIT'er before going to listener hash. */ for(sk = tcp_established_hash[hash+(TCP_HTABLE_SIZE/2)]; sk; sk = sk->next) { if(*((__u32 *)&(sk->dport)) == ports && - sk->family == AF_INET6) { + sk->family == PF_INET6) { struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; if(!ipv6_addr_cmp(&tw->v6_daddr, saddr) && !ipv6_addr_cmp(&tw->v6_rcv_saddr, daddr) && @@ -415,8 +420,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (err) { sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific; sk->backlog_rcv = tcp_v6_do_rcv; + } else { + /* Yuup... And it is not the only place... 
--ANK */ + ipv6_addr_set(&np->saddr, 0, 0, __constant_htonl(0x0000FFFF), + sk->saddr); + ipv6_addr_set(&np->rcv_saddr, 0, 0, __constant_htonl(0x0000FFFF), + sk->rcv_saddr); } - + return err; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6078ab679..2dac0570f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -7,7 +7,7 @@ * * Based on linux/ipv4/udp.c * - * $Id: udp.c,v 1.27 1998/03/21 07:28:06 davem Exp $ + * $Id: udp.c,v 1.31 1998/07/15 05:05:45 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -147,7 +147,7 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) { if((sk->num == hnum) && - (sk->family == AF_INET6) && + (sk->family == PF_INET6) && !(sk->dead && (sk->state == TCP_CLOSE))) { struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; int score = 0; @@ -185,12 +185,18 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; struct in6_addr *daddr; struct dst_entry *dst; - struct ipv6_pinfo *np; struct inet6_ifaddr *ifa; struct flowi fl; int addr_type; + int err; + + if (usin->sin6_family == AF_INET) { + err = udp_connect(sk, uaddr, addr_len); + goto ipv4_connected; + } if (addr_len < sizeof(*usin)) return(-EINVAL); @@ -199,7 +205,6 @@ int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return(-EAFNOSUPPORT); addr_type = ipv6_addr_type(&usin->sin6_addr); - np = &sk->net_pinfo.af_inet6; if (addr_type == IPV6_ADDR_ANY) { /* @@ -212,18 +217,21 @@ int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_type == IPV6_ADDR_MAPPED) { struct sockaddr_in sin; - int err; sin.sin_family = AF_INET; sin.sin_addr.s_addr = daddr->s6_addr32[3]; + 
sin.sin_port = usin->sin6_port; err = udp_connect(sk, (struct sockaddr*) &sin, sizeof(sin)); - + +ipv4_connected: if (err < 0) return err; - ipv6_addr_copy(&np->daddr, daddr); - + ipv6_addr_set(&np->daddr, 0, 0, + __constant_htonl(0x0000ffff), + sk->daddr); + if(ipv6_addr_any(&np->saddr)) { ipv6_addr_set(&np->saddr, 0, 0, __constant_htonl(0x0000ffff), @@ -236,7 +244,7 @@ int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) __constant_htonl(0x0000ffff), sk->rcv_saddr); } - + return 0; } ipv6_addr_copy(&np->daddr, daddr); @@ -347,6 +355,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len, if (skb->protocol == __constant_htons(ETH_P_IP)) { ipv6_addr_set(&sin6->sin6_addr, 0, 0, __constant_htonl(0xffff), skb->nh.iph->saddr); + if (sk->ip_cmsg_flags) + ip_cmsg_recv(msg, skb); } else { memcpy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr, sizeof(struct in6_addr)); @@ -668,6 +678,9 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen) return(-EINVAL); if (sin6) { + if (sin6->sin6_family == AF_INET) + return udp_sendmsg(sk, msg, ulen); + if (addr_len < sizeof(*sin6)) return(-EINVAL); @@ -689,7 +702,7 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen) } } else { if (sk->state != TCP_ESTABLISHED) - return(-EINVAL); + return(-ENOTCONN); udh.uh.dest = sk->dport; daddr = &sk->net_pinfo.af_inet6.daddr; @@ -702,8 +715,10 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen) sin.sin_family = AF_INET; sin.sin_addr.s_addr = daddr->s6_addr32[3]; + sin.sin_port = udh.uh.dest; + msg->msg_name = (struct sockaddr *)(&sin); - return udp_sendmsg(sk, msg, len); + return udp_sendmsg(sk, msg, ulen); } udh.daddr = NULL; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f035e8c62..85aaaa7b8 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -11,32 +11,32 @@ * work I am currently employed to do there. * * All the material in this file is subject to the Gnu license version 2. 
- * Neither Alan Cox nor the Swansea University Computer Society admit liability - * nor provide warranty for any of this software. This material is provided - * as is and at no charge. + * Neither Alan Cox nor the Swansea University Computer Society admit + * liability nor provide warranty for any of this software. This material + * is provided as is and at no charge. * * Revision 0.21: Uses the new generic socket option code. * Revision 0.22: Gcc clean ups and drop out device registration. Use the * new multi-protocol edition of hard_header - * Revision 0.23: IPX /proc by Mark Evans. - * Adding a route will overwrite any existing route to the same - * network. + * Revision 0.23: IPX /proc by Mark Evans. Adding a route will + * will overwrite any existing route to the same network. * Revision 0.24: Supports new /proc with no 4K limit * Revision 0.25: Add ephemeral sockets, passive local network * identification, support for local net 0 and * multiple datalinks <Greg Page> - * Revision 0.26: Device drop kills IPX routes via it. (needed for modules) + * Revision 0.26: Device drop kills IPX routes via it. (needed for module) * Revision 0.27: Autobind <Mark Evans> * Revision 0.28: Small fix for multiple local networks <Thomas Winder> * Revision 0.29: Assorted major errors removed <Mark Evans> * Small correction to promisc mode error fix <Alan Cox> - * Asynchronous I/O support. - * Changed to use notifiers and the newer packet_type stuff. - * Assorted major fixes <Alejandro Liu> + * Asynchronous I/O support. Changed to use notifiers + * and the newer packet_type stuff. Assorted major + * fixes <Alejandro Liu> * Revision 0.30: Moved to net/ipx/... <Alan Cox> * Don't set address length on recvfrom that errors. * Incorrect verify_area. - * Revision 0.31: New sk_buffs. This still needs a lot of testing. <Alan Cox> + * Revision 0.31: New sk_buffs. This still needs a lot of + * testing. <Alan Cox> * Revision 0.32: Using sock_alloc_send_skb, firewall hooks. 
<Alan Cox> * Supports sendmsg/recvmsg * Revision 0.33: Internal network support, routing changes, uses a @@ -47,8 +47,9 @@ * Revision 0.36: Internal bump up for 2.1 * Revision 0.37: Began adding POSIXisms. * Revision 0.38: Asynchronous socket stuff made current. - * Revision 0.39: SPX interfaces - * Revision 0.40: Tiny SIOCGSTAMP fix (chris@cybernet.co.nz) + * Revision 0.39: SPX interfaces + * Revision 0.40: Tiny SIOCGSTAMP fix (chris@cybernet.co.nz) + * Revision 0.41: 802.2TR removed (p.norton@computer.org) * * Protect the module by a MOD_INC_USE_COUNT/MOD_DEC_USE_COUNT * pair. Also, now usage count is managed this way @@ -65,6 +66,7 @@ */ #include <linux/config.h> +#if defined (CONFIG_IPX) || defined (CONFIG_IPX_MODULE) #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> @@ -88,12 +90,12 @@ #include <linux/termios.h> /* For TIOCOUTQ/INQ */ #include <linux/interrupt.h> #include <net/p8022.h> -#include <net/p8022tr.h> #include <net/psnap.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/firewall.h> #include <linux/init.h> +#include <linux/if_arp.h> #ifdef MODULE static void ipx_proto_finito(void); @@ -105,44 +107,42 @@ static char ipxcfg_auto_select_primary = 0; static char ipxcfg_auto_create_interfaces = 0; /* Global Variables */ -static struct datalink_proto *p8022_datalink = NULL; -static struct datalink_proto *p8022tr_datalink = NULL; -static struct datalink_proto *pEII_datalink = NULL; -static struct datalink_proto *p8023_datalink = NULL; -static struct datalink_proto *pSNAP_datalink = NULL; +static struct datalink_proto *p8022_datalink = NULL; +static struct datalink_proto *pEII_datalink = NULL; +static struct datalink_proto *p8023_datalink = NULL; +static struct datalink_proto *pSNAP_datalink = NULL; static struct proto_ops ipx_dgram_ops; static struct net_proto_family *spx_family_ops; -static ipx_route *ipx_routes = NULL; -static ipx_interface *ipx_interfaces = NULL; -static ipx_interface *ipx_primary_net = NULL; 
-static ipx_interface *ipx_internal_net = NULL; +static ipx_route *ipx_routes = NULL; +static ipx_interface *ipx_interfaces = NULL; +static ipx_interface *ipx_primary_net = NULL; +static ipx_interface *ipx_internal_net = NULL; static int ipxcfg_set_auto_create(char val) { - if (ipxcfg_auto_create_interfaces != val) + if(ipxcfg_auto_create_interfaces != val) { - if (val) - { + if(val) MOD_INC_USE_COUNT; - } else - { MOD_DEC_USE_COUNT; - } + ipxcfg_auto_create_interfaces = val; } - return 0; + + return (0); } static int ipxcfg_set_auto_select(char val) { ipxcfg_auto_select_primary = val; - if (val && (ipx_primary_net == NULL)) + if(val && (ipx_primary_net == NULL)) ipx_primary_net = ipx_interfaces; - return 0; + + return (0); } static int ipxcfg_get_config_data(ipx_config_data *arg) @@ -151,84 +151,86 @@ static int ipxcfg_get_config_data(ipx_config_data *arg) vals.ipxcfg_auto_create_interfaces = ipxcfg_auto_create_interfaces; vals.ipxcfg_auto_select_primary = ipxcfg_auto_select_primary; - return copy_to_user(arg, &vals, sizeof(vals)) ? -EFAULT : 0; -} + return (copy_to_user(arg, &vals, sizeof(vals)) ? -EFAULT : 0); +} -/***********************************************************************************************************************\ -* * -* Handlers for the socket list. * -* * -\***********************************************************************************************************************/ +/**************************************************************************\ +* * +* Handlers for the socket list. * +* * +\**************************************************************************/ /* - * Note: Sockets may not be removed _during_ an interrupt or inet_bh - * handler using this technique. They can be added although we do not - * use this facility. + * Note: Sockets may not be removed _during_ an interrupt or inet_bh + * handler using this technique. They can be added although we do not + * use this facility. 
*/ void ipx_remove_socket(struct sock *sk) { - struct sock *s; - ipx_interface *intrfc; - unsigned long flags; + struct sock *s; + ipx_interface *intrfc; + unsigned long flags; save_flags(flags); cli(); /* Determine interface with which socket is associated */ intrfc = sk->protinfo.af_ipx.intrfc; - if (intrfc == NULL) { + if(intrfc == NULL) + { restore_flags(flags); return; } - s=intrfc->if_sklist; - if(s==sk) { - intrfc->if_sklist=s->next; + s = intrfc->if_sklist; + if(s == sk) + { + intrfc->if_sklist = s->next; restore_flags(flags); return; } - while(s && s->next) { - if(s->next==sk) { - s->next=sk->next; + while(s && s->next) + { + if(s->next == sk) + { + s->next = sk->next; restore_flags(flags); return; } - s=s->next; + s = s->next; } restore_flags(flags); } /* - * This is only called from user mode. Thus it protects itself against - * interrupt users but doesn't worry about being called during work. - * Once it is removed from the queue no interrupt or bottom half will - * touch it and we are (fairly 8-) ) safe. + * This is only called from user mode. Thus it protects itself against + * interrupt users but doesn't worry about being called during work. + * Once it is removed from the queue no interrupt or bottom half will + * touch it and we are (fairly 8-) ) safe. */ - static void ipx_destroy_socket(struct sock *sk) { struct sk_buff *skb; ipx_remove_socket(sk); - while((skb=skb_dequeue(&sk->receive_queue))!=NULL) { + while((skb = skb_dequeue(&sk->receive_queue)) != NULL) kfree_skb(skb); - } sk_free(sk); } -/* The following code is used to support IPX Interfaces (IPXITF). An +/* + * The following code is used to support IPX Interfaces (IPXITF). An * IPX interface is defined by a physical device and a frame type. 
*/ - static ipx_route * ipxrtr_lookup(__u32); static void ipxitf_clear_primary_net(void) { - if (ipxcfg_auto_select_primary && (ipx_interfaces != NULL)) + if(ipxcfg_auto_select_primary && (ipx_interfaces != NULL)) ipx_primary_net = ipx_interfaces; else ipx_primary_net = NULL; @@ -238,36 +240,38 @@ static ipx_interface *ipxitf_find_using_phys(struct device *dev, unsigned short { ipx_interface *i; - for (i=ipx_interfaces; - i && ((i->if_dev!=dev) || (i->if_dlink_type!=datalink)); - i=i->if_next) + for(i = ipx_interfaces; + i && ((i->if_dev != dev) || (i->if_dlink_type != datalink)); + i = i->if_next) ; - return i; + + return (i); } static ipx_interface *ipxitf_find_using_net(__u32 net) { ipx_interface *i; - if (!net) - return ipx_primary_net; + if(!net) + return (ipx_primary_net); - for (i=ipx_interfaces; i && (i->if_netnum!=net); i=i->if_next) + for(i = ipx_interfaces; i && (i->if_netnum != net); i = i->if_next) ; - return i; + return (i); } /* Sockets are bound to a particular IPX interface. 
*/ static void ipxitf_insert_socket(ipx_interface *intrfc, struct sock *sk) { - struct sock *s; + struct sock *s; sk->protinfo.af_ipx.intrfc = intrfc; sk->next = NULL; - if (intrfc->if_sklist == NULL) { + if(intrfc->if_sklist == NULL) intrfc->if_sklist = sk; - } else { + else + { for (s = intrfc->if_sklist; s->next != NULL; s = s->next) ; s->next = sk; @@ -276,14 +280,14 @@ static void ipxitf_insert_socket(ipx_interface *intrfc, struct sock *sk) static struct sock *ipxitf_find_socket(ipx_interface *intrfc, unsigned short port) { - struct sock *s; + struct sock *s; - for (s=intrfc->if_sklist; + for(s = intrfc->if_sklist; (s != NULL) && (s->protinfo.af_ipx.port != port); - s=s->next) + s = s->next) ; - return s; + return (s); } #ifdef CONFIG_IPX_INTERN @@ -293,16 +297,17 @@ static struct sock *ipxitf_find_internal_socket(ipx_interface *intrfc, { struct sock *s = intrfc->if_sklist; - while (s != NULL) + while(s != NULL) { - if ( (s->protinfo.af_ipx.port == port) + if((s->protinfo.af_ipx.port == port) && (memcmp(node, s->protinfo.af_ipx.node, IPX_NODE_LEN) == 0)) { break; } s = s->next; } - return s; + + return (s); } #endif @@ -310,14 +315,15 @@ static void ipxrtr_del_routes(ipx_interface *); static void ipxitf_down(ipx_interface *intrfc) { - ipx_interface *i; - struct sock *s, *t; + ipx_interface *i; + struct sock *s, *t; /* Delete all routes associated with this interface */ ipxrtr_del_routes(intrfc); /* error sockets */ - for (s = intrfc->if_sklist; s != NULL; ) { + for(s = intrfc->if_sklist; s != NULL; ) + { s->err = ENOLINK; s->error_report(s); s->protinfo.af_ipx.intrfc = NULL; @@ -330,28 +336,27 @@ static void ipxitf_down(ipx_interface *intrfc) intrfc->if_sklist = NULL; /* remove this interface from list */ - if (intrfc == ipx_interfaces) { + if(intrfc == ipx_interfaces) ipx_interfaces = intrfc->if_next; - } else { - for (i = ipx_interfaces; + else + { + for(i = ipx_interfaces; (i != NULL) && (i->if_next != intrfc); i = i->if_next) ; - if ((i != NULL) && 
(i->if_next == intrfc)) + if((i != NULL) && (i->if_next == intrfc)) i->if_next = intrfc->if_next; } /* remove this interface from *special* networks */ - if (intrfc == ipx_primary_net) + if(intrfc == ipx_primary_net) ipxitf_clear_primary_net(); - if (intrfc == ipx_internal_net) + if(intrfc == ipx_internal_net) ipx_internal_net = NULL; kfree_s(intrfc, sizeof(*intrfc)); - /* sockets still dangling - * - must be closed from user space - */ MOD_DEC_USE_COUNT; + return; } @@ -360,30 +365,29 @@ static int ipxitf_device_event(struct notifier_block *notifier, unsigned long ev struct device *dev = ptr; ipx_interface *i, *tmp; - if(event!=NETDEV_DOWN) + if(event != NETDEV_DOWN) return NOTIFY_DONE; - for (i = ipx_interfaces; i != NULL; ) { - + for(i = ipx_interfaces; i != NULL;) + { tmp = i->if_next; - if (i->if_dev == dev) + if(i->if_dev == dev) ipxitf_down(i); i = tmp; } - return NOTIFY_DONE; + return (NOTIFY_DONE); } static int ipxitf_def_skb_handler(struct sock *sock, struct sk_buff *skb) { - int retval; + int retval; - if((retval = sock_queue_rcv_skb(sock, skb))<0) - { + if((retval = sock_queue_rcv_skb(sock, skb)) < 0) kfree_skb(skb); - } - return retval; + + return (retval); } /* @@ -393,25 +397,25 @@ static int ipxitf_def_skb_handler(struct sock *sock, struct sk_buff *skb) #ifdef CONFIG_IPX_INTERN static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int copy) { - struct ipxhdr *ipx = skb->nh.ipxh; - struct sock *s; + struct ipxhdr *ipx = skb->nh.ipxh; + struct sock *s; int is_broadcast = (memcmp(ipx->ipx_dest.node, ipx_broadcast_node, IPX_NODE_LEN) == 0); s = intrfc->if_sklist; - while (s != NULL) + while(s != NULL) { - if ( (s->protinfo.af_ipx.port == ipx->ipx_dest.sock) - && ( is_broadcast + if((s->protinfo.af_ipx.port == ipx->ipx_dest.sock) + && (is_broadcast || (memcmp(ipx->ipx_dest.node, s->protinfo.af_ipx.node, IPX_NODE_LEN) == 0))) { /* We found a socket to which to send */ struct sk_buff *skb1; - if (copy != 0) + if(copy != 0) { skb1 = 
skb_clone(skb, GFP_ATOMIC); if (skb1 == NULL) @@ -424,57 +428,50 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c } ipxitf_def_skb_handler(s, skb1); - if (intrfc != ipx_internal_net) - { - /* on an external interface, at most - * one socket can listen. - */ + /* On an external interface, one socket can listen */ + if(intrfc != ipx_internal_net) break; - } } s = s->next; } - if (copy == 0) - { - /* skb was solely for us, and we did not make a copy, - * so free it. - */ + /* skb was solely for us, and we did not make a copy, so free it. */ + if(copy == 0) kfree_skb(skb); - } - return 0; + + return (0); } #else static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int copy) { - struct ipxhdr *ipx = skb->nh.ipxh; - struct sock *sock1 = NULL, *sock2 = NULL; - struct sk_buff *skb1 = NULL, *skb2 = NULL; + struct ipxhdr *ipx = skb->nh.ipxh; + struct sock *sock1 = NULL, *sock2 = NULL; + struct sk_buff *skb1 = NULL, *skb2 = NULL; sock1 = ipxitf_find_socket(intrfc, ipx->ipx_dest.sock); /* - * We need to check if there is a primary net and if - * this is addressed to one of the *SPECIAL* sockets because - * these need to be propagated to the primary net. - * The *SPECIAL* socket list contains: 0x452(SAP), 0x453(RIP) and - * 0x456(Diagnostic). + * We need to check if there is a primary net and if + * this is addressed to one of the *SPECIAL* sockets because + * these need to be propagated to the primary net. + * The *SPECIAL* socket list contains: 0x452(SAP), 0x453(RIP) and + * 0x456(Diagnostic). */ - if (ipx_primary_net && (intrfc != ipx_primary_net)) + if(ipx_primary_net && (intrfc != ipx_primary_net)) { - switch (ntohs(ipx->ipx_dest.sock)) + switch(ntohs(ipx->ipx_dest.sock)) { case 0x452: case 0x453: case 0x456: /* - * The appropriate thing to do here is to - * dup the packet and route to the primary net - * interface via ipxitf_send; however, we'll cheat - * and just demux it here. 
+ * The appropriate thing to do here is to + * dup the packet and route to the primary net + * interface via ipxitf_send; however, we'll + * cheat and just demux it here. */ sock2 = ipxitf_find_socket(ipx_primary_net, ipx->ipx_dest.sock); @@ -485,15 +482,13 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c } /* - * if there is nothing to do, return. The kfree will - * cancel any charging. + * If there is nothing to do return. The kfree will cancel any charging. */ - - if (sock1 == NULL && sock2 == NULL) + if(sock1 == NULL && sock2 == NULL) { - if (!copy) + if(!copy) kfree_skb(skb); - return 0; + return (0); } /* @@ -504,191 +499,162 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c * copies, we do as much as is possible. */ - if (copy) - { + if(copy) skb1 = skb_clone(skb, GFP_ATOMIC); - } else - { skb1 = skb; - } - if (skb1 == NULL) - return -ENOMEM; - - /* - * Do we need 2 SKBs? - */ + if(skb1 == NULL) + return (-ENOMEM); - if (sock1 && sock2) - { + /* Do we need 2 SKBs? 
*/ + if(sock1 && sock2) skb2 = skb_clone(skb1, GFP_ATOMIC); - } else skb2 = skb1; - if (sock1) + if(sock1) (void) ipxitf_def_skb_handler(sock1, skb1); - if (skb2 == NULL) - return -ENOMEM; + if(skb2 == NULL) + return (-ENOMEM); - if (sock2) + if(sock2) (void) ipxitf_def_skb_handler(sock2, skb2); - return 0; + return (0); } -#endif +#endif /* CONFIG_IPX_INTERN */ static struct sk_buff *ipxitf_adjust_skbuff(ipx_interface *intrfc, struct sk_buff *skb) { - struct sk_buff *skb2; - int in_offset = skb->h.raw - skb->head; - int out_offset = intrfc->if_ipx_offset; - int len; + struct sk_buff *skb2; + int in_offset = skb->h.raw - skb->head; + int out_offset = intrfc->if_ipx_offset; + int len; /* Hopefully, most cases */ - if (in_offset >= out_offset) { - return skb; - } + if(in_offset >= out_offset) + return (skb); /* Need new SKB */ - len = skb->len + out_offset; + len = skb->len + out_offset; skb2 = alloc_skb(len, GFP_ATOMIC); - if (skb2 != NULL) { - skb_reserve(skb2,out_offset); - skb2->nh.raw= - skb2->h.raw=skb_put(skb2,skb->len); + if(skb2 != NULL) + { + skb_reserve(skb2, out_offset); + skb2->nh.raw = + skb2->h.raw = skb_put(skb2,skb->len); memcpy(skb2->h.raw, skb->h.raw, skb->len); } kfree_skb(skb); - return NULL; + + return (NULL); } static int ipxitf_send(ipx_interface *intrfc, struct sk_buff *skb, char *node) { - struct ipxhdr *ipx = skb->nh.ipxh; - struct device *dev = intrfc->if_dev; - struct datalink_proto *dl = intrfc->if_dlink; - char dest_node[IPX_NODE_LEN]; - int send_to_wire = 1; - int addr_len; + struct ipxhdr *ipx = skb->nh.ipxh; + struct device *dev = intrfc->if_dev; + struct datalink_proto *dl = intrfc->if_dlink; + char dest_node[IPX_NODE_LEN]; + int send_to_wire = 1; + int addr_len; /* - * We need to know how many skbuffs it will take to send out this - * packet to avoid unnecessary copies. + * We need to know how many skbuffs it will take to send out this + * packet to avoid unnecessary copies. 
*/ - if ((dl == NULL) || (dev == NULL) || (dev->flags & IFF_LOOPBACK)) + if((dl == NULL) || (dev == NULL) || (dev->flags & IFF_LOOPBACK)) send_to_wire = 0; /* No non looped */ /* - * See if this should be demuxed to sockets on this interface + * See if this should be demuxed to sockets on this interface * - * We want to ensure the original was eaten or that we only use - * up clones. + * We want to ensure the original was eaten or that we only use + * up clones. */ - if (ipx->ipx_dest.net == intrfc->if_netnum) + if(ipx->ipx_dest.net == intrfc->if_netnum) { /* - * To our own node, loop and free the original. - * The internal net will receive on all node address. + * To our own node, loop and free the original. + * The internal net will receive on all node address. */ - if ((intrfc == ipx_internal_net) + if((intrfc == ipx_internal_net) || memcmp(intrfc->if_node, node, IPX_NODE_LEN) == 0) { - /* - * Don't charge sender - */ + /* Don't charge sender */ skb_orphan(skb); - /* - * Will charge receiver - */ - return ipxitf_demux_socket(intrfc, skb, 0); + + /* Will charge receiver */ + return (ipxitf_demux_socket(intrfc, skb, 0)); } - /* - * Broadcast, loop and possibly keep to send on. - */ - if (memcmp(ipx_broadcast_node, node, IPX_NODE_LEN) == 0) + + /* Broadcast, loop and possibly keep to send on. */ + if(memcmp(ipx_broadcast_node, node, IPX_NODE_LEN) == 0) { - if (!send_to_wire) + if(!send_to_wire) skb_orphan(skb); ipxitf_demux_socket(intrfc, skb, send_to_wire); - if (!send_to_wire) - return 0; + if(!send_to_wire) + return (0); } } /* - * If the originating net is not equal to our net; this is routed - * We are still charging the sender. Which is right - the driver - * free will handle this fairly. + * If the originating net is not equal to our net; this is routed + * We are still charging the sender. Which is right - the driver + * free will handle this fairly. 
*/ - - if (ipx->ipx_source.net != intrfc->if_netnum) + if(ipx->ipx_source.net != intrfc->if_netnum) { /* - * Unshare the buffer before modifying the count in - * case its a flood or tcpdump + * Unshare the buffer before modifying the count in + * case its a flood or tcpdump */ - skb=skb_unshare(skb, GFP_ATOMIC); + skb = skb_unshare(skb, GFP_ATOMIC); if(!skb) - return 0; - ipx = skb->nh.ipxh; - if (++(ipx->ipx_tctrl) > ipxcfg_max_hops) + return (0); + if(++(ipx->ipx_tctrl) > ipxcfg_max_hops) send_to_wire = 0; } - if (!send_to_wire) + if(!send_to_wire) { kfree_skb(skb); - return 0; + return (0); } - /* - * Determine the appropriate hardware address - */ - + /* Determine the appropriate hardware address */ addr_len = dev->addr_len; - if (memcmp(ipx_broadcast_node, node, IPX_NODE_LEN) == 0) + if(memcmp(ipx_broadcast_node, node, IPX_NODE_LEN) == 0) memcpy(dest_node, dev->broadcast, addr_len); else memcpy(dest_node, &(node[IPX_NODE_LEN-addr_len]), addr_len); - /* - * Make any compensation for differing physical/data link size - */ - + /* Make any compensation for differing physical/data link size */ skb = ipxitf_adjust_skbuff(intrfc, skb); - if (skb == NULL) - return 0; + if(skb == NULL) + return (0); /* set up data link and physical headers */ skb->dev = dev; skb->protocol = htons(ETH_P_IPX); dl->datalink_header(dl, skb, dest_node); -#if 0 - /* - * Now log the packet just before transmission - */ - - dump_pkt("IPX snd:", skb->nh.ipxh); - dump_data("ETH hdr:", skb->mac.raw, skb->nh.raw - skb->mac.raw); -#endif - /* - * Send it out - */ + /* Send it out */ dev_queue_xmit(skb); - return 0; + + return (0); } static int ipxrtr_add_route(__u32, ipx_interface *, unsigned char *); static int ipxitf_add_local_route(ipx_interface *intrfc) { - return ipxrtr_add_route(intrfc->if_netnum, intrfc, NULL); + return (ipxrtr_add_route(intrfc->if_netnum, intrfc, NULL)); } static const char * ipx_frame_name(unsigned short); @@ -699,26 +665,23 @@ static int ipxitf_rcv(ipx_interface *intrfc, 
struct sk_buff *skb) struct ipxhdr *ipx = skb->nh.ipxh; ipx_interface *i; - /* - * We firewall first, ask questions later. - */ - + /* We firewall first, ask questions later. */ if (call_in_firewall(PF_IPX, skb->dev, ipx, NULL, &skb)!=FW_ACCEPT) { kfree_skb(skb); - return 0; + return (0); } /* See if we should update our network number */ - if ( !intrfc->if_netnum && /* net number of intrfc not known yet (== 0) */ - (ipx->ipx_source.net == ipx->ipx_dest.net) && /* intra-net packet */ - ipx->ipx_source.net) /* source net number of packet != 0 */ + if(!intrfc->if_netnum /* net number of intrfc not known yet (== 0) */ + && (ipx->ipx_source.net == ipx->ipx_dest.net) /* intra packet */ + && ipx->ipx_source.net) /* source net number of packet != 0 */ { /* NB: NetWare servers lie about their hop count so we * dropped the test based on it. This is the best way * to determine this is a 0 hop count packet. */ - if ((i=ipxitf_find_using_net(ipx->ipx_source.net))==NULL) + if((i=ipxitf_find_using_net(ipx->ipx_source.net)) == NULL) { intrfc->if_netnum = ipx->ipx_source.net; (void) ipxitf_add_local_route(intrfc); @@ -734,7 +697,9 @@ static int ipxitf_rcv(ipx_interface *intrfc, struct sk_buff *skb) } } - if( ipx->ipx_type == IPX_TYPE_PPROP && ipx->ipx_tctrl < 8 && skb->pkt_type != PACKET_OTHERHOST ) + if(ipx->ipx_type == IPX_TYPE_PPROP + && ipx->ipx_tctrl < 8 + && skb->pkt_type != PACKET_OTHERHOST) { int i; ipx_interface *ifcs; @@ -743,235 +708,247 @@ static int ipxitf_rcv(ipx_interface *intrfc, struct sk_buff *skb) char *c; c = (char *) skb->data; - c += sizeof( struct ipxhdr ); + c += sizeof(struct ipxhdr); l = (__u32 *) c; i = 0; - /* - * Dump packet if too many hops or already seen this net - */ - if( ipx->ipx_tctrl < 8 ) - for( ; i < ipx->ipx_tctrl ; i++ ) - if( *l++ == intrfc->if_netnum ) - break; - if( i == ipx->ipx_tctrl ) + /* Dump packet if too many hops or already seen this net */ + if(ipx->ipx_tctrl < 8) + for( ; i < ipx->ipx_tctrl; i++) + if(*l++ == intrfc->if_netnum) 
+ break; + + if(i == ipx->ipx_tctrl) { /* < 8 hops && input itfc not in list */ *l = intrfc->if_netnum; /* insert recvd netnum into list */ /* xmit on all other interfaces... */ - for ( ifcs = ipx_interfaces; ifcs != NULL ; ifcs = ifcs->if_next) + for(ifcs = ipx_interfaces; ifcs != NULL; ifcs = ifcs->if_next) { /* That aren't in the list */ l = (__u32 *) c; - for( i = 0 ; i <= ipx->ipx_tctrl ; i++ ) - if( ifcs->if_netnum == *l++ ) + for(i = 0; i <= ipx->ipx_tctrl; i++) + if(ifcs->if_netnum == *l++) break; - if( i - 1 == ipx->ipx_tctrl ) + if(i - 1 == ipx->ipx_tctrl) { ipx->ipx_dest.net = ifcs->if_netnum; /* See if we are allowed to firewall forward */ - if (call_fw_firewall(PF_IPX, skb->dev, ipx, NULL, &skb)==FW_ACCEPT) + if(call_fw_firewall(PF_IPX, skb->dev, ipx, NULL, &skb) == FW_ACCEPT) { - skb2 = skb_clone(skb, GFP_ATOMIC); + skb2=skb_clone(skb, GFP_ATOMIC); ipxrtr_route_skb(skb2); } } } - /* - * Reset network number in packet - */ + + /* Reset network number in packet */ ipx->ipx_dest.net = intrfc->if_netnum; } - } - if (!ipx->ipx_dest.net) + if(!ipx->ipx_dest.net) ipx->ipx_dest.net = intrfc->if_netnum; - if (!ipx->ipx_source.net) + if(!ipx->ipx_source.net) ipx->ipx_source.net = intrfc->if_netnum; - if (intrfc->if_netnum != ipx->ipx_dest.net) + if(intrfc->if_netnum != ipx->ipx_dest.net) { - /* - * See if we are allowed to firewall forward - */ - if (call_fw_firewall(PF_IPX, skb->dev, ipx, NULL, &skb)!=FW_ACCEPT) + /* See if we are allowed to firewall forward */ + if(call_fw_firewall(PF_IPX, skb->dev, ipx, NULL, &skb) != FW_ACCEPT) { kfree_skb(skb); - return 0; + return (0); } /* We only route point-to-point packets. 
*/ - if (skb->pkt_type == PACKET_HOST) + if(skb->pkt_type == PACKET_HOST) { skb=skb_unshare(skb, GFP_ATOMIC); if(skb) - return ipxrtr_route_skb(skb); + return (ipxrtr_route_skb(skb)); else - return 0; + return (0); } + kfree_skb(skb); - return 0; + return (0); } /* see if we should keep it */ - if ((memcmp(ipx_broadcast_node, ipx->ipx_dest.node, IPX_NODE_LEN) == 0) + if((memcmp(ipx_broadcast_node, ipx->ipx_dest.node, IPX_NODE_LEN) == 0) || (memcmp(intrfc->if_node, ipx->ipx_dest.node, IPX_NODE_LEN) == 0)) { - return ipxitf_demux_socket(intrfc, skb, 0); + return (ipxitf_demux_socket(intrfc, skb, 0)); } /* we couldn't pawn it off so unload it */ kfree_skb(skb); - return 0; + return (0); } static void ipxitf_insert(ipx_interface *intrfc) { - ipx_interface *i; + ipx_interface *i; intrfc->if_next = NULL; - if (ipx_interfaces == NULL) { + if(ipx_interfaces == NULL) ipx_interfaces = intrfc; - } else { - for (i = ipx_interfaces; i->if_next != NULL; i = i->if_next) + else + { + for(i = ipx_interfaces; i->if_next != NULL; i = i->if_next) ; i->if_next = intrfc; } - if (ipxcfg_auto_select_primary && (ipx_primary_net == NULL)) + if(ipxcfg_auto_select_primary && (ipx_primary_net == NULL)) ipx_primary_net = intrfc; + MOD_INC_USE_COUNT; + return; } static int ipxitf_create_internal(ipx_interface_definition *idef) { - ipx_interface *intrfc; + ipx_interface *intrfc; /* Only one primary network allowed */ - if (ipx_primary_net != NULL) - return -EEXIST; + if(ipx_primary_net != NULL) + return (-EEXIST); /* Must have a valid network number */ - if (!idef->ipx_network) - return -EADDRNOTAVAIL; - if (ipxitf_find_using_net(idef->ipx_network) != NULL) - return -EADDRINUSE; - - intrfc=(ipx_interface *)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); - if (intrfc==NULL) - return -EAGAIN; - intrfc->if_dev=NULL; - intrfc->if_netnum=idef->ipx_network; - intrfc->if_dlink_type = 0; - intrfc->if_dlink = NULL; - intrfc->if_sklist = NULL; - intrfc->if_internal = 1; - intrfc->if_ipx_offset = 0; - 
intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; + if(!idef->ipx_network) + return (-EADDRNOTAVAIL); + if(ipxitf_find_using_net(idef->ipx_network) != NULL) + return (-EADDRINUSE); + + intrfc = (ipx_interface *)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); + if(intrfc == NULL) + return (-EAGAIN); + intrfc->if_dev = NULL; + intrfc->if_netnum = idef->ipx_network; + intrfc->if_dlink_type = 0; + intrfc->if_dlink = NULL; + intrfc->if_sklist = NULL; + intrfc->if_internal = 1; + intrfc->if_ipx_offset = 0; + intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; memcpy((char *)&(intrfc->if_node), idef->ipx_node, IPX_NODE_LEN); - ipx_internal_net = intrfc; - ipx_primary_net = intrfc; + ipx_internal_net = intrfc; + ipx_primary_net = intrfc; ipxitf_insert(intrfc); - return ipxitf_add_local_route(intrfc); + + return (ipxitf_add_local_route(intrfc)); } static int ipx_map_frame_type(unsigned char type) { - switch (type) + switch(type) { case IPX_FRAME_ETHERII: - return htons(ETH_P_IPX); + return (htons(ETH_P_IPX)); + case IPX_FRAME_8022: - return htons(ETH_P_802_2); - case IPX_FRAME_TR_8022: - return htons(ETH_P_TR_802_2); + return (htons(ETH_P_802_2)); + case IPX_FRAME_SNAP: - return htons(ETH_P_SNAP); + return (htons(ETH_P_SNAP)); + case IPX_FRAME_8023: - return htons(ETH_P_802_3); + return (htons(ETH_P_802_3)); } - return 0; + + return (0); } static int ipxitf_create(ipx_interface_definition *idef) { - struct device *dev; - unsigned short dlink_type = 0; - struct datalink_proto *datalink = NULL; - ipx_interface *intrfc; + struct device *dev; + unsigned short dlink_type = 0; + struct datalink_proto *datalink = NULL; + ipx_interface *intrfc; + + if(idef->ipx_special == IPX_INTERNAL) + return (ipxitf_create_internal(idef)); - if (idef->ipx_special == IPX_INTERNAL) - return ipxitf_create_internal(idef); + if((idef->ipx_special == IPX_PRIMARY) && (ipx_primary_net != NULL)) + return (-EEXIST); - if ((idef->ipx_special == IPX_PRIMARY) && (ipx_primary_net != NULL)) - return -EEXIST; + 
if(idef->ipx_network + && (ipxitf_find_using_net(idef->ipx_network) != NULL)) + return (-EADDRINUSE); - if (idef->ipx_network && - (ipxitf_find_using_net(idef->ipx_network) != NULL)) - return -EADDRINUSE; + dev = dev_get(idef->ipx_device); + if(dev == NULL) + return (-ENODEV); - switch (idef->ipx_dlink_type) + switch(idef->ipx_dlink_type) { - case IPX_FRAME_ETHERII: - dlink_type = htons(ETH_P_IPX); - datalink = pEII_datalink; - break; case IPX_FRAME_TR_8022: - dlink_type = htons(ETH_P_TR_802_2); - datalink = p8022tr_datalink; - break; + printk("IPX frame type 802.2TR is obsolete. Use 802.2 instead.\n"); + /* fall through */ + case IPX_FRAME_8022: - dlink_type = htons(ETH_P_802_2); - datalink = p8022_datalink; + dlink_type = htons(ETH_P_802_2); + datalink = p8022_datalink; break; + + case IPX_FRAME_ETHERII: + if (dev->type != ARPHRD_IEEE802) + { + dlink_type = htons(ETH_P_IPX); + datalink = pEII_datalink; + break; + } + else + printk("IPX frame type EtherII over token-ring is obsolete. Use SNAP instead.\n"); + /* fall through */ + case IPX_FRAME_SNAP: - dlink_type = htons(ETH_P_SNAP); - datalink = pSNAP_datalink; + dlink_type = htons(ETH_P_SNAP); + datalink = pSNAP_datalink; break; + case IPX_FRAME_8023: - dlink_type = htons(ETH_P_802_3); - datalink = p8023_datalink; + dlink_type = htons(ETH_P_802_3); + datalink = p8023_datalink; break; + case IPX_FRAME_NONE: default: break; } - if (datalink == NULL) - return -EPROTONOSUPPORT; - - dev=dev_get(idef->ipx_device); - if (dev==NULL) - return -ENODEV; - - if (!(dev->flags & IFF_UP)) - return -ENETDOWN; + if(!(dev->flags & IFF_UP)) + return (-ENETDOWN); /* Check addresses are suitable */ - if(dev->addr_len>IPX_NODE_LEN) - return -EINVAL; + if(dev->addr_len > IPX_NODE_LEN) + return (-EINVAL); + + if(datalink == NULL) + return (-EPROTONOSUPPORT); - if ((intrfc = ipxitf_find_using_phys(dev, dlink_type)) == NULL) + if((intrfc = ipxitf_find_using_phys(dev, dlink_type)) == NULL) { /* Ok now create */ - intrfc=(ipx_interface 
*)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); - if (intrfc==NULL) - return -EAGAIN; - intrfc->if_dev=dev; - intrfc->if_netnum=idef->ipx_network; - intrfc->if_dlink_type = dlink_type; - intrfc->if_dlink = datalink; - intrfc->if_sklist = NULL; - intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; + intrfc = (ipx_interface *)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); + if(intrfc == NULL) + return (-EAGAIN); + intrfc->if_dev = dev; + intrfc->if_netnum = idef->ipx_network; + intrfc->if_dlink_type = dlink_type; + intrfc->if_dlink = datalink; + intrfc->if_sklist = NULL; + intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; /* Setup primary if necessary */ - if ((idef->ipx_special == IPX_PRIMARY)) + if((idef->ipx_special == IPX_PRIMARY)) ipx_primary_net = intrfc; - intrfc->if_internal = 0; - intrfc->if_ipx_offset = dev->hard_header_len + datalink->header_length; - if(memcmp(idef->ipx_node, "\000\000\000\000\000\000", IPX_NODE_LEN)==0) + intrfc->if_internal = 0; + intrfc->if_ipx_offset = dev->hard_header_len + datalink->header_length; + if(memcmp(idef->ipx_node, "\000\000\000\000\000\000", IPX_NODE_LEN) == 0) { memset(intrfc->if_node, 0, IPX_NODE_LEN); memcpy((char *)&(intrfc->if_node[IPX_NODE_LEN-dev->addr_len]), @@ -983,234 +960,246 @@ static int ipxitf_create(ipx_interface_definition *idef) } /* If the network number is known, add a route */ - if (!intrfc->if_netnum) - return 0; + if(!intrfc->if_netnum) + return (0); - return ipxitf_add_local_route(intrfc); + return (ipxitf_add_local_route(intrfc)); } static int ipxitf_delete(ipx_interface_definition *idef) { - struct device *dev = NULL; - unsigned short dlink_type = 0; - ipx_interface *intrfc; + struct device *dev = NULL; + unsigned short dlink_type = 0; + ipx_interface *intrfc; - if (idef->ipx_special == IPX_INTERNAL) + if(idef->ipx_special == IPX_INTERNAL) { - if (ipx_internal_net != NULL) + if(ipx_internal_net != NULL) { ipxitf_down(ipx_internal_net); - return 0; + return (0); } - return -ENOENT; + return (-ENOENT); } dlink_type = 
ipx_map_frame_type(idef->ipx_dlink_type); - if (dlink_type == 0) - return -EPROTONOSUPPORT; + if(dlink_type == 0) + return (-EPROTONOSUPPORT); - dev=dev_get(idef->ipx_device); - if (dev==NULL) - return -ENODEV; + dev = dev_get(idef->ipx_device); + if(dev == NULL) + return (-ENODEV); intrfc = ipxitf_find_using_phys(dev, dlink_type); - if (intrfc != NULL) { + if(intrfc != NULL) + { ipxitf_down(intrfc); - return 0; + return (0); } - return -EINVAL; + + return (-EINVAL); } static ipx_interface *ipxitf_auto_create(struct device *dev, unsigned short dlink_type) { struct datalink_proto *datalink = NULL; - ipx_interface *intrfc; + ipx_interface *intrfc; - switch (htons(dlink_type)) + switch(htons(dlink_type)) { case ETH_P_IPX: datalink = pEII_datalink; break; + case ETH_P_802_2: datalink = p8022_datalink; break; - case ETH_P_TR_802_2: - datalink = p8022tr_datalink; - break; + case ETH_P_SNAP: datalink = pSNAP_datalink; break; + case ETH_P_802_3: datalink = p8023_datalink; break; + default: - return NULL; + return (NULL); } - if (dev == NULL) - return NULL; + if(dev == NULL) + return (NULL); /* Check addresses are suitable */ - if(dev->addr_len>IPX_NODE_LEN) return NULL; - - intrfc=(ipx_interface *)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); - if (intrfc!=NULL) - { - intrfc->if_dev=dev; - intrfc->if_netnum=0; - intrfc->if_dlink_type = dlink_type; - intrfc->if_dlink = datalink; - intrfc->if_sklist = NULL; - intrfc->if_internal = 0; - intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; - intrfc->if_ipx_offset = dev->hard_header_len + - datalink->header_length; + if(dev->addr_len>IPX_NODE_LEN) + return (NULL); + + intrfc = (ipx_interface *)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); + if(intrfc != NULL) + { + intrfc->if_dev = dev; + intrfc->if_netnum = 0; + intrfc->if_dlink_type = dlink_type; + intrfc->if_dlink = datalink; + intrfc->if_sklist = NULL; + intrfc->if_internal = 0; + intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; + intrfc->if_ipx_offset = dev->hard_header_len + + 
datalink->header_length; memset(intrfc->if_node, 0, IPX_NODE_LEN); memcpy((char *)&(intrfc->if_node[IPX_NODE_LEN-dev->addr_len]), dev->dev_addr, dev->addr_len); ipxitf_insert(intrfc); } - return intrfc; + return (intrfc); } -static int ipxitf_ioctl_real(unsigned int cmd, void *arg) +static int ipxitf_ioctl(unsigned int cmd, void *arg) { + struct ifreq ifr; + int err, val; + switch(cmd) { case SIOCSIFADDR: { - struct ifreq ifr; struct sockaddr_ipx *sipx; ipx_interface_definition f; - if (copy_from_user(&ifr,arg,sizeof(ifr))) - return -EFAULT; - sipx=(struct sockaddr_ipx *)&ifr.ifr_addr; - if(sipx->sipx_family!=AF_IPX) - return -EINVAL; - f.ipx_network=sipx->sipx_network; - memcpy(f.ipx_device, ifr.ifr_name, sizeof(f.ipx_device)); + if(copy_from_user(&ifr, arg, sizeof(ifr))) + return (-EFAULT); + + sipx = (struct sockaddr_ipx *)&ifr.ifr_addr; + if(sipx->sipx_family != AF_IPX) + return (-EINVAL); + + f.ipx_network = sipx->sipx_network; + memcpy(f.ipx_device,ifr.ifr_name,sizeof(f.ipx_device)); memcpy(f.ipx_node, sipx->sipx_node, IPX_NODE_LEN); - f.ipx_dlink_type=sipx->sipx_type; - f.ipx_special=sipx->sipx_special; - if(sipx->sipx_action==IPX_DLTITF) - return ipxitf_delete(&f); + f.ipx_dlink_type = sipx->sipx_type; + f.ipx_special = sipx->sipx_special; + + if(sipx->sipx_action == IPX_DLTITF) + return (ipxitf_delete(&f)); else - return ipxitf_create(&f); + return (ipxitf_create(&f)); } + case SIOCGIFADDR: { - struct ifreq ifr; struct sockaddr_ipx *sipx; ipx_interface *ipxif; struct device *dev; - int err; - if (copy_from_user(&ifr,arg,sizeof(ifr))) - return -EFAULT; - sipx=(struct sockaddr_ipx *)&ifr.ifr_addr; - dev=dev_get(ifr.ifr_name); + if(copy_from_user(&ifr, arg, sizeof(ifr))) + return (-EFAULT); + + sipx = (struct sockaddr_ipx *)&ifr.ifr_addr; + dev = dev_get(ifr.ifr_name); if(!dev) - return -ENODEV; - ipxif=ipxitf_find_using_phys(dev, ipx_map_frame_type(sipx->sipx_type)); - if(ipxif==NULL) - return -EADDRNOTAVAIL; - sipx->sipx_family=AF_IPX; - 
sipx->sipx_network=ipxif->if_netnum; + return (-ENODEV); + + ipxif = ipxitf_find_using_phys(dev, ipx_map_frame_type(sipx->sipx_type)); + if(ipxif == NULL) + return (-EADDRNOTAVAIL); + + sipx->sipx_family = AF_IPX; + sipx->sipx_network = ipxif->if_netnum; memcpy(sipx->sipx_node, ipxif->if_node, sizeof(sipx->sipx_node)); err = -EFAULT; - if (!copy_to_user(arg, &ifr, sizeof(ifr))) + if(!copy_to_user(arg, &ifr, sizeof(ifr))) err = 0; - return err; + + return (err); } + case SIOCAIPXITFCRT: { - int err, val; err = get_user(val, (unsigned char *) arg); - if (err) - return err; - return ipxcfg_set_auto_create(val); + if(err) + return (err); + + return (ipxcfg_set_auto_create(val)); } + case SIOCAIPXPRISLT: { - int err, val; err = get_user(val, (unsigned char *) arg); - if (err) - return err; - return ipxcfg_set_auto_select(val); + if(err) + return (err); + + return (ipxcfg_set_auto_select(val)); } + default: - return -EINVAL; + return (-EINVAL); } } -static int ipxitf_ioctl(unsigned int cmd, void *arg) -{ - int ret; - MOD_INC_USE_COUNT; - ret = ipxitf_ioctl_real (cmd,arg); - MOD_DEC_USE_COUNT; - return ret; -} - -/*******************************************************************************************************************\ -* * -* Routing tables for the IPX socket layer * -* * -\*******************************************************************************************************************/ +/**************************************************************************\ +* * +* Routing tables for the IPX socket layer. 
* +* * +\**************************************************************************/ static ipx_route *ipxrtr_lookup(__u32 net) { ipx_route *r; - for (r=ipx_routes; (r!=NULL) && (r->ir_net!=net); r=r->ir_next) + for(r = ipx_routes; (r != NULL) && (r->ir_net != net); r = r->ir_next) ; - return r; + return (r); } static int ipxrtr_add_route(__u32 network, ipx_interface *intrfc, unsigned char *node) { - ipx_route *rt; + ipx_route *rt; /* Get a route structure; either existing or create */ rt = ipxrtr_lookup(network); - if (rt==NULL) + if(rt == NULL) + { + rt = (ipx_route *)kmalloc(sizeof(ipx_route),GFP_ATOMIC); + if(rt == NULL) + return (-EAGAIN); + rt->ir_next = ipx_routes; + ipx_routes = rt; + } + else if(intrfc == ipx_internal_net) + return (-EEXIST); + + rt->ir_net = network; + rt->ir_intrfc = intrfc; + if(node == NULL) { - rt=(ipx_route *)kmalloc(sizeof(ipx_route),GFP_ATOMIC); - if(rt==NULL) - return -EAGAIN; - rt->ir_next=ipx_routes; - ipx_routes=rt; - } - else if (intrfc == ipx_internal_net) - return(-EEXIST); - - rt->ir_net = network; - rt->ir_intrfc = intrfc; - if (node == NULL) { memset(rt->ir_router_node, '\0', IPX_NODE_LEN); rt->ir_routed = 0; - } else { + } + else + { memcpy(rt->ir_router_node, node, IPX_NODE_LEN); - rt->ir_routed=1; + rt->ir_routed = 1; } - return 0; + + return (0); } static void ipxrtr_del_routes(ipx_interface *intrfc) { - ipx_route **r, *tmp; + ipx_route **r, *tmp; - for (r = &ipx_routes; (tmp = *r) != NULL; ) { - if (tmp->ir_intrfc == intrfc) { + for(r = &ipx_routes; (tmp = *r) != NULL;) + { + if(tmp->ir_intrfc == intrfc) + { *r = tmp->ir_next; kfree_s(tmp, sizeof(ipx_route)); - } else { - r = &(tmp->ir_next); } + else + r = &(tmp->ir_next); } } @@ -1220,33 +1209,34 @@ static int ipxrtr_create(ipx_route_definition *rd) /* Find the appropriate interface */ intrfc = ipxitf_find_using_net(rd->ipx_router_network); - if (intrfc == NULL) - return -ENETUNREACH; + if(intrfc == NULL) + return (-ENETUNREACH); - return 
ipxrtr_add_route(rd->ipx_network, intrfc, rd->ipx_router_node); + return (ipxrtr_add_route(rd->ipx_network, intrfc, rd->ipx_router_node)); } - static int ipxrtr_delete(long net) { - ipx_route **r; - ipx_route *tmp; + ipx_route **r; + ipx_route *tmp; - for (r = &ipx_routes; (tmp = *r) != NULL; ) + for(r = &ipx_routes; (tmp = *r) != NULL;) { - if (tmp->ir_net == net) + if(tmp->ir_net == net) { - if (!(tmp->ir_routed)) - /* Directly connected; can't lose route */ - return -EPERM; + /* Directly connected; can't lose route */ + if(!(tmp->ir_routed)) + return (-EPERM); + *r = tmp->ir_next; kfree_s(tmp, sizeof(ipx_route)); - return 0; + return (0); } + r = &(tmp->ir_next); } - return -ENOENT; + return (-ENOENT); } /* @@ -1263,59 +1253,41 @@ static __u16 ipx_set_checksum(struct ipxhdr *packet,int length) * don't know of a machine that isn't.) */ - __u32 sum=0; + __u32 sum = 0; - /* - * Pointer to second word - We skip the checksum field - */ + /* Pointer to second word - We skip the checksum field */ + __u16 *p = (__u16 *)&packet->ipx_pktsize; - __u16 *p=(__u16 *)&packet->ipx_pktsize; + /* Number of complete words */ + __u32 i = length >> 1; + char hops = packet->ipx_tctrl; - /* - * Number of complete words - */ - - __u32 i=length>>1; - char hops = packet->ipx_tctrl; - - packet->ipx_tctrl = 0; /* hop count excluded from checksum calc */ - - /* - * Loop through all complete words except the checksum field - */ + /* Hop count excluded from checksum calc */ + packet->ipx_tctrl = 0; + /* Loop through all complete words except the checksum field */ while(--i) - sum+=*p++; + sum += *p++; - /* - * Add on the last part word if it exists - */ - - if(packet->ipx_pktsize&htons(1)) - sum+=ntohs(0xff00)&*p; + /* Add on the last part word if it exists */ + if(packet->ipx_pktsize & htons(1)) + sum += ntohs(0xff00) & *p; packet->ipx_tctrl = hops; - /* - * Do final fixup - */ - sum=(sum&0xffff)+(sum>>16); + /* Do final fixup */ + sum = (sum & 0xffff) + (sum >> 16); - /* - * It's a pity 
there's no concept of carry in C - */ - - if(sum>=0x10000) + /* It's a pity there's no concept of carry in C */ + if(sum >= 0x10000) sum++; - return ~sum; -}; - + return (~sum); +} /* - * Route an outgoing frame from a socket. + * Route an outgoing frame from a socket. */ - static int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, struct iovec *iov, int len, int noblock) { struct sk_buff *skb; @@ -1327,7 +1299,7 @@ static int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, stru int err; /* Find the appropriate interface on which to send packet */ - if (!usipx->sipx_network && (ipx_primary_net != NULL)) + if(!usipx->sipx_network && (ipx_primary_net != NULL)) { usipx->sipx_network = ipx_primary_net->if_netnum; intrfc = ipx_primary_net; @@ -1335,35 +1307,36 @@ static int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, stru else { rt = ipxrtr_lookup(usipx->sipx_network); - if (rt==NULL) - return -ENETUNREACH; + if(rt == NULL) + return (-ENETUNREACH); intrfc = rt->ir_intrfc; } ipx_offset = intrfc->if_ipx_offset; - size=sizeof(struct ipxhdr)+len; - size += ipx_offset; + size = sizeof(struct ipxhdr) + len; + size += ipx_offset; - skb=sock_alloc_send_skb(sk, size, 0, noblock, &err); - if(skb==NULL) - return err; + skb = sock_alloc_send_skb(sk, size, 0, noblock, &err); + if(skb == NULL) + return (err); skb_reserve(skb,ipx_offset); - skb->sk=sk; + skb->sk = sk; /* Fill in IPX header */ - ipx=(struct ipxhdr *)skb_put(skb,sizeof(struct ipxhdr)); - ipx->ipx_pktsize=htons(len+sizeof(struct ipxhdr)); - ipx->ipx_tctrl=0; - ipx->ipx_type=usipx->sipx_type; - skb->h.raw = (void *)skb->nh.ipxh = ipx; + ipx = (struct ipxhdr *)skb_put(skb, sizeof(struct ipxhdr)); + ipx->ipx_pktsize= htons(len + sizeof(struct ipxhdr)); + ipx->ipx_tctrl = 0; + ipx->ipx_type = usipx->sipx_type; + skb->h.raw = (void *)skb->nh.ipxh = ipx; ipx->ipx_source.net = sk->protinfo.af_ipx.intrfc->if_netnum; + #ifdef CONFIG_IPX_INTERN memcpy(ipx->ipx_source.node, 
sk->protinfo.af_ipx.node, IPX_NODE_LEN); #else - if ((err = ntohs(sk->protinfo.af_ipx.port)) == 0x453 || err == 0x452) + if((err = ntohs(sk->protinfo.af_ipx.port)) == 0x453 || err == 0x452) { /* RIP/SAP special handling for mars_nwe */ ipx->ipx_source.net = intrfc->if_netnum; @@ -1374,114 +1347,119 @@ static int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, stru ipx->ipx_source.net = sk->protinfo.af_ipx.intrfc->if_netnum; memcpy(ipx->ipx_source.node, sk->protinfo.af_ipx.intrfc->if_node, IPX_NODE_LEN); } -#endif - ipx->ipx_source.sock = sk->protinfo.af_ipx.port; - ipx->ipx_dest.net=usipx->sipx_network; +#endif /* CONFIG_IPX_INTERN */ + + ipx->ipx_source.sock = sk->protinfo.af_ipx.port; + ipx->ipx_dest.net = usipx->sipx_network; memcpy(ipx->ipx_dest.node,usipx->sipx_node,IPX_NODE_LEN); - ipx->ipx_dest.sock=usipx->sipx_port; + ipx->ipx_dest.sock = usipx->sipx_port; err = memcpy_fromiovec(skb_put(skb,len),iov,len); - if (err) + if(err) { kfree_skb(skb); - return -EFAULT; + return (-EFAULT); } - /* - * Apply checksum. Not allowed on 802.3 links. - */ - - if(sk->no_check || intrfc->if_dlink_type==IPX_FRAME_8023) + /* Apply checksum. Not allowed on 802.3 links. */ + if(sk->no_check || intrfc->if_dlink_type == IPX_FRAME_8023) ipx->ipx_checksum=0xFFFF; else - ipx->ipx_checksum=ipx_set_checksum(ipx, len+sizeof(struct ipxhdr)); + ipx->ipx_checksum = ipx_set_checksum(ipx, len + sizeof(struct ipxhdr)); - if(call_out_firewall(PF_IPX, skb->dev, ipx, NULL, &skb)!=FW_ACCEPT) + if(call_out_firewall(PF_IPX, skb->dev, ipx, NULL, &skb) != FW_ACCEPT) { kfree_skb(skb); - return -EPERM; + return (-EPERM); } - return ipxitf_send(intrfc, skb, (rt && rt->ir_routed) ? - rt->ir_router_node : ipx->ipx_dest.node); + return (ipxitf_send(intrfc, skb, (rt && rt->ir_routed) ? 
+ rt->ir_router_node : ipx->ipx_dest.node)); } int ipxrtr_route_skb(struct sk_buff *skb) { - struct ipxhdr *ipx = skb->nh.ipxh; - ipx_route *r; - ipx_interface *i; + struct ipxhdr *ipx = skb->nh.ipxh; + ipx_interface *i; + ipx_route *r; r = ipxrtr_lookup(ipx->ipx_dest.net); - if (r == NULL) + if(r == NULL) /* no known route */ { - /* no known route */ kfree_skb(skb); - return 0; + return (0); } + i = r->ir_intrfc; (void)ipxitf_send(i, skb, (r->ir_routed) ? r->ir_router_node : ipx->ipx_dest.node); - return 0; + + return (0); } /* - * We use a normal struct rtentry for route handling + * We use a normal struct rtentry for route handling */ - static int ipxrtr_ioctl(unsigned int cmd, void *arg) { - int err; struct rtentry rt; /* Use these to behave like 'other' stacks */ struct sockaddr_ipx *sg,*st; + int err; err = copy_from_user(&rt,arg,sizeof(rt)); - if (err) - return -EFAULT; + if(err) + return (-EFAULT); - sg=(struct sockaddr_ipx *)&rt.rt_gateway; - st=(struct sockaddr_ipx *)&rt.rt_dst; + sg = (struct sockaddr_ipx *)&rt.rt_gateway; + st = (struct sockaddr_ipx *)&rt.rt_dst; - if(!(rt.rt_flags&RTF_GATEWAY)) - return -EINVAL; /* Direct routes are fixed */ - if(sg->sipx_family!=AF_IPX) - return -EINVAL; - if(st->sipx_family!=AF_IPX) - return -EINVAL; + if(!(rt.rt_flags & RTF_GATEWAY)) + return (-EINVAL); /* Direct routes are fixed */ + if(sg->sipx_family != AF_IPX) + return (-EINVAL); + if(st->sipx_family != AF_IPX) + return (-EINVAL); switch(cmd) { case SIOCDELRT: - return ipxrtr_delete(st->sipx_network); + return (ipxrtr_delete(st->sipx_network)); + case SIOCADDRT: { struct ipx_route_definition f; f.ipx_network=st->sipx_network; f.ipx_router_network=sg->sipx_network; memcpy(f.ipx_router_node, sg->sipx_node, IPX_NODE_LEN); - return ipxrtr_create(&f); + return (ipxrtr_create(&f)); } + default: - return -EINVAL; + return (-EINVAL); } } static const char *ipx_frame_name(unsigned short frame) { - switch (ntohs(frame)) + switch(ntohs(frame)) { case ETH_P_IPX: - return 
"EtherII"; + return ("EtherII"); + case ETH_P_802_2: - return "802.2"; + return ("802.2"); + case ETH_P_SNAP: - return "SNAP"; + return ("SNAP"); + case ETH_P_802_3: - return "802.3"; + return ("802.3"); + case ETH_P_TR_802_2: - return "802.2TR"; + return ("802.2TR"); + default: - return "None"; + return ("None"); } } @@ -1496,43 +1474,44 @@ static int ipx_interface_get_info(char *buffer, char **start, off_t offset, int length, int dummy) { ipx_interface *i; - int len=0; - off_t pos=0; - off_t begin=0; + off_t begin = 0, pos = 0; + int len = 0; /* Theory.. Keep printing in the same place until we pass offset */ - len += sprintf (buffer,"%-11s%-15s%-9s%-11s%s\n", "Network", + len += sprintf(buffer,"%-11s%-15s%-9s%-11s%s\n", "Network", "Node_Address", "Primary", "Device", "Frame_Type"); - for (i = ipx_interfaces; i != NULL; i = i->if_next) { + for(i = ipx_interfaces; i != NULL; i = i->if_next) + { len += sprintf(buffer+len, "%08lX ", (long unsigned int)ntohl(i->if_netnum)); - len += sprintf (buffer+len,"%02X%02X%02X%02X%02X%02X ", + len += sprintf(buffer+len,"%02X%02X%02X%02X%02X%02X ", i->if_node[0], i->if_node[1], i->if_node[2], i->if_node[3], i->if_node[4], i->if_node[5]); len += sprintf(buffer+len, "%-9s", (i == ipx_primary_net) ? 
"Yes" : "No"); - len += sprintf (buffer+len, "%-11s", ipx_device_name(i)); - len += sprintf (buffer+len, "%s\n", + len += sprintf(buffer+len, "%-11s", ipx_device_name(i)); + len += sprintf(buffer+len, "%s\n", ipx_frame_name(i->if_dlink_type)); /* Are we still dumping unwanted data then discard the record */ - pos=begin+len; + pos = begin + len; - if(pos<offset) { - len=0; /* Keep dumping into the buffer start */ - begin=pos; + if(pos < offset) + { + len = 0; /* Keep dumping into the buffer start */ + begin = pos; } - if(pos>offset+length) /* We have dumped enough */ + if(pos > offset + length) /* We have dumped enough */ break; } /* The data in question runs from begin to begin+len */ - *start=buffer+(offset-begin); /* Start of wanted data */ - len-=(offset-begin); /* Remove unwanted header data from length */ - if(len>length) - len=length; /* Remove unwanted tail data from length */ + *start = buffer + (offset - begin); /* Start of wanted data */ + len -= (offset - begin); /* Remove unwanted header data from length */ + if(len > length) + len = length; /* Remove unwanted tail data from length */ - return len; + return (len); } static int ipx_get_info(char *buffer, char **start, off_t offset, @@ -1540,21 +1519,23 @@ static int ipx_get_info(char *buffer, char **start, off_t offset, { struct sock *s; ipx_interface *i; - int len=0; - off_t pos=0; - off_t begin=0; + off_t begin = 0, pos = 0; + int len = 0; /* Theory.. 
Keep printing in the same place until we pass offset */ #ifdef CONFIG_IPX_INTERN - len += sprintf (buffer,"%-28s%-28s%-10s%-10s%-7s%s\n", "Local_Address", + len += sprintf(buffer,"%-28s%-28s%-10s%-10s%-7s%s\n", "Local_Address", #else - len += sprintf (buffer,"%-15s%-28s%-10s%-10s%-7s%s\n", "Local_Address", -#endif + len += sprintf(buffer,"%-15s%-28s%-10s%-10s%-7s%s\n", "Local_Address", +#endif /* CONFIG_IPX_INTERN */ "Remote_Address", "Tx_Queue", "Rx_Queue", "State", "Uid"); - for (i = ipx_interfaces; i != NULL; i = i->if_next) { - for (s = i->if_sklist; s != NULL; s = s->next) { + + for(i = ipx_interfaces; i != NULL; i = i->if_next) + { + for(s = i->if_sklist; s != NULL; s = s->next) + { #ifdef CONFIG_IPX_INTERN len += sprintf(buffer+len, "%08lX:%02X%02X%02X%02X%02X%02X:%04X ", @@ -1570,11 +1551,13 @@ static int ipx_get_info(char *buffer, char **start, off_t offset, len += sprintf(buffer+len,"%08lX:%04X ", htonl(i->if_netnum), htons(s->protinfo.af_ipx.port)); -#endif - if (s->state!=TCP_ESTABLISHED) { +#endif /* CONFIG_IPX_INTERN */ + + if(s->state != TCP_ESTABLISHED) len += sprintf(buffer+len, "%-28s", "Not_Connected"); - } else { - len += sprintf (buffer+len, + else + { + len += sprintf(buffer+len, "%08lX:%02X%02X%02X%02X%02X%02X:%04X ", (long unsigned int) htonl(s->protinfo.af_ipx.dest_addr.net), s->protinfo.af_ipx.dest_addr.node[0], @@ -1585,92 +1568,99 @@ static int ipx_get_info(char *buffer, char **start, off_t offset, s->protinfo.af_ipx.dest_addr.node[5], htons(s->protinfo.af_ipx.dest_addr.sock)); } - len += sprintf (buffer+len,"%08X %08X ", + + len += sprintf(buffer+len,"%08X %08X ", atomic_read(&s->wmem_alloc), - atomic_read(&s->rmem_alloc)); - len += sprintf (buffer+len,"%02X %03d\n", + atomic_read(&s->rmem_alloc)); + len += sprintf(buffer+len,"%02X %03d\n", s->state, SOCK_INODE(s->socket)->i_uid); - /* Are we still dumping unwanted data then discard the record */ - pos=begin+len; - - if(pos<offset) + pos = begin + len; + if(pos < offset) { - len=0; /* 
Keep dumping into the buffer start */ - begin=pos; + len = 0; + begin = pos; } - if(pos>offset+length) /* We have dumped enough */ + + if(pos > offset + length) /* We have dumped enough */ break; } } /* The data in question runs from begin to begin+len */ - *start=buffer+(offset-begin); /* Start of wanted data */ - len-=(offset-begin); /* Remove unwanted header data from length */ - if(len>length) - len=length; /* Remove unwanted tail data from length */ + *start = buffer + (offset-begin); + len -= (offset - begin); + if(len > length) + len = length; - return len; + return (len); } static int ipx_rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy) { ipx_route *rt; - int len=0; - off_t pos=0; - off_t begin=0; + off_t begin = 0, pos = 0; + int len = 0; - len += sprintf (buffer,"%-11s%-13s%s\n", + len += sprintf(buffer,"%-11s%-13s%s\n", "Network", "Router_Net", "Router_Node"); - for (rt = ipx_routes; rt != NULL; rt = rt->ir_next) + for(rt = ipx_routes; rt != NULL; rt = rt->ir_next) { - len += sprintf (buffer+len,"%08lX ", (long unsigned int) ntohl(rt->ir_net)); - if (rt->ir_routed) { - len += sprintf (buffer+len,"%08lX %02X%02X%02X%02X%02X%02X\n", + len += sprintf(buffer+len,"%08lX ", (long unsigned int) ntohl(rt->ir_net)); + if(rt->ir_routed) + { + len += sprintf(buffer+len,"%08lX %02X%02X%02X%02X%02X%02X\n", (long unsigned int) ntohl(rt->ir_intrfc->if_netnum), rt->ir_router_node[0], rt->ir_router_node[1], rt->ir_router_node[2], rt->ir_router_node[3], rt->ir_router_node[4], rt->ir_router_node[5]); - } else { - len += sprintf (buffer+len, "%-13s%s\n", + } + else + { + len += sprintf(buffer+len, "%-13s%s\n", "Directly", "Connected"); } - pos=begin+len; - if(pos<offset) + + pos = begin + len; + if(pos < offset) { - len=0; - begin=pos; + len = 0; + begin = pos; } - if(pos>offset+length) + + if(pos > offset + length) break; } - *start=buffer+(offset-begin); - len-=(offset-begin); - if(len>length) - len=length; - return len; + + *start = buffer + 
(offset - begin); + len -= (offset - begin); + if(len > length) + len = length; + + return (len); } -/*******************************************************************************************************************\ -* * -* Handling for system calls applied via the various interfaces to an IPX socket object * -* * -\*******************************************************************************************************************/ +/**************************************************************************\ +* * +* Handling for system calls applied via the various interfaces to an * +* IPX socket object. * +* * +\**************************************************************************/ static int ipx_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen) { struct sock *sk; - int err,opt; + int err, opt; - sk=sock->sk; + sk = sock->sk; - if (optlen!=sizeof(int)) - return(-EINVAL); + if(optlen != sizeof(int)) + return (-EINVAL); err = get_user(opt, (unsigned int *)optval); - if (err) - return err; + if(err) + return (err); switch(level) { @@ -1678,15 +1668,16 @@ static int ipx_setsockopt(struct socket *sock, int level, int optname, char *opt switch(optname) { case IPX_TYPE: - sk->protinfo.af_ipx.type=opt; - return 0; + sk->protinfo.af_ipx.type = opt; + return (0); + default: - return -ENOPROTOOPT; + return (-ENOPROTOOPT); } break; default: - return -ENOPROTOOPT; + return (-ENOPROTOOPT); } } @@ -1697,142 +1688,157 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname, int val=0; int len; - sk=sock->sk; + sk = sock->sk; switch(level) { - case SOL_IPX: switch(optname) { case IPX_TYPE: - val=sk->protinfo.af_ipx.type; + val = sk->protinfo.af_ipx.type; break; + default: - return -ENOPROTOOPT; + return (-ENOPROTOOPT); } break; default: - return -ENOPROTOOPT; + return (-ENOPROTOOPT); } - if(get_user(len,optlen)) - return -EFAULT; - len=min(len,sizeof(int)); + + if(get_user(len, optlen)) + return (-EFAULT); + + len = min(len, 
sizeof(int)); if(put_user(len, optlen)) - return -EFAULT; - if(copy_to_user(optval,&val,len)) - return -EFAULT; - return 0; + return (-EFAULT); + + if(copy_to_user(optval, &val, len)) + return (-EFAULT); + + return (0); } static int ipx_create(struct socket *sock, int protocol) { struct sock *sk; + switch(sock->type) { case SOCK_DGRAM: - sk=sk_alloc(AF_IPX, GFP_KERNEL, 1); - if(sk==NULL) - return(-ENOMEM); + sk = sk_alloc(PF_IPX, GFP_KERNEL, 1); + if(sk == NULL) + return (-ENOMEM); sock->ops = &ipx_dgram_ops; break; + case SOCK_SEQPACKET: /* - * From this point on SPX sockets are handled - * by af_spx.c and the methods replaced. + * From this point on SPX sockets are handled + * by af_spx.c and the methods replaced. */ if(spx_family_ops) - return spx_family_ops->create(sock,protocol); - /* Fall through if SPX is not loaded */ - case SOCK_STREAM: /* Allow higher levels to piggyback */ + return (spx_family_ops->create(sock,protocol)); + /* Fall through if SPX is not loaded */ + case SOCK_STREAM: /* Allow higher levels to piggyback */ default: - return(-ESOCKTNOSUPPORT); + return (-ESOCKTNOSUPPORT); } - sock_init_data(sock,sk); - sk->destruct=NULL; - sk->mtu=IPX_MTU; - sk->no_check = 1; /* Checksum off by default */ + + sock_init_data(sock, sk); + sk->destruct = NULL; + sk->mtu = IPX_MTU; + sk->no_check = 1; /* Checksum off by default */ + MOD_INC_USE_COUNT; - return 0; + + return (0); } static int ipx_release(struct socket *sock, struct socket *peer) { - struct sock *sk=sock->sk; - if(sk==NULL) - return(0); + struct sock *sk = sock->sk; + + if(sk == NULL) + return (0); + if(!sk->dead) sk->state_change(sk); - sk->dead=1; - sock->sk=NULL; + + sk->dead = 1; + sock->sk = NULL; ipx_destroy_socket(sk); - if ( sock->type == SOCK_DGRAM ) { + + if(sock->type == SOCK_DGRAM) MOD_DEC_USE_COUNT; - } - return(0); + + return (0); } static unsigned short ipx_first_free_socketnum(ipx_interface *intrfc) { - unsigned short socketNum = intrfc->if_sknum; + unsigned short socketNum = 
intrfc->if_sknum; - if (socketNum < IPX_MIN_EPHEMERAL_SOCKET) + if(socketNum < IPX_MIN_EPHEMERAL_SOCKET) socketNum = IPX_MIN_EPHEMERAL_SOCKET; - while (ipxitf_find_socket(intrfc, ntohs(socketNum)) != NULL) - if (socketNum > IPX_MAX_EPHEMERAL_SOCKET) + while(ipxitf_find_socket(intrfc, ntohs(socketNum)) != NULL) + { + if(socketNum > IPX_MAX_EPHEMERAL_SOCKET) socketNum = IPX_MIN_EPHEMERAL_SOCKET; else socketNum++; + } intrfc->if_sknum = socketNum; - return ntohs(socketNum); + + return (ntohs(socketNum)); } static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk; ipx_interface *intrfc; - struct sockaddr_ipx *addr=(struct sockaddr_ipx *)uaddr; + struct sockaddr_ipx *addr = (struct sockaddr_ipx *)uaddr; - sk=sock->sk; + sk = sock->sk; - if(sk->zapped==0) - return -EINVAL; + if(sk->zapped == 0) + return (-EINVAL); - if(addr_len!=sizeof(struct sockaddr_ipx)) - return -EINVAL; + if(addr_len != sizeof(struct sockaddr_ipx)) + return (-EINVAL); intrfc = ipxitf_find_using_net(addr->sipx_network); - if (intrfc == NULL) - return -EADDRNOTAVAIL; + if(intrfc == NULL) + return (-EADDRNOTAVAIL); - if (addr->sipx_port == 0) { + if(addr->sipx_port == 0) + { addr->sipx_port = ipx_first_free_socketnum(intrfc); - if (addr->sipx_port == 0) - return -EINVAL; + if(addr->sipx_port == 0) + return (-EINVAL); } - if(ntohs(addr->sipx_port)<IPX_MIN_EPHEMERAL_SOCKET && !suser()) - return -EACCES; /* protect IPX system stuff like routing/sap */ + /* protect IPX system stuff like routing/sap */ + if(ntohs(addr->sipx_port) < IPX_MIN_EPHEMERAL_SOCKET && !suser()) + return (-EACCES); - sk->protinfo.af_ipx.port=addr->sipx_port; + sk->protinfo.af_ipx.port = addr->sipx_port; #ifdef CONFIG_IPX_INTERN - if (intrfc == ipx_internal_net) + if(intrfc == ipx_internal_net) { /* The source address is to be set explicitly if the * socket is to be bound on the internal network. If a * node number 0 was specified, the default is used. 
*/ - if (memcmp(addr->sipx_node, ipx_broadcast_node, - IPX_NODE_LEN) == 0) - { - return -EINVAL; - } - if (memcmp(addr->sipx_node, ipx_this_node, IPX_NODE_LEN) == 0) + if(memcmp(addr->sipx_node,ipx_broadcast_node,IPX_NODE_LEN) == 0) + return (-EINVAL); + if(memcmp(addr->sipx_node, ipx_this_node, IPX_NODE_LEN) == 0) { memcpy(sk->protinfo.af_ipx.node, intrfc->if_node, IPX_NODE_LEN); @@ -1841,14 +1847,15 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { memcpy(sk->protinfo.af_ipx.node, addr->sipx_node, IPX_NODE_LEN); } - if (ipxitf_find_internal_socket(intrfc, + + if(ipxitf_find_internal_socket(intrfc, sk->protinfo.af_ipx.node, sk->protinfo.af_ipx.port) != NULL) { SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n", ntohs((int)addr->sipx_port)); - return -EADDRINUSE; + return (-EADDRINUSE); } } else @@ -1861,85 +1868,96 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) memcpy(sk->protinfo.af_ipx.node, intrfc->if_node, IPX_NODE_LEN); - if(ipxitf_find_socket(intrfc, addr->sipx_port)!=NULL) { + if(ipxitf_find_socket(intrfc, addr->sipx_port) != NULL) + { SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n", ntohs((int)addr->sipx_port)); - return -EADDRINUSE; + return (-EADDRINUSE); } } -#else +#else /* !def CONFIG_IPX_INTERN */ /* Source addresses are easy. 
It must be our network:node pair for an interface routed to IPX with the ipx routing ioctl() */ - if(ipxitf_find_socket(intrfc, addr->sipx_port)!=NULL) { + if(ipxitf_find_socket(intrfc, addr->sipx_port) != NULL) + { SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n", ntohs((int)addr->sipx_port)); - return -EADDRINUSE; + return (-EADDRINUSE); } -#endif +#endif /* CONFIG_IPX_INTERN */ ipxitf_insert_socket(intrfc, sk); - sk->zapped=0; + sk->zapped = 0; SOCK_DEBUG(sk, "IPX: bound socket 0x%04X.\n", ntohs(addr->sipx_port) ); - return 0; + return (0); } static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { - struct sock *sk=sock->sk; + struct sock *sk = sock->sk; struct sockaddr_ipx *addr; - sk->state = TCP_CLOSE; - sock->state = SS_UNCONNECTED; + sk->state = TCP_CLOSE; + sock->state = SS_UNCONNECTED; - if(addr_len!=sizeof(*addr)) - return(-EINVAL); - addr=(struct sockaddr_ipx *)uaddr; + if(addr_len != sizeof(*addr)) + return (-EINVAL); + addr = (struct sockaddr_ipx *)uaddr; - if(sk->protinfo.af_ipx.port==0) /* put the autobinding in */ + if(sk->protinfo.af_ipx.port == 0) { struct sockaddr_ipx uaddr; int ret; - uaddr.sipx_port = 0; - uaddr.sipx_network = 0; + uaddr.sipx_port = 0; + uaddr.sipx_network = 0; + #ifdef CONFIG_IPX_INTERN memcpy(uaddr.sipx_node, sk->protinfo.af_ipx.intrfc->if_node, IPX_NODE_LEN); -#endif - ret = ipx_bind (sock, (struct sockaddr *)&uaddr, +#endif /* CONFIG_IPX_INTERN */ + + ret = ipx_bind(sock, (struct sockaddr *)&uaddr, sizeof(struct sockaddr_ipx)); - if (ret != 0) return (ret); + if(ret != 0) + return (ret); } - if(ipxrtr_lookup(addr->sipx_network)==NULL) - return -ENETUNREACH; - sk->protinfo.af_ipx.dest_addr.net=addr->sipx_network; - sk->protinfo.af_ipx.dest_addr.sock=addr->sipx_port; + if(ipxrtr_lookup(addr->sipx_network) == NULL) + return (-ENETUNREACH); + + sk->protinfo.af_ipx.dest_addr.net = addr->sipx_network; + sk->protinfo.af_ipx.dest_addr.sock = addr->sipx_port; 
memcpy(sk->protinfo.af_ipx.dest_addr.node, addr->sipx_node,IPX_NODE_LEN); - sk->protinfo.af_ipx.type=addr->sipx_type; - if(sock->type == SOCK_DGRAM ) { - sock->state = SS_CONNECTED; - sk->state=TCP_ESTABLISHED; + sk->protinfo.af_ipx.type = addr->sipx_type; + + if(sock->type == SOCK_DGRAM ) + { + sock->state = SS_CONNECTED; + sk->state = TCP_ESTABLISHED; } - return 0; + + return (0); } static int ipx_accept(struct socket *sock, struct socket *newsock, int flags) { - if(newsock->sk) { + if(newsock->sk) + { sk_free(newsock->sk); MOD_DEC_USE_COUNT; } - return -EOPNOTSUPP; + + return (-EOPNOTSUPP); } static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, @@ -1949,163 +1967,105 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, struct sockaddr_ipx sipx; struct sock *sk; - sk=sock->sk; + sk = sock->sk; *uaddr_len = sizeof(struct sockaddr_ipx); - if(peer) { - if(sk->state!=TCP_ESTABLISHED) - return -ENOTCONN; - addr=&sk->protinfo.af_ipx.dest_addr; + if(peer) + { + if(sk->state != TCP_ESTABLISHED) + return (-ENOTCONN); + + addr = &sk->protinfo.af_ipx.dest_addr; sipx.sipx_network = addr->net; memcpy(sipx.sipx_node,addr->node,IPX_NODE_LEN); sipx.sipx_port = addr->sock; - } else { - if (sk->protinfo.af_ipx.intrfc != NULL) { - sipx.sipx_network = sk->protinfo.af_ipx.intrfc->if_netnum; + } + else + { + if(sk->protinfo.af_ipx.intrfc != NULL) + { + sipx.sipx_network=sk->protinfo.af_ipx.intrfc->if_netnum; #ifdef CONFIG_IPX_INTERN memcpy(sipx.sipx_node, sk->protinfo.af_ipx.node, IPX_NODE_LEN); #else - memcpy(sipx.sipx_node, - sk->protinfo.af_ipx.intrfc->if_node, IPX_NODE_LEN); -#endif + memcpy(sipx.sipx_node, sk->protinfo.af_ipx.intrfc->if_node, IPX_NODE_LEN); +#endif /* CONFIG_IPX_INTERN */ - } else { + } + else + { sipx.sipx_network = 0; memset(sipx.sipx_node, '\0', IPX_NODE_LEN); } + sipx.sipx_port = sk->protinfo.af_ipx.port; } sipx.sipx_family = AF_IPX; - sipx.sipx_type = sk->protinfo.af_ipx.type; + sipx.sipx_type = sk->protinfo.af_ipx.type; 
memcpy(uaddr,&sipx,sizeof(sipx)); - return 0; -} -#if 0 -/* - * User to dump IPX packets (debugging) - */ -void dump_data(char *str,unsigned char *d, int len) -{ - static char h2c[] = "0123456789ABCDEF"; - int l,i; - char *p, b[64]; - for (l=0;len > 0 && l<16;l++) - { - p = b; - for (i=0; i < 8 ; i++, --len) - { - if (len > 0) - { - *(p++) = h2c[(d[i] >> 4) & 0x0f]; - *(p++) = h2c[d[i] & 0x0f]; - } - else - { - *(p++) = ' '; - *(p++) = ' '; - } - *(p++) = ' '; - } - *(p++) = '-'; - *(p++) = ' '; - len += 8; - for (i=0; i < 8 ; i++, --len) - { - if (len > 0) - *(p++) = ' '<= d[i] && d[i]<'\177' ? d[i] : '.'; - else - *(p++) = ' '; - } - *p = '\000'; - d += i; - printk(KERN_DEBUG"%s-%04X: %s\n",str,l*8,b); - } -} - -void dump_addr(char *str,ipx_address *p) -{ - printk(KERN_DEBUG"%s: %08lX:%02X%02X%02X%02X%02X%02X:%04X\n", - str,(long unsigned int)ntohl(p->net),p->node[0],p->node[1],p->node[2], - p->node[3],p->node[4],p->node[5],ntohs(p->sock)); -} - -void dump_hdr(char *str,struct ipxhdr *p) -{ - printk(KERN_DEBUG"%s: CHKSUM=%04X SIZE=%d (%04X) HOPS=%d (%02X) TYPE=%02X\n", - str,p->ipx_checksum,ntohs(p->ipx_pktsize),ntohs(p->ipx_pktsize), - p->ipx_tctrl,p->ipx_tctrl,p->ipx_type); - dump_addr(" IPX-DST",&p->ipx_dest); - dump_addr(" IPX-SRC",&p->ipx_source); -} - -void dump_pkt(char *str,struct ipxhdr *p) -{ - int len = ntohs(p->ipx_pktsize); - dump_hdr(str,p); - if (len > 30) - dump_data(str,(unsigned char *)p + 30, len - 30); + return (0); } -#endif int ipx_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) { /* NULL here for pt means the packet was looped back */ - ipx_interface *intrfc; + ipx_interface *intrfc; struct ipxhdr *ipx; ipx = skb->nh.ipxh; /* Too small? 
*/ - if(ntohs(ipx->ipx_pktsize)<sizeof(struct ipxhdr)) { + if(ntohs(ipx->ipx_pktsize) < sizeof(struct ipxhdr)) + { kfree_skb(skb); - return 0; + return (0); } - if(ipx->ipx_checksum!=IPX_NO_CHECKSUM) + if(ipx->ipx_checksum != IPX_NO_CHECKSUM) { - if(ipx_set_checksum(ipx, ntohs(ipx->ipx_pktsize))!=ipx->ipx_checksum) + if(ipx_set_checksum(ipx, ntohs(ipx->ipx_pktsize)) != ipx->ipx_checksum) { kfree_skb(skb); - return 0; + return (0); } } /* Determine what local ipx endpoint this is */ intrfc = ipxitf_find_using_phys(dev, pt->type); - if (intrfc == NULL) + if(intrfc == NULL) { - if (ipxcfg_auto_create_interfaces && - ntohl(ipx->ipx_dest.net)!=0L) + if(ipxcfg_auto_create_interfaces + && ntohl(ipx->ipx_dest.net) != 0L) { intrfc = ipxitf_auto_create(dev, pt->type); } - if (intrfc == NULL) { - /* Not one of ours */ + if(intrfc == NULL) /* Not one of ours */ + { kfree_skb(skb); - return 0; + return (0); } } - return ipxitf_rcv(intrfc, skb); + return (ipxitf_rcv(intrfc, skb)); } static int ipx_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct scm_cookie *scm) { - struct sock *sk=sock->sk; + struct sock *sk = sock->sk; struct sockaddr_ipx *usipx=(struct sockaddr_ipx *)msg->msg_name; struct sockaddr_ipx local_sipx; int retval; int flags = msg->msg_flags; - if (sk->zapped) - return -EIO; /* Socket not bound */ - if (flags&~MSG_DONTWAIT) - return -EINVAL; + if(sk->zapped) + return (-EIO); /* Socket not bound */ + if(flags & ~MSG_DONTWAIT) + return (-EINVAL); if(usipx) { @@ -2120,192 +2080,200 @@ static int ipx_sendmsg(struct socket *sock, struct msghdr *msg, int len, memcpy(uaddr.sipx_node, sk->protinfo.af_ipx.intrfc ->if_node, IPX_NODE_LEN); #endif - ret = ipx_bind (sock, (struct sockaddr *)&uaddr, + ret = ipx_bind(sock, (struct sockaddr *)&uaddr, sizeof(struct sockaddr_ipx)); - if (ret != 0) return ret; + if(ret != 0) + return (ret); } - if(msg->msg_namelen <sizeof(*usipx)) - return -EINVAL; + if(msg->msg_namelen < sizeof(*usipx)) + return (-EINVAL); 
if(usipx->sipx_family != AF_IPX) - return -EINVAL; + return (-EINVAL); } else { - if(sk->state!=TCP_ESTABLISHED) - return -ENOTCONN; + if(sk->state != TCP_ESTABLISHED) + return (-ENOTCONN); + usipx=&local_sipx; - usipx->sipx_family=AF_IPX; - usipx->sipx_type=sk->protinfo.af_ipx.type; - usipx->sipx_port=sk->protinfo.af_ipx.dest_addr.sock; - usipx->sipx_network=sk->protinfo.af_ipx.dest_addr.net; + usipx->sipx_family = AF_IPX; + usipx->sipx_type = sk->protinfo.af_ipx.type; + usipx->sipx_port = sk->protinfo.af_ipx.dest_addr.sock; + usipx->sipx_network = sk->protinfo.af_ipx.dest_addr.net; memcpy(usipx->sipx_node,sk->protinfo.af_ipx.dest_addr.node,IPX_NODE_LEN); } retval = ipxrtr_route_packet(sk, usipx, msg->msg_iov, len, flags&MSG_DONTWAIT); - if (retval < 0) - return retval; + if(retval < 0) + return (retval); - return len; + return (len); } static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags, struct scm_cookie *scm) { - struct sock *sk=sock->sk; - struct sockaddr_ipx *sipx=(struct sockaddr_ipx *)msg->msg_name; + struct sock *sk = sock->sk; + struct sockaddr_ipx *sipx = (struct sockaddr_ipx *)msg->msg_name; struct ipxhdr *ipx = NULL; struct sk_buff *skb; int copied, err; - if (sk->zapped) - return -ENOTCONN; + if(sk->zapped) + return (-ENOTCONN); - skb=skb_recv_datagram(sk,flags&~MSG_DONTWAIT,flags&MSG_DONTWAIT,&err); - if (!skb) + skb = skb_recv_datagram(sk,flags&~MSG_DONTWAIT,flags&MSG_DONTWAIT,&err); + if(!skb) goto out; - ipx = skb->nh.ipxh; - copied = ntohs(ipx->ipx_pktsize) - sizeof(struct ipxhdr); + ipx = skb->nh.ipxh; + copied = ntohs(ipx->ipx_pktsize) - sizeof(struct ipxhdr); if(copied > size) { copied=size; - msg->msg_flags|=MSG_TRUNC; + msg->msg_flags |= MSG_TRUNC; } err = skb_copy_datagram_iovec(skb, sizeof(struct ipxhdr), msg->msg_iov, copied); - if (err) + if(err) goto out_free; - sk->stamp=skb->stamp; + sk->stamp = skb->stamp; msg->msg_namelen = sizeof(*sipx); if(sipx) { - sipx->sipx_family=AF_IPX; - 
sipx->sipx_port=ipx->ipx_source.sock; + sipx->sipx_family = AF_IPX; + sipx->sipx_port = ipx->ipx_source.sock; memcpy(sipx->sipx_node,ipx->ipx_source.node,IPX_NODE_LEN); - sipx->sipx_network=ipx->ipx_source.net; - sipx->sipx_type = ipx->ipx_type; + sipx->sipx_network = ipx->ipx_source.net; + sipx->sipx_type = ipx->ipx_type; } err = copied; out_free: skb_free_datagram(sk, skb); out: - return err; + return (err); } /* - * FIXME: We have to support shutdown really. + * FIXME: We have to really support shutdown. */ - static int ipx_shutdown(struct socket *sk,int how) { - return -EOPNOTSUPP; + return (-EOPNOTSUPP); } static int ipx_ioctl(struct socket *sock,unsigned int cmd, unsigned long arg) { - long amount=0; - struct sock *sk=sock->sk; + long amount = 0; + struct sock *sk = sock->sk; switch(cmd) { case TIOCOUTQ: amount = sk->sndbuf - atomic_read(&sk->wmem_alloc); - if(amount<0) - amount=0; - return put_user(amount, (int *)arg); + if(amount < 0) + amount = 0; + return (put_user(amount, (int *)arg)); + case TIOCINQ: { struct sk_buff *skb; /* These two are safe on a single CPU system as only user tasks fiddle here */ - if((skb=skb_peek(&sk->receive_queue))!=NULL) - amount=skb->len-sizeof(struct ipxhdr); - return put_user(amount, (int *)arg); + if((skb = skb_peek(&sk->receive_queue)) != NULL) + amount = skb->len - sizeof(struct ipxhdr); + return (put_user(amount, (int *)arg)); } + case SIOCADDRT: case SIOCDELRT: if(!suser()) - return -EPERM; - return(ipxrtr_ioctl(cmd,(void *)arg)); + return (-EPERM); + return (ipxrtr_ioctl(cmd,(void *)arg)); + case SIOCSIFADDR: case SIOCAIPXITFCRT: case SIOCAIPXPRISLT: if(!suser()) - return -EPERM; + return (-EPERM); + case SIOCGIFADDR: - return(ipxitf_ioctl(cmd,(void *)arg)); + return (ipxitf_ioctl(cmd,(void *)arg)); + case SIOCIPXCFGDATA: - { - return(ipxcfg_get_config_data((void *)arg)); - } + return (ipxcfg_get_config_data((void *)arg)); + case SIOCGSTAMP: { int ret = -EINVAL; - if (sk) + if(sk) { - if(sk->stamp.tv_sec==0) - return 
-ENOENT; + if(sk->stamp.tv_sec == 0) + return (-ENOENT); ret = -EFAULT; - if (!copy_to_user((void *)arg, &sk->stamp, + if(!copy_to_user((void *)arg, &sk->stamp, sizeof(struct timeval))) ret = 0; } - return ret; + + return (ret); } + case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCGIFBRDADDR: case SIOCSIFBRDADDR: case SIOCGIFNETMASK: case SIOCSIFNETMASK: - return -EINVAL; + return (-EINVAL); + default: - return(dev_ioctl(cmd,(void *) arg)); + return (dev_ioctl(cmd,(void *) arg)); } - /*NOTREACHED*/ - return(0); + + /*NOT REACHED*/ + return (0); } /* - * SPX interface support + * SPX interface support */ int ipx_register_spx(struct proto_ops **p, struct net_proto_family *spx) { - if(spx_family_ops!=NULL) - return -EBUSY; - cli(); - MOD_INC_USE_COUNT; - *p=&ipx_dgram_ops; - spx_family_ops=spx; - sti(); - return 0; + if(spx_family_ops!=NULL) + return -EBUSY; + cli(); + MOD_INC_USE_COUNT; + *p=&ipx_dgram_ops; + spx_family_ops=spx; + sti(); + return 0; } int ipx_unregister_spx(void) { - spx_family_ops=NULL; - MOD_DEC_USE_COUNT; - return 0; + spx_family_ops=NULL; + MOD_DEC_USE_COUNT; + return 0; } - /* - * Socket family declarations + * Socket family declarations */ - + static struct net_proto_family ipx_family_ops = { - AF_IPX, + PF_IPX, ipx_create }; static struct proto_ops ipx_dgram_ops = { - AF_IPX, + PF_IPX, sock_no_dup, ipx_release, ipx_bind, @@ -2352,10 +2320,10 @@ static struct notifier_block ipx_dev_notifier={ }; -extern struct datalink_proto *make_EII_client(void); -extern struct datalink_proto *make_8023_client(void); -extern void destroy_EII_client(struct datalink_proto *); -extern void destroy_8023_client(struct datalink_proto *); +extern struct datalink_proto *make_EII_client(void); +extern struct datalink_proto *make_8023_client(void); +extern void destroy_EII_client(struct datalink_proto *); +extern void destroy_8023_client(struct datalink_proto *); struct proc_dir_entry ipx_procinfo = { PROC_NET_IPX, 3, "ipx", S_IFREG | S_IRUGO, @@ -2372,32 +2340,29 @@ 
struct proc_dir_entry ipx_rt_procinfo = { 1, 0, 0, 0, &proc_net_inode_operations, ipx_rt_get_info }; -static unsigned char ipx_8022_type = 0xE0; -static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; +static unsigned char ipx_8022_type = 0xE0; +static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; -void -ipx_proto_init(struct net_proto *pro) +void ipx_proto_init(struct net_proto *pro) { (void) sock_register(&ipx_family_ops); pEII_datalink = make_EII_client(); - ipx_dix_packet_type.type=htons(ETH_P_IPX); + ipx_dix_packet_type.type = htons(ETH_P_IPX); dev_add_pack(&ipx_dix_packet_type); p8023_datalink = make_8023_client(); - ipx_8023_packet_type.type=htons(ETH_P_802_3); + ipx_8023_packet_type.type = htons(ETH_P_802_3); dev_add_pack(&ipx_8023_packet_type); - if ((p8022_datalink = register_8022_client(ipx_8022_type, ipx_rcv)) == NULL) + if((p8022_datalink = register_8022_client(ipx_8022_type,ipx_rcv)) == NULL) printk(KERN_CRIT "IPX: Unable to register with 802.2\n"); - if ((p8022tr_datalink = register_8022tr_client(ipx_8022_type, ipx_rcv)) == NULL) - printk(KERN_CRIT "IPX: Unable to register with 802.2TR\n"); - - if ((pSNAP_datalink = register_snap_client(ipx_snap_id, ipx_rcv)) == NULL) + if((pSNAP_datalink = register_snap_client(ipx_snap_id,ipx_rcv)) == NULL) printk(KERN_CRIT "IPX: Unable to register with SNAP\n"); register_netdevice_notifier(&ipx_dev_notifier); + #ifdef CONFIG_PROC_FS proc_net_register(&ipx_procinfo); proc_net_register(&ipx_if_procinfo); @@ -2414,7 +2379,8 @@ int ipx_if_offset(unsigned long ipx_net_number) ipx_route *rt = NULL; rt = ipxrtr_lookup(ipx_net_number); - return ( rt ? rt->ir_intrfc->if_ipx_offset : -ENETUNREACH ); + + return (rt ? 
rt->ir_intrfc->if_ipx_offset : -ENETUNREACH); } /* Export symbols for higher layers */ @@ -2424,7 +2390,6 @@ EXPORT_SYMBOL(ipx_remove_socket); EXPORT_SYMBOL(ipx_register_spx); EXPORT_SYMBOL(ipx_unregister_spx); - #ifdef MODULE /* Note on MOD_{INC,DEC}_USE_COUNT: * @@ -2443,10 +2408,11 @@ static void ipx_proto_finito(void) { ipx_interface *ifc; - while (ipx_interfaces) { + while(ipx_interfaces) + { ifc = ipx_interfaces; - ipx_interfaces = ifc->if_next; - ifc->if_next = NULL; + ipx_interfaces = ifc->if_next; + ifc->if_next = NULL; ipxitf_down(ifc); } @@ -2459,21 +2425,18 @@ static void ipx_proto_finito(void) unregister_netdevice_notifier(&ipx_dev_notifier); unregister_snap_client(ipx_snap_id); - pSNAP_datalink = NULL; - - unregister_8022tr_client(ipx_8022_type); - p8022tr_datalink = NULL; + pSNAP_datalink = NULL; unregister_8022_client(ipx_8022_type); - p8022_datalink = NULL; + p8022_datalink = NULL; dev_remove_pack(&ipx_8023_packet_type); destroy_8023_client(p8023_datalink); - p8023_datalink = NULL; + p8023_datalink = NULL; dev_remove_pack(&ipx_dix_packet_type); destroy_EII_client(pEII_datalink); - pEII_datalink = NULL; + pEII_datalink = NULL; (void) sock_unregister(ipx_family_ops.family); @@ -2483,7 +2446,7 @@ static void ipx_proto_finito(void) int init_module(void) { ipx_proto_init(NULL); - return 0; + return (0); } void cleanup_module(void) @@ -2491,4 +2454,6 @@ void cleanup_module(void) ipx_proto_finito(); return; } -#endif /* def MODULE */ + +#endif /* MODULE */ +#endif /* CONFIG_IPX || CONFIG_IPX_MODULE */ diff --git a/net/ipx/af_spx.c b/net/ipx/af_spx.c index a14ad0a31..fb2cffeab 100644 --- a/net/ipx/af_spx.c +++ b/net/ipx/af_spx.c @@ -48,6 +48,8 @@ static void spx_retransmit(unsigned long data); static void spx_watchdog(unsigned long data); void spx_rcv(struct sock *sk, int bytes); +extern void ipx_remove_socket(struct sock *sk); + /* Create the SPX specific data */ static int spx_sock_init(struct sock *sk) { @@ -83,7 +85,7 @@ static int spx_create(struct 
socket *sock, int protocol) { struct sock *sk; - sk = sk_alloc(AF_IPX, GFP_KERNEL, 1); + sk = sk_alloc(PF_IPX, GFP_KERNEL, 1); if(sk == NULL) return (-ENOMEM); @@ -328,6 +330,8 @@ static int spx_connect(struct socket *sock, struct sockaddr *uaddr, * As we simply have a default retry time of 1*HZ and a max retry * time of 5*HZ. Between those values we increase the timeout based * on the number of retransmit tries. + * + * FixMe: This is quite fake, but will work for now. (JS) */ static inline unsigned long spx_calc_rtt(int tries) { @@ -344,11 +348,12 @@ static int spx_route_skb(struct spx_opt *pdata, struct sk_buff *skb, int type) int err = 0; skb = skb_unshare(skb, GFP_ATOMIC); - if(skb==NULL) - return -ENOBUFS; + if(skb == NULL) + return (-ENOBUFS); switch(type) { + case (CONREQ): case (DATA): if(!skb_queue_empty(&pdata->retransmit_queue)) { @@ -366,7 +371,6 @@ static int spx_route_skb(struct spx_opt *pdata, struct sk_buff *skb, int type) skb_queue_tail(&pdata->retransmit_queue, skb2); case (ACK): - case (CONREQ): case (CONACK): case (WDREQ): case (WDACK): @@ -388,7 +392,8 @@ static int spx_transmit(struct sock *sk, struct sk_buff *skb, int type, int len) { struct spx_opt *pdata = &sk->tp_pinfo.af_spx; struct ipxspxhdr *ipxh; - int flags, err; + unsigned long flags; + int err; if(skb == NULL) { @@ -397,11 +402,11 @@ static int spx_transmit(struct sock *sk, struct sk_buff *skb, int type, int len) save_flags(flags); cli(); - skb = sock_alloc_send_skb(sk, size, 0, 0, &err); + skb = sock_alloc_send_skb(sk, size, 1, 0, &err); if(skb == NULL) return (-ENOMEM); skb_reserve(skb, offset); - skb->nh.raw = skb_put(skb, sizeof(struct ipxspxhdr)); + skb->h.raw = skb->nh.raw = skb_put(skb,sizeof(struct ipxspxhdr)); restore_flags(flags); } @@ -435,10 +440,10 @@ static int spx_transmit(struct sock *sk, struct sk_buff *skb, int type, int len) pdata->sequence++; break; - case (ACK): /* Connection/WD/Data ACK */ + case (ACK): /* ACK */ pdata->rmt_seq++; - case (WDACK): - case 
(CONACK): + case (WDACK): /* WD ACK */ + case (CONACK): /* Connection ACK */ ipxh->spx.cctl = CCTL_SYS; ipxh->spx.ackseq = htons(pdata->rmt_seq); break; @@ -472,9 +477,7 @@ static int spx_transmit(struct sock *sk, struct sk_buff *skb, int type, int len) } /* Send data */ - spx_route_skb(pdata, skb, type); - - return (0); + return (spx_route_skb(pdata, skb, type)); } /* Check the state of the connection and send a WD request if needed. */ @@ -484,6 +487,8 @@ static void spx_watchdog(unsigned long data) struct spx_opt *pdata = &sk->tp_pinfo.af_spx; del_timer(&pdata->watchdog); + if(pdata->state == SPX_CLOSED) + return; if(pdata->retries > pdata->max_retries) { spx_close_socket(sk); /* Unilateral Abort */ @@ -502,21 +507,27 @@ static void spx_retransmit(unsigned long data) struct sock *sk = (struct sock*)data; struct spx_opt *pdata = &sk->tp_pinfo.af_spx; struct sk_buff *skb; + unsigned long flags; int err; del_timer(&pdata->retransmit); + if(pdata->state == SPX_CLOSED) + return; if(pdata->retransmits > RETRY_COUNT) { spx_close_socket(sk); /* Unilateral Abort */ return; } - /* need to leave skb on the queue! 
*/ + /* Need to leave skb on the queue, aye the fear */ + save_flags(flags); + cli(); skb = skb_peek(&pdata->retransmit_queue); if(skb_cloned(skb)) skb = skb_copy(skb, GFP_ATOMIC); else skb = skb_clone(skb, GFP_ATOMIC); + restore_flags(flags); pdata->retransmit.expires = jiffies + spx_calc_rtt(pdata->retransmits); add_timer(&pdata->retransmit); @@ -527,13 +538,46 @@ static void spx_retransmit(unsigned long data) return; } +/* Check packet for retransmission, ConReqAck aware */ +static int spx_retransmit_chk(struct spx_opt *pdata, int ackseq, int type) +{ + struct ipxspxhdr *ipxh; + struct sk_buff *skb; + + skb = skb_dequeue(&pdata->retransmit_queue); + if(!skb) + return (-ENOENT); + + /* Check Data/ACK seq */ + switch(type) + { + case ACK: /* Check Sequence, Should == 1 */ + ipxh = (struct ipxspxhdr *)skb->nh.raw; + if(!(ntohs(ipxh->spx.sequence) - htons(ackseq))) + break; + + case CONACK: + del_timer(&pdata->retransmit); + pdata->retransmits = 0; + kfree_skb(skb); + if(skb_queue_empty(&pdata->retransmit_queue)) + { + skb = skb_dequeue(&pdata->transmit_queue); + if(skb != NULL) + spx_route_skb(pdata, skb, TQUEUE); + } + return (0); + } + + skb_queue_head(&pdata->retransmit_queue, skb); + return (-1); +} + /* SPX packet receive engine */ void spx_rcv(struct sock *sk, int bytes) { struct sk_buff *skb; - struct sk_buff *skb2; struct ipxspxhdr *ipxh; - struct ipxspxhdr *ipxh2; struct spx_opt *pdata = &sk->tp_pinfo.af_spx; skb = skb_dequeue(&sk->receive_queue); @@ -543,15 +587,13 @@ void spx_rcv(struct sock *sk, int bytes) /* Can't receive on a closed connection */ if((pdata->state == SPX_CLOSED) && (ipxh->spx.sequence != 0)) - return; + goto toss_skb; if(ntohs(ipxh->ipx.ipx_pktsize) < SPX_SYS_PKT_LEN) - return; + goto toss_skb; if(ipxh->ipx.ipx_type != IPX_TYPE_SPX) - return; - - /* insanity - rcv'd ACK of unsent data ?? 
*/ + goto toss_skb; if(ntohs(ipxh->spx.ackseq) > pdata->sequence) - return; + goto toss_skb; /* Reset WD timer on any received packet */ del_timer(&pdata->watchdog); @@ -577,7 +619,7 @@ void spx_rcv(struct sock *sk, int bytes) } else /* WD Request */ spx_transmit(sk, skb, WDACK, 0); - break; + goto finish; case CCTL_SYS: /* ACK */ if((ipxh->spx.dtype == 0) /* ConReq ACK */ @@ -588,62 +630,56 @@ void spx_rcv(struct sock *sk, int bytes) && (pdata->state != SPX_CONNECTED)) { pdata->state = SPX_CONNECTED; + pdata->dest_connid = ipxh->spx.sconn; + + if(spx_retransmit_chk(pdata, 0, CONACK) < 0) + goto toss_skb; skb_queue_tail(&sk->receive_queue, skb); wake_up_interruptible(sk->sleep); - break; - } - - /* Check Data/ACK seq */ - skb2 = skb_dequeue(&pdata->retransmit_queue); - if(skb2) - { - ipxh2 = (struct ipxspxhdr *)skb2->nh.raw; - if((ntohs(ipxh2->spx.sequence) - == (ntohs(ipxh->spx.ackseq) - 1)) - || (ntohs(ipxh2->spx.sequence) == 65535 - && ntohs(ipxh->spx.ackseq) == 0)) - { - del_timer(&pdata->retransmit); - pdata->retransmits = 0; - kfree_skb(skb2); - if(skb_queue_empty(&pdata->retransmit_queue)) - { - skb2 = skb_dequeue(&pdata->transmit_queue); - if(skb2 != NULL) - spx_route_skb(pdata, skb2, TQUEUE); - } - } - else /* Out of Seq - ERROR! 
*/ - skb_queue_head(&pdata->retransmit_queue, skb2); + goto finish; } - kfree_skb(skb); - break; + spx_retransmit_chk(pdata, ipxh->spx.ackseq, ACK); + goto toss_skb; - case (CCTL_ACK): /* Informed Disconnect */ + case (CCTL_ACK): + /* Informed Disconnect */ if(ipxh->spx.dtype == SPX_DTYPE_ECONN) { + spx_transmit(sk, skb, DISACK, 0); spx_close_socket(sk); + goto finish; } - break; + /* Fall through */ default: if(ntohs(ipxh->spx.sequence) == pdata->rmt_seq) { pdata->rmt_seq = ntohs(ipxh->spx.sequence); + pdata->rmt_ack = ntohs(ipxh->spx.ackseq); + if(pdata->rmt_ack > 0 || pdata->rmt_ack == 0) + spx_retransmit_chk(pdata,pdata->rmt_ack, ACK); + skb_queue_tail(&pdata->rcv_queue, skb); wake_up_interruptible(sk->sleep); - spx_transmit(sk, NULL, ACK, 0); - break; + if(ipxh->spx.cctl&CCTL_ACK) + spx_transmit(sk, NULL, ACK, 0); + goto finish; } - /* Catch All */ - kfree_skb(skb); - break; + if(ipxh->spx.dtype == SPX_DTYPE_ECACK) + { + if(pdata->state != SPX_CLOSED) + spx_close_socket(sk); + goto toss_skb; + } } +toss_skb: /* Catch All */ + kfree_skb(skb); +finish: return; } @@ -665,13 +701,16 @@ static int spx_sendmsg(struct socket *sock, struct msghdr *msg, int len, offset = ipx_if_offset(sk->tp_pinfo.af_spx.dest_addr.net); size = offset + sizeof(struct ipxspxhdr) + len; + + cli(); skb = sock_alloc_send_skb(sk, size, 0, flags&MSG_DONTWAIT, &err); if(skb == NULL) return (err); + sti(); skb->sk = sk; skb_reserve(skb, offset); - skb->nh.raw = skb_put(skb, sizeof(struct ipxspxhdr)); + skb->h.raw = skb->nh.raw = skb_put(skb, sizeof(struct ipxspxhdr)); err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); if(err) @@ -806,7 +845,7 @@ static int spx_getsockopt(struct socket *sock, int level, int optname, } static struct proto_ops spx_operations = { - AF_IPX, + PF_IPX, sock_no_dup, spx_release, spx_bind, @@ -827,7 +866,7 @@ static struct proto_ops spx_operations = { static struct net_proto_family spx_family_ops= { - AF_IPX, + PF_IPX, spx_create }; @@ -842,9 +881,9 @@ void 
spx_proto_init(void) if (error) printk(KERN_ERR "SPX: unable to register with IPX.\n"); - /* route socket(AF_IPX, SOCK_SEQPACKET) calls through spx_create() */ + /* route socket(PF_IPX, SOCK_SEQPACKET) calls through spx_create() */ - printk(KERN_INFO "Sequenced Packet eXchange (SPX) 0.01 for Linux NET3.037\n"); + printk(KERN_INFO "Sequenced Packet eXchange (SPX) 0.02 for Linux NET3.037\n"); return; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8b8e5a4b8..c57d793c0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -147,7 +147,7 @@ static int netlink_create(struct socket *sock, int protocol) sock->ops = &netlink_ops; - sk = sk_alloc(AF_NETLINK, GFP_KERNEL, 1); + sk = sk_alloc(PF_NETLINK, GFP_KERNEL, 1); if (!sk) return -ENOMEM; @@ -720,7 +720,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) if (err == 0) size = NLMSG_SPACE(sizeof(struct nlmsgerr)); else - size = NLMSG_SPACE(4 + nlh->nlmsg_len); + size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len)); skb = alloc_skb(size, GFP_KERNEL); if (!skb) @@ -980,28 +980,28 @@ done: #endif struct proto_ops netlink_ops = { - AF_NETLINK, + PF_NETLINK, sock_no_dup, netlink_release, netlink_bind, netlink_connect, - NULL, - NULL, + sock_no_socketpair, + sock_no_accept, netlink_getname, datagram_poll, sock_no_ioctl, sock_no_listen, sock_no_shutdown, - NULL, - NULL, + sock_no_setsockopt, + sock_no_getsockopt, sock_no_fcntl, netlink_sendmsg, netlink_recvmsg }; struct net_proto_family netlink_family_ops = { - AF_NETLINK, + PF_NETLINK, netlink_create }; diff --git a/net/netlink/netlink_dev.c b/net/netlink/netlink_dev.c index 2a1dd160c..94be0069b 100644 --- a/net/netlink/netlink_dev.c +++ b/net/netlink/netlink_dev.c @@ -116,7 +116,7 @@ static int netlink_open(struct inode * inode, struct file * file) MOD_INC_USE_COUNT; err = -EINVAL; - if (net_families[AF_NETLINK]==NULL) + if (net_families[PF_NETLINK]==NULL) goto out; err = -ENFILE; @@ -125,7 +125,7 @@ static int 
netlink_open(struct inode * inode, struct file * file) sock->type = SOCK_RAW; - if ((err = net_families[AF_NETLINK]->create(sock, minor)) < 0) + if ((err = net_families[PF_NETLINK]->create(sock, minor)) < 0) { sock_release(sock); goto out; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 59d3dacfb..84451d0b9 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -98,7 +98,7 @@ static struct sock *nr_alloc_sock(void) struct sock *sk; nr_cb *nr; - if ((sk = sk_alloc(AF_NETROM, GFP_ATOMIC, 1)) == NULL) + if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, 1)) == NULL) return NULL; if ((nr = kmalloc(sizeof(*nr), GFP_ATOMIC)) == NULL) { @@ -1235,12 +1235,12 @@ static int nr_get_info(char *buffer, char **start, off_t offset, int length, int static struct net_proto_family nr_family_ops = { - AF_NETROM, + PF_NETROM, nr_create }; static struct proto_ops nr_proto_ops = { - AF_NETROM, + PF_NETROM, sock_no_dup, nr_release, @@ -1363,7 +1363,7 @@ void cleanup_module(void) #ifdef CONFIG_SYSCTL nr_unregister_sysctl(); #endif - sock_unregister(AF_NETROM); + sock_unregister(PF_NETROM); for (i = 0; i < nr_ndevs; i++) { if (dev_nr[i].priv != NULL) { diff --git a/net/netsyms.c b/net/netsyms.c index 52be53033..d9767a09e 100644 --- a/net/netsyms.c +++ b/net/netsyms.c @@ -17,6 +17,10 @@ #include <net/neighbour.h> #include <net/snmp.h> +#ifdef CONFIG_BRIDGE +#include <net/br.h> +#endif + #ifdef CONFIG_INET #include <linux/ip.h> #include <linux/etherdevice.h> @@ -80,8 +84,8 @@ extern int sysctl_max_syn_backlog; EXPORT_SYMBOL(dev_lockct); /* Skbuff symbols. 
*/ -EXPORT_SYMBOL(skb_push_errstr); -EXPORT_SYMBOL(skb_put_errstr); +EXPORT_SYMBOL(skb_over_panic); +EXPORT_SYMBOL(skb_under_panic); /* Socket layer registration */ EXPORT_SYMBOL(sock_register); @@ -191,6 +195,10 @@ EXPORT_SYMBOL(sklist_insert_socket); EXPORT_SYMBOL(scm_detach_fds); +#ifdef CONFIG_BRIDGE +EXPORT_SYMBOL(br_ioctl); +#endif + #ifdef CONFIG_INET /* Internet layer registration */ EXPORT_SYMBOL(inet_add_protocol); @@ -213,6 +221,7 @@ EXPORT_SYMBOL(ip_mc_inc_group); EXPORT_SYMBOL(ip_mc_dec_group); EXPORT_SYMBOL(__ip_finish_output); EXPORT_SYMBOL(inet_dgram_ops); +EXPORT_SYMBOL(ip_cmsg_recv); EXPORT_SYMBOL(__release_sock); /* needed for ip_gre -cw */ @@ -353,7 +362,6 @@ EXPORT_SYMBOL(sock_rmalloc); /* If 8390 NIC support is built in, we will need these. */ EXPORT_SYMBOL(ei_open); EXPORT_SYMBOL(ei_close); -EXPORT_SYMBOL(ei_debug); EXPORT_SYMBOL(ei_interrupt); EXPORT_SYMBOL(ethdev_init); EXPORT_SYMBOL(NS8390_init); @@ -366,7 +374,6 @@ EXPORT_SYMBOL(register_trdev); EXPORT_SYMBOL(unregister_trdev); EXPORT_SYMBOL(init_trdev); EXPORT_SYMBOL(tr_freedev); -EXPORT_SYMBOL(tr_reformat); #endif /* Used by at least ipip.c. 
*/ @@ -415,9 +422,6 @@ EXPORT_SYMBOL(netdev_register_fc); EXPORT_SYMBOL(netdev_unregister_fc); EXPORT_SYMBOL(netdev_fc_xoff); #endif -#ifdef CONFIG_IP_ACCT -EXPORT_SYMBOL(ip_acct_output); -#endif EXPORT_SYMBOL(dev_base); EXPORT_SYMBOL(dev_close); EXPORT_SYMBOL(dev_mc_add); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f56b660c0..1e5a509d4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -704,7 +704,7 @@ static int packet_create(struct socket *sock, int protocol) MOD_INC_USE_COUNT; err = -ENOBUFS; - sk = sk_alloc(AF_PACKET, GFP_KERNEL, 1); + sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1); if (sk == NULL) goto out; @@ -721,7 +721,7 @@ static int packet_create(struct socket *sock, int protocol) goto out_free; memset(sk->protinfo.af_packet, 0, sizeof(struct packet_opt)); sk->zapped=0; - sk->family = AF_PACKET; + sk->family = PF_PACKET; sk->num = protocol; /* @@ -1176,14 +1176,14 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg #ifdef CONFIG_SOCK_PACKET struct proto_ops packet_ops_spkt = { - AF_PACKET, + PF_PACKET, sock_no_dup, packet_release, packet_bind_spkt, sock_no_connect, - NULL, - NULL, + sock_no_socketpair, + sock_no_accept, packet_getname_spkt, datagram_poll, packet_ioctl, @@ -1198,14 +1198,14 @@ struct proto_ops packet_ops_spkt = { #endif struct proto_ops packet_ops = { - AF_PACKET, + PF_PACKET, sock_no_dup, packet_release, packet_bind, sock_no_connect, - NULL, - NULL, + sock_no_socketpair, + sock_no_accept, packet_getname, datagram_poll, packet_ioctl, @@ -1223,7 +1223,7 @@ struct proto_ops packet_ops = { }; static struct net_proto_family packet_family_ops = { - AF_PACKET, + PF_PACKET, packet_create }; @@ -1238,7 +1238,7 @@ struct notifier_block packet_netdev_notifier={ void cleanup_module(void) { unregister_netdevice_notifier(&packet_netdev_notifier); - sock_unregister(packet_family_ops.family); + sock_unregister(PF_PACKET); return; } diff --git a/net/protocols.c b/net/protocols.c index 
cba2a3ac4..2e2362b82 100644 --- a/net/protocols.c +++ b/net/protocols.c @@ -86,7 +86,6 @@ extern void rif_init(struct net_proto *); #ifdef NEED_802 #include <net/p8022call.h> -#include <net/p8022trcall.h> #endif /* @@ -108,7 +107,6 @@ struct net_proto protocols[] = { #ifdef NEED_802 { "802.2", p8022_proto_init }, /* 802.2 demultiplexor */ - { "802.2TR", p8022tr_proto_init }, /* 802.2 demultiplexor */ { "SNAP", snap_proto_init }, /* SNAP demultiplexor */ #endif diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 494b9fa62..1c17b3648 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -149,7 +149,7 @@ static struct sock *rose_alloc_sock(void) struct sock *sk; rose_cb *rose; - if ((sk = sk_alloc(AF_ROSE, GFP_ATOMIC, 1)) == NULL) + if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, 1)) == NULL) return NULL; if ((rose = kmalloc(sizeof(*rose), GFP_ATOMIC)) == NULL) { @@ -1320,12 +1320,12 @@ static int rose_get_info(char *buffer, char **start, off_t offset, int length, i } static struct net_proto_family rose_family_ops = { - AF_ROSE, + PF_ROSE, rose_create }; static struct proto_ops rose_proto_ops = { - AF_ROSE, + PF_ROSE, sock_no_dup, rose_release, @@ -1462,7 +1462,7 @@ void cleanup_module(void) #endif unregister_netdevice_notifier(&rose_dev_notifier); - sock_unregister(AF_ROSE); + sock_unregister(PF_ROSE); for (i = 0; i < rose_ndevs; i++) { if (dev_rose[i].priv != NULL) { diff --git a/net/sched/Config.in b/net/sched/Config.in index 052b62281..5d497a050 100644 --- a/net/sched/Config.in +++ b/net/sched/Config.in @@ -1,6 +1,8 @@ # # Traffic control configuration. 
# +define_bool CONFIG_NETLINK y +define_bool CONFIG_RTNETLINK y tristate 'CBQ packet scheduler' CONFIG_NET_SCH_CBQ tristate 'CSZ packet scheduler' CONFIG_NET_SCH_CSZ #tristate 'H-PFQ packet scheduler' CONFIG_NET_SCH_HPFQ @@ -14,12 +16,12 @@ bool 'QoS support' CONFIG_NET_QOS if [ "$CONFIG_NET_QOS" = "y" ]; then bool 'Rate estimator' CONFIG_NET_ESTIMATOR fi -if [ "$CONFIG_IP_MULTIPLE_TABLES" = "y" ]; then - bool 'Packet classifier API' CONFIG_NET_CLS -fi +bool 'Packet classifier API' CONFIG_NET_CLS if [ "$CONFIG_NET_CLS" = "y" ]; then bool 'Routing tables based classifier' CONFIG_NET_CLS_ROUTE -# bool 'Firewall based classifier' CONFIG_NET_CLS_FW + if [ "$CONFIG_IP_FIREWALL" = "y" ]; then + bool 'Firewall based classifier' CONFIG_NET_CLS_FW + fi tristate 'U32 classifier' CONFIG_NET_CLS_U32 if [ "$CONFIG_NET_QOS" = "y" ]; then tristate 'Special RSVP classifier' CONFIG_NET_CLS_RSVP diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 6eae05d7b..0bf7a92f4 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -114,6 +114,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 protocol = TC_H_MIN(t->tcm_info); u32 prio = TC_H_MAJ(t->tcm_info); u32 nprio = prio; + u32 parent = t->tcm_parent; struct device *dev; struct Qdisc *q; struct tcf_proto **back, **chain; @@ -141,9 +142,10 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -ENODEV; /* Find qdisc */ - if (!t->tcm_parent) + if (!parent) { q = dev->qdisc_sleeping; - else if ((q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent))) == NULL) + parent = q->handle; + } else if ((q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent))) == NULL) return -EINVAL; /* Is it classful? */ @@ -151,8 +153,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -EINVAL; /* Do we search for filter, attached to class? 
*/ - if (TC_H_MIN(t->tcm_parent)) { - cl = cops->get(q, t->tcm_parent); + if (TC_H_MIN(parent)) { + cl = cops->get(q, parent); if (cl == 0) return -ENOENT; } @@ -203,7 +205,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) tp->prio = nprio ? : tcf_auto_prio(*back, prio); tp->q = q; tp->classify = tp_ops->classify; - tp->classid = t->tcm_parent; + tp->classid = parent; err = tp_ops->init(tp); if (err) { kfree(tp); @@ -395,7 +397,7 @@ errout: __initfunc(int tc_filter_init(void)) { #ifdef CONFIG_RTNETLINK - struct rtnetlink_link *link_p = rtnetlink_links[AF_UNSPEC]; + struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC]; /* Setup rtnetlink links. It is made here to avoid exporting large number of public symbols. diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index ff831817a..0fab64dda 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -38,7 +38,6 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) { -#if 0 /* XXX skb->fwmark, where is it? -DaveM */ u32 clid = skb->fwmark; if (clid && (TC_H_MAJ(clid) == 0 || @@ -47,7 +46,6 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, res->class = 0; return 0; } -#endif return -1; } @@ -81,7 +79,7 @@ static int fw_change(struct tcf_proto *tp, u32 handle, return handle ? -EINVAL : 0; } -struct tcf_proto_ops fw_cls_ops = { +struct tcf_proto_ops cls_fw_ops = { NULL, "fw", fw_classify, diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 9e12a806a..8f50013f7 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -15,26 +15,26 @@ * (dst, protocol) are always specified, so that we are able to hash them. - * src may be exact, and may be wildcard, so that - we can keep hash table plus one wildcard entry. + * src may be exact, or may be wildcard, so that + we can keep a hash table plus one wildcard entry. * source port (or flow label) is important only if src is given. IMPLEMENTATION. 
- We use two level hash table: top level is keyed by - destination address and protocol ID, every bucket contains list of - "rsvp sessions", identified by destination address, protocol - and DPI(="Destination Port ID"): triple (key, mask, offset). + We use a two level hash table: The top level is keyed by + destination address and protocol ID, every bucket contains a list + of "rsvp sessions", identified by destination address, protocol and + DPI(="Destination Port ID"): triple (key, mask, offset). - Every bucket has smaller hash table keyed by source address + Every bucket has a smaller hash table keyed by source address (cf. RSVP flowspec) and one wildcard entry for wildcard reservations. - Every bucket is again list of "RSVP flows", selected by + Every bucket is again a list of "RSVP flows", selected by source address and SPI(="Source Port ID" here rather than "security parameter index"): triple (key, mask, offset). NOTE 1. All the packets with IPv6 extension headers (but AH and ESP) - and all fragmented packets go to best-effort traffic class. + and all fragmented packets go to the best-effort traffic class. NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires @@ -42,29 +42,28 @@ ah, esp (and udp,tcp) both *pi should coincide or one of them should be wildcard. - From the first sight, this redundancy is just waste of CPU - resources. But, DPI and SPI add possibility to assign different - priorities to GPIs. Look also note 4 about tunnels below. + At first sight, this redundancy is just a waste of CPU + resources. But DPI and SPI add the possibility to assign different + priorities to GPIs. Look also at note 4 about tunnels below. NOTE 3. One complication is the case of tunneled packets. - We implement it as the following: if the first lookup - matches special session with "tunnelhdr" value not zero, - flowid contains not true flow ID, but tunnel ID (1...255). 
+ We implement it as following: if the first lookup + matches a special session with "tunnelhdr" value not zero, + flowid doesn't contain the true flow ID, but the tunnel ID (1...255). In this case, we pull tunnelhdr bytes and restart lookup - with tunnel ID added to list of keys. Simple and stupid 8)8) + with tunnel ID added to the list of keys. Simple and stupid 8)8) It's enough for PIMREG and IPIP. - NOTE 4. Two GPIs make possible to parse even GRE packets. + NOTE 4. Two GPIs make it possible to parse even GRE packets. F.e. DPI can select ETH_P_IP (and necessary flags to make tunnelhdr correct) in GRE protocol field and SPI matches GRE key. Is it not nice? 8)8) - Well, as result, despite of simplicity, we get pretty - powerful clsssification engine. - */ + Well, as result, despite its simplicity, we get a pretty + powerful classification engine. */ #include <linux/config.h> @@ -415,7 +414,7 @@ static int rsvp_change(struct tcf_proto *tp, u32 handle, int err; if (opt == NULL) - return -EINVAL; + return handle ? -EINVAL : 0; if (rtattr_parse(tb, TCA_RSVP_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0) return -EINVAL; @@ -526,6 +525,7 @@ insert: break; f->next = *fp; *fp = f; + *arg = (unsigned long)f; return 0; } } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 10e355201..cb52e9d07 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -12,16 +12,16 @@ * with a set of 32bit key/mask pairs at every node. * Nodes reference next level hash tables etc. * - * This scheme is the best universal classifier - * I managed to invent; it is not super-fast, but it is not slow - * (provided you programmed it correctly), and enough general. - * And its relative speed grows, when number of rules becomes larger. + * This scheme is the best universal classifier I managed to + * invent; it is not super-fast, but it is not slow (provided you + * program it correctly), and general enough. And its relative + * speed grows as the number of rules becomes larger. 
* - * Seems, it presents the best middle point between speed and - * managability both by human and by machine. + * It seems that it represents the best middle point between + * speed and manageability both by human and by machine. * - * It is especially useful for link sharing and link sharing, combined - * with QoS; pure RSVP need not such general approach and can use + * It is especially useful for link sharing combined with QoS; + * pure RSVP doesn't need such a general approach and can use * much simpler (and faster) schemes, sort of cls_rsvp.c. */ diff --git a/net/sched/estimator.c b/net/sched/estimator.c index 06defeec5..463879606 100644 --- a/net/sched/estimator.c +++ b/net/sched/estimator.c @@ -31,21 +31,21 @@ #include <net/pkt_sched.h> /* - This text is NOT intended to be used for statistics collection, - its purpose is to provide base for statistical multiplexing + This code is NOT intended to be used for statistics collection, + its purpose is to provide a base for statistical multiplexing for controlled load service. - If you need only statistics, run user level daemon, which will - periodically read byte counters. + If you need only statistics, run a user level daemon which + periodically reads byte counters. - Unfortunately, rate estimation is not very easy task. - F.e. I did not find a simple way to estimate current peak rate + Unfortunately, rate estimation is not a very easy task. + F.e. I did not find a simple way to estimate the current peak rate and even failed to formulate the problem 8)8) - So that I preferred not to built estimator in scheduler, + So I preferred not to built an estimator into the scheduler, but run this task separately. 
Ideally, it should be kernel thread(s), but for now it runs - from timers, which puts apparent top bounds on number of rated - flows, but has minimal overhead on small, but enough + from timers, which puts apparent top bounds on the number of rated + flows, has minimal overhead on small, but is enough to handle controlled load service, sets of aggregates. We measure rate over A=(1<<interval) seconds and evaluate EWMA: @@ -54,18 +54,18 @@ where W is chosen as negative power of 2: W = 2^(-ewma_log) - Resulting time constant is: + The resulting time constant is: T = A/(-ln(1-W)) NOTES. - * Stored value for avbps is scaled by 2^5, so that maximal + * The stored value for avbps is scaled by 2^5, so that maximal rate is ~1Gbit, avpps is scaled by 2^10. - * Minimal interval is HZ/4=250msec (it is the least integer divisor - both for HZ=100 and HZ=1024 8)), maximal interval + * Minimal interval is HZ/4=250msec (it is the greatest common divisor + for HZ=100 and HZ=1024 8)), maximal interval is (HZ/4)*2^EST_MAX_INTERVAL = 8sec. Shorter intervals are too expensive, longer ones can be implemented at user level painlessly. 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6d36af30d..a684cde66 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -9,9 +9,6 @@ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/bitops.h> #include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> @@ -28,9 +25,14 @@ #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/proc_fs.h> + #include <net/sock.h> #include <net/pkt_sched.h> +#include <asm/processor.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/bitops.h> #define BUG_TRAP(x) if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } @@ -881,7 +883,7 @@ __initfunc(int psched_calibrate_clock(void)) unsigned long stop; #if CPU == 586 || CPU == 686 - if (!(boot_cpu_data.x86_capability & 16)) + if (!(boot_cpu_data.x86_capability & X86_FEATURE_TSC) return -1; #endif @@ -928,7 +930,7 @@ __initfunc(int pktsched_init(void)) #endif #ifdef CONFIG_RTNETLINK - struct rtnetlink_link *link_p = rtnetlink_links[AF_UNSPEC]; + struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC]; /* Setup rtnetlink links. It is made here to avoid exporting large number of public symbols. diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 759ef4d57..52512e879 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -54,18 +54,18 @@ ----------------------------------------------------------------------- - Algorithm skeleton is taken from from NS simulator cbq.cc. - If someone wants to check this text against LBL version, - he should take into account that ONLY skeleton is borrowed, - implementation is different. Particularly: - - --- WRR algorithm is different. Our version looks - more reasonable (I hope) and works when quanta are allowed - to be less than MTU, which always is the case, when real time - classes have small rates. 
Note, that the statement of [3] is incomplete, - Actually delay may be estimated even if class per-round allotment - less than MTU. Namely, if per-round allotment is W*r_i, - and r_1+...+r_k = r < 1 + Algorithm skeleton was taken from from NS simulator cbq.cc. + If someone wants to check this code against the LBL version, + he should take into account that ONLY the skeleton was borrowed, + the implementation is different. Particularly: + + --- The WRR algorithm is different. Our version looks more + reasonable (I hope) and works when quanta are allowed to be + less than MTU, which is always the case when real time classes + have small rates. Note, that the statement of [3] is + incomplete, delay may actually be estimated even if class + per-round allotment is less than MTU. Namely, if per-round + allotment is W*r_i, and r_1+...+r_k = r < 1 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B @@ -73,18 +73,17 @@ and C = MTU*r. The proof (if correct at all) is trivial. - --- Seems, cbq-2.0 is not very accurate. At least, I cannot - interpret some places, which look like wrong translation - from NS. Anyone is advertised to found these differences - and explain me, why I am wrong 8). + --- It seems that cbq-2.0 is not very accurate. At least, I cannot + interpret some places, which look like wrong translations + from NS. Anyone is advised to find these differences + and explain to me, why I am wrong 8). --- Linux has no EOI event, so that we cannot estimate true class idle time. Workaround is to consider the next dequeue event - as sign that previous packet is finished. It is wrong because of - internal device queueing, but on permanently loaded link it is true. + as sign that previous packet is finished. This is wrong because of + internal device queueing, but on a permanently loaded link it is true. Moreover, combined with clock integrator, this scheme looks - very close to ideal solution. -*/ + very close to an ideal solution. 
*/ struct cbq_sched_data; @@ -177,7 +176,7 @@ struct cbq_sched_data unsigned pmask; struct timer_list delay_timer; - struct timer_list wd_timer; /* Wathchdog timer, that + struct timer_list wd_timer; /* Watchdog timer, started when CBQ has backlog, but cannot transmit just now */ @@ -231,9 +230,9 @@ cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) transparently. Namely, you can put link sharing rules (f.e. route based) at root of CBQ, - so that it resolves to split nodes. Then packeta are classified - by logical priority, or more specific classifier may be attached - to split node. + so that it resolves to split nodes. Then packets are classified + by logical priority, or a more specific classifier may be attached + to the split node. */ static struct cbq_class * @@ -283,9 +282,9 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch) } /* - * Step 3+n. If classifier selected link sharing class, + * Step 3+n. If classifier selected a link sharing class, * apply agency specific classifier. - * Repeat this procdure until we hit leaf node. + * Repeat this procdure until we hit a leaf node. */ head = cl; } @@ -332,7 +331,7 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl) /* Unlink class from active chain. - Note, that the same procedure is made directly in cbq_dequeue* + Note that this same procedure is done directly in cbq_dequeue* during round-robin procedure. */ @@ -712,9 +711,9 @@ cbq_update(struct cbq_sched_data *q) /* That is not all. - To maintain rate allocated to class, + To maintain the rate allocated to the class, we add to undertime virtual clock, - necassry to complete transmitted packet. + necesary to complete transmitted packet. (len/phys_bandwidth has been already passed to the moment of cbq_update) */ @@ -932,8 +931,8 @@ cbq_dequeue(struct Qdisc *sch) an arbitrary class is appropriate for ancestor-only sharing, but not for toplevel algorithm. 
- Our version is better, but slower, because requires - two passes, but it is inavoidable with top-level sharing. + Our version is better, but slower, because it requires + two passes, but it is unavoidable with top-level sharing. */ if (q->toplevel == TC_CBQ_MAXLEVEL && @@ -1000,8 +999,8 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) q->quanta[prio]; } if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) { - printk("Damn! %08x cl->quantum==%ld\n", cl->classid, cl->quantum); - cl->quantum = 1; + printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum); + cl->quantum = cl->qdisc->dev->mtu/2 + 1; } } } @@ -1436,7 +1435,7 @@ static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) if (cl->police) { opt.police = cl->police; - RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt); + RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt); } return skb->len; diff --git a/net/sched/sch_csz.c b/net/sched/sch_csz.c index c21d8ac43..207a1dd51 100644 --- a/net/sched/sch_csz.c +++ b/net/sched/sch_csz.c @@ -49,12 +49,12 @@ CBQ presents a flexible universal algorithm for packet scheduling, but it has pretty poor delay characteristics. Round-robin scheduling and link-sharing goals - apparently contradict to minimization of network delay and jitter. + apparently contradict minimization of network delay and jitter. Moreover, correct handling of predictive flows seems to be impossible in CBQ. - CSZ presents more precise but less flexible and less efficient - approach. As I understand, the main idea is to create + CSZ presents a more precise but less flexible and less efficient + approach. As I understand it, the main idea is to create WFQ flows for each guaranteed service and to allocate the rest of bandwith to dummy flow-0. 
Flow-0 comprises the predictive services and the best effort traffic; @@ -62,22 +62,23 @@ priority band allocated for predictive services, and the rest --- to the best effort packets. - Note, that in CSZ flows are NOT limited to their bandwidth. - It is supposed, that flow passed admission control at the edge - of QoS network and it more need no shaping. Any attempt to improve - the flow or to shape it to a token bucket at intermediate hops - will introduce undesired delays and raise jitter. + Note that in CSZ flows are NOT limited to their bandwidth. It + is supposed that the flow passed admission control at the edge + of the QoS network and it doesn't need further shaping. Any + attempt to improve the flow or to shape it to a token bucket + at intermediate hops will introduce undesired delays and raise + jitter. At the moment CSZ is the only scheduler that provides true guaranteed service. Another schemes (including CBQ) do not provide guaranteed delay and randomize jitter. - There exists the statement (Sally Floyd), that delay - can be estimated by a IntServ compliant formulae. + There is a proof (Sally Floyd), that delay + can be estimated by a IntServ compliant formula. This result is true formally, but it is wrong in principle. It takes into account only round-robin delays, ignoring delays introduced by link sharing i.e. overlimiting. - Note, that temporary overlimits are inevitable because - real links are not ideal, and true algorithm must take it + Note that temporary overlimits are inevitable because + real links are not ideal, and the real algorithm must take this into account. ALGORITHM. @@ -92,14 +93,14 @@ --- Flow model. - Let $m_a$ is number of backlogged bits in flow $a$. - The flow is {\em active }, if $m_a > 0$. - This number is discontinuous function of time; + Let $m_a$ is the number of backlogged bits in flow $a$. + The flow is {\em active}, if $m_a > 0$. 
+ This number is a discontinuous function of time; when a packet $i$ arrives: \[ m_a(t_i+0) - m_a(t_i-0) = L^i, \] - where $L^i$ is the length of arrived packet. + where $L^i$ is the length of the arrived packet. The flow queue is drained continuously until $m_a == 0$: \[ {d m_a \over dt} = - { B r_a \over \sum_{b \in A} r_b}. @@ -112,23 +113,23 @@ {d m_a \over dt} = - B r_a . \] More complicated hierarchical bandwidth allocation - policies are possible, but, unfortunately, basic - flows equation have simple solution only for proportional + policies are possible, but unfortunately, the basic + flow equations have a simple solution only for proportional scaling. --- Departure times. - We calculate time until the last bit of packet will be sent: + We calculate the time until the last bit of packet is sent: \[ E_a^i(t) = { m_a(t_i) - \delta_a(t) \over r_a }, \] where $\delta_a(t)$ is number of bits drained since $t_i$. We have to evaluate $E_a^i$ for all queued packets, - then find packet with minimal $E_a^i$ and send it. + then find the packet with minimal $E_a^i$ and send it. - It sounds good, but direct implementation of the algorithm + This sounds good, but direct implementation of the algorithm is absolutely infeasible. Luckily, if flow rates - are scaled proportionally, the equations have simple solution. + are scaled proportionally, the equations have a simple solution. The differential equation for $E_a^i$ is \[ @@ -149,7 +150,7 @@ $B \sum_{a \in A} r_a$ bits per round, that takes $\sum_{a \in A} r_a$ seconds. - Hence, $R(t)$ (round number) is monotonically increasing + Hence, $R(t)$ (round number) is a monotonically increasing linear function of time when $A$ is not changed \[ { d R(t) \over dt } = { 1 \over \sum_{a \in A} r_a } @@ -160,17 +161,17 @@ $F_a^i = R(t) + E_a^i(t)/B$ does not depend on time at all! 
$R(t)$ does not depend on flow, so that $F_a^i$ can be calculated only once on packet arrival, and we need not - recalculation of $E$ numbers and resorting queues. - Number $F_a^i$ is called finish number of the packet. - It is just value of $R(t)$, when the last bit of packet - will be sent out. + recalculate $E$ numbers and resorting queues. + The number $F_a^i$ is called finish number of the packet. + It is just the value of $R(t)$ when the last bit of packet + is sent out. Maximal finish number on flow is called finish number of flow and minimal one is "start number of flow". Apparently, flow is active if and only if $F_a \leq R$. - When packet of length $L_i$ bit arrives to flow $a$ at time $t_i$, - we calculate number $F_a^i$ as: + When a packet of length $L_i$ bit arrives to flow $a$ at time $t_i$, + we calculate $F_a^i$ as: If flow was inactive ($F_a < R$): $F_a^i = R(t) + {L_i \over B r_a}$ @@ -179,31 +180,30 @@ These equations complete the algorithm specification. - It looks pretty hairy, but there exists a simple + It looks pretty hairy, but there is a simple procedure for solving these equations. See procedure csz_update(), that is a generalization of - algorithm from S. Keshav's thesis Chapter 3 + the algorithm from S. Keshav's thesis Chapter 3 "Efficient Implementation of Fair Queeing". NOTES. * We implement only the simplest variant of CSZ, - when flow-0 is explicit 4band priority fifo. - It is bad, but we need "peek" operation in addition + when flow-0 is a explicit 4band priority fifo. + This is bad, but we need a "peek" operation in addition to "dequeue" to implement complete CSZ. - I do not want to make it, until it is not absolutely + I do not want to do that, unless it is absolutely necessary. * A primitive support for token bucket filtering - presents too. It directly contradicts to CSZ, but - though the Internet is on the globe ... :-) - yet "the edges of the network" really exist. + presents itself too. 
It directly contradicts CSZ, but + even though the Internet is on the globe ... :-) + "the edges of the network" really exist. BUGS. * Fixed point arithmetic is overcomplicated, suboptimal and even - wrong. Check it later. -*/ + wrong. Check it later. */ /* This number is arbitrary */ diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5e07bced8..7ba2e94cc 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -37,7 +37,7 @@ struct Qdisc_head qdisc_head = { &qdisc_head }; /* Kick device. - Note, that this procedure can be called by watchdog timer, so that + Note, that this procedure can be called by a watchdog timer, so that we do not check dev->tbusy flag here. Returns: 0 - queue is empty. @@ -62,7 +62,7 @@ int qdisc_restart(struct device *dev) } /* Device kicked us out :( - It is possible in three cases: + This is possible in three cases: 1. fastroute is enabled 2. device cannot determine busy state @@ -79,7 +79,7 @@ int qdisc_restart(struct device *dev) /* Scan transmission queue and kick devices. Deficiency: slow devices (ppp) and fast ones (100Mb ethernet) - share one queue. It means, that if we have a lot of loaded ppp channels, + share one queue. This means that if we have a lot of loaded ppp channels, we will scan a long list on every 100Mb EOI. I have no idea how to solve it using only "anonymous" Linux mark_bh(). @@ -99,12 +99,12 @@ void qdisc_run_queues(void) while (!dev->tbusy && (res = qdisc_restart(dev)) < 0) /* NOTHING */; - /* The explanation is necessary here. + /* An explanation is necessary here. qdisc_restart called dev->hard_start_xmit, if device is virtual, it could trigger one more - dev_queue_xmit and new device could appear - in active chain. In this case we cannot unlink - empty queue, because we lost back pointer. + dev_queue_xmit and a new device could appear + in the active chain. In this case we cannot unlink + the empty queue, because we lost the back pointer. 
No problem, we will unlink it during the next round. */ @@ -117,7 +117,7 @@ void qdisc_run_queues(void) } } -/* Periodic watchdoc timer to recover of hard/soft device bugs. */ +/* Periodic watchdoc timer to recover from hard/soft device bugs. */ static void dev_do_watchdog(unsigned long dummy); @@ -141,8 +141,8 @@ static void dev_do_watchdog(unsigned long dummy) /* "NOOP" scheduler: the best scheduler, recommended for all interfaces - in all curcumstances. It is difficult to invent anything more - fast or cheap. + under all circumstances. It is difficult to invent anything faster or + cheaper. */ static int diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 56d1651f3..80bc0a96f 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -43,17 +43,17 @@ for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking. This file codes a "divisionless" version of RED algorithm - written down in Fig.17 of the paper. + as written down in Fig.17 of the paper. Short description. ------------------ - When new packet arrives we calculate average queue length: + When a new packet arrives we calculate the average queue length: avg = (1-W)*avg + W*current_queue_len, - W is filter time constant (choosen as 2^(-Wlog)), controlling - inertia of algorithm. To allow larger bursts, W should be + W is the filter time constant (choosen as 2^(-Wlog)), it controls + the inertia of the algorithm. To allow larger bursts, W should be decreased. if (avg > th_max) -> packet marked (dropped). @@ -67,7 +67,7 @@ Short description. max_P should be small (not 1), usually 0.01..0.02 is good value. max_P is chosen as a number, so that max_P/(th_max-th_min) - is negative power of two in order arithmetics to contain + is a negative power of two in order arithmetics to contain only shifts. @@ -78,9 +78,9 @@ Short description. Hard limit on queue length, should be chosen >qth_max to allow packet bursts. 
This parameter does not - affect algorithm behaviour and can be chosen + affect the algorithms behaviour and can be chosen arbitrarily high (well, less than ram size) - Really, this limit will never be achieved + Really, this limit will never be reached if RED works correctly. qth_min - bytes (should be < qth_max/2) @@ -162,18 +162,18 @@ red_enqueue(struct sk_buff *skb, struct Qdisc* sch) /* The problem: ideally, average length queue recalcultion should - be done over constant clock intervals. It is too expensive, so that - calculation is driven by outgoing packets. - When queue is idle we have to model this clock by hands. + be done over constant clock intervals. This is too expensive, so that + the calculation is driven by outgoing packets. + When the queue is idle we have to model this clock by hand. SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth) - dummy packets as burst after idle time, i.e. + dummy packets as a burst after idle time, i.e. q->qave *= (1-W)^m - It is apparently overcomplicated solution (f.e. we have to precompute - a table to make this calculation for reasonable time) - I believe, that a simpler model may be used here, + This is an apparently overcomplicated solution (f.e. we have to precompute + a table to make this calculation in reasonable time) + I believe that a simpler model may be used here, but it is field for experiments. */ q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF]; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 7cc2b6e5f..c6f43badc 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -55,9 +55,9 @@ Queuing using Deficit Round Robin", Proc. SIGCOMM 95. - It is not the thing that usually called (W)FQ nowadays. It does not - use any timestamp mechanism, but instead processes queues - in round-robin order. + This is not the thing that is usually called (W)FQ nowadays. + It does not use any timestamp mechanism, but instead + processes queues in round-robin order. 
ADVANTAGE: @@ -66,10 +66,10 @@ DRAWBACKS: - "Stochastic" -> It is not 100% fair. - When hash collisions occur, several flows are considred as one. + When hash collisions occur, several flows are considered as one. - "Round-robin" -> It introduces larger delays than virtual clock - based schemes, and should not be used for isolation interactive + based schemes, and should not be used for isolating interactive traffic from non-interactive. It means, that this scheduler should be used as leaf of CBQ or P3, which put interactive traffic to higher priority band. @@ -82,13 +82,12 @@ This implementation limits maximal queue length to 128; maximal mtu to 2^15-1; number of hash buckets to 1024. The only goal of this restrictions was that all data - fitted to one 4K page :-). Struct sfq_sched_data is - organized in anti-cache manner: all the data for bucket - scattered over different locations. It is not good, - but it allowed to put it into 4K. + fit into one 4K page :-). Struct sfq_sched_data is + organized in anti-cache manner: all the data for a bucket + are scattered over different locations. This is not good, + but it allowed me to put it into 4K. - It is easy to increase these values, but not in flight. -*/ + It is easy to increase these values, but not in flight. */ #define SFQ_DEPTH 128 #define SFQ_HASH_DIVISOR 1024 diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 109ae7bec..83d6da87c 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -48,12 +48,12 @@ Description. ------------ - Data flow obeys TBF with rate R and depth B, if for any - time interval t_i...t_f number of transmitted bits + A data flow obeys TBF with rate R and depth B, if for any + time interval t_i...t_f the number of transmitted bits does not exceed B + R*(t_f-t_i). 
Packetized version of this definition: - sequence of packets of sizes s_i served at moments t_i + The sequence of packets of sizes s_i served at moments t_i obeys TBF, if for any i<=k: s_i+....+s_k <= B + R*(t_k - t_i) @@ -61,7 +61,7 @@ Algorithm. ---------- - Let N(t_i) be B/R initially and N(t) grows continuously with time as: + Let N(t_i) be B/R initially and N(t) grow continuously with time as: N(t+delta) = min{B/R, N(t) + delta} @@ -73,13 +73,13 @@ - Actually, QoS requires two TBF to be applied to data stream. + Actually, QoS requires two TBF to be applied to a data stream. One of them controls steady state burst size, another - with rate P (peak rate) and depth M (equal to link MTU) - limits bursts at smaller time scale. + one with rate P (peak rate) and depth M (equal to link MTU) + limits bursts at a smaller time scale. - Apparently, P>R, and B>M. If P is infinity, this double - TBF is equivalent to single one. + It is easy to see that P>R, and B>M. If P is infinity, this double + TBF is equivalent to a single one. When TBF works in reshaping mode, latency is estimated as: @@ -89,22 +89,22 @@ NOTES. ------ - If TBF throttles, it starts watchdog timer, which will wake up it - when it will be ready to transmit. - Note, that minimal timer resolution is 1/HZ. - If no new packets will arrive during this period, - or device will not be awaken by EOI for previous packet, - tbf could stop its activity for 1/HZ. + If TBF throttles, it starts a watchdog timer, which will wake it up + when it is ready to transmit. + Note that the minimal timer resolution is 1/HZ. + If no new packets arrive during this period, + or if the device is not awaken by EOI for some previous packet, + TBF can stop its activity for 1/HZ. - It means, that with depth B, the maximal rate is + This means, that with depth B, the maximal rate is R_crit = B*HZ - F.e. for 10Mbit ethernet and HZ=100 minimal allowed B is ~10Kbytes. + F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes. 
- Note, that peak rate TBF is much more tough: with MTU 1500 - P_crit = 150Kbytes/sec. So that, if you need greater peak + Note that the peak rate TBF is much more tough: with MTU 1500 + P_crit = 150Kbytes/sec. So, if you need greater peak rates, use alpha with HZ=1000 :-) */ @@ -139,7 +139,7 @@ tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) return 1; } - /* Drop action: undo the things that we just made, + /* Drop action: undo the things that we just did, * i.e. make tail drop */ @@ -230,14 +230,14 @@ tbf_dequeue(struct Qdisc* sch) add_timer(&q->wd_timer); } - /* Maybe, we have in queue a shorter packet, + /* Maybe we have a shorter packet in the queue, which can be sent now. It sounds cool, - but, however, wrong in principle. - We MUST NOT reorder packets in these curcumstances. + but, however, this is wrong in principle. + We MUST NOT reorder packets under these circumstances. - Really, if we splitted flow to independent - subflows, it would be very good solution. - It is main idea of all FQ algorithms + Really, if we split the flow into independent + subflows, it would be a very good solution. + This is the main idea of all FQ algorithms (cf. CSZ, HPFQ, HFCS) */ __skb_queue_head(&sch->q, skb); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 414bbdfb1..212e6f696 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -38,8 +38,8 @@ How to setup it. ---------------- - After loading this module you will find new device teqlN - and new qdisc with the same name. To join a slave to equalizer + After loading this module you will find a new device teqlN + and new qdisc with the same name. To join a slave to the equalizer you should just set this qdisc on a device f.e. # tc qdisc add dev eth0 root teql0 @@ -50,20 +50,19 @@ Applicability. -------------- - 1. Slave devices MUST be active devices i.e. must raise tbusy - signal and generate EOI event. If you want to equalize virtual devices - sort of tunnels, use normal eql device. + 1. 
Slave devices MUST be active devices, i.e., they must raise the tbusy + signal and generate EOI events. If you want to equalize virtual devices + like tunnels, use a normal eql device. 2. This device puts no limitations on physical slave characteristics f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-) - Certainly, large difference in link speeds will make resulting eqalized - link unusable, because of huge packet reordering. I estimated upper - useful difference as ~10 times. - 3. If slave requires address resolution, only protocols using - neighbour cache (IPv4/IPv6) will work over equalized link. - Another protocols still are allowed to use slave device directly, + Certainly, large difference in link speeds will make the resulting + eqalized link unusable, because of huge packet reordering. + I estimate an upper useful difference as ~10 times. + 3. If the slave requires address resolution, only protocols using + neighbour cache (IPv4/IPv6) will work over the equalized link. + Other protocols are still allowed to use the slave device directly, which will not break load balancing, though native slave - traffic will have the highest priority. - */ + traffic will have the highest priority. */ struct teql_master { @@ -166,8 +165,10 @@ teql_destroy(struct Qdisc* sch) NEXT_SLAVE(prev) = NEXT_SLAVE(q); if (q == master->slaves) { master->slaves = NEXT_SLAVE(q); - if (q == master->slaves) + if (q == master->slaves) { master->slaves = NULL; + qdisc_reset(master->dev.qdisc); + } } skb_queue_purge(&dat->q); teql_neigh_release(xchg(&dat->ncache, NULL)); @@ -251,7 +252,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct device *dev) return 0; } neigh_release(n); - return (skb_res != NULL); + return (skb_res == NULL) ? 
-EAGAIN : 1; } static __inline__ int @@ -270,6 +271,7 @@ static int teql_master_xmit(struct sk_buff *skb, struct device *dev) struct Qdisc *start, *q; int busy; int nores; + int len = skb->len; struct sk_buff *skb_res = NULL; dev->tbusy = 1; @@ -278,7 +280,7 @@ static int teql_master_xmit(struct sk_buff *skb, struct device *dev) restart: nores = 0; - busy = 1; + busy = 0; if ((q = start) == NULL) goto drop; @@ -286,32 +288,39 @@ restart: do { struct device *slave = q->dev; - if (!slave->tbusy && slave->qdisc_sleeping == q) { - busy = 0; - - if (q->h.forw == NULL) { - q->h.forw = qdisc_head.forw; - qdisc_head.forw = &q->h; - } + if (slave->qdisc_sleeping != q) + continue; + if (slave->tbusy) { + busy = 1; + continue; + } - switch (teql_resolve(skb, skb_res, slave)) { - case 0: - if (slave->hard_start_xmit(skb, slave) == 0) { - master->slaves = NEXT_SLAVE(q); - dev->tbusy = 0; - return 0; - } - break; - case 1: - nores = 1; - break; - default: + if (q->h.forw == NULL) { + q->h.forw = qdisc_head.forw; + qdisc_head.forw = &q->h; + } + + switch (teql_resolve(skb, skb_res, slave)) { + case 0: + if (slave->hard_start_xmit(skb, slave) == 0) { master->slaves = NEXT_SLAVE(q); dev->tbusy = 0; - return 0; + master->stats.tx_packets++; + master->stats.tx_bytes += len; + return 0; } - __skb_pull(skb, skb->nh.raw - skb->data); + if (dev->tbusy) + busy = 1; + break; + case 1: + master->slaves = NEXT_SLAVE(q); + dev->tbusy = 0; + return 0; + default: + nores = 1; + break; } + __skb_pull(skb, skb->nh.raw - skb->data); } while ((q = NEXT_SLAVE(q)) != start); if (nores && skb_res == NULL) { @@ -322,8 +331,10 @@ restart: dev->tbusy = busy; if (busy) return 1; + master->stats.tx_errors++; drop: + master->stats.tx_dropped++; dev_kfree_skb(skb); return 0; } @@ -366,6 +377,7 @@ static int teql_master_open(struct device *dev) m->dev.mtu = mtu; m->dev.flags = (m->dev.flags&~FMASK) | flags; + m->dev.tbusy = 0; MOD_INC_USE_COUNT; return 0; } diff --git a/net/sunrpc/auth_unix.c 
b/net/sunrpc/auth_unix.c index 0c4cc7f5a..e3025334d 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -178,6 +178,11 @@ unx_marshal(struct rpc_task *task, u32 *p, int ruid) base = p++; *p++ = htonl(jiffies/HZ); #ifndef DONT_FILLIN_HOSTNAME + /* + * Problem: The UTS name could change under us. We can't lock + * here to handle this. On the other hand we can't really + * go building a bad RPC! + */ if ((n = strlen((char *) system_utsname.nodename)) > UNX_MAXNODENAME) n = UNX_MAXNODENAME; *p++ = htonl(n); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e8ca9a511..b1a8150ec 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -203,6 +203,10 @@ rpc_do_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, unsigned long irqflags; int async, status; + /* If this client is slain all further I/O fails */ + if (clnt->cl_dead) + return -EIO; + /* Turn off various signals */ if (clnt->cl_intr) { struct k_sigaction *action = current->sig->action; @@ -637,7 +641,8 @@ call_decode(struct rpc_task *task) * The following is an NFS-specific hack to cater for setuid * processes whose uid is mapped to nobody on the server. 
*/ - if (task->tk_client->cl_prog == 100003 && ntohl(*p) == NFSERR_PERM) { + if (task->tk_client->cl_prog == 100003 && + (ntohl(*p) == NFSERR_ACCES || ntohl(*p) == NFSERR_PERM)) { if (RPC_IS_SETUID(task) && (task->tk_suid_retry)--) { dprintk("RPC: %4d retry squashed uid\n", task->tk_pid); task->tk_flags ^= RPC_CALL_REALUID; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 765dc05fc..8caaa46e8 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -176,6 +176,21 @@ rpc_make_runnable(struct rpc_task *task) task->tk_flags |= RPC_TASK_RUNNING; } + +/* + * For other people who may need to wake the I/O daemon + * but should (for now) know nothing about its innards + */ + +void rpciod_wake_up(void) +{ + if(rpciod_pid==0) + { + printk(KERN_ERR "rpciod: wot no daemon?\n"); + } + wake_up(&rpciod_idle); +} + /* * Prepare for sleeping on a wait queue. * By always appending tasks to the list we ensure FIFO behavior. @@ -466,10 +481,12 @@ __rpc_schedule(void) struct rpc_task *task; int count = 0; unsigned long oldflags; + int need_resched = current->need_resched; dprintk("RPC: rpc_schedule enter\n"); + save_flags(oldflags); while (1) { - save_flags(oldflags); cli(); + cli(); if (!(task = schedq.task)) break; rpc_del_timer(task); @@ -793,6 +810,7 @@ rpciod(void *ptr) dprintk("RPC: rpciod back to sleep\n"); interruptible_sleep_on(&rpciod_idle); dprintk("RPC: switch to rpciod\n"); + rpciod_tcp_dispatcher(); rounds = 0; } restore_flags(oldflags); diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 8c1df5a50..6ccf2e29f 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -40,6 +40,7 @@ EXPORT_SYMBOL(rpciod_up); EXPORT_SYMBOL(rpc_create_client); EXPORT_SYMBOL(rpc_destroy_client); EXPORT_SYMBOL(rpc_shutdown_client); +EXPORT_SYMBOL(rpc_killall_tasks); EXPORT_SYMBOL(rpc_do_call); EXPORT_SYMBOL(rpc_call_setup); EXPORT_SYMBOL(rpc_delay); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 47d1104dc..e97d339b3 100644 --- 
a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -905,7 +905,7 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) } type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; - if ((error = sock_create(AF_INET, type, protocol, &sock)) < 0) + if ((error = sock_create(PF_INET, type, protocol, &sock)) < 0) return error; if (sin != NULL) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index f614cfa33..e2af81be4 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -32,6 +32,8 @@ * tasks that rely on callbacks. * * Copyright (C) 1995, 1996, Olaf Kirch <okir@monad.swb.de> + * + * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> */ #define __KERNEL_SYSCALLS__ @@ -322,6 +324,12 @@ xprt_close(struct rpc_xprt *xprt) fput(xprt->file); else sock_release(xprt->sock); + /* + * TCP doesnt require the rpciod now - other things may + * but rpciod handles that not us. + */ + if(xprt->stream) + rpciod_down(); } /* @@ -693,41 +701,103 @@ done: } /* - * data_ready callback for TCP. 
+ * TCP task queue stuff */ -static void -tcp_data_ready(struct sock *sk, int len) + +static struct rpc_xprt *rpc_xprt_pending = NULL; /* Chain by rx_pending of rpc_xprt's */ + +/* + * This is protected from tcp_data_ready and the stack as its run + * inside of the RPC I/O daemon + */ + +void rpciod_tcp_dispatcher(void) +{ + struct rpc_xprt *xprt; + int result; + + dprintk("rpciod_tcp_dispatcher: Queue Running\n"); + + /* + * Empty each pending socket + */ + + while((xprt=rpc_xprt_pending)!=NULL) + { + int safe_retry=0; + + rpc_xprt_pending=xprt->rx_pending; + xprt->rx_pending_flag=0; + + dprintk("rpciod_tcp_dispatcher: Processing %p\n", xprt); + + do + { + if (safe_retry++ > 50) + break; + result = tcp_input_record(xprt); + } + while (result >= 0); + + switch (result) { + case -EAGAIN: + continue; + case -ENOTCONN: + case -EPIPE: + xprt_disconnect(xprt); + continue; + default: + printk(KERN_WARNING "RPC: unexpected error %d from tcp_input_record\n", + result); + } + } +} + + +extern inline void tcp_rpciod_queue(void) +{ + rpciod_wake_up(); +} + +/* + * data_ready callback for TCP. We can't just jump into the + * tcp recvmsg functions inside of the network receive bh or + * bad things occur. We queue it to pick up after networking + * is done. 
+ */ + +static void tcp_data_ready(struct sock *sk, int len) { struct rpc_xprt *xprt; - int result, safe_retry = 0; dprintk("RPC: tcp_data_ready...\n"); if (!(xprt = xprt_from_sock(sk))) + { + printk("Not a socket with xprt %p\n", sk); return; + } dprintk("RPC: tcp_data_ready client %p\n", xprt); dprintk("RPC: state %x conn %d dead %d zapped %d\n", sk->state, xprt->connected, sk->dead, sk->zapped); - - do { - if (safe_retry++ > 20) - return; - result = tcp_input_record(xprt); - } while (result >= 0); - - switch (result) { - case -EAGAIN: - return; - case -ENOTCONN: - case -EPIPE: - xprt_disconnect(xprt); - return; - default: - printk("RPC: unexpected error %d from tcp_input_record\n", - result); + /* + * If we are not waiting for the RPC bh run then + * we are now + */ + if (!xprt->rx_pending_flag) + { + dprintk("RPC: xprt queue\n"); + if(rpc_xprt_pending==NULL) + tcp_rpciod_queue(); + xprt->rx_pending_flag=1; + xprt->rx_pending=rpc_xprt_pending; + rpc_xprt_pending=xprt; } + else + dprintk("RPC: xprt queued already %p\n", xprt); } + static void tcp_state_change(struct sock *sk) { @@ -1210,6 +1280,12 @@ xprt_setup(struct socket *sock, int proto, xprt->free = xprt->slot; dprintk("RPC: created transport %p\n", xprt); + + /* + * TCP requires the rpc I/O daemon is present + */ + if(proto==IPPROTO_TCP) + rpciod_up(); return xprt; } @@ -1231,7 +1307,7 @@ xprt_create(struct file *file, struct sockaddr_in *ap, struct rpc_timeout *to) } sock = &file->f_inode->u.socket_i; - if (sock->ops->family != AF_INET) { + if (sock->ops->family != PF_INET) { printk(KERN_WARNING "RPC: only INET sockets supported\n"); return NULL; } @@ -1284,7 +1360,7 @@ xprt_create_socket(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) (proto == IPPROTO_UDP)? "udp" : "tcp", proto); type = (proto == IPPROTO_UDP)? 
SOCK_DGRAM : SOCK_STREAM; - if ((err = sock_create(AF_INET, type, proto, &sock)) < 0) { + if ((err = sock_create(PF_INET, type, proto, &sock)) < 0) { printk("RPC: can't create socket (%d).\n", -err); goto failed; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 624cbb8d8..961516de6 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -86,6 +86,7 @@ #include <linux/proc_fs.h> #include <net/scm.h> #include <linux/init.h> +#include <linux/poll.h> #include <asm/checksum.h> @@ -100,6 +101,8 @@ unix_socket *unix_socket_table[UNIX_HASH_SIZE+1]; #define UNIX_ABSTRACT(sk) ((sk)->protinfo.af_unix.addr->hash!=UNIX_HASH_SIZE) +static void unix_destroy_socket(unix_socket *sk); + extern __inline__ unsigned unix_hash_fold(unsigned hash) { hash ^= hash>>16; @@ -127,7 +130,7 @@ extern __inline__ void unix_lock(unix_socket *sk) extern __inline__ int unix_unlock(unix_socket *sk) { - return sk->sock_readers--; + return --sk->sock_readers; } extern __inline__ int unix_locked(unix_socket *sk) @@ -254,6 +257,10 @@ static void unix_destroy_timer(unsigned long data) if(!unix_locked(sk) && atomic_read(&sk->wmem_alloc) == 0) { sk_free(sk); + unix_remove_socket(sk); + + /* socket destroyed, decrement count */ + MOD_DEC_USE_COUNT; return; } @@ -273,28 +280,56 @@ static void unix_delayed_delete(unix_socket *sk) sk->timer.function=unix_destroy_timer; add_timer(&sk->timer); } + +static int unix_release_sock (unix_socket *sk) +{ + unix_socket *skpair; + + sk->state_change(sk); + sk->dead=1; + sk->socket = NULL; + + skpair=unix_peer(sk); + + /* Try to flush out this socket. Throw out buffers at least */ + unix_destroy_socket(sk); + + if (skpair!=NULL) + { + if (sk->type==SOCK_STREAM && unix_our_peer(sk, skpair)) + { + skpair->state_change(skpair); + skpair->shutdown=SHUTDOWN_MASK; /* No more writes*/ + } + unix_unlock(skpair); /* It may now die */ + } + + /* + * Fixme: BSD difference: In BSD all sockets connected to use get + * ECONNRESET and we die on the spot. 
In Linux we behave + * like files and pipes do and wait for the last + * dereference. + * + * Can't we simply set sock->err? + */ + + unix_gc(); /* Garbage collect fds */ + return 0; +} static void unix_destroy_socket(unix_socket *sk) { struct sk_buff *skb; - unix_remove_socket(sk); - while((skb=skb_dequeue(&sk->receive_queue))!=NULL) { if(sk->state==TCP_LISTEN) { - unix_socket *osk=skb->sk; - osk->state=TCP_CLOSE; - kfree_skb(skb); /* Now surplus - free the skb first before the socket */ - osk->state_change(osk); /* So the connect wakes and cleans up (if any) */ - /* osk will be destroyed when it gets to close or the timer fires */ - } - else - { - /* passed fds are erased in the kfree_skb hook */ - kfree_skb(skb); + unix_unlock(sk); + unix_release_sock(skb->sk); } + /* passed fds are erased in the kfree_skb hook */ + kfree_skb(skb); } if(sk->protinfo.af_unix.dentry!=NULL) @@ -306,15 +341,18 @@ static void unix_destroy_socket(unix_socket *sk) if(!unix_unlock(sk) && atomic_read(&sk->wmem_alloc) == 0) { sk_free(sk); + unix_remove_socket(sk); + + /* socket destroyed, decrement count */ + MOD_DEC_USE_COUNT; } else { + sk->state=TCP_CLOSE; sk->dead=1; unix_delayed_delete(sk); /* Try every so often until buffers are all freed */ } - /* socket destroyed, decrement count */ - MOD_DEC_USE_COUNT; } static int unix_listen(struct socket *sock, int backlog) @@ -332,23 +370,29 @@ static int unix_listen(struct socket *sock, int backlog) sk->state_change(sk); sk->state=TCP_LISTEN; sock->flags |= SO_ACCEPTCON; + /* set credentials so connect can copy them */ + sk->peercred.pid = current->pid; + sk->peercred.uid = current->euid; + sk->peercred.gid = current->egid; return 0; } extern struct proto_ops unix_stream_ops; extern struct proto_ops unix_dgram_ops; -static int unix_create(struct socket *sock, int protocol) +static int unix_create1(struct socket *sock, struct sock **skp, int protocol) { struct sock *sk; - sock->state = SS_UNCONNECTED; - if (protocol && protocol != PF_UNIX) 
return -EPROTONOSUPPORT; - switch (sock->type) + if (sock) { + sock->state = SS_UNCONNECTED; + + switch (sock->type) + { case SOCK_STREAM: sock->ops = &unix_stream_ops; break; @@ -363,66 +407,48 @@ static int unix_create(struct socket *sock, int protocol) break; default: return -ESOCKTNOSUPPORT; + } } - sk = sk_alloc(AF_UNIX, GFP_KERNEL, 1); + sk = sk_alloc(PF_UNIX, GFP_KERNEL, 1); if (!sk) return -ENOMEM; sock_init_data(sock,sk); sk->destruct = unix_destruct_addr; - sk->protinfo.af_unix.family=AF_UNIX; + sk->protinfo.af_unix.family=PF_UNIX; sk->protinfo.af_unix.dentry=NULL; sk->sock_readers=1; /* Us */ sk->protinfo.af_unix.readsem=MUTEX; /* single task reading lock */ sk->mtu=4096; sk->protinfo.af_unix.list=&unix_sockets_unbound; unix_insert_socket(sk); - + if (skp) + *skp =sk; + /* socket created, increment count */ MOD_INC_USE_COUNT; return 0; } +static int unix_create(struct socket *sock, int protocol) +{ + return unix_create1(sock, NULL, protocol); +} + static int unix_release(struct socket *sock, struct socket *peer) { unix_socket *sk = sock->sk; - unix_socket *skpair; if (!sk) return 0; + sock->sk = NULL; if (sock->state != SS_UNCONNECTED) sock->state = SS_DISCONNECTING; - sk->state_change(sk); - sk->dead=1; - skpair=unix_peer(sk); - if (sock->type==SOCK_STREAM && skpair) - { - if (unix_our_peer(sk, skpair)) - skpair->shutdown=SHUTDOWN_MASK; /* No more writes */ - if (skpair->state!=TCP_LISTEN) - skpair->state_change(skpair); /* Wake any blocked writes */ - } - if (skpair!=NULL) - unix_unlock(skpair); /* It may now die */ - unix_peer(sk)=NULL; /* No pair */ - unix_destroy_socket(sk); /* Try to flush out this socket. Throw out buffers at least */ - unix_gc(); /* Garbage collect fds */ - - /* - * FIXME: BSD difference: In BSD all sockets connected to use get ECONNRESET and we die on the spot. In - * Linux we behave like files and pipes do and wait for the last dereference. 
- */ - if (sk->socket) - { - sk->socket = NULL; - sock->sk = NULL; - } - - return 0; + return unix_release_sock (sk); } static int unix_autobind(struct socket *sock) @@ -625,18 +651,17 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, return 0; } -static int unix_stream_connect1(struct socket *sock, struct msghdr *msg, - int len, struct unix_skb_parms *cmsg, int nonblock) +static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) { - struct sockaddr_un *sunaddr=(struct sockaddr_un *)msg->msg_name; - struct sock *sk = sock->sk; + struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; + struct sock *sk = sock->sk, *newsk; unix_socket *other; struct sk_buff *skb; int err; unsigned hash; - int addr_len; - addr_len = unix_mkname(sunaddr, msg->msg_namelen, &hash); + addr_len = unix_mkname(sunaddr, addr_len, &hash); if (addr_len < 0) return addr_len; @@ -648,123 +673,80 @@ static int unix_stream_connect1(struct socket *sock, struct msghdr *msg, case SS_CONNECTED: /* Socket is already connected */ return -EISCONN; - case SS_CONNECTING: - /* Not yet connected... we will check this. */ - break; default: return(-EINVAL); } + /* + * Now ready to connect + */ + + sk->state=TCP_CLOSE; + + /* Find listening sock */ + other=unix_find_other(sunaddr, addr_len, sk->type, hash, &err); + if(other==NULL) + goto out; - if (unix_peer(sk)) - { - if (sock->state==SS_CONNECTING && sk->state==TCP_ESTABLISHED) - { - sock->state=SS_CONNECTED; - if (!sk->protinfo.af_unix.addr) - unix_autobind(sock); - return 0; - } - if (sock->state==SS_CONNECTING && sk->state == TCP_CLOSE) - { - sock->state=SS_UNCONNECTED; - return -ECONNREFUSED; - } - if (sock->state!=SS_CONNECTING) - return -EISCONN; - if (nonblock) - return -EALREADY; - /* - * Drop through the connect up logic to the wait. 
- */ - } + /* create new sock for complete connection */ + err = unix_create1(NULL, &newsk, PF_UNIX); + if (newsk == NULL) + goto out; - if (sock->state==SS_UNCONNECTED) - { + /* Allocate skb for sending to listening sock */ + skb=sock_alloc_send_skb(newsk, 0, 0, flags&O_NONBLOCK, &err); + if(skb==NULL) /* - * Now ready to connect + * if it gives EAGAIN we should give back + * EINPROGRESS. But this should not happen since the + * socket should have some writespace left (it did not + * allocate any memory until now) */ - - skb=sock_alloc_send_skb(sk, len, 0, nonblock, &err); /* Marker object */ - if(skb==NULL) - goto out; - memcpy(&UNIXCB(skb), cmsg, sizeof(*cmsg)); - if (len) { - err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, - len); - if (err) - goto out_free; - } + goto out_release; - sk->state=TCP_CLOSE; - other=unix_find_other(sunaddr, addr_len, sk->type, hash, &err); - if(other==NULL) - goto out_free; - other->ack_backlog++; - unix_peer(sk)=other; - skb_queue_tail(&other->receive_queue,skb); - sk->state=TCP_SYN_SENT; - sock->state=SS_CONNECTING; - other->data_ready(other,0); /* Wake up ! */ - } - - - /* Wait for an accept */ - - while(sk->state==TCP_SYN_SENT) - { - if(nonblock) - return -EINPROGRESS; - interruptible_sleep_on(sk->sleep); - if(signal_pending(current)) - return -ERESTARTSYS; - } - - /* - * Has the other end closed on us ? 
- */ - - if(sk->state==TCP_CLOSE) - { - unix_unlock(unix_peer(sk)); - unix_peer(sk)=NULL; - sock->state=SS_UNCONNECTED; - return -ECONNREFUSED; - } - - /* - * Amazingly it has worked - */ - + UNIXCB(skb).attr = MSG_SYN; + + /* set up connecting socket */ sock->state=SS_CONNECTED; if (!sk->protinfo.af_unix.addr) unix_autobind(sock); + unix_peer(sk)=newsk; + unix_lock(sk); + sk->state=TCP_ESTABLISHED; + /* Set credentials */ + sk->peercred = other->peercred; + + /* set up newly created sock */ + unix_peer(newsk)=sk; + unix_lock(newsk); + newsk->state=TCP_ESTABLISHED; + newsk->type=SOCK_STREAM; + newsk->peercred.pid = current->pid; + newsk->peercred.uid = current->euid; + newsk->peercred.gid = current->egid; + + /* copy address information from listening to new sock*/ + if (other->protinfo.af_unix.addr) + { + atomic_inc(&other->protinfo.af_unix.addr->refcnt); + newsk->protinfo.af_unix.addr=other->protinfo.af_unix.addr; + } + if (other->protinfo.af_unix.dentry) + newsk->protinfo.af_unix.dentry=dget(other->protinfo.af_unix.dentry); + + /* send info to listening sock */ + other->ack_backlog++; + skb_queue_tail(&other->receive_queue,skb); + other->data_ready(other,0); /* Wake up ! 
*/ + return 0; -out_free: - kfree_skb(skb); +out_release: + unix_destroy_socket(newsk); out: return err; } - -static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) -{ - struct msghdr msg; - struct unix_skb_parms cmsg; - - msg.msg_name = uaddr; - msg.msg_namelen = addr_len; - cmsg.fp = NULL; - cmsg.attr = MSG_SYN; - cmsg.creds.pid = current->pid; - cmsg.creds.uid = current->euid; - cmsg.creds.gid = current->egid; - - return unix_stream_connect1(sock, &msg, 0, &cmsg, flags&O_NONBLOCK); -} - static int unix_socketpair(struct socket *socka, struct socket *sockb) { struct sock *ska=socka->sk, *skb = sockb->sk; @@ -802,14 +784,6 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags) if (sk->state!=TCP_LISTEN) return -EINVAL; - if (sk->protinfo.af_unix.addr) - { - atomic_inc(&sk->protinfo.af_unix.addr->refcnt); - newsk->protinfo.af_unix.addr=sk->protinfo.af_unix.addr; - } - if (sk->protinfo.af_unix.dentry) - newsk->protinfo.af_unix.dentry=dget(sk->protinfo.af_unix.dentry); - for (;;) { skb=skb_dequeue(&sk->receive_queue); @@ -834,20 +808,19 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags) tsk=skb->sk; sk->ack_backlog--; - unix_peer(newsk)=tsk; - unix_peer(tsk)=newsk; - tsk->state=TCP_ESTABLISHED; - newsk->state=TCP_ESTABLISHED; - memcpy(&newsk->peercred, UNIXCREDS(skb), sizeof(struct ucred)); - tsk->peercred.pid = current->pid; - tsk->peercred.uid = current->euid; - tsk->peercred.gid = current->egid; - unix_lock(newsk); /* Swap lock over */ - unix_unlock(sk); /* Locked to child socket not master */ - unix_lock(tsk); /* Back lock */ - kfree_skb(skb); /* The buffer is just used as a tag */ - tsk->state_change(tsk); /* Wake up any sleeping connect */ - sock_wake_async(tsk->socket, 0); + unix_unlock(sk); /* No longer locked to master */ + kfree_skb(skb); + + /* attach accepted sock to socket */ + newsock->state=SS_CONNECTED; + newsock->sk=tsk; + 
tsk->sleep=newsk->sleep; + tsk->socket=newsock; + + /* destroy handed sock */ + newsk->socket = NULL; + unix_destroy_socket(newsk); + return 0; } @@ -1389,6 +1362,38 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return(0); } +static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait) +{ + struct sock *sk = sock->sk; + unsigned int mask; + + poll_wait(file, sk->sleep, wait); + mask = 0; + + /* exceptional events? */ + if (sk->err) + mask |= POLLERR; + if (sk->shutdown & RCV_SHUTDOWN) + mask |= POLLHUP; + + /* readable? */ + if (!skb_queue_empty(&sk->receive_queue)) + mask |= POLLIN | POLLRDNORM; + + /* Connection-based need to check for termination and startup */ + if (sk->type == SOCK_STREAM && sk->state==TCP_CLOSE) + mask |= POLLHUP; + + /* + * we set writable also when the other side has shut down the + * connection. This prevents stuck sockets. + */ + if (sk->sndbuf - atomic_read(&sk->wmem_alloc) >= MIN_WRITE_SPACE) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + return mask; +} + #ifdef CONFIG_PROC_FS static int unix_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data) @@ -1410,7 +1415,9 @@ static int unix_read_proc(char *buffer, char **start, off_t offset, 0, s->socket ? s->socket->flags : 0, s->type, - s->socket ? s->socket->state : 0, + s->socket ? s->socket->state : + (s->state == TCP_ESTABLISHED ? + SS_CONNECTING : SS_DISCONNECTING), s->socket ? 
s->socket->inode->i_ino : 0); if (s->protinfo.af_unix.addr) @@ -1426,7 +1433,7 @@ static int unix_read_proc(char *buffer, char **start, off_t offset, } buffer[len++]='\n'; - pos=begin+len; + pos+=len; if(pos<offset) { len=0; @@ -1441,12 +1448,14 @@ done: len-=(offset-begin); if(len>length) len=length; + if (len < 0) + len = 0; return len; } #endif struct proto_ops unix_stream_ops = { - AF_UNIX, + PF_UNIX, sock_no_dup, unix_release, @@ -1455,7 +1464,7 @@ struct proto_ops unix_stream_ops = { unix_socketpair, unix_accept, unix_getname, - datagram_poll, + unix_poll, unix_ioctl, unix_listen, unix_shutdown, @@ -1467,7 +1476,7 @@ struct proto_ops unix_stream_ops = { }; struct proto_ops unix_dgram_ops = { - AF_UNIX, + PF_UNIX, sock_no_dup, unix_release, @@ -1488,7 +1497,7 @@ struct proto_ops unix_dgram_ops = { }; struct net_proto_family unix_family_ops = { - AF_UNIX, + PF_UNIX, unix_create }; @@ -1534,10 +1543,13 @@ __initfunc(void unix_proto_init(struct net_proto *pro)) #ifdef MODULE void cleanup_module(void) { - sock_unregister(AF_UNIX); + sock_unregister(PF_UNIX); #ifdef CONFIG_SYSCTL unix_sysctl_unregister(); #endif +#ifdef CONFIG_PROC_FS + remove_proc_entry("net/unix", 0); +#endif } #endif diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 703fdd41e..4f85caa73 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -72,9 +72,9 @@ extern inline unix_socket *unix_get_socket(struct file *filp) struct sock * s = sock->sk; /* - * AF_UNIX ? + * PF_UNIX ? 
*/ - if (s && sock->ops && sock->ops->family == AF_UNIX) + if (s && sock->ops && sock->ops->family == PF_UNIX) u_sock = s; } return u_sock; diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index b00d0ab2a..fabe85161 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -5,7 +5,7 @@ * the following common services for the WAN Link Drivers: * o WAN device managenment (registering, unregistering) * o Network interface management -* o Physical connection management (dial-up, incomming calls) +* o Physical connection management (dial-up, incoming calls) * o Logical connection management (switched virtual circuits) * o Protocol encapsulation/decapsulation * @@ -24,6 +24,8 @@ * Jun 27, 1997 Alan Cox realigned with vendor code * Oct 15, 1997 Farhan Thawar changed wan_encapsulate to add a pad byte of 0 * Apr 20, 1998 Alan Cox Fixed 2.1 symbols +* May 17, 1998 K. Baranowski Fixed SNAP encapsulation in wan_encapsulate +* Aug 15, 1998 Arnaldo C. Melo Fixed device_setup return value *****************************************************************************/ #include <linux/stddef.h> /* offsetof(), etc. 
*/ @@ -288,11 +290,11 @@ int wanrouter_encapsulate (struct sk_buff* skb, struct device* dev) case ETH_P_IPX: /* SNAP encapsulation */ case ETH_P_ARP: hdr_len += 7; - skb_push(skb, 6); + skb_push(skb, 7); skb->data[0] = 0; skb->data[1] = NLPID_SNAP; - memcpy(&skb->data[1], oui_ether, sizeof(oui_ether)); - *((unsigned short*)&skb->data[4]) = htons(skb->protocol); + memcpy(&skb->data[2], oui_ether, sizeof(oui_ether)); + *((unsigned short*)&skb->data[5]) = htons(skb->protocol); break; default: /* Unknown packet type */ @@ -465,7 +467,7 @@ static int device_setup (wan_device_t* wandev, wandev_conf_t* u_conf) if(!copy_from_user(data, conf->data, conf->data_size)) { conf->data=data; - wandev->setup(wandev,conf); + err = wandev->setup(wandev,conf); } else err = -ENOBUFS; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 514f64e1b..60a3581e5 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -174,7 +174,8 @@ static void x25_kill_by_device(struct device *dev) struct sock *s; for (s = x25_list; s != NULL; s = s->next) - if (s->protinfo.x25->neighbour->dev == dev) + if (s->protinfo.x25->neighbour && + s->protinfo.x25->neighbour->dev == dev) x25_disconnect(s, ENETUNREACH, 0, 0); } @@ -621,6 +622,9 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len if ((sk->protinfo.x25->neighbour = x25_get_neigh(dev)) == NULL) return -ENETUNREACH; + x25_limit_facilities(&sk->protinfo.x25->facilities, + sk->protinfo.x25->neighbour); + if ((sk->protinfo.x25->lci = x25_new_lci(sk->protinfo.x25->neighbour)) == 0) return -ENETUNREACH; @@ -787,6 +791,13 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *neigh, unsigned i } /* + * current neighbour/link might impose additional limits + * on certain facilties + */ + + x25_limit_facilities(&facilities,neigh); + + /* * Try to create a new socket. 
*/ if ((make = x25_make_new(sk)) == NULL) { @@ -1124,18 +1135,8 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return -EINVAL; if (facilities.pacsize_out < X25_PS16 || facilities.pacsize_out > X25_PS4096) return -EINVAL; - if (sk->state == TCP_CLOSE || sk->protinfo.x25->neighbour->extended) - { - if (facilities.winsize_in < 1 || facilities.winsize_in > 127) - return -EINVAL; - if (facilities.winsize_out < 1 || facilities.winsize_out > 127) - return -EINVAL; - } else { - if (facilities.winsize_in < 1 || facilities.winsize_in > 7) - return -EINVAL; - if (facilities.winsize_out < 1 || facilities.winsize_out > 7) - return -EINVAL; - } + if (facilities.winsize_in < 1 || facilities.winsize_in > 127) + return -EINVAL; if (facilities.throughput < 0x03 || facilities.throughput > 0x2C) return -EINVAL; if (facilities.reverse != 0 && facilities.reverse != 1) @@ -1276,6 +1277,16 @@ struct notifier_block x25_dev_notifier = { 0 }; +void x25_kill_by_neigh(struct x25_neigh *neigh) +{ + struct sock *s; + + for( s=x25_list; s != NULL; s=s->next){ + if( s->protinfo.x25->neighbour == neigh ) + x25_disconnect(s, ENETUNREACH, 0, 0); + } +} + #ifdef CONFIG_PROC_FS static struct proc_dir_entry proc_net_x25 = { PROC_NET_X25, 3, "x25", diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 16fc3677d..bcbf31b0d 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -98,6 +98,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *neigh) int x25_lapb_receive_frame(struct sk_buff *skb, struct device *dev, struct packet_type *ptype) { struct x25_neigh *neigh; + int queued; skb->sk = NULL; @@ -113,7 +114,13 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct device *dev, struct packe switch (skb->data[0]) { case 0x00: skb_pull(skb, 1); - return x25_receive_data(skb, neigh); + queued = x25_receive_data(skb, neigh); + if( ! queued ) + /* We need to free the skb ourselves because + * net_bh() won't care about our return code. 
+ */ + kfree_skb(skb); + return 0; case 0x01: x25_link_established(neigh); @@ -215,6 +222,8 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *neigh) { unsigned char *dptr; + skb->nh.raw = skb->data; + switch (neigh->dev->type) { case ARPHRD_X25: dptr = skb_push(skb, 1); @@ -238,3 +247,6 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *neigh) } #endif + + + diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index af072ce22..18de2da1a 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -200,4 +200,26 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, struct x25_fa return len; } +/* + * Limit values of certain facilities according to the capability of the + * currently attached x25 link. + */ +void x25_limit_facilities(struct x25_facilities *facilities, + struct x25_neigh *neighbour) +{ + + if( ! neighbour->extended ){ + if( facilities->winsize_in > 7 ){ + printk(KERN_DEBUG "X.25: incoming winsize limited to 7\n"); + facilities->winsize_in = 7; + } + if( facilities->winsize_out > 7 ){ + facilities->winsize_out = 7; + printk( KERN_DEBUG "X.25: outgoing winsize limited to 7\n"); + } + } +} + #endif + + diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index ae98e95ec..ad7adb7ea 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -49,17 +49,20 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) if (more) { sk->protinfo.x25->fraglen += skb->len; skb_queue_tail(&sk->protinfo.x25->fragment_queue, skb); + skb_set_owner_r(skb, sk); return 0; } if (!more && sk->protinfo.x25->fraglen > 0) { /* End of fragment */ - sk->protinfo.x25->fraglen += skb->len; - skb_queue_tail(&sk->protinfo.x25->fragment_queue, skb); + int len = sk->protinfo.x25->fraglen + skb->len; - if ((skbn = alloc_skb(sk->protinfo.x25->fraglen, GFP_ATOMIC)) == NULL) + if ((skbn = alloc_skb(len, GFP_ATOMIC)) == NULL){ + kfree_skb(skb); return 1; + } + + skb_queue_tail(&sk->protinfo.x25->fragment_queue, 
skb); - skb_set_owner_r(skbn, sk); skbn->h.raw = skbn->data; skbo = skb_dequeue(&sk->protinfo.x25->fragment_queue); @@ -75,7 +78,12 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) sk->protinfo.x25->fraglen = 0; } - return sock_queue_rcv_skb(sk, skbn); + skb_set_owner_r(skbn, sk); + skb_queue_tail(&sk->receive_queue, skbn); + if (!sk->dead) + sk->data_ready(sk,skbn->len); + + return 0; } /* diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index f27fa4f4a..961682702 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -264,11 +264,14 @@ void x25_link_established(struct x25_neigh *neigh) /* * Called when the link layer has terminated, or an establishment - * request has failed. XXX should tell sockets. + * request has failed. */ + void x25_link_terminated(struct x25_neigh *neigh) { neigh->state = X25_LINK_STATE_0; + /* Out of order: clear existing virtual calls (X.25 03/93 4.6.3) */ + x25_kill_by_neigh(neigh); } /* |