diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1995-11-14 08:00:00 +0000 |
---|---|---|
committer | <ralf@linux-mips.org> | 1995-11-14 08:00:00 +0000 |
commit | e7c2a72e2680827d6a733931273a93461c0d8d1b (patch) | |
tree | c9abeda78ef7504062bb2e816bcf3e3c9d680112 /net | |
parent | ec6044459060a8c9ce7f64405c465d141898548c (diff) |
Import of Linux/MIPS 1.3.0
Diffstat (limited to 'net')
-rw-r--r-- | net/802/Makefile | 55 | ||||
-rw-r--r-- | net/802/llc.c | 412 | ||||
-rw-r--r-- | net/802/p8022.c (renamed from net/inet/p8022.c) | 7 | ||||
-rw-r--r-- | net/802/p8023.c (renamed from net/inet/p8023.c) | 3 | ||||
-rw-r--r-- | net/802/psnap.c | 123 | ||||
-rw-r--r-- | net/802/tr.c | 285 | ||||
-rw-r--r-- | net/Changes | 206 | ||||
-rw-r--r-- | net/Makefile | 13 | ||||
-rw-r--r-- | net/README | 42 | ||||
-rw-r--r-- | net/appletalk/Makefile | 35 | ||||
-rw-r--r-- | net/appletalk/aarp.c | 721 | ||||
-rw-r--r-- | net/appletalk/ddp.c | 1843 | ||||
-rw-r--r-- | net/ax25/Makefile | 40 | ||||
-rw-r--r-- | net/ax25/README.AX25 | 20 | ||||
-rw-r--r-- | net/ax25/af_ax25.c | 1972 | ||||
-rw-r--r-- | net/ax25/ax25_in.c | 591 | ||||
-rw-r--r-- | net/ax25/ax25_out.c | 235 | ||||
-rw-r--r-- | net/ax25/ax25_route.c | 288 | ||||
-rw-r--r-- | net/ax25/ax25_subr.c | 383 | ||||
-rw-r--r-- | net/ax25/ax25_timer.c | 226 | ||||
-rw-r--r-- | net/core/Makefile | 43 | ||||
-rw-r--r-- | net/core/datagram.c (renamed from net/inet/datagram.c) | 35 | ||||
-rw-r--r-- | net/core/dev.c (renamed from net/inet/dev.c) | 561 | ||||
-rw-r--r-- | net/core/dev_mcast.c | 169 | ||||
-rw-r--r-- | net/core/skbuff.c (renamed from net/inet/skbuff.c) | 44 | ||||
-rw-r--r-- | net/core/sock.c (renamed from net/inet/sock.c) | 178 | ||||
-rw-r--r-- | net/ethernet/Makefile | 57 | ||||
-rw-r--r-- | net/ethernet/eth.c (renamed from net/inet/eth.c) | 29 | ||||
-rw-r--r-- | net/ethernet/pe2.c (renamed from net/inet/pe2.c) | 3 | ||||
-rw-r--r-- | net/inet/README | 21 | ||||
-rw-r--r-- | net/inet/arp.h | 18 | ||||
-rw-r--r-- | net/inet/datalink.h | 17 | ||||
-rw-r--r-- | net/inet/eth.h | 35 | ||||
-rw-r--r-- | net/inet/icmp.h | 38 | ||||
-rw-r--r-- | net/inet/ip.h | 91 | ||||
-rw-r--r-- | net/inet/ipx.c | 1360 | ||||
-rw-r--r-- | net/inet/ipx.h | 71 | ||||
-rw-r--r-- | net/inet/ipxcall.h | 2 | ||||
-rw-r--r-- | net/inet/ncp.h | 26 | ||||
-rw-r--r-- | net/inet/p8022.h | 2 | ||||
-rw-r--r-- | net/inet/p8022call.h | 2 | ||||
-rw-r--r-- | net/inet/protocol.h | 59 | ||||
-rw-r--r-- | net/inet/rarp.h | 14 | ||||
-rw-r--r-- | net/inet/raw.h | 36 | ||||
-rw-r--r-- | net/inet/route.h | 53 | ||||
-rw-r--r-- | net/inet/snmp.h | 107 | ||||
-rw-r--r-- | net/inet/sock.h | 287 | ||||
-rw-r--r-- | net/inet/tcp.h | 134 | ||||
-rw-r--r-- | net/inet/udp.h | 50 | ||||
-rw-r--r-- | net/ipv4/Makefile (renamed from net/inet/Makefile) | 33 | ||||
-rw-r--r-- | net/ipv4/README.TCP | 39 | ||||
-rw-r--r-- | net/ipv4/af_inet.c (renamed from net/inet/af_inet.c) | 417 | ||||
-rw-r--r-- | net/ipv4/arp.c (renamed from net/inet/arp.c) | 222 | ||||
-rw-r--r-- | net/ipv4/checksum.c | 276 | ||||
-rw-r--r-- | net/ipv4/devinet.c (renamed from net/inet/devinet.c) | 19 | ||||
-rw-r--r-- | net/ipv4/icmp.c (renamed from net/inet/icmp.c) | 85 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 402 | ||||
-rw-r--r-- | net/ipv4/ip.c (renamed from net/inet/ip.c) | 1788 | ||||
-rw-r--r-- | net/ipv4/ip_fw.c | 1514 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 95 | ||||
-rw-r--r-- | net/ipv4/packet.c (renamed from net/inet/packet.c) | 41 | ||||
-rw-r--r-- | net/ipv4/proc.c (renamed from net/inet/proc.c) | 78 | ||||
-rw-r--r-- | net/ipv4/protocol.c (renamed from net/inet/protocol.c) | 57 | ||||
-rw-r--r-- | net/ipv4/rarp.c (renamed from net/inet/rarp.c) | 25 | ||||
-rw-r--r-- | net/ipv4/raw.c (renamed from net/inet/raw.c) | 158 | ||||
-rw-r--r-- | net/ipv4/route.c (renamed from net/inet/route.c) | 93 | ||||
-rw-r--r-- | net/ipv4/tcp.c (renamed from net/inet/tcp.c) | 3163 | ||||
-rw-r--r-- | net/ipv4/timer.c (renamed from net/inet/timer.c) | 111 | ||||
-rw-r--r-- | net/ipv4/udp.c (renamed from net/inet/udp.c) | 406 | ||||
-rw-r--r-- | net/ipv4/utils.c (renamed from net/inet/utils.c) | 6 | ||||
-rw-r--r-- | net/ipx/Makefile | 35 | ||||
-rw-r--r-- | net/ipx/af_ipx.c | 1953 | ||||
-rw-r--r-- | net/netrom/Makefile | 40 | ||||
-rw-r--r-- | net/netrom/af_netrom.c | 1339 | ||||
-rw-r--r-- | net/netrom/nr_dev.c | 254 | ||||
-rw-r--r-- | net/netrom/nr_in.c | 313 | ||||
-rw-r--r-- | net/netrom/nr_out.c | 243 | ||||
-rw-r--r-- | net/netrom/nr_route.c | 750 | ||||
-rw-r--r-- | net/netrom/nr_subr.c | 295 | ||||
-rw-r--r-- | net/netrom/nr_timer.c | 192 | ||||
-rw-r--r-- | net/protocols.c | 48 | ||||
-rw-r--r-- | net/socket.c | 417 | ||||
-rw-r--r-- | net/unix/proc.c | 4 | ||||
-rw-r--r-- | net/unix/sock.c | 29 | ||||
-rw-r--r-- | net/unix/unix.h | 69 |
85 files changed, 20245 insertions, 5806 deletions
diff --git a/net/802/Makefile b/net/802/Makefile new file mode 100644 index 000000000..a81249c91 --- /dev/null +++ b/net/802/Makefile @@ -0,0 +1,55 @@ +# +# Makefile for the Linux 802.x protocol layers. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< +.c.s: + $(CC) $(CFLAGS) -S $< + + +OBJS := p8023.o + +ifdef CONFIG_TR + +OBJS := $(OBJS) tr.o + +endif + +ifdef CONFIG_IPX + +OBJS := $(OBJS) p8022.o psnap.o + +endif + +ifdef CONFIG_ATALK +ifndef CONFIG_IPX + +OBJS := $(OBJS) p8022.o psnap.o + +endif +endif + +802.o: $(OBJS) + $(LD) -r -o 802.o $(OBJS) + + +dep: + $(CPP) -M *.c > .depend + +tar: + tar -cvf /dev/f1 . + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/net/802/llc.c b/net/802/llc.c new file mode 100644 index 000000000..d280fb38f --- /dev/null +++ b/net/802/llc.c @@ -0,0 +1,412 @@ +/* + * 802.2 Class 2 LLC service. + */ + + +int llc_rx_adm(struct sock *sk,struct sk_buff *skb, int type, int cmd, int pf, int nr, int ns) +{ + if(type==CMD) + { + if(cmd==DISC) + send_response(sk,DM|pf); + else if(cmd==SABM) + { + if(sk->state!=TCP_LISTEN) + send_response(sk. 
DM|pf); + else + { + sk=ll_rx_accept(sk); + if(sk!=NULL) + { + send_response(sk, UA|pf); + sk->llc.vs=0; + sk->llc.vr=0; + sk->llc.p_flag=0; + sk->llc.remote_busy=0; + llc_state(sk,LLC_NORMAL); + } + } + } + else if(pf) + send_response(sk, DM|PF); + } + return 0; +} + +int llc_rx_setup(struct sock *sk, struct sk_buff *skb, int type, int cmd, int pf, int nr, int ns) +{ + if(type==CMD) + { + if(cmd==SABM) + { + sk->llc.vs=0; + sk->llc.vr=0; + send_response(sk, UA|pf); + } + if(cmd==DISC) + { + send_response(sk, DM|pf); + llc_error(sk,ECONNRESET); + llc_state(sk, LLC_ADM); + } + } + else + { + if(cmd==UA && pf==sk->llc.p_flag) + { + del_timer(&sk->llc.t1); + sk->llc.vs=0; + llc_update_p_flag(sk,pf); + llc_state(sk,LLC_NORMAL); + } + if(cmd==DM) + { + llc_error(sk, ECONNRESET); + llc_state(sk, LLC_ADM); + } + } +} + +int llc_rx_reset(struct sock *sk, struct sk_buff *skb, int type, int cmd, int pf, int nr, int ns) +{ + if(type==CMD) + { + if(cmd==SABM) + { + sk->llc.vr=0; + sk->llc.vs=0; + send_response(sk, UA|pf); + } + else if(cmd==DISC) + { + if(sk->llc.cause_flag==1) + llc_shutdown(sk,SHUTDOWN_MASK); + else + llc_eror(sk, ECONNREFUSED); + send_response(sk, DM|pf); + llc_state(sk, LLC_ADM); + } + } + else + { + if(cmd==UA) + { + if(sk->llc.p_flag==pf) + { + del_timer(&sk->llc.t1); + sk->llc.vs=0; + sk->llc.vr=0; + llc_update_p_flag(sk,pf); + llc_confirm_reset(sk, sk->llc.cause_flag); + sk->llc.remote_busy=0; + llc_state(sk, LLC_NORMAL); + } + } + if(cmd==DM) + { /* Should check cause_flag */ + llc_shutdown(sk, SHUTDOWN_MASK); + llc_state(sk, LLC_ADM); + } + } + return 0; +} + +int llc_rx_d_conn(struct sock *sk, struct sk_buff *skb, int type, int cmd, int pf, int nr, int ns) +{ + if(type==CMD) + { + if(cmd==SABM) + { + llc_error(sk, ECONNRESET); + llc_state(sk, ADM); + } + else if(cmd==DISC) + { + send_response(UA|pf); + llc_state(sk, LLC_D_CONN); + } + else if(pf) + send_response(sk, DM|PF); + } + else + { + if(cmd==UA && pf==sk->llc.p_flag) + { + 
del_timer(&sk->llc.t1); + llc_state(sk, ADM); + llc_confirm_reset(sk, sk->llc.cause_flag); + } + if(cmd==DM) + { + del_timer(&sk->llc.t1); + /*if(sk->llc.cause_flag)*/ + llc_shutdown(sk, SHUTDOWN_MASK); + } + + } + return 0; +} + +int llc_rx_error(struct sock *sk, struct sk_buff *skb, int type, int cmd, int pf, int nr, int ns) +{ + if(type==CMD) + { + if(cmd==SABM) + { + sk->llc.vs=0; + sk->llc.vr=0; + send_response(sk, UA|pf); + llc_error(sk,ECONNRESET); + sk->llc.p_flag=0; + sk->llc.remote_busy=0; + llc_state(sk, LLC_NORMAL); + } + else if(cmd==DISC) + { + send_response(sk, UA|pf); + llc_shutdown(sk, SHUTDOWN_MASK); + llc_state(sk, LLC_ADM); + } + else + llc_resend_frmr_rsp(sk,pf); + } + else + { + if(cmd==DM) + { + llc_error(sk, ECONNRESET); + del_timer(&sk->llc.t1); + llc_state(sk, LLC_ADM); + } + if(cmd==FRMR) + { + send_command(sk, SABM); + sk->llc.p_flag=pf; + llc_start_t1(); + sk->llc.retry_count=0; + sk->llc.cause_flag=0; + llc_error(sk, EPROTO); + llc_state(sk, LLC_RESET); + } + } +} + + +/* + * Subroutine for handling the shared cases of the data modes. + */ + +int llc_rx_nr_shared(struct sock *sk, struct sk_buff *skb, int type, int cmd, int pf, int nr, int ns) +{ + if(type==CMD) + { + if(cmd==SABM) + { + /* + * Optional reset processing. We decline resets. 
+ */ + send_response(sk,DM|pf); + llc_error(sk, ECONNRESET); + llc_state(sk, LLC_ADM); + } + else if(cmd==DISC) + { + send_response(sk,UA|pf); + llc_state(sk, LLC_ADM); + llc_shutdown(sk, SHUTDOWN_MASK); + } + /* + * We only ever use windows of 7, so there is no illegal NR/NS value + * otherwise we would FRMR here and go to ERROR state + */ + else if(cmd==ILLEGAL) + { + llc_send_frmr_response(sk, ILLEGAL_TYPE,pf); + llc_state(sk, LLC_ERROR); + llc_error(sk, EPROTO); + } + else + /* + * Not covered by general rule + */ + return 0; + } + else + { + /* + * We close on errors + */ + if(cmd==FRMR) + { + send_command(sk, DM|pf); + sk->llc.p_flag=pf; + llc_start_t1(sk); + llc_error(sk, EPROTO); + sk->llc.cause_flag=0; + llc_state(sk, LLC_D_CONN): + } + else if(cmd==DM) + { + llc_state(sk, LLC_ADM); + llc_error(sk, ECONNREFUSED); + } + /* + * We always use a window of 7 so can't get I resp + * with invalid NS, or any resp with invalid NR. If + * we add this they do the same as.. + */ + else if(cmd==UA) + { + llc_send_frmr_response(sk, UNEXPECTED_CONTROL, pf); + llc_state(sk, LLC_ERROR); + llc_error(sk, EPROTO); + } + else if(pf==1 && sk->llc.p_flag==0) + { + llc_send_frmr_response(sk, UNEXPECTED_RESPONSE, pf); + llc_state(sk, LLC_ERROR); + llc_error(sk, EPROTO); + } + else if(cmd==ILLEGAL) + { + llc_send_frmr_response(sk, ILLEGAL_TYPE,pf); + llc_state(sk, LLC_ERROR); + llc_error(sk, EPROTO); + } + else + /* + * Not covered by general rule + */ + return 0 + } + /* + * Processed. 
+ */ + return 1; +} + +int llc_rx_normal(struct sock *sk, struct sk_buff *skb, int type, int cmd, int pf, int nr, int ns) +{ + if(llc_rx_nr_shared(sk, skb, type, cmd, pf, nr, ns)) + return 0; + if(cmd==I) + { + if(llc_invalid_ns(sk,ns)) + { + if((type==RESP && sk->llc.p_flag==pf)||(type==CMD && pf==0 && sk->llc.p_flag==0)) + { + llc_command(sk, REJ|PF); + llc_ack_frames(sk,nr); /* Ack frames and update N(R) */ + sk->llc.p_flag=PF; + llc_state(sk, LLC_REJECT); + sk->llc.retry_count=0; + llc_start_t1(sk); + sk->llc.remote_busy=0; + } + else if((type==CMD && !pf && sk->llc.p_flag==1) || (type==RESP && !pf && sk->llc.p_flag==1)) + { + if(type==CMD) + llc_response(sk, REJ); + else + llc_command(sk, REJ); + llc_ack_frames(sk,nr); + sk->llc.retry_count=0; + llc_state(sk, LLC_REJECT); + llc_start_t1(sk); + } + else if(pf && type==CMD) + { + llc_response(sk, REJ|PF); + llc_ack_frames(sk,nr); + sk->llc.retry_count=0; + llc_start_t1(sk); + } + } + else + { + /* + * Valid I frame cases + */ + + if(sk->llc.p_flag==pf && !(type==CMD && pf)) + { + sk->llc.vr=(sk->llc.vr+1)&7; + llc_queue_rr_cmd(sk, PF); + sk->llc.retry_count=0; + llc_start_t1(sk); + sk->llc.p_flag=1; + llc_ack_frames(sk,nr); + sk->llc.remote_busy=0; + } + else if(sk->ppc.p_flag!=pf) + { + sk->llc.vr=(sk->llc.vr+1)&7; + if(type==CMD) + llc_queue_rr_resp(sk, 0); + else + llc_queue_rr_cmd(sk, 0); + if(sk->llc.nr!=nr) + { + llc_ack_frames(sk,nr); + llc_reset_t1(sk); + } + } + else if(pf) + { + sk->llc.vr=(sk->llc.vr+1)&7; + llc_queue_rr_resp(sk,PF); + if(sk->llc.nr!=nr) + { + llc_ack_frames(sk,nr); + llc_reset_t1(sk); + } + } + llc_queue_data(sk,skb); + return 1; + } + } + else if(cmd==RR||cmd==RNR) + { + if(type==CMD || (type==RESP && (!pf || pf==1 && sk->llc.p_flag==1))) + { + llc_update_p_flag(sk,pf); + if(sk->llc.nr!=nr) + { + llc_ack_frames(sk,nr); + llc_reset_t1(sk); + } + if(cmd==RR) + sk->llc.remote_busy=0; + else + { sk->llc.remote_busy=1; + if(!llc_t1_running(sk)) + llc_start_t1(sk); + } + } + else 
if(type==cmd && pf) + { + if(cmd==RR) + llc_queue_rr_resp(sk,PF); + else + { + send_response(sk, RR|PF); + if(!llc_t1_running(sk)) + llc_start_t1(sk); + } + if(sk->llc.nr!=nr) + { + llc_ack_frames(sk,nr); + llc_reset_t1(sk); + } + if(cmd==RR) + sk->llc.remote_busy=0; + else + sk->llc.remote_busy=1; + } + } + else if(cmd==REJ) + { + + } +} + diff --git a/net/inet/p8022.c b/net/802/p8022.c index f145a836e..dd1510774 100644 --- a/net/inet/p8022.c +++ b/net/802/p8022.c @@ -1,6 +1,6 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> -#include "datalink.h" +#include <net/datalink.h> #include <linux/mm.h> #include <linux/in.h> @@ -27,6 +27,7 @@ p8022_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) proto = find_8022_client(*(skb->h.raw)); if (proto != NULL) { skb->h.raw += 3; + skb->len -= 3; return proto->rcvfunc(skb, dev, pt); } @@ -59,7 +60,7 @@ p8022_datalink_header(struct datalink_proto *dl, static struct packet_type p8022_packet_type = { 0, /* MUTTER ntohs(ETH_P_IPX),*/ - 0, /* copy */ + NULL, /* All devices */ p8022_rcv, NULL, NULL, @@ -87,7 +88,7 @@ register_8022_client(unsigned char type, int (*rcvfunc)(struct sk_buff *, struct proto->rcvfunc = rcvfunc; proto->header_length = 3; proto->datalink_header = p8022_datalink_header; - + proto->string_name = "802.2"; proto->next = p8022_list; p8022_list = proto; } diff --git a/net/inet/p8023.c b/net/802/p8023.c index b5196e409..4b1f5e0bf 100644 --- a/net/inet/p8023.c +++ b/net/802/p8023.c @@ -1,6 +1,6 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> -#include "datalink.h" +#include <net/datalink.h> #include <linux/mm.h> #include <linux/in.h> @@ -27,6 +27,7 @@ make_8023_client(void) proto->type_len = 0; proto->header_length = 0; proto->datalink_header = p8023_datalink_header; + proto->string_name = "802.3"; } return proto; diff --git a/net/802/psnap.c b/net/802/psnap.c new file mode 100644 index 000000000..d0186c54e --- /dev/null +++ b/net/802/psnap.c @@ -0,0 +1,123 @@ +/* + * SNAP data 
link layer. Derived from 802.2 + * + * Alan Cox <Alan.Cox@linux.org>, from the 802.2 layer by Greg Page. + * Merged in additions from Greg Page's psnap.c. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/datalink.h> +#include <net/p8022.h> +#include <net/psnap.h> +#include <linux/mm.h> +#include <linux/in.h> + +static struct datalink_proto *snap_list = NULL; +static struct datalink_proto *snap_dl = NULL; /* 802.2 DL for SNAP */ + +/* + * Find a snap client by matching the 5 bytes. + */ + +static struct datalink_proto *find_snap_client(unsigned char *desc) +{ + struct datalink_proto *proto; + + for (proto = snap_list; proto != NULL && memcmp(proto->type, desc, 5) ; proto = proto->next); + return proto; +} + +/* + * A SNAP packet has arrived + */ + +int snap_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + static struct packet_type psnap_packet_type = + { + 0, + NULL, /* All Devices */ + snap_rcv, + NULL, + NULL, + }; + + struct datalink_proto *proto; + + proto = find_snap_client(skb->h.raw); + if (proto != NULL) + { + /* + * Pass the frame on. 
+ */ + + skb->h.raw += 5; + skb->len -= 5; + if (psnap_packet_type.type == 0) + psnap_packet_type.type=htons(ETH_P_SNAP); + return proto->rcvfunc(skb, dev, &psnap_packet_type); + } + skb->sk = NULL; + kfree_skb(skb, FREE_READ); + return 0; +} + +/* + * Put a SNAP header on a frame and pass to 802.2 + */ + +static void snap_datalink_header(struct datalink_proto *dl, struct sk_buff *skb, unsigned char *dest_node) +{ + struct device *dev = skb->dev; + unsigned char *rawp; + + rawp = skb->data + snap_dl->header_length+dev->hard_header_len; + memcpy(rawp,dl->type,5); + skb->h.raw = rawp+5; + snap_dl->datalink_header(snap_dl, skb, dest_node); +} + +/* + * Set up the SNAP layer + */ + +void snap_proto_init(struct net_proto *pro) +{ + snap_dl=register_8022_client(0xAA, snap_rcv); + if(snap_dl==NULL) + printk("SNAP - unable to register with 802.2\n"); +} + +/* + * Register SNAP clients. We don't yet use this for IP or IPX. + */ + +struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct device *, struct packet_type *)) +{ + struct datalink_proto *proto; + + if (find_snap_client(desc) != NULL) + return NULL; + + proto = (struct datalink_proto *) kmalloc(sizeof(*proto), GFP_ATOMIC); + if (proto != NULL) + { + memcpy(proto->type, desc,5); + proto->type_len = 5; + proto->rcvfunc = rcvfunc; + proto->header_length = 5+snap_dl->header_length; + proto->datalink_header = snap_datalink_header; + proto->string_name = "SNAP"; + proto->next = snap_list; + snap_list = proto; + } + + return proto; +} + diff --git a/net/802/tr.c b/net/802/tr.c new file mode 100644 index 000000000..643cf64c5 --- /dev/null +++ b/net/802/tr.c @@ -0,0 +1,285 @@ +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include 
<linux/trdevice.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/timer.h> +#include <linux/net.h> +#include <net/arp.h> + +static void tr_source_route(struct trh_hdr *trh,struct device *dev); +static void tr_add_rif_info(struct trh_hdr *trh); +static void rif_check_expire(unsigned long dummy); + +typedef struct rif_cache_s *rif_cache; + +struct rif_cache_s { + unsigned char addr[TR_ALEN]; + unsigned short rcf; + unsigned short rseg[8]; + rif_cache next; + unsigned long last_used; +}; + +#define RIF_TABLE_SIZE 16 +rif_cache rif_table[RIF_TABLE_SIZE]={ NULL, }; + +#define RIF_TIMEOUT 60*10*HZ +#define RIF_CHECK_INTERVAL 60*HZ +static struct timer_list rif_timer={ NULL,NULL,RIF_CHECK_INTERVAL,0L,rif_check_expire }; + +int tr_header(unsigned char *buff, struct device *dev, unsigned short type, + void *daddr, void *saddr, unsigned len, struct sk_buff *skb) { + + struct trh_hdr *trh=(struct trh_hdr *)buff; + struct trllc *trllc=(struct trllc *)(buff+sizeof(struct trh_hdr)); + + trh->ac=AC; + trh->fc=LLC_FRAME; + + if(saddr) + memcpy(trh->saddr,saddr,dev->addr_len); + else + memset(trh->saddr,0,dev->addr_len); /* Adapter fills in address */ + + trllc->dsap=trllc->ssap=EXTENDED_SAP; + trllc->llc=UI_CMD; + + trllc->protid[0]=trllc->protid[1]=trllc->protid[2]=0x00; + trllc->ethertype=htons(type); + + if(daddr) { + memcpy(trh->daddr,daddr,dev->addr_len); + tr_source_route(trh,dev); + return(dev->hard_header_len); + } + return -dev->hard_header_len; + +} + +int tr_rebuild_header(void *buff, struct device *dev, unsigned long dest, + struct sk_buff *skb) { + + struct trh_hdr *trh=(struct trh_hdr *)buff; + struct trllc *trllc=(struct trllc *)(buff+sizeof(struct trh_hdr)); + + if(trllc->ethertype != htons(ETH_P_IP)) { + printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n",(unsigned int)htons( trllc->ethertype)); + return 0; + } + + if(arp_find(trh->daddr, dest, dev, dev->pa_addr, skb)) { + return 1; + } + 
else { + tr_source_route(trh,dev); + return 0; + } +} + +unsigned short tr_type_trans(struct sk_buff *skb, struct device *dev) { + + struct trh_hdr *trh=(struct trh_hdr *)skb->data; + struct trllc *trllc=(struct trllc *)(skb->data+sizeof(struct trh_hdr)); + + if(trh->saddr[0] & TR_RII) + tr_add_rif_info(trh); + + if(*trh->daddr & 1) + { + if(!memcmp(trh->daddr,dev->broadcast,TR_ALEN)) + skb->pkt_type=PACKET_BROADCAST; + else + skb->pkt_type=PACKET_MULTICAST; + } + + else if(dev->flags & IFF_PROMISC) + { + if(memcmp(trh->daddr, dev->dev_addr, TR_ALEN)) + skb->pkt_type=PACKET_OTHERHOST; + } + + return trllc->ethertype; +} + +/* We try to do source routing... */ + +static void tr_source_route(struct trh_hdr *trh,struct device *dev) { + + int i; + unsigned int hash; + rif_cache entry; + + /* Broadcasts are single route as stated in RFC 1042 */ + if(!memcmp(&(trh->daddr[0]),&(dev->broadcast[0]),TR_ALEN)) { + trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK) + | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST); + trh->saddr[0]|=TR_RII; + } + else { + for(i=0,hash=0;i<TR_ALEN;hash+=trh->daddr[i++]); + hash&=RIF_TABLE_SIZE-1; + for(entry=rif_table[hash];entry && memcmp(&(entry->addr[0]),&(trh->daddr[0]),TR_ALEN);entry=entry->next); + + if(entry) { +#if 0 +printk("source routing for %02X %02X %02X %02X %02X %02X\n",trh->daddr[0], + trh->daddr[1],trh->daddr[2],trh->daddr[3],trh->daddr[4],trh->daddr[5]); +#endif + if((ntohs(entry->rcf) & TR_RCF_LEN_MASK) >> 8) { + trh->rcf=entry->rcf; + memcpy(&trh->rseg[0],&entry->rseg[0],8*sizeof(unsigned short)); + trh->rcf^=htons(TR_RCF_DIR_BIT); + trh->rcf&=htons(0x1fff); /* Issam Chehab <ichehab@madge1.demon.co.uk> */ + + trh->saddr[0]|=TR_RII; + entry->last_used=jiffies; + } + } + else { + trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK) + | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST); + trh->saddr[0]|=TR_RII; + } + } + +} + +static void tr_add_rif_info(struct trh_hdr *trh) { + + int i; + unsigned int hash; + rif_cache 
entry; + + + trh->saddr[0]&=0x7f; + for(i=0,hash=0;i<TR_ALEN;hash+=trh->saddr[i++]); + hash&=RIF_TABLE_SIZE-1; +#if 0 + printk("hash: %d\n",hash); +#endif + for(entry=rif_table[hash];entry && memcmp(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN);entry=entry->next); + + if(entry==NULL) { +#if 0 +printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", + trh->saddr[0],trh->saddr[1],trh->saddr[2], + trh->saddr[3],trh->saddr[4],trh->saddr[5], + trh->rcf); +#endif + entry=kmalloc(sizeof(struct rif_cache_s),GFP_ATOMIC); + if(!entry) { + printk("tr.c: Couldn't malloc rif cache entry !\n"); + return; + } + entry->rcf=trh->rcf; + memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short)); + memcpy(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN); + entry->next=rif_table[hash]; + entry->last_used=jiffies; + rif_table[hash]=entry; + } +/* Y. Tahara added */ + else { + if ( entry->rcf != trh->rcf ) { + if (!(trh->rcf & htons(TR_RCF_BROADCAST_MASK))) { +#if 0 +printk("updating rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", + trh->saddr[0],trh->saddr[1],trh->saddr[2], + trh->saddr[3],trh->saddr[4],trh->saddr[5], + trh->rcf); +#endif + entry->rcf = trh->rcf; + memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short)); + entry->last_used=jiffies; + } + } + } + +} + +static void rif_check_expire(unsigned long dummy) { + + int i; + unsigned long now=jiffies,flags; + + save_flags(flags); + cli(); + + for(i=0; i < RIF_TABLE_SIZE;i++) { + + rif_cache entry, *pentry=rif_table+i; + + while((entry=*pentry)) + if((now-entry->last_used) > RIF_TIMEOUT) { + *pentry=entry->next; + kfree_s(entry,sizeof(struct rif_cache_s)); + } + else + pentry=&entry->next; + } + restore_flags(flags); + + del_timer(&rif_timer); + rif_timer.expires=RIF_CHECK_INTERVAL; + add_timer(&rif_timer); + +} + +int rif_get_info(char *buffer,char **start, off_t offset, int length) { + + int len=0; + off_t begin=0; + off_t pos=0; + int size,i; + + rif_cache entry; + + 
size=sprintf(buffer, +" TR address rcf routing segments TTL\n\n"); + pos+=size; + len+=size; + + for(i=0;i < RIF_TABLE_SIZE;i++) { + for(entry=rif_table[i];entry;entry=entry->next) { + size=sprintf(buffer+len,"%02X:%02X:%02X:%02X:%02X:%02X %04X %04X %04X %04X %04X %04X %04X %04X %04X %lu\n", + entry->addr[0],entry->addr[1],entry->addr[2],entry->addr[3],entry->addr[4],entry->addr[5], + entry->rcf,entry->rseg[0],entry->rseg[1],entry->rseg[2],entry->rseg[3], + entry->rseg[4],entry->rseg[5],entry->rseg[6],entry->rseg[7],jiffies-entry->last_used); + len+=size; + pos=begin+len; + + if(pos<offset) { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + if(pos>offset+length) + break; + } + + *start=buffer+(offset-begin); /* Start of wanted data */ + len-=(offset-begin); /* Start slop */ + if(len>length) + len=length; /* Ending slop */ + return len; +} + +void rif_init(struct net_proto *unused) { + + add_timer(&rif_timer); + +} + diff --git a/net/Changes b/net/Changes new file mode 100644 index 000000000..b316f85fd --- /dev/null +++ b/net/Changes @@ -0,0 +1,206 @@ + +Ongoing things. + +0.0 +--- +Initial patches to catch up with things we want to add. + +o Merged in the Jorge Cwik fast checksum. [TESTED] +o Added Arnt Gulbrandsen's fast UDP build. [TESTED] +o Pauline Middelinks masquerade patch [IN/COMPILES] + + +0.1 +--- + +o Remove excess transmit request calls. [TESTED] +o Set type before calling netif_rx() [TESTED] +o Inline sock_rcv_skb [TESTED] +o Cache last socket for UDP [TESTED] +o Cache last socket for TCP [TESTED] +o Routing cache (only in ip_build_header so far) [TESTED] +------->>>>> ALPHA 001 <<<<<---------- +o eql load balancing driver. [TESTED] +o Token ring drivers. [COMPILE - CANT TEST] +o IPIP and tunnels [TESTED] +o Fix ethernet/token ring promisc broadcast error [QUICK TEST] + (pkt_type set to OTHERHOST in error). 
+o Fixed bug in the routing caches [TESTED] +o Protocol header cache support [TESTED] +o Fixed ip_build_xmit loopback bugs [TESTED] +o Fixes for SIOCGSTAMP on SOCK_PACKET [TESTED] +o Perfect hash on net_bh(). [TESTED] +o Sonix ISDN driver. [NOT INCLUDED YET] +o Use ip_build_xmit for raw sockets [TESTED] +o 3c501 fixed for speed [TESTED] +------->>>>> ALPHA 002 <<<<<-------- +o PLIP, PPP and de bugs fixed [TESTED] +o Merged in G4KLX AX.25 state machine, with KA9Q + donated headers to get BSD free AX.25 done. [TESTED] +o Massive loopback device bug fixed [TESTED] +------->>>>> ALPHA 003 <<<<<---------- +o Revised code layout [TESTED] +o More bug fixes (tracroute etc) [TESTED] +------->>>>> ALPHA 004 <<<<<---------- +o IP build xmit fragment fixes [TESTED] +o TCP SWS probe fix [TESTED] +o Appletalk DDP [TESTED] +o IP firewall bug fixed [TESTED] +o IP masquerade ftp port spoof [IN] +o gcc 2.6.3 -O3 fix for checksum assembler [TESTED] +o /proc support shows both timers [TESTED] +o TCP irtt support [TESTED] +o RTF_REJECT routing support [TESTED] +o Fixed 0 length fragment bug [TESTED] +o Fixed overlapping reasm bug [TESTED] +o Newest AX.25 code from John Naylor [IN] +o NetROM from John Naylor [IN] +o Routerless DDP fixes from Wesley [TESTED] + +------->>>>> ALPHA 005 <<<<<---------- + +o Several compile and bugfixes from Jakko [IN] +o Connect fix from Matt Day (+ fix to fix) [TESTED] +o RTT, memory leak and other netrom/ax.25 cures + -- John Naylor [IN] +o IP source route via broadcast now illegal [IN] + +------->>>>> ALPHA 006 <<<<<---------- + +o Yet more NetROM/AX.25 improvements [IN] + -- John Naylor +o Fixed a _stupid_ appletalk bug [TESTED] +o Missing include [IN] + -- Lots of people +o Can drop all source routes [IN] +o Printing fixes for ip_fw [IN] +o UDP checksum fix (Gerhard) [IN] +o Newer 3c505 driver from Juha Laiho [IN] +o Security fix to axassociate [IN] +o Loopback driver debugged (fixes named) [TESTED] +o SCC driver from Joerg Reuter [IN] +o IP Firewall 
accounting zero bug [IN] + +////////////////////////////1.3.0/////////////////////////// ?? + +o Finish merging the bridge code +o Device locking +o Faster ip_csum +o SIOCSLEEPRT patch +o Options support in ip_build_xmit [PENDING] +o Fast checksum/copy on outgoing TCP +o New buffers. Used totally non-optimally +o Long word align ethernet IP headers (64byte align for pentium) +o Explode/implode headers for alpha,mips etc. +o Fast dev_grab_next() transmit reload function + and dev_push_failed() ?? +o Faster ip_forward +o Faster loopback frame forwarding. +o Forwarding queue control (+ fairness algorithms ??) +o Merge loadable firewall code. +o IP forward flow control. +o Infinite PPP/SLIP devices. + +0.2 +--- +o New UNIX sockets include Pedro Roque's shutdown. +o New icmp.c. +o Better TCP window handling [Pedro Roque] +o IP option support. +o Add tty support to sonix driver. +o PPP for Sonix ISDN. +o Loadable firewall extensions. +o Screend loadable firewall module +o LZ SLIP + +0.3 +--- +o Merge the layered protocol support. +o IP firewalling performance - caching and radix trees. +o Zebedee +o 802.2 Class 2 services (eg netbios). +o Multidrop KISS +o Multicast routing +o IPX/Atalk/Netrom firewalling + +Possible projects for victim^H^H^H^H^Holunteers + + +1. Verifying the correctness of implementation against RFC1122 and +making a list of violations (BSD is sufficiently screwed up you can't +implement all of RFC1122 and talk to it usefully). + +2. Verifying all the error returns match the BSD ones (grotty job I +wouldn't wish on anyone). + +3. 'Fast Retransmit'. This is a TCP algorithm extension BSD uses. If +you see about 3 acks in a row that are for the same 'old' value. You resend +the frame following the ack. (The assumption being that the acks are +because a single frame in the data stream has been lost). Given a +mathematician with some queue theory you can show this allows you to +lose one frame per window full of data without measurable speed loss. + +4. 
RFC1323 and RFC1191. These are the extensions for very fast nets +and for 'path MTU discovery' - a way of finding the best packetsize to use. +RFC1323 will be useful for Linux talking to systems over 100Mb/sec +ethernet and over ATM as it allows large windows and protects from some +potential high speed TCP problems. + +5. Fixing the IP fragment handling so that the total space allocated to +fragments is limited and old fragments are deleted to make room for new ones +when space is exhausted. Fixing the fragment handling to work at a decent +speed wouldn't be bad either. + +6. Delayed ack. This is mostly supported but not actually set up and +used yet. Basically ack frames are held back 1/10th of a second in the hope +that two acks can be merged into one or for interactive use the ack can +piggyback on the next character typed (great improvement on 2400 baud +modems). Johannes Stille did some work on this about 0.99.13 but it never +got merged in. + +7. One on my tempting project list. Add an extra (unofficial - but so +is SLIP6) SLIP mode that does packet data compression [maybe use the code +from term]. + +8. Making SLIP/PPP dynamically allocate devices so you never run out +of channels. [Taken/Done pending inclusion] + +9. Implementing streams. Not as a blind slow SYS5.4 style copy but actually +working out how to do it so it runs like greased lightning. Quite a big +problem. + +10. Frame Relay/WAN/ISDN drivers [I'm working on the sonix EuroISDN board +driver but thats for an internal project and its general release is still +a maybe (so is finishing it ;))]. + +11. IP over SCSI. + +12. Debugging and making the appletalk alpha test code useful. + [Done and in] + +13. Mrouted Multicast routing. Or possibly MOSPF and others + as they become available + [Some interest: see/join linux-multicast@www.linux.org.uk + if you wish to join in] + +14. Bidirectional PLIP. Also PLIP for the newer style parallel ports. + +15. 802.2LLC and thus Netbeui sockets. 
Becoming less important since the +rumour is Microsoft are phasing out netbeui for netbios/IP. + +16. X.25. This is one for a real head case with far too much time on +their hands. [Provisionally taken] + +17. PPP multilink. Another nasty job. + +18. Implement swIPe under Linux. +[In progress] + +BTW: Don't let the magic words 'kernel programming' worry you. It's like DOS +- you make a mistake you have to reboot. You do at least get dumps and a +kernel logger that is reliable. There is now a loadable module allowing +use of gdb on the kernel (no breakpoints though!). No magic involved. + +Alan + + diff --git a/net/Makefile b/net/Makefile index 1b97fdf01..9797a97a7 100644 --- a/net/Makefile +++ b/net/Makefile @@ -7,10 +7,7 @@ # # Note 2! The CFLAGS definition is now in the main makefile... -# only these two lines should need to be changed to remove inet sockets. -# (and the inet/tcpip.o in net.o) - -SUBDIRS := unix inet +SUBDIRS := 802 ax25 core ethernet ipv4 ipx unix appletalk netrom SUBOBJS := $(foreach f,$(SUBDIRS),$f/$f.o) @@ -26,7 +23,11 @@ OBJS = socket.o protocols.o all: net.o net.o: $(OBJS) network.a - $(LD) -r -o net.o $(OBJS) network.a +ifeq ($(ARCH),mips) + $(LD) -u eth_setup -r -o net.o $(OBJS) network.a +else + $(LD) -u _eth_setup -r -o net.o $(OBJS) network.a +endif network.a: subdirs rm -f $@ @@ -40,6 +41,8 @@ dep: $(CPP) -M *.c > .depend set -e; for i in $(SUBDIRS); do $(MAKE) -C $$i dep; done +modules: + dummy: # diff --git a/net/README b/net/README new file mode 100644 index 000000000..33ffd8a41 --- /dev/null +++ b/net/README @@ -0,0 +1,42 @@ +Upgrade Notes from 1.0 +[Alan Cox - Alan.Cox@linux.org] + +Upgrading to 1.2.0 from a 1.0 kernel networking set. If you are using +a complete 1.2 distribution you can ignore this. + +This doesn't attempt to list the changes. That would be too large. Instead +just what you need and can change + +arp,ifconfig, etc. 
Get net-tools-1.1.95 (or 1.2.0 if it's out) from +ftp.linux.org.uk:/pub/Linux/Networking/PROGRAMS/NetTools, and install +these. You will also acquire a couple of new tools "plipconfig" for tuning +plip links and "ipfw" for ip firewall management. + +bootpd: The original bootpd has a bug that the 1.2 kernel spots. You will +need to upgrade this to the version in +ftp.linux.org.uk:/pub/Linux/Networking/PROGRAMS/Upgrades + + +Standard programs that you ought to update are + +named 4.7.x to 4.9.x Stops named dying occasionally +pop3d 1.001 to 1.004 Fixes a bug that can lose mail + +A complete current networking set for Linux can be obtained by getting +the NetKit[A,B...] series archives from ftp.funet.fi. Funet also carries +binaries for Linux mbone applications if you now wish to make use of +these facilities. + +For commercial UK custom Linux networking projects, drivers and development +(but not free support!) I can be contacted via + + I^2IT Ltd, The Innovation Centre, University Of Wales + Swansea SA2 8PP. + Fax: +44 1792 295811 + Tel: +44 1792 295213 + +Please don't send commercial queries to my email address as I have that +in an academic and _not_ commercial capacity. On the other hand feel +free to send bug reports, queries and enhancements that way. + +Alan diff --git a/net/appletalk/Makefile b/net/appletalk/Makefile new file mode 100644 index 000000000..a14da6dd9 --- /dev/null +++ b/net/appletalk/Makefile @@ -0,0 +1,35 @@ +# +# Makefile for the Linux Appletalk layer. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... 
+ +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< +.c.s: + $(CC) $(CFLAGS) -S $< + + +OBJS := aarp.o ddp.o + + +appletalk.o: $(OBJS) + $(LD) -r -o appletalk.o $(OBJS) + +dep: + $(CPP) -M *.c > .depend + +tar: + tar -cvf /dev/f1 . + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c new file mode 100644 index 000000000..52a46347a --- /dev/null +++ b/net/appletalk/aarp.c @@ -0,0 +1,721 @@ +/* + * AARP: An implementation of the Appletalk aarp protocol for + * ethernet 'ELAP'. + * + * Alan Cox <Alan.Cox@linux.org> + * <iialan@www.linux.org.uk> + * + * This doesn't fit cleanly with the IP arp. This isn't a problem as + * the IP arp wants extracting from the device layer in 1.3.x anyway. + * [see the pre-1.3 test code for details 8)] + * + * FIXME: + * We ought to handle the retransmits with a single list and a + * separate fast timer for when it is needed. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * + * References: + * Inside Appletalk (2nd Ed). 
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/datalink.h>
+#include <net/psnap.h>
+#include <net/atalk.h>
+
+#ifdef CONFIG_ATALK
+/*
+ *	Lists of aarp entries
+ *
+ *	One aarp_entry describes one (device, appletalk address) pair. An
+ *	entry lives on exactly one hash chain at a time: unresolved[] while
+ *	we are still asking for the hardware address, resolved[] once a
+ *	reply has filled in hwaddr.
+ */
+
+struct aarp_entry
+{
+	/* These first two are only used for unresolved entries */
+	unsigned long last_sent;		/* Last time we xmitted the aarp request (declared here; no code in aarp.c reads or writes it) */
+	struct sk_buff_head packet_queue;	/* Queue of frames waiting for resolution */
+	unsigned long expires_at;		/* Entry expiry time, in jiffies */
+	struct at_addr target_addr;		/* DDP address being resolved */
+	struct device *dev;			/* Device to use */
+	char hwaddr[6];				/* Physical i/f address of target/router */
+	unsigned short xmit_count;		/* Queries sent so far; aarp_kick gives up at AARP_RETRANSMIT_LIMIT */
+	struct aarp_entry *next;		/* Next entry in hash chain */
+};
+
+
+/*
+ *	Hashed list of resolved and unresolved entries. unresolved_count is
+ *	the number of entries believed to be on the unresolved chains; it
+ *	selects between the fast and the slow timer rate.
+ */
+
+static struct aarp_entry *resolved[AARP_HASH_SIZE], *unresolved[AARP_HASH_SIZE];
+static int unresolved_count=0;
+
+/*
+ *	Used to walk the list and purge/kick entries.
+ */ + +static struct timer_list aarp_timer; + +/* + * Delete an aarp queue + */ + +static void aarp_expire(struct aarp_entry *a) +{ + struct sk_buff *skb; + + while((skb=skb_dequeue(&a->packet_queue))!=NULL) + kfree_skb(skb, FREE_WRITE); + kfree_s(a,sizeof(*a)); +} + +/* + * Send an aarp queue entry request + */ + +static void aarp_send_query(struct aarp_entry *a) +{ + static char aarp_eth_multicast[ETH_ALEN]={ 0x09, 0x00, 0x07, 0xFF, 0xFF, 0xFF }; + struct device *dev=a->dev; + int len=dev->hard_header_len+sizeof(struct elapaarp)+aarp_dl->header_length; + struct sk_buff *skb=alloc_skb(len, GFP_ATOMIC); + struct elapaarp *eah=(struct elapaarp *)(skb->data+dev->hard_header_len+aarp_dl->header_length); + struct at_addr *sat=atalk_find_dev_addr(dev); + + if(skb==NULL || sat==NULL) + return; + + /* + * Set up the buffer. + */ + + skb->arp = 1; + skb->free = 1; + skb->len = len; + skb->dev = a->dev; + + /* + * Set up the ARP. + */ + + eah->hw_type = htons(AARP_HW_TYPE_ETHERNET); + eah->pa_type = htons(ETH_P_ATALK); + eah->hw_len = ETH_ALEN; + eah->pa_len = AARP_PA_ALEN; + eah->function = htons(AARP_REQUEST); + + memcpy(eah->hw_src, dev->dev_addr, ETH_ALEN); + + eah->pa_src_zero= 0; + eah->pa_src_net = sat->s_net; + eah->pa_src_node= sat->s_node; + + memset(eah->hw_dst, '\0', ETH_ALEN); + + eah->pa_dst_zero= 0; + eah->pa_dst_net = a->target_addr.s_net; + eah->pa_dst_node= a->target_addr.s_node; + + /* + * Add ELAP headers and set target to the AARP multicast. + */ + + aarp_dl->datalink_header(aarp_dl, skb, aarp_eth_multicast); + + /* + * Send it. 
+ */ + + + dev_queue_xmit(skb, dev, SOPRI_NORMAL); + + /* + * Update the sending count + */ + + a->xmit_count++; +} + +static void aarp_send_reply(struct device *dev, struct at_addr *us, struct at_addr *them, unsigned char *sha) +{ + int len=dev->hard_header_len+sizeof(struct elapaarp)+aarp_dl->header_length; + struct sk_buff *skb=alloc_skb(len, GFP_ATOMIC); + struct elapaarp *eah=(struct elapaarp *)(skb->data+dev->hard_header_len+aarp_dl->header_length); + + if(skb==NULL) + return; + + /* + * Set up the buffer. + */ + + skb->arp = 1; + skb->free = 1; + skb->len = len; + skb->dev = dev; + + /* + * Set up the ARP. + */ + + eah->hw_type = htons(AARP_HW_TYPE_ETHERNET); + eah->pa_type = htons(ETH_P_ATALK); + eah->hw_len = ETH_ALEN; + eah->pa_len = AARP_PA_ALEN; + eah->function = htons(AARP_REPLY); + + memcpy(eah->hw_src, dev->dev_addr, ETH_ALEN); + + eah->pa_src_zero= 0; + eah->pa_src_net = us->s_net; + eah->pa_src_node= us->s_node; + + if(sha==NULL) + memset(eah->hw_dst, '\0', ETH_ALEN); + else + memcpy(eah->hw_dst, sha, ETH_ALEN); + + eah->pa_dst_zero= 0; + eah->pa_dst_net = them->s_net; + eah->pa_dst_node= them->s_node; + + /* + * Add ELAP headers and set target to the AARP multicast. + */ + + aarp_dl->datalink_header(aarp_dl, skb, sha); + + /* + * Send it. + */ + + dev_queue_xmit(skb, dev, SOPRI_NORMAL); + +} + +/* + * Send probe frames. Called from atif_probe_device. + */ + +void aarp_send_probe(struct device *dev, struct at_addr *us) +{ + int len=dev->hard_header_len+sizeof(struct elapaarp)+aarp_dl->header_length; + struct sk_buff *skb=alloc_skb(len, GFP_ATOMIC); + struct elapaarp *eah=(struct elapaarp *)(skb->data+dev->hard_header_len+aarp_dl->header_length); + static char aarp_eth_multicast[ETH_ALEN]={ 0x09, 0x00, 0x07, 0xFF, 0xFF, 0xFF }; + + if(skb==NULL) + return; + + /* + * Set up the buffer. + */ + + skb->arp = 1; + skb->free = 1; + skb->len = len; + skb->dev = dev; + + /* + * Set up the ARP. 
+ */ + + eah->hw_type = htons(AARP_HW_TYPE_ETHERNET); + eah->pa_type = htons(ETH_P_ATALK); + eah->hw_len = ETH_ALEN; + eah->pa_len = AARP_PA_ALEN; + eah->function = htons(AARP_PROBE); + + memcpy(eah->hw_src, dev->dev_addr, ETH_ALEN); + + eah->pa_src_zero= 0; + eah->pa_src_net = us->s_net; + eah->pa_src_node= us->s_node; + + memset(eah->hw_dst, '\0', ETH_ALEN); + + eah->pa_dst_zero= 0; + eah->pa_dst_net = us->s_net; + eah->pa_dst_node= us->s_node; + + /* + * Add ELAP headers and set target to the AARP multicast. + */ + + aarp_dl->datalink_header(aarp_dl, skb, aarp_eth_multicast); + + /* + * Send it. + */ + + dev_queue_xmit(skb, dev, SOPRI_NORMAL); + +} + +/* + * Handle an aarp timer expire + */ + +static void aarp_expire_timer(struct aarp_entry **n) +{ + struct aarp_entry *t; + while((*n)!=NULL) + { + /* Expired ? */ + if((*n)->expires_at < jiffies) + { + t= *n; + *n=(*n)->next; + aarp_expire(t); + } + else + n=&((*n)->next); + } +} + +/* + * Kick all pending requests 5 times a second. + */ + +static void aarp_kick(struct aarp_entry **n) +{ + struct aarp_entry *t; + while((*n)!=NULL) + { + /* Expired - if this will be the 11th transmit, we delete + instead */ + if((*n)->xmit_count>=AARP_RETRANSMIT_LIMIT) + { + t= *n; + *n=(*n)->next; + aarp_expire(t); + } + else + { + aarp_send_query(*n); + n=&((*n)->next); + } + } +} + +/* + * A device has gone down. Take all entries referring to the device + * and remove them. 
+ */ + +static void aarp_expire_device(struct aarp_entry **n, struct device *dev) +{ + struct aarp_entry *t; + while((*n)!=NULL) + { + if((*n)->dev==dev) + { + t= *n; + *n=(*n)->next; + aarp_expire(t); + } + else + n=&((*n)->next); + } +} + +/* + * Handle the timer event + */ + +static void aarp_expire_timeout(unsigned long unused) +{ + int ct=0; + for(ct=0;ct<AARP_HASH_SIZE;ct++) + { + aarp_expire_timer(&resolved[ct]); + aarp_kick(&unresolved[ct]); + aarp_expire_timer(&unresolved[ct]); + } + del_timer(&aarp_timer); + if(unresolved_count==0) + aarp_timer.expires=AARP_EXPIRY_TIME; + else + aarp_timer.expires=AARP_TICK_TIME; + add_timer(&aarp_timer); +} + +/* + * Network device notifier chain handler. + */ + +static int aarp_device_event(unsigned long event, void *ptr) +{ + int ct=0; + if(event==NETDEV_DOWN) + { + for(ct=0;ct<AARP_HASH_SIZE;ct++) + { + aarp_expire_device(&resolved[ct],ptr); + aarp_expire_device(&unresolved[ct],ptr); + } + } + return NOTIFY_DONE; +} + +/* + * Create a new aarp entry. + */ + +static struct aarp_entry *aarp_alloc(void) +{ + struct aarp_entry *a=kmalloc(sizeof(struct aarp_entry), GFP_ATOMIC); + if(a==NULL) + return NULL; + skb_queue_head_init(&a->packet_queue); + return a; +} + +/* + * Find an entry. We might return an expired but not yet purged entry. We + * don't care as it will do no harm. 
+ */
+
+/*
+ *	Walks one hash chain with interrupts disabled and returns the first
+ *	entry whose target net, target node and device all match, or NULL.
+ */
+
+static struct aarp_entry *aarp_find_entry(struct aarp_entry *list, struct device *dev, struct at_addr *sat)
+{
+	unsigned long flags;
+	save_flags(flags);
+	cli();
+	while(list)
+	{
+		if(list->target_addr.s_net==sat->s_net &&
+		   list->target_addr.s_node==sat->s_node && list->dev==dev)
+			break;
+		list=list->next;
+	}
+	restore_flags(flags);
+	return list;
+}
+
+/*
+ *	Send a DDP frame
+ *
+ *	Returns 1 if the frame was handed to the device (broadcast, or the
+ *	destination is already resolved), 0 if it was queued on an aarp
+ *	entry awaiting resolution, and -1 if the device is not ethernet or
+ *	no aarp entry could be allocated. The hwaddr argument is not used
+ *	by this function.
+ */
+
+int aarp_send_ddp(struct device *dev,struct sk_buff *skb, struct at_addr *sa, void *hwaddr)
+{
+	static char ddp_eth_multicast[ETH_ALEN]={ 0x09, 0x00, 0x07, 0xFF, 0xFF, 0xFF };
+	int hash;
+	struct aarp_entry *a;
+	unsigned long flags;
+
+	/*
+	 *	Non ELAP we cannot do.
+	 */
+	if(dev->type!=ARPHRD_ETHER)
+	{
+		return -1;
+	}
+
+	skb->dev = dev;
+
+	/* Bucket is chosen by node number; aarp_rcv uses the same formula */
+	hash=sa->s_node%(AARP_HASH_SIZE-1);
+	save_flags(flags);
+	cli();
+
+	/*
+	 *	Do we have a resolved entry ?
+	 */
+
+	/* Broadcasts need no lookup - they go straight to the multicast address */
+	if(sa->s_node==ATADDR_BCAST)
+	{
+		ddp_dl->datalink_header(ddp_dl, skb, ddp_eth_multicast);
+		if(skb->sk==NULL)
+			dev_queue_xmit(skb, skb->dev, SOPRI_NORMAL);
+		else
+			dev_queue_xmit(skb, skb->dev, skb->sk->priority);
+		restore_flags(flags);
+		return 1;
+	}
+	a=aarp_find_entry(resolved[hash],dev,sa);
+	if(a!=NULL)
+	{
+		/*
+		 *	Return 1 and fill in the address
+		 */
+		/* Using the entry pushes its expiry time out again */
+		a->expires_at=jiffies+AARP_EXPIRY_TIME*10;
+		ddp_dl->datalink_header(ddp_dl, skb, a->hwaddr);
+		if(skb->sk==NULL)
+			dev_queue_xmit(skb, skb->dev, SOPRI_NORMAL);
+		else
+			dev_queue_xmit(skb, skb->dev, skb->sk->priority);
+		restore_flags(flags);
+		return 1;
+	}
+	/*
+	 *	Do we have an unresolved entry: This is the less common path
+	 */
+	a=aarp_find_entry(unresolved[hash],dev,sa);
+	if(a!=NULL)
+	{
+		/*
+		 *	Queue onto the unresolved queue
+		 */
+		skb_queue_tail(&a->packet_queue, skb);
+		restore_flags(flags);
+		return 0;
+	}
+	/*
+	 *	Allocate a new entry
+	 */
+	a=aarp_alloc();
+	if(a==NULL)
+	{
+		/*
+		 *	Whoops slipped... good job it's an unreliable
+		 *	protocol 8)
+		 */
+		restore_flags(flags);
+		return -1;
+	}
+	/*
+	 *	Set up the queue
+	 */
+	skb_queue_tail(&a->packet_queue, skb);
+	a->expires_at=jiffies+AARP_RESOLVE_TIME;
+	a->dev=dev;
+	a->next=unresolved[hash];
+	a->target_addr= *sa;
+	a->xmit_count=0;
+	unresolved[hash]=a;
+	unresolved_count++;
+	restore_flags(flags);
+	/*
+	 *	Send an initial request for the address
+	 */
+	aarp_send_query(a);
+	/*
+	 *	Switch to fast timer if needed (That is if this is the
+	 *	first unresolved entry to get added)
+	 */
+	if(unresolved_count==1)
+	{
+		del_timer(&aarp_timer);
+		aarp_timer.expires=AARP_TICK_TIME;
+		add_timer(&aarp_timer);
+	}
+	/*
+	 *	Tell the ddp layer we have taken over for this frame.
+	 */
+	return 0;
+}
+
+/*
+ *	An entry now has a hardware address. Unlink it from the unresolved
+ *	chain 'list', push it on the front of resolved[hash] and transmit
+ *	every frame that was queued on it. unresolved_count is decremented.
+ */
+
+static void aarp_resolved(struct aarp_entry **list, struct aarp_entry *a, int hash)
+{
+	struct sk_buff *skb;
+	while(*list!=NULL)
+	{
+		if(*list==a)
+		{
+			unresolved_count--;
+			*list=a->next;
+			/* Move into the resolved list */
+			a->next=resolved[hash];
+			resolved[hash]=a;
+			/* Kick frames off */
+			while((skb=skb_dequeue(&a->packet_queue))!=NULL)
+			{
+				a->expires_at=jiffies+AARP_EXPIRY_TIME*10;
+				ddp_dl->datalink_header(ddp_dl,skb,a->hwaddr);
+				if(skb->sk==NULL)
+					dev_queue_xmit(skb, skb->dev, SOPRI_NORMAL);
+				else
+					dev_queue_xmit(skb, skb->dev, skb->sk->priority);
+			}
+		}
+		else
+			list=&((*list)->next);
+	}
+}
+
+/*
+ *	Receive handler registered with SNAP for AARP frames.
+ *
+ *	The frame is always freed here. Returns 0 when it is rejected by the
+ *	device type, length or sanity checks, and 1 in every other case.
+ *	Note that ea->function is byte swapped in place inside the buffer.
+ */
+
+static int aarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
+{
+	struct elapaarp *ea=(struct elapaarp *)skb->h.raw;
+	struct aarp_entry *a;
+	struct at_addr sa, *ma;
+	unsigned long flags;
+	int hash;
+	struct atalk_iface *ifa;
+
+
+	/*
+	 *	We only do ethernet SNAP AARP
+	 */
+
+	if(dev->type!=ARPHRD_ETHER)
+	{
+		kfree_skb(skb, FREE_READ);
+		return 0;
+	}
+
+	/*
+	 *	Frame size ok ?
+	 */
+
+	if(skb->len<sizeof(*ea))
+	{
+		kfree_skb(skb, FREE_READ);
+		return 0;
+	}
+
+	ea->function=ntohs(ea->function);
+
+	/*
+	 *	Sanity check fields.
+	 */
+
+	if(ea->function<AARP_REQUEST || ea->function > AARP_PROBE || ea->hw_len != ETH_ALEN || ea->pa_len != AARP_PA_ALEN ||
+		ea->pa_src_zero != 0 || ea->pa_dst_zero != 0)
+	{
+		kfree_skb(skb, FREE_READ);
+		return 0;
+	}
+
+	/*
+	 *	Looks good
+	 */
+
+	/* Same bucket formula as aarp_send_ddp, keyed on the sender's node */
+	hash=ea->pa_src_node%(AARP_HASH_SIZE-1);
+
+	/*
+	 *	Build an address
+	 */
+
+	sa.s_node=ea->pa_src_node;
+	sa.s_net=ea->pa_src_net;
+
+	/*
+	 *	Process the packet
+	 */
+
+	/* Interrupts are only turned off later, in the AARP_REPLY case */
+	save_flags(flags);
+
+	/*
+	 *	Check for replies of me
+	 */
+
+	ifa=atalk_find_dev(dev);
+	if(ifa==NULL)
+	{
+		restore_flags(flags);
+		kfree_skb(skb, FREE_READ);
+		return 1;
+	}
+	if(ifa->status&ATIF_PROBE)
+	{
+		if(ifa->address.s_node==ea->pa_dst_node && ifa->address.s_net==ea->pa_dst_net)
+		{
+			/*
+			 *	Fail the probe (in use)
+			 */
+			ifa->status|=ATIF_PROBE_FAIL;
+			restore_flags(flags);
+			kfree_skb(skb, FREE_READ);
+			return 1;
+		}
+	}
+
+	switch(ea->function)
+	{
+		case AARP_REPLY:
+			if(unresolved_count==0)	/* Speed up */
+				break;
+			/*
+			 *	Find the entry
+			 */
+
+			/* The restore_flags after the switch undoes this on every path */
+			cli();
+			if((a=aarp_find_entry(unresolved[hash],dev,&sa))==NULL || dev != a->dev)
+				break;
+			/*
+			 *	We can fill one in - this is good
+			 */
+			memcpy(a->hwaddr,ea->hw_src,ETH_ALEN);
+			aarp_resolved(&unresolved[hash],a,hash);
+			/* Nothing left pending - drop back to the slow timer */
+			if(unresolved_count==0)
+			{
+				del_timer(&aarp_timer);
+				aarp_timer.expires=AARP_EXPIRY_TIME;
+				add_timer(&aarp_timer);
+			}
+			break;
+
+		case AARP_REQUEST:
+		case AARP_PROBE:
+			/*
+			 *	If it is my address set ma to my address and reply. We can treat probe and
+			 *	request the same. Probe simply means we shouldn't cache the querying host,
+			 *	as in a probe they are proposing an address not using one.
+			 */
+
+			ma=&ifa->address;
+			sa.s_node=ea->pa_dst_node;
+			sa.s_net=ea->pa_dst_net;
+
+			if(sa.s_node!=ma->s_node)
+				break;
+			/* A zero net on either side matches any net */
+			if(sa.s_net && ma->s_net && sa.s_net!=ma->s_net)
+				break;
+
+			sa.s_node=ea->pa_src_node;
+			sa.s_net=ea->pa_src_net;
+
+			/*
+			 *	aarp_my_address has found the address to use for us.
+			 */
+			aarp_send_reply(dev,ma,&sa,ea->hw_src);
+			break;
+	}
+	restore_flags(flags);
+	kfree_skb(skb, FREE_READ);
+	return 1;
+}
+
+/* Hooks aarp_device_event into the network device notifier chain */
+static struct notifier_block aarp_notifier={
+	aarp_device_event,
+	NULL,
+	0
+};
+
+
+/*
+ *	Protocol start up: register the AARP SNAP id, start the expiry
+ *	timer at the slow rate and register the device notifier. A failed
+ *	SNAP registration is only reported - initialisation carries on with
+ *	aarp_dl left NULL.
+ */
+
+void aarp_proto_init(void)
+{
+	static char aarp_snap_id[]={0x00,0x00,0x00,0x80,0xF3};
+	if((aarp_dl=register_snap_client(aarp_snap_id, aarp_rcv))==NULL)
+		printk("Unable to register AARP with SNAP.\n");
+	init_timer(&aarp_timer);
+	aarp_timer.function=aarp_expire_timeout;
+	aarp_timer.data=0;
+	aarp_timer.expires=AARP_EXPIRY_TIME;
+	add_timer(&aarp_timer);
+	register_netdevice_notifier(&aarp_notifier);
+}
+#endif
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
new file mode 100644
index 000000000..67ad1bf22
--- /dev/null
+++ b/net/appletalk/ddp.c
@@ -0,0 +1,1843 @@
+/*
+ *	DDP:	An implementation of the Appletalk DDP protocol for
+ *		ethernet 'ELAP'.
+ *
+ *		Alan Cox  <Alan.Cox@linux.org>
+ *			  <iialan@www.linux.org.uk>
+ *
+ *		With more than a little assistance from
+ *
+ *		Wesley Craig <netatalk@umich.edu>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	TODO
+ *		ASYNC I/O
+ *		Testing.
+ */ + +#include <asm/segment.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/if_ether.h> +#include <linux/inet.h> +#include <linux/notifier.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/termios.h> /* For TIOCOUTQ/INQ */ +#include <net/datalink.h> +#include <net/p8022.h> +#include <net/psnap.h> +#include <net/sock.h> +#include <net/atalk.h> + +#ifdef CONFIG_ATALK + +#define APPLETALK_DEBUG + + +#ifdef APPLETALK_DEBUG +#define DPRINT(x) print(x) +#else +#define DPRINT(x) +#endif + +struct datalink_proto *ddp_dl, *aarp_dl; + +#define min(a,b) (((a)<(b))?(a):(b)) + +/***********************************************************************************************************************\ +* * +* Handlers for the socket list. * +* * +\***********************************************************************************************************************/ + +static atalk_socket *volatile atalk_socket_list=NULL; + +/* + * Note: Sockets may not be removed _during_ an interrupt or inet_bh + * handler using this technique. They can be added although we do not + * use this facility. 
+ */ + +static void atalk_remove_socket(atalk_socket *sk) +{ + unsigned long flags; + atalk_socket *s; + + save_flags(flags); + cli(); + + s=atalk_socket_list; + if(s==sk) + { + atalk_socket_list=s->next; + restore_flags(flags); + return; + } + while(s && s->next) + { + if(s->next==sk) + { + s->next=sk->next; + restore_flags(flags); + return; + } + s=s->next; + } + restore_flags(flags); +} + +static void atalk_insert_socket(atalk_socket *sk) +{ + unsigned long flags; + save_flags(flags); + cli(); + sk->next=atalk_socket_list; + atalk_socket_list=sk; + restore_flags(flags); +} + +static atalk_socket *atalk_search_socket(struct sockaddr_at *to, struct atalk_iface *atif) +{ + atalk_socket *s; + + for( s = atalk_socket_list; s != NULL; s = s->next ) { + if ( to->sat_port != s->at.src_port ) { + continue; + } + + if ( to->sat_addr.s_net == 0 && + to->sat_addr.s_node == ATADDR_BCAST && + s->at.src_net == atif->address.s_net ) { + break; + } + + if ( to->sat_addr.s_net == s->at.src_net && + to->sat_addr.s_node == s->at.src_node ) { + break; + } + + /* XXXX.0 */ + } + return( s ); +} + +/* + * Find a socket in the list. + */ + +static atalk_socket *atalk_find_socket(struct sockaddr_at *sat) +{ + atalk_socket *s; + + for ( s = atalk_socket_list; s != NULL; s = s->next ) { + if ( s->at.src_net != sat->sat_addr.s_net ) { + continue; + } + if ( s->at.src_node != sat->sat_addr.s_node ) { + continue; + } + if ( s->at.src_port != sat->sat_port ) { + continue; + } + break; + } + return( s ); +} + +/* + * This is only called from user mode. Thus it protects itself against + * interrupt users but doesn't worry about being called during work. + * Once it is removed from the queue no interrupt or bottom half will + * touch it and we are (fairly 8-) ) safe. + */ + +static void atalk_destroy_socket(atalk_socket *sk); + +/* + * Handler for deferred kills. 
+ */
+
+static void atalk_destroy_timer(unsigned long data)
+{
+	/* data is the socket pointer stored in sk->timer.data below */
+	atalk_destroy_socket((atalk_socket *)data);
+}
+
+/*
+ *	Take a socket off the socket list, throw away anything on its
+ *	receive queue and free it. If buffers are still charged to it, or it
+ *	is not yet marked dead, a timer is armed to try again later instead.
+ */
+
+static void atalk_destroy_socket(atalk_socket *sk)
+{
+	struct sk_buff *skb;
+	atalk_remove_socket(sk);
+
+	while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
+	{
+		kfree_skb(skb,FREE_READ);
+	}
+
+	if(sk->wmem_alloc == 0 && sk->rmem_alloc == 0 && sk->dead)
+		kfree_s(sk,sizeof(*sk));
+	else
+	{
+		/*
+		 *	Someone is using our buffers still.. defer
+		 */
+		init_timer(&sk->timer);
+		sk->timer.expires=10*HZ;
+		sk->timer.function=atalk_destroy_timer;
+		sk->timer.data = (unsigned long)sk;
+		add_timer(&sk->timer);
+	}
+}
+
+
+/*
+ *	Called from proc fs
+ *
+ *	Formats a header line and one line per socket into buffer and
+ *	returns the number of bytes that fall in the window starting at
+ *	offset and at most length long; *start is set to where that window
+ *	begins in buffer.
+ */
+int atalk_get_info(char *buffer, char **start, off_t offset, int length)
+{
+	atalk_socket *s;
+	int len=0;
+	off_t pos=0;
+	off_t begin=0;
+
+	/*
+	 *	Fill this in to print out the appletalk info you want
+	 */
+
+	/* Theory.. Keep printing in the same place until we pass offset */
+
+	len += sprintf (buffer,"Type local_addr remote_addr tx_queue rx_queue st uid\n");
+	for (s = atalk_socket_list; s != NULL; s = s->next)
+	{
+		len += sprintf (buffer+len,"%02X ", s->type);
+		len += sprintf (buffer+len,"%04X:%02X:%02X ",
+			s->at.src_net,s->at.src_node,s->at.src_port);
+		len += sprintf (buffer+len,"%04X:%02X:%02X ",
+			s->at.dest_net,s->at.dest_node,s->at.dest_port);
+		len += sprintf (buffer+len,"%08lX:%08lX ", s->wmem_alloc, s->rmem_alloc);
+		len += sprintf (buffer+len,"%02X %d\n", s->state, SOCK_INODE(s->socket)->i_uid);
+
+		/* Are we still dumping unwanted data then discard the record */
+		pos=begin+len;
+
+		if(pos<offset)
+		{
+			len=0;			/* Keep dumping into the buffer start */
+			begin=pos;
+		}
+		if(pos>offset+length)		/* We have dumped enough */
+			break;
+	}
+
+	/* The data in question runs from begin to begin+len */
+	*start=buffer+(offset-begin);	/* Start of wanted data */
+	len-=(offset-begin);		/* Remove unwanted header data from length */
+	if(len>length)
+		len=length;		/* Remove unwanted tail data from length */
+
+	return len;
+}
+
+/*******************************************************************************************************************\
+*														    *
+*					Routing tables for the Appletalk socket layer				    *
+*														    *
+\*******************************************************************************************************************/
+
+
+static struct atalk_route *atalk_router_list=NULL;
+static struct atalk_route atrtr_default;		/* For probing devices or in a routerless network */
+static struct atalk_iface *atalk_iface_list=NULL;
+
+/*
+ *	Appletalk interface control
+ */
+
+/*
+ *	Drop a device. Doesn't drop any of its routes - that is
+ *	the caller's problem. Called when we down the interface or
+ *	delete the address.
+ */
+
+static void atif_drop_device(struct device *dev)
+{
+	struct atalk_iface **iface = &atalk_iface_list;
+	struct atalk_iface *tmp;
+
+	/* Every entry for dev is unlinked and freed, not just the first */
+	while ((tmp = *iface) != NULL)
+	{
+		if (tmp->dev == dev)
+		{
+			*iface = tmp->next;
+			kfree_s(tmp, sizeof(struct atalk_iface));
+		}
+		else
+			iface = &tmp->next;
+	}
+}
+
+/*
+ *	Allocate an interface record for dev with address *sa and status 0
+ *	and push it on the front of the interface list. Returns the new
+ *	record, or NULL if there is no memory. The nets range is left for
+ *	the caller to fill in.
+ */
+
+static struct atalk_iface *atif_add_device(struct device *dev, struct at_addr *sa)
+{
+	struct atalk_iface *iface=(struct atalk_iface *)
+					kmalloc(sizeof(*iface), GFP_KERNEL);
+	unsigned long flags;
+	if(iface==NULL)
+		return NULL;
+	iface->dev=dev;
+	iface->address= *sa;
+	iface->status=0;
+	save_flags(flags);
+	cli();
+	iface->next=atalk_iface_list;
+	atalk_iface_list=iface;
+	restore_flags(flags);
+	return iface;
+}
+
+/*
+ *	Perform phase 2 AARP probing on our tentative address.
+ *
+ *	Tries net/node combinations in turn, writing each candidate into
+ *	atif->address and probing it AARP_RETRANSMIT_LIMIT times with a
+ *	sleep between probes. Returns 0 as soon as a candidate survives
+ *	without ATIF_PROBE_FAIL being raised (atif->address then holds it),
+ *	or -EADDRINUSE once every candidate has been tried.
+ */
+
+static int atif_probe_device(struct atalk_iface *atif)
+{
+	int ct;
+	int netrange=ntohs(atif->nets.nr_lastnet)-ntohs(atif->nets.nr_firstnet)+1;
+	int probe_net=ntohs(atif->address.s_net);
+	int netct;
+	int nodect;
+
+
+	/*
+	 *	Offset the network we start probing with.
+	 */
+
+	if(probe_net==ATADDR_ANYNET)
+	{
+		if(!netrange)
+			probe_net=ntohs(atif->nets.nr_firstnet);
+		else
+			probe_net=ntohs(atif->nets.nr_firstnet) + (jiffies%netrange);
+	}
+
+
+	/*
+	 *	Scan the networks.
+	 */
+
+	for(netct=0;netct<=netrange;netct++)
+	{
+		/*
+		 *	Sweep the available nodes from a random start.
+		 */
+		int nodeoff=jiffies&255;
+
+		atif->address.s_net=htons(probe_net);
+		for(nodect=0;nodect<256;nodect++)
+		{
+			atif->address.s_node=((nodect+nodeoff)&0xFF);
+			/* Nodes 0, 254 and 255 are never proposed */
+			if(atif->address.s_node>0&&atif->address.s_node<254)
+			{
+				/*
+				 *	Probe a proposed address.
+				 */
+				for(ct=0;ct<AARP_RETRANSMIT_LIMIT;ct++)
+				{
+					aarp_send_probe(atif->dev, &atif->address);
+					/*
+					 *	Defer 1/10th
+					 */
+					current->timeout = jiffies + (HZ/10);
+					current->state = TASK_INTERRUPTIBLE;
+					schedule();
+					/* aarp_rcv raises this when someone answers for the address */
+					if(atif->status&ATIF_PROBE_FAIL)
+						break;
+				}
+				if(!(atif->status&ATIF_PROBE_FAIL))
+					return 0;
+			}
+			/* Clear the failure before trying the next node */
+			atif->status&=~ATIF_PROBE_FAIL;
+		}
+		probe_net++;
+		/* Wrap round to the bottom of the range */
+		if(probe_net>ntohs(atif->nets.nr_lastnet))
+			probe_net=ntohs(atif->nets.nr_firstnet);
+	}
+	return -EADDRINUSE;	/* Network is full... */
+}
+
+/*
+ *	Return the appletalk address of the first interface record for dev,
+ *	or NULL if the device has none.
+ */
+
+struct at_addr *atalk_find_dev_addr(struct device *dev)
+{
+	struct atalk_iface *iface;
+	for(iface=atalk_iface_list;iface!=NULL;iface=iface->next)
+		if(iface->dev==dev)
+			return &iface->address;
+	return NULL;
+}
+
+/*
+ *	Pick a primary address: the first interface that is not a loopback,
+ *	failing that the first interface of any kind, failing that NULL.
+ */
+
+static struct at_addr *atalk_find_primary(void)
+{
+	struct atalk_iface *iface;
+	for(iface=atalk_iface_list;iface!=NULL;iface=iface->next)
+		if(!(iface->dev->flags&IFF_LOOPBACK))
+			return &iface->address;
+	if ( atalk_iface_list != NULL ) {
+		return &atalk_iface_list->address;
+	} else {
+		return NULL;
+	}
+}
+
+/*
+ *	Given a device find its atif control structure
+ */
+
+struct atalk_iface *atalk_find_dev(struct device *dev)
+{
+	struct atalk_iface *iface;
+	for(iface=atalk_iface_list;iface!=NULL;iface=iface->next)
+		if(iface->dev==dev)
+			return iface;
+	return NULL;
+}
+
+/*
+ *	Find a match for 'any network' - ie any of our interfaces with that
+ *	node number will do just nicely.
+ */ + +static struct atalk_iface *atalk_find_anynet(int node, struct device *dev) +{ + struct atalk_iface *iface; + for(iface=atalk_iface_list;iface!=NULL;iface=iface->next) { + if ( iface->dev != dev || ( iface->status & ATIF_PROBE )) { + continue; + } + if ( node == ATADDR_BCAST || iface->address.s_node == node ) { + return iface; + } + } + return NULL; +} + +/* + * Find a match for a specific network:node pair + */ + +static struct atalk_iface *atalk_find_interface(int net, int node) +{ + struct atalk_iface *iface; + for(iface=atalk_iface_list;iface!=NULL;iface=iface->next) + { + if((node==ATADDR_BCAST || iface->address.s_node==node) + && iface->address.s_net==net && !(iface->status&ATIF_PROBE)) + return iface; + } + return NULL; +} + + +/* + * Find a route for an appletalk packet. This ought to get cached in + * the socket (later on...). We know about host routes and the fact + * that a route must be direct to broadcast. + */ + +static struct atalk_route *atrtr_find(struct at_addr *target) +{ + struct atalk_route *r; + for(r=atalk_router_list;r!=NULL;r=r->next) + { + if(!(r->flags&RTF_UP)) + continue; + if(r->target.s_net==target->s_net) + { + if(!(r->flags&RTF_HOST) || r->target.s_node==target->s_node) + return r; + } + } + if(atrtr_default.dev) + return &atrtr_default; + return NULL; +} + + +/* + * Given an appletalk network find the device to use. This can be + * a simple lookup. Funny stuff like routers can wait 8) + */ + +static struct device *atrtr_get_dev(struct at_addr *sa) +{ + struct atalk_route *atr=atrtr_find(sa); + if(atr==NULL) + return NULL; + else + return atr->dev; +} + +/* + * Set up a default router. + */ + +static void atrtr_set_default(struct device *dev) +{ + atrtr_default.dev=dev; + atrtr_default.flags= RTF_UP; + atrtr_default.gateway.s_net=htons(0); + atrtr_default.gateway.s_node=0; +} + +/* + * Add a router. Basically make sure it looks valid and stuff the + * entry in the list. 
While it uses netranges we always set them to one
+ *	entry to work like netatalk.
+ *
+ *	r carries the destination, gateway and flags. devhint, if not
+ *	NULL, is the device to route through; otherwise the device is
+ *	looked up from the gateway address. An existing route with the
+ *	same flags and a matching target is overwritten rather than
+ *	duplicated.
+ *
+ *	Returns 0 on success, -EINVAL for a non appletalk destination (or
+ *	gateway, when there is no devhint), -ENETUNREACH if no interface
+ *	can reach the gateway and -ENOBUFS if there is no memory.
+ */
+
+static int atrtr_create(struct rtentry *r, struct device *devhint)
+{
+	struct sockaddr_at *ta=(struct sockaddr_at *)&r->rt_dst;
+	struct sockaddr_at *ga=(struct sockaddr_at *)&r->rt_gateway;
+	struct atalk_route *rt;
+	struct atalk_iface *iface, *riface;
+	unsigned long flags;
+
+	/* Interrupts stay enabled for now; cli() is only called when a new route is linked in */
+	save_flags(flags);
+
+	/*
+	 *	Fixme: Raise/Lower a routing change semaphore for these
+	 *	operations.
+	 */
+
+	/*
+	 *	Validate the request
+	 */
+	if(ta->sat_family!=AF_APPLETALK)
+		return -EINVAL;
+	if(devhint == NULL && ga->sat_family != AF_APPLETALK)
+		return -EINVAL;
+
+	/*
+	 *	Now walk the routing table and make our decisions
+	 */
+
+	/* On leaving the loop rt is the route to reuse, or NULL if a new one is needed */
+	for(rt=atalk_router_list;rt!=NULL;rt=rt->next)
+	{
+		if(r->rt_flags != rt->flags)
+			continue;
+
+		if(ta->sat_addr.s_net == rt->target.s_net) {
+			if(!(rt->flags&RTF_HOST))
+				break;
+			if(ta->sat_addr.s_node == rt->target.s_node)
+				break;
+		}
+	}
+
+	/*
+	 *	No device given: take the first interface whose net range
+	 *	holds the gateway. An interface whose own address is the
+	 *	gateway overrides that choice.
+	 */
+	if ( devhint == NULL ) {
+		for ( riface = NULL, iface = atalk_iface_list; iface;
+			iface = iface->next ) {
+			if ( riface == NULL && ntohs( ga->sat_addr.s_net ) >=
+				ntohs( iface->nets.nr_firstnet ) &&
+				ntohs( ga->sat_addr.s_net ) <=
+				ntohs( iface->nets.nr_lastnet ))
+				riface = iface;
+			if ( ga->sat_addr.s_net == iface->address.s_net &&
+				ga->sat_addr.s_node == iface->address.s_node )
+				riface = iface;
+		}
+		if ( riface == NULL )
+			return -ENETUNREACH;
+		devhint = riface->dev;
+	}
+
+	if(rt==NULL)
+	{
+		rt=(struct atalk_route *)kmalloc(sizeof(struct atalk_route), GFP_KERNEL);
+		if(rt==NULL)
+			return -ENOBUFS;
+		/* Interrupts go off here and stay off until the entry is filled in */
+		cli();
+		rt->next=atalk_router_list;
+		atalk_router_list=rt;
+	}
+
+	/*
+	 *	Fill in the entry.
+	 */
+	rt->target=ta->sat_addr;
+	rt->dev=devhint;
+	rt->flags=r->rt_flags;
+	rt->gateway=ga->sat_addr;
+
+	restore_flags(flags);
+	return 0;
+}
+
+
+/*
+ *	Delete a route. Find it and discard it.
+ */ + +static int atrtr_delete( struct at_addr *addr ) +{ + struct atalk_route **r = &atalk_router_list; + struct atalk_route *tmp; + + while ((tmp = *r) != NULL) { + if (tmp->target.s_net == addr->s_net && + (!(tmp->flags&RTF_GATEWAY) || + tmp->target.s_node == addr->s_node )) { + *r = tmp->next; + kfree_s(tmp, sizeof(struct atalk_route)); + return 0; + } + r = &tmp->next; + } + return -ENOENT; +} + +/* + * Called when a device is downed. Just throw away any routes + * via it. + */ + +void atrtr_device_down(struct device *dev) +{ + struct atalk_route **r = &atalk_router_list; + struct atalk_route *tmp; + + while ((tmp = *r) != NULL) { + if (tmp->dev == dev) { + *r = tmp->next; + kfree_s(tmp, sizeof(struct atalk_route)); + } + else + r = &tmp->next; + } + if(atrtr_default.dev==dev) + atrtr_set_default(NULL); +} + +/* + * A device event has occured. Watch for devices going down and + * delete our use of them (iface and route). + */ + +static int ddp_device_event(unsigned long event, void *ptr) +{ + if(event==NETDEV_DOWN) + { + /* Discard any use of this */ + atrtr_device_down((struct device *)ptr); + atif_drop_device((struct device *)ptr); + } + return NOTIFY_DONE; +} + +/* + * ioctl calls. Shouldn't even need touching. + */ + +/* + * Device configuration ioctl calls. 
+ */ + +int atif_ioctl(int cmd, void *arg) +{ + struct ifreq atreq; + static char aarp_mcast[6]={0x09,0x00,0x00,0xFF,0xFF,0xFF}; + struct netrange *nr; + struct sockaddr_at *sa; + struct device *dev; + struct atalk_iface *atif; + int ro=(cmd==SIOCSIFADDR); + int err=verify_area(ro?VERIFY_READ:VERIFY_WRITE, arg,sizeof(atreq)); + int ct; + int limit; + struct rtentry rtdef; + + if(err) + return err; + + memcpy_fromfs(&atreq,arg,sizeof(atreq)); + + if((dev=dev_get(atreq.ifr_name))==NULL) + return -ENODEV; + + sa=(struct sockaddr_at*)&atreq.ifr_addr; + atif=atalk_find_dev(dev); + + switch(cmd) + { + case SIOCSIFADDR: + if(!suser()) + return -EPERM; + if(sa->sat_family!=AF_APPLETALK) + return -EINVAL; + if(dev->type!=ARPHRD_ETHER) + return -EPROTONOSUPPORT; + nr=(struct netrange *)&sa->sat_zero[0]; + if(nr->nr_phase!=2) + return -EPROTONOSUPPORT; + if(sa->sat_addr.s_node==ATADDR_BCAST || sa->sat_addr.s_node == 254) + return -EINVAL; + if(atif) + { + /* + * Already setting address. + */ + if(atif->status&ATIF_PROBE) + return -EBUSY; + + atif->address.s_net=sa->sat_addr.s_net; + atif->address.s_node=sa->sat_addr.s_node; + atrtr_device_down(dev); /* Flush old routes */ + } + else + { + atif=atif_add_device(dev, &sa->sat_addr); + } + atif->nets= *nr; + + /* + * Check if the chosen address is used. If so we + * error and atalkd will try another. + */ + + if(!(dev->flags&IFF_LOOPBACK) && atif_probe_device(atif)<0) + { + atif_drop_device(dev); + return -EADDRINUSE; + } + + /* + * Hey it worked - add the direct + * routes. + */ + + sa=(struct sockaddr_at *)&rtdef.rt_gateway; + sa->sat_family=AF_APPLETALK; + sa->sat_addr.s_net=atif->address.s_net; + sa->sat_addr.s_node=atif->address.s_node; + sa=(struct sockaddr_at *)&rtdef.rt_dst; + rtdef.rt_flags=RTF_UP; + sa->sat_family=AF_APPLETALK; + sa->sat_addr.s_node=ATADDR_ANYNODE; + if(dev->flags&IFF_LOOPBACK) + rtdef.rt_flags|=RTF_HOST; + /* + * Routerless initial state. 
+ */ + if(nr->nr_firstnet==htons(0) && nr->nr_lastnet==htons(0xFFFE)) { + sa->sat_addr.s_net=atif->address.s_net; + atrtr_create(&rtdef, dev); + atrtr_set_default(dev); + } else { + limit=ntohs(nr->nr_lastnet); + if(limit-ntohs(nr->nr_firstnet) > 256) + { + printk("Too many routes/iface.\n"); + return -EINVAL; + } + for(ct=ntohs(nr->nr_firstnet);ct<=limit;ct++) + { + sa->sat_addr.s_net=htons(ct); + atrtr_create(&rtdef, dev); + } + } + dev_mc_add(dev, aarp_mcast, 6, 1); + return 0; + case SIOCGIFADDR: + if(atif==NULL) + return -EADDRNOTAVAIL; + ((struct sockaddr_at *)(&atreq.ifr_addr))->sat_family=AF_APPLETALK; + ((struct sockaddr_at *)(&atreq.ifr_addr))->sat_addr=atif->address; + break; + case SIOCGIFBRDADDR: + if(atif==NULL) + return -EADDRNOTAVAIL; + ((struct sockaddr_at *)(&atreq.ifr_addr))->sat_family=AF_APPLETALK; + ((struct sockaddr_at *)(&atreq.ifr_addr))->sat_addr.s_net=atif->address.s_net; + ((struct sockaddr_at *)(&atreq.ifr_addr))->sat_addr.s_node=ATADDR_BCAST; + break; + } + memcpy_tofs(arg,&atreq,sizeof(atreq)); + return 0; +} + +/* + * Routing ioctl() calls + */ + +static int atrtr_ioctl(unsigned int cmd, void *arg) +{ + int err; + struct rtentry rt; + + err=verify_area(VERIFY_READ, arg, sizeof(rt)); + if(err) + return err; + memcpy_fromfs(&rt,arg,sizeof(rt)); + + switch(cmd) + { + case SIOCDELRT: + if(rt.rt_dst.sa_family!=AF_APPLETALK) + return -EINVAL; + return atrtr_delete(&((struct sockaddr_at *)&rt.rt_dst)->sat_addr); + case SIOCADDRT: + return atrtr_create(&rt, NULL); + default: + return -EINVAL; + } +} + +/* Called from proc fs - just make it print the ifaces neatly */ + +int atalk_if_get_info(char *buffer, char **start, off_t offset, int length) +{ + struct atalk_iface *iface; + int len=0; + off_t pos=0; + off_t begin=0; + + len += sprintf (buffer,"Interface Address Networks Status\n"); + for (iface = atalk_iface_list; iface != NULL; iface = iface->next) + { + len += sprintf (buffer+len,"%-16s %04X:%02X %04X-%04X %d\n", + iface->dev->name, + 
ntohs(iface->address.s_net),iface->address.s_node, + ntohs(iface->nets.nr_firstnet),ntohs(iface->nets.nr_lastnet), + iface->status); + pos=begin+len; + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +/* Called from proc fs - just make it print the routes neatly */ + +int atalk_rt_get_info(char *buffer, char **start, off_t offset, int length) +{ + struct atalk_route *rt; + int len=0; + off_t pos=0; + off_t begin=0; + + len += sprintf (buffer,"Target Router Flags Dev\n"); + if(atrtr_default.dev) + { + rt=&atrtr_default; + len += sprintf (buffer+len,"Default %5d:%-3d %-4d %s\n", + ntohs(rt->gateway.s_net), rt->gateway.s_node, rt->flags, + rt->dev->name); + } + for (rt = atalk_router_list; rt != NULL; rt = rt->next) + { + len += sprintf (buffer+len,"%04X:%02X %5d:%-3d %-4d %s\n", + ntohs(rt->target.s_net),rt->target.s_node, + ntohs(rt->gateway.s_net), rt->gateway.s_node, rt->flags, + rt->dev->name); + pos=begin+len; + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +/*******************************************************************************************************************\ +* * +* Handling for system calls applied via the various interfaces to an Appletalk socket object * +* * +\*******************************************************************************************************************/ + +/* + * Checksum: This is 'optional'. It's quite likely also a good + * candidate for assembler hackery 8) + */ + +unsigned short atalk_checksum(struct ddpehdr *ddp, int len) +{ + unsigned long sum=0; /* Assume unsigned long is >16 bits */ + unsigned char *data=(unsigned char *)ddp; + + len-=4; /* skip header 4 bytes */ + data+=4; + + /* This ought to be unwrapped neatly. 
I'll trust gcc for now */ + while(len--) + { + sum+=*data; + sum<<=1; + if(sum&0x10000) + { + sum++; + sum&=0xFFFF; + } + data++; + } + if(sum) + return htons((unsigned short)sum); + return 0xFFFF; /* Use 0xFFFF for 0. 0 itself means none */ +} + +/* + * Generic fcntl calls are already dealt with. If we don't need funny ones + * this is the all you need. Async I/O is also seperate. + */ + +static int atalk_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ +/* atalk_socket *sk=(atalk_socket *)sock->data;*/ + switch(cmd) + { + default: + return(-EINVAL); + } +} + +/* + * Set 'magic' options for appletalk. If we don't have any this is fine + * as it is. + */ + +static int atalk_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen) +{ + atalk_socket *sk; + int err,opt; + + sk=(atalk_socket *)sock->data; + + if(optval==NULL) + return(-EINVAL); + + err=verify_area(VERIFY_READ,optval,sizeof(int)); + if(err) + return err; + opt=get_fs_long((unsigned long *)optval); + + switch(level) + { + case SOL_ATALK: + switch(optname) + { + default: + return -EOPNOTSUPP; + } + break; + + case SOL_SOCKET: + return sock_setsockopt(sk,level,optname,optval,optlen); + + default: + return -EOPNOTSUPP; + } +} + + +/* + * Get any magic options. Comment above applies. 
+ */ + +static int atalk_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + atalk_socket *sk; + int val=0; + int err; + + sk=(atalk_socket *)sock->data; + + switch(level) + { + + case SOL_ATALK: + switch(optname) + { + default: + return -ENOPROTOOPT; + } + break; + + case SOL_SOCKET: + return sock_getsockopt(sk,level,optname,optval,optlen); + + default: + return -EOPNOTSUPP; + } + err=verify_area(VERIFY_WRITE,optlen,sizeof(int)); + if(err) + return err; + put_fs_long(sizeof(int),(unsigned long *)optlen); + err=verify_area(VERIFY_WRITE,optval,sizeof(int)); + put_fs_long(val,(unsigned long *)optval); + return(0); +} + +/* + * Only for connection oriented sockets - ignore + */ + +static int atalk_listen(struct socket *sock, int backlog) +{ + return -EOPNOTSUPP; +} + +/* + * These are standard. + */ + +static void def_callback1(struct sock *sk) +{ + if(!sk->dead) + wake_up_interruptible(sk->sleep); +} + +static void def_callback2(struct sock *sk, int len) +{ + if(!sk->dead) + { + wake_up_interruptible(sk->sleep); + sock_wake_async(sk->socket,0); + } +} + +/* + * Create a socket. Initialise the socket, blank the addresses + * set the state. + */ + +static int atalk_create(struct socket *sock, int protocol) +{ + atalk_socket *sk; + sk=(atalk_socket *)kmalloc(sizeof(*sk),GFP_KERNEL); + if(sk==NULL) + return(-ENOMEM); + switch(sock->type) + { + /* This RAW is an extension. It is trivial to do and gives you + the full ELAP frame. 
Should be handy for CAP 8) */ + case SOCK_RAW: + /* We permit DDP datagram sockets */ + case SOCK_DGRAM: + break; + default: + kfree_s((void *)sk,sizeof(*sk)); + return(-ESOCKTNOSUPPORT); + } + sk->dead=0; + sk->next=NULL; + sk->broadcast=0; + sk->no_check=0; /* Checksums on by default */ + sk->rcvbuf=SK_RMEM_MAX; + sk->sndbuf=SK_WMEM_MAX; + sk->pair=NULL; + sk->wmem_alloc=0; + sk->rmem_alloc=0; + sk->inuse=0; + sk->proc=0; + sk->priority=1; + sk->shutdown=0; + sk->prot=NULL; /* So we use default free mechanisms */ + sk->broadcast=0; + sk->err=0; + skb_queue_head_init(&sk->receive_queue); + skb_queue_head_init(&sk->write_queue); + sk->send_head=NULL; + skb_queue_head_init(&sk->back_log); + sk->state=TCP_CLOSE; + sk->socket=sock; + sk->type=sock->type; + sk->debug=0; + + sk->at.src_net=0; + sk->at.src_node=0; + sk->at.src_port=0; + + sk->at.dest_net=0; + sk->at.dest_node=0; + sk->at.dest_port=0; + + sk->mtu=DDP_MAXSZ; + + if(sock!=NULL) + { + sock->data=(void *)sk; + sk->sleep=sock->wait; + } + + sk->state_change=def_callback1; + sk->data_ready=def_callback2; + sk->write_space=def_callback1; + sk->error_report=def_callback1; + + sk->zapped=1; + return(0); +} + +/* + * Copy a socket. No work needed. + */ + +static int atalk_dup(struct socket *newsock,struct socket *oldsock) +{ + return(atalk_create(newsock,SOCK_DGRAM)); +} + +/* + * Free a socket. No work needed + */ + +static int atalk_release(struct socket *sock, struct socket *peer) +{ + atalk_socket *sk=(atalk_socket *)sock->data; + if(sk==NULL) + return(0); + if(!sk->dead) + sk->state_change(sk); + sk->dead=1; + sock->data=NULL; + atalk_destroy_socket(sk); + return(0); +} + +/* + * Pick a source address if one is not given. Just return + * an error if not supportable. 
+ */ + +static int atalk_pick_port(struct sockaddr_at *sat) +{ + for ( sat->sat_port = ATPORT_RESERVED; sat->sat_port < ATPORT_LAST; + sat->sat_port++ ) + if ( atalk_find_socket( sat ) == NULL ) + return sat->sat_port; + return -EBUSY; +} + +static int atalk_autobind(atalk_socket *sk) +{ + struct at_addr *ap = atalk_find_primary(); + struct sockaddr_at sat; + int n; + + if ( ap == NULL || ap->s_net == htons( ATADDR_ANYNET )) + return -EADDRNOTAVAIL; + sk->at.src_net = sat.sat_addr.s_net = ap->s_net; + sk->at.src_node = sat.sat_addr.s_node = ap->s_node; + + if (( n = atalk_pick_port( &sat )) < 0 ) + return( n ); + sk->at.src_port=n; + atalk_insert_socket(sk); + sk->zapped=0; + return 0; +} + +/* + * Set the address 'our end' of the connection. + */ + +static int atalk_bind(struct socket *sock, struct sockaddr *uaddr,int addr_len) +{ + atalk_socket *sk; + struct sockaddr_at *addr=(struct sockaddr_at *)uaddr; + + sk=(atalk_socket *)sock->data; + + if(sk->zapped==0) + return(-EIO); + + if(addr_len!=sizeof(struct sockaddr_at)) + return -EINVAL; + + if(addr->sat_family!=AF_APPLETALK) + return -EAFNOSUPPORT; + + if(addr->sat_addr.s_net==htons(ATADDR_ANYNET)) + { + struct at_addr *ap=atalk_find_primary(); + if(ap==NULL) + return -EADDRNOTAVAIL; + sk->at.src_net=addr->sat_addr.s_net=ap->s_net; + sk->at.src_node=addr->sat_addr.s_node=ap->s_node; + } + else + { + if ( atalk_find_interface( addr->sat_addr.s_net, + addr->sat_addr.s_node ) == NULL ) + return -EADDRNOTAVAIL; + sk->at.src_net=addr->sat_addr.s_net; + sk->at.src_node=addr->sat_addr.s_node; + } + + if(addr->sat_port == ATADDR_ANYPORT) + { + int n = atalk_pick_port(addr); + if(n < 0) + return n; + sk->at.src_port=addr->sat_port=n; + } + else + sk->at.src_port=addr->sat_port; + + if(atalk_find_socket(addr)!=NULL) + return -EADDRINUSE; + + atalk_insert_socket(sk); + sk->zapped=0; + return(0); +} + +/* + * Set the address we talk to. 
+ */ + +static int atalk_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + atalk_socket *sk=(atalk_socket *)sock->data; + struct sockaddr_at *addr; + + sk->state = TCP_CLOSE; + sock->state = SS_UNCONNECTED; + + if(addr_len!=sizeof(*addr)) + return(-EINVAL); + addr=(struct sockaddr_at *)uaddr; + + if(addr->sat_family!=AF_APPLETALK) + return -EAFNOSUPPORT; +#if 0 /* Netatalk doesnt check this */ + if(addr->sat_addr.s_node==ATADDR_BCAST && !sk->broadcast) + return -EPERM; +#endif + if(sk->zapped) + { + if(atalk_autobind(sk)<0) + return -EBUSY; + } + + if(atrtr_get_dev(&addr->sat_addr)==NULL) + return -ENETUNREACH; + + sk->at.dest_port=addr->sat_port; + sk->at.dest_net=addr->sat_addr.s_net; + sk->at.dest_node=addr->sat_addr.s_node; + sock->state = SS_CONNECTED; + sk->state=TCP_ESTABLISHED; + return(0); +} + +/* + * Not relevant + */ + +static int atalk_socketpair(struct socket *sock1, struct socket *sock2) +{ + return(-EOPNOTSUPP); +} + +/* + * Not relevant + */ + +static int atalk_accept(struct socket *sock, struct socket *newsock, int flags) +{ + if(newsock->data) + kfree_s(newsock->data,sizeof(atalk_socket)); + return -EOPNOTSUPP; +} + +/* + * Find the name of an appletalk socket. Just copy the right + * fields into the sockaddr. 
+ */ + +static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + struct sockaddr_at sat; + atalk_socket *sk; + + sk=(atalk_socket *)sock->data; + if(sk->zapped) + { + if(atalk_autobind(sk)<0) + return -EBUSY; + } + + *uaddr_len = sizeof(struct sockaddr_at); + + if(peer) + { + if(sk->state!=TCP_ESTABLISHED) + return -ENOTCONN; + sat.sat_addr.s_net=sk->at.dest_net; + sat.sat_addr.s_node=sk->at.dest_node; + sat.sat_port=sk->at.dest_port; + } + else + { + sat.sat_addr.s_net=sk->at.src_net; + sat.sat_addr.s_node=sk->at.src_node; + sat.sat_port=sk->at.src_port; + } + sat.sat_family = AF_APPLETALK; + memcpy(uaddr,&sat,sizeof(sat)); + return(0); +} + +/* + * Receive a packet (in skb) from device dev. This has come from the SNAP decoder, and on entry + * skb->h.raw is the DDP header, skb->len is the DDP length. The physical headers have been + * extracted. + */ + +int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + atalk_socket *sock; + struct ddpehdr *ddp=(void *)skb->h.raw; + struct atalk_iface *atif; + struct sockaddr_at tosat; + + /* Size check */ + if(skb->len<sizeof(*ddp)) + { + kfree_skb(skb,FREE_READ); + return(0); + } + + + /* + * Fix up the length field [Ok this is horrible but otherwise + * I end up with unions of bit fields and messy bit field order + * compiler/endian dependancies..] + */ + + *((__u16 *)ddp)=ntohs(*((__u16 *)ddp)); + + /* + * Trim buffer in case of stray trailing data + */ + + skb->len=min(skb->len,ddp->deh_len); + + /* + * Size check to see if ddp->deh_len was crap + * (Otherwise we'll detonate most spectacularly + * in the middle of recvfrom()). + */ + + if(skb->len<sizeof(*ddp)) + { + kfree_skb(skb,FREE_READ); + return(0); + } + + /* + * Any checksums. Note we don't do htons() on this == is assumed to be + * valid for net byte orders all over the networking code... 
+ */ + + if(ddp->deh_sum && atalk_checksum(ddp, ddp->deh_len)!= ddp->deh_sum) + { + /* Not a valid appletalk frame - dustbin time */ + kfree_skb(skb,FREE_READ); + return(0); + } + + /* Check the packet is aimed at us */ + + if(ddp->deh_dnet == 0) /* Net 0 is 'this network' */ + atif=atalk_find_anynet(ddp->deh_dnode, dev); + else + atif=atalk_find_interface(ddp->deh_dnet,ddp->deh_dnode); + + /* Not ours */ + if(atif==NULL) + { + struct atalk_route *rt; + struct at_addr ta; + ta.s_net=ddp->deh_dnet; + ta.s_node=ddp->deh_dnode; + /* Route the packet */ + rt=atrtr_find(&ta); + if(rt==NULL || ddp->deh_hops==15) + { + kfree_skb(skb, FREE_READ); + return(0); + } + ddp->deh_hops++; + *((__u16 *)ddp)=ntohs(*((__u16 *)ddp)); /* Mend the byte order */ + /* + * Send the buffer onwards + */ + if(aarp_send_ddp(dev,skb, &ta, NULL)==-1) + kfree_skb(skb, FREE_READ); + return 0; + } + + /* Which socket - atalk_search_socket() looks for a *full match* + of the <net,node,port> tuple */ + tosat.sat_addr.s_net = ddp->deh_dnet; + tosat.sat_addr.s_node = ddp->deh_dnode; + tosat.sat_port = ddp->deh_dport; + + sock=atalk_search_socket( &tosat, atif ); + + if(sock==NULL) /* But not one of our sockets */ + { + kfree_skb(skb,FREE_READ); + return(0); + } + + + /* + * Queue packet (standard) + */ + + skb->sk = sock; + + if(sock_queue_rcv_skb(sock,skb)<0) + { + skb->sk=NULL; + kfree_skb(skb, FREE_WRITE); + } + return(0); +} + +static int atalk_sendto(struct socket *sock, void *ubuf, int len, int noblock, + unsigned flags, struct sockaddr *sat, int addr_len) +{ + atalk_socket *sk=(atalk_socket *)sock->data; + struct sockaddr_at *usat=(struct sockaddr_at *)sat; + struct sockaddr_at local_satalk, gsat; + struct sk_buff *skb; + struct device *dev; + struct ddpehdr *ddp; + int size; + struct atalk_route *rt; + int loopback=0; + int err; + + if(flags) + return -EINVAL; + + if(len>587) + return -EMSGSIZE; + + if(usat) + { + if(sk->zapped) + /* put the autobinding in */ + { + if(atalk_autobind(sk)<0) + 
return -EBUSY; + } + + if(addr_len <sizeof(*usat)) + return(-EINVAL); + if(usat->sat_family != AF_APPLETALK) + return -EINVAL; +#if 0 /* netatalk doesnt implement this check */ + if(usat->sat_addr.s_node==ATADDR_BCAST && !sk->broadcast) + return -EPERM; +#endif + } + else + { + if(sk->state!=TCP_ESTABLISHED) + return -ENOTCONN; + usat=&local_satalk; + usat->sat_family=AF_APPLETALK; + usat->sat_port=sk->at.dest_port; + usat->sat_addr.s_node=sk->at.dest_node; + usat->sat_addr.s_net=sk->at.dest_net; + } + + /* Build a packet */ + + if(sk->debug) + printk("SK %p: Got address.\n",sk); + + size=sizeof(struct ddpehdr)+len+ddp_dl->header_length; /* For headers */ + + if(usat->sat_addr.s_net!=0 || usat->sat_addr.s_node == ATADDR_ANYNODE) + { + rt=atrtr_find(&usat->sat_addr); + if(rt==NULL) + return -ENETUNREACH; + dev=rt->dev; + } + else + { + struct at_addr at_hint; + at_hint.s_node=0; + at_hint.s_net=sk->at.src_net; + rt=atrtr_find(&at_hint); + if(rt==NULL) + return -ENETUNREACH; + dev=rt->dev; + } + + if(sk->debug) + printk("SK %p: Size needed %d, device %s\n", sk, size, dev->name); + + size += dev->hard_header_len; + + skb = sock_alloc_send_skb(sk, size, 0 , &err); + if(skb==NULL) + return err; + + skb->sk=sk; + skb->free=1; + skb->arp=1; + skb->len=size; + + skb->dev=dev; + + if(sk->debug) + printk("SK %p: Begin build.\n", sk); + + skb->h.raw=skb->data+ddp_dl->header_length+dev->hard_header_len; + + ddp=(struct ddpehdr *)skb->h.raw; + ddp->deh_pad=0; + ddp->deh_hops=0; + ddp->deh_len=len+sizeof(*ddp); + /* + * Fix up the length field [Ok this is horrible but otherwise + * I end up with unions of bit fields and messy bit field order + * compiler/endian dependancies.. 
+ */ + *((__u16 *)ddp)=ntohs(*((__u16 *)ddp)); + + ddp->deh_dnet=usat->sat_addr.s_net; + ddp->deh_snet=sk->at.src_net; + ddp->deh_dnode=usat->sat_addr.s_node; + ddp->deh_snode=sk->at.src_node; + ddp->deh_dport=usat->sat_port; + ddp->deh_sport=sk->at.src_port; + + if(sk->debug) + printk("SK %p: Copy user data (%d bytes).\n", sk, len); + + memcpy_fromfs((char *)(ddp+1),ubuf,len); + + if(sk->no_check==1) + ddp->deh_sum=0; + else + ddp->deh_sum=atalk_checksum(ddp, len+sizeof(*ddp)); + + /* + * Loopback broadcast packets to non gateway targets (ie routes + * to group we are in) + */ + + if(ddp->deh_dnode==ATADDR_BCAST) + { + if((!(rt->flags&RTF_GATEWAY))&&(!(dev->flags&IFF_LOOPBACK))) + { + struct sk_buff *skb2=skb_clone(skb, GFP_KERNEL); + if(skb2) + { + loopback=1; + if(sk->debug) + printk("SK %p: send out(copy).\n", sk); + if(aarp_send_ddp(dev,skb2,&usat->sat_addr, NULL)==-1) + kfree_skb(skb2, FREE_WRITE); + /* else queued/sent above in the aarp queue */ + } + } + } + + if((dev->flags&IFF_LOOPBACK) || loopback) + { + if(sk->debug) + printk("SK %p: Loop back.\n", sk); + /* loop back */ + sk->wmem_alloc-=skb->mem_len; + ddp_dl->datalink_header(ddp_dl, skb, dev->dev_addr); + skb->sk = NULL; + skb->h.raw = skb->data + ddp_dl->header_length + dev->hard_header_len; + skb->len -= ddp_dl->header_length ; + skb->len -= dev->hard_header_len ; + atalk_rcv(skb,dev,NULL); + } + else + { + if(sk->debug) + printk("SK %p: send out.\n", sk); + + if ( rt->flags & RTF_GATEWAY ) { + gsat.sat_addr = rt->gateway; + usat = &gsat; + } + + if(aarp_send_ddp(dev,skb,&usat->sat_addr, NULL)==-1) + kfree_skb(skb, FREE_WRITE); + /* else queued/sent above in the aarp queue */ + } + if(sk->debug) + printk("SK %p: Done write (%d).\n", sk, len); + return len; +} + +static int atalk_send(struct socket *sock, void *ubuf, int size, int noblock, unsigned flags) +{ + return atalk_sendto(sock,ubuf,size,noblock,flags,NULL,0); +} + +static int atalk_recvfrom(struct socket *sock, void *ubuf, int size, int 
noblock, + unsigned flags, struct sockaddr *sip, int *addr_len) +{ + atalk_socket *sk=(atalk_socket *)sock->data; + struct sockaddr_at *sat=(struct sockaddr_at *)sip; + struct ddpehdr *ddp = NULL; + int copied = 0; + struct sk_buff *skb; + int er; + + if(sk->err) + { + er= -sk->err; + sk->err=0; + return er; + } + + if(addr_len) + *addr_len=sizeof(*sat); + + skb=skb_recv_datagram(sk,flags,noblock,&er); + if(skb==NULL) + return er; + + ddp = (struct ddpehdr *)(skb->h.raw); + if(sk->type==SOCK_RAW) + { + copied=ddp->deh_len; + if(copied > size) + copied=size; + skb_copy_datagram(skb,0,ubuf,copied); + } + else + { + copied=ddp->deh_len - sizeof(*ddp); + if (copied > size) + copied = size; + skb_copy_datagram(skb,sizeof(*ddp),ubuf,copied); + } + if(sat) + { + sat->sat_family=AF_APPLETALK; + sat->sat_port=ddp->deh_sport; + sat->sat_addr.s_node=ddp->deh_snode; + sat->sat_addr.s_net=ddp->deh_snet; + } + skb_free_datagram(skb); + return(copied); +} + + +static int atalk_write(struct socket *sock, char *ubuf, int size, int noblock) +{ + return atalk_send(sock,ubuf,size,noblock,0); +} + + +static int atalk_recv(struct socket *sock, void *ubuf, int size , int noblock, + unsigned flags) +{ + atalk_socket *sk=(atalk_socket *)sock->data; + if(sk->zapped) + return -ENOTCONN; + return atalk_recvfrom(sock,ubuf,size,noblock,flags,NULL, NULL); +} + +static int atalk_read(struct socket *sock, char *ubuf, int size, int noblock) +{ + return atalk_recv(sock,ubuf,size,noblock,0); +} + + +static int atalk_shutdown(struct socket *sk,int how) +{ + return -EOPNOTSUPP; +} + +static int atalk_select(struct socket *sock , int sel_type, select_table *wait) +{ + atalk_socket *sk=(atalk_socket *)sock->data; + + return datagram_select(sk,sel_type,wait); +} + +/* + * Appletalk ioctl calls. 
+ */ + +static int atalk_ioctl(struct socket *sock,unsigned int cmd, unsigned long arg) +{ + int err; + long amount=0; + atalk_socket *sk=(atalk_socket *)sock->data; + int v; + + switch(cmd) + { + /* + * Protocol layer + */ + case TIOCOUTQ: + v=sk->sndbuf-sk->wmem_alloc; + if(v<0) + v=0; + break; + case TIOCINQ: + { + struct sk_buff *skb; + /* These two are safe on a single CPU system as only user tasks fiddle here */ + if((skb=skb_peek(&sk->receive_queue))!=NULL) + v=skb->len-sizeof(struct ddpehdr); + break; + } + case SIOCGSTAMP: + if (sk) + { + if(sk->stamp.tv_sec==0) + return -ENOENT; + err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(struct timeval)); + if(err) + return err; + memcpy_tofs((void *)arg,&sk->stamp,sizeof(struct timeval)); + return 0; + } + return -EINVAL; + /* + * Routing + */ + case SIOCADDRT: + case SIOCDELRT: + if(!suser()) + return -EPERM; + return(atrtr_ioctl(cmd,(void *)arg)); + /* + * Interface + */ + case SIOCGIFADDR: + case SIOCSIFADDR: + case SIOCGIFBRDADDR: + return atif_ioctl(cmd,(void *)arg); + /* + * Physical layer ioctl calls + */ + case SIOCSIFLINK: + case SIOCGIFHWADDR: + case SIOCSIFHWADDR: + case OLD_SIOCGIFHWADDR: + case SIOCGIFFLAGS: + case SIOCSIFFLAGS: + case SIOCGIFMTU: + case SIOCGIFCONF: + case SIOCADDMULTI: + case SIOCDELMULTI: + + return(dev_ioctl(cmd,(void *) arg)); + + case SIOCSIFMETRIC: + case SIOCSIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + case SIOCGIFMEM: + case SIOCSIFMEM: + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + return -EINVAL; + + default: + return -EINVAL; + } + err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(unsigned long)); + if(err) + return err; + put_fs_long(amount,(unsigned long *)arg); + return(0); +} + +static struct proto_ops atalk_proto_ops = { + AF_APPLETALK, + + atalk_create, + atalk_dup, + atalk_release, + atalk_bind, + atalk_connect, + atalk_socketpair, + atalk_accept, + atalk_getname, + atalk_read, + atalk_write, + atalk_select, + atalk_ioctl, + atalk_listen, + atalk_send, 
+ atalk_recv, + atalk_sendto, + atalk_recvfrom, + atalk_shutdown, + atalk_setsockopt, + atalk_getsockopt, + atalk_fcntl, +}; + +static struct notifier_block ddp_notifier={ + ddp_device_event, + NULL, + 0 +}; + +/* Called by proto.c on kernel start up */ + +void atalk_proto_init(struct net_proto *pro) +{ + static char ddp_snap_id[]={0x08,0x00,0x07,0x80,0x9B}; + (void) sock_register(atalk_proto_ops.family, &atalk_proto_ops); + if((ddp_dl=register_snap_client(ddp_snap_id, atalk_rcv))==NULL) + printk("Unable to register DDP with SNAP.\n"); + register_netdevice_notifier(&ddp_notifier); + aarp_proto_init(); + printk("Appletalk ALPHA 0.08 for Linux NET3.029\n"); + +} +#endif diff --git a/net/ax25/Makefile b/net/ax25/Makefile new file mode 100644 index 000000000..77301561c --- /dev/null +++ b/net/ax25/Makefile @@ -0,0 +1,40 @@ +# +# Makefile for the Linux TCP/IP (INET) layer. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< +.c.s: + $(CC) $(CFLAGS) -S $< + + +OBJS := af_ax25.o + +ifdef CONFIG_AX25 + +OBJS := $(OBJS) ax25_in.o ax25_out.o ax25_route.o ax25_subr.o ax25_timer.o + +endif + +ax25.o: $(OBJS) + $(LD) -r -o ax25.o $(OBJS) + +dep: + $(CPP) -M *.c > .depend + +tar: + tar -cvf /dev/f1 . + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/net/ax25/README.AX25 b/net/ax25/README.AX25 new file mode 100644 index 000000000..90b6a037a --- /dev/null +++ b/net/ax25/README.AX25 @@ -0,0 +1,20 @@ +This is a working version of the new state machine code for AX25 under +Linux. 
It is closely based on the SDL diagrams published in the ARRL 7th +Computer Networking Conference papers, and they should be referred to when +reading the code, notably the stuff in ax25_in.c. The next stage is to +separate the ax25 control block from the socket and then add NET/ROM and +connected mode IP. I would also like to add the extended AX25 designed by a +Dutch station which allows for window sizes up to 127. + +This code will work the same as the old code, although the display in +/proc/net/ax25 is a little different, but should be understandable. Please +give this code a work out and report any bugs to me either at +jsn@cs.nott.ac.uk or at GB7DAD.GBR.EU. + +This code has taught me a lot about the internals of the networking side of +Linux especially skbuff handling and I now feel happy about implementing the +higher level protocols. + +73's + +Jonathan diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c new file mode 100644 index 000000000..725939698 --- /dev/null +++ b/net/ax25/af_ax25.c @@ -0,0 +1,1972 @@ +/* + * AX.25 release 029 + * + * This is ALPHA test software. This code may break your machine, randomly fail to work with new + * releases, misbehave and/or generally screw up. It might even work. + * + * This code REQUIRES 1.2.1 or higher/ NET3.029 + * + * This module: + * This module is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * History + * AX.25 006 Alan(GW4PTS) Nearly died of shock - its working 8-) + * AX.25 007 Alan(GW4PTS) Removed the silliest bugs + * AX.25 008 Alan(GW4PTS) Cleaned up, fixed a few state machine problems, added callbacks + * AX.25 009 Alan(GW4PTS) Emergency patch kit to fix memory corruption + * AX.25 010 Alan(GW4PTS) Added RAW sockets/Digipeat. 
+ * AX.25 011 Alan(GW4PTS) RAW socket and datagram fixes (thanks) - Raw sendto now gets PID right + * datagram sendto uses correct target address. + * AX.25 012 Alan(GW4PTS) Correct incoming connection handling, send DM to failed connects. + * Use skb->data not skb+1. Support sk->priority correctly. + * Correct receive on SOCK_DGRAM. + * AX.25 013 Alan(GW4PTS) Send DM to all unknown frames, missing initialiser fixed + * Leave spare SSID bits set (DAMA etc) - thanks for bug report, + * removed device registration (its not used or needed). Clean up for + * gcc 2.5.8. PID to AX25_P_ + * AX.25 014 Alan(GW4PTS) Cleanup and NET3 merge + * AX.25 015 Alan(GW4PTS) Internal test version. + * AX.25 016 Alan(GW4PTS) Semi Internal version for PI card + * work. + * AX.25 017 Alan(GW4PTS) Fixed some small bugs reported by + * G4KLX + * AX.25 018 Alan(GW4PTS) Fixed a small error in SOCK_DGRAM + * AX.25 019 Alan(GW4PTS) Clean ups for the non INET kernel and device ioctls in AX.25 + * AX.25 020 Jonathan(G4KLX) /proc support and other changes. + * AX.25 021 Alan(GW4PTS) Added AX25_T1, AX25_N2, AX25_T3 as requested. + * AX.25 022 Jonathan(G4KLX) More work on the ax25 auto router and /proc improved (again)! + * Alan(GW4PTS) Added TIOCINQ/OUTQ + * AX.25 023 Alan(GW4PTS) Fixed shutdown bug + * AX.25 023 Alan(GW4PTS) Linus changed timers + * AX.25 024 Alan(GW4PTS) Small bug fixes + * AX.25 025 Alan(GW4PTS) More fixes, Linux 1.1.51 compatibility stuff, timers again! + * AX.25 026 Alan(GW4PTS) Small state fix. + * AX.25 027 Alan(GW4PTS) Socket close crash fixes. + * AX.25 028 Alan(GW4PTS) Callsign control including settings per uid. + * Small bug fixes. + * Protocol set by sockets only. + * Small changes to allow for start of NET/ROM layer. + * AX.25 028a Jonathan(G4KLX) Changes to state machine. + * AX.25 028b Jonathan(G4KLX) Extracted ax25 control block + * from sock structure. 
+ * AX.25 029 Alan(GW4PTS) Combined 028b and some KA9Q code + * Jonathan(G4KLX) and removed all the old Berkeley, added IP mode registration. + * Darryl(G7LED) stuff. Cross-port digipeating. Minor fixes and enhancements. + * Alan(GW4PTS) Missed suser() on axassociate checks + * + * To do: + * Support use as digipeater, including an on/off ioctl + * Restructure the ax25_rcv code to be cleaner/faster and + * copy only when needed. + * Consider better arbitary protocol support. + * Fix non-blocking connect failure. + */ + +#include <linux/config.h> +#ifdef CONFIG_AX25 +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <net/ax25.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/fcntl.h> +#include <linux/termios.h> /* For TIOCINQ/OUTQ */ +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> + +#include <net/ip.h> +#include <net/arp.h> + +#define CONFIG_AX25_XDIGI /* Cross port (band) digi stuff */ + +/**********************************************************************************************************************\ +* * +* Handlers for the socket list. 
* +* * +\**********************************************************************************************************************/ + +static ax25_cb *volatile ax25_list = NULL; + +/* + * ax25 -> ascii conversion + */ +char *ax2asc(ax25_address *a) +{ + static char buf[11]; + char c, *s; + int n; + + for (n = 0, s = buf; n < 6; n++) + { + c = (a->ax25_call[n] >> 1) & 0x7F; + + if (c != ' ') *s++ = c; + } + + *s++ = '-'; + + if ((n = ((a->ax25_call[6] >> 1) & 0x0F)) > 9) + { + *s++ = '1'; + n -= 10; + } + + *s++ = n + '0'; + *s++ = '\0'; + + return(buf); + +} + +/* + * Compare two ax.25 addresses + */ +int ax25cmp(ax25_address *a, ax25_address *b) +{ + int ct = 0; + + while (ct < 6) { + if ((a->ax25_call[ct] & 0xFE) != (b->ax25_call[ct] & 0xFE)) /* Clean off repeater bits */ + return 1; + ct++; + } + + if ((a->ax25_call[ct] & 0x1E) == (b->ax25_call[ct] & 0x1E)) /* SSID without control bit */ + return 0; + + return 2; /* Partial match */ +} + +/* + * Socket removal during an interrupt is now safe. + */ +static void ax25_remove_socket(ax25_cb *ax25) +{ + ax25_cb *s; + unsigned long flags; + + save_flags(flags); + cli(); + + if ((s = ax25_list) == ax25) { + ax25_list = s->next; + restore_flags(flags); + return; + } + + while (s != NULL && s->next != NULL) { + if (s->next == ax25) { + s->next = ax25->next; + restore_flags(flags); + return; + } + + s = s->next; + } + + restore_flags(flags); +} + +/* + * Kill all bound sockets on a dropped device. + */ +static void ax25_kill_by_device(struct device *dev) +{ + ax25_cb *s; + + for (s = ax25_list; s != NULL; s = s->next) { + if (s->device == dev) { + s->device = NULL; + if (s->sk != NULL) { + s->sk->state = TCP_CLOSE; + s->sk->err = ENETUNREACH; + if (!s->sk->dead) + s->sk->state_change(s->sk); + s->sk->dead = 1; + } + } + } + + ax25_rt_device_down(dev); +} + +/* + * Handle device status changes. 
+ */ +static int ax25_device_event(unsigned long event, void *ptr) +{ + if (event != NETDEV_DOWN) + return NOTIFY_DONE; + + ax25_kill_by_device(ptr); + + return NOTIFY_DONE; +} + +/* + * Add a socket to the bound sockets list. + */ +static void ax25_insert_socket(ax25_cb *ax25) +{ + unsigned long flags; + + save_flags(flags); + cli(); + + ax25->next = ax25_list; + ax25_list = ax25; + + restore_flags(flags); +} + +/* + * Find a socket that wants to accept the SABM we just + * received. + */ +static struct sock *ax25_find_listener(ax25_address *addr, struct device *dev, int type) +{ + unsigned long flags; + ax25_cb *s; + + save_flags(flags); + cli(); + + for (s = ax25_list; s != NULL; s = s->next) { + if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 && s->sk->type == type && s->sk->state == TCP_LISTEN) { + /* If device is null we match any device */ + if (s->device == NULL || s->device == dev) { + restore_flags(flags); + return s->sk; + } + } + } + + restore_flags(flags); + return NULL; +} + +/* + * Find an AX.25 socket given both ends. + */ +static struct sock *ax25_find_socket(ax25_address *my_addr, ax25_address *dest_addr, int type) +{ + ax25_cb *s; + unsigned long flags; + + save_flags(flags); + cli(); + + for (s = ax25_list; s != NULL; s = s->next) { + if (s->sk != NULL && ax25cmp(&s->source_addr, my_addr) == 0 && ax25cmp(&s->dest_addr, dest_addr) == 0 && s->sk->type == type) { + restore_flags(flags); + return s->sk; + } + } + + restore_flags(flags); + + return NULL; +} + +/* + * Find an AX.25 control block given both ends. It will only pick up + * floating AX.25 control blocks or non Raw socket bound control blocks. 
+ */ +static ax25_cb *ax25_find_cb(ax25_address *my_addr, ax25_address *dest_addr, struct device *dev) +{ + ax25_cb *s; + unsigned long flags; + + save_flags(flags); + cli(); + + for (s = ax25_list; s != NULL; s = s->next) { + if (s->sk != NULL && s->sk->type != SOCK_SEQPACKET) + continue; + if (ax25cmp(&s->source_addr, my_addr) == 0 && ax25cmp(&s->dest_addr, dest_addr) == 0 && s->device == dev) { + restore_flags(flags); + return s; + } + } + + restore_flags(flags); + + return NULL; +} + +/* + * Look for any matching address - RAW sockets can bind to arbitary names + */ +static struct sock *ax25_addr_match(ax25_address *addr) +{ + unsigned long flags; + ax25_cb *s; + + save_flags(flags); + cli(); + + for (s = ax25_list; s != NULL; s = s->next) { + if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 && s->sk->type == SOCK_RAW) { + restore_flags(flags); + return s->sk; + } + } + + restore_flags(flags); + + return NULL; +} + +static void ax25_send_to_raw(struct sock *sk, struct sk_buff *skb, int proto) +{ + struct sk_buff *copy; + + while (sk != NULL) { + if (sk->type == SOCK_RAW && sk->protocol == proto && sk->rmem_alloc <= sk->rcvbuf) { + if ((copy = skb_clone(skb, GFP_ATOMIC)) == NULL) + return; + + copy->sk = sk; + sk->rmem_alloc += copy->mem_len; + skb_queue_tail(&sk->receive_queue, copy); + if (!sk->dead) + sk->data_ready(sk, skb->len - 2); + } + + sk = sk->next; + } +} + +/* + * Deferred destroy. + */ +void ax25_destory_socket(ax25_cb *); + +/* + * Handler for deferred kills. + */ +static void ax25_destroy_timer(unsigned long data) +{ + ax25_destroy_socket((ax25_cb *)data); +} + +/* + * This is called from user mode and the timers. Thus it protects itself against + * interrupt users but doesn't worry about being called during work. + * Once it is removed from the queue no interrupt or bottom half will + * touch it and we are (fairly 8-) ) safe. 
+ */ +void ax25_destroy_socket(ax25_cb *ax25) /* Not static as its used by the timer */ +{ + struct sk_buff *skb; + unsigned long flags; + + save_flags(flags); + cli(); + + del_timer(&ax25->timer); + + ax25_remove_socket(ax25); + ax25_clear_tx_queue(ax25); /* Flush the send queue */ + + if (ax25->sk != NULL) { + while ((skb = skb_dequeue(&ax25->sk->receive_queue)) != NULL) { + if (skb->sk != ax25->sk) { /* A pending connection */ + skb->sk->dead = 1; /* Queue the unaccepted socket for death */ + ax25_set_timer(skb->sk->ax25); + skb->sk->ax25->state = AX25_STATE_0; + } + + kfree_skb(skb, FREE_READ); + } + } + + if (ax25->digipeat != NULL) { + kfree_s(ax25->digipeat, sizeof(ax25_digi)); + ax25->digipeat = NULL; + } + + if (ax25->sk != NULL) { + if (ax25->sk->wmem_alloc || ax25->sk->rmem_alloc) { /* Defer: outstanding buffers */ + init_timer(&ax25->timer); + ax25->timer.expires = 10 * HZ; + ax25->timer.function = ax25_destroy_timer; + ax25->timer.data = (unsigned long)ax25; + add_timer(&ax25->timer); + } else { + kfree_s(ax25->sk, sizeof(*ax25->sk)); + kfree_s(ax25, sizeof(*ax25)); + } + } else { + kfree_s(ax25, sizeof(*ax25)); + } + + restore_flags(flags); +} + +/* + * Callsign/UID mapper. This is in kernel space for security on multi-amateur machines. 
+ */ + +ax25_uid_assoc *ax25_uid_list; + +int ax25_uid_policy = 0; + +ax25_address *ax25_findbyuid(uid_t uid) +{ + ax25_uid_assoc *a; + + for (a = ax25_uid_list; a != NULL; a = a->next) { + if (a->uid == uid) + return &a->call; + } + + return NULL; +} + +static int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) +{ + ax25_uid_assoc *a; + + switch (cmd) { + case SIOCAX25GETUID: + for (a = ax25_uid_list; a != NULL; a = a->next) { + if (ax25cmp(&sax->sax25_call, &a->call) == 0) + return a->uid; + } + return -ENOENT; + case SIOCAX25ADDUID: + if(!suser()) + return -EPERM; + if (ax25_findbyuid(sax->sax25_uid)) + return -EEXIST; + a = (ax25_uid_assoc *)kmalloc(sizeof(*a), GFP_KERNEL); + a->uid = sax->sax25_uid; + a->call = sax->sax25_call; + a->next = ax25_uid_list; + ax25_uid_list = a; + return 0; + case SIOCAX25DELUID: + { + ax25_uid_assoc **l; + + if(!suser()) + return -EPERM; + l = &ax25_uid_list; + while ((*l) != NULL) { + if (ax25cmp(&((*l)->call), &(sax->sax25_call)) == 0) { + a = *l; + *l = (*l)->next; + kfree_s(a, sizeof(*a)); + return 0; + } + + l = &((*l)->next); + } + return -ENOENT; + } + } + + return -EINVAL; /*NOTREACHED */ +} + +/* + * Create an empty AX.25 control block. 
+ */ +static ax25_cb *ax25_create_cb(void) +{ + ax25_cb *ax25; + + if ((ax25 = (ax25_cb *)kmalloc(sizeof(*ax25), GFP_ATOMIC)) == NULL) + return NULL; + + skb_queue_head_init(&ax25->write_queue); + skb_queue_head_init(&ax25->ack_queue); + + init_timer(&ax25->timer); + + ax25->rtt = DEFAULT_T1; + ax25->t1 = DEFAULT_T1; + ax25->t2 = DEFAULT_T2; + ax25->n2 = DEFAULT_N2; + ax25->t3 = DEFAULT_T3; + + ax25->condition = 0x00; + ax25->t1timer = 0; + ax25->t2timer = 0; + ax25->t3timer = 0; + ax25->n2count = 0; + + ax25->va = 0; + ax25->vr = 0; + ax25->vs = 0; + + ax25->window = DEFAULT_WINDOW; + ax25->device = NULL; + ax25->digipeat = NULL; + ax25->sk = NULL; + + ax25->state = AX25_STATE_0; + + memset(&ax25->dest_addr, '\0', sizeof(ax25_address)); + memset(&ax25->source_addr, '\0', sizeof(ax25_address)); + + return ax25; +} + +int ax25_send_frame(struct sk_buff *skb, ax25_address *src, ax25_address *dest, struct device *dev) +{ + ax25_cb *ax25; + + if (skb == NULL) + return 0; + + skb->h.raw = skb->data + 15; + + /* + * Look for an existing connection. 
+ */ + for (ax25 = ax25_list; ax25 != NULL; ax25 = ax25->next) { + if (ax25->sk != NULL && ax25->sk->type != SOCK_SEQPACKET) + continue; + + if (ax25cmp(&ax25->source_addr, src) == 0 && ax25cmp(&ax25->dest_addr, dest) == 0 && ax25->device == dev) { + ax25_output(ax25, skb); + return 1; /* It already existed */ + } + } + + if ((ax25 = ax25_create_cb()) == NULL) + return 0; + + ax25->device = dev; + + memcpy(&ax25->source_addr, src, sizeof(ax25_address)); + memcpy(&ax25->dest_addr, dest, sizeof(ax25_address)); + + ax25_establish_data_link(ax25); + ax25_insert_socket(ax25); + + ax25->state = AX25_STATE_1; + + ax25_set_timer(ax25); + + ax25_output(ax25, skb); + + return 1; /* We had to create it */ +} + +/*******************************************************************************************************************\ +* * +* Routing rules for AX.25: Basically iterate over the active interfaces * +* * +\*******************************************************************************************************************/ + +struct device *ax25rtr_get_dev(ax25_address *addr) +{ + struct device *dev; + + for (dev = dev_base; dev != NULL; dev = dev->next) { + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) { /* Active kiss ax25 mode */ + if (ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) + return dev; + } + } + + return NULL; +} + +/*******************************************************************************************************************\ +* * +* Handling for system calls applied via the various interfaces to an AX25 socket object * +* * +\*******************************************************************************************************************/ + +static int ax25_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + switch(cmd) + { + default: + return(-EINVAL); + } +} + +static int ax25_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen) +{ + struct sock *sk; + int err, opt; + + sk = (struct sock 
*)sock->data; + + if (level == SOL_SOCKET) + return sock_setsockopt(sk, level, optname, optval, optlen); + + if (level != SOL_AX25) + return -EOPNOTSUPP; + + if (optval == NULL) + return -EINVAL; + + if ((err = verify_area(VERIFY_READ, optval, sizeof(int))) != 0) + return err; + + opt = get_fs_long((unsigned long *)optval); + + switch (optname) { + case AX25_WINDOW: + if (opt < 1 || opt > 7) + return -EINVAL; + sk->ax25->window = opt; + return 0; + + case AX25_T1: + if (opt < 1) + return -EINVAL; + sk->ax25->t1 = opt * PR_SLOWHZ; + return 0; + + case AX25_T2: + if (opt < 1) + return -EINVAL; + sk->ax25->t2 = opt * PR_SLOWHZ; + return 0; + + case AX25_N2: + if (opt < 1 || opt > 31) + return -EINVAL; + sk->ax25->n2 = opt; + return 0; + + case AX25_T3: + if (opt < 1) + return -EINVAL; + sk->ax25->t3 = opt * PR_SLOWHZ; + return 0; + + default: + return -ENOPROTOOPT; + } +} + +static int ax25_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + struct sock *sk; + int val = 0; + int err; + + sk = (struct sock *)sock->data; + + if (level == SOL_SOCKET) + return sock_getsockopt(sk, level, optname, optval, optlen); + + if (level != SOL_AX25) + return -EOPNOTSUPP; + + switch (optname) { + case AX25_WINDOW: + val = sk->ax25->window; + break; + + case AX25_T1: + val = sk->ax25->t1 / PR_SLOWHZ; + break; + + case AX25_T2: + val = sk->ax25->t2 / PR_SLOWHZ; + break; + + case AX25_N2: + val = sk->ax25->n2; + break; + + case AX25_T3: + val = sk->ax25->t3 / PR_SLOWHZ; + break; + + default: + return -ENOPROTOOPT; + } + + if ((err = verify_area(VERIFY_WRITE, optlen, sizeof(int))) != 0) + return err; + + put_fs_long(sizeof(int), (unsigned long *)optlen); + + if ((err = verify_area(VERIFY_WRITE, optval, sizeof(int))) != 0) + return err; + + put_fs_long(val, (unsigned long *)optval); + + return 0; +} + +static int ax25_listen(struct socket *sock, int backlog) +{ + struct sock *sk = (struct sock *)sock->data; + + if (sk->type == SOCK_SEQPACKET && 
sk->state != TCP_LISTEN) { + sk->max_ack_backlog = backlog; + sk->state = TCP_LISTEN; + return 0; + } + + return -EOPNOTSUPP; +} + +static void def_callback1(struct sock *sk) +{ + if (!sk->dead) + wake_up_interruptible(sk->sleep); +} + +static void def_callback2(struct sock *sk, int len) +{ + if (!sk->dead) + wake_up_interruptible(sk->sleep); +} + +static int ax25_create(struct socket *sock, int protocol) +{ + struct sock *sk; + ax25_cb *ax25; + + if ((sk = (struct sock *)kmalloc(sizeof(*sk), GFP_ATOMIC)) == NULL) + return -ENOMEM; + + if ((ax25 = ax25_create_cb()) == NULL) { + kfree_s(sk, sizeof(*sk)); + return -ENOMEM; + } + + sk->type = sock->type; + + switch (sock->type) { + case SOCK_DGRAM: + case SOCK_SEQPACKET: + if (protocol == 0) + protocol = AX25_P_TEXT; + break; + case SOCK_RAW: + break; + default: + kfree_s((void *)sk, sizeof(*sk)); + kfree_s((void *)ax25, sizeof(*ax25)); + return -ESOCKTNOSUPPORT; + } + + skb_queue_head_init(&sk->receive_queue); + skb_queue_head_init(&sk->write_queue); + skb_queue_head_init(&sk->back_log); + + sk->socket = sock; + sk->protocol = protocol; + sk->dead = 0; + sk->next = NULL; + sk->broadcast = 0; + sk->rcvbuf = SK_RMEM_MAX; + sk->sndbuf = SK_WMEM_MAX; + sk->wmem_alloc = 0; + sk->rmem_alloc = 0; + sk->inuse = 0; + sk->debug = 0; + sk->prot = NULL; /* So we use default free mechanisms */ + sk->err = 0; + sk->localroute = 0; + sk->send_head = NULL; + sk->state = TCP_CLOSE; + sk->shutdown = 0; + sk->priority = SOPRI_NORMAL; + sk->ack_backlog = 0; + sk->mtu = AX25_MTU; /* 256 */ + sk->zapped = 1; + + sk->state_change = def_callback1; + sk->data_ready = def_callback2; + sk->write_space = def_callback1; + sk->error_report = def_callback1; + + if (sock != NULL) { + sock->data = (void *)sk; + sk->sleep = sock->wait; + } + + ax25->sk = sk; + sk->ax25 = ax25; + + return 0; +} + +static struct sock *ax25_make_new(struct sock *osk, struct device *dev) +{ + struct sock *sk; + ax25_cb *ax25; + + if ((sk = (struct sock 
*)kmalloc(sizeof(*sk), GFP_ATOMIC)) == NULL) + return NULL; + + if ((ax25 = ax25_create_cb()) == NULL) { + kfree_s(sk, sizeof(*sk)); + return NULL; + } + + sk->type = osk->type; + sk->socket = osk->socket; + + switch(osk->type) + { + case SOCK_DGRAM: + break; + case SOCK_SEQPACKET: + break; + default: + kfree_s((void *)sk, sizeof(*sk)); + kfree_s((void *)ax25, sizeof(*ax25)); + return NULL; + } + + skb_queue_head_init(&sk->receive_queue); + skb_queue_head_init(&sk->write_queue); + skb_queue_head_init(&sk->back_log); + + sk->dead = 0; + sk->next = NULL; + sk->priority = osk->priority; + sk->broadcast = 0; + sk->protocol = osk->protocol; + sk->rcvbuf = osk->rcvbuf; + sk->sndbuf = osk->sndbuf; + sk->wmem_alloc = 0; + sk->rmem_alloc = 0; + sk->inuse = 0; + sk->ack_backlog = 0; + sk->prot = NULL; /* So we use default free mechanisms */ + sk->err = 0; + sk->localroute = 0; + sk->send_head = NULL; + sk->debug = osk->debug; + sk->state = TCP_ESTABLISHED; + sk->window = osk->window; + sk->shutdown = 0; + sk->mtu = osk->mtu; + sk->sleep = osk->sleep; + sk->zapped = osk->zapped; + + sk->state_change = def_callback1; + sk->data_ready = def_callback2; + sk->write_space = def_callback1; + sk->error_report = def_callback1; + + ax25->rtt = osk->ax25->rtt; + ax25->t1 = osk->ax25->t1; + ax25->t2 = osk->ax25->t2; + ax25->t3 = osk->ax25->t3; + ax25->n2 = osk->ax25->n2; + + ax25->window = osk->ax25->window; + ax25->device = dev; + + memcpy(&ax25->source_addr, &osk->ax25->source_addr, sizeof(ax25_address)); + + if (osk->ax25->digipeat != NULL) { + if ((ax25->digipeat = (ax25_digi *)kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { + kfree_s(sk, sizeof(*sk)); + kfree_s(ax25, sizeof(*ax25)); + return NULL; + } + } + + sk->ax25 = ax25; + ax25->sk = sk; + + return sk; +} + +static int ax25_dup(struct socket *newsock, struct socket *oldsock) +{ + struct sock *sk = (struct sock *)oldsock->data; + + return ax25_create(newsock, sk->protocol); +} + +static int ax25_release(struct socket *sock, 
struct socket *peer) +{ + struct sock *sk = (struct sock *)sock->data; + + if (sk == NULL) return 0; + + if (sk->type == SOCK_SEQPACKET) { + switch (sk->ax25->state) { + case AX25_STATE_0: + sk->dead = 1; + sk->state_change(sk); + ax25_destroy_socket(sk->ax25); + break; + + case AX25_STATE_1: + ax25_send_control(sk->ax25, DISC | PF, C_RESPONSE); + sk->ax25->state = AX25_STATE_0; + sk->dead = 1; + sk->state_change(sk); + ax25_destroy_socket(sk->ax25); + break; + + case AX25_STATE_2: + ax25_send_control(sk->ax25, DM | PF, C_RESPONSE); + sk->ax25->state = AX25_STATE_0; + sk->dead = 1; + sk->state_change(sk); + ax25_destroy_socket(sk->ax25); + break; + + case AX25_STATE_3: + case AX25_STATE_4: + ax25_clear_tx_queue(sk->ax25); + sk->ax25->n2count = 0; + ax25_send_control(sk->ax25, DISC | PF, C_COMMAND); + sk->ax25->t3timer = 0; + sk->ax25->t1timer = sk->ax25->t1 = ax25_calculate_t1(sk->ax25); + sk->ax25->state = AX25_STATE_2; + sk->state_change(sk); + sk->dead = 1; + break; + + default: + break; + } + } else { + sk->dead = 1; + sk->state_change(sk); + ax25_destroy_socket(sk->ax25); + } + + sock->data = NULL; + + return 0; +} + +/* + * We support a funny extension here so you can (as root) give any callsign + * digipeated via a local address as source. This is a hack until we add + * BSD 4.4 ADDIFADDR type support. 
It is however small and trivially backward + * compatible 8) + */ +static int ax25_bind(struct socket *sock, struct sockaddr *uaddr,int addr_len) +{ + struct sock *sk; + struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr; + struct device *dev; + ax25_address *call; + + sk = (struct sock *)sock->data; + + if (sk->zapped == 0) + return -EIO; + + if (addr_len != sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) + return -EINVAL; + +#ifdef DONTDO + if (ax25_find_socket(&addr->fsa_ax25.sax25_call, sk->type) != NULL) { + if (sk->debug) + printk("AX25: bind failed: in use\n"); + return -EADDRINUSE; + } +#endif + + call = ax25_findbyuid(current->euid); + if (call == NULL && ax25_uid_policy && !suser()) + return -EPERM; + + if (call == NULL) + memcpy(&sk->ax25->source_addr, &addr->fsa_ax25.sax25_call, sizeof(ax25_address)); + else + memcpy(&sk->ax25->source_addr, call, sizeof(ax25_address)); + + if (addr_len == sizeof(struct full_sockaddr_ax25) && addr->fsa_ax25.sax25_ndigis == 1) { + if (!suser()) + return -EPERM; + call = &addr->fsa_digipeater[0]; + } else { + call = &addr->fsa_ax25.sax25_call; + } + + if ((dev = ax25rtr_get_dev(call)) == NULL) { + if (sk->debug) + printk("AX25 bind failed: no device\n"); + return -EADDRNOTAVAIL; + } + + sk->ax25->device = dev; + ax25_insert_socket(sk->ax25); + + sk->zapped = 0; + + if (sk->debug) + printk("AX25: socket is bound\n"); + + return 0; +} + +static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + struct sock *sk = (struct sock *)sock->data; + struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr; + int err; + + if (sk->state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { + sock->state = SS_CONNECTED; + return 0; /* Connect completed during a ERESTARTSYS event */ + } + + if (sk->state == TCP_CLOSE && sock->state == SS_CONNECTING) { + sock->state = SS_UNCONNECTED; + return -ECONNREFUSED; + } + + if (sk->state == 
TCP_ESTABLISHED && sk->type == SOCK_SEQPACKET) + return -EISCONN; /* No reconnect on a seqpacket socket */ + + sk->state = TCP_CLOSE; + sock->state = SS_UNCONNECTED; + + if (addr_len > sizeof(*addr)) { + int ct = 0; + int ndigi = addr_len - sizeof(*addr); + ax25_address *ap = (ax25_address *)(((char *)addr) + sizeof(*addr)); + + /* Size is an exact number of digipeaters ? */ + if (ndigi % sizeof(ax25_address)) + return -EINVAL; + + ndigi /= sizeof(ax25_address); + + /* Valid number of digipeaters ? */ + if (ndigi < 1 || ndigi > 6) + return -EINVAL; + + if (sk->ax25->digipeat == NULL) { + if ((sk->ax25->digipeat = (ax25_digi *)kmalloc(sizeof(ax25_digi), GFP_KERNEL)) == NULL) + return -ENOMEM; + } + + sk->ax25->digipeat->ndigi = ndigi; + + while (ct < ndigi) { + sk->ax25->digipeat->repeated[ct] = 0; + memcpy(&sk->ax25->digipeat->calls[ct], &ap[ct], sizeof(ax25_address)); + ct++; + } + + sk->ax25->digipeat->lastrepeat = 0; + addr_len -= ndigi * sizeof(ax25_address); + } + + if (addr_len != sizeof(struct sockaddr_ax25)) + return -EINVAL; + + if (sk->zapped) { /* Must bind first - autobinding in this may or may not work */ + if ((err = ax25_rt_autobind(sk->ax25, &addr->sax25_call)) < 0) + return err; + ax25_insert_socket(sk->ax25); /* Finish the bind */ + } + + if (sk->type == SOCK_SEQPACKET && ax25_find_cb(&sk->ax25->source_addr, &addr->sax25_call, sk->ax25->device) != NULL) + return -EBUSY; /* Already such a connection */ + + memcpy(&sk->ax25->dest_addr, &addr->sax25_call, sizeof(ax25_address)); + + /* First the easy one */ + if (sk->type != SOCK_SEQPACKET) { + sock->state = SS_CONNECTED; + sk->state = TCP_ESTABLISHED; + return 0; + } + + /* Move to connecting socket, ax.25 lapb WAIT_UA.. 
*/ + sock->state = SS_CONNECTING; + sk->state = TCP_SYN_SENT; + ax25_establish_data_link(sk->ax25); + sk->ax25->state = AX25_STATE_1; + ax25_set_timer(sk->ax25); /* Start going SABM SABM until a UA or a give up and DM */ + + /* Now the loop */ + if (sk->state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) + return -EINPROGRESS; + + cli(); /* To avoid races on the sleep */ + + /* A DM or timeout will go to closed, a UA will go to ABM */ + while (sk->state == TCP_SYN_SENT) { + interruptible_sleep_on(sk->sleep); + if (current->signal & ~current->blocked) { + sti(); + return -ERESTARTSYS; + } + } + + if (sk->state != TCP_ESTABLISHED) { /* Not in ABM, not in WAIT_UA -> failed */ + sti(); + sock->state = SS_UNCONNECTED; + return -sk->err; /* Always set at this point */ + } + + sock->state = SS_CONNECTED; + + sti(); + + return 0; +} + +static int ax25_socketpair(struct socket *sock1, struct socket *sock2) +{ + return -EOPNOTSUPP; +} + +static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct sock *sk; + struct sock *newsk; + struct sk_buff *skb; + + if (newsock->data) + kfree_s(newsock->data, sizeof(struct sock)); + + newsock->data = NULL; + + sk = (struct sock *)sock->data; + + if (sk->type != SOCK_SEQPACKET) + return -EOPNOTSUPP; + + if (sk->state != TCP_LISTEN) + return -EINVAL; + + /* The write queue this time is holding sockets ready to use + hooked into the SABM we saved */ + do { + cli(); + if ((skb = skb_dequeue(&sk->receive_queue)) == NULL) { + if (flags & O_NONBLOCK) { + sti(); + return 0; + } + interruptible_sleep_on(sk->sleep); + if (current->signal & ~current->blocked) { + sti(); + return -ERESTARTSYS; + } + } + } while (skb == NULL); + + newsk = skb->sk; + newsk->pair = NULL; + sti(); + + /* Now attach up the new socket */ + skb->sk = NULL; + kfree_skb(skb, FREE_READ); + sk->ack_backlog--; + newsock->data = newsk; + + return 0; +} + +static int ax25_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int 
peer) +{ + ax25_address *addr; + struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr; + struct sock *sk; + unsigned char ndigi, i; + + sk = (struct sock *)sock->data; + + if (peer != 0) { + if (sk->state != TCP_ESTABLISHED) + return -ENOTCONN; + addr = &sk->ax25->dest_addr; + } else { + addr = &sk->ax25->source_addr; + } + + sax->fsa_ax25.sax25_family = AF_AX25; + memcpy(&sax->fsa_ax25.sax25_call, addr, sizeof(ax25_address)); + sax->fsa_ax25.sax25_ndigis = 0; + *uaddr_len = sizeof(struct sockaddr_ax25); + + /* This will supply digipeat path on both getpeername() and getsockname() */ + if (sk->ax25->digipeat != NULL) { + ndigi = sk->ax25->digipeat->ndigi; + sax->fsa_ax25.sax25_ndigis = ndigi; + *uaddr_len += sizeof(ax25_address) * ndigi; + for (i = 0; i < ndigi; i++) + memcpy(&sax->fsa_digipeater[i], &sk->ax25->digipeat->calls[i], sizeof(ax25_address)); + } + + return 0; +} + +int ax25_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *ptype) +{ + unsigned char *data = skb->data; + struct sock *make; + struct sock *sk; + int type = 0; + ax25_digi dp; + ax25_cb *ax25; + ax25_address src, dest; + struct sock *raw; + int mine = 0; + + skb->sk = NULL; /* Initially we don't know who its for */ + + if ((*data & 0x0F) != 0) { + kfree_skb(skb, FREE_READ); /* Not a KISS data frame */ + return 0; + } + + data++; + + /* + * Parse the address header. + */ + if ((data = ax25_parse_addr(data, skb->len + dev->hard_header_len - 1, &src, &dest, &dp, &type)) == NULL) { + kfree_skb(skb, FREE_READ); + return 0; + } + + /* + * Send the frame to the AX.25 auto-router + */ + ax25_rt_rx_frame(&src, dev); + + /* + * Ours perhaps ? + */ + if (dp.lastrepeat + 1 < dp.ndigi) { /* Not yet digipeated completely */ + if (ax25cmp(&dp.calls[dp.lastrepeat + 1], (ax25_address *)dev->dev_addr) == 0) { + /* We are the digipeater. 
Mark ourselves as repeated + and throw the packet back out of the same device */ + dp.lastrepeat++; + dp.repeated[(int)dp.lastrepeat] = 1; +#ifdef CONFIG_AX25_XDIGI + while (dp.lastrepeat + 1 < dp.ndigi) { + struct device *dev_scan; + if ((dev_scan = ax25rtr_get_dev(&dp.calls[dp.lastrepeat + 1])) == NULL) + break; + dp.lastrepeat++; + dp.repeated[(int)dp.lastrepeat] = 1; + dev = dev_scan; + } +#endif + build_ax25_addr(skb->data + 1, &src, &dest, &dp, type); + skb->len += dev->hard_header_len; + skb->arp = 1; + dev_queue_xmit(skb, dev, SOPRI_NORMAL); + } else { + kfree_skb(skb, FREE_READ); + } + + return 0; + } + + /* + * Adjust the lengths for digipeated input + */ + skb->len -= sizeof(ax25_address) * dp.ndigi; + + /* For our port addreses ? */ + if (ax25cmp(&dest, (ax25_address *)dev->dev_addr) == 0) + mine = 1; + +#ifdef CONFIG_NETROM + /* Also match on any NET/ROM callsign */ + if (!mine && nr_dev_get(&dest) != NULL) + mine = 1; +#endif + + if ((*data & ~0x10) == LAPB_UI) { /* UI frame - bypass LAPB processing */ + data++; + skb->h.raw = data + 1; /* skip pid */ + + if ((raw = ax25_addr_match(&dest)) != NULL) + ax25_send_to_raw(raw, skb, (int)*data); + + if (!mine && ax25cmp(&dest, (ax25_address *)dev->broadcast) != 0) { + kfree_skb(skb, FREE_READ); + return 0; + } + + /* Now we are pointing at the pid byte */ + switch (*data++) { +#ifdef CONFIG_INET + case AX25_P_IP: + ax25_ip_mode_set(&src, dev, 'D'); + ip_rcv(skb, dev, ptype); /* Note ptype here is the wrong one, fix me later */ + break; + + case AX25_P_ARP: + arp_rcv(skb, dev, ptype); /* Note ptype here is wrong... 
*/ + break; +#endif + case AX25_P_TEXT: + /* Now find a suitable dgram socket */ + if ((sk = ax25_find_socket(&dest, &src, SOCK_DGRAM)) != NULL) { + if (sk->rmem_alloc >= sk->rcvbuf) { + kfree_skb(skb, FREE_READ); + } else { + skb_queue_tail(&sk->receive_queue, skb); + skb->sk = sk; + sk->rmem_alloc += skb->mem_len; + if (!sk->dead) + sk->data_ready(sk, skb->len - 2); + } + } else { + kfree_skb(skb, FREE_READ); + } + break; + + default: + kfree_skb(skb, FREE_READ); /* Will scan SOCK_AX25 RAW sockets */ + break; + } + + return 0; + } + + /* LAPB */ + if ((ax25 = ax25_find_cb(&dest, &src, dev)) != NULL) { + skb->h.raw = data; + /* Process the frame. If it is queued up internally it returns one otherwise we + free it immediately. This routine itself wakes the user context layers so we + do no further work */ + if (ax25_process_rx_frame(ax25, skb, type) == 0) + kfree_skb(skb, FREE_READ); + + return 0; + } + + if ((data[0] & 0xEF) != SABM) { + /* + * Never reply to a DM. Also ignore any connects for + * addresses that are not our interfaces and not a socket. + */ + if ((data[0] & 0xEF) != DM && mine) + ax25_return_dm(dev, &src, &dest, &dp); + + kfree_skb(skb, FREE_READ); + return 0; + } + + if ((sk = ax25_find_listener(&dest, dev, SOCK_SEQPACKET)) != NULL) { + if (sk->ack_backlog == sk->max_ack_backlog || (make = ax25_make_new(sk, dev)) == NULL) { + if (mine) + ax25_return_dm(dev, &src, &dest, &dp); + + kfree_skb(skb, FREE_READ); + return 0; + } + + ax25 = make->ax25; + + /* + * Sort out any digipeated paths. 
+ */ + if (dp.ndigi != 0 && ax25->digipeat == NULL && (ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { + kfree_skb(skb, FREE_READ); + ax25_destroy_socket(ax25); + return 0; + } + + if (dp.ndigi == 0) { + if (ax25->digipeat != NULL) { + kfree_s(ax25->digipeat, sizeof(ax25_digi)); + ax25->digipeat = NULL; + } + } else { + /* Reverse the source SABM's path */ + ax25_digi_invert(&dp, ax25->digipeat); + } + + skb_queue_head(&sk->receive_queue, skb); + + skb->sk = make; + make->state = TCP_ESTABLISHED; + make->pair = sk; + + sk->ack_backlog++; + } else { +#ifdef CONFIG_NETROM + if (!mine) { + kfree_skb(skb, FREE_READ); + return 0; + } + + if (dp.ndigi != 0) { + ax25_return_dm(dev, &src, &dest, &dp); + kfree_skb(skb, FREE_READ); + return 0; + } + + if ((ax25 = ax25_create_cb()) == NULL) { + ax25_return_dm(dev, &src, &dest, &dp); + kfree_skb(skb, FREE_READ); + return 0; + } +#else + if (mine) + ax25_return_dm(dev, &src, &dest, &dp); + + kfree_skb(skb, FREE_READ); + return 0; +#endif + } + + memcpy(&ax25->source_addr, &dest, sizeof(ax25_address)); + memcpy(&ax25->dest_addr, &src, sizeof(ax25_address)); + + ax25->device = dev; + + ax25_send_control(ax25, UA | PF, C_RESPONSE); + + ax25->t3timer = ax25->t3; + ax25->state = AX25_STATE_3; + + ax25_insert_socket(ax25); + + ax25_set_timer(ax25); + + if (sk != NULL) { + if (!sk->dead) + sk->data_ready(sk, skb->len - 2); + } else { + kfree_skb(skb, FREE_READ); + } + + return 0; +} + +static int ax25_sendto(struct socket *sock, void *ubuf, int len, int noblock, + unsigned flags, struct sockaddr *usip, int addr_len) +{ + struct sock *sk = (struct sock *)sock->data; + struct sockaddr_ax25 *usax = (struct sockaddr_ax25 *)usip; + unsigned char *uaddr = (unsigned char *)usip; + int err; + struct sockaddr_ax25 sax; + struct sk_buff *skb; + unsigned char *asmptr; + int size; + ax25_digi *dp; + ax25_digi dtmp; + int lv; + + if (sk->err) { + err = sk->err; + sk->err = 0; + return -err; + } + + if (flags) + return -EINVAL; 
+ + if (sk->zapped) + return -EADDRNOTAVAIL; + + if (sk->ax25->device == NULL) + return -ENETUNREACH; + + if (usax) { + int ndigi = addr_len - sizeof(sax); + if (addr_len < sizeof(sax)) + return -EINVAL; + + /* Trailing digipeaters on address ?? */ + if (addr_len > sizeof(sax)) { + int ct = 0; + + ax25_address *ap = (ax25_address *)(((char *)uaddr) + sizeof(sax)); + /* Size is an exact number of digipeaters ? */ + if (ndigi % sizeof(ax25_address)) + return -EINVAL; + ndigi /= sizeof(ax25_address); + + /* Valid number of digipeaters ? */ + if (ndigi < 1 || ndigi > 6) + return -EINVAL; + + /* Copy data into digipeat structure */ + while (ct < ndigi) { + dtmp.repeated[ct] = 0; + memcpy(&dtmp.calls[ct], &ap[ct], sizeof(ax25_address)); + ct++; + } + + dtmp.lastrepeat = 0; + dtmp.ndigi = ndigi; + addr_len -= ndigi * sizeof(ax25_address); + } + + memcpy(&sax, usax, sizeof(sax)); + if (sk->type == SOCK_SEQPACKET && memcmp(&sk->ax25->dest_addr, &sax.sax25_call, sizeof(ax25_address)) != 0) + return -EISCONN; + if (sax.sax25_family != AF_AX25) + return -EINVAL; + if (ndigi != 0) + dp = &dtmp; + else + dp = NULL; + } else { + if (sk->state != TCP_ESTABLISHED) + return -ENOTCONN; + sax.sax25_family = AF_AX25; + memcpy(&sax.sax25_call, &sk->ax25->dest_addr, sizeof(ax25_address)); + dp = sk->ax25->digipeat; + } + + if (sk->debug) + printk("AX.25: sendto: Addresses built.\n"); + + /* Build a packet */ + if (sk->debug) + printk("AX.25: sendto: building packet.\n"); + + size = 2 + len + 1 + size_ax25_addr(dp); + /* 2 bytes for PID and (U)I frame byte: 15+ for KISS data & calls */ + + if ((skb = sock_alloc_send_skb(sk, size, 0, &err)) == NULL) + return err; + + skb->sk = sk; + skb->free = 1; + skb->arp = 1; + skb->len = size; + + asmptr = skb->data; + if (sk->debug) { + printk("Building AX.25 Header (dp=%p).\n", dp); + if (dp != 0) + printk("Num digipeaters=%d\n", dp->ndigi); + } + + /* Build an AX.25 header */ + *asmptr++ = 0; /* KISS data */ + asmptr += (lv = 
build_ax25_addr(asmptr, &sk->ax25->source_addr, &sax.sax25_call, dp, C_COMMAND)); + if (sk->debug) + printk("Built header (%d bytes)\n",lv); + skb->h.raw = asmptr; + + if (sk->debug) + printk("base=%p pos=%p\n", skb->data, asmptr); + *asmptr++ = LAPB_UI; /* Datagram - will get replaced for I frames */ + *asmptr++ = sk->protocol; /* AX.25 TEXT by default */ + + if (sk->debug) + printk("AX.25: Appending user data\n"); + + /* User data follows immediately after the AX.25 data */ + memcpy_fromfs(asmptr, ubuf, len); + if (sk->debug) + printk("AX.25: Transmitting buffer\n"); + if (sk->type == SOCK_SEQPACKET) { + /* Connected mode sockets go via the LAPB machine */ + if (sk->state != TCP_ESTABLISHED) { + kfree_skb(skb, FREE_WRITE); + return -ENOTCONN; + } + ax25_output(sk->ax25, skb); /* Shove it onto the queue and kick */ + return len; + } else { + /* Datagram frames go straight out of the door as UI */ + dev_queue_xmit(skb, sk->ax25->device, SOPRI_NORMAL); + return len; + } +} + +static int ax25_send(struct socket *sock, void *ubuf, int size, int noblock, unsigned flags) +{ + return ax25_sendto(sock, ubuf, size, noblock, flags, NULL, 0); +} + +static int ax25_write(struct socket *sock, char *ubuf, int size, int noblock) +{ + return ax25_send(sock, ubuf, size, noblock, 0); +} + +static int ax25_recvfrom(struct socket *sock, void *ubuf, int size, int noblock, + unsigned flags, struct sockaddr *sip, int *addr_len) +{ + struct sock *sk = (struct sock *)sock->data; + struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)sip; + char *addrptr = (char *)sip; + int copied = 0; + struct sk_buff *skb; + int er; + + if (sk->err) { + er = -sk->err; + sk->err = 0; + return er; + } + + if (addr_len != NULL) + *addr_len = sizeof(*sax); + + /* This works for seqpacket too. The receiver has ordered the queue for us! 
We do one quick check first though */ + if (sk->type == SOCK_SEQPACKET && sk->state != TCP_ESTABLISHED) + return -ENOTCONN; + + /* Now we can treat all alike */ + if ((skb = skb_recv_datagram(sk, flags, noblock, &er)) == NULL) + return er; + + copied= (size < skb->len) ? size : skb->len; + skb_copy_datagram(skb, sk->type == SOCK_SEQPACKET ? 2 : 0, ubuf, copied); + + if (sax) { + struct sockaddr_ax25 addr; + ax25_digi digi; + ax25_address dest; + unsigned char *dp = skb->data; + int ct = 0; + + ax25_parse_addr(dp, skb->len, NULL, &dest, &digi, NULL); + addr.sax25_family = AF_AX25; + memcpy(&addr.sax25_call, &dest, sizeof(ax25_address)); + memcpy(sax,&addr, sizeof(*sax)); + addrptr += sizeof(*sax); + + while (ct < digi.ndigi) { + memcpy(addrptr, &digi. calls[ct], 7); + addrptr += 7; + ct++; + } + if (addr_len) + *addr_len = sizeof(*sax) + 7 * digi.ndigi; + } + + skb_free_datagram(skb); + + return copied; +} + +static int ax25_recv(struct socket *sock, void *ubuf, int size , int noblock, + unsigned flags) +{ + struct sock *sk = (struct sock *)sock->data; + + if (sk->zapped) + return -ENOTCONN; + + return ax25_recvfrom(sock, ubuf, size, noblock, flags, NULL, NULL); +} + +static int ax25_read(struct socket *sock, char *ubuf, int size, int noblock) +{ + return ax25_recv(sock, ubuf, size, noblock, 0); +} + +static int ax25_shutdown(struct socket *sk, int how) +{ + /* FIXME - generate DM and RNR states */ + return -EOPNOTSUPP; +} + +static int ax25_select(struct socket *sock , int sel_type, select_table *wait) +{ + struct sock *sk = (struct sock *)sock->data; + + return datagram_select(sk, sel_type, wait); +} + +static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = (struct sock *)sock->data; + int err; + long amount = 0; + + switch (cmd) { + case TIOCOUTQ: + if ((err = verify_area(VERIFY_WRITE, (void *)arg, sizeof(unsigned long))) != 0) + return err; + amount = sk->sndbuf - sk->wmem_alloc; + if (amount < 0) + amount = 0; + 
put_fs_long(amount, (unsigned long *)arg); + return 0; + + case TIOCINQ: + { + struct sk_buff *skb; + /* These two are safe on a single CPU system as only user tasks fiddle here */ + if ((skb = skb_peek(&sk->receive_queue)) != NULL) + amount = skb->len; + if ((err = verify_area(VERIFY_WRITE, (void *)arg, sizeof(unsigned long))) != 0) + return err; + put_fs_long(amount, (unsigned long *)arg); + return 0; + } + + case SIOCGSTAMP: + if (sk != NULL) { + if (sk->stamp.tv_sec==0) + return -ENOENT; + if ((err = verify_area(VERIFY_WRITE,(void *)arg,sizeof(struct timeval))) != 0) + return err; + memcpy_tofs((void *)arg, &sk->stamp, sizeof(struct timeval)); + return 0; + } + return -EINVAL; + + case SIOCAX25ADDUID: /* Add a uid to the uid/call map table */ + case SIOCAX25DELUID: /* Delete a uid from the uid/call map table */ + case SIOCAX25GETUID: + { + struct sockaddr_ax25 sax25; + if ((err = verify_area(VERIFY_READ, (void *)arg, sizeof(struct sockaddr_ax25))) != 0) + return err; + memcpy_fromfs(&sax25, (void *)arg, sizeof(sax25)); + return ax25_uid_ioctl(cmd, &sax25); + } + + case SIOCAX25NOUID: /* Set the default policy (default/bar) */ + if ((err = verify_area(VERIFY_READ, (void *)arg, sizeof(unsigned long))) != 0) + return err; + if(!suser()) + return -EPERM; + amount = get_fs_long((void *)arg); + if (amount > AX25_NOUID_BLOCK) + return -EINVAL; + ax25_uid_policy = amount; + return 0; + + case SIOCGIFADDR: + case SIOCSIFADDR: + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + case SIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + case SIOCGIFMETRIC: + case SIOCSIFMETRIC: + return -EINVAL; + + default: + return(dev_ioctl(cmd, (void *)arg)); + } + + /*NOTREACHED*/ + return(0); +} + +int ax25_get_info(char *buffer, char **start, off_t offset, int length) +{ + ax25_cb *ax25; + struct device *dev; + char *devname; + int len = 0; + off_t pos = 0; + off_t begin = 0; + + cli(); + + len += sprintf(buffer, "dest_addr src_addr dev st vs vr va t1 
t2 t3 n2 rtt wnd Snd-Q Rcv-Q\n"); + + for (ax25 = ax25_list; ax25 != NULL; ax25 = ax25->next) { + if ((dev = ax25->device) == NULL) + devname = "???"; + else + devname = dev->name; + + len += sprintf(buffer + len, "%-9s ", + ax2asc(&ax25->dest_addr)); + len += sprintf(buffer + len, "%-9s %-3s %2d %2d %2d %2d %3d/%03d %2d/%02d %3d/%03d %2d/%02d %3d %3d", + ax2asc(&ax25->source_addr), devname, + ax25->state, + ax25->vs, ax25->vr, ax25->va, + ax25->t1timer / PR_SLOWHZ, + ax25->t1 / PR_SLOWHZ, + ax25->t2timer / PR_SLOWHZ, + ax25->t2 / PR_SLOWHZ, + ax25->t3timer / PR_SLOWHZ, + ax25->t3 / PR_SLOWHZ, + ax25->n2count, ax25->n2, + ax25->rtt / PR_SLOWHZ, + ax25->window); + + if (ax25->sk != NULL) { + len += sprintf(buffer + len, " %5ld %5ld\n", + ax25->sk->wmem_alloc, + ax25->sk->rmem_alloc); + } else { + len += sprintf(buffer + len, "\n"); + } + + pos = begin + len; + + if (pos < offset) { + len = 0; + begin = pos; + } + + if (pos > offset + length) + break; + } + + sti(); + + *start = buffer + (offset - begin); + len -= (offset - begin); + + if (len > length) len = length; + + return(len); +} + +static struct proto_ops ax25_proto_ops = { + AF_AX25, + + ax25_create, + ax25_dup, + ax25_release, + ax25_bind, + ax25_connect, + ax25_socketpair, + ax25_accept, + ax25_getname, + ax25_read, + ax25_write, + ax25_select, + ax25_ioctl, + ax25_listen, + ax25_send, + ax25_recv, + ax25_sendto, + ax25_recvfrom, + ax25_shutdown, + ax25_setsockopt, + ax25_getsockopt, + ax25_fcntl, +}; + +/* Called by socket.c on kernel start up */ + +static struct packet_type ax25_packet_type = +{ + 0, /* MUTTER ntohs(ETH_P_AX25),*/ + 0, /* copy */ + ax25_rcv, + NULL, + NULL, +}; + +static struct notifier_block ax25_dev_notifier = { + ax25_device_event, + 0 +}; + +void ax25_proto_init(struct net_proto *pro) +{ + sock_register(ax25_proto_ops.family, &ax25_proto_ops); + ax25_packet_type.type = htons(ETH_P_AX25); + dev_add_pack(&ax25_packet_type); + register_netdevice_notifier(&ax25_dev_notifier); + 
printk("GW4PTS/G4KLX AX.25 for Linux. Version 0.29 ALPHA for Linux NET3.029 (Linux 1.3.0)\n"); +} + +/*******************************************************************************************************************\ +* * +* Driver encapsulation support: Moved out of SLIP because a) it should be here * +* b) for HDLC cards * +* * +\*******************************************************************************************************************/ + +/* + * Shove an AX.25 UI header on an IP packet and handle ARP + */ + +#ifdef CONFIG_INET + +int ax25_encapsulate(unsigned char *buff, struct device *dev, unsigned short type, void *daddr, + void *saddr, unsigned len, struct sk_buff *skb) +{ + /* header is an AX.25 UI frame from us to them */ + *buff++ = 0; /* KISS DATA */ + + if (daddr != NULL) + memcpy(buff, daddr, dev->addr_len); /* Address specified */ + buff[6] &= ~LAPB_C; + buff[6] &= ~LAPB_E; + buff[6] |= SSID_SPARE; + buff += 7; + + if (saddr != NULL) + memcpy(buff, saddr, dev->addr_len); + else + memcpy(buff, dev->dev_addr, dev->addr_len); + + buff[6] &= ~LAPB_C; + buff[6] |= LAPB_E; + buff[6] |= SSID_SPARE; + buff += 7; + *buff++ = LAPB_UI; /* UI */ + + /* Append a suitable AX.25 PID */ + switch (type) { + case ETH_P_IP: + *buff++ = AX25_P_IP; + break; + + case ETH_P_ARP: + *buff++ = AX25_P_ARP; + break; + + default: + *buff++ = 0; + break; + } + + if (daddr != NULL) + return 17; + + return -17; /* Unfinished header */ +} + +int ax25_rebuild_header(unsigned char *bp, struct device *dev, unsigned long dest, struct sk_buff *skb) +{ + if (arp_find(bp + 1, dest, dev, dev->pa_addr, skb)) + return 1; + + bp[7] &= ~LAPB_C; + bp[7] &= ~LAPB_E; + bp[7] |= SSID_SPARE; + bp[14] &= ~LAPB_C; + bp[14] |= LAPB_E; + bp[14] |= SSID_SPARE; + + return 0; +} + +#endif + +#endif diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c new file mode 100644 index 000000000..ab22a8f6d --- /dev/null +++ b/net/ax25/ax25_in.c @@ -0,0 +1,591 @@ +/* + * AX.25 release 029 + * + * This is 
ALPHA test software. This code may break your machine, randomly fail to work with new + * releases, misbehave and/or generally screw up. It might even work. + * + * This code REQUIRES 1.2.1 or higher/ NET3.029 + * + * This module: + * This module is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Most of this code is based on the SDL diagrams published in the 7th + * ARRL Computer Networking Conference papers. The diagrams have mistakes + * in them, but are mostly correct. Before you modify the code could you + * read the SDL diagrams as the code is not obvious and probably very + * easy to break; + * + * History + * AX.25 028a Jonathan(G4KLX) New state machine based on SDL diagrams. + * AX.25 028b Jonathan(G4KLX) Extracted AX25 control block from + * the sock structure. + * AX.25 029 Alan(GW4PTS) Switched to KA9Q constant names. + * Jonathan(G4KLX) Added IP mode registration. + */ + +#include <linux/config.h> +#ifdef CONFIG_AX25 +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <net/ax25.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/ip.h> /* For ip_rcv */ +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#ifdef CONFIG_NETROM +#include <net/netrom.h> +#endif + +/* + * This is where all valid I frames are sent to, to be dispatched to + * whichever protocol requires them. 
+ */
+static int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb, unsigned char *iframe)
+{
+	int queued = 0;		/* stays 0 unless skb is handed on to NET/ROM, IP or the socket */
+
+	switch (iframe[1]) {	/* iframe[0] is the control byte, iframe[1] the protocol id */
+#ifdef CONFIG_NETROM
+		case AX25_P_NETROM:
+			/* We can't handle digipeated NET/ROM frames */
+			if (ax25->digipeat == NULL)
+				queued = nr_route_frame(skb, ax25->device);
+			break;
+#endif
+#ifdef CONFIG_INET
+		case AX25_P_IP:
+			ax25_ip_mode_set(&ax25->dest_addr, ax25->device, 'V');	/* note that this peer sent IP inside an I frame */
+			skb->h.raw = ((char *)(iframe)) + 2;	/* step over the control and protocol id bytes */
+			skb->len -= 2;
+			ip_rcv(skb, skb->dev, NULL);	/* Wrong ptype */
+			queued = 1;
+			break;
+#endif
+		case AX25_P_TEXT:
+			if (ax25->sk != NULL) {
+				if (sock_queue_rcv_skb(ax25->sk, skb) == 0) {
+					queued = 1;
+				} else {
+					ax25->condition |= OWN_RX_BUSY_CONDITION;	/* socket did not accept the frame: flag our receiver busy */
+				}
+			}
+			break;
+
+		default:
+			break;	/* any other protocol id: frame is not queued */
+	}
+
+	return queued;
+}
+
+/*
+ * State machine for state 1, Awaiting Connection State.
+ * The handling of the timer(s) is in file ax25_timer.c.
+ * Handling of state 0 and connection release is in ax25.c.
+ *
+ * frame points at the control byte of the received frame and frametype is
+ * the decoded frame type (see ax25_process_rx_frame). skb and type are not
+ * used in this state. Always returns 0, i.e. the frame is never queued.
+ */
+static int ax25_state1_machine(ax25_cb *ax25, struct sk_buff *skb, unsigned char *frame, int frametype, int type)
+{
+	int pf = frame[0] & PF;	/* poll/final bit, echoed back in responses */
+
+	switch (frametype) {
+		case SABM:
+			ax25_send_control(ax25, UA | pf, C_RESPONSE);
+			break;
+
+		case DISC:
+			ax25_send_control(ax25, DM | pf, C_RESPONSE);
+			break;
+
+		case UA:
+			if (pf) {	/* only a UA with the final bit set completes the connection */
+				ax25_calculate_rtt(ax25);
+				ax25->t1timer = 0;
+				ax25->t3timer = ax25->t3;
+				ax25->vs = 0;
+				ax25->va = 0;
+				ax25->vr = 0;
+				ax25->state = AX25_STATE_3;
+				ax25->n2count = 0;
+				if (ax25->sk != NULL) {
+					ax25->sk->state = TCP_ESTABLISHED;
+					/* For WAIT_SABM connections we will produce an accept ready socket here */
+					if (!ax25->sk->dead)
+						ax25->sk->state_change(ax25->sk);
+				}
+			}
+			break;
+
+		case DM:
+			if (pf) {	/* connection refused by the peer */
+				ax25_clear_tx_queue(ax25);
+				ax25->state = AX25_STATE_0;
+				if (ax25->sk != NULL) {
+					ax25->sk->state = TCP_CLOSE;
+					ax25->sk->err = ECONNREFUSED;
+					if (!ax25->sk->dead)
+						ax25->sk->state_change(ax25->sk);
+					ax25->sk->dead = 1;
+				}
+			}
+			break;
+
+		default:
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * State machine for state 2, Awaiting Release State.
+ * The handling of the timer(s) is in file ax25_timer.c
+ * Handling of state 0 and connection release is in ax25.c.
+ *
+ * A UA or DM with the final bit set moves the link to state 0 and closes
+ * the owning socket (if any) with no error. skb and type are not used in
+ * this state. Always returns 0, i.e. the frame is never queued.
+ */
+static int ax25_state2_machine(ax25_cb *ax25, struct sk_buff *skb, unsigned char *frame, int frametype, int type)
+{
+	int pf = frame[0] & PF;	/* poll/final bit of the received frame */
+
+	switch (frametype) {
+		case SABM:
+			ax25_send_control(ax25, DM | pf, C_RESPONSE);
+			break;
+
+		case DISC:
+			ax25_send_control(ax25, UA | pf, C_RESPONSE);
+			break;
+
+		case UA:
+			if (pf) {
+				ax25->state = AX25_STATE_0;
+				if (ax25->sk != NULL) {
+					ax25->sk->state = TCP_CLOSE;
+					ax25->sk->err = 0;
+					if (!ax25->sk->dead)
+						ax25->sk->state_change(ax25->sk);
+					ax25->sk->dead = 1;
+				}
+			}
+			break;
+
+		case DM:
+			if (pf) {	/* handled exactly like the UA case above */
+				ax25->state = AX25_STATE_0;
+				if (ax25->sk != NULL) {
+					ax25->sk->state = TCP_CLOSE;
+					ax25->sk->err = 0;
+					if (!ax25->sk->dead)
+						ax25->sk->state_change(ax25->sk);
+					ax25->sk->dead = 1;
+				}
+			}
+			break;
+
+		case I:
+		case REJ:
+		case RNR:
+		case RR:
+			if (pf)
+				ax25_send_control(ax25, DM | PF, C_RESPONSE);	/* polled while releasing: answer DM */
+			break;
+
+		default:
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * State machine for state 3, Connected State.
+ * The handling of the timer(s) is in file ax25_timer.c
+ * Handling of state 0 and connection release is in ax25.c.
+ */
+static int ax25_state3_machine(ax25_cb *ax25, struct sk_buff *skb, unsigned char *frame, int frametype, int type)
+{
+	unsigned short nr = (frame[0] >> 5) & 7;	/* bits 5-7: where ax25_send_iframe() puts the sender's vr */
+	unsigned short ns = (frame[0] >> 1) & 7;	/* bits 1-3: where ax25_send_iframe() puts the sender's vs */
+	int pf = frame[0] & PF;	/* poll/final bit of the received frame */
+	int queued = 0;	/* return value; only changed by ax25_rx_iframe() in the I frame case */
+
+	switch (frametype) {
+		case SABM:	/* peer restarted the link: acknowledge and reset our sequence state */
+			ax25_send_control(ax25, UA | pf, C_RESPONSE);
+			ax25->condition = 0x00;
+			ax25->t1timer = 0;
+			ax25->t3timer = ax25->t3;
+			ax25->vs = 0;
+			ax25->va = 0;
+			ax25->vr = 0;
+			break;
+
+		case DISC:	/* peer disconnected: acknowledge, go to state 0, close the socket without error */
+			ax25_clear_tx_queue(ax25);
+			ax25_send_control(ax25, UA | pf, C_RESPONSE);
+			ax25->t3timer = 0;
+			ax25->state = AX25_STATE_0;
+			if (ax25->sk != NULL) {
+				ax25->sk->state = TCP_CLOSE;
+				ax25->sk->err = 0;
+				if (!ax25->sk->dead)
+					ax25->sk->state_change(ax25->sk);
+				ax25->sk->dead = 1;
+			}
+			break;
+
+		case UA:	/* unexpected while connected: re-establish the link */
+			ax25_establish_data_link(ax25);
+			ax25->state = AX25_STATE_1;
+			break;
+
+		case DM:	/* peer reports disconnected mode: close with ECONNRESET */
+			ax25_clear_tx_queue(ax25);
+			ax25->t3timer = 0;
+			ax25->state = AX25_STATE_0;
+			if (ax25->sk) {
+				ax25->sk->state = TCP_CLOSE;
+				ax25->sk->err = ECONNRESET;
+				if (!ax25->sk->dead)
+					ax25->sk->state_change(ax25->sk);
+				ax25->sk->dead = 1;
+			}
+			break;
+
+		case RNR:
+			ax25->condition |= PEER_RX_BUSY_CONDITION;	/* peer cannot accept I frames for now */
+			ax25_check_need_response(ax25, type, pf);
+			if (ax25_validate_nr(ax25, nr)) {
+				ax25_check_iframes_acked(ax25, nr);
+			} else {
+				ax25_nr_error_recovery(ax25);	/* bad sequence number: reset the link */
+				ax25->state = AX25_STATE_1;
+			}
+			break;
+
+		case RR:
+			ax25->condition &= ~PEER_RX_BUSY_CONDITION;	/* peer is ready again */
+			ax25_check_need_response(ax25, type, pf);
+			if (ax25_validate_nr(ax25, nr)) {
+				ax25_check_iframes_acked(ax25, nr);
+			} else {
+				ax25_nr_error_recovery(ax25);
+				ax25->state = AX25_STATE_1;
+			}
+			break;
+
+		case REJ:
+			ax25->condition &= ~PEER_RX_BUSY_CONDITION;
+			ax25_check_need_response(ax25, type, pf);
+			if (ax25_validate_nr(ax25, nr)) {
+				ax25_frames_acked(ax25, nr);
+				ax25_calculate_rtt(ax25);
+				ax25->t1timer = 0;
+				ax25->t3timer = ax25->t3;
+			} else {
+				ax25_nr_error_recovery(ax25);
+				ax25->state = AX25_STATE_1;
+			}
+			break;
+
+		case I:
+			if (type != C_COMMAND)	/* I frames are only accepted as commands */
+				break;
+			if (!ax25_validate_nr(ax25, nr)) {
+				ax25_nr_error_recovery(ax25);
+				ax25->state = AX25_STATE_1;
+				break;
+			}
+			if (ax25->condition & PEER_RX_BUSY_CONDITION) {
+				ax25_frames_acked(ax25, nr);
+			} else {
+				ax25_check_iframes_acked(ax25, nr);
+			}
+			if (ax25->condition & OWN_RX_BUSY_CONDITION) {	/* we are busy: drop the data, answer a poll only */
+				if (pf) ax25_enquiry_response(ax25);
+				break;
+			}
+			if (ns == ax25->vr) {	/* in-sequence frame */
+				queued = ax25_rx_iframe(ax25, skb, frame);
+				if (ax25->condition & OWN_RX_BUSY_CONDITION) {	/* ax25_rx_iframe() may have just set this; vr is then not advanced */
+					if (pf) ax25_enquiry_response(ax25);
+					break;
+				}
+				ax25->vr = (ax25->vr + 1) % MODULUS;
+				ax25->condition &= ~REJECT_CONDITION;
+				if (pf) {
+					ax25_enquiry_response(ax25);
+				} else {
+					if (!(ax25->condition & ACK_PENDING_CONDITION)) {	/* arm T2 once and remember an ack is owed */
+						ax25->t2timer = ax25->t2;
+						ax25->condition |= ACK_PENDING_CONDITION;
+					}
+				}
+			} else {	/* out-of-sequence frame */
+				if (ax25->condition & REJECT_CONDITION) {	/* a REJ is already outstanding */
+					if (pf) ax25_enquiry_response(ax25);
+				} else {
+					ax25->condition |= REJECT_CONDITION;
+					ax25_send_control(ax25, REJ | pf, C_RESPONSE);
+					ax25->condition &= ~ACK_PENDING_CONDITION;
+				}
+			}
+			break;
+
+		case FRMR:
+		case ILLEGAL:
+			ax25_establish_data_link(ax25);
+			ax25->state = AX25_STATE_1;
+			break;
+
+		default:
+			break;
+	}
+
+	return queued;
+}
+
+/*
+ * State machine for state 4, Timer Recovery State.
+ * The handling of the timer(s) is in file ax25_timer.c
+ * Handling of state 0 and connection release is in ax25.c.
+ */
+static int ax25_state4_machine(ax25_cb *ax25, struct sk_buff *skb, unsigned char *frame, int frametype, int type)
+{
+	unsigned short nr = (frame[0] >> 5) & 7;	/* bits 5-7: where ax25_send_iframe() puts the sender's vr */
+	unsigned short ns = (frame[0] >> 1) & 7;	/* bits 1-3: where ax25_send_iframe() puts the sender's vs */
+	int pf = frame[0] & PF;	/* poll/final bit of the received frame */
+	int queued = 0;	/* return value; only changed by ax25_rx_iframe() in the I frame case */
+
+	switch (frametype) {
+		case SABM:	/* peer restarted the link: acknowledge, reset and return to state 3 */
+			ax25_send_control(ax25, UA | pf, C_RESPONSE);
+			ax25->condition = 0x00;
+			ax25->t1timer = 0;
+			ax25->t3timer = ax25->t3;
+			ax25->vs = 0;
+			ax25->va = 0;
+			ax25->vr = 0;
+			ax25->state = AX25_STATE_3;
+			ax25->n2count = 0;
+			break;
+
+		case DISC:	/* peer disconnected: acknowledge, go to state 0, close the socket without error */
+			ax25_clear_tx_queue(ax25);
+			ax25_send_control(ax25, UA | pf, C_RESPONSE);
+			ax25->t3timer = 0;
+			ax25->state = AX25_STATE_0;
+			if (ax25->sk != NULL) {
+				ax25->sk->state = TCP_CLOSE;
+				ax25->sk->err = 0;
+				if (!ax25->sk->dead)
+					ax25->sk->state_change(ax25->sk);
+				ax25->sk->dead = 1;
+			}
+			break;
+
+		case UA:
+			ax25_establish_data_link(ax25);
+			ax25->state = AX25_STATE_1;
+			break;
+
+		case DM:	/* peer reports disconnected mode: close with ECONNRESET */
+			ax25_clear_tx_queue(ax25);
+			ax25->t3timer = 0;
+			ax25->state = AX25_STATE_0;
+			if (ax25->sk != NULL) {
+				ax25->sk->state = TCP_CLOSE;
+				ax25->sk->err = ECONNRESET;
+				if (!ax25->sk->dead)
+					ax25->sk->state_change(ax25->sk);
+				ax25->sk->dead = 1;
+			}
+			break;
+
+		case RNR:
+			ax25->condition |= PEER_RX_BUSY_CONDITION;
+			if (type == C_RESPONSE && pf) {	/* final response received */
+				ax25->t1timer = 0;
+				if (ax25_validate_nr(ax25, nr)) {
+					ax25_frames_acked(ax25, nr);
+					if (ax25->vs == ax25->va) {	/* nothing left unacknowledged: back to state 3 */
+						ax25->t3timer = ax25->t3;
+						ax25->n2count = 0;
+						ax25->state = AX25_STATE_3;
+					}
+				} else {
+					ax25_nr_error_recovery(ax25);
+					ax25->state = AX25_STATE_1;
+				}
+				break;
+			}
+			if (type == C_COMMAND && pf)
+				ax25_enquiry_response(ax25);
+			if (ax25_validate_nr(ax25, nr)) {
+				ax25_frames_acked(ax25, nr);
+			} else {
+				ax25_nr_error_recovery(ax25);
+				ax25->state = AX25_STATE_1;
+			}
+			break;
+
+		case RR:	/* same handling as RNR except that the peer-busy flag is cleared */
+			ax25->condition &= ~PEER_RX_BUSY_CONDITION;
+			if (type == C_RESPONSE && pf) {
+				ax25->t1timer = 0;
+				if (ax25_validate_nr(ax25, nr)) {
+					ax25_frames_acked(ax25, nr);
+					if (ax25->vs == ax25->va) {
+						ax25->t3timer = ax25->t3;
+						ax25->n2count = 0;
+						ax25->state = AX25_STATE_3;
+					}
+				} else {
+					ax25_nr_error_recovery(ax25);
+					ax25->state = AX25_STATE_1;
+				}
+				break;
+			}
+			if (type == C_COMMAND && pf)
+				ax25_enquiry_response(ax25);
+			if (ax25_validate_nr(ax25, nr)) {
+				ax25_frames_acked(ax25, nr);
+			} else {
+				ax25_nr_error_recovery(ax25);
+				ax25->state = AX25_STATE_1;
+			}
+			break;
+
+		case REJ:	/* handled identically to RR in this state */
+			ax25->condition &= ~PEER_RX_BUSY_CONDITION;
+			if (type == C_RESPONSE && pf) {
+				ax25->t1timer = 0;
+				if (ax25_validate_nr(ax25, nr)) {
+					ax25_frames_acked(ax25, nr);
+					if (ax25->vs == ax25->va) {
+						ax25->t3timer = ax25->t3;
+						ax25->n2count = 0;
+						ax25->state = AX25_STATE_3;
+					}
+				} else {
+					ax25_nr_error_recovery(ax25);
+					ax25->state = AX25_STATE_1;
+				}
+				break;
+			}
+			if (type == C_COMMAND && pf)
+				ax25_enquiry_response(ax25);
+			if (ax25_validate_nr(ax25, nr)) {
+				ax25_frames_acked(ax25, nr);
+			} else {
+				ax25_nr_error_recovery(ax25);
+				ax25->state = AX25_STATE_1;
+			}
+			break;
+
+		case I:
+			if (type != C_COMMAND)	/* I frames are only accepted as commands */
+				break;
+			if (!ax25_validate_nr(ax25, nr)) {
+				ax25_nr_error_recovery(ax25);
+				ax25->state = AX25_STATE_1;
+				break;
+			}
+			ax25_frames_acked(ax25, nr);
+			if (ax25->condition & OWN_RX_BUSY_CONDITION) {	/* we are busy: drop the data, answer a poll only */
+				if (pf) ax25_enquiry_response(ax25);
+				break;
+			}
+			if (ns == ax25->vr) {	/* in-sequence frame */
+				queued = ax25_rx_iframe(ax25, skb, frame);
+				if (ax25->condition & OWN_RX_BUSY_CONDITION) {	/* ax25_rx_iframe() may have just set this; vr is then not advanced */
+					if (pf) ax25_enquiry_response(ax25);
+					break;
+				}
+				ax25->vr = (ax25->vr + 1) % MODULUS;
+				ax25->condition &= ~REJECT_CONDITION;
+				if (pf) {
+					ax25_enquiry_response(ax25);
+				} else {
+					if (!(ax25->condition & ACK_PENDING_CONDITION)) {	/* arm T2 once and remember an ack is owed */
+						ax25->t2timer = ax25->t2;
+						ax25->condition |= ACK_PENDING_CONDITION;
+					}
+				}
+			} else {	/* out-of-sequence frame */
+				if (ax25->condition & REJECT_CONDITION) {	/* a REJ is already outstanding */
+					if (pf) ax25_enquiry_response(ax25);
+				} else {
+					ax25->condition |= REJECT_CONDITION;
+					ax25_send_control(ax25, REJ | pf, C_RESPONSE);
+					ax25->condition &= ~ACK_PENDING_CONDITION;
+				}
+			}
+			break;
+
+		case FRMR:
+		case ILLEGAL:
+			ax25_establish_data_link(ax25);
+			ax25->state = AX25_STATE_1;
+			break;
+
+		default:
+			break;
+	}
+
+	return queued;
+}
+
+/*
+ * Higher level upcall for a LAPB frame
+ *
+ * Stops the control block timer, decodes the frame type from the control
+ * byte at skb->h.raw, runs the state machine selected by ax25->state and
+ * restarts the timer. The return value is whatever the state machine
+ * returned: non-zero only when ax25_rx_iframe() passed skb on.
+ * NOTE(review): presumably the caller frees skb when 0 is returned - confirm
+ * against the caller, which is not part of this file.
+ */
+int ax25_process_rx_frame(ax25_cb *ax25, struct sk_buff *skb, int type)
+{
+	int queued = 0, frametype;
+	unsigned char *frame;
+
+	del_timer(&ax25->timer);	/* re-armed by ax25_set_timer() below */
+
+	frame = skb->h.raw;
+
+	frametype = ax25_decode(frame);
+
+	switch (ax25->state) {
+		case AX25_STATE_1:
+			queued = ax25_state1_machine(ax25, skb, frame, frametype, type);
+			break;
+		case AX25_STATE_2:
+			queued = ax25_state2_machine(ax25, skb, frame, frametype, type);
+			break;
+		case AX25_STATE_3:
+			queued = ax25_state3_machine(ax25, skb, frame, frametype, type);
+			break;
+		case AX25_STATE_4:
+			queued = ax25_state4_machine(ax25, skb, frame, frametype, type);
+			break;
+		default:	/* state 0 is not handled here; just log it */
+			printk("ax25_process_rx_frame: frame received - state = %d\n", ax25->state);
+			break;
+	}
+
+	ax25_set_timer(ax25);
+
+	return(queued);
+}
+
+#endif
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
new file mode 100644
index 000000000..73cd056c7
--- /dev/null
+++ b/net/ax25/ax25_out.c
@@ -0,0 +1,235 @@
+/*
+ * AX.25 release 029
+ *
+ * This is ALPHA test software. This code may break your machine, randomly fail to work with new
+ * releases, misbehave and/or generally screw up. It might even work.
+ *
+ * This code REQUIRES 1.2.1 or higher/ NET3.029
+ *
+ * This module:
+ * This module is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Most of this code is based on the SDL diagrams published in the 7th
+ * ARRL Computer Networking Conference papers. The diagrams have mistakes
+ * in them, but are mostly correct.
Before you modify the code could you + * read the SDL diagrams as the code is not obvious and probably very + * easy to break; + * + * History + * AX.25 028a Jonathan(G4KLX) New state machine based on SDL diagrams. + * AX.25 029 Alan(GW4PTS) Switched to KA9Q constant names. + * Jonathan(G4KLX) Only poll when window is full. + */ + +#include <linux/config.h> +#ifdef CONFIG_AX25 +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <net/ax25.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/interrupt.h> + +int ax25_output(ax25_cb *ax25, struct sk_buff *skb) +{ + skb_queue_tail(&ax25->write_queue, skb); /* Throw it on the queue */ + + if (ax25->state == AX25_STATE_3 || ax25->state == AX25_STATE_4) + ax25_kick(ax25); + + return 0; +} + +/* + * This procedure is passed a buffer descriptor for an iframe. It builds + * the rest of the control part of the frame and then writes it out. + */ +static void ax25_send_iframe(ax25_cb *ax25, struct sk_buff *skb, int poll_bit) +{ + unsigned char *frame; + + if (skb == NULL) + return; + + frame = skb->h.raw; /* KISS + header */ + + *frame = I; + *frame |= poll_bit; + *frame |= (ax25->vr << 5); + *frame |= (ax25->vs << 1); + + ax25_transmit_buffer(ax25, skb, C_COMMAND); +} + +void ax25_kick(ax25_cb *ax25) +{ + struct sk_buff *skb, *skbn; + int last = 1; + unsigned short start, end, next; + + del_timer(&ax25->timer); + + start = (skb_peek(&ax25->ack_queue) == NULL) ? 
ax25->va : ax25->vs; + end = (ax25->va + ax25->window) % MODULUS; + + if (!(ax25->condition & PEER_RX_BUSY_CONDITION) && + start != end && + skb_peek(&ax25->write_queue) != NULL) { + + ax25->vs = start; + + /* + * Transmit data until either we're out of data to send or + * the window is full. Send a poll on the final I frame if + * the window is filled. + */ + do { + /* + * Dequeue the frame and copy it. + */ + skb = skb_dequeue(&ax25->write_queue); + + if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { + skb_queue_head(&ax25->write_queue, skb); + return; + } + + next = (ax25->vs + 1) % MODULUS; +#ifdef notdef + last = (next == end) || skb_peek(&ax25->write_queue) == NULL; +#else + last = (next == end); +#endif + /* + * Transmit the frame copy. + */ + ax25_send_iframe(ax25, skbn, (last) ? PF : 0); + + ax25->vs = next; + + /* + * Requeue the original data frame. + */ + skb_queue_tail(&ax25->ack_queue, skb); +#ifdef notdef + } while (!last); +#else + } while (!last && skb_peek(&ax25->write_queue) != NULL); +#endif + ax25->condition &= ~ACK_PENDING_CONDITION; + + if (ax25->t1timer == 0) { + ax25->t3timer = 0; + ax25->t1timer = ax25->t1 = ax25_calculate_t1(ax25); + } + } + + ax25_set_timer(ax25); +} + +void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type) +{ + unsigned char *ptr = skb->data; + + if (ax25->device == NULL) { + if (ax25->sk != NULL) { + ax25->sk->state = TCP_CLOSE; + ax25->sk->err = ENETUNREACH; + if (!ax25->sk->dead) + ax25->sk->state_change(ax25->sk); + ax25->sk->dead = 1; + } + return; + } + + *ptr++ = 0; /* KISS data */ + ptr += build_ax25_addr(ptr, &ax25->source_addr, &ax25->dest_addr, ax25->digipeat, type); + + skb->arp = 1; + + dev_queue_xmit(skb, ax25->device, SOPRI_NORMAL); +} + +/* + * The following routines are taken from page 170 of the 7th ARRL Computer + * Networking Conference paper, as is the whole state machine. 
+ */ + +void ax25_nr_error_recovery(ax25_cb *ax25) +{ + ax25_establish_data_link(ax25); +} + +void ax25_establish_data_link(ax25_cb *ax25) +{ + ax25->condition = 0x00; + ax25->n2count = 0; + + ax25_send_control(ax25, SABM | PF, C_COMMAND); + + ax25->t3timer = 0; + ax25->t2timer = 0; + ax25->t1timer = ax25->t1 = ax25_calculate_t1(ax25); +} + +void ax25_transmit_enquiry(ax25_cb *ax25) +{ + if (ax25->condition & OWN_RX_BUSY_CONDITION) + ax25_send_control(ax25, RNR | PF, C_COMMAND); + else + ax25_send_control(ax25, RR | PF, C_COMMAND); + + ax25->condition &= ~ACK_PENDING_CONDITION; + + ax25->t1timer = ax25->t1 = ax25_calculate_t1(ax25); +} + +void ax25_enquiry_response(ax25_cb *ax25) +{ + if (ax25->condition & OWN_RX_BUSY_CONDITION) + ax25_send_control(ax25, RNR | PF, C_RESPONSE); + else + ax25_send_control(ax25, RR | PF, C_RESPONSE); + + ax25->condition &= ~ACK_PENDING_CONDITION; +} + +void ax25_check_iframes_acked(ax25_cb *ax25, unsigned short nr) +{ + if (ax25->vs == nr) { + ax25_frames_acked(ax25, nr); + ax25_calculate_rtt(ax25); + ax25->t1timer = 0; + ax25->t3timer = ax25->t3; + } else { + if (ax25->va != nr) { + ax25_frames_acked(ax25, nr); + ax25->t1timer = ax25->t1 = ax25_calculate_t1(ax25); + } + } +} + +void ax25_check_need_response(ax25_cb *ax25, int type, int pf) +{ + if (type == C_COMMAND && pf) + ax25_enquiry_response(ax25); +} + +#endif diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c new file mode 100644 index 000000000..b0ffcea58 --- /dev/null +++ b/net/ax25/ax25_route.c @@ -0,0 +1,288 @@ +/* + * AX.25 release 029 + * + * This is ALPHA test software. This code may break your machine, randomly fail to work with new + * releases, misbehave and/or generally screw up. It might even work. 
+ *
+ * This code REQUIRES 1.2.1 or higher/ NET3.029
+ *
+ * This module:
+ * This module is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Other kernels modules in this kit are generally BSD derived. See the copyright headers.
+ *
+ *
+ * History
+ * AX.25 020 Jonathan(G4KLX) First go.
+ * AX.25 022 Jonathan(G4KLX) Added the actual meat to this - we now have a nice mheard list.
+ * AX.25 025 Alan(GW4PTS) First cut at autobinding by route scan.
+ * AX.25 028b Jonathan(G4KLX) Extracted AX25 control block from the
+ * sock structure. Device removal now
+ * removes the heard structure.
+ * AX.25 029 Steven(GW7RRM) Added /proc information for uid/callsign mapping.
+ * Jonathan(G4KLX) Handling of IP mode in the routing list and /proc entry.
+ */
+
+#include <linux/config.h>
+#ifdef CONFIG_AX25
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+#define AX25_ROUTE_MAX 40	/* entries are recycled once the list has grown past this */
+
+static struct ax25_route {
+	struct ax25_route *next;	/* singly linked list, newest entry first */
+	ax25_address callsign;	/* station that was heard */
+	struct device *dev;	/* interface it was heard on */
+	struct timeval stamp;	/* xtime when it was last heard */
+	int n;	/* number of frames heard */
+	char ip_mode;	/* ' ' until ax25_ip_mode_set() records a mode; 'V'/'D' are shown as vc/dg */
+} *ax25_route = NULL;
+/*
+ * Record that a frame from src was heard on dev.
+ *
+ * An existing entry for the same callsign and device gets its time stamp
+ * refreshed and its counter incremented. Otherwise a new entry is added at
+ * the head of the list, or the entry with the oldest time stamp is reused
+ * when the list is long enough. If no memory is available the frame is
+ * simply not recorded.
+ * NOTE(review): the test is count > AX25_ROUTE_MAX, so the list can reach
+ * AX25_ROUTE_MAX + 1 entries before recycling starts - confirm intended.
+ */
+void ax25_rt_rx_frame(ax25_address *src, struct device *dev)
+{
+	unsigned long flags;
+	extern struct timeval xtime;
+	struct ax25_route *ax25_rt;
+	struct ax25_route *oldest;	/* entry with the smallest stamp.tv_sec seen so far */
+	int count;	/* number of entries walked */
+
+	count = 0;
+	oldest = NULL;
+
+	for (ax25_rt = ax25_route; ax25_rt != NULL; ax25_rt = ax25_rt->next) {
+		if (count == 0 || ax25_rt->stamp.tv_sec < oldest->stamp.tv_sec)
+			oldest = ax25_rt;
+
+		if (ax25cmp(&ax25_rt->callsign, src) == 0 && ax25_rt->dev == dev) {
+			ax25_rt->stamp = xtime;
+			ax25_rt->n++;
+			return;
+		}
+
+		count++;
+	}
+
+	if (count > AX25_ROUTE_MAX) {	/* list is full: overwrite the oldest entry in place */
+		oldest->callsign = *src;
+		oldest->dev = dev;
+		oldest->stamp = xtime;
+		oldest->n = 1;
+		oldest->ip_mode = ' ';
+		return;
+	}
+
+	if ((ax25_rt = (struct ax25_route *)kmalloc(sizeof(struct ax25_route), GFP_ATOMIC)) == NULL)
+		return;	/* No space */
+
+	ax25_rt->callsign = *src;
+	ax25_rt->dev = dev;
+	ax25_rt->stamp = xtime;
+	ax25_rt->n = 1;
+	ax25_rt->ip_mode = ' ';
+
+	save_flags(flags);
+	cli();	/* link the new entry in with interrupts disabled */
+
+	ax25_rt->next = ax25_route;
+	ax25_route = ax25_rt;
+
+	restore_flags(flags);
+}
+/*
+ * Remove and free every entry that was heard on dev.
+ */
+void ax25_rt_device_down(struct device *dev)
+{
+	struct ax25_route *s, *t, *ax25_rt = ax25_route;
+
+	while (ax25_rt != NULL) {
+		s = ax25_rt;
+		ax25_rt = ax25_rt->next;	/* advance first: s may be freed below */
+
+		if (s->dev == dev) {
+			if (ax25_route == s) {	/* s is the list head */
+				ax25_route = s->next;
+				kfree_s((void *)s, (sizeof *s));
+			} else {
+				for (t = ax25_route; t != NULL; t = t->next) {	/* find the predecessor of s and unlink */
+					if (t->next == s) {
+						t->next = s->next;
+						kfree_s((void *)s, sizeof(*s));
+						break;
+					}
+				}
+			}
+		}
+	}
+}
+/*
+ * Format the list of heard stations as text into buffer.
+ *
+ * offset and length describe the window of the text the caller wants;
+ * *start is set to where that window begins inside buffer and the return
+ * value is the number of bytes available there, at most length. The list
+ * is walked with interrupts disabled.
+ * NOTE(review): the signature looks like a /proc read handler - confirm
+ * where it is registered.
+ */
+int ax25_rt_get_info(char *buffer, char **start, off_t offset, int length)
+{
+	struct ax25_route *ax25_rt;
+	int len = 0;	/* bytes currently held in buffer */
+	off_t pos = 0;	/* position in the whole text just after the last entry written */
+	off_t begin = 0;	/* position in the whole text of buffer[0] */
+
+	cli();
+
+	len += sprintf(buffer, "callsign dev count time mode\n");
+
+	for (ax25_rt = ax25_route; ax25_rt != NULL; ax25_rt = ax25_rt->next) {
+		len += sprintf(buffer + len, "%-9s %-3s %5d %9ld",
+			ax2asc(&ax25_rt->callsign),
+			ax25_rt->dev ? ax25_rt->dev->name : "???",
+			ax25_rt->n,
+			ax25_rt->stamp.tv_sec);
+
+		switch (ax25_rt->ip_mode) {
+			case 'V':
+			case 'v':
+				len += sprintf(buffer + len, " vc\n");
+				break;
+			case 'D':
+			case 'd':
+				len += sprintf(buffer + len, " dg\n");
+				break;
+			default:
+				len += sprintf(buffer + len, "\n");
+				break;
+		}
+
+		pos = begin + len;
+
+		if (pos < offset) {	/* everything so far lies before the window: discard it */
+			len = 0;
+			begin = pos;
+		}
+
+		if (pos > offset + length)	/* window is filled */
+			break;
+	}
+
+	sti();
+
+	*start = buffer + (offset - begin);
+	len -= (offset - begin);
+
+	if (len > length) len = length;
+
+	return len;
+}
+/*
+ * Format the uid/callsign policy and mapping table as text into buffer.
+ * The offset, length, *start and return value handling is the same as in
+ * ax25_rt_get_info().
+ */
+int ax25_cs_get_info(char *buffer, char **start, off_t offset, int length)
+{
+	ax25_uid_assoc *pt;
+	int len = 0;	/* bytes currently held in buffer */
+	off_t pos = 0;	/* position in the whole text just after the last entry written */
+	off_t begin = 0;	/* position in the whole text of buffer[0] */
+
+	cli();
+
+	len += sprintf(buffer, "Policy: %d\n", ax25_uid_policy);
+
+	for (pt = ax25_uid_list; pt != NULL; pt = pt->next) {
+		len += sprintf(buffer + len, "%6d %s\n", pt->uid, ax2asc(&pt->call));
+
+		pos = begin + len;
+
+		if (pos < offset) {	/* everything so far lies before the window: discard it */
+			len = 0;
+			begin = pos;
+		}
+
+		if (pos > offset + length)	/* window is filled */
+			break;
+	}
+
+	sti();
+
+	*start = buffer + (offset - begin);
+	len -= offset - begin;
+
+	if (len > length) len = length;
+
+	return len;
+}
+
+/*
+ * Find what interface to use.
+ *
+ * Looks for the first heard-list entry for addr that has a device, binds
+ * the control block to that device and fills in its source address: the
+ * callsign mapped to the caller's effective uid, or the device's own
+ * address when there is no mapping. Returns 0 on success, -EPERM when
+ * there is no mapping, ax25_uid_policy is non-zero and the caller is not
+ * the superuser, and -EINVAL when no usable entry exists.
+ */
+int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
+{
+	struct ax25_route *ax25_rt;
+	ax25_address *call;
+
+	for (ax25_rt = ax25_route; ax25_rt != NULL; ax25_rt = ax25_rt->next) {
+		if (ax25cmp(&ax25_rt->callsign, addr) == 0) {
+			/*
+			 * Bind to the physical interface we heard them on.
+			 */
+			if ((ax25->device = ax25_rt->dev) == NULL)
+				continue;
+			if ((call = ax25_findbyuid(current->euid)) == NULL) {
+				if (ax25_uid_policy && !suser())
+					return -EPERM;	/* note: ax25->device has already been set at this point */
+				call = (ax25_address *)ax25->device->dev_addr;
+			}
+			memcpy(&ax25->source_addr, call, sizeof(ax25_address));
+			if (ax25->sk != NULL)
+				ax25->sk->zapped = 0;
+
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Register the mode of an incoming IP frame.
It is assumed that an entry + * already exists in the routing table. + */ +void ax25_ip_mode_set(ax25_address *callsign, struct device *dev, char ip_mode) +{ + struct ax25_route *ax25_rt; + + for (ax25_rt = ax25_route; ax25_rt != NULL; ax25_rt = ax25_rt->next) { + if (ax25cmp(&ax25_rt->callsign, callsign) == 0 && ax25_rt->dev == dev) { + ax25_rt->ip_mode = ip_mode; + return; + } + } +} + +/* + * Return the IP mode of a given callsign/device pair. + */ +char ax25_ip_mode_get(ax25_address *callsign, struct device *dev) +{ + struct ax25_route *ax25_rt; + + for (ax25_rt = ax25_route; ax25_rt != NULL; ax25_rt = ax25_rt->next) + if (ax25cmp(&ax25_rt->callsign, callsign) == 0 && ax25_rt->dev == dev) + return ax25_rt->ip_mode; + + return ' '; +} + +#endif diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c new file mode 100644 index 000000000..2530346e5 --- /dev/null +++ b/net/ax25/ax25_subr.c @@ -0,0 +1,383 @@ +/* + * AX.25 release 029 + * + * This is ALPHA test software. This code may break your machine, randomly fail to work with new + * releases, misbehave and/or generally screw up. It might even work. + * + * This code REQUIRES 1.2.1 or higher/ NET3.029 + * + * This module: + * This module is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Most of this code is based on the SDL diagrams published in the 7th + * ARRL Computer Networking Conference papers. The diagrams have mistakes + * in them, but are mostly correct. Before you modify the code could you + * read the SDL diagrams as the code is not obvious and probably very + * easy to break; + * + * History + * AX.25 029 Alan(GW4PTS) Switched to KA9Q constant names. Removed + * old BSD code. 
+ */ + +#include <linux/config.h> +#ifdef CONFIG_AX25 +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <net/ax25.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/interrupt.h> + +/* + * This routine purges the input queue of frames. + */ +void ax25_clear_tx_queue(ax25_cb *ax25) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&ax25->write_queue)) != NULL) { + skb->free = 1; + kfree_skb(skb, FREE_WRITE); + } + + while ((skb = skb_dequeue(&ax25->ack_queue)) != NULL) { + skb->free = 1; + kfree_skb(skb, FREE_WRITE); + } +} + +/* + * This routine purges the input queue of those frames that have been + * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the + * SDL diagram. + */ +void ax25_frames_acked(ax25_cb *ax25, unsigned short nr) +{ + struct sk_buff *skb, *skb_prev = NULL; + + /* + * Remove all the ack-ed frames from the ack queue. + */ + if (ax25->va != nr) { + while (skb_peek(&ax25->ack_queue) != NULL && ax25->va != nr) { + skb = skb_dequeue(&ax25->ack_queue); + skb->free = 1; + kfree_skb(skb, FREE_WRITE); + ax25->va = (ax25->va + 1) % MODULUS; + } + } + + /* + * Requeue all the un-ack-ed frames on the output queue to be picked + * up by ax25_kick called from the timer. This arrangement handles the + * possibility of an empty output queue. + */ + while ((skb = skb_dequeue(&ax25->ack_queue)) != NULL) { + if (skb_prev == NULL) + skb_queue_head(&ax25->write_queue, skb); + else + skb_append(skb_prev, skb); + skb_prev = skb; + } +} + +/* + * Validate that the value of nr is between va and vs. Return true or + * false for testing. 
+ */ +int ax25_validate_nr(ax25_cb *ax25, unsigned short nr) +{ + unsigned short vc = ax25->va; + + while (vc != ax25->vs) { + if (nr == vc) return 1; + vc = (vc + 1) % MODULUS; + } + + if (nr == ax25->vs) return 1; + + return 0; +} + +int ax25_decode(unsigned char *frame) +{ + int frametype = ILLEGAL; + + if ((frame[0] & S) == 0) + frametype = I; /* I frame - carries NR/NS/PF */ + else if ((frame[0] & U) == 1) /* S frame - take out PF/NR */ + frametype = frame[0] & 0x0F; + else if ((frame[0] & U) == 3) /* U frame - take out PF */ + frametype = frame[0] & ~PF; + + return frametype; +} + +/* + * This routine is called when the HDLC layer internally generates a + * command or response for the remote machine ( eg. RR, UA etc. ). + * Only supervisory or unnumbered frames are processed. + */ +void ax25_send_control(ax25_cb *ax25, int frametype, int type) +{ + struct sk_buff *skb; + unsigned char *dptr; + int len; + struct device *dev; + + if ((dev = ax25->device) == NULL) + return; /* Route died */ + + if ((skb = alloc_skb(16 + 1 + size_ax25_addr(ax25->digipeat), GFP_ATOMIC)) == NULL) + return; + + if (ax25->sk != NULL) { + skb->sk = ax25->sk; + ax25->sk->wmem_alloc += skb->mem_len; + } + + dptr = skb->data; + + dptr += 1 + size_ax25_addr(ax25->digipeat); /* KISS byte & 2 calls */ + + /* Assume a response - address structure for DTE */ + len = 1; /* Normal size */ + + if ((frametype & U) == S) /* S frames carry NR */ + frametype |= (ax25->vr << 5); + + *dptr = frametype; + + skb->free = 1; + skb->len = len + size_ax25_addr(ax25->digipeat) + 1; + + ax25_transmit_buffer(ax25, skb, type); +} + +/* + * Send a 'DM' to an unknown connection attempt, or an invalid caller. 
+ * + * Note: src here is the sender, thus its the target of the DM + */ +void ax25_return_dm(struct device *dev, ax25_address *src, ax25_address *dest, ax25_digi *digi) +{ + struct sk_buff *skb; + char *dptr; + ax25_digi retdigi; + int len = 2 + size_ax25_addr(digi); + + if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) + return; /* Next SABM will get DM'd */ + + skb->len = len; + + ax25_digi_invert(digi, &retdigi); + + dptr = skb->data + 1 + size_ax25_addr(digi); + skb->sk = NULL; + + *dptr = DM; + + if (dev == NULL) + return; + + dptr = skb->data; + *dptr++ = 0; + dptr += build_ax25_addr(dptr, dest, src, &retdigi, C_RESPONSE); + + skb->arp = 1; + skb->free = 1; + + dev_queue_xmit(skb, dev, SOPRI_NORMAL); +} + +/* + * Exponential backoff for AX.25 + */ +unsigned short ax25_calculate_t1(ax25_cb *ax25) +{ + int t, n; + + for (t = 2, n = 0; n < ax25->n2count; n++) + t *= 2; + + return t * ax25->rtt; +} + +/* + * Calculate the r Round Trip Time + */ +void ax25_calculate_rtt(ax25_cb *ax25) +{ + if (ax25->n2count == 0) + ax25->rtt = (9 * ax25->rtt + ax25->t1 - ax25->t1timer) / 10; + + /* Don't go below one second */ + if (ax25->rtt < 1 * PR_SLOWHZ) + ax25->rtt = 1 * PR_SLOWHZ; +} + +/* + * Digipeated address processing + */ + + +/* + * Given an AX.25 address pull of to, from, digi list, command/response and the start of data + * + */ + +unsigned char *ax25_parse_addr(unsigned char *buf, int len, ax25_address *src, ax25_address *dest, ax25_digi *digi, int *flags) +{ + int d = 0; + + if (len < 14) return NULL; + + if (flags != NULL) { + *flags = 0; + + if (buf[6] & LAPB_C) { + *flags = C_COMMAND; + } + if (buf[13] & LAPB_C) { + *flags = C_RESPONSE; + } + } + + /* Copy to, from */ + if (dest != NULL) memcpy(dest, buf + 0, 7); + if (src != NULL) memcpy(src, buf + 7, 7); + buf += 14; + len -= 14; + digi->lastrepeat = -1; + digi->ndigi = 0; + + while (!(buf[-1] & LAPB_E)) + { + if (d >= 6) return NULL; /* Max of 6 digis */ + if (len < 7) return NULL; /* Short packet */ + + if 
(digi != NULL) { + memcpy(&digi->calls[d], buf, 7); + digi->ndigi = d + 1; + if (buf[6] & AX25_REPEATED) { + digi->repeated[d] = 1; + digi->lastrepeat = d; + } else { + digi->repeated[d] = 0; + } + } + + buf += 7; + len -= 7; + d++; + } + + return buf; +} + +/* + * Assemble an AX.25 header from the bits + */ + +int build_ax25_addr(unsigned char *buf, ax25_address *src, ax25_address *dest, ax25_digi *d, int flag) +{ + int len = 0; + int ct = 0; + + memcpy(buf, dest, 7); + + if (flag != C_COMMAND && flag != C_RESPONSE) + printk("build_ax25_addr: Bogus flag %d\n!", flag); + buf[6] &= ~(LAPB_E | LAPB_C); + buf[6] |= SSID_SPARE; + + if (flag == C_COMMAND) buf[6] |= LAPB_C; + + buf += 7; + len += 7; + memcpy(buf, src, 7); + buf[6] &= ~(LAPB_E | LAPB_C); + buf[6] |= SSID_SPARE; + + if (flag == C_RESPONSE) buf[6] |= LAPB_C; + /* + * Fast path the normal digiless path + */ + if (d == NULL || d->ndigi == 0) { + buf[6] |= LAPB_E; + return 14; + } + + buf += 7; + len += 7; + + while (ct < d->ndigi) { + memcpy(buf, &d->calls[ct], 7); + if (d->repeated[ct]) + buf[6] |= AX25_REPEATED; + else + buf[6] &= ~AX25_REPEATED; + buf[6] &= ~LAPB_E; + buf[6] |= SSID_SPARE; + + buf += 7; + len += 7; + ct++; + } + + buf[-1] |= LAPB_E; + + return len; +} + +int size_ax25_addr(ax25_digi *dp) +{ + if (dp == NULL) + return 14; + + return 14 + (7 * dp->ndigi); +} + +/* + * Reverse Digipeat List. May not pass both parameters as same struct + */ + +void ax25_digi_invert(ax25_digi *in, ax25_digi *out) +{ + int ct = 0; + + /* Invert the digipeaters */ + + while (ct < in->ndigi) { + out->calls[ct] = in->calls[in->ndigi - ct - 1]; + out->repeated[ct] = 0; + ct++; + } + + /* Copy ndigis */ + out->ndigi = in->ndigi; + + /* Finish off */ + out->lastrepeat = 0; +} + +#endif diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c new file mode 100644 index 000000000..daa3bd657 --- /dev/null +++ b/net/ax25/ax25_timer.c @@ -0,0 +1,226 @@ +/* + * AX.25 release 029 + * + * This is ALPHA test software. 
This code may break your machine, randomly fail to work with new + * releases, misbehave and/or generally screw up. It might even work. + * + * This code REQUIRES 1.2.1 or higher/ NET3.029 + * + * This module: + * This module is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * History + * AX.25 028a Jonathan(G4KLX) New state machine based on SDL diagrams. + * AX.25 028b Jonathan(G4KLX) Extracted AX25 control block from the + * sock structure. + * AX.25 029 Alan(GW4PTS) Switched to KA9Q constant names. + */ + +#include <linux/config.h> +#ifdef CONFIG_AX25 +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <net/ax25.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#ifdef CONFIG_NETROM +#include <net/netrom.h> +#endif + +static void ax25_timer(unsigned long); + +/* + * Linux set/reset timer routines + */ +void ax25_set_timer(ax25_cb *ax25) +{ + unsigned long flags; + + save_flags(flags); + cli(); + del_timer(&ax25->timer); + restore_flags(flags); + + ax25->timer.next = ax25->timer.prev = NULL; + ax25->timer.data = (unsigned long)ax25; + ax25->timer.function = &ax25_timer; + + ax25->timer.expires = 10; + add_timer(&ax25->timer); +} + +static void ax25_reset_timer(ax25_cb *ax25) +{ + unsigned long flags; + + save_flags(flags); + cli(); + del_timer(&ax25->timer); + restore_flags(flags); + + ax25->timer.data = (unsigned long)ax25; + ax25->timer.function = &ax25_timer; + 
ax25->timer.expires = 10; + add_timer(&ax25->timer); +} + +/* + * AX.25 TIMER + * + * This routine is called every 500ms. Decrement timer by this + * amount - if expired then process the event. + */ +static void ax25_timer(unsigned long param) +{ + ax25_cb *ax25 = (ax25_cb *)param; + + switch (ax25->state) { + case AX25_STATE_0: + /* Magic here: If we listen() and a new link dies before it + is accepted() it isnt 'dead' so doesnt get removed. */ + if ((ax25->sk != NULL && ax25->sk->dead) || ax25->sk == NULL) { + del_timer(&ax25->timer); + ax25_destroy_socket(ax25); + return; + } + break; + + case AX25_STATE_3: + case AX25_STATE_4: + /* + * Check the state of the receive buffer. + */ + if (ax25->sk != NULL) { + if (ax25->sk->rmem_alloc < (ax25->sk->rcvbuf / 2) && (ax25->condition & OWN_RX_BUSY_CONDITION)) { + ax25->condition &= ~OWN_RX_BUSY_CONDITION; + ax25_send_control(ax25, RR, C_RESPONSE); + ax25->condition &= ~ACK_PENDING_CONDITION; + break; + } + } + /* + * Check for frames to transmit. 
+ */ + ax25_kick(ax25); + break; + + default: + break; + } + + if (ax25->t2timer > 0 && --ax25->t2timer == 0) { + if (ax25->state == AX25_STATE_3 || ax25->state == AX25_STATE_4) { + if (ax25->condition & ACK_PENDING_CONDITION) { + ax25->condition &= ~ACK_PENDING_CONDITION; + ax25_enquiry_response(ax25); + } + } + } + + if (ax25->t3timer > 0 && --ax25->t3timer == 0) { + if (ax25->state == AX25_STATE_3) { + ax25->n2count = 0; + ax25_transmit_enquiry(ax25); + ax25->state = AX25_STATE_4; + } + ax25->t3timer = ax25->t3; + } + + if (ax25->t1timer == 0 || --ax25->t1timer > 0) { + ax25_reset_timer(ax25); + return; + } + + switch (ax25->state) { + case AX25_STATE_1: + if (ax25->n2count == ax25->n2) { +#ifdef CONFIG_NETROM + nr_link_failed(&ax25->dest_addr, ax25->device); +#endif + ax25_clear_tx_queue(ax25); + ax25->state = AX25_STATE_0; + if (ax25->sk != NULL) { + ax25->sk->state = TCP_CLOSE; + ax25->sk->err = ETIMEDOUT; + if (!ax25->sk->dead) + ax25->sk->state_change(ax25->sk); + ax25->sk->dead = 1; + } + } else { + ax25->n2count++; + ax25_send_control(ax25, SABM | PF, C_COMMAND); + } + break; + + case AX25_STATE_2: + if (ax25->n2count == ax25->n2) { +#ifdef CONFIG_NETROM + nr_link_failed(&ax25->dest_addr, ax25->device); +#endif + ax25_clear_tx_queue(ax25); + ax25->state = AX25_STATE_0; + if (ax25->sk != NULL) { + ax25->sk->state = TCP_CLOSE; + ax25->sk->err = ETIMEDOUT; + if (!ax25->sk->dead) + ax25->sk->state_change(ax25->sk); + ax25->sk->dead = 1; + } + } else { + ax25->n2count++; + ax25_send_control(ax25, DISC | PF, C_COMMAND); + } + break; + + case AX25_STATE_3: + ax25->n2count = 1; + ax25_transmit_enquiry(ax25); + ax25->state = AX25_STATE_4; + break; + + case AX25_STATE_4: + if (ax25->n2count == ax25->n2) { +#ifdef CONFIG_NETROM + nr_link_failed(&ax25->dest_addr, ax25->device); +#endif + ax25_clear_tx_queue(ax25); + ax25_send_control(ax25, DM | PF, C_RESPONSE); + ax25->state = AX25_STATE_0; + if (ax25->sk != NULL) { + ax25->sk->state = TCP_CLOSE; + ax25->sk->err = 
ETIMEDOUT; + if (!ax25->sk->dead) + ax25->sk->state_change(ax25->sk); + ax25->sk->dead = 1; + } + } else { + ax25->n2count++; + ax25_transmit_enquiry(ax25); + } + break; + } + + ax25->t1timer = ax25->t1 = ax25_calculate_t1(ax25); + + ax25_set_timer(ax25); +} + +#endif diff --git a/net/core/Makefile b/net/core/Makefile new file mode 100644 index 000000000..dee2b16d3 --- /dev/null +++ b/net/core/Makefile @@ -0,0 +1,43 @@ +# +# Makefile for the Linux TCP/IP (INET) layer. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< +.c.s: + $(CC) $(CFLAGS) -S $< + + +OBJS := sock.o dev.o dev_mcast.o skbuff.o datagram.o + +ifdef CONFIG_NET + +core.o: $(OBJS) + $(LD) -r -o core.o $(OBJS) + +else + +core.o: + $(AR) rcs core.o + +endif + +dep: + $(CPP) -M *.c > .depend + +tar: + tar -cvf /dev/f1 . + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/net/inet/datagram.c b/net/core/datagram.c index 116c16e3e..ce08e543c 100644 --- a/net/inet/datagram.c +++ b/net/core/datagram.c @@ -15,13 +15,13 @@ * AX.25 now works right, and SPX is feasible. * Alan Cox : Fixed write select of non IP protocol crash. * Florian La Roche: Changed for my new skbuff handling. + * Darryl Miles : Fixed non-blocking SOCK_SEQPACKET. * * Note: * A lot of this will change when the protocol/socket separation * occurs. Using this will make things reasonably clean. 
*/ -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <asm/segment.h> @@ -33,19 +33,19 @@ #include <linux/sched.h> #include <linux/inet.h> #include <linux/netdevice.h> -#include "ip.h" -#include "protocol.h" -#include "route.h" -#include "tcp.h" -#include "udp.h" +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/udp.h> #include <linux/skbuff.h> -#include "sock.h" +#include <net/sock.h> /* * Get a datagram skbuff, understands the peeking, nonblocking wakeups and possible - * races. This replaces identical code in packet,raw and udp, as well as the yet to - * be released IPX support. It also finally fixes the long standing peek and read + * races. This replaces identical code in packet,raw and udp, as well as the IPX + * AX.25 and Appletalk. It also finally fixes the long standing peek and read * race for datagram sockets. If you alter this routine remember it must be * re-entrant. */ @@ -53,8 +53,10 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err) { struct sk_buff *skb; + unsigned long intflags; /* Socket is inuse - so the timer doesn't attack it */ + save_flags(intflags); restart: sk->inuse = 1; while(skb_peek(&sk->receive_queue) == NULL) /* No data */ @@ -101,7 +103,7 @@ restart: /* Signals may need a restart of the syscall */ if (current->signal & ~current->blocked) { - sti(); + restore_flags(intflags);; *err=-ERESTARTSYS; return(NULL); } @@ -110,13 +112,13 @@ restart: peer has finally turned up now */ { *err = -sk->err; - sti(); sk->err=0; + restore_flags(intflags); return NULL; } } sk->inuse = 1; - sti(); + restore_flags(intflags); } /* Again only user level code calls this function, so nothing interrupt level will suddenly eat the receive_queue */ @@ -134,7 +136,7 @@ restart: skb=skb_peek(&sk->receive_queue); if(skb!=NULL) skb->users++; - sti(); + restore_flags(intflags); if(skb==NULL) /* shouldn't happen but .. 
*/ *err=-EAGAIN; } @@ -154,7 +156,7 @@ void skb_free_datagram(struct sk_buff *skb) return; } /* See if it needs destroying */ - if(!skb->next && !skb->prev) /* Been dequeued by someone - ie its read */ + if(!skb->next && !skb->prev) /* Been dequeued by someone - ie it's read */ kfree_skb(skb,FREE_READ); restore_flags(flags); } @@ -190,6 +192,11 @@ int datagram_select(struct sock *sk, int sel_type, select_table *wait) return(0); case SEL_OUT: + if (sk->type==SOCK_SEQPACKET && sk->state==TCP_SYN_SENT) + { + /* Connection still in progress */ + return(0); + } if (sk->prot && sk->prot->wspace(sk) >= MIN_WRITE_SPACE) { return(1); diff --git a/net/inet/dev.c b/net/core/dev.c index 973070388..d36b6b00c 100644 --- a/net/inet/dev.c +++ b/net/core/dev.c @@ -22,6 +22,20 @@ * keep the queue safe. * Alan Cox : Fixed double lock. * Alan Cox : Fixed promisc NULL pointer trap + * ???????? : Support the full private ioctl range + * Alan Cox : Moved ioctl permission check into drivers + * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI + * Alan Cox : 100 backlog just doesn't cut it when + * you start doing multicast video 8) + * Alan Cox : Rewrote net_bh and list manager. + * Alan Cox : Fix ETH_P_ALL echoback lengths. + * Alan Cox : Took out transmit every packet pass + * Saved a few bytes in the ioctl handler + * Alan Cox : Network driver sets packet type before calling netif_rx. Saves + * a function call a packet. + * Alan Cox : Hashed net_bh() + * Richard Kooijman : Timestamp fixes. + * Alan Cox : Wrong field in SIOCGIFDSTADDR * * Cleaned up and recommented by Alan Cox 2nd April 1994. I hope to have * the rest as well commented in the end. 
@@ -49,11 +63,12 @@ #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> -#include "ip.h" -#include "route.h" +#include <linux/notifier.h> +#include <net/ip.h> +#include <net/route.h> #include <linux/skbuff.h> -#include "sock.h" -#include "arp.h" +#include <net/sock.h> +#include <net/arp.h> /* @@ -61,7 +76,14 @@ * and the routines to invoke. */ -struct packet_type *ptype_base = NULL; +struct packet_type *ptype_base[16]; +struct packet_type *ptype_all = NULL; /* Taps */ + +/* + * Our notifier list + */ + +struct notifier_block *netdev_chain=NULL; /* * Device drivers call our routines to queue packets here. We empty the @@ -83,13 +105,6 @@ static struct sk_buff_head backlog = static int backlog_size = 0; /* - * The number of sockets open for 'all' protocol use. We have to - * know this to copy a buffer the correct number of times. - */ - -static int dev_nit=0; - -/* * Return the lesser of the two values. */ @@ -105,70 +120,33 @@ static __inline__ unsigned long min(unsigned long a, unsigned long b) *******************************************************************************************/ +/* + * For efficiency + */ + +static int dev_nit=0; /* - * Add a protocol ID to the list. + * Add a protocol ID to the list. Now that the input handler is + * smarter we can dispense with all the messy stuff that used to be + * here. */ void dev_add_pack(struct packet_type *pt) { - struct packet_type *p1; - pt->next = ptype_base; - - /* - * Don't use copy counts on ETH_P_ALL. Instead keep a global - * count of number of these and use it and pt->copy to decide - * copies - */ - - pt->copy=0; /* Assume we will not be copying the buffer before - * this routine gets it - */ - - if(pt->type == htons(ETH_P_ALL)) - dev_nit++; /* I'd like a /dev/nit too one day 8) */ - else + int hash; + if(pt->type==htons(ETH_P_ALL)) { - /* - * See if we need to copy it - that is another process also - * wishes to receive this type of packet. 
- */ - for (p1 = ptype_base; p1 != NULL; p1 = p1->next) - { - if (p1->type == pt->type) - { - pt->copy = 1; /* We will need to copy */ - break; - } - } + dev_nit++; + pt->next=ptype_all; + ptype_all=pt; } - - /* - * NIT taps must go at the end or net_bh will leak! - */ - - if (pt->type == htons(ETH_P_ALL)) - { - pt->next=NULL; - if(ptype_base==NULL) - ptype_base=pt; - else - { - /* - * Move to the end of the list - */ - for(p1=ptype_base;p1->next!=NULL;p1=p1->next); - /* - * Hook on the end - */ - p1->next=pt; - } - } else -/* - * It goes on the start - */ - ptype_base = pt; + { + hash=ntohs(pt->type)&15; + pt->next = ptype_base[hash]; + ptype_base[hash] = pt; + } } @@ -178,47 +156,21 @@ void dev_add_pack(struct packet_type *pt) void dev_remove_pack(struct packet_type *pt) { - struct packet_type *lpt, *pt1; - - /* - * Keep the count of nit (Network Interface Tap) sockets correct. - */ - - if (pt->type == htons(ETH_P_ALL)) - dev_nit--; - - /* - * If we are first, just unhook us. - */ - - if (pt == ptype_base) + struct packet_type **pt1; + if(pt->type==htons(ETH_P_ALL)) { - ptype_base = pt->next; - return; + dev_nit--; + pt1=&ptype_all; } - - lpt = NULL; - - /* - * This is harder. What we do is to walk the list of sockets - * for this type. We unhook the entry, and if there is a previous - * entry that is copying _and_ we are not copying, (ie we are the - * last entry for this type) then the previous one is set to - * non-copying as it is now the last. 
- */ - for (pt1 = ptype_base; pt1->next != NULL; pt1 = pt1->next) + else + pt1=&ptype_base[ntohs(pt->type)&15]; + for(; (*pt1)!=NULL; pt1=&((*pt1)->next)) { - if (pt1->next == pt ) + if(pt==(*pt1)) { - cli(); - if (!pt->copy && lpt) - lpt->copy = 0; - pt1->next = pt->next; - sti(); + *pt1=pt->next; return; } - if (pt1->next->type == pt->type && pt->type != htons(ETH_P_ALL)) - lpt = pt1->next; } } @@ -264,19 +216,26 @@ int dev_open(struct device *dev) */ if (ret == 0) + { dev->flags |= (IFF_UP | IFF_RUNNING); - + /* + * Initialise multicasting status + */ +#ifdef CONFIG_IP_MULTICAST + /* + * Join the all host group + */ + ip_mc_allhost(dev); +#endif + dev_mc_upload(dev); + notifier_call_chain(&netdev_chain, NETDEV_UP, dev); + } return(ret); } /* * Completely shutdown an interface. - * - * WARNING: Both because of the way the upper layers work (that can be fixed) - * and because of races during a close (that can't be fixed any other way) - * a device may be given things to transmit EVEN WHEN IT IS DOWN. The driver - * MUST cope with this (eg by freeing and dumping the frame). */ int dev_close(struct device *dev) @@ -295,15 +254,13 @@ int dev_close(struct device *dev) if (dev->stop) dev->stop(dev); /* - * Delete the route to the device. + * Tell people we are going down */ -#ifdef CONFIG_INET - ip_rt_flush(dev); - arp_device_down(dev); -#endif -#ifdef CONFIG_IPX - ipxrtr_device_down(dev); -#endif + notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); + /* + * Flush the multicast chain + */ + dev_mc_discard(dev); /* * Blank the IP addresses */ @@ -328,6 +285,23 @@ int dev_close(struct device *dev) /* + * Device change register/unregister. These are not inline or static + * as we export them to the world. 
+ */ + +int register_netdevice_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&netdev_chain, nb); +} + +int unregister_netdevice_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&netdev_chain,nb); +} + + + +/* * Send (or queue for sending) a packet. * * IMPORTANT: When this is called to resend frames. The caller MUST @@ -344,42 +318,14 @@ void dev_queue_xmit(struct sk_buff *skb, struct device *dev, int pri) /* at the front or the back of the */ /* queue - front is a retransmit try */ - if (dev == NULL) - { - printk("dev.c: dev_queue_xmit: dev = NULL\n"); - return; - } - if(pri>=0 && !skb_device_locked(skb)) skb_device_lock(skb); /* Shove a lock on the frame */ -#ifdef CONFIG_SLAVE_BALANCING - save_flags(flags); - cli(); - if(dev->slave!=NULL && dev->slave->pkt_queue < dev->pkt_queue && - (dev->slave->flags & IFF_UP)) - dev=dev->slave; - restore_flags(flags); -#endif - +#ifdef CONFIG_SKB_CHECK IS_SKB(skb); - +#endif skb->dev = dev; /* - * This just eliminates some race conditions, but not all... - */ - - if (skb->next != NULL) - { - /* - * Make sure we haven't missed an interrupt. - */ - printk("dev_queue_xmit: worked around a missed interrupt\n"); - dev->hard_start_xmit(NULL, dev); - return; - } - - /* * Negative priority is used to flag a frame that is being pulled from the * queue front as a retransmit attempt. It therefore goes back on the queue * start on a failure. @@ -391,11 +337,13 @@ void dev_queue_xmit(struct sk_buff *skb, struct device *dev, int pri) where = 1; } +#ifdef CONFIG_NET_DEBUG if (pri >= DEV_NUMBUFFS) { printk("bad priority in dev_queue_xmit.\n"); pri = 1; } +#endif /* * If the address has not been resolved. Call the device header rebuilder. 
@@ -408,50 +356,55 @@ void dev_queue_xmit(struct sk_buff *skb, struct device *dev, int pri) save_flags(flags); cli(); - if (!where) { -#ifdef CONFIG_SLAVE_BALANCING - skb->in_dev_queue=1; -#endif + if (dev_nit && !where) + { skb_queue_tail(dev->buffs + pri,skb); skb_device_unlock(skb); /* Buffer is on the device queue and can be freed safely */ skb = skb_dequeue(dev->buffs + pri); skb_device_lock(skb); /* New buffer needs locking down */ -#ifdef CONFIG_SLAVE_BALANCING - skb->in_dev_queue=0; -#endif } restore_flags(flags); /* copy outgoing packets to any sniffer packet handlers */ if(!where) { - for (nitcount = dev_nit, ptype = ptype_base; nitcount > 0 && ptype != NULL; ptype = ptype->next) + skb->stamp=xtime; + for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next) { - if (ptype->type == htons(ETH_P_ALL)) { + /* Never send packets back to the socket + * they originated from - MvS (miquels@drinkel.ow.org) + */ + if ((ptype->dev == dev || !ptype->dev) && + ((struct sock *)ptype->data != skb->sk)) + { struct sk_buff *skb2; if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) break; + /* + * The protocol knows this has (for other paths) been taken off + * and adds it back. + */ + skb2->len-=skb->dev->hard_header_len; ptype->func(skb2, skb->dev, ptype); nitcount--; } } } + start_bh_atomic(); if (dev->hard_start_xmit(skb, dev) == 0) { /* * Packet is now solely the responsibility of the driver */ + end_bh_atomic(); return; } + end_bh_atomic(); /* - * Transmission failed, put skb back into a list. Once on the list its safe and + * Transmission failed, put skb back into a list. 
Once on the list it's safe and * no longer device locked (it can be freed safely from the device queue) */ cli(); -#ifdef CONFIG_SLAVE_BALANCING - skb->in_dev_queue=1; - dev->pkt_queue++; -#endif skb_device_unlock(skb); skb_queue_head(dev->buffs + pri,skb); restore_flags(flags); @@ -483,7 +436,7 @@ void netif_rx(struct sk_buff *skb) if (!backlog_size) dropping = 0; - else if (backlog_size > 100) + else if (backlog_size > 300) dropping = 1; if (dropping) @@ -495,8 +448,9 @@ void netif_rx(struct sk_buff *skb) /* * Add it to the "backlog" queue. */ - +#ifdef CONFIG_SKB_CHECK IS_SKB(skb); +#endif skb_queue_tail(&backlog,skb); backlog_size++; @@ -505,7 +459,11 @@ void netif_rx(struct sk_buff *skb) * hardware interrupt returns. */ +#ifdef CONFIG_NET_RUNONIRQ /* Dont enable yet, needs some driver mods */ + inet_bh(); +#else mark_bh(NET_BH); +#endif return; } @@ -629,7 +587,7 @@ void dev_transmit(void) * useful will emerge. */ -volatile char in_bh = 0; /* Non-reentrant remember */ +volatile int in_bh = 0; /* Non-reentrant remember */ int in_net_bh() /* Used by timer.c */ { @@ -648,9 +606,8 @@ void net_bh(void *tmp) { struct sk_buff *skb; struct packet_type *ptype; + struct packet_type *pt_prev; unsigned short type; - unsigned char flag = 0; - int nitcount; /* * Atomically check and mark our BUSY state. @@ -686,8 +643,6 @@ void net_bh(void *tmp) */ backlog_size--; - nitcount=dev_nit; - flag=0; sti(); /* @@ -701,95 +656,76 @@ void net_bh(void *tmp) skb->len -= skb->dev->hard_header_len; /* - * Fetch the packet protocol ID. This is also quite ugly, as - * it depends on the protocol driver (the interface itself) to - * know what the type is, or where to get it from. The Ethernet - * interfaces fetch the ID from the two bytes in the Ethernet MAC - * header (the h_proto field in struct ethhdr), but other drivers - * may either use the ethernet ID's or extra ones that do not - * clash (eg ETH_P_AX25). We could set this before we queue the - * frame. 
In fact I may change this when I have time. + * Fetch the packet protocol ID. */ - type = skb->dev->type_trans(skb, skb->dev); + type = skb->protocol; /* * We got a packet ID. Now loop over the "known protocols" - * table (which is actually a linked list, but this will - * change soon if I get my way- FvK), and forward the packet - * to anyone who wants it. - * - * [FvK didn't get his way but he is right this ought to be - * hashed so we typically get a single hit. The speed cost - * here is minimal but no doubt adds up at the 4,000+ pkts/second - * rate we can hit flat out] + * list. There are two lists. The ptype_all list of taps (normally empty) + * and the main protocol list which is hashed perfectly for normal protocols. */ - - for (ptype = ptype_base; ptype != NULL; ptype = ptype->next) + pt_prev = NULL; + for (ptype = ptype_all; ptype!=NULL; ptype=ptype->next) { - if (ptype->type == type || ptype->type == htons(ETH_P_ALL)) + if(pt_prev) + { + struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC); + if(skb2) + pt_prev->func(skb2,skb->dev, pt_prev); + } + pt_prev=ptype; + } + + for (ptype = ptype_base[ntohs(type)&15]; ptype != NULL; ptype = ptype->next) + { + if ((ptype->type == type || ptype->type == htons(ETH_P_ALL)) && (!ptype->dev || ptype->dev==skb->dev)) { - struct sk_buff *skb2; - - if (ptype->type == htons(ETH_P_ALL)) - nitcount--; - if (ptype->copy || nitcount) - { - /* - * copy if we need to - */ -#ifdef OLD - skb2 = alloc_skb(skb->len, GFP_ATOMIC); - if (skb2 == NULL) - continue; - memcpy(skb2, skb, skb2->mem_len); - skb2->mem_addr = skb2; - skb2->h.raw = (unsigned char *)( - (unsigned long) skb2 + - (unsigned long) skb->h.raw - - (unsigned long) skb - ); - skb2->free = 1; -#else - skb2=skb_clone(skb, GFP_ATOMIC); - if(skb2==NULL) - continue; -#endif - } - else - { - skb2 = skb; - } - /* - * Protocol located. + * We already have a match queued. 
Deliver + * to it and then remember the new match */ - - flag = 1; + if(pt_prev) + { + struct sk_buff *skb2; - /* - * Kick the protocol handler. This should be fast - * and efficient code. - */ + skb2=skb_clone(skb, GFP_ATOMIC); - ptype->func(skb2, skb->dev, ptype); + /* + * Kick the protocol handler. This should be fast + * and efficient code. + */ + + if(skb2) + pt_prev->func(skb2, skb->dev, pt_prev); + } + /* Remember the current last to do */ + pt_prev=ptype; } } /* End of protocol list loop */ + + /* + * Is there a last item to send to ? + */ + if(pt_prev) + pt_prev->func(skb, skb->dev, pt_prev); /* * Has an unknown packet has been received ? */ - if (!flag) - { + else kfree_skb(skb, FREE_WRITE); - } /* * Again, see if we can transmit anything now. + * [Ought to take this out judging by tests it slows + * us down not speeds us up] */ - +#ifdef CONFIG_XMIT_EVERY dev_transmit(); +#endif cli(); } /* End of queue loop */ @@ -1057,46 +993,22 @@ static int dev_ifsioc(void *arg, unsigned int getset) { case SIOCGIFFLAGS: /* Get interface flags */ ifr.ifr_flags = dev->flags; - memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); - ret = 0; - break; + goto rarok; + case SIOCSIFFLAGS: /* Set interface flags */ { int old_flags = dev->flags; -#ifdef CONFIG_SLAVE_BALANCING - if(dev->flags&IFF_SLAVE) - return -EBUSY; -#endif dev->flags = ifr.ifr_flags & ( IFF_UP | IFF_BROADCAST | IFF_DEBUG | IFF_LOOPBACK | IFF_POINTOPOINT | IFF_NOTRAILERS | IFF_RUNNING | - IFF_NOARP | IFF_PROMISC | IFF_ALLMULTI | IFF_SLAVE | IFF_MASTER); -#ifdef CONFIG_SLAVE_BALANCING - if(!(dev->flags&IFF_MASTER) && dev->slave) - { - dev->slave->flags&=~IFF_SLAVE; - dev->slave=NULL; - } -#endif + IFF_NOARP | IFF_PROMISC | IFF_ALLMULTI | IFF_SLAVE | IFF_MASTER + | IFF_MULTICAST); + /* + * Load in the correct multicast list now the flags have changed. 
+ */ + + dev_mc_upload(dev); - if( dev->set_multicast_list!=NULL) - { - - /* - * Has promiscuous mode been turned off - */ - - if ( (old_flags & IFF_PROMISC) && ((dev->flags & IFF_PROMISC) == 0)) - dev->set_multicast_list(dev,0,NULL); - - /* - * Has it been turned on - */ - - if ( (dev->flags & IFF_PROMISC) && ((old_flags & IFF_PROMISC) == 0)) - dev->set_multicast_list(dev,-1,NULL); - } - /* * Have we downed the interface */ @@ -1129,9 +1041,7 @@ static int dev_ifsioc(void *arg, unsigned int getset) &ifr.ifr_addr).sin_family = dev->family; (*(struct sockaddr_in *) &ifr.ifr_addr).sin_port = 0; - memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); - ret = 0; - break; + goto rarok; case SIOCSIFADDR: /* Set interface address (and family) */ dev->pa_addr = (*(struct sockaddr_in *) @@ -1154,6 +1064,7 @@ static int dev_ifsioc(void *arg, unsigned int getset) &ifr.ifr_broadaddr).sin_family = dev->family; (*(struct sockaddr_in *) &ifr.ifr_broadaddr).sin_port = 0; + goto rarok; memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); ret = 0; break; @@ -1168,9 +1079,9 @@ static int dev_ifsioc(void *arg, unsigned int getset) (*(struct sockaddr_in *) &ifr.ifr_dstaddr).sin_addr.s_addr = dev->pa_dstaddr; (*(struct sockaddr_in *) - &ifr.ifr_broadaddr).sin_family = dev->family; + &ifr.ifr_dstaddr).sin_family = dev->family; (*(struct sockaddr_in *) - &ifr.ifr_broadaddr).sin_port = 0; + &ifr.ifr_dstaddr).sin_port = 0; memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); ret = 0; break; @@ -1188,9 +1099,7 @@ static int dev_ifsioc(void *arg, unsigned int getset) &ifr.ifr_netmask).sin_family = dev->family; (*(struct sockaddr_in *) &ifr.ifr_netmask).sin_port = 0; - memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); - ret = 0; - break; + goto rarok; case SIOCSIFNETMASK: /* Set the netmask for the interface */ { @@ -1210,28 +1119,24 @@ static int dev_ifsioc(void *arg, unsigned int getset) case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */ ifr.ifr_metric = dev->metric; - memcpy_tofs(arg, &ifr, 
sizeof(struct ifreq)); - ret = 0; - break; + goto rarok; case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */ dev->metric = ifr.ifr_metric; - ret = 0; + ret=0; break; case SIOCGIFMTU: /* Get the MTU of a device */ ifr.ifr_mtu = dev->mtu; - memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); - ret = 0; - break; + goto rarok; case SIOCSIFMTU: /* Set the MTU of a device */ /* - * MTU must be positive and under the page size problem + * MTU must be positive. */ - if(ifr.ifr_mtu<1 || ifr.ifr_mtu>3800) + if(ifr.ifr_mtu<68) return -EINVAL; dev->mtu = ifr.ifr_mtu; ret = 0; @@ -1239,28 +1144,22 @@ static int dev_ifsioc(void *arg, unsigned int getset) case SIOCGIFMEM: /* Get the per device memory space. We can add this but currently do not support it */ - printk("NET: ioctl(SIOCGIFMEM, 0x%08X)\n", (int)arg); ret = -EINVAL; break; case SIOCSIFMEM: /* Set the per device memory buffer space. Not applicable in our case */ - printk("NET: ioctl(SIOCSIFMEM, 0x%08X)\n", (int)arg); ret = -EINVAL; break; case OLD_SIOCGIFHWADDR: /* Get the hardware address. This will change and SIFHWADDR will be added */ memcpy(ifr.old_ifr_hwaddr,dev->dev_addr, MAX_ADDR_LEN); - memcpy_tofs(arg,&ifr,sizeof(struct ifreq)); - ret=0; - break; + goto rarok; case SIOCGIFHWADDR: memcpy(ifr.ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN); ifr.ifr_hwaddr.sa_family=dev->type; - memcpy_tofs(arg,&ifr,sizeof(struct ifreq)); - ret=0; - break; - + goto rarok; + case SIOCSIFHWADDR: if(dev->set_mac_address==NULL) return -EOPNOTSUPP; @@ -1285,61 +1184,21 @@ static int dev_ifsioc(void *arg, unsigned int getset) return -EOPNOTSUPP; return dev->set_config(dev,&ifr.ifr_map); - case SIOCGIFSLAVE: -#ifdef CONFIG_SLAVE_BALANCING - if(dev->slave==NULL) - return -ENOENT; - strncpy(ifr.ifr_name,dev->name,sizeof(ifr.ifr_name)); - memcpy_tofs(arg,&ifr,sizeof(struct ifreq)); - ret=0; -#else - return -ENOENT; -#endif - break; -#ifdef CONFIG_SLAVE_BALANCING - case SIOCSIFSLAVE: - { - - /* - * Fun game. 
Get the device up and the flags right without - * letting some scummy user confuse us. - */ - unsigned long flags; - struct device *slave=dev_get(ifr.ifr_slave); - save_flags(flags); - if(slave==NULL) - { - return -ENODEV; - } - cli(); - if((slave->flags&(IFF_UP|IFF_RUNNING))!=(IFF_UP|IFF_RUNNING)) - { - restore_flags(flags); + case SIOCADDMULTI: + if(dev->set_multicast_list==NULL) return -EINVAL; - } - if(dev->flags&IFF_SLAVE) - { - restore_flags(flags); - return -EBUSY; - } - if(dev->slave!=NULL) - { - restore_flags(flags); - return -EBUSY; - } - if(slave->flags&IFF_SLAVE) - { - restore_flags(flags); - return -EBUSY; - } - dev->slave=slave; - slave->flags|=IFF_SLAVE; - dev->flags|=IFF_MASTER; - restore_flags(flags); - ret=0; - } - break; -#endif + if(ifr.ifr_hwaddr.sa_family!=AF_UNSPEC) + return -EINVAL; + dev_mc_add(dev,ifr.ifr_hwaddr.sa_data, dev->addr_len, 1); + return 0; + + case SIOCDELMULTI: + if(dev->set_multicast_list==NULL) + return -EINVAL; + if(ifr.ifr_hwaddr.sa_family!=AF_UNSPEC) + return -EINVAL; + dev_mc_delete(dev,ifr.ifr_hwaddr.sa_data,dev->addr_len, 1); + return 0; /* * Unknown or private ioctl */ @@ -1357,6 +1216,12 @@ static int dev_ifsioc(void *arg, unsigned int getset) ret = -EINVAL; } return(ret); +/* + * The load of calls that return an ifreq and ok (saves memory). 
+ */ +rarok: + memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); + return 0; } @@ -1406,6 +1271,8 @@ int dev_ioctl(unsigned int cmd, void *arg) case SIOCSIFMEM: case SIOCSIFMAP: case SIOCSIFSLAVE: + case SIOCADDMULTI: + case SIOCDELMULTI: if (!suser()) return -EPERM; return dev_ifsioc(arg, cmd); @@ -1420,8 +1287,6 @@ int dev_ioctl(unsigned int cmd, void *arg) default: if((cmd >= SIOCDEVPRIVATE) && (cmd <= (SIOCDEVPRIVATE + 15))) { - if (!suser()) - return -EPERM; return dev_ifsioc(arg, cmd); } return -EINVAL; @@ -1434,9 +1299,6 @@ int dev_ioctl(unsigned int cmd, void *arg) * unhooks any devices that fail to initialise (normally hardware not * present) and leaves us with a valid list of present and active devices. * - * The PCMCIA code may need to change this a little, and add a pair - * of register_inet_device() unregister_inet_device() calls. This will be - * needed for ethernet as modules support. */ void dev_init(void) @@ -1470,3 +1332,4 @@ void dev_init(void) } } } + diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c new file mode 100644 index 000000000..7195d5a52 --- /dev/null +++ b/net/core/dev_mcast.c @@ -0,0 +1,169 @@ +/* + * Linux NET3: Multicast List maintenance. + * + * Authors: + * Tim Kordas <tjk@nostromo.eeap.cwru.edu> + * Richard Underwood <richard@wuzz.demon.co.uk> + * + * Stir fried together from the IP multicast and CAP patches above + * Alan Cox <Alan.Cox@linux.org> + * + * Fixes: + * Alan Cox : Update the device on a real delete + * rather than any time but... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <asm/segment.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/if_ether.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <net/ip.h> +#include <net/route.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/arp.h> + + +/* + * Device multicast list maintenance. This knows about such little matters as promiscuous mode and + * converting from the list to the array the drivers use. At least until I fix the drivers up. + * + * This is used both by IP and by the user level maintenance functions. Unlike BSD we maintain a usage count + * on a given multicast address so that a casual user application can add/delete multicasts used by protocols + * without doing damage to the protocols when it deletes the entries. It also helps IP as it tracks overlapping + * maps. + */ + + +/* + * Update the multicast list into the physical NIC controller. 
+ */ + +void dev_mc_upload(struct device *dev) +{ + struct dev_mc_list *dmi; + char *data, *tmp; + + /* Don't do anything till we up the interface + [dev_open will call this function so the list will + stay sane] */ + + if(!(dev->flags&IFF_UP)) + return; + + + /* Devices with no set multicast don't get set */ + if(dev->set_multicast_list==NULL) + return; + /* Promiscuous is promiscuous - so no filter needed */ + if(dev->flags&IFF_PROMISC) + { + dev->set_multicast_list(dev, -1, NULL); + return; + } + + if(dev->mc_count==0) + { + dev->set_multicast_list(dev,0,NULL); + return; + } + + data=kmalloc(dev->mc_count*dev->addr_len, GFP_KERNEL); + if(data==NULL) + { + printk("Unable to get memory to set multicast list on %s\n",dev->name); + return; + } + for(tmp = data, dmi=dev->mc_list;dmi!=NULL;dmi=dmi->next) + { + memcpy(tmp,dmi->dmi_addr, dmi->dmi_addrlen); + tmp+=dev->addr_len; + } + dev->set_multicast_list(dev,dev->mc_count,data); + kfree(data); +} + +/* + * Delete a device level multicast + */ + +void dev_mc_delete(struct device *dev, void *addr, int alen, int all) +{ + struct dev_mc_list **dmi; + for(dmi=&dev->mc_list;*dmi!=NULL;dmi=&(*dmi)->next) + { + if(memcmp((*dmi)->dmi_addr,addr,(*dmi)->dmi_addrlen)==0 && alen==(*dmi)->dmi_addrlen) + { + struct dev_mc_list *tmp= *dmi; + if(--(*dmi)->dmi_users && !all) + return; + *dmi=(*dmi)->next; + dev->mc_count--; + kfree_s(tmp,sizeof(*tmp)); + dev_mc_upload(dev); + return; + } + } +} + +/* + * Add a device level multicast + */ + +void dev_mc_add(struct device *dev, void *addr, int alen, int newonly) +{ + struct dev_mc_list *dmi; + for(dmi=dev->mc_list;dmi!=NULL;dmi=dmi->next) + { + if(memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && dmi->dmi_addrlen==alen) + { + if(!newonly) + dmi->dmi_users++; + return; + } + } + dmi=(struct dev_mc_list *)kmalloc(sizeof(*dmi),GFP_KERNEL); + if(dmi==NULL) + return; /* GFP_KERNEL so can't happen anyway */ + memcpy(dmi->dmi_addr, addr, alen); + dmi->dmi_addrlen=alen; + 
dmi->next=dev->mc_list; + dmi->dmi_users=1; + dev->mc_list=dmi; + dev->mc_count++; + dev_mc_upload(dev); +} + +/* + * Discard multicast list when a device is downed + */ + +void dev_mc_discard(struct device *dev) +{ + while(dev->mc_list!=NULL) + { + struct dev_mc_list *tmp=dev->mc_list; + dev->mc_list=dev->mc_list->next; + kfree_s(tmp,sizeof(*tmp)); + } + dev->mc_count=0; +} + diff --git a/net/inet/skbuff.c b/net/core/skbuff.c index d5ae2adef..1b49683e6 100644 --- a/net/inet/skbuff.c +++ b/net/core/skbuff.c @@ -6,6 +6,8 @@ * * Fixes: * Alan Cox : Fixed the worst of the load balancer bugs. + * Dave Platt : Interrupt stacking fix + * Richard Kooijman : Timestamp fixes. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -30,14 +32,14 @@ #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> -#include "ip.h" -#include "protocol.h" +#include <net/ip.h> +#include <net/protocol.h> #include <linux/string.h> -#include "route.h" -#include "tcp.h" -#include "udp.h" +#include <net/route.h> +#include <net/tcp.h> +#include <net/udp.h> #include <linux/skbuff.h> -#include "sock.h" +#include <net/sock.h> /* @@ -149,13 +151,12 @@ int skb_check(struct sk_buff *skb, int head, int line, char *file) #endif +#ifdef CONFIG_SKB_CHECK void skb_queue_head_init(struct sk_buff_head *list) { list->prev = (struct sk_buff *)list; list->next = (struct sk_buff *)list; -#if CONFIG_SKB_CHECK list->magic_debug_cookie = SK_HEAD_SKB; -#endif } @@ -170,12 +171,10 @@ void skb_queue_head(struct sk_buff_head *list_,struct sk_buff *newsk) save_flags(flags); cli(); -#if CONFIG_SKB_CHECK IS_SKB(newsk); IS_SKB_HEAD(list); if (newsk->next || newsk->prev) printk("Suspicious queue head: sk_buff on list!\n"); -#endif newsk->next = list->next; newsk->prev = list; @@ -197,12 +196,10 @@ void skb_queue_tail(struct sk_buff_head *list_, struct sk_buff *newsk) save_flags(flags); cli(); -#if CONFIG_SKB_CHECK if (newsk->next 
|| newsk->prev) printk("Suspicious queue tail: sk_buff on list!\n"); IS_SKB(newsk); IS_SKB_HEAD(list); -#endif newsk->next = list; newsk->prev = list->prev; @@ -254,7 +251,6 @@ void skb_insert(struct sk_buff *old, struct sk_buff *newsk) { unsigned long flags; -#if CONFIG_SKB_CHECK IS_SKB(old); IS_SKB(newsk); @@ -262,7 +258,6 @@ void skb_insert(struct sk_buff *old, struct sk_buff *newsk) printk("insert before unlisted item!\n"); if(newsk->next || newsk->prev) printk("inserted item is already on a list.\n"); -#endif save_flags(flags); cli(); @@ -281,7 +276,6 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk) { unsigned long flags; -#if CONFIG_SKB_CHECK IS_SKB(old); IS_SKB(newsk); @@ -289,7 +283,6 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk) printk("append before unlisted item!\n"); if(newsk->next || newsk->prev) printk("append item is already on a list.\n"); -#endif save_flags(flags); cli(); @@ -331,6 +324,8 @@ void skb_unlink(struct sk_buff *skb) restore_flags(flags); } +#endif + /* * Free an sk_buff. This still knows about things it should * not need to like protocols and sockets. 
@@ -344,7 +339,9 @@ void kfree_skb(struct sk_buff *skb, int rw) __builtin_return_address(0)); return; } +#ifdef CONFIG_SKB_CHECK IS_SKB(skb); +#endif if (skb->lock) { skb->free = 3; /* Free when unlocked */ @@ -369,13 +366,18 @@ void kfree_skb(struct sk_buff *skb, int rw) } else { + unsigned long flags; /* Non INET - default wmalloc/rmalloc handler */ + save_flags(flags); + cli(); if (rw) skb->sk->rmem_alloc-=skb->mem_len; else skb->sk->wmem_alloc-=skb->mem_len; + restore_flags(flags); if(!skb->sk->dead) skb->sk->write_space(skb->sk); + kfree_skbmem(skb,skb->mem_len); } } else @@ -456,6 +458,7 @@ void kfree_skbmem(struct sk_buff *skb,unsigned size) skb->dev->pkt_queue--; restore_flags(flags); #endif +#ifdef CONFIG_SKB_CHECK IS_SKB(skb); if(size!=skb->truesize) printk("kfree_skbmem: size mismatch.\n"); @@ -473,6 +476,14 @@ void kfree_skbmem(struct sk_buff *skb,unsigned size) } else printk("kfree_skbmem: bad magic cookie\n"); +#else + save_flags(flags); + cli(); + kfree_s((void *)skb,size); + net_skbcount--; + net_memory -= size; + restore_flags(flags); +#endif } /* @@ -512,6 +523,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int priority) n->lock=0; n->users=0; n->pkt_type=skb->pkt_type; + n->stamp=skb->stamp; return n; } diff --git a/net/inet/sock.c b/net/core/sock.c index fd5f123bd..a95586cbc 100644 --- a/net/inet/sock.c +++ b/net/core/sock.c @@ -61,6 +61,9 @@ * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code * Alan Cox : Split IP from generic code * Alan Cox : New kfree_skbmem() + * Alan Cox : Make SO_DEBUG superuser only. 
+ * Alan Cox : Allow anyone to clear SO_DEBUG + * (compatibility fix) * * To Fix: * @@ -92,17 +95,17 @@ #include <linux/inet.h> #include <linux/netdevice.h> -#include "ip.h" -#include "protocol.h" -#include "arp.h" -#include "rarp.h" -#include "route.h" -#include "tcp.h" -#include "udp.h" +#include <net/ip.h> +#include <net/protocol.h> +#include <net/arp.h> +#include <net/rarp.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/udp.h> #include <linux/skbuff.h> -#include "sock.h" -#include "raw.h" -#include "icmp.h" +#include <net/sock.h> +#include <net/raw.h> +#include <net/icmp.h> #define min(a,b) ((a)<(b)?(a):(b)) @@ -115,6 +118,7 @@ int sock_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { int val; + int valbool; int err; struct linger ling; @@ -126,20 +130,26 @@ int sock_setsockopt(struct sock *sk, int level, int optname, return err; val = get_fs_long((unsigned long *)optval); + valbool = val?1:0; + switch(optname) { + case SO_DEBUG: + if(val && !suser()) + return(-EPERM); + sk->debug=valbool; + return 0; + case SO_REUSEADDR: + sk->reuse = valbool; + return(0); case SO_TYPE: case SO_ERROR: return(-ENOPROTOOPT); - - case SO_DEBUG: - sk->debug=val?1:0; - return 0; case SO_DONTROUTE: - sk->localroute=val?1:0; + sk->localroute=valbool; return 0; case SO_BROADCAST: - sk->broadcast=val?1:0; + sk->broadcast=valbool; return 0; case SO_SNDBUF: if(val>32767) @@ -148,19 +158,7 @@ int sock_setsockopt(struct sock *sk, int level, int optname, val=256; sk->sndbuf=val; return 0; - case SO_LINGER: - err=verify_area(VERIFY_READ,optval,sizeof(ling)); - if(err) - return err; - memcpy_fromfs(&ling,optval,sizeof(ling)); - if(ling.l_onoff==0) - sk->linger=0; - else - { - sk->lingertime=ling.l_linger; - sk->linger=1; - } - return 0; + case SO_RCVBUF: if(val>32767) val=32767; @@ -169,35 +167,19 @@ int sock_setsockopt(struct sock *sk, int level, int optname, sk->rcvbuf=val; return(0); - case SO_REUSEADDR: - if (val) - sk->reuse = 1; - else - 
sk->reuse = 0; - return(0); - case SO_KEEPALIVE: - if (val) - sk->keepopen = 1; - else - sk->keepopen = 0; + sk->keepopen = valbool; return(0); case SO_OOBINLINE: - if (val) - sk->urginline = 1; - else - sk->urginline = 0; + sk->urginline = valbool; return(0); case SO_NO_CHECK: - if (val) - sk->no_check = 1; - else - sk->no_check = 0; + sk->no_check = valbool; return(0); - case SO_PRIORITY: + case SO_PRIORITY: if (val >= 0 && val < DEV_NUMBUFFS) { sk->priority = val; @@ -208,6 +190,22 @@ int sock_setsockopt(struct sock *sk, int level, int optname, } return(0); + + case SO_LINGER: + err=verify_area(VERIFY_READ,optval,sizeof(ling)); + if(err) + return err; + memcpy_fromfs(&ling,optval,sizeof(ling)); + if(ling.l_onoff==0) + sk->linger=0; + else + { + sk->lingertime=ling.l_linger; + sk->linger=1; + } + return 0; + + default: return(-ENOPROTOOPT); } @@ -234,20 +232,7 @@ int sock_getsockopt(struct sock *sk, int level, int optname, case SO_BROADCAST: val= sk->broadcast; break; - - case SO_LINGER: - err=verify_area(VERIFY_WRITE,optval,sizeof(ling)); - if(err) - return err; - err=verify_area(VERIFY_WRITE,optlen,sizeof(int)); - if(err) - return err; - put_fs_long(sizeof(ling),(unsigned long *)optlen); - ling.l_onoff=sk->linger; - ling.l_linger=sk->lingertime; - memcpy_tofs(optval,&ling,sizeof(ling)); - return 0; - + case SO_SNDBUF: val=sk->sndbuf; break; @@ -265,12 +250,6 @@ int sock_getsockopt(struct sock *sk, int level, int optname, break; case SO_TYPE: -#if 0 - if (sk->prot == &tcp_prot) - val = SOCK_STREAM; - else - val = SOCK_DGRAM; -#endif val = sk->type; break; @@ -290,6 +269,21 @@ int sock_getsockopt(struct sock *sk, int level, int optname, case SO_PRIORITY: val = sk->priority; break; + + case SO_LINGER: + err=verify_area(VERIFY_WRITE,optval,sizeof(ling)); + if(err) + return err; + err=verify_area(VERIFY_WRITE,optlen,sizeof(int)); + if(err) + return err; + put_fs_long(sizeof(ling),(unsigned long *)optlen); + ling.l_onoff=sk->linger; + ling.l_linger=sk->lingertime; + 
memcpy_tofs(optval,&ling,sizeof(ling)); + return 0; + + default: return(-ENOPROTOOPT); @@ -317,9 +311,11 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int struct sk_buff * c = alloc_skb(size, priority); if (c) { + unsigned long flags; + save_flags(flags); cli(); sk->wmem_alloc+= c->mem_len; - sti(); + restore_flags(flags); /* was sti(); */ } return c; } @@ -338,9 +334,11 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int struct sk_buff *c = alloc_skb(size, priority); if (c) { + unsigned long flags; + save_flags(flags); cli(); sk->rmem_alloc += c->mem_len; - sti(); + restore_flags(flags); /* was sti(); */ } return(c); } @@ -383,14 +381,19 @@ unsigned long sock_wspace(struct sock *sk) void sock_wfree(struct sock *sk, struct sk_buff *skb, unsigned long size) { +#ifdef CONFIG_SKB_CHECK IS_SKB(skb); +#endif kfree_skbmem(skb, size); if (sk) { + unsigned long flags; + save_flags(flags); + cli(); sk->wmem_alloc -= size; + restore_flags(flags); /* In case it might be waiting for more memory. 
*/ - if (!sk->dead) - sk->write_space(sk); + sk->write_space(sk); return; } } @@ -398,11 +401,17 @@ void sock_wfree(struct sock *sk, struct sk_buff *skb, unsigned long size) void sock_rfree(struct sock *sk, struct sk_buff *skb, unsigned long size) { +#ifdef CONFIG_SKB_CHECK IS_SKB(skb); +#endif kfree_skbmem(skb, size); if (sk) { + unsigned long flags; + save_flags(flags); + cli(); sk->rmem_alloc -= size; + restore_flags(flags); } } @@ -440,6 +449,8 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int nob if(skb==NULL) { unsigned long tmp; + + sk->socket->flags |= SO_NOSPACE; if(noblock) { *errcode=-EAGAIN; @@ -461,6 +472,7 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int nob if( tmp <= sk->wmem_alloc) { + sk->socket->flags &= ~SO_NOSPACE; interruptible_sleep_on(sk->sleep); if (current->signal & ~current->blocked) { @@ -477,27 +489,13 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int nob return skb; } -/* - * Queue a received datagram if it will fit. Stream and sequenced protocols - * can't normally use this as they need to fit buffers in and play with them. - */ - -int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - if(sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) - return -ENOMEM; - sk->rmem_alloc+=skb->mem_len; - skb->sk=sk; - skb_queue_tail(&sk->receive_queue,skb); - if(!sk->dead) - sk->data_ready(sk,skb->len); - return 0; -} void release_sock(struct sock *sk) { - struct sk_buff *skb; unsigned long flags; +#ifdef CONFIG_INET + struct sk_buff *skb; +#endif if (!sk->prot) return; diff --git a/net/ethernet/Makefile b/net/ethernet/Makefile new file mode 100644 index 000000000..a298cb88a --- /dev/null +++ b/net/ethernet/Makefile @@ -0,0 +1,57 @@ +# +# Makefile for the Linux TCP/IP (INET) layer. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. 
DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< +.c.s: + $(CC) $(CFLAGS) -S $< + + +OBJS := eth.o + +ifdef CONFIG_IPX + +OBJ2 := pe2.o + +endif + +ifdef CONFIG_ATALK + +OBJ2 := pe2.o + +endif + +OBJS := $(OBJS) $(OBJ2) + +ifdef CONFIG_NET + +ethernet.o: $(OBJS) + $(LD) -r -o ethernet.o $(OBJS) + +else + +ethernet.o: + $(AR) rcs ethernet.o + +endif + +dep: + $(CPP) -M *.c > .depend + +tar: + tar -cvf /dev/f1 . + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/net/inet/eth.c b/net/ethernet/eth.c index 550e0f8f0..cf6ef5328 100644 --- a/net/inet/eth.c +++ b/net/ethernet/eth.c @@ -45,7 +45,9 @@ #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/errno.h> -#include "arp.h" +#include <linux/config.h> +#include <net/arp.h> +#include <net/sock.h> void eth_setup(char *str, int *ints) { @@ -176,7 +178,7 @@ unsigned short eth_type_trans(struct sk_buff *skb, struct device *dev) skb->pkt_type=PACKET_MULTICAST; } - if(dev->flags&IFF_PROMISC) + else if(dev->flags&IFF_PROMISC) { if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN)) skb->pkt_type=PACKET_OTHERHOST; @@ -189,8 +191,27 @@ unsigned short eth_type_trans(struct sk_buff *skb, struct device *dev) if (*(unsigned short *)rawp == 0xFFFF) return htons(ETH_P_802_3); - if (*(unsigned short *)rawp == 0xAAAA) - return htons(ETH_P_SNAP); return htons(ETH_P_802_2); } + +/* + * Header caching for ethernet. Try to find and cache a header to avoid arp overhead. 
+ */ + +void eth_header_cache(struct device *dev, struct sock *sk, unsigned long saddr, unsigned long daddr) +{ + int v=arp_find_cache(sk->ip_hcache_data, daddr, dev); + if(v!=1) + sk->ip_hcache_state=0; /* Try when arp resolves */ + else + { + memcpy(sk->ip_hcache_data+6, dev->dev_addr, ETH_ALEN); + sk->ip_hcache_data[12]=ETH_P_IP>>8; + sk->ip_hcache_data[13]=ETH_P_IP&0xFF; + sk->ip_hcache_state=1; + sk->ip_hcache_stamp=arp_cache_stamp; + sk->ip_hcache_ver=&arp_cache_stamp; + } +} + diff --git a/net/inet/pe2.c b/net/ethernet/pe2.c index 15f62b344..7cb40d12f 100644 --- a/net/inet/pe2.c +++ b/net/ethernet/pe2.c @@ -1,6 +1,6 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> -#include "datalink.h" +#include <net/datalink.h> #include <linux/mm.h> #include <linux/in.h> @@ -27,6 +27,7 @@ make_EII_client(void) proto->type_len = 0; proto->header_length = 0; proto->datalink_header = pEII_datalink_header; + proto->string_name = "EtherII"; } return proto; diff --git a/net/inet/README b/net/inet/README deleted file mode 100644 index 005795df0..000000000 --- a/net/inet/README +++ /dev/null @@ -1,21 +0,0 @@ -Changes for NET3.017 - -This is mostly small stuff as follows: - -o accept()ed socket don't end up with an invalid sk->socket and give bogus - netstat output. -o FASYNC/SIGIO now works with sockets. -o Fixed the permissions on F_SETOWN for all. Its now as broken/working - as other systems. Really we need something like a 32bit generation - number on processes. -o ARP allows proxy for whole networks (a la cisco routers) -o TCP sendto() reports ENOTCONN in the right cases -o Removed some surplus uncommented code from tcp.c -o Fixed protocol violation during closedown in tcp.c - [Still not got the window < MSS bug fix included] - -Fixes for 1.1.58 - -o non blocking connect fail gets the error code right. -o select() not reporting read ok after an urgent read fixed. 
- diff --git a/net/inet/arp.h b/net/inet/arp.h deleted file mode 100644 index a68adc30a..000000000 --- a/net/inet/arp.h +++ /dev/null @@ -1,18 +0,0 @@ -/* linux/net/inet/arp.h */ -#ifndef _ARP_H -#define _ARP_H - -extern void arp_init(void); -extern void arp_destroy(unsigned long paddr, int force); -extern void arp_device_down(struct device *dev); -extern int arp_rcv(struct sk_buff *skb, struct device *dev, - struct packet_type *pt); -extern int arp_find(unsigned char *haddr, unsigned long paddr, - struct device *dev, unsigned long saddr, struct sk_buff *skb); -extern int arp_get_info(char *buffer, char **start, off_t origin, int length); -extern int arp_ioctl(unsigned int cmd, void *arg); -extern void arp_send(int type, int ptype, unsigned long dest_ip, - struct device *dev, unsigned long src_ip, - unsigned char *dest_hw, unsigned char *src_hw); - -#endif /* _ARP_H */ diff --git a/net/inet/datalink.h b/net/inet/datalink.h deleted file mode 100644 index ba345f3b5..000000000 --- a/net/inet/datalink.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _NET_INET_DATALINK_H_ -#define _NET_INET_DATALINK_H_ - -struct datalink_proto { - unsigned short type_len; - unsigned char type[8]; - unsigned short datalink_type; - unsigned short header_length; - int (*rcvfunc)(struct sk_buff *, struct device *, - struct packet_type *); - void (*datalink_header)(struct datalink_proto *, struct sk_buff *, - unsigned char *); - struct datalink_proto *next; -}; - -#endif - diff --git a/net/inet/eth.h b/net/inet/eth.h deleted file mode 100644 index f8fed44ed..000000000 --- a/net/inet/eth.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. NET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions for the Ethernet handlers. - * - * Version: @(#)eth.h 1.0.4 05/13/93 - * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> - * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#ifndef _ETH_H -#define _ETH_H - - -#include <linux/if_ether.h> - - -extern char *eth_print(unsigned char *ptr); -extern void eth_dump(struct ethhdr *eth); -extern int eth_header(unsigned char *buff, struct device *dev, - unsigned short type, unsigned long daddr, - unsigned long saddr, unsigned len); -extern int eth_rebuild_header(void *buff, struct device *dev); -extern void eth_add_arp(unsigned long addr, struct sk_buff *skb, - struct device *dev); -extern unsigned short eth_type_trans(struct sk_buff *skb, struct device *dev); - -#endif /* _ETH_H */ diff --git a/net/inet/icmp.h b/net/inet/icmp.h deleted file mode 100644 index 1067d8380..000000000 --- a/net/inet/icmp.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions for the ICMP module. - * - * Version: @(#)icmp.h 1.0.4 05/13/93 - * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> - * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ -#ifndef _ICMP_H -#define _ICMP_H - -#include <linux/icmp.h> - - -extern struct icmp_err icmp_err_convert[]; -extern struct icmp_mib icmp_statistics; - - -extern void icmp_send(struct sk_buff *skb_in, int type, int code, - struct device *dev); -extern int icmp_rcv(struct sk_buff *skb1, struct device *dev, - struct options *opt, unsigned long daddr, - unsigned short len, unsigned long saddr, - int redo, struct inet_protocol *protocol); - -extern int icmp_ioctl(struct sock *sk, int cmd, - unsigned long arg); - -#endif /* _ICMP_H */ diff --git a/net/inet/ip.h b/net/inet/ip.h deleted file mode 100644 index dd2cbc5ca..000000000 --- a/net/inet/ip.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions for the IP module. - * - * Version: @(#)ip.h 1.0.2 05/07/93 - * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> - * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> - * Alan Cox, <gw4pts@gw4pts.ampr.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#ifndef _IP_H -#define _IP_H - - -#include <linux/ip.h> - -#ifndef _SNMP_H -#include "snmp.h" -#endif - -#include "sock.h" /* struct sock */ - -/* IP flags. */ -#define IP_CE 0x8000 /* Flag: "Congestion" */ -#define IP_DF 0x4000 /* Flag: "Don't Fragment" */ -#define IP_MF 0x2000 /* Flag: "More Fragments" */ -#define IP_OFFSET 0x1FFF /* "Fragment Offset" part */ - -#define IP_FRAG_TIME (30 * HZ) /* fragment lifetime */ - - -/* Describe an IP fragment. 
*/ -struct ipfrag { - int offset; /* offset of fragment in IP datagram */ - int end; /* last byte of data in datagram */ - int len; /* length of this fragment */ - struct sk_buff *skb; /* complete received fragment */ - unsigned char *ptr; /* pointer into real fragment data */ - struct ipfrag *next; /* linked list pointers */ - struct ipfrag *prev; -}; - -/* Describe an entry in the "incomplete datagrams" queue. */ -struct ipq { - unsigned char *mac; /* pointer to MAC header */ - struct iphdr *iph; /* pointer to IP header */ - int len; /* total length of original datagram */ - short ihlen; /* length of the IP header */ - short maclen; /* length of the MAC header */ - struct timer_list timer; /* when will this queue expire? */ - struct ipfrag *fragments; /* linked list of received fragments */ - struct ipq *next; /* linked list pointers */ - struct ipq *prev; - struct device *dev; /* Device - for icmp replies */ -}; - - -extern int backoff(int n); - -extern void ip_print(const struct iphdr *ip); -extern int ip_ioctl(struct sock *sk, int cmd, - unsigned long arg); -extern void ip_route_check(unsigned long daddr); -extern int ip_build_header(struct sk_buff *skb, - unsigned long saddr, - unsigned long daddr, - struct device **dev, int type, - struct options *opt, int len, - int tos,int ttl); -extern unsigned short ip_compute_csum(unsigned char * buff, int len); -extern int ip_rcv(struct sk_buff *skb, struct device *dev, - struct packet_type *pt); -extern void ip_queue_xmit(struct sock *sk, - struct device *dev, struct sk_buff *skb, - int free); -extern void ip_retransmit(struct sock *sk, int all); -extern void ip_do_retransmit(struct sock *sk, int all); -extern int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen); -extern int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen); -extern void ip_init(void); - -extern struct ip_mib ip_statistics; -#endif /* _IP_H */ diff --git a/net/inet/ipx.c b/net/inet/ipx.c 
deleted file mode 100644 index 1d41acc16..000000000 --- a/net/inet/ipx.c +++ /dev/null @@ -1,1360 +0,0 @@ -/* - * Implements an IPX socket layer (badly - but I'm working on it). - * - * This code is derived from work by - * Ross Biro : Writing the original IP stack - * Fred Van Kempen : Tidying up the TCP/IP - * - * Many thanks go to Keith Baker, Institute For Industrial Information - * Technology Ltd, Swansea University for allowing me to work on this - * in my own time even though it was in some ways related to commercial - * work I am currently employed to do there. - * - * All the material in this file is subject to the Gnu license version 2. - * Neither Alan Cox nor the Swansea University Computer Society admit liability - * nor provide warranty for any of this software. This material is provided - * as is and at no charge. - * - * Revision 0.21: Uses the new generic socket option code. - * Revision 0.22: Gcc clean ups and drop out device registration. Use the - * new multi-protocol edition of hard_header - * Revision 0.23: IPX /proc by Mark Evans. - * Adding a route will overwrite any existing route to the same - * network. - * Revision 0.24: Supports new /proc with no 4K limit - * Revision 0.25: Add ephemeral sockets, passive local network - * identification, support for local net 0 and - * multiple datalinks <Greg Page> - * Revision 0.26: Device drop kills IPX routes via it. (needed for modules) - * Revision 0.27: Autobind <Mark Evans> - * Revision 0.28: Small fix for multiple local networks <Thomas Winder> - * Revision 0.29: Assorted major errors removed <Mark Evans> - * Small correction to promisc mode error fix <Alan Cox> - * Asynchronous I/O support. 
- * - * - * - */ - -#include <linux/config.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/ipx.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include "sock.h" -#include <asm/segment.h> -#include <asm/system.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/termios.h> /* For TIOCOUTQ/INQ */ -#include <linux/interrupt.h> -#include "p8022.h" - -#ifdef CONFIG_IPX -/***********************************************************************************************************************\ -* * -* Handlers for the socket list. * -* * -\***********************************************************************************************************************/ - -static ipx_socket *volatile ipx_socket_list=NULL; - -/* - * Note: Sockets may not be removed _during_ an interrupt or inet_bh - * handler using this technique. They can be added although we do not - * use this facility. - */ - -static void ipx_remove_socket(ipx_socket *sk) -{ - ipx_socket *s; - - cli(); - s=ipx_socket_list; - if(s==sk) - { - ipx_socket_list=s->next; - sti(); - return; - } - while(s && s->next) - { - if(s->next==sk) - { - s->next=sk->next; - sti(); - return; - } - s=s->next; - } - sti(); -} - -static void ipx_insert_socket(ipx_socket *sk) -{ - cli(); - sk->next=ipx_socket_list; - ipx_socket_list=sk; - sti(); -} - -static ipx_socket *ipx_find_socket(int port) -{ - ipx_socket *s; - s=ipx_socket_list; - while(s) - { - if(s->ipx_source_addr.sock==port) - { - return(s); - } - s=s->next; - } - return(NULL); -} - -/* - * This is only called from user mode. Thus it protects itself against - * interrupt users but doesn't worry about being called during work. 
- * Once it is removed from the queue no interrupt or bottom half will - * touch it and we are (fairly 8-) ) safe. - */ - -static void ipx_destroy_socket(ipx_socket *sk) -{ - struct sk_buff *skb; - ipx_remove_socket(sk); - - while((skb=skb_dequeue(&sk->receive_queue))!=NULL) - { - kfree_skb(skb,FREE_READ); - } - - kfree_s(sk,sizeof(*sk)); -} - - -/* Called from proc fs */ -int ipx_get_info(char *buffer, char **start, off_t offset, int length) -{ - ipx_socket *s; - int len=0; - off_t pos=0; - off_t begin=0; - - /* Theory.. Keep printing in the same place until we pass offset */ - - len += sprintf (buffer,"Type local_address rem_address tx_queue rx_queue st uid\n"); - for (s = ipx_socket_list; s != NULL; s = s->next) - { - len += sprintf (buffer+len,"%02X ", s->ipx_type); - len += sprintf (buffer+len,"%08lX:%02X%02X%02X%02X%02X%02X:%04X ", htonl(s->ipx_source_addr.net), - s->ipx_source_addr.node[0], s->ipx_source_addr.node[1], s->ipx_source_addr.node[2], - s->ipx_source_addr.node[3], s->ipx_source_addr.node[4], s->ipx_source_addr.node[5], - htons(s->ipx_source_addr.sock)); - len += sprintf (buffer+len,"%08lX:%02X%02X%02X%02X%02X%02X:%04X ", htonl(s->ipx_dest_addr.net), - s->ipx_dest_addr.node[0], s->ipx_dest_addr.node[1], s->ipx_dest_addr.node[2], - s->ipx_dest_addr.node[3], s->ipx_dest_addr.node[4], s->ipx_dest_addr.node[5], - htons(s->ipx_dest_addr.sock)); - len += sprintf (buffer+len,"%08lX:%08lX ", s->wmem_alloc, s->rmem_alloc); - len += sprintf (buffer+len,"%02X %d\n", s->state, SOCK_INODE(s->socket)->i_uid); - - /* Are we still dumping unwanted data then discard the record */ - pos=begin+len; - - if(pos<offset) - { - len=0; /* Keep dumping into the buffer start */ - begin=pos; - } - if(pos>offset+length) /* We have dumped enough */ - break; - } - - /* The data in question runs from begin to begin+len */ - *start=buffer+(offset-begin); /* Start of wanted data */ - len-=(offset-begin); /* Remove unwanted header data from length */ - if(len>length) - len=length; 
/* Remove unwanted tail data from length */ - - return len; -} - -/*******************************************************************************************************************\ -* * -* Routing tables for the IPX socket layer * -* * -\*******************************************************************************************************************/ - - -static struct datalink_proto *p8022_datalink = NULL; -static struct datalink_proto *pEII_datalink = NULL; -static struct datalink_proto *p8023_datalink = NULL; -static struct datalink_proto *pSNAP_datalink = NULL; - -static ipx_route *ipx_router_list=NULL; -static ipx_route *ipx_localnet_list=NULL; - -static ipx_route * -ipxrtr_get_local_net(struct device *dev, unsigned short datalink) -{ - ipx_route *r; - unsigned long flags; - save_flags(flags); - cli(); - r=ipx_localnet_list; - while(r!=NULL) - { - if((r->dev==dev) && (r->dlink_type == datalink)) - { - restore_flags(flags); - return r; - } - r=r->nextlocal; - } - restore_flags(flags); - return NULL; -} - -static ipx_route * -ipxrtr_get_default_net(void) -{ - return ipx_localnet_list; -} - -static ipx_route *ipxrtr_get_dev(long net) -{ - ipx_route *r; - unsigned long flags; - save_flags(flags); - cli(); - r=ipx_router_list; - while(r!=NULL) - { - if(r->net==net) - { - restore_flags(flags); - return r; - } - r=r->next; - } - restore_flags(flags); - return NULL; -} - -static void ipxrtr_add_localnet(ipx_route *newnet) -{ - ipx_route *r; - unsigned long flags; - save_flags(flags); - cli(); - - newnet->nextlocal = NULL; - if (ipx_localnet_list == NULL) { - ipx_localnet_list = newnet; - restore_flags(flags); - return; - } - - r=ipx_localnet_list; - while(r->nextlocal!=NULL) - r=r->nextlocal; - - r->nextlocal = newnet; - - restore_flags(flags); - return; -} - -static int ipxrtr_create(struct ipx_route_def *r) -{ - ipx_route *rt=ipxrtr_get_dev(r->ipx_network); - struct device *dev; - unsigned short dlink_type; - struct datalink_proto *datalink = NULL; - - if 
(r->ipx_flags & IPX_RT_BLUEBOOK) { - dlink_type = htons(ETH_P_IPX); - datalink = pEII_datalink; - } else if (r->ipx_flags & IPX_RT_8022) { - dlink_type = htons(ETH_P_802_2); - datalink = p8022_datalink; - } else if (r->ipx_flags & IPX_RT_SNAP) { - dlink_type = htons(ETH_P_SNAP); - datalink = pSNAP_datalink; - } else { - dlink_type = htons(ETH_P_802_3); - datalink = p8023_datalink; - } - - if (datalink == NULL) { - printk("IPX: Unsupported datalink protocol.\n"); - return -EPROTONOSUPPORT; - } - - if(r->ipx_router_network!=0) - { - /* Adding an indirect route */ - ipx_route *rt1=ipxrtr_get_dev(r->ipx_router_network); - if(rt1==NULL) - return -ENETUNREACH; - if(rt1->flags&IPX_RT_ROUTED) - return -EMULTIHOP; - if (rt==NULL) - { - rt=(ipx_route *)kmalloc(sizeof(ipx_route),GFP_ATOMIC); /* Because we are brave and don't lock the table! */ - if(rt==NULL) - return -EAGAIN; - rt->next=ipx_router_list; - ipx_router_list=rt; - } - rt->net=r->ipx_network; - rt->router_net=r->ipx_router_network; - memcpy(rt->router_node,r->ipx_router_node,sizeof(rt->router_node)); - rt->flags=IPX_RT_ROUTED; - rt->dlink_type = dlink_type; - rt->datalink = datalink; - rt->dev=rt1->dev; - return 0; - } - /* Add a direct route */ - dev=dev_get(r->ipx_device); - if(dev==NULL) - return -ENODEV; - /* Check addresses are suitable */ - if(dev->addr_len>6) - return -EINVAL; - if(dev->addr_len<2) - return -EINVAL; - if (ipxrtr_get_local_net(dev, dlink_type) != NULL) - return -EEXIST; - /* Ok now create */ - rt=(ipx_route *)kmalloc(sizeof(ipx_route),GFP_ATOMIC); /* Because we are brave and don't lock the table! 
*/ - if(rt==NULL) - return -EAGAIN; - rt->next=ipx_router_list; - ipx_router_list=rt; - rt->router_net=0; - memset(rt->router_node,0,sizeof(rt->router_node)); - rt->dev=dev; - rt->net=r->ipx_network; - rt->flags=0; - rt->dlink_type = dlink_type; - rt->datalink = datalink; - ipxrtr_add_localnet(rt); - return 0; -} - - -static int ipxrtr_delete_localnet(ipx_route *d) -{ - ipx_route **r = &ipx_localnet_list; - ipx_route *tmp; - - while ((tmp = *r) != NULL) { - if (tmp == d) { - *r = tmp->next; - return 0; - } - r = &tmp->nextlocal; - } - return -ENOENT; -} - -static int ipxrtr_delete(long net) -{ - ipx_route **r = &ipx_router_list; - ipx_route *tmp; - - while ((tmp = *r) != NULL) { - if (tmp->net == net) { - *r = tmp->next; - if (tmp->router_net == 0) { - ipxrtr_delete_localnet(tmp); - } - kfree_s(tmp, sizeof(ipx_route)); - return 0; - } - r = &tmp->next; - } - return -ENOENT; -} - -void ipxrtr_device_down(struct device *dev) -{ - ipx_route **r = &ipx_router_list; - ipx_route *tmp; - - while ((tmp = *r) != NULL) { - if (tmp->dev == dev) { - *r = tmp->next; - if(tmp->router_net == 0) - ipxrtr_delete_localnet(tmp); - kfree_s(tmp, sizeof(ipx_route)); - } - r = &tmp->next; - } -} - -static int ipxrtr_ioctl(unsigned int cmd, void *arg) -{ - int err; - switch(cmd) - { - case SIOCDELRT: - err=verify_area(VERIFY_READ,arg,sizeof(long)); - if(err) - return err; - return ipxrtr_delete(get_fs_long(arg)); - case SIOCADDRT: - { - struct ipx_route_def f; - err=verify_area(VERIFY_READ,arg,sizeof(f)); - if(err) - return err; - memcpy_fromfs(&f,arg,sizeof(f)); - return ipxrtr_create(&f); - } - default: - return -EINVAL; - } -} - -/* Called from proc fs */ -int ipx_rt_get_info(char *buffer, char **start, off_t offset, int length) -{ - ipx_route *rt; - int len=0; - off_t pos=0; - off_t begin=0; - - len += sprintf (buffer,"Net Router Flags Dev\n"); - for (rt = ipx_router_list; rt != NULL; rt = rt->next) - { - len += sprintf (buffer+len,"%08lX %08lX:%02X%02X%02X%02X%02X%02X %02X %s\n", 
ntohl(rt->net), - ntohl(rt->router_net), rt->router_node[0], rt->router_node[1], rt->router_node[2], - rt->router_node[3], rt->router_node[4], rt->router_node[5], rt->flags, rt->dev->name); - pos=begin+len; - if(pos<offset) - { - len=0; - begin=pos; - } - if(pos>offset+length) - break; - } - *start=buffer+(offset-begin); - len-=(offset-begin); - if(len>length) - len=length; - return len; -} - -/*******************************************************************************************************************\ -* * -* Handling for system calls applied via the various interfaces to an IPX socket object * -* * -\*******************************************************************************************************************/ - -static int ipx_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - ipx_socket *sk=(ipx_socket *)sock->data; - switch(cmd) - { - default: - return(-EINVAL); - } -} - -static int ipx_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen) -{ - ipx_socket *sk; - int err,opt; - - sk=(ipx_socket *)sock->data; - - if(optval==NULL) - return(-EINVAL); - - err=verify_area(VERIFY_READ,optval,sizeof(int)); - if(err) - return err; - opt=get_fs_long((unsigned long *)optval); - - switch(level) - { - case SOL_IPX: - switch(optname) - { - case IPX_TYPE: - if(!suser()) - return(-EPERM); - sk->ipx_type=opt; - return 0; - default: - return -EOPNOTSUPP; - } - break; - - case SOL_SOCKET: - return sock_setsockopt(sk,level,optname,optval,optlen); - - default: - return -EOPNOTSUPP; - } -} - -static int ipx_getsockopt(struct socket *sock, int level, int optname, - char *optval, int *optlen) -{ - ipx_socket *sk; - int val=0; - int err; - - sk=(ipx_socket *)sock->data; - - switch(level) - { - - case SOL_IPX: - switch(optname) - { - case IPX_TYPE: - val=sk->ipx_type; - break; - default: - return -ENOPROTOOPT; - } - break; - - case SOL_SOCKET: - return sock_getsockopt(sk,level,optname,optval,optlen); - - default: - return 
-EOPNOTSUPP; - } - err=verify_area(VERIFY_WRITE,optlen,sizeof(int)); - if(err) - return err; - put_fs_long(sizeof(int),(unsigned long *)optlen); - err=verify_area(VERIFY_WRITE,optval,sizeof(int)); - put_fs_long(val,(unsigned long *)optval); - return(0); -} - -static int ipx_listen(struct socket *sock, int backlog) -{ - return -EOPNOTSUPP; -} - -static void def_callback1(struct sock *sk) -{ - if(!sk->dead) - wake_up_interruptible(sk->sleep); -} - -static void def_callback2(struct sock *sk, int len) -{ - if(!sk->dead) - { - wake_up_interruptible(sk->sleep); - sock_wake_async(sk->socket); - } -} - -static int ipx_create(struct socket *sock, int protocol) -{ - ipx_socket *sk; - sk=(ipx_socket *)kmalloc(sizeof(*sk),GFP_KERNEL); - if(sk==NULL) - return(-ENOMEM); - switch(sock->type) - { - case SOCK_DGRAM: - break; - default: - kfree_s((void *)sk,sizeof(*sk)); - return(-ESOCKTNOSUPPORT); - } - sk->dead=0; - sk->next=NULL; - sk->broadcast=0; - sk->rcvbuf=SK_RMEM_MAX; - sk->sndbuf=SK_WMEM_MAX; - sk->wmem_alloc=0; - sk->rmem_alloc=0; - sk->inuse=0; - sk->shutdown=0; - sk->prot=NULL; /* So we use default free mechanisms */ - sk->broadcast=0; - sk->err=0; - skb_queue_head_init(&sk->receive_queue); - skb_queue_head_init(&sk->write_queue); - sk->send_head=NULL; - skb_queue_head_init(&sk->back_log); - sk->state=TCP_CLOSE; - sk->socket=sock; - sk->type=sock->type; - sk->ipx_type=0; /* General user level IPX */ - sk->debug=0; - - memset(&sk->ipx_dest_addr,'\0',sizeof(sk->ipx_dest_addr)); - memset(&sk->ipx_source_addr,'\0',sizeof(sk->ipx_source_addr)); - sk->mtu=IPX_MTU; - - if(sock!=NULL) - { - sock->data=(void *)sk; - sk->sleep=sock->wait; - } - - sk->state_change=def_callback1; - sk->data_ready=def_callback2; - sk->write_space=def_callback1; - sk->error_report=def_callback1; - - sk->zapped=1; - return(0); -} - -static int ipx_dup(struct socket *newsock,struct socket *oldsock) -{ - return(ipx_create(newsock,SOCK_DGRAM)); -} - -static int ipx_release(struct socket *sock, struct 
socket *peer) -{ - ipx_socket *sk=(ipx_socket *)sock->data; - if(sk==NULL) - return(0); - if(!sk->dead) - sk->state_change(sk); - sk->dead=1; - sock->data=NULL; - ipx_destroy_socket(sk); - return(0); -} - -static unsigned short first_free_socketnum(void) -{ - static unsigned short socketNum = 0x4000; - - while (ipx_find_socket(ntohs(socketNum)) != NULL) - if (socketNum > 0x7ffc) - socketNum = 0x4000; - else - socketNum++; - - return ntohs(socketNum); -} - -static int ipx_bind(struct socket *sock, struct sockaddr *uaddr,int addr_len) -{ - ipx_socket *sk; - struct ipx_route *rt; - unsigned char *nodestart; - struct sockaddr_ipx *addr=(struct sockaddr_ipx *)uaddr; - - sk=(ipx_socket *)sock->data; - - if(sk->zapped==0) - return(-EIO); - - if(addr_len!=sizeof(struct sockaddr_ipx)) - return -EINVAL; - - if (addr->sipx_port == 0) - { - addr->sipx_port = first_free_socketnum(); - if (addr->sipx_port == 0) - return -EINVAL; - } - - if(ntohs(addr->sipx_port)<0x4000 && !suser()) - return(-EPERM); /* protect IPX system stuff like routing/sap */ - - /* Source addresses are easy. 
It must be our network:node pair for - an interface routed to IPX with the ipx routing ioctl() */ - - if(ipx_find_socket(addr->sipx_port)!=NULL) - { - if(sk->debug) - printk("IPX: bind failed because port %X in use.\n", - (int)addr->sipx_port); - return -EADDRINUSE; - } - - sk->ipx_source_addr.sock=addr->sipx_port; - - if (addr->sipx_network == 0L) - { - rt = ipxrtr_get_default_net(); - } - else - { - rt = ipxrtr_get_dev(addr->sipx_network); - } - - if(rt == NULL) - { - if(sk->debug) - printk("IPX: bind failed (no device for net %lX)\n", - sk->ipx_source_addr.net); - return -EADDRNOTAVAIL; - } - - sk->ipx_source_addr.net=rt->net; - - /* IPX addresses zero pad physical addresses less than 6 */ - memset(sk->ipx_source_addr.node,'\0',6); - nodestart = sk->ipx_source_addr.node + (6 - rt->dev->addr_len); - memcpy(nodestart,rt->dev->dev_addr,rt->dev->addr_len); - - ipx_insert_socket(sk); - sk->zapped=0; - if(sk->debug) - printk("IPX: socket is bound.\n"); - return(0); -} - -static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) -{ - ipx_socket *sk=(ipx_socket *)sock->data; - struct sockaddr_ipx *addr; - - sk->state = TCP_CLOSE; - sock->state = SS_UNCONNECTED; - - if(addr_len!=sizeof(*addr)) - return(-EINVAL); - addr=(struct sockaddr_ipx *)uaddr; - - if(sk->ipx_source_addr.net==0) - /* put the autobinding in */ - { - struct sockaddr_ipx uaddr; - int ret; - - uaddr.sipx_port = 0; - uaddr.sipx_network = 0L; - ret = ipx_bind (sock, (struct sockaddr *)&uaddr, sizeof(struct sockaddr_ipx)); - if (ret != 0) return (ret); - } - - sk->ipx_dest_addr.net=addr->sipx_network; - sk->ipx_dest_addr.sock=addr->sipx_port; - memcpy(sk->ipx_dest_addr.node,addr->sipx_node,sizeof(sk->ipx_source_addr.node)); - if(ipxrtr_get_dev(sk->ipx_dest_addr.net)==NULL) - return -ENETUNREACH; - sk->ipx_type=addr->sipx_type; - sock->state = SS_CONNECTED; - sk->state=TCP_ESTABLISHED; - return(0); -} - -static int ipx_socketpair(struct socket *sock1, struct socket 
*sock2) -{ - return(-EOPNOTSUPP); -} - -static int ipx_accept(struct socket *sock, struct socket *newsock, int flags) -{ - if(newsock->data) - kfree_s(newsock->data,sizeof(ipx_socket)); - return -EOPNOTSUPP; -} - -static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) -{ - ipx_address *addr; - struct sockaddr_ipx sipx; - ipx_socket *sk; - - sk=(ipx_socket *)sock->data; - - *uaddr_len = sizeof(struct sockaddr_ipx); - - if(peer) - { - if(sk->state!=TCP_ESTABLISHED) - return -ENOTCONN; - addr=&sk->ipx_dest_addr; - } - else - addr=&sk->ipx_source_addr; - - sipx.sipx_family = AF_IPX; - sipx.sipx_type = sk->ipx_type; - sipx.sipx_port = addr->sock; - sipx.sipx_network = addr->net; - memcpy(sipx.sipx_node,addr->node,sizeof(sipx.sipx_node)); - memcpy(uaddr,&sipx,sizeof(sipx)); - return(0); -} - -int ipx_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) -{ - /* NULL here for pt means the packet was looped back */ - ipx_socket *sock; - ipx_packet *ipx; - ipx_route *rt; - ipx_route *ln; - unsigned char IPXaddr[6]; - - ipx=(ipx_packet *)skb->h.raw; - - if(ipx->ipx_checksum!=IPX_NO_CHECKSUM) - { - /* We don't do checksum options. We can't really. Novell don't seem to have documented them. - If you need them try the XNS checksum since IPX is basically XNS in disguise. It might be - the same... 
*/ - kfree_skb(skb,FREE_READ); - return(0); - } - - /* Too small */ - if(htons(ipx->ipx_pktsize)<sizeof(ipx_packet)) - { - kfree_skb(skb,FREE_READ); - return(0); - } - - /* Too many hops */ - if(ipx->ipx_tctrl>16) - { - kfree_skb(skb,FREE_READ); - return(0); - } - - /* Determine what local ipx endpoint this is */ - ln = ipxrtr_get_local_net(dev, pt->type); - if (ln == NULL) - { - kfree_skb(skb,FREE_READ); - return(0); - } - - memset(IPXaddr, '\0', 6); - memcpy(IPXaddr+(6 - dev->addr_len), dev->dev_addr, dev->addr_len); - - /* Not us/broadcast */ - if(memcmp(IPXaddr,ipx->ipx_dest.node,6)!=0 - && memcmp(ipx_broadcast_node,ipx->ipx_dest.node,6)!=0) - { - /********************************************************************************************** - - IPX router. Roughly as per the Novell spec. This doesn't handle netbios flood fill - broadcast frames. See the Novell IPX router specification for more details - (for ftp from ftp.novell.com) - - ***********************************************************************************************/ - - int incoming_size; - int outgoing_size; - struct sk_buff *skb2; - int free_it=0; - - /* Rule: Don't forward packets that have exceeded the hop limit. This is fixed at 16 in IPX */ - if((ipx->ipx_tctrl==16) || (skb->pkt_type!=PACKET_HOST)) - { - kfree_skb(skb,FREE_READ); - return(0); - } - - ipx->ipx_tctrl++; - /* Don't forward if we don't have a route. We ought to go off and start hunting out routes but - if someone needs this _THEY_ can add it */ - rt=ipxrtr_get_dev(ipx->ipx_dest.net); - if(rt==NULL) /* Unlike IP we can send on the interface we received. Eg doing DIX/802.3 conversion */ - { - kfree_skb(skb,FREE_READ); - return(0); - } - - /* Check for differences in outgoing and incoming packet size */ - incoming_size = skb->len - ntohs(ipx->ipx_pktsize); - outgoing_size = rt->datalink->header_length + rt->dev->hard_header_len; - if(incoming_size != outgoing_size) - { - /* A different header length causes a copy. 
Awkward to avoid with the current - sk_buff stuff. */ - skb2=alloc_skb(ntohs(ipx->ipx_pktsize) + outgoing_size, - GFP_ATOMIC); - if(skb2==NULL) - { - kfree_skb(skb,FREE_READ); - return 0; - } - free_it=1; - skb2->free=1; - skb2->len=ntohs(ipx->ipx_pktsize) + outgoing_size; - skb2->mem_addr = skb2; - skb2->arp = 1; - skb2->sk = NULL; - - /* Need to copy with appropriate offsets */ - memcpy((char *)(skb2+1)+outgoing_size, - (char *)(skb+1)+incoming_size, - ntohs(ipx->ipx_pktsize)); - } - else - { - skb2=skb; - } - - /* Now operate on the buffer */ - /* Increase hop count */ - - skb2->dev = rt->dev; - rt->datalink->datalink_header(rt->datalink, skb2, - (rt->flags&IPX_RT_ROUTED)?rt->router_node - :ipx->ipx_dest.node); - - dev_queue_xmit(skb2,rt->dev,SOPRI_NORMAL); - - if(free_it) - kfree_skb(skb,FREE_READ); - return(0); - } - /************ End of router: Now sanity check stuff for us ***************/ - - /* Ok its for us ! */ - if (ln->net == 0L) { -/* printk("IPX: Registering local net %lx\n", ipx->ipx_dest.net);*/ - ln->net = ipx->ipx_dest.net; - } - - sock=ipx_find_socket(ipx->ipx_dest.sock); - if(sock==NULL) /* But not one of our sockets */ - { - kfree_skb(skb,FREE_READ); - return(0); - } - - /* Check to see if this socket needs its network number */ - ln = ipxrtr_get_default_net(); - if (sock->ipx_source_addr.net == 0L) - sock->ipx_source_addr.net = ln->net; - - if(sock_queue_rcv_skb(sock, skb)<0) - { - kfree_skb(skb,FREE_READ); /* Socket is full */ - return(0); - } - - return(0); -} - -static int ipx_sendto(struct socket *sock, void *ubuf, int len, int noblock, - unsigned flags, struct sockaddr *usip, int addr_len) -{ - ipx_socket *sk=(ipx_socket *)sock->data; - struct sockaddr_ipx *usipx=(struct sockaddr_ipx *)usip; - struct sockaddr_ipx local_sipx; - struct sk_buff *skb; - struct device *dev; - struct ipx_packet *ipx; - int size; - ipx_route *rt; - struct datalink_proto *dl = NULL; - unsigned char IPXaddr[6]; - int self_addressing = 0; - int broadcast = 0; - - 
if(flags) - return -EINVAL; - - if(usipx) - { - if(sk->ipx_source_addr.net==0) - /* put the autobinding in */ - { - struct sockaddr_ipx uaddr; - int ret; - - uaddr.sipx_port = 0; - uaddr.sipx_network = 0L; - ret = ipx_bind (sock, (struct sockaddr *)&uaddr, sizeof(struct sockaddr_ipx)); - if (ret != 0) return (ret); - } - - if(addr_len <sizeof(*usipx)) - return(-EINVAL); - if(usipx->sipx_family != AF_IPX) - return -EINVAL; - if(htons(usipx->sipx_port)<0x4000 && !suser()) - return -EPERM; - } - else - { - if(sk->state!=TCP_ESTABLISHED) - return -ENOTCONN; - usipx=&local_sipx; - usipx->sipx_family=AF_IPX; - usipx->sipx_type=sk->ipx_type; - usipx->sipx_port=sk->ipx_dest_addr.sock; - usipx->sipx_network=sk->ipx_dest_addr.net; - memcpy(usipx->sipx_node,sk->ipx_dest_addr.node,sizeof(usipx->sipx_node)); - } - - if(sk->debug) - printk("IPX: sendto: Addresses built.\n"); - - if(memcmp(&usipx->sipx_node,&ipx_broadcast_node,6)==0) - { - if (!sk->broadcast) - return -ENETUNREACH; - broadcast = 1; - } - - /* Build a packet */ - - if(sk->debug) - printk("IPX: sendto: building packet.\n"); - - size=sizeof(ipx_packet)+len; /* For mac headers */ - - /* Find out where this has to go */ - if (usipx->sipx_network == 0L) { - rt = ipxrtr_get_default_net(); - if (rt != NULL) - usipx->sipx_network = rt->net; - } else - rt=ipxrtr_get_dev(usipx->sipx_network); - - if(rt==NULL) - { - return -ENETUNREACH; - } - - dev=rt->dev; - dl = rt->datalink; - - size += dev->hard_header_len; - size += dl->header_length; - - if(sk->debug) - printk("IPX: sendto: allocating buffer (%d)\n",size); - - if(size+sk->wmem_alloc>sk->sndbuf) { - return -EAGAIN; - } - - skb=alloc_skb(size,GFP_KERNEL); - if(skb==NULL) - return -ENOMEM; - - skb->mem_addr=skb; - skb->sk=sk; - skb->free=1; - skb->arp=1; - skb->len=size; - - sk->wmem_alloc+=skb->mem_len; - - if(sk->debug) - printk("Building MAC header.\n"); - skb->dev=rt->dev; - - /* Build Data Link header */ - dl->datalink_header(dl, skb, - 
(rt->flags&IPX_RT_ROUTED)?rt->router_node:usipx->sipx_node); - - /* See if we are sending to ourself */ - memset(IPXaddr, '\0', 6); - memcpy(IPXaddr+(6 - skb->dev->addr_len), skb->dev->dev_addr, - skb->dev->addr_len); - - self_addressing = !memcmp(IPXaddr, - (rt->flags&IPX_RT_ROUTED)?rt->router_node - :usipx->sipx_node, - 6); - - /* Now the IPX */ - if(sk->debug) - printk("Building IPX Header.\n"); - ipx=(ipx_packet *)skb->h.raw; - ipx->ipx_checksum=0xFFFF; - ipx->ipx_pktsize=htons(len+sizeof(ipx_packet)); - ipx->ipx_tctrl=0; - ipx->ipx_type=usipx->sipx_type; - - memcpy(&ipx->ipx_source,&sk->ipx_source_addr,sizeof(ipx->ipx_source)); - ipx->ipx_dest.net=usipx->sipx_network; - memcpy(ipx->ipx_dest.node,usipx->sipx_node,sizeof(ipx->ipx_dest.node)); - ipx->ipx_dest.sock=usipx->sipx_port; - if(sk->debug) - printk("IPX: Appending user data.\n"); - /* User data follows immediately after the IPX data */ - memcpy_fromfs((char *)(ipx+1),ubuf,len); - if(sk->debug) - printk("IPX: Transmitting buffer\n"); - if((dev->flags&IFF_LOOPBACK) || self_addressing) { - struct packet_type pt; - - /* loop back */ - pt.type = rt->dlink_type; - sk->wmem_alloc-=skb->mem_len; - skb->sk = NULL; - ipx_rcv(skb,dev,&pt); - } else { - if (broadcast) { - struct packet_type pt; - struct sk_buff *skb2; - - /* loop back */ - pt.type = rt->dlink_type; - - skb2=alloc_skb(skb->len, GFP_ATOMIC); - skb2->mem_addr=skb2; - skb2->free=1; - skb2->arp=1; - skb2->len=skb->len; - skb2->sk = NULL; - skb2->h.raw = skb2->data + rt->datalink->header_length - + dev->hard_header_len; - memcpy(skb2->data, skb->data, skb->len); - ipx_rcv(skb2,dev,&pt); - } - dev_queue_xmit(skb,dev,SOPRI_NORMAL); - } - return len; -} - -static int ipx_send(struct socket *sock, void *ubuf, int size, int noblock, unsigned flags) -{ - return ipx_sendto(sock,ubuf,size,noblock,flags,NULL,0); -} - -static int ipx_recvfrom(struct socket *sock, void *ubuf, int size, int noblock, - unsigned flags, struct sockaddr *sip, int *addr_len) -{ - 
ipx_socket *sk=(ipx_socket *)sock->data; - struct sockaddr_ipx *sipx=(struct sockaddr_ipx *)sip; - struct ipx_packet *ipx = NULL; - /* FILL ME IN */ - int copied = 0; - struct sk_buff *skb; - int er; - - if(sk->err) - { - er= -sk->err; - sk->err=0; - return er; - } - - if(addr_len) - *addr_len=sizeof(*sipx); - - skb=skb_recv_datagram(sk,flags,noblock,&er); - if(skb==NULL) - return er; - - ipx = (ipx_packet *)(skb->h.raw); - copied=ntohs(ipx->ipx_pktsize) - sizeof(ipx_packet); - skb_copy_datagram(skb,sizeof(struct ipx_packet),ubuf,copied); - - if(sipx) - { - sipx->sipx_family=AF_IPX; - sipx->sipx_port=ipx->ipx_source.sock; - memcpy(sipx->sipx_node,ipx->ipx_source.node,sizeof(sipx->sipx_node)); - sipx->sipx_network=ipx->ipx_source.net; - sipx->sipx_type = ipx->ipx_type; - } - skb_free_datagram(skb); - return(copied); -} - - -static int ipx_write(struct socket *sock, char *ubuf, int size, int noblock) -{ - return ipx_send(sock,ubuf,size,noblock,0); -} - - -static int ipx_recv(struct socket *sock, void *ubuf, int size , int noblock, - unsigned flags) -{ - ipx_socket *sk=(ipx_socket *)sock->data; - if(sk->zapped) - return -ENOTCONN; - return ipx_recvfrom(sock,ubuf,size,noblock,flags,NULL, NULL); -} - -static int ipx_read(struct socket *sock, char *ubuf, int size, int noblock) -{ - return ipx_recv(sock,ubuf,size,noblock,0); -} - - -static int ipx_shutdown(struct socket *sk,int how) -{ - return -EOPNOTSUPP; -} - -static int ipx_select(struct socket *sock , int sel_type, select_table *wait) -{ - ipx_socket *sk=(ipx_socket *)sock->data; - - return datagram_select(sk,sel_type,wait); -} - -static int ipx_ioctl(struct socket *sock,unsigned int cmd, unsigned long arg) -{ - int err; - long amount=0; - ipx_socket *sk=(ipx_socket *)sock->data; - - switch(cmd) - { - case TIOCOUTQ: - err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(unsigned long)); - if(err) - return err; - amount=sk->sndbuf-sk->wmem_alloc; - if(amount<0) - amount=0; - put_fs_long(amount,(unsigned long *)arg); - 
return 0; - case TIOCINQ: - { - struct sk_buff *skb; - /* These two are safe on a single CPU system as only user tasks fiddle here */ - if((skb=skb_peek(&sk->receive_queue))!=NULL) - amount=skb->len; - err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(unsigned long)); - put_fs_long(amount,(unsigned long *)arg); - return 0; - } - case SIOCADDRT: - case SIOCDELRT: - if(!suser()) - return -EPERM; - return(ipxrtr_ioctl(cmd,(void *)arg)); - case SIOCGSTAMP: - if (sk) - { - if(sk->stamp.tv_sec==0) - return -ENOENT; - err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(struct timeval)); - if(err) - return err; - memcpy_tofs((void *)arg,&sk->stamp,sizeof(struct timeval)); - return 0; - } - return -EINVAL; - case SIOCGIFCONF: - case SIOCGIFFLAGS: - case SIOCSIFFLAGS: - case SIOCGIFADDR: - case SIOCSIFADDR: - case SIOCGIFDSTADDR: - case SIOCSIFDSTADDR: - case SIOCGIFBRDADDR: - case SIOCSIFBRDADDR: - case SIOCGIFNETMASK: - case SIOCSIFNETMASK: - case SIOCGIFMETRIC: - case SIOCSIFMETRIC: - case SIOCGIFMEM: - case SIOCSIFMEM: - case SIOCGIFMTU: - case SIOCSIFMTU: - case SIOCSIFLINK: - case SIOCGIFHWADDR: - case SIOCSIFHWADDR: - case OLD_SIOCGIFHWADDR: - return(dev_ioctl(cmd,(void *) arg)); - - - default: - return -EINVAL; - } - /*NOTREACHED*/ - return(0); -} - -static struct proto_ops ipx_proto_ops = { - AF_IPX, - - ipx_create, - ipx_dup, - ipx_release, - ipx_bind, - ipx_connect, - ipx_socketpair, - ipx_accept, - ipx_getname, - ipx_read, - ipx_write, - ipx_select, - ipx_ioctl, - ipx_listen, - ipx_send, - ipx_recv, - ipx_sendto, - ipx_recvfrom, - ipx_shutdown, - ipx_setsockopt, - ipx_getsockopt, - ipx_fcntl, -}; - -/* Called by ddi.c on kernel start up */ - -static struct packet_type ipx_8023_packet_type = -{ - 0, /* MUTTER ntohs(ETH_P_8023),*/ - 0, /* copy */ - ipx_rcv, - NULL, - NULL, -}; - -static struct packet_type ipx_dix_packet_type = -{ - 0, /* MUTTER ntohs(ETH_P_IPX),*/ - 0, /* copy */ - ipx_rcv, - NULL, - NULL, -}; - - -extern struct datalink_proto *make_EII_client(void); 
-extern struct datalink_proto *make_8023_client(void); - -void ipx_proto_init(struct net_proto *pro) -{ - unsigned char val = 0xE0; - (void) sock_register(ipx_proto_ops.family, &ipx_proto_ops); - - pEII_datalink = make_EII_client(); - ipx_dix_packet_type.type=htons(ETH_P_IPX); - dev_add_pack(&ipx_dix_packet_type); - - p8023_datalink = make_8023_client(); - ipx_8023_packet_type.type=htons(ETH_P_802_3); - dev_add_pack(&ipx_8023_packet_type); - - if ((p8022_datalink = register_8022_client(val, ipx_rcv)) == NULL) - printk("IPX: Unable to register with 802.2\n"); - - printk("Swansea University Computer Society IPX 0.29 BETA for NET3.017\n"); - -} -#endif diff --git a/net/inet/ipx.h b/net/inet/ipx.h deleted file mode 100644 index 7a4cf6a0e..000000000 --- a/net/inet/ipx.h +++ /dev/null @@ -1,71 +0,0 @@ - -/* - * The following information is in its entirety obtained from: - * - * Novell 'IPX Router Specification' Version 1.10 - * Part No. 107-000029-001 - * - * Which is available from ftp.novell.com - */ - -#ifndef _NET_INET_IPX_H_ -#define _NET_INET_IPX_H_ - -#include <linux/ipx.h> -#include "datalink.h" - -typedef struct -{ - unsigned long net; - unsigned char node[6]; - unsigned short sock; -} ipx_address; - -#define ipx_broadcast_node "\377\377\377\377\377\377" - -typedef struct ipx_packet -{ - unsigned short ipx_checksum; -#define IPX_NO_CHECKSUM 0xFFFF - unsigned short ipx_pktsize; - unsigned char ipx_tctrl; - unsigned char ipx_type; -#define IPX_TYPE_UNKNOWN 0x00 -#define IPX_TYPE_RIP 0x01 /* may also be 0 */ -#define IPX_TYPE_SAP 0x04 /* may also be 0 */ -#define IPX_TYPE_SPX 0x05 /* Not yet implemented */ -#define IPX_TYPE_NCP 0x11 /* $lots for docs on this (SPIT) */ -#define IPX_TYPE_PPROP 0x14 /* complicated flood fill brdcast [Not supported] */ - ipx_address ipx_dest __attribute__ ((packed)); - ipx_address ipx_source __attribute__ ((packed)); -} ipx_packet; - - -typedef struct ipx_route -{ - unsigned long net; - unsigned char router_node[6]; - unsigned long 
router_net; - unsigned short flags; -#define IPX_RT_ROUTED 1 /* This isn't a direct route. Send via this if to node router_node */ -#define IPX_RT_BLUEBOOK 2 -#define IPX_RT_8022 4 -#define IPX_RT_SNAP 8 - unsigned short dlink_type; - struct device *dev; - struct datalink_proto *datalink; - struct ipx_route *next; - struct ipx_route *nextlocal; -} ipx_route; - - -typedef struct sock ipx_socket; - - -#include "ipxcall.h" -extern int ipx_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt); -extern void ipxrtr_device_down(struct device *dev); - - - -#endif diff --git a/net/inet/ipxcall.h b/net/inet/ipxcall.h deleted file mode 100644 index eb5bd2bd2..000000000 --- a/net/inet/ipxcall.h +++ /dev/null @@ -1,2 +0,0 @@ -/* Separate to keep compilation of protocols.c simpler */ -extern void ipx_proto_init(struct net_proto *pro); diff --git a/net/inet/ncp.h b/net/inet/ncp.h deleted file mode 100644 index b12011c98..000000000 --- a/net/inet/ncp.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * - * Kernel support for NCP - * - * Mark Evans 1994 - * - */ - -#ifndef _NCP_H -#define _NCP_H - -#include <linux/ncp.h> - -struct ncp_info -{ - unsigned short conn; /* connection number */ - unsigned char seq; /* sequence number */ - ipx_socket *ncp; /* ncp socket */ - ipx_socket *watchdog; /* watchdog socket */ - ipx_socket *mail; /* mail socket */ -}; - -#define NCP_TIMEOUT (3*HZ) -#define MAX_TIMEOUT 15 - -#endif /* _NCP_H */ diff --git a/net/inet/p8022.h b/net/inet/p8022.h deleted file mode 100644 index 52c676be2..000000000 --- a/net/inet/p8022.h +++ /dev/null @@ -1,2 +0,0 @@ -struct datalink_proto *register_8022_client(unsigned char type, int (*rcvfunc)(struct sk_buff *, struct device *, struct packet_type *)); - diff --git a/net/inet/p8022call.h b/net/inet/p8022call.h deleted file mode 100644 index 14f0c2cee..000000000 --- a/net/inet/p8022call.h +++ /dev/null @@ -1,2 +0,0 @@ -/* Separate to keep compilation of Space.c simpler */ -extern void p8022_proto_init(struct 
net_proto *); diff --git a/net/inet/protocol.h b/net/inet/protocol.h deleted file mode 100644 index 3e0b6fb3c..000000000 --- a/net/inet/protocol.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions for the protocol dispatcher. - * - * Version: @(#)protocol.h 1.0.2 05/07/93 - * - * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Changes: - * Alan Cox : Added a name field and a frag handler - * field for later. - */ - -#ifndef _PROTOCOL_H -#define _PROTOCOL_H - - -#define MAX_INET_PROTOS 32 /* Must be a power of 2 */ - - -/* This is used to register protocols. 
*/ -struct inet_protocol { - int (*handler)(struct sk_buff *skb, struct device *dev, - struct options *opt, unsigned long daddr, - unsigned short len, unsigned long saddr, - int redo, struct inet_protocol *protocol); - int (*frag_handler)(struct sk_buff *skb, struct device *dev, - struct options *opt, unsigned long daddr, - unsigned short len, unsigned long saddr, - int redo, struct inet_protocol *protocol); - void (*err_handler)(int err, unsigned char *buff, - unsigned long daddr, - unsigned long saddr, - struct inet_protocol *protocol); - struct inet_protocol *next; - unsigned char protocol; - unsigned char copy:1; - void *data; - char *name; -}; - - -extern struct inet_protocol *inet_protocol_base; -extern struct inet_protocol *inet_protos[MAX_INET_PROTOS]; - - -extern void inet_add_protocol(struct inet_protocol *prot); -extern int inet_del_protocol(struct inet_protocol *prot); - - -#endif /* _PROTOCOL_H */ diff --git a/net/inet/rarp.h b/net/inet/rarp.h deleted file mode 100644 index 02ee7784f..000000000 --- a/net/inet/rarp.h +++ /dev/null @@ -1,14 +0,0 @@ -/* linux/net/inet/rarp.h */ -#ifndef _RARP_H -#define _RARP_H - -extern int rarp_ioctl(unsigned int cmd, void *arg); -extern int rarp_rcv(struct sk_buff *skb, - struct device *dev, - struct packet_type *pt); -extern int rarp_get_info(char *buffer, - char **start, - off_t offset, - int length); -#endif /* _RARP_H */ - diff --git a/net/inet/raw.h b/net/inet/raw.h deleted file mode 100644 index 80cb4b4bf..000000000 --- a/net/inet/raw.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions for the RAW-IP module. - * - * Version: @(#)raw.h 1.0.2 05/07/93 - * - * Author: Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#ifndef _RAW_H -#define _RAW_H - - -extern struct proto raw_prot; - - -extern void raw_err(int err, unsigned char *header, unsigned long daddr, - unsigned long saddr, struct inet_protocol *protocol); -extern int raw_rcv(struct sk_buff *skb, struct device *dev, - struct options *opt, unsigned long daddr, - unsigned short len, unsigned long saddr, - int redo, struct inet_protocol *protocol); -extern int raw_recvfrom(struct sock *sk, unsigned char *to, - int len, int noblock, unsigned flags, - struct sockaddr_in *sin, int *addr_len); -extern int raw_read(struct sock *sk, unsigned char *buff, - int len, int noblock, unsigned flags); - -#endif /* _RAW_H */ diff --git a/net/inet/route.h b/net/inet/route.h deleted file mode 100644 index a693ffb41..000000000 --- a/net/inet/route.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions for the IP router. - * - * Version: @(#)route.h 1.0.4 05/27/93 - * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> - * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> - * Fixes: - * Alan Cox : Reformatted. Added ip_rt_local() - * Alan Cox : Support for TCP parameters. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#ifndef _ROUTE_H -#define _ROUTE_H - - -#include <linux/route.h> - - -/* This is an entry in the IP routing table. 
*/ -struct rtable -{ - struct rtable *rt_next; - unsigned long rt_dst; - unsigned long rt_mask; - unsigned long rt_gateway; - unsigned char rt_flags; - unsigned char rt_metric; - short rt_refcnt; - unsigned long rt_use; - unsigned short rt_mss; - unsigned long rt_window; - struct device *rt_dev; -}; - - -extern void ip_rt_flush(struct device *dev); -extern void ip_rt_add(short flags, unsigned long addr, unsigned long mask, - unsigned long gw, struct device *dev, unsigned short mss, unsigned long window); -extern struct rtable *ip_rt_route(unsigned long daddr, struct options *opt, unsigned long *src_addr); -extern struct rtable *ip_rt_local(unsigned long daddr, struct options *opt, unsigned long *src_addr); -extern int rt_get_info(char * buffer, char **start, off_t offset, int length); -extern int ip_rt_ioctl(unsigned int cmd, void *arg); - -#endif /* _ROUTE_H */ diff --git a/net/inet/snmp.h b/net/inet/snmp.h deleted file mode 100644 index 552292be6..000000000 --- a/net/inet/snmp.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * - * SNMP MIB entries for the IP subsystem. - * - * Alan Cox <gw4pts@gw4pts.ampr.org> - * - * We don't chose to implement SNMP in the kernel (this would - * be silly as SNMP is a pain in the backside in places). We do - * however need to collect the MIB statistics and export them - * out of /proc (eventually) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#ifndef _SNMP_H -#define _SNMP_H - -/* - * We use all unsigned longs. Linux will soon be so reliable that even these - * will rapidly get too small 8-). Seriously consider the IpInReceives count - * on the 20Gb/s + networks people expect in a few years time! 
- */ - -struct ip_mib -{ - unsigned long IpForwarding; - unsigned long IpDefaultTTL; - unsigned long IpInReceives; - unsigned long IpInHdrErrors; - unsigned long IpInAddrErrors; - unsigned long IpForwDatagrams; - unsigned long IpInUnknownProtos; - unsigned long IpInDiscards; - unsigned long IpInDelivers; - unsigned long IpOutRequests; - unsigned long IpOutDiscards; - unsigned long IpOutNoRoutes; - unsigned long IpReasmTimeout; - unsigned long IpReasmReqds; - unsigned long IpReasmOKs; - unsigned long IpReasmFails; - unsigned long IpFragOKs; - unsigned long IpFragFails; - unsigned long IpFragCreates; -}; - - -struct icmp_mib -{ - unsigned long IcmpInMsgs; - unsigned long IcmpInErrors; - unsigned long IcmpInDestUnreachs; - unsigned long IcmpInTimeExcds; - unsigned long IcmpInParmProbs; - unsigned long IcmpInSrcQuenchs; - unsigned long IcmpInRedirects; - unsigned long IcmpInEchos; - unsigned long IcmpInEchoReps; - unsigned long IcmpInTimestamps; - unsigned long IcmpInTimestampReps; - unsigned long IcmpInAddrMasks; - unsigned long IcmpInAddrMaskReps; - unsigned long IcmpOutMsgs; - unsigned long IcmpOutErrors; - unsigned long IcmpOutDestUnreachs; - unsigned long IcmpOutTimeExcds; - unsigned long IcmpOutParmProbs; - unsigned long IcmpOutSrcQuenchs; - unsigned long IcmpOutRedirects; - unsigned long IcmpOutEchos; - unsigned long IcmpOutEchoReps; - unsigned long IcmpOutTimestamps; - unsigned long IcmpOutTimestampReps; - unsigned long IcmpOutAddrMasks; - unsigned long IcmpOutAddrMaskReps; -}; - -struct tcp_mib -{ - unsigned long TcpRtoAlgorithm; - unsigned long TcpRtoMin; - unsigned long TcpRtoMax; - unsigned long TcpMaxConn; - unsigned long TcpActiveOpens; - unsigned long TcpPassiveOpens; - unsigned long TcpAttemptFails; - unsigned long TcpEstabResets; - unsigned long TcpCurrEstab; - unsigned long TcpInSegs; - unsigned long TcpOutSegs; - unsigned long TcpRetransSegs; -}; - -struct udp_mib -{ - unsigned long UdpInDatagrams; - unsigned long UdpNoPorts; - unsigned long 
UdpInErrors; - unsigned long UdpOutDatagrams; -}; - - -#endif diff --git a/net/inet/sock.h b/net/inet/sock.h deleted file mode 100644 index 07b036fd5..000000000 --- a/net/inet/sock.h +++ /dev/null @@ -1,287 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions for the AF_INET socket handler. - * - * Version: @(#)sock.h 1.0.4 05/13/93 - * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> - * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> - * Corey Minyard <wf-rch!minyard@relay.EU.net> - * Florian La Roche <flla@stud.uni-sb.de> - * - * Fixes: - * Alan Cox : Volatiles in skbuff pointers. See - * skbuff comments. May be overdone, - * better to prove they can be removed - * than the reverse. - * Alan Cox : Added a zapped field for tcp to note - * a socket is reset and must stay shut up - * Alan Cox : New fields for options - * Pauline Middelink : identd support - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#ifndef _SOCK_H -#define _SOCK_H - -#include <linux/timer.h> -#include <linux/ip.h> /* struct options */ -#include <linux/tcp.h> /* struct tcphdr */ - -#include <linux/skbuff.h> /* struct sk_buff */ -#include "protocol.h" /* struct inet_protocol */ -#ifdef CONFIG_AX25 -#include "ax25.h" -#endif -#ifdef CONFIG_IPX -#include "ipx.h" -#endif - -#define SOCK_ARRAY_SIZE 64 - - -/* - * This structure really needs to be cleaned up. - * Most of it is for TCP, and not used by any of - * the other protocols. 
- */ -struct sock { - struct options *opt; - volatile unsigned long wmem_alloc; - volatile unsigned long rmem_alloc; - unsigned long write_seq; - unsigned long sent_seq; - unsigned long acked_seq; - unsigned long copied_seq; - unsigned long rcv_ack_seq; - unsigned long window_seq; - unsigned long fin_seq; - unsigned long urg_seq; - unsigned long urg_data; - - /* - * Not all are volatile, but some are, so we - * might as well say they all are. - */ - volatile char inuse, - dead, - urginline, - intr, - blog, - done, - reuse, - keepopen, - linger, - delay_acks, - destroy, - ack_timed, - no_check, - zapped, /* In ax25 & ipx means not linked */ - broadcast, - nonagle; - unsigned long lingertime; - int proc; - struct sock *next; - struct sock *pair; - struct sk_buff * volatile send_head; - struct sk_buff * volatile send_tail; - struct sk_buff_head back_log; - struct sk_buff *partial; - struct timer_list partial_timer; - long retransmits; - struct sk_buff_head write_queue, - receive_queue; - struct proto *prot; - struct wait_queue **sleep; - unsigned long daddr; - unsigned long saddr; - unsigned short max_unacked; - unsigned short window; - unsigned short bytes_rcv; -/* mss is min(mtu, max_window) */ - unsigned short mtu; /* mss negotiated in the syn's */ - volatile unsigned short mss; /* current eff. 
mss - can change */ - volatile unsigned short user_mss; /* mss requested by user in ioctl */ - volatile unsigned short max_window; - unsigned long window_clamp; - unsigned short num; - volatile unsigned short cong_window; - volatile unsigned short cong_count; - volatile unsigned short ssthresh; - volatile unsigned short packets_out; - volatile unsigned short shutdown; - volatile unsigned long rtt; - volatile unsigned long mdev; - volatile unsigned long rto; -/* currently backoff isn't used, but I'm maintaining it in case - * we want to go back to a backoff formula that needs it - */ - volatile unsigned short backoff; - volatile short err; - unsigned char protocol; - volatile unsigned char state; - volatile unsigned char ack_backlog; - unsigned char max_ack_backlog; - unsigned char priority; - unsigned char debug; - unsigned short rcvbuf; - unsigned short sndbuf; - unsigned short type; - unsigned char localroute; /* Route locally only */ -#ifdef CONFIG_IPX - ipx_address ipx_source_addr,ipx_dest_addr; - unsigned short ipx_type; -#endif -#ifdef CONFIG_AX25 -/* Really we want to add a per protocol private area */ - ax25_address ax25_source_addr,ax25_dest_addr; - struct sk_buff *volatile ax25_retxq[8]; - char ax25_state,ax25_vs,ax25_vr,ax25_lastrxnr,ax25_lasttxnr; - char ax25_condition; - char ax25_retxcnt; - char ax25_xx; - char ax25_retxqi; - char ax25_rrtimer; - char ax25_timer; - unsigned char ax25_n2; - unsigned short ax25_t1,ax25_t2,ax25_t3; - ax25_digi *ax25_digipeat; -#endif -/* IP 'private area' or will be eventually */ - int ip_ttl; /* TTL setting */ - int ip_tos; /* TOS */ - struct tcphdr dummy_th; - - /* This part is used for the timeout functions (timer.c). */ - int timeout; /* What are we waiting for? 
*/ - struct timer_list timer; - struct timeval stamp; - - /* identd */ - struct socket *socket; - - /* Callbacks */ - void (*state_change)(struct sock *sk); - void (*data_ready)(struct sock *sk,int bytes); - void (*write_space)(struct sock *sk); - void (*error_report)(struct sock *sk); - -}; - -struct proto { - struct sk_buff * (*wmalloc)(struct sock *sk, - unsigned long size, int force, - int priority); - struct sk_buff * (*rmalloc)(struct sock *sk, - unsigned long size, int force, - int priority); - void (*wfree)(struct sock *sk, struct sk_buff *skb, - unsigned long size); - void (*rfree)(struct sock *sk, struct sk_buff *skb, - unsigned long size); - unsigned long (*rspace)(struct sock *sk); - unsigned long (*wspace)(struct sock *sk); - void (*close)(struct sock *sk, int timeout); - int (*read)(struct sock *sk, unsigned char *to, - int len, int nonblock, unsigned flags); - int (*write)(struct sock *sk, unsigned char *to, - int len, int nonblock, unsigned flags); - int (*sendto)(struct sock *sk, - unsigned char *from, int len, int noblock, - unsigned flags, struct sockaddr_in *usin, - int addr_len); - int (*recvfrom)(struct sock *sk, - unsigned char *from, int len, int noblock, - unsigned flags, struct sockaddr_in *usin, - int *addr_len); - int (*build_header)(struct sk_buff *skb, - unsigned long saddr, - unsigned long daddr, - struct device **dev, int type, - struct options *opt, int len, int tos, int ttl); - int (*connect)(struct sock *sk, - struct sockaddr_in *usin, int addr_len); - struct sock * (*accept) (struct sock *sk, int flags); - void (*queue_xmit)(struct sock *sk, - struct device *dev, struct sk_buff *skb, - int free); - void (*retransmit)(struct sock *sk, int all); - void (*write_wakeup)(struct sock *sk); - void (*read_wakeup)(struct sock *sk); - int (*rcv)(struct sk_buff *buff, struct device *dev, - struct options *opt, unsigned long daddr, - unsigned short len, unsigned long saddr, - int redo, struct inet_protocol *protocol); - int (*select)(struct 
sock *sk, int which, - select_table *wait); - int (*ioctl)(struct sock *sk, int cmd, - unsigned long arg); - int (*init)(struct sock *sk); - void (*shutdown)(struct sock *sk, int how); - int (*setsockopt)(struct sock *sk, int level, int optname, - char *optval, int optlen); - int (*getsockopt)(struct sock *sk, int level, int optname, - char *optval, int *option); - unsigned short max_header; - unsigned long retransmits; - struct sock * sock_array[SOCK_ARRAY_SIZE]; - char name[80]; -}; - -#define TIME_WRITE 1 -#define TIME_CLOSE 2 -#define TIME_KEEPOPEN 3 -#define TIME_DESTROY 4 -#define TIME_DONE 5 /* used to absorb those last few packets */ -#define TIME_PROBE0 6 -#define SOCK_DESTROY_TIME 1000 /* about 10 seconds */ - -#define PROT_SOCK 1024 /* Sockets 0-1023 can't be bound too unless you are superuser */ - -#define SHUTDOWN_MASK 3 -#define RCV_SHUTDOWN 1 -#define SEND_SHUTDOWN 2 - - -extern void destroy_sock(struct sock *sk); -extern unsigned short get_new_socknum(struct proto *, unsigned short); -extern void put_sock(unsigned short, struct sock *); -extern void release_sock(struct sock *sk); -extern struct sock *get_sock(struct proto *, unsigned short, - unsigned long, unsigned short, - unsigned long); -extern void print_sk(struct sock *); -extern struct sk_buff *sock_wmalloc(struct sock *sk, - unsigned long size, int force, - int priority); -extern struct sk_buff *sock_rmalloc(struct sock *sk, - unsigned long size, int force, - int priority); -extern void sock_wfree(struct sock *sk, struct sk_buff *skb, - unsigned long size); -extern void sock_rfree(struct sock *sk, struct sk_buff *skb, - unsigned long size); -extern unsigned long sock_rspace(struct sock *sk); -extern unsigned long sock_wspace(struct sock *sk); - -extern int sock_setsockopt(struct sock *sk,int level,int op,char *optval,int optlen); - -extern int sock_getsockopt(struct sock *sk,int level,int op,char *optval,int *optlen); -extern struct sk_buff *sock_alloc_send_skb(struct sock *skb, unsigned 
long size, int noblock, int *errcode); -extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); - -/* declarations from timer.c */ -extern struct sock *timer_base; - -void delete_timer (struct sock *); -void reset_timer (struct sock *, int, unsigned long); -void net_timer (unsigned long); - - -#endif /* _SOCK_H */ diff --git a/net/inet/tcp.h b/net/inet/tcp.h deleted file mode 100644 index 2dcb22c82..000000000 --- a/net/inet/tcp.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions for the TCP module. - * - * Version: @(#)tcp.h 1.0.5 05/23/93 - * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> - * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#ifndef _TCP_H -#define _TCP_H - -#include <linux/tcp.h> - -#define MAX_SYN_SIZE 44 + MAX_HEADER -#define MAX_FIN_SIZE 40 + MAX_HEADER -#define MAX_ACK_SIZE 40 + MAX_HEADER -#define MAX_RESET_SIZE 40 + MAX_HEADER -#define MAX_WINDOW 8192 -#define MIN_WINDOW 2048 -#define MAX_ACK_BACKLOG 2 -#define MIN_WRITE_SPACE 2048 -#define TCP_WINDOW_DIFF 2048 - -/* urg_data states */ -#define URG_VALID 0x0100 -#define URG_NOTYET 0x0200 -#define URG_READ 0x0400 - -#define TCP_RETR1 7 /* - * This is how many retries it does before it - * tries to figure out if the gateway is - * down. - */ - -#define TCP_RETR2 15 /* - * This should take at least - * 90 minutes to time out. 
- */ - -#define TCP_TIMEOUT_LEN (15*60*HZ) /* should be about 15 mins */ -#define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to successfully - * close the socket, about 60 seconds */ -#define TCP_ACK_TIME (3*HZ) /* time to delay before sending an ACK */ -#define TCP_DONE_TIME 250 /* maximum time to wait before actually - * destroying a socket */ -#define TCP_WRITE_TIME 3000 /* initial time to wait for an ACK, - * after last transmit */ -#define TCP_TIMEOUT_INIT (3*HZ) /* RFC 1122 initial timeout value */ -#define TCP_SYN_RETRIES 5 /* number of times to retry opening a - * connection */ -#define TCP_PROBEWAIT_LEN 100 /* time to wait between probes when - * I've got something to write and - * there is no window */ - -#define TCP_NO_CHECK 0 /* turn to one if you want the default - * to be no checksum */ - - -/* - * TCP option - */ - -#define TCPOPT_NOP 1 -#define TCPOPT_EOL 0 -#define TCPOPT_MSS 2 - -/* - * The next routines deal with comparing 32 bit unsigned ints - * and worry about wraparound (automatic with unsigned arithmetic). - */ -static inline int before(unsigned long seq1, unsigned long seq2) -{ - return (long)(seq1-seq2) < 0; -} - -static inline int after(unsigned long seq1, unsigned long seq2) -{ - return (long)(seq1-seq2) > 0; -} - - -/* is s2<=s1<=s3 ? */ -static inline int between(unsigned long seq1, unsigned long seq2, unsigned long seq3) -{ - return (after(seq1+1, seq2) && before(seq1, seq3+1)); -} - - -/* - * List all states of a TCP socket that can be viewed as a "connected" - * state. This now includes TCP_SYN_RECV, although I am not yet fully - * convinced that this is the solution for the 'getpeername(2)' - * problem. Thanks to Stephen A. 
Wood <saw@cebaf.gov> -FvK - */ -static inline const int -tcp_connected(const int state) -{ - return(state == TCP_ESTABLISHED || state == TCP_CLOSE_WAIT || - state == TCP_FIN_WAIT1 || state == TCP_FIN_WAIT2 || - state == TCP_SYN_RECV); -} - - -extern struct proto tcp_prot; - - -extern void tcp_err(int err, unsigned char *header, unsigned long daddr, - unsigned long saddr, struct inet_protocol *protocol); -extern void tcp_shutdown (struct sock *sk, int how); -extern int tcp_rcv(struct sk_buff *skb, struct device *dev, - struct options *opt, unsigned long daddr, - unsigned short len, unsigned long saddr, int redo, - struct inet_protocol *protocol); - -extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); - -extern int tcp_select_window(struct sock *sk); -extern void tcp_send_check(struct tcphdr *th, unsigned long saddr, - unsigned long daddr, int len, struct sock *sk); -extern void tcp_send_probe0(struct sock *sk); -extern void tcp_enqueue_partial(struct sk_buff *, struct sock *); -extern struct sk_buff * tcp_dequeue_partial(struct sock *); - - -#endif /* _TCP_H */ diff --git a/net/inet/udp.h b/net/inet/udp.h deleted file mode 100644 index 6bfbb3cb7..000000000 --- a/net/inet/udp.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions for the UDP module. - * - * Version: @(#)udp.h 1.0.2 05/07/93 - * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> - * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> - * - * Fixes: - * Alan Cox : Turned on udp checksums. I don't want to - * chase 'memory corruption' bugs that aren't! 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#ifndef _UDP_H -#define _UDP_H - -#include <linux/udp.h> - - -#define UDP_NO_CHECK 0 - - -extern struct proto udp_prot; - - -extern void udp_err(int err, unsigned char *header, unsigned long daddr, - unsigned long saddr, struct inet_protocol *protocol); -extern int udp_recvfrom(struct sock *sk, unsigned char *to, - int len, int noblock, unsigned flags, - struct sockaddr_in *sin, int *addr_len); -extern int udp_read(struct sock *sk, unsigned char *buff, - int len, int noblock, unsigned flags); -extern int udp_connect(struct sock *sk, - struct sockaddr_in *usin, int addr_len); -extern int udp_rcv(struct sk_buff *skb, struct device *dev, - struct options *opt, unsigned long daddr, - unsigned short len, unsigned long saddr, int redo, - struct inet_protocol *protocol); -extern int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); - - -#endif /* _UDP_H */ diff --git a/net/inet/Makefile b/net/ipv4/Makefile index 95af29230..296c4d114 100644 --- a/net/inet/Makefile +++ b/net/ipv4/Makefile @@ -15,14 +15,9 @@ $(CC) $(CFLAGS) -S $< -OBJS := sock.o eth.o dev.o skbuff.o datagram.o - -ifdef CONFIG_INET - -OBJS := $(OBJS) utils.o route.o proc.o timer.o protocol.o packet.o \ - arp.o ip.o raw.o icmp.o tcp.o udp.o devinet.o af_inet.o - -endif +OBJS := utils.o route.o proc.o timer.o protocol.o packet.o \ + arp.o ip.o raw.o icmp.o tcp.o udp.o devinet.o af_inet.o \ + igmp.o ip_fw.o checksum.o ipip.o ifdef CONFIG_INET_RARP @@ -30,27 +25,15 @@ OBJS := $(OBJS) rarp.o endif -ifdef CONFIG_AX25 - -OBJS := $(OBJS) ax25.o ax25_in.o ax25_out.o ax25_route.o ax25_subr.o ax25_timer.o - -endif - -ifdef CONFIG_IPX - -OBJS := $(OBJS) ipx.o pe2.o p8022.o p8023.o - -endif - -ifdef CONFIG_NET +ifdef CONFIG_INET -inet.o: $(OBJS) - $(LD) 
-r -o inet.o $(OBJS) +ipv4.o: $(OBJS) + $(LD) -r -o ipv4.o $(OBJS) else -inet.o: - echo | $(AS) -o inet.o +ipv4.o: + $(AR) rcs ipv4.o endif diff --git a/net/ipv4/README.TCP b/net/ipv4/README.TCP new file mode 100644 index 000000000..f18963f88 --- /dev/null +++ b/net/ipv4/README.TCP @@ -0,0 +1,39 @@ +How the new TCP output machine [nyi] works. + + +Data is kept on a single queue. The skb->users flag tells us if the frame is +one that has been queued already. To add a frame we throw it on the end. Ack +walks down the list from the start. + +We keep a set of control flags + + + sk->tcp_pend_event + + TCP_PEND_ACK Ack needed + TCP_ACK_NOW Needed now + TCP_WINDOW Window update check + TCP_WINZERO Zero probing + + + sk->transmit_queue The transmission frame begin + sk->transmit_new First new frame pointer + sk->transmit_end Where to add frames + + sk->tcp_last_tx_ack Last ack seen + sk->tcp_dup_ack Dup ack count for fast retransmit + + +Frames are queued for output by tcp_write. We do our best to send the frames +off immediately if possible, but otherwise queue and compute the body +checksum in the copy. + +When a write is done we try to clear any pending events and piggy back them. +If the window is full we queue full sized frames. On the firs timeout in +zero window we split this. + +On a timer we walk the retransmit list to send any retransmits, update the +backoff timers etc. A change of route table stamp causes a change of header +and recompute. We add any new tcp level headers and refinish the checksum +before sending. + diff --git a/net/inet/af_inet.c b/net/ipv4/af_inet.c index 8e7739611..fbfc44bb2 100644 --- a/net/inet/af_inet.c +++ b/net/ipv4/af_inet.c @@ -21,6 +21,21 @@ * Alan Cox : Asynchronous I/O support * Alan Cox : Keep correct socket pointer on sock structures * when accept() ed + * Alan Cox : Semantics of SO_LINGER aren't state moved + * to close when you look carefully. With + * this fixed and the accept bug fixed + * some RPC stuff seems happier. 
+ * Niibe Yutaka : 4.4BSD style write async I/O + * Alan Cox, + * Tony Gale : Fixed reuse semantics. + * Alan Cox : bind() shouldn't abort existing but dead + * sockets. Stops FTP netin:.. I hope. + * Alan Cox : bind() works correctly for RAW sockets. Note + * that FreeBSD at least is broken in this respect + * so be careful with compatibility tests... + * Alan Cox : routing cache support + * Alan Cox : memzero the socket structure for compactness. + * Matt Day : nonblock connect error handler * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -49,17 +64,18 @@ #include <linux/inet.h> #include <linux/netdevice.h> -#include "ip.h" -#include "protocol.h" -#include "arp.h" -#include "rarp.h" -#include "route.h" -#include "tcp.h" -#include "udp.h" +#include <net/ip.h> +#include <net/protocol.h> +#include <net/arp.h> +#include <net/rarp.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/udp.h> #include <linux/skbuff.h> -#include "sock.h" -#include "raw.h" -#include "icmp.h" +#include <net/sock.h> +#include <net/raw.h> +#include <net/icmp.h> +#include <linux/ip_fw.h> #define min(a,b) ((a)<(b)?(a):(b)) @@ -149,20 +165,30 @@ void put_sock(unsigned short num, struct sock *sk) struct sock *sk1; struct sock *sk2; int mask; + unsigned long flags; + + if(sk->type==SOCK_PACKET) + return; sk->num = num; sk->next = NULL; num = num &(SOCK_ARRAY_SIZE -1); /* We can't have an interrupt re-enter here. 
*/ + save_flags(flags); cli(); + + sk->prot->inuse += 1; + if (sk->prot->highestinuse < sk->prot->inuse) + sk->prot->highestinuse = sk->prot->inuse; + if (sk->prot->sock_array[num] == NULL) { sk->prot->sock_array[num] = sk; - sti(); + restore_flags(flags); return; } - sti(); + restore_flags(flags); for(mask = 0xff000000; mask != 0xffffffff; mask = (mask >> 8) | mask) { if ((mask & sk->saddr) && @@ -206,20 +232,26 @@ void put_sock(unsigned short num, struct sock *sk) static void remove_sock(struct sock *sk1) { struct sock *sk2; + unsigned long flags; + if (sk1->type==SOCK_PACKET) + return; + if (!sk1->prot) { - printk("sock.c: remove_sock: sk1->prot == NULL\n"); + NETDEBUG(printk("sock.c: remove_sock: sk1->prot == NULL\n")); return; } /* We can't have this changing out from under us. */ + save_flags(flags); cli(); sk2 = sk1->prot->sock_array[sk1->num &(SOCK_ARRAY_SIZE -1)]; if (sk2 == sk1) { + sk1->prot->inuse -= 1; sk1->prot->sock_array[sk1->num &(SOCK_ARRAY_SIZE -1)] = sk1->next; - sti(); + restore_flags(flags); return; } @@ -230,11 +262,12 @@ static void remove_sock(struct sock *sk1) if (sk2) { + sk1->prot->inuse -= 1; sk2->next = sk1->next; - sti(); + restore_flags(flags); return; } - sti(); + restore_flags(flags); } /* @@ -247,7 +280,7 @@ void destroy_sock(struct sock *sk) sk->inuse = 1; /* just to be safe. */ - /* Incase it's sleeping somewhere. */ + /* In case it's sleeping somewhere. */ if (!sk->dead) sk->write_space(sk); @@ -255,7 +288,9 @@ void destroy_sock(struct sock *sk) /* Now we can no longer get new packets. 
*/ delete_timer(sk); - + /* Nor send them */ + del_timer(&sk->retransmit_timer); + while ((skb = tcp_dequeue_partial(sk)) != NULL) { IS_SKB(skb); kfree_skb(skb, FREE_WRITE); @@ -426,6 +461,8 @@ static int inet_autobind(struct sock *sk) sk->num = get_new_socknum(sk->prot, 0); if (sk->num == 0) return(-EAGAIN); + udp_cache_zap(); + tcp_cache_zap(); put_sock(sk->num, sk); sk->dummy_th.source = ntohs(sk->num); } @@ -444,8 +481,15 @@ static int inet_listen(struct socket *sock, int backlog) return -EAGAIN; /* We might as well re use these. */ + /* + * note that the backlog is "unsigned char", so truncate it + * somewhere. We might as well truncate it to what everybody + * else does.. + */ + if (backlog > 5) + backlog = 5; sk->max_ack_backlog = backlog; - if (sk->state != TCP_LISTEN) + if (sk->state != TCP_LISTEN) { sk->ack_backlog = 0; sk->state = TCP_LISTEN; @@ -469,10 +513,18 @@ static void def_callback2(struct sock *sk,int len) if(!sk->dead) { wake_up_interruptible(sk->sleep); - sock_wake_async(sk->socket); + sock_wake_async(sk->socket, 1); } } +static void def_callback3(struct sock *sk) +{ + if(!sk->dead) + { + wake_up_interruptible(sk->sleep); + sock_wake_async(sk->socket, 2); + } +} /* * Create an inet socket. @@ -490,8 +542,9 @@ static int inet_create(struct socket *sock, int protocol) sk = (struct sock *) kmalloc(sizeof(*sk), GFP_KERNEL); if (sk == NULL) return(-ENOBUFS); - sk->num = 0; - sk->reuse = 0; + memset(sk,0,sizeof(*sk)); /* Efficient way to set most fields to zero */ +/* sk->num = 0; + * sk->reuse = 0;*/ switch(sock->type) { case SOCK_STREAM: @@ -530,10 +583,6 @@ static int inet_create(struct socket *sock, int protocol) } prot = &raw_prot; sk->reuse = 1; - sk->no_check = 0; /* - * Doesn't matter no checksum is - * performed anyway. - */ sk->num = protocol; break; @@ -550,9 +599,6 @@ static int inet_create(struct socket *sock, int protocol) } prot = &packet_prot; sk->reuse = 1; - sk->no_check = 0; /* Doesn't matter no checksum is - * performed anyway. 
- */ sk->num = protocol; break; @@ -564,15 +610,20 @@ static int inet_create(struct socket *sock, int protocol) #ifdef CONFIG_TCP_NAGLE_OFF sk->nonagle = 1; #else - sk->nonagle = 0; +/* sk->nonagle = 0;*/ #endif sk->type = sock->type; - sk->stamp.tv_sec=0; sk->protocol = protocol; - sk->wmem_alloc = 0; - sk->rmem_alloc = 0; sk->sndbuf = SK_WMEM_MAX; sk->rcvbuf = SK_RMEM_MAX; + sk->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/ + sk->cong_window = 1; /* start with only sending one packet at a time. */ + sk->priority = 1; + sk->state = TCP_CLOSE; +#ifdef WHAT_WE_DO_THE_MEMZERO_INSTEAD_OF + sk->stamp.tv_sec=0; + sk->wmem_alloc = 0; + sk->rmem_alloc = 0; sk->pair = NULL; sk->opt = NULL; sk->write_seq = 0; @@ -583,11 +634,9 @@ static int inet_create(struct socket *sock, int protocol) sk->urg_data = 0; sk->proc = 0; sk->rtt = 0; /*TCP_WRITE_TIME << 3;*/ - sk->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/ sk->mdev = 0; sk->backoff = 0; sk->packets_out = 0; - sk->cong_window = 1; /* start with only sending one packet at a time. */ sk->cong_count = 0; sk->ssthresh = 0; sk->max_window = 0; @@ -595,7 +644,6 @@ static int inet_create(struct socket *sock, int protocol) sk->intr = 0; sk->linger = 0; sk->destroy = 0; - sk->priority = 1; sk->shutdown = 0; sk->keepopen = 0; sk->zapped = 0; @@ -603,26 +651,16 @@ static int inet_create(struct socket *sock, int protocol) sk->ack_backlog = 0; sk->window = 0; sk->bytes_rcv = 0; - sk->state = TCP_CLOSE; sk->dead = 0; sk->ack_timed = 0; sk->partial = NULL; sk->user_mss = 0; sk->debug = 0; - - /* this is how many unacked bytes we will accept for this socket. */ - sk->max_unacked = 2048; /* needs to be at most 2 full packets. */ - /* how many packets we should send before forcing an ack. 
if this is set to zero it is the same as sk->delay_acks = 0 */ sk->max_ack_backlog = 0; sk->inuse = 0; sk->delay_acks = 0; - skb_queue_head_init(&sk->write_queue); - skb_queue_head_init(&sk->receive_queue); - sk->mtu = 576; - sk->prot = prot; - sk->sleep = sock->wait; sk->daddr = 0; sk->saddr = 0 /* ip_my_addr() */; sk->err = 0; @@ -633,13 +671,7 @@ static int inet_create(struct socket *sock, int protocol) sk->timeout = 0; sk->broadcast = 0; sk->localroute = 0; - init_timer(&sk->timer); - sk->timer.data = (unsigned long)sk; - sk->timer.function = &net_timer; - skb_queue_head_init(&sk->back_log); sk->blog = 0; - sock->data =(void *) sk; - sk->dummy_th.doff = sizeof(sk->dummy_th)/4; sk->dummy_th.res1=0; sk->dummy_th.res2=0; sk->dummy_th.urg_ptr = 0; @@ -651,11 +683,41 @@ static int inet_create(struct socket *sock, int protocol) sk->dummy_th.urg = 0; sk->dummy_th.dest = 0; sk->ip_tos=0; + sk->ip_route_cache=NULL; + sk->ip_hcache_ver= 0; + sk->ip_option_len=0; + sk->ip_option_flen=0; + sk->ip_opt_next_hop=0; + sk->ip_opt_ptr[0]=NULL; + sk->ip_opt_ptr[1]=NULL; +#endif + + /* this is how many unacked bytes we will accept for this socket. */ + sk->max_unacked = 2048; /* needs to be at most 2 full packets. 
*/ + + skb_queue_head_init(&sk->write_queue); + skb_queue_head_init(&sk->receive_queue); + sk->mtu = 576; + sk->prot = prot; + sk->sleep = sock->wait; + init_timer(&sk->timer); + init_timer(&sk->retransmit_timer); + sk->timer.data = (unsigned long)sk; + sk->timer.function = &net_timer; + skb_queue_head_init(&sk->back_log); + sock->data =(void *) sk; + sk->dummy_th.doff = sizeof(sk->dummy_th)/4; sk->ip_ttl=64; +#ifdef CONFIG_IP_MULTICAST + sk->ip_mc_loop=1; + sk->ip_mc_ttl=1; + *sk->ip_mc_name=0; + sk->ip_mc_list=NULL; +#endif sk->state_change = def_callback1; sk->data_ready = def_callback2; - sk->write_space = def_callback1; + sk->write_space = def_callback3; sk->error_report = def_callback1; if (sk->num) @@ -692,6 +754,20 @@ static int inet_dup(struct socket *newsock, struct socket *oldsock) return(inet_create(newsock,((struct sock *)(oldsock->data))->protocol)); } +/* + * Return 1 if we still have things to send in our buffers. + */ +static inline int closing(struct sock * sk) +{ + switch (sk->state) { + case TCP_FIN_WAIT1: + case TCP_CLOSING: + case TCP_LAST_ACK: + return 1; + } + return 0; +} + /* * The peer socket should always be NULL (or else). When we call this @@ -709,13 +785,20 @@ static int inet_release(struct socket *sock, struct socket *peer) /* Start closing the connection. This may take a while. */ +#ifdef CONFIG_IP_MULTICAST + /* Applications forget to leave groups before exiting */ + ip_mc_drop_socket(sk); +#endif /* * If linger is set, we don't return until the close - * is complete. Other wise we return immediately. The + * is complete. Otherwise we return immediately. The * actually closing is done the same either way. + * + * If the close is due to the process exiting, we never + * linger.. 
*/ - if (sk->linger == 0) + if (sk->linger == 0 || (current->flags & PF_EXITING)) { sk->prot->close(sk,0); sk->dead = 1; @@ -726,7 +809,7 @@ static int inet_release(struct socket *sock, struct socket *peer) cli(); if (sk->lingertime) current->timeout = jiffies + HZ*sk->lingertime; - while(sk->state != TCP_CLOSE && current->timeout>0) + while(closing(sk) && current->timeout>0) { interruptible_sleep_on(sk->sleep); if (current->signal & ~current->blocked) @@ -747,91 +830,94 @@ static int inet_release(struct socket *sock, struct socket *peer) sk->inuse = 1; /* This will destroy it. */ - release_sock(sk); sock->data = NULL; + release_sock(sk); sk->socket = NULL; return(0); } -/* this needs to be changed to disallow - the rebinding of sockets. What error - should it return? */ - static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in *addr=(struct sockaddr_in *)uaddr; struct sock *sk=(struct sock *)sock->data, *sk2; - unsigned short snum; + unsigned short snum = 0 /* Stoopid compiler.. this IS ok */; int chk_addr_ret; /* check this error. */ if (sk->state != TCP_CLOSE) return(-EIO); - if (sk->num != 0) - return(-EINVAL); - if(addr_len<sizeof(struct sockaddr_in)) return -EINVAL; - - snum = ntohs(addr->sin_port); - - /* - * We can't just leave the socket bound wherever it is, it might - * be bound to a privileged port. However, since there seems to - * be a bug here, we will leave it if the port is not privileged. 
- */ - if (snum == 0) + + if(sock->type != SOCK_RAW) { - snum = get_new_socknum(sk->prot, 0); - } - if (snum < PROT_SOCK && !suser()) - return(-EACCES); + if (sk->num != 0) + return(-EINVAL); + snum = ntohs(addr->sin_port); + +#ifdef CONFIG_IP_MASQUERADE + /* + * The kernel masquerader needs some ports + */ + if(snum>=PORT_MASQ_BEGIN && snum<=PORT_MASQ_END) + return -EADDRINUSE; +#endif + + if (snum == 0) + snum = get_new_socknum(sk->prot, 0); + if (snum < PROT_SOCK && !suser()) + return(-EACCES); + } + chk_addr_ret = ip_chk_addr(addr->sin_addr.s_addr); - if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR) + if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR && chk_addr_ret != IS_MULTICAST) return(-EADDRNOTAVAIL); /* Source address MUST be ours! */ - + if (chk_addr_ret || addr->sin_addr.s_addr == 0) sk->saddr = addr->sin_addr.s_addr; - - /* Make sure we are allowed to bind here. */ - cli(); -outside_loop: - for(sk2 = sk->prot->sock_array[snum & (SOCK_ARRAY_SIZE -1)]; - sk2 != NULL; sk2 = sk2->next) + + if(sock->type != SOCK_RAW) { -/* should be below! */ - if (sk2->num != snum) continue; - if (sk2->dead) - { - destroy_sock(sk2); - goto outside_loop; - } - if (!sk->reuse) - { - sti(); - return(-EADDRINUSE); - } - - if (sk2->num != snum) - continue; /* more than one */ - if (sk2->saddr != sk->saddr) - continue; /* socket per slot ! -FB */ - if (!sk2->reuse) + /* Make sure we are allowed to bind here. */ + cli(); + for(sk2 = sk->prot->sock_array[snum & (SOCK_ARRAY_SIZE -1)]; + sk2 != NULL; sk2 = sk2->next) { - sti(); - return(-EADDRINUSE); + /* should be below! */ + if (sk2->num != snum) + continue; + if (!sk->reuse) + { + sti(); + return(-EADDRINUSE); + } + + if (sk2->num != snum) + continue; /* more than one */ + if (sk2->saddr != sk->saddr) + continue; /* socket per slot ! 
-FB */ + if (!sk2->reuse || sk2->state==TCP_LISTEN) + { + sti(); + return(-EADDRINUSE); + } } - } - sti(); + sti(); - remove_sock(sk); - put_sock(snum, sk); - sk->dummy_th.source = ntohs(sk->num); - sk->daddr = 0; - sk->dummy_th.dest = 0; + remove_sock(sk); + if(sock->type==SOCK_DGRAM) + udp_cache_zap(); + if(sock->type==SOCK_STREAM) + tcp_cache_zap(); + put_sock(snum, sk); + sk->dummy_th.source = ntohs(sk->num); + sk->daddr = 0; + sk->dummy_th.dest = 0; + } + sk->ip_route_cache=NULL; return(0); } @@ -847,7 +933,7 @@ static int inet_error(struct sock *sk) cli(); err=sk->err; sk->err=0; - sti(); + restore_flags(flags); return -err; } @@ -871,8 +957,15 @@ static int inet_connect(struct socket *sock, struct sockaddr * uaddr, } if (sock->state == SS_CONNECTING && sk->protocol == IPPROTO_TCP && (flags & O_NONBLOCK)) + { + if(sk->err!=0) + { + err=sk->err; + sk->err=0; + return -err; + } return -EALREADY; /* Connecting is currently in progress */ - + } if (sock->state != SS_CONNECTING) { /* We may need to bind the socket. */ @@ -940,7 +1033,7 @@ static int inet_socketpair(struct socket *sock1, struct socket *sock2) /* - * FIXME: Get BSD behaviour + * Accept a pending connection. The TCP layer now gives BSD semantics. */ static int inet_accept(struct socket *sock, struct socket *newsock, int flags) @@ -953,7 +1046,7 @@ static int inet_accept(struct socket *sock, struct socket *newsock, int flags) /* * We've been passed an extra socket. * We need to free it up because the tcp module creates - * it's own when it accepts one. + * its own when it accepts one. */ if (newsock->data) { @@ -977,8 +1070,6 @@ static int inet_accept(struct socket *sock, struct socket *newsock, int flags) sk2 = sk1->prot->accept(sk1,flags); if (sk2 == NULL) { - if (sk1->err <= 0) - printk("Warning sock.c:sk1->err <= 0. 
Returning non-error.\n"); err=sk1->err; sk1->err=0; return(-err); @@ -1209,8 +1300,8 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return err; memcpy_tofs((void *)arg,&sk->stamp,sizeof(struct timeval)); return 0; - case SIOCADDRT: case SIOCADDRTOLD: - case SIOCDELRT: case SIOCDELRTOLD: + case SIOCADDRT: + case SIOCDELRT: return(ip_rt_ioctl(cmd,(void *) arg)); case SIOCDARP: case SIOCGARP: @@ -1227,6 +1318,8 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFFLAGS: case SIOCGIFADDR: case SIOCSIFADDR: + case SIOCADDMULTI: + case SIOCDELMULTI: case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCGIFBRDADDR: @@ -1331,6 +1424,79 @@ struct sock *get_sock(struct proto *prot, unsigned short num, return result; } +/* + * Deliver a datagram to raw sockets. + */ + +struct sock *get_sock_raw(struct sock *sk, + unsigned short num, + unsigned long raddr, + unsigned long laddr) +{ + struct sock *s; + + s=sk; + + for(; s != NULL; s = s->next) + { + if (s->num != num) + continue; + if(s->dead && (s->state == TCP_CLOSE)) + continue; + if(s->daddr && s->daddr!=raddr) + continue; + if(s->saddr && s->saddr!=laddr) + continue; + return(s); + } + return(NULL); +} + +#ifdef CONFIG_IP_MULTICAST +/* + * Deliver a datagram to broadcast/multicast sockets. + */ + +struct sock *get_sock_mcast(struct sock *sk, + unsigned short num, + unsigned long raddr, + unsigned short rnum, unsigned long laddr) +{ + struct sock *s; + unsigned short hnum; + + hnum = ntohs(num); + + /* + * SOCK_ARRAY_SIZE must be a power of two. This will work better + * than a prime unless 3 or more sockets end up using the same + * array entry. This should not be a problem because most + * well known sockets don't overlap that much, and for + * the other ones, we can just be careful about picking our + * socket number when we choose an arbitrary one. 
+ */ + + s=sk; + + for(; s != NULL; s = s->next) + { + if (s->num != hnum) + continue; + if(s->dead && (s->state == TCP_CLOSE)) + continue; + if(s->daddr && s->daddr!=raddr) + continue; + if (s->dummy_th.dest != rnum && s->dummy_th.dest != 0) + continue; + if(s->saddr && s->saddr!=laddr) + continue; + return(s); + } + return(NULL); +} + +#endif + static struct proto_ops inet_proto_ops = { AF_INET, @@ -1369,7 +1535,7 @@ void inet_proto_init(struct net_proto *pro) int i; - printk("Swansea University Computer Society TCP/IP for NET3.017\n"); + printk("Swansea University Computer Society TCP/IP for NET3.029 (Snapshot #6)\n"); /* * Tell SOCKET that we are alive... @@ -1389,6 +1555,12 @@ void inet_proto_init(struct net_proto *pro) udp_prot.sock_array[i] = NULL; raw_prot.sock_array[i] = NULL; } + tcp_prot.inuse = 0; + tcp_prot.highestinuse = 0; + udp_prot.inuse = 0; + udp_prot.highestinuse = 0; + raw_prot.inuse = 0; + raw_prot.highestinuse = 0; printk("IP Protocols: "); for(p = inet_protocol_base; p != NULL;) @@ -1398,6 +1570,7 @@ void inet_proto_init(struct net_proto *pro) printk("%s%s",p->name,tmp?", ":"\n"); p = tmp; } + /* * Set the ARP module up */ diff --git a/net/inet/arp.c b/net/ipv4/arp.c index 67174bb7b..64bc060b9 100644 --- a/net/inet/arp.c +++ b/net/ipv4/arp.c @@ -31,10 +31,14 @@ * Alan Cox : Use init_timer(). * Alan Cox : Double lock fixes. * Martin Seine : Move the arphdr structure - * to if_arp.h for compatibility + * to if_arp.h for compatibility. * with BSD based programs. * Andrew Tridgell : Added ARP netmask code and - * re-arranged proxy handling + * re-arranged proxy handling. + * Alan Cox : Changed to use notifiers. + * Niibe Yutaka : Reply for this device or proxies only. + * Alan Cox : Don't proxy across hardware types! + * Jonathan Naylor : Added support for NET/ROM. 
*/ #include <linux/types.h> @@ -47,21 +51,26 @@ #include <linux/errno.h> #include <linux/if_arp.h> #include <linux/in.h> +#include <linux/mm.h> #include <asm/system.h> #include <asm/segment.h> #include <stdarg.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> -#include "ip.h" -#include "route.h" -#include "protocol.h" -#include "tcp.h" +#include <linux/trdevice.h> +#include <net/ip.h> +#include <net/route.h> +#include <net/protocol.h> +#include <net/tcp.h> #include <linux/skbuff.h> -#include "sock.h" -#include "arp.h" +#include <net/sock.h> +#include <net/arp.h> #ifdef CONFIG_AX25 -#include "ax25.h" +#include <net/ax25.h> +#ifdef CONFIG_NETROM +#include <net/netrom.h> +#endif #endif @@ -126,9 +135,15 @@ struct arp_table #define ARP_CHECK_INTERVAL (60 * HZ) +enum proxy { + PROXY_EXACT=0, + PROXY_ANY, + PROXY_NONE, +}; + /* Forward declarations. */ static void arp_check_expire (unsigned long); -static struct arp_table *arp_lookup(unsigned long paddr, int exact); +static struct arp_table *arp_lookup(unsigned long paddr, enum proxy proxy); static struct timer_list arp_timer = @@ -164,6 +179,8 @@ struct arp_table *arp_tables[FULL_ARP_TABLE_SIZE] = NULL, }; +unsigned long arp_cache_stamp; + /* * The last bits in the IP address are used for the cache lookup. @@ -200,6 +217,7 @@ static void arp_check_expire(unsigned long dummy) && !(entry->flags & ATF_PERM)) { *pentry = entry->next; /* remove from list */ + arp_cache_stamp++; del_timer(&entry->timer); /* Paranoia */ kfree_s(entry, sizeof(struct arp_table)); } @@ -246,15 +264,19 @@ static void arp_release_entry(struct arp_table *entry) /* * Purge a device from the ARP queue */ - -void arp_device_down(struct device *dev) + +int arp_device_event(unsigned long event, void *ptr) { + struct device *dev=ptr; int i; unsigned long flags; + if(event!=NETDEV_DOWN) + return NOTIFY_DONE; /* * This is a bit OTT - maybe we need some arp semaphores instead. 
*/ + save_flags(flags); cli(); for (i = 0; i < FULL_ARP_TABLE_SIZE; i++) @@ -274,7 +296,9 @@ void arp_device_down(struct device *dev) pentry = &entry->next; /* go to next entry */ } } + arp_cache_stamp++; restore_flags(flags); + return NOTIFY_DONE; } @@ -324,7 +348,11 @@ void arp_send(int type, int ptype, unsigned long dest_ip, arp = (struct arphdr *) (skb->data + dev->hard_header_len); arp->ar_hrd = htons(dev->type); #ifdef CONFIG_AX25 +#ifdef CONFIG_NETROM + arp->ar_pro = (dev->type == ARPHRD_AX25 || dev->type == ARPHRD_NETROM) ? htons(AX25_P_IP) : htons(ETH_P_IP); +#else arp->ar_pro = (dev->type != ARPHRD_AX25)? htons(ETH_P_IP) : htons(AX25_P_IP); +#endif #else arp->ar_pro = htons(ETH_P_IP); #endif @@ -415,6 +443,7 @@ static void arp_expire_request (unsigned long arg) del_timer(&entry->timer); restore_flags(flags); arp_release_entry(entry); + arp_cache_stamp++; return; } pentry = &(*pentry)->next; @@ -488,6 +517,7 @@ void arp_destroy(unsigned long ip_addr, int force) struct arp_table **pentry; unsigned long hash = HASH(ip_addr); +ugly: cli(); pentry = &arp_tables[hash]; if (! *pentry) /* also check proxy entries */ @@ -503,7 +533,12 @@ void arp_destroy(unsigned long ip_addr, int force) del_timer(&entry->timer); sti(); arp_release_entry(entry); - return; + /* this would have to be cleaned up */ + goto ugly; + /* perhaps like this ? + cli(); + entry = *pentry; + */ } pentry = &entry->next; if (!checked_proxies && ! 
*pentry) @@ -572,6 +607,15 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) } break; #endif +#ifdef CONFIG_NETROM + case ARPHRD_NETROM: + if(arp->ar_pro != htons(AX25_P_IP)) + { + kfree_skb(skb, FREE_READ); + return 0; + } + break; +#endif case ARPHRD_ETHER: case ARPHRD_ARCNET: if(arp->ar_pro != htons(ETH_P_IP)) @@ -581,6 +625,14 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) } break; + case ARPHRD_IEEE802: + if(arp->ar_pro != htons(ETH_P_IP)) + { + kfree_skb(skb, FREE_READ); + return 0; + } + break; + default: printk("ARP: dev->type mangled!\n"); kfree_skb(skb, FREE_READ); @@ -649,7 +701,10 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) /* * It is now an arp request */ - if(addr_hint != IS_MYADDR) +/* + * Only reply for the real device address or when it's in our proxy tables + */ + if(tip!=dev->pa_addr) { /* * To get in here, it is a request for someone else. We need to @@ -669,7 +724,7 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) having to use a huge number of proxy arp entries and having to keep them uptodate. 
*/ - if (proxy_entry->htype == htype && + if (proxy_entry->dev != dev && proxy_entry->htype == htype && !((proxy_entry->ip^tip)&proxy_entry->mask)) break; @@ -787,6 +842,10 @@ int arp_find(unsigned char *haddr, unsigned long paddr, struct device *dev, { struct arp_table *entry; unsigned long hash; +#ifdef CONFIG_IP_MULTICAST + unsigned long taddr; +#endif + switch (ip_chk_addr(paddr)) { case IS_MYADDR: @@ -794,6 +853,26 @@ int arp_find(unsigned char *haddr, unsigned long paddr, struct device *dev, memcpy(haddr, dev->dev_addr, dev->addr_len); skb->arp = 1; return 0; +#ifdef CONFIG_IP_MULTICAST + case IS_MULTICAST: + if(dev->type==ARPHRD_ETHER || dev->type==ARPHRD_IEEE802) + { + haddr[0]=0x01; + haddr[1]=0x00; + haddr[2]=0x5e; + taddr=ntohl(paddr); + haddr[5]=taddr&0xff; + taddr=taddr>>8; + haddr[4]=taddr&0xff; + taddr=taddr>>8; + haddr[3]=taddr&0x7f; + return 0; + } + /* + * If a device does not support multicast broadcast the stuff (eg AX.25 for now) + */ +#endif + case IS_BROADCAST: memcpy(haddr, dev->broadcast, dev->addr_len); skb->arp = 1; @@ -806,7 +885,7 @@ int arp_find(unsigned char *haddr, unsigned long paddr, struct device *dev, /* * Find an entry */ - entry = arp_lookup(paddr, 0); + entry = arp_lookup(paddr, PROXY_NONE); if (entry != NULL) /* It exists */ { @@ -846,19 +925,19 @@ int arp_find(unsigned char *haddr, unsigned long paddr, struct device *dev, GFP_ATOMIC); if (entry != NULL) { - entry->mask = DEF_ARP_NETMASK; + entry->next = arp_tables[hash]; + entry->last_used = jiffies; + entry->flags = 0; entry->ip = paddr; + entry->mask = DEF_ARP_NETMASK; + memset(entry->ha, 0, dev->addr_len); entry->hlen = dev->addr_len; entry->htype = dev->type; - entry->flags = 0; - memset(entry->ha, 0, dev->addr_len); entry->dev = dev; - entry->last_used = jiffies; init_timer(&entry->timer); entry->timer.function = arp_expire_request; entry->timer.data = (unsigned long)entry; entry->timer.expires = ARP_RES_TIME; - entry->next = arp_tables[hash]; arp_tables[hash] = entry; 
add_timer(&entry->timer); entry->retries = ARP_MAX_TRIES; @@ -918,11 +997,16 @@ int arp_get_info(char *buffer, char **start, off_t offset, int length) * Convert hardware address to XX:XX:XX:XX ... form. */ #ifdef CONFIG_AX25 - +#ifdef CONFIG_NETROM + if (entry->htype == ARPHRD_AX25 || entry->htype == ARPHRD_NETROM) + strcpy(hbuffer,ax2asc((ax25_address *)entry->ha)); + else { +#else if(entry->htype==ARPHRD_AX25) strcpy(hbuffer,ax2asc((ax25_address *)entry->ha)); else { #endif +#endif for(k=0,j=0;k<HBUFFERLEN-3 && j<entry->hlen;j++) { @@ -970,11 +1054,11 @@ int arp_get_info(char *buffer, char **start, off_t offset, int length) /* * This will find an entry in the ARP table by looking at the IP address. - * If exact is true then only exact IP matches will be allowed + * If proxy is PROXY_EXACT then only exact IP matches will be allowed * for proxy entries, otherwise the netmask will be used */ -static struct arp_table *arp_lookup(unsigned long paddr, int exact) +static struct arp_table *arp_lookup(unsigned long paddr, enum proxy proxy) { struct arp_table *entry; unsigned long hash = HASH(paddr); @@ -983,15 +1067,67 @@ static struct arp_table *arp_lookup(unsigned long paddr, int exact) if (entry->ip == paddr) break; /* it's possibly a proxy entry (with a netmask) */ - if (!entry) + if (!entry && proxy != PROXY_NONE) for (entry=arp_tables[PROXY_HASH]; entry != NULL; entry = entry->next) - if (exact? (entry->ip==paddr) : !((entry->ip^paddr)&entry->mask)) + if ((proxy==PROXY_EXACT) ? (entry->ip==paddr) + : !((entry->ip^paddr)&entry->mask)) break; return entry; } +int arp_find_cache(unsigned char *dp, unsigned long daddr, struct device *dev) +{ + /* + * We need the broadcast/multicast awareness here and the find routine split up. 
+ */ + struct arp_table *entry; +#ifdef CONFIG_IP_MULTICAST + unsigned long taddr; +#endif + + switch (ip_chk_addr(daddr)) + { + case IS_MYADDR: + printk("ARP: arp called for own IP address\n"); + memcpy(dp, dev->dev_addr, dev->addr_len); + return 1; +#ifdef CONFIG_IP_MULTICAST + case IS_MULTICAST: + if(dev->type==ARPHRD_ETHER || dev->type==ARPHRD_IEEE802) + { + dp[0]=0x01; + dp[1]=0x00; + dp[2]=0x5e; + taddr=ntohl(daddr); + dp[5]=taddr&0xff; + taddr=taddr>>8; + dp[4]=taddr&0xff; + taddr=taddr>>8; + dp[3]=taddr&0x7f; + return 1; + } + /* + * If a device does not support multicast broadcast the stuff (eg AX.25 for now) + */ +#endif + + case IS_BROADCAST: + memcpy(dp, dev->broadcast, dev->addr_len); + return 1; + + default: + entry=arp_lookup(daddr, PROXY_NONE); + if(entry) + { + memcpy(dp,entry->ha, ETH_ALEN); + return 1; + } + } + return 0; +} + /* * Set (create) an ARP cache entry. */ @@ -1022,16 +1158,28 @@ static int arp_req_set(struct arpreq *req) htype = ARPHRD_ETHER; hlen = ETH_ALEN; break; + case ARPHRD_ARCNET: htype = ARPHRD_ARCNET; hlen = 1; /* length of arcnet addresses */ break; + #ifdef CONFIG_AX25 case ARPHRD_AX25: htype = ARPHRD_AX25; hlen = 7; break; #endif +#ifdef CONFIG_NETROM + case ARPHRD_NETROM: + htype = ARPHRD_NETROM; + hlen = 7; + break; +#endif + case ARPHRD_IEEE802: + htype = ARPHRD_IEEE802; + hlen = TR_ALEN; + break; default: return -EPFNOSUPPORT; } @@ -1061,7 +1209,14 @@ static int arp_req_set(struct arpreq *req) /* * Find the entry */ - entry = arp_lookup(ip, 1); + entry = arp_lookup(ip, PROXY_EXACT); + if (entry && (entry->flags & ATF_PUBL) != (r.arp_flags & ATF_PUBL)) + { + sti(); + arp_destroy(ip,1); + cli(); + entry = NULL; + } /* * Do we need to create a new entry @@ -1103,6 +1258,7 @@ static int arp_req_set(struct arpreq *req) else entry->mask = DEF_ARP_NETMASK; entry->dev = rt->rt_dev; + arp_cache_stamp++; sti(); return 0; @@ -1134,7 +1290,7 @@ static int arp_req_get(struct arpreq *req) si = (struct sockaddr_in *) &r.arp_pa; 
cli(); - entry = arp_lookup(si->sin_addr.s_addr,0); + entry = arp_lookup(si->sin_addr.s_addr,PROXY_ANY); if (entry == NULL) { @@ -1211,12 +1367,18 @@ int arp_ioctl(unsigned int cmd, void *arg) static struct packet_type arp_packet_type = { 0, /* Should be: __constant_htons(ETH_P_ARP) - but this _doesn't_ come out constant! */ - 0, /* copy */ + NULL, /* All devices */ arp_rcv, NULL, NULL }; +static struct notifier_block arp_dev_notifier={ + arp_device_event, + NULL, + 0 +}; + void arp_init (void) { /* Register the packet type */ @@ -1224,5 +1386,7 @@ void arp_init (void) dev_add_pack(&arp_packet_type); /* Start with the regular checks for expired arp entries. */ add_timer(&arp_timer); + /* Register for device down reports */ + register_netdevice_notifier(&arp_dev_notifier); } diff --git a/net/ipv4/checksum.c b/net/ipv4/checksum.c new file mode 100644 index 000000000..59355e967 --- /dev/null +++ b/net/ipv4/checksum.c @@ -0,0 +1,276 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IP/TCP/UDP checksumming routines + * + * Authors: Jorge Cwik, <jorge@laser.satlink.net> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Lots of code moved from tcp.c and ip.c; see those files + * for more names. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <net/checksum.h> +#ifdef __mips__ +#include <asm/string.h> +#endif + +/* + * computes a partial checksum, e.g. 
for TCP/UDP fragments + */ + +unsigned int csum_partial(unsigned char * buff, int len, unsigned int sum) { +#ifdef __i386__ + __asm__(" + movl %%ecx, %%edx + cld + shrl $5, %%ecx + jz 2f + orl %%ecx, %%ecx +1: movl (%%esi), %%eax + adcl %%eax, %%ebx + movl 4(%%esi), %%eax + adcl %%eax, %%ebx + movl 8(%%esi), %%eax + adcl %%eax, %%ebx + movl 12(%%esi), %%eax + adcl %%eax, %%ebx + movl 16(%%esi), %%eax + adcl %%eax, %%ebx + movl 20(%%esi), %%eax + adcl %%eax, %%ebx + movl 24(%%esi), %%eax + adcl %%eax, %%ebx + movl 28(%%esi), %%eax + adcl %%eax, %%ebx + lea 32(%%esi), %%esi + dec %%ecx + jne 1b + adcl $0, %%ebx +2: movl %%edx, %%ecx + andl $28, %%ecx + je 4f + shrl $2, %%ecx + orl %%ecx, %%ecx +3: adcl (%%esi), %%ebx + lea 4(%%esi), %%esi + dec %%ecx + jne 3b + adcl $0, %%ebx +4: movl $0, %%eax + testw $2, %%dx + je 5f + lodsw + addl %%eax, %%ebx + adcl $0, %%ebx + movw $0, %%ax +5: test $1, %%edx + je 6f + lodsb + addl %%eax, %%ebx + adcl $0, %%ebx +6: " + : "=b"(sum) + : "0"(sum), "c"(len), "S"(buff) + : "ax", "bx", "cx", "dx", "si" ); +#elif defined (__mips__) + unsigned long scratch1; + unsigned long scratch2; + + __asm__(" + .set noreorder + .set noat + move %1,%4 + srl %1,%1,5 + beqz %1,2f + sll %1,%1,5 # delay slot + + addu %1,%5 +1: lw %2,0(%5) + addu %5,32 + addu %0,%2 + sltu $1,%0,%2 + + lw %2,-28(%5) + addu %0,$1 + addu %0,%2 + sltu $1,%0,%2 + + lw %2,-24(%5) + addu %0,$1 + addu %0,%2 + sltu $1,%0,%2 + + lw %2,-20(%5) + addu %0,$1 + addu %0,%2 + sltu $1,%0,%2 + + lw %2,-16(%5) + addu %0,$1 + addu %0,%2 + sltu $1,%0,%2 + + lw %2,-12(%5) + addu %0,$1 + addu %0,%2 + sltu $1,%0,%2 + + lw %2,-8(%5) + addu %0,$1 + addu %0,%2 + sltu $1,%0,%2 + + lw %2,-4(%5) + addu %0,$1 + addu %0,%2 + sltu $1,%0,%2 + + bne %5,%1,1b + addu %0,$1 # delay slot + +2: srl %1,%4,2 + bnez %1,4f + addu %1,%5 # delay slot +3: lw %2,0(%5) + addu %5,4 + addu %0,%2 + sltu $1,%0,%2 + bne %5,%1,3b + addu %0,$1 # delay slot + +4: andi $1,%4,2 + beqz %4,5f + lhu %2,0(%5) # delay slot + addu 
%5,2 + addu %0,%2 + sltu $1,%0,%2 + addu %0,$1 # delay slot + +5: andi $1,%4,1 + beqz %4,6f + lbu %2,0(%5) # delay slot + addu %0,%2 + sltu $1,%0,%2 + addu %0,$1 # delay slot +6: .set at + .set reorder" + : "=r"(sum), "=r" (scratch1), "=r" (scratch2) + : "0"(sum), "r"(len), "r"(buff) + : "$1"); +#else +#error Not implemented for this CPU +#endif + return(sum); +} + + + +/* + * copy from fs while checksumming, otherwise like csum_partial + */ + +unsigned int csum_partial_copyffs( char *src, char *dst, + int len, int sum) { +#ifdef __i386__ + __asm__(" + push %%ds + push %%es + movw %%ds, %%dx + movw %%dx, %%es + movw %%fs, %%dx + movw %%dx, %%ds + cld + cmpl $32, %%ecx + jb 2f + pushl %%ecx + shrl $5, %%ecx + orl %%ecx, %%ecx +1: movl (%%esi), %%eax + movl 4(%%esi), %%edx + adcl %%eax, %%ebx + movl %%eax, %%es:(%%edi) + adcl %%edx, %%ebx + movl %%edx, %%es:4(%%edi) + + movl 8(%%esi), %%eax + movl 12(%%esi), %%edx + adcl %%eax, %%ebx + movl %%eax, %%es:8(%%edi) + adcl %%edx, %%ebx + movl %%edx, %%es:12(%%edi) + + movl 16(%%esi), %%eax + movl 20(%%esi), %%edx + adcl %%eax, %%ebx + movl %%eax, %%es:16(%%edi) + adcl %%edx, %%ebx + movl %%edx, %%es:20(%%edi) + + movl 24(%%esi), %%eax + movl 28(%%esi), %%edx + adcl %%eax, %%ebx + movl %%eax, %%es:24(%%edi) + adcl %%edx, %%ebx + movl %%edx, %%es:28(%%edi) + + lea 32(%%esi), %%esi + lea 32(%%edi), %%edi + dec %%ecx + jne 1b + adcl $0, %%ebx + popl %%ecx +2: movl %%ecx, %%edx + andl $28, %%ecx + je 4f + shrl $2, %%ecx + orl %%ecx, %%ecx +3: movl (%%esi), %%eax + adcl %%eax, %%ebx + movl %%eax, %%es:(%%edi) + lea 4(%%esi), %%esi + lea 4(%%edi), %%edi + dec %%ecx + jne 3b + adcl $0, %%ebx +4: movl $0, %%eax + testl $2, %%edx + je 5f + lodsw + stosw + addl %%eax, %%ebx + movw $0, %%ax + adcl %%eax, %%ebx +5: test $1, %%edx + je 6f + lodsb + stosb + addl %%eax, %%ebx + adcl $0, %%ebx +6: pop %%es + pop %%ds + " + : "=b"(sum) + : "0"(sum), "c"(len), "S"(src), "D"(dst) + : "ax", "bx", "cx", "dx", "si", "di" ); +#elif defined 
(__mips__) + /* + * It's 2:30 am and I don't feel like doing it real ... + * This is lots slower than the real thing (tm) + */ + sum = csum_partial(src, len, sum); + memcpy(dst, src, len); +#else +#error Not implemented for this CPU +#endif + return(sum); +} + + + diff --git a/net/inet/devinet.c b/net/ipv4/devinet.c index a0ed0b5c3..794a7e897 100644 --- a/net/inet/devinet.c +++ b/net/ipv4/devinet.c @@ -18,7 +18,6 @@ #include <asm/segment.h> #include <asm/system.h> #include <asm/bitops.h> -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -33,13 +32,13 @@ #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> -#include "ip.h" -#include "route.h" -#include "protocol.h" -#include "tcp.h" +#include <net/ip.h> +#include <net/route.h> +#include <net/protocol.h> +#include <net/tcp.h> #include <linux/skbuff.h> -#include "sock.h" -#include "arp.h" +#include <net/sock.h> +#include <net/arp.h> /* * Determine a default network mask, based on the IP address. @@ -101,7 +100,8 @@ int ip_chk_addr(unsigned long addr) return IS_MYADDR; /* - * OK, now check the interface addresses. + * OK, now check the interface addresses. We could + * speed this by keeping a dev and a dev_up chain. */ for (dev = dev_base; dev != NULL; dev = dev->next) @@ -182,7 +182,7 @@ unsigned long ip_my_addr(void) /* * Find an interface that can handle addresses for a certain address. * - * This needs optimising, since its relatively trivial to collapse + * This needs optimising, since it's relatively trivial to collapse * the two loops into one. */ @@ -212,3 +212,4 @@ struct device * ip_dev_check(unsigned long addr) } return NULL; } + diff --git a/net/inet/icmp.c b/net/ipv4/icmp.c index ad2ac801b..7c1eea15d 100644 --- a/net/inet/icmp.c +++ b/net/ipv4/icmp.c @@ -11,6 +11,7 @@ * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Stefan Becker, <stefanb@yello.ping.de> * * Fixes: * Alan Cox : Generic queue usage. @@ -27,6 +28,10 @@ * A.N.Kuznetsov : Multihoming fixes. * Laco Rusnak : Multihoming fixes. * Alan Cox : Tightened up icmp_send(). + * Alan Cox : Multicasts. + * Stefan Becker : ICMP redirects in icmp_send(). + * Peter Belding : Tightened up ICMP redirect handling + * Alan Cox : Tightened even more. * * * @@ -44,19 +49,20 @@ #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/string.h> -#include "snmp.h" -#include "ip.h" -#include "route.h" -#include "protocol.h" -#include "icmp.h" -#include "tcp.h" -#include "snmp.h" +#include <net/snmp.h> +#include <net/ip.h> +#include <net/route.h> +#include <net/protocol.h> +#include <net/icmp.h> +#include <net/tcp.h> +#include <net/snmp.h> #include <linux/skbuff.h> -#include "sock.h" +#include <net/sock.h> #include <linux/errno.h> #include <linux/timer.h> #include <asm/system.h> #include <asm/segment.h> +#include <net/checksum.h> #define min(a,b) ((a)<(b)?(a):(b)) @@ -89,11 +95,9 @@ struct icmp_err icmp_err_convert[] = { /* * Send an ICMP message in response to a situation - * - * Fixme: Fragment handling is wrong really. */ -void icmp_send(struct sk_buff *skb_in, int type, int code, struct device *dev) +void icmp_send(struct sk_buff *skb_in, int type, int code, unsigned long info, struct device *dev) { struct sk_buff *skb; struct iphdr *iph; @@ -138,9 +142,27 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, struct device *dev) if(type==ICMP_DEST_UNREACH||type==ICMP_REDIRECT||type==ICMP_SOURCE_QUENCH||type==ICMP_TIME_EXCEEDED) { - if(iph->protocol==IPPROTO_ICMP) - return; + /* + * Is the original packet an ICMP packet? 
+ */ + + if(iph->protocol==IPPROTO_ICMP) + { + icmph = (struct icmphdr *) ((char *) iph + + 4 * iph->ihl); + /* + * Check for ICMP error packets (Must never reply to + * an ICMP error). + */ + + if (icmph->type == ICMP_DEST_UNREACH || + icmph->type == ICMP_SOURCE_QUENCH || + icmph->type == ICMP_REDIRECT || + icmph->type == ICMP_TIME_EXCEEDED || + icmph->type == ICMP_PARAMETERPROB) + return; + } } icmp_statistics.IcmpOutMsgs++; @@ -189,7 +211,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, struct device *dev) */ len = dev->hard_header_len + sizeof(struct iphdr) + sizeof(struct icmphdr) + - sizeof(struct iphdr) + 8; /* amount of header to return */ + sizeof(struct iphdr) + 32; /* amount of header to return */ skb = (struct sk_buff *) alloc_skb(len, GFP_ATOMIC); if (skb == NULL) @@ -231,7 +253,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, struct device *dev) icmph->type = type; icmph->code = code; icmph->checksum = 0; - icmph->un.gateway = 0; + icmph->un.gateway = info; /* This might not be meant for + this form of the union but it will + be right anyway */ memcpy(icmph + 1, iph, sizeof(struct iphdr) + 8); icmph->checksum = ip_compute_csum((unsigned char *)icmph, @@ -265,17 +289,23 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb) case ICMP_HOST_UNREACH: break; case ICMP_PROT_UNREACH: +#ifdef CONFIG_NET_DEBUG printk("ICMP: %s:%d: protocol unreachable.\n", in_ntoa(iph->daddr), ntohs(iph->protocol)); +#endif break; case ICMP_PORT_UNREACH: break; case ICMP_FRAG_NEEDED: +#ifdef CONFIG_NET_DEBUG printk("ICMP: %s: fragmentation needed and DF set.\n", in_ntoa(iph->daddr)); +#endif break; case ICMP_SR_FAILED: +#ifdef CONFIG_NET_DEBUG printk("ICMP: %s: Source Route Failed.\n", in_ntoa(iph->daddr)); +#endif break; default: break; @@ -331,6 +361,12 @@ static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, iph = (struct iphdr *) (icmph + 1); ip = iph->daddr; +#ifdef CONFIG_IP_FORWARD + /* + * We are a router. 
Routers should not respond to ICMP_REDIRECT messages. + */ + printk("icmp: ICMP redirect from %s on %s ignored.\n", in_ntoa(source), dev->name); +#else switch(icmph->code & 7) { case ICMP_REDIR_NET: @@ -349,15 +385,20 @@ static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, * Add better route to host. * But first check that the redirect * comes from the old gateway.. + * And make sure it's an ok host address + * (not some confused thing sending our + * address) */ rt = ip_rt_route(ip, NULL, NULL); if (!rt) break; - if (rt->rt_gateway != source) + if (rt->rt_gateway != source || + ((icmph->un.gateway^dev->pa_addr)&dev->pa_mask) || + ip_chk_addr(icmph->un.gateway)) break; - printk("redirect from %s\n", in_ntoa(source)); + printk("ICMP redirect from %s\n", in_ntoa(source)); ip_rt_add((RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY), - ip, 0, icmph->un.gateway, dev,0, 0); + ip, 0, icmph->un.gateway, dev,0, 0, 0); break; case ICMP_REDIR_NETTOS: case ICMP_REDIR_HOSTTOS: @@ -366,7 +407,7 @@ static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, default: break; } - +#endif /* * Discard the original packet */ @@ -462,10 +503,8 @@ static void icmp_timestamp(struct icmphdr *icmph, struct sk_buff *skb, struct de "ICMP: Size (%d) of ICMP_TIMESTAMP request should be 20!\n", len); icmp_statistics.IcmpInErrors++; -#if 1 /* correct answers are possible for everything >= 12 */ if (len < 12) -#endif return; } @@ -656,7 +695,7 @@ int icmp_rcv(struct sk_buff *skb1, struct device *dev, struct options *opt, * Parse the ICMP message */ - if (ip_chk_addr(daddr) == IS_BROADCAST) + if (ip_chk_addr(daddr) != IS_MYADDR) { if (icmph->type != ICMP_ECHO) { @@ -716,7 +755,7 @@ int icmp_rcv(struct sk_buff *skb1, struct device *dev, struct options *opt, case ICMP_ADDRESSREPLY: /* * We ought to set our netmask on receiving this, but - * experience shows its a waste of effort. + * experience shows it's a waste of effort. 
*/ icmp_statistics.IcmpInAddrMaskReps++; kfree_skb(skb1, FREE_READ); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c new file mode 100644 index 000000000..ec182d8e3 --- /dev/null +++ b/net/ipv4/igmp.c @@ -0,0 +1,402 @@ +/* + * Linux NET3: Internet Gateway Management Protocol [IGMP] + * + * Authors: + * Alan Cox <Alan.Cox@linux.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + * + * Alan Cox : Added lots of __inline__ to optimise + * the memory usage of all the tiny little + * functions. + */ + + +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/config.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <linux/igmp.h> +#include <net/checksum.h> +#include <net/head_explode.h> + +#ifdef CONFIG_IP_MULTICAST + + +/* + * Timer management + */ + + +extern __inline__ void igmp_stop_timer(struct ip_mc_list *im) +{ + del_timer(&im->timer); + im->tm_running=0; +} + +extern __inline__ int random(void) +{ + static unsigned long seed=152L; + seed=seed*69069L+1; + return seed^jiffies; +} + +/* + * Inlined as its only called once. + */ + +extern __inline__ void igmp_start_timer(struct ip_mc_list *im) +{ + int tv; + if(im->tm_running) + return; + tv=random()%(10*HZ); /* Pick a number any number 8) */ + im->timer.expires=tv; + im->tm_running=1; + add_timer(&im->timer); +} + +/* + * Send an IGMP report. 
+ */ + +#define MAX_IGMP_SIZE (sizeof(struct igmphdr)+sizeof(struct iphdr)+64) + +static void igmp_send_report(struct device *dev, unsigned long address, int type) +{ + struct sk_buff *skb=alloc_skb(MAX_IGMP_SIZE, GFP_ATOMIC); + int tmp; + unsigned char *dp; + + if(skb==NULL) + return; + tmp=ip_build_header(skb, INADDR_ANY, address, &dev, IPPROTO_IGMP, NULL, + skb->mem_len, 0, 1); + if(tmp<0) + { + kfree_skb(skb, FREE_WRITE); + return; + } + dp=skb->data+tmp; + skb->len=tmp+sizeof(struct igmphdr); + + *dp++=type; + *dp++=0; + skb->h.raw=dp; + dp=imp_putu16(dp,0); /* checksum */ + dp=imp_putn32(dp,address); /* Address (already in net order) */ + imp_putn16(skb->h.raw,ip_compute_csum(skb->data+tmp,sizeof(struct igmphdr))); /* Checksum fill */ + ip_queue_xmit(NULL,dev,skb,1); +} + + +static void igmp_timer_expire(unsigned long data) +{ + struct ip_mc_list *im=(struct ip_mc_list *)data; + igmp_stop_timer(im); + igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_MEMBERSHIP_REPORT); +} + +extern __inline__ void igmp_init_timer(struct ip_mc_list *im) +{ + im->tm_running=0; + init_timer(&im->timer); + im->timer.data=(unsigned long)im; + im->timer.function=&igmp_timer_expire; +} + + +extern __inline__ void igmp_heard_report(struct device *dev, unsigned long address) +{ + struct ip_mc_list *im; + for(im=dev->ip_mc_list;im!=NULL;im=im->next) + if(im->multiaddr==address) + igmp_stop_timer(im); +} + +extern __inline__ void igmp_heard_query(struct device *dev) +{ + struct ip_mc_list *im; + for(im=dev->ip_mc_list;im!=NULL;im=im->next) + if(!im->tm_running && im->multiaddr!=IGMP_ALL_HOSTS) + igmp_start_timer(im); +} + +/* + * Map a multicast IP onto multicast MAC for type ethernet. 
+ */ + +extern __inline__ void ip_mc_map(unsigned long addr, char *buf) +{ + addr=ntohl(addr); + buf[0]=0x01; + buf[1]=0x00; + buf[2]=0x5e; + buf[5]=addr&0xFF; + addr>>=8; + buf[4]=addr&0xFF; + addr>>=8; + buf[3]=addr&0x7F; +} + +/* + * Add a filter to a device + */ + +void ip_mc_filter_add(struct device *dev, unsigned long addr) +{ + char buf[6]; + if(dev->type!=ARPHRD_ETHER) + return; /* Only do ethernet now */ + ip_mc_map(addr,buf); + dev_mc_add(dev,buf,ETH_ALEN,0); +} + +/* + * Remove a filter from a device + */ + +void ip_mc_filter_del(struct device *dev, unsigned long addr) +{ + char buf[6]; + if(dev->type!=ARPHRD_ETHER) + return; /* Only do ethernet now */ + ip_mc_map(addr,buf); + dev_mc_delete(dev,buf,ETH_ALEN,0); +} + +extern __inline__ void igmp_group_dropped(struct ip_mc_list *im) +{ + del_timer(&im->timer); + igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_LEAVE_MESSAGE); + ip_mc_filter_del(im->interface, im->multiaddr); +/* printk("Left group %lX\n",im->multiaddr);*/ +} + +extern __inline__ void igmp_group_added(struct ip_mc_list *im) +{ + igmp_init_timer(im); + igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_MEMBERSHIP_REPORT); + ip_mc_filter_add(im->interface, im->multiaddr); +/* printk("Joined group %lX\n",im->multiaddr);*/ +} + +int igmp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, + unsigned long daddr, unsigned short len, unsigned long saddr, int redo, + struct inet_protocol *protocol) +{ + /* This basically follows the spec line by line -- see RFC1112 */ + struct igmp_header igh; + + /* Pull the IGMP header */ + igmp_explode(skb->h.raw,&igh); + + if(skb->len <sizeof(struct igmphdr) || skb->ip_hdr->ttl!=1 || ip_compute_csum((void *)skb->h.raw,sizeof(struct igmphdr))) + { + kfree_skb(skb, FREE_READ); + return 0; + } + + if(igh.type==IGMP_HOST_MEMBERSHIP_QUERY && daddr==IGMP_ALL_HOSTS) + igmp_heard_query(dev); + if(igh.type==IGMP_HOST_MEMBERSHIP_REPORT && daddr==igh.group) + igmp_heard_report(dev,igh.group); + 
kfree_skb(skb, FREE_READ); + return 0; +} + +/* + * Multicast list managers + */ + + +/* + * A socket has joined a multicast group on device dev. + */ + +static void ip_mc_inc_group(struct device *dev, unsigned long addr) +{ + struct ip_mc_list *i; + for(i=dev->ip_mc_list;i!=NULL;i=i->next) + { + if(i->multiaddr==addr) + { + i->users++; + return; + } + } + i=(struct ip_mc_list *)kmalloc(sizeof(*i), GFP_KERNEL); + if(!i) + return; + i->users=1; + i->interface=dev; + i->multiaddr=addr; + i->next=dev->ip_mc_list; + igmp_group_added(i); + dev->ip_mc_list=i; +} + +/* + * A socket has left a multicast group on device dev + */ + +static void ip_mc_dec_group(struct device *dev, unsigned long addr) +{ + struct ip_mc_list **i; + for(i=&(dev->ip_mc_list);(*i)!=NULL;i=&(*i)->next) + { + if((*i)->multiaddr==addr) + { + if(--((*i)->users)) + return; + else + { + struct ip_mc_list *tmp= *i; + igmp_group_dropped(tmp); + *i=(*i)->next; + kfree_s(tmp,sizeof(*tmp)); + } + } + } +} + +/* + * Device going down: Clean up. + */ + +void ip_mc_drop_device(struct device *dev) +{ + struct ip_mc_list *i; + struct ip_mc_list *j; + for(i=dev->ip_mc_list;i!=NULL;i=j) + { + j=i->next; + kfree_s(i,sizeof(*i)); + } + dev->ip_mc_list=NULL; +} + +/* + * Device going up. 
Make sure it is in all hosts + */ + +void ip_mc_allhost(struct device *dev) +{ + struct ip_mc_list *i; + for(i=dev->ip_mc_list;i!=NULL;i=i->next) + if(i->multiaddr==IGMP_ALL_HOSTS) + return; + i=(struct ip_mc_list *)kmalloc(sizeof(*i), GFP_KERNEL); + if(!i) + return; + i->users=1; + i->interface=dev; + i->multiaddr=IGMP_ALL_HOSTS; + i->next=dev->ip_mc_list; + dev->ip_mc_list=i; + ip_mc_filter_add(i->interface, i->multiaddr); + +} + +/* + * Join a socket to a group + */ + +int ip_mc_join_group(struct sock *sk , struct device *dev, unsigned long addr) +{ + int unused= -1; + int i; + if(!MULTICAST(addr)) + return -EINVAL; + if(!(dev->flags&IFF_MULTICAST)) + return -EADDRNOTAVAIL; + if(sk->ip_mc_list==NULL) + { + if((sk->ip_mc_list=(struct ip_mc_socklist *)kmalloc(sizeof(*sk->ip_mc_list), GFP_KERNEL))==NULL) + return -ENOMEM; + memset(sk->ip_mc_list,'\0',sizeof(*sk->ip_mc_list)); + } + for(i=0;i<IP_MAX_MEMBERSHIPS;i++) + { + if(sk->ip_mc_list->multiaddr[i]==addr && sk->ip_mc_list->multidev[i]==dev) + return -EADDRINUSE; + if(sk->ip_mc_list->multidev[i]==NULL) + unused=i; + } + + if(unused==-1) + return -ENOBUFS; + sk->ip_mc_list->multiaddr[unused]=addr; + sk->ip_mc_list->multidev[unused]=dev; + ip_mc_inc_group(dev,addr); + return 0; +} + +/* + * Ask a socket to leave a group. + */ + +int ip_mc_leave_group(struct sock *sk, struct device *dev, unsigned long addr) +{ + int i; + if(!MULTICAST(addr)) + return -EINVAL; + if(!(dev->flags&IFF_MULTICAST)) + return -EADDRNOTAVAIL; + if(sk->ip_mc_list==NULL) + return -EADDRNOTAVAIL; + + for(i=0;i<IP_MAX_MEMBERSHIPS;i++) + { + if(sk->ip_mc_list->multiaddr[i]==addr && sk->ip_mc_list->multidev[i]==dev) + { + sk->ip_mc_list->multidev[i]=NULL; + ip_mc_dec_group(dev,addr); + return 0; + } + } + return -EADDRNOTAVAIL; +} + +/* + * A socket is closing. 
+ */ + +void ip_mc_drop_socket(struct sock *sk) +{ + int i; + + if(sk->ip_mc_list==NULL) + return; + + for(i=0;i<IP_MAX_MEMBERSHIPS;i++) + { + if(sk->ip_mc_list->multidev[i]) + { + ip_mc_dec_group(sk->ip_mc_list->multidev[i], sk->ip_mc_list->multiaddr[i]); + sk->ip_mc_list->multidev[i]=NULL; + } + } + kfree_s(sk->ip_mc_list,sizeof(*sk->ip_mc_list)); + sk->ip_mc_list=NULL; +} + +#endif diff --git a/net/inet/ip.c b/net/ipv4/ip.c index fe48ca8eb..62d2ad38e 100644 --- a/net/inet/ip.c +++ b/net/ipv4/ip.c @@ -11,6 +11,11 @@ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Richard Underwood + * Stefan Becker, <stefanb@yello.ping.de> + * Jorge Cwik, <jorge@laser.satlink.net> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * * * Fixes: * Alan Cox : Commented a couple of minor bits of surplus code @@ -54,38 +59,81 @@ * Alan Cox : IP options adjust sk->priority. * Pedro Roque : Fix mtu/length error in ip_forward. * Alan Cox : Avoid ip_chk_addr when possible. + * Richard Underwood : IP multicasting. + * Alan Cox : Cleaned up multicast handlers. + * Alan Cox : RAW sockets demultiplex in the BSD style. + * Gunther Mayer : Fix the SNMP reporting typo + * Alan Cox : Always in group 224.0.0.1 + * Pauline Middelink : Fast ip_checksum update when forwarding + * Masquerading support. + * Alan Cox : Multicast loopback error for 224.0.0.1 + * Alan Cox : IP_MULTICAST_LOOP option. + * Alan Cox : Use notifiers. + * Bjorn Ekwall : Removed ip_csum (from slhc.c too) + * Bjorn Ekwall : Moved ip_fast_csum to ip.h (inline!) + * Stefan Becker : Send out ICMP HOST REDIRECT + * Arnt Gulbrandsen : ip_build_xmit + * Alan Cox : Per socket routing cache + * Alan Cox : Fixed routing cache, added header cache. + * Alan Cox : Loopback didnt work right in original ip_build_xmit - fixed it. + * Alan Cox : Only send ICMP_REDIRECT if src/dest are the same net. + * Alan Cox : Incoming IP option handling. 
+ * Alan Cox : Set saddr on raw output frames as per BSD. + * Alan Cox : Stopped broadcast source route explosions. + * Alan Cox : Can disable source routing + * + * * * To Fix: * IP option processing is mostly not needed. ip_forward needs to know about routing rules * and time stamp but that's about all. Use the route mtu field here too + * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient + * and could be made very efficient with the addition of some virtual memory hacks to permit + * the allocation of a buffer that can then be 'grown' by twiddling page tables. + * Output fragmentation wants updating along with the buffer management to use a single + * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet + * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause + * fragmentation anyway. + * + * FIXME: copy frag 0 iph to qp->iph * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
*/ + #include <asm/segment.h> #include <asm/system.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/mm.h> #include <linux/string.h> #include <linux/errno.h> +#include <linux/config.h> + #include <linux/socket.h> #include <linux/sockios.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> -#include "snmp.h" -#include "ip.h" -#include "protocol.h" -#include "route.h" -#include "tcp.h" + +#include <net/snmp.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/udp.h> #include <linux/skbuff.h> -#include "sock.h" -#include "arp.h" -#include "icmp.h" +#include <net/sock.h> +#include <net/arp.h> +#include <net/icmp.h> +#include <net/raw.h> +#include <net/checksum.h> +#include <linux/igmp.h> +#include <linux/ip_fw.h> #define CONFIG_IP_DEFRAG @@ -99,7 +147,11 @@ extern void sort_send(struct sock *sk); * SNMP management statistics */ +#ifdef CONFIG_IP_FORWARD struct ip_mib ip_statistics={1,64,}; /* Forwarding=Yes, Default TTL=64 */ +#else +struct ip_mib ip_statistics={0,64,}; /* Forwarding=No, Default TTL=64 */ +#endif /* * Handle the issuing of an ioctl() request @@ -117,43 +169,6 @@ int ip_ioctl(struct sock *sk, int cmd, unsigned long arg) } -/* these two routines will do routing. */ - -static void -strict_route(struct iphdr *iph, struct options *opt) -{ -} - - -static void -loose_route(struct iphdr *iph, struct options *opt) -{ -} - - - - -/* This routine will check to see if we have lost a gateway. */ -void -ip_route_check(unsigned long daddr) -{ -} - - -#if 0 -/* this routine puts the options at the end of an ip header. */ -static int -build_options(struct iphdr *iph, struct options *opt) -{ - unsigned char *ptr; - /* currently we don't support any options. */ - ptr = (unsigned char *)(iph+1); - *ptr = 0; - return (4); -} -#endif - - /* * Take an skb, and fill in the MAC header. 
*/ @@ -192,13 +207,12 @@ int ip_id_count = 0; int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr, struct device **dev, int type, struct options *opt, int len, int tos, int ttl) { - static struct options optmem; - struct iphdr *iph; struct rtable *rt; unsigned char *buff; unsigned long raddr; int tmp; unsigned long src; + struct iphdr *iph; buff = skb->data; @@ -206,12 +220,16 @@ int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long dadd * See if we need to look up the device. */ +#ifdef CONFIG_INET_MULTICAST + if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name) + *dev=dev_get(skb->sk->ip_mc_name); +#endif if (*dev == NULL) { if(skb->localroute) - rt = ip_rt_local(daddr, &optmem, &src); + rt = ip_rt_local(daddr, NULL, &src); else - rt = ip_rt_route(daddr, &optmem, &src); + rt = ip_rt_route(daddr, NULL, &src); if (rt == NULL) { ip_statistics.IpOutNoRoutes++; @@ -227,7 +245,6 @@ int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long dadd saddr = src;/*rt->rt_dev->pa_addr;*/ raddr = rt->rt_gateway; - opt = &optmem; } else { @@ -235,9 +252,9 @@ int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long dadd * We still need the address of the first hop. 
*/ if(skb->localroute) - rt = ip_rt_local(daddr, &optmem, &src); + rt = ip_rt_local(daddr, NULL, &src); else - rt = ip_rt_route(daddr, &optmem, &src); + rt = ip_rt_route(daddr, NULL, &src); /* * If the frame is from us and going off machine it MUST MUST MUST * have the output device ip address and never the loopback @@ -289,7 +306,12 @@ int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long dadd if(type == IPPROTO_RAW) return (tmp); - iph = (struct iphdr *)buff; + /* + * Build the IP addresses + */ + + iph=(struct iphdr *)buff; + iph->version = 4; iph->tos = tos; iph->frag_off = 0; @@ -298,261 +320,23 @@ int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long dadd iph->saddr = saddr; iph->protocol = type; iph->ihl = 5; - - /* Setup the IP options. */ -#ifdef Not_Yet_Avail - build_options(iph, opt); -#endif + skb->ip_hdr = iph; return(20 + tmp); /* IP header plus MAC header size */ } -static int -do_options(struct iphdr *iph, struct options *opt) -{ - unsigned char *buff; - int done = 0; - int i, len = sizeof(struct iphdr); - - /* Zero out the options. */ - opt->record_route.route_size = 0; - opt->loose_route.route_size = 0; - opt->strict_route.route_size = 0; - opt->tstamp.ptr = 0; - opt->security = 0; - opt->compartment = 0; - opt->handling = 0; - opt->stream = 0; - opt->tcc = 0; - return(0); - - /* Advance the pointer to start at the options. */ - buff = (unsigned char *)(iph + 1); - - /* Now start the processing. 
*/ - while (!done && len < iph->ihl*4) switch(*buff) { - case IPOPT_END: - done = 1; - break; - case IPOPT_NOOP: - buff++; - len++; - break; - case IPOPT_SEC: - buff++; - if (*buff != 11) return(1); - buff++; - opt->security = ntohs(*(unsigned short *)buff); - buff += 2; - opt->compartment = ntohs(*(unsigned short *)buff); - buff += 2; - opt->handling = ntohs(*(unsigned short *)buff); - buff += 2; - opt->tcc = ((*buff) << 16) + ntohs(*(unsigned short *)(buff+1)); - buff += 3; - len += 11; - break; - case IPOPT_LSRR: - buff++; - if ((*buff - 3)% 4 != 0) return(1); - len += *buff; - opt->loose_route.route_size = (*buff -3)/4; - buff++; - if (*buff % 4 != 0) return(1); - opt->loose_route.pointer = *buff/4 - 1; - buff++; - buff++; - for (i = 0; i < opt->loose_route.route_size; i++) { - if(i>=MAX_ROUTE) - return(1); - opt->loose_route.route[i] = *(unsigned long *)buff; - buff += 4; - } - break; - case IPOPT_SSRR: - buff++; - if ((*buff - 3)% 4 != 0) return(1); - len += *buff; - opt->strict_route.route_size = (*buff -3)/4; - buff++; - if (*buff % 4 != 0) return(1); - opt->strict_route.pointer = *buff/4 - 1; - buff++; - buff++; - for (i = 0; i < opt->strict_route.route_size; i++) { - if(i>=MAX_ROUTE) - return(1); - opt->strict_route.route[i] = *(unsigned long *)buff; - buff += 4; - } - break; - case IPOPT_RR: - buff++; - if ((*buff - 3)% 4 != 0) return(1); - len += *buff; - opt->record_route.route_size = (*buff -3)/4; - buff++; - if (*buff % 4 != 0) return(1); - opt->record_route.pointer = *buff/4 - 1; - buff++; - buff++; - for (i = 0; i < opt->record_route.route_size; i++) { - if(i>=MAX_ROUTE) - return 1; - opt->record_route.route[i] = *(unsigned long *)buff; - buff += 4; - } - break; - case IPOPT_SID: - len += 4; - buff +=2; - opt->stream = *(unsigned short *)buff; - buff += 2; - break; - case IPOPT_TIMESTAMP: - buff++; - len += *buff; - if (*buff % 4 != 0) return(1); - opt->tstamp.len = *buff / 4 - 1; - buff++; - if ((*buff - 1) % 4 != 0) return(1); - opt->tstamp.ptr = 
(*buff-1)/4; - buff++; - opt->tstamp.x.full_char = *buff; - buff++; - for (i = 0; i < opt->tstamp.len; i++) { - opt->tstamp.data[i] = *(unsigned long *)buff; - buff += 4; - } - break; - default: - return(1); - } - - if (opt->record_route.route_size == 0) { - if (opt->strict_route.route_size != 0) { - memcpy(&(opt->record_route), &(opt->strict_route), - sizeof(opt->record_route)); - } else if (opt->loose_route.route_size != 0) { - memcpy(&(opt->record_route), &(opt->loose_route), - sizeof(opt->record_route)); - } - } - - if (opt->strict_route.route_size != 0 && - opt->strict_route.route_size != opt->strict_route.pointer) { - strict_route(iph, opt); - return(0); - } - - if (opt->loose_route.route_size != 0 && - opt->loose_route.route_size != opt->loose_route.pointer) { - loose_route(iph, opt); - return(0); - } - - return(0); -} - -/* - * This is a version of ip_compute_csum() optimized for IP headers, which - * always checksum on 4 octet boundaries. - */ - -static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen) -{ - unsigned long sum = 0; - - if (wlen) - { - unsigned long bogus; - __asm__("clc\n" - "1:\t" - "lodsl\n\t" - "adcl %3, %0\n\t" - "decl %2\n\t" - "jne 1b\n\t" - "adcl $0, %0\n\t" - "movl %0, %3\n\t" - "shrl $16, %3\n\t" - "addw %w3, %w0\n\t" - "adcw $0, %w0" - : "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus) - : "0" (sum), "1" (buff), "2" (wlen)); - } - return (~sum) & 0xffff; -} - -/* - * This routine does all the checksum computations that don't - * require anything special (like copying or special headers). - */ - -unsigned short ip_compute_csum(unsigned char * buff, int len) -{ - unsigned long sum = 0; - - /* Do the first multiple of 4 bytes and convert to 16 bits. 
*/ - if (len > 3) - { - __asm__("clc\n" - "1:\t" - "lodsl\n\t" - "adcl %%eax, %%ebx\n\t" - "loop 1b\n\t" - "adcl $0, %%ebx\n\t" - "movl %%ebx, %%eax\n\t" - "shrl $16, %%eax\n\t" - "addw %%ax, %%bx\n\t" - "adcw $0, %%bx" - : "=b" (sum) , "=S" (buff) - : "0" (sum), "c" (len >> 2) ,"1" (buff) - : "ax", "cx", "si", "bx" ); - } - if (len & 2) - { - __asm__("lodsw\n\t" - "addw %%ax, %%bx\n\t" - "adcw $0, %%bx" - : "=b" (sum), "=S" (buff) - : "0" (sum), "1" (buff) - : "bx", "ax", "si"); - } - if (len & 1) - { - __asm__("lodsb\n\t" - "movb $0, %%ah\n\t" - "addw %%ax, %%bx\n\t" - "adcw $0, %%bx" - : "=b" (sum), "=S" (buff) - : "0" (sum), "1" (buff) - : "bx", "ax", "si"); - } - sum =~sum; - return(sum & 0xffff); -} - -/* - * Check the header of an incoming IP datagram. This version is still used in slhc.c. - */ - -int ip_csum(struct iphdr *iph) -{ - return ip_fast_csum((unsigned char *)iph, iph->ihl); -} - /* * Generate a checksum for an outgoing IP datagram. */ -static void ip_send_check(struct iphdr *iph) +void ip_send_check(struct iphdr *iph) { iph->check = 0; iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } -/************************ Fragment Handlers From NET2E not yet with tweaks to beat 4K **********************************/ +/************************ Fragment Handlers From NET2E **********************************/ /* @@ -573,7 +357,7 @@ static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, u fp = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC); if (fp == NULL) { - printk("IP: frag_create: no memory left !\n"); + NETDEBUG(printk("IP: frag_create: no memory left !\n")); return(NULL); } memset(fp, 0, sizeof(struct ipfrag)); @@ -664,7 +448,7 @@ static void ip_free(struct ipq *qp) kfree_s(qp->mac, qp->maclen); /* Release the IP header. */ - kfree_s(qp->iph, qp->ihlen + 8); + kfree_s(qp->iph, 64 + 8); /* Finally, release the queue descriptor itself. 
*/ kfree_s(qp, sizeof(struct ipq)); @@ -691,7 +475,7 @@ static void ip_expire(unsigned long arg) /* This if is always true... shrug */ if(qp->fragments!=NULL) icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED, - ICMP_EXC_FRAGTIME, qp->dev); + ICMP_EXC_FRAGTIME, 0, qp->dev); /* * Nuke the fragment queue. @@ -716,7 +500,7 @@ static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct devi qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC); if (qp == NULL) { - printk("IP: create: no memory left !\n"); + NETDEBUG(printk("IP: create: no memory left !\n")); return(NULL); skb->dev = qp->dev; } @@ -733,7 +517,7 @@ static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct devi qp->mac = (unsigned char *) kmalloc(maclen, GFP_ATOMIC); if (qp->mac == NULL) { - printk("IP: create: no memory left !\n"); + NETDEBUG(printk("IP: create: no memory left !\n")); kfree_s(qp, sizeof(struct ipq)); return(NULL); } @@ -743,10 +527,10 @@ static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct devi */ ihlen = (iph->ihl * sizeof(unsigned long)); - qp->iph = (struct iphdr *) kmalloc(ihlen + 8, GFP_ATOMIC); + qp->iph = (struct iphdr *) kmalloc(64 + 8, GFP_ATOMIC); if (qp->iph == NULL) { - printk("IP: create: no memory left !\n"); + NETDEBUG(printk("IP: create: no memory left !\n")); kfree_s(qp->mac, maclen); kfree_s(qp, sizeof(struct ipq)); return(NULL); @@ -834,7 +618,7 @@ static struct sk_buff *ip_glue(struct ipq *qp) if ((skb = alloc_skb(len,GFP_ATOMIC)) == NULL) { ip_statistics.IpReasmFails++; - printk("IP: queue_glue: no memory for gluing queue 0x%X\n", (int) qp); + NETDEBUG(printk("IP: queue_glue: no memory for gluing queue 0x%X\n", (int) qp)); ip_free(qp); return(NULL); } @@ -858,9 +642,9 @@ static struct sk_buff *ip_glue(struct ipq *qp) fp = qp->fragments; while(fp != NULL) { - if(count+fp->len>skb->len) + if(count+fp->len > skb->len) { - printk("Invalid fragment list: Fragment over size.\n"); + NETDEBUG(printk("Invalid 
fragment list: Fragment over size.\n")); ip_free(qp); kfree_skb(skb,FREE_WRITE); ip_statistics.IpReasmFails++; @@ -891,7 +675,7 @@ static struct sk_buff *ip_glue(struct ipq *qp) static struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct device *dev) { - struct ipfrag *prev, *next; + struct ipfrag *prev, *next, *tmp; struct ipfrag *tfp; struct ipq *qp; struct sk_buff *skb2; @@ -996,33 +780,34 @@ static struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct * If we can merge fragments, do it. */ - for(; next != NULL; next = tfp) + for(tmp=next; tmp != NULL; tmp = tfp) { - tfp = next->next; - if (next->offset >= end) + tfp = tmp->next; + if (tmp->offset >= end) break; /* no overlaps at all */ i = end - next->offset; /* overlap is 'i' bytes */ - next->len -= i; /* so reduce size of */ - next->offset += i; /* next fragment */ - next->ptr += i; - + tmp->len -= i; /* so reduce size of */ + tmp->offset += i; /* next fragment */ + tmp->ptr += i; /* * If we get a frag size of <= 0, remove it and the packet * that it goes with. */ - if (next->len <= 0) + if (tmp->len <= 0) { - if (next->prev != NULL) - next->prev->next = next->next; + if (tmp->prev != NULL) + tmp->prev->next = tmp->next; else - qp->fragments = next->next; + qp->fragments = tmp->next; if (tfp->next != NULL) - next->next->prev = next->prev; + tmp->next->prev = tmp->prev; + + next=tfp; /* We have killed the original next frame */ - kfree_skb(next->skb,FREE_READ); - kfree_s(next, sizeof(struct ipfrag)); + kfree_skb(tmp->skb,FREE_READ); + kfree_s(tmp, sizeof(struct ipfrag)); } } @@ -1116,8 +901,11 @@ void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int i if (ntohs(iph->frag_off) & IP_DF) { + /* + * Reply giving the MTU of the failed hop. 
+ */ ip_statistics.IpFragFails++; - icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev); + icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev->mtu, dev); return; } @@ -1129,8 +917,8 @@ void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int i if(mtu<8) { - /* It's wrong but its better than nothing */ - icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev); + /* It's wrong but it's better than nothing */ + icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev->mtu, dev); ip_statistics.IpFragFails++; return; } @@ -1141,7 +929,7 @@ void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int i /* * The initial offset is 0 for a complete frame. When - * fragmenting fragments its wherever this one starts. + * fragmenting fragments it's wherever this one starts. */ if (is_frag & 2) @@ -1173,7 +961,7 @@ void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int i if ((skb2 = alloc_skb(len + hlen,GFP_ATOMIC)) == NULL) { - printk("IP: frag: no memory for new fragment!\n"); + NETDEBUG(printk("IP: frag: no memory for new fragment!\n")); ip_statistics.IpFragFails++; return; } @@ -1250,7 +1038,7 @@ void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int i * Forward an IP datagram to its next destination. */ -static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag) +void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag, unsigned long target_addr, int target_strict) { struct device *dev2; /* Output device */ struct iphdr *iph; /* Our header */ @@ -1258,7 +1046,22 @@ static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag) struct rtable *rt; /* Route we use */ unsigned char *ptr; /* Data pointer */ unsigned long raddr; /* Router IP address */ +#ifdef CONFIG_IP_FIREWALL + int fw_res = 0; /* Forwarding result */ + + /* + * See if we are allowed to forward this. + * Note: demasqueraded fragments are always 'back'warded. 
+ */ + + if(!(is_frag&4) && (fw_res=ip_fw_chk(skb->h.iph, dev, ip_fw_fwd_chain, ip_fw_fwd_policy, 0))!=1) + { + if(fw_res==-1) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev); + return; + } +#endif /* * According to the RFC, we must first decrease the TTL field. If * that reaches zero, we must reply an ICMP control message telling @@ -1272,12 +1075,6 @@ static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag) iph = skb->h.iph; iph->ttl--; - if (iph->ttl <= 0) - { - /* Tell the sender its packet died... */ - icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev); - return; - } /* * Re-compute the IP header checksum. @@ -1285,21 +1082,31 @@ static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag) * and could thus adjust the checksum as Phil Karn does in KA9Q */ - ip_send_check(iph); + iph->check = ntohs(iph->check) + 0x0100; + if ((iph->check & 0xFF00) == 0) + iph->check++; /* carry overflow */ + iph->check = htons(iph->check); + + if (iph->ttl <= 0) + { + /* Tell the sender its packet died... */ + icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0, dev); + return; + } /* * OK, the packet is still valid. Fetch its destination address, * and give it to the IP sender for further processing. */ - rt = ip_rt_route(iph->daddr, NULL, NULL); + rt = ip_rt_route(target_addr, NULL, NULL); if (rt == NULL) { /* * Tell the sender its packet cannot be delivered. Again * ICMP is screened later. */ - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0, dev); return; } @@ -1317,23 +1124,35 @@ static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag) if (raddr != 0) { /* + * Strict routing permits no gatewaying + */ + + if(target_strict) + { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0, dev); + kfree_skb(skb, FREE_READ); + return; + } + + /* * There is a gateway so find the correct route for it. * Gateways cannot in turn be gatewayed. 
*/ + rt = ip_rt_route(raddr, NULL, NULL); if (rt == NULL) { /* * Tell the sender its packet cannot be delivered... */ - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev); return; } if (rt->rt_gateway != 0) raddr = rt->rt_gateway; } else - raddr = iph->daddr; + raddr = target_addr; /* * Having picked a route we can now send the frame out. @@ -1342,14 +1161,14 @@ static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag) dev2 = rt->rt_dev; /* - * In IP you never forward a frame on the interface that it arrived - * upon. We should generate an ICMP HOST REDIRECT giving the route + * In IP you never have to forward a frame on the interface that it + * arrived upon. We now generate an ICMP HOST REDIRECT giving the route * we calculated. - * For now just dropping the packet is an acceptable compromise. */ - - if (dev == dev2) - return; +#ifndef CONFIG_IP_NO_ICMP_REDIRECT + if (dev == dev2 && !((iph->saddr^iph->daddr)&dev->pa_mask) && rt->rt_flags&RTF_MODIFIED) + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, raddr, dev); +#endif /* * We now allocate a new buffer, and copy the datagram into it. @@ -1358,6 +1177,14 @@ static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag) if (dev2->flags & IFF_UP) { +#ifdef CONFIG_IP_MASQUERADE + /* + * If this fragment needs masquerading, make it so... + * (Dont masquerade de-masqueraded fragments) + */ + if (!(is_frag&4) && fw_res==2) + ip_fw_masquerade(&skb, dev2); +#endif /* * Current design decrees we copy the packet. 
For identical header @@ -1372,7 +1199,7 @@ static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag) */ if (skb2 == NULL) { - printk("\nIP: No memory available for IP forward\n"); + NETDEBUG(printk("\nIP: No memory available for IP forward\n")); return; } ptr = skb2->data; @@ -1403,9 +1230,17 @@ static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag) } else { +#ifdef CONFIG_IP_ACCT + /* + * Count mapping we shortcut + */ + + ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1); +#endif + /* * Map service types to priority. We lie about - * throughput being low priority, but its a good + * throughput being low priority, but it's a good * choice to help improve general usage. */ if(iph->tos & IPTOS_LOWDELAY) @@ -1428,15 +1263,17 @@ static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag) int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) { struct iphdr *iph = skb->h.iph; + struct sock *raw_sk=NULL; unsigned char hash; unsigned char flag = 0; - unsigned char opts_p = 0; /* Set iff the packet has options. */ struct inet_protocol *ipprot; - static struct options opt; /* since we don't use these yet, and they - take up stack space. */ int brd=IS_MYADDR; + unsigned long target_addr; + int target_strict=0; int is_frag=0; - +#ifdef CONFIG_IP_FIREWALL + int err; +#endif ip_statistics.IpInReceives++; @@ -1447,15 +1284,20 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) skb->ip_hdr = iph; /* + * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. + * RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING]. + * * Is the datagram acceptable? * * 1. Length at least the size of an ip header * 2. Version of 4 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] - * (4. We ought to check for IP multicast addresses and undefined types.. does this matter ?) + * 4. 
Doesn't have a bogus length + * (5. We ought to check for IP multicast addresses and undefined types.. does this matter ?) */ - if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0) + if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0 + || skb->len < ntohs(iph->tot_len)) { ip_statistics.IpInHdrErrors++; kfree_skb(skb, FREE_WRITE); @@ -1468,20 +1310,128 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) */ skb->len=ntohs(iph->tot_len); + + /* + * See if the firewall wants to dispose of the packet. + */ + +#ifdef CONFIG_IP_FIREWALL + + if ((err=ip_fw_chk(iph,dev,ip_fw_blk_chain,ip_fw_blk_policy, 0))<1) + { + if(err==-1) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev); + kfree_skb(skb, FREE_WRITE); + return 0; + } + +#endif + /* * Next analyse the packet for options. Studies show under one packet in * a thousand have options.... */ + + target_addr = iph->daddr; if (iph->ihl != 5) - { /* Fast path for the typical optionless IP packet. */ - memset((char *) &opt, 0, sizeof(opt)); - if (do_options(iph, &opt) != 0) - return 0; - opts_p = 1; + { + /* Humph.. options. Lots of annoying fiddly bits */ + + /* + * This is straight from the RFC. It might even be right ;) + * + * RFC 1122: 3.2.1.8 STREAMID option is obsolete and MUST be ignored. + * RFC 1122: 3.2.1.8 MUST NOT crash on a zero length option. + * RFC 1122: 3.2.1.8 MUST support acting as final destination of a source route. + */ + + int opt_space=4*(iph->ihl-5); + int opt_size; + unsigned char *opt_ptr=skb->h.raw+sizeof(struct iphdr); + + while(opt_space>0) + { + if(*opt_ptr==IPOPT_NOOP) + { + opt_ptr++; + opt_space--; + continue; + } + if(*opt_ptr==IPOPT_END) + break; /* Done */ + if(opt_space<2 || (opt_size=opt_ptr[1])<2 || opt_ptr[1]>opt_space) + { + /* + * RFC 1122: 3.2.2.5 SHOULD send parameter problem reports. 
+ */ + icmp_send(skb, ICMP_PARAMETERPROB, 0, 0, skb->dev); + kfree_skb(skb, FREE_READ); + return -EINVAL; + } + switch(opt_ptr[0]) + { + case IPOPT_SEC: + /* Should we drop this ?? */ + break; + case IPOPT_SSRR: /* These work almost the same way */ + target_strict=1; + /* Fall through */ + case IPOPT_LSRR: +#ifdef CONFIG_IP_NOSR + kfree_skb(skb, FREE_READ); + return -EINVAL; +#endif + case IPOPT_RR: + /* + * RFC 1122: 3.2.1.8 Support for RR is OPTIONAL. + */ + if (iph->daddr!=skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0) + break; + if((opt_size<3) || ( opt_ptr[0]==IPOPT_RR && opt_ptr[2] > opt_size-4 )) + { + if(ip_chk_addr(iph->daddr)) + icmp_send(skb, ICMP_PARAMETERPROB, 0, 0, skb->dev); + kfree_skb(skb, FREE_READ); + return -EINVAL; + } + if(opt_ptr[2] > opt_size-4 ) + break; + /* Bytes are [IPOPT_xxRR][Length][EntryPointer][Entry0][Entry1].... */ + /* This isn't going to be too portable - FIXME */ + if(opt_ptr[0]!=IPOPT_RR) + { + int t; + target_addr=*(long *)(&opt_ptr[opt_ptr[2]]); /* Get hop */ + t=ip_chk_addr(target_addr); + if(t==IS_MULTICAST||t==IS_BROADCAST) + { + if(ip_chk_addr(iph->daddr)) + icmp_send(skb, ICMP_PARAMETERPROB, 0, 0, skb->dev); + kfree_skb(skb,FREE_READ); + return -EINVAL; + } + } + *(long *)(&opt_ptr[opt_ptr[2]])=skb->dev->pa_addr; /* Record hop */ + break; + case IPOPT_TIMESTAMP: + /* + * RFC 1122: 3.2.1.8 The timestamp option is OPTIONAL but if implemented + * MUST meet various rules (read the spec). + */ + NETDEBUG(printk("ICMP: Someone finish the timestamp routine ;)\n")); + break; + default: + break; + } + opt_ptr+=opt_size; + opt_space-=opt_size; + } + } + /* * Remember if the frame is fragmented. */ @@ -1510,118 +1460,255 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) * function entry. 
*/ - if ( iph->daddr != skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0) + if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0) { - /* - * Don't forward multicast or broadcast frames. - */ +#ifdef CONFIG_IP_MULTICAST - if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST) + if(brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK)) { - kfree_skb(skb,FREE_WRITE); - return 0; + /* + * Check it is for one of our groups + */ + struct ip_mc_list *ip_mc=dev->ip_mc_list; + do + { + if(ip_mc==NULL) + { + kfree_skb(skb, FREE_WRITE); + return 0; + } + if(ip_mc->multiaddr==iph->daddr) + break; + ip_mc=ip_mc->next; + } + while(1); } +#endif +#ifdef CONFIG_IP_MASQUERADE /* - * The packet is for another target. Forward the frame + * Do we need to de-masquerade this fragment? */ - -#ifdef CONFIG_IP_FORWARD - ip_forward(skb, dev, is_frag); -#else -/* printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n", - iph->saddr,iph->daddr);*/ - ip_statistics.IpInAddrErrors++; + if (ip_fw_demasquerade(skb)) + { + struct iphdr *iph=skb->h.iph; + ip_forward(skb, dev, is_frag|4, iph->daddr, 0); + kfree_skb(skb, FREE_WRITE); + return(0); + } #endif + /* - * The forwarder is inefficient and copies the packet. We - * free the original now. + * Account for the packet */ + +#ifdef CONFIG_IP_ACCT + ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1); +#endif - kfree_skb(skb, FREE_WRITE); - return(0); - } - - /* - * Reassemble IP fragments. - */ + /* + * Reassemble IP fragments. + */ - if(is_frag) - { - /* Defragment. Obtain the complete packet if there is one */ - skb=ip_defrag(iph,skb,dev); - if(skb==NULL) - return 0; - iph=skb->h.iph; - } + if(is_frag) + { + /* Defragment. Obtain the complete packet if there is one */ + skb=ip_defrag(iph,skb,dev); + if(skb==NULL) + return 0; + skb->dev = dev; + iph=skb->h.iph; + } - /* - * Point into the IP datagram, just past the header. 
- */ + /* + * Point into the IP datagram, just past the header. + */ - skb->ip_hdr = iph; - skb->h.raw += iph->ihl*4; + skb->ip_hdr = iph; + skb->h.raw += iph->ihl*4; - /* - * skb->h.raw now points at the protocol beyond the IP header. - */ + /* + * Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies. + * + * RFC 1122: SHOULD pass TOS value up to the transport layer. + */ + + hash = iph->protocol & (SOCK_ARRAY_SIZE-1); - hash = iph->protocol & (MAX_INET_PROTOS -1); - for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next) - { - struct sk_buff *skb2; - - if (ipprot->protocol != iph->protocol) - continue; - /* - * See if we need to make a copy of it. This will - * only be set if more than one protocol wants it. - * and then not for the last one. - * - * This is an artifact of poor upper protocol design. - * Because the upper protocols damage the actual packet - * we must do copying. In actual fact it's even worse - * than this as TCP may hold on to the buffer. 
- */ - if (ipprot->copy) + /* + * If there maybe a raw socket we must check - if not we don't care less + */ + + if((raw_sk=raw_prot.sock_array[hash])!=NULL) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if(skb2==NULL) - continue; + struct sock *sknext=NULL; + struct sk_buff *skb1; + raw_sk=get_sock_raw(raw_sk, hash, iph->saddr, iph->daddr); + if(raw_sk) /* Any raw sockets */ + { + do + { + /* Find the next */ + sknext=get_sock_raw(raw_sk->next, hash, iph->saddr, iph->daddr); + if(sknext) + skb1=skb_clone(skb, GFP_ATOMIC); + else + break; /* One pending raw socket left */ + if(skb1) + raw_rcv(raw_sk, skb1, dev, iph->saddr,iph->daddr); + raw_sk=sknext; + } + while(raw_sk!=NULL); + + /* + * Here either raw_sk is the last raw socket, or NULL if none + */ + + /* + * We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy + */ + } } - else + + /* + * skb->h.raw now points at the protocol beyond the IP header. + */ + + hash = iph->protocol & (MAX_INET_PROTOS -1); + for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next) { - skb2 = skb; - } - flag = 1; - - /* - * Pass on the datagram to each protocol that wants it, - * based on the datagram protocol. We should really - * check the protocol handler's return values here... - */ - ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr, + struct sk_buff *skb2; + + if (ipprot->protocol != iph->protocol) + continue; + /* + * See if we need to make a copy of it. This will + * only be set if more than one protocol wants it. + * and then not for the last one. If there is a pending + * raw delivery wait for that + */ + + if (ipprot->copy || raw_sk) + { + skb2 = skb_clone(skb, GFP_ATOMIC); + if(skb2==NULL) + continue; + } + else + { + skb2 = skb; + } + flag = 1; + + /* + * Pass on the datagram to each protocol that wants it, + * based on the datagram protocol. We should really + * check the protocol handler's return values here... 
+ */ + + ipprot->handler(skb2, dev, NULL, iph->daddr, (ntohs(iph->tot_len) - (iph->ihl * 4)), iph->saddr, 0, ipprot); + } + + /* + * All protocols checked. + * If this packet was a broadcast, we may *not* reply to it, since that + * causes (proven, grin) ARP storms and a leakage of memory (i.e. all + * ICMP reply messages get queued up for transmission...) + */ + + if(raw_sk!=NULL) /* Shift to last raw user */ + raw_rcv(raw_sk, skb, dev, iph->saddr, iph->daddr); + else if (!flag) /* Free and report errors */ + { + if (brd != IS_BROADCAST && brd!=IS_MULTICAST) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev); + kfree_skb(skb, FREE_WRITE); + } + + return(0); } /* - * All protocols checked. - * If this packet was a broadcast, we may *not* reply to it, since that - * causes (proven, grin) ARP storms and a leakage of memory (i.e. all - * ICMP reply messages get queued up for transmission...) + * Do any IP forwarding required. chk_addr() is expensive -- avoid it someday. + * + * This is inefficient. While finding out if it is for us we could also compute + * the routing table entry. This is where the great unified cache theory comes + * in as and when someone implements it + * + * For most hosts over 99% of packets match the first conditional + * and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at + * function entry. + */ + + /* + * Don't forward multicast or broadcast frames. */ - if (!flag) + if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST) { - if (brd != IS_BROADCAST && brd!=IS_MULTICAST) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev); - kfree_skb(skb, FREE_WRITE); + kfree_skb(skb,FREE_WRITE); + return 0; } + /* + * The packet is for another target. 
Forward the frame + */ + +#ifdef CONFIG_IP_FORWARD + ip_forward(skb, dev, is_frag, target_addr, target_strict); +#else +/* printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n", + iph->saddr,iph->daddr);*/ + ip_statistics.IpInAddrErrors++; +#endif + /* + * The forwarder is inefficient and copies the packet. We + * free the original now. + */ + + kfree_skb(skb, FREE_WRITE); return(0); } + + +/* + * Loop a packet back to the sender. + */ + +static void ip_loopback(struct device *old_dev, struct sk_buff *skb) +{ + extern struct device loopback_dev; + struct device *dev=&loopback_dev; + int len=skb->len-old_dev->hard_header_len; + struct sk_buff *newskb=alloc_skb(len+dev->hard_header_len, GFP_ATOMIC); + + if(newskb==NULL) + return; + + newskb->link3=NULL; + newskb->sk=NULL; + newskb->dev=dev; + newskb->saddr=skb->saddr; + newskb->daddr=skb->daddr; + newskb->raddr=skb->raddr; + newskb->free=1; + newskb->lock=0; + newskb->users=0; + newskb->pkt_type=skb->pkt_type; + newskb->len=len+dev->hard_header_len; + + + newskb->ip_hdr=(struct iphdr *)(newskb->data+ip_send(newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr)); + memcpy(newskb->ip_hdr,skb->ip_hdr,len); + + /* Recurse. 
The device check against IFF_LOOPBACK will stop infinite recursion */ + + /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/ + ip_queue_xmit(NULL, dev, newskb, 1); +} /* @@ -1642,7 +1729,7 @@ void ip_queue_xmit(struct sock *sk, struct device *dev, /* Sanity check */ if (dev == NULL) { - printk("IP: ip_queue_xmit dev = NULL\n"); + NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n")); return; } @@ -1670,6 +1757,12 @@ void ip_queue_xmit(struct sock *sk, struct device *dev, skb->ip_hdr = iph; iph->tot_len = ntohs(skb->len-dev->hard_header_len); +#ifdef CONFIG_IP_FIREWALL + if(ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy, 0) != 1) + /* just don't send this packet */ + return; +#endif + /* * No reassigning numbers to fragments... */ @@ -1715,7 +1808,7 @@ void ip_queue_xmit(struct sock *sk, struct device *dev, */ if (skb->next != NULL) { - printk("ip_queue_xmit: next != NULL\n"); + NETDEBUG(printk("ip_queue_xmit: next != NULL\n")); skb_unlink(skb); } @@ -1739,7 +1832,7 @@ void ip_queue_xmit(struct sock *sk, struct device *dev, if (skb->link3 != NULL) { - printk("ip.c: link3 != NULL\n"); + NETDEBUG(printk("ip.c: link3 != NULL\n")); skb->link3 = NULL; } if (sk->send_head == NULL) @@ -1756,9 +1849,6 @@ void ip_queue_xmit(struct sock *sk, struct device *dev, /* Interrupt restore */ restore_flags(flags); - /* Set the IP write timeout to the round trip time for the packet. - If an acknowledge has not arrived by then we may wish to act */ - reset_timer(sk, TIME_WRITE, sk->rto); } else /* Remember who owns the buffer */ @@ -1767,8 +1857,50 @@ void ip_queue_xmit(struct sock *sk, struct device *dev, /* * If the indicated interface is up and running, send the packet. 
*/ + ip_statistics.IpOutRequests++; +#ifdef CONFIG_IP_ACCT + ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1); +#endif + +#ifdef CONFIG_IP_MULTICAST + /* + * Multicasts are looped back for other local users + */ + + if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK)) + { + if(sk==NULL || sk->ip_mc_loop) + { + if(iph->daddr==IGMP_ALL_HOSTS) + ip_loopback(dev,skb); + else + { + struct ip_mc_list *imc=dev->ip_mc_list; + while(imc!=NULL) + { + if(imc->multiaddr==iph->daddr) + { + ip_loopback(dev,skb); + break; + } + imc=imc->next; + } + } + } + /* Multicasts with ttl 0 must not go beyond the host */ + + if(skb->ip_hdr->ttl==0) + { + kfree_skb(skb, FREE_READ); + return; + } + } +#endif + if((dev->flags&IFF_BROADCAST) && iph->daddr==dev->pa_brdaddr && !(dev->flags&IFF_LOOPBACK)) + ip_loopback(dev,skb); + if (dev->flags & IFF_UP) { /* @@ -1794,128 +1926,59 @@ void ip_queue_xmit(struct sock *sk, struct device *dev, } + +#ifdef CONFIG_IP_MULTICAST + /* - * A socket has timed out on its send queue and wants to do a - * little retransmitting. Currently this means TCP. + * Write an multicast group list table for the IGMP daemon to + * read. */ - -void ip_do_retransmit(struct sock *sk, int all) + +int ip_mc_procinfo(char *buffer, char **start, off_t offset, int length) { - struct sk_buff * skb; - struct proto *prot; + off_t pos=0, begin=0; + struct ip_mc_list *im; + unsigned long flags; + int len=0; struct device *dev; - - prot = sk->prot; - skb = sk->send_head; - - while (skb != NULL) + + len=sprintf(buffer,"Device : Count\tGroup Users Timer\n"); + save_flags(flags); + cli(); + + for(dev = dev_base; dev; dev = dev->next) { - dev = skb->dev; - IS_SKB(skb); - skb->when = jiffies; - - /* - * In general it's OK just to use the old packet. However we - * need to use the current ack and window fields. Urg and - * urg_ptr could possibly stand to be updated as well, but we - * don't keep the necessary data. 
That shouldn't be a problem, - * if the other end is doing the right thing. Since we're - * changing the packet, we have to issue a new IP identifier. - */ - - /* this check may be unnecessary - retransmit only for TCP */ - if (sk->protocol == IPPROTO_TCP) { - struct tcphdr *th; - struct iphdr *iph; - int size; - - iph = (struct iphdr *)(skb->data + dev->hard_header_len); - th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2)); - size = skb->len - (((unsigned char *) th) - skb->data); - - iph->id = htons(ip_id_count++); - ip_send_check(iph); - - th->ack_seq = ntohl(sk->acked_seq); - th->window = ntohs(tcp_select_window(sk)); - tcp_send_check(th, sk->saddr, sk->daddr, size, sk); - } - - /* - * If the interface is (still) up and running, kick it. - */ - - if (dev->flags & IFF_UP) - { - /* - * If the packet is still being sent by the device/protocol - * below then don't retransmit. This is both needed, and good - - * especially with connected mode AX.25 where it stops resends - * occurring of an as yet unsent anyway frame! - * We still add up the counts as the round trip time wants - * adjusting. - */ - if (sk && !skb_device_locked(skb)) - { - /* Remove it from any existing driver queue first! */ - skb_unlink(skb); - /* Now queue it */ - ip_statistics.IpOutRequests++; - dev_queue_xmit(skb, dev, sk->priority); - } - } - - /* - * Count retransmissions - */ - sk->retransmits++; - sk->prot->retransmits ++; - - /* - * Only one retransmit requested. - */ - if (!all) - break; - - /* - * This should cut it off before we send too many packets. 
- */ - if (sk->retransmits >= sk->cong_window) - break; - skb = skb->link3; + if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)) + { + len+=sprintf(buffer+len,"%-10s: %5d\n", + dev->name, dev->mc_count); + for(im = dev->ip_mc_list; im; im = im->next) + { + len+=sprintf(buffer+len, + "\t\t\t%08lX %5d %d:%08lX\n", + im->multiaddr, im->users, + im->tm_running, im->timer.expires); + pos=begin+len; + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + } } + restore_flags(flags); + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; } -/* - * This is the normal code called for timeouts. It does the retransmission - * and then does backoff. ip_do_retransmit is separated out because - * tcp_ack needs to send stuff from the retransmit queue without - * initiating a backoff. - */ - -void ip_retransmit(struct sock *sk, int all) -{ - ip_do_retransmit(sk, all); - - /* - * Increase the timeout each time we retransmit. Note that - * we do not increase the rtt estimate. rto is initialized - * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests - * that doubling rto each time is the least we can get away with. - * In KA9Q, Karn uses this for the first few times, and then - * goes to quadratic. netBSD doubles, but only goes up to *64, - * and clamps at 1 to 64 sec afterwards. Note that 120 sec is - * defined in the protocol as the maximum possible RTT. I guess - * we'll have to use something other than TCP to talk to the - * University of Mars. - */ - - sk->retransmits++; - sk->backoff++; - sk->rto = min(sk->rto << 1, 120*HZ); - reset_timer(sk, TIME_WRITE, sk->rto); -} +#endif /* * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on * an IP socket. @@ -1925,10 +1988,26 @@ void ip_retransmit(struct sock *sk, int all) * Next release we will sort out IP_OPTIONS since for some people are kind of important. 
*/ +static struct device *ip_mc_find_devfor(unsigned long addr) +{ + struct device *dev; + for(dev = dev_base; dev; dev = dev->next) + { + if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)&& + (dev->pa_addr==addr)) + return dev; + } + + return NULL; +} + int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { int val,err; - + unsigned char ucval; +#if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT) + struct ip_fw tmp_fw; +#endif if (optval == NULL) return(-EINVAL); @@ -1937,6 +2016,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt return err; val = get_fs_long((unsigned long *)optval); + ucval=get_fs_byte((unsigned char *)optval); if(level!=SOL_IP) return -EOPNOTSUPP; @@ -1957,6 +2037,212 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt return -EINVAL; sk->ip_ttl=val; return 0; +#ifdef CONFIG_IP_MULTICAST + case IP_MULTICAST_TTL: + { + sk->ip_mc_ttl=(int)ucval; + return 0; + } + case IP_MULTICAST_LOOP: + { + if(ucval!=0 && ucval!=1) + return -EINVAL; + sk->ip_mc_loop=(int)ucval; + return 0; + } + case IP_MULTICAST_IF: + { + struct in_addr addr; + struct device *dev=NULL; + + /* + * Check the arguments are allowable + */ + + err=verify_area(VERIFY_READ, optval, sizeof(addr)); + if(err) + return err; + + memcpy_fromfs(&addr,optval,sizeof(addr)); + + + /* + * What address has been requested + */ + + if(addr.s_addr==INADDR_ANY) /* Default */ + { + sk->ip_mc_name[0]=0; + return 0; + } + + /* + * Find the device + */ + + dev=ip_mc_find_devfor(addr.s_addr); + + /* + * Did we find one + */ + + if(dev) + { + strcpy(sk->ip_mc_name,dev->name); + return 0; + } + return -EADDRNOTAVAIL; + } + + case IP_ADD_MEMBERSHIP: + { + +/* + * FIXME: Add/Del membership should have a semaphore protecting them from re-entry + */ + struct ip_mreq mreq; + unsigned long route_src; + struct rtable *rt; + struct device *dev=NULL; + + /* + * Check the arguments. 
+ */ + + err=verify_area(VERIFY_READ, optval, sizeof(mreq)); + if(err) + return err; + + memcpy_fromfs(&mreq,optval,sizeof(mreq)); + + /* + * Get device for use later + */ + + if(mreq.imr_interface.s_addr==INADDR_ANY) + { + /* + * Not set so scan. + */ + if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL) + { + dev=rt->rt_dev; + rt->rt_use--; + } + } + else + { + /* + * Find a suitable device. + */ + + dev=ip_mc_find_devfor(mreq.imr_interface.s_addr); + } + + /* + * No device, no cookies. + */ + + if(!dev) + return -ENODEV; + + /* + * Join group. + */ + + return ip_mc_join_group(sk,dev,mreq.imr_multiaddr.s_addr); + } + + case IP_DROP_MEMBERSHIP: + { + struct ip_mreq mreq; + struct rtable *rt; + unsigned long route_src; + struct device *dev=NULL; + + /* + * Check the arguments + */ + + err=verify_area(VERIFY_READ, optval, sizeof(mreq)); + if(err) + return err; + + memcpy_fromfs(&mreq,optval,sizeof(mreq)); + + /* + * Get device for use later + */ + + if(mreq.imr_interface.s_addr==INADDR_ANY) + { + if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL) + { + dev=rt->rt_dev; + rt->rt_use--; + } + } + else + { + + dev=ip_mc_find_devfor(mreq.imr_interface.s_addr); + } + + /* + * Did we find a suitable device. 
+ */ + + if(!dev) + return -ENODEV; + + /* + * Leave group + */ + + return ip_mc_leave_group(sk,dev,mreq.imr_multiaddr.s_addr); + } +#endif +#ifdef CONFIG_IP_FIREWALL + case IP_FW_ADD_BLK: + case IP_FW_DEL_BLK: + case IP_FW_ADD_FWD: + case IP_FW_DEL_FWD: + case IP_FW_CHK_BLK: + case IP_FW_CHK_FWD: + case IP_FW_FLUSH_BLK: + case IP_FW_FLUSH_FWD: + case IP_FW_ZERO_BLK: + case IP_FW_ZERO_FWD: + case IP_FW_POLICY_BLK: + case IP_FW_POLICY_FWD: + if(!suser()) + return -EPERM; + if(optlen>sizeof(tmp_fw) || optlen<1) + return -EINVAL; + err=verify_area(VERIFY_READ,optval,optlen); + if(err) + return err; + memcpy_fromfs(&tmp_fw,optval,optlen); + err=ip_fw_ctl(optname, &tmp_fw,optlen); + return -err; /* -0 is 0 after all */ + +#endif +#ifdef CONFIG_IP_ACCT + case IP_ACCT_DEL: + case IP_ACCT_ADD: + case IP_ACCT_FLUSH: + case IP_ACCT_ZERO: + if(!suser()) + return -EPERM; + if(optlen>sizeof(tmp_fw) || optlen<1) + return -EINVAL; + err=verify_area(VERIFY_READ,optval,optlen); + if(err) + return err; + memcpy_fromfs(&tmp_fw, optval,optlen); + err=ip_acct_ctl(optname, &tmp_fw,optlen); + return -err; /* -0 is 0 after all */ +#endif /* IP_OPTIONS and friends go here eventually */ default: return(-ENOPROTOOPT); @@ -1971,7 +2257,10 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen) { int val,err; - +#ifdef CONFIG_IP_MULTICAST + int len; +#endif + if(level!=SOL_IP) return -EOPNOTSUPP; @@ -1983,6 +2272,25 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *op case IP_TTL: val=sk->ip_ttl; break; +#ifdef CONFIG_IP_MULTICAST + case IP_MULTICAST_TTL: + val=sk->ip_mc_ttl; + break; + case IP_MULTICAST_LOOP: + val=sk->ip_mc_loop; + break; + case IP_MULTICAST_IF: + err=verify_area(VERIFY_WRITE, optlen, sizeof(int)); + if(err) + return err; + len=strlen(sk->ip_mc_name); + err=verify_area(VERIFY_WRITE, optval, len); + if(err) + return err; + 
put_fs_long(len,(unsigned long *) optlen); + memcpy_tofs((void *)optval,sk->ip_mc_name, len); + return 0; +#endif default: return(-ENOPROTOOPT); } @@ -2000,18 +2308,380 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *op } /* + * Build and send a packet, with as little as one copy + * + * Doesn't care much about ip options... option length can be + * different for fragment at 0 and other fragments. + * + * Note that the fragment at the highest offset is sent first, + * so the getfrag routine can fill in the TCP/UDP checksum header + * field in the last fragment it sends... actually it also helps + * the reassemblers, they can put most packets in at the head of + * the fragment queue, and they know the total size in advance. This + * last feature will measurable improve the Linux fragment handler. + * + * The callback has five args, an arbitrary pointer (copy of frag), + * the source IP address (may depend on the routing table), the + * destination adddress (char *), the offset to copy from, and the + * length to be copied. + * + */ + +int ip_build_xmit(struct sock *sk, + void getfrag (void *, + int, + char *, + unsigned int, + unsigned int), + void *frag, + unsigned short int length, + int daddr, + int flags, + int type) +{ + struct rtable *rt; + unsigned int fraglen, maxfraglen, fragheaderlen; + int offset, mf; + unsigned long saddr; + unsigned short id; + struct iphdr *iph; + int local=0; + struct device *dev; + + +#ifdef CONFIG_INET_MULTICAST + if(sk && MULTICAST(daddr) && *sk->ip_mc_name) + { + dev=dev_get(skb->ip_mc_name); + if(!dev) + return -ENODEV; + rt=NULL; + } + else + { +#endif + /* + * Perform the IP routing decisions + */ + + if(sk->localroute || flags&MSG_DONTROUTE) + local=1; + + rt = sk->ip_route_cache; + + /* + * See if the routing cache is outdated. We need to clean this up once we are happy it is reliable + * by doing the invalidation actively in the route change and header change. 
+ */ + + saddr=sk->ip_route_saddr; + if(!rt || sk->ip_route_stamp != rt_stamp || daddr!=sk->ip_route_daddr || sk->ip_route_local!=local || sk->saddr!=sk->ip_route_saddr) + { + if(local) + rt = ip_rt_local(daddr, NULL, &saddr); + else + rt = ip_rt_route(daddr, NULL, &saddr); + sk->ip_route_local=local; + sk->ip_route_daddr=daddr; + sk->ip_route_saddr=saddr; + sk->ip_route_stamp=rt_stamp; + sk->ip_route_cache=rt; + sk->ip_hcache_ver=NULL; + sk->ip_hcache_state= 0; + } + else if(rt) + { + /* + * Attempt header caches only if the cached route is being reused. Header cache + * is not ultra cheap to set up. This means we only set it up on the second packet, + * so one shot communications are not slowed. We assume (seems reasonable) that 2 is + * probably going to be a stream of data. + */ + if(rt->rt_dev->header_cache && sk->ip_hcache_state!= -1) + { + if(sk->ip_hcache_ver==NULL || sk->ip_hcache_stamp!=*sk->ip_hcache_ver) + rt->rt_dev->header_cache(rt->rt_dev,sk,saddr,daddr); + else + /* Can't cache. Remember this */ + sk->ip_hcache_state= -1; + } + } + + if (rt == NULL) + { + ip_statistics.IpOutNoRoutes++; + return(-ENETUNREACH); + } + + if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr))) + saddr = sk->saddr; + + dev=rt->rt_dev; +#ifdef CONFIG_INET_MULTICAST + } +#endif + + /* + * Now compute the buffer space we require + */ + + fragheaderlen = dev->hard_header_len; + if(type != IPPROTO_RAW) + fragheaderlen += 20; + + /* + * Fragheaderlen is the size of 'overhead' on each buffer. Now work + * out the size of the frames to send. + */ + + maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen; + + /* + * Start at the end of the frame by handling the remainder. + */ + + offset = length - (length % (maxfraglen - fragheaderlen)); + + /* + * Amount of memory to allocate for final fragment. 
+ */ + + fraglen = length - offset + fragheaderlen; + + if(fraglen==0) + { + fraglen = maxfraglen; + offset -= maxfraglen-fragheaderlen; + } + + + /* + * The last fragment will not have MF (more fragments) set. + */ + + mf = 0; + + /* + * Can't fragment raw packets + */ + + if (type == IPPROTO_RAW && offset > 0) + return(-EMSGSIZE); + + /* + * Get an identifier + */ + + id = htons(ip_id_count++); + + /* + * Being outputting the bytes. + */ + + do + { + struct sk_buff * skb; + int error; + char *data; + + /* + * Get the memory we require. + */ + + skb = sock_alloc_send_skb(sk, fraglen, 0, &error); + if (skb == NULL) + return(error); + + /* + * Fill in the control structures + */ + + skb->next = skb->prev = NULL; + skb->dev = dev; + skb->when = jiffies; + skb->free = 1; /* dubious, this one */ + skb->sk = sk; + skb->arp = 0; + skb->saddr = saddr; + skb->raddr = (rt&&rt->rt_gateway) ? rt->rt_gateway : daddr; + skb->len = fraglen; + + /* + * Save us ARP and stuff. In the optimal case we do no route lookup (route cache ok) + * no ARP lookup (arp cache ok) and output. The cache checks are still too slow but + * this can be fixed later. For gateway routes we ought to have a rt->.. header cache + * pointer to speed header cache builds for identical targets. + */ + + if(sk->ip_hcache_state>0) + { + memcpy(skb->data,sk->ip_hcache_data, dev->hard_header_len); + skb->arp=1; + } + else if (dev->hard_header) + { + if(dev->hard_header(skb->data, dev, ETH_P_IP, + NULL, NULL, 0, NULL)>0) + skb->arp=1; + } + + /* + * Find where to start putting bytes. 
+ */ + + data = (char *)skb->data + dev->hard_header_len; + iph = (struct iphdr *)data; + + /* + * Only write IP header onto non-raw packets + */ + + if(type != IPPROTO_RAW) + { + + iph->version = 4; + iph->ihl = 5; /* ugh */ + iph->tos = sk->ip_tos; + iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4); + iph->id = id; + iph->frag_off = htons(offset>>3); + iph->frag_off |= mf; +#ifdef CONFIG_IP_MULTICAST + if (MULTICAST(daddr)) + iph->ttl = sk->ip_mc_ttl; + else +#endif + iph->ttl = sk->ip_ttl; + iph->protocol = type; + iph->check = 0; + iph->saddr = saddr; + iph->daddr = daddr; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + data += iph->ihl*4; + + /* + * Any further fragments will have MF set. + */ + + mf = htons(IP_MF); + } + + /* + * User data callback + */ + + getfrag(frag, saddr, data, offset, fraglen-fragheaderlen); + + /* + * Account for the fragment. + */ + +#ifdef CONFIG_IP_ACCT + if(!offset) + ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1); +#endif + offset -= (maxfraglen-fragheaderlen); + fraglen = maxfraglen; + +#ifdef CONFIG_IP_MULTICAST + + /* + * Multicasts are looped back for other local users + */ + + if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK)) + { + /* + * Loop back any frames. The check for IGMP_ALL_HOSTS is because + * you are always magically a member of this group. + */ + + if(sk==NULL || sk->ip_mc_loop) + { + if(skb->daddr==IGMP_ALL_HOSTS) + ip_loopback(rt->rt_dev,skb); + else + { + struct ip_mc_list *imc=rt->rt_dev->ip_mc_list; + while(imc!=NULL) + { + if(imc->multiaddr==daddr) + { + ip_loopback(rt->rt_dev,skb); + break; + } + imc=imc->next; + } + } + } + + /* + * Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the + * extra clone. + */ + + if(skb->ip_hdr->ttl==0) + kfree_skb(skb, FREE_READ); + } +#endif + /* + * Now queue the bytes into the device. + */ + + if (dev->flags & IFF_UP) + { + dev_queue_xmit(skb, dev, sk->priority); + } + else + { + /* + * Whoops... 
+ * + * FIXME: There is a small nasty here. During the ip_build_xmit we could + * page fault between the route lookup and device send, the device might be + * removed and unloaded.... We need to add device locks on this. + */ + + ip_statistics.IpOutDiscards++; + kfree_skb(skb, FREE_WRITE); + return(0); /* lose rest of fragments */ + } + } + while (offset >= 0); + + return(0); +} + + +/* * IP protocol layer initialiser */ static struct packet_type ip_packet_type = { 0, /* MUTTER ntohs(ETH_P_IP),*/ - 0, /* copy */ + NULL, /* All devices */ ip_rcv, NULL, NULL, }; +/* + * Device notifier + */ + +static int ip_rt_event(unsigned long event, void *ptr) +{ + if(event==NETDEV_DOWN) + ip_rt_flush(ptr); + return NOTIFY_DONE; +} + +struct notifier_block ip_rt_notifier={ + ip_rt_event, + NULL, + 0 +}; /* * IP registers the packet type and then calls the subprotocol initialisers @@ -2021,8 +2691,12 @@ void ip_init(void) { ip_packet_type.type=htons(ETH_P_IP); dev_add_pack(&ip_packet_type); + + /* So we flush routes when a device is downed */ + register_netdevice_notifier(&ip_rt_notifier); /* ip_raw_init(); ip_packet_init(); ip_tcp_init(); ip_udp_init();*/ } + diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c new file mode 100644 index 000000000..b2e901926 --- /dev/null +++ b/net/ipv4/ip_fw.c @@ -0,0 +1,1514 @@ +/* + * IP firewalling code. This is taken from 4.4BSD. Please note the + * copyright message below. As per the GPL it must be maintained + * and the licenses thus do not conflict. While this port is subject + * to the GPL I also place my modifications under the original + * license in recognition of the original copyright. + * -- Alan Cox. + * + * Ported from BSD to Linux, + * Alan Cox 22/Nov/1994. + * Zeroing /proc and other additions + * Jos Vos 4/Feb/1995. + * Merged and included the FreeBSD-Current changes at Ugen's request + * (but hey it's a lot cleaner now). 
Ugen would prefer in some ways + * we waited for his final product but since Linux 1.2.0 is about to + * appear it's not practical - Read: It works, it's not clean but please + * don't consider it to be his standard of finished work. + * Alan Cox 12/Feb/1995 + * Porting bidirectional entries from BSD, fixing accounting issues, + * adding struct ip_fwpkt for checking packets with interface address + * Jos Vos 5/Mar/1995. + * + * Masquerading functionality + * + * Copyright (c) 1994 Pauline Middelink + * + * The pieces which added masquerading functionality are totally + * my responsibility and have nothing to do with the original authors' + * copyright or doing. + * + * Parts distributed under GPL. + * + * Fixes: + * Pauline Middelink : Added masquerading. + * Alan Cox : Fixed an error in the merge. + * + * All the real work was done by ..... + * + */ + + +/* + * Copyright (c) 1993 Daniel Boulet + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. 
+ */ + +#include <linux/config.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/config.h> + +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/icmp.h> +#include <linux/udp.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/icmp.h> +#include <linux/ip_fw.h> +#include <net/checksum.h> + +/* + * Implement IP packet firewall + */ + +#ifdef CONFIG_IPFIREWALL_DEBUG +#define dprintf1(a) printk(a) +#define dprintf2(a1,a2) printk(a1,a2) +#define dprintf3(a1,a2,a3) printk(a1,a2,a3) +#define dprintf4(a1,a2,a3,a4) printk(a1,a2,a3,a4) +#else +#define dprintf1(a) +#define dprintf2(a1,a2) +#define dprintf3(a1,a2,a3) +#define dprintf4(a1,a2,a3,a4) +#endif + +#define print_ip(a) printk("%ld.%ld.%ld.%ld",(ntohl(a)>>24)&0xFF,\ + (ntohl(a)>>16)&0xFF,\ + (ntohl(a)>>8)&0xFF,\ + (ntohl(a))&0xFF); + +#ifdef IPFIREWALL_DEBUG +#define dprint_ip(a) print_ip(a) +#else +#define dprint_ip(a) +#endif + +#ifdef CONFIG_IP_FIREWALL +struct ip_fw *ip_fw_fwd_chain; +struct ip_fw *ip_fw_blk_chain; +int ip_fw_blk_policy=IP_FW_F_ACCEPT; +int ip_fw_fwd_policy=IP_FW_F_ACCEPT; +#endif +#ifdef CONFIG_IP_ACCT +struct ip_fw *ip_acct_chain; +#endif + +#define IP_INFO_BLK 0 +#define IP_INFO_FWD 1 +#define IP_INFO_ACCT 2 + +#ifdef CONFIG_IP_MASQUERADE +/* + * Implement IP packet masquerading + */ + +static unsigned short masq_port = PORT_MASQ_BEGIN; +static char *strProt[] = {"UDP","TCP"}; +struct ip_masq *ip_msq_hosts; + +#endif + +/* + * Returns 1 if the port is matched by the vector, 0 otherwise + */ + +extern inline int port_match(unsigned short *portptr,int nports,unsigned short port,int range_flag) +{ + if (!nports) + return 1; 
+ if ( range_flag ) + { + if ( portptr[0] <= port && port <= portptr[1] ) + { + return( 1 ); + } + nports -= 2; + portptr += 2; + } + while ( nports-- > 0 ) + { + if ( *portptr++ == port ) + { + return( 1 ); + } + } + return(0); +} + +#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL) + + +/* + * Returns 0 if packet should be dropped, 1 if it should be accepted, + * and -1 if an ICMP host unreachable packet should be sent. + * Also does accounting so you can feed it the accounting chain. + * If opt is set to 1, it means that we do this for accounting + * purposes (searches all entries and handles fragments different). + * If opt is set to 2, it doesn't count a matching packet, which + * is used when calling this for checking purposes (IP_FW_CHK_*). + */ + + +int ip_fw_chk(struct iphdr *ip, struct device *rif, struct ip_fw *chain, int policy, int opt) +{ + struct ip_fw *f; + struct tcphdr *tcp=(struct tcphdr *)((unsigned long *)ip+ip->ihl); + struct udphdr *udp=(struct udphdr *)((unsigned long *)ip+ip->ihl); + __u32 src, dst; + __u16 src_port=0, dst_port=0; + unsigned short f_prt=0, prt; + char notcpsyn=1, frag1, match; + unsigned short f_flag; + + /* + * If the chain is empty follow policy. The BSD one + * accepts anything giving you a time window while + * flushing and rebuilding the tables. + */ + + src = ip->saddr; + dst = ip->daddr; + + /* + * This way we handle fragmented packets. + * we ignore all fragments but the first one + * so the whole packet can't be reassembled. + * This way we relay on the full info which + * stored only in first packet. + * + * Note that this theoretically allows partial packet + * spoofing. Not very dangerous but paranoid people may + * wish to play with this. It also allows the so called + * "fragment bomb" denial of service attack on some types + * of system. 
+ */ + + frag1 = ((ntohs(ip->frag_off) & IP_OFFSET) == 0); + if (!frag1 && (opt != 1) && (ip->protocol == IPPROTO_TCP || + ip->protocol == IPPROTO_UDP)) + return(1); + + src = ip->saddr; + dst = ip->daddr; + + /* + * If we got interface from which packet came + * we can use the address directly. This is unlike + * 4.4BSD derived systems that have an address chain + * per device. We have a device per address with dummy + * devices instead. + */ + + dprintf1("Packet "); + switch(ip->protocol) + { + case IPPROTO_TCP: + dprintf1("TCP "); + /* ports stay 0 if it is not the first fragment */ + if (frag1) { + src_port=ntohs(tcp->source); + dst_port=ntohs(tcp->dest); + if(tcp->syn && !tcp->ack) + /* We *DO* have SYN, value FALSE */ + notcpsyn=0; + } + prt=IP_FW_F_TCP; + break; + case IPPROTO_UDP: + dprintf1("UDP "); + /* ports stay 0 if it is not the first fragment */ + if (frag1) { + src_port=ntohs(udp->source); + dst_port=ntohs(udp->dest); + } + prt=IP_FW_F_UDP; + break; + case IPPROTO_ICMP: + dprintf2("ICMP:%d ",((char *)portptr)[0]&0xff); + prt=IP_FW_F_ICMP; + break; + default: + dprintf2("p=%d ",ip->protocol); + prt=IP_FW_F_ALL; + break; + } +#ifdef CONFIG_IP_FIREWALL_DEBUG + dprint_ip(ip->saddr); + + if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP) + /* This will print 0 when it is not the first fragment! */ + dprintf2(":%d ", src_port); + dprint_ip(ip->daddr); + if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP) + /* This will print 0 when it is not the first fragment! */ + dprintf2(":%d ",dst_port); + dprintf1("\n"); +#endif + + for (f=chain;f;f=f->fw_next) + { + /* + * This is a bit simpler as we don't have to walk + * an interface chain as you do in BSD - same logic + * however. + */ + + /* + * Match can become 0x01 (a "normal" match was found), + * 0x02 (a reverse match was found), and 0x03 (the + * IP addresses match in both directions). + * Now we know in which direction(s) we should look + * for a match for the TCP/UDP ports. 
Both directions + * might match (e.g., when both addresses are on the + * same network for which an address/mask is given), but + * the ports might only match in one direction. + * This was obviously wrong in the original BSD code. + */ + match = 0x00; + + if ((src&f->fw_smsk.s_addr)==f->fw_src.s_addr + && (dst&f->fw_dmsk.s_addr)==f->fw_dst.s_addr) + /* normal direction */ + match |= 0x01; + + if ((f->fw_flg & IP_FW_F_BIDIR) && + (dst&f->fw_smsk.s_addr)==f->fw_src.s_addr + && (src&f->fw_dmsk.s_addr)==f->fw_dst.s_addr) + /* reverse direction */ + match |= 0x02; + + if (match) + { + /* + * Look for a VIA match + */ + if(f->fw_via.s_addr && rif) + { + if(rif->pa_addr!=f->fw_via.s_addr) + continue; /* Mismatch */ + } + /* + * Drop through - this is a match + */ + } + else + continue; + + /* + * Ok the chain addresses match. + */ + + f_prt=f->fw_flg&IP_FW_F_KIND; + if (f_prt!=IP_FW_F_ALL) + { + /* + * This is actually buggy as if you set SYN flag + * on UDP or ICMP firewall it will never work,but + * actually it is a concern of software which sets + * firewall entries. + */ + + if((f->fw_flg&IP_FW_F_TCPSYN) && notcpsyn) + continue; + /* + * Specific firewall - packet's protocol + * must match firewall's. + */ + + if(prt!=f_prt) + continue; + + if(!(prt==IP_FW_F_ICMP || ((match & 0x01) && + port_match(&f->fw_pts[0], f->fw_nsp, src_port, + f->fw_flg&IP_FW_F_SRNG) && + port_match(&f->fw_pts[f->fw_nsp], f->fw_ndp, dst_port, + f->fw_flg&IP_FW_F_DRNG)) || ((match & 0x02) && + port_match(&f->fw_pts[0], f->fw_nsp, dst_port, + f->fw_flg&IP_FW_F_SRNG) && + port_match(&f->fw_pts[f->fw_nsp], f->fw_ndp, src_port, + f->fw_flg&IP_FW_F_DRNG)))) + { + continue; + } + } +#ifdef CONFIG_IP_FIREWALL_VERBOSE + /* + * VERY ugly piece of code which actually + * makes kernel printf for denied packets... 
+ */ + + if (f->fw_flg & IP_FW_F_PRN) + { + if(opt != 1) { + if(f->fw_flg&IP_FW_F_ACCEPT) + printk("Accept "); + else if(f->fw_flg&IP_FW_F_ICMPRPL) + printk("Reject "); + else + printk("Deny "); + } + switch(ip->protocol) + { + case IPPROTO_TCP: + printk("TCP "); + break; + case IPPROTO_UDP: + printk("UDP "); + case IPPROTO_ICMP: + printk("ICMP "); + break; + default: + printk("p=%d ",ip->protocol); + break; + } + print_ip(ip->saddr); + if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) + printk(":%d", src_port); + printk(" "); + print_ip(ip->daddr); + if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) + printk(":%d",dst_port); + printk("\n"); + } +#endif + if (opt != 2) { + f->fw_bcnt+=ntohs(ip->tot_len); + f->fw_pcnt++; + } + if (opt != 1) + break; + } /* Loop */ + + if(opt == 1) + return 0; + + /* + * We rely on policy defined in the rejecting entry or, if no match + * was found, we rely on the general policy variable for this type + * of firewall. + */ + + if(f!=NULL) /* A match was found */ + f_flag=f->fw_flg; + else + f_flag=policy; + if(f_flag&IP_FW_F_ACCEPT) + return ((f_flag&IP_FW_F_MASQ)?2:1); + if(f_flag&IP_FW_F_ICMPRPL) + return -1; + return 0; +} + +#ifdef CONFIG_IP_MASQUERADE + +static void masq_expire(unsigned long data) +{ + struct ip_masq *ms = (struct ip_masq *)data; + struct ip_masq *old,*cur; + unsigned long flags; + +#ifdef DEBUG_MASQ + printk("Masqueraded %s %lX:%X expired\n", + strProt[ms->protocol==IPPROTO_TCP], + ntohl(ms->src),ntohs(ms->sport)); +#endif + + save_flags(flags); + cli(); + + /* delete from list of hosts */ + old = NULL; + cur = ip_msq_hosts; + while (cur!=NULL) { + if (cur==ms) { + if (old==NULL) ip_msq_hosts = ms->next; + else old->next = ms->next; + kfree_s(ms,sizeof(*ms)); + break; + } + old = cur; + cur=cur->next; + } + restore_flags(flags); +} + +/* + * Create a new masquerade list entry, also allocate an + * unused mport, keeping the portnumber between the + * given boundaries MASQ_BEGIN and 
MASQ_END. + * + * FIXME: possible deadlock if all free ports are exhausted! + */ +static struct ip_masq *alloc_masq_entry(void) +{ + struct ip_masq *ms, *mst; + unsigned long flags; + + ms = (struct ip_masq *) kmalloc(sizeof(struct ip_masq), GFP_ATOMIC); + if (ms==NULL) + return NULL; + + memset(ms,0,sizeof(*ms)); + init_timer(&ms->timer); + ms->timer.data = (unsigned long)ms; + ms->timer.function = masq_expire; + + save_flags(flags); + cli(); + do + { + /* Try the next available port number */ + ms->mport = htons(masq_port++); + if (masq_port==PORT_MASQ_END) + masq_port = PORT_MASQ_BEGIN; + + /* Now hunt through the used ports to see if + * this port is in use... */ + mst = ip_msq_hosts; + while (mst && mst->mport!=ms->mport) + mst = mst->next; + } + while (mst!=NULL); + + /* add new entry in front of list to minimize lookup-time */ + ms->next = ip_msq_hosts; + ip_msq_hosts = ms; + restore_flags(flags); + + return ms; +} + +/* + * When passing an FTP 'PORT' command, try to replace the IP + * address with an newly assigned (masquereded) port on this + * host, so the ftp-data connect FROM the site will succeed... + * + * Also, when the size of the packet changes, create an delta + * offset, which will be added to every th->seq (and subtracted for + * (th->acqseq) whose seq > init_seq. + * + * Not for the faint of heart! + */ + +static struct sk_buff *revamp(struct sk_buff *skb, struct device *dev, struct ip_masq *ftp) +{ + struct iphdr *iph = skb->h.iph; + struct tcphdr *th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); + struct sk_buff *skb2; + char *p, *data = (char *)&th[1]; + unsigned char p1,p2,p3,p4,p5,p6; + unsigned long from; + unsigned short port; + struct ip_masq *ms; + char buf[20]; /* xxx.xxx.xxx.xxx\r\n */ + + /* + * Adjust seq and ack_seq with delta-offset for + * the packets AFTER this one... 
+ */ + if (ftp->delta && after(ftp->init_seq,th->seq)) + { + th->seq += ftp->delta; +/* th->ack_seq += ftp->delta;*/ + } + + while (skb->len - ((unsigned char *)data - skb->h.raw) > 18) + { + if (memcmp(data,"PORT ",5)!=0 && memcmp(data,"port ",5)!=0) + { + data += 5; + continue; + } + p = data+5; + p1 = simple_strtoul(data+5,&data,10); + if (*data!=',') + continue; + p2 = simple_strtoul(data+1,&data,10); + if (*data!=',') + continue; + p3 = simple_strtoul(data+1,&data,10); + if (*data!=',') + continue; + p4 = simple_strtoul(data+1,&data,10); + if (*data!=',') + continue; + p5 = simple_strtoul(data+1,&data,10); + if (*data!=',') + continue; + p6 = simple_strtoul(data+1,&data,10); + if (*data!='\r' && *data!='\n') + continue; + + from = (p1<<24) | (p2<<16) | (p3<<8) | p4; + port = (p5<<8) | p6; + printk("PORT %lX:%X detected\n",from,port); + + /* + * Now create an masquerade entry for it + */ + ms = alloc_masq_entry(); + if (ms==NULL) + return skb; + ms->protocol = IPPROTO_TCP; + ms->src = htonl(from); /* derived from PORT cmd */ + ms->sport = htons(port); /* derived from PORT cmd */ + ms->dst = iph->daddr; + ms->dport = htons(20); /* ftp-data */ + ms->timer.expires = MASQUERADE_EXPIRE_TCP_FIN; + add_timer(&ms->timer); + + /* + * Replace the old PORT with the new one + */ + from = ntohl(dev->pa_addr); + port = ntohs(ms->mport); + sprintf(buf,"%ld,%ld,%ld,%ld,%d,%d", + from>>24&255,from>>16&255,from>>8&255,from&255, + port>>8&255,port&255); + + /* + * Calculate required delta-offset to keep TCP happy + */ + ftp->delta += strlen(buf) - (data-p); + if (ftp->delta==0) + { + /* + * simple case, just replace the old PORT cmd + */ + ftp->init_seq = 0; + memcpy(p,buf,strlen(buf)); + return skb; + } + + /* + * Sizes differ, make a copy + */ + printk("MASQUERADE: resizing needed for %d bytes (%ld)\n",ftp->delta, skb->len); + if (!ftp->init_seq) + ftp->init_seq = th->seq; + + skb2 = alloc_skb(skb->mem_len-sizeof(struct sk_buff)+ftp->delta, GFP_ATOMIC); + if (skb2 == NULL) { + 
printk("MASQUERADE: No memory available\n"); + return skb; + } + skb2->free = skb->free; + skb2->len = skb->len + ftp->delta; + skb2->h.raw = &skb2->data[skb->h.raw - skb->data]; + + /* + * Copy the packet data into the new buffer. + * Thereby replacing the PORT cmd. + */ + memcpy(skb2->data, skb->data, (p - (char *)skb->data)); + memcpy(&skb2->data[(p - (char *)skb->data)], buf, strlen(buf)); + memcpy(&skb2->data[(p - (char *)skb->data) + strlen(buf)], data, + skb->mem_len - sizeof(struct sk_buff) - ((char *)skb->h.raw - data)); + + /* + * Problem, how to replace the new skb with old one, + * preferably inplace, so all the pointers in the + * calling tree keep ok :( + */ + kfree_skb(skb, FREE_WRITE); + return skb2; + } + return skb; +} + +static void recalc_check(struct udphdr *uh, unsigned long saddr, + unsigned long daddr, int len) +{ + uh->check=0; + uh->check=csum_tcpudp_magic(saddr,daddr,len, + IPPROTO_UDP, csum_partial((char *)uh,len,0)); + if(uh->check==0) + uh->check=-0xFFFF; +} + +void ip_fw_masquerade(struct sk_buff **skb_ptr, struct device *dev) +{ + struct sk_buff *skb=*skb_ptr; + struct iphdr *iph = skb->h.iph; + unsigned short *portptr; + struct ip_masq *ms; + int size; + + /* + * We can only masquerade protocols with ports... 
+ */ + + if (iph->protocol!=IPPROTO_UDP && iph->protocol!=IPPROTO_TCP) + return; + + /* + * Now hunt the list to see if we have an old entry + */ + + portptr = (unsigned short *)&(((char *)iph)[iph->ihl*4]); + ms = ip_msq_hosts; + +#ifdef DEBUG_MASQ + printk("Outgoing %s %lX:%X -> %lX:%X\n", + strProt[iph->protocol==IPPROTO_TCP], + ntohl(iph->saddr), ntohs(portptr[0]), + ntohl(iph->daddr), ntohs(portptr[1])); +#endif + while (ms!=NULL) + { + if (iph->protocol == ms->protocol && + iph->saddr == ms->src && iph->daddr == ms->dst && + portptr[0] == ms->sport && portptr[1] == ms->dport) + { + del_timer(&ms->timer); + break; + } + ms = ms->next; + } + + /* + * Nope, not found, create a new entry for it + */ + + if (ms==NULL) + { + ms = alloc_masq_entry(); + if (ms==NULL) + { + printk("MASQUERADE: no memory left !\n"); + return; + } + ms->protocol = iph->protocol; + ms->src = iph->saddr; + ms->dst = iph->daddr; + ms->sport = portptr[0]; + ms->dport = portptr[1]; + } + + /* + * Change the fragments origin + */ + + size = skb->len - ((unsigned char *)portptr - skb->h.raw); + iph->saddr = dev->pa_addr; /* my own address */ + portptr[0] = ms->mport; + + /* + * Adjust packet accordingly to protocol + */ + + if (iph->protocol==IPPROTO_UDP) + { + ms->timer.expires = MASQUERADE_EXPIRE_UDP; + recalc_check((struct udphdr *)portptr,iph->saddr,iph->daddr,size); + } + else + { + struct tcphdr *th; + if (portptr[1]==htons(21)) + { + skb = revamp(*skb_ptr, dev, ms); + skb = *skb_ptr; + iph = skb->h.iph; + portptr = (unsigned short *)&(((char *)iph)[iph->ihl*4]); + } + th = (struct tcphdr *)portptr; + + /* + * Timeout depends if FIN packet was seen + */ + if (ms->sawfin || th->fin) + { + ms->timer.expires = MASQUERADE_EXPIRE_TCP_FIN; + ms->sawfin = 1; + } + else ms->timer.expires = MASQUERADE_EXPIRE_TCP; + + tcp_send_check(th,iph->saddr,iph->daddr,size,skb->sk); + } + add_timer(&ms->timer); + ip_send_check(iph); + + #ifdef DEBUG_MASQ + printk("O-routed from %lX:%X over 
%s\n",ntohl(dev->pa_addr),ntohs(ms->mport),dev->name); + #endif + } + + /* + * Check if it's a masqueraded port, look it up, + * and send it on its way... + * + * Better not have many hosts using the designated portrange + * as 'normal' ports, or you'll be spending lots of time in + * this function. + */ + +int ip_fw_demasquerade(struct sk_buff *skb_ptr) +{ + struct iphdr *iph = skb_ptr->h.iph; + unsigned short *portptr; + struct ip_masq *ms; + struct tcphdr *th = (struct tcphdr *)(skb_ptr->h.raw+(iph->ihl<<2)); + + if (iph->protocol!=IPPROTO_UDP && iph->protocol!=IPPROTO_TCP) + return 0; + + portptr = (unsigned short *)&(((char *)iph)[iph->ihl*4]); + if (ntohs(portptr[1]) < PORT_MASQ_BEGIN || + ntohs(portptr[1]) > PORT_MASQ_END) + return 0; + +#ifdef DEBUG_MASQ + printk("Incoming %s %lX:%X -> %lX:%X\n", + strProt[iph->protocol==IPPROTO_TCP], + ntohl(iph->saddr), ntohs(portptr[0]), + ntohl(iph->daddr), ntohs(portptr[1])); +#endif + /* + * reroute to original host:port if found... + * + * NB. Cannot check destination address, just for the incoming port. + * reason: archie.doc.ac.uk has 6 interfaces, you send to + * phoenix and get a reply from any other interface(==dst)! + * + * [Only for UDP] - AC + */ + ms = ip_msq_hosts; + while (ms!=NULL) + { + if (iph->protocol==ms->protocol && + (iph->saddr==ms->dst || iph->protocol==IPPROTO_UDP) && + portptr[0]==ms->dport && + portptr[1]==ms->mport) + { + int size = skb_ptr->len - ((unsigned char *)portptr - skb_ptr->h.raw); + iph->daddr = ms->src; + portptr[1] = ms->sport; + + /* + * Yug! adjust UDP/TCP and IP checksums + */ + if (iph->protocol==IPPROTO_UDP) + recalc_check((struct udphdr *)portptr,iph->saddr,iph->daddr,size); + else + { + /* + * Adjust seq and ack_seq with delta-offset for + * the packets AFTER this one... 
+ */ + if (ms->delta && after(ms->init_seq,th->ack_seq)) + { +/* th->seq += ms->delta;*/ + th->ack_seq -= ms->delta; + } + tcp_send_check((struct tcphdr *)portptr,iph->saddr,iph->daddr,size,skb_ptr->sk); + } + ip_send_check(iph); +#ifdef DEBUG_MASQ + printk("I-routed to %lX:%X\n",ntohl(iph->daddr),ntohs(portptr[1])); +#endif + return 1; + } + ms = ms->next; + } + + /* sorry, all this trouble for a no-hit :) */ + return 0; +} +#endif + + + +static void zero_fw_chain(struct ip_fw *chainptr) +{ + struct ip_fw *ctmp=chainptr; + while(ctmp) + { + ctmp->fw_pcnt=0L; + ctmp->fw_bcnt=0L; + ctmp=ctmp->fw_next; + } +} + +static void free_fw_chain(struct ip_fw *volatile* chainptr) +{ + unsigned long flags; + save_flags(flags); + cli(); + while ( *chainptr != NULL ) + { + struct ip_fw *ftmp; + ftmp = *chainptr; + *chainptr = ftmp->fw_next; + kfree_s(ftmp,sizeof(*ftmp)); + } + restore_flags(flags); +} + +/* Volatiles to keep some of the compiler versions amused */ + +static int add_to_chain(struct ip_fw *volatile* chainptr, struct ip_fw *frwl) +{ + struct ip_fw *ftmp; + struct ip_fw *chtmp=NULL; + struct ip_fw *volatile chtmp_prev=NULL; + unsigned long flags; + unsigned long m_src_mask,m_dst_mask; + unsigned long n_sa,n_da,o_sa,o_da,o_sm,o_dm,n_sm,n_dm; + unsigned short n_sr,n_dr,o_sr,o_dr; + unsigned short oldkind,newkind; + int addb4=0; + int n_o,n_n; + + save_flags(flags); + + ftmp = kmalloc( sizeof(struct ip_fw), GFP_ATOMIC ); + if ( ftmp == NULL ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: malloc said no\n"); +#endif + return( ENOMEM ); + } + + memcpy(ftmp, frwl, sizeof( struct ip_fw ) ); + + ftmp->fw_pcnt=0L; + ftmp->fw_bcnt=0L; + + ftmp->fw_next = NULL; + + cli(); + + if (*chainptr==NULL) + { + *chainptr=ftmp; + } + else + { + chtmp_prev=NULL; + for (chtmp=*chainptr;chtmp!=NULL;chtmp=chtmp->fw_next) + { + addb4=0; + newkind=ftmp->fw_flg & IP_FW_F_KIND; + oldkind=chtmp->fw_flg & IP_FW_F_KIND; + + if (newkind!=IP_FW_F_ALL + && oldkind!=IP_FW_F_ALL + && 
oldkind!=newkind) + { + chtmp_prev=chtmp; + continue; + } + + /* + * Very very *UGLY* code... + * Sorry,but i had to do this.... + */ + + n_sa=ntohl(ftmp->fw_src.s_addr); + n_da=ntohl(ftmp->fw_dst.s_addr); + n_sm=ntohl(ftmp->fw_smsk.s_addr); + n_dm=ntohl(ftmp->fw_dmsk.s_addr); + + o_sa=ntohl(chtmp->fw_src.s_addr); + o_da=ntohl(chtmp->fw_dst.s_addr); + o_sm=ntohl(chtmp->fw_smsk.s_addr); + o_dm=ntohl(chtmp->fw_dmsk.s_addr); + + m_src_mask = o_sm & n_sm; + m_dst_mask = o_dm & n_dm; + + if ((o_sa & m_src_mask) == (n_sa & m_src_mask)) + { + if (n_sm > o_sm) + addb4++; + if (n_sm < o_sm) + addb4--; + } + + if ((o_da & m_dst_mask) == (n_da & m_dst_mask)) + { + if (n_dm > o_dm) + addb4++; + if (n_dm < o_dm) + addb4--; + } + + if (((o_da & o_dm) == (n_da & n_dm)) + &&((o_sa & o_sm) == (n_sa & n_sm))) + { + if (newkind!=IP_FW_F_ALL && + oldkind==IP_FW_F_ALL) + addb4++; + if (newkind==oldkind && (oldkind==IP_FW_F_TCP + || oldkind==IP_FW_F_UDP)) + { + + /* + * Here the main idea is to check the size + * of port range which the frwl covers + * We actually don't check their values but + * just the wideness of range they have + * so that less wide ranges or single ports + * go first and wide ranges go later. No ports + * at all treated as a range of maximum number + * of ports. + */ + + if (ftmp->fw_flg & IP_FW_F_SRNG) + n_sr=ftmp->fw_pts[1]-ftmp->fw_pts[0]; + else + n_sr=(ftmp->fw_nsp)? + ftmp->fw_nsp : 0xFFFF; + + if (chtmp->fw_flg & IP_FW_F_SRNG) + o_sr=chtmp->fw_pts[1]-chtmp->fw_pts[0]; + else + o_sr=(chtmp->fw_nsp)?chtmp->fw_nsp : 0xFFFF; + + if (n_sr<o_sr) + addb4++; + if (n_sr>o_sr) + addb4--; + + n_n=ftmp->fw_nsp; + n_o=chtmp->fw_nsp; + + /* + * Actually this cannot happen as the frwl control + * procedure checks for number of ports in source and + * destination range but we will try to be more safe. 
+ */ + + if ((n_n>(IP_FW_MAX_PORTS-2)) || + (n_o>(IP_FW_MAX_PORTS-2))) + goto skip_check; + + if (ftmp->fw_flg & IP_FW_F_DRNG) + n_dr=ftmp->fw_pts[n_n+1]-ftmp->fw_pts[n_n]; + else + n_dr=(ftmp->fw_ndp)? ftmp->fw_ndp : 0xFFFF; + + if (chtmp->fw_flg & IP_FW_F_DRNG) + o_dr=chtmp->fw_pts[n_o+1]-chtmp->fw_pts[n_o]; + else + o_dr=(chtmp->fw_ndp)? chtmp->fw_ndp : 0xFFFF; + if (n_dr<o_dr) + addb4++; + if (n_dr>o_dr) + addb4--; +skip_check: + } + /* finally look at the interface address */ + if ((addb4 == 0) && ftmp->fw_via.s_addr && + !(chtmp->fw_via.s_addr)) + addb4++; + } + if (addb4>0) + { + if (chtmp_prev) + { + chtmp_prev->fw_next=ftmp; + ftmp->fw_next=chtmp; + } + else + { + *chainptr=ftmp; + ftmp->fw_next=chtmp; + } + restore_flags(flags); + return 0; + } + chtmp_prev=chtmp; + } + } + + if (chtmp_prev) + chtmp_prev->fw_next=ftmp; + else + *chainptr=ftmp; + restore_flags(flags); + return(0); +} + +static int del_from_chain(struct ip_fw *volatile*chainptr, struct ip_fw *frwl) +{ + struct ip_fw *ftmp,*ltmp; + unsigned short tport1,tport2,tmpnum; + char matches,was_found; + unsigned long flags; + + save_flags(flags); + cli(); + + ftmp=*chainptr; + + if ( ftmp == NULL ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: chain is empty\n"); +#endif + restore_flags(flags); + return( EINVAL ); + } + + ltmp=NULL; + was_found=0; + + while( ftmp != NULL ) + { + matches=1; + if (ftmp->fw_src.s_addr!=frwl->fw_src.s_addr + || ftmp->fw_dst.s_addr!=frwl->fw_dst.s_addr + || ftmp->fw_smsk.s_addr!=frwl->fw_smsk.s_addr + || ftmp->fw_dmsk.s_addr!=frwl->fw_dmsk.s_addr + || ftmp->fw_via.s_addr!=frwl->fw_via.s_addr + || ftmp->fw_flg!=frwl->fw_flg) + matches=0; + + tport1=ftmp->fw_nsp+ftmp->fw_ndp; + tport2=frwl->fw_nsp+frwl->fw_ndp; + if (tport1!=tport2) + matches=0; + else if (tport1!=0) + { + for (tmpnum=0;tmpnum < tport1 && tmpnum < IP_FW_MAX_PORTS;tmpnum++) + if (ftmp->fw_pts[tmpnum]!=frwl->fw_pts[tmpnum]) + matches=0; + } + if(matches) + { + was_found=1; + if (ltmp) + { + 
ltmp->fw_next=ftmp->fw_next; + kfree_s(ftmp,sizeof(*ftmp)); + ftmp=ltmp->fw_next; + } + else + { + *chainptr=ftmp->fw_next; + kfree_s(ftmp,sizeof(*ftmp)); + ftmp=*chainptr; + } + } + else + { + ltmp = ftmp; + ftmp = ftmp->fw_next; + } + } + restore_flags(flags); + if (was_found) + return 0; + else + return(EINVAL); +} + +#endif /* CONFIG_IP_ACCT || CONFIG_IP_FIREWALL */ + +struct ip_fw *check_ipfw_struct(struct ip_fw *frwl, int len) +{ + + if ( len != sizeof(struct ip_fw) ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: len=%d, want %d\n",len, sizeof(struct ip_fw)); +#endif + return(NULL); + } + + if ( (frwl->fw_flg & ~IP_FW_F_MASK) != 0 ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: undefined flag bits set (flags=%x)\n", + frwl->fw_flg); +#endif + return(NULL); + } + + if ( (frwl->fw_flg & IP_FW_F_SRNG) && frwl->fw_nsp < 2 ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: src range set but fw_nsp=%d\n", + frwl->fw_nsp); +#endif + return(NULL); + } + + if ( (frwl->fw_flg & IP_FW_F_DRNG) && frwl->fw_ndp < 2 ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: dst range set but fw_ndp=%d\n", + frwl->fw_ndp); +#endif + return(NULL); + } + + if ( frwl->fw_nsp + frwl->fw_ndp > IP_FW_MAX_PORTS ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: too many ports (%d+%d)\n", + frwl->fw_nsp,frwl->fw_ndp); +#endif + return(NULL); + } + + return frwl; +} + + + + +#ifdef CONFIG_IP_ACCT + +#if 0 +void ip_acct_cnt(struct iphdr *iph, struct device *dev, struct ip_fw *f) +{ + (void) ip_fw_chk(iph, dev, f, 0, 1); + return; +} +#endif + +int ip_acct_ctl(int stage, void *m, int len) +{ + if ( stage == IP_ACCT_FLUSH ) + { + free_fw_chain(&ip_acct_chain); + return(0); + } + if ( stage == IP_ACCT_ZERO ) + { + zero_fw_chain(ip_acct_chain); + return(0); + } + if ( stage == IP_ACCT_ADD + || stage == IP_ACCT_DEL + ) + { + struct ip_fw *frwl; + + if (!(frwl=check_ipfw_struct(m,len))) + return (EINVAL); + + switch (stage) + { + case IP_ACCT_ADD: 
+ return( add_to_chain(&ip_acct_chain,frwl)); + case IP_ACCT_DEL: + return( del_from_chain(&ip_acct_chain,frwl)); + default: + /* + * Should be panic but... (Why ??? - AC) + */ +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_acct_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); + } + } +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_acct_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); +} +#endif + +#ifdef CONFIG_IP_FIREWALL +int ip_fw_ctl(int stage, void *m, int len) +{ + int ret; + + if ( stage == IP_FW_FLUSH_BLK ) + { + free_fw_chain(&ip_fw_blk_chain); + return(0); + } + + if ( stage == IP_FW_FLUSH_FWD ) + { + free_fw_chain(&ip_fw_fwd_chain); + return(0); + } + + if ( stage == IP_FW_ZERO_BLK ) + { + zero_fw_chain(ip_fw_blk_chain); + return(0); + } + + if ( stage == IP_FW_ZERO_FWD ) + { + zero_fw_chain(ip_fw_fwd_chain); + return(0); + } + + if ( stage == IP_FW_POLICY_BLK || stage == IP_FW_POLICY_FWD ) + { + int *tmp_policy_ptr; + tmp_policy_ptr=(int *)m; + if ( stage == IP_FW_POLICY_BLK ) + ip_fw_blk_policy=*tmp_policy_ptr; + else + ip_fw_fwd_policy=*tmp_policy_ptr; + return 0; + } + + if ( stage == IP_FW_CHK_BLK || stage == IP_FW_CHK_FWD ) + { + struct device viadev; + struct ip_fwpkt *ipfwp; + struct iphdr *ip; + + if ( len < sizeof(struct ip_fwpkt) ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printf("ip_fw_ctl: length=%d, expected %d\n", + len, sizeof(struct ip_fwpkt)); +#endif + return( EINVAL ); + } + + ipfwp = (struct ip_fwpkt *)m; + ip = &(ipfwp->fwp_iph); + + if ( ip->ihl != sizeof(struct iphdr) / sizeof(int)) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: ip->ihl=%d, want %d\n",ip->ihl, + sizeof(struct iphdr)/sizeof(int)); +#endif + return(EINVAL); + } + + viadev.pa_addr = ipfwp->fwp_via.s_addr; + + if ((ret = ip_fw_chk(ip, &viadev, + stage == IP_FW_CHK_BLK ? + ip_fw_blk_chain : ip_fw_fwd_chain, + stage == IP_FW_CHK_BLK ? 
+ ip_fw_blk_policy : ip_fw_fwd_policy, 2 )) > 0 + ) + return(0); + else if (ret == -1) + return(ECONNREFUSED); + else + return(ETIMEDOUT); + } + +/* + * Here we really working hard-adding new elements + * to blocking/forwarding chains or deleting 'em + */ + + if ( stage == IP_FW_ADD_BLK || stage == IP_FW_ADD_FWD + || stage == IP_FW_DEL_BLK || stage == IP_FW_DEL_FWD + ) + { + struct ip_fw *frwl; + frwl=check_ipfw_struct(m,len); + if (frwl==NULL) + return (EINVAL); + + switch (stage) + { + case IP_FW_ADD_BLK: + return(add_to_chain(&ip_fw_blk_chain,frwl)); + case IP_FW_ADD_FWD: + return(add_to_chain(&ip_fw_fwd_chain,frwl)); + case IP_FW_DEL_BLK: + return(del_from_chain(&ip_fw_blk_chain,frwl)); + case IP_FW_DEL_FWD: + return(del_from_chain(&ip_fw_fwd_chain,frwl)); + default: + /* + * Should be panic but... (Why are BSD people panic obsessed ??) + */ +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); + } + } + +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); +} +#endif /* CONFIG_IP_FIREWALL */ + +#if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT) + +static int ip_chain_procinfo(int stage, char *buffer, char **start, + off_t offset, int length, int reset) +{ + off_t pos=0, begin=0; + struct ip_fw *i; + unsigned long flags; + int len, p; + + + switch(stage) + { +#ifdef CONFIG_IP_FIREWALL + case IP_INFO_BLK: + i = ip_fw_blk_chain; + len=sprintf(buffer, "IP firewall block rules, default %d\n", + ip_fw_blk_policy); + break; + case IP_INFO_FWD: + i = ip_fw_fwd_chain; + len=sprintf(buffer, "IP firewall forward rules, default %d\n", + ip_fw_fwd_policy); + break; +#endif +#ifdef CONFIG_IP_ACCT + case IP_INFO_ACCT: + i = ip_acct_chain; + len=sprintf(buffer,"IP accounting rules\n"); + break; +#endif + default: + /* this should never be reached, but safety first... 
*/ + i = NULL; + len=0; + break; + } + + save_flags(flags); + cli(); + + while(i!=NULL) + { + len+=sprintf(buffer+len,"%08lX/%08lX->%08lX/%08lX %08lX %X ", + ntohl(i->fw_src.s_addr),ntohl(i->fw_smsk.s_addr), + ntohl(i->fw_dst.s_addr),ntohl(i->fw_dmsk.s_addr), + ntohl(i->fw_via.s_addr),i->fw_flg); + len+=sprintf(buffer+len,"%u %u %-9lu %-9lu", + i->fw_nsp,i->fw_ndp, i->fw_pcnt,i->fw_bcnt); + for (p = 0; p < IP_FW_MAX_PORTS; p++) + len+=sprintf(buffer+len, " %u", i->fw_pts[p]); + buffer[len++]='\n'; + buffer[len]='\0'; + pos=begin+len; + if(pos<offset) + { + len=0; + begin=pos; + } + else if(reset) + { + /* This needs to be done at this specific place! */ + i->fw_pcnt=0L; + i->fw_bcnt=0L; + } + if(pos>offset+length) + break; + i=i->fw_next; + } + restore_flags(flags); + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} +#endif + +#ifdef CONFIG_IP_ACCT + +int ip_acct_procinfo(char *buffer, char **start, off_t offset, int length, int reset) +{ + return ip_chain_procinfo(IP_INFO_ACCT, buffer,start,offset,length,reset); +} + +#endif + +#ifdef CONFIG_IP_FIREWALL + +int ip_fw_blk_procinfo(char *buffer, char **start, off_t offset, int length, int reset) +{ + return ip_chain_procinfo(IP_INFO_BLK, buffer,start,offset,length,reset); +} + +int ip_fw_fwd_procinfo(char *buffer, char **start, off_t offset, int length, int reset) +{ + return ip_chain_procinfo(IP_INFO_FWD, buffer,start,offset,length,reset); +} +#endif + +#ifdef CONFIG_IP_MASQUERADE + +int ip_msqhst_procinfo(char *buffer, char **start, off_t offset, int length) +{ + off_t pos=0, begin=0; + struct ip_masq *ms; + unsigned long flags; + int len=0; + + len=sprintf(buffer,"Prc FromIP FPrt ToIP TPrt Masq Init-seq Delta Expires\n"); + save_flags(flags); + cli(); + + ms=ip_msq_hosts; + while (ms!=NULL) + { + int timer_active = del_timer(&ms->timer); + if (!timer_active) + ms->timer.expires = 0; + len+=sprintf(buffer+len,"%s %08lX:%04X %08lX:%04X %04X %08lX %5d %lu\n", + 
strProt[ms->protocol==IPPROTO_TCP], + ntohl(ms->src),ntohs(ms->sport), + ntohl(ms->dst),ntohs(ms->dport), + ntohs(ms->mport), + ms->init_seq,ms->delta,ms->timer.expires); + if (timer_active) + add_timer(&ms->timer); + + pos=begin+len; + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + ms=ms->next; + } + restore_flags(flags); + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +#endif + diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c new file mode 100644 index 000000000..5227d9474 --- /dev/null +++ b/net/ipv4/ipip.c @@ -0,0 +1,95 @@ +/* + * Linux NET3: IP/IP protocol decoder. + * + * Authors: + * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 + * + * Fixes: + * Alan Cox : Merged and made usable non modular (it's so tiny it's silly as + * a module taking up 2 pages). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <netinet/in.h> +#include <net/datalink.h> +#include <net/sock.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/ipip.h> + +/* + * NB. we must include the kernel identification string in order to install the module. + */ + +#if ( defined(CONFIG_NET_IPIP) && defined(CONFIG_IP_FORWARD)) || defined(MODULE) +#ifdef MODULE +#include <linux/module.h> +#include <linux/version.h> + +static char kernel_version[] = UTS_RELEASE; + +#else +#define MOD_INC_USE_COUNT +#define MOD_DEC_USE_COUNT +#endif + + +/* + * The driver. 
+ */ + +int ipip_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, + unsigned long daddr, unsigned short len, unsigned long saddr, + int redo, struct inet_protocol *protocol) +{ + /* Don't unlink in the middle of a turnaround */ + MOD_INC_USE_COUNT; +#ifdef TUNNEL_DEBUG + printk("ipip_rcv: got a packet!\n"); +#endif + ip_forward(skb, dev, 0, daddr, 0); + kfree_skb(skb, FREE_READ); + MOD_DEC_USE_COUNT; + return(0); +} + +#ifdef MODULE +static struct inet_protocol ipip_protocol = { + ipip_rcv, /* IPIP handler */ + NULL, /* Will be UDP fraglist handler */ + NULL, /* TUNNEL error control */ + 0, /* next */ + IPPROTO_IPIP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "IPIP" /* name */ +}; + + +/* + * And now the modules code and kernel interface. + */ + +int init_module( void) +{ + inet_add_protocol(&ipip_protocol); + return 0; +} + +void cleanup_module( void) +{ + if ( inet_del_protocol(&ipip_protocol) < 0 ) + printk("ipip close: can't remove protocol\n"); +} + +#endif +#endif diff --git a/net/inet/packet.c b/net/ipv4/packet.c index 0f6c3698d..fbc4dd5ca 100644 --- a/net/inet/packet.c +++ b/net/ipv4/packet.c @@ -23,7 +23,11 @@ * Alan Cox : Re-commented the code. * Alan Cox : Use new kernel side addressing * Rob Janssen : Correct MTU usage. - * + * Dave Platt : Counter leaks caused by incorrect + * interrupt locking and some slightly + * dubious gcc output. Can you read + * compiler: it said _VOLATILE_ + * Richard Kooijman : Timestamp fixes. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -34,15 +38,16 @@ #include <linux/types.h> #include <linux/sched.h> +#include <linux/mm.h> #include <linux/fcntl.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> -#include "ip.h" -#include "protocol.h" +#include <net/ip.h> +#include <net/protocol.h> #include <linux/skbuff.h> -#include "sock.h" +#include <net/sock.h> #include <linux/errno.h> #include <linux/timer.h> #include <asm/system.h> @@ -67,6 +72,7 @@ static unsigned long min(unsigned long a, unsigned long b) int packet_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) { struct sock *sk; + unsigned long flags; /* * When we registered the protocol we saved the socket in the data @@ -84,19 +90,28 @@ int packet_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) skb->dev = dev; skb->len += dev->hard_header_len; - skb->sk = sk; - /* * Charge the memory to the socket. This is done specifically * to prevent sockets using all the memory up. */ + if (sk->rmem_alloc & 0xFF000000) { + printk("packet_rcv: sk->rmem_alloc = %ld\n", sk->rmem_alloc); + sk->rmem_alloc = 0; + } + if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) { +/* printk("packet_rcv: drop, %d+%d>%d\n", sk->rmem_alloc, skb->mem_len, sk->rcvbuf); */ skb->sk = NULL; kfree_skb(skb, FREE_READ); return(0); } + + save_flags(flags); + cli(); + + skb->sk = sk; sk->rmem_alloc += skb->mem_len; /* @@ -104,13 +119,15 @@ int packet_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) */ skb_queue_tail(&sk->receive_queue,skb); - wake_up_interruptible(sk->sleep); + if(!sk->dead) + sk->data_ready(sk,skb->len); + + restore_flags(flags); /* * Processing complete. 
*/ - release_sock(sk); /* This is now effectively surplus in this layer */ return(0); } @@ -246,6 +263,7 @@ static int packet_init(struct sock *sk) p->func = packet_rcv; p->type = sk->num; p->data = (void *)sk; + p->dev = NULL; dev_add_pack(p); /* @@ -312,6 +330,7 @@ int packet_recvfrom(struct sock *sk, unsigned char *to, int len, copied = min(len, truesize); memcpy_tofs(to, skb->data, copied); /* We can't use skb_copy_datagram here */ + sk->stamp=skb->stamp; /* * Copy the address. @@ -374,7 +393,7 @@ struct proto packet_prot = NULL, NULL, ip_queue_xmit, /* These two are not actually used */ - ip_retransmit, + NULL, NULL, NULL, NULL, @@ -386,6 +405,6 @@ struct proto packet_prot = NULL, 128, 0, - {NULL,}, - "PACKET" + "PACKET", + 0, 0 }; diff --git a/net/inet/proc.c b/net/ipv4/proc.c index c143924bd..e7124a42d 100644 --- a/net/inet/proc.c +++ b/net/ipv4/proc.c @@ -21,6 +21,7 @@ * Alan Cox : Make /proc safer. * Erik Schoenfelder : /proc/net/snmp * Alan Cox : Handle dead sockets properly. + * Gerhard Koerting : Show both timers * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -37,14 +38,14 @@ #include <linux/param.h> #include <linux/inet.h> #include <linux/netdevice.h> -#include "ip.h" -#include "icmp.h" -#include "protocol.h" -#include "tcp.h" -#include "udp.h" +#include <net/ip.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/tcp.h> +#include <net/udp.h> #include <linux/skbuff.h> -#include "sock.h" -#include "raw.h" +#include <net/sock.h> +#include <net/raw.h> /* * Get__netinfo returns the length of that string. 
@@ -60,6 +61,9 @@ get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t of struct sock *sp; int i; int timer_active; + int timer_active1; + int timer_active2; + unsigned long timer_expires; unsigned long dest, src; unsigned short destp, srcp; int len=0; @@ -87,17 +91,31 @@ get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t of /* Since we are Little Endian we need to swap the bytes :-( */ destp = ntohs(destp); srcp = ntohs(srcp); - timer_active = del_timer(&sp->timer); - if (!timer_active) - sp->timer.expires = 0; - len+=sprintf(buffer+len, "%2d: %08lX:%04X %08lX:%04X %02X %08lX:%08lX %02X:%08lX %08X %d\n", + timer_active1 = del_timer(&sp->retransmit_timer); + timer_active2 = del_timer(&sp->timer); + if (!timer_active1) sp->retransmit_timer.expires=0; + if (!timer_active2) sp->timer.expires=0; + timer_active=0; + timer_expires=(unsigned)-1; + if (timer_active1 && + sp->retransmit_timer.expires < timer_expires) { + timer_active=timer_active1; + timer_expires=sp->retransmit_timer.expires; + } + if (timer_active2 && + sp->timer.expires < timer_expires) { + timer_active=timer_active2; + timer_expires=sp->timer.expires; + } + len+=sprintf(buffer+len, "%2d: %08lX:%04X %08lX:%04X %02X %08lX:%08lX %02X:%08lX %08X %d %d\n", i, src, srcp, dest, destp, sp->state, format==0?sp->write_seq-sp->rcv_ack_seq:sp->rmem_alloc, format==0?sp->acked_seq-sp->copied_seq:sp->wmem_alloc, - timer_active, sp->timer.expires, (unsigned) sp->retransmits, - sp->socket?SOCK_INODE(sp->socket)->i_uid:0); - if (timer_active) - add_timer(&sp->timer); + timer_active, timer_expires, (unsigned) sp->retransmits, + sp->socket?SOCK_INODE(sp->socket)->i_uid:0, + timer_active?sp->timeout:0); + if (timer_active1) add_timer(&sp->retransmit_timer); + if (timer_active2) add_timer(&sp->timer); /* * All sockets with (port mod SOCK_ARRAY_SIZE) = i * are kept in sock_array[i], so we must follow the @@ -114,7 +132,7 @@ get__netinfo(struct proto *pro, char *buffer, int 
format, char **start, off_t of break; } sti(); /* We only turn interrupts back on for a moment, but because the interrupt queues anything built up - before this will clear before we jump back and cli, so its not as bad as it looks */ + before this will clear before we jump back and cli, so it's not as bad as it looks */ if(pos>offset+length) break; } @@ -144,6 +162,34 @@ int raw_get_info(char *buffer, char **start, off_t offset, int length) } +/* + * Report socket allocation statistics [mea@utu.fi] + */ +int afinet_get_info(char *buffer, char **start, off_t offset, int length) +{ + /* From net/socket.c */ + extern int socket_get_info(char *, char **, off_t, int); + extern struct proto packet_prot; + + int len = socket_get_info(buffer,start,offset,length); + + len += sprintf(buffer+len,"SOCK_ARRAY_SIZE=%d\n",SOCK_ARRAY_SIZE); + len += sprintf(buffer+len,"TCP: inuse %d highest %d\n", + tcp_prot.inuse, tcp_prot.highestinuse); + len += sprintf(buffer+len,"UDP: inuse %d highest %d\n", + udp_prot.inuse, udp_prot.highestinuse); + len += sprintf(buffer+len,"RAW: inuse %d highest %d\n", + raw_prot.inuse, raw_prot.highestinuse); + len += sprintf(buffer+len,"PAC: inuse %d highest %d\n", + packet_prot.inuse, packet_prot.highestinuse); + *start = buffer + offset; + len -= offset; + if (len > length) + len = length; + return len; +} + + /* * Called from the PROCfs module. This outputs /proc/net/snmp. 
*/ diff --git a/net/inet/protocol.c b/net/ipv4/protocol.c index 57e552ea7..f10cc8254 100644 --- a/net/inet/protocol.c +++ b/net/ipv4/protocol.c @@ -27,24 +27,50 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> +#include <linux/config.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> -#include "ip.h" -#include "protocol.h" -#include "tcp.h" +#include <linux/timer.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/tcp.h> #include <linux/skbuff.h> -#include "sock.h" -#include "icmp.h" -#include "udp.h" +#include <net/sock.h> +#include <net/icmp.h> +#include <net/udp.h> +#include <net/ipip.h> +#include <linux/igmp.h> + + +#ifdef CONFIG_IP_FORWARD +#ifdef CONFIG_NET_IPIP + +static struct inet_protocol ipip_protocol = { + ipip_rcv, /* IPIP handler */ + NULL, /* Will be UDP fraglist handler */ + NULL, /* TUNNEL error control */ + 0, /* next */ + IPPROTO_IPIP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "IPIP" /* name */ +}; + +#endif +#endif static struct inet_protocol tcp_protocol = { tcp_rcv, /* TCP handler */ NULL, /* No fragment handler (and won't be for a long time) */ - tcp_err, /* TCP error control */ + tcp_err, /* TCP error control */ +#if defined(CONFIG_NET_IPIP) && defined(CONFIG_IP_FORWARD) + &ipip_protocol, +#else NULL, /* next */ +#endif IPPROTO_TCP, /* protocol ID */ 0, /* copy */ NULL, /* data */ @@ -75,8 +101,23 @@ static struct inet_protocol icmp_protocol = { "ICMP" /* name */ }; - +#ifndef CONFIG_IP_MULTICAST struct inet_protocol *inet_protocol_base = &icmp_protocol; +#else +static struct inet_protocol igmp_protocol = { + igmp_rcv, /* IGMP handler */ + NULL, /* IGMP never fragments anyway */ + NULL, /* IGMP error control */ + &icmp_protocol, /* next */ + IPPROTO_IGMP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "IGMP" /* name */ +}; + +struct inet_protocol *inet_protocol_base = &igmp_protocol; +#endif + struct inet_protocol 
*inet_protos[MAX_INET_PROTOS] = { NULL }; diff --git a/net/inet/rarp.c b/net/ipv4/rarp.c index 94db10cf8..a7b3719ed 100644 --- a/net/inet/rarp.c +++ b/net/ipv4/rarp.c @@ -31,6 +31,7 @@ #include <linux/string.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/mm.h> #include <linux/config.h> #include <linux/socket.h> #include <linux/sockios.h> @@ -43,16 +44,16 @@ #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> -#include "ip.h" -#include "route.h" -#include "protocol.h" -#include "tcp.h" +#include <net/ip.h> +#include <net/route.h> +#include <net/protocol.h> +#include <net/tcp.h> #include <linux/skbuff.h> -#include "sock.h" -#include "arp.h" -#include "rarp.h" +#include <net/sock.h> +#include <net/arp.h> +#include <net/rarp.h> #ifdef CONFIG_AX25 -#include "ax25.h" +#include <net/ax25.h> #endif #ifdef CONFIG_INET_RARP @@ -151,7 +152,7 @@ int rarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) unsigned char *sha,*tha; /* s for "source", t for "target" */ /* - * If this test doesn't pass, its not IP, or we should ignore it anyway + * If this test doesn't pass, it's not IP, or we should ignore it anyway */ if (rarp->ar_hln != dev->addr_len || dev->type != ntohs(rarp->ar_hrd) @@ -201,13 +202,13 @@ int rarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) memcpy(&tip,rarp_ptr,4); /* - * Process entry + * Process entry. Use tha for table lookup according to RFC903. */ cli(); for (entry = rarp_tables; entry != NULL; entry = entry->next) - if (!memcmp(entry->ha, sha, rarp->ar_hln)) - break; + if (!memcmp(entry->ha, tha, rarp->ar_hln)) + break; if (entry != NULL) { diff --git a/net/inet/raw.c b/net/ipv4/raw.c index b79c1da3c..f01489b4a 100644 --- a/net/inet/raw.c +++ b/net/ipv4/raw.c @@ -26,6 +26,7 @@ * Alan Cox : Cleaned up old debugging * Alan Cox : Use new kernel side addresses * Arnt Gulbrandsen : Fixed MSG_DONTROUTE in raw sockets. 
+ * Alan Cox : BSD style RAW socket demultiplexing. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -45,13 +46,13 @@ #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> -#include "ip.h" -#include "protocol.h" +#include <net/ip.h> +#include <net/protocol.h> #include <linux/skbuff.h> -#include "sock.h" -#include "icmp.h" -#include "udp.h" - +#include <net/sock.h> +#include <net/icmp.h> +#include <net/udp.h> +#include <net/checksum.h> static inline unsigned long min(unsigned long a, unsigned long b) { @@ -89,35 +90,15 @@ void raw_err (int err, unsigned char *header, unsigned long daddr, /* * This should be the easiest of all, all we do is - * copy it into a buffer. + * copy it into a buffer. All demultiplexing is done + * in ip.c */ -int raw_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, - unsigned long daddr, unsigned short len, unsigned long saddr, - int redo, struct inet_protocol *protocol) +int raw_rcv(struct sock *sk, struct sk_buff *skb, struct device *dev, long saddr, long daddr) { - struct sock *sk; - - if (skb == NULL) - return(0); - - if (protocol == NULL) - { - kfree_skb(skb, FREE_READ); - return(0); - } - - sk = (struct sock *) protocol->data; - if (sk == NULL) - { - kfree_skb(skb, FREE_READ); - return(0); - } - /* Now we need to copy this into memory. */ - skb->sk = sk; - skb->len = len + skb->ip_hdr->ihl*sizeof(long); + skb->len = ntohs(skb->ip_hdr->tot_len); skb->h.raw = (unsigned char *) skb->ip_hdr; skb->dev = dev; skb->saddr = daddr; @@ -142,19 +123,44 @@ int raw_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, * Send a RAW IP packet. */ +/* + * Callback support is trivial for SOCK_RAW + */ + +static void raw_getfrag(void *p, int saddr, char *to, unsigned int offset, unsigned int fraglen) +{ + memcpy_fromfs(to, (unsigned char *)p+offset, fraglen); +} + +/* + * IPPROTO_RAW needs extra work. 
+ */ + +static void raw_getrawfrag(void *p, int saddr, char *to, unsigned int offset, unsigned int fraglen) +{ + memcpy_fromfs(to, (unsigned char *)p+offset, fraglen); + if(offset==0) + { + struct iphdr *iph=(struct iphdr *)to; + iph->saddr=saddr; + iph->check=0; + iph->check=ip_fast_csum((unsigned char *)iph, iph->ihl); + } +} + static int raw_sendto(struct sock *sk, unsigned char *from, int len, int noblock, unsigned flags, struct sockaddr_in *usin, int addr_len) { - struct sk_buff *skb; - struct device *dev=NULL; - struct sockaddr_in sin; - int tmp; int err; + struct sockaddr_in sin; /* * Check the flags. Only MSG_DONTROUTE is permitted. */ - + + if (flags & MSG_OOB) /* Mirror BSD error message compatibility */ + return -EOPNOTSUPP; + if (flags & ~MSG_DONTROUTE) return(-EINVAL); /* @@ -186,48 +192,11 @@ static int raw_sendto(struct sock *sk, unsigned char *from, if (sk->broadcast == 0 && ip_chk_addr(sin.sin_addr.s_addr)==IS_BROADCAST) return -EACCES; - skb=sock_alloc_send_skb(sk, len+sk->prot->max_header, noblock, &err); - if(skb==NULL) - return err; - - skb->sk = sk; - skb->free = 1; - skb->localroute = sk->localroute | (flags&MSG_DONTROUTE); - - tmp = sk->prot->build_header(skb, sk->saddr, - sin.sin_addr.s_addr, &dev, - sk->protocol, sk->opt, skb->mem_len, sk->ip_tos,sk->ip_ttl); - if (tmp < 0) - { - kfree_skb(skb,FREE_WRITE); - release_sock(sk); - return(tmp); - } - - memcpy_fromfs(skb->data + tmp, from, len); - - /* - * If we are using IPPROTO_RAW, we need to fill in the source address in - * the IP header - */ - - if(sk->protocol==IPPROTO_RAW) - { - unsigned char *buff; - struct iphdr *iph; - - buff = skb->data; - buff += tmp; - - iph = (struct iphdr *)buff; - iph->saddr = sk->saddr; - } - - skb->len = tmp + len; - - sk->prot->queue_xmit(sk, dev, skb, 1); - release_sock(sk); - return(len); + if(sk->num==IPPROTO_RAW) + err=ip_build_xmit(sk, raw_getrawfrag, from, len, sin.sin_addr.s_addr, flags, sin.sin_port); + else + err=ip_build_xmit(sk, raw_getfrag, from, 
len, sin.sin_addr.s_addr, flags, sin.sin_port); + return err<0?err:len; } @@ -240,35 +209,12 @@ static int raw_write(struct sock *sk, unsigned char *buff, int len, int noblock, static void raw_close(struct sock *sk, int timeout) { - sk->inuse = 1; sk->state = TCP_CLOSE; - - inet_del_protocol((struct inet_protocol *)sk->pair); - kfree_s((void *)sk->pair, sizeof (struct inet_protocol)); - sk->pair = NULL; - release_sock(sk); } static int raw_init(struct sock *sk) { - struct inet_protocol *p; - - p = (struct inet_protocol *) kmalloc(sizeof (*p), GFP_KERNEL); - if (p == NULL) - return(-ENOMEM); - - p->handler = raw_rcv; - p->protocol = sk->protocol; - p->data = (void *)sk; - p->err_handler = raw_err; - p->name="USER"; - p->frag_handler = NULL; /* For now */ - inet_add_protocol(p); - - /* We need to remember this somewhere. */ - sk->pair = (struct sock *)p; - return(0); } @@ -287,6 +233,9 @@ int raw_recvfrom(struct sock *sk, unsigned char *to, int len, int err; int truesize; + if (flags & MSG_OOB) + return -EOPNOTSUPP; + if (sk->shutdown & RCV_SHUTDOWN) return(0); @@ -337,10 +286,10 @@ struct proto raw_prot = { udp_connect, NULL, ip_queue_xmit, - ip_retransmit, NULL, NULL, - raw_rcv, + NULL, + NULL, datagram_select, NULL, raw_init, @@ -349,6 +298,7 @@ struct proto raw_prot = { ip_getsockopt, 128, 0, - {NULL,}, - "RAW" + "RAW", + 0, 0, + {NULL,} }; diff --git a/net/inet/route.c b/net/ipv4/route.c index 58401d742..d2186a45d 100644 --- a/net/inet/route.c +++ b/net/ipv4/route.c @@ -23,6 +23,11 @@ * Alan Cox : MTU in route table * Alan Cox : MSS actually. Also added the window * clamper. + * Sam Lantinga : Fixed route matching in rt_del() + * Alan Cox : Routing cache support. + * Alan Cox : Removed compatibility cruft. + * Alan Cox : RTF_REJECT support. + * Alan Cox : TCP irtt support. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -35,6 +40,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/mm.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/sockios.h> @@ -42,19 +48,20 @@ #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> -#include "ip.h" -#include "protocol.h" -#include "route.h" -#include "tcp.h" +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> #include <linux/skbuff.h> -#include "sock.h" -#include "icmp.h" +#include <net/sock.h> +#include <net/icmp.h> /* * The routing table list */ static struct rtable *rt_base = NULL; +unsigned long rt_stamp = 1; /* Routing table version stamp for caches ( 0 is 'unset' ) */ /* * Pointer to the loopback route @@ -66,7 +73,7 @@ static struct rtable *rt_loopback = NULL; * Remove a routing table entry. */ -static void rt_del(unsigned long dst) +static void rt_del(unsigned long dst, char *devname) { struct rtable *r, **rp; unsigned long flags; @@ -82,7 +89,9 @@ static void rt_del(unsigned long dst) cli(); while((r = *rp) != NULL) { - if (r->rt_dst != dst) + /* Make sure both the destination and the device match */ + if ( r->rt_dst != dst || + (devname != NULL && strcmp((r->rt_dev)->name,devname) != 0) ) { rp = &r->rt_next; continue; @@ -97,6 +106,8 @@ static void rt_del(unsigned long dst) rt_loopback = NULL; kfree_s(r, sizeof(struct rtable)); } + rt_stamp++; /* New table revision */ + restore_flags(flags); } @@ -125,6 +136,7 @@ void ip_rt_flush(struct device *dev) rt_loopback = NULL; kfree_s(r, sizeof(struct rtable)); } + rt_stamp++; /* New table revision */ restore_flags(flags); } @@ -197,7 +209,7 @@ static inline struct device * get_gw_dev(unsigned long gw) */ void ip_rt_add(short flags, unsigned long dst, unsigned long mask, - unsigned long gw, struct device *dev, unsigned short mtu, unsigned 
long window) + unsigned long gw, struct device *dev, unsigned short mtu, unsigned long window, unsigned short irtt) { struct rtable *r, *rt; struct rtable **rp; @@ -279,6 +291,8 @@ void ip_rt_add(short flags, unsigned long dst, unsigned long mask, if(rt->rt_flags & RTF_WINDOW) rt->rt_window = window; + if(rt->rt_flags & RTF_IRTT) + rt->rt_irtt = irtt; /* * What we have to do is loop though this until we have @@ -297,7 +311,8 @@ void ip_rt_add(short flags, unsigned long dst, unsigned long mask, rp = &rt_base; while ((r = *rp) != NULL) { - if (r->rt_dst != dst) + if (r->rt_dst != dst || + r->rt_mask != mask) { rp = &r->rt_next; continue; @@ -327,6 +342,8 @@ void ip_rt_add(short flags, unsigned long dst, unsigned long mask, if ((rt->rt_dev->flags & IFF_LOOPBACK) && !rt_loopback) rt_loopback = rt; + + rt_stamp++; /* New table revision */ /* * Restore the interrupts and return @@ -455,7 +472,7 @@ static int rt_new(struct rtentry *r) * Add the route */ - ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window); + ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt); return 0; } @@ -467,9 +484,19 @@ static int rt_new(struct rtentry *r) static int rt_kill(struct rtentry *r) { struct sockaddr_in *trg; + char *devname; + int err; trg = (struct sockaddr_in *) &r->rt_dst; - rt_del(trg->sin_addr.s_addr); + if ((devname = r->rt_dev) != NULL) + { + err = getname(devname, &devname); + if (err) + return err; + } + rt_del(trg->sin_addr.s_addr, devname); + if ( devname != NULL ) + putname(devname); return 0; } @@ -487,7 +514,7 @@ int rt_get_info(char *buffer, char **start, off_t offset, int length) int size; len += sprintf(buffer, - "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\n"); + "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n"); pos=len; /* @@ -496,10 +523,10 @@ int rt_get_info(char *buffer, char **start, off_t offset, int length) for (r = rt_base; r != NULL; r = r->rt_next) { 
- size = sprintf(buffer+len, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\n", + size = sprintf(buffer+len, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u\n", r->rt_dev->name, r->rt_dst, r->rt_gateway, r->rt_flags, r->rt_refcnt, r->rt_use, r->rt_metric, - r->rt_mask, (int)r->rt_mss, r->rt_window); + r->rt_mask, (int)r->rt_mss, r->rt_window, (int)r->rt_irtt); len+=size; pos+=size; if(pos<offset) @@ -549,6 +576,9 @@ struct rtable * ip_rt_route(unsigned long daddr, struct options *opt, unsigned l break; } + if(rt->rt_flags&RTF_REJECT) + return NULL; + if(src_addr!=NULL) *src_addr= rt->rt_dev->pa_addr; @@ -599,30 +629,6 @@ no_route: } /* - * Backwards compatibility - */ - -static int ip_get_old_rtent(struct old_rtentry * src, struct rtentry * rt) -{ - int err; - struct old_rtentry tmp; - - err=verify_area(VERIFY_READ, src, sizeof(*src)); - if (err) - return err; - memcpy_fromfs(&tmp, src, sizeof(*src)); - memset(rt, 0, sizeof(*rt)); - rt->rt_dst = tmp.rt_dst; - rt->rt_gateway = tmp.rt_gateway; - rt->rt_genmask.sa_family = AF_INET; - ((struct sockaddr_in *) &rt->rt_genmask)->sin_addr.s_addr = tmp.rt_genmask; - rt->rt_flags = tmp.rt_flags; - rt->rt_dev = tmp.rt_dev; - printk("Warning: obsolete routing request made.\n"); - return 0; -} - -/* * Handle IP routing ioctl calls. These are used to manipulate the routing tables */ @@ -633,15 +639,6 @@ int ip_rt_ioctl(unsigned int cmd, void *arg) switch(cmd) { - case SIOCADDRTOLD: /* Old style add route */ - case SIOCDELRTOLD: /* Old style delete route */ - if (!suser()) - return -EPERM; - err = ip_get_old_rtent((struct old_rtentry *) arg, &rt); - if (err) - return err; - return (cmd == SIOCDELRTOLD) ? 
rt_kill(&rt) : rt_new(&rt); - case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ if (!suser()) diff --git a/net/inet/tcp.c b/net/ipv4/tcp.c index a2bbbe861..352e1a101 100644 --- a/net/inet/tcp.c +++ b/net/ipv4/tcp.c @@ -16,7 +16,8 @@ * Linus Torvalds, <torvalds@cs.helsinki.fi> * Alan Cox, <gw4pts@gw4pts.ampr.org> * Matthew Dillon, <dillon@apollo.west.oic.com> - * Arnt Gulbrandsen, <agulbra@no.unit.nvg> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Jorge Cwik, <jorge@laser.satlink.net> * * Fixes: * Alan Cox : Numerous verify_area() calls @@ -105,13 +106,68 @@ * socket close. * Alan Cox : Reset tracing code. * Alan Cox : Spurious resets on shutdown. + * Alan Cox : Giant 15 minute/60 second timer error + * Alan Cox : Small whoops in selecting before an accept. + * Alan Cox : Kept the state trace facility since it's + * handy for debugging. + * Alan Cox : More reset handler fixes. + * Alan Cox : Started rewriting the code based on the RFC's + * for other useful protocol references see: + * Comer, KA9Q NOS, and for a reference on the + * difference between specifications and how BSD + * works see the 4.4lite source. + * A.N.Kuznetsov : Don't time wait on completion of tidy + * close. + * Linus Torvalds : Fin/Shutdown & copied_seq changes. + * Linus Torvalds : Fixed BSD port reuse to work first syn + * Alan Cox : Reimplemented timers as per the RFC and using multiple + * timers for sanity. + * Alan Cox : Small bug fixes, and a lot of new + * comments. + * Alan Cox : Fixed dual reader crash by locking + * the buffers (much like datagram.c) + * Alan Cox : Fixed stuck sockets in probe. A probe + * now gets fed up of retrying without + * (even a no space) answer. + * Alan Cox : Extracted closing code better + * Alan Cox : Fixed the closing state machine to + * resemble the RFC. + * Alan Cox : More 'per spec' fixes. + * Jorge Cwik : Even faster checksumming. + * Alan Cox : tcp_data() doesn't ack illegal PSH + * only frames. 
At least one pc tcp stack + * generates them. + * Alan Cox : Cache last socket. + * Alan Cox : Per route irtt. + * Matt Day : Select() match BSD precisely on error * * * To Fix: - * Fast path the code. Two things here - fix the window calculation + * Fast path the code. Two things here - fix the window calculation * so it doesn't iterate over the queue, also spot packets with no funny * options arriving in order and process directly. * + * Implement RFC 1191 [Path MTU discovery] + * Look at the effect of implementing RFC 1337 suggestions and their impact. + * Rewrite output state machine to use a single queue and do low window + * situations as per the spec (RFC 1122) + * Speed up input assembly algorithm. + * RFC1323 - PAWS and window scaling. PAWS is required for IPv6 so we + * could do with it working on IPv4 + * User settable/learned rtt/max window/mtu + * Cope with MTU/device switches when retransmitting in tcp. + * Fix the window handling to use PR's new code. + * + * Change the fundamental structure to a single send queue maintained + * by TCP (removing the bogus ip stuff [thus fixing mtu drops on + * active routes too]). Cut the queue off in tcp_retransmit/ + * tcp_transmit. + * Change the receive queue to assemble as it goes. This lets us + * dispose of most of tcp_sequence, half of tcp_ack and chunks of + * tcp_data/tcp_read as well as the window shrink crud. + * Separate out duplicated code - tcp_alloc_skb, tcp_build_ack + * tcp_queue_skb seem obvious routines to extract. 
+ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -151,10 +207,13 @@ * * TCP_CLOSE socket is finished */ + #include <linux/types.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/time.h> #include <linux/string.h> +#include <linux/config.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/termios.h> @@ -162,32 +221,61 @@ #include <linux/fcntl.h> #include <linux/inet.h> #include <linux/netdevice.h> -#include "snmp.h" -#include "ip.h" -#include "protocol.h" -#include "icmp.h" -#include "tcp.h" +#include <net/snmp.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/icmp.h> +#include <net/tcp.h> +#include <net/arp.h> #include <linux/skbuff.h> -#include "sock.h" -#include "route.h" +#include <net/sock.h> +#include <net/route.h> #include <linux/errno.h> #include <linux/timer.h> #include <asm/system.h> #include <asm/segment.h> #include <linux/mm.h> +#include <net/checksum.h> -#undef TCP_FASTPATH +/* + * The MSL timer is the 'normal' timer. 
+ */ + +#define reset_msl_timer(x,y,z) reset_timer(x,y,z) #define SEQ_TICK 3 unsigned long seq_offset; struct tcp_mib tcp_statistics; +/* + * Cached last hit socket + */ + +volatile unsigned long th_cache_saddr,th_cache_daddr; +volatile unsigned short th_cache_dport, th_cache_sport; +volatile struct sock *th_cache_sk; + +void tcp_cache_zap(void) +{ + unsigned long flags; + save_flags(flags); + cli(); + th_cache_saddr=0; + th_cache_daddr=0; + th_cache_dport=0; + th_cache_sport=0; + th_cache_sk=NULL; + restore_flags(flags); +} + static void tcp_close(struct sock *sk, int timeout); -#ifdef TCP_FASTPATH -unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0; -#endif +/* + * The less said about this the better, but it works and will do for 1.2 + */ + +static struct wait_queue *master_select_wakeup; static __inline__ int min(unsigned int a, unsigned int b) { @@ -198,33 +286,48 @@ static __inline__ int min(unsigned int a, unsigned int b) #undef STATE_TRACE +#ifdef STATE_TRACE +static char *statename[]={ + "Unused","Established","Syn Sent","Syn Recv", + "Fin Wait 1","Fin Wait 2","Time Wait", "Close", + "Close Wait","Last ACK","Listen","Closing" +}; +#endif + static __inline__ void tcp_set_state(struct sock *sk, int state) { if(sk->state==TCP_ESTABLISHED) tcp_statistics.TcpCurrEstab--; #ifdef STATE_TRACE if(sk->debug) - printk("TCP sk=%s, State %d -> %d\n",sk, sk->state,state); + printk("TCP sk=%p, State %s -> %s\n",sk, statename[sk->state],statename[state]); #endif + /* This is a hack but it doesn't occur often and it's going to + be a real to fix nicely */ + + if(state==TCP_ESTABLISHED && sk->state==TCP_SYN_RECV) + { + wake_up_interruptible(&master_select_wakeup); + } sk->state=state; if(state==TCP_ESTABLISHED) tcp_statistics.TcpCurrEstab++; } -/* This routine picks a TCP windows for a socket based on - the following constraints - - 1. The window can never be shrunk once it is offered (RFC 793) - 2. 
We limit memory per socket - - For now we use NET2E3's heuristic of offering half the memory - we have handy. All is not as bad as this seems however because - of two things. Firstly we will bin packets even within the window - in order to get the data we are waiting for into the memory limit. - Secondly we bin common duplicate forms at receive time - - Better heuristics welcome -*/ +/* + * This routine picks a TCP windows for a socket based on + * the following constraints + * + * 1. The window can never be shrunk once it is offered (RFC 793) + * 2. We limit memory per socket + * + * For now we use NET2E3's heuristic of offering half the memory + * we have handy. All is not as bad as this seems however because + * of two things. Firstly we will bin packets even within the window + * in order to get the data we are waiting for into the memory limit. + * Secondly we bin common duplicate forms at receive time + * Better heuristics welcome + */ int tcp_select_window(struct sock *sk) { @@ -232,15 +335,19 @@ int tcp_select_window(struct sock *sk) if(sk->window_clamp) new_window=min(sk->window_clamp,new_window); -/* - * two things are going on here. First, we don't ever offer a - * window less than min(sk->mss, MAX_WINDOW/2). This is the - * receiver side of SWS as specified in RFC1122. - * Second, we always give them at least the window they - * had before, in order to avoid retracting window. This - * is technically allowed, but RFC1122 advises against it and - * in practice it causes trouble. - */ + /* + * Two things are going on here. First, we don't ever offer a + * window less than min(sk->mss, MAX_WINDOW/2). This is the + * receiver side of SWS as specified in RFC1122. + * Second, we always give them at least the window they + * had before, in order to avoid retracting window. This + * is technically allowed, but RFC1122 advises against it and + * in practice it causes trouble. 
+ * + * Fixme: This doesn't correctly handle the case where + * new_window > sk->window but not by enough to allow for the + * shift in sequence space. + */ if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window) return(sk->window); return(new_window); @@ -262,46 +369,15 @@ static struct sk_buff *tcp_find_established(struct sock *s) return p; p=p->next; } - while(p!=skb_peek(&s->receive_queue)); + while(p!=(struct sk_buff *)&s->receive_queue); return NULL; } - -/* - * This routine closes sockets which have been at least partially - * opened, but not yet accepted. Currently it is only called by - * tcp_close, and timeout mirrors the value there. +/* + * Remove a completed connection and return it. This is used by + * tcp_accept() to get connections from the queue. */ -static void tcp_close_pending (struct sock *sk, int timeout) -{ - unsigned long flags; - struct sk_buff *p, *old_p; - - save_flags(flags); - cli(); - p=skb_peek(&sk->receive_queue); - - if(p==NULL) - { - restore_flags(flags); - return; - } - - do - { - tcp_close (p->sk, timeout); - skb_unlink (p); - old_p = p; - p=p->next; - kfree_skb(old_p, FREE_READ); - } - while(p!=skb_peek(&sk->receive_queue)); - - restore_flags(flags); - return; -} - static struct sk_buff *tcp_dequeue_established(struct sock *s) { struct sk_buff *skb; @@ -315,6 +391,24 @@ static struct sk_buff *tcp_dequeue_established(struct sock *s) return skb; } +/* + * This routine closes sockets which have been at least partially + * opened, but not yet accepted. Currently it is only called by + * tcp_close, and timeout mirrors the value there. + */ + +static void tcp_close_pending (struct sock *sk) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) + { + skb->sk->dead=1; + tcp_close(skb->sk, 0); + kfree_skb(skb, FREE_READ); + } + return; +} /* * Enter the time wait state. 
@@ -326,10 +420,175 @@ static void tcp_time_wait(struct sock *sk) sk->shutdown = SHUTDOWN_MASK; if (!sk->dead) sk->state_change(sk); - reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); +} + +/* + * A socket has timed out on its send queue and wants to do a + * little retransmitting. Currently this means TCP. + */ + +void tcp_do_retransmit(struct sock *sk, int all) +{ + struct sk_buff * skb; + struct proto *prot; + struct device *dev; + int ct=0; + + prot = sk->prot; + skb = sk->send_head; + + while (skb != NULL) + { + struct tcphdr *th; + struct iphdr *iph; + int size; + + dev = skb->dev; + IS_SKB(skb); + skb->when = jiffies; + + /* + * In general it's OK just to use the old packet. However we + * need to use the current ack and window fields. Urg and + * urg_ptr could possibly stand to be updated as well, but we + * don't keep the necessary data. That shouldn't be a problem, + * if the other end is doing the right thing. Since we're + * changing the packet, we have to issue a new IP identifier. + */ + + iph = (struct iphdr *)(skb->data + dev->hard_header_len); + th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2)); + size = skb->len - (((unsigned char *) th) - skb->data); + + /* + * Note: We ought to check for window limits here but + * currently this is done (less efficiently) elsewhere. + * We do need to check for a route change but can't handle + * that until we have the new 1.3.x buffers in. + * + */ + + iph->id = htons(ip_id_count++); + ip_send_check(iph); + + /* + * This is not the right way to handle this. We have to + * issue an up to date window and ack report with this + * retransmit to keep the odd buggy tcp that relies on + * the fact BSD does this happy. + * We don't however need to recalculate the entire + * checksum, so someone wanting a small problem to play + * with might like to implement RFC1141/RFC1624 and speed + * this up by avoiding a full checksum. 
+ */ + + th->ack_seq = ntohl(sk->acked_seq); + th->window = ntohs(tcp_select_window(sk)); + tcp_send_check(th, sk->saddr, sk->daddr, size, sk); + + /* + * If the interface is (still) up and running, kick it. + */ + + if (dev->flags & IFF_UP) + { + /* + * If the packet is still being sent by the device/protocol + * below then don't retransmit. This is both needed, and good - + * especially with connected mode AX.25 where it stops resends + * occurring of an as yet unsent anyway frame! + * We still add up the counts as the round trip time wants + * adjusting. + */ + if (sk && !skb_device_locked(skb)) + { + /* Remove it from any existing driver queue first! */ + skb_unlink(skb); + /* Now queue it */ + ip_statistics.IpOutRequests++; + dev_queue_xmit(skb, dev, sk->priority); + } + } + + /* + * Count retransmissions + */ + + ct++; + sk->prot->retransmits ++; + + /* + * Only one retransmit requested. + */ + + if (!all) + break; + + /* + * This should cut it off before we send too many packets. + */ + + if (ct >= sk->cong_window) + break; + skb = skb->link3; + } } /* + * Reset the retransmission timer + */ + +static void reset_xmit_timer(struct sock *sk, int why, unsigned long when) +{ + del_timer(&sk->retransmit_timer); + sk->ip_xmit_timeout = why; + if((int)when < 0) + { + when=3; + printk("Error: Negative timer in xmit_timer\n"); + } + sk->retransmit_timer.expires=when; + add_timer(&sk->retransmit_timer); +} + +/* + * This is the normal code called for timeouts. It does the retransmission + * and then does backoff. tcp_do_retransmit is separated out because + * tcp_ack needs to send stuff from the retransmit queue without + * initiating a backoff. + */ + + +void tcp_retransmit_time(struct sock *sk, int all) +{ + tcp_do_retransmit(sk, all); + + /* + * Increase the timeout each time we retransmit. Note that + * we do not increase the rtt estimate. rto is initialized + * from rtt, but increases here. 
Jacobson (SIGCOMM 88) suggests + * that doubling rto each time is the least we can get away with. + * In KA9Q, Karn uses this for the first few times, and then + * goes to quadratic. netBSD doubles, but only goes up to *64, + * and clamps at 1 to 64 sec afterwards. Note that 120 sec is + * defined in the protocol as the maximum possible RTT. I guess + * we'll have to use something other than TCP to talk to the + * University of Mars. + * + * PAWS allows us longer timeouts and large windows, so once + * implemented ftp to mars will work nicely. We will have to fix + * the 120 second clamps though! + */ + + sk->retransmits++; + sk->backoff++; + sk->rto = min(sk->rto << 1, 120*HZ); + reset_xmit_timer(sk, TIME_WRITE, sk->rto); +} + + +/* * A timer event has trigger a tcp retransmit timeout. The * socket xmit queue is ready and set up to send. Because * the ack receive code keeps the queue straight we do @@ -340,7 +599,7 @@ static void tcp_retransmit(struct sock *sk, int all) { if (all) { - ip_retransmit(sk, all); + tcp_retransmit_time(sk, all); return; } @@ -351,9 +610,163 @@ static void tcp_retransmit(struct sock *sk, int all) sk->cong_window = 1; /* Do the actual retransmit. */ - ip_retransmit(sk, all); + tcp_retransmit_time(sk, all); } +/* + * A write timeout has occurred. Process the after effects. + */ + +static int tcp_write_timeout(struct sock *sk) +{ + /* + * Look for a 'soft' timeout. + */ + if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7)) + || (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1)) + { + /* + * Attempt to recover if arp has changed (unlikely!) or + * a route has shifted (not supported prior to 1.3). + */ + arp_destroy (sk->daddr, 0); + /*ip_route_check (sk->daddr);*/ + } + /* + * Has it gone just too far ? 
+ */ + if (sk->retransmits > TCP_RETR2) + { + sk->err = ETIMEDOUT; + sk->error_report(sk); + del_timer(&sk->retransmit_timer); + /* + * Time wait the socket + */ + if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING ) + { + tcp_set_state(sk,TCP_TIME_WAIT); + reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + } + else + { + /* + * Clean up time. + */ + tcp_set_state(sk, TCP_CLOSE); + return 0; + } + } + return 1; +} + +/* + * The TCP retransmit timer. This lacks a few small details. + * + * 1. An initial rtt timeout on the probe0 should cause what we can + * of the first write queue buffer to be split and sent. + * 2. On a 'major timeout' as defined by RFC1122 we shouldn't report + * ETIMEDOUT if we know an additional 'soft' error caused this. + * tcp_err should save a 'soft error' for us. + */ + +static void retransmit_timer(unsigned long data) +{ + struct sock *sk = (struct sock*)data; + int why = sk->ip_xmit_timeout; + + /* + * only process if socket is not in use + */ + + cli(); + if (sk->inuse || in_bh) + { + /* Try again in 1 second */ + sk->retransmit_timer.expires = HZ; + add_timer(&sk->retransmit_timer); + sti(); + return; + } + + sk->inuse = 1; + sti(); + + /* Always see if we need to send an ack. */ + + if (sk->ack_backlog && !sk->zapped) + { + sk->prot->read_wakeup (sk); + if (! sk->dead) + sk->data_ready(sk,0); + } + + /* Now we need to figure out why the socket was on the timer. */ + + switch (why) + { + /* Window probing */ + case TIME_PROBE0: + tcp_send_probe0(sk); + tcp_write_timeout(sk); + break; + /* Retransmitting */ + case TIME_WRITE: + /* It could be we got here because we needed to send an ack. + * So we need to check for that. + */ + { + struct sk_buff *skb; + unsigned long flags; + + save_flags(flags); + cli(); + skb = sk->send_head; + if (!skb) + { + restore_flags(flags); + } + else + { + /* + * Kicked by a delayed ack. 
Reset timer + * correctly now + */ + if (jiffies < skb->when + sk->rto) + { + reset_xmit_timer (sk, TIME_WRITE, skb->when + sk->rto - jiffies); + restore_flags(flags); + break; + } + restore_flags(flags); + /* + * Retransmission + */ + sk->prot->retransmit (sk, 0); + tcp_write_timeout(sk); + } + break; + } + /* Sending Keepalives */ + case TIME_KEEPOPEN: + /* + * this reset_timer() call is a hack, this is not + * how KEEPOPEN is supposed to work. + */ + reset_xmit_timer (sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); + + /* Send something to keep the connection open. */ + if (sk->prot->write_wakeup) + sk->prot->write_wakeup (sk); + sk->retransmits++; + tcp_write_timeout(sk); + break; + default: + printk ("rexmit_timer: timer expired - reason unknown\n"); + break; + } + release_sock(sk); +} /* * This routine is called by the ICMP module when it gets some @@ -422,7 +835,8 @@ void tcp_err(int err, unsigned char *header, unsigned long daddr, /* * Walk down the receive queue counting readable data until we hit the end or we find a gap - * in the received data queue (ie a frame missing that needs sending to us) + * in the received data queue (ie a frame missing that needs sending to us). Not + * sorting using two queues as data arrives makes life so much harder. */ static int tcp_readable(struct sock *sk) @@ -446,10 +860,13 @@ static int tcp_readable(struct sock *sk) return(0); } - counted = sk->copied_seq+1; /* Where we are at the moment */ + counted = sk->copied_seq; /* Where we are at the moment */ amount = 0; - /* Do until a push or until we are out of data. */ + /* + * Do until a push or until we are out of data. + */ + do { if (before(counted, skb->h.th->seq)) /* Found a hole so stops here */ @@ -493,79 +910,80 @@ static int tcp_readable(struct sock *sk) return(amount); } - /* - * Wait for a TCP event. Note the oddity with SEL_IN and reading. The - * listening socket has a receive queue of sockets to accept. + * LISTEN is a special case for select.. 
*/ +static int tcp_listen_select(struct sock *sk, int sel_type, select_table *wait) +{ + if (sel_type == SEL_IN) { + int retval; + sk->inuse = 1; + retval = (tcp_find_established(sk) != NULL); + release_sock(sk); + if (!retval) + select_wait(&master_select_wakeup,wait); + return retval; + } + return 0; +} + + +/* + * Wait for a TCP event. + * + * Note that we don't need to set "sk->inuse", as the upper select layers + * take care of normal races (between the test and the event) and we don't + * go look at any of the socket buffers directly. + */ static int tcp_select(struct sock *sk, int sel_type, select_table *wait) { - sk->inuse = 1; + if (sk->state == TCP_LISTEN) + return tcp_listen_select(sk, sel_type, wait); - switch(sel_type) - { - case SEL_IN: - select_wait(sk->sleep, wait); - if (skb_peek(&sk->receive_queue) != NULL) - { - if ((sk->state == TCP_LISTEN && tcp_find_established(sk)) || tcp_readable(sk)) - { - release_sock(sk); - return(1); - } - } - if (sk->err != 0) /* Receiver error */ - { - release_sock(sk); - return(1); - } - if (sk->shutdown & RCV_SHUTDOWN) - { - release_sock(sk); - return(1); - } - release_sock(sk); - return(0); - case SEL_OUT: - select_wait(sk->sleep, wait); - if (sk->shutdown & SEND_SHUTDOWN) - { - /* FIXME: should this return an error? */ - release_sock(sk); - return(0); - } + switch(sel_type) { + case SEL_IN: + if (sk->err) + return 1; + if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) + break; - /* - * This is now right thanks to a small fix - * by Matt Dillon. - */ + if (sk->shutdown & RCV_SHUTDOWN) + return 1; - if (sk->prot->wspace(sk) >= sk->mtu+128+sk->prot->max_header) - { - release_sock(sk); - /* This should cause connect to work ok. 
*/ - if (sk->state == TCP_SYN_RECV || - sk->state == TCP_SYN_SENT) return(0); - return(1); - } - release_sock(sk); - return(0); - case SEL_EX: - select_wait(sk->sleep,wait); - if (sk->err || sk->urg_data) - { - release_sock(sk); - return(1); - } - release_sock(sk); - return(0); - } + if (sk->acked_seq == sk->copied_seq) + break; - release_sock(sk); - return(0); -} + if (sk->urg_seq != sk->copied_seq || + sk->acked_seq != sk->copied_seq+1 || + sk->urginline || !sk->urg_data) + return 1; + break; + case SEL_OUT: + if (sk->err) + return 1; + if (sk->shutdown & SEND_SHUTDOWN) + return 0; + if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) + break; + /* + * This is now right thanks to a small fix + * by Matt Dillon. + */ + + if (sk->prot->wspace(sk) < sk->mtu+128+sk->prot->max_header) + break; + return 1; + + case SEL_EX: + if (sk->urg_data) + return 1; + break; + } + select_wait(sk->sleep, wait); + return 0; +} int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) { @@ -595,7 +1013,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) } case SIOCATMARK: { - int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1; + int answ = sk->urg_data && sk->urg_seq == sk->copied_seq; err = verify_area(VERIFY_WRITE,(void *) arg, sizeof(unsigned long)); @@ -625,86 +1043,16 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) /* * This routine computes a TCP checksum. 
+ * + * Modified January 1995 from a go-faster DOS routine by + * Jorge Cwik <jorge@laser.satlink.net> */ unsigned short tcp_check(struct tcphdr *th, int len, unsigned long saddr, unsigned long daddr) { - unsigned long sum; - - if (saddr == 0) saddr = ip_my_addr(); - -/* - * stupid, gcc complains when I use just one __asm__ block, - * something about too many reloads, but this is just two - * instructions longer than what I want - */ - __asm__(" - addl %%ecx, %%ebx - adcl %%edx, %%ebx - adcl $0, %%ebx - " - : "=b"(sum) - : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256) - : "bx", "cx", "dx" ); - __asm__(" - movl %%ecx, %%edx - cld - cmpl $32, %%ecx - jb 2f - shrl $5, %%ecx - clc -1: lodsl - adcl %%eax, %%ebx - lodsl - adcl %%eax, %%ebx - lodsl - adcl %%eax, %%ebx - lodsl - adcl %%eax, %%ebx - lodsl - adcl %%eax, %%ebx - lodsl - adcl %%eax, %%ebx - lodsl - adcl %%eax, %%ebx - lodsl - adcl %%eax, %%ebx - loop 1b - adcl $0, %%ebx - movl %%edx, %%ecx -2: andl $28, %%ecx - je 4f - shrl $2, %%ecx - clc -3: lodsl - adcl %%eax, %%ebx - loop 3b - adcl $0, %%ebx -4: movl $0, %%eax - testw $2, %%dx - je 5f - lodsw - addl %%eax, %%ebx - adcl $0, %%ebx - movw $0, %%ax -5: test $1, %%edx - je 6f - lodsb - addl %%eax, %%ebx - adcl $0, %%ebx -6: movl %%ebx, %%eax - shrl $16, %%eax - addw %%ax, %%bx - adcw $0, %%bx - " - : "=b"(sum) - : "0"(sum), "c"(len), "S"(th) - : "ax", "bx", "cx", "dx", "si" ); - - /* We only want the bottom 16 bits, but we never cleared the top 16. */ - - return((~sum) & 0xffff); + return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP, + csum_partial((char *)th,len,0)); } @@ -717,15 +1065,26 @@ void tcp_send_check(struct tcphdr *th, unsigned long saddr, return; } +/* + * This is the main buffer sending routine. We queue the buffer + * having checked it is sane seeming. 
+ */ + static void tcp_send_skb(struct sock *sk, struct sk_buff *skb) { int size; struct tcphdr * th = skb->h.th; - /* length of packet (not counting length of pre-tcp headers) */ + /* + * length of packet (not counting length of pre-tcp headers) + */ + size = skb->len - ((unsigned char *) th - skb->data); - /* sanity check it.. */ + /* + * Sanity check it.. + */ + if (size < sizeof(struct tcphdr) || size > skb->len) { printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n", @@ -734,10 +1093,14 @@ static void tcp_send_skb(struct sock *sk, struct sk_buff *skb) return; } - /* If we have queued a header size packet.. */ + /* + * If we have queued a header size packet.. (these crash a few + * tcp stacks if ack is not set) + */ + if (size == sizeof(struct tcphdr)) { - /* If its got a syn or fin its notionally included in the size..*/ + /* If it's got a syn or fin it's notionally included in the size..*/ if(!th->syn && !th->fin) { printk("tcp_send_skb: attempt to queue a bogon.\n"); @@ -746,11 +1109,23 @@ static void tcp_send_skb(struct sock *sk, struct sk_buff *skb) } } + /* + * Actual processing. + */ + tcp_statistics.TcpOutSegs++; - skb->h.seq = ntohl(th->seq) + size - 4*th->doff; + + /* + * We must queue if + * + * a) The right edge of this frame exceeds the window + * b) We are retransmitting (Nagle's rule) + * c) We have too many packets 'in flight' + */ + if (after(skb->h.seq, sk->window_seq) || - (sk->retransmits && sk->timeout == TIME_WRITE) || + (sk->retransmits && sk->ip_xmit_timeout == TIME_WRITE) || sk->packets_out >= sk->cong_window) { /* checksum will be supplied by tcp_write_xmit. So @@ -762,23 +1137,58 @@ static void tcp_send_skb(struct sock *sk, struct sk_buff *skb) skb_unlink(skb); } skb_queue_tail(&sk->write_queue, skb); + + /* + * If we don't fit we have to start the zero window + * probes. This is broken - we really need to do a partial + * send _first_ (This is what causes the Cisco and PC/TCP + * grief). 
+ */ + if (before(sk->window_seq, sk->write_queue.next->h.seq) && - sk->send_head == NULL && - sk->ack_backlog == 0) - reset_timer(sk, TIME_PROBE0, sk->rto); + sk->send_head == NULL && sk->ack_backlog == 0) + reset_xmit_timer(sk, TIME_PROBE0, sk->rto); } else { + /* + * This is going straight out + */ + th->ack_seq = ntohl(sk->acked_seq); th->window = ntohs(tcp_select_window(sk)); tcp_send_check(th, sk->saddr, sk->daddr, size, sk); sk->sent_seq = sk->write_seq; + + /* + * This is mad. The tcp retransmit queue is put together + * by the ip layer. This causes half the problems with + * unroutable FIN's and other things. + */ + sk->prot->queue_xmit(sk, skb->dev, skb, 0); + + /* + * Set for next retransmit based on expected ACK time. + * FIXME: We set this every time which means our + * retransmits are really about a window behind. + */ + + reset_xmit_timer(sk, TIME_WRITE, sk->rto); } } +/* + * Locking problems lead us to a messy situation where we can have + * multiple partially complete buffers queued up. This is really bad + * as we don't want to be sending partial buffers. Fix this with + * a semaphore or similar to lock tcp_write per socket. + * + * These routines are pretty self descriptive. + */ + struct sk_buff * tcp_dequeue_partial(struct sock * sk) { struct sk_buff * skb; @@ -795,6 +1205,10 @@ struct sk_buff * tcp_dequeue_partial(struct sock * sk) return skb; } +/* + * Empty the partial queue + */ + static void tcp_send_partial(struct sock *sk) { struct sk_buff *skb; @@ -805,6 +1219,10 @@ static void tcp_send_partial(struct sock *sk) tcp_send_skb(sk, skb); } +/* + * Queue a partial frame + */ + void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk) { struct sk_buff * tmp; @@ -817,6 +1235,9 @@ void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk) del_timer(&sk->partial_timer); sk->partial = skb; init_timer(&sk->partial_timer); + /* + * Wait up to 1 second for the buffer to fill. 
+ */ sk->partial_timer.expires = HZ; sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial; sk->partial_timer.data = (unsigned long) sk; @@ -842,6 +1263,7 @@ static void tcp_send_ack(unsigned long sequence, unsigned long ack, if(sk->zapped) return; /* We have been reset, we may not send again */ + /* * We need to grab some memory, and put together an ack, * and then put it into the queue to be sent. @@ -850,34 +1272,46 @@ static void tcp_send_ack(unsigned long sequence, unsigned long ack, buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC); if (buff == NULL) { - /* Force it to send an ack. */ + /* + * Force it to send an ack. We don't have to do this + * (ACK is unreliable) but it's much better use of + * bandwidth on slow links to send a spare ack than + * resend packets. + */ + sk->ack_backlog++; - if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) + if (sk->ip_xmit_timeout != TIME_WRITE && tcp_connected(sk->state)) { - reset_timer(sk, TIME_WRITE, 10); + reset_xmit_timer(sk, TIME_WRITE, HZ); } return; } + /* + * Assemble a suitable TCP frame + */ + buff->len = sizeof(struct tcphdr); buff->sk = sk; buff->localroute = sk->localroute; t1 =(struct tcphdr *) buff->data; - /* Put in the IP header and routing stuff. */ + /* + * Put in the IP header and routing stuff. + */ + tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev, IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl); if (tmp < 0) { - buff->free=1; + buff->free = 1; sk->prot->wfree(sk, buff->mem_addr, buff->mem_len); return; } buff->len += tmp; t1 =(struct tcphdr *)((char *)t1 +tmp); - /* FIXME: */ - memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */ + memcpy(t1, th, sizeof(*t1)); /* * Swap the send and the receive. 
@@ -896,21 +1330,33 @@ static void tcp_send_ack(unsigned long sequence, unsigned long ack, t1->syn = 0; t1->psh = 0; t1->fin = 0; + + /* + * If we have nothing queued for transmit and the transmit timer + * is on we are just doing an ACK timeout and need to switch + * to a keepalive. + */ + if (ack == sk->acked_seq) { sk->ack_backlog = 0; sk->bytes_rcv = 0; sk->ack_timed = 0; if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL - && sk->timeout == TIME_WRITE) + && sk->ip_xmit_timeout == TIME_WRITE) { if(sk->keepopen) { - reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN); + reset_xmit_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN); } else { delete_timer(sk); } } } + + /* + * Fill in the packet and send it + */ + t1->ack_seq = ntohl(ack); t1->doff = sizeof(*t1)/4; tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk); @@ -925,10 +1371,9 @@ static void tcp_send_ack(unsigned long sequence, unsigned long ack, * This routine builds a generic TCP header. */ -static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push) +extern __inline int tcp_build_header(struct tcphdr *th, struct sock *sk, int push) { - /* FIXME: want to get rid of this. */ memcpy(th,(void *) &(sk->dummy_th), sizeof(*th)); th->seq = htonl(sk->write_seq); th->psh =(push == 0) ? 1 : 0; @@ -976,9 +1421,9 @@ static int tcp_write(struct sock *sk, unsigned char *from, return(tmp); } - /* - * First thing we do is make sure that we are established. - */ + /* + * First thing we do is make sure that we are established. + */ if (sk->shutdown & SEND_SHUTDOWN) { @@ -990,10 +1435,9 @@ static int tcp_write(struct sock *sk, unsigned char *from, return(-EPIPE); } - - /* - * Wait for a connection to finish. - */ + /* + * Wait for a connection to finish. + */ while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) { @@ -1130,7 +1574,7 @@ static int tcp_write(struct sock *sk, unsigned char *from, if (copy < sk->mss && !(flags & MSG_OOB)) { /* - * We will release the socket incase we sleep here. 
+ * We will release the socket in case we sleep here. */ release_sock(sk); /* @@ -1144,7 +1588,7 @@ static int tcp_write(struct sock *sk, unsigned char *from, else { /* - * We will release the socket incase we sleep here. + * We will release the socket in case we sleep here. */ release_sock(sk); skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL); @@ -1157,6 +1601,7 @@ static int tcp_write(struct sock *sk, unsigned char *from, if (skb == NULL) { + sk->socket->flags |= SO_NOSPACE; if (nonblock) { release_sock(sk); @@ -1179,6 +1624,7 @@ static int tcp_write(struct sock *sk, unsigned char *from, (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT) && sk->err == 0) { + sk->socket->flags &= ~SO_NOSPACE; interruptible_sleep_on(sk->sleep); if (current->signal & ~current->blocked) { @@ -1274,6 +1720,9 @@ static int tcp_write(struct sock *sk, unsigned char *from, return(copied); } +/* + * This is just a wrapper. + */ static int tcp_sendto(struct sock *sk, unsigned char *from, int len, int nonblock, unsigned flags, @@ -1295,6 +1744,11 @@ static int tcp_sendto(struct sock *sk, unsigned char *from, } +/* + * Send an ack if one is backlogged at this point. Ought to merge + * this with tcp_send_ack(). + */ + static void tcp_read_wakeup(struct sock *sk) { int tmp; @@ -1320,7 +1774,7 @@ static void tcp_read_wakeup(struct sock *sk) if (buff == NULL) { /* Try again real soon. */ - reset_timer(sk, TIME_WRITE, 10); + reset_xmit_timer(sk, TIME_WRITE, HZ); return; } @@ -1336,7 +1790,7 @@ static void tcp_read_wakeup(struct sock *sk) IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl); if (tmp < 0) { - buff->free=1; + buff->free = 1; sk->prot->wfree(sk, buff->mem_addr, buff->mem_len); return; } @@ -1388,13 +1842,13 @@ static void cleanup_rbuf(struct sock *sk) left = sk->prot->rspace(sk); /* - * We have to loop through all the buffer headers, - * and try to free up all the space we can. 
+ * We have to loop through all the buffer headers, + * and try to free up all the space we can. */ while((skb=skb_peek(&sk->receive_queue)) != NULL) { - if (!skb->used) + if (!skb->used || skb->users) break; skb_unlink(skb); skb->sk = sk; @@ -1404,10 +1858,10 @@ static void cleanup_rbuf(struct sock *sk) restore_flags(flags); /* - * FIXME: - * At this point we should send an ack if the difference - * in the window, and the amount of space is bigger than - * TCP_WINDOW_DIFF. + * FIXME: + * At this point we should send an ack if the difference + * in the window, and the amount of space is bigger than + * TCP_WINDOW_DIFF. */ if(sk->debug) @@ -1443,27 +1897,32 @@ static void cleanup_rbuf(struct sock *sk) else { /* Force it to send an ack soon. */ - int was_active = del_timer(&sk->timer); + int was_active = del_timer(&sk->retransmit_timer); if (!was_active || TCP_ACK_TIME < sk->timer.expires) { - reset_timer(sk, TIME_WRITE, TCP_ACK_TIME); + reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME); } else - add_timer(&sk->timer); + add_timer(&sk->retransmit_timer); } } } /* - * Handle reading urgent data. + * Handle reading urgent data. BSD has very simple semantics for + * this, no blocking and very strange errors 8) */ static int tcp_read_urg(struct sock * sk, int nonblock, unsigned char *to, int len, unsigned flags) { + /* + * No URG data to read + */ if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ) - return -EINVAL; + return -EINVAL; /* Yes this is right ! */ + if (sk->err) { int tmp = -sk->err; @@ -1518,17 +1977,29 @@ static int tcp_read(struct sock *sk, unsigned char *to, struct wait_queue wait = { current, NULL }; int copied = 0; unsigned long peek_seq; - unsigned long *seq; + volatile unsigned long *seq; /* So gcc doesn't overoptimise */ unsigned long used; - /* This error should be checked. */ + /* + * This error should be checked. + */ + if (sk->state == TCP_LISTEN) return -ENOTCONN; - /* Urgent data needs to be handled specially. 
*/ + /* + * Urgent data needs to be handled specially. + */ + if (flags & MSG_OOB) return tcp_read_urg(sk, nonblock, to, len, flags); + /* + * Copying sequence to update. This is volatile to handle + * the multi-reader case neatly (memcpy_to/fromfs might be + * inline and thus not flush cached variables otherwise). + */ + peek_seq = sk->copied_seq; seq = &sk->copied_seq; if (flags & MSG_PEEK) @@ -1542,11 +2013,16 @@ static int tcp_read(struct sock *sk, unsigned char *to, unsigned long offset; /* - * are we at urgent data? Stop if we have read anything. + * Are we at urgent data? Stop if we have read anything. */ - if (copied && sk->urg_data && sk->urg_seq == 1+*seq) + + if (copied && sk->urg_data && sk->urg_seq == *seq) break; + /* + * Next get a buffer. + */ + current->state = TASK_INTERRUPTIBLE; skb = skb_peek(&sk->receive_queue); @@ -1554,13 +2030,15 @@ static int tcp_read(struct sock *sk, unsigned char *to, { if (!skb) break; - if (before(1+*seq, skb->h.th->seq)) + if (before(*seq, skb->h.th->seq)) break; - offset = 1 + *seq - skb->h.th->seq; + offset = *seq - skb->h.th->seq; if (skb->h.th->syn) offset--; if (offset < skb->len) goto found_ok_skb; + if (skb->h.th->fin) + goto found_fin_ok; if (!(flags & MSG_PEEK)) skb->used = 1; skb = skb->next; @@ -1602,7 +2080,9 @@ static int tcp_read(struct sock *sk, unsigned char *to, cleanup_rbuf(sk); release_sock(sk); + sk->socket->flags |= SO_WAITDATA; schedule(); + sk->socket->flags &= ~SO_WAITDATA; sk->inuse = 1; if (current->signal & ~current->blocked) @@ -1613,14 +2093,29 @@ static int tcp_read(struct sock *sk, unsigned char *to, continue; found_ok_skb: - /* Ok so how much can we use ? */ + /* + * Lock the buffer. We can be fairly relaxed as + * an interrupt will never steal a buffer we are + * using unless I've missed something serious in + * tcp_data. + */ + + skb->users++; + + /* + * Ok so how much can we use ? + */ + used = skb->len - offset; if (len < used) used = len; - /* do we have urgent data here? 
*/ + /* + * Do we have urgent data here? + */ + if (sk->urg_data) { - unsigned long urg_offset = sk->urg_seq - (1 + *seq); + unsigned long urg_offset = sk->urg_seq - *seq; if (urg_offset < used) { if (!urg_offset) @@ -1636,17 +2131,63 @@ static int tcp_read(struct sock *sk, unsigned char *to, used = urg_offset; } } - /* Copy it */ + + /* + * Copy it - We _MUST_ update *seq first so that we + * don't ever double read when we have dual readers + */ + + *seq += used; + + /* + * This memcpy_tofs can sleep. If it sleeps and we + * do a second read it relies on the skb->users to avoid + * a crash when cleanup_rbuf() gets called. + */ + memcpy_tofs(to,((unsigned char *)skb->h.th) + skb->h.th->doff*4 + offset, used); copied += used; len -= used; to += used; - *seq += used; - if (after(sk->copied_seq+1,sk->urg_seq)) + + /* + * We now will not sleep again until we are finished + * with skb. Sorry if you are doing the SMP port + * but you'll just have to fix it neatly ;) + */ + + skb->users --; + + if (after(sk->copied_seq,sk->urg_seq)) sk->urg_data = 0; - if (!(flags & MSG_PEEK) && (used + offset >= skb->len)) - skb->used = 1; + if (used + offset < skb->len) + continue; + + /* + * Process the FIN. + */ + + if (skb->h.th->fin) + goto found_fin_ok; + if (flags & MSG_PEEK) + continue; + skb->used = 1; + continue; + + found_fin_ok: + ++*seq; + if (flags & MSG_PEEK) + break; + + /* + * All is done + */ + + skb->used = 1; + sk->shutdown |= RCV_SHUTDOWN; + break; + } remove_wait_queue(sk->sleep, &wait); current->state = TASK_RUNNING; @@ -1657,67 +2198,92 @@ static int tcp_read(struct sock *sk, unsigned char *to, return copied; } +/* + * State processing on a close. This implements the state shift for + * sending our FIN frame. Note that we only send a FIN for some + * states. A shutdown() may have already sent the FIN, or we may be + * closed. 
+ */ +static int tcp_close_state(struct sock *sk, int dead) +{ + int ns=TCP_CLOSE; + int send_fin=0; + switch(sk->state) + { + case TCP_SYN_SENT: /* No SYN back, no FIN needed */ + break; + case TCP_SYN_RECV: + case TCP_ESTABLISHED: /* Closedown begin */ + ns=TCP_FIN_WAIT1; + send_fin=1; + break; + case TCP_FIN_WAIT1: /* Already closing, or FIN sent: no change */ + case TCP_FIN_WAIT2: + case TCP_CLOSING: + ns=sk->state; + break; + case TCP_CLOSE: + case TCP_LISTEN: + break; + case TCP_CLOSE_WAIT: /* They have FIN'd us. We send our FIN and + wait only for the ACK */ + ns=TCP_LAST_ACK; + send_fin=1; + } + + tcp_set_state(sk,ns); + + /* + * This is a (useful) BSD violating of the RFC. There is a + * problem with TCP as specified in that the other end could + * keep a socket open forever with no application left this end. + * We use a 3 minute timeout (about the same as BSD) then kill + * our end. If they send after that then tough - BUT: long enough + * that we won't make the old 4*rto = almost no time - whoops + * reset mistake. + */ + if(dead && ns==TCP_FIN_WAIT2) + { + int timer_active=del_timer(&sk->timer); + if(timer_active) + add_timer(&sk->timer); + else + reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT); + } + + return send_fin; +} + /* - * Shutdown the sending side of a connection. + * Send a fin. */ -void tcp_shutdown(struct sock *sk, int how) +static void tcp_send_fin(struct sock *sk) { + struct proto *prot =(struct proto *)sk->prot; + struct tcphdr *th =(struct tcphdr *)&sk->dummy_th; + struct tcphdr *t1; struct sk_buff *buff; - struct tcphdr *t1, *th; - struct proto *prot; + struct device *dev=NULL; int tmp; - struct device *dev = NULL; - - /* - * We need to grab some memory, and put together a FIN, - * and then put it into the queue to be sent. - * FIXME: - * - * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92. - * Most of this is guesswork, so maybe it will work... - */ + + release_sock(sk); /* in case the malloc sleeps. 
*/ + + buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL); + sk->inuse = 1; - if (!(how & SEND_SHUTDOWN)) - return; - - /* - * If we've already sent a FIN, return. - */ - - if (sk->state == TCP_FIN_WAIT1 || - sk->state == TCP_FIN_WAIT2 || - sk->state == TCP_CLOSING || - sk->state == TCP_LAST_ACK || - sk->state == TCP_TIME_WAIT - ) + if (buff == NULL) { + /* This is a disaster if it occurs */ + printk("tcp_send_fin: Impossible malloc failure"); return; } - sk->inuse = 1; /* - * flag that the sender has shutdown + * Administrivia */ - - sk->shutdown |= SEND_SHUTDOWN; - - /* - * Clear out any half completed packets. - */ - - if (sk->partial) - tcp_send_partial(sk); - - prot =(struct proto *)sk->prot; - th =(struct tcphdr *)&sk->dummy_th; - release_sock(sk); /* incase the malloc sleeps. */ - buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL); - if (buff == NULL) - return; - sk->inuse = 1; - + buff->sk = sk; buff->len = sizeof(*t1); buff->localroute = sk->localroute; @@ -1732,29 +2298,27 @@ void tcp_shutdown(struct sock *sk, int how) sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl); if (tmp < 0) { + int t; /* * Finish anyway, treat this as a send that got lost. - * - * Enter FIN_WAIT1 on normal shutdown, which waits for - * written data to be completely acknowledged along - * with an acknowledge to our FIN. - * - * Enter FIN_WAIT2 on abnormal shutdown -- close before - * connection established. + * (Not good). 
*/ - buff->free=1; + + buff->free = 1; prot->wfree(sk,buff->mem_addr, buff->mem_len); - - if (sk->state == TCP_ESTABLISHED) - tcp_set_state(sk,TCP_FIN_WAIT1); - else if(sk->state == TCP_CLOSE_WAIT) - tcp_set_state(sk,TCP_LAST_ACK); + sk->write_seq++; + t=del_timer(&sk->timer); + if(t) + add_timer(&sk->timer); else - tcp_set_state(sk,TCP_FIN_WAIT2); - - release_sock(sk); + reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); return; } + + /* + * We ought to check if the end of the queue is a buffer and + * if so simply add the fin to that buffer, not send it ahead. + */ t1 =(struct tcphdr *)((char *)t1 +tmp); buff->len += tmp; @@ -1778,10 +2342,10 @@ void tcp_shutdown(struct sock *sk, int how) if (skb_peek(&sk->write_queue) != NULL) { - buff->free=0; + buff->free = 0; if (buff->next != NULL) { - printk("tcp_shutdown: next != NULL\n"); + printk("tcp_send_fin: next != NULL\n"); skb_unlink(buff); } skb_queue_tail(&sk->write_queue, buff); @@ -1790,15 +2354,63 @@ void tcp_shutdown(struct sock *sk, int how) { sk->sent_seq = sk->write_seq; sk->prot->queue_xmit(sk, dev, buff, 0); + reset_xmit_timer(sk, TIME_WRITE, sk->rto); } +} - if (sk->state == TCP_ESTABLISHED) - tcp_set_state(sk,TCP_FIN_WAIT1); - else if (sk->state == TCP_CLOSE_WAIT) - tcp_set_state(sk,TCP_LAST_ACK); - else - tcp_set_state(sk,TCP_FIN_WAIT2); +/* + * Shutdown the sending side of a connection. Much like close except + * that we don't receive shut down or set sk->dead=1. + */ + +void tcp_shutdown(struct sock *sk, int how) +{ + /* + * We need to grab some memory, and put together a FIN, + * and then put it into the queue to be sent. + * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92. 
+ */ + + if (!(how & SEND_SHUTDOWN)) + return; + + /* + * If we've already sent a FIN, or it's a closed state + */ + + if (sk->state == TCP_FIN_WAIT1 || + sk->state == TCP_FIN_WAIT2 || + sk->state == TCP_CLOSING || + sk->state == TCP_LAST_ACK || + sk->state == TCP_TIME_WAIT || + sk->state == TCP_CLOSE || + sk->state == TCP_LISTEN + ) + { + return; + } + sk->inuse = 1; + + /* + * flag that the sender has shutdown + */ + + sk->shutdown |= SEND_SHUTDOWN; + + /* + * Clear out any half completed packets. + */ + if (sk->partial) + tcp_send_partial(sk); + + /* + * FIN if needed + */ + + if(tcp_close_state(sk,0)) + tcp_send_fin(sk); + release_sock(sk); } @@ -1844,11 +2456,18 @@ static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *t struct tcphdr *t1; int tmp; struct device *ndev=NULL; + + /* + * Cannot reset a reset (Think about it). + */ + + if(th->rst) + return; -/* - * We need to grab some memory, and put together an RST, - * and then put it into the queue to be sent. - */ + /* + * We need to grab some memory, and put together an RST, + * and then put it into the queue to be sent. + */ buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC); if (buff == NULL) @@ -1920,6 +2539,9 @@ static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *t * However it may also be called with the ack to the SYN. So you * can't assume this is always the SYN. It's always called after * we have set up sk->mtu to our own MTU. + * + * We need at minimum to add PAWS support here. Possibly large windows + * as Linux gets deployed on 100Mb/sec networks. 
*/ static void tcp_options(struct sock *sk, struct tcphdr *th) @@ -1938,8 +2560,9 @@ static void tcp_options(struct sock *sk, struct tcphdr *th) { case TCPOPT_EOL: return; - case TCPOPT_NOP: - length-=2; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + ptr--; /* the opsize=*ptr++ above was a mistake */ continue; default: @@ -1984,11 +2607,18 @@ static inline unsigned long default_mask(unsigned long dst) /* * Default sequence number picking algorithm. + * As close as possible to RFC 793, which + * suggests using a 250kHz clock. + * Further reading shows this assumes 2MB/s networks. + * For 10MB/s ethernet, a 1MHz clock is appropriate. + * That's funny, Linux has one built in! Use it! */ -extern inline long tcp_init_seq(void) +extern inline unsigned long tcp_init_seq(void) { - return jiffies * SEQ_TICK - seq_offset; + struct timeval tv; + do_gettimeofday(&tv); + return tv.tv_usec+tv.tv_sec*1000000; } /* @@ -2088,19 +2718,24 @@ static void tcp_conn_request(struct sock *sk, struct sk_buff *skb, newsk->shutdown = 0; newsk->ack_backlog = 0; newsk->acked_seq = skb->h.th->seq+1; + newsk->copied_seq = skb->h.th->seq+1; newsk->fin_seq = skb->h.th->seq; - newsk->copied_seq = skb->h.th->seq; newsk->state = TCP_SYN_RECV; newsk->timeout = 0; + newsk->ip_xmit_timeout = 0; newsk->write_seq = seq; newsk->window_seq = newsk->write_seq; newsk->rcv_ack_seq = newsk->write_seq; newsk->urg_data = 0; newsk->retransmits = 0; + newsk->linger=0; newsk->destroy = 0; init_timer(&newsk->timer); newsk->timer.data = (unsigned long)newsk; newsk->timer.function = &net_timer; + init_timer(&newsk->retransmit_timer); + newsk->retransmit_timer.data = (unsigned long)newsk; + newsk->retransmit_timer.function=&retransmit_timer; newsk->dummy_th.source = skb->h.th->dest; newsk->dummy_th.dest = skb->h.th->source; @@ -2122,7 +2757,7 @@ static void tcp_conn_request(struct sock *sk, struct sk_buff *skb, newsk->dummy_th.urg = 0; newsk->dummy_th.res2 = 0; newsk->acked_seq = skb->h.th->seq + 1; - 
newsk->copied_seq = skb->h.th->seq; + newsk->copied_seq = skb->h.th->seq + 1; newsk->socket = NULL; /* @@ -2174,12 +2809,16 @@ static void tcp_conn_request(struct sock *sk, struct sk_buff *skb, */ tcp_options(newsk,skb->h.th); + + tcp_cache_zap(); buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC); if (buff == NULL) { - sk->err = -ENOMEM; + sk->err = ENOMEM; newsk->dead = 1; + newsk->state = TCP_CLOSE; + /* And this will destroy it */ release_sock(newsk); kfree_skb(skb, FREE_READ); tcp_statistics.TcpAttemptFails++; @@ -2206,9 +2845,10 @@ static void tcp_conn_request(struct sock *sk, struct sk_buff *skb, if (tmp < 0) { sk->err = tmp; - buff->free=1; + buff->free = 1; kfree_skb(buff,FREE_WRITE); newsk->dead = 1; + newsk->state = TCP_CLOSE; release_sock(newsk); skb->sk = sk; kfree_skb(skb, FREE_READ); @@ -2247,8 +2887,7 @@ static void tcp_conn_request(struct sock *sk, struct sk_buff *skb, tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk); newsk->prot->queue_xmit(newsk, ndev, buff, 0); - - reset_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT); + reset_xmit_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT); skb->sk = newsk; /* @@ -2267,17 +2906,24 @@ static void tcp_conn_request(struct sock *sk, struct sk_buff *skb, static void tcp_close(struct sock *sk, int timeout) { - struct sk_buff *buff; - struct tcphdr *t1, *th; - struct proto *prot; - struct device *dev=NULL; - int tmp; - /* * We need to grab some memory, and put together a FIN, * and then put it into the queue to be sent. */ + sk->inuse = 1; + + if(th_cache_sk==sk) + tcp_cache_zap(); + if(sk->state == TCP_LISTEN) + { + /* Special case */ + tcp_set_state(sk, TCP_CLOSE); + tcp_close_pending(sk); + release_sock(sk); + return; + } + sk->keepopen = 1; sk->shutdown = SHUTDOWN_MASK; @@ -2286,197 +2932,52 @@ static void tcp_close(struct sock *sk, int timeout) if (timeout == 0) { + struct sk_buff *skb; + /* * We need to flush the recv. buffs. 
We do this only on the * descriptor close, not protocol-sourced closes, because the * reader process may not have drained the data yet! */ + + while((skb=skb_dequeue(&sk->receive_queue))!=NULL) + kfree_skb(skb, FREE_READ); + /* + * Get rid off any half-completed packets. + */ - if (skb_peek(&sk->receive_queue) != NULL) - { - struct sk_buff *skb; - if(sk->debug) - printk("Clean rcv queue\n"); - while((skb=skb_dequeue(&sk->receive_queue))!=NULL) - kfree_skb(skb, FREE_READ); - if(sk->debug) - printk("Cleaned.\n"); - } + if (sk->partial) + tcp_send_partial(sk); } + /* - * Get rid off any half-completed packets. + * Timeout is not the same thing - however the code likes + * to send both the same way (sigh). */ - if (sk->partial) + if(timeout) { - tcp_send_partial(sk); + tcp_set_state(sk, TCP_CLOSE); /* Dead */ } - - switch(sk->state) + else { - case TCP_FIN_WAIT1: - case TCP_FIN_WAIT2: - case TCP_CLOSING: - /* - * These states occur when we have already closed out - * our end. If there is no timeout, we do not do - * anything. We may still be in the middle of sending - * the remainder of our buffer, for example... - * resetting the timer would be inappropriate. - * - * XXX if retransmit count reaches limit, is tcp_close() - * called with timeout == 1 ? if not, we need to fix that. - */ - if (!timeout) { - int timer_active; - - timer_active = del_timer(&sk->timer); - if (timer_active) - add_timer(&sk->timer); - else - reset_timer(sk, TIME_CLOSE, 4 * sk->rto); - } - if (timeout) - tcp_time_wait(sk); - release_sock(sk); - return; /* break causes a double release - messy */ - case TCP_TIME_WAIT: - case TCP_LAST_ACK: - /* - * A timeout from these states terminates the TCB. - */ - if (timeout) - { - tcp_set_state(sk,TCP_CLOSE); - } - release_sock(sk); - return; - case TCP_LISTEN: - /* we need to drop any sockets which have been connected, - but have not yet been accepted. 
*/ - tcp_close_pending(sk, timeout); - tcp_set_state(sk,TCP_CLOSE); - release_sock(sk); - return; - case TCP_CLOSE: - release_sock(sk); - return; - case TCP_CLOSE_WAIT: - case TCP_ESTABLISHED: - case TCP_SYN_SENT: - case TCP_SYN_RECV: - prot =(struct proto *)sk->prot; - th =(struct tcphdr *)&sk->dummy_th; - buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC); - if (buff == NULL) - { - /* This will force it to try again later. */ - /* Or it would have if someone released the socket - first. Anyway it might work now */ - release_sock(sk); - if (sk->state != TCP_CLOSE_WAIT) - tcp_set_state(sk,TCP_ESTABLISHED); - reset_timer(sk, TIME_CLOSE, 100); - return; - } - buff->sk = sk; - buff->free = 1; - buff->len = sizeof(*t1); - buff->localroute = sk->localroute; - t1 =(struct tcphdr *) buff->data; - - /* - * Put in the IP header and routing stuff. - */ - tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev, - IPPROTO_TCP, sk->opt, - sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl); - if (tmp < 0) - { - sk->write_seq++; /* Very important 8) */ - kfree_skb(buff,FREE_WRITE); - - /* - * Enter FIN_WAIT1 to await completion of - * written out data and ACK to our FIN. - */ - - if(sk->state==TCP_ESTABLISHED) - tcp_set_state(sk,TCP_FIN_WAIT1); - else - tcp_set_state(sk,TCP_FIN_WAIT2); - reset_timer(sk, TIME_CLOSE,4*sk->rto); - if(timeout) - tcp_time_wait(sk); - - release_sock(sk); - return; - } - - t1 =(struct tcphdr *)((char *)t1 +tmp); - buff->len += tmp; - buff->dev = dev; - memcpy(t1, th, sizeof(*t1)); - t1->seq = ntohl(sk->write_seq); - sk->write_seq++; - buff->h.seq = sk->write_seq; - t1->ack = 1; - - /* - * Ack everything immediately from now on. 
- */ - - sk->delay_acks = 0; - t1->ack_seq = ntohl(sk->acked_seq); - t1->window = ntohs(sk->window=tcp_select_window(sk)); - t1->fin = 1; - t1->rst = 0; - t1->doff = sizeof(*t1)/4; - tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk); - - tcp_statistics.TcpOutSegs++; - - if (skb_peek(&sk->write_queue) == NULL) - { - sk->sent_seq = sk->write_seq; - prot->queue_xmit(sk, dev, buff, 0); - } - else - { - reset_timer(sk, TIME_WRITE, sk->rto); - if (buff->next != NULL) - { - printk("tcp_close: next != NULL\n"); - skb_unlink(buff); - } - skb_queue_tail(&sk->write_queue, buff); - } - - /* - * If established (normal close), enter FIN_WAIT1. - * If in CLOSE_WAIT, enter LAST_ACK - * If in CLOSING, remain in CLOSING - * otherwise enter FIN_WAIT2 - */ - - if (sk->state == TCP_ESTABLISHED) - tcp_set_state(sk,TCP_FIN_WAIT1); - else if (sk->state == TCP_CLOSE_WAIT) - tcp_set_state(sk,TCP_LAST_ACK); - else if (sk->state != TCP_CLOSING) - tcp_set_state(sk,TCP_FIN_WAIT2); + if(tcp_close_state(sk,1)==1) + { + tcp_send_fin(sk); + } } release_sock(sk); } /* - * This routine takes stuff off of the write queue, - * and puts it in the xmit queue. + * This routine takes stuff off of the write queue, + * and puts it in the xmit queue. This happens as incoming acks + * open up the remote window for us. */ -static void -tcp_write_xmit(struct sock *sk) + +static void tcp_write_xmit(struct sock *sk) { struct sk_buff *skb; @@ -2488,18 +2989,35 @@ tcp_write_xmit(struct sock *sk) if(sk->zapped) return; + /* + * Anything on the transmit queue that fits the window can + * be added providing we are not + * + * a) retransmitting (Nagle's rule) + * b) exceeding our congestion window. 
+ */ + while((skb = skb_peek(&sk->write_queue)) != NULL && before(skb->h.seq, sk->window_seq + 1) && (sk->retransmits == 0 || - sk->timeout != TIME_WRITE || + sk->ip_xmit_timeout != TIME_WRITE || before(skb->h.seq, sk->rcv_ack_seq + 1)) && sk->packets_out < sk->cong_window) { IS_SKB(skb); skb_unlink(skb); - /* See if we really need to send the packet. */ + + /* + * See if we really need to send the packet. + */ + if (before(skb->h.seq, sk->rcv_ack_seq +1)) { + /* + * This is acked data. We can discard it. This + * cannot currently occur. + */ + sk->retransmits = 0; kfree_skb(skb, FREE_WRITE); if (!sk->dead) @@ -2528,7 +3046,18 @@ tcp_write_xmit(struct sock *sk) tcp_send_check(th, sk->saddr, sk->daddr, size, sk); sk->sent_seq = skb->h.seq; + + /* + * IP manages our queue for some crazy reason + */ + sk->prot->queue_xmit(sk, skb->dev, skb, skb->free); + + /* + * Again we slide the timer wrongly + */ + + reset_xmit_timer(sk, TIME_WRITE, sk->rto); } } } @@ -2538,7 +3067,7 @@ tcp_write_xmit(struct sock *sk) * This routine deals with incoming acks, but not outgoing ones. */ -static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len) +extern __inline__ int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len) { unsigned long ack; int flag = 0; @@ -2553,20 +3082,37 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int if(sk->zapped) return(1); /* Dead, cant ack any more so why bother */ + /* + * Have we discovered a larger window + */ + ack = ntohl(th->ack_seq); + if (ntohs(th->window) > sk->max_window) { sk->max_window = ntohs(th->window); #ifdef CONFIG_INET_PCTCP + /* Hack because we don't send partial packets to non SWS + handling hosts */ sk->mss = min(sk->max_window>>1, sk->mtu); #else sk->mss = min(sk->max_window, sk->mtu); #endif } - if (sk->retransmits && sk->timeout == TIME_KEEPOPEN) + /* + * We have dropped back to keepalive timeouts. Thus we have + * no retransmits pending. 
+ */ + + if (sk->retransmits && sk->ip_xmit_timeout == TIME_KEEPOPEN) sk->retransmits = 0; + /* + * If the ack is newer than sent or older than previous acks + * then we can probably ignore it. + */ + if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) { if(sk->debug) @@ -2576,22 +3122,33 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int * Keepalive processing. */ - if (after(ack, sk->sent_seq) || (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) + if (after(ack, sk->sent_seq)) { return(0); } + + /* + * Restart the keepalive timer. + */ + if (sk->keepopen) { - if(sk->timeout==TIME_KEEPOPEN) - reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); + if(sk->ip_xmit_timeout==TIME_KEEPOPEN) + reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); } return(1); } + /* + * If there is data set flag 1 + */ + if (len != th->doff*4) flag |= 1; - /* See if our window has been shrunk. */ + /* + * See if our window has been shrunk. + */ if (after(sk->window_seq, ack+ntohs(th->window))) { @@ -2610,7 +3167,12 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int sk->send_head = NULL; sk->send_tail = NULL; - flag |= 4; + /* + * This is an artifact of a flawed concept. We want one + * queue and a smarter send routine when we send all. + */ + + flag |= 4; /* Window changed */ sk->window_seq = ack + ntohs(th->window); cli(); @@ -2653,6 +3215,10 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int sti(); } + /* + * Pipe has emptied + */ + if (sk->send_tail == NULL || sk->send_head == NULL) { sk->send_head = NULL; @@ -2660,32 +3226,39 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int sk->packets_out= 0; } + /* + * Update the right hand window edge of the host + */ + sk->window_seq = ack + ntohs(th->window); - /* We don't want too many packets out there. */ - if (sk->timeout == TIME_WRITE && + /* + * We don't want too many packets out there. 
+ */ + + if (sk->ip_xmit_timeout == TIME_WRITE && sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) { -/* - * This is Jacobson's slow start and congestion avoidance. - * SIGCOMM '88, p. 328. Because we keep cong_window in integral - * mss's, we can't do cwnd += 1 / cwnd. Instead, maintain a - * counter and increment it once every cwnd times. It's possible - * that this should be done only if sk->retransmits == 0. I'm - * interpreting "new data is acked" as including data that has - * been retransmitted but is just now being acked. - */ + /* + * This is Jacobson's slow start and congestion avoidance. + * SIGCOMM '88, p. 328. Because we keep cong_window in integral + * mss's, we can't do cwnd += 1 / cwnd. Instead, maintain a + * counter and increment it once every cwnd times. It's possible + * that this should be done only if sk->retransmits == 0. I'm + * interpreting "new data is acked" as including data that has + * been retransmitted but is just now being acked. + */ if (sk->cong_window < sk->ssthresh) - /* - * In "safe" area, increase - */ + /* + * In "safe" area, increase + */ sk->cong_window++; else { - /* - * In dangerous area, increase slowly. In theory this is - * sk->cong_window += 1 / sk->cong_window - */ + /* + * In dangerous area, increase slowly. In theory this is + * sk->cong_window += 1 / sk->cong_window + */ if (sk->cong_count >= sk->cong_window) { sk->cong_window++; @@ -2696,24 +3269,34 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int } } + /* + * Remember the highest ack received. + */ + sk->rcv_ack_seq = ack; /* - * if this ack opens up a zero window, clear backoff. It was - * being used to time the probes, and is probably far higher than - * it needs to be for normal retransmission. + * If this ack opens up a zero window, clear backoff. It was + * being used to time the probes, and is probably far higher than + * it needs to be for normal retransmission. 
*/ - if (sk->timeout == TIME_PROBE0) + if (sk->ip_xmit_timeout == TIME_PROBE0) { + sk->retransmits = 0; /* Our probe was answered */ + + /* + * Was it a usable window open ? + */ + if (skb_peek(&sk->write_queue) != NULL && /* should always be non-null */ ! before (sk->window_seq, sk->write_queue.next->h.seq)) { - sk->retransmits = 0; sk->backoff = 0; - /* - * Recompute rto from rtt. this eliminates any backoff. - */ + + /* + * Recompute rto from rtt. this eliminates any backoff. + */ sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1; if (sk->rto > 120*HZ) @@ -2721,13 +3304,13 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int if (sk->rto < 20) /* Was 1*HZ, then 1 - turns out we must allow about .2 of a second because of BSD delayed acks - on a 100Mb/sec link .2 of a second is going to need huge windows (SIGH) */ - sk->rto = 20; + sk->rto = 20; } } - /* - * See if we can take anything off of the retransmit queue. - */ + /* + * See if we can take anything off of the retransmit queue. + */ while(sk->send_head != NULL) { @@ -2735,6 +3318,12 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int if (sk->send_head->link3 && after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) printk("INET: tcp.c: *** bug send_list out of order.\n"); + + /* + * If our packet is before the ack sequence we can + * discard it as it's confirmed to have arrived the other end. + */ + if (before(sk->send_head->h.seq, ack+1)) { struct sk_buff *oskb; @@ -2753,7 +3342,7 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int * retransmissions. */ - if (sk->send_head->link3) + if (sk->send_head->link3) /* Any more queued retransmits? 
*/ sk->retransmits = 1; else sk->retransmits = 0; @@ -2783,7 +3372,7 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int sk->write_space(sk); oskb = sk->send_head; - if (!(flag&2)) + if (!(flag&2)) /* Not retransmitting */ { long m; @@ -2816,7 +3405,8 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int sk->rto = 20; sk->backoff = 0; } - flag |= (2|4); + flag |= (2|4); /* 2 is really more like 'don't adjust the rtt + In this case as we just set it up */ cli(); oskb = sk->send_head; IS_SKB(oskb); @@ -2860,10 +3450,13 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int { if (after (sk->window_seq+1, sk->write_queue.next->h.seq) && (sk->retransmits == 0 || - sk->timeout != TIME_WRITE || + sk->ip_xmit_timeout != TIME_WRITE || before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1)) && sk->packets_out < sk->cong_window) { + /* + * Add more data to the send queue. + */ flag |= 1; tcp_write_xmit(sk); } @@ -2872,7 +3465,10 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int sk->ack_backlog == 0 && sk->state != TCP_TIME_WAIT) { - reset_timer(sk, TIME_PROBE0, sk->rto); + /* + * Data to queue but no room. + */ + reset_xmit_timer(sk, TIME_PROBE0, sk->rto); } } else @@ -2896,7 +3492,7 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int * keep us in TIME_WAIT until we stop getting packets, * reset the timeout. */ - reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); break; case TCP_CLOSE: /* @@ -2905,44 +3501,26 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int break; default: /* - * must check send_head, write_queue, and ack_backlog - * to determine which timeout to use. + * Must check send_head, write_queue, and ack_backlog + * to determine which timeout to use. 
*/ if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) { - reset_timer(sk, TIME_WRITE, sk->rto); + reset_xmit_timer(sk, TIME_WRITE, sk->rto); } else if (sk->keepopen) { - reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); + reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); } else { - delete_timer(sk); + del_timer(&sk->retransmit_timer); + sk->ip_xmit_timeout = 0; } break; } -#ifdef NOTDEF - if (sk->send_head == NULL && sk->ack_backlog == 0 && - sk->state != TCP_TIME_WAIT && !sk->keepopen) - { - if (!sk->dead) - sk->write_space(sk); - if (sk->keepopen) { - reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); - } else { - delete_timer(sk); - } - } - else - { - if (sk->state != (unsigned char) sk->keepopen) - { - reset_timer(sk, TIME_WRITE, sk->rto); - } - if (sk->state == TCP_TIME_WAIT) - { - reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); - } - } -#endif } + /* + * We have nothing queued but space to send. Send any partial + * packets immediately (end of Nagle rule application). + */ + if (sk->packets_out == 0 && sk->partial != NULL && skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) { @@ -2962,19 +3540,22 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int { if (!sk->dead) sk->state_change(sk); - if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) + if(sk->debug) + printk("rcv_ack_seq: %lX==%lX, acked_seq: %lX==%lX\n", + sk->rcv_ack_seq,sk->write_seq,sk->acked_seq,sk->fin_seq); + if (sk->rcv_ack_seq == sk->write_seq /*&& sk->acked_seq == sk->fin_seq*/) { flag |= 1; - tcp_time_wait(sk); + tcp_set_state(sk,TCP_CLOSE); sk->shutdown = SHUTDOWN_MASK; } } /* - * Incoming ACK to a FIN we sent in the case of our initiating the close. + * Incoming ACK to a FIN we sent in the case of our initiating the close. * - * Move to FIN_WAIT2 to await a FIN from the other end. Set - * SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in. 
+ * Move to FIN_WAIT2 to await a FIN from the other end. Set + * SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in. */ if (sk->state == TCP_FIN_WAIT1) @@ -3007,7 +3588,26 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int tcp_time_wait(sk); } } - + + /* + * Final ack of a three way shake + */ + + if(sk->state==TCP_SYN_RECV) + { + tcp_set_state(sk, TCP_ESTABLISHED); + tcp_options(sk,th); + sk->dummy_th.dest=th->source; + sk->copied_seq = sk->acked_seq; + if(!sk->dead) + sk->state_change(sk); + if(sk->max_window==0) + { + sk->max_window=32; /* Sanity check */ + sk->mss=min(sk->max_window,sk->mtu); + } + } + /* * I make no guarantees about the first clause in the following * test, i.e. "(!flag) || (flag&4)". I'm not entirely sure under @@ -3032,7 +3632,7 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int * It's possible that there should also be a test for TIME_WRITE, but * I think as long as "send_head != NULL" and "retransmit" is on, we've * got to be in real retransmission mode. - * Note that ip_do_retransmit is called with all==1. Setting cong_window + * Note that tcp_do_retransmit is called with all==1. Setting cong_window * back to 1 at the timeout will cause us to send 1, then 2, etc. packets. * As long as no further losses occur, this seems reasonable. */ @@ -3041,8 +3641,13 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int (((flag&2) && sk->retransmits) || (sk->send_head->when + sk->rto < jiffies))) { - ip_do_retransmit(sk, 1); - reset_timer(sk, TIME_WRITE, sk->rto); + if(sk->send_head->when + sk->rto < jiffies) + tcp_retransmit(sk,0); + else + { + tcp_do_retransmit(sk, 1); + reset_xmit_timer(sk, TIME_WRITE, sk->rto); + } } return(1); @@ -3050,30 +3655,128 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int /* + * Process the FIN bit. 
This now behaves as it is supposed to work + * and the FIN takes effect when it is validly part of sequence + * space. Not before when we get holes. + * + * If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT + * (and thence onto LAST-ACK and finally, CLOSE, we never enter + * TIME-WAIT) + * + * If we are in FINWAIT-1, a received FIN indicates simultaneous + * close and we go into CLOSING (and later onto TIME-WAIT) + * + * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. + * + */ + +static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) +{ + sk->fin_seq = th->seq + skb->len + th->syn + th->fin; + + if (!sk->dead) + { + sk->state_change(sk); + sock_wake_async(sk->socket, 1); + } + + switch(sk->state) + { + case TCP_SYN_RECV: + case TCP_SYN_SENT: + case TCP_ESTABLISHED: + /* + * move to CLOSE_WAIT, tcp_data() already handled + * sending the ack. + */ + tcp_set_state(sk,TCP_CLOSE_WAIT); + if (th->rst) + sk->shutdown = SHUTDOWN_MASK; + break; + + case TCP_CLOSE_WAIT: + case TCP_CLOSING: + /* + * received a retransmission of the FIN, do + * nothing. + */ + break; + case TCP_TIME_WAIT: + /* + * received a retransmission of the FIN, + * restart the TIME_WAIT timer. + */ + reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + return(0); + case TCP_FIN_WAIT1: + /* + * This case occurs when a simultaneous close + * happens, we must ack the received FIN and + * enter the CLOSING state. + * + * This causes a WRITE timeout, which will either + * move on to TIME_WAIT when we timeout, or resend + * the FIN properly (maybe we get rid of that annoying + * FIN lost hang). The TIME_WRITE code is already correct + * for handling this timeout. 
+ */ + + if(sk->ip_xmit_timeout != TIME_WRITE) + reset_xmit_timer(sk, TIME_WRITE, sk->rto); + tcp_set_state(sk,TCP_CLOSING); + break; + case TCP_FIN_WAIT2: + /* + * received a FIN -- send ACK and enter TIME_WAIT + */ + reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + sk->shutdown|=SHUTDOWN_MASK; + tcp_set_state(sk,TCP_TIME_WAIT); + break; + case TCP_CLOSE: + /* + * already in CLOSE + */ + break; + default: + tcp_set_state(sk,TCP_LAST_ACK); + + /* Start the timers. */ + reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + return(0); + } + + return(0); +} + + + +/* * This routine handles the data. If there is room in the buffer, * it will be have already been moved into it. If there is no * room, then we will just have to discard the packet. */ -static int tcp_data(struct sk_buff *skb, struct sock *sk, +extern __inline__ int tcp_data(struct sk_buff *skb, struct sock *sk, unsigned long saddr, unsigned short len) { struct sk_buff *skb1, *skb2; struct tcphdr *th; int dup_dumped=0; unsigned long new_seq; - struct sk_buff *tail; unsigned long shut_seq; th = skb->h.th; skb->len = len -(th->doff*4); - /* The bytes in the receive read/assembly queue has increased. Needed for the - low memory discard algorithm */ + /* + * The bytes in the receive read/assembly queue has increased. Needed for the + * low memory discard algorithm + */ sk->bytes_rcv += skb->len; - if (skb->len == 0 && !th->fin && !th->urg && !th->psh) + if (skb->len == 0 && !th->fin) { /* * Don't want to keep passing ack's back and forth. @@ -3089,63 +3792,68 @@ static int tcp_data(struct sk_buff *skb, struct sock *sk, * We no longer have anyone receiving data on this connection. */ +#ifndef TCP_DONT_RST_SHUTDOWN + if(sk->shutdown & RCV_SHUTDOWN) { - new_seq= th->seq + skb->len + th->syn; /* Right edge of _data_ part of frame */ - /* - * This is subtle and not nice. When we shut down we can - * have data in the queue and acked_seq therefore not - * pointing to the last byte that will be read. 
Thus - * the naive implementation: - * after(new_seq,sk->acked_seq+1) - * will cause bogus resets IFF a resend of a frame that has - * been queued but not yet read after a shutdown has been done. - * What we do now is a bit more complex but works as - * follows. If the queue is empty copied_seq+1 is right (+1 for FIN) - * if the queue has data the shutdown occurs at the right edge of - * the last packet queued +1 - * - * We can't simply ack data beyond this point as it has - * and will never be received by an application. + * FIXME: BSD has some magic to avoid sending resets to + * broken 4.2 BSD keepalives. Much to my surprise a few non + * BSD stacks still have broken keepalives so we want to + * cope with it. */ - tail=skb_peek(&sk->receive_queue); - if(tail!=NULL) - { - tail=sk->receive_queue.prev; - shut_seq=tail->h.th->seq+tail->len+1; - } - else - shut_seq=sk->copied_seq+1; - - if(after(new_seq,shut_seq)) + + if(skb->len) /* We don't care if it's just an ack or + a keepalive/window probe */ { - sk->acked_seq = new_seq + th->fin; - if(sk->debug) - printk("Data arrived on %p after close [Data right edge %lX, Socket shut on %lX] %d\n", - sk, new_seq, shut_seq, sk->blog); - tcp_reset(sk->saddr, sk->daddr, skb->h.th, - sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl); - tcp_statistics.TcpEstabResets++; - tcp_set_state(sk,TCP_CLOSE); - sk->err = EPIPE; - sk->shutdown = SHUTDOWN_MASK; - kfree_skb(skb, FREE_READ); - if (!sk->dead) - sk->state_change(sk); - return(0); + new_seq= th->seq + skb->len + th->syn; /* Right edge of _data_ part of frame */ + + /* Do this the way 4.4BSD treats it. Not what I'd + regard as the meaning of the spec but it's what BSD + does and clearly they know everything 8) */ + + /* + * This is valid because of two things + * + * a) The way tcp_data behaves at the bottom. + * b) A fin takes effect when read not when received. 
+ */ + + shut_seq=sk->acked_seq+1; /* Last byte */ + + if(after(new_seq,shut_seq)) + { + if(sk->debug) + printk("Data arrived on %p after close [Data right edge %lX, Socket shut on %lX] %d\n", + sk, new_seq, shut_seq, sk->blog); + if(sk->dead) + { + sk->acked_seq = new_seq + th->fin; + tcp_reset(sk->saddr, sk->daddr, skb->h.th, + sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl); + tcp_statistics.TcpEstabResets++; + tcp_set_state(sk,TCP_CLOSE); + sk->err = EPIPE; + sk->shutdown = SHUTDOWN_MASK; + kfree_skb(skb, FREE_READ); + return 0; + } + } } } + +#endif + /* * Now we have to walk the chain, and figure out where this one * goes into it. This is set up so that the last packet we received * will be the first one we look at, that way if everything comes * in order, there will be no performance loss, and if they come * out of order we will be able to fit things in nicely. - */ - - /* - * This should start at the last one, and then go around forwards. + * + * [AC: This is wrong. We should assume in order first and then walk + * forwards from the first hole based upon real traffic patterns.] + * */ if (skb_peek(&sk->receive_queue) == NULL) /* Empty queue is easy case */ @@ -3222,7 +3930,9 @@ static int tcp_data(struct sk_buff *skb, struct sock *sk, } /* - * Now figure out if we can ack anything. + * Now figure out if we can ack anything. This is very messy because we really want two + * receive queues, a completed and an assembly queue. We also want only one transmit + * queue. */ if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) @@ -3241,15 +3951,14 @@ static int tcp_data(struct sk_buff *skb, struct sock *sk, } skb->acked = 1; - /* - * When we ack the fin, we turn on the RCV_SHUTDOWN flag. + /* + * When we ack the fin, we do the FIN + * processing. 
*/ if (skb->h.th->fin) { - if (!sk->dead) - sk->state_change(sk); - sk->shutdown |= RCV_SHUTDOWN; + tcp_fin(skb,sk,skb->h.th); } for(skb2 = skb->next; @@ -3269,14 +3978,12 @@ static int tcp_data(struct sk_buff *skb, struct sock *sk, } skb2->acked = 1; /* - * When we ack the fin, we turn on - * the RCV_SHUTDOWN flag. + * When we ack the fin, we do + * the fin handling. */ if (skb2->h.th->fin) { - sk->shutdown |= RCV_SHUTDOWN; - if (!sk->dead) - sk->state_change(sk); + tcp_fin(skb,sk,skb->h.th); } /* @@ -3305,7 +4012,7 @@ static int tcp_data(struct sk_buff *skb, struct sock *sk, sk->ack_backlog++; if(sk->debug) printk("Ack queued.\n"); - reset_timer(sk, TIME_WRITE, TCP_ACK_TIME); + reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME); } } } @@ -3348,11 +4055,10 @@ static int tcp_data(struct sk_buff *skb, struct sock *sk, } tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); sk->ack_backlog++; - reset_timer(sk, TIME_WRITE, TCP_ACK_TIME); + reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME); } else { - /* We missed a packet. Send an ack to try to resync things. */ tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); } @@ -3370,6 +4076,14 @@ static int tcp_data(struct sk_buff *skb, struct sock *sk, } +/* + * This routine is only called when we have urgent data + * signalled. Its the 'slow' part of tcp_urg. It could be + * moved inline now as tcp_urg is only called from one + * place. We handle URGent data wrong. We have to - as + * BSD still doesn't use the correction from RFC961. + */ + static void tcp_check_urg(struct sock * sk, struct tcphdr * th) { unsigned long ptr = ntohs(th->urg_ptr); @@ -3379,7 +4093,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th) ptr += th->seq; /* ignore urgent data that we've already seen and read */ - if (after(sk->copied_seq+1, ptr)) + if (after(sk->copied_seq, ptr)) return; /* do we already have a newer (or duplicate) urgent pointer? 
*/ @@ -3398,125 +4112,52 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th) sk->urg_seq = ptr; } -static inline int tcp_urg(struct sock *sk, struct tcphdr *th, +/* + * This is the 'fast' part of urgent handling. + */ + +extern __inline__ int tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long saddr, unsigned long len) { unsigned long ptr; - /* check if we get a new urgent pointer */ + /* + * Check if we get a new urgent pointer - normally not + */ + if (th->urg) tcp_check_urg(sk,th); - /* do we wait for any urgent data? */ + /* + * Do we wait for any urgent data? - normally not + */ + if (sk->urg_data != URG_NOTYET) return 0; - /* is the urgent pointer pointing into this packet? */ + /* + * Is the urgent pointer pointing into this packet? + */ + ptr = sk->urg_seq - th->seq + th->doff*4; if (ptr >= len) return 0; - /* ok, got the correct packet, update info */ + /* + * Ok, got the correct packet, update info + */ + sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th); if (!sk->dead) sk->data_ready(sk,0); return 0; } - /* - * This deals with incoming fins. 'Linus at 9 O'clock' 8-) - * - * If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT - * (and thence onto LAST-ACK and finally, CLOSE, we never enter - * TIME-WAIT) - * - * If we are in FINWAIT-1, a received FIN indicates simultaneous - * close and we go into CLOSING (and later onto TIME-WAIT) - * - * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. - * + * This will accept the next outstanding connection. */ -static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, - unsigned long saddr, struct device *dev) -{ - sk->fin_seq = th->seq + skb->len + th->syn + th->fin; - - if (!sk->dead) - { - sk->state_change(sk); - } - - switch(sk->state) - { - case TCP_SYN_RECV: - case TCP_SYN_SENT: - case TCP_ESTABLISHED: - /* - * move to CLOSE_WAIT, tcp_data() already handled - * sending the ack. 
- */ - reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN); - tcp_set_state(sk,TCP_CLOSE_WAIT); - if (th->rst) - sk->shutdown = SHUTDOWN_MASK; - break; - - case TCP_CLOSE_WAIT: - case TCP_CLOSING: - /* - * received a retransmission of the FIN, do - * nothing. - */ - break; - case TCP_TIME_WAIT: - /* - * received a retransmission of the FIN, - * restart the TIME_WAIT timer. - */ - reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); - return(0); - case TCP_FIN_WAIT1: - /* - * This case occurs when a simultaneous close - * happens, we must ack the received FIN and - * enter the CLOSING state. - * - * XXX timeout not set properly - */ - - reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); - tcp_set_state(sk,TCP_CLOSING); - break; - case TCP_FIN_WAIT2: - /* - * received a FIN -- send ACK and enter TIME_WAIT - */ - reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); - sk->shutdown|=SHUTDOWN_MASK; - tcp_set_state(sk,TCP_TIME_WAIT); - break; - case TCP_CLOSE: - /* - * already in CLOSE - */ - break; - default: - tcp_set_state(sk,TCP_LAST_ACK); - - /* Start the timers. */ - reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); - return(0); - } - sk->ack_backlog++; - - return(0); -} - - -/* This will accept the next outstanding connection. 
*/ -static struct sock * -tcp_accept(struct sock *sk, int flags) + +static struct sock *tcp_accept(struct sock *sk, int flags) { struct sock *newsk; struct sk_buff *skb; @@ -3586,8 +4227,10 @@ static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) struct rtable *rt; if (sk->state != TCP_CLOSE) + { return(-EISCONN); - + } + if (addr_len < 8) return(-EINVAL); @@ -3610,7 +4253,7 @@ static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) sk->inuse = 1; sk->daddr = usin->sin_addr.s_addr; - sk->write_seq = jiffies * SEQ_TICK - seq_offset; + sk->write_seq = tcp_init_seq(); sk->window_seq = sk->write_seq; sk->rcv_ack_seq = sk->write_seq -1; sk->err = 0; @@ -3625,7 +4268,7 @@ static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) sk->inuse = 1; buff->len = 24; buff->sk = sk; - buff->free = 1; + buff->free = 0; buff->localroute = sk->localroute; t1 = (struct tcphdr *) buff->data; @@ -3714,12 +4357,19 @@ static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) * This must go first otherwise a really quick response will get reset. 
*/ + tcp_cache_zap(); tcp_set_state(sk,TCP_SYN_SENT); - sk->rto = TCP_TIMEOUT_INIT; - reset_timer(sk, TIME_WRITE, sk->rto); /* Timer for repeating the SYN until an answer */ - sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES; + if(rt&&rt->rt_flags&RTF_IRTT) + sk->rto = rt->rt_irtt; + else + sk->rto = TCP_TIMEOUT_INIT; + sk->retransmit_timer.function=&retransmit_timer; + sk->retransmit_timer.data = (unsigned long)sk; + reset_xmit_timer(sk, TIME_WRITE, sk->rto); /* Timer for repeating the SYN until an answer */ + sk->retransmits = TCP_SYN_RETRIES; sk->prot->queue_xmit(sk, dev, buff, 0); + reset_xmit_timer(sk, TIME_WRITE, sk->rto); tcp_statistics.TcpActiveOpens++; tcp_statistics.TcpOutSegs++; @@ -3729,8 +4379,7 @@ static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) /* This functions checks to see if the tcp header is actually acceptable. */ -static int -tcp_sequence(struct sock *sk, struct tcphdr *th, short len, +extern __inline__ int tcp_sequence(struct sock *sk, struct tcphdr *th, short len, struct options *opt, unsigned long saddr, struct device *dev) { unsigned long next_seq; @@ -3771,7 +4420,8 @@ ignore_it: * connect again and it will work (with luck). */ - if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) { + if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) + { tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl); return 1; } @@ -3781,42 +4431,50 @@ ignore_it: return 0; } - -#ifdef TCP_FASTPATH /* - * Is the end of the queue clear of fragments as yet unmerged into the data stream - * Yes if - * a) The queue is empty - * b) The last frame on the queue has the acked flag set + * When we get a reset we do this. 
*/ -static inline int tcp_clean_end(struct sock *sk) +static int tcp_std_reset(struct sock *sk, struct sk_buff *skb) { - struct sk_buff *skb=skb_peek(&sk->receive_queue); - if(skb==NULL || sk->receive_queue.prev->acked) - return 1; + sk->zapped = 1; + sk->err = ECONNRESET; + if (sk->state == TCP_SYN_SENT) + sk->err = ECONNREFUSED; + if (sk->state == TCP_CLOSE_WAIT) + sk->err = EPIPE; +#ifdef TCP_DO_RFC1337 + /* + * Time wait assassination protection [RFC1337] + */ + if(sk->state!=TCP_TIME_WAIT) + { + tcp_set_state(sk,TCP_CLOSE); + sk->shutdown = SHUTDOWN_MASK; + } +#else + tcp_set_state(sk,TCP_CLOSE); + sk->shutdown = SHUTDOWN_MASK; +#endif + if (!sk->dead) + sk->state_change(sk); + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); } -#endif - -int -tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, +/* + * A TCP packet has arrived. + */ + +int tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, unsigned long daddr, unsigned short len, unsigned long saddr, int redo, struct inet_protocol * protocol) { struct tcphdr *th; struct sock *sk; - - if (!skb) - { - return(0); - } - - if (!dev) - { - return(0); - } - + int syn_ok=0; + tcp_statistics.TcpInSegs++; if(skb->pkt_type!=PACKET_HOST) @@ -3828,17 +4486,31 @@ tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, th = skb->h.th; /* - * Find the socket. + * Find the socket, using the last hit cache if applicable. 
*/ - sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr); + if(saddr==th_cache_saddr && daddr==th_cache_daddr && th->dest==th_cache_dport && th->source==th_cache_sport) + sk=(struct sock *)th_cache_sk; + else + { + sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr); + th_cache_saddr=saddr; + th_cache_daddr=daddr; + th_cache_dport=th->dest; + th_cache_sport=th->source; + th_cache_sk=sk; + } /* - * If this socket has got a reset its to all intents and purposes - * really dead + * If this socket has got a reset it's to all intents and purposes + * really dead. Count closed sockets as dead. + * + * Note: BSD appears to have a bug here. A 'closed' TCP in BSD + * simply drops data. This seems incorrect as a 'closed' TCP doesn't + * exist so should cause resets as if the port was unreachable. */ - if (sk!=NULL && sk->zapped) + if (sk!=NULL && (sk->zapped || sk->state==TCP_CLOSE)) sk=NULL; if (!redo) @@ -3848,8 +4520,8 @@ tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, skb->sk = NULL; kfree_skb(skb,FREE_READ); /* - * We don't release the socket because it was - * never marked in use. + * We don't release the socket because it was + * never marked in use. */ return(0); } @@ -3858,15 +4530,19 @@ tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, /* See if we know about the socket. */ if (sk == NULL) { - if (!th->rst) - tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255); + /* + * No such TCB. 
If th->rst is 0 send a reset (checked in tcp_reset) + */ + tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255); skb->sk = NULL; + /* + * Discard frame + */ kfree_skb(skb, FREE_READ); return(0); } skb->len = len; - skb->sk = sk; skb->acked = 0; skb->used = 0; skb->free = 0; @@ -3886,8 +4562,11 @@ tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, } else { - if (!sk) + if (sk==NULL) { + tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255); + skb->sk = NULL; + kfree_skb(skb, FREE_READ); return(0); } } @@ -3895,6 +4574,7 @@ tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, if (!sk->prot) { + printk("IMPOSSIBLE 3\n"); return(0); } @@ -3905,335 +4585,133 @@ tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) { - skb->sk = NULL; kfree_skb(skb, FREE_READ); release_sock(sk); return(0); } + skb->sk=sk; sk->rmem_alloc += skb->mem_len; -#ifdef TCP_FASTPATH -/* - * Incoming data stream fastpath. - * - * We try to optimise two things. - * 1) Spot general data arriving without funny options and skip extra checks and the switch. - * 2) Spot the common case in raw data receive streams of a packet that has no funny options, - * fits exactly on the end of the current queue and may or may not have the ack bit set. - * - * Case two especially is done inline in this routine so there are no long jumps causing heavy - * cache thrashing, no function call overhead (except for the ack sending if needed) and for - * speed although further optimizing here is possible. - */ - - /* I'm trusting gcc to optimise this sensibly... might need judicious application of a software mallet */ - if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst) - { - /* Packets in order. 
Fits window */ - if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk)) + /* + * This basically follows the flow suggested by RFC793, with the corrections in RFC1122. We + * don't implement precedence and we process URG incorrectly (deliberately so) for BSD bug + * compatibility. We also set up variables more thoroughly [Karn notes in the + * KA9Q code the RFC793 incoming segment rules don't initialise the variables for all paths]. + */ + + if(sk->state!=TCP_ESTABLISHED) /* Skip this lot for normal flow */ + { + + /* + * Now deal with unusual cases. + */ + + if(sk->state==TCP_LISTEN) { - /* Ack is harder */ - if(th->ack && !tcp_ack(sk, th, saddr, len)) + if(th->ack) /* These use the socket TOS.. might want to be the received TOS */ + tcp_reset(daddr,saddr,th,sk->prot,opt,dev,sk->ip_tos, sk->ip_ttl); + + /* + * We don't care for RST, and non SYN are absorbed (old segments) + * Broadcast/multicast SYN isn't allowed. Note - bug if you change the + * netmask on a running connection it can go broadcast. Even Sun's have + * this problem so I'm ignoring it + */ + + if(th->rst || !th->syn || th->ack || ip_chk_addr(daddr)!=IS_MYADDR) { kfree_skb(skb, FREE_READ); release_sock(sk); return 0; } + + /* + * Guess we need to make a new socket up + */ + + tcp_conn_request(sk, skb, daddr, saddr, opt, dev, tcp_init_seq()); + /* - * Set up variables + * Now we have several options: In theory there is nothing else + * in the frame. KA9Q has an option to send data with the syn, + * BSD accepts data with the syn up to the [to be] advertised window + * and Solaris 2.1 gives you a protocol error. For now we just ignore + * it, that fits the spec precisely and avoids incompatibilities. It + * would be nice in future to drop through and process the data. 
*/ - skb->len -= (th->doff *4); - sk->bytes_rcv += skb->len; - tcp_rx_hit2++; - if(skb->len) - { - skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */ - if(sk->window >= skb->len) - sk->window-=skb->len; /* We know its effect on the window */ - else - sk->window=0; - sk->acked_seq = th->seq+skb->len; /* Easy */ - skb->acked=1; /* Guaranteed true */ - if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || - sk->bytes_rcv > sk->max_unacked) - { - tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr); - } - else - { - sk->ack_backlog++; - reset_timer(sk, TIME_WRITE, TCP_ACK_TIME); - } - if(!sk->dead) - sk->data_ready(sk,0); - release_sock(sk); - return 0; - } - } - /* - * More generic case of arriving data stream in ESTABLISHED - */ - tcp_rx_hit1++; - if(!tcp_sequence(sk, th, len, opt, saddr, dev)) - { - kfree_skb(skb, FREE_READ); + release_sock(sk); return 0; } - if(th->ack && !tcp_ack(sk, th, saddr, len)) + + /* retransmitted SYN? */ + if (sk->state == TCP_SYN_RECV && th->syn && th->seq+1 == sk->acked_seq) { kfree_skb(skb, FREE_READ); release_sock(sk); return 0; } - if(tcp_data(skb, sk, saddr, len)) - kfree_skb(skb, FREE_READ); - release_sock(sk); - return 0; - } - tcp_rx_miss++; -#endif - - /* - * Now deal with all cases. - */ - - switch(sk->state) - { - + /* - * This should close the system down if it's waiting - * for an ack that is never going to be sent. + * SYN sent means we have to look for a suitable ack and either reset + * for bad matches or go to connected */ - case TCP_LAST_ACK: - if (th->rst) - { - sk->zapped=1; - sk->err = ECONNRESET; - tcp_set_state(sk,TCP_CLOSE); - sk->shutdown = SHUTDOWN_MASK; - if (!sk->dead) - { - sk->state_change(sk); - } - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - case TCP_ESTABLISHED: - case TCP_CLOSE_WAIT: - case TCP_CLOSING: - case TCP_FIN_WAIT1: - case TCP_FIN_WAIT2: - case TCP_TIME_WAIT: - - /* - * is it a good packet? 
- */ - - if (!tcp_sequence(sk, th, len, opt, saddr,dev)) - { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - if (th->rst) + + if(sk->state==TCP_SYN_SENT) + { + /* Crossed SYN or previous junk segment */ + if(th->ack) { - tcp_statistics.TcpEstabResets++; - sk->zapped=1; - /* This means the thing should really be closed. */ - sk->err = ECONNRESET; - if (sk->state == TCP_CLOSE_WAIT) + /* We got an ack, but it's not a good ack */ + if(!tcp_ack(sk,th,saddr,len)) { - sk->err = EPIPE; - } - - /* - * A reset with a fin just means that - * the data was not all read. - */ - tcp_set_state(sk,TCP_CLOSE); - sk->shutdown = SHUTDOWN_MASK; - if (!sk->dead) - { - sk->state_change(sk); - } - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - if (th->syn) - { - long seq=sk->write_seq; - int st=sk->state; - tcp_statistics.TcpEstabResets++; - sk->err = ECONNRESET; - tcp_set_state(sk,TCP_CLOSE); - sk->shutdown = SHUTDOWN_MASK; - if(sk->debug) - printk("Socket %p reset by SYN while established.\n", sk); - if (!sk->dead) { - sk->state_change(sk); - } - /* - * The BSD port reuse protocol violation. - * I do sometimes wonder how the *bsd people - * have the nerve to talk about 'standards'. - * - * If seq > last used on connection then - * open a new connection and use 128000+seq of - * old connection. - * - */ - - if(st==TCP_TIME_WAIT && th->seq > sk->acked_seq && sk->dead) - { - struct sock *psk=sk; - /* - * Find the listening socket. 
- */ - sk=get_sock(&tcp_prot, th->source, daddr, th->dest, saddr); - if(sk && sk->state==TCP_LISTEN) - { - sk->inuse=1; - tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000); - release_sock(psk); - /* Fall through in case people are - also using the piggy backed SYN + data - protocol violation */ - } - else - { - tcp_reset(daddr, saddr, th, psk->prot, opt,dev, psk->ip_tos,psk->ip_ttl); - release_sock(psk); - kfree_skb(skb, FREE_READ); - return 0; - } - } - else - { - tcp_reset(daddr, saddr, th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl); + /* Reset the ack - its an ack from a + different connection [ th->rst is checked in tcp_reset()] */ + tcp_statistics.TcpAttemptFails++; + tcp_reset(daddr, saddr, th, + sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); kfree_skb(skb, FREE_READ); release_sock(sk); return(0); } - } - if (th->ack && !tcp_ack(sk, th, saddr, len)) { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - if (tcp_urg(sk, th, saddr, len)) { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - - if (tcp_data(skb, sk, saddr, len)) { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - release_sock(sk); - return(0); - - - case TCP_CLOSE: - if (sk->dead || sk->daddr) { - kfree_skb(skb, FREE_READ); + if(th->rst) + return tcp_std_reset(sk,skb); + if(!th->syn) + { + /* A valid ack from a different connection + start. 
Shouldn't happen but cover it */ + kfree_skb(skb, FREE_READ); release_sock(sk); - return(0); - } - - if (!th->rst) { - if (!th->ack) - th->ack_seq = 0; - if(sk->debug) printk("Reset on closed socket %d.\n",sk->blog); - tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); - } - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - - case TCP_LISTEN: - if (th->rst) { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - if (th->ack) { - if(sk->debug) printk("Reset on listening socket %d.\n",sk->blog); - tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - if (th->syn) - { + return 0; + } /* - * Now we just put the whole thing including - * the header and saddr, and protocol pointer - * into the buffer. We can't respond until the - * user tells us to accept the connection. + * Ok.. it's good. Set up sequence numbers and + * move to established. */ - tcp_conn_request(sk, skb, daddr, saddr, opt, dev, tcp_init_seq()); - release_sock(sk); - return(0); - } - - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - - case TCP_SYN_RECV: - if (th->syn) { - /* Probably a retransmitted syn */ - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - - default: - if (!tcp_sequence(sk, th, len, opt, saddr,dev)) - { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - case TCP_SYN_SENT: - if (th->rst) - { - tcp_statistics.TcpAttemptFails++; - sk->err = ECONNREFUSED; - tcp_set_state(sk,TCP_CLOSE); - sk->shutdown = SHUTDOWN_MASK; - sk->zapped = 1; - if (!sk->dead) + syn_ok=1; /* Don't reset this connection for the syn */ + sk->acked_seq=th->seq+1; + sk->fin_seq=th->seq; + tcp_send_ack(sk->sent_seq,sk->acked_seq,sk,th,sk->daddr); + tcp_set_state(sk, TCP_ESTABLISHED); + tcp_options(sk,th); + sk->dummy_th.dest=th->source; + sk->copied_seq = sk->acked_seq; + if(!sk->dead) { sk->state_change(sk); + sock_wake_async(sk->socket, 
0); + } + if(sk->max_window==0) + { + sk->max_window = 32; + sk->mss = min(sk->max_window, sk->mtu); } - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); } - if (!th->ack) + else { - if (th->syn) + /* See if SYN's cross. Drop if boring */ + if(th->syn && !th->rst) { /* Crossed SYN's are fine - but talking to yourself is right out... */ @@ -4242,155 +4720,150 @@ tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, sk->dummy_th.dest==th->dest) { tcp_statistics.TcpAttemptFails++; - sk->err = ECONNREFUSED; - tcp_set_state(sk,TCP_CLOSE); - sk->shutdown = SHUTDOWN_MASK; - sk->zapped = 1; - if (!sk->dead) - { - sk->state_change(sk); - } - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); + return tcp_std_reset(sk,skb); } tcp_set_state(sk,TCP_SYN_RECV); - } + + /* + * FIXME: + * Must send SYN|ACK here + */ + } + /* Discard junk segment */ kfree_skb(skb, FREE_READ); release_sock(sk); - return(0); + return 0; } + /* + * SYN_RECV with data maybe.. drop through + */ + goto rfc_step6; + } + + /* + * BSD has a funny hack with TIME_WAIT and fast reuse of a port. There is + * a more complex suggestion for fixing these reuse issues in RFC1644 + * but not yet ready for general use. Also see RFC1379. 
+ */ - switch(sk->state) +#define BSD_TIME_WAIT +#ifdef BSD_TIME_WAIT + if (sk->state == TCP_TIME_WAIT && th->syn && sk->dead && + after(th->seq, sk->acked_seq) && !th->rst) + { + long seq=sk->write_seq; + if(sk->debug) + printk("Doing a BSD time wait\n"); + tcp_statistics.TcpEstabResets++; + sk->rmem_alloc -= skb->mem_len; + skb->sk = NULL; + sk->err=ECONNRESET; + tcp_set_state(sk, TCP_CLOSE); + sk->shutdown = SHUTDOWN_MASK; + release_sock(sk); + sk=get_sock(&tcp_prot, th->dest, saddr, th->source, daddr); + if (sk && sk->state==TCP_LISTEN) { - case TCP_SYN_SENT: - if (!tcp_ack(sk, th, saddr, len)) - { - tcp_statistics.TcpAttemptFails++; - tcp_reset(daddr, saddr, th, - sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - /* - * If the syn bit is also set, switch to - * tcp_syn_recv, and then to established. - */ - if (!th->syn) - { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - /* Ack the syn and fall through. */ - sk->acked_seq = th->seq+1; - sk->fin_seq = th->seq; - tcp_send_ack(sk->sent_seq, th->seq+1, - sk, th, sk->daddr); - - case TCP_SYN_RECV: - if (!tcp_ack(sk, th, saddr, len)) - { - tcp_statistics.TcpAttemptFails++; - tcp_reset(daddr, saddr, th, - sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl); - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - tcp_set_state(sk,TCP_ESTABLISHED); + sk->inuse=1; + skb->sk = sk; + sk->rmem_alloc += skb->mem_len; + tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000); + release_sock(sk); + return 0; + } + kfree_skb(skb, FREE_READ); + return 0; + } +#endif + } + + /* + * We are now in normal data flow (see the step list in the RFC) + * Note most of these are inline now. I'll inline the lot when + * I have time to test it hard and look at what gcc outputs + */ - /* - * Now we need to finish filling out - * some of the tcp header. - * - * We need to check for mtu info. 
- */ - tcp_options(sk, th); - sk->dummy_th.dest = th->source; - sk->copied_seq = sk->acked_seq-1; - if (!sk->dead) - { - sk->state_change(sk); - } + if(!tcp_sequence(sk,th,len,opt,saddr,dev)) + { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; + } + + if(th->rst) + return tcp_std_reset(sk,skb); - /* - * We've already processed his first - * ack. In just about all cases that - * will have set max_window. This is - * to protect us against the possibility - * that the initial window he sent was 0. - * This must occur after tcp_options, which - * sets sk->mtu. - */ - if (sk->max_window == 0) - { - sk->max_window = 32; - sk->mss = min(sk->max_window, sk->mtu); - } + /* + * !syn_ok is effectively the state test in RFC793. + */ + + if(th->syn && !syn_ok) + { + tcp_reset(daddr,saddr,th, &tcp_prot, opt, dev, skb->ip_hdr->tos, 255); + return tcp_std_reset(sk,skb); + } - /* - * Now process the rest like we were - * already in the established state. - */ - if (th->urg) - { - if (tcp_urg(sk, th, saddr, len)) - { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - } - if (tcp_data(skb, sk, saddr, len)) - kfree_skb(skb, FREE_READ); + /* + * Process the ACK + */ + - if (th->fin) - tcp_fin(skb, sk, th, saddr, dev); - release_sock(sk); - return(0); - } + if(th->ack && !tcp_ack(sk,th,saddr,len)) + { + /* + * Our three way handshake failed. 
+ */ + + if(sk->state==TCP_SYN_RECV) + { + tcp_reset(daddr, saddr, th,sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl); + } + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; + } - if (th->urg) - { - if (tcp_urg(sk, th, saddr, len)) - { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - } - if (tcp_data(skb, sk, saddr, len)) - { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } +rfc_step6: /* I'll clean this up later */ + + /* + * Process urgent data + */ + + if(tcp_urg(sk, th, saddr, len)) + { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; + } - if (!th->fin) - { - release_sock(sk); - return(0); - } - tcp_fin(skb, sk, th, saddr, dev); - release_sock(sk); - return(0); + + /* + * Process the encapsulated data + */ + + if(tcp_data(skb,sk, saddr, len)) + { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; } -} + /* + * And done + */ + + release_sock(sk); + return 0; +} /* - * This routine sends a packet with an out of date sequence - * number. It assumes the other end will try to ack it. + * This routine sends a packet with an out of date sequence + * number. It assumes the other end will try to ack it. */ static void tcp_write_wakeup(struct sock *sk) { - struct sk_buff *buff; + struct sk_buff *buff,*skb; struct tcphdr *t1; struct device *dev=NULL; int tmp; @@ -4399,8 +4872,9 @@ static void tcp_write_wakeup(struct sock *sk) return; /* After a valid reset we can send no more */ /* - * Write data can still be transmitted/retransmitted in the - * following states. If any other state is encountered, return. + * Write data can still be transmitted/retransmitted in the + * following states. If any other state is encountered, return. 
+ * [listen/close will never occur here anyway] */ if (sk->state != TCP_ESTABLISHED && @@ -4408,62 +4882,174 @@ static void tcp_write_wakeup(struct sock *sk) sk->state != TCP_FIN_WAIT1 && sk->state != TCP_LAST_ACK && sk->state != TCP_CLOSING - ) { + ) + { return; } - buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC); - if (buff == NULL) - return; + if (before(sk->sent_seq, sk->window_seq) && + (skb=skb_peek(&sk->write_queue))) + { + /* + * We are probing the opening of a window + * but the window size is != 0 + * must have been a result SWS advoidance ( sender ) + */ + + struct iphdr *iph; + struct tcphdr *th; + struct tcphdr *nth; + unsigned long win_size, ow_size; + void * tcp_data_start; + + win_size = sk->window_seq - sk->sent_seq; + + iph = (struct iphdr *)(skb->data + skb->dev->hard_header_len); + th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2)); + + buff = sk->prot->wmalloc(sk, win_size + th->doff * 4 + + (iph->ihl << 2) + + skb->dev->hard_header_len, + 1, GFP_ATOMIC); + if ( buff == NULL ) + return; + + buff->len = 0; + + /* + * If we strip the packet on the write queue we must + * be ready to retransmit this one + */ + + buff->free = 0; + + buff->sk = sk; + buff->localroute = sk->localroute; + + tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, + IPPROTO_TCP, sk->opt, buff->mem_len, + sk->ip_tos,sk->ip_ttl); + if (tmp < 0) + { + sk->prot->wfree(sk, buff->mem_addr, buff->mem_len); + return; + } - buff->len = sizeof(struct tcphdr); - buff->free = 1; - buff->sk = sk; - buff->localroute = sk->localroute; + buff->len += tmp; + buff->dev = dev; - t1 = (struct tcphdr *) buff->data; + nth = (struct tcphdr *) (buff->data + buff->len); + buff->len += th->doff * 4; - /* Put in the IP header and routing stuff. 
*/ - tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, - IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl); - if (tmp < 0) - { - sk->prot->wfree(sk, buff->mem_addr, buff->mem_len); - return; + memcpy(nth, th, th->doff * 4); + + nth->ack = 1; + nth->ack_seq = ntohl(sk->acked_seq); + nth->window = ntohs(tcp_select_window(sk)); + nth->check = 0; + + tcp_data_start = skb->data + skb->dev->hard_header_len + + (iph->ihl << 2) + th->doff * 4; + + memcpy(buff->data + buff->len, tcp_data_start, win_size); + buff->len += win_size; + buff->h.seq = sk->sent_seq + win_size; + + /* + * now: shrink the queue head segment + */ + + th->check = 0; + ow_size = skb->len - win_size - + ((unsigned long) (tcp_data_start - (void *) skb->data)); + + memmove(tcp_data_start, tcp_data_start + win_size, ow_size); + skb->len -= win_size; + sk->sent_seq += win_size; + th->seq = htonl(sk->sent_seq); + + if (th->urg) + { + unsigned short urg_ptr; + + urg_ptr = ntohs(th->urg_ptr); + if (urg_ptr <= win_size) + th->urg = 0; + else + { + urg_ptr -= win_size; + th->urg_ptr = htons(urg_ptr); + nth->urg_ptr = htons(win_size); + } + } + + tcp_send_check(nth, sk->saddr, sk->daddr, + nth->doff * 4 + win_size , sk); } + else + { + buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC); + if (buff == NULL) + return; - buff->len += tmp; - t1 = (struct tcphdr *)((char *)t1 +tmp); + buff->len = sizeof(struct tcphdr); + buff->free = 1; + buff->sk = sk; + buff->localroute = sk->localroute; - memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1)); + t1 = (struct tcphdr *) buff->data; + + /* + * Put in the IP header and routing stuff. + */ + + tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, + IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl); + if (tmp < 0) + { + sk->prot->wfree(sk, buff->mem_addr, buff->mem_len); + return; + } + + buff->len += tmp; + t1 = (struct tcphdr *)((char *)t1 +tmp); + + memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1)); + + /* + * Use a previous sequence. 
+ * This should cause the other end to send an ack. + */ + + t1->seq = htonl(sk->sent_seq-1); + t1->ack = 1; + t1->res1= 0; + t1->res2= 0; + t1->rst = 0; + t1->urg = 0; + t1->psh = 0; + t1->fin = 0; /* We are sending a 'previous' sequence, and 0 bytes of data - thus no FIN bit */ + t1->syn = 0; + t1->ack_seq = ntohl(sk->acked_seq); + t1->window = ntohs(tcp_select_window(sk)); + t1->doff = sizeof(*t1)/4; + tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk); + + } /* - * Use a previous sequence. - * This should cause the other end to send an ack. + * Send it. */ - t1->seq = htonl(sk->sent_seq-1); - t1->ack = 1; - t1->res1= 0; - t1->res2= 0; - t1->rst = 0; - t1->urg = 0; - t1->psh = 0; - t1->fin = 0; - t1->syn = 0; - t1->ack_seq = ntohl(sk->acked_seq); - t1->window = ntohs(tcp_select_window(sk)); - t1->doff = sizeof(*t1)/4; - tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk); - - /* Send it and free it. - * This will prevent the timer from automatically being restarted. - */ + sk->prot->queue_xmit(sk, dev, buff, 1); tcp_statistics.TcpOutSegs++; } -void -tcp_send_probe0(struct sock *sk) +/* + * A window probe timeout has occurred. 
+ */ + +void tcp_send_probe0(struct sock *sk) { if (sk->zapped) return; /* After a valid reset we can send no more */ @@ -4472,7 +5058,7 @@ tcp_send_probe0(struct sock *sk) sk->backoff++; sk->rto = min(sk->rto << 1, 120*HZ); - reset_timer (sk, TIME_PROBE0, sk->rto); + reset_xmit_timer (sk, TIME_PROBE0, sk->rto); sk->retransmits++; sk->prot->retransmits ++; } @@ -4577,6 +5163,7 @@ struct proto tcp_prot = { tcp_getsockopt, 128, 0, - {NULL,}, - "TCP" + "TCP", + 0, 0, + {NULL,} }; diff --git a/net/inet/timer.c b/net/ipv4/timer.c index 5ea2f3bbc..ebaa00d70 100644 --- a/net/inet/timer.c +++ b/net/ipv4/timer.c @@ -43,12 +43,12 @@ #include <linux/interrupt.h> #include <linux/inet.h> #include <linux/netdevice.h> -#include "ip.h" -#include "protocol.h" -#include "tcp.h" +#include <net/ip.h> +#include <net/protocol.h> +#include <net/tcp.h> #include <linux/skbuff.h> -#include "sock.h" -#include "arp.h" +#include <net/sock.h> +#include <net/arp.h> void delete_timer (struct sock *t) { @@ -154,107 +154,8 @@ void net_timer (unsigned long data) reset_timer (sk, TIME_DESTROY, TCP_DONE_TIME); release_sock (sk); break; - case TIME_PROBE0: - tcp_send_probe0(sk); - release_sock (sk); - break; - case TIME_WRITE: /* try to retransmit. */ - /* It could be we got here because we needed to send an ack. - * So we need to check for that. 
- */ - { - struct sk_buff *skb; - unsigned long flags; - - save_flags(flags); - cli(); - skb = sk->send_head; - if (!skb) - { - restore_flags(flags); - } - else - { - if (jiffies < skb->when + sk->rto) - { - reset_timer (sk, TIME_WRITE, skb->when + sk->rto - jiffies); - restore_flags(flags); - release_sock (sk); - break; - } - restore_flags(flags); - /* printk("timer: seq %d retrans %d out %d cong %d\n", sk->send_head->h.seq, - sk->retransmits, sk->packets_out, sk->cong_window); */ - sk->prot->retransmit (sk, 0); - if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7)) - || (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1)) - { - arp_destroy (sk->daddr, 0); - ip_route_check (sk->daddr); - } - if (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR2) - { - sk->err = ETIMEDOUT; - if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING) - { - sk->state = TCP_TIME_WAIT; - reset_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); - } - else - { - sk->prot->close (sk, 1); - break; - } - } - } - release_sock (sk); - break; - } - case TIME_KEEPOPEN: - /* - * this reset_timer() call is a hack, this is not - * how KEEPOPEN is supposed to work. - */ - reset_timer (sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); - - /* Send something to keep the connection open. 
*/ - if (sk->prot->write_wakeup) - sk->prot->write_wakeup (sk); - sk->retransmits++; - if (sk->shutdown == SHUTDOWN_MASK) - { - sk->prot->close (sk, 1); - sk->state = TCP_CLOSE; - } - if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7)) - || (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1)) - { - arp_destroy (sk->daddr, 0); - ip_route_check (sk->daddr); - release_sock (sk); - break; - } - if (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR2) - { - arp_destroy (sk->daddr, 0); - sk->err = ETIMEDOUT; - if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2) - { - sk->state = TCP_TIME_WAIT; - if (!sk->dead) - sk->state_change(sk); - release_sock (sk); - } - else - { - sk->prot->close (sk, 1); - } - break; - } - release_sock (sk); - break; default: - printk ("net_timer: timer expired - reason unknown\n"); + printk ("net_timer: timer expired - reason %d is unknown\n", why); release_sock (sk); break; } diff --git a/net/inet/udp.c b/net/ipv4/udp.c index 6e739e703..9976e2be2 100644 --- a/net/inet/udp.c +++ b/net/ipv4/udp.c @@ -9,6 +9,8 @@ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Alan Cox, <Alan.Cox@linux.org> * * Fixes: * Alan Cox : verify_area() calls @@ -40,6 +42,9 @@ * Alan Cox : Smarter af_inet used properly. * Alan Cox : Use new kernel side addressing. * Alan Cox : Incorrect return on truncated datagram receive. 
+ * Arnt Gulbrandsen : New udp_send and stuff + * Alan Cox : Cache last socket + * Alan Cox : Route cache * * * This program is free software; you can redistribute it and/or @@ -60,16 +65,19 @@ #include <linux/timer.h> #include <linux/termios.h> #include <linux/mm.h> +#include <linux/config.h> #include <linux/inet.h> #include <linux/netdevice.h> -#include "snmp.h" -#include "ip.h" -#include "protocol.h" -#include "tcp.h" +#include <net/snmp.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/tcp.h> #include <linux/skbuff.h> -#include "sock.h" -#include "udp.h" -#include "icmp.h" +#include <net/sock.h> +#include <net/udp.h> +#include <net/icmp.h> +#include <net/route.h> +#include <net/checksum.h> /* * SNMP MIB for the UDP layer @@ -77,8 +85,28 @@ struct udp_mib udp_statistics; +/* + * Cached last hit socket + */ + +volatile unsigned long uh_cache_saddr,uh_cache_daddr; +volatile unsigned short uh_cache_dport, uh_cache_sport; +volatile struct sock *uh_cache_sk; +void udp_cache_zap(void) +{ + unsigned long flags; + save_flags(flags); + cli(); + uh_cache_saddr=0; + uh_cache_daddr=0; + uh_cache_dport=0; + uh_cache_sport=0; + uh_cache_sk=NULL; + restore_flags(flags); +} +static int udp_deliver(struct sock *sk, struct udphdr *uh, struct sk_buff *skb, struct device *dev, long saddr, long daddr, int len); #define min(a,b) ((a)<(b)?(a):(b)) @@ -129,204 +157,124 @@ void udp_err(int err, unsigned char *header, unsigned long daddr, * client code people. */ -#ifdef CONFIG_I_AM_A_BROKEN_BSD_WEENIE - /* - * It's only fatal if we have connected to them. I'm not happy - * with this code. Some BSD comparisons need doing. 
- */ - - if (icmp_err_convert[err & 0xff].fatal && sk->state == TCP_ESTABLISHED) - { - sk->err = icmp_err_convert[err & 0xff].errno; - sk->error_report(sk); - } -#else if (icmp_err_convert[err & 0xff].fatal) { sk->err = icmp_err_convert[err & 0xff].errno; sk->error_report(sk); } -#endif } static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr) { - unsigned long sum; + return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, + csum_partial((char*)uh, len, 0))); +} - __asm__( "\t addl %%ecx,%%ebx\n" - "\t adcl %%edx,%%ebx\n" - "\t adcl $0, %%ebx\n" - : "=b"(sum) - : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_UDP*256) - : "cx","bx","dx" ); +struct udpfakehdr +{ + struct udphdr uh; + int daddr; + int other; + char *from; + int wcheck; +}; - if (len > 3) +/* + * Copy and checksum a UDP packet from user space into a buffer. We still have to do the planning to + * get ip_build_xmit to spot direct transfer to network card and provide an additional callback mode + * for direct user->board I/O transfers. That one will be fun. + */ + +static void udp_getfrag(void *p, int saddr, char * to, unsigned int offset, unsigned int fraglen) +{ + struct udpfakehdr *ufh = (struct udpfakehdr *)p; + char *src, *dst; + unsigned int len; + + if (offset) { - __asm__("\tclc\n" - "1:\n" - "\t lodsl\n" - "\t adcl %%eax, %%ebx\n" - "\t loop 1b\n" - "\t adcl $0, %%ebx\n" - : "=b"(sum) , "=S"(uh) - : "0"(sum), "c"(len/4) ,"1"(uh) - : "ax", "cx", "bx", "si" ); + len = fraglen; + src = ufh->from+(offset-sizeof(struct udphdr)); + dst = to; } - - /* - * Convert from 32 bits to 16 bits. - */ - - __asm__("\t movl %%ebx, %%ecx\n" - "\t shrl $16,%%ecx\n" - "\t addw %%cx, %%bx\n" - "\t adcw $0, %%bx\n" - : "=b"(sum) - : "0"(sum) - : "bx", "cx"); - - /* - * Check for an extra word. 
- */ - - if ((len & 2) != 0) + else { - __asm__("\t lodsw\n" - "\t addw %%ax,%%bx\n" - "\t adcw $0, %%bx\n" - : "=b"(sum), "=S"(uh) - : "0"(sum) ,"1"(uh) - : "si", "ax", "bx"); - } - - /* - * Now check for the extra byte. - */ - - if ((len & 1) != 0) + len = fraglen-sizeof(struct udphdr); + src = ufh->from; + dst = to+sizeof(struct udphdr); + } + ufh->wcheck = csum_partial_copyffs(src, dst, len, ufh->wcheck); + if (offset == 0) { - __asm__("\t lodsb\n" - "\t movb $0,%%ah\n" - "\t addw %%ax,%%bx\n" - "\t adcw $0, %%bx\n" - : "=b"(sum) - : "0"(sum) ,"S"(uh) - : "si", "ax", "bx"); - } - - /* - * We only want the bottom 16 bits, but we never cleared the top 16. - */ - - return((~sum) & 0xffff); + ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr), + ufh->wcheck); + ufh->uh.check = csum_tcpudp_magic(saddr, ufh->daddr, + ntohs(ufh->uh.len), + IPPROTO_UDP, ufh->wcheck); + if (ufh->uh.check == 0) + ufh->uh.check = -1; + memcpy(to, ufh, sizeof(struct udphdr)); + } } /* - * Generate UDP checksums. These may be disabled, eg for fast NFS over ethernet - * We default them enabled.. if you turn them off you either know what you are - * doing or get burned... + * Uncheckummed UDP is sufficiently criticial to stuff like ATM video conferencing + * that we use two routines for this for speed. Probably we ought to have a CONFIG_FAST_NET + * set for >10Mb/second boards to activate this sort of coding. Timing needed to verify if + * this is a valid decision. */ - -static void udp_send_check(struct udphdr *uh, unsigned long saddr, - unsigned long daddr, int len, struct sock *sk) + +static void udp_getfrag_nosum(void *p, int saddr, char * to, unsigned int offset, unsigned int fraglen) { - uh->check = 0; - if (sk && sk->no_check) - return; - uh->check = udp_check(uh, len, saddr, daddr); - - /* - * FFFF and 0 are the same, pick the right one as 0 in the - * actual field means no checksum. 
- */ - - if (uh->check == 0) - uh->check = 0xffff; + struct udpfakehdr *ufh = (struct udpfakehdr *)p; + char *src, *dst; + unsigned int len; + + if (offset) + { + len = fraglen; + src = ufh->from+(offset-sizeof(struct udphdr)); + dst = to; + } + else + { + len = fraglen-sizeof(struct udphdr); + src = ufh->from; + dst = to+sizeof(struct udphdr); + } + memcpy_fromfs(src,dst,len); + if (offset == 0) + memcpy(to, ufh, sizeof(struct udphdr)); } +/* + * Send UDP frames. + */ + static int udp_send(struct sock *sk, struct sockaddr_in *sin, - unsigned char *from, int len, int rt) + unsigned char *from, int len, int rt) { - struct sk_buff *skb; - struct device *dev; - struct udphdr *uh; - unsigned char *buff; - unsigned long saddr; - int size, tmp; - - /* - * Allocate an sk_buff copy of the packet. - */ - - size = sk->prot->max_header + len; - skb = sock_alloc_send_skb(sk, size, 0, &tmp); - - - if (skb == NULL) - return tmp; - - skb->sk = NULL; /* to avoid changing sk->saddr */ - skb->free = 1; - skb->localroute = sk->localroute|(rt&MSG_DONTROUTE); - - /* - * Now build the IP and MAC header. - */ - - buff = skb->data; - saddr = sk->saddr; - dev = NULL; - tmp = sk->prot->build_header(skb, saddr, sin->sin_addr.s_addr, - &dev, IPPROTO_UDP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl); - skb->sk=sk; /* So memory is freed correctly */ - - /* - * Unable to put a header on the packet. - */ - - if (tmp < 0 ) - { - sk->prot->wfree(sk, skb->mem_addr, skb->mem_len); - return(tmp); - } - - buff += tmp; - saddr = skb->saddr; /*dev->pa_addr;*/ - skb->len = tmp + sizeof(struct udphdr) + len; /* len + UDP + IP + MAC */ - skb->dev = dev; - - /* - * Fill in the UDP header. - */ - - uh = (struct udphdr *) buff; - uh->len = htons(len + sizeof(struct udphdr)); - uh->source = sk->dummy_th.source; - uh->dest = sin->sin_port; - buff = (unsigned char *) (uh + 1); - - /* - * Copy the user data. - */ - - memcpy_fromfs(buff, from, len); - - /* - * Set up the UDP checksum. 
- */ - - udp_send_check(uh, saddr, sin->sin_addr.s_addr, skb->len - tmp, sk); - - /* - * Send the datagram to the interface. - */ - - udp_statistics.UdpOutDatagrams++; - - sk->prot->queue_xmit(sk, dev, skb, 1); - return(len); + int ulen = len + sizeof(struct udphdr); + int a; + struct udpfakehdr ufh; + + ufh.uh.source = sk->dummy_th.source; + ufh.uh.dest = sin->sin_port; + ufh.uh.len = htons(ulen); + ufh.uh.check = 0; + ufh.daddr = sin->sin_addr.s_addr; + ufh.other = (htons(ulen) << 16) + IPPROTO_UDP*256; + ufh.from = from; + ufh.wcheck = 0; + if(sk->no_check) + a = ip_build_xmit(sk, udp_getfrag_nosum, &ufh, ulen, + sin->sin_addr.s_addr, rt, IPPROTO_UDP); + else + a = ip_build_xmit(sk, udp_getfrag, &ufh, ulen, + sin->sin_addr.s_addr, rt, IPPROTO_UDP); + return(a<0 ? a : len); } @@ -516,6 +464,8 @@ int udp_read(struct sock *sk, unsigned char *buff, int len, int noblock, int udp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) { + struct rtable *rt; + unsigned long sa; if (addr_len < sizeof(*usin)) return(-EINVAL); @@ -527,9 +477,16 @@ int udp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) if(!sk->broadcast && ip_chk_addr(usin->sin_addr.s_addr)==IS_BROADCAST) return -EACCES; /* Must turn broadcast on first */ + rt=(sk->localroute?ip_rt_local:ip_rt_route)(usin->sin_addr.s_addr, NULL, &sa); + if(rt==NULL) + return -ENETUNREACH; + sk->saddr = sa; /* Update source address */ sk->daddr = usin->sin_addr.s_addr; sk->dummy_th.dest = usin->sin_port; sk->state = TCP_ESTABLISHED; + udp_cache_zap(); + sk->ip_route_cache = rt; + sk->ip_route_stamp = rt_stamp; return(0); } @@ -538,6 +495,8 @@ static void udp_close(struct sock *sk, int timeout) { sk->inuse = 1; sk->state = TCP_CLOSE; + if(uh_cache_sk==sk) + udp_cache_zap(); if (sk->dead) destroy_sock(sk); else @@ -556,10 +515,15 @@ int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, struct sock *sk; struct udphdr *uh; unsigned short ulen; + int addr_type = IS_MYADDR; + + 
if(!dev || dev->pa_addr!=daddr) + addr_type=ip_chk_addr(daddr); /* * Get the header. */ + uh = (struct udphdr *) skb->h.uh; ip_statistics.IpInDelivers++; @@ -572,20 +536,77 @@ int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, if (ulen > len || len < sizeof(*uh) || ulen < sizeof(*uh)) { - printk("UDP: short packet: %d/%d\n", ulen, len); + NETDEBUG(printk("UDP: short packet: %d/%d\n", ulen, len)); + udp_statistics.UdpInErrors++; + kfree_skb(skb, FREE_WRITE); + return(0); + } + + if (uh->check && udp_check(uh, len, saddr, daddr)) + { + /* <mea@utu.fi> wants to know, who sent it, to + go and stomp on the garbage sender... */ + NETDEBUG(printk("UDP: bad checksum. From %08lX:%d to %08lX:%d ulen %d\n", + ntohl(saddr),ntohs(uh->source), + ntohl(daddr),ntohs(uh->dest), + ulen)); udp_statistics.UdpInErrors++; kfree_skb(skb, FREE_WRITE); return(0); } + + len=ulen; - sk = get_sock(&udp_prot, uh->dest, saddr, uh->source, daddr); +#ifdef CONFIG_IP_MULTICAST + if (addr_type!=IS_MYADDR) + { + /* + * Multicasts and broadcasts go to each listener. 
+ */ + struct sock *sknext=NULL; + sk=get_sock_mcast(udp_prot.sock_array[ntohs(uh->dest)&(SOCK_ARRAY_SIZE-1)], uh->dest, + saddr, uh->source, daddr); + if(sk) + { + do + { + struct sk_buff *skb1; + + sknext=get_sock_mcast(sk->next, uh->dest, saddr, uh->source, daddr); + if(sknext) + skb1=skb_clone(skb,GFP_ATOMIC); + else + skb1=skb; + if(skb1) + udp_deliver(sk, uh, skb1, dev,saddr,daddr,len); + sk=sknext; + } + while(sknext!=NULL); + } + else + kfree_skb(skb, FREE_READ); + return 0; + } +#endif + if(saddr==uh_cache_saddr && daddr==uh_cache_daddr && uh->dest==uh_cache_dport && uh->source==uh_cache_sport) + sk=(struct sock *)uh_cache_sk; + else + { + sk = get_sock(&udp_prot, uh->dest, saddr, uh->source, daddr); + uh_cache_saddr=saddr; + uh_cache_daddr=daddr; + uh_cache_dport=uh->dest; + uh_cache_sport=uh->source; + uh_cache_sk=sk; + } + if (sk == NULL) { udp_statistics.UdpNoPorts++; - if (ip_chk_addr(daddr) == IS_MYADDR) + if (addr_type == IS_MYADDR) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, dev); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev); } /* * Hmm. 
We got an UDP broadcast to a port to which we @@ -595,15 +616,11 @@ int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, kfree_skb(skb, FREE_WRITE); return(0); } + return udp_deliver(sk,uh,skb,dev, saddr, daddr, len); +} - if (uh->check && udp_check(uh, len, saddr, daddr)) - { - printk("UDP: bad checksum.\n"); - udp_statistics.UdpInErrors++; - kfree_skb(skb, FREE_WRITE); - return(0); - } - +static int udp_deliver(struct sock *sk, struct udphdr *uh, struct sk_buff *skb, struct device *dev, long saddr, long daddr, int len) +{ skb->sk = sk; skb->dev = dev; skb->len = len; @@ -654,7 +671,7 @@ struct proto udp_prot = { udp_connect, NULL, ip_queue_xmit, - ip_retransmit, + NULL, NULL, NULL, udp_rcv, @@ -666,7 +683,8 @@ struct proto udp_prot = { ip_getsockopt, 128, 0, - {NULL,}, - "UDP" + "UDP", + 0, 0, + {NULL,} }; diff --git a/net/inet/utils.c b/net/ipv4/utils.c index 60bbb9f80..21ce570f5 100644 --- a/net/inet/utils.c +++ b/net/ipv4/utils.c @@ -36,9 +36,9 @@ #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> -#include "ip.h" -#include "protocol.h" -#include "tcp.h" +#include <net/ip.h> +#include <net/protocol.h> +#include <net/tcp.h> #include <linux/skbuff.h> diff --git a/net/ipx/Makefile b/net/ipx/Makefile new file mode 100644 index 000000000..8d38f1686 --- /dev/null +++ b/net/ipx/Makefile @@ -0,0 +1,35 @@ +# +# Makefile for the Linux TCP/IP (INET) layer. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< +.c.s: + $(CC) $(CFLAGS) -S $< + + +OBJS := af_ipx.o + + +ipx.o: $(OBJS) + $(LD) -r -o ipx.o $(OBJS) + +dep: + $(CPP) -M *.c > .depend + +tar: + tar -cvf /dev/f1 . 
+ +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c new file mode 100644 index 000000000..941be7224 --- /dev/null +++ b/net/ipx/af_ipx.c @@ -0,0 +1,1953 @@ +/* + * Implements an IPX socket layer (badly - but I'm working on it). + * + * This code is derived from work by + * Ross Biro : Writing the original IP stack + * Fred Van Kempen : Tidying up the TCP/IP + * + * Many thanks go to Keith Baker, Institute For Industrial Information + * Technology Ltd, Swansea University for allowing me to work on this + * in my own time even though it was in some ways related to commercial + * work I am currently employed to do there. + * + * All the material in this file is subject to the Gnu license version 2. + * Neither Alan Cox nor the Swansea University Computer Society admit liability + * nor provide warranty for any of this software. This material is provided + * as is and at no charge. + * + * Revision 0.21: Uses the new generic socket option code. + * Revision 0.22: Gcc clean ups and drop out device registration. Use the + * new multi-protocol edition of hard_header + * Revision 0.23: IPX /proc by Mark Evans. + * Adding a route will overwrite any existing route to the same + * network. + * Revision 0.24: Supports new /proc with no 4K limit + * Revision 0.25: Add ephemeral sockets, passive local network + * identification, support for local net 0 and + * multiple datalinks <Greg Page> + * Revision 0.26: Device drop kills IPX routes via it. (needed for modules) + * Revision 0.27: Autobind <Mark Evans> + * Revision 0.28: Small fix for multiple local networks <Thomas Winder> + * Revision 0.29: Assorted major errors removed <Mark Evans> + * Small correction to promisc mode error fix <Alan Cox> + * Asynchronous I/O support. + * Changed to use notifiers and the newer packet_type stuff. + * Assorted major fixes <Alejandro Liu> + * Revision 0.30: Moved to net/ipx/... 
+ * Don't set address length on recvfrom that errors. + * Incorrect verify_area. + * + * TODO: use sock_alloc_send_skb to allocate sending buffers. Check with Caldera first + * + * Portions Copyright (c) 1995 Caldera, Inc. <greg@caldera.com> + * Neither Greg Page nor Caldera, Inc. admit liability nor provide + * warranty for any of this software. This material is provided + * "AS-IS" and at no charge. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/ipx.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/termios.h> /* For TIOCOUTQ/INQ */ +#include <linux/interrupt.h> +#include <net/p8022.h> +#include <net/psnap.h> + +#ifdef CONFIG_IPX +/* Configuration Variables */ +static unsigned char ipxcfg_max_hops = 16; +static char ipxcfg_auto_select_primary = 0; +static char ipxcfg_auto_create_interfaces = 0; + +/* Global Variables */ +static struct datalink_proto *p8022_datalink = NULL; +static struct datalink_proto *pEII_datalink = NULL; +static struct datalink_proto *p8023_datalink = NULL; +static struct datalink_proto *pSNAP_datalink = NULL; + +static ipx_interface *ipx_interfaces = NULL; +static ipx_route *ipx_routes = NULL; +static ipx_interface *ipx_internal_net = NULL; +static ipx_interface *ipx_primary_net = NULL; + +static int +ipxcfg_set_auto_create(char val) +{ + ipxcfg_auto_create_interfaces = val; + return 0; +} + +static int +ipxcfg_set_auto_select(char val) +{ + ipxcfg_auto_select_primary = val; + if (val && (ipx_primary_net == NULL)) + ipx_primary_net = ipx_interfaces; + return 0; +} + +static int 
+ipxcfg_get_config_data(ipx_config_data *arg) +{ + ipx_config_data vals; + + vals.ipxcfg_auto_create_interfaces = ipxcfg_auto_create_interfaces; + vals.ipxcfg_auto_select_primary = ipxcfg_auto_select_primary; + memcpy_tofs(arg, &vals, sizeof(vals)); + return 0; +} + + +/***********************************************************************************************************************\ +* * +* Handlers for the socket list. * +* * +\***********************************************************************************************************************/ + +/* + * Note: Sockets may not be removed _during_ an interrupt or inet_bh + * handler using this technique. They can be added although we do not + * use this facility. + */ + +static void +ipx_remove_socket(ipx_socket *sk) +{ + ipx_socket *s; + ipx_interface *intrfc; + unsigned long flags; + + save_flags(flags); + cli(); + + /* Determine interface with which socket is associated */ + intrfc = sk->ipx_intrfc; + if (intrfc == NULL) { + restore_flags(flags); + return; + } + + s=intrfc->if_sklist; + if(s==sk) { + intrfc->if_sklist=s->next; + restore_flags(flags); + return; + } + + while(s && s->next) { + if(s->next==sk) { + s->next=sk->next; + restore_flags(flags); + return; + } + s=s->next; + } + restore_flags(flags); +} + +/* + * This is only called from user mode. Thus it protects itself against + * interrupt users but doesn't worry about being called during work. + * Once it is removed from the queue no interrupt or bottom half will + * touch it and we are (fairly 8-) ) safe. + */ + +static void +ipx_destroy_socket(ipx_socket *sk) +{ + struct sk_buff *skb; + + ipx_remove_socket(sk); + while((skb=skb_dequeue(&sk->receive_queue))!=NULL) { + kfree_skb(skb,FREE_READ); + } + + kfree_s(sk,sizeof(*sk)); +} + +/* The following code is used to support IPX Interfaces (IPXITF). An + * IPX interface is defined by a physical device and a frame type. 
+ */ + +static ipx_route * ipxrtr_lookup(unsigned long); + +static void +ipxitf_clear_primary_net(void) +{ + if (ipxcfg_auto_select_primary && (ipx_interfaces != NULL)) + ipx_primary_net = ipx_interfaces; + else + ipx_primary_net = NULL; +} + +static ipx_interface * +ipxitf_find_using_phys(struct device *dev, unsigned short datalink) +{ + ipx_interface *i; + + for (i=ipx_interfaces; + i && ((i->if_dev!=dev) || (i->if_dlink_type!=datalink)); + i=i->if_next) + ; + return i; +} + +static ipx_interface * +ipxitf_find_using_net(unsigned long net) +{ + ipx_interface *i; + + if (net == 0L) + return ipx_primary_net; + + for (i=ipx_interfaces; i && (i->if_netnum!=net); i=i->if_next) + ; + + return i; +} + +/* Sockets are bound to a particular IPX interface. */ +static void +ipxitf_insert_socket(ipx_interface *intrfc, ipx_socket *sk) +{ + ipx_socket *s; + + sk->ipx_intrfc = intrfc; + sk->next = NULL; + if (intrfc->if_sklist == NULL) { + intrfc->if_sklist = sk; + } else { + for (s = intrfc->if_sklist; s->next != NULL; s = s->next) + ; + s->next = sk; + } +} + +static ipx_socket * +ipxitf_find_socket(ipx_interface *intrfc, unsigned short port) +{ + ipx_socket *s; + + for (s=intrfc->if_sklist; + (s != NULL) && (s->ipx_port != port); + s=s->next) + ; + + return s; +} + +static void ipxrtr_del_routes(ipx_interface *); + +static void +ipxitf_down(ipx_interface *intrfc) +{ + ipx_interface *i; + ipx_socket *s, *t; + + /* Delete all routes associated with this interface */ + ipxrtr_del_routes(intrfc); + + /* error sockets */ + for (s = intrfc->if_sklist; s != NULL; ) { + s->err = ENOLINK; + s->error_report(s); + s->ipx_intrfc = NULL; + s->ipx_port = 0; + s->zapped=1; /* Indicates it is no longer bound */ + t = s; + s = s->next; + t->next = NULL; + } + intrfc->if_sklist = NULL; + + /* remove this interface from list */ + if (intrfc == ipx_interfaces) { + ipx_interfaces = intrfc->if_next; + } else { + for (i = ipx_interfaces; + (i != NULL) && (i->if_next != intrfc); + i = i->if_next) + 
; + if ((i != NULL) && (i->if_next == intrfc)) + i->if_next = intrfc->if_next; + } + + /* remove this interface from *special* networks */ + if (intrfc == ipx_primary_net) + ipxitf_clear_primary_net(); + if (intrfc == ipx_internal_net) + ipx_internal_net = NULL; + + kfree_s(intrfc, sizeof(*intrfc)); +} + +static int +ipxitf_device_event(unsigned long event, void *ptr) +{ + struct device *dev = ptr; + ipx_interface *i, *tmp; + + if(event!=NETDEV_DOWN) + return NOTIFY_DONE; + + for (i = ipx_interfaces; i != NULL; ) { + + tmp = i->if_next; + if (i->if_dev == dev) + ipxitf_down(i); + i = tmp; + + } + + return NOTIFY_DONE; +} + +static int +ipxitf_def_skb_handler(struct sock *sock, struct sk_buff *skb) +{ + int retval; + + if((retval = sock_queue_rcv_skb(sock, skb))<0) { + /* + * We do a FREE_WRITE here because this indicates how + * to treat the socket with which the packet is + * associated. If this packet is associated with a + * socket at all, it must be the originator of the + * packet. Incoming packets will have no socket + * associated with them at this point. + */ + kfree_skb(skb,FREE_WRITE); + } + return retval; +} + +static int +ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int copy) +{ + ipx_packet *ipx = (ipx_packet *)(skb->h.raw); + ipx_socket *sock1 = NULL, *sock2 = NULL; + struct sk_buff *skb1 = NULL, *skb2 = NULL; + int ipx_offset; + + sock1 = ipxitf_find_socket(intrfc, ipx->ipx_dest.sock); + + /* + * We need to check if there is a primary net and if + * this is addressed to one of the *SPECIAL* sockets because + * these need to be propagated to the primary net. + * The *SPECIAL* socket list contains: 0x452(SAP), 0x453(RIP) and + * 0x456(Diagnostic). 
+ */ + if (ipx_primary_net && (intrfc != ipx_primary_net)) { + switch (ntohs(ipx->ipx_dest.sock)) { + case 0x452: + case 0x453: + case 0x456: + /* + * The appropriate thing to do here is to + * dup the packet and route to the primary net + * interface via ipxitf_send; however, we'll cheat + * and just demux it here. + */ + sock2 = ipxitf_find_socket(ipx_primary_net, + ipx->ipx_dest.sock); + break; + default: + break; + } + } + + /* if there is nothing to do, return */ + if ((sock1 == NULL) && (sock2 == NULL)) { + if (!copy) + kfree_skb(skb,FREE_WRITE); + return 0; + } + + ipx_offset = (char *)(skb->h.raw) - (char *)(skb->data); + + /* This next segment of code is a little awkward, but it sets it up + * so that the appropriate number of copies of the SKB are made and + * that skb1 and skb2 point to it (them) so that it (they) can be + * demuxed to sock1 and/or sock2. If we are unable to make enough + * copies, we do as much as is possible. + */ + if (copy) { + skb1 = skb_clone(skb, GFP_ATOMIC); + if (skb1 != NULL) { + skb1->h.raw = (unsigned char *)&(skb1->data[ipx_offset]); + skb1->arp = skb1->free = 1; + } + } else { + skb1 = skb; + } + + if (skb1 == NULL) return -ENOMEM; + + /* Do we need 2 SKBs? 
*/ + if (sock1 && sock2) { + skb2 = skb_clone(skb1, GFP_ATOMIC); + if (skb2 != NULL) { + skb2->h.raw = (unsigned char *)&(skb2->data[ipx_offset]); + skb2->arp = skb2->free = 1; + } + } else { + skb2 = skb1; + } + + if (sock1) { + (void) ipxitf_def_skb_handler(sock1, skb1); + } + + if (skb2 == NULL) return -ENOMEM; + + if (sock2) { + (void) ipxitf_def_skb_handler(sock2, skb2); + } + + return 0; +} + +static struct sk_buff * +ipxitf_adjust_skbuff(ipx_interface *intrfc, struct sk_buff *skb) +{ + struct sk_buff *skb2; + int in_offset = skb->h.raw - skb->data; + int out_offset = intrfc->if_ipx_offset; + char *oldraw; + int len; + + /* Hopefully, most cases */ + if (in_offset == out_offset) { + skb->len += out_offset; + skb->arp = skb->free = 1; + return skb; + } + + /* Existing SKB will work, just need to move things around a little */ + if (in_offset > out_offset) { + oldraw = skb->h.raw; + skb->h.raw = &(skb->data[out_offset]); + memmove(skb->h.raw, oldraw, skb->len); + skb->len += out_offset; + skb->arp = skb->free = 1; + return skb; + } + + /* Need new SKB */ + len = skb->len + out_offset; + skb2 = alloc_skb(len, GFP_ATOMIC); + if (skb2 != NULL) { + skb2->h.raw = &(skb2->data[out_offset]); + skb2->len = len; + skb2->free=1; + skb2->arp=1; + memcpy(skb2->h.raw, skb->h.raw, skb->len); + } + kfree_skb(skb, FREE_WRITE); + return skb2; +} + +static int +ipxitf_send(ipx_interface *intrfc, struct sk_buff *skb, char *node) +{ + ipx_packet *ipx = (ipx_packet *)(skb->h.raw); + struct device *dev = intrfc->if_dev; + struct datalink_proto *dl = intrfc->if_dlink; + char dest_node[IPX_NODE_LEN]; + int send_to_wire = 1; + int addr_len; + + /* We need to know how many skbuffs it will take to send out this + * packet to avoid unnecessary copies. 
+ */ + if ((dl == NULL) || (dev == NULL) || (dev->flags & IFF_LOOPBACK)) + send_to_wire = 0; + + /* See if this should be demuxed to sockets on this interface */ + if (ipx->ipx_dest.net == intrfc->if_netnum) { + if (memcmp(intrfc->if_node, node, IPX_NODE_LEN) == 0) + return ipxitf_demux_socket(intrfc, skb, 0); + if (memcmp(ipx_broadcast_node, node, IPX_NODE_LEN) == 0) { + ipxitf_demux_socket(intrfc, skb, send_to_wire); + if (!send_to_wire) return 0; + } + } + + /* if the originating net is not equal to our net; this is routed */ + if (ipx->ipx_source.net != intrfc->if_netnum) { + if (++(ipx->ipx_tctrl) > ipxcfg_max_hops) + send_to_wire = 0; + } + + if (!send_to_wire) { + /* + * We do a FREE_WRITE here because this indicates how + * to treat the socket with which the packet is + * associated. If this packet is associated with a + * socket at all, it must be the originator of the + * packet. Routed packets will have no socket associated + * with them. + */ + kfree_skb(skb,FREE_WRITE); + return 0; + } + + /* determine the appropriate hardware address */ + addr_len = dev->addr_len; + if (memcmp(ipx_broadcast_node, node, IPX_NODE_LEN) == 0) { + memcpy(dest_node, dev->broadcast, addr_len); + } else { + memcpy(dest_node, &(node[IPX_NODE_LEN-addr_len]), addr_len); + } + + /* make any compensation for differing physical/data link size */ + skb = ipxitf_adjust_skbuff(intrfc, skb); + if (skb == NULL) return 0; + + /* set up data link and physical headers */ + skb->dev = dev; + dl->datalink_header(dl, skb, dest_node); + + if (skb->sk != NULL) { + /* This is an outbound packet from this host. We need to + * increment the write count. 
+ */ + skb->sk->wmem_alloc += skb->mem_len; + } + + /* Send it out */ + dev_queue_xmit(skb, dev, SOPRI_NORMAL); + return 0; +} + +static int +ipxrtr_add_route(unsigned long, ipx_interface *, unsigned char *); + +static int +ipxitf_add_local_route(ipx_interface *intrfc) +{ + return ipxrtr_add_route(intrfc->if_netnum, intrfc, NULL); +} + +static char * ipx_frame_name(unsigned short); +static char * ipx_device_name(ipx_interface *); +static int ipxrtr_route_skb(struct sk_buff *); + +static int +ipxitf_rcv(ipx_interface *intrfc, struct sk_buff *skb) +{ + ipx_packet *ipx = (ipx_packet *) (skb->h.raw); + ipx_interface *i; + + /* See if we should update our network number */ + if ((intrfc->if_netnum == 0L) && + (ipx->ipx_source.net == ipx->ipx_dest.net) && + (ipx->ipx_source.net != 0L)) { + /* NB: NetWare servers lie about their hop count so we + * dropped the test based on it. This is the best way + * to determine this is a 0 hop count packet. + */ + if ((i=ipxitf_find_using_net(ipx->ipx_source.net))==NULL) { + intrfc->if_netnum = ipx->ipx_source.net; + (void) ipxitf_add_local_route(intrfc); + } else { + printk("IPX: Network number collision %lx\n\t%s %s and %s %s\n", + htonl(ipx->ipx_source.net), + ipx_device_name(i), + ipx_frame_name(i->if_dlink_type), + ipx_device_name(intrfc), + ipx_frame_name(intrfc->if_dlink_type)); + } + } + + if (ipx->ipx_dest.net == 0L) + ipx->ipx_dest.net = intrfc->if_netnum; + if (ipx->ipx_source.net == 0L) + ipx->ipx_source.net = intrfc->if_netnum; + + if (intrfc->if_netnum != ipx->ipx_dest.net) { + /* We only route point-to-point packets. 
*/ + if ((skb->pkt_type != PACKET_BROADCAST) && + (skb->pkt_type != PACKET_MULTICAST)) + return ipxrtr_route_skb(skb); + + kfree_skb(skb,FREE_READ); + return 0; + } + + /* see if we should keep it */ + if ((memcmp(ipx_broadcast_node, ipx->ipx_dest.node, IPX_NODE_LEN) == 0) + || (memcmp(intrfc->if_node, ipx->ipx_dest.node, IPX_NODE_LEN) == 0)) { + return ipxitf_demux_socket(intrfc, skb, 0); + } + + /* we couldn't pawn it off so unload it */ + kfree_skb(skb,FREE_READ); + return 0; +} + +static void +ipxitf_insert(ipx_interface *intrfc) +{ + ipx_interface *i; + + intrfc->if_next = NULL; + if (ipx_interfaces == NULL) { + ipx_interfaces = intrfc; + } else { + for (i = ipx_interfaces; i->if_next != NULL; i = i->if_next) + ; + i->if_next = intrfc; + } + + if (ipxcfg_auto_select_primary && (ipx_primary_net == NULL)) + ipx_primary_net = intrfc; +} + +static int +ipxitf_create_internal(ipx_interface_definition *idef) +{ + ipx_interface *intrfc; + + /* Only one primary network allowed */ + if (ipx_primary_net != NULL) return -EEXIST; + + /* Must have a valid network number */ + if (idef->ipx_network == 0L) return -EADDRNOTAVAIL; + if (ipxitf_find_using_net(idef->ipx_network) != NULL) + return -EADDRINUSE; + + intrfc=(ipx_interface *)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); + if (intrfc==NULL) + return -EAGAIN; + intrfc->if_dev=NULL; + intrfc->if_netnum=idef->ipx_network; + intrfc->if_dlink_type = 0; + intrfc->if_dlink = NULL; + intrfc->if_sklist = NULL; + intrfc->if_internal = 1; + intrfc->if_ipx_offset = 0; + intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; + memcpy((char *)&(intrfc->if_node), idef->ipx_node, IPX_NODE_LEN); + ipx_internal_net = intrfc; + ipx_primary_net = intrfc; + ipxitf_insert(intrfc); + return ipxitf_add_local_route(intrfc); +} + +static int +ipx_map_frame_type(unsigned char type) +{ + switch (type) { + case IPX_FRAME_ETHERII: return htons(ETH_P_IPX); + case IPX_FRAME_8022: return htons(ETH_P_802_2); + case IPX_FRAME_SNAP: return htons(ETH_P_SNAP); + case 
IPX_FRAME_8023: return htons(ETH_P_802_3); + } + return 0; +} + +static int +ipxitf_create(ipx_interface_definition *idef) +{ + struct device *dev; + unsigned short dlink_type = 0; + struct datalink_proto *datalink = NULL; + ipx_interface *intrfc; + + if (idef->ipx_special == IPX_INTERNAL) + return ipxitf_create_internal(idef); + + if ((idef->ipx_special == IPX_PRIMARY) && (ipx_primary_net != NULL)) + return -EEXIST; + + if ((idef->ipx_network != 0L) && + (ipxitf_find_using_net(idef->ipx_network) != NULL)) + return -EADDRINUSE; + + switch (idef->ipx_dlink_type) { + case IPX_FRAME_ETHERII: + dlink_type = htons(ETH_P_IPX); + datalink = pEII_datalink; + break; + case IPX_FRAME_8022: + dlink_type = htons(ETH_P_802_2); + datalink = p8022_datalink; + break; + case IPX_FRAME_SNAP: + dlink_type = htons(ETH_P_SNAP); + datalink = pSNAP_datalink; + break; + case IPX_FRAME_8023: + dlink_type = htons(ETH_P_802_3); + datalink = p8023_datalink; + break; + case IPX_FRAME_NONE: + default: + break; + } + + if (datalink == NULL) + return -EPROTONOSUPPORT; + + dev=dev_get(idef->ipx_device); + if (dev==NULL) + return -ENODEV; + + if (!(dev->flags & IFF_UP)) + return -ENETDOWN; + + /* Check addresses are suitable */ + if(dev->addr_len>IPX_NODE_LEN) + return -EINVAL; + + if ((intrfc = ipxitf_find_using_phys(dev, dlink_type)) == NULL) { + + /* Ok now create */ + intrfc=(ipx_interface *)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); + if (intrfc==NULL) + return -EAGAIN; + intrfc->if_dev=dev; + intrfc->if_netnum=idef->ipx_network; + intrfc->if_dlink_type = dlink_type; + intrfc->if_dlink = datalink; + intrfc->if_sklist = NULL; + intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; + /* Setup primary if necessary */ + if ((idef->ipx_special == IPX_PRIMARY)) + ipx_primary_net = intrfc; + intrfc->if_internal = 0; + intrfc->if_ipx_offset = dev->hard_header_len + datalink->header_length; + memset(intrfc->if_node, 0, IPX_NODE_LEN); + memcpy((char *)&(intrfc->if_node[IPX_NODE_LEN-dev->addr_len]), 
dev->dev_addr, dev->addr_len); + + ipxitf_insert(intrfc); + } + + /* If the network number is known, add a route */ + if (intrfc->if_netnum == 0L) + return 0; + + return ipxitf_add_local_route(intrfc); +} + +static int +ipxitf_delete(ipx_interface_definition *idef) +{ + struct device *dev = NULL; + unsigned short dlink_type = 0; + ipx_interface *intrfc; + + if (idef->ipx_special == IPX_INTERNAL) { + if (ipx_internal_net != NULL) { + ipxitf_down(ipx_internal_net); + return 0; + } + return -ENOENT; + } + + dlink_type = ipx_map_frame_type(idef->ipx_dlink_type); + if (dlink_type == 0) + return -EPROTONOSUPPORT; + + dev=dev_get(idef->ipx_device); + if(dev==NULL) return -ENODEV; + + intrfc = ipxitf_find_using_phys(dev, dlink_type); + if (intrfc != NULL) { + ipxitf_down(intrfc); + return 0; + } + return -EINVAL; +} + +static ipx_interface * +ipxitf_auto_create(struct device *dev, unsigned short dlink_type) +{ + struct datalink_proto *datalink = NULL; + ipx_interface *intrfc; + + switch (htons(dlink_type)) { + case ETH_P_IPX: datalink = pEII_datalink; break; + case ETH_P_802_2: datalink = p8022_datalink; break; + case ETH_P_SNAP: datalink = pSNAP_datalink; break; + case ETH_P_802_3: datalink = p8023_datalink; break; + default: return NULL; + } + + if (dev == NULL) + return NULL; + + /* Check addresses are suitable */ + if(dev->addr_len>IPX_NODE_LEN) return NULL; + + intrfc=(ipx_interface *)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); + if (intrfc!=NULL) { + intrfc->if_dev=dev; + intrfc->if_netnum=0L; + intrfc->if_dlink_type = dlink_type; + intrfc->if_dlink = datalink; + intrfc->if_sklist = NULL; + intrfc->if_internal = 0; + intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; + intrfc->if_ipx_offset = dev->hard_header_len + + datalink->header_length; + memset(intrfc->if_node, 0, IPX_NODE_LEN); + memcpy((char *)&(intrfc->if_node[IPX_NODE_LEN-dev->addr_len]), + dev->dev_addr, dev->addr_len); + ipxitf_insert(intrfc); + } + + return intrfc; +} + +static int +ipxitf_ioctl(unsigned int 
cmd, void *arg) +{ + int err; + switch(cmd) + { + case SIOCSIFADDR: + { + struct ifreq ifr; + struct sockaddr_ipx *sipx; + ipx_interface_definition f; + err=verify_area(VERIFY_READ,arg,sizeof(ifr)); + if(err) + return err; + memcpy_fromfs(&ifr,arg,sizeof(ifr)); + sipx=(struct sockaddr_ipx *)&ifr.ifr_addr; + if(sipx->sipx_family!=AF_IPX) + return -EINVAL; + f.ipx_network=sipx->sipx_network; + memcpy(f.ipx_device, ifr.ifr_name, sizeof(f.ipx_device)); + memcpy(f.ipx_node, sipx->sipx_node, IPX_NODE_LEN); + f.ipx_dlink_type=sipx->sipx_type; + f.ipx_special=sipx->sipx_special; + if(sipx->sipx_action==IPX_DLTITF) + return ipxitf_delete(&f); + else + return ipxitf_create(&f); + } + case SIOCGIFADDR: + { + struct ifreq ifr; + struct sockaddr_ipx *sipx; + ipx_interface *ipxif; + struct device *dev; + err=verify_area(VERIFY_WRITE,arg,sizeof(ifr)); + if(err) + return err; + memcpy_fromfs(&ifr,arg,sizeof(ifr)); + sipx=(struct sockaddr_ipx *)&ifr.ifr_addr; + dev=dev_get(ifr.ifr_name); + if(!dev) + return -ENODEV; + ipxif=ipxitf_find_using_phys(dev, ipx_map_frame_type(sipx->sipx_type)); + if(ipxif==NULL) + return -EADDRNOTAVAIL; + sipx->sipx_network=ipxif->if_netnum; + memcpy(sipx->sipx_node, ipxif->if_node, sizeof(sipx->sipx_node)); + memcpy_tofs(arg,&ifr,sizeof(ifr)); + return 0; + } + case SIOCAIPXITFCRT: + err=verify_area(VERIFY_READ,arg,sizeof(char)); + if(err) + return err; + return ipxcfg_set_auto_create(get_fs_byte(arg)); + case SIOCAIPXPRISLT: + err=verify_area(VERIFY_READ,arg,sizeof(char)); + if(err) + return err; + return ipxcfg_set_auto_select(get_fs_byte(arg)); + default: + return -EINVAL; + } +} + +/*******************************************************************************************************************\ +* * +* Routing tables for the IPX socket layer * +* * +\*******************************************************************************************************************/ + +static ipx_route * +ipxrtr_lookup(unsigned long net) +{ + ipx_route *r; + + for 
(r=ipx_routes; (r!=NULL) && (r->ir_net!=net); r=r->ir_next) + ; + + return r; +} + +static int +ipxrtr_add_route(unsigned long network, ipx_interface *intrfc, unsigned char *node) +{ + ipx_route *rt; + + /* Get a route structure; either existing or create */ + rt = ipxrtr_lookup(network); + if (rt==NULL) { + rt=(ipx_route *)kmalloc(sizeof(ipx_route),GFP_ATOMIC); + if(rt==NULL) + return -EAGAIN; + rt->ir_next=ipx_routes; + ipx_routes=rt; + } + + rt->ir_net = network; + rt->ir_intrfc = intrfc; + if (node == NULL) { + memset(rt->ir_router_node, '\0', IPX_NODE_LEN); + rt->ir_routed = 0; + } else { + memcpy(rt->ir_router_node, node, IPX_NODE_LEN); + rt->ir_routed=1; + } + return 0; +} + +static void +ipxrtr_del_routes(ipx_interface *intrfc) +{ + ipx_route **r, *tmp; + + for (r = &ipx_routes; (tmp = *r) != NULL; ) { + if (tmp->ir_intrfc == intrfc) { + *r = tmp->ir_next; + kfree_s(tmp, sizeof(ipx_route)); + } else { + r = &(tmp->ir_next); + } + } +} + +static int +ipxrtr_create(ipx_route_definition *rd) +{ + ipx_interface *intrfc; + + /* Find the appropriate interface */ + intrfc = ipxitf_find_using_net(rd->ipx_router_network); + if (intrfc == NULL) + return -ENETUNREACH; + + return ipxrtr_add_route(rd->ipx_network, intrfc, rd->ipx_router_node); +} + + +static int +ipxrtr_delete(long net) +{ + ipx_route **r; + ipx_route *tmp; + + for (r = &ipx_routes; (tmp = *r) != NULL; ) { + if (tmp->ir_net == net) { + if (!(tmp->ir_routed)) { + /* Directly connected; can't lose route */ + return -EPERM; + } + *r = tmp->ir_next; + kfree_s(tmp, sizeof(ipx_route)); + return 0; + } + r = &(tmp->ir_next); + } + + return -ENOENT; +} + +static int +ipxrtr_route_packet(ipx_socket *sk, struct sockaddr_ipx *usipx, void *ubuf, int len) +{ + struct sk_buff *skb; + ipx_interface *intrfc; + ipx_packet *ipx; + int size; + int ipx_offset; + ipx_route *rt = NULL; + + /* Find the appropriate interface on which to send packet */ + if ((usipx->sipx_network == 0L) && (ipx_primary_net != NULL)) { + 
usipx->sipx_network = ipx_primary_net->if_netnum; + intrfc = ipx_primary_net; + } else { + rt = ipxrtr_lookup(usipx->sipx_network); + if (rt==NULL) { + return -ENETUNREACH; + } + intrfc = rt->ir_intrfc; + } + + ipx_offset = intrfc->if_ipx_offset; + size=sizeof(ipx_packet)+len; + size += ipx_offset; + + if(size+sk->wmem_alloc>sk->sndbuf) return -EAGAIN; + + skb=alloc_skb(size,GFP_KERNEL); + if(skb==NULL) return -ENOMEM; + + skb->sk=sk; + skb->len=size; + skb->free=1; + skb->arp=1; + + /* Fill in IPX header */ + ipx=(ipx_packet *)&(skb->data[ipx_offset]); + ipx->ipx_checksum=0xFFFF; + ipx->ipx_pktsize=htons(len+sizeof(ipx_packet)); + ipx->ipx_tctrl=0; + ipx->ipx_type=usipx->sipx_type; + skb->h.raw = (unsigned char *)ipx; + + ipx->ipx_source.net = sk->ipx_intrfc->if_netnum; + memcpy(ipx->ipx_source.node, sk->ipx_intrfc->if_node, IPX_NODE_LEN); + ipx->ipx_source.sock = sk->ipx_port; + ipx->ipx_dest.net=usipx->sipx_network; + memcpy(ipx->ipx_dest.node,usipx->sipx_node,IPX_NODE_LEN); + ipx->ipx_dest.sock=usipx->sipx_port; + + memcpy_fromfs((char *)(ipx+1),ubuf,len); + return ipxitf_send(intrfc, skb, (rt && rt->ir_routed) ? + rt->ir_router_node : ipx->ipx_dest.node); +} + +static int +ipxrtr_route_skb(struct sk_buff *skb) +{ + ipx_packet *ipx = (ipx_packet *) (skb->h.raw); + ipx_route *r; + ipx_interface *i; + + r = ipxrtr_lookup(ipx->ipx_dest.net); + if (r == NULL) { + /* no known route */ + kfree_skb(skb,FREE_READ); + return 0; + } + i = r->ir_intrfc; + (void)ipxitf_send(i, skb, (r->ir_routed) ? 
+ r->ir_router_node : ipx->ipx_dest.node); + return 0; +} + +/* + * We use a normal struct rtentry for route handling + */ + +static int ipxrtr_ioctl(unsigned int cmd, void *arg) +{ + int err; + struct rtentry rt; /* Use these to behave like 'other' stacks */ + struct sockaddr_ipx *sg,*st; + + err=verify_area(VERIFY_READ,arg,sizeof(rt)); + if(err) + return err; + + memcpy_fromfs(&rt,arg,sizeof(rt)); + + sg=(struct sockaddr_ipx *)&rt.rt_gateway; + st=(struct sockaddr_ipx *)&rt.rt_dst; + + if(!(rt.rt_flags&RTF_GATEWAY)) + return -EINVAL; /* Direct routes are fixed */ + if(sg->sipx_family!=AF_IPX) + return -EINVAL; + if(st->sipx_family!=AF_IPX) + return -EINVAL; + + switch(cmd) + { + case SIOCDELRT: + return ipxrtr_delete(st->sipx_network); + case SIOCADDRT: + { + struct ipx_route_definition f; + f.ipx_network=st->sipx_network; + f.ipx_router_network=sg->sipx_network; + memcpy(f.ipx_router_node, sg->sipx_node, IPX_NODE_LEN); + return ipxrtr_create(&f); + } + default: + return -EINVAL; + } +} + +static char * +ipx_frame_name(unsigned short frame) +{ + switch (ntohs(frame)) { + case ETH_P_IPX: return "EtherII"; + case ETH_P_802_2: return "802.2"; + case ETH_P_SNAP: return "SNAP"; + case ETH_P_802_3: return "802.3"; + default: return "None"; + } +} + +static char * +ipx_device_name(ipx_interface *intrfc) +{ + return (intrfc->if_internal ? "Internal" : + (intrfc->if_dev ? intrfc->if_dev->name : "Unknown")); +} + +/* Called from proc fs */ +int +ipx_get_interface_info(char *buffer, char **start, off_t offset, int length) +{ + ipx_interface *i; + int len=0; + off_t pos=0; + off_t begin=0; + + /* Theory.. 
Keep printing in the same place until we pass offset */ + + len += sprintf (buffer,"%-11s%-15s%-9s%-11s%s\n", "Network", + "Node_Address", "Primary", "Device", "Frame_Type"); + for (i = ipx_interfaces; i != NULL; i = i->if_next) { + len += sprintf(buffer+len, "%08lX ", ntohl(i->if_netnum)); + len += sprintf (buffer+len,"%02X%02X%02X%02X%02X%02X ", + i->if_node[0], i->if_node[1], i->if_node[2], + i->if_node[3], i->if_node[4], i->if_node[5]); + len += sprintf(buffer+len, "%-9s", (i == ipx_primary_net) ? + "Yes" : "No"); + len += sprintf (buffer+len, "%-11s", ipx_device_name(i)); + len += sprintf (buffer+len, "%s\n", + ipx_frame_name(i->if_dlink_type)); + + /* Are we still dumping unwanted data then discard the record */ + pos=begin+len; + + if(pos<offset) { + len=0; /* Keep dumping into the buffer start */ + begin=pos; + } + if(pos>offset+length) /* We have dumped enough */ + break; + } + + /* The data in question runs from begin to begin+len */ + *start=buffer+(offset-begin); /* Start of wanted data */ + len-=(offset-begin); /* Remove unwanted header data from length */ + if(len>length) + len=length; /* Remove unwanted tail data from length */ + + return len; +} + +int +ipx_get_info(char *buffer, char **start, off_t offset, int length) +{ + ipx_socket *s; + ipx_interface *i; + int len=0; + off_t pos=0; + off_t begin=0; + + /* Theory.. 
Keep printing in the same place until we pass offset */ + + len += sprintf (buffer,"%-15s%-28s%-10s%-10s%-7s%s\n", "Local_Address", + "Remote_Address", "Tx_Queue", "Rx_Queue", + "State", "Uid"); + for (i = ipx_interfaces; i != NULL; i = i->if_next) { + for (s = i->if_sklist; s != NULL; s = s->next) { + len += sprintf (buffer+len,"%08lX:%04X ", + htonl(i->if_netnum), + htons(s->ipx_port)); + if (s->state!=TCP_ESTABLISHED) { + len += sprintf(buffer+len, "%-28s", "Not_Connected"); + } else { + len += sprintf (buffer+len, + "%08lX:%02X%02X%02X%02X%02X%02X:%04X ", + htonl(s->ipx_dest_addr.net), + s->ipx_dest_addr.node[0], s->ipx_dest_addr.node[1], + s->ipx_dest_addr.node[2], s->ipx_dest_addr.node[3], + s->ipx_dest_addr.node[4], s->ipx_dest_addr.node[5], + htons(s->ipx_dest_addr.sock)); + } + len += sprintf (buffer+len,"%08lX %08lX ", + s->wmem_alloc, s->rmem_alloc); + len += sprintf (buffer+len,"%02X %03d\n", + s->state, SOCK_INODE(s->socket)->i_uid); + + /* Are we still dumping unwanted data then discard the record */ + pos=begin+len; + + if(pos<offset) + { + len=0; /* Keep dumping into the buffer start */ + begin=pos; + } + if(pos>offset+length) /* We have dumped enough */ + break; + } + } + + /* The data in question runs from begin to begin+len */ + *start=buffer+(offset-begin); /* Start of wanted data */ + len-=(offset-begin); /* Remove unwanted header data from length */ + if(len>length) + len=length; /* Remove unwanted tail data from length */ + + return len; +} + +int ipx_rt_get_info(char *buffer, char **start, off_t offset, int length) +{ + ipx_route *rt; + int len=0; + off_t pos=0; + off_t begin=0; + + len += sprintf (buffer,"%-11s%-13s%s\n", + "Network", "Router_Net", "Router_Node"); + for (rt = ipx_routes; rt != NULL; rt = rt->ir_next) + { + len += sprintf (buffer+len,"%08lX ", ntohl(rt->ir_net)); + if (rt->ir_routed) { + len += sprintf (buffer+len,"%08lX %02X%02X%02X%02X%02X%02X\n", + ntohl(rt->ir_intrfc->if_netnum), + rt->ir_router_node[0], 
rt->ir_router_node[1], + rt->ir_router_node[2], rt->ir_router_node[3], + rt->ir_router_node[4], rt->ir_router_node[5]); + } else { + len += sprintf (buffer+len, "%-13s%s\n", + "Directly", "Connected"); + } + pos=begin+len; + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +/*******************************************************************************************************************\ +* * +* Handling for system calls applied via the various interfaces to an IPX socket object * +* * +\*******************************************************************************************************************/ + +static int ipx_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + switch(cmd) + { + default: + return(-EINVAL); + } +} + +static int ipx_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen) +{ + ipx_socket *sk; + int err,opt; + + sk=(ipx_socket *)sock->data; + + if(optval==NULL) + return(-EINVAL); + + err=verify_area(VERIFY_READ,optval,sizeof(int)); + if(err) + return err; + opt=get_fs_long((unsigned long *)optval); + + switch(level) + { + case SOL_IPX: + switch(optname) + { + case IPX_TYPE: + sk->ipx_type=opt; + return 0; + default: + return -EOPNOTSUPP; + } + break; + + case SOL_SOCKET: + return sock_setsockopt(sk,level,optname,optval,optlen); + + default: + return -EOPNOTSUPP; + } +} + +static int ipx_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + ipx_socket *sk; + int val=0; + int err; + + sk=(ipx_socket *)sock->data; + + switch(level) + { + + case SOL_IPX: + switch(optname) + { + case IPX_TYPE: + val=sk->ipx_type; + break; + default: + return -ENOPROTOOPT; + } + break; + + case SOL_SOCKET: + return sock_getsockopt(sk,level,optname,optval,optlen); + + default: + return -EOPNOTSUPP; + } + 
err=verify_area(VERIFY_WRITE,optlen,sizeof(int)); + if(err) + return err; + put_fs_long(sizeof(int),(unsigned long *)optlen); + err=verify_area(VERIFY_WRITE,optval,sizeof(int)); + put_fs_long(val,(unsigned long *)optval); + return(0); +} + +static int ipx_listen(struct socket *sock, int backlog) +{ + return -EOPNOTSUPP; +} + +static void def_callback1(struct sock *sk) +{ + if(!sk->dead) + wake_up_interruptible(sk->sleep); +} + +static void def_callback2(struct sock *sk, int len) +{ + if(!sk->dead) + { + wake_up_interruptible(sk->sleep); + sock_wake_async(sk->socket, 1); + } +} + +static int +ipx_create(struct socket *sock, int protocol) +{ + ipx_socket *sk; + sk=(ipx_socket *)kmalloc(sizeof(*sk),GFP_KERNEL); + if(sk==NULL) + return(-ENOMEM); + switch(sock->type) + { + case SOCK_DGRAM: + break; + default: + kfree_s((void *)sk,sizeof(*sk)); + return(-ESOCKTNOSUPPORT); + } + sk->dead=0; + sk->next=NULL; + sk->broadcast=0; + sk->rcvbuf=SK_RMEM_MAX; + sk->sndbuf=SK_WMEM_MAX; + sk->wmem_alloc=0; + sk->rmem_alloc=0; + sk->inuse=0; + sk->shutdown=0; + sk->prot=NULL; /* So we use default free mechanisms */ + sk->err=0; + skb_queue_head_init(&sk->receive_queue); + skb_queue_head_init(&sk->write_queue); + sk->send_head=NULL; + skb_queue_head_init(&sk->back_log); + sk->state=TCP_CLOSE; + sk->socket=sock; + sk->type=sock->type; + sk->ipx_type=0; /* General user level IPX */ + sk->debug=0; + sk->ipx_intrfc = NULL; + memset(&sk->ipx_dest_addr,'\0',sizeof(sk->ipx_dest_addr)); + sk->ipx_port = 0; + sk->mtu=IPX_MTU; + + if(sock!=NULL) + { + sock->data=(void *)sk; + sk->sleep=sock->wait; + } + + sk->state_change=def_callback1; + sk->data_ready=def_callback2; + sk->write_space=def_callback1; + sk->error_report=def_callback1; + + sk->zapped=1; + return 0; +} + +static int ipx_release(struct socket *sock, struct socket *peer) +{ + ipx_socket *sk=(ipx_socket *)sock->data; + if(sk==NULL) + return(0); + if(!sk->dead) + sk->state_change(sk); + sk->dead=1; + sock->data=NULL; + 
ipx_destroy_socket(sk); + return(0); +} + +static int ipx_dup(struct socket *newsock,struct socket *oldsock) +{ + return(ipx_create(newsock,SOCK_DGRAM)); +} + +static unsigned short +ipx_first_free_socketnum(ipx_interface *intrfc) +{ + unsigned short socketNum = intrfc->if_sknum; + + if (socketNum < IPX_MIN_EPHEMERAL_SOCKET) + socketNum = IPX_MIN_EPHEMERAL_SOCKET; + + while (ipxitf_find_socket(intrfc, ntohs(socketNum)) != NULL) + if (socketNum > IPX_MAX_EPHEMERAL_SOCKET) + socketNum = IPX_MIN_EPHEMERAL_SOCKET; + else + socketNum++; + + intrfc->if_sknum = socketNum; + return ntohs(socketNum); +} + +static int ipx_bind(struct socket *sock, struct sockaddr *uaddr,int addr_len) +{ + ipx_socket *sk; + ipx_interface *intrfc; + struct sockaddr_ipx *addr=(struct sockaddr_ipx *)uaddr; + + sk=(ipx_socket *)sock->data; + + if(sk->zapped==0) + return -EIO; + + if(addr_len!=sizeof(struct sockaddr_ipx)) + return -EINVAL; + + intrfc = ipxitf_find_using_net(addr->sipx_network); + if (intrfc == NULL) + return -EADDRNOTAVAIL; + + if (addr->sipx_port == 0) { + addr->sipx_port = ipx_first_free_socketnum(intrfc); + if (addr->sipx_port == 0) + return -EINVAL; + } + + if(ntohs(addr->sipx_port)<IPX_MIN_EPHEMERAL_SOCKET && !suser()) + return -EPERM; /* protect IPX system stuff like routing/sap */ + + /* Source addresses are easy. 
It must be our network:node pair for + an interface routed to IPX with the ipx routing ioctl() */ + + if(ipxitf_find_socket(intrfc, addr->sipx_port)!=NULL) { + if(sk->debug) + printk("IPX: bind failed because port %X in use.\n", + (int)addr->sipx_port); + return -EADDRINUSE; + } + + sk->ipx_port=addr->sipx_port; + ipxitf_insert_socket(intrfc, sk); + sk->zapped=0; + if(sk->debug) + printk("IPX: socket is bound.\n"); + return 0; +} + +static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + ipx_socket *sk=(ipx_socket *)sock->data; + struct sockaddr_ipx *addr; + + sk->state = TCP_CLOSE; + sock->state = SS_UNCONNECTED; + + if(addr_len!=sizeof(*addr)) + return(-EINVAL); + addr=(struct sockaddr_ipx *)uaddr; + + if(sk->ipx_port==0) + /* put the autobinding in */ + { + struct sockaddr_ipx uaddr; + int ret; + + uaddr.sipx_port = 0; + uaddr.sipx_network = 0L; + ret = ipx_bind (sock, (struct sockaddr *)&uaddr, sizeof(struct sockaddr_ipx)); + if (ret != 0) return (ret); + } + + if(ipxrtr_lookup(addr->sipx_network)==NULL) + return -ENETUNREACH; + sk->ipx_dest_addr.net=addr->sipx_network; + sk->ipx_dest_addr.sock=addr->sipx_port; + memcpy(sk->ipx_dest_addr.node,addr->sipx_node,IPX_NODE_LEN); + sk->ipx_type=addr->sipx_type; + sock->state = SS_CONNECTED; + sk->state=TCP_ESTABLISHED; + return 0; +} + +static int ipx_socketpair(struct socket *sock1, struct socket *sock2) +{ + return(-EOPNOTSUPP); +} + +static int ipx_accept(struct socket *sock, struct socket *newsock, int flags) +{ + if(newsock->data) + kfree_s(newsock->data,sizeof(ipx_socket)); + return -EOPNOTSUPP; +} + +static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + ipx_address *addr; + struct sockaddr_ipx sipx; + ipx_socket *sk; + + sk=(ipx_socket *)sock->data; + + *uaddr_len = sizeof(struct sockaddr_ipx); + + if(peer) { + if(sk->state!=TCP_ESTABLISHED) + return -ENOTCONN; + addr=&sk->ipx_dest_addr; + sipx.sipx_network = addr->net; + 
memcpy(sipx.sipx_node,addr->node,IPX_NODE_LEN); + sipx.sipx_port = addr->sock; + } else { + if (sk->ipx_intrfc != NULL) { + sipx.sipx_network = sk->ipx_intrfc->if_netnum; + memcpy(sipx.sipx_node, sk->ipx_intrfc->if_node, + IPX_NODE_LEN); + } else { + sipx.sipx_network = 0L; + memset(sipx.sipx_node, '\0', IPX_NODE_LEN); + } + sipx.sipx_port = sk->ipx_port; + } + + sipx.sipx_family = AF_IPX; + sipx.sipx_type = sk->ipx_type; + memcpy(uaddr,&sipx,sizeof(sipx)); + return 0; +} + +#if 0 +/* + * User to dump IPX packets (debugging) + */ +void dump_data(char *str,unsigned char *d) { + static char h2c[] = "0123456789ABCDEF"; + int l,i; + char *p, b[64]; + for (l=0;l<16;l++) { + p = b; + for (i=0; i < 8 ; i++) { + *(p++) = h2c[d[i] & 0x0f]; + *(p++) = h2c[(d[i] >> 4) & 0x0f]; + *(p++) = ' '; + } + *(p++) = '-'; + *(p++) = ' '; + for (i=0; i < 8 ; i++) *(p++) = ' '<= d[i] && d[i]<'\177' ? d[i] : '.'; + *p = '\000'; + d += i; + printk("%s-%04X: %s\n",str,l*8,b); + } +} + +void dump_addr(char *str,ipx_address *p) { + printk("%s: %08X:%02X%02X%02X%02X%02X%02X:%04X\n", + str,ntohl(p->net),p->node[0],p->node[1],p->node[2], + p->node[3],p->node[4],p->node[5],ntohs(p->sock)); +} + +void dump_hdr(char *str,ipx_packet *p) { + printk("%s: CHKSUM=%04X SIZE=%d (%04X) HOPS=%d (%02X) TYPE=%02X\n", + str,p->ipx_checksum,ntohs(p->ipx_pktsize),ntohs(p->ipx_pktsize), + p->ipx_tctrl,p->ipx_tctrl,p->ipx_type); + dump_addr(" IPX-DST",&p->ipx_dest); + dump_addr(" IPX-SRC",&p->ipx_source); +} + +void dump_pkt(char *str,ipx_packet *p) { + dump_hdr(str,p); + dump_data(str,(unsigned char *)p); +} +#endif + +int ipx_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + /* NULL here for pt means the packet was looped back */ + ipx_interface *intrfc; + ipx_packet *ipx; + + ipx=(ipx_packet *)skb->h.raw; + + if(ipx->ipx_checksum!=IPX_NO_CHECKSUM) { + /* We don't do checksum options. We can't really. Novell don't seem to have documented them. 
+ If you need them try the XNS checksum since IPX is basically XNS in disguise. It might be + the same... */ + kfree_skb(skb,FREE_READ); + return 0; + } + + /* Too small */ + if(htons(ipx->ipx_pktsize)<sizeof(ipx_packet)) { + kfree_skb(skb,FREE_READ); + return 0; + } + + /* Determine what local ipx endpoint this is */ + intrfc = ipxitf_find_using_phys(dev, pt->type); + if (intrfc == NULL) { + if (ipxcfg_auto_create_interfaces) { + intrfc = ipxitf_auto_create(dev, pt->type); + } + + if (intrfc == NULL) { + /* Not one of ours */ + kfree_skb(skb,FREE_READ); + return 0; + } + } + + return ipxitf_rcv(intrfc, skb); +} + +static int ipx_sendto(struct socket *sock, void *ubuf, int len, int noblock, + unsigned flags, struct sockaddr *usip, int addr_len) +{ + ipx_socket *sk=(ipx_socket *)sock->data; + struct sockaddr_ipx *usipx=(struct sockaddr_ipx *)usip; + struct sockaddr_ipx local_sipx; + int retval; + + if (sk->zapped) return -EIO; /* Socket not bound */ + if(flags) return -EINVAL; + + if(usipx) { + if(sk->ipx_port == 0) { + struct sockaddr_ipx uaddr; + int ret; + + uaddr.sipx_port = 0; + uaddr.sipx_network = 0L; + ret = ipx_bind (sock, (struct sockaddr *)&uaddr, sizeof(struct sockaddr_ipx)); + if (ret != 0) return ret; + } + + if(addr_len <sizeof(*usipx)) + return -EINVAL; + if(usipx->sipx_family != AF_IPX) + return -EINVAL; + } else { + if(sk->state!=TCP_ESTABLISHED) + return -ENOTCONN; + usipx=&local_sipx; + usipx->sipx_family=AF_IPX; + usipx->sipx_type=sk->ipx_type; + usipx->sipx_port=sk->ipx_dest_addr.sock; + usipx->sipx_network=sk->ipx_dest_addr.net; + memcpy(usipx->sipx_node,sk->ipx_dest_addr.node,IPX_NODE_LEN); + } + + retval = ipxrtr_route_packet(sk, usipx, ubuf, len); + if (retval < 0) return retval; + + return len; +} + +static int ipx_send(struct socket *sock, void *ubuf, int size, int noblock, unsigned flags) +{ + return ipx_sendto(sock,ubuf,size,noblock,flags,NULL,0); +} + +static int ipx_recvfrom(struct socket *sock, void *ubuf, int size, int noblock, + 
unsigned flags, struct sockaddr *sip, int *addr_len) +{ + ipx_socket *sk=(ipx_socket *)sock->data; + struct sockaddr_ipx *sipx=(struct sockaddr_ipx *)sip; + struct ipx_packet *ipx = NULL; + int copied = 0; + int truesize; + struct sk_buff *skb; + int er; + + if(sk->err) + { + er= -sk->err; + sk->err=0; + return er; + } + + if (sk->zapped) + return -EIO; + + + skb=skb_recv_datagram(sk,flags,noblock,&er); + if(skb==NULL) + return er; + if(addr_len) + *addr_len=sizeof(*sipx); + + ipx = (ipx_packet *)(skb->h.raw); + truesize=ntohs(ipx->ipx_pktsize) - sizeof(ipx_packet); + copied = (truesize > size) ? size : truesize; + skb_copy_datagram(skb,sizeof(struct ipx_packet),ubuf,copied); + + if(sipx) + { + sipx->sipx_family=AF_IPX; + sipx->sipx_port=ipx->ipx_source.sock; + memcpy(sipx->sipx_node,ipx->ipx_source.node,IPX_NODE_LEN); + sipx->sipx_network=ipx->ipx_source.net; + sipx->sipx_type = ipx->ipx_type; + } + skb_free_datagram(skb); + return(truesize); +} + +static int ipx_write(struct socket *sock, char *ubuf, int size, int noblock) +{ + return ipx_send(sock,ubuf,size,noblock,0); +} + + +static int ipx_recv(struct socket *sock, void *ubuf, int size , int noblock, + unsigned flags) +{ + ipx_socket *sk=(ipx_socket *)sock->data; + if(sk->zapped) + return -ENOTCONN; + return ipx_recvfrom(sock,ubuf,size,noblock,flags,NULL, NULL); +} + +static int ipx_read(struct socket *sock, char *ubuf, int size, int noblock) +{ + return ipx_recv(sock,ubuf,size,noblock,0); +} + + +static int ipx_shutdown(struct socket *sk,int how) +{ + return -EOPNOTSUPP; +} + +static int ipx_select(struct socket *sock , int sel_type, select_table *wait) +{ + ipx_socket *sk=(ipx_socket *)sock->data; + + return datagram_select(sk,sel_type,wait); +} + +static int ipx_ioctl(struct socket *sock,unsigned int cmd, unsigned long arg) +{ + int err; + long amount=0; + ipx_socket *sk=(ipx_socket *)sock->data; + + switch(cmd) + { + case TIOCOUTQ: + err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(unsigned long)); + 
if(err) + return err; + amount=sk->sndbuf-sk->wmem_alloc; + if(amount<0) + amount=0; + put_fs_long(amount,(unsigned long *)arg); + return 0; + case TIOCINQ: + { + struct sk_buff *skb; + /* These two are safe on a single CPU system as only user tasks fiddle here */ + if((skb=skb_peek(&sk->receive_queue))!=NULL) + amount=skb->len; + err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(unsigned long)); + if(err) + return err; + put_fs_long(amount,(unsigned long *)arg); + return 0; + } + case SIOCADDRT: + case SIOCDELRT: + if(!suser()) + return -EPERM; + return(ipxrtr_ioctl(cmd,(void *)arg)); + case SIOCSIFADDR: + case SIOCGIFADDR: + case SIOCAIPXITFCRT: + case SIOCAIPXPRISLT: + if(!suser()) + return -EPERM; + return(ipxitf_ioctl(cmd,(void *)arg)); + case SIOCIPXCFGDATA: + { + err=verify_area(VERIFY_WRITE,(void *)arg, + sizeof(ipx_config_data)); + if(err) return err; + return(ipxcfg_get_config_data((void *)arg)); + } + case SIOCGSTAMP: + if (sk) + { + if(sk->stamp.tv_sec==0) + return -ENOENT; + err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(struct timeval)); + if(err) + return err; + memcpy_tofs((void *)arg,&sk->stamp,sizeof(struct timeval)); + return 0; + } + return -EINVAL; + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + case SIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + return -EINVAL; + default: + return(dev_ioctl(cmd,(void *) arg)); + } + /*NOTREACHED*/ + return(0); +} + +static struct proto_ops ipx_proto_ops = { + AF_IPX, + + ipx_create, + ipx_dup, + ipx_release, + ipx_bind, + ipx_connect, + ipx_socketpair, + ipx_accept, + ipx_getname, + ipx_read, + ipx_write, + ipx_select, + ipx_ioctl, + ipx_listen, + ipx_send, + ipx_recv, + ipx_sendto, + ipx_recvfrom, + ipx_shutdown, + ipx_setsockopt, + ipx_getsockopt, + ipx_fcntl, +}; + +/* Called by ddi.c on kernel start up */ + +static struct packet_type ipx_8023_packet_type = + +{ + 0, /* MUTTER ntohs(ETH_P_8023),*/ + NULL, /* All devices */ + ipx_rcv, + NULL, + NULL, +}; + +static 
struct packet_type ipx_dix_packet_type = +{ + 0, /* MUTTER ntohs(ETH_P_IPX),*/ + NULL, /* All devices */ + ipx_rcv, + NULL, + NULL, +}; + +static struct notifier_block ipx_dev_notifier={ + ipxitf_device_event, + NULL, + 0 +}; + + +extern struct datalink_proto *make_EII_client(void); +extern struct datalink_proto *make_8023_client(void); + +void ipx_proto_init(struct net_proto *pro) +{ + unsigned char val = 0xE0; + unsigned char snapval[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; + + (void) sock_register(ipx_proto_ops.family, &ipx_proto_ops); + + pEII_datalink = make_EII_client(); + ipx_dix_packet_type.type=htons(ETH_P_IPX); + dev_add_pack(&ipx_dix_packet_type); + + p8023_datalink = make_8023_client(); + ipx_8023_packet_type.type=htons(ETH_P_802_3); + dev_add_pack(&ipx_8023_packet_type); + + if ((p8022_datalink = register_8022_client(val, ipx_rcv)) == NULL) + printk("IPX: Unable to register with 802.2\n"); + + if ((pSNAP_datalink = register_snap_client(snapval, ipx_rcv)) == NULL) + printk("IPX: Unable to register with SNAP\n"); + + register_netdevice_notifier(&ipx_dev_notifier); + + printk("Swansea University Computer Society IPX 0.30 for NET3.029\n"); + printk("IPX Portions Copyright (c) 1995 Caldera, Inc.\n"); +} +#endif diff --git a/net/netrom/Makefile b/net/netrom/Makefile new file mode 100644 index 000000000..d838c4da5 --- /dev/null +++ b/net/netrom/Makefile @@ -0,0 +1,40 @@ +# +# Makefile for the Linux TCP/IP (INET) layer. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... 
+ +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< +.c.s: + $(CC) $(CFLAGS) -S $< + + +OBJS := af_netrom.o + +ifdef CONFIG_AX25 + +OBJS := $(OBJS) nr_dev.o nr_in.o nr_out.o nr_route.o nr_subr.o nr_timer.o + +endif + +netrom.o: $(OBJS) + $(LD) -r -o netrom.o $(OBJS) + +dep: + $(CPP) -M *.c > .depend + +tar: + tar -cvf /dev/f1 . + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c new file mode 100644 index 000000000..ca1199be0 --- /dev/null +++ b/net/netrom/af_netrom.c @@ -0,0 +1,1339 @@ +/* + * NET/ROM release 003 + * + * This is ALPHA test software. This code may break your machine, randomly fail to work with new + * releases, misbehave and/or generally screw up. It might even work. + * + * This code REQUIRES 1.3.0 or higher/ NET3.029 + * + * This module: + * This module is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * History + * NET/ROM 001 Jonathan(G4KLX) Cloned from the AX25 code. + * NET/ROM 002 Darryl(G7LED) Fixes and address enhancement. + * Jonathan(G4KLX) Complete bind re-think. + * Alan(GW4PTS) Trivial tweaks into new format. + * + * To do: + * Fix non-blocking connect failure. + * Make it use normal SIOCADDRT/DELRT not funny node ioctl() calls. 
+ */
+
+#include <linux/config.h>
+#ifdef CONFIG_NETROM
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/termios.h>	/* For TIOCINQ/OUTQ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <net/netrom.h>
+
+#include <net/ip.h>
+#include <net/arp.h>
+
+/************************************************************************\
+*									 *
+*			Handlers for the socket list			 *
+*									 *
+\************************************************************************/
+
+struct nr_parms_struct nr_default;
+
+static unsigned short circuit = 0x101;
+
+/* Head of the singly linked socket list; entries are chained through sk->next. */
+static struct sock *volatile nr_list = NULL;
+
+/*
+ *	Unlink sk from nr_list. Interrupts are masked (save_flags/cli) while
+ *	the list is examined and modified, and the saved flags are restored
+ *	before returning. A socket that is not on the list is left alone.
+ */
+static void nr_remove_socket(struct sock *sk)
+{
+	struct sock *prev;
+	unsigned long flags;
+
+	save_flags(flags);
+	cli();
+
+	if (nr_list == sk) {
+		/* sk is the head: advance the head past it */
+		nr_list = sk->next;
+	} else {
+		/* otherwise look for the entry whose successor is sk */
+		for (prev = nr_list; prev != NULL && prev->next != NULL; prev = prev->next) {
+			if (prev->next == sk) {
+				prev->next = sk->next;
+				break;
+			}
+		}
+	}
+
+	restore_flags(flags);
+}
+
+/*
+ *	Device notifier callback. Only NETDEV_DOWN is acted upon: ptr is handed
+ *	to nr_rt_device_down() (presumably the device going down - confirm
+ *	against the notifier caller). Every event is answered with NOTIFY_DONE.
+ */
+static int nr_device_event(unsigned long event, void *ptr)
+{
+	if (event == NETDEV_DOWN)
+		nr_rt_device_down(ptr);
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Add a socket to the bound sockets list.
+ */
+static void nr_insert_socket(struct sock *sk)
+{
+	unsigned long flags;
+
+	save_flags(flags);
+	cli();
+
+	/* Push onto the head of nr_list with interrupts masked. */
+	sk->next = nr_list;
+	nr_list  = sk;
+
+	restore_flags(flags);
+}
+
+/*
+ *	Find a socket that wants to accept the Connect Request we just
+ *	received.
+ *
+ *	Walks nr_list with interrupts masked and returns the first socket
+ *	whose NET/ROM source address equals addr (ax25cmp() == 0), whose
+ *	type equals type and whose state is TCP_LISTEN. Returns NULL when
+ *	no socket matches.
+ */
+static struct sock *nr_find_listener(ax25_address *addr, int type)
+{
+	unsigned long flags;
+	struct sock *s;
+
+	save_flags(flags);
+	cli();
+
+	for (s = nr_list; s != NULL; s = s->next) {
+		if (ax25cmp(&s->nr->source_addr, addr) == 0 && s->type == type && s->state == TCP_LISTEN) {
+			restore_flags(flags);
+			return s;
+		}
+	}
+
+	restore_flags(flags);
+	return NULL;
+}
+
+/*
+ *	Find a connected NET/ROM socket given my circuit IDs.
+ *
+ *	Matches on nr->my_index, nr->my_id and the socket type only; the
+ *	socket state is not examined here. Returns NULL when nothing matches.
+ */
+static struct sock *nr_find_socket(unsigned char index, unsigned char id, int type)
+{
+	struct sock *s;
+	unsigned long flags;
+
+	save_flags(flags);
+	cli();
+
+	for (s = nr_list; s != NULL; s = s->next) {
+		if (s->nr->my_index == index && s->nr->my_id == id && s->type == type) {
+			restore_flags(flags);
+			return s;
+		}
+	}
+
+	restore_flags(flags);
+
+	return NULL;
+}
+
+/*
+ *	Find a connected NET/ROM socket given their circuit IDs.
+ *
+ *	Matches on nr->your_index, nr->your_id and the socket type only; the
+ *	socket state is not examined here. Returns NULL when nothing matches.
+ */
+static struct sock *nr_find_peer(unsigned char index, unsigned char id, int type)
+{
+	struct sock *s;
+	unsigned long flags;
+
+	save_flags(flags);
+	cli();
+
+	for (s = nr_list; s != NULL; s = s->next) {
+		if (s->nr->your_index == index && s->nr->your_id == id && s->type == type) {
+			restore_flags(flags);
+			return s;
+		}
+	}
+
+	restore_flags(flags);
+
+	return NULL;
+}
+
+/*
+ *	Deferred destroy.
+ *
+ *	Forward declaration so the timer handler below can call the
+ *	destructor before its definition. The name must match the
+ *	definition exactly.
+ */
+void nr_destroy_socket(struct sock *);
+
+/*
+ *	Handler for deferred kills.
+ *
+ *	Timer callback: data carries the socket pointer cast to
+ *	unsigned long, which is cast back and passed to nr_destroy_socket().
+ */
+static void nr_destroy_timer(unsigned long data)
+{
+	nr_destroy_socket((struct sock *)data);
+}
+
+/*
+ * This is called from user mode and the timers. Thus it protects itself against
+ * interrupt users but doesn't worry about being called during work.
+ * Once it is removed from the queue no interrupt or bottom half will + * touch it and we are (fairly 8-) ) safe. + */ +void nr_destroy_socket(struct sock *sk) /* Not static as its used by the timer */ +{ + struct sk_buff *skb; + unsigned long flags; + + save_flags(flags); + cli(); + + del_timer(&sk->timer); + + nr_remove_socket(sk); + nr_clear_tx_queue(sk); /* Flush the send queue */ + + while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) { + if (skb->sk != sk) { /* A pending connection */ + skb->sk->dead = 1; /* Queue the unaccepted socket for death */ + nr_set_timer(skb->sk); + skb->sk->nr->state = NR_STATE_0; + } + + kfree_skb(skb, FREE_READ); + } + + if (sk->wmem_alloc || sk->rmem_alloc) { /* Defer: outstanding buffers */ + init_timer(&sk->timer); + sk->timer.expires = 10 * HZ; + sk->timer.function = nr_destroy_timer; + sk->timer.data = (unsigned long)sk; + add_timer(&sk->timer); + } else { + kfree_s(sk->nr, sizeof(*sk->nr)); + kfree_s(sk, sizeof(*sk)); + } + + restore_flags(flags); +} + +/*******************************************************************************************************************\ +* * +* Handling for system calls applied via the various interfaces to a NET/ROM socket object * +* * +\*******************************************************************************************************************/ + +static int nr_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + switch(cmd) + { + default: + return(-EINVAL); + } +} + +static int nr_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen) +{ + struct sock *sk; + int err, opt; + + sk = (struct sock *)sock->data; + + if (level == SOL_SOCKET) + return sock_setsockopt(sk, level, optname, optval, optlen); + + if (level != SOL_NETROM) + return -EOPNOTSUPP; + + if (optval == NULL) + return -EINVAL; + + if ((err = verify_area(VERIFY_READ, optval, sizeof(int))) != 0) + return err; + + opt = get_fs_long((unsigned long *)optval); + + switch 
(optname) { + case NETROM_T1: + if (opt < 1) + return -EINVAL; + sk->nr->t1 = opt * PR_SLOWHZ; + return 0; + + case NETROM_T2: + if (opt < 1) + return -EINVAL; + sk->nr->t2 = opt * PR_SLOWHZ; + return 0; + + case NETROM_N2: + if (opt < 1 || opt > 31) + return -EINVAL; + sk->nr->n2 = opt; + return 0; + + default: + return -ENOPROTOOPT; + } +} + +static int nr_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + struct sock *sk; + int val = 0; + int err; + + sk = (struct sock *)sock->data; + + if (level == SOL_SOCKET) + return sock_getsockopt(sk, level, optname, optval, optlen); + + if (level != SOL_NETROM) + return -EOPNOTSUPP; + + switch (optname) { + case NETROM_T1: + val = sk->nr->t1 / PR_SLOWHZ; + break; + + case NETROM_T2: + val = sk->nr->t2 / PR_SLOWHZ; + break; + + case NETROM_N2: + val = sk->nr->n2; + break; + + default: + return -ENOPROTOOPT; + } + + if ((err = verify_area(VERIFY_WRITE, optlen, sizeof(int))) != 0) + return err; + + put_fs_long(sizeof(int), (unsigned long *)optlen); + + if ((err = verify_area(VERIFY_WRITE, optval, sizeof(int))) != 0) + return err; + + put_fs_long(val, (unsigned long *)optval); + + return 0; +} + +static int nr_listen(struct socket *sock, int backlog) +{ + struct sock *sk = (struct sock *)sock->data; + + if (sk->type == SOCK_SEQPACKET && sk->state != TCP_LISTEN) { + memset(&sk->nr->user_addr, '\0', sizeof(ax25_address)); + sk->max_ack_backlog = backlog; + sk->state = TCP_LISTEN; + return 0; + } + + return -EOPNOTSUPP; +} + +static void def_callback1(struct sock *sk) +{ + if (!sk->dead) + wake_up_interruptible(sk->sleep); +} + +static void def_callback2(struct sock *sk, int len) +{ + if (!sk->dead) + wake_up_interruptible(sk->sleep); +} + +static int nr_create(struct socket *sock, int protocol) +{ + struct sock *sk; + nr_cb *nr; + + if ((sk = (struct sock *)kmalloc(sizeof(*sk), GFP_ATOMIC)) == NULL) + return -ENOMEM; + + if ((nr = (nr_cb *)kmalloc(sizeof(*nr), GFP_ATOMIC)) == NULL) { + 
kfree_s(sk, sizeof(*sk)); + return -ENOMEM; + } + + sk->type = sock->type; + + switch (sock->type) { + case SOCK_SEQPACKET: + break; + default: + kfree_s((void *)sk, sizeof(*sk)); + kfree_s((void *)nr, sizeof(*nr)); + return -ESOCKTNOSUPPORT; + } + + skb_queue_head_init(&sk->receive_queue); + skb_queue_head_init(&sk->write_queue); + skb_queue_head_init(&sk->back_log); + + init_timer(&sk->timer); + + sk->socket = sock; + sk->protocol = protocol; + sk->dead = 0; + sk->next = NULL; + sk->broadcast = 0; + sk->rcvbuf = SK_RMEM_MAX; + sk->sndbuf = SK_WMEM_MAX; + sk->wmem_alloc = 0; + sk->rmem_alloc = 0; + sk->inuse = 0; + sk->debug = 0; + sk->prot = NULL; /* So we use default free mechanisms */ + sk->err = 0; + sk->localroute = 0; + sk->send_head = NULL; + sk->state = TCP_CLOSE; + sk->shutdown = 0; + sk->priority = SOPRI_NORMAL; + sk->ack_backlog = 0; + sk->mtu = NETROM_MTU; /* 236 */ + sk->zapped = 1; + sk->window = nr_default.window; + + sk->state_change = def_callback1; + sk->data_ready = def_callback2; + sk->write_space = def_callback1; + sk->error_report = def_callback1; + + if (sock != NULL) { + sock->data = (void *)sk; + sk->sleep = sock->wait; + } + + skb_queue_head_init(&nr->ack_queue); + skb_queue_head_init(&nr->reseq_queue); + + nr->my_index = 0; + nr->my_id = 0; + nr->rtt = nr_default.timeout; + nr->t1 = nr_default.timeout; + nr->t2 = nr_default.ack_delay; + nr->n2 = nr_default.tries; + + nr->t1timer = 0; + nr->t2timer = 0; + nr->t4timer = 0; + nr->n2count = 0; + + nr->va = 0; + nr->vr = 0; + nr->vs = 0; + nr->vl = 0; + + nr->your_index = 0; + nr->your_id = 0; + + nr->my_index = 0; + nr->my_id = 0; + + nr->state = NR_STATE_0; + + memset(&nr->source_addr, '\0', sizeof(ax25_address)); + memset(&nr->user_addr, '\0', sizeof(ax25_address)); + memset(&nr->dest_addr, '\0', sizeof(ax25_address)); + + nr->sk = sk; + sk->nr = nr; + + return 0; +} + +static struct sock *nr_make_new(struct sock *osk) +{ + struct sock *sk; + nr_cb *nr; + + if ((sk = (struct sock 
*)kmalloc(sizeof(*sk), GFP_ATOMIC)) == NULL) + return NULL; + + if ((nr = (nr_cb *)kmalloc(sizeof(*nr), GFP_ATOMIC)) == NULL) { + kfree_s(sk, sizeof(*sk)); + return NULL; + } + + sk->type = osk->type; + sk->socket = osk->socket; + + switch (osk->type) { + case SOCK_SEQPACKET: + break; + default: + kfree_s((void *)sk, sizeof(*sk)); + kfree_s((void *)nr, sizeof(*nr)); + return NULL; + } + + skb_queue_head_init(&sk->receive_queue); + skb_queue_head_init(&sk->write_queue); + skb_queue_head_init(&sk->back_log); + + init_timer(&sk->timer); + + sk->rmem_alloc = 0; + sk->dead = 0; + sk->next = NULL; + sk->priority = osk->priority; + sk->broadcast = 0; + sk->protocol = osk->protocol; + sk->rcvbuf = osk->rcvbuf; + sk->sndbuf = osk->sndbuf; + sk->wmem_alloc = 0; + sk->rmem_alloc = 0; + sk->inuse = 0; + sk->ack_backlog = 0; + sk->prot = NULL; /* So we use default free mechanisms */ + sk->err = 0; + sk->localroute = 0; + sk->send_head = NULL; + sk->debug = osk->debug; + sk->state = TCP_ESTABLISHED; + sk->window = osk->window; + sk->shutdown = 0; + sk->mtu = osk->mtu; + sk->sleep = osk->sleep; + sk->zapped = osk->zapped; + + sk->state_change = def_callback1; + sk->data_ready = def_callback2; + sk->write_space = def_callback1; + sk->error_report = def_callback1; + + skb_queue_head_init(&nr->ack_queue); + skb_queue_head_init(&nr->reseq_queue); + + nr->rtt = osk->nr->rtt; + nr->t1 = osk->nr->t1; + nr->t2 = osk->nr->t2; + nr->n2 = osk->nr->n2; + + nr->t1timer = 0; + nr->t2timer = 0; + nr->t4timer = 0; + nr->n2count = 0; + + nr->va = 0; + nr->vr = 0; + nr->vs = 0; + nr->vl = 0; + + sk->nr = nr; + nr->sk = sk; + + return sk; +} + +static int nr_dup(struct socket *newsock, struct socket *oldsock) +{ + struct sock *sk = (struct sock *)oldsock->data; + + return nr_create(newsock, sk->protocol); +} + +static int nr_release(struct socket *sock, struct socket *peer) +{ + struct sock *sk = (struct sock *)sock->data; + + if (sk == NULL) return 0; + + if (sk->type == SOCK_SEQPACKET) { + switch 
(sk->nr->state) { + case NR_STATE_0: + sk->dead = 1; + sk->state_change(sk); + nr_destroy_socket(sk); + break; + + case NR_STATE_1: + sk->nr->state = NR_STATE_0; + sk->dead = 1; + sk->state_change(sk); + nr_destroy_socket(sk); + break; + + case NR_STATE_2: + nr_write_internal(sk, NR_DISCACK); + sk->nr->state = NR_STATE_0; + sk->dead = 1; + sk->state_change(sk); + nr_destroy_socket(sk); + break; + + case NR_STATE_3: + nr_clear_tx_queue(sk); + sk->nr->n2count = 0; + nr_write_internal(sk, NR_DISCREQ); + sk->nr->t1timer = sk->nr->t1 = nr_calculate_t1(sk); + sk->nr->t2timer = 0; + sk->nr->t4timer = 0; + sk->nr->state = NR_STATE_2; + sk->state_change(sk); + sk->dead = 1; + break; + + default: + break; + } + } else { + sk->dead = 1; + sk->state_change(sk); + nr_destroy_socket(sk); + } + + sock->data = NULL; + + return 0; +} + +static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +{ + struct sock *sk; + struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr; + ax25_address *user, *source; + + sk = (struct sock *)sock->data; + + if (sk->zapped == 0) + return -EIO; + + if (addr_len != sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) + return -EINVAL; + +#ifdef DONTDO + if (nr_find_listener(&addr->fsa_ax25.sax25_call, sk->type) != NULL) { + if (sk->debug) + printk("NET/ROM: bind failed: in use\n"); + return -EADDRINUSE; + } +#endif + + if (nr_dev_get(&addr->fsa_ax25.sax25_call) == NULL) { + if (sk->debug) + printk("NET/ROM: bind failed: invalid node callsign\n"); + return -EADDRNOTAVAIL; + } + + /* + * Only the super user can set an arbitrary user callsign. 
+ */ + if (addr->fsa_ax25.sax25_ndigis == 1) { + if (!suser()) + return -EPERM; + memcpy(&sk->nr->user_addr, &addr->fsa_digipeater[0], sizeof(ax25_address)); + memcpy(&sk->nr->source_addr, &addr->fsa_ax25.sax25_call, sizeof(ax25_address)); + } else { + source = &addr->fsa_ax25.sax25_call; + + if ((user = ax25_findbyuid(current->euid)) == NULL) { + if (ax25_uid_policy && !suser()) + return -EPERM; + user = source; + } + + memcpy(&sk->nr->user_addr, user, sizeof(ax25_address)); + memcpy(&sk->nr->source_addr, source, sizeof(ax25_address)); + } + + nr_insert_socket(sk); + + sk->zapped = 0; + + if (sk->debug) + printk("NET/ROM: socket is bound\n"); + + return 0; +} + +static int nr_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + struct sock *sk = (struct sock *)sock->data; + struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr; + ax25_address *user, *source = NULL; + struct device *dev; + + if (sk->state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { + sock->state = SS_CONNECTED; + return 0; /* Connect completed during a ERESTARTSYS event */ + } + + if (sk->state == TCP_CLOSE && sock->state == SS_CONNECTING) { + sock->state = SS_UNCONNECTED; + return -ECONNREFUSED; + } + + if (sk->state == TCP_ESTABLISHED && sk->type == SOCK_SEQPACKET) + return -EISCONN; /* No reconnect on a seqpacket socket */ + + sk->state = TCP_CLOSE; + sock->state = SS_UNCONNECTED; + + if (addr_len != sizeof(struct sockaddr_ax25)) + return -EINVAL; + + if ((dev = nr_dev_first()) == NULL) + return -ENETUNREACH; + + if (sk->zapped) { /* Must bind first - autobinding in this may or may not work */ + source = (ax25_address *)dev->dev_addr; + + if ((user = ax25_findbyuid(current->euid)) == NULL) { + if (ax25_uid_policy && !suser()) + return -EPERM; + user = source; + } + + memcpy(&sk->nr->user_addr, user, sizeof(ax25_address)); + memcpy(&sk->nr->source_addr, source, sizeof(ax25_address)); + + sk->zapped = 0; /* Mark bound only after the -EPERM exit above, as nr_bind() does */ + + nr_insert_socket(sk); /* Finish the bind */ 
+ } + + memcpy(&sk->nr->dest_addr, &addr->sax25_call, sizeof(ax25_address)); + + sk->nr->my_index = circuit / 256; + sk->nr->my_id = circuit % 256; + + circuit++; + + /* Move to connecting socket, start sending Connect Requests */ + sock->state = SS_CONNECTING; + sk->state = TCP_SYN_SENT; + nr_establish_data_link(sk); + sk->nr->state = NR_STATE_1; + nr_set_timer(sk); + + /* Now the loop */ + if (sk->state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) + return -EINPROGRESS; + + cli(); /* To avoid races on the sleep */ + + /* + * A Connect Ack with Choke or timeout or failed routing will go to closed. + */ + while (sk->state == TCP_SYN_SENT) { + interruptible_sleep_on(sk->sleep); + if (current->signal & ~current->blocked) { + sti(); + return -ERESTARTSYS; + } + } + + if (sk->state != TCP_ESTABLISHED) { + sti(); + sock->state = SS_UNCONNECTED; + return -sk->err; /* Always set at this point */ + } + + sock->state = SS_CONNECTED; + + sti(); + + return 0; +} + +static int nr_socketpair(struct socket *sock1, struct socket *sock2) +{ + return -EOPNOTSUPP; +} + +static int nr_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct sock *sk; + struct sock *newsk; + struct sk_buff *skb; + + if (newsock->data) + kfree_s(newsock->data, sizeof(struct sock)); + + newsock->data = NULL; + + sk = (struct sock *)sock->data; + + if (sk->type != SOCK_SEQPACKET) + return -EOPNOTSUPP; + + if (sk->state != TCP_LISTEN) + return -EINVAL; + + /* The write queue this time is holding sockets ready to use + hooked into the SABM we saved */ + do { + cli(); + if ((skb = skb_dequeue(&sk->receive_queue)) == NULL) { + if (flags & O_NONBLOCK) { + sti(); + return 0; + } + interruptible_sleep_on(sk->sleep); + if (current->signal & ~current->blocked) { + sti(); + return -ERESTARTSYS; + } + } + } while (skb == NULL); + + newsk = skb->sk; + newsk->pair = NULL; + sti(); + + /* Now attach up the new socket */ + skb->sk = NULL; + kfree_skb(skb, FREE_READ); + sk->ack_backlog--; + 
newsock->data = newsk; + + return 0; +} + +static int nr_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr; + struct sock *sk; + + sk = (struct sock *)sock->data; + + if (peer != 0) { + if (sk->state != TCP_ESTABLISHED) + return -ENOTCONN; + sax->fsa_ax25.sax25_family = AF_NETROM; + sax->fsa_ax25.sax25_ndigis = 1; + memcpy(&sax->fsa_ax25.sax25_call, &sk->nr->user_addr, sizeof(ax25_address)); + memcpy(&sax->fsa_digipeater[0], &sk->nr->dest_addr, sizeof(ax25_address)); + *uaddr_len = sizeof(struct sockaddr_ax25) + sizeof(ax25_address); + } else { + sax->fsa_ax25.sax25_family = AF_NETROM; + sax->fsa_ax25.sax25_ndigis = 0; + memcpy(&sax->fsa_ax25.sax25_call, &sk->nr->source_addr, sizeof(ax25_address)); + *uaddr_len = sizeof(struct sockaddr_ax25); + } + + return 0; +} + +int nr_rx_frame(struct sk_buff *skb, struct device *dev) +{ + struct sock *sk; + struct sock *make; + ax25_address *src, *dest, *user; + unsigned short circuit_index, circuit_id; + unsigned short frametype, window; + + skb->sk = NULL; /* Initially we don't know who its for */ + + src = (ax25_address *)(skb->data + 17); + dest = (ax25_address *)(skb->data + 24); + + circuit_index = skb->data[32]; + circuit_id = skb->data[33]; + frametype = skb->data[36]; + +#ifdef CONFIG_INET + /* + * Check for an incoming IP over NET/ROM frame. + */ + if ((frametype & 0x0F) == NR_PROTOEXT && circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) { + skb->h.raw = skb->data + 37; + + return nr_rx_ip(skb, dev); + } +#endif + + /* + * Find an existing socket connection, based on circuit ID, if its + * a Connect Request base it on their circuit ID. 
+ */ + if (((frametype & 0x0F) != NR_CONNREQ && (sk = nr_find_socket(circuit_index, circuit_id, SOCK_SEQPACKET)) != NULL) || + ((frametype & 0x0F) == NR_CONNREQ && (sk = nr_find_peer(circuit_index, circuit_id, SOCK_SEQPACKET)) != NULL)) { + skb->h.raw = skb->data + 37; + skb->len -= 20; + + return nr_process_rx_frame(sk, skb); + } + + if ((frametype & 0x0F) != NR_CONNREQ) + return 0; + + sk = nr_find_listener(dest, SOCK_SEQPACKET); + + if (sk == NULL || sk->ack_backlog == sk->max_ack_backlog || (make = nr_make_new(sk)) == NULL) { + nr_transmit_dm(skb); + return 0; + } + + user = (ax25_address *)(skb->data + 38); + window = skb->data[37]; + + skb->sk = make; + make->state = TCP_ESTABLISHED; + + /* Fill in his circuit details */ + memcpy(&make->nr->source_addr, dest, sizeof(ax25_address)); + memcpy(&make->nr->dest_addr, src, sizeof(ax25_address)); + memcpy(&make->nr->user_addr, user, sizeof(ax25_address)); + + make->nr->your_index = circuit_index; + make->nr->your_id = circuit_id; + + make->nr->my_index = circuit / 256; + make->nr->my_id = circuit % 256; + + circuit++; + + /* Window negotiation */ + if (window < make->window) + make->window = window; + + nr_write_internal(make, NR_CONNACK); + + make->nr->condition = 0x00; + make->nr->vs = 0; + make->nr->va = 0; + make->nr->vr = 0; + make->nr->vl = 0; + make->nr->state = NR_STATE_3; + sk->ack_backlog++; + make->pair = sk; + + nr_insert_socket(make); + + skb_queue_head(&sk->receive_queue, skb); + + nr_set_timer(make); + + if (!sk->dead) + sk->data_ready(sk, skb->len); + + return 1; +} + +static int nr_sendto(struct socket *sock, void *ubuf, int len, int noblock, + unsigned flags, struct sockaddr *usip, int addr_len) +{ + struct sock *sk = (struct sock *)sock->data; + struct sockaddr_ax25 *usax = (struct sockaddr_ax25 *)usip; + int err; + struct sockaddr_ax25 sax; + struct sk_buff *skb; + unsigned char *asmptr; + int size; + + if (sk->err) { + err = sk->err; + sk->err = 0; + return -err; + } + + if (flags) + return 
-EINVAL; + + if (sk->zapped) + return -EADDRNOTAVAIL; + + if (usax) { + if (addr_len < sizeof(sax)) + return -EINVAL; + memcpy(&sax, usax, sizeof(sax)); + if (sk->type == SOCK_SEQPACKET && memcmp(&sk->nr->dest_addr, &sax.sax25_call, sizeof(ax25_address)) != 0) + return -EISCONN; + if (sax.sax25_family != AF_NETROM) + return -EINVAL; + } else { + if (sk->state != TCP_ESTABLISHED) + return -ENOTCONN; + sax.sax25_family = AF_NETROM; + memcpy(&sax.sax25_call, &sk->nr->dest_addr, sizeof(ax25_address)); + } + + if (sk->debug) + printk("NET/ROM: sendto: Addresses built.\n"); + + /* Build a packet */ + if (sk->debug) + printk("NET/ROM: sendto: building packet.\n"); + + size = len + 37; + + if ((skb = sock_alloc_send_skb(sk, size, 0, &err)) == NULL) + return err; + + skb->sk = sk; + skb->free = 1; + skb->arp = 1; + skb->len = size; + + asmptr = skb->data + 16; + + if (sk->debug) + printk("Building NET/ROM Header.\n"); + + /* Build a NET/ROM Network header */ + + *asmptr++ = AX25_P_NETROM; + + memcpy(asmptr, &sk->nr->source_addr, sizeof(ax25_address)); + asmptr[6] &= ~LAPB_C; + asmptr[6] &= ~LAPB_E; + asmptr[6] |= SSID_SPARE; + asmptr += 7; + + memcpy(asmptr, &sax.sax25_call, sizeof(ax25_address)); + asmptr[6] &= ~LAPB_C; + asmptr[6] |= LAPB_E; + asmptr[6] |= SSID_SPARE; + asmptr += 7; + + *asmptr++ = nr_default.ttl; + + /* Build a NET/ROM Transport header */ + + *asmptr++ = sk->nr->your_index; + *asmptr++ = sk->nr->your_id; + *asmptr++ = 0; /* To be filled in later */ + *asmptr++ = 0; /* Ditto */ + *asmptr++ = NR_INFO; + + if (sk->debug) + printk("Built header.\n"); + + skb->h.raw = asmptr; + + if (sk->debug) + printk("NET/ROM: Appending user data\n"); + + /* User data follows immediately after the NET/ROM transport header */ + memcpy_fromfs(asmptr, ubuf, len); + + if (sk->debug) + printk("NET/ROM: Transmitting buffer\n"); + + if (sk->state != TCP_ESTABLISHED) { + kfree_skb(skb, FREE_WRITE); + return -ENOTCONN; + } + + nr_output(sk, skb); /* Shove it onto the queue */ + + 
return len; +} + +static int nr_send(struct socket *sock, void *ubuf, int size, int noblock, unsigned flags) +{ + return nr_sendto(sock, ubuf, size, noblock, flags, NULL, 0); +} + +static int nr_write(struct socket *sock, char *ubuf, int size, int noblock) +{ + return nr_send(sock, ubuf, size, noblock, 0); +} + +static int nr_recvfrom(struct socket *sock, void *ubuf, int size, int noblock, + unsigned flags, struct sockaddr *sip, int *addr_len) +{ + struct sock *sk = (struct sock *)sock->data; + struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)sip; + int copied = 0; + struct sk_buff *skb; + int er; + + if (sk->err) { + er = -sk->err; + sk->err = 0; + return er; + } + + if (addr_len != NULL) + *addr_len = sizeof(*sax); + + /* This works for seqpacket too. The receiver has ordered the queue for us! We do one quick check first though */ + if (sk->type == SOCK_SEQPACKET && sk->state != TCP_ESTABLISHED) + return -ENOTCONN; + + /* Now we can treat all alike */ + if ((skb = skb_recv_datagram(sk, flags, noblock, &er)) == NULL) + return er; + + copied = (size < skb->len) ? 
size : skb->len; + + skb_copy_datagram(skb, 0, ubuf, copied); + + if (sax != NULL) { + struct sockaddr_ax25 addr; + + addr.sax25_family = AF_NETROM; + memcpy(&addr.sax25_call, skb->data + 24, sizeof(ax25_address)); + + memcpy(sax, &addr, sizeof(*sax)); + + *addr_len = sizeof(*sax); + } + + skb_free_datagram(skb); + + return copied; +} + +static int nr_recv(struct socket *sock, void *ubuf, int size , int noblock, + unsigned flags) +{ + struct sock *sk = (struct sock *)sock->data; + + if (sk->zapped) + return -ENOTCONN; + + return nr_recvfrom(sock, ubuf, size, noblock, flags, NULL, NULL); +} + +static int nr_read(struct socket *sock, char *ubuf, int size, int noblock) +{ + return nr_recv(sock, ubuf, size, noblock, 0); +} + +static int nr_shutdown(struct socket *sk, int how) +{ + return -EOPNOTSUPP; +} + +static int nr_select(struct socket *sock , int sel_type, select_table *wait) +{ + struct sock *sk = (struct sock *)sock->data; + + return datagram_select(sk, sel_type, wait); +} + +static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = (struct sock *)sock->data; + int err; + long amount = 0; + + switch (cmd) { + case TIOCOUTQ: + if ((err = verify_area(VERIFY_WRITE, (void *)arg, sizeof(unsigned long))) != 0) + return err; + amount = sk->sndbuf - sk->wmem_alloc; + if (amount < 0) + amount = 0; + put_fs_long(amount, (unsigned long *)arg); + return 0; + + case TIOCINQ: + { + struct sk_buff *skb; + /* These two are safe on a single CPU system as only user tasks fiddle here */ + if ((skb = skb_peek(&sk->receive_queue)) != NULL) + amount = skb->len; + if ((err = verify_area(VERIFY_WRITE, (void *)arg, sizeof(unsigned long))) != 0) + return err; + put_fs_long(amount, (unsigned long *)arg); + return 0; + } + + case SIOCGSTAMP: + if (sk != NULL) { + if (sk->stamp.tv_sec==0) + return -ENOENT; + if ((err = verify_area(VERIFY_WRITE,(void *)arg,sizeof(struct timeval))) != 0) + return err; + memcpy_tofs((void *)arg, &sk->stamp, 
sizeof(struct timeval)); + return 0; + } + return -EINVAL; + + case SIOCGIFADDR: + case SIOCSIFADDR: + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + case SIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + case SIOCGIFMETRIC: + case SIOCSIFMETRIC: + return -EINVAL; + + case SIOCNRADDNODE: + case SIOCNRDELNODE: + case SIOCNRADDNEIGH: + case SIOCNRDELNEIGH: + case SIOCNRDECOBS: + if (!suser()) return -EPERM; + return nr_rt_ioctl(cmd, (void *)arg); + + case SIOCNRGETPARMS: + { + struct nr_parms_struct nr_parms; + if ((err = verify_area(VERIFY_WRITE, (void *)arg, sizeof(struct nr_parms_struct))) != 0) + return err; + memcpy_fromfs(&nr_parms, (void *)arg, sizeof(struct nr_parms_struct)); + nr_parms = nr_default; + memcpy_tofs((void *)arg, &nr_parms, sizeof(struct nr_parms_struct)); + return 0; + } + + case SIOCNRSETPARMS: + { + struct nr_parms_struct nr_parms; + if (!suser()) return -EPERM; + if ((err = verify_area(VERIFY_READ, (void *)arg, sizeof(struct nr_parms_struct))) != 0) + return err; + memcpy_fromfs(&nr_parms, (void *)arg, sizeof(struct nr_parms_struct)); + nr_default = nr_parms; + return 0; + } + + default: + return dev_ioctl(cmd, (void *)arg); + } + + /*NOTREACHED*/ + return(0); +} + +int nr_get_info(char *buffer, char **start, off_t offset, int length) +{ + struct sock *s; + int len = 0; + off_t pos = 0; + off_t begin = 0; + + cli(); + + len += sprintf(buffer, "user_addr dest_node src_node my your st vs vr va t1 t2 n2 rtt wnd Snd-Q Rcv-Q\n"); + + for (s = nr_list; s != NULL; s = s->next) { + len += sprintf(buffer + len, "%-9s ", + ax2asc(&s->nr->user_addr)); + len += sprintf(buffer + len, "%-9s ", + ax2asc(&s->nr->dest_addr)); + len += sprintf(buffer + len, "%-9s %02X/%02X %02X/%02X %2d %2d %2d %2d %3d/%03d %2d/%02d %2d/%02d %3d %3d %5ld %5ld\n", + ax2asc(&s->nr->source_addr), + s->nr->my_index, s->nr->my_id, + s->nr->your_index, s->nr->your_id, + s->nr->state, + s->nr->vs, s->nr->vr, s->nr->va, + s->nr->t1timer / 
PR_SLOWHZ, + s->nr->t1 / PR_SLOWHZ, + s->nr->t2timer / PR_SLOWHZ, + s->nr->t2 / PR_SLOWHZ, + s->nr->n2count, s->nr->n2, + s->nr->rtt / PR_SLOWHZ, + s->window, + s->wmem_alloc, s->rmem_alloc); + + pos = begin + len; + + if (pos < offset) { + len = 0; + begin = pos; + } + + if (pos > offset + length) + break; + } + + sti(); + + *start = buffer + (offset - begin); + len -= (offset - begin); + + if (len > length) len = length; + + return(len); +} + +static struct proto_ops nr_proto_ops = { + AF_NETROM, + + nr_create, + nr_dup, + nr_release, + nr_bind, + nr_connect, + nr_socketpair, + nr_accept, + nr_getname, + nr_read, + nr_write, + nr_select, + nr_ioctl, + nr_listen, + nr_send, + nr_recv, + nr_sendto, + nr_recvfrom, + nr_shutdown, + nr_setsockopt, + nr_getsockopt, + nr_fcntl, +}; + +static struct notifier_block nr_dev_notifier = { + nr_device_event, + 0 +}; + +void nr_proto_init(struct net_proto *pro) +{ + sock_register(nr_proto_ops.family, &nr_proto_ops); + register_netdevice_notifier(&nr_dev_notifier); + printk("G4KLX NET/ROM for Linux. Version 0.2 ALPHA for AX.25 029 for Linux 1.3.0\n"); + + nr_default.quality = NR_DEFAULT_QUAL; + nr_default.obs_count = NR_DEFAULT_OBS; + nr_default.ttl = NR_DEFAULT_TTL; + nr_default.timeout = NR_DEFAULT_T1; + nr_default.ack_delay = NR_DEFAULT_T2; + nr_default.busy_delay = NR_DEFAULT_T4; + nr_default.tries = NR_DEFAULT_N2; + nr_default.window = NR_DEFAULT_WINDOW; +} + +#endif diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c new file mode 100644 index 000000000..f9fd83f73 --- /dev/null +++ b/net/netrom/nr_dev.c @@ -0,0 +1,254 @@ +/* + * NET/ROM release 002 + * + * This is ALPHA test software. This code may break your machine, randomly fail to work with new + * releases, misbehave and/or generally screw up. It might even work. 
+ * + * This code REQUIRES 1.3.0 or higher/ NET3.029 + * + * This module: + * This module is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * History + * NET/ROM 001 Jonathan(G4KLX) Cloned from loopback.c + */ + +#include <linux/config.h> +#ifdef CONFIG_NETROM +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/fs.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/socket.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/in.h> +#include <linux/if_ether.h> /* For the statistics structure. */ + +#include <asm/system.h> +#include <asm/segment.h> +#include <asm/io.h> + +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> + +#include <net/ip.h> +#include <net/arp.h> + +#include <net/ax25.h> +#include <net/netrom.h> + +/* + * Only allow IP over NET/ROM frames through if the netrom device is up. + */ + +int nr_rx_ip(struct sk_buff *skb, struct device *dev) +{ + struct enet_statistics *stats = (struct enet_statistics *)dev->priv; + + if (!dev->start) { + stats->rx_errors++; + return 0; + } + + stats->rx_packets++; + skb->protocol=htons(ETH_P_IP); + /* Spoof incoming device */ + skb->dev=dev; + + ip_rcv(skb, dev, NULL); + + return 1; +} + +/* + * We can't handle ARP so put some identification characters into the ARP + * packet so that the transmit routine can identify it, and throw it away. + */ + +static int nr_header(unsigned char *buff, struct device *dev, unsigned short type, + void *daddr, void *saddr, unsigned len, struct sk_buff *skb) +{ + if (type == ETH_P_ARP) { + *buff++ = 0xFF; /* Mark it */ + *buff++ = 0xFE; + return 37; + } + + buff += 16; + + *buff++ = AX25_P_NETROM; + + memcpy(buff, (saddr != NULL) ? 
saddr : dev->dev_addr, dev->addr_len); + buff[6] &= ~LAPB_C; + buff[6] &= ~LAPB_E; + buff[6] |= SSID_SPARE; + buff += dev->addr_len; + + if (daddr != NULL) + memcpy(buff, daddr, dev->addr_len); + buff[6] &= ~LAPB_C; + buff[6] |= LAPB_E; + buff[6] |= SSID_SPARE; + buff += dev->addr_len; + + *buff++ = nr_default.ttl; + + *buff++ = NR_PROTO_IP; + *buff++ = NR_PROTO_IP; + *buff++ = 0; + *buff++ = 0; + *buff++ = NR_PROTOEXT; + + if (daddr != NULL) + return 37; + + return -37; +} + +static int nr_rebuild_header(void *buff, struct device *dev, + unsigned long raddr, struct sk_buff *skb) +{ + unsigned char *bp = (unsigned char *)buff; + + if (arp_find(bp + 24, raddr, dev, dev->pa_addr, skb)) + return 1; + + bp[23] &= ~LAPB_C; + bp[23] &= ~LAPB_E; + bp[23] |= SSID_SPARE; + + bp[30] &= ~LAPB_C; + bp[30] |= LAPB_E; + bp[30] |= SSID_SPARE; + + return 0; +} + +static int nr_set_mac_address(struct device *dev, void *addr) +{ + memcpy(dev->dev_addr, addr, dev->addr_len); + + return 0; +} + +static int nr_open(struct device *dev) +{ + dev->tbusy = 0; + dev->start = 1; + + return 0; +} + +static int nr_close(struct device *dev) +{ + dev->tbusy = 1; + dev->start = 0; + + return 0; +} + +static int nr_xmit(struct sk_buff *skb, struct device *dev) +{ + struct enet_statistics *stats = (struct enet_statistics *)dev->priv; + struct sk_buff *skbn; + + if (skb == NULL || dev == NULL) + return 0; + + if (!dev->start) { + printk("netrom: xmit call when iface is down\n"); + return 1; + } + + cli(); + + if (dev->tbusy != 0) { + sti(); + stats->tx_errors++; + return 1; + } + + dev->tbusy = 1; + + sti(); + + if (skb->data[0] != 0xFF && skb->data[1] != 0xFE) { + if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { + dev->tbusy = 0; + stats->tx_errors++; + return 1; + } + + if (!nr_route_frame(skbn, NULL)) { + skbn->free = 1; + kfree_skb(skbn, FREE_WRITE); + dev->tbusy = 0; + stats->tx_errors++; + return 1; + } + } + + dev_kfree_skb(skb, FREE_WRITE); + + stats->tx_packets++; + + dev->tbusy = 0; + + 
mark_bh(NET_BH); + + return 0; +} + +static struct enet_statistics *nr_get_stats(struct device *dev) +{ + return (struct enet_statistics *)dev->priv; +} + +int nr_init(struct device *dev) +{ + int i; + + dev->mtu = 236; /* MTU */ + dev->tbusy = 0; + dev->hard_start_xmit = nr_xmit; + dev->open = nr_open; + dev->stop = nr_close; + + dev->hard_header = nr_header; + dev->hard_header_len = 37; + dev->addr_len = 7; + dev->type = ARPHRD_NETROM; + dev->rebuild_header = nr_rebuild_header; + dev->set_mac_address = nr_set_mac_address; + + /* New-style flags. */ + dev->flags = 0; + dev->family = AF_INET; + + dev->pa_addr = 0; + dev->pa_brdaddr = 0; + dev->pa_mask = 0; + dev->pa_alen = sizeof(unsigned long); + + dev->priv = kmalloc(sizeof(struct enet_statistics), GFP_KERNEL); + if (dev->priv == NULL) return -ENOMEM; /* No statistics block: fail rather than memset(NULL) */ + memset(dev->priv, 0, sizeof(struct enet_statistics)); + + dev->get_stats = nr_get_stats; + + /* Fill in the generic fields of the device structure. */ + for (i = 0; i < DEV_NUMBUFFS; i++) + skb_queue_head_init(&dev->buffs[i]); + + return 0; +} + +#endif diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c new file mode 100644 index 000000000..7f0513732 --- /dev/null +++ b/net/netrom/nr_in.c @@ -0,0 +1,313 @@ +/* + * NET/ROM release 002 + * + * This is ALPHA test software. This code may break your machine, randomly fail to work with new + * releases, misbehave and/or generally screw up. It might even work. + * + * This code REQUIRES 1.2.1 or higher/ NET3.029 + * + * This module: + * This module is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Most of this code is based on the SDL diagrams published in the 7th + * ARRL Computer Networking Conference papers. The diagrams have mistakes + * in them, but are mostly correct. 
Before you modify the code could you + * read the SDL diagrams as the code is not obvious and probably very + * easy to break; + * + * History + * NET/ROM 001 Jonathan(G4KLX) Cloned from ax25_in.c + */ + +#include <linux/config.h> +#ifdef CONFIG_NETROM +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <net/ax25.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/ip.h> /* For ip_rcv */ +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <net/netrom.h> + +/* + * State machine for state 1, Awaiting Connection State. + * The handling of the timer(s) is in file nr_timer.c. + * Handling of state 0 and connection release is in netrom.c. + */ +static int nr_state1_machine(struct sock *sk, struct sk_buff *skb, int frametype) +{ + switch (frametype) { + + case NR_CONNACK: + nr_calculate_rtt(sk); + sk->window = skb->data[37]; + sk->nr->your_index = skb->data[34]; + sk->nr->your_id = skb->data[35]; + sk->nr->t1timer = 0; + sk->nr->t2timer = 0; + sk->nr->t4timer = 0; + sk->nr->vs = 0; + sk->nr->va = 0; + sk->nr->vr = 0; + sk->nr->vl = 0; + sk->nr->state = NR_STATE_3; + sk->state = TCP_ESTABLISHED; + sk->nr->n2count = 0; + /* For WAIT_SABM connections we will produce an accept ready socket here */ + if (!sk->dead) + sk->state_change(sk); + break; + + case NR_CONNACK + NR_CHOKE_FLAG: + nr_clear_tx_queue(sk); + sk->nr->state = NR_STATE_0; + sk->state = TCP_CLOSE; + sk->err = ECONNREFUSED; + if (!sk->dead) + sk->state_change(sk); + sk->dead = 1; + break; + + default: + break; + } + + return 0; +} + +/* + * State machine for state 2, Awaiting Release State. 
+ * The handling of the timer(s) is in file nr_timer.c + * Handling of state 0 and connection release is in netrom.c. + */ +static int nr_state2_machine(struct sock *sk, struct sk_buff *skb, int frametype) +{ + switch (frametype) { + + case NR_DISCREQ: + nr_write_internal(sk, NR_DISCACK); + break; + + case NR_DISCACK: + sk->nr->state = NR_STATE_0; + sk->state = TCP_CLOSE; + sk->err = 0; + if (!sk->dead) + sk->state_change(sk); + sk->dead = 1; + break; + + default: + break; + } + + return 0; +} + +/* + * State machine for state 3, Connected State. + * The handling of the timer(s) is in file nr_timer.c + * Handling of state 0 and connection release is in netrom.c. + */ +static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype) +{ + struct sk_buff_head temp_queue; + struct sk_buff *skbn; + unsigned short save_vr; + unsigned short nr, ns; + int queued = 0; + + nr = skb->data[35]; + ns = skb->data[34]; + + switch (frametype) { + + case NR_CONNREQ: + nr_write_internal(sk, NR_CONNACK); + sk->nr->condition = 0x00; + sk->nr->t1timer = 0; + sk->nr->t2timer = 0; + sk->nr->t4timer = 0; + sk->nr->vs = 0; + sk->nr->va = 0; + sk->nr->vr = 0; + sk->nr->vl = 0; + break; + + case NR_DISCREQ: + nr_clear_tx_queue(sk); + nr_write_internal(sk, NR_DISCACK); + sk->nr->state = NR_STATE_0; + sk->state = TCP_CLOSE; + sk->err = 0; + if (!sk->dead) + sk->state_change(sk); + sk->dead = 1; + break; + + case NR_DISCACK: + nr_clear_tx_queue(sk); + sk->nr->state = NR_STATE_0; + sk->state = TCP_CLOSE; + sk->err = ECONNRESET; + if (!sk->dead) + sk->state_change(sk); + sk->dead = 1; + break; + + case NR_INFOACK: + case NR_INFOACK + NR_CHOKE_FLAG: + if (frametype & NR_CHOKE_FLAG) { + sk->nr->condition |= PEER_RX_BUSY_CONDITION; + sk->nr->t4timer = nr_default.busy_delay; + } else { + sk->nr->condition &= ~PEER_RX_BUSY_CONDITION; + sk->nr->t4timer = 0; + } + if (!nr_validate_nr(sk, nr)) { + nr_nr_error_recovery(sk); + sk->nr->state = NR_STATE_1; + break; + } + if 
(sk->nr->condition & PEER_RX_BUSY_CONDITION) { + nr_frames_acked(sk, nr); + } else { + nr_check_iframes_acked(sk, nr); + } + break; + + case NR_INFOACK + NR_NAK_FLAG: + case NR_INFOACK + NR_NAK_FLAG + NR_CHOKE_FLAG: + if (frametype & NR_CHOKE_FLAG) { + sk->nr->condition |= PEER_RX_BUSY_CONDITION; + sk->nr->t4timer = nr_default.busy_delay; + } else { + sk->nr->condition &= ~PEER_RX_BUSY_CONDITION; + sk->nr->t4timer = 0; + } + if (nr_validate_nr(sk, nr)) { + nr_frames_acked(sk, nr); + nr_send_nak_frame(sk); + } else { + nr_nr_error_recovery(sk); + sk->nr->state = NR_STATE_1; + } + break; + + case NR_INFO: + case NR_INFO + NR_CHOKE_FLAG: + case NR_INFO + NR_MORE_FLAG: + case NR_INFO + NR_CHOKE_FLAG + NR_MORE_FLAG: + if (frametype & NR_CHOKE_FLAG) { + sk->nr->condition |= PEER_RX_BUSY_CONDITION; + sk->nr->t4timer = nr_default.busy_delay; + } else { + sk->nr->condition &= ~PEER_RX_BUSY_CONDITION; + sk->nr->t4timer = 0; + } + if (!nr_validate_nr(sk, nr)) { + nr_nr_error_recovery(sk); + sk->nr->state = NR_STATE_1; + break; + } + if (sk->nr->condition & PEER_RX_BUSY_CONDITION) { + nr_frames_acked(sk, nr); + } else { + nr_check_iframes_acked(sk, nr); + } + queued = 1; + skb_queue_head(&sk->nr->reseq_queue, skb); + if (sk->nr->condition & OWN_RX_BUSY_CONDITION) + break; + skb_queue_head_init(&temp_queue); + do { + save_vr = sk->nr->vr; + while ((skbn = skb_dequeue(&sk->nr->reseq_queue)) != NULL) { + ns = skbn->data[34]; + if (ns == sk->nr->vr) { + if (sock_queue_rcv_skb(sk, skbn) == 0) { + sk->nr->vr = (sk->nr->vr + 1) % NR_MODULUS; + } else { + sk->nr->condition |= OWN_RX_BUSY_CONDITION; + skb_queue_tail(&temp_queue, skbn); + } + } else if (nr_in_rx_window(sk, ns)) { + skb_queue_tail(&temp_queue, skbn); + } else { + skbn->free = 1; + kfree_skb(skbn, FREE_READ); + } + } + while ((skbn = skb_dequeue(&temp_queue)) != NULL) { + skb_queue_tail(&sk->nr->reseq_queue, skbn); + } + } while (save_vr != sk->nr->vr); + /* + * Window is full, ack it immediately. 
+ */ + if (((sk->nr->vl + sk->window) % NR_MODULUS) == sk->nr->vr) { + nr_enquiry_response(sk); + } else { + if (!(sk->nr->condition & ACK_PENDING_CONDITION)) { + sk->nr->t2timer = sk->nr->t2; + sk->nr->condition |= ACK_PENDING_CONDITION; + } + } + break; + + default: + break; + } + + return queued; +} + +/* Higher level upcall for a LAPB frame */ +int nr_process_rx_frame(struct sock *sk, struct sk_buff *skb) +{ + int queued = 0, frametype; + + del_timer(&sk->timer); + + frametype = skb->data[36]; + + switch (sk->nr->state) + { + case NR_STATE_1: + queued = nr_state1_machine(sk, skb, frametype); + break; + case NR_STATE_2: + queued = nr_state2_machine(sk, skb, frametype); + break; + case NR_STATE_3: + queued = nr_state3_machine(sk, skb, frametype); + break; + default: + printk("nr_process_rx_frame: frame received - state: %d\n", sk->nr->state); + break; + } + + nr_set_timer(sk); + + return(queued); +} + +#endif diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c new file mode 100644 index 000000000..2ebdd743d --- /dev/null +++ b/net/netrom/nr_out.c @@ -0,0 +1,243 @@ +/* + * NET/ROM release 002 + * + * This is ALPHA test software. This code may break your machine, randomly fail to work with new + * releases, misbehave and/or generally screw up. It might even work. + * + * This code REQUIRES 1.2.1 or higher/ NET3.029 + * + * This module: + * This module is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * History + * NET/ROM 001 Jonathan(G4KLX) Cloned from ax25_out.c + */ + +#include <linux/config.h> +#ifdef CONFIG_NETROM +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <net/ax25.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <net/netrom.h> + +int nr_output(struct sock *sk, struct sk_buff *skb) +{ + skb_queue_tail(&sk->write_queue, skb); /* Throw it on the queue */ + + if (sk->nr->state == NR_STATE_3) + nr_kick(sk); + + return 0; +} + +/* + * This procedure is passed a buffer descriptor for an iframe. It builds + * the rest of the control part of the frame and then writes it out. + */ +static void nr_send_iframe(struct sock *sk, struct sk_buff *skb) +{ + unsigned char *dptr; + + if (skb == NULL) + return; + + dptr = skb->data + 34; + + *dptr++ = sk->nr->vs; + *dptr++ = sk->nr->vr; + + nr_transmit_buffer(sk, skb); +} + +void nr_send_nak_frame(struct sock *sk) +{ + struct sk_buff *skb, *skbn; + + if ((skb = skb_peek(&sk->nr->ack_queue)) == NULL) + return; + + if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) + return; + + nr_send_iframe(sk, skbn); + + sk->nr->condition &= ~ACK_PENDING_CONDITION; + sk->nr->vl = sk->nr->vr; + sk->nr->t1timer = 0; +} + +void nr_kick(struct sock *sk) +{ + struct sk_buff *skb, *skbn; + int last = 1; + unsigned short start, end, next; + + del_timer(&sk->timer); + + start = (skb_peek(&sk->nr->ack_queue) == NULL) ? 
sk->nr->va : sk->nr->vs; + end = (sk->nr->va + sk->window) % NR_MODULUS; + + if (!(sk->nr->condition & PEER_RX_BUSY_CONDITION) && + start != end && + skb_peek(&sk->write_queue) != NULL) { + + sk->nr->vs = start; + + /* + * Transmit data until either we're out of data to send or + * the window is full. + */ + do { + /* + * Dequeue the frame and copy it. + */ + skb = skb_dequeue(&sk->write_queue); + + if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { + skb_queue_head(&sk->write_queue, skb); + return; + } + + next = (sk->nr->vs + 1) % NR_MODULUS; + last = (next == end); + + /* + * Transmit the frame copy. + */ + nr_send_iframe(sk, skbn); + + sk->nr->vs = next; + + /* + * Requeue the original data frame. + */ + skb_queue_tail(&sk->nr->ack_queue, skb); + + } while (!last && skb_peek(&sk->write_queue) != NULL); + + sk->nr->vl = sk->nr->vr; + sk->nr->condition &= ~ACK_PENDING_CONDITION; + + if (sk->nr->t1timer == 0) { + sk->nr->t1timer = sk->nr->t1 = nr_calculate_t1(sk); + } + } + + nr_set_timer(sk); +} + +void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb) +{ + unsigned char *dptr; + + dptr = skb->data + 16; + + *dptr++ = AX25_P_NETROM; + + memcpy(dptr, &sk->nr->source_addr, sizeof(ax25_address)); + dptr[6] &= ~LAPB_C; + dptr[6] &= ~LAPB_E; + dptr[6] |= SSID_SPARE; + dptr += 7; + + memcpy(dptr, &sk->nr->dest_addr, sizeof(ax25_address)); + dptr[6] &= ~LAPB_C; + dptr[6] |= LAPB_E; + dptr[6] |= SSID_SPARE; + dptr += 7; + + *dptr++ = nr_default.ttl; + + skb->arp = 1; + + if (!nr_route_frame(skb, NULL)) { + kfree_skb(skb, FREE_WRITE); + + sk->state = TCP_CLOSE; + sk->err = ENETUNREACH; + if (!sk->dead) + sk->state_change(sk); + sk->dead = 1; + } +} + +/* + * The following routines are taken from page 170 of the 7th ARRL Computer + * Networking Conference paper, as is the whole state machine. 
+ */ + +void nr_nr_error_recovery(struct sock *sk) +{ + nr_establish_data_link(sk); +} + +void nr_establish_data_link(struct sock *sk) +{ + sk->nr->condition = 0x00; + sk->nr->n2count = 0; + + nr_write_internal(sk, NR_CONNREQ); + + sk->nr->t2timer = 0; + sk->nr->t1timer = sk->nr->t1 = nr_calculate_t1(sk); +} + +/* + * Never send a NAK when we are CHOKEd. + */ +void nr_enquiry_response(struct sock *sk) +{ + int frametype = NR_INFOACK; + + if (sk->nr->condition & OWN_RX_BUSY_CONDITION) { + frametype += NR_CHOKE_FLAG; + } else { + if (skb_peek(&sk->nr->reseq_queue) != NULL) { + frametype += NR_NAK_FLAG; + } + } + + nr_write_internal(sk, frametype); + + sk->nr->vl = sk->nr->vr; + sk->nr->condition &= ~ACK_PENDING_CONDITION; +} + +void nr_check_iframes_acked(struct sock *sk, unsigned short nr) +{ + if (sk->nr->vs == nr) { + nr_frames_acked(sk, nr); + nr_requeue_frames(sk); + nr_calculate_rtt(sk); + sk->nr->t1timer = 0; + sk->nr->n2count = 0; + } else { + if (sk->nr->va != nr) { + nr_frames_acked(sk, nr); + nr_requeue_frames(sk); + sk->nr->t1timer = sk->nr->t1 = nr_calculate_t1(sk); + } + } +} + +#endif diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c new file mode 100644 index 000000000..356d3c0f6 --- /dev/null +++ b/net/netrom/nr_route.c @@ -0,0 +1,750 @@ +/* + * NET/ROM release 002 + * + * This is ALPHA test software. This code may break your machine, randomly fail to work with new + * releases, misbehave and/or generally screw up. It might even work. + * + * This code REQUIRES 1.2.1 or higher/ NET3.029 + * + * This module: + * This module is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * History + * NET/ROM 001 Jonathan(G4KLX) First attempt. + * + * TO DO + * Sort out the which pointer when shuffling entries in the routes + * section. 
Also reset the which pointer when a route becomes "good" + * again, ie when a NODES broadcast is processed via calls to + * nr_add_node(). + */ + +#include <linux/config.h> +#ifdef CONFIG_NETROM +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <net/ax25.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/fcntl.h> +#include <linux/termios.h> /* For TIOCINQ/OUTQ */ +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <net/netrom.h> + +static int nr_neigh_no = 1; + +static struct nr_node *nr_node_list = NULL; +static struct nr_neigh *nr_neigh_list = NULL; + +/* + * Add a new route to a node, and in the process add the node and the + * neighbour if it is new. 
+ */ +static int nr_add_node(ax25_address *nr, char *mnemonic, ax25_address *ax25, + struct device *dev, int quality, int obs_count) +{ + struct nr_node *nr_node; + struct nr_neigh *nr_neigh; + struct nr_route nr_route; + unsigned long flags; + int i, found; + + for (nr_node = nr_node_list; nr_node != NULL; nr_node = nr_node->next) + if (ax25cmp(nr, &nr_node->callsign) == 0) + break; + + for (nr_neigh = nr_neigh_list; nr_neigh != NULL; nr_neigh = nr_neigh->next) + if (ax25cmp(ax25, &nr_neigh->callsign) == 0 && nr_neigh->dev == dev) + break; + + if (quality == 0 && nr_neigh != NULL && nr_node != NULL) + return 0; + + if (nr_neigh == NULL) { + if ((nr_neigh = (struct nr_neigh *)kmalloc(sizeof(*nr_neigh), GFP_ATOMIC)) == NULL) + return -ENOMEM; + + memcpy(&nr_neigh->callsign, ax25, sizeof(ax25_address)); + + nr_neigh->dev = dev; + nr_neigh->quality = nr_default.quality; + nr_neigh->locked = 0; + nr_neigh->count = 0; + nr_neigh->number = nr_neigh_no++; + + save_flags(flags); + cli(); + + nr_neigh->next = nr_neigh_list; + nr_neigh_list = nr_neigh; + + restore_flags(flags); + } + + if (nr_node == NULL) { + if ((nr_node = (struct nr_node *)kmalloc(sizeof(*nr_node), GFP_ATOMIC)) == NULL) + return -ENOMEM; + + memcpy(&nr_node->callsign, nr, sizeof(ax25_address)); + memcpy(&nr_node->mnemonic, mnemonic, sizeof(nr_node->mnemonic)); + + nr_node->which = 0; + nr_node->count = 1; + + nr_node->routes[0].quality = quality; + nr_node->routes[0].obs_count = obs_count; + nr_node->routes[0].neighbour = nr_neigh->number; + + save_flags(flags); + cli(); + + nr_node->next = nr_node_list; + nr_node_list = nr_node; + + restore_flags(flags); + + nr_neigh->count++; + + return 0; + } + + for (found = 0, i = 0; i < nr_node->count; i++) { + if (nr_node->routes[i].neighbour == nr_neigh->number) { + nr_node->routes[i].quality = quality; + nr_node->routes[i].obs_count = obs_count; + found = 1; + break; + } + } + + if (!found) { + /* We have space at the bottom, slot it in */ + if (nr_node->count < 
3) { + nr_node->routes[2] = nr_node->routes[1]; + nr_node->routes[1] = nr_node->routes[0]; + + nr_node->routes[0].quality = quality; + nr_node->routes[0].obs_count = obs_count; + nr_node->routes[0].neighbour = nr_neigh->number; + + nr_node->count++; + nr_neigh->count++; + } else { + /* It must be better than the worst */ + if (quality > nr_node->routes[2].quality) { + nr_node->routes[2].quality = quality; + nr_node->routes[2].obs_count = obs_count; + nr_node->routes[2].neighbour = nr_neigh->number; + + nr_neigh->count++; + } + } + } + + /* Now re-sort the routes in quality order */ + switch (nr_node->count) { + case 3: + if (nr_node->routes[1].quality > nr_node->routes[0].quality) { + switch (nr_node->which) { + case 0: nr_node->which = 1; break; + case 1: nr_node->which = 0; break; + default: break; + } + nr_route = nr_node->routes[0]; + nr_node->routes[0] = nr_node->routes[1]; + nr_node->routes[1] = nr_route; + } + if (nr_node->routes[2].quality > nr_node->routes[1].quality) { + switch (nr_node->which) { + case 1: nr_node->which = 2; break; + case 2: nr_node->which = 1; break; + default: break; + } + nr_route = nr_node->routes[1]; + nr_node->routes[1] = nr_node->routes[2]; + nr_node->routes[2] = nr_route; + } + case 2: + if (nr_node->routes[1].quality > nr_node->routes[0].quality) { + switch (nr_node->which) { + case 0: nr_node->which = 1; break; + case 1: nr_node->which = 0; break; + default: break; + } + nr_route = nr_node->routes[0]; + nr_node->routes[0] = nr_node->routes[1]; + nr_node->routes[1] = nr_route; + } + case 1: + break; + } + + for (i = 0; i < nr_node->count; i++) { + if (nr_node->routes[i].neighbour == nr_neigh->number) { + if (i < nr_node->which) + nr_node->which = i; + break; + } + } + + return 0; +} + +static void nr_remove_node(struct nr_node *nr_node) +{ + struct nr_node *s; + unsigned long flags; + + save_flags(flags); + cli(); + + if ((s = nr_node_list) == nr_node) { + nr_node_list = nr_node->next; + restore_flags(flags); + kfree_s(nr_node, 
sizeof(struct nr_node)); + return; + } + + while (s != NULL && s->next != NULL) { + if (s->next == nr_node) { + s->next = nr_node->next; + restore_flags(flags); + kfree_s(nr_node, sizeof(struct nr_node)); + return; + } + + s = s->next; + } + + restore_flags(flags); +} + +static void nr_remove_neigh(struct nr_neigh *nr_neigh) +{ + struct nr_neigh *s; + unsigned long flags; + + save_flags(flags); + cli(); + + if ((s = nr_neigh_list) == nr_neigh) { + nr_neigh_list = nr_neigh->next; + restore_flags(flags); + kfree_s(nr_neigh, sizeof(struct nr_neigh)); + return; + } + + while (s != NULL && s->next != NULL) { + if (s->next == nr_neigh) { + s->next = nr_neigh->next; + restore_flags(flags); + kfree_s(nr_neigh, sizeof(struct nr_neigh)); + return; + } + + s = s->next; + } + + restore_flags(flags); +} + +/* + * "Delete" a node. Strictly speaking remove a route to a node. The node + * is only deleted if no routes are left to it. + */ +static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct device *dev) +{ + struct nr_node *nr_node; + struct nr_neigh *nr_neigh; + int i; + + for (nr_node = nr_node_list; nr_node != NULL; nr_node = nr_node->next) + if (ax25cmp(callsign, &nr_node->callsign) == 0) + break; + + if (nr_node == NULL) return -EINVAL; + + for (nr_neigh = nr_neigh_list; nr_neigh != NULL; nr_neigh = nr_neigh->next) + if (ax25cmp(neighbour, &nr_neigh->callsign) == 0 && nr_neigh->dev == dev) + break; + + if (nr_neigh == NULL) return -EINVAL; + + for (i = 0; i < nr_node->count; i++) { + if (nr_node->routes[i].neighbour == nr_neigh->number) { + nr_neigh->count--; + + if (nr_neigh->count == 0 && !nr_neigh->locked) + nr_remove_neigh(nr_neigh); + + nr_node->count--; + + if (nr_node->count == 0) { + nr_remove_node(nr_node); + } else { + switch (i) { + case 0: + nr_node->routes[0] = nr_node->routes[1]; + case 1: + nr_node->routes[1] = nr_node->routes[2]; + case 2: + break; + } + } + + return 0; + } + } + + return -EINVAL; +} + +/* + * Lock a neighbour with a 
quality.
+ */
+static int nr_add_neigh(ax25_address *callsign, struct device *dev, unsigned int quality)
+{
+	struct nr_neigh *neigh;
+	unsigned long irq_state;
+
+	/* Look for an existing entry with this callsign on this device. */
+	neigh = nr_neigh_list;
+	while (neigh != NULL) {
+		if (ax25cmp(callsign, &neigh->callsign) == 0 && neigh->dev == dev)
+			break;
+		neigh = neigh->next;
+	}
+
+	/* Already known: just update the quality and lock it. */
+	if (neigh != NULL) {
+		neigh->quality = quality;
+		neigh->locked  = 1;
+		return 0;
+	}
+
+	neigh = (struct nr_neigh *)kmalloc(sizeof(*neigh), GFP_ATOMIC);
+	if (neigh == NULL)
+		return -ENOMEM;
+
+	memcpy(&neigh->callsign, callsign, sizeof(ax25_address));
+
+	neigh->dev     = dev;
+	neigh->quality = quality;
+	neigh->locked  = 1;
+	neigh->count   = 0;
+	neigh->number  = nr_neigh_no++;
+
+	/* Push the new entry on the front of the list, interrupts off. */
+	save_flags(irq_state);
+	cli();
+
+	neigh->next   = nr_neigh_list;
+	nr_neigh_list = neigh;
+
+	restore_flags(irq_state);
+
+	return 0;
+}
+
+/*
+ * Unlock a neighbour and store the given quality.  The entry itself is
+ * freed only when no node routes through it (count is zero).  Returns
+ * -EINVAL if there is no such neighbour on the device, 0 otherwise.
+ */
+static int nr_del_neigh(ax25_address *callsign, struct device *dev, unsigned int quality)
+{
+	struct nr_neigh *neigh = nr_neigh_list;
+
+	while (neigh != NULL) {
+		if (ax25cmp(callsign, &neigh->callsign) == 0 && neigh->dev == dev)
+			break;
+		neigh = neigh->next;
+	}
+
+	if (neigh == NULL)
+		return -EINVAL;
+
+	neigh->quality = quality;
+	neigh->locked  = 0;
+
+	if (neigh->count == 0)
+		nr_remove_neigh(neigh);
+
+	return 0;
+}
+
+/*
+ * Decrement the obsolescence count by one. If a route is reduced to a
+ * count of zero, remove it. Also remove any unlocked neighbours with
+ * zero nodes routing via it.
+ */ +static int nr_dec_obs(void) +{ + struct nr_neigh *t, *nr_neigh; + struct nr_node *s, *nr_node; + int i; + + nr_node = nr_node_list; + + while (nr_node != NULL) { + s = nr_node; + nr_node = nr_node->next; + + for (i = 0; i < s->count; i++) { + switch (s->routes[i].obs_count) { + + case 0: /* A locked entry */ + break; + + case 1: /* From 1 -> 0 */ + nr_neigh = nr_neigh_list; + + while (nr_neigh != NULL) { + t = nr_neigh; + nr_neigh = nr_neigh->next; + + if (t->number == s->routes[i].neighbour) { + t->count--; + + if (t->count == 0 && !t->locked) + nr_remove_neigh(t); + + break; + } + } + + s->count--; + + switch (i) { + case 0: + s->routes[0] = s->routes[1]; + case 1: + s->routes[1] = s->routes[2]; + case 2: + break; + } + break; + + default: + s->routes[i].obs_count--; + break; + + } + } + + if (s->count <= 0) + nr_remove_node(s); + } + + return 0; +} + +/* + * A device has been removed. Remove its routes and neighbours. + */ +void nr_rt_device_down(struct device *dev) +{ + struct nr_neigh *s, *nr_neigh = nr_neigh_list; + struct nr_node *t, *nr_node; + int i; + + while (nr_neigh != NULL) { + s = nr_neigh; + nr_neigh = nr_neigh->next; + + if (s->dev == dev) { + nr_node = nr_node_list; + + while (nr_node != NULL) { + t = nr_node; + nr_node = nr_node->next; + + for (i = 0; i < t->count; i++) { + if (t->routes[i].neighbour == s->number) { + t->count--; + + switch (i) { + case 0: + t->routes[0] = t->routes[1]; + case 1: + t->routes[1] = t->routes[2]; + case 2: + break; + } + } + } + + if (t->count <= 0) + nr_remove_node(t); + } + + nr_remove_neigh(s); + } + } +} + +/* + * Check that the device given is a valid AX.25 interface that is "up". + */ +static struct device *nr_ax25_dev_get(char *devname) +{ + struct device *dev; + + if ((dev = dev_get(devname)) == NULL) + return NULL; + + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) + return dev; + + return NULL; +} + +/* + * Find the first active NET/ROM device, usually "nr0". 
+ */ +struct device *nr_dev_first(void) +{ + struct device *dev, *first = NULL; + + for (dev = dev_base; dev != NULL; dev = dev->next) + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) + if (first == NULL || strncmp(dev->name, first->name, 3) < 0) + first = dev; + + return first; +} + +/* + * Find the NET/ROM device for the given callsign. + */ +struct device *nr_dev_get(ax25_address *addr) +{ + struct device *dev; + + for (dev = dev_base; dev != NULL; dev = dev->next) + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) + return dev; + + return NULL; +} + +/* + * Handle the ioctls that control the routing functions. + */ +int nr_rt_ioctl(unsigned int cmd, void *arg) +{ + struct nr_node_struct nr_node; + struct nr_neigh_struct nr_neigh; + struct device *dev; + int err; + + switch (cmd) { + + case SIOCNRADDNODE: + if ((err = verify_area(VERIFY_READ, arg, sizeof(struct nr_node_struct))) != 0) + return err; + memcpy_fromfs(&nr_node, arg, sizeof(struct nr_node_struct)); + if ((dev = nr_ax25_dev_get(nr_node.device)) == NULL) + return -EINVAL; + return nr_add_node(&nr_node.callsign, nr_node.mnemonic, + &nr_node.neighbour, dev, nr_node.quality, nr_node.obs_count); + + case SIOCNRDELNODE: + if ((err = verify_area(VERIFY_READ, arg, sizeof(struct nr_node_struct))) != 0) + return err; + memcpy_fromfs(&nr_node, arg, sizeof(struct nr_node_struct)); + if ((dev = nr_ax25_dev_get(nr_node.device)) == NULL) + return -EINVAL; + return nr_del_node(&nr_node.callsign, &nr_node.neighbour, dev); + + case SIOCNRADDNEIGH: + if ((err = verify_area(VERIFY_READ, arg, sizeof(struct nr_neigh_struct))) != 0) + return err; + memcpy_fromfs(&nr_neigh, arg, sizeof(struct nr_neigh_struct)); + if ((dev = nr_ax25_dev_get(nr_neigh.device)) == NULL) + return -EINVAL; + return nr_add_neigh(&nr_neigh.callsign, dev, nr_neigh.quality); + + case SIOCNRDELNEIGH: + if ((err = verify_area(VERIFY_READ, arg, sizeof(struct nr_neigh_struct))) != 0) + 
return err; + memcpy_fromfs(&nr_neigh, arg, sizeof(struct nr_neigh_struct)); + if ((dev = nr_ax25_dev_get(nr_neigh.device)) == NULL) + return -EINVAL; + return nr_del_neigh(&nr_neigh.callsign, dev, nr_neigh.quality); + + case SIOCNRDECOBS: + return nr_dec_obs(); + } + + return 0; +} + +/* + * A level 2 link has timed out, therefore it appears to be a poor link, + * then don't use that neighbour until it is reset. + */ +void nr_link_failed(ax25_address *callsign, struct device *dev) +{ + struct nr_neigh *nr_neigh; + struct nr_node *nr_node; + + for (nr_neigh = nr_neigh_list; nr_neigh != NULL; nr_neigh = nr_neigh->next) + if (ax25cmp(&nr_neigh->callsign, callsign) == 0 && nr_neigh->dev == dev) + break; + + if (nr_neigh == NULL) return; + + for (nr_node = nr_node_list; nr_node != NULL; nr_node = nr_node->next) + if (nr_node->which >= nr_node->count && nr_node->routes[nr_node->which].neighbour == nr_neigh->number) + nr_node->which++; +} + +/* + * Route a frame to an appropriate AX.25 connection. A NULL dev means + * that the frame was generated internally. 
+ */ +int nr_route_frame(struct sk_buff *skb, struct device *device) +{ + ax25_address *ax25_src, *ax25_dest; + ax25_address *nr_src, *nr_dest; + struct nr_neigh *nr_neigh; + struct nr_node *nr_node; + struct device *dev; + + ax25_dest = (ax25_address *)(skb->data + 1); + ax25_src = (ax25_address *)(skb->data + 8); + nr_src = (ax25_address *)(skb->data + 17); + nr_dest = (ax25_address *)(skb->data + 24); + + if (device != NULL) + nr_add_node(nr_src, "", ax25_src, device, 0, nr_default.obs_count); + + if ((dev = nr_dev_get(nr_dest)) != NULL) /* Its for me */ + return nr_rx_frame(skb, dev); + + /* Its Time-To-Live has expired */ + if (--skb->data[31] == 0) + return 0; + + for (nr_node = nr_node_list; nr_node != NULL; nr_node = nr_node->next) + if (ax25cmp(nr_dest, &nr_node->callsign) == 0) + break; + + if (nr_node == NULL || nr_node->which >= nr_node->count) + return 0; + + for (nr_neigh = nr_neigh_list; nr_neigh != NULL; nr_neigh = nr_neigh->next) + if (nr_neigh->number == nr_node->routes[nr_node->which].neighbour) + break; + + if (nr_neigh == NULL) + return 0; + + if ((dev = nr_dev_first()) == NULL) + return 0; + + if (device != NULL) + skb->len += dev->hard_header_len; + + ax25_send_frame(skb, (ax25_address *)dev->dev_addr, &nr_neigh->callsign, nr_neigh->dev); + + return 1; +} + +int nr_nodes_get_info(char *buffer, char **start, off_t offset, int length) +{ + struct nr_node *nr_node; + int len = 0; + off_t pos = 0; + off_t begin = 0; + int i; + + cli(); + + len += sprintf(buffer, "callsign mnemonic w n qual obs neigh qual obs neigh qual obs neigh\n"); + + for (nr_node = nr_node_list; nr_node != NULL; nr_node = nr_node->next) { + len += sprintf(buffer + len, "%-9s %-7s %d %d ", + ax2asc(&nr_node->callsign), + nr_node->mnemonic, + nr_node->which + 1, + nr_node->count); + + for (i = 0; i < nr_node->count; i++) { + len += sprintf(buffer + len, " %3d %d %05d", + nr_node->routes[i].quality, + nr_node->routes[i].obs_count, + nr_node->routes[i].neighbour); + } + + len += 
sprintf(buffer + len, "\n"); + + pos = begin + len; + + if (pos < offset) { + len = 0; + begin = pos; + } + + if (pos > offset + length) + break; + } + + sti(); + + *start = buffer + (offset - begin); + len -= (offset - begin); + + if (len > length) len = length; + + return(len); +} + +int nr_neigh_get_info(char *buffer, char **start, off_t offset, int length) +{ + struct nr_neigh *nr_neigh; + int len = 0; + off_t pos = 0; + off_t begin = 0; + + cli(); + + len += sprintf(buffer, "addr callsign dev qual lock count\n"); + + for (nr_neigh = nr_neigh_list; nr_neigh != NULL; nr_neigh = nr_neigh->next) { + len += sprintf(buffer + len, "%05d %-9s %-3s %3d %d %3d\n", + nr_neigh->number, + ax2asc(&nr_neigh->callsign), + nr_neigh->dev ? nr_neigh->dev->name : "???", + nr_neigh->quality, + nr_neigh->locked, + nr_neigh->count); + + pos = begin + len; + + if (pos < offset) { + len = 0; + begin = pos; + } + + if (pos > offset + length) + break; + } + + sti(); + + *start = buffer + (offset - begin); + len -= (offset - begin); + + if (len > length) len = length; + + return(len); +} + +#endif diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c new file mode 100644 index 000000000..3d5c2fc12 --- /dev/null +++ b/net/netrom/nr_subr.c @@ -0,0 +1,295 @@ +/* + * NET/ROM release 002 + * + * This is ALPHA test software. This code may break your machine, randomly fail to work with new + * releases, misbehave and/or generally screw up. It might even work. + * + * This code REQUIRES 1.2.1 or higher/ NET3.029 + * + * This module: + * This module is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * History + * NET/ROM 001 Jonathan(G4KLX) Cloned from ax25_subr.c + */ + +#include <linux/config.h> +#ifdef CONFIG_NETROM +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <net/ax25.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <net/netrom.h> + +/* + * This routine purges the input queue of frames. + */ +void nr_clear_tx_queue(struct sock *sk) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&sk->write_queue)) != NULL) { + skb->sk = sk; + skb->free = 1; + kfree_skb(skb, FREE_WRITE); + } + + while ((skb = skb_dequeue(&sk->nr->ack_queue)) != NULL) { + skb->sk = sk; + skb->free = 1; + kfree_skb(skb, FREE_WRITE); + } + + while ((skb = skb_dequeue(&sk->nr->reseq_queue)) != NULL) { + skb->free = 1; + kfree_skb(skb, FREE_READ); + } +} + +/* + * This routine purges the input queue of those frames that have been + * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the + * SDL diagram. + */ +void nr_frames_acked(struct sock *sk, unsigned short nr) +{ + struct sk_buff *skb; + + /* + * Remove all the ack-ed frames from the ack queue. + */ + if (sk->nr->va != nr) { + while (skb_peek(&sk->nr->ack_queue) != NULL && sk->nr->va != nr) { + skb = skb_dequeue(&sk->nr->ack_queue); + skb->sk = sk; + skb->free = 1; + kfree_skb(skb, FREE_WRITE); + sk->nr->va = (sk->nr->va + 1) % NR_MODULUS; + } + } +} + +/* + * Requeue all the un-ack-ed frames on the output queue to be picked + * up by nr_kick called from the timer. This arrangement handles the + * possibility of an empty output queue. 
+ */
+void nr_requeue_frames(struct sock *sk)
+{
+	struct sk_buff *tail, *cur;
+
+	/* The first frame goes to the very front of the write queue ... */
+	tail = skb_dequeue(&sk->nr->ack_queue);
+	if (tail == NULL)
+		return;
+
+	skb_queue_head(&sk->write_queue, tail);
+
+	/* ... and each later one directly behind its predecessor. */
+	while ((cur = skb_dequeue(&sk->nr->ack_queue)) != NULL) {
+		skb_append(tail, cur);
+		tail = cur;
+	}
+}
+
+/*
+ * Return 1 if nr is one of the values va, va+1, ... vs (modulo
+ * NR_MODULUS, both ends included), 0 otherwise.
+ */
+int nr_validate_nr(struct sock *sk, unsigned short nr)
+{
+	unsigned short seq;
+
+	for (seq = sk->nr->va; seq != sk->nr->vs; seq = (seq + 1) % NR_MODULUS)
+		if (seq == nr)
+			return 1;
+
+	return nr == sk->nr->vs;
+}
+
+/*
+ * Check that ns is within the receive window: return 1 if ns is one of
+ * vl, vl+1, ... vl+window (modulo NR_MODULUS, both ends included),
+ * 0 otherwise.
+ */
+int nr_in_rx_window(struct sock *sk, unsigned short ns)
+{
+	unsigned short top = (sk->nr->vl + sk->window) % NR_MODULUS;
+	unsigned short seq;
+
+	for (seq = sk->nr->vl; seq != top; seq = (seq + 1) % NR_MODULUS)
+		if (seq == ns)
+			return 1;
+
+	return ns == top;
+}
+
+/*
+ * This routine is called when the HDLC layer internally generates a
+ * control frame.
+ */
+void nr_write_internal(struct sock *sk, int frametype)
+{
+	struct sk_buff *skb;
+	unsigned char *hdr;
+	unsigned char *addr;
+	int opcode = frametype & 0x0F;	/* low nibble selects the frame layout */
+	int len;
+
+	/* Total buffer length for each kind of frame. */
+	if (opcode == NR_CONNREQ) {
+		len = 52;
+	} else if (opcode == NR_CONNACK) {
+		len = 38;
+	} else if (opcode == NR_DISCREQ || opcode == NR_DISCACK || opcode == NR_INFOACK) {
+		len = 37;
+	} else {
+		printk("nr_write_internal: invalid frame type %d\n", frametype);
+		return;
+	}
+
+	skb = alloc_skb(len, GFP_ATOMIC);
+	if (skb == NULL)
+		return;
+
+	/* The part built here starts at offset 32 of the buffer. */
+	hdr = skb->data + 32;
+
+	if (opcode == NR_CONNREQ) {
+		hdr[0] = sk->nr->my_index;
+		hdr[1] = sk->nr->my_id;
+		hdr[2] = 0;
+		hdr[3] = 0;
+		hdr[4] = frametype;
+		hdr[5] = sk->window;
+
+		/* Two addresses follow: the user's, then our own. */
+		addr = hdr + 6;
+		memcpy(addr, &sk->nr->user_addr, sizeof(ax25_address));
+		addr[6] &= ~LAPB_C;
+		addr[6] &= ~LAPB_E;
+		addr[6] |= SSID_SPARE;
+
+		addr += 7;
+		memcpy(addr, &sk->nr->source_addr, sizeof(ax25_address));
+		addr[6] &= ~LAPB_C;
+		addr[6] &= ~LAPB_E;
+		addr[6] |= SSID_SPARE;
+	} else if (opcode == NR_CONNACK) {
+		hdr[0] = sk->nr->your_index;
+		hdr[1] = sk->nr->your_id;
+		hdr[2] = sk->nr->my_index;
+		hdr[3] = sk->nr->my_id;
+		hdr[4] = frametype;
+		hdr[5] = sk->window;
+	} else if (opcode == NR_INFOACK) {
+		hdr[0] = sk->nr->your_index;
+		hdr[1] = sk->nr->your_id;
+		hdr[2] = 0;
+		hdr[3] = sk->nr->vr;
+		hdr[4] = frametype;
+	} else {
+		/* NR_DISCREQ and NR_DISCACK share one layout. */
+		hdr[0] = sk->nr->your_index;
+		hdr[1] = sk->nr->your_id;
+		hdr[2] = 0;
+		hdr[3] = 0;
+		hdr[4] = frametype;
+	}
+
+	skb->free = 1;
+	skb->len  = len;
+
+	nr_transmit_buffer(sk, skb);
+}
+
+/*
+ * This routine is called when a Connect Acknowledge with the Choke Flag
+ * set is needed to refuse a connection.
+ */ +void nr_transmit_dm(struct sk_buff *skb) +{ + struct sk_buff *skbn; + unsigned char *dptr; + + if ((skbn = alloc_skb(38, GFP_ATOMIC)) == NULL) + return; + + dptr = skbn->data + 16; + + *dptr++ = AX25_P_NETROM; + + memcpy(dptr, skb->data + 24, 7); + dptr[6] &= ~LAPB_C; + dptr[6] &= ~LAPB_E; + dptr[6] |= SSID_SPARE; + dptr += 7; + + memcpy(dptr, skb->data + 17, 7); + dptr[6] &= ~LAPB_C; + dptr[6] |= LAPB_E; + dptr[6] |= SSID_SPARE; + dptr += 7; + + *dptr++ = nr_default.ttl; + + *dptr++ = skb->data[32]; + *dptr++ = skb->data[33]; + *dptr++ = 0; + *dptr++ = 0; + *dptr++ = NR_CONNACK + NR_CHOKE_FLAG; + *dptr++ = 0; + + skbn->free = 1; + skbn->len = 38; + skbn->sk = NULL; + + if (!nr_route_frame(skbn, NULL)) + kfree_skb(skbn, FREE_WRITE); +} + +/* + * Exponential backoff for NET/ROM + */ +unsigned short nr_calculate_t1(struct sock *sk) +{ + int n, t; + + for (t = 2, n = 0; n < sk->nr->n2count; n++) + t *= 2; + + return t * sk->nr->rtt; +} + +/* + * Calculate the Round Trip Time + */ +void nr_calculate_rtt(struct sock *sk) +{ + if (sk->nr->n2count == 0) + sk->nr->rtt = (9 * sk->nr->rtt + sk->nr->t1 - sk->nr->t1timer) / 10; + + /* Don't go below one second */ + if (sk->nr->rtt < 1 * PR_SLOWHZ) + sk->nr->rtt = 1 * PR_SLOWHZ; +} + +#endif diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c new file mode 100644 index 000000000..2e9269f13 --- /dev/null +++ b/net/netrom/nr_timer.c @@ -0,0 +1,192 @@ +/* + * NET/ROM release 002 + * + * This is ALPHA test software. This code may break your machine, randomly fail to work with new + * releases, misbehave and/or generally screw up. It might even work. + * + * This code REQUIRES 1.2.1 or higher/ NET3.029 + * + * This module: + * This module is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * History + * NET/ROM 001 Jonathan(G4KLX) Cloned from ax25_timer.c + */ + +#include <linux/config.h> +#ifdef CONFIG_NETROM +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <net/ax25.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <net/netrom.h> + +static void nr_timer(unsigned long); + +/* + * Linux set/reset timer routines + */ +void nr_set_timer(struct sock *sk) +{ + unsigned long flags; + + save_flags(flags); + cli(); + del_timer(&sk->timer); + restore_flags(flags); + + sk->timer.next = sk->timer.prev = NULL; + sk->timer.data = (unsigned long)sk; + sk->timer.function = &nr_timer; + + sk->timer.expires = 10; + add_timer(&sk->timer); +} + +static void nr_reset_timer(struct sock *sk) +{ + unsigned long flags; + + save_flags(flags); + cli(); + del_timer(&sk->timer); + restore_flags(flags); + + sk->timer.data = (unsigned long)sk; + sk->timer.function = &nr_timer; + sk->timer.expires = 10; + add_timer(&sk->timer); +} + +/* + * NET/ROM TIMER + * + * This routine is called every 500ms. Decrement timer by this + * amount - if expired then process the event. + */ +static void nr_timer(unsigned long param) +{ + struct sock *sk = (struct sock *)param; + + switch (sk->nr->state) { + case NR_STATE_0: + /* Magic here: If we listen() and a new link dies before it + is accepted() it isnt 'dead' so doesnt get removed. */ + if (sk->dead) { + del_timer(&sk->timer); + nr_destroy_socket(sk); + return; + } + break; + + case NR_STATE_3: + /* + * Check for the state of the receive buffer. 
+ */ + if (sk->rmem_alloc < (sk->rcvbuf / 2) && (sk->nr->condition & OWN_RX_BUSY_CONDITION)) { + sk->nr->condition &= ~OWN_RX_BUSY_CONDITION; + nr_write_internal(sk, NR_INFOACK); + sk->nr->condition &= ~ACK_PENDING_CONDITION; + sk->nr->vl = sk->nr->vr; + break; + } + /* + * Check for frames to transmit. + */ + nr_kick(sk); + break; + + default: + break; + } + + if (sk->nr->t2timer > 0 && --sk->nr->t2timer == 0) { + if (sk->nr->state == NR_STATE_3) { + if (sk->nr->condition & ACK_PENDING_CONDITION) { + sk->nr->condition &= ~ACK_PENDING_CONDITION; + nr_enquiry_response(sk); + } + } + } + + if (sk->nr->t4timer > 0 && --sk->nr->t4timer == 0) { + sk->nr->condition &= ~PEER_RX_BUSY_CONDITION; + } + + if (sk->nr->t1timer == 0 || --sk->nr->t1timer > 0) { + nr_reset_timer(sk); + return; + } + + switch (sk->nr->state) { + case NR_STATE_1: + if (sk->nr->n2count == sk->nr->n2) { + nr_clear_tx_queue(sk); + sk->nr->state = NR_STATE_0; + sk->state = TCP_CLOSE; + sk->err = ETIMEDOUT; + if (!sk->dead) + sk->state_change(sk); + sk->dead = 1; + } else { + sk->nr->n2count++; + nr_write_internal(sk, NR_CONNREQ); + } + break; + + case NR_STATE_2: + if (sk->nr->n2count == sk->nr->n2) { + nr_clear_tx_queue(sk); + sk->nr->state = NR_STATE_0; + sk->state = TCP_CLOSE; + sk->err = ETIMEDOUT; + if (!sk->dead) + sk->state_change(sk); + sk->dead = 1; + } else { + sk->nr->n2count++; + nr_write_internal(sk, NR_DISCREQ); + } + break; + + case NR_STATE_3: + if (sk->nr->n2count == sk->nr->n2) { + nr_clear_tx_queue(sk); + sk->nr->state = NR_STATE_0; + sk->state = TCP_CLOSE; + sk->err = ETIMEDOUT; + if (!sk->dead) + sk->state_change(sk); + sk->dead = 1; + } else { + sk->nr->n2count++; + nr_requeue_frames(sk); + } + break; + } + + sk->nr->t1timer = sk->nr->t1 = nr_calculate_t1(sk); + + nr_set_timer(sk); +} + +#endif diff --git a/net/protocols.c b/net/protocols.c index ba580aaa4..76def9857 100644 --- a/net/protocols.c +++ b/net/protocols.c @@ -13,38 +13,64 @@ #define CONFIG_UNIX /* always present... 
*/ #ifdef CONFIG_UNIX -#include "unix/unix.h" +#include <net/unix.h> #endif #ifdef CONFIG_INET #include <linux/inet.h> #endif #ifdef CONFIG_IPX -#include "inet/ipxcall.h" -#include "inet/p8022call.h" +#include <net/ipxcall.h> +#include <net/p8022call.h> #endif #ifdef CONFIG_AX25 -#include "inet/ax25call.h" +#include <net/ax25call.h> +#ifdef CONFIG_NETROM +#include <net/nrcall.h> +#endif +#endif +#ifdef CONFIG_ATALK +#ifndef CONFIG_IPX +#include <net/p8022call.h> +#endif +#include <net/atalkcall.h> +#endif +#include <net/psnapcall.h> +#ifdef CONFIG_TR +#include <linux/netdevice.h> +#include <linux/trdevice.h> +extern void rif_init(struct net_proto *); #endif - /* * Protocol Table */ struct net_proto protocols[] = { #ifdef CONFIG_UNIX - { "UNIX", unix_proto_init }, + { "UNIX", unix_proto_init }, /* Unix domain socket family */ #endif -#ifdef CONFIG_IPX - { "IPX", ipx_proto_init }, - { "802.2", p8022_proto_init }, +#if defined(CONFIG_IPX)||defined(CONFIG_ATALK) + { "802.2", p8022_proto_init }, /* 802.2 demultiplexor */ + { "SNAP", snap_proto_init }, /* SNAP demultiplexor */ #endif +#ifdef CONFIG_TR + { "RIF", rif_init }, /* RIF for Token ring */ +#endif #ifdef CONFIG_AX25 { "AX.25", ax25_proto_init }, +#ifdef CONFIG_NETROM + { "NET/ROM", nr_proto_init }, +#endif #endif #ifdef CONFIG_INET - { "INET", inet_proto_init }, + { "INET", inet_proto_init }, /* TCP/IP */ +#endif +#ifdef CONFIG_IPX + { "IPX", ipx_proto_init }, /* IPX */ +#endif +#ifdef CONFIG_ATALK + { "DDP", atalk_proto_init }, /* Netatalk Appletalk driver */ #endif - { NULL, NULL } + { NULL, NULL } /* End marker */ }; diff --git a/net/socket.c b/net/socket.c index ad940519e..c036b3dca 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1,7 +1,7 @@ /* * NET An implementation of the SOCKET network access protocol. 
* - * Version: @(#)socket.c 1.0.5 05/25/93 + * Version: @(#)socket.c 1.1.93 18/02/95 * * Authors: Orest Zborowski, <obz@Kodak.COM> * Ross Biro, <bir7@leland.Stanford.Edu> @@ -20,6 +20,18 @@ * Rob Janssen : Allow 0 length sends. * Alan Cox : Asynchronous I/O support (cribbed from the * tty drivers). + * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) + * Jeff Uphoff : Made max number of sockets command-line + * configurable. + * Matti Aarnio : Made the number of sockets dynamic, + * to be allocated when needed, and mr. + * Uphoff's max is used as max to be + * allowed to allocate. + * Linus : Argh. removed all the socket allocation + * altogether: it's in the inode now. + * Alan Cox : Made sock_alloc()/sock_release() public + * for NetROM and future kernel nfsd type + * stuff. * * * This program is free software; you can redistribute it and/or @@ -40,6 +52,7 @@ #include <linux/signal.h> #include <linux/errno.h> #include <linux/sched.h> +#include <linux/mm.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/stat.h> @@ -58,8 +71,7 @@ static int sock_read(struct inode *inode, struct file *file, char *buf, int size); static int sock_write(struct inode *inode, struct file *file, char *buf, int size); -static int sock_readdir(struct inode *inode, struct file *file, - struct dirent *dirent, int count); + static void sock_close(struct inode *inode, struct file *file); static int sock_select(struct inode *inode, struct file *file, int which, select_table *seltable); static int sock_ioctl(struct inode *inode, struct file *file, @@ -77,7 +89,7 @@ static struct file_operations socket_file_ops = { sock_lseek, sock_read, sock_write, - sock_readdir, + NULL, /* readdir */ sock_select, sock_ioctl, NULL, /* mmap */ @@ -88,20 +100,13 @@ static struct file_operations socket_file_ops = { }; /* - * The list of sockets - make this atomic. - */ -static struct socket sockets[NSOCKETS]; -/* - * Used to wait for a socket. 
- */ -static struct wait_queue *socket_wait_free = NULL; -/* * The protocol list. Each protocol is registered in here. */ static struct proto_ops *pops[NPROTO]; - -#define last_socket (sockets + NSOCKETS - 1) - +/* + * Statistics counters of the socket lists + */ +static int sockets_in_use = 0; /* * Support routines. Move socket addresses back and forth across the kernel/user @@ -188,25 +193,13 @@ static int get_fd(struct inode *inode) /* * Go from an inode to its socket slot. + * + * The original socket implementation wasn't very clever, which is + * why this exists at all.. */ - -struct socket *socki_lookup(struct inode *inode) +inline struct socket *socki_lookup(struct inode *inode) { - struct socket *sock; - - if ((sock = inode->i_socket) != NULL) - { - if (sock->state != SS_FREE && SOCK_INODE(sock) == inode) - return sock; - printk("socket.c: uhhuh. stale inode->i_socket pointer\n"); - } - for (sock = sockets; sock <= last_socket; ++sock) - if (sock->state != SS_FREE && SOCK_INODE(sock) == inode) - { - printk("socket.c: uhhuh. Found socket despite no inode->i_socket pointer\n"); - return(sock); - } - return(NULL); + return &inode->u.socket_i; } /* @@ -216,82 +209,52 @@ struct socket *socki_lookup(struct inode *inode) static inline struct socket *sockfd_lookup(int fd, struct file **pfile) { struct file *file; + struct inode *inode; if (fd < 0 || fd >= NR_OPEN || !(file = current->files->fd[fd])) - return(NULL); + return NULL; + + inode = file->f_inode; + if (!inode || !inode->i_sock) + return NULL; if (pfile) *pfile = file; - return(socki_lookup(file->f_inode)); + return socki_lookup(inode); } /* - * Allocate a socket. Wait if we are out of sockets. + * Allocate a socket. */ -static struct socket *sock_alloc(int wait) +struct socket *sock_alloc(void) { - struct socket *sock; - - while (1) - { - cli(); - for (sock = sockets; sock <= last_socket; ++sock) - { - if (sock->state == SS_FREE) - { - /* - * Got one.. 
- */ - sock->state = SS_UNCONNECTED; - sti(); - sock->flags = 0; - sock->ops = NULL; - sock->data = NULL; - sock->conn = NULL; - sock->iconn = NULL; - sock->fasync_list = NULL; - /* - * This really shouldn't be necessary, but everything - * else depends on inodes, so we grab it. - * Sleeps are also done on the i_wait member of this - * inode. The close system call will iput this inode - * for us. - */ - if (!(SOCK_INODE(sock) = get_empty_inode())) - { - printk("NET: sock_alloc: no more inodes\n"); - sock->state = SS_FREE; - return(NULL); - } - SOCK_INODE(sock)->i_mode = S_IFSOCK; - SOCK_INODE(sock)->i_uid = current->euid; - SOCK_INODE(sock)->i_gid = current->egid; - SOCK_INODE(sock)->i_socket = sock; - - sock->wait = &SOCK_INODE(sock)->i_wait; - return(sock); - } - } - sti(); - /* - * If its a 'now or never request' then return. - */ - if (!wait) - return(NULL); - /* - * Sleep on the socket free'ing queue. - */ - interruptible_sleep_on(&socket_wait_free); - /* - * If we have been interrupted then return. 
- */ - if (current->signal & ~current->blocked) - { - return(NULL); - } - } + struct inode * inode; + struct socket * sock; + + inode = get_empty_inode(); + if (!inode) + return NULL; + + inode->i_mode = S_IFSOCK; + inode->i_sock = 1; + inode->i_uid = current->uid; + inode->i_gid = current->gid; + + sock = &inode->u.socket_i; + sock->state = SS_UNCONNECTED; + sock->flags = 0; + sock->ops = NULL; + sock->data = NULL; + sock->conn = NULL; + sock->iconn = NULL; + sock->next = NULL; + sock->wait = &inode->i_wait; + sock->inode = inode; /* "backlink": we could use pointer arithmetic instead */ + sock->fasync_list = NULL; + sockets_in_use++; + return sock; } /* @@ -302,13 +265,12 @@ static inline void sock_release_peer(struct socket *peer) { peer->state = SS_DISCONNECTING; wake_up_interruptible(peer->wait); + sock_wake_async(peer, 1); } - -static void sock_release(struct socket *sock) +void sock_release(struct socket *sock) { int oldstate; - struct inode *inode; struct socket *peersock, *nextsock; if ((oldstate = sock->state) != SS_UNCONNECTED) @@ -334,19 +296,8 @@ static void sock_release(struct socket *sock) sock->ops->release(sock, peersock); if (peersock) sock_release_peer(peersock); - inode = SOCK_INODE(sock); - sock->state = SS_FREE; /* this really releases us */ - - /* - * This will wake anyone waiting for a free socket. - */ - wake_up_interruptible(&socket_wait_free); - - /* - * We need to do this. If sock alloc was called we already have an inode. - */ - - iput(inode); + --sockets_in_use; /* Bookkeeping.. 
*/ + iput(SOCK_INODE(sock)); } /* @@ -368,17 +319,13 @@ static int sock_read(struct inode *inode, struct file *file, char *ubuf, int siz struct socket *sock; int err; - if (!(sock = socki_lookup(inode))) - { - printk("NET: sock_read: can't find socket for inode!\n"); - return(-EBADF); - } + sock = socki_lookup(inode); if (sock->flags & SO_ACCEPTCON) return(-EINVAL); if(size<0) return -EINVAL; - if(size==0) + if(size==0) /* Match SYS5 behaviour */ return 0; if ((err=verify_area(VERIFY_WRITE,ubuf,size))<0) return err; @@ -395,18 +342,14 @@ static int sock_write(struct inode *inode, struct file *file, char *ubuf, int si struct socket *sock; int err; - if (!(sock = socki_lookup(inode))) - { - printk("NET: sock_write: can't find socket for inode!\n"); - return(-EBADF); - } + sock = socki_lookup(inode); if (sock->flags & SO_ACCEPTCON) return(-EINVAL); if(size<0) return -EINVAL; - if(size==0) + if(size==0) /* Match SYS5 behaviour */ return 0; if ((err=verify_area(VERIFY_READ,ubuf,size))<0) @@ -415,16 +358,6 @@ static int sock_write(struct inode *inode, struct file *file, char *ubuf, int si } /* - * You can't read directories from a socket! - */ - -static int sock_readdir(struct inode *inode, struct file *file, struct dirent *dirent, - int count) -{ - return(-EBADF); -} - -/* * With an ioctl arg may well be a user mode pointer, but we don't know what to do * with it - thats up to the protocol still. 
*/ @@ -433,12 +366,7 @@ int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { struct socket *sock; - - if (!(sock = socki_lookup(inode))) - { - printk("NET: sock_ioctl: can't find socket for inode!\n"); - return(-EBADF); - } + sock = socki_lookup(inode); return(sock->ops->ioctl(sock, cmd, arg)); } @@ -447,17 +375,13 @@ static int sock_select(struct inode *inode, struct file *file, int sel_type, sel { struct socket *sock; - if (!(sock = socki_lookup(inode))) - { - printk("NET: sock_select: can't find socket for inode!\n"); - return(0); - } + sock = socki_lookup(inode); /* - * We can't return errors to select, so its either yes or no. + * We can't return errors to select, so it's either yes or no. */ - if (sock->ops && sock->ops->select) + if (sock->ops->select) return(sock->ops->select(sock, sel_type, wait)); return(0); } @@ -465,22 +389,14 @@ static int sock_select(struct inode *inode, struct file *file, int sel_type, sel void sock_close(struct inode *inode, struct file *filp) { - struct socket *sock; - /* * It's possible the inode is NULL if we're closing an unfinished socket. 
*/ if (!inode) return; - - if (!(sock = socki_lookup(inode))) - { - printk("NET: sock_close: can't find socket for inode!\n"); - return; - } sock_fasync(inode, filp, 0); - sock_release(sock); + sock_release(socki_lookup(inode)); } /* @@ -536,11 +452,27 @@ static int sock_fasync(struct inode *inode, struct file *filp, int on) return 0; } -int sock_wake_async(struct socket *sock) +int sock_wake_async(struct socket *sock, int how) { if (!sock || !sock->fasync_list) return -1; - kill_fasync(sock->fasync_list, SIGIO); + switch (how) + { + case 0: + kill_fasync(sock->fasync_list, SIGIO); + break; + case 1: + if (!(sock->flags & SO_WAITDATA)) + kill_fasync(sock->fasync_list, SIGIO); + break; + case 2: + if (sock->flags & SO_NOSPACE) + { + kill_fasync(sock->fasync_list, SIGIO); + sock->flags &= ~SO_NOSPACE; + } + break; + } return 0; } @@ -549,7 +481,7 @@ int sock_wake_async(struct socket *sock) * Wait for a connection. */ -int sock_awaitconn(struct socket *mysock, struct socket *servsock) +int sock_awaitconn(struct socket *mysock, struct socket *servsock, int flags) { struct socket *last; @@ -584,8 +516,13 @@ int sock_awaitconn(struct socket *mysock, struct socket *servsock) * SS_CONNECTED if we're connected. */ wake_up_interruptible(servsock->wait); + sock_wake_async(servsock, 0); + if (mysock->state != SS_CONNECTED) { + if (flags & O_NONBLOCK) + return -EINPROGRESS; + interruptible_sleep_on(mysock->wait); if (mysock->state != SS_CONNECTED && mysock->state != SS_DISCONNECTING) @@ -660,10 +597,11 @@ static int sock_socket(int family, int type, int protocol) * default. */ - if (!(sock = sock_alloc(1))) + if (!(sock = sock_alloc())) { - printk("sock_socket: no more sockets\n"); - return(-EAGAIN); + printk("NET: sock_socket: no more sockets\n"); + return(-ENOSR); /* Was: EAGAIN, but we are out of + system resources! */ } sock->type = type; @@ -745,7 +683,7 @@ static int sock_socketpair(int family, int type, int protocol, unsigned long uso /* - * Bind a name to a socket. 
Nothing much to do here since its + * Bind a name to a socket. Nothing much to do here since it's * the protocol's responsibility to handle the local address. * * We move the socket address to kernel space before we call @@ -832,10 +770,11 @@ static int sock_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrl return(-EINVAL); } - if (!(newsock = sock_alloc(0))) + if (!(newsock = sock_alloc())) { printk("NET: sock_accept: no more sockets\n"); - return(-EAGAIN); + return(-ENOSR); /* Was: EAGAIN, but we are out of system + resources! */ } newsock->type = sock->type; newsock->ops = sock->ops; @@ -906,8 +845,7 @@ static int sock_connect(int fd, struct sockaddr *uservaddr, int addrlen) * an async connect fork and both children connect. Clean * this up in the protocols! */ - return(sock->ops->connect(sock, uservaddr, - addrlen, file->f_flags)); + break; default: return(-EINVAL); } @@ -1128,7 +1066,7 @@ static int sock_getsockopt(int fd, int level, int optname, char *optval, int *op if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); - if (!sock->ops || !sock->ops->getsockopt) + if (!sock->ops->getsockopt) return(0); return(sock->ops->getsockopt(sock, level, optname, optval, optlen)); } @@ -1175,132 +1113,93 @@ int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg) * I'm now expanding this up to a higher level to separate the assorted * kernel/user space manipulations and global assumptions from the protocol * layers proper - AC. + * + * Argument checking cleaned up. Saved 20% in size. 
*/ asmlinkage int sys_socketcall(int call, unsigned long *args) { int er; + unsigned char nargs[16]={0,3,3,3,2,3,3,3, + 4,4,4,6,6,2,5,5}; + + unsigned long a0,a1; + + if(call<1||call>SYS_GETSOCKOPT) + return -EINVAL; + + er=verify_area(VERIFY_READ, args, nargs[call] * sizeof(unsigned long)); + if(er) + return er; + + a0=get_fs_long(args); + a1=get_fs_long(args+1); + + switch(call) { case SYS_SOCKET: - er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); - if(er) - return er; - return(sock_socket(get_fs_long(args+0), - get_fs_long(args+1), - get_fs_long(args+2))); + return(sock_socket(a0,a1,get_fs_long(args+2))); case SYS_BIND: - er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); - if(er) - return er; - return(sock_bind(get_fs_long(args+0), - (struct sockaddr *)get_fs_long(args+1), + return(sock_bind(a0,(struct sockaddr *)a1, get_fs_long(args+2))); case SYS_CONNECT: - er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); - if(er) - return er; - return(sock_connect(get_fs_long(args+0), - (struct sockaddr *)get_fs_long(args+1), + return(sock_connect(a0, (struct sockaddr *)a1, get_fs_long(args+2))); case SYS_LISTEN: - er=verify_area(VERIFY_READ, args, 2 * sizeof(long)); - if(er) - return er; - return(sock_listen(get_fs_long(args+0), - get_fs_long(args+1))); + return(sock_listen(a0,a1)); case SYS_ACCEPT: - er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); - if(er) - return er; - return(sock_accept(get_fs_long(args+0), - (struct sockaddr *)get_fs_long(args+1), + return(sock_accept(a0,(struct sockaddr *)a1, (int *)get_fs_long(args+2))); case SYS_GETSOCKNAME: - er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); - if(er) - return er; - return(sock_getsockname(get_fs_long(args+0), - (struct sockaddr *)get_fs_long(args+1), + return(sock_getsockname(a0,(struct sockaddr *)a1, (int *)get_fs_long(args+2))); case SYS_GETPEERNAME: - er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); - if(er) - return er; - return(sock_getpeername(get_fs_long(args+0), - (struct sockaddr 
*)get_fs_long(args+1), + return(sock_getpeername(a0, (struct sockaddr *)a1, (int *)get_fs_long(args+2))); case SYS_SOCKETPAIR: - er=verify_area(VERIFY_READ, args, 4 * sizeof(long)); - if(er) - return er; - return(sock_socketpair(get_fs_long(args+0), - get_fs_long(args+1), + return(sock_socketpair(a0,a1, get_fs_long(args+2), (unsigned long *)get_fs_long(args+3))); case SYS_SEND: - er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long)); - if(er) - return er; - return(sock_send(get_fs_long(args+0), - (void *)get_fs_long(args+1), + return(sock_send(a0, + (void *)a1, get_fs_long(args+2), get_fs_long(args+3))); case SYS_SENDTO: - er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long)); - if(er) - return er; - return(sock_sendto(get_fs_long(args+0), - (void *)get_fs_long(args+1), + return(sock_sendto(a0,(void *)a1, get_fs_long(args+2), get_fs_long(args+3), (struct sockaddr *)get_fs_long(args+4), get_fs_long(args+5))); case SYS_RECV: - er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long)); - if(er) - return er; - return(sock_recv(get_fs_long(args+0), - (void *)get_fs_long(args+1), + return(sock_recv(a0, + (void *)a1, get_fs_long(args+2), get_fs_long(args+3))); case SYS_RECVFROM: - er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long)); - if(er) - return er; - return(sock_recvfrom(get_fs_long(args+0), - (void *)get_fs_long(args+1), + return(sock_recvfrom(a0, + (void *)a1, get_fs_long(args+2), get_fs_long(args+3), (struct sockaddr *)get_fs_long(args+4), (int *)get_fs_long(args+5))); case SYS_SHUTDOWN: - er=verify_area(VERIFY_READ, args, 2* sizeof(unsigned long)); - if(er) - return er; - return(sock_shutdown(get_fs_long(args+0), - get_fs_long(args+1))); + return(sock_shutdown(a0,a1)); case SYS_SETSOCKOPT: - er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long)); - if(er) - return er; - return(sock_setsockopt(get_fs_long(args+0), - get_fs_long(args+1), + return(sock_setsockopt(a0, + a1, get_fs_long(args+2), (char *)get_fs_long(args+3), 
get_fs_long(args+4))); case SYS_GETSOCKOPT: - er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long)); - if(er) - return er; - return(sock_getsockopt(get_fs_long(args+0), - get_fs_long(args+1), + return(sock_getsockopt(a0, + a1, get_fs_long(args+2), (char *)get_fs_long(args+3), (int *)get_fs_long(args+4))); - default: - return(-EINVAL); } + return -EINVAL; /* to keep gcc happy */ } /* @@ -1371,16 +1270,9 @@ void proto_init(void) void sock_init(void) { - struct socket *sock; int i; - printk("Swansea University Computer Society NET3.017\n"); - - /* - * Release all sockets. - */ - for (sock = sockets; sock <= last_socket; ++sock) - sock->state = SS_FREE; + printk("Swansea University Computer Society NET3.029 Snap #6 for Linux 1.3.0\n"); /* * Initialize all address (protocol) families. @@ -1406,6 +1298,21 @@ void sock_init(void) */ bh_base[NET_BH].routine= net_bh; + enable_bh(NET_BH); #endif - +} + +int socket_get_info(char *buffer, char **start, off_t offset, int length) +{ + int len = sprintf(buffer, "sockets: used %d\n", sockets_in_use); + if (offset >= len) + { + *start = buffer; + return 0; + } + *start = buffer + offset; + len -= offset; + if (len > length) + len = length; + return len; } diff --git a/net/unix/proc.c b/net/unix/proc.c index 36cd8b1c6..64a777330 100644 --- a/net/unix/proc.c +++ b/net/unix/proc.c @@ -32,7 +32,7 @@ #include <linux/net.h> #include <linux/un.h> #include <linux/param.h> -#include "unix.h" +#include <net/unix.h> /* Called from PROCfs. */ @@ -49,7 +49,7 @@ int unix_get_info(char *buffer, char **start, off_t offset, int length) len += sprintf(buffer, "Num RefCount Protocol Flags Type St Path\n"); - for(i = 0; i < NSOCKETS; i++) + for(i = 0; i < NSOCKETS_UNIX; i++) { save_flags(flags); cli(); diff --git a/net/unix/sock.c b/net/unix/sock.c index 73460bc7e..9066658a7 100644 --- a/net/unix/sock.c +++ b/net/unix/sock.c @@ -27,7 +27,6 @@ * 2 of the License, or(at your option) any later version. 
*/ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/signal.h> @@ -49,14 +48,14 @@ #include <stdarg.h> -#include "unix.h" +#include <net/unix.h> /* * Because these have the address in them they casually waste an extra 8K of kernel data * space that need not be wasted. */ -struct unix_proto_data unix_datas[NSOCKETS]; +struct unix_proto_data unix_datas[NSOCKETS_UNIX]; static int unix_proto_create(struct socket *sock, int protocol); static int unix_proto_dup(struct socket *newsock, struct socket *oldsock); @@ -331,7 +330,7 @@ static int unix_proto_create(struct socket *sock, int protocol) upd->protocol = protocol; upd->socket = sock; UN_DATA(sock) = upd; - upd->refcnt = 1; /* Now its complete - bgm */ + upd->refcnt = 1; /* Now it's complete - bgm */ return(0); } @@ -422,7 +421,7 @@ static int unix_proto_bind(struct socket *sock, struct sockaddr *umyaddr, i = do_mknod(fname, S_IFSOCK | S_IRWXUGO, 0); if (i == 0) - i = open_namei(fname, 0, S_IFSOCK, &upd->inode, NULL); + i = open_namei(fname, 2, S_IFSOCK, &upd->inode, NULL); set_fs(old_fs); if (i < 0) { @@ -431,7 +430,7 @@ static int unix_proto_bind(struct socket *sock, struct sockaddr *umyaddr, i=-EADDRINUSE; return(i); } - upd->sockaddr_len = sockaddr_len; /* now its legal */ + upd->sockaddr_len = sockaddr_len; /* now it's legal */ return(0); } @@ -482,7 +481,7 @@ static int unix_proto_connect(struct socket *sock, struct sockaddr *uservaddr, fname[sockaddr_len-UN_PATH_OFFSET] = '\0'; old_fs = get_fs(); set_fs(get_ds()); - i = open_namei(fname, 0, S_IFSOCK, &inode, NULL); + i = open_namei(fname, 2, S_IFSOCK, &inode, NULL); set_fs(old_fs); if (i < 0) { @@ -496,7 +495,7 @@ static int unix_proto_connect(struct socket *sock, struct sockaddr *uservaddr, return(-EINVAL); } - if ((i = sock_awaitconn(sock, serv_upd->socket)) < 0) + if ((i = sock_awaitconn(sock, serv_upd->socket, flags)) < 0) { return(i); } @@ -546,13 +545,14 @@ static int unix_proto_accept(struct socket *sock, struct 
socket *newsock, int fl { if (flags & O_NONBLOCK) return(-EAGAIN); + sock->flags |= SO_WAITDATA; interruptible_sleep_on(sock->wait); + sock->flags &= ~SO_WAITDATA; if (current->signal & ~current->blocked) { return(-ERESTARTSYS); } } - /* * Great. Finish the connection relative to server and client, * wake up the client and return the new fd to the server. @@ -569,6 +569,7 @@ static int unix_proto_accept(struct socket *sock, struct socket *newsock, int fl UN_DATA(newsock)->sockaddr_un = UN_DATA(sock)->sockaddr_un; UN_DATA(newsock)->sockaddr_len = UN_DATA(sock)->sockaddr_len; wake_up_interruptible(clientsock->wait); + sock_wake_async(clientsock, 0); return(0); } @@ -622,7 +623,9 @@ static int unix_proto_read(struct socket *sock, char *ubuf, int size, int nonblo } if (nonblock) return(-EAGAIN); + sock->flags |= SO_WAITDATA; interruptible_sleep_on(sock->wait); + sock->flags &= ~SO_WAITDATA; if (current->signal & ~current->blocked) { return(-ERESTARTSYS); @@ -655,7 +658,10 @@ static int unix_proto_read(struct socket *sock, char *ubuf, int size, int nonblo ubuf += cando; todo -= cando; if (sock->state == SS_CONNECTED) + { wake_up_interruptible(sock->conn->wait); + sock_wake_async(sock->conn, 2); + } avail = UN_BUF_AVAIL(upd); } while(todo && avail); @@ -690,8 +696,10 @@ static int unix_proto_write(struct socket *sock, char *ubuf, int size, int nonbl while(!(space = UN_BUF_SPACE(pupd))) { + sock->flags |= SO_NOSPACE; if (nonblock) return(-EAGAIN); + sock->flags &= ~SO_NOSPACE; interruptible_sleep_on(sock->wait); if (current->signal & ~current->blocked) { @@ -745,7 +753,10 @@ static int unix_proto_write(struct socket *sock, char *ubuf, int size, int nonbl ubuf += cando; todo -= cando; if (sock->state == SS_CONNECTED) + { wake_up_interruptible(sock->conn->wait); + sock_wake_async(sock->conn, 1); + } space = UN_BUF_SPACE(pupd); } while(todo && space); diff --git a/net/unix/unix.h b/net/unix/unix.h deleted file mode 100644 index 4d9ee251a..000000000 --- a/net/unix/unix.h +++ 
/dev/null @@ -1,69 +0,0 @@ -/* - * UNIX An implementation of the AF_UNIX network domain for the - * LINUX operating system. UNIX is implemented using the - * BSD Socket interface as the means of communication with - * the user level. - * - * This file describes some things of the UNIX protocol family - * module. It is mainly used for the "proc" sub-module now, - * but it may be useful for cleaning up the UNIX module as a - * whole later. - * - * Version: @(#)unix.h 1.0.3 05/25/93 - * - * Authors: Orest Zborowski, <obz@Kodak.COM> - * Ross Biro, <bir7@leland.Stanford.Edu> - * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> - * - * Fixes: - * Dmitry Gorodchanin - proc locking - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - - -#ifdef _LINUX_UN_H - - -struct unix_proto_data { - int refcnt; /* cnt of reference 0=free */ - /* -1=not initialised -bgm */ - struct socket *socket; /* socket we're bound to */ - int protocol; - struct sockaddr_un sockaddr_un; - short sockaddr_len; /* >0 if name bound */ - char *buf; - int bp_head, bp_tail; - struct inode *inode; - struct unix_proto_data *peerupd; - struct wait_queue *wait; /* Lock across page faults (FvK) */ - int lock_flag; -}; - -extern struct unix_proto_data unix_datas[NSOCKETS]; - - -#define last_unix_data (unix_datas + NSOCKETS - 1) - - -#define UN_DATA(SOCK) ((struct unix_proto_data *)(SOCK)->data) -#define UN_PATH_OFFSET ((unsigned long)((struct sockaddr_un *)0) \ - ->sun_path) - -/* - * Buffer size must be power of 2. buffer mgmt inspired by pipe code. - * note that buffer contents can wraparound, and we can write one byte less - * than full size to discern full vs empty. 
- */ -#define BUF_SIZE PAGE_SIZE -#define UN_BUF_AVAIL(UPD) (((UPD)->bp_head - (UPD)->bp_tail) & \ - (BUF_SIZE-1)) -#define UN_BUF_SPACE(UPD) ((BUF_SIZE-1) - UN_BUF_AVAIL(UPD)) - -#endif /* _LINUX_UN_H */ - - -extern void unix_proto_init(struct net_proto *pro); |