diff options
Diffstat (limited to 'net')
79 files changed, 862 insertions, 606 deletions
diff --git a/net/.cvsignore b/net/.cvsignore index 4671378ae..b9c8aa2e0 100644 --- a/net/.cvsignore +++ b/net/.cvsignore @@ -1 +1,9 @@ +! RCS SCCS CVS CVS.adm +RCSLOG cvslog.* +tags TAGS +.make.state .nse_depinfo +*~ #* .#* ,* _$* *$ +*.old *.bak *.BAK *.orig *.rej .del-* +*.a *.olb *.o *.obj *.so *.exe +*.Z *.elc *.ln .depend diff --git a/net/802/llc_macinit.c b/net/802/llc_macinit.c index 1ee0a9699..c72be3d4d 100644 --- a/net/802/llc_macinit.c +++ b/net/802/llc_macinit.c @@ -26,6 +26,7 @@ #include <linux/malloc.h> #include <linux/unistd.h> #include <linux/netdevice.h> +#include <linux/init.h> #include <net/p8022.h> #include <asm/byteorder.h> @@ -202,7 +203,7 @@ EXPORT_SYMBOL(llc_xid_request); #define ALL_TYPES_8022 0 -void llc_init(struct net_proto *proto) +__initfunc(void llc_init(struct net_proto *proto)) { printk(KERN_NOTICE "IEEE 802.2 LLC for Linux 2.1 (c) 1996 Tim Alpaerts\n"); return; diff --git a/net/802/p8022.c b/net/802/p8022.c index 23e6f2fad..1a12f4d60 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -23,6 +23,7 @@ #include <net/datalink.h> #include <linux/mm.h> #include <linux/in.h> +#include <linux/init.h> #include <net/p8022.h> static struct datalink_proto *p8022_list = NULL; @@ -90,7 +91,7 @@ static struct packet_type p8022_packet_type = EXPORT_SYMBOL(register_8022_client); EXPORT_SYMBOL(unregister_8022_client); -void p8022_proto_init(struct net_proto *pro) +__initfunc(void p8022_proto_init(struct net_proto *pro)) { p8022_packet_type.type=htons(ETH_P_802_2); dev_add_pack(&p8022_packet_type); diff --git a/net/802/p8022tr.c b/net/802/p8022tr.c index 6a5864d54..ef6a4976a 100644 --- a/net/802/p8022tr.c +++ b/net/802/p8022tr.c @@ -15,6 +15,7 @@ #include <net/datalink.h> #include <linux/mm.h> #include <linux/in.h> +#include <linux/init.h> #include <net/p8022tr.h> #define SNAP_HEADER_LEN 8 @@ -91,7 +92,7 @@ static struct packet_type p8022tr_packet_type = EXPORT_SYMBOL(register_8022tr_client); EXPORT_SYMBOL(unregister_8022tr_client); -void p8022tr_proto_init(struct net_proto *pro) +__initfunc(void p8022tr_proto_init(struct net_proto *pro)) { p8022tr_packet_type.type=htons(ETH_P_TR_802_2); dev_add_pack(&p8022tr_packet_type); diff --git a/net/802/psnap.c b/net/802/psnap.c index bdcb5efd2..6ce58da35 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -19,6 +19,7 @@ #include <net/psnap.h> #include <linux/mm.h> #include <linux/in.h> +#include <linux/init.h> static struct datalink_proto *snap_list = NULL; static struct datalink_proto *snap_dl = NULL; /* 802.2 DL for SNAP */ @@ -87,7 +88,7 @@ static void snap_datalink_header(struct datalink_proto *dl, struct sk_buff *skb, EXPORT_SYMBOL(register_snap_client); EXPORT_SYMBOL(unregister_snap_client); -void snap_proto_init(struct net_proto *pro) +__initfunc(void snap_proto_init(struct net_proto *pro)) { snap_dl=register_8022_client(0xAA, snap_rcv); if(snap_dl==NULL) diff --git a/net/802/tr.c b/net/802/tr.c index e903924f7..627dd9a99 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -31,6 +31,7 @@ #include <linux/timer.h> #include <linux/net.h> #include <linux/proc_fs.h> +#include <linux/init.h> #include <net/arp.h> static void tr_source_route(struct trh_hdr *trh, struct device *dev); @@ -460,7 +461,7 @@ int rif_get_info(char *buffer,char **start, off_t offset, int length, int dummy) * too much for this. */ -void rif_init(struct net_proto *unused) +__initfunc(void rif_init(struct net_proto *unused)) { rif_timer.expires = RIF_TIMEOUT; diff --git a/net/TUNABLE b/net/TUNABLE index bd6066126..2e5cc1b6e 100644 --- a/net/TUNABLE +++ b/net/TUNABLE @@ -1,6 +1,5 @@ -The following parameters should be tunable but aren't, until we get sysctl -or similar schemes. For now you'll have to dig around. Various CONFIG_xxx -items that should be configurable using sysctl omitted. +The following parameters should be tunable at compile time. Some of them +exist as sysctls too. This is far from complete @@ -54,8 +53,6 @@ MASQUERADE_EXPIRE_TCP_FIN Time we keep a masquerade for after a FIN MASQUERADE_EXPIRE_UDP Time we keep a UDP masquerade for (tunable) MAXVIFS Maximum mrouted vifs (1-32) MFC_LINES Lines in the multicast router cache (tunable) -SK_RMEM_MAX Max memory a socket owns for receive (tunable) -SK_WMEM_MAX Max memory a socket owns for send (tunable) NetROM parameters are tunable via an ioctl passing a struct diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index e3e87f9e4..9ad9b8e93 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -47,6 +47,7 @@ #include <net/datalink.h> #include <net/psnap.h> #include <linux/atalk.h> +#include <linux/init.h> /* * Lists of aarp entries @@ -796,7 +797,7 @@ static struct notifier_block aarp_notifier={ static char aarp_snap_id[]={0x00,0x00,0x00,0x80,0xF3}; -void aarp_proto_init(void) +__initfunc(void aarp_proto_init(void)) { if((aarp_dl=register_snap_client(aarp_snap_id, aarp_rcv))==NULL) printk(KERN_CRIT "Unable to register AARP with SNAP.\n"); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index eba533a23..4dbcc0a9c 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -67,6 +67,7 @@ #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/firewall.h> +#include <linux/init.h> #undef APPLETALK_DEBUG @@ -2034,7 +2035,7 @@ static struct proc_dir_entry proc_atalk_iface = { /* Called by proto.c on kernel start up */ -void atalk_proto_init(struct net_proto *pro) +__initfunc(void atalk_proto_init(struct net_proto *pro)) { (void) sock_register(&atalk_family_ops); if ((ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv)) == NULL) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 489993da6..f3692d833 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -126,6 +126,7 @@ #include <linux/stat.h> #include <linux/firewall.h> #include <linux/sysctl.h> +#include <linux/init.h> #include <net/ip.h> #include <net/arp.h> @@ -1795,7 +1796,7 @@ static struct proc_dir_entry proc_ax25_calls = { }; #endif -void ax25_proto_init(struct net_proto *pro) +__initfunc(void ax25_proto_init(struct net_proto *pro)) { sock_register(&ax25_family_ops); ax25_packet_type.type = htons(ETH_P_AX25); diff --git a/net/core/dev.c b/net/core/dev.c index c02d4052e..07a5c1706 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -986,6 +986,9 @@ static int dev_ifconf(char *arg) /* * Loop over the interfaces, and write an info block for each. */ + + dev_lock_wait(); + dev_lock_list(); for (dev = dev_base; dev != NULL; dev = dev->next) { @@ -1013,6 +1016,8 @@ static int dev_ifconf(char *arg) len -= sizeof(struct ifreq); } + dev_unlock_list(); + /* * All done. Write the updated control block back to the caller. */ diff --git a/net/core/firewall.c b/net/core/firewall.c index 32cf52655..44e0709cf 100644 --- a/net/core/firewall.c +++ b/net/core/firewall.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/firewall.h> +#include <linux/init.h> #include <asm/semaphore.h> struct semaphore firewall_sem = MUTEX; @@ -150,7 +151,7 @@ EXPORT_SYMBOL(call_in_firewall); EXPORT_SYMBOL(call_out_firewall); EXPORT_SYMBOL(call_fw_firewall); -void fwchain_init(void) +__initfunc(void fwchain_init(void)) { int i; for(i=0;i<NPROTO;i++) diff --git a/net/core/scm.c b/net/core/scm.c index 3aa0c7b17..d88ab0ae7 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -1,6 +1,7 @@ /* scm.c - Socket level control messages processing. * * Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * Alignment and value checking mods by Craig Metz * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -60,12 +61,12 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) int num; struct scm_fp_list *fpl = *fplp; struct file **fpp; - int *fdp = (int*)cmsg->cmsg_data; + int *fdp = (int*)CMSG_DATA(cmsg); int i; - num = (cmsg->cmsg_len - sizeof(struct cmsghdr))/sizeof(int); + num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int); - if (!num) + if (num <= 0) return 0; if (num > SCM_MAX_FD) @@ -153,9 +154,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) goto error; break; case SCM_CREDENTIALS: - if (cmsg->cmsg_len < sizeof(*cmsg) + sizeof(struct ucred)) + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred))) goto error; - memcpy(&p->creds, cmsg->cmsg_data, sizeof(struct ucred)); + memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred)); err = scm_check_creds(&p->creds); if (err) goto error; @@ -163,9 +164,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) case SCM_CONNECT: if (scm_flags) goto error; - if (cmsg->cmsg_len < sizeof(*cmsg) + sizeof(int)) + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) goto error; - memcpy(&acc_fd, cmsg->cmsg_data, sizeof(int)); + memcpy(&acc_fd, CMSG_DATA(cmsg), sizeof(int)); p->sock = NULL; if (acc_fd != -1) { if (acc_fd < 0 || acc_fd >= NR_OPEN || @@ -207,7 +208,7 @@ error: void put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) { struct cmsghdr *cm = (struct cmsghdr*)msg->msg_control; - int cmlen = sizeof(*cm) + len; + int cmlen = CMSG_LEN(len); int err; if (cm==NULL || msg->msg_controllen < sizeof(*cm)) { @@ -224,9 +225,9 @@ void put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) if (!err) err = put_user(cmlen, &cm->cmsg_len); if (!err) - err = copy_to_user(cm->cmsg_data, data, cmlen - sizeof(*cm)); + err = copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)); if (!err) { - cmlen = CMSG_ALIGN(cmlen); + cmlen = CMSG_SPACE(len); msg->msg_control += cmlen; msg->msg_controllen -= cmlen; } @@ -243,22 +244,21 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) int i; struct file **fp = scm->fp->fp; - if (fdnum > fdmax) + if (fdnum < fdmax) fdmax = fdnum; - for (i=0, cmfptr=(int*)cm->cmsg_data; i<fdmax; i++, cmfptr++) + for (i=0, cmfptr=(int*)CMSG_DATA(cm); i<fdmax; i++, cmfptr++) { int new_fd = get_unused_fd(); if (new_fd < 0) break; current->files->fd[new_fd] = fp[i]; err = put_user(new_fd, cmfptr); - cmfptr++; } if (i > 0) { - int cmlen = i*sizeof(int) + sizeof(struct cmsghdr); + int cmlen = CMSG_LEN(i*sizeof(int)); if (!err) err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) @@ -266,7 +266,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) if (!err) err = put_user(cmlen, &cm->cmsg_len); if (!err) { - cmlen = CMSG_ALIGN(cmlen); + cmlen = CMSG_SPACE(i*sizeof(int)); msg->msg_control += cmlen; msg->msg_controllen -= cmlen; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 00a87e0e2..06c321e4f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -64,14 +64,15 @@ static atomic_t net_skbcount = ATOMIC_INIT(0); static atomic_t net_allocs = ATOMIC_INIT(0); static atomic_t net_fails = ATOMIC_INIT(0); + extern atomic_t ip_frag_mem; /* * Strings we don't want inline's duplicating */ -char *skb_push_errstr="skpush:under: %p:%d"; -char *skb_put_errstr ="skput:over: %p:%d"; +const char skb_push_errstr[]="skpush:under: %p:%d"; +const char skb_put_errstr[] ="skput:over: %p:%d"; void show_net_buffers(void) { diff --git a/net/core/sock.c b/net/core/sock.c index 8c008c0f2..f28ea828e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -118,6 +118,12 @@ #define min(a,b) ((a)<(b)?(a):(b)) +/* Run time adjustable parameters. */ +__u32 sysctl_wmem_max = SK_WMEM_MAX; +__u32 sysctl_rmem_max = SK_RMEM_MAX; +__u32 sysctl_wmem_default = SK_WMEM_MAX; +__u32 sysctl_rmem_default = SK_RMEM_MAX; + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -146,13 +152,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname, } #endif - if(optlen<sizeof(int)) { -#if 1 /* DaveM Debugging */ - printk("sock_setsockopt: optlen is %d, going on anyways.\n", optlen); -#else + if(optlen<sizeof(int)) return(-EINVAL); -#endif - } err = get_user(val, (int *)optval); if (err) @@ -189,15 +190,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname, * is best */ - if(val > SK_WMEM_MAX*2) + /* printk(KERN_DEBUG "setting SO_SNDBUF %d\n", val); */ + if (val > sysctl_wmem_max) return -EINVAL; - /* - * Once this is all 32bit values we can - * drop this check. + + /* FIXME: the tcp code should be made to work even + * with small sndbuf values. */ - if(val > 65535) - return -EINVAL; - sk->sndbuf = max(val,2048); + sk->sndbuf = max(val*2,2048); + /* * Wake up sending tasks if we * upped the value. @@ -206,12 +207,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname, break; case SO_RCVBUF: - if(val > SK_RMEM_MAX*2) - return -EINVAL; - /* Can go soon: FIXME */ - if(val > 65535) + /* printk(KERN_DEBUG "setting SO_RCVBUF %d\n", val); */ + + if (val > sysctl_rmem_max) return -EINVAL; - sk->rcvbuf = max(val,256); + + /* FIXME: is this lower bound the right one? */ + sk->rcvbuf = max(val*2,256); break; case SO_KEEPALIVE: @@ -533,15 +535,29 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int } +/* FIXME: this is insane. We are trying suppose to be controlling how + * how much space we have for data bytes, not packet headers. + * This really points out that we need a better system for doing the + * receive buffer. -- erics + * WARNING: This is currently ONLY used in tcp. If you need it else where + * this will probably not be what you want. Possibly these two routines + * should move over to the ipv4 directory. + */ unsigned long sock_rspace(struct sock *sk) { int amt; - if (sk != NULL) - { - if (atomic_read(&sk->rmem_alloc) >= sk->rcvbuf-2*MIN_WINDOW) - return(0); - amt = min((sk->rcvbuf-atomic_read(&sk->rmem_alloc))/2-MIN_WINDOW, MAX_WINDOW); + if (sk != NULL) { + /* This used to have some bizzare complications that + * to attempt to reserve some amount of space. This doesn't + * make sense, since the number returned here does not + * actually reflect allocated space, but rather the amount + * of space we committed to. We gamble that we won't + * run out of memory, and returning a smaller number does + * not change the gamble. If we loose the gamble tcp still + * works, it may just slow down for retransmissions. + */ + amt = sk->rcvbuf - atomic_read(&sk->rmem_alloc); if (amt < 0) return(0); return(amt); @@ -550,10 +566,10 @@ unsigned long sock_rspace(struct sock *sk) } +/* FIXME: this is also insane. See above comment */ unsigned long sock_wspace(struct sock *sk) { - if (sk != NULL) - { + if (sk != NULL) { if (sk->shutdown & SEND_SHUTDOWN) return(0); if (atomic_read(&sk->wmem_alloc) >= sk->sndbuf) @@ -868,8 +884,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) init_timer(&sk->timer); sk->allocation = GFP_KERNEL; - sk->rcvbuf = SK_RMEM_MAX; - sk->sndbuf = SK_WMEM_MAX; + sk->rcvbuf = sysctl_rmem_default*2; + sk->sndbuf = sysctl_wmem_default*2; sk->priority = SOPRI_NORMAL; sk->state = TCP_CLOSE; sk->zapped = 1; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8b5848e6b..fd770becd 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -8,6 +8,23 @@ #include <linux/mm.h> #include <linux/sysctl.h> +extern __u32 sysctl_wmem_max; +extern __u32 sysctl_rmem_max; +extern __u32 sysctl_wmem_default; +extern __u32 sysctl_rmem_default; + ctl_table core_table[] = { - {0} + {NET_CORE_WMEM_MAX, "wmem_max", + &sysctl_wmem_max, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_CORE_RMEM_MAX, "rmem_max", + &sysctl_rmem_max, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_CORE_WMEM_DEFAULT, "wmem_default", + &sysctl_wmem_default, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_CORE_RMEM_DEFAULT, "rmem_default", + &sysctl_rmem_default, sizeof(int), 0644, NULL, + &proc_dointvec}, + { 0 } }; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9f4477807..bdc6b37fd 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -36,8 +36,6 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include <asm/uaccess.h> -#include <asm/system.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -52,16 +50,17 @@ #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/config.h> +#include <linux/init.h> #include <net/dst.h> #include <net/arp.h> #include <net/sock.h> #include <net/ipv6.h> - - +#include <asm/uaccess.h> +#include <asm/system.h> #include <asm/checksum.h> -void eth_setup(char *str, int *ints) +__initfunc(void eth_setup(char *str, int *ints)) { struct device *d = dev_base; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d96910bb0..a3a126529 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -76,6 +76,7 @@ #include <linux/interrupt.h> #include <linux/proc_fs.h> #include <linux/stat.h> +#include <linux/init.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -1063,7 +1064,7 @@ extern void tcp_init(void); * Called by socket.c on kernel startup. */ -void inet_proto_init(struct net_proto *pro) +__initfunc(void inet_proto_init(struct net_proto *pro)) { struct sk_buff *dummy_skb; struct inet_protocol *p; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8ef0be2af..ebf2c6c6b 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -90,6 +90,7 @@ #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <linux/stat.h> +#include <linux/init.h> #include <net/ip.h> #include <net/icmp.h> @@ -378,7 +379,7 @@ static void arp_neigh_destroy(struct neighbour *neigh) extern atomic_t hh_count; atomic_dec(&hh_count); #endif - kfree_s(hh, sizeof(struct(struct hh_cache))); + kfree_s(hh, sizeof(struct hh_cache)); } } } @@ -1976,7 +1977,7 @@ static struct proc_dir_entry proc_net_arp = { }; #endif -void arp_init (void) +__initfunc(void arp_init (void)) { dev_add_pack(&arp_packet_type); /* Start with the regular checks for expired arp entries. */ diff --git a/net/ipv4/fib.c b/net/ipv4/fib.c index c2182728c..b25187a20 100644 --- a/net/ipv4/fib.c +++ b/net/ipv4/fib.c @@ -42,6 +42,7 @@ #include <linux/if_arp.h> #include <linux/proc_fs.h> #include <linux/skbuff.h> +#include <linux/init.h> #include <net/ip.h> #include <net/protocol.h> @@ -1646,16 +1647,21 @@ int ip_rt_ioctl(unsigned int cmd, void *arg) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ +printk("ip_rt_ioctl() #1\n"); if (!suser()) return -EPERM; +printk("ip_rt_ioctl() #2\n"); err = get_rt_from_user(&m.rtmsg, arg); if (err) return err; +printk("ip_rt_ioctl() #3\n"); fib_lock(); +printk("ip_rt_ioctl() #4\n"); dummy_nlh.nlmsg_type = cmd == SIOCDELRT ? RTMSG_DELROUTE : RTMSG_NEWROUTE; err = rtmsg_process(&dummy_nlh, &m.rtmsg); fib_unlock(); +printk("ip_rt_ioctl() #5: err == %d\n", err); return err; case SIOCRTMSG: if (!suser()) @@ -2020,7 +2026,7 @@ int ip_rt_event(int event, struct device *dev) } -void ip_fib_init() +__initfunc(void ip_fib_init(void)) { struct in_rtrulemsg r; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 6b697d001..79bf058c5 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -256,6 +256,7 @@ #include <net/sock.h> #include <linux/errno.h> #include <linux/timer.h> +#include <linux/init.h> #include <asm/system.h> #include <asm/uaccess.h> #include <net/checksum.h> @@ -373,7 +374,7 @@ struct socket *icmp_socket=&icmp_inode.u.socket_i; #ifndef CONFIG_NO_ICMP_LIMIT -static void xrlim_init(void) +__initfunc(static void xrlim_init(void)) { int type, entry; struct icmp_xrlim *xr; @@ -1020,7 +1021,7 @@ int icmp_chkaddr(struct sk_buff *skb) { struct tcphdr *th = (struct tcphdr *)(((unsigned char *)iph)+(iph->ihl<<2)); - sk = tcp_v4_lookup(iph->saddr, th->source, iph->daddr, th->dest); + sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source); if (!sk) return 0; if (sk->saddr != iph->saddr) return 0; if (sk->daddr != iph->daddr) return 0; @@ -1034,7 +1035,7 @@ int icmp_chkaddr(struct sk_buff *skb) { struct udphdr *uh = (struct udphdr *)(((unsigned char *)iph)+(iph->ihl<<2)); - sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest); + sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source); if (!sk) return 0; if (sk->saddr != iph->saddr && __ip_chk_addr(iph->saddr) != IS_MYADDR) return 0; @@ -1167,7 +1168,7 @@ static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1] = { { &icmp_statistics.IcmpOutAddrMaskReps, &icmp_statistics.IcmpInAddrMaskReps, icmp_address_reply, 0, NULL } }; -void icmp_init(struct net_proto_family *ops) +__initfunc(void icmp_init(struct net_proto_family *ops)) { int err; diff --git a/net/ipv4/ip_alias.c b/net/ipv4/ip_alias.c index 74ff42a74..a78eef17a 100644 --- a/net/ipv4/ip_alias.c +++ b/net/ipv4/ip_alias.c @@ -26,6 +26,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/route.h> +#include <linux/init.h> #include <net/route.h> #ifdef ALIAS_USER_LAND_DEBUG @@ -137,7 +138,7 @@ struct net_alias_type ip_alias_type = * ip_alias module initialization */ -int ip_alias_init(void) +__initfunc(int ip_alias_init(void)) { return register_net_alias_type(&ip_alias_type, AF_INET); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index bf549b047..290f871a1 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -5,11 +5,15 @@ * * The IP fragmentation functionality. * + * Version: $Id: ip_fragment.c,v 1.22 1997/05/17 05:21:56 freitag Exp $ + * * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox <Alan.Cox@linux.org> * * Fixes: * Alan Cox : Split from ip.c , see ip_input.c for history. + * David S. Miller : Begin massive cleanup... + * Andi Kleen : Add sysctls. */ #include <linux/types.h> @@ -29,31 +33,49 @@ #include <linux/ip_fw.h> #include <net/checksum.h> -/* - * Fragment cache limits. We will commit 256K at one time. Should we - * cross that limit we will prune down to 192K. This should cope with - * even the most extreme cases without allowing an attacker to measurably - * harm machine performance. - */ - -#define IPFRAG_HIGH_THRESH (256*1024) -#define IPFRAG_LOW_THRESH (192*1024) - -/* - * This fragment handler is a bit of a heap. On the other hand it works quite - * happily and handles things quite well. +/* Fragment cache limits. We will commit 256K at one time. Should we + * cross that limit we will prune down to 192K. This should cope with + * even the most extreme cases without allowing an attacker to measurably + * harm machine performance. */ - -static struct ipq *ipqueue = NULL; /* IP fragment queue */ +int sysctl_ipfrag_high_thresh = 256*1024; +int sysctl_ipfrag_low_thresh = 192*1024; + +/* Describe an IP fragment. */ +struct ipfrag { + int offset; /* offset of fragment in IP datagram */ + int end; /* last byte of data in datagram */ + int len; /* length of this fragment */ + struct sk_buff *skb; /* complete received fragment */ + unsigned char *ptr; /* pointer into real fragment data */ + struct ipfrag *next; /* linked list pointers */ + struct ipfrag *prev; +}; + +/* Describe an entry in the "incomplete datagrams" queue. */ +struct ipq { + struct iphdr *iph; /* pointer to IP header */ + struct ipq *next; /* linked list pointers */ + struct ipfrag *fragments; /* linked list of received fragments */ + int len; /* total length of original datagram */ + short ihlen; /* length of the IP header */ + struct timer_list timer; /* when will this queue expire? */ + struct ipq **pprev; + struct device *dev; /* Device - for icmp replies */ +}; + +#define IPQ_HASHSZ 64 + +struct ipq *ipq_hash[IPQ_HASHSZ]; + +#define ipqhashfn(id, saddr, daddr, prot) \ + ((((id) >> 1) ^ (saddr) ^ (daddr) ^ (prot)) & (IPQ_HASHSZ - 1)) atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ -char *in_ntoa(unsigned long in); +char *in_ntoa(__u32 in); -/* - * Memory Tracking Functions - */ - +/* Memory Tracking Functions. */ extern __inline__ void frag_kfree_skb(struct sk_buff *skb, int type) { atomic_sub(skb->truesize, &ip_frag_mem); @@ -69,28 +91,24 @@ extern __inline__ void frag_kfree_s(void *ptr, int len) extern __inline__ void *frag_kmalloc(int size, int pri) { void *vp=kmalloc(size,pri); + if(!vp) return NULL; atomic_add(size, &ip_frag_mem); return vp; } -/* - * Create a new fragment entry. - */ - -static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr) +/* Create a new fragment entry. */ +static struct ipfrag *ip_frag_create(int offset, int end, + struct sk_buff *skb, unsigned char *ptr) { struct ipfrag *fp; - unsigned long flags; fp = (struct ipfrag *) frag_kmalloc(sizeof(struct ipfrag), GFP_ATOMIC); - if (fp == NULL) - { + if (fp == NULL) { NETDEBUG(printk(KERN_ERR "IP: frag_create: no memory left !\n")); return(NULL); } - memset(fp, 0, sizeof(struct ipfrag)); /* Fill in the structure. */ fp->offset = offset; @@ -98,85 +116,63 @@ static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, u fp->len = end - offset; fp->skb = skb; fp->ptr = ptr; + fp->next = fp->prev = NULL; - /* - * Charge for the SKB as well. - */ - - save_flags(flags); - cli(); + /* Charge for the SKB as well. */ atomic_add(skb->truesize, &ip_frag_mem); - restore_flags(flags); return(fp); } - -/* - * Find the correct entry in the "incomplete datagrams" queue for - * this IP datagram, and return the queue entry address if found. +/* Find the correct entry in the "incomplete datagrams" queue for + * this IP datagram, and return the queue entry address if found. */ - -static struct ipq *ip_find(struct iphdr *iph) +static inline struct ipq *ip_find(struct iphdr *iph) { + __u16 id = iph->id; + __u32 saddr = iph->saddr; + __u32 daddr = iph->daddr; + __u8 protocol = iph->protocol; + unsigned int hash = ipqhashfn(id, saddr, daddr, protocol); struct ipq *qp; - struct ipq *qplast; - - cli(); - qplast = NULL; - for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next) - { - if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr && - iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol) - { - del_timer(&qp->timer); /* So it doesn't vanish on us. The timer will be reset anyway */ - sti(); - return(qp); + + start_bh_atomic(); + for(qp = ipq_hash[hash]; qp; qp = qp->next) { + if(qp->iph->id == id && + qp->iph->saddr == saddr && + qp->iph->daddr == daddr && + qp->iph->protocol == protocol) { + del_timer(&qp->timer); + break; } } - sti(); - return(NULL); + end_bh_atomic(); + return qp; } - -/* - * Remove an entry from the "incomplete datagrams" queue, either - * because we completed, reassembled and processed it, or because - * it timed out. +/* Remove an entry from the "incomplete datagrams" queue, either + * because we completed, reassembled and processed it, or because + * it timed out. */ - static void ip_free(struct ipq *qp) { struct ipfrag *fp; - struct ipfrag *xp; - - /* - * Stop the timer for this entry. - */ + /* Stop the timer for this entry. */ del_timer(&qp->timer); /* Remove this entry from the "incomplete datagrams" queue. */ - cli(); - if (qp->prev == NULL) - { - ipqueue = qp->next; - if (ipqueue != NULL) - ipqueue->prev = NULL; - } - else - { - qp->prev->next = qp->next; - if (qp->next != NULL) - qp->next->prev = qp->prev; - } + start_bh_atomic(); + if(qp->next) + qp->next->pprev = qp->pprev; + *qp->pprev = qp->next; + end_bh_atomic(); /* Release all fragment data. */ - fp = qp->fragments; - while (fp != NULL) - { - xp = fp->next; + while (fp) { + struct ipfrag *xp = fp->next; + frag_kfree_skb(fp->skb,FREE_READ); frag_kfree_s(fp, sizeof(struct ipfrag)); fp = xp; @@ -187,83 +183,65 @@ static void ip_free(struct ipq *qp) /* Finally, release the queue descriptor itself. */ frag_kfree_s(qp, sizeof(struct ipq)); - sti(); } - -/* - * Oops- a fragment queue timed out. Kill it and send an ICMP reply. - */ - +/* Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ static void ip_expire(unsigned long arg) { - struct ipq *qp; - - qp = (struct ipq *)arg; - - /* - * Send an ICMP "Fragment Reassembly Timeout" message. - */ + struct ipq *qp = (struct ipq *) arg; + /* Send an ICMP "Fragment Reassembly Timeout" message. */ ip_statistics.IpReasmTimeout++; ip_statistics.IpReasmFails++; - /* This if is always true... shrug */ - if(qp->fragments!=NULL) - icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED, - ICMP_EXC_FRAGTIME, 0); + icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); - /* - * Nuke the fragment queue. - */ + /* Nuke the fragment queue. */ ip_free(qp); } -/* - * Memory limiting on fragments. Evictor trashes the oldest - * fragment queue until we are back under the low threshold +/* Memory limiting on fragments. Evictor trashes the oldest + * fragment queue until we are back under the low threshold. */ - static void ip_evictor(void) { - while(atomic_read(&ip_frag_mem)>IPFRAG_LOW_THRESH) - { - if(!ipqueue) + while(atomic_read(&ip_frag_mem)>sysctl_ipfrag_low_thresh) { + int i; + + /* FIXME: Make LRU queue of frag heads. -DaveM */ + for(i = 0; i < IPQ_HASHSZ; i++) + if(ipq_hash[i]) + break; + if(i >= IPQ_HASHSZ) panic("ip_evictor: memcount"); - ip_free(ipqueue); + ip_free(ipq_hash[i]); } } -/* - * Add an entry to the 'ipq' queue for a newly received IP datagram. - * We will (hopefully :-) receive all other fragments of this datagram - * in time, so we just create a queue for this datagram, in which we - * will insert the received fragments at their respective positions. +/* Add an entry to the 'ipq' queue for a newly received IP datagram. + * We will (hopefully :-) receive all other fragments of this datagram + * in time, so we just create a queue for this datagram, in which we + * will insert the received fragments at their respective positions. */ - static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph) { struct ipq *qp; + unsigned int hash; int ihlen; qp = (struct ipq *) frag_kmalloc(sizeof(struct ipq), GFP_ATOMIC); - if (qp == NULL) - { + if (qp == NULL) { NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n")); return(NULL); } - memset(qp, 0, sizeof(struct ipq)); - - /* - * Allocate memory for the IP header (plus 8 octets for ICMP). - */ + /* Allocate memory for the IP header (plus 8 octets for ICMP). */ ihlen = iph->ihl * 4; + qp->iph = (struct iphdr *) frag_kmalloc(64 + 8, GFP_ATOMIC); - if (qp->iph == NULL) - { + if (qp->iph == NULL) { NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n")); frag_kfree_s(qp, sizeof(struct ipq)); - return(NULL); + return NULL; } memcpy(qp->iph, iph, ihlen + 8); @@ -279,21 +257,19 @@ static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph) add_timer(&qp->timer); /* Add this entry to the queue. */ - qp->prev = NULL; - cli(); - qp->next = ipqueue; - if (qp->next != NULL) - qp->next->prev = qp; - ipqueue = qp; - sti(); - return(qp); -} + hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); + start_bh_atomic(); + if((qp->next = ipq_hash[hash]) != NULL) + qp->next->pprev = &qp->next; + ipq_hash[hash] = qp; + qp->pprev = &ipq_hash[hash]; + end_bh_atomic(); -/* - * See if a fragment queue is complete. - */ + return qp; +} +/* See if a fragment queue is complete. */ static int ip_done(struct ipq *qp) { struct ipfrag *fp; @@ -301,13 +277,12 @@ static int ip_done(struct ipq *qp) /* Only possible if we received the final fragment. */ if (qp->len == 0) - return(0); + return 0; /* Check all fragment offsets to see if they connect. */ fp = qp->fragments; offset = 0; - while (fp != NULL) - { + while (fp) { if (fp->offset > offset) return(0); /* fragment(s) missing */ offset = fp->end; @@ -315,18 +290,15 @@ static int ip_done(struct ipq *qp) } /* All fragments are present. */ - return(1); + return 1; } - -/* - * Build a new IP datagram from all its fragments. +/* Build a new IP datagram from all its fragments. * - * FIXME: We copy here because we lack an effective way of handling lists - * of bits on input. Until the new skb data handling is in I'm not going - * to touch this with a bargepole. + * FIXME: We copy here because we lack an effective way of handling lists + * of bits on input. Until the new skb data handling is in I'm not going + * to touch this with a bargepole. */ - static struct sk_buff *ip_glue(struct ipq *qp) { struct sk_buff *skb; @@ -335,25 +307,23 @@ static struct sk_buff *ip_glue(struct ipq *qp) unsigned char *ptr; int count, len; - /* - * Allocate a new buffer for the datagram. - */ + /* Allocate a new buffer for the datagram. */ len = qp->ihlen + qp->len; - if(len>65535) - { - printk(KERN_INFO "Oversized IP packet from %s.\n", in_ntoa(qp->iph->saddr)); + if(len>65535) { + printk(KERN_INFO "Oversized IP packet from %s.\n", + in_ntoa(qp->iph->saddr)); ip_statistics.IpReasmFails++; ip_free(qp); return NULL; } - if ((skb = dev_alloc_skb(len)) == NULL) - { + if ((skb = dev_alloc_skb(len)) == NULL) { ip_statistics.IpReasmFails++; - NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing queue %p\n", qp)); + NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing " + "queue %p\n", qp)); ip_free(qp); - return(NULL); + return NULL; } /* Fill in the basic details. */ @@ -368,11 +338,10 @@ static struct sk_buff *ip_glue(struct ipq *qp) /* Copy the data portions of all fragments into the new buffer. */ fp = qp->fragments; - while(fp != NULL) - { - if(count+fp->len > skb->len) - { - NETDEBUG(printk(KERN_ERR "Invalid fragment list: Fragment over size.\n")); + while(fp) { + if(count+fp->len > skb->len) { + NETDEBUG(printk(KERN_ERR "Invalid fragment list: " + "Fragment over size.\n")); ip_free(qp); kfree_skb(skb,FREE_WRITE); ip_statistics.IpReasmFails++; @@ -396,14 +365,10 @@ static struct sk_buff *ip_glue(struct ipq *qp) iph->tot_len = htons((iph->ihl * 4) + count); ip_statistics.IpReasmOKs++; - return(skb); + return skb; } - -/* - * Process an incoming IP datagram fragment. - */ - +/* Process an incoming IP datagram fragment. */ struct sk_buff *ip_defrag(struct sk_buff *skb) { struct iphdr *iph = skb->nh.iph; @@ -417,45 +382,37 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) ip_statistics.IpReasmReqds++; - /* - * Start by cleaning up the memory - */ - - if(atomic_read(&ip_frag_mem)>IPFRAG_HIGH_THRESH) + /* Start by cleaning up the memory. */ + if(atomic_read(&ip_frag_mem)>sysctl_ipfrag_high_thresh) ip_evictor(); - /* - * Find the entry of this IP datagram in the "incomplete datagrams" queue. - */ - + + /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ qp = ip_find(iph); /* Is this a non-fragmented datagram? */ offset = ntohs(iph->frag_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; - if (((flags & IP_MF) == 0) && (offset == 0)) - { - if (qp != NULL) - ip_free(qp); /* Fragmented frame replaced by full unfragmented copy */ - return(skb); + if (((flags & IP_MF) == 0) && (offset == 0)) { + if (qp != NULL) { + /* Fragmented frame replaced by full unfragmented copy. */ + ip_free(qp); + } + return skb; } offset <<= 3; /* offset is in 8-byte chunks */ ihl = iph->ihl * 4; - /* - * If the queue already existed, keep restarting its timer as long + /* If the queue already existed, keep restarting its timer as long * as we still are receiving fragments. Otherwise, create a fresh * queue entry. */ - - if (qp != NULL) - { + if (qp) { /* ANK. If the first fragment is received, * we should remember the correct IP header (with options) */ - if (offset == 0) - { + if (offset == 0) { qp->ihlen = ihl; memcpy(qp->iph, iph, ihl+8); } @@ -464,84 +421,59 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) qp->timer.data = (unsigned long) qp; /* pointer to queue */ qp->timer.function = ip_expire; /* expire function */ add_timer(&qp->timer); - } - else - { - /* - * If we failed to create it, then discard the frame - */ - if ((qp = ip_create(skb, iph)) == NULL) - { + } else { + /* If we failed to create it, then discard the frame. */ + if ((qp = ip_create(skb, iph)) == NULL) { kfree_skb(skb, FREE_READ); ip_statistics.IpReasmFails++; return NULL; } } - /* - * Attempt to construct an oversize packet. - */ - - if(ntohs(iph->tot_len)+(int)offset>65535) - { - printk(KERN_INFO "Oversized packet received from %s\n",in_ntoa(iph->saddr)); + /* Attempt to construct an oversize packet. */ + if(ntohs(iph->tot_len)+(int)offset>65535) { + printk(KERN_INFO "Oversized packet received from %s\n", + in_ntoa(iph->saddr)); frag_kfree_skb(skb, FREE_READ); ip_statistics.IpReasmFails++; return NULL; } - /* - * Determine the position of this fragment. - */ - + /* Determine the position of this fragment. */ end = offset + ntohs(iph->tot_len) - ihl; - /* - * Point into the IP datagram 'data' part. - */ - + /* Point into the IP datagram 'data' part. */ ptr = skb->data + ihl; - /* - * Is this the final fragment? - */ - + /* Is this the final fragment? */ if ((flags & IP_MF) == 0) qp->len = end; - /* - * Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? + /* Find out which fragments are in front and at the back of us + * in the chain of fragments so far. We must know where to put + * this fragment, right? */ - prev = NULL; - for(next = qp->fragments; next != NULL; next = next->next) - { - if (next->offset > offset) + for(next = qp->fragments; next != NULL; next = next->next) { + if (next->offset >= offset) break; /* bingo! */ prev = next; } - /* - * We found where to put this one. - * Check for overlap with preceding fragment, and, if needed, - * align things so that any overlaps are eliminated. + /* We found where to put this one. Check for overlap with + * preceding fragment, and, if needed, align things so that + * any overlaps are eliminated. */ - if (prev != NULL && offset < prev->end) - { + if (prev != NULL && offset < prev->end) { i = prev->end - offset; offset += i; /* ptr into datagram */ ptr += i; /* ptr into fragment data */ } - /* - * Look for overlap with succeeding segments. + /* Look for overlap with succeeding segments. * If we can merge fragments, do it. */ - - for(tmp=next; tmp != NULL; tmp = tfp) - { + for(tmp=next; tmp != NULL; tmp = tfp) { tfp = tmp->next; if (tmp->offset >= end) break; /* no overlaps at all */ @@ -550,12 +482,11 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) tmp->len -= i; /* so reduce size of */ tmp->offset += i; /* next fragment */ tmp->ptr += i; - /* - * If we get a frag size of <= 0, remove it and the packet - * that it goes with. + + /* If we get a frag size of <= 0, remove it and the packet + * that it goes with. */ - if (tmp->len <= 0) - { + if (tmp->len <= 0) { if (tmp->prev != NULL) tmp->prev->next = tmp->next; else @@ -564,26 +495,20 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) if (tmp->next != NULL) tmp->next->prev = tmp->prev; - next=tfp; /* We have killed the original next frame */ + /* We have killed the original next frame. */ + next = tfp; frag_kfree_skb(tmp->skb,FREE_READ); frag_kfree_s(tmp, sizeof(struct ipfrag)); } } - /* - * Insert this fragment in the chain of fragments. - */ - + /* Insert this fragment in the chain of fragments. */ tfp = NULL; tfp = ip_frag_create(offset, end, skb, ptr); - /* - * No memory to save the fragment - so throw the lot - */ - - if (!tfp) - { + /* No memory to save the fragment - so throw the lot. */ + if (!tfp) { frag_kfree_skb(skb, FREE_READ); return NULL; } @@ -597,16 +522,14 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) if (next != NULL) next->prev = tfp; - /* - * OK, so we inserted this new fragment into the chain. - * Check if we now have a full IP datagram which we can - * bump up to the IP layer... + /* OK, so we inserted this new fragment into the chain. + * Check if we now have a full IP datagram which we can + * bump up to the IP layer... */ - - if (ip_done(qp)) - { - skb2 = ip_glue(qp); /* glue together the fragments */ + if (ip_done(qp)) { + /* Glue together the fragments. */ + skb2 = ip_glue(qp); return(skb2); } - return(NULL); + return NULL; } diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c index e516a2baa..ea9fe48b0 100644 --- a/net/ipv4/ip_fw.c +++ b/net/ipv4/ip_fw.c @@ -107,6 +107,7 @@ #include <net/netlink.h> #include <linux/firewall.h> #include <linux/ip_fw.h> +#include <linux/init.h> #ifdef CONFIG_IP_MASQUERADE #include <net/ip_masq.h> @@ -1298,7 +1299,7 @@ static struct proc_dir_entry proc_net_ipfwfwd = { #endif -void ip_fw_init(void) +__initfunc(void ip_fw_init(void)) { #ifdef CONFIG_PROC_FS #ifdef CONFIG_IP_ACCT diff --git a/net/ipv4/ip_masq.c b/net/ipv4/ip_masq.c index c5976614e..2d2fd3717 100644 --- a/net/ipv4/ip_masq.c +++ b/net/ipv4/ip_masq.c @@ -31,6 +31,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/inet.h> +#include <linux/init.h> #include <net/protocol.h> #include <net/icmp.h> #include <net/tcp.h> @@ -1010,7 +1011,7 @@ static struct proc_dir_entry proc_net_ipmsqhst = { /* * Initialize ip masquerading */ -int ip_masq_init(void) +__initfunc(int ip_masq_init(void)) { #ifdef CONFIG_PROC_FS proc_net_register(&proc_net_ipmsqhst); diff --git a/net/ipv4/ip_masq_app.c b/net/ipv4/ip_masq_app.c index 456888bc1..f7449e0ba 100644 --- a/net/ipv4/ip_masq_app.c +++ b/net/ipv4/ip_masq_app.c @@ -30,6 +30,7 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> +#include <linux/init.h> #include <net/protocol.h> #include <net/tcp.h> #include <net/udp.h> @@ -482,7 +483,7 @@ static struct proc_dir_entry proc_net_ip_masq_app = { * Initialization routine */ -int ip_masq_app_init(void) +__initfunc(int ip_masq_app_init(void)) { #ifdef CONFIG_PROC_FS proc_net_register(&proc_net_ip_masq_app); diff --git a/net/ipv4/ip_masq_ftp.c b/net/ipv4/ip_masq_ftp.c index cc2481746..4d5568d0a 100644 --- a/net/ipv4/ip_masq_ftp.c +++ b/net/ipv4/ip_masq_ftp.c @@ -28,6 +28,7 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> +#include <linux/init.h> #include <net/protocol.h> #include <net/tcp.h> #include <net/ip_masq.h> @@ -187,7 +188,7 @@ struct ip_masq_app ip_masq_ftp = { * ip_masq_ftp initialization */ -int ip_masq_ftp_init(void) +__initfunc(int ip_masq_ftp_init(void)) { return register_ip_masq_app(&ip_masq_ftp, IPPROTO_TCP, 21); } diff --git a/net/ipv4/ip_masq_irc.c b/net/ipv4/ip_masq_irc.c index e0b94f0d6..a1be56f81 100644 --- a/net/ipv4/ip_masq_irc.c +++ b/net/ipv4/ip_masq_irc.c @@ -29,6 +29,7 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> +#include <linux/init.h> #include <net/protocol.h> #include <net/tcp.h> #include <net/ip_masq.h> @@ -238,7 +239,7 @@ struct ip_masq_app ip_masq_irc = { * ip_masq_irc initialization */ -int ip_masq_irc_init(void) +__initfunc(int ip_masq_irc_init(void)) { return register_ip_masq_app(&ip_masq_irc, IPPROTO_TCP, 6667); } diff --git a/net/ipv4/ip_masq_quake.c b/net/ipv4/ip_masq_quake.c index 3614f0cf5..08a062bc7 100644 --- a/net/ipv4/ip_masq_quake.c +++ b/net/ipv4/ip_masq_quake.c @@ -28,6 +28,7 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> +#include <linux/init.h> #include <net/protocol.h> #include <net/udp.h> #include <net/ip_masq.h> @@ -279,7 +280,7 @@ struct ip_masq_app ip_masq_quakenew = { * ip_masq_quake initialization */ -int ip_masq_quake_init(void) +__initfunc(int ip_masq_quake_init(void)) { return (register_ip_masq_app(&ip_masq_quake, IPPROTO_UDP, 26000) + register_ip_masq_app(&ip_masq_quakenew, IPPROTO_UDP, 27000)); diff --git a/net/ipv4/ip_masq_raudio.c b/net/ipv4/ip_masq_raudio.c index 85bba590e..52f439102 100644 --- a/net/ipv4/ip_masq_raudio.c +++ b/net/ipv4/ip_masq_raudio.c @@ -2,7 +2,7 @@ * IP_MASQ_RAUDIO - Real Audio masquerading module * * - * Version: @(#)$Id: ip_masq_raudio.c,v 1.5 1997/04/03 08:52:02 davem Exp $ + * Version: @(#)$Id: ip_masq_raudio.c,v 1.6 1997/04/29 09:38:26 mj Exp $ * * Author: Nigel Metheringham * [strongly based on ftp module by Juan Jose Ciarlante & Wouter Gadeyne] @@ -45,6 +45,7 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> +#include <linux/init.h> #include <net/protocol.h> #include <net/tcp.h> #include <net/ip_masq.h> @@ -200,7 +201,7 @@ struct ip_masq_app ip_masq_raudio = { * ip_masq_raudio initialization */ -int ip_masq_raudio_init(void) +__initfunc(int ip_masq_raudio_init(void)) { return register_ip_masq_app(&ip_masq_raudio, IPPROTO_TCP, 7070); } diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 2c7974506..80baf8364 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -505,7 +505,7 @@ int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, in opt->is_data = 1; opt->is_setbyuser = 1; if (optlen && ip_options_compile(opt, NULL)) { - kfree_s(opt, sizeof(struct options) + optlen); + kfree_s(opt, sizeof(struct ip_options) + optlen); return -EINVAL; } *optp = opt; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 41e60de61..6558b56e4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -27,6 +27,8 @@ * (in case if packet not accepted by * output firewall rules) * Alexey Kuznetsov: use new route cache + * Andi Kleen: Fix broken PMTU recovery and remove + * some redundant tests. */ #include <asm/uaccess.h> @@ -47,6 +49,7 @@ #include <linux/etherdevice.h> #include <linux/proc_fs.h> #include <linux/stat.h> +#include <linux/init.h> #include <net/snmp.h> #include <net/ip.h> @@ -126,9 +129,8 @@ int ip_build_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr, iph->ihl = 5; iph->tos = sk->ip_tos; iph->frag_off = 0; - if (sk->ip_pmtudisc == IP_PMTUDISC_DONT || - (sk->ip_pmtudisc == IP_PMTUDISC_WANT && - rt->rt_flags&RTF_NOPMTUDISC)) + if (sk->ip_pmtudisc == IP_PMTUDISC_WANT && + !(rt->rt_flags & RTF_NOPMTUDISC)) iph->frag_off |= htons(IP_DF); iph->ttl = sk->ip_ttl; iph->daddr = rt->rt_dst; @@ -207,9 +209,8 @@ int ip_build_header(struct sk_buff *skb, struct sock *sk) iph->ihl = 5; iph->tos = sk->ip_tos; iph->frag_off = 0; - if (sk->ip_pmtudisc == IP_PMTUDISC_DONT || - (sk->ip_pmtudisc == IP_PMTUDISC_WANT && - rt->rt_flags&RTF_NOPMTUDISC)) + if (sk->ip_pmtudisc == IP_PMTUDISC_WANT && + !(rt->rt_flags & RTF_NOPMTUDISC)) iph->frag_off |= htons(IP_DF); iph->ttl = sk->ip_ttl; iph->daddr = rt->rt_dst; @@ -480,8 +481,7 @@ int ip_build_xmit(struct sock *sk, #endif if (sk->ip_pmtudisc == IP_PMTUDISC_DONT || - (sk->ip_pmtudisc == IP_PMTUDISC_WANT && - rt->rt_flags&RTF_NOPMTUDISC)) + rt->rt_flags&RTF_NOPMTUDISC) df = 0; @@ -1036,7 +1036,7 @@ static struct proc_dir_entry proc_net_igmp = { * IP registers the packet type and then calls the subprotocol initialisers */ -void ip_init(void) +__initfunc(void ip_init(void)) { dev_add_pack(&ip_packet_type); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 1689159ed..8c2463d04 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -126,26 +126,24 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc, struct device **de for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { if (cmsg->cmsg_level != SOL_IP) continue; - switch (cmsg->cmsg_type) - { + switch (cmsg->cmsg_type) { case IP_LOCALADDR: - if (cmsg->cmsg_len < sizeof(struct in_addr)+sizeof(*cmsg)) + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_addr))) return -EINVAL; - memcpy(&ipc->addr, cmsg->cmsg_data, 4); + memcpy(&ipc->addr, CMSG_DATA(cmsg), sizeof(struct in_addr)); break; case IP_RETOPTS: - err = cmsg->cmsg_len - sizeof(*cmsg); - err = ip_options_get(&ipc->opt, cmsg->cmsg_data, - err < 40 ? err : 40, 0); + err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); + err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40, 0); if (err) return err; break; case IP_TXINFO: { struct in_pktinfo *info; - if (cmsg->cmsg_len < sizeof(*info)+sizeof(*cmsg)) + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) return -EINVAL; - info = (struct in_pktinfo*)cmsg->cmsg_data; + info = (struct in_pktinfo *)CMSG_DATA(cmsg); if (info->ipi_ifindex && !devp) return -EINVAL; if ((*devp = dev_get_by_index(info->ipi_ifindex)) == NULL) @@ -212,7 +210,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt sk->opt = opt; sti(); if (old_opt) - kfree_s(old_opt, sizeof(struct optlen) + old_opt->optlen); + kfree_s(old_opt, sizeof(struct ip_options) + old_opt->optlen); return 0; } case IP_RXINFO: diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index f76c5b52d..1a38c5275 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -47,6 +47,7 @@ #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/mroute.h> +#include <linux/init.h> #include <net/ip.h> #include <net/protocol.h> #include <linux/skbuff.h> @@ -1065,7 +1066,7 @@ static struct proc_dir_entry proc_net_ipmr_mfc = { * Setup for IP multicast routing */ -void ip_mr_init(void) +__initfunc(void ip_mr_init(void)) { printk(KERN_INFO "Linux IP multicast router 0.06.\n"); register_netdevice_notifier(&ip_mr_notifier); diff --git a/net/ipv4/rarp.c b/net/ipv4/rarp.c index fb9e2a738..e0323bb85 100644 --- a/net/ipv4/rarp.c +++ b/net/ipv4/rarp.c @@ -45,6 +45,7 @@ #include <linux/if_arp.h> #include <linux/in.h> #include <linux/config.h> +#include <linux/init.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -553,8 +554,8 @@ struct proc_dir_entry proc_net_rarp = { rarp_get_info }; -void -rarp_init(void) +__initfunc(void +rarp_init(void)) { proc_net_register(&proc_net_rarp); rarp_ioctl_hook = rarp_ioctl; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5ba6467d9..4a4c5321c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -69,6 +69,7 @@ #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/proc_fs.h> +#include <linux/init.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -1379,7 +1380,7 @@ void ip_rt_multicast_event(struct device *dev) rt_cache_flush(0); } -void ip_rt_init() +__initfunc(void ip_rt_init(void)) { ip_fib_init(); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 84ba6578b..18a8d2bf8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -35,22 +35,27 @@ extern int sysctl_arp_check_interval; extern int sysctl_arp_confirm_interval; extern int sysctl_arp_confirm_timeout; +/* From ip_fragment.c */ +extern int sysctl_ipfrag_low_thresh; +extern int sysctl_ipfrag_high_thresh; + extern int sysctl_tcp_cong_avoidance; extern int sysctl_tcp_hoe_retransmits; extern int sysctl_tcp_sack; extern int sysctl_tcp_tsack; extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; +extern int sysctl_syn_retries; extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp, void *buffer, size_t *lenp); -struct ipv4_config ipv4_config = { 1, 1, 1, 1, }; +struct ipv4_config ipv4_config = { 1, 1, 1, 0, }; #ifdef CONFIG_SYSCTL struct ipv4_config ipv4_def_router_config = { 0, 1, 1, 1, 1, 1, 1, }; -struct ipv4_config ipv4_def_host_config = { 1, 1, 1, 1, }; +struct ipv4_config ipv4_def_host_config = { 1, 1, 1, 0, }; int ipv4_sysctl_forwarding(ctl_table *ctl, int write, struct file * filp, void *buffer, size_t *lenp) @@ -144,6 +149,12 @@ ctl_table ipv4_table[] = { {NET_IPV4_RFC1620_REDIRECTS, "ip_rfc1620_redirects", &ipv4_config.rfc1620_redirects, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_TCP_SYN_RETRIES, "tcp_syn_retries", + &sysctl_syn_retries, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh", + &sysctl_ipfrag_high_thresh, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPFRAG_LOW_THRESH, "ipfrag_low_thresh", + &sysctl_ipfrag_low_thresh, sizeof(int), 0644, NULL, &proc_dointvec}, {0} }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 420db4777..000813b94 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.61 1997/04/22 02:53:10 davem Exp $ + * Version: $Id: tcp.c,v 1.65 1997/05/06 09:31:43 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -424,6 +424,7 @@ #include <linux/types.h> #include <linux/fcntl.h> #include <linux/poll.h> +#include <linux/init.h> #include <net/icmp.h> #include <net/tcp.h> @@ -849,7 +850,6 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags) tcp_size = skb->tail - ((unsigned char *)(skb->h.th) + tp->tcp_header_len); - /* printk("extending buffer\n"); */ /* This window_seq test is somewhat dangerous * If the remote does SWS avoidance we should * queue the best we can if not we should in @@ -1100,6 +1100,9 @@ static void cleanup_rbuf(struct sock *sk) struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); __u32 rcv_wnd; + /* FIXME: double check this rule, then check against + * other use of similar rules. Abtract if possible. + */ rcv_wnd = tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup); if ((rcv_wnd < sk->mss) && (sock_rspace(sk) > rcv_wnd)) @@ -1357,7 +1360,10 @@ static int tcp_close_state(struct sock *sk, int dead) case TCP_CLOSE: case TCP_LISTEN: break; - case TCP_LAST_ACK: /* Could have shutdown() then close() */ + case TCP_LAST_ACK: /* Could have shutdown() then close() + * (but don't do send_fin again!) */ + ns=TCP_LAST_ACK; + break; case TCP_CLOSE_WAIT: /* They have FIN'd us. We send our FIN and wait only for the ACK */ ns=TCP_LAST_ACK; @@ -1655,11 +1661,11 @@ void tcp_set_keepalive(struct sock *sk, int val) tcp_dec_slow_timer(TCP_SLT_KEEPALIVE); } -void tcp_init(void) +__initfunc(void tcp_init(void)) { tcp_openreq_cachep = kmem_cache_create("tcp_open_request", sizeof(struct open_request), - sizeof(long)*8, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if(!tcp_openreq_cachep) panic("tcp_init: Cannot alloc open_request cache."); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ab2b1ef82..3ab1dee42 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.50 1997/04/22 02:53:12 davem Exp $ + * Version: $Id: tcp_input.c,v 1.51 1997/04/27 19:24:40 schenk Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -321,8 +321,10 @@ void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp) break; case TCPOPT_WINDOW: if(opsize==TCPOLEN_WINDOW && th->syn) - if (sysctl_tcp_window_scaling) + if (sysctl_tcp_window_scaling) { + tp->wscale_ok = 1; tp->snd_wscale = *(__u8 *)ptr; + } break; case TCPOPT_SACK_PERM: if(opsize==TCPOLEN_SACK_PERM && th->syn) @@ -816,7 +818,7 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, */ if (before(tp->snd_wl1, ack_seq) || (tp->snd_wl1 == ack_seq && !after(tp->snd_wl2, ack))) { - unsigned long nwin = ntohs(th->window); + unsigned long nwin = ntohs(th->window) << tp->snd_wscale; if ((tp->snd_wl2 != ack) || (nwin > tp->snd_wnd)) { flag |= FLAG_WIN_UPDATE; @@ -1464,17 +1466,21 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if(tp->af_specific->conn_request(sk, skb, opt, isn) < 0) return 1; - /* Now we have several options: In theory there is - * nothing else in the frame. KA9Q has an option to - * send data with the syn, BSD accepts data with the - * syn up to the [to be] advertised window and - * Solaris 2.1 gives you a protocol error. For now - * we just ignore it, that fits the spec precisely - * and avoids incompatibilities. It would be nice in - * future to drop through and process the data. + /* Now we have several options: In theory there is + * nothing else in the frame. KA9Q has an option to + * send data with the syn, BSD accepts data with the + * syn up to the [to be] advertised window and + * Solaris 2.1 gives you a protocol error. For now + * we just ignore it, that fits the spec precisely + * and avoids incompatibilities. It would be nice in + * future to drop through and process the data. * - * Now that TTCP is starting to be used we ought to - * queue this data. + * Now that TTCP is starting to be used we ought to + * queue this data. + * But, this leaves one open to an easy denial of + * service attack, and SYN cookies can't defend + * against this problem. So, we drop the data + * in the interest of security over speed. */ return 0; } @@ -1514,10 +1520,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * move to established. */ tp->rcv_nxt = skb->seq+1; - tp->rcv_wnd = 0; tp->rcv_wup = skb->seq+1; - tp->snd_wnd = htons(th->window); + tp->snd_wnd = htons(th->window) << tp->snd_wscale; tp->snd_wl1 = skb->seq; tp->snd_wl2 = skb->ack_seq; @@ -1526,6 +1531,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tcp_set_state(sk, TCP_ESTABLISHED); tcp_parse_options(th,tp); /* FIXME: need to make room for SACK still */ + if (tp->wscale_ok == 0) { + tp->snd_wscale = tp->rcv_wscale = 0; + tp->window_clamp = min(tp->window_clamp,65535); + } if (tp->tstamp_ok) { tp->tcp_header_len = sizeof(struct tcphdr) + 12; /* FIXME: Define constant! */ sk->dummy_th.doff += 3; /* reserve space of options */ @@ -1695,7 +1704,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, sk->state_change(sk); tp->snd_una = skb->ack_seq; - tp->snd_wnd = htons(th->window); + tp->snd_wnd = htons(th->window) << tp->snd_wscale; tp->snd_wl1 = skb->seq; tp->snd_wl2 = skb->ack_seq; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f4528f552..c4d12a54f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.39 1997/04/22 02:53:14 davem Exp $ + * Version: $Id: tcp_ipv4.c,v 1.43 1997/05/06 09:31:44 davem Exp $ * * IPv4 specific functions * @@ -465,7 +465,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct sk_buff *buff; struct sk_buff *skb1; int tmp; - struct tcphdr *t1; + struct tcphdr *th; struct rtable *rt; struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; @@ -546,20 +546,17 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return(-ENETUNREACH); } - t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr)); - buff->h.th = t1; + th = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr)); + buff->h.th = th; - memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1)); + memcpy(th,(void *)&(sk->dummy_th), sizeof(*th)); buff->seq = sk->write_seq++; - t1->seq = htonl(buff->seq); + th->seq = htonl(buff->seq); tp->snd_nxt = sk->write_seq; buff->end_seq = sk->write_seq; - t1->ack = 0; - t1->window = htons(512); - t1->syn = 1; + th->ack = 0; + th->syn = 1; - /* Use 512 or whatever user asked for. */ - tp->window_clamp = rt->u.dst.window; sk->mtu = rt->u.dst.pmtu; if ((sk->ip_pmtudisc == IP_PMTUDISC_DONT || @@ -577,13 +574,26 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk->mss = (sk->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr)); + if (sk->mss < 1) { + printk(KERN_DEBUG "intial sk->mss below 1\n"); + sk->mss = 1; /* Sanity limit */ + } + + tp->window_clamp = rt->u.dst.window; + tcp_select_initial_window(sock_rspace(sk)/2,sk->mss, + &tp->rcv_wnd, + &tp->window_clamp, + sysctl_tcp_window_scaling, + &tp->rcv_wscale); + th->window = htons(tp->rcv_wnd); + tmp = tcp_syn_build_options(buff, sk->mss, sysctl_tcp_sack, sysctl_tcp_timestamps, - sysctl_tcp_window_scaling?tp->rcv_wscale:0); + sysctl_tcp_window_scaling,tp->rcv_wscale); buff->csum = 0; - t1->doff = (sizeof(*t1)+ tmp)>>2; + th->doff = (sizeof(*th)+ tmp)>>2; - tcp_v4_send_check(sk, t1, sizeof(struct tcphdr) + tmp, buff); + tcp_v4_send_check(sk, th, sizeof(struct tcphdr) + tmp, buff); tcp_set_state(sk,TCP_SYN_SENT); @@ -803,7 +813,6 @@ int tcp_chkaddr(struct sk_buff *skb) static void tcp_v4_send_synack(struct sock *sk, struct open_request *req) { - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; struct sk_buff * skb; struct tcphdr *th; int tmp; @@ -829,6 +838,11 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req) */ req->mss = min(mss, req->mss); + if (req->mss < 1) { + printk(KERN_DEBUG "initial req->mss below 1\n"); + req->mss = 1; + } + /* Yuck, make this header setup more efficient... -DaveM */ memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; @@ -839,7 +853,16 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req) skb->end_seq = skb->seq + 1; th->seq = ntohl(skb->seq); th->ack_seq = htonl(req->rcv_isn + 1); - th->window = ntohs(tp->rcv_wnd); + if (req->rcv_wnd == 0) { + /* Set this up on the first call only */ + req->window_clamp = skb->dst->window; + tcp_select_initial_window(sock_rspace(sk)/2,req->mss, + &req->rcv_wnd, + &req->window_clamp, + req->wscale_ok, + &req->rcv_wscale); + } + th->window = htons(req->rcv_wnd); /* XXX Partial csum of 4 byte quantity is itself! -DaveM * Yes, but it's a bit harder to special case now. It's @@ -850,7 +873,7 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req) */ tmp = tcp_syn_build_options(skb, req->mss, req->sack_ok, req->tstamp_ok, - (req->snd_wscale)?tp->rcv_wscale:0); + req->wscale_ok,req->rcv_wscale); skb->csum = 0; th->doff = (sizeof(*th) + tmp)>>2; th->check = tcp_v4_check(th, sizeof(*th) + tmp, @@ -865,7 +888,7 @@ static void tcp_v4_or_free(struct open_request *req) { if(!req->sk && req->af.v4_req.opt) kfree_s(req->af.v4_req.opt, - sizeof(struct options) + req->af.v4_req.opt->optlen); + sizeof(struct ip_options) + req->af.v4_req.opt->optlen); } static struct or_calltable or_ipv4 = { @@ -881,7 +904,7 @@ static int tcp_v4_syn_filter(struct sock *sk, struct sk_buff *skb, __u32 saddr) int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 isn) { struct ip_options *opt = (struct ip_options *) ptr; - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct tcp_opt tp; struct open_request *req; struct tcphdr *th = skb->h.th; __u32 saddr = skb->nh.iph->saddr; @@ -913,19 +936,20 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i sk->ack_backlog++; + req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ + req->rcv_isn = skb->seq; req->snt_isn = isn; - tp->tstamp_ok = tp->sack_ok = tp->snd_wscale = 0; - tcp_parse_options(th,tp); - if (tp->saw_tstamp) { - tp->ts_recent = tp->rcv_tsval; - tp->ts_recent_stamp = jiffies; - } - req->mss = tp->in_mss; - req->tstamp_ok = tp->tstamp_ok; - req->sack_ok = tp->sack_ok; - req->snd_wscale = tp->snd_wscale; - req->ts_recent = tp->ts_recent; + tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0; + tp.in_mss = 536; + tcp_parse_options(th,&tp); + if (tp.saw_tstamp) + req->ts_recent = tp.rcv_tsval; + req->mss = tp.in_mss; + req->tstamp_ok = tp.tstamp_ok; + req->sack_ok = tp.sack_ok; + req->snd_wscale = tp.snd_wscale; + req->wscale_ok = tp.wscale_ok; req->rmt_port = th->source; req->af.v4_req.loc_addr = daddr; req->af.v4_req.rmt_addr = saddr; @@ -1004,8 +1028,6 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, atomic_set(&newsk->rmem_alloc, 0); newsk->localroute = sk->localroute; - newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF; - newsk->err = 0; newsk->shutdown = 0; newsk->ack_backlog = 0; @@ -1060,7 +1082,6 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->dst_cache = &rt->u.dst; - newtp->window_clamp = rt->u.dst.window; snd_mss = rt->u.dst.pmtu; /* FIXME: is mtu really the same as snd_mss? */ @@ -1072,10 +1093,19 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->sack_ok = req->sack_ok; newtp->tstamp_ok = req->tstamp_ok; - newtp->snd_wscale = req->snd_wscale; - newtp->ts_recent = req->ts_recent; - newtp->ts_recent_stamp = jiffies; + newtp->window_clamp = req->window_clamp; + newtp->rcv_wnd = req->rcv_wnd; + newtp->wscale_ok = req->wscale_ok; + if (newtp->wscale_ok) { + newtp->snd_wscale = req->snd_wscale; + newtp->rcv_wscale = req->rcv_wscale; + } else { + newtp->snd_wscale = newtp->rcv_wscale = 0; + newtp->window_clamp = min(newtp->window_clamp,65535); + } if (newtp->tstamp_ok) { + newtp->ts_recent = req->ts_recent; + newtp->ts_recent_stamp = jiffies; newtp->tcp_header_len = sizeof(struct tcphdr) + 12; /* FIXME: define constant! */ newsk->dummy_th.doff += 3; } else { @@ -1219,9 +1249,8 @@ int tcp_v4_rcv(struct sk_buff *skb, unsigned short len) case CHECKSUM_HW: if (tcp_v4_check(th,len,saddr,daddr,skb->csum)) { struct iphdr * iph = skb->nh.iph; - printk(KERN_DEBUG "TCPv4 bad checksum from %08x:%04x to %08x:%04x, ack = %u, seq = %u, len=%d/%d/%d\n", + printk(KERN_DEBUG "TCPv4 bad checksum from %08x:%04x to %08x:%04x, len=%d/%d/%d\n", saddr, ntohs(th->source), daddr, - ntohl(th->ack_seq), ntohl(th->seq), ntohs(th->dest), len, skb->len, ntohs(iph->tot_len)); goto discard_it; } @@ -1346,10 +1375,12 @@ static int tcp_v4_init_sock(struct sock *sk) tp->ato = 0; tp->iat = (HZ/5) << 3; - tp->rcv_wnd = 8192; + /* FIXME: tie this to sk->rcvbuf? (May be unnecessary) */ + /* tp->rcv_wnd = 8192; */ tp->tstamp_ok = 0; tp->sack_ok = 0; - tp->in_mss = 0; + tp->wscale_ok = 0; + tp->in_mss = 536; tp->snd_wscale = 0; tp->sacks = 0; tp->saw_tstamp = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7f157abe2..bdc79525f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_output.c,v 1.42 1997/04/22 01:06:33 davem Exp $ + * Version: $Id: tcp_output.c,v 1.43 1997/04/27 19:24:43 schenk Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -404,14 +404,115 @@ void tcp_write_xmit(struct sock *sk) -/* - * This function returns the amount that we can raise the - * usable window based on the following constraints +/* This function returns the amount that we can raise the + * usable window based on the following constraints * - * 1. The window can never be shrunk once it is offered (RFC 793) - * 2. We limit memory per socket + * 1. The window can never be shrunk once it is offered (RFC 793) + * 2. We limit memory per socket + * + * RFC 1122: + * "the suggested [SWS] avoidance algoritm for the receiver is to keep + * RECV.NEXT + RCV.WIN fixed until: + * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)" + * + * i.e. don't raise the right edge of the window until you can raise + * it at least MSS bytes. + * + * Unfortunately, the recomended algorithm breaks header prediction, + * since header prediction assumes th->window stays fixed. + * + * Strictly speaking, keeping th->window fixed violates the receiver + * side SWS prevention criteria. The problem is that under this rule + * a stream of single byte packets will cause the right side of the + * window to always advance by a single byte. + * + * Of course, if the sender implements sender side SWS prevention + * then this will not be a problem. + * + * BSD seems to make the following compromise: + * + * If the free space is less than the 1/4 of the maximum + * space available and the free space is less than 1/2 mss, + * then set the window to 0. + * Otherwise, just prevent the window from shrinking + * and from being larger than the largest representable value. + * + * This prevents incremental opening of the window in the regime + * where TCP is limited by the speed of the reader side taking + * data out of the TCP receive queue. It does nothing about + * those cases where the window is constrained on the sender side + * because the pipeline is full. + * + * BSD also seems to "accidentally" limit itself to windows that are a + * multiple of MSS, at least until the free space gets quite small. + * This would appear to be a side effect of the mbuf implementation. + * Combining these two algorithms results in the observed behavior + * of having a fixed window size at almost all times. + * + * Below we obtain similar behavior by forcing the offered window to + * a multiple of the mss when it is feasible to do so. + * + * FIXME: In our current implementation the value returned by sock_rpsace(sk) + * is the total space we have allocated to the socket to store skbuf's. + * The current design assumes that up to half of that space will be + * taken by headers, and the remaining space will be available for TCP data. + * This should be accounted for correctly instead. */ +unsigned short tcp_select_window(struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + int mss = sk->mss; + long free_space = sock_rspace(sk)/2; + long window, cur_win; + + if (tp->window_clamp) { + free_space = min(tp->window_clamp, free_space); + mss = min(tp->window_clamp, mss); + } else + printk(KERN_DEBUG "Clamp failure. Water leaking.\n"); + + if (mss < 1) { + mss = 1; + printk(KERN_DEBUG "tcp_select_window: mss fell to 0.\n"); + } + + /* compute the actual window i.e. + * old_window - received_bytes_on_that_win + */ + cur_win = tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup); + window = tp->rcv_wnd; + + if (cur_win < 0) { + cur_win = 0; + printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n", + tp->rcv_wnd, tp->rcv_nxt, tp->rcv_wup); + } + + if (free_space < sk->rcvbuf/4 && free_space < mss/2) + window = 0; + + /* Get the largest window that is a nice multiple of mss. + * Window clamp already applied above. + * If our current window offering is within 1 mss of the + * free space we just keep it. This prevents the divide + * and multiply from happening most of the time. + * We also don't do any window rounding when the free space + * is too small. + */ + if (window < free_space - mss && free_space > mss) + window = (free_space/mss)*mss; + /* Never shrink the offered window */ + if (window < cur_win) + window = cur_win; + + tp->rcv_wnd = window; + tp->rcv_wup = tp->rcv_nxt; + return window >> tp->rcv_wscale; /* RFC1323 scaling applied */ +} + +#if 0 +/* Old algorithm for window selection */ unsigned short tcp_select_window(struct sock *sk) { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; @@ -427,37 +528,31 @@ unsigned short tcp_select_window(struct sock *sk) /* compute the actual window i.e. * old_window - received_bytes_on_that_win */ - cur_win = tp->rcv_wup - (tp->rcv_nxt - tp->rcv_wnd); + cur_win = tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup); window = tp->rcv_wnd; - + if (cur_win < 0) { cur_win = 0; printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n", tp->rcv_wnd, tp->rcv_nxt, tp->rcv_wup); } - /* - * RFC 1122: + /* RFC 1122: * "the suggested [SWS] avoidance algoritm for the receiver is to keep * RECV.NEXT + RCV.WIN fixed until: * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)" * - * i.e. don't raise the right edge of the window until you can't raise - * it MSS bytes + * i.e. don't raise the right edge of the window until you can raise + * it at least MSS bytes. */ - /* It would be a good idea if it didn't break header prediction. - * and BSD made the header predition standard... - * It expects the same value in the header i.e. th->window to be - * constant - */ usable = free_space - cur_win; if (usable < 0) usable = 0; if (window < usable) { /* Window is not blocking the sender - * and we have enought free space for it + * and we have enough free space for it */ if (cur_win > (sk->mss << 1)) goto out; @@ -469,7 +564,7 @@ unsigned short tcp_select_window(struct sock *sk) */ window = max(usable, cur_win); } else { - if ((usable - window) >= mss) + while ((usable - window) >= mss) window += mss; } out: @@ -477,6 +572,7 @@ out: tp->rcv_wup = tp->rcv_nxt; return window; } +#endif static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb) { @@ -703,6 +799,11 @@ void tcp_send_fin(struct sock *sk) } } +/* WARNING: This routine must only be called when we have already sent + * a SYN packet that crossed the incoming SYN that caused this routine + * to get called. If this assumption fails then the initial rcv_wnd + * and rcv_wscale values will not be correct. + */ int tcp_send_synack(struct sock *sk) { struct tcp_opt * tp = &(sk->tp_pinfo.af_tcp); @@ -735,13 +836,16 @@ int tcp_send_synack(struct sock *sk) skb->end_seq = skb->seq + 1 /* th->syn */ ; th->seq = ntohl(skb->seq); - th->window = ntohs(tp->rcv_wnd); + /* This is a resend of a previous SYN, now with an ACK. + * we must reuse the previously offered window. + */ + th->window = htons(tp->rcv_wnd); tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt); tmp = tcp_syn_build_options(skb, sk->mss, tp->sack_ok, tp->tstamp_ok, - tp->snd_wscale?tp->rcv_wscale:0); + tp->wscale_ok,tp->rcv_wscale); skb->csum = 0; th->doff = (sizeof(*th) + tmp)>>2; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 365d3dac2..ce6c60feb 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,6 +22,8 @@ #include <net/tcp.h> +int sysctl_syn_retries = TCP_SYN_RETRIES; + static void tcp_sltimer_handler(unsigned long); static void tcp_syn_recv_timer(unsigned long); static void tcp_keepalive(unsigned long data); @@ -178,7 +180,7 @@ static int tcp_write_timeout(struct sock *sk) } /* Have we tried to SYN too many times (repent repent 8)) */ - if(tp->retransmits > TCP_SYN_RETRIES && sk->state==TCP_SYN_SENT) { + if(tp->retransmits > sysctl_syn_retries && sk->state==TCP_SYN_SENT) { if(sk->err_soft) sk->err=sk->err_soft; else diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9ca5f3045..ed84d5b0f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -154,7 +154,7 @@ static int udp_v4_verify_bind(struct sock *sk, unsigned short snum) return retval; } -static inline int udp_lport_inuse(int num) +static inline int udp_lport_inuse(u16 num) { struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)]; @@ -168,36 +168,42 @@ static inline int udp_lport_inuse(int num) /* Shared by v4/v6 tcp. */ unsigned short udp_good_socknum(void) { - static int start = 0; - unsigned short base; - int i, best = 0, size = 32767; /* a big num. */ int result; - - base = PROT_SOCK + (start & 1023) + 1; + static int start = 0; + int i, best, best_size_so_far; SOCKHASH_LOCK(); - for(i = 0; i < UDP_HTABLE_SIZE; i++) { - struct sock *sk = udp_hash[i]; - if(!sk) { - start = (i + 1 + start) & 1023; - result = i + base + 1; + + /* Select initial not-so-random "best" */ + best = PROT_SOCK + 1 + (start & 1023); + best_size_so_far = 32767; /* "big" num */ + result = best; + for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { + struct sock *sk; + int size; + + sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + + /* No clashes - take it */ + if (!sk) goto out; - } else { - int j = 0; - do { - if(++j >= size) - goto next; - } while((sk = sk->next)); - best = i; - size = j; - } - next: + + /* Is this one better than our best so far? */ + size = 0; + do { + if(++size >= best_size_so_far) + goto next; + } while((sk = sk->next) != NULL); + best_size_so_far = size; + best = result; +next: } - while(udp_lport_inuse(base + best + 1)) + while (udp_lport_inuse(best)) best += UDP_HTABLE_SIZE; - result = (best + base + 1); + result = best; out: + start = result; SOCKHASH_UNLOCK(); return result; } diff --git a/net/ipv4/utils.c b/net/ipv4/utils.c index cbce01b68..4253c85db 100644 --- a/net/ipv4/utils.c +++ b/net/ipv4/utils.c @@ -46,7 +46,7 @@ * Display an IP address in readable format. */ -char *in_ntoa(unsigned long in) +char *in_ntoa(__u32 in) { static char buff[18]; char *p; @@ -62,7 +62,7 @@ char *in_ntoa(unsigned long in) * Convert an ASCII string to binary IP. */ -unsigned long in_aton(const char *str) +__u32 in_aton(const char *str) { unsigned long l; unsigned int val; diff --git a/net/ipv6/.cvsignore b/net/ipv6/.cvsignore new file mode 100644 index 000000000..4671378ae --- /dev/null +++ b/net/ipv6/.cvsignore @@ -0,0 +1 @@ +.depend diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9173a7760..1639f916d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: addrconf.c,v 1.18 1997/04/16 05:58:03 davem Exp $ + * $Id: addrconf.c,v 1.20 1997/05/07 09:40:04 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -31,6 +31,7 @@ #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/route.h> +#include <linux/init.h> #include <linux/proc_fs.h> #include <net/sock.h> @@ -1215,7 +1216,7 @@ void addrconf_verify(unsigned long foo) * Init / cleanup code */ -void addrconf_init() +__initfunc(void addrconf_init(void)) { struct device *dev; @@ -1273,6 +1274,7 @@ void addrconf_cleanup(void) for (idev = inet6_dev_lst[i]; idev; ) { struct inet6_dev *back; + addrconf_ifdown(idev->dev); back = idev; idev = idev->next; kfree(back); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0f6bbf4de..1de20e358 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/af_inet.c * - * $Id: af_inet6.c,v 1.16 1997/03/18 18:24:26 davem Exp $ + * $Id: af_inet6.c,v 1.18 1997/05/07 09:40:12 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -34,6 +34,7 @@ #include <linux/interrupt.h> #include <linux/proc_fs.h> #include <linux/stat.h> +#include <linux/init.h> #include <linux/inet.h> #include <linux/netdevice.h> @@ -457,13 +458,27 @@ static struct proc_dir_entry proc_net_sockstat6 = { #endif /* CONFIG_PROC_FS */ #ifdef MODULE +int ipv6_unload(void) +{ + return 0; +} +#endif + +#ifdef MODULE int init_module(void) #else -void inet6_proto_init(struct net_proto *pro) +__initfunc(void inet6_proto_init(struct net_proto *pro)) #endif { struct sk_buff *dummy_skb; +#ifdef MODULE + if (!mod_member_present(&__this_module, can_unload)) + return -EINVAL; + + __this_module.can_unload = &ipv6_unload; +#endif + printk(KERN_INFO "IPv6 v0.2 for NET3.037\n"); if (sizeof(struct ipv6_options) > sizeof(dummy_skb->cb)) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a898f6008..90f7b25d9 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: datagram.c,v 1.10 1997/04/14 05:39:42 davem Exp $ + * $Id: datagram.c,v 1.12 1997/05/15 18:55:09 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -15,6 +15,9 @@ #include <linux/errno.h> #include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/interrupt.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/in6.h> @@ -36,7 +39,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) src_info.ipi6_ifindex = skb->dev->ifindex; ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr); - put_cmsg(msg, SOL_IPV6, IPV6_RXINFO, sizeof(src_info), &src_info); + put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxhlim) { @@ -64,20 +67,18 @@ int datagram_send_ctl(struct msghdr *msg, struct device **src_dev, for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { if (cmsg->cmsg_level != SOL_IPV6) { - printk(KERN_DEBUG "cmsg_level %d\n", cmsg->cmsg_level); + printk(KERN_DEBUG "invalid cmsg_level %d\n", cmsg->cmsg_level); continue; } switch (cmsg->cmsg_type) { - - case IPV6_TXINFO: - if (cmsg->cmsg_len < (sizeof(struct cmsghdr) + - sizeof(struct in6_pktinfo))) { + case IPV6_PKTINFO: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo))) { err = -EINVAL; goto exit_f; } - src_info = (struct in6_pktinfo *) cmsg->cmsg_data; + src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); if (src_info->ipi6_ifindex) { int index = src_info->ipi6_ifindex; @@ -101,18 +102,13 @@ int datagram_send_ctl(struct msghdr *msg, struct device **src_dev, break; case IPV6_RXSRCRT: - - len = cmsg->cmsg_len; - - len -= sizeof(struct cmsghdr); - - /* validate option length */ - if (len < sizeof(struct ipv6_rt_hdr)) { + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) { err = -EINVAL; goto exit_f; } - rthdr = (struct ipv6_rt_hdr *) cmsg->cmsg_data; + len = cmsg->cmsg_len - sizeof(struct cmsghdr); + rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); /* * TYPE 0 @@ -139,21 +135,16 @@ int datagram_send_ctl(struct msghdr *msg, struct device **src_dev, break; case IPV6_HOPLIMIT: - - len = cmsg->cmsg_len; - len -= sizeof(struct cmsghdr); - - if (len < sizeof(int)) { + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { err = -EINVAL; goto exit_f; } - *hlimit = *((int *) cmsg->cmsg_data); + *hlimit = *(int *)CMSG_DATA(cmsg); break; default: - printk(KERN_DEBUG "invalid cmsg type: %d\n", - cmsg->cmsg_type); + printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type); err = -EINVAL; break; }; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 37bd7f814..71ff84b4b 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: icmp.c,v 1.8 1997/03/18 18:24:30 davem Exp $ + * $Id: icmp.c,v 1.9 1997/04/29 09:38:42 mj Exp $ * * Based on net/ipv4/icmp.c * @@ -34,6 +34,7 @@ #include <linux/sockios.h> #include <linux/net.h> #include <linux/skbuff.h> +#include <linux/init.h> #include <linux/inet.h> #include <linux/netdevice.h> @@ -486,7 +487,7 @@ discard_it: return 0; } -void icmpv6_init(struct net_proto_family *ops) +__initfunc(void icmpv6_init(struct net_proto_family *ops)) { struct sock *sk; int err; diff --git a/net/ipv6/ip6_fw.c b/net/ipv6/ip6_fw.c index f6e7f8da4..5a47cc251 100644 --- a/net/ipv6/ip6_fw.c +++ b/net/ipv6/ip6_fw.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: ip6_fw.c,v 1.4 1997/03/18 18:24:34 davem Exp $ + * $Id: ip6_fw.c,v 1.5 1997/04/29 09:38:44 mj Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -22,6 +22,7 @@ #include <linux/netdevice.h> #include <linux/in6.h> #include <linux/udp.h> +#include <linux/init.h> #include <net/ipv6.h> #include <net/ip6_route.h> @@ -365,7 +366,7 @@ static void ip6_fw_destroy(struct flow_rule *rl) #define ip6_fw_init module_init #endif -void ip6_fw_init(void) +__initfunc(void ip6_fw_init(void)) { netlink_attach(NETLINK_IP6_FW, ip6_fw_msgrcv); } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index c5e21417d..cf107efcd 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -6,7 +6,7 @@ * Pedro Roque <roque@di.fc.ul.pt> * Ian P. Morris <I.P.Morris@soton.ac.uk> * - * $Id: ip6_input.c,v 1.4 1997/03/18 18:24:35 davem Exp $ + * $Id: ip6_input.c,v 1.6 1997/05/11 16:06:52 davem Exp $ * * Based in linux/net/ipv4/ip_input.c * @@ -133,7 +133,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb, struct tlvtype_proc *curr; while ((hdr=(struct ipv6_tlvtype *)skb->h.raw) != lastopt) { - switch (hdr->type & 0x3F) { + switch (hdr->type) { case 0: /* TLV encoded Pad1 */ skb->h.raw++; break; @@ -144,7 +144,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb, default: /* Other TLV code so scan list */ for (curr=procs; curr->type != 255; curr++) { - if (curr->type == (hdr->type & 0x3F)) { + if (curr->type == (hdr->type)) { curr->func(skb, dev, nhptr, opt); skb->h.raw += hdr->len+2; break; @@ -166,10 +166,12 @@ static int ipv6_dest_opt(struct sk_buff **skb_ptr, struct device *dev, struct sk_buff *skb=*skb_ptr; struct ipv6_destopt_hdr *hdr = (struct ipv6_destopt_hdr *) skb->h.raw; int res = 0; + void *lastopt=skb->h.raw+hdr->hdrlen+sizeof(struct ipv6_destopt_hdr); - if (ip6_parse_tlv(tlvprocdestopt_lst, skb, dev, nhptr, opt, - skb->h.raw+hdr->hdrlen)) + skb->h.raw += sizeof(struct ipv6_destopt_hdr); + if (ip6_parse_tlv(tlvprocdestopt_lst, skb, dev, nhptr, opt, lastopt)) res = hdr->nexthdr; + skb->h.raw+=hdr->hdrlen; return res; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 88920bb73..64cfb00d5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -7,7 +7,7 @@ * * Based on linux/net/ipv4/ip_sockglue.c * - * $Id: ipv6_sockglue.c,v 1.11 1997/04/20 09:44:33 davem Exp $ + * $Id: ipv6_sockglue.c,v 1.13 1997/05/15 18:55:10 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -31,12 +31,11 @@ #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_arp.h> - +#include <linux/init.h> #include <linux/sysctl.h> #include <net/sock.h> #include <net/snmp.h> - #include <net/ipv6.h> #include <net/ndisc.h> #include <net/protocol.h> @@ -122,7 +121,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval, } break; - case IPV6_RXINFO: + case IPV6_PKTINFO: np->rxinfo = val; retv = 0; break; @@ -239,7 +238,7 @@ extern void ipv6_sysctl_register(void); extern void ipv6_sysctl_unregister(void); #endif -void ipv6_init(void) +__initfunc(void ipv6_init(void)) { dev_add_pack(&ipv6_packet_type); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 573f1f611..637f434d4 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: mcast.c,v 1.8 1997/04/12 04:32:48 davem Exp $ + * $Id: mcast.c,v 1.10 1997/05/07 09:40:22 davem Exp $ * * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c * @@ -27,6 +27,7 @@ #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/route.h> +#include <linux/init.h> #include <net/sock.h> #include <net/snmp.h> @@ -186,7 +187,8 @@ int ipv6_dev_mc_inc(struct device *dev, struct in6_addr *addr) hash = ipv6_addr_hash(addr); for (mc = inet6_mcast_lst[hash]; mc; mc = mc->next) { - if (ipv6_addr_cmp(&mc->mca_addr, addr) == 0) { + if ((ipv6_addr_cmp(&mc->mca_addr, addr) == 0) && + (mc->dev->ifindex == dev->ifindex)) { atomic_inc(&mc->mca_users); return 0; } @@ -495,7 +497,7 @@ void igmp6_timer_handler(unsigned long data) ma->mca_flags &= ~MAF_TIMER_RUNNING; } -void igmp6_init(struct net_proto_family *ops) +__initfunc(void igmp6_init(struct net_proto_family *ops)) { struct sock *sk; int err; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3a1704f37..83b5cf3bc 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -6,7 +6,7 @@ * Pedro Roque <roque@di.fc.ul.pt> * Mike Shaver <shaver@ingenia.com> * - * $Id: ndisc.c,v 1.14 1997/04/12 04:32:51 davem Exp $ + * $Id: ndisc.c,v 1.15 1997/04/29 09:38:48 mj Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -43,6 +43,7 @@ #include <linux/net.h> #include <linux/in6.h> #include <linux/route.h> +#include <linux/init.h> #include <linux/if_arp.h> #include <linux/ipv6.h> @@ -1647,7 +1648,7 @@ struct proc_dir_entry ndisc_proc_entry = }; #endif /* CONFIG_PROC_FS */ -void ndisc_init(struct net_proto_family *ops) +__initfunc(void ndisc_init(struct net_proto_family *ops)) { struct sock *sk; int err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d04464e26..b8e6ac4a5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: route.c,v 1.11 1997/04/16 05:58:05 davem Exp $ + * $Id: route.c,v 1.12 1997/04/29 09:38:50 mj Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -22,6 +22,7 @@ #include <linux/route.h> #include <linux/netdevice.h> #include <linux/in6.h> +#include <linux/init.h> #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> @@ -1573,7 +1574,7 @@ static struct proc_dir_entry proc_rt6_tree = { }; #endif /* CONFIG_PROC_FS */ -void ip6_route_init(void) +__initfunc(void ip6_route_init(void)) { #ifdef CONFIG_PROC_FS proc_net_register(&proc_rt6_info); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 4b072889c..d818bc777 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: sit.c,v 1.13 1997/03/18 18:24:50 davem Exp $ + * $Id: sit.c,v 1.14 1997/04/29 09:38:52 mj Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -23,6 +23,7 @@ #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/icmp.h> +#include <linux/init.h> #include <net/sock.h> #include <net/snmp.h> @@ -243,7 +244,7 @@ static int sit_close(struct device *dev) return 0; } -int sit_init(void) +__initfunc(int sit_init(void)) { int i; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5151013a7..3c61f7b50 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: tcp_ipv6.c,v 1.27 1997/04/22 02:53:20 davem Exp $ + * $Id: tcp_ipv6.c,v 1.31 1997/04/29 21:51:23 davem Exp $ * * Based on: * linux/net/ipv4/tcp.c @@ -27,6 +27,7 @@ #include <linux/in.h> #include <linux/in6.h> #include <linux/netdevice.h> +#include <linux/init.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> @@ -432,21 +433,32 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tp->snd_nxt = sk->write_seq; buff->end_seq = sk->write_seq; th->ack = 0; - th->window = 2; th->syn = 1; - tp->window_clamp = 0; sk->mtu = dst->pmtu; sk->mss = sk->mtu - sizeof(struct ipv6hdr) - sizeof(struct tcphdr); + if (sk->mss < 1) { + printk(KERN_DEBUG "intial ipv6 sk->mss below 1\n"); + sk->mss = 1; /* Sanity limit */ + } + + tp->window_clamp = 0; /* FIXME: shouldn't ipv6 dst cache have this? */ + tcp_select_initial_window(sock_rspace(sk)/2,sk->mss, + &tp->rcv_wnd, + &tp->window_clamp, + sysctl_tcp_window_scaling, + &tp->rcv_wscale); + th->window = htons(tp->rcv_wnd); + /* * Put in the TCP options to say MTU. */ tmp = tcp_syn_build_options(buff, sk->mss, sysctl_tcp_sack, sysctl_tcp_timestamps, - sysctl_tcp_window_scaling?tp->rcv_wscale:0); + sysctl_tcp_window_scaling,tp->rcv_wscale); th->doff = sizeof(*th)/4 + (tmp>>2); buff->csum = 0; tcp_v6_send_check(sk, th, sizeof(struct tcphdr) + tmp, buff); @@ -586,9 +598,11 @@ void tcp_v6_err(int type, int code, unsigned char *header, __u32 info, } +/* FIXME: this is substantially similar to the ipv4 code. + * Can some kind of merge be done? -- erics + */ static void tcp_v6_send_synack(struct sock *sk, struct open_request *req) { - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; struct sk_buff * skb; struct tcphdr *th; struct dst_entry *dst; @@ -630,11 +644,32 @@ static void tcp_v6_send_synack(struct sock *sk, struct open_request *req) th->seq = ntohl(skb->seq); th->ack_seq = htonl(req->rcv_isn + 1); th->doff = sizeof(*th)/4 + 1; - - th->window = ntohs(tp->rcv_wnd); - tmp = tcp_syn_build_options(skb, sk->mss, req->sack_ok, req->tstamp_ok, - (req->snd_wscale)?tp->rcv_wscale:0); + /* Don't offer more than they did. + * This way we don't have to memorize who said what. + * FIXME: the selection of initial mss here doesn't quite + * match what happens under IPV4. Figure out the right thing to do. + */ + req->mss = min(sk->mss, req->mss); + + if (req->mss < 1) { + printk(KERN_DEBUG "initial req->mss below 1\n"); + req->mss = 1; + } + + if (req->rcv_wnd == 0) { + /* Set this up on the first call only */ + req->window_clamp = 0; /* FIXME: should be in dst cache */ + tcp_select_initial_window(sock_rspace(sk)/2,req->mss, + &req->rcv_wnd, + &req->window_clamp, + req->wscale_ok, + &req->rcv_wscale); + } + th->window = htons(req->rcv_wnd); + + tmp = tcp_syn_build_options(skb, req->mss, req->sack_ok, req->tstamp_ok, + req->snd_wscale,req->rcv_wscale); th->doff = sizeof(*th)/4 + (tmp>>2); th->check = tcp_v6_check(th, sizeof(*th) + tmp, &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr, @@ -656,10 +691,13 @@ static struct or_calltable or_ipv6 = { tcp_v6_or_free }; +/* FIXME: this is substantially similar to the ipv4 code. + * Can some kind of merge be done? -- erics + */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 isn) { - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct tcp_opt tp; struct open_request *req; __u16 req_mss; @@ -691,14 +729,20 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, sk->ack_backlog++; + req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ + req->rcv_isn = skb->seq; req->snt_isn = isn; - - tcp_parse_options(skb->h.th,tp); - req_mss = tp->in_mss; - if (!req_mss) - req_mss = 536; - req->mss = req_mss; + tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0; + tp.in_mss = 536; + tcp_parse_options(skb->h.th,&tp); + if (tp.saw_tstamp) + req->ts_recent = tp.rcv_tsval; + req->mss = tp.in_mss; + req->tstamp_ok = tp.tstamp_ok; + req->sack_ok = tp.sack_ok; + req->snd_wscale = tp.snd_wscale; + req->wscale_ok = tp.wscale_ok; req->rmt_port = skb->h.th->source; ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr); ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr); @@ -876,6 +920,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->sack_ok = req->sack_ok; newtp->tstamp_ok = req->tstamp_ok; newtp->snd_wscale = req->snd_wscale; + newtp->wscale_ok = req->wscale_ok; newtp->ts_recent = req->ts_recent; if (newtp->tstamp_ok) { newtp->tcp_header_len = sizeof(struct tcphdr) + 12; /* FIXME: define the contant. */ @@ -1305,8 +1350,11 @@ static int tcp_v6_init_sock(struct sock *sk) tp->ato = 0; tp->iat = (HZ/5) << 3; - - tp->rcv_wnd = 8192; + + /* FIXME: right thing? */ + tp->rcv_wnd = 0; + tp->in_mss = 536; + /* tp->rcv_wnd = 8192; */ /* start with only sending one packet at a time. */ tp->snd_cwnd = 1; @@ -1320,7 +1368,7 @@ static int tcp_v6_init_sock(struct sock *sk) sk->max_ack_backlog = SOMAXCONN; sk->mtu = 576; - sk->mss = 516; + sk->mss = 536; sk->dummy_th.doff = sizeof(sk->dummy_th)/4; @@ -1416,7 +1464,7 @@ static struct inet6_protocol tcpv6_protocol = "TCPv6" /* name */ }; -void tcpv6_init(void) +__initfunc(void tcpv6_init(void)) { /* register inet6 protocol */ inet6_add_protocol(&tcpv6_protocol); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1f0fb8ce5..f18f5a6f8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -7,7 +7,7 @@ * * Based on linux/ipv4/udp.c * - * $Id: udp.c,v 1.16 1997/04/11 22:22:57 davem Exp $ + * $Id: udp.c,v 1.17 1997/04/29 09:38:55 mj Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -26,6 +26,7 @@ #include <linux/if_arp.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> +#include <linux/init.h> #include <net/sock.h> #include <net/snmp.h> @@ -770,7 +771,7 @@ struct proto udpv6_prot = { 0 /* highestinuse */ }; -void udpv6_init(void) +__initfunc(void udpv6_init(void)) { inet6_add_protocol(&udpv6_protocol); } diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 5b131e4a9..cfb47bb42 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -91,6 +91,7 @@ #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/firewall.h> +#include <linux/init.h> #ifdef MODULE static void ipx_proto_finito(void); @@ -2434,7 +2435,7 @@ ipx_proto_init(struct net_proto *pro) * sockets be closed from user space. */ -static void ipx_proto_finito(void) +__initfunc(static void ipx_proto_finito(void)) { ipx_interface *ifc; while (ipx_interfaces) { diff --git a/net/lapb/.cvsignore b/net/lapb/.cvsignore new file mode 100644 index 000000000..4671378ae --- /dev/null +++ b/net/lapb/.cvsignore @@ -0,0 +1 @@ +.depend diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index d5b586e04..f28f8fb8d 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -39,6 +39,7 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/stat.h> +#include <linux/init.h> #include <net/lapb.h> static lapb_cb *volatile lapb_list = NULL; @@ -397,7 +398,7 @@ EXPORT_SYMBOL(lapb_disconnect_request); EXPORT_SYMBOL(lapb_data_request); EXPORT_SYMBOL(lapb_data_received); -void lapb_proto_init(struct net_proto *pro) +__initfunc(void lapb_proto_init(struct net_proto *pro)) { printk(KERN_INFO "LAPB for Linux. Version 0.01 for Linux NET3.038 (Linux 2.1)\n"); } diff --git a/net/netbeui/af_netbeui.c b/net/netbeui/af_netbeui.c index e6683d00f..9b1444997 100644 --- a/net/netbeui/af_netbeui.c +++ b/net/netbeui/af_netbeui.c @@ -31,6 +31,7 @@ #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/firewall.h> +#include <linux/init.h> #undef NETBEUI_DEBUG @@ -620,7 +621,7 @@ static struct proc_dir_entry proc_netbeui = { /* Called by proto.c on kernel start up */ -void netbeui_proto_init(struct net_proto *pro) +__initfunc(void netbeui_proto_init(struct net_proto *pro)) { (void) sock_register(netbeui_proto_ops.family, &netbeui_proto_ops); if ((nb_dl = register_8022_client(nb_8022_id, netbeui_rcv)) == NULL) diff --git a/net/netlink.c b/net/netlink.c index 8c3b0aecc..539ec4295 100644 --- a/net/netlink.c +++ b/net/netlink.c @@ -23,6 +23,7 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/skbuff.h> +#include <linux/init.h> #include <net/netlink.h> @@ -443,7 +444,7 @@ void nlmsg_transmit(struct nlmsg_ctl *ctl) } -int init_netlink(void) +__initfunc(int init_netlink(void)) { int ct; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index d66094134..c7383e228 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -64,6 +64,7 @@ #include <net/ip.h> #include <net/arp.h> #include <linux/if_arp.h> +#include <linux/init.h> int sysctl_netrom_default_path_quality = NR_DEFAULT_QUAL; int sysctl_netrom_obsolescence_count_initialiser = NR_DEFAULT_OBS; @@ -1361,7 +1362,7 @@ static struct device dev_nr[] = { {"nr3", 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, nr_init} }; -void nr_proto_init(struct net_proto *pro) +__initfunc(void nr_proto_init(struct net_proto *pro)) { int i; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index 2502885a3..c6a415ee6 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/sysctl.h> +#include <linux/init.h> #include <net/ax25.h> #include <net/netrom.h> @@ -78,7 +79,7 @@ static ctl_table nr_root_table[] = { {0} }; -void nr_register_sysctl(void) +__initfunc(void nr_register_sysctl(void)) { nr_table_header = register_sysctl_table(nr_root_table, 1); } diff --git a/net/netsyms.c b/net/netsyms.c index 34946a5b7..118841c32 100644 --- a/net/netsyms.c +++ b/net/netsyms.c @@ -73,9 +73,6 @@ extern void destroy_8023_client(struct datalink_proto *); #include <net/sock.h> #endif -extern char *skb_push_errstr; -extern char *skb_put_errstr; - /* Skbuff symbols. */ EXPORT_SYMBOL(skb_push_errstr); EXPORT_SYMBOL(skb_put_errstr); @@ -200,6 +197,10 @@ EXPORT_SYMBOL(csum_partial_copy_fromiovecend); EXPORT_SYMBOL(__release_sock); EXPORT_SYMBOL(net_timer); /* UDP/TCP exported functions for TCPv6 */ +EXPORT_SYMBOL(sysctl_tcp_sack); +EXPORT_SYMBOL(sysctl_tcp_timestamps); +EXPORT_SYMBOL(sysctl_tcp_window_scaling); +EXPORT_SYMBOL(sock_rspace); EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_connect); EXPORT_SYMBOL(udp_sendmsg); diff --git a/net/rose/.cvsignore b/net/rose/.cvsignore new file mode 100644 index 000000000..4671378ae --- /dev/null +++ b/net/rose/.cvsignore @@ -0,0 +1 @@ +.depend diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index f173dedaf..f3309ade9 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -52,6 +52,7 @@ #include <net/ip.h> #include <net/arp.h> #include <linux/if_arp.h> +#include <linux/init.h> int sysctl_rose_restart_request_timeout = ROSE_DEFAULT_T0; int sysctl_rose_call_request_timeout = ROSE_DEFAULT_T1; @@ -1381,7 +1382,7 @@ static struct device dev_rose[] = { {"rose5", 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, rose_init} }; -void rose_proto_init(struct net_proto *pro) +__initfunc(void rose_proto_init(struct net_proto *pro)) { int i; diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c index c899a1837..8cd49695f 100644 --- a/net/rose/sysctl_net_rose.c +++ b/net/rose/sysctl_net_rose.c @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/sysctl.h> +#include <linux/init.h> #include <net/ax25.h> #include <net/rose.h> @@ -58,7 +59,7 @@ static ctl_table rose_root_table[] = { {0} }; -void rose_register_sysctl(void) +__initfunc(void rose_register_sysctl(void)) { rose_table_header = register_sysctl_table(rose_root_table, 1); } diff --git a/net/socket.c b/net/socket.c index 2e53ed446..482255255 100644 --- a/net/socket.c +++ b/net/socket.c @@ -39,6 +39,8 @@ * for sockets. May have errors at the * moment. * Kevin Buhr : Fixed the dumb errors in the above. + * Andi Kleen : Some small cleanups, optimizations, + * and fixed a copy_from_user() bug. * * * This program is free software; you can redistribute it and/or @@ -71,6 +73,7 @@ #include <linux/proc_fs.h> #include <linux/firewall.h> #include <linux/wanrouter.h> +#include <linux/init.h> #if defined(CONFIG_KERNELD) && defined(CONFIG_NET) #include <linux/kerneld.h> @@ -179,7 +182,7 @@ int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen) * "fromlen shall refer to the value before truncation.." * 1003.1g */ - return put_user(klen, ulen); + return __put_user(klen, ulen); } /* @@ -207,7 +210,6 @@ static int get_fd(struct inode *inode) file->f_op = &socket_file_ops; file->f_mode = 3; file->f_flags = O_RDWR; - file->f_count = 1; file->f_inode = inode; if (inode) inode->i_count++; @@ -365,6 +367,7 @@ static long sock_read(struct inode *inode, struct file *file, if (size==0) /* Match SYS5 behaviour */ return 0; + /* FIXME: I think this can be removed now. */ if ((err=verify_area(VERIFY_WRITE,ubuf,size))<0) return err; msg.msg_name=NULL; @@ -398,7 +401,8 @@ static long sock_write(struct inode *inode, struct file *file, if(size==0) /* Match SYS5 behaviour */ return 0; - + + /* FIXME: I think this can be removed now */ if ((err=verify_area(VERIFY_READ,ubuf,size))<0) return err; @@ -797,7 +801,6 @@ asmlinkage int sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_ad { if (!(newsock = sock_alloc())) { - printk(KERN_WARNING "accept: no more sockets\n"); err=-EMFILE; goto out; } @@ -1130,6 +1133,7 @@ asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned flags) struct msghdr msg_sys; int err= -EINVAL; int total_len; + unsigned char *ctl_buf = ctl; lock_kernel(); @@ -1149,22 +1153,26 @@ asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned flags) if (msg_sys.msg_controllen) { - if (msg_sys.msg_controllen > sizeof(ctl)) + /* XXX We just limit the buffer and assume that the + * skbuff accounting stops it from going too far. + * I hope this is correct. + */ + if (msg_sys.msg_controllen > sizeof(ctl) && + msg_sys.msg_controllen <= 256) { - char *tmp = kmalloc(msg_sys.msg_controllen, GFP_KERNEL); - if (tmp == NULL) + ctl_buf = kmalloc(msg_sys.msg_controllen, GFP_KERNEL); + if (ctl_buf == NULL) { err = -ENOBUFS; goto failed2; } - err = copy_from_user(tmp, msg_sys.msg_control, msg_sys.msg_controllen); - msg_sys.msg_control = tmp; - } else { - err = copy_from_user(ctl, msg_sys.msg_control, msg_sys.msg_controllen); - msg_sys.msg_control = ctl; } - if (err) + if (copy_from_user(ctl_buf, msg_sys.msg_control, + msg_sys.msg_controllen)) { + err = -EFAULT; goto failed; + } + msg_sys.msg_control = ctl_buf; } msg_sys.msg_flags = flags; if (current->files->fd[fd]->f_flags & O_NONBLOCK) @@ -1177,8 +1185,8 @@ asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned flags) } failed: - if (msg_sys.msg_controllen && msg_sys.msg_control != ctl) - kfree(msg_sys.msg_control); + if (ctl_buf != ctl) + kfree_s(ctl_buf, msg_sys.msg_controllen); failed2: if (msg_sys.msg_iov != iov) kfree(msg_sys.msg_iov); @@ -1240,7 +1248,6 @@ asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags) if (current->files->fd[fd]->f_flags&O_NONBLOCK) flags |= MSG_DONTWAIT; - if ((sock = sockfd_lookup(fd, &err))!=NULL) { err=sock_recvmsg(sock, &msg_sys, total_len, flags); @@ -1253,9 +1260,12 @@ asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags) if (uaddr != NULL && err>=0) err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len); - if (err>=0 && (put_user(msg_sys.msg_flags, &msg->msg_flags) || - put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, &msg->msg_controllen))) - err = -EFAULT; + if (err>=0) { + err = __put_user(msg_sys.msg_flags, &msg->msg_flags); + if (!err) + err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, + &msg->msg_controllen); + } out: unlock_kernel(); if(err<0) @@ -1280,33 +1290,33 @@ int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg) return(-EINVAL); } +/* Argument list sizes for sys_socketcall */ +#define AL(x) ((x) * sizeof(unsigned long)) +static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), + AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)}; +#undef AL /* * System call vectors. * * Argument checking cleaned up. Saved 20% in size. + * This function doesn't need to set the kernel lock because + * it is set by the callees. */ asmlinkage int sys_socketcall(int call, unsigned long *args) { - unsigned char nargs[18]={0,3,3,3,2,3,3,3, - 4,4,4,6,6,2,5,5,3,3}; unsigned long a[6]; unsigned long a0,a1; - int err = -EINVAL; - - lock_kernel(); + int err; + if(call<1||call>SYS_RECVMSG) - goto out; - err = -EFAULT; + return -EINVAL; - /* - * Ideally we want to precompute the maths, but unsigned long - * isnt a fixed size.... - */ - - if ((copy_from_user(a, args, nargs[call] * sizeof(unsigned long)))) - goto out; + /* copy_from_user should be SMP safe. */ + if (copy_from_user(a, args, nargs[call])) + return -EFAULT; a0=a[0]; a1=a[1]; @@ -1370,12 +1380,9 @@ asmlinkage int sys_socketcall(int call, unsigned long *args) err = -EINVAL; break; } -out: - unlock_kernel(); return err; } - /* * This function is called by a protocol handler that wants to * advertise its address family, and have it linked into the @@ -1400,7 +1407,7 @@ int sock_unregister(int family) return 0; } -void proto_init(void) +__initfunc(void proto_init(void)) { extern struct net_proto protocols[]; /* Network protocols */ struct net_proto *pro; @@ -1417,7 +1424,7 @@ void proto_init(void) extern void sk_init(void); -void sock_init(void) +__initfunc(void sock_init(void)) { int i; diff --git a/net/sunrpc/.cvsignore b/net/sunrpc/.cvsignore new file mode 100644 index 000000000..4671378ae --- /dev/null +++ b/net/sunrpc/.cvsignore @@ -0,0 +1 @@ +.depend diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index cb1a641e7..4a05efd9c 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -79,6 +79,8 @@ bailout: } #ifdef CONFIG_ROOT_NFS +char *in_ntoa(__u32 in); + int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7c4d4679d..80d91481e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -81,6 +81,7 @@ #include <net/af_unix.h> #include <linux/proc_fs.h> #include <net/scm.h> +#include <linux/init.h> #include <asm/checksum.h> @@ -1459,7 +1460,7 @@ struct net_proto_family unix_family_ops = { unix_create }; -void unix_proto_init(struct net_proto *pro) +__initfunc(void unix_proto_init(struct net_proto *pro)) { struct sk_buff *dummy_skb; struct proc_dir_entry *ent; diff --git a/net/wanrouter/.cvsignore b/net/wanrouter/.cvsignore new file mode 100644 index 000000000..4671378ae --- /dev/null +++ b/net/wanrouter/.cvsignore @@ -0,0 +1 @@ +.depend diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 948bf81fa..4c0042082 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -34,6 +34,7 @@ #include <asm/byteorder.h> /* htons(), etc. */ #include <asm/uaccess.h> /* copy_to/from_user */ #include <linux/wanrouter.h> /* WAN router API definitions */ +#include <linux/init.h> /* __initfunc et al. */ /****** Defines and Macros **************************************************/ @@ -130,7 +131,7 @@ void cleanup_module (void) #else -void wanrouter_init(void) +__initfunc(void wanrouter_init(void)) { int err = wanrouter_proc_init(); if (err) printk(KERN_ERR diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index ce7140db0..de207d319 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -23,6 +23,7 @@ #include <linux/malloc.h> /* kmalloc(), kfree() */ #include <linux/mm.h> /* verify_area(), etc. */ #include <linux/string.h> /* inline mem*, str* functions */ +#include <linux/init.h> /* __initfunc et al. */ #include <asm/segment.h> /* kernel <-> user copy */ #include <asm/byteorder.h> /* htons(), etc. */ #include <asm/uaccess.h> /* copy_to_user */ @@ -271,7 +272,7 @@ static struct proc_dir_entry proc_router_stat = * Initialize router proc interface. */ -int wanrouter_proc_init (void) +__initfunc(int wanrouter_proc_init (void)) { int err = proc_register(&proc_net, &proc_router); diff --git a/net/x25/.cvsignore b/net/x25/.cvsignore new file mode 100644 index 000000000..4671378ae --- /dev/null +++ b/net/x25/.cvsignore @@ -0,0 +1 @@ +.depend diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 971ae497d..63a616e89 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -45,6 +45,7 @@ #include <linux/notifier.h> #include <linux/proc_fs.h> #include <linux/if_arp.h> +#include <linux/init.h> #include <net/x25.h> int sysctl_x25_restart_request_timeout = X25_DEFAULT_T20; @@ -1283,7 +1284,7 @@ static struct proc_dir_entry proc_net_x25_routes = { }; #endif -void x25_proto_init(struct net_proto *pro) +__initfunc(void x25_proto_init(struct net_proto *pro)) { sock_register(&x25_family_ops); diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index 892d817d7..8454ac9d9 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -10,6 +10,7 @@ #include <linux/skbuff.h> #include <linux/socket.h> #include <linux/netdevice.h> +#include <linux/init.h> #include <net/x25.h> static int min_timer[] = {1 * X25_SLOWHZ}; @@ -46,7 +47,7 @@ static ctl_table x25_root_table[] = { {0} }; -void x25_register_sysctl(void) +__initfunc(void x25_register_sysctl(void)) { x25_table_header = register_sysctl_table(x25_root_table, 1); } |