summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c3
-rw-r--r--net/ipv4/arp.c5
-rw-r--r--net/ipv4/fib.c8
-rw-r--r--net/ipv4/icmp.c9
-rw-r--r--net/ipv4/ip_alias.c3
-rw-r--r--net/ipv4/ip_fragment.c451
-rw-r--r--net/ipv4/ip_fw.c3
-rw-r--r--net/ipv4/ip_masq.c3
-rw-r--r--net/ipv4/ip_masq_app.c3
-rw-r--r--net/ipv4/ip_masq_ftp.c3
-rw-r--r--net/ipv4/ip_masq_irc.c3
-rw-r--r--net/ipv4/ip_masq_quake.c3
-rw-r--r--net/ipv4/ip_masq_raudio.c5
-rw-r--r--net/ipv4/ip_options.c2
-rw-r--r--net/ipv4/ip_output.c18
-rw-r--r--net/ipv4/ip_sockglue.c18
-rw-r--r--net/ipv4/ipmr.c3
-rw-r--r--net/ipv4/rarp.c5
-rw-r--r--net/ipv4/route.c3
-rw-r--r--net/ipv4/sysctl_net_ipv4.c15
-rw-r--r--net/ipv4/tcp.c16
-rw-r--r--net/ipv4/tcp_input.c41
-rw-r--r--net/ipv4/tcp_ipv4.c111
-rw-r--r--net/ipv4/tcp_output.c146
-rw-r--r--net/ipv4/tcp_timer.c4
-rw-r--r--net/ipv4/udp.c52
-rw-r--r--net/ipv4/utils.c4
27 files changed, 525 insertions, 415 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d96910bb0..a3a126529 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -76,6 +76,7 @@
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
+#include <linux/init.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -1063,7 +1064,7 @@ extern void tcp_init(void);
* Called by socket.c on kernel startup.
*/
-void inet_proto_init(struct net_proto *pro)
+__initfunc(void inet_proto_init(struct net_proto *pro))
{
struct sk_buff *dummy_skb;
struct inet_protocol *p;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8ef0be2af..ebf2c6c6b 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -90,6 +90,7 @@
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
+#include <linux/init.h>
#include <net/ip.h>
#include <net/icmp.h>
@@ -378,7 +379,7 @@ static void arp_neigh_destroy(struct neighbour *neigh)
extern atomic_t hh_count;
atomic_dec(&hh_count);
#endif
- kfree_s(hh, sizeof(struct(struct hh_cache)));
+ kfree_s(hh, sizeof(struct hh_cache));
}
}
}
@@ -1976,7 +1977,7 @@ static struct proc_dir_entry proc_net_arp = {
};
#endif
-void arp_init (void)
+__initfunc(void arp_init (void))
{
dev_add_pack(&arp_packet_type);
/* Start with the regular checks for expired arp entries. */
diff --git a/net/ipv4/fib.c b/net/ipv4/fib.c
index c2182728c..b25187a20 100644
--- a/net/ipv4/fib.c
+++ b/net/ipv4/fib.c
@@ -42,6 +42,7 @@
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
+#include <linux/init.h>
#include <net/ip.h>
#include <net/protocol.h>
@@ -1646,16 +1647,21 @@ int ip_rt_ioctl(unsigned int cmd, void *arg)
{
case SIOCADDRT: /* Add a route */
case SIOCDELRT: /* Delete a route */
+printk("ip_rt_ioctl() #1\n");
if (!suser())
return -EPERM;
+printk("ip_rt_ioctl() #2\n");
err = get_rt_from_user(&m.rtmsg, arg);
if (err)
return err;
+printk("ip_rt_ioctl() #3\n");
fib_lock();
+printk("ip_rt_ioctl() #4\n");
dummy_nlh.nlmsg_type = cmd == SIOCDELRT ? RTMSG_DELROUTE
: RTMSG_NEWROUTE;
err = rtmsg_process(&dummy_nlh, &m.rtmsg);
fib_unlock();
+printk("ip_rt_ioctl() #5: err == %d\n", err);
return err;
case SIOCRTMSG:
if (!suser())
@@ -2020,7 +2026,7 @@ int ip_rt_event(int event, struct device *dev)
}
-void ip_fib_init()
+__initfunc(void ip_fib_init(void))
{
struct in_rtrulemsg r;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 6b697d001..79bf058c5 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -256,6 +256,7 @@
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
+#include <linux/init.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <net/checksum.h>
@@ -373,7 +374,7 @@ struct socket *icmp_socket=&icmp_inode.u.socket_i;
#ifndef CONFIG_NO_ICMP_LIMIT
-static void xrlim_init(void)
+__initfunc(static void xrlim_init(void))
{
int type, entry;
struct icmp_xrlim *xr;
@@ -1020,7 +1021,7 @@ int icmp_chkaddr(struct sk_buff *skb)
{
struct tcphdr *th = (struct tcphdr *)(((unsigned char *)iph)+(iph->ihl<<2));
- sk = tcp_v4_lookup(iph->saddr, th->source, iph->daddr, th->dest);
+ sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source);
if (!sk) return 0;
if (sk->saddr != iph->saddr) return 0;
if (sk->daddr != iph->daddr) return 0;
@@ -1034,7 +1035,7 @@ int icmp_chkaddr(struct sk_buff *skb)
{
struct udphdr *uh = (struct udphdr *)(((unsigned char *)iph)+(iph->ihl<<2));
- sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest);
+ sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source);
if (!sk) return 0;
if (sk->saddr != iph->saddr && __ip_chk_addr(iph->saddr) != IS_MYADDR)
return 0;
@@ -1167,7 +1168,7 @@ static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1] = {
{ &icmp_statistics.IcmpOutAddrMaskReps, &icmp_statistics.IcmpInAddrMaskReps, icmp_address_reply, 0, NULL }
};
-void icmp_init(struct net_proto_family *ops)
+__initfunc(void icmp_init(struct net_proto_family *ops))
{
int err;
diff --git a/net/ipv4/ip_alias.c b/net/ipv4/ip_alias.c
index 74ff42a74..a78eef17a 100644
--- a/net/ipv4/ip_alias.c
+++ b/net/ipv4/ip_alias.c
@@ -26,6 +26,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/route.h>
+#include <linux/init.h>
#include <net/route.h>
#ifdef ALIAS_USER_LAND_DEBUG
@@ -137,7 +138,7 @@ struct net_alias_type ip_alias_type =
* ip_alias module initialization
*/
-int ip_alias_init(void)
+__initfunc(int ip_alias_init(void))
{
return register_net_alias_type(&ip_alias_type, AF_INET);
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bf549b047..290f871a1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -5,11 +5,15 @@
*
* The IP fragmentation functionality.
*
+ * Version: $Id: ip_fragment.c,v 1.22 1997/05/17 05:21:56 freitag Exp $
+ *
* Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
* Alan Cox <Alan.Cox@linux.org>
*
* Fixes:
* Alan Cox : Split from ip.c , see ip_input.c for history.
+ * David S. Miller : Begin massive cleanup...
+ * Andi Kleen : Add sysctls.
*/
#include <linux/types.h>
@@ -29,31 +33,49 @@
#include <linux/ip_fw.h>
#include <net/checksum.h>
-/*
- * Fragment cache limits. We will commit 256K at one time. Should we
- * cross that limit we will prune down to 192K. This should cope with
- * even the most extreme cases without allowing an attacker to measurably
- * harm machine performance.
- */
-
-#define IPFRAG_HIGH_THRESH (256*1024)
-#define IPFRAG_LOW_THRESH (192*1024)
-
-/*
- * This fragment handler is a bit of a heap. On the other hand it works quite
- * happily and handles things quite well.
+/* Fragment cache limits. We will commit 256K at one time. Should we
+ * cross that limit we will prune down to 192K. This should cope with
+ * even the most extreme cases without allowing an attacker to measurably
+ * harm machine performance.
*/
-
-static struct ipq *ipqueue = NULL; /* IP fragment queue */
+int sysctl_ipfrag_high_thresh = 256*1024;
+int sysctl_ipfrag_low_thresh = 192*1024;
+
+/* Describe an IP fragment. */
+struct ipfrag {
+ int offset; /* offset of fragment in IP datagram */
+ int end; /* last byte of data in datagram */
+ int len; /* length of this fragment */
+ struct sk_buff *skb; /* complete received fragment */
+ unsigned char *ptr; /* pointer into real fragment data */
+ struct ipfrag *next; /* linked list pointers */
+ struct ipfrag *prev;
+};
+
+/* Describe an entry in the "incomplete datagrams" queue. */
+struct ipq {
+ struct iphdr *iph; /* pointer to IP header */
+ struct ipq *next; /* linked list pointers */
+ struct ipfrag *fragments; /* linked list of received fragments */
+ int len; /* total length of original datagram */
+ short ihlen; /* length of the IP header */
+ struct timer_list timer; /* when will this queue expire? */
+ struct ipq **pprev;
+ struct device *dev; /* Device - for icmp replies */
+};
+
+#define IPQ_HASHSZ 64
+
+struct ipq *ipq_hash[IPQ_HASHSZ];
+
+#define ipqhashfn(id, saddr, daddr, prot) \
+ ((((id) >> 1) ^ (saddr) ^ (daddr) ^ (prot)) & (IPQ_HASHSZ - 1))
atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
-char *in_ntoa(unsigned long in);
+char *in_ntoa(__u32 in);
-/*
- * Memory Tracking Functions
- */
-
+/* Memory Tracking Functions. */
extern __inline__ void frag_kfree_skb(struct sk_buff *skb, int type)
{
atomic_sub(skb->truesize, &ip_frag_mem);
@@ -69,28 +91,24 @@ extern __inline__ void frag_kfree_s(void *ptr, int len)
extern __inline__ void *frag_kmalloc(int size, int pri)
{
void *vp=kmalloc(size,pri);
+
if(!vp)
return NULL;
atomic_add(size, &ip_frag_mem);
return vp;
}
-/*
- * Create a new fragment entry.
- */
-
-static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr)
+/* Create a new fragment entry. */
+static struct ipfrag *ip_frag_create(int offset, int end,
+ struct sk_buff *skb, unsigned char *ptr)
{
struct ipfrag *fp;
- unsigned long flags;
fp = (struct ipfrag *) frag_kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
- if (fp == NULL)
- {
+ if (fp == NULL) {
NETDEBUG(printk(KERN_ERR "IP: frag_create: no memory left !\n"));
return(NULL);
}
- memset(fp, 0, sizeof(struct ipfrag));
/* Fill in the structure. */
fp->offset = offset;
@@ -98,85 +116,63 @@ static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, u
fp->len = end - offset;
fp->skb = skb;
fp->ptr = ptr;
+ fp->next = fp->prev = NULL;
- /*
- * Charge for the SKB as well.
- */
-
- save_flags(flags);
- cli();
+ /* Charge for the SKB as well. */
atomic_add(skb->truesize, &ip_frag_mem);
- restore_flags(flags);
return(fp);
}
-
-/*
- * Find the correct entry in the "incomplete datagrams" queue for
- * this IP datagram, and return the queue entry address if found.
+/* Find the correct entry in the "incomplete datagrams" queue for
+ * this IP datagram, and return the queue entry address if found.
*/
-
-static struct ipq *ip_find(struct iphdr *iph)
+static inline struct ipq *ip_find(struct iphdr *iph)
{
+ __u16 id = iph->id;
+ __u32 saddr = iph->saddr;
+ __u32 daddr = iph->daddr;
+ __u8 protocol = iph->protocol;
+ unsigned int hash = ipqhashfn(id, saddr, daddr, protocol);
struct ipq *qp;
- struct ipq *qplast;
-
- cli();
- qplast = NULL;
- for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
- {
- if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
- iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
- {
- del_timer(&qp->timer); /* So it doesn't vanish on us. The timer will be reset anyway */
- sti();
- return(qp);
+
+ start_bh_atomic();
+ for(qp = ipq_hash[hash]; qp; qp = qp->next) {
+ if(qp->iph->id == id &&
+ qp->iph->saddr == saddr &&
+ qp->iph->daddr == daddr &&
+ qp->iph->protocol == protocol) {
+ del_timer(&qp->timer);
+ break;
}
}
- sti();
- return(NULL);
+ end_bh_atomic();
+ return qp;
}
-
-/*
- * Remove an entry from the "incomplete datagrams" queue, either
- * because we completed, reassembled and processed it, or because
- * it timed out.
+/* Remove an entry from the "incomplete datagrams" queue, either
+ * because we completed, reassembled and processed it, or because
+ * it timed out.
*/
-
static void ip_free(struct ipq *qp)
{
struct ipfrag *fp;
- struct ipfrag *xp;
-
- /*
- * Stop the timer for this entry.
- */
+ /* Stop the timer for this entry. */
del_timer(&qp->timer);
/* Remove this entry from the "incomplete datagrams" queue. */
- cli();
- if (qp->prev == NULL)
- {
- ipqueue = qp->next;
- if (ipqueue != NULL)
- ipqueue->prev = NULL;
- }
- else
- {
- qp->prev->next = qp->next;
- if (qp->next != NULL)
- qp->next->prev = qp->prev;
- }
+ start_bh_atomic();
+ if(qp->next)
+ qp->next->pprev = qp->pprev;
+ *qp->pprev = qp->next;
+ end_bh_atomic();
/* Release all fragment data. */
-
fp = qp->fragments;
- while (fp != NULL)
- {
- xp = fp->next;
+ while (fp) {
+ struct ipfrag *xp = fp->next;
+
frag_kfree_skb(fp->skb,FREE_READ);
frag_kfree_s(fp, sizeof(struct ipfrag));
fp = xp;
@@ -187,83 +183,65 @@ static void ip_free(struct ipq *qp)
/* Finally, release the queue descriptor itself. */
frag_kfree_s(qp, sizeof(struct ipq));
- sti();
}
-
-/*
- * Oops- a fragment queue timed out. Kill it and send an ICMP reply.
- */
-
+/* Oops, a fragment queue timed out. Kill it and send an ICMP reply. */
static void ip_expire(unsigned long arg)
{
- struct ipq *qp;
-
- qp = (struct ipq *)arg;
-
- /*
- * Send an ICMP "Fragment Reassembly Timeout" message.
- */
+ struct ipq *qp = (struct ipq *) arg;
+ /* Send an ICMP "Fragment Reassembly Timeout" message. */
ip_statistics.IpReasmTimeout++;
ip_statistics.IpReasmFails++;
- /* This if is always true... shrug */
- if(qp->fragments!=NULL)
- icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
- ICMP_EXC_FRAGTIME, 0);
+ icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
- /*
- * Nuke the fragment queue.
- */
+ /* Nuke the fragment queue. */
ip_free(qp);
}
-/*
- * Memory limiting on fragments. Evictor trashes the oldest
- * fragment queue until we are back under the low threshold
+/* Memory limiting on fragments. Evictor trashes the oldest
+ * fragment queue until we are back under the low threshold.
*/
-
static void ip_evictor(void)
{
- while(atomic_read(&ip_frag_mem)>IPFRAG_LOW_THRESH)
- {
- if(!ipqueue)
+ while(atomic_read(&ip_frag_mem)>sysctl_ipfrag_low_thresh) {
+ int i;
+
+ /* FIXME: Make LRU queue of frag heads. -DaveM */
+ for(i = 0; i < IPQ_HASHSZ; i++)
+ if(ipq_hash[i])
+ break;
+ if(i >= IPQ_HASHSZ)
panic("ip_evictor: memcount");
- ip_free(ipqueue);
+ ip_free(ipq_hash[i]);
}
}
-/*
- * Add an entry to the 'ipq' queue for a newly received IP datagram.
- * We will (hopefully :-) receive all other fragments of this datagram
- * in time, so we just create a queue for this datagram, in which we
- * will insert the received fragments at their respective positions.
+/* Add an entry to the 'ipq' queue for a newly received IP datagram.
+ * We will (hopefully :-) receive all other fragments of this datagram
+ * in time, so we just create a queue for this datagram, in which we
+ * will insert the received fragments at their respective positions.
*/
-
static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph)
{
struct ipq *qp;
+ unsigned int hash;
int ihlen;
qp = (struct ipq *) frag_kmalloc(sizeof(struct ipq), GFP_ATOMIC);
- if (qp == NULL)
- {
+ if (qp == NULL) {
NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n"));
return(NULL);
}
- memset(qp, 0, sizeof(struct ipq));
-
- /*
- * Allocate memory for the IP header (plus 8 octets for ICMP).
- */
+ /* Allocate memory for the IP header (plus 8 octets for ICMP). */
ihlen = iph->ihl * 4;
+
qp->iph = (struct iphdr *) frag_kmalloc(64 + 8, GFP_ATOMIC);
- if (qp->iph == NULL)
- {
+ if (qp->iph == NULL) {
NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n"));
frag_kfree_s(qp, sizeof(struct ipq));
- return(NULL);
+ return NULL;
}
memcpy(qp->iph, iph, ihlen + 8);
@@ -279,21 +257,19 @@ static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph)
add_timer(&qp->timer);
/* Add this entry to the queue. */
- qp->prev = NULL;
- cli();
- qp->next = ipqueue;
- if (qp->next != NULL)
- qp->next->prev = qp;
- ipqueue = qp;
- sti();
- return(qp);
-}
+ hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
+ start_bh_atomic();
+ if((qp->next = ipq_hash[hash]) != NULL)
+ qp->next->pprev = &qp->next;
+ ipq_hash[hash] = qp;
+ qp->pprev = &ipq_hash[hash];
+ end_bh_atomic();
-/*
- * See if a fragment queue is complete.
- */
+ return qp;
+}
+/* See if a fragment queue is complete. */
static int ip_done(struct ipq *qp)
{
struct ipfrag *fp;
@@ -301,13 +277,12 @@ static int ip_done(struct ipq *qp)
/* Only possible if we received the final fragment. */
if (qp->len == 0)
- return(0);
+ return 0;
/* Check all fragment offsets to see if they connect. */
fp = qp->fragments;
offset = 0;
- while (fp != NULL)
- {
+ while (fp) {
if (fp->offset > offset)
return(0); /* fragment(s) missing */
offset = fp->end;
@@ -315,18 +290,15 @@ static int ip_done(struct ipq *qp)
}
/* All fragments are present. */
- return(1);
+ return 1;
}
-
-/*
- * Build a new IP datagram from all its fragments.
+/* Build a new IP datagram from all its fragments.
*
- * FIXME: We copy here because we lack an effective way of handling lists
- * of bits on input. Until the new skb data handling is in I'm not going
- * to touch this with a bargepole.
+ * FIXME: We copy here because we lack an effective way of handling lists
+ * of bits on input. Until the new skb data handling is in I'm not going
+ * to touch this with a bargepole.
*/
-
static struct sk_buff *ip_glue(struct ipq *qp)
{
struct sk_buff *skb;
@@ -335,25 +307,23 @@ static struct sk_buff *ip_glue(struct ipq *qp)
unsigned char *ptr;
int count, len;
- /*
- * Allocate a new buffer for the datagram.
- */
+ /* Allocate a new buffer for the datagram. */
len = qp->ihlen + qp->len;
- if(len>65535)
- {
- printk(KERN_INFO "Oversized IP packet from %s.\n", in_ntoa(qp->iph->saddr));
+ if(len>65535) {
+ printk(KERN_INFO "Oversized IP packet from %s.\n",
+ in_ntoa(qp->iph->saddr));
ip_statistics.IpReasmFails++;
ip_free(qp);
return NULL;
}
- if ((skb = dev_alloc_skb(len)) == NULL)
- {
+ if ((skb = dev_alloc_skb(len)) == NULL) {
ip_statistics.IpReasmFails++;
- NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing queue %p\n", qp));
+ NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing "
+ "queue %p\n", qp));
ip_free(qp);
- return(NULL);
+ return NULL;
}
/* Fill in the basic details. */
@@ -368,11 +338,10 @@ static struct sk_buff *ip_glue(struct ipq *qp)
/* Copy the data portions of all fragments into the new buffer. */
fp = qp->fragments;
- while(fp != NULL)
- {
- if(count+fp->len > skb->len)
- {
- NETDEBUG(printk(KERN_ERR "Invalid fragment list: Fragment over size.\n"));
+ while(fp) {
+ if(count+fp->len > skb->len) {
+ NETDEBUG(printk(KERN_ERR "Invalid fragment list: "
+ "Fragment over size.\n"));
ip_free(qp);
kfree_skb(skb,FREE_WRITE);
ip_statistics.IpReasmFails++;
@@ -396,14 +365,10 @@ static struct sk_buff *ip_glue(struct ipq *qp)
iph->tot_len = htons((iph->ihl * 4) + count);
ip_statistics.IpReasmOKs++;
- return(skb);
+ return skb;
}
-
-/*
- * Process an incoming IP datagram fragment.
- */
-
+/* Process an incoming IP datagram fragment. */
struct sk_buff *ip_defrag(struct sk_buff *skb)
{
struct iphdr *iph = skb->nh.iph;
@@ -417,45 +382,37 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
ip_statistics.IpReasmReqds++;
- /*
- * Start by cleaning up the memory
- */
-
- if(atomic_read(&ip_frag_mem)>IPFRAG_HIGH_THRESH)
+ /* Start by cleaning up the memory. */
+ if(atomic_read(&ip_frag_mem)>sysctl_ipfrag_high_thresh)
ip_evictor();
- /*
- * Find the entry of this IP datagram in the "incomplete datagrams" queue.
- */
-
+
+ /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
qp = ip_find(iph);
/* Is this a non-fragmented datagram? */
offset = ntohs(iph->frag_off);
flags = offset & ~IP_OFFSET;
offset &= IP_OFFSET;
- if (((flags & IP_MF) == 0) && (offset == 0))
- {
- if (qp != NULL)
- ip_free(qp); /* Fragmented frame replaced by full unfragmented copy */
- return(skb);
+ if (((flags & IP_MF) == 0) && (offset == 0)) {
+ if (qp != NULL) {
+ /* Fragmented frame replaced by full unfragmented copy. */
+ ip_free(qp);
+ }
+ return skb;
}
offset <<= 3; /* offset is in 8-byte chunks */
ihl = iph->ihl * 4;
- /*
- * If the queue already existed, keep restarting its timer as long
+ /* If the queue already existed, keep restarting its timer as long
* as we still are receiving fragments. Otherwise, create a fresh
* queue entry.
*/
-
- if (qp != NULL)
- {
+ if (qp) {
/* ANK. If the first fragment is received,
* we should remember the correct IP header (with options)
*/
- if (offset == 0)
- {
+ if (offset == 0) {
qp->ihlen = ihl;
memcpy(qp->iph, iph, ihl+8);
}
@@ -464,84 +421,59 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
qp->timer.data = (unsigned long) qp; /* pointer to queue */
qp->timer.function = ip_expire; /* expire function */
add_timer(&qp->timer);
- }
- else
- {
- /*
- * If we failed to create it, then discard the frame
- */
- if ((qp = ip_create(skb, iph)) == NULL)
- {
+ } else {
+ /* If we failed to create it, then discard the frame. */
+ if ((qp = ip_create(skb, iph)) == NULL) {
kfree_skb(skb, FREE_READ);
ip_statistics.IpReasmFails++;
return NULL;
}
}
- /*
- * Attempt to construct an oversize packet.
- */
-
- if(ntohs(iph->tot_len)+(int)offset>65535)
- {
- printk(KERN_INFO "Oversized packet received from %s\n",in_ntoa(iph->saddr));
+ /* Attempt to construct an oversize packet. */
+ if(ntohs(iph->tot_len)+(int)offset>65535) {
+ printk(KERN_INFO "Oversized packet received from %s\n",
+ in_ntoa(iph->saddr));
frag_kfree_skb(skb, FREE_READ);
ip_statistics.IpReasmFails++;
return NULL;
}
- /*
- * Determine the position of this fragment.
- */
-
+ /* Determine the position of this fragment. */
end = offset + ntohs(iph->tot_len) - ihl;
- /*
- * Point into the IP datagram 'data' part.
- */
-
+ /* Point into the IP datagram 'data' part. */
ptr = skb->data + ihl;
- /*
- * Is this the final fragment?
- */
-
+ /* Is this the final fragment? */
if ((flags & IP_MF) == 0)
qp->len = end;
- /*
- * Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
+ /* Find out which fragments are in front and at the back of us
+ * in the chain of fragments so far. We must know where to put
+ * this fragment, right?
*/
-
prev = NULL;
- for(next = qp->fragments; next != NULL; next = next->next)
- {
- if (next->offset > offset)
+ for(next = qp->fragments; next != NULL; next = next->next) {
+ if (next->offset >= offset)
break; /* bingo! */
prev = next;
}
- /*
- * We found where to put this one.
- * Check for overlap with preceding fragment, and, if needed,
- * align things so that any overlaps are eliminated.
+ /* We found where to put this one. Check for overlap with
+ * preceding fragment, and, if needed, align things so that
+ * any overlaps are eliminated.
*/
- if (prev != NULL && offset < prev->end)
- {
+ if (prev != NULL && offset < prev->end) {
i = prev->end - offset;
offset += i; /* ptr into datagram */
ptr += i; /* ptr into fragment data */
}
- /*
- * Look for overlap with succeeding segments.
+ /* Look for overlap with succeeding segments.
* If we can merge fragments, do it.
*/
-
- for(tmp=next; tmp != NULL; tmp = tfp)
- {
+ for(tmp=next; tmp != NULL; tmp = tfp) {
tfp = tmp->next;
if (tmp->offset >= end)
break; /* no overlaps at all */
@@ -550,12 +482,11 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
tmp->len -= i; /* so reduce size of */
tmp->offset += i; /* next fragment */
tmp->ptr += i;
- /*
- * If we get a frag size of <= 0, remove it and the packet
- * that it goes with.
+
+ /* If we get a frag size of <= 0, remove it and the packet
+ * that it goes with.
*/
- if (tmp->len <= 0)
- {
+ if (tmp->len <= 0) {
if (tmp->prev != NULL)
tmp->prev->next = tmp->next;
else
@@ -564,26 +495,20 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
if (tmp->next != NULL)
tmp->next->prev = tmp->prev;
- next=tfp; /* We have killed the original next frame */
+ /* We have killed the original next frame. */
+ next = tfp;
frag_kfree_skb(tmp->skb,FREE_READ);
frag_kfree_s(tmp, sizeof(struct ipfrag));
}
}
- /*
- * Insert this fragment in the chain of fragments.
- */
-
+ /* Insert this fragment in the chain of fragments. */
tfp = NULL;
tfp = ip_frag_create(offset, end, skb, ptr);
- /*
- * No memory to save the fragment - so throw the lot
- */
-
- if (!tfp)
- {
+ /* No memory to save the fragment - so throw the lot. */
+ if (!tfp) {
frag_kfree_skb(skb, FREE_READ);
return NULL;
}
@@ -597,16 +522,14 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
if (next != NULL)
next->prev = tfp;
- /*
- * OK, so we inserted this new fragment into the chain.
- * Check if we now have a full IP datagram which we can
- * bump up to the IP layer...
+ /* OK, so we inserted this new fragment into the chain.
+ * Check if we now have a full IP datagram which we can
+ * bump up to the IP layer...
*/
-
- if (ip_done(qp))
- {
- skb2 = ip_glue(qp); /* glue together the fragments */
+ if (ip_done(qp)) {
+ /* Glue together the fragments. */
+ skb2 = ip_glue(qp);
return(skb2);
}
- return(NULL);
+ return NULL;
}
diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c
index e516a2baa..ea9fe48b0 100644
--- a/net/ipv4/ip_fw.c
+++ b/net/ipv4/ip_fw.c
@@ -107,6 +107,7 @@
#include <net/netlink.h>
#include <linux/firewall.h>
#include <linux/ip_fw.h>
+#include <linux/init.h>
#ifdef CONFIG_IP_MASQUERADE
#include <net/ip_masq.h>
@@ -1298,7 +1299,7 @@ static struct proc_dir_entry proc_net_ipfwfwd = {
#endif
-void ip_fw_init(void)
+__initfunc(void ip_fw_init(void))
{
#ifdef CONFIG_PROC_FS
#ifdef CONFIG_IP_ACCT
diff --git a/net/ipv4/ip_masq.c b/net/ipv4/ip_masq.c
index c5976614e..2d2fd3717 100644
--- a/net/ipv4/ip_masq.c
+++ b/net/ipv4/ip_masq.c
@@ -31,6 +31,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/inet.h>
+#include <linux/init.h>
#include <net/protocol.h>
#include <net/icmp.h>
#include <net/tcp.h>
@@ -1010,7 +1011,7 @@ static struct proc_dir_entry proc_net_ipmsqhst = {
/*
* Initialize ip masquerading
*/
-int ip_masq_init(void)
+__initfunc(int ip_masq_init(void))
{
#ifdef CONFIG_PROC_FS
proc_net_register(&proc_net_ipmsqhst);
diff --git a/net/ipv4/ip_masq_app.c b/net/ipv4/ip_masq_app.c
index 456888bc1..f7449e0ba 100644
--- a/net/ipv4/ip_masq_app.c
+++ b/net/ipv4/ip_masq_app.c
@@ -30,6 +30,7 @@
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/init.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/udp.h>
@@ -482,7 +483,7 @@ static struct proc_dir_entry proc_net_ip_masq_app = {
* Initialization routine
*/
-int ip_masq_app_init(void)
+__initfunc(int ip_masq_app_init(void))
{
#ifdef CONFIG_PROC_FS
proc_net_register(&proc_net_ip_masq_app);
diff --git a/net/ipv4/ip_masq_ftp.c b/net/ipv4/ip_masq_ftp.c
index cc2481746..4d5568d0a 100644
--- a/net/ipv4/ip_masq_ftp.c
+++ b/net/ipv4/ip_masq_ftp.c
@@ -28,6 +28,7 @@
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/init.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/ip_masq.h>
@@ -187,7 +188,7 @@ struct ip_masq_app ip_masq_ftp = {
* ip_masq_ftp initialization
*/
-int ip_masq_ftp_init(void)
+__initfunc(int ip_masq_ftp_init(void))
{
return register_ip_masq_app(&ip_masq_ftp, IPPROTO_TCP, 21);
}
diff --git a/net/ipv4/ip_masq_irc.c b/net/ipv4/ip_masq_irc.c
index e0b94f0d6..a1be56f81 100644
--- a/net/ipv4/ip_masq_irc.c
+++ b/net/ipv4/ip_masq_irc.c
@@ -29,6 +29,7 @@
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/init.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/ip_masq.h>
@@ -238,7 +239,7 @@ struct ip_masq_app ip_masq_irc = {
* ip_masq_irc initialization
*/
-int ip_masq_irc_init(void)
+__initfunc(int ip_masq_irc_init(void))
{
return register_ip_masq_app(&ip_masq_irc, IPPROTO_TCP, 6667);
}
diff --git a/net/ipv4/ip_masq_quake.c b/net/ipv4/ip_masq_quake.c
index 3614f0cf5..08a062bc7 100644
--- a/net/ipv4/ip_masq_quake.c
+++ b/net/ipv4/ip_masq_quake.c
@@ -28,6 +28,7 @@
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/init.h>
#include <net/protocol.h>
#include <net/udp.h>
#include <net/ip_masq.h>
@@ -279,7 +280,7 @@ struct ip_masq_app ip_masq_quakenew = {
* ip_masq_quake initialization
*/
-int ip_masq_quake_init(void)
+__initfunc(int ip_masq_quake_init(void))
{
return (register_ip_masq_app(&ip_masq_quake, IPPROTO_UDP, 26000) +
register_ip_masq_app(&ip_masq_quakenew, IPPROTO_UDP, 27000));
diff --git a/net/ipv4/ip_masq_raudio.c b/net/ipv4/ip_masq_raudio.c
index 85bba590e..52f439102 100644
--- a/net/ipv4/ip_masq_raudio.c
+++ b/net/ipv4/ip_masq_raudio.c
@@ -2,7 +2,7 @@
* IP_MASQ_RAUDIO - Real Audio masquerading module
*
*
- * Version: @(#)$Id: ip_masq_raudio.c,v 1.5 1997/04/03 08:52:02 davem Exp $
+ * Version: @(#)$Id: ip_masq_raudio.c,v 1.6 1997/04/29 09:38:26 mj Exp $
*
* Author: Nigel Metheringham
* [strongly based on ftp module by Juan Jose Ciarlante & Wouter Gadeyne]
@@ -45,6 +45,7 @@
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/init.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/ip_masq.h>
@@ -200,7 +201,7 @@ struct ip_masq_app ip_masq_raudio = {
* ip_masq_raudio initialization
*/
-int ip_masq_raudio_init(void)
+__initfunc(int ip_masq_raudio_init(void))
{
return register_ip_masq_app(&ip_masq_raudio, IPPROTO_TCP, 7070);
}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 2c7974506..80baf8364 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -505,7 +505,7 @@ int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, in
opt->is_data = 1;
opt->is_setbyuser = 1;
if (optlen && ip_options_compile(opt, NULL)) {
- kfree_s(opt, sizeof(struct options) + optlen);
+ kfree_s(opt, sizeof(struct ip_options) + optlen);
return -EINVAL;
}
*optp = opt;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 41e60de61..6558b56e4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -27,6 +27,8 @@
* (in case if packet not accepted by
* output firewall rules)
* Alexey Kuznetsov: use new route cache
+ * Andi Kleen: Fix broken PMTU recovery and remove
+ * some redundant tests.
*/
#include <asm/uaccess.h>
@@ -47,6 +49,7 @@
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
+#include <linux/init.h>
#include <net/snmp.h>
#include <net/ip.h>
@@ -126,9 +129,8 @@ int ip_build_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr,
iph->ihl = 5;
iph->tos = sk->ip_tos;
iph->frag_off = 0;
- if (sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
- (sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
- rt->rt_flags&RTF_NOPMTUDISC))
+ if (sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
+ !(rt->rt_flags & RTF_NOPMTUDISC))
iph->frag_off |= htons(IP_DF);
iph->ttl = sk->ip_ttl;
iph->daddr = rt->rt_dst;
@@ -207,9 +209,8 @@ int ip_build_header(struct sk_buff *skb, struct sock *sk)
iph->ihl = 5;
iph->tos = sk->ip_tos;
iph->frag_off = 0;
- if (sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
- (sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
- rt->rt_flags&RTF_NOPMTUDISC))
+ if (sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
+ !(rt->rt_flags & RTF_NOPMTUDISC))
iph->frag_off |= htons(IP_DF);
iph->ttl = sk->ip_ttl;
iph->daddr = rt->rt_dst;
@@ -480,8 +481,7 @@ int ip_build_xmit(struct sock *sk,
#endif
if (sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
- (sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
- rt->rt_flags&RTF_NOPMTUDISC))
+ rt->rt_flags&RTF_NOPMTUDISC)
df = 0;
@@ -1036,7 +1036,7 @@ static struct proc_dir_entry proc_net_igmp = {
* IP registers the packet type and then calls the subprotocol initialisers
*/
-void ip_init(void)
+__initfunc(void ip_init(void))
{
dev_add_pack(&ip_packet_type);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 1689159ed..8c2463d04 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -126,26 +126,24 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc, struct device **de
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
if (cmsg->cmsg_level != SOL_IP)
continue;
- switch (cmsg->cmsg_type)
- {
+ switch (cmsg->cmsg_type) {
case IP_LOCALADDR:
- if (cmsg->cmsg_len < sizeof(struct in_addr)+sizeof(*cmsg))
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_addr)))
return -EINVAL;
- memcpy(&ipc->addr, cmsg->cmsg_data, 4);
+ memcpy(&ipc->addr, CMSG_DATA(cmsg), sizeof(struct in_addr));
break;
case IP_RETOPTS:
- err = cmsg->cmsg_len - sizeof(*cmsg);
- err = ip_options_get(&ipc->opt, cmsg->cmsg_data,
- err < 40 ? err : 40, 0);
+ err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+ err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40, 0);
if (err)
return err;
break;
case IP_TXINFO:
{
struct in_pktinfo *info;
- if (cmsg->cmsg_len < sizeof(*info)+sizeof(*cmsg))
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
return -EINVAL;
- info = (struct in_pktinfo*)cmsg->cmsg_data;
+ info = (struct in_pktinfo *)CMSG_DATA(cmsg);
if (info->ipi_ifindex && !devp)
return -EINVAL;
if ((*devp = dev_get_by_index(info->ipi_ifindex)) == NULL)
@@ -212,7 +210,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt
sk->opt = opt;
sti();
if (old_opt)
- kfree_s(old_opt, sizeof(struct optlen) + old_opt->optlen);
+ kfree_s(old_opt, sizeof(struct ip_options) + old_opt->optlen);
return 0;
}
case IP_RXINFO:
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index f76c5b52d..1a38c5275 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -47,6 +47,7 @@
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/mroute.h>
+#include <linux/init.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
@@ -1065,7 +1066,7 @@ static struct proc_dir_entry proc_net_ipmr_mfc = {
* Setup for IP multicast routing
*/
-void ip_mr_init(void)
+__initfunc(void ip_mr_init(void))
{
printk(KERN_INFO "Linux IP multicast router 0.06.\n");
register_netdevice_notifier(&ip_mr_notifier);
diff --git a/net/ipv4/rarp.c b/net/ipv4/rarp.c
index fb9e2a738..e0323bb85 100644
--- a/net/ipv4/rarp.c
+++ b/net/ipv4/rarp.c
@@ -45,6 +45,7 @@
#include <linux/if_arp.h>
#include <linux/in.h>
#include <linux/config.h>
+#include <linux/init.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -553,8 +554,8 @@ struct proc_dir_entry proc_net_rarp = {
rarp_get_info
};
-void
-rarp_init(void)
+__initfunc(void
+rarp_init(void))
{
proc_net_register(&proc_net_rarp);
rarp_ioctl_hook = rarp_ioctl;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5ba6467d9..4a4c5321c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -69,6 +69,7 @@
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
+#include <linux/init.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
@@ -1379,7 +1380,7 @@ void ip_rt_multicast_event(struct device *dev)
rt_cache_flush(0);
}
-void ip_rt_init()
+__initfunc(void ip_rt_init(void))
{
ip_fib_init();
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 84ba6578b..18a8d2bf8 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -35,22 +35,27 @@ extern int sysctl_arp_check_interval;
extern int sysctl_arp_confirm_interval;
extern int sysctl_arp_confirm_timeout;
+/* From ip_fragment.c */
+extern int sysctl_ipfrag_low_thresh;
+extern int sysctl_ipfrag_high_thresh;
+
extern int sysctl_tcp_cong_avoidance;
extern int sysctl_tcp_hoe_retransmits;
extern int sysctl_tcp_sack;
extern int sysctl_tcp_tsack;
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
+extern int sysctl_syn_retries;
extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp,
void *buffer, size_t *lenp);
-struct ipv4_config ipv4_config = { 1, 1, 1, 1, };
+struct ipv4_config ipv4_config = { 1, 1, 1, 0, };
#ifdef CONFIG_SYSCTL
struct ipv4_config ipv4_def_router_config = { 0, 1, 1, 1, 1, 1, 1, };
-struct ipv4_config ipv4_def_host_config = { 1, 1, 1, 1, };
+struct ipv4_config ipv4_def_host_config = { 1, 1, 1, 0, };
int ipv4_sysctl_forwarding(ctl_table *ctl, int write, struct file * filp,
void *buffer, size_t *lenp)
@@ -144,6 +149,12 @@ ctl_table ipv4_table[] = {
{NET_IPV4_RFC1620_REDIRECTS, "ip_rfc1620_redirects",
&ipv4_config.rfc1620_redirects, sizeof(int), 0644, NULL,
&proc_dointvec},
+ {NET_TCP_SYN_RETRIES, "tcp_syn_retries",
+ &sysctl_syn_retries, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh",
+ &sysctl_ipfrag_high_thresh, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPFRAG_LOW_THRESH, "ipfrag_low_thresh",
+ &sysctl_ipfrag_low_thresh, sizeof(int), 0644, NULL, &proc_dointvec},
{0}
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 420db4777..000813b94 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp.c,v 1.61 1997/04/22 02:53:10 davem Exp $
+ * Version: $Id: tcp.c,v 1.65 1997/05/06 09:31:43 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -424,6 +424,7 @@
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/poll.h>
+#include <linux/init.h>
#include <net/icmp.h>
#include <net/tcp.h>
@@ -849,7 +850,6 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
tcp_size = skb->tail -
((unsigned char *)(skb->h.th) + tp->tcp_header_len);
- /* printk("extending buffer\n"); */
/* This window_seq test is somewhat dangerous
* If the remote does SWS avoidance we should
* queue the best we can if not we should in
@@ -1100,6 +1100,9 @@ static void cleanup_rbuf(struct sock *sk)
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
__u32 rcv_wnd;
+ /* FIXME: double check this rule, then check against
+ * other use of similar rules. Abstract if possible.
+ */
rcv_wnd = tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup);
if ((rcv_wnd < sk->mss) && (sock_rspace(sk) > rcv_wnd))
@@ -1357,7 +1360,10 @@ static int tcp_close_state(struct sock *sk, int dead)
case TCP_CLOSE:
case TCP_LISTEN:
break;
- case TCP_LAST_ACK: /* Could have shutdown() then close() */
+ case TCP_LAST_ACK: /* Could have shutdown() then close()
+ * (but don't do send_fin again!) */
+ ns=TCP_LAST_ACK;
+ break;
case TCP_CLOSE_WAIT: /* They have FIN'd us. We send our FIN and
wait only for the ACK */
ns=TCP_LAST_ACK;
@@ -1655,11 +1661,11 @@ void tcp_set_keepalive(struct sock *sk, int val)
tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
}
-void tcp_init(void)
+__initfunc(void tcp_init(void))
{
tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
sizeof(struct open_request),
- sizeof(long)*8, SLAB_HWCACHE_ALIGN,
+ 0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
if(!tcp_openreq_cachep)
panic("tcp_init: Cannot alloc open_request cache.");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ab2b1ef82..3ab1dee42 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.50 1997/04/22 02:53:12 davem Exp $
+ * Version: $Id: tcp_input.c,v 1.51 1997/04/27 19:24:40 schenk Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -321,8 +321,10 @@ void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp)
break;
case TCPOPT_WINDOW:
if(opsize==TCPOLEN_WINDOW && th->syn)
- if (sysctl_tcp_window_scaling)
+ if (sysctl_tcp_window_scaling) {
+ tp->wscale_ok = 1;
tp->snd_wscale = *(__u8 *)ptr;
+ }
break;
case TCPOPT_SACK_PERM:
if(opsize==TCPOLEN_SACK_PERM && th->syn)
@@ -816,7 +818,7 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
*/
if (before(tp->snd_wl1, ack_seq) ||
(tp->snd_wl1 == ack_seq && !after(tp->snd_wl2, ack))) {
- unsigned long nwin = ntohs(th->window);
+ unsigned long nwin = ntohs(th->window) << tp->snd_wscale;
if ((tp->snd_wl2 != ack) || (nwin > tp->snd_wnd)) {
flag |= FLAG_WIN_UPDATE;
@@ -1464,17 +1466,21 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if(tp->af_specific->conn_request(sk, skb, opt, isn) < 0)
return 1;
- /* Now we have several options: In theory there is
- * nothing else in the frame. KA9Q has an option to
- * send data with the syn, BSD accepts data with the
- * syn up to the [to be] advertised window and
- * Solaris 2.1 gives you a protocol error. For now
- * we just ignore it, that fits the spec precisely
- * and avoids incompatibilities. It would be nice in
- * future to drop through and process the data.
+ /* Now we have several options: In theory there is
+ * nothing else in the frame. KA9Q has an option to
+ * send data with the syn, BSD accepts data with the
+ * syn up to the [to be] advertised window and
+ * Solaris 2.1 gives you a protocol error. For now
+ * we just ignore it, that fits the spec precisely
+ * and avoids incompatibilities. It would be nice in
+ * future to drop through and process the data.
*
- * Now that TTCP is starting to be used we ought to
- * queue this data.
+ * Now that TTCP is starting to be used we ought to
+ * queue this data.
+ * But, this leaves one open to an easy denial of
+ * service attack, and SYN cookies can't defend
+ * against this problem. So, we drop the data
+ * in the interest of security over speed.
*/
return 0;
}
@@ -1514,10 +1520,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
* move to established.
*/
tp->rcv_nxt = skb->seq+1;
- tp->rcv_wnd = 0;
tp->rcv_wup = skb->seq+1;
- tp->snd_wnd = htons(th->window);
+ tp->snd_wnd = htons(th->window) << tp->snd_wscale;
tp->snd_wl1 = skb->seq;
tp->snd_wl2 = skb->ack_seq;
@@ -1526,6 +1531,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
tcp_set_state(sk, TCP_ESTABLISHED);
tcp_parse_options(th,tp);
/* FIXME: need to make room for SACK still */
+ if (tp->wscale_ok == 0) {
+ tp->snd_wscale = tp->rcv_wscale = 0;
+ tp->window_clamp = min(tp->window_clamp,65535);
+ }
if (tp->tstamp_ok) {
tp->tcp_header_len = sizeof(struct tcphdr) + 12; /* FIXME: Define constant! */
sk->dummy_th.doff += 3; /* reserve space of options */
@@ -1695,7 +1704,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
sk->state_change(sk);
tp->snd_una = skb->ack_seq;
- tp->snd_wnd = htons(th->window);
+ tp->snd_wnd = htons(th->window) << tp->snd_wscale;
tp->snd_wl1 = skb->seq;
tp->snd_wl2 = skb->ack_seq;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f4528f552..c4d12a54f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.39 1997/04/22 02:53:14 davem Exp $
+ * Version: $Id: tcp_ipv4.c,v 1.43 1997/05/06 09:31:44 davem Exp $
*
* IPv4 specific functions
*
@@ -465,7 +465,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct sk_buff *buff;
struct sk_buff *skb1;
int tmp;
- struct tcphdr *t1;
+ struct tcphdr *th;
struct rtable *rt;
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
@@ -546,20 +546,17 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return(-ENETUNREACH);
}
- t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
- buff->h.th = t1;
+ th = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
+ buff->h.th = th;
- memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
+ memcpy(th,(void *)&(sk->dummy_th), sizeof(*th));
buff->seq = sk->write_seq++;
- t1->seq = htonl(buff->seq);
+ th->seq = htonl(buff->seq);
tp->snd_nxt = sk->write_seq;
buff->end_seq = sk->write_seq;
- t1->ack = 0;
- t1->window = htons(512);
- t1->syn = 1;
+ th->ack = 0;
+ th->syn = 1;
- /* Use 512 or whatever user asked for. */
- tp->window_clamp = rt->u.dst.window;
sk->mtu = rt->u.dst.pmtu;
if ((sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
@@ -577,13 +574,26 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk->mss = (sk->mtu - sizeof(struct iphdr) -
sizeof(struct tcphdr));
+ if (sk->mss < 1) {
+ printk(KERN_DEBUG "initial sk->mss below 1\n");
+ sk->mss = 1; /* Sanity limit */
+ }
+
+ tp->window_clamp = rt->u.dst.window;
+ tcp_select_initial_window(sock_rspace(sk)/2,sk->mss,
+ &tp->rcv_wnd,
+ &tp->window_clamp,
+ sysctl_tcp_window_scaling,
+ &tp->rcv_wscale);
+ th->window = htons(tp->rcv_wnd);
+
tmp = tcp_syn_build_options(buff, sk->mss, sysctl_tcp_sack,
sysctl_tcp_timestamps,
- sysctl_tcp_window_scaling?tp->rcv_wscale:0);
+ sysctl_tcp_window_scaling,tp->rcv_wscale);
buff->csum = 0;
- t1->doff = (sizeof(*t1)+ tmp)>>2;
+ th->doff = (sizeof(*th)+ tmp)>>2;
- tcp_v4_send_check(sk, t1, sizeof(struct tcphdr) + tmp, buff);
+ tcp_v4_send_check(sk, th, sizeof(struct tcphdr) + tmp, buff);
tcp_set_state(sk,TCP_SYN_SENT);
@@ -803,7 +813,6 @@ int tcp_chkaddr(struct sk_buff *skb)
static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
{
- struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
struct sk_buff * skb;
struct tcphdr *th;
int tmp;
@@ -829,6 +838,11 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
*/
req->mss = min(mss, req->mss);
+ if (req->mss < 1) {
+ printk(KERN_DEBUG "initial req->mss below 1\n");
+ req->mss = 1;
+ }
+
/* Yuck, make this header setup more efficient... -DaveM */
memset(th, 0, sizeof(struct tcphdr));
th->syn = 1;
@@ -839,7 +853,16 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
skb->end_seq = skb->seq + 1;
th->seq = ntohl(skb->seq);
th->ack_seq = htonl(req->rcv_isn + 1);
- th->window = ntohs(tp->rcv_wnd);
+ if (req->rcv_wnd == 0) {
+ /* Set this up on the first call only */
+ req->window_clamp = skb->dst->window;
+ tcp_select_initial_window(sock_rspace(sk)/2,req->mss,
+ &req->rcv_wnd,
+ &req->window_clamp,
+ req->wscale_ok,
+ &req->rcv_wscale);
+ }
+ th->window = htons(req->rcv_wnd);
/* XXX Partial csum of 4 byte quantity is itself! -DaveM
* Yes, but it's a bit harder to special case now. It's
@@ -850,7 +873,7 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
*/
tmp = tcp_syn_build_options(skb, req->mss, req->sack_ok, req->tstamp_ok,
- (req->snd_wscale)?tp->rcv_wscale:0);
+ req->wscale_ok,req->rcv_wscale);
skb->csum = 0;
th->doff = (sizeof(*th) + tmp)>>2;
th->check = tcp_v4_check(th, sizeof(*th) + tmp,
@@ -865,7 +888,7 @@ static void tcp_v4_or_free(struct open_request *req)
{
if(!req->sk && req->af.v4_req.opt)
kfree_s(req->af.v4_req.opt,
- sizeof(struct options) + req->af.v4_req.opt->optlen);
+ sizeof(struct ip_options) + req->af.v4_req.opt->optlen);
}
static struct or_calltable or_ipv4 = {
@@ -881,7 +904,7 @@ static int tcp_v4_syn_filter(struct sock *sk, struct sk_buff *skb, __u32 saddr)
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 isn)
{
struct ip_options *opt = (struct ip_options *) ptr;
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ struct tcp_opt tp;
struct open_request *req;
struct tcphdr *th = skb->h.th;
__u32 saddr = skb->nh.iph->saddr;
@@ -913,19 +936,20 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i
sk->ack_backlog++;
+ req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
+
req->rcv_isn = skb->seq;
req->snt_isn = isn;
- tp->tstamp_ok = tp->sack_ok = tp->snd_wscale = 0;
- tcp_parse_options(th,tp);
- if (tp->saw_tstamp) {
- tp->ts_recent = tp->rcv_tsval;
- tp->ts_recent_stamp = jiffies;
- }
- req->mss = tp->in_mss;
- req->tstamp_ok = tp->tstamp_ok;
- req->sack_ok = tp->sack_ok;
- req->snd_wscale = tp->snd_wscale;
- req->ts_recent = tp->ts_recent;
+ tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
+ tp.in_mss = 536;
+ tcp_parse_options(th,&tp);
+ if (tp.saw_tstamp)
+ req->ts_recent = tp.rcv_tsval;
+ req->mss = tp.in_mss;
+ req->tstamp_ok = tp.tstamp_ok;
+ req->sack_ok = tp.sack_ok;
+ req->snd_wscale = tp.snd_wscale;
+ req->wscale_ok = tp.wscale_ok;
req->rmt_port = th->source;
req->af.v4_req.loc_addr = daddr;
req->af.v4_req.rmt_addr = saddr;
@@ -1004,8 +1028,6 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
atomic_set(&newsk->rmem_alloc, 0);
newsk->localroute = sk->localroute;
- newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
-
newsk->err = 0;
newsk->shutdown = 0;
newsk->ack_backlog = 0;
@@ -1060,7 +1082,6 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->dst_cache = &rt->u.dst;
- newtp->window_clamp = rt->u.dst.window;
snd_mss = rt->u.dst.pmtu;
/* FIXME: is mtu really the same as snd_mss? */
@@ -1072,10 +1093,19 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->sack_ok = req->sack_ok;
newtp->tstamp_ok = req->tstamp_ok;
- newtp->snd_wscale = req->snd_wscale;
- newtp->ts_recent = req->ts_recent;
- newtp->ts_recent_stamp = jiffies;
+ newtp->window_clamp = req->window_clamp;
+ newtp->rcv_wnd = req->rcv_wnd;
+ newtp->wscale_ok = req->wscale_ok;
+ if (newtp->wscale_ok) {
+ newtp->snd_wscale = req->snd_wscale;
+ newtp->rcv_wscale = req->rcv_wscale;
+ } else {
+ newtp->snd_wscale = newtp->rcv_wscale = 0;
+ newtp->window_clamp = min(newtp->window_clamp,65535);
+ }
if (newtp->tstamp_ok) {
+ newtp->ts_recent = req->ts_recent;
+ newtp->ts_recent_stamp = jiffies;
newtp->tcp_header_len = sizeof(struct tcphdr) + 12; /* FIXME: define constant! */
newsk->dummy_th.doff += 3;
} else {
@@ -1219,9 +1249,8 @@ int tcp_v4_rcv(struct sk_buff *skb, unsigned short len)
case CHECKSUM_HW:
if (tcp_v4_check(th,len,saddr,daddr,skb->csum)) {
struct iphdr * iph = skb->nh.iph;
- printk(KERN_DEBUG "TCPv4 bad checksum from %08x:%04x to %08x:%04x, ack = %u, seq = %u, len=%d/%d/%d\n",
+ printk(KERN_DEBUG "TCPv4 bad checksum from %08x:%04x to %08x:%04x, len=%d/%d/%d\n",
saddr, ntohs(th->source), daddr,
- ntohl(th->ack_seq), ntohl(th->seq),
ntohs(th->dest), len, skb->len, ntohs(iph->tot_len));
goto discard_it;
}
@@ -1346,10 +1375,12 @@ static int tcp_v4_init_sock(struct sock *sk)
tp->ato = 0;
tp->iat = (HZ/5) << 3;
- tp->rcv_wnd = 8192;
+ /* FIXME: tie this to sk->rcvbuf? (May be unnecessary) */
+ /* tp->rcv_wnd = 8192; */
tp->tstamp_ok = 0;
tp->sack_ok = 0;
- tp->in_mss = 0;
+ tp->wscale_ok = 0;
+ tp->in_mss = 536;
tp->snd_wscale = 0;
tp->sacks = 0;
tp->saw_tstamp = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7f157abe2..bdc79525f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_output.c,v 1.42 1997/04/22 01:06:33 davem Exp $
+ * Version: $Id: tcp_output.c,v 1.43 1997/04/27 19:24:43 schenk Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -404,14 +404,115 @@ void tcp_write_xmit(struct sock *sk)
-/*
- * This function returns the amount that we can raise the
- * usable window based on the following constraints
+/* This function returns the amount that we can raise the
+ * usable window based on the following constraints
*
- * 1. The window can never be shrunk once it is offered (RFC 793)
- * 2. We limit memory per socket
+ * 1. The window can never be shrunk once it is offered (RFC 793)
+ * 2. We limit memory per socket
+ *
+ * RFC 1122:
+ * "the suggested [SWS] avoidance algorithm for the receiver is to keep
+ * RECV.NEXT + RCV.WIN fixed until:
+ * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
+ *
+ * i.e. don't raise the right edge of the window until you can raise
+ * it at least MSS bytes.
+ *
+ * Unfortunately, the recommended algorithm breaks header prediction,
+ * since header prediction assumes th->window stays fixed.
+ *
+ * Strictly speaking, keeping th->window fixed violates the receiver
+ * side SWS prevention criteria. The problem is that under this rule
+ * a stream of single byte packets will cause the right side of the
+ * window to always advance by a single byte.
+ *
+ * Of course, if the sender implements sender side SWS prevention
+ * then this will not be a problem.
+ *
+ * BSD seems to make the following compromise:
+ *
+ * If the free space is less than the 1/4 of the maximum
+ * space available and the free space is less than 1/2 mss,
+ * then set the window to 0.
+ * Otherwise, just prevent the window from shrinking
+ * and from being larger than the largest representable value.
+ *
+ * This prevents incremental opening of the window in the regime
+ * where TCP is limited by the speed of the reader side taking
+ * data out of the TCP receive queue. It does nothing about
+ * those cases where the window is constrained on the sender side
+ * because the pipeline is full.
+ *
+ * BSD also seems to "accidentally" limit itself to windows that are a
+ * multiple of MSS, at least until the free space gets quite small.
+ * This would appear to be a side effect of the mbuf implementation.
+ * Combining these two algorithms results in the observed behavior
+ * of having a fixed window size at almost all times.
+ *
+ * Below we obtain similar behavior by forcing the offered window to
+ * a multiple of the mss when it is feasible to do so.
+ *
+ * FIXME: In our current implementation the value returned by sock_rspace(sk)
+ * is the total space we have allocated to the socket to store skbuf's.
+ * The current design assumes that up to half of that space will be
+ * taken by headers, and the remaining space will be available for TCP data.
+ * This should be accounted for correctly instead.
*/
+unsigned short tcp_select_window(struct sock *sk)
+{
+ struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+ int mss = sk->mss;
+ long free_space = sock_rspace(sk)/2;
+ long window, cur_win;
+
+ if (tp->window_clamp) {
+ free_space = min(tp->window_clamp, free_space);
+ mss = min(tp->window_clamp, mss);
+ } else
+ printk(KERN_DEBUG "Clamp failure. Water leaking.\n");
+
+ if (mss < 1) {
+ mss = 1;
+ printk(KERN_DEBUG "tcp_select_window: mss fell to 0.\n");
+ }
+
+ /* compute the actual window i.e.
+ * old_window - received_bytes_on_that_win
+ */
+ cur_win = tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup);
+ window = tp->rcv_wnd;
+
+ if (cur_win < 0) {
+ cur_win = 0;
+ printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n",
+ tp->rcv_wnd, tp->rcv_nxt, tp->rcv_wup);
+ }
+
+ if (free_space < sk->rcvbuf/4 && free_space < mss/2)
+ window = 0;
+
+ /* Get the largest window that is a nice multiple of mss.
+ * Window clamp already applied above.
+ * If our current window offering is within 1 mss of the
+ * free space we just keep it. This prevents the divide
+ * and multiply from happening most of the time.
+ * We also don't do any window rounding when the free space
+ * is too small.
+ */
+ if (window < free_space - mss && free_space > mss)
+ window = (free_space/mss)*mss;
+ /* Never shrink the offered window */
+ if (window < cur_win)
+ window = cur_win;
+
+ tp->rcv_wnd = window;
+ tp->rcv_wup = tp->rcv_nxt;
+ return window >> tp->rcv_wscale; /* RFC1323 scaling applied */
+}
+
+#if 0
+/* Old algorithm for window selection */
unsigned short tcp_select_window(struct sock *sk)
{
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
@@ -427,37 +528,31 @@ unsigned short tcp_select_window(struct sock *sk)
/* compute the actual window i.e.
* old_window - received_bytes_on_that_win
*/
- cur_win = tp->rcv_wup - (tp->rcv_nxt - tp->rcv_wnd);
+ cur_win = tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup);
window = tp->rcv_wnd;
-
+
if (cur_win < 0) {
cur_win = 0;
printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n",
tp->rcv_wnd, tp->rcv_nxt, tp->rcv_wup);
}
- /*
- * RFC 1122:
+ /* RFC 1122:
* "the suggested [SWS] avoidance algoritm for the receiver is to keep
* RECV.NEXT + RCV.WIN fixed until:
* RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
*
- * i.e. don't raise the right edge of the window until you can't raise
- * it MSS bytes
+ * i.e. don't raise the right edge of the window until you can raise
+ * it at least MSS bytes.
*/
- /* It would be a good idea if it didn't break header prediction.
- * and BSD made the header predition standard...
- * It expects the same value in the header i.e. th->window to be
- * constant
- */
usable = free_space - cur_win;
if (usable < 0)
usable = 0;
if (window < usable) {
/* Window is not blocking the sender
- * and we have enought free space for it
+ * and we have enough free space for it
*/
if (cur_win > (sk->mss << 1))
goto out;
@@ -469,7 +564,7 @@ unsigned short tcp_select_window(struct sock *sk)
*/
window = max(usable, cur_win);
} else {
- if ((usable - window) >= mss)
+ while ((usable - window) >= mss)
window += mss;
}
out:
@@ -477,6 +572,7 @@ out:
tp->rcv_wup = tp->rcv_nxt;
return window;
}
+#endif
static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb)
{
@@ -703,6 +799,11 @@ void tcp_send_fin(struct sock *sk)
}
}
+/* WARNING: This routine must only be called when we have already sent
+ * a SYN packet that crossed the incoming SYN that caused this routine
+ * to get called. If this assumption fails then the initial rcv_wnd
+ * and rcv_wscale values will not be correct.
+ */
int tcp_send_synack(struct sock *sk)
{
struct tcp_opt * tp = &(sk->tp_pinfo.af_tcp);
@@ -735,13 +836,16 @@ int tcp_send_synack(struct sock *sk)
skb->end_seq = skb->seq + 1 /* th->syn */ ;
th->seq = ntohl(skb->seq);
- th->window = ntohs(tp->rcv_wnd);
+ /* This is a resend of a previous SYN, now with an ACK.
+ * we must reuse the previously offered window.
+ */
+ th->window = htons(tp->rcv_wnd);
tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
tmp = tcp_syn_build_options(skb, sk->mss,
tp->sack_ok, tp->tstamp_ok,
- tp->snd_wscale?tp->rcv_wscale:0);
+ tp->wscale_ok,tp->rcv_wscale);
skb->csum = 0;
th->doff = (sizeof(*th) + tmp)>>2;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 365d3dac2..ce6c60feb 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,6 +22,8 @@
#include <net/tcp.h>
+int sysctl_syn_retries = TCP_SYN_RETRIES;
+
static void tcp_sltimer_handler(unsigned long);
static void tcp_syn_recv_timer(unsigned long);
static void tcp_keepalive(unsigned long data);
@@ -178,7 +180,7 @@ static int tcp_write_timeout(struct sock *sk)
}
/* Have we tried to SYN too many times (repent repent 8)) */
- if(tp->retransmits > TCP_SYN_RETRIES && sk->state==TCP_SYN_SENT) {
+ if(tp->retransmits > sysctl_syn_retries && sk->state==TCP_SYN_SENT) {
if(sk->err_soft)
sk->err=sk->err_soft;
else
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9ca5f3045..ed84d5b0f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -154,7 +154,7 @@ static int udp_v4_verify_bind(struct sock *sk, unsigned short snum)
return retval;
}
-static inline int udp_lport_inuse(int num)
+static inline int udp_lport_inuse(u16 num)
{
struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)];
@@ -168,36 +168,42 @@ static inline int udp_lport_inuse(int num)
/* Shared by v4/v6 tcp. */
unsigned short udp_good_socknum(void)
{
- static int start = 0;
- unsigned short base;
- int i, best = 0, size = 32767; /* a big num. */
int result;
-
- base = PROT_SOCK + (start & 1023) + 1;
+ static int start = 0;
+ int i, best, best_size_so_far;
SOCKHASH_LOCK();
- for(i = 0; i < UDP_HTABLE_SIZE; i++) {
- struct sock *sk = udp_hash[i];
- if(!sk) {
- start = (i + 1 + start) & 1023;
- result = i + base + 1;
+
+ /* Select initial not-so-random "best" */
+ best = PROT_SOCK + 1 + (start & 1023);
+ best_size_so_far = 32767; /* "big" num */
+ result = best;
+ for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+ struct sock *sk;
+ int size;
+
+ sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+
+ /* No clashes - take it */
+ if (!sk)
goto out;
- } else {
- int j = 0;
- do {
- if(++j >= size)
- goto next;
- } while((sk = sk->next));
- best = i;
- size = j;
- }
- next:
+
+ /* Is this one better than our best so far? */
+ size = 0;
+ do {
+ if(++size >= best_size_so_far)
+ goto next;
+ } while((sk = sk->next) != NULL);
+ best_size_so_far = size;
+ best = result;
+next:
}
- while(udp_lport_inuse(base + best + 1))
+ while (udp_lport_inuse(best))
best += UDP_HTABLE_SIZE;
- result = (best + base + 1);
+ result = best;
out:
+ start = result;
SOCKHASH_UNLOCK();
return result;
}
diff --git a/net/ipv4/utils.c b/net/ipv4/utils.c
index cbce01b68..4253c85db 100644
--- a/net/ipv4/utils.c
+++ b/net/ipv4/utils.c
@@ -46,7 +46,7 @@
* Display an IP address in readable format.
*/
-char *in_ntoa(unsigned long in)
+char *in_ntoa(__u32 in)
{
static char buff[18];
char *p;
@@ -62,7 +62,7 @@ char *in_ntoa(unsigned long in)
* Convert an ASCII string to binary IP.
*/
-unsigned long in_aton(const char *str)
+__u32 in_aton(const char *str)
{
unsigned long l;
unsigned int val;