author    Ralf Baechle <ralf@linux-mips.org>  2000-02-23 00:40:54 +0000
committer Ralf Baechle <ralf@linux-mips.org>  2000-02-23 00:40:54 +0000
commit    529c593ece216e4aaffd36bd940cb94f1fa63129 (patch)
tree      78f1c0b805f5656aa7b0417a043c5346f700a2cf /net/ipv4
parent    0bd079751d25808d1972baee5c4eaa1db2227257 (diff)
Merge with 2.3.43. I ignored all modifications to the qlogicisp.c
driver due to the Origin A64 hacks.
Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/af_inet.c      |  6
-rw-r--r--  net/ipv4/icmp.c         |  4
-rw-r--r--  net/ipv4/igmp.c         |  8
-rw-r--r--  net/ipv4/ip_fragment.c  |  4
-rw-r--r--  net/ipv4/ip_output.c    | 17
-rw-r--r--  net/ipv4/route.c        | 18
-rw-r--r--  net/ipv4/tcp.c          | 63
-rw-r--r--  net/ipv4/tcp_input.c    | 39
-rw-r--r--  net/ipv4/tcp_ipv4.c     | 12
-rw-r--r--  net/ipv4/tcp_output.c   | 48
-rw-r--r--  net/ipv4/tcp_timer.c    | 26
11 files changed, 142 insertions(+), 103 deletions(-)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index bc2c97779..969fee200 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
*
* PF_INET protocol family socket handler.
*
- * Version: $Id: af_inet.c,v 1.104 2000/01/18 08:24:14 davem Exp $
+ * Version: $Id: af_inet.c,v 1.106 2000/02/04 21:04:06 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -675,7 +675,9 @@ static int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_family = AF_INET;
if (peer) {
- if (!sk->dport)
+ if (!sk->dport)
+ return -ENOTCONN;
+ if (((1<<sk->state)&(TCPF_CLOSE|TCPF_SYN_SENT)) && peer == 1)
return -ENOTCONN;
sin->sin_port = sk->dport;
sin->sin_addr.s_addr = sk->daddr;
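The new getname check leans on the (1 << state) mask idiom: each TCPF_* flag is 1 shifted by its TCP_* state number, so one AND tests membership in a whole set of states. A minimal user-space sketch of the idiom (the state numbers mirror the 2.3-era enum; everything else here is illustrative, not kernel code):

    #include <stdio.h>

    enum { TCP_ESTABLISHED = 1, TCP_SYN_SENT = 2, TCP_CLOSE = 7 };
    #define TCPF_SYN_SENT (1 << TCP_SYN_SENT)
    #define TCPF_CLOSE    (1 << TCP_CLOSE)

    static int peer_name_unavailable(int state)
    {
            /* One AND covers both "not yet connected" states. */
            return ((1 << state) & (TCPF_CLOSE | TCPF_SYN_SENT)) != 0;
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   peer_name_unavailable(TCP_CLOSE),        /* 1 */
                   peer_name_unavailable(TCP_SYN_SENT),     /* 1 */
                   peer_name_unavailable(TCP_ESTABLISHED)); /* 0 */
            return 0;
    }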
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 2b61c67af..c01d447b1 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -3,7 +3,7 @@
*
* Alan Cox, <alan@redhat.com>
*
- * Version: $Id: icmp.c,v 1.63 2000/01/09 02:19:45 davem Exp $
+ * Version: $Id: icmp.c,v 1.64 2000/02/09 11:16:40 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -468,7 +468,7 @@ static void icmp_out_count(int type)
{
if (type>NR_ICMP_TYPES)
return;
- (icmp_pointers[type].output)[(smp_processor_id()*2+!in_interrupt())*sizeof(struct icmp_mib)/sizeof(unsigned long)]++;
+ (icmp_pointers[type].output)[(smp_processor_id()*2+!in_softirq())*sizeof(struct icmp_mib)/sizeof(unsigned long)]++;
ICMP_INC_STATS(IcmpOutMsgs);
}
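The MIB macros keep two counter copies per CPU, indexed by execution context; the fix switches the discriminator from in_interrupt() to in_softirq() to match the softnet world, where receive processing runs in softirq rather than hardirq context. A user-space sketch of the two-slots-per-CPU layout (names and sizes illustrative):

    #include <stdio.h>

    #define NR_CPUS 2

    /* Two counter copies per CPU: slot 0 for softirq context, slot 1
     * for process context.  Each context owns its slot, so the
     * increment needs no lock; readers sum every slot. */
    static unsigned long icmp_out_msgs[NR_CPUS * 2];

    static void icmp_out_inc(int cpu, int in_softirq)
    {
            icmp_out_msgs[cpu * 2 + !in_softirq]++;
    }

    static unsigned long icmp_out_total(void)
    {
            unsigned long sum = 0;
            int i;

            for (i = 0; i < NR_CPUS * 2; i++)
                    sum += icmp_out_msgs[i];
            return sum;
    }

    int main(void)
    {
            icmp_out_inc(0, 1);     /* CPU 0, softirq */
            icmp_out_inc(0, 0);     /* CPU 0, process context */
            icmp_out_inc(1, 1);     /* CPU 1, softirq */
            printf("IcmpOutMsgs = %lu\n", icmp_out_total());  /* 3 */
            return 0;
    }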
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 93dd76391..3aad90680 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -8,7 +8,7 @@
* the older version didn't come out right using gcc 2.5.8, the newer one
* seems to fall out with gcc 2.6.2.
*
- * Version: $Id: igmp.c,v 1.36 2000/01/06 00:41:54 davem Exp $
+ * Version: $Id: igmp.c,v 1.37 2000/02/09 11:16:40 davem Exp $
*
* Authors:
* Alan Cox <Alan.Cox@linux.org>
@@ -154,11 +154,9 @@ static __inline__ void igmp_start_timer(struct ip_mc_list *im, int max_delay)
int tv=net_random() % max_delay;
spin_lock_bh(&im->lock);
- if (!del_timer(&im->timer))
- atomic_inc(&im->refcnt);
- im->timer.expires=jiffies+tv+2;
im->tm_running=1;
- add_timer(&im->timer);
+ if (!mod_timer(&im->timer, jiffies+tv+2))
+ atomic_inc(&im->refcnt);
spin_unlock_bh(&im->lock);
}
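mod_timer() collapses the del_timer()/add_timer() pair into a single atomic re-arm; like del_timer(), its return value reports whether the timer was already pending, so the refcount rule carries over unchanged. A toy user-space model of that contract:

    #include <stdio.h>

    /* Toy model: like the kernel call, mod_timer() returns whether
     * the timer was pending beforehand. */
    struct timer { int pending; unsigned long expires; };

    static int mod_timer(struct timer *t, unsigned long expires)
    {
            int was_pending = t->pending;

            t->expires = expires;
            t->pending = 1;
            return was_pending;
    }

    int main(void)
    {
            struct timer t = { 0, 0 };
            int refcnt = 1;   /* held by the ip_mc_list itself */

            /* First arm: timer was idle, so the pending timer takes
             * a new reference.  Re-arm: already pending, no new ref. */
            if (!mod_timer(&t, 100))
                    refcnt++;
            if (!mod_timer(&t, 200))
                    refcnt++;

            printf("refcnt=%d expires=%lu\n", refcnt, t.expires); /* 2, 200 */
            return 0;
    }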
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 1c0b9dae7..852a4fb2c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -5,7 +5,7 @@
*
* The IP fragmentation functionality.
*
- * Version: $Id: ip_fragment.c,v 1.46 2000/01/09 02:19:36 davem Exp $
+ * Version: $Id: ip_fragment.c,v 1.47 2000/02/09 21:11:33 davem Exp $
*
* Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
* Alan Cox <Alan.Cox@linux.org>
@@ -77,7 +77,7 @@ static spinlock_t ipfrag_lock = SPIN_LOCK_UNLOCKED;
#define ipqhashfn(id, saddr, daddr, prot) \
((((id) >> 1) ^ (saddr) ^ (daddr) ^ (prot)) & (IPQ_HASHSZ - 1))
-atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
+static atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
/* Memory Tracking Functions. */
extern __inline__ void frag_kfree_skb(struct sk_buff *skb)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 2a4e3cf41..e06825e2e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -5,7 +5,7 @@
*
* The Internet Protocol (IP) output module.
*
- * Version: $Id: ip_output.c,v 1.78 2000/01/16 05:11:22 davem Exp $
+ * Version: $Id: ip_output.c,v 1.80 2000/02/09 11:16:41 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -645,14 +645,14 @@ static int ip_build_xmit_slow(struct sock *sk,
} while (offset >= 0);
if (nfrags>1)
- ip_statistics[smp_processor_id()*2 + !in_interrupt()].IpFragCreates += nfrags;
+ ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
out:
return 0;
error:
IP_INC_STATS(IpOutDiscards);
if (nfrags>1)
- ip_statistics[smp_processor_id()*2 + !in_interrupt()].IpFragCreates += nfrags;
+ ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
return err;
}
@@ -972,10 +972,15 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
return;
daddr = ipc.addr = rt->rt_src;
- ipc.opt = &replyopts.opt;
+ ipc.opt = NULL;
+
+ if (replyopts.opt.optlen) {
+ ipc.opt = &replyopts.opt;
+
+ if (ipc.opt->srr)
+ daddr = replyopts.opt.faddr;
+ }
- if (ipc.opt->srr)
- daddr = replyopts.opt.faddr;
if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
return;
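The ip_send_reply() fix stops dereferencing the echoed options when none were present: ipc.opt now stays NULL unless replyopts.opt.optlen is nonzero, and only then may the source-route address override daddr. The shape of the guard, as a standalone sketch (struct simplified):

    #include <stdio.h>

    /* Simplified stand-in for struct ip_options: optlen == 0 means
     * no options were echoed and the other fields are meaningless. */
    struct ip_opts { int optlen; int srr; unsigned int faddr; };

    static unsigned int reply_daddr(const struct ip_opts *opt,
                                    unsigned int default_daddr)
    {
            /* The fix: check optlen before trusting srr/faddr. */
            if (opt->optlen && opt->srr)
                    return opt->faddr;  /* source route: reply via first hop */
            return default_daddr;
    }

    int main(void)
    {
            struct ip_opts none = { 0, 1, 0xdeadbeef };  /* stale garbage */
            struct ip_opts srr  = { 8, 1, 0x0a000001 };

            printf("%08x\n", reply_daddr(&none, 0x0a000002)); /* 0a000002 */
            printf("%08x\n", reply_daddr(&srr,  0x0a000002)); /* 0a000001 */
            return 0;
    }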
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bbc6ec111..4e649eded 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -5,7 +5,7 @@
*
* ROUTE - implementation of the IP router.
*
- * Version: $Id: route.c,v 1.80 2000/01/21 06:37:27 davem Exp $
+ * Version: $Id: route.c,v 1.81 2000/02/09 11:16:42 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -313,7 +313,7 @@ static __inline__ int rt_may_expire(struct rtable *rth, int tmo1, int tmo2)
}
/* This runs via a timer and thus is always in BH context. */
-static void rt_check_expire(unsigned long dummy)
+static void SMP_TIMER_NAME(rt_check_expire)(unsigned long dummy)
{
int i, t;
static int rover;
@@ -359,10 +359,12 @@ static void rt_check_expire(unsigned long dummy)
mod_timer(&rt_periodic_timer, now + ip_rt_gc_interval);
}
+SMP_TIMER_DEFINE(rt_check_expire, rt_gc_task);
+
/* This can run from both BH and non-BH contexts, the latter
* in the case of a forced flush event.
*/
-static void rt_run_flush(unsigned long dummy)
+static void SMP_TIMER_NAME(rt_run_flush)(unsigned long dummy)
{
int i;
struct rtable * rth, * next;
@@ -382,13 +384,15 @@ static void rt_run_flush(unsigned long dummy)
}
}
}
+
+SMP_TIMER_DEFINE(rt_run_flush, rt_cache_flush_task);
static spinlock_t rt_flush_lock = SPIN_LOCK_UNLOCKED;
void rt_cache_flush(int delay)
{
unsigned long now = jiffies;
- int user_mode = !in_interrupt();
+ int user_mode = !in_softirq();
if (delay < 0)
delay = ip_rt_min_delay;
@@ -414,7 +418,7 @@ void rt_cache_flush(int delay)
if (delay <= 0) {
spin_unlock_bh(&rt_flush_lock);
- rt_run_flush(0);
+ SMP_TIMER_NAME(rt_run_flush)(0);
return;
}
@@ -529,7 +533,7 @@ static int rt_garbage_collect(void)
if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
return 0;
- } while (!in_interrupt() && jiffies - now < 1);
+ } while (!in_softirq() && jiffies - now < 1);
if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
return 0;
@@ -552,7 +556,7 @@ static int rt_intern_hash(unsigned hash, struct rtable * rt, struct rtable ** rp
{
struct rtable *rth, **rthp;
unsigned long now = jiffies;
- int attempts = !in_interrupt();
+ int attempts = !in_softirq();
restart:
rthp = &rt_hash_table[hash].chain;
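The route.c conversions rely on the SMP_TIMER_NAME()/SMP_TIMER_DEFINE() wrappers added in 2.3.43. A hypothetical sketch of the shape such wrappers take (the real definitions live in include/linux/timer.h of that tree and may differ; `task` is assumed to be a tasklet declared elsewhere that invokes the renamed body):

    #ifdef CONFIG_SMP
    /* SMP: the timer callback stays tiny -- it only kicks a tasklet,
     * and the renamed body runs from the tasklet instead. */
    #define SMP_TIMER_NAME(name)    name##_body
    #define SMP_TIMER_DEFINE(name, task)                    \
            static void name(unsigned long dummy)           \
            {                                               \
                    tasklet_schedule(&(task));              \
            }
    #else
    /* UP: no renaming, the timer runs the body directly. */
    #define SMP_TIMER_NAME(name)    name
    #define SMP_TIMER_DEFINE(name, task)
    #endif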
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index aa890aef3..9f7ad441e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp.c,v 1.161 2000/01/31 01:21:16 davem Exp $
+ * Version: $Id: tcp.c,v 1.163 2000/02/08 21:27:13 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -1106,8 +1106,8 @@ static void cleanup_rbuf(struct sock *sk, int copied)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
struct sk_buff *skb;
- int time_to_ack;
-
+ int time_to_ack = 0;
+
/* NOTE! The socket must be locked, so that we don't get
* a messed-up receive queue.
*/
@@ -1117,13 +1117,39 @@ static void cleanup_rbuf(struct sock *sk, int copied)
tcp_eat_skb(sk, skb);
}
- /* Delayed ACKs frequently hit locked sockets during bulk receive. */
- time_to_ack = tp->ack.blocked && tp->ack.pending;
-#ifdef CONFIG_TCP_MORE_COARSE_ACKS
- if (tp->ack.pending &&
- (tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss)
- time_to_ack = 1;
+ if (tp->ack.pending) {
+ /* Delayed ACKs frequently hit locked sockets during bulk receive. */
+ if (tp->ack.blocked
+#ifdef TCP_MORE_COARSE_ACKS
+ /* Once-per-two-segments ACK was not sent by tcp_input.c */
+ || tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss
#endif
+ /*
+ * If this read emptied the read buffer, we send an ACK when:
+ *
+ * -- The ATO estimator has diverged. In this case it is
+ * useless to delay the ACK; it would miss anyway.
+ *
+ * -- The second condition triggers when we have not ACKed
+ * 8 segments, regardless of their size.
+ * Linux senders allocate a full-sized frame even for one-byte
+ * packets, so the default queue for MTU=8K can hold
+ * only 8 packets. Note that no workaround other than
+ * counting packets is possible. If the sender selected
+ * a small sndbuf or has a larger mtu, the stall will still
+ * occur -- well, not a lockup, but a 10-20msec gap,
+ * which is essentially a dead lockup for gigabit ethernet
+ * and loopback :-). The value 8 covers all reasonable
+ * cases, and we may now receive packets of any size
+ * at the maximal possible rate.
+ */
+ || (copied > 0 &&
+ (tp->ack.ato >= TCP_DELACK_MAX || tp->ack.rcv_segs > 7) &&
+ !tp->ack.pingpong &&
+ atomic_read(&sk->rmem_alloc) == 0)) {
+ time_to_ack = 1;
+ }
+ }
/* We send an ACK if we can now advertise a non-zero window
* which has been raised "significantly".
@@ -1135,14 +1161,12 @@ static void cleanup_rbuf(struct sock *sk, int copied)
__u32 rcv_window_now = tcp_receive_window(tp);
__u32 new_window = __tcp_select_window(sk);
- /* We won't be raising the window any further than
- * the window-clamp allows. Our window selection
- * also keeps things a nice multiple of MSS. These
- * checks are necessary to prevent spurious ACKs
- * which don't advertize a larger window.
+ /* Send an ACK now if this read freed lots of space
+ * in our buffer. Here new_window is the window we could now
+ * advertise; we may do so if it is not less than the current
+ * one. "Lots" means "at least twice" here.
*/
- if((new_window && (new_window >= rcv_window_now * 2)) &&
- ((rcv_window_now + tp->ack.rcv_mss) <= tp->window_clamp))
+ if(new_window && new_window >= 2*rcv_window_now)
time_to_ack = 1;
}
if (time_to_ack)
@@ -1408,11 +1432,6 @@ do_prequeue:
copied += chunk;
}
}
-#ifdef CONFIG_TCP_MORE_COARSE_ACKS
- if (tp->ack.pending &&
- (tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss)
- tcp_send_ack(sk);
-#endif
}
continue;
@@ -1472,7 +1491,7 @@ do_prequeue:
skb->used = 1;
tcp_eat_skb(sk, skb);
-#ifdef CONFIG_TCP_LESS_COARSE_ACKS
+#ifdef TCP_LESS_COARSE_ACKS
/* Possible improvement: when the sender is faster than the receiver,
* traffic looks like: fill window ... wait for window open ...
* fill window. We lose at least one rtt, because call
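The reworked cleanup_rbuf() logic above condenses to a single predicate. A standalone sketch with the bookkeeping fields pulled into a stand-in struct (the constant is illustrative; the kernel derives TCP_DELACK_MAX from HZ):

    #define TCP_DELACK_MAX  100

    /* Minimal stand-in for the delayed-ACK state in tcp_opt. */
    struct ack_state {
            int pending, blocked, pingpong;
            unsigned int rcv_nxt, rcv_wup, rcv_mss, rcv_segs, ato;
    };

    static int should_ack_now(const struct ack_state *a, int copied,
                              int rmem_alloc)
    {
            if (!a->pending)
                    return 0;
            if (a->blocked)
                    return 1;  /* the delayed ACK hit a locked socket */
            if (a->rcv_nxt - a->rcv_wup > a->rcv_mss)
                    return 1;  /* once-per-two-segments ACK was skipped */
            /* The read emptied the receive buffer: ACK if the ATO
             * estimator diverged or 8 segments went unACKed, unless
             * we are in interactive (pingpong) mode. */
            return copied > 0 &&
                   (a->ato >= TCP_DELACK_MAX || a->rcv_segs > 7) &&
                   !a->pingpong && rmem_alloc == 0;
    }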
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 366dddc89..88483d516 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.186 2000/01/31 20:26:13 davem Exp $
+ * Version: $Id: tcp_input.c,v 1.188 2000/02/08 21:27:14 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -134,25 +134,6 @@ static __inline__ void tcp_measure_rcv_mss(struct tcp_opt *tp, struct sk_buff *s
tp->ack.rcv_mss = len;
tp->ack.last_seg_size = len;
}
-
-#if 0
- /* Tiny-grams with PSH set artifically deflate our
- * ato measurement.
- *
- * Mmm... I copied this test from tcp_remember_ack(), but
- * I did not understand this. Is it to speedup nagling sender?
- * It does not because classic (non-Minshall) sender nagles
- * guided by not-acked frames not depending on size.
- * And it does not help NODELAY sender, because latency
- * is too high in any case. The only result is timer trashing
- * and redundant ACKs. Grr... Seems, I missed something. --ANK
- *
- * Let me to comment out this yet... TCP should work
- * perfectly without this. --ANK
- */
- if (len < (tp->ack.rcv_mss >> 1) && skb->h.th->psh)
- tp->ack.ato = TCP_ATO_MIN;
-#endif
}
}
@@ -199,6 +180,7 @@ static void tcp_event_data_recv(struct tcp_opt *tp, struct sk_buff *skb)
tcp_measure_rcv_mss(tp, skb);
tp->ack.pending = 1;
+ tp->ack.rcv_segs++;
now = tcp_time_stamp;
@@ -232,7 +214,8 @@ static void tcp_event_data_recv(struct tcp_opt *tp, struct sk_buff *skb)
} else {
if (m <= 0)
m = TCP_ATO_MIN/2;
- tp->ack.ato = (tp->ack.ato >> 1) + m;
+ if (m <= tp->ack.ato)
+ tp->ack.ato = (tp->ack.ato >> 1) + m;
}
}
tp->ack.lrcvtime = now;
@@ -458,7 +441,7 @@ reset:
extern __inline__ void
tcp_replace_ts_recent(struct sock *sk, struct tcp_opt *tp, u32 seq)
{
- if (!after(seq, tp->last_ack_sent)) {
+ if (!after(seq, tp->rcv_wup)) {
/* PAWS bug workaround wrt. ACK frames, the PAWS discard
* extra check below makes sure this can only happen
* for pure ACK frames. -DaveM
@@ -2303,6 +2286,8 @@ static int prune_queue(struct sock *sk)
if(atomic_read(&sk->rmem_alloc) < (sk->rcvbuf << 1))
return 0;
+ NET_INC_STATS_BH(RcvPruned);
+
/* Massive buffer overcommit. */
return -1;
}
@@ -2470,10 +2455,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
goto slow_path;
/* Predicted packet is in window by definition.
- * seq == rcv_nxt and last_ack_sent <= rcv_nxt.
- * Hence, check seq<=last_ack_sent reduces to:
+ * seq == rcv_nxt and rcv_wup <= rcv_nxt.
+ * Hence, check seq<=rcv_wup reduces to:
*/
- if (tp->rcv_nxt == tp->last_ack_sent) {
+ if (tp->rcv_nxt == tp->rcv_wup) {
tp->ts_recent = tp->rcv_tsval;
tp->ts_recent_stamp = xtime.tv_sec;
}
@@ -2544,7 +2529,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_event_data_recv(tp, skb);
-#if 1/*def CONFIG_TCP_MORE_COARSE_ACKS*/
+#ifdef TCP_MORE_COARSE_ACKS
if (eaten) {
if (tcp_in_quickack_mode(tp)) {
tcp_send_ack(sk);
@@ -2747,7 +2732,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newtp->copied_seq = req->rcv_isn + 1;
newtp->saw_tstamp = 0;
- newtp->last_ack_sent = req->rcv_isn + 1;
newtp->probes_out = 0;
newtp->syn_seq = req->rcv_isn;
@@ -3146,7 +3130,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tp->ack.pending = 1;
tp->ack.lrcvtime = tcp_time_stamp;
tcp_enter_quickack_mode(tp);
- tp->ack.pingpong = 1;
tp->ack.ato = TCP_ATO_MIN;
tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MIN);
goto discard;
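The tcp_input.c change replaces last_ack_sent with rcv_wup in the ts_recent update; rcv_wup advances exactly when an ACK goes out, so the two were equal, and rcv_wup lets the header-prediction fast path collapse the check to one equality test. A small sketch of the comparison (the helper mirrors the kernel's after()):

    #include <stdio.h>

    /* Wraparound-safe sequence comparison, as in the kernel. */
    static int after(unsigned int seq1, unsigned int seq2)
    {
            return (int)(seq2 - seq1) < 0;
    }

    static int may_update_ts_recent(unsigned int seq, unsigned int rcv_wup)
    {
            /* Accept the timestamp if the segment does not start
             * beyond the last window update we sent. */
            return !after(seq, rcv_wup);
    }

    int main(void)
    {
            /* Fast path: seq == rcv_nxt, and rcv_wup <= rcv_nxt always,
             * so the check collapses to rcv_nxt == rcv_wup. */
            unsigned int rcv_nxt = 1000, rcv_wup = 1000;

            printf("%d\n", may_update_ts_recent(rcv_nxt, rcv_wup)); /* 1 */
            printf("%d\n", may_update_ts_recent(1001, rcv_wup));    /* 0 */
            return 0;
    }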
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e54ce2ec2..470f47e7e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.198 2000/01/31 01:21:20 davem Exp $
+ * Version: $Id: tcp_ipv4.c,v 1.199 2000/02/08 21:27:17 davem Exp $
*
* IPv4 specific functions
*
@@ -1340,6 +1340,16 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_parse_options(NULL, th, &tp, want_cookie);
+ if (tp.saw_tstamp && tp.rcv_tsval == 0) {
+ /* Some OSes (unknown ones, but I see them on a web server
+ * that carries information interesting only to Windows
+ * users) do not send their timestamp in SYN. It is an easy
+ * case: we simply do not advertise TS support.
+ */
+ tp.saw_tstamp = 0;
+ tp.tstamp_ok = 0;
+ }
+
tcp_openreq_init(req, &tp, skb);
req->af.v4_req.loc_addr = daddr;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5583ea6cb..f3f1d0bcf 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_output.c,v 1.120 2000/01/31 01:21:22 davem Exp $
+ * Version: $Id: tcp_output.c,v 1.121 2000/02/08 21:27:19 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -94,9 +94,9 @@ static __inline__ void tcp_event_ack_sent(struct sock *sk)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- tp->last_ack_sent = tp->rcv_nxt;
tcp_dec_quickack_mode(tp);
tp->ack.pending = 0;
+ tp->ack.rcv_segs = 0;
tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
}
@@ -363,7 +363,7 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu)
/* Bound mss with half of window */
if (tp->max_window && mss_now > (tp->max_window>>1))
- mss_now = max((tp->max_window>>1), 1);
+ mss_now = max((tp->max_window>>1), 68 - tp->tcp_header_len);
/* And store cached results */
tp->pmtu_cookie = pmtu;
@@ -509,10 +509,7 @@ u32 __tcp_select_window(struct sock *sk)
if (tp->window_clamp < mss)
mss = tp->window_clamp;
- if ((free_space < (min((int)tp->window_clamp, tcp_full_space(sk)) / 2)) &&
- (free_space < ((int) (mss/2)))) {
- window = 0;
-
+ if (free_space < min((int)tp->window_clamp, tcp_full_space(sk)) / 2) {
/* THIS IS A _VERY_ GOOD PLACE to play with the window clamp:
* if free_space becomes suspiciously low,
* verify the ratio rmem_alloc/(rcv_nxt - copied_seq), and if
@@ -520,21 +517,28 @@ u32 __tcp_select_window(struct sock *sk)
* rmem_alloc overruns rcvbuf*2, shrink window_clamp.
* It will eliminate most prune events! Very simple;
* it is the next thing to do. --ANK
+ *
+ * Provided we found a way to raise it back... --ANK
*/
- } else {
- /* Get the largest window that is a nice multiple of mss.
- * Window clamp already applied above.
- * If our current window offering is within 1 mss of the
- * free space we just keep it. This prevents the divide
- * and multiply from happening most of the time.
- * We also don't do any window rounding when the free space
- * is too small.
- */
- window = tp->rcv_wnd;
- if ((((int) window) <= (free_space - ((int) mss))) ||
- (((int) window) > free_space))
- window = (((unsigned int) free_space)/mss)*mss;
+ tp->ack.quick = 0;
+
+ if (free_space < ((int) (mss/2)))
+ return 0;
}
+
+ /* Get the largest window that is a nice multiple of mss.
+ * Window clamp already applied above.
+ * If our current window offering is within 1 mss of the
+ * free space we just keep it. This prevents the divide
+ * and multiply from happening most of the time.
+ * We also don't do any window rounding when the free space
+ * is too small.
+ */
+ window = tp->rcv_wnd;
+ if ((((int) window) <= (free_space - ((int) mss))) ||
+ (((int) window) > free_space))
+ window = (((unsigned int) free_space)/mss)*mss;
+
return window;
}
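With the zero-window case split out above, the mss-rounding now runs for every nonzero window. A user-space sketch of the rounding rule:

    #include <stdio.h>

    static unsigned int round_window(unsigned int rcv_wnd, int free_space,
                                     int mss)
    {
            unsigned int window = rcv_wnd;

            /* Keep the current offer when it is within one mss of
             * the free space (avoids the divide/multiply most of the
             * time); otherwise round free space down to a multiple
             * of mss. */
            if ((int)window <= free_space - mss || (int)window > free_space)
                    window = ((unsigned int)free_space / mss) * mss;
            return window;
    }

    int main(void)
    {
            printf("%u\n", round_window(14000, 14540, 1460)); /* kept: 14000 */
            printf("%u\n", round_window(8192,  14540, 1460)); /* rounded: 13140 */
            return 0;
    }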
@@ -1092,8 +1096,7 @@ void tcp_send_delayed_ack(struct sock *sk)
unsigned long timeout;
/* Stay within the limit we were given */
- timeout = tp->ack.ato;
- timeout += jiffies + (timeout>>2);
+ timeout = jiffies + tp->ack.ato;
/* Use new timeout only if there wasn't a older one earlier. */
spin_lock_bh(&sk->timer_lock);
@@ -1151,6 +1154,7 @@ void tcp_send_ack(struct sock *sk)
buff = alloc_skb(MAX_TCP_HEADER + 15, GFP_ATOMIC);
if (buff == NULL) {
tp->ack.pending = 1;
+ tp->ack.ato = TCP_ATO_MAX;
tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
return;
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index bff4e872f..33eea733d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_timer.c,v 1.71 2000/01/18 08:24:19 davem Exp $
+ * Version: $Id: tcp_timer.c,v 1.73 2000/02/09 11:16:42 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -200,15 +200,23 @@ static void tcp_delack_timer(unsigned long data)
}
if (tp->ack.pending) {
- /* Delayed ACK missed: inflate ATO, leave pingpong mode */
- tp->ack.ato = min(tp->ack.ato<<1, TCP_ATO_MAX);
- tp->ack.pingpong = 0;
+ if (!tp->ack.pingpong) {
+ /* Delayed ACK missed: inflate ATO. */
+ tp->ack.ato = min(tp->ack.ato<<1, TCP_ATO_MAX);
+ } else {
+ /* Delayed ACK missed: leave pingpong mode and
+ * deflate ATO.
+ */
+ tp->ack.pingpong = 0;
+ tp->ack.ato = TCP_ATO_MIN;
+ }
tcp_send_ack(sk);
NET_INC_STATS_BH(DelayedACKs);
}
TCP_CHECK_TIMER(sk);
out_unlock:
+ timer_exit(&tp->delack_timer);
bh_unlock_sock(sk);
sock_put(sk);
}
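The delack timer now distinguishes the two traffic modes instead of unconditionally inflating the ATO and clearing pingpong. Distilled into a standalone helper (constants illustrative; the kernel derives both from HZ):

    #define TCP_ATO_MIN     5
    #define TCP_ATO_MAX     60

    static unsigned int min_u(unsigned int a, unsigned int b)
    {
            return a < b ? a : b;
    }

    /* Called when the delayed-ACK timer fires with the ACK still owed. */
    static void delack_missed(unsigned int *ato, int *pingpong)
    {
            if (!*pingpong) {
                    /* One-way traffic: back the ATO off exponentially. */
                    *ato = min_u(*ato << 1, TCP_ATO_MAX);
            } else {
                    /* Interactive traffic stalled: leave pingpong mode
                     * and deflate the ATO instead of inflating it. */
                    *pingpong = 0;
                    *ato = TCP_ATO_MIN;
            }
    }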
@@ -259,6 +267,7 @@ static void tcp_probe_timer(unsigned long data)
TCP_CHECK_TIMER(sk);
}
out_unlock:
+ timer_exit(&tp->probe_timer);
bh_unlock_sock(sk);
sock_put(sk);
}
@@ -272,7 +281,7 @@ static struct tcp_tw_bucket *tcp_tw_death_row[TCP_TWKILL_SLOTS];
static spinlock_t tw_death_lock = SPIN_LOCK_UNLOCKED;
static struct timer_list tcp_tw_timer = { function: tcp_twkill };
-static void tcp_twkill(unsigned long data)
+static void SMP_TIMER_NAME(tcp_twkill)(unsigned long dummy)
{
struct tcp_tw_bucket *tw;
int killed = 0;
@@ -310,6 +319,8 @@ out:
spin_unlock(&tw_death_lock);
}
+SMP_TIMER_DEFINE(tcp_twkill, tcp_twkill_task);
+
/* These are always called from BH context. See callers in
* tcp_input.c to verify this.
*/
@@ -419,7 +430,7 @@ void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo)
spin_unlock(&tw_death_lock);
}
-void tcp_twcal_tick(unsigned long dummy)
+void SMP_TIMER_NAME(tcp_twcal_tick)(unsigned long dummy)
{
int n, slot;
unsigned long j;
@@ -470,6 +481,7 @@ out:
spin_unlock(&tw_death_lock);
}
+SMP_TIMER_DEFINE(tcp_twcal_tick, tcp_twcal_tasklet);
/*
* The TCP retransmit timer.
@@ -565,6 +577,7 @@ static void tcp_retransmit_timer(unsigned long data)
TCP_CHECK_TIMER(sk);
out_unlock:
+ timer_exit(&tp->retransmit_timer);
bh_unlock_sock(sk);
sock_put(sk);
}
@@ -763,6 +776,7 @@ death:
tcp_done(sk);
out:
+ timer_exit(&sk->timer);
bh_unlock_sock(sk);
sock_put(sk);
}