summaryrefslogtreecommitdiffstats
path: root/net/sched/sch_red.c
blob: 637288d99002044c7d4a343a44cf6499338ad344 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
/*
 * net/sched/sch_red.c	Random Early Detection scheduler.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/pkt_sched.h>


/*	Random Early Detection (RED) algorithm.
	=======================================

	Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
	for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.

	This file codes a "divisionless" version of RED algorithm
	written down in Fig.17 of the paper.

Short description.
------------------

	When new packet arrives we calculate average queue length:

	avg = (1-W)*avg + W*current_queue_len,

	W is the filter time constant (chosen as 2^(-Wlog)), controlling
	the inertia of the algorithm. To allow larger bursts, W should be
	decreased.

	if (avg > th_max) -> packet marked (dropped).
	if (avg < th_min) -> packet passes.
	if (th_min < avg < th_max) we calculate probability:

	Pb = max_P * (avg - th_min)/(th_max-th_min)

	and mark (drop) packet with this probability.
	Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
	max_P should be small (not 1!).

	NB.	SF&VJ assumed that Pb[avg] is linear function. I think it
	        is wrong. I'd make:
		P[th_min] = 0, P[th_max] = 1;
		dP/davg[th_min] = 0, dP/davg[th_max] = infinity, or a large number.

	I choose max_P as a number between 0.01 and 0.1, so that
	C1 = max_P/(th_max-th_min) is power of two: C1 = 2^(-C1log)

	Parameters, settable by user (with default values):

	qmaxbytes=256K - hard limit on queue length, should be chosen >qth_max
	                 to allow packet bursts. This parameter does not
			 affect algorithm behaviour and can be chosen
			 arbitrarily high (well, less than ram size)
			 Really, this limit will never be achieved
			 if RED works correctly.
	qth_min=32K
	qth_max=128K   - qth_max should be at least 2*qth_min
	Wlog=8	       - log(1/W).
	Alog=Wlog      - fixed point position in th_min and th_max.
	Rlog=10
	C1log=24       - C1log = trueC1log+Alog-Rlog
	                 so that trueC1log=22 and max_P~0.02
	

NOTES:

Upper bound on W.
-----------------

	If you want to allow bursts of L packets of size S,
	you should choose W:

	L + 1 -th_min/S < (1-(1-W)^L)/W

	For th_min/S = 32

	log(W)	L
	-1	33
	-2	35
	-3	39
	-4	46
	-5	57
	-6	75
	-7	101
	-8	135
	-9	190
	etc.
 */

/*
 * Private per-qdisc state of the RED scheduler.  Every operation in this
 * file obtains it by casting sch->data.
 */
struct red_sched_data
{
/* Parameters (copied from the user-supplied struct redctl by red_init) */
	unsigned long	qmaxbytes;	/* HARD maximal queue length, in bytes */
	unsigned long	qth_min;	/* Min average length threshold: A scaled */
	unsigned long	qth_max;	/* Max average length threshold: A scaled */
	char		Alog;		/* Fixed-point position: averages are byte counts << Alog */
	char		Wlog;		/* log2(1/W): right shift applied in the averaging filter */
	char		Rlog;		/* Number of random bits drawn for qR */
	char		C1log;		/* log2(1/C1): right shift applied in the drop test */
	char		Slog;		/* Idle time is shifted right by Slog to index Stab[] */
	char		Stab[256];	/* Right shift applied to qave after an idle period */

/* Variables */
	unsigned long	qbytes;		/* Queue length in bytes	*/
	unsigned long	qave;		/* Average queue length: A scaled */
	int		qcount;		/* Packets since last random number generation;
					   set to -1 by red_init and red_reset */
	unsigned	qR;		/* Cached random number [0..1<<Rlog) */
	psched_time_t	qidlestart;	/* Start of idle period; "past perfect" when
					   no idle period is pending */
};

/* Stolen from igmp.c. */

/*
 * Return a pseudo-random number made of the low `log` bits of a linear
 * congruential sequence XOR-ed with jiffies, i.e. a value in [0, 1<<log).
 *
 * log <= 0 yields 0; a log at least as wide as `unsigned` yields all bits.
 * The mask is built in unsigned arithmetic because shifting the signed
 * constant 1 by 31 or more (or by a negative count) is undefined, and
 * `log` comes from a user-supplied parameter.
 *
 * The generator state is a single static variable that is advanced on
 * every call, whatever `log` is.
 */
static __inline__ unsigned red_random(int log)
{
	static unsigned long seed=152L;
	unsigned mask;

	if (log <= 0)
		mask = 0;
	else if (log >= (int)(8*sizeof(unsigned)))
		mask = ~0U;
	else
		mask = (1U<<log)-1;

	seed=seed*69069L+1;
	return (seed^jiffies)&mask;
}

/*
 * red_enqueue - apply the RED admission test to one packet.
 *
 * skb: packet offered to the queue; it is either appended to sch->q or
 *      freed here.
 * sch: the qdisc; sch->data holds the struct red_sched_data.
 *
 * Returns 1 if the packet was queued, 0 if it was dropped.
 *
 * Steps:
 *  1. If an idle period is pending (qidlestart was stamped by red_dequeue
 *     when the queue ran empty), decay the average by the shift that
 *     Stab[] gives for the measured idle time.
 *  2. Fold the current backlog into the average:
 *     qave += (qbytes<<Alog - qave) / 2^Wlog.
 *  3. qave <  qth_min: accept (subject to the hard byte limit).
 *     qave >= qth_max: drop.
 *     otherwise: drop once (qave - qth_min) * qcount >> C1log reaches the
 *     cached random number qR, where qcount counts the packets accepted
 *     since qR was last drawn.
 */
static int
red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct red_sched_data *q = (struct red_sched_data *)sch->data;
	unsigned long qlen;
	psched_time_t now;

	if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
		long us_idle;

		/* Measure the idle time first; only then forget its start.
		   Clearing qidlestart before the subtraction would make the
		   difference meaningless. */
		PSCHED_GET_TIME(now);
		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, (256<<q->Slog)-1, 0);
		PSCHED_SET_PASTPERFECT(q->qidlestart);

/* It is wrong, but I do not think that the SF+VJ proposal is reasonable
   and I have not invented anything more clever 8)

   The problem: ideally, the average queue length recalculation should
   be done over constant clock intervals. That is too expensive, so the
   calculation is driven by outgoing packets.
   When the queue is idle we have to model this clock by hand.

   SF+VJ proposed to "generate" m = (idletime/bandwidth)*average_pkt_size
   dummy packets as a burst after the idle time, i.e.

          q->qave *= (1-W)^m

   It is apparently an overcomplicated solution (e.g. we have to precompute
   a table to make this calculation in reasonable time).
   I believe that a simpler model may be used here,
   but it is a field for experiments.
*/
		q->qave >>= q->Stab[(us_idle>>q->Slog)&0xFF];
	}

	/* Low-pass filter.  qave and qlen are unsigned, so the two directions
	   are handled separately: a plain (qlen - qave) >> Wlog would wrap
	   to a huge positive step whenever the backlog is below the
	   average, instead of pulling the average down. */
	qlen = q->qbytes << q->Alog;
	if (qlen >= q->qave)
		q->qave += (qlen - q->qave) >> q->Wlog;
	else
		q->qave -= (q->qave - qlen) >> q->Wlog;

	if (q->qave < q->qth_min) {
		/* Uncongested: restart the early-drop packet counter. */
		q->qcount = -1;
		goto enqueue;
	}
	if (q->qave >= q->qth_max) {
		q->qcount = -1;
		goto drop;
	}

	/* Between the thresholds.  The counter is advanced exactly once per
	   packet and is NOT reset when the packet is accepted; otherwise
	   the drop test below could never be reached. */
	if (++q->qcount) {
		if ((((q->qave - q->qth_min)*q->qcount)>>q->C1log) < q->qR)
			goto enqueue;
		q->qcount = 0;
		q->qR = red_random(q->Rlog);
		goto drop;
	}
	/* First packet since the counter was reset: draw the random number
	   that the following packets are compared against. */
	q->qR = red_random(q->Rlog);

enqueue:
	if (q->qbytes <= q->qmaxbytes) {
		skb_queue_tail(&sch->q, skb);
		q->qbytes += skb->len;
		return 1;
	}
	/* Hard byte limit exceeded: fall through and drop. */
drop:
	kfree_skb(skb);
	return 0;
}

/*
 * red_dequeue - take the packet at the head of the queue.
 *
 * Returns the packet after subtracting its length from the byte backlog,
 * or NULL when the queue is empty.  In the empty case the current time is
 * stored in qidlestart so that the next enqueue can account for the idle
 * period.
 */
static struct sk_buff *
red_dequeue(struct Qdisc* sch)
{
	struct red_sched_data *red = (struct red_sched_data *)sch->data;
	struct sk_buff *head = skb_dequeue(&sch->q);

	if (head == NULL) {
		/* Nothing to send: remember when idling started. */
		PSCHED_GET_TIME(red->qidlestart);
		return NULL;
	}

	red->qbytes -= head->len;
	return head;
}

/*
 * red_reset - drop every queued packet and return the RED state to its
 * initial values (no backlog, zero average, no pending idle period,
 * packet counter at -1).
 *
 * If the byte counter does not reach zero after the queue has been
 * emptied, the leftover is reported via printk and then cleared.
 */
static void
red_reset(struct Qdisc* sch)
{
	struct red_sched_data *red = (struct red_sched_data *)sch->data;
	struct sk_buff *pkt;

	for (pkt = skb_dequeue(&sch->q); pkt != NULL; pkt = skb_dequeue(&sch->q)) {
		red->qbytes -= pkt->len;
		kfree_skb(pkt);
	}

	/* Accounting mismatch: report it, then force the counter to zero. */
	if (red->qbytes != 0) {
		printk("red_reset: qbytes=%lu\n", red->qbytes);
		red->qbytes = 0;
	}

	red->qcount = -1;
	red->qave = 0;
	PSCHED_SET_PASTPERFECT(red->qidlestart);
}

/* Non-zero when `shift` is a defined shift count for an object `bits` wide. */
static __inline__ int red_shift_ok(int shift, unsigned bits)
{
	return shift >= 0 && (unsigned)shift < bits;
}

/*
 * red_init - configure a RED qdisc from the user-supplied struct redctl.
 *
 * sch:  qdisc being set up; its private area receives the parameters.
 * pctl: control block; pctl->args must hold at least a struct redctl.
 *
 * Returns 0 on success, or -EINVAL when the argument block is too short
 * or when a parameter that is later used as a shift count is out of
 * range.  Nothing is written to the qdisc when -EINVAL is returned.
 *
 * Why the range checks: Alog, Wlog, C1log and every Stab[] entry shift an
 * unsigned long in red_enqueue; Rlog is used as `1<<Rlog` and Slog as
 * `256<<Slog`, both in int arithmetic.  A negative or over-wide shift
 * count is undefined behaviour, so such configurations are refused here.
 */
static int red_init(struct Qdisc *sch, struct pschedctl *pctl)
{
	struct red_sched_data *q;
	struct redctl *ctl = (struct redctl*)pctl->args;
	const unsigned long_bits = 8*sizeof(unsigned long);
	const unsigned int_bits = 8*sizeof(int);
	const char *stab;
	int i;

	q = (struct red_sched_data *)sch->data;

	if (pctl->arglen < sizeof(struct redctl))
		return -EINVAL;

	if (!red_shift_ok(ctl->Alog, long_bits) ||
	    !red_shift_ok(ctl->Wlog, long_bits) ||
	    !red_shift_ok(ctl->C1log, long_bits))
		return -EINVAL;

	/* 1<<Rlog and 256<<Slog must stay below the int sign bit. */
	if (!red_shift_ok(ctl->Rlog, int_bits - 1) ||
	    !red_shift_ok(ctl->Slog, int_bits - 9))
		return -EINVAL;

	/* Look at the table as the same 256 bytes that are copied below. */
	stab = (const char *)ctl->Stab;
	for (i = 0; i < 256; i++) {
		if (!red_shift_ok(stab[i], long_bits))
			return -EINVAL;
	}

	q->Wlog = ctl->Wlog;
	q->Alog = ctl->Alog;
	q->Rlog = ctl->Rlog;
	q->C1log = ctl->C1log;
	q->Slog = ctl->Slog;
	q->qth_min = ctl->qth_min;
	q->qth_max = ctl->qth_max;
	q->qmaxbytes = ctl->qmaxbytes;
	memcpy(q->Stab, ctl->Stab, 256);

	/* Start with no packets counted and no idle period pending. */
	q->qcount = -1;
	PSCHED_SET_PASTPERFECT(q->qidlestart);
	return 0;
}

/*
 * Operations table for the RED discipline; init_module passes it to
 * register_qdisc().
 *
 * The initializer is positional and struct Qdisc_ops is declared outside
 * this file, so the meaning of slots that do not hold one of this file's
 * functions is marked as presumed below.
 */
struct Qdisc_ops red_ops =
{
	NULL,				/* NOTE(review): presumably list linkage -- confirm against struct Qdisc_ops */
	"red",				/* presumably the discipline's name -- confirm */
	0,				/* NOTE(review): meaning not visible in this file -- confirm */
	sizeof(struct red_sched_data),	/* presumably the size of the private area reached via sch->data */
	red_enqueue,			/* queue or drop an arriving packet */
	red_dequeue,			/* hand out the next packet */
	red_reset,			/* flush the queue and clear the state */
	NULL,				/* NOTE(review): unused hook, presumably destroy -- confirm */
	red_init,			/* load the parameters from struct redctl */
	NULL				/* NOTE(review): unused hook -- confirm which one */
};


#ifdef MODULE
#include <linux/module.h>
/*
 * Module entry point: register the RED discipline.
 *
 * The use count is raised before registering and is only lowered again
 * if registration fails, so a successfully loaded module stays pinned.
 * Returns whatever register_qdisc() returned (0 on success).
 */
int init_module(void)
{
	int rc;

	/* Load once and never free it. */
	MOD_INC_USE_COUNT;

	rc = register_qdisc(&red_ops);
	if (rc == 0)
		return 0;

	/* Registration failed: give the reference back. */
	MOD_DEC_USE_COUNT;
	return rc;
}

/*
 * Module exit hook.  Intentionally empty: init_module raises the use
 * count and never lowers it after a successful registration, and nothing
 * is unregistered here.
 */
void cleanup_module(void) 
{
}
#endif