author    Ralf Baechle <ralf@linux-mips.org>  1999-06-13 16:29:25 +0000
committer Ralf Baechle <ralf@linux-mips.org>  1999-06-13 16:29:25 +0000
commit    db7d4daea91e105e3859cf461d7e53b9b77454b2
tree      9bb65b95440af09e8aca63abe56970dd3360cc57  /net/sched
parent    9c1c01ead627bdda9211c9abd5b758d6c687d8ac
Merge with Linux 2.2.8.
Diffstat (limited to 'net/sched')
-rw-r--r--  net/sched/Config.in        7
-rw-r--r--  net/sched/Makefile        10
-rw-r--r--  net/sched/cls_api.c       58
-rw-r--r--  net/sched/cls_fw.c       305
-rw-r--r--  net/sched/cls_route.c    594
-rw-r--r--  net/sched/cls_rsvp.h      81
-rw-r--r--  net/sched/cls_u32.c       59
-rw-r--r--  net/sched/estimator.c      4
-rw-r--r--  net/sched/police.c        61
-rw-r--r--  net/sched/sch_api.c      447
-rw-r--r--  net/sched/sch_cbq.c      335
-rw-r--r--  net/sched/sch_csz.c       17
-rw-r--r--  net/sched/sch_fifo.c      14
-rw-r--r--  net/sched/sch_generic.c   39
-rw-r--r--  net/sched/sch_prio.c     120
-rw-r--r--  net/sched/sch_red.c        4
-rw-r--r--  net/sched/sch_sfq.c       40
-rw-r--r--  net/sched/sch_tbf.c      100
-rw-r--r--  net/sched/sch_teql.c       1
19 files changed, 1819 insertions(+), 477 deletions(-)
diff --git a/net/sched/Config.in b/net/sched/Config.in
index 5d497a050..ffb7a4810 100644
--- a/net/sched/Config.in
+++ b/net/sched/Config.in
@@ -18,10 +18,11 @@ if [ "$CONFIG_NET_QOS" = "y" ]; then
fi
bool 'Packet classifier API' CONFIG_NET_CLS
if [ "$CONFIG_NET_CLS" = "y" ]; then
- bool 'Routing tables based classifier' CONFIG_NET_CLS_ROUTE
- if [ "$CONFIG_IP_FIREWALL" = "y" ]; then
- bool 'Firewall based classifier' CONFIG_NET_CLS_FW
+ tristate 'Routing table based classifier' CONFIG_NET_CLS_ROUTE4
+ if [ "$CONFIG_NET_CLS_ROUTE4" != "n" ]; then
+ define_bool CONFIG_NET_CLS_ROUTE y
fi
+ tristate 'Firewall based classifier' CONFIG_NET_CLS_FW
tristate 'U32 classifier' CONFIG_NET_CLS_U32
if [ "$CONFIG_NET_QOS" = "y" ]; then
tristate 'Special RSVP classifier' CONFIG_NET_CLS_RSVP
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 21a1cf07a..6e1169fab 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -125,12 +125,20 @@ else
endif
endif
-ifeq ($(CONFIG_NET_CLS_ROUTE), y)
+ifeq ($(CONFIG_NET_CLS_ROUTE4), y)
O_OBJS += cls_route.o
+else
+ ifeq ($(CONFIG_NET_CLS_ROUTE4), m)
+ M_OBJS += cls_route.o
+ endif
endif
ifeq ($(CONFIG_NET_CLS_FW), y)
O_OBJS += cls_fw.o
+else
+ ifeq ($(CONFIG_NET_CLS_FW), m)
+ M_OBJS += cls_fw.o
+ endif
endif
endif
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 081896dc5..683063137 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -7,6 +7,10 @@
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ *
+ * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
*/
#include <asm/uaccess.h>
@@ -27,6 +31,7 @@
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
+#include <linux/kmod.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
@@ -87,21 +92,13 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
/* Select new prio value from the range, managed by kernel. */
-static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp, u32 prio)
+static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp)
{
u32 first = TC_H_MAKE(0xC0000000U,0U);
- if (!tp || tp->next == NULL)
- return first;
-
- if (prio == TC_H_MAKE(0xFFFF0000U,0U))
- first = tp->prio+1;
- else
+ if (tp)
first = tp->prio-1;
- if (first == prio)
- first = tp->prio;
-
return first;
}
@@ -129,10 +126,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
/* If no priority is given, user wants we allocated it. */
if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
return -ENOENT;
- if (n->nlmsg_flags&NLM_F_APPEND)
- prio = TC_H_MAKE(0xFFFF0000U,0U);
- else
- prio = TC_H_MAKE(0x80000000U,0U);
+ prio = TC_H_MAKE(0x80000000U,0U);
}
/* Find head of filter chain. */
@@ -194,6 +188,18 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
if ((tp = kmalloc(sizeof(*tp), GFP_KERNEL)) == NULL)
goto errout;
tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND-1]);
+#ifdef CONFIG_KMOD
+ if (tp_ops==NULL && tca[TCA_KIND-1] != NULL) {
+ struct rtattr *kind = tca[TCA_KIND-1];
+ char module_name[4 + IFNAMSIZ + 1];
+
+ if (RTA_PAYLOAD(kind) <= IFNAMSIZ) {
+ sprintf(module_name, "cls_%s", (char*)RTA_DATA(kind));
+ request_module (module_name);
+ tp_ops = tcf_proto_lookup_ops(kind);
+ }
+ }
+#endif
if (tp_ops == NULL) {
err = -EINVAL;
kfree(tp);
@@ -202,7 +208,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
memset(tp, 0, sizeof(*tp));
tp->ops = tp_ops;
tp->protocol = protocol;
- tp->prio = nprio ? : tcf_auto_prio(*back, prio);
+ tp->prio = nprio ? : tcf_auto_prio(*back);
tp->q = q;
tp->classify = tp_ops->classify;
tp->classid = parent;
@@ -221,6 +227,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
if (fh == 0) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
*back = tp->next;
+ synchronize_bh();
+
tp->ops->destroy(tp);
kfree(tp);
err = 0;
@@ -249,7 +257,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
}
}
- err = tp->ops->change(tp, t->tcm_handle, tca, &fh);
+ err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh);
if (err == 0)
tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
@@ -336,12 +344,16 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
return skb->len;
- if ((q = qdisc_lookup(dev, tcm->tcm_parent)) == NULL)
+ if (!tcm->tcm_parent)
+ q = dev->qdisc_sleeping;
+ else
+ q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+ if (q == NULL)
return skb->len;
- cops = q->ops->cl_ops;
+ if ((cops = q->ops->cl_ops) == NULL)
+ goto errout;
if (TC_H_MIN(tcm->tcm_parent)) {
- if (cops)
- cl = cops->get(q, tcm->tcm_parent);
+ cl = cops->get(q, tcm->tcm_parent);
if (cl == 0)
goto errout;
}
@@ -360,7 +372,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
TC_H_MIN(tcm->tcm_info) != tp->protocol)
continue;
if (t > s_t)
- memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int));
+ memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
if (cb->args[1] == 0) {
if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) {
@@ -418,8 +430,8 @@ __initfunc(int tc_filter_init(void))
#ifdef CONFIG_NET_CLS_U32
INIT_TC_FILTER(u32);
#endif
-#ifdef CONFIG_NET_CLS_ROUTE
- INIT_TC_FILTER(route);
+#ifdef CONFIG_NET_CLS_ROUTE4
+ INIT_TC_FILTER(route4);
#endif
#ifdef CONFIG_NET_CLS_FW
INIT_TC_FILTER(fw);
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 0fab64dda..e92b846ee 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -1,5 +1,5 @@
/*
- * net/sched/cls_fw.c Routing table based packet classifier.
+ * net/sched/cls_fw.c Classifier mapping ipchains' fwmark to traffic class.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -7,8 +7,13 @@
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_walk off by one
+ * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_delete killed all the filter (and kernel).
*/
+#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -34,15 +39,56 @@
#include <net/sock.h>
#include <net/pkt_sched.h>
+struct fw_head
+{
+ struct fw_filter *ht[256];
+};
+
+struct fw_filter
+{
+ struct fw_filter *next;
+ u32 id;
+ struct tcf_result res;
+#ifdef CONFIG_NET_CLS_POLICE
+ struct tcf_police *police;
+#endif
+};
+
+static __inline__ int fw_hash(u32 handle)
+{
+ return handle&0xFF;
+}
static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
struct tcf_result *res)
{
- u32 clid = skb->fwmark;
+ struct fw_head *head = (struct fw_head*)tp->root;
+ struct fw_filter *f;
+#ifdef CONFIG_IP_FIREWALL
+ u32 id = skb->fwmark;
+#else
+ u32 id = 0;
+#endif
- if (clid && (TC_H_MAJ(clid) == 0 ||
- !(TC_H_MAJ(clid^tp->q->handle)))) {
- res->classid = clid;
+ if (head == NULL)
+ goto old_method;
+
+ for (f=head->ht[fw_hash(id)]; f; f=f->next) {
+ if (f->id == id) {
+ *res = f->res;
+#ifdef CONFIG_NET_CLS_POLICE
+ if (f->police)
+ return tcf_police(skb, f->police);
+#endif
+ return 0;
+ }
+ }
+ return -1;
+
+old_method:
+ if (id && (TC_H_MAJ(id) == 0 ||
+ !(TC_H_MAJ(id^tp->q->handle)))) {
+ res->classid = id;
res->class = 0;
return 0;
}
@@ -51,6 +97,16 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
{
+ struct fw_head *head = (struct fw_head*)tp->root;
+ struct fw_filter *f;
+
+ if (head == NULL)
+ return 0;
+
+ for (f=head->ht[fw_hash(handle)]; f; f=f->next) {
+ if (f->id == handle)
+ return (unsigned long)f;
+ }
return 0;
}
@@ -60,24 +116,236 @@ static void fw_put(struct tcf_proto *tp, unsigned long f)
static int fw_init(struct tcf_proto *tp)
{
+ MOD_INC_USE_COUNT;
return 0;
}
static void fw_destroy(struct tcf_proto *tp)
{
+ struct fw_head *head = (struct fw_head*)xchg(&tp->root, NULL);
+ struct fw_filter *f;
+ int h;
+
+ if (head == NULL) {
+ MOD_DEC_USE_COUNT;
+ return;
+ }
+
+ for (h=0; h<256; h++) {
+ while ((f=head->ht[h]) != NULL) {
+ unsigned long cl;
+ head->ht[h] = f->next;
+
+ if ((cl = cls_set_class(&f->res.class, 0)) != 0)
+ tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
+#ifdef CONFIG_NET_CLS_POLICE
+ tcf_police_release(f->police);
+#endif
+ kfree(f);
+ }
+ }
+ kfree(head);
+ MOD_DEC_USE_COUNT;
}
static int fw_delete(struct tcf_proto *tp, unsigned long arg)
{
+ struct fw_head *head = (struct fw_head*)tp->root;
+ struct fw_filter *f = (struct fw_filter*)arg;
+ struct fw_filter **fp;
+
+ if (head == NULL || f == NULL)
+ return -EINVAL;
+
+ for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
+ if (*fp == f) {
+ unsigned long cl;
+
+ *fp = f->next;
+ synchronize_bh();
+
+ if ((cl = cls_set_class(&f->res.class, 0)) != 0)
+ tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
+#ifdef CONFIG_NET_CLS_POLICE
+ tcf_police_release(f->police);
+#endif
+ kfree(f);
+ return 0;
+ }
+ }
return -EINVAL;
}
-static int fw_change(struct tcf_proto *tp, u32 handle,
- struct rtattr **tca,
- unsigned long *arg)
+static int fw_change(struct tcf_proto *tp, unsigned long base,
+ u32 handle,
+ struct rtattr **tca,
+ unsigned long *arg)
{
- return handle ? -EINVAL : 0;
+ struct fw_head *head = (struct fw_head*)tp->root;
+ struct fw_filter *f;
+ struct rtattr *opt = tca[TCA_OPTIONS-1];
+ struct rtattr *tb[TCA_FW_MAX];
+ int err;
+
+ if (!opt)
+ return handle ? -EINVAL : 0;
+
+ if (rtattr_parse(tb, TCA_FW_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0)
+ return -EINVAL;
+
+ if ((f = (struct fw_filter*)*arg) != NULL) {
+ /* Node exists: adjust only classid */
+
+ if (f->id != handle && handle)
+ return -EINVAL;
+ if (tb[TCA_FW_CLASSID-1]) {
+ unsigned long cl;
+
+ f->res.classid = *(u32*)RTA_DATA(tb[TCA_FW_CLASSID-1]);
+ cl = tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid);
+ cl = cls_set_class(&f->res.class, cl);
+ if (cl)
+ tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
+ }
+#ifdef CONFIG_NET_CLS_POLICE
+ if (tb[TCA_FW_POLICE-1]) {
+ struct tcf_police *police = tcf_police_locate(tb[TCA_FW_POLICE-1], tca[TCA_RATE-1]);
+
+ police = xchg(&f->police, police);
+ synchronize_bh();
+
+ tcf_police_release(police);
+ }
+#endif
+ return 0;
+ }
+
+ if (!handle)
+ return -EINVAL;
+
+ if (head == NULL) {
+ head = kmalloc(sizeof(struct fw_head), GFP_KERNEL);
+ if (head == NULL)
+ return -ENOBUFS;
+ memset(head, 0, sizeof(*head));
+
+ tp->root = head;
+ synchronize_bh();
+ }
+
+ f = kmalloc(sizeof(struct fw_filter), GFP_KERNEL);
+ if (f == NULL)
+ return -ENOBUFS;
+ memset(f, 0, sizeof(*f));
+
+ f->id = handle;
+
+ if (tb[TCA_FW_CLASSID-1]) {
+ err = -EINVAL;
+ if (RTA_PAYLOAD(tb[TCA_FW_CLASSID-1]) != 4)
+ goto errout;
+ f->res.classid = *(u32*)RTA_DATA(tb[TCA_FW_CLASSID-1]);
+ cls_set_class(&f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
+ }
+
+#ifdef CONFIG_NET_CLS_POLICE
+ if (tb[TCA_FW_POLICE-1])
+ f->police = tcf_police_locate(tb[TCA_FW_POLICE-1], tca[TCA_RATE-1]);
+#endif
+
+ f->next = head->ht[fw_hash(handle)];
+ wmb();
+ head->ht[fw_hash(handle)] = f;
+
+ *arg = (unsigned long)f;
+ return 0;
+
+errout:
+ if (f)
+ kfree(f);
+ return err;
+}
+
+static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct fw_head *head = (struct fw_head*)tp->root;
+ int h;
+
+ if (head == NULL)
+ arg->stop = 1;
+
+ if (arg->stop)
+ return;
+
+ for (h = 0; h < 256; h++) {
+ struct fw_filter *f;
+
+ for (f = head->ht[h]; f; f = f->next) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+ }
+}
+
+#ifdef CONFIG_RTNETLINK
+static int fw_dump(struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct fw_filter *f = (struct fw_filter*)fh;
+ unsigned char *b = skb->tail;
+ struct rtattr *rta;
+
+ if (f == NULL)
+ return skb->len;
+
+ t->tcm_handle = f->id;
+
+ if (!f->res.classid
+#ifdef CONFIG_NET_CLS_POLICE
+ && !f->police
+#endif
+ )
+ return skb->len;
+
+ rta = (struct rtattr*)b;
+ RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+ if (f->res.classid)
+ RTA_PUT(skb, TCA_FW_CLASSID, 4, &f->res.classid);
+#ifdef CONFIG_NET_CLS_POLICE
+ if (f->police) {
+ struct rtattr * p_rta = (struct rtattr*)skb->tail;
+
+ RTA_PUT(skb, TCA_FW_POLICE, 0, NULL);
+
+ if (tcf_police_dump(skb, f->police) < 0)
+ goto rtattr_failure;
+
+ p_rta->rta_len = skb->tail - (u8*)p_rta;
+ }
+#endif
+
+ rta->rta_len = skb->tail - b;
+#ifdef CONFIG_NET_CLS_POLICE
+ if (f->police) {
+ RTA_PUT(skb, TCA_STATS, sizeof(struct tc_stats), &f->police->stats);
+ }
+#endif
+ return skb->len;
+
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
}
+#endif
+
struct tcf_proto_ops cls_fw_ops = {
NULL,
@@ -90,5 +358,22 @@ struct tcf_proto_ops cls_fw_ops = {
fw_put,
fw_change,
fw_delete,
- NULL,
+ fw_walk,
+#ifdef CONFIG_RTNETLINK
+ fw_dump
+#else
+ NULL
+#endif
};
+
+#ifdef MODULE
+int init_module(void)
+{
+ return register_tcf_proto_ops(&cls_fw_ops);
+}
+
+void cleanup_module(void)
+{
+ unregister_tcf_proto_ops(&cls_fw_ops);
+}
+#endif
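
For reference, the cls_fw rework above replaces the old "treat fwmark as a classid" shortcut with a real filter table: a 256-bucket hash keyed on the low byte of skb->fwmark, where each bucket is a linked list of exact-match entries carrying a classid (and optionally a policer). The following is a minimal standalone sketch of that lookup, assuming only what the hunks show; the demo_* names are invented for illustration and are not kernel identifiers.

#include <stdio.h>
#include <stdlib.h>

/* Toy model of the cls_fw lookup: 256 buckets keyed on the low byte
 * of the firewall mark, each bucket a linked list of exact-match ids.
 * No error handling; this is a sketch, not kernel code. */
struct demo_filter {
	struct demo_filter *next;
	unsigned int id;	/* fwmark this entry matches */
	unsigned int classid;	/* class returned on a hit */
};

static struct demo_filter *ht[256];

static unsigned int demo_hash(unsigned int mark)
{
	return mark & 0xFF;
}

static void demo_add(unsigned int id, unsigned int classid)
{
	struct demo_filter *f = calloc(1, sizeof(*f));
	f->id = id;
	f->classid = classid;
	f->next = ht[demo_hash(id)];
	ht[demo_hash(id)] = f;
}

static int demo_classify(unsigned int mark, unsigned int *classid)
{
	struct demo_filter *f;

	for (f = ht[demo_hash(mark)]; f; f = f->next) {
		if (f->id == mark) {
			*classid = f->classid;
			return 0;	/* hit */
		}
	}
	return -1;			/* no match */
}

int main(void)
{
	unsigned int cls;

	demo_add(0x103, 0x10001);	/* marks 0x103 and 0x003 share bucket 3 */
	demo_add(0x003, 0x10002);

	if (demo_classify(0x103, &cls) == 0)
		printf("mark 0x103 -> classid %#x\n", cls);
	if (demo_classify(0x005, &cls) != 0)
		printf("mark 0x005 -> no match\n");
	return 0;
}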
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index a78f2090e..f83e79134 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -1,5 +1,5 @@
/*
- * net/sched/cls_route.c Routing table based packet classifier.
+ * net/sched/cls_route.c ROUTE4 classifier.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -10,6 +10,7 @@
*/
#include <linux/module.h>
+#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
@@ -34,65 +35,598 @@
#include <net/sock.h>
#include <net/pkt_sched.h>
+/*
+ 1. For now we assume that route tags < 256.
+ It allows to use direct table lookups, instead of hash tables.
+ 2. For now we assume that "from TAG" and "fromdev DEV" statements
+ are mutually exclusive.
+ 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
+ */
+
+struct route4_fastmap
+{
+ struct route4_filter *filter;
+ u32 id;
+ int iif;
+};
+
+struct route4_head
+{
+ struct route4_fastmap fastmap[16];
+ struct route4_bucket *table[256+1];
+};
+
+struct route4_bucket
+{
+ struct route4_filter *ht[16+16+1];
+};
+
+struct route4_filter
+{
+ struct route4_filter *next;
+ u32 id;
+ int iif;
+
+ struct tcf_result res;
+#ifdef CONFIG_NET_CLS_POLICE
+ struct tcf_police *police;
+#endif
+
+ u32 handle;
+ struct route4_bucket *bkt;
+};
+
+#define ROUTE4_FAILURE ((struct route4_filter*)(-1L))
+
+static __inline__ int route4_fastmap_hash(u32 id, int iif)
+{
+ return id&0xF;
+}
+
+static void route4_reset_fastmap(struct route4_head *head, u32 id)
+{
+ start_bh_atomic();
+ memset(head->fastmap, 0, sizeof(head->fastmap));
+ end_bh_atomic();
+}
+
+static void __inline__
+route4_set_fastmap(struct route4_head *head, u32 id, int iif,
+ struct route4_filter *f)
+{
+ int h = route4_fastmap_hash(id, iif);
+ head->fastmap[h].id = id;
+ head->fastmap[h].iif = iif;
+ head->fastmap[h].filter = f;
+}
+
+static __inline__ int route4_hash_to(u32 id)
+{
+ return id&0xFF;
+}
+
+static __inline__ int route4_hash_from(u32 id)
+{
+ return (id>>16)&0xF;
+}
+
+static __inline__ int route4_hash_iif(int iif)
+{
+ return 16 + ((iif>>16)&0xF);
+}
+
+static __inline__ int route4_hash_wild(void)
+{
+ return 32;
+}
+
+#ifdef CONFIG_NET_CLS_POLICE
+#define IF_ROUTE_POLICE \
+if (f->police) { \
+ int pol_res = tcf_police(skb, f->police); \
+ if (pol_res >= 0) return pol_res; \
+ dont_cache = 1; \
+ continue; \
+} \
+if (!dont_cache)
+#else
+#define IF_ROUTE_POLICE
+#endif
+
-static int route_classify(struct sk_buff *skb, struct tcf_proto *tp,
- struct tcf_result *res)
+static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
+ struct tcf_result *res)
{
- struct dst_entry *dst = skb->dst;
+ struct route4_head *head = (struct route4_head*)tp->root;
+ struct dst_entry *dst;
+ struct route4_bucket *b;
+ struct route4_filter *f;
+#ifdef CONFIG_NET_CLS_POLICE
+ int dont_cache = 0;
+#endif
+ u32 id, h;
+ int iif;
- if (dst) {
- u32 clid = dst->tclassid;
+ if ((dst = skb->dst) == NULL)
+ goto failure;
- if (clid && (TC_H_MAJ(clid) == 0 ||
- !(TC_H_MAJ(clid^tp->q->handle)))) {
- res->classid = clid;
- res->class = 0;
+ id = dst->tclassid;
+ if (head == NULL)
+ goto old_method;
+
+ iif = ((struct rtable*)dst)->key.iif;
+
+ h = route4_fastmap_hash(id, iif);
+ if (id == head->fastmap[h].id &&
+ iif == head->fastmap[h].iif &&
+ (f = head->fastmap[h].filter) != NULL) {
+ if (f == ROUTE4_FAILURE)
+ goto failure;
+
+ *res = f->res;
+ return 0;
+ }
+
+ h = route4_hash_to(id);
+
+restart:
+ if ((b = head->table[h]) != NULL) {
+ f = b->ht[route4_hash_from(id)];
+
+ for ( ; f; f = f->next) {
+ if (f->id == id) {
+ *res = f->res;
+ IF_ROUTE_POLICE route4_set_fastmap(head, id, iif, f);
+ return 0;
+ }
+ }
+
+ for (f = b->ht[route4_hash_iif(iif)]; f; f = f->next) {
+ if (f->iif == iif) {
+ *res = f->res;
+ IF_ROUTE_POLICE route4_set_fastmap(head, id, iif, f);
+ return 0;
+ }
+ }
+
+ for (f = b->ht[route4_hash_wild()]; f; f = f->next) {
+ *res = f->res;
+ IF_ROUTE_POLICE route4_set_fastmap(head, id, iif, f);
return 0;
}
+
+ }
+ if (h < 256) {
+ h = 256;
+ id &= ~0xFFFF;
+ goto restart;
+ }
+
+#ifdef CONFIG_NET_CLS_POLICE
+ if (!dont_cache)
+#endif
+ route4_set_fastmap(head, id, iif, ROUTE4_FAILURE);
+failure:
+ return -1;
+
+old_method:
+ if (id && (TC_H_MAJ(id) == 0 ||
+ !(TC_H_MAJ(id^tp->q->handle)))) {
+ res->classid = id;
+ res->class = 0;
+ return 0;
}
return -1;
}
-static unsigned long route_get(struct tcf_proto *tp, u32 handle)
+static u32 to_hash(u32 id)
+{
+ u32 h = id&0xFF;
+ if (id&0x8000)
+ h += 256;
+ return h;
+}
+
+static u32 from_hash(u32 id)
{
+ id &= 0xFFFF;
+ if (id == 0xFFFF)
+ return 32;
+ if (!(id & 0x8000)) {
+ if (id > 255)
+ return 256;
+ return id&0xF;
+ }
+ return 16 + (id&0xF);
+}
+
+static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
+{
+ struct route4_head *head = (struct route4_head*)tp->root;
+ struct route4_bucket *b;
+ struct route4_filter *f;
+ unsigned h1, h2;
+
+ if (!head)
+ return 0;
+
+ h1 = to_hash(handle);
+ if (h1 > 256)
+ return 0;
+
+ h2 = from_hash(handle>>16);
+ if (h2 > 32)
+ return 0;
+
+ if ((b = head->table[h1]) != NULL) {
+ for (f = b->ht[h2]; f; f = f->next)
+ if (f->handle == handle)
+ return (unsigned long)f;
+ }
return 0;
}
-static void route_put(struct tcf_proto *tp, unsigned long f)
+static void route4_put(struct tcf_proto *tp, unsigned long f)
{
}
-static int route_init(struct tcf_proto *tp)
+static int route4_init(struct tcf_proto *tp)
{
+ MOD_INC_USE_COUNT;
return 0;
}
-static void route_destroy(struct tcf_proto *tp)
+static void route4_destroy(struct tcf_proto *tp)
{
+ struct route4_head *head = xchg(&tp->root, NULL);
+ int h1, h2;
+
+ if (head == NULL) {
+ MOD_DEC_USE_COUNT;
+ return;
+ }
+
+ for (h1=0; h1<=256; h1++) {
+ struct route4_bucket *b;
+
+ if ((b = head->table[h1]) != NULL) {
+ for (h2=0; h2<=32; h2++) {
+ struct route4_filter *f;
+
+ while ((f = b->ht[h2]) != NULL) {
+ unsigned long cl;
+
+ b->ht[h2] = f->next;
+ if ((cl = cls_set_class(&f->res.class, 0)) != 0)
+ tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
+#ifdef CONFIG_NET_CLS_POLICE
+ tcf_police_release(f->police);
+#endif
+ kfree(f);
+ }
+ }
+ kfree(b);
+ }
+ }
+ kfree(head);
+ MOD_DEC_USE_COUNT;
}
-static int route_delete(struct tcf_proto *tp, unsigned long arg)
+static int route4_delete(struct tcf_proto *tp, unsigned long arg)
{
- return -EINVAL;
+ struct route4_head *head = (struct route4_head*)tp->root;
+ struct route4_filter **fp, *f = (struct route4_filter*)arg;
+ unsigned h = f->handle;
+ struct route4_bucket *b;
+ int i;
+
+ if (!head || !f)
+ return -EINVAL;
+
+ b = f->bkt;
+
+ for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) {
+ if (*fp == f) {
+ unsigned long cl;
+
+ *fp = f->next;
+ synchronize_bh();
+
+ route4_reset_fastmap(head, f->id);
+
+ if ((cl = cls_set_class(&f->res.class, 0)) != 0)
+ tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
+
+#ifdef CONFIG_NET_CLS_POLICE
+ tcf_police_release(f->police);
+#endif
+ kfree(f);
+
+ /* Strip tree */
+
+ for (i=0; i<=32; i++)
+ if (b->ht[i])
+ return 0;
+
+ /* OK, session has no flows */
+ head->table[to_hash(h)] = NULL;
+ synchronize_bh();
+
+ kfree(b);
+ return 0;
+ }
+ }
+ return 0;
}
-static int route_change(struct tcf_proto *tp, u32 handle,
- struct rtattr **tca,
- unsigned long *arg)
+static int route4_change(struct tcf_proto *tp, unsigned long base,
+ u32 handle,
+ struct rtattr **tca,
+ unsigned long *arg)
{
- return handle ? -EINVAL : 0;
+ struct route4_head *head = tp->root;
+ struct route4_filter *f, *f1, **ins_f;
+ struct route4_bucket *b;
+ struct rtattr *opt = tca[TCA_OPTIONS-1];
+ struct rtattr *tb[TCA_ROUTE4_MAX];
+ unsigned h1, h2;
+ int err;
+
+ if (opt == NULL)
+ return handle ? -EINVAL : 0;
+
+ if (rtattr_parse(tb, TCA_ROUTE4_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0)
+ return -EINVAL;
+
+ if ((f = (struct route4_filter*)*arg) != NULL) {
+ /* Node exists: adjust only classid */
+
+ if (f->handle != handle && handle)
+ return -EINVAL;
+ if (tb[TCA_ROUTE4_CLASSID-1]) {
+ unsigned long cl;
+
+ f->res.classid = *(u32*)RTA_DATA(tb[TCA_ROUTE4_CLASSID-1]);
+ cl = cls_set_class(&f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
+ if (cl)
+ tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
+ }
+#ifdef CONFIG_NET_CLS_POLICE
+ if (tb[TCA_ROUTE4_POLICE-1]) {
+ struct tcf_police *police = tcf_police_locate(tb[TCA_ROUTE4_POLICE-1], tca[TCA_RATE-1]);
+
+ police = xchg(&f->police, police);
+ synchronize_bh();
+
+ tcf_police_release(police);
+ }
+#endif
+ return 0;
+ }
+
+ /* Now more serious part... */
+
+ if (head == NULL) {
+ head = kmalloc(sizeof(struct route4_head), GFP_KERNEL);
+ if (head == NULL)
+ return -ENOBUFS;
+ memset(head, 0, sizeof(struct route4_head));
+
+ tp->root = head;
+ synchronize_bh();
+ }
+
+ f = kmalloc(sizeof(struct route4_filter), GFP_KERNEL);
+ if (f == NULL)
+ return -ENOBUFS;
+
+ memset(f, 0, sizeof(*f));
+
+ err = -EINVAL;
+ f->handle = 0x8000;
+ if (tb[TCA_ROUTE4_TO-1]) {
+ if (handle&0x8000)
+ goto errout;
+ if (RTA_PAYLOAD(tb[TCA_ROUTE4_TO-1]) < 4)
+ goto errout;
+ f->id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_TO-1]);
+ if (f->id > 0xFF)
+ goto errout;
+ f->handle = f->id;
+ }
+ if (tb[TCA_ROUTE4_FROM-1]) {
+ u32 sid;
+ if (tb[TCA_ROUTE4_IIF-1])
+ goto errout;
+ if (RTA_PAYLOAD(tb[TCA_ROUTE4_FROM-1]) < 4)
+ goto errout;
+ sid = (*(u32*)RTA_DATA(tb[TCA_ROUTE4_FROM-1]));
+ if (sid > 0xFF)
+ goto errout;
+ f->handle |= sid<<16;
+ f->id |= sid<<16;
+ } else if (tb[TCA_ROUTE4_IIF-1]) {
+ if (RTA_PAYLOAD(tb[TCA_ROUTE4_IIF-1]) < 4)
+ goto errout;
+ f->iif = *(u32*)RTA_DATA(tb[TCA_ROUTE4_IIF-1]);
+ if (f->iif > 0x7FFF)
+ goto errout;
+ f->handle |= (f->iif|0x8000)<<16;
+ } else
+ f->handle |= 0xFFFF<<16;
+
+ if (handle) {
+ f->handle |= handle&0x7F00;
+ if (f->handle != handle)
+ goto errout;
+ }
+
+ if (tb[TCA_ROUTE4_CLASSID-1]) {
+ if (RTA_PAYLOAD(tb[TCA_ROUTE4_CLASSID-1]) < 4)
+ goto errout;
+ f->res.classid = *(u32*)RTA_DATA(tb[TCA_ROUTE4_CLASSID-1]);
+ }
+
+ h1 = to_hash(f->handle);
+ if ((b = head->table[h1]) == NULL) {
+ err = -ENOBUFS;
+ b = kmalloc(sizeof(struct route4_bucket), GFP_KERNEL);
+ if (b == NULL)
+ goto errout;
+ memset(b, 0, sizeof(*b));
+
+ head->table[h1] = b;
+ synchronize_bh();
+ }
+ f->bkt = b;
+
+ err = -EEXIST;
+ h2 = from_hash(f->handle>>16);
+ for (ins_f = &b->ht[h2]; (f1=*ins_f) != NULL; ins_f = &f1->next) {
+ if (f->handle < f1->handle)
+ break;
+ if (f1->handle == f->handle)
+ goto errout;
+ }
+
+ cls_set_class(&f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
+#ifdef CONFIG_NET_CLS_POLICE
+ if (tb[TCA_ROUTE4_POLICE-1])
+ f->police = tcf_police_locate(tb[TCA_ROUTE4_POLICE-1], tca[TCA_RATE-1]);
+#endif
+
+ f->next = f1;
+ wmb();
+ *ins_f = f;
+
+ route4_reset_fastmap(head, f->id);
+ *arg = (unsigned long)f;
+ return 0;
+
+errout:
+ if (f)
+ kfree(f);
+ return err;
+}
+
+static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct route4_head *head = tp->root;
+ unsigned h, h1;
+
+ if (head == NULL)
+ arg->stop = 1;
+
+ if (arg->stop)
+ return;
+
+ for (h = 0; h <= 256; h++) {
+ struct route4_bucket *b = head->table[h];
+
+ if (b) {
+ for (h1 = 0; h1 <= 32; h1++) {
+ struct route4_filter *f;
+
+ for (f = b->ht[h1]; f; f = f->next) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+ }
+ }
+ }
}
-struct tcf_proto_ops cls_route_ops = {
+#ifdef CONFIG_RTNETLINK
+static int route4_dump(struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct route4_filter *f = (struct route4_filter*)fh;
+ unsigned char *b = skb->tail;
+ struct rtattr *rta;
+ u32 id;
+
+ if (f == NULL)
+ return skb->len;
+
+ t->tcm_handle = f->handle;
+
+ rta = (struct rtattr*)b;
+ RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+ if (!(f->handle&0x8000)) {
+ id = f->id&0xFF;
+ RTA_PUT(skb, TCA_ROUTE4_TO, sizeof(id), &id);
+ }
+ if (f->handle&0x80000000) {
+ if ((f->handle>>16) != 0xFFFF)
+ RTA_PUT(skb, TCA_ROUTE4_IIF, sizeof(f->iif), &f->iif);
+ } else {
+ id = f->id>>16;
+ RTA_PUT(skb, TCA_ROUTE4_FROM, sizeof(id), &id);
+ }
+ if (f->res.classid)
+ RTA_PUT(skb, TCA_ROUTE4_CLASSID, 4, &f->res.classid);
+#ifdef CONFIG_NET_CLS_POLICE
+ if (f->police) {
+ struct rtattr * p_rta = (struct rtattr*)skb->tail;
+
+ RTA_PUT(skb, TCA_ROUTE4_POLICE, 0, NULL);
+
+ if (tcf_police_dump(skb, f->police) < 0)
+ goto rtattr_failure;
+
+ p_rta->rta_len = skb->tail - (u8*)p_rta;
+ }
+#endif
+
+ rta->rta_len = skb->tail - b;
+#ifdef CONFIG_NET_CLS_POLICE
+ if (f->police) {
+ RTA_PUT(skb, TCA_STATS, sizeof(struct tc_stats), &f->police->stats);
+ }
+#endif
+ return skb->len;
+
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+#endif
+
+struct tcf_proto_ops cls_route4_ops = {
NULL,
"route",
- route_classify,
- route_init,
- route_destroy,
-
- route_get,
- route_put,
- route_change,
- route_delete,
- NULL,
+ route4_classify,
+ route4_init,
+ route4_destroy,
+
+ route4_get,
+ route4_put,
+ route4_change,
+ route4_delete,
+ route4_walk,
+#ifdef CONFIG_RTNETLINK
+ route4_dump
+#else
+ NULL
+#endif
};
+
+#ifdef MODULE
+int init_module(void)
+{
+ return register_tcf_proto_ops(&cls_route4_ops);
+}
+
+void cleanup_module(void)
+{
+ unregister_tcf_proto_ops(&cls_route4_ops);
+}
+#endif
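
To make the new route4 hashing easier to follow: judging from to_hash(), from_hash() and route4_change() above, the 32-bit handle packs the "to" realm in its low 16 bits (bit 15 set when no TO was supplied) and the source selector in its high 16 bits (a "from" realm with the top bit of that half clear, an iif with it set, or 0xFFFF for the wildcard source). Below is a small standalone decoder written purely as a reading aid; it is simplified and skips the out-of-range cases that route4_get() rejects.

#include <stdio.h>

/* Illustrative decoder for the route4 handle layout implied above:
 *   bits  0..15 : "to" realm, or 0x8000 when no TO was supplied
 *   bits 16..31 : "from" realm (bit 31 clear), iif|0x8000 (bit 31 set),
 *                 or 0xFFFF for the wildcard source
 */
static unsigned to_bucket(unsigned handle)
{
	unsigned h = handle & 0xFF;
	if (handle & 0x8000)
		h += 256;		/* the "no TO" bucket */
	return h;			/* 0..256 */
}

static unsigned from_slot(unsigned handle)
{
	unsigned id = (handle >> 16) & 0xFFFF;
	if (id == 0xFFFF)
		return 32;		/* wildcard source */
	if (!(id & 0x8000))
		return id & 0xF;	/* "from" realm: slots 0..15 */
	return 16 + (id & 0xF);		/* "fromdev" iif: slots 16..31 */
}

int main(void)
{
	unsigned examples[] = {
		0xFFFF0007,	/* to 7, wildcard source */
		0x00030007,	/* to 7, from realm 3 */
		0x80058000,	/* no to, fromdev iif 5 */
	};
	for (unsigned i = 0; i < 3; i++)
		printf("handle %#010x -> bucket %u, slot %u\n",
		       examples[i], to_bucket(examples[i]), from_slot(examples[i]));
	return 0;
}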
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 4168f541f..48142c6e7 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -120,6 +120,18 @@ static __inline__ unsigned hash_src(u32 *src)
return h & 0xF;
}
+#ifdef CONFIG_NET_CLS_POLICE
+#define RSVP_POLICE() \
+if (f->police) { \
+ int pol_res = tcf_police(skb, f->police); \
+ if (pol_res < 0) continue; \
+ if (pol_res) return pol_res; \
+}
+#else
+#define RSVP_POLICE()
+#endif
+
+
static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
struct tcf_result *res)
{
@@ -137,7 +149,7 @@ static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
struct iphdr *nhptr = skb->nh.iph;
#endif
-#ifndef __i386__
+#if !defined( __i386__) && !defined(__mc68000__)
if ((unsigned long)nhptr & 3)
return -1;
#endif
@@ -181,25 +193,26 @@ restart:
&& src[2] == f->src[2]
#endif
) {
+ *res = f->res;
+
+ RSVP_POLICE();
+
matched:
- if (f->tunnelhdr == 0) {
- *res = f->res;
-#ifdef CONFIG_NET_CLS_POLICE
- if (f->police)
- return tcf_police(skb, f->police);
-#endif
+ if (f->tunnelhdr == 0)
return 0;
- } else {
- tunnelid = f->res.classid;
- nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
- goto restart;
- }
+
+ tunnelid = f->res.classid;
+ nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
+ goto restart;
}
}
/* And wildcard bucket... */
- if ((f = s->ht[16]) != NULL)
+ for (f = s->ht[16]; f; f = f->next) {
+ *res = f->res;
+ RSVP_POLICE();
goto matched;
+ }
return -1;
}
}
@@ -260,7 +273,6 @@ static void rsvp_destroy(struct tcf_proto *tp)
struct rsvp_session *s;
while ((s = sht[h1]) != NULL) {
-
sht[h1] = s->next;
for (h2=0; h2<=16; h2++) {
@@ -270,7 +282,7 @@ static void rsvp_destroy(struct tcf_proto *tp)
unsigned long cl;
s->ht[h2] = f->next;
- if ((cl = xchg(&f->res.class, 0)) != 0)
+ if ((cl = cls_set_class(&f->res.class, 0)) != 0)
tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
#ifdef CONFIG_NET_CLS_POLICE
tcf_police_release(f->police);
@@ -297,8 +309,11 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
if (*fp == f) {
unsigned long cl;
+
*fp = f->next;
- if ((cl = xchg(&f->res.class, 0)) != 0)
+ synchronize_bh();
+
+ if ((cl = cls_set_class(&f->res.class, 0)) != 0)
tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
#ifdef CONFIG_NET_CLS_POLICE
@@ -318,11 +333,13 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
*sp; sp = &(*sp)->next) {
if (*sp == s) {
*sp = s->next;
+ synchronize_bh();
+
kfree(s);
return 0;
}
}
-
+
return 0;
}
}
@@ -399,7 +416,8 @@ static u32 gen_tunnel(struct rsvp_head *data)
return 0;
}
-static int rsvp_change(struct tcf_proto *tp, u32 handle,
+static int rsvp_change(struct tcf_proto *tp, unsigned long base,
+ u32 handle,
struct rtattr **tca,
unsigned long *arg)
{
@@ -425,17 +443,21 @@ static int rsvp_change(struct tcf_proto *tp, u32 handle,
if (f->handle != handle && handle)
return -EINVAL;
if (tb[TCA_RSVP_CLASSID-1]) {
- unsigned long cl = xchg(&f->res.class, 0);
+ unsigned long cl;
+
+ f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
+ cl = cls_set_class(&f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
if (cl)
tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
- f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
- f->res.class = tp->q->ops->cl_ops->bind_tcf(tp->q, f->res.classid);
}
#ifdef CONFIG_NET_CLS_POLICE
if (tb[TCA_RSVP_POLICE-1]) {
- struct tcf_police *police = tcf_police_locate(tb[TCA_RSVP_POLICE-1]);
+ struct tcf_police *police = tcf_police_locate(tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]);
- tcf_police_release(xchg(&f->police, police));
+ police = xchg(&f->police, police);
+ synchronize_bh();
+
+ tcf_police_release(police);
}
#endif
return 0;
@@ -514,17 +536,19 @@ insert:
f->sess = s;
if (f->tunnelhdr == 0)
- f->res.class = tp->q->ops->cl_ops->bind_tcf(tp->q, f->res.classid);
+ cls_set_class(&f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
#ifdef CONFIG_NET_CLS_POLICE
if (tb[TCA_RSVP_POLICE-1])
- f->police = tcf_police_locate(tb[TCA_RSVP_POLICE-1]);
+ f->police = tcf_police_locate(tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]);
#endif
for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
break;
f->next = *fp;
+ wmb();
*fp = f;
+
*arg = (unsigned long)f;
return 0;
}
@@ -546,7 +570,9 @@ insert:
break;
}
s->next = *sp;
+ wmb();
*sp = s;
+
goto insert;
errout:
@@ -631,6 +657,11 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
#endif
rta->rta_len = skb->tail - b;
+#ifdef CONFIG_NET_CLS_POLICE
+ if (f->police) {
+ RTA_PUT(skb, TCA_STATS, sizeof(struct tc_stats), &f->police->stats);
+ }
+#endif
return skb->len;
rtattr_failure:
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index cb52e9d07..98d4e1f7b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -114,7 +114,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
int sel = 0;
int i;
-#ifndef __i386__
+#if !defined(__i386__) && !defined(__mc68000__)
if ((unsigned long)ptr & 3)
return -1;
#endif
@@ -137,10 +137,13 @@ check_terminal:
if (n->sel.flags&TC_U32_TERMINAL) {
*res = n->res;
#ifdef CONFIG_NET_CLS_POLICE
- if (n->police)
- return tcf_police(skb, n->police);
+ if (n->police) {
+ int pol_res = tcf_police(skb, n->police);
+ if (pol_res >= 0)
+ return pol_res;
+ } else
#endif
- return 0;
+ return 0;
}
n = n->next;
goto next_knode;
@@ -304,7 +307,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
{
unsigned long cl;
- if ((cl = xchg(&n->res.class, 0)) != 0)
+ if ((cl = cls_set_class(&n->res.class, 0)) != 0)
tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
#ifdef CONFIG_NET_CLS_POLICE
tcf_police_release(n->police);
@@ -324,6 +327,8 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) {
if (*kp == key) {
*kp = key->next;
+ synchronize_bh();
+
u32_destroy_key(tp, key);
return 0;
}
@@ -341,6 +346,8 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
for (h=0; h<=ht->divisor; h++) {
while ((n = ht->ht[h]) != NULL) {
ht->ht[h] = n->next;
+ synchronize_bh();
+
u32_destroy_key(tp, n);
}
}
@@ -402,6 +409,7 @@ static void u32_destroy(struct tcf_proto *tp)
kfree(tp_c);
}
+ MOD_DEC_USE_COUNT;
tp->data = NULL;
}
@@ -437,8 +445,10 @@ static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
return handle|(i>0xFFF ? 0xFFF : i);
}
-static int u32_set_parms(struct Qdisc *q, struct tc_u_hnode *ht,
- struct tc_u_knode *n, struct rtattr **tb)
+static int u32_set_parms(struct Qdisc *q, unsigned long base,
+ struct tc_u_hnode *ht,
+ struct tc_u_knode *n, struct rtattr **tb,
+ struct rtattr *est)
{
if (tb[TCA_U32_LINK-1]) {
u32 handle = *(u32*)RTA_DATA(tb[TCA_U32_LINK-1]);
@@ -456,28 +466,33 @@ static int u32_set_parms(struct Qdisc *q, struct tc_u_hnode *ht,
}
ht_down = xchg(&n->ht_down, ht_down);
+ synchronize_bh();
if (ht_down)
ht_down->refcnt--;
}
if (tb[TCA_U32_CLASSID-1]) {
- unsigned long cl = xchg(&n->res.class, 0);
+ unsigned long cl;
+
+ n->res.classid = *(u32*)RTA_DATA(tb[TCA_U32_CLASSID-1]);
+ cl = cls_set_class(&n->res.class, q->ops->cl_ops->bind_tcf(q, base, n->res.classid));
if (cl)
q->ops->cl_ops->unbind_tcf(q, cl);
- n->res.classid = *(u32*)RTA_DATA(tb[TCA_U32_CLASSID-1]);
- n->res.class = q->ops->cl_ops->bind_tcf(q, n->res.classid);
}
#ifdef CONFIG_NET_CLS_POLICE
if (tb[TCA_U32_POLICE-1]) {
- struct tcf_police *police = tcf_police_locate(tb[TCA_U32_POLICE-1]);
+ struct tcf_police *police = tcf_police_locate(tb[TCA_U32_POLICE-1], est);
+
+ police = xchg(&n->police, police);
+ synchronize_bh();
- tcf_police_release(xchg(&n->police, police));
+ tcf_police_release(police);
}
#endif
return 0;
}
-static int u32_change(struct tcf_proto *tp, u32 handle,
+static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
struct rtattr **tca,
unsigned long *arg)
{
@@ -500,7 +515,7 @@ static int u32_change(struct tcf_proto *tp, u32 handle,
if (TC_U32_KEY(n->handle) == 0)
return -EINVAL;
- return u32_set_parms(tp->q, n->ht_up, n, tb);
+ return u32_set_parms(tp->q, base, n->ht_up, n, tb, tca[TCA_RATE-1]);
}
if (tb[TCA_U32_DIVISOR-1]) {
@@ -531,7 +546,7 @@ static int u32_change(struct tcf_proto *tp, u32 handle,
if (tb[TCA_U32_HASH-1]) {
htid = *(unsigned*)RTA_DATA(tb[TCA_U32_HASH-1]);
- if (TC_U32_HTID(handle) == TC_U32_ROOT) {
+ if (TC_U32_HTID(htid) == TC_U32_ROOT) {
ht = tp->root;
htid = ht->handle;
} else {
@@ -550,8 +565,6 @@ static int u32_change(struct tcf_proto *tp, u32 handle,
if (handle) {
if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
return -EINVAL;
- if (TC_U32_HASH(handle) && TC_U32_HASH(handle^htid))
- return -EINVAL;
handle = htid | TC_U32_NODE(handle);
} else
handle = gen_new_kid(ht, htid);
@@ -568,14 +581,17 @@ static int u32_change(struct tcf_proto *tp, u32 handle,
memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
n->ht_up = ht;
n->handle = handle;
- err = u32_set_parms(tp->q, ht, n, tb);
+ err = u32_set_parms(tp->q, base, ht, n, tb, tca[TCA_RATE-1]);
if (err == 0) {
struct tc_u_knode **ins;
for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
- if (TC_U32_NODE(handle) >= TC_U32_NODE((*ins)->handle))
+ if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle))
break;
+
n->next = *ins;
+ wmb();
*ins = n;
+
*arg = (unsigned long)n;
return 0;
}
@@ -664,6 +680,11 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
}
rta->rta_len = skb->tail - b;
+#ifdef CONFIG_NET_CLS_POLICE
+ if (TC_U32_KEY(n->handle) && n->police) {
+ RTA_PUT(skb, TCA_STATS, sizeof(struct tc_stats), &n->police->stats);
+ }
+#endif
return skb->len;
rtattr_failure:
diff --git a/net/sched/estimator.c b/net/sched/estimator.c
index 463879606..d51017c84 100644
--- a/net/sched/estimator.c
+++ b/net/sched/estimator.c
@@ -171,8 +171,10 @@ void qdisc_kill_estimator(struct tc_stats *stats)
pest = &est->next;
continue;
}
- /* ATOMIC_SET */
+
*pest = est->next;
+ synchronize_bh();
+
kfree(est);
killed++;
}
diff --git a/net/sched/police.c b/net/sched/police.c
index 13599ac49..89e58d8be 100644
--- a/net/sched/police.c
+++ b/net/sched/police.c
@@ -74,6 +74,9 @@ void tcf_police_destroy(struct tcf_police *p)
for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
if (*p1p == p) {
*p1p = p->next;
+#ifdef CONFIG_NET_ESTIMATOR
+ qdisc_kill_estimator(&p->stats);
+#endif
if (p->R_tab)
qdisc_put_rtab(p->R_tab);
if (p->P_tab)
@@ -85,7 +88,7 @@ void tcf_police_destroy(struct tcf_police *p)
BUG_TRAP(0);
}
-struct tcf_police * tcf_police_locate(struct rtattr *rta)
+struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
{
unsigned h;
struct tcf_police *p;
@@ -111,20 +114,35 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta)
memset(p, 0, sizeof(*p));
p->refcnt = 1;
- if ((p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1])) == NULL)
- goto failure;
- if (parm->peakrate.rate &&
- (p->P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE-1])) == NULL)
- goto failure;
+ if (parm->rate.rate) {
+ if ((p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1])) == NULL)
+ goto failure;
+ if (parm->peakrate.rate &&
+ (p->P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE-1])) == NULL)
+ goto failure;
+ }
+ if (tb[TCA_POLICE_RESULT-1])
+ p->result = *(int*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+#ifdef CONFIG_NET_ESTIMATOR
+ if (tb[TCA_POLICE_AVRATE-1])
+ p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+#endif
p->toks = p->burst = parm->burst;
p->mtu = parm->mtu;
- if (p->mtu == 0)
- p->mtu = 255<<p->R_tab->rate.cell_log;
+ if (p->mtu == 0) {
+ p->mtu = ~0;
+ if (p->R_tab)
+ p->mtu = 255<<p->R_tab->rate.cell_log;
+ }
if (p->P_tab)
p->ptoks = L2T_P(p, p->mtu);
PSCHED_GET_TIME(p->t_c);
p->index = parm->index ? : tcf_police_new_index();
p->action = parm->action;
+#ifdef CONFIG_NET_ESTIMATOR
+ if (est)
+ qdisc_new_estimator(&p->stats, est);
+#endif
h = tcf_police_hash(p->index);
p->next = tcf_police_ht[h];
tcf_police_ht[h] = p;
@@ -143,7 +161,20 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *p)
long toks;
long ptoks = 0;
+ p->stats.bytes += skb->len;
+ p->stats.packets++;
+
+#ifdef CONFIG_NET_ESTIMATOR
+ if (p->ewma_rate && p->stats.bps >= p->ewma_rate) {
+ p->stats.overlimits++;
+ return p->action;
+ }
+#endif
+
if (skb->len <= p->mtu) {
+ if (p->R_tab == NULL)
+ return p->result;
+
PSCHED_GET_TIME(now);
toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst, 0);
@@ -163,10 +194,11 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *p)
p->t_c = now;
p->toks = toks;
p->ptoks = ptoks;
- return TC_POLICE_OK;
+ return p->result;
}
}
+ p->stats.overlimits++;
return p->action;
}
@@ -180,12 +212,21 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
opt.action = p->action;
opt.mtu = p->mtu;
opt.burst = p->burst;
- opt.rate = p->R_tab->rate;
+ if (p->R_tab)
+ opt.rate = p->R_tab->rate;
+ else
+ memset(&opt.rate, 0, sizeof(opt.rate));
if (p->P_tab)
opt.peakrate = p->P_tab->rate;
else
memset(&opt.peakrate, 0, sizeof(opt.peakrate));
RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
+ if (p->result)
+ RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
+#ifdef CONFIG_NET_ESTIMATOR
+ if (p->ewma_rate)
+ RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
+#endif
return skb->len;
rtattr_failure:
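
The police.c changes above extend tcf_police() with byte/packet/overlimit counters, an optional EWMA rate check, and a rate-less mode that simply returns p->result for packets within the MTU. The underlying conform/exceed decision remains a token bucket; the sketch below is a simplified standalone model of that decision only (single bucket, no peak-rate path, plain seconds instead of the PSCHED clock), with names and units chosen for illustration rather than taken from the kernel.

#include <stdio.h>

/* Simplified token-bucket policer decision. */
struct demo_police {
	double rate_bps;	/* refill rate, bytes per second */
	double burst;		/* bucket depth, bytes */
	double toks;		/* current tokens, bytes */
	double t_last;		/* time of the last conforming packet, seconds */
};

/* Returns 1 if the packet conforms (enough tokens), 0 if it exceeds. */
static int demo_police_ok(struct demo_police *p, unsigned len, double now)
{
	double toks = p->toks + (now - p->t_last) * p->rate_bps;

	if (toks > p->burst)
		toks = p->burst;	/* cap accumulated credit at the burst */
	if (toks >= len) {
		p->toks = toks - len;	/* conforming: spend tokens */
		p->t_last = now;
		return 1;
	}
	return 0;			/* exceeding: caller applies its action */
}

int main(void)
{
	struct demo_police p = { .rate_bps = 125000, .burst = 1500,
				 .toks = 1500, .t_last = 0.0 };
	double t = 0.0;

	for (int i = 0; i < 4; i++, t += 0.005)
		printf("t=%.3fs pkt=1000B -> %s\n", t,
		       demo_police_ok(&p, 1000, t) ? "conform" : "exceed");
	return 0;
}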
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f16638081..0ced70bbc 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -11,6 +11,7 @@
* Fixes:
*
* Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
+ * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
*/
#include <linux/config.h>
@@ -29,6 +30,7 @@
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
+#include <linux/kmod.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
@@ -41,7 +43,7 @@
#define BUG_TRAP(x) if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); }
#ifdef CONFIG_RTNETLINK
-static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
struct Qdisc *q, unsigned long cl, int event);
@@ -116,6 +118,10 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
---destroy
destroys resources allocated by init and during lifetime of qdisc.
+
+ ---change
+
+ changes qdisc parameters.
*/
/************************************************
@@ -177,22 +183,22 @@ struct Qdisc *qdisc_lookup(struct device *dev, u32 handle)
return NULL;
}
-/* We know classid. Find qdisc among all qdisc's attached to device
- (root qdisc, all its children, children of children etc.)
- */
-
-struct Qdisc *qdisc_lookup_class(struct device *dev, u32 classid)
+struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
- struct Qdisc *q;
+ unsigned long cl;
+ struct Qdisc *leaf;
+ struct Qdisc_class_ops *cops = p->ops->cl_ops;
- for (q = dev->qdisc_list; q; q = q->next) {
- if (q->classid == classid)
- return q;
- }
- return NULL;
+ if (cops == NULL)
+ return NULL;
+ cl = cops->get(p, classid);
+ if (cl == 0)
+ return NULL;
+ leaf = cops->leaf(p, cl);
+ cops->put(p, cl);
+ return leaf;
}
-
/* Find queueing discipline by name */
struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
@@ -268,6 +274,37 @@ u32 qdisc_alloc_handle(struct device *dev)
return i>0 ? autohandle : 0;
}
+/* Attach toplevel qdisc to device dev */
+
+static struct Qdisc *
+dev_graft_qdisc(struct device *dev, struct Qdisc *qdisc)
+{
+ struct Qdisc *oqdisc;
+
+ if (dev->flags & IFF_UP)
+ dev_deactivate(dev);
+
+ start_bh_atomic();
+ oqdisc = dev->qdisc_sleeping;
+
+ /* Prune old scheduler */
+ if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
+ qdisc_reset(oqdisc);
+
+ /* ... and graft new one */
+ if (qdisc == NULL)
+ qdisc = &noop_qdisc;
+ dev->qdisc_sleeping = qdisc;
+ dev->qdisc = &noop_qdisc;
+ end_bh_atomic();
+
+ if (dev->flags & IFF_UP)
+ dev_activate(dev);
+
+ return oqdisc;
+}
+
+
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
to device "dev".
@@ -280,17 +317,10 @@ int qdisc_graft(struct device *dev, struct Qdisc *parent, u32 classid,
int err = 0;
if (parent == NULL) {
- BUG_TRAP(classid == TC_H_ROOT);
- if (new) {
- new->parent = NULL;
- new->classid = TC_H_ROOT;
- }
- *old = dev_set_scheduler(dev, new);
+ *old = dev_graft_qdisc(dev, new);
} else {
struct Qdisc_class_ops *cops = parent->ops->cl_ops;
- BUG_TRAP(classid != TC_H_ROOT);
-
err = -EINVAL;
if (cops) {
@@ -313,22 +343,30 @@ int qdisc_graft(struct device *dev, struct Qdisc *parent, u32 classid,
*/
static struct Qdisc *
-qdisc_create(struct device *dev, struct Qdisc_ops *ops, u32 handle,
- u32 parentid, struct rtattr **tca, int *errp)
+qdisc_create(struct device *dev, u32 handle, struct rtattr **tca, int *errp)
{
int err;
struct rtattr *kind = tca[TCA_KIND-1];
struct Qdisc *sch = NULL;
+ struct Qdisc_ops *ops;
int size;
- int new = 0;
- if (ops == NULL) {
- ops = qdisc_lookup_ops(kind);
- err = -EINVAL;
- if (ops == NULL)
- goto err_out;
- new = 1;
+ ops = qdisc_lookup_ops(kind);
+#ifdef CONFIG_KMOD
+ if (ops==NULL && tca[TCA_KIND-1] != NULL) {
+ char module_name[4 + IFNAMSIZ + 1];
+
+ if (RTA_PAYLOAD(kind) <= IFNAMSIZ) {
+ sprintf(module_name, "sch_%s", (char*)RTA_DATA(kind));
+ request_module (module_name);
+ ops = qdisc_lookup_ops(kind);
+ }
}
+#endif
+
+ err = -EINVAL;
+ if (ops == NULL)
+ goto err_out;
size = sizeof(*sch) + ops->priv_size;
@@ -340,13 +378,8 @@ qdisc_create(struct device *dev, struct Qdisc_ops *ops, u32 handle,
/* Grrr... Resolve race condition with module unload */
err = -EINVAL;
- if (new) {
- if (ops != qdisc_lookup_ops(kind))
- goto err_out;
- } else if (kind) {
- if (rtattr_strcmp(kind, ops->id))
- goto err_out;
- }
+ if (ops != qdisc_lookup_ops(kind))
+ goto err_out;
memset(sch, 0, size);
@@ -355,6 +388,7 @@ qdisc_create(struct device *dev, struct Qdisc_ops *ops, u32 handle,
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev = dev;
+ atomic_set(&sch->refcnt, 1);
if (handle == 0) {
handle = qdisc_alloc_handle(dev);
err = -ENOMEM;
@@ -362,9 +396,8 @@ qdisc_create(struct device *dev, struct Qdisc_ops *ops, u32 handle,
goto err_out;
}
sch->handle = handle;
- sch->classid = parentid;
- if (ops->init && (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
+ if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
sch->next = dev->qdisc_list;
dev->qdisc_list = sch;
#ifdef CONFIG_NET_ESTIMATOR
@@ -381,135 +414,241 @@ err_out:
return NULL;
}
+static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
+{
+ if (tca[TCA_OPTIONS-1]) {
+ int err;
+
+ if (sch->ops->change == NULL)
+ return -EINVAL;
+ err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
+ if (err)
+ return err;
+ }
+#ifdef CONFIG_NET_ESTIMATOR
+ if (tca[TCA_RATE-1]) {
+ qdisc_kill_estimator(&sch->stats);
+ qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
+ }
+#endif
+ return 0;
+}
+
+struct check_loop_arg
+{
+ struct qdisc_walker w;
+ struct Qdisc *p;
+ int depth;
+};
+
+static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
+
+static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
+{
+ struct check_loop_arg arg;
+
+ if (q->ops->cl_ops == NULL)
+ return 0;
+
+ arg.w.stop = arg.w.skip = arg.w.count = 0;
+ arg.w.fn = check_loop_fn;
+ arg.depth = depth;
+ arg.p = p;
+ q->ops->cl_ops->walk(q, &arg.w);
+ return arg.w.stop ? -ELOOP : 0;
+}
+
+static int
+check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
+{
+ struct Qdisc *leaf;
+ struct Qdisc_class_ops *cops = q->ops->cl_ops;
+ struct check_loop_arg *arg = (struct check_loop_arg *)w;
+
+ leaf = cops->leaf(q, cl);
+ if (leaf) {
+ if (leaf == arg->p || arg->depth > 7)
+ return -ELOOP;
+ return check_loop(leaf, arg->p, arg->depth + 1);
+ }
+ return 0;
+}
/*
- Create/delete/change/get qdisc.
+ * Delete/get qdisc.
*/
-static int tc_ctl_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
struct tcmsg *tcm = NLMSG_DATA(n);
struct rtattr **tca = arg;
struct device *dev;
u32 clid = tcm->tcm_parent;
- struct Qdisc *old_q;
struct Qdisc *q = NULL;
struct Qdisc *p = NULL;
- struct Qdisc *leaf = NULL;
- struct Qdisc_ops *qops = NULL;
int err;
- /* Find device */
if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
return -ENODEV;
- /* If parent is specified, it must exist
- and tcm_parent selects a class in parent which
- new qdisc will be attached to.
-
- The place may be already busy by another qdisc,
- remember this fact, if it was not auto-created discipline.
- */
if (clid) {
if (clid != TC_H_ROOT) {
- p = qdisc_lookup(dev, TC_H_MAJ(clid));
- if (p == NULL)
+ if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
return -ENOENT;
- leaf = qdisc_lookup_class(dev, clid);
+ q = qdisc_leaf(p, clid);
} else
- leaf = dev->qdisc_sleeping;
-
- if (leaf && leaf->flags&TCQ_F_DEFAULT && n->nlmsg_type == RTM_NEWQDISC)
- leaf = NULL;
+ q = dev->qdisc_sleeping;
- /*
- Also, leaf may be exactly that qdisc, which we want
- to control. Remember this to avoid one more qdisc_lookup.
- */
-
- if (leaf && leaf->handle == tcm->tcm_handle)
- q = leaf;
- }
+ if (!q)
+ return -ENOENT;
- /* Try to locate the discipline */
- if (tcm->tcm_handle && q == NULL) {
- if (TC_H_MIN(tcm->tcm_handle))
+ if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
return -EINVAL;
- q = qdisc_lookup(dev, tcm->tcm_handle);
+ } else {
+ if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+ return -ENOENT;
}
- /* If discipline already exists, check that its real parent
- matches to one selected by tcm_parent.
- */
-
- if (q) {
- if (clid && p != q->parent)
- return -EINVAL;
- BUG_TRAP(!leaf || leaf == q);
- if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
+ if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
+ return -EINVAL;
+
+ if (n->nlmsg_type == RTM_DELQDISC) {
+ if (!clid)
return -EINVAL;
- clid = q->classid;
- goto process_existing;
+ if (q->handle == 0)
+ return -ENOENT;
+ if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
+ return err;
+ if (q) {
+ qdisc_notify(skb, n, clid, q, NULL);
+ qdisc_destroy(q);
+ }
+ } else {
+ qdisc_notify(skb, n, clid, NULL, q);
}
+ return 0;
+}
- /* The discipline is known not to exist.
- If parent was not selected too, return error.
- */
- if (clid == 0)
- return tcm->tcm_handle ? -ENOENT : -EINVAL;
+/*
+ Create/change qdisc.
+ */
- /* Check for the case when leaf is exactly the thing,
- that you want.
- */
+static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+ struct tcmsg *tcm = NLMSG_DATA(n);
+ struct rtattr **tca = arg;
+ struct device *dev;
+ u32 clid = tcm->tcm_parent;
+ struct Qdisc *q = NULL;
+ struct Qdisc *p = NULL;
+ int err;
+
+ if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+ return -ENODEV;
- if (leaf && tcm->tcm_handle == 0) {
- q = leaf;
- if (!tca[TCA_KIND-1] || rtattr_strcmp(tca[TCA_KIND-1], q->ops->id) == 0)
- goto process_existing;
+ if (clid) {
+ if (clid != TC_H_ROOT) {
+ if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+ return -ENOENT;
+ q = qdisc_leaf(p, clid);
+ } else {
+ q = dev->qdisc_sleeping;
+ }
+
+ /* It may be default qdisc, ignore it */
+ if (q && q->handle == 0)
+ q = NULL;
+
+ if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
+ if (tcm->tcm_handle) {
+ if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
+ return -EEXIST;
+ if (TC_H_MIN(tcm->tcm_handle))
+ return -EINVAL;
+ if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+ goto create_n_graft;
+ if (n->nlmsg_flags&NLM_F_EXCL)
+ return -EEXIST;
+ if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
+ return -EINVAL;
+ if (q == p ||
+ (p && check_loop(q, p, 0)))
+ return -ELOOP;
+ atomic_inc(&q->refcnt);
+ goto graft;
+ } else {
+ if (q == NULL)
+ goto create_n_graft;
+
+ /* This magic test requires explanation.
+ *
+ * We know, that some child q is already
+ * attached to this parent and have choice:
+ * either to change it or to create/graft new one.
+ *
+ * 1. We are allowed to create/graft only
+ * if CREATE and REPLACE flags are set.
+ *
+ * 2. If EXCL is set, requestor wanted to say,
+ * that qdisc tcm_handle is not expected
+ * to exist, so that we choose create/graft too.
+ *
+ * 3. The last case is when no flags are set.
+ * Alas, it is sort of hole in API, we
+ * cannot decide what to do unambiguously.
+ * For now we select create/graft, if
+ * user gave KIND, which does not match existing.
+ */
+ if ((n->nlmsg_flags&NLM_F_CREATE) &&
+ (n->nlmsg_flags&NLM_F_REPLACE) &&
+ ((n->nlmsg_flags&NLM_F_EXCL) ||
+ (tca[TCA_KIND-1] &&
+ rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
+ goto create_n_graft;
+ }
+ }
+ } else {
+ if (!tcm->tcm_handle)
+ return -EINVAL;
+ q = qdisc_lookup(dev, tcm->tcm_handle);
}
- if (n->nlmsg_type != RTM_NEWQDISC || !(n->nlmsg_flags&NLM_F_CREATE))
+ /* Change qdisc parameters */
+ if (q == NULL)
return -ENOENT;
- if (leaf && n->nlmsg_flags&NLM_F_EXCL)
+ if (n->nlmsg_flags&NLM_F_EXCL)
return -EEXIST;
+ if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
+ return -EINVAL;
+ err = qdisc_change(q, tca);
+ if (err == 0)
+ qdisc_notify(skb, n, clid, NULL, q);
+ return err;
-create_and_graft:
- q = qdisc_create(dev, qops, tcm->tcm_handle, clid, tca, &err);
+create_n_graft:
+ if (!(n->nlmsg_flags&NLM_F_CREATE))
+ return -ENOENT;
+ q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
if (q == NULL)
return err;
graft:
- err = qdisc_graft(dev, p, clid, q, &old_q);
- if (err) {
- if (q)
- qdisc_destroy(q);
- return err;
+ if (1) {
+ struct Qdisc *old_q = NULL;
+ err = qdisc_graft(dev, p, clid, q, &old_q);
+ if (err) {
+ if (q)
+ qdisc_destroy(q);
+ return err;
+ }
+ qdisc_notify(skb, n, clid, old_q, q);
+ if (old_q)
+ qdisc_destroy(old_q);
}
- qdisc_notify(skb, n, old_q, q);
- if (old_q)
- qdisc_destroy(old_q);
return 0;
-
-process_existing:
-
- switch (n->nlmsg_type) {
- case RTM_NEWQDISC:
- if (n->nlmsg_flags&NLM_F_EXCL)
- return -EEXIST;
- qops = q->ops;
- goto create_and_graft;
- case RTM_GETQDISC:
- qdisc_notify(skb, n, NULL, q);
- return 0;
- case RTM_DELQDISC:
- q = NULL;
- goto graft;
- default:
- return -EINVAL;
- }
}
-static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q,
+static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
u32 pid, u32 seq, unsigned flags, int event)
{
struct tcmsg *tcm;
@@ -521,9 +660,9 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q,
tcm = NLMSG_DATA(nlh);
tcm->tcm_family = AF_UNSPEC;
tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
- tcm->tcm_parent = q->classid;
+ tcm->tcm_parent = clid;
tcm->tcm_handle = q->handle;
- tcm->tcm_info = 0;
+ tcm->tcm_info = atomic_read(&q->refcnt);
RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
if (q->ops->dump && q->ops->dump(q, skb) < 0)
goto rtattr_failure;
@@ -539,7 +678,7 @@ rtattr_failure:
}
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
- struct Qdisc *old, struct Qdisc *new)
+ u32 clid, struct Qdisc *old, struct Qdisc *new)
{
struct sk_buff *skb;
u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
@@ -548,12 +687,12 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
if (!skb)
return -ENOBUFS;
- if (old && !(old->flags&TCQ_F_DEFAULT)) {
- if (tc_fill_qdisc(skb, old, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
+ if (old && old->handle) {
+ if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
goto err_out;
}
if (new) {
- if (tc_fill_qdisc(skb, new, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+ if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
goto err_out;
}
@@ -583,7 +722,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
q = q->next, q_idx++) {
if (q_idx < s_q_idx)
continue;
- if (tc_fill_qdisc(skb, q, NETLINK_CB(cb->skb).pid,
+ if (tc_fill_qdisc(skb, q, 0, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done;
}
@@ -797,11 +936,10 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
for (q=dev->qdisc_list, t=0; q; q = q->next, t++) {
if (t < s_t) continue;
if (!q->ops->cl_ops) continue;
- if (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle
- && (tcm->tcm_parent != TC_H_ROOT || q->parent != NULL))
+ if (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle)
continue;
if (t > s_t)
- memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int));
+ memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
arg.w.fn = qdisc_class_dump;
arg.skb = skb;
arg.cb = cb;
@@ -846,6 +984,20 @@ static int psched_read_proc(char *buffer, char **start, off_t offset,
}
#endif
+#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
+int psched_tod_diff(int delta_sec, int bound)
+{
+ int delta;
+
+ if (bound <= 1000000 || delta_sec > (0x7FFFFFFF/1000000)-1)
+ return bound;
+ delta = delta_sec * 1000000;
+ if (delta > bound)
+ delta = bound;
+ return delta;
+}
+#endif
+
psched_time_t psched_time_base;
#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
@@ -866,7 +1018,8 @@ static void psched_tick(unsigned long dummy)
#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
psched_time_t dummy_stamp;
PSCHED_GET_TIME(dummy_stamp);
- psched_timer.expires = jiffies + 4*HZ;
+	/* This is OK for CPUs up to 4 GHz */
+ psched_timer.expires = jiffies + 1*HZ;
#else
unsigned long now = jiffies;
psched_time_base = ((u64)now)<<PSCHED_JSCALE;
@@ -891,7 +1044,6 @@ __initfunc(int psched_calibrate_clock(void))
return -1;
#endif
- start_bh_atomic();
#ifdef PSCHED_WATCHER
psched_tick(0);
#endif
@@ -902,7 +1054,6 @@ __initfunc(int psched_calibrate_clock(void))
barrier();
PSCHED_GET_TIME(stamp1);
do_gettimeofday(&tv1);
- end_bh_atomic();
delay = PSCHED_TDIFF(stamp1, stamp);
rdelay = tv1.tv_usec - tv.tv_usec;
@@ -921,6 +1072,9 @@ __initfunc(int psched_calibrate_clock(void))
__initfunc(int pktsched_init(void))
{
+#ifdef CONFIG_RTNETLINK
+ struct rtnetlink_link *link_p;
+#endif
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *ent;
#endif
@@ -931,19 +1085,22 @@ __initfunc(int pktsched_init(void))
#elif PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES
psched_tick_per_us = HZ<<PSCHED_JSCALE;
psched_us_per_tick = 1000000;
+#ifdef PSCHED_WATCHER
+ psched_tick(0);
+#endif
#endif
#ifdef CONFIG_RTNETLINK
- struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC];
+ link_p = rtnetlink_links[PF_UNSPEC];
/* Setup rtnetlink links. It is made here to avoid
exporting large number of public symbols.
*/
if (link_p) {
- link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_ctl_qdisc;
- link_p[RTM_DELQDISC-RTM_BASE].doit = tc_ctl_qdisc;
- link_p[RTM_GETQDISC-RTM_BASE].doit = tc_ctl_qdisc;
+ link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
+ link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
+ link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
@@ -975,6 +1132,12 @@ __initfunc(int pktsched_init(void))
#ifdef CONFIG_NET_SCH_RED
INIT_QDISC(red);
#endif
+#ifdef CONFIG_NET_SCH_GRED
+ INIT_QDISC(gred);
+#endif
+#ifdef CONFIG_NET_SCH_DSMARK
+ INIT_QDISC(dsmark);
+#endif
#ifdef CONFIG_NET_SCH_SFQ
INIT_QDISC(sfq);
#endif
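
pktsched_init() fills doit/dumpit slots in the PF_UNSPEC rtnetlink link table, so each message type is dispatched through a function-pointer array. A small sketch of that dispatch-table style, with invented names (MSG_*, handler_t) standing in for the rtnetlink types:

    #include <stdio.h>

    enum { MSG_NEWQDISC, MSG_DELQDISC, MSG_GETQDISC, MSG_MAX };

    typedef int (*handler_t)(const char *arg);

    static int modify_qdisc(const char *arg) { printf("modify %s\n", arg); return 0; }
    static int get_qdisc(const char *arg)    { printf("get/del %s\n", arg); return 0; }

    int main(void)
    {
        handler_t doit[MSG_MAX] = { 0 };

        doit[MSG_NEWQDISC] = modify_qdisc;  /* like tc_modify_qdisc */
        doit[MSG_DELQDISC] = get_qdisc;     /* like tc_get_qdisc */
        doit[MSG_GETQDISC] = get_qdisc;

        /* dispatch a request by indexing the table */
        return doit[MSG_NEWQDISC]("eth0 root");
    }
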
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 9ae14c243..c8094a882 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -30,13 +30,13 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
-#include <linux/module.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
+
/* Class-Based Queueing (CBQ) algorithm.
=======================================
@@ -169,6 +169,9 @@ struct cbq_sched_data
struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
with backlog */
+#ifdef CONFIG_NET_CLS_POLICE
+ struct cbq_class *rx_class;
+#endif
struct cbq_class *tx_class;
struct cbq_class *tx_borrowed;
int tx_len;
@@ -269,17 +272,21 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch)
else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
cl = defmap[TC_PRIO_BESTEFFORT];
- if (cl == NULL)
+ if (cl == NULL || cl->level >= head->level)
goto fallback;
}
- if (cl->level == 0) {
#ifdef CONFIG_NET_CLS_POLICE
- if (result)
- return cbq_reclassify(skb, cl);
+ switch (result) {
+ case TC_POLICE_RECLASSIFY:
+ return cbq_reclassify(skb, cl);
+ case TC_POLICE_SHOT:
+ return NULL;
+ default:
+ }
#endif
+ if (cl->level == 0)
return cl;
- }
/*
* Step 3+n. If classifier selected a link sharing class,
@@ -321,11 +328,9 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl)
if (cl_tail != NULL) {
cl->next_alive = cl_tail->next_alive;
cl_tail->next_alive = cl;
- cl->deficit = 0;
} else {
cl->next_alive = cl;
q->activemask |= (1<<prio);
- cl->deficit = cl->quantum;
}
}
@@ -358,31 +363,28 @@ static void cbq_deactivate_class(struct cbq_class *this)
}
cl = cl_prev->next_alive;
- cl->deficit += cl->quantum;
return;
}
} while ((cl_prev = cl) != q->active[prio]);
}
-static __inline__ void
+static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
- if (q->toplevel > 0) {
+ int toplevel = q->toplevel;
+
+ if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
psched_time_t now;
PSCHED_GET_TIME(now);
if (PSCHED_TLESS(now, q->now))
now = q->now;
- if (PSCHED_TLESS(cl->undertime, now)) {
- q->toplevel = 0;
- return;
- }
- while ((cl = cl->borrow) != NULL
- && q->toplevel > cl->level) {
- if (PSCHED_TLESS(cl->borrow->undertime, now)) {
+
+ do {
+ if (PSCHED_TLESS(cl->undertime, now)) {
q->toplevel = cl->level;
return;
}
- }
+ } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
}
}
@@ -393,23 +395,31 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct cbq_class *cl = cbq_classify(skb, sch);
int len = skb->len;
- if (cl && cl->q->enqueue(skb, cl->q) == 1) {
- sch->q.qlen++;
- sch->stats.packets++;
- cl->stats.packets++;
- sch->stats.bytes+=len;
- cl->stats.bytes+=len;
- cbq_mark_toplevel(q, cl);
- if (!cl->next_alive)
- cbq_activate_class(cl);
- return 1;
+#ifdef CONFIG_NET_CLS_POLICE
+ q->rx_class = cl;
+#endif
+ if (cl) {
+#ifdef CONFIG_NET_CLS_POLICE
+ cl->q->__parent = sch;
+#endif
+ if (cl->q->enqueue(skb, cl->q) == 1) {
+ sch->q.qlen++;
+ sch->stats.packets++;
+ sch->stats.bytes+=len;
+ cbq_mark_toplevel(q, cl);
+ if (!cl->next_alive)
+ cbq_activate_class(cl);
+ return 1;
+ }
}
sch->stats.drops++;
if (cl == NULL)
kfree_skb(skb);
- else
+ else {
+ cbq_mark_toplevel(q, cl);
cl->stats.drops++;
+ }
return 0;
}
@@ -426,9 +436,14 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
}
q->tx_class = NULL;
+ cbq_mark_toplevel(q, cl);
+
+#ifdef CONFIG_NET_CLS_POLICE
+ q->rx_class = cl;
+ cl->q->__parent = sch;
+#endif
if (cl->q->ops->requeue(skb, cl->q) == 1) {
sch->q.qlen++;
- cbq_mark_toplevel(q, cl);
if (!cl->next_alive)
cbq_activate_class(cl);
return 1;
@@ -445,11 +460,9 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
static void cbq_ovl_classic(struct cbq_class *cl)
{
struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
+ psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
if (!cl->delayed) {
- psched_tdiff_t delay;
-
- delay = PSCHED_TDIFF(cl->undertime, q->now);
delay += cl->offtime;
/*
@@ -463,15 +476,35 @@ static void cbq_ovl_classic(struct cbq_class *cl)
delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
if (cl->avgidle < cl->minidle)
cl->avgidle = cl->minidle;
- if (delay < 0)
- delay = 0;
+ if (delay <= 0)
+ delay = 1;
PSCHED_TADD2(q->now, delay, cl->undertime);
- if (q->wd_expires == 0 || q->wd_expires > delay)
- q->wd_expires = delay;
cl->xstats.overactions++;
cl->delayed = 1;
}
+ if (q->wd_expires == 0 || q->wd_expires > delay)
+ q->wd_expires = delay;
+
+	/* Dirty work! We have to schedule wakeups based on the
+	   real available rate, rather than the leaf rate,
+	   which may be tiny (or even zero).
+	 */
+ if (q->toplevel == TC_CBQ_MAXLEVEL) {
+ struct cbq_class *b;
+ psched_tdiff_t base_delay = q->wd_expires;
+
+ for (b = cl->borrow; b; b = b->borrow) {
+ delay = PSCHED_TDIFF(b->undertime, q->now);
+ if (delay < base_delay) {
+ if (delay <= 0)
+ delay = 1;
+ base_delay = delay;
+ }
+ }
+
+ q->wd_expires = delay;
+ }
}
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
@@ -481,15 +514,18 @@ static void cbq_ovl_classic(struct cbq_class *cl)
static void cbq_ovl_rclassic(struct cbq_class *cl)
{
struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
+ struct cbq_class *this = cl;
- while (cl && cl->delayed) {
- cl = cl->borrow;
- if (cl->level > q->toplevel)
- return;
- }
+ do {
+ if (cl->level > q->toplevel) {
+ cl = NULL;
+ break;
+ }
+ } while ((cl = cl->borrow) != NULL);
- if (cl)
- cbq_ovl_classic(cl);
+ if (cl == NULL)
+ cl = this;
+ cbq_ovl_classic(cl);
}
/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
@@ -497,12 +533,11 @@ static void cbq_ovl_rclassic(struct cbq_class *cl)
static void cbq_ovl_delay(struct cbq_class *cl)
{
struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
+ psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
if (!cl->delayed) {
- psched_tdiff_t delay;
unsigned long sched = jiffies;
- delay = PSCHED_TDIFF(cl->undertime, q->now);
delay += cl->offtime;
if (cl->avgidle < 0)
delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
@@ -521,8 +556,12 @@ static void cbq_ovl_delay(struct cbq_class *cl)
add_timer(&q->delay_timer);
cl->delayed = 1;
cl->xstats.overactions++;
+ return;
}
+ delay = 1;
}
+ if (q->wd_expires == 0 || q->wd_expires > delay)
+ q->wd_expires = delay;
}
/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
@@ -555,6 +594,7 @@ static void cbq_ovl_drop(struct cbq_class *cl)
static void cbq_watchdog(unsigned long arg)
{
struct Qdisc *sch = (struct Qdisc*)arg;
+ sch->flags &= ~TCQ_F_THROTTLED;
qdisc_wakeup(sch->dev);
}
@@ -622,6 +662,7 @@ static void cbq_undelay(unsigned long arg)
add_timer(&q->delay_timer);
}
+ sch->flags &= ~TCQ_F_THROTTLED;
qdisc_wakeup(sch->dev);
}
@@ -631,18 +672,23 @@ static void cbq_undelay(unsigned long arg)
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
int len = skb->len;
- struct Qdisc *sch = child->parent;
+ struct Qdisc *sch = child->__parent;
struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
- struct cbq_class *cl = cbq_class_lookup(q, child->classid);
+ struct cbq_class *cl = q->rx_class;
+
+ q->rx_class = NULL;
if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
+
+ cbq_mark_toplevel(q, cl);
+
+ q->rx_class = cl;
+ cl->q->__parent = sch;
+
if (cl->q->enqueue(skb, cl->q) == 1) {
sch->q.qlen++;
sch->stats.packets++;
- cl->stats.packets++;
sch->stats.bytes+=len;
- cl->stats.bytes+=len;
- cbq_mark_toplevel(q, cl);
if (!cl->next_alive)
cbq_activate_class(cl);
return 0;
@@ -656,21 +702,42 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
}
#endif
+/*
+   This is a mission-critical procedure.
+
+   We "regenerate" the toplevel cutoff if the transmitting class
+   has backlog and is not being regulated. This is not part of the
+   original CBQ description, but it looks more reasonable.
+   It may well be wrong; the question needs further investigation.
+*/
+
static __inline__ void
-cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
+cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
+ struct cbq_class *borrowed)
{
- if (cl && q->toplevel >= cl->level) {
- if (cl->q->q.qlen <= 1 || PSCHED_TLESS(q->now, cl->undertime))
- q->toplevel = TC_CBQ_MAXLEVEL;
- else /* BUGGGG? if (cl != this) */
- q->toplevel = cl->level;
+ if (cl && q->toplevel >= borrowed->level) {
+ if (cl->q->q.qlen > 1) {
+ do {
+ if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
+ q->toplevel = borrowed->level;
+ return;
+ }
+ } while ((borrowed=borrowed->borrow) != NULL);
+ }
+#if 0
+	/* This is not necessary now. Uncommenting it
+	   would save CPU cycles, but decrease fairness.
+	 */
+ q->toplevel = TC_CBQ_MAXLEVEL;
+#endif
}
}
-static __inline__ void
+static void
cbq_update(struct cbq_sched_data *q)
{
- struct cbq_class *cl = q->tx_class;
+ struct cbq_class *this = q->tx_class;
+ struct cbq_class *cl = this;
int len = q->tx_len;
q->tx_class = NULL;
@@ -679,6 +746,9 @@ cbq_update(struct cbq_sched_data *q)
long avgidle = cl->avgidle;
long idle;
+ cl->stats.packets++;
+ cl->stats.bytes += len;
+
/*
(now - last) is total time between packet right edges.
(last_pktlen/rate) is "virtual" busy time, so that
@@ -697,6 +767,10 @@ cbq_update(struct cbq_sched_data *q)
if (avgidle <= 0) {
/* Overlimit or at-limit */
+
+ if (avgidle < cl->minidle)
+ avgidle = cl->minidle;
+
cl->avgidle = avgidle;
/* Calculate expected time, when this class
@@ -732,12 +806,11 @@ cbq_update(struct cbq_sched_data *q)
cl->avgidle = cl->maxidle;
else
cl->avgidle = avgidle;
-
}
cl->last = q->now;
}
- cbq_update_toplevel(q, q->tx_borrowed);
+ cbq_update_toplevel(q, this, q->tx_borrowed);
}
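
cbq_update() above maintains avgidle, an exponentially weighted moving average of the gap between a class's packet departures and its allotted transmission time; when it goes non-positive the class is over its limit. A rough stand-alone illustration of that bookkeeping with made-up numbers (plain division is used instead of the kernel's right shift, to stay well-defined for negative values):

    #include <stdio.h>

    int main(void)
    {
        long avgidle = 0;
        int ewma_log = 2;
        /* per packet: gap between departures, and "virtual" tx time len/rate */
        long interdeparture[] = { 120, 80, 60, 200 };
        long tx_time[]        = { 100, 100, 100, 100 };

        for (int i = 0; i < 4; i++) {
            long idle = interdeparture[i] - tx_time[i];
            /* EWMA update; the kernel uses a right shift by ewma_log */
            avgidle += (idle - avgidle) / (1 << ewma_log);
            printf("pkt %d: idle=%ld avgidle=%ld %s\n", i, idle, avgidle,
                   avgidle <= 0 ? "(over/at limit)" : "(underlimit)");
        }
        return 0;
    }
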
static __inline__ struct cbq_class *
@@ -750,21 +823,33 @@ cbq_under_limit(struct cbq_class *cl)
return cl;
if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
- PSCHED_TLESS(cl->undertime, q->now)) {
+ !PSCHED_TLESS(q->now, cl->undertime)) {
cl->delayed = 0;
return cl;
}
- while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
- PSCHED_TLESS(q->now, cl->undertime)) {
- if ((cl = cl->borrow) == NULL || cl->level > q->toplevel) {
+ do {
+		/* This is a very suspicious place. Currently the
+		   overlimit action is generated for non-bounded classes
+		   only if the link is completely congested.
+		   Though this agrees with the ancestor-only paradigm,
+		   it looks very questionable: it means that this chunk
+		   of code will either never be called or will strongly
+		   amplify burstiness. Dangerous, silly, and yet no
+		   better solution exists.
+		 */
+ if ((cl = cl->borrow) == NULL) {
this_cl->stats.overlimits++;
this_cl->overlimit(this_cl);
return NULL;
}
- }
- this_cl->xstats.borrows++;
- cl->xstats.borrows++;
+ if (cl->level > q->toplevel)
+ return NULL;
+ } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
+ PSCHED_TLESS(q->now, cl->undertime));
+
+ cl->delayed = 0;
return cl;
}
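
cbq_under_limit() climbs the borrow chain until it finds an underlimit ancestor, giving up once the chain rises above the toplevel cutoff. A rough user-space sketch of that walk, with an invented three-level hierarchy:

    #include <stdio.h>
    #include <stddef.h>

    struct cls {
        const char *name;
        int level;
        int underlimit;     /* 1 if the class may send now */
        struct cls *borrow; /* parent to borrow from, or NULL */
    };

    static struct cls *under_limit(struct cls *cl, int toplevel)
    {
        while (cl && !cl->underlimit) {
            cl = cl->borrow;
            if (cl && cl->level > toplevel)
                return NULL;    /* cut off by the toplevel rule */
        }
        return cl;
    }

    int main(void)
    {
        struct cls root   = { "root",   2, 1, NULL };
        struct cls agency = { "agency", 1, 0, &root };
        struct cls leaf   = { "leaf",   0, 0, &agency };

        struct cls *donor = under_limit(&leaf, 2);
        printf("borrowing from: %s\n", donor ? donor->name : "nobody");
        return 0;
    }
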
@@ -784,27 +869,26 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
/* Start round */
do {
- struct cbq_class *borrow;
+ struct cbq_class *borrow = NULL;
- /* Class is empty */
- if (cl->q->q.qlen == 0)
- goto skip_class;
-
- if ((borrow = cbq_under_limit(cl)) == NULL)
+ if (cl->q->q.qlen &&
+ (borrow = cbq_under_limit(cl)) == NULL)
goto skip_class;
if (cl->deficit <= 0) {
- /* Class exhausted its allotment per this
- round.
+ /* Class exhausted its allotment per
+ this round. Switch to the next one.
*/
deficit = 1;
+ cl->deficit += cl->quantum;
goto next_class;
}
skb = cl->q->dequeue(cl->q);
/* Class did not give us any skb :-(
- It could occur if cl->q == "tbf"
+			   This can occur even if cl->q->q.qlen != 0,
+			   e.g. if cl->q == "tbf"
*/
if (skb == NULL)
goto skip_class;
@@ -812,6 +896,15 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
cl->deficit -= skb->len;
q->tx_class = cl;
q->tx_borrowed = borrow;
+ if (borrow != cl) {
+#ifndef CBQ_XSTATS_BORROWS_BYTES
+ borrow->xstats.borrows++;
+ cl->xstats.borrows++;
+#else
+ borrow->xstats.borrows += skb->len;
+ cl->xstats.borrows += skb->len;
+#endif
+ }
q->tx_len = skb->len;
if (cl->deficit <= 0) {
@@ -822,8 +915,6 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
return skb;
skip_class:
- cl->deficit = 0;
-
if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
/* Class is empty or penalized.
Unlink it from active chain.
@@ -857,7 +948,6 @@ skip_class:
next_class:
cl_prev = cl;
cl = cl->next_alive;
- cl->deficit += cl->quantum;
} while (cl_prev != cl_tail);
} while (deficit);
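
The dequeue loop above is deficit round robin: a class spends its byte deficit, gets a fresh quantum when the deficit is exhausted, and the scheduler moves on to the next class. A simplified sketch with made-up packet sizes (each class is topped up once per visit here, which only approximates the in-kernel accounting):

    #include <stdio.h>

    int main(void)
    {
        int quantum[2] = { 1500, 1500 };
        int deficit[2] = { 1500, 1500 };
        int pkt[2][3]  = { { 1000, 1000, 1000 }, { 400, 400, 400 } };
        int head[2]    = { 0, 0 };

        for (int round = 0; round < 3; round++) {
            for (int c = 0; c < 2; c++) {
                while (head[c] < 3 && deficit[c] > 0) {
                    deficit[c] -= pkt[c][head[c]];
                    printf("round %d: class %d sends %d bytes\n",
                           round, c, pkt[c][head[c]]);
                    head[c]++;
                }
                deficit[c] += quantum[c];   /* refill for the next visit */
            }
        }
        return 0;
    }
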
@@ -914,6 +1004,7 @@ cbq_dequeue(struct Qdisc *sch)
skb = cbq_dequeue_1(sch);
if (skb) {
sch->q.qlen--;
+ sch->flags &= ~TCQ_F_THROTTLED;
return skb;
}
@@ -955,6 +1046,7 @@ cbq_dequeue(struct Qdisc *sch)
delay = 1;
q->wd_timer.expires = jiffies + delay;
add_timer(&q->wd_timer);
+ sch->flags |= TCQ_F_THROTTLED;
}
}
return NULL;
@@ -1129,14 +1221,18 @@ static void cbq_link_class(struct cbq_class *this)
static int cbq_drop(struct Qdisc* sch)
{
struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
- struct cbq_class *cl;
- int h;
+ struct cbq_class *cl, *cl_head;
+ int prio;
- for (h = TC_CBQ_MAXPRIO; h >= 0; h++) {
- for (cl = q->classes[h]; cl; cl = cl->next) {
+ for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio++) {
+ if ((cl_head = q->active[prio]) == NULL)
+ continue;
+
+ cl = cl_head;
+ do {
if (cl->q->ops->drop && cl->q->ops->drop(cl->q))
return 1;
- }
+ } while ((cl = cl->next_alive) != cl_head);
}
return 0;
}
@@ -1166,8 +1262,8 @@ cbq_reset(struct Qdisc* sch)
cl->next_alive = NULL;
PSCHED_SET_PASTPERFECT(cl->undertime);
- cl->avgidle = 0;
- cl->deficit = 0;
+ cl->avgidle = cl->maxidle;
+ cl->deficit = cl->quantum;
cl->cpriority = cl->priority;
}
}
@@ -1187,8 +1283,10 @@ static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
cl->avpkt = lss->avpkt;
if (lss->change&TCF_CBQ_LSS_MINIDLE)
cl->minidle = -(long)lss->minidle;
- if (lss->change&TCF_CBQ_LSS_MAXIDLE)
+ if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
cl->maxidle = lss->maxidle;
+ cl->avgidle = lss->maxidle;
+ }
if (lss->change&TCF_CBQ_LSS_OFFTIME)
cl->offtime = lss->offtime;
return 0;
@@ -1261,7 +1359,7 @@ static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
cl->police = p->police;
- if (!(cl->q->flags&TCQ_F_DEFAULT)) {
+ if (cl->q->handle) {
if (p->police == TC_POLICE_RECLASSIFY)
cl->q->reshape_fail = cbq_reshape_fail;
else
@@ -1300,6 +1398,7 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
return -EINVAL;
}
+ q->link.refcnt = 1;
q->link.sibling = &q->link;
q->link.classid = sch->handle;
q->link.qdisc = sch;
@@ -1493,6 +1592,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
else
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle = cl->classid;
+ tcm->tcm_info = cl->q->handle;
rta = (struct rtattr*)b;
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
@@ -1533,12 +1633,20 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
}
if ((*old = xchg(&cl->q, new)) != NULL)
qdisc_reset(*old);
-
+
return 0;
}
return -ENOENT;
}
+static struct Qdisc *
+cbq_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct cbq_class *cl = (struct cbq_class*)arg;
+
+ return cl ? cl->q : NULL;
+}
+
static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
@@ -1569,6 +1677,7 @@ static void cbq_destroy_class(struct cbq_class *cl)
#ifdef CONFIG_NET_ESTIMATOR
qdisc_kill_estimator(&cl->stats);
#endif
+ kfree(cl);
}
static void
@@ -1578,6 +1687,9 @@ cbq_destroy(struct Qdisc* sch)
struct cbq_class *cl;
unsigned h;
+#ifdef CONFIG_NET_CLS_POLICE
+ q->rx_class = NULL;
+#endif
for (h = 0; h < 16; h++) {
for (cl = q->classes[h]; cl; cl = cl->next)
cbq_destroy_filters(cl);
@@ -1590,20 +1702,29 @@ cbq_destroy(struct Qdisc* sch)
}
qdisc_put_rtab(q->link.R_tab);
+ MOD_DEC_USE_COUNT;
}
-static void cbq_put(struct Qdisc *q, unsigned long arg)
+static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
+ struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
struct cbq_class *cl = (struct cbq_class*)arg;
- if (--cl->refcnt == 0)
+ start_bh_atomic();
+ if (--cl->refcnt == 0) {
+#ifdef CONFIG_NET_CLS_POLICE
+ if (q->rx_class == cl)
+ q->rx_class = NULL;
+#endif
cbq_destroy_class(cl);
+ }
+ end_bh_atomic();
return;
}
static int
-cbq_change(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
- unsigned long *arg)
+cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
+ unsigned long *arg)
{
int err;
struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
@@ -1763,6 +1884,7 @@ cbq_change(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
cl->borrow = cl->tparent;
if (cl->tparent != &q->link)
cl->share = cl->tparent;
+ cbq_adjust_levels(parent);
cl->minidle = -0x7FFFFFFF;
cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
@@ -1781,7 +1903,6 @@ cbq_change(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
#endif
if (tb[TCA_CBQ_FOPT-1])
cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
- cbq_adjust_levels(parent);
end_bh_atomic();
#ifdef CONFIG_NET_ESTIMATOR
@@ -1810,10 +1931,16 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
if (cl->next_alive)
cbq_deactivate_class(cl);
- if (q->tx_class == cl)
- q->tx_class = cl->borrow;
if (q->tx_borrowed == cl)
q->tx_borrowed = q->tx_class;
+ if (q->tx_class == cl) {
+ q->tx_class = NULL;
+ q->tx_borrowed = NULL;
+ }
+#ifdef CONFIG_NET_CLS_POLICE
+ if (q->rx_class == cl)
+ q->rx_class = NULL;
+#endif
cbq_unlink_class(cl);
cbq_adjust_levels(cl->tparent);
@@ -1841,12 +1968,16 @@ static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
return &cl->filter_list;
}
-static unsigned long cbq_bind_filter(struct Qdisc *sch, u32 classid)
+static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
+ u32 classid)
{
struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
+ struct cbq_class *p = (struct cbq_class*)parent;
struct cbq_class *cl = cbq_class_lookup(q, classid);
if (cl) {
+ if (p && p->level <= cl->level)
+ return 0;
cl->filters++;
return (unsigned long)cl;
}
@@ -1878,7 +2009,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
arg->stop = 1;
- break;
+ return;
}
arg->count++;
}
@@ -1888,9 +2019,10 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
static struct Qdisc_class_ops cbq_class_ops =
{
cbq_graft,
+ cbq_leaf,
cbq_get,
cbq_put,
- cbq_change,
+ cbq_change_class,
cbq_delete,
cbq_walk,
@@ -1918,6 +2050,7 @@ struct Qdisc_ops cbq_qdisc_ops =
cbq_init,
cbq_reset,
cbq_destroy,
+ NULL /* cbq_change */,
#ifdef CONFIG_RTNETLINK
cbq_dump,
diff --git a/net/sched/sch_csz.c b/net/sched/sch_csz.c
index 9bdc656c9..2202fd81a 100644
--- a/net/sched/sch_csz.c
+++ b/net/sched/sch_csz.c
@@ -826,6 +826,12 @@ static int csz_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
return -EINVAL;
}
+static struct Qdisc * csz_leaf(struct Qdisc *sch, unsigned long cl)
+{
+ return NULL;
+}
+
+
static unsigned long csz_get(struct Qdisc *sch, u32 classid)
{
struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
@@ -840,6 +846,12 @@ static unsigned long csz_get(struct Qdisc *sch, u32 classid)
return band+1;
}
+static unsigned long csz_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
+{
+ return csz_get(sch, classid);
+}
+
+
static void csz_put(struct Qdisc *sch, unsigned long cl)
{
return;
@@ -1006,6 +1018,8 @@ static struct tcf_proto ** csz_find_tcf(struct Qdisc *sch, unsigned long cl)
struct Qdisc_class_ops csz_class_ops =
{
csz_graft,
+ csz_leaf,
+
csz_get,
csz_put,
csz_change,
@@ -1013,7 +1027,7 @@ struct Qdisc_class_ops csz_class_ops =
csz_walk,
csz_find_tcf,
- csz_get,
+ csz_bind,
csz_put,
#ifdef CONFIG_RTNETLINK
@@ -1036,6 +1050,7 @@ struct Qdisc_ops csz_qdisc_ops =
csz_init,
csz_reset,
csz_destroy,
+ NULL /* csz_change */,
#ifdef CONFIG_RTNETLINK
csz_dump,
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 14bc8bb8b..c93f206a2 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -97,10 +97,7 @@ fifo_drop(struct Qdisc* sch)
static void
fifo_reset(struct Qdisc* sch)
{
- struct sk_buff *skb;
-
- while ((skb=__skb_dequeue(&sch->q)) != NULL)
- kfree_skb(skb);
+ skb_queue_purge(&sch->q);
sch->stats.backlog = 0;
}
@@ -137,15 +134,15 @@ pfifo_dequeue(struct Qdisc* sch)
return __skb_dequeue(&sch->q);
}
-
static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
{
struct fifo_sched_data *q = (void*)sch->data;
if (opt == NULL) {
- q->limit = sch->dev->tx_queue_len;
if (sch->ops == &bfifo_qdisc_ops)
- q->limit *= sch->dev->mtu;
+ q->limit = sch->dev->tx_queue_len*sch->dev->mtu;
+ else
+ q->limit = sch->dev->tx_queue_len;
} else {
struct tc_fifo_qopt *ctl = RTA_DATA(opt);
if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
@@ -188,6 +185,8 @@ struct Qdisc_ops pfifo_qdisc_ops =
fifo_init,
fifo_reset,
NULL,
+ fifo_init,
+
#ifdef CONFIG_RTNETLINK
fifo_dump,
#endif
@@ -208,6 +207,7 @@ struct Qdisc_ops bfifo_qdisc_ops =
fifo_init,
fifo_reset,
NULL,
+ fifo_init,
#ifdef CONFIG_RTNETLINK
fifo_dump,
#endif
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7ba2e94cc..ba40033e5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -184,7 +184,7 @@ struct Qdisc noop_qdisc =
{ NULL },
noop_enqueue,
noop_dequeue,
- TCQ_F_DEFAULT|TCQ_F_BUILTIN,
+ TCQ_F_BUILTIN,
&noop_qdisc_ops,
};
@@ -207,7 +207,7 @@ struct Qdisc noqueue_qdisc =
{ NULL },
NULL,
NULL,
- TCQ_F_DEFAULT|TCQ_F_BUILTIN,
+ TCQ_F_BUILTIN,
&noqueue_qdisc_ops,
};
@@ -322,8 +322,8 @@ struct Qdisc * qdisc_create_dflt(struct device *dev, struct Qdisc_ops *ops)
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev = dev;
- sch->flags |= TCQ_F_DEFAULT;
- if (ops->init && ops->init(sch, NULL) == 0)
+ atomic_set(&sch->refcnt, 1);
+ if (!ops->init || ops->init(sch, NULL) == 0)
return sch;
kfree(sch);
@@ -342,6 +342,10 @@ void qdisc_reset(struct Qdisc *qdisc)
void qdisc_destroy(struct Qdisc *qdisc)
{
struct Qdisc_ops *ops = qdisc->ops;
+
+ if (!atomic_dec_and_test(&qdisc->refcnt))
+ return;
+
#ifdef CONFIG_NET_SCHED
if (qdisc->dev) {
struct Qdisc *q, **qp;
@@ -444,30 +448,3 @@ void dev_shutdown(struct device *dev)
end_bh_atomic();
}
-struct Qdisc * dev_set_scheduler(struct device *dev, struct Qdisc *qdisc)
-{
- struct Qdisc *oqdisc;
-
- if (dev->flags & IFF_UP)
- dev_deactivate(dev);
-
- start_bh_atomic();
- oqdisc = dev->qdisc_sleeping;
-
- /* Prune old scheduler */
- if (oqdisc)
- qdisc_reset(oqdisc);
-
- /* ... and graft new one */
- if (qdisc == NULL)
- qdisc = &noop_qdisc;
- dev->qdisc_sleeping = qdisc;
- dev->qdisc = &noop_qdisc;
- end_bh_atomic();
-
- if (dev->flags & IFF_UP)
- dev_activate(dev);
-
- return oqdisc;
-}
-
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 5b7b39fea..5222d149d 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -49,17 +49,19 @@ static __inline__ unsigned prio_classify(struct sk_buff *skb, struct Qdisc *sch)
{
struct prio_sched_data *q = (struct prio_sched_data *)sch->data;
struct tcf_result res;
+ u32 band;
- res.classid = skb->priority;
- if (TC_H_MAJ(res.classid) != sch->handle) {
+ band = skb->priority;
+ if (TC_H_MAJ(skb->priority) != sch->handle) {
if (!q->filter_list || tc_classify(skb, q->filter_list, &res)) {
- if (TC_H_MAJ(res.classid))
- res.classid = 0;
- res.classid = q->prio2band[res.classid&TC_PRIO_MAX] + 1;
+ if (TC_H_MAJ(band))
+ band = 0;
+ return q->prio2band[band&TC_PRIO_MAX];
}
+ band = res.classid;
}
-
- return res.classid - 1;
+ band = TC_H_MIN(band) - 1;
+ return band < q->bands ? band : q->prio2band[0];
}
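
The reworked prio_classify() maps skb->priority through prio2band unless a classifier or an explicit minor number selects a band, and clamps out-of-range bands. A small sketch of the default mapping (the priomap below is the same default table used by prio_init()):

    #include <stdio.h>

    int main(void)
    {
        static const unsigned char prio2band[16] =
            { 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };
        int bands = 3;

        for (int prio = 0; prio < 16; prio++) {
            int band = prio2band[prio & 15];
            if (band >= bands)      /* defensive clamp, as prio_tune() checks */
                band = 0;
            printf("priority %2d -> band %d\n", prio, band);
        }
        return 0;
    }
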
static int
@@ -160,38 +162,74 @@ prio_destroy(struct Qdisc* sch)
MOD_DEC_USE_COUNT;
}
+static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
+{
+ struct prio_sched_data *q = (struct prio_sched_data *)sch->data;
+ struct tc_prio_qopt *qopt = RTA_DATA(opt);
+ int i;
+
+ if (opt->rta_len < RTA_LENGTH(sizeof(*qopt)))
+ return -EINVAL;
+ if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
+ return -EINVAL;
+
+ for (i=0; i<=TC_PRIO_MAX; i++) {
+ if (qopt->priomap[i] >= qopt->bands)
+ return -EINVAL;
+ }
+
+ start_bh_atomic();
+ q->bands = qopt->bands;
+ memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
+
+ for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
+ struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
+ if (child != &noop_qdisc)
+ qdisc_destroy(child);
+ }
+ end_bh_atomic();
+
+ for (i=0; i<=TC_PRIO_MAX; i++) {
+ int band = q->prio2band[i];
+ if (q->queues[band] == &noop_qdisc) {
+ struct Qdisc *child;
+ child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ if (child) {
+ child = xchg(&q->queues[band], child);
+ synchronize_bh();
+
+ if (child != &noop_qdisc)
+ qdisc_destroy(child);
+ }
+ }
+ }
+ return 0;
+}
+
static int prio_init(struct Qdisc *sch, struct rtattr *opt)
{
static const u8 prio2band[TC_PRIO_MAX+1] =
{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };
struct prio_sched_data *q = (struct prio_sched_data *)sch->data;
- unsigned mask = 0;
int i;
+ for (i=0; i<TCQ_PRIO_BANDS; i++)
+ q->queues[i] = &noop_qdisc;
+
if (opt == NULL) {
q->bands = 3;
memcpy(q->prio2band, prio2band, sizeof(prio2band));
- mask = 7;
+ for (i=0; i<3; i++) {
+ struct Qdisc *child;
+ child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ if (child)
+ q->queues[i] = child;
+ }
} else {
- struct tc_prio_qopt *qopt = RTA_DATA(opt);
+ int err;
- if (opt->rta_len < RTA_LENGTH(sizeof(*qopt)))
- return -EINVAL;
- if (qopt->bands > TCQ_PRIO_BANDS)
- return -EINVAL;
- q->bands = qopt->bands;
- for (i=0; i<=TC_PRIO_MAX; i++) {
- if (qopt->priomap[i] >= q->bands)
- return -EINVAL;
- q->prio2band[i] = qopt->priomap[i];
- mask |= (1<<qopt->priomap[i]);
- }
- }
- for (i=0; i<TCQ_PRIO_BANDS; i++) {
- if (mask&(1<<i))
- q->queues[i] = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
- if (q->queues[i] == NULL)
- q->queues[i] = &noop_qdisc;
+ if ((err= prio_tune(sch, opt)) != 0)
+ return err;
}
MOD_INC_USE_COUNT;
return 0;
@@ -232,6 +270,18 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
return 0;
}
+static struct Qdisc *
+prio_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct prio_sched_data *q = (struct prio_sched_data *)sch->data;
+ unsigned long band = arg - 1;
+
+ if (band >= q->bands)
+ return NULL;
+
+ return q->queues[band];
+}
+
static unsigned long prio_get(struct Qdisc *sch, u32 classid)
{
struct prio_sched_data *q = (struct prio_sched_data *)sch->data;
@@ -242,6 +292,12 @@ static unsigned long prio_get(struct Qdisc *sch, u32 classid)
return band;
}
+static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
+{
+ return prio_get(sch, classid);
+}
+
+
static void prio_put(struct Qdisc *q, unsigned long cl)
{
return;
@@ -267,12 +323,15 @@ static int prio_delete(struct Qdisc *sch, unsigned long cl)
#ifdef CONFIG_RTNETLINK
-static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm)
+static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb,
+ struct tcmsg *tcm)
{
struct prio_sched_data *q = (struct prio_sched_data *)sch->data;
if (cl - 1 > q->bands)
return -ENOENT;
+ if (q->queues[cl-1])
+ tcm->tcm_info = q->queues[cl-1]->handle;
return 0;
}
#endif
@@ -310,6 +369,8 @@ static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl)
static struct Qdisc_class_ops prio_class_ops =
{
prio_graft,
+ prio_leaf,
+
prio_get,
prio_put,
prio_change,
@@ -317,7 +378,7 @@ static struct Qdisc_class_ops prio_class_ops =
prio_walk,
prio_find_tcf,
- prio_get,
+ prio_bind,
prio_put,
#ifdef CONFIG_RTNETLINK
@@ -340,6 +401,7 @@ struct Qdisc_ops prio_qdisc_ops =
prio_init,
prio_reset,
prio_destroy,
+ prio_tune,
#ifdef CONFIG_RTNETLINK
prio_dump,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index eac678b83..30b537b53 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -193,8 +193,8 @@ red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
}
if (q->qave < q->qth_min) {
-enqueue:
q->qcount = -1;
+enqueue:
if (sch->stats.backlog <= q->limit) {
__skb_queue_tail(&sch->q, skb);
sch->stats.backlog += skb->len;
@@ -231,6 +231,7 @@ drop:
*/
if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
goto enqueue;
+printk(KERN_DEBUG "Drop %d\n", q->qcount);
q->qcount = 0;
q->qR = net_random()&q->Rmask;
sch->stats.overlimits++;
@@ -375,6 +376,7 @@ struct Qdisc_ops red_qdisc_ops =
red_init,
red_reset,
red_destroy,
+ NULL /* red_change */,
#ifdef CONFIG_RTNETLINK
red_dump,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index c6f43badc..8baf254eb 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -14,7 +14,6 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
-#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -380,6 +379,27 @@ static void sfq_perturbation(unsigned long arg)
}
}
+static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
+{
+ struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data;
+ struct tc_sfq_qopt *ctl = RTA_DATA(opt);
+
+ if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+ return -EINVAL;
+
+ start_bh_atomic();
+ q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
+ q->perturb_period = ctl->perturb_period*HZ;
+
+ del_timer(&q->perturb_timer);
+ if (q->perturb_period) {
+ q->perturb_timer.expires = jiffies + q->perturb_period;
+ add_timer(&q->perturb_timer);
+ }
+ end_bh_atomic();
+ return 0;
+}
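
sfq_change() re-arms the perturbation timer because SFQ salts its flow hash with a value that changes every perturb_period seconds, so flows that collide into one bucket get re-shuffled over time. A hedged sketch of the idea with an invented hash (not the kernel's SFQ hash) and an arbitrary bucket count:

    #include <stdio.h>

    static unsigned bucket(unsigned saddr, unsigned daddr, unsigned perturb)
    {
        /* toy mixing function, illustration only */
        unsigned h = saddr ^ (daddr << 7) ^ perturb;
        h ^= h >> 11;
        return h % 128;     /* arbitrary bucket count for the sketch */
    }

    int main(void)
    {
        unsigned a = 0x0a000001, b = 0x0a000002;

        /* the same flow lands in different buckets once the salt changes */
        printf("salt 1: %u\n", bucket(a, b, 1));
        printf("salt 2: %u\n", bucket(a, b, 2));
        return 0;
    }
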
+
static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
{
struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data;
@@ -399,24 +419,15 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
q->max_depth = 0;
q->tail = SFQ_DEPTH;
if (opt == NULL) {
- q->quantum = sch->dev->mtu;
+ q->quantum = psched_mtu(sch->dev);
q->perturb_period = 0;
- if (sch->dev->hard_header)
- q->quantum += sch->dev->hard_header_len;
} else {
- struct tc_sfq_qopt *ctl = RTA_DATA(opt);
- if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
- return -EINVAL;
- q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
- q->perturb_period = ctl->perturb_period*HZ;
- /* The rest is compiled in */
+ int err = sfq_change(sch, opt);
+ if (err)
+ return err;
}
for (i=0; i<SFQ_DEPTH; i++)
sfq_link(q, i);
- if (q->perturb_period) {
- q->perturb_timer.expires = jiffies + q->perturb_period;
- add_timer(&q->perturb_timer);
- }
MOD_INC_USE_COUNT;
return 0;
}
@@ -467,6 +478,7 @@ struct Qdisc_ops sfq_qdisc_ops =
sfq_init,
sfq_reset,
sfq_destroy,
+ NULL, /* sfq_change */
#ifdef CONFIG_RTNETLINK
sfq_dump,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 83d6da87c..a4d13b628 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -114,6 +114,7 @@ struct tbf_sched_data
u32 limit; /* Maximal length of backlog: bytes */
u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
u32 mtu;
+ u32 max_size;
struct qdisc_rate_table *R_tab;
struct qdisc_rate_table *P_tab;
@@ -132,6 +133,8 @@ tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data;
+ if (skb->len > q->max_size)
+ goto drop;
__skb_queue_tail(&sch->q, skb);
if ((sch->stats.backlog += skb->len) <= q->limit) {
sch->stats.bytes += skb->len;
@@ -145,6 +148,8 @@ tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
__skb_unlink(skb, &sch->q);
sch->stats.backlog -= skb->len;
+
+drop:
sch->stats.drops++;
#ifdef CONFIG_NET_CLS_POLICE
if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
@@ -180,6 +185,7 @@ static void tbf_watchdog(unsigned long arg)
{
struct Qdisc *sch = (struct Qdisc*)arg;
+ sch->flags &= ~TCQ_F_THROTTLED;
qdisc_wakeup(sch->dev);
}
@@ -216,6 +222,7 @@ tbf_dequeue(struct Qdisc* sch)
q->tokens = toks;
q->ptokens = ptoks;
sch->stats.backlog -= skb->len;
+ sch->flags &= ~TCQ_F_THROTTLED;
return skb;
}
@@ -238,10 +245,11 @@ tbf_dequeue(struct Qdisc* sch)
Really, if we split the flow into independent
subflows, it would be a very good solution.
This is the main idea of all FQ algorithms
- (cf. CSZ, HPFQ, HFCS)
+ (cf. CSZ, HPFQ, HFSC)
*/
__skb_queue_head(&sch->q, skb);
+ sch->flags |= TCQ_F_THROTTLED;
sch->stats.overlimits++;
}
return NULL;
@@ -258,53 +266,86 @@ tbf_reset(struct Qdisc* sch)
PSCHED_GET_TIME(q->t_c);
q->tokens = q->buffer;
q->ptokens = q->mtu;
+ sch->flags &= ~TCQ_F_THROTTLED;
del_timer(&q->wd_timer);
}
-static int tbf_init(struct Qdisc* sch, struct rtattr *opt)
+static int tbf_change(struct Qdisc* sch, struct rtattr *opt)
{
+ int err = -EINVAL;
struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data;
struct rtattr *tb[TCA_TBF_PTAB];
struct tc_tbf_qopt *qopt;
+ struct qdisc_rate_table *rtab = NULL;
+ struct qdisc_rate_table *ptab = NULL;
+ int max_size;
- MOD_INC_USE_COUNT;
-
- if (opt == NULL ||
- rtattr_parse(tb, TCA_TBF_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
+ if (rtattr_parse(tb, TCA_TBF_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
tb[TCA_TBF_PARMS-1] == NULL ||
- RTA_PAYLOAD(tb[TCA_TBF_PARMS-1]) < sizeof(*qopt)) {
- MOD_DEC_USE_COUNT;
- return -EINVAL;
- }
+ RTA_PAYLOAD(tb[TCA_TBF_PARMS-1]) < sizeof(*qopt))
+ goto done;
qopt = RTA_DATA(tb[TCA_TBF_PARMS-1]);
- q->R_tab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB-1]);
- if (q->R_tab == NULL) {
- MOD_DEC_USE_COUNT;
- return -EINVAL;
- }
+ rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB-1]);
+ if (rtab == NULL)
+ goto done;
if (qopt->peakrate.rate) {
- q->P_tab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_PTAB-1]);
- if (q->P_tab == NULL) {
- MOD_DEC_USE_COUNT;
- qdisc_put_rtab(q->R_tab);
- return -EINVAL;
+ if (qopt->peakrate.rate > qopt->rate.rate)
+ ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB-1]);
+ if (ptab == NULL)
+ goto done;
+ }
+
+ max_size = psched_mtu(sch->dev);
+ if (ptab) {
+ int n = max_size>>qopt->peakrate.cell_log;
+ while (n>0 && ptab->data[n-1] > qopt->mtu) {
+ max_size -= (1<<qopt->peakrate.cell_log);
+ n--;
}
}
+ if (rtab->data[max_size>>qopt->rate.cell_log] > qopt->buffer)
+ goto done;
- PSCHED_GET_TIME(q->t_c);
- init_timer(&q->wd_timer);
- q->wd_timer.function = tbf_watchdog;
- q->wd_timer.data = (unsigned long)sch;
+ start_bh_atomic();
q->limit = qopt->limit;
q->mtu = qopt->mtu;
- if (q->mtu == 0)
- q->mtu = psched_mtu(sch->dev);
+ q->max_size = max_size;
q->buffer = qopt->buffer;
q->tokens = q->buffer;
q->ptokens = q->mtu;
- return 0;
+ rtab = xchg(&q->R_tab, rtab);
+ ptab = xchg(&q->P_tab, ptab);
+ end_bh_atomic();
+ err = 0;
+done:
+ if (rtab)
+ qdisc_put_rtab(rtab);
+ if (ptab)
+ qdisc_put_rtab(ptab);
+ return err;
+}
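
tbf_change() above derives max_size, the largest packet that can ever earn enough tokens, and rejects configurations where the bucket depth cannot cover it; tbf_enqueue() then drops anything bigger outright. A token-bucket sketch of that constraint with invented units and numbers:

    #include <stdio.h>

    int main(void)
    {
        long rate = 125000;         /* bytes per second (1 Mbit/s) */
        long buffer_us = 10000;     /* bucket depth: 10 ms worth of tokens */
        long max_size = rate * buffer_us / 1000000;     /* largest sendable packet */

        int sizes[] = { 512, 1250, 1500 };
        for (int i = 0; i < 3; i++) {
            long need_us = (long)sizes[i] * 1000000 / rate;
            printf("%4d bytes need %5ld us of tokens -> %s\n",
                   sizes[i], need_us,
                   sizes[i] <= max_size ? "can pass" : "always dropped");
        }
        return 0;
    }
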
+
+static int tbf_init(struct Qdisc* sch, struct rtattr *opt)
+{
+ int err;
+ struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data;
+
+ if (opt == NULL)
+ return -EINVAL;
+
+ MOD_INC_USE_COUNT;
+
+ PSCHED_GET_TIME(q->t_c);
+ init_timer(&q->wd_timer);
+ q->wd_timer.function = tbf_watchdog;
+ q->wd_timer.data = (unsigned long)sch;
+
+ if ((err = tbf_change(sch, opt)) != 0) {
+ MOD_DEC_USE_COUNT;
+ }
+ return err;
}
static void tbf_destroy(struct Qdisc *sch)
@@ -328,10 +369,10 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
unsigned char *b = skb->tail;
struct rtattr *rta;
struct tc_tbf_qopt opt;
-
+
rta = (struct rtattr*)b;
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
-
+
opt.limit = q->limit;
opt.rate = q->R_tab->rate;
if (q->P_tab)
@@ -366,6 +407,7 @@ struct Qdisc_ops tbf_qdisc_ops =
tbf_init,
tbf_reset,
tbf_destroy,
+ tbf_change,
#ifdef CONFIG_RTNETLINK
tbf_dump,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 212e6f696..66040d5e9 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -444,6 +444,7 @@ static struct teql_master the_master = {
teql_qdisc_init,
teql_reset,
teql_destroy,
+ NULL,
},};