author    | Ralf Baechle <ralf@linux-mips.org> | 1999-06-13 16:29:25 +0000
committer | Ralf Baechle <ralf@linux-mips.org> | 1999-06-13 16:29:25 +0000
commit    | db7d4daea91e105e3859cf461d7e53b9b77454b2 (patch)
tree      | 9bb65b95440af09e8aca63abe56970dd3360cc57 /net/sched
parent    | 9c1c01ead627bdda9211c9abd5b758d6c687d8ac (diff)
Merge with Linux 2.2.8.
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/Config.in     |   7
-rw-r--r-- | net/sched/Makefile      |  10
-rw-r--r-- | net/sched/cls_api.c     |  58
-rw-r--r-- | net/sched/cls_fw.c      | 305
-rw-r--r-- | net/sched/cls_route.c   | 594
-rw-r--r-- | net/sched/cls_rsvp.h    |  81
-rw-r--r-- | net/sched/cls_u32.c     |  59
-rw-r--r-- | net/sched/estimator.c   |   4
-rw-r--r-- | net/sched/police.c      |  61
-rw-r--r-- | net/sched/sch_api.c     | 447
-rw-r--r-- | net/sched/sch_cbq.c     | 335
-rw-r--r-- | net/sched/sch_csz.c     |  17
-rw-r--r-- | net/sched/sch_fifo.c    |  14
-rw-r--r-- | net/sched/sch_generic.c |  39
-rw-r--r-- | net/sched/sch_prio.c    | 120
-rw-r--r-- | net/sched/sch_red.c     |   4
-rw-r--r-- | net/sched/sch_sfq.c     |  40
-rw-r--r-- | net/sched/sch_tbf.c     | 100
-rw-r--r-- | net/sched/sch_teql.c    |   1
19 files changed, 1819 insertions, 477 deletions
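
One headline change in this merge is kmod support: cls_api.c and sch_api.c now try to autoload a missing classifier or qdisc module before failing the netlink request. The sketch below mirrors that logic as a stand-alone helper; `tcf_lookup_with_autoload()` is an illustrative name only, not a function in the patch — the real code runs inline in tc_ctl_tfilter() (and, with an "sch_" prefix, in qdisc_create()).

```c
#include <linux/kmod.h>
#include <linux/rtnetlink.h>
#include <linux/if.h>		/* IFNAMSIZ */

/* Hypothetical wrapper around the lookup+autoload logic this merge adds. */
static struct tcf_proto_ops *tcf_lookup_with_autoload(struct rtattr *kind)
{
	struct tcf_proto_ops *ops = tcf_proto_lookup_ops(kind);

#ifdef CONFIG_KMOD
	if (ops == NULL && kind != NULL) {
		char module_name[4 + IFNAMSIZ + 1];	/* "cls_" + name + '\0' */

		if (RTA_PAYLOAD(kind) <= IFNAMSIZ) {
			sprintf(module_name, "cls_%s", (char *)RTA_DATA(kind));
			request_module(module_name);		/* e.g. loads cls_fw.o or cls_route.o */
			ops = tcf_proto_lookup_ops(kind);	/* retry after the module loaded */
		}
	}
#endif
	return ops;
}
```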
diff --git a/net/sched/Config.in b/net/sched/Config.in index 5d497a050..ffb7a4810 100644 --- a/net/sched/Config.in +++ b/net/sched/Config.in @@ -18,10 +18,11 @@ if [ "$CONFIG_NET_QOS" = "y" ]; then fi bool 'Packet classifier API' CONFIG_NET_CLS if [ "$CONFIG_NET_CLS" = "y" ]; then - bool 'Routing tables based classifier' CONFIG_NET_CLS_ROUTE - if [ "$CONFIG_IP_FIREWALL" = "y" ]; then - bool 'Firewall based classifier' CONFIG_NET_CLS_FW + tristate 'Routing table based classifier' CONFIG_NET_CLS_ROUTE4 + if [ "$CONFIG_NET_CLS_ROUTE4" != "n" ]; then + define_bool CONFIG_NET_CLS_ROUTE y fi + tristate 'Firewall based classifier' CONFIG_NET_CLS_FW tristate 'U32 classifier' CONFIG_NET_CLS_U32 if [ "$CONFIG_NET_QOS" = "y" ]; then tristate 'Special RSVP classifier' CONFIG_NET_CLS_RSVP diff --git a/net/sched/Makefile b/net/sched/Makefile index 21a1cf07a..6e1169fab 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -125,12 +125,20 @@ else endif endif -ifeq ($(CONFIG_NET_CLS_ROUTE), y) +ifeq ($(CONFIG_NET_CLS_ROUTE4), y) O_OBJS += cls_route.o +else + ifeq ($(CONFIG_NET_CLS_ROUTE4), m) + M_OBJS += cls_route.o + endif endif ifeq ($(CONFIG_NET_CLS_FW), y) O_OBJS += cls_fw.o +else + ifeq ($(CONFIG_NET_CLS_FW), m) + M_OBJS += cls_fw.o + endif endif endif diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 081896dc5..683063137 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -7,6 +7,10 @@ * 2 of the License, or (at your option) any later version. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * Changes: + * + * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support */ #include <asm/uaccess.h> @@ -27,6 +31,7 @@ #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/init.h> +#include <linux/kmod.h> #include <net/sock.h> #include <net/pkt_sched.h> @@ -87,21 +92,13 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, /* Select new prio value from the range, managed by kernel. */ -static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp, u32 prio) +static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp) { u32 first = TC_H_MAKE(0xC0000000U,0U); - if (!tp || tp->next == NULL) - return first; - - if (prio == TC_H_MAKE(0xFFFF0000U,0U)) - first = tp->prio+1; - else + if (tp) first = tp->prio-1; - if (first == prio) - first = tp->prio; - return first; } @@ -129,10 +126,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* If no priority is given, user wants we allocated it. */ if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) return -ENOENT; - if (n->nlmsg_flags&NLM_F_APPEND) - prio = TC_H_MAKE(0xFFFF0000U,0U); - else - prio = TC_H_MAKE(0x80000000U,0U); + prio = TC_H_MAKE(0x80000000U,0U); } /* Find head of filter chain. 
*/ @@ -194,6 +188,18 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if ((tp = kmalloc(sizeof(*tp), GFP_KERNEL)) == NULL) goto errout; tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND-1]); +#ifdef CONFIG_KMOD + if (tp_ops==NULL && tca[TCA_KIND-1] != NULL) { + struct rtattr *kind = tca[TCA_KIND-1]; + char module_name[4 + IFNAMSIZ + 1]; + + if (RTA_PAYLOAD(kind) <= IFNAMSIZ) { + sprintf(module_name, "cls_%s", (char*)RTA_DATA(kind)); + request_module (module_name); + tp_ops = tcf_proto_lookup_ops(kind); + } + } +#endif if (tp_ops == NULL) { err = -EINVAL; kfree(tp); @@ -202,7 +208,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) memset(tp, 0, sizeof(*tp)); tp->ops = tp_ops; tp->protocol = protocol; - tp->prio = nprio ? : tcf_auto_prio(*back, prio); + tp->prio = nprio ? : tcf_auto_prio(*back); tp->q = q; tp->classify = tp_ops->classify; tp->classid = parent; @@ -221,6 +227,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (fh == 0) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { *back = tp->next; + synchronize_bh(); + tp->ops->destroy(tp); kfree(tp); err = 0; @@ -249,7 +257,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } } - err = tp->ops->change(tp, t->tcm_handle, tca, &fh); + err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh); if (err == 0) tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); @@ -336,12 +344,16 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) return skb->len; - if ((q = qdisc_lookup(dev, tcm->tcm_parent)) == NULL) + if (!tcm->tcm_parent) + q = dev->qdisc_sleeping; + else + q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); + if (q == NULL) return skb->len; - cops = q->ops->cl_ops; + if ((cops = q->ops->cl_ops) == NULL) + goto errout; if (TC_H_MIN(tcm->tcm_parent)) { - if (cops) - cl = cops->get(q, tcm->tcm_parent); + cl = cops->get(q, tcm->tcm_parent); if (cl == 0) goto errout; } @@ -360,7 +372,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) TC_H_MIN(tcm->tcm_info) != tp->protocol) continue; if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int)); + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); if (cb->args[1] == 0) { if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) { @@ -418,8 +430,8 @@ __initfunc(int tc_filter_init(void)) #ifdef CONFIG_NET_CLS_U32 INIT_TC_FILTER(u32); #endif -#ifdef CONFIG_NET_CLS_ROUTE - INIT_TC_FILTER(route); +#ifdef CONFIG_NET_CLS_ROUTE4 + INIT_TC_FILTER(route4); #endif #ifdef CONFIG_NET_CLS_FW INIT_TC_FILTER(fw); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 0fab64dda..e92b846ee 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -1,5 +1,5 @@ /* - * net/sched/cls_fw.c Routing table based packet classifier. + * net/sched/cls_fw.c Classifier mapping ipchains' fwmark to traffic class. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -7,8 +7,13 @@ * 2 of the License, or (at your option) any later version. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * Changes: + * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_walk off by one + * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_delete killed all the filter (and kernel). 
*/ +#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -34,15 +39,56 @@ #include <net/sock.h> #include <net/pkt_sched.h> +struct fw_head +{ + struct fw_filter *ht[256]; +}; + +struct fw_filter +{ + struct fw_filter *next; + u32 id; + struct tcf_result res; +#ifdef CONFIG_NET_CLS_POLICE + struct tcf_police *police; +#endif +}; + +static __inline__ int fw_hash(u32 handle) +{ + return handle&0xFF; +} static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) { - u32 clid = skb->fwmark; + struct fw_head *head = (struct fw_head*)tp->root; + struct fw_filter *f; +#ifdef CONFIG_IP_FIREWALL + u32 id = skb->fwmark; +#else + u32 id = 0; +#endif - if (clid && (TC_H_MAJ(clid) == 0 || - !(TC_H_MAJ(clid^tp->q->handle)))) { - res->classid = clid; + if (head == NULL) + goto old_method; + + for (f=head->ht[fw_hash(id)]; f; f=f->next) { + if (f->id == id) { + *res = f->res; +#ifdef CONFIG_NET_CLS_POLICE + if (f->police) + return tcf_police(skb, f->police); +#endif + return 0; + } + } + return -1; + +old_method: + if (id && (TC_H_MAJ(id) == 0 || + !(TC_H_MAJ(id^tp->q->handle)))) { + res->classid = id; res->class = 0; return 0; } @@ -51,6 +97,16 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, static unsigned long fw_get(struct tcf_proto *tp, u32 handle) { + struct fw_head *head = (struct fw_head*)tp->root; + struct fw_filter *f; + + if (head == NULL) + return 0; + + for (f=head->ht[fw_hash(handle)]; f; f=f->next) { + if (f->id == handle) + return (unsigned long)f; + } return 0; } @@ -60,24 +116,236 @@ static void fw_put(struct tcf_proto *tp, unsigned long f) static int fw_init(struct tcf_proto *tp) { + MOD_INC_USE_COUNT; return 0; } static void fw_destroy(struct tcf_proto *tp) { + struct fw_head *head = (struct fw_head*)xchg(&tp->root, NULL); + struct fw_filter *f; + int h; + + if (head == NULL) { + MOD_DEC_USE_COUNT; + return; + } + + for (h=0; h<256; h++) { + while ((f=head->ht[h]) != NULL) { + unsigned long cl; + head->ht[h] = f->next; + + if ((cl = cls_set_class(&f->res.class, 0)) != 0) + tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); +#ifdef CONFIG_NET_CLS_POLICE + tcf_police_release(f->police); +#endif + kfree(f); + } + } + kfree(head); + MOD_DEC_USE_COUNT; } static int fw_delete(struct tcf_proto *tp, unsigned long arg) { + struct fw_head *head = (struct fw_head*)tp->root; + struct fw_filter *f = (struct fw_filter*)arg; + struct fw_filter **fp; + + if (head == NULL || f == NULL) + return -EINVAL; + + for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) { + if (*fp == f) { + unsigned long cl; + + *fp = f->next; + synchronize_bh(); + + if ((cl = cls_set_class(&f->res.class, 0)) != 0) + tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); +#ifdef CONFIG_NET_CLS_POLICE + tcf_police_release(f->police); +#endif + kfree(f); + return 0; + } + } return -EINVAL; } -static int fw_change(struct tcf_proto *tp, u32 handle, - struct rtattr **tca, - unsigned long *arg) +static int fw_change(struct tcf_proto *tp, unsigned long base, + u32 handle, + struct rtattr **tca, + unsigned long *arg) { - return handle ? -EINVAL : 0; + struct fw_head *head = (struct fw_head*)tp->root; + struct fw_filter *f; + struct rtattr *opt = tca[TCA_OPTIONS-1]; + struct rtattr *tb[TCA_FW_MAX]; + int err; + + if (!opt) + return handle ? 
-EINVAL : 0; + + if (rtattr_parse(tb, TCA_FW_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0) + return -EINVAL; + + if ((f = (struct fw_filter*)*arg) != NULL) { + /* Node exists: adjust only classid */ + + if (f->id != handle && handle) + return -EINVAL; + if (tb[TCA_FW_CLASSID-1]) { + unsigned long cl; + + f->res.classid = *(u32*)RTA_DATA(tb[TCA_FW_CLASSID-1]); + cl = tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid); + cl = cls_set_class(&f->res.class, cl); + if (cl) + tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); + } +#ifdef CONFIG_NET_CLS_POLICE + if (tb[TCA_FW_POLICE-1]) { + struct tcf_police *police = tcf_police_locate(tb[TCA_FW_POLICE-1], tca[TCA_RATE-1]); + + police = xchg(&f->police, police); + synchronize_bh(); + + tcf_police_release(police); + } +#endif + return 0; + } + + if (!handle) + return -EINVAL; + + if (head == NULL) { + head = kmalloc(sizeof(struct fw_head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + memset(head, 0, sizeof(*head)); + + tp->root = head; + synchronize_bh(); + } + + f = kmalloc(sizeof(struct fw_filter), GFP_KERNEL); + if (f == NULL) + return -ENOBUFS; + memset(f, 0, sizeof(*f)); + + f->id = handle; + + if (tb[TCA_FW_CLASSID-1]) { + err = -EINVAL; + if (RTA_PAYLOAD(tb[TCA_FW_CLASSID-1]) != 4) + goto errout; + f->res.classid = *(u32*)RTA_DATA(tb[TCA_FW_CLASSID-1]); + cls_set_class(&f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid)); + } + +#ifdef CONFIG_NET_CLS_POLICE + if (tb[TCA_FW_POLICE-1]) + f->police = tcf_police_locate(tb[TCA_FW_POLICE-1], tca[TCA_RATE-1]); +#endif + + f->next = head->ht[fw_hash(handle)]; + wmb(); + head->ht[fw_hash(handle)] = f; + + *arg = (unsigned long)f; + return 0; + +errout: + if (f) + kfree(f); + return err; +} + +static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct fw_head *head = (struct fw_head*)tp->root; + int h; + + if (head == NULL) + arg->stop = 1; + + if (arg->stop) + return; + + for (h = 0; h < 256; h++) { + struct fw_filter *f; + + for (f = head->ht[h]; f; f = f->next) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(tp, (unsigned long)f, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } + } +} + +#ifdef CONFIG_RTNETLINK +static int fw_dump(struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct fw_filter *f = (struct fw_filter*)fh; + unsigned char *b = skb->tail; + struct rtattr *rta; + + if (f == NULL) + return skb->len; + + t->tcm_handle = f->id; + + if (!f->res.classid +#ifdef CONFIG_NET_CLS_POLICE + && !f->police +#endif + ) + return skb->len; + + rta = (struct rtattr*)b; + RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + + if (f->res.classid) + RTA_PUT(skb, TCA_FW_CLASSID, 4, &f->res.classid); +#ifdef CONFIG_NET_CLS_POLICE + if (f->police) { + struct rtattr * p_rta = (struct rtattr*)skb->tail; + + RTA_PUT(skb, TCA_FW_POLICE, 0, NULL); + + if (tcf_police_dump(skb, f->police) < 0) + goto rtattr_failure; + + p_rta->rta_len = skb->tail - (u8*)p_rta; + } +#endif + + rta->rta_len = skb->tail - b; +#ifdef CONFIG_NET_CLS_POLICE + if (f->police) { + RTA_PUT(skb, TCA_STATS, sizeof(struct tc_stats), &f->police->stats); + } +#endif + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; } +#endif + struct tcf_proto_ops cls_fw_ops = { NULL, @@ -90,5 +358,22 @@ struct tcf_proto_ops cls_fw_ops = { fw_put, fw_change, fw_delete, - NULL, + fw_walk, +#ifdef CONFIG_RTNETLINK + fw_dump +#else + NULL +#endif }; + +#ifdef MODULE +int init_module(void) +{ + return 
register_tcf_proto_ops(&cls_fw_ops); +} + +void cleanup_module(void) +{ + unregister_tcf_proto_ops(&cls_fw_ops); +} +#endif diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index a78f2090e..f83e79134 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -1,5 +1,5 @@ /* - * net/sched/cls_route.c Routing table based packet classifier. + * net/sched/cls_route.c ROUTE4 classifier. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -10,6 +10,7 @@ */ #include <linux/module.h> +#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <asm/bitops.h> @@ -34,65 +35,598 @@ #include <net/sock.h> #include <net/pkt_sched.h> +/* + 1. For now we assume that route tags < 256. + It allows to use direct table lookups, instead of hash tables. + 2. For now we assume that "from TAG" and "fromdev DEV" statements + are mutually exclusive. + 3. "to TAG from ANY" has higher priority, than "to ANY from XXX" + */ + +struct route4_fastmap +{ + struct route4_filter *filter; + u32 id; + int iif; +}; + +struct route4_head +{ + struct route4_fastmap fastmap[16]; + struct route4_bucket *table[256+1]; +}; + +struct route4_bucket +{ + struct route4_filter *ht[16+16+1]; +}; + +struct route4_filter +{ + struct route4_filter *next; + u32 id; + int iif; + + struct tcf_result res; +#ifdef CONFIG_NET_CLS_POLICE + struct tcf_police *police; +#endif + + u32 handle; + struct route4_bucket *bkt; +}; + +#define ROUTE4_FAILURE ((struct route4_filter*)(-1L)) + +static __inline__ int route4_fastmap_hash(u32 id, int iif) +{ + return id&0xF; +} + +static void route4_reset_fastmap(struct route4_head *head, u32 id) +{ + start_bh_atomic(); + memset(head->fastmap, 0, sizeof(head->fastmap)); + end_bh_atomic(); +} + +static void __inline__ +route4_set_fastmap(struct route4_head *head, u32 id, int iif, + struct route4_filter *f) +{ + int h = route4_fastmap_hash(id, iif); + head->fastmap[h].id = id; + head->fastmap[h].iif = iif; + head->fastmap[h].filter = f; +} + +static __inline__ int route4_hash_to(u32 id) +{ + return id&0xFF; +} + +static __inline__ int route4_hash_from(u32 id) +{ + return (id>>16)&0xF; +} + +static __inline__ int route4_hash_iif(int iif) +{ + return 16 + ((iif>>16)&0xF); +} + +static __inline__ int route4_hash_wild(void) +{ + return 32; +} + +#ifdef CONFIG_NET_CLS_POLICE +#define IF_ROUTE_POLICE \ +if (f->police) { \ + int pol_res = tcf_police(skb, f->police); \ + if (pol_res >= 0) return pol_res; \ + dont_cache = 1; \ + continue; \ +} \ +if (!dont_cache) +#else +#define IF_ROUTE_POLICE +#endif + -static int route_classify(struct sk_buff *skb, struct tcf_proto *tp, - struct tcf_result *res) +static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) { - struct dst_entry *dst = skb->dst; + struct route4_head *head = (struct route4_head*)tp->root; + struct dst_entry *dst; + struct route4_bucket *b; + struct route4_filter *f; +#ifdef CONFIG_NET_CLS_POLICE + int dont_cache = 0; +#endif + u32 id, h; + int iif; - if (dst) { - u32 clid = dst->tclassid; + if ((dst = skb->dst) == NULL) + goto failure; - if (clid && (TC_H_MAJ(clid) == 0 || - !(TC_H_MAJ(clid^tp->q->handle)))) { - res->classid = clid; - res->class = 0; + id = dst->tclassid; + if (head == NULL) + goto old_method; + + iif = ((struct rtable*)dst)->key.iif; + + h = route4_fastmap_hash(id, iif); + if (id == head->fastmap[h].id && + iif == head->fastmap[h].iif && + (f = head->fastmap[h].filter) != NULL) { + if (f 
== ROUTE4_FAILURE) + goto failure; + + *res = f->res; + return 0; + } + + h = route4_hash_to(id); + +restart: + if ((b = head->table[h]) != NULL) { + f = b->ht[route4_hash_from(id)]; + + for ( ; f; f = f->next) { + if (f->id == id) { + *res = f->res; + IF_ROUTE_POLICE route4_set_fastmap(head, id, iif, f); + return 0; + } + } + + for (f = b->ht[route4_hash_iif(iif)]; f; f = f->next) { + if (f->iif == iif) { + *res = f->res; + IF_ROUTE_POLICE route4_set_fastmap(head, id, iif, f); + return 0; + } + } + + for (f = b->ht[route4_hash_wild()]; f; f = f->next) { + *res = f->res; + IF_ROUTE_POLICE route4_set_fastmap(head, id, iif, f); return 0; } + + } + if (h < 256) { + h = 256; + id &= ~0xFFFF; + goto restart; + } + +#ifdef CONFIG_NET_CLS_POLICE + if (!dont_cache) +#endif + route4_set_fastmap(head, id, iif, ROUTE4_FAILURE); +failure: + return -1; + +old_method: + if (id && (TC_H_MAJ(id) == 0 || + !(TC_H_MAJ(id^tp->q->handle)))) { + res->classid = id; + res->class = 0; + return 0; } return -1; } -static unsigned long route_get(struct tcf_proto *tp, u32 handle) +static u32 to_hash(u32 id) +{ + u32 h = id&0xFF; + if (id&0x8000) + h += 256; + return h; +} + +static u32 from_hash(u32 id) { + id &= 0xFFFF; + if (id == 0xFFFF) + return 32; + if (!(id & 0x8000)) { + if (id > 255) + return 256; + return id&0xF; + } + return 16 + (id&0xF); +} + +static unsigned long route4_get(struct tcf_proto *tp, u32 handle) +{ + struct route4_head *head = (struct route4_head*)tp->root; + struct route4_bucket *b; + struct route4_filter *f; + unsigned h1, h2; + + if (!head) + return 0; + + h1 = to_hash(handle); + if (h1 > 256) + return 0; + + h2 = from_hash(handle>>16); + if (h2 > 32) + return 0; + + if ((b = head->table[h1]) != NULL) { + for (f = b->ht[h2]; f; f = f->next) + if (f->handle == handle) + return (unsigned long)f; + } return 0; } -static void route_put(struct tcf_proto *tp, unsigned long f) +static void route4_put(struct tcf_proto *tp, unsigned long f) { } -static int route_init(struct tcf_proto *tp) +static int route4_init(struct tcf_proto *tp) { + MOD_INC_USE_COUNT; return 0; } -static void route_destroy(struct tcf_proto *tp) +static void route4_destroy(struct tcf_proto *tp) { + struct route4_head *head = xchg(&tp->root, NULL); + int h1, h2; + + if (head == NULL) { + MOD_DEC_USE_COUNT; + return; + } + + for (h1=0; h1<=256; h1++) { + struct route4_bucket *b; + + if ((b = head->table[h1]) != NULL) { + for (h2=0; h2<=32; h2++) { + struct route4_filter *f; + + while ((f = b->ht[h2]) != NULL) { + unsigned long cl; + + b->ht[h2] = f->next; + if ((cl = cls_set_class(&f->res.class, 0)) != 0) + tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); +#ifdef CONFIG_NET_CLS_POLICE + tcf_police_release(f->police); +#endif + kfree(f); + } + } + kfree(b); + } + } + kfree(head); + MOD_DEC_USE_COUNT; } -static int route_delete(struct tcf_proto *tp, unsigned long arg) +static int route4_delete(struct tcf_proto *tp, unsigned long arg) { - return -EINVAL; + struct route4_head *head = (struct route4_head*)tp->root; + struct route4_filter **fp, *f = (struct route4_filter*)arg; + unsigned h = f->handle; + struct route4_bucket *b; + int i; + + if (!head || !f) + return -EINVAL; + + b = f->bkt; + + for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) { + if (*fp == f) { + unsigned long cl; + + *fp = f->next; + synchronize_bh(); + + route4_reset_fastmap(head, f->id); + + if ((cl = cls_set_class(&f->res.class, 0)) != 0) + tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); + +#ifdef CONFIG_NET_CLS_POLICE + tcf_police_release(f->police); +#endif + 
kfree(f); + + /* Strip tree */ + + for (i=0; i<=32; i++) + if (b->ht[i]) + return 0; + + /* OK, session has no flows */ + head->table[to_hash(h)] = NULL; + synchronize_bh(); + + kfree(b); + return 0; + } + } + return 0; } -static int route_change(struct tcf_proto *tp, u32 handle, - struct rtattr **tca, - unsigned long *arg) +static int route4_change(struct tcf_proto *tp, unsigned long base, + u32 handle, + struct rtattr **tca, + unsigned long *arg) { - return handle ? -EINVAL : 0; + struct route4_head *head = tp->root; + struct route4_filter *f, *f1, **ins_f; + struct route4_bucket *b; + struct rtattr *opt = tca[TCA_OPTIONS-1]; + struct rtattr *tb[TCA_ROUTE4_MAX]; + unsigned h1, h2; + int err; + + if (opt == NULL) + return handle ? -EINVAL : 0; + + if (rtattr_parse(tb, TCA_ROUTE4_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0) + return -EINVAL; + + if ((f = (struct route4_filter*)*arg) != NULL) { + /* Node exists: adjust only classid */ + + if (f->handle != handle && handle) + return -EINVAL; + if (tb[TCA_ROUTE4_CLASSID-1]) { + unsigned long cl; + + f->res.classid = *(u32*)RTA_DATA(tb[TCA_ROUTE4_CLASSID-1]); + cl = cls_set_class(&f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid)); + if (cl) + tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); + } +#ifdef CONFIG_NET_CLS_POLICE + if (tb[TCA_ROUTE4_POLICE-1]) { + struct tcf_police *police = tcf_police_locate(tb[TCA_ROUTE4_POLICE-1], tca[TCA_RATE-1]); + + police = xchg(&f->police, police); + synchronize_bh(); + + tcf_police_release(police); + } +#endif + return 0; + } + + /* Now more serious part... */ + + if (head == NULL) { + head = kmalloc(sizeof(struct route4_head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + memset(head, 0, sizeof(struct route4_head)); + + tp->root = head; + synchronize_bh(); + } + + f = kmalloc(sizeof(struct route4_filter), GFP_KERNEL); + if (f == NULL) + return -ENOBUFS; + + memset(f, 0, sizeof(*f)); + + err = -EINVAL; + f->handle = 0x8000; + if (tb[TCA_ROUTE4_TO-1]) { + if (handle&0x8000) + goto errout; + if (RTA_PAYLOAD(tb[TCA_ROUTE4_TO-1]) < 4) + goto errout; + f->id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_TO-1]); + if (f->id > 0xFF) + goto errout; + f->handle = f->id; + } + if (tb[TCA_ROUTE4_FROM-1]) { + u32 sid; + if (tb[TCA_ROUTE4_IIF-1]) + goto errout; + if (RTA_PAYLOAD(tb[TCA_ROUTE4_FROM-1]) < 4) + goto errout; + sid = (*(u32*)RTA_DATA(tb[TCA_ROUTE4_FROM-1])); + if (sid > 0xFF) + goto errout; + f->handle |= sid<<16; + f->id |= sid<<16; + } else if (tb[TCA_ROUTE4_IIF-1]) { + if (RTA_PAYLOAD(tb[TCA_ROUTE4_IIF-1]) < 4) + goto errout; + f->iif = *(u32*)RTA_DATA(tb[TCA_ROUTE4_IIF-1]); + if (f->iif > 0x7FFF) + goto errout; + f->handle |= (f->iif|0x8000)<<16; + } else + f->handle |= 0xFFFF<<16; + + if (handle) { + f->handle |= handle&0x7F00; + if (f->handle != handle) + goto errout; + } + + if (tb[TCA_ROUTE4_CLASSID-1]) { + if (RTA_PAYLOAD(tb[TCA_ROUTE4_CLASSID-1]) < 4) + goto errout; + f->res.classid = *(u32*)RTA_DATA(tb[TCA_ROUTE4_CLASSID-1]); + } + + h1 = to_hash(f->handle); + if ((b = head->table[h1]) == NULL) { + err = -ENOBUFS; + b = kmalloc(sizeof(struct route4_bucket), GFP_KERNEL); + if (b == NULL) + goto errout; + memset(b, 0, sizeof(*b)); + + head->table[h1] = b; + synchronize_bh(); + } + f->bkt = b; + + err = -EEXIST; + h2 = from_hash(f->handle>>16); + for (ins_f = &b->ht[h2]; (f1=*ins_f) != NULL; ins_f = &f1->next) { + if (f->handle < f1->handle) + break; + if (f1->handle == f->handle) + goto errout; + } + + cls_set_class(&f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid)); 
+#ifdef CONFIG_NET_CLS_POLICE + if (tb[TCA_ROUTE4_POLICE-1]) + f->police = tcf_police_locate(tb[TCA_ROUTE4_POLICE-1], tca[TCA_RATE-1]); +#endif + + f->next = f1; + wmb(); + *ins_f = f; + + route4_reset_fastmap(head, f->id); + *arg = (unsigned long)f; + return 0; + +errout: + if (f) + kfree(f); + return err; +} + +static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct route4_head *head = tp->root; + unsigned h, h1; + + if (head == NULL) + arg->stop = 1; + + if (arg->stop) + return; + + for (h = 0; h <= 256; h++) { + struct route4_bucket *b = head->table[h]; + + if (b) { + for (h1 = 0; h1 <= 32; h1++) { + struct route4_filter *f; + + for (f = b->ht[h1]; f; f = f->next) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(tp, (unsigned long)f, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } + } + } + } } -struct tcf_proto_ops cls_route_ops = { +#ifdef CONFIG_RTNETLINK +static int route4_dump(struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct route4_filter *f = (struct route4_filter*)fh; + unsigned char *b = skb->tail; + struct rtattr *rta; + u32 id; + + if (f == NULL) + return skb->len; + + t->tcm_handle = f->handle; + + rta = (struct rtattr*)b; + RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + + if (!(f->handle&0x8000)) { + id = f->id&0xFF; + RTA_PUT(skb, TCA_ROUTE4_TO, sizeof(id), &id); + } + if (f->handle&0x80000000) { + if ((f->handle>>16) != 0xFFFF) + RTA_PUT(skb, TCA_ROUTE4_IIF, sizeof(f->iif), &f->iif); + } else { + id = f->id>>16; + RTA_PUT(skb, TCA_ROUTE4_FROM, sizeof(id), &id); + } + if (f->res.classid) + RTA_PUT(skb, TCA_ROUTE4_CLASSID, 4, &f->res.classid); +#ifdef CONFIG_NET_CLS_POLICE + if (f->police) { + struct rtattr * p_rta = (struct rtattr*)skb->tail; + + RTA_PUT(skb, TCA_ROUTE4_POLICE, 0, NULL); + + if (tcf_police_dump(skb, f->police) < 0) + goto rtattr_failure; + + p_rta->rta_len = skb->tail - (u8*)p_rta; + } +#endif + + rta->rta_len = skb->tail - b; +#ifdef CONFIG_NET_CLS_POLICE + if (f->police) { + RTA_PUT(skb, TCA_STATS, sizeof(struct tc_stats), &f->police->stats); + } +#endif + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} +#endif + +struct tcf_proto_ops cls_route4_ops = { NULL, "route", - route_classify, - route_init, - route_destroy, - - route_get, - route_put, - route_change, - route_delete, - NULL, + route4_classify, + route4_init, + route4_destroy, + + route4_get, + route4_put, + route4_change, + route4_delete, + route4_walk, +#ifdef CONFIG_RTNETLINK + route4_dump +#else + NULL +#endif }; + +#ifdef MODULE +int init_module(void) +{ + return register_tcf_proto_ops(&cls_route4_ops); +} + +void cleanup_module(void) +{ + unregister_tcf_proto_ops(&cls_route4_ops); +} +#endif diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 4168f541f..48142c6e7 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -120,6 +120,18 @@ static __inline__ unsigned hash_src(u32 *src) return h & 0xF; } +#ifdef CONFIG_NET_CLS_POLICE +#define RSVP_POLICE() \ +if (f->police) { \ + int pol_res = tcf_police(skb, f->police); \ + if (pol_res < 0) continue; \ + if (pol_res) return pol_res; \ +} +#else +#define RSVP_POLICE() +#endif + + static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) { @@ -137,7 +149,7 @@ static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, struct iphdr *nhptr = skb->nh.iph; #endif -#ifndef __i386__ +#if !defined( __i386__) && !defined(__mc68000__) if ((unsigned long)nhptr 
& 3) return -1; #endif @@ -181,25 +193,26 @@ restart: && src[2] == f->src[2] #endif ) { + *res = f->res; + + RSVP_POLICE(); + matched: - if (f->tunnelhdr == 0) { - *res = f->res; -#ifdef CONFIG_NET_CLS_POLICE - if (f->police) - return tcf_police(skb, f->police); -#endif + if (f->tunnelhdr == 0) return 0; - } else { - tunnelid = f->res.classid; - nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr)); - goto restart; - } + + tunnelid = f->res.classid; + nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr)); + goto restart; } } /* And wildcard bucket... */ - if ((f = s->ht[16]) != NULL) + for (f = s->ht[16]; f; f = f->next) { + *res = f->res; + RSVP_POLICE(); goto matched; + } return -1; } } @@ -260,7 +273,6 @@ static void rsvp_destroy(struct tcf_proto *tp) struct rsvp_session *s; while ((s = sht[h1]) != NULL) { - sht[h1] = s->next; for (h2=0; h2<=16; h2++) { @@ -270,7 +282,7 @@ static void rsvp_destroy(struct tcf_proto *tp) unsigned long cl; s->ht[h2] = f->next; - if ((cl = xchg(&f->res.class, 0)) != 0) + if ((cl = cls_set_class(&f->res.class, 0)) != 0) tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); #ifdef CONFIG_NET_CLS_POLICE tcf_police_release(f->police); @@ -297,8 +309,11 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) if (*fp == f) { unsigned long cl; + *fp = f->next; - if ((cl = xchg(&f->res.class, 0)) != 0) + synchronize_bh(); + + if ((cl = cls_set_class(&f->res.class, 0)) != 0) tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); #ifdef CONFIG_NET_CLS_POLICE @@ -318,11 +333,13 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) *sp; sp = &(*sp)->next) { if (*sp == s) { *sp = s->next; + synchronize_bh(); + kfree(s); return 0; } } - + return 0; } } @@ -399,7 +416,8 @@ static u32 gen_tunnel(struct rsvp_head *data) return 0; } -static int rsvp_change(struct tcf_proto *tp, u32 handle, +static int rsvp_change(struct tcf_proto *tp, unsigned long base, + u32 handle, struct rtattr **tca, unsigned long *arg) { @@ -425,17 +443,21 @@ static int rsvp_change(struct tcf_proto *tp, u32 handle, if (f->handle != handle && handle) return -EINVAL; if (tb[TCA_RSVP_CLASSID-1]) { - unsigned long cl = xchg(&f->res.class, 0); + unsigned long cl; + + f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]); + cl = cls_set_class(&f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid)); if (cl) tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); - f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]); - f->res.class = tp->q->ops->cl_ops->bind_tcf(tp->q, f->res.classid); } #ifdef CONFIG_NET_CLS_POLICE if (tb[TCA_RSVP_POLICE-1]) { - struct tcf_police *police = tcf_police_locate(tb[TCA_RSVP_POLICE-1]); + struct tcf_police *police = tcf_police_locate(tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]); - tcf_police_release(xchg(&f->police, police)); + police = xchg(&f->police, police); + synchronize_bh(); + + tcf_police_release(police); } #endif return 0; @@ -514,17 +536,19 @@ insert: f->sess = s; if (f->tunnelhdr == 0) - f->res.class = tp->q->ops->cl_ops->bind_tcf(tp->q, f->res.classid); + cls_set_class(&f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid)); #ifdef CONFIG_NET_CLS_POLICE if (tb[TCA_RSVP_POLICE-1]) - f->police = tcf_police_locate(tb[TCA_RSVP_POLICE-1]); + f->police = tcf_police_locate(tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]); #endif for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next) if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask) break; f->next = *fp; + wmb(); *fp = f; + *arg = (unsigned long)f; return 0; } @@ -546,7 +570,9 @@ insert: break; } s->next = *sp; + 
wmb(); *sp = s; + goto insert; errout: @@ -631,6 +657,11 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, #endif rta->rta_len = skb->tail - b; +#ifdef CONFIG_NET_CLS_POLICE + if (f->police) { + RTA_PUT(skb, TCA_STATS, sizeof(struct tc_stats), &f->police->stats); + } +#endif return skb->len; rtattr_failure: diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index cb52e9d07..98d4e1f7b 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -114,7 +114,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re int sel = 0; int i; -#ifndef __i386__ +#if !defined(__i386__) && !defined(__mc68000__) if ((unsigned long)ptr & 3) return -1; #endif @@ -137,10 +137,13 @@ check_terminal: if (n->sel.flags&TC_U32_TERMINAL) { *res = n->res; #ifdef CONFIG_NET_CLS_POLICE - if (n->police) - return tcf_police(skb, n->police); + if (n->police) { + int pol_res = tcf_police(skb, n->police); + if (pol_res >= 0) + return pol_res; + } else #endif - return 0; + return 0; } n = n->next; goto next_knode; @@ -304,7 +307,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n) { unsigned long cl; - if ((cl = xchg(&n->res.class, 0)) != 0) + if ((cl = cls_set_class(&n->res.class, 0)) != 0) tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); #ifdef CONFIG_NET_CLS_POLICE tcf_police_release(n->police); @@ -324,6 +327,8 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key) for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) { if (*kp == key) { *kp = key->next; + synchronize_bh(); + u32_destroy_key(tp, key); return 0; } @@ -341,6 +346,8 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) for (h=0; h<=ht->divisor; h++) { while ((n = ht->ht[h]) != NULL) { ht->ht[h] = n->next; + synchronize_bh(); + u32_destroy_key(tp, n); } } @@ -402,6 +409,7 @@ static void u32_destroy(struct tcf_proto *tp) kfree(tp_c); } + MOD_DEC_USE_COUNT; tp->data = NULL; } @@ -437,8 +445,10 @@ static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) return handle|(i>0xFFF ? 
0xFFF : i); } -static int u32_set_parms(struct Qdisc *q, struct tc_u_hnode *ht, - struct tc_u_knode *n, struct rtattr **tb) +static int u32_set_parms(struct Qdisc *q, unsigned long base, + struct tc_u_hnode *ht, + struct tc_u_knode *n, struct rtattr **tb, + struct rtattr *est) { if (tb[TCA_U32_LINK-1]) { u32 handle = *(u32*)RTA_DATA(tb[TCA_U32_LINK-1]); @@ -456,28 +466,33 @@ static int u32_set_parms(struct Qdisc *q, struct tc_u_hnode *ht, } ht_down = xchg(&n->ht_down, ht_down); + synchronize_bh(); if (ht_down) ht_down->refcnt--; } if (tb[TCA_U32_CLASSID-1]) { - unsigned long cl = xchg(&n->res.class, 0); + unsigned long cl; + + n->res.classid = *(u32*)RTA_DATA(tb[TCA_U32_CLASSID-1]); + cl = cls_set_class(&n->res.class, q->ops->cl_ops->bind_tcf(q, base, n->res.classid)); if (cl) q->ops->cl_ops->unbind_tcf(q, cl); - n->res.classid = *(u32*)RTA_DATA(tb[TCA_U32_CLASSID-1]); - n->res.class = q->ops->cl_ops->bind_tcf(q, n->res.classid); } #ifdef CONFIG_NET_CLS_POLICE if (tb[TCA_U32_POLICE-1]) { - struct tcf_police *police = tcf_police_locate(tb[TCA_U32_POLICE-1]); + struct tcf_police *police = tcf_police_locate(tb[TCA_U32_POLICE-1], est); + + police = xchg(&n->police, police); + synchronize_bh(); - tcf_police_release(xchg(&n->police, police)); + tcf_police_release(police); } #endif return 0; } -static int u32_change(struct tcf_proto *tp, u32 handle, +static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, struct rtattr **tca, unsigned long *arg) { @@ -500,7 +515,7 @@ static int u32_change(struct tcf_proto *tp, u32 handle, if (TC_U32_KEY(n->handle) == 0) return -EINVAL; - return u32_set_parms(tp->q, n->ht_up, n, tb); + return u32_set_parms(tp->q, base, n->ht_up, n, tb, tca[TCA_RATE-1]); } if (tb[TCA_U32_DIVISOR-1]) { @@ -531,7 +546,7 @@ static int u32_change(struct tcf_proto *tp, u32 handle, if (tb[TCA_U32_HASH-1]) { htid = *(unsigned*)RTA_DATA(tb[TCA_U32_HASH-1]); - if (TC_U32_HTID(handle) == TC_U32_ROOT) { + if (TC_U32_HTID(htid) == TC_U32_ROOT) { ht = tp->root; htid = ht->handle; } else { @@ -550,8 +565,6 @@ static int u32_change(struct tcf_proto *tp, u32 handle, if (handle) { if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid)) return -EINVAL; - if (TC_U32_HASH(handle) && TC_U32_HASH(handle^htid)) - return -EINVAL; handle = htid | TC_U32_NODE(handle); } else handle = gen_new_kid(ht, htid); @@ -568,14 +581,17 @@ static int u32_change(struct tcf_proto *tp, u32 handle, memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); n->ht_up = ht; n->handle = handle; - err = u32_set_parms(tp->q, ht, n, tb); + err = u32_set_parms(tp->q, base, ht, n, tb, tca[TCA_RATE-1]); if (err == 0) { struct tc_u_knode **ins; for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next) - if (TC_U32_NODE(handle) >= TC_U32_NODE((*ins)->handle)) + if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle)) break; + n->next = *ins; + wmb(); *ins = n; + *arg = (unsigned long)n; return 0; } @@ -664,6 +680,11 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, } rta->rta_len = skb->tail - b; +#ifdef CONFIG_NET_CLS_POLICE + if (TC_U32_KEY(n->handle) && n->police) { + RTA_PUT(skb, TCA_STATS, sizeof(struct tc_stats), &n->police->stats); + } +#endif return skb->len; rtattr_failure: diff --git a/net/sched/estimator.c b/net/sched/estimator.c index 463879606..d51017c84 100644 --- a/net/sched/estimator.c +++ b/net/sched/estimator.c @@ -171,8 +171,10 @@ void qdisc_kill_estimator(struct tc_stats *stats) pest = &est->next; continue; } - /* ATOMIC_SET */ + *pest = est->next; + synchronize_bh(); 
+ kfree(est); killed++; } diff --git a/net/sched/police.c b/net/sched/police.c index 13599ac49..89e58d8be 100644 --- a/net/sched/police.c +++ b/net/sched/police.c @@ -74,6 +74,9 @@ void tcf_police_destroy(struct tcf_police *p) for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) { if (*p1p == p) { *p1p = p->next; +#ifdef CONFIG_NET_ESTIMATOR + qdisc_kill_estimator(&p->stats); +#endif if (p->R_tab) qdisc_put_rtab(p->R_tab); if (p->P_tab) @@ -85,7 +88,7 @@ void tcf_police_destroy(struct tcf_police *p) BUG_TRAP(0); } -struct tcf_police * tcf_police_locate(struct rtattr *rta) +struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) { unsigned h; struct tcf_police *p; @@ -111,20 +114,35 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta) memset(p, 0, sizeof(*p)); p->refcnt = 1; - if ((p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1])) == NULL) - goto failure; - if (parm->peakrate.rate && - (p->P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE-1])) == NULL) - goto failure; + if (parm->rate.rate) { + if ((p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1])) == NULL) + goto failure; + if (parm->peakrate.rate && + (p->P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE-1])) == NULL) + goto failure; + } + if (tb[TCA_POLICE_RESULT-1]) + p->result = *(int*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); +#ifdef CONFIG_NET_ESTIMATOR + if (tb[TCA_POLICE_AVRATE-1]) + p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); +#endif p->toks = p->burst = parm->burst; p->mtu = parm->mtu; - if (p->mtu == 0) - p->mtu = 255<<p->R_tab->rate.cell_log; + if (p->mtu == 0) { + p->mtu = ~0; + if (p->R_tab) + p->mtu = 255<<p->R_tab->rate.cell_log; + } if (p->P_tab) p->ptoks = L2T_P(p, p->mtu); PSCHED_GET_TIME(p->t_c); p->index = parm->index ? 
: tcf_police_new_index(); p->action = parm->action; +#ifdef CONFIG_NET_ESTIMATOR + if (est) + qdisc_new_estimator(&p->stats, est); +#endif h = tcf_police_hash(p->index); p->next = tcf_police_ht[h]; tcf_police_ht[h] = p; @@ -143,7 +161,20 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *p) long toks; long ptoks = 0; + p->stats.bytes += skb->len; + p->stats.packets++; + +#ifdef CONFIG_NET_ESTIMATOR + if (p->ewma_rate && p->stats.bps >= p->ewma_rate) { + p->stats.overlimits++; + return p->action; + } +#endif + if (skb->len <= p->mtu) { + if (p->R_tab == NULL) + return p->result; + PSCHED_GET_TIME(now); toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst, 0); @@ -163,10 +194,11 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *p) p->t_c = now; p->toks = toks; p->ptoks = ptoks; - return TC_POLICE_OK; + return p->result; } } + p->stats.overlimits++; return p->action; } @@ -180,12 +212,21 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p) opt.action = p->action; opt.mtu = p->mtu; opt.burst = p->burst; - opt.rate = p->R_tab->rate; + if (p->R_tab) + opt.rate = p->R_tab->rate; + else + memset(&opt.rate, 0, sizeof(opt.rate)); if (p->P_tab) opt.peakrate = p->P_tab->rate; else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); + if (p->result) + RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); +#ifdef CONFIG_NET_ESTIMATOR + if (p->ewma_rate) + RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); +#endif return skb->len; rtattr_failure: diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f16638081..0ced70bbc 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -11,6 +11,7 @@ * Fixes: * * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired. + * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support */ #include <linux/config.h> @@ -29,6 +30,7 @@ #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/proc_fs.h> +#include <linux/kmod.h> #include <net/sock.h> #include <net/pkt_sched.h> @@ -41,7 +43,7 @@ #define BUG_TRAP(x) if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } #ifdef CONFIG_RTNETLINK -static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, +static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new); static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, struct Qdisc *q, unsigned long cl, int event); @@ -116,6 +118,10 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, ---destroy destroys resources allocated by init and during lifetime of qdisc. + + ---change + + changes qdisc parameters. */ /************************************************ @@ -177,22 +183,22 @@ struct Qdisc *qdisc_lookup(struct device *dev, u32 handle) return NULL; } -/* We know classid. Find qdisc among all qdisc's attached to device - (root qdisc, all its children, children of children etc.) 
- */ - -struct Qdisc *qdisc_lookup_class(struct device *dev, u32 classid) +struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { - struct Qdisc *q; + unsigned long cl; + struct Qdisc *leaf; + struct Qdisc_class_ops *cops = p->ops->cl_ops; - for (q = dev->qdisc_list; q; q = q->next) { - if (q->classid == classid) - return q; - } - return NULL; + if (cops == NULL) + return NULL; + cl = cops->get(p, classid); + if (cl == 0) + return NULL; + leaf = cops->leaf(p, cl); + cops->put(p, cl); + return leaf; } - /* Find queueing discipline by name */ struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind) @@ -268,6 +274,37 @@ u32 qdisc_alloc_handle(struct device *dev) return i>0 ? autohandle : 0; } +/* Attach toplevel qdisc to device dev */ + +static struct Qdisc * +dev_graft_qdisc(struct device *dev, struct Qdisc *qdisc) +{ + struct Qdisc *oqdisc; + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + + start_bh_atomic(); + oqdisc = dev->qdisc_sleeping; + + /* Prune old scheduler */ + if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) + qdisc_reset(oqdisc); + + /* ... and graft new one */ + if (qdisc == NULL) + qdisc = &noop_qdisc; + dev->qdisc_sleeping = qdisc; + dev->qdisc = &noop_qdisc; + end_bh_atomic(); + + if (dev->flags & IFF_UP) + dev_activate(dev); + + return oqdisc; +} + + /* Graft qdisc "new" to class "classid" of qdisc "parent" or to device "dev". @@ -280,17 +317,10 @@ int qdisc_graft(struct device *dev, struct Qdisc *parent, u32 classid, int err = 0; if (parent == NULL) { - BUG_TRAP(classid == TC_H_ROOT); - if (new) { - new->parent = NULL; - new->classid = TC_H_ROOT; - } - *old = dev_set_scheduler(dev, new); + *old = dev_graft_qdisc(dev, new); } else { struct Qdisc_class_ops *cops = parent->ops->cl_ops; - BUG_TRAP(classid != TC_H_ROOT); - err = -EINVAL; if (cops) { @@ -313,22 +343,30 @@ int qdisc_graft(struct device *dev, struct Qdisc *parent, u32 classid, */ static struct Qdisc * -qdisc_create(struct device *dev, struct Qdisc_ops *ops, u32 handle, - u32 parentid, struct rtattr **tca, int *errp) +qdisc_create(struct device *dev, u32 handle, struct rtattr **tca, int *errp) { int err; struct rtattr *kind = tca[TCA_KIND-1]; struct Qdisc *sch = NULL; + struct Qdisc_ops *ops; int size; - int new = 0; - if (ops == NULL) { - ops = qdisc_lookup_ops(kind); - err = -EINVAL; - if (ops == NULL) - goto err_out; - new = 1; + ops = qdisc_lookup_ops(kind); +#ifdef CONFIG_KMOD + if (ops==NULL && tca[TCA_KIND-1] != NULL) { + char module_name[4 + IFNAMSIZ + 1]; + + if (RTA_PAYLOAD(kind) <= IFNAMSIZ) { + sprintf(module_name, "sch_%s", (char*)RTA_DATA(kind)); + request_module (module_name); + ops = qdisc_lookup_ops(kind); + } } +#endif + + err = -EINVAL; + if (ops == NULL) + goto err_out; size = sizeof(*sch) + ops->priv_size; @@ -340,13 +378,8 @@ qdisc_create(struct device *dev, struct Qdisc_ops *ops, u32 handle, /* Grrr... 
Resolve race condition with module unload */ err = -EINVAL; - if (new) { - if (ops != qdisc_lookup_ops(kind)) - goto err_out; - } else if (kind) { - if (rtattr_strcmp(kind, ops->id)) - goto err_out; - } + if (ops != qdisc_lookup_ops(kind)) + goto err_out; memset(sch, 0, size); @@ -355,6 +388,7 @@ qdisc_create(struct device *dev, struct Qdisc_ops *ops, u32 handle, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev = dev; + atomic_set(&sch->refcnt, 1); if (handle == 0) { handle = qdisc_alloc_handle(dev); err = -ENOMEM; @@ -362,9 +396,8 @@ qdisc_create(struct device *dev, struct Qdisc_ops *ops, u32 handle, goto err_out; } sch->handle = handle; - sch->classid = parentid; - if (ops->init && (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) { + if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) { sch->next = dev->qdisc_list; dev->qdisc_list = sch; #ifdef CONFIG_NET_ESTIMATOR @@ -381,135 +414,241 @@ err_out: return NULL; } +static int qdisc_change(struct Qdisc *sch, struct rtattr **tca) +{ + if (tca[TCA_OPTIONS-1]) { + int err; + + if (sch->ops->change == NULL) + return -EINVAL; + err = sch->ops->change(sch, tca[TCA_OPTIONS-1]); + if (err) + return err; + } +#ifdef CONFIG_NET_ESTIMATOR + if (tca[TCA_RATE-1]) { + qdisc_kill_estimator(&sch->stats); + qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]); + } +#endif + return 0; +} + +struct check_loop_arg +{ + struct qdisc_walker w; + struct Qdisc *p; + int depth; +}; + +static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w); + +static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth) +{ + struct check_loop_arg arg; + + if (q->ops->cl_ops == NULL) + return 0; + + arg.w.stop = arg.w.skip = arg.w.count = 0; + arg.w.fn = check_loop_fn; + arg.depth = depth; + arg.p = p; + q->ops->cl_ops->walk(q, &arg.w); + return arg.w.stop ? -ELOOP : 0; +} + +static int +check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) +{ + struct Qdisc *leaf; + struct Qdisc_class_ops *cops = q->ops->cl_ops; + struct check_loop_arg *arg = (struct check_loop_arg *)w; + + leaf = cops->leaf(q, cl); + if (leaf) { + if (leaf == arg->p || arg->depth > 7) + return -ELOOP; + return check_loop(leaf, arg->p, arg->depth + 1); + } + return 0; +} /* - Create/delete/change/get qdisc. + * Delete/get qdisc. */ -static int tc_ctl_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct tcmsg *tcm = NLMSG_DATA(n); struct rtattr **tca = arg; struct device *dev; u32 clid = tcm->tcm_parent; - struct Qdisc *old_q; struct Qdisc *q = NULL; struct Qdisc *p = NULL; - struct Qdisc *leaf = NULL; - struct Qdisc_ops *qops = NULL; int err; - /* Find device */ if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) return -ENODEV; - /* If parent is specified, it must exist - and tcm_parent selects a class in parent which - new qdisc will be attached to. - - The place may be already busy by another qdisc, - remember this fact, if it was not auto-created discipline. - */ if (clid) { if (clid != TC_H_ROOT) { - p = qdisc_lookup(dev, TC_H_MAJ(clid)); - if (p == NULL) + if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) return -ENOENT; - leaf = qdisc_lookup_class(dev, clid); + q = qdisc_leaf(p, clid); } else - leaf = dev->qdisc_sleeping; - - if (leaf && leaf->flags&TCQ_F_DEFAULT && n->nlmsg_type == RTM_NEWQDISC) - leaf = NULL; + q = dev->qdisc_sleeping; - /* - Also, leaf may be exactly that qdisc, which we want - to control. 
Remember this to avoid one more qdisc_lookup. - */ - - if (leaf && leaf->handle == tcm->tcm_handle) - q = leaf; - } + if (!q) + return -ENOENT; - /* Try to locate the discipline */ - if (tcm->tcm_handle && q == NULL) { - if (TC_H_MIN(tcm->tcm_handle)) + if (tcm->tcm_handle && q->handle != tcm->tcm_handle) return -EINVAL; - q = qdisc_lookup(dev, tcm->tcm_handle); + } else { + if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) + return -ENOENT; } - /* If discipline already exists, check that its real parent - matches to one selected by tcm_parent. - */ - - if (q) { - if (clid && p != q->parent) - return -EINVAL; - BUG_TRAP(!leaf || leaf == q); - if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) + if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) + return -EINVAL; + + if (n->nlmsg_type == RTM_DELQDISC) { + if (!clid) return -EINVAL; - clid = q->classid; - goto process_existing; + if (q->handle == 0) + return -ENOENT; + if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0) + return err; + if (q) { + qdisc_notify(skb, n, clid, q, NULL); + qdisc_destroy(q); + } + } else { + qdisc_notify(skb, n, clid, NULL, q); } + return 0; +} - /* The discipline is known not to exist. - If parent was not selected too, return error. - */ - if (clid == 0) - return tcm->tcm_handle ? -ENOENT : -EINVAL; +/* + Create/change qdisc. + */ - /* Check for the case when leaf is exactly the thing, - that you want. - */ +static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +{ + struct tcmsg *tcm = NLMSG_DATA(n); + struct rtattr **tca = arg; + struct device *dev; + u32 clid = tcm->tcm_parent; + struct Qdisc *q = NULL; + struct Qdisc *p = NULL; + int err; + + if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + return -ENODEV; - if (leaf && tcm->tcm_handle == 0) { - q = leaf; - if (!tca[TCA_KIND-1] || rtattr_strcmp(tca[TCA_KIND-1], q->ops->id) == 0) - goto process_existing; + if (clid) { + if (clid != TC_H_ROOT) { + if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) + return -ENOENT; + q = qdisc_leaf(p, clid); + } else { + q = dev->qdisc_sleeping; + } + + /* It may be default qdisc, ignore it */ + if (q && q->handle == 0) + q = NULL; + + if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { + if (tcm->tcm_handle) { + if (q && !(n->nlmsg_flags&NLM_F_REPLACE)) + return -EEXIST; + if (TC_H_MIN(tcm->tcm_handle)) + return -EINVAL; + if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) + goto create_n_graft; + if (n->nlmsg_flags&NLM_F_EXCL) + return -EEXIST; + if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) + return -EINVAL; + if (q == p || + (p && check_loop(q, p, 0))) + return -ELOOP; + atomic_inc(&q->refcnt); + goto graft; + } else { + if (q == NULL) + goto create_n_graft; + + /* This magic test requires explanation. + * + * We know, that some child q is already + * attached to this parent and have choice: + * either to change it or to create/graft new one. + * + * 1. We are allowed to create/graft only + * if CREATE and REPLACE flags are set. + * + * 2. If EXCL is set, requestor wanted to say, + * that qdisc tcm_handle is not expected + * to exist, so that we choose create/graft too. + * + * 3. The last case is when no flags are set. + * Alas, it is sort of hole in API, we + * cannot decide what to do unambiguously. + * For now we select create/graft, if + * user gave KIND, which does not match existing. 
+ */ + if ((n->nlmsg_flags&NLM_F_CREATE) && + (n->nlmsg_flags&NLM_F_REPLACE) && + ((n->nlmsg_flags&NLM_F_EXCL) || + (tca[TCA_KIND-1] && + rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)))) + goto create_n_graft; + } + } + } else { + if (!tcm->tcm_handle) + return -EINVAL; + q = qdisc_lookup(dev, tcm->tcm_handle); } - if (n->nlmsg_type != RTM_NEWQDISC || !(n->nlmsg_flags&NLM_F_CREATE)) + /* Change qdisc parameters */ + if (q == NULL) return -ENOENT; - if (leaf && n->nlmsg_flags&NLM_F_EXCL) + if (n->nlmsg_flags&NLM_F_EXCL) return -EEXIST; + if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) + return -EINVAL; + err = qdisc_change(q, tca); + if (err == 0) + qdisc_notify(skb, n, clid, NULL, q); + return err; -create_and_graft: - q = qdisc_create(dev, qops, tcm->tcm_handle, clid, tca, &err); +create_n_graft: + if (!(n->nlmsg_flags&NLM_F_CREATE)) + return -ENOENT; + q = qdisc_create(dev, tcm->tcm_handle, tca, &err); if (q == NULL) return err; graft: - err = qdisc_graft(dev, p, clid, q, &old_q); - if (err) { - if (q) - qdisc_destroy(q); - return err; + if (1) { + struct Qdisc *old_q = NULL; + err = qdisc_graft(dev, p, clid, q, &old_q); + if (err) { + if (q) + qdisc_destroy(q); + return err; + } + qdisc_notify(skb, n, clid, old_q, q); + if (old_q) + qdisc_destroy(old_q); } - qdisc_notify(skb, n, old_q, q); - if (old_q) - qdisc_destroy(old_q); return 0; - -process_existing: - - switch (n->nlmsg_type) { - case RTM_NEWQDISC: - if (n->nlmsg_flags&NLM_F_EXCL) - return -EEXIST; - qops = q->ops; - goto create_and_graft; - case RTM_GETQDISC: - qdisc_notify(skb, n, NULL, q); - return 0; - case RTM_DELQDISC: - q = NULL; - goto graft; - default: - return -EINVAL; - } } -static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, +static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, u32 pid, u32 seq, unsigned flags, int event) { struct tcmsg *tcm; @@ -521,9 +660,9 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, tcm = NLMSG_DATA(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0; - tcm->tcm_parent = q->classid; + tcm->tcm_parent = clid; tcm->tcm_handle = q->handle; - tcm->tcm_info = 0; + tcm->tcm_info = atomic_read(&q->refcnt); RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id); if (q->ops->dump && q->ops->dump(q, skb) < 0) goto rtattr_failure; @@ -539,7 +678,7 @@ rtattr_failure: } static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct Qdisc *old, struct Qdisc *new) + u32 clid, struct Qdisc *old, struct Qdisc *new) { struct sk_buff *skb; u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; @@ -548,12 +687,12 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, if (!skb) return -ENOBUFS; - if (old && !(old->flags&TCQ_F_DEFAULT)) { - if (tc_fill_qdisc(skb, old, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) + if (old && old->handle) { + if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) goto err_out; } if (new) { - if (tc_fill_qdisc(skb, new, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) + if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? 
NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) goto err_out; } @@ -583,7 +722,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) q = q->next, q_idx++) { if (q_idx < s_q_idx) continue; - if (tc_fill_qdisc(skb, q, NETLINK_CB(cb->skb).pid, + if (tc_fill_qdisc(skb, q, 0, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) goto done; } @@ -797,11 +936,10 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) for (q=dev->qdisc_list, t=0; q; q = q->next, t++) { if (t < s_t) continue; if (!q->ops->cl_ops) continue; - if (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle - && (tcm->tcm_parent != TC_H_ROOT || q->parent != NULL)) + if (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle) continue; if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int)); + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); arg.w.fn = qdisc_class_dump; arg.skb = skb; arg.cb = cb; @@ -846,6 +984,20 @@ static int psched_read_proc(char *buffer, char **start, off_t offset, } #endif +#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY +int psched_tod_diff(int delta_sec, int bound) +{ + int delta; + + if (bound <= 1000000 || delta_sec > (0x7FFFFFFF/1000000)-1) + return bound; + delta = delta_sec * 1000000; + if (delta > bound) + delta = bound; + return delta; +} +#endif + psched_time_t psched_time_base; #if PSCHED_CLOCK_SOURCE == PSCHED_CPU @@ -866,7 +1018,8 @@ static void psched_tick(unsigned long dummy) #if PSCHED_CLOCK_SOURCE == PSCHED_CPU psched_time_t dummy_stamp; PSCHED_GET_TIME(dummy_stamp); - psched_timer.expires = jiffies + 4*HZ; + /* It is OK up to 4GHz cpu */ + psched_timer.expires = jiffies + 1*HZ; #else unsigned long now = jiffies; psched_time_base = ((u64)now)<<PSCHED_JSCALE; @@ -891,7 +1044,6 @@ __initfunc(int psched_calibrate_clock(void)) return -1; #endif - start_bh_atomic(); #ifdef PSCHED_WATCHER psched_tick(0); #endif @@ -902,7 +1054,6 @@ __initfunc(int psched_calibrate_clock(void)) barrier(); PSCHED_GET_TIME(stamp1); do_gettimeofday(&tv1); - end_bh_atomic(); delay = PSCHED_TDIFF(stamp1, stamp); rdelay = tv1.tv_usec - tv.tv_usec; @@ -921,6 +1072,9 @@ __initfunc(int psched_calibrate_clock(void)) __initfunc(int pktsched_init(void)) { +#ifdef CONFIG_RTNETLINK + struct rtnetlink_link *link_p; +#endif #ifdef CONFIG_PROC_FS struct proc_dir_entry *ent; #endif @@ -931,19 +1085,22 @@ __initfunc(int pktsched_init(void)) #elif PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES psched_tick_per_us = HZ<<PSCHED_JSCALE; psched_us_per_tick = 1000000; +#ifdef PSCHED_WATCHER + psched_tick(0); +#endif #endif #ifdef CONFIG_RTNETLINK - struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC]; + link_p = rtnetlink_links[PF_UNSPEC]; /* Setup rtnetlink links. It is made here to avoid exporting large number of public symbols. 
*/ if (link_p) { - link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_ctl_qdisc; - link_p[RTM_DELQDISC-RTM_BASE].doit = tc_ctl_qdisc; - link_p[RTM_GETQDISC-RTM_BASE].doit = tc_ctl_qdisc; + link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc; + link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc; + link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc; link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc; link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass; link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass; @@ -975,6 +1132,12 @@ __initfunc(int pktsched_init(void)) #ifdef CONFIG_NET_SCH_RED INIT_QDISC(red); #endif +#ifdef CONFIG_NET_SCH_GRED + INIT_QDISC(gred); +#endif +#ifdef CONFIG_NET_SCH_DSMARK + INIT_QDISC(dsmark); +#endif #ifdef CONFIG_NET_SCH_SFQ INIT_QDISC(sfq); #endif diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 9ae14c243..c8094a882 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -30,13 +30,13 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/notifier.h> -#include <linux/module.h> #include <net/ip.h> #include <net/route.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/pkt_sched.h> + /* Class-Based Queueing (CBQ) algorithm. ======================================= @@ -169,6 +169,9 @@ struct cbq_sched_data struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes with backlog */ +#ifdef CONFIG_NET_CLS_POLICE + struct cbq_class *rx_class; +#endif struct cbq_class *tx_class; struct cbq_class *tx_borrowed; int tx_len; @@ -269,17 +272,21 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch) else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL) cl = defmap[TC_PRIO_BESTEFFORT]; - if (cl == NULL) + if (cl == NULL || cl->level >= head->level) goto fallback; } - if (cl->level == 0) { #ifdef CONFIG_NET_CLS_POLICE - if (result) - return cbq_reclassify(skb, cl); + switch (result) { + case TC_POLICE_RECLASSIFY: + return cbq_reclassify(skb, cl); + case TC_POLICE_SHOT: + return NULL; + default: + } #endif + if (cl->level == 0) return cl; - } /* * Step 3+n. 
If classifier selected a link sharing class, @@ -321,11 +328,9 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl) if (cl_tail != NULL) { cl->next_alive = cl_tail->next_alive; cl_tail->next_alive = cl; - cl->deficit = 0; } else { cl->next_alive = cl; q->activemask |= (1<<prio); - cl->deficit = cl->quantum; } } @@ -358,31 +363,28 @@ static void cbq_deactivate_class(struct cbq_class *this) } cl = cl_prev->next_alive; - cl->deficit += cl->quantum; return; } } while ((cl_prev = cl) != q->active[prio]); } -static __inline__ void +static void cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) { - if (q->toplevel > 0) { + int toplevel = q->toplevel; + + if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) { psched_time_t now; PSCHED_GET_TIME(now); if (PSCHED_TLESS(now, q->now)) now = q->now; - if (PSCHED_TLESS(cl->undertime, now)) { - q->toplevel = 0; - return; - } - while ((cl = cl->borrow) != NULL - && q->toplevel > cl->level) { - if (PSCHED_TLESS(cl->borrow->undertime, now)) { + + do { + if (PSCHED_TLESS(cl->undertime, now)) { q->toplevel = cl->level; return; } - } + } while ((cl=cl->borrow) != NULL && toplevel > cl->level); } } @@ -393,23 +395,31 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct cbq_class *cl = cbq_classify(skb, sch); int len = skb->len; - if (cl && cl->q->enqueue(skb, cl->q) == 1) { - sch->q.qlen++; - sch->stats.packets++; - cl->stats.packets++; - sch->stats.bytes+=len; - cl->stats.bytes+=len; - cbq_mark_toplevel(q, cl); - if (!cl->next_alive) - cbq_activate_class(cl); - return 1; +#ifdef CONFIG_NET_CLS_POLICE + q->rx_class = cl; +#endif + if (cl) { +#ifdef CONFIG_NET_CLS_POLICE + cl->q->__parent = sch; +#endif + if (cl->q->enqueue(skb, cl->q) == 1) { + sch->q.qlen++; + sch->stats.packets++; + sch->stats.bytes+=len; + cbq_mark_toplevel(q, cl); + if (!cl->next_alive) + cbq_activate_class(cl); + return 1; + } } sch->stats.drops++; if (cl == NULL) kfree_skb(skb); - else + else { + cbq_mark_toplevel(q, cl); cl->stats.drops++; + } return 0; } @@ -426,9 +436,14 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) } q->tx_class = NULL; + cbq_mark_toplevel(q, cl); + +#ifdef CONFIG_NET_CLS_POLICE + q->rx_class = cl; + cl->q->__parent = sch; +#endif if (cl->q->ops->requeue(skb, cl->q) == 1) { sch->q.qlen++; - cbq_mark_toplevel(q, cl); if (!cl->next_alive) cbq_activate_class(cl); return 1; @@ -445,11 +460,9 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) static void cbq_ovl_classic(struct cbq_class *cl) { struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data; + psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now); if (!cl->delayed) { - psched_tdiff_t delay; - - delay = PSCHED_TDIFF(cl->undertime, q->now); delay += cl->offtime; /* @@ -463,15 +476,35 @@ static void cbq_ovl_classic(struct cbq_class *cl) delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); if (cl->avgidle < cl->minidle) cl->avgidle = cl->minidle; - if (delay < 0) - delay = 0; + if (delay <= 0) + delay = 1; PSCHED_TADD2(q->now, delay, cl->undertime); - if (q->wd_expires == 0 || q->wd_expires > delay) - q->wd_expires = delay; cl->xstats.overactions++; cl->delayed = 1; } + if (q->wd_expires == 0 || q->wd_expires > delay) + q->wd_expires = delay; + + /* Dirty work! We must schedule wakeups based on + real available rate, rather than leaf rate, + which may be tiny (even zero). 
+ */ + if (q->toplevel == TC_CBQ_MAXLEVEL) { + struct cbq_class *b; + psched_tdiff_t base_delay = q->wd_expires; + + for (b = cl->borrow; b; b = b->borrow) { + delay = PSCHED_TDIFF(b->undertime, q->now); + if (delay < base_delay) { + if (delay <= 0) + delay = 1; + base_delay = delay; + } + } + + q->wd_expires = delay; + } } /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when @@ -481,15 +514,18 @@ static void cbq_ovl_classic(struct cbq_class *cl) static void cbq_ovl_rclassic(struct cbq_class *cl) { struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data; + struct cbq_class *this = cl; - while (cl && cl->delayed) { - cl = cl->borrow; - if (cl->level > q->toplevel) - return; - } + do { + if (cl->level > q->toplevel) { + cl = NULL; + break; + } + } while ((cl = cl->borrow) != NULL); - if (cl) - cbq_ovl_classic(cl); + if (cl == NULL) + cl = this; + cbq_ovl_classic(cl); } /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */ @@ -497,12 +533,11 @@ static void cbq_ovl_rclassic(struct cbq_class *cl) static void cbq_ovl_delay(struct cbq_class *cl) { struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data; + psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now); if (!cl->delayed) { - psched_tdiff_t delay; unsigned long sched = jiffies; - delay = PSCHED_TDIFF(cl->undertime, q->now); delay += cl->offtime; if (cl->avgidle < 0) delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); @@ -521,8 +556,12 @@ static void cbq_ovl_delay(struct cbq_class *cl) add_timer(&q->delay_timer); cl->delayed = 1; cl->xstats.overactions++; + return; } + delay = 1; } + if (q->wd_expires == 0 || q->wd_expires > delay) + q->wd_expires = delay; } /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */ @@ -555,6 +594,7 @@ static void cbq_ovl_drop(struct cbq_class *cl) static void cbq_watchdog(unsigned long arg) { struct Qdisc *sch = (struct Qdisc*)arg; + sch->flags &= ~TCQ_F_THROTTLED; qdisc_wakeup(sch->dev); } @@ -622,6 +662,7 @@ static void cbq_undelay(unsigned long arg) add_timer(&q->delay_timer); } + sch->flags &= ~TCQ_F_THROTTLED; qdisc_wakeup(sch->dev); } @@ -631,18 +672,23 @@ static void cbq_undelay(unsigned long arg) static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) { int len = skb->len; - struct Qdisc *sch = child->parent; + struct Qdisc *sch = child->__parent; struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; - struct cbq_class *cl = cbq_class_lookup(q, child->classid); + struct cbq_class *cl = q->rx_class; + + q->rx_class = NULL; if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) { + + cbq_mark_toplevel(q, cl); + + q->rx_class = cl; + cl->q->__parent = sch; + if (cl->q->enqueue(skb, cl->q) == 1) { sch->q.qlen++; sch->stats.packets++; - cl->stats.packets++; sch->stats.bytes+=len; - cl->stats.bytes+=len; - cbq_mark_toplevel(q, cl); if (!cl->next_alive) cbq_activate_class(cl); return 0; @@ -656,21 +702,42 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) } #endif +/* + It is mission critical procedure. + + We "regenerate" toplevel cutoff, if transmitting class + has backlog and it is not regulated. It is not part of + original CBQ description, but looks more reasonable. + Probably, it is wrong. This question needs further investigation. 
+*/ + static __inline__ void -cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) +cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, + struct cbq_class *borrowed) { - if (cl && q->toplevel >= cl->level) { - if (cl->q->q.qlen <= 1 || PSCHED_TLESS(q->now, cl->undertime)) - q->toplevel = TC_CBQ_MAXLEVEL; - else /* BUGGGG? if (cl != this) */ - q->toplevel = cl->level; + if (cl && q->toplevel >= borrowed->level) { + if (cl->q->q.qlen > 1) { + do { + if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) { + q->toplevel = borrowed->level; + return; + } + } while ((borrowed=borrowed->borrow) != NULL); + } +#if 0 + /* It is not necessary now. Uncommenting it + will save CPU cycles, but decrease fairness. + */ + q->toplevel = TC_CBQ_MAXLEVEL; +#endif } } -static __inline__ void +static void cbq_update(struct cbq_sched_data *q) { - struct cbq_class *cl = q->tx_class; + struct cbq_class *this = q->tx_class; + struct cbq_class *cl = this; int len = q->tx_len; q->tx_class = NULL; @@ -679,6 +746,9 @@ cbq_update(struct cbq_sched_data *q) long avgidle = cl->avgidle; long idle; + cl->stats.packets++; + cl->stats.bytes += len; + /* (now - last) is total time between packet right edges. (last_pktlen/rate) is "virtual" busy time, so that @@ -697,6 +767,10 @@ cbq_update(struct cbq_sched_data *q) if (avgidle <= 0) { /* Overlimit or at-limit */ + + if (avgidle < cl->minidle) + avgidle = cl->minidle; + cl->avgidle = avgidle; /* Calculate expected time, when this class @@ -732,12 +806,11 @@ cbq_update(struct cbq_sched_data *q) cl->avgidle = cl->maxidle; else cl->avgidle = avgidle; - } cl->last = q->now; } - cbq_update_toplevel(q, q->tx_borrowed); + cbq_update_toplevel(q, this, q->tx_borrowed); } static __inline__ struct cbq_class * @@ -750,21 +823,33 @@ cbq_under_limit(struct cbq_class *cl) return cl; if (PSCHED_IS_PASTPERFECT(cl->undertime) || - PSCHED_TLESS(cl->undertime, q->now)) { + !PSCHED_TLESS(q->now, cl->undertime)) { cl->delayed = 0; return cl; } - while (!PSCHED_IS_PASTPERFECT(cl->undertime) && - PSCHED_TLESS(q->now, cl->undertime)) { - if ((cl = cl->borrow) == NULL || cl->level > q->toplevel) { + do { + /* It is very suspicious place. Now overlimit + action is generated for not bounded classes + only if link is completely congested. + Though it is in agree with ancestor-only paradigm, + it looks very stupid. Particularly, + it means that this chunk of code will either + never be called or result in strong amplification + of burstiness. Dangerous, silly, and, however, + no another solution exists. + */ + if ((cl = cl->borrow) == NULL) { this_cl->stats.overlimits++; this_cl->overlimit(this_cl); return NULL; } - } - this_cl->xstats.borrows++; - cl->xstats.borrows++; + if (cl->level > q->toplevel) + return NULL; + } while (!PSCHED_IS_PASTPERFECT(cl->undertime) && + PSCHED_TLESS(q->now, cl->undertime)); + + cl->delayed = 0; return cl; } @@ -784,27 +869,26 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) /* Start round */ do { - struct cbq_class *borrow; + struct cbq_class *borrow = NULL; - /* Class is empty */ - if (cl->q->q.qlen == 0) - goto skip_class; - - if ((borrow = cbq_under_limit(cl)) == NULL) + if (cl->q->q.qlen && + (borrow = cbq_under_limit(cl)) == NULL) goto skip_class; if (cl->deficit <= 0) { - /* Class exhausted its allotment per this - round. + /* Class exhausted its allotment per + this round. Switch to the next one. 
*/ deficit = 1; + cl->deficit += cl->quantum; goto next_class; } skb = cl->q->dequeue(cl->q); /* Class did not give us any skb :-( - It could occur if cl->q == "tbf" + It could occur even if cl->q->q.qlen != 0 + f.e. if cl->q == "tbf" */ if (skb == NULL) goto skip_class; @@ -812,6 +896,15 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) cl->deficit -= skb->len; q->tx_class = cl; q->tx_borrowed = borrow; + if (borrow != cl) { +#ifndef CBQ_XSTATS_BORROWS_BYTES + borrow->xstats.borrows++; + cl->xstats.borrows++; +#else + borrow->xstats.borrows += skb->len; + cl->xstats.borrows += skb->len; +#endif + } q->tx_len = skb->len; if (cl->deficit <= 0) { @@ -822,8 +915,6 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) return skb; skip_class: - cl->deficit = 0; - if (cl->q->q.qlen == 0 || prio != cl->cpriority) { /* Class is empty or penalized. Unlink it from active chain. @@ -857,7 +948,6 @@ skip_class: next_class: cl_prev = cl; cl = cl->next_alive; - cl->deficit += cl->quantum; } while (cl_prev != cl_tail); } while (deficit); @@ -914,6 +1004,7 @@ cbq_dequeue(struct Qdisc *sch) skb = cbq_dequeue_1(sch); if (skb) { sch->q.qlen--; + sch->flags &= ~TCQ_F_THROTTLED; return skb; } @@ -955,6 +1046,7 @@ cbq_dequeue(struct Qdisc *sch) delay = 1; q->wd_timer.expires = jiffies + delay; add_timer(&q->wd_timer); + sch->flags |= TCQ_F_THROTTLED; } } return NULL; @@ -1129,14 +1221,18 @@ static void cbq_link_class(struct cbq_class *this) static int cbq_drop(struct Qdisc* sch) { struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; - struct cbq_class *cl; - int h; + struct cbq_class *cl, *cl_head; + int prio; - for (h = TC_CBQ_MAXPRIO; h >= 0; h++) { - for (cl = q->classes[h]; cl; cl = cl->next) { + for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio++) { + if ((cl_head = q->active[prio]) == NULL) + continue; + + cl = cl_head; + do { if (cl->q->ops->drop && cl->q->ops->drop(cl->q)) return 1; - } + } while ((cl = cl->next_alive) != cl_head); } return 0; } @@ -1166,8 +1262,8 @@ cbq_reset(struct Qdisc* sch) cl->next_alive = NULL; PSCHED_SET_PASTPERFECT(cl->undertime); - cl->avgidle = 0; - cl->deficit = 0; + cl->avgidle = cl->maxidle; + cl->deficit = cl->quantum; cl->cpriority = cl->priority; } } @@ -1187,8 +1283,10 @@ static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) cl->avpkt = lss->avpkt; if (lss->change&TCF_CBQ_LSS_MINIDLE) cl->minidle = -(long)lss->minidle; - if (lss->change&TCF_CBQ_LSS_MAXIDLE) + if (lss->change&TCF_CBQ_LSS_MAXIDLE) { cl->maxidle = lss->maxidle; + cl->avgidle = lss->maxidle; + } if (lss->change&TCF_CBQ_LSS_OFFTIME) cl->offtime = lss->offtime; return 0; @@ -1261,7 +1359,7 @@ static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p) { cl->police = p->police; - if (!(cl->q->flags&TCQ_F_DEFAULT)) { + if (cl->q->handle) { if (p->police == TC_POLICE_RECLASSIFY) cl->q->reshape_fail = cbq_reshape_fail; else @@ -1300,6 +1398,7 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt) return -EINVAL; } + q->link.refcnt = 1; q->link.sibling = &q->link; q->link.classid = sch->handle; q->link.qdisc = sch; @@ -1493,6 +1592,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg, else tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle = cl->classid; + tcm->tcm_info = cl->q->handle; rta = (struct rtattr*)b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); @@ -1533,12 +1633,20 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, } if ((*old = xchg(&cl->q, new)) != NULL) qdisc_reset(*old); - + return 0; } return -ENOENT; } +static struct Qdisc * +cbq_leaf(struct 
Qdisc *sch, unsigned long arg) +{ + struct cbq_class *cl = (struct cbq_class*)arg; + + return cl ? cl->q : NULL; +} + static unsigned long cbq_get(struct Qdisc *sch, u32 classid) { struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; @@ -1569,6 +1677,7 @@ static void cbq_destroy_class(struct cbq_class *cl) #ifdef CONFIG_NET_ESTIMATOR qdisc_kill_estimator(&cl->stats); #endif + kfree(cl); } static void @@ -1578,6 +1687,9 @@ cbq_destroy(struct Qdisc* sch) struct cbq_class *cl; unsigned h; +#ifdef CONFIG_NET_CLS_POLICE + q->rx_class = NULL; +#endif for (h = 0; h < 16; h++) { for (cl = q->classes[h]; cl; cl = cl->next) cbq_destroy_filters(cl); @@ -1590,20 +1702,29 @@ cbq_destroy(struct Qdisc* sch) } qdisc_put_rtab(q->link.R_tab); + MOD_DEC_USE_COUNT; } -static void cbq_put(struct Qdisc *q, unsigned long arg) +static void cbq_put(struct Qdisc *sch, unsigned long arg) { + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; struct cbq_class *cl = (struct cbq_class*)arg; - if (--cl->refcnt == 0) + start_bh_atomic(); + if (--cl->refcnt == 0) { +#ifdef CONFIG_NET_CLS_POLICE + if (q->rx_class == cl) + q->rx_class = NULL; +#endif cbq_destroy_class(cl); + } + end_bh_atomic(); return; } static int -cbq_change(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca, - unsigned long *arg) +cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca, + unsigned long *arg) { int err; struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; @@ -1763,6 +1884,7 @@ cbq_change(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca, cl->borrow = cl->tparent; if (cl->tparent != &q->link) cl->share = cl->tparent; + cbq_adjust_levels(parent); cl->minidle = -0x7FFFFFFF; cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1])); cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1])); @@ -1781,7 +1903,6 @@ cbq_change(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca, #endif if (tb[TCA_CBQ_FOPT-1]) cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1])); - cbq_adjust_levels(parent); end_bh_atomic(); #ifdef CONFIG_NET_ESTIMATOR @@ -1810,10 +1931,16 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) if (cl->next_alive) cbq_deactivate_class(cl); - if (q->tx_class == cl) - q->tx_class = cl->borrow; if (q->tx_borrowed == cl) q->tx_borrowed = q->tx_class; + if (q->tx_class == cl) { + q->tx_class = NULL; + q->tx_borrowed = NULL; + } +#ifdef CONFIG_NET_CLS_POLICE + if (q->rx_class == cl) + q->rx_class = NULL; +#endif cbq_unlink_class(cl); cbq_adjust_levels(cl->tparent); @@ -1841,12 +1968,16 @@ static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg) return &cl->filter_list; } -static unsigned long cbq_bind_filter(struct Qdisc *sch, u32 classid) +static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, + u32 classid) { struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + struct cbq_class *p = (struct cbq_class*)parent; struct cbq_class *cl = cbq_class_lookup(q, classid); if (cl) { + if (p && p->level <= cl->level) + return 0; cl->filters++; return (unsigned long)cl; } @@ -1878,7 +2009,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) } if (arg->fn(sch, (unsigned long)cl, arg) < 0) { arg->stop = 1; - break; + return; } arg->count++; } @@ -1888,9 +2019,10 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) static struct Qdisc_class_ops cbq_class_ops = { cbq_graft, + cbq_leaf, cbq_get, cbq_put, - cbq_change, + cbq_change_class, cbq_delete, cbq_walk, @@ -1918,6 +2050,7 @@ 
struct Qdisc_ops cbq_qdisc_ops = cbq_init, cbq_reset, cbq_destroy, + NULL /* cbq_change */, #ifdef CONFIG_RTNETLINK cbq_dump, diff --git a/net/sched/sch_csz.c b/net/sched/sch_csz.c index 9bdc656c9..2202fd81a 100644 --- a/net/sched/sch_csz.c +++ b/net/sched/sch_csz.c @@ -826,6 +826,12 @@ static int csz_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, return -EINVAL; } +static struct Qdisc * csz_leaf(struct Qdisc *sch, unsigned long cl) +{ + return NULL; +} + + static unsigned long csz_get(struct Qdisc *sch, u32 classid) { struct csz_sched_data *q = (struct csz_sched_data *)sch->data; @@ -840,6 +846,12 @@ static unsigned long csz_get(struct Qdisc *sch, u32 classid) return band+1; } +static unsigned long csz_bind(struct Qdisc *sch, unsigned long parent, u32 classid) +{ + return csz_get(sch, classid); +} + + static void csz_put(struct Qdisc *sch, unsigned long cl) { return; @@ -1006,6 +1018,8 @@ static struct tcf_proto ** csz_find_tcf(struct Qdisc *sch, unsigned long cl) struct Qdisc_class_ops csz_class_ops = { csz_graft, + csz_leaf, + csz_get, csz_put, csz_change, @@ -1013,7 +1027,7 @@ struct Qdisc_class_ops csz_class_ops = csz_walk, csz_find_tcf, - csz_get, + csz_bind, csz_put, #ifdef CONFIG_RTNETLINK @@ -1036,6 +1050,7 @@ struct Qdisc_ops csz_qdisc_ops = csz_init, csz_reset, csz_destroy, + NULL /* csz_change */, #ifdef CONFIG_RTNETLINK csz_dump, diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 14bc8bb8b..c93f206a2 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -97,10 +97,7 @@ fifo_drop(struct Qdisc* sch) static void fifo_reset(struct Qdisc* sch) { - struct sk_buff *skb; - - while ((skb=__skb_dequeue(&sch->q)) != NULL) - kfree_skb(skb); + skb_queue_purge(&sch->q); sch->stats.backlog = 0; } @@ -137,15 +134,15 @@ pfifo_dequeue(struct Qdisc* sch) return __skb_dequeue(&sch->q); } - static int fifo_init(struct Qdisc *sch, struct rtattr *opt) { struct fifo_sched_data *q = (void*)sch->data; if (opt == NULL) { - q->limit = sch->dev->tx_queue_len; if (sch->ops == &bfifo_qdisc_ops) - q->limit *= sch->dev->mtu; + q->limit = sch->dev->tx_queue_len*sch->dev->mtu; + else + q->limit = sch->dev->tx_queue_len; } else { struct tc_fifo_qopt *ctl = RTA_DATA(opt); if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) @@ -188,6 +185,8 @@ struct Qdisc_ops pfifo_qdisc_ops = fifo_init, fifo_reset, NULL, + fifo_init, + #ifdef CONFIG_RTNETLINK fifo_dump, #endif @@ -208,6 +207,7 @@ struct Qdisc_ops bfifo_qdisc_ops = fifo_init, fifo_reset, NULL, + fifo_init, #ifdef CONFIG_RTNETLINK fifo_dump, #endif diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 7ba2e94cc..ba40033e5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -184,7 +184,7 @@ struct Qdisc noop_qdisc = { NULL }, noop_enqueue, noop_dequeue, - TCQ_F_DEFAULT|TCQ_F_BUILTIN, + TCQ_F_BUILTIN, &noop_qdisc_ops, }; @@ -207,7 +207,7 @@ struct Qdisc noqueue_qdisc = { NULL }, NULL, NULL, - TCQ_F_DEFAULT|TCQ_F_BUILTIN, + TCQ_F_BUILTIN, &noqueue_qdisc_ops, }; @@ -322,8 +322,8 @@ struct Qdisc * qdisc_create_dflt(struct device *dev, struct Qdisc_ops *ops) sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev = dev; - sch->flags |= TCQ_F_DEFAULT; - if (ops->init && ops->init(sch, NULL) == 0) + atomic_set(&sch->refcnt, 1); + if (!ops->init || ops->init(sch, NULL) == 0) return sch; kfree(sch); @@ -342,6 +342,10 @@ void qdisc_reset(struct Qdisc *qdisc) void qdisc_destroy(struct Qdisc *qdisc) { struct Qdisc_ops *ops = qdisc->ops; + + if (!atomic_dec_and_test(&qdisc->refcnt)) + return; + #ifdef 
CONFIG_NET_SCHED if (qdisc->dev) { struct Qdisc *q, **qp; @@ -444,30 +448,3 @@ void dev_shutdown(struct device *dev) end_bh_atomic(); } -struct Qdisc * dev_set_scheduler(struct device *dev, struct Qdisc *qdisc) -{ - struct Qdisc *oqdisc; - - if (dev->flags & IFF_UP) - dev_deactivate(dev); - - start_bh_atomic(); - oqdisc = dev->qdisc_sleeping; - - /* Prune old scheduler */ - if (oqdisc) - qdisc_reset(oqdisc); - - /* ... and graft new one */ - if (qdisc == NULL) - qdisc = &noop_qdisc; - dev->qdisc_sleeping = qdisc; - dev->qdisc = &noop_qdisc; - end_bh_atomic(); - - if (dev->flags & IFF_UP) - dev_activate(dev); - - return oqdisc; -} - diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 5b7b39fea..5222d149d 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -49,17 +49,19 @@ static __inline__ unsigned prio_classify(struct sk_buff *skb, struct Qdisc *sch) { struct prio_sched_data *q = (struct prio_sched_data *)sch->data; struct tcf_result res; + u32 band; - res.classid = skb->priority; - if (TC_H_MAJ(res.classid) != sch->handle) { + band = skb->priority; + if (TC_H_MAJ(skb->priority) != sch->handle) { if (!q->filter_list || tc_classify(skb, q->filter_list, &res)) { - if (TC_H_MAJ(res.classid)) - res.classid = 0; - res.classid = q->prio2band[res.classid&TC_PRIO_MAX] + 1; + if (TC_H_MAJ(band)) + band = 0; + return q->prio2band[band&TC_PRIO_MAX]; } + band = res.classid; } - - return res.classid - 1; + band = TC_H_MIN(band) - 1; + return band < q->bands ? band : q->prio2band[0]; } static int @@ -160,38 +162,74 @@ prio_destroy(struct Qdisc* sch) MOD_DEC_USE_COUNT; } +static int prio_tune(struct Qdisc *sch, struct rtattr *opt) +{ + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + struct tc_prio_qopt *qopt = RTA_DATA(opt); + int i; + + if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) + return -EINVAL; + if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) + return -EINVAL; + + for (i=0; i<=TC_PRIO_MAX; i++) { + if (qopt->priomap[i] >= qopt->bands) + return -EINVAL; + } + + start_bh_atomic(); + q->bands = qopt->bands; + memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); + + for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { + struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); + if (child != &noop_qdisc) + qdisc_destroy(child); + } + end_bh_atomic(); + + for (i=0; i<=TC_PRIO_MAX; i++) { + int band = q->prio2band[i]; + if (q->queues[band] == &noop_qdisc) { + struct Qdisc *child; + child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + if (child) { + child = xchg(&q->queues[band], child); + synchronize_bh(); + + if (child != &noop_qdisc) + qdisc_destroy(child); + } + } + } + return 0; +} + static int prio_init(struct Qdisc *sch, struct rtattr *opt) { static const u8 prio2band[TC_PRIO_MAX+1] = { 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }; struct prio_sched_data *q = (struct prio_sched_data *)sch->data; - unsigned mask = 0; int i; + for (i=0; i<TCQ_PRIO_BANDS; i++) + q->queues[i] = &noop_qdisc; + if (opt == NULL) { q->bands = 3; memcpy(q->prio2band, prio2band, sizeof(prio2band)); - mask = 7; + for (i=0; i<3; i++) { + struct Qdisc *child; + child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + if (child) + q->queues[i] = child; + } } else { - struct tc_prio_qopt *qopt = RTA_DATA(opt); + int err; - if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) - return -EINVAL; - if (qopt->bands > TCQ_PRIO_BANDS) - return -EINVAL; - q->bands = qopt->bands; - for (i=0; i<=TC_PRIO_MAX; i++) { - if (qopt->priomap[i] >= q->bands) - return -EINVAL; - q->prio2band[i] = qopt->priomap[i]; 
- mask |= (1<<qopt->priomap[i]); - } - } - for (i=0; i<TCQ_PRIO_BANDS; i++) { - if (mask&(1<<i)) - q->queues[i] = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); - if (q->queues[i] == NULL) - q->queues[i] = &noop_qdisc; + if ((err= prio_tune(sch, opt)) != 0) + return err; } MOD_INC_USE_COUNT; return 0; @@ -232,6 +270,18 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, return 0; } +static struct Qdisc * +prio_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + unsigned long band = arg - 1; + + if (band >= q->bands) + return NULL; + + return q->queues[band]; +} + static unsigned long prio_get(struct Qdisc *sch, u32 classid) { struct prio_sched_data *q = (struct prio_sched_data *)sch->data; @@ -242,6 +292,12 @@ static unsigned long prio_get(struct Qdisc *sch, u32 classid) return band; } +static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid) +{ + return prio_get(sch, classid); +} + + static void prio_put(struct Qdisc *q, unsigned long cl) { return; @@ -267,12 +323,15 @@ static int prio_delete(struct Qdisc *sch, unsigned long cl) #ifdef CONFIG_RTNETLINK -static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) +static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, + struct tcmsg *tcm) { struct prio_sched_data *q = (struct prio_sched_data *)sch->data; if (cl - 1 > q->bands) return -ENOENT; + if (q->queues[cl-1]) + tcm->tcm_info = q->queues[cl-1]->handle; return 0; } #endif @@ -310,6 +369,8 @@ static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl) static struct Qdisc_class_ops prio_class_ops = { prio_graft, + prio_leaf, + prio_get, prio_put, prio_change, @@ -317,7 +378,7 @@ static struct Qdisc_class_ops prio_class_ops = prio_walk, prio_find_tcf, - prio_get, + prio_bind, prio_put, #ifdef CONFIG_RTNETLINK @@ -340,6 +401,7 @@ struct Qdisc_ops prio_qdisc_ops = prio_init, prio_reset, prio_destroy, + prio_tune, #ifdef CONFIG_RTNETLINK prio_dump, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index eac678b83..30b537b53 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -193,8 +193,8 @@ red_enqueue(struct sk_buff *skb, struct Qdisc* sch) } if (q->qave < q->qth_min) { -enqueue: q->qcount = -1; +enqueue: if (sch->stats.backlog <= q->limit) { __skb_queue_tail(&sch->q, skb); sch->stats.backlog += skb->len; @@ -231,6 +231,7 @@ drop: */ if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR) goto enqueue; +printk(KERN_DEBUG "Drop %d\n", q->qcount); q->qcount = 0; q->qR = net_random()&q->Rmask; sch->stats.overlimits++; @@ -375,6 +376,7 @@ struct Qdisc_ops red_qdisc_ops = red_init, red_reset, red_destroy, + NULL /* red_change */, #ifdef CONFIG_RTNETLINK red_dump, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index c6f43badc..8baf254eb 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -14,7 +14,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <asm/bitops.h> -#include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -380,6 +379,27 @@ static void sfq_perturbation(unsigned long arg) } } +static int sfq_change(struct Qdisc *sch, struct rtattr *opt) +{ + struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data; + struct tc_sfq_qopt *ctl = RTA_DATA(opt); + + if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) + return -EINVAL; + + start_bh_atomic(); + q->quantum = ctl->quantum ? 
: psched_mtu(sch->dev); + q->perturb_period = ctl->perturb_period*HZ; + + del_timer(&q->perturb_timer); + if (q->perturb_period) { + q->perturb_timer.expires = jiffies + q->perturb_period; + add_timer(&q->perturb_timer); + } + end_bh_atomic(); + return 0; +} + static int sfq_init(struct Qdisc *sch, struct rtattr *opt) { struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data; @@ -399,24 +419,15 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt) q->max_depth = 0; q->tail = SFQ_DEPTH; if (opt == NULL) { - q->quantum = sch->dev->mtu; + q->quantum = psched_mtu(sch->dev); q->perturb_period = 0; - if (sch->dev->hard_header) - q->quantum += sch->dev->hard_header_len; } else { - struct tc_sfq_qopt *ctl = RTA_DATA(opt); - if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) - return -EINVAL; - q->quantum = ctl->quantum ? : psched_mtu(sch->dev); - q->perturb_period = ctl->perturb_period*HZ; - /* The rest is compiled in */ + int err = sfq_change(sch, opt); + if (err) + return err; } for (i=0; i<SFQ_DEPTH; i++) sfq_link(q, i); - if (q->perturb_period) { - q->perturb_timer.expires = jiffies + q->perturb_period; - add_timer(&q->perturb_timer); - } MOD_INC_USE_COUNT; return 0; } @@ -467,6 +478,7 @@ struct Qdisc_ops sfq_qdisc_ops = sfq_init, sfq_reset, sfq_destroy, + NULL, /* sfq_change */ #ifdef CONFIG_RTNETLINK sfq_dump, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 83d6da87c..a4d13b628 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -114,6 +114,7 @@ struct tbf_sched_data u32 limit; /* Maximal length of backlog: bytes */ u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ u32 mtu; + u32 max_size; struct qdisc_rate_table *R_tab; struct qdisc_rate_table *P_tab; @@ -132,6 +133,8 @@ tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) { struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; + if (skb->len > q->max_size) + goto drop; __skb_queue_tail(&sch->q, skb); if ((sch->stats.backlog += skb->len) <= q->limit) { sch->stats.bytes += skb->len; @@ -145,6 +148,8 @@ tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) __skb_unlink(skb, &sch->q); sch->stats.backlog -= skb->len; + +drop: sch->stats.drops++; #ifdef CONFIG_NET_CLS_POLICE if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch)) @@ -180,6 +185,7 @@ static void tbf_watchdog(unsigned long arg) { struct Qdisc *sch = (struct Qdisc*)arg; + sch->flags &= ~TCQ_F_THROTTLED; qdisc_wakeup(sch->dev); } @@ -216,6 +222,7 @@ tbf_dequeue(struct Qdisc* sch) q->tokens = toks; q->ptokens = ptoks; sch->stats.backlog -= skb->len; + sch->flags &= ~TCQ_F_THROTTLED; return skb; } @@ -238,10 +245,11 @@ tbf_dequeue(struct Qdisc* sch) Really, if we split the flow into independent subflows, it would be a very good solution. This is the main idea of all FQ algorithms - (cf. CSZ, HPFQ, HFCS) + (cf. 
CSZ, HPFQ, HFSC) */ __skb_queue_head(&sch->q, skb); + sch->flags |= TCQ_F_THROTTLED; sch->stats.overlimits++; } return NULL; @@ -258,53 +266,86 @@ tbf_reset(struct Qdisc* sch) PSCHED_GET_TIME(q->t_c); q->tokens = q->buffer; q->ptokens = q->mtu; + sch->flags &= ~TCQ_F_THROTTLED; del_timer(&q->wd_timer); } -static int tbf_init(struct Qdisc* sch, struct rtattr *opt) +static int tbf_change(struct Qdisc* sch, struct rtattr *opt) { + int err = -EINVAL; struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; struct rtattr *tb[TCA_TBF_PTAB]; struct tc_tbf_qopt *qopt; + struct qdisc_rate_table *rtab = NULL; + struct qdisc_rate_table *ptab = NULL; + int max_size; - MOD_INC_USE_COUNT; - - if (opt == NULL || - rtattr_parse(tb, TCA_TBF_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) || + if (rtattr_parse(tb, TCA_TBF_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) || tb[TCA_TBF_PARMS-1] == NULL || - RTA_PAYLOAD(tb[TCA_TBF_PARMS-1]) < sizeof(*qopt)) { - MOD_DEC_USE_COUNT; - return -EINVAL; - } + RTA_PAYLOAD(tb[TCA_TBF_PARMS-1]) < sizeof(*qopt)) + goto done; qopt = RTA_DATA(tb[TCA_TBF_PARMS-1]); - q->R_tab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB-1]); - if (q->R_tab == NULL) { - MOD_DEC_USE_COUNT; - return -EINVAL; - } + rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB-1]); + if (rtab == NULL) + goto done; if (qopt->peakrate.rate) { - q->P_tab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_PTAB-1]); - if (q->P_tab == NULL) { - MOD_DEC_USE_COUNT; - qdisc_put_rtab(q->R_tab); - return -EINVAL; + if (qopt->peakrate.rate > qopt->rate.rate) + ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB-1]); + if (ptab == NULL) + goto done; + } + + max_size = psched_mtu(sch->dev); + if (ptab) { + int n = max_size>>qopt->peakrate.cell_log; + while (n>0 && ptab->data[n-1] > qopt->mtu) { + max_size -= (1<<qopt->peakrate.cell_log); + n--; } } + if (rtab->data[max_size>>qopt->rate.cell_log] > qopt->buffer) + goto done; - PSCHED_GET_TIME(q->t_c); - init_timer(&q->wd_timer); - q->wd_timer.function = tbf_watchdog; - q->wd_timer.data = (unsigned long)sch; + start_bh_atomic(); q->limit = qopt->limit; q->mtu = qopt->mtu; - if (q->mtu == 0) - q->mtu = psched_mtu(sch->dev); + q->max_size = max_size; q->buffer = qopt->buffer; q->tokens = q->buffer; q->ptokens = q->mtu; - return 0; + rtab = xchg(&q->R_tab, rtab); + ptab = xchg(&q->P_tab, ptab); + end_bh_atomic(); + err = 0; +done: + if (rtab) + qdisc_put_rtab(rtab); + if (ptab) + qdisc_put_rtab(ptab); + return err; +} + +static int tbf_init(struct Qdisc* sch, struct rtattr *opt) +{ + int err; + struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; + + if (opt == NULL) + return -EINVAL; + + MOD_INC_USE_COUNT; + + PSCHED_GET_TIME(q->t_c); + init_timer(&q->wd_timer); + q->wd_timer.function = tbf_watchdog; + q->wd_timer.data = (unsigned long)sch; + + if ((err = tbf_change(sch, opt)) != 0) { + MOD_DEC_USE_COUNT; + } + return err; } static void tbf_destroy(struct Qdisc *sch) @@ -328,10 +369,10 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) unsigned char *b = skb->tail; struct rtattr *rta; struct tc_tbf_qopt opt; - + rta = (struct rtattr*)b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); - + opt.limit = q->limit; opt.rate = q->R_tab->rate; if (q->P_tab) @@ -366,6 +407,7 @@ struct Qdisc_ops tbf_qdisc_ops = tbf_init, tbf_reset, tbf_destroy, + tbf_change, #ifdef CONFIG_RTNETLINK tbf_dump, diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 212e6f696..66040d5e9 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -444,6 +444,7 @@ static struct teql_master the_master = 
{ teql_qdisc_init, teql_reset, teql_destroy, + NULL, },};
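
The psched_tod_diff() helper added in the sch_api.c hunk above bounds a gettimeofday-based delta and keeps delta_sec * 1000000 from overflowing a signed 32-bit int (delta_sec may be at most 0x7FFFFFFF/1000000 - 1 = 2146 before the helper simply saturates to the bound). The stand-alone C program below is not part of the patch; it only restates that arithmetic in userspace so it can be checked directly:

#include <assert.h>
#include <limits.h>

/* Stand-alone restatement of the patch's psched_tod_diff(): return an
 * elapsed time in microseconds, never more than 'bound', and never let
 * delta_sec * 1000000 overflow a signed 32-bit int. */
static int tod_diff_usec(int delta_sec, int bound)
{
	int delta;

	if (bound <= 1000000 || delta_sec > (0x7FFFFFFF / 1000000) - 1)
		return bound;
	delta = delta_sec * 1000000;
	if (delta > bound)
		delta = bound;
	return delta;
}

int main(void)
{
	assert(tod_diff_usec(1, 5000000) == 1000000);	/* 1 s fits under the bound */
	assert(tod_diff_usec(10, 5000000) == 5000000);	/* clamped to the bound */
	assert(tod_diff_usec(INT_MAX, 5000000) == 5000000);	/* saturates, no overflow */
	return 0;
}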
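
The reworked cbq_dequeue_prio() in the sch_cbq.c hunks above moves the deficit top-up to the moment a backlogged class has exhausted its allotment (cl->deficit += cl->quantum before switching to the next class), i.e. the usual weighted/deficit round-robin rule, with the class allowed to run its deficit negative on the packet it sends. The sketch below is not the CBQ code: class hierarchy, borrowing and overlimit handling are all omitted, and plain byte queues stand in for child qdiscs, purely to make the deficit/quantum bookkeeping concrete:

#include <stdio.h>

#define NCLS 3

/* Per-class backlog as packet lengths in bytes; 0 terminates a queue. */
static int pkts[NCLS][4] = {
	{ 1500, 1500, 0 },
	{  200,  200, 200, 0 },
	{ 1000, 0 },
};
static int head[NCLS];
static int deficit[NCLS];
static const int quantum[NCLS] = { 1500, 600, 1000 };

/* Dequeue one packet in deficit round-robin order, or -1 when every queue
 * is empty.  As in the patched cbq_dequeue_prio(), a class may send while
 * its deficit is still positive (and go negative doing so); once the
 * deficit is used up it is topped up by the quantum and the walk moves on. */
static int drr_dequeue(void)
{
	static int cls;
	int empty = 0;

	while (empty < NCLS) {
		int len = pkts[cls][head[cls]];

		if (len == 0) {				/* nothing queued here */
			empty++;
		} else if (deficit[cls] > 0) {
			deficit[cls] -= len;		/* spend deficit, send */
			head[cls]++;
			return len;
		} else {
			deficit[cls] += quantum[cls];	/* top up, try next class */
			empty = 0;
		}
		cls = (cls + 1) % NCLS;
	}
	return -1;
}

int main(void)
{
	int len;

	while ((len = drr_dequeue()) != -1)
		printf("sent %d bytes\n", len);
	return 0;
}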
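
The sch_generic.c hunk above replaces the old TCQ_F_DEFAULT convention with a reference count: qdisc_create_dflt() starts the count at one, qdisc_destroy() only tears the qdisc down once atomic_dec_and_test() reports the last reference gone, and tc_fill_qdisc() now exports the count in tcm_info. A minimal userspace sketch of that get/put pattern, with a plain int standing in for the kernel's atomic_t (names here are illustrative, not the kernel's):

#include <stdio.h>
#include <stdlib.h>

struct qdisc {
	int refcnt;		/* stands in for atomic_t refcnt */
	const char *id;
};

static struct qdisc *qdisc_alloc(const char *id)
{
	struct qdisc *q = malloc(sizeof(*q));

	if (q) {
		q->refcnt = 1;	/* creator holds the first reference */
		q->id = id;
	}
	return q;
}

static void qdisc_hold(struct qdisc *q)
{
	q->refcnt++;
}

static void qdisc_put(struct qdisc *q)
{
	if (--q->refcnt)	/* like atomic_dec_and_test(): free on the last put only */
		return;
	printf("destroying %s\n", q->id);
	free(q);
}

int main(void)
{
	struct qdisc *q = qdisc_alloc("pfifo");

	if (!q)
		return 1;
	qdisc_hold(q);		/* e.g. grafted under a class */
	qdisc_put(q);		/* one reference dropped, still alive */
	qdisc_put(q);		/* last reference: now destroyed */
	return 0;
}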
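
The rewritten prio_classify() in the sch_prio.c hunk above falls back to the priomap when no filter matches and the packet priority does not carry this qdisc's major handle: the priority, masked to the TC_PRIO_MAX+1 table size, selects the band directly, while a band taken from an explicit class id is additionally clamped to the configured number of bands. The model below covers only that priomap fallback (skb, filters, the major-handle checks and netlink parsing are omitted) and uses the default 3-band priomap that prio_init() installs:

#include <stdio.h>

#define TC_PRIO_MAX 15

/* Default 3-band priomap, as installed by prio_init() when no options
 * are supplied. */
static const unsigned char prio2band[TC_PRIO_MAX + 1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };

/* Fallback path of prio_classify(): no filter matched, so the packet
 * priority (masked to the priomap size) selects the band directly. */
static unsigned int band_for_priority(unsigned int priority)
{
	return prio2band[priority & TC_PRIO_MAX];
}

int main(void)
{
	printf("priority 0 -> band %u\n", band_for_priority(0));	/* 1 */
	printf("priority 6 -> band %u\n", band_for_priority(6));	/* 0 */
	printf("priority 1 -> band %u\n", band_for_priority(1));	/* 2 */
	printf("priority 99 -> band %u\n", band_for_priority(99));	/* 99 & 15 = 3 -> 2 */
	return 0;
}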
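
tbf_change() in the sch_tbf.c hunk above derives q->max_size so that tbf_enqueue() can drop, at enqueue time, any packet whose transmission time could never fit into the configured bucket; the kernel does this by walking the precomputed rate tables (and, when a peak rate is configured, checking the peak-rate table against qopt->mtu). The program below only approximates that idea with a plain bytes-per-second rate instead of a rate table; the names and units are illustrative, not the kernel's:

#include <stdio.h>

/* Time, in microseconds, needed to transmit 'len' bytes at 'rate' bytes/s.
 * Stands in for the rtab->data[] lookup the kernel performs. */
static unsigned long len_to_usec(unsigned long rate, unsigned long len)
{
	return (len * 1000000UL) / rate;
}

/* Largest packet (up to 'mtu' bytes) whose transmission time still fits
 * into a bucket of depth 'buffer' microseconds, mirroring the max_size
 * bound the patch adds to TBF. */
static unsigned long tbf_max_size(unsigned long rate, unsigned long buffer,
				  unsigned long mtu)
{
	unsigned long size = mtu;

	while (size > 0 && len_to_usec(rate, size) > buffer)
		size--;
	return size;
}

int main(void)
{
	/* 125000 bytes/s (1 Mbit/s), 10 ms bucket, 1500 byte MTU:
	 * 1250 bytes take exactly 10 ms, so anything larger could never
	 * gather enough tokens and is dropped at enqueue time. */
	printf("max_size = %lu bytes\n", tbf_max_size(125000, 10000, 1500));
	return 0;
}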