diff options
author | David Ahern <dsahern@gmail.com> | 2018-01-21 11:20:56 -0800 |
---|---|---|
committer | David Ahern <dsahern@gmail.com> | 2018-01-21 11:20:56 -0800 |
commit | c2e368df0a12eef89dbb1dd06eda9a6bbb478dc4 (patch) | |
tree | 474e991a21c3221f47ef7bd4440f86bbd59331c0 | |
parent | 40cf5b095965f5bda81d517020c4107fa74023e6 (diff) | |
parent | 063463efd7f0d91b7372b089a7b7aff7fc9ac0f6 (diff) |
Merge branch 'shared_block' into net-next
Jiri Pirko says:
====================
From: Jiri Pirko <jiri@mellanox.com>
The kernel allows filters to be shared between qdiscs by means of
a shared block.
Example:
The following uses block number 22; "22" is just an identifier:
$ tc qdisc add dev ens7 ingress_block 22 ingress
                        ^^^^^^^^^^^^^^^^
$ tc qdisc add dev ens8 ingress_block 22 ingress
                        ^^^^^^^^^^^^^^^^
If we don't specify a "block" command line option, no shared block will
be created:
$ tc qdisc add dev ens9 ingress
Now if we list the qdiscs, we will see the block index in the output:
$ tc qdisc
qdisc ingress ffff: dev ens7 parent ffff:fff1 ingress_block 22
qdisc ingress ffff: dev ens8 parent ffff:fff1 ingress_block 22
qdisc ingress ffff: dev ens9 parent ffff:fff1
To make it more visual, the situation looks like this:
ens7 ingress qdisc                 ens8 ingress qdisc
        |                                  |
        |                                  |
        +---------->  block 22  <----------+
An unlimited number of qdiscs may share the same block.
Block sharing is also supported for clsact qdisc:
$ tc qdisc add dev ens10 ingress_block 23 egress_block 24 clsact
$ tc qdisc show dev ens10
qdisc clsact ffff: dev ens10 parent ffff:fff1 ingress_block 23 egress_block 24
We can add filter using the block index:
$ tc filter add block 22 protocol ip pref 25 flower dst_ip 192.168.0.0/16 action drop
Note we cannot use the qdisc for filter manipulations of shared blocks:
$ tc filter add dev ens8 ingress protocol ip pref 1 flower dst_ip 192.168.100.2 action drop
Error: This filter block is shared. Please use the block index to manipulate the filters.
We will see the same filters whether we list them for block 22 or for
the ingress qdiscs of ens7 and ens8:
$ tc filter show block 22
filter protocol ip pref 25 flower chain 0
filter protocol ip pref 25 flower chain 0 handle 0x1
...
$ tc filter show dev ens7 ingress
filter block 22 protocol ip pref 25 flower chain 0
filter block 22 protocol ip pref 25 flower chain 0 handle 0x1
...
$ tc filter show dev ens8 ingress
filter block 22 protocol ip pref 25 flower chain 0
filter block 22 protocol ip pref 25 flower chain 0 handle 0x1
...
====================
Signed-off-by: David Ahern <dsahern@gmail.com>
-rw-r--r-- | man/man8/tc.8 | 24 | ||||
-rw-r--r-- | tc/tc_filter.c | 105 | ||||
-rw-r--r-- | tc/tc_qdisc.c | 97 | ||||
-rw-r--r-- | tc/tc_util.h | 2 |
4 files changed, 210 insertions, 18 deletions
diff --git a/man/man8/tc.8 b/man/man8/tc.8 index ff071b33..5ffea373 100644 --- a/man/man8/tc.8 +++ b/man/man8/tc.8 @@ -11,7 +11,11 @@ tc \- show / manipulate traffic control settings \fIqdisc-id\fR .B | root ] .B [ handle -\fIqdisc-id\fR ] qdisc +\fIqdisc-id\fR ] +.B [ ingress_block +\fIBLOCK_INDEX\fR ] +.B [ egress_block +\fIBLOCK_INDEX\fR ] qdisc [ qdisc specific parameters ] .P @@ -43,6 +47,19 @@ tc \- show / manipulate traffic control settings .B tc .RI "[ " OPTIONS " ]" +.B filter [ add | change | replace | delete | get ] block +\fIBLOCK_INDEX\fR +.B [ handle \fIfilter-id\fR ] +.B protocol +\fIprotocol\fR +.B prio +\fIpriority\fR filtertype +[ filtertype specific parameters ] +.B flowid +\fIflow-id\fR + +.B tc +.RI "[ " OPTIONS " ]" .RI "[ " FORMAT " ]" .B qdisc show [ dev \fIDEV\fR @@ -58,6 +75,11 @@ tc \- show / manipulate traffic control settings .RI "[ " OPTIONS " ]" .B filter show dev \fIDEV\fR +.P +.B tc +.RI "[ " OPTIONS " ]" +.B filter show block +\fIBLOCK_INDEX\fR .P .ti 8 diff --git a/tc/tc_filter.c b/tc/tc_filter.c index a86785d6..5c31a4ce 100644 --- a/tc/tc_filter.c +++ b/tc/tc_filter.c @@ -28,14 +28,17 @@ static void usage(void) { fprintf(stderr, - "Usage: tc filter [ add | del | change | replace | show ] dev STRING\n" - "Usage: tc filter get dev STRING parent CLASSID protocol PROTO handle FILTERID pref PRIO FILTER_TYPE\n" + "Usage: tc filter [ add | del | change | replace | show ] [ dev STRING ]\n" + " tc filter [ add | del | change | replace | show ] [ block BLOCK_INDEX ]\n" + " tc filter get dev STRING parent CLASSID protocol PROTO handle FILTERID pref PRIO FILTER_TYPE\n" + " tc filter get block BLOCK_INDEX protocol PROTO handle FILTERID pref PRIO FILTER_TYPE\n" " [ pref PRIO ] protocol PROTO [ chain CHAIN_INDEX ]\n" " [ estimator INTERVAL TIME_CONSTANT ]\n" " [ root | ingress | egress | parent CLASSID ]\n" " [ handle FILTERID ] [ [ FILTER_TYPE ] [ help | OPTIONS ] ]\n" "\n" " tc filter show [ dev STRING ] [ root | ingress | egress | parent 
CLASSID ]\n" + " tc filter show [ block BLOCK_INDEX ]\n" "Where:\n" "FILTER_TYPE := { rsvp | u32 | bpf | fw | route | etc. }\n" "FILTERID := ... format depends on classifier, see there\n" @@ -58,6 +61,7 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv, int chain_index_set = 0; char d[IFNAMSIZ] = {}; int protocol_set = 0; + __u32 block_index = 0; char *fhandle = NULL; __u32 protocol = 0; __u32 chain_index; @@ -89,7 +93,21 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv, NEXT_ARG(); if (d[0]) duparg("dev", *argv); + if (block_index) { + fprintf(stderr, "Error: \"dev\" and \"block\" are mutually exlusive\n"); + return -1; + } strncpy(d, *argv, sizeof(d)-1); + } else if (matches(*argv, "block") == 0) { + NEXT_ARG(); + if (block_index) + duparg("block", *argv); + if (d[0]) { + fprintf(stderr, "Error: \"dev\" and \"block\" are mutually exlusive\n"); + return -1; + } + if (get_u32(&block_index, *argv, 0) || !block_index) + invarg("invalid block index value", *argv); } else if (strcmp(*argv, "root") == 0) { if (req->t.tcm_parent) { fprintf(stderr, @@ -184,6 +202,9 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv, fprintf(stderr, "Cannot find device \"%s\"\n", d); return 1; } + } else if (block_index) { + req->t.tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; + req->t.tcm_block_index = block_index; } if (q) { @@ -228,6 +249,7 @@ static __u32 filter_prio; static __u32 filter_protocol; static __u32 filter_chain_index; static int filter_chain_index_set; +static __u32 filter_block_index; __u16 f_proto; int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) @@ -274,20 +296,27 @@ int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) print_bool(PRINT_ANY, "added", "added ", true); print_string(PRINT_FP, NULL, "filter ", NULL); - if (!filter_ifindex || filter_ifindex != t->tcm_ifindex) - print_string(PRINT_ANY, "dev", "dev %s ", - 
ll_index_to_name(t->tcm_ifindex)); - - if (!filter_parent || filter_parent != t->tcm_parent) { - if (t->tcm_parent == TC_H_ROOT) - print_bool(PRINT_ANY, "root", "root ", true); - else if (t->tcm_parent == TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS)) - print_bool(PRINT_ANY, "ingress", "ingress ", true); - else if (t->tcm_parent == TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS)) - print_bool(PRINT_ANY, "egress", "egress ", true); - else { - print_tc_classid(abuf, sizeof(abuf), t->tcm_parent); - print_string(PRINT_ANY, "parent", "parent %s ", abuf); + if (t->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { + if (!filter_block_index || + filter_block_index != t->tcm_block_index) + print_uint(PRINT_ANY, "block", "block %u ", + t->tcm_block_index); + } else { + if (!filter_ifindex || filter_ifindex != t->tcm_ifindex) + print_string(PRINT_ANY, "dev", "dev %s ", + ll_index_to_name(t->tcm_ifindex)); + + if (!filter_parent || filter_parent != t->tcm_parent) { + if (t->tcm_parent == TC_H_ROOT) + print_bool(PRINT_ANY, "root", "root ", true); + else if (t->tcm_parent == TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS)) + print_bool(PRINT_ANY, "ingress", "ingress ", true); + else if (t->tcm_parent == TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS)) + print_bool(PRINT_ANY, "egress", "egress ", true); + else { + print_tc_classid(abuf, sizeof(abuf), t->tcm_parent); + print_string(PRINT_ANY, "parent", "parent %s ", abuf); + } } } @@ -367,6 +396,7 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) int protocol_set = 0; __u32 chain_index; int chain_index_set = 0; + __u32 block_index = 0; __u32 parent_handle = 0; char *fhandle = NULL; char d[IFNAMSIZ] = {}; @@ -377,7 +407,21 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) NEXT_ARG(); if (d[0]) duparg("dev", *argv); + if (block_index) { + fprintf(stderr, "Error: \"dev\" and \"block\" are mutually exlusive\n"); + return -1; + } strncpy(d, *argv, sizeof(d)-1); + } else if (matches(*argv, "block") == 0) { + 
NEXT_ARG(); + if (block_index) + duparg("block", *argv); + if (d[0]) { + fprintf(stderr, "Error: \"dev\" and \"block\" are mutually exlusive\n"); + return -1; + } + if (get_u32(&block_index, *argv, 0) || !block_index) + invarg("invalid block index value", *argv); } else if (strcmp(*argv, "root") == 0) { if (req.t.tcm_parent) { fprintf(stderr, @@ -491,8 +535,12 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) return 1; } filter_ifindex = req.t.tcm_ifindex; + } else if (block_index) { + req.t.tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; + req.t.tcm_block_index = block_index; + filter_block_index = block_index; } else { - fprintf(stderr, "Must specify netdevice \"dev\"\n"); + fprintf(stderr, "Must specify netdevice \"dev\" or block index \"block\"\n"); return -1; } @@ -542,6 +590,7 @@ static int tc_filter_list(int argc, char **argv) __u32 prio = 0; __u32 protocol = 0; __u32 chain_index; + __u32 block_index = 0; char *fhandle = NULL; while (argc > 0) { @@ -549,7 +598,21 @@ static int tc_filter_list(int argc, char **argv) NEXT_ARG(); if (d[0]) duparg("dev", *argv); + if (block_index) { + fprintf(stderr, "Error: \"dev\" cannot be used in the same time as \"block\"\n"); + return -1; + } strncpy(d, *argv, sizeof(d)-1); + } else if (matches(*argv, "block") == 0) { + NEXT_ARG(); + if (block_index) + duparg("block", *argv); + if (d[0]) { + fprintf(stderr, "Error: \"block\" cannot be used in the same time as \"dev\"\n"); + return -1; + } + if (get_u32(&block_index, *argv, 0) || !block_index) + invarg("invalid block index value", *argv); } else if (strcmp(*argv, "root") == 0) { if (req.t.tcm_parent) { fprintf(stderr, @@ -638,6 +701,14 @@ static int tc_filter_list(int argc, char **argv) return 1; } filter_ifindex = req.t.tcm_ifindex; + } else if (block_index) { + if (!tc_qdisc_block_exists(block_index)) { + fprintf(stderr, "Cannot find block \"%u\"\n", block_index); + return 1; + } + req.t.tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; + req.t.tcm_block_index = 
block_index; + filter_block_index = block_index; } if (filter_chain_index_set) diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c index 70279b9d..8cc4b73d 100644 --- a/tc/tc_qdisc.c +++ b/tc/tc_qdisc.c @@ -31,6 +31,7 @@ static int usage(void) fprintf(stderr, " [ handle QHANDLE ] [ root | ingress | clsact | parent CLASSID ]\n"); fprintf(stderr, " [ estimator INTERVAL TIME_CONSTANT ]\n"); fprintf(stderr, " [ stab [ help | STAB_OPTIONS] ]\n"); + fprintf(stderr, " [ ingress_block BLOCK_INDEX ] [ egress_block BLOCK_INDEX ]\n"); fprintf(stderr, " [ [ QDISC_KIND ] [ help | OPTIONS ] ]\n"); fprintf(stderr, "\n"); fprintf(stderr, " tc qdisc show [ dev STRING ] [ ingress | clsact ] [ invisible ]\n"); @@ -61,6 +62,8 @@ static int tc_qdisc_modify(int cmd, unsigned int flags, int argc, char **argv) .n.nlmsg_type = cmd, .t.tcm_family = AF_UNSPEC, }; + __u32 ingress_block = 0; + __u32 egress_block = 0; while (argc > 0) { if (strcmp(*argv, "dev") == 0) { @@ -121,6 +124,14 @@ static int tc_qdisc_modify(int cmd, unsigned int flags, int argc, char **argv) if (parse_size_table(&argc, &argv, &stab.szopts) < 0) return -1; continue; + } else if (matches(*argv, "ingress_block") == 0) { + NEXT_ARG(); + if (get_u32(&ingress_block, *argv, 0) || !ingress_block) + invarg("invalid ingress block index value", *argv); + } else if (matches(*argv, "egress_block") == 0) { + NEXT_ARG(); + if (get_u32(&egress_block, *argv, 0) || !egress_block) + invarg("invalid egress block index value", *argv); } else if (matches(*argv, "help") == 0) { usage(); } else { @@ -138,6 +149,13 @@ static int tc_qdisc_modify(int cmd, unsigned int flags, int argc, char **argv) if (est.ewma_log) addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est)); + if (ingress_block) + addattr32(&req.n, sizeof(req), + TCA_INGRESS_BLOCK, ingress_block); + if (egress_block) + addattr32(&req.n, sizeof(req), + TCA_EGRESS_BLOCK, egress_block); + if (q) { if (q->parse_qopt) { if (q->parse_qopt(q, argc, argv, &req.n, d)) @@ -270,6 +288,24 @@ int 
print_qdisc(const struct sockaddr_nl *who, (rta_getattr_u8(tb[TCA_HW_OFFLOAD]))) print_bool(PRINT_ANY, "offloaded", "offloaded ", true); + if (tb[TCA_INGRESS_BLOCK] && + RTA_PAYLOAD(tb[TCA_INGRESS_BLOCK]) >= sizeof(__u32)) { + __u32 block = rta_getattr_u32(tb[TCA_INGRESS_BLOCK]); + + if (block) + print_uint(PRINT_ANY, "ingress_block", + "ingress_block %u ", block); + } + + if (tb[TCA_EGRESS_BLOCK] && + RTA_PAYLOAD(tb[TCA_EGRESS_BLOCK]) >= sizeof(__u32)) { + __u32 block = rta_getattr_u32(tb[TCA_EGRESS_BLOCK]); + + if (block) + print_uint(PRINT_ANY, "egress_block", + "egress_block %u ", block); + } + /* pfifo_fast is generic enough to warrant the hardcoding --JHS */ if (strcmp("pfifo_fast", RTA_DATA(tb[TCA_KIND])) == 0) q = get_qdisc_kind("prio"); @@ -412,3 +448,64 @@ int do_qdisc(int argc, char **argv) fprintf(stderr, "Command \"%s\" is unknown, try \"tc qdisc help\".\n", *argv); return -1; } + +struct tc_qdisc_block_exists_ctx { + __u32 block_index; + bool found; +}; + +static int tc_qdisc_block_exists_cb(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg) +{ + struct tc_qdisc_block_exists_ctx *ctx = arg; + struct tcmsg *t = NLMSG_DATA(n); + struct rtattr *tb[TCA_MAX+1]; + int len = n->nlmsg_len; + + if (n->nlmsg_type != RTM_NEWQDISC) + return 0; + + len -= NLMSG_LENGTH(sizeof(*t)); + if (len < 0) + return -1; + + parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len); + + if (tb[TCA_KIND] == NULL) + return -1; + + if (tb[TCA_INGRESS_BLOCK] && + RTA_PAYLOAD(tb[TCA_INGRESS_BLOCK]) >= sizeof(__u32)) { + __u32 block = rta_getattr_u32(tb[TCA_INGRESS_BLOCK]); + + if (block == ctx->block_index) + ctx->found = true; + } + + if (tb[TCA_EGRESS_BLOCK] && + RTA_PAYLOAD(tb[TCA_EGRESS_BLOCK]) >= sizeof(__u32)) { + __u32 block = rta_getattr_u32(tb[TCA_EGRESS_BLOCK]); + + if (block == ctx->block_index) + ctx->found = true; + } + return 0; +} + +bool tc_qdisc_block_exists(__u32 block_index) +{ + struct tc_qdisc_block_exists_ctx ctx = { .block_index = block_index }; + struct tcmsg 
t = { .tcm_family = AF_UNSPEC }; + + if (rtnl_dump_request(&rth, RTM_GETQDISC, &t, sizeof(t)) < 0) { + perror("Cannot send dump request"); + return false; + } + + if (rtnl_dump_filter(&rth, tc_qdisc_block_exists_cb, &ctx) < 0) { + perror("Dump terminated\n"); + return false; + } + + return ctx.found; +} diff --git a/tc/tc_util.h b/tc/tc_util.h index e354765f..cd2ff596 100644 --- a/tc/tc_util.h +++ b/tc/tc_util.h @@ -134,4 +134,6 @@ void cls_names_uninit(void); int action_a2n(char *arg, int *result, bool allow_num); +bool tc_qdisc_block_exists(__u32 block_index); + #endif |