diff options
Diffstat (limited to 'include/net')
-rw-r--r-- | include/net/br.h | 95 | ||||
-rw-r--r-- | include/net/dst.h | 5 | ||||
-rw-r--r-- | include/net/ip.h | 22 | ||||
-rw-r--r-- | include/net/ip_fib.h | 7 | ||||
-rw-r--r-- | include/net/ip_masq.h | 5 | ||||
-rw-r--r-- | include/net/ipx.h | 5 | ||||
-rw-r--r-- | include/net/netrom.h | 7 | ||||
-rw-r--r-- | include/net/pkt_cls.h | 83 | ||||
-rw-r--r-- | include/net/pkt_sched.h | 237 | ||||
-rw-r--r-- | include/net/rose.h | 12 | ||||
-rw-r--r-- | include/net/snmp.h | 8 | ||||
-rw-r--r-- | include/net/sock.h | 157 | ||||
-rw-r--r-- | include/net/spx.h | 113 | ||||
-rw-r--r-- | include/net/spxcall.h | 2 | ||||
-rw-r--r-- | include/net/tcp.h | 189 |
15 files changed, 715 insertions, 232 deletions
diff --git a/include/net/br.h b/include/net/br.h index 3b4ccf094..8f47d3c12 100644 --- a/include/net/br.h +++ b/include/net/br.h @@ -27,7 +27,7 @@ /* * We time out our entries in the FDB after this many seconds. */ -#define FDB_TIMEOUT 300 +#define FDB_TIMEOUT 20 /* JRP: 20s as NSC bridge code, was 300 for Linux */ /* * the following defines are the initial values used when the @@ -40,6 +40,10 @@ #define BRIDGE_FORWARD_DELAY 15 #define HOLD_TIME 1 +/* broacast/multicast storm limitation. This per source. */ +#define MAX_MCAST_PER_PERIOD 4 +#define MCAST_HOLD_TIME 10 /* in jiffies unit (10ms increment) */ + #define Default_path_cost 10 /* @@ -71,13 +75,25 @@ typedef struct { #define BRIDGE_ID_ULA bi.p_u.ula #define BRIDGE_ID bi.id +/* JRP: on the network the flags field is between "type" and "root_id" + * this is unfortunated! To make the code portable to a RISC machine + * the pdus are now massaged a little bit for processing + */ +#define TOPOLOGY_CHANGE 0x01 +#define TOPOLOGY_CHANGE_ACK 0x80 +#define BRIDGE_BPDU_8021_CONFIG_SIZE 35 /* real size */ +#define BRIDGE_BPDU_8021_CONFIG_FLAG_OFFSET 4 +#define BRIDGE_BPDU_8021_PROTOCOL_ID 0 +#define BRIDGE_BPDU_8021_PROTOCOL_VERSION_ID 0 +#define BRIDGE_LLC1_HS 3 +#define BRIDGE_LLC1_DSAP 0x42 +#define BRIDGE_LLC1_SSAP 0x42 +#define BRIDGE_LLC1_CTRL 0x03 + typedef struct { unsigned short protocol_id; unsigned char protocol_version_id; unsigned char type; - unsigned char flags; -#define TOPOLOGY_CHANGE 0x01 -#define TOPOLOGY_CHANGE_ACK 0x80 bridge_id_t root_id; /* (4.5.1.1) */ unsigned int root_path_cost; /* (4.5.1.2) */ bridge_id_t bridge_id; /* (4.5.1.3) */ @@ -86,8 +102,23 @@ typedef struct { unsigned short max_age; /* (4.5.1.6) */ unsigned short hello_time; /* (4.5.1.7) */ unsigned short forward_delay; /* (4.5.1.8) */ + unsigned char top_change_ack; + unsigned char top_change; } Config_bpdu; +#ifdef __LITTLE_ENDIAN +#define config_bpdu_hton(config_bpdu) \ + (config_bpdu)->root_path_cost = 
htonl((config_bpdu)->root_path_cost); \ + (config_bpdu)->port_id = htons((config_bpdu)->port_id); \ + (config_bpdu)->message_age = htons((config_bpdu)->message_age); \ + (config_bpdu)->max_age = htons((config_bpdu)->max_age); \ + (config_bpdu)->hello_time = htons((config_bpdu)->hello_time); \ + (config_bpdu)->forward_delay = htons((config_bpdu)->forward_delay); +#else +#define config_bpdu_hton(config_bpdu) +#endif +#define config_bpdu_ntoh config_bpdu_hton + /** Topology Change Notification BPDU Parameters (4.5.2) **/ @@ -112,12 +143,10 @@ typedef struct { unsigned short bridge_max_age; /* (4.5.3.8) */ unsigned short bridge_hello_time; /* (4.5.3.9) */ unsigned short bridge_forward_delay; /* (4.5.3.10) */ - unsigned int topology_change_detected; /* (4.5.3.11) */ - unsigned int topology_change; /* (4.5.3.12) */ + unsigned int top_change_detected; /* (4.5.3.11) */ + unsigned int top_change; /* (4.5.3.12) */ unsigned short topology_change_time; /* (4.5.3.13) */ unsigned short hold_time; /* (4.5.3.14) */ - unsigned int top_change; - unsigned int top_change_detected; } Bridge_data; /** Port Parameters (4.5.5) **/ @@ -149,8 +178,11 @@ struct fdb { unsigned char pad[2]; unsigned short port; unsigned int timer; - unsigned int flags; + unsigned short flags; #define FDB_ENT_VALID 0x01 + unsigned short mcast_count; + unsigned int mcast_timer; /* oldest xxxxxcast */ + /* AVL tree of all addresses, sorted by address */ short fdb_avl_height; struct fdb *fdb_avl_left; @@ -159,6 +191,19 @@ struct fdb { struct fdb *fdb_next; }; +/* data returned on BRCMD_DISPLAY_FDB */ +struct fdb_info { + unsigned char ula[6]; + unsigned char port; + unsigned char flags; + unsigned int timer; +}; +struct fdb_info_hdr { + int copied; /* nb of entries copied to user */ + int not_copied; /* when user buffer is too small */ + int cmd_time; +}; + #define IS_BRIDGED 0x2e @@ -169,6 +214,34 @@ struct fdb { #define BR_ACCEPT 1 #define BR_REJECT 0 +/* JRP: extra statistics for debug */ +typedef struct { + /* 
br_receive_frame counters */ + int port_disable_up_stack; + int rcv_bpdu; + int notForwarding; + int forwarding_up_stack; + int unknown_state; + + /* br_tx_frame counters */ + int port_disable; + int port_not_disable; + + /* br_forward counters */ + int local_multicast; + int forwarded_multicast; /* up stack as well */ + int flood_unicast; + int aged_flood_unicast; + int forwarded_unicast; + int forwarded_unicast_up_stack; + int forwarded_ip_up_stack; + int forwarded_ip_up_stack_lie; /* received on alternate device */ + int arp_for_local_mac; + int drop_same_port; + int drop_same_port_aged; + int drop_multicast; +} br_stats_counter; + struct br_stat { unsigned int flags; Bridge_data bridge_data; @@ -178,6 +251,7 @@ struct br_stat { unsigned short protocols[BR_MAX_PROTOCOLS]; unsigned short prot_id[BR_MAX_PROT_STATS]; /* Protocol encountered */ unsigned int prot_counter[BR_MAX_PROT_STATS]; /* How many packets ? */ + br_stats_counter packet_cnts; }; /* defined flags for br_stat.flags */ @@ -215,9 +289,10 @@ int br_receive_frame(struct sk_buff *skb); /* 3.5 */ int br_tx_frame(struct sk_buff *skb); int br_ioctl(unsigned int cmd, void *arg); int br_protocol_ok(unsigned short protocol); +void requeue_fdb(struct fdb *node, int new_port); struct fdb *br_avl_find_addr(unsigned char addr[6]); -int br_avl_insert (struct fdb * new_node); +struct fdb *br_avl_insert (struct fdb * new_node); /* externs */ diff --git a/include/net/dst.h b/include/net/dst.h index 0d18f60d2..a73a2d045 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -8,6 +8,7 @@ #ifndef _NET_DST_H #define _NET_DST_H +#include <linux/config.h> #include <net/neighbour.h> /* @@ -51,6 +52,10 @@ struct dst_entry int (*input)(struct sk_buff*); int (*output)(struct sk_buff*); +#ifdef CONFIG_NET_CLS_ROUTE + __u32 tclassid; +#endif + struct dst_ops *ops; char info[0]; diff --git a/include/net/ip.h b/include/net/ip.h index 9b536ddf7..add85700b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -100,7 +100,6 @@ 
extern int ip_acct_output(struct sk_buff *skb); #define ip_acct_output dev_queue_xmit #endif extern void ip_fragment(struct sk_buff *skb, int (*out)(struct sk_buff*)); -extern struct sk_buff * ip_reply(struct sk_buff *skb, int payload); extern int ip_do_nat(struct sk_buff *skb); extern void ip_send_check(struct iphdr *ip); extern int ip_id_count; @@ -117,6 +116,18 @@ extern int ip_build_xmit(struct sock *sk, struct rtable *rt, int flags); + +struct ip_reply_arg { + struct iovec iov[2]; + int n_iov; /* redundant */ + u32 csum; + int csumoffset; /* u16 offset of csum in iov[0].iov_base */ + /* -1 if not needed */ +}; + +void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, + unsigned int len); + extern int __ip_finish_output(struct sk_buff *skb); struct ipv4_config @@ -128,6 +139,7 @@ struct ipv4_config extern struct ipv4_config ipv4_config; extern struct ip_mib ip_statistics; +extern struct linux_mib net_statistics; extern int sysctl_local_port_range[2]; @@ -141,15 +153,7 @@ extern __inline__ int ip_finish_output(struct sk_buff *skb) skb->protocol = __constant_htons(ETH_P_IP); if (hh) { -#ifdef __alpha__ - /* Alpha has disguisting memcpy. Help it. 
*/ - u64 *aligned_hdr = (u64*)(skb->data - 16); - u64 *aligned_hdr0 = hh->hh_data; - aligned_hdr[0] = aligned_hdr0[0]; - aligned_hdr[1] = aligned_hdr0[1]; -#else memcpy(skb->data - 16, hh->hh_data, 16); -#endif skb_push(skb, dev->hard_header_len); return hh->hh_output(skb); } else if (dst->neighbour) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index d725e78d4..f96fa618c 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -50,6 +50,9 @@ struct fib_nh int nh_weight; int nh_power; #endif +#ifdef CONFIG_NET_CLS_ROUTE + __u32 nh_tclassid; +#endif int nh_oif; u32 nh_gw; }; @@ -229,9 +232,11 @@ extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb); extern u32 fib_rules_map_destination(u32 daddr, struct fib_result *res); +#ifdef CONFIG_NET_CLS_ROUTE +extern u32 fib_rules_tclass(struct fib_result *res); +#endif extern u32 fib_rules_policy(u32 saddr, struct fib_result *res, unsigned *flags); extern void fib_rules_init(void); #endif - #endif _NET_FIB_H diff --git a/include/net/ip_masq.h b/include/net/ip_masq.h index 0faa88336..65282bfcb 100644 --- a/include/net/ip_masq.h +++ b/include/net/ip_masq.h @@ -24,11 +24,6 @@ * I used an extra 4K port-space */ -/* - * Linux ports don't normally get allocated above 32K. 
- * I used an extra 4K port-space - */ - #define PORT_MASQ_BEGIN 61000 #define PORT_MASQ_END (PORT_MASQ_BEGIN+4096) diff --git a/include/net/ipx.h b/include/net/ipx.h index 52f09384e..2152e388b 100644 --- a/include/net/ipx.h +++ b/include/net/ipx.h @@ -34,7 +34,7 @@ struct ipxhdr #define IPX_TYPE_UNKNOWN 0x00 #define IPX_TYPE_RIP 0x01 /* may also be 0 */ #define IPX_TYPE_SAP 0x04 /* may also be 0 */ -#define IPX_TYPE_SPX 0x05 /* Not yet implemented */ +#define IPX_TYPE_SPX 0x05 /* SPX protocol */ #define IPX_TYPE_NCP 0x11 /* $lots for docs on this (SPIT) */ #define IPX_TYPE_PPROP 0x14 /* complicated flood fill brdcast [Not supported] */ ipx_address ipx_dest __attribute__ ((packed)); @@ -76,4 +76,7 @@ typedef struct ipx_route { #define IPX_MIN_EPHEMERAL_SOCKET 0x4000 #define IPX_MAX_EPHEMERAL_SOCKET 0x7fff +extern int ipx_register_spx(struct proto_ops **, struct net_proto_family *); +extern int ipx_unregister_spx(void); + #endif /* def _NET_INET_IPX_H_ */ diff --git a/include/net/netrom.h b/include/net/netrom.h index cc9fc842c..356a7d270 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -126,6 +126,11 @@ extern int nr_init(struct device *); /* nr_in.c */ extern int nr_process_rx_frame(struct sock *, struct sk_buff *); +/* nr_loopback.c */ +extern void nr_loopback_init(void); +extern void nr_loopback_clear(void); +extern int nr_loopback_queue(struct sk_buff *); + /* nr_out.c */ extern void nr_output(struct sock *, struct sk_buff *); extern void nr_send_nak_frame(struct sock *); @@ -153,7 +158,7 @@ extern void nr_requeue_frames(struct sock *); extern int nr_validate_nr(struct sock *, unsigned short); extern int nr_in_rx_window(struct sock *, unsigned short); extern void nr_write_internal(struct sock *, int); -extern void nr_transmit_dm(struct sk_buff *); +extern void nr_transmit_refusal(struct sk_buff *, int); extern void nr_disconnect(struct sock *, int); /* nr_timer.c */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h new file mode 100644 
index 000000000..0d3c25e25 --- /dev/null +++ b/include/net/pkt_cls.h @@ -0,0 +1,83 @@ +#ifndef __NET_PKT_CLS_H +#define __NET_PKT_CLS_H + + +#include <linux/pkt_cls.h> + +struct rtattr; +struct tcmsg; + +/* Basic packet classifier frontend definitions. */ + +struct tcf_result +{ + unsigned long class; + u32 classid; +}; + +struct tcf_proto +{ + /* Fast access part */ + struct tcf_proto *next; + void *root; + int (*classify)(struct sk_buff*, struct tcf_proto*, struct tcf_result *); + u32 protocol; + + /* All the rest */ + u32 prio; + u32 classid; + struct Qdisc *q; + void *data; + struct tcf_proto_ops *ops; +}; + +struct tcf_walker +{ + int stop; + int skip; + int count; + int (*fn)(struct tcf_proto *, unsigned long node, struct tcf_walker *); +}; + +struct tcf_proto_ops +{ + struct tcf_proto_ops *next; + char kind[IFNAMSIZ]; + + int (*classify)(struct sk_buff*, struct tcf_proto*, struct tcf_result *); + int (*init)(struct tcf_proto*); + void (*destroy)(struct tcf_proto*); + + unsigned long (*get)(struct tcf_proto*, u32 handle); + void (*put)(struct tcf_proto*, unsigned long); + int (*change)(struct tcf_proto*, u32 handle, struct rtattr **, unsigned long *); + int (*delete)(struct tcf_proto*, unsigned long); + void (*walk)(struct tcf_proto*, struct tcf_walker *arg); + + /* rtnetlink specific */ + int (*dump)(struct tcf_proto*, unsigned long, struct sk_buff *skb, struct tcmsg*); +}; + +/* Main classifier routine: scans classifier chain attached + to this qdisc, (optionally) tests for protocol and asks + specific classifiers. 
+ */ + +extern __inline__ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) +{ + int err = 0; + u32 protocol = skb->protocol; + + for ( ; tp; tp = tp->next) { + if ((tp->protocol == protocol || + tp->protocol == __constant_htons(ETH_P_ALL)) && + (err = tp->classify(skb, tp, res)) >= 0) + return err; + } + return -1; +} + +extern int register_tcf_proto_ops(struct tcf_proto_ops *ops); +extern int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); + +#endif diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 5faad9ad4..de7c7691a 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -1,21 +1,64 @@ #ifndef __NET_PKT_SCHED_H #define __NET_PKT_SCHED_H +#define PSCHED_GETTIMEOFDAY 1 +#define PSCHED_JIFFIES 2 +#define PSCHED_CPU 3 + +#define PSCHED_CLOCK_SOURCE PSCHED_GETTIMEOFDAY + #include <linux/pkt_sched.h> +#include <net/pkt_cls.h> + +struct rtattr; +struct Qdisc; + +struct qdisc_walker +{ + int stop; + int skip; + int count; + int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *); +}; + +struct Qdisc_class_ops +{ + /* Child qdisc manipulation */ + int (*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **); + + /* Class manipulation routines */ + unsigned long (*get)(struct Qdisc *, u32 classid); + void (*put)(struct Qdisc *, unsigned long); + int (*change)(struct Qdisc *, u32, u32, struct rtattr **, unsigned long *); + int (*delete)(struct Qdisc *, unsigned long); + void (*walk)(struct Qdisc *, struct qdisc_walker * arg); + + /* Filter manipulation */ + struct tcf_proto ** (*tcf_chain)(struct Qdisc *, unsigned long); + unsigned long (*bind_tcf)(struct Qdisc *, u32 classid); + void (*unbind_tcf)(struct Qdisc *, unsigned long); + + /* rtnetlink specific */ + int (*dump)(struct Qdisc *, unsigned long, struct sk_buff *skb, struct tcmsg*); +}; struct Qdisc_ops { struct Qdisc_ops *next; + struct Qdisc_class_ops *cl_ops; char id[IFNAMSIZ]; - int refcnt; int priv_size; - int 
(*enqueue)(struct sk_buff *skb, struct Qdisc *); + + int (*enqueue)(struct sk_buff *, struct Qdisc *); struct sk_buff * (*dequeue)(struct Qdisc *); + int (*requeue)(struct sk_buff *, struct Qdisc *); + int (*drop)(struct Qdisc *); + + int (*init)(struct Qdisc *, struct rtattr *arg); void (*reset)(struct Qdisc *); void (*destroy)(struct Qdisc *); - int (*init)(struct Qdisc *, void *arg); - int (*control)(struct Qdisc *, void *); - int (*requeue)(struct sk_buff *skb, struct Qdisc *); + + int (*dump)(struct Qdisc *, struct sk_buff *); }; struct Qdisc_head @@ -30,23 +73,35 @@ struct Qdisc struct Qdisc_head h; int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); struct sk_buff * (*dequeue)(struct Qdisc *dev); + unsigned flags; +#define TCQ_F_DEFAULT 1 +#define TCQ_F_BUILTIN 2 struct Qdisc_ops *ops; - int handle; + struct Qdisc *next; + u32 handle; + u32 classid; struct Qdisc *parent; struct sk_buff_head q; struct device *dev; - unsigned long dropped; - unsigned long tx_last; + + struct tc_stats stats; unsigned long tx_timeo; + unsigned long tx_last; + int (*reshape_fail)(struct sk_buff *skb, struct Qdisc *q); char data[0]; }; +struct qdisc_rate_table +{ + struct tc_ratespec rate; + u32 data[256]; + struct qdisc_rate_table *next; + int refcnt; +}; -/* Yes, it is slow for [34]86, but we have no choice. - 10 msec resolution is appropriate only for bandwidth < 32Kbit/sec. - RULE: +/* Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth Normal IP packet size ~ 512byte, hence: @@ -57,22 +112,96 @@ struct Qdisc 10msec resolution -> <50Kbit/sec. The result: [34]86 is not good choice for QoS router :-( - */ + The things are not so bad, because we may use artifical + clock evaluated by integration of network data flow + in the most critical places. -typedef struct timeval psched_time_t; + Note: we do not use fastgettimeofday. 
+ The reason is that, when it is not the same thing as + gettimeofday, it returns invalid timestamp, which is + not updated, when net_bh is active. -/* On 64bit architecures it would be clever to define: -typedef u64 psched_time_t; - and make all this boring arithmetics directly + So, use PSCHED_CLOCK_SOURCE = PSCHED_CPU on alpha and pentiums + with rtdsc. And PSCHED_JIFFIES on all other architectures, including [34]86 + and pentiums without rtdsc. + You can use PSCHED_GETTIMEOFDAY on another architectures, + which have fast and precise clock source, but it is too expensive. */ -#ifndef SCHEDULE_ONLY_LOW_BANDWIDTH + +#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY + +typedef struct timeval psched_time_t; +typedef long psched_tdiff_t; + #define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp)) +#define PSCHED_US2JIFFIE(usecs) (((usecs)+(1000000/HZ-1))/(1000000/HZ)) + +#else /* PSCHED_CLOCK_SOURCE != PSCHED_GETTIMEOFDAY */ + +typedef u64 psched_time_t; +typedef long psched_tdiff_t; + +extern psched_time_t psched_time_base; + +#if PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES + +#define PSCHED_WATCHER + +extern unsigned long psched_time_mark; + +#if HZ == 100 +#define PSCHED_JSCALE 7 +#elif HZ == 1024 +#define PSCHED_JSCALE 10 #else -#define PSCHED_GET_TIME(stamp) ((stamp) = xtime) +#define PSCHED_JSCALE 0 #endif +#define PSCHED_GET_TIME(stamp) ((stamp) = psched_time_base + (((unsigned long)(jiffies-psched_time_mark))<<PSCHED_JSCALE)) +#define PSCHED_US2JIFFIE(delay) ((delay)>>PSCHED_JSCALE) + +#elif PSCHED_CLOCK_SOURCE == PSCHED_CPU + +extern psched_tdiff_t psched_clock_per_hz; +extern int psched_clock_scale; + +#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz) + +#if CPU == 586 || CPU == 686 + +#define PSCHED_GET_TIME(stamp) \ +({ u32 hi, lo; \ + __asm__ __volatile__ (".byte 0x0f,0x31" :"=a" (lo), "=d" (hi)); \ + (stamp) = ((((u64)hi)<<32) + lo)>>psched_clock_scale; \ +}) + +#elif defined (__alpha__) + +#define PSCHED_WATCHER + +extern u32 
psched_time_mark; + +#define PSCHED_GET_TIME(stamp) \ +({ u32 __res; \ + __asm__ __volatile__ ("rpcc %0" : "r="(__res)); \ + if (__res <= psched_time_mark) psched_time_base += 0x100000000UL; \ + psched_time_mark = __res; \ + (stamp) = (psched_time_base + __res)>>psched_clock_scale; \ +}) + +#else + +#error PSCHED_CLOCK_SOURCE=PSCHED_CPU is not supported on this arch. + +#endif /* ARCH */ + +#endif /* PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES */ + +#endif /* PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY */ + +#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY #define PSCHED_TDIFF(tv1, tv2) \ ({ \ int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ @@ -106,8 +235,6 @@ typedef u64 psched_time_t; __delta; \ }) -#define PSCHED_US2JIFFIE(usecs) (((usecs)+(1000000/HZ-1))/(1000000/HZ)) - #define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \ (tv1).tv_sec <= (tv2).tv_sec) || \ (tv1).tv_sec < (tv2).tv_sec) @@ -127,24 +254,86 @@ typedef u64 psched_time_t; (tv).tv_usec -= 1000000; } \ }) -/* Set/check that undertime is in the "past perfect"; +/* Set/check that time is in the "past perfect"; it depends on concrete representation of system time */ #define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0) #define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0) +#define PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; }) + +#else + +#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2)) +#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \ +({ \ + long __delta = (tv1) - (tv2); \ + if ( __delta > (bound)) { __delta = (bound); guard; } \ + __delta; \ +}) + + +#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2)) +#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta)) +#define PSCHED_TADD(tv, delta) ((tv) += (delta)) +#define PSCHED_SET_PASTPERFECT(t) ((t) = 0) +#define PSCHED_IS_PASTPERFECT(t) ((t) == 0) +#define PSCHED_AUDIT_TDIFF(t) + +#endif + +struct tcf_police +{ + struct tcf_police *next; + int refcnt; + u32 index; + + int action; + u32 burst; + u32 mtu; + + u32 toks; + u32 
ptoks; + psched_time_t t_c; + struct qdisc_rate_table *R_tab; + struct qdisc_rate_table *P_tab; +}; + +extern void tcf_police_destroy(struct tcf_police *p); +extern struct tcf_police * tcf_police_locate(struct rtattr *rta); +extern int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p); +extern int tcf_police(struct sk_buff *skb, struct tcf_police *p); + +extern __inline__ void tcf_police_release(struct tcf_police *p) +{ + if (p && --p->refcnt == 0) + tcf_police_destroy(p); +} extern struct Qdisc noop_qdisc; +extern struct Qdisc_ops noop_qdisc_ops; +extern struct Qdisc_ops pfifo_qdisc_ops; +extern struct Qdisc_ops bfifo_qdisc_ops; int register_qdisc(struct Qdisc_ops *qops); int unregister_qdisc(struct Qdisc_ops *qops); +struct Qdisc *qdisc_lookup(struct device *dev, u32 handle); +struct Qdisc *qdisc_lookup_class(struct device *dev, u32 handle); void dev_init_scheduler(struct device *dev); void dev_shutdown(struct device *dev); void dev_activate(struct device *dev); void dev_deactivate(struct device *dev); void qdisc_reset(struct Qdisc *qdisc); void qdisc_destroy(struct Qdisc *qdisc); +struct Qdisc * qdisc_create_dflt(struct device *dev, struct Qdisc_ops *ops); +struct Qdisc * dev_set_scheduler(struct device *dev, struct Qdisc *qdisc); +int qdisc_new_estimator(struct tc_stats *stats, struct rtattr *opt); +void qdisc_kill_estimator(struct tc_stats *stats); +struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab); +void qdisc_put_rtab(struct qdisc_rate_table *tab); +int teql_init(void); +int tc_filter_init(void); int pktsched_init(void); void qdisc_run_queues(void); @@ -161,4 +350,10 @@ extern __inline__ void qdisc_wakeup(struct device *dev) } } +extern __inline__ unsigned psched_mtu(struct device *dev) +{ + unsigned mtu = dev->mtu; + return dev->hard_header ? 
mtu + dev->hard_header_len : mtu; +} + #endif diff --git a/include/net/rose.h b/include/net/rose.h index 8e86c1457..366168287 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -87,6 +87,7 @@ struct rose_neigh { unsigned int number; char restarted; char dce_mode; + char loopback; struct sk_buff_head queue; struct timer_list t0timer; struct timer_list ftimer; @@ -97,6 +98,7 @@ struct rose_node { rose_address address; unsigned short mask; unsigned char count; + char loopback; struct rose_neigh *neighbour[3]; }; @@ -179,11 +181,21 @@ extern void rose_transmit_diagnostic(struct rose_neigh *, unsigned char); extern void rose_transmit_clear_request(struct rose_neigh *, unsigned int, unsigned char, unsigned char); extern void rose_transmit_link(struct sk_buff *, struct rose_neigh *); +/* rose_loopback.c */ +extern void rose_loopback_init(void); +extern void rose_loopback_clear(void); +extern int rose_loopback_queue(struct sk_buff *, struct rose_neigh *); + /* rose_out.c */ extern void rose_kick(struct sock *); extern void rose_enquiry_response(struct sock *); /* rose_route.c */ +extern struct rose_neigh *rose_loopback_neigh; + +extern int rose_add_loopback_neigh(void); +extern int rose_add_loopback_node(rose_address *); +extern void rose_del_loopback_node(rose_address *); extern void rose_rt_device_down(struct device *); extern void rose_link_device_down(struct device *); extern struct device *rose_dev_first(void); diff --git a/include/net/snmp.h b/include/net/snmp.h index ee3bf0e20..eeeeb6aa2 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -124,5 +124,13 @@ struct udp_mib unsigned long UdpInErrors; unsigned long UdpOutDatagrams; }; + +struct linux_mib +{ + unsigned long SyncookiesSent; + unsigned long SyncookiesRecv; + unsigned long SyncookiesFailed; + unsigned long EmbryonicRsts; +}; #endif diff --git a/include/net/sock.h b/include/net/sock.h index f06f94ea9..29d7985be 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -66,8 +66,12 @@ 
#endif #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE) +#if defined(CONFIG_SPX) || defined(CONFIG_SPX_MODULE) +#include <net/spx.h> +#else #include <net/ipx.h> -#endif +#endif /* CONFIG_SPX */ +#endif /* CONFIG_IPX */ #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE) #include <linux/atalk.h> @@ -83,12 +87,8 @@ #include <asm/atomic.h> -/* - * The AF_UNIX specific socket options - */ - -struct unix_opt -{ +/* The AF_UNIX specific socket options */ +struct unix_opt { int family; char * name; int locks; @@ -105,8 +105,7 @@ struct unix_opt #ifdef CONFIG_NETLINK struct netlink_callback; -struct netlink_opt -{ +struct netlink_opt { pid_t pid; unsigned groups; pid_t dst_pid; @@ -117,13 +116,9 @@ struct netlink_opt }; #endif -/* - * Once the IPX ncpd patches are in these are going into protinfo - */ - +/* Once the IPX ncpd patches are in these are going into protinfo. */ #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE) -struct ipx_opt -{ +struct ipx_opt { ipx_address dest_addr; ipx_interface *intrfc; unsigned short port; @@ -141,8 +136,7 @@ struct ipx_opt #endif #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) -struct ipv6_pinfo -{ +struct ipv6_pinfo { struct in6_addr saddr; struct in6_addr rcv_saddr; struct in6_addr daddr; @@ -191,8 +185,7 @@ struct tcp_sack_block { __u32 end_seq; }; -struct tcp_opt -{ +struct tcp_opt { int tcp_header_len; /* Bytes of tcp header to send */ /* @@ -214,7 +207,7 @@ struct tcp_opt __u32 lrcvtime; /* timestamp of last received data packet*/ __u32 srtt; /* smothed round trip time << 3 */ - __u32 ato; /* delayed ack timeout */ + __u32 ato; /* delayed ack timeout */ __u32 snd_wl1; /* Sequence for window update */ __u32 snd_wl2; /* Ack sequence for update */ @@ -229,13 +222,14 @@ struct tcp_opt __u32 snd_cwnd; /* Sending congestion window */ __u32 rto; /* retransmit timeout */ - __u32 packets_out; /* Packets which are "in flight" */ - __u32 high_seq; /* highest sequence number sent by onset of congestion */ + __u32 
packets_out; /* Packets which are "in flight" */ + __u32 fackets_out; /* Non-retrans SACK'd packets */ + __u32 retrans_out; /* Fast-retransmitted packets out */ + __u32 high_seq; /* snd_nxt at onset of congestion */ /* * Slow start and congestion control (see also Nagle, and Karn & Partridge) */ __u32 snd_ssthresh; /* Slow start size threshold */ - __u16 snd_cwnd_cnt; __u8 dup_acks; /* Consequetive duplicate acks seen from other end */ __u8 delayed_acks; @@ -275,7 +269,6 @@ struct tcp_opt struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ struct timer_list probe_timer; /* Probes */ - __u32 basertt; /* Vegas baseRTT */ __u32 window_clamp; /* XXX Document this... -DaveM */ __u32 probes_out; /* unanswered 0 window probes */ __u32 syn_seq; @@ -285,8 +278,8 @@ struct tcp_opt struct open_request *syn_wait_queue; struct open_request **syn_wait_last; - int syn_backlog; + int syn_backlog; /* Backlog of received SYNs */ }; @@ -330,17 +323,12 @@ struct tcp_opt /* Define this to get the sk->debug debugging facility. */ #define SOCK_DEBUGGING #ifdef SOCK_DEBUGGING -#define SOCK_DEBUG(sk, msg...) if((sk) && ((sk)->debug)) printk(KERN_DEBUG ## msg) +#define SOCK_DEBUG(sk, msg...) do { if((sk) && ((sk)->debug)) printk(KERN_DEBUG ## msg); } while (0) #else #define SOCK_DEBUG(sk, msg...) do { } while (0) #endif -/* - * TCP will start to use the new protinfo while *still using the old* fields - */ - -struct sock -{ +struct sock { /* This must be first. */ struct sock *sklist_next; struct sock *sklist_prev; @@ -349,28 +337,29 @@ struct sock struct sock *bind_next; struct sock **bind_pprev; - /* Main hash linkage for various protocol lookup tables. */ - struct sock *next; - struct sock **pprev; - /* Socket demultiplex comparisons on incoming packets. 
*/ __u32 daddr; /* Foreign IPv4 addr */ __u32 rcv_saddr; /* Bound local IPv4 addr */ - int bound_dev_if; /* Bound device index if != 0 */ + __u16 dport; /* Destination port */ unsigned short num; /* Local port */ + int bound_dev_if; /* Bound device index if != 0 */ + + /* Main hash linkage for various protocol lookup tables. */ + struct sock *next; + struct sock **pprev; + volatile unsigned char state, /* Connection state */ zapped; /* In ax25 & ipx means not linked */ __u16 sport; /* Source port */ - __u16 dport; /* Destination port */ - unsigned short family; - unsigned char reuse, - nonagle; + unsigned short family; /* Address family */ + unsigned char reuse, /* SO_REUSEADDR setting */ + nonagle; /* Disable Nagle algorithm? */ - int sock_readers; /* user count */ - int rcvbuf; + int sock_readers; /* User count */ + int rcvbuf; /* Size of receive buffer in bytes */ - struct wait_queue **sleep; + struct wait_queue **sleep; /* Sock wait queue */ struct dst_entry *dst_cache; /* Destination cache */ atomic_t rmem_alloc; /* Receive queue bytes committed */ struct sk_buff_head receive_queue; /* Incoming packets */ @@ -379,13 +368,12 @@ struct sock atomic_t omem_alloc; /* "o" is "option" or "other" */ __u32 saddr; /* Sending source */ unsigned int allocation; /* Allocation mode */ - int sndbuf; + int sndbuf; /* Size of send buffer in bytes */ struct sock *prev; - /* - * Not all are volatile, but some are, so we - * might as well say they all are. - */ + /* Not all are volatile, but some are, so we might as well say they all are. + * XXX Make this a flag word -DaveM + */ volatile char dead, done, urginline, @@ -408,9 +396,9 @@ struct sock struct proto *prot; -/* - * mss is min(mtu, max_window) - */ + /* mss is min(mtu, max_window) + * XXX Fix this, mtu only used in one TCP place and that is it -DaveM + */ unsigned short mtu; /* mss negotiated in the syn's */ unsigned short mss; /* current eff. 
mss - can change */ unsigned short user_mss; /* mss requested by user in ioctl */ @@ -429,7 +417,11 @@ struct sock #endif #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) struct raw6_opt tp_raw; -#endif +#endif /* CONFIG_IPV6 */ +#if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE) + struct spx_opt af_spx; +#endif /* CONFIG_SPX */ + } tp_pinfo; int err, err_soft; /* Soft holds errors that don't @@ -450,13 +442,10 @@ struct sock struct sock_filter *filter_data; #endif /* CONFIG_FILTER */ -/* - * This is where all the private (optional) areas that don't - * overlap will eventually live. - */ - - union - { + /* This is where all the private (optional) areas that don't + * overlap will eventually live. + */ + union { void *destruct_hook; struct unix_opt af_unix; #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE) @@ -488,9 +477,7 @@ struct sock #endif } protinfo; -/* - * IP 'private area' or will be eventually - */ + /* IP 'private area' or will be eventually. */ int ip_ttl; /* TTL setting */ int ip_tos; /* TOS */ unsigned ip_cmsg_flags; @@ -504,31 +491,18 @@ struct sock __u32 ip_mc_addr; struct ip_mc_socklist *ip_mc_list; /* Group array */ -/* - * This part is used for the timeout functions (timer.c). - */ - + /* This part is used for the timeout functions (timer.c). */ int timeout; /* What are we waiting for? */ - struct timer_list timer; /* This is the TIME_WAIT/receive timer - * when we are doing IP - */ + struct timer_list timer; /* This is the sock cleanup timer. */ struct timeval stamp; - /* - * Identd - */ - + /* Identd */ struct socket *socket; - /* - * RPC layer private data - */ + /* RPC layer private data */ void *user_data; - /* - * Callbacks - */ - + /* Callbacks */ void (*state_change)(struct sock *sk); void (*data_ready)(struct sock *sk,int bytes); void (*write_space)(struct sock *sk); @@ -539,14 +513,11 @@ struct sock void (*destruct)(struct sock *sk); }; -/* - * IP protocol blocks we attach to sockets. 
- * socket layer -> transport layer interface - * transport -> network interface is defined by struct inet_proto +/* IP protocol blocks we attach to sockets. + * socket layer -> transport layer interface + * transport -> network interface is defined by struct inet_proto */ - -struct proto -{ +struct proto { /* These must be first. */ struct sock *sklist_next; struct sock *sklist_prev; @@ -608,16 +579,10 @@ struct proto #define TIME_DONE 7 /* Used to absorb those last few packets */ #define TIME_PROBE0 8 -/* - * About 10 seconds - */ - +/* About 10 seconds */ #define SOCK_DESTROY_TIME (10*HZ) -/* - * Sockets 0-1023 can't be bound to unless you are superuser - */ - +/* Sockets 0-1023 can't be bound to unless you are superuser */ #define PROT_SOCK 1024 #define SHUTDOWN_MASK 3 diff --git a/include/net/spx.h b/include/net/spx.h index 3e9b1d185..a449b891b 100644 --- a/include/net/spx.h +++ b/include/net/spx.h @@ -1,38 +1,93 @@ #ifndef __NET_SPX_H #define __NET_SPX_H -/* - * Internal definitions for the SPX protocol. - */ - -/* - * The SPX header following an IPX header. - */ - +#include <net/ipx.h> + struct spxhdr -{ - __u8 cctl; -#define CCTL_SPXII_XHD 0x01 /* SPX2 extended header */ -#define CCTL_SPX_UNKNOWN 0x02 /* Unknown (unused ??) */ -#define CCTL_SPXII_NEG 0x04 /* Negotiate size */ -#define CCTL_SPXII 0x08 /* Set for SPX2 */ -#define CCTL_EOM 0x10 /* End of message marker */ -#define CCTL_URG 0x20 /* Urgent marker in SPP (not used in SPX?) 
*/ -#define CCTL_ACK 0x40 /* Send me an ACK */ -#define CCTL_CTL 0x80 /* Control message */ - __u8 dtype; +{ __u8 cctl; + __u8 dtype; #define SPX_DTYPE_ECONN 0xFE /* Finished */ #define SPX_DTYPE_ECACK 0xFF /* Ok */ - __u16 sconn; /* Connection ID */ - __u16 dconn; /* Connection ID */ - __u16 sequence; - __u16 ackseq; - __u16 allocseq; + __u16 sconn; /* Connection ID */ + __u16 dconn; /* Connection ID */ + __u16 sequence; + __u16 ackseq; + __u16 allocseq; }; -#define IPXTYPE_SPX 5 +struct ipxspxhdr +{ struct ipxhdr ipx; + struct spxhdr spx; +}; + +#define SPX_SYS_PKT_LEN (sizeof(struct ipxspxhdr)) + +#ifdef __KERNEL__ +struct spx_opt +{ int state; + int sndbuf; + int retries; /* Number of WD retries */ + int retransmits; /* Number of retransmits */ + int max_retries; + int wd_interval; + void *owner; + __u16 dest_connid; /* Net order */ + __u16 source_connid; /* Net order */ + __u16 sequence; /* Host order - our current pkt # */ + __u16 alloc; /* Host order - max seq we can rcv now */ + __u16 rmt_ack; /* Host order - last pkt ACKd by remote */ + __u16 rmt_seq; + __u16 acknowledge; + __u16 rmt_alloc; /* Host order - max seq remote can handle now */ + ipx_address dest_addr; + ipx_address source_addr; + struct timer_list watchdog; /* Idle watch */ + struct timer_list retransmit; /* Retransmit timer */ + struct sk_buff_head rcv_queue; + struct sk_buff_head transmit_queue; + struct sk_buff_head retransmit_queue; +}; + +/* Packet connection control defines */ +#define CCTL_SPXII_XHD 0x01 /* SPX2 extended header */ +#define CCTL_SPX_UNKNOWN 0x02 /* Unknown (unused ??) */ +#define CCTL_SPXII_NEG 0x04 /* Negotiate size */ +#define CCTL_SPXII 0x08 /* Set for SPX2 */ +#define CCTL_EOM 0x10 /* End of message marker */ +#define CCTL_URG 0x20 /* Urgent marker in SPP (not used in SPX?) 
*/ +#define CCTL_ACK 0x40 /* Send me an ACK */ +#define CCTL_CTL 0x80 /* Control message */ +#define CCTL_SYS CCTL_CTL /* Spec uses CCTL_SYS */ + +/* Connection state defines */ +#define SPX_CLOSED 7 +#define SPX_CONNECTING 8 +#define SPX_CONNECTED 9 + +/* Packet transmit types - Internal */ +#define DATA 0 /* Data */ +#define ACK 1 /* Data ACK */ +#define WDACK 2 /* WD ACK */ +#define CONACK 3 /* Connection Request ACK */ +#define CONREQ 4 /* Connection Request */ +#define WDREQ 5 /* WD Request */ +#define DISCON 6 /* Informed Disconnect */ +#define DISACK 7 /* Informed Disconnect ACK */ +#define RETRAN 8 /* Int. Retransmit of packet */ +#define TQUEUE 9 /* Int. Transmit of a queued packet */ + +/* + * These are good candidates for IOcontrol calls + */ + +/* Watchdog defines */ +#define VERIFY_TIMEOUT 3 * HZ +#define ABORT_TIMEOUT 30 * HZ + +/* Packet retransmit defines */ +#define RETRY_COUNT 10 +#define RETRY_TIME 1 * HZ +#define MAX_RETRY_DELAY 5 * HZ - - - -#endif +#endif /* __KERNEL__ */ +#endif /* def __NET_SPX_H */ diff --git a/include/net/spxcall.h b/include/net/spxcall.h new file mode 100644 index 000000000..0461fbbe1 --- /dev/null +++ b/include/net/spxcall.h @@ -0,0 +1,2 @@ +/* Separate to keep compilation of protocols.c simpler */ +extern void spx_proto_init(struct net_proto *pro); diff --git a/include/net/tcp.h b/include/net/tcp.h index 84bf7f55e..52853f44a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -154,16 +154,16 @@ struct tcp_tw_bucket { struct sock *sklist_prev; struct sock *bind_next; struct sock **bind_pprev; - struct sock *next; - struct sock **pprev; __u32 daddr; __u32 rcv_saddr; - int bound_dev_if; + __u16 dport; unsigned short num; + int bound_dev_if; + struct sock *next; + struct sock **pprev; unsigned char state, zapped; __u16 sport; - __u16 dport; unsigned short family; unsigned char reuse, nonagle; @@ -172,7 +172,8 @@ struct tcp_tw_bucket { __u32 rcv_nxt; struct tcp_func *af_specific; struct tcp_bind_bucket *tb; - struct 
timer_list timer; + struct tcp_tw_bucket *next_death; + int death_slot; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct in6_addr v6_daddr; struct in6_addr v6_rcv_saddr; @@ -181,16 +182,42 @@ struct tcp_tw_bucket { extern kmem_cache_t *tcp_timewait_cachep; -/* tcp_ipv4.c: These sysctl variables need to be shared between v4 and v6 - * because the v6 tcp code to intialize a connection needs to interoperate - * with the v4 code using the same variables. - * FIXME: It would be better to rewrite the connection code to be - * address family independent and just leave one copy in the ipv4 section. - * This would also clean up some code duplication. -- erics - */ -extern int sysctl_tcp_timestamps; -extern int sysctl_tcp_window_scaling; -extern int sysctl_tcp_sack; +/* Socket demux engine toys. */ +#ifdef __BIG_ENDIAN +#define TCP_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__sport)<<16) | (__u32)(__dport)) +#else /* __LITTLE_ENDIAN */ +#define TCP_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__dport)<<16) | (__u32)(__sport)) +#endif + +#if defined(__alpha__) || defined(__sparc_v9__) +#ifdef __BIG_ENDIAN +#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ + __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr)); +#else /* __LITTLE_ENDIAN */ +#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ + __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr)); +#endif /* __BIG_ENDIAN */ +#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&((__sk)->daddr)))== (__cookie)) && \ + ((*((__u32 *)&((__sk)->dport)))== (__ports)) && \ + (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif)))) +#else /* 32-bit arch */ +#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) +#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((__sk)->daddr == (__saddr)) && \ + ((__sk)->rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&((__sk)->dport)))== (__ports)) && \ + (!((__sk)->bound_dev_if) || 
((__sk)->bound_dev_if == (__dif)))) +#endif /* 64-bit arch */ + +#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ + (((*((__u32 *)&((__sk)->dport)))== (__ports)) && \ + ((__sk)->family == AF_INET6) && \ + !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.daddr, (__saddr)) && \ + !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.rcv_saddr, (__daddr)) && \ + (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif)))) /* These can have wildcards, don't try too hard. */ static __inline__ int tcp_lhashfn(unsigned short num) @@ -222,9 +249,11 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) #define MAX_RESET_SIZE (NETHDR_SIZE + sizeof(struct tcphdr) + MAX_HEADER + 15) #define MAX_TCPHEADER_SIZE (NETHDR_SIZE + sizeof(struct tcphdr) + 20 + MAX_HEADER + 15) -#define MAX_WINDOW 32767 /* Never offer a window over 32767 without using - window scaling (not yet supported). Some poor - stacks do signed 16bit maths! */ +/* + * Never offer a window over 32767 without using window scaling. Some + * poor stacks do signed 16bit maths! + */ +#define MAX_WINDOW 32767 #define MIN_WINDOW 2048 #define MAX_ACK_BACKLOG 2 #define MAX_DELAY_ACK 2 @@ -266,16 +295,18 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) #define TCP_KEEPALIVE_TIME (180*60*HZ) /* two hours */ #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ #define TCP_KEEPALIVE_PERIOD ((75*HZ)>>2) /* period of keepalive check */ -#define TCP_NO_CHECK 0 /* turn to one if you want the default - * to be no checksum */ -#define TCP_SYNACK_PERIOD (HZ/2) +#define TCP_SYNACK_PERIOD (HZ/2) /* How often to run the synack slow timer */ #define TCP_QUICK_TRIES 8 /* How often we try to retransmit, until - * we tell the LL layer that it is something + * we tell the link layer that it is something * wrong (e.g. that it can expire redirects) */ #define TCP_BUCKETGC_PERIOD (HZ) +/* TIME_WAIT reaping mechanism. */ +#define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. 
*/ +#define TCP_TWKILL_PERIOD ((HZ*60)/TCP_TWKILL_SLOTS) + /* * TCP option */ @@ -305,14 +336,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) #define TCPOLEN_SACK_BASE_ALIGNED 4 #define TCPOLEN_SACK_PERBLOCK 8 -/* - * TCP Vegas constants - */ - -#define TCP_VEGAS_ALPHA 2 /* v_cong_detect_top_nseg */ -#define TCP_VEGAS_BETA 4 /* v_cong_detect_bot_nseg */ -#define TCP_VEGAS_GAMMA 1 /* v_exp_inc_nseg */ - struct open_request; struct or_calltable { @@ -548,15 +571,16 @@ extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mss); +/* tcp_output.c */ + extern void tcp_read_wakeup(struct sock *); extern void tcp_write_xmit(struct sock *); extern void tcp_time_wait(struct sock *); extern int tcp_retransmit_skb(struct sock *, struct sk_buff *); +extern void tcp_fack_retransmit(struct sock *); extern void tcp_xmit_retransmit_queue(struct sock *); extern void tcp_simple_retransmit(struct sock *); -/* tcp_output.c */ - extern void tcp_send_probe0(struct sock *); extern void tcp_send_partial(struct sock *); extern void tcp_write_wakeup(struct sock *); @@ -598,11 +622,38 @@ struct tcp_sl_timer { #define TCP_SLT_SYNACK 0 #define TCP_SLT_KEEPALIVE 1 -#define TCP_SLT_BUCKETGC 2 -#define TCP_SLT_MAX 3 +#define TCP_SLT_TWKILL 2 +#define TCP_SLT_BUCKETGC 3 +#define TCP_SLT_MAX 4 extern struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX]; +/* Compute the current effective MSS, taking SACKs and IP options, + * and even PMTU discovery events into account. + */ +static __inline__ unsigned int tcp_current_mss(struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct dst_entry *dst = sk->dst_cache; + unsigned int mss_now = sk->mss; + + if(dst && (sk->mtu < dst->pmtu)) { + unsigned int mss_distance = (sk->mtu - sk->mss); + + /* PMTU discovery event has occurred. 
*/ + sk->mtu = dst->pmtu; + sk->mss = sk->mtu - mss_distance; + } + + if(tp->sack_ok && tp->num_sacks) + mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + + (tp->num_sacks * TCPOLEN_SACK_PERBLOCK)); + if(sk->opt) + mss_now -= sk->opt->optlen; + + return mss_now; +} + /* Compute the actual receive window we are currently advertising. */ static __inline__ u32 tcp_receive_window(struct tcp_opt *tp) { @@ -651,10 +702,17 @@ extern __inline__ int tcp_raise_window(struct sock *sk) /* This is what the send packet queueing engine uses to pass * TCP per-packet control information to the transmission - * code. + * code. We also store the host-order sequence numbers in + * here too. This is 36 bytes on 32-bit architectures, + * 40 bytes on 64-bit machines, if this grows please adjust + * skbuff.h:skbuff->cb[xxx] size appropriately. */ struct tcp_skb_cb { - __u8 flags; /* TCP header flags. */ + struct inet_skb_parm header; /* For incoming frames */ + __u32 seq; /* Starting sequence number */ + __u32 end_seq; /* SEQ + FIN + SYN + datalen */ + unsigned long when; /* used to compute rtt's */ + __u8 flags; /* TCP header flags. */ /* NOTE: These must match up to the flags byte in a * real TCP header. @@ -666,15 +724,41 @@ struct tcp_skb_cb { #define TCPCB_FLAG_ACK 0x10 #define TCPCB_FLAG_URG 0x20 - __u8 sacked; /* State flags for SACK/FACK. */ + __u8 sacked; /* State flags for SACK/FACK. */ #define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */ #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ - __u16 urg_ptr; /* Valid w/URG flags is set. */ + __u16 urg_ptr; /* Valid w/URG flags is set. */ + __u32 ack_seq; /* Sequence number ACK'd */ }; #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) +/* We store the congestion window as a packet count, shifted by + * a factor so that implementing the 1/2 MSS ssthresh rules + * is easy. + */ +#define TCP_CWND_SHIFT 1 + +/* This determines how many packets are "in the network" to the best + * of our knowledge. 
In many cases it is conservative, but where + * detailed information is available from the receiver (via SACK + * blocks etc.) we can make more aggressive calculations. + * + * Use this for decisions involving congestion control, use just + * tp->packets_out to determine if the send queue is empty or not. + * + * Read this equation as: + * + * "Packets sent once on transmission queue" MINUS + * "Packets acknowledged by FACK information" PLUS + * "Packets fast retransmitted" + */ +static __inline__ int tcp_packets_in_flight(struct tcp_opt *tp) +{ + return tp->packets_out - tp->fackets_out + tp->retrans_out; +} + /* This checks if the data bearing packet SKB (usually tp->send_head) * should be put on the wire right now. */ @@ -682,7 +766,6 @@ static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); int nagle_check = 1; - int len; /* RFC 1122 - section 4.2.3.4 * @@ -697,13 +780,13 @@ static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb) * * Don't use the nagle rule for urgent data. 
*/ - len = skb->end_seq - skb->seq; - if (!sk->nonagle && len < (sk->mss >> 1) && tp->packets_out && + if (!sk->nonagle && skb->len < (sk->mss >> 1) && tp->packets_out && !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_URG)) nagle_check = 0; - return (nagle_check && tp->packets_out < tp->snd_cwnd && - !after(skb->end_seq, tp->snd_una + tp->snd_wnd) && + return (nagle_check && + (tcp_packets_in_flight(tp) < (tp->snd_cwnd>>TCP_CWND_SHIFT)) && + !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) && tp->retransmits == 0); } @@ -749,7 +832,6 @@ static char *statename[]={ static __inline__ void tcp_set_state(struct sock *sk, int state) { - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; int oldstate = sk->state; sk->state = state; @@ -765,10 +847,13 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) break; case TCP_CLOSE: + { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; /* Should be about 2 rtt's */ net_reset_timer(sk, TIME_DONE, min(tp->srtt * 2, TCP_DONE_TIME)); sk->prot->unhash(sk); /* fall through */ + } default: if (oldstate==TCP_ESTABLISHED) tcp_statistics.TcpCurrEstab--; @@ -868,7 +953,7 @@ extern __inline__ void tcp_select_initial_window(__u32 space, __u16 mss, * our initial window offering to 32k. There should also * be a sysctl option to stop being nice. */ - (*rcv_wnd) = min(space,32767); + (*rcv_wnd) = min(space, MAX_WINDOW); (*rcv_wscale) = 0; if (wscale_ok) { /* See RFC1323 for an explanation of the limit to 14 */ @@ -901,20 +986,6 @@ extern __inline__ void tcp_synq_init(struct tcp_opt *tp) tp->syn_wait_last = &tp->syn_wait_queue; } -extern __inline__ struct open_request *tcp_synq_unlink_tail(struct tcp_opt *tp) -{ - struct open_request *head = tp->syn_wait_queue; -#if 0 - /* Should be a net-ratelimit'd thing, not all the time. 
*/ - printk(KERN_DEBUG "synq tail drop with expire=%ld\n", - head->expires-jiffies); -#endif - if (head->dl_next == NULL) - tp->syn_wait_last = &tp->syn_wait_queue; - tp->syn_wait_queue = head->dl_next; - return head; -} - extern void __tcp_inc_slow_timer(struct tcp_sl_timer *slt); extern __inline__ void tcp_inc_slow_timer(int timer) { |