summaryrefslogtreecommitdiffstats
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/br.h95
-rw-r--r--include/net/dst.h5
-rw-r--r--include/net/ip.h22
-rw-r--r--include/net/ip_fib.h7
-rw-r--r--include/net/ip_masq.h5
-rw-r--r--include/net/ipx.h5
-rw-r--r--include/net/netrom.h7
-rw-r--r--include/net/pkt_cls.h83
-rw-r--r--include/net/pkt_sched.h237
-rw-r--r--include/net/rose.h12
-rw-r--r--include/net/snmp.h8
-rw-r--r--include/net/sock.h157
-rw-r--r--include/net/spx.h113
-rw-r--r--include/net/spxcall.h2
-rw-r--r--include/net/tcp.h189
15 files changed, 715 insertions, 232 deletions
diff --git a/include/net/br.h b/include/net/br.h
index 3b4ccf094..8f47d3c12 100644
--- a/include/net/br.h
+++ b/include/net/br.h
@@ -27,7 +27,7 @@
/*
* We time out our entries in the FDB after this many seconds.
*/
-#define FDB_TIMEOUT 300
+#define FDB_TIMEOUT 20 /* JRP: 20s as NSC bridge code, was 300 for Linux */
/*
* the following defines are the initial values used when the
@@ -40,6 +40,10 @@
#define BRIDGE_FORWARD_DELAY 15
#define HOLD_TIME 1
+/* broadcast/multicast storm limitation. This is per source. */
+#define MAX_MCAST_PER_PERIOD 4
+#define MCAST_HOLD_TIME 10 /* in jiffies unit (10ms increment) */
+
#define Default_path_cost 10
/*
@@ -71,13 +75,25 @@ typedef struct {
#define BRIDGE_ID_ULA bi.p_u.ula
#define BRIDGE_ID bi.id
+/* JRP: on the network the flags field is between "type" and "root_id"
+ * this is unfortunate! To make the code portable to a RISC machine
+ * the pdus are now massaged a little bit for processing
+ */
+#define TOPOLOGY_CHANGE 0x01
+#define TOPOLOGY_CHANGE_ACK 0x80
+#define BRIDGE_BPDU_8021_CONFIG_SIZE 35 /* real size */
+#define BRIDGE_BPDU_8021_CONFIG_FLAG_OFFSET 4
+#define BRIDGE_BPDU_8021_PROTOCOL_ID 0
+#define BRIDGE_BPDU_8021_PROTOCOL_VERSION_ID 0
+#define BRIDGE_LLC1_HS 3
+#define BRIDGE_LLC1_DSAP 0x42
+#define BRIDGE_LLC1_SSAP 0x42
+#define BRIDGE_LLC1_CTRL 0x03
+
typedef struct {
unsigned short protocol_id;
unsigned char protocol_version_id;
unsigned char type;
- unsigned char flags;
-#define TOPOLOGY_CHANGE 0x01
-#define TOPOLOGY_CHANGE_ACK 0x80
bridge_id_t root_id; /* (4.5.1.1) */
unsigned int root_path_cost; /* (4.5.1.2) */
bridge_id_t bridge_id; /* (4.5.1.3) */
@@ -86,8 +102,23 @@ typedef struct {
unsigned short max_age; /* (4.5.1.6) */
unsigned short hello_time; /* (4.5.1.7) */
unsigned short forward_delay; /* (4.5.1.8) */
+ unsigned char top_change_ack;
+ unsigned char top_change;
} Config_bpdu;
+#ifdef __LITTLE_ENDIAN
+#define config_bpdu_hton(config_bpdu) \
+ (config_bpdu)->root_path_cost = htonl((config_bpdu)->root_path_cost); \
+ (config_bpdu)->port_id = htons((config_bpdu)->port_id); \
+ (config_bpdu)->message_age = htons((config_bpdu)->message_age); \
+ (config_bpdu)->max_age = htons((config_bpdu)->max_age); \
+ (config_bpdu)->hello_time = htons((config_bpdu)->hello_time); \
+ (config_bpdu)->forward_delay = htons((config_bpdu)->forward_delay);
+#else
+#define config_bpdu_hton(config_bpdu)
+#endif
+#define config_bpdu_ntoh config_bpdu_hton
+
/** Topology Change Notification BPDU Parameters (4.5.2) **/
@@ -112,12 +143,10 @@ typedef struct {
unsigned short bridge_max_age; /* (4.5.3.8) */
unsigned short bridge_hello_time; /* (4.5.3.9) */
unsigned short bridge_forward_delay; /* (4.5.3.10) */
- unsigned int topology_change_detected; /* (4.5.3.11) */
- unsigned int topology_change; /* (4.5.3.12) */
+ unsigned int top_change_detected; /* (4.5.3.11) */
+ unsigned int top_change; /* (4.5.3.12) */
unsigned short topology_change_time; /* (4.5.3.13) */
unsigned short hold_time; /* (4.5.3.14) */
- unsigned int top_change;
- unsigned int top_change_detected;
} Bridge_data;
/** Port Parameters (4.5.5) **/
@@ -149,8 +178,11 @@ struct fdb {
unsigned char pad[2];
unsigned short port;
unsigned int timer;
- unsigned int flags;
+ unsigned short flags;
#define FDB_ENT_VALID 0x01
+ unsigned short mcast_count;
+ unsigned int mcast_timer; /* oldest xxxxxcast */
+
/* AVL tree of all addresses, sorted by address */
short fdb_avl_height;
struct fdb *fdb_avl_left;
@@ -159,6 +191,19 @@ struct fdb {
struct fdb *fdb_next;
};
+/* data returned on BRCMD_DISPLAY_FDB */
+struct fdb_info {
+ unsigned char ula[6];
+ unsigned char port;
+ unsigned char flags;
+ unsigned int timer;
+};
+struct fdb_info_hdr {
+ int copied; /* nb of entries copied to user */
+ int not_copied; /* when user buffer is too small */
+ int cmd_time;
+};
+
#define IS_BRIDGED 0x2e
@@ -169,6 +214,34 @@ struct fdb {
#define BR_ACCEPT 1
#define BR_REJECT 0
+/* JRP: extra statistics for debug */
+typedef struct {
+ /* br_receive_frame counters */
+ int port_disable_up_stack;
+ int rcv_bpdu;
+ int notForwarding;
+ int forwarding_up_stack;
+ int unknown_state;
+
+ /* br_tx_frame counters */
+ int port_disable;
+ int port_not_disable;
+
+ /* br_forward counters */
+ int local_multicast;
+ int forwarded_multicast; /* up stack as well */
+ int flood_unicast;
+ int aged_flood_unicast;
+ int forwarded_unicast;
+ int forwarded_unicast_up_stack;
+ int forwarded_ip_up_stack;
+ int forwarded_ip_up_stack_lie; /* received on alternate device */
+ int arp_for_local_mac;
+ int drop_same_port;
+ int drop_same_port_aged;
+ int drop_multicast;
+} br_stats_counter;
+
struct br_stat {
unsigned int flags;
Bridge_data bridge_data;
@@ -178,6 +251,7 @@ struct br_stat {
unsigned short protocols[BR_MAX_PROTOCOLS];
unsigned short prot_id[BR_MAX_PROT_STATS]; /* Protocol encountered */
unsigned int prot_counter[BR_MAX_PROT_STATS]; /* How many packets ? */
+ br_stats_counter packet_cnts;
};
/* defined flags for br_stat.flags */
@@ -215,9 +289,10 @@ int br_receive_frame(struct sk_buff *skb); /* 3.5 */
int br_tx_frame(struct sk_buff *skb);
int br_ioctl(unsigned int cmd, void *arg);
int br_protocol_ok(unsigned short protocol);
+void requeue_fdb(struct fdb *node, int new_port);
struct fdb *br_avl_find_addr(unsigned char addr[6]);
-int br_avl_insert (struct fdb * new_node);
+struct fdb *br_avl_insert (struct fdb * new_node);
/* externs */
diff --git a/include/net/dst.h b/include/net/dst.h
index 0d18f60d2..a73a2d045 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -8,6 +8,7 @@
#ifndef _NET_DST_H
#define _NET_DST_H
+#include <linux/config.h>
#include <net/neighbour.h>
/*
@@ -51,6 +52,10 @@ struct dst_entry
int (*input)(struct sk_buff*);
int (*output)(struct sk_buff*);
+#ifdef CONFIG_NET_CLS_ROUTE
+ __u32 tclassid;
+#endif
+
struct dst_ops *ops;
char info[0];
diff --git a/include/net/ip.h b/include/net/ip.h
index 9b536ddf7..add85700b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -100,7 +100,6 @@ extern int ip_acct_output(struct sk_buff *skb);
#define ip_acct_output dev_queue_xmit
#endif
extern void ip_fragment(struct sk_buff *skb, int (*out)(struct sk_buff*));
-extern struct sk_buff * ip_reply(struct sk_buff *skb, int payload);
extern int ip_do_nat(struct sk_buff *skb);
extern void ip_send_check(struct iphdr *ip);
extern int ip_id_count;
@@ -117,6 +116,18 @@ extern int ip_build_xmit(struct sock *sk,
struct rtable *rt,
int flags);
+
+struct ip_reply_arg {
+ struct iovec iov[2];
+ int n_iov; /* redundant */
+ u32 csum;
+ int csumoffset; /* u16 offset of csum in iov[0].iov_base */
+ /* -1 if not needed */
+};
+
+void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
+ unsigned int len);
+
extern int __ip_finish_output(struct sk_buff *skb);
struct ipv4_config
@@ -128,6 +139,7 @@ struct ipv4_config
extern struct ipv4_config ipv4_config;
extern struct ip_mib ip_statistics;
+extern struct linux_mib net_statistics;
extern int sysctl_local_port_range[2];
@@ -141,15 +153,7 @@ extern __inline__ int ip_finish_output(struct sk_buff *skb)
skb->protocol = __constant_htons(ETH_P_IP);
if (hh) {
-#ifdef __alpha__
- /* Alpha has disguisting memcpy. Help it. */
- u64 *aligned_hdr = (u64*)(skb->data - 16);
- u64 *aligned_hdr0 = hh->hh_data;
- aligned_hdr[0] = aligned_hdr0[0];
- aligned_hdr[1] = aligned_hdr0[1];
-#else
memcpy(skb->data - 16, hh->hh_data, 16);
-#endif
skb_push(skb, dev->hard_header_len);
return hh->hh_output(skb);
} else if (dst->neighbour)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index d725e78d4..f96fa618c 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -50,6 +50,9 @@ struct fib_nh
int nh_weight;
int nh_power;
#endif
+#ifdef CONFIG_NET_CLS_ROUTE
+ __u32 nh_tclassid;
+#endif
int nh_oif;
u32 nh_gw;
};
@@ -229,9 +232,11 @@ extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg
extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb);
extern u32 fib_rules_map_destination(u32 daddr, struct fib_result *res);
+#ifdef CONFIG_NET_CLS_ROUTE
+extern u32 fib_rules_tclass(struct fib_result *res);
+#endif
extern u32 fib_rules_policy(u32 saddr, struct fib_result *res, unsigned *flags);
extern void fib_rules_init(void);
#endif
-
#endif _NET_FIB_H
diff --git a/include/net/ip_masq.h b/include/net/ip_masq.h
index 0faa88336..65282bfcb 100644
--- a/include/net/ip_masq.h
+++ b/include/net/ip_masq.h
@@ -24,11 +24,6 @@
* I used an extra 4K port-space
*/
-/*
- * Linux ports don't normally get allocated above 32K.
- * I used an extra 4K port-space
- */
-
#define PORT_MASQ_BEGIN 61000
#define PORT_MASQ_END (PORT_MASQ_BEGIN+4096)
diff --git a/include/net/ipx.h b/include/net/ipx.h
index 52f09384e..2152e388b 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -34,7 +34,7 @@ struct ipxhdr
#define IPX_TYPE_UNKNOWN 0x00
#define IPX_TYPE_RIP 0x01 /* may also be 0 */
#define IPX_TYPE_SAP 0x04 /* may also be 0 */
-#define IPX_TYPE_SPX 0x05 /* Not yet implemented */
+#define IPX_TYPE_SPX 0x05 /* SPX protocol */
#define IPX_TYPE_NCP 0x11 /* $lots for docs on this (SPIT) */
#define IPX_TYPE_PPROP 0x14 /* complicated flood fill brdcast [Not supported] */
ipx_address ipx_dest __attribute__ ((packed));
@@ -76,4 +76,7 @@ typedef struct ipx_route {
#define IPX_MIN_EPHEMERAL_SOCKET 0x4000
#define IPX_MAX_EPHEMERAL_SOCKET 0x7fff
+extern int ipx_register_spx(struct proto_ops **, struct net_proto_family *);
+extern int ipx_unregister_spx(void);
+
#endif /* def _NET_INET_IPX_H_ */
diff --git a/include/net/netrom.h b/include/net/netrom.h
index cc9fc842c..356a7d270 100644
--- a/include/net/netrom.h
+++ b/include/net/netrom.h
@@ -126,6 +126,11 @@ extern int nr_init(struct device *);
/* nr_in.c */
extern int nr_process_rx_frame(struct sock *, struct sk_buff *);
+/* nr_loopback.c */
+extern void nr_loopback_init(void);
+extern void nr_loopback_clear(void);
+extern int nr_loopback_queue(struct sk_buff *);
+
/* nr_out.c */
extern void nr_output(struct sock *, struct sk_buff *);
extern void nr_send_nak_frame(struct sock *);
@@ -153,7 +158,7 @@ extern void nr_requeue_frames(struct sock *);
extern int nr_validate_nr(struct sock *, unsigned short);
extern int nr_in_rx_window(struct sock *, unsigned short);
extern void nr_write_internal(struct sock *, int);
-extern void nr_transmit_dm(struct sk_buff *);
+extern void nr_transmit_refusal(struct sk_buff *, int);
extern void nr_disconnect(struct sock *, int);
/* nr_timer.c */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
new file mode 100644
index 000000000..0d3c25e25
--- /dev/null
+++ b/include/net/pkt_cls.h
@@ -0,0 +1,83 @@
+#ifndef __NET_PKT_CLS_H
+#define __NET_PKT_CLS_H
+
+
+#include <linux/pkt_cls.h>
+
+struct rtattr;
+struct tcmsg;
+
+/* Basic packet classifier frontend definitions. */
+
+struct tcf_result
+{
+ unsigned long class;
+ u32 classid;
+};
+
+struct tcf_proto
+{
+ /* Fast access part */
+ struct tcf_proto *next;
+ void *root;
+ int (*classify)(struct sk_buff*, struct tcf_proto*, struct tcf_result *);
+ u32 protocol;
+
+ /* All the rest */
+ u32 prio;
+ u32 classid;
+ struct Qdisc *q;
+ void *data;
+ struct tcf_proto_ops *ops;
+};
+
+struct tcf_walker
+{
+ int stop;
+ int skip;
+ int count;
+ int (*fn)(struct tcf_proto *, unsigned long node, struct tcf_walker *);
+};
+
+struct tcf_proto_ops
+{
+ struct tcf_proto_ops *next;
+ char kind[IFNAMSIZ];
+
+ int (*classify)(struct sk_buff*, struct tcf_proto*, struct tcf_result *);
+ int (*init)(struct tcf_proto*);
+ void (*destroy)(struct tcf_proto*);
+
+ unsigned long (*get)(struct tcf_proto*, u32 handle);
+ void (*put)(struct tcf_proto*, unsigned long);
+ int (*change)(struct tcf_proto*, u32 handle, struct rtattr **, unsigned long *);
+ int (*delete)(struct tcf_proto*, unsigned long);
+ void (*walk)(struct tcf_proto*, struct tcf_walker *arg);
+
+ /* rtnetlink specific */
+ int (*dump)(struct tcf_proto*, unsigned long, struct sk_buff *skb, struct tcmsg*);
+};
+
+/* Main classifier routine: scans classifier chain attached
+ to this qdisc, (optionally) tests for protocol and asks
+ specific classifiers.
+ */
+
+extern __inline__ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res)
+{
+ int err = 0;
+ u32 protocol = skb->protocol;
+
+ for ( ; tp; tp = tp->next) {
+ if ((tp->protocol == protocol ||
+ tp->protocol == __constant_htons(ETH_P_ALL)) &&
+ (err = tp->classify(skb, tp, res)) >= 0)
+ return err;
+ }
+ return -1;
+}
+
+extern int register_tcf_proto_ops(struct tcf_proto_ops *ops);
+extern int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
+
+#endif
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 5faad9ad4..de7c7691a 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -1,21 +1,64 @@
#ifndef __NET_PKT_SCHED_H
#define __NET_PKT_SCHED_H
+#define PSCHED_GETTIMEOFDAY 1
+#define PSCHED_JIFFIES 2
+#define PSCHED_CPU 3
+
+#define PSCHED_CLOCK_SOURCE PSCHED_GETTIMEOFDAY
+
#include <linux/pkt_sched.h>
+#include <net/pkt_cls.h>
+
+struct rtattr;
+struct Qdisc;
+
+struct qdisc_walker
+{
+ int stop;
+ int skip;
+ int count;
+ int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
+};
+
+struct Qdisc_class_ops
+{
+ /* Child qdisc manipulation */
+ int (*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **);
+
+ /* Class manipulation routines */
+ unsigned long (*get)(struct Qdisc *, u32 classid);
+ void (*put)(struct Qdisc *, unsigned long);
+ int (*change)(struct Qdisc *, u32, u32, struct rtattr **, unsigned long *);
+ int (*delete)(struct Qdisc *, unsigned long);
+ void (*walk)(struct Qdisc *, struct qdisc_walker * arg);
+
+ /* Filter manipulation */
+ struct tcf_proto ** (*tcf_chain)(struct Qdisc *, unsigned long);
+ unsigned long (*bind_tcf)(struct Qdisc *, u32 classid);
+ void (*unbind_tcf)(struct Qdisc *, unsigned long);
+
+ /* rtnetlink specific */
+ int (*dump)(struct Qdisc *, unsigned long, struct sk_buff *skb, struct tcmsg*);
+};
struct Qdisc_ops
{
struct Qdisc_ops *next;
+ struct Qdisc_class_ops *cl_ops;
char id[IFNAMSIZ];
- int refcnt;
int priv_size;
- int (*enqueue)(struct sk_buff *skb, struct Qdisc *);
+
+ int (*enqueue)(struct sk_buff *, struct Qdisc *);
struct sk_buff * (*dequeue)(struct Qdisc *);
+ int (*requeue)(struct sk_buff *, struct Qdisc *);
+ int (*drop)(struct Qdisc *);
+
+ int (*init)(struct Qdisc *, struct rtattr *arg);
void (*reset)(struct Qdisc *);
void (*destroy)(struct Qdisc *);
- int (*init)(struct Qdisc *, void *arg);
- int (*control)(struct Qdisc *, void *);
- int (*requeue)(struct sk_buff *skb, struct Qdisc *);
+
+ int (*dump)(struct Qdisc *, struct sk_buff *);
};
struct Qdisc_head
@@ -30,23 +73,35 @@ struct Qdisc
struct Qdisc_head h;
int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
struct sk_buff * (*dequeue)(struct Qdisc *dev);
+ unsigned flags;
+#define TCQ_F_DEFAULT 1
+#define TCQ_F_BUILTIN 2
struct Qdisc_ops *ops;
- int handle;
+ struct Qdisc *next;
+ u32 handle;
+ u32 classid;
struct Qdisc *parent;
struct sk_buff_head q;
struct device *dev;
- unsigned long dropped;
- unsigned long tx_last;
+
+ struct tc_stats stats;
unsigned long tx_timeo;
+ unsigned long tx_last;
+ int (*reshape_fail)(struct sk_buff *skb, struct Qdisc *q);
char data[0];
};
+struct qdisc_rate_table
+{
+ struct tc_ratespec rate;
+ u32 data[256];
+ struct qdisc_rate_table *next;
+ int refcnt;
+};
-/* Yes, it is slow for [34]86, but we have no choice.
- 10 msec resolution is appropriate only for bandwidth < 32Kbit/sec.
- RULE:
+/*
Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth
Normal IP packet size ~ 512byte, hence:
@@ -57,22 +112,96 @@ struct Qdisc
10msec resolution -> <50Kbit/sec.
The result: [34]86 is not good choice for QoS router :-(
- */
+	The things are not so bad, because we may use artificial
+ clock evaluated by integration of network data flow
+ in the most critical places.
-typedef struct timeval psched_time_t;
+ Note: we do not use fastgettimeofday.
+ The reason is that, when it is not the same thing as
+ gettimeofday, it returns invalid timestamp, which is
+ not updated, when net_bh is active.
-/* On 64bit architecures it would be clever to define:
-typedef u64 psched_time_t;
- and make all this boring arithmetics directly
+ So, use PSCHED_CLOCK_SOURCE = PSCHED_CPU on alpha and pentiums
+	with rdtsc. And PSCHED_JIFFIES on all other architectures, including [34]86
+	and pentiums without rdtsc.
+	You can use PSCHED_GETTIMEOFDAY on other architectures,
+ which have fast and precise clock source, but it is too expensive.
*/
-#ifndef SCHEDULE_ONLY_LOW_BANDWIDTH
+
+#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
+
+typedef struct timeval psched_time_t;
+typedef long psched_tdiff_t;
+
#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp))
+#define PSCHED_US2JIFFIE(usecs) (((usecs)+(1000000/HZ-1))/(1000000/HZ))
+
+#else /* PSCHED_CLOCK_SOURCE != PSCHED_GETTIMEOFDAY */
+
+typedef u64 psched_time_t;
+typedef long psched_tdiff_t;
+
+extern psched_time_t psched_time_base;
+
+#if PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES
+
+#define PSCHED_WATCHER
+
+extern unsigned long psched_time_mark;
+
+#if HZ == 100
+#define PSCHED_JSCALE 7
+#elif HZ == 1024
+#define PSCHED_JSCALE 10
#else
-#define PSCHED_GET_TIME(stamp) ((stamp) = xtime)
+#define PSCHED_JSCALE 0
#endif
+#define PSCHED_GET_TIME(stamp) ((stamp) = psched_time_base + (((unsigned long)(jiffies-psched_time_mark))<<PSCHED_JSCALE))
+#define PSCHED_US2JIFFIE(delay) ((delay)>>PSCHED_JSCALE)
+
+#elif PSCHED_CLOCK_SOURCE == PSCHED_CPU
+
+extern psched_tdiff_t psched_clock_per_hz;
+extern int psched_clock_scale;
+
+#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz)
+
+#if CPU == 586 || CPU == 686
+
+#define PSCHED_GET_TIME(stamp) \
+({ u32 hi, lo; \
+ __asm__ __volatile__ (".byte 0x0f,0x31" :"=a" (lo), "=d" (hi)); \
+ (stamp) = ((((u64)hi)<<32) + lo)>>psched_clock_scale; \
+})
+
+#elif defined (__alpha__)
+
+#define PSCHED_WATCHER
+
+extern u32 psched_time_mark;
+
+#define PSCHED_GET_TIME(stamp) \
+({ u32 __res; \
+ __asm__ __volatile__ ("rpcc %0" : "r="(__res)); \
+ if (__res <= psched_time_mark) psched_time_base += 0x100000000UL; \
+ psched_time_mark = __res; \
+ (stamp) = (psched_time_base + __res)>>psched_clock_scale; \
+})
+
+#else
+
+#error PSCHED_CLOCK_SOURCE=PSCHED_CPU is not supported on this arch.
+
+#endif /* ARCH */
+
+#endif /* PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES */
+
+#endif /* PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY */
+
+#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
#define PSCHED_TDIFF(tv1, tv2) \
({ \
int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
@@ -106,8 +235,6 @@ typedef u64 psched_time_t;
__delta; \
})
-#define PSCHED_US2JIFFIE(usecs) (((usecs)+(1000000/HZ-1))/(1000000/HZ))
-
#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \
(tv1).tv_sec <= (tv2).tv_sec) || \
(tv1).tv_sec < (tv2).tv_sec)
@@ -127,24 +254,86 @@ typedef u64 psched_time_t;
(tv).tv_usec -= 1000000; } \
})
-/* Set/check that undertime is in the "past perfect";
+/* Set/check that time is in the "past perfect";
it depends on concrete representation of system time
*/
#define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0)
#define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0)
+#define PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; })
+
+#else
+
+#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2))
+#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \
+({ \
+ long __delta = (tv1) - (tv2); \
+ if ( __delta > (bound)) { __delta = (bound); guard; } \
+ __delta; \
+})
+
+
+#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2))
+#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta))
+#define PSCHED_TADD(tv, delta) ((tv) += (delta))
+#define PSCHED_SET_PASTPERFECT(t) ((t) = 0)
+#define PSCHED_IS_PASTPERFECT(t) ((t) == 0)
+#define PSCHED_AUDIT_TDIFF(t)
+
+#endif
+
+struct tcf_police
+{
+ struct tcf_police *next;
+ int refcnt;
+ u32 index;
+
+ int action;
+ u32 burst;
+ u32 mtu;
+
+ u32 toks;
+ u32 ptoks;
+ psched_time_t t_c;
+ struct qdisc_rate_table *R_tab;
+ struct qdisc_rate_table *P_tab;
+};
+
+extern void tcf_police_destroy(struct tcf_police *p);
+extern struct tcf_police * tcf_police_locate(struct rtattr *rta);
+extern int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p);
+extern int tcf_police(struct sk_buff *skb, struct tcf_police *p);
+
+extern __inline__ void tcf_police_release(struct tcf_police *p)
+{
+ if (p && --p->refcnt == 0)
+ tcf_police_destroy(p);
+}
extern struct Qdisc noop_qdisc;
+extern struct Qdisc_ops noop_qdisc_ops;
+extern struct Qdisc_ops pfifo_qdisc_ops;
+extern struct Qdisc_ops bfifo_qdisc_ops;
int register_qdisc(struct Qdisc_ops *qops);
int unregister_qdisc(struct Qdisc_ops *qops);
+struct Qdisc *qdisc_lookup(struct device *dev, u32 handle);
+struct Qdisc *qdisc_lookup_class(struct device *dev, u32 handle);
void dev_init_scheduler(struct device *dev);
void dev_shutdown(struct device *dev);
void dev_activate(struct device *dev);
void dev_deactivate(struct device *dev);
void qdisc_reset(struct Qdisc *qdisc);
void qdisc_destroy(struct Qdisc *qdisc);
+struct Qdisc * qdisc_create_dflt(struct device *dev, struct Qdisc_ops *ops);
+struct Qdisc * dev_set_scheduler(struct device *dev, struct Qdisc *qdisc);
+int qdisc_new_estimator(struct tc_stats *stats, struct rtattr *opt);
+void qdisc_kill_estimator(struct tc_stats *stats);
+struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab);
+void qdisc_put_rtab(struct qdisc_rate_table *tab);
+int teql_init(void);
+int tc_filter_init(void);
int pktsched_init(void);
void qdisc_run_queues(void);
@@ -161,4 +350,10 @@ extern __inline__ void qdisc_wakeup(struct device *dev)
}
}
+extern __inline__ unsigned psched_mtu(struct device *dev)
+{
+ unsigned mtu = dev->mtu;
+ return dev->hard_header ? mtu + dev->hard_header_len : mtu;
+}
+
#endif
diff --git a/include/net/rose.h b/include/net/rose.h
index 8e86c1457..366168287 100644
--- a/include/net/rose.h
+++ b/include/net/rose.h
@@ -87,6 +87,7 @@ struct rose_neigh {
unsigned int number;
char restarted;
char dce_mode;
+ char loopback;
struct sk_buff_head queue;
struct timer_list t0timer;
struct timer_list ftimer;
@@ -97,6 +98,7 @@ struct rose_node {
rose_address address;
unsigned short mask;
unsigned char count;
+ char loopback;
struct rose_neigh *neighbour[3];
};
@@ -179,11 +181,21 @@ extern void rose_transmit_diagnostic(struct rose_neigh *, unsigned char);
extern void rose_transmit_clear_request(struct rose_neigh *, unsigned int, unsigned char, unsigned char);
extern void rose_transmit_link(struct sk_buff *, struct rose_neigh *);
+/* rose_loopback.c */
+extern void rose_loopback_init(void);
+extern void rose_loopback_clear(void);
+extern int rose_loopback_queue(struct sk_buff *, struct rose_neigh *);
+
/* rose_out.c */
extern void rose_kick(struct sock *);
extern void rose_enquiry_response(struct sock *);
/* rose_route.c */
+extern struct rose_neigh *rose_loopback_neigh;
+
+extern int rose_add_loopback_neigh(void);
+extern int rose_add_loopback_node(rose_address *);
+extern void rose_del_loopback_node(rose_address *);
extern void rose_rt_device_down(struct device *);
extern void rose_link_device_down(struct device *);
extern struct device *rose_dev_first(void);
diff --git a/include/net/snmp.h b/include/net/snmp.h
index ee3bf0e20..eeeeb6aa2 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -124,5 +124,13 @@ struct udp_mib
unsigned long UdpInErrors;
unsigned long UdpOutDatagrams;
};
+
+struct linux_mib
+{
+ unsigned long SyncookiesSent;
+ unsigned long SyncookiesRecv;
+ unsigned long SyncookiesFailed;
+ unsigned long EmbryonicRsts;
+};
#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index f06f94ea9..29d7985be 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -66,8 +66,12 @@
#endif
#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
+#if defined(CONFIG_SPX) || defined(CONFIG_SPX_MODULE)
+#include <net/spx.h>
+#else
#include <net/ipx.h>
-#endif
+#endif /* CONFIG_SPX */
+#endif /* CONFIG_IPX */
#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
#include <linux/atalk.h>
@@ -83,12 +87,8 @@
#include <asm/atomic.h>
-/*
- * The AF_UNIX specific socket options
- */
-
-struct unix_opt
-{
+/* The AF_UNIX specific socket options */
+struct unix_opt {
int family;
char * name;
int locks;
@@ -105,8 +105,7 @@ struct unix_opt
#ifdef CONFIG_NETLINK
struct netlink_callback;
-struct netlink_opt
-{
+struct netlink_opt {
pid_t pid;
unsigned groups;
pid_t dst_pid;
@@ -117,13 +116,9 @@ struct netlink_opt
};
#endif
-/*
- * Once the IPX ncpd patches are in these are going into protinfo
- */
-
+/* Once the IPX ncpd patches are in these are going into protinfo. */
#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
-struct ipx_opt
-{
+struct ipx_opt {
ipx_address dest_addr;
ipx_interface *intrfc;
unsigned short port;
@@ -141,8 +136,7 @@ struct ipx_opt
#endif
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-struct ipv6_pinfo
-{
+struct ipv6_pinfo {
struct in6_addr saddr;
struct in6_addr rcv_saddr;
struct in6_addr daddr;
@@ -191,8 +185,7 @@ struct tcp_sack_block {
__u32 end_seq;
};
-struct tcp_opt
-{
+struct tcp_opt {
int tcp_header_len; /* Bytes of tcp header to send */
/*
@@ -214,7 +207,7 @@ struct tcp_opt
__u32 lrcvtime; /* timestamp of last received data packet*/
__u32 srtt; /* smothed round trip time << 3 */
- __u32 ato; /* delayed ack timeout */
+ __u32 ato; /* delayed ack timeout */
__u32 snd_wl1; /* Sequence for window update */
__u32 snd_wl2; /* Ack sequence for update */
@@ -229,13 +222,14 @@ struct tcp_opt
__u32 snd_cwnd; /* Sending congestion window */
__u32 rto; /* retransmit timeout */
- __u32 packets_out; /* Packets which are "in flight" */
- __u32 high_seq; /* highest sequence number sent by onset of congestion */
+ __u32 packets_out; /* Packets which are "in flight" */
+ __u32 fackets_out; /* Non-retrans SACK'd packets */
+ __u32 retrans_out; /* Fast-retransmitted packets out */
+ __u32 high_seq; /* snd_nxt at onset of congestion */
/*
* Slow start and congestion control (see also Nagle, and Karn & Partridge)
*/
__u32 snd_ssthresh; /* Slow start size threshold */
- __u16 snd_cwnd_cnt;
__u8 dup_acks; /* Consequetive duplicate acks seen from other end */
__u8 delayed_acks;
@@ -275,7 +269,6 @@ struct tcp_opt
struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
struct timer_list probe_timer; /* Probes */
- __u32 basertt; /* Vegas baseRTT */
__u32 window_clamp; /* XXX Document this... -DaveM */
__u32 probes_out; /* unanswered 0 window probes */
__u32 syn_seq;
@@ -285,8 +278,8 @@ struct tcp_opt
struct open_request *syn_wait_queue;
struct open_request **syn_wait_last;
- int syn_backlog;
+ int syn_backlog; /* Backlog of received SYNs */
};
@@ -330,17 +323,12 @@ struct tcp_opt
/* Define this to get the sk->debug debugging facility. */
#define SOCK_DEBUGGING
#ifdef SOCK_DEBUGGING
-#define SOCK_DEBUG(sk, msg...) if((sk) && ((sk)->debug)) printk(KERN_DEBUG ## msg)
+#define SOCK_DEBUG(sk, msg...) do { if((sk) && ((sk)->debug)) printk(KERN_DEBUG ## msg); } while (0)
#else
#define SOCK_DEBUG(sk, msg...) do { } while (0)
#endif
-/*
- * TCP will start to use the new protinfo while *still using the old* fields
- */
-
-struct sock
-{
+struct sock {
/* This must be first. */
struct sock *sklist_next;
struct sock *sklist_prev;
@@ -349,28 +337,29 @@ struct sock
struct sock *bind_next;
struct sock **bind_pprev;
- /* Main hash linkage for various protocol lookup tables. */
- struct sock *next;
- struct sock **pprev;
-
/* Socket demultiplex comparisons on incoming packets. */
__u32 daddr; /* Foreign IPv4 addr */
__u32 rcv_saddr; /* Bound local IPv4 addr */
- int bound_dev_if; /* Bound device index if != 0 */
+ __u16 dport; /* Destination port */
unsigned short num; /* Local port */
+ int bound_dev_if; /* Bound device index if != 0 */
+
+ /* Main hash linkage for various protocol lookup tables. */
+ struct sock *next;
+ struct sock **pprev;
+
volatile unsigned char state, /* Connection state */
zapped; /* In ax25 & ipx means not linked */
__u16 sport; /* Source port */
- __u16 dport; /* Destination port */
- unsigned short family;
- unsigned char reuse,
- nonagle;
+ unsigned short family; /* Address family */
+ unsigned char reuse, /* SO_REUSEADDR setting */
+ nonagle; /* Disable Nagle algorithm? */
- int sock_readers; /* user count */
- int rcvbuf;
+ int sock_readers; /* User count */
+ int rcvbuf; /* Size of receive buffer in bytes */
- struct wait_queue **sleep;
+ struct wait_queue **sleep; /* Sock wait queue */
struct dst_entry *dst_cache; /* Destination cache */
atomic_t rmem_alloc; /* Receive queue bytes committed */
struct sk_buff_head receive_queue; /* Incoming packets */
@@ -379,13 +368,12 @@ struct sock
atomic_t omem_alloc; /* "o" is "option" or "other" */
__u32 saddr; /* Sending source */
unsigned int allocation; /* Allocation mode */
- int sndbuf;
+ int sndbuf; /* Size of send buffer in bytes */
struct sock *prev;
- /*
- * Not all are volatile, but some are, so we
- * might as well say they all are.
- */
+ /* Not all are volatile, but some are, so we might as well say they all are.
+ * XXX Make this a flag word -DaveM
+ */
volatile char dead,
done,
urginline,
@@ -408,9 +396,9 @@ struct sock
struct proto *prot;
-/*
- * mss is min(mtu, max_window)
- */
+ /* mss is min(mtu, max_window)
+ * XXX Fix this, mtu only used in one TCP place and that is it -DaveM
+ */
unsigned short mtu; /* mss negotiated in the syn's */
unsigned short mss; /* current eff. mss - can change */
unsigned short user_mss; /* mss requested by user in ioctl */
@@ -429,7 +417,11 @@ struct sock
#endif
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
struct raw6_opt tp_raw;
-#endif
+#endif /* CONFIG_IPV6 */
+#if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE)
+ struct spx_opt af_spx;
+#endif /* CONFIG_SPX */
+
} tp_pinfo;
int err, err_soft; /* Soft holds errors that don't
@@ -450,13 +442,10 @@ struct sock
struct sock_filter *filter_data;
#endif /* CONFIG_FILTER */
-/*
- * This is where all the private (optional) areas that don't
- * overlap will eventually live.
- */
-
- union
- {
+ /* This is where all the private (optional) areas that don't
+ * overlap will eventually live.
+ */
+ union {
void *destruct_hook;
struct unix_opt af_unix;
#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
@@ -488,9 +477,7 @@ struct sock
#endif
} protinfo;
-/*
- * IP 'private area' or will be eventually
- */
+ /* IP 'private area' or will be eventually. */
int ip_ttl; /* TTL setting */
int ip_tos; /* TOS */
unsigned ip_cmsg_flags;
@@ -504,31 +491,18 @@ struct sock
__u32 ip_mc_addr;
struct ip_mc_socklist *ip_mc_list; /* Group array */
-/*
- * This part is used for the timeout functions (timer.c).
- */
-
+ /* This part is used for the timeout functions (timer.c). */
int timeout; /* What are we waiting for? */
- struct timer_list timer; /* This is the TIME_WAIT/receive timer
- * when we are doing IP
- */
+ struct timer_list timer; /* This is the sock cleanup timer. */
struct timeval stamp;
- /*
- * Identd
- */
-
+ /* Identd */
struct socket *socket;
- /*
- * RPC layer private data
- */
+ /* RPC layer private data */
void *user_data;
- /*
- * Callbacks
- */
-
+ /* Callbacks */
void (*state_change)(struct sock *sk);
void (*data_ready)(struct sock *sk,int bytes);
void (*write_space)(struct sock *sk);
@@ -539,14 +513,11 @@ struct sock
void (*destruct)(struct sock *sk);
};
-/*
- * IP protocol blocks we attach to sockets.
- * socket layer -> transport layer interface
- * transport -> network interface is defined by struct inet_proto
+/* IP protocol blocks we attach to sockets.
+ * socket layer -> transport layer interface
+ * transport -> network interface is defined by struct inet_proto
*/
-
-struct proto
-{
+struct proto {
/* These must be first. */
struct sock *sklist_next;
struct sock *sklist_prev;
@@ -608,16 +579,10 @@ struct proto
#define TIME_DONE 7 /* Used to absorb those last few packets */
#define TIME_PROBE0 8
-/*
- * About 10 seconds
- */
-
+/* About 10 seconds */
#define SOCK_DESTROY_TIME (10*HZ)
-/*
- * Sockets 0-1023 can't be bound to unless you are superuser
- */
-
+/* Sockets 0-1023 can't be bound to unless you are superuser */
#define PROT_SOCK 1024
#define SHUTDOWN_MASK 3
diff --git a/include/net/spx.h b/include/net/spx.h
index 3e9b1d185..a449b891b 100644
--- a/include/net/spx.h
+++ b/include/net/spx.h
@@ -1,38 +1,93 @@
#ifndef __NET_SPX_H
#define __NET_SPX_H
-/*
- * Internal definitions for the SPX protocol.
- */
-
-/*
- * The SPX header following an IPX header.
- */
-
+#include <net/ipx.h>
+
struct spxhdr
-{
- __u8 cctl;
-#define CCTL_SPXII_XHD 0x01 /* SPX2 extended header */
-#define CCTL_SPX_UNKNOWN 0x02 /* Unknown (unused ??) */
-#define CCTL_SPXII_NEG 0x04 /* Negotiate size */
-#define CCTL_SPXII 0x08 /* Set for SPX2 */
-#define CCTL_EOM 0x10 /* End of message marker */
-#define CCTL_URG 0x20 /* Urgent marker in SPP (not used in SPX?) */
-#define CCTL_ACK 0x40 /* Send me an ACK */
-#define CCTL_CTL 0x80 /* Control message */
- __u8 dtype;
+{ __u8 cctl;
+ __u8 dtype;
#define SPX_DTYPE_ECONN 0xFE /* Finished */
#define SPX_DTYPE_ECACK 0xFF /* Ok */
- __u16 sconn; /* Connection ID */
- __u16 dconn; /* Connection ID */
- __u16 sequence;
- __u16 ackseq;
- __u16 allocseq;
+ __u16 sconn; /* Connection ID */
+ __u16 dconn; /* Connection ID */
+ __u16 sequence;
+ __u16 ackseq;
+ __u16 allocseq;
};
-#define IPXTYPE_SPX 5
+struct ipxspxhdr
+{ struct ipxhdr ipx;
+ struct spxhdr spx;
+};
+
+#define SPX_SYS_PKT_LEN (sizeof(struct ipxspxhdr))
+
+#ifdef __KERNEL__
+struct spx_opt
+{ int state;
+ int sndbuf;
+ int retries; /* Number of WD retries */
+ int retransmits; /* Number of retransmits */
+ int max_retries;
+ int wd_interval;
+ void *owner;
+ __u16 dest_connid; /* Net order */
+ __u16 source_connid; /* Net order */
+ __u16 sequence; /* Host order - our current pkt # */
+ __u16 alloc; /* Host order - max seq we can rcv now */
+ __u16 rmt_ack; /* Host order - last pkt ACKd by remote */
+ __u16 rmt_seq;
+ __u16 acknowledge;
+ __u16 rmt_alloc; /* Host order - max seq remote can handle now */
+ ipx_address dest_addr;
+ ipx_address source_addr;
+ struct timer_list watchdog; /* Idle watch */
+ struct timer_list retransmit; /* Retransmit timer */
+ struct sk_buff_head rcv_queue;
+ struct sk_buff_head transmit_queue;
+ struct sk_buff_head retransmit_queue;
+};
+
+/* Packet connection control defines */
+#define CCTL_SPXII_XHD 0x01 /* SPX2 extended header */
+#define CCTL_SPX_UNKNOWN 0x02 /* Unknown (unused ??) */
+#define CCTL_SPXII_NEG 0x04 /* Negotiate size */
+#define CCTL_SPXII 0x08 /* Set for SPX2 */
+#define CCTL_EOM 0x10 /* End of message marker */
+#define CCTL_URG 0x20 /* Urgent marker in SPP (not used in SPX?) */
+#define CCTL_ACK 0x40 /* Send me an ACK */
+#define CCTL_CTL 0x80 /* Control message */
+#define CCTL_SYS CCTL_CTL /* Spec uses CCTL_SYS */
+
+/* Connection state defines */
+#define SPX_CLOSED 7
+#define SPX_CONNECTING 8
+#define SPX_CONNECTED 9
+
+/* Packet transmit types - Internal */
+#define DATA 0 /* Data */
+#define ACK 1 /* Data ACK */
+#define WDACK 2 /* WD ACK */
+#define CONACK 3 /* Connection Request ACK */
+#define CONREQ 4 /* Connection Request */
+#define WDREQ 5 /* WD Request */
+#define DISCON 6 /* Informed Disconnect */
+#define DISACK 7 /* Informed Disconnect ACK */
+#define RETRAN 8 /* Int. Retransmit of packet */
+#define TQUEUE 9 /* Int. Transmit of a queued packet */
+
+/*
+ * These are good candidates for ioctl calls
+ */
+
+/* Watchdog defines */
+#define VERIFY_TIMEOUT 3 * HZ
+#define ABORT_TIMEOUT 30 * HZ
+
+/* Packet retransmit defines */
+#define RETRY_COUNT 10
+#define RETRY_TIME 1 * HZ
+#define MAX_RETRY_DELAY 5 * HZ
-
-
-
-#endif
+#endif /* __KERNEL__ */
+#endif /* def __NET_SPX_H */
diff --git a/include/net/spxcall.h b/include/net/spxcall.h
new file mode 100644
index 000000000..0461fbbe1
--- /dev/null
+++ b/include/net/spxcall.h
@@ -0,0 +1,2 @@
+/* Separate to keep compilation of protocols.c simpler */
+extern void spx_proto_init(struct net_proto *pro);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 84bf7f55e..52853f44a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -154,16 +154,16 @@ struct tcp_tw_bucket {
struct sock *sklist_prev;
struct sock *bind_next;
struct sock **bind_pprev;
- struct sock *next;
- struct sock **pprev;
__u32 daddr;
__u32 rcv_saddr;
- int bound_dev_if;
+ __u16 dport;
unsigned short num;
+ int bound_dev_if;
+ struct sock *next;
+ struct sock **pprev;
unsigned char state,
zapped;
__u16 sport;
- __u16 dport;
unsigned short family;
unsigned char reuse,
nonagle;
@@ -172,7 +172,8 @@ struct tcp_tw_bucket {
__u32 rcv_nxt;
struct tcp_func *af_specific;
struct tcp_bind_bucket *tb;
- struct timer_list timer;
+ struct tcp_tw_bucket *next_death;
+ int death_slot;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct in6_addr v6_daddr;
struct in6_addr v6_rcv_saddr;
@@ -181,16 +182,42 @@ struct tcp_tw_bucket {
extern kmem_cache_t *tcp_timewait_cachep;
-/* tcp_ipv4.c: These sysctl variables need to be shared between v4 and v6
- * because the v6 tcp code to intialize a connection needs to interoperate
- * with the v4 code using the same variables.
- * FIXME: It would be better to rewrite the connection code to be
- * address family independent and just leave one copy in the ipv4 section.
- * This would also clean up some code duplication. -- erics
- */
-extern int sysctl_tcp_timestamps;
-extern int sysctl_tcp_window_scaling;
-extern int sysctl_tcp_sack;
+/* Socket demux engine toys. */
+#ifdef __BIG_ENDIAN
+#define TCP_COMBINED_PORTS(__sport, __dport) \
+ (((__u32)(__sport)<<16) | (__u32)(__dport))
+#else /* __LITTLE_ENDIAN */
+#define TCP_COMBINED_PORTS(__sport, __dport) \
+ (((__u32)(__dport)<<16) | (__u32)(__sport))
+#endif
+
+#if defined(__alpha__) || defined(__sparc_v9__)
+#ifdef __BIG_ENDIAN
+#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
+ __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));
+#else /* __LITTLE_ENDIAN */
+#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
+ __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
+#endif /* __BIG_ENDIAN */
+#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
+ (((*((__u64 *)&((__sk)->daddr)))== (__cookie)) && \
+ ((*((__u32 *)&((__sk)->dport)))== (__ports)) && \
+ (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
+#else /* 32-bit arch */
+#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
+#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
+ (((__sk)->daddr == (__saddr)) && \
+ ((__sk)->rcv_saddr == (__daddr)) && \
+ ((*((__u32 *)&((__sk)->dport)))== (__ports)) && \
+ (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
+#endif /* 64-bit arch */
+
+#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \
+ (((*((__u32 *)&((__sk)->dport)))== (__ports)) && \
+ ((__sk)->family == AF_INET6) && \
+ !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.daddr, (__saddr)) && \
+ !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.rcv_saddr, (__daddr)) && \
+ (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
/* These can have wildcards, don't try too hard. */
static __inline__ int tcp_lhashfn(unsigned short num)
@@ -222,9 +249,11 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
#define MAX_RESET_SIZE (NETHDR_SIZE + sizeof(struct tcphdr) + MAX_HEADER + 15)
#define MAX_TCPHEADER_SIZE (NETHDR_SIZE + sizeof(struct tcphdr) + 20 + MAX_HEADER + 15)
-#define MAX_WINDOW 32767 /* Never offer a window over 32767 without using
- window scaling (not yet supported). Some poor
- stacks do signed 16bit maths! */
+/*
+ * Never offer a window over 32767 without using window scaling. Some
+ * poor stacks do signed 16bit maths!
+ */
+#define MAX_WINDOW 32767
#define MIN_WINDOW 2048
#define MAX_ACK_BACKLOG 2
#define MAX_DELAY_ACK 2
@@ -266,16 +295,18 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
#define TCP_KEEPALIVE_TIME (180*60*HZ) /* two hours */
#define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */
#define TCP_KEEPALIVE_PERIOD ((75*HZ)>>2) /* period of keepalive check */
-#define TCP_NO_CHECK 0 /* turn to one if you want the default
- * to be no checksum */
-#define TCP_SYNACK_PERIOD (HZ/2)
+#define TCP_SYNACK_PERIOD (HZ/2) /* How often to run the synack slow timer */
#define TCP_QUICK_TRIES 8 /* How often we try to retransmit, until
- * we tell the LL layer that it is something
+ * we tell the link layer that it is something
* wrong (e.g. that it can expire redirects) */
#define TCP_BUCKETGC_PERIOD (HZ)
+/* TIME_WAIT reaping mechanism. */
+#define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */
+#define TCP_TWKILL_PERIOD ((HZ*60)/TCP_TWKILL_SLOTS)
+
/*
* TCP option
*/
@@ -305,14 +336,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
#define TCPOLEN_SACK_BASE_ALIGNED 4
#define TCPOLEN_SACK_PERBLOCK 8
-/*
- * TCP Vegas constants
- */
-
-#define TCP_VEGAS_ALPHA 2 /* v_cong_detect_top_nseg */
-#define TCP_VEGAS_BETA 4 /* v_cong_detect_bot_nseg */
-#define TCP_VEGAS_GAMMA 1 /* v_exp_inc_nseg */
-
struct open_request;
struct or_calltable {
@@ -548,15 +571,16 @@ extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
__u16 *mss);
+/* tcp_output.c */
+
extern void tcp_read_wakeup(struct sock *);
extern void tcp_write_xmit(struct sock *);
extern void tcp_time_wait(struct sock *);
extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
+extern void tcp_fack_retransmit(struct sock *);
extern void tcp_xmit_retransmit_queue(struct sock *);
extern void tcp_simple_retransmit(struct sock *);
-/* tcp_output.c */
-
extern void tcp_send_probe0(struct sock *);
extern void tcp_send_partial(struct sock *);
extern void tcp_write_wakeup(struct sock *);
@@ -598,11 +622,38 @@ struct tcp_sl_timer {
#define TCP_SLT_SYNACK 0
#define TCP_SLT_KEEPALIVE 1
-#define TCP_SLT_BUCKETGC 2
-#define TCP_SLT_MAX 3
+#define TCP_SLT_TWKILL 2
+#define TCP_SLT_BUCKETGC 3
+#define TCP_SLT_MAX 4
extern struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX];
+/* Compute the current effective MSS, taking SACKs and IP options,
+ * and even PMTU discovery events into account.
+ */
+static __inline__ unsigned int tcp_current_mss(struct sock *sk)
+{
+ struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+ struct dst_entry *dst = sk->dst_cache;
+ unsigned int mss_now = sk->mss;
+
+ if(dst && (sk->mtu < dst->pmtu)) {
+ unsigned int mss_distance = (sk->mtu - sk->mss);
+
+ /* PMTU discovery event has occurred. */
+ sk->mtu = dst->pmtu;
+ sk->mss = sk->mtu - mss_distance;
+ }
+
+ if(tp->sack_ok && tp->num_sacks)
+ mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
+ (tp->num_sacks * TCPOLEN_SACK_PERBLOCK));
+ if(sk->opt)
+ mss_now -= sk->opt->optlen;
+
+ return mss_now;
+}
+
/* Compute the actual receive window we are currently advertising. */
static __inline__ u32 tcp_receive_window(struct tcp_opt *tp)
{
@@ -651,10 +702,17 @@ extern __inline__ int tcp_raise_window(struct sock *sk)
/* This is what the send packet queueing engine uses to pass
* TCP per-packet control information to the transmission
- * code.
+ * code. We also store the host-order sequence numbers in
+ * here too. This is 36 bytes on 32-bit architectures,
+ * 40 bytes on 64-bit machines, if this grows please adjust
+ * skbuff.h:skbuff->cb[xxx] size appropriately.
*/
struct tcp_skb_cb {
- __u8 flags; /* TCP header flags. */
+ struct inet_skb_parm header; /* For incoming frames */
+ __u32 seq; /* Starting sequence number */
+ __u32 end_seq; /* SEQ + FIN + SYN + datalen */
+ unsigned long when; /* used to compute rtt's */
+ __u8 flags; /* TCP header flags. */
/* NOTE: These must match up to the flags byte in a
* real TCP header.
@@ -666,15 +724,41 @@ struct tcp_skb_cb {
#define TCPCB_FLAG_ACK 0x10
#define TCPCB_FLAG_URG 0x20
- __u8 sacked; /* State flags for SACK/FACK. */
+ __u8 sacked; /* State flags for SACK/FACK. */
#define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */
#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
- __u16 urg_ptr; /* Valid w/URG flags is set. */
+ __u16 urg_ptr; /* Valid w/URG flags is set. */
+ __u32 ack_seq; /* Sequence number ACK'd */
};
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
+/* We store the congestion window as a packet count, shifted by
+ * a factor so that implementing the 1/2 MSS ssthresh rules
+ * is easy.
+ */
+#define TCP_CWND_SHIFT 1
+
+/* This determines how many packets are "in the network" to the best
+ * of our knowledge. In many cases it is conservative, but where
+ * detailed information is available from the receiver (via SACK
+ * blocks etc.) we can make more aggressive calculations.
+ *
+ * Use this for decisions involving congestion control, use just
+ * tp->packets_out to determine if the send queue is empty or not.
+ *
+ * Read this equation as:
+ *
+ * "Packets sent once on transmission queue" MINUS
+ * "Packets acknowledged by FACK information" PLUS
+ * "Packets fast retransmitted"
+ */
+static __inline__ int tcp_packets_in_flight(struct tcp_opt *tp)
+{
+ return tp->packets_out - tp->fackets_out + tp->retrans_out;
+}
+
/* This checks if the data bearing packet SKB (usually tp->send_head)
* should be put on the wire right now.
*/
@@ -682,7 +766,6 @@ static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
int nagle_check = 1;
- int len;
/* RFC 1122 - section 4.2.3.4
*
@@ -697,13 +780,13 @@ static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb)
*
* Don't use the nagle rule for urgent data.
*/
- len = skb->end_seq - skb->seq;
- if (!sk->nonagle && len < (sk->mss >> 1) && tp->packets_out &&
+ if (!sk->nonagle && skb->len < (sk->mss >> 1) && tp->packets_out &&
!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_URG))
nagle_check = 0;
- return (nagle_check && tp->packets_out < tp->snd_cwnd &&
- !after(skb->end_seq, tp->snd_una + tp->snd_wnd) &&
+ return (nagle_check &&
+ (tcp_packets_in_flight(tp) < (tp->snd_cwnd>>TCP_CWND_SHIFT)) &&
+ !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) &&
tp->retransmits == 0);
}
@@ -749,7 +832,6 @@ static char *statename[]={
static __inline__ void tcp_set_state(struct sock *sk, int state)
{
- struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
int oldstate = sk->state;
sk->state = state;
@@ -765,10 +847,13 @@ static __inline__ void tcp_set_state(struct sock *sk, int state)
break;
case TCP_CLOSE:
+ {
+ struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
/* Should be about 2 rtt's */
net_reset_timer(sk, TIME_DONE, min(tp->srtt * 2, TCP_DONE_TIME));
sk->prot->unhash(sk);
/* fall through */
+ }
default:
if (oldstate==TCP_ESTABLISHED)
tcp_statistics.TcpCurrEstab--;
@@ -868,7 +953,7 @@ extern __inline__ void tcp_select_initial_window(__u32 space, __u16 mss,
* our initial window offering to 32k. There should also
* be a sysctl option to stop being nice.
*/
- (*rcv_wnd) = min(space,32767);
+ (*rcv_wnd) = min(space, MAX_WINDOW);
(*rcv_wscale) = 0;
if (wscale_ok) {
/* See RFC1323 for an explanation of the limit to 14 */
@@ -901,20 +986,6 @@ extern __inline__ void tcp_synq_init(struct tcp_opt *tp)
tp->syn_wait_last = &tp->syn_wait_queue;
}
-extern __inline__ struct open_request *tcp_synq_unlink_tail(struct tcp_opt *tp)
-{
- struct open_request *head = tp->syn_wait_queue;
-#if 0
- /* Should be a net-ratelimit'd thing, not all the time. */
- printk(KERN_DEBUG "synq tail drop with expire=%ld\n",
- head->expires-jiffies);
-#endif
- if (head->dl_next == NULL)
- tp->syn_wait_last = &tp->syn_wait_queue;
- tp->syn_wait_queue = head->dl_next;
- return head;
-}
-
extern void __tcp_inc_slow_timer(struct tcp_sl_timer *slt);
extern __inline__ void tcp_inc_slow_timer(int timer)
{