author     Ralf Baechle <ralf@linux-mips.org>  1998-03-18 17:17:51 +0000
committer  Ralf Baechle <ralf@linux-mips.org>  1998-03-18 17:17:51 +0000
commit     f1382dc4850bb459d24a81c6cb0ef93ea7bd4a79 (patch)
tree       225271a3d5dcd4e9dea5ee393556abd754c964b1 /include
parent     135b00fc2e90e605ac2a96b20b0ebd93851a3f89 (diff)
o Merge with Linux 2.1.90.
o Divide L1 cache sizes by 1024 before printing, makes the numbers a bit more credible ...
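The cache-size note above concerns code under arch/mips/, which is not part of the include/ diffstat below. Purely as an illustrative sketch of the idea (the function and variable names here are hypothetical, not the kernel's actual cache-probe code), dividing the raw byte counts by 1024 before printing is what turns the implausible-looking figures into the expected KB values:

/*
 * Hypothetical example only -- not taken from this patch.
 * Prints primary (L1) cache sizes in kilobytes instead of raw bytes.
 */
#include <stdio.h>

static void show_l1_sizes(unsigned long icache_bytes, unsigned long dcache_bytes)
{
	/* Divide by 1024 so the reported numbers read as KB. */
	printf("Primary instruction cache %lukb, data cache %lukb\n",
	       icache_bytes / 1024, dcache_bytes / 1024);
}

int main(void)
{
	show_l1_sizes(32768, 32768);	/* e.g. a 32kb/32kb split L1 */
	return 0;
}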
Diffstat (limited to 'include')
-rw-r--r--  include/asm-alpha/fpu.h       |  22
-rw-r--r--  include/asm-i386/page.h       |   7
-rw-r--r--  include/asm-i386/pgtable.h    |  14
-rw-r--r--  include/asm-i386/processor.h  |   6
-rw-r--r--  include/asm-i386/uaccess.h    |   3
-rw-r--r--  include/linux/coda.h          |   6
-rw-r--r--  include/linux/coda_cache.h    |  15
-rw-r--r--  include/linux/coda_fs_i.h     |   7
-rw-r--r--  include/linux/coda_linux.h    |  18
-rw-r--r--  include/linux/file.h          |  44
-rw-r--r--  include/linux/hfs_fs.h        |   6
-rw-r--r--  include/linux/hfs_fs_i.h      |   2
-rw-r--r--  include/linux/hfs_sysdep.h    |   4
-rw-r--r--  include/linux/kerneld.h       | 135
-rw-r--r--  include/linux/kmod.h          |   4
-rw-r--r--  include/linux/module.h        |   2
-rw-r--r--  include/linux/mroute.h        |   2
-rw-r--r--  include/linux/ncp_fs.h        |  11
-rw-r--r--  include/linux/ncp_fs_sb.h     |   6
-rw-r--r--  include/linux/netdevice.h     |   6
-rw-r--r--  include/linux/nfs_fs.h        |   3
-rw-r--r--  include/linux/rtnetlink.h     |   2
-rw-r--r--  include/linux/sched.h         |  51
-rw-r--r--  include/linux/socket.h        |   2
-rw-r--r--  include/linux/sunrpc/clnt.h   |   5
-rw-r--r--  include/linux/swap.h          |   6
-rw-r--r--  include/linux/swapctl.h       |  46
-rw-r--r--  include/linux/sysctl.h        |   9
-rw-r--r--  include/linux/tty.h           |   3
-rw-r--r--  include/linux/umsdos_fs.p     |  13
-rw-r--r--  include/net/dst.h             |   2
-rw-r--r--  include/net/ip6_route.h       |   2
-rw-r--r--  include/net/ipv6.h            |   2
-rw-r--r--  include/net/route.h           |   1
-rw-r--r--  include/net/sock.h            | 201
-rw-r--r--  include/net/tcp.h             | 441
36 files changed, 577 insertions, 532 deletions
diff --git a/include/asm-alpha/fpu.h b/include/asm-alpha/fpu.h
index ab9b28f6e..333e5caeb 100644
--- a/include/asm-alpha/fpu.h
+++ b/include/asm-alpha/fpu.h
@@ -37,21 +37,21 @@
* compatibly. The corresponding definitions are in
* /usr/include/machine/fpu.h under OSF/1.
*/
-#define IEEE_TRAP_ENABLE_INV (1<<1) /* invalid op */
-#define IEEE_TRAP_ENABLE_DZE (1<<2) /* division by zero */
-#define IEEE_TRAP_ENABLE_OVF (1<<3) /* overflow */
-#define IEEE_TRAP_ENABLE_UNF (1<<4) /* underflow */
-#define IEEE_TRAP_ENABLE_INE (1<<5) /* inexact */
+#define IEEE_TRAP_ENABLE_INV (1UL<<1) /* invalid op */
+#define IEEE_TRAP_ENABLE_DZE (1UL<<2) /* division by zero */
+#define IEEE_TRAP_ENABLE_OVF (1UL<<3) /* overflow */
+#define IEEE_TRAP_ENABLE_UNF (1UL<<4) /* underflow */
+#define IEEE_TRAP_ENABLE_INE (1UL<<5) /* inexact */
#define IEEE_TRAP_ENABLE_MASK (IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_DZE |\
IEEE_TRAP_ENABLE_OVF | IEEE_TRAP_ENABLE_UNF |\
IEEE_TRAP_ENABLE_INE)
/* status bits coming from fpcr: */
-#define IEEE_STATUS_INV (1<<17)
-#define IEEE_STATUS_DZE (1<<18)
-#define IEEE_STATUS_OVF (1<<19)
-#define IEEE_STATUS_UNF (1<<20)
-#define IEEE_STATUS_INE (1<<21)
+#define IEEE_STATUS_INV (1UL<<17)
+#define IEEE_STATUS_DZE (1UL<<18)
+#define IEEE_STATUS_OVF (1UL<<19)
+#define IEEE_STATUS_UNF (1UL<<20)
+#define IEEE_STATUS_INE (1UL<<21)
#define IEEE_STATUS_MASK (IEEE_STATUS_INV | IEEE_STATUS_DZE | \
IEEE_STATUS_OVF | IEEE_STATUS_UNF | \
@@ -64,7 +64,7 @@
#define IEEE_INHERIT (1UL<<63) /* inherit on thread create? */
/*
- * Convert the spftware IEEE trap enable and status bits into the
+ * Convert the software IEEE trap enable and status bits into the
* hardware fpcr format.
*/
diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h
index 4744df9c8..5889ec880 100644
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -1,12 +1,15 @@
#ifndef _I386_PAGE_H
#define _I386_PAGE_H
+#include <linux/config.h>
+
/* PAGE_SHIFT determines the page size */
#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
#define STRICT_MM_TYPECHECKS
@@ -52,12 +55,14 @@ typedef unsigned long pgprot_t;
#define __pgprot(x) (x)
#endif
+#endif /* !__ASSEMBLY__ */
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
/* This handles the memory map.. */
-#define PAGE_OFFSET 0xC0000000
+#define __PAGE_OFFSET ((0x1000-CONFIG_MAX_MEMSIZE)<<20)
+#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
#define MAP_NR(addr) (__pa(addr) >> PAGE_SHIFT)
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index fe0864913..47e1d2cfc 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -13,6 +13,7 @@
* the i386 page table tree.
*/
+#ifndef __ASSEMBLY__
/* Caches aren't brain-dead on the intel. */
#define flush_cache_all() do { } while (0)
#define flush_cache_mm(mm) do { } while (0)
@@ -155,6 +156,7 @@ static inline void flush_tlb_range(struct mm_struct *mm,
}
#endif
#endif
+#endif /* !__ASSEMBLY__ */
/* Certain architectures need to do special things when pte's
@@ -181,6 +183,16 @@ static inline void flush_tlb_range(struct mm_struct *mm,
#define PTRS_PER_PMD 1
#define PTRS_PER_PGD 1024
+/*
+ * pgd entries used up by user/kernel:
+ */
+
+#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
+#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
+#define __USER_PGD_PTRS ((__PAGE_OFFSET >> PGDIR_SHIFT) & 0x3ff)
+#define __KERNEL_PGD_PTRS (PTRS_PER_PGD-__USER_PGD_PTRS)
+
+#ifndef __ASSEMBLY__
/* Just any arbitrary offset to the start of the vmalloc VM area: the
* current 8MB value just means that there will be a 8MB "hole" after the
* physical memory until the kernel virtual memory starts. That means that
@@ -497,4 +509,6 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma,
#define module_map vmalloc
#define module_unmap vfree
+#endif /* !__ASSEMBLY__ */
+
#endif /* _I386_PAGE_H */
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index 778466bbe..fc62069a5 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -10,6 +10,7 @@
#include <asm/vm86.h>
#include <asm/math_emu.h>
#include <asm/segment.h>
+#include <asm/page.h>
/*
* CPU type and hardware bug flags. Kept separately for each CPU.
@@ -71,10 +72,9 @@ extern unsigned int machine_submodel_id;
extern unsigned int BIOS_revision;
/*
- * User space process size: 3GB. This is hardcoded into a few places,
- * so don't change it unless you know what you are doing.
+ * User space process size: 3GB (default).
*/
-#define TASK_SIZE (0xC0000000UL)
+#define TASK_SIZE (PAGE_OFFSET)
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
diff --git a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
index ef08ac510..9da2fff06 100644
--- a/include/asm-i386/uaccess.h
+++ b/include/asm-i386/uaccess.h
@@ -5,6 +5,7 @@
* User space memory access functions
*/
#include <linux/sched.h>
+#include <asm/page.h>
#define VERIFY_READ 0
#define VERIFY_WRITE 1
@@ -21,7 +22,7 @@
#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF)
-#define USER_DS MAKE_MM_SEG(0xC0000000)
+#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
#define get_ds() (KERNEL_DS)
#define get_fs() (current->addr_limit)
diff --git a/include/linux/coda.h b/include/linux/coda.h
index 3faa2e9ca..5c3cb563e 100644
--- a/include/linux/coda.h
+++ b/include/linux/coda.h
@@ -587,9 +587,9 @@ struct cfs_open_by_path_out {
};
/*
- * Occasionally, don't cache the fid returned by CFS_LOOKUP. For instance, if
- * the fid is inconsistent. This case is handled by setting the top bit of the
- * return result parameter.
+ * Occasionally, we don't cache the fid returned by CFS_LOOKUP.
+ * For instance, if the fid is inconsistent.
+ * This case is handled by setting the top bit of the type result parameter.
*/
#define CFS_NOCACHE 0x80000000
diff --git a/include/linux/coda_cache.h b/include/linux/coda_cache.h
index 44251867f..fc607fdba 100644
--- a/include/linux/coda_cache.h
+++ b/include/linux/coda_cache.h
@@ -21,19 +21,16 @@ struct coda_cache {
struct coda_cred cc_cred;
};
-void coda_ccinsert(struct coda_cache *el, struct super_block *sb);
-void coda_cninsert(struct coda_cache *el, struct coda_inode_info *cnp);
-void coda_ccremove(struct coda_cache *el);
-void coda_cnremove(struct coda_cache *el);
-void coda_cache_create(struct inode *inode, int mask);
-struct coda_cache *coda_cache_find(struct inode *inode);
+/* credential cache */
void coda_cache_enter(struct inode *inode, int mask);
-void coda_cache_clear_cnp(struct coda_inode_info *cnp);
+void coda_cache_clear_inode(struct inode *);
void coda_cache_clear_all(struct super_block *sb);
void coda_cache_clear_cred(struct super_block *sb, struct coda_cred *cred);
int coda_cache_check(struct inode *inode, int mask);
-void coda_dentry_delete(struct dentry *dentry);
-void coda_zapfid(struct ViceFid *fid, struct super_block *sb, int flag);
+
+/* for downcalls and attributes and lookups */
+void coda_flag_inode(struct inode *inode, int flag);
+void coda_flag_alias_children(struct inode *inode, int flag);
/*
diff --git a/include/linux/coda_fs_i.h b/include/linux/coda_fs_i.h
index 1277445b9..d312013d5 100644
--- a/include/linux/coda_fs_i.h
+++ b/include/linux/coda_fs_i.h
@@ -17,7 +17,7 @@
#define CODA_CNODE_MAGIC 0x47114711
/*
- * smb fs inode data (in memory only)
+ * coda fs inode data
*/
struct coda_inode_info {
struct ViceFid c_fid; /* Coda identifier */
@@ -36,7 +36,7 @@ struct coda_inode_info {
#define C_VATTR 0x1 /* Validity of vattr in the cnode */
#define C_SYMLINK 0x2 /* Validity of symlink pointer in the cnode */
#define C_DYING 0x4 /* Set for outstanding cnodes from venus (which died) */
-#define C_ZAPFID 0x8
+#define C_PURGE 0x8
#define C_ZAPDIR 0x10
#define C_INITED 0x20
@@ -44,9 +44,6 @@ int coda_cnode_make(struct inode **, struct ViceFid *, struct super_block *);
int coda_cnode_makectl(struct inode **inode, struct super_block *sb);
struct inode *coda_fid_to_inode(ViceFid *fid, struct super_block *sb);
-/* inode to cnode */
-#define ITOC(inode) ((struct coda_inode_info *)&((inode)->u.coda_i))
-
#endif
#endif
diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h
index fa477cb52..9dd30eaeb 100644
--- a/include/linux/coda_linux.h
+++ b/include/linux/coda_linux.h
@@ -36,6 +36,7 @@ extern struct file_operations coda_ioctl_operations;
int coda_open(struct inode *i, struct file *f);
int coda_release(struct inode *i, struct file *f);
int coda_permission(struct inode *inode, int mask);
+int coda_revalidate_inode(struct dentry *);
/* global variables */
extern int coda_debug;
@@ -43,10 +44,13 @@ extern int coda_print_entry;
extern int coda_access_cache;
/* this file: heloers */
+static __inline__ struct ViceFid *coda_i2f(struct inode *);
char *coda_f2s(ViceFid *f);
int coda_isroot(struct inode *i);
int coda_fid_is_volroot(struct ViceFid *);
int coda_iscontrol(const char *name, size_t length);
+
+
void coda_load_creds(struct coda_cred *cred);
int coda_mycred(struct coda_cred *);
void coda_vattr_to_iattr(struct inode *, struct coda_vattr *);
@@ -112,4 +116,18 @@ do { \
#define CODA_FREE(ptr,size) do {if (size < 3000) { kfree_s((ptr), (size)); CDEBUG(D_MALLOC, "kfreed: %x at %x.\n", (int) size, (int) ptr); } else { vfree((ptr)); CDEBUG(D_MALLOC, "vfreed: %x at %x.\n", (int) size, (int) ptr);} } while (0)
+/* inode to cnode */
+
+static __inline__ struct ViceFid *coda_i2f(struct inode *inode)
+{
+ return &(inode->u.coda_i.c_fid);
+}
+
+#define ITOC(inode) (&((inode)->u.coda_i))
+
+
+
+
+
+
#endif
diff --git a/include/linux/file.h b/include/linux/file.h
index 3f3870b9e..240a5039c 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -1,19 +1,41 @@
+/*
+ * Wrapper functions for accessing the file_struct fd array.
+ */
+
#ifndef __LINUX_FILE_H
#define __LINUX_FILE_H
-extern inline struct file * fget(unsigned long fd)
+extern int __fput(struct file *);
+extern void insert_file_free(struct file *file);
+
+/*
+ * Check whether the specified fd has an open file.
+ */
+extern inline struct file * fcheck(unsigned int fd)
{
struct file * file = NULL;
- if (fd < NR_OPEN) {
+
+ if (fd < NR_OPEN)
file = current->files->fd[fd];
- if (file)
- file->f_count++;
- }
return file;
}
-extern int __fput(struct file *);
-extern void insert_file_free(struct file *file);
+extern inline struct file * fget(unsigned int fd)
+{
+ struct file * file = fcheck(fd);
+
+ if (file)
+ file->f_count++;
+ return file;
+}
+
+/*
+ * Install a file pointer in the fd array.
+ */
+extern inline void fd_install(unsigned int fd, struct file *file)
+{
+ current->files->fd[fd] = file;
+}
/* It does not matter which list it is on. */
extern inline void remove_filp(struct file *file)
@@ -47,12 +69,4 @@ extern inline void put_filp(struct file *file)
}
}
-/*
- * Install a file pointer in the files structure.
- */
-extern inline void fd_install(unsigned long fd, struct file *file)
-{
- current->files->fd[fd] = file;
-}
-
#endif
diff --git a/include/linux/hfs_fs.h b/include/linux/hfs_fs.h
index de51db0b1..9b43579c0 100644
--- a/include/linux/hfs_fs.h
+++ b/include/linux/hfs_fs.h
@@ -237,20 +237,20 @@ extern const struct hfs_name hfs_cap_reserved2[];
extern struct inode_operations hfs_cap_ndir_inode_operations;
extern struct inode_operations hfs_cap_fdir_inode_operations;
extern struct inode_operations hfs_cap_rdir_inode_operations;
-extern void hfs_cap_drop_dentry(const ino_t, struct dentry *);
+extern void hfs_cap_drop_dentry(struct dentry *, const ino_t);
/* dir_dbl.c */
extern const struct hfs_name hfs_dbl_reserved1[];
extern const struct hfs_name hfs_dbl_reserved2[];
extern struct inode_operations hfs_dbl_dir_inode_operations;
-extern void hfs_dbl_drop_dentry(const ino_t, struct dentry *);
+extern void hfs_dbl_drop_dentry(struct dentry *, const ino_t);
/* dir_nat.c */
extern const struct hfs_name hfs_nat_reserved1[];
extern const struct hfs_name hfs_nat_reserved2[];
extern struct inode_operations hfs_nat_ndir_inode_operations;
extern struct inode_operations hfs_nat_hdir_inode_operations;
-extern void hfs_nat_drop_dentry(const ino_t, struct dentry *);
+extern void hfs_nat_drop_dentry(struct dentry *, const ino_t);
/* dir_sngl.c */
extern const struct hfs_name hfs_sngl_reserved1[];
diff --git a/include/linux/hfs_fs_i.h b/include/linux/hfs_fs_i.h
index cf9ed53e0..453896882 100644
--- a/include/linux/hfs_fs_i.h
+++ b/include/linux/hfs_fs_i.h
@@ -34,7 +34,7 @@ struct hfs_inode_info {
struct hfs_hdr_layout *layout;
/* for dentry cleanup */
- void (*d_drop_op)(const ino_t, struct dentry *);
+ void (*d_drop_op)(struct dentry *, const ino_t);
};
#endif
diff --git a/include/linux/hfs_sysdep.h b/include/linux/hfs_sysdep.h
index 93de05aad..22e2ac66b 100644
--- a/include/linux/hfs_sysdep.h
+++ b/include/linux/hfs_sysdep.h
@@ -78,6 +78,10 @@ extern inline hfs_u32 hfs_time(void) {
*/
typedef struct wait_queue *hfs_wait_queue;
+extern inline void hfs_init_waitqueue(hfs_wait_queue *queue) {
+ init_waitqueue(queue);
+}
+
extern inline void hfs_sleep_on(hfs_wait_queue *queue) {
sleep_on(queue);
}
diff --git a/include/linux/kerneld.h b/include/linux/kerneld.h
deleted file mode 100644
index b2db5f8c7..000000000
--- a/include/linux/kerneld.h
+++ /dev/null
@@ -1,135 +0,0 @@
-#ifndef _LINUX_KERNELD_H
-#define _LINUX_KERNELD_H
-
-#define KERNELD_SYSTEM 1
-#define KERNELD_REQUEST_MODULE 2 /* "insmod" */
-#define KERNELD_RELEASE_MODULE 3 /* "rmmod" */
-#define KERNELD_DELAYED_RELEASE_MODULE 4 /* "rmmod" */
-#define KERNELD_CANCEL_RELEASE_MODULE 5 /* "rmmod" */
-#define KERNELD_REQUEST_ROUTE 6 /* from net/ipv4/route.c */
-#define KERNELD_BLANKER 7 /* from drivers/char/console.c */
-#define KERNELD_PNP 8 /* from drivers/pnp/kerneld.c */
-#define KERNELD_ARP 256 /* from net/ipv4/arp.c */
-
-/*
- * Uncomment the following line for the new kerneld protocol
- * This includes the pid of the kernel level requester into the kerneld header
- */
-/*
-#define NEW_KERNELD_PROTOCOL
- */
-#ifdef NEW_KERNELD_PROTOCOL
-#define OLDIPC_KERNELD 00040000 /* use the kerneld message channel */
-#define IPC_KERNELD 00140000 /* use the kerneld message channel, new protocol */
-#define KDHDR (sizeof(long) + sizeof(short) + sizeof(short))
-#define NULL_KDHDR 0, 2, 0
-#else
-#define IPC_KERNELD 00040000 /* use the kerneld message channel */
-#define KDHDR (sizeof(long))
-#define NULL_KDHDR 0
-#endif
-#define KERNELD_MAXCMD 0x7ffeffff
-#define KERNELD_MINSEQ 0x7fff0000 /* "commands" legal up to 0x7ffeffff */
-#define KERNELD_WAIT 0x80000000
-#define KERNELD_NOWAIT 0
-
-struct kerneld_msg {
- long mtype;
- long id;
-#ifdef NEW_KERNELD_PROTOCOL
- short version;
- short pid;
-#endif
-#ifdef __KERNEL__
- char *text;
-#else
- char text[1];
-#endif /* __KERNEL__ */
-};
-
-#ifdef __KERNEL__
-#include <linux/string.h>
-
-extern int kerneld_send(int msgtype, int ret_size, int msgsz,
- const char *text, const char *ret_val);
-
-/*
- * Request that a module should be loaded.
- * Wait for the exit status from insmod/modprobe.
- * If it fails, it fails... at least we tried...
- */
-static inline int request_module(const char *name)
-{
- return kerneld_send(KERNELD_REQUEST_MODULE,
- 0 | KERNELD_WAIT,
- strlen(name), name, NULL);
-}
-
-/*
- * Request the removal of a module, maybe don't wait for it.
- * It doesn't matter if the removal fails, now does it?
- */
-static inline int release_module(const char *name, int waitflag)
-{
- return kerneld_send(KERNELD_RELEASE_MODULE,
- 0 | (waitflag?KERNELD_WAIT:KERNELD_NOWAIT),
- strlen(name), name, NULL);
-}
-
-/*
- * Request a delayed removal of a module, but don't wait for it.
- * The delay is done by kerneld (default: 60 seconds)
- */
-static inline int delayed_release_module(const char *name)
-{
- return kerneld_send(KERNELD_DELAYED_RELEASE_MODULE,
- 0 | KERNELD_NOWAIT,
- strlen(name), name, NULL);
-}
-
-/*
- * Attempt to cancel a previous request for removal of a module,
- * but don't wait for it.
- * This call can be made if the kernel wants to prevent a delayed
- * unloading of a module.
- */
-static inline int cancel_release_module(const char *name)
-{
- return kerneld_send(KERNELD_CANCEL_RELEASE_MODULE,
- 0 | KERNELD_NOWAIT,
- strlen(name), name, NULL);
-}
-
-/*
- * Perform an "inverted" system call, maybe return the exit status
- */
-static inline int ksystem(const char *cmd, int waitflag)
-{
- return kerneld_send(KERNELD_SYSTEM,
- 0 | (waitflag?KERNELD_WAIT:KERNELD_NOWAIT),
- strlen(cmd), cmd, NULL);
-}
-
-/*
- * Try to create a route, possibly by opening a ppp-connection
- */
-static inline int kerneld_route(const char *ip_route)
-{
- return kerneld_send(KERNELD_REQUEST_ROUTE,
- 0 | KERNELD_WAIT,
- strlen(ip_route), ip_route, NULL);
-}
-
-/*
- * Handle an external screen blanker
- */
-static inline int kerneld_blanker(int on_off)
-{
- char *s = on_off ? "on" : "off";
- return kerneld_send(KERNELD_BLANKER,
- 0 | (on_off ? KERNELD_NOWAIT : KERNELD_WAIT),
- strlen(s), s, NULL);
-}
-
-#endif /* __KERNEL__ */
-#endif
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
new file mode 100644
index 000000000..876c7f222
--- /dev/null
+++ b/include/linux/kmod.h
@@ -0,0 +1,4 @@
+/*
+ kmod header
+*/
+extern int request_module(const char * name);
diff --git a/include/linux/module.h b/include/linux/module.h
index 475c68854..ad3d10baf 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -148,7 +148,7 @@ const char __module_author[] __attribute__((section(".modinfo"))) = \
const char __module_description[] __attribute__((section(".modinfo"))) = \
"description=" desc
-/* Could potentially be used by kerneld... */
+/* Could potentially be used by kmod... */
#define MODULE_SUPPORTED_DEVICE(dev) \
const char __module_device[] __attribute__((section(".modinfo"))) = \
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 55193867d..b57519b72 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -217,7 +217,7 @@ extern int pim_rcv(struct sk_buff * , unsigned short);
extern int pim_rcv_v1(struct sk_buff * , unsigned short len);
struct rtmsg;
-extern int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm);
+extern int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait);
#endif
#endif
diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h
index 03904df71..eb83cfe01 100644
--- a/include/linux/ncp_fs.h
+++ b/include/linux/ncp_fs.h
@@ -83,8 +83,15 @@ struct ncp_privatedata_ioctl
};
#define NCP_IOC_NCPREQUEST _IOR('n', 1, struct ncp_ioctl_request)
-#define NCP_IOC_GETMOUNTUID _IOW('n', 2, uid_t)
-#define NCP_IOC_GETMOUNTUID_INT _IOW('n', 2, unsigned int)
+#define NCP_IOC_GETMOUNTUID _IOW('n', 2, __kernel_uid_t)
+
+#if 1
+#ifdef __KERNEL__
+/* remove after ncpfs-2.0.13 gets released or at the beginning of kernel-2.1. codefreeze */
+#define NCP_IOC_GETMOUNTUID_INT _IOW('n', 2, unsigned int)
+#endif
+#endif
+
#define NCP_IOC_CONN_LOGGED_IN _IO('n', 3)
#define NCP_GET_FS_INFO_VERSION (1)
diff --git a/include/linux/ncp_fs_sb.h b/include/linux/ncp_fs_sb.h
index efcc20556..38492fc92 100644
--- a/include/linux/ncp_fs_sb.h
+++ b/include/linux/ncp_fs_sb.h
@@ -51,11 +51,9 @@ struct ncp_server {
int ncp_reply_size;
struct ncp_inode_info root;
-#if 0
- char root_path; /* '\0' */
-#else
struct dentry* root_dentry;
-#endif
+
+ int root_setuped;
/* info for packet signing */
int sign_wanted; /* 1=Server needs signed packets */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 72430508a..d1c005c70 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -125,6 +125,9 @@ struct net_device_stats
unsigned long tx_heartbeat_errors;
unsigned long tx_window_errors;
+ /* for cslip etc */
+ unsigned long rx_compressed;
+ unsigned long tx_compressed;
};
#ifdef CONFIG_NET_FASTROUTE
@@ -352,6 +355,7 @@ extern __inline__ int unregister_gifconf(unsigned int family)
#define HAVE_NETIF_RX 1
extern void netif_rx(struct sk_buff *skb);
extern void net_bh(void);
+extern void dev_tint(struct device *dev);
extern int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy);
extern int dev_ioctl(unsigned int cmd, void *);
extern int dev_change_flags(struct device *, unsigned);
@@ -423,7 +427,7 @@ extern int dev_mc_add(struct device *dev, void *addr, int alen, int newonly);
extern void dev_mc_discard(struct device *dev);
extern void dev_set_promiscuity(struct device *dev, int inc);
extern void dev_set_allmulti(struct device *dev, int inc);
-/* Load a device via the kerneld */
+/* Load a device via the kmod */
extern void dev_load(const char *name);
extern void dev_mcast_init(void);
extern int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev));
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a7b51b977..b72ad4ed1 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -85,6 +85,9 @@ do { \
*/
#define NFS_RPC_SWAPFLAGS (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS)
+/* Flags in the RPC client structure */
+#define NFS_CLNTF_BUFSIZE 0x0001 /* readdir buffer in longwords */
+
#ifdef __KERNEL__
/*
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 4a309eb91..8c6467010 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -66,7 +66,7 @@ struct rtattr
#define RTA_ALIGNTO 4
#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) )
-#define RTA_OK(rta,len) ((rta)->rta_len > sizeof(struct rtattr) && \
+#define RTA_OK(rta,len) ((rta)->rta_len >= sizeof(struct rtattr) && \
(rta)->rta_len <= (len))
#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \
(struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 096d0656c..7eae346a5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -86,6 +86,12 @@ extern int last_pid;
#define SCHED_FIFO 1
#define SCHED_RR 2
+/*
+ * This is an additional bit set when we want to
+ * yield the CPU for one re-schedule..
+ */
+#define SCHED_YIELD 0x10
+
struct sched_param {
int sched_priority;
};
@@ -113,19 +119,24 @@ extern void trap_init(void);
asmlinkage void schedule(void);
-/* Open file table structure */
+
+/*
+ * Open file table structure
+ */
struct files_struct {
int count;
+ int max_fds;
+ struct file ** fd; /* current fd array */
fd_set close_on_exec;
fd_set open_fds;
- struct file * fd[NR_OPEN];
};
#define INIT_FILES { \
1, \
+ NR_OPEN, \
+ &init_fd_array[0], \
{ { 0, } }, \
- { { 0, } }, \
- { NULL, } \
+ { { 0, } } \
}
struct fs_struct {
@@ -387,43 +398,32 @@ extern __inline__ struct task_struct **get_free_taskslot(void)
/* PID hashing. */
#define PIDHASH_SZ (NR_TASKS >> 2)
extern struct task_struct *pidhash[PIDHASH_SZ];
-extern spinlock_t pidhash_lock;
#define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
extern __inline__ void hash_pid(struct task_struct *p)
{
struct task_struct **htable = &pidhash[pid_hashfn(p->pid)];
- unsigned long flags;
- spin_lock_irqsave(&pidhash_lock, flags);
if((p->pidhash_next = *htable) != NULL)
(*htable)->pidhash_pprev = &p->pidhash_next;
*htable = p;
p->pidhash_pprev = htable;
- spin_unlock_irqrestore(&pidhash_lock, flags);
}
extern __inline__ void unhash_pid(struct task_struct *p)
{
- unsigned long flags;
-
- spin_lock_irqsave(&pidhash_lock, flags);
if(p->pidhash_next)
p->pidhash_next->pidhash_pprev = p->pidhash_pprev;
*p->pidhash_pprev = p->pidhash_next;
- spin_unlock_irqrestore(&pidhash_lock, flags);
}
extern __inline__ struct task_struct *find_task_by_pid(int pid)
{
struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)];
- unsigned long flags;
- spin_lock_irqsave(&pidhash_lock, flags);
for(p = *htable; p && p->pid != pid; p = p->pidhash_next)
;
- spin_unlock_irqrestore(&pidhash_lock, flags);
return p;
}
@@ -571,19 +571,6 @@ extern void exit_sighand(struct task_struct *);
extern int do_execve(char *, char **, char **, struct pt_regs *);
extern int do_fork(unsigned long, unsigned long, struct pt_regs *);
-/* See if we have a valid user level fd.
- * If it makes sense, return the file structure it references.
- * Otherwise return NULL.
- */
-extern inline struct file *file_from_fd(const unsigned int fd)
-{
-
- if (fd >= NR_OPEN)
- return NULL;
- /* either valid or null */
- return current->files->fd[fd];
-}
-
/*
* The wait-queues are circular lists, and you have to be *very* sure
* to keep them correct. Use only these two functions to add/remove
@@ -627,11 +614,9 @@ extern inline void remove_wait_queue(struct wait_queue ** p, struct wait_queue *
write_unlock_irqrestore(&waitqueue_lock, flags);
}
-#define REMOVE_LINKS(p) do { unsigned long flags; \
- write_lock_irqsave(&tasklist_lock, flags); \
+#define REMOVE_LINKS(p) do { \
(p)->next_task->prev_task = (p)->prev_task; \
(p)->prev_task->next_task = (p)->next_task; \
- write_unlock_irqrestore(&tasklist_lock, flags); \
if ((p)->p_osptr) \
(p)->p_osptr->p_ysptr = (p)->p_ysptr; \
if ((p)->p_ysptr) \
@@ -640,13 +625,11 @@ extern inline void remove_wait_queue(struct wait_queue ** p, struct wait_queue *
(p)->p_pptr->p_cptr = (p)->p_osptr; \
} while (0)
-#define SET_LINKS(p) do { unsigned long flags; \
- write_lock_irqsave(&tasklist_lock, flags); \
+#define SET_LINKS(p) do { \
(p)->next_task = &init_task; \
(p)->prev_task = init_task.prev_task; \
init_task.prev_task->next_task = (p); \
init_task.prev_task = (p); \
- write_unlock_irqrestore(&tasklist_lock, flags); \
(p)->p_ysptr = NULL; \
if (((p)->p_osptr = (p)->p_pptr->p_cptr) != NULL) \
(p)->p_osptr->p_ysptr = p; \
diff --git a/include/linux/socket.h b/include/linux/socket.h
index e274a3c51..afff2fd5c 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -137,6 +137,7 @@ struct ucred {
#define AF_NETLINK 16
#define AF_ROUTE AF_NETLINK /* Alias to emulate 4.4BSD */
#define AF_PACKET 17 /* Packet family */
+#define AF_ASH 18 /* Ash */
#define AF_MAX 32 /* For now.. */
/* Protocol families, same as address families. */
@@ -160,6 +161,7 @@ struct ucred {
#define PF_NETLINK AF_NETLINK
#define PF_ROUTE AF_ROUTE
#define PF_PACKET AF_PACKET
+#define PF_ASH AF_ASH
#define PF_MAX AF_MAX
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 60fb2d74f..da2b2cdd1 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -30,6 +30,7 @@ struct rpc_portmap {
* The high-level client handle
*/
struct rpc_clnt {
+ unsigned int cl_users; /* number of references */
struct rpc_xprt * cl_xprt; /* transport */
struct rpc_procinfo * cl_procinfo; /* procedure info */
u32 cl_maxproc; /* max procedure number */
@@ -37,7 +38,6 @@ struct rpc_clnt {
char * cl_server; /* server machine name */
char * cl_protname; /* protocol name */
struct rpc_auth * cl_auth; /* authenticator */
- struct rpc_portmap cl_pmap; /* port mapping */
struct rpc_stat * cl_stats; /* statistics */
unsigned int cl_softrtry : 1,/* soft timeouts */
@@ -47,10 +47,11 @@ struct rpc_clnt {
cl_binding : 1,/* doing a getport() */
cl_oneshot : 1,/* dispose after use */
cl_dead : 1;/* abandoned */
+ unsigned int cl_flags; /* misc client flags */
unsigned long cl_hardmax; /* max hard timeout */
+ struct rpc_portmap cl_pmap; /* port mapping */
struct rpc_wait_queue cl_bindwait; /* waiting on getport() */
- unsigned int cl_users; /* number of references */
};
#define cl_timeout cl_xprt->timeout
#define cl_prog cl_pmap.pm_prog
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4d291146e..494490c32 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -36,10 +36,10 @@ struct swap_info_struct {
extern int nr_swap_pages;
extern int nr_free_pages;
extern atomic_t nr_async_pages;
-extern int min_free_pages;
-extern int free_pages_low;
-extern int free_pages_high;
extern struct inode swapper_inode;
+extern unsigned long page_cache_size;
+extern int buffermem;
+#define BUFFER_MEM ((buffermem >> PAGE_SHIFT) + page_cache_size)
/* Incomplete types for prototype declarations: */
struct task_struct;
diff --git a/include/linux/swapctl.h b/include/linux/swapctl.h
index e71dcd067..cc169d2da 100644
--- a/include/linux/swapctl.h
+++ b/include/linux/swapctl.h
@@ -6,29 +6,18 @@
/* Swap tuning control */
-/* First, enumerate the different reclaim policies */
-enum RCL_POLICY {RCL_ROUND_ROBIN, RCL_BUFF_FIRST, RCL_PERSIST};
-
-typedef struct swap_control_v5
+typedef struct swap_control_v6
{
unsigned int sc_max_page_age;
unsigned int sc_page_advance;
unsigned int sc_page_decline;
unsigned int sc_page_initial_age;
- unsigned int sc_max_buff_age;
- unsigned int sc_buff_advance;
- unsigned int sc_buff_decline;
- unsigned int sc_buff_initial_age;
unsigned int sc_age_cluster_fract;
unsigned int sc_age_cluster_min;
unsigned int sc_pageout_weight;
unsigned int sc_bufferout_weight;
- unsigned int sc_buffer_grace;
- unsigned int sc_nr_buffs_to_free;
- unsigned int sc_nr_pages_to_free;
- enum RCL_POLICY sc_policy;
-} swap_control_v5;
-typedef struct swap_control_v5 swap_control_t;
+} swap_control_v6;
+typedef struct swap_control_v6 swap_control_t;
extern swap_control_t swap_control;
typedef struct swapstat_v1
@@ -42,7 +31,23 @@ typedef struct swapstat_v1
typedef swapstat_v1 swapstat_t;
extern swapstat_t swapstats;
-extern int min_free_pages, free_pages_low, free_pages_high;
+typedef struct buffer_mem_v1
+{
+ unsigned int min_percent;
+ unsigned int borrow_percent;
+ unsigned int max_percent;
+} buffer_mem_v1;
+typedef buffer_mem_v1 buffer_mem_t;
+extern buffer_mem_t buffer_mem;
+
+typedef struct freepages_v1
+{
+ unsigned int min;
+ unsigned int low;
+ unsigned int high;
+} freepages_v1;
+typedef freepages_v1 freepages_t;
+extern freepages_t freepages;
#define SC_VERSION 1
#define SC_MAX_VERSION 1
@@ -55,17 +60,11 @@ extern int min_free_pages, free_pages_low, free_pages_high;
failure to free a resource at any priority */
#define RCL_FAILURE (RCL_MAXPRI + 1)
-#define RCL_POLICY (swap_control.sc_policy)
#define AGE_CLUSTER_FRACT (swap_control.sc_age_cluster_fract)
#define AGE_CLUSTER_MIN (swap_control.sc_age_cluster_min)
#define PAGEOUT_WEIGHT (swap_control.sc_pageout_weight)
#define BUFFEROUT_WEIGHT (swap_control.sc_bufferout_weight)
-#define NR_BUFFS_TO_FREE (swap_control.sc_nr_buffs_to_free)
-#define NR_PAGES_TO_FREE (swap_control.sc_nr_pages_to_free)
-
-#define BUFFERMEM_GRACE (swap_control.sc_buffer_grace)
-
/* Page aging (see mm/swap.c) */
#define MAX_PAGE_AGE (swap_control.sc_max_page_age)
@@ -73,11 +72,6 @@ extern int min_free_pages, free_pages_low, free_pages_high;
#define PAGE_DECLINE (swap_control.sc_page_decline)
#define PAGE_INITIAL_AGE (swap_control.sc_page_initial_age)
-#define MAX_BUFF_AGE (swap_control.sc_max_buff_age)
-#define BUFF_ADVANCE (swap_control.sc_buff_advance)
-#define BUFF_DECLINE (swap_control.sc_buff_decline)
-#define BUFF_INITIAL_AGE (swap_control.sc_buff_initial_age)
-
/* Given a resource of N units (pages or buffers etc), we only try to
* age and reclaim AGE_CLUSTER_FRACT per 1024 resources each time we
* scan the resource list. */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 865bdd1dd..b7550ba2c 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -70,7 +70,9 @@ enum
KERN_PRINTK, /* sturct: control printk logging parameters */
KERN_NAMETRANS, /* Name translation */
KERN_STATINODE,
- KERN_DENTRY /* dentry statistics */
+ KERN_DENTRY, /* dentry statistics */
+ KERN_MODPROBE,
+ KERN_KMOD_UNLOAD_DELAY
};
@@ -82,6 +84,7 @@ enum
VM_FREEPG, /* struct: Set free page thresholds */
VM_BDFLUSH, /* struct: Control buffer cache flushing */
VM_OVERCOMMIT_MEMORY, /* Turn off the virtual memory safety limit */
+ VM_BUFFERMEM /* struct: Set cache memory thresholds */
};
@@ -118,6 +121,7 @@ enum
NET_CORE_FASTROUTE,
NET_CORE_MSG_COST,
NET_CORE_MSG_BURST,
+ NET_CORE_OPTMEM_MAX,
};
/* /proc/sys/net/ethernet */
@@ -145,8 +149,6 @@ enum
NET_IPV4_FIB_HASH = 19,
NET_IPV4_TCP_HOE_RETRANSMITS=32,
- NET_IPV4_TCP_SACK,
- NET_IPV4_TCP_TSACK,
NET_IPV4_TCP_TIMESTAMPS,
NET_IPV4_TCP_WINDOW_SCALING,
NET_IPV4_TCP_VEGAS_CONG_AVOID,
@@ -167,6 +169,7 @@ enum
NET_IPV4_IP_MASQ_DEBUG,
NET_TCP_SYNCOOKIES,
NET_TCP_STDURG,
+ NET_TCP_RFC1337,
NET_TCP_SYN_TAILDROP,
NET_TCP_MAX_SYN_BACKLOG,
NET_IPV4_LOCAL_PORT_RANGE,
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 6b00c4329..34c88d721 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -320,8 +320,7 @@ extern int espserial_init(void);
extern int tty_paranoia_check(struct tty_struct *tty, kdev_t device,
const char *routine);
-extern char *_tty_name(struct tty_struct *tty, char *buf);
-extern char *tty_name(struct tty_struct *tty);
+extern char *tty_name(struct tty_struct *tty, char *buf);
extern void tty_wait_until_sent(struct tty_struct * tty, int timeout);
extern int tty_check_change(struct tty_struct * tty);
extern void stop_tty(struct tty_struct * tty);
diff --git a/include/linux/umsdos_fs.p b/include/linux/umsdos_fs.p
index 62ce67d0c..7c0e64ec3 100644
--- a/include/linux/umsdos_fs.p
+++ b/include/linux/umsdos_fs.p
@@ -1,9 +1,6 @@
/* check.c 23/01/95 03.38.30 */
void check_page_tables (void);
/* dir.c 22/06/95 00.22.12 */
-struct dentry *creat_dentry (const char *name,
- const int len,
- struct inode *inode);
int compat_msdos_create(struct inode *dir,
const char *name,
int len,
@@ -30,6 +27,16 @@ int UMSDOS_lookup(struct inode *dir,struct dentry *dentry);
int umsdos_hlink2inode (struct inode *hlink, struct inode **result);
/* emd.c 22/06/95 00.22.04 */
+void fill_new_filp (struct file *filp, struct dentry *dentry);
+void kill_dentry (struct dentry *dentry);
+struct dentry *creat_dentry (const char *name,
+ const int len,
+ struct inode *inode);
+ssize_t umsdos_file_write_kmem_real (struct file *filp,
+ const char *buf,
+ size_t count,
+ loff_t *offs);
+
ssize_t umsdos_file_read_kmem (struct inode *emd_dir,
struct file *filp,
char *buf,
diff --git a/include/net/dst.h b/include/net/dst.h
index b879bb059..0d18f60d2 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -120,6 +120,8 @@ extern void dst_destroy(struct dst_entry * dst);
extern __inline__
void dst_free(struct dst_entry * dst)
{
+ if (dst->obsolete > 1)
+ return;
if (!atomic_read(&dst->use)) {
dst_destroy(dst);
return;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 42233aadf..863037b23 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -114,7 +114,7 @@ extern __inline__ void ip6_dst_store(struct sock *sk, struct dst_entry *dst)
struct rt6_info *rt;
np = &sk->net_pinfo.af_inet6;
- sk->dst_cache = dst;
+ dst_release(xchg(&sk->dst_cache,dst));
rt = (struct rt6_info *) dst;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index b6055ae44..1a322a498 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -4,7 +4,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: ipv6.h,v 1.8 1997/12/29 19:52:09 kuznet Exp $
+ * $Id: ipv6.h,v 1.9 1998/03/08 05:55:20 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/include/net/route.h b/include/net/route.h
index 338e158fd..624fd233a 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -102,6 +102,7 @@ extern unsigned inet_addr_type(u32 addr);
extern void ip_rt_multicast_event(struct in_device *);
extern int ip_rt_ioctl(unsigned int cmd, void *arg);
extern void ip_rt_get_source(u8 *src, struct rtable *rt);
+extern int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb);
extern __inline__ void ip_rt_put(struct rtable * rt)
diff --git a/include/net/sock.h b/include/net/sock.h
index c225a0015..589f58c7c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -191,42 +191,75 @@ struct raw_opt {
struct tcp_opt
{
+ int tcp_header_len; /* Bytes of tcp header to send */
+
+/*
+ * Header prediction flags
+ * 0x5?10 << 16 + snd_wnd in net byte order
+ */
+ __u32 pred_flags;
+
/*
* RFC793 variables by their proper names. This means you can
* read the code and the spec side by side (and laugh ...)
* See RFC793 and RFC1122. The RFC writes these in capitals.
*/
__u32 rcv_nxt; /* What we want to receive next */
- __u32 rcv_up; /* The urgent point (may not be valid) */
- __u32 rcv_wnd; /* Current receiver window */
__u32 snd_nxt; /* Next sequence we send */
+
__u32 snd_una; /* First byte we want an ack for */
- __u32 snd_up; /* Outgoing urgent pointer */
- __u32 snd_wl1; /* Sequence for window update */
- __u32 snd_wl2; /* Ack sequence for update */
+ __u32 rcv_tstamp; /* timestamp of last received packet */
+ __u32 lrcvtime; /* timestamp of last received data packet*/
+ __u32 srtt; /* smothed round trip time << 3 */
- __u32 rcv_wup; /* rcv_nxt on last window update sent */
+ __u32 ato; /* delayed ack timeout */
+ __u32 snd_wl1; /* Sequence for window update */
- __u32 fin_seq; /* XXX This one should go, we don't need it. -DaveM */
+ __u32 snd_wl2; /* Ack sequence for update */
+ __u32 snd_wnd; /* The window we expect to receive */
+ __u32 max_window;
+ __u8 pending; /* pending events */
+ __u8 retransmits;
+ __u32 last_ack_sent; /* last ack we sent */
- __u32 srtt; /* smothed round trip time << 3 */
+ __u32 backoff; /* backoff */
__u32 mdev; /* medium deviation */
+ __u32 snd_cwnd; /* Sending congestion window */
__u32 rto; /* retransmit timeout */
- __u32 backoff; /* backoff */
+
+ __u32 packets_out; /* Packets which are "in flight" */
+ __u32 high_seq; /* highest sequence number sent by onset of congestion */
/*
* Slow start and congestion control (see also Nagle, and Karn & Partridge)
*/
- __u32 snd_cwnd; /* Sending congestion window */
__u32 snd_ssthresh; /* Slow start size threshold */
__u16 snd_cwnd_cnt;
- __u16 max_window;
+ __u8 dup_acks; /* Consequetive duplicate acks seen from other end */
+ __u8 delayed_acks;
+
+ /* Two commonly used timers in both sender and receiver paths. */
+ struct timer_list retransmit_timer; /* Resend (no ack) */
+ struct timer_list delack_timer; /* Ack delay */
+
+ struct sk_buff_head out_of_order_queue; /* Out of order segments go here */
+ struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */
+ struct sk_buff *send_head; /* Front of stuff to transmit */
+ struct sk_buff *retrans_head; /* retrans head can be
+ * different to the head of
+ * write queue if we are doing
+ * fast retransmit
+ */
+
+ __u32 rcv_wnd; /* Current receiver window */
+ __u32 rcv_wup; /* rcv_nxt on last window update sent */
+ __u32 write_seq;
+ __u32 copied_seq;
/*
* Options received (usually on last packet, some only on SYN packets).
*/
char tstamp_ok, /* TIMESTAMP seen on SYN packet */
- wscale_ok, /* Wscale seen on SYN packet */
- sack_ok; /* SACK_PERM seen on SYN packet */
+ wscale_ok; /* Wscale seen on SYN packet */
char saw_tstamp; /* Saw TIMESTAMP on last packet */
__u16 in_mss; /* MSS option received from sender */
__u8 snd_wscale; /* Window scaling received from sender */
@@ -235,60 +268,20 @@ struct tcp_opt
__u32 rcv_tsecr; /* Time stamp echo reply */
__u32 ts_recent; /* Time stamp to echo next */
__u32 ts_recent_stamp;/* Time we stored ts_recent (for aging) */
- __u32 last_ack_sent; /* last ack we sent */
- int sacks; /* Number of SACK blocks if any */
- __u32 left_sack[4]; /* Left edges of blocks */
- __u32 right_sack[4]; /* Right edges of blocks */
- int tcp_header_len; /* Bytes of tcp header to send */
-/*
- * Timers used by the TCP protocol layer
- */
- struct timer_list delack_timer; /* Ack delay */
- struct timer_list idle_timer; /* Idle watch */
- struct timer_list completion_timer; /* Up/Down timer */
struct timer_list probe_timer; /* Probes */
- struct timer_list retransmit_timer; /* Resend (no ack) */
-
- __u32 basertt; /* Vegas baseRTT */
- __u32 packets_out; /* Packets which are "in flight" */
- __u32 window_clamp; /* XXX Document this... -DaveM */
-
- __u8 pending; /* pending events */
- __u8 delayed_acks;
- __u8 dup_acks; /* Consequetive duplicate acks seen from other end */
- __u8 retransmits;
-
- __u32 lrcvtime; /* timestamp of last received data packet */
- __u32 rcv_tstamp; /* timestamp of last received packet */
- __u32 iat_mdev; /* interarrival time medium deviation */
- __u32 iat; /* interarrival time */
- __u32 ato; /* delayed ack timeout */
- __u32 high_seq; /* highest sequence number sent by onset of congestion */
-
-/*
- * new send pointers
- */
- struct sk_buff * send_head;
- struct sk_buff * retrans_head; /* retrans head can be
- * different to the head of
- * write queue if we are doing
- * fast retransmit
- */
-/*
- * Header prediction flags
- * 0x5?10 << 16 + snd_wnd in net byte order
- */
- __u32 pred_flags;
- __u32 snd_wnd; /* The window we expect to receive */
-
- __u32 probes_out; /* unanswered 0 window probes */
+ __u32 basertt; /* Vegas baseRTT */
+ __u32 window_clamp; /* XXX Document this... -DaveM */
+ __u32 probes_out; /* unanswered 0 window probes */
+ __u32 syn_seq;
+ __u32 fin_seq;
+ __u32 urg_seq;
+ __u32 urg_data;
struct open_request *syn_wait_queue;
struct open_request **syn_wait_last;
int syn_backlog;
- struct tcp_func *af_specific;
};
@@ -347,73 +340,73 @@ struct sock
struct sock *sklist_next;
struct sock *sklist_prev;
- atomic_t wmem_alloc;
- atomic_t rmem_alloc;
- unsigned long allocation; /* Allocation mode */
+ /* Local port binding hash linkage. */
+ struct sock *bind_next;
+ struct sock **bind_pprev;
+
+ /* Main hash linkage for various protocol lookup tables. */
+ struct sock *next;
+ struct sock **pprev;
- /* The following stuff should probably move to the tcp private area */
- __u32 write_seq;
- __u32 copied_seq;
- __u32 syn_seq;
- __u32 urg_seq;
- __u32 urg_data;
- unsigned char delayed_acks;
- /* End of block to move */
+ /* Socket demultiplex comparisons on incoming packets. */
+ __u32 daddr; /* Foreign IPv4 addr */
+ __u32 rcv_saddr; /* Bound local IPv4 addr */
+ int bound_dev_if; /* Bound device index if != 0 */
+ unsigned short num; /* Local port */
+ volatile unsigned char state, /* Connection state */
+ zapped; /* In ax25 & ipx means not linked */
+ struct tcphdr dummy_th; /* TCP header template */
- int sock_readers; /* user count */
+ int sock_readers; /* user count */
+ int rcvbuf;
+
+ struct wait_queue **sleep;
+ struct dst_entry *dst_cache; /* Destination cache */
+ atomic_t rmem_alloc; /* Receive queue bytes committed */
+ struct sk_buff_head receive_queue; /* Incoming packets */
+ atomic_t wmem_alloc; /* Transmit queue bytes committed */
+ struct sk_buff_head write_queue; /* Packet sending queue */
+ atomic_t omem_alloc; /* "o" is "option" or "other" */
+ __u32 saddr; /* Sending source */
+ unsigned int allocation; /* Allocation mode */
+ int sndbuf;
+ struct sock *prev;
/*
* Not all are volatile, but some are, so we
* might as well say they all are.
*/
volatile char dead,
- urginline,
done,
+ urginline,
reuse,
keepopen,
linger,
destroy,
no_check,
- zapped, /* In ax25 & ipx means not linked */
broadcast,
nonagle,
bsdism;
- int bound_dev_if;
- unsigned long lingertime;
+ unsigned char debug;
int proc;
+ unsigned long lingertime;
- struct sock *next;
- struct sock **pprev;
- struct sock *bind_next;
- struct sock **bind_pprev;
- struct sock *prev;
int hashent;
struct sock *pair;
- struct sk_buff_head back_log;
-
- struct sk_buff_head write_queue,
- receive_queue,
- out_of_order_queue,
+ /* Error and backlog packet queues, rarely used. */
+ struct sk_buff_head back_log,
error_queue;
unsigned short family;
struct proto *prot;
- struct wait_queue **sleep;
-
- __u32 daddr;
- __u32 saddr; /* Sending source */
- __u32 rcv_saddr; /* Bound address */
- struct dst_entry *dst_cache;
/*
* mss is min(mtu, max_window)
*/
unsigned short mtu; /* mss negotiated in the syn's */
unsigned short mss; /* current eff. mss - can change */
unsigned short user_mss; /* mss requested by user in ioctl */
- unsigned short num;
-
unsigned short shutdown;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -436,16 +429,12 @@ struct sock
cause failure but are the cause
of a persistent failure not just
'timed out' */
- unsigned char protocol;
- volatile unsigned char state;
unsigned short ack_backlog;
unsigned short max_ack_backlog;
- unsigned char debug;
__u32 priority;
- int rcvbuf;
- int sndbuf;
unsigned short type;
unsigned char localroute; /* Route locally only */
+ unsigned char protocol;
struct ucred peercred;
#ifdef CONFIG_FILTER
@@ -472,11 +461,6 @@ struct sock
#if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE)
struct packet_opt *af_packet;
#endif
-#ifdef CONFIG_INET
-#ifdef CONFIG_NUTCP
- struct tcp_opt af_tcp;
-#endif
-#endif
#if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
x25_cb *x25;
#endif
@@ -503,7 +487,6 @@ struct sock
int ip_ttl; /* TTL setting */
int ip_tos; /* TOS */
unsigned ip_cmsg_flags;
- struct tcphdr dummy_th;
struct ip_options *opt;
unsigned char ip_hdrincl; /* Include headers ? */
__u8 ip_mc_ttl; /* Multicasting TTL */
@@ -731,7 +714,7 @@ here:
}
/*
- * This might not be the most apropriate place for this two
+ * This might not be the most appropriate place for this two
* but since they are used by a lot of the net related code
* at least they get declared on a include that is common to all
*/
@@ -750,7 +733,7 @@ static __inline__ int max(unsigned int a, unsigned int b)
return a;
}
-extern struct sock * sk_alloc(int family, int priority);
+extern struct sock * sk_alloc(int family, int priority, int zero_it);
extern void sk_free(struct sock *sk);
extern void destroy_sock(struct sock *sk);
@@ -884,7 +867,6 @@ extern __inline__ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
*/
if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
return -ENOMEM;
- skb_set_owner_r(skb, sk);
#ifdef CONFIG_FILTER
if (sk->filter)
@@ -894,7 +876,8 @@ extern __inline__ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
#endif /* CONFIG_FILTER */
- skb_queue_tail(&sk->receive_queue,skb);
+ skb_set_owner_r(skb, sk);
+ skb_queue_tail(&sk->receive_queue, skb);
if (!sk->dead)
sk->data_ready(sk,skb->len);
return 0;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4c445ca1a..cec01dfe6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -27,13 +27,13 @@
* New scheme, half the table is for TIME_WAIT, the other half is
* for the rest. I'll experiment with dynamic table growth later.
*/
-#define TCP_HTABLE_SIZE 1024
+#define TCP_HTABLE_SIZE 512
/* This is for listening sockets, thus all sockets which possess wildcards. */
#define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */
/* This is for all sockets, to keep track of the local port allocations. */
-#define TCP_BHTABLE_SIZE 64
+#define TCP_BHTABLE_SIZE 512
/* tcp_ipv4.c: These need to be shared by v4 and v6 because the lookup
* and hashing code needs to work with different AF's yet
@@ -41,47 +41,153 @@
*/
extern struct sock *tcp_established_hash[TCP_HTABLE_SIZE];
extern struct sock *tcp_listening_hash[TCP_LHTABLE_SIZE];
-extern struct sock *tcp_bound_hash[TCP_BHTABLE_SIZE];
-/* tcp_ipv4.c: These sysctl variables need to be shared between v4 and v6
- * because the v6 tcp code to intialize a connection needs to interoperate
- * with the v4 code using the same variables.
- * FIXME: It would be better to rewrite the connection code to be
- * address family independent and just leave one copy in the ipv4 section.
- * This would also clean up some code duplication. -- erics
+/* There are a few simple rules, which allow for local port reuse by
+ * an application. In essence:
+ *
+ * 1) Sockets bound to different interfaces may share a local port.
+ * Failing that, goto test 2.
+ * 2) If all sockets have sk->reuse set, and none of them are in
+ * TCP_LISTEN state, the port may be shared.
+ * Failing that, goto test 3.
+ * 3) If all sockets are bound to a specific sk->rcv_saddr local
+ * address, and none of them are the same, the port may be
+ * shared.
+ * Failing this, the port cannot be shared.
+ *
+ * The interesting point, is test #2. This is what an FTP server does
+ * all day. To optimize this case we use a specific flag bit defined
+ * below. As we add sockets to a bind bucket list, we perform a
+ * check of: (newsk->reuse && (newsk->state != TCP_LISTEN))
+ * As long as all sockets added to a bind bucket pass this test,
+ * the flag bit will be set.
+ * The resulting situation is that tcp_v[46]_verify_bind() can just check
+ * for this flag bit, if it is set and the socket trying to bind has
+ * sk->reuse set, we don't even have to walk the owners list at all,
+ * we return that it is ok to bind this socket to the requested local port.
+ *
+ * Sounds like a lot of work, but it is worth it. In a more naive
+ * implementation (ie. current FreeBSD etc.) the entire list of ports
+ * must be walked for each data port opened by an ftp server. Needless
+ * to say, this does not scale at all. With a couple thousand FTP
+ * users logged onto your box, isn't it nice to know that new data
+ * ports are created in O(1) time? I thought so. ;-) -DaveM
*/
-extern int sysctl_tcp_sack;
-extern int sysctl_tcp_timestamps;
-extern int sysctl_tcp_window_scaling;
+struct tcp_bind_bucket {
+ unsigned short port;
+ unsigned short flags;
+#define TCPB_FLAG_LOCKED 0x0001
+#define TCPB_FLAG_FASTREUSE 0x0002
+
+ struct tcp_bind_bucket *next;
+ struct sock *owners;
+ struct tcp_bind_bucket **pprev;
+};
-/* These are AF independent. */
-static __inline__ int tcp_bhashfn(__u16 lport)
+extern struct tcp_bind_bucket *tcp_bound_hash[TCP_BHTABLE_SIZE];
+extern kmem_cache_t *tcp_bucket_cachep;
+extern struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum);
+extern void tcp_bucket_unlock(struct sock *sk);
+extern int tcp_port_rover;
+
+/* Level-1 socket-demux cache. */
+#define TCP_NUM_REGS 32
+extern struct sock *tcp_regs[TCP_NUM_REGS];
+
+#define TCP_RHASH_FN(__fport) \
+ ((((__fport) >> 7) ^ (__fport)) & (TCP_NUM_REGS - 1))
+#define TCP_RHASH(__fport) tcp_regs[TCP_RHASH_FN((__fport))]
+#define TCP_SK_RHASH_FN(__sock) TCP_RHASH_FN((__sock)->dummy_th.dest)
+#define TCP_SK_RHASH(__sock) tcp_regs[TCP_SK_RHASH_FN((__sock))]
+
+static __inline__ void tcp_reg_zap(struct sock *sk)
{
- return (lport ^ (lport >> 7)) & (TCP_BHTABLE_SIZE - 1);
+ struct sock **rpp;
+
+ rpp = &(TCP_SK_RHASH(sk));
+ if(*rpp == sk)
+ *rpp = NULL;
}
-/* Find the next port that hashes h that is larger than lport.
- * If you change the hash, change this function to match, or you will
- * break TCP port selection. This function must also NOT wrap around
- * when the next number exceeds the largest possible port (2^16-1).
- */
-static __inline__ int tcp_bhashnext(__u16 lport, __u16 h)
+/* These are AF independent. */
+static __inline__ int tcp_bhashfn(__u16 lport)
{
- __u32 s; /* don't change this to a smaller type! */
-
- s = (lport ^ (h ^ tcp_bhashfn(lport)));
- if (s > lport)
- return s;
- s = lport + TCP_BHTABLE_SIZE;
- return (s ^ (h ^ tcp_bhashfn(s)));
+ return (lport & (TCP_BHTABLE_SIZE - 1));
}
-static __inline__ int tcp_sk_bhashfn(struct sock *sk)
+static __inline__ void tcp_sk_bindify(struct sock *sk)
{
- __u16 lport = sk->num;
- return tcp_bhashfn(lport);
+ struct tcp_bind_bucket *tb;
+ unsigned short snum = sk->num;
+
+ for(tb = tcp_bound_hash[tcp_bhashfn(snum)]; tb->port != snum; tb = tb->next)
+ ;
+ /* Update bucket flags. */
+ if(tb->owners == NULL) {
+ /* We're the first. */
+ if(sk->reuse && sk->state != TCP_LISTEN)
+ tb->flags = TCPB_FLAG_FASTREUSE;
+ else
+ tb->flags = 0;
+ } else {
+ if((tb->flags & TCPB_FLAG_FASTREUSE) &&
+ ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
+ tb->flags &= ~TCPB_FLAG_FASTREUSE;
+ }
+ if((sk->bind_next = tb->owners) != NULL)
+ tb->owners->bind_pprev = &sk->bind_next;
+ tb->owners = sk;
+ sk->bind_pprev = &tb->owners;
+ sk->prev = (struct sock *) tb;
}
+/* This is a TIME_WAIT bucket. It works around the memory consumption
+ * problems of sockets in such a state on heavily loaded servers, but
+ * without violating the protocol specification.
+ */
+struct tcp_tw_bucket {
+ /* These _must_ match the beginning of struct sock precisely.
+ * XXX Yes I know this is gross, but I'd have to edit every single
+ * XXX networking file if I created a "struct sock_header". -DaveM
+ */
+ struct sock *sklist_next;
+ struct sock *sklist_prev;
+ struct sock *bind_next;
+ struct sock **bind_pprev;
+ struct sock *next;
+ struct sock **pprev;
+ __u32 daddr;
+ __u32 rcv_saddr;
+ int bound_dev_if;
+ unsigned short num;
+ unsigned char state,
+ family; /* sk->zapped */
+ __u16 source; /* sk->dummy_th.source */
+ __u16 dest; /* sk->dummy_th.dest */
+
+ /* And these are ours. */
+ __u32 rcv_nxt;
+ struct tcp_func *af_specific;
+ struct tcp_bind_bucket *tb;
+ struct timer_list timer;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ struct in6_addr v6_daddr;
+ struct in6_addr v6_rcv_saddr;
+#endif
+};
+
+extern kmem_cache_t *tcp_timewait_cachep;
+
+/* tcp_ipv4.c: These sysctl variables need to be shared between v4 and v6
+ * because the v6 tcp code to intialize a connection needs to interoperate
+ * with the v4 code using the same variables.
+ * FIXME: It would be better to rewrite the connection code to be
+ * address family independent and just leave one copy in the ipv4 section.
+ * This would also clean up some code duplication. -- erics
+ */
+extern int sysctl_tcp_timestamps;
+extern int sysctl_tcp_window_scaling;
+
/* These can have wildcards, don't try too hard. */
static __inline__ int tcp_lhashfn(unsigned short num)
{
@@ -93,28 +199,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
return tcp_lhashfn(sk->num);
}
-/* Only those holding the sockhash lock call these two things here.
- * Note the slightly gross overloading of sk->prev, AF_UNIX is the
- * only other main benefactor of that member of SK, so who cares.
- */
-static __inline__ void tcp_sk_bindify(struct sock *sk)
-{
- int hashent = tcp_sk_bhashfn(sk);
- struct sock **htable = &tcp_bound_hash[hashent];
-
- if((sk->bind_next = *htable) != NULL)
- (*htable)->bind_pprev = &sk->bind_next;
- *htable = sk;
- sk->bind_pprev = htable;
-}
-
-static __inline__ void tcp_sk_unbindify(struct sock *sk)
-{
- if(sk->bind_next)
- sk->bind_next->bind_pprev = sk->bind_pprev;
- *(sk->bind_pprev) = sk->bind_next;
-}
-
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#define NETHDR_SIZE sizeof(struct ipv6hdr)
#else
@@ -186,6 +270,8 @@ static __inline__ void tcp_sk_unbindify(struct sock *sk)
* we tell the LL layer that it is something
* wrong (e.g. that it can expire redirects) */
+#define TCP_BUCKETGC_PERIOD (HZ)
+
/*
* TCP option
*/
@@ -193,9 +279,6 @@ static __inline__ void tcp_sk_unbindify(struct sock *sk)
#define TCPOPT_NOP 1 /* Padding */
#define TCPOPT_EOL 0 /* End of options */
#define TCPOPT_MSS 2 /* Segment size negotiating */
-/*
- * We don't use these yet, but they are for PAWS and big windows
- */
#define TCPOPT_WINDOW 3 /* Window scaling */
#define TCPOPT_SACK_PERM 4 /* SACK Permitted */
#define TCPOPT_SACK 5 /* SACK Block */
@@ -210,6 +293,10 @@ static __inline__ void tcp_sk_unbindify(struct sock *sk)
#define TCPOLEN_SACK_PERM 2
#define TCPOLEN_TIMESTAMP 10
+/* But this is what stacks really send out. */
+#define TCPOLEN_TSTAMP_ALIGNED 12
+#define TCPOLEN_WSCALE_ALIGNED 4
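+
+/* These aligned lengths include the padding needed to keep option space
+ * on a 32-bit boundary: 12 = two TCPOPT_NOP bytes + TCPOLEN_TIMESTAMP (10),
+ * and 4 = TCPOLEN_WINDOW (3) rounded up by one pad byte.
+ */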
+
/*
* TCP option flags for parsed options.
*/
@@ -259,7 +346,6 @@ struct open_request {
__u8 __pad;
unsigned snd_wscale : 4,
rcv_wscale : 4,
- sack_ok : 1,
tstamp_ok : 1,
wscale_ok : 1;
/* The following two fields can be easily recomputed I think -AK */
@@ -355,7 +441,7 @@ extern __inline int after(__u32 seq1, __u32 seq2)
/* is s2<=s1<=s3 ? */
extern __inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
{
- return (after(seq1+1, seq2) && before(seq1, seq3+1));
+ return seq3 - seq2 >= seq1 - seq2;
}
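+
+/* Worked example: with seq2 == 100 and seq3 == 300, seq1 == 200 gives
+ * 300-100 = 200 >= 200-100 = 100, i.e. true, while seq1 == 400 gives
+ * 200 >= 300, i.e. false.  Since the subtractions are modulo 2^32, the
+ * test also behaves correctly across sequence number wraparound.
+ */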
@@ -390,6 +476,11 @@ extern int tcp_rcv_established(struct sock *sk,
struct tcphdr *th,
__u16 len);
+extern int tcp_timewait_state_process(struct tcp_tw_bucket *tw,
+ struct sk_buff *skb,
+ struct tcphdr *th,
+ void *opt, __u16 len);
+
extern void tcp_close(struct sock *sk,
unsigned long timeout);
extern struct sock * tcp_accept(struct sock *sk, int flags);
@@ -427,6 +518,10 @@ extern int tcp_v4_conn_request(struct sock *sk,
struct sk_buff *skb,
void *ptr, __u32 isn);
+extern struct sock * tcp_create_openreq_child(struct sock *sk,
+ struct open_request *req,
+ struct sk_buff *skb);
+
extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk,
struct sk_buff *skb,
struct open_request *req,
@@ -457,10 +552,11 @@ extern void tcp_send_probe0(struct sock *);
extern void tcp_send_partial(struct sock *);
extern void tcp_write_wakeup(struct sock *);
extern void tcp_send_fin(struct sock *sk);
+extern void tcp_send_active_reset(struct sock *sk);
extern int tcp_send_synack(struct sock *);
-extern void tcp_send_skb(struct sock *, struct sk_buff *);
+extern void tcp_send_skb(struct sock *, struct sk_buff *, int force_queue);
extern void tcp_send_ack(struct sock *sk);
-extern void tcp_send_delayed_ack(struct sock *sk, int max_timeout);
+extern void tcp_send_delayed_ack(struct tcp_opt *tp, int max_timeout);
/* CONFIG_IP_TRANSPARENT_PROXY */
extern int tcp_chkaddr(struct sk_buff *);
@@ -492,40 +588,94 @@ struct tcp_sl_timer {
#define TCP_SLT_SYNACK 0
#define TCP_SLT_KEEPALIVE 1
-#define TCP_SLT_MAX 2
+#define TCP_SLT_BUCKETGC 2
+#define TCP_SLT_MAX 3
extern struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX];
-/*
- * FIXME: this method of choosing when to send a window update
- * does not seem correct to me. -- erics
+/* Compute the actual receive window we are currently advertising. */
+static __inline__ u32 tcp_receive_window(struct tcp_opt *tp)
+{
+ return tp->rcv_wup - (tp->rcv_nxt - tp->rcv_wnd);
+}
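+
+/* Worked example: if the last window advertisement went out when
+ * rcv_wup == 1000 with rcv_wnd == 8000 (right edge 9000) and rcv_nxt
+ * has since advanced to 3000, the space still open to the sender is
+ * 1000 - (3000 - 8000) == 6000 bytes.
+ */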
+
+/* Choose a new window, without checks for shrinking, and without
+ * scaling applied to the result. The caller does these things
+ * if necessary. This is a "raw" window selection.
*/
-static __inline__ unsigned short tcp_raise_window(struct sock *sk)
+extern u32 __tcp_select_window(struct sock *sk);
+
+/* Choose a new window to advertise, update state in tcp_opt for the
+ * socket, and return result with RFC1323 scaling applied. The return
+ * value can be stuffed directly into th->window for an outgoing
+ * frame.
+ */
+extern __inline__ u16 tcp_select_window(struct sock *sk)
{
- struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
- long cur_win;
- int res = 0;
-
- /*
- * compute the actual window i.e.
- * old_window - received_bytes_on_that_win
- */
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ u32 new_win = __tcp_select_window(sk);
+ u32 cur_win = tcp_receive_window(tp);
- cur_win = tp->rcv_wup - (tp->rcv_nxt - tp->rcv_wnd);
+ /* Never shrink the offered window */
+ if(new_win < cur_win)
+ new_win = cur_win;
+ tp->rcv_wnd = new_win;
+ tp->rcv_wup = tp->rcv_nxt;
+ /* RFC1323 scaling applied */
+ return new_win >> tp->rcv_wscale;
+}
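+
+/* Worked example of the scaling: with tp->rcv_wscale == 7, a selected
+ * window of 1048576 bytes is advertised as th->window == 8192.
+ */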
- /*
- * We need to send an ack right away if
- * our rcv window is blocking the sender and
- * we have more free space to offer.
- */
+/* See if we can advertise a non-zero window, and if so by how much
+ * we could increase it.  Return true if the new window would be more
+ * than twice the one currently advertised.
+ */
+extern __inline__ int tcp_raise_window(struct sock *sk)
+{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ u32 new_win = __tcp_select_window(sk);
+ u32 cur_win = tcp_receive_window(tp);
- if (cur_win < (sk->mss << 1))
- res = 1;
- return res;
+ return (new_win && (new_win > (cur_win << 1)));
}
-extern unsigned short tcp_select_window(struct sock *sk);
+/* This checks if the data bearing packet SKB (usually tp->send_head)
+ * should be put on the wire right now.
+ */
+static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ int nagle_check = 1;
+ int len;
+
+ /* RFC 1122 - section 4.2.3.4
+ *
+ * We must queue if
+ *
+ * a) The right edge of this frame exceeds the window
+ * b) There are packets in flight and we have a small segment
+ * [SWS avoidance and Nagle algorithm]
+ * (part of SWS is done on packetization)
+ *    c) We are retransmitting [Nagle]
+ * d) We have too many packets 'in flight'
+ *
+ * Don't use the nagle rule for urgent data.
+ */
+ len = skb->end_seq - skb->seq;
+ if (!sk->nonagle && len < (sk->mss >> 1) && tp->packets_out &&
+ !skb->h.th->urg)
+ nagle_check = 0;
+
+ return (nagle_check && tp->packets_out < tp->snd_cwnd &&
+ !after(skb->end_seq, tp->snd_una + tp->snd_wnd) &&
+ tp->retransmits == 0);
+}
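+
+/* Illustrative use only (tcp_write_xmit is assumed here to be the
+ * transmit routine; the exact call site is not part of this header):
+ * the write path would typically do something like
+ *
+ *	if (tp->send_head && tcp_snd_test(sk, tp->send_head))
+ *		tcp_write_xmit(sk);
+ *
+ * and otherwise leave the skb queued for a later ACK or timer to retry.
+ */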
+
+/* This tells the input processing path that an ACK should go out
+ * right now.
+ */
+#define tcp_enter_quickack_mode(__tp) ((__tp)->ato = (HZ/100))
+#define tcp_in_quickack_mode(__tp) ((__tp)->ato == (HZ/100))
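+
+/* Note: tp->ato is the delayed-ACK timeout estimate, so pinning it to
+ * HZ/100 (one jiffy at HZ=100, i.e. ~10ms) makes the next ACK go out
+ * almost immediately, and the second macro just tests for that value.
+ */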
/*
* List all states of a TCP socket that can be viewed as a "connected"
@@ -581,41 +731,49 @@ static __inline__ void tcp_set_state(struct sock *sk, int state)
case TCP_CLOSE:
/* Should be about 2 rtt's */
net_reset_timer(sk, TIME_DONE, min(tp->srtt * 2, TCP_DONE_TIME));
+ sk->prot->unhash(sk);
/* fall through */
default:
if (oldstate==TCP_ESTABLISHED)
tcp_statistics.TcpCurrEstab--;
- if (state == TCP_TIME_WAIT || state == TCP_CLOSE)
- sk->prot->rehash(sk);
}
}
static __inline__ void tcp_build_options(__u32 *ptr, struct tcp_opt *tp)
{
- /* FIXME: We will still need to do SACK here. */
if (tp->tstamp_ok) {
- *ptr = ntohl((TCPOPT_NOP << 24)
- | (TCPOPT_NOP << 16)
- | (TCPOPT_TIMESTAMP << 8)
- | TCPOLEN_TIMESTAMP);
+ *ptr = __constant_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
/* rest filled in by tcp_update_options */
}
}
static __inline__ void tcp_update_options(__u32 *ptr, struct tcp_opt *tp)
{
- /* FIXME: We will still need to do SACK here. */
if (tp->tstamp_ok) {
*++ptr = htonl(jiffies);
*++ptr = htonl(tp->ts_recent);
}
}
+static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *tp)
+{
+ if (tp->tstamp_ok) {
+ *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ *ptr++ = htonl(jiffies);
+ *ptr = htonl(tp->ts_recent);
+ }
+}
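+
+/* The intended split: tcp_build_options() lays down the constant first
+ * word when a header template is built, tcp_update_options() refreshes
+ * TSVAL/TSECR per packet, and the combined helper above does both for
+ * callers that construct the whole option block in one pass.
+ */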
+
/*
  * These routines build a generic TCP header.
* They also build the RFC1323 Timestamp, but don't fill the
* actual timestamp in (you need to call tcp_update_options for this).
- * It can't (unfortunately) do SACK as well.
* XXX: pass tp instead of sk here.
*/
@@ -624,23 +782,12 @@ static inline void tcp_build_header_data(struct tcphdr *th, struct sock *sk, int
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
- th->seq = htonl(sk->write_seq);
+ th->seq = htonl(tp->write_seq);
if (!push)
th->psh = 1;
tcp_build_options((__u32*)(th+1), tp);
}
-static inline void tcp_build_header(struct tcphdr *th, struct sock *sk)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
- th->seq = htonl(sk->write_seq);
- th->ack_seq = htonl(tp->last_ack_sent = tp->rcv_nxt);
- th->window = htons(tcp_select_window(sk));
- tcp_build_options((__u32 *)(th+1), tp);
-}
-
/*
* Construct a tcp options header for a SYN or SYN_ACK packet.
  * If this is ever changed, make sure to change the definition of
@@ -651,31 +798,32 @@ static inline void tcp_build_header(struct tcphdr *th, struct sock *sk)
* It would be especially magical to compute the checksum for this
* stuff on the fly here.
*/
-extern __inline__ int tcp_syn_build_options(struct sk_buff *skb, int mss, int sack, int ts, int offer_wscale, int wscale)
+extern __inline__ int tcp_syn_build_options(struct sk_buff *skb, int mss, int ts, int offer_wscale, int wscale)
{
- int count = 4 + (offer_wscale ? 4 : 0) + ((ts || sack) ? 4 : 0) + (ts ? 8 : 0);
+ int count = 4 + (offer_wscale ? TCPOLEN_WSCALE_ALIGNED : 0) +
+ ((ts) ? TCPOLEN_TSTAMP_ALIGNED : 0);
unsigned char *optr = skb_put(skb,count);
__u32 *ptr = (__u32 *)optr;
- /*
- * We always get an MSS option.
+ /* We always get an MSS option.
+ * If timestamps are in use, the option bytes that will appear in
+ * normal data packets must be counted in the advertised MSS.  But
+ * we subtract them from sk->mss so that the calculations in
+ * tcp_sendmsg stay simpler, so account for that here.  If we
+ * don't do this correctly, as a receiver we won't
+ * recognize data packets as being full sized when we
+ * should, and thus we won't abide by the delayed ACK
+ * rules correctly.
*/
+ if(ts)
+ mss += TCPOLEN_TSTAMP_ALIGNED;
*ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
if (ts) {
- if (sack) {
- *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16)
- | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
- *ptr++ = htonl(jiffies); /* TSVAL */
- *ptr++ = htonl(0); /* TSECR */
- } else {
- *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
- | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
- *ptr++ = htonl(jiffies); /* TSVAL */
- *ptr++ = htonl(0); /* TSECR */
- }
- } else if (sack) {
- *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16)
- | (TCPOPT_NOP << 8) | TCPOPT_NOP);
+ *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
+ *ptr++ = htonl(jiffies); /* TSVAL */
+ *ptr++ = __constant_htonl(0); /* TSECR */
}
if (offer_wscale)
*ptr++ = htonl((TCPOPT_WINDOW << 24) | (TCPOLEN_WINDOW << 16) | (wscale << 8));
@@ -724,33 +872,15 @@ extern __inline__ void tcp_select_initial_window(__u32 space, __u16 mss,
(*window_clamp) = min(65535<<(*rcv_wscale),*window_clamp);
}
-#define SYNQ_DEBUG 1
-
extern __inline__ void tcp_synq_unlink(struct tcp_opt *tp, struct open_request *req, struct open_request *prev)
{
-#ifdef SYNQ_DEBUG
- if (prev->dl_next != req) {
- printk(KERN_DEBUG "synq_unlink: bad prev ptr: %p\n",prev);
- return;
- }
-#endif
- if(!req->dl_next) {
-#ifdef SYNQ_DEBUG
- if (tp->syn_wait_last != (void*) req)
- printk(KERN_DEBUG "synq_unlink: bad last ptr %p,%p\n",
- req,tp->syn_wait_last);
-#endif
+ if(!req->dl_next)
tp->syn_wait_last = (struct open_request **)prev;
- }
prev->dl_next = req->dl_next;
}
extern __inline__ void tcp_synq_queue(struct tcp_opt *tp, struct open_request *req)
{
-#ifdef SYNQ_DEBUG
- if (*tp->syn_wait_last != NULL)
- printk("synq_queue: last ptr doesn't point to last req.\n");
-#endif
req->dl_next = NULL;
*tp->syn_wait_last = req;
tp->syn_wait_last = &req->dl_next;
@@ -765,14 +895,11 @@ extern __inline__ void tcp_synq_init(struct tcp_opt *tp)
extern __inline__ struct open_request *tcp_synq_unlink_tail(struct tcp_opt *tp)
{
struct open_request *head = tp->syn_wait_queue;
-#ifdef SYNQ_DEBUG
- if (!head) {
- printk(KERN_DEBUG "tail drop on empty queue? - bug\n");
- return NULL;
- }
-#endif
+#if 0
+ /* Should be a net-ratelimit'd thing, not all the time. */
printk(KERN_DEBUG "synq tail drop with expire=%ld\n",
head->expires-jiffies);
+#endif
if (head->dl_next == NULL)
tp->syn_wait_last = &tp->syn_wait_queue;
tp->syn_wait_queue = head->dl_next;
@@ -799,6 +926,17 @@ extern __inline__ void tcp_dec_slow_timer(int timer)
atomic_dec(&slt->count);
}
+/* This needs to use a slow timer, so it is here. */
+static __inline__ void tcp_sk_unbindify(struct sock *sk)
+{
+ struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *) sk->prev;
+ if(sk->bind_next)
+ sk->bind_next->bind_pprev = sk->bind_pprev;
+ *sk->bind_pprev = sk->bind_next;
+ if(tb->owners == NULL)
+ tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
+}
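+
+/* Note: the bucket itself is not freed here; once its owner list goes
+ * empty the TCP_SLT_BUCKETGC slow timer is bumped so that empty bind
+ * buckets are reaped lazily by the garbage collector.
+ */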
+
extern const char timer_bug_msg[];
static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
@@ -820,7 +958,8 @@ static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
printk(timer_bug_msg);
return;
};
- del_timer(timer);
+ if(timer->prev != NULL)
+ del_timer(timer);
}
static inline int tcp_timer_is_set(struct sock *sk, int what)
@@ -829,13 +968,13 @@ static inline int tcp_timer_is_set(struct sock *sk, int what)
switch (what) {
case TIME_RETRANS:
- return tp->retransmit_timer.next != NULL;
+ return tp->retransmit_timer.prev != NULL;
break;
case TIME_DACK:
- return tp->delack_timer.next != NULL;
+ return tp->delack_timer.prev != NULL;
break;
case TIME_PROBE0:
- return tp->probe_timer.next != NULL;
+ return tp->probe_timer.prev != NULL;
break;
default:
printk(timer_bug_msg);