diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1998-03-18 17:17:51 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 1998-03-18 17:17:51 +0000 |
commit | f1382dc4850bb459d24a81c6cb0ef93ea7bd4a79 (patch) | |
tree | 225271a3d5dcd4e9dea5ee393556abd754c964b1 /include | |
parent | 135b00fc2e90e605ac2a96b20b0ebd93851a3f89 (diff) |
o Merge with Linux 2.1.90.
o Divide L1 cache sizes by 1024 before printing; this makes the numbers a
bit more credible.
Diffstat (limited to 'include')
36 files changed, 577 insertions, 532 deletions
diff --git a/include/asm-alpha/fpu.h b/include/asm-alpha/fpu.h index ab9b28f6e..333e5caeb 100644 --- a/include/asm-alpha/fpu.h +++ b/include/asm-alpha/fpu.h @@ -37,21 +37,21 @@ * compatibly. The corresponding definitions are in * /usr/include/machine/fpu.h under OSF/1. */ -#define IEEE_TRAP_ENABLE_INV (1<<1) /* invalid op */ -#define IEEE_TRAP_ENABLE_DZE (1<<2) /* division by zero */ -#define IEEE_TRAP_ENABLE_OVF (1<<3) /* overflow */ -#define IEEE_TRAP_ENABLE_UNF (1<<4) /* underflow */ -#define IEEE_TRAP_ENABLE_INE (1<<5) /* inexact */ +#define IEEE_TRAP_ENABLE_INV (1UL<<1) /* invalid op */ +#define IEEE_TRAP_ENABLE_DZE (1UL<<2) /* division by zero */ +#define IEEE_TRAP_ENABLE_OVF (1UL<<3) /* overflow */ +#define IEEE_TRAP_ENABLE_UNF (1UL<<4) /* underflow */ +#define IEEE_TRAP_ENABLE_INE (1UL<<5) /* inexact */ #define IEEE_TRAP_ENABLE_MASK (IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_DZE |\ IEEE_TRAP_ENABLE_OVF | IEEE_TRAP_ENABLE_UNF |\ IEEE_TRAP_ENABLE_INE) /* status bits coming from fpcr: */ -#define IEEE_STATUS_INV (1<<17) -#define IEEE_STATUS_DZE (1<<18) -#define IEEE_STATUS_OVF (1<<19) -#define IEEE_STATUS_UNF (1<<20) -#define IEEE_STATUS_INE (1<<21) +#define IEEE_STATUS_INV (1UL<<17) +#define IEEE_STATUS_DZE (1UL<<18) +#define IEEE_STATUS_OVF (1UL<<19) +#define IEEE_STATUS_UNF (1UL<<20) +#define IEEE_STATUS_INE (1UL<<21) #define IEEE_STATUS_MASK (IEEE_STATUS_INV | IEEE_STATUS_DZE | \ IEEE_STATUS_OVF | IEEE_STATUS_UNF | \ @@ -64,7 +64,7 @@ #define IEEE_INHERIT (1UL<<63) /* inherit on thread create? */ /* - * Convert the spftware IEEE trap enable and status bits into the + * Convert the software IEEE trap enable and status bits into the * hardware fpcr format. 
*/ diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index 4744df9c8..5889ec880 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -1,12 +1,15 @@ #ifndef _I386_PAGE_H #define _I386_PAGE_H +#include <linux/config.h> + /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 #define PAGE_SIZE (1UL << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) #ifdef __KERNEL__ +#ifndef __ASSEMBLY__ #define STRICT_MM_TYPECHECKS @@ -52,12 +55,14 @@ typedef unsigned long pgprot_t; #define __pgprot(x) (x) #endif +#endif /* !__ASSEMBLY__ */ /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) /* This handles the memory map.. */ -#define PAGE_OFFSET 0xC0000000 +#define __PAGE_OFFSET ((0x1000-CONFIG_MAX_MEMSIZE)<<20) +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define MAP_NR(addr) (__pa(addr) >> PAGE_SHIFT) diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index fe0864913..47e1d2cfc 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -13,6 +13,7 @@ * the i386 page table tree. */ +#ifndef __ASSEMBLY__ /* Caches aren't brain-dead on the intel. 
*/ #define flush_cache_all() do { } while (0) #define flush_cache_mm(mm) do { } while (0) @@ -155,6 +156,7 @@ static inline void flush_tlb_range(struct mm_struct *mm, } #endif #endif +#endif /* !__ASSEMBLY__ */ /* Certain architectures need to do special things when pte's @@ -181,6 +183,16 @@ static inline void flush_tlb_range(struct mm_struct *mm, #define PTRS_PER_PMD 1 #define PTRS_PER_PGD 1024 +/* + * pgd entries used up by user/kernel: + */ + +#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) +#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) +#define __USER_PGD_PTRS ((__PAGE_OFFSET >> PGDIR_SHIFT) & 0x3ff) +#define __KERNEL_PGD_PTRS (PTRS_PER_PGD-__USER_PGD_PTRS) + +#ifndef __ASSEMBLY__ /* Just any arbitrary offset to the start of the vmalloc VM area: the * current 8MB value just means that there will be a 8MB "hole" after the * physical memory until the kernel virtual memory starts. That means that @@ -497,4 +509,6 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma, #define module_map vmalloc #define module_unmap vfree +#endif /* !__ASSEMBLY__ */ + #endif /* _I386_PAGE_H */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 778466bbe..fc62069a5 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -10,6 +10,7 @@ #include <asm/vm86.h> #include <asm/math_emu.h> #include <asm/segment.h> +#include <asm/page.h> /* * CPU type and hardware bug flags. Kept separately for each CPU. @@ -71,10 +72,9 @@ extern unsigned int machine_submodel_id; extern unsigned int BIOS_revision; /* - * User space process size: 3GB. This is hardcoded into a few places, - * so don't change it unless you know what you are doing. + * User space process size: 3GB (default). */ -#define TASK_SIZE (0xC0000000UL) +#define TASK_SIZE (PAGE_OFFSET) /* This decides where the kernel will search for a free chunk of vm * space during mmap's. 
diff --git a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h index ef08ac510..9da2fff06 100644 --- a/include/asm-i386/uaccess.h +++ b/include/asm-i386/uaccess.h @@ -5,6 +5,7 @@ * User space memory access functions */ #include <linux/sched.h> +#include <asm/page.h> #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -21,7 +22,7 @@ #define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) -#define USER_DS MAKE_MM_SEG(0xC0000000) +#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) #define get_ds() (KERNEL_DS) #define get_fs() (current->addr_limit) diff --git a/include/linux/coda.h b/include/linux/coda.h index 3faa2e9ca..5c3cb563e 100644 --- a/include/linux/coda.h +++ b/include/linux/coda.h @@ -587,9 +587,9 @@ struct cfs_open_by_path_out { }; /* - * Occasionally, don't cache the fid returned by CFS_LOOKUP. For instance, if - * the fid is inconsistent. This case is handled by setting the top bit of the - * return result parameter. + * Occasionally, we don't cache the fid returned by CFS_LOOKUP. + * For instance, if the fid is inconsistent. + * This case is handled by setting the top bit of the type result parameter. 
*/ #define CFS_NOCACHE 0x80000000 diff --git a/include/linux/coda_cache.h b/include/linux/coda_cache.h index 44251867f..fc607fdba 100644 --- a/include/linux/coda_cache.h +++ b/include/linux/coda_cache.h @@ -21,19 +21,16 @@ struct coda_cache { struct coda_cred cc_cred; }; -void coda_ccinsert(struct coda_cache *el, struct super_block *sb); -void coda_cninsert(struct coda_cache *el, struct coda_inode_info *cnp); -void coda_ccremove(struct coda_cache *el); -void coda_cnremove(struct coda_cache *el); -void coda_cache_create(struct inode *inode, int mask); -struct coda_cache *coda_cache_find(struct inode *inode); +/* credential cache */ void coda_cache_enter(struct inode *inode, int mask); -void coda_cache_clear_cnp(struct coda_inode_info *cnp); +void coda_cache_clear_inode(struct inode *); void coda_cache_clear_all(struct super_block *sb); void coda_cache_clear_cred(struct super_block *sb, struct coda_cred *cred); int coda_cache_check(struct inode *inode, int mask); -void coda_dentry_delete(struct dentry *dentry); -void coda_zapfid(struct ViceFid *fid, struct super_block *sb, int flag); + +/* for downcalls and attributes and lookups */ +void coda_flag_inode(struct inode *inode, int flag); +void coda_flag_alias_children(struct inode *inode, int flag); /* diff --git a/include/linux/coda_fs_i.h b/include/linux/coda_fs_i.h index 1277445b9..d312013d5 100644 --- a/include/linux/coda_fs_i.h +++ b/include/linux/coda_fs_i.h @@ -17,7 +17,7 @@ #define CODA_CNODE_MAGIC 0x47114711 /* - * smb fs inode data (in memory only) + * coda fs inode data */ struct coda_inode_info { struct ViceFid c_fid; /* Coda identifier */ @@ -36,7 +36,7 @@ struct coda_inode_info { #define C_VATTR 0x1 /* Validity of vattr in the cnode */ #define C_SYMLINK 0x2 /* Validity of symlink pointer in the cnode */ #define C_DYING 0x4 /* Set for outstanding cnodes from venus (which died) */ -#define C_ZAPFID 0x8 +#define C_PURGE 0x8 #define C_ZAPDIR 0x10 #define C_INITED 0x20 @@ -44,9 +44,6 @@ int 
coda_cnode_make(struct inode **, struct ViceFid *, struct super_block *); int coda_cnode_makectl(struct inode **inode, struct super_block *sb); struct inode *coda_fid_to_inode(ViceFid *fid, struct super_block *sb); -/* inode to cnode */ -#define ITOC(inode) ((struct coda_inode_info *)&((inode)->u.coda_i)) - #endif #endif diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index fa477cb52..9dd30eaeb 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -36,6 +36,7 @@ extern struct file_operations coda_ioctl_operations; int coda_open(struct inode *i, struct file *f); int coda_release(struct inode *i, struct file *f); int coda_permission(struct inode *inode, int mask); +int coda_revalidate_inode(struct dentry *); /* global variables */ extern int coda_debug; @@ -43,10 +44,13 @@ extern int coda_print_entry; extern int coda_access_cache; /* this file: heloers */ +static __inline__ struct ViceFid *coda_i2f(struct inode *); char *coda_f2s(ViceFid *f); int coda_isroot(struct inode *i); int coda_fid_is_volroot(struct ViceFid *); int coda_iscontrol(const char *name, size_t length); + + void coda_load_creds(struct coda_cred *cred); int coda_mycred(struct coda_cred *); void coda_vattr_to_iattr(struct inode *, struct coda_vattr *); @@ -112,4 +116,18 @@ do { \ #define CODA_FREE(ptr,size) do {if (size < 3000) { kfree_s((ptr), (size)); CDEBUG(D_MALLOC, "kfreed: %x at %x.\n", (int) size, (int) ptr); } else { vfree((ptr)); CDEBUG(D_MALLOC, "vfreed: %x at %x.\n", (int) size, (int) ptr);} } while (0) +/* inode to cnode */ + +static __inline__ struct ViceFid *coda_i2f(struct inode *inode) +{ + return &(inode->u.coda_i.c_fid); +} + +#define ITOC(inode) (&((inode)->u.coda_i)) + + + + + + #endif diff --git a/include/linux/file.h b/include/linux/file.h index 3f3870b9e..240a5039c 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -1,19 +1,41 @@ +/* + * Wrapper functions for accessing the file_struct fd array. 
+ */ + #ifndef __LINUX_FILE_H #define __LINUX_FILE_H -extern inline struct file * fget(unsigned long fd) +extern int __fput(struct file *); +extern void insert_file_free(struct file *file); + +/* + * Check whether the specified fd has an open file. + */ +extern inline struct file * fcheck(unsigned int fd) { struct file * file = NULL; - if (fd < NR_OPEN) { + + if (fd < NR_OPEN) file = current->files->fd[fd]; - if (file) - file->f_count++; - } return file; } -extern int __fput(struct file *); -extern void insert_file_free(struct file *file); +extern inline struct file * fget(unsigned int fd) +{ + struct file * file = fcheck(fd); + + if (file) + file->f_count++; + return file; +} + +/* + * Install a file pointer in the fd array. + */ +extern inline void fd_install(unsigned int fd, struct file *file) +{ + current->files->fd[fd] = file; +} /* It does not matter which list it is on. */ extern inline void remove_filp(struct file *file) @@ -47,12 +69,4 @@ extern inline void put_filp(struct file *file) } } -/* - * Install a file pointer in the files structure. 
- */ -extern inline void fd_install(unsigned long fd, struct file *file) -{ - current->files->fd[fd] = file; -} - #endif diff --git a/include/linux/hfs_fs.h b/include/linux/hfs_fs.h index de51db0b1..9b43579c0 100644 --- a/include/linux/hfs_fs.h +++ b/include/linux/hfs_fs.h @@ -237,20 +237,20 @@ extern const struct hfs_name hfs_cap_reserved2[]; extern struct inode_operations hfs_cap_ndir_inode_operations; extern struct inode_operations hfs_cap_fdir_inode_operations; extern struct inode_operations hfs_cap_rdir_inode_operations; -extern void hfs_cap_drop_dentry(const ino_t, struct dentry *); +extern void hfs_cap_drop_dentry(struct dentry *, const ino_t); /* dir_dbl.c */ extern const struct hfs_name hfs_dbl_reserved1[]; extern const struct hfs_name hfs_dbl_reserved2[]; extern struct inode_operations hfs_dbl_dir_inode_operations; -extern void hfs_dbl_drop_dentry(const ino_t, struct dentry *); +extern void hfs_dbl_drop_dentry(struct dentry *, const ino_t); /* dir_nat.c */ extern const struct hfs_name hfs_nat_reserved1[]; extern const struct hfs_name hfs_nat_reserved2[]; extern struct inode_operations hfs_nat_ndir_inode_operations; extern struct inode_operations hfs_nat_hdir_inode_operations; -extern void hfs_nat_drop_dentry(const ino_t, struct dentry *); +extern void hfs_nat_drop_dentry(struct dentry *, const ino_t); /* dir_sngl.c */ extern const struct hfs_name hfs_sngl_reserved1[]; diff --git a/include/linux/hfs_fs_i.h b/include/linux/hfs_fs_i.h index cf9ed53e0..453896882 100644 --- a/include/linux/hfs_fs_i.h +++ b/include/linux/hfs_fs_i.h @@ -34,7 +34,7 @@ struct hfs_inode_info { struct hfs_hdr_layout *layout; /* for dentry cleanup */ - void (*d_drop_op)(const ino_t, struct dentry *); + void (*d_drop_op)(struct dentry *, const ino_t); }; #endif diff --git a/include/linux/hfs_sysdep.h b/include/linux/hfs_sysdep.h index 93de05aad..22e2ac66b 100644 --- a/include/linux/hfs_sysdep.h +++ b/include/linux/hfs_sysdep.h @@ -78,6 +78,10 @@ extern inline hfs_u32 hfs_time(void) { 
*/ typedef struct wait_queue *hfs_wait_queue; +extern inline void hfs_init_waitqueue(hfs_wait_queue *queue) { + init_waitqueue(queue); +} + extern inline void hfs_sleep_on(hfs_wait_queue *queue) { sleep_on(queue); } diff --git a/include/linux/kerneld.h b/include/linux/kerneld.h deleted file mode 100644 index b2db5f8c7..000000000 --- a/include/linux/kerneld.h +++ /dev/null @@ -1,135 +0,0 @@ -#ifndef _LINUX_KERNELD_H -#define _LINUX_KERNELD_H - -#define KERNELD_SYSTEM 1 -#define KERNELD_REQUEST_MODULE 2 /* "insmod" */ -#define KERNELD_RELEASE_MODULE 3 /* "rmmod" */ -#define KERNELD_DELAYED_RELEASE_MODULE 4 /* "rmmod" */ -#define KERNELD_CANCEL_RELEASE_MODULE 5 /* "rmmod" */ -#define KERNELD_REQUEST_ROUTE 6 /* from net/ipv4/route.c */ -#define KERNELD_BLANKER 7 /* from drivers/char/console.c */ -#define KERNELD_PNP 8 /* from drivers/pnp/kerneld.c */ -#define KERNELD_ARP 256 /* from net/ipv4/arp.c */ - -/* - * Uncomment the following line for the new kerneld protocol - * This includes the pid of the kernel level requester into the kerneld header - */ -/* -#define NEW_KERNELD_PROTOCOL - */ -#ifdef NEW_KERNELD_PROTOCOL -#define OLDIPC_KERNELD 00040000 /* use the kerneld message channel */ -#define IPC_KERNELD 00140000 /* use the kerneld message channel, new protocol */ -#define KDHDR (sizeof(long) + sizeof(short) + sizeof(short)) -#define NULL_KDHDR 0, 2, 0 -#else -#define IPC_KERNELD 00040000 /* use the kerneld message channel */ -#define KDHDR (sizeof(long)) -#define NULL_KDHDR 0 -#endif -#define KERNELD_MAXCMD 0x7ffeffff -#define KERNELD_MINSEQ 0x7fff0000 /* "commands" legal up to 0x7ffeffff */ -#define KERNELD_WAIT 0x80000000 -#define KERNELD_NOWAIT 0 - -struct kerneld_msg { - long mtype; - long id; -#ifdef NEW_KERNELD_PROTOCOL - short version; - short pid; -#endif -#ifdef __KERNEL__ - char *text; -#else - char text[1]; -#endif /* __KERNEL__ */ -}; - -#ifdef __KERNEL__ -#include <linux/string.h> - -extern int kerneld_send(int msgtype, int ret_size, int msgsz, - const 
char *text, const char *ret_val); - -/* - * Request that a module should be loaded. - * Wait for the exit status from insmod/modprobe. - * If it fails, it fails... at least we tried... - */ -static inline int request_module(const char *name) -{ - return kerneld_send(KERNELD_REQUEST_MODULE, - 0 | KERNELD_WAIT, - strlen(name), name, NULL); -} - -/* - * Request the removal of a module, maybe don't wait for it. - * It doesn't matter if the removal fails, now does it? - */ -static inline int release_module(const char *name, int waitflag) -{ - return kerneld_send(KERNELD_RELEASE_MODULE, - 0 | (waitflag?KERNELD_WAIT:KERNELD_NOWAIT), - strlen(name), name, NULL); -} - -/* - * Request a delayed removal of a module, but don't wait for it. - * The delay is done by kerneld (default: 60 seconds) - */ -static inline int delayed_release_module(const char *name) -{ - return kerneld_send(KERNELD_DELAYED_RELEASE_MODULE, - 0 | KERNELD_NOWAIT, - strlen(name), name, NULL); -} - -/* - * Attempt to cancel a previous request for removal of a module, - * but don't wait for it. - * This call can be made if the kernel wants to prevent a delayed - * unloading of a module. - */ -static inline int cancel_release_module(const char *name) -{ - return kerneld_send(KERNELD_CANCEL_RELEASE_MODULE, - 0 | KERNELD_NOWAIT, - strlen(name), name, NULL); -} - -/* - * Perform an "inverted" system call, maybe return the exit status - */ -static inline int ksystem(const char *cmd, int waitflag) -{ - return kerneld_send(KERNELD_SYSTEM, - 0 | (waitflag?KERNELD_WAIT:KERNELD_NOWAIT), - strlen(cmd), cmd, NULL); -} - -/* - * Try to create a route, possibly by opening a ppp-connection - */ -static inline int kerneld_route(const char *ip_route) -{ - return kerneld_send(KERNELD_REQUEST_ROUTE, - 0 | KERNELD_WAIT, - strlen(ip_route), ip_route, NULL); -} - -/* - * Handle an external screen blanker - */ -static inline int kerneld_blanker(int on_off) -{ - char *s = on_off ? 
"on" : "off"; - return kerneld_send(KERNELD_BLANKER, - 0 | (on_off ? KERNELD_NOWAIT : KERNELD_WAIT), - strlen(s), s, NULL); -} - -#endif /* __KERNEL__ */ -#endif diff --git a/include/linux/kmod.h b/include/linux/kmod.h new file mode 100644 index 000000000..876c7f222 --- /dev/null +++ b/include/linux/kmod.h @@ -0,0 +1,4 @@ +/* + kmod header +*/ +extern int request_module(const char * name); diff --git a/include/linux/module.h b/include/linux/module.h index 475c68854..ad3d10baf 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -148,7 +148,7 @@ const char __module_author[] __attribute__((section(".modinfo"))) = \ const char __module_description[] __attribute__((section(".modinfo"))) = \ "description=" desc -/* Could potentially be used by kerneld... */ +/* Could potentially be used by kmod... */ #define MODULE_SUPPORTED_DEVICE(dev) \ const char __module_device[] __attribute__((section(".modinfo"))) = \ diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 55193867d..b57519b72 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -217,7 +217,7 @@ extern int pim_rcv(struct sk_buff * , unsigned short); extern int pim_rcv_v1(struct sk_buff * , unsigned short len); struct rtmsg; -extern int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm); +extern int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait); #endif #endif diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index 03904df71..eb83cfe01 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -83,8 +83,15 @@ struct ncp_privatedata_ioctl }; #define NCP_IOC_NCPREQUEST _IOR('n', 1, struct ncp_ioctl_request) -#define NCP_IOC_GETMOUNTUID _IOW('n', 2, uid_t) -#define NCP_IOC_GETMOUNTUID_INT _IOW('n', 2, unsigned int) +#define NCP_IOC_GETMOUNTUID _IOW('n', 2, __kernel_uid_t) + +#if 1 +#ifdef __KERNEL__ +/* remove after ncpfs-2.0.13 gets released or at the beginning of kernel-2.1. 
codefreeze */ +#define NCP_IOC_GETMOUNTUID_INT _IOW('n', 2, unsigned int) +#endif +#endif + #define NCP_IOC_CONN_LOGGED_IN _IO('n', 3) #define NCP_GET_FS_INFO_VERSION (1) diff --git a/include/linux/ncp_fs_sb.h b/include/linux/ncp_fs_sb.h index efcc20556..38492fc92 100644 --- a/include/linux/ncp_fs_sb.h +++ b/include/linux/ncp_fs_sb.h @@ -51,11 +51,9 @@ struct ncp_server { int ncp_reply_size; struct ncp_inode_info root; -#if 0 - char root_path; /* '\0' */ -#else struct dentry* root_dentry; -#endif + + int root_setuped; /* info for packet signing */ int sign_wanted; /* 1=Server needs signed packets */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 72430508a..d1c005c70 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -125,6 +125,9 @@ struct net_device_stats unsigned long tx_heartbeat_errors; unsigned long tx_window_errors; + /* for cslip etc */ + unsigned long rx_compressed; + unsigned long tx_compressed; }; #ifdef CONFIG_NET_FASTROUTE @@ -352,6 +355,7 @@ extern __inline__ int unregister_gifconf(unsigned int family) #define HAVE_NETIF_RX 1 extern void netif_rx(struct sk_buff *skb); extern void net_bh(void); +extern void dev_tint(struct device *dev); extern int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy); extern int dev_ioctl(unsigned int cmd, void *); extern int dev_change_flags(struct device *, unsigned); @@ -423,7 +427,7 @@ extern int dev_mc_add(struct device *dev, void *addr, int alen, int newonly); extern void dev_mc_discard(struct device *dev); extern void dev_set_promiscuity(struct device *dev, int inc); extern void dev_set_allmulti(struct device *dev, int inc); -/* Load a device via the kerneld */ +/* Load a device via the kmod */ extern void dev_load(const char *name); extern void dev_mcast_init(void); extern int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev)); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a7b51b977..b72ad4ed1 
100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -85,6 +85,9 @@ do { \ */ #define NFS_RPC_SWAPFLAGS (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) +/* Flags in the RPC client structure */ +#define NFS_CLNTF_BUFSIZE 0x0001 /* readdir buffer in longwords */ + #ifdef __KERNEL__ /* diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 4a309eb91..8c6467010 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -66,7 +66,7 @@ struct rtattr #define RTA_ALIGNTO 4 #define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) ) -#define RTA_OK(rta,len) ((rta)->rta_len > sizeof(struct rtattr) && \ +#define RTA_OK(rta,len) ((rta)->rta_len >= sizeof(struct rtattr) && \ (rta)->rta_len <= (len)) #define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \ (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len))) diff --git a/include/linux/sched.h b/include/linux/sched.h index 096d0656c..7eae346a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -86,6 +86,12 @@ extern int last_pid; #define SCHED_FIFO 1 #define SCHED_RR 2 +/* + * This is an additional bit set when we want to + * yield the CPU for one re-schedule.. + */ +#define SCHED_YIELD 0x10 + struct sched_param { int sched_priority; }; @@ -113,19 +119,24 @@ extern void trap_init(void); asmlinkage void schedule(void); -/* Open file table structure */ + +/* + * Open file table structure + */ struct files_struct { int count; + int max_fds; + struct file ** fd; /* current fd array */ fd_set close_on_exec; fd_set open_fds; - struct file * fd[NR_OPEN]; }; #define INIT_FILES { \ 1, \ + NR_OPEN, \ + &init_fd_array[0], \ { { 0, } }, \ - { { 0, } }, \ - { NULL, } \ + { { 0, } } \ } struct fs_struct { @@ -387,43 +398,32 @@ extern __inline__ struct task_struct **get_free_taskslot(void) /* PID hashing. 
*/ #define PIDHASH_SZ (NR_TASKS >> 2) extern struct task_struct *pidhash[PIDHASH_SZ]; -extern spinlock_t pidhash_lock; #define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1)) extern __inline__ void hash_pid(struct task_struct *p) { struct task_struct **htable = &pidhash[pid_hashfn(p->pid)]; - unsigned long flags; - spin_lock_irqsave(&pidhash_lock, flags); if((p->pidhash_next = *htable) != NULL) (*htable)->pidhash_pprev = &p->pidhash_next; *htable = p; p->pidhash_pprev = htable; - spin_unlock_irqrestore(&pidhash_lock, flags); } extern __inline__ void unhash_pid(struct task_struct *p) { - unsigned long flags; - - spin_lock_irqsave(&pidhash_lock, flags); if(p->pidhash_next) p->pidhash_next->pidhash_pprev = p->pidhash_pprev; *p->pidhash_pprev = p->pidhash_next; - spin_unlock_irqrestore(&pidhash_lock, flags); } extern __inline__ struct task_struct *find_task_by_pid(int pid) { struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)]; - unsigned long flags; - spin_lock_irqsave(&pidhash_lock, flags); for(p = *htable; p && p->pid != pid; p = p->pidhash_next) ; - spin_unlock_irqrestore(&pidhash_lock, flags); return p; } @@ -571,19 +571,6 @@ extern void exit_sighand(struct task_struct *); extern int do_execve(char *, char **, char **, struct pt_regs *); extern int do_fork(unsigned long, unsigned long, struct pt_regs *); -/* See if we have a valid user level fd. - * If it makes sense, return the file structure it references. - * Otherwise return NULL. - */ -extern inline struct file *file_from_fd(const unsigned int fd) -{ - - if (fd >= NR_OPEN) - return NULL; - /* either valid or null */ - return current->files->fd[fd]; -} - /* * The wait-queues are circular lists, and you have to be *very* sure * to keep them correct. 
Use only these two functions to add/remove @@ -627,11 +614,9 @@ extern inline void remove_wait_queue(struct wait_queue ** p, struct wait_queue * write_unlock_irqrestore(&waitqueue_lock, flags); } -#define REMOVE_LINKS(p) do { unsigned long flags; \ - write_lock_irqsave(&tasklist_lock, flags); \ +#define REMOVE_LINKS(p) do { \ (p)->next_task->prev_task = (p)->prev_task; \ (p)->prev_task->next_task = (p)->next_task; \ - write_unlock_irqrestore(&tasklist_lock, flags); \ if ((p)->p_osptr) \ (p)->p_osptr->p_ysptr = (p)->p_ysptr; \ if ((p)->p_ysptr) \ @@ -640,13 +625,11 @@ extern inline void remove_wait_queue(struct wait_queue ** p, struct wait_queue * (p)->p_pptr->p_cptr = (p)->p_osptr; \ } while (0) -#define SET_LINKS(p) do { unsigned long flags; \ - write_lock_irqsave(&tasklist_lock, flags); \ +#define SET_LINKS(p) do { \ (p)->next_task = &init_task; \ (p)->prev_task = init_task.prev_task; \ init_task.prev_task->next_task = (p); \ init_task.prev_task = (p); \ - write_unlock_irqrestore(&tasklist_lock, flags); \ (p)->p_ysptr = NULL; \ if (((p)->p_osptr = (p)->p_pptr->p_cptr) != NULL) \ (p)->p_osptr->p_ysptr = p; \ diff --git a/include/linux/socket.h b/include/linux/socket.h index e274a3c51..afff2fd5c 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -137,6 +137,7 @@ struct ucred { #define AF_NETLINK 16 #define AF_ROUTE AF_NETLINK /* Alias to emulate 4.4BSD */ #define AF_PACKET 17 /* Packet family */ +#define AF_ASH 18 /* Ash */ #define AF_MAX 32 /* For now.. */ /* Protocol families, same as address families. 
*/ @@ -160,6 +161,7 @@ struct ucred { #define PF_NETLINK AF_NETLINK #define PF_ROUTE AF_ROUTE #define PF_PACKET AF_PACKET +#define PF_ASH AF_ASH #define PF_MAX AF_MAX diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 60fb2d74f..da2b2cdd1 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -30,6 +30,7 @@ struct rpc_portmap { * The high-level client handle */ struct rpc_clnt { + unsigned int cl_users; /* number of references */ struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ u32 cl_maxproc; /* max procedure number */ @@ -37,7 +38,6 @@ struct rpc_clnt { char * cl_server; /* server machine name */ char * cl_protname; /* protocol name */ struct rpc_auth * cl_auth; /* authenticator */ - struct rpc_portmap cl_pmap; /* port mapping */ struct rpc_stat * cl_stats; /* statistics */ unsigned int cl_softrtry : 1,/* soft timeouts */ @@ -47,10 +47,11 @@ struct rpc_clnt { cl_binding : 1,/* doing a getport() */ cl_oneshot : 1,/* dispose after use */ cl_dead : 1;/* abandoned */ + unsigned int cl_flags; /* misc client flags */ unsigned long cl_hardmax; /* max hard timeout */ + struct rpc_portmap cl_pmap; /* port mapping */ struct rpc_wait_queue cl_bindwait; /* waiting on getport() */ - unsigned int cl_users; /* number of references */ }; #define cl_timeout cl_xprt->timeout #define cl_prog cl_pmap.pm_prog diff --git a/include/linux/swap.h b/include/linux/swap.h index 4d291146e..494490c32 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -36,10 +36,10 @@ struct swap_info_struct { extern int nr_swap_pages; extern int nr_free_pages; extern atomic_t nr_async_pages; -extern int min_free_pages; -extern int free_pages_low; -extern int free_pages_high; extern struct inode swapper_inode; +extern unsigned long page_cache_size; +extern int buffermem; +#define BUFFER_MEM ((buffermem >> PAGE_SHIFT) + page_cache_size) /* Incomplete types for prototype declarations: */ struct 
task_struct; diff --git a/include/linux/swapctl.h b/include/linux/swapctl.h index e71dcd067..cc169d2da 100644 --- a/include/linux/swapctl.h +++ b/include/linux/swapctl.h @@ -6,29 +6,18 @@ /* Swap tuning control */ -/* First, enumerate the different reclaim policies */ -enum RCL_POLICY {RCL_ROUND_ROBIN, RCL_BUFF_FIRST, RCL_PERSIST}; - -typedef struct swap_control_v5 +typedef struct swap_control_v6 { unsigned int sc_max_page_age; unsigned int sc_page_advance; unsigned int sc_page_decline; unsigned int sc_page_initial_age; - unsigned int sc_max_buff_age; - unsigned int sc_buff_advance; - unsigned int sc_buff_decline; - unsigned int sc_buff_initial_age; unsigned int sc_age_cluster_fract; unsigned int sc_age_cluster_min; unsigned int sc_pageout_weight; unsigned int sc_bufferout_weight; - unsigned int sc_buffer_grace; - unsigned int sc_nr_buffs_to_free; - unsigned int sc_nr_pages_to_free; - enum RCL_POLICY sc_policy; -} swap_control_v5; -typedef struct swap_control_v5 swap_control_t; +} swap_control_v6; +typedef struct swap_control_v6 swap_control_t; extern swap_control_t swap_control; typedef struct swapstat_v1 @@ -42,7 +31,23 @@ typedef struct swapstat_v1 typedef swapstat_v1 swapstat_t; extern swapstat_t swapstats; -extern int min_free_pages, free_pages_low, free_pages_high; +typedef struct buffer_mem_v1 +{ + unsigned int min_percent; + unsigned int borrow_percent; + unsigned int max_percent; +} buffer_mem_v1; +typedef buffer_mem_v1 buffer_mem_t; +extern buffer_mem_t buffer_mem; + +typedef struct freepages_v1 +{ + unsigned int min; + unsigned int low; + unsigned int high; +} freepages_v1; +typedef freepages_v1 freepages_t; +extern freepages_t freepages; #define SC_VERSION 1 #define SC_MAX_VERSION 1 @@ -55,17 +60,11 @@ extern int min_free_pages, free_pages_low, free_pages_high; failure to free a resource at any priority */ #define RCL_FAILURE (RCL_MAXPRI + 1) -#define RCL_POLICY (swap_control.sc_policy) #define AGE_CLUSTER_FRACT (swap_control.sc_age_cluster_fract) 
#define AGE_CLUSTER_MIN (swap_control.sc_age_cluster_min) #define PAGEOUT_WEIGHT (swap_control.sc_pageout_weight) #define BUFFEROUT_WEIGHT (swap_control.sc_bufferout_weight) -#define NR_BUFFS_TO_FREE (swap_control.sc_nr_buffs_to_free) -#define NR_PAGES_TO_FREE (swap_control.sc_nr_pages_to_free) - -#define BUFFERMEM_GRACE (swap_control.sc_buffer_grace) - /* Page aging (see mm/swap.c) */ #define MAX_PAGE_AGE (swap_control.sc_max_page_age) @@ -73,11 +72,6 @@ extern int min_free_pages, free_pages_low, free_pages_high; #define PAGE_DECLINE (swap_control.sc_page_decline) #define PAGE_INITIAL_AGE (swap_control.sc_page_initial_age) -#define MAX_BUFF_AGE (swap_control.sc_max_buff_age) -#define BUFF_ADVANCE (swap_control.sc_buff_advance) -#define BUFF_DECLINE (swap_control.sc_buff_decline) -#define BUFF_INITIAL_AGE (swap_control.sc_buff_initial_age) - /* Given a resource of N units (pages or buffers etc), we only try to * age and reclaim AGE_CLUSTER_FRACT per 1024 resources each time we * scan the resource list. 
*/ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 865bdd1dd..b7550ba2c 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -70,7 +70,9 @@ enum KERN_PRINTK, /* sturct: control printk logging parameters */ KERN_NAMETRANS, /* Name translation */ KERN_STATINODE, - KERN_DENTRY /* dentry statistics */ + KERN_DENTRY, /* dentry statistics */ + KERN_MODPROBE, + KERN_KMOD_UNLOAD_DELAY }; @@ -82,6 +84,7 @@ enum VM_FREEPG, /* struct: Set free page thresholds */ VM_BDFLUSH, /* struct: Control buffer cache flushing */ VM_OVERCOMMIT_MEMORY, /* Turn off the virtual memory safety limit */ + VM_BUFFERMEM /* struct: Set cache memory thresholds */ }; @@ -118,6 +121,7 @@ enum NET_CORE_FASTROUTE, NET_CORE_MSG_COST, NET_CORE_MSG_BURST, + NET_CORE_OPTMEM_MAX, }; /* /proc/sys/net/ethernet */ @@ -145,8 +149,6 @@ enum NET_IPV4_FIB_HASH = 19, NET_IPV4_TCP_HOE_RETRANSMITS=32, - NET_IPV4_TCP_SACK, - NET_IPV4_TCP_TSACK, NET_IPV4_TCP_TIMESTAMPS, NET_IPV4_TCP_WINDOW_SCALING, NET_IPV4_TCP_VEGAS_CONG_AVOID, @@ -167,6 +169,7 @@ enum NET_IPV4_IP_MASQ_DEBUG, NET_TCP_SYNCOOKIES, NET_TCP_STDURG, + NET_TCP_RFC1337, NET_TCP_SYN_TAILDROP, NET_TCP_MAX_SYN_BACKLOG, NET_IPV4_LOCAL_PORT_RANGE, diff --git a/include/linux/tty.h b/include/linux/tty.h index 6b00c4329..34c88d721 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -320,8 +320,7 @@ extern int espserial_init(void); extern int tty_paranoia_check(struct tty_struct *tty, kdev_t device, const char *routine); -extern char *_tty_name(struct tty_struct *tty, char *buf); -extern char *tty_name(struct tty_struct *tty); +extern char *tty_name(struct tty_struct *tty, char *buf); extern void tty_wait_until_sent(struct tty_struct * tty, int timeout); extern int tty_check_change(struct tty_struct * tty); extern void stop_tty(struct tty_struct * tty); diff --git a/include/linux/umsdos_fs.p b/include/linux/umsdos_fs.p index 62ce67d0c..7c0e64ec3 100644 --- a/include/linux/umsdos_fs.p +++ b/include/linux/umsdos_fs.p @@ -1,9 
+1,6 @@ /* check.c 23/01/95 03.38.30 */ void check_page_tables (void); /* dir.c 22/06/95 00.22.12 */ -struct dentry *creat_dentry (const char *name, - const int len, - struct inode *inode); int compat_msdos_create(struct inode *dir, const char *name, int len, @@ -30,6 +27,16 @@ int UMSDOS_lookup(struct inode *dir,struct dentry *dentry); int umsdos_hlink2inode (struct inode *hlink, struct inode **result); /* emd.c 22/06/95 00.22.04 */ +void fill_new_filp (struct file *filp, struct dentry *dentry); +void kill_dentry (struct dentry *dentry); +struct dentry *creat_dentry (const char *name, + const int len, + struct inode *inode); +ssize_t umsdos_file_write_kmem_real (struct file *filp, + const char *buf, + size_t count, + loff_t *offs); + ssize_t umsdos_file_read_kmem (struct inode *emd_dir, struct file *filp, char *buf, diff --git a/include/net/dst.h b/include/net/dst.h index b879bb059..0d18f60d2 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -120,6 +120,8 @@ extern void dst_destroy(struct dst_entry * dst); extern __inline__ void dst_free(struct dst_entry * dst) { + if (dst->obsolete > 1) + return; if (!atomic_read(&dst->use)) { dst_destroy(dst); return; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 42233aadf..863037b23 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -114,7 +114,7 @@ extern __inline__ void ip6_dst_store(struct sock *sk, struct dst_entry *dst) struct rt6_info *rt; np = &sk->net_pinfo.af_inet6; - sk->dst_cache = dst; + dst_release(xchg(&sk->dst_cache,dst)); rt = (struct rt6_info *) dst; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index b6055ae44..1a322a498 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -4,7 +4,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: ipv6.h,v 1.8 1997/12/29 19:52:09 kuznet Exp $ + * $Id: ipv6.h,v 1.9 1998/03/08 05:55:20 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU 
General Public License diff --git a/include/net/route.h b/include/net/route.h index 338e158fd..624fd233a 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -102,6 +102,7 @@ extern unsigned inet_addr_type(u32 addr); extern void ip_rt_multicast_event(struct in_device *); extern int ip_rt_ioctl(unsigned int cmd, void *arg); extern void ip_rt_get_source(u8 *src, struct rtable *rt); +extern int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb); extern __inline__ void ip_rt_put(struct rtable * rt) diff --git a/include/net/sock.h b/include/net/sock.h index c225a0015..589f58c7c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -191,42 +191,75 @@ struct raw_opt { struct tcp_opt { + int tcp_header_len; /* Bytes of tcp header to send */ + +/* + * Header prediction flags + * 0x5?10 << 16 + snd_wnd in net byte order + */ + __u32 pred_flags; + /* * RFC793 variables by their proper names. This means you can * read the code and the spec side by side (and laugh ...) * See RFC793 and RFC1122. The RFC writes these in capitals. */ __u32 rcv_nxt; /* What we want to receive next */ - __u32 rcv_up; /* The urgent point (may not be valid) */ - __u32 rcv_wnd; /* Current receiver window */ __u32 snd_nxt; /* Next sequence we send */ + __u32 snd_una; /* First byte we want an ack for */ - __u32 snd_up; /* Outgoing urgent pointer */ - __u32 snd_wl1; /* Sequence for window update */ - __u32 snd_wl2; /* Ack sequence for update */ + __u32 rcv_tstamp; /* timestamp of last received packet */ + __u32 lrcvtime; /* timestamp of last received data packet*/ + __u32 srtt; /* smothed round trip time << 3 */ - __u32 rcv_wup; /* rcv_nxt on last window update sent */ + __u32 ato; /* delayed ack timeout */ + __u32 snd_wl1; /* Sequence for window update */ - __u32 fin_seq; /* XXX This one should go, we don't need it. 
-DaveM */ + __u32 snd_wl2; /* Ack sequence for update */ + __u32 snd_wnd; /* The window we expect to receive */ + __u32 max_window; + __u8 pending; /* pending events */ + __u8 retransmits; + __u32 last_ack_sent; /* last ack we sent */ - __u32 srtt; /* smothed round trip time << 3 */ + __u32 backoff; /* backoff */ __u32 mdev; /* medium deviation */ + __u32 snd_cwnd; /* Sending congestion window */ __u32 rto; /* retransmit timeout */ - __u32 backoff; /* backoff */ + + __u32 packets_out; /* Packets which are "in flight" */ + __u32 high_seq; /* highest sequence number sent by onset of congestion */ /* * Slow start and congestion control (see also Nagle, and Karn & Partridge) */ - __u32 snd_cwnd; /* Sending congestion window */ __u32 snd_ssthresh; /* Slow start size threshold */ __u16 snd_cwnd_cnt; - __u16 max_window; + __u8 dup_acks; /* Consequetive duplicate acks seen from other end */ + __u8 delayed_acks; + + /* Two commonly used timers in both sender and receiver paths. */ + struct timer_list retransmit_timer; /* Resend (no ack) */ + struct timer_list delack_timer; /* Ack delay */ + + struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ + struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */ + struct sk_buff *send_head; /* Front of stuff to transmit */ + struct sk_buff *retrans_head; /* retrans head can be + * different to the head of + * write queue if we are doing + * fast retransmit + */ + + __u32 rcv_wnd; /* Current receiver window */ + __u32 rcv_wup; /* rcv_nxt on last window update sent */ + __u32 write_seq; + __u32 copied_seq; /* * Options received (usually on last packet, some only on SYN packets). 
*/ char tstamp_ok, /* TIMESTAMP seen on SYN packet */ - wscale_ok, /* Wscale seen on SYN packet */ - sack_ok; /* SACK_PERM seen on SYN packet */ + wscale_ok; /* Wscale seen on SYN packet */ char saw_tstamp; /* Saw TIMESTAMP on last packet */ __u16 in_mss; /* MSS option received from sender */ __u8 snd_wscale; /* Window scaling received from sender */ @@ -235,60 +268,20 @@ struct tcp_opt __u32 rcv_tsecr; /* Time stamp echo reply */ __u32 ts_recent; /* Time stamp to echo next */ __u32 ts_recent_stamp;/* Time we stored ts_recent (for aging) */ - __u32 last_ack_sent; /* last ack we sent */ - int sacks; /* Number of SACK blocks if any */ - __u32 left_sack[4]; /* Left edges of blocks */ - __u32 right_sack[4]; /* Right edges of blocks */ - int tcp_header_len; /* Bytes of tcp header to send */ -/* - * Timers used by the TCP protocol layer - */ - struct timer_list delack_timer; /* Ack delay */ - struct timer_list idle_timer; /* Idle watch */ - struct timer_list completion_timer; /* Up/Down timer */ struct timer_list probe_timer; /* Probes */ - struct timer_list retransmit_timer; /* Resend (no ack) */ - - __u32 basertt; /* Vegas baseRTT */ - __u32 packets_out; /* Packets which are "in flight" */ - __u32 window_clamp; /* XXX Document this... 
-DaveM */ - - __u8 pending; /* pending events */ - __u8 delayed_acks; - __u8 dup_acks; /* Consequetive duplicate acks seen from other end */ - __u8 retransmits; - - __u32 lrcvtime; /* timestamp of last received data packet */ - __u32 rcv_tstamp; /* timestamp of last received packet */ - __u32 iat_mdev; /* interarrival time medium deviation */ - __u32 iat; /* interarrival time */ - __u32 ato; /* delayed ack timeout */ - __u32 high_seq; /* highest sequence number sent by onset of congestion */ - -/* - * new send pointers - */ - struct sk_buff * send_head; - struct sk_buff * retrans_head; /* retrans head can be - * different to the head of - * write queue if we are doing - * fast retransmit - */ -/* - * Header prediction flags - * 0x5?10 << 16 + snd_wnd in net byte order - */ - __u32 pred_flags; - __u32 snd_wnd; /* The window we expect to receive */ - - __u32 probes_out; /* unanswered 0 window probes */ + __u32 basertt; /* Vegas baseRTT */ + __u32 window_clamp; /* XXX Document this... -DaveM */ + __u32 probes_out; /* unanswered 0 window probes */ + __u32 syn_seq; + __u32 fin_seq; + __u32 urg_seq; + __u32 urg_data; struct open_request *syn_wait_queue; struct open_request **syn_wait_last; int syn_backlog; - struct tcp_func *af_specific; }; @@ -347,73 +340,73 @@ struct sock struct sock *sklist_next; struct sock *sklist_prev; - atomic_t wmem_alloc; - atomic_t rmem_alloc; - unsigned long allocation; /* Allocation mode */ + /* Local port binding hash linkage. */ + struct sock *bind_next; + struct sock **bind_pprev; + + /* Main hash linkage for various protocol lookup tables. */ + struct sock *next; + struct sock **pprev; - /* The following stuff should probably move to the tcp private area */ - __u32 write_seq; - __u32 copied_seq; - __u32 syn_seq; - __u32 urg_seq; - __u32 urg_data; - unsigned char delayed_acks; - /* End of block to move */ + /* Socket demultiplex comparisons on incoming packets. 
*/ + __u32 daddr; /* Foreign IPv4 addr */ + __u32 rcv_saddr; /* Bound local IPv4 addr */ + int bound_dev_if; /* Bound device index if != 0 */ + unsigned short num; /* Local port */ + volatile unsigned char state, /* Connection state */ + zapped; /* In ax25 & ipx means not linked */ + struct tcphdr dummy_th; /* TCP header template */ - int sock_readers; /* user count */ + int sock_readers; /* user count */ + int rcvbuf; + + struct wait_queue **sleep; + struct dst_entry *dst_cache; /* Destination cache */ + atomic_t rmem_alloc; /* Receive queue bytes committed */ + struct sk_buff_head receive_queue; /* Incoming packets */ + atomic_t wmem_alloc; /* Transmit queue bytes committed */ + struct sk_buff_head write_queue; /* Packet sending queue */ + atomic_t omem_alloc; /* "o" is "option" or "other" */ + __u32 saddr; /* Sending source */ + unsigned int allocation; /* Allocation mode */ + int sndbuf; + struct sock *prev; /* * Not all are volatile, but some are, so we * might as well say they all are. */ volatile char dead, - urginline, done, + urginline, reuse, keepopen, linger, destroy, no_check, - zapped, /* In ax25 & ipx means not linked */ broadcast, nonagle, bsdism; - int bound_dev_if; - unsigned long lingertime; + unsigned char debug; int proc; + unsigned long lingertime; - struct sock *next; - struct sock **pprev; - struct sock *bind_next; - struct sock **bind_pprev; - struct sock *prev; int hashent; struct sock *pair; - struct sk_buff_head back_log; - - struct sk_buff_head write_queue, - receive_queue, - out_of_order_queue, + /* Error and backlog packet queues, rarely used. */ + struct sk_buff_head back_log, error_queue; unsigned short family; struct proto *prot; - struct wait_queue **sleep; - - __u32 daddr; - __u32 saddr; /* Sending source */ - __u32 rcv_saddr; /* Bound address */ - struct dst_entry *dst_cache; /* * mss is min(mtu, max_window) */ unsigned short mtu; /* mss negotiated in the syn's */ unsigned short mss; /* current eff. 
mss - can change */ unsigned short user_mss; /* mss requested by user in ioctl */ - unsigned short num; - unsigned short shutdown; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) @@ -436,16 +429,12 @@ struct sock cause failure but are the cause of a persistent failure not just 'timed out' */ - unsigned char protocol; - volatile unsigned char state; unsigned short ack_backlog; unsigned short max_ack_backlog; - unsigned char debug; __u32 priority; - int rcvbuf; - int sndbuf; unsigned short type; unsigned char localroute; /* Route locally only */ + unsigned char protocol; struct ucred peercred; #ifdef CONFIG_FILTER @@ -472,11 +461,6 @@ struct sock #if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE) struct packet_opt *af_packet; #endif -#ifdef CONFIG_INET -#ifdef CONFIG_NUTCP - struct tcp_opt af_tcp; -#endif -#endif #if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE) x25_cb *x25; #endif @@ -503,7 +487,6 @@ struct sock int ip_ttl; /* TTL setting */ int ip_tos; /* TOS */ unsigned ip_cmsg_flags; - struct tcphdr dummy_th; struct ip_options *opt; unsigned char ip_hdrincl; /* Include headers ? 
*/ __u8 ip_mc_ttl; /* Multicasting TTL */ @@ -731,7 +714,7 @@ here: } /* - * This might not be the most apropriate place for this two + * This might not be the most appropriate place for this two * but since they are used by a lot of the net related code * at least they get declared on a include that is common to all */ @@ -750,7 +733,7 @@ static __inline__ int max(unsigned int a, unsigned int b) return a; } -extern struct sock * sk_alloc(int family, int priority); +extern struct sock * sk_alloc(int family, int priority, int zero_it); extern void sk_free(struct sock *sk); extern void destroy_sock(struct sock *sk); @@ -884,7 +867,6 @@ extern __inline__ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf) return -ENOMEM; - skb_set_owner_r(skb, sk); #ifdef CONFIG_FILTER if (sk->filter) @@ -894,7 +876,8 @@ extern __inline__ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } #endif /* CONFIG_FILTER */ - skb_queue_tail(&sk->receive_queue,skb); + skb_set_owner_r(skb, sk); + skb_queue_tail(&sk->receive_queue, skb); if (!sk->dead) sk->data_ready(sk,skb->len); return 0; diff --git a/include/net/tcp.h b/include/net/tcp.h index 4c445ca1a..cec01dfe6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -27,13 +27,13 @@ * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. */ -#define TCP_HTABLE_SIZE 1024 +#define TCP_HTABLE_SIZE 512 /* This is for listening sockets, thus all sockets which possess wildcards. */ #define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ /* This is for all sockets, to keep track of the local port allocations. 
*/ -#define TCP_BHTABLE_SIZE 64 +#define TCP_BHTABLE_SIZE 512 /* tcp_ipv4.c: These need to be shared by v4 and v6 because the lookup * and hashing code needs to work with different AF's yet @@ -41,47 +41,153 @@ */ extern struct sock *tcp_established_hash[TCP_HTABLE_SIZE]; extern struct sock *tcp_listening_hash[TCP_LHTABLE_SIZE]; -extern struct sock *tcp_bound_hash[TCP_BHTABLE_SIZE]; -/* tcp_ipv4.c: These sysctl variables need to be shared between v4 and v6 - * because the v6 tcp code to intialize a connection needs to interoperate - * with the v4 code using the same variables. - * FIXME: It would be better to rewrite the connection code to be - * address family independent and just leave one copy in the ipv4 section. - * This would also clean up some code duplication. -- erics +/* There are a few simple rules, which allow for local port reuse by + * an application. In essence: + * + * 1) Sockets bound to different interfaces may share a local port. + * Failing that, goto test 2. + * 2) If all sockets have sk->reuse set, and none of them are in + * TCP_LISTEN state, the port may be shared. + * Failing that, goto test 3. + * 3) If all sockets are bound to a specific sk->rcv_saddr local + * address, and none of them are the same, the port may be + * shared. + * Failing this, the port cannot be shared. + * + * The interesting point, is test #2. This is what an FTP server does + * all day. To optimize this case we use a specific flag bit defined + * below. As we add sockets to a bind bucket list, we perform a + * check of: (newsk->reuse && (newsk->state != TCP_LISTEN)) + * As long as all sockets added to a bind bucket pass this test, + * the flag bit will be set. + * The resulting situation is that tcp_v[46]_verify_bind() can just check + * for this flag bit, if it is set and the socket trying to bind has + * sk->reuse set, we don't even have to walk the owners list at all, + * we return that it is ok to bind this socket to the requested local port. 
+ * + * Sounds like a lot of work, but it is worth it. In a more naive + * implementation (ie. current FreeBSD etc.) the entire list of ports + * must be walked for each data port opened by an ftp server. Needless + * to say, this does not scale at all. With a couple thousand FTP + * users logged onto your box, isn't it nice to know that new data + * ports are created in O(1) time? I thought so. ;-) -DaveM */ -extern int sysctl_tcp_sack; -extern int sysctl_tcp_timestamps; -extern int sysctl_tcp_window_scaling; +struct tcp_bind_bucket { + unsigned short port; + unsigned short flags; +#define TCPB_FLAG_LOCKED 0x0001 +#define TCPB_FLAG_FASTREUSE 0x0002 + + struct tcp_bind_bucket *next; + struct sock *owners; + struct tcp_bind_bucket **pprev; +}; -/* These are AF independent. */ -static __inline__ int tcp_bhashfn(__u16 lport) +extern struct tcp_bind_bucket *tcp_bound_hash[TCP_BHTABLE_SIZE]; +extern kmem_cache_t *tcp_bucket_cachep; +extern struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum); +extern void tcp_bucket_unlock(struct sock *sk); +extern int tcp_port_rover; + +/* Level-1 socket-demux cache. */ +#define TCP_NUM_REGS 32 +extern struct sock *tcp_regs[TCP_NUM_REGS]; + +#define TCP_RHASH_FN(__fport) \ + ((((__fport) >> 7) ^ (__fport)) & (TCP_NUM_REGS - 1)) +#define TCP_RHASH(__fport) tcp_regs[TCP_RHASH_FN((__fport))] +#define TCP_SK_RHASH_FN(__sock) TCP_RHASH_FN((__sock)->dummy_th.dest) +#define TCP_SK_RHASH(__sock) tcp_regs[TCP_SK_RHASH_FN((__sock))] + +static __inline__ void tcp_reg_zap(struct sock *sk) { - return (lport ^ (lport >> 7)) & (TCP_BHTABLE_SIZE - 1); + struct sock **rpp; + + rpp = &(TCP_SK_RHASH(sk)); + if(*rpp == sk) + *rpp = NULL; } -/* Find the next port that hashes h that is larger than lport. - * If you change the hash, change this function to match, or you will - * break TCP port selection. This function must also NOT wrap around - * when the next number exceeds the largest possible port (2^16-1). 
- */ -static __inline__ int tcp_bhashnext(__u16 lport, __u16 h) +/* These are AF independent. */ +static __inline__ int tcp_bhashfn(__u16 lport) { - __u32 s; /* don't change this to a smaller type! */ - - s = (lport ^ (h ^ tcp_bhashfn(lport))); - if (s > lport) - return s; - s = lport + TCP_BHTABLE_SIZE; - return (s ^ (h ^ tcp_bhashfn(s))); + return (lport & (TCP_BHTABLE_SIZE - 1)); } -static __inline__ int tcp_sk_bhashfn(struct sock *sk) +static __inline__ void tcp_sk_bindify(struct sock *sk) { - __u16 lport = sk->num; - return tcp_bhashfn(lport); + struct tcp_bind_bucket *tb; + unsigned short snum = sk->num; + + for(tb = tcp_bound_hash[tcp_bhashfn(snum)]; tb->port != snum; tb = tb->next) + ; + /* Update bucket flags. */ + if(tb->owners == NULL) { + /* We're the first. */ + if(sk->reuse && sk->state != TCP_LISTEN) + tb->flags = TCPB_FLAG_FASTREUSE; + else + tb->flags = 0; + } else { + if((tb->flags & TCPB_FLAG_FASTREUSE) && + ((sk->reuse == 0) || (sk->state == TCP_LISTEN))) + tb->flags &= ~TCPB_FLAG_FASTREUSE; + } + if((sk->bind_next = tb->owners) != NULL) + tb->owners->bind_pprev = &sk->bind_next; + tb->owners = sk; + sk->bind_pprev = &tb->owners; + sk->prev = (struct sock *) tb; } +/* This is a TIME_WAIT bucket. It works around the memory consumption + * problems of sockets in such a state on heavily loaded servers, but + * without violating the protocol specification. + */ +struct tcp_tw_bucket { + /* These _must_ match the beginning of struct sock precisely. + * XXX Yes I know this is gross, but I'd have to edit every single + * XXX networking file if I created a "struct sock_header". 
-DaveM + */ + struct sock *sklist_next; + struct sock *sklist_prev; + struct sock *bind_next; + struct sock **bind_pprev; + struct sock *next; + struct sock **pprev; + __u32 daddr; + __u32 rcv_saddr; + int bound_dev_if; + unsigned short num; + unsigned char state, + family; /* sk->zapped */ + __u16 source; /* sk->dummy_th.source */ + __u16 dest; /* sk->dummy_th.dest */ + + /* And these are ours. */ + __u32 rcv_nxt; + struct tcp_func *af_specific; + struct tcp_bind_bucket *tb; + struct timer_list timer; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct in6_addr v6_daddr; + struct in6_addr v6_rcv_saddr; +#endif +}; + +extern kmem_cache_t *tcp_timewait_cachep; + +/* tcp_ipv4.c: These sysctl variables need to be shared between v4 and v6 + * because the v6 tcp code to intialize a connection needs to interoperate + * with the v4 code using the same variables. + * FIXME: It would be better to rewrite the connection code to be + * address family independent and just leave one copy in the ipv4 section. + * This would also clean up some code duplication. -- erics + */ +extern int sysctl_tcp_timestamps; +extern int sysctl_tcp_window_scaling; + /* These can have wildcards, don't try too hard. */ static __inline__ int tcp_lhashfn(unsigned short num) { @@ -93,28 +199,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) return tcp_lhashfn(sk->num); } -/* Only those holding the sockhash lock call these two things here. - * Note the slightly gross overloading of sk->prev, AF_UNIX is the - * only other main benefactor of that member of SK, so who cares. 
- */ -static __inline__ void tcp_sk_bindify(struct sock *sk) -{ - int hashent = tcp_sk_bhashfn(sk); - struct sock **htable = &tcp_bound_hash[hashent]; - - if((sk->bind_next = *htable) != NULL) - (*htable)->bind_pprev = &sk->bind_next; - *htable = sk; - sk->bind_pprev = htable; -} - -static __inline__ void tcp_sk_unbindify(struct sock *sk) -{ - if(sk->bind_next) - sk->bind_next->bind_pprev = sk->bind_pprev; - *(sk->bind_pprev) = sk->bind_next; -} - #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #define NETHDR_SIZE sizeof(struct ipv6hdr) #else @@ -186,6 +270,8 @@ static __inline__ void tcp_sk_unbindify(struct sock *sk) * we tell the LL layer that it is something * wrong (e.g. that it can expire redirects) */ +#define TCP_BUCKETGC_PERIOD (HZ) + /* * TCP option */ @@ -193,9 +279,6 @@ static __inline__ void tcp_sk_unbindify(struct sock *sk) #define TCPOPT_NOP 1 /* Padding */ #define TCPOPT_EOL 0 /* End of options */ #define TCPOPT_MSS 2 /* Segment size negotiating */ -/* - * We don't use these yet, but they are for PAWS and big windows - */ #define TCPOPT_WINDOW 3 /* Window scaling */ #define TCPOPT_SACK_PERM 4 /* SACK Permitted */ #define TCPOPT_SACK 5 /* SACK Block */ @@ -210,6 +293,10 @@ static __inline__ void tcp_sk_unbindify(struct sock *sk) #define TCPOLEN_SACK_PERM 2 #define TCPOLEN_TIMESTAMP 10 +/* But this is what stacks really send out. */ +#define TCPOLEN_TSTAMP_ALIGNED 12 +#define TCPOLEN_WSCALE_ALIGNED 4 + /* * TCP option flags for parsed options. */ @@ -259,7 +346,6 @@ struct open_request { __u8 __pad; unsigned snd_wscale : 4, rcv_wscale : 4, - sack_ok : 1, tstamp_ok : 1, wscale_ok : 1; /* The following two fields can be easily recomputed I think -AK */ @@ -355,7 +441,7 @@ extern __inline int after(__u32 seq1, __u32 seq2) /* is s2<=s1<=s3 ? 
*/ extern __inline int between(__u32 seq1, __u32 seq2, __u32 seq3) { - return (after(seq1+1, seq2) && before(seq1, seq3+1)); + return seq3 - seq2 >= seq1 - seq2; } @@ -390,6 +476,11 @@ extern int tcp_rcv_established(struct sock *sk, struct tcphdr *th, __u16 len); +extern int tcp_timewait_state_process(struct tcp_tw_bucket *tw, + struct sk_buff *skb, + struct tcphdr *th, + void *opt, __u16 len); + extern void tcp_close(struct sock *sk, unsigned long timeout); extern struct sock * tcp_accept(struct sock *sk, int flags); @@ -427,6 +518,10 @@ extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 isn); +extern struct sock * tcp_create_openreq_child(struct sock *sk, + struct open_request *req, + struct sk_buff *skb); + extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct open_request *req, @@ -457,10 +552,11 @@ extern void tcp_send_probe0(struct sock *); extern void tcp_send_partial(struct sock *); extern void tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); +extern void tcp_send_active_reset(struct sock *sk); extern int tcp_send_synack(struct sock *); -extern void tcp_send_skb(struct sock *, struct sk_buff *); +extern void tcp_send_skb(struct sock *, struct sk_buff *, int force_queue); extern void tcp_send_ack(struct sock *sk); -extern void tcp_send_delayed_ack(struct sock *sk, int max_timeout); +extern void tcp_send_delayed_ack(struct tcp_opt *tp, int max_timeout); /* CONFIG_IP_TRANSPARENT_PROXY */ extern int tcp_chkaddr(struct sk_buff *); @@ -492,40 +588,94 @@ struct tcp_sl_timer { #define TCP_SLT_SYNACK 0 #define TCP_SLT_KEEPALIVE 1 -#define TCP_SLT_MAX 2 +#define TCP_SLT_BUCKETGC 2 +#define TCP_SLT_MAX 3 extern struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX]; -/* - * FIXME: this method of choosing when to send a window update - * does not seem correct to me. -- erics +/* Compute the actual receive window we are currently advertising. 
*/ +static __inline__ u32 tcp_receive_window(struct tcp_opt *tp) +{ + return tp->rcv_wup - (tp->rcv_nxt - tp->rcv_wnd); +} + +/* Choose a new window, without checks for shrinking, and without + * scaling applied to the result. The caller does these things + * if necessary. This is a "raw" window selection. */ -static __inline__ unsigned short tcp_raise_window(struct sock *sk) +extern u32 __tcp_select_window(struct sock *sk); + +/* Choose a new window to advertise, update state in tcp_opt for the + * socket, and return result with RFC1323 scaling applied. The return + * value can be stuffed directly into th->window for an outgoing + * frame. + */ +extern __inline__ u16 tcp_select_window(struct sock *sk) { - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - long cur_win; - int res = 0; - - /* - * compute the actual window i.e. - * old_window - received_bytes_on_that_win - */ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + u32 new_win = __tcp_select_window(sk); + u32 cur_win = tcp_receive_window(tp); - cur_win = tp->rcv_wup - (tp->rcv_nxt - tp->rcv_wnd); + /* Never shrink the offered window */ + if(new_win < cur_win) + new_win = cur_win; + tp->rcv_wnd = new_win; + tp->rcv_wup = tp->rcv_nxt; + /* RFC1323 scaling applied */ + return new_win >> tp->rcv_wscale; +} - /* - * We need to send an ack right away if - * our rcv window is blocking the sender and - * we have more free space to offer. - */ +/* See if we can advertise non-zero, and if so how much we + * can increase our advertisement. If it becomes more than + * twice what we are talking about right now, return true. 
+ */ +extern __inline__ int tcp_raise_window(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + u32 new_win = __tcp_select_window(sk); + u32 cur_win = tcp_receive_window(tp); - if (cur_win < (sk->mss << 1)) - res = 1; - return res; + return (new_win && (new_win > (cur_win << 1))); } -extern unsigned short tcp_select_window(struct sock *sk); +/* This checks if the data bearing packet SKB (usually tp->send_head) + * should be put on the wire right now. + */ +static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int nagle_check = 1; + int len; + + /* RFC 1122 - section 4.2.3.4 + * + * We must queue if + * + * a) The right edge of this frame exceeds the window + * b) There are packets in flight and we have a small segment + * [SWS avoidance and Nagle algorithm] + * (part of SWS is done on packetization) + * c) We are retransmitting [Nagle] + * d) We have too many packets 'in flight' + * + * Don't use the nagle rule for urgent data. + */ + len = skb->end_seq - skb->seq; + if (!sk->nonagle && len < (sk->mss >> 1) && tp->packets_out && + !skb->h.th->urg) + nagle_check = 0; + + return (nagle_check && tp->packets_out < tp->snd_cwnd && + !after(skb->end_seq, tp->snd_una + tp->snd_wnd) && + tp->retransmits == 0); +} + +/* This tells the input processing path that an ACK should go out + * right now. 
+ */ +#define tcp_enter_quickack_mode(__tp) ((__tp)->ato = (HZ/100)) +#define tcp_in_quickack_mode(__tp) ((__tp)->ato == (HZ/100)) /* * List all states of a TCP socket that can be viewed as a "connected" @@ -581,41 +731,49 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) case TCP_CLOSE: /* Should be about 2 rtt's */ net_reset_timer(sk, TIME_DONE, min(tp->srtt * 2, TCP_DONE_TIME)); + sk->prot->unhash(sk); /* fall through */ default: if (oldstate==TCP_ESTABLISHED) tcp_statistics.TcpCurrEstab--; - if (state == TCP_TIME_WAIT || state == TCP_CLOSE) - sk->prot->rehash(sk); } } static __inline__ void tcp_build_options(__u32 *ptr, struct tcp_opt *tp) { - /* FIXME: We will still need to do SACK here. */ if (tp->tstamp_ok) { - *ptr = ntohl((TCPOPT_NOP << 24) - | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) - | TCPOLEN_TIMESTAMP); + *ptr = __constant_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); /* rest filled in by tcp_update_options */ } } static __inline__ void tcp_update_options(__u32 *ptr, struct tcp_opt *tp) { - /* FIXME: We will still need to do SACK here. */ if (tp->tstamp_ok) { *++ptr = htonl(jiffies); *++ptr = htonl(tp->ts_recent); } } +static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *tp) +{ + if (tp->tstamp_ok) { + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + *ptr++ = htonl(jiffies); + *ptr = htonl(tp->ts_recent); + } +} + /* * These routines build a generic TCP header. * They also build the RFC1323 Timestamp, but don't fill the * actual timestamp in (you need to call tcp_update_options for this). - * It can't (unfortunately) do SACK as well. * XXX: pass tp instead of sk here. 
*/ @@ -624,23 +782,12 @@ static inline void tcp_build_header_data(struct tcphdr *th, struct sock *sk, int struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); memcpy(th,(void *) &(sk->dummy_th), sizeof(*th)); - th->seq = htonl(sk->write_seq); + th->seq = htonl(tp->write_seq); if (!push) th->psh = 1; tcp_build_options((__u32*)(th+1), tp); } -static inline void tcp_build_header(struct tcphdr *th, struct sock *sk) -{ - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - memcpy(th,(void *) &(sk->dummy_th), sizeof(*th)); - th->seq = htonl(sk->write_seq); - th->ack_seq = htonl(tp->last_ack_sent = tp->rcv_nxt); - th->window = htons(tcp_select_window(sk)); - tcp_build_options((__u32 *)(th+1), tp); -} - /* * Construct a tcp options header for a SYN or SYN_ACK packet. * If this is every changed make sure to change the definition of @@ -651,31 +798,32 @@ static inline void tcp_build_header(struct tcphdr *th, struct sock *sk) * It would be especially magical to compute the checksum for this * stuff on the fly here. */ -extern __inline__ int tcp_syn_build_options(struct sk_buff *skb, int mss, int sack, int ts, int offer_wscale, int wscale) +extern __inline__ int tcp_syn_build_options(struct sk_buff *skb, int mss, int ts, int offer_wscale, int wscale) { - int count = 4 + (offer_wscale ? 4 : 0) + ((ts || sack) ? 4 : 0) + (ts ? 8 : 0); + int count = 4 + (offer_wscale ? TCPOLEN_WSCALE_ALIGNED : 0) + + ((ts) ? TCPOLEN_TSTAMP_ALIGNED : 0); unsigned char *optr = skb_put(skb,count); __u32 *ptr = (__u32 *)optr; - /* - * We always get an MSS option. + /* We always get an MSS option. + * The option bytes which will be seen in normal data + * packets should timestamps be used, must be in the MSS + * advertised. But we subtract them from sk->mss so + * that calculations in tcp_sendmsg are simpler etc. + * So account for this fact here if necessary. 
If we + * don't do this correctly, as a receiver we won't + * recognize data packets as being full sized when we + * should, and thus we won't abide by the delayed ACK + * rules correctly. */ + if(ts) + mss += TCPOLEN_TSTAMP_ALIGNED; *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); if (ts) { - if (sack) { - *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) - | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - *ptr++ = htonl(jiffies); /* TSVAL */ - *ptr++ = htonl(0); /* TSECR */ - } else { - *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - *ptr++ = htonl(jiffies); /* TSVAL */ - *ptr++ = htonl(0); /* TSECR */ - } - } else if (sack) { - *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) - | (TCPOPT_NOP << 8) | TCPOPT_NOP); + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + *ptr++ = htonl(jiffies); /* TSVAL */ + *ptr++ = __constant_htonl(0); /* TSECR */ } if (offer_wscale) *ptr++ = htonl((TCPOPT_WINDOW << 24) | (TCPOLEN_WINDOW << 16) | (wscale << 8)); @@ -724,33 +872,15 @@ extern __inline__ void tcp_select_initial_window(__u32 space, __u16 mss, (*window_clamp) = min(65535<<(*rcv_wscale),*window_clamp); } -#define SYNQ_DEBUG 1 - extern __inline__ void tcp_synq_unlink(struct tcp_opt *tp, struct open_request *req, struct open_request *prev) { -#ifdef SYNQ_DEBUG - if (prev->dl_next != req) { - printk(KERN_DEBUG "synq_unlink: bad prev ptr: %p\n",prev); - return; - } -#endif - if(!req->dl_next) { -#ifdef SYNQ_DEBUG - if (tp->syn_wait_last != (void*) req) - printk(KERN_DEBUG "synq_unlink: bad last ptr %p,%p\n", - req,tp->syn_wait_last); -#endif + if(!req->dl_next) tp->syn_wait_last = (struct open_request **)prev; - } prev->dl_next = req->dl_next; } extern __inline__ void tcp_synq_queue(struct tcp_opt *tp, struct open_request *req) { -#ifdef SYNQ_DEBUG - if (*tp->syn_wait_last != NULL) - 
printk("synq_queue: last ptr doesn't point to last req.\n"); -#endif req->dl_next = NULL; *tp->syn_wait_last = req; tp->syn_wait_last = &req->dl_next; @@ -765,14 +895,11 @@ extern __inline__ void tcp_synq_init(struct tcp_opt *tp) extern __inline__ struct open_request *tcp_synq_unlink_tail(struct tcp_opt *tp) { struct open_request *head = tp->syn_wait_queue; -#ifdef SYNQ_DEBUG - if (!head) { - printk(KERN_DEBUG "tail drop on empty queue? - bug\n"); - return NULL; - } -#endif +#if 0 + /* Should be a net-ratelimit'd thing, not all the time. */ printk(KERN_DEBUG "synq tail drop with expire=%ld\n", head->expires-jiffies); +#endif if (head->dl_next == NULL) tp->syn_wait_last = &tp->syn_wait_queue; tp->syn_wait_queue = head->dl_next; @@ -799,6 +926,17 @@ extern __inline__ void tcp_dec_slow_timer(int timer) atomic_dec(&slt->count); } +/* This needs to use a slow timer, so it is here. */ +static __inline__ void tcp_sk_unbindify(struct sock *sk) +{ + struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *) sk->prev; + if(sk->bind_next) + sk->bind_next->bind_pprev = sk->bind_pprev; + *sk->bind_pprev = sk->bind_next; + if(tb->owners == NULL) + tcp_inc_slow_timer(TCP_SLT_BUCKETGC); +} + extern const char timer_bug_msg[]; static inline void tcp_clear_xmit_timer(struct sock *sk, int what) @@ -820,7 +958,8 @@ static inline void tcp_clear_xmit_timer(struct sock *sk, int what) printk(timer_bug_msg); return; }; - del_timer(timer); + if(timer->prev != NULL) + del_timer(timer); } static inline int tcp_timer_is_set(struct sock *sk, int what) @@ -829,13 +968,13 @@ static inline int tcp_timer_is_set(struct sock *sk, int what) switch (what) { case TIME_RETRANS: - return tp->retransmit_timer.next != NULL; + return tp->retransmit_timer.prev != NULL; break; case TIME_DACK: - return tp->delack_timer.next != NULL; + return tp->delack_timer.prev != NULL; break; case TIME_PROBE0: - return tp->probe_timer.next != NULL; + return tp->probe_timer.prev != NULL; break; default: printk(timer_bug_msg); |