diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2000-07-10 23:18:26 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-07-10 23:18:26 +0000 |
commit | c7c4310f7fc1485925d800628bf50b3aeab535ef (patch) | |
tree | b12aa4be0e8fb82aaaea97fb475e793e8a347c49 | |
parent | 1ffd1d069ca4c5ffe16fea6175dab1b9bbb15820 (diff) |
Merge with Linux 2.4.0-test3-pre8. Linus has accepted most of what
I've sent him, so we're very close to full integration of the MIPS
port into his sources.
86 files changed, 2417 insertions, 1815 deletions
diff --git a/Documentation/Configure.help b/Documentation/Configure.help index c7ee9aa5f..9a7ee4a2b 100644 --- a/Documentation/Configure.help +++ b/Documentation/Configure.help @@ -4318,12 +4318,10 @@ CONFIG_DMASCC modem), in order to send and receive AX.25 packet radio network traffic. - Currently, this driver supports Ottawa PI/PI2 - (http://hydra.carleton.ca/info/pi2.html ) and Gracilis PackeTwin - (http://www.paccomm.com/gracilis.html ) boards. They are detected - automatically. If you have one of these cards, say Y here and read - the AX25-HOWTO, available from - http://www.linuxdoc.org/docs.html#howto . + Currently, this driver supports Ottawa PI/PI2, Paccomm/Gracilis + PackeTwin, and S5SCC/DMA boards. They are detected automatically. + If you have one of these cards, say Y here and read the AX25-HOWTO, + available from http://www.linuxdoc.org/docs.html#howto . This driver can operate multiple boards simultaneously. If you compile it as a module (by saying M instead of Y), it will be called @@ -4339,7 +4337,9 @@ CONFIG_DMASCC certain parameters, such as channel access timing, clock mode, and DMA channel. This is accomplished with a small utility program, dmascc_cfg, available at - http://www.nt.tuwien.ac.at/~kkudielk/Linux/ . + http://www.nt.tuwien.ac.at/~kkudielk/Linux/ . Please be sure to get + at least version 1.27 of dmascc_cfg, as older versions will not + work with the current driver. 
Z8530 SCC driver for AX.25 CONFIG_SCC diff --git a/MAINTAINERS b/MAINTAINERS index b3a4c6d86..7fcfdae09 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -438,8 +438,9 @@ S: Maintained HIGH-SPEED SCC DRIVER FOR AX.25 P: Klaus Kudielka -M: oe1kib@oe1kib.ampr.org +M: klaus.kudielka@ieee.org L: linux-hams@vger.rutgers.edu +W: http://www.nt.tuwien.ac.at/~kkudielk/Linux/ S: Maintained LOGICAL VOLUME MANAGER diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index a80d61ebc..660a9e1fb 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -77,8 +77,8 @@ sigpending = 8 addr_limit = 12 exec_domain = 16 need_resched = 20 -processor = 56 -tsk_ptrace = 60 +processor = 48 +tsk_ptrace = 56 ENOSYS = 38 diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index bddac2a1d..e240cc79a 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -88,12 +88,38 @@ static void default_idle(void) } /* + * On SMP it's slightly faster (but much more power-consuming!) + * to poll the ->need_resched flag instead of waiting for the + * cross-CPU IPI to arrive. Use this option with caution. + */ +static void poll_idle (void) +{ + int oldval; + + __sti(); + + /* + * Deal with another CPU just having chosen a thread to + * run here: + */ + oldval = xchg(&current->need_resched, -1); + + if (!oldval) + asm volatile( + "2:" + "cmpl $-1, %0;" + "rep; nop;" + "je 2b;" + : :"m" (current->need_resched)); +} + +/* * The idle thread. 
There's no useful work to be * done, so just try to conserve power and have a * low exit latency (ie sit in a loop waiting for * somebody to say that they'd like to reschedule) */ -void cpu_idle(void) +void cpu_idle (void) { /* endless idle loop with no priority at all */ init_idle(); @@ -111,6 +137,18 @@ void cpu_idle(void) } } +static int __init idle_setup (char *str) +{ + if (!strncmp(str, "poll", 4)) { + printk("using polling idle threads.\n"); + pm_idle = poll_idle; + } + + return 1; +} + +__setup("idle=", idle_setup); + static long no_idt[2] = {0, 0}; static int reboot_mode = 0; static int reboot_thru_bios = 0; diff --git a/arch/mips/defconfig b/arch/mips/defconfig index 18c31669d..a664376d1 100644 --- a/arch/mips/defconfig +++ b/arch/mips/defconfig @@ -110,12 +110,8 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set # CONFIG_IP_ALIAS is not set +# CONFIG_INET_ECN is not set # CONFIG_SYN_COOKIES is not set - -# -# (it is safe to leave these untouched) -# -# CONFIG_SKB_LARGE is not set # CONFIG_IPV6 is not set # CONFIG_KHTTPD is not set # CONFIG_ATM is not set diff --git a/arch/mips/defconfig-cobalt b/arch/mips/defconfig-cobalt index e2700ed7a..b17dd8433 100644 --- a/arch/mips/defconfig-cobalt +++ b/arch/mips/defconfig-cobalt @@ -114,12 +114,8 @@ CONFIG_IP_MROUTE=y # CONFIG_IP_PIMSM_V1 is not set # CONFIG_IP_PIMSM_V2 is not set CONFIG_IP_ALIAS=y +# CONFIG_INET_ECN is not set CONFIG_SYN_COOKIES=y - -# -# (it is safe to leave these untouched) -# -CONFIG_SKB_LARGE=y # CONFIG_IPV6 is not set # CONFIG_KHTTPD is not set # CONFIG_ATM is not set diff --git a/arch/mips/defconfig-decstation b/arch/mips/defconfig-decstation index c0e7558f1..3e75a49a8 100644 --- a/arch/mips/defconfig-decstation +++ b/arch/mips/defconfig-decstation @@ -106,12 +106,8 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set # CONFIG_IP_ALIAS is not set +# CONFIG_INET_ECN is not set # CONFIG_SYN_COOKIES is not set - -# -# (it is safe 
to leave these untouched) -# -# CONFIG_SKB_LARGE is not set # CONFIG_IPV6 is not set # CONFIG_KHTTPD is not set # CONFIG_ATM is not set diff --git a/arch/mips/defconfig-ip22 b/arch/mips/defconfig-ip22 index 18c31669d..a664376d1 100644 --- a/arch/mips/defconfig-ip22 +++ b/arch/mips/defconfig-ip22 @@ -110,12 +110,8 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set # CONFIG_IP_ALIAS is not set +# CONFIG_INET_ECN is not set # CONFIG_SYN_COOKIES is not set - -# -# (it is safe to leave these untouched) -# -# CONFIG_SKB_LARGE is not set # CONFIG_IPV6 is not set # CONFIG_KHTTPD is not set # CONFIG_ATM is not set diff --git a/arch/mips/defconfig-rm200 b/arch/mips/defconfig-rm200 index fba87cd6b..ac863d978 100644 --- a/arch/mips/defconfig-rm200 +++ b/arch/mips/defconfig-rm200 @@ -112,12 +112,8 @@ CONFIG_INET=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set # CONFIG_IP_ALIAS is not set +# CONFIG_INET_ECN is not set # CONFIG_SYN_COOKIES is not set - -# -# (it is safe to leave these untouched) -# -# CONFIG_SKB_LARGE is not set # CONFIG_IPV6 is not set # CONFIG_KHTTPD is not set # CONFIG_ATM is not set diff --git a/arch/mips64/defconfig b/arch/mips64/defconfig index ba39829fa..3af288cff 100644 --- a/arch/mips64/defconfig +++ b/arch/mips64/defconfig @@ -102,14 +102,10 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set # CONFIG_IP_ALIAS is not set +# CONFIG_INET_ECN is not set # CONFIG_SYN_COOKIES is not set # -# (it is safe to leave these untouched) -# -CONFIG_SKB_LARGE=y - -# # # # CONFIG_IPX is not set diff --git a/arch/mips64/defconfig-ip22 b/arch/mips64/defconfig-ip22 index ef422eb5b..a40dc4eb0 100644 --- a/arch/mips64/defconfig-ip22 +++ b/arch/mips64/defconfig-ip22 @@ -92,14 +92,10 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set # CONFIG_IP_ALIAS is not set +# CONFIG_INET_ECN is not set # CONFIG_SYN_COOKIES is not set # -# (it is safe to leave these untouched) 
-# -CONFIG_SKB_LARGE=y - -# # # # CONFIG_IPX is not set diff --git a/arch/mips64/defconfig-ip27 b/arch/mips64/defconfig-ip27 index ba39829fa..3af288cff 100644 --- a/arch/mips64/defconfig-ip27 +++ b/arch/mips64/defconfig-ip27 @@ -102,14 +102,10 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set # CONFIG_IP_ALIAS is not set +# CONFIG_INET_ECN is not set # CONFIG_SYN_COOKIES is not set # -# (it is safe to leave these untouched) -# -CONFIG_SKB_LARGE=y - -# # # # CONFIG_IPX is not set diff --git a/arch/sparc/defconfig b/arch/sparc/defconfig index cc5dfe78a..0813c9501 100644 --- a/arch/sparc/defconfig +++ b/arch/sparc/defconfig @@ -134,12 +134,8 @@ CONFIG_INET=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set # CONFIG_IP_ALIAS is not set +# CONFIG_INET_ECN is not set # CONFIG_SYN_COOKIES is not set - -# -# (it is safe to leave these untouched) -# -CONFIG_SKB_LARGE=y CONFIG_IPV6=m # CONFIG_IPV6_EUI64 is not set # CONFIG_KHTTPD is not set diff --git a/arch/sparc64/config.in b/arch/sparc64/config.in index 5cd76d3af..eede4683d 100644 --- a/arch/sparc64/config.in +++ b/arch/sparc64/config.in @@ -27,6 +27,7 @@ define_bool CONFIG_VT_CONSOLE y bool 'Symmetric multi-processing support' CONFIG_SMP # Global things across all Sun machines. 
+define_bool CONFIG_HAVE_DEC_LOCK y define_bool CONFIG_ISA n define_bool CONFIG_PCMCIA n define_bool CONFIG_SBUS y diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig index 6612852e4..3cf68e2b5 100644 --- a/arch/sparc64/defconfig +++ b/arch/sparc64/defconfig @@ -20,6 +20,7 @@ CONFIG_KMOD=y CONFIG_VT=y CONFIG_VT_CONSOLE=y # CONFIG_SMP is not set +CONFIG_HAVE_DEC_LOCK=y # CONFIG_ISA is not set # CONFIG_PCMCIA is not set CONFIG_SBUS=y @@ -155,12 +156,8 @@ CONFIG_INET=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set # CONFIG_IP_ALIAS is not set +# CONFIG_INET_ECN is not set # CONFIG_SYN_COOKIES is not set - -# -# (it is safe to leave these untouched) -# -CONFIG_SKB_LARGE=y CONFIG_IPV6=m # CONFIG_IPV6_EUI64 is not set # CONFIG_KHTTPD is not set diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c index 446072933..b066b8b96 100644 --- a/arch/sparc64/kernel/binfmt_aout32.c +++ b/arch/sparc64/kernel/binfmt_aout32.c @@ -325,7 +325,7 @@ beyond_if: current->thread.flags |= SPARC_FLAG_32BIT; } start_thread32(regs, ex.a_entry, current->mm->start_stack); - if (current->flags & PF_PTRACED) + if (current->ptrace & PT_PTRACED) send_sig(SIGTRAP, current, 0); return 0; } diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 2ef0d1004..40b12eb6a 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -386,6 +386,46 @@ void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2) } } +struct call_data_struct { + void (*func) (void *info); + void *info; + atomic_t finished; + int wait; +}; + +extern unsigned long xcall_call_function; + +int smp_call_function(void (*func)(void *info), void *info, + int nonatomic, int wait) +{ + struct call_data_struct data; + int cpus = smp_num_cpus - 1; + + if (!cpus) + return 0; + + data.func = func; + data.info = info; + atomic_set(&data.finished, 0); + data.wait = wait; + + smp_cross_call(&xcall_call_function, + 0, (u64) &data, 0); + if (wait) { + 
while (atomic_read(&data.finished) != cpus) + barrier(); + } + + return 0; +} + +void smp_call_function_client(struct call_data_struct *call_data) +{ + call_data->func(call_data->info); + if (call_data->wait) + atomic_inc(&call_data->finished); +} + extern unsigned long xcall_flush_tlb_page; extern unsigned long xcall_flush_tlb_mm; extern unsigned long xcall_flush_tlb_range; diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 8b917e303..5d2dd2985 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -279,7 +279,7 @@ char * getname32(const char *filename) char *tmp, *result; result = ERR_PTR(-ENOMEM); - tmp = (char *)__get_free_page(GFP_KERNEL); + tmp = __getname(); if (tmp) { int retval = do_getname32(filename, tmp); diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index 548ef0ac4..6c2d54e04 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile @@ -1,5 +1,5 @@ # $Id: Makefile,v 1.22 2000/03/31 04:06:23 davem Exp $ -# Makefile for Sparc library files.. +# Makefile for Sparc64 library files.. # CFLAGS := $(CFLAGS) @@ -7,7 +7,8 @@ CFLAGS := $(CFLAGS) OBJS = PeeCeeI.o blockops.o debuglocks.o strlen.o strncmp.o \ memscan.o strncpy_from_user.o strlen_user.o memcmp.o checksum.o \ VIScopy.o VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o \ - VIScsumcopyusr.o VISsave.o atomic.o rwlock.o bitops.o + VIScsumcopyusr.o VISsave.o atomic.o rwlock.o bitops.o \ + dec_and_lock.o lib.a: $(OBJS) $(AR) rcs lib.a $(OBJS) diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S new file mode 100644 index 000000000..dca825a7f --- /dev/null +++ b/arch/sparc64/lib/dec_and_lock.S @@ -0,0 +1,61 @@ +/* $Id$ + * dec_and_lock.S: Sparc64 version of "atomic_dec_and_lock()" + * using cas and ldstub instructions. + * + * Copyright (C) 2000 David S. 
Miller (davem@redhat.com) + */ + + .text + .align 64 + + /* CAS basically works like this: + * + * void CAS(MEM, REG1, REG2) + * { + * START_ATOMIC(); + * if (*(MEM) == REG1) { + * TMP = *(MEM); + * *(MEM) = REG2; + * REG2 = TMP; + * } + * END_ATOMIC(); + * } + * + * All non-contention cases are handled in 2 I-cache + * lines which is 1 L2 cache line. + */ + + .globl atomic_dec_and_lock +atomic_dec_and_lock: /* %o0 = counter, %o1 = lock */ +loop1: lduw [%o0], %g5 + subcc %g5, 1, %g7 + be,pn %icc, to_zero + nop +nzero: cas [%o0], %g5, %g7 + cmp %g5, %g7 + bne,pn %icc, loop1 + mov 0, %g1 + +out: retl + mov %g1, %o0 +to_zero:ldstub [%o1], %g3 + brnz,pn %g3, spin_on_lock + membar #StoreLoad | #StoreStore +loop2: cas [%o0], %g5, %g7 /* ASSERT(g7 == 0) */ + brnz,pt %g7, out + mov 1, %g1 + + lduw [%o0], %g5 + subcc %g5, 1, %g7 + be,pn %icc, loop2 + nop + membar #StoreStore | #LoadStore + stb %g0, [%o1] + b,pt %xcc, nzero + nop + +spin_on_lock: + ldub [%o1], %g3 + brnz,pt %g3, spin_on_lock + membar #LoadLoad + b,a,pt %xcc, to_zero diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index 90cc898ff..e954b24c8 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -432,4 +432,24 @@ xcall_flush_cache_all: nop flush %g6 retry + + .globl xcall_call_function +xcall_call_function: + mov TLB_TAG_ACCESS, %g5 ! wheee... + stxa %g1, [%g5] ASI_IMMU ! 
save call_data here for a bit + membar #Sync + rdpr %pstate, %g2 + wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate + mov TLB_TAG_ACCESS, %g2 + ldxa [%g2] ASI_IMMU, %g5 + rdpr %pil, %g2 + wrpr %g0, 15, %pil + sethi %hi(109f), %g7 + b,pt %xcc, etrap_irq +109: or %g7, %lo(109b), %g7 + call smp_call_function_client + mov %l5, %o0 + b,pt %xcc, rtrap + clr %l6 + #endif /* CONFIG_SMP */ diff --git a/arch/sparc64/solaris/fs.c b/arch/sparc64/solaris/fs.c index 5be9c5d49..8200c831f 100644 --- a/arch/sparc64/solaris/fs.c +++ b/arch/sparc64/solaris/fs.c @@ -9,6 +9,7 @@ #include <linux/types.h> #include <linux/sched.h> +#include <linux/malloc.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/file.h> diff --git a/drivers/block/lvm-snap.c b/drivers/block/lvm-snap.c index 2f01d3f7b..20e1c78cc 100644 --- a/drivers/block/lvm-snap.c +++ b/drivers/block/lvm-snap.c @@ -29,7 +29,7 @@ #include <linux/lvm.h> -static char *lvm_snap_version = "LVM 0.8final (15/02/2000)\n"; +static char *lvm_snap_version __attribute__ ((unused)) = "LVM 0.8final (15/02/2000)\n"; extern const char *const lvm_name; extern int lvm_blocksizes[]; diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index 42063cefa..a32cd18df 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -25,9 +25,12 @@ * the kernel and is not a module. Since the functions are used by some Atari * drivers, this is the case on the Atari. * + * + * 1.1 Cesar Barros: SMP locking fixes + * added changelog */ -#define NVRAM_VERSION "1.0" +#define NVRAM_VERSION "1.1" #include <linux/module.h> #include <linux/config.h> @@ -81,7 +84,7 @@ #endif /* Note that *all* calls to CMOS_READ and CMOS_WRITE must be done with - * interrupts disabled. Due to the index-port/data-port design of the RTC, we + * rtc_lock held. Due to the index-port/data-port design of the RTC, we * don't want two different things trying to get to it at once. (e.g. the * periodic 11 min sync from time.c vs. this driver.) 
*/ @@ -96,11 +99,13 @@ #include <linux/nvram.h> #include <linux/init.h> #include <linux/proc_fs.h> +#include <linux/spinlock.h> #include <asm/io.h> #include <asm/uaccess.h> #include <asm/system.h> +extern spinlock_t rtc_lock; static int nvram_open_cnt = 0; /* #times opened */ static int nvram_open_mode; /* special open modes */ @@ -163,21 +168,20 @@ unsigned char nvram_read_byte( int i ) unsigned long flags; unsigned char c; - save_flags(flags); - cli(); + spin_lock_irqsave (&rtc_lock, flags); c = nvram_read_int( i ); - restore_flags(flags); + spin_unlock_irqrestore (&rtc_lock, flags); return( c ); } +/* This races nicely with trying to read with checksum checking (nvram_read) */ void nvram_write_byte( unsigned char c, int i ) { unsigned long flags; - save_flags(flags); - cli(); + spin_lock_irqsave (&rtc_lock, flags); nvram_write_int( c, i ); - restore_flags(flags); + spin_unlock_irqrestore (&rtc_lock, flags); } int nvram_check_checksum( void ) @@ -185,10 +189,9 @@ int nvram_check_checksum( void ) unsigned long flags; int rv; - save_flags(flags); - cli(); + spin_lock_irqsave (&rtc_lock, flags); rv = nvram_check_checksum_int(); - restore_flags(flags); + spin_unlock_irqrestore (&rtc_lock, flags); return( rv ); } @@ -196,10 +199,9 @@ void nvram_set_checksum( void ) { unsigned long flags; - save_flags(flags); - cli(); + spin_lock_irqsave (&rtc_lock, flags); nvram_set_checksum_int(); - restore_flags(flags); + spin_unlock_irqrestore (&rtc_lock, flags); } #endif /* MACH == ATARI */ @@ -228,63 +230,67 @@ static long long nvram_llseek(struct file *file,loff_t offset, int origin ) static ssize_t nvram_read(struct file * file, char * buf, size_t count, loff_t *ppos ) { - unsigned long flags; + char contents [NVRAM_BYTES]; unsigned i = *ppos; - char *tmp = buf; - - if (i != *ppos) - return -EINVAL; + char *tmp; - save_flags(flags); - cli(); + spin_lock_irq (&rtc_lock); - if (!nvram_check_checksum_int()) { - restore_flags(flags); - return( -EIO ); - } + if 
(!nvram_check_checksum_int()) + goto checksum_err; + + for (tmp = contents; count-- > 0 && i < NVRAM_BYTES; ++i, ++tmp) + *tmp = nvram_read_int(i); + + spin_unlock_irq (&rtc_lock); + + copy_to_user_ret (buf, contents, tmp - contents, -EFAULT); - for( ; count-- > 0 && i < NVRAM_BYTES; ++i, ++tmp ) - put_user( nvram_read_int(i), tmp ); *ppos = i; - restore_flags(flags); - return( tmp - buf ); + return (tmp - contents); + +checksum_err: + spin_unlock_irq (&rtc_lock); + return -EIO; } static ssize_t nvram_write(struct file * file, const char * buf, size_t count, loff_t *ppos ) { - unsigned long flags; + char contents [NVRAM_BYTES]; unsigned i = *ppos; - const char *tmp = buf; - char c; - - if (i != *ppos) - return -EINVAL; + char * tmp; - save_flags(flags); - cli(); - - if (!nvram_check_checksum_int()) { - restore_flags(flags); - return( -EIO ); - } + /* could comebody please help me indent this better? */ + copy_from_user_ret (contents, buf, (NVRAM_BYTES - i) < count ? + (NVRAM_BYTES - i) : count, + -EFAULT); + + spin_lock_irq (&rtc_lock); + + if (!nvram_check_checksum_int()) + goto checksum_err; + + for (tmp = contents; count-- > 0 && i < NVRAM_BYTES; ++i, ++tmp) + nvram_write_int (*tmp, i); - for( ; count-- > 0 && i < NVRAM_BYTES; ++i, ++tmp ) { - get_user( c, tmp ); - nvram_write_int( c, i ); - } nvram_set_checksum_int(); + + spin_unlock_irq (&rtc_lock); + *ppos = i; - restore_flags(flags); - return( tmp - buf ); + return (tmp - contents); + +checksum_err: + spin_unlock_irq (&rtc_lock); + return -EIO; } static int nvram_ioctl( struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg ) { - unsigned long flags; int i; switch( cmd ) { @@ -293,14 +299,13 @@ static int nvram_ioctl( struct inode *inode, struct file *file, if (!capable(CAP_SYS_ADMIN)) return( -EACCES ); - save_flags(flags); - cli(); + spin_lock_irq (&rtc_lock); for( i = 0; i < NVRAM_BYTES; ++i ) nvram_write_int( 0, i ); nvram_set_checksum_int(); - restore_flags(flags); + spin_unlock_irq 
(&rtc_lock); return( 0 ); case NVRAM_SETCKS: /* just set checksum, contents unchanged @@ -309,10 +314,9 @@ static int nvram_ioctl( struct inode *inode, struct file *file, if (!capable(CAP_SYS_ADMIN)) return( -EACCES ); - save_flags(flags); - cli(); + spin_lock_irq (&rtc_lock); nvram_set_checksum_int(); - restore_flags(flags); + spin_unlock_irq (&rtc_lock); return( 0 ); default: @@ -355,16 +359,14 @@ static int nvram_read_proc( char *buffer, char **start, off_t offset, static int nvram_read_proc( char *buffer, char **start, off_t offset, int size, int *eof, void *data ) { - unsigned long flags; unsigned char contents[NVRAM_BYTES]; int i, len = 0; off_t begin = 0; - - save_flags(flags); - cli(); + + spin_lock_irq (&rtc_lock); for( i = 0; i < NVRAM_BYTES; ++i ) contents[i] = nvram_read_int( i ); - restore_flags(flags); + spin_unlock_irq (&rtc_lock); *eof = mach_proc_infos( contents, buffer, &len, &begin, offset, size ); @@ -475,15 +477,13 @@ static char *gfx_types[] = { static int pc_proc_infos( unsigned char *nvram, char *buffer, int *len, off_t *begin, off_t offset, int size ) { - unsigned long flags; int checksum; int type; - save_flags(flags); - cli(); + spin_lock_irq (&rtc_lock); checksum = nvram_check_checksum_int(); - restore_flags(flags); - + spin_unlock_irq (&rtc_lock); + PRINT_PROC( "Checksum status: %svalid\n", checksum ? 
"" : "not " ); PRINT_PROC( "# floppies : %d\n", diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index f0b95fd0f..0bcb9eddc 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1115,26 +1115,26 @@ static int __init i2c_init(void) extern int i2c_dev_init(void); #endif #ifdef CONFIG_I2C_ALGOBIT - extern int algo_bit_init(void); + extern int i2c_algo_bit_init(void); #endif #ifdef CONFIG_I2C_BITLP - extern int bitlp_init(void); + extern int i2c_bitlp_init(void); #endif #ifdef CONFIG_I2C_BITELV - extern int bitelv_init(void); + extern int i2c_bitelv_init(void); #endif #ifdef CONFIG_I2C_BITVELLE - extern int bitvelle_init(void); + extern int i2c_bitvelle_init(void); #endif #ifdef CONFIG_I2C_BITVIA - extern int bitvia_init(void); + extern int i2c_bitvia_init(void); #endif #ifdef CONFIG_I2C_ALGOPCF - extern int algo_pcf_init(void); + extern int i2c_algo_pcf_init(void); #endif #ifdef CONFIG_I2C_PCFISA - extern int pcfisa_init(void); + extern int i2c_pcfisa_init(void); #endif /* This is needed for automatic patch generation: sensors code starts here */ diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 43838c6dd..88b3f8e4a 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -2858,13 +2858,16 @@ int __init ide_setup (char *s) const char max_drive = 'a' + ((MAX_HWIFS * MAX_DRIVES) - 1); const char max_hwif = '0' + (MAX_HWIFS - 1); + + if (strncmp(s,"hd",2) == 0 && s[2] == '=') /* hd= is for hd.c */ + return 0; /* driver and not us */ + if (strncmp(s,"ide",3) && strncmp(s,"idebus",6) && #ifdef CONFIG_BLK_DEV_VIA82CXXX strncmp(s,"splitfifo",9) && #endif /* CONFIG_BLK_DEV_VIA82CXXX */ - strncmp(s,"hdxlun",6) && - (strncmp(s,"hd",2) && s[2] != '=')) + strncmp(s,"hd",2)) /* hdx= & hdxlun= */ return 0; printk("ide_setup: %s", s); diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index c294323d1..7ed3785ff 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -1,8 +1,10 @@ /* - * $Id: 
dmascc.c,v 1.3 1998/09/07 04:41:56 kudielka Exp $ + * $Id: dmascc.c,v 1.27 2000/06/01 14:46:23 oe1kib Exp $ * * Driver for high-speed SCC boards (those with DMA support) - * Copyright (C) 1997 Klaus Kudielka + * Copyright (C) 1997-2000 Klaus Kudielka + * + * S5SCC/DMA support by Janko Koleznik S52HI * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,20 +19,22 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * */ #include <linux/module.h> #include <linux/delay.h> -#include <linux/dmascc.h> #include <linux/errno.h> #include <linux/if_arp.h> #include <linux/in.h> +#include <linux/init.h> #include <linux/interrupt.h> #include <linux/ioport.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/netdevice.h> +#include <linux/rtnetlink.h> #include <linux/sockios.h> #include <linux/tqueue.h> #include <linux/version.h> @@ -40,68 +44,31 @@ #include <asm/io.h> #include <asm/irq.h> #include <asm/segment.h> +#include <asm/uaccess.h> #include <net/ax25.h> #include "z8530.h" -/* Linux 2.0 compatibility */ - -#if LINUX_VERSION_CODE < 0x20100 - - -#define __init -#define __initdata -#define __initfunc(x) x - -#define MODULE_AUTHOR(x) -#define MODULE_DESCRIPTION(x) -#define MODULE_PARM(x,y) - -#define copy_to_user(x,y,z) memcpy_tofs(x,y,z) -#define copy_from_user(x,y,z) memcpy_fromfs(x,y,z) -#define test_and_set_bit(x,y) set_bit(x,y) -#define register_netdevice(x) register_netdev(x) -#define unregister_netdevice(x) unregister_netdev(x) -#define dev_kfree_skb(x) dev_kfree_skb(x,FREE_WRITE) -#define SET_DEV_INIT(x) (x=dmascc_dev_init) - -#define SHDLCE 0x01 /* WR15 */ - -#define AUTOEOM 0x02 /* WR7' */ -#define RXFIFOH 0x08 -#define TXFIFOE 0x20 - -static int dmascc_dev_init(struct net_device *dev) -{ - return 0; -} - -static void 
dev_init_buffers(struct net_device *dev) -{ - int i; - - for (i = 0; i < DEV_NUMBUFFS; i++) - skb_queue_head_init(&dev->buffs[i]); -} - - -#else - - -#include <linux/init.h> -#include <asm/uaccess.h> - -#define SET_DEV_INIT(x) - +/* Linux 2.2 and 2.3 compatibility */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,14) +#define net_device device +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,43) +#define netif_start_queue(dev) { dev->tbusy = 0; } +#define netif_stop_queue(dev) { dev->tbusy = 1; } +#define netif_wake_queue(dev) { dev->tbusy = 0; mark_bh(NET_BH); } +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,47) +#define netif_running(dev) (dev->flags & IFF_UP) #endif /* Number of buffers per channel */ -#define NUM_TX_BUF 2 /* NUM_TX_BUF >= 1 (2 recommended) */ -#define NUM_RX_BUF 2 /* NUM_RX_BUF >= 1 (2 recommended) */ -#define BUF_SIZE 2016 +#define NUM_TX_BUF 2 /* NUM_TX_BUF >= 1 (min. 2 recommended) */ +#define NUM_RX_BUF 6 /* NUM_RX_BUF >= 1 (min. 2 recommended) */ +#define BUF_SIZE 1576 /* BUF_SIZE >= mtu + hard_header_len */ /* Cards supported */ @@ -112,13 +79,18 @@ static void dev_init_buffers(struct net_device *dev) 0, 8, 3686400, 7372800 } #define HW_TWIN { "Gracilis PackeTwin", 0x200, 0x10, 0x10, 32, \ 0, 4, 6144000, 6144000 } +#define HW_S5 { "S5SCC/DMA", 0x200, 0x10, 0x10, 32, \ + 0, 8, 4915200, 9830400 } + +#define HARDWARE { HW_PI, HW_PI2, HW_TWIN, HW_S5 } -#define HARDWARE { HW_PI, HW_PI2, HW_TWIN } +#define TMR_0_HZ 25600 /* Frequency of timer 0 */ #define TYPE_PI 0 #define TYPE_PI2 1 #define TYPE_TWIN 2 -#define NUM_TYPES 3 +#define TYPE_S5 3 +#define NUM_TYPES 4 #define MAX_NUM_DEVS 32 @@ -188,16 +160,44 @@ static void dev_init_buffers(struct net_device *dev) /* Status values */ -/* tx_state */ -#define TX_IDLE 0 -#define TX_OFF 1 -#define TX_TXDELAY 2 -#define TX_ACTIVE 3 -#define TX_SQDELAY 4 +#define IDLE 0 +#define TX_HEAD 1 +#define TX_DATA 2 +#define TX_PAUSE 3 +#define TX_TAIL 4 +#define RTS_OFF 5 +#define WAIT 6 +#define DCD_ON 
7 +#define RX_ON 8 +#define DCD_OFF 9 + + +/* Ioctls */ + +#define SIOCGSCCPARAM SIOCDEVPRIVATE +#define SIOCSSCCPARAM (SIOCDEVPRIVATE+1) /* Data types */ +struct scc_param { + int pclk_hz; /* frequency of BRG input (don't change) */ + int brg_tc; /* BRG terminal count; BRG disabled if < 0 */ + int nrzi; /* 0 (nrz), 1 (nrzi) */ + int clocks; /* see dmascc_cfg documentation */ + int txdelay; /* [1/TMR_0_HZ] */ + int txtimeout; /* [1/HZ] */ + int txtail; /* [1/TMR_0_HZ] */ + int waittime; /* [1/TMR_0_HZ] */ + int slottime; /* [1/TMR_0_HZ] */ + int persist; /* 1 ... 256 */ + int dma; /* -1 (disable), 0, 1, 3 */ + int txpause; /* [1/TMR_0_HZ] */ + int rtsoff; /* [1/TMR_0_HZ] */ + int dcdon; /* [1/TMR_0_HZ] */ + int dcdoff; /* [1/TMR_0_HZ] */ +}; + struct scc_hardware { char *name; int io_region; @@ -211,10 +211,17 @@ struct scc_hardware { }; struct scc_priv { - struct enet_statistics stats; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) + char name[IFNAMSIZ]; +#endif + int type; + int chip; + struct net_device *dev; struct scc_info *info; + struct net_device_stats stats; int channel; - int cmd, data, tmr; + int card_base, scc_cmd, scc_data; + int tmr_cnt, tmr_ctrl, tmr_mode; struct scc_param param; char rx_buf[NUM_RX_BUF][BUF_SIZE]; int rx_len[NUM_RX_BUF]; @@ -226,17 +233,13 @@ struct scc_priv { int tx_len[NUM_TX_BUF]; int tx_ptr; int tx_head, tx_tail, tx_count; - int tx_sem, tx_state; + int state; unsigned long tx_start; - int status; + int rr0; }; struct scc_info { - int type; - int chip; - int open; - int scc_base; - int tmr_base; + int irq_used; int twin_serial_cfg; struct net_device dev[2]; struct scc_priv priv[2]; @@ -247,25 +250,33 @@ struct scc_info { /* Function declarations */ int dmascc_init(void) __init; -static int setup_adapter(int io, int h, int n) __init; +static int setup_adapter(int card_base, int type, int n) __init; + +static void write_scc(struct scc_priv *priv, int reg, int val); +static void write_scc_data(struct scc_priv *priv, int val, int 
fast); +static int read_scc(struct scc_priv *priv, int reg); +static int read_scc_data(struct scc_priv *priv); -static inline void write_scc(int ctl, int reg, int val); -static inline int read_scc(int ctl, int reg); static int scc_open(struct net_device *dev); static int scc_close(struct net_device *dev); static int scc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); static int scc_send_packet(struct sk_buff *skb, struct net_device *dev); -static struct enet_statistics *scc_get_stats(struct net_device *dev); +static struct net_device_stats *scc_get_stats(struct net_device *dev); static int scc_set_mac_address(struct net_device *dev, void *sa); + static void scc_isr(int irq, void *dev_id, struct pt_regs * regs); static inline void z8530_isr(struct scc_info *info); -static void rx_isr(struct net_device *dev); -static void special_condition(struct net_device *dev, int rc); +static void rx_isr(struct scc_priv *priv); +static void special_condition(struct scc_priv *priv, int rc); static void rx_bh(void *arg); -static void tx_isr(struct net_device *dev); -static void es_isr(struct net_device *dev); -static void tm_isr(struct net_device *dev); -static inline void delay(struct net_device *dev, int t); +static void tx_isr(struct scc_priv *priv); +static void es_isr(struct scc_priv *priv); +static void tm_isr(struct scc_priv *priv); + +static inline void tx_on(struct scc_priv *priv); +static inline void rx_on(struct scc_priv *priv); +static inline void rx_off(struct scc_priv *priv); +static void start_timer(struct scc_priv *priv, int t, int r15); static inline unsigned char random(void); @@ -287,7 +298,6 @@ static struct scc_info *first = NULL; static unsigned long rand; - /* Module functions */ #ifdef MODULE @@ -298,14 +308,12 @@ MODULE_DESCRIPTION("Driver for high-speed SCC boards"); MODULE_PARM(io, "1-" __MODULE_STRING(MAX_NUM_DEVS) "i"); -int init_module(void) -{ +int init_module(void) { return dmascc_init(); } -void cleanup_module(void) -{ +void 
cleanup_module(void) { int i; struct scc_info *info; @@ -315,15 +323,17 @@ void cleanup_module(void) /* Unregister devices */ for (i = 0; i < 2; i++) { if (info->dev[i].name) + rtnl_lock(); unregister_netdevice(&info->dev[i]); + rtnl_unlock(); } /* Reset board */ - if (info->type == TYPE_TWIN) - outb_p(0, info->dev[0].base_addr + TWIN_SERIAL_CFG); - write_scc(info->priv[0].cmd, R9, FHWRES); + if (info->priv[0].type == TYPE_TWIN) + outb(0, info->dev[0].base_addr + TWIN_SERIAL_CFG); + write_scc(&info->priv[0], R9, FHWRES); release_region(info->dev[0].base_addr, - hw[info->type].io_size); + hw[info->priv[0].type].io_size); /* Free memory */ first = info->next; @@ -335,8 +345,7 @@ void cleanup_module(void) #else -void __init dmascc_setup(char *str, int *ints) -{ +void __init dmascc_setup(char *str, int *ints) { int i; for (i = 0; i < MAX_NUM_DEVS && i < ints[0]; i++) @@ -349,8 +358,7 @@ void __init dmascc_setup(char *str, int *ints) /* Initialization functions */ -int __init dmascc_init(void) -{ +int __init dmascc_init(void) { int h, i, j, n; int base[MAX_NUM_DEVS], tcmd[MAX_NUM_DEVS], t0[MAX_NUM_DEVS], t1[MAX_NUM_DEVS]; @@ -402,18 +410,18 @@ int __init dmascc_init(void) for (i = 0; i < hw[h].num_devs; i++) if (base[i]) { /* Timer 0: LSB+MSB, Mode 3, TMR_0_HZ */ - outb_p(0x36, tcmd[i]); - outb_p((hw[h].tmr_hz/TMR_0_HZ) & 0xFF, t0[i]); - outb_p((hw[h].tmr_hz/TMR_0_HZ) >> 8, t0[i]); + outb(0x36, tcmd[i]); + outb((hw[h].tmr_hz/TMR_0_HZ) & 0xFF, t0[i]); + outb((hw[h].tmr_hz/TMR_0_HZ) >> 8, t0[i]); /* Timer 1: LSB+MSB, Mode 0, HZ/10 */ - outb_p(0x70, tcmd[i]); - outb_p((TMR_0_HZ/HZ*10) & 0xFF, t1[i]); - outb_p((TMR_0_HZ/HZ*10) >> 8, t1[i]); + outb(0x70, tcmd[i]); + outb((TMR_0_HZ/HZ*10) & 0xFF, t1[i]); + outb((TMR_0_HZ/HZ*10) >> 8, t1[i]); start[i] = jiffies; delay[i] = 0; counting[i] = 1; /* Timer 2: LSB+MSB, Mode 0 */ - outb_p(0xb0, tcmd[i]); + outb(0xb0, tcmd[i]); } time = jiffies; /* Wait until counter registers are loaded */ @@ -424,8 +432,8 @@ int __init 
dmascc_init(void) for (i = 0; i < hw[h].num_devs; i++) if (base[i] && counting[i]) { /* Read back Timer 1: latch; read LSB; read MSB */ - outb_p(0x40, tcmd[i]); - t_val = inb_p(t1[i]) + (inb_p(t1[i]) << 8); + outb(0x40, tcmd[i]); + t_val = inb(t1[i]) + (inb(t1[i]) << 8); /* Also check whether counter did wrap */ if (t_val == 0 || t_val > TMR_0_HZ/HZ*10) counting[i] = 0; delay[i] = jiffies - start[i]; @@ -452,31 +460,46 @@ int __init dmascc_init(void) return -EIO; } -int __init setup_adapter(int io, int h, int n) -{ + +int __init setup_adapter(int card_base, int type, int n) { int i, irq, chip; struct scc_info *info; struct net_device *dev; struct scc_priv *priv; unsigned long time; unsigned int irqs; - int tmr = io + hw[h].tmr_offset; - int scc = io + hw[h].scc_offset; - int cmd = scc + SCCA_CMD; + int tmr_base = card_base + hw[type].tmr_offset; + int scc_base = card_base + hw[type].scc_offset; char *chipnames[] = CHIPNAMES; - /* Reset 8530 */ - write_scc(cmd, R9, FHWRES | MIE | NV); + /* Allocate memory */ + info = kmalloc(sizeof(struct scc_info), GFP_KERNEL | GFP_DMA); + if (!info) { + printk("dmascc: could not allocate memory for %s at %#3x\n", + hw[type].name, card_base); + return -1; + } + + /* Initialize what is necessary for write_scc and write_scc_data */ + memset(info, 0, sizeof(struct scc_info)); + priv = &info->priv[0]; + priv->type = type; + priv->card_base = card_base; + priv->scc_cmd = scc_base + SCCA_CMD; + priv->scc_data = scc_base + SCCA_DATA; + + /* Reset SCC */ + write_scc(priv, R9, FHWRES | MIE | NV); /* Determine type of chip by enabling SDLC/HDLC enhancements */ - write_scc(cmd, R15, SHDLCE); - if (!read_scc(cmd, R15)) { + write_scc(priv, R15, SHDLCE); + if (!read_scc(priv, R15)) { /* WR7' not present. This is an ordinary Z8530 SCC. 
*/ chip = Z8530; } else { /* Put one character in TX FIFO */ - write_scc(cmd, R8, 0); - if (read_scc(cmd, R0) & Tx_BUF_EMP) { + write_scc_data(priv, 0, 0); + if (read_scc(priv, R0) & Tx_BUF_EMP) { /* TX FIFO not full. This is a Z85230 ESCC with a 4-byte FIFO. */ chip = Z85230; } else { @@ -484,93 +507,76 @@ int __init setup_adapter(int io, int h, int n) chip = Z85C30; } } - write_scc(cmd, R15, 0); + write_scc(priv, R15, 0); /* Start IRQ auto-detection */ sti(); irqs = probe_irq_on(); /* Enable interrupts */ - switch (h) { - case TYPE_PI: - case TYPE_PI2: - outb_p(0, io + PI_DREQ_MASK); - write_scc(cmd, R15, CTSIE); - write_scc(cmd, R0, RES_EXT_INT); - write_scc(cmd, R1, EXT_INT_ENAB); - break; - case TYPE_TWIN: - outb_p(0, io + TWIN_DMA_CFG); - inb_p(io + TWIN_CLR_TMR1); - inb_p(io + TWIN_CLR_TMR2); - outb_p(TWIN_EI, io + TWIN_SERIAL_CFG); - break; + if (type == TYPE_TWIN) { + outb(0, card_base + TWIN_DMA_CFG); + inb(card_base + TWIN_CLR_TMR1); + inb(card_base + TWIN_CLR_TMR2); + outb((info->twin_serial_cfg = TWIN_EI), card_base + TWIN_SERIAL_CFG); + } else { + write_scc(priv, R15, CTSIE); + write_scc(priv, R0, RES_EXT_INT); + write_scc(priv, R1, EXT_INT_ENAB); } /* Start timer */ - outb_p(1, tmr + TMR_CNT1); - outb_p(0, tmr + TMR_CNT1); + outb(1, tmr_base + TMR_CNT1); + outb(0, tmr_base + TMR_CNT1); + /* Wait and detect IRQ */ time = jiffies; while (jiffies - time < 2 + HZ / TMR_0_HZ); irq = probe_irq_off(irqs); /* Clear pending interrupt, disable interrupts */ - switch (h) { - case TYPE_PI: - case TYPE_PI2: - write_scc(cmd, R1, 0); - write_scc(cmd, R15, 0); - write_scc(cmd, R0, RES_EXT_INT); - break; - case TYPE_TWIN: - inb_p(io + TWIN_CLR_TMR1); - outb_p(0, io + TWIN_SERIAL_CFG); - break; + if (type == TYPE_TWIN) { + inb(card_base + TWIN_CLR_TMR1); + } else { + write_scc(priv, R1, 0); + write_scc(priv, R15, 0); + write_scc(priv, R0, RES_EXT_INT); } if (irq <= 0) { printk("dmascc: could not find irq of %s at %#3x (irq=%d)\n", - hw[h].name, io, irq); - return -1; 
- } - - /* Allocate memory */ - info = kmalloc(sizeof(struct scc_info), GFP_KERNEL | GFP_DMA); - if (!info) { - printk("dmascc: could not allocate memory for %s at %#3x\n", - hw[h].name, io); + hw[type].name, card_base, irq); + kfree_s(info, sizeof(struct scc_info)); return -1; } /* Set up data structures */ - memset(info, 0, sizeof(struct scc_info)); - info->type = h; - info->chip = chip; - info->scc_base = io + hw[h].scc_offset; - info->tmr_base = io + hw[h].tmr_offset; - info->twin_serial_cfg = 0; for (i = 0; i < 2; i++) { dev = &info->dev[i]; priv = &info->priv[i]; + priv->type = type; + priv->chip = chip; + priv->dev = dev; priv->info = info; priv->channel = i; - priv->cmd = info->scc_base + (i ? SCCB_CMD : SCCA_CMD); - priv->data = info->scc_base + (i ? SCCB_DATA : SCCA_DATA); - priv->tmr = info->tmr_base + (i ? TMR_CNT2 : TMR_CNT1); - priv->param.pclk_hz = hw[h].pclk_hz; + priv->card_base = card_base; + priv->scc_cmd = scc_base + (i ? SCCB_CMD : SCCA_CMD); + priv->scc_data = scc_base + (i ? SCCB_DATA : SCCA_DATA); + priv->tmr_cnt = tmr_base + (i ? TMR_CNT2 : TMR_CNT1); + priv->tmr_ctrl = tmr_base + TMR_CTRL; + priv->tmr_mode = i ? 
0xb0 : 0x70; + priv->param.pclk_hz = hw[type].pclk_hz; priv->param.brg_tc = -1; priv->param.clocks = TCTRxCP | RCRTxCP; - priv->param.txdelay = TMR_0_HZ * 10 / 1000; - priv->param.txtime = HZ * 3; - priv->param.sqdelay = TMR_0_HZ * 1 / 1000; - priv->param.slottime = TMR_0_HZ * 10 / 1000; - priv->param.waittime = TMR_0_HZ * 100 / 1000; - priv->param.persist = 32; + priv->param.persist = 256; + priv->param.dma = -1; priv->rx_task.routine = rx_bh; - priv->rx_task.data = dev; + priv->rx_task.data = priv; dev->priv = priv; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) + if (sizeof(dev->name) == sizeof(char *)) dev->name = priv->name; +#endif sprintf(dev->name, "dmascc%i", 2*n+i); - dev->base_addr = io; + dev->base_addr = card_base; dev->irq = irq; dev->open = scc_open; dev->stop = scc_close; @@ -580,7 +586,6 @@ int __init setup_adapter(int io, int h, int n) dev->hard_header = ax25_encapsulate; dev->rebuild_header = ax25_rebuild_header; dev->set_mac_address = scc_set_mac_address; - SET_DEV_INIT(dev->init); dev->type = ARPHRD_AX25; dev->hard_header_len = 73; dev->mtu = 1500; @@ -589,203 +594,298 @@ int __init setup_adapter(int io, int h, int n) memcpy(dev->broadcast, ax25_broadcast, 7); memcpy(dev->dev_addr, ax25_test, 7); dev_init_buffers(dev); + rtnl_lock(); if (register_netdevice(dev)) { printk("dmascc: could not register %s\n", dev->name); } + rtnl_unlock(); } - request_region(io, hw[h].io_size, "dmascc"); + request_region(card_base, hw[type].io_size, "dmascc"); info->next = first; first = info; - printk("dmascc: found %s (%s) at %#3x, irq %d\n", hw[h].name, - chipnames[chip], io, irq); + printk("dmascc: found %s (%s) at %#3x, irq %d\n", hw[type].name, + chipnames[chip], card_base, irq); return 0; } /* Driver functions */ -static inline void write_scc(int ctl, int reg, int val) -{ - outb_p(reg, ctl); - outb_p(val, ctl); +static void write_scc(struct scc_priv *priv, int reg, int val) { + unsigned long flags; + switch (priv->type) { + case TYPE_S5: + if (reg) 
outb(reg, priv->scc_cmd); + outb(val, priv->scc_cmd); + return; + case TYPE_TWIN: + if (reg) outb_p(reg, priv->scc_cmd); + outb_p(val, priv->scc_cmd); + return; + default: + save_flags(flags); + cli(); + outb_p(0, priv->card_base + PI_DREQ_MASK); + if (reg) outb_p(reg, priv->scc_cmd); + outb_p(val, priv->scc_cmd); + outb(1, priv->card_base + PI_DREQ_MASK); + restore_flags(flags); + return; + } } -static inline int read_scc(int ctl, int reg) -{ - outb_p(reg, ctl); - return inb_p(ctl); +static void write_scc_data(struct scc_priv *priv, int val, int fast) { + unsigned long flags; + switch (priv->type) { + case TYPE_S5: + outb(val, priv->scc_data); + return; + case TYPE_TWIN: + outb_p(val, priv->scc_data); + return; + default: + if (fast) outb_p(val, priv->scc_data); + else { + save_flags(flags); + cli(); + outb_p(0, priv->card_base + PI_DREQ_MASK); + outb_p(val, priv->scc_data); + outb(1, priv->card_base + PI_DREQ_MASK); + restore_flags(flags); + } + return; + } } -static int scc_open(struct net_device *dev) -{ +static int read_scc(struct scc_priv *priv, int reg) { + int rc; + unsigned long flags; + switch (priv->type) { + case TYPE_S5: + if (reg) outb(reg, priv->scc_cmd); + return inb(priv->scc_cmd); + case TYPE_TWIN: + if (reg) outb_p(reg, priv->scc_cmd); + return inb_p(priv->scc_cmd); + default: + save_flags(flags); + cli(); + outb_p(0, priv->card_base + PI_DREQ_MASK); + if (reg) outb_p(reg, priv->scc_cmd); + rc = inb_p(priv->scc_cmd); + outb(1, priv->card_base + PI_DREQ_MASK); + restore_flags(flags); + return rc; + } +} + + +static int read_scc_data(struct scc_priv *priv) { + int rc; + unsigned long flags; + switch (priv->type) { + case TYPE_S5: + return inb(priv->scc_data); + case TYPE_TWIN: + return inb_p(priv->scc_data); + default: + save_flags(flags); + cli(); + outb_p(0, priv->card_base + PI_DREQ_MASK); + rc = inb_p(priv->scc_data); + outb(1, priv->card_base + PI_DREQ_MASK); + restore_flags(flags); + return rc; + } +} + + +static int scc_open(struct 
net_device *dev) { struct scc_priv *priv = dev->priv; struct scc_info *info = priv->info; - int io = dev->base_addr; - int cmd = priv->cmd; + int card_base = priv->card_base; + + MOD_INC_USE_COUNT; /* Request IRQ if not already used by other channel */ - if (!info->open) { - if (request_irq(dev->irq, scc_isr, SA_INTERRUPT, "dmascc", info)) + if (!info->irq_used) { + if (request_irq(dev->irq, scc_isr, 0, "dmascc", info)) { + MOD_DEC_USE_COUNT; return -EAGAIN; + } } + info->irq_used++; /* Request DMA if required */ - if (dev->dma && request_dma(dev->dma, "dmascc")) { - if (!info->open) free_irq(dev->irq, info); - return -EAGAIN; + if (priv->param.dma >= 0) { + if (request_dma(priv->param.dma, "dmascc")) { + if (--info->irq_used == 0) free_irq(dev->irq, info); + MOD_DEC_USE_COUNT; + return -EAGAIN; + } else { + unsigned long flags = claim_dma_lock(); + clear_dma_ff(priv->param.dma); + release_dma_lock(flags); + } } /* Initialize local variables */ priv->rx_ptr = 0; priv->rx_over = 0; priv->rx_head = priv->rx_tail = priv->rx_count = 0; - priv->tx_state = TX_IDLE; + priv->state = IDLE; priv->tx_head = priv->tx_tail = priv->tx_count = 0; priv->tx_ptr = 0; - priv->tx_sem = 0; /* Reset channel */ - write_scc(cmd, R9, (priv->channel ? CHRB : CHRA) | MIE | NV); + write_scc(priv, R9, (priv->channel ? 
CHRB : CHRA) | MIE | NV); /* X1 clock, SDLC mode */ - write_scc(cmd, R4, SDLC | X1CLK); + write_scc(priv, R4, SDLC | X1CLK); /* DMA */ - write_scc(cmd, R1, EXT_INT_ENAB | WT_FN_RDYFN); + write_scc(priv, R1, EXT_INT_ENAB | WT_FN_RDYFN); /* 8 bit RX char, RX disable */ - write_scc(cmd, R3, Rx8); + write_scc(priv, R3, Rx8); /* 8 bit TX char, TX disable */ - write_scc(cmd, R5, Tx8); + write_scc(priv, R5, Tx8); /* SDLC address field */ - write_scc(cmd, R6, 0); + write_scc(priv, R6, 0); /* SDLC flag */ - write_scc(cmd, R7, FLAG); - switch (info->chip) { + write_scc(priv, R7, FLAG); + switch (priv->chip) { case Z85C30: /* Select WR7' */ - write_scc(cmd, R15, SHDLCE); + write_scc(priv, R15, SHDLCE); /* Auto EOM reset */ - write_scc(cmd, R7, AUTOEOM); - write_scc(cmd, R15, 0); + write_scc(priv, R7, AUTOEOM); + write_scc(priv, R15, 0); break; case Z85230: /* Select WR7' */ - write_scc(cmd, R15, SHDLCE); - /* RX FIFO half full (interrupt only), Auto EOM reset, - TX FIFO empty (DMA only) */ - write_scc(cmd, R7, AUTOEOM | (dev->dma ? TXFIFOE : RXFIFOH)); - write_scc(cmd, R15, 0); + write_scc(priv, R15, SHDLCE); + /* The following bits are set (see 2.5.2.1): + - Automatic EOM reset + - Interrupt request if RX FIFO is half full + This bit should be ignored in DMA mode (according to the + documentation), but actually isn't. The receiver doesn't work if + it is set. Thus, we have to clear it in DMA mode. + - Interrupt/DMA request if TX FIFO is completely empty + a) If set, the ESCC behaves as if it had no TX FIFO (Z85C30 + compatibility). + b) If cleared, DMA requests may follow each other very quickly, + filling up the TX FIFO. + Advantage: TX works even in case of high bus latency. + Disadvantage: Edge-triggered DMA request circuitry may miss + a request. No more data is delivered, resulting + in a TX FIFO underrun. + Both PI2 and S5SCC/DMA seem to work fine with TXFIFOE cleared. + The PackeTwin doesn't. I don't know about the PI, but let's + assume it behaves like the PI2. 
+ */ + if (priv->param.dma >= 0) { + if (priv->type == TYPE_TWIN) write_scc(priv, R7, AUTOEOM | TXFIFOE); + else write_scc(priv, R7, AUTOEOM); + } else { + write_scc(priv, R7, AUTOEOM | RXFIFOH); + } + write_scc(priv, R15, 0); break; } /* Preset CRC, NRZ(I) encoding */ - write_scc(cmd, R10, CRCPS | (priv->param.nrzi ? NRZI : NRZ)); + write_scc(priv, R10, CRCPS | (priv->param.nrzi ? NRZI : NRZ)); /* Configure baud rate generator */ if (priv->param.brg_tc >= 0) { /* Program BR generator */ - write_scc(cmd, R12, priv->param.brg_tc & 0xFF); - write_scc(cmd, R13, (priv->param.brg_tc>>8) & 0xFF); + write_scc(priv, R12, priv->param.brg_tc & 0xFF); + write_scc(priv, R13, (priv->param.brg_tc>>8) & 0xFF); /* BRG source = SYS CLK; enable BRG; DTR REQ function (required by PackeTwin, not connected on the PI2); set DPLL source to BRG */ - write_scc(cmd, R14, SSBR | DTRREQ | BRSRC | BRENABL); + write_scc(priv, R14, SSBR | DTRREQ | BRSRC | BRENABL); /* Enable DPLL */ - write_scc(cmd, R14, SEARCH | DTRREQ | BRSRC | BRENABL); + write_scc(priv, R14, SEARCH | DTRREQ | BRSRC | BRENABL); } else { /* Disable BR generator */ - write_scc(cmd, R14, DTRREQ | BRSRC); + write_scc(priv, R14, DTRREQ | BRSRC); } /* Configure clocks */ - if (info->type == TYPE_TWIN) { + if (priv->type == TYPE_TWIN) { /* Disable external TX clock receiver */ - outb_p((info->twin_serial_cfg &= + outb((info->twin_serial_cfg &= ~(priv->channel ? TWIN_EXTCLKB : TWIN_EXTCLKA)), - io + TWIN_SERIAL_CFG); + card_base + TWIN_SERIAL_CFG); } - write_scc(cmd, R11, priv->param.clocks); - if ((info->type == TYPE_TWIN) && !(priv->param.clocks & TRxCOI)) { + write_scc(priv, R11, priv->param.clocks); + if ((priv->type == TYPE_TWIN) && !(priv->param.clocks & TRxCOI)) { /* Enable external TX clock receiver */ - outb_p((info->twin_serial_cfg |= + outb((info->twin_serial_cfg |= (priv->channel ? 
TWIN_EXTCLKB : TWIN_EXTCLKA)), - io + TWIN_SERIAL_CFG); + card_base + TWIN_SERIAL_CFG); } /* Configure PackeTwin */ - if (info->type == TYPE_TWIN) { + if (priv->type == TYPE_TWIN) { /* Assert DTR, enable interrupts */ - outb_p((info->twin_serial_cfg |= TWIN_EI | + outb((info->twin_serial_cfg |= TWIN_EI | (priv->channel ? TWIN_DTRB_ON : TWIN_DTRA_ON)), - io + TWIN_SERIAL_CFG); + card_base + TWIN_SERIAL_CFG); } /* Read current status */ - priv->status = read_scc(cmd, R0); - /* Enable SYNC, DCD, and CTS interrupts */ - write_scc(cmd, R15, DCDIE | CTSIE | SYNCIE); - - /* Configure PI2 DMA */ - if (info->type <= TYPE_PI2) outb_p(1, io + PI_DREQ_MASK); + priv->rr0 = read_scc(priv, R0); + /* Enable DCD interrupt */ + write_scc(priv, R15, DCDIE); netif_start_queue(dev); - info->open++; - MOD_INC_USE_COUNT; return 0; } -static int scc_close(struct net_device *dev) -{ +static int scc_close(struct net_device *dev) { struct scc_priv *priv = dev->priv; struct scc_info *info = priv->info; - int io = dev->base_addr; - int cmd = priv->cmd; + int card_base = priv->card_base; netif_stop_queue(dev); - info->open--; - if (info->type == TYPE_TWIN) + if (priv->type == TYPE_TWIN) { /* Drop DTR */ - outb_p((info->twin_serial_cfg &= + outb((info->twin_serial_cfg &= (priv->channel ? ~TWIN_DTRB_ON : ~TWIN_DTRA_ON)), - io + TWIN_SERIAL_CFG); - - /* Reset channel, free DMA */ - write_scc(cmd, R9, (priv->channel ? CHRB : CHRA) | MIE | NV); - if (dev->dma) { - if (info->type == TYPE_TWIN) outb_p(0, io + TWIN_DMA_CFG); - free_dma(dev->dma); + card_base + TWIN_SERIAL_CFG); } - if (!info->open) { - if (info->type <= TYPE_PI2) outb_p(0, io + PI_DREQ_MASK); - free_irq(dev->irq, info); + /* Reset channel, free DMA and IRQ */ + write_scc(priv, R9, (priv->channel ? 
CHRB : CHRA) | MIE | NV); + if (priv->param.dma >= 0) { + if (priv->type == TYPE_TWIN) outb(0, card_base + TWIN_DMA_CFG); + free_dma(priv->param.dma); } + if (--info->irq_used == 0) free_irq(dev->irq, info); + MOD_DEC_USE_COUNT; return 0; } -static int scc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - int rc; +static int scc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct scc_priv *priv = dev->priv; switch (cmd) { case SIOCGSCCPARAM: - if(copy_to_user(ifr->ifr_data, &priv->param, sizeof(struct scc_param))) - return -EFAULT; + if (copy_to_user(ifr->ifr_data, &priv->param, sizeof(struct scc_param))) + return -EFAULT; return 0; case SIOCSSCCPARAM: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (netif_running(dev)) - return -EAGAIN; - if(copy_from_user(&priv->param, ifr->ifr_data, sizeof(struct scc_param))) - return -EFAULT; - dev->dma = priv->param.dma; + if (!capable(CAP_NET_ADMIN)) return -EPERM; + if (netif_running(dev)) return -EAGAIN; + if (copy_from_user(&priv->param, ifr->ifr_data, sizeof(struct scc_param))) + return -EFAULT; return 0; default: return -EINVAL; @@ -793,165 +893,150 @@ static int scc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } -static int scc_send_packet(struct sk_buff *skb, struct net_device *dev) -{ +static int scc_send_packet(struct sk_buff *skb, struct net_device *dev) { struct scc_priv *priv = dev->priv; - struct scc_info *info = priv->info; - int cmd = priv->cmd; unsigned long flags; int i; - /* Block a timer-based transmit from overlapping */ + /* Temporarily stop the scheduler feeding us packets */ netif_stop_queue(dev); - + /* Transfer data to DMA buffer */ i = priv->tx_head; memcpy(priv->tx_buf[i], skb->data+1, skb->len-1); priv->tx_len[i] = skb->len-1; + /* Clear interrupts while we touch our circular buffers */ save_flags(flags); cli(); - /* Set the busy flag if we just filled up the last buffer */ + /* Move the ring buffer's head */ priv->tx_head = (i + 1) % NUM_TX_BUF; 
priv->tx_count++; - if (priv->tx_count != NUM_TX_BUF) - netif_wake_queue(dev); + + /* If we just filled up the last buffer, leave queue stopped. + The higher layers must wait until we have a DMA buffer + to accept the data. */ + if (priv->tx_count < NUM_TX_BUF) netif_wake_queue(dev); /* Set new TX state */ - if (priv->tx_state == TX_IDLE) { + if (priv->state == IDLE) { /* Assert RTS, start timer */ - priv->tx_state = TX_TXDELAY; - if (info->type <= TYPE_PI2) outb_p(0, dev->base_addr + PI_DREQ_MASK); - write_scc(cmd, R5, TxCRC_ENAB | RTS | TxENAB | Tx8); - if (info->type <= TYPE_PI2) outb_p(1, dev->base_addr + PI_DREQ_MASK); + priv->state = TX_HEAD; priv->tx_start = jiffies; - delay(dev, priv->param.txdelay); + write_scc(priv, R5, TxCRC_ENAB | RTS | TxENAB | Tx8); + write_scc(priv, R15, 0); + start_timer(priv, priv->param.txdelay, 0); } + /* Turn interrupts back on and free buffer */ restore_flags(flags); - dev_kfree_skb(skb); - priv->tx_sem = 0; return 0; } -static struct enet_statistics *scc_get_stats(struct net_device *dev) -{ +static struct net_device_stats *scc_get_stats(struct net_device *dev) { struct scc_priv *priv = dev->priv; return &priv->stats; } -static int scc_set_mac_address(struct net_device *dev, void *sa) -{ +static int scc_set_mac_address(struct net_device *dev, void *sa) { memcpy(dev->dev_addr, ((struct sockaddr *)sa)->sa_data, dev->addr_len); return 0; } -static void scc_isr(int irq, void *dev_id, struct pt_regs * regs) -{ +static void scc_isr(int irq, void *dev_id, struct pt_regs * regs) { struct scc_info *info = dev_id; - int is, io = info->dev[0].base_addr; - /* We're a fast IRQ handler and are called with interrupts disabled */ + /* At this point interrupts are enabled, and the interrupt under service + is already acknowledged, but masked off. - /* IRQ sharing doesn't make sense due to ISA's edge-triggered - interrupts, hence it is safe to return if we have found and - processed a single device. 
*/ - - /* Interrupt processing: We loop until we know that the IRQ line is + Interrupt processing: We loop until we know that the IRQ line is low. If another positive edge occurs afterwards during the ISR, another interrupt will be triggered by the interrupt controller - as soon as the IRQ level is enabled again (see asm/irq.h). */ - - switch (info->type) { - case TYPE_PI: - case TYPE_PI2: - outb_p(0, io + PI_DREQ_MASK); - z8530_isr(info); - outb_p(1, io + PI_DREQ_MASK); - return; - case TYPE_TWIN: - while ((is = ~inb_p(io + TWIN_INT_REG)) & + as soon as the IRQ level is enabled again (see asm/irq.h). + + Bottom-half handlers will be processed after scc_isr(). This is + important, since we only have small ringbuffers and want new data + to be fetched/delivered immediately. */ + + if (info->priv[0].type == TYPE_TWIN) { + int is, card_base = info->priv[0].card_base; + while ((is = ~inb(card_base + TWIN_INT_REG)) & TWIN_INT_MSK) { if (is & TWIN_SCC_MSK) { z8530_isr(info); } else if (is & TWIN_TMR1_MSK) { - inb_p(io + TWIN_CLR_TMR1); - tm_isr(&info->dev[0]); + inb(card_base + TWIN_CLR_TMR1); + tm_isr(&info->priv[0]); } else { - inb_p(io + TWIN_CLR_TMR2); - tm_isr(&info->dev[1]); + inb(card_base + TWIN_CLR_TMR2); + tm_isr(&info->priv[1]); } } - /* No interrupts pending from the PackeTwin */ - return; - } + } else z8530_isr(info); } -static inline void z8530_isr(struct scc_info *info) -{ - int is, a_cmd; - - a_cmd = info->scc_base + SCCA_CMD; +static inline void z8530_isr(struct scc_info *info) { + int is, i = 100; - while ((is = read_scc(a_cmd, R3))) { + while ((is = read_scc(&info->priv[0], R3)) && i--) { if (is & CHARxIP) { - rx_isr(&info->dev[0]); + rx_isr(&info->priv[0]); } else if (is & CHATxIP) { - tx_isr(&info->dev[0]); + tx_isr(&info->priv[0]); } else if (is & CHAEXT) { - es_isr(&info->dev[0]); + es_isr(&info->priv[0]); } else if (is & CHBRxIP) { - rx_isr(&info->dev[1]); + rx_isr(&info->priv[1]); } else if (is & CHBTxIP) { - tx_isr(&info->dev[1]); + 
tx_isr(&info->priv[1]); } else { - es_isr(&info->dev[1]); + es_isr(&info->priv[1]); } + write_scc(&info->priv[0], R0, RES_H_IUS); + i++; + } + if (i < 0) { + printk("dmascc: stuck in ISR with RR3=0x%02x.\n", is); } /* Ok, no interrupts pending from this 8530. The INT line should be inactive now. */ } -static void rx_isr(struct net_device *dev) -{ - struct scc_priv *priv = dev->priv; - int cmd = priv->cmd; - - if (dev->dma) { +static void rx_isr(struct scc_priv *priv) { + if (priv->param.dma >= 0) { /* Check special condition and perform error reset. See 2.4.7.5. */ - special_condition(dev, read_scc(cmd, R1)); - write_scc(cmd, R0, ERR_RES); + special_condition(priv, read_scc(priv, R1)); + write_scc(priv, R0, ERR_RES); } else { /* Check special condition for each character. Error reset not necessary. Same algorithm for SCC and ESCC. See 2.4.7.1 and 2.4.7.4. */ int rc; - while (read_scc(cmd, R0) & Rx_CH_AV) { - rc = read_scc(cmd, R1); + while (read_scc(priv, R0) & Rx_CH_AV) { + rc = read_scc(priv, R1); if (priv->rx_ptr < BUF_SIZE) - priv->rx_buf[priv->rx_head][priv->rx_ptr++] = read_scc(cmd, R8); + priv->rx_buf[priv->rx_head][priv->rx_ptr++] = + read_scc_data(priv); else { priv->rx_over = 2; - read_scc(cmd, R8); + read_scc_data(priv); } - special_condition(dev, rc); + special_condition(priv, rc); } } } -static void special_condition(struct net_device *dev, int rc) -{ - struct scc_priv *priv = dev->priv; - int cb, cmd = priv->cmd; +static void special_condition(struct scc_priv *priv, int rc) { + int cb; unsigned long flags; /* See Figure 2-15. Only overrun and EOF need to be checked. */ @@ -959,18 +1044,15 @@ static void special_condition(struct net_device *dev, int rc) if (rc & Rx_OVR) { /* Receiver overrun */ priv->rx_over = 1; - if (!dev->dma) write_scc(cmd, R0, ERR_RES); + if (priv->param.dma < 0) write_scc(priv, R0, ERR_RES); } else if (rc & END_FR) { /* End of frame. 
Get byte count */ - if (dev->dma) { - flags=claim_dma_lock(); - disable_dma(dev->dma); - clear_dma_ff(dev->dma); - cb = BUF_SIZE - get_dma_residue(dev->dma) - 2; - release_dma_lock(flags); - + if (priv->param.dma >= 0) { + flags = claim_dma_lock(); + cb = BUF_SIZE - get_dma_residue(priv->param.dma) - 2; + release_dma_lock(flags); } else { - cb = priv->rx_ptr - 2; + cb = priv->rx_ptr - 2; } if (priv->rx_over) { /* We had an overrun */ @@ -980,36 +1062,32 @@ static void special_condition(struct net_device *dev, int rc) priv->rx_over = 0; } else if (rc & CRC_ERR) { /* Count invalid CRC only if packet length >= minimum */ - if (cb >= 8) { + if (cb >= 15) { priv->stats.rx_errors++; priv->stats.rx_crc_errors++; } } else { - if (cb >= 8) { - /* Put good frame in FIFO */ - priv->rx_len[priv->rx_head] = cb; - priv->rx_head = (priv->rx_head + 1) % NUM_RX_BUF; - priv->rx_count++; - if (priv->rx_count == NUM_RX_BUF) { - /* Disable receiver if FIFO full */ - write_scc(cmd, R3, Rx8); + if (cb >= 15) { + if (priv->rx_count < NUM_RX_BUF - 1) { + /* Put good frame in FIFO */ + priv->rx_len[priv->rx_head] = cb; + priv->rx_head = (priv->rx_head + 1) % NUM_RX_BUF; + priv->rx_count++; + /* Mark bottom half handler */ + queue_task(&priv->rx_task, &tq_immediate); + mark_bh(IMMEDIATE_BH); + } else { priv->stats.rx_errors++; priv->stats.rx_over_errors++; } - /* Mark bottom half handler */ - queue_task(&priv->rx_task, &tq_immediate); - mark_bh(IMMEDIATE_BH); } } /* Get ready for new frame */ - if (dev->dma) { - - flags=claim_dma_lock(); - set_dma_addr(dev->dma, (int) priv->rx_buf[priv->rx_head]); - set_dma_count(dev->dma, BUF_SIZE); - enable_dma(dev->dma); + if (priv->param.dma >= 0) { + flags = claim_dma_lock(); + set_dma_addr(priv->param.dma, (int) priv->rx_buf[priv->rx_head]); + set_dma_count(priv->param.dma, BUF_SIZE); release_dma_lock(flags); - } else { priv->rx_ptr = 0; } @@ -1017,12 +1095,8 @@ static void special_condition(struct net_device *dev, int rc) } -static void rx_bh(void 
*arg) -{ - struct net_device *dev = arg; - struct scc_priv *priv = dev->priv; - struct scc_info *info = priv->info; - int cmd = priv->cmd; +static void rx_bh(void *arg) { + struct scc_priv *priv = arg; int i = priv->rx_tail; int cb; unsigned long flags; @@ -1045,20 +1119,15 @@ static void rx_bh(void *arg) data = skb_put(skb, cb+1); data[0] = 0; memcpy(&data[1], priv->rx_buf[i], cb); - skb->dev = dev; + skb->dev = priv->dev; skb->protocol = ntohs(ETH_P_AX25); skb->mac.raw = skb->data; netif_rx(skb); priv->stats.rx_packets++; + priv->stats.rx_bytes += cb; } save_flags(flags); cli(); - /* Enable receiver if RX buffers have been unavailable */ - if ((priv->rx_count == NUM_RX_BUF) && (priv->status & DCD)) { - if (info->type <= TYPE_PI2) outb_p(0, dev->base_addr + PI_DREQ_MASK); - write_scc(cmd, R3, RxENABLE | Rx8 | RxCRC_ENAB); - if (info->type <= TYPE_PI2) outb_p(1, dev->base_addr + PI_DREQ_MASK); - } /* Move tail */ priv->rx_tail = i = (i + 1) % NUM_RX_BUF; priv->rx_count--; @@ -1068,265 +1137,277 @@ static void rx_bh(void *arg) } -static void tx_isr(struct net_device *dev) -{ - struct scc_priv *priv = dev->priv; - int cmd = priv->cmd; +static void tx_isr(struct scc_priv *priv) { int i = priv->tx_tail, p = priv->tx_ptr; /* Suspend TX interrupts if we don't want to send anything. See Figure 2-22. 
*/ if (p == priv->tx_len[i]) { - write_scc(cmd, R0, RES_Tx_P); + write_scc(priv, R0, RES_Tx_P); return; } /* Write characters */ - while ((read_scc(cmd, R0) & Tx_BUF_EMP) && p < priv->tx_len[i]) { - write_scc(cmd, R8, priv->tx_buf[i][p++]); + while ((read_scc(priv, R0) & Tx_BUF_EMP) && p < priv->tx_len[i]) { + write_scc_data(priv, priv->tx_buf[i][p++], 0); } - priv->tx_ptr = p; + /* Reset EOM latch of Z8530 */ + if (!priv->tx_ptr && p && priv->chip == Z8530) + write_scc(priv, R0, RES_EOM_L); + + priv->tx_ptr = p; } -static void es_isr(struct net_device *dev) -{ - struct scc_priv *priv = dev->priv; - struct scc_info *info = priv->info; - int i, cmd = priv->cmd; - int st, dst, res; +static void es_isr(struct scc_priv *priv) { + int i, rr0, drr0, res; unsigned long flags; - /* Read status and reset interrupt bit */ - st = read_scc(cmd, R0); - write_scc(cmd, R0, RES_EXT_INT); - dst = priv->status ^ st; - priv->status = st; - - /* Since the EOM latch is reset automatically, we assume that - it has been zero if and only if we are in the TX_ACTIVE state. - Otherwise we follow 2.4.9.6. */ + /* Read status, reset interrupt bit (open latches) */ + rr0 = read_scc(priv, R0); + write_scc(priv, R0, RES_EXT_INT); + drr0 = priv->rr0 ^ rr0; + priv->rr0 = rr0; - /* Transmit underrun */ - if ((priv->tx_state == TX_ACTIVE) && (st & TxEOM)) { + /* Transmit underrun (2.4.9.6). We can't check the TxEOM flag, since + it might have already been cleared again by AUTOEOM. 
*/ + if (priv->state == TX_DATA) { /* Get remaining bytes */ i = priv->tx_tail; - if (dev->dma) { - flags=claim_dma_lock(); - disable_dma(dev->dma); - clear_dma_ff(dev->dma); - res = get_dma_residue(dev->dma); + if (priv->param.dma >= 0) { + disable_dma(priv->param.dma); + flags = claim_dma_lock(); + res = get_dma_residue(priv->param.dma); release_dma_lock(flags); } else { res = priv->tx_len[i] - priv->tx_ptr; - if (res) write_scc(cmd, R0, RES_Tx_P); priv->tx_ptr = 0; } - /* Remove frame from FIFO */ - priv->tx_tail = (i + 1) % NUM_TX_BUF; - priv->tx_count--; - /* Check if another frame is available and we are allowed to transmit */ - if (priv->tx_count && (jiffies - priv->tx_start) < priv->param.txtime) { - if (dev->dma) { - flags=claim_dma_lock(); - set_dma_addr(dev->dma, (int) priv->tx_buf[priv->tx_tail]); - set_dma_count(dev->dma, priv->tx_len[priv->tx_tail]); - enable_dma(dev->dma); - release_dma_lock(flags); - } else { - /* If we have an ESCC, we are allowed to write data bytes - immediately. Otherwise we have to wait for the next - TX interrupt. See Figure 2-22. */ - if (info->chip == Z85230) { - tx_isr(dev); - } - } - } else { - /* No frame available. Disable interrupts. 
*/ - priv->tx_state = TX_SQDELAY; - delay(dev, priv->param.sqdelay); - write_scc(cmd, R15, DCDIE | CTSIE | SYNCIE); - write_scc(cmd, R1, EXT_INT_ENAB | WT_FN_RDYFN); - } - /* Update packet statistics */ + /* Disable DREQ / TX interrupt */ + if (priv->param.dma >= 0 && priv->type == TYPE_TWIN) + outb(0, priv->card_base + TWIN_DMA_CFG); + else + write_scc(priv, R1, EXT_INT_ENAB | WT_FN_RDYFN); if (res) { + /* Update packet statistics */ priv->stats.tx_errors++; priv->stats.tx_fifo_errors++; + /* Other underrun interrupts may already be waiting */ + write_scc(priv, R0, RES_EXT_INT); + write_scc(priv, R0, RES_EXT_INT); } else { + /* Update packet statistics */ priv->stats.tx_packets++; + priv->stats.tx_bytes += priv->tx_len[i]; + /* Remove frame from FIFO */ + priv->tx_tail = (i + 1) % NUM_TX_BUF; + priv->tx_count--; + /* Inform upper layers */ + netif_wake_queue(priv->dev); + } + /* Switch state */ + write_scc(priv, R15, 0); + if (priv->tx_count && + (jiffies - priv->tx_start) < priv->param.txtimeout) { + priv->state = TX_PAUSE; + start_timer(priv, priv->param.txpause, 0); + } else { + priv->state = TX_TAIL; + start_timer(priv, priv->param.txtail, 0); } - /* Inform upper layers */ - netif_wake_queue(dev); } /* DCD transition */ - if ((priv->tx_state < TX_TXDELAY) && (dst & DCD)) { - /* Transmitter state change */ - priv->tx_state = TX_OFF; - /* Enable or disable receiver */ - if (st & DCD) { - if (dev->dma) { - /* Program DMA controller */ - flags=claim_dma_lock(); - disable_dma(dev->dma); - clear_dma_ff(dev->dma); - set_dma_mode(dev->dma, DMA_MODE_READ); - set_dma_addr(dev->dma, (int) priv->rx_buf[priv->rx_head]); - set_dma_count(dev->dma, BUF_SIZE); - enable_dma(dev->dma); - release_dma_lock(flags); - /* Configure PackeTwin DMA */ - if (info->type == TYPE_TWIN) { - outb_p((dev->dma == 1) ? TWIN_DMA_HDX_R1 : TWIN_DMA_HDX_R3, - dev->base_addr + TWIN_DMA_CFG); - } - /* Sp. cond. intr. 
only, ext int enable */ - write_scc(cmd, R1, EXT_INT_ENAB | INT_ERR_Rx | - WT_RDY_RT | WT_FN_RDYFN | WT_RDY_ENAB); - } else { - /* Intr. on all Rx characters and Sp. cond., ext int enable */ - write_scc(cmd, R1, EXT_INT_ENAB | INT_ALL_Rx | WT_RDY_RT | - WT_FN_RDYFN); - } - if (priv->rx_count < NUM_RX_BUF) { - /* Enable receiver */ - write_scc(cmd, R3, RxENABLE | Rx8 | RxCRC_ENAB); + if (drr0 & DCD) { + if (rr0 & DCD) { + switch (priv->state) { + case IDLE: + case WAIT: + priv->state = DCD_ON; + write_scc(priv, R15, 0); + start_timer(priv, priv->param.dcdon, 0); } } else { - /* Disable DMA */ - if (dev->dma) - { - flags=claim_dma_lock(); - disable_dma(dev->dma); - release_dma_lock(flags); - } - /* Disable receiver */ - write_scc(cmd, R3, Rx8); - /* DMA disable, RX int disable, Ext int enable */ - write_scc(cmd, R1, EXT_INT_ENAB | WT_RDY_RT | WT_FN_RDYFN); - /* Transmitter state change */ - if (random() > priv->param.persist) - delay(dev, priv->param.slottime); - else { - if (priv->tx_count) { - priv->tx_state = TX_TXDELAY; - write_scc(cmd, R5, TxCRC_ENAB | RTS | TxENAB | Tx8); - priv->tx_start = jiffies; - delay(dev, priv->param.txdelay); - } else { - priv->tx_state = TX_IDLE; - } + switch (priv->state) { + case RX_ON: + rx_off(priv); + priv->state = DCD_OFF; + write_scc(priv, R15, 0); + start_timer(priv, priv->param.dcdoff, 0); } } } /* CTS transition */ - if ((info->type <= TYPE_PI2) && (dst & CTS) && (~st & CTS)) { - /* Timer has expired */ - tm_isr(dev); - } + if ((drr0 & CTS) && (~rr0 & CTS) && priv->type != TYPE_TWIN) + tm_isr(priv); - /* /SYNC/HUNT transition */ - if ((dst & SYNC_HUNT) && (~st & SYNC_HUNT)) { - /* Reset current frame and clear RX FIFO */ - while (read_scc(cmd, R0) & Rx_CH_AV) read_scc(cmd, R8); - priv->rx_over = 0; - if (dev->dma) { - flags=claim_dma_lock(); - disable_dma(dev->dma); - clear_dma_ff(dev->dma); - set_dma_addr(dev->dma, (int) priv->rx_buf[priv->rx_head]); - set_dma_count(dev->dma, BUF_SIZE); - enable_dma(dev->dma); - 
release_dma_lock(flags); +} + + +static void tm_isr(struct scc_priv *priv) { + switch (priv->state) { + case TX_HEAD: + case TX_PAUSE: + tx_on(priv); + priv->state = TX_DATA; + break; + case TX_TAIL: + write_scc(priv, R5, TxCRC_ENAB | Tx8); + priv->state = RTS_OFF; + if (priv->type != TYPE_TWIN) write_scc(priv, R15, 0); + start_timer(priv, priv->param.rtsoff, 0); + break; + case RTS_OFF: + write_scc(priv, R15, DCDIE); + priv->rr0 = read_scc(priv, R0); + if (priv->rr0 & DCD) { + priv->stats.collisions++; + rx_on(priv); + priv->state = RX_ON; } else { - priv->rx_ptr = 0; + priv->state = WAIT; + start_timer(priv, priv->param.waittime, DCDIE); + } + break; + case WAIT: + if (priv->tx_count) { + priv->state = TX_HEAD; + priv->tx_start = jiffies; + write_scc(priv, R5, TxCRC_ENAB | RTS | TxENAB | Tx8); + write_scc(priv, R15, 0); + start_timer(priv, priv->param.txdelay, 0); + } else { + priv->state = IDLE; + if (priv->type != TYPE_TWIN) write_scc(priv, R15, DCDIE); } + break; + case DCD_ON: + case DCD_OFF: + write_scc(priv, R15, DCDIE); + priv->rr0 = read_scc(priv, R0); + if (priv->rr0 & DCD) { + rx_on(priv); + priv->state = RX_ON; + } else { + priv->state = WAIT; + start_timer(priv, + random()/priv->param.persist*priv->param.slottime, + DCDIE); + } + break; } } -static void tm_isr(struct net_device *dev) -{ - struct scc_priv *priv = dev->priv; - struct scc_info *info = priv->info; - int cmd = priv->cmd; +static inline void tx_on(struct scc_priv *priv) { + int i, n; unsigned long flags; - switch (priv->tx_state) { - case TX_OFF: - if (~priv->status & DCD) { - if (random() > priv->param.persist) delay(dev, priv->param.slottime); - else { - if (priv->tx_count) { - priv->tx_state = TX_TXDELAY; - write_scc(cmd, R5, TxCRC_ENAB | RTS | TxENAB | Tx8); - priv->tx_start = jiffies; - delay(dev, priv->param.txdelay); - } else { - priv->tx_state = TX_IDLE; - } - } - } - break; - case TX_TXDELAY: - priv->tx_state = TX_ACTIVE; - if (dev->dma) { - /* Program DMA controller */ - - 
flags=claim_dma_lock(); - disable_dma(dev->dma); - clear_dma_ff(dev->dma); - set_dma_mode(dev->dma, DMA_MODE_WRITE); - set_dma_addr(dev->dma, (int) priv->tx_buf[priv->tx_tail]); - set_dma_count(dev->dma, priv->tx_len[priv->tx_tail]); - enable_dma(dev->dma); - release_dma_lock(flags); - - /* Configure PackeTwin DMA */ - if (info->type == TYPE_TWIN) { - outb_p((dev->dma == 1) ? TWIN_DMA_HDX_T1 : TWIN_DMA_HDX_T3, - dev->base_addr + TWIN_DMA_CFG); - } - /* Enable interrupts and DMA. On the PackeTwin, the DTR//REQ pin - is used for TX DMA requests, but we enable the WAIT/DMA request - pin, anyway */ - write_scc(cmd, R15, TxUIE | DCDIE | CTSIE | SYNCIE); - write_scc(cmd, R1, EXT_INT_ENAB | WT_FN_RDYFN | WT_RDY_ENAB); - } else { - write_scc(cmd, R15, TxUIE | DCDIE | CTSIE | SYNCIE); - write_scc(cmd, R1, EXT_INT_ENAB | WT_FN_RDYFN | TxINT_ENAB); - tx_isr(dev); + if (priv->param.dma >= 0) { + n = (priv->chip == Z85230) ? 3 : 1; + /* Program DMA controller */ + flags = claim_dma_lock(); + set_dma_mode(priv->param.dma, DMA_MODE_WRITE); + set_dma_addr(priv->param.dma, (int) priv->tx_buf[priv->tx_tail]+n); + set_dma_count(priv->param.dma, priv->tx_len[priv->tx_tail]-n); + release_dma_lock(flags); + /* Enable TX underrun interrupt */ + write_scc(priv, R15, TxUIE); + /* Configure DREQ */ + if (priv->type == TYPE_TWIN) + outb((priv->param.dma == 1) ? 
TWIN_DMA_HDX_T1 : TWIN_DMA_HDX_T3, + priv->card_base + TWIN_DMA_CFG); + else + write_scc(priv, R1, EXT_INT_ENAB | WT_FN_RDYFN | WT_RDY_ENAB); + /* Write first byte(s) */ + save_flags(flags); + cli(); + for (i = 0; i < n; i++) + write_scc_data(priv, priv->tx_buf[priv->tx_tail][i], 1); + enable_dma(priv->param.dma); + restore_flags(flags); + } else { + write_scc(priv, R15, TxUIE); + write_scc(priv, R1, EXT_INT_ENAB | WT_FN_RDYFN | TxINT_ENAB); + tx_isr(priv); + } + /* Reset EOM latch if we do not have the AUTOEOM feature */ + if (priv->chip == Z8530) write_scc(priv, R0, RES_EOM_L); +} + + +static inline void rx_on(struct scc_priv *priv) { + unsigned long flags; + + /* Clear RX FIFO */ + while (read_scc(priv, R0) & Rx_CH_AV) read_scc_data(priv); + priv->rx_over = 0; + if (priv->param.dma >= 0) { + /* Program DMA controller */ + flags = claim_dma_lock(); + set_dma_mode(priv->param.dma, DMA_MODE_READ); + set_dma_addr(priv->param.dma, (int) priv->rx_buf[priv->rx_head]); + set_dma_count(priv->param.dma, BUF_SIZE); + release_dma_lock(flags); + enable_dma(priv->param.dma); + /* Configure PackeTwin DMA */ + if (priv->type == TYPE_TWIN) { + outb((priv->param.dma == 1) ? TWIN_DMA_HDX_R1 : TWIN_DMA_HDX_R3, + priv->card_base + TWIN_DMA_CFG); } - if (info->chip == Z8530) write_scc(cmd, R0, RES_EOM_L); - break; - case TX_SQDELAY: - /* Disable transmitter */ - write_scc(cmd, R5, TxCRC_ENAB | Tx8); - /* Transmitter state change: Switch to TX_OFF and wait at least - 1 slottime. */ - priv->tx_state = TX_OFF; - if (~priv->status & DCD) delay(dev, priv->param.waittime); + /* Sp. cond. intr. only, ext int enable, RX DMA enable */ + write_scc(priv, R1, EXT_INT_ENAB | INT_ERR_Rx | + WT_RDY_RT | WT_FN_RDYFN | WT_RDY_ENAB); + } else { + /* Reset current frame */ + priv->rx_ptr = 0; + /* Intr. on all Rx characters and Sp. 
cond., ext int enable */ + write_scc(priv, R1, EXT_INT_ENAB | INT_ALL_Rx | WT_RDY_RT | + WT_FN_RDYFN); } + write_scc(priv, R0, ERR_RES); + write_scc(priv, R3, RxENABLE | Rx8 | RxCRC_ENAB); } -static inline void delay(struct net_device *dev, int t) -{ - struct scc_priv *priv = dev->priv; - int tmr = priv->tmr; +static inline void rx_off(struct scc_priv *priv) { + /* Disable receiver */ + write_scc(priv, R3, Rx8); + /* Disable DREQ / RX interrupt */ + if (priv->param.dma >= 0 && priv->type == TYPE_TWIN) + outb(0, priv->card_base + TWIN_DMA_CFG); + else + write_scc(priv, R1, EXT_INT_ENAB | WT_FN_RDYFN); + /* Disable DMA */ + if (priv->param.dma >= 0) disable_dma(priv->param.dma); +} + + +static void start_timer(struct scc_priv *priv, int t, int r15) { + unsigned long flags; - outb_p(t & 0xFF, tmr); - outb_p((t >> 8) & 0xFF, tmr); + outb(priv->tmr_mode, priv->tmr_ctrl); + if (t == 0) { + tm_isr(priv); + } else if (t > 0) { + save_flags(flags); + cli(); + outb(t & 0xFF, priv->tmr_cnt); + outb((t >> 8) & 0xFF, priv->tmr_cnt); + if (priv->type != TYPE_TWIN) { + write_scc(priv, R15, r15 | CTSIE); + priv->rr0 |= CTS; + } + restore_flags(flags); + } } -static inline unsigned char random(void) -{ +static inline unsigned char random(void) { /* See "Numerical Recipes in C", second edition, p. 284 */ rand = rand * 1664525L + 1013904223L; return (unsigned char) (rand >> 24); } - diff --git a/drivers/video/aty128fb.c b/drivers/video/aty128fb.c index cf3512043..0770f4448 100644 --- a/drivers/video/aty128fb.c +++ b/drivers/video/aty128fb.c @@ -75,9 +75,9 @@ #undef DEBUG #ifdef DEBUG -#define DBG(x) printk(KERN_DEBUG "aty128fb: %s\n",(x)); +#define DBG(fmt, args...) printk(KERN_DEBUG "aty128fb: %s " fmt, __FUNCTION__, ##args); #else -#define DBG(x) +#define DBG(fmt, args...) 
#endif #ifndef CONFIG_PPC @@ -104,10 +104,20 @@ static struct fb_var_screeninfo default_var = { #ifndef MODULE /* default modedb mode */ +/* 640x480, 60 Hz, Non-Interlaced (25.172 MHz dotclock) */ static struct fb_videomode defaultmode __initdata = { - /* 640x480, 60 Hz, Non-Interlaced (25.172 MHz dotclock) */ - NULL, 60, 640, 480, 39722, 48, 16, 33, 10, 96, 2, - 0, FB_VMODE_NONINTERLACED + refresh: 60, + xres: 640, + yres: 480, + pixclock: 39722, + left_margin: 48, + right_margin: 16, + upper_margin: 33, + lower_margin: 10, + hsync_len: 96, + vsync_len: 2, + sync: 0, + vmode: FB_VMODE_NONINTERLACED }; #endif /* MODULE */ @@ -141,32 +151,30 @@ static const struct aty128_chip_info aty128_pci_probe_list[] __initdata = /* packed BIOS settings */ #ifndef CONFIG_PPC -#pragma pack(1) typedef struct { - u8 clock_chip_type; - u8 struct_size; - u8 accelerator_entry; - u8 VGA_entry; - u16 VGA_table_offset; - u16 POST_table_offset; - u16 XCLK; - u16 MCLK; - u8 num_PLL_blocks; - u8 size_PLL_blocks; - u16 PCLK_ref_freq; - u16 PCLK_ref_divider; - u32 PCLK_min_freq; - u32 PCLK_max_freq; - u16 MCLK_ref_freq; - u16 MCLK_ref_divider; - u32 MCLK_min_freq; - u32 MCLK_max_freq; - u16 XCLK_ref_freq; - u16 XCLK_ref_divider; - u32 XCLK_min_freq; - u32 XCLK_max_freq; -} PLL_BLOCK; -#pragma pack() + u8 clock_chip_type; + u8 struct_size; + u8 accelerator_entry; + u8 VGA_entry; + u16 VGA_table_offset; + u16 POST_table_offset; + u16 XCLK; + u16 MCLK; + u8 num_PLL_blocks; + u8 size_PLL_blocks; + u16 PCLK_ref_freq; + u16 PCLK_ref_divider; + u32 PCLK_min_freq; + u32 PCLK_max_freq; + u16 MCLK_ref_freq; + u16 MCLK_ref_divider; + u32 MCLK_min_freq; + u32 MCLK_max_freq; + u16 XCLK_ref_freq; + u16 XCLK_ref_divider; + u32 XCLK_min_freq; + u32 XCLK_max_freq; +} __attribute__ ((packed)) PLL_BLOCK; #endif /* !CONFIG_PPC */ /* onboard memory information */ @@ -194,12 +202,9 @@ static const struct aty128_meminfo sdr_sgram = static const struct aty128_meminfo ddr_sgram = { 4, 4, 3, 3, 2, 3, 1, 16, 31, 16, 
"64-bit DDR SGRAM" }; -static int currcon = 0; - -static char *aty128fb_name = "ATY Rage128"; +static const char *aty128fb_name = "ATY Rage128"; static char fontname[40] __initdata = { 0 }; -static char noaccel __initdata = 0; -static unsigned int initdepth __initdata = 8; +static int noaccel __initdata = 0; #ifndef MODULE static const char *mode_option __initdata = NULL; @@ -291,13 +296,14 @@ struct fb_info_aty128 { u32 cfb32[16]; #endif } fbcon_cmap; - int blitter_may_be_busy; #ifdef CONFIG_PCI struct pci_dev *pdev; #endif #ifdef CONFIG_MTRR struct { int vram; int vram_valid; } mtrr; #endif + int currcon; + int blitter_may_be_busy; int fifo_slots; /* free slots in FIFO (64 max) */ }; @@ -515,10 +521,8 @@ aty_pll_wait_readupdate(const struct fb_info_aty128 *info) break; } -#ifdef DEBUG if (reset) /* reset engine?? */ - DBG("PLL write timeout!"); -#endif + printk(KERN_DEBUG "aty128fb: PLL write timeout!"); } @@ -645,9 +649,7 @@ aty128_reset_engine(const struct fb_info_aty128 *info) /* use old pio mode */ aty_st_le32(PM4_BUFFER_CNTL, PM4_BUFFER_CNTL_NONPM4); -#ifdef DEBUG DBG("engine reset"); -#endif } @@ -1075,12 +1077,10 @@ aty128_var_to_pll(u32 period_in_ps, struct aty128_pll *pll, pll->feedback_divider = round_div(n, d); pll->vclk = vclk; -#ifdef DEBUG - printk(KERN_DEBUG "var_to_pll: post %d feedback %d vlck %d output %d ref_divider %d\n", - pll->post_divider, pll->feedback_divider, vclk, output_freq, - c.ref_divider); - printk(KERN_DEBUG "var_to_pll: vclk_per: %d\n", period_in_ps); -#endif + DBG("post %d feedback %d vlck %d output %d ref_divider %d " + "vclk_per: %d\n", pll->post_divider, + pll->feedback_divider, vclk, output_freq, + c.ref_divider, period_in_ps); return 0; } @@ -1134,9 +1134,8 @@ aty128_ddafifo(struct aty128_ddafifo *dsp, m->Tr2w + x; -#ifdef DEBUG - printk(KERN_DEBUG "aty128fb: x %x\n", x); -#endif + DBG("x %x\n", x); + b = 0; while (x) { x >>= 1; @@ -1155,10 +1154,9 @@ aty128_ddafifo(struct aty128_ddafifo *dsp, return -EINVAL; } -#ifdef 
DEBUG - printk(KERN_DEBUG "aty128fb: p: %x rloop: %x x: %x ron: %x roff: %x\n", - p, m->Rloop, x, ron, roff); -#endif + DBG("p: %x rloop: %x x: %x ron: %x roff: %x\n", + p, m->Rloop, x, ron, roff); + dsp->dda_config = p << 16 | m->Rloop << 20 | x; dsp->dda_on_off = ron << 16 | roff; @@ -1543,7 +1541,12 @@ static int aty128fb_get_cmap(struct fb_cmap *cmap, int kspc, int con, struct fb_info *info) { - if (con == currcon) /* current console? */ +#if 1 + fb_copy_cmap(&info->cmap, cmap, kspc ? 0 : 2); +#else + struct fb_info_aty128 fb = (struct fb_info_aty128 *)info; + + if (con == fb->currcon) /* current console? */ return fb_get_cmap(cmap, kspc, aty128_getcolreg, info); else if (fb_display[con].cmap.len) /* non default colormap? */ fb_copy_cmap(&fb_display[con].cmap, cmap, kspc ? 0 : 2); @@ -1551,6 +1554,7 @@ aty128fb_get_cmap(struct fb_cmap *cmap, int kspc, int con, int size = (fb_display[con].var.bits_per_pixel <= 8) ? 256 : 32; fb_copy_cmap(fb_default_cmap(size), cmap, kspc ? 0 : 2); } +#endif return 0; } @@ -1564,6 +1568,7 @@ aty128fb_set_cmap(struct fb_cmap *cmap, int kspc, int con, struct fb_info *info) { int err; + struct fb_info_aty128 *fb = (struct fb_info_aty128 *)info; struct display *disp; if (con >= 0) @@ -1577,7 +1582,7 @@ aty128fb_set_cmap(struct fb_cmap *cmap, int kspc, int con, return err; } - if (con == currcon) /* current console? */ + if (con == fb->currcon) /* current console? */ return fb_set_cmap(cmap, kspc, aty128_setcolreg, info); else fb_copy_cmap(cmap, &disp->cmap, kspc ? 0 : 1); @@ -1632,24 +1637,6 @@ aty128fb_setup(char *options) fontname[i] = 0; } else if (!strncmp(this_opt, "noaccel", 7)) { noaccel = 1; - } else if (!strncmp(this_opt, "depth:", 6)) { - unsigned int depth = simple_strtoul(this_opt+6, NULL, 0); - switch (depth) { - case 0 ... 8: - initdepth = 8; - break; - case 9 ... 16: - initdepth = 16; - break; - case 17 ... 24: - initdepth = 24; - break; - case 25 ... 
32: - initdepth = 32; - break; - default: - initdepth = 8; - } } #ifdef CONFIG_MTRR else if(!strncmp(this_opt, "nomtrr", 6)) { @@ -1762,7 +1749,7 @@ aty128_init(struct fb_info_aty128 *info, const char *name) #endif /* CONFIG_PPC */ { if (fb_find_mode(&var, &info->fb_info, mode_option, NULL, 0, - &defaultmode, initdepth) == 0) + &defaultmode, 8) == 0) var = default_var; } #endif /* MODULE */ @@ -1859,16 +1846,6 @@ aty128_pci_register(struct pci_dev *pdev, u32 fb_addr, reg_addr, io_addr = 0; int err; -#if 0 - /* Request resources we're going to use */ - io_addr = pci_resource_start(pdev, 1); - if (!request_region(io_addr, pci_resource_len(pdev, 1), - "aty128fb IO")) { - printk(KERN_ERR "aty128fb: cannot reserve I/O ports\n"); - goto err_out_none; - } -#endif - fb_addr = pci_resource_start(pdev, 0); if (!request_mem_region(fb_addr, pci_resource_len(pdev, 0), "aty128fb FB")) { @@ -1894,6 +1871,8 @@ aty128_pci_register(struct pci_dev *pdev, /* Copy PCI device info into info->pdev */ info->pdev = pdev; + info->currcon = -1; + /* Virtualize mmio region */ info->regbase_phys = reg_addr; info->regbase = ioremap(reg_addr, 0x1FFF); @@ -1966,7 +1945,6 @@ err_free_mmio: err_free_fb: release_mem_region(pci_resource_start(pdev, 1), pci_resource_len(pdev, 1)); -err_out_none: return -ENODEV; } #endif /* CONFIG_PCI */ @@ -2072,13 +2050,12 @@ aty128_get_pllinfo(struct fb_info_aty128 *info) if (bios_seg) iounmap(bios_seg); -#ifdef DEBUG - printk(KERN_DEBUG "get_pllinfo: ppll_max %d ppll_min %d xclk %d " - "ref_divider %d dotclock %d\n", - info->constants.ppll_max, info->constants.ppll_min, - info->constants.xclk, info->constants.ref_divider, - info->constants.dotclock); -#endif + DBG("ppll_max %d ppll_min %d xclk %d " + "ref_divider %d dotclock %d\n", + info->constants.ppll_max, info->constants.ppll_min, + info->constants.xclk, info->constants.ref_divider, + info->constants.dotclock); + return; } #endif /* !CONFIG_PPC */ @@ -2159,11 +2136,12 @@ aty128fbcon_switch(int con, struct 
fb_info *fb) struct aty128fb_par par; /* Do we have to save the colormap? */ - if (fb_display[currcon].cmap.len) - fb_get_cmap(&fb_display[currcon].cmap, 1, aty128_getcolreg, fb); + if (fb_display[info->currcon].cmap.len) + fb_get_cmap(&fb_display[info->currcon].cmap, 1, + aty128_getcolreg, fb); /* set the current console */ - currcon = con; + info->currcon = con; aty128_decode_var(&fb_display[con].var, &par, info); aty128_set_par(&par, info); @@ -2319,7 +2297,9 @@ aty128_setcolreg(u_int regno, u_int red, u_int green, u_int blue, static void do_install_cmap(int con, struct fb_info *info) { - if (con != currcon) + struct fb_info_aty128 *fb = (struct fb_info_aty128 *)info; + + if (con != fb->currcon) return; if (fb_display[con].cmap.len) diff --git a/drivers/video/matrox/matroxfb_crtc2.c b/drivers/video/matrox/matroxfb_crtc2.c index f4824762b..fff0a4f39 100644 --- a/drivers/video/matrox/matroxfb_crtc2.c +++ b/drivers/video/matrox/matroxfb_crtc2.c @@ -85,7 +85,7 @@ static void matroxfb_dh_restore(struct matroxfb_dh_fb_info* m2info, int mode, unsigned int pos) { u_int32_t tmp; - struct matrox_fb_info* minfo = m2info->primary_dev; + MINFO_FROM(m2info->primary_dev); switch (mode) { case 15: @@ -250,9 +250,9 @@ static void initMatroxDH(struct matroxfb_dh_fb_info* m2info, struct display* p) static int matroxfb_dh_open(struct fb_info* info, int user) { #define m2info ((struct matroxfb_dh_fb_info*)info) - struct matrox_fb_info* minfo = m2info->primary_dev; + MINFO_FROM(m2info->primary_dev); - if (minfo) { + if (MINFO) { if (ACCESS_FBINFO(dead)) { return -ENXIO; } @@ -263,9 +263,9 @@ static int matroxfb_dh_open(struct fb_info* info, int user) { static int matroxfb_dh_release(struct fb_info* info, int user) { #define m2info ((struct matroxfb_dh_fb_info*)info) - struct matrox_fb_info* minfo = m2info->primary_dev; + MINFO_FROM(m2info->primary_dev); - if (minfo) { + if (MINFO) { } return 0; #undef m2info @@ -320,7 +320,7 @@ static int matroxfb_dh_set_var(struct fb_var_screeninfo* 
var, int con, int cmap_len; int mode; int err; - struct matrox_fb_info* minfo = m2info->primary_dev; + MINFO_FROM(m2info->primary_dev); if (con < 0) p = m2info->fbcon.disp; @@ -477,7 +477,7 @@ static int matroxfb_dh_pan_display(struct fb_var_screeninfo* var, int con, static int matroxfb_dh_switch(int con, struct fb_info* info); static int matroxfb_dh_get_vblank(const struct matroxfb_dh_fb_info* m2info, struct fb_vblank* vblank) { - struct matrox_fb_info* minfo = m2info->primary_dev; + MINFO_FROM(m2info->primary_dev); memset(vblank, 0, sizeof(*vblank)); vblank->flags = FB_VBLANK_HAVE_VCOUNT | FB_VBLANK_HAVE_VBLANK; @@ -496,7 +496,7 @@ static int matroxfb_dh_ioctl(struct inode* inode, int con, struct fb_info* info) { #define m2info ((struct matroxfb_dh_fb_info*)info) - struct matrox_fb_info* minfo = m2info->primary_dev; + MINFO_FROM(m2info->primary_dev); DBG("matroxfb_crtc2_ioctl") @@ -516,7 +516,7 @@ static int matroxfb_dh_ioctl(struct inode* inode, case MATROXFB_GET_OUTPUT_MODE: case MATROXFB_GET_ALL_OUTPUTS: { - return ACCESS_FBINFO(fbcon.fbops)->fb_ioctl(inode, file, cmd, arg, con, &minfo->fbcon); + return ACCESS_FBINFO(fbcon.fbops)->fb_ioctl(inode, file, cmd, arg, con, &ACCESS_FBINFO(fbcon)); } case MATROXFB_SET_OUTPUT_CONNECTION: { @@ -660,15 +660,15 @@ static int matroxfb_dh_regit(CPMINFO struct matroxfb_dh_fb_info* m2info) { if (mem < 64*1024) mem *= 1024; mem &= ~0x00000FFF; /* PAGE_MASK? */ - if (minfo->video.len_usable + mem <= minfo->video.len) - m2info->video.offbase = minfo->video.len - mem; - else if (minfo->video.len < mem) { + if (ACCESS_FBINFO(video.len_usable) + mem <= ACCESS_FBINFO(video.len)) + m2info->video.offbase = ACCESS_FBINFO(video.len) - mem; + else if (ACCESS_FBINFO(video.len) < mem) { kfree(d); return -ENOMEM; } else { /* check yres on first head... 
*/ m2info->video.borrowed = mem; - minfo->video.len_usable -= mem; - m2info->video.offbase = minfo->video.len_usable; + ACCESS_FBINFO(video.len_usable) -= mem; + m2info->video.offbase = ACCESS_FBINFO(video.len_usable); } m2info->video.base = ACCESS_FBINFO(video.base) + m2info->video.offbase; m2info->video.len = m2info->video.len_usable = m2info->video.len_maximum = mem; @@ -698,8 +698,8 @@ static int matroxfb_dh_regit(CPMINFO struct matroxfb_dh_fb_info* m2info) { matroxfb_dh_set_var(&matroxfb_dh_defined, -1, &m2info->fbcon); } down_write(&ACCESS_FBINFO(crtc2.lock)); - oldcrtc2 = minfo->crtc2.info; - minfo->crtc2.info = &m2info->fbcon; + oldcrtc2 = ACCESS_FBINFO(crtc2.info); + ACCESS_FBINFO(crtc2.info) = &m2info->fbcon; up_write(&ACCESS_FBINFO(crtc2.lock)); if (oldcrtc2) { printk(KERN_ERR "matroxfb_crtc2: Internal consistency check failed: crtc2 already present: %p\n", @@ -731,9 +731,9 @@ static void matroxfb_dh_deregisterfb(struct matroxfb_dh_fb_info* m2info) { struct fb_info* crtc2; down_write(&ACCESS_FBINFO(crtc2.lock)); - crtc2 = minfo->crtc2.info; + crtc2 = ACCESS_FBINFO(crtc2.info); if (crtc2 == &m2info->fbcon) - minfo->crtc2.info = NULL; + ACCESS_FBINFO(crtc2.info) = NULL; up_write(&ACCESS_FBINFO(crtc2.lock)); if (crtc2 != &m2info->fbcon) { printk(KERN_ERR "matroxfb_crtc2: Internal consistency check failed: crtc2 mismatch at unload: %p != %p\n", @@ -756,7 +756,7 @@ static void* matroxfb_crtc2_probe(struct matrox_fb_info* minfo) { struct matroxfb_dh_fb_info* m2info; /* hardware is CRTC2 incapable... 
*/ - if (!minfo->devflags.crtc2) + if (!ACCESS_FBINFO(devflags.crtc2)) return NULL; m2info = (struct matroxfb_dh_fb_info*)kmalloc(sizeof(*m2info), GFP_KERNEL); if (!m2info) { @@ -764,7 +764,7 @@ static void* matroxfb_crtc2_probe(struct matrox_fb_info* minfo) { return NULL; } memset(m2info, 0, sizeof(*m2info)); - m2info->primary_dev = minfo; + m2info->primary_dev = MINFO; if (matroxfb_dh_registerfb(m2info)) { kfree(m2info); printk(KERN_ERR "matroxfb_crtc2: CRTC2 framebuffer failed to register\n"); diff --git a/drivers/video/matrox/matroxfb_maven.c b/drivers/video/matrox/matroxfb_maven.c index e7d515ec2..57b822293 100644 --- a/drivers/video/matrox/matroxfb_maven.c +++ b/drivers/video/matrox/matroxfb_maven.c @@ -892,9 +892,9 @@ static struct matrox_altout maven_altout = { static int maven_init_client(struct i2c_client* clnt) { struct i2c_adapter* a = clnt->adapter; - struct matroxfb_dh_maven_info* m2info = ((struct i2c_bit_adapter*)a)->minfo; struct maven_data* md = clnt->data; - struct matrox_fb_info* minfo = m2info->primary_dev; + struct matroxfb_dh_maven_info* m2info __attribute__((unused)) = ((struct i2c_bit_adapter*)a)->minfo; + MINFO_FROM(m2info->primary_dev); md->mode = MODE_MONITOR; md->primary_head = MINFO; diff --git a/drivers/video/valkyriefb.c b/drivers/video/valkyriefb.c index 6a4e41e9e..23fc97915 100644 --- a/drivers/video/valkyriefb.c +++ b/drivers/video/valkyriefb.c @@ -4,7 +4,7 @@ * Created 8 August 1998 by Martin Costabel and Kevin Schoedel * * Vmode-switching changes and vmode 15/17 modifications created 29 August - * 1998 by Barry Nathan <barryn@pobox.com>. + * 1998 by Barry K. Nathan <barryn@pobox.com>. 
* * Derived directly from: * diff --git a/drivers/video/valkyriefb.h b/drivers/video/valkyriefb.h index ee246a4fc..c8984b38b 100644 --- a/drivers/video/valkyriefb.h +++ b/drivers/video/valkyriefb.h @@ -4,7 +4,7 @@ * Created 8 August 1998 by Martin Costabel and Kevin Schoedel * * Vmode-switching changes and vmode 15/17 modifications created 29 August - * 1998 by Barry Nathan <barryn@pobox.com>. + * 1998 by Barry K. Nathan <barryn@pobox.com>. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/fs/dcache.c b/fs/dcache.c index 3de8547ef..0f9c59c6c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -557,7 +557,7 @@ int shrink_dcache_memory(int priority, unsigned int gfp_mask) { int count = 0; if (priority) - count = dentry_stat.nr_unused >> (priority >> 2); + count = dentry_stat.nr_unused / priority; prune_dcache(count); /* FIXME: kmem_cache_shrink here should tell us the number of pages freed, and it should diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 046d384de..0874d6da7 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -854,6 +854,7 @@ retry: if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) { return; } + lock_kernel(); if (!(bh = fat_bread(sb, i_pos >> MSDOS_DPB_BITS))) { printk("dev = %s, ino = %d\n", kdevname(inode->i_dev), i_pos); fat_fs_panic(sb, "msdos_write_inode: unable to read i-node block"); diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h index a7ad1fbf0..637889c3f 100644 --- a/include/asm-alpha/socket.h +++ b/include/asm-alpha/socket.h @@ -44,6 +44,8 @@ #define SO_DETACH_FILTER 27 #define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 19 diff --git a/include/asm-arm/socket.h b/include/asm-arm/socket.h index 595ba2283..a421e6dc3 100644 --- a/include/asm-arm/socket.h +++ b/include/asm-arm/socket.h @@ -40,6 +40,8 @@ 
#define SO_DETACH_FILTER 27 #define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP /* Nast libc5 fixup - bletch */ #if defined(__KERNEL__) diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h index 9dc6c3389..c44d16bc0 100644 --- a/include/asm-i386/socket.h +++ b/include/asm-i386/socket.h @@ -40,6 +40,8 @@ #define SO_DETACH_FILTER 27 #define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP /* Nasty libc5 fixup - bletch */ #if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) diff --git a/include/asm-i386/timex.h b/include/asm-i386/timex.h index f6cf7303d..1cb3b96b4 100644 --- a/include/asm-i386/timex.h +++ b/include/asm-i386/timex.h @@ -29,7 +29,7 @@ * four billion cycles just basically sounds like a good idea, * regardless of how fast the machine is. */ -typedef unsigned long cycles_t; +typedef unsigned long long cycles_t; extern cycles_t cacheflush_time; @@ -38,10 +38,10 @@ static inline cycles_t get_cycles (void) #ifndef CONFIG_X86_TSC return 0; #else - unsigned long eax, edx; + unsigned long long ret; - rdtsc(eax,edx); - return eax; + rdtscll(ret); + return ret; #endif } diff --git a/include/asm-ia64/socket.h b/include/asm-ia64/socket.h index 877ed2eec..5d5eb7b8c 100644 --- a/include/asm-ia64/socket.h +++ b/include/asm-ia64/socket.h @@ -47,8 +47,8 @@ #define SO_DETACH_FILTER 27 #define SO_PEERNAME 28 - -#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP /* Nast libc5 fixup - bletch */ #if defined(__KERNEL__) diff --git a/include/asm-m68k/socket.h b/include/asm-m68k/socket.h index 2d97e06d9..fa13cd8ff 100644 --- a/include/asm-m68k/socket.h +++ b/include/asm-m68k/socket.h @@ -40,6 +40,8 @@ #define SO_DETACH_FILTER 27 #define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP /* Nast libc5 fixup - bletch */ #if defined(__KERNEL__) diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h index 72592c8fc..a0e63bbbc 
100644 --- a/include/asm-mips/socket.h +++ b/include/asm-mips/socket.h @@ -54,6 +54,8 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SO_DETACH_FILTER 27 #define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP /* Nast libc5 fixup - bletch */ #if defined(__KERNEL__) diff --git a/include/asm-mips64/socket.h b/include/asm-mips64/socket.h index b9098caa4..8410eb384 100644 --- a/include/asm-mips64/socket.h +++ b/include/asm-mips64/socket.h @@ -1,10 +1,10 @@ -/* $Id: socket.h,v 1.2 2000/02/25 21:52:09 ralf Exp $ - * +/* * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1997, 1999 Ralf Baechle + * Copyright (C) 1997, 1999, 2000 Ralf Baechle + * Copyright (C) 2000 Silicon Graphics, Inc. */ #ifndef _ASM_SOCKET_H #define _ASM_SOCKET_H @@ -62,6 +62,8 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. 
*/ #define SO_DETACH_FILTER 27 #define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP /* Nast libc5 fixup - bletch */ #if defined(__KERNEL__) diff --git a/include/asm-ppc/socket.h b/include/asm-ppc/socket.h index c6c1899e8..40b648d47 100644 --- a/include/asm-ppc/socket.h +++ b/include/asm-ppc/socket.h @@ -46,6 +46,8 @@ #define SO_DETACH_FILTER 27 #define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP /* Nast libc5 fixup - bletch */ #if defined(__KERNEL__) diff --git a/include/asm-s390/socket.h b/include/asm-s390/socket.h index 703a7bd58..0d00c3b54 100644 --- a/include/asm-s390/socket.h +++ b/include/asm-s390/socket.h @@ -47,7 +47,9 @@ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 -#define SO_PEERNAME 28 +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP /* Nast libc5 fixup - bletch */ #if defined(__KERNEL__) diff --git a/include/asm-sh/socket.h b/include/asm-sh/socket.h index ebcb6a333..6bba7306e 100644 --- a/include/asm-sh/socket.h +++ b/include/asm-sh/socket.h @@ -40,6 +40,8 @@ #define SO_DETACH_FILTER 27 #define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP /* Nast libc5 fixup - bletch */ #if defined(__KERNEL__) diff --git a/include/asm-sparc/socket.h b/include/asm-sparc/socket.h index cbcebff57..256f5c165 100644 --- a/include/asm-sparc/socket.h +++ b/include/asm-sparc/socket.h @@ -1,4 +1,4 @@ -/* $Id: socket.h,v 1.14 2000/06/09 07:35:28 davem Exp $ */ +/* $Id: socket.h,v 1.15 2000/07/08 00:20:43 davem Exp $ */ #ifndef _ASM_SOCKET_H #define _ASM_SOCKET_H @@ -41,6 +41,8 @@ #define SO_DETACH_FILTER 0x001b #define SO_PEERNAME 0x001c +#define SO_TIMESTAMP 0x001d +#define SCM_TIMESTAMP SO_TIMESTAMP /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 diff --git a/include/asm-sparc64/socket.h b/include/asm-sparc64/socket.h index 387ee1774..e28c54a97 100644 --- 
a/include/asm-sparc64/socket.h +++ b/include/asm-sparc64/socket.h @@ -1,4 +1,4 @@ -/* $Id: socket.h,v 1.7 2000/06/09 07:35:28 davem Exp $ */ +/* $Id: socket.h,v 1.8 2000/07/08 00:20:43 davem Exp $ */ #ifndef _ASM_SOCKET_H #define _ASM_SOCKET_H @@ -41,6 +41,8 @@ #define SO_DETACH_FILTER 0x001b #define SO_PEERNAME 0x001c +#define SO_TIMESTAMP 0x001d +#define SCM_TIMESTAMP SO_TIMESTAMP /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 diff --git a/include/linux/dmascc.h b/include/linux/dmascc.h deleted file mode 100644 index 01b46df73..000000000 --- a/include/linux/dmascc.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * $Id: dmascc.h,v 1.1 1997/12/01 10:44:55 oe1kib Exp $ - * - * Driver for high-speed SCC boards (those with DMA support) - * Copyright (C) 1997 Klaus Kudielka - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -/* Ioctls */ -#define SIOCGSCCPARAM SIOCDEVPRIVATE -#define SIOCSSCCPARAM (SIOCDEVPRIVATE+1) - -/* Frequency of timer 0 */ -#define TMR_0_HZ 25600 - -/* Configurable parameters */ -struct scc_param { - int pclk_hz; /* frequency of BRG input (read-only - don't change) */ - int brg_tc; /* baud rate generator terminal count - BRG disabled if < 0 */ - int nrzi; /* 0 (nrz), 1 (nrzi) */ - int clocks; /* see documentation */ - int txdelay; /* [1/TMR_0_HZ] */ - int txtime; /* [1/HZ] */ - int sqdelay; /* [1/TMR_0_HZ] */ - int waittime; /* [1/TMR_0_HZ] */ - int slottime; /* [1/TMR_0_HZ] */ - int persist; /* 0 ... 255 */ - int dma; /* 1, 3 */ -}; - diff --git a/include/linux/in6.h b/include/linux/in6.h index 13eed62d6..fdae6abd6 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -138,7 +138,7 @@ struct in6_flowlabel_req */ #define IPV6_TLV_PAD0 0 #define IPV6_TLV_PADN 1 -#define IPV6_TLV_ROUTERALERT 20 +#define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_JUMBO 194 /* diff --git a/include/linux/sched.h b/include/linux/sched.h index d80ec061a..21c9f9f74 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -259,7 +259,6 @@ struct task_struct { struct exec_domain *exec_domain; volatile long need_resched; - cycles_t avg_slice; int lock_depth; /* Lock depth. We can context switch in and out of holding a syscall kernel lock... 
*/ /* begin intel cache line */ long counter; @@ -267,8 +266,8 @@ struct task_struct { unsigned long policy; /* memory management info */ struct mm_struct *mm, *active_mm; - int has_cpu; - int processor; + int has_cpu, processor; + unsigned long cpus_allowed; unsigned long ptrace; struct list_head run_list; struct task_struct *next_task, *prev_task; @@ -399,6 +398,7 @@ struct task_struct { policy: SCHED_OTHER, \ mm: NULL, \ active_mm: &init_mm, \ + cpus_allowed: -1, \ run_list: LIST_HEAD_INIT(tsk.run_list), \ next_task: &tsk, \ prev_task: &tsk, \ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 71f8772cd..850837af2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -148,16 +148,8 @@ struct sk_buff { #endif }; -/* These are just the default values. This is run time configurable. - * FIXME: Probably the config option should go away. -- erics - */ -#ifdef CONFIG_SKB_LARGE #define SK_WMEM_MAX 65535 #define SK_RMEM_MAX 65535 -#else -#define SK_WMEM_MAX 32767 -#define SK_RMEM_MAX 32767 -#endif #ifdef __KERNEL__ /* diff --git a/include/net/checksum.h b/include/net/checksum.h index de3286944..6793f196f 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -124,5 +124,10 @@ static __inline__ unsigned int csum_and_copy_to_user } #endif +static inline unsigned int csum_chain(unsigned int csum, unsigned int addend) +{ + csum += addend; + return csum + (csum < addend); +} #endif diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h new file mode 100644 index 000000000..2ddf7675a --- /dev/null +++ b/include/net/inet_ecn.h @@ -0,0 +1,72 @@ +#ifndef _INET_ECN_H_ +#define _INET_ECN_H_ + +#include <linux/config.h> + +#ifdef CONFIG_INET_ECN + +static inline int INET_ECN_is_ce(__u8 dsfield) +{ + return (dsfield&3) == 3; +} + +static inline int INET_ECN_is_not_ce(__u8 dsfield) +{ + return (dsfield&3) == 2; +} + +static inline int INET_ECN_is_capable(__u8 dsfield) +{ + return (dsfield&2); +} + +static inline __u8 
INET_ECN_encapsulate(__u8 outer, __u8 inner) +{ + outer &= ~3; + if (INET_ECN_is_capable(inner)) + outer |= (inner & 3); + return outer; +} + +#define INET_ECN_xmit(sk) do { (sk)->protinfo.af_inet.tos |= 2; } while (0) +#define INET_ECN_dontxmit(sk) do { (sk)->protinfo.af_inet.tos &= ~3; } while (0) + +#define IP6_ECN_flow_init(label) do { \ + (label) &= ~htonl(3<<20); \ + } while (0) + +#define IP6_ECN_flow_xmit(sk, label) do { \ + if (INET_ECN_is_capable((sk)->protinfo.af_inet.tos)) \ + (label) |= __constant_htons(2 << 4); \ + } while (0) + + +#else +#define INET_ECN_is_ce(x...) (0) +#define INET_ECN_is_not_ce(x...) (0) +#define INET_ECN_is_capable(x...) (0) +#define INET_ECN_encapsulate(x, y) (x) +#define IP6_ECN_flow_init(x...) do { } while (0) +#define IP6_ECN_flow_xmit(x...) do { } while (0) +#define INET_ECN_xmit(x...) do { } while (0) +#define INET_ECN_dontxmit(x...) do { } while (0) +#endif + +static inline void IP_ECN_set_ce(struct iphdr *iph) +{ + u32 check = iph->check; + check += __constant_htons(0xFFFE); + iph->check = check + (check>=0xFFFF); + iph->tos |= 1; +} + +struct ipv6hdr; + +static inline void IP6_ECN_set_ce(struct ipv6hdr *iph) +{ + *(u32*)iph |= htonl(1<<20); +} + +#define ip6_get_dsfield(iph) ((ntohs(*(u16*)(iph)) >> 4) & 0xFF) + +#endif diff --git a/include/net/ip.h b/include/net/ip.h index f3ab5ac7b..45a85abfe 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -167,12 +167,14 @@ extern __inline__ int ip_send(struct sk_buff *skb) return ip_finish_output(skb); } +/* The function in 2.2 was invalid, producing wrong result for + * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. 
--ANK(000625) */ extern __inline__ int ip_decrease_ttl(struct iphdr *iph) { u32 check = iph->check; check += __constant_htons(0x0100); - iph->check = check + (check>>16); + iph->check = check + (check>=0xFFFF); return --iph->ttl; } @@ -220,6 +222,8 @@ extern int ip_call_ra_chain(struct sk_buff *skb); */ struct sk_buff *ip_defrag(struct sk_buff *skb); +extern int ip_frag_nqueues; +extern atomic_t ip_frag_mem; /* * Functions provided by ip_forward.c diff --git a/include/net/ipv6.h b/include/net/ipv6.h index aef0d2619..235ae404d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -4,7 +4,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: ipv6.h,v 1.20 2000/02/27 19:51:38 davem Exp $ + * $Id: ipv6.h,v 1.21 2000/07/07 22:29:42 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -189,6 +189,9 @@ extern u8 * ipv6_parse_exthdrs(struct sk_buff **skb, u8 *nhptr); extern struct ipv6_txoptions * ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt); +extern int ip6_frag_nqueues; +extern atomic_t ip6_frag_mem; + #define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */ /* diff --git a/include/net/neighbour.h b/include/net/neighbour.h index b63398881..c8490705f 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -259,6 +259,19 @@ __neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev if (n || !creat) return n; + n = neigh_create(tbl, pkey, dev); + return IS_ERR(n) ? 
NULL : n; +} + +extern __inline__ struct neighbour * +__neigh_lookup_errno(struct neigh_table *tbl, const void *pkey, + struct net_device *dev) +{ + struct neighbour *n = neigh_lookup(tbl, pkey, dev); + + if (n) + return n; + return neigh_create(tbl, pkey, dev); } diff --git a/include/net/route.h b/include/net/route.h index 180daad87..20536ca63 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -118,6 +118,13 @@ extern __inline__ void ip_rt_put(struct rtable * rt) dst_release(&rt->u.dst); } +#ifdef CONFIG_INET_ECN +#define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3) +#else +#define IPTOS_RT_MASK IPTOS_TOS_MASK +#endif + + extern __u8 ip_tos2prio[16]; extern __inline__ char rt_tos2priority(u8 tos) diff --git a/include/net/sock.h b/include/net/sock.h index c0451c111..60756db48 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -515,6 +515,8 @@ struct sock { broadcast, bsdism; unsigned char debug; + unsigned char rcvtstamp; + unsigned char userlocks; int proc; unsigned long lingertime; @@ -1222,7 +1224,7 @@ extern __inline__ long sock_sndtimeo(struct sock *sk, int noblock) extern __inline__ int sock_rcvlowat(struct sock *sk, int waitall, int len) { - return waitall ? len : min(sk->rcvlowat, len); + return (waitall ? len : min(sk->rcvlowat, len)) ? : 1; } /* Alas, with timeout socket operations are not restartable. @@ -1233,6 +1235,15 @@ extern __inline__ int sock_intr_errno(long timeo) return timeo == MAX_SCHEDULE_TIMEOUT ? 
-ERESTARTSYS : -EINTR; } +static __inline__ void +sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) +{ + if (sk->rcvtstamp) + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(skb->stamp), &skb->stamp); + else + sk->stamp = skb->stamp; +} + /* * Enable debug/info messages */ diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 32ce9a7d6..63595484d 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -133,6 +133,7 @@ EXPORT_SYMBOL(get_fs_type); EXPORT_SYMBOL(get_super); EXPORT_SYMBOL(get_empty_super); EXPORT_SYMBOL(getname); +EXPORT_SYMBOL(names_cachep); EXPORT_SYMBOL(_fput); EXPORT_SYMBOL(igrab); EXPORT_SYMBOL(iunique); diff --git a/kernel/sched.c b/kernel/sched.c index 3b007e5e8..dd055d92f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -78,18 +78,20 @@ static union { } aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}}; #define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr +#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule struct kernel_stat kstat = { 0 }; #ifdef CONFIG_SMP #define idle_task(cpu) (init_tasks[cpu_number_map(cpu)]) -#define can_schedule(p) (!(p)->has_cpu) +#define can_schedule(p,cpu) ((!(p)->has_cpu) && \ + ((p)->cpus_allowed & (1 << cpu))) #else #define idle_task(cpu) (&init_task) -#define can_schedule(p) (1) +#define can_schedule(p,cpu) (1) #endif @@ -119,7 +121,7 @@ static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struc * into account). */ if (p->policy != SCHED_OTHER) { - weight = 1000 + p->rt_priority; + weight = 1000 + 2*DEF_PRIORITY + p->rt_priority; goto out; } @@ -183,87 +185,108 @@ static inline int preemption_goodness(struct task_struct * prev, struct task_str * up unlocking it early, so the caller must not unlock the * runqueue, it's always done by reschedule_idle(). 
*/ -static inline void reschedule_idle(struct task_struct * p, unsigned long flags) +static void reschedule_idle(struct task_struct * p, unsigned long flags) { #ifdef CONFIG_SMP - int this_cpu = smp_processor_id(), target_cpu; - struct task_struct *tsk; - int cpu, best_cpu, i; + int this_cpu = smp_processor_id(); + struct task_struct *tsk, *target_tsk; + int cpu, best_cpu, i, max_prio; + cycles_t oldest_idle; /* * shortcut if the woken up task's last CPU is * idle now. */ best_cpu = p->processor; - tsk = idle_task(best_cpu); - if (cpu_curr(best_cpu) == tsk) - goto send_now; + if (can_schedule(p, best_cpu)) { + tsk = idle_task(best_cpu); + if (cpu_curr(best_cpu) == tsk) + goto send_now_idle; + + /* + * Maybe this process has enough priority to preempt + * its preferred CPU. (this is a shortcut): + */ + tsk = cpu_curr(best_cpu); + if (preemption_goodness(tsk, p, best_cpu) > 1) + goto preempt_now; + } /* * We know that the preferred CPU has a cache-affine current * process, lets try to find a new idle CPU for the woken-up - * process: + * process. Select the least recently active idle CPU. (that + * one will have the least active cache context.) Also find + * the executing process which has the least priority. */ - for (i = smp_num_cpus - 1; i >= 0; i--) { + oldest_idle = -1ULL; + target_tsk = NULL; + max_prio = 1; + + for (i = 0; i < smp_num_cpus; i++) { cpu = cpu_logical_map(i); - if (cpu == best_cpu) + if (!can_schedule(p, cpu)) continue; tsk = cpu_curr(cpu); /* - * We use the last available idle CPU. This creates + * We use the first available idle CPU. This creates * a priority list between idle CPUs, but this is not * a problem. */ - if (tsk == idle_task(cpu)) - goto send_now; - } - - /* - * No CPU is idle, but maybe this process has enough priority - * to preempt it's preferred CPU. 
- */ - tsk = cpu_curr(best_cpu); - if (preemption_goodness(tsk, p, best_cpu) > 0) - goto send_now; + if (tsk == idle_task(cpu)) { + if (last_schedule(cpu) < oldest_idle) { + oldest_idle = last_schedule(cpu); + target_tsk = tsk; + } + } else { + if (oldest_idle == -1ULL) { + int prio = preemption_goodness(tsk, p, cpu); - /* - * We will get here often - or in the high CPU contention - * case. No CPU is idle and this process is either lowprio or - * the preferred CPU is highprio. Try to preempt some other CPU - * only if it's RT or if it's iteractive and the preferred - * cpu won't reschedule shortly. - */ - if (p->avg_slice < cacheflush_time || (p->policy & ~SCHED_YIELD) != SCHED_OTHER) { - for (i = smp_num_cpus - 1; i >= 0; i--) { - cpu = cpu_logical_map(i); - if (cpu == best_cpu) - continue; - tsk = cpu_curr(cpu); - if (preemption_goodness(tsk, p, cpu) > 0) - goto send_now; + if (prio > max_prio) { + max_prio = prio; + target_tsk = tsk; + } + } } } + tsk = target_tsk; + if (tsk) { + if (oldest_idle != -1ULL) + goto send_now_idle; + goto preempt_now; + } spin_unlock_irqrestore(&runqueue_lock, flags); return; -send_now: - target_cpu = tsk->processor; +send_now_idle: + /* + * If need_resched == -1 then we can skip sending the IPI + * altogether, tsk->need_resched is actively watched by the + * idle thread. + */ + if (!tsk->need_resched) + smp_send_reschedule(tsk->processor); + tsk->need_resched = 1; + spin_unlock_irqrestore(&runqueue_lock, flags); + return; + +preempt_now: tsk->need_resched = 1; spin_unlock_irqrestore(&runqueue_lock, flags); /* * the APIC stuff can go outside of the lock because * it uses no task information, only CPU#. 
*/ - if (target_cpu != this_cpu) - smp_send_reschedule(target_cpu); + if (tsk->processor != this_cpu) + smp_send_reschedule(tsk->processor); return; #else /* UP */ int this_cpu = smp_processor_id(); struct task_struct *tsk; tsk = cpu_curr(this_cpu); - if (preemption_goodness(tsk, p, this_cpu) > 0) + if (preemption_goodness(tsk, p, this_cpu) > 1) tsk->need_resched = 1; spin_unlock_irqrestore(&runqueue_lock, flags); #endif @@ -413,10 +436,12 @@ static inline void __schedule_tail(struct task_struct *prev) unsigned long flags; spin_lock_irqsave(&runqueue_lock, flags); + prev->has_cpu = 0; reschedule_idle(prev, flags); // spin_unlocks runqueue + } else { + wmb(); + prev->has_cpu = 0; } - wmb(); - prev->has_cpu = 0; #endif /* CONFIG_SMP */ } @@ -501,7 +526,7 @@ repeat_schedule: still_running_back: list_for_each(tmp, &runqueue_head) { p = list_entry(tmp, struct task_struct, run_list); - if (can_schedule(p)) { + if (can_schedule(p, this_cpu)) { int weight = goodness(p, this_cpu, prev->active_mm); if (weight > c) c = weight, next = p; @@ -540,13 +565,6 @@ still_running_back: t = get_cycles(); this_slice = t - sched_data->last_schedule; sched_data->last_schedule = t; - - /* - * Exponentially fading average calculation, with - * some weight so it doesnt get fooled easily by - * smaller irregularities. 
- */ - prev->avg_slice = (this_slice*1 + prev->avg_slice*1)/2; } /* @@ -641,15 +659,20 @@ scheduling_in_interrupt: return; } -static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, const int sync) +static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode, + const int sync) { struct list_head *tmp, *head; - struct task_struct *p; + struct task_struct *p, *best_exclusive; unsigned long flags; + int best_cpu, irq; if (!q) goto out; + best_cpu = smp_processor_id(); + irq = in_interrupt(); + best_exclusive = NULL; wq_write_lock_irqsave(&q->lock, flags); #if WAITQUEUE_DEBUG @@ -661,10 +684,13 @@ static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, con if (!head->next || !head->prev) WQ_BUG(); #endif - list_for_each(tmp, head) { + tmp = head->next; + while (tmp != head) { unsigned int state; wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); + tmp = tmp->next; + #if WAITQUEUE_DEBUG CHECK_MAGIC(curr->__magic); #endif @@ -674,15 +700,37 @@ static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, con #if WAITQUEUE_DEBUG curr->__waker = (long)__builtin_return_address(0); #endif - if (sync) - wake_up_process_synchronous(p); - else - wake_up_process(p); - if (state & mode & TASK_EXCLUSIVE) - break; + /* + * If waking up from an interrupt context then + * prefer processes which are affine to this + * CPU. 
+ */ + if (irq && (state & mode & TASK_EXCLUSIVE)) { + if (!best_exclusive) + best_exclusive = p; + else if ((p->processor == best_cpu) && + (best_exclusive->processor != best_cpu)) + best_exclusive = p; + } else { + if (sync) + wake_up_process_synchronous(p); + else + wake_up_process(p); + if (state & mode & TASK_EXCLUSIVE) + break; + } } } + if (best_exclusive) + best_exclusive->state = TASK_RUNNING; wq_write_unlock_irqrestore(&q->lock, flags); + + if (best_exclusive) { + if (sync) + wake_up_process_synchronous(best_exclusive); + else + wake_up_process(best_exclusive); + } out: return; } @@ -697,6 +745,7 @@ void __wake_up_sync(wait_queue_head_t *q, unsigned int mode) __wake_up_common(q, mode, 1); } + #define SLEEP_ON_VAR \ unsigned long flags; \ wait_queue_t wait; \ @@ -798,7 +847,7 @@ asmlinkage long sys_nice(int increment) * timeslice instead (default 200 ms). The rounding is * why we want to avoid negative values. */ - newprio = (newprio * DEF_PRIORITY + 10) / 20; + newprio = (newprio * DEF_PRIORITY + 10)/20; increment = newprio; if (increase) increment = -increment; @@ -812,7 +861,7 @@ asmlinkage long sys_nice(int increment) */ newprio = current->priority - increment; if ((signed) newprio < 1) - newprio = 1; + newprio = DEF_PRIORITY/20; if (newprio > DEF_PRIORITY*2) newprio = DEF_PRIORITY*2; current->priority = newprio; @@ -355,7 +355,6 @@ static kmem_cache_t cache_cache = { spinlock: SPIN_LOCK_UNLOCKED, colour_off: L1_CACHE_BYTES, name: "kmem_cache", - next: LIST_HEAD_INIT(cache_cache.next) }; /* Guard access to the cache-chain. */ @@ -412,7 +411,7 @@ void __init kmem_cache_init(void) size_t left_over; init_MUTEX(&cache_chain_sem); - list_add(&cache_cache.next,&cache_chain); + INIT_LIST_HEAD(&cache_chain); kmem_cache_estimate(0, cache_cache.objsize, 0, &left_over, &cache_cache.num); @@ -443,7 +442,7 @@ void __init kmem_cache_sizes_init(void) * eliminates "false sharing". 
* Note for systems short on memory removing the alignment will * allow tighter packing of the smaller caches. */ - sprintf(name,"size-%ld", (unsigned long) sizes->cs_size); + sprintf(name,"size-%Zd",sizes->cs_size); if (!(sizes->cs_cachep = kmem_cache_create(name, sizes->cs_size, 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) { @@ -455,7 +454,7 @@ void __init kmem_cache_sizes_init(void) offslab_limit = sizes->cs_size-sizeof(slab_t); offslab_limit /= 2; } - sprintf(name, "size-%ld(DMA)", (unsigned long) sizes->cs_size); + sprintf(name, "size-%Zd(DMA)",sizes->cs_size); sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0, SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL); if (!sizes->cs_dmacachep) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 88322c8d6..b0d989516 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -281,25 +281,27 @@ struct neighbour * neigh_create(struct neigh_table *tbl, const void *pkey, struct neighbour *n, *n1; u32 hash_val; int key_len = tbl->key_len; + int error; n = neigh_alloc(tbl); if (n == NULL) - return NULL; + return ERR_PTR(-ENOBUFS); memcpy(n->primary_key, pkey, key_len); n->dev = dev; dev_hold(dev); /* Protocol specific setup. */ - if (tbl->constructor && tbl->constructor(n) < 0) { + if (tbl->constructor && (error = tbl->constructor(n)) < 0) { neigh_release(n); - return NULL; + return ERR_PTR(error); } /* Device specific setup. 
*/ - if (n->parms && n->parms->neigh_setup && n->parms->neigh_setup(n) < 0) { + if (n->parms && n->parms->neigh_setup && + (error = n->parms->neigh_setup(n)) < 0) { neigh_release(n); - return NULL; + return ERR_PTR(error); } n->confirmed = jiffies - (n->parms->base_reachable_time<<1); @@ -1242,6 +1244,7 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (nda[NDA_LLADDR-1] != NULL && nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len)) goto out; + err = 0; n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev); if (n) { if (nlh->nlmsg_flags&NLM_F_EXCL) @@ -1249,9 +1252,11 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } else if (!(nlh->nlmsg_flags&NLM_F_CREATE)) err = -ENOENT; else { - n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1); - if (n == NULL) - err = -ENOBUFS; + n = __neigh_lookup_errno(tbl, RTA_DATA(nda[NDA_DST-1]), dev); + if (IS_ERR(n)) { + err = PTR_ERR(n); + n = NULL; + } } if (err == 0) { err = neigh_update(n, nda[NDA_LLADDR-1] ? RTA_DATA(nda[NDA_LLADDR-1]) : NULL, diff --git a/net/core/sock.c b/net/core/sock.c index 4044a7f7d..fcb6246b3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -7,7 +7,7 @@ * handler for protocols to use and generic option handler. * * - * Version: $Id: sock.c,v 1.93 2000/04/13 03:13:29 davem Exp $ + * Version: $Id: sock.c,v 1.95 2000/07/08 00:20:43 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -308,6 +308,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname, sock->passcred = valbool; break; + case SO_TIMESTAMP: + sk->rcvtstamp = valbool; + break; + case SO_RCVLOWAT: if (val < 0) val = INT_MAX; @@ -485,7 +489,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_BSDCOMPAT: v.val = sk->bsdism; break; - + + case SO_TIMESTAMP: + v.val = sk->rcvtstamp; + break; + case SO_RCVTIMEO: lv=sizeof(struct timeval); if (sk->rcvtimeo == MAX_SCHEDULE_TIMEOUT) { @@ -599,7 +607,16 @@ void __init sk_init(void) { sk_cachep = kmem_cache_create("sock", sizeof(struct sock), 0, SLAB_HWCACHE_ALIGN, 0, 0); - + + if (num_physpages <= 4096) { + sysctl_wmem_max = 32767; + sysctl_rmem_max = 32767; + sysctl_wmem_default = 32767; + sysctl_rmem_default = 32767; + } else if (num_physpages >= 131072) { + sysctl_wmem_max = 131071; + sysctl_rmem_max = 131071; + } } /* diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5ce55ebb2..d97558a24 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -789,13 +789,14 @@ static int dn_route_input_slow(struct sk_buff *skb) if (dn_db->router && ((neigh = neigh_clone(dn_db->router)) != NULL)) goto add_entry; - if ((neigh = neigh_create(&dn_neigh_table, &cb->src, dev)) != NULL) { + neigh = neigh_create(&dn_neigh_table, &cb->src, dev); + if (!IS_ERR(neigh)) { if (dev->type == ARPHRD_ETHER) memcpy(neigh->ha, skb->mac.ethernet->h_source, ETH_ALEN); goto add_entry; } - return -ENOBUFS; + return PTR_ERR(neigh); non_local_input: diff --git a/net/ipv4/Config.in b/net/ipv4/Config.in index 68fea0272..7a44fa565 100644 --- a/net/ipv4/Config.in +++ b/net/ipv4/Config.in @@ -44,15 +44,8 @@ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then bool ' IP: ARP daemon support (EXPERIMENTAL)' CONFIG_ARPD fi fi +bool ' IP: TCP Explicit Congestion Notification support' CONFIG_INET_ECN bool ' IP: TCP syncookie support (disabled per default)' CONFIG_SYN_COOKIES -comment '(it is
safe to leave these untouched)' -#bool ' IP: PC/TCP compatibility mode' CONFIG_INET_PCTCP -#bool ' IP: Path MTU Discovery (normally enabled)' CONFIG_PATH_MTU_DISCOVERY -#bool ' IP: Disable NAGLE algorithm (normally enabled)' CONFIG_TCP_NAGLE_OFF -bool ' IP: Allow large windows (not recommended if <16Mb of memory)' CONFIG_SKB_LARGE -#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -# bool ' IP: support checksum copy to user for UDP (EXPERIMENTAL)' CONFIG_UDP_DELAY_CSUM -#fi if [ "$CONFIG_NETFILTER" != "n" ]; then source net/ipv4/netfilter/Config.in fi diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9def6b16b..81d8ebe80 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1,6 +1,6 @@ /* linux/net/inet/arp.c * - * Version: $Id: arp.c,v 1.86 2000/04/26 09:36:36 davem Exp $ + * Version: $Id: arp.c,v 1.87 2000/07/07 22:40:35 davem Exp $ * * Copyright (C) 1994 by Florian La Roche * @@ -424,20 +424,24 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) int arp_bind_neighbour(struct dst_entry *dst) { struct net_device *dev = dst->dev; + struct neighbour *n = dst->neighbour; if (dev == NULL) - return 0; - if (dst->neighbour == NULL) { + return -EINVAL; + if (n == NULL) { u32 nexthop = ((struct rtable*)dst)->rt_gateway; if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT)) nexthop = 0; - dst->neighbour = __neigh_lookup( + n = __neigh_lookup_errno( #ifdef CONFIG_ATM_CLIP dev->type == ARPHRD_ATM ? 
&clip_tbl : #endif - &arp_tbl, &nexthop, dev, 1); + &arp_tbl, &nexthop, dev); + if (IS_ERR(n)) + return PTR_ERR(n); + dst->neighbour = n; } - return (dst->neighbour != NULL); + return 0; } /* @@ -847,9 +851,9 @@ int arp_req_set(struct arpreq *r, struct net_device * dev) if (r->arp_ha.sa_family != dev->type) return -EINVAL; - err = -ENOBUFS; - neigh = __neigh_lookup(&arp_tbl, &ip, dev, 1); - if (neigh) { + neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev); + err = PTR_ERR(neigh); + if (!IS_ERR(neigh)) { unsigned state = NUD_STALE; if (r->arp_flags & ATF_PERM) state = NUD_PERMANENT; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 70f8cfb90..07041a3e5 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -5,7 +5,7 @@ * * The IP fragmentation functionality. * - * Version: $Id: ip_fragment.c,v 1.49 2000/04/15 01:48:10 davem Exp $ + * Version: $Id: ip_fragment.c,v 1.50 2000/07/07 22:29:42 davem Exp $ * * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox <Alan.Cox@linux.org> @@ -18,6 +18,7 @@ * Ultima : ip_expire() kernel panic. * Bill Hawes : Frag accounting and evictor fixes. * John McDonald : 0 length frag bug. + * Alexey Kuznetsov: SMP races, threading, cleanup. */ #include <linux/config.h> @@ -31,11 +32,17 @@ #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> +#include <net/checksum.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/inet.h> #include <linux/netfilter_ipv4.h> +/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 + * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c + * as well. Or notify me, at least. --ANK + */ + /* Fragment cache limits. We will commit 256K at one time. Should we * cross that limit we will prune down to 192K. 
This should cope with * even the most extreme cases without allowing an attacker to measurably @@ -46,38 +53,77 @@ int sysctl_ipfrag_low_thresh = 192*1024; int sysctl_ipfrag_time = IP_FRAG_TIME; -/* Describe an IP fragment. */ -struct ipfrag { - int offset; /* offset of fragment in IP datagram */ - int end; /* last byte of data in datagram */ - int len; /* length of this fragment */ - struct sk_buff *skb; /* complete received fragment */ - unsigned char *ptr; /* pointer into real fragment data */ - struct ipfrag *next; /* linked list pointers */ - struct ipfrag *prev; +struct ipfrag_skb_cb +{ + struct inet_skb_parm h; + int offset; }; +#define FRAG_CB(skb) ((struct ipfrag_skb_cb*)((skb)->cb)) + /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { - struct iphdr *iph; /* pointer to IP header */ struct ipq *next; /* linked list pointers */ - struct ipfrag *fragments; /* linked list of received fragments */ + u32 saddr; + u32 daddr; + u16 id; + u8 protocol; + u8 last_in; +#define COMPLETE 4 +#define FIRST_IN 2 +#define LAST_IN 1 + + struct sk_buff *fragments; /* linked list of received fragments */ int len; /* total length of original datagram */ - short ihlen; /* length of the IP header */ + int meat; + spinlock_t lock; + atomic_t refcnt; struct timer_list timer; /* when will this queue expire? */ struct ipq **pprev; - struct net_device *dev; /* Device - for icmp replies */ + struct net_device *dev; /* Device - for icmp replies */ }; +/* Hash table. */ + #define IPQ_HASHSZ 64 +/* Per-bucket lock is easy to add now. 
*/ static struct ipq *ipq_hash[IPQ_HASHSZ]; -static spinlock_t ipfrag_lock = SPIN_LOCK_UNLOCKED; +static rwlock_t ipfrag_lock = RW_LOCK_UNLOCKED; +int ip_frag_nqueues = 0; -#define ipqhashfn(id, saddr, daddr, prot) \ - ((((id) >> 1) ^ (saddr) ^ (daddr) ^ (prot)) & (IPQ_HASHSZ - 1)) +static __inline__ void __ipq_unlink(struct ipq *qp) +{ + if(qp->next) + qp->next->pprev = qp->pprev; + *qp->pprev = qp->next; + ip_frag_nqueues--; +} + +static __inline__ void ipq_unlink(struct ipq *ipq) +{ + write_lock(&ipfrag_lock); + __ipq_unlink(ipq); + write_unlock(&ipfrag_lock); +} -static atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ +/* + * Was: ((((id) >> 1) ^ (saddr) ^ (daddr) ^ (prot)) & (IPQ_HASHSZ - 1)) + * + * I see, I see evil hand of bigendian mafia. On Intel all the packets hit + * one hash bucket with this hash function. 8) + */ +static __inline__ unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot) +{ + unsigned int h = saddr ^ daddr; + + h ^= (h>>16)^id; + h ^= (h>>8)^prot; + return h & (IPQ_HASHSZ - 1); +} + + +atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ /* Memory Tracking Functions. */ extern __inline__ void frag_kfree_skb(struct sk_buff *skb) @@ -86,112 +132,106 @@ extern __inline__ void frag_kfree_skb(struct sk_buff *skb) kfree_skb(skb); } -extern __inline__ void frag_kfree_s(void *ptr, int len) +extern __inline__ void frag_free_queue(struct ipq *qp) { - atomic_sub(len, &ip_frag_mem); - kfree(ptr); + atomic_sub(sizeof(struct ipq), &ip_frag_mem); + kfree(qp); } - -extern __inline__ void *frag_kmalloc(int size, int pri) + +extern __inline__ struct ipq *frag_alloc_queue(void) { - void *vp = kmalloc(size, pri); + struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC); - if(!vp) + if(!qp) return NULL; - atomic_add(size, &ip_frag_mem); - return vp; + atomic_add(sizeof(struct ipq), &ip_frag_mem); + return qp; } - -/* Create a new fragment entry. 
*/ -static struct ipfrag *ip_frag_create(int offset, int end, - struct sk_buff *skb, unsigned char *ptr) + + +/* Destruction primitives. */ + +/* Complete destruction of ipq. */ +static void ip_frag_destroy(struct ipq *qp) { - struct ipfrag *fp; + struct sk_buff *fp; - fp = (struct ipfrag *) frag_kmalloc(sizeof(struct ipfrag), GFP_ATOMIC); - if (fp == NULL) - goto out_nomem; + BUG_TRAP(qp->last_in&COMPLETE); + BUG_TRAP(del_timer(&qp->timer) == 0); - /* Fill in the structure. */ - fp->offset = offset; - fp->end = end; - fp->len = end - offset; - fp->skb = skb; - fp->ptr = ptr; - fp->next = fp->prev = NULL; - - /* Charge for the SKB as well. */ - atomic_add(skb->truesize, &ip_frag_mem); + /* Release all fragment data. */ + fp = qp->fragments; + while (fp) { + struct sk_buff *xp = fp->next; - return(fp); + frag_kfree_skb(fp); + fp = xp; + } -out_nomem: - NETDEBUG(printk(KERN_ERR "IP: frag_create: no memory left !\n")); - return(NULL); + /* Finally, release the queue descriptor itself. */ + frag_free_queue(qp); } -/* Find the correct entry in the "incomplete datagrams" queue for - * this IP datagram, and return the queue entry address if found. - */ -static inline struct ipq *ip_find(struct iphdr *iph, struct dst_entry *dst) +static __inline__ void ipq_put(struct ipq *ipq) { - __u16 id = iph->id; - __u32 saddr = iph->saddr; - __u32 daddr = iph->daddr; - __u8 protocol = iph->protocol; - unsigned int hash = ipqhashfn(id, saddr, daddr, protocol); - struct ipq *qp; - - /* We are always in BH context, and protected by the - * ipfrag lock. 
- */ - for(qp = ipq_hash[hash]; qp; qp = qp->next) { - if(qp->iph->id == id && - qp->iph->saddr == saddr && - qp->iph->daddr == daddr && - qp->iph->protocol == protocol) { - del_timer(&qp->timer); - break; - } - } - return qp; + if (atomic_dec_and_test(&ipq->refcnt)) + ip_frag_destroy(ipq); } -/* Remove an entry from the "incomplete datagrams" queue, either - * because we completed, reassembled and processed it, or because - * it timed out. - * - * This is called _only_ from BH contexts with the ipfrag lock held, - * on packet reception processing and from frag queue expiration - * timers. -DaveM +/* Kill ipq entry. It is not destroyed immediately, + * because caller (and someone more) holds reference count. */ -static void ip_free(struct ipq *qp) +static __inline__ void ipq_kill(struct ipq *ipq) { - struct ipfrag *fp; - - /* Stop the timer for this entry. */ - del_timer(&qp->timer); - - /* Remove this entry from the "incomplete datagrams" queue. */ - if(qp->next) - qp->next->pprev = qp->pprev; - *qp->pprev = qp->next; - - /* Release all fragment data. */ - fp = qp->fragments; - while (fp) { - struct ipfrag *xp = fp->next; + if (del_timer(&ipq->timer)) + atomic_dec(&ipq->refcnt); - frag_kfree_skb(fp->skb); - frag_kfree_s(fp, sizeof(struct ipfrag)); - fp = xp; + if (!(ipq->last_in & COMPLETE)) { + ipq_unlink(ipq); + atomic_dec(&ipq->refcnt); + ipq->last_in |= COMPLETE; } +} - /* Release the IP header. */ - frag_kfree_s(qp->iph, 64 + 8); +/* Memory limiting on fragments. Evictor trashes the oldest + * fragment queue until we are back under the low threshold. + */ +static void ip_evictor(void) +{ + int i, progress; - /* Finally, release the queue descriptor itself. */ - frag_kfree_s(qp, sizeof(struct ipq)); + do { + if (atomic_read(&ip_frag_mem) <= sysctl_ipfrag_low_thresh) + return; + progress = 0; + /* FIXME: Make LRU queue of frag heads. 
-DaveM */ + for (i = 0; i < IPQ_HASHSZ; i++) { + struct ipq *qp; + if (ipq_hash[i] == NULL) + continue; + + write_lock(&ipfrag_lock); + if ((qp = ipq_hash[i]) != NULL) { + /* find the oldest queue for this hash bucket */ + while (qp->next) + qp = qp->next; + __ipq_unlink(qp); + write_unlock(&ipfrag_lock); + + spin_lock(&qp->lock); + if (del_timer(&qp->timer)) + atomic_dec(&qp->refcnt); + qp->last_in |= COMPLETE; + spin_unlock(&qp->lock); + + ipq_put(qp); + IP_INC_STATS_BH(IpReasmFails); + progress = 1; + continue; + } + write_unlock(&ipfrag_lock); + } + } while (progress); } /* @@ -201,181 +241,310 @@ static void ip_expire(unsigned long arg) { struct ipq *qp = (struct ipq *) arg; - spin_lock(&ipfrag_lock); - if(!qp->fragments) - { -#ifdef IP_EXPIRE_DEBUG - printk("warning: possible ip-expire attack\n"); -#endif + spin_lock(&qp->lock); + + if (qp->last_in & COMPLETE) goto out; - } - - /* Send an ICMP "Fragment Reassembly Timeout" message. */ + + ipq_kill(qp); + IP_INC_STATS_BH(IpReasmTimeout); IP_INC_STATS_BH(IpReasmFails); - icmp_send(qp->fragments->skb, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); + if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) { + /* Send an ICMP "Fragment Reassembly Timeout" message. */ + icmp_send(qp->fragments, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); + } out: - /* Nuke the fragment queue. */ - ip_free(qp); - spin_unlock(&ipfrag_lock); + spin_unlock(&qp->lock); + ipq_put(qp); } -/* Memory limiting on fragments. Evictor trashes the oldest - * fragment queue until we are back under the low threshold. - * - * We are always called in BH with the ipfrag lock held. - */ -static void ip_evictor(void) +/* Creation primitives. */ + +static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in) { - int i, progress; + struct ipq *qp; -restart: - progress = 0; - /* FIXME: Make LRU queue of frag heads. 
-DaveM */ - for (i = 0; i < IPQ_HASHSZ; i++) { - struct ipq *qp; - if (atomic_read(&ip_frag_mem) <= sysctl_ipfrag_low_thresh) - return; - qp = ipq_hash[i]; - if (qp) { - /* find the oldest queue for this hash bucket */ - while (qp->next) - qp = qp->next; - ip_free(qp); - progress = 1; + write_lock(&ipfrag_lock); +#ifdef CONFIG_SMP + /* With SMP race we have to recheck hash table, because + * such entry could be created on other cpu, while we + * promoted read lock to write lock. + */ + for(qp = ipq_hash[hash]; qp; qp = qp->next) { + if(qp->id == qp_in->id && + qp->saddr == qp_in->saddr && + qp->daddr == qp_in->daddr && + qp->protocol == qp_in->protocol) { + atomic_inc(&qp->refcnt); + write_unlock(&ipfrag_lock); + qp_in->last_in |= COMPLETE; + ipq_put(qp_in); + return qp; } } - if (progress) - goto restart; - panic("ip_evictor: memcount"); +#endif + qp = qp_in; + + atomic_inc(&qp->refcnt); + if((qp->next = ipq_hash[hash]) != NULL) + qp->next->pprev = &qp->next; + ipq_hash[hash] = qp; + qp->pprev = &ipq_hash[hash]; + ip_frag_nqueues++; + write_unlock(&ipfrag_lock); + return qp; } -/* Add an entry to the 'ipq' queue for a newly received IP datagram. - * We will (hopefully :-) receive all other fragments of this datagram - * in time, so we just create a queue for this datagram, in which we - * will insert the received fragments at their respective positions. - */ -static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph) +/* Add an entry to the 'ipq' queue for a newly received IP datagram. */ +static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph) { struct ipq *qp; - unsigned int hash; - int ihlen; - qp = (struct ipq *) frag_kmalloc(sizeof(struct ipq), GFP_ATOMIC); - if (qp == NULL) + if ((qp = frag_alloc_queue()) == NULL) goto out_nomem; - /* Allocate memory for the IP header (plus 8 octets for ICMP). 
*/ - ihlen = iph->ihl * 4; - - qp->iph = (struct iphdr *) frag_kmalloc(64 + 8, GFP_ATOMIC); - if (qp->iph == NULL) - goto out_free; - - memcpy(qp->iph, iph, ihlen + 8); + qp->protocol = iph->protocol; + qp->last_in = 0; + qp->id = iph->id; + qp->saddr = iph->saddr; + qp->daddr = iph->daddr; qp->len = 0; - qp->ihlen = ihlen; + qp->meat = 0; qp->fragments = NULL; - qp->dev = skb->dev; /* Initialize a timer for this entry. */ init_timer(&qp->timer); - qp->timer.expires = 0; /* (to be set later) */ qp->timer.data = (unsigned long) qp; /* pointer to queue */ qp->timer.function = ip_expire; /* expire function */ + qp->lock = SPIN_LOCK_UNLOCKED; + atomic_set(&qp->refcnt, 1); - /* Add this entry to the queue. */ - hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); + return ip_frag_intern(hash, qp); - /* In a BH context and ipfrag lock is held. -DaveM */ - if((qp->next = ipq_hash[hash]) != NULL) - qp->next->pprev = &qp->next; - ipq_hash[hash] = qp; - qp->pprev = &ipq_hash[hash]; +out_nomem: + NETDEBUG(printk(KERN_ERR "ip_frag_create: no memory left !\n")); + return NULL; +} - return qp; +/* Find the correct entry in the "incomplete datagrams" queue for + * this IP datagram, and create new one, if nothing is found. + */ +static inline struct ipq *ip_find(struct iphdr *iph) +{ + __u16 id = iph->id; + __u32 saddr = iph->saddr; + __u32 daddr = iph->daddr; + __u8 protocol = iph->protocol; + unsigned int hash = ipqhashfn(id, saddr, daddr, protocol); + struct ipq *qp; -out_free: - frag_kfree_s(qp, sizeof(struct ipq)); -out_nomem: - NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n")); - return(NULL); + read_lock(&ipfrag_lock); + for(qp = ipq_hash[hash]; qp; qp = qp->next) { + if(qp->id == id && + qp->saddr == saddr && + qp->daddr == daddr && + qp->protocol == protocol) { + atomic_inc(&qp->refcnt); + read_unlock(&ipfrag_lock); + return qp; + } + } + read_unlock(&ipfrag_lock); + + return ip_frag_create(hash, iph); } -/* See if a fragment queue is complete. 
*/ -static int ip_done(struct ipq *qp) +/* Add new segment to existing queue. */ +static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { - struct ipfrag *fp; - int offset; + struct iphdr *iph = skb->nh.iph; + struct sk_buff *prev, *next; + int flags, offset; + int ihl, end; - /* Only possible if we received the final fragment. */ - if (qp->len == 0) - return 0; + if (qp->last_in & COMPLETE) + goto err; - /* Check all fragment offsets to see if they connect. */ - fp = qp->fragments; - offset = 0; - while (fp) { - if (fp->offset > offset) - return(0); /* fragment(s) missing */ - offset = fp->end; - fp = fp->next; + if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) + atomic_inc(&qp->refcnt); + + offset = ntohs(iph->frag_off); + flags = offset & ~IP_OFFSET; + offset &= IP_OFFSET; + offset <<= 3; /* offset is in 8-byte chunks */ + ihl = iph->ihl * 4; + + /* Determine the position of this fragment. */ + end = offset + (ntohs(iph->tot_len) - ihl); + + /* Is this the final fragment? */ + if ((flags & IP_MF) == 0) { + /* If we already have some bits beyond end + * or have different end, the segment is corrrupted. + */ + if (end < qp->len || + ((qp->last_in & LAST_IN) && end != qp->len)) + goto err; + qp->last_in |= LAST_IN; + qp->len = end; + } else { + if (end&7) { + end &= ~7; + if (skb->ip_summed != CHECKSUM_UNNECESSARY) + skb->ip_summed = CHECKSUM_NONE; + } + if (end > qp->len) { + /* Some bits beyond end -> corruption. */ + if (qp->last_in & LAST_IN) + goto err; + qp->len = end; + } } + if (end == offset) + goto err; + + /* Point into the IP datagram 'data' part. */ + skb_pull(skb, (skb->nh.raw+ihl) - skb->data); + skb_trim(skb, end - offset); + + /* Find out which fragments are in front and at the back of us + * in the chain of fragments so far. We must know where to put + * this fragment, right? + */ + prev = NULL; + for(next = qp->fragments; next != NULL; next = next->next) { + if (FRAG_CB(next)->offset >= offset) + break; /* bingo! 
*/ + prev = next; + } + + /* We found where to put this one. Check for overlap with + * preceding fragment, and, if needed, align things so that + * any overlaps are eliminated. + */ + if (prev) { + int i = (FRAG_CB(prev)->offset + prev->len) - offset; + + if (i > 0) { + offset += i; + if (end <= offset) + goto err; + skb_pull(skb, i); + if (skb->ip_summed != CHECKSUM_UNNECESSARY) + skb->ip_summed = CHECKSUM_NONE; + } + } + + while (next && FRAG_CB(next)->offset < end) { + int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */ + + if (i < next->len) { + /* Eat head of the next overlapped fragment + * and leave the loop. The next ones cannot overlap. + */ + FRAG_CB(next)->offset += i; + skb_pull(next, i); + qp->meat -= i; + if (next->ip_summed != CHECKSUM_UNNECESSARY) + next->ip_summed = CHECKSUM_NONE; + break; + } else { + struct sk_buff *free_it = next; - /* All fragments are present. */ - return 1; + /* Old fragmnet is completely overridden with + * new one drop it. + */ + next = next->next; + + if (prev) + prev->next = next; + else + qp->fragments = next; + + qp->meat -= free_it->len; + frag_kfree_skb(free_it); + } + } + + FRAG_CB(skb)->offset = offset; + + /* Insert this fragment in the chain of fragments. */ + skb->next = next; + if (prev) + prev->next = skb; + else + qp->fragments = skb; + + qp->dev = skb->dev; + qp->meat += skb->len; + atomic_add(skb->truesize, &ip_frag_mem); + if (offset == 0) + qp->last_in |= FIRST_IN; + + return; + +err: + kfree_skb(skb); } + /* Build a new IP datagram from all its fragments. * * FIXME: We copy here because we lack an effective way of handling lists * of bits on input. Until the new skb data handling is in I'm not going * to touch this with a bargepole. 
*/ -static struct sk_buff *ip_glue(struct ipq *qp) +static struct sk_buff *ip_frag_reasm(struct ipq *qp) { struct sk_buff *skb; struct iphdr *iph; - struct ipfrag *fp; - unsigned char *ptr; - int count, len; + struct sk_buff *fp, *head = qp->fragments; + int len; + int ihlen; + + ipq_kill(qp); + + BUG_TRAP(head != NULL); + BUG_TRAP(FRAG_CB(head)->offset == 0); /* Allocate a new buffer for the datagram. */ - len = qp->ihlen + qp->len; - + ihlen = head->nh.iph->ihl*4; + len = ihlen + qp->len; + if(len > 65535) goto out_oversize; - + skb = dev_alloc_skb(len); if (!skb) goto out_nomem; /* Fill in the basic details. */ - skb->mac.raw = ptr = skb->data; - skb->nh.iph = iph = (struct iphdr *) skb_put(skb, len); + skb->mac.raw = skb->data; + skb->nh.raw = skb->data; + FRAG_CB(skb)->h = FRAG_CB(head)->h; + skb->ip_summed = head->ip_summed; + skb->csum = 0; /* Copy the original IP headers into the new buffer. */ - memcpy(ptr, qp->iph, qp->ihlen); - ptr += qp->ihlen; + memcpy(skb_put(skb, ihlen), head->nh.iph, ihlen); /* Copy the data portions of all fragments into the new buffer. 
*/ - fp = qp->fragments; - count = qp->ihlen; - while(fp) { - if ((fp->len <= 0) || ((count + fp->len) > skb->len)) - goto out_invalid; - memcpy((ptr + fp->offset), fp->ptr, fp->len); - if (count == qp->ihlen) { - skb->dst = dst_clone(fp->skb->dst); - skb->dev = fp->skb->dev; - } - count += fp->len; - fp = fp->next; + for (fp=head; fp; fp = fp->next) { + memcpy(skb_put(skb, fp->len), fp->data, fp->len); + + if (skb->ip_summed != fp->ip_summed) + skb->ip_summed = CHECKSUM_NONE; + else if (skb->ip_summed == CHECKSUM_HW) + skb->csum = csum_chain(skb->csum, fp->csum); } - skb->pkt_type = qp->fragments->skb->pkt_type; - skb->protocol = qp->fragments->skb->protocol; + skb->dst = dst_clone(head->dst); + skb->pkt_type = head->pkt_type; + skb->protocol = head->protocol; + skb->dev = qp->dev; + /* * Clearly bogus, because security markings of the individual * fragments should have been checked for consistency before @@ -385,29 +554,24 @@ static struct sk_buff *ip_glue(struct ipq *qp) * as well take the value associated with the first fragment. * --rct */ - skb->security = qp->fragments->skb->security; + skb->security = head->security; #ifdef CONFIG_NETFILTER /* Connection association is same as fragment (if any). */ - skb->nfct = qp->fragments->skb->nfct; + skb->nfct = head->nfct; nf_conntrack_get(skb->nfct); #ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = qp->fragments->skb->nf_debug; + skb->nf_debug = head->nf_debug; #endif #endif /* Done with all fragments. Fixup the new IP header. 
*/ iph = skb->nh.iph; iph->frag_off = 0; - iph->tot_len = htons(count); + iph->tot_len = htons(len); IP_INC_STATS_BH(IpReasmOKs); return skb; -out_invalid: - NETDEBUG(printk(KERN_ERR - "Invalid fragment list: Fragment over size.\n")); - kfree_skb(skb); - goto out_fail; out_nomem: NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing queue %p\n", @@ -417,7 +581,7 @@ out_oversize: if (net_ratelimit()) printk(KERN_INFO "Oversized IP packet from %d.%d.%d.%d.\n", - NIPQUAD(qp->iph->saddr)); + NIPQUAD(qp->saddr)); out_fail: IP_INC_STATS_BH(IpReasmFails); return NULL; @@ -427,185 +591,32 @@ out_fail: struct sk_buff *ip_defrag(struct sk_buff *skb) { struct iphdr *iph = skb->nh.iph; - struct ipfrag *prev, *next, *tmp, *tfp; struct ipq *qp; - unsigned char *ptr; - int flags, offset; - int i, ihl, end; IP_INC_STATS_BH(IpReasmReqds); - spin_lock(&ipfrag_lock); - /* Start by cleaning up the memory. */ if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh) ip_evictor(); - /* - * Look for the entry for this IP datagram in the - * "incomplete datagrams" queue. If found, the - * timer is removed. - */ - qp = ip_find(iph, skb->dst); - - /* Is this a non-fragmented datagram? */ - offset = ntohs(iph->frag_off); - flags = offset & ~IP_OFFSET; - offset &= IP_OFFSET; + /* Lookup (or create) queue header */ + if ((qp = ip_find(iph)) != NULL) { + struct sk_buff *ret = NULL; - offset <<= 3; /* offset is in 8-byte chunks */ - ihl = iph->ihl * 4; + spin_lock(&qp->lock); - /* - * Check whether to create a fresh queue entry. If the - * queue already exists, its timer will be restarted as - * long as we continue to receive fragments. - */ - if (qp) { - /* ANK. If the first fragment is received, - * we should remember the correct IP header (with options) - */ - if (offset == 0) { - /* Fragmented frame replaced by unfragmented copy? 
*/ - if ((flags & IP_MF) == 0) - goto out_freequeue; - qp->ihlen = ihl; - memcpy(qp->iph, iph, (ihl + 8)); - } - } else { - /* Fragmented frame replaced by unfragmented copy? */ - if ((offset == 0) && ((flags & IP_MF) == 0)) - goto out_skb; - - /* If we failed to create it, then discard the frame. */ - qp = ip_create(skb, iph); - if (!qp) - goto out_freeskb; - } - - /* Attempt to construct an oversize packet. */ - if((ntohs(iph->tot_len) + ((int) offset)) > 65535) - goto out_oversize; + ip_frag_queue(qp, skb); - /* Determine the position of this fragment. */ - end = offset + ntohs(iph->tot_len) - ihl; + if (qp->last_in == (FIRST_IN|LAST_IN) && + qp->meat == qp->len) + ret = ip_frag_reasm(qp); - /* Is this the final fragment? */ - if ((flags & IP_MF) == 0) - qp->len = end; - - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ - prev = NULL; - for(next = qp->fragments; next != NULL; next = next->next) { - if (next->offset >= offset) - break; /* bingo! */ - prev = next; + spin_unlock(&qp->lock); + ipq_put(qp); + return ret; } - /* Point into the IP datagram 'data' part. */ - ptr = skb->data + ihl; - - /* We found where to put this one. Check for overlap with - * preceding fragment, and, if needed, align things so that - * any overlaps are eliminated. - */ - if ((prev != NULL) && (offset < prev->end)) { - i = prev->end - offset; - offset += i; /* ptr into datagram */ - ptr += i; /* ptr into fragment data */ - } - - /* Look for overlap with succeeding segments. - * If we can merge fragments, do it. - */ - for (tmp = next; tmp != NULL; tmp = tfp) { - tfp = tmp->next; - if (tmp->offset >= end) - break; /* no overlaps at all */ - - i = end - next->offset; /* overlap is 'i' bytes */ - tmp->len -= i; /* so reduce size of */ - tmp->offset += i; /* next fragment */ - tmp->ptr += i; - - /* If we get a frag size of <= 0, remove it and the packet - * that it goes with. 
- */ - if (tmp->len <= 0) { - if (tmp->prev != NULL) - tmp->prev->next = tmp->next; - else - qp->fragments = tmp->next; - - if (tmp->next != NULL) - tmp->next->prev = tmp->prev; - - /* We have killed the original next frame. */ - next = tfp; - - frag_kfree_skb(tmp->skb); - frag_kfree_s(tmp, sizeof(struct ipfrag)); - } - } - - /* - * Create a fragment to hold this skb. - * No memory to save the fragment? throw the lot ... - */ - tfp = ip_frag_create(offset, end, skb, ptr); - if (!tfp) - goto out_freeskb; - - /* Insert this fragment in the chain of fragments. */ - tfp->prev = prev; - tfp->next = next; - if (prev != NULL) - prev->next = tfp; - else - qp->fragments = tfp; - - if (next != NULL) - next->prev = tfp; - - /* OK, so we inserted this new fragment into the chain. - * Check if we now have a full IP datagram which we can - * bump up to the IP layer... - */ - if (ip_done(qp)) { - /* Glue together the fragments. */ - skb = ip_glue(qp); - /* Free the queue entry. */ -out_freequeue: - ip_free(qp); -out_skb: - spin_unlock(&ipfrag_lock); - return skb; - } - - /* - * The queue is still active ... reset its timer. - */ -out_timer: - mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time); /* ~ 30 seconds */ -out: - spin_unlock(&ipfrag_lock); - return NULL; - - /* - * Error exits ... we need to reset the timer if there's a queue. 
- */ -out_oversize: - if (net_ratelimit()) - printk(KERN_INFO "Oversized packet received from %u.%u.%u.%u\n", - NIPQUAD(iph->saddr)); - /* the skb isn't in a fragment, so fall through to free it */ -out_freeskb: - kfree_skb(skb); IP_INC_STATS_BH(IpReasmFails); - if (qp) - goto out_timer; - goto out; + kfree_skb(skb); + return NULL; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 79dc3d629..a316401b0 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -36,6 +36,7 @@ #include <net/ipip.h> #include <net/arp.h> #include <net/checksum.h> +#include <net/inet_ecn.h> #ifdef CONFIG_IPV6 #include <net/ipv6.h> @@ -119,11 +120,11 @@ static int ipgre_tunnel_init(struct net_device *dev); static int ipgre_fb_tunnel_init(struct net_device *dev); static struct net_device ipgre_fb_tunnel_dev = { - "gre%d", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipgre_fb_tunnel_init, + "gre0", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipgre_fb_tunnel_init, }; static struct ip_tunnel ipgre_fb_tunnel = { - NULL, &ipgre_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"gre%d", } + NULL, &ipgre_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"gre0", } }; /* Tunnel hash table */ @@ -530,6 +531,34 @@ out: #endif } +static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) +{ + if (INET_ECN_is_ce(iph->tos)) { + if (skb->protocol == __constant_htons(ETH_P_IP)) { + if (INET_ECN_is_not_ce(skb->nh.iph->tos)) + IP_ECN_set_ce(skb->nh.iph); + } else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { + if (INET_ECN_is_not_ce(ip6_get_dsfield(skb->nh.ipv6h))) + IP6_ECN_set_ce(skb->nh.ipv6h); + } + } +} + +static inline u8 +ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) +{ +#ifdef CONFIG_INET_ECN + u8 inner = 0; + if (skb->protocol == __constant_htons(ETH_P_IP)) + inner = old_iph->tos; + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) + inner = ip6_get_dsfield((struct ipv6hdr*)old_iph); + return INET_ECN_encapsulate(tos, inner); +#else + return tos; 
+#endif +} + int ipgre_rcv(struct sk_buff *skb, unsigned short len) { struct iphdr *iph = skb->nh.iph; @@ -604,6 +633,7 @@ int ipgre_rcv(struct sk_buff *skb, unsigned short len) nf_conntrack_put(skb->nfct); skb->nfct = NULL; #endif + ipgre_ecn_decapsulate(iph, skb); netif_rx(skb); read_unlock(&ipgre_lock); return(0); @@ -638,6 +668,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int gre_hlen; u32 dst; int mtu; + int err; if (tunnel->recursion++) { tunnel->stat.collisions++; @@ -789,7 +820,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) iph->ihl = sizeof(struct iphdr) >> 2; iph->frag_off = df; iph->protocol = IPPROTO_GRE; - iph->tos = tos; + iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; @@ -834,10 +865,17 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->nfct = NULL; #endif + err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, + do_ip_send); + if(err < 0) { + if(net_ratelimit()) + printk(KERN_ERR "ipgre_tunnel_xmit: ip_send() failed, err=%d\n", -err); + skb = NULL; + goto tx_error; + } + stats->tx_bytes += skb->len; stats->tx_packets++; - NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, - do_ip_send); tunnel->recursion--; return 0; @@ -846,7 +884,8 @@ tx_error_icmp: tx_error: stats->tx_errors++; - dev_kfree_skb(skb); + if(skb) + dev_kfree_skb(skb); tunnel->recursion--; return 0; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 90b74447f..aea8b9370 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -5,7 +5,7 @@ * * The IP to API glue. 
* - * Version: $Id: ip_sockglue.c,v 1.47 2000/01/16 05:11:23 davem Exp $ + * Version: $Id: ip_sockglue.c,v 1.49 2000/07/08 00:20:43 davem Exp $ * * Authors: see ip.c * @@ -327,6 +327,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) if (err) goto out_free_skb; + sock_recv_timestamp(msg, sk, skb); + serr = SKB_EXT_ERR(skb); sin = (struct sockaddr_in *)msg->msg_name; @@ -462,8 +464,15 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt break; case IP_TOS: /* This sets both TOS and Precedence */ /* Reject setting of unused bits */ +#ifndef CONFIG_INET_ECN if (val & ~(IPTOS_TOS_MASK|IPTOS_PREC_MASK)) goto e_inval; +#else + if (sk->type == SOCK_STREAM) { + val &= ~3; + val |= sk->protinfo.af_inet.tos & 3; + } +#endif if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP && !capable(CAP_NET_ADMIN)) { err = -EPERM; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index e343f34e8..1177033ca 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -1,7 +1,7 @@ /* * Linux NET3: IP/IP protocol decoder. 
* - * Version: $Id: ipip.c,v 1.35 2000/07/07 01:55:20 davem Exp $ + * Version: $Id: ipip.c,v 1.37 2000/07/07 23:47:45 davem Exp $ * * Authors: * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 @@ -114,6 +114,7 @@ #include <net/icmp.h> #include <net/protocol.h> #include <net/ipip.h> +#include <net/inet_ecn.h> #define HASH_SIZE 16 #define HASH(addr) ((addr^(addr>>4))&0xF) @@ -122,11 +123,11 @@ static int ipip_fb_tunnel_init(struct net_device *dev); static int ipip_tunnel_init(struct net_device *dev); static struct net_device ipip_fb_tunnel_dev = { - "tunl%d", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip_fb_tunnel_init, + "tunl0", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip_fb_tunnel_init, }; static struct ip_tunnel ipip_fb_tunnel = { - NULL, &ipip_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"tunl%d", } + NULL, &ipip_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"tunl0", } }; static struct ip_tunnel *tunnels_r_l[HASH_SIZE]; @@ -465,6 +466,13 @@ out: #endif } +static inline void ipip_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) +{ + if (INET_ECN_is_ce(iph->tos) && + INET_ECN_is_not_ce(skb->nh.iph->tos)) + IP_ECN_set_ce(iph); +} + int ipip_rcv(struct sk_buff *skb, unsigned short len) { struct iphdr *iph; @@ -489,6 +497,7 @@ int ipip_rcv(struct sk_buff *skb, unsigned short len) nf_conntrack_put(skb->nfct); skb->nfct = NULL; #endif + ipip_ecn_decapsulate(iph, skb); netif_rx(skb); read_unlock(&ipip_lock); return 0; @@ -525,6 +534,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int max_headroom; /* The extra header space needed */ u32 dst = tiph->daddr; int mtu; + int err; if (tunnel->recursion++) { tunnel->stat.collisions++; @@ -620,7 +630,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) iph->ihl = sizeof(struct iphdr)>>2; iph->frag_off = df; iph->protocol = IPPROTO_IPIP; - iph->tos = tos; + iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; @@ -636,10 
+646,17 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->nfct = NULL; #endif + err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, + do_ip_send); + if(err < 0) { + if(net_ratelimit()) + printk(KERN_ERR "ipip_tunnel_xmit: ip_send() failed, err=%d\n", -err); + skb = NULL; + goto tx_error; + } + stats->tx_bytes += skb->len; stats->tx_packets++; - NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, - do_ip_send); tunnel->recursion--; return 0; @@ -647,7 +664,8 @@ tx_error_icmp: dst_link_failure(skb); tx_error: stats->tx_errors++; - dev_kfree_skb(skb); + if(skb) + dev_kfree_skb(skb); tunnel->recursion--; return 0; } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4d94a4cc0..f1ff8f1ee 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -7,7 +7,7 @@ * PROC file system. It is mainly used for debugging and * statistics. * - * Version: $Id: proc.c,v 1.42 2000/04/16 01:11:37 davem Exp $ + * Version: $Id: proc.c,v 1.43 2000/07/07 22:29:42 davem Exp $ * * Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de> @@ -78,6 +78,8 @@ int afinet_get_info(char *buffer, char **start, off_t offset, int length) fold_prot_inuse(&udp_prot)); len += sprintf(buffer+len,"RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); + len += sprintf(buffer+len, "FRAG: inuse %d memory %d\n", + ip_frag_nqueues, atomic_read(&ip_frag_mem)); if (offset >= len) { *start = buffer; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 6e3f19287..5ac30dc40 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -5,7 +5,7 @@ * * RAW - implementation of IP "raw" sockets. * - * Version: $Id: raw.c,v 1.50 2000/05/03 06:37:06 davem Exp $ + * Version: $Id: raw.c,v 1.52 2000/07/08 00:20:43 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -502,7 +502,7 @@ int raw_recvmsg(struct sock *sk, struct msghdr *msg, int len, if (err) goto done; - sk->stamp=skb->stamp; + sock_recv_timestamp(msg, sk, skb); /* Copy the address. */ if (sin) { @@ -580,6 +580,36 @@ static int raw_getsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; } +static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + switch(cmd) { + case SIOCOUTQ: + { + int amount = atomic_read(&sk->wmem_alloc); + return put_user(amount, (int *)arg); + } + case SIOCINQ: + { + struct sk_buff *skb; + int amount = 0; + + spin_lock_irq(&sk->receive_queue.lock); + skb = skb_peek(&sk->receive_queue); + if (skb != NULL) + amount = skb->len; + spin_unlock_irq(&sk->receive_queue.lock); + return put_user(amount, (int *)arg); + } + + default: +#ifdef CONFIG_IP_MROUTE + return ipmr_ioctl(sk, cmd, arg); +#else + return -ENOIOCTLCMD; +#endif + } +} + static void get_raw_sock(struct sock *sp, char *tmpbuf, int i) { unsigned int dest, src; @@ -648,9 +678,7 @@ struct proto raw_prot = { close: raw_close, connect: udp_connect, disconnect: udp_disconnect, -#ifdef CONFIG_IP_MROUTE - ioctl: ipmr_ioctl, -#endif + ioctl: raw_ioctl, init: raw_init, setsockopt: raw_setsockopt, getsockopt: raw_getsockopt, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ec254e313..eb00518bd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -5,7 +5,7 @@ * * ROUTE - implementation of the IP router. * - * Version: $Id: route.c,v 1.86 2000/04/24 07:03:14 davem Exp $ + * Version: $Id: route.c,v 1.88 2000/07/07 23:47:45 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -152,23 +152,29 @@ struct dst_ops ipv4_dst_ops = sizeof(struct rtable), }; +#ifdef CONFIG_INET_ECN +#define ECN_OR_COST(class) TC_PRIO_##class +#else +#define ECN_OR_COST(class) TC_PRIO_FILLER +#endif + __u8 ip_tos2prio[16] = { TC_PRIO_BESTEFFORT, - TC_PRIO_FILLER, + ECN_OR_COST(FILLER), TC_PRIO_BESTEFFORT, - TC_PRIO_FILLER, + ECN_OR_COST(BESTEFFORT), TC_PRIO_BULK, - TC_PRIO_FILLER, + ECN_OR_COST(BULK), TC_PRIO_BULK, - TC_PRIO_FILLER, + ECN_OR_COST(BULK), TC_PRIO_INTERACTIVE, - TC_PRIO_FILLER, + ECN_OR_COST(INTERACTIVE), TC_PRIO_INTERACTIVE, - TC_PRIO_FILLER, + ECN_OR_COST(INTERACTIVE), TC_PRIO_INTERACTIVE_BULK, - TC_PRIO_FILLER, + ECN_OR_COST(INTERACTIVE_BULK), TC_PRIO_INTERACTIVE_BULK, - TC_PRIO_FILLER + ECN_OR_COST(INTERACTIVE_BULK) }; @@ -582,9 +588,15 @@ restart: route or unicast forwarding path. */ if (rt->rt_type == RTN_UNICAST || rt->key.iif == 0) { - if (!arp_bind_neighbour(&rt->u.dst)) { + int err = arp_bind_neighbour(&rt->u.dst); + if (err) { write_unlock_bh(&rt_hash_table[hash].lock); + if (err != -ENOBUFS) { + rt_drop(rt); + return err; + } + /* Neighbour tables are full and nothing can be released. Try to shrink route cache, it is most likely it holds some neighbour records. 
@@ -600,13 +612,8 @@ restart: goto restart; } - if (net_ratelimit()) { - if ((rt->u.dst.dev->flags&IFF_UP) && - __in_dev_get(rt->u.dst.dev)) - printk("Neighbour table overflow.\n"); - else - printk("Device %s is down.\n", rt->u.dst.dev->name); - } + if (net_ratelimit()) + printk("Neighbour table overflow.\n"); rt_drop(rt); return -ENOBUFS; } @@ -712,7 +719,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, u32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; - tos &= IPTOS_TOS_MASK; + tos &= IPTOS_RT_MASK; if (!in_dev) return; @@ -791,7 +798,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, if (rt->peer) atomic_inc(&rt->peer->refcnt); - if (!arp_bind_neighbour(&rt->u.dst) || + if (arp_bind_neighbour(&rt->u.dst) || !(rt->u.dst.neighbour->nud_state&NUD_VALID)) { if (rt->u.dst.neighbour) neigh_event_send(rt->u.dst.neighbour, NULL); @@ -967,7 +974,7 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) struct rtable *rth; u32 skeys[2] = { iph->saddr, 0, }; u32 daddr = iph->daddr; - u8 tos = iph->tos & IPTOS_TOS_MASK; + u8 tos = iph->tos & IPTOS_RT_MASK; unsigned short est_mtu = 0; if (ipv4_config.no_pmtu_disc) @@ -1546,7 +1553,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, unsigned hash; int iif = dev->ifindex; - tos &= IPTOS_TOS_MASK; + tos &= IPTOS_RT_MASK; hash = rt_hash_code(daddr, saddr^(iif<<5), tos); read_lock(&rt_hash_table[hash].lock); @@ -1616,10 +1623,10 @@ int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int int free_res = 0; int err; - tos &= IPTOS_TOS_MASK|RTO_ONLINK; + tos &= IPTOS_RT_MASK|RTO_ONLINK; key.dst = daddr; key.src = saddr; - key.tos = tos&IPTOS_TOS_MASK; + key.tos = tos&IPTOS_RT_MASK; key.iif = loopback_dev.ifindex; key.oif = oif; key.scope = (tos&RTO_ONLINK) ? 
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; @@ -1889,7 +1896,7 @@ int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int oif) rth->key.src == saddr && rth->key.iif == 0 && rth->key.oif == oif && - !((rth->key.tos^tos)&(IPTOS_TOS_MASK|RTO_ONLINK)) && + !((rth->key.tos^tos)&(IPTOS_RT_MASK|RTO_ONLINK)) && ((tos&RTO_TPROXY) || !(rth->rt_flags&RTCF_TPROXY)) ) { rth->u.dst.lastuse = jiffies; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c4343b707..dbf680233 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.169 2000/04/20 14:41:16 davem Exp $ + * Version: $Id: tcp.c,v 1.170 2000/07/08 00:20:43 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -1018,9 +1018,13 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size) tcp_send_skb(sk, skb, queue_it, mss_now); } } - sk->err = 0; err = copied; - goto out; +out: + __tcp_push_pending_frames(sk, tp, mss_now); + TCP_CHECK_TIMER(sk); +out_unlock: + release_sock(sk); + return err; do_sock_err: if(copied) @@ -1048,12 +1052,7 @@ do_fault: kfree_skb(skb); do_fault2: err = -EFAULT; -out: - __tcp_push_pending_frames(sk, tp, mss_now); - TCP_CHECK_TIMER(sk); -out_unlock: - release_sock(sk); - return err; + goto out; } #undef PSH_NEEDED @@ -1270,10 +1269,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, TCP_CHECK_TIMER(sk); - - if (sk->err) - goto out_err; - err = -ENOTCONN; if (sk->state == TCP_LISTEN) goto out; @@ -1292,13 +1287,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); - /* - * BUG BUG BUG - * This violates 1003.1g compliance. We must wait for - * data to exist even if we read none! 
- */ - - while (len > 0) { + do { struct sk_buff * skb; u32 offset; @@ -1519,29 +1508,6 @@ do_prequeue: continue; skb->used = 1; tcp_eat_skb(sk, skb); - -#ifdef TCP_LESS_COARSE_ACKS - /* Possible improvement. When sender is faster than receiver, - * traffic looks like: fill window ... wait for window open ... - * fill window. We lose at least one rtt, because call - * cleanup_rbuf only once. Probably, if "len" was large - * we should insert several intermediate cleanup_rbuf(s). - * - * F.e.: - */ - do { - u32 full_space = min(tp->window_clamp, tcp_full_space(sk)); - - /* Try to ACK, if total buffer length is larger - than maximal window and if rcv_window has - chances to increase twice. It will result - to exponentially decreased ACKing during - read to huge (usually, mmapped) buffer. - */ - if (len >= full_space && tp->rcv_wnd <= full_space/2) - cleanup_rbuf(sk, copied); - } while (0); -#endif continue; found_fin_ok: @@ -1552,7 +1518,7 @@ do_prequeue: /* All is done. */ skb->used = 1; break; - } + } while (len > 0); if (user_recv) { if (skb_queue_len(&tp->ucopy.prequeue)) { @@ -1584,9 +1550,6 @@ do_prequeue: release_sock(sk); return copied; -out_err: - err = sock_error(sk); - out: TCP_CHECK_TIMER(sk); release_sock(sk); @@ -2012,7 +1975,6 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) struct open_request *req; struct sock *newsk; int error; - long timeo; lock_sock(sk); @@ -2023,10 +1985,10 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) if (sk->state != TCP_LISTEN) goto out; - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); - /* Find already established connection */ if (!tp->accept_queue) { + long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + /* If this is a non blocking socket don't sleep */ error = -EAGAIN; if (!timeo) @@ -2099,6 +2061,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, break; } tp->nonagle = (val == 0) ? 
0 : 1; + if (val) + tcp_push_pending_frames(sk, tp); break; case TCP_CORK: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a14c984d7..dec2a6126 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -5,7 +5,7 @@ * * The User Datagram Protocol (UDP). * - * Version: $Id: udp.c,v 1.83 2000/06/09 07:35:49 davem Exp $ + * Version: $Id: udp.c,v 1.84 2000/07/08 00:20:43 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -710,7 +710,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len, if (err) goto out_free; - sk->stamp=skb->stamp; + + sock_recv_timestamp(msg, sk, skb); /* Copy the address. */ if (sin) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 844ea8228..986cd023f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: datagram.c,v 1.19 2000/02/27 19:51:47 davem Exp $ + * $Id: datagram.c,v 1.20 2000/07/08 00:20:43 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -127,6 +127,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) if (err) goto out_free_skb; + sock_recv_timestamp(msg, sk, skb); + serr = SKB_EXT_ERR(skb); sin = (struct sockaddr_in6 *)msg->msg_name; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 22c1fa367..944d665d5 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -7,7 +7,7 @@ * PROC file system. This is very similar to the IPv4 version, * except it reports the sockets in the INET6 address family. * - * Version: $Id: proc.c,v 1.14 2000/04/16 01:11:37 davem Exp $ + * Version: $Id: proc.c,v 1.15 2000/07/07 22:29:42 davem Exp $ * * Authors: David S. 
Miller (davem@caip.rutgers.edu) * @@ -46,6 +46,8 @@ int afinet6_get_info(char *buffer, char **start, off_t offset, int length, int d fold_prot_inuse(&udpv6_prot)); len += sprintf(buffer+len, "RAW6: inuse %d\n", fold_prot_inuse(&rawv6_prot)); + len += sprintf(buffer+len, "FRAG6: inuse %d memory %d\n", + ip6_frag_nqueues, atomic_read(&ip6_frag_mem)); *start = buffer + offset; len -= offset; if(len > length) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 3f2ec7068..e83870421 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/raw.c * - * $Id: raw.c,v 1.36 2000/05/03 06:37:07 davem Exp $ + * $Id: raw.c,v 1.39 2000/07/08 00:20:43 davem Exp $ * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support @@ -331,7 +331,6 @@ int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len, } err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - sk->stamp=skb->stamp; if (err) goto out_free; @@ -348,6 +347,8 @@ int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len, } } + sock_recv_timestamp(msg, sk, skb); + if (sk->net_pinfo.af_inet6.rxopt.all) datagram_recv_ctl(sk, msg, skb); err = copied; @@ -535,6 +536,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len) fl.proto = proto; fl.fl6_dst = daddr; + if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr)) + fl.fl6_src = &np->saddr; fl.uli_u.icmpt.type = 0; fl.uli_u.icmpt.code = 0; @@ -694,6 +697,31 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, return 0; } +static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + switch(cmd) { + case SIOCOUTQ: + { + int amount = atomic_read(&sk->wmem_alloc); + return put_user(amount, (int *)arg); + } + case SIOCINQ: + { + struct sk_buff *skb; + int amount = 0; + + spin_lock_irq(&sk->receive_queue.lock); + skb = skb_peek(&sk->receive_queue); + if (skb != NULL) + amount = skb->tail - skb->h.raw; + spin_unlock_irq(&sk->receive_queue.lock); + return put_user(amount, (int *)arg); + 
} + + default: + return -ENOIOCTLCMD; + } +} static void rawv6_close(struct sock *sk, long timeout) { @@ -790,6 +818,7 @@ struct proto rawv6_prot = { close: rawv6_close, connect: udpv6_connect, disconnect: udp_disconnect, + ioctl: rawv6_ioctl, init: rawv6_init_sk, destroy: inet6_destroy_sock, setsockopt: rawv6_setsockopt, diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 99f4a702f..abdcdc713 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: reassembly.c,v 1.17 2000/05/03 06:37:07 davem Exp $ + * $Id: reassembly.c,v 1.18 2000/07/07 22:29:42 davem Exp $ * * Based on: net/ipv4/ip_fragment.c * @@ -21,6 +21,7 @@ * More RFC compliance. * * Horst von Brand Add missing #include <linux/string.h> + * Alexey Kuznetsov SMP races, threading, cleanup. */ #include <linux/errno.h> #include <linux/types.h> @@ -46,198 +47,202 @@ int sysctl_ip6frag_high_thresh = 256*1024; int sysctl_ip6frag_low_thresh = 192*1024; -int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT; - -atomic_t ip6_frag_mem = ATOMIC_INIT(0); -static spinlock_t ip6_frag_lock = SPIN_LOCK_UNLOCKED; +int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT; -struct ipv6_frag { - __u16 offset; - __u16 len; - struct sk_buff *skb; +struct ip6frag_skb_cb +{ + struct inet6_skb_parm h; + int offset; +}; - struct frag_hdr *fhdr; +#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) - struct ipv6_frag *next; -}; /* * Equivalent of ipv4 struct ipq */ -struct frag_queue { - +struct frag_queue +{ struct frag_queue *next; - struct frag_queue *prev; __u32 id; /* fragment id */ struct in6_addr saddr; struct in6_addr daddr; + + spinlock_t lock; + atomic_t refcnt; struct timer_list timer; /* expire timer */ - struct ipv6_frag *fragments; - struct net_device *dev; + struct sk_buff *fragments; + int len; + int meat; + struct net_device *dev; int iif; __u8 last_in; /* has first/last segment arrived? 
*/ +#define COMPLETE 4 #define FIRST_IN 2 #define LAST_IN 1 __u8 nexthdr; __u16 nhoffset; + struct frag_queue **pprev; }; -static struct frag_queue ipv6_frag_queue = { - &ipv6_frag_queue, &ipv6_frag_queue, -}; +/* Hash table. */ -/* Memory Tracking Functions. */ -extern __inline__ void frag_kfree_skb(struct sk_buff *skb) +#define IP6Q_HASHSZ 64 + +static struct frag_queue *ip6_frag_hash[IP6Q_HASHSZ]; +static rwlock_t ip6_frag_lock = RW_LOCK_UNLOCKED; +int ip6_frag_nqueues = 0; + +static __inline__ void __fq_unlink(struct frag_queue *fq) { - atomic_sub(skb->truesize, &ip6_frag_mem); - kfree_skb(skb); + if(fq->next) + fq->next->pprev = fq->pprev; + *fq->pprev = fq->next; + ip6_frag_nqueues--; } -extern __inline__ void frag_kfree_s(void *ptr, int len) +static __inline__ void fq_unlink(struct frag_queue *fq) { - atomic_sub(len, &ip6_frag_mem); - kfree(ptr); + write_lock(&ip6_frag_lock); + __fq_unlink(fq); + write_unlock(&ip6_frag_lock); } - -extern __inline__ void *frag_kmalloc(int size, int pri) + +static __inline__ unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, + struct in6_addr *daddr) { - void *vp = kmalloc(size, pri); + unsigned int h = saddr->s6_addr32[3] ^ daddr->s6_addr32[3] ^ id; - if(!vp) - return NULL; - atomic_add(size, &ip6_frag_mem); - return vp; + h ^= (h>>16); + h ^= (h>>8); + return h & (IP6Q_HASHSZ - 1); } -static void create_frag_entry(struct sk_buff *skb, - __u8 *nhptr, - struct frag_hdr *fhdr); -static u8 * reasm_frag(struct frag_queue *fq, - struct sk_buff **skb_in); - -static void reasm_queue(struct frag_queue *fq, - struct sk_buff *skb, - struct frag_hdr *fhdr, - u8 *nhptr); - -static void fq_free(struct frag_queue *fq); +atomic_t ip6_frag_mem = ATOMIC_INIT(0); -static void frag_prune(void) +/* Memory Tracking Functions. 
*/ +extern __inline__ void frag_kfree_skb(struct sk_buff *skb) { - struct frag_queue *fq; - - spin_lock(&ip6_frag_lock); - while ((fq = ipv6_frag_queue.next) != &ipv6_frag_queue) { - IP6_INC_STATS_BH(Ip6ReasmFails); - fq_free(fq); - if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh) { - spin_unlock(&ip6_frag_lock); - return; - } - } - if (atomic_read(&ip6_frag_mem)) - printk(KERN_DEBUG "IPv6 frag_prune: memleak\n"); - atomic_set(&ip6_frag_mem, 0); - spin_unlock(&ip6_frag_lock); + atomic_sub(skb->truesize, &ip6_frag_mem); + kfree_skb(skb); } - -u8* ipv6_reassembly(struct sk_buff **skbp, __u8 *nhptr) +extern __inline__ void frag_free_queue(struct frag_queue *fq) { - struct sk_buff *skb = *skbp; - struct frag_hdr *fhdr = (struct frag_hdr *) (skb->h.raw); - struct frag_queue *fq; - struct ipv6hdr *hdr; - - hdr = skb->nh.ipv6h; + atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem); + kfree(fq); +} - IP6_INC_STATS_BH(Ip6ReasmReqds); +extern __inline__ struct frag_queue *frag_alloc_queue(void) +{ + struct frag_queue *fq = kmalloc(sizeof(struct frag_queue), GFP_ATOMIC); - /* Jumbo payload inhibits frag. header */ - if (hdr->payload_len==0) { - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw); + if(!fq) return NULL; - } - if ((u8 *)(fhdr+1) > skb->tail) { - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw); - return NULL; - } - if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) - frag_prune(); + atomic_add(sizeof(struct frag_queue), &ip6_frag_mem); + return fq; +} - spin_lock(&ip6_frag_lock); - for (fq = ipv6_frag_queue.next; fq != &ipv6_frag_queue; fq = fq->next) { - if (fq->id == fhdr->identification && - !ipv6_addr_cmp(&hdr->saddr, &fq->saddr) && - !ipv6_addr_cmp(&hdr->daddr, &fq->daddr)) { - u8 *ret = NULL; +/* Destruction primitives. */ - reasm_queue(fq, skb, fhdr, nhptr); +/* Complete destruction of fq. 
*/ +static void ip6_frag_destroy(struct frag_queue *fq) +{ + struct sk_buff *fp; - if (fq->last_in == (FIRST_IN|LAST_IN)) - ret = reasm_frag(fq, skbp); + BUG_TRAP(fq->last_in&COMPLETE); + BUG_TRAP(del_timer(&fq->timer) == 0); - spin_unlock(&ip6_frag_lock); - return ret; - } - } + /* Release all fragment data. */ + fp = fq->fragments; + while (fp) { + struct sk_buff *xp = fp->next; - create_frag_entry(skb, nhptr, fhdr); - spin_unlock(&ip6_frag_lock); + frag_kfree_skb(fp); + fp = xp; + } - return NULL; + frag_free_queue(fq); } - -static void fq_free(struct frag_queue *fq) +static __inline__ void fq_put(struct frag_queue *fq) { - struct ipv6_frag *fp, *back; + if (atomic_dec_and_test(&fq->refcnt)) + ip6_frag_destroy(fq); +} - del_timer(&fq->timer); +/* Kill fq entry. It is not destroyed immediately, + * because caller (and someone more) holds reference count. + */ +static __inline__ void fq_kill(struct frag_queue *fq) +{ + if (del_timer(&fq->timer)) + atomic_dec(&fq->refcnt); - for (fp = fq->fragments; fp; ) { - frag_kfree_skb(fp->skb); - back = fp; - fp=fp->next; - frag_kfree_s(back, sizeof(*back)); + if (!(fq->last_in & COMPLETE)) { + fq_unlink(fq); + atomic_dec(&fq->refcnt); + fq->last_in |= COMPLETE; } +} - fq->prev->next = fq->next; - fq->next->prev = fq->prev; +static void ip6_evictor(void) +{ + int i, progress; - fq->prev = fq->next = NULL; - - frag_kfree_s(fq, sizeof(*fq)); + do { + if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh) + return; + progress = 0; + for (i = 0; i < IP6Q_HASHSZ; i++) { + struct frag_queue *fq; + if (ip6_frag_hash[i] == NULL) + continue; + + write_lock(&ip6_frag_lock); + if ((fq = ip6_frag_hash[i]) != NULL) { + /* find the oldest queue for this hash bucket */ + while (fq->next) + fq = fq->next; + __fq_unlink(fq); + write_unlock(&ip6_frag_lock); + + spin_lock(&fq->lock); + if (del_timer(&fq->timer)) + atomic_dec(&fq->refcnt); + fq->last_in |= COMPLETE; + spin_unlock(&fq->lock); + + fq_put(fq); + 
IP6_INC_STATS_BH(Ip6ReasmFails); + progress = 1; + continue; + } + write_unlock(&ip6_frag_lock); + } + } while (progress); } -static void frag_expire(unsigned long data) +static void ip6_frag_expire(unsigned long data) { - struct frag_queue *fq; - struct ipv6_frag *frag; + struct frag_queue *fq = (struct frag_queue *) data; - fq = (struct frag_queue *) data; + spin_lock(&fq->lock); - spin_lock(&ip6_frag_lock); + if (fq->last_in & COMPLETE) + goto out; - frag = fq->fragments; + fq_kill(fq); IP6_INC_STATS_BH(Ip6ReasmTimeout); IP6_INC_STATS_BH(Ip6ReasmFails); - if (frag == NULL) { - spin_unlock(&ip6_frag_lock); - printk(KERN_DEBUG "invalid fragment queue\n"); - return; - } - - /* Send error only if the first segment arrived. - (fixed --ANK (980728)) - */ - if (fq->last_in&FIRST_IN) { + /* Send error only if the first segment arrived. */ + if (fq->last_in&FIRST_IN && fq->fragments) { struct net_device *dev = dev_get_by_index(fq->iif); /* @@ -246,144 +251,234 @@ static void frag_expire(unsigned long data) pointer directly, device might already disappeared. */ if (dev) { - frag->skb->dev = dev; - icmpv6_send(frag->skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, + fq->fragments->dev = dev; + icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); dev_put(dev); } } - - fq_free(fq); - spin_unlock(&ip6_frag_lock); +out: + spin_unlock(&fq->lock); + fq_put(fq); } +/* Creation primitives. 
*/ -static void create_frag_entry(struct sk_buff *skb, - __u8 *nhptr, - struct frag_hdr *fhdr) + +static struct frag_queue *ip6_frag_intern(unsigned int hash, + struct frag_queue *fq_in) { struct frag_queue *fq; - struct ipv6hdr *hdr; - - fq = (struct frag_queue *) frag_kmalloc(sizeof(struct frag_queue), - GFP_ATOMIC); - if (fq == NULL) { - IP6_INC_STATS_BH(Ip6ReasmFails); - kfree_skb(skb); - return; + write_lock(&ip6_frag_lock); +#ifdef CONFIG_SMP + for (fq = ip6_frag_hash[hash]; fq; fq = fq->next) { + if (fq->id == fq_in->id && + !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) && + !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) { + atomic_inc(&fq->refcnt); + write_unlock(&ip6_frag_lock); + fq_in->last_in |= COMPLETE; + fq_put(fq_in); + return fq; + } } +#endif + fq = fq_in; + + atomic_inc(&fq->refcnt); + if((fq->next = ip6_frag_hash[hash]) != NULL) + fq->next->pprev = &fq->next; + ip6_frag_hash[hash] = fq; + fq->pprev = &ip6_frag_hash[hash]; + ip6_frag_nqueues++; + write_unlock(&ip6_frag_lock); + return fq; +} - memset(fq, 0, sizeof(struct frag_queue)); - fq->id = fhdr->identification; +static struct frag_queue * +ip6_frag_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst) +{ + struct frag_queue *fq; - hdr = skb->nh.ipv6h; - ipv6_addr_copy(&fq->saddr, &hdr->saddr); - ipv6_addr_copy(&fq->daddr, &hdr->daddr); + if ((fq = frag_alloc_queue()) == NULL) + goto oom; + + memset(fq, 0, sizeof(struct frag_queue)); + + fq->id = id; + ipv6_addr_copy(&fq->saddr, src); + ipv6_addr_copy(&fq->daddr, dst); /* init_timer has been done by the memset */ - fq->timer.function = frag_expire; + fq->timer.function = ip6_frag_expire; fq->timer.data = (long) fq; - fq->timer.expires = jiffies + sysctl_ip6frag_time; + fq->lock = SPIN_LOCK_UNLOCKED; + atomic_set(&fq->refcnt, 1); - reasm_queue(fq, skb, fhdr, nhptr); + return ip6_frag_intern(hash, fq); - if (fq->fragments) { - fq->prev = ipv6_frag_queue.prev; - fq->next = &ipv6_frag_queue; - fq->prev->next = fq; - 
ipv6_frag_queue.prev = fq; - - add_timer(&fq->timer); - } else - frag_kfree_s(fq, sizeof(*fq)); +oom: + IP6_INC_STATS_BH(Ip6ReasmFails); + return NULL; } +static __inline__ struct frag_queue * +fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) +{ + struct frag_queue *fq; + unsigned int hash = ip6qhashfn(id, src, dst); + + read_lock(&ip6_frag_lock); + for(fq = ip6_frag_hash[hash]; fq; fq = fq->next) { + if (fq->id == id && + !ipv6_addr_cmp(src, &fq->saddr) && + !ipv6_addr_cmp(dst, &fq->daddr)) { + atomic_inc(&fq->refcnt); + read_unlock(&ip6_frag_lock); + return fq; + } + } + read_unlock(&ip6_frag_lock); + + return ip6_frag_create(hash, id, src, dst); +} -/* - * We queue the packet even if it's the last. - * It's a trade off. This allows the reassembly - * code to be simpler (=faster) and of the - * steps we do for queueing the only unnecessary - * one it's the kmalloc for a struct ipv6_frag. - * Feel free to try other alternatives... - */ -static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb, - struct frag_hdr *fhdr, u8 *nhptr) +static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, + struct frag_hdr *fhdr, u8 *nhptr) { - struct ipv6_frag *nfp, *fp, **bptr; + struct sk_buff *prev, *next; + int offset, end; - nfp = (struct ipv6_frag *) frag_kmalloc(sizeof(struct ipv6_frag), - GFP_ATOMIC); + if (fq->last_in & COMPLETE) + goto err; - if (nfp == NULL) { - kfree_skb(skb); - return; - } + if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time)) + atomic_inc(&fq->refcnt); - nfp->offset = ntohs(fhdr->frag_off) & ~0x7; - nfp->len = (ntohs(skb->nh.ipv6h->payload_len) - - ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); + offset = ntohs(fhdr->frag_off) & ~0x7; + end = offset + (ntohs(skb->nh.ipv6h->payload_len) - + ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); - if ((u32)nfp->offset + (u32)nfp->len >= 65536) { + if ((unsigned int)end >= 65536) { icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off); goto err; } - if 
(fhdr->frag_off & __constant_htons(0x0001)) { + + /* Is this the final fragment? */ + if (!(fhdr->frag_off & __constant_htons(0x0001))) { + /* If we already have some bits beyond end + * or have different end, the segment is corrupted. + */ + if (end < fq->len || + ((fq->last_in & LAST_IN) && end != fq->len)) + goto err; + fq->last_in |= LAST_IN; + fq->len = end; + } else { /* Check if the fragment is rounded to 8 bytes. * Required by the RFC. - * ... and would break our defragmentation algorithm 8) */ - if (nfp->len & 0x7) { + if (end & 0x7) { printk(KERN_DEBUG "fragment not rounded to 8bytes\n"); /* It is not in specs, but I see no reasons to send an error in this case. --ANK */ - if (nfp->offset == 0) + if (offset == 0) icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, &skb->nh.ipv6h->payload_len); goto err; } + if (end > fq->len) { + /* Some bits beyond end -> corruption. */ + if (fq->last_in & LAST_IN) + goto err; + fq->len = end; + } } - nfp->skb = skb; - nfp->fhdr = fhdr; - nfp->next = NULL; + if (end == offset) + goto err; - bptr = &fq->fragments; + /* Point into the IP datagram 'data' part. */ + skb_pull(skb, (u8 *) (fhdr + 1) - skb->data); + skb_trim(skb, end - offset); - for (fp = fq->fragments; fp; fp=fp->next) { - if (nfp->offset <= fp->offset) - break; - bptr = &fp->next; + /* Find out which fragments are in front and at the back of us + * in the chain of fragments so far. We must know where to put + * this fragment, right? + */ + prev = NULL; + for(next = fq->fragments; next != NULL; next = next->next) { + if (FRAG6_CB(next)->offset >= offset) + break; /* bingo! */ + prev = next; } - if (fp && fp->offset == nfp->offset) { - if (nfp->len != fp->len) { - printk(KERN_DEBUG "reasm_queue: dup with wrong len\n"); + + /* We found where to put this one. Check for overlap with + * preceding fragment, and, if needed, align things so that + * any overlaps are eliminated. 
+ */ + if (prev) { + int i = (FRAG6_CB(prev)->offset + prev->len) - offset; + + if (i > 0) { + offset += i; + if (end <= offset) + goto err; + skb_pull(skb, i); } + } - /* duplicate. discard it. */ - goto err; + /* Look for overlap with succeeding segments. + * If we can merge fragments, do it. + */ + while (next && FRAG6_CB(next)->offset < end) { + int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */ + + if (i < next->len) { + /* Eat head of the next overlapped fragment + * and leave the loop. The next ones cannot overlap. + */ + FRAG6_CB(next)->offset += i; /* next fragment */ + skb_pull(next, i); + fq->meat -= i; + break; + } else { + struct sk_buff *free_it = next; + + /* Old fragmnet is completely overridden with + * new one drop it. + */ + next = next->next; + + if (prev) + prev->next = next; + else + fq->fragments = next; + + fq->meat -= free_it->len; + frag_kfree_skb(free_it); + } } - atomic_add(skb->truesize, &ip6_frag_mem); + FRAG6_CB(skb)->offset = offset; - /* All the checks are done, fragment is acepted. - Only now we are allowed to update reassembly data! - (fixed --ANK (980728)) - */ + /* Insert this fragment in the chain of fragments. */ + skb->next = next; + if (prev) + prev->next = skb; + else + fq->fragments = skb; - /* iif always set to one of the last arrived segment */ fq->dev = skb->dev; fq->iif = skb->dev->ifindex; - - /* Last fragment */ - if ((fhdr->frag_off & __constant_htons(0x0001)) == 0) - fq->last_in |= LAST_IN; + fq->meat += skb->len; + atomic_add(skb->truesize, &ip6_frag_mem); /* First fragment. nexthdr and nhptr are get from the first fragment. @@ -391,85 +486,67 @@ static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb, first one. 
(fixed --ANK (980728)) */ - if (nfp->offset == 0) { + if (offset == 0) { fq->nexthdr = fhdr->nexthdr; - fq->last_in |= FIRST_IN; fq->nhoffset = nhptr - skb->nh.raw; + fq->last_in |= FIRST_IN; } - - *bptr = nfp; - nfp->next = fp; return; err: - frag_kfree_s(nfp, sizeof(*nfp)); kfree_skb(skb); } /* - * check if this fragment completes the packet - * returns true on success + * Check if this packet is complete. + * Returns NULL on failure by any reason, and pointer + * to current nexthdr field in reassembled frame. + * + * It is called with locked fq, and caller must check that + * queue is eligible for reassembly i.e. it is not COMPLETE, + * the last and the first frames arrived and all the bits are here. */ -static u8* reasm_frag(struct frag_queue *fq, struct sk_buff **skb_in) +static u8* ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in) { - struct ipv6_frag *fp; - struct ipv6_frag *head = fq->fragments; - struct ipv6_frag *tail = NULL; + struct sk_buff *fp, *head = fq->fragments; struct sk_buff *skb; - __u32 offset = 0; - __u32 payload_len; - __u16 unfrag_len; - __u16 copy; + int payload_len; + int unfrag_len; + int copy; u8 *nhptr; - for(fp = head; fp; fp=fp->next) { - if (offset != fp->offset) - return NULL; - - offset += fp->len; - tail = fp; - } - /* * we know the m_flag arrived and we have a queue, * starting from 0, without gaps. * this means we have all fragments. */ - /* Unfragmented part is taken from the first segment. - (fixed --ANK (980728)) - */ - unfrag_len = (u8 *) (head->fhdr) - (u8 *) (head->skb->nh.ipv6h + 1); + fq_kill(fq); - payload_len = (unfrag_len + tail->offset + - (tail->skb->tail - (__u8 *) (tail->fhdr + 1))); + BUG_TRAP(head != NULL); + BUG_TRAP(FRAG6_CB(head)->offset == 0); - if (payload_len > 65535) { - if (net_ratelimit()) - printk(KERN_DEBUG "reasm_frag: payload len = %d\n", payload_len); - IP6_INC_STATS_BH(Ip6ReasmFails); - fq_free(fq); - return NULL; - } + /* Unfragmented part is taken from the first segment. 
*/ + unfrag_len = head->h.raw - (u8 *) (head->nh.ipv6h + 1); + payload_len = unfrag_len + fq->len; - if ((skb = dev_alloc_skb(sizeof(struct ipv6hdr) + payload_len))==NULL) { - if (net_ratelimit()) - printk(KERN_DEBUG "reasm_frag: no memory for reassembly\n"); - IP6_INC_STATS_BH(Ip6ReasmFails); - fq_free(fq); - return NULL; - } + if (payload_len > 65535) + goto out_oversize; + + if ((skb = dev_alloc_skb(sizeof(struct ipv6hdr) + payload_len))==NULL) + goto out_oom; copy = unfrag_len + sizeof(struct ipv6hdr); + skb->mac.raw = skb->data; skb->nh.ipv6h = (struct ipv6hdr *) skb->data; skb->dev = fq->dev; skb->protocol = __constant_htons(ETH_P_IPV6); - skb->pkt_type = head->skb->pkt_type; - memcpy(skb->cb, head->skb->cb, sizeof(skb->cb)); - skb->dst = dst_clone(head->skb->dst); + skb->pkt_type = head->pkt_type; + FRAG6_CB(skb)->h = FRAG6_CB(head)->h; + skb->dst = dst_clone(head->dst); - memcpy(skb_put(skb, copy), head->skb->nh.ipv6h, copy); + memcpy(skb_put(skb, copy), head->nh.ipv6h, copy); nhptr = skb->nh.raw + fq->nhoffset; *nhptr = fq->nexthdr; @@ -479,29 +556,73 @@ static u8* reasm_frag(struct frag_queue *fq, struct sk_buff **skb_in) *skb_in = skb; - /* - * FIXME: If we don't have a checksum we ought to be able - * to defragment and checksum in this pass. [AC] - * Note that we don't really know yet whether the protocol - * needs checksums at all. It might still be a good idea. 
-AK - */ - for(fp = fq->fragments; fp; ) { - struct ipv6_frag *back; - - memcpy(skb_put(skb, fp->len), (__u8*)(fp->fhdr + 1), fp->len); - frag_kfree_skb(fp->skb); - back = fp; - fp=fp->next; - frag_kfree_s(back, sizeof(*back)); + for (fp = fq->fragments; fp; fp=fp->next) + memcpy(skb_put(skb, fp->len), fp->data, fp->len); + + IP6_INC_STATS_BH(Ip6ReasmOKs); + return nhptr; + +out_oversize: + if (net_ratelimit()) + printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len); + goto out_fail; +out_oom: + if (net_ratelimit()) + printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); +out_fail: + IP6_INC_STATS_BH(Ip6ReasmFails); + return NULL; +} + +u8* ipv6_reassembly(struct sk_buff **skbp, __u8 *nhptr) +{ + struct sk_buff *skb = *skbp; + struct frag_hdr *fhdr = (struct frag_hdr *) (skb->h.raw); + struct frag_queue *fq; + struct ipv6hdr *hdr; + + hdr = skb->nh.ipv6h; + + IP6_INC_STATS_BH(Ip6ReasmReqds); + + /* Jumbo payload inhibits frag. header */ + if (hdr->payload_len==0) { + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw); + return NULL; + } + if ((u8 *)(fhdr+1) > skb->tail) { + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw); + return NULL; } - del_timer(&fq->timer); - fq->prev->next = fq->next; - fq->next->prev = fq->prev; - fq->prev = fq->next = NULL; + if (!(fhdr->frag_off & __constant_htons(0xFFF9))) { + /* It is not a fragmented frame */ + skb->h.raw += sizeof(struct frag_hdr); + IP6_INC_STATS_BH(Ip6ReasmOKs); - frag_kfree_s(fq, sizeof(*fq)); + return &fhdr->nexthdr; + } - IP6_INC_STATS_BH(Ip6ReasmOKs); - return nhptr; + if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) + ip6_evictor(); + + if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr)) != NULL) { + u8 *ret = NULL; + + spin_lock(&fq->lock); + + ip6_frag_queue(fq, skb, fhdr, nhptr); + + if (fq->last_in == (FIRST_IN|LAST_IN) && + fq->meat == fq->len) + ret = ip6_frag_reasm(fq, skbp); + + spin_unlock(&fq->lock); + fq_put(fq); + return ret; + } + + 
IP6_INC_STATS_BH(Ip6ReasmFails); + kfree_skb(skb); + return NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dc6020c33..dc5ddffd8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: route.c,v 1.45 2000/01/16 05:11:38 davem Exp $ + * $Id: route.c,v 1.46 2000/07/07 22:40:35 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -769,10 +769,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg) goto out; if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) { - rt->rt6i_nexthop = ndisc_get_neigh(dev, &rt->rt6i_gateway); - err = -ENOMEM; - if (rt->rt6i_nexthop == NULL) + rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); + if (IS_ERR(rt->rt6i_nexthop)) { + err = PTR_ERR(rt->rt6i_nexthop); + rt->rt6i_nexthop = NULL; goto out; + } } if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 114b59daa..c8a631f9f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -6,7 +6,7 @@ * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * - * $Id: sit.c,v 1.39 2000/07/07 01:55:20 davem Exp $ + * $Id: sit.c,v 1.41 2000/07/07 23:47:45 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -45,6 +45,7 @@ #include <net/udp.h> #include <net/icmp.h> #include <net/ipip.h> +#include <net/inet_ecn.h> /* This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c @@ -59,7 +60,7 @@ static int ipip6_fb_tunnel_init(struct net_device *dev); static int ipip6_tunnel_init(struct net_device *dev); static struct net_device ipip6_fb_tunnel_dev = { - "", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip6_fb_tunnel_init, + "sit0", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip6_fb_tunnel_init, }; static struct ip_tunnel ipip6_fb_tunnel = { @@ -174,10 +175,10 @@ struct 
ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create) dev->priv = (void*)(dev+1); nt = (struct ip_tunnel*)dev->priv; nt->dev = dev; - strcpy(dev->name, nt->parms.name); dev->init = ipip6_tunnel_init; dev->new_style = 1; memcpy(&nt->parms, parms, sizeof(*parms)); + strcpy(dev->name, nt->parms.name); if (dev->name[0] == 0) { int i; for (i=1; i<100; i++) { @@ -370,6 +371,13 @@ out: #endif } +static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) +{ + if (INET_ECN_is_ce(iph->tos) && + INET_ECN_is_not_ce(ip6_get_dsfield(skb->nh.ipv6h))) + IP6_ECN_set_ce(skb->nh.ipv6h); +} + int ipip6_rcv(struct sk_buff *skb, unsigned short len) { struct iphdr *iph; @@ -394,6 +402,7 @@ int ipip6_rcv(struct sk_buff *skb, unsigned short len) nf_conntrack_put(skb->nfct); skb->nfct = NULL; #endif + ipip6_ecn_decapsulate(iph, skb); netif_rx(skb); read_unlock(&ipip6_lock); return 0; @@ -431,6 +440,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int mtu; struct in6_addr *addr6; int addr_type; + int err; if (tunnel->recursion++) { tunnel->stat.collisions++; @@ -548,7 +558,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) iph->frag_off = 0; iph->protocol = IPPROTO_IPV6; - iph->tos = tos; + iph->tos = INET_ECN_encapsulate(tos, ip6_get_dsfield(iph6)); iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; @@ -564,10 +574,17 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->nfct = NULL; #endif + err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, + do_ip_send); + if(err < 0) { + if(net_ratelimit()) + printk(KERN_ERR "ipip6_tunnel_xmit: ip_send() failed, err=%d\n", -err); + skb = NULL; + goto tx_error; + } + stats->tx_bytes += skb->len; stats->tx_packets++; - NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, - do_ip_send); tunnel->recursion--; return 0; @@ -576,7 +593,8 @@ tx_error_icmp: dst_link_failure(skb); tx_error: stats->tx_errors++; - 
dev_kfree_skb(skb); + if(skb) + dev_kfree_skb(skb); tunnel->recursion--; return 0; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4b3bf084b..f9f0c0dc9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -7,7 +7,7 @@ * * Based on linux/ipv4/udp.c * - * $Id: udp.c,v 1.53 2000/05/03 06:37:07 davem Exp $ + * $Id: udp.c,v 1.55 2000/07/08 00:20:43 davem Exp $ * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support @@ -400,7 +400,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len, if (err) goto out_free; - sk->stamp=skb->stamp; + sock_recv_timestamp(msg, sk, skb); /* Copy the address. */ if (msg->msg_name) { @@ -868,6 +868,8 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen) fl.proto = IPPROTO_UDP; fl.fl6_dst = daddr; + if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr)) + fl.fl6_src = &np->saddr; fl.uli_u.ports.dport = udh.uh.dest; fl.uli_u.ports.sport = udh.uh.source; diff --git a/net/netsyms.c b/net/netsyms.c index e1bfc3403..91c7a1074 100644 --- a/net/netsyms.c +++ b/net/netsyms.c @@ -69,17 +69,6 @@ extern int netdev_finish_unregister(struct net_device *dev); #include <linux/rtnetlink.h> -#if defined(CONFIG_ULTRA) || defined(CONFIG_WD80x3) || \ - defined(CONFIG_EL2) || defined(CONFIG_NE2000) || \ - defined(CONFIG_E2100) || defined(CONFIG_HPLAN_PLUS) || \ - defined(CONFIG_HPLAN) || defined(CONFIG_AC3200) || \ - defined(CONFIG_ES3210) || defined(CONFIG_ULTRA32) || \ - defined(CONFIG_LNE390) || defined(CONFIG_NE3210) || \ - defined(CONFIG_NE2K_PCI) || defined(CONFIG_APNE) || \ - defined(CONFIG_DAYNAPORT) -#include "../drivers/net/8390.h" -#endif - #ifdef CONFIG_IPX_MODULE extern struct datalink_proto *make_EII_client(void); extern struct datalink_proto *make_8023_client(void); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2955a04f6..3b2df4f55 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -5,7 +5,7 @@ * * PACKET - implements raw packet sockets. 
* - * Version: $Id: af_packet.c,v 1.34 2000/04/25 04:13:35 davem Exp $ + * Version: $Id: af_packet.c,v 1.36 2000/07/08 00:20:43 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -1053,7 +1053,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len, err = memcpy_toiovec(msg->msg_iov, skb->data, copied); if (err) goto out_free; - sk->stamp=skb->stamp; + + sock_recv_timestamp(msg, sk, skb); if (msg->msg_name) memcpy(msg->msg_name, skb->cb, msg->msg_namelen); @@ -1392,6 +1393,23 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg switch(cmd) { + case SIOCOUTQ: + { + int amount = atomic_read(&sk->wmem_alloc); + return put_user(amount, (int *)arg); + } + case SIOCINQ: + { + struct sk_buff *skb; + int amount = 0; + + spin_lock_bh(&sk->receive_queue.lock); + skb = skb_peek(&sk->receive_queue); + if (skb) + amount = skb->len; + spin_unlock_bh(&sk->receive_queue.lock); + return put_user(amount, (int *)arg); + } case FIOSETOWN: case SIOCSPGRP: err = get_user(pid, (int *) arg); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index d8c117247..1a4a501c9 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -39,6 +39,7 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <net/pkt_sched.h> +#include <net/inet_ecn.h> #define RED_ECN_ECT 0x02 #define RED_ECN_CE 0x01 @@ -170,14 +171,9 @@ static int red_ecn_mark(struct sk_buff *skb) if (!(tos & RED_ECN_ECT)) return 0; - if (!(tos & RED_ECN_CE)) { - u32 check = skb->nh.iph->check; + if (!(tos & RED_ECN_CE)) + IP_ECN_set_ce(skb->nh.iph); - check += __constant_htons(0xFFFE); - skb->nh.iph->check = check + (check>>16); - - skb->nh.iph->tos = tos | RED_ECN_CE; - } return 1; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 7ea61ce5c..1b7119ffd 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -242,9 +242,9 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, 
struct net_device * memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) { atomic_inc(&n->refcnt); } else { - n = __neigh_lookup(mn->tbl, mn->primary_key, dev, 1); - if (n == NULL) - return -ENOBUFS; + n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev); + if (IS_ERR(n)) + return PTR_ERR(n); } if (neigh_event_send(n, skb_res) == 0) { int err; |