author    | Ralf Baechle <ralf@linux-mips.org> | 2000-02-04 07:40:19 +0000
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-02-04 07:40:19 +0000
commit    | 33263fc5f9ac8e8cb2b22d06af3ce5ac1dd815e4 (patch)
tree      | 2d1b86a40bef0958a68cf1a2eafbeb0667a70543 /include/asm-i386
parent    | 216f5f51aa02f8b113aa620ebc14a9631a217a00 (diff)
Merge with Linux 2.3.32.
Diffstat (limited to 'include/asm-i386')
28 files changed, 1016 insertions, 477 deletions
diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h index 6346f91d1..0112f9b35 100644 --- a/include/asm-i386/atomic.h +++ b/include/asm-i386/atomic.h @@ -46,6 +46,17 @@ static __inline__ void atomic_sub(int i, volatile atomic_t *v) :"ir" (i), "m" (__atomic_fool_gcc(v))); } +static __inline__ int atomic_sub_and_test(int i, volatile atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "subl %2,%0; sete %1" + :"=m" (__atomic_fool_gcc(v)), "=qm" (c) + :"ir" (i), "m" (__atomic_fool_gcc(v))); + return c; +} + static __inline__ void atomic_inc(volatile atomic_t *v) { __asm__ __volatile__( diff --git a/include/asm-i386/bitops.h b/include/asm-i386/bitops.h index 08df0f278..86068d069 100644 --- a/include/asm-i386/bitops.h +++ b/include/asm-i386/bitops.h @@ -132,8 +132,7 @@ extern __inline__ int find_first_zero_bit(void * addr, unsigned size) if (!size) return 0; - __asm__("cld\n\t" - "movl $-1,%%eax\n\t" + __asm__("movl $-1,%%eax\n\t" "xorl %%edx,%%edx\n\t" "repe; scasl\n\t" "je 1f\n\t" diff --git a/include/asm-i386/cache.h b/include/asm-i386/cache.h index 379568491..7260b50e4 100644 --- a/include/asm-i386/cache.h +++ b/include/asm-i386/cache.h @@ -11,16 +11,4 @@ #define L1_CACHE_BYTES 16 #endif -#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)) - -#define SMP_CACHE_BYTES L1_CACHE_BYTES - -#ifdef MODULE -#define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES))) -#else -#define __cacheline_aligned \ - __attribute__((__aligned__(L1_CACHE_BYTES), \ - __section__(".data.cacheline_aligned"))) -#endif - #endif diff --git a/include/asm-i386/e820.h b/include/asm-i386/e820.h index 50b708d91..47cd4856c 100644 --- a/include/asm-i386/e820.h +++ b/include/asm-i386/e820.h @@ -18,7 +18,8 @@ #define E820_RAM 1 #define E820_RESERVED 2 -#define E820_ACPI 3 +#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */ +#define E820_NVS 4 #define HIGH_MEMORY (1024*1024) diff --git a/include/asm-i386/hardirq.h b/include/asm-i386/hardirq.h index f96faa806..4679cb3ef 100644 --- a/include/asm-i386/hardirq.h +++ b/include/asm-i386/hardirq.h @@ -17,8 +17,8 @@ extern unsigned int local_irq_count[NR_CPUS]; #define hardirq_trylock(cpu) (local_irq_count[cpu] == 0) #define hardirq_endlock(cpu) do { } while (0) -#define hardirq_enter(cpu) (local_irq_count[cpu]++) -#define hardirq_exit(cpu) (local_irq_count[cpu]--) +#define irq_enter(cpu, irq) (local_irq_count[cpu]++) +#define irq_exit(cpu, irq) (local_irq_count[cpu]--) #define synchronize_irq() barrier() @@ -39,13 +39,17 @@ static inline void release_irqlock(int cpu) } } -static inline void hardirq_enter(int cpu) +static inline void irq_enter(int cpu, int irq) { ++local_irq_count[cpu]; atomic_inc(&global_irq_count); + + while (test_bit(0,&global_irq_lock)) { + /* nothing */; + } } -static inline void hardirq_exit(int cpu) +static inline void irq_exit(int cpu, int irq) { atomic_dec(&global_irq_count); --local_irq_count[cpu]; diff --git a/include/asm-i386/highmem.h b/include/asm-i386/highmem.h index d17d108c7..0964c435d 100644 --- a/include/asm-i386/highmem.h +++ b/include/asm-i386/highmem.h @@ -20,6 +20,7 @@ #ifdef __KERNEL__ +#include <linux/config.h> #include <linux/init.h> #include <linux/interrupt.h> #include <asm/kmap_types.h> @@ -37,7 +38,20 @@ extern pte_t *pkmap_page_table; extern void kmap_init(void) __init; -#define PKMAP_BASE (0xff000000UL) +/* + * Right now we initialize only a single pte table. 
It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. + */ +#define PKMAP_BASE (0xfe000000UL) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif +#define LAST_PKMAP_MASK (LAST_PKMAP-1) +#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) extern unsigned long FASTCALL(kmap_high(struct page *page)); extern void FASTCALL(kunmap_high(struct page *page)); diff --git a/include/asm-i386/hw_irq.h b/include/asm-i386/hw_irq.h index 894055a7d..0a2e01c36 100644 --- a/include/asm-i386/hw_irq.h +++ b/include/asm-i386/hw_irq.h @@ -79,10 +79,10 @@ extern void init_8259A(int aeoi); extern void FASTCALL(send_IPI_self(int vector)); extern void init_VISWS_APIC_irqs(void); extern void setup_IO_APIC(void); +extern void disable_IO_APIC(void); +extern void print_IO_APIC(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); extern void send_IPI(int dest, int vector); -extern void init_pic_mode(void); -extern void print_IO_APIC(void); extern unsigned long io_apic_irqs; extern volatile unsigned long irq_err_count; diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h index 75cfb939b..7a4e9facc 100644 --- a/include/asm-i386/io.h +++ b/include/asm-i386/io.h @@ -71,12 +71,12 @@ __IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i #define __INS(s) \ extern inline void ins##s(unsigned short port, void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("cld ; rep ; ins" #s \ +{ __asm__ __volatile__ ("rep ; ins" #s \ : "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } #define __OUTS(s) \ extern inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("cld ; rep ; outs" #s \ +{ __asm__ __volatile__ ("rep ; outs" #s \ : "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } #define RETURN_TYPE unsigned char @@ -101,9 +101,10 @@ __OUTS(b) __OUTS(w) __OUTS(l) +#define IO_SPACE_LIMIT 0xffff + #ifdef __KERNEL__ -#include <asm/page.h> #include <linux/vmalloc.h> /* @@ -228,6 +229,22 @@ out: return retval; } +static inline int isa_check_signature(unsigned long io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (isa_readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + /* Nothing to do */ #define dma_cache_inv(_start,_size) do { } while (0) diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index cd142e995..fb3af63ae 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -3,6 +3,7 @@ #include <asm/desc.h> #include <asm/atomic.h> +#include <asm/pgalloc.h> /* * possibly do the LDT unload here? 
@@ -10,9 +11,12 @@ #define destroy_context(mm) do { } while(0) #define init_new_context(tsk,mm) do { } while (0) +#ifdef __SMP__ +extern unsigned int cpu_tlbbad[NR_CPUS]; +#endif + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) { - if (prev != next) { /* * Re-load LDT if necessary @@ -24,6 +28,13 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, str asm volatile("movl %0,%%cr3": :"r" (__pa(next->pgd))); clear_bit(cpu, &prev->cpu_vm_mask); } +#ifdef __SMP__ + else { + if(cpu_tlbbad[cpu]) + local_flush_tlb(); + } + cpu_tlbbad[cpu] = 0; +#endif set_bit(cpu, &next->cpu_vm_mask); } diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index 301db5816..c5e36bdf5 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -50,6 +50,9 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) +#define __pte(x) ((pte_t) { (x) } ) +#define __pmd(x) ((pmd_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-i386/pgalloc-2level.h b/include/asm-i386/pgalloc-2level.h new file mode 100644 index 000000000..4ff5ce3b7 --- /dev/null +++ b/include/asm-i386/pgalloc-2level.h @@ -0,0 +1,23 @@ +#ifndef _I386_PGALLOC_2LEVEL_H +#define _I386_PGALLOC_2LEVEL_H + +/* + * traditional i386 two-level paging, page table allocation routines: + */ + +extern __inline__ pmd_t *get_pmd_fast(void) +{ + return (pmd_t *)0; +} + +extern __inline__ void free_pmd_fast(pmd_t *pmd) { } +extern __inline__ void free_pmd_slow(pmd_t *pmd) { } + +extern inline pmd_t * pmd_alloc(pgd_t *pgd, unsigned long address) +{ + if (!pgd) + BUG(); + return (pmd_t *) pgd; +} + +#endif /* _I386_PGALLOC_2LEVEL_H */ diff --git a/include/asm-i386/pgalloc-3level.h b/include/asm-i386/pgalloc-3level.h new file mode 100644 index 000000000..30099a755 --- /dev/null +++ b/include/asm-i386/pgalloc-3level.h @@ -0,0 +1,68 @@ +#ifndef _I386_PGALLOC_3LEVEL_H +#define _I386_PGALLOC_3LEVEL_H + +/* + * Intel Physical Address Extension (PAE) Mode - three-level page + * tables on PPro+ CPUs. Page-table allocation routines. 
+ * + * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> + */ + +extern __inline__ pmd_t *get_pmd_slow(void) +{ + pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL); + + if (ret) + memset(ret, 0, PAGE_SIZE); + return ret; +} + +extern __inline__ pmd_t *get_pmd_fast(void) +{ + unsigned long *ret; + + if ((ret = pmd_quicklist) != NULL) { + pmd_quicklist = (unsigned long *)(*ret); + ret[0] = 0; + pgtable_cache_size--; + } else + ret = (unsigned long *)get_pmd_slow(); + return (pmd_t *)ret; +} + +extern __inline__ void free_pmd_fast(pmd_t *pmd) +{ + *(unsigned long *)pmd = (unsigned long) pmd_quicklist; + pmd_quicklist = (unsigned long *) pmd; + pgtable_cache_size++; +} + +extern __inline__ void free_pmd_slow(pmd_t *pmd) +{ + free_page((unsigned long)pmd); +} + +extern inline pmd_t * pmd_alloc(pgd_t *pgd, unsigned long address) +{ + if (!pgd) + BUG(); + address = (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); + if (pgd_none(*pgd)) { + pmd_t *page = get_pmd_fast(); + + if (!page) + page = get_pmd_slow(); + if (page) { + if (pgd_none(*pgd)) { + set_pgd(pgd, __pgd(1 + __pa(page))); + __flush_tlb(); + return page + address; + } else + free_pmd_fast(page); + } else + return NULL; + } + return (pmd_t *)pgd_page(*pgd) + address; +} + +#endif /* _I386_PGALLOC_3LEVEL_H */ diff --git a/include/asm-i386/pgalloc.h b/include/asm-i386/pgalloc.h new file mode 100644 index 000000000..3cdfac12f --- /dev/null +++ b/include/asm-i386/pgalloc.h @@ -0,0 +1,264 @@ +#ifndef _I386_PGALLOC_H +#define _I386_PGALLOC_H + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/fixmap.h> +#include <linux/threads.h> + +#define pgd_quicklist (current_cpu_data.pgd_quick) +#define pmd_quicklist (current_cpu_data.pmd_quick) +#define pte_quicklist (current_cpu_data.pte_quick) +#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) + +#if CONFIG_X86_PAE +# include <asm/pgalloc-3level.h> +#else +# include <asm/pgalloc-2level.h> +#endif + +/* + * Allocate and free page tables. The xxx_kernel() versions are + * used to allocate a kernel page table - this turns on ASN bits + * if any. 
+ */ + +extern __inline__ pgd_t *get_pgd_slow(void) +{ + pgd_t *ret = (pgd_t *)__get_free_page(GFP_KERNEL); + + if (ret) { +#if CONFIG_X86_PAE + int i; + for (i = 0; i < USER_PTRS_PER_PGD; i++) + __pgd_clear(ret + i); +#else + memset(ret, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); +#endif + memcpy(ret + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + } + return ret; +} + +extern __inline__ pgd_t *get_pgd_fast(void) +{ + unsigned long *ret; + + if ((ret = pgd_quicklist) != NULL) { + pgd_quicklist = (unsigned long *)(*ret); + ret[0] = 0; + pgtable_cache_size--; + } else + ret = (unsigned long *)get_pgd_slow(); + return (pgd_t *)ret; +} + +extern __inline__ void free_pgd_fast(pgd_t *pgd) +{ + *(unsigned long *)pgd = (unsigned long) pgd_quicklist; + pgd_quicklist = (unsigned long *) pgd; + pgtable_cache_size++; +} + +extern __inline__ void free_pgd_slow(pgd_t *pgd) +{ + free_page((unsigned long)pgd); +} + +extern pte_t *get_pte_slow(pmd_t *pmd, unsigned long address_preadjusted); +extern pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long address_preadjusted); + +extern __inline__ pte_t *get_pte_fast(void) +{ + unsigned long *ret; + + if((ret = (unsigned long *)pte_quicklist) != NULL) { + pte_quicklist = (unsigned long *)(*ret); + ret[0] = ret[1]; + pgtable_cache_size--; + } + return (pte_t *)ret; +} + +extern __inline__ void free_pte_fast(pte_t *pte) +{ + *(unsigned long *)pte = (unsigned long) pte_quicklist; + pte_quicklist = (unsigned long *) pte; + pgtable_cache_size++; +} + +extern __inline__ void free_pte_slow(pte_t *pte) +{ + free_page((unsigned long)pte); +} + +#define pte_free_kernel(pte) free_pte_slow(pte) +#define pte_free(pte) free_pte_slow(pte) +#define pgd_free(pgd) free_pgd_slow(pgd) +#define pgd_alloc() get_pgd_fast() + +extern inline pte_t * pte_alloc_kernel(pmd_t * pmd, unsigned long address) +{ + if (!pmd) + BUG(); + address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); + if (pmd_none(*pmd)) { + pte_t * page = (pte_t *) get_pte_fast(); + + if (!page) + return get_pte_kernel_slow(pmd, address); + set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(page))); + return page + address; + } + if (pmd_bad(*pmd)) { + __handle_bad_pmd_kernel(pmd); + return NULL; + } + return (pte_t *) pmd_page(*pmd) + address; +} + +extern inline pte_t * pte_alloc(pmd_t * pmd, unsigned long address) +{ + address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); + + if (pmd_none(*pmd)) + goto getnew; + if (pmd_bad(*pmd)) + goto fix; + return (pte_t *)pmd_page(*pmd) + address; +getnew: +{ + unsigned long page = (unsigned long) get_pte_fast(); + + if (!page) + return get_pte_slow(pmd, address); + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(page))); + return (pte_t *)page + address; +} +fix: + __handle_bad_pmd(pmd); + return NULL; +} + +/* + * allocating and freeing a pmd is trivial: the 1-entry pmd is + * inside the pgd, so has no extra memory associated with it. + * (In the PAE case we free the page.) 
+ */ +#define pmd_free(pmd) free_pmd_slow(pmd) + +#define pmd_free_kernel pmd_free +#define pmd_alloc_kernel pmd_alloc + +extern int do_check_pgt_cache(int, int); + +extern inline void set_pgdir(unsigned long address, pgd_t entry) +{ + struct task_struct * p; + pgd_t *pgd; +#ifdef __SMP__ + int i; +#endif + + read_lock(&tasklist_lock); + for_each_task(p) { + if (!p->mm) + continue; + *pgd_offset(p->mm,address) = entry; + } + read_unlock(&tasklist_lock); +#ifndef __SMP__ + for (pgd = (pgd_t *)pgd_quicklist; pgd; pgd = (pgd_t *)*(unsigned long *)pgd) + pgd[address >> PGDIR_SHIFT] = entry; +#else + /* To pgd_alloc/pgd_free, one holds master kernel lock and so does our callee, so we can + modify pgd caches of other CPUs as well. -jj */ + for (i = 0; i < NR_CPUS; i++) + for (pgd = (pgd_t *)cpu_data[i].pgd_quick; pgd; pgd = (pgd_t *)*(unsigned long *)pgd) + pgd[address >> PGDIR_SHIFT] = entry; +#endif +} + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(mm, start, end) flushes a range of pages + * + * ..but the i386 has somewhat limited tlb flushing capabilities, + * and page-granular flushes are available only on i486 and up. + */ + +#ifndef __SMP__ + +#define flush_tlb() __flush_tlb() +#define flush_tlb_all() __flush_tlb() +#define local_flush_tlb() __flush_tlb() + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + if (mm == current->active_mm) + __flush_tlb(); +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr) +{ + if (vma->vm_mm == current->active_mm) + __flush_tlb_one(addr); +} + +static inline void flush_tlb_range(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + if (mm == current->active_mm) + __flush_tlb(); +} + +#else + +/* + * We aren't very clever about this yet - SMP could certainly + * avoid some global flushes.. + */ + +#include <asm/smp.h> + +#define local_flush_tlb() \ + __flush_tlb() + +extern void flush_tlb_all(void); +extern void flush_tlb_current_task(void); +extern void flush_tlb_mm(struct mm_struct *); +extern void flush_tlb_page(struct vm_area_struct *, unsigned long); + +#define flush_tlb() flush_tlb_current_task() + +static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end) +{ + flush_tlb_mm(mm); +} + +extern volatile unsigned long smp_invalidate_needed; +extern unsigned int cpu_tlbbad[NR_CPUS]; + +static inline void do_flush_tlb_local(void) +{ + unsigned long cpu = smp_processor_id(); + struct mm_struct *mm = current->mm; + + clear_bit(cpu, &smp_invalidate_needed); + if (mm) { + set_bit(cpu, &mm->cpu_vm_mask); + local_flush_tlb(); + } else { + cpu_tlbbad[cpu] = 1; + } +} + +#endif + +#endif /* _I386_PGALLOC_H */ diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 3ca3551d3..dbb12f801 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -34,6 +34,19 @@ extern inline int pgd_bad(pgd_t pgd) { return 0; } extern inline int pgd_present(pgd_t pgd) { return 1; } #define pgd_clear(xp) do { } while (0) +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. 
+ */ +#define set_pte(pteptr, pteval) (*(pteptr) = pteval) +/* + * (pmds are folded into pgds so this doesnt get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) +#define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) + #define pgd_page(pgd) \ ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) @@ -42,19 +55,4 @@ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) return (pmd_t *) dir; } -extern __inline__ pmd_t *get_pmd_fast(void) -{ - return (pmd_t *)0; -} - -extern __inline__ void free_pmd_fast(pmd_t *pmd) { } -extern __inline__ void free_pmd_slow(pmd_t *pmd) { } - -extern inline pmd_t * pmd_alloc(pgd_t *pgd, unsigned long address) -{ - if (!pgd) - BUG(); - return (pmd_t *) pgd; -} - #endif /* _I386_PGTABLE_2LEVEL_H */ diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index d9e813def..5ee848ef3 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -27,19 +27,31 @@ #define PTRS_PER_PTE 512 #define pte_ERROR(e) \ - printk("%s:%d: bad pte %016Lx.\n", __FILE__, __LINE__, pte_val(e)) + printk("%s:%d: bad pte %p(%016Lx).\n", __FILE__, __LINE__, &(e), pte_val(e)) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %016Lx.\n", __FILE__, __LINE__, pmd_val(e)) + printk("%s:%d: bad pmd %p(%016Lx).\n", __FILE__, __LINE__, &(e), pmd_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %016Lx.\n", __FILE__, __LINE__, pgd_val(e)) + printk("%s:%d: bad pgd %p(%016Lx).\n", __FILE__, __LINE__, &(e), pgd_val(e)) /* * Subtle, in PAE mode we cannot have zeroes in the top level - * page directory, the CPU enforces this. + * page directory, the CPU enforces this. (ie. the PGD entry + * always has to have the present bit set.) The CPU caches + * the 4 pgd entries internally, so there is no extra memory + * load on TLB miss, despite one more level of indirection. */ -#define pgd_none(x) (pgd_val(x) == 1ULL) +#define EMPTY_PGD (__pa(empty_zero_page) + 1) +#define pgd_none(x) (pgd_val(x) == EMPTY_PGD) extern inline int pgd_bad(pgd_t pgd) { return 0; } extern inline int pgd_present(pgd_t pgd) { return !pgd_none(pgd); } + +#define set_pte(pteptr,pteval) \ + set_64bit((unsigned long long *)(pteptr),pte_val(pteval)) +#define set_pmd(pmdptr,pmdval) \ + set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval)) +#define set_pgd(pgdptr,pgdval) \ + set_64bit((unsigned long long *)(pgdptr),pgd_val(pgdval)) + /* * Pentium-II errata A13: in PAE mode we explicitly have to flush * the TLB via cr3 if the top-level pgd is changed... This was one tough @@ -48,7 +60,7 @@ extern inline int pgd_present(pgd_t pgd) { return !pgd_none(pgd); } */ extern inline void __pgd_clear (pgd_t * pgd) { - pgd_val(*pgd) = 1; // no zero allowed! 
+ set_pgd(pgd, __pgd(EMPTY_PGD)); } extern inline void pgd_clear (pgd_t * pgd) @@ -64,61 +76,4 @@ extern inline void pgd_clear (pgd_t * pgd) #define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \ __pmd_offset(address)) -extern __inline__ pmd_t *get_pmd_slow(void) -{ - pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL); - - if (ret) - memset(ret, 0, PAGE_SIZE); - return ret; -} - -extern __inline__ pmd_t *get_pmd_fast(void) -{ - unsigned long *ret; - - if ((ret = pmd_quicklist) != NULL) { - pmd_quicklist = (unsigned long *)(*ret); - ret[0] = 0; - pgtable_cache_size--; - } else - ret = (unsigned long *)get_pmd_slow(); - return (pmd_t *)ret; -} - -extern __inline__ void free_pmd_fast(pmd_t *pmd) -{ - *(unsigned long *)pmd = (unsigned long) pmd_quicklist; - pmd_quicklist = (unsigned long *) pmd; - pgtable_cache_size++; -} - -extern __inline__ void free_pmd_slow(pmd_t *pmd) -{ - free_page((unsigned long)pmd); -} - -extern inline pmd_t * pmd_alloc(pgd_t *pgd, unsigned long address) -{ - if (!pgd) - BUG(); - address = (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); - if (pgd_none(*pgd)) { - pmd_t *page = get_pmd_fast(); - - if (!page) - page = get_pmd_slow(); - if (page) { - if (pgd_none(*pgd)) { - pgd_val(*pgd) = 1 + __pa(page); - __flush_tlb(); - return page + address; - } else - free_pmd_fast(page); - } else - return NULL; - } - return (pmd_t *)pgd_page(*pgd) + address; -} - #endif /* _I386_PGTABLE_3LEVEL_H */ diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 9138abfc3..336c27c67 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -27,19 +27,6 @@ extern pgd_t swapper_pg_dir[1024]; #define flush_page_to_ram(page) do { } while (0) #define flush_icache_range(start, end) do { } while (0) -/* - * TLB flushing: - * - * - flush_tlb() flushes the current mm struct TLBs - * - flush_tlb_all() flushes all processes TLBs - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page - * - flush_tlb_range(mm, start, end) flushes a range of pages - * - * ..but the i386 has somewhat limited tlb flushing capabilities, - * and page-granular flushes are available only on i486 and up. - */ - #define __flush_tlb() \ do { unsigned long tmpreg; __asm__ __volatile__("movl %%cr3,%0\n\tmovl %0,%%cr3":"=r" (tmpreg) : :"memory"); } while (0) @@ -49,65 +36,16 @@ do { unsigned long tmpreg; __asm__ __volatile__("movl %%cr3,%0\n\tmovl %0,%%cr3" #define __flush_tlb_one(addr) \ __asm__ __volatile__("invlpg %0": :"m" (*(char *) addr)) #endif - -#ifndef __SMP__ - -#define flush_tlb() __flush_tlb() -#define flush_tlb_all() __flush_tlb() -#define local_flush_tlb() __flush_tlb() - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - if (mm == current->active_mm) - __flush_tlb(); -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb_one(addr); -} - -static inline void flush_tlb_range(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - if (mm == current->active_mm) - __flush_tlb(); -} - -#else /* - * We aren't very clever about this yet - SMP could certainly - * avoid some global flushes.. + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. 
*/ +extern unsigned long empty_zero_page[1024]; +#define ZERO_PAGE(vaddr) (mem_map + MAP_NR(empty_zero_page)) -#include <asm/smp.h> - -#define local_flush_tlb() \ - __flush_tlb() - -extern void flush_tlb_all(void); -extern void flush_tlb_current_task(void); -extern void flush_tlb_mm(struct mm_struct *); -extern void flush_tlb_page(struct vm_area_struct *, unsigned long); - -#define flush_tlb() flush_tlb_current_task() - -static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end) -{ - flush_tlb_mm(mm); -} - -#endif #endif /* !__ASSEMBLY__ */ -#define pgd_quicklist (current_cpu_data.pgd_quick) -#define pmd_quicklist (current_cpu_data.pmd_quick) -#define pte_quicklist (current_cpu_data.pte_quick) -#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) - /* * The Linux x86 paging architecture is 'compile-time dual-mode', it * implements both the traditional 2-level x86 page tables and the @@ -121,13 +59,6 @@ static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, u #endif #endif -/* - * Certain architectures need to do special things when PTEs - * within a page table are directly modified. Thus, the following - * hook is made available. - */ -#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval)) - #define __beep() asm("movb $0x3,%al; outb %al,$0x61") #define PMD_SIZE (1UL << PMD_SHIFT) @@ -221,13 +152,6 @@ static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, u extern unsigned long pg0[1024]; /* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern unsigned long empty_zero_page[1024]; -#define ZERO_PAGE(vaddr) (mem_map + MAP_NR(empty_zero_page)) - -/* * Handling allocation failures during page table setup. */ extern void __handle_bad_pmd(pmd_t * pmd); @@ -235,19 +159,19 @@ extern void __handle_bad_pmd_kernel(pmd_t * pmd); #define pte_none(x) (!pte_val(x)) #define pte_present(x) (pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE)) -#define pte_clear(xp) do { pte_val(*(xp)) = 0; } while (0) +#define pte_clear(xp) do { set_pte(xp, __pte(0)); } while (0) #define pte_pagenr(x) ((unsigned long)((pte_val(x) >> PAGE_SHIFT))) #define pmd_none(x) (!pmd_val(x)) -#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) -#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) /* * Permanent address of a page. Obviously must never be * called on a highmem page. 
*/ -#define page_address(page) ({ if (PageHighMem(page)) BUG(); PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT); }) +#define page_address(page) ({ if (!(page)->virtual) BUG(); (page)->virtual; }) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) #define pte_page(x) (mem_map+pte_pagenr(x)) @@ -261,37 +185,37 @@ extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } extern inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } -extern inline pte_t pte_rdprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_USER; return pte; } -extern inline pte_t pte_exprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_USER; return pte; } -extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } -extern inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } -extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_RW; return pte; } -extern inline pte_t pte_mkread(pte_t pte) { pte_val(pte) |= _PAGE_USER; return pte; } -extern inline pte_t pte_mkexec(pte_t pte) { pte_val(pte) |= _PAGE_USER; return pte; } -extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } -extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } -extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_RW; return pte; } +extern inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } +extern inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } +extern inline pte_t pte_mkclean(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); return pte; } +extern inline pte_t pte_mkold(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED)); return pte; } +extern inline pte_t pte_wrprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW)); return pte; } +extern inline pte_t pte_mkread(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER)); return pte; } +extern inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER)); return pte; } +extern inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } +extern inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } +extern inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. 
*/ -extern inline pte_t mk_pte(struct page *page, pgprot_t pgprot) -{ - pte_t __pte; - - pte_val(__pte) = (page-mem_map)*(unsigned long long)PAGE_SIZE + - pgprot_val(pgprot); - return __pte; -} +#define mk_pte(page,pgprot) \ +({ \ + pte_t __pte; \ + \ + set_pte(&__pte, __pte(((page)-mem_map) * \ + (unsigned long long)PAGE_SIZE + pgprot_val(pgprot))); \ + __pte; \ +}) /* This takes a physical page address that is used by the remapping functions */ #define mk_pte_phys(physpage, pgprot) \ -({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; }) +({ pte_t __pte; set_pte(&__pte, __pte(physpage + pgprot_val(pgprot))); __pte; }) extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -{ pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; } +{ set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot))); return pte; } #define page_pte(page) page_pte_prot(page, __pgprot(0)) @@ -317,182 +241,10 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) __pte_offset(address)) /* - * Allocate and free page tables. The xxx_kernel() versions are - * used to allocate a kernel page table - this turns on ASN bits - * if any. - */ - -extern __inline__ pgd_t *get_pgd_slow(void) -{ - pgd_t *ret = (pgd_t *)__get_free_page(GFP_KERNEL); - - if (ret) { -#if 0 - /* - * On PAE allocating a whole page is overkill - we will - * either embedd this in mm_struct, or do a SLAB cache. - */ - memcpy(ret, swapper_pg_dir, PTRS_PER_PGD * sizeof(pgd_t)); -#endif -#if CONFIG_X86_PAE - int i; - for (i = 0; i < USER_PTRS_PER_PGD; i++) - __pgd_clear(ret + i); -#else - memset(ret, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); -#endif - memcpy(ret + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); - } - return ret; -} - -extern __inline__ pgd_t *get_pgd_fast(void) -{ - unsigned long *ret; - - if ((ret = pgd_quicklist) != NULL) { - pgd_quicklist = (unsigned long *)(*ret); - ret[0] = 0; - pgtable_cache_size--; - } else - ret = (unsigned long *)get_pgd_slow(); - return (pgd_t *)ret; -} - -extern __inline__ void free_pgd_fast(pgd_t *pgd) -{ - *(unsigned long *)pgd = (unsigned long) pgd_quicklist; - pgd_quicklist = (unsigned long *) pgd; - pgtable_cache_size++; -} - -extern __inline__ void free_pgd_slow(pgd_t *pgd) -{ - free_page((unsigned long)pgd); -} - -extern pte_t *get_pte_slow(pmd_t *pmd, unsigned long address_preadjusted); -extern pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long address_preadjusted); - -extern __inline__ pte_t *get_pte_fast(void) -{ - unsigned long *ret; - - if((ret = (unsigned long *)pte_quicklist) != NULL) { - pte_quicklist = (unsigned long *)(*ret); - ret[0] = ret[1]; - pgtable_cache_size--; - } - return (pte_t *)ret; -} - -extern __inline__ void free_pte_fast(pte_t *pte) -{ - *(unsigned long *)pte = (unsigned long) pte_quicklist; - pte_quicklist = (unsigned long *) pte; - pgtable_cache_size++; -} - -extern __inline__ void free_pte_slow(pte_t *pte) -{ - free_page((unsigned long)pte); -} - -#define pte_free_kernel(pte) free_pte_slow(pte) -#define pte_free(pte) free_pte_slow(pte) -#define pgd_free(pgd) free_pgd_slow(pgd) -#define pgd_alloc() get_pgd_fast() - -extern inline pte_t * pte_alloc_kernel(pmd_t * pmd, unsigned long address) -{ - if (!pmd) - BUG(); - address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); - if (pmd_none(*pmd)) { - pte_t * page = (pte_t *) get_pte_fast(); - - if (!page) - return get_pte_kernel_slow(pmd, address); - pmd_val(*pmd) = _KERNPG_TABLE + __pa(page); - 
return page + address; - } - if (pmd_bad(*pmd)) { - __handle_bad_pmd_kernel(pmd); - return NULL; - } - return (pte_t *) pmd_page(*pmd) + address; -} - -extern inline pte_t * pte_alloc(pmd_t * pmd, unsigned long address) -{ - address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); - - if (pmd_none(*pmd)) - goto getnew; - if (pmd_bad(*pmd)) - goto fix; - return (pte_t *)pmd_page(*pmd) + address; -getnew: -{ - unsigned long page = (unsigned long) get_pte_fast(); - - if (!page) - return get_pte_slow(pmd, address); - pmd_val(*pmd) = _PAGE_TABLE + __pa(page); - return (pte_t *)page + address; -} -fix: - __handle_bad_pmd(pmd); - return NULL; -} - -/* - * allocating and freeing a pmd is trivial: the 1-entry pmd is - * inside the pgd, so has no extra memory associated with it. - * (In the PAE case we free the page.) - */ -#define pmd_free(pmd) free_pmd_slow(pmd) - -#define pmd_free_kernel pmd_free -#define pmd_alloc_kernel pmd_alloc - -extern int do_check_pgt_cache(int, int); - -extern inline void set_pgdir(unsigned long address, pgd_t entry) -{ - struct task_struct * p; - pgd_t *pgd; -#ifdef __SMP__ - int i; -#endif - - read_lock(&tasklist_lock); - for_each_task(p) { - if (!p->mm) - continue; - *pgd_offset(p->mm,address) = entry; - } - read_unlock(&tasklist_lock); -#ifndef __SMP__ - for (pgd = (pgd_t *)pgd_quicklist; pgd; pgd = (pgd_t *)*(unsigned long *)pgd) - pgd[address >> PGDIR_SHIFT] = entry; -#else - /* To pgd_alloc/pgd_free, one holds master kernel lock and so does our callee, so we can - modify pgd caches of other CPUs as well. -jj */ - for (i = 0; i < NR_CPUS; i++) - for (pgd = (pgd_t *)cpu_data[i].pgd_quick; pgd; pgd = (pgd_t *)*(unsigned long *)pgd) - pgd[address >> PGDIR_SHIFT] = entry; -#endif -} - -/* * The i386 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. */ -extern inline void update_mmu_cache(struct vm_area_struct * vma, - unsigned long address, pte_t pte) -{ -} +#define update_mmu_cache(vma,address,pte) do { } while (0) /* Encode and de-code a swap entry */ #define SWP_TYPE(x) (((x).val >> 1) & 0x3f) diff --git a/include/asm-i386/resource.h b/include/asm-i386/resource.h index 0f43dba41..e49c5b8c3 100644 --- a/include/asm-i386/resource.h +++ b/include/asm-i386/resource.h @@ -18,6 +18,12 @@ #define RLIM_NLIMITS 10 +/* + * SuS says limits have to be unsigned. + * Which makes a ton more sense anyway. + */ +#define RLIM_INFINITY (~0UL) + #ifdef __KERNEL__ #define INIT_RLIMITS \ diff --git a/include/asm-i386/rwlock.h b/include/asm-i386/rwlock.h new file mode 100644 index 000000000..ac9e8f36b --- /dev/null +++ b/include/asm-i386/rwlock.h @@ -0,0 +1,86 @@ +/* include/asm-i386/rwlock.h + * + * Helpers used by both rw spinlocks and rw semaphores. + * + * Based in part on code from semaphore.h and + * spinlock.h Copyright 1996 Linus Torvalds. + * + * Copyright 1999 Red Hat, Inc. + * + * Written by Benjamin LaHaise. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _ASM_I386_RWLOCK_H +#define _ASM_I386_RWLOCK_H + +typedef struct { unsigned long a[100]; } __dummy_lock_t; +#define __dummy_lock(lock) (*(__dummy_lock_t *)(lock)) + +#define RW_LOCK_BIAS 0x01000000 +#define RW_LOCK_BIAS_STR "0x01000000" + +#define __build_read_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $1,(%0)\n\t" \ + "js 2f\n" \ + "1:\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\tcall " helper "\n\t" \ + "jmp 1b\n" \ + ".previous" \ + ::"a" (rw) : "memory") + +#define __build_read_lock_const(rw, helper) \ + asm volatile(LOCK "subl $1,%0\n\t" \ + "js 2f\n" \ + "1:\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\tpushl %%eax\n\t" \ + "leal %0,%%eax\n\t" \ + "call " helper "\n\t" \ + "popl %%eax\n\t" \ + "jmp 1b\n" \ + ".previous" \ + :"=m" (__dummy_lock(rw))) + +#define __build_read_lock(rw, helper) do { \ + if (__builtin_constant_p(rw)) \ + __build_read_lock_const(rw, helper); \ + else \ + __build_read_lock_ptr(rw, helper); \ + } while (0) + +#define __build_write_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ + "jnz 2f\n" \ + "1:\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\tcall " helper "\n\t" \ + "jmp 1b\n" \ + ".previous" \ + ::"a" (rw) : "memory") + +#define __build_write_lock_const(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ + "jnz 2f\n" \ + "1:\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\tpushl %%eax\n\t" \ + "leal %0,%%eax\n\t" \ + "call " helper "\n\t" \ + "popl %%eax\n\t" \ + "jmp 1b\n" \ + ".previous" \ + :"=m" (__dummy_lock(rw))) + +#define __build_write_lock(rw, helper) do { \ + if (__builtin_constant_p(rw)) \ + __build_write_lock_const(rw, helper); \ + else \ + __build_write_lock_ptr(rw, helper); \ + } while (0) + +#endif diff --git a/include/asm-i386/semaphore.h b/include/asm-i386/semaphore.h index 7b0cddf96..3cdb0f6cd 100644 --- a/include/asm-i386/semaphore.h +++ b/include/asm-i386/semaphore.h @@ -30,7 +30,7 @@ #include <asm/system.h> #include <asm/atomic.h> -#include <linux/spinlock.h> +#include <asm/rwlock.h> #include <linux/wait.h> struct semaphore { @@ -111,10 +111,7 @@ extern inline void down(struct semaphore * sem) __asm__ __volatile__( "# atomic down operation\n\t" -#ifdef __SMP__ - "lock ; " -#endif - "decl (%0)\n\t" /* --sem->count */ + LOCK "decl (%0)\n\t" /* --sem->count */ "js 2f\n" "1:\n" ".section .text.lock,\"ax\"\n" @@ -136,10 +133,7 @@ extern inline int down_interruptible(struct semaphore * sem) __asm__ __volatile__( "# atomic interruptible down operation\n\t" -#ifdef __SMP__ - "lock ; " -#endif - "decl (%1)\n\t" /* --sem->count */ + LOCK "decl (%1)\n\t" /* --sem->count */ "js 2f\n\t" "xorl %0,%0\n" "1:\n" @@ -163,10 +157,7 @@ extern inline int down_trylock(struct semaphore * sem) __asm__ __volatile__( "# atomic interruptible down operation\n\t" -#ifdef __SMP__ - "lock ; " -#endif - "decl (%1)\n\t" /* --sem->count */ + LOCK "decl (%1)\n\t" /* --sem->count */ "js 2f\n\t" "xorl %0,%0\n" "1:\n" @@ -193,10 +184,7 @@ extern inline void up(struct semaphore * sem) #endif __asm__ __volatile__( "# atomic up operation\n\t" -#ifdef __SMP__ - "lock ; " -#endif - "incl (%0)\n\t" /* ++sem->count */ + LOCK "incl (%0)\n\t" /* ++sem->count */ "jle 2f\n" "1:\n" ".section .text.lock,\"ax\"\n" @@ -208,4 +196,173 @@ extern inline void up(struct semaphore * sem) :"memory"); } +/* rw mutexes (should that be mutices? =) -- throw rw + * spinlocks and semaphores together, and this is what we + * end up with... + * + * The lock is initialized to BIAS. 
This way, a writer + * subtracts BIAS ands gets 0 for the case of an uncontended + * lock. Readers decrement by 1 and see a positive value + * when uncontended, negative if there are writers waiting + * (in which case it goes to sleep). + * + * The value 0x01000000 supports up to 128 processors and + * lots of processes. BIAS must be chosen such that subl'ing + * BIAS once per CPU will result in the long remaining + * negative. + * + * In terms of fairness, this should result in the lock + * flopping back and forth between readers and writers + * under heavy use. + * + * -ben + */ +struct rw_semaphore { + atomic_t count; + volatile unsigned char write_bias_granted; + volatile unsigned char read_bias_granted; + volatile unsigned char pad1; + volatile unsigned char pad2; + wait_queue_head_t wait; + wait_queue_head_t write_bias_wait; +#if WAITQUEUE_DEBUG + long __magic; + atomic_t readers; + atomic_t writers; +#endif +}; + +#if WAITQUEUE_DEBUG +#define __RWSEM_DEBUG_INIT , ATOMIC_INIT(0), ATOMIC_INIT(0) +#else +#define __RWSEM_DEBUG_INIT /* */ +#endif + +#define __RWSEM_INITIALIZER(name) \ +{ ATOMIC_INIT(RW_LOCK_BIAS), 0, 0, 0, 0, __WAIT_QUEUE_HEAD_INITIALIZER((name).wait), \ + __WAIT_QUEUE_HEAD_INITIALIZER((name).write_bias_wait) \ + __SEM_DEBUG_INIT(name) __RWSEM_DEBUG_INIT } + +extern inline void init_rwsem(struct rw_semaphore *sem) +{ + atomic_set(&sem->count, RW_LOCK_BIAS); + sem->read_bias_granted = 0; + sem->write_bias_granted = 0; + init_waitqueue_head(&sem->wait); + init_waitqueue_head(&sem->write_bias_wait); +#if WAITQUEUE_DEBUG + sem->__magic = (long)&sem->__magic; + atomic_set(&sem->readers, 0); + atomic_set(&sem->writers, 0); +#endif +} + +/* we use FASTCALL convention for the helpers */ +extern struct rw_semaphore *FASTCALL(down_read_failed(struct rw_semaphore *sem)); +extern struct rw_semaphore *FASTCALL(down_write_failed(struct rw_semaphore *sem)); +extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *sem)); + +extern inline void down_read(struct rw_semaphore *sem) +{ +#if WAITQUEUE_DEBUG + if (sem->__magic != (long)&sem->__magic) + BUG(); +#endif + __build_read_lock(sem, "__down_read_failed"); +#if WAITQUEUE_DEBUG + if (sem->write_bias_granted) + BUG(); + if (atomic_read(&sem->writers)) + BUG(); + atomic_inc(&sem->readers); +#endif +} + +extern inline void down_write(struct rw_semaphore *sem) +{ +#if WAITQUEUE_DEBUG + if (sem->__magic != (long)&sem->__magic) + BUG(); +#endif + __build_write_lock(sem, "__down_write_failed"); +#if WAITQUEUE_DEBUG + if (atomic_read(&sem->writers)) + BUG(); + if (atomic_read(&sem->readers)) + BUG(); + if (sem->read_bias_granted) + BUG(); + if (sem->write_bias_granted) + BUG(); + atomic_inc(&sem->writers); +#endif +} + +/* When a reader does a release, the only significant + * case is when there was a writer waiting, and we've + * bumped the count to 0: we must wake the writer up. + */ +extern inline void __up_read(struct rw_semaphore *sem) +{ + __asm__ __volatile__( + "# up_read\n\t" + LOCK "incl (%%eax)\n\t" + "jz 2f\n" /* only do the wake if result == 0 (ie, a writer) */ + "1:\n\t" + ".section .text.lock,\"ax\"\n" + "2:\tcall __rwsem_wake\n\t" + "jmp 1b\n" + ".previous" + ::"a" (sem) + :"memory" + ); +} + +/* releasing the writer is easy -- just release it and + * wake up any sleepers. 
+ */ +extern inline void __up_write(struct rw_semaphore *sem) +{ + __asm__ __volatile__( + "# up_write\n\t" + LOCK "addl $" RW_LOCK_BIAS_STR ",(%%eax)\n" + "jc 2f\n" /* only do the wake if the result was -'ve to 0/+'ve */ + "1:\n\t" + ".section .text.lock,\"ax\"\n" + "2:\tcall __rwsem_wake\n\t" + "jmp 1b\n" + ".previous" + ::"a" (sem) + :"memory" + ); +} + +extern inline void up_read(struct rw_semaphore *sem) +{ +#if WAITQUEUE_DEBUG + if (sem->write_bias_granted) + BUG(); + if (atomic_read(&sem->writers)) + BUG(); + atomic_dec(&sem->readers); +#endif + __up_read(sem); +} + +extern inline void up_write(struct rw_semaphore *sem) +{ +#if WAITQUEUE_DEBUG + if (sem->read_bias_granted) + BUG(); + if (sem->write_bias_granted) + BUG(); + if (atomic_read(&sem->readers)) + BUG(); + if (atomic_read(&sem->writers) != 1) + BUG(); + atomic_dec(&sem->writers); +#endif + __up_write(sem); +} + #endif diff --git a/include/asm-i386/setup.h b/include/asm-i386/setup.h index fc1244493..ae25cc427 100644 --- a/include/asm-i386/setup.h +++ b/include/asm-i386/setup.h @@ -2,3 +2,9 @@ * Just a place holder. We don't want to have to test x86 before * we include stuff */ + +#ifndef _i386_SETUP_H +#define _i386_SETUP_H + + +#endif /* _i386_SETUP_H */ diff --git a/include/asm-i386/signal.h b/include/asm-i386/signal.h index b6823738b..b2b8a76f0 100644 --- a/include/asm-i386/signal.h +++ b/include/asm-i386/signal.h @@ -63,6 +63,7 @@ typedef unsigned long sigset_t; #define SIGLOST 29 */ #define SIGPWR 30 +#define SIGSYS 31 #define SIGUNUSED 31 /* These should not be considered constants from userland. */ diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 2aa6aec4e..3425c2cd1 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -174,6 +174,7 @@ extern volatile unsigned long smp_invalidate_needed; extern int pic_mode; extern void smp_flush_tlb(void); extern int get_maxlvt(void); +extern void disable_local_APIC (void); extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); extern void smp_send_reschedule(int cpu); extern void smp_invalidate_rcv(void); /* Process an NMI */ diff --git a/include/asm-i386/smplock.h b/include/asm-i386/smplock.h index 152c1a9fa..ed5c58254 100644 --- a/include/asm-i386/smplock.h +++ b/include/asm-i386/smplock.h @@ -38,6 +38,10 @@ do { \ */ extern __inline__ void lock_kernel(void) { +#if 1 + if (!++current->lock_depth) + spin_lock(&kernel_flag); +#else __asm__ __volatile__( "incl %1\n\t" "jne 9f" @@ -45,12 +49,17 @@ extern __inline__ void lock_kernel(void) "\n9:" :"=m" (__dummy_lock(&kernel_flag)), "=m" (current->lock_depth)); +#endif } extern __inline__ void unlock_kernel(void) { if (current->lock_depth < 0) BUG(); +#if 1 + if (--current->lock_depth < 0) + spin_unlock(&kernel_flag); +#else __asm__ __volatile__( "decl %1\n\t" "jns 9f\n\t" @@ -58,4 +67,5 @@ extern __inline__ void unlock_kernel(void) "\n9:" :"=m" (__dummy_lock(&kernel_flag)), "=m" (current->lock_depth)); +#endif } diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h index a10ed9c5c..5cf17b846 100644 --- a/include/asm-i386/spinlock.h +++ b/include/asm-i386/spinlock.h @@ -1,17 +1,41 @@ #ifndef __ASM_SPINLOCK_H #define __ASM_SPINLOCK_H +#include <asm/atomic.h> +#include <asm/rwlock.h> +#include <asm/page.h> + +extern int printk(const char * fmt, ...) + __attribute__ ((format (printf, 1, 2))); + +/* It seems that people are forgetting to + * initialize their spinlocks properly, tsk tsk. + * Remember to turn this off in 2.4. 
-ben + */ +#define SPINLOCK_DEBUG 1 + /* * Your basic SMP spinlocks, allowing only a single CPU anywhere */ typedef struct { volatile unsigned int lock; +#if SPINLOCK_DEBUG + unsigned magic; +#endif } spinlock_t; -#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } +#define SPINLOCK_MAGIC 0xdead4ead -#define spin_lock_init(x) do { (x)->lock = 0; } while(0) +#if SPINLOCK_DEBUG +#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC +#else +#define SPINLOCK_MAGIC_INIT /* */ +#endif + +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 SPINLOCK_MAGIC_INIT } + +#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) /* * Simple spin lock operations. There are two variants, one clears IRQ's * on the local processor, one does not. @@ -21,9 +45,6 @@ typedef struct { #define spin_unlock_wait(x) do { barrier(); } while(((volatile spinlock_t *)(x))->lock) -typedef struct { unsigned long a[100]; } __dummy_lock_t; -#define __dummy_lock(lock) (*(__dummy_lock_t *)(lock)) - #define spin_lock_string \ "\n1:\t" \ "lock ; btsl $0,%0\n\t" \ @@ -35,18 +56,45 @@ typedef struct { unsigned long a[100]; } __dummy_lock_t; "jmp 1b\n" \ ".previous" +/* + * Sadly, some early PPro chips require the locked access, + * otherwise we could just always simply do + * + * #define spin_unlock_string \ + * "movb $0,%0" + * + * Which is noticeably faster. + */ #define spin_unlock_string \ "lock ; btrl $0,%0" -#define spin_lock(lock) \ -__asm__ __volatile__( \ - spin_lock_string \ - :"=m" (__dummy_lock(lock))) - -#define spin_unlock(lock) \ -__asm__ __volatile__( \ - spin_unlock_string \ - :"=m" (__dummy_lock(lock))) +extern inline void spin_lock(spinlock_t *lock) +{ +#if SPINLOCK_DEBUG + __label__ here; +here: + if (lock->magic != SPINLOCK_MAGIC) { +printk("eip: %p\n", &&here); + BUG(); + } +#endif + __asm__ __volatile__( + spin_lock_string + :"=m" (__dummy_lock(lock))); +} + +extern inline void spin_unlock(spinlock_t *lock) +{ +#if SPINLOCK_DEBUG + if (lock->magic != SPINLOCK_MAGIC) + BUG(); + if (!lock->lock) + BUG(); +#endif + __asm__ __volatile__( + spin_unlock_string + :"=m" (__dummy_lock(lock))); +} #define spin_trylock(lock) (!test_and_set_bit(0,(lock))) @@ -62,47 +110,60 @@ __asm__ __volatile__( \ */ typedef struct { volatile unsigned int lock; +#if SPINLOCK_DEBUG + unsigned magic; +#endif } rwlock_t; -#define RW_LOCK_UNLOCKED (rwlock_t) { 0 } +#define RWLOCK_MAGIC 0xdeaf1eed + +#if SPINLOCK_DEBUG +#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC +#else +#define RWLOCK_MAGIC_INIT /* */ +#endif + +#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT } /* * On x86, we implement read-write locks as a 32-bit counter - * with the high bit (sign) being the "write" bit. + * with the high bit (sign) being the "contended" bit. * * The inline assembly is non-obvious. Think about it. + * + * Changed to use the same technique as rw semaphores. See + * semaphore.h for details. 
-ben */ -#define read_lock(rw) \ - asm volatile("\n1:\t" \ - "lock ; incl %0\n\t" \ - "js 2f\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\tlock ; decl %0\n" \ - "3:\tcmpl $0,%0\n\t" \ - "js 3b\n\t" \ - "jmp 1b\n" \ - ".previous" \ - :"=m" (__dummy_lock(&(rw)->lock))) - -#define read_unlock(rw) \ - asm volatile("lock ; decl %0" \ - :"=m" (__dummy_lock(&(rw)->lock))) - -#define write_lock(rw) \ - asm volatile("\n1:\t" \ - "lock ; btsl $31,%0\n\t" \ - "jc 4f\n" \ - "2:\ttestl $0x7fffffff,%0\n\t" \ - "jne 3f\n" \ - ".section .text.lock,\"ax\"\n" \ - "3:\tlock ; btrl $31,%0\n" \ - "4:\tcmp $0,%0\n\t" \ - "jne 4b\n\t" \ - "jmp 1b\n" \ - ".previous" \ - :"=m" (__dummy_lock(&(rw)->lock))) - -#define write_unlock(rw) \ - asm volatile("lock ; btrl $31,%0":"=m" (__dummy_lock(&(rw)->lock))) +/* the spinlock helpers are in arch/i386/kernel/semaphore.S */ + +extern inline void read_lock(rwlock_t *rw) +{ +#if SPINLOCK_DEBUG + if (rw->magic != RWLOCK_MAGIC) + BUG(); +#endif + __build_read_lock(rw, "__read_lock_failed"); +} + +extern inline void write_lock(rwlock_t *rw) +{ +#if SPINLOCK_DEBUG + if (rw->magic != RWLOCK_MAGIC) + BUG(); +#endif + __build_write_lock(rw, "__write_lock_failed"); +} + +#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" (__dummy_lock(&(rw)->lock))) +#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" (__dummy_lock(&(rw)->lock))) + +extern inline int write_trylock(rwlock_t *lock) +{ + atomic_t *count = (atomic_t *)lock; + if (atomic_sub_and_test(RW_LOCK_BIAS, count)) + return 1; + atomic_add(RW_LOCK_BIAS, count); + return 0; +} #endif /* __ASM_SPINLOCK_H */ diff --git a/include/asm-i386/string-486.h b/include/asm-i386/string-486.h index 4c2531712..7c3ee1a1b 100644 --- a/include/asm-i386/string-486.h +++ b/include/asm-i386/string-486.h @@ -187,7 +187,6 @@ extern inline char * strrchr(const char * s, int c) int d0, d1; register char * __res; __asm__ __volatile__( - "cld\n\t" "movb %%al,%%ah\n" "1:\tlodsb\n\t" "cmpb %%ah,%%al\n\t" @@ -206,7 +205,6 @@ extern inline size_t strspn(const char * cs, const char * ct) int d0, d1; register char * __res; __asm__ __volatile__( - "cld\n\t" "movl %6,%%edi\n\t" "repne\n\t" "scasb\n\t" @@ -234,7 +232,6 @@ extern inline size_t strcspn(const char * cs, const char * ct) int d0, d1; register char * __res; __asm__ __volatile__( - "cld\n\t" "movl %6,%%edi\n\t" "repne\n\t" "scasb\n\t" @@ -263,7 +260,6 @@ extern inline char * strpbrk(const char * cs,const char * ct) int d0, d1; register char * __res; __asm__ __volatile__( - "cld\n\t" "movl %6,%%edi\n\t" "repne\n\t" "scasb\n\t" @@ -296,7 +292,6 @@ extern inline char * strstr(const char * cs,const char * ct) int d0, d1; register char * __res; __asm__ __volatile__( - "cld\n\t" \ "movl %6,%%edi\n\t" "repne\n\t" "scasb\n\t" @@ -378,7 +373,6 @@ __asm__ __volatile__( "1:\txorl %0,%0\n\t" "movl $-1,%%ecx\n\t" "xorl %%eax,%%eax\n\t" - "cld\n\t" "movl %4,%%edi\n\t" "repne\n\t" "scasb\n\t" @@ -474,7 +468,6 @@ extern inline void * __memcpy_g(void * to, const void * from, size_t n) int d0, d1, d2; register void *tmp = (void *)to; __asm__ __volatile__ ( - "cld\n\t" "shrl $1,%%ecx\n\t" "jnc 1f\n\t" "movsb\n" @@ -554,7 +547,6 @@ int d0, d1, d2; register void *tmp = (void *)dest; if (dest<src) __asm__ __volatile__ ( - "cld\n\t" "rep\n\t" "movsb" :"=&c" (d0), "=&S" (d1), "=&D" (d2) @@ -577,7 +569,6 @@ extern inline int memcmp(const void * cs,const void * ct,size_t count) int d0, d1, d2; register int __res; __asm__ __volatile__( - "cld\n\t" "repe\n\t" "cmpsb\n\t" "je 1f\n\t" @@ -597,7 
+588,6 @@ register void * __res; if (!count) return NULL; __asm__ __volatile__( - "cld\n\t" "repne\n\t" "scasb\n\t" "je 1f\n\t" @@ -753,8 +743,7 @@ extern inline void * memscan(void * addr, int c, size_t size) { if (!size) return addr; - __asm__("cld - repnz; scasb + __asm__("repnz; scasb jnz 1f dec %%edi 1: " diff --git a/include/asm-i386/string.h b/include/asm-i386/string.h index ea2e9f85d..515ffa7d5 100644 --- a/include/asm-i386/string.h +++ b/include/asm-i386/string.h @@ -1,6 +1,7 @@ #ifndef _I386_STRING_H_ #define _I386_STRING_H_ +#ifdef __KERNEL__ /* * On a 486 or Pentium, we are better off not using the * byte string operations. But on a 386 or a PPro the @@ -32,7 +33,6 @@ extern inline char * strcpy(char * dest,const char *src) { int d0, d1, d2; __asm__ __volatile__( - "cld\n" "1:\tlodsb\n\t" "stosb\n\t" "testb %%al,%%al\n\t" @@ -47,7 +47,6 @@ extern inline char * strncpy(char * dest,const char *src,size_t count) { int d0, d1, d2, d3; __asm__ __volatile__( - "cld\n" "1:\tdecl %2\n\t" "js 2f\n\t" "lodsb\n\t" @@ -67,7 +66,6 @@ extern inline char * strcat(char * dest,const char * src) { int d0, d1, d2, d3; __asm__ __volatile__( - "cld\n\t" "repne\n\t" "scasb\n\t" "decl %1\n" @@ -85,7 +83,6 @@ extern inline char * strncat(char * dest,const char * src,size_t count) { int d0, d1, d2, d3; __asm__ __volatile__( - "cld\n\t" "repne\n\t" "scasb\n\t" "decl %1\n\t" @@ -110,7 +107,6 @@ extern inline int strcmp(const char * cs,const char * ct) int d0, d1; register int __res; __asm__ __volatile__( - "cld\n" "1:\tlodsb\n\t" "scasb\n\t" "jne 2f\n\t" @@ -132,7 +128,6 @@ extern inline int strncmp(const char * cs,const char * ct,size_t count) register int __res; int d0, d1, d2; __asm__ __volatile__( - "cld\n" "1:\tdecl %3\n\t" "js 2f\n\t" "lodsb\n\t" @@ -156,7 +151,6 @@ extern inline char * strchr(const char * s, int c) int d0; register char * __res; __asm__ __volatile__( - "cld\n\t" "movb %%al,%%ah\n" "1:\tlodsb\n\t" "cmpb %%ah,%%al\n\t" @@ -176,7 +170,6 @@ extern inline char * strrchr(const char * s, int c) int d0, d1; register char * __res; __asm__ __volatile__( - "cld\n\t" "movb %%al,%%ah\n" "1:\tlodsb\n\t" "cmpb %%ah,%%al\n\t" @@ -194,7 +187,6 @@ extern inline size_t strlen(const char * s) int d0; register int __res; __asm__ __volatile__( - "cld\n\t" "repne\n\t" "scasb\n\t" "notl %0\n\t" @@ -207,7 +199,6 @@ extern inline void * __memcpy(void * to, const void * from, size_t n) { int d0, d1, d2; __asm__ __volatile__( - "cld\n\t" "rep ; movsl\n\t" "testb $2,%b4\n\t" "je 1f\n\t" @@ -273,7 +264,6 @@ extern inline void * __constant_memcpy(void * to, const void * from, size_t n) } #define COMMON(x) \ __asm__ __volatile__( \ - "cld\n\t" \ "rep ; movsl" \ x \ : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ @@ -343,13 +333,28 @@ extern __inline__ void *__memcpy3d(void *to, const void *from, size_t len) #endif +/* + * struct_cpy(x,y), copy structure *x into (matching structure) *y. + * + * We get link-time errors if the structure sizes do not match. + * There is no runtime overhead, it's all optimized away at + * compile time. 
+ */ +extern void __struct_cpy_bug (void); + +#define struct_cpy(x,y) \ +({ \ + if (sizeof(*(x)) != sizeof(*(y))) \ + __struct_cpy_bug; \ + memcpy(x, y, sizeof(*(x))); \ +}) + #define __HAVE_ARCH_MEMMOVE extern inline void * memmove(void * dest,const void * src, size_t n) { int d0, d1, d2; if (dest<src) __asm__ __volatile__( - "cld\n\t" "rep\n\t" "movsb" : "=&c" (d0), "=&S" (d1), "=&D" (d2) @@ -379,7 +384,6 @@ register void * __res; if (!count) return NULL; __asm__ __volatile__( - "cld\n\t" "repne\n\t" "scasb\n\t" "je 1f\n\t" @@ -393,7 +397,6 @@ extern inline void * __memset_generic(void * s, char c,size_t count) { int d0, d1; __asm__ __volatile__( - "cld\n\t" "rep\n\t" "stosb" : "=&c" (d0), "=&D" (d1) @@ -414,7 +417,6 @@ extern inline void * __constant_c_memset(void * s, unsigned long c, size_t count { int d0, d1; __asm__ __volatile__( - "cld\n\t" "rep ; stosl\n\t" "testb $2,%b3\n\t" "je 1f\n\t" @@ -475,7 +477,7 @@ extern inline void * __constant_c_and_count_memset(void * s, unsigned long patte return s; } #define COMMON(x) \ -__asm__ __volatile__("cld\n\t" \ +__asm__ __volatile__( \ "rep ; stosl" \ x \ : "=&c" (d0), "=&D" (d1) \ @@ -518,8 +520,7 @@ extern inline void * memscan(void * addr, int c, size_t size) { if (!size) return addr; - __asm__("cld - repnz; scasb + __asm__("repnz; scasb jnz 1f dec %%edi 1: " @@ -528,5 +529,7 @@ extern inline void * memscan(void * addr, int c, size_t size) return addr; } +#endif /* __KERNEL__ */ + #endif #endif diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h index f2ccae723..ac0ff6e1e 100644 --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -3,6 +3,7 @@ #include <linux/kernel.h> #include <asm/segment.h> +#include <linux/bitops.h> /* for LOCK_PREFIX */ #ifdef __KERNEL__ @@ -122,12 +123,62 @@ static inline unsigned long get_limit(unsigned long segment) #define nop() __asm__ __volatile__ ("nop") -#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) + #define tas(ptr) (xchg((ptr),1)) struct __xchg_dummy { unsigned long a[100]; }; #define __xg(x) ((struct __xchg_dummy *)(x)) + +/* + * The semantics of XCHGCMP8B are a bit strange, this is why + * there is a loop and the loading of %%eax and %%edx has to + * be inside. This inlines well in most cases, the cached + * cost is around ~38 cycles. (in the future we might want + * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that + * might have an implicit FPU-save as a cost, so it's not + * clear which path to go.) + */ +extern inline void __set_64bit (unsigned long long * ptr, + unsigned int low, unsigned int high) +{ +__asm__ __volatile__ ( + "1: movl (%0), %%eax; + movl 4(%0), %%edx; + cmpxchg8b (%0); + jnz 1b" + :: "D"(ptr), + "b"(low), + "c"(high) + : + "ax","dx","memory"); +} + +extern void inline __set_64bit_constant (unsigned long long *ptr, + unsigned long long value) +{ + __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL)); +} +#define ll_low(x) *(((unsigned int*)&(x))+0) +#define ll_high(x) *(((unsigned int*)&(x))+1) + +extern void inline __set_64bit_var (unsigned long long *ptr, + unsigned long long value) +{ + __set_64bit(ptr,ll_low(value), ll_high(value)); +} + +#define set_64bit(ptr,value) \ +(__builtin_constant_p(value) ? \ + __set_64bit_constant(ptr, value) : \ + __set_64bit_var(ptr, value) ) + +#define _set_64bit(ptr,value) \ +(__builtin_constant_p(value) ? 
\ + __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \ + __set_64bit(ptr, ll_low(value), ll_high(value)) ) + /* * Note: no "lock" prefix even on SMP: xchg always implies lock anyway * Note 2: xchg has side effect, so that attribute volatile is necessary, @@ -159,6 +210,50 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz } /* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ + +#if CPU != 386 +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + +#define cmpxchg(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) + +#else +/* Compiling for a 386 proper. Is it worth implementing via cli/sti? */ +#endif + +/* * Force strict CPU ordering. * And yes, this is required on UP too when we're talking * to devices. diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index b3cb91024..8678dc583 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -196,6 +196,9 @@ #define __NR_putpmsg 189 /* some people actually want streams */ #define __NR_vfork 190 #define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */ @@ -273,6 +276,19 @@ __asm__ volatile ("int $0x80" \ __syscall_return(type,__res); \ } +#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ +type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5,type6 arg6) \ +{ \ +long __res; \ +__asm__ volatile ("push %%ebp ; movl %%eax,%%ebp ; movl %1,%%eax ; int $0x80 ; pop %%ebp" \ + : "=a" (__res) \ + : "i" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \ + "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)), \ + "0" ((long)(arg6))); \ +__syscall_return(type,__res); \ +} + #ifdef __KERNEL_SYSCALLS__ /* |
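The highmem.h hunk above replaces the single hard-wired PKMAP_BASE with a fixed window of kernel virtual addresses plus two shift macros, PKMAP_NR() and PKMAP_ADDR(), that convert between a pkmap slot and its virtual address. A minimal standalone sketch of that arithmetic, assuming the usual i386 PAGE_SHIFT of 12 and the non-PAE LAST_PKMAP of 1024 (the constants are taken from the hunk, the surrounding program is invented):

```c
#include <stdio.h>

#define PAGE_SHIFT	12				/* assumed: 4 KB i386 pages */
#define PKMAP_BASE	(0xfe000000UL)			/* from the highmem.h hunk */
#define LAST_PKMAP	1024				/* non-PAE value from the hunk */
#define PKMAP_NR(virt)	(((virt) - PKMAP_BASE) >> PAGE_SHIFT)
#define PKMAP_ADDR(nr)	(PKMAP_BASE + ((unsigned long)(nr) << PAGE_SHIFT))

int main(void)
{
	/* the whole pkmap window is one pte table: LAST_PKMAP consecutive pages */
	unsigned long slots[] = { 0, 1, LAST_PKMAP - 1 };
	int i;

	for (i = 0; i < 3; i++) {
		unsigned long virt = PKMAP_ADDR(slots[i]);
		printf("slot %4lu -> %#010lx -> slot %lu\n",
		       slots[i], virt, PKMAP_NR(virt));
	}
	return 0;
}
```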
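The new rwlock.h helpers, the rewritten read/write locks in spinlock.h and the rw_semaphore comment in semaphore.h all lean on the same biased counter: the lock word starts at RW_LOCK_BIAS, a reader subtracts 1 and a writer subtracts the whole bias, so the sign of the result tells the acquirer whether it got in cleanly. A toy, deliberately non-atomic illustration of just that bookkeeping (plain ints instead of LOCK-prefixed instructions; invented, not from the patch):

```c
#include <stdio.h>

#define RW_LOCK_BIAS 0x01000000		/* value from rwlock.h */

int main(void)
{
	int lock = RW_LOCK_BIAS;	/* RW_LOCK_UNLOCKED */

	lock -= 1;			/* read_lock: a reader takes one unit */
	printf("one reader      : 0x%08x (still positive -> uncontended)\n",
	       (unsigned int)lock);
	lock += 1;			/* read_unlock */

	lock -= RW_LOCK_BIAS;		/* write_lock: a writer takes the whole bias */
	printf("one writer      : 0x%08x (exactly zero -> exclusive)\n",
	       (unsigned int)lock);

	lock -= 1;			/* a reader arriving while the writer holds it */
	printf("reader vs writer: %d (negative -> must wait)\n", lock);
	return 0;
}
```

This is also why the write_trylock() added to spinlock.h can detect success with the new atomic_sub_and_test() from atomic.h: only an uncontended lock subtracts down to exactly zero, and on failure the bias is simply added back.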
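system.h now documents cmpxchg() as: compare OLD with MEM, store NEW if they match, and return the initial value, with success detected by comparing the return value against OLD. The usual way to consume that interface is a read-modify-retry loop; the fragment below is only a sketch (atomic_max is an invented helper, and it assumes the cmpxchg() macro above is in scope, i.e. a non-386 build):

```c
/* illustrative sketch, not part of the patch */
static inline void atomic_max(volatile unsigned long *p, unsigned long val)
{
	unsigned long old, seen;

	do {
		old = *p;			/* snapshot the current value   */
		if (old >= val)
			return;			/* already at least 'val', done */
		seen = cmpxchg(p, old, val);	/* returns what was really there */
	} while (seen != old);			/* lost a race: reload and retry */
}
```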
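The unistd.h hunk adds __NR_mmap2 together with a six-argument _syscall6() expander. A hedged sketch of pairing the two from user space; the stub's return and argument types are illustrative choices, not something the patch defines:

```c
#include <asm/unistd.h>

/* expands into: long mmap2(unsigned long addr, ...) { ... int $0x80 ... } */
_syscall6(long, mmap2, unsigned long, addr, unsigned long, len,
	  unsigned long, prot, unsigned long, flags,
	  unsigned long, fd, unsigned long, pgoff)
```

Unlike plain mmap(), mmap2() takes its file offset in pages rather than bytes, which is what lets a 32-bit argument address offsets well past 4 GB.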