Diffstat (limited to 'arch/i386/mm')
-rw-r--r--   arch/i386/mm/Makefile   |  25
-rw-r--r--   arch/i386/mm/extable.c  |  63
-rw-r--r--   arch/i386/mm/fault.c    | 153
-rw-r--r--   arch/i386/mm/init.c     | 180
-rw-r--r--   arch/i386/mm/ioremap.c  | 109
5 files changed, 416 insertions(+), 114 deletions(-)
diff --git a/arch/i386/mm/Makefile b/arch/i386/mm/Makefile
index af75a20a4..cee7d4e6d 100644
--- a/arch/i386/mm/Makefile
+++ b/arch/i386/mm/Makefile
@@ -7,26 +7,7 @@
 #
 # Note 2! The CFLAGS definition is now in the main makefile...

-.c.o:
-        $(CC) $(CFLAGS) -c $<
-.s.o:
-        $(AS) -o $*.o $<
-.c.s:
-        $(CC) $(CFLAGS) -S $<
+O_TARGET := mm.o
+O_OBJS := init.o fault.o ioremap.o extable.o

-OBJS = init.o fault.o
-
-mm.o: $(OBJS)
-        $(LD) -r -o mm.o $(OBJS)
-
-modules:
-
-dep:
-        $(CPP) -M *.c > .depend
-
-#
-# include a dependency file if one exists
-#
-ifeq (.depend,$(wildcard .depend))
-include .depend
-endif
+include $(TOPDIR)/Rules.make
diff --git a/arch/i386/mm/extable.c b/arch/i386/mm/extable.c
new file mode 100644
index 000000000..c43a8d43d
--- /dev/null
+++ b/arch/i386/mm/extable.c
@@ -0,0 +1,63 @@
+/*
+ * linux/arch/i386/mm/extable.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+extern const struct exception_table_entry __start___ex_table[];
+extern const struct exception_table_entry __stop___ex_table[];
+
+static inline unsigned long
+search_one_table(const struct exception_table_entry *first,
+                 const struct exception_table_entry *last,
+                 unsigned long value)
+{
+        /* Some versions of the linker are buggy and do not align the
+           __start pointer along with the section, thus we may be low. */
+        if ((long)first & 3)
+                (long)first = ((long)first | 3) + 1;
+
+        while (first <= last) {
+                const struct exception_table_entry *mid;
+                long diff;
+
+                mid = (last - first) / 2 + first;
+                diff = mid->insn - value;
+                if (diff == 0)
+                        return mid->fixup;
+                else if (diff < 0)
+                        first = mid+1;
+                else
+                        last = mid-1;
+        }
+        return 0;
+}
+
+unsigned long
+search_exception_table(unsigned long addr)
+{
+        unsigned long ret;
+#ifdef CONFIG_MODULES
+        struct module *mp;
+#endif
+
+        /* Search the kernel's table first. */
+        ret = search_one_table(__start___ex_table,
+                               __stop___ex_table-1, addr);
+        if (ret)
+                return ret;
+
+#ifdef CONFIG_MODULES
+        for (mp = module_list; mp != NULL; mp = mp->next) {
+                if (mp->exceptinfo.start != NULL) {
+                        ret = search_one_table(mp->exceptinfo.start,
+                                               mp->exceptinfo.stop-1, addr);
+                        if (ret)
+                                return ret;
+                }
+        }
+#endif
+        return 0;
+}
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 01c259f0f..50dcd0735 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -4,7 +4,6 @@
  * Copyright (C) 1995 Linus Torvalds
  */

-#include <linux/config.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/head.h>
@@ -17,10 +16,61 @@
 #include <linux/mm.h>

 #include <asm/system.h>
-#include <asm/segment.h>
+#include <asm/uaccess.h>
 #include <asm/pgtable.h>

-extern void die_if_kernel(char *,struct pt_regs *,long);
+extern void die_if_kernel(const char *,struct pt_regs *,long);
+
+/*
+ * Ugly, ugly, but the goto's result in better assembly..
+ */
+int __verify_write(const void * addr, unsigned long size)
+{
+        struct vm_area_struct * vma;
+        unsigned long start = (unsigned long) addr;
+
+        if (!size)
+                return 1;
+
+        vma = find_vma(current->mm, start);
+        if (!vma)
+                goto bad_area;
+        if (vma->vm_start > start)
+                goto check_stack;
+
+good_area:
+        if (!(vma->vm_flags & VM_WRITE))
+                goto bad_area;
+        size--;
+        size += start & ~PAGE_MASK;
+        size >>= PAGE_SHIFT;
+        start &= PAGE_MASK;
+
+        for (;;) {
+                do_wp_page(current, vma, start, 1);
+                if (!size)
+                        break;
+                size--;
+                start += PAGE_SIZE;
+                if (start < vma->vm_end)
+                        continue;
+                vma = vma->vm_next;
+                if (!vma || vma->vm_start != start)
+                        goto bad_area;
+                if (!(vma->vm_flags & VM_WRITE))
+                        goto bad_area;;
+        }
+        return 1;
+
+check_stack:
+        if (!(vma->vm_flags & VM_GROWSDOWN))
+                goto bad_area;
+        if (expand_stack(vma, start) == 0)
+                goto good_area;
+
+bad_area:
+        return 0;
+}

 /*
  * This routine handles page faults. It determines the address,
@@ -34,58 +84,76 @@ extern void die_if_kernel(char *,struct pt_regs *,long);
  */
 asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
+        void (*handler)(struct task_struct *,
+                        struct vm_area_struct *,
+                        unsigned long,
+                        int);
+        struct task_struct *tsk = current;
+        struct mm_struct *mm = tsk->mm;
         struct vm_area_struct * vma;
         unsigned long address;
         unsigned long page;
+        unsigned long fixup;
+        int write;

         /* get the address */
         __asm__("movl %%cr2,%0":"=r" (address));
-        vma = find_vma(current, address);
+        down(&mm->mmap_sem);
+        vma = find_vma(mm, address);
         if (!vma)
                 goto bad_area;
         if (vma->vm_start <= address)
                 goto good_area;
         if (!(vma->vm_flags & VM_GROWSDOWN))
                 goto bad_area;
-        if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur)
+        if (error_code & 4) {
+                /*
+                 * accessing the stack below %esp is always a bug.
+                 * The "+ 32" is there due to some instructions (like
+                 * pusha) doing pre-decrement on the stack and that
+                 * doesn't show up until later..
+                 */
+                if (address + 32 < regs->esp)
+                        goto bad_area;
+        }
+        if (expand_stack(vma, address))
                 goto bad_area;
-        vma->vm_offset -= vma->vm_start - (address & PAGE_MASK);
-        vma->vm_start = (address & PAGE_MASK);
 /*
  * Ok, we have a good vm_area for this memory access, so
  * we can handle it..
  */
 good_area:
-        /*
-         * was it a write?
-         */
-        if (error_code & 2) {
-                if (!(vma->vm_flags & VM_WRITE))
-                        goto bad_area;
-        } else {
-                /* read with protection fault? */
-                if (error_code & 1)
-                        goto bad_area;
-                if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+        write = 0;
+        handler = do_no_page;
+        switch (error_code & 3) {
+                default: /* 3: write, present */
+                        handler = do_wp_page;
+#ifdef TEST_VERIFY_AREA
+                        if (regs->cs == KERNEL_CS)
+                                printk("WP fault at %08lx\n", regs->eip);
+#endif
+                        /* fall through */
+                case 2: /* write, not present */
+                        if (!(vma->vm_flags & VM_WRITE))
+                                goto bad_area;
+                        write++;
+                        break;
+                case 1: /* read, present */
                         goto bad_area;
+                case 0: /* read, not present */
+                        if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+                                goto bad_area;
         }
+        handler(tsk, vma, address, write);
+        up(&mm->mmap_sem);
         /*
          * Did it hit the DOS screen memory VA from vm86 mode?
          */
         if (regs->eflags & VM_MASK) {
                 unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
                 if (bit < 32)
-                        current->tss.screen_bitmap |= 1 << bit;
-        }
-        if (error_code & 1) {
-#ifdef CONFIG_TEST_VERIFY_AREA
-                if (regs->cs == KERNEL_CS)
-                        printk("WP fault at %08x\n", regs->eip);
-#endif
-                do_wp_page(vma, address, error_code & 2);
-                return;
+                        tsk->tss.screen_bitmap |= 1 << bit;
         }
-        do_no_page(vma, address, error_code & 2);
         return;

 /*
@@ -93,11 +161,20 @@ good_area:
  * Fix it, but check if it's kernel or user first..
  */
 bad_area:
+        up(&mm->mmap_sem);
+
+        /* Are we prepared to handle this fault? */
+        if ((fixup = search_exception_table(regs->eip)) != 0) {
+                printk("Exception at %lx (%lx)\n", regs->eip, fixup);
+                regs->eip = fixup;
+                return;
+        }
+
         if (error_code & 4) {
-                current->tss.cr2 = address;
-                current->tss.error_code = error_code;
-                current->tss.trap_no = 14;
-                send_sig(SIGSEGV, current, 1);
+                tsk->tss.cr2 = address;
+                tsk->tss.error_code = error_code;
+                tsk->tss.trap_no = 14;
+                force_sig(SIGSEGV, tsk);
                 return;
         }
 /*
@@ -106,14 +183,14 @@ bad_area:
  *
  * First we check if it was the bootup rw-test, though..
  */
-        if (wp_works_ok < 0 && address == TASK_SIZE && (error_code & 1)) {
+        if (wp_works_ok < 0 && !address && (error_code & 1)) {
                 wp_works_ok = 1;
                 pg0[0] = pte_val(mk_pte(0, PAGE_SHARED));
-                invalidate();
+                flush_tlb();
                 printk("This processor honours the WP bit even when in supervisor mode. Good.\n");
                 return;
         }
-        if ((unsigned long) (address-TASK_SIZE) < PAGE_SIZE) {
+        if (address < PAGE_SIZE) {
                 printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
                 pg0[0] = pte_val(mk_pte(0, PAGE_SHARED));
         } else
@@ -121,13 +198,13 @@ bad_area:
         printk(" at virtual address %08lx\n",address);
         __asm__("movl %%cr3,%0" : "=r" (page));
         printk(KERN_ALERT "current->tss.cr3 = %08lx, %%cr3 = %08lx\n",
-                current->tss.cr3, page);
-        page = ((unsigned long *) page)[address >> 22];
+                tsk->tss.cr3, page);
+        page = ((unsigned long *) __va(page))[address >> 22];
         printk(KERN_ALERT "*pde = %08lx\n", page);
         if (page & 1) {
                 page &= PAGE_MASK;
                 address &= 0x003ff000;
-                page = ((unsigned long *) page)[address >> PAGE_SHIFT];
+                page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
                 printk(KERN_ALERT "*pte = %08lx\n", page);
         }
         die_if_kernel("Oops", regs, error_code);
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 296595644..cf258d6de 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -15,13 +15,17 @@
 #include <linux/ptrace.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#ifdef CONFIG_BLK_DEV_INITRD
+#include <linux/blk.h>
+#endif

 #include <asm/system.h>
-#include <asm/segment.h>
+#include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm/dma.h>

-extern void scsi_mem_init(unsigned long);
-extern void sound_mem_init(void);
 extern void die_if_kernel(char *,struct pt_regs *,long);
 extern void show_net_buffers(void);

@@ -29,7 +33,7 @@ extern void show_net_buffers(void);
  * BAD_PAGE is the page that is used for page faults when linux
  * is out-of-memory. Older versions of linux just did a
  * do_exit(), but using this instead means there is less risk
- * for a process dying in kernel mode, possibly leaving a inode
+ * for a process dying in kernel mode, possibly leaving an inode
  * unused etc..
  *
  * BAD_PAGETABLE is the accompanying page-table: it is initialized
@@ -70,15 +74,15 @@ void show_mem(void)
         printk("Mem-info:\n");
         show_free_areas();
         printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
-        i = high_memory >> PAGE_SHIFT;
+        i = max_mapnr;
         while (i-- > 0) {
                 total++;
-                if (mem_map[i] & MAP_PAGE_RESERVED)
+                if (PageReserved(mem_map+i))
                         reserved++;
-                else if (!mem_map[i])
+                else if (!mem_map[i].count)
                         free++;
                 else
-                        shared += mem_map[i]-1;
+                        shared += mem_map[i].count-1;
         }
         printk("%d pages of RAM\n",total);
         printk("%d free pages\n",free);
@@ -111,34 +115,90 @@ unsigned long paging_init(unsigned long start_mem, unsigned long end_mem)
  * and SMM (for laptops with [34]86/SL chips) may need it. It is read
  * and write protected to detect null pointer references in the
  * kernel.
+ * It may also hold the MP configuration table when we are booting SMP.
  */
 #if 0
         memset((void *) 0, 0, PAGE_SIZE);
 #endif
+#ifdef __SMP__
+        if (!smp_scan_config(0x0,0x400)) /* Scan the bottom 1K for a signature */
+        {
+                /*
+                 * FIXME: Linux assumes you have 640K of base ram.. this continues
+                 * the error...
+                 */
+                if (!smp_scan_config(639*0x400,0x400)) /* Scan the top 1K of base RAM */
+                        smp_scan_config(0xF0000,0x10000); /* Scan the 64K of bios */
+        }
+        /*
+         * If it is an SMP machine we should know now, unless the configuration
+         * is in an EISA/MCA bus machine with an extended bios data area. I don't
+         * have such a machine so someone else can fill in the check of the EBDA
+         * here.
+         */
+/*      smp_alloc_memory(8192); */
+#endif
+#ifdef TEST_VERIFY_AREA
+        wp_works_ok = 0;
+#endif
         start_mem = PAGE_ALIGN(start_mem);
-        address = 0;
+        address = PAGE_OFFSET;
         pg_dir = swapper_pg_dir;
+        /* unmap the original low memory mappings */
+        pgd_val(pg_dir[0]) = 0;
         while (address < end_mem) {
+                /*
+                 * The following code enabled 4MB page tables for the
+                 * Intel Pentium cpu, unfortunately the SMP kernel can't
+                 * handle the 4MB page table optimizations yet
+                 */
+#ifndef __SMP__
+                /*
+                 * This will create page tables that
+                 * span up to the next 4MB virtual
+                 * memory boundary, but that's ok,
+                 * we won't use that memory anyway.
+                 */
+                if (x86_capability & 8) {
+#ifdef GAS_KNOWS_CR4
+                        __asm__("movl %%cr4,%%eax\n\t"
+                                "orl $16,%%eax\n\t"
+                                "movl %%eax,%%cr4"
+                                : : :"ax");
+#else
+                        __asm__(".byte 0x0f,0x20,0xe0\n\t"
+                                "orl $16,%%eax\n\t"
+                                ".byte 0x0f,0x22,0xe0"
+                                : : :"ax");
+#endif
+                        wp_works_ok = 1;
+                        pgd_val(pg_dir[768]) = _PAGE_TABLE + _PAGE_4M + __pa(address);
+                        pg_dir++;
+                        address += 4*1024*1024;
+                        continue;
+                }
+#endif
                 /* map the memory at virtual addr 0xC0000000 */
+                /* pg_table is physical at this point */
                 pg_table = (pte_t *) (PAGE_MASK & pgd_val(pg_dir[768]));
                 if (!pg_table) {
-                        pg_table = (pte_t *) start_mem;
+                        pg_table = (pte_t *) __pa(start_mem);
                         start_mem += PAGE_SIZE;
                 }

-                /* also map it temporarily at 0x0000000 for init */
-                pgd_val(pg_dir[0]) = _PAGE_TABLE | (unsigned long) pg_table;
                 pgd_val(pg_dir[768]) = _PAGE_TABLE | (unsigned long) pg_table;
                 pg_dir++;
+                /* now change pg_table to kernel virtual addresses */
+                pg_table = (pte_t *) __va(pg_table);
                 for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) {
-                        if (address < end_mem)
-                                *pg_table = mk_pte(address, PAGE_SHARED);
-                        else
-                                pte_clear(pg_table);
+                        pte_t pte = mk_pte(address, PAGE_KERNEL);
+                        if (address >= end_mem)
+                                pte_val(pte) = 0;
+                        set_pte(pg_table, pte);
                         address += PAGE_SIZE;
                 }
         }
-        invalidate();
+        local_flush_tlb();
         return free_area_init(start_mem, end_mem);
 }

@@ -149,16 +209,25 @@ void mem_init(unsigned long start_mem, unsigned long end_mem)
         int reservedpages = 0;
         int datapages = 0;
         unsigned long tmp;
-        extern int etext;
+        extern int _etext;

         end_mem &= PAGE_MASK;
-        high_memory = end_mem;
+        high_memory = (void *) end_mem;
+        max_mapnr = MAP_NR(end_mem);

         /* clear the zero-page */
         memset(empty_zero_page, 0, PAGE_SIZE);

         /* mark usable pages in the mem_map[] */
-        start_low_mem = PAGE_ALIGN(start_low_mem);
+        start_low_mem = PAGE_ALIGN(start_low_mem)+PAGE_OFFSET;
+
+#ifdef __SMP__
+        /*
+         * But first pinch a few for the stack/trampoline stuff
+         */
+        start_low_mem += PAGE_SIZE; /* 32bit startup code */
+        start_low_mem = smp_alloc_memory(start_low_mem); /* AP processor stacks */
+#endif
         start_mem = PAGE_ALIGN(start_mem);

 /*
@@ -166,53 +235,56 @@ void mem_init(unsigned long start_mem, unsigned long end_mem)
  * They seem to have done something stupid with the floppy
  * controller as well..
  */
-        while (start_low_mem < 0x9f000) {
-                mem_map[MAP_NR(start_low_mem)] = 0;
+        while (start_low_mem < 0x9f000+PAGE_OFFSET) {
+                clear_bit(PG_reserved, &mem_map[MAP_NR(start_low_mem)].flags);
                 start_low_mem += PAGE_SIZE;
         }

-        while (start_mem < high_memory) {
-                mem_map[MAP_NR(start_mem)] = 0;
+        while (start_mem < end_mem) {
+                clear_bit(PG_reserved, &mem_map[MAP_NR(start_mem)].flags);
                 start_mem += PAGE_SIZE;
         }

-#ifdef CONFIG_SCSI
-        scsi_mem_init(high_memory);
-#endif
-#ifdef CONFIG_SOUND
-        sound_mem_init();
-#endif
-        for (tmp = 0 ; tmp < high_memory ; tmp += PAGE_SIZE) {
-                if (mem_map[MAP_NR(tmp)]) {
-                        if (tmp >= 0xA0000 && tmp < 0x100000)
+        for (tmp = PAGE_OFFSET ; tmp < end_mem ; tmp += PAGE_SIZE) {
+                if (tmp >= MAX_DMA_ADDRESS)
+                        clear_bit(PG_DMA, &mem_map[MAP_NR(tmp)].flags);
+                if (PageReserved(mem_map+MAP_NR(tmp))) {
+                        if (tmp >= 0xA0000+PAGE_OFFSET && tmp < 0x100000+PAGE_OFFSET)
                                 reservedpages++;
-                        else if (tmp < (unsigned long) &etext)
+                        else if (tmp < (unsigned long) &_etext)
                                 codepages++;
                         else
                                 datapages++;
                         continue;
                 }
-                mem_map[MAP_NR(tmp)] = 1;
-                free_page(tmp);
+                mem_map[MAP_NR(tmp)].count = 1;
+#ifdef CONFIG_BLK_DEV_INITRD
+                if (!initrd_start || (tmp < initrd_start || tmp >=
+                    initrd_end))
+#endif
+                        free_page(tmp);
         }
-        tmp = nr_free_pages << PAGE_SHIFT;
         printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data)\n",
-                tmp >> 10,
-                high_memory >> 10,
+                (unsigned long) nr_free_pages << (PAGE_SHIFT-10),
+                max_mapnr << (PAGE_SHIFT-10),
                 codepages << (PAGE_SHIFT-10),
                 reservedpages << (PAGE_SHIFT-10),
                 datapages << (PAGE_SHIFT-10));
 /* test if the WP bit is honoured in supervisor mode */
-        wp_works_ok = -1;
-        pg0[0] = pte_val(mk_pte(0, PAGE_READONLY));
-        invalidate();
-        __asm__ __volatile__("movb 0,%%al ; movb %%al,0": : :"ax", "memory");
-        pg0[0] = 0;
-        invalidate();
-        if (wp_works_ok < 0)
-                wp_works_ok = 0;
-#ifdef CONFIG_TEST_VERIFY_AREA
-        wp_works_ok = 0;
-#endif
+        if (wp_works_ok < 0) {
+                unsigned char tmp_reg;
+                pg0[0] = pte_val(mk_pte(PAGE_OFFSET, PAGE_READONLY));
+                local_flush_tlb();
+                __asm__ __volatile__(
+                        "movb %0,%1 ; movb %1,%0"
+                        :"=m" (*(char *) __va(0)),
+                        "=q" (tmp_reg)
+                        :/* no inputs */
+                        :"memory");
+                pg0[0] = pte_val(mk_pte(PAGE_OFFSET, PAGE_KERNEL));
+                local_flush_tlb();
+                if (wp_works_ok < 0)
+                        wp_works_ok = 0;
+        }
         return;
 }

@@ -220,18 +292,18 @@ void si_meminfo(struct sysinfo *val)
 {
         int i;

-        i = high_memory >> PAGE_SHIFT;
+        i = max_mapnr;
         val->totalram = 0;
         val->sharedram = 0;
         val->freeram = nr_free_pages << PAGE_SHIFT;
         val->bufferram = buffermem;
         while (i-- > 0) {
-                if (mem_map[i] & MAP_PAGE_RESERVED)
+                if (PageReserved(mem_map+i))
                         continue;
                 val->totalram++;
-                if (!mem_map[i])
+                if (!mem_map[i].count)
                         continue;
-                val->sharedram += mem_map[i]-1;
+                val->sharedram += mem_map[i].count-1;
         }
         val->totalram <<= PAGE_SHIFT;
         val->sharedram <<= PAGE_SHIFT;
diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
new file mode 100644
index 000000000..42b386c1c
--- /dev/null
+++ b/arch/i386/mm/ioremap.c
@@ -0,0 +1,109 @@
+/*
+ * arch/i386/mm/ioremap.c
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PC's
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ */
+
+#include <linux/vmalloc.h>
+
+#include <asm/io.h>
+
+static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
+        unsigned long phys_addr)
+{
+        unsigned long end;
+
+        address &= ~PMD_MASK;
+        end = address + size;
+        if (end > PMD_SIZE)
+                end = PMD_SIZE;
+        do {
+                if (!pte_none(*pte))
+                        printk("remap_area_pte: page already exists\n");
+                set_pte(pte, mk_pte_phys(phys_addr, PAGE_KERNEL));
+                address += PAGE_SIZE;
+                phys_addr += PAGE_SIZE;
+                pte++;
+        } while (address < end);
+}
+
+static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
+        unsigned long phys_addr)
+{
+        unsigned long end;
+
+        address &= ~PGDIR_MASK;
+        end = address + size;
+        if (end > PGDIR_SIZE)
+                end = PGDIR_SIZE;
+        phys_addr -= address;
+        do {
+                pte_t * pte = pte_alloc_kernel(pmd, address);
+                if (!pte)
+                        return -ENOMEM;
+                remap_area_pte(pte, address, end - address, address + phys_addr);
+                address = (address + PMD_SIZE) & PMD_MASK;
+                pmd++;
+        } while (address < end);
+        return 0;
+}
+
+static int remap_area_pages(unsigned long address, unsigned long phys_addr, unsigned long size)
+{
+        pgd_t * dir;
+        unsigned long end = address + size;
+
+        phys_addr -= address;
+        dir = pgd_offset(&init_mm, address);
+        flush_cache_all();
+        while (address < end) {
+                pmd_t *pmd = pmd_alloc_kernel(dir, address);
+                if (!pmd)
+                        return -ENOMEM;
+                if (remap_area_pmd(pmd, address, end - address, phys_addr + address))
+                        return -ENOMEM;
+                set_pgdir(address, *dir);
+                address = (address + PGDIR_SIZE) & PGDIR_MASK;
+                dir++;
+        }
+        flush_tlb_all();
+        return 0;
+}
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ */
+void * ioremap(unsigned long phys_addr, unsigned long size)
+{
+        void * addr;
+        struct vm_struct * area;
+
+        if (phys_addr < virt_to_phys(high_memory))
+                return phys_to_virt(phys_addr);
+        if (phys_addr & ~PAGE_MASK)
+                return NULL;
+        size = PAGE_ALIGN(size);
+        if (!size || size > phys_addr + size)
+                return NULL;
+        area = get_vm_area(size);
+        if (!area)
+                return NULL;
+        addr = area->addr;
+        if (remap_area_pages(VMALLOC_VMADDR(addr), phys_addr, size)) {
+                vfree(addr);
+                return NULL;
+        }
+        return addr;
+}
+
+void iounmap(void *addr)
+{
+        if (addr > high_memory)
+                return vfree(addr);
+}
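A note for readers following the new exception-table mechanism introduced above: each entry pairs the address of an instruction that may fault (insn) with the address of its recovery code (fixup), and on a kernel-mode fault do_page_fault() now looks the faulting EIP up and, on a hit, resumes at the fixup instead of oopsing. The standalone sketch below mirrors the binary search in search_one_table() so it can be compiled and run in user space; the struct matches the 2.1-era definition, but demo_table, its addresses, and search_demo() are hypothetical names and values invented for illustration, not part of this commit.

#include <stdio.h>

/* Matches the 2.1-era definition in <asm/uaccess.h>. */
struct exception_table_entry {
        unsigned long insn, fixup;
};

/* A hypothetical, already-sorted table: faulting instruction
   addresses paired with the fixup to resume at. */
static const struct exception_table_entry demo_table[] = {
        { 0xc0101000UL, 0xc0109000UL },
        { 0xc0102040UL, 0xc0109010UL },
        { 0xc0103800UL, 0xc0109020UL },
};

/* Same binary search as search_one_table() above. */
static unsigned long search_demo(unsigned long value)
{
        const struct exception_table_entry *first = demo_table;
        const struct exception_table_entry *last =
                demo_table + sizeof(demo_table)/sizeof(demo_table[0]) - 1;

        while (first <= last) {
                const struct exception_table_entry *mid = (last - first) / 2 + first;
                long diff = mid->insn - value;
                if (diff == 0)
                        return mid->fixup;
                else if (diff < 0)
                        first = mid + 1;
                else
                        last = mid - 1;
        }
        return 0;   /* no fixup: a genuine oops */
}

int main(void)
{
        /* As in do_page_fault(): on a hit, eip would be set to the fixup. */
        printf("fixup for c0102040: %08lx\n", search_demo(0xc0102040UL));
        printf("fixup for deadbeef: %08lx\n", search_demo(0xdeadbeefUL));
        return 0;
}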
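Similarly, a driver of this period would consume the new ioremap()/iounmap() interface roughly as follows. This is a minimal sketch: card_probe(), CARD_PHYS, and CARD_LEN are made-up names and values, and error handling is pared down; only ioremap(), iounmap(), readb(), and printk() are real kernel interfaces here.

#include <linux/errno.h>
#include <linux/kernel.h>
#include <asm/io.h>

#define CARD_PHYS 0xe0000000UL  /* hypothetical 32-bit PCI memory BAR */
#define CARD_LEN  0x1000UL      /* one page of device registers */

static int card_probe(void)
{
        void *regs;

        /* Map the high physical range into kernel virtual space... */
        regs = ioremap(CARD_PHYS, CARD_LEN);
        if (!regs)
                return -ENOMEM;

        /* ...and access the device only through the mapping. */
        printk("card id: %02x\n", readb(regs));

        iounmap(regs);
        return 0;
}

Note how ioremap() above simply falls back to phys_to_virt() for physical addresses below high_memory, so the same call works whether the range is ordinary RAM-mapped low memory or a high PCI window.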