diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2000-02-16 01:07:24 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-02-16 01:07:24 +0000 |
commit | 95db6b748fc86297827fbd9c9ef174d491c9ad89 (patch) | |
tree | 27a92a942821cde1edda9a1b088718d436b3efe4 /arch/i386 | |
parent | 45b27b0a0652331d104c953a5b192d843fff88f8 (diff) |
Merge with Linux 2.3.40.
Diffstat (limited to 'arch/i386')
-rw-r--r-- | arch/i386/config.in | 18 | ||||
-rw-r--r-- | arch/i386/defconfig | 8 | ||||
-rw-r--r-- | arch/i386/kernel/Makefile | 6 | ||||
-rw-r--r-- | arch/i386/kernel/acpi.c | 120 | ||||
-rw-r--r-- | arch/i386/kernel/apic.c | 656 | ||||
-rw-r--r-- | arch/i386/kernel/entry.S | 59 | ||||
-rw-r--r-- | arch/i386/kernel/i386_ksyms.c | 7 | ||||
-rw-r--r-- | arch/i386/kernel/i8259.c | 22 | ||||
-rw-r--r-- | arch/i386/kernel/io_apic.c | 213 | ||||
-rw-r--r-- | arch/i386/kernel/irq.c | 20 | ||||
-rw-r--r-- | arch/i386/kernel/mpparse.c | 514 | ||||
-rw-r--r-- | arch/i386/kernel/pci-i386.c | 87 | ||||
-rw-r--r-- | arch/i386/kernel/pci-pc.c | 16 | ||||
-rw-r--r-- | arch/i386/kernel/ptrace.c | 387 | ||||
-rw-r--r-- | arch/i386/kernel/setup.c | 60 | ||||
-rw-r--r-- | arch/i386/kernel/signal.c | 2 | ||||
-rw-r--r-- | arch/i386/kernel/smp.c | 486 | ||||
-rw-r--r-- | arch/i386/kernel/smpboot.c | 946 | ||||
-rw-r--r-- | arch/i386/kernel/time.c | 10 | ||||
-rw-r--r-- | arch/i386/kernel/traps.c | 24 | ||||
-rw-r--r-- | arch/i386/mm/fault.c | 14 | ||||
-rw-r--r-- | arch/i386/mm/init.c | 23 | ||||
-rw-r--r-- | arch/i386/vmlinux.lds | 6 |
23 files changed, 1894 insertions, 1810 deletions
diff --git a/arch/i386/config.in b/arch/i386/config.in index 654602855..a32b43c65 100644 --- a/arch/i386/config.in +++ b/arch/i386/config.in @@ -7,6 +7,8 @@ mainmenu_name "Linux Kernel Configuration" define_bool CONFIG_X86 y define_bool CONFIG_ISA y +define_bool CONFIG_UID16 y + mainmenu_option next_comment comment 'Code maturity level options' bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL @@ -35,6 +37,7 @@ if [ "$CONFIG_M686" = "y" -o "$CONFIG_M586TSC" = "y" ]; then fi if [ "$CONFIG_M686" = "y" ]; then define_bool CONFIG_X86_GOOD_APIC y + define_bool CONFIG_X86_PGE y fi if [ "$CONFIG_MK7" = "y" ]; then define_bool CONFIG_X86_TSC y @@ -57,6 +60,13 @@ fi bool 'Math emulation' CONFIG_MATH_EMULATION bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR bool 'Symmetric multi-processing support' CONFIG_SMP +if [ "$CONFIG_SMP" != "y" ]; then + bool 'APIC and IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC + if [ "$CONFIG_X86_UP_IOAPIC" = "y" ]; then + define_bool CONFIG_X86_IO_APIC y + define_bool CONFIG_X86_LOCAL_APIC y + fi +fi endmenu mainmenu_option next_comment @@ -103,7 +113,11 @@ if [ "$CONFIG_VISWS" != "y" ]; then bool 'MCA support' CONFIG_MCA fi -source drivers/pcmcia/Config.in +bool 'Support for hot-pluggable devices' CONFIG_HOTPLUG + +if [ "$CONFIG_HOTPLUG" = "y" ] ; then + source drivers/pcmcia/Config.in +fi bool 'System V IPC' CONFIG_SYSVIPC bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT @@ -163,6 +177,8 @@ if [ "$CONFIG_SCSI" != "n" ]; then fi endmenu +source drivers/ieee1394/Config.in + source drivers/i2o/Config.in if [ "$CONFIG_NET" = "y" ]; then diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 9b248a6be..91ca9739b 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -3,6 +3,7 @@ # CONFIG_X86=y CONFIG_ISA=y +CONFIG_UID16=y # # Code maturity level options @@ -24,6 +25,7 @@ CONFIG_X86_BSWAP=y CONFIG_X86_POPAD_OK=y CONFIG_X86_TSC=y CONFIG_X86_GOOD_APIC=y +CONFIG_X86_PGE=y 
CONFIG_NOHIGHMEM=y # CONFIG_HIGHMEM4G is not set # CONFIG_HIGHMEM64G is not set @@ -53,13 +55,13 @@ CONFIG_PCI_BIOS=y CONFIG_PCI_DIRECT=y CONFIG_PCI_NAMES=y # CONFIG_MCA is not set +CONFIG_HOTPLUG=y # # PCMCIA/CardBus support # CONFIG_PCMCIA=y CONFIG_CARDBUS=y -CONFIG_YENTA=y # CONFIG_I82365 is not set CONFIG_TCIC=y CONFIG_SYSVIPC=y @@ -102,11 +104,13 @@ CONFIG_BLK_DEV_IDECD=y # CONFIG_BLK_DEV_CMD640=y # CONFIG_BLK_DEV_CMD640_ENHANCED is not set +# CONFIG_BLK_DEV_ISAPNP is not set CONFIG_BLK_DEV_RZ1000=y CONFIG_BLK_DEV_IDEPCI=y # CONFIG_BLK_DEV_IDEDMA_PCI is not set # CONFIG_BLK_DEV_OFFBOARD is not set # CONFIG_BLK_DEV_AEC6210 is not set +# CONFIG_BLK_DEV_CMD64X is not set # CONFIG_IDE_CHIPSETS is not set # CONFIG_BLK_CPQ_DA is not set @@ -314,6 +318,7 @@ CONFIG_PCMCIA_PCNET=y # CONFIG_PCMCIA_SMC91C92 is not set # CONFIG_PCMCIA_XIRC2PS is not set # CONFIG_AIRONET4500_CS is not set +# CONFIG_ARCNET_COM20020_CS is not set # CONFIG_PCMCIA_3C575 is not set # CONFIG_PCMCIA_TULIP is not set # CONFIG_PCMCIA_EPIC100 is not set @@ -397,6 +402,7 @@ CONFIG_PSMOUSE=y CONFIG_DRM=y CONFIG_DRM_TDFX=y # CONFIG_DRM_GAMMA is not set +CONFIG_PCMCIA_SERIAL=y # # PCMCIA character device support diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index e60e15620..a59e7c71b 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -55,8 +55,12 @@ ifdef CONFIG_SMP O_OBJS += smp.o smpboot.o trampoline.o endif +ifdef CONFIG_X86_LOCAL_APIC +O_OBJS += apic.o +endif + ifdef CONFIG_X86_IO_APIC -O_OBJS += io_apic.o +O_OBJS += io_apic.o mpparse.o endif ifdef CONFIG_X86_VISWS_APIC diff --git a/arch/i386/kernel/acpi.c b/arch/i386/kernel/acpi.c index 49444d258..3fd1704e3 100644 --- a/arch/i386/kernel/acpi.c +++ b/arch/i386/kernel/acpi.c @@ -172,6 +172,14 @@ static struct ctl_table acpi_table[] = &acpi_p_lvl3_lat, sizeof(acpi_p_lvl3_lat), 0644, NULL, &acpi_do_ulong}, + {ACPI_P_LVL2_LAT, "enter_lvl2_lat", + &acpi_enter_lvl2_lat, sizeof(acpi_enter_lvl2_lat), + 0644, NULL, 
&acpi_do_ulong}, + + {ACPI_ENTER_LVL3_LAT, "enter_lvl3_lat", + &acpi_enter_lvl3_lat, sizeof(acpi_enter_lvl3_lat), + 0644, NULL, &acpi_do_ulong}, + {ACPI_S0_SLP_TYP, "s0_slp_typ", &acpi_slp_typ[ACPI_S0], sizeof(acpi_slp_typ[ACPI_S0]), 0600, NULL, &acpi_do_ulong}, @@ -195,6 +203,17 @@ static struct ctl_table acpi_dir_table[] = {0} }; +static u32 FASTCALL(acpi_read_pm1_control(struct acpi_facp *)); +static u32 FASTCALL(acpi_read_pm1_status(struct acpi_facp *)); +static u32 FASTCALL(acpi_read_pm1_enable(struct acpi_facp *)); +static u32 FASTCALL(acpi_read_gpe_status(struct acpi_facp *)); +static u32 FASTCALL(acpi_read_gpe_enable(struct acpi_facp *)); + +static void FASTCALL(acpi_write_pm1_control(struct acpi_facp *, u32)); +static void FASTCALL(acpi_write_pm1_status(struct acpi_facp *, u32)); +static void FASTCALL(acpi_write_pm1_enable(struct acpi_facp *, u32)); +static void FASTCALL(acpi_write_gpe_status(struct acpi_facp *, u32)); +static void FASTCALL(acpi_write_gpe_enable(struct acpi_facp *, u32)); /* * Get the value of the PM1 control register (SCI_EN, ...) 
@@ -643,43 +662,49 @@ static int acpi_disable(struct acpi_facp *facp) return 0; } -static inline int bm_activity(void) +static inline int bm_activity(struct acpi_facp *facp) { - return 0 && acpi_read_pm1_status(acpi_facp) & ACPI_BM; + return acpi_read_pm1_status(facp) & ACPI_BM; } -static inline void clear_bm_activity(void) +static inline void clear_bm_activity(struct acpi_facp *facp) { - acpi_write_pm1_status(acpi_facp, ACPI_BM); + acpi_write_pm1_status(facp, ACPI_BM); } -static void sleep_on_busmaster(void) +static void sleep_on_busmaster(struct acpi_facp *facp) { - u32 pm1_cntr = acpi_read_pm1_control(acpi_facp); + u32 pm1_cntr = acpi_read_pm1_control(facp); if (pm1_cntr & ACPI_BM_RLD) { pm1_cntr &= ~ACPI_BM_RLD; - acpi_write_pm1_control(acpi_facp, pm1_cntr); + acpi_write_pm1_control(facp, pm1_cntr); } } -static void wake_on_busmaster(void) +static void wake_on_busmaster(struct acpi_facp *facp) { - u32 pm1_cntr = acpi_read_pm1_control(acpi_facp); + u32 pm1_cntr = acpi_read_pm1_control(facp); if (!(pm1_cntr & ACPI_BM_RLD)) { pm1_cntr |= ACPI_BM_RLD; - acpi_write_pm1_control(acpi_facp, pm1_cntr); + acpi_write_pm1_control(facp, pm1_cntr); } - clear_bm_activity(); + clear_bm_activity(facp); } +/* The ACPI timer is just the low 24 bits */ +#define TIME_BEGIN(tmr) inl(tmr) +#define TIME_END(tmr, begin) ((inl(tmr) - (begin)) & 0x00ffffff) + + /* * Idle loop (uniprocessor only) */ static void acpi_idle_handler(void) { static int sleep_level = 1; + struct acpi_facp *facp = acpi_facp; - if (!acpi_facp->pm_tmr || !acpi_p_blk) + if (!facp || !facp->pm_tmr || !acpi_p_blk) goto not_initialized; /* @@ -687,7 +712,7 @@ static void acpi_idle_handler(void) */ if (sleep_level == 1) goto sleep1; - if (sleep_level == 2 || bm_activity()) + if (sleep_level == 2) goto sleep2; sleep3: sleep_level = 3; @@ -695,37 +720,53 @@ sleep3: printk("ACPI C3 works\n"); acpi_p_lvl3_tested = 1; } - wake_on_busmaster(); - if (acpi_facp->pm2_cnt) + wake_on_busmaster(facp); + if (facp->pm2_cnt) goto 
sleep3_with_arbiter; for (;;) { unsigned long time; + unsigned int pm_tmr = facp->pm_tmr; + __cli(); if (current->need_resched) goto out; - time = inl(acpi_facp->pm_tmr); + if (bm_activity(facp)) + goto sleep2; + + time = TIME_BEGIN(pm_tmr); inb(acpi_p_blk + ACPI_P_LVL3); - time = inl(acpi_facp->pm_tmr) - time; + inl(pm_tmr); /* Dummy read, force synchronization with the PMU */ + time = TIME_END(pm_tmr, time); + __sti(); - if (time > acpi_p_lvl3_lat || bm_activity()) + if (time < acpi_p_lvl3_lat) goto sleep2; } sleep3_with_arbiter: for (;;) { unsigned long time; - unsigned int pm2_cntr = acpi_facp->pm2_cnt; + u8 arbiter; + unsigned int pm2_cntr = facp->pm2_cnt; + unsigned int pm_tmr = facp->pm_tmr; + __cli(); if (current->need_resched) goto out; - time = inl(acpi_facp->pm_tmr); - outb(inb(pm2_cntr) | ACPI_ARB_DIS, pm2_cntr); + if (bm_activity(facp)) + goto sleep2; + + time = TIME_BEGIN(pm_tmr); + arbiter = inb(pm2_cntr) & ~ACPI_ARB_DIS; + outb(arbiter | ACPI_ARB_DIS, pm2_cntr); /* Disable arbiter, park on CPU */ inb(acpi_p_blk + ACPI_P_LVL3); - outb(inb(pm2_cntr) & ~ACPI_ARB_DIS, pm2_cntr); - time = inl(acpi_facp->pm_tmr) - time; + inl(pm_tmr); /* Dummy read, force synchronization with the PMU */ + time = TIME_END(pm_tmr, time); + outb(arbiter, pm2_cntr); /* Enable arbiter again.. */ + __sti(); - if (time > acpi_p_lvl3_lat || bm_activity()) + if (time < acpi_p_lvl3_lat) goto sleep2; } @@ -735,38 +776,45 @@ sleep2: printk("ACPI C2 works\n"); acpi_p_lvl2_tested = 1; } - wake_on_busmaster(); /* Required to track BM activity.. */ + wake_on_busmaster(facp); /* Required to track BM activity.. 
*/ for (;;) { unsigned long time; + unsigned int pm_tmr = facp->pm_tmr; + __cli(); if (current->need_resched) goto out; - time = inl(acpi_facp->pm_tmr); + + time = TIME_BEGIN(pm_tmr); inb(acpi_p_blk + ACPI_P_LVL2); - time = inl(acpi_facp->pm_tmr) - time; + inl(pm_tmr); /* Dummy read, force synchronization with the PMU */ + time = TIME_END(pm_tmr, time); + __sti(); - if (time > acpi_p_lvl2_lat) + if (time < acpi_p_lvl2_lat) goto sleep1; - if (bm_activity()) { - clear_bm_activity(); + if (bm_activity(facp)) { + clear_bm_activity(facp); continue; } - if (time < acpi_enter_lvl3_lat) + if (time > acpi_enter_lvl3_lat) goto sleep3; } sleep1: sleep_level = 1; - sleep_on_busmaster(); + sleep_on_busmaster(facp); for (;;) { unsigned long time; + unsigned int pm_tmr = facp->pm_tmr; + __cli(); if (current->need_resched) goto out; - time = inl(acpi_facp->pm_tmr); + time = TIME_BEGIN(pm_tmr); __asm__ __volatile__("sti ; hlt": : :"memory"); - time = inl(acpi_facp->pm_tmr) - time; - if (time < acpi_enter_lvl2_lat) + time = TIME_END(pm_tmr, time); + if (time > acpi_enter_lvl2_lat) goto sleep2; } @@ -1221,7 +1269,7 @@ static int __init acpi_init(void) * do this with multiple CPU's, we'd need a per-CPU ACPI * device.. 
*/ -#ifdef __SMP__ +#ifdef CONFIG_SMP if (smp_num_cpus > 1) return 0; #endif diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c new file mode 100644 index 000000000..b8bca05c6 --- /dev/null +++ b/arch/i386/kernel/apic.c @@ -0,0 +1,656 @@ +/* + * Local APIC handling, local APIC timers + * + * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com> + * + */ + +#include <linux/config.h> +#include <linux/init.h> + +#include <linux/mm.h> +#include <linux/irq.h> +#include <linux/delay.h> +#include <linux/bootmem.h> +#include <linux/smp_lock.h> +#include <linux/interrupt.h> +#include <linux/mc146818rtc.h> +#include <linux/kernel_stat.h> + +#include <asm/smp.h> +#include <asm/mtrr.h> +#include <asm/mpspec.h> +#include <asm/pgalloc.h> + +int prof_multiplier[NR_CPUS] = { 1, }; +int prof_old_multiplier[NR_CPUS] = { 1, }; +int prof_counter[NR_CPUS] = { 1, }; + +/* + * IA s/w dev Vol 3, Section 7.4 + */ +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 + +int get_maxlvt(void) +{ + unsigned int v, ver, maxlvt; + + v = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(v); + /* 82489DXs do not report # of LVT entries. */ + maxlvt = APIC_INTEGRATED(ver) ? 
GET_APIC_MAXLVT(v) : 2; + return maxlvt; +} + +void disable_local_APIC (void) +{ + unsigned long value; + int maxlvt; + + /* + * Disable APIC + */ + value = apic_read(APIC_SPIV); + value &= ~(1<<8); + apic_write(APIC_SPIV,value); + + /* + * Clean APIC state for other OSs: + */ + value = apic_read(APIC_SPIV); + value &= ~(1<<8); + apic_write(APIC_SPIV,value); + maxlvt = get_maxlvt(); + apic_write_around(APIC_LVTT, 0x00010000); + apic_write_around(APIC_LVT0, 0x00010000); + apic_write_around(APIC_LVT1, 0x00010000); + if (maxlvt >= 3) + apic_write_around(APIC_LVTERR, 0x00010000); + if (maxlvt >= 4) + apic_write_around(APIC_LVTPC, 0x00010000); +} + +extern void __error_in_apic_c (void); + +void __init setup_local_APIC (void) +{ + unsigned long value, ver, maxlvt; + + if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) + __error_in_apic_c(); + + /* + * Double-check wether this APIC is really registered. + */ + if (!test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map)) + BUG(); + + value = apic_read(APIC_SPIV); + /* + * Enable APIC + */ + value |= (1<<8); + + /* + * Some unknown Intel IO/APIC (or APIC) errata is biting us with + * certain networking cards. If high frequency interrupts are + * happening on a particular IOAPIC pin, plus the IOAPIC routing + * entry is masked/unmasked at a high rate as well then sooner or + * later IOAPIC line gets 'stuck', no more interrupts are received + * from the device. If focus CPU is disabled then the hang goes + * away, oh well :-( + * + * [ This bug can be reproduced easily with a level-triggered + * PCI Ne2000 networking cards and PII/PIII processors, dual + * BX chipset. ] + */ +#if 0 + /* Enable focus processor (bit==0) */ + value &= ~(1<<9); +#else + /* Disable focus processor (bit==1) */ + value |= (1<<9); +#endif + /* + * Set spurious IRQ vector + */ + value |= SPURIOUS_APIC_VECTOR; + apic_write(APIC_SPIV,value); + + /* + * Set up LVT0, LVT1: + * + * set up through-local-APIC on the BP's LINT0. 
This is not + * strictly necessery in pure symmetric-IO mode, but sometimes + * we delegate interrupts to the 8259A. + */ + if (!smp_processor_id()) { + value = 0x00000700; + printk("enabled ExtINT on CPU#%d\n", smp_processor_id()); + } else { + value = 0x00010700; + printk("masked ExtINT on CPU#%d\n", smp_processor_id()); + } + apic_write_around(APIC_LVT0,value); + + /* + * only the BP should see the LINT1 NMI signal, obviously. + */ + if (!smp_processor_id()) + value = 0x00000400; // unmask NMI + else + value = 0x00010400; // mask NMI + apic_write_around(APIC_LVT1,value); + + value = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(value); + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + maxlvt = get_maxlvt(); + /* + * Due to the Pentium erratum 3AP. + */ + if (maxlvt > 3) { + apic_readaround(APIC_SPIV); // not strictly necessery + apic_write(APIC_ESR, 0); + } + value = apic_read(APIC_ESR); + printk("ESR value before enabling vector: %08lx\n", value); + + value = apic_read(APIC_LVTERR); + value = ERROR_APIC_VECTOR; // enables sending errors + apic_write(APIC_LVTERR,value); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt != 3) { + apic_readaround(APIC_SPIV); + apic_write(APIC_ESR, 0); + } + value = apic_read(APIC_ESR); + printk("ESR value after enabling vector: %08lx\n", value); + } else + printk("No ESR for 82489DX.\n"); + + /* + * Set Task Priority to 'accept all'. We never change this + * later on. + */ + value = apic_read(APIC_TASKPRI); + value &= ~APIC_TPRI_MASK; + apic_write(APIC_TASKPRI,value); + + /* + * Set up the logical destination ID and put the + * APIC into flat delivery mode. 
+ */ + value = apic_read(APIC_LDR); + value &= ~APIC_LDR_MASK; + value |= (1<<(smp_processor_id()+24)); + apic_write(APIC_LDR,value); + + value = apic_read(APIC_DFR); + value |= SET_APIC_DFR(0xf); + apic_write(APIC_DFR, value); +} + +void __init init_apic_mappings(void) +{ + unsigned long apic_phys; + + if (smp_found_config) { + apic_phys = mp_lapic_addr; + } else { + /* + * set up a fake all zeroes page to simulate the + * local APIC and another one for the IO-APIC. We + * could use the real zero-page, but it's safer + * this way if some buggy code writes to this page ... + */ + apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); + apic_phys = __pa(apic_phys); + } + set_fixmap_nocache(FIX_APIC_BASE, apic_phys); + Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys); + +#ifdef CONFIG_X86_IO_APIC + { + unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; + int i; + + for (i = 0; i < nr_ioapics; i++) { + if (smp_found_config) { + ioapic_phys = mp_ioapics[i].mpc_apicaddr; + } else { + ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = __pa(ioapic_phys); + } + set_fixmap_nocache(idx, ioapic_phys); + Dprintk("mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); + idx++; + } + } +#endif +} + +/* + * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts + * per second. We assume that the caller has already set up the local + * APIC. + * + * The APIC timer is not exactly sync with the external timer chip, it + * closely follows bus clocks. + */ + +/* + * The timer chip is already set up at HZ interrupts per second here, + * but we do not accept timer interrupts yet. We only allow the BP + * to calibrate. 
+ */ +static unsigned int __init get_8254_timer_count(void) +{ + extern rwlock_t xtime_lock; + unsigned long flags; + + unsigned int count; + + write_lock_irqsave(&xtime_lock, flags); + + outb_p(0x00, 0x43); + count = inb_p(0x40); + count |= inb_p(0x40) << 8; + + write_unlock_irqrestore(&xtime_lock, flags); + + return count; +} + +void __init wait_8254_wraparound(void) +{ + unsigned int curr_count, prev_count=~0; + int delta; + + curr_count = get_8254_timer_count(); + + do { + prev_count = curr_count; + curr_count = get_8254_timer_count(); + delta = curr_count-prev_count; + + /* + * This limit for delta seems arbitrary, but it isn't, it's + * slightly above the level of error a buggy Mercury/Neptune + * chipset timer can cause. + */ + + } while (delta<300); +} + +/* + * This function sets up the local APIC timer, with a timeout of + * 'clocks' APIC bus clock. During calibration we actually call + * this function twice on the boot CPU, once with a bogus timeout + * value, second time for real. The other (noncalibrating) CPUs + * call this function only once, with the real, calibrated value. + * + * We do reads before writes even if unnecessary, to get around the + * P5 APIC double write bug. 
+ */ + +#define APIC_DIVISOR 16 + +void __setup_APIC_LVTT(unsigned int clocks) +{ + unsigned int lvtt1_value, tmp_value; + + tmp_value = apic_read(APIC_LVTT); + lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV) | + APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + apic_write(APIC_LVTT, lvtt1_value); + + /* + * Divide PICLK by 16 + */ + tmp_value = apic_read(APIC_TDCR); + apic_write(APIC_TDCR, (tmp_value + & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) + | APIC_TDR_DIV_16); + + tmp_value = apic_read(APIC_TMICT); + apic_write(APIC_TMICT, clocks/APIC_DIVISOR); +} + +void setup_APIC_timer(void * data) +{ + unsigned int clocks = (unsigned int) data, slice, t0, t1; + unsigned long flags; + int delta; + + __save_flags(flags); + __sti(); + /* + * ok, Intel has some smart code in their APIC that knows + * if a CPU was in 'hlt' lowpower mode, and this increases + * its APIC arbitration priority. To avoid the external timer + * IRQ APIC event being in synchron with the APIC clock we + * introduce an interrupt skew to spread out timer events. + * + * The number of slices within a 'big' timeslice is smp_num_cpus+1 + */ + + slice = clocks / (smp_num_cpus+1); + printk("cpu: %d, clocks: %d, slice: %d\n", + smp_processor_id(), clocks, slice); + + /* + * Wait for IRQ0's slice: + */ + wait_8254_wraparound(); + + __setup_APIC_LVTT(clocks); + + t0 = apic_read(APIC_TMCCT)*APIC_DIVISOR; + do { + t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR; + delta = (int)(t0 - t1 - slice*(smp_processor_id()+1)); + } while (delta < 0); + + __setup_APIC_LVTT(clocks); + + printk("CPU%d<C0:%d,C:%d,D:%d,S:%d,C:%d>\n", + smp_processor_id(), t0, t1, delta, slice, clocks); + + __restore_flags(flags); +} + +/* + * In this function we calibrate APIC bus clocks to the external + * timer. Unfortunately we cannot use jiffies and the timer irq + * to calibrate, since some later bootup code depends on getting + * the first irq? Ugh. 
+ * + * We want to do the calibration only once since we + * want to have local timer irqs syncron. CPUs connected + * by the same APIC bus have the very same bus frequency. + * And we want to have irqs off anyways, no accidental + * APIC irq that way. + */ + +int __init calibrate_APIC_clock(void) +{ + unsigned long long t1 = 0, t2 = 0; + long tt1, tt2; + long result; + int i; + const int LOOPS = HZ/10; + + printk("calibrating APIC timer ... "); + + /* + * Put whatever arbitrary (but long enough) timeout + * value into the APIC clock, we just want to get the + * counter running for calibration. + */ + __setup_APIC_LVTT(1000000000); + + /* + * The timer chip counts down to zero. Let's wait + * for a wraparound to start exact measurement: + * (the current tick might have been already half done) + */ + + wait_8254_wraparound(); + + /* + * We wrapped around just now. Let's start: + */ + if (cpu_has_tsc) + rdtscll(t1); + tt1 = apic_read(APIC_TMCCT); + + /* + * Let's wait LOOPS wraprounds: + */ + for (i = 0; i < LOOPS; i++) + wait_8254_wraparound(); + + tt2 = apic_read(APIC_TMCCT); + if (cpu_has_tsc) + rdtscll(t2); + + /* + * The APIC bus clock counter is 32 bits only, it + * might have overflown, but note that we use signed + * longs, thus no extra care needed. + * + * underflown to be exact, as the timer counts down ;) + */ + + result = (tt1-tt2)*APIC_DIVISOR/LOOPS; + + if (cpu_has_tsc) + printk("\n..... CPU clock speed is %ld.%04ld MHz.\n", + ((long)(t2-t1)/LOOPS)/(1000000/HZ), + ((long)(t2-t1)/LOOPS)%(1000000/HZ)); + + printk("..... host bus clock speed is %ld.%04ld MHz.\n", + result/(1000000/HZ), + result%(1000000/HZ)); + + return result; +} + +static unsigned int calibration_result; + +void __init setup_APIC_clocks (void) +{ + __cli(); + + calibration_result = calibrate_APIC_clock(); + /* + * Now set up the timer for real. 
+ */ + setup_APIC_timer((void *)calibration_result); + + __sti(); + + /* and update all other cpus */ + smp_call_function(setup_APIC_timer, (void *)calibration_result, 1, 1); +} + +/* + * the frequency of the profiling timer can be changed + * by writing a multiplier value into /proc/profile. + */ +int setup_profiling_timer(unsigned int multiplier) +{ + int i; + + /* + * Sanity check. [at least 500 APIC cycles should be + * between APIC interrupts as a rule of thumb, to avoid + * irqs flooding us] + */ + if ( (!multiplier) || (calibration_result/multiplier < 500)) + return -EINVAL; + + /* + * Set the new multiplier for each CPU. CPUs don't start using the + * new values until the next timer interrupt in which they do process + * accounting. At that time they also adjust their APIC timers + * accordingly. + */ + for (i = 0; i < NR_CPUS; ++i) + prof_multiplier[i] = multiplier; + + return 0; +} + +#undef APIC_DIVISOR + +/* + * Local timer interrupt handler. It does both profiling and + * process statistics/rescheduling. + * + * We do profiling in every local tick, statistics/rescheduling + * happen only every 'profiling multiplier' ticks. The default + * multiplier is 1 and it can be changed by writing the new multiplier + * value into /proc/profile. + */ + +inline void smp_local_timer_interrupt(struct pt_regs * regs) +{ + int user = (user_mode(regs) != 0); + int cpu = smp_processor_id(); + + /* + * The profiling function is SMP safe. (nothing can mess + * around with "current", and the profiling counters are + * updated with atomic operations). This is especially + * useful with a profiling multiplier != 1 + */ + if (!user) + x86_do_profile(regs->eip); + + if (--prof_counter[cpu] <= 0) { + int system = 1 - user; + struct task_struct * p = current; + + /* + * The multiplier may have changed since the last time we got + * to this point as a result of the user writing to + * /proc/profile. In this case we need to adjust the APIC + * timer accordingly. 
+ * + * Interrupts are already masked off at this point. + */ + prof_counter[cpu] = prof_multiplier[cpu]; + if (prof_counter[cpu] != prof_old_multiplier[cpu]) { + __setup_APIC_LVTT(calibration_result/prof_counter[cpu]); + prof_old_multiplier[cpu] = prof_counter[cpu]; + } + + /* + * After doing the above, we need to make like + * a normal interrupt - otherwise timer interrupts + * ignore the global interrupt lock, which is the + * WrongThing (tm) to do. + */ + + irq_enter(cpu, 0); + update_one_process(p, 1, user, system, cpu); + if (p->pid) { + p->counter -= 1; + if (p->counter <= 0) { + p->counter = 0; + p->need_resched = 1; + } + if (p->priority < DEF_PRIORITY) { + kstat.cpu_nice += user; + kstat.per_cpu_nice[cpu] += user; + } else { + kstat.cpu_user += user; + kstat.per_cpu_user[cpu] += user; + } + kstat.cpu_system += system; + kstat.per_cpu_system[cpu] += system; + + } + irq_exit(cpu, 0); + } + + /* + * We take the 'long' return path, and there every subsystem + * grabs the apropriate locks (kernel lock/ irq lock). + * + * we might want to decouple profiling from the 'long path', + * and do the profiling totally in assembly. + * + * Currently this isn't too much of an issue (performance wise), + * we can take more than 100K local irqs per second on a 100 MHz P5. + */ +} + +/* + * Local APIC timer interrupt. This is the most natural way for doing + * local interrupts, but local timer interrupts can be emulated by + * broadcast interrupts too. [in case the hw doesnt support APIC timers] + * + * [ if a single-CPU system runs an SMP kernel then we call the local + * interrupt as well. Thus we cannot inline the local irq ... ] + */ +unsigned int apic_timer_irqs [NR_CPUS] = { 0, }; + +void smp_apic_timer_interrupt(struct pt_regs * regs) +{ + /* + * the NMI deadlock-detector uses this. + */ + apic_timer_irqs[smp_processor_id()]++; + + /* + * NOTE! We'd better ACK the irq immediately, + * because timer handling can be slow. 
+ */ + ack_APIC_irq(); + smp_local_timer_interrupt(regs); +} + +/* + * This interrupt should _never_ happen with our APIC/SMP architecture + */ +asmlinkage void smp_spurious_interrupt(void) +{ + ack_APIC_irq(); + /* see sw-dev-man vol 3, chapter 7.4.13.5 */ + printk("spurious APIC interrupt on CPU#%d, should never happen.\n", + smp_processor_id()); +} + +/* + * This interrupt should never happen with our APIC/SMP architecture + */ + +static spinlock_t err_lock = SPIN_LOCK_UNLOCKED; + +asmlinkage void smp_error_interrupt(void) +{ + unsigned long v; + + spin_lock(&err_lock); + + v = apic_read(APIC_ESR); + printk("APIC error interrupt on CPU#%d, should never happen.\n", + smp_processor_id()); + printk("... APIC ESR0: %08lx\n", v); + + apic_write(APIC_ESR, 0); + v |= apic_read(APIC_ESR); + printk("... APIC ESR1: %08lx\n", v); + /* + * Be a bit more verbose. (multiple bits can be set) + */ + if (v & 0x01) + printk("... bit 0: APIC Send CS Error (hw problem).\n"); + if (v & 0x02) + printk("... bit 1: APIC Receive CS Error (hw problem).\n"); + if (v & 0x04) + printk("... bit 2: APIC Send Accept Error.\n"); + if (v & 0x08) + printk("... bit 3: APIC Receive Accept Error.\n"); + if (v & 0x10) + printk("... bit 4: Reserved!.\n"); + if (v & 0x20) + printk("... bit 5: Send Illegal Vector (kernel bug).\n"); + if (v & 0x40) + printk("... bit 6: Received Illegal Vector.\n"); + if (v & 0x80) + printk("... 
bit 7: Illegal Register Address.\n"); + + ack_APIC_irq(); + + irq_err_count++; + + spin_unlock(&err_lock); +} + diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index ecfe0697d..e91602aba 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -416,15 +416,15 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_time) .long SYMBOL_NAME(sys_mknod) .long SYMBOL_NAME(sys_chmod) /* 15 */ - .long SYMBOL_NAME(sys_lchown) + .long SYMBOL_NAME(sys_lchown16) .long SYMBOL_NAME(sys_ni_syscall) /* old break syscall holder */ .long SYMBOL_NAME(sys_stat) .long SYMBOL_NAME(sys_lseek) .long SYMBOL_NAME(sys_getpid) /* 20 */ .long SYMBOL_NAME(sys_mount) .long SYMBOL_NAME(sys_oldumount) - .long SYMBOL_NAME(sys_setuid) - .long SYMBOL_NAME(sys_getuid) + .long SYMBOL_NAME(sys_setuid16) + .long SYMBOL_NAME(sys_getuid16) .long SYMBOL_NAME(sys_stime) /* 25 */ .long SYMBOL_NAME(sys_ptrace) .long SYMBOL_NAME(sys_alarm) @@ -446,11 +446,11 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_times) .long SYMBOL_NAME(sys_ni_syscall) /* old prof syscall holder */ .long SYMBOL_NAME(sys_brk) /* 45 */ - .long SYMBOL_NAME(sys_setgid) - .long SYMBOL_NAME(sys_getgid) + .long SYMBOL_NAME(sys_setgid16) + .long SYMBOL_NAME(sys_getgid16) .long SYMBOL_NAME(sys_signal) - .long SYMBOL_NAME(sys_geteuid) - .long SYMBOL_NAME(sys_getegid) /* 50 */ + .long SYMBOL_NAME(sys_geteuid16) + .long SYMBOL_NAME(sys_getegid16) /* 50 */ .long SYMBOL_NAME(sys_acct) .long SYMBOL_NAME(sys_umount) /* recycled never used phys() */ .long SYMBOL_NAME(sys_ni_syscall) /* old lock syscall holder */ @@ -470,8 +470,8 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_sigaction) .long SYMBOL_NAME(sys_sgetmask) .long SYMBOL_NAME(sys_ssetmask) - .long SYMBOL_NAME(sys_setreuid) /* 70 */ - .long SYMBOL_NAME(sys_setregid) + .long SYMBOL_NAME(sys_setreuid16) /* 70 */ + .long SYMBOL_NAME(sys_setregid16) .long SYMBOL_NAME(sys_sigsuspend) .long SYMBOL_NAME(sys_sigpending) .long SYMBOL_NAME(sys_sethostname) @@ -480,8 +480,8 @@ 
ENTRY(sys_call_table) .long SYMBOL_NAME(sys_getrusage) .long SYMBOL_NAME(sys_gettimeofday) .long SYMBOL_NAME(sys_settimeofday) - .long SYMBOL_NAME(sys_getgroups) /* 80 */ - .long SYMBOL_NAME(sys_setgroups) + .long SYMBOL_NAME(sys_getgroups16) /* 80 */ + .long SYMBOL_NAME(sys_setgroups16) .long SYMBOL_NAME(old_select) .long SYMBOL_NAME(sys_symlink) .long SYMBOL_NAME(sys_lstat) @@ -495,7 +495,7 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_truncate) .long SYMBOL_NAME(sys_ftruncate) .long SYMBOL_NAME(sys_fchmod) - .long SYMBOL_NAME(sys_fchown) /* 95 */ + .long SYMBOL_NAME(sys_fchown16) /* 95 */ .long SYMBOL_NAME(sys_getpriority) .long SYMBOL_NAME(sys_setpriority) .long SYMBOL_NAME(sys_ni_syscall) /* old profil syscall holder */ @@ -538,8 +538,8 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_sysfs) /* 135 */ .long SYMBOL_NAME(sys_personality) .long SYMBOL_NAME(sys_ni_syscall) /* for afs_syscall */ - .long SYMBOL_NAME(sys_setfsuid) - .long SYMBOL_NAME(sys_setfsgid) + .long SYMBOL_NAME(sys_setfsuid16) + .long SYMBOL_NAME(sys_setfsgid16) .long SYMBOL_NAME(sys_llseek) /* 140 */ .long SYMBOL_NAME(sys_getdents) .long SYMBOL_NAME(sys_select) @@ -564,14 +564,14 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_sched_rr_get_interval) .long SYMBOL_NAME(sys_nanosleep) .long SYMBOL_NAME(sys_mremap) - .long SYMBOL_NAME(sys_setresuid) - .long SYMBOL_NAME(sys_getresuid) /* 165 */ + .long SYMBOL_NAME(sys_setresuid16) + .long SYMBOL_NAME(sys_getresuid16) /* 165 */ .long SYMBOL_NAME(sys_vm86) .long SYMBOL_NAME(sys_query_module) .long SYMBOL_NAME(sys_poll) .long SYMBOL_NAME(sys_nfsservctl) - .long SYMBOL_NAME(sys_setresgid) /* 170 */ - .long SYMBOL_NAME(sys_getresgid) + .long SYMBOL_NAME(sys_setresgid16) /* 170 */ + .long SYMBOL_NAME(sys_getresgid16) .long SYMBOL_NAME(sys_prctl) .long SYMBOL_NAME(sys_rt_sigreturn) .long SYMBOL_NAME(sys_rt_sigaction) @@ -582,7 +582,7 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_rt_sigsuspend) .long SYMBOL_NAME(sys_pread) /* 180 */ .long 
SYMBOL_NAME(sys_pwrite) - .long SYMBOL_NAME(sys_chown) + .long SYMBOL_NAME(sys_chown16) .long SYMBOL_NAME(sys_getcwd) .long SYMBOL_NAME(sys_capget) .long SYMBOL_NAME(sys_capset) /* 185 */ @@ -598,6 +598,25 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_stat64) /* 195 */ .long SYMBOL_NAME(sys_lstat64) .long SYMBOL_NAME(sys_fstat64) + .long SYMBOL_NAME(sys_lchown) + .long SYMBOL_NAME(sys_getuid) + .long SYMBOL_NAME(sys_getgid) /* 200 */ + .long SYMBOL_NAME(sys_geteuid) + .long SYMBOL_NAME(sys_getegid) + .long SYMBOL_NAME(sys_setreuid) + .long SYMBOL_NAME(sys_setregid) + .long SYMBOL_NAME(sys_getgroups) /* 205 */ + .long SYMBOL_NAME(sys_setgroups) + .long SYMBOL_NAME(sys_fchown) + .long SYMBOL_NAME(sys_setresuid) + .long SYMBOL_NAME(sys_getresuid) + .long SYMBOL_NAME(sys_setresgid) /* 210 */ + .long SYMBOL_NAME(sys_getresgid) + .long SYMBOL_NAME(sys_chown) + .long SYMBOL_NAME(sys_setuid) + .long SYMBOL_NAME(sys_setgid) + .long SYMBOL_NAME(sys_setfsuid) /* 215 */ + .long SYMBOL_NAME(sys_setfsgid) /* @@ -606,6 +625,6 @@ ENTRY(sys_call_table) * entries. Don't panic if you notice that this hasn't * been shrunk every time we add a new system call. 
*/ - .rept NR_syscalls-197 + .rept NR_syscalls-216 .long SYMBOL_NAME(sys_ni_syscall) .endr diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 9f75d94bf..2df82ff21 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -23,7 +23,7 @@ extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(elf_fpregset_t *); -#ifdef __SMP__ +#ifdef CONFIG_SMP extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); #endif @@ -92,13 +92,10 @@ EXPORT_SYMBOL(mmx_clear_page); EXPORT_SYMBOL(mmx_copy_page); #endif -#ifdef __SMP__ +#ifdef CONFIG_SMP EXPORT_SYMBOL(cpu_data); EXPORT_SYMBOL(kernel_flag); -EXPORT_SYMBOL(cpu_number_map); -EXPORT_SYMBOL(__cpu_logical_map); EXPORT_SYMBOL(smp_num_cpus); -EXPORT_SYMBOL(cpu_present_map); EXPORT_SYMBOL(cpu_online_map); EXPORT_SYMBOL_NOVERS(__write_lock_failed); EXPORT_SYMBOL_NOVERS(__read_lock_failed); diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index 3e9097f06..c62e5c2d2 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -71,17 +71,16 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) #undef BI -#ifdef __SMP__ /* * The following vectors are part of the Linux architecture, there * is no hardware IRQ pin equivalent for them, they are triggered * through the ICC by us (IPIs) */ +#ifdef CONFIG_SMP BUILD_SMP_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR) BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) -BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) -BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) +#endif /* * every pentium local APIC has two 'local interrupts', with a @@ -90,8 +89,10 @@ BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) * overflow. 
Linux uses the local APIC timer interrupt to get * a much simpler SMP time architecture: */ +#ifdef CONFIG_X86_LOCAL_APIC BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) - +BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) +BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) #endif #define IRQ(x,y) \ @@ -428,8 +429,7 @@ void __init init_IRQ(void) set_intr_gate(vector, interrupt[i]); } -#ifdef __SMP__ - +#ifdef CONFIG_SMP /* * IRQ0 must be given a fixed assignment and initialized, * because it's used before the IO-APIC is set up. @@ -445,16 +445,18 @@ void __init init_IRQ(void) /* IPI for invalidation */ set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); - /* self generated IPI for local APIC timer */ - set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - /* IPI for generic function call */ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + /* self generated IPI for local APIC timer */ + set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); /* IPI vectors for APIC spurious and error interrupts */ set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); -#endif +#endif /* * Set the clock to HZ Hz, we already have a valid diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 5ed9255f6..fdd4ecda9 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -1,7 +1,7 @@ /* * Intel IO-APIC support for multi-Pentium hosts. * - * Copyright (C) 1997, 1998, 1999 Ingo Molnar, Hajnalka Szabo + * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo * * Many thanks to Stig Venaas for trying out countless experimental * patches and reporting/debugging problems patiently! 
@@ -13,142 +13,35 @@ * and Ingo Molnar <mingo@redhat.com> */ -#include <linux/sched.h> -#include <linux/smp_lock.h> +#include <linux/mm.h> +#include <linux/irq.h> #include <linux/init.h> #include <linux/delay.h> +#include <linux/sched.h> +#include <linux/config.h> +#include <linux/smp_lock.h> + #include <asm/io.h> +#include <asm/smp.h> #include <asm/desc.h> -#include <linux/irq.h> - -#undef __init -#define __init - -/* - * volatile is justified in this case, IO-APIC register contents - * might change spontaneously, GCC should not cache it - */ -#define IO_APIC_BASE(idx) ((volatile int *)__fix_to_virt(FIX_IO_APIC_BASE_0 + idx)) - -extern int nmi_watchdog; - -/* - * The structure of the IO-APIC: - */ - -struct IO_APIC_reg_00 { - __u32 __reserved_2 : 24, - ID : 4, - __reserved_1 : 4; -} __attribute__ ((packed)); - -struct IO_APIC_reg_01 { - __u32 version : 8, - __reserved_2 : 8, - entries : 8, - __reserved_1 : 8; -} __attribute__ ((packed)); - -struct IO_APIC_reg_02 { - __u32 __reserved_2 : 24, - arbitration : 4, - __reserved_1 : 4; -} __attribute__ ((packed)); - /* * # of IO-APICs and # of IRQ routing registers */ int nr_ioapics = 0; int nr_ioapic_registers[MAX_IO_APICS]; -enum ioapic_irq_destination_types { - dest_Fixed = 0, - dest_LowestPrio = 1, - dest_SMI = 2, - dest__reserved_1 = 3, - dest_NMI = 4, - dest_INIT = 5, - dest__reserved_2 = 6, - dest_ExtINT = 7 -}; - -struct IO_APIC_route_entry { - __u32 vector : 8, - delivery_mode : 3, /* 000: FIXED - * 001: lowest prio - * 111: ExtINT - */ - dest_mode : 1, /* 0: physical, 1: logical */ - delivery_status : 1, - polarity : 1, - irr : 1, - trigger : 1, /* 0: edge, 1: level */ - mask : 1, /* 0: enabled, 1: disabled */ - __reserved_2 : 15; - - union { struct { __u32 - __reserved_1 : 24, - physical_dest : 4, - __reserved_2 : 4; - } physical; - - struct { __u32 - __reserved_1 : 24, - logical_dest : 8; - } logical; - } dest; - -} __attribute__ ((packed)); - -/* - * MP-BIOS irq configuration table structures: - */ +/* I/O 
APIC entries */ +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];/* I/O APIC entries */ -int mp_irq_entries = 0; /* # of MP IRQ source entries */ +/* # of MP IRQ source entries */ struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - /* MP IRQ source entries */ -int mpc_default_type = 0; /* non-0 if default (table-less) - MP configuration */ - - -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - *IO_APIC_BASE(apic) = reg; - return *(IO_APIC_BASE(apic)+4); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - *IO_APIC_BASE(apic) = reg; - *(IO_APIC_BASE(apic)+4) = value; -} - -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. - */ -static inline void io_apic_modify(unsigned int apic, unsigned int value) -{ - *(IO_APIC_BASE(apic)+4) = value; -} +/* MP IRQ source entries */ +int mp_irq_entries = 0; -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) -{ - (void) *(IO_APIC_BASE(apic)+4); -} +/* non-0 if default (table-less) MP configuration */ +int mpc_default_type = 0; /* * Rough estimation of how many shared IRQs there are, can @@ -157,6 +50,13 @@ static inline void io_apic_sync(unsigned int apic) #define MAX_PLUS_SHARED_IRQS NR_IRQS #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) +/* + * This is performance-critical, we want to do it O(1) + * + * the indexing order of this array favors 1:1 mappings + * between pins and IRQs. 
+ */ + static struct irq_pin_list { int apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; @@ -239,6 +139,7 @@ static void clear_IO_APIC (void) #define MAX_PIRQS 8 int pirq_entries [MAX_PIRQS]; int pirqs_enabled; +int skip_ioapic_setup = 0; static int __init ioapic_setup(char *str) { @@ -997,7 +898,7 @@ void disable_IO_APIC(void) } } -static void __init setup_ioapic_id(void) +static void __init setup_ioapic_default_id(void) { struct IO_APIC_reg_00 reg_00; @@ -1012,7 +913,7 @@ static void __init setup_ioapic_id(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (cpu_present_map & (1<<0x2)) + if (phys_cpu_present_map & (1<<0x2)) panic("APIC ID 2 already used"); /* @@ -1031,6 +932,47 @@ static void __init setup_ioapic_id(void) panic("could not set ID"); } +/* + * function to set the IO-APIC physical IDs based on the + * values stored in the MPC table. + * + * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 + */ + +static void __init setup_ioapic_ids_from_mpc (void) +{ + struct IO_APIC_reg_00 reg_00; + int apic; + + /* + * Set the IOAPIC ID to the value stored in the MPC table. + */ + for (apic = 0; apic < nr_ioapics; apic++) { + + /* Read the register 0 value */ + *(int *)&reg_00 = io_apic_read(apic, 0); + + /* + * Read the right value from the MPC table and + * write it into the ID register. 
+ */ + printk("...changing IO-APIC physical APIC ID to %d ...", + mp_ioapics[apic].mpc_apicid); + + reg_00.ID = mp_ioapics[apic].mpc_apicid; + io_apic_write(apic, 0, *(int *)&reg_00); + + /* + * Sanity check + */ + *(int *)&reg_00 = io_apic_read(apic, 0); + if (reg_00.ID != mp_ioapics[apic].mpc_apicid) + panic("could not set ID!\n"); + else + printk(" ok.\n"); + } +} + static void __init construct_default_ISA_mptable(void) { int i, pos = 0; @@ -1071,7 +1013,7 @@ static void __init construct_default_ISA_mptable(void) mp_irqs[0].mpc_dstirq = 2; } - setup_ioapic_id(); + setup_ioapic_default_id(); } /* @@ -1320,6 +1262,8 @@ static inline void check_timer(void) pin1 = find_timer_pin(mp_INT); pin2 = find_timer_pin(mp_ExtINT); + printk("..TIMER: vector=%d pin1=%d pin2=%d\n", vector, pin1, pin2); + /* * Ok, does IRQ0 through the IOAPIC work? */ @@ -1405,8 +1349,8 @@ void __init setup_IO_APIC(void) { enable_IO_APIC(); - printk("ENABLING IO-APIC IRQs\n"); io_apic_irqs = ~PIC_IRQS; + printk("ENABLING IO-APIC IRQs\n"); /* * If there are no explicit MP IRQ entries, it's either one of the @@ -1422,8 +1366,25 @@ void __init setup_IO_APIC(void) * Set up the IO-APIC IRQ routing table by parsing the MP-BIOS * mptable: */ + setup_ioapic_ids_from_mpc(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); print_IO_APIC(); } + +#ifndef CONFIG_SMP +/* + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. + */ +void IO_APIC_init_uniprocessor (void) +{ + if (!smp_found_config) + return; + phys_cpu_present_map = 0xff; + setup_local_APIC(); + setup_IO_APIC(); + setup_APIC_clocks(); +} +#endif diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index a111eb516..317d8a8d7 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -1,8 +1,3 @@ -/* mostly architecture independent - some moved to i8259.c - the beautiful visws architecture code needs to be updated too. - and, finally, the BUILD_IRQ and SMP_BUILD macros in irq.h need fixed. 
- */ /* * linux/arch/i386/kernel/irq.c * @@ -16,6 +11,8 @@ */ /* + * (mostly architecture independent, will move to kernel/irq.c in 2.5.) + * * IRQs are in fact implemented a bit like signal handlers for the kernel. * Naturally it's not a 1:1 relation, but there are similarities. */ @@ -33,15 +30,16 @@ #include <linux/smp_lock.h> #include <linux/init.h> #include <linux/kernel_stat.h> +#include <linux/irq.h> -#include <asm/system.h> #include <asm/io.h> +#include <asm/smp.h> +#include <asm/system.h> #include <asm/bitops.h> #include <asm/pgalloc.h> #include <asm/delay.h> #include <asm/desc.h> #include <asm/irq.h> -#include <linux/irq.h> unsigned int local_bh_count[NR_CPUS]; @@ -99,7 +97,7 @@ static void ack_none(unsigned int irq) */ #if CONFIG_X86 printk("unexpected IRQ trap at vector %02x\n", irq); -#ifdef __SMP__ +#ifdef CONFIG_X86_LOCAL_APIC /* * Currently unexpected vectors happen only on SMP and APIC. * We _must_ ack these because every local APIC has only N @@ -149,7 +147,7 @@ int get_irq_list(char *buf) if (!action) continue; p += sprintf(p, "%3d: ",i); -#ifndef __SMP__ +#ifndef CONFIG_SMP p += sprintf(p, "%10u ", kstat_irqs(i)); #else for (j = 0; j < smp_num_cpus; j++) @@ -186,7 +184,7 @@ int get_irq_list(char *buf) */ spinlock_t i386_bh_lock = SPIN_LOCK_UNLOCKED; -#ifdef __SMP__ +#ifdef CONFIG_SMP unsigned char global_irq_holder = NO_PROC_ID; unsigned volatile int global_irq_lock; atomic_t global_irq_count; @@ -707,7 +705,7 @@ void free_irq(unsigned int irq, void *dev_id) } spin_unlock_irqrestore(&irq_controller_lock,flags); -#ifdef __SMP__ +#ifdef CONFIG_SMP /* Wait to make sure it's not being used on another CPU */ while (irq_desc[irq].status & IRQ_INPROGRESS) barrier(); diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c new file mode 100644 index 000000000..977dd18ba --- /dev/null +++ b/arch/i386/kernel/mpparse.c @@ -0,0 +1,514 @@ +/* + * Intel Multiprocessor Specificiation 1.1 and 1.4 + * compliant MP-table parsing routines. 
+ * + * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> + * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> + * + * Fixes + * Erich Boleyn : MP v1.4 and additional changes. + * Alan Cox : Added EBDA scanning + * Ingo Molnar : various cleanups and rewrites + * Maciej W. Rozycki : Bits for genuine 82489DX timers + */ + +#include <linux/mm.h> +#include <linux/irq.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/config.h> +#include <linux/bootmem.h> +#include <linux/smp_lock.h> +#include <linux/kernel_stat.h> +#include <linux/mc146818rtc.h> + +#include <asm/smp.h> +#include <asm/mtrr.h> +#include <asm/mpspec.h> +#include <asm/pgalloc.h> + +/* Have we found an MP table */ +int smp_found_config = 0; + +/* + * Various Linux-internal data structures created from the + * MP-table. + */ +int apic_version [NR_CPUS]; +int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, }; +int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, }; +int mp_current_pci_id = 0; +int pic_mode; +unsigned long mp_lapic_addr = 0; + +/* Processor that is doing the boot up */ +unsigned int boot_cpu_id = 0; +/* Internal processor count */ +static unsigned int num_processors = 1; + +/* Bitmask of physically existing CPUs */ +unsigned long phys_cpu_present_map = 0; + +/* + * IA s/w dev Vol 3, Section 7.4 + */ +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 + +/* + * Intel MP BIOS table parsing routines: + */ + +#ifndef CONFIG_X86_VISWS_APIC +/* + * Checksum an MP configuration block. 
+ */ + +static int __init mpf_checksum(unsigned char *mp, int len) +{ + int sum=0; + while(len--) + sum+=*mp++; + return sum&0xFF; +} + +/* + * Processor encoding in an MP configuration block + */ + +static char __init *mpc_family(int family,int model) +{ + static char n[32]; + static char *model_defs[]= + { + "80486DX","80486DX", + "80486SX","80486DX/2 or 80487", + "80486SL","80486SX/2", + "Unknown","80486DX/2-WB", + "80486DX/4","80486DX/4-WB" + }; + + switch (family) { + case 0x04: + if (model < 10) + return model_defs[model]; + break; + + case 0x05: + return("Pentium(tm)"); + + case 0x06: + return("Pentium(tm) Pro"); + + case 0x0F: + if (model == 0x0F) + return("Special controller"); + } + sprintf(n,"Unknown CPU [%d:%d]",family, model); + return n; +} + +static void __init MP_processor_info (struct mpc_config_processor *m) +{ + int ver; + + if (!(m->mpc_cpuflag & CPU_ENABLED)) + return; + + printk("Processor #%d %s APIC version %d\n", + m->mpc_apicid, + mpc_family( (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 , + (m->mpc_cpufeature & CPU_MODEL_MASK)>>4), + m->mpc_apicver); + + if (m->mpc_featureflag&(1<<0)) + Dprintk(" Floating point unit present.\n"); + if (m->mpc_featureflag&(1<<7)) + Dprintk(" Machine Exception supported.\n"); + if (m->mpc_featureflag&(1<<8)) + Dprintk(" 64 bit compare & exchange supported.\n"); + if (m->mpc_featureflag&(1<<9)) + Dprintk(" Internal APIC present.\n"); + + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { + Dprintk(" Bootup CPU\n"); + boot_cpu_id = m->mpc_apicid; + } else + /* Boot CPU already counted */ + num_processors++; + + if (m->mpc_apicid > NR_CPUS) { + printk("Processor #%d unused. (Max %d processors).\n", + m->mpc_apicid, NR_CPUS); + return; + } + ver = m->mpc_apicver; + + phys_cpu_present_map |= 1 << m->mpc_apicid; + /* + * Validate version + */ + if (ver == 0x0) { + printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. 
(tell your hw vendor)\n", m->mpc_apicid); + ver = 0x10; + } + apic_version[m->mpc_apicid] = ver; +} + +static void __init MP_bus_info (struct mpc_config_bus *m) +{ + char str[7]; + + memcpy(str, m->mpc_bustype, 6); + str[6] = 0; + Dprintk("Bus #%d is %s\n", m->mpc_busid, str); + + if (strncmp(str, "ISA", 3) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; + } else { + if (strncmp(str, "EISA", 4) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; + } else { + if (strncmp(str, "PCI", 3) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; + mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; + mp_current_pci_id++; + } else { + printk("Unknown bustype %s\n", str); + panic("cannot handle bus - mail to linux-smp@vger.rutgers.edu"); + } } } +} + +static void __init MP_ioapic_info (struct mpc_config_ioapic *m) +{ + if (!(m->mpc_flags & MPC_APIC_USABLE)) + return; + + printk("I/O APIC #%d Version %d at 0x%lX.\n", + m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); + if (nr_ioapics >= MAX_IO_APICS) { + printk("Max # of I/O APICs (%d) exceeded (found %d).\n", + MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); + } + mp_ioapics[nr_ioapics] = *m; + nr_ioapics++; +} + +static void __init MP_intsrc_info (struct mpc_config_intsrc *m) +{ + mp_irqs [mp_irq_entries] = *m; + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); +} + +static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) +{ + /* + * Well it seems all SMP boards in existence + * use ExtINT/LVT1 == LINT0 and + * NMI/LVT2 == LINT1 - the following check + * will show us if this assumptions is false. + * Until then we do not have to add baggage. 
+ */ + if ((m->mpc_irqtype == mp_ExtINT) && + (m->mpc_destapiclint != 0)) + BUG(); + if ((m->mpc_irqtype == mp_NMI) && + (m->mpc_destapiclint != 1)) + BUG(); +} + +/* + * Read/parse the MPC + */ + +static int __init smp_read_mpc(struct mp_config_table *mpc) +{ + char str[16]; + int count=sizeof(*mpc); + unsigned char *mpt=((unsigned char *)mpc)+count; + + if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) + { + panic("SMP mptable: bad signature [%c%c%c%c]!\n", + mpc->mpc_signature[0], + mpc->mpc_signature[1], + mpc->mpc_signature[2], + mpc->mpc_signature[3]); + return 1; + } + if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) + { + panic("SMP mptable: checksum error!\n"); + return 1; + } + if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) + { + printk("Bad Config Table version (%d)!!\n",mpc->mpc_spec); + return 1; + } + memcpy(str,mpc->mpc_oem,8); + str[8]=0; + printk("OEM ID: %s ",str); + + memcpy(str,mpc->mpc_productid,12); + str[12]=0; + printk("Product ID: %s ",str); + + printk("APIC at: 0x%lX\n",mpc->mpc_lapic); + + /* save the local APIC address, it might be non-default */ + mp_lapic_addr = mpc->mpc_lapic; + + /* + * Now process the configuration blocks. 
+ */ + while (count < mpc->mpc_length) { + switch(*mpt) { + case MP_PROCESSOR: + { + struct mpc_config_processor *m= + (struct mpc_config_processor *)mpt; + MP_processor_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_BUS: + { + struct mpc_config_bus *m= + (struct mpc_config_bus *)mpt; + MP_bus_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_IOAPIC: + { + struct mpc_config_ioapic *m= + (struct mpc_config_ioapic *)mpt; + MP_ioapic_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_INTSRC: + { + struct mpc_config_intsrc *m= + (struct mpc_config_intsrc *)mpt; + + MP_intsrc_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_LINTSRC: + { + struct mpc_config_lintsrc *m= + (struct mpc_config_lintsrc *)mpt; + MP_lintsrc_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + } + } + return num_processors; +} + +/* + * Scan the memory blocks for an SMP configuration block. + */ +static int __init smp_get_mpf(struct intel_mp_floating *mpf) +{ + printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); + if (mpf->mpf_feature2 & (1<<7)) { + printk(" IMCR and PIC compatibility mode.\n"); + pic_mode = 1; + } else { + printk(" Virtual Wire compatibility mode.\n"); + pic_mode = 0; + } + smp_found_config = 1; + /* + * default CPU id - if it's different in the mptable + * then we change it before first using it. + */ + boot_cpu_id = 0; + /* + * Now see if we need to read further. + */ + if (mpf->mpf_feature1 != 0) { + /* + * local APIC has default address + */ + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* + * 2 CPUs, numbered 0 & 1. 
+ */ + phys_cpu_present_map = 3; + num_processors = 2; + + nr_ioapics = 1; + mp_ioapics[0].mpc_apicaddr = 0xFEC00000; + /* + * Save the default type number, we + * need it later to set the IO-APIC + * up properly: + */ + mpc_default_type = mpf->mpf_feature1; + + printk("Bus #0 is "); + } + + switch (mpf->mpf_feature1) { + case 1: + case 5: + printk("ISA\n"); + break; + case 2: + printk("EISA with no IRQ0 and no IRQ13 DMA chaining\n"); + break; + case 6: + case 3: + printk("EISA\n"); + break; + case 4: + case 7: + printk("MCA\n"); + break; + case 0: + if (!mpf->mpf_physptr) + BUG(); + break; + default: + printk("???\nUnknown standard configuration %d\n", + mpf->mpf_feature1); + return 1; + } + if (mpf->mpf_feature1 > 4) { + printk("Bus #1 is PCI\n"); + + /* + * Set local APIC version to the integrated form. + * It's initialized to zero otherwise, representing + * a discrete 82489DX. + */ + apic_version[0] = 0x10; + apic_version[1] = 0x10; + } + /* + * Read the physical hardware table. Anything here will override the + * defaults. + */ + if (mpf->mpf_physptr) + smp_read_mpc((void *)mpf->mpf_physptr); + + printk("Processors: %d\n", num_processors); + /* + * Only use the first configuration found. 
+ */ + return 1; +} + +static int __init smp_scan_config(unsigned long base, unsigned long length) +{ + unsigned long *bp = phys_to_virt(base); + struct intel_mp_floating *mpf; + + Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); + if (sizeof(*mpf) != 16) + printk("Error: MPF size\n"); + + while (length > 0) { + mpf = (struct intel_mp_floating *)bp; + if ((*bp == SMP_MAGIC_IDENT) && + (mpf->mpf_length == 1) && + !mpf_checksum((unsigned char *)bp, 16) && + ((mpf->mpf_specification == 1) + || (mpf->mpf_specification == 4)) ) { + + printk("found SMP MP-table at %08ld\n", + virt_to_phys(mpf)); + smp_get_mpf(mpf); + return 1; + } + bp += 4; + length -= 16; + } + return 0; +} + +void __init init_intel_smp (void) +{ + unsigned int address; + + /* + * FIXME: Linux assumes you have 640K of base ram.. + * this continues the error... + * + * 1) Scan the bottom 1K for a signature + * 2) Scan the top 1K of base RAM + * 3) Scan the 64K of bios + */ + if (smp_scan_config(0x0,0x400) || + smp_scan_config(639*0x400,0x400) || + smp_scan_config(0xF0000,0x10000)) + return; + /* + * If it is an SMP machine we should know now, unless the + * configuration is in an EISA/MCA bus machine with an + * extended bios data area. + * + * there is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E, calculate and scan it here. + * + * NOTE! There are Linux loaders that will corrupt the EBDA + * area, and as such this kind of SMP config may be less + * trustworthy, simply because the SMP table may have been + * stomped on during early boot. These loaders are buggy and + * should be fixed. 
+ */ + + address = *(unsigned short *)phys_to_virt(0x40E); + address <<= 4; + smp_scan_config(address, 0x1000); + if (smp_found_config) + printk(KERN_WARNING "WARNING: MP table in the EBDA can be UNSAFE, contact linux-smp@vger.rutgers.edu if you experience SMP problems!\n"); +} + +#else + +/* + * The Visual Workstation is Intel MP compliant in the hardware + * sense, but it doesnt have a BIOS(-configuration table). + * No problem for Linux. + */ +void __init init_visws_smp(void) +{ + smp_found_config = 1; + + phys_cpu_present_map |= 2; /* or in id 1 */ + apic_version[1] |= 0x10; /* integrated APIC */ + apic_version[0] |= 0x10; + + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; +} + +#endif + +/* + * - Intel MP Configuration Table + * - or SGI Visual Workstation configuration + */ +void __init init_smp_config (void) +{ +#ifdef CONFIG_X86_IO_APIC + init_intel_smp(); +#endif +#ifdef CONFIG_VISWS + init_visws_smp(); +#endif +} + diff --git a/arch/i386/kernel/pci-i386.c b/arch/i386/kernel/pci-i386.c index e94868cd9..ee2edca1e 100644 --- a/arch/i386/kernel/pci-i386.c +++ b/arch/i386/kernel/pci-i386.c @@ -94,59 +94,56 @@ #include "pci-i386.h" -/* - * Assign new address to PCI resource. We hope our resource information - * is complete. On the PC, we don't re-assign resources unless we are - * forced to do so. - * - * Expects start=0, end=size-1, flags=resource type. 
- */ - -int pci_assign_resource(struct pci_dev *dev, int i) +void +pcibios_update_resource(struct pci_dev *dev, struct resource *root, + struct resource *res, int resource) { - struct resource *r = &dev->resource[i]; - struct resource *pr = pci_find_parent_resource(dev, r); - unsigned long size = r->end + 1; u32 new, check; + int reg; - if (!pr) { - printk(KERN_ERR "PCI: Cannot find parent resource for device %s\n", dev->slot_name); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) { - /* - * We need to avoid collisions with `mirrored' VGA ports and other strange - * ISA hardware, so we always want the addresses kilobyte aligned. - */ - if (size > 0x100) { - printk(KERN_ERR "PCI: I/O Region %s/%d too large (%ld bytes)\n", dev->slot_name, i, size); - return -EFBIG; - } - if (allocate_resource(pr, r, size, 0x1000, ~0, 1024, NULL, NULL)) { - printk(KERN_ERR "PCI: Allocation of I/O region %s/%d (%ld bytes) failed\n", dev->slot_name, i, size); - return -EBUSY; - } + new = res->start | (res->flags & PCI_REGION_FLAG_MASK); + if (resource < 6) { + reg = PCI_BASE_ADDRESS_0 + 4*resource; + } else if (resource == PCI_ROM_RESOURCE) { + res->flags |= PCI_ROM_ADDRESS_ENABLE; + reg = dev->rom_base_reg; } else { - if (allocate_resource(pr, r, size, 0x10000000, ~0, size, NULL, NULL)) { - printk(KERN_ERR "PCI: Allocation of memory region %s/%d (%ld bytes) failed\n", dev->slot_name, i, size); - return -EBUSY; - } + /* Somebody might have asked allocation of a non-standard resource */ + return; } - if (i < 6) { - int reg = PCI_BASE_ADDRESS_0 + 4*i; - new = r->start | (r->flags & PCI_REGION_FLAG_MASK); - pci_write_config_dword(dev, reg, new); - pci_read_config_dword(dev, reg, &check); - if (new != check) - printk(KERN_ERR "PCI: Error while updating region %s/%d (%08x != %08x)\n", dev->slot_name, i, new, check); - } else if (i == PCI_ROM_RESOURCE) { - r->flags |= PCI_ROM_ADDRESS_ENABLE; - pci_write_config_dword(dev, dev->rom_base_reg, r->start | (r->flags & PCI_REGION_FLAG_MASK)); + 
+ pci_write_config_dword(dev, reg, new); + pci_read_config_dword(dev, reg, &check); + if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) { + printk(KERN_ERR "PCI: Error while updating region " + "%s/%d (%08x != %08x)\n", dev->slot_name, resource, + new, check); } - printk("PCI: Assigned addresses %08lx-%08lx to region %s/%d\n", r->start, r->end, dev->slot_name, i); - return 0; } +void +pcibios_align_resource(void *data, struct resource *res, unsigned long size) +{ + struct pci_dev *dev = data; + + if (res->flags & IORESOURCE_IO) { + unsigned long start = res->start; + + /* We need to avoid collisions with `mirrored' VGA ports + and other strange ISA hardware, so we always want the + addresses kilobyte aligned. */ + if (size >= 0x100) { + printk(KERN_ERR "PCI: I/O Region %s/%d too large" + " (%ld bytes)\n", dev->slot_name, + dev->resource - res, size); + } + + start = (start + 1024 - 1) & ~(1024 - 1); + res->start = start; + } +} + + /* * Handle resources of PCI devices. If the world were perfect, we could * just allocate all the resource regions and do nothing more. It isn't. 
diff --git a/arch/i386/kernel/pci-pc.c b/arch/i386/kernel/pci-pc.c index 6b7d65589..601ffd3bf 100644 --- a/arch/i386/kernel/pci-pc.c +++ b/arch/i386/kernel/pci-pc.c @@ -23,6 +23,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2; static struct pci_bus *pci_root_bus; +static struct pci_ops *pci_root_ops; /* * IRQ routing table provided by the BIOS @@ -876,9 +877,9 @@ static void __init pci_fixup_i450nx(struct pci_dev *d) pci_read_config_byte(d, reg++, &subb); DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); if (busno) - pci_scan_bus(busno, pci_root_bus->ops, NULL); /* Bus A */ + pci_scan_bus(busno, pci_root_ops, NULL); /* Bus A */ if (suba < subb) - pci_scan_bus(suba+1, pci_root_bus->ops, NULL); /* Bus B */ + pci_scan_bus(suba+1, pci_root_ops, NULL); /* Bus B */ } } @@ -891,7 +892,7 @@ static void __init pci_fixup_rcc(struct pci_dev *d) u8 busno; pci_read_config_byte(d, 0x44, &busno); printk("PCI: RCC host bridge: secondary bus %02x\n", busno); - pci_scan_bus(busno, pci_root_bus->ops, NULL); + pci_scan_bus(busno, pci_root_ops, NULL); } static void __init pci_fixup_compaq(struct pci_dev *d) @@ -903,7 +904,7 @@ static void __init pci_fixup_compaq(struct pci_dev *d) u8 busno; pci_read_config_byte(d, 0xc8, &busno); printk("PCI: Compaq host bridge: secondary bus %02x\n", busno); - pci_scan_bus(busno, pci_root_bus->ops, NULL); + pci_scan_bus(busno, pci_root_ops, NULL); } static void __init pci_fixup_umc_ide(struct pci_dev *d) @@ -1189,7 +1190,6 @@ void __init pcibios_init(void) { struct pci_ops *bios = NULL; struct pci_ops *dir = NULL; - struct pci_ops *ops; #ifdef CONFIG_PCI_BIOS if ((pci_probe & PCI_PROBE_BIOS) && ((bios = pci_find_bios()))) { @@ -1202,16 +1202,16 @@ void __init pcibios_init(void) dir = pci_check_direct(); #endif if (dir) - ops = dir; + pci_root_ops = dir; else if (bios) - ops = bios; + pci_root_ops = bios; else { printk("PCI: No PCI bus detected\n"); return; } printk("PCI: Probing PCI hardware\n"); - 
pci_root_bus = pci_scan_bus(0, ops, NULL); + pci_root_bus = pci_scan_bus(0, pci_root_ops, NULL); pcibios_fixup_irqs(); if (pci_probe & PCI_PEER_FIXUP) diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 1e6576860..ed64f15a2 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -198,259 +198,242 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) switch (request) { /* when I and D space are separate, these will need to be fixed. */ - case PTRACE_PEEKTEXT: /* read word at location addr. */ - case PTRACE_PEEKDATA: { - unsigned long tmp; - int copied; - - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - ret = -EIO; - if (copied != sizeof(tmp)) - goto out; - ret = put_user(tmp,(unsigned long *) data); - goto out; - } + case PTRACE_PEEKTEXT: /* read word at location addr. */ + case PTRACE_PEEKDATA: { + unsigned long tmp; + int copied; + + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + ret = -EIO; + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp,(unsigned long *) data); + break; + } /* read the word at location addr in the USER area. */ - case PTRACE_PEEKUSR: { - unsigned long tmp; + case PTRACE_PEEKUSR: { + unsigned long tmp; - ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) - goto out; - - tmp = 0; /* Default return condition */ - if(addr < 17*sizeof(long)) - tmp = getreg(child, addr); - if(addr >= (long) &dummy->u_debugreg[0] && - addr <= (long) &dummy->u_debugreg[7]){ - addr -= (long) &dummy->u_debugreg[0]; - addr = addr >> 2; - tmp = child->thread.debugreg[addr]; - }; - ret = put_user(tmp,(unsigned long *) data); - goto out; - } + ret = -EIO; + if ((addr & 3) || addr < 0 || + addr > sizeof(struct user) - 3) + break; - /* when I and D space are separate, this will have to be fixed. */ - case PTRACE_POKETEXT: /* write the word at location addr. 
*/ - case PTRACE_POKEDATA: - ret = 0; - if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) - goto out; - ret = -EIO; - goto out; + tmp = 0; /* Default return condition */ + if(addr < 17*sizeof(long)) + tmp = getreg(child, addr); + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[7]){ + addr -= (long) &dummy->u_debugreg[0]; + addr = addr >> 2; + tmp = child->thread.debugreg[addr]; + } + ret = put_user(tmp,(unsigned long *) data); + break; + } - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ - ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) - goto out; + /* when I and D space are separate, this will have to be fixed. */ + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + ret = 0; + if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) + break; + ret = -EIO; + break; - if (addr < 17*sizeof(long)) { - ret = putreg(child, addr, data); - goto out; - } + case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ + ret = -EIO; + if ((addr & 3) || addr < 0 || + addr > sizeof(struct user) - 3) + break; - /* We need to be very careful here. We implicitly - want to modify a portion of the task_struct, and we - have to be selective about what portions we allow someone - to modify. */ + if (addr < 17*sizeof(long)) { + ret = putreg(child, addr, data); + break; + } + /* We need to be very careful here. We implicitly + want to modify a portion of the task_struct, and we + have to be selective about what portions we allow someone + to modify. 
*/ + ret = -EIO; if(addr >= (long) &dummy->u_debugreg[0] && addr <= (long) &dummy->u_debugreg[7]){ - if(addr == (long) &dummy->u_debugreg[4]) return -EIO; - if(addr == (long) &dummy->u_debugreg[5]) return -EIO; + if(addr == (long) &dummy->u_debugreg[4]) break; + if(addr == (long) &dummy->u_debugreg[5]) break; if(addr < (long) &dummy->u_debugreg[4] && - ((unsigned long) data) >= TASK_SIZE-3) return -EIO; + ((unsigned long) data) >= TASK_SIZE-3) break; - ret = -EIO; if(addr == (long) &dummy->u_debugreg[7]) { data &= ~DR_CONTROL_RESERVED; for(i=0; i<4; i++) if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1) goto out; - }; + } addr -= (long) &dummy->u_debugreg; addr = addr >> 2; child->thread.debugreg[addr] = data; ret = 0; - goto out; - }; - ret = -EIO; - goto out; + } + break; - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: { /* restart after signal. */ - long tmp; + case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ + case PTRACE_CONT: { /* restart after signal. */ + long tmp; - ret = -EIO; - if ((unsigned long) data > _NSIG) - goto out; - if (request == PTRACE_SYSCALL) - child->flags |= PF_TRACESYS; - else - child->flags &= ~PF_TRACESYS; - child->exit_code = data; + ret = -EIO; + if ((unsigned long) data > _NSIG) + break; + if (request == PTRACE_SYSCALL) + child->flags |= PF_TRACESYS; + else + child->flags &= ~PF_TRACESYS; + child->exit_code = data; /* make sure the single step bit is not set. */ - tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; - put_stack_long(child, EFL_OFFSET,tmp); - wake_up_process(child); - ret = 0; - goto out; - } + tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; + put_stack_long(child, EFL_OFFSET,tmp); + wake_up_process(child); + ret = 0; + break; + } /* * make the child exit. Best I can do is send it a sigkill. * perhaps it should be put in the status that it wants to * exit. 
*/ - case PTRACE_KILL: { - long tmp; + case PTRACE_KILL: { + long tmp; - ret = 0; - if (child->state == TASK_ZOMBIE) /* already dead */ - goto out; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; - put_stack_long(child, EFL_OFFSET, tmp); - wake_up_process(child); - goto out; + ret = 0; + if (child->state == TASK_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. */ + tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; + put_stack_long(child, EFL_OFFSET, tmp); + wake_up_process(child); + break; + } + + case PTRACE_SINGLESTEP: { /* set the trap flag. */ + long tmp; + + ret = -EIO; + if ((unsigned long) data > _NSIG) + break; + child->flags &= ~PF_TRACESYS; + if ((child->flags & PF_DTRACE) == 0) { + /* Spurious delayed TF traps may occur */ + child->flags |= PF_DTRACE; } + tmp = get_stack_long(child, EFL_OFFSET) | TRAP_FLAG; + put_stack_long(child, EFL_OFFSET, tmp); + child->exit_code = data; + /* give it a chance to run. */ + wake_up_process(child); + ret = 0; + break; + } + + case PTRACE_DETACH: { /* detach a process that was attached. */ + long tmp; - case PTRACE_SINGLESTEP: { /* set the trap flag. */ - long tmp; + ret = -EIO; + if ((unsigned long) data > _NSIG) + break; + child->flags &= ~(PF_PTRACED|PF_TRACESYS); + child->exit_code = data; + write_lock_irqsave(&tasklist_lock, flags); + REMOVE_LINKS(child); + child->p_pptr = child->p_opptr; + SET_LINKS(child); + write_unlock_irqrestore(&tasklist_lock, flags); + /* make sure the single step bit is not set. */ + tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; + put_stack_long(child, EFL_OFFSET, tmp); + wake_up_process(child); + ret = 0; + break; + } + case PTRACE_GETREGS: { /* Get all gp regs from the child. 
*/ + if (!access_ok(VERIFY_WRITE, (unsigned *)data, 17*sizeof(long))) { ret = -EIO; - if ((unsigned long) data > _NSIG) - goto out; - child->flags &= ~PF_TRACESYS; - if ((child->flags & PF_DTRACE) == 0) { - /* Spurious delayed TF traps may occur */ - child->flags |= PF_DTRACE; - } - tmp = get_stack_long(child, EFL_OFFSET) | TRAP_FLAG; - put_stack_long(child, EFL_OFFSET, tmp); - child->exit_code = data; - /* give it a chance to run. */ - wake_up_process(child); - ret = 0; - goto out; + break; } + for ( i = 0; i < 17*sizeof(long); i += sizeof(long) ) { + __put_user(getreg(child, i),(unsigned long *) data); + data += sizeof(long); + } + ret = 0; + break; + } - case PTRACE_DETACH: { /* detach a process that was attached. */ - long tmp; - + case PTRACE_SETREGS: { /* Set all gp regs in the child. */ + unsigned long tmp; + if (!access_ok(VERIFY_READ, (unsigned *)data, 17*sizeof(long))) { ret = -EIO; - if ((unsigned long) data > _NSIG) - goto out; - child->flags &= ~(PF_PTRACED|PF_TRACESYS); - child->exit_code = data; - write_lock_irqsave(&tasklist_lock, flags); - REMOVE_LINKS(child); - child->p_pptr = child->p_opptr; - SET_LINKS(child); - write_unlock_irqrestore(&tasklist_lock, flags); - /* make sure the single step bit is not set. */ - tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; - put_stack_long(child, EFL_OFFSET, tmp); - wake_up_process(child); - ret = 0; - goto out; + break; } + for ( i = 0; i < 17*sizeof(long); i += sizeof(long) ) { + __get_user(tmp, (unsigned long *) data); + putreg(child, i, tmp); + data += sizeof(long); + } + ret = 0; + break; + } - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ - if (!access_ok(VERIFY_WRITE, (unsigned *)data, - 17*sizeof(long))) - { - ret = -EIO; - goto out; - } - for ( i = 0; i < 17*sizeof(long); i += sizeof(long) ) - { - __put_user(getreg(child, i),(unsigned long *) data); - data += sizeof(long); - } - ret = 0; - goto out; - }; - - case PTRACE_SETREGS: { /* Set all gp regs in the child. 
*/ - unsigned long tmp; - if (!access_ok(VERIFY_READ, (unsigned *)data, - 17*sizeof(long))) - { - ret = -EIO; - goto out; - } - for ( i = 0; i < 17*sizeof(long); i += sizeof(long) ) - { - __get_user(tmp, (unsigned long *) data); - putreg(child, i, tmp); - data += sizeof(long); - } - ret = 0; - goto out; - }; - - case PTRACE_GETFPREGS: { /* Get the child FPU state. */ - if (!access_ok(VERIFY_WRITE, (unsigned *)data, - sizeof(struct user_i387_struct))) - { - ret = -EIO; - goto out; - } - ret = 0; - if ( !child->used_math ) { - /* Simulate an empty FPU. */ - child->thread.i387.hard.cwd = 0xffff037f; - child->thread.i387.hard.swd = 0xffff0000; - child->thread.i387.hard.twd = 0xffffffff; - } + case PTRACE_GETFPREGS: { /* Get the child FPU state. */ + if (!access_ok(VERIFY_WRITE, (unsigned *)data, sizeof(struct user_i387_struct))) { + ret = -EIO; + break; + } + ret = 0; + if ( !child->used_math ) { + /* Simulate an empty FPU. */ + child->thread.i387.hard.cwd = 0xffff037f; + child->thread.i387.hard.swd = 0xffff0000; + child->thread.i387.hard.twd = 0xffffffff; + } #ifdef CONFIG_MATH_EMULATION - if ( boot_cpu_data.hard_math ) { + if ( boot_cpu_data.hard_math ) { #endif - __copy_to_user((void *)data, &child->thread.i387.hard, - sizeof(struct user_i387_struct)); + __copy_to_user((void *)data, &child->thread.i387.hard, sizeof(struct user_i387_struct)); #ifdef CONFIG_MATH_EMULATION - } else { - save_i387_soft(&child->thread.i387.soft, - (struct _fpstate *)data); - } + } else { + save_i387_soft(&child->thread.i387.soft, (struct _fpstate *)data); + } #endif - goto out; - }; - - case PTRACE_SETFPREGS: { /* Set the child FPU state. */ - if (!access_ok(VERIFY_READ, (unsigned *)data, - sizeof(struct user_i387_struct))) - { - ret = -EIO; - goto out; - } - child->used_math = 1; + break; + } + + case PTRACE_SETFPREGS: { /* Set the child FPU state. 
*/ + if (!access_ok(VERIFY_READ, (unsigned *)data, sizeof(struct user_i387_struct))) { + ret = -EIO; + break; + } + child->used_math = 1; #ifdef CONFIG_MATH_EMULATION - if ( boot_cpu_data.hard_math ) { + if ( boot_cpu_data.hard_math ) { #endif - __copy_from_user(&child->thread.i387.hard, (void *)data, - sizeof(struct user_i387_struct)); + __copy_from_user(&child->thread.i387.hard, (void *)data, sizeof(struct user_i387_struct)); #ifdef CONFIG_MATH_EMULATION - } else { - restore_i387_soft(&child->thread.i387.soft, - (struct _fpstate *)data); - } + } else { + restore_i387_soft(&child->thread.i387.soft, (struct _fpstate *)data); + } #endif - ret = 0; - goto out; - }; + ret = 0; + break; + } - default: - ret = -EIO; - goto out; + default: + ret = -EIO; + break; } out: unlock_kernel(); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index cd80009d4..d308a1280 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -31,6 +31,11 @@ * * Added proper L2 cache detection for Coppermine * Dragan Stancevic <visitor@valinux.com>, October 1999 + * + * Added the origninal array for capability flags but forgot to credit + * myself :) (~1998) Fixed/cleaned up some cpu_model_info and other stuff + * Jauder Ho <jauderho@carumba.com>, January 2000 + * */ /* @@ -69,6 +74,7 @@ #include <asm/desc.h> #include <asm/e820.h> #include <asm/dma.h> +#include <asm/mpspec.h> /* * Machine setup.. @@ -77,7 +83,7 @@ char ignore_irq13 = 0; /* set if exception 16 works */ struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; -unsigned long mmu_cr4_features __initdata = 0; +unsigned long mmu_cr4_features = 0; /* * Bus types .. 
@@ -691,7 +697,7 @@ void __init setup_arch(char **cmdline_p) */ reserve_bootmem(0, PAGE_SIZE); -#ifdef __SMP__ +#ifdef CONFIG_SMP /* * But first pinch a few for the stack/trampoline stuff * FIXME: Don't need the extra page at 4K, but need to fix @@ -701,7 +707,7 @@ void __init setup_arch(char **cmdline_p) smp_alloc_memory(); /* AP processor realmode stacks in low memory*/ #endif -#ifdef __SMP__ +#ifdef CONFIG_X86_IO_APIC /* * Save possible boot-time SMP configuration: */ @@ -1166,6 +1172,8 @@ void __init get_cpu_vendor(struct cpuinfo_x86 *c) c->x86_vendor = X86_VENDOR_CENTAUR; else if (!strcmp(v, "NexGenDriven")) c->x86_vendor = X86_VENDOR_NEXGEN; + else if (!strcmp(v, "RiseRiseRise")) + c->x86_vendor = X86_VENDOR_RISE; else c->x86_vendor = X86_VENDOR_UNKNOWN; } @@ -1176,6 +1184,7 @@ struct cpu_model_info { char *model_names[16]; }; +/* Naming convention should be: <Name> [(<Codename>)] */ static struct cpu_model_info cpu_models[] __initdata = { { X86_VENDOR_INTEL, 4, { "486 DX-25/33", "486 DX-50", "486 SX", "486 DX/2", "486 SL", @@ -1188,8 +1197,9 @@ static struct cpu_model_info cpu_models[] __initdata = { NULL, NULL, NULL, NULL }}, { X86_VENDOR_INTEL, 6, { "Pentium Pro A-step", "Pentium Pro", NULL, "Pentium II (Klamath)", - NULL, "Pentium II (Deschutes)", "Mobile Pentium II", "Pentium III (Katmai)", - "Pentium III (Coppermine)", NULL, NULL, NULL, NULL, NULL, NULL }}, + NULL, "Pentium II (Deschutes)", "Mobile Pentium II", + "Pentium III (Katmai)", "Pentium III (Coppermine)", NULL, NULL, + NULL, NULL, NULL, NULL }}, { X86_VENDOR_AMD, 4, { NULL, NULL, NULL, "486 DX/2", NULL, NULL, NULL, "486 DX/2-WB", "486 DX/4", "486 DX/4-WB", NULL, NULL, NULL, NULL, "Am5x86-WT", @@ -1210,6 +1220,9 @@ static struct cpu_model_info cpu_models[] __initdata = { { X86_VENDOR_NEXGEN, 5, { "Nx586", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }}, + { X86_VENDOR_RISE, 5, + { "mP6", "mP6", NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 
NULL, NULL, NULL, NULL, NULL }}, }; void __init identify_cpu(struct cpuinfo_x86 *c) @@ -1300,8 +1313,9 @@ void __init identify_cpu(struct cpuinfo_x86 *c) if (c->x86_model <= 16) p = cpu_models[i].model_names[c->x86_model]; - /* Names for the Pentium II Celeron processors - detectable only by also checking the cache size */ + /* Names for the Pentium II/Celeron processors + detectable only by also checking the cache size. + Dixon is NOT a Celeron. */ if ((cpu_models[i].vendor == X86_VENDOR_INTEL) && (cpu_models[i].x86 == 6)) { @@ -1310,7 +1324,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c) else if(c->x86_model == 6 && c->x86_cache_size == 128) p = "Celeron (Mendocino)"; else if(c->x86_model == 5 && c->x86_cache_size == 256) - p = "Celeron (Dixon)"; + p = "Mobile Pentium II (Dixon)"; } } } @@ -1341,7 +1355,7 @@ void __init dodgy_tsc(void) static char *cpu_vendor_names[] __initdata = { - "Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur" }; + "Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur", "Rise" }; void __init print_cpu_info(struct cpuinfo_x86 *c) @@ -1373,17 +1387,28 @@ int get_cpuinfo(char * buffer) { char *p = buffer; int sep_bug; + + /* + * Flags should be entered into the array ONLY if there is no overlap. + * Else a number should be used and then overridden in the case + * statement below. --Jauder <jauderho@carumba.com> + * + * NOTE: bits 10, 19-22, 26-31 are reserved. + * + * Data courtesy of http://www.sandpile.org/arch/cpuid.htm + * Thanks to the Greasel! 
+ */ static char *x86_cap_flags[] = { "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", "cx8", "apic", "10", "sep", "mtrr", "pge", "mca", "cmov", - "pat", "17", "psn", "19", "20", "21", "22", "mmx", - "24", "kni", "26", "27", "28", "29", "30", "31" + "16", "pse36", "psn", "19", "20", "21", "22", "mmx", + "24", "xmm", "26", "27", "28", "29", "30", "31" }; struct cpuinfo_x86 *c = cpu_data; int i, n; - for(n=0; n<NR_CPUS; n++, c++) { -#ifdef __SMP__ + for (n = 0; n < NR_CPUS; n++, c++) { +#ifdef CONFIG_SMP if (!(cpu_online_map & (1<<n))) continue; #endif @@ -1430,9 +1455,8 @@ int get_cpuinfo(char * buffer) break; case X86_VENDOR_INTEL: - x86_cap_flags[17] = "pse36"; - x86_cap_flags[18] = "psn"; - x86_cap_flags[24] = "osfxsr"; + x86_cap_flags[16] = "pat"; + x86_cap_flags[24] = "fxsr"; break; case X86_VENDOR_CENTAUR: @@ -1496,14 +1520,14 @@ void cpu_init (void) int nr = smp_processor_id(); struct tss_struct * t = &init_tss[nr]; - if (test_and_set_bit(nr,&cpu_initialized)) { + if (test_and_set_bit(nr, &cpu_initialized)) { printk("CPU#%d already initialized!\n", nr); for (;;) __sti(); } cpus_initialized++; printk("Initializing CPU#%d\n", nr); - if (boot_cpu_data.x86_capability & X86_FEATURE_PSE) + if (cpu_has_pse) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); __asm__ __volatile__("lgdt %0": "=m" (gdt_descr)); diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index a973746b9..18de47dd4 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -19,6 +19,7 @@ #include <linux/ptrace.h> #include <linux/unistd.h> #include <linux/stddef.h> +#include <linux/highuid.h> #include <asm/ucontext.h> #include <asm/uaccess.h> @@ -642,6 +643,7 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) info.si_code = SI_USER; info.si_pid = current->p_pptr->pid; info.si_uid = current->p_pptr->uid; + info.si_uid16 = high2lowuid(current->p_pptr->uid); } /* If the (new) signal is now blocked, requeue it. 
*/ diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 9acf81556..05e0d1d23 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -2,7 +2,7 @@ * Intel SMP support routines. * * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> - * (c) 1998-99 Ingo Molnar <mingo@redhat.com> + * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> * * This code is released under the GNU public license version 2 or * later. @@ -11,18 +11,18 @@ #include <linux/init.h> #include <linux/mm.h> -#include <linux/spinlock.h> -#include <linux/kernel_stat.h> -#include <linux/smp_lock.h> #include <linux/irq.h> - #include <linux/delay.h> +#include <linux/spinlock.h> +#include <linux/smp_lock.h> +#include <linux/kernel_stat.h> #include <linux/mc146818rtc.h> + #include <asm/mtrr.h> #include <asm/pgalloc.h> /* - * Some notes on processor bugs: + * Some notes on x86 processor bugs affecting SMP operation: * * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. * The Linux implications for SMP are handled as follows: @@ -381,7 +381,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) static inline void do_flush_tlb_all_local(void) { - local_flush_tlb(); + __flush_tlb_all(); if (!current->mm && current->active_mm) { unsigned long cpu = smp_processor_id(); @@ -397,9 +397,7 @@ static void flush_tlb_all_ipi(void* info) void flush_tlb_all(void) { - if (cpu_online_map ^ (1 << smp_processor_id())) - while (smp_call_function (flush_tlb_all_ipi,0,0,1) == -EBUSY) - mb(); + smp_call_function (flush_tlb_all_ipi,0,1,1); do_flush_tlb_all_local(); } @@ -438,50 +436,44 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, * [SUMMARY] Run a function on all other CPUs. * <func> The function to run. This must be fast and non-blocking. * <info> An arbitrary pointer to pass to the function. - * <nonatomic> If true, we might schedule away to lock the mutex + * <nonatomic> currently unused. 
* <wait> If true, wait (atomically) until function has completed on other CPUs. * [RETURNS] 0 on success, else a negative status code. Does not return until * remote CPUs are nearly ready to execute <<func>> or are or have executed. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler, you may call it from a bottom half handler. */ { struct call_data_struct data; int ret, cpus = smp_num_cpus-1; - static DECLARE_MUTEX(lock); - unsigned long timeout; + static spinlock_t lock = SPIN_LOCK_UNLOCKED; - if (nonatomic) - down(&lock); - else - if (down_trylock(&lock)) - return -EBUSY; + if(cpus == 0) + return 0; - call_data = &data; data.func = func; data.info = info; atomic_set(&data.started, 0); data.wait = wait; if (wait) atomic_set(&data.finished, 0); - mb(); + spin_lock_bh(&lock); + call_data = &data; /* Send a message to all other CPUs and wait for them to respond */ send_IPI_allbutself(CALL_FUNCTION_VECTOR); /* Wait for response */ - timeout = jiffies + HZ; - while ((atomic_read(&data.started) != cpus) - && time_before(jiffies, timeout)) + /* FIXME: lock-up detection, backtrace on lock-up */ + while(atomic_read(&data.started) != cpus) barrier(); - ret = -ETIMEDOUT; - if (atomic_read(&data.started) != cpus) - goto out; + ret = 0; if (wait) while (atomic_read(&data.finished) != cpus) barrier(); -out: - call_data = NULL; - up(&lock); + spin_unlock_bh(&lock); return 0; } @@ -504,14 +496,12 @@ static void stop_this_cpu (void * dummy) void smp_send_stop(void) { - unsigned long flags; + smp_call_function(stop_this_cpu, NULL, 1, 0); + smp_num_cpus = 1; - __save_flags(flags); __cli(); - smp_call_function(stop_this_cpu, NULL, 1, 0); disable_local_APIC(); - __restore_flags(flags); - + __sti(); } /* @@ -561,431 +551,3 @@ asmlinkage void smp_call_function_interrupt(void) atomic_inc(&call_data->finished); } -/* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -asmlinkage void 
smp_spurious_interrupt(void) -{ - ack_APIC_irq(); - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - printk("spurious APIC interrupt on CPU#%d, should never happen.\n", - smp_processor_id()); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ - -static spinlock_t err_lock = SPIN_LOCK_UNLOCKED; - -asmlinkage void smp_error_interrupt(void) -{ - unsigned long v; - - spin_lock(&err_lock); - - v = apic_read(APIC_ESR); - printk("APIC error interrupt on CPU#%d, should never happen.\n", - smp_processor_id()); - printk("... APIC ESR0: %08lx\n", v); - - apic_write(APIC_ESR, 0); - v |= apic_read(APIC_ESR); - printk("... APIC ESR1: %08lx\n", v); - /* - * Be a bit more verbose. (multiple bits can be set) - */ - if (v & 0x01) - printk("... bit 0: APIC Send CS Error (hw problem).\n"); - if (v & 0x02) - printk("... bit 1: APIC Receive CS Error (hw problem).\n"); - if (v & 0x04) - printk("... bit 2: APIC Send Accept Error.\n"); - if (v & 0x08) - printk("... bit 3: APIC Receive Accept Error.\n"); - if (v & 0x10) - printk("... bit 4: Reserved!.\n"); - if (v & 0x20) - printk("... bit 5: Send Illegal Vector (kernel bug).\n"); - if (v & 0x40) - printk("... bit 6: Received Illegal Vector.\n"); - if (v & 0x80) - printk("... bit 7: Illegal Register Address.\n"); - - ack_APIC_irq(); - - irq_err_count++; - - spin_unlock(&err_lock); -} - -/* - * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts - * per second. We assume that the caller has already set up the local - * APIC. - * - * The APIC timer is not exactly sync with the external timer chip, it - * closely follows bus clocks. - */ - -int prof_multiplier[NR_CPUS] = { 1, }; -int prof_old_multiplier[NR_CPUS] = { 1, }; -int prof_counter[NR_CPUS] = { 1, }; - -/* - * The timer chip is already set up at HZ interrupts per second here, - * but we do not accept timer interrupts yet. We only allow the BP - * to calibrate. 
- */ -static unsigned int __init get_8254_timer_count(void) -{ - extern rwlock_t xtime_lock; - unsigned long flags; - - unsigned int count; - - write_lock_irqsave(&xtime_lock, flags); - - outb_p(0x00, 0x43); - count = inb_p(0x40); - count |= inb_p(0x40) << 8; - - write_unlock_irqrestore(&xtime_lock, flags); - - return count; -} - -void __init wait_8254_wraparound(void) -{ - unsigned int curr_count, prev_count=~0; - int delta; - - curr_count = get_8254_timer_count(); - - do { - prev_count = curr_count; - curr_count = get_8254_timer_count(); - delta = curr_count-prev_count; - - /* - * This limit for delta seems arbitrary, but it isn't, it's - * slightly above the level of error a buggy Mercury/Neptune - * chipset timer can cause. - */ - - } while (delta<300); -} - -/* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. 
- */ - -#define APIC_DIVISOR 16 - -void __setup_APIC_LVTT(unsigned int clocks) -{ - unsigned int lvtt1_value, tmp_value; - - tmp_value = apic_read(APIC_LVTT); - lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV) | - APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; - apic_write(APIC_LVTT, lvtt1_value); - - /* - * Divide PICLK by 16 - */ - tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); - - tmp_value = apic_read(APIC_TMICT); - apic_write(APIC_TMICT, clocks/APIC_DIVISOR); -} - -void setup_APIC_timer(void * data) -{ - unsigned int clocks = (unsigned int) data, slice, t0, t1, nr; - unsigned long flags; - int delta; - - __save_flags(flags); - __sti(); - /* - * ok, Intel has some smart code in their APIC that knows - * if a CPU was in 'hlt' lowpower mode, and this increases - * its APIC arbitration priority. To avoid the external timer - * IRQ APIC event being in synchron with the APIC clock we - * introduce an interrupt skew to spread out timer events. - * - * The number of slices within a 'big' timeslice is smp_num_cpus+1 - */ - - slice = clocks / (smp_num_cpus+1); - nr = cpu_number_map[smp_processor_id()] + 1; - printk("cpu: %d, clocks: %d, slice: %d, nr: %d.\n", - smp_processor_id(), clocks, slice, nr); - /* - * Wait for IRQ0's slice: - */ - wait_8254_wraparound(); - - __setup_APIC_LVTT(clocks); - - t0 = apic_read(APIC_TMCCT)*APIC_DIVISOR; - do { - t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR; - delta = (int)(t0 - t1 - slice*nr); - } while (delta < 0); - - __setup_APIC_LVTT(clocks); - - printk("CPU%d<C0:%d,C:%d,D:%d,S:%d,C:%d>\n", - smp_processor_id(), t0, t1, delta, slice, clocks); - - __restore_flags(flags); -} - -/* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq - * to calibrate, since some later bootup code depends on getting - * the first irq? Ugh. 
- * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. - */ - -int __init calibrate_APIC_clock(void) -{ - unsigned long long t1 = 0, t2 = 0; - long tt1, tt2; - long result; - int i; - const int LOOPS = HZ/10; - - printk("calibrating APIC timer ... "); - - /* - * Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. - */ - __setup_APIC_LVTT(1000000000); - - /* - * The timer chip counts down to zero. Let's wait - * for a wraparound to start exact measurement: - * (the current tick might have been already half done) - */ - - wait_8254_wraparound(); - - /* - * We wrapped around just now. Let's start: - */ - if (cpu_has_tsc) - rdtscll(t1); - tt1 = apic_read(APIC_TMCCT); - - /* - * Let's wait LOOPS wraprounds: - */ - for (i = 0; i < LOOPS; i++) - wait_8254_wraparound(); - - tt2 = apic_read(APIC_TMCCT); - if (cpu_has_tsc) - rdtscll(t2); - - /* - * The APIC bus clock counter is 32 bits only, it - * might have overflown, but note that we use signed - * longs, thus no extra care needed. - * - * underflown to be exact, as the timer counts down ;) - */ - - result = (tt1-tt2)*APIC_DIVISOR/LOOPS; - - if (cpu_has_tsc) - printk("\n..... CPU clock speed is %ld.%04ld MHz.\n", - ((long)(t2-t1)/LOOPS)/(1000000/HZ), - ((long)(t2-t1)/LOOPS)%(1000000/HZ)); - - printk("..... host bus clock speed is %ld.%04ld MHz.\n", - result/(1000000/HZ), - result%(1000000/HZ)); - - return result; -} - -static unsigned int calibration_result; - -void __init setup_APIC_clocks(void) -{ - unsigned long flags; - - __save_flags(flags); - __cli(); - - calibration_result = calibrate_APIC_clock(); - - smp_call_function(setup_APIC_timer, (void *)calibration_result, 1, 1); - - /* - * Now set up the timer for real. 
- */ - setup_APIC_timer((void *)calibration_result); - - __restore_flags(flags); -} - -/* - * the frequency of the profiling timer can be changed - * by writing a multiplier value into /proc/profile. - */ -int setup_profiling_timer(unsigned int multiplier) -{ - int i; - - /* - * Sanity check. [at least 500 APIC cycles should be - * between APIC interrupts as a rule of thumb, to avoid - * irqs flooding us] - */ - if ( (!multiplier) || (calibration_result/multiplier < 500)) - return -EINVAL; - - /* - * Set the new multiplier for each CPU. CPUs don't start using the - * new values until the next timer interrupt in which they do process - * accounting. At that time they also adjust their APIC timers - * accordingly. - */ - for (i = 0; i < NR_CPUS; ++i) - prof_multiplier[i] = multiplier; - - return 0; -} - -#undef APIC_DIVISOR - -/* - * Local timer interrupt handler. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. - */ - -inline void smp_local_timer_interrupt(struct pt_regs * regs) -{ - int user = (user_mode(regs) != 0); - int cpu = smp_processor_id(); - - /* - * The profiling function is SMP safe. (nothing can mess - * around with "current", and the profiling counters are - * updated with atomic operations). This is especially - * useful with a profiling multiplier != 1 - */ - if (!user) - x86_do_profile(regs->eip); - - if (--prof_counter[cpu] <= 0) { - int system = 1 - user; - struct task_struct * p = current; - - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. 
- */ - prof_counter[cpu] = prof_multiplier[cpu]; - if (prof_counter[cpu] != prof_old_multiplier[cpu]) { - __setup_APIC_LVTT(calibration_result/prof_counter[cpu]); - prof_old_multiplier[cpu] = prof_counter[cpu]; - } - - /* - * After doing the above, we need to make like - * a normal interrupt - otherwise timer interrupts - * ignore the global interrupt lock, which is the - * WrongThing (tm) to do. - */ - - irq_enter(cpu, 0); - update_one_process(p, 1, user, system, cpu); - if (p->pid) { - p->counter -= 1; - if (p->counter <= 0) { - p->counter = 0; - p->need_resched = 1; - } - if (p->priority < DEF_PRIORITY) { - kstat.cpu_nice += user; - kstat.per_cpu_nice[cpu] += user; - } else { - kstat.cpu_user += user; - kstat.per_cpu_user[cpu] += user; - } - kstat.cpu_system += system; - kstat.per_cpu_system[cpu] += system; - - } - irq_exit(cpu, 0); - } - - /* - * We take the 'long' return path, and there every subsystem - * grabs the apropriate locks (kernel lock/ irq lock). - * - * we might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. - */ -} - -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesnt support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ -unsigned int apic_timer_irqs [NR_CPUS] = { 0, }; - -void smp_apic_timer_interrupt(struct pt_regs * regs) -{ - /* - * the NMI deadlock-detector uses this. - */ - apic_timer_irqs[smp_processor_id()]++; - - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. 
- */ - ack_APIC_irq(); - smp_local_timer_interrupt(regs); -} - diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 811f00f38..3ff5cc002 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -1,8 +1,8 @@ /* - * Intel MP v1.1/v1.4 specification compliant parsing routines. + * x86 SMP booting functions * * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> - * (c) 1998, 1999 Ingo Molnar <mingo@redhat.com> + * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> * * Much of the core SMP work is based on previous work by Thomas Radke, to * whom a great many thanks are extended. @@ -26,10 +26,8 @@ * Alan Cox : Dumb bug: 'B' step PPro's are fine * Ingo Molnar : Added APIC timers, based on code * from Jose Renau - * Alan Cox : Added EBDA scanning * Ingo Molnar : various cleanups and rewrites * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. - * Maciej W. Rozycki : Bits for genuine 82489DX timers */ #include <linux/config.h> @@ -51,72 +49,28 @@ static int smp_b_stepping = 0; /* Setup configured maximum number of CPUs to activate */ static int max_cpus = -1; -/* 1 if "noapic" boot option passed */ -int skip_ioapic_setup = 0; /* Total count of live CPUs */ -int smp_num_cpus = 0; -/* Internal processor count */ -static unsigned int num_processors = 1; +int smp_num_cpus = 1; -/* Have we found an SMP box */ -int smp_found_config = 0; - -/* Bitmask of physically existing CPUs */ -unsigned long cpu_present_map = 0; /* Bitmask of currently online CPUs */ unsigned long cpu_online_map = 0; -/* which CPU maps to which logical number */ -volatile int cpu_number_map[NR_CPUS]; -/* which logical number maps to which CPU */ -volatile int __cpu_logical_map[NR_CPUS]; +/* which CPU (physical APIC ID) maps to which logical CPU number */ +volatile int x86_apicid_to_cpu[NR_CPUS]; +/* which logical CPU number maps to which CPU (physical APIC ID) */ +volatile int x86_cpu_to_apicid[NR_CPUS]; static volatile unsigned long cpu_callin_map = 0; 
static volatile unsigned long cpu_callout_map = 0; /* Per CPU bogomips and other parameters */ struct cpuinfo_x86 cpu_data[NR_CPUS]; -/* Processor that is doing the boot up */ -static unsigned int boot_cpu_id = 0; -/* Tripped once we need to start cross invalidating */ -static int smp_activated = 0; /* Set when the idlers are all forked */ int smp_threads_ready = 0; /* - * Various Linux-internal data structures created from the - * MP-table. - */ -int apic_version [NR_CPUS]; -int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, }; -extern int nr_ioapics; -extern struct mpc_config_ioapic mp_ioapics [MAX_IO_APICS]; -extern int mp_irq_entries; -extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; -extern int mpc_default_type; -int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, }; -int mp_current_pci_id = 0; -unsigned long mp_lapic_addr = 0; -int pic_mode; - -extern void cache_APIC_registers (void); - -#define SMP_DEBUG 1 - -#if SMP_DEBUG -#define dprintk(x...) printk(##x) -#else -#define dprintk(x...) -#endif - -/* - * IA s/w dev Vol 3, Section 7.4 - */ -#define APIC_DEFAULT_PHYS_BASE 0xfee00000 - -/* * Setup routine for controlling SMP activation * * Command-line option of "nosmp" or "maxcpus=0" will disable SMP @@ -144,471 +98,6 @@ static int __init maxcpus(char *str) __setup("maxcpus=", maxcpus); /* - * Intel MP BIOS table parsing routines: - */ - -#ifndef CONFIG_X86_VISWS_APIC -/* - * Checksum an MP configuration block. 
- */ - -static int __init mpf_checksum(unsigned char *mp, int len) -{ - int sum=0; - while(len--) - sum+=*mp++; - return sum&0xFF; -} - -/* - * Processor encoding in an MP configuration block - */ - -static char __init *mpc_family(int family,int model) -{ - static char n[32]; - static char *model_defs[]= - { - "80486DX","80486DX", - "80486SX","80486DX/2 or 80487", - "80486SL","80486SX/2", - "Unknown","80486DX/2-WB", - "80486DX/4","80486DX/4-WB" - }; - - switch (family) { - case 0x04: - if (model < 10) - return model_defs[model]; - break; - - case 0x05: - return("Pentium(tm)"); - - case 0x06: - return("Pentium(tm) Pro"); - - case 0x0F: - if (model == 0x0F) - return("Special controller"); - } - sprintf(n,"Unknown CPU [%d:%d]",family, model); - return n; -} - -static void __init MP_processor_info (struct mpc_config_processor *m) -{ - int ver; - - if (!(m->mpc_cpuflag & CPU_ENABLED)) - return; - - printk("Processor #%d %s APIC version %d\n", - m->mpc_apicid, - mpc_family( (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 , - (m->mpc_cpufeature & CPU_MODEL_MASK)>>4), - m->mpc_apicver); - -#ifdef SMP_DEBUG - if (m->mpc_featureflag&(1<<0)) - printk(" Floating point unit present.\n"); - if (m->mpc_featureflag&(1<<7)) - printk(" Machine Exception supported.\n"); - if (m->mpc_featureflag&(1<<8)) - printk(" 64 bit compare & exchange supported.\n"); - if (m->mpc_featureflag&(1<<9)) - printk(" Internal APIC present.\n"); -#endif - - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { - dprintk(" Bootup CPU\n"); - boot_cpu_id = m->mpc_apicid; - } else - /* Boot CPU already counted */ - num_processors++; - - if (m->mpc_apicid > NR_CPUS) { - printk("Processor #%d unused. (Max %d processors).\n", - m->mpc_apicid, NR_CPUS); - return; - } - ver = m->mpc_apicver; - - cpu_present_map |= (1<<m->mpc_apicid); - /* - * Validate version - */ - if (ver == 0x0) { - printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. 
(tell your hw vendor)\n", m->mpc_apicid); - ver = 0x10; - } - apic_version[m->mpc_apicid] = ver; -} - -static void __init MP_bus_info (struct mpc_config_bus *m) -{ - char str[7]; - - memcpy(str, m->mpc_bustype, 6); - str[6] = 0; - dprintk("Bus #%d is %s\n", m->mpc_busid, str); - - if (strncmp(str, "ISA", 3) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; - } else { - if (strncmp(str, "EISA", 4) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; - } else { - if (strncmp(str, "PCI", 3) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; - mp_current_pci_id++; - } else { - printk("Unknown bustype %s\n", str); - panic("cannot handle bus - mail to linux-smp@vger.rutgers.edu"); - } } } -} - -static void __init MP_ioapic_info (struct mpc_config_ioapic *m) -{ - if (!(m->mpc_flags & MPC_APIC_USABLE)) - return; - - printk("I/O APIC #%d Version %d at 0x%lX.\n", - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); - if (nr_ioapics >= MAX_IO_APICS) { - printk("Max # of I/O APICs (%d) exceeded (found %d).\n", - MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); - } - mp_ioapics[nr_ioapics] = *m; - nr_ioapics++; -} - -static void __init MP_intsrc_info (struct mpc_config_intsrc *m) -{ - mp_irqs [mp_irq_entries] = *m; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!!\n"); -} - -static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) -{ - /* - * Well it seems all SMP boards in existence - * use ExtINT/LVT1 == LINT0 and - * NMI/LVT2 == LINT1 - the following check - * will show us if this assumptions is false. - * Until then we do not have to add baggage. 
- */ - if ((m->mpc_irqtype == mp_ExtINT) && - (m->mpc_destapiclint != 0)) - BUG(); - if ((m->mpc_irqtype == mp_NMI) && - (m->mpc_destapiclint != 1)) - BUG(); -} - -/* - * Read/parse the MPC - */ - -static int __init smp_read_mpc(struct mp_config_table *mpc) -{ - char str[16]; - int count=sizeof(*mpc); - unsigned char *mpt=((unsigned char *)mpc)+count; - - if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) - { - panic("SMP mptable: bad signature [%c%c%c%c]!\n", - mpc->mpc_signature[0], - mpc->mpc_signature[1], - mpc->mpc_signature[2], - mpc->mpc_signature[3]); - return 1; - } - if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) - { - panic("SMP mptable: checksum error!\n"); - return 1; - } - if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) - { - printk("Bad Config Table version (%d)!!\n",mpc->mpc_spec); - return 1; - } - memcpy(str,mpc->mpc_oem,8); - str[8]=0; - printk("OEM ID: %s ",str); - - memcpy(str,mpc->mpc_productid,12); - str[12]=0; - printk("Product ID: %s ",str); - - printk("APIC at: 0x%lX\n",mpc->mpc_lapic); - - /* save the local APIC address, it might be non-default */ - mp_lapic_addr = mpc->mpc_lapic; - - /* - * Now process the configuration blocks. 
- */ - while (count < mpc->mpc_length) { - switch(*mpt) { - case MP_PROCESSOR: - { - struct mpc_config_processor *m= - (struct mpc_config_processor *)mpt; - MP_processor_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_BUS: - { - struct mpc_config_bus *m= - (struct mpc_config_bus *)mpt; - MP_bus_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_IOAPIC: - { - struct mpc_config_ioapic *m= - (struct mpc_config_ioapic *)mpt; - MP_ioapic_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_INTSRC: - { - struct mpc_config_intsrc *m= - (struct mpc_config_intsrc *)mpt; - - MP_intsrc_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_LINTSRC: - { - struct mpc_config_lintsrc *m= - (struct mpc_config_lintsrc *)mpt; - MP_lintsrc_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - } - } - return num_processors; -} - -/* - * Scan the memory blocks for an SMP configuration block. - */ -static int __init smp_get_mpf(struct intel_mp_floating *mpf) -{ - printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); - if (mpf->mpf_feature2 & (1<<7)) { - printk(" IMCR and PIC compatibility mode.\n"); - pic_mode = 1; - } else { - printk(" Virtual Wire compatibility mode.\n"); - pic_mode = 0; - } - smp_found_config = 1; - /* - * default CPU id - if it's different in the mptable - * then we change it before first using it. - */ - boot_cpu_id = 0; - /* - * Now see if we need to read further. - */ - if (mpf->mpf_feature1 != 0) { - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* - * 2 CPUs, numbered 0 & 1. 
- */ - cpu_present_map = 3; - num_processors = 2; - - nr_ioapics = 1; - mp_ioapics[0].mpc_apicaddr = 0xFEC00000; - /* - * Save the default type number, we - * need it later to set the IO-APIC - * up properly: - */ - mpc_default_type = mpf->mpf_feature1; - - printk("Bus #0 is "); - } - - switch (mpf->mpf_feature1) { - case 1: - case 5: - printk("ISA\n"); - break; - case 2: - printk("EISA with no IRQ0 and no IRQ13 DMA chaining\n"); - break; - case 6: - case 3: - printk("EISA\n"); - break; - case 4: - case 7: - printk("MCA\n"); - break; - case 0: - if (!mpf->mpf_physptr) - BUG(); - break; - default: - printk("???\nUnknown standard configuration %d\n", - mpf->mpf_feature1); - return 1; - } - if (mpf->mpf_feature1 > 4) { - printk("Bus #1 is PCI\n"); - - /* - * Set local APIC version to the integrated form. - * It's initialized to zero otherwise, representing - * a discrete 82489DX. - */ - apic_version[0] = 0x10; - apic_version[1] = 0x10; - } - /* - * Read the physical hardware table. Anything here will override the - * defaults. - */ - if (mpf->mpf_physptr) - smp_read_mpc((void *)mpf->mpf_physptr); - - __cpu_logical_map[0] = boot_cpu_id; - global_irq_holder = boot_cpu_id; - current->processor = boot_cpu_id; - - printk("Processors: %d\n", num_processors); - /* - * Only use the first configuration found. 
- */ - return 1; -} - -static int __init smp_scan_config(unsigned long base, unsigned long length) -{ - unsigned long *bp = phys_to_virt(base); - struct intel_mp_floating *mpf; - - dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); - if (sizeof(*mpf) != 16) - printk("Error: MPF size\n"); - - while (length > 0) { - mpf = (struct intel_mp_floating *)bp; - if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && - !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4)) ) { - - printk("found SMP MP-table at %08ld\n", - virt_to_phys(mpf)); - smp_get_mpf(mpf); - return 1; - } - bp += 4; - length -= 16; - } - return 0; -} - -void __init init_intel_smp (void) -{ - unsigned int address; - - /* - * FIXME: Linux assumes you have 640K of base ram.. - * this continues the error... - * - * 1) Scan the bottom 1K for a signature - * 2) Scan the top 1K of base RAM - * 3) Scan the 64K of bios - */ - if (smp_scan_config(0x0,0x400) || - smp_scan_config(639*0x400,0x400) || - smp_scan_config(0xF0000,0x10000)) - return; - /* - * If it is an SMP machine we should know now, unless the - * configuration is in an EISA/MCA bus machine with an - * extended bios data area. - * - * there is a real-mode segmented pointer pointing to the - * 4K EBDA area at 0x40E, calculate and scan it here. - * - * NOTE! There are Linux loaders that will corrupt the EBDA - * area, and as such this kind of SMP config may be less - * trustworthy, simply because the SMP table may have been - * stomped on during early boot. These loaders are buggy and - * should be fixed. 
- */ - - address = *(unsigned short *)phys_to_virt(0x40E); - address <<= 4; - smp_scan_config(address, 0x1000); - if (smp_found_config) - printk(KERN_WARNING "WARNING: MP table in the EBDA can be UNSAFE, contact linux-smp@vger.rutgers.edu if you experience SMP problems!\n"); -} - -#else - -/* - * The Visual Workstation is Intel MP compliant in the hardware - * sense, but it doesnt have a BIOS(-configuration table). - * No problem for Linux. - */ -void __init init_visws_smp(void) -{ - smp_found_config = 1; - - cpu_present_map |= 2; /* or in id 1 */ - apic_version[1] |= 0x10; /* integrated APIC */ - apic_version[0] |= 0x10; - - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; -} - -#endif - -/* - * - Intel MP Configuration Table - * - or SGI Visual Workstation configuration - */ -void __init init_smp_config (void) -{ -#ifndef CONFIG_VISWS - init_intel_smp(); -#else - init_visws_smp(); -#endif -} - - - -/* * Trampoline 80x86 program as an array. */ @@ -688,209 +177,12 @@ void __init smp_commence(void) /* * Lets the callins below out of their loop. */ - dprintk("Setting commenced=1, go go go\n"); + Dprintk("Setting commenced=1, go go go\n"); wmb(); atomic_set(&smp_commenced,1); } -extern void __error_in_io_apic_c(void); - - -int get_maxlvt(void) -{ - unsigned int v, ver, maxlvt; - - v = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(v); - /* 82489DXs do not report # of LVT entries. */ - maxlvt = APIC_INTEGRATED(ver) ? 
GET_APIC_MAXLVT(v) : 2; - return maxlvt; -} - -void disable_local_APIC (void) -{ - unsigned long value; - int maxlvt; - - /* - * Disable APIC - */ - value = apic_read(APIC_SPIV); - value &= ~(1<<8); - apic_write(APIC_SPIV,value); - - /* - * Clean APIC state for other OSs: - */ - value = apic_read(APIC_SPIV); - value &= ~(1<<8); - apic_write(APIC_SPIV,value); - maxlvt = get_maxlvt(); - apic_write_around(APIC_LVTT, 0x00010000); - apic_write_around(APIC_LVT0, 0x00010000); - apic_write_around(APIC_LVT1, 0x00010000); - if (maxlvt >= 3) - apic_write_around(APIC_LVTERR, 0x00010000); - if (maxlvt >= 4) - apic_write_around(APIC_LVTPC, 0x00010000); -} - -void __init setup_local_APIC (void) -{ - unsigned long value, ver, maxlvt; - - if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) - __error_in_io_apic_c(); - - value = apic_read(APIC_SPIV); - /* - * Enable APIC - */ - value |= (1<<8); - - /* - * Some unknown Intel IO/APIC (or APIC) errata is biting us with - * certain networking cards. If high frequency interrupts are - * happening on a particular IOAPIC pin, plus the IOAPIC routing - * entry is masked/unmasked at a high rate as well then sooner or - * later IOAPIC line gets 'stuck', no more interrupts are received - * from the device. If focus CPU is disabled then the hang goes - * away, oh well :-( - * - * [ This bug can be reproduced easily with a level-triggered - * PCI Ne2000 networking cards and PII/PIII processors, dual - * BX chipset. ] - */ -#if 0 - /* Enable focus processor (bit==0) */ - value &= ~(1<<9); -#else - /* Disable focus processor (bit==1) */ - value |= (1<<9); -#endif - /* - * Set spurious IRQ vector - */ - value |= SPURIOUS_APIC_VECTOR; - apic_write(APIC_SPIV,value); - - /* - * Set up LVT0, LVT1: - * - * set up through-local-APIC on the BP's LINT0. This is not - * strictly necessery in pure symmetric-IO mode, but sometimes - * we delegate interrupts to the 8259A. 
- */ - if (hard_smp_processor_id() == boot_cpu_id) { - value = 0x00000700; - printk("enabled ExtINT on CPU#%d\n", hard_smp_processor_id()); - } else { - value = 0x00010700; - printk("masked ExtINT on CPU#%d\n", hard_smp_processor_id()); - } - apic_write_around(APIC_LVT0,value); - - /* - * only the BP should see the LINT1 NMI signal, obviously. - */ - if (hard_smp_processor_id() == boot_cpu_id) - value = 0x00000400; // unmask NMI - else - value = 0x00010400; // mask NMI - apic_write_around(APIC_LVT1,value); - - value = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(value); - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - maxlvt = get_maxlvt(); - /* - * Due to the Pentium erratum 3AP. - */ - if (maxlvt > 3) { - apic_readaround(APIC_SPIV); // not strictly necessery - apic_write(APIC_ESR, 0); - } - value = apic_read(APIC_ESR); - printk("ESR value before enabling vector: %08lx\n", value); - - value = apic_read(APIC_LVTERR); - value = ERROR_APIC_VECTOR; // enables sending errors - apic_write(APIC_LVTERR,value); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt != 3) { - apic_readaround(APIC_SPIV); - apic_write(APIC_ESR, 0); - } - value = apic_read(APIC_ESR); - printk("ESR value after enabling vector: %08lx\n", value); - } else - printk("No ESR for 82489DX.\n"); - - /* - * Set Task Priority to 'accept all'. We never change this - * later on. - */ - value = apic_read(APIC_TASKPRI); - value &= ~APIC_TPRI_MASK; - apic_write(APIC_TASKPRI,value); - - /* - * Set up the logical destination ID and put the - * APIC into flat delivery mode. 
- */ - value = apic_read(APIC_LDR); - value &= ~APIC_LDR_MASK; - value |= (1<<(smp_processor_id()+24)); - apic_write(APIC_LDR,value); - - value = apic_read(APIC_DFR); - value |= SET_APIC_DFR(0xf); - apic_write(APIC_DFR, value); -} - -void __init init_smp_mappings(void) -{ - unsigned long apic_phys; - - if (smp_found_config) { - apic_phys = mp_lapic_addr; - } else { - /* - * set up a fake all zeroes page to simulate the - * local APIC and another one for the IO-APIC. We - * could use the real zero-page, but it's safer - * this way if some buggy code writes to this page ... - */ - apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); - apic_phys = __pa(apic_phys); - } - set_fixmap(FIX_APIC_BASE, apic_phys); - dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys); - -#ifdef CONFIG_X86_IO_APIC - { - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; - - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; - } else { - ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); - ioapic_phys = __pa(ioapic_phys); - } - set_fixmap(idx,ioapic_phys); - dprintk("mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - } - } -#endif -} - /* * TSC synchronization. * @@ -990,20 +282,14 @@ static void __init synchronize_tsc_bp (void) } sum = 0; - for (i = 0; i < NR_CPUS; i++) { - if (!(cpu_online_map & (1 << i))) - continue; - + for (i = 0; i < smp_num_cpus; i++) { t0 = tsc_values[i]; sum += t0; } avg = div64(sum, smp_num_cpus); sum = 0; - for (i = 0; i < NR_CPUS; i++) { - if (!(cpu_online_map & (1 << i))) - continue; - + for (i = 0; i < smp_num_cpus; i++) { delta = tsc_values[i] - avg; if (delta < 0) delta = -delta; @@ -1059,15 +345,20 @@ extern void calibrate_delay(void); void __init smp_callin(void) { - int cpuid; + int cpuid, phys_id; unsigned long timeout; /* * (This works even if the APIC is not enabled.) 
*/ - cpuid = GET_APIC_ID(apic_read(APIC_ID)); - - dprintk("CPU#%d waiting for CALLOUT\n", cpuid); + phys_id = GET_APIC_ID(apic_read(APIC_ID)); + cpuid = current->processor; + if (test_and_set_bit(cpuid, &cpu_online_map)) { + printk("huh, phys CPU#%d, CPU#%d already present??\n", + phys_id, cpuid); + BUG(); + } + Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); /* * STARTUP IPIs are fragile beasts as they might sometimes @@ -1102,7 +393,7 @@ void __init smp_callin(void) * boards) */ - dprintk("CALLIN, before setup_local_APIC().\n"); + Dprintk("CALLIN, before setup_local_APIC().\n"); setup_local_APIC(); sti(); @@ -1117,7 +408,7 @@ void __init smp_callin(void) * Get our bogomips. */ calibrate_delay(); - dprintk("Stack at about %p\n",&cpuid); + Dprintk("Stack at about %p\n",&cpuid); /* * Save our processor parameters @@ -1133,7 +424,7 @@ void __init smp_callin(void) * Synchronize the TSC with the BP */ if (cpu_has_tsc) - synchronize_tsc_ap (); + synchronize_tsc_ap(); } int cpucount = 0; @@ -1198,21 +489,21 @@ static int __init fork_by_hand(void) return do_fork(CLONE_VM|CLONE_PID, 0, ®s); } -static void __init do_boot_cpu(int i) +static void __init do_boot_cpu (int apicid) { unsigned long cfg; struct task_struct *idle; unsigned long send_status, accept_status; - int timeout, num_starts, j; + int timeout, num_starts, j, cpu; unsigned long start_eip; - cpucount++; + cpu = ++cpucount; /* * We can't use kernel_thread since we must avoid to * reschedule the child. 
*/ if (fork_by_hand() < 0) - panic("failed fork for CPU %d", i); + panic("failed fork for CPU %d", cpu); /* * We remove it from the pidhash and the runqueue @@ -1220,23 +511,23 @@ static void __init do_boot_cpu(int i) */ idle = init_task.prev_task; if (!idle) - panic("No idle process for CPU %d", i); + panic("No idle process for CPU %d", cpu); - idle->processor = i; - __cpu_logical_map[cpucount] = i; - cpu_number_map[i] = cpucount; + idle->processor = cpu; + x86_cpu_to_apicid[cpu] = apicid; + x86_apicid_to_cpu[apicid] = cpu; idle->has_cpu = 1; /* we schedule the first task manually */ idle->thread.eip = (unsigned long) start_secondary; del_from_runqueue(idle); unhash_process(idle); - init_tasks[cpucount] = idle; + init_tasks[cpu] = idle; /* start_eip had better be page-aligned! */ start_eip = setup_trampoline(); /* So we see what's up */ - printk("Booting processor %d eip %lx\n", i, start_eip); + printk("Booting processor %d eip %lx\n", cpu, start_eip); stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); /* @@ -1244,21 +535,20 @@ static void __init do_boot_cpu(int i) * the targeted processor. */ - dprintk("Setting warm reset code and vector.\n"); + Dprintk("Setting warm reset code and vector.\n"); CMOS_WRITE(0xa, 0xf); local_flush_tlb(); - dprintk("1.\n"); + Dprintk("1.\n"); *((volatile unsigned short *) phys_to_virt(0x469)) = start_eip >> 4; - dprintk("2.\n"); + Dprintk("2.\n"); *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf; - dprintk("3.\n"); + Dprintk("3.\n"); /* * Be paranoid about clearing APIC errors. */ - - if (APIC_INTEGRATED(apic_version[i])) { + if (APIC_INTEGRATED(apic_version[apicid])) { apic_readaround(APIC_SPIV); apic_write(APIC_ESR, 0); accept_status = (apic_read(APIC_ESR) & 0xEF); @@ -1274,7 +564,7 @@ static void __init do_boot_cpu(int i) * Starting actual IPI sequence... 
*/ - dprintk("Asserting INIT.\n"); + Dprintk("Asserting INIT.\n"); /* * Turn INIT on @@ -1285,7 +575,7 @@ static void __init do_boot_cpu(int i) /* * Target chip */ - apic_write(APIC_ICR2, cfg | SET_APIC_DEST_FIELD(i)); + apic_write(APIC_ICR2, cfg | SET_APIC_DEST_FIELD(apicid)); /* * Send IPI @@ -1296,12 +586,12 @@ static void __init do_boot_cpu(int i) apic_write(APIC_ICR, cfg); udelay(200); - dprintk("Deasserting INIT.\n"); + Dprintk("Deasserting INIT.\n"); /* Target chip */ cfg = apic_read(APIC_ICR2); cfg &= 0x00FFFFFF; - apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); + apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(apicid)); /* Send IPI */ cfg = apic_read(APIC_ICR); @@ -1316,8 +606,7 @@ static void __init do_boot_cpu(int i) * If we don't have an integrated APIC, don't * send the STARTUP IPIs. */ - - if (APIC_INTEGRATED(apic_version[i])) + if (APIC_INTEGRATED(apic_version[apicid])) num_starts = 2; else num_starts = 0; @@ -1325,13 +614,14 @@ static void __init do_boot_cpu(int i) /* * Run STARTUP IPI loop. 
*/ + Dprintk("#startup loops: %d.\n", num_starts); for (j = 1; j <= num_starts; j++) { - dprintk("Sending STARTUP #%d.\n",j); + Dprintk("Sending STARTUP #%d.\n",j); apic_readaround(APIC_SPIV); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - dprintk("After apic_write.\n"); + Dprintk("After apic_write.\n"); /* * STARTUP IPI @@ -1340,7 +630,7 @@ static void __init do_boot_cpu(int i) /* Target chip */ cfg = apic_read(APIC_ICR2); cfg &= 0x00FFFFFF; - apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); + apic_write(APIC_ICR2, cfg | SET_APIC_DEST_FIELD(apicid)); /* Boot on the stack */ cfg = apic_read(APIC_ICR); @@ -1350,12 +640,12 @@ static void __init do_boot_cpu(int i) /* Kick the second */ apic_write(APIC_ICR, cfg); - dprintk("Startup point 1.\n"); + Dprintk("Startup point 1.\n"); - dprintk("Waiting for send to finish...\n"); + Dprintk("Waiting for send to finish...\n"); timeout = 0; do { - dprintk("+"); + Dprintk("+"); udelay(100); send_status = apic_read(APIC_ICR) & 0x1000; } while (send_status && (timeout++ < 1000)); @@ -1368,7 +658,7 @@ static void __init do_boot_cpu(int i) if (send_status || accept_status) break; } - dprintk("After Startup.\n"); + Dprintk("After Startup.\n"); if (send_status) printk("APIC never delivered???\n"); @@ -1379,24 +669,24 @@ static void __init do_boot_cpu(int i) /* * allow APs to start initializing. 
*/ - dprintk("Before Callout %d.\n", i); - set_bit(i, &cpu_callout_map); - dprintk("After Callout %d.\n", i); + Dprintk("Before Callout %d.\n", cpu); + set_bit(cpu, &cpu_callout_map); + Dprintk("After Callout %d.\n", cpu); /* * Wait 5s total for a response */ - for (timeout = 0; timeout < 50000; timeout++) { - if (test_bit(i, &cpu_callin_map)) + for (timeout = 0; timeout < 1000000000; timeout++) { + if (test_bit(cpu, &cpu_callin_map)) break; /* It has booted */ udelay(100); } - if (test_bit(i, &cpu_callin_map)) { + if (test_bit(cpu, &cpu_callin_map)) { /* number CPUs logically, starting from 1 (BSP is 0) */ - printk("OK.\n"); - printk("CPU%d: ", i); - print_cpu_info(&cpu_data[i]); + Dprintk("OK.\n"); + printk("CPU%d: ", cpu); + print_cpu_info(&cpu_data[cpu]); } else { if (*((volatile unsigned char *)phys_to_virt(8192)) == 0xA5) /* trampoline code not run */ @@ -1404,10 +694,10 @@ static void __init do_boot_cpu(int i) else printk("CPU booted but not responding.\n"); } - dprintk("CPU has booted.\n"); + Dprintk("CPU has booted.\n"); } else { - __cpu_logical_map[cpucount] = -1; - cpu_number_map[i] = -1; + x86_cpu_to_apicid[cpu] = -1; + x86_apicid_to_cpu[apicid] = -1; cpucount--; } @@ -1462,7 +752,7 @@ extern int prof_counter[NR_CPUS]; void __init smp_boot_cpus(void) { - int i; + int apicid, cpu; #ifdef CONFIG_MTRR /* Must be done before other processors booted */ @@ -1473,58 +763,63 @@ void __init smp_boot_cpus(void) * and the per-CPU profiling counter/multiplier */ - for (i = 0; i < NR_CPUS; i++) { - cpu_number_map[i] = -1; - prof_counter[i] = 1; - prof_old_multiplier[i] = 1; - prof_multiplier[i] = 1; + for (apicid = 0; apicid < NR_CPUS; apicid++) { + x86_apicid_to_cpu[apicid] = -1; + prof_counter[apicid] = 1; + prof_old_multiplier[apicid] = 1; + prof_multiplier[apicid] = 1; } /* * Setup boot CPU information */ - - smp_store_cpu_info(boot_cpu_id); /* Final full version of the data */ - smp_tune_scheduling(); - printk("CPU%d: ", boot_cpu_id); - 
print_cpu_info(&cpu_data[boot_cpu_id]); + smp_store_cpu_info(0); /* Final full version of the data */ + printk("CPU%d: ", 0); + print_cpu_info(&cpu_data[0]); /* - * not necessary because the MP table should list the boot - * CPU too, but we do it for the sake of robustness anyway. - * (and for the case when a non-SMP board boots an SMP kernel) + * We have the boot CPU online for sure. */ - cpu_present_map |= (1 << hard_smp_processor_id()); - - cpu_number_map[boot_cpu_id] = 0; - + set_bit(0, &cpu_online_map); + x86_apicid_to_cpu[boot_cpu_id] = 0; + x86_cpu_to_apicid[0] = boot_cpu_id; + global_irq_holder = 0; + current->processor = 0; init_idle(); + smp_tune_scheduling(); /* * If we couldnt find an SMP configuration at boot time, * get out of here now! */ - if (!smp_found_config) { printk(KERN_NOTICE "SMP motherboard not detected. Using dummy APIC emulation.\n"); #ifndef CONFIG_VISWS io_apic_irqs = 0; #endif - cpu_online_map = cpu_present_map; + cpu_online_map = phys_cpu_present_map = 1; smp_num_cpus = 1; goto smp_done; } /* - * If SMP should be disabled, then really disable it! + * Should not be necessary because the MP table should list the boot + * CPU too, but we do it for the sake of robustness anyway. */ + if (!test_bit(boot_cpu_id, &phys_cpu_present_map)) { + printk("weird, boot CPU (#%d) not listed by the BIOS.\n", + boot_cpu_id); + phys_cpu_present_map |= (1 << hard_smp_processor_id()); + } + /* + * If SMP should be disabled, then really disable it! 
+ */ if (!max_cpus) { smp_found_config = 0; printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); } -#ifdef SMP_DEBUG { int reg; @@ -1536,11 +831,11 @@ void __init smp_boot_cpus(void) */ reg = apic_read(APIC_LVR); - dprintk("Getting VERSION: %x\n", reg); + Dprintk("Getting VERSION: %x\n", reg); apic_write(APIC_LVR, 0); reg = apic_read(APIC_LVR); - dprintk("Getting VERSION: %x\n", reg); + Dprintk("Getting VERSION: %x\n", reg); /* * The two version reads above should print the same @@ -1553,14 +848,12 @@ void __init smp_boot_cpus(void) * compatibility mode, but most boxes are anymore. */ - reg = apic_read(APIC_LVT0); - dprintk("Getting LVT0: %x\n", reg); + Dprintk("Getting LVT0: %x\n", reg); reg = apic_read(APIC_LVT1); - dprintk("Getting LVT1: %x\n", reg); + Dprintk("Getting LVT1: %x\n", reg); } -#endif setup_local_APIC(); @@ -1570,42 +863,33 @@ void __init smp_boot_cpus(void) /* * Now scan the CPU present map and fire up the other CPUs. */ + Dprintk("CPU present map: %lx\n", phys_cpu_present_map); - /* - * Add all detected CPUs. (later on we can down individual - * CPUs which will change cpu_online_map but not necessarily - * cpu_present_map. We are pretty much ready for hot-swap CPUs.) - */ - cpu_online_map = cpu_present_map; - mb(); - - dprintk("CPU map: %lx\n", cpu_present_map); - - for (i = 0; i < NR_CPUS; i++) { + for (apicid = 0; apicid < NR_CPUS; apicid++) { /* * Don't even attempt to start the boot CPU! 
*/ - if (i == boot_cpu_id) + if (apicid == boot_cpu_id) continue; - if ((cpu_online_map & (1 << i)) - && (max_cpus < 0 || max_cpus > cpucount+1)) { - do_boot_cpu(i); - } + if (!(phys_cpu_present_map & (1 << apicid))) + continue; + if ((max_cpus >= 0) && (max_cpus < cpucount+1)) + continue; + + do_boot_cpu(apicid); /* * Make sure we unmap all failed CPUs */ - if (cpu_number_map[i] == -1 && (cpu_online_map & (1 << i))) { - printk("CPU #%d not responding - cannot use it.\n",i); - cpu_online_map &= ~(1 << i); - } + if ((x86_apicid_to_cpu[apicid] == -1) && + (phys_cpu_present_map & (1 << apicid))) + printk("phys CPU #%d not responding - cannot use it.\n",apicid); } /* * Cleanup possible dangling ends... */ - #ifndef CONFIG_VISWS { /* @@ -1627,27 +911,25 @@ void __init smp_boot_cpus(void) * Allow the user to impress friends. */ - dprintk("Before bogomips.\n"); + Dprintk("Before bogomips.\n"); if (!cpucount) { printk(KERN_ERR "Error: only one processor found.\n"); - cpu_online_map = (1<<hard_smp_processor_id()); } else { unsigned long bogosum = 0; - for(i = 0; i < 32; i++) - if (cpu_online_map&(1<<i)) - bogosum+=cpu_data[i].loops_per_sec; + for (cpu = 0; cpu < NR_CPUS; cpu++) + if (cpu_online_map & (1<<cpu)) + bogosum += cpu_data[cpu].loops_per_sec; printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", cpucount+1, (bogosum+2500)/500000, ((bogosum+2500)/5000)%100); - dprintk("Before bogocount - setting activated=1.\n"); - smp_activated = 1; + Dprintk("Before bogocount - setting activated=1.\n"); } smp_num_cpus = cpucount + 1; if (smp_b_stepping) printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n"); - dprintk("Boot done.\n"); + Dprintk("Boot done.\n"); cache_APIC_registers(); #ifndef CONFIG_VISWS @@ -1661,12 +943,6 @@ void __init smp_boot_cpus(void) smp_done: /* - * now we know the other CPUs have fired off and we know our - * APIC ID, so we can go init the TSS and stuff: - */ - cpu_init(); - - /* * Set up all 
local APIC timers in the system: */ setup_APIC_clocks(); diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 903dcf15d..84e20b225 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -42,12 +42,14 @@ #include <linux/init.h> #include <linux/smp.h> -#include <asm/processor.h> -#include <asm/uaccess.h> #include <asm/io.h> +#include <asm/smp.h> #include <asm/irq.h> -#include <asm/delay.h> #include <asm/msr.h> +#include <asm/delay.h> +#include <asm/mpspec.h> +#include <asm/uaccess.h> +#include <asm/processor.h> #include <linux/mc146818rtc.h> #include <linux/timex.h> @@ -368,7 +370,7 @@ static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *reg * profiling, except when we simulate SMP mode on a uniprocessor * system, in that case we have to call the local interrupt handler. */ -#ifndef __SMP__ +#ifndef CONFIG_X86_LOCAL_APIC if (!user_mode(regs)) x86_do_profile(regs->eip); #else diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 063e9fefb..17cac5019 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -138,7 +138,7 @@ static void show_registers(struct pt_regs *regs) unsigned short ss; unsigned long *stack, addr, module_start, module_end; - esp = (unsigned long) (1+regs); + esp = (unsigned long) (®s->esp); ss = __KERNEL_DS; if (regs->xcs & 3) { in_kernel = 0; @@ -337,7 +337,7 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) atomic_t nmi_counter[NR_CPUS]; -#if CONFIG_SMP +#if CONFIG_X86_IO_APIC int nmi_watchdog = 1; @@ -388,7 +388,12 @@ inline void nmi_watchdog_tick(struct pt_regs * regs) alert_counter[cpu]++; if (alert_counter[cpu] == 5*HZ) { spin_lock(&nmi_print_lock); - console_lock.lock = 0; // we are in trouble anyway + /* + * We are in trouble anyway, lets at least try + * to get a message out. 
+ */ + spin_trylock(&console_lock); + spin_unlock(&console_lock); printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu); show_registers(regs); printk("console shuts up ...\n"); @@ -409,7 +414,7 @@ asmlinkage void do_nmi(struct pt_regs * regs, long error_code) atomic_inc(nmi_counter+smp_processor_id()); if (!(reason & 0xc0)) { -#if CONFIG_SMP +#if CONFIG_X86_IO_APIC /* * Ok, so this is none of the documented NMI sources, * so it must be the NMI watchdog. @@ -600,7 +605,10 @@ void __init trap_init_f00f_bug(void) pte = pte_offset(pmd, page); __free_page(pte_page(*pte)); *pte = mk_pte_phys(__pa(&idt_table), PAGE_KERNEL_RO); - local_flush_tlb(); + /* + * Not that any PGE-capable kernel should have the f00f bug ... + */ + __flush_tlb_all(); /* * "idt" is magic - it overlaps the idt_descr @@ -806,13 +814,9 @@ void __init trap_init(void) set_call_gate(&default_ldt[4],lcall27); /* - * on SMP we do not yet know which CPU is on which TSS, - * so we delay this until smp_init(). (the CPU is already - * in a reasonable state, otherwise we wouldnt have gotten so far :) + * Should be a barrier for any external CPU state. 
*/ -#ifndef __SMP__ cpu_init(); -#endif #ifdef CONFIG_X86_VISWS_APIC superio_init(); diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 618b36544..21c9cadff 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -16,6 +16,7 @@ #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/interrupt.h> +#include <linux/init.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -76,7 +77,7 @@ bad_area: return 0; } -static inline void handle_wp_test (void) +static void __init handle_wp_test (void) { const unsigned long vaddr = PAGE_OFFSET; pgd_t *pgd; @@ -91,7 +92,7 @@ static inline void handle_wp_test (void) pmd = pmd_offset(pgd, vaddr); pte = pte_offset(pmd, vaddr); *pte = mk_pte_phys(0, PAGE_KERNEL); - local_flush_tlb(); + __flush_tlb_all(); boot_cpu_data.wp_works_ok = 1; /* @@ -123,6 +124,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) unsigned long page; unsigned long fixup; int write; + int si_code = SEGV_MAPERR; /* get the address */ __asm__("movl %%cr2,%0":"=r" (address)); @@ -164,6 +166,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) */ good_area: write = 0; + si_code = SEGV_ACCERR; + switch (error_code & 3) { default: /* 3: write, present */ #ifdef TEST_VERIFY_AREA @@ -216,10 +220,14 @@ bad_area: /* User mode accesses just cause a SIGSEGV */ if (error_code & 4) { + struct siginfo si; tsk->thread.cr2 = address; tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; - force_sig(SIGSEGV, tsk); + si.si_signo = SIGSEGV; + si.si_code = si_code; + si.si_addr = (void*) address; + force_sig_info(SIGSEGV, &si, tsk); return; } diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index b99daee84..b20ddc2d5 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -34,6 +34,7 @@ #include <asm/dma.h> #include <asm/fixmap.h> #include <asm/e820.h> +#include <asm/apic.h> unsigned long highstart_pfn, highend_pfn; static unsigned long totalram_pages = 0; @@ -194,8 
+195,6 @@ void __init kmap_init(void) kmap_pte = kmap_get_fixmap_pte(kmap_vstart); kmap_prot = PAGE_KERNEL; - if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) - pgprot_val(kmap_prot) |= _PAGE_GLOBAL; } #endif @@ -239,7 +238,8 @@ void show_mem(void) extern char _text, _etext, _edata, __bss_start, _end; extern char __init_begin, __init_end; -static void set_pte_phys (unsigned long vaddr, unsigned long phys) +static inline void set_pte_phys (unsigned long vaddr, + unsigned long phys, pgprot_t flags) { pgprot_t prot; pgd_t *pgd; @@ -249,26 +249,25 @@ static void set_pte_phys (unsigned long vaddr, unsigned long phys) pgd = swapper_pg_dir + __pgd_offset(vaddr); pmd = pmd_offset(pgd, vaddr); pte = pte_offset(pmd, vaddr); - prot = PAGE_KERNEL; - if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) - pgprot_val(prot) |= _PAGE_GLOBAL; + pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags); set_pte(pte, mk_pte_phys(phys, prot)); /* * It's enough to flush this one mapping. + * (PGE mappings get flushed as well) */ __flush_tlb_one(vaddr); } -void set_fixmap (enum fixed_addresses idx, unsigned long phys) +void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) { unsigned long address = __fix_to_virt(idx); if (idx >= __end_of_fixed_addresses) { - printk("Invalid set_fixmap\n"); + printk("Invalid __set_fixmap\n"); return; } - set_pte_phys(address,phys); + set_pte_phys(address, phys, flags); } static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base) @@ -439,10 +438,10 @@ void __init paging_init(void) set_in_cr4(X86_CR4_PAE); #endif - __flush_tlb(); + __flush_tlb_all(); -#ifdef __SMP__ - init_smp_mappings(); +#ifdef CONFIG_X86_LOCAL_APIC + init_apic_mappings(); #endif #ifdef CONFIG_HIGHMEM diff --git a/arch/i386/vmlinux.lds b/arch/i386/vmlinux.lds index 9624cae47..5790bb5d4 100644 --- a/arch/i386/vmlinux.lds +++ b/arch/i386/vmlinux.lds @@ -64,6 +64,12 @@ SECTIONS } _end = . 
; + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + } + /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } |