summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile37
-rw-r--r--kernel/dma.c8
-rw-r--r--kernel/exec_domain.c21
-rw-r--r--kernel/exit.c293
-rw-r--r--kernel/fork.c289
-rw-r--r--kernel/info.c16
-rw-r--r--kernel/itimer.c88
-rw-r--r--kernel/ksyms.c281
-rw-r--r--kernel/ksyms.ver194
-rw-r--r--kernel/module.c365
-rw-r--r--kernel/panic.c32
-rw-r--r--kernel/printk.c38
-rw-r--r--kernel/resource.c9
-rw-r--r--kernel/sched.c1492
-rw-r--r--kernel/signal.c97
-rw-r--r--kernel/softirq.c15
-rw-r--r--kernel/sys.c606
-rw-r--r--kernel/sysctl.c922
-rw-r--r--kernel/time.c493
19 files changed, 3704 insertions, 1592 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index fd73ad5f0..9586d067f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,40 +9,17 @@
.S.s:
$(CPP) -traditional $< -o $*.s
-.c.s:
- $(CC) $(CFLAGS) -S $<
-.s.o:
- $(AS) -o $*.o $<
-.c.o:
- $(CC) $(CFLAGS) -c $<
-OBJS = sched.o dma.o fork.o exec_domain.o panic.o printk.o sys.o \
+O_TARGET := kernel.o
+O_OBJS = sched.o dma.o fork.o exec_domain.o panic.o printk.o sys.o \
module.o exit.o signal.o itimer.o info.o time.o softirq.o \
- resource.o
+ resource.o sysctl.o
-SYMTAB_OBJS = ksyms.o
-
-all: kernel.o
-
-include ../versions.mk
+ifeq ($(CONFIG_MODULES),y)
+OX_OBJS = ksyms.o
+endif
-kernel.o: $(SYMTAB_OBJS) $(OBJS)
- $(LD) -r -o kernel.o $(SYMTAB_OBJS) $(OBJS)
- sync
+include $(TOPDIR)/Rules.make
sched.o: sched.c
$(CC) $(CFLAGS) $(PROFILING) -fno-omit-frame-pointer -c $<
-
-dep:
- $(CPP) -M *.c > .depend
-
-dummy:
-modules:
-
-#
-# include a dependency file if one exists
-#
-ifeq (.depend,$(wildcard .depend))
-include .depend
-endif
-
diff --git a/kernel/dma.c b/kernel/dma.c
index 94b121653..0f13e6627 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -40,7 +40,7 @@
struct dma_chan {
int lock;
- char *device_id;
+ const char *device_id;
};
static struct dma_chan dma_chan_busy[MAX_DMA_CHANNELS] = {
@@ -69,12 +69,12 @@ int get_dma_list(char *buf)
} /* get_dma_list */
-int request_dma(unsigned int dmanr, char * device_id)
+int request_dma(unsigned int dmanr, const char * device_id)
{
if (dmanr >= MAX_DMA_CHANNELS)
return -EINVAL;
- if (xchg_u32(&dma_chan_busy[dmanr].lock, 1) != 0)
+ if (xchg(&dma_chan_busy[dmanr].lock, 1) != 0)
return -EBUSY;
dma_chan_busy[dmanr].device_id = device_id;
@@ -91,7 +91,7 @@ void free_dma(unsigned int dmanr)
return;
}
- if (xchg_u32(&dma_chan_busy[dmanr].lock, 0) == 0) {
+ if (xchg(&dma_chan_busy[dmanr].lock, 0) == 0) {
printk("Trying to free free DMA%d\n", dmanr);
return;
}
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 7f0114a46..9a202359a 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -28,6 +28,27 @@ static struct exec_domain *exec_domains = &default_exec_domain;
static asmlinkage void no_lcall7(struct pt_regs * regs)
{
+
+ /*
+ * This may have been a static linked SVr4 binary, so we would have the
+ * personality set incorrectly. Check to see whether SVr4 is available,
+ * and use it, otherwise give the user a SEGV.
+ */
+ if (current->exec_domain && current->exec_domain->use_count)
+ (*current->exec_domain->use_count)--;
+
+ current->personality = PER_SVR4;
+ current->exec_domain = lookup_exec_domain(current->personality);
+
+ if (current->exec_domain && current->exec_domain->use_count)
+ (*current->exec_domain->use_count)++;
+
+ if (current->exec_domain && current->exec_domain->handler
+ && current->exec_domain->handler != no_lcall7) {
+ current->exec_domain->handler(regs);
+ return;
+ }
+
send_sig(SIGSEGV, current, 1);
}
diff --git a/kernel/exit.c b/kernel/exit.c
index 59c0b075b..d2fdbdc4a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -4,7 +4,7 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-#define DEBUG_PROC_TREE
+#undef DEBUG_PROC_TREE
#include <linux/wait.h>
#include <linux/errno.h>
@@ -15,56 +15,85 @@
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/malloc.h>
+#include <linux/interrupt.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
-#include <asm/segment.h>
extern void sem_exit (void);
+extern void acct_process (long exitcode);
+extern void kerneld_exit(void);
int getrusage(struct task_struct *, int, struct rusage *);
-static int generate(unsigned long sig, struct task_struct * p)
+static inline void generate(unsigned long sig, struct task_struct * p)
{
unsigned long mask = 1 << (sig-1);
- struct sigaction * sa = sig + p->sigaction - 1;
+ struct sigaction * sa = sig + p->sig->action - 1;
- /* always generate signals for traced processes ??? */
- if (p->flags & PF_PTRACED) {
- p->signal |= mask;
- return 1;
+ /*
+ * Optimize away the signal, if it's a signal that can
+ * be handled immediately (ie non-blocked and untraced)
+ * and that is ignored (either explicitly or by default)
+ */
+ if (!(mask & p->blocked) && !(p->flags & PF_PTRACED)) {
+ /* don't bother with ignored signals (but SIGCHLD is special) */
+ if (sa->sa_handler == SIG_IGN && sig != SIGCHLD)
+ return;
+ /* some signals are ignored by default.. (but SIGCONT already did its deed) */
+ if ((sa->sa_handler == SIG_DFL) &&
+ (sig == SIGCONT || sig == SIGCHLD || sig == SIGWINCH || sig == SIGURG))
+ return;
}
- /* don't bother with ignored signals (but SIGCHLD is special) */
- if (sa->sa_handler == SIG_IGN && sig != SIGCHLD)
- return 0;
- /* some signals are ignored by default.. (but SIGCONT already did its deed) */
- if ((sa->sa_handler == SIG_DFL) &&
- (sig == SIGCONT || sig == SIGCHLD || sig == SIGWINCH))
- return 0;
p->signal |= mask;
- return 1;
+ if (p->state == TASK_INTERRUPTIBLE && (p->signal & ~p->blocked))
+ wake_up_process(p);
}
+/*
+ * Force a signal that the process can't ignore: if necessary
+ * we unblock the signal and change any SIG_IGN to SIG_DFL.
+ */
+void force_sig(unsigned long sig, struct task_struct * p)
+{
+ sig--;
+ if (p->sig) {
+ unsigned long mask = 1UL << sig;
+ struct sigaction *sa = p->sig->action + sig;
+ p->signal |= mask;
+ p->blocked &= ~mask;
+ if (sa->sa_handler == SIG_IGN)
+ sa->sa_handler = SIG_DFL;
+ if (p->state == TASK_INTERRUPTIBLE)
+ wake_up_process(p);
+ }
+}
+
+
int send_sig(unsigned long sig,struct task_struct * p,int priv)
{
if (!p || sig > 32)
return -EINVAL;
if (!priv && ((sig != SIGCONT) || (current->session != p->session)) &&
- (current->euid != p->euid) && (current->euid != p->uid) && !suser())
+ (current->euid ^ p->suid) && (current->euid ^ p->uid) &&
+ (current->uid ^ p->suid) && (current->uid ^ p->uid) &&
+ !suser())
return -EPERM;
if (!sig)
return 0;
/*
* Forget it if the process is already zombie'd.
*/
- if (p->state == TASK_ZOMBIE)
+ if (!p->sig)
return 0;
if ((sig == SIGKILL) || (sig == SIGCONT)) {
if (p->state == TASK_STOPPED)
- p->state = TASK_RUNNING;
+ wake_up_process(p);
p->exit_code = 0;
p->signal &= ~( (1<<(SIGSTOP-1)) | (1<<(SIGTSTP-1)) |
(1<<(SIGTTIN-1)) | (1<<(SIGTTOU-1)) );
}
- /* Depends on order SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU */
- if ((sig >= SIGSTOP) && (sig <= SIGTTOU))
+ if (sig == SIGSTOP || sig == SIGTSTP || sig == SIGTTIN || sig == SIGTTOU)
p->signal &= ~(1<<(SIGCONT-1));
/* Actually generate the signal */
generate(sig,p);
@@ -73,7 +102,7 @@ int send_sig(unsigned long sig,struct task_struct * p,int priv)
void notify_parent(struct task_struct * tsk)
{
- if (tsk->p_pptr == task[1])
+ if (tsk->p_pptr == task[smp_num_cpus]) /* Init */
tsk->exit_signal = SIGCHLD;
send_sig(tsk->exit_signal, tsk->p_pptr, 1);
wake_up_interruptible(&tsk->p_pptr->wait_chldexit);
@@ -94,10 +123,14 @@ void release(struct task_struct * p)
nr_tasks--;
task[i] = NULL;
REMOVE_LINKS(p);
+ release_thread(p);
if (STACK_MAGIC != *(unsigned long *)p->kernel_stack_page)
printk(KERN_ALERT "release: %s kernel stack corruption. Aiee\n", p->comm);
- free_page(p->kernel_stack_page);
- free_page((long) p);
+ free_kernel_stack(p->kernel_stack_page);
+ current->cmin_flt += p->min_flt + p->cmin_flt;
+ current->cmaj_flt += p->maj_flt + p->cmaj_flt;
+ current->cnswap += p->nswap + p->cnswap;
+ kfree(p);
return;
}
panic("trying to release non-existent task");
@@ -313,12 +346,12 @@ asmlinkage int sys_kill(int pid,int sig)
*
* "I ask you, have you ever known what it is to be an orphan?"
*/
-int is_orphaned_pgrp(int pgrp)
+static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
struct task_struct *p;
for_each_task(p) {
- if ((p->pgrp != pgrp) ||
+ if ((p == ignored_task) || (p->pgrp != pgrp) ||
(p->state == TASK_ZOMBIE) ||
(p->p_pptr->pid == 1))
continue;
@@ -329,7 +362,12 @@ int is_orphaned_pgrp(int pgrp)
return(1); /* (sighing) "Often!" */
}
-static int has_stopped_jobs(int pgrp)
+int is_orphaned_pgrp(int pgrp)
+{
+ return will_become_orphaned_pgrp(pgrp, 0);
+}
+
+static inline int has_stopped_jobs(int pgrp)
{
struct task_struct * p;
@@ -342,65 +380,140 @@ static int has_stopped_jobs(int pgrp)
return(0);
}
-static void forget_original_parent(struct task_struct * father)
+static inline void forget_original_parent(struct task_struct * father)
{
struct task_struct * p;
for_each_task(p) {
if (p->p_opptr == father)
- if (task[1])
- p->p_opptr = task[1];
+ if (task[smp_num_cpus]) /* init */
+ p->p_opptr = task[smp_num_cpus];
else
p->p_opptr = task[0];
}
}
-static void exit_files(void)
+static inline void close_files(struct files_struct * files)
{
- int i;
+ int i, j;
+
+ j = 0;
+ for (;;) {
+ unsigned long set = files->open_fds.fds_bits[j];
+ i = j * __NFDBITS;
+ j++;
+ if (i >= NR_OPEN)
+ break;
+ while (set) {
+ if (set & 1)
+ close_fp(files->fd[i]);
+ i++;
+ set >>= 1;
+ }
+ }
+}
- for (i=0 ; i<NR_OPEN ; i++)
- if (current->files->fd[i])
- sys_close(i);
+static inline void __exit_files(struct task_struct *tsk)
+{
+ struct files_struct * files = tsk->files;
+
+ if (files) {
+ tsk->files = NULL;
+ if (!--files->count) {
+ close_files(files);
+ kfree(files);
+ }
+ }
}
-static void exit_fs(void)
+void exit_files(struct task_struct *tsk)
{
- iput(current->fs->pwd);
- current->fs->pwd = NULL;
- iput(current->fs->root);
- current->fs->root = NULL;
+ __exit_files(tsk);
}
-NORET_TYPE void do_exit(long code)
+static inline void __exit_fs(struct task_struct *tsk)
{
- struct task_struct *p;
+ struct fs_struct * fs = tsk->fs;
+
+ if (fs) {
+ tsk->fs = NULL;
+ if (!--fs->count) {
+ iput(fs->root);
+ iput(fs->pwd);
+ kfree(fs);
+ }
+ }
+}
- if (intr_count) {
- printk("Aiee, killing interrupt handler\n");
- intr_count = 0;
+void exit_fs(struct task_struct *tsk)
+{
+ __exit_fs(tsk);
+}
+
+static inline void __exit_sighand(struct task_struct *tsk)
+{
+ struct signal_struct * sig = tsk->sig;
+
+ if (sig) {
+ tsk->sig = NULL;
+ if (!--sig->count) {
+ kfree(sig);
+ }
}
-fake_volatile:
- current->flags |= PF_EXITING;
- sem_exit();
- exit_mmap(current);
- free_page_tables(current);
- exit_files();
- exit_fs();
- exit_thread();
+}
+
+void exit_sighand(struct task_struct *tsk)
+{
+ __exit_sighand(tsk);
+}
+
+static inline void __exit_mm(struct task_struct * tsk)
+{
+ struct mm_struct * mm = tsk->mm;
+
+ /* Set us up to use the kernel mm state */
+ if (mm != &init_mm) {
+ flush_cache_mm(mm);
+ flush_tlb_mm(mm);
+ tsk->mm = &init_mm;
+ tsk->swappable = 0;
+ SET_PAGE_DIR(tsk, swapper_pg_dir);
+
+ /* free the old state - not used any more */
+ if (!--mm->count) {
+ exit_mmap(mm);
+ free_page_tables(mm);
+ kfree(mm);
+ }
+ }
+}
+
+void exit_mm(struct task_struct *tsk)
+{
+ __exit_mm(tsk);
+}
+
+/*
+ * Send signals to all our closest relatives so that they know
+ * to properly mourn us..
+ */
+static void exit_notify(void)
+{
+ struct task_struct * p;
+
forget_original_parent(current);
/*
* Check to see if any process groups have become orphaned
* as a result of our exiting, and if they have any stopped
- * jobs, send them a SIGUP and then a SIGCONT. (POSIX 3.2.2.2)
+ * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
*
* Case i: Our father is in a different pgrp than we are
* and we were the only connection outside, so our pgrp
* is about to become orphaned.
- */
+ */
if ((current->p_pptr->pgrp != current->pgrp) &&
(current->p_pptr->session == current->session) &&
- is_orphaned_pgrp(current->pgrp) &&
+ will_become_orphaned_pgrp(current->pgrp, current) &&
has_stopped_jobs(current->pgrp)) {
kill_pg(current->pgrp,SIGHUP,1);
kill_pg(current->pgrp,SIGCONT,1);
@@ -420,8 +533,8 @@ fake_volatile:
current->p_cptr = p->p_osptr;
p->p_ysptr = NULL;
p->flags &= ~(PF_PTRACED|PF_TRACESYS);
- if (task[1] && task[1] != current)
- p->p_pptr = task[1];
+ if (task[smp_num_cpus] && task[smp_num_cpus] != current) /* init */
+ p->p_pptr = task[smp_num_cpus];
else
p->p_pptr = task[0];
p->p_osptr = p->p_pptr->p_cptr;
@@ -445,11 +558,28 @@ fake_volatile:
}
if (current->leader)
disassociate_ctty(1);
- if (last_task_used_math == current)
- last_task_used_math = NULL;
+}
+
+NORET_TYPE void do_exit(long code)
+{
+ if (intr_count) {
+ printk("Aiee, killing interrupt handler\n");
+ intr_count = 0;
+ }
+fake_volatile:
+ acct_process(code);
+ current->flags |= PF_EXITING;
+ del_timer(&current->real_timer);
+ sem_exit();
+ kerneld_exit();
+ __exit_mm(current);
+ __exit_files(current);
+ __exit_fs(current);
+ __exit_sighand(current);
+ exit_thread();
current->state = TASK_ZOMBIE;
current->exit_code = code;
- current->mm->rss = 0;
+ exit_notify();
#ifdef DEBUG_PROC_TREE
audit_ptree();
#endif
@@ -486,10 +616,18 @@ asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct
struct task_struct *p;
if (stat_addr) {
- flag = verify_area(VERIFY_WRITE, stat_addr, 4);
+ flag = verify_area(VERIFY_WRITE, stat_addr, sizeof(*stat_addr));
+ if (flag)
+ return flag;
+ }
+ if (ru) {
+ flag = verify_area(VERIFY_WRITE, ru, sizeof(*ru));
if (flag)
return flag;
}
+ if (options & ~(WNOHANG|WUNTRACED|__WCLONE))
+ return -EINVAL;
+
add_wait_queue(&current->wait_chldexit,&wait);
repeat:
flag=0;
@@ -514,24 +652,22 @@ repeat:
continue;
if (!(options & WUNTRACED) && !(p->flags & PF_PTRACED))
continue;
+ if (ru != NULL)
+ getrusage(p, RUSAGE_BOTH, ru);
if (stat_addr)
- put_fs_long((p->exit_code << 8) | 0x7f,
+ put_user((p->exit_code << 8) | 0x7f,
stat_addr);
p->exit_code = 0;
- if (ru != NULL)
- getrusage(p, RUSAGE_BOTH, ru);
retval = p->pid;
goto end_wait4;
case TASK_ZOMBIE:
current->cutime += p->utime + p->cutime;
current->cstime += p->stime + p->cstime;
- current->mm->cmin_flt += p->mm->min_flt + p->mm->cmin_flt;
- current->mm->cmaj_flt += p->mm->maj_flt + p->mm->cmaj_flt;
if (ru != NULL)
getrusage(p, RUSAGE_BOTH, ru);
- flag = p->pid;
if (stat_addr)
- put_fs_long(p->exit_code, stat_addr);
+ put_user(p->exit_code, stat_addr);
+ retval = p->pid;
if (p->p_opptr != p->p_pptr) {
REMOVE_LINKS(p);
p->p_pptr = p->p_opptr;
@@ -542,7 +678,6 @@ repeat:
#ifdef DEBUG_PROC_TREE
audit_ptree();
#endif
- retval = flag;
goto end_wait4;
default:
continue;
@@ -552,12 +687,11 @@ repeat:
retval = 0;
if (options & WNOHANG)
goto end_wait4;
- current->state=TASK_INTERRUPTIBLE;
- schedule();
- current->signal &= ~(1<<(SIGCHLD-1));
retval = -ERESTARTSYS;
if (current->signal & ~current->blocked)
goto end_wait4;
+ current->state=TASK_INTERRUPTIBLE;
+ schedule();
goto repeat;
}
retval = -ECHILD;
@@ -566,6 +700,8 @@ end_wait4:
return retval;
}
+#ifndef __alpha__
+
/*
* sys_waitpid() remains for compatibility. waitpid() should be
* implemented by calling sys_wait4() from libc.a.
@@ -574,3 +710,14 @@ asmlinkage int sys_waitpid(pid_t pid,unsigned int * stat_addr, int options)
{
return sys_wait4(pid, stat_addr, options, NULL);
}
+
+#endif
+
+/*
+ * sys_wait() has been added for compatibility. wait() should be
+ * implemented by calling sys_wait4() from libc.a.
+ */
+asmlinkage int sys_wait(unsigned int * stat_addr)
+{
+ return sys_wait4(-1, stat_addr, 0, NULL);
+}
diff --git a/kernel/fork.c b/kernel/fork.c
index 104ffea96..864bc52e8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -15,87 +15,81 @@
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/malloc.h>
-#include <linux/ldt.h>
+#include <linux/smp.h>
-#include <asm/segment.h>
#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
int nr_tasks=1;
int nr_running=1;
-long last_pid=0;
+unsigned long int total_forks=0; /* Handle normal Linux uptimes. */
+int last_pid=0;
-static int find_empty_process(void)
+static inline int find_empty_process(void)
{
- int free_task;
- int i, tasks_free;
- int this_user_tasks;
+ int i;
-repeat:
- if ((++last_pid) & 0xffff8000)
- last_pid=1;
- this_user_tasks = 0;
- tasks_free = 0;
- free_task = -EAGAIN;
- i = NR_TASKS;
- while (--i > 0) {
- if (!task[i]) {
- free_task = i;
- tasks_free++;
- continue;
- }
- if (task[i]->uid == current->uid)
- this_user_tasks++;
- if (task[i]->pid == last_pid || task[i]->pgrp == last_pid ||
- task[i]->session == last_pid)
- goto repeat;
- }
- if (tasks_free <= MIN_TASKS_LEFT_FOR_ROOT ||
- this_user_tasks > current->rlim[RLIMIT_NPROC].rlim_cur)
+ if (nr_tasks >= NR_TASKS - MIN_TASKS_LEFT_FOR_ROOT) {
if (current->uid)
return -EAGAIN;
- return free_task;
-}
+ }
+ if (current->uid) {
+ long max_tasks = current->rlim[RLIMIT_NPROC].rlim_cur;
-static struct file * copy_fd(struct file * old_file)
-{
- struct file * new_file = get_empty_filp();
- int error;
-
- if (new_file) {
- memcpy(new_file,old_file,sizeof(struct file));
- new_file->f_count = 1;
- if (new_file->f_inode)
- new_file->f_inode->i_count++;
- if (new_file->f_op && new_file->f_op->open) {
- error = new_file->f_op->open(new_file->f_inode,new_file);
- if (error) {
- iput(new_file->f_inode);
- new_file->f_count = 0;
- new_file = NULL;
+ max_tasks--; /* count the new process.. */
+ if (max_tasks < nr_tasks) {
+ struct task_struct *p;
+ for_each_task (p) {
+ if (p->uid == current->uid)
+ if (--max_tasks < 0)
+ return -EAGAIN;
}
}
}
- return new_file;
+ for (i = 0 ; i < NR_TASKS ; i++) {
+ if (!task[i])
+ return i;
+ }
+ return -EAGAIN;
+}
+
+static int get_pid(unsigned long flags)
+{
+ struct task_struct *p;
+
+ if (flags & CLONE_PID)
+ return current->pid;
+repeat:
+ if ((++last_pid) & 0xffff8000)
+ last_pid=1;
+ for_each_task (p) {
+ if (p->pid == last_pid ||
+ p->pgrp == last_pid ||
+ p->session == last_pid)
+ goto repeat;
+ }
+ return last_pid;
}
-static int dup_mmap(struct task_struct * tsk)
+static inline int dup_mmap(struct mm_struct * mm)
{
struct vm_area_struct * mpnt, **p, *tmp;
- tsk->mm->mmap = NULL;
- p = &tsk->mm->mmap;
+ mm->mmap = NULL;
+ p = &mm->mmap;
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
tmp = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
if (!tmp) {
- exit_mmap(tsk);
+ exit_mmap(mm);
return -ENOMEM;
}
*tmp = *mpnt;
- tmp->vm_task = tsk;
+ tmp->vm_flags &= ~VM_LOCKED;
+ tmp->vm_mm = mm;
tmp->vm_next = NULL;
if (tmp->vm_inode) {
tmp->vm_inode->i_count++;
@@ -104,59 +98,109 @@ static int dup_mmap(struct task_struct * tsk)
mpnt->vm_next_share = tmp;
tmp->vm_prev_share = mpnt;
}
+ if (copy_page_range(mm, current->mm, tmp)) {
+ exit_mmap(mm);
+ return -ENOMEM;
+ }
if (tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);
*p = tmp;
p = &tmp->vm_next;
}
- build_mmap_avl(tsk);
+ build_mmap_avl(mm);
return 0;
}
-/*
- * SHAREFD not yet implemented..
- */
-static void copy_files(unsigned long clone_flags, struct task_struct * p)
+static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
- int i;
- struct file * f;
-
- if (clone_flags & COPYFD) {
- for (i=0; i<NR_OPEN;i++)
- if ((f = p->files->fd[i]) != NULL)
- p->files->fd[i] = copy_fd(f);
- } else {
- for (i=0; i<NR_OPEN;i++)
- if ((f = p->files->fd[i]) != NULL)
- f->f_count++;
+ if (!(clone_flags & CLONE_VM)) {
+ struct mm_struct * mm = kmalloc(sizeof(*tsk->mm), GFP_KERNEL);
+ if (!mm)
+ return -1;
+ *mm = *current->mm;
+ mm->count = 1;
+ mm->def_flags = 0;
+ tsk->mm = mm;
+ tsk->min_flt = tsk->maj_flt = 0;
+ tsk->cmin_flt = tsk->cmaj_flt = 0;
+ tsk->nswap = tsk->cnswap = 0;
+ if (new_page_tables(tsk))
+ return -1;
+ if (dup_mmap(mm)) {
+ free_page_tables(mm);
+ return -1;
+ }
+ return 0;
}
+ SET_PAGE_DIR(tsk, current->mm->pgd);
+ current->mm->count++;
+ return 0;
}
-/*
- * CLONEVM not yet correctly implemented: needs to clone the mmap
- * instead of duplicating it..
- */
-static int copy_mm(unsigned long clone_flags, struct task_struct * p)
+static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
{
- if (clone_flags & COPYVM) {
- p->mm->min_flt = p->mm->maj_flt = 0;
- p->mm->cmin_flt = p->mm->cmaj_flt = 0;
- if (copy_page_tables(p))
- return 1;
- return dup_mmap(p);
- } else {
- if (clone_page_tables(p))
- return 1;
- return dup_mmap(p); /* wrong.. */
+ if (clone_flags & CLONE_FS) {
+ current->fs->count++;
+ return 0;
+ }
+ tsk->fs = kmalloc(sizeof(*tsk->fs), GFP_KERNEL);
+ if (!tsk->fs)
+ return -1;
+ tsk->fs->count = 1;
+ tsk->fs->umask = current->fs->umask;
+ if ((tsk->fs->root = current->fs->root))
+ tsk->fs->root->i_count++;
+ if ((tsk->fs->pwd = current->fs->pwd))
+ tsk->fs->pwd->i_count++;
+ return 0;
+}
+
+static inline int copy_files(unsigned long clone_flags, struct task_struct * tsk)
+{
+ int i;
+ struct files_struct *oldf, *newf;
+ struct file **old_fds, **new_fds;
+
+ oldf = current->files;
+ if (clone_flags & CLONE_FILES) {
+ oldf->count++;
+ return 0;
+ }
+
+ newf = kmalloc(sizeof(*newf), GFP_KERNEL);
+ tsk->files = newf;
+ if (!newf)
+ return -1;
+
+ newf->count = 1;
+ newf->close_on_exec = oldf->close_on_exec;
+ newf->open_fds = oldf->open_fds;
+
+ old_fds = oldf->fd;
+ new_fds = newf->fd;
+ for (i = NR_OPEN; i != 0; i--) {
+ struct file * f = *old_fds;
+ old_fds++;
+ *new_fds = f;
+ new_fds++;
+ if (f)
+ f->f_count++;
}
+ return 0;
}
-static void copy_fs(unsigned long clone_flags, struct task_struct * p)
+static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
- if (current->fs->pwd)
- current->fs->pwd->i_count++;
- if (current->fs->root)
- current->fs->root->i_count++;
+ if (clone_flags & CLONE_SIGHAND) {
+ current->sig->count++;
+ return 0;
+ }
+ tsk->sig = kmalloc(sizeof(*tsk->sig), GFP_KERNEL);
+ if (!tsk->sig)
+ return -1;
+ tsk->sig->count = 1;
+ memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
+ return 0;
}
/*
@@ -167,17 +211,20 @@ static void copy_fs(unsigned long clone_flags, struct task_struct * p)
int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
{
int nr;
+ int error = -ENOMEM;
unsigned long new_stack;
struct task_struct *p;
- if(!(p = (struct task_struct*)__get_free_page(GFP_KERNEL)))
+ p = (struct task_struct *) kmalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
goto bad_fork;
- new_stack = get_free_page(GFP_KERNEL);
+ new_stack = alloc_kernel_stack();
if (!new_stack)
- goto bad_fork_free;
+ goto bad_fork_free_p;
+ error = -EAGAIN;
nr = find_empty_process();
if (nr < 0)
- goto bad_fork_free;
+ goto bad_fork_free_stack;
*p = *current;
@@ -187,47 +234,75 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
(*p->binfmt->use_count)++;
p->did_exec = 0;
+ p->swappable = 0;
p->kernel_stack_page = new_stack;
*(unsigned long *) p->kernel_stack_page = STACK_MAGIC;
p->state = TASK_UNINTERRUPTIBLE;
- p->flags &= ~(PF_PTRACED|PF_TRACESYS);
- p->pid = last_pid;
+ p->flags &= ~(PF_PTRACED|PF_TRACESYS|PF_SUPERPRIV);
+ p->flags |= PF_FORKNOEXEC;
+ p->pid = get_pid(clone_flags);
+ p->next_run = NULL;
+ p->prev_run = NULL;
p->p_pptr = p->p_opptr = current;
p->p_cptr = NULL;
+ init_waitqueue(&p->wait_chldexit);
p->signal = 0;
p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
- p->leader = 0; /* process leadership doesn't inherit */
+ init_timer(&p->real_timer);
+ p->real_timer.data = (unsigned long) p;
+ p->leader = 0; /* session leadership doesn't inherit */
p->tty_old_pgrp = 0;
p->utime = p->stime = 0;
p->cutime = p->cstime = 0;
+#ifdef __SMP__
+ p->processor = NO_PROC_ID;
+ p->lock_depth = 1;
+#endif
p->start_time = jiffies;
- p->mm->swappable = 0; /* don't try to swap it out before it's set up */
task[nr] = p;
SET_LINKS(p);
nr_tasks++;
+ error = -ENOMEM;
/* copy all the process information */
- copy_thread(nr, clone_flags, usp, p, regs);
- if (copy_mm(clone_flags, p))
+ if (copy_files(clone_flags, p))
goto bad_fork_cleanup;
+ if (copy_fs(clone_flags, p))
+ goto bad_fork_cleanup_files;
+ if (copy_sighand(clone_flags, p))
+ goto bad_fork_cleanup_fs;
+ if (copy_mm(clone_flags, p))
+ goto bad_fork_cleanup_sighand;
+ copy_thread(nr, clone_flags, usp, p, regs);
p->semundo = NULL;
- copy_files(clone_flags, p);
- copy_fs(clone_flags, p);
/* ok, now we should be set up.. */
- p->mm->swappable = 1;
+ p->swappable = 1;
p->exit_signal = clone_flags & CSIGNAL;
p->counter = current->counter >> 1;
- p->state = TASK_RUNNING; /* do this last, just in case */
+ wake_up_process(p); /* do this last, just in case */
+ ++total_forks;
return p->pid;
+
+bad_fork_cleanup_sighand:
+ exit_sighand(p);
+bad_fork_cleanup_fs:
+ exit_fs(p);
+bad_fork_cleanup_files:
+ exit_files(p);
bad_fork_cleanup:
+ if (p->exec_domain && p->exec_domain->use_count)
+ (*p->exec_domain->use_count)--;
+ if (p->binfmt && p->binfmt->use_count)
+ (*p->binfmt->use_count)--;
task[nr] = NULL;
REMOVE_LINKS(p);
nr_tasks--;
-bad_fork_free:
- free_page(new_stack);
- free_page((long) p);
+bad_fork_free_stack:
+ free_kernel_stack(new_stack);
+bad_fork_free_p:
+ kfree(p);
bad_fork:
- return -EAGAIN;
+ return error;
}
diff --git a/kernel/info.c b/kernel/info.c
index c7b2b9a8c..20b6ad6ae 100644
--- a/kernel/info.c
+++ b/kernel/info.c
@@ -6,23 +6,19 @@
/* This implements the sysinfo() system call */
-#include <asm/segment.h>
-
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/unistd.h>
#include <linux/types.h>
#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include <asm/uaccess.h>
asmlinkage int sys_sysinfo(struct sysinfo *info)
{
- int error;
struct sysinfo val;
- struct task_struct **p;
- error = verify_area(VERIFY_WRITE, info, sizeof(struct sysinfo));
- if (error)
- return error;
memset((char *)&val, 0, sizeof(struct sysinfo));
val.uptime = jiffies / HZ;
@@ -31,12 +27,12 @@ asmlinkage int sys_sysinfo(struct sysinfo *info)
val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
- for (p = &LAST_TASK; p > &FIRST_TASK; p--)
- if (*p) val.procs++;
+ val.procs = nr_tasks-1;
si_meminfo(&val);
si_swapinfo(&val);
- memcpy_tofs(info, &val, sizeof(struct sysinfo));
+ if (copy_to_user(info, &val, sizeof(struct sysinfo)))
+ return -EFAULT;
return 0;
}
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 02f7b7ce8..efcc8351b 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -13,13 +13,28 @@
#include <linux/time.h>
#include <linux/mm.h>
-#include <asm/segment.h>
+#include <asm/uaccess.h>
+/*
+ * change timeval to jiffies, trying to avoid the
+ * most obvious overflows..
+ *
+ * The tv_*sec values are signed, but nothing seems to
+ * indicate whether we really should use them as signed values
+ * when doing itimers. POSIX doesn't mention this (but if
+ * alarm() uses itimers without checking, we have to use unsigned
+ * arithmetic).
+ */
static unsigned long tvtojiffies(struct timeval *value)
{
- return((unsigned long )value->tv_sec * HZ +
- (unsigned long )(value->tv_usec + (1000000 / HZ - 1)) /
- (1000000 / HZ));
+ unsigned long sec = (unsigned) value->tv_sec;
+ unsigned long usec = (unsigned) value->tv_usec;
+
+ if (sec > (ULONG_MAX / HZ))
+ return ULONG_MAX;
+ usec += 1000000 / HZ - 1;
+ usec /= 1000000 / HZ;
+ return HZ*sec+usec;
}
static void jiffiestotv(unsigned long jiffies, struct timeval *value)
@@ -29,14 +44,23 @@ static void jiffiestotv(unsigned long jiffies, struct timeval *value)
return;
}
-int _getitimer(int which, struct itimerval *value)
+static int _getitimer(int which, struct itimerval *value)
{
register unsigned long val, interval;
switch (which) {
case ITIMER_REAL:
- val = current->it_real_value;
interval = current->it_real_incr;
+ val = 0;
+ if (del_timer(&current->real_timer)) {
+ unsigned long now = jiffies;
+ val = current->real_timer.expires;
+ add_timer(&current->real_timer);
+ /* look out for negative/zero itimer.. */
+ if (val <= now)
+ val = now+1;
+ val -= now;
+ }
break;
case ITIMER_VIRTUAL:
val = current->it_virt_value;
@@ -51,7 +75,7 @@ int _getitimer(int which, struct itimerval *value)
}
jiffiestotv(val, &value->it_value);
jiffiestotv(interval, &value->it_interval);
- return(0);
+ return 0;
}
asmlinkage int sys_getitimer(int which, struct itimerval *value)
@@ -64,11 +88,24 @@ asmlinkage int sys_getitimer(int which, struct itimerval *value)
error = _getitimer(which, &get_buffer);
if (error)
return error;
- error = verify_area(VERIFY_WRITE, value, sizeof(struct itimerval));
- if (error)
- return error;
- memcpy_tofs(value, &get_buffer, sizeof(get_buffer));
- return 0;
+ return copy_to_user(value, &get_buffer, sizeof(get_buffer)) ? -EFAULT : 0;
+}
+
+void it_real_fn(unsigned long __data)
+{
+ struct task_struct * p = (struct task_struct *) __data;
+ unsigned long interval;
+
+ send_sig(SIGALRM, p, 1);
+ interval = p->it_real_incr;
+ if (interval) {
+ unsigned long timeout = jiffies + interval;
+ /* check for overflow */
+ if (timeout < interval)
+ timeout = ULONG_MAX;
+ p->real_timer.expires = timeout;
+ add_timer(&p->real_timer);
+ }
}
int _setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
@@ -82,13 +119,17 @@ int _setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
return k;
switch (which) {
case ITIMER_REAL:
- if (j) {
- j += 1+itimer_ticks;
- if (j < itimer_next)
- itimer_next = j;
- }
+ del_timer(&current->real_timer);
current->it_real_value = j;
current->it_real_incr = i;
+ if (!j)
+ break;
+ i = j + jiffies;
+ /* check for overflow.. */
+ if (i < j)
+ i = ULONG_MAX;
+ current->real_timer.expires = i;
+ add_timer(&current->real_timer);
break;
case ITIMER_VIRTUAL:
if (j)
@@ -117,20 +158,17 @@ asmlinkage int sys_setitimer(int which, struct itimerval *value, struct itimerva
error = verify_area(VERIFY_READ, value, sizeof(*value));
if (error)
return error;
- memcpy_fromfs(&set_buffer, value, sizeof(set_buffer));
+ error = copy_from_user(&set_buffer, value, sizeof(set_buffer));
+ if (error)
+ return -EFAULT;
} else
memset((char *) &set_buffer, 0, sizeof(set_buffer));
- if (ovalue) {
- error = verify_area(VERIFY_WRITE, ovalue, sizeof(struct itimerval));
- if (error)
- return error;
- }
-
error = _setitimer(which, &set_buffer, ovalue ? &get_buffer : 0);
if (error || !ovalue)
return error;
- memcpy_tofs(ovalue, &get_buffer, sizeof(get_buffer));
+ if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer)))
+ error = -EFAULT;
return error;
}
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index ccb2f2b4c..a612201c0 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -9,14 +9,19 @@
* by Bjorn Ekwall <bj0rn@blox.se>
*/
-#include <linux/autoconf.h>
#include <linux/module.h>
+#include <linux/config.h>
#include <linux/kernel.h>
+#include <linux/smp.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
+#include <linux/cdrom.h>
+#include <linux/ucdrom.h>
#include <linux/sched.h>
+#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/malloc.h>
+#include <linux/vmalloc.h>
#include <linux/ptrace.h>
#include <linux/sys.h>
#include <linux/utsname.h>
@@ -32,75 +37,63 @@
#include <linux/locks.h>
#include <linux/string.h>
#include <linux/delay.h>
-#include <linux/config.h>
+#include <linux/sem.h>
+#include <linux/minix_fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/random.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/sysctl.h>
+#include <linux/hdreg.h>
+#include <linux/skbuff.h>
+#include <linux/genhd.h>
+#include <linux/swap.h>
+#include <linux/ctype.h>
-#ifdef CONFIG_NET
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#ifdef CONFIG_INET
-#include <linux/ip.h>
-#include <net/protocol.h>
-#include <net/arp.h>
-#include <net/tcp.h>
-#if defined(CONFIG_PPP) || defined(CONFIG_SLIP)
-#include "../drivers/net/slhc.h"
-#endif
-#endif
+extern unsigned char aux_device_present, kbd_read_mask;
+#ifdef __i386__
+ extern struct drive_info_struct drive_info;
#endif
+
#ifdef CONFIG_PCI
#include <linux/bios32.h>
#include <linux/pci.h>
#endif
-#if defined(CONFIG_MSDOS_FS) && !defined(CONFIG_UMSDOS_FS)
-#include <linux/msdos_fs.h>
+#if defined(CONFIG_PROC_FS)
+#include <linux/proc_fs.h>
+#endif
+#ifdef CONFIG_KERNELD
+#include <linux/kerneld.h>
#endif
-
#include <asm/irq.h>
+#ifdef __SMP__
+#include <linux/smp.h>
+#endif
-extern char *floppy_track_buffer;
-
+extern char *get_options(char *str, int *ints);
extern void set_device_ro(int dev,int flag);
extern struct file_operations * get_blkfops(unsigned int);
-
-extern void *sys_call_table;
-
-#ifdef CONFIG_FTAPE
-extern char * ftape_big_buffer;
-#endif
+extern void blkdev_release(struct inode * inode);
-#ifdef CONFIG_SCSI
-#include "../drivers/scsi/scsi.h"
-#include "../drivers/scsi/hosts.h"
-#include "../drivers/scsi/constants.h"
-#endif
+extern void *sys_call_table;
extern int sys_tz;
extern int request_dma(unsigned int dmanr, char * deviceID);
extern void free_dma(unsigned int dmanr);
-extern int close_fp(struct file *filp);
-extern void (* iABI_hook)(struct pt_regs * regs);
-
struct symbol_table symbol_table = {
#include <linux/symtab_begin.h>
-#ifdef CONFIG_MODVERSIONS
- { (void *)1 /* Version version :-) */, "_Using_Versions" },
+#ifdef MODVERSIONS
+ { (void *)1 /* Version version :-) */,
+ SYMBOL_NAME_STR (Using_Versions) },
#endif
- /* stackable module support */
- X(rename_module_symbol),
- X(register_symtab),
- /* system info variables */
- /* These check that they aren't defines (0/1) */
-#ifndef EISA_bus__is_a_macro
- X(EISA_bus),
-#endif
-#ifndef MCA_bus__is_a_macro
- X(MCA_bus),
-#endif
-#ifndef wp_works_ok__is_a_macro
- X(wp_works_ok),
+ /* stackable module support */
+ X(register_symtab_from),
+#ifdef CONFIG_KERNELD
+ X(kerneld_send),
#endif
+ X(get_options),
#ifdef CONFIG_PCI
/* PCI BIOS support */
@@ -117,21 +110,22 @@ struct symbol_table symbol_table = {
#endif
/* process memory management */
- X(verify_area),
X(do_mmap),
X(do_munmap),
- X(zeromap_page_range),
- X(unmap_page_range),
- X(insert_vm_struct),
- X(merge_segments),
+ X(exit_mm),
/* internal kernel memory management */
X(__get_free_pages),
X(free_pages),
X(kmalloc),
- X(kfree_s),
+ X(kfree),
X(vmalloc),
X(vfree),
+ X(mem_map),
+ X(remap_page_range),
+ X(max_mapnr),
+ X(high_memory),
+ X(update_vm_cache),
/* filesystem internal functions */
X(getname),
@@ -141,23 +135,32 @@ struct symbol_table symbol_table = {
X(namei),
X(lnamei),
X(open_namei),
+ X(sys_close),
X(close_fp),
X(check_disk_change),
X(invalidate_buffers),
+ X(invalidate_inodes),
+ X(invalidate_inode_pages),
X(fsync_dev),
X(permission),
X(inode_setattr),
X(inode_change_ok),
- X(generic_mmap),
X(set_blocksize),
X(getblk),
X(bread),
X(breada),
- X(brelse),
+ X(__brelse),
+ X(__bforget),
X(ll_rw_block),
X(__wait_on_buffer),
+ X(mark_buffer_uptodate),
+ X(unlock_buffer),
X(dcache_lookup),
X(dcache_add),
+ X(add_blkdev_randomness),
+ X(generic_file_read),
+ X(generic_file_mmap),
+ X(generic_readpage),
/* device registration */
X(register_chrdev),
@@ -168,6 +171,14 @@ struct symbol_table symbol_table = {
X(tty_unregister_driver),
X(tty_std_termios),
+#if defined(CONFIG_BLK_DEV_IDECD) || \
+ defined(CONFIG_BLK_DEV_SR) || \
+ defined(CONFIG_CM206)
+ X(register_cdrom),
+ X(unregister_cdrom),
+ X(cdrom_fops),
+#endif
+
/* block device driver support */
X(block_read),
X(block_write),
@@ -182,16 +193,27 @@ struct symbol_table symbol_table = {
X(bmap),
X(sync_dev),
X(get_blkfops),
-
+ X(blkdev_open),
+ X(blkdev_release),
+ X(gendisk_head),
+ X(resetup_one_dev),
+ X(unplug_device),
+#ifdef __i386__
+ X(drive_info),
+#endif
+
+#ifdef CONFIG_SERIAL
/* Module creation of serial units */
X(register_serial),
X(unregister_serial),
-
+#endif
/* tty routines */
X(tty_hangup),
X(tty_wait_until_sent),
X(tty_check_change),
X(tty_hung_up_p),
+ X(do_SAK),
+ X(console_print),
/* filesystem registration */
X(register_filesystem),
@@ -200,27 +222,47 @@ struct symbol_table symbol_table = {
/* executable format registration */
X(register_binfmt),
X(unregister_binfmt),
+ X(search_binary_handler),
+ X(prepare_binprm),
+ X(remove_arg_zero),
/* execution environment registration */
X(lookup_exec_domain),
X(register_exec_domain),
X(unregister_exec_domain),
+ /* sysctl table registration */
+ X(register_sysctl_table),
+ X(unregister_sysctl_table),
+ X(sysctl_string),
+ X(sysctl_intvec),
+ X(proc_dostring),
+ X(proc_dointvec),
+ X(proc_dointvec_minmax),
+
/* interrupt handling */
X(request_irq),
X(free_irq),
X(enable_irq),
X(disable_irq),
+ X(probe_irq_on),
+ X(probe_irq_off),
X(bh_active),
X(bh_mask),
+ X(bh_mask_count),
+ X(bh_base),
X(add_timer),
X(del_timer),
X(tq_timer),
X(tq_immediate),
X(tq_scheduler),
- X(tq_last),
X(timer_active),
X(timer_table),
+ X(intr_count),
+
+ /* autoirq from drivers/net/auto_irq.c */
+ X(autoirq_setup),
+ X(autoirq_report),
/* dma handling */
X(request_dma),
@@ -241,11 +283,13 @@ struct symbol_table symbol_table = {
X(sleep_on),
X(interruptible_sleep_on),
X(schedule),
- X(current),
+ X(current_set),
X(jiffies),
X(xtime),
+ X(do_gettimeofday),
X(loops_per_sec),
X(need_resched),
+ X(kstat),
X(kill_proc),
X(kill_pg),
X(kill_sl),
@@ -255,18 +299,20 @@ struct symbol_table symbol_table = {
X(printk),
X(sprintf),
X(vsprintf),
+ X(kdevname),
X(simple_strtoul),
X(system_utsname),
X(sys_call_table),
+ X(hard_reset_now),
+ X(_ctype),
+ X(secure_tcp_sequence_number),
/* Signal interfaces */
- X(do_signal),
X(send_sig),
/* Program loader interfaces */
X(setup_arg_pages),
X(copy_strings),
- X(create_tables),
X(do_execve),
X(flush_old_exec),
X(open_inode),
@@ -274,77 +320,7 @@ struct symbol_table symbol_table = {
/* Miscellaneous access points */
X(si_meminfo),
-#ifdef CONFIG_NET
- /* socket layer registration */
- X(sock_register),
- X(sock_unregister),
- /* Internet layer registration */
-#ifdef CONFIG_INET
- X(inet_add_protocol),
- X(inet_del_protocol),
-#if defined(CONFIG_PPP) || defined(CONFIG_SLIP)
- /* VJ header compression */
- X(slhc_init),
- X(slhc_free),
- X(slhc_remember),
- X(slhc_compress),
- X(slhc_uncompress),
-#endif
-#endif
- /* Device callback registration */
- X(register_netdevice_notifier),
- X(unregister_netdevice_notifier),
-#endif
-#ifdef CONFIG_FTAPE
- /* The next labels are needed for ftape driver. */
- X(ftape_big_buffer),
-#endif
- X(floppy_track_buffer),
-#ifdef CONFIG_INET
- /* support for loadable net drivers */
- X(register_netdev),
- X(unregister_netdev),
- X(ether_setup),
- X(alloc_skb),
- X(kfree_skb),
- X(dev_kfree_skb),
- X(netif_rx),
- X(dev_rint),
- X(dev_tint),
- X(irq2dev_map),
- X(dev_add_pack),
- X(dev_remove_pack),
- X(dev_get),
- X(dev_ioctl),
- X(dev_queue_xmit),
- X(dev_base),
- X(dev_close),
- X(arp_find),
- X(n_tty_ioctl),
- X(tty_register_ldisc),
- X(kill_fasync),
-#endif
-#ifdef CONFIG_SCSI
- /* Supports loadable scsi drivers */
- /*
- * in_scan_scsis is a hack, and should go away once the new
- * memory allocation code is in the NCR driver
- */
- X(in_scan_scsis),
- X(scsi_register_module),
- X(scsi_unregister_module),
- X(scsi_free),
- X(scsi_malloc),
- X(scsi_register),
- X(scsi_unregister),
- X(scsicam_bios_param),
- X(scsi_init_malloc),
- X(scsi_init_free),
- X(print_command),
- X(print_msg),
- X(print_status),
-#endif
/* Added to make file system as module */
X(set_writetime),
X(sys_tz),
@@ -352,9 +328,12 @@ struct symbol_table symbol_table = {
X(file_fsync),
X(clear_inode),
X(refile_buffer),
+ X(nr_async_pages),
X(___strtok),
X(init_fifo),
X(super_blocks),
+ X(reuse_list),
+ X(fifo_inode_operations),
X(chrdev_inode_operations),
X(blkdev_inode_operations),
X(read_ahead),
@@ -363,29 +342,23 @@ struct symbol_table symbol_table = {
X(insert_inode_hash),
X(event),
X(__down),
-#if defined(CONFIG_MSDOS_FS) && !defined(CONFIG_UMSDOS_FS)
- /* support for umsdos fs */
- X(msdos_bmap),
- X(msdos_create),
- X(msdos_file_read),
- X(msdos_file_write),
- X(msdos_lookup),
- X(msdos_mkdir),
- X(msdos_mmap),
- X(msdos_put_inode),
- X(msdos_put_super),
- X(msdos_read_inode),
- X(msdos_read_super),
- X(msdos_readdir),
- X(msdos_rename),
- X(msdos_rmdir),
- X(msdos_smap),
- X(msdos_statfs),
- X(msdos_truncate),
- X(msdos_unlink),
- X(msdos_unlink_umsdos),
- X(msdos_write_inode),
+ X(__up),
+ X(securelevel),
+/* all busmice */
+ X(add_mouse_randomness),
+ X(fasync_helper),
+/* psaux mouse */
+ X(aux_device_present),
+ X(kbd_read_mask),
+
+#ifdef CONFIG_BLK_DEV_MD
+ X(disk_name), /* for md.c */
#endif
+
+ /* binfmt_aout */
+ X(get_write_access),
+ X(put_write_access),
+
/********************************************************
* Do not add anything below this line,
* as the stacked modules depend on this!
diff --git a/kernel/ksyms.ver b/kernel/ksyms.ver
index 76537cd4c..69c4c3f9e 100644
--- a/kernel/ksyms.ver
+++ b/kernel/ksyms.ver
@@ -192,3 +192,197 @@
#define __down _set_ver(__down, 75aa9e96)
#endif /* _KSYMS_VER_ */
#endif /* CONFIG_MODVERSIONS !__GENKSYMS__ */
+/**** This file is generated by genksyms DO NOT EDIT! ****/
+#if defined(CONFIG_MODVERSIONS) && !defined(__GENKSYMS__)
+#ifndef _KSYMS_VER_
+#define _KSYMS_VER_
+#define rename_module_symbol _set_ver(rename_module_symbol, b81c73c1)
+#define register_symtab _set_ver(register_symtab, e910ea66)
+#define EISA_bus _set_ver(EISA_bus, 7e37737c)
+#define wp_works_ok _set_ver(wp_works_ok, f37f99e9)
+#define verify_area _set_ver(verify_area, 4cfda560)
+#define do_mmap _set_ver(do_mmap, 677e7ee1)
+#define do_munmap _set_ver(do_munmap, 6221f117)
+#define zeromap_page_range _set_ver(zeromap_page_range, 7c395a26)
+#define unmap_page_range _set_ver(unmap_page_range, 0110085f)
+#define insert_vm_struct _set_ver(insert_vm_struct, 1f4e4882)
+#define merge_segments _set_ver(merge_segments, 6854be5a)
+#define __get_free_pages _set_ver(__get_free_pages, 5243d78b)
+#define free_pages _set_ver(free_pages, 96448859)
+#define kmalloc _set_ver(kmalloc, d31fb2cb)
+#define kfree_s _set_ver(kfree_s, 1e72eb79)
+#define vmalloc _set_ver(vmalloc, 667f3e25)
+#define vfree _set_ver(vfree, 6df52add)
+#define getname _set_ver(getname, 81487159)
+#define putname _set_ver(putname, b19e8126)
+#define __iget _set_ver(__iget, ee2b6320)
+#define iput _set_ver(iput, 59241ced)
+#define namei _set_ver(namei, 00478bcd)
+#define lnamei _set_ver(lnamei, fcfddbb1)
+#define open_namei _set_ver(open_namei, 414b2b0f)
+#define close_fp _set_ver(close_fp, 1d4c15d8)
+#define check_disk_change _set_ver(check_disk_change, b66ed457)
+#define invalidate_buffers _set_ver(invalidate_buffers, c65255f1)
+#define fsync_dev _set_ver(fsync_dev, a221190d)
+#define permission _set_ver(permission, 0ebf7474)
+#define inode_setattr _set_ver(inode_setattr, 0c80a3c1)
+#define inode_change_ok _set_ver(inode_change_ok, 5d1cb326)
+#define generic_mmap _set_ver(generic_mmap, d4ff59f3)
+#define set_blocksize _set_ver(set_blocksize, f45fda38)
+#define getblk _set_ver(getblk, d40228ac)
+#define bread _set_ver(bread, c73bf0f0)
+#define breada _set_ver(breada, eb8e858c)
+#define brelse _set_ver(brelse, 4c27ac3d)
+#define ll_rw_block _set_ver(ll_rw_block, f3aa4dd3)
+#define __wait_on_buffer _set_ver(__wait_on_buffer, e8fcc968)
+#define dcache_lookup _set_ver(dcache_lookup, 83336566)
+#define dcache_add _set_ver(dcache_add, fe71f11e)
+#define register_chrdev _set_ver(register_chrdev, da99513f)
+#define unregister_chrdev _set_ver(unregister_chrdev, 61ea5ee8)
+#define register_blkdev _set_ver(register_blkdev, 4699a621)
+#define unregister_blkdev _set_ver(unregister_blkdev, d39bbca9)
+#define tty_register_driver _set_ver(tty_register_driver, fcc8591c)
+#define tty_unregister_driver _set_ver(tty_unregister_driver, c78132a8)
+#define tty_std_termios _set_ver(tty_std_termios, cf350678)
+#define block_read _set_ver(block_read, a7fe4f51)
+#define block_write _set_ver(block_write, 902674c9)
+#define block_fsync _set_ver(block_fsync, 182888d8)
+#define wait_for_request _set_ver(wait_for_request, 9ca2932e)
+#define blksize_size _set_ver(blksize_size, dea1eb55)
+#define hardsect_size _set_ver(hardsect_size, ed1ee14f)
+#define blk_size _set_ver(blk_size, f60b5398)
+#define blk_dev _set_ver(blk_dev, dbf5fdd4)
+#define is_read_only _set_ver(is_read_only, b0c5f83e)
+#define set_device_ro _set_ver(set_device_ro, 8fb69e13)
+#define bmap _set_ver(bmap, 73bb8bdd)
+#define sync_dev _set_ver(sync_dev, 9bca536d)
+#define get_blkfops _set_ver(get_blkfops, 83827791)
+#define register_serial _set_ver(register_serial, 3425f38c)
+#define unregister_serial _set_ver(unregister_serial, c013d717)
+#define tty_hangup _set_ver(tty_hangup, e3487df0)
+#define tty_wait_until_sent _set_ver(tty_wait_until_sent, da85d428)
+#define tty_check_change _set_ver(tty_check_change, 705eaab0)
+#define tty_hung_up_p _set_ver(tty_hung_up_p, f99ac1e4)
+#define register_filesystem _set_ver(register_filesystem, 1c7110ef)
+#define unregister_filesystem _set_ver(unregister_filesystem, 5e353af7)
+#define register_binfmt _set_ver(register_binfmt, 66ece706)
+#define unregister_binfmt _set_ver(unregister_binfmt, 41822618)
+#define lookup_exec_domain _set_ver(lookup_exec_domain, 32f10d48)
+#define register_exec_domain _set_ver(register_exec_domain, eda4711f)
+#define unregister_exec_domain _set_ver(unregister_exec_domain, 78ea447c)
+#define request_irq _set_ver(request_irq, 9e81629c)
+#define free_irq _set_ver(free_irq, f487dc0c)
+#define enable_irq _set_ver(enable_irq, 54e09f5f)
+#define disable_irq _set_ver(disable_irq, b4449c1f)
+#define bh_active _set_ver(bh_active, 98fb5ca1)
+#define bh_mask _set_ver(bh_mask, 1abf3d3f)
+#define add_timer _set_ver(add_timer, f13cb728)
+#define del_timer _set_ver(del_timer, c7aff713)
+#define tq_timer _set_ver(tq_timer, 46cf583e)
+#define tq_immediate _set_ver(tq_immediate, 46cf583e)
+#define tq_scheduler _set_ver(tq_scheduler, 46cf583e)
+#define tq_last _set_ver(tq_last, 457cf547)
+#define timer_active _set_ver(timer_active, 5a6747ee)
+#define timer_table _set_ver(timer_table, 9e03b650)
+#define request_dma _set_ver(request_dma, 2a687646)
+#define free_dma _set_ver(free_dma, 5d4b914c)
+#define disable_hlt _set_ver(disable_hlt, 794487ee)
+#define enable_hlt _set_ver(enable_hlt, 9c7077bd)
+#define check_region _set_ver(check_region, b91154fb)
+#define request_region _set_ver(request_region, 138b0a1e)
+#define release_region _set_ver(release_region, f41d6d31)
+#define wake_up _set_ver(wake_up, e8d71419)
+#define wake_up_interruptible _set_ver(wake_up_interruptible, 64c8cb92)
+#define sleep_on _set_ver(sleep_on, 67a00cee)
+#define interruptible_sleep_on _set_ver(interruptible_sleep_on, 6a5fc80d)
+#define schedule _set_ver(schedule, 01000e51)
+#define current _set_ver(current, fc1cb29b)
+#define jiffies _set_ver(jiffies, 2f7c7437)
+#define xtime _set_ver(xtime, e70c0be0)
+#define loops_per_sec _set_ver(loops_per_sec, 40a14192)
+#define need_resched _set_ver(need_resched, dfc016ea)
+#define kill_proc _set_ver(kill_proc, 911f760a)
+#define kill_pg _set_ver(kill_pg, 0a758a45)
+#define kill_sl _set_ver(kill_sl, 49625e94)
+#define panic _set_ver(panic, 400c0de3)
+#define printk _set_ver(printk, ad1148ba)
+#define sprintf _set_ver(sprintf, f9003107)
+#define vsprintf _set_ver(vsprintf, e605cb6b)
+#define simple_strtoul _set_ver(simple_strtoul, bdb8c1e3)
+#define system_utsname _set_ver(system_utsname, 066845bc)
+#define sys_call_table _set_ver(sys_call_table, 79fa4011)
+#define do_signal _set_ver(do_signal, 86f9bc59)
+#define send_sig _set_ver(send_sig, 5cddd8d9)
+#define setup_arg_pages _set_ver(setup_arg_pages, fe68d94a)
+#define copy_strings _set_ver(copy_strings, 232aee96)
+#define create_tables _set_ver(create_tables, ba788fa2)
+#define do_execve _set_ver(do_execve, 8c99dc0a)
+#define flush_old_exec _set_ver(flush_old_exec, c737e178)
+#define open_inode _set_ver(open_inode, 27302cb6)
+#define read_exec _set_ver(read_exec, a80a2dd0)
+#define si_meminfo _set_ver(si_meminfo, bb05fc9a)
+#define sock_register _set_ver(sock_register, d68e1649)
+#define sock_unregister _set_ver(sock_unregister, 72c332bd)
+#define inet_add_protocol _set_ver(inet_add_protocol, 55292121)
+#define inet_del_protocol _set_ver(inet_del_protocol, 73908a1b)
+#define slhc_init _set_ver(slhc_init, e490a4b8)
+#define slhc_free _set_ver(slhc_free, 39ab902b)
+#define slhc_remember _set_ver(slhc_remember, db333be6)
+#define slhc_compress _set_ver(slhc_compress, e753e2d2)
+#define slhc_uncompress _set_ver(slhc_uncompress, 81cc1144)
+#define register_netdevice_notifier _set_ver(register_netdevice_notifier, e7aace7c)
+#define unregister_netdevice_notifier _set_ver(unregister_netdevice_notifier, be114416)
+#define floppy_track_buffer _set_ver(floppy_track_buffer, c6e3f7c2)
+#define register_netdev _set_ver(register_netdev, 0d8d1bb4)
+#define unregister_netdev _set_ver(unregister_netdev, 25a99579)
+#define ether_setup _set_ver(ether_setup, 4eafef91)
+#define alloc_skb _set_ver(alloc_skb, b6b523ba)
+#define kfree_skb _set_ver(kfree_skb, 0b938572)
+#define dev_kfree_skb _set_ver(dev_kfree_skb, aa1fe7f4)
+#define netif_rx _set_ver(netif_rx, d8051cb2)
+#define dev_rint _set_ver(dev_rint, 040d3f4b)
+#define dev_tint _set_ver(dev_tint, 860b350b)
+#define irq2dev_map _set_ver(irq2dev_map, 10bdcd8a)
+#define dev_add_pack _set_ver(dev_add_pack, 6d7d9be4)
+#define dev_remove_pack _set_ver(dev_remove_pack, 784fa59f)
+#define dev_get _set_ver(dev_get, 72ed90fd)
+#define dev_ioctl _set_ver(dev_ioctl, 08760203)
+#define dev_queue_xmit _set_ver(dev_queue_xmit, 4a478225)
+#define dev_base _set_ver(dev_base, 0a8809f0)
+#define dev_close _set_ver(dev_close, 9bdad56d)
+#define arp_find _set_ver(arp_find, a141bd11)
+#define n_tty_ioctl _set_ver(n_tty_ioctl, 538e5fa6)
+#define tty_register_ldisc _set_ver(tty_register_ldisc, 8fdde939)
+#define kill_fasync _set_ver(kill_fasync, 890501b6)
+#define in_scan_scsis _set_ver(in_scan_scsis, 21874a88)
+#define scsi_register_module _set_ver(scsi_register_module, 8eff1010)
+#define scsi_unregister_module _set_ver(scsi_unregister_module, d913b8f0)
+#define scsi_free _set_ver(scsi_free, 475dddfa)
+#define scsi_malloc _set_ver(scsi_malloc, 1cce3f92)
+#define scsi_register _set_ver(scsi_register, d6e77069)
+#define scsi_unregister _set_ver(scsi_unregister, 3b0b616b)
+#define scsicam_bios_param _set_ver(scsicam_bios_param, 3d965248)
+#define scsi_init_malloc _set_ver(scsi_init_malloc, e5167cbc)
+#define scsi_init_free _set_ver(scsi_init_free, 8b2721f8)
+#define print_command _set_ver(print_command, 6f14cd75)
+#define print_msg _set_ver(print_msg, 0465f877)
+#define print_status _set_ver(print_status, 32f84646)
+#define set_writetime _set_ver(set_writetime, 52131916)
+#define sys_tz _set_ver(sys_tz, aa3c9782)
+#define __wait_on_super _set_ver(__wait_on_super, 61a5c00a)
+#define file_fsync _set_ver(file_fsync, d30a190f)
+#define clear_inode _set_ver(clear_inode, da2b0e9f)
+#define refile_buffer _set_ver(refile_buffer, 8c69e123)
+#define ___strtok _set_ver(___strtok, 8b55d69c)
+#define init_fifo _set_ver(init_fifo, 082629c7)
+#define super_blocks _set_ver(super_blocks, e1f1ee99)
+#define chrdev_inode_operations _set_ver(chrdev_inode_operations, 6ba1faa3)
+#define blkdev_inode_operations _set_ver(blkdev_inode_operations, ed443696)
+#define read_ahead _set_ver(read_ahead, bbcd3768)
+#define get_hash_table _set_ver(get_hash_table, 3b5f3c55)
+#define get_empty_inode _set_ver(get_empty_inode, 554bdc75)
+#define insert_inode_hash _set_ver(insert_inode_hash, 59b8c371)
+#define event _set_ver(event, a6aac9c1)
+#define __down _set_ver(__down, 75aa9e96)
+#endif /* _KSYMS_VER_ */
+#endif /* CONFIG_MODVERSIONS !__GENKSYMS__ */
diff --git a/kernel/module.c b/kernel/module.c
index e29a48ba5..09cee93b7 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1,11 +1,14 @@
#include <linux/errno.h>
#include <linux/kernel.h>
-#include <asm/segment.h>
#include <linux/mm.h> /* defines GFP_KERNEL */
#include <linux/string.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/malloc.h>
+#include <linux/vmalloc.h>
+#include <linux/config.h>
+
+#include <asm/uaccess.h>
/*
* Originally by Anonymous (as far as I know...)
* Linux version by Bas Laarhoven <bas@vimec.nl>
@@ -34,30 +37,31 @@
* and finally: reducing the number of entries in ksyms.c
* since every subsystem should now be able to decide and
* control exactly what symbols it wants to export, locally!
+ *
+ * On 1-Aug-95: <Matti.Aarnio@utu.fi> altered code to use same style as
+ * do /proc/net/XXX "files". Namely allow more than 4kB
+ * (or what the block size is) output.
+ *
+ * - Use dummy syscall functions for users who disable all
+ * module support. Similar to kernel/sys.c (Paul Gortmaker)
*/
-#ifdef DEBUG_MODULE
-#define PRINTK(a) printk a
-#else
-#define PRINTK(a) /* */
-#endif
+#ifdef CONFIG_MODULES /* a *big* #ifdef block... */
static struct module kernel_module;
-static struct module *module_list = &kernel_module;
+struct module *module_list = &kernel_module;
static int freeing_modules; /* true if some modules are marked for deletion */
static struct module *find_module( const char *name);
-static int get_mod_name( char *user_name, char *buf);
static int free_modules( void);
-static int module_init_flag = 0; /* Hmm... */
+extern struct symbol_table symbol_table; /* in kernel/ksyms.c */
/*
* Called at boot time
*/
void init_modules(void) {
- extern struct symbol_table symbol_table; /* in kernel/ksyms.c */
struct internal_symbol *sym;
int i;
@@ -70,62 +74,21 @@ void init_modules(void) {
kernel_module.name = "";
}
-int
-rename_module_symbol(char *old_name, char *new_name)
-{
- struct internal_symbol *sym;
- int i = 0; /* keep gcc silent */
-
- if (module_list->symtab) {
- sym = module_list->symtab->symbol;
- for (i = module_list->symtab->n_symbols; i > 0; ++sym, --i) {
- if (strcmp(sym->name, old_name) == 0) { /* found it! */
- sym->name = new_name; /* done! */
- PRINTK(("renamed %s to %s\n", old_name, new_name));
- return 1; /* it worked! */
- }
- }
- }
- printk("rename %s to %s failed!\n", old_name, new_name);
- return 0; /* not there... */
- /*
- * This one will change the name of the first matching symbol!
- *
- * With this function, you can replace the name of a symbol defined
- * in the current module with a new name, e.g. when you want to insert
- * your own function instead of a previously defined function
- * with the same name.
- *
- * "Normal" usage:
- *
- * bogus_function(int params)
- * {
- * do something "smart";
- * return real_function(params);
- * }
- *
- * ...
- *
- * init_module()
- * {
- * if (rename_module_symbol("_bogus_function", "_real_function"))
- * printk("yep!\n");
- * else
- * printk("no way!\n");
- * ...
- * }
- *
- * When loading this module, real_function will be resolved
- * to the real function address.
- * All later loaded modules that refer to "real_function()" will
- * then really call "bogus_function()" instead!!!
- *
- * This feature will give you ample opportunities to get to know
- * the taste of your foot when you stuff it into your mouth!!!
- */
+/*
+ * Copy the name of a module from user space.
+ */
+inline int
+get_mod_name(char *user_name, char *buf)
+{
+ /* Should return -EBIG instead of -EFAULT when the name
+ is too long, but that we couldn't detect real faults then.
+ Maybe strncpy_from_user() should return -EBIG, when
+ the source string is too long. */
+ return strncpy_from_user(buf, user_name, MOD_MAX_NAME);
}
+
/*
* Allocate space for a module.
*/
@@ -143,7 +106,7 @@ sys_create_module(char *module_name, unsigned long size)
return -EPERM;
if (module_name == NULL || size == 0)
return -EINVAL;
- if ((error = get_mod_name(module_name, name)) != 0)
+ if ((error = get_mod_name(module_name, name)) < 0)
return error;
if (find_module(name) != NULL) {
return -EEXIST;
@@ -154,8 +117,8 @@ sys_create_module(char *module_name, unsigned long size)
}
strcpy((char *)(mp + 1), name); /* why not? */
- npages = (size + sizeof (int) + 4095) / 4096;
- if ((addr = vmalloc(npages * 4096)) == 0) {
+ npages = (size + sizeof (long) + PAGE_SIZE - 1) / PAGE_SIZE;
+ if ((addr = vmalloc(npages * PAGE_SIZE)) == 0) {
kfree_s(mp, sspace);
return -ENOMEM;
}
@@ -168,15 +131,18 @@ sys_create_module(char *module_name, unsigned long size)
mp->addr = addr;
mp->state = MOD_UNINITIALIZED;
mp->cleanup = NULL;
+ mp->exceptinfo.start = NULL;
+ mp->exceptinfo.stop = NULL;
- * (int *) addr = 0; /* set use count to zero */
+ * (long *) addr = 0; /* set use count to zero */
module_list = mp; /* link it in */
- PRINTK(("module `%s' (%lu pages @ 0x%08lx) created\n",
- mp->name, (unsigned long) mp->size, (unsigned long) mp->addr));
+ pr_debug("module `%s' (%lu pages @ 0x%08lx) created\n",
+ mp->name, (unsigned long) mp->size, (unsigned long) mp->addr);
return (unsigned long) addr;
}
+
/*
* Initialize a module.
*/
@@ -194,34 +160,44 @@ sys_init_module(char *module_name, char *code, unsigned codesize,
if (!suser())
return -EPERM;
+#ifdef __i386__
/* A little bit of protection... we "know" where the user stack is... */
+
if (symtab && ((unsigned long)symtab > 0xb0000000)) {
- printk("warning: you are using an old insmod, no symbols will be inserted!\n");
+ printk(KERN_WARNING "warning: you are using an old insmod, no symbols will be inserted!\n");
symtab = NULL;
}
-
- /*
- * First reclaim any memory from dead modules that where not
- * freed when deleted. Should I think be done by timers when
- * the module was deleted - Jon.
- */
- free_modules();
-
- if ((error = get_mod_name(module_name, name)) != 0)
+#endif
+ if ((error = get_mod_name(module_name, name)) < 0)
return error;
- PRINTK(("initializing module `%s', %d (0x%x) bytes\n",
- name, codesize, codesize));
- memcpy_fromfs(&rt, routines, sizeof rt);
+ pr_debug("initializing module `%s', %d (0x%x) bytes\n",
+ name, codesize, codesize);
+ if (copy_from_user(&rt, routines, sizeof rt))
+ return -EFAULT;
if ((mp = find_module(name)) == NULL)
return -ENOENT;
- if ((codesize + sizeof (int) + 4095) / 4096 > mp->size)
+ if (codesize & MOD_AUTOCLEAN) {
+ /*
+ * set autoclean marker from codesize...
+ * set usage count to "zero"
+ */
+ codesize &= ~MOD_AUTOCLEAN;
+ GET_USE_COUNT(mp) = MOD_AUTOCLEAN;
+ }
+ if ((codesize + sizeof (long) + PAGE_SIZE - 1) / PAGE_SIZE > mp->size)
+ return -EINVAL;
+ if (copy_from_user((char *)mp->addr + sizeof (long), code, codesize))
+ return -EFAULT;
+ memset((char *)mp->addr + sizeof (long) + codesize, 0,
+ mp->size * PAGE_SIZE - (codesize + sizeof (long)));
+ pr_debug("module init entry = 0x%08lx, cleanup entry = 0x%08lx\n",
+ (unsigned long) rt.init, (unsigned long) rt.cleanup);
+ if (rt.signature != MODULE_2_1_7_SIG){
+ printk ("Older insmod used with kernel 2.1.7 +\n");
return -EINVAL;
- memcpy_fromfs((char *)mp->addr + sizeof (int), code, codesize);
- memset((char *)mp->addr + sizeof (int) + codesize, 0,
- mp->size * 4096 - (codesize + sizeof (int)));
- PRINTK(( "module init entry = 0x%08lx, cleanup entry = 0x%08lx\n",
- (unsigned long) rt.init, (unsigned long) rt.cleanup));
+ }
mp->cleanup = rt.cleanup;
+ mp->exceptinfo = rt.exceptinfo;
/* update kernel symbol table */
if (symtab) { /* symtab == NULL means no new entries to handle */
@@ -231,37 +207,34 @@ sys_init_module(char *module_name, char *code, unsigned codesize,
int i;
int legal_start;
- if ((error = verify_area(VERIFY_READ, symtab, sizeof(int))))
- return error;
- memcpy_fromfs((char *)(&(size)), symtab, sizeof(int));
-
+ error = get_user(size, &symtab->size);
+ if (error)
+ return error;
if ((newtab = (struct symbol_table*) kmalloc(size, GFP_KERNEL)) == NULL) {
return -ENOMEM;
}
- if ((error = verify_area(VERIFY_READ, symtab, size))) {
- kfree_s(newtab, size);
- return error;
+ if (copy_from_user((char *)(newtab), symtab, size)) {
+ kfree_s(newtab, size);
+ return -EFAULT;
}
- memcpy_fromfs((char *)(newtab), symtab, size);
/* sanity check */
legal_start = sizeof(struct symbol_table) +
newtab->n_symbols * sizeof(struct internal_symbol) +
newtab->n_refs * sizeof(struct module_ref);
- if ((newtab->n_symbols < 0) || (newtab->n_refs < 0) ||
- (legal_start > size)) {
- printk("Illegal symbol table! Rejected!\n");
+ if ((newtab->n_symbols < 0) || (newtab->n_refs < 0) || (legal_start > size)) {
+ printk(KERN_WARNING "Rejecting illegal symbol table (n_symbols=%d,n_refs=%d)\n",
+ newtab->n_symbols, newtab->n_refs);
kfree_s(newtab, size);
return -EINVAL;
}
/* relocate name pointers, index referred from start of table */
- for (sym = &(newtab->symbol[0]), i = 0;
- i < newtab->n_symbols; ++sym, ++i) {
+ for (sym = &(newtab->symbol[0]), i = 0; i < newtab->n_symbols; ++sym, ++i) {
if ((unsigned long)sym->name < legal_start || size <= (unsigned long)sym->name) {
- printk("Illegal symbol table! Rejected!\n");
+ printk(KERN_WARNING "Rejecting illegal symbol table\n");
kfree_s(newtab, size);
return -EINVAL;
}
@@ -287,7 +260,7 @@ sys_init_module(char *module_name, char *code, unsigned codesize,
link = link->next;
if (link == (struct module *)0) {
- printk("Non-module reference! Rejected!\n");
+ printk(KERN_WARNING "Non-module reference! Rejected!\n");
return -EINVAL;
}
@@ -297,12 +270,12 @@ sys_init_module(char *module_name, char *code, unsigned codesize,
}
}
- module_init_flag = 1; /* Hmm... */
+ GET_USE_COUNT(mp) += 1;
if ((*rt.init)() != 0) {
- module_init_flag = 0; /* Hmm... */
+ GET_USE_COUNT(mp) = 0;
return -EBUSY;
}
- module_init_flag = 0; /* Hmm... */
+ GET_USE_COUNT(mp) -= 1;
mp->state = MOD_RUNNING;
return 0;
@@ -319,17 +292,39 @@ sys_delete_module(char *module_name)
return -EPERM;
/* else */
if (module_name != NULL) {
- if ((error = get_mod_name(module_name, name)) != 0)
+ if ((error = get_mod_name(module_name, name)) < 0)
return error;
if ((mp = find_module(name)) == NULL)
return -ENOENT;
- if ((mp->ref != NULL) || (GET_USE_COUNT(mp) != 0))
+ if ((mp->ref != NULL) ||
+ ((GET_USE_COUNT(mp) & ~(MOD_AUTOCLEAN | MOD_VISITED)) != 0))
return -EBUSY;
+ GET_USE_COUNT(mp) &= ~(MOD_AUTOCLEAN | MOD_VISITED);
if (mp->state == MOD_RUNNING)
(*mp->cleanup)();
mp->state = MOD_DELETED;
+ free_modules();
+ }
+ /* for automatic reaping */
+ else {
+ struct module *mp_next;
+ for (mp = module_list; mp != &kernel_module; mp = mp_next) {
+ mp_next = mp->next;
+ if ((mp->ref == NULL) && (mp->state == MOD_RUNNING) &&
+ ((GET_USE_COUNT(mp) & ~MOD_VISITED) == MOD_AUTOCLEAN)) {
+ if ((GET_USE_COUNT(mp) & MOD_VISITED)) {
+ /* Don't reap until one "cycle" after last _use_ */
+ GET_USE_COUNT(mp) &= ~MOD_VISITED;
+ }
+ else {
+ GET_USE_COUNT(mp) &= ~(MOD_AUTOCLEAN | MOD_VISITED);
+ (*mp->cleanup)();
+ mp->state = MOD_DELETED;
+ free_modules();
+ }
+ }
+ }
}
- free_modules();
return 0;
}
@@ -364,6 +359,7 @@ sys_get_kernel_syms(struct kernel_sym *table)
struct module *mp = module_list;
int i;
int nmodsyms = 0;
+ int err;
for (mp = module_list; mp; mp = mp->next) {
if (mp->symtab && mp->symtab->n_symbols) {
@@ -378,16 +374,15 @@ sys_get_kernel_syms(struct kernel_sym *table)
if (table != NULL) {
to = table;
- if ((i = verify_area(VERIFY_WRITE, to, nmodsyms * sizeof(*table))))
- return i;
-
/* copy all module symbols first (always LIFO order) */
for (mp = module_list; mp; mp = mp->next) {
if (mp->state == MOD_RUNNING) {
/* magic: write module info as a pseudo symbol */
isym.value = (unsigned long)mp;
sprintf(isym.name, "#%s", mp->name);
- memcpy_tofs(to, &isym, sizeof isym);
+ err = copy_to_user(to, &isym, sizeof isym);
+ if (err)
+ return -EFAULT;
++to;
if (mp->symtab != NULL) {
@@ -397,7 +392,9 @@ sys_get_kernel_syms(struct kernel_sym *table)
isym.value = (unsigned long)from->addr;
strncpy(isym.name, from->name, sizeof isym.name);
- memcpy_tofs(to, &isym, sizeof isym);
+ err = copy_to_user(to, &isym, sizeof isym);
+ if (err)
+ return -EFAULT;
}
}
}
@@ -407,24 +404,6 @@ sys_get_kernel_syms(struct kernel_sym *table)
return nmodsyms;
}
-
-/*
- * Copy the name of a module from user space.
- */
-int
-get_mod_name(char *user_name, char *buf)
-{
- int i;
-
- i = 0;
- for (i = 0 ; (buf[i] = get_fs_byte(user_name + i)) != '\0' ; ) {
- if (++i >= MOD_MAX_NAME)
- return -E2BIG;
- }
- return 0;
-}
-
-
/*
* Look for a module by name, ignoring modules marked for deletion.
*/
@@ -482,7 +461,7 @@ free_modules( void)
if (mp->state != MOD_DELETED) {
mpp = &mp->next;
} else {
- if (GET_USE_COUNT(mp) != 0) {
+ if ((GET_USE_COUNT(mp) != 0) || (mp->ref != NULL)) {
freeing_modules = 1;
mpp = &mp->next;
} else { /* delete it */
@@ -509,7 +488,7 @@ free_modules( void)
int get_module_list(char *buf)
{
char *p;
- char *q;
+ const char *q;
int i;
struct module *mp;
struct module_ref *ref;
@@ -521,6 +500,8 @@ int get_module_list(char *buf)
if (p - buf > 4096 - 100)
break; /* avoid overflowing buffer */
q = mp->name;
+ if (*q == '\0' && mp->size == 0 && mp->ref == NULL)
+ continue; /* don't list modules for kernel syms */
i = 20;
while (*q) {
*p++ = *q++;
@@ -546,8 +527,8 @@ int get_module_list(char *buf)
while (*q)
*p++ = *q++;
+ *p++ = '\t';
if ((ref = mp->ref) != NULL) {
- *p++ = '\t';
*p++ = '[';
for (; ref; ref = ref->next) {
q = ref->module->name;
@@ -558,6 +539,15 @@ int get_module_list(char *buf)
}
*p++ = ']';
}
+ if (mp->state == MOD_RUNNING) {
+ sprintf(size,"\t%ld%s",
+ GET_USE_COUNT(mp) & ~(MOD_AUTOCLEAN | MOD_VISITED),
+ ((GET_USE_COUNT(mp) & MOD_AUTOCLEAN)?
+ " (autoclean)":""));
+ q = size;
+ while (*q)
+ *p++ = *q++;
+ }
*p++ = '\n';
}
return p - buf;
@@ -567,40 +557,51 @@ int get_module_list(char *buf)
/*
* Called by the /proc file system to return a current list of ksyms.
*/
-int get_ksyms_list(char *buf)
+int get_ksyms_list(char *buf, char **start, off_t offset, int length)
{
struct module *mp;
struct internal_symbol *sym;
int i;
char *p = buf;
+ int len = 0; /* code from net/ipv4/proc.c */
+ off_t pos = 0;
+ off_t begin = 0;
for (mp = module_list; mp; mp = mp->next) {
if ((mp->state == MOD_RUNNING) &&
- (mp->symtab != NULL) && (mp->symtab->n_symbols > 0)) {
+ (mp->symtab != NULL) &&
+ (mp->symtab->n_symbols > 0)) {
for (i = mp->symtab->n_symbols,
sym = mp->symtab->symbol;
i > 0; --i, ++sym) {
- if (p - buf > 4096 - 100) {
- strcat(p, "...\n");
- p += strlen(p);
- return p - buf; /* avoid overflowing buffer */
- }
-
+ p = buf + len;
if (mp->name[0]) {
- sprintf(p, "%08lx %s\t[%s]\n",
- (long)sym->addr, sym->name, mp->name);
+ len += sprintf(p, "%08lx %s\t[%s]\n",
+ (long)sym->addr,
+ sym->name, mp->name);
+ } else {
+ len += sprintf(p, "%08lx %s\n",
+ (long)sym->addr,
+ sym->name);
}
- else {
- sprintf(p, "%08lx %s\n",
- (long)sym->addr, sym->name);
+ pos = begin + len;
+ if (pos < offset) {
+ len = 0;
+ begin = pos;
}
- p += strlen(p);
+ pos = begin + len;
+ if (pos > offset+length)
+ goto leave_the_loop;
}
}
}
-
- return p - buf;
+ leave_the_loop:
+ *start = buf + (offset - begin);
+ len -= (offset - begin);
+ if (len > length)
+ len = length;
+ return len;
}
/*
@@ -613,7 +614,7 @@ int get_ksyms_list(char *buf)
* - For a loadable module, the function should only be called in the
* context of init_module
*
- * Those are the only restrictions! (apart from not being reenterable...)
+ * Those are the only restrictions! (apart from not being reentrant...)
*
* If you want to remove a symbol table for a loadable module,
* the call looks like: "register_symtab(0)".
@@ -628,7 +629,7 @@ int get_ksyms_list(char *buf)
static struct symbol_table nulltab;
int
-register_symtab(struct symbol_table *intab)
+register_symtab_from(struct symbol_table *intab, long *from)
{
struct module *mp;
struct module *link;
@@ -645,11 +646,16 @@ register_symtab(struct symbol_table *intab)
intab->n_symbols +=1;
}
-#if 1
- if (module_init_flag == 0) { /* Hmm... */
-#else
- if (module_list == &kernel_module) {
-#endif
+ for (mp = module_list; mp != &kernel_module; mp = mp->next) {
+ /*
+ * "from" points to "mod_use_count_" (== start of module)
+ * or is == 0 if called from a non-module
+ */
+ if ((unsigned long)(mp->addr) == (unsigned long)from)
+ break;
+ }
+
+ if (mp == &kernel_module) {
/* Aha! Called from an "internal" module */
if (!intab)
return 0; /* or -ESILLY_PROGRAMMER :-) */
@@ -657,7 +663,7 @@ register_symtab(struct symbol_table *intab)
/* create a pseudo module! */
if (!(mp = (struct module*) kmalloc(MODSIZ, GFP_KERNEL))) {
/* panic time! */
- printk("Out of memory for new symbol table!\n");
+ printk(KERN_ERR "Out of memory for new symbol table!\n");
return -ENOMEM;
}
/* else OK */
@@ -680,7 +686,6 @@ register_symtab(struct symbol_table *intab)
* call to init_module i.e. when loading the module!!
* Or else...
*/
- mp = module_list; /* true when doing init_module! */
/* Any table there before? */
if ((oldtab = mp->symtab) == (struct symbol_table*)0) {
@@ -690,12 +695,6 @@ register_symtab(struct symbol_table *intab)
}
/* else ****** we have to replace the module symbol table ******/
-#if 0
- if (oldtab->n_symbols > 0) {
- /* Oh dear, I have to drop the old ones... */
- printk("Warning, dropping old symbols\n");
- }
-#endif
if (oldtab->n_refs == 0) { /* no problems! */
mp->symtab = intab;
@@ -720,7 +719,7 @@ register_symtab(struct symbol_table *intab)
oldtab->n_refs * REFSIZ,
GFP_KERNEL))) {
/* panic time! */
- printk("Out of memory for new symbol table!\n");
+ printk(KERN_ERR "Out of memory for new symbol table!\n");
return -ENOMEM;
}
@@ -759,3 +758,35 @@ register_symtab(struct symbol_table *intab)
return 0;
}
+
+#else /* CONFIG_MODULES */
+
+/* Dummy syscalls for people who don't want modules */
+
+asmlinkage unsigned long sys_create_module(void)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_init_module(void)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_delete_module(void)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_get_kernel_syms(void)
+{
+ return -ENOSYS;
+}
+
+int register_symtab_from(struct symbol_table *intab, long *from)
+{
+ return 0;
+}
+
+#endif /* CONFIG_MODULES */
+
diff --git a/kernel/panic.c b/kernel/panic.c
index 300fcbbf3..7e04fdc31 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -7,18 +7,33 @@
/*
* This function is used through-out the kernel (including mm and fs)
* to indicate a major problem.
+ * Support for machines without PC-style console hardware - <dfrick@dial.eunet.ch>, July 96
*/
#include <stdarg.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/config.h>
+#include <asm/system.h>
asmlinkage void sys_sync(void); /* it's really int */
+extern void do_unblank_screen(void);
+extern int C_A_D;
+
+int panic_timeout = 0;
+
+void panic_setup(char *str, int *ints)
+{
+ if (ints[0] == 1)
+ panic_timeout = ints[1];
+}
NORET_TYPE void panic(const char * fmt, ...)
{
static char buf[1024];
va_list args;
+ int i;
va_start(args, fmt);
vsprintf(buf, fmt, args);
@@ -28,5 +43,22 @@ NORET_TYPE void panic(const char * fmt, ...)
printk(KERN_EMERG "In swapper task - not syncing\n");
else
sys_sync();
+
+#ifndef CONFIG_SERIAL_ONLY_CONSOLE
+ do_unblank_screen();
+#endif
+
+ if (panic_timeout > 0)
+ {
+ /*
+ * Delay timeout seconds before rebooting the machine.
+ * We can't use the "normal" timers since we just panicked..
+ */
+ printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
+ for(i = 0; i < (panic_timeout*1000); i++)
+ udelay(1000);
+ hard_reset_now();
+ }
for(;;);
}
+
diff --git a/kernel/printk.c b/kernel/printk.c
index 8b518f6cb..ed39d4fab 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -13,22 +13,29 @@
#include <stdarg.h>
-#include <asm/segment.h>
#include <asm/system.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
-#define LOG_BUF_LEN 4096
+#include <asm/uaccess.h>
+
+#define LOG_BUF_LEN 8192
static char buf[1024];
extern void console_print(const char *);
-#define DEFAULT_MESSAGE_LOGLEVEL 7 /* KERN_DEBUG */
-#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything more serious than KERN_DEBUG */
+/* printk's without a loglevel use this.. */
+#define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */
+
+/* We show everything that is MORE important than this.. */
+#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
+#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
unsigned long log_size = 0;
struct wait_queue * log_wait = NULL;
@@ -89,7 +96,7 @@ asmlinkage int sys_syslog(int type, char * buf, int len)
log_size--;
log_start &= LOG_BUF_LEN-1;
sti();
- put_fs_byte(c,buf);
+ put_user(c,buf);
buf++;
i++;
cli();
@@ -115,7 +122,7 @@ asmlinkage int sys_syslog(int type, char * buf, int len)
j = log_start + log_size - count;
for (i = 0; i < count; i++) {
c = *((char *) log_buf+(j++ & (LOG_BUF_LEN-1)));
- put_fs_byte(c, buf++);
+ put_user(c, buf++);
}
if (do_clear)
logged_chars = 0;
@@ -124,7 +131,7 @@ asmlinkage int sys_syslog(int type, char * buf, int len)
logged_chars = 0;
return 0;
case 6: /* Disable logging to console */
- console_loglevel = 1; /* only panic messages shown */
+ console_loglevel = MINIMUM_CONSOLE_LOGLEVEL;
return 0;
case 7: /* Enable logging to console */
console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
@@ -132,6 +139,8 @@ asmlinkage int sys_syslog(int type, char * buf, int len)
case 8:
if (len < 1 || len > 8)
return -EINVAL;
+ if (len < MINIMUM_CONSOLE_LOGLEVEL)
+ len = MINIMUM_CONSOLE_LOGLEVEL;
console_loglevel = len;
return 0;
}
@@ -164,7 +173,7 @@ asmlinkage int printk(const char *fmt, ...)
) {
p -= 3;
p[0] = '<';
- p[1] = DEFAULT_MESSAGE_LOGLEVEL - 1 + '0';
+ p[1] = DEFAULT_MESSAGE_LOGLEVEL + '0';
p[2] = '>';
} else
msg += 3;
@@ -230,3 +239,16 @@ void register_console(void (*proc)(const char *))
j = 0;
}
}
+
+/*
+ * Write a message to a certain tty, not just the console. This is used for
+ * messages that need to be redirected to a specific tty.
+ * We don't put it into the syslog queue right now; maybe in the future if
+ * really needed.
+ */
+void tty_write_message(struct tty_struct *tty, char *msg)
+{
+ if (tty && tty->driver.write)
+ tty->driver.write(tty, 0, msg, strlen(msg));
+ return;
+}
diff --git a/kernel/resource.c b/kernel/resource.c
index 5a7999d73..48184bfcf 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -92,15 +92,6 @@ void request_region(unsigned int from, unsigned int num, const char *name)
}
}
-/*
- * This is for compatibility with older drivers.
- * It can be removed when all drivers call the new function.
- */
-void snarf_region(unsigned int from, unsigned int num)
-{
- request_region(from,num,"No name given.");
-}
-
/*
* Call this when the device driver is unloaded
*/
diff --git a/kernel/sched.c b/kernel/sched.c
index 93003dfc1..8f88f88a3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2,16 +2,17 @@
* linux/kernel/sched.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1996-04-21 Modified by Ulrich Windl to make NTP work
*/
/*
* 'sched.c' is the main kernel file. It contains scheduling primitives
* (sleep_on, wakeup, schedule etc) as well as a number of simple system
- * call functions (type getpid(), which just extracts a field from
+ * call functions (type getpid()), which just extract a field from
* current-task
*/
-#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/timer.h>
@@ -26,20 +27,23 @@
#include <linux/tqueue.h>
#include <linux/resource.h>
#include <linux/mm.h>
+#include <linux/smp.h>
#include <asm/system.h>
#include <asm/io.h>
-#include <asm/segment.h>
+#include <asm/uaccess.h>
#include <asm/pgtable.h>
-
-#define TIMER_IRQ 0
+#include <asm/mmu_context.h>
#include <linux/timex.h>
/*
* kernel variables
*/
-long tick = 1000000 / HZ; /* timer interrupt period */
+
+int securelevel = 0; /* system security level */
+
+long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */
volatile struct timeval xtime; /* The current time */
int tickadj = 500/HZ; /* microsecs */
@@ -50,17 +54,19 @@ DECLARE_TASK_QUEUE(tq_scheduler);
/*
* phase-lock loop variables
*/
-int time_status = TIME_BAD; /* clock synchronization status */
-long time_offset = 0; /* time adjustment (us) */
-long time_constant = 0; /* pll time constant */
-long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
-long time_precision = 1; /* clock precision (us) */
-long time_maxerror = 0x70000000;/* maximum error */
-long time_esterror = 0x70000000;/* estimated error */
-long time_phase = 0; /* phase offset (scaled us) */
-long time_freq = 0; /* frequency offset (scaled ppm) */
-long time_adj = 0; /* tick adjust (scaled 1 / HZ) */
-long time_reftime = 0; /* time at last adjustment (s) */
+/* TIME_ERROR prevents overwriting the CMOS clock */
+int time_state = TIME_ERROR; /* clock synchronization status */
+int time_status = STA_UNSYNC; /* clock status bits */
+long time_offset = 0; /* time adjustment (us) */
+long time_constant = 2; /* pll time constant */
+long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
+long time_precision = 1; /* clock precision (us) */
+long time_maxerror = MAXPHASE; /* maximum error (us) */
+long time_esterror = MAXPHASE; /* estimated error (us) */
+long time_phase = 0; /* phase offset (scaled us) */
+long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC; /* frequency offset (scaled ppm) */
+long time_adj = 0; /* tick adjust (scaled 1 / HZ) */
+long time_reftime = 0; /* time at last adjustment (s) */
long time_adjust = 0;
long time_adjust_step = 0;
@@ -69,132 +75,354 @@ int need_resched = 0;
unsigned long event = 0;
extern int _setitimer(int, struct itimerval *, struct itimerval *);
-unsigned long * prof_buffer = NULL;
+unsigned int * prof_buffer = NULL;
unsigned long prof_len = 0;
+unsigned long prof_shift = 0;
#define _S(nr) (1<<((nr)-1))
extern void mem_use(void);
-extern int timer_interrupt(void);
-
+#ifdef __mips__
+unsigned long init_kernel_stack[2048] = { STACK_MAGIC, };
+unsigned long init_user_stack[2048] = { STACK_MAGIC, };
+#else
unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
unsigned long init_user_stack[1024] = { STACK_MAGIC, };
+#endif
static struct vm_area_struct init_mmap = INIT_MMAP;
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS;
+
+struct mm_struct init_mm = INIT_MM;
struct task_struct init_task = INIT_TASK;
unsigned long volatile jiffies=0;
-struct task_struct *current = &init_task;
+struct task_struct *current_set[NR_CPUS];
struct task_struct *last_task_used_math = NULL;
struct task_struct * task[NR_TASKS] = {&init_task, };
struct kernel_stat kstat = { 0 };
-unsigned long itimer_ticks = 0;
-unsigned long itimer_next = ~0;
+static inline void add_to_runqueue(struct task_struct * p)
+{
+#ifdef __SMP__
+ int cpu=smp_processor_id();
+#endif
+#if 1 /* sanity tests */
+ if (p->next_run || p->prev_run) {
+ printk("task already on run-queue\n");
+ return;
+ }
+#endif
+ if (p->counter > current->counter + 3)
+ need_resched = 1;
+ nr_running++;
+ (p->prev_run = init_task.prev_run)->next_run = p;
+ p->next_run = &init_task;
+ init_task.prev_run = p;
+#ifdef __SMP__
+ /* this is safe only if called with cli()*/
+ while(set_bit(31,&smp_process_available))
+ {
+ while(test_bit(31,&smp_process_available))
+ {
+ if(clear_bit(cpu,&smp_invalidate_needed))
+ {
+ local_flush_tlb();
+ set_bit(cpu,&cpu_callin_map[0]);
+ }
+ }
+ }
+ smp_process_available++;
+ clear_bit(31,&smp_process_available);
+ if ((0!=p->pid) && smp_threads_ready)
+ {
+ int i;
+ for (i=0;i<smp_num_cpus;i++)
+ {
+ if (0==current_set[cpu_logical_map[i]]->pid)
+ {
+ smp_message_pass(cpu_logical_map[i], MSG_RESCHEDULE, 0L, 0);
+ break;
+ }
+ }
+ }
+#endif
+}
+
+static inline void del_from_runqueue(struct task_struct * p)
+{
+ struct task_struct *next = p->next_run;
+ struct task_struct *prev = p->prev_run;
+
+#if 1 /* sanity tests */
+ if (!next || !prev) {
+ printk("task not on run-queue\n");
+ return;
+ }
+#endif
+ if (p == &init_task) {
+ static int nr = 0;
+ if (nr < 5) {
+ nr++;
+ printk("idle task may not sleep\n");
+ }
+ return;
+ }
+ nr_running--;
+ next->prev_run = prev;
+ prev->next_run = next;
+ p->next_run = NULL;
+ p->prev_run = NULL;
+}
+
+static inline void move_last_runqueue(struct task_struct * p)
+{
+ struct task_struct *next = p->next_run;
+ struct task_struct *prev = p->prev_run;
+
+ /* remove from list */
+ next->prev_run = prev;
+ prev->next_run = next;
+ /* add back to list */
+ p->next_run = &init_task;
+ prev = init_task.prev_run;
+ init_task.prev_run = p;
+ p->prev_run = prev;
+ prev->next_run = p;
+}
+
+/*
+ * Wake up a process. Put it on the run-queue if it's not
+ * already there. The "current" process is always on the
+ * run-queue (except when the actual re-schedule is in
+ * progress), and as such you're allowed to do the simpler
+ * "current->state = TASK_RUNNING" to mark yourself runnable
+ * without the overhead of this.
+ */
+inline void wake_up_process(struct task_struct * p)
+{
+ unsigned long flags;
+
+ save_flags(flags);
+ cli();
+ p->state = TASK_RUNNING;
+ if (!p->next_run)
+ add_to_runqueue(p);
+ restore_flags(flags);
+}
+
+static void process_timeout(unsigned long __data)
+{
+ struct task_struct * p = (struct task_struct *) __data;
+
+ p->timeout = 0;
+ wake_up_process(p);
+}
+
+/*
+ * This is the function that decides how desirable a process is..
+ * You can weigh different processes against each other depending
+ * on what CPU they've run on lately etc to try to handle cache
+ * and TLB miss penalties.
+ *
+ * Return values:
+ * -1000: never select this
+ * 0: out of time, recalculate counters (but it might still be
+ * selected)
+ * +ve: "goodness" value (the larger, the better)
+ * +1000: realtime process, select this.
+ */
+static inline int goodness(struct task_struct * p, struct task_struct * prev, int this_cpu)
+{
+ int weight;
+
+#ifdef __SMP__
+ /* We are not permitted to run a task someone else is running */
+ if (p->processor != NO_PROC_ID)
+ return -1000;
+#ifdef PAST_2_0
+ /* This process is locked to a processor group */
+ if (p->processor_mask && !(p->processor_mask & (1<<this_cpu))
+ return -1000;
+#endif
+#endif
+
+ /*
+ * Realtime process, select the first one on the
+ * runqueue (taking priorities within processes
+ * into account).
+ */
+ if (p->policy != SCHED_OTHER)
+ return 1000 + p->rt_priority;
+
+ /*
+ * Give the process a first-approximation goodness value
+ * according to the number of clock-ticks it has left.
+ *
+ * Don't do any other calculations if the time slice is
+ * over..
+ */
+ weight = p->counter;
+ if (weight) {
+
+#ifdef __SMP__
+ /* Give a largish advantage to the same processor... */
+ /* (this is equivalent to penalizing other processors) */
+ if (p->last_processor == this_cpu)
+ weight += PROC_CHANGE_PENALTY;
+#endif
+
+ /* .. and a slight advantage to the current process */
+ if (p == prev)
+ weight += 1;
+ }
+
+ return weight;
+}
/*
* 'schedule()' is the scheduler function. It's a very simple and nice
* scheduler: it's not perfect, but certainly works for most things.
- * The one thing you might take a look at is the signal-handler code here.
+ *
+ * The goto is "interesting".
*
* NOTE!! Task 0 is the 'idle' task, which gets called when no other
* tasks can run. It can not be killed, and it cannot sleep. The 'state'
* information in task[0] is never used.
- *
- * The "confuse_gcc" goto is used only to get better assembly code..
- * Dijkstra probably hates me.
*/
asmlinkage void schedule(void)
{
int c;
struct task_struct * p;
- struct task_struct * next;
- unsigned long ticks;
+ struct task_struct * prev, * next;
+ unsigned long timeout = 0;
+ int this_cpu=smp_processor_id();
/* check alarm, wake up any interruptible tasks that have got a signal */
- if (intr_count) {
- printk("Aiee: scheduling in interrupt\n");
+ if (intr_count)
+ goto scheduling_in_interrupt;
+
+ if (bh_active & bh_mask) {
+ intr_count = 1;
+ do_bottom_half();
intr_count = 0;
}
+
run_task_queue(&tq_scheduler);
- cli();
- ticks = itimer_ticks;
- itimer_ticks = 0;
- itimer_next = ~0;
- sti();
+
need_resched = 0;
- nr_running = 0;
- p = &init_task;
- for (;;) {
- if ((p = p->next_task) == &init_task)
- goto confuse_gcc1;
- if (ticks && p->it_real_value) {
- if (p->it_real_value <= ticks) {
- send_sig(SIGALRM, p, 1);
- if (!p->it_real_incr) {
- p->it_real_value = 0;
- goto end_itimer;
- }
- do {
- p->it_real_value += p->it_real_incr;
- } while (p->it_real_value <= ticks);
+ prev = current;
+ cli();
+ /* move an exhausted RR process to be last.. */
+ if (!prev->counter && prev->policy == SCHED_RR) {
+ prev->counter = prev->priority;
+ move_last_runqueue(prev);
+ }
+ switch (prev->state) {
+ case TASK_INTERRUPTIBLE:
+ if (prev->signal & ~prev->blocked)
+ goto makerunnable;
+ timeout = prev->timeout;
+ if (timeout && (timeout <= jiffies)) {
+ prev->timeout = 0;
+ timeout = 0;
+ makerunnable:
+ prev->state = TASK_RUNNING;
+ break;
}
- p->it_real_value -= ticks;
- if (p->it_real_value < itimer_next)
- itimer_next = p->it_real_value;
- }
-end_itimer:
- if (p->state != TASK_INTERRUPTIBLE)
- continue;
- if (p->signal & ~p->blocked) {
- p->state = TASK_RUNNING;
- continue;
- }
- if (p->timeout && p->timeout <= jiffies) {
- p->timeout = 0;
- p->state = TASK_RUNNING;
- }
+ default:
+ del_from_runqueue(prev);
+ case TASK_RUNNING:
}
-confuse_gcc1:
+ p = init_task.next_run;
+ sti();
+
+#ifdef __SMP__
+ /*
+ * This is safe as we do not permit re-entry of schedule()
+ */
+ prev->processor = NO_PROC_ID;
+#define idle_task (task[cpu_number_map[this_cpu]])
+#else
+#define idle_task (&init_task)
+#endif
+/*
+ * Note! there may appear new tasks on the run-queue during this, as
+ * interrupts are enabled. However, they will be put on front of the
+ * list, so our list starting at "p" is essentially fixed.
+ */
/* this is the scheduler proper: */
-#if 0
- /* give processes that go to sleep a bit higher priority.. */
- /* This depends on the values for TASK_XXX */
- /* This gives smoother scheduling for some things, but */
- /* can be very unfair under some circumstances, so.. */
- if (TASK_UNINTERRUPTIBLE >= (unsigned) current->state &&
- current->counter < current->priority*2) {
- ++current->counter;
- }
-#endif
c = -1000;
- next = p = &init_task;
- for (;;) {
- if ((p = p->next_task) == &init_task)
- goto confuse_gcc2;
- if (p->state == TASK_RUNNING) {
- nr_running++;
- if (p->counter > c)
- c = p->counter, next = p;
- }
+ next = idle_task;
+ while (p != &init_task) {
+ int weight = goodness(p, prev, this_cpu);
+ if (weight > c)
+ c = weight, next = p;
+ p = p->next_run;
}
-confuse_gcc2:
+
+ /* if all runnable processes have "counter == 0", re-calculate counters */
if (!c) {
for_each_task(p)
p->counter = (p->counter >> 1) + p->priority;
}
- if (current == next)
- return;
- kstat.context_swtch++;
+#ifdef __SMP__
+ /*
+ * Allocate process to CPU
+ */
+
+ next->processor = this_cpu;
+ next->last_processor = this_cpu;
+#endif
+#ifdef __SMP_PROF__
+ /* mark processor running an idle thread */
+ if (0==next->pid)
+ set_bit(this_cpu,&smp_idle_map);
+ else
+ clear_bit(this_cpu,&smp_idle_map);
+#endif
+ if (prev != next) {
+ struct timer_list timer;
+
+ kstat.context_swtch++;
+ if (timeout) {
+ init_timer(&timer);
+ timer.expires = timeout;
+ timer.data = (unsigned long) prev;
+ timer.function = process_timeout;
+ add_timer(&timer);
+ }
+
+ get_mmu_context(next);
+ switch_to(prev,next);
+ if (timeout)
+ del_timer(&timer);
+ }
+ return;
- switch_to(next);
+scheduling_in_interrupt:
+ printk("Aiee: scheduling in interrupt %p\n",
+ return_address());
+/*
+ * System is probably fucked up anyway beyond a safe landing; prevent
+ * messages on the screen from scrolling away.
+ */
+while(1);
}
+#ifndef __alpha__
+
+/*
+ * For backwards compatibility? This can be done in libc so Alpha
+ * and all newer ports shouldn't need it.
+ */
asmlinkage int sys_pause(void)
{
current->state = TASK_INTERRUPTIBLE;
@@ -202,6 +430,8 @@ asmlinkage int sys_pause(void)
return -ERESTARTNOHAND;
}
+#endif
+
/*
* wake_up doesn't wake up stopped processes - they have to be awakened
* with signals or similar.
@@ -212,70 +442,139 @@ asmlinkage int sys_pause(void)
*/
void wake_up(struct wait_queue **q)
{
- struct wait_queue *tmp;
- struct task_struct * p;
+ struct wait_queue *next;
+ struct wait_queue *head;
- if (!q || !(tmp = *q))
+ if (!q || !(next = *q))
return;
- do {
- if ((p = tmp->task) != NULL) {
+ head = WAIT_QUEUE_HEAD(q);
+ while (next != head) {
+ struct task_struct *p = next->task;
+ next = next->next;
+ if (p != NULL) {
if ((p->state == TASK_UNINTERRUPTIBLE) ||
- (p->state == TASK_INTERRUPTIBLE)) {
- p->state = TASK_RUNNING;
- if (p->counter > current->counter + 3)
- need_resched = 1;
- }
+ (p->state == TASK_INTERRUPTIBLE))
+ wake_up_process(p);
}
- if (!tmp->next) {
- printk("wait_queue is bad (eip = %p)\n",
- __builtin_return_address(0));
- printk(" q = %p\n",q);
- printk(" *q = %p\n",*q);
- printk(" tmp = %p\n",tmp);
- break;
- }
- tmp = tmp->next;
- } while (tmp != *q);
+ if (!next)
+ goto bad;
+ }
+ return;
+bad:
+ printk("wait_queue is bad (eip = %p)\n",
+ __builtin_return_address(0));
+ printk(" q = %p\n",q);
+ printk(" *q = %p\n",*q);
}
void wake_up_interruptible(struct wait_queue **q)
{
- struct wait_queue *tmp;
- struct task_struct * p;
+ struct wait_queue *next;
+ struct wait_queue *head;
- if (!q || !(tmp = *q))
+ if (!q || !(next = *q))
return;
- do {
- if ((p = tmp->task) != NULL) {
- if (p->state == TASK_INTERRUPTIBLE) {
- p->state = TASK_RUNNING;
- if (p->counter > current->counter + 3)
- need_resched = 1;
- }
+ head = WAIT_QUEUE_HEAD(q);
+ while (next != head) {
+ struct task_struct *p = next->task;
+ next = next->next;
+ if (p != NULL) {
+ if (p->state == TASK_INTERRUPTIBLE)
+ wake_up_process(p);
}
- if (!tmp->next) {
- printk("wait_queue is bad (eip = %p)\n",
- __builtin_return_address(0));
- printk(" q = %p\n",q);
- printk(" *q = %p\n",*q);
- printk(" tmp = %p\n",tmp);
- break;
- }
- tmp = tmp->next;
- } while (tmp != *q);
+ if (!next)
+ goto bad;
+ }
+ return;
+bad:
+ printk("wait_queue is bad (eip = %p)\n",
+ __builtin_return_address(0));
+ printk(" q = %p\n",q);
+ printk(" *q = %p\n",*q);
+}
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to sleep, while the "waiting" variable is
+ * incremented _while_ the process is sleeping on that
+ * semaphore.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ */
+static inline void normalize_semaphore(struct semaphore *sem)
+{
+ atomic_add(xchg(&sem->waiting,0), &sem->count);
+}
+
+/*
+ * When __up() is called, the count was negative before
+ * incrementing it, and we need to wake up somebody. In
+ * most cases "waiting" will be positive, and the normalization
+ * will allow things to continue. However, if somebody has
+ * /just/ done a down(), it may be that count was negative
+ * without waiting being positive (or in the generic case
+ * "count is more negative than waiting is positive"), and
+ * the waiter needs to check this itself (see __down).
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+void __up(struct semaphore *sem)
+{
+ normalize_semaphore(sem);
+ wake_up(&sem->wait);
}
void __down(struct semaphore * sem)
{
- struct wait_queue wait = { current, NULL };
+ struct task_struct *tsk = current;
+ struct wait_queue wait = { tsk, NULL };
+
+ /*
+ * The order here is important. We add ourselves to the
+ * wait queues and mark ourselves sleeping _first_. That
+ * way, if a "up()" comes in here, we'll either get
+ * woken up (up happens after the wait queues are set up)
+ * OR we'll have "waiting > 0".
+ */
+ tsk->state = TASK_UNINTERRUPTIBLE;
add_wait_queue(&sem->wait, &wait);
- current->state = TASK_UNINTERRUPTIBLE;
- while (sem->count <= 0) {
- schedule();
- current->state = TASK_UNINTERRUPTIBLE;
+ atomic_inc(&sem->waiting);
+
+ /*
+ * Ok, we're set up. The only race here is really that
+ * an "up()" might have incremented count before we got
+ * here, so we check "count+waiting". If that is larger
+ * than zero, we shouldn't sleep, but re-try the lock.
+ */
+ if (sem->count+sem->waiting <= 0) {
+ /*
+ * If "count+waiting" <= 0, we have to wait
+ * for a up(), which will normalize the count.
+ * Remember, at this point we have decremented
+ * count, and incremented up, so if count is
+ * zero or positive we need to return to re-try
+ * the lock. It _may_ be that both count and
+ * waiting is zero and that it is still locked,
+ * but we still want to re-try the lock in that
+ * case to make count go negative again so that
+ * the optimized "up()" wake_up sequence works.
+ */
+ do {
+ schedule();
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ } while (sem->count < 0);
}
- current->state = TASK_RUNNING;
+ tsk->state = TASK_RUNNING;
remove_wait_queue(&sem->wait, &wait);
+ normalize_semaphore(sem);
}
static inline void __sleep_on(struct wait_queue **p, int state)
@@ -288,11 +587,13 @@ static inline void __sleep_on(struct wait_queue **p, int state)
if (current == task[0])
panic("task[0] trying to sleep");
current->state = state;
- add_wait_queue(p, &wait);
save_flags(flags);
+ cli();
+ __add_wait_queue(p, &wait);
sti();
schedule();
- remove_wait_queue(p, &wait);
+ cli();
+ __remove_wait_queue(p, &wait);
restore_flags(flags);
}
@@ -311,7 +612,7 @@ void sleep_on(struct wait_queue **p)
* and the sorting routine counts on this..
*/
static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
-#define SLOW_BUT_DEBUGGING_TIMERS 1
+#define SLOW_BUT_DEBUGGING_TIMERS 0
void add_timer(struct timer_list * timer)
{
@@ -326,7 +627,6 @@ void add_timer(struct timer_list * timer)
}
#endif
p = &timer_head;
- timer->expires += jiffies;
save_flags(flags);
cli();
do {
@@ -341,42 +641,66 @@ void add_timer(struct timer_list * timer)
int del_timer(struct timer_list * timer)
{
- unsigned long flags;
-#if SLOW_BUT_DEBUGGING_TIMERS
- struct timer_list * p;
-
- p = &timer_head;
- save_flags(flags);
- cli();
- while ((p = p->next) != &timer_head) {
- if (p == timer) {
- timer->next->prev = timer->prev;
- timer->prev->next = timer->next;
+ int ret = 0;
+ if (timer->next) {
+ unsigned long flags;
+ struct timer_list * next;
+ save_flags(flags);
+ cli();
+ if ((next = timer->next) != NULL) {
+ (next->prev = timer->prev)->next = next;
timer->next = timer->prev = NULL;
- restore_flags(flags);
- timer->expires -= jiffies;
- return 1;
+ ret = 1;
}
+ restore_flags(flags);
}
- if (timer->next || timer->prev)
- printk("del_timer() called from %p with timer not initialized\n",
- __builtin_return_address(0));
- restore_flags(flags);
- return 0;
-#else
- save_flags(flags);
+ return ret;
+}
+
+static inline void run_timer_list(void)
+{
+ struct timer_list * timer;
+
cli();
- if (timer->next) {
+ while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) {
+ void (*fn)(unsigned long) = timer->function;
+ unsigned long data = timer->data;
timer->next->prev = timer->prev;
timer->prev->next = timer->next;
timer->next = timer->prev = NULL;
- restore_flags(flags);
- timer->expires -= jiffies;
- return 1;
+ sti();
+ fn(data);
+ cli();
}
- restore_flags(flags);
- return 0;
-#endif
+ sti();
+}
+
+static inline void run_old_timers(void)
+{
+ struct timer_struct *tp;
+ unsigned long mask;
+
+ for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
+ if (mask > timer_active)
+ break;
+ if (!(mask & timer_active))
+ continue;
+ if (tp->expires > jiffies)
+ continue;
+ timer_active &= ~mask;
+ tp->fn();
+ sti();
+ }
+}
+
+void tqueue_bh(void)
+{
+ run_task_queue(&tq_timer);
+}
+
+void immediate_bh(void)
+{
+ run_task_queue(&tq_immediate);
}
unsigned long timer_active = 0;
@@ -403,21 +727,25 @@ static unsigned long count_active_tasks(void)
(*p)->state == TASK_UNINTERRUPTIBLE ||
(*p)->state == TASK_SWAPPING))
nr += FIXED_1;
+#ifdef __SMP__
+ nr-=(smp_num_cpus-1)*FIXED_1;
+#endif
return nr;
}
-static inline void calc_load(void)
+static inline void calc_load(unsigned long ticks)
{
unsigned long active_tasks; /* fixed-point */
static int count = LOAD_FREQ;
- if (count-- > 0)
- return;
- count = LOAD_FREQ;
- active_tasks = count_active_tasks();
- CALC_LOAD(avenrun[0], EXP_1, active_tasks);
- CALC_LOAD(avenrun[1], EXP_5, active_tasks);
- CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+ count -= ticks;
+ if (count < 0) {
+ count += LOAD_FREQ;
+ active_tasks = count_active_tasks();
+ CALC_LOAD(avenrun[0], EXP_1, active_tasks);
+ CALC_LOAD(avenrun[1], EXP_5, active_tasks);
+ CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+ }
}
/*
@@ -428,138 +756,138 @@ static inline void calc_load(void)
* They were originally developed for SUN and DEC kernels.
* All the kudos should go to Dave for this stuff.
*
- * These were ported to Linux by Philip Gladstone.
*/
static void second_overflow(void)
{
- long ltemp;
-
- /* Bump the maxerror field */
- time_maxerror = (0x70000000-time_maxerror < time_tolerance) ?
- 0x70000000 : (time_maxerror + time_tolerance);
-
- /* Run the PLL */
- if (time_offset < 0) {
- ltemp = (-(time_offset+1) >> (SHIFT_KG + time_constant)) + 1;
- time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
- time_offset += (time_adj * HZ) >> (SHIFT_SCALE - SHIFT_UPDATE);
- time_adj = - time_adj;
- } else if (time_offset > 0) {
- ltemp = ((time_offset-1) >> (SHIFT_KG + time_constant)) + 1;
- time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
- time_offset -= (time_adj * HZ) >> (SHIFT_SCALE - SHIFT_UPDATE);
- } else {
- time_adj = 0;
- }
-
- time_adj += (time_freq >> (SHIFT_KF + SHIFT_HZ - SHIFT_SCALE))
- + FINETUNE;
-
- /* Handle the leap second stuff */
- switch (time_status) {
- case TIME_INS:
- /* ugly divide should be replaced */
- if (xtime.tv_sec % 86400 == 0) {
- xtime.tv_sec--; /* !! */
- time_status = TIME_OOP;
- printk("Clock: inserting leap second 23:59:60 UTC\n");
- }
- break;
-
- case TIME_DEL:
- /* ugly divide should be replaced */
- if (xtime.tv_sec % 86400 == 86399) {
- xtime.tv_sec++;
- time_status = TIME_OK;
- printk("Clock: deleting leap second 23:59:59 UTC\n");
- }
- break;
-
- case TIME_OOP:
- time_status = TIME_OK;
- break;
+ long ltemp;
+
+ /* Bump the maxerror field */
+ time_maxerror += time_tolerance >> SHIFT_USEC;
+ if ( time_maxerror > MAXPHASE )
+ time_maxerror = MAXPHASE;
+
+ /*
+ * Leap second processing. If in leap-insert state at
+ * the end of the day, the system clock is set back one
+ * second; if in leap-delete state, the system clock is
+ * set ahead one second. The microtime() routine or
+ * external clock driver will insure that reported time
+ * is always monotonic. The ugly divides should be
+ * replaced.
+ */
+ switch (time_state) {
+
+ case TIME_OK:
+ if (time_status & STA_INS)
+ time_state = TIME_INS;
+ else if (time_status & STA_DEL)
+ time_state = TIME_DEL;
+ break;
+
+ case TIME_INS:
+ if (xtime.tv_sec % 86400 == 0) {
+ xtime.tv_sec--;
+ time_state = TIME_OOP;
+ printk("Clock: inserting leap second 23:59:60 UTC\n");
}
-}
-
-/*
- * disregard lost ticks for now.. We don't care enough.
- */
-static void timer_bh(void * unused)
-{
- unsigned long mask;
- struct timer_struct *tp;
- struct timer_list * timer;
+ break;
- cli();
- while ((timer = timer_head.next) != &timer_head && timer->expires < jiffies) {
- void (*fn)(unsigned long) = timer->function;
- unsigned long data = timer->data;
- timer->next->prev = timer->prev;
- timer->prev->next = timer->next;
- timer->next = timer->prev = NULL;
- sti();
- fn(data);
- cli();
- }
- sti();
-
- for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
- if (mask > timer_active)
- break;
- if (!(mask & timer_active))
- continue;
- if (tp->expires > jiffies)
- continue;
- timer_active &= ~mask;
- tp->fn();
- sti();
+ case TIME_DEL:
+ if ((xtime.tv_sec + 1) % 86400 == 0) {
+ xtime.tv_sec++;
+ time_state = TIME_WAIT;
+ printk("Clock: deleting leap second 23:59:59 UTC\n");
}
+ break;
+
+ case TIME_OOP:
+ time_state = TIME_WAIT;
+ break;
+
+ case TIME_WAIT:
+ if (!(time_status & (STA_INS | STA_DEL)))
+ time_state = TIME_OK;
+ }
+
+ /*
+ * Compute the phase adjustment for the next second. In
+ * PLL mode, the offset is reduced by a fixed factor
+ * times the time constant. In FLL mode the offset is
+ * used directly. In either mode, the maximum phase
+ * adjustment for each second is clamped so as to spread
+ * the adjustment over not more than the number of
+ * seconds between updates.
+ */
+ if (time_offset < 0) {
+ ltemp = -time_offset;
+ if (!(time_status & STA_FLL))
+ ltemp >>= SHIFT_KG + time_constant;
+ if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+ ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+ time_offset += ltemp;
+ time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ } else {
+ ltemp = time_offset;
+ if (!(time_status & STA_FLL))
+ ltemp >>= SHIFT_KG + time_constant;
+ if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+ ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+ time_offset -= ltemp;
+ time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ }
+
+ /*
+ * Compute the frequency estimate and additional phase
+ * adjustment due to frequency error for the next
+ * second. When the PPS signal is engaged, gnaw on the
+ * watchdog counter and update the frequency computed by
+ * the pll and the PPS signal.
+ */
+ pps_valid++;
+ if (pps_valid == PPS_VALID) {
+ pps_jitter = MAXTIME;
+ pps_stabil = MAXFREQ;
+ time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
+ STA_PPSWANDER | STA_PPSERROR);
+ }
+ ltemp = time_freq + pps_freq;
+ if (ltemp < 0)
+ time_adj -= -ltemp >>
+ (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+ else
+ time_adj += ltemp >>
+ (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+
+#if HZ == 100
+ /* compensate for (HZ==100) != 128. Add 25% to get 125; => only 3% error */
+ if (time_adj < 0)
+ time_adj -= -time_adj >> 2;
+ else
+ time_adj += time_adj >> 2;
+#endif
}
-void tqueue_bh(void * unused)
-{
- run_task_queue(&tq_timer);
-}
-
-void immediate_bh(void * unused)
-{
- run_task_queue(&tq_immediate);
-}
-
-/*
- * The int argument is really a (struct pt_regs *), in case the
- * interrupt wants to know from where it was called. The timer
- * irq uses this to decide if it should update the user or system
- * times.
- */
-static void do_timer(int irq, struct pt_regs * regs)
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
{
- unsigned long mask;
- struct timer_struct *tp;
- /* last time the cmos clock got updated */
- static long last_rtc_update=0;
- extern int set_rtc_mmss(unsigned long);
-
- long ltemp, psecs;
-
- /* Advance the phase, once it gets to one microsecond, then
+ /*
+ * Advance the phase, once it gets to one microsecond, then
* advance the tick more.
*/
time_phase += time_adj;
- if (time_phase < -FINEUSEC) {
- ltemp = -time_phase >> SHIFT_SCALE;
+ if (time_phase <= -FINEUSEC) {
+ long ltemp = -time_phase >> SHIFT_SCALE;
time_phase += ltemp << SHIFT_SCALE;
xtime.tv_usec += tick + time_adjust_step - ltemp;
}
- else if (time_phase > FINEUSEC) {
- ltemp = time_phase >> SHIFT_SCALE;
+ else if (time_phase >= FINEUSEC) {
+ long ltemp = time_phase >> SHIFT_SCALE;
time_phase -= ltemp << SHIFT_SCALE;
xtime.tv_usec += tick + time_adjust_step + ltemp;
} else
xtime.tv_usec += tick + time_adjust_step;
- if (time_adjust)
- {
+ if (time_adjust) {
/* We are doing an adjtime thing.
*
* Modify the value of the tick for next time.
@@ -570,123 +898,240 @@ static void do_timer(int irq, struct pt_regs * regs)
* in the range -tickadj .. +tickadj
*/
if (time_adjust > tickadj)
- time_adjust_step = tickadj;
+ time_adjust_step = tickadj;
else if (time_adjust < -tickadj)
- time_adjust_step = -tickadj;
+ time_adjust_step = -tickadj;
else
- time_adjust_step = time_adjust;
+ time_adjust_step = time_adjust;
/* Reduce by this step the amount of time left */
time_adjust -= time_adjust_step;
}
else
time_adjust_step = 0;
+}
+
+/*
+ * Using a loop looks inefficient, but "ticks" is
+ * usually just one (we shouldn't be losing ticks,
+ * we're doing this this way mainly for interrupt
+ * latency reasons, not because we think we'll
+ * have lots of lost timer ticks
+ */
+static void update_wall_time(unsigned long ticks)
+{
+ do {
+ ticks--;
+ update_wall_time_one_tick();
+ } while (ticks);
if (xtime.tv_usec >= 1000000) {
xtime.tv_usec -= 1000000;
xtime.tv_sec++;
second_overflow();
}
+}
- /* If we have an externally synchronized Linux clock, then update
- * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
- * called as close as possible to 500 ms before the new second starts.
- */
- if (time_status != TIME_BAD && xtime.tv_sec > last_rtc_update + 660 &&
- xtime.tv_usec > 500000 - (tick >> 1) &&
- xtime.tv_usec < 500000 + (tick >> 1))
- if (set_rtc_mmss(xtime.tv_sec) == 0)
- last_rtc_update = xtime.tv_sec;
- else
- last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
-
- jiffies++;
- calc_load();
- if (user_mode(regs)) {
- current->utime++;
- if (current != task[0]) {
- if (current->priority < 15)
- kstat.cpu_nice++;
- else
- kstat.cpu_user++;
+static inline void do_process_times(struct task_struct *p,
+ unsigned long user, unsigned long system)
+{
+ long psecs;
+
+ p->utime += user;
+ p->stime += system;
+
+ psecs = (p->stime + p->utime) / HZ;
+ if (psecs > p->rlim[RLIMIT_CPU].rlim_cur) {
+ /* Send SIGXCPU every second.. */
+ if (psecs * HZ == p->stime + p->utime)
+ send_sig(SIGXCPU, p, 1);
+ /* and SIGKILL when we go over max.. */
+ if (psecs > p->rlim[RLIMIT_CPU].rlim_max)
+ send_sig(SIGKILL, p, 1);
+ }
+}
+
+static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
+{
+ unsigned long it_virt = p->it_virt_value;
+
+ if (it_virt) {
+ if (it_virt <= ticks) {
+ it_virt = ticks + p->it_virt_incr;
+ send_sig(SIGVTALRM, p, 1);
}
- /* Update ITIMER_VIRT for current task if not in a system call */
- if (current->it_virt_value && !(--current->it_virt_value)) {
- current->it_virt_value = current->it_virt_incr;
- send_sig(SIGVTALRM,current,1);
+ p->it_virt_value = it_virt - ticks;
+ }
+}
+
+static inline void do_it_prof(struct task_struct * p, unsigned long ticks)
+{
+ unsigned long it_prof = p->it_prof_value;
+
+ if (it_prof) {
+ if (it_prof <= ticks) {
+ it_prof = ticks + p->it_prof_incr;
+ send_sig(SIGPROF, p, 1);
}
- } else {
- current->stime++;
- if(current != task[0])
- kstat.cpu_system++;
-#ifdef CONFIG_PROFILE
- if (prof_buffer && current != task[0]) {
- extern int _stext;
- unsigned long eip = regs->eip - (unsigned long) &_stext;
- eip >>= CONFIG_PROFILE_SHIFT;
- if (eip < prof_len)
- prof_buffer[eip]++;
+ p->it_prof_value = it_prof - ticks;
+ }
+}
+
+static __inline__ void update_one_process(struct task_struct *p,
+ unsigned long ticks, unsigned long user, unsigned long system)
+{
+ do_process_times(p, user, system);
+ do_it_virt(p, user);
+ do_it_prof(p, ticks);
+}
+
+static void update_process_times(unsigned long ticks, unsigned long system)
+{
+#ifndef __SMP__
+ struct task_struct * p = current;
+ unsigned long user = ticks - system;
+ if (p->pid) {
+ p->counter -= ticks;
+ if (p->counter < 0) {
+ p->counter = 0;
+ need_resched = 1;
}
-#endif
+ if (p->priority < DEF_PRIORITY)
+ kstat.cpu_nice += user;
+ else
+ kstat.cpu_user += user;
+ kstat.cpu_system += system;
}
- /*
- * check the cpu time limit on the process.
- */
- if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) &&
- (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max))
- send_sig(SIGKILL, current, 1);
- if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) &&
- (((current->stime + current->utime) % HZ) == 0)) {
- psecs = (current->stime + current->utime) / HZ;
- /* send when equal */
- if (psecs == current->rlim[RLIMIT_CPU].rlim_cur)
- send_sig(SIGXCPU, current, 1);
- /* and every five seconds thereafter. */
- else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) &&
- ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0)
- send_sig(SIGXCPU, current, 1);
- }
-
- if (current != task[0] && 0 > --current->counter) {
- current->counter = 0;
- need_resched = 1;
+ update_one_process(p, ticks, user, system);
+#else
+ int cpu,j;
+ cpu = smp_processor_id();
+ for (j=0;j<smp_num_cpus;j++)
+ {
+ int i = cpu_logical_map[j];
+ struct task_struct *p;
+
+#ifdef __SMP_PROF__
+ if (test_bit(i,&smp_idle_map))
+ smp_idle_count[i]++;
+#endif
+ p = current_set[i];
+ /*
+ * Do we have a real process?
+ */
+ if (p->pid) {
+ /* assume user-mode process */
+ unsigned long utime = ticks;
+ unsigned long stime = 0;
+ if (cpu == i) {
+ utime = ticks-system;
+ stime = system;
+ } else if (smp_proc_in_lock[j]) {
+ utime = 0;
+ stime = ticks;
+ }
+ update_one_process(p, ticks, utime, stime);
+
+ if (p->priority < DEF_PRIORITY)
+ kstat.cpu_nice += utime;
+ else
+ kstat.cpu_user += utime;
+ kstat.cpu_system += stime;
+
+ p->counter -= ticks;
+ if (p->counter >= 0)
+ continue;
+ p->counter = 0;
+ } else {
+ /*
+ * Idle processor found, do we have anything
+ * we could run?
+ */
+ if (!(0x7fffffff & smp_process_available))
+ continue;
+ }
+ /* Ok, we should reschedule, do the magic */
+ if (i==cpu)
+ need_resched = 1;
+ else
+ smp_message_pass(i, MSG_RESCHEDULE, 0L, 0);
}
- /* Update ITIMER_PROF for the current task */
- if (current->it_prof_value && !(--current->it_prof_value)) {
- current->it_prof_value = current->it_prof_incr;
- send_sig(SIGPROF,current,1);
+#endif
+}
+
+static unsigned long lost_ticks = 0;
+static unsigned long lost_ticks_system = 0;
+
+static inline void update_times(void)
+{
+ unsigned long ticks;
+
+ ticks = xchg(&lost_ticks, 0);
+
+ if (ticks) {
+ unsigned long system;
+
+ system = xchg(&lost_ticks_system, 0);
+ calc_load(ticks);
+ update_wall_time(ticks);
+ update_process_times(ticks, system);
}
- for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
- if (mask > timer_active)
- break;
- if (!(mask & timer_active))
- continue;
- if (tp->expires > jiffies)
- continue;
- mark_bh(TIMER_BH);
+}
+
+static void timer_bh(void)
+{
+ update_times();
+ run_old_timers();
+ run_timer_list();
+}
+
+void do_timer(struct pt_regs * regs)
+{
+ (*(unsigned long *)&jiffies)++;
+ lost_ticks++;
+ mark_bh(TIMER_BH);
+ if (!user_mode(regs)) {
+ lost_ticks_system++;
+ if (prof_buffer && current->pid) {
+ extern int _stext;
+ unsigned long ip = instruction_pointer(regs);
+ ip -= (unsigned long) &_stext;
+ ip >>= prof_shift;
+ if (ip < prof_len)
+ prof_buffer[ip]++;
+ }
}
- cli();
- itimer_ticks++;
- if (itimer_ticks > itimer_next)
- need_resched = 1;
- if (timer_head.next->expires < jiffies)
- mark_bh(TIMER_BH);
- if (tq_timer != &tq_last)
+ if (tq_timer)
mark_bh(TQUEUE_BH);
- sti();
}
-asmlinkage int sys_alarm(long seconds)
+#ifndef __alpha__
+
+/*
+ * For backwards compatibility? This can be done in libc so Alpha
+ * and all newer ports shouldn't need it.
+ */
+asmlinkage unsigned int sys_alarm(unsigned int seconds)
{
struct itimerval it_new, it_old;
+ unsigned int oldalarm;
it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
it_new.it_value.tv_sec = seconds;
it_new.it_value.tv_usec = 0;
_setitimer(ITIMER_REAL, &it_new, &it_old);
- return(it_old.it_value.tv_sec + (it_old.it_value.tv_usec / 1000000));
+ oldalarm = it_old.it_value.tv_sec;
+ /* ehhh.. We can't return 0 if we have an alarm pending.. */
+ /* And we'd better return too much than too little anyway */
+ if (it_old.it_value.tv_usec)
+ oldalarm++;
+ return oldalarm;
}
+/*
+ * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
+ * should be moved into arch/i386 instead?
+ */
asmlinkage int sys_getpid(void)
{
return current->pid;
@@ -717,47 +1162,286 @@ asmlinkage int sys_getegid(void)
return current->egid;
}
-asmlinkage int sys_nice(long increment)
+/*
+ * This has been replaced by sys_setpriority. Maybe it should be
+ * moved into the arch dependent tree for those ports that require
+ * it for backward compatibility?
+ */
+asmlinkage int sys_nice(int increment)
{
- int newprio;
-
- if (increment < 0 && !suser())
- return -EPERM;
+ unsigned long newprio;
+ int increase = 0;
+
+ newprio = increment;
+ if (increment < 0) {
+ if (!suser())
+ return -EPERM;
+ newprio = -increment;
+ increase = 1;
+ }
+ if (newprio > 40)
+ newprio = 40;
+ /*
+ * do a "normalization" of the priority (traditionally
+ * unix nice values are -20..20, linux doesn't really
+ * use that kind of thing, but uses the length of the
+ * timeslice instead (default 150 msec). The rounding is
+ * why we want to avoid negative values.
+ */
+ newprio = (newprio * DEF_PRIORITY + 10) / 20;
+ increment = newprio;
+ if (increase)
+ increment = -increment;
newprio = current->priority - increment;
- if (newprio < 1)
+ if ((signed) newprio < 1)
newprio = 1;
- if (newprio > 35)
- newprio = 35;
+ if (newprio > DEF_PRIORITY*2)
+ newprio = DEF_PRIORITY*2;
current->priority = newprio;
return 0;
}
+#endif
+
+static struct task_struct *find_process_by_pid(pid_t pid)
+{
+ struct task_struct *p;
+
+ p = current;
+ if (pid) {
+ for_each_task(p) {
+ if (p->pid == pid)
+ goto found;
+ }
+ p = NULL;
+ }
+found:
+ return p;
+}
+
+static int setscheduler(pid_t pid, int policy,
+ struct sched_param *param)
+{
+ struct sched_param lp;
+ struct task_struct *p;
+
+ if (!param || pid < 0)
+ return -EINVAL;
+
+ if (copy_from_user(&lp, param, sizeof(struct sched_param)))
+ return -EFAULT;
+
+ p = find_process_by_pid(pid);
+ if (!p)
+ return -ESRCH;
+
+ if (policy < 0)
+ policy = p->policy;
+ else if (policy != SCHED_FIFO && policy != SCHED_RR &&
+ policy != SCHED_OTHER)
+ return -EINVAL;
+
+ /*
+ * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
+ * priority for SCHED_OTHER is 0.
+ */
+ if (lp.sched_priority < 0 || lp.sched_priority > 99)
+ return -EINVAL;
+ if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
+ return -EINVAL;
+
+ if ((policy == SCHED_FIFO || policy == SCHED_RR) && !suser())
+ return -EPERM;
+ if ((current->euid != p->euid) && (current->euid != p->uid) &&
+ !suser())
+ return -EPERM;
+
+ p->policy = policy;
+ p->rt_priority = lp.sched_priority;
+ cli();
+ if (p->next_run)
+ move_last_runqueue(p);
+ sti();
+ schedule();
+
+ return 0;
+}
+
+asmlinkage int sys_sched_setscheduler(pid_t pid, int policy,
+ struct sched_param *param)
+{
+ return setscheduler(pid, policy, param);
+}
+
+asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param)
+{
+ return setscheduler(pid, -1, param);
+}
+
+asmlinkage int sys_sched_getscheduler(pid_t pid)
+{
+ struct task_struct *p;
+
+ if (pid < 0)
+ return -EINVAL;
+
+ p = find_process_by_pid(pid);
+ if (!p)
+ return -ESRCH;
+
+ return p->policy;
+}
+
+asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
+{
+ struct task_struct *p;
+ struct sched_param lp;
+
+ if (!param || pid < 0)
+ return -EINVAL;
+
+ p = find_process_by_pid(pid);
+ if (!p)
+ return -ESRCH;
+
+ lp.sched_priority = p->rt_priority;
+ return copy_to_user(param, &lp, sizeof(struct sched_param)) ? -EFAULT : 0;
+}
+
+asmlinkage int sys_sched_yield(void)
+{
+ cli();
+ move_last_runqueue(current);
+ sti();
+ return 0;
+}
+
+asmlinkage int sys_sched_get_priority_max(int policy)
+{
+ switch (policy) {
+ case SCHED_FIFO:
+ case SCHED_RR:
+ return 99;
+ case SCHED_OTHER:
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+asmlinkage int sys_sched_get_priority_min(int policy)
+{
+ switch (policy) {
+ case SCHED_FIFO:
+ case SCHED_RR:
+ return 1;
+ case SCHED_OTHER:
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
+{
+ struct timespec t;
+
+ t.tv_sec = 0;
+ t.tv_nsec = 0; /* <-- Linus, please fill correct value in here */
+ return -ENOSYS; /* and then delete this line. Thanks! */
+ return copy_to_user(interval, &t, sizeof(struct timespec)) ? -EFAULT : 0;
+}
+
+/*
+ * change timeval to jiffies, trying to avoid the
+ * most obvious overflows..
+ */
+static unsigned long timespectojiffies(struct timespec *value)
+{
+ unsigned long sec = (unsigned) value->tv_sec;
+ long nsec = value->tv_nsec;
+
+ if (sec > (LONG_MAX / HZ))
+ return LONG_MAX;
+ nsec += 1000000000L / HZ - 1;
+ nsec /= 1000000000L / HZ;
+ return HZ * sec + nsec;
+}
+
+static void jiffiestotimespec(unsigned long jiffies, struct timespec *value)
+{
+ value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ);
+ value->tv_sec = jiffies / HZ;
+ return;
+}
+
+asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
+{
+ int error;
+ struct timespec t;
+ unsigned long expire;
+
+ error = copy_from_user(&t, rqtp, sizeof(struct timespec));
+ if (error)
+ return -EFAULT;
+
+ if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
+ return -EINVAL;
+
+ if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
+ current->policy != SCHED_OTHER) {
+ /*
+ * Short delay requests up to 2 ms will be handled with
+ * high precision by a busy wait for all real-time processes.
+ */
+ udelay((t.tv_nsec + 999) / 1000);
+ return 0;
+ }
+
+ expire = timespectojiffies(&t) + (t.tv_sec || t.tv_nsec) + jiffies;
+ current->timeout = expire;
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+
+ if (expire > jiffies) {
+ if (rmtp) {
+ jiffiestotimespec(expire - jiffies -
+ (expire > jiffies + 1), &t);
+ if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
+ return -EFAULT;
+ }
+ return -EINTR;
+ }
+
+ return 0;
+}
+
static void show_task(int nr,struct task_struct * p)
{
unsigned long free;
- static char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
+ static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
printk(stat_nam[p->state]);
else
printk(" ");
-#ifdef __i386__
+#if ((~0UL) == 0xffffffff)
if (p == current)
printk(" current ");
else
- printk(" %08lX ", ((unsigned long *)p->tss.esp)[3]);
-#elif defined (__mips__)
+ printk(" %08lX ", thread_saved_pc(&p->tss));
+#else
if (p == current)
- printk(" current ");
+ printk(" current task ");
else
- printk(" ");
+ printk(" %016lx ", thread_saved_pc(&p->tss));
#endif
- for (free = 1; free < 1024 ; free++) {
+ for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
if (((unsigned long *)p->kernel_stack_page)[free])
break;
}
- printk("%5lu %5d %6d ", free << 2, p->pid, p->p_pptr->pid);
+ printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
if (p->p_cptr)
printk("%5d ", p->p_cptr->pid);
else
@@ -776,8 +1460,15 @@ void show_state(void)
{
int i;
- printk(" free sibling\n");
+#if ((~0UL) == 0xffffffff)
+ printk("\n"
+ " free sibling\n");
printk(" task PC stack pid father child younger older\n");
+#else
+ printk("\n"
+ " free sibling\n");
+ printk(" task PC stack pid father child younger older\n");
+#endif
for (i=0 ; i<NR_TASKS ; i++)
if (task[i])
show_task(i,task[i]);
@@ -785,12 +1476,19 @@ void show_state(void)
void sched_init(void)
{
- bh_base[TIMER_BH].routine = timer_bh;
- bh_base[TQUEUE_BH].routine = tqueue_bh;
- bh_base[IMMEDIATE_BH].routine = immediate_bh;
- if (request_irq(TIMER_IRQ, do_timer, 0, "timer") != 0)
- panic("Could not allocate timer IRQ!");
- enable_bh(TIMER_BH);
- enable_bh(TQUEUE_BH);
- enable_bh(IMMEDIATE_BH);
+ /*
+ * We have to do a little magic to get the first
+ * process right in SMP mode.
+ */
+ int cpu=smp_processor_id();
+#ifndef __SMP__
+ current_set[cpu]=&init_task;
+#else
+ init_task.processor=cpu;
+ for(cpu = 0; cpu < NR_CPUS; cpu++)
+ current_set[cpu] = &init_task;
+#endif
+ init_bh(TIMER_BH, timer_bh);
+ init_bh(TQUEUE_BH, tqueue_bh);
+ init_bh(IMMEDIATE_BH, immediate_bh);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index f21d7a2c9..325663bed 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -13,22 +13,29 @@
#include <linux/unistd.h>
#include <linux/mm.h>
-#include <asm/segment.h>
+#include <asm/uaccess.h>
#define _S(nr) (1<<((nr)-1))
#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP)))
+#if !defined(__alpha__) && !defined(__mips__)
+
+/*
+ * This call isn't used by all ports, in particular, the Alpha
+ * uses osf_sigprocmask instead. Maybe it should be moved into
+ * arch-dependent dir?
+ */
asmlinkage int sys_sigprocmask(int how, sigset_t *set, sigset_t *oset)
{
sigset_t new_set, old_set = current->blocked;
int error;
if (set) {
- error = verify_area(VERIFY_READ, set, sizeof(sigset_t));
+ error = get_user(new_set, set);
if (error)
- return error;
- new_set = get_fs_long((unsigned long *) set) & _BLOCKABLE;
+ return error;
+ new_set &= _BLOCKABLE;
switch (how) {
case SIG_BLOCK:
current->blocked |= new_set;
@@ -44,14 +51,19 @@ asmlinkage int sys_sigprocmask(int how, sigset_t *set, sigset_t *oset)
}
}
if (oset) {
- error = verify_area(VERIFY_WRITE, oset, sizeof(sigset_t));
+ error = put_user(old_set, oset);
if (error)
- return error;
- put_fs_long(old_set, (unsigned long *) oset);
+ return error;
}
return 0;
}
+#endif
+#ifndef __alpha__
+
+/*
+ * For backwards compatibility? Functionality superseded by sigprocmask.
+ */
asmlinkage int sys_sgetmask(void)
{
return current->blocked;
@@ -65,21 +77,19 @@ asmlinkage int sys_ssetmask(int newmask)
return old;
}
+#endif
+
asmlinkage int sys_sigpending(sigset_t *set)
{
- int error;
- /* fill in "set" with signals pending but blocked. */
- error = verify_area(VERIFY_WRITE, set, 4);
- if (!error)
- put_fs_long(current->blocked & current->signal, (unsigned long *)set);
- return error;
+ return put_user(current->blocked & current->signal,
+ /* Hack */(unsigned long *)set);
}
/*
* POSIX 3.3.1.3:
* "Setting a signal action to SIG_IGN for a signal that is pending
* shall cause the pending signal to be discarded, whether or not
- * it is blocked" (but SIGCHLD is unspecified: linux leaves it alone).
+ * it is blocked."
*
* "Setting a signal action to SIG_DFL for a signal that is pending
* and whose default action is to ignore the signal (for example,
@@ -90,31 +100,39 @@ asmlinkage int sys_sigpending(sigset_t *set)
* isn't actually ignored, but does automatic child reaping, while
* SIG_DFL is explicitly said by POSIX to force the signal to be ignored..
*/
-static void check_pending(int signum)
+static inline void check_pending(int signum)
{
struct sigaction *p;
- p = signum - 1 + current->sigaction;
+ p = signum - 1 + current->sig->action;
if (p->sa_handler == SIG_IGN) {
- if (signum == SIGCHLD)
- return;
- current->signal &= ~_S(signum);
+ k_sigdelset(&current->signal, signum);
return;
}
if (p->sa_handler == SIG_DFL) {
if (signum != SIGCONT && signum != SIGCHLD && signum != SIGWINCH)
return;
- current->signal &= ~_S(signum);
+ k_sigdelset(&current->signal, signum);
return;
}
}
-asmlinkage unsigned long sys_signal(int signum, void (*handler)(int))
+#if !defined(__alpha__) && !defined(__mips__)
+/*
+ * For backwards compatibility? Functionality superseded by sigaction.
+ */
+asmlinkage unsigned long sys_signal(int signum, __sighandler_t handler)
{
int err;
struct sigaction tmp;
- if (signum<1 || signum>32)
+ /*
+ * HACK: We still cannot handle signals > 32 due to the limited
+ * size of ksigset_t (which will go away).
+ */
+ if (signum > 32)
+ return -EINVAL;
+ if (signum<1 || signum>_NSIG)
return -EINVAL;
if (signum==SIGKILL || signum==SIGSTOP)
return -EINVAL;
@@ -123,37 +141,38 @@ asmlinkage unsigned long sys_signal(int signum, void (*handler)(int))
if (err)
return err;
}
+ memset(&tmp, 0, sizeof(tmp));
tmp.sa_handler = handler;
- tmp.sa_mask = 0;
tmp.sa_flags = SA_ONESHOT | SA_NOMASK;
- tmp.sa_restorer = NULL;
- handler = current->sigaction[signum-1].sa_handler;
- current->sigaction[signum-1] = tmp;
+ handler = current->sig->action[signum-1].sa_handler;
+ current->sig->action[signum-1] = tmp;
check_pending(signum);
return (unsigned long) handler;
}
+#endif /* !defined(__alpha__) && !defined(__mips__) */
asmlinkage int sys_sigaction(int signum, const struct sigaction * action,
struct sigaction * oldaction)
{
struct sigaction new_sa, *p;
- if (signum<1 || signum>32)
+ /*
+ * HACK: We still cannot handle signals > 32 due to the limited
+ * size of ksigset_t (which will go away).
+ */
+ if (signum > 32)
return -EINVAL;
- if (signum==SIGKILL || signum==SIGSTOP)
+ if (signum<1 || signum>_NSIG)
return -EINVAL;
- p = signum - 1 + current->sigaction;
+ p = signum - 1 + current->sig->action;
if (action) {
int err = verify_area(VERIFY_READ, action, sizeof(*action));
if (err)
return err;
- memcpy_fromfs(&new_sa, action, sizeof(struct sigaction));
- if (new_sa.sa_flags & SA_NOMASK)
- new_sa.sa_mask = 0;
- else {
- new_sa.sa_mask |= _S(signum);
- new_sa.sa_mask &= _BLOCKABLE;
- }
+ if (signum==SIGKILL || signum==SIGSTOP)
+ return -EINVAL;
+ if (copy_from_user(&new_sa, action, sizeof(struct sigaction)))
+ return -EFAULT;
if (new_sa.sa_handler != SIG_DFL && new_sa.sa_handler != SIG_IGN) {
err = verify_area(VERIFY_READ, new_sa.sa_handler, 1);
if (err)
@@ -161,10 +180,8 @@ asmlinkage int sys_sigaction(int signum, const struct sigaction * action,
}
}
if (oldaction) {
- int err = verify_area(VERIFY_WRITE, oldaction, sizeof(*oldaction));
- if (err)
- return err;
- memcpy_tofs(oldaction, p, sizeof(struct sigaction));
+ if (copy_to_user(oldaction, p, sizeof(struct sigaction)))
+ return -EFAULT;
}
if (action) {
*p = new_sa;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7d919272b..022b55355 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -21,32 +21,31 @@
#include <asm/irq.h>
#include <asm/bitops.h>
-#define INCLUDE_INLINE_FUNCS
-#include <linux/tqueue.h>
-
unsigned long intr_count = 0;
+int bh_mask_count[32];
unsigned long bh_active = 0;
unsigned long bh_mask = 0;
-struct bh_struct bh_base[32];
+void (*bh_base[32])(void);
asmlinkage void do_bottom_half(void)
{
unsigned long active;
unsigned long mask, left;
- struct bh_struct *bh;
+ void (**bh)(void);
+ sti();
bh = bh_base;
active = bh_active & bh_mask;
for (mask = 1, left = ~0 ; left & active ; bh++,mask += mask,left += left) {
if (mask & active) {
- void (*fn)(void *);
+ void (*fn)(void);
bh_active &= ~mask;
- fn = bh->routine;
+ fn = *bh;
if (!fn)
goto bad_bh;
- fn(bh->data);
+ fn();
}
}
return;
diff --git a/kernel/sys.c b/kernel/sys.c
index 171d2411c..b2cc8f154 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -4,6 +4,7 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*/
+#include <linux/config.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -17,19 +18,24 @@
#include <linux/stat.h>
#include <linux/mman.h>
#include <linux/mm.h>
+#include <linux/fcntl.h>
+#include <linux/acct.h>
+#include <linux/tty.h>
+#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
+#include <linux/apm_bios.h>
+#endif
-#include <asm/segment.h>
+#include <asm/uaccess.h>
#include <asm/io.h>
+#include <asm/system.h>
/*
* this indicates whether you can reboot with ctrl-alt-del: the default is yes
*/
-static int C_A_D = 1;
+int C_A_D = 1;
extern void adjust_clock(void);
-#define PZERO 15
-
asmlinkage int sys_ni_syscall(void)
{
return -ENOSYS;
@@ -37,70 +43,101 @@ asmlinkage int sys_ni_syscall(void)
static int proc_sel(struct task_struct *p, int which, int who)
{
- switch (which) {
- case PRIO_PROCESS:
- if (!who && p == current)
- return 1;
- return(p->pid == who);
- case PRIO_PGRP:
- if (!who)
- who = current->pgrp;
- return(p->pgrp == who);
- case PRIO_USER:
- if (!who)
- who = current->uid;
- return(p->uid == who);
+ if(p->pid)
+ {
+ switch (which) {
+ case PRIO_PROCESS:
+ if (!who && p == current)
+ return 1;
+ return(p->pid == who);
+ case PRIO_PGRP:
+ if (!who)
+ who = current->pgrp;
+ return(p->pgrp == who);
+ case PRIO_USER:
+ if (!who)
+ who = current->uid;
+ return(p->uid == who);
+ }
}
return 0;
}
asmlinkage int sys_setpriority(int which, int who, int niceval)
{
- struct task_struct **p;
+ struct task_struct *p;
int error = ESRCH;
- int priority;
+ unsigned int priority;
if (which > 2 || which < 0)
return -EINVAL;
- if ((priority = PZERO - niceval) <= 0)
- priority = 1;
+ /* normalize: avoid signed division (rounding problems) */
+ priority = niceval;
+ if (niceval < 0)
+ priority = -niceval;
+ if (priority > 20)
+ priority = 20;
+ priority = (priority * DEF_PRIORITY + 10) / 20 + DEF_PRIORITY;
+
+ if (niceval >= 0) {
+ priority = 2*DEF_PRIORITY - priority;
+ if (!priority)
+ priority = 1;
+ }
- for(p = &LAST_TASK; p > &FIRST_TASK; --p) {
- if (!*p || !proc_sel(*p, which, who))
+ for_each_task(p) {
+ if (!proc_sel(p, which, who))
continue;
- if ((*p)->uid != current->euid &&
- (*p)->uid != current->uid && !suser()) {
+ if (p->uid != current->euid &&
+ p->uid != current->uid && !suser()) {
error = EPERM;
continue;
}
if (error == ESRCH)
error = 0;
- if (priority > (*p)->priority && !suser())
+ if (priority > p->priority && !suser())
error = EACCES;
else
- (*p)->priority = priority;
+ p->priority = priority;
}
return -error;
}
+/*
+ * Ugh. To avoid negative return values, "getpriority()" will
+ * not return the normal nice-value, but a value that has been
+ * offset by 20 (ie it returns 0..40 instead of -20..20)
+ */
asmlinkage int sys_getpriority(int which, int who)
{
- struct task_struct **p;
- int max_prio = 0;
+ struct task_struct *p;
+ long max_prio = -ESRCH;
if (which > 2 || which < 0)
return -EINVAL;
- for(p = &LAST_TASK; p > &FIRST_TASK; --p) {
- if (!*p || !proc_sel(*p, which, who))
+ for_each_task (p) {
+ if (!proc_sel(p, which, who))
continue;
- if ((*p)->priority > max_prio)
- max_prio = (*p)->priority;
+ if (p->priority > max_prio)
+ max_prio = p->priority;
}
- return(max_prio ? max_prio : -ESRCH);
+
+ /* scale the priority from timeslice to 0..40 */
+ if (max_prio > 0)
+ max_prio = (max_prio * 20 + DEF_PRIORITY/2) / DEF_PRIORITY;
+ return max_prio;
}
+#ifndef __alpha__
+
+/*
+ * Why do these exist? Binary compatibility with some other standard?
+ * If so, maybe they should be moved into the appropriate arch
+ * directory.
+ */
+
asmlinkage int sys_profil(void)
{
return -ENOSYS;
@@ -131,7 +168,8 @@ asmlinkage int sys_prof(void)
return -ENOSYS;
}
-extern void hard_reset_now(void);
+#endif
+
extern asmlinkage sys_kill(int, int);
/*
@@ -157,6 +195,9 @@ asmlinkage int sys_reboot(int magic, int magic_too, int flag)
else if (flag == 0xCDEF0123) {
printk(KERN_EMERG "System halted\n");
sys_kill(-1, SIGKILL);
+#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
+ apm_set_power_state(APM_STATE_OFF);
+#endif
do_exit(0);
} else
return -EINVAL;
@@ -173,7 +214,7 @@ void ctrl_alt_del(void)
if (C_A_D)
hard_reset_now();
else
- send_sig(SIGINT,task[1],1);
+ kill_proc(1, SIGINT, 1);
}
@@ -195,6 +236,7 @@ void ctrl_alt_del(void)
asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
{
int old_rgid = current->gid;
+ int old_egid = current->egid;
if (rgid != (gid_t) -1) {
if ((old_rgid == rgid) ||
@@ -209,7 +251,7 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
(current->egid == egid) ||
(current->sgid == egid) ||
suser())
- current->egid = egid;
+ current->fsgid = current->egid = egid;
else {
current->gid = old_rgid;
return(-EPERM);
@@ -219,6 +261,8 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
(egid != (gid_t) -1 && egid != old_rgid))
current->sgid = current->egid;
current->fsgid = current->egid;
+ if (current->egid != old_egid)
+ current->dumpable = 0;
return 0;
}
@@ -227,19 +271,134 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
*/
asmlinkage int sys_setgid(gid_t gid)
{
+ int old_egid = current->egid;
+
if (suser())
current->gid = current->egid = current->sgid = current->fsgid = gid;
else if ((gid == current->gid) || (gid == current->sgid))
current->egid = current->fsgid = gid;
else
return -EPERM;
+ if (current->egid != old_egid)
+ current->dumpable = 0;
return 0;
}
+
+static char acct_active = 0;
+static struct file acct_file;
+
+int acct_process(long exitcode)
+{
+ struct acct ac;
+ unsigned short fs;
+
+ if (acct_active) {
+ strncpy(ac.ac_comm, current->comm, ACCT_COMM);
+ ac.ac_comm[ACCT_COMM-1] = '\0';
+ ac.ac_utime = current->utime;
+ ac.ac_stime = current->stime;
+ ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ));
+ ac.ac_etime = CURRENT_TIME - ac.ac_btime;
+ ac.ac_uid = current->uid;
+ ac.ac_gid = current->gid;
+ ac.ac_tty = (current)->tty == NULL ? -1 :
+ kdev_t_to_nr(current->tty->device);
+ ac.ac_flag = 0;
+ if (current->flags & PF_FORKNOEXEC)
+ ac.ac_flag |= AFORK;
+ if (current->flags & PF_SUPERPRIV)
+ ac.ac_flag |= ASU;
+ if (current->flags & PF_DUMPCORE)
+ ac.ac_flag |= ACORE;
+ if (current->flags & PF_SIGNALED)
+ ac.ac_flag |= AXSIG;
+ ac.ac_minflt = current->min_flt;
+ ac.ac_majflt = current->maj_flt;
+ ac.ac_exitcode = exitcode;
+
+ /* Kernel segment override */
+ fs = get_fs();
+ set_fs(KERNEL_DS);
+
+ acct_file.f_op->write(acct_file.f_inode, &acct_file,
+ (char *)&ac, sizeof(struct acct));
+
+ set_fs(fs);
+ }
+ return 0;
+}
+
+asmlinkage int sys_acct(const char *name)
+{
+ struct inode *inode = (struct inode *)0;
+ char *tmp;
+ int error;
+
+ if (!suser())
+ return -EPERM;
+
+ if (name == (char *)0) {
+ if (acct_active) {
+ if (acct_file.f_op->release)
+ acct_file.f_op->release(acct_file.f_inode, &acct_file);
+
+ if (acct_file.f_inode != (struct inode *) 0)
+ iput(acct_file.f_inode);
+
+ acct_active = 0;
+ }
+ return 0;
+ } else {
+ if (!acct_active) {
+
+ if ((error = getname(name, &tmp)) != 0)
+ return (error);
+
+ error = open_namei(tmp, O_RDWR, 0600, &inode, 0);
+ putname(tmp);
+
+ if (error)
+ return (error);
+
+ if (!S_ISREG(inode->i_mode)) {
+ iput(inode);
+ return -EACCES;
+ }
+
+ if (!inode->i_op || !inode->i_op->default_file_ops ||
+ !inode->i_op->default_file_ops->write) {
+ iput(inode);
+ return -EIO;
+ }
+
+ acct_file.f_mode = 3;
+ acct_file.f_flags = 0;
+ acct_file.f_count = 1;
+ acct_file.f_inode = inode;
+ acct_file.f_pos = inode->i_size;
+ acct_file.f_reada = 0;
+ acct_file.f_op = inode->i_op->default_file_ops;
+
+ if (acct_file.f_op->open)
+ if (acct_file.f_op->open(acct_file.f_inode, &acct_file)) {
+ iput(inode);
+ return -EIO;
+ }
+
+ acct_active = 1;
+ return 0;
+ } else
+ return -EBUSY;
+ }
+}
+
+#ifndef __alpha__
-asmlinkage int sys_acct(void)
-{
- return -ENOSYS;
-}
+/*
+ * Why do these exist? Binary compatibility with some other standard?
+ * If so, maybe they should be moved into the appropriate arch
+ * directory.
+ */
asmlinkage int sys_phys(void)
{
@@ -266,6 +425,8 @@ asmlinkage int sys_old_syscall(void)
return -ENOSYS;
}
+#endif
+
/*
* Unprivileged users may change the real uid to the effective uid
* or vice versa. (BSD-style)
@@ -284,6 +445,7 @@ asmlinkage int sys_old_syscall(void)
asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
{
int old_ruid = current->uid;
+ int old_euid = current->euid;
if (ruid != (uid_t) -1) {
if ((old_ruid == ruid) ||
@@ -298,7 +460,7 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
(current->euid == euid) ||
(current->suid == euid) ||
suser())
- current->euid = euid;
+ current->fsuid = current->euid = euid;
else {
current->uid = old_ruid;
return(-EPERM);
@@ -308,6 +470,8 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
(euid != (uid_t) -1 && euid != old_ruid))
current->suid = current->euid;
current->fsuid = current->euid;
+ if (current->euid != old_euid)
+ current->dumpable = 0;
return 0;
}
@@ -324,15 +488,61 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
*/
asmlinkage int sys_setuid(uid_t uid)
{
+ int old_euid = current->euid;
+
if (suser())
current->uid = current->euid = current->suid = current->fsuid = uid;
else if ((uid == current->uid) || (uid == current->suid))
current->fsuid = current->euid = uid;
else
return -EPERM;
+ if (current->euid != old_euid)
+ current->dumpable = 0;
return(0);
}
+
+/*
+ * This function implementes a generic ability to update ruid, euid,
+ * and suid. This allows you to implement the 4.4 compatible seteuid().
+ */
+asmlinkage int sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
+{
+ uid_t old_ruid, old_euid, old_suid;
+
+ old_ruid = current->uid;
+ old_euid = current->euid;
+ old_suid = current->suid;
+
+ if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
+ (ruid != current->euid) && (ruid != current->suid))
+ return -EPERM;
+ if ((euid != (uid_t) -1) && (euid != current->uid) &&
+ (euid != current->euid) && (euid != current->suid))
+ return -EPERM;
+ if ((suid != (uid_t) -1) && (suid != current->uid) &&
+ (suid != current->euid) && (suid != current->suid))
+ return -EPERM;
+ if (ruid != (uid_t) -1)
+ current->uid = ruid;
+ if (euid != (uid_t) -1)
+ current->euid = euid;
+ if (suid != (uid_t) -1)
+ current->suid = suid;
+ return 0;
+}
+
+asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
+{
+ int retval;
+
+ if (!(retval = put_user(current->uid, ruid)) &&
+ !(retval = put_user(current->euid, euid)))
+ retval = put_user(current->suid, suid);
+ return retval;
+}
+
+
/*
* "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
* is used for "access()" and for the NFS daemon (letting nfsd stay at
@@ -346,6 +556,8 @@ asmlinkage int sys_setfsuid(uid_t uid)
if (uid == current->uid || uid == current->euid ||
uid == current->suid || uid == current->fsuid || suser())
current->fsuid = uid;
+ if (current->fsuid != old_fsuid)
+ current->dumpable = 0;
return old_fsuid;
}
@@ -359,95 +571,31 @@ asmlinkage int sys_setfsgid(gid_t gid)
if (gid == current->gid || gid == current->egid ||
gid == current->sgid || gid == current->fsgid || suser())
current->fsgid = gid;
+ if (current->fsgid != old_fsgid)
+ current->dumpable = 0;
return old_fsgid;
}
-asmlinkage int sys_times(struct tms * tbuf)
+asmlinkage long sys_times(struct tms * tbuf)
{
+ int error;
if (tbuf) {
- int error = verify_area(VERIFY_WRITE,tbuf,sizeof *tbuf);
+ error = put_user(current->utime,&tbuf->tms_utime);
+ if (!error)
+ error = put_user(current->stime,&tbuf->tms_stime);
+ if (!error)
+ error = put_user(current->cutime,&tbuf->tms_cutime);
+ if (!error)
+ error = put_user(current->cstime,&tbuf->tms_cstime);
if (error)
- return error;
- put_fs_long(current->utime,(unsigned long *)&tbuf->tms_utime);
- put_fs_long(current->stime,(unsigned long *)&tbuf->tms_stime);
- put_fs_long(current->cutime,(unsigned long *)&tbuf->tms_cutime);
- put_fs_long(current->cstime,(unsigned long *)&tbuf->tms_cstime);
+ return error;
}
return jiffies;
}
-asmlinkage unsigned long sys_brk(unsigned long brk)
-{
- int freepages;
- unsigned long rlim;
- unsigned long newbrk, oldbrk;
-
- if (brk < current->mm->end_code)
- return current->mm->brk;
- newbrk = PAGE_ALIGN(brk);
- oldbrk = PAGE_ALIGN(current->mm->brk);
- if (oldbrk == newbrk)
- return current->mm->brk = brk;
-
- /*
- * Always allow shrinking brk
- */
- if (brk <= current->mm->brk) {
- current->mm->brk = brk;
- do_munmap(newbrk, oldbrk-newbrk);
- return brk;
- }
- /*
- * Check against rlimit and stack..
- */
- rlim = current->rlim[RLIMIT_DATA].rlim_cur;
- if (rlim >= RLIM_INFINITY)
- rlim = ~0;
- if (brk - current->mm->end_code > rlim)
- return current->mm->brk;
- /*
- * Check against existing mmap mappings.
- */
- if (find_vma_intersection(current, oldbrk, newbrk+PAGE_SIZE))
- return current->mm->brk;
- /*
- * stupid algorithm to decide if we have enough memory: while
- * simple, it hopefully works in most obvious cases.. Easy to
- * fool it, but this should catch most mistakes.
- */
- freepages = buffermem >> 12;
- freepages += nr_free_pages;
- freepages += nr_swap_pages;
-#if 0
- /*
- * This assumes a PCish memory architecture...
- */
- freepages -= (high_memory - 0x100000) >> 16;
-#else
- freepages -= (high_memory - KSEG0) >> 16;
-#endif
- freepages -= (newbrk-oldbrk) >> 12;
- if (freepages < 0)
- return current->mm->brk;
-#if 0
- freepages += current->mm->rss;
- freepages -= oldbrk >> 12;
- if (freepages < 0)
- return current->mm->brk;
-#endif
- /*
- * Ok, we have probably got enough memory - let it rip.
- */
- current->mm->brk = brk;
- do_mmap(NULL, oldbrk, newbrk-oldbrk,
- PROT_READ|PROT_WRITE|PROT_EXEC,
- MAP_FIXED|MAP_PRIVATE, 0);
- return brk;
-}
-
/*
- * This needs some heave checking ...
- * I just haven't get the stomach for it. I also don't fully
+ * This needs some heavy checking ...
+ * I just haven't the stomach for it. I also don't fully
* understand sessions/pgrp etc. Let somebody who does explain it.
*
* OK, I think I have the protection semantics right.... this is really
@@ -516,10 +664,28 @@ asmlinkage int sys_getpgrp(void)
return current->pgrp;
}
+asmlinkage int sys_getsid(pid_t pid)
+{
+ struct task_struct * p;
+
+ if (!pid)
+ return current->session;
+ for_each_task(p) {
+ if (p->pid == pid)
+ return p->session;
+ }
+ return -ESRCH;
+}
+
asmlinkage int sys_setsid(void)
{
- if (current->leader)
- return -EPERM;
+ struct task_struct * p;
+
+ for_each_task(p) {
+ if (p->pgrp == current->pid)
+ return -EPERM;
+ }
+
current->leader = 1;
current->session = current->pgrp = current->pid;
current->tty = NULL;
@@ -533,88 +699,76 @@ asmlinkage int sys_setsid(void)
asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist)
{
int i;
- int * groups;
+ if (gidsetsize < 0)
+ return -EINVAL;
+ i = current->ngroups;
if (gidsetsize) {
- i = verify_area(VERIFY_WRITE, grouplist, sizeof(gid_t) * gidsetsize);
- if (i)
- return i;
+ if (i > gidsetsize)
+ return -EINVAL;
+ if (copy_to_user(grouplist, current->groups, sizeof(gid_t)*i))
+ return -EFAULT;
}
- groups = current->groups;
- for (i = 0 ; (i < NGROUPS) && (*groups != NOGROUP) ; i++, groups++) {
- if (!gidsetsize)
- continue;
- if (i >= gidsetsize)
- break;
- put_user(*groups, grouplist);
- grouplist++;
- }
- return(i);
+ return i;
}
asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist)
{
- int i;
+ int err;
if (!suser())
return -EPERM;
- if (gidsetsize > NGROUPS)
+ if ((unsigned) gidsetsize > NGROUPS)
return -EINVAL;
- for (i = 0; i < gidsetsize; i++, grouplist++) {
- current->groups[i] = get_fs_word((unsigned short *) grouplist);
- }
- if (i < NGROUPS)
- current->groups[i] = NOGROUP;
- return 0;
+ err = copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t));
+ if (err) {
+ gidsetsize = 0;
+ err = -EFAULT;
+ }
+ current->ngroups = gidsetsize;
+ return err;
}
int in_group_p(gid_t grp)
{
- int i;
-
- if (grp == current->fsgid)
- return 1;
-
- for (i = 0; i < NGROUPS; i++) {
- if (current->groups[i] == NOGROUP)
- break;
- if (current->groups[i] == grp)
- return 1;
+ if (grp != current->fsgid) {
+ int i = current->ngroups;
+ if (i) {
+ gid_t *groups = current->groups;
+ do {
+ if (*groups == grp)
+ goto out;
+ groups++;
+ i--;
+ } while (i);
+ }
+ return 0;
}
- return 0;
+out:
+ return 1;
}
asmlinkage int sys_newuname(struct new_utsname * name)
{
- int error;
-
if (!name)
return -EFAULT;
- error = verify_area(VERIFY_WRITE, name, sizeof *name);
- if (!error)
- memcpy_tofs(name,&system_utsname,sizeof *name);
- return error;
+ if (copy_to_user(name,&system_utsname,sizeof *name))
+ return -EFAULT;
+ return 0;
}
+#ifndef __alpha__
+
+/*
+ * Move these to arch dependent dir since they are for
+ * backward compatibility only?
+ */
asmlinkage int sys_uname(struct old_utsname * name)
{
- int error;
- if (!name)
- return -EFAULT;
- error = verify_area(VERIFY_WRITE, name,sizeof *name);
- if (error)
- return error;
- memcpy_tofs(&name->sysname,&system_utsname.sysname,
- sizeof (system_utsname.sysname));
- memcpy_tofs(&name->nodename,&system_utsname.nodename,
- sizeof (system_utsname.nodename));
- memcpy_tofs(&name->release,&system_utsname.release,
- sizeof (system_utsname.release));
- memcpy_tofs(&name->version,&system_utsname.version,
- sizeof (system_utsname.version));
- memcpy_tofs(&name->machine,&system_utsname.machine,
- sizeof (system_utsname.machine));
- return 0;
+	int error = -EFAULT;
+ if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
+ error = 0;
+ return error;
}
asmlinkage int sys_olduname(struct oldold_utsname * name)
@@ -622,22 +776,30 @@ asmlinkage int sys_olduname(struct oldold_utsname * name)
int error;
if (!name)
return -EFAULT;
- error = verify_area(VERIFY_WRITE, name,sizeof *name);
- if (error)
- return error;
- memcpy_tofs(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
- put_fs_byte(0,name->sysname+__OLD_UTS_LEN);
- memcpy_tofs(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
- put_fs_byte(0,name->nodename+__OLD_UTS_LEN);
- memcpy_tofs(&name->release,&system_utsname.release,__OLD_UTS_LEN);
- put_fs_byte(0,name->release+__OLD_UTS_LEN);
- memcpy_tofs(&name->version,&system_utsname.version,__OLD_UTS_LEN);
- put_fs_byte(0,name->version+__OLD_UTS_LEN);
- memcpy_tofs(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
- put_fs_byte(0,name->machine+__OLD_UTS_LEN);
- return 0;
+ error = copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+ if (!error)
+ error = put_user(0,name->sysname+__OLD_UTS_LEN);
+ if (!error)
+ error = copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+ if (!error)
+ error = put_user(0,name->nodename+__OLD_UTS_LEN);
+ if (!error)
+ error = copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+ if (!error)
+ error = put_user(0,name->release+__OLD_UTS_LEN);
+ if (!error)
+ error = copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+ if (!error)
+ error = put_user(0,name->version+__OLD_UTS_LEN);
+ if (!error)
+ error = copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+ if (!error)
+ error = put_user(0,name->machine+__OLD_UTS_LEN);
+ return error ? -EFAULT : 0;
}
+#endif
+
asmlinkage int sys_sethostname(char *name, int len)
{
int error;
@@ -646,10 +808,9 @@ asmlinkage int sys_sethostname(char *name, int len)
return -EPERM;
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
- error = verify_area(VERIFY_READ, name, len);
+ error = copy_from_user(system_utsname.nodename, name, len);
if (error)
- return error;
- memcpy_fromfs(system_utsname.nodename, name, len);
+ return -EFAULT;
system_utsname.nodename[len] = 0;
return 0;
}
@@ -660,14 +821,10 @@ asmlinkage int sys_gethostname(char *name, int len)
if (len < 0)
return -EINVAL;
- i = verify_area(VERIFY_WRITE, name, len);
- if (i)
- return i;
i = 1+strlen(system_utsname.nodename);
if (i > len)
i = len;
- memcpy_tofs(name, system_utsname.nodename, i);
- return 0;
+ return copy_to_user(name, system_utsname.nodename, i) ? -EFAULT : 0;
}
/*
@@ -676,31 +833,25 @@ asmlinkage int sys_gethostname(char *name, int len)
*/
asmlinkage int sys_setdomainname(char *name, int len)
{
- int i;
+ int error;
if (!suser())
return -EPERM;
- if (len > __NEW_UTS_LEN)
+ if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
- for (i=0; i < len; i++) {
- if ((system_utsname.domainname[i] = get_fs_byte(name+i)) == 0)
- return 0;
- }
- system_utsname.domainname[i] = 0;
+ error = copy_from_user(system_utsname.domainname, name, len);
+ if (error)
+ return -EFAULT;
+ system_utsname.domainname[len] = 0;
return 0;
}
asmlinkage int sys_getrlimit(unsigned int resource, struct rlimit *rlim)
{
- int error;
-
if (resource >= RLIM_NLIMITS)
return -EINVAL;
- error = verify_area(VERIFY_WRITE,rlim,sizeof *rlim);
- if (error)
- return error;
- memcpy_tofs(rlim, current->rlim + resource, sizeof(*rlim));
- return 0;
+ return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
+ ? -EFAULT : 0 ;
}
asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim)
@@ -710,10 +861,9 @@ asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim)
if (resource >= RLIM_NLIMITS)
return -EINVAL;
- err = verify_area(VERIFY_READ, rlim, sizeof(*rlim));
+ err = copy_from_user(&new_rlim, rlim, sizeof(*rlim));
if (err)
- return err;
- memcpy_fromfs(&new_rlim, rlim, sizeof(*rlim));
+ return -EFAULT;
old_rlim = current->rlim + resource;
if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
(new_rlim.rlim_max > old_rlim->rlim_max)) &&
@@ -737,12 +887,8 @@ asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim)
*/
int getrusage(struct task_struct *p, int who, struct rusage *ru)
{
- int error;
struct rusage r;
- error = verify_area(VERIFY_WRITE, ru, sizeof *ru);
- if (error)
- return error;
memset((char *) &r, 0, sizeof(r));
switch (who) {
case RUSAGE_SELF:
@@ -750,28 +896,30 @@ int getrusage(struct task_struct *p, int who, struct rusage *ru)
r.ru_utime.tv_usec = CT_TO_USECS(p->utime);
r.ru_stime.tv_sec = CT_TO_SECS(p->stime);
r.ru_stime.tv_usec = CT_TO_USECS(p->stime);
- r.ru_minflt = p->mm->min_flt;
- r.ru_majflt = p->mm->maj_flt;
+ r.ru_minflt = p->min_flt;
+ r.ru_majflt = p->maj_flt;
+ r.ru_nswap = p->nswap;
break;
case RUSAGE_CHILDREN:
r.ru_utime.tv_sec = CT_TO_SECS(p->cutime);
r.ru_utime.tv_usec = CT_TO_USECS(p->cutime);
r.ru_stime.tv_sec = CT_TO_SECS(p->cstime);
r.ru_stime.tv_usec = CT_TO_USECS(p->cstime);
- r.ru_minflt = p->mm->cmin_flt;
- r.ru_majflt = p->mm->cmaj_flt;
+ r.ru_minflt = p->cmin_flt;
+ r.ru_majflt = p->cmaj_flt;
+ r.ru_nswap = p->cnswap;
break;
default:
r.ru_utime.tv_sec = CT_TO_SECS(p->utime + p->cutime);
r.ru_utime.tv_usec = CT_TO_USECS(p->utime + p->cutime);
r.ru_stime.tv_sec = CT_TO_SECS(p->stime + p->cstime);
r.ru_stime.tv_usec = CT_TO_USECS(p->stime + p->cstime);
- r.ru_minflt = p->mm->min_flt + p->mm->cmin_flt;
- r.ru_majflt = p->mm->maj_flt + p->mm->cmaj_flt;
+ r.ru_minflt = p->min_flt + p->cmin_flt;
+ r.ru_majflt = p->maj_flt + p->cmaj_flt;
+ r.ru_nswap = p->nswap + p->cnswap;
break;
}
- memcpy_tofs(ru, &r, sizeof(r));
- return 0;
+ return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
}
asmlinkage int sys_getrusage(int who, struct rusage *ru)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
new file mode 100644
index 000000000..3d0fbf49b
--- /dev/null
+++ b/kernel/sysctl.c
@@ -0,0 +1,922 @@
+/*
+ * sysctl.c: General linux system control interface
+ *
+ * Begun 24 March 1995, Stephen Tweedie
+ * Added /proc support, Dec 1995
+ * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
+ * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
+ * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
+ * Dynamic registration fixes, Stephen Tweedie.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/swapctl.h>
+#include <linux/proc_fs.h>
+#include <linux/malloc.h>
+#include <linux/stat.h>
+#include <linux/ctype.h>
+#include <linux/utsname.h>
+#include <linux/swapctl.h>
+
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+
+/* External variables not in a header file. */
+extern int panic_timeout;
+
+
+#ifdef CONFIG_ROOT_NFS
+#include <linux/nfs_fs.h>
+#endif
+
+static ctl_table root_table[];
+static struct ctl_table_header root_table_header =
+ {root_table, DNODE_SINGLE(&root_table_header)};
+
+static int parse_table(int *, int, void *, size_t *, void *, size_t,
+ ctl_table *, void **);
+
+static ctl_table kern_table[];
+static ctl_table vm_table[];
+extern ctl_table net_table[];
+
+/* /proc declarations: */
+
+#ifdef CONFIG_PROC_FS
+
+static long proc_readsys(struct inode * inode, struct file * file,
+ char * buf, unsigned long count);
+static long proc_writesys(struct inode * inode, struct file * file,
+ const char * buf, unsigned long count);
+static int proc_sys_permission(struct inode *, int);
+
+struct file_operations proc_sys_file_operations =
+{
+ NULL, /* lseek */
+ proc_readsys, /* read */
+ proc_writesys, /* write */
+ NULL, /* readdir */
+ NULL, /* select */
+ NULL, /* ioctl */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ NULL /* can't fsync */
+};
+
+struct inode_operations proc_sys_inode_operations =
+{
+ &proc_sys_file_operations,
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* readpage */
+ NULL, /* writepage */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ proc_sys_permission
+};
+
+extern struct proc_dir_entry proc_sys_root;
+
+static void register_proc_table(ctl_table *, struct proc_dir_entry *);
+static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
+#endif
+
+extern int bdf_prm[], bdflush_min[], bdflush_max[];
+
+static int do_securelevel_strategy (ctl_table *, int *, int, void *, size_t *,
+ void *, size_t, void **);
+
+extern char binfmt_java_interpreter[], binfmt_java_appletviewer[];
+
+/* The default sysctl tables: */
+
+static ctl_table root_table[] = {
+ {CTL_KERN, "kernel", NULL, 0, 0555, kern_table},
+ {CTL_VM, "vm", NULL, 0, 0555, vm_table},
+ {CTL_NET, "net", NULL, 0, 0555, net_table},
+ {0}
+};
+
+static ctl_table kern_table[] = {
+ {KERN_OSTYPE, "ostype", system_utsname.sysname, 64,
+ 0444, NULL, &proc_dostring, &sysctl_string},
+ {KERN_OSRELEASE, "osrelease", system_utsname.release, 64,
+ 0444, NULL, &proc_dostring, &sysctl_string},
+ {KERN_VERSION, "version", system_utsname.version, 64,
+ 0444, NULL, &proc_dostring, &sysctl_string},
+ {KERN_NODENAME, "hostname", system_utsname.nodename, 64,
+ 0644, NULL, &proc_dostring, &sysctl_string},
+ {KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64,
+ 0644, NULL, &proc_dostring, &sysctl_string},
+ {KERN_NRINODE, "inode-nr", &nr_inodes, 2*sizeof(int),
+ 0444, NULL, &proc_dointvec},
+ {KERN_MAXINODE, "inode-max", &max_inodes, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_NRFILE, "file-nr", &nr_files, sizeof(int),
+ 0444, NULL, &proc_dointvec},
+ {KERN_MAXFILE, "file-max", &max_files, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_SECURELVL, "securelevel", &securelevel, sizeof(int),
+ 0444, NULL, &proc_dointvec, (ctl_handler *)&do_securelevel_strategy},
+ {KERN_PANIC, "panic", &panic_timeout, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+#ifdef CONFIG_BLK_DEV_INITRD
+ {KERN_REALROOTDEV, "real-root-dev", &real_root_dev, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+#endif
+#ifdef CONFIG_ROOT_NFS
+ {KERN_NFSRNAME, "nfs-root-name", nfs_root_name, NFS_ROOT_NAME_LEN,
+ 0644, NULL, &proc_dostring, &sysctl_string },
+ {KERN_NFSRNAME, "nfs-root-addrs", nfs_root_addrs, NFS_ROOT_ADDRS_LEN,
+ 0644, NULL, &proc_dostring, &sysctl_string },
+#endif
+#ifdef CONFIG_BINFMT_JAVA
+ {KERN_JAVA_INTERPRETER, "java-interpreter", binfmt_java_interpreter,
+ 64, 0644, NULL, &proc_dostring, &sysctl_string },
+ {KERN_JAVA_APPLETVIEWER, "java-appletviewer", binfmt_java_appletviewer,
+ 64, 0644, NULL, &proc_dostring, &sysctl_string },
+#endif
+ {0}
+};
+
+static ctl_table vm_table[] = {
+ {VM_SWAPCTL, "swapctl",
+ &swap_control, sizeof(swap_control_t), 0600, NULL, &proc_dointvec},
+ {VM_KSWAPD, "kswapd",
+ &kswapd_ctl, sizeof(kswapd_ctl), 0600, NULL, &proc_dointvec},
+ {VM_FREEPG, "freepages",
+ &min_free_pages, 3*sizeof(int), 0600, NULL, &proc_dointvec},
+ {VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0600, NULL,
+ &proc_dointvec_minmax, &sysctl_intvec, NULL,
+ &bdflush_min, &bdflush_max},
+ {0}
+};
+
+void sysctl_init(void)
+{
+#ifdef CONFIG_PROC_FS
+ register_proc_table(root_table, &proc_sys_root);
+#endif
+}
+
+
+int do_sysctl (int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen)
+{
+ int error;
+ struct ctl_table_header *tmp;
+ void *context;
+
+ if (nlen == 0 || nlen >= CTL_MAXNAME)
+ return -ENOTDIR;
+
+ error = verify_area(VERIFY_READ,name,nlen*sizeof(int));
+ if (error) return error;
+ if (oldval) {
+ int old_len;
+ if (!oldlenp)
+ return -EFAULT;
+ error = verify_area(VERIFY_WRITE,oldlenp,sizeof(size_t));
+ if (error) return error;
+ get_user(old_len, oldlenp);
+ error = verify_area(VERIFY_WRITE,oldval,old_len);
+ if (error) return error;
+ }
+ if (newval) {
+ error = verify_area(VERIFY_READ,newval,newlen);
+ if (error) return error;
+ }
+ tmp = &root_table_header;
+ do {
+ context = 0;
+ error = parse_table(name, nlen, oldval, oldlenp,
+ newval, newlen, tmp->ctl_table, &context);
+ if (context)
+ kfree(context);
+ if (error != -ENOTDIR)
+ return error;
+ tmp = tmp->DLIST_NEXT(ctl_entry);
+ } while (tmp != &root_table_header);
+ return -ENOTDIR;
+}
+
+extern asmlinkage int sys_sysctl(struct __sysctl_args *args)
+{
+ struct __sysctl_args tmp;
+ int error;
+ error = verify_area(VERIFY_READ, args, sizeof(*args));
+ if (error)
+ return error;
+ copy_from_user(&tmp, args, sizeof(tmp));
+ return do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
+ tmp.newval, tmp.newlen);
+}
+
+/* Like in_group_p, but testing against egid, not fsgid */
+static int in_egroup_p(gid_t grp)
+{
+ if (grp != current->egid) {
+ int i = current->ngroups;
+ if (i) {
+ gid_t *groups = current->groups;
+ do {
+ if (*groups == grp)
+ goto out;
+ groups++;
+ i--;
+ } while (i);
+ }
+ return 0;
+ }
+out:
+ return 1;
+}
+
+/* ctl_perm does NOT grant the superuser all rights automatically, because
+ some sysctl variables are readonly even to root. */
+static int test_perm(int mode, int op)
+{
+ if (!current->euid)
+ mode >>= 6;
+ else if (in_egroup_p(0))
+ mode >>= 3;
+ if ((mode & op & 0007) == op)
+ return 0;
+ return -EACCES;
+}
+static inline int ctl_perm(ctl_table *table, int op)
+{
+ return test_perm(table->mode, op);
+}
+
+static int parse_table(int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen,
+ ctl_table *table, void **context)
+{
+ int error;
+repeat:
+ if (!nlen)
+ return -ENOTDIR;
+
+ for ( ; table->ctl_name; table++) {
+ int n;
+ get_user(n,name);
+ if (n == table->ctl_name ||
+ table->ctl_name == CTL_ANY) {
+ if (table->child) {
+ if (ctl_perm(table, 001))
+ return -EPERM;
+ if (table->strategy) {
+ error = table->strategy(
+ table, name, nlen,
+ oldval, oldlenp,
+ newval, newlen, context);
+ if (error)
+ return error;
+ }
+ name++;
+ nlen--;
+ table = table->child;
+ goto repeat;
+ }
+ error = do_sysctl_strategy(table, name, nlen,
+ oldval, oldlenp,
+ newval, newlen, context);
+ return error;
+ }
+ };
+ return -ENOTDIR;
+}
+
+/* Perform the actual read/write of a sysctl table entry. */
+int do_sysctl_strategy (ctl_table *table,
+ int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ int op = 0, rc, len;
+
+ if (oldval)
+ op |= 004;
+ if (newval)
+ op |= 002;
+ if (ctl_perm(table, op))
+ return -EPERM;
+
+ if (table->strategy) {
+ rc = table->strategy(table, name, nlen, oldval, oldlenp,
+ newval, newlen, context);
+ if (rc < 0)
+ return rc;
+ if (rc > 0)
+ return 0;
+ }
+
+ /* If there is no strategy routine, or if the strategy returns
+ * zero, proceed with automatic r/w */
+ if (table->data && table->maxlen) {
+ if (oldval && oldlenp) {
+ get_user(len, oldlenp);
+ if (len) {
+ if (len > table->maxlen)
+ len = table->maxlen;
+ copy_to_user(oldval, table->data, len);
+ put_user(len, oldlenp);
+ }
+ }
+ if (newval && newlen) {
+ len = newlen;
+ if (len > table->maxlen)
+ len = table->maxlen;
+ copy_from_user(table->data, newval, len);
+ }
+ }
+ return 0;
+}
+
+/*
+ * This function only checks permission for changing the security level
+ * If the tests are successful, the actual change is done by
+ * do_sysctl_strategy
+ */
+static int do_securelevel_strategy (ctl_table *table,
+ int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ int level;
+
+ if (newval && newlen) {
+ if (newlen != sizeof (int))
+ return -EINVAL;
+ copy_from_user (&level, newval, newlen);
+ if (level < securelevel && current->pid != 1)
+ return -EPERM;
+ }
+ return 0;
+}
+
+struct ctl_table_header *register_sysctl_table(ctl_table * table,
+ int insert_at_head)
+{
+ struct ctl_table_header *tmp;
+ tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return 0;
+ *tmp = ((struct ctl_table_header) {table, DNODE_NULL});
+ if (insert_at_head)
+ DLIST_INSERT_AFTER(&root_table_header, tmp, ctl_entry);
+ else
+ DLIST_INSERT_BEFORE(&root_table_header, tmp, ctl_entry);
+#ifdef CONFIG_PROC_FS
+ register_proc_table(table, &proc_sys_root);
+#endif
+ return tmp;
+}
+
+void unregister_sysctl_table(struct ctl_table_header * table)
+{
+ DLIST_DELETE(table, ctl_entry);
+#ifdef CONFIG_PROC_FS
+ unregister_proc_table(table->ctl_table, &proc_sys_root);
+#endif
+}
+
+/*
+ * /proc/sys support
+ */
+
+#ifdef CONFIG_PROC_FS
+
+/* Scan the sysctl entries in table and add them all into /proc */
+static void register_proc_table(ctl_table * table, struct proc_dir_entry *root)
+{
+ struct proc_dir_entry *de, *tmp;
+ int exists;
+
+ for (; table->ctl_name; table++) {
+ exists = 0;
+ /* Can't do anything without a proc name. */
+ if (!table->procname)
+ continue;
+ /* Maybe we can't do anything with it... */
+ if (!table->proc_handler &&
+ !table->child)
+ continue;
+
+ de = kmalloc(sizeof(*de), GFP_KERNEL);
+ if (!de) continue;
+ de->namelen = strlen(table->procname);
+ de->name = table->procname;
+ de->mode = table->mode;
+ de->nlink = 1;
+ de->uid = 0;
+ de->gid = 0;
+ de->size = 0;
+ de->get_info = 0; /* For internal use if we want it */
+ de->fill_inode = 0; /* To override struct inode fields */
+ de->next = de->subdir = 0;
+ de->data = (void *) table;
+ /* Is it a file? */
+ if (table->proc_handler) {
+ de->ops = &proc_sys_inode_operations;
+ de->mode |= S_IFREG;
+ }
+ /* Otherwise it's a subdir */
+ else {
+ /* First check to see if it already exists */
+ for (tmp = root->subdir; tmp; tmp = tmp->next) {
+ if (tmp->namelen == de->namelen &&
+ !memcmp(tmp->name,de->name,de->namelen)) {
+ exists = 1;
+ kfree (de);
+ de = tmp;
+ }
+ }
+ if (!exists) {
+ de->ops = &proc_dir_inode_operations;
+ de->nlink++;
+ de->mode |= S_IFDIR;
+ }
+ }
+ table->de = de;
+ if (!exists)
+ proc_register_dynamic(root, de);
+ if (de->mode & S_IFDIR )
+ register_proc_table(table->child, de);
+ }
+}
+
+static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root)
+{
+ struct proc_dir_entry *de;
+ for (; table->ctl_name; table++) {
+ if (!(de = table->de))
+ continue;
+ if (de->mode & S_IFDIR) {
+ if (!table->child) {
+ printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
+ continue;
+ }
+ unregister_proc_table(table->child, de);
+ }
+ /* Don't unregister proc directories which still have
+ entries... */
+ if (!((de->mode & S_IFDIR) && de->subdir)) {
+ proc_unregister(root, de->low_ino);
+ kfree(de);
+ }
+ }
+}
+
+
+static long do_rw_proc(int write, struct inode * inode, struct file * file,
+ char * buf, unsigned long count)
+{
+ int op;
+ struct proc_dir_entry *de;
+ struct ctl_table *table;
+ size_t res;
+ long error;
+
+ error = verify_area(write ? VERIFY_READ : VERIFY_WRITE, buf, count);
+ if (error)
+ return error;
+
+ de = (struct proc_dir_entry*) inode->u.generic_ip;
+ if (!de || !de->data)
+ return -ENOTDIR;
+ table = (struct ctl_table *) de->data;
+ if (!table || !table->proc_handler)
+ return -ENOTDIR;
+ op = (write ? 002 : 004);
+ if (ctl_perm(table, op))
+ return -EPERM;
+
+ res = count;
+ error = (*table->proc_handler) (table, write, file, buf, &res);
+ if (error)
+ return error;
+ return res;
+}
+
+static long proc_readsys(struct inode * inode, struct file * file,
+ char * buf, unsigned long count)
+{
+ return do_rw_proc(0, inode, file, buf, count);
+}
+
+static long proc_writesys(struct inode * inode, struct file * file,
+ const char * buf, unsigned long count)
+{
+ return do_rw_proc(1, inode, file, (char *) buf, count);
+}
+
+static int proc_sys_permission(struct inode *inode, int op)
+{
+ return test_perm(inode->i_mode, op);
+}
+
+int proc_dostring(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ int len;
+ char *p, c;
+
+ if (!table->data || !table->maxlen || !*lenp ||
+ (filp->f_pos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ if (write) {
+ len = 0;
+ p = buffer;
+ while (len < *lenp) {
+ get_user(c, p++);
+ if (c == 0 || c == '\n')
+ break;
+ len++;
+ }
+ if (len >= table->maxlen)
+ len = table->maxlen-1;
+ copy_from_user(table->data, buffer, len);
+ ((char *) table->data)[len] = 0;
+ filp->f_pos += *lenp;
+ } else {
+ len = strlen(table->data);
+ if (len > table->maxlen)
+ len = table->maxlen;
+ if (len > *lenp)
+ len = *lenp;
+ if (len)
+ copy_to_user(buffer, table->data, len);
+ if (len < *lenp) {
+ put_user('\n', ((char *) buffer) + len);
+ len++;
+ }
+ *lenp = len;
+ filp->f_pos += len;
+ }
+ return 0;
+}
+
+int proc_dointvec(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ int *i, vleft, first=1, len, left, neg, val;
+ #define TMPBUFLEN 20
+ char buf[TMPBUFLEN], *p;
+
+ if (!table->data || !table->maxlen || !*lenp ||
+ (filp->f_pos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ i = (int *) table->data;
+ vleft = table->maxlen / sizeof(int);
+ left = *lenp;
+
+ for (; left && vleft--; i++, first=0) {
+ if (write) {
+ while (left) {
+ char c;
+ get_user(c,(char *) buffer);
+ if (!isspace(c))
+ break;
+ left--;
+ ((char *) buffer)++;
+ }
+ if (!left)
+ break;
+ neg = 0;
+ len = left;
+ if (len > TMPBUFLEN-1)
+ len = TMPBUFLEN-1;
+ copy_from_user(buf, buffer, len);
+ buf[len] = 0;
+ p = buf;
+ if (*p == '-' && left > 1) {
+ neg = 1;
+ left--, p++;
+ }
+ if (*p < '0' || *p > '9')
+ break;
+ val = simple_strtoul(p, &p, 0);
+ len = p-buf;
+ if ((len < left) && *p && !isspace(*p))
+ break;
+ if (neg)
+ val = -val;
+ buffer += len;
+ left -= len;
+ *i = val;
+ } else {
+ p = buf;
+ if (!first)
+ *p++ = '\t';
+ sprintf(p, "%d", *i);
+ len = strlen(buf);
+ if (len > left)
+ len = left;
+ copy_to_user(buffer, buf, len);
+ left -= len;
+ buffer += len;
+ }
+ }
+
+ if (!write && !first && left) {
+ put_user('\n', (char *) buffer);
+ left--, buffer++;
+ }
+ if (write) {
+ p = (char *) buffer;
+ while (left) {
+ char c;
+ get_user(c, p++);
+ if (!isspace(c))
+ break;
+ left--;
+ }
+ }
+ if (write && first)
+ return -EINVAL;
+ *lenp -= left;
+ filp->f_pos += *lenp;
+ return 0;
+}
+
+int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ int *i, *min, *max, vleft, first=1, len, left, neg, val;
+ #define TMPBUFLEN 20
+ char buf[TMPBUFLEN], *p;
+
+ if (!table->data || !table->maxlen || !*lenp ||
+ (filp->f_pos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ i = (int *) table->data;
+ min = (int *) table->extra1;
+ max = (int *) table->extra2;
+ vleft = table->maxlen / sizeof(int);
+ left = *lenp;
+
+ for (; left && vleft--; i++, first=0) {
+ if (write) {
+ while (left) {
+ char c;
+ get_user(c, (char *) buffer);
+ if (!isspace(c))
+ break;
+ left--;
+ ((char *) buffer)++;
+ }
+ if (!left)
+ break;
+ neg = 0;
+ len = left;
+ if (len > TMPBUFLEN-1)
+ len = TMPBUFLEN-1;
+ copy_from_user(buf, buffer, len);
+ buf[len] = 0;
+ p = buf;
+ if (*p == '-' && left > 1) {
+ neg = 1;
+ left--, p++;
+ }
+ if (*p < '0' || *p > '9')
+ break;
+ val = simple_strtoul(p, &p, 0);
+ len = p-buf;
+ if ((len < left) && *p && !isspace(*p))
+ break;
+ if (neg)
+ val = -val;
+ buffer += len;
+ left -= len;
+
+ if (min && val < *min++)
+ continue;
+ if (max && val > *max++)
+ continue;
+ *i = val;
+ } else {
+ p = buf;
+ if (!first)
+ *p++ = '\t';
+ sprintf(p, "%d", *i);
+ len = strlen(buf);
+ if (len > left)
+ len = left;
+ copy_to_user(buffer, buf, len);
+ left -= len;
+ buffer += len;
+ }
+ }
+
+ if (!write && !first && left) {
+ put_user('\n', (char *) buffer);
+ left--, buffer++;
+ }
+ if (write) {
+ p = (char *) buffer;
+ while (left) {
+ char c;
+ get_user(c, p++);
+ if (!isspace(c))
+ break;
+ left--;
+ }
+ }
+ if (write && first)
+ return -EINVAL;
+ *lenp -= left;
+ filp->f_pos += *lenp;
+ return 0;
+}
+
+#else /* CONFIG_PROC_FS */
+
+int proc_dostring(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+#endif /* CONFIG_PROC_FS */
+
+
+/*
+ * General sysctl support routines
+ */
+
+/* The generic string strategy routine: */
+int sysctl_string(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ int l, len;
+
+ if (!table->data || !table->maxlen)
+ return -ENOTDIR;
+
+ if (oldval && oldlenp) {
+ get_user(len, oldlenp);
+ if (len) {
+ l = strlen(table->data);
+ if (len > l) len = l;
+ if (len >= table->maxlen)
+ len = table->maxlen;
+ copy_to_user(oldval, table->data, len);
+ put_user(0, ((char *) oldval) + len);
+ put_user(len, oldlenp);
+ }
+ }
+ if (newval && newlen) {
+ len = newlen;
+ if (len > table->maxlen)
+ len = table->maxlen;
+ copy_from_user(table->data, newval, len);
+ if (len == table->maxlen)
+ len--;
+ ((char *) table->data)[len] = 0;
+ }
+ return 0;
+}
+
+/*
+ * This function makes sure that all of the integers in the vector
+ * are between the minimum and maximum values given in the arrays
+ * table->extra1 and table->extra2, respectively.
+ */
+int sysctl_intvec(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ int i, length, *vec, *min, *max;
+
+ if (newval && newlen) {
+ if (newlen % sizeof(int) != 0)
+ return -EINVAL;
+
+ if (!table->extra1 && !table->extra2)
+ return 0;
+
+ if (newlen > table->maxlen)
+ newlen = table->maxlen;
+ length = newlen / sizeof(int);
+
+ vec = (int *) newval;
+ min = (int *) table->extra1;
+ max = (int *) table->extra2;
+
+ for (i = 0; i < length; i++) {
+ int value;
+ get_user(value, vec + i);
+ if (min && value < min[i])
+ return -EINVAL;
+ if (max && value > max[i])
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+int do_string (
+ void *oldval, size_t *oldlenp, void *newval, size_t newlen,
+ int rdwr, char *data, size_t max)
+{
+ int l = strlen(data) + 1;
+ if (newval && !rdwr)
+ return -EPERM;
+ if (newval && newlen >= max)
+ return -EINVAL;
+ if (oldval) {
+ int old_l;
+ get_user(old_l, oldlenp);
+ if (l > old_l)
+ return -ENOMEM;
+ put_user(l, oldlenp);
+ copy_to_user(oldval, data, l);
+ }
+ if (newval) {
+ copy_from_user(data, newval, newlen);
+ data[newlen] = 0;
+ }
+ return 0;
+}
+
+int do_int (
+ void *oldval, size_t *oldlenp, void *newval, size_t newlen,
+ int rdwr, int *data)
+{
+ if (newval && !rdwr)
+ return -EPERM;
+ if (newval && newlen != sizeof(int))
+ return -EINVAL;
+ if (oldval) {
+ int old_l;
+ get_user(old_l, oldlenp);
+ if (old_l < sizeof(int))
+ return -ENOMEM;
+ put_user(sizeof(int), oldlenp);
+ copy_to_user(oldval, data, sizeof(int));
+ }
+ if (newval)
+ copy_from_user(data, newval, sizeof(int));
+ return 0;
+}
+
+int do_struct (
+ void *oldval, size_t *oldlenp, void *newval, size_t newlen,
+ int rdwr, void *data, size_t len)
+{
+ if (newval && !rdwr)
+ return -EPERM;
+ if (newval && newlen != len)
+ return -EINVAL;
+ if (oldval) {
+ int old_l;
+ get_user(old_l, oldlenp);
+ if (old_l < len)
+ return -ENOMEM;
+ put_user(len, oldlenp);
+ copy_to_user(oldval, data, len);
+ }
+ if (newval)
+ copy_from_user(data, newval, len);
+ return 0;
+}
+
diff --git a/kernel/time.c b/kernel/time.c
index 0424b2eaa..c2090a583 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -14,15 +14,8 @@
* Created file with time related functions from sched.c and adjtimex()
* 1993-10-08 Torsten Duwe
* adjtime interface update and CMOS clock write code
- * 1994-07-02 Alan Modra
- * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
- * 1995-03-26 Markus Kuhn
- * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
- * precision CMOS clock update
- *
- * to do: adjtimex() has to be updated to recent (1994-12-13) revision
- * of David Mill's kernel clock model. For more information, check
- * <ftp://louie.udel.edu/pub/ntp/kernel.tar.Z>.
+ * 1995-08-13 Torsten Duwe
+ * kernel PLL updated to 1994-12-13 specs (rfc-1489)
*/
#include <linux/errno.h>
@@ -31,223 +24,91 @@
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
-
-#include <asm/segment.h>
-#include <asm/io.h>
-
-#include <linux/mc146818rtc.h>
#include <linux/timex.h>
-/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
- * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
- * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
- *
- * [For the Julian calendar (which was used in Russia before 1917,
- * Britain & colonies before 1752, anywhere else before 1582,
- * and is still in use by some communities) leave out the
- * -year/100+year/400 terms, and add 10.]
- *
- * This algorithm was first published by Gauss (I think).
- *
- * WARNING: this function will overflow on 2106-02-07 06:28:16 on
- * machines were long is 32-bit! (However, as time_t is signed, we
- * will already get problems at other places on 2038-01-19 03:14:08)
+#include <asm/uaccess.h>
+
+/*
+ * The timezone where the local system is located. Used as a default by some
+ * programs who obtain this value by using gettimeofday.
*/
-static inline unsigned long mktime(unsigned int year, unsigned int mon,
- unsigned int day, unsigned int hour,
- unsigned int min, unsigned int sec)
+struct timezone sys_tz = { 0, 0};
+
+static void do_normal_gettime(struct timeval * tm)
{
- if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */
- mon += 12; /* Puts Feb last since it has leap day */
- year -= 1;
- }
- return (((
- (unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day) +
- year*365 - 719499
- )*24 + hour /* now have hours */
- )*60 + min /* now have minutes */
- )*60 + sec; /* finally seconds */
+ *tm=xtime;
}
-void time_init(void)
-{
- unsigned int year, mon, day, hour, min, sec;
- int i;
+void (*do_get_fast_time)(struct timeval *) = do_normal_gettime;
- /* The Linux interpretation of the CMOS clock register contents:
- * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
- * RTC registers show the second which has precisely just started.
- * Let's hope other operating systems interpret the RTC the same way.
- */
- /* read RTC exactly on falling edge of update flag */
- for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... */
- if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
- break;
- for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms */
- if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
- break;
- do { /* Isn't this overkill ? UIP above should guarantee consistency */
- sec = CMOS_READ(RTC_SECONDS);
- min = CMOS_READ(RTC_MINUTES);
- hour = CMOS_READ(RTC_HOURS);
- day = CMOS_READ(RTC_DAY_OF_MONTH);
- mon = CMOS_READ(RTC_MONTH);
- year = CMOS_READ(RTC_YEAR);
- } while (sec != CMOS_READ(RTC_SECONDS));
- if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- {
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
- }
- if ((year += 1900) < 1970)
- year += 100;
- xtime.tv_sec = mktime(year, mon, day, hour, min, sec);
- xtime.tv_usec = 0;
-printk("Year : %d\n", year);
-printk("Mon : %d\n", mon);
-printk("Day : %d\n", day);
-printk("Hour : %d\n", hour);
-printk("Min : %d\n", min);
-printk("Sec : %d\n", sec);
+/*
+ * Generic way to access 'xtime' (the current time of day).
+ * This can be changed if the platform provides a more accurate (and fast!)
+ * version.
+ */
+
+void get_fast_time(struct timeval * t)
+{
+ do_get_fast_time(t);
}
-/*
- * The timezone where the local system is located. Used as a default by some
- * programs who obtain this value by using gettimeofday.
- */
-struct timezone sys_tz = { 0, 0};
+#ifndef __alpha__
-asmlinkage int sys_time(long * tloc)
+/*
+ * sys_time() can be implemented in user-level using
+ * sys_gettimeofday(). Is this for backwards compatibility? If so,
+ * why not move it into the appropriate arch directory (for those
+ * architectures that need it).
+ */
+asmlinkage int sys_time(int * tloc)
{
- int i, error;
+ int i;
i = CURRENT_TIME;
if (tloc) {
- error = verify_area(VERIFY_WRITE, tloc, 4);
- if (error)
- return error;
- put_fs_long(i,(unsigned long *)tloc);
+ if (put_user(i,tloc))
+ i = -EFAULT;
}
return i;
}
-asmlinkage int sys_stime(unsigned long * tptr)
+/*
+ * sys_stime() can be implemented in user-level using
+ * sys_settimeofday(). Is this for backwards compatibility? If so,
+ * why not move it into the appropriate arch directory (for those
+ * architectures that need it).
+ */
+asmlinkage int sys_stime(int * tptr)
{
- int error;
- unsigned long value;
+ int value;
if (!suser())
return -EPERM;
- error = verify_area(VERIFY_READ, tptr, sizeof(*tptr));
- if (error)
- return error;
- value = get_fs_long(tptr);
+ if (get_user(value, tptr))
+ return -EFAULT;
cli();
xtime.tv_sec = value;
xtime.tv_usec = 0;
- time_status = TIME_BAD;
- time_maxerror = 0x70000000;
- time_esterror = 0x70000000;
+ time_state = TIME_ERROR;
+ time_maxerror = MAXPHASE;
+ time_esterror = MAXPHASE;
sti();
return 0;
}
-/* This function must be called with interrupts disabled
- * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs
- *
- * However, the pc-audio speaker driver changes the divisor so that
- * it gets interrupted rather more often - it loads 64 into the
- * counter rather than 11932! This has an adverse impact on
- * do_gettimeoffset() -- it stops working! What is also not
- * good is that the interval that our timer function gets called
- * is no longer 10.0002 ms, but 9.9767 ms. To get around this
- * would require using a different timing source. Maybe someone
- * could use the RTC - I know that this can interrupt at frequencies
- * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix
- * it so that at startup, the timer code in sched.c would select
- * using either the RTC or the 8253 timer. The decision would be
- * based on whether there was any other device around that needed
- * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz,
- * and then do some jiggery to have a version of do_timer that
- * advanced the clock by 1/1024 s. Every time that reached over 1/100
- * of a second, then do all the old code. If the time was kept correct
- * then do_gettimeoffset could just return 0 - there is no low order
- * divider that can be accessed.
- *
- * Ideally, you would be able to use the RTC for the speaker driver,
- * but it appears that the speaker driver really needs interrupt more
- * often than every 120 us or so.
- *
- * Anyway, this needs more thought.... pjsg (1993-08-28)
- *
- * If you are really that interested, you should be reading
- * comp.protocols.time.ntp!
- */
-
-#define TICK_SIZE tick
-
-static inline unsigned long do_gettimeoffset(void)
-{
- int count;
- unsigned long offset = 0;
-
- /* timer count may underflow right here */
- outb_p(0x00, 0x43); /* latch the count ASAP */
- count = inb_p(0x40); /* read the latched count */
- count |= inb(0x40) << 8;
- /* we know probability of underflow is always MUCH less than 1% */
- if (count > (LATCH - LATCH/100)) {
- /* check for pending timer interrupt */
- outb_p(0x0a, 0x20);
- if (inb(0x20) & 1)
- offset = TICK_SIZE;
- }
- count = ((LATCH-1) - count) * TICK_SIZE;
- count = (count + LATCH/2) / LATCH;
- return offset + count;
-}
-
-/*
- * This version of gettimeofday has near microsecond resolution.
- */
-void do_gettimeofday(struct timeval *tv)
-{
- unsigned long flags;
-
- save_flags(flags);
- cli();
- *tv = xtime;
-#if defined (__i386__) || defined (__mips__)
- tv->tv_usec += do_gettimeoffset();
- if (tv->tv_usec >= 1000000) {
- tv->tv_usec -= 1000000;
- tv->tv_sec++;
- }
-#endif /* !defined (__i386__) && !defined (__mips__) */
- restore_flags(flags);
-}
+#endif
asmlinkage int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
{
- int error;
-
if (tv) {
struct timeval ktv;
- error = verify_area(VERIFY_WRITE, tv, sizeof *tv);
- if (error)
- return error;
do_gettimeofday(&ktv);
- memcpy_tofs(tv, &ktv, sizeof(ktv));
+ if (copy_to_user(tv, &ktv, sizeof(ktv)))
+ return -EFAULT;
}
if (tz) {
- error = verify_area(VERIFY_WRITE, tz, sizeof *tz);
- if (error)
- return error;
- memcpy_tofs(tz, &sys_tz, sizeof(sys_tz));
+ if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
+ return -EFAULT;
}
return 0;
}
@@ -294,18 +155,12 @@ asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz)
if (!suser())
return -EPERM;
if (tv) {
- int error = verify_area(VERIFY_READ, tv, sizeof(*tv));
- if (error)
- return error;
- memcpy_fromfs(&new_tv, tv, sizeof(*tv));
- }
- if (tz) {
- int error = verify_area(VERIFY_READ, tz, sizeof(*tz));
- if (error)
- return error;
- memcpy_fromfs(&new_tz, tz, sizeof(*tz));
+ if (copy_from_user(&new_tv, tv, sizeof(*tv)))
+ return -EFAULT;
}
if (tz) {
+ if (copy_from_user(&new_tz, tz, sizeof(*tz)))
+ return -EFAULT;
sys_tz = new_tz;
if (firsttime) {
firsttime = 0;
@@ -313,30 +168,29 @@ asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz)
warp_clock();
}
}
- if (tv) {
- cli();
- /* This is revolting. We need to set the xtime.tv_usec
- * correctly. However, the value in this location is
- * is value at the last tick.
- * Discover what correction gettimeofday
- * would have done, and then undo it!
- */
- new_tv.tv_usec -= do_gettimeoffset();
-
- if (new_tv.tv_usec < 0) {
- new_tv.tv_usec += 1000000;
- new_tv.tv_sec--;
- }
-
- xtime = new_tv;
- time_status = TIME_BAD;
- time_maxerror = 0x70000000;
- time_esterror = 0x70000000;
- sti();
- }
+ if (tv)
+ do_settimeofday(&new_tv);
return 0;
}
+long pps_offset = 0; /* pps time offset (us) */
+long pps_jitter = MAXTIME; /* time dispersion (jitter) (us) */
+
+long pps_freq = 0; /* frequency offset (scaled ppm) */
+long pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */
+
+long pps_valid = PPS_VALID; /* pps signal watchdog counter */
+
+int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */
+
+long pps_jitcnt = 0; /* jitter limit exceeded */
+long pps_calcnt = 0; /* calibration intervals */
+long pps_errcnt = 0; /* calibration errors */
+long pps_stbcnt = 0; /* stability limit exceeded */
+
+/* hook for a loadable hardpps kernel module */
+void (*hardpps_ptr)(struct timeval *) = (void (*)(struct timeval *))0;
+
/* adjtimex mainly allows reading (and writing, if superuser) of
* kernel time-keeping variables. used by xntpd.
*/
@@ -348,36 +202,28 @@ asmlinkage int sys_adjtimex(struct timex *txc_p)
/* Local copy of parameter */
struct timex txc;
- error = verify_area(VERIFY_WRITE, txc_p, sizeof(struct timex));
- if (error)
- return error;
-
/* Copy the user data space into the kernel copy
* structure. But bear in mind that the structures
* may change
*/
- memcpy_fromfs(&txc, txc_p, sizeof(struct timex));
+ error = copy_from_user(&txc, txc_p, sizeof(struct timex));
+ if (error)
+ return -EFAULT;
/* In order to modify anything, you gotta be super-user! */
- if (txc.mode && !suser())
+ if (txc.modes && !suser())
return -EPERM;
/* Now we validate the data before disabling interrupts
*/
- if (txc.mode != ADJ_OFFSET_SINGLESHOT && (txc.mode & ADJ_OFFSET))
- /* Microsec field limited to -131000 .. 131000 usecs */
- if (txc.offset <= -(1 << (31 - SHIFT_UPDATE))
- || txc.offset >= (1 << (31 - SHIFT_UPDATE)))
- return -EINVAL;
-
- /* time_status must be in a fairly small range */
- if (txc.mode & ADJ_STATUS)
- if (txc.status < TIME_OK || txc.status > TIME_BAD)
+ if (txc.modes != ADJ_OFFSET_SINGLESHOT && (txc.modes & ADJ_OFFSET))
+ /* adjustment Offset limited to +- .512 seconds */
+ if (txc.offset <= - MAXPHASE || txc.offset >= MAXPHASE )
return -EINVAL;
/* if the quartz is off by more than 10% something is VERY wrong ! */
- if (txc.mode & ADJ_TICK)
+ if (txc.modes & ADJ_TICK)
if (txc.tick < 900000/HZ || txc.tick > 1100000/HZ)
return -EINVAL;
@@ -387,130 +233,115 @@ asmlinkage int sys_adjtimex(struct timex *txc_p)
save_adjust = time_adjust;
/* If there are input parameters, then process them */
- if (txc.mode)
+ if (txc.modes)
{
- if (time_status == TIME_BAD)
- time_status = TIME_OK;
+ if (time_state == TIME_BAD)
+ time_state = TIME_OK;
- if (txc.mode & ADJ_STATUS)
+ if (txc.modes & ADJ_STATUS)
time_status = txc.status;
- if (txc.mode & ADJ_FREQUENCY)
- time_freq = txc.frequency << (SHIFT_KF - 16);
+ if (txc.modes & ADJ_FREQUENCY)
+ time_freq = txc.freq;
- if (txc.mode & ADJ_MAXERROR)
+ if (txc.modes & ADJ_MAXERROR)
time_maxerror = txc.maxerror;
- if (txc.mode & ADJ_ESTERROR)
+ if (txc.modes & ADJ_ESTERROR)
time_esterror = txc.esterror;
- if (txc.mode & ADJ_TIMECONST)
- time_constant = txc.time_constant;
+ if (txc.modes & ADJ_TIMECONST)
+ time_constant = txc.constant;
- if (txc.mode & ADJ_OFFSET)
- if (txc.mode == ADJ_OFFSET_SINGLESHOT)
+ if (txc.modes & ADJ_OFFSET)
+ if ((txc.modes == ADJ_OFFSET_SINGLESHOT)
+ || !(time_status & STA_PLL))
{
time_adjust = txc.offset;
}
- else /* XXX should give an error if other bits set */
+ else if ((time_status & STA_PLL)||(time_status & STA_PPSTIME))
{
- time_offset = txc.offset << SHIFT_UPDATE;
- mtemp = xtime.tv_sec - time_reftime;
- time_reftime = xtime.tv_sec;
- if (mtemp > (MAXSEC+2) || mtemp < 0)
- mtemp = 0;
-
- if (txc.offset < 0)
- time_freq -= (-txc.offset * mtemp) >>
- (time_constant + time_constant);
+ ltemp = (time_status & STA_PPSTIME &&
+ time_status & STA_PPSSIGNAL) ?
+ pps_offset : txc.offset;
+
+ /*
+ * Scale the phase adjustment and
+ * clamp to the operating range.
+ */
+ if (ltemp > MAXPHASE)
+ time_offset = MAXPHASE << SHIFT_UPDATE;
+ else if (ltemp < -MAXPHASE)
+ time_offset = -(MAXPHASE << SHIFT_UPDATE);
else
- time_freq += (txc.offset * mtemp) >>
- (time_constant + time_constant);
+ time_offset = ltemp << SHIFT_UPDATE;
- ltemp = time_tolerance << SHIFT_KF;
+ /*
+ * Select whether the frequency is to be controlled and in which
+ * mode (PLL or FLL). Clamp to the operating range. Ugly
+ * multiply/divide should be replaced someday.
+ */
- if (time_freq > ltemp)
- time_freq = ltemp;
- else if (time_freq < -ltemp)
- time_freq = -ltemp;
- }
- if (txc.mode & ADJ_TICK)
+ if (time_status & STA_FREQHOLD || time_reftime == 0)
+ time_reftime = xtime.tv_sec;
+ mtemp = xtime.tv_sec - time_reftime;
+ time_reftime = xtime.tv_sec;
+ if (time_status & STA_FLL)
+ {
+ if (mtemp >= MINSEC)
+ {
+ ltemp = ((time_offset / mtemp) << (SHIFT_USEC -
+ SHIFT_UPDATE));
+ if (ltemp < 0)
+ time_freq -= -ltemp >> SHIFT_KH;
+ else
+ time_freq += ltemp >> SHIFT_KH;
+ }
+ }
+ else
+ {
+ if (mtemp < MAXSEC)
+ {
+ ltemp *= mtemp;
+ if (ltemp < 0)
+ time_freq -= -ltemp >> (time_constant +
+ time_constant + SHIFT_KF -
+ SHIFT_USEC);
+ else
+ time_freq += ltemp >> (time_constant +
+ time_constant + SHIFT_KF -
+ SHIFT_USEC);
+ }
+ }
+ if (time_freq > time_tolerance)
+ time_freq = time_tolerance;
+ else if (time_freq < -time_tolerance)
+ time_freq = -time_tolerance;
+ } /* STA_PLL || STA_PPSTIME */
+ if (txc.modes & ADJ_TICK)
tick = txc.tick;
}
txc.offset = save_adjust;
- txc.frequency = ((time_freq+1) >> (SHIFT_KF - 16));
+ txc.freq = time_freq;
txc.maxerror = time_maxerror;
txc.esterror = time_esterror;
txc.status = time_status;
- txc.time_constant = time_constant;
+ txc.constant = time_constant;
txc.precision = time_precision;
txc.tolerance = time_tolerance;
txc.time = xtime;
txc.tick = tick;
+ txc.ppsfreq = pps_freq;
+ txc.jitter = pps_jitter;
+ txc.shift = pps_shift;
+ txc.stabil = pps_stabil;
+ txc.jitcnt = pps_jitcnt;
+ txc.calcnt = pps_calcnt;
+ txc.errcnt = pps_errcnt;
+ txc.stbcnt = pps_stbcnt;
sti();
- memcpy_tofs(txc_p, &txc, sizeof(struct timex));
- return time_status;
-}
-
-/*
- * In order to set the CMOS clock precisely, set_rtc_mmss has to be
- * called 500 ms after the second nowtime has started, because when
- * nowtime is written into the registers of the CMOS clock, it will
- * jump to the next second precisely 500 ms later. Check the Motorola
- * MC146818A or Dallas DS12887 data sheet for details.
- */
-int set_rtc_mmss(unsigned long nowtime)
-{
- int retval = 0;
- int real_seconds, real_minutes, cmos_minutes;
- unsigned char save_control, save_freq_select;
-
- save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */
- CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
-
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset prescaler */
- CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
-
- cmos_minutes = CMOS_READ(RTC_MINUTES);
- if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- BCD_TO_BIN(cmos_minutes);
-
- /* since we're only adjusting minutes and seconds,
- * don't interfere with hour overflow. This avoids
- * messing with unknown time zones but requires your
- * RTC not to be off by more than 15 minutes
- */
- real_seconds = nowtime % 60;
- real_minutes = nowtime / 60;
- if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
- real_minutes += 30; /* correct for half hour time zone */
- real_minutes %= 60;
-
- if (abs(real_minutes - cmos_minutes) < 30)
- {
- if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- {
- BIN_TO_BCD(real_seconds);
- BIN_TO_BCD(real_minutes);
- }
- CMOS_WRITE(real_seconds,RTC_SECONDS);
- CMOS_WRITE(real_minutes,RTC_MINUTES);
- }
- else
- retval = -1;
-
- /* The following flags have to be released exactly in this order,
- * otherwise the DS12887 (popular MC146818A clone with integrated
- * battery and quartz) will not reset the oscillator and will not
- * update precisely 500 ms later. You won't find this mentioned in
- * the Dallas Semiconductor data sheets, but who believes data
- * sheets anyway ... -- Markus Kuhn
- */
- CMOS_WRITE(save_control, RTC_CONTROL);
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-
- return retval;
+ return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : time_state;
}