author      Ralf Baechle <ralf@linux-mips.org>    1999-02-15 02:15:32 +0000
committer   Ralf Baechle <ralf@linux-mips.org>    1999-02-15 02:15:32 +0000
commit      86464aed71025541805e7b1515541aee89879e33 (patch)
tree        e01a457a4912a8553bc65524aa3125d51f29f810 /kernel
parent      88f99939ecc6a95a79614574cb7d95ffccfc3466 (diff)
Merge with Linux 2.2.1.
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/acct.c      4
-rw-r--r--   kernel/exit.c      2
-rw-r--r--   kernel/fork.c     45
-rw-r--r--   kernel/ksyms.c    18
-rw-r--r--   kernel/module.c   15
-rw-r--r--   kernel/panic.c     2
-rw-r--r--   kernel/sched.c   474
-rw-r--r--   kernel/signal.c    4
-rw-r--r--   kernel/softirq.c   5
-rw-r--r--   kernel/sysctl.c   44
-rw-r--r--   kernel/time.c    221

11 files changed, 567 insertions, 267 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 110296bca..dc0baed32 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -124,12 +124,12 @@ static int check_free_space(struct file *file) if (acct_active) { if (act < 0) { acct_active = 0; - printk(KERN_INFO "Process accounting paused\r\n"); + printk(KERN_INFO "Process accounting paused\n"); } } else { if (act > 0) { acct_active = 1; - printk(KERN_INFO "Process accounting resumed\r\n"); + printk(KERN_INFO "Process accounting resumed\n"); } } diff --git a/kernel/exit.c b/kernel/exit.c index f0a22099e..58eb6df5d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -258,6 +258,7 @@ static inline void __exit_mm(struct task_struct * tsk) tsk->mm = &init_mm; tsk->swappable = 0; SET_PAGE_DIR(tsk, swapper_pg_dir); + mm_release(); mmput(mm); } } @@ -292,6 +293,7 @@ static void exit_notify(void) kill_pg(current->pgrp,SIGHUP,1); kill_pg(current->pgrp,SIGCONT,1); } + /* Let father know we died */ notify_parent(current, current->exit_signal); diff --git a/kernel/fork.c b/kernel/fork.c index a625aaba3..5c714fe73 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -16,6 +16,7 @@ #include <linux/unistd.h> #include <linux/smp_lock.h> #include <linux/module.h> +#include <linux/vmalloc.h> #include <asm/pgtable.h> #include <asm/mmu_context.h> @@ -230,16 +231,16 @@ static inline int dup_mmap(struct mm_struct * mm) * Link in the new vma even if an error occurred, * so that exit_mmap() can clean up the mess. */ - if((tmp->vm_next = *pprev) != NULL) - (*pprev)->vm_pprev = &tmp->vm_next; + tmp->vm_next = *pprev; *pprev = tmp; - tmp->vm_pprev = pprev; pprev = &tmp->vm_next; if (retval) goto fail_nomem; } retval = 0; + if (mm->map_count >= AVL_MIN_MAP_COUNT) + build_mmap_avl(mm); fail_nomem: flush_tlb_mm(current->mm); @@ -268,7 +269,7 @@ struct mm_struct * mm_alloc(void) * Leave mm->pgd set to the parent's pgd * so that pgd_offset() is always valid. */ - mm->mmap = mm->mmap_cache = NULL; + mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL; /* It has not run yet, so cannot be present in anyone's * cache or tlb. @@ -278,6 +279,30 @@ struct mm_struct * mm_alloc(void) return mm; } +/* Please note the differences between mmput and mm_release. + * mmput is called whenever we stop holding onto a mm_struct, + * error success whatever. + * + * mm_release is called after a mm_struct has been removed + * from the current process. + * + * This difference is important for error handling, when we + * only half set up a mm_struct for a new process and need to restore + * the old one. Because we mmput the new mm_struct before + * restoring the old one. . . + * Eric Biederman 10 January 1998 + */ +void mm_release(void) +{ + struct task_struct *tsk = current; + forget_segments(); + /* notify parent sleeping on vfork() */ + if (tsk->flags & PF_VFORK) { + tsk->flags &= ~PF_VFORK; + up(tsk->p_opptr->vfork_sem); + } +} + /* * Decrement the use count and release all resources for an mm. 
*/ @@ -453,10 +478,12 @@ static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) { unsigned long new_flags = p->flags; - new_flags &= ~(PF_SUPERPRIV | PF_USEDFPU); + new_flags &= ~(PF_SUPERPRIV | PF_USEDFPU | PF_VFORK); new_flags |= PF_FORKNOEXEC; if (!(clone_flags & CLONE_PTRACE)) new_flags &= ~(PF_PTRACED|PF_TRACESYS); + if (clone_flags & CLONE_VFORK) + new_flags |= PF_VFORK; p->flags = new_flags; } @@ -470,6 +497,9 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) int nr; int retval = -ENOMEM; struct task_struct *p; + struct semaphore sem = MUTEX_LOCKED; + + current->vfork_sem = &sem; p = alloc_task_struct(); if (!p) @@ -521,6 +551,7 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) p->p_pptr = p->p_opptr = current; p->p_cptr = NULL; init_waitqueue(&p->wait_chldexit); + p->vfork_sem = NULL; p->sigpending = 0; sigemptyset(&p->signal); @@ -602,9 +633,11 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) } ++total_forks; bad_fork: - up(¤t->mm->mmap_sem); unlock_kernel(); + up(¤t->mm->mmap_sem); fork_out: + if ((clone_flags & CLONE_VFORK) && (retval > 0)) + down(&sem); return retval; bad_fork_cleanup_sighand: diff --git a/kernel/ksyms.c b/kernel/ksyms.c index d567240e6..6cf723a4d 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -36,6 +36,7 @@ #include <linux/ctype.h> #include <linux/file.h> #include <linux/console.h> +#include <linux/poll.h> #if defined(CONFIG_PROC_FS) #include <linux/proc_fs.h> @@ -106,10 +107,11 @@ EXPORT_SYMBOL(update_vm_cache); EXPORT_SYMBOL(vmtruncate); /* filesystem internal functions */ +EXPORT_SYMBOL(in_group_p); EXPORT_SYMBOL(update_atime); EXPORT_SYMBOL(get_super); +EXPORT_SYMBOL(get_fs_type); EXPORT_SYMBOL(getname); -EXPORT_SYMBOL(putname); EXPORT_SYMBOL(__fput); EXPORT_SYMBOL(iget); EXPORT_SYMBOL(iput); @@ -120,7 +122,8 @@ EXPORT_SYMBOL(sys_close); EXPORT_SYMBOL(d_alloc_root); EXPORT_SYMBOL(d_delete); EXPORT_SYMBOL(d_validate); -EXPORT_SYMBOL(d_add); +EXPORT_SYMBOL(d_rehash); +EXPORT_SYMBOL(d_invalidate); /* May be it will be better in dcache.h? 
*/ EXPORT_SYMBOL(d_move); EXPORT_SYMBOL(d_instantiate); EXPORT_SYMBOL(d_alloc); @@ -129,7 +132,9 @@ EXPORT_SYMBOL(d_path); EXPORT_SYMBOL(__mark_inode_dirty); EXPORT_SYMBOL(get_empty_filp); EXPORT_SYMBOL(init_private_file); -EXPORT_SYMBOL(insert_file_free); +EXPORT_SYMBOL(filp_open); +EXPORT_SYMBOL(fput); +EXPORT_SYMBOL(put_filp); EXPORT_SYMBOL(check_disk_change); EXPORT_SYMBOL(invalidate_buffers); EXPORT_SYMBOL(invalidate_inodes); @@ -161,7 +166,6 @@ EXPORT_SYMBOL(posix_lock_file); EXPORT_SYMBOL(posix_test_lock); EXPORT_SYMBOL(posix_block_lock); EXPORT_SYMBOL(posix_unblock_lock); -EXPORT_SYMBOL(locks_remove_flock); EXPORT_SYMBOL(dput); EXPORT_SYMBOL(get_cached_page); EXPORT_SYMBOL(put_cached_page); @@ -172,6 +176,10 @@ EXPORT_SYMBOL(shrink_dcache_parent); EXPORT_SYMBOL(find_inode_number); EXPORT_SYMBOL(is_subdir); EXPORT_SYMBOL(get_unused_fd); +EXPORT_SYMBOL(vfs_rmdir); +EXPORT_SYMBOL(vfs_unlink); +EXPORT_SYMBOL(vfs_rename); +EXPORT_SYMBOL(__pollwait); #if !defined(CONFIG_NFSD) && defined(CONFIG_NFSD_MODULE) EXPORT_SYMBOL(do_nfsservctl); @@ -294,6 +302,7 @@ EXPORT_SYMBOL(release_region); /* process management */ EXPORT_SYMBOL(__wake_up); EXPORT_SYMBOL(sleep_on); +EXPORT_SYMBOL(sleep_on_timeout); EXPORT_SYMBOL(interruptible_sleep_on); EXPORT_SYMBOL(interruptible_sleep_on_timeout); EXPORT_SYMBOL(schedule); @@ -359,6 +368,7 @@ EXPORT_SYMBOL(event); EXPORT_SYMBOL(__down); EXPORT_SYMBOL(__down_interruptible); EXPORT_SYMBOL(__up); +EXPORT_SYMBOL(brw_page); /* all busmice */ EXPORT_SYMBOL(add_mouse_randomness); diff --git a/kernel/module.c b/kernel/module.c index 59884d8e2..c5591db7f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -5,6 +5,7 @@ #include <linux/vmalloc.h> #include <linux/smp_lock.h> #include <asm/pgtable.h> +#include <linux/init.h> /* * Originally by Anonymous (as far as I know...) @@ -30,7 +31,7 @@ static struct module kernel_module = NULL, /* next */ "", /* name */ 0, /* size */ - 1, /* usecount */ + {ATOMIC_INIT(1)}, /* usecount */ MOD_RUNNING, /* flags */ 0, /* nsyms -- to filled in in init_modules */ 0, /* ndeps */ @@ -56,7 +57,7 @@ static void free_module(struct module *, int tag_freed); * Called at boot time */ -void init_modules(void) +__initfunc(void init_modules(void)) { kernel_module.nsyms = __stop___ksymtab - __start___ksymtab; @@ -328,13 +329,13 @@ sys_init_module(const char *name_user, struct module *mod_user) put_mod_name(name); /* Initialize the module. */ - mod->usecount = 1; + atomic_set(&mod->uc.usecount,1); if (mod->init && mod->init() != 0) { - mod->usecount = 0; + atomic_set(&mod->uc.usecount,0); error = -EBUSY; goto err0; } - mod->usecount--; + atomic_dec(&mod->uc.usecount); /* And set it running. */ mod->flags |= MOD_RUNNING; @@ -614,7 +615,7 @@ qm_info(struct module *mod, char *buf, size_t bufsize, size_t *ret) info.size = mod->size; info.flags = mod->flags; info.usecount = (mod_member_present(mod, can_unload) - && mod->can_unload ? -1 : mod->usecount); + && mod->can_unload ? -1 : atomic_read(&mod->uc.usecount)); if (copy_to_user(buf, &info, sizeof(struct module_info))) return -EFAULT; @@ -853,7 +854,7 @@ int get_module_list(char *p) len = sprintf(tmpstr, "%4ld", (mod_member_present(mod, can_unload) && mod->can_unload - ? -1 : mod->usecount)); + ? -1L : (long)atomic_read(&mod->uc.usecount))); safe_copy_str(tmpstr, len); } diff --git a/kernel/panic.c b/kernel/panic.c index 87a0cb4b3..35289454a 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -51,7 +51,7 @@ NORET_TYPE void panic(const char * fmt, ...) 
sys_sync(); #ifdef __SMP__ - smp_message_pass(MSG_ALL_BUT_SELF, MSG_STOP_CPU, 0, 0); + smp_send_stop(); #endif unblank_console(); diff --git a/kernel/sched.c b/kernel/sched.c index c8c297180..add76fbe0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7,6 +7,14 @@ * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and * make semaphores SMP safe * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. + * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * 1998-11-19 Implemented schedule_timeout() and related stuff + * by Andrea Arcangeli + * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to + * serialize accesses to xtime/lost_ticks). + * Copyright (C) 1998 Andrea Arcangeli + * 1998-12-28 Implemented better SMP scheduling by Ingo Molnar */ /* @@ -59,8 +67,8 @@ long time_offset = 0; /* time adjustment (us) */ long time_constant = 2; /* pll time constant */ long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ long time_precision = 1; /* clock precision (us) */ -long time_maxerror = MAXPHASE; /* maximum error (us) */ -long time_esterror = MAXPHASE; /* estimated error (us) */ +long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ +long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ long time_phase = 0; /* phase offset (scaled us) */ long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC; /* frequency offset (scaled ppm) */ long time_adj = 0; /* tick adjust (scaled 1 / HZ) */ @@ -91,47 +99,122 @@ struct kernel_stat kstat = { 0 }; void scheduling_functions_start_here(void) { } -static inline void reschedule_idle(struct task_struct * p) +#ifdef __SMP__ +static void reschedule_idle_slow(struct task_struct * p) { +/* + * (see reschedule_idle() for an explanation first ...) + * + * Pass #2 + * + * We try to find another (idle) CPU for this woken-up process. + * + * On SMP, we mostly try to see if the CPU the task used + * to run on is idle.. but we will use another idle CPU too, + * at this point we already know that this CPU is not + * willing to reschedule in the near future. + * + * An idle CPU is definitely wasted, especially if this CPU is + * running long-timeslice processes. The following algorithm is + * pretty good at finding the best idle CPU to send this process + * to. + * + * [We can try to preempt low-priority processes on other CPUs in + * 2.3. Also we can try to use the avg_slice value to predict + * 'likely reschedule' events even on other CPUs.] + */ + int best_cpu = p->processor, this_cpu = smp_processor_id(); + struct task_struct **idle = task, *tsk, *target_tsk; + int i = smp_num_cpus; + + target_tsk = NULL; + do { + tsk = *idle; + idle++; + if (tsk->has_cpu) { + if (tsk->processor == this_cpu) + continue; + target_tsk = tsk; + if (tsk->processor == best_cpu) { + /* + * bingo, we couldnt get a better + * CPU, activate it. + */ + goto send; /* this one helps GCC ... */ + } + } + } while (--i > 0); /* - * For SMP, we try to see if the CPU the task used - * to run on is idle.. + * found any idle CPU? */ -#if 0 + if (target_tsk) { +send: + target_tsk->need_resched = 1; + smp_send_reschedule(target_tsk->processor); + return; + } +} +#endif /* __SMP__ */ + +/* + * If there is a dependency between p1 and p2, + * don't be too eager to go into the slow schedule. + * In particular, if p1 and p2 both want the kernel + * lock, there is no point in trying to make them + * extremely parallel.. 
+ * + * (No lock - lock_depth < 0) + */ +#define related(p1,p2) ((p1)->lock_depth >= 0 && (p2)->lock_depth >= 0) + +static inline void reschedule_idle(struct task_struct * p) +{ + + if (p->policy != SCHED_OTHER || p->counter > current->counter + 3) { + current->need_resched = 1; + return; + } + +#ifdef __SMP__ /* - * Disable this for now. Ingo has some interesting - * code that looks too complex, and I have some ideas, - * but in the meantime.. One problem is that "wakeup()" - * can be (and is) called before we've even initialized - * SMP completely, so.. + * ("wakeup()" should not be called before we've initialized + * SMP completely. + * Basically a not-yet initialized SMP subsystem can be + * considered as a not-yet working scheduler, simply dont use + * it before it's up and running ...) + * + * SMP rescheduling is done in 2 passes: + * - pass #1: faster: 'quick decisions' + * - pass #2: slower: 'lets try and find another CPU' */ -#ifdef __SMP__ - int want_cpu = p->processor; /* - * Don't even try to find another CPU for us if the task - * ran on this one before.. + * Pass #1 + * + * There are two metrics here: + * + * first, a 'cutoff' interval, currently 0-200 usecs on + * x86 CPUs, depending on the size of the 'SMP-local cache'. + * If the current process has longer average timeslices than + * this, then we utilize the idle CPU. + * + * second, if the wakeup comes from a process context, + * then the two processes are 'related'. (they form a + * 'gang') + * + * An idle CPU is almost always a bad thing, thus we skip + * the idle-CPU utilization only if both these conditions + * are true. (ie. a 'process-gang' rescheduling with rather + * high frequency should stay on the same CPU). + * + * [We can switch to something more finegrained in 2.3.] */ - if (want_cpu != smp_processor_id()) { - struct task_struct **idle = task; - int i = smp_num_cpus; - - do { - struct task_struct *tsk = *idle; - idle++; - /* Something like this.. */ - if (tsk->has_cpu && tsk->processor == want_cpu) { - tsk->need_resched = 1; - smp_send_reschedule(want_cpu); - return; - } - } while (--i > 0); - } -#endif -#endif - if (p->policy != SCHED_OTHER || p->counter > current->counter + 3) - current->need_resched = 1; + if ((current->avg_slice < cacheflush_time) && related(current, p)) + return; + + reschedule_idle_slow(p); +#endif /* __SMP__ */ } /* @@ -149,6 +232,7 @@ static inline void add_to_runqueue(struct task_struct * p) init_task.next_run = p; p->next_run = next; next->prev_run = p; + nr_running++; } static inline void del_from_runqueue(struct task_struct * p) @@ -227,7 +311,6 @@ void wake_up_process(struct task_struct * p) if (!p->next_run) { add_to_runqueue(p); reschedule_idle(p); - nr_running++; } spin_unlock_irqrestore(&runqueue_lock, flags); } @@ -437,23 +520,6 @@ signed long schedule_timeout(signed long timeout) struct timer_list timer; unsigned long expire; - /* - * PARANOID. - */ - if (current->state == TASK_UNINTERRUPTIBLE) - { - printk(KERN_WARNING "schedule_timeout: task not interrutible " - "from %p\n", __builtin_return_address(0)); - /* - * We don' t want to interrupt a not interruptible task - * risking to cause corruption. Better a a deadlock ;-). - */ - timeout = MAX_SCHEDULE_TIMEOUT; - } - - /* - * Here we start for real. - */ switch (timeout) { case MAX_SCHEDULE_TIMEOUT: @@ -501,6 +567,63 @@ signed long schedule_timeout(signed long timeout) } /* + * This one aligns per-CPU data on cacheline boundaries. 
+ */ +static union { + struct schedule_data { + struct task_struct * prev; + long prevstate; + cycles_t last_schedule; + } schedule_data; + char __pad [L1_CACHE_BYTES]; +} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}}; + + +static inline void __schedule_tail (void) +{ +#ifdef __SMP__ + struct schedule_data * sched_data; + + /* + * We might have switched CPUs: + */ + sched_data = & aligned_data[smp_processor_id()].schedule_data; + + /* + * Subtle. In the rare event that we got a wakeup to 'prev' just + * during the reschedule (this is possible, the scheduler is pretty + * parallel), we should do another reschedule in the next task's + * context. schedule() will do the right thing next time around. + * this is equivalent to 'delaying' the wakeup until the reschedule + * has finished. + */ + if (sched_data->prev->state != sched_data->prevstate) + current->need_resched = 1; + + /* + * Release the previous process ... + * + * We have dropped all locks, and we must make sure that we + * only mark the previous process as no longer having a CPU + * after all other state has been seen by other CPU's. Thus + * the write memory barrier! + */ + wmb(); + sched_data->prev->has_cpu = 0; +#endif /* __SMP__ */ +} + +/* + * schedule_tail() is getting called from the fork return path. This + * cleans up all remaining scheduler things, without impacting the + * common case. + */ +void schedule_tail (void) +{ + __schedule_tail(); +} + +/* * 'schedule()' is the scheduler function. It's a very simple and nice * scheduler: it's not perfect, but certainly works for most things. * @@ -512,11 +635,18 @@ signed long schedule_timeout(signed long timeout) */ asmlinkage void schedule(void) { + struct schedule_data * sched_data; struct task_struct * prev, * next; int this_cpu; prev = current; this_cpu = prev->processor; + /* + * 'sched_data' is protected by the fact that we can run + * only one process per CPU. + */ + sched_data = & aligned_data[this_cpu].schedule_data; + if (in_interrupt()) goto scheduling_in_interrupt; release_kernel_lock(prev, this_cpu); @@ -531,6 +661,7 @@ asmlinkage void schedule(void) /* move an exhausted RR process to be last.. */ prev->need_resched = 0; + if (!prev->counter && prev->policy == SCHED_RR) { prev->counter = prev->priority; move_last_runqueue(prev); @@ -546,6 +677,9 @@ asmlinkage void schedule(void) del_from_runqueue(prev); case TASK_RUNNING: } + + sched_data->prevstate = prev->state; + { struct task_struct * p = init_task.next_run; /* @@ -592,25 +726,49 @@ asmlinkage void schedule(void) } } + /* + * maintain the per-process 'average timeslice' value. + * (this has to be recalculated even if we reschedule to + * the same process) Currently this is only used on SMP: + */ #ifdef __SMP__ - next->has_cpu = 1; - next->processor = this_cpu; -#endif + { + cycles_t t, this_slice; - if (prev != next) { - kstat.context_swtch++; - get_mmu_context(next); - switch_to(prev,next); - } + t = get_cycles(); + this_slice = t - sched_data->last_schedule; + sched_data->last_schedule = t; - spin_unlock(&scheduler_lock); + /* + * Simple, exponentially fading average calculation: + */ + prev->avg_slice = this_slice + prev->avg_slice; + prev->avg_slice >>= 1; + } /* - * At this point "prev" is "current", as we just - * switched into it (from an even more "previous" - * prev) + * We drop the scheduler lock early (it's a global spinlock), + * thus we have to lock the previous process from getting + * rescheduled during switch_to(). 
*/ - reacquire_kernel_lock(prev); + prev->has_cpu = 1; + + next->has_cpu = 1; + next->processor = this_cpu; + spin_unlock(&scheduler_lock); +#endif /* __SMP__ */ + if (prev != next) { +#ifdef __SMP__ + sched_data->prev = prev; +#endif + kstat.context_swtch++; + get_mmu_context(next); + switch_to(prev,next); + + __schedule_tail(); + } + + reacquire_kernel_lock(current); return; scheduling_in_interrupt: @@ -618,7 +776,6 @@ scheduling_in_interrupt: *(int *)0 = 0; } - rwlock_t waitqueue_lock = RW_LOCK_UNLOCKED; /* @@ -701,56 +858,64 @@ void __up(struct semaphore *sem) * Either form may be used in conjunction with "up()". * */ -static inline int __do_down(struct semaphore * sem, int task_state) -{ - struct task_struct *tsk = current; - struct wait_queue wait = { tsk, NULL }; - int ret = 0; - tsk->state = task_state; - add_wait_queue(&sem->wait, &wait); +#define DOWN_VAR \ + struct task_struct *tsk = current; \ + struct wait_queue wait = { tsk, NULL }; - /* - * Ok, we're set up. sem->count is known to be less than zero - * so we must wait. - * - * We can let go the lock for purposes of waiting. - * We re-acquire it after awaking so as to protect - * all semaphore operations. - * - * If "up()" is called before we call waking_non_zero() then - * we will catch it right away. If it is called later then - * we will have to go through a wakeup cycle to catch it. - * - * Multiple waiters contend for the semaphore lock to see - * who gets to gate through and who has to wait some more. - */ - for (;;) { - if (waking_non_zero(sem)) /* are we waking up? */ +#define DOWN_HEAD(task_state) \ + \ + \ + tsk->state = (task_state); \ + add_wait_queue(&sem->wait, &wait); \ + \ + /* \ + * Ok, we're set up. sem->count is known to be less than zero \ + * so we must wait. \ + * \ + * We can let go the lock for purposes of waiting. \ + * We re-acquire it after awaking so as to protect \ + * all semaphore operations. \ + * \ + * If "up()" is called before we call waking_non_zero() then \ + * we will catch it right away. If it is called later then \ + * we will have to go through a wakeup cycle to catch it. \ + * \ + * Multiple waiters contend for the semaphore lock to see \ + * who gets to gate through and who has to wait some more. \ + */ \ + for (;;) { \ + if (waking_non_zero(sem, tsk)) /* are we waking up? 
*/ \ break; /* yes, exit loop */ - if (task_state == TASK_INTERRUPTIBLE && signal_pending(tsk)) { - ret = -EINTR; /* interrupted */ - atomic_inc(&sem->count); /* give up on down operation */ - break; - } - - schedule(); - tsk->state = task_state; - } - tsk->state = TASK_RUNNING; +#define DOWN_TAIL(task_state) \ + tsk->state = (task_state); \ + } \ + tsk->state = TASK_RUNNING; \ remove_wait_queue(&sem->wait, &wait); - return ret; -} void __down(struct semaphore * sem) { - __do_down(sem,TASK_UNINTERRUPTIBLE); + DOWN_VAR + DOWN_HEAD(TASK_UNINTERRUPTIBLE) + schedule(); + DOWN_TAIL(TASK_UNINTERRUPTIBLE) } int __down_interruptible(struct semaphore * sem) { - return __do_down(sem,TASK_INTERRUPTIBLE); + DOWN_VAR + int ret = 0; + DOWN_HEAD(TASK_INTERRUPTIBLE) + if (signal_pending(tsk)) + { + ret = -EINTR; /* interrupted */ + atomic_inc(&sem->count); /* give up on down operation */ + break; + } + schedule(); + DOWN_TAIL(TASK_INTERRUPTIBLE) + return ret; } #define SLEEP_ON_VAR \ @@ -803,6 +968,19 @@ void sleep_on(struct wait_queue **p) SLEEP_ON_TAIL } +long sleep_on_timeout(struct wait_queue **p, long timeout) +{ + SLEEP_ON_VAR + + current->state = TASK_UNINTERRUPTIBLE; + + SLEEP_ON_HEAD + timeout = schedule_timeout(timeout); + SLEEP_ON_TAIL + + return timeout; +} + void scheduling_functions_end_here(void) { } static inline void cascade_timers(struct timer_vec *tv) @@ -940,8 +1118,11 @@ static void second_overflow(void) /* Bump the maxerror field */ time_maxerror += time_tolerance >> SHIFT_USEC; - if ( time_maxerror > MAXPHASE ) - time_maxerror = MAXPHASE; + if ( time_maxerror > NTP_PHASE_LIMIT ) { + time_maxerror = NTP_PHASE_LIMIT; + time_state = TIME_ERROR; /* p. 17, sect. 4.3, (b) */ + time_status |= STA_UNSYNC; + } /* * Leap second processing. If in leap-insert state at @@ -965,7 +1146,7 @@ static void second_overflow(void) if (xtime.tv_sec % 86400 == 0) { xtime.tv_sec--; time_state = TIME_OOP; - printk("Clock: inserting leap second 23:59:60 UTC\n"); + printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); } break; @@ -973,7 +1154,7 @@ static void second_overflow(void) if ((xtime.tv_sec + 1) % 86400 == 0) { xtime.tv_sec++; time_state = TIME_WAIT; - printk("Clock: deleting leap second 23:59:59 UTC\n"); + printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); } break; @@ -1021,7 +1202,7 @@ static void second_overflow(void) * the pll and the PPS signal. */ pps_valid++; - if (pps_valid == PPS_VALID) { + if (pps_valid == PPS_VALID) { /* PPS signal lost */ pps_jitter = MAXTIME; pps_stabil = MAXFREQ; time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER | @@ -1036,17 +1217,38 @@ static void second_overflow(void) (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); #if HZ == 100 - /* compensate for (HZ==100) != 128. Add 25% to get 125; => only 3% error */ + /* Compensate for (HZ==100) != (1 << SHIFT_HZ). + * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14) + */ if (time_adj < 0) - time_adj -= -time_adj >> 2; + time_adj -= (-time_adj >> 2) + (-time_adj >> 5); else - time_adj += time_adj >> 2; + time_adj += (time_adj >> 2) + (time_adj >> 5); #endif } /* in the NTP reference this is called "hardclock()" */ static void update_wall_time_one_tick(void) { + if ( (time_adjust_step = time_adjust) != 0 ) { + /* We are doing an adjtime thing. + * + * Prepare time_adjust_step to be within bounds. + * Note that a positive time_adjust means we want the clock + * to run faster. + * + * Limit the amount of the step to be in the range + * -tickadj .. 
+tickadj + */ + if (time_adjust > tickadj) + time_adjust_step = tickadj; + else if (time_adjust < -tickadj) + time_adjust_step = -tickadj; + + /* Reduce by this step the amount of time left */ + time_adjust -= time_adjust_step; + } + xtime.tv_usec += tick + time_adjust_step; /* * Advance the phase, once it gets to one microsecond, then * advance the tick more. @@ -1055,37 +1257,13 @@ static void update_wall_time_one_tick(void) if (time_phase <= -FINEUSEC) { long ltemp = -time_phase >> SHIFT_SCALE; time_phase += ltemp << SHIFT_SCALE; - xtime.tv_usec += tick + time_adjust_step - ltemp; + xtime.tv_usec -= ltemp; } else if (time_phase >= FINEUSEC) { long ltemp = time_phase >> SHIFT_SCALE; time_phase -= ltemp << SHIFT_SCALE; - xtime.tv_usec += tick + time_adjust_step + ltemp; - } else - xtime.tv_usec += tick + time_adjust_step; - - if (time_adjust) { - /* We are doing an adjtime thing. - * - * Modify the value of the tick for next time. - * Note that a positive delta means we want the clock - * to run fast. This means that the tick should be bigger - * - * Limit the amount of the step for *next* tick to be - * in the range -tickadj .. +tickadj - */ - if (time_adjust > tickadj) - time_adjust_step = tickadj; - else if (time_adjust < -tickadj) - time_adjust_step = -tickadj; - else - time_adjust_step = time_adjust; - - /* Reduce by this step the amount of time left */ - time_adjust -= time_adjust_step; + xtime.tv_usec += ltemp; } - else - time_adjust_step = 0; } /* @@ -1189,13 +1367,21 @@ static void update_process_times(unsigned long ticks, unsigned long system) volatile unsigned long lost_ticks = 0; static unsigned long lost_ticks_system = 0; +/* + * This spinlock protect us from races in SMP while playing with xtime. -arca + */ +rwlock_t xtime_lock = RW_LOCK_UNLOCKED; + static inline void update_times(void) { unsigned long ticks; - unsigned long flags; - save_flags(flags); - cli(); + /* + * update_times() is run from the raw timer_bh handler so we + * just know that the irqs are locally enabled and so we don't + * need to save/restore the flags of the local CPU here. -arca + */ + write_lock_irq(&xtime_lock); ticks = lost_ticks; lost_ticks = 0; @@ -1206,12 +1392,12 @@ static inline void update_times(void) calc_load(ticks); update_wall_time(ticks); - restore_flags(flags); + write_unlock_irq(&xtime_lock); update_process_times(ticks, system); } else - restore_flags(flags); + write_unlock_irq(&xtime_lock); } static void timer_bh(void) @@ -1367,7 +1553,7 @@ asmlinkage int sys_nice(int increment) * do a "normalization" of the priority (traditionally * Unix nice values are -20 to 20; Linux doesn't really * use that kind of thing, but uses the length of the - * timeslice instead (default 150 ms). The rounding is + * timeslice instead (default 210 ms). The rounding is * why we want to avoid negative values. */ newprio = (newprio * DEF_PRIORITY + 10) / 20; diff --git a/kernel/signal.c b/kernel/signal.c index b10f59516..c9ea86038 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -28,8 +28,8 @@ static kmem_cache_t *signal_queue_cachep; -static int nr_queued_signals; -static int max_queued_signals = 1024; +int nr_queued_signals; +int max_queued_signals = 1024; void __init signals_init(void) { diff --git a/kernel/softirq.c b/kernel/softirq.c index e76820457..1b364a6a1 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -6,6 +6,9 @@ * do_bottom_half() runs at normal kernel priority: all interrupts * enabled. 
do_bottom_half() is atomic with respect to itself: a * bottom_half handler need not be re-entrant. + * + * Fixed a disable_bh()/enable_bh() race (was causing a console lockup) + * due bh_mask_count not atomic handling. Copyright (C) 1998 Andrea Arcangeli */ #include <linux/mm.h> @@ -17,7 +20,7 @@ /* intr_count died a painless death... -DaveM */ -int bh_mask_count[32]; +atomic_t bh_mask_count[32]; unsigned long bh_active = 0; unsigned long bh_mask = 0; void (*bh_base[32])(void); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 12455ee07..44a62a9e9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -28,7 +28,7 @@ #include <linux/nfs_fs.h> #endif -#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) +#if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ extern int panic_timeout; @@ -36,12 +36,17 @@ extern int console_loglevel, C_A_D; extern int bdf_prm[], bdflush_min[], bdflush_max[]; extern char binfmt_java_interpreter[], binfmt_java_appletviewer[]; extern int sysctl_overcommit_memory; +extern int nr_queued_signals, max_queued_signals; + #ifdef CONFIG_KMOD extern char modprobe_path[]; #endif #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; #endif +#ifdef CONFIG_SYSVIPC +extern int shmmax; +#endif #ifdef __sparc__ extern char reboot_command []; @@ -70,7 +75,9 @@ static struct ctl_table_header root_table_header = static ctl_table kern_table[]; static ctl_table vm_table[]; +#ifdef CONFIG_NET extern ctl_table net_table[]; +#endif static ctl_table proc_table[]; static ctl_table fs_table[]; static ctl_table debug_table[]; @@ -123,18 +130,20 @@ struct inode_operations proc_sys_inode_operations = extern struct proc_dir_entry proc_sys_root; -extern int inodes_stat[]; -extern int dentry_stat[]; static void register_proc_table(ctl_table *, struct proc_dir_entry *); static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); #endif +extern int inodes_stat[]; +extern int dentry_stat[]; /* The default sysctl tables: */ static ctl_table root_table[] = { {CTL_KERN, "kernel", NULL, 0, 0555, kern_table}, {CTL_VM, "vm", NULL, 0, 0555, vm_table}, +#ifdef CONFIG_NET {CTL_NET, "net", NULL, 0, 0555, net_table}, +#endif {CTL_PROC, "proc", NULL, 0, 0555, proc_table}, {CTL_FS, "fs", NULL, 0, 0555, fs_table}, {CTL_DEBUG, "debug", NULL, 0, 0555, debug_table}, @@ -195,12 +204,18 @@ static ctl_table kern_table[] = { {KERN_ACCT, "acct", &acct_parm, 3*sizeof(int), 0644, NULL, &proc_dointvec}, #endif + {KERN_RTSIGNR, "rtsig-nr", &nr_queued_signals, sizeof(int), + 0444, NULL, &proc_dointvec}, + {KERN_RTSIGMAX, "rtsig-max", &max_queued_signals, sizeof(int), + 0644, NULL, &proc_dointvec}, +#ifdef CONFIG_SYSVIPC + {KERN_SHMMAX, "shmmax", &shmmax, sizeof (int), + 0644, NULL, &proc_dointvec}, +#endif {0} }; static ctl_table vm_table[] = { - {VM_SWAPCTL, "swapctl", - &swap_control, sizeof(swap_control_t), 0644, NULL, &proc_dointvec}, {VM_FREEPG, "freepages", &freepages, sizeof(freepages_t), 0644, NULL, &proc_dointvec}, {VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0600, NULL, @@ -216,6 +231,8 @@ static ctl_table vm_table[] = { &pager_daemon, sizeof(pager_daemon_t), 0644, NULL, &proc_dointvec}, {VM_PGT_CACHE, "pagetable_cache", &pgt_cache_water, 2*sizeof(int), 0600, NULL, &proc_dointvec}, + {VM_PAGE_CLUSTER, "page-cluster", + &page_cluster, sizeof(int), 0600, NULL, &proc_dointvec}, {0} }; @@ -866,14 +883,14 @@ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, #else /* CONFIG_PROC_FS */ -int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, - 
void *buffer, size_t *lenp) +int proc_dostring(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) { - return -ENOSYS; + return -ENOSYS; } -int proc_dostring(ctl_table *table, int write, struct file *filp, - void *buffer, size_t *lenp) +static int proc_doutsstring(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) { return -ENOSYS; } @@ -1053,7 +1070,7 @@ int do_struct ( } -#else /* CONFIG_PROC_FS && CONFIG_SYSCTL */ +#else /* CONFIG_SYSCTL */ extern asmlinkage int sys_sysctl(struct __sysctl_args *args) @@ -1109,7 +1126,4 @@ void unregister_sysctl_table(struct ctl_table_header * table) { } -#endif /* CONFIG_PROC_FS && CONFIG_SYSCTL */ - - - +#endif /* CONFIG_SYSCTL */ diff --git a/kernel/time.c b/kernel/time.c index 9b476ece2..b1347c32f 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -16,6 +16,12 @@ * adjtime interface update and CMOS clock write code * 1995-08-13 Torsten Duwe * kernel PLL updated to 1994-12-13 specs (rfc-1589) + * 1999-01-16 Ulrich Windl + * Introduced error checking for many cases in adjtimex(). + * Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10) + * (Even though the technical memorandum forbids it) */ #include <linux/mm.h> @@ -88,9 +94,11 @@ asmlinkage int sys_stime(int * tptr) cli(); xtime.tv_sec = value; xtime.tv_usec = 0; - time_state = TIME_ERROR; - time_maxerror = MAXPHASE; - time_esterror = MAXPHASE; + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_state = TIME_ERROR; /* p. 24, (a) */ + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; sti(); return 0; } @@ -213,6 +221,7 @@ void (*hardpps_ptr)(struct timeval *) = (void (*)(struct timeval *))0; int do_adjtimex(struct timex *txc) { long ltemp, mtemp, save_adjust; + int error = 0; /* In order to modify anything, you gotta be super-user! */ if (txc->modes && !capable(CAP_SYS_TIME)) @@ -235,109 +244,153 @@ int do_adjtimex(struct timex *txc) /* Save for later - semantics of adjtime is to return old value */ save_adjust = time_adjust; +#if 0 /* STA_CLOCKERR is never set yet */ + time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */ +#endif /* If there are input parameters, then process them */ if (txc->modes) { - if (time_state == TIME_BAD) - time_state = TIME_OK; + if (time_state == TIME_ERROR) + time_state = TIME_OK; /* reset error -- why? */ - if (txc->modes & ADJ_STATUS) - time_status = txc->status; + if (txc->modes & ADJ_STATUS) /* only set allowed bits */ + time_status = (txc->status & ~STA_RONLY) | + (time_status & STA_RONLY); - if (txc->modes & ADJ_FREQUENCY) - time_freq = txc->freq; + if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */ + if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) { + error = -EINVAL; + goto leave; + } + time_freq = txc->freq - pps_freq; + } - if (txc->modes & ADJ_MAXERROR) + if (txc->modes & ADJ_MAXERROR) { + if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) { + error = -EINVAL; + goto leave; + } time_maxerror = txc->maxerror; + } - if (txc->modes & ADJ_ESTERROR) + if (txc->modes & ADJ_ESTERROR) { + if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) { + error = -EINVAL; + goto leave; + } time_esterror = txc->esterror; + } - if (txc->modes & ADJ_TIMECONST) + if (txc->modes & ADJ_TIMECONST) { /* p. 
24 */ + if (txc->constant < 0) { /* NTP v4 uses values > 6 */ + error = -EINVAL; + goto leave; + } time_constant = txc->constant; + } - if (txc->modes & ADJ_OFFSET) { - if ((txc->modes == ADJ_OFFSET_SINGLESHOT) - || !(time_status & STA_PLL)) - { - time_adjust = txc->offset; + if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ + if (txc->modes == ADJ_OFFSET_SINGLESHOT) { + /* adjtime() is independent from ntp_adjtime() */ + time_adjust = txc->offset; } - else if ((time_status & STA_PLL)||(time_status & STA_PPSTIME)) - { - ltemp = (time_status & STA_PPSTIME && - time_status & STA_PPSSIGNAL) ? - pps_offset : txc->offset; - - /* - * Scale the phase adjustment and - * clamp to the operating range. - */ - if (ltemp > MAXPHASE) - time_offset = MAXPHASE << SHIFT_UPDATE; - else if (ltemp < -MAXPHASE) - time_offset = -(MAXPHASE << SHIFT_UPDATE); - else - time_offset = ltemp << SHIFT_UPDATE; - - /* - * Select whether the frequency is to be controlled and in which - * mode (PLL or FLL). Clamp to the operating range. Ugly - * multiply/divide should be replaced someday. - */ - - if (time_status & STA_FREQHOLD || time_reftime == 0) + else if ( time_status & (STA_PLL | STA_PPSTIME) ) { + ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) == + (STA_PPSTIME | STA_PPSSIGNAL) ? + pps_offset : txc->offset; + + /* + * Scale the phase adjustment and + * clamp to the operating range. + */ + if (ltemp > MAXPHASE) + time_offset = MAXPHASE << SHIFT_UPDATE; + else if (ltemp < -MAXPHASE) + time_offset = -(MAXPHASE << SHIFT_UPDATE); + else + time_offset = ltemp << SHIFT_UPDATE; + + /* + * Select whether the frequency is to be controlled + * and in which mode (PLL or FLL). Clamp to the operating + * range. Ugly multiply/divide should be replaced someday. + */ + + if (time_status & STA_FREQHOLD || time_reftime == 0) + time_reftime = xtime.tv_sec; + mtemp = xtime.tv_sec - time_reftime; time_reftime = xtime.tv_sec; - mtemp = xtime.tv_sec - time_reftime; - time_reftime = xtime.tv_sec; - if (time_status & STA_FLL) - { - if (mtemp >= MINSEC) - { - ltemp = ((time_offset / mtemp) << (SHIFT_USEC - - SHIFT_UPDATE)); - if (ltemp < 0) - time_freq -= -ltemp >> SHIFT_KH; - else - time_freq += ltemp >> SHIFT_KH; - } - } - else - { - if (mtemp < MAXSEC) - { - ltemp *= mtemp; - if (ltemp < 0) - time_freq -= -ltemp >> (time_constant + - time_constant + SHIFT_KF - - SHIFT_USEC); - else - time_freq += ltemp >> (time_constant + - time_constant + SHIFT_KF - - SHIFT_USEC); - } + if (time_status & STA_FLL) { + if (mtemp >= MINSEC) { + ltemp = (time_offset / mtemp) << (SHIFT_USEC - + SHIFT_UPDATE); + if (ltemp < 0) + time_freq -= -ltemp >> SHIFT_KH; + else + time_freq += ltemp >> SHIFT_KH; + } else /* calibration interval too short (p. 12) */ + time_state = TIME_ERROR; + } else { /* PLL mode */ + if (mtemp < MAXSEC) { + ltemp *= mtemp; + if (ltemp < 0) + time_freq -= -ltemp >> (time_constant + + time_constant + + SHIFT_KF - SHIFT_USEC); + else + time_freq += ltemp >> (time_constant + + time_constant + + SHIFT_KF - SHIFT_USEC); + } else /* calibration interval too long (p. 
12) */ + time_state = TIME_ERROR; } - if (time_freq > time_tolerance) - time_freq = time_tolerance; - else if (time_freq < -time_tolerance) - time_freq = -time_tolerance; + if (time_freq > time_tolerance) + time_freq = time_tolerance; + else if (time_freq < -time_tolerance) + time_freq = -time_tolerance; } /* STA_PLL || STA_PPSTIME */ + } /* txc->modes & ADJ_OFFSET */ + if (txc->modes & ADJ_TICK) { + /* if the quartz is off by more than 10% something is + VERY wrong ! */ + if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) { + error = -EINVAL; + goto leave; + } + tick = txc->tick; } - if (txc->modes & ADJ_TICK) - tick = txc->tick; - + } /* txc->modes */ +leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0 + || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0 + && (time_status & STA_PPSSIGNAL) == 0) + /* p. 24, (b) */ + || ((time_status & (STA_PPSTIME|STA_PPSJITTER)) + == (STA_PPSTIME|STA_PPSJITTER)) + /* p. 24, (c) */ + || ((time_status & STA_PPSFREQ) != 0 + && (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0)) + /* p. 24, (d) */ + time_state = TIME_ERROR; + + if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) + txc->offset = save_adjust; + else { + if (time_offset < 0) + txc->offset = -(-time_offset >> SHIFT_UPDATE); + else + txc->offset = time_offset >> SHIFT_UPDATE; } - txc->offset = save_adjust; - txc->freq = time_freq; + txc->freq = time_freq + pps_freq; txc->maxerror = time_maxerror; txc->esterror = time_esterror; txc->status = time_status; txc->constant = time_constant; txc->precision = time_precision; txc->tolerance = time_tolerance; - txc->time = xtime; + do_gettimeofday(&txc->time); txc->tick = tick; txc->ppsfreq = pps_freq; - txc->jitter = pps_jitter; + txc->jitter = pps_jitter >> PPS_AVG; txc->shift = pps_shift; txc->stabil = pps_stabil; txc->jitcnt = pps_jitcnt; @@ -346,7 +399,7 @@ int do_adjtimex(struct timex *txc) txc->stbcnt = pps_stbcnt; sti(); - return 0; + return(error < 0 ? error : time_state); } asmlinkage int sys_adjtimex(struct timex *txc_p) @@ -360,8 +413,6 @@ asmlinkage int sys_adjtimex(struct timex *txc_p) */ if(copy_from_user(&txc, txc_p, sizeof(struct timex))) return -EFAULT; - if ((ret = do_adjtimex(&txc))) - return ret; - - return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : time_state; + ret = do_adjtimex(&txc); + return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; } |
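
The kernel/time.c part of this merge makes do_adjtimex() validate its inputs (rejecting out-of-range freq, maxerror, esterror, constant and tick values with -EINVAL) and return the NTP clock state instead of always returning 0, while sys_adjtimex() now copies the updated timex structure back to user space before propagating that return value. Below is a minimal user-space sketch of how a caller could observe the reported clock state; it assumes the glibc adjtimex(2) wrapper declared in <sys/timex.h> and is illustrative only, not part of the patch itself.

```c
/* Illustrative sketch: read-only adjtimex(2) query of the kernel clock state.
 * Assumes the glibc wrapper from <sys/timex.h>; not part of this patch. */
#include <stdio.h>
#include <string.h>
#include <sys/timex.h>

int main(void)
{
    struct timex tx;
    int state;

    memset(&tx, 0, sizeof(tx));
    tx.modes = 0;                  /* no modifications: no CAP_SYS_TIME needed */

    state = adjtimex(&tx);         /* on success, returns the clock state      */
    if (state < 0) {
        perror("adjtimex");
        return 1;
    }

    printf("clock state: %d (%s)\n", state,
           state == TIME_ERROR ? "unsynchronized" : "synchronized");
    printf("offset=%ld us  freq=%ld (scaled ppm)  maxerror=%ld us  esterror=%ld us\n",
           tx.offset, tx.freq, tx.maxerror, tx.esterror);
    return 0;
}
```

With the added range checks, a request such as ADJ_TICK with a tick value more than about 10% away from 1000000/HZ, or a negative maxerror, now fails with -EINVAL instead of being applied silently.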