From 3e414096429d55fbc8116171bba3487647bbe638 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Mon, 3 Jul 2000 21:46:06 +0000
Subject: Merge with Linux 2.4.0-test3-pre2.

---
 fs/binfmt_elf.c          |    6 +-
 fs/binfmt_em86.c         |    2 +
 fs/binfmt_misc.c         |    1 +
 fs/binfmt_script.c       |    1 +
 fs/block_dev.c           |    6 +-
 fs/buffer.c              |   35 +-
 fs/coda/dir.c            |    2 +-
 fs/coda/file.c           |    4 +-
 fs/coda/psdev.c          |   41 +-
 fs/coda/upcall.c         |    6 +-
 fs/exec.c                |  101 ++--
 fs/ext2/fsync.c          |    2 +-
 fs/ext2/inode.c          |    2 +-
 fs/ext2/super.c          |    1 -
 fs/fat/inode.c           |    2 +-
 fs/fcntl.c               |    2 +-
 fs/file_table.c          |   24 +-
 fs/hpfs/file.c           |    2 +-
 fs/hpfs/hpfs_fn.h        |    2 +-
 fs/hpfs/namei.c          |   10 +-
 fs/inode.c               |   14 +-
 fs/ioctl.c               |    2 +-
 fs/lockd/clntlock.c      |   29 +-
 fs/lockd/svclock.c       |    2 +-
 fs/locks.c               | 1254 ++++++++++++++++++++++------------------------
 fs/minix/bitmap.c        |    3 -
 fs/minix/fsync.c         |    2 +-
 fs/minix/inode.c         |    4 +-
 fs/namei.c               |  324 ++++++++----
 fs/ncpfs/dir.c           |    6 +-
 fs/ncpfs/file.c          |   38 +-
 fs/ncpfs/inode.c         |   11 +-
 fs/ncpfs/ioctl.c         |   16 +-
 fs/ncpfs/mmap.c          |    1 +
 fs/ncpfs/ncplib_kernel.c |   24 +-
 fs/ncpfs/ncplib_kernel.h |    4 +
 fs/ncpfs/symlink.c       |   56 ++-
 fs/nfs/file.c            |    4 +-
 fs/nfsd/export.c         |   13 +-
 fs/nfsd/nfscache.c       |   13 +-
 fs/nfsd/nfsctl.c         |    1 -
 fs/nfsd/nfsfh.c          |   16 +-
 fs/nfsd/nfsproc.c        |    4 +-
 fs/nfsd/nfssvc.c         |   12 -
 fs/nfsd/vfs.c            |  150 +++---
 fs/ntfs/fs.c             |    2 +
 fs/pipe.c                |    2 +
 fs/proc/base.c           |   10 +-
 fs/qnx4/inode.c          |    1 -
 fs/ramfs/inode.c         |   10 +-
 fs/smbfs/file.c          |    2 +-
 fs/super.c               |  224 +++++----
 fs/sysv/fsync.c          |    2 +-
 fs/sysv/ialloc.c         |    2 +-
 fs/sysv/inode.c          |    3 +-
 fs/udf/fsync.c           |    2 +-
 fs/udf/inode.c           |    4 +-
 fs/udf/super.c           |    1 -
 fs/udf/udfdecl.h         |    4 +-
 fs/ufs/inode.c           |    5 +-
 fs/umsdos/inode.c        |    4 +-
 61 files changed, 1315 insertions(+), 1218 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9fd867d0e..7f0e51187 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -674,9 +674,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 						    interpreter,
 						    &interp_load_addr);
 
-		lock_kernel();
+		allow_write_access(interpreter);
 		fput(interpreter);
-		unlock_kernel();
 		kfree(elf_interpreter);
 
 		if (elf_entry == ~0UL) {
@@ -763,9 +762,8 @@ out:
 
 	/* error cleanup */
 out_free_dentry:
-	lock_kernel();
+	allow_write_access(interpreter);
 	fput(interpreter);
-	unlock_kernel();
 out_free_interp:
 	if (elf_interpreter)
 		kfree(elf_interpreter);
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 1b18094eb..95c24a70a 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -17,6 +17,7 @@
 #include <linux/binfmts.h>
 #include <linux/elf.h>
 #include <linux/init.h>
+#include <linux/file.h>
 
 
 #define EM86_INTERP	"/usr/bin/em86"
@@ -43,6 +44,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
 	}
 
 	bprm->sh_bang++;	/* Well, the bang-shell is implicit... */
+	allow_write_access(bprm->file);
 	fput(bprm->file);
 	bprm->file = NULL;
 
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 0d44c3d4e..f9c30df1b 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -201,6 +201,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (!fmt)
 		goto _ret;
 
+	allow_write_access(bprm->file);
 	fput(bprm->file);
 	bprm->file = NULL;
 
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index dc78f8389..3d5023e2d 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -29,6 +29,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
 	 */
 
 	bprm->sh_bang++;
+	allow_write_access(bprm->file);
 	fput(bprm->file);
 	bprm->file = NULL;
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c455a735d..29972c8ca 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -313,7 +313,7 @@ ssize_t block_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
  *	since the vma has no handle.
  */
  
-static int block_fsync(struct file *filp, struct dentry *dentry)
+static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
 {
 	return fsync_dev(dentry->d_inode->i_rdev);
 }
@@ -597,6 +597,8 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
 				ret = bdev->bd_op->open(fake_inode, &fake_file);
 			if (!ret)
 				atomic_inc(&bdev->bd_openers);
+			else if (!atomic_read(&bdev->bd_openers))
+				bdev->bd_op = NULL;
 			iput(fake_inode);
 		}
 	}
@@ -617,6 +619,8 @@ int blkdev_open(struct inode * inode, struct file * filp)
 			ret = bdev->bd_op->open(inode,filp);
 		if (!ret)
 			atomic_inc(&bdev->bd_openers);
+		else if (!atomic_read(&bdev->bd_openers))
+			bdev->bd_op = NULL;
 	}	
 	up(&bdev->bd_sem);
 	return ret;
diff --git a/fs/buffer.c b/fs/buffer.c
index 47d690fa4..b1e1c33b7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -323,7 +323,7 @@ asmlinkage long sys_sync(void)
  *	filp may be NULL if called via the msync of a vma.
  */
  
-int file_fsync(struct file *filp, struct dentry *dentry)
+int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
 {
 	struct inode * inode = dentry->d_inode;
 	struct super_block * sb;
@@ -332,7 +332,7 @@ int file_fsync(struct file *filp, struct dentry *dentry)
 
 	lock_kernel();
 	/* sync the inode to buffers */
-	write_inode_now(inode);
+	write_inode_now(inode, 0);
 
 	/* sync the superblock to buffers */
 	sb = inode->i_sb;
@@ -360,12 +360,7 @@ asmlinkage long sys_fsync(unsigned int fd)
 		goto out;
 
 	dentry = file->f_dentry;
-	if (!dentry)
-		goto out_putf;
-
 	inode = dentry->d_inode;
-	if (!inode)
-		goto out_putf;
 
 	err = -EINVAL;
 	if (!file->f_op || !file->f_op->fsync)
@@ -373,7 +368,7 @@ asmlinkage long sys_fsync(unsigned int fd)
 
 	/* We need to protect against concurrent writers.. */
 	down(&inode->i_sem);
-	err = file->f_op->fsync(file, dentry);
+	err = file->f_op->fsync(file, dentry, 0);
 	up(&inode->i_sem);
 
 out_putf:
@@ -395,20 +390,14 @@ asmlinkage long sys_fdatasync(unsigned int fd)
 		goto out;
 
 	dentry = file->f_dentry;
-	if (!dentry)
-		goto out_putf;
-
 	inode = dentry->d_inode;
-	if (!inode)
-		goto out_putf;
 
 	err = -EINVAL;
 	if (!file->f_op || !file->f_op->fsync)
 		goto out_putf;
 
-	/* this needs further work, at the moment it is identical to fsync() */
 	down(&inode->i_sem);
-	err = file->f_op->fsync(file, dentry);
+	err = file->f_op->fsync(file, dentry, 1);
 	up(&inode->i_sem);
 
 out_putf:
@@ -2101,6 +2090,7 @@ static int grow_buffers(int size)
 	spin_unlock(&free_list[isize].lock);
 
 	page->buffers = bh;
+	page->flags &= ~(1 << PG_referenced);
 	lru_cache_add(page);
 	atomic_inc(&buffermem_pages);
 	return 1;
@@ -2499,7 +2489,7 @@ asmlinkage long sys_bdflush(int func, long data)
  * the syscall above, but now we launch it ourselves internally with
  * kernel_thread(...)  directly after the first thread in init/main.c
  */
-int bdflush(void * unused) 
+int bdflush(void *sem)
 {
 	struct task_struct *tsk = current;
 	int flushed;
@@ -2521,6 +2511,8 @@ int bdflush(void * unused)
 	recalc_sigpending(tsk);
 	spin_unlock_irq(&tsk->sigmask_lock);
 
+	up((struct semaphore *)sem);
+
 	for (;;) {
 		CHECK_EMERGENCY_SYNC
 
@@ -2555,7 +2547,7 @@ int bdflush(void * unused)
  * You don't need to change your userspace configuration since
  * the userspace `update` will do_exit(0) at the first sys_bdflush().
  */
-int kupdate(void * unused) 
+int kupdate(void *sem)
 {
 	struct task_struct * tsk = current;
 	int interval;
@@ -2571,6 +2563,8 @@ int kupdate(void * unused)
 	recalc_sigpending(tsk);
 	spin_unlock_irq(&tsk->sigmask_lock);
 
+	up((struct semaphore *)sem);
+
 	for (;;) {
 		/* update interval */
 		interval = bdf_prm.b_un.interval;
@@ -2604,8 +2598,11 @@ int kupdate(void * unused)
 
 static int __init bdflush_init(void)
 {
-	kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
-	kernel_thread(kupdate, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+	DECLARE_MUTEX_LOCKED(sem);
+	kernel_thread(bdflush, &sem, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+	down(&sem);
+	kernel_thread(kupdate, &sem, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+	down(&sem);
 	return 0;
 }
 
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index e949f7986..0e6fa5625 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -55,7 +55,7 @@ static void coda_prepare_fakefile(struct inode *coda_inode,
 				  struct dentry *open_dentry);
 static int coda_venus_readdir(struct file *filp, void *dirent, 
 			      filldir_t filldir);
-int coda_fsync(struct file *, struct dentry *dentry);
+int coda_fsync(struct file *, struct dentry *dentry, int datasync);
 
 int coda_hasmknod = 0;
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 9aecd716a..128b07d44 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -40,7 +40,7 @@ coda_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
 }
 
 /* exported from this file (used for dirs) */
-int coda_fsync(struct file *coda_file, struct dentry *coda_dentry)
+int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
 {
 	struct inode *inode = coda_dentry->d_inode;
 	struct dentry cont_dentry;
@@ -60,7 +60,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry)
 	cont_dentry.d_inode = (struct inode *)inode->i_mapping->host;
   
 	down(&cont_dentry.d_inode->i_sem);
-	result = file_fsync(NULL, &cont_dentry);
+	result = file_fsync(NULL, &cont_dentry, datasync);
 	up(&cont_dentry.d_inode->i_sem);
 
 	if ( result == 0 ) {
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 45025e871..14fe68ad4 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -109,7 +109,7 @@ static ssize_t coda_psdev_write(struct file *file, const char *buf,
 	ssize_t retval = 0, count = 0;
 	int error;
 
-	if ( !coda_upc_comm.vc_pid ) 
+	if ( !coda_upc_comm.vc_inuse ) 
 		return -EIO;
         /* Peek at the opcode, uniquefier */
 	if (copy_from_user(&hdr, buf, 2 * sizeof(u_long)))
@@ -291,29 +291,14 @@ static int coda_psdev_open(struct inode * inode, struct file * file)
         struct venus_comm *vcp = &coda_upc_comm;
         ENTRY;
 	
-	/* first opener: must be lento. Initialize & take its pid */
-	if ( (file->f_flags & O_ACCMODE) == O_RDWR ) {
-		if ( vcp->vc_pid ) {
-			printk("Venus pid already set to %d!!\n", vcp->vc_pid);
-			return -1;
-		}
-		if ( vcp->vc_inuse ) {
-			printk("psdev_open: Cannot O_RDWR while open.\n");
-			return -1;
-		}
-	}
-	
-	vcp->vc_inuse++;
-
-	if ( (file->f_flags & O_ACCMODE) == O_RDWR ) {
-		vcp->vc_pid = current->pid;
-		vcp->vc_seq = 0;
-		INIT_LIST_HEAD(&vcp->vc_pending);
-		INIT_LIST_HEAD(&vcp->vc_processing);
+	/* first opener, initialize */
+	if (!vcp->vc_inuse++) {
+            INIT_LIST_HEAD(&vcp->vc_pending);
+            INIT_LIST_HEAD(&vcp->vc_processing);
+            vcp->vc_seq = 0;
 	}
 
-	CDEBUG(D_PSDEV, "inuse: %d, vc_pid %d, caller %d\n",
-	       vcp->vc_inuse, vcp->vc_pid, current->pid);
+	CDEBUG(D_PSDEV, "inuse: %d\n", vcp->vc_inuse);
 
 	EXIT;
         return 0;
@@ -332,17 +317,9 @@ static int coda_psdev_release(struct inode * inode, struct file * file)
 		return -1;
 	}
 
-	vcp->vc_inuse--;
-	CDEBUG(D_PSDEV, "inuse: %d, vc_pid %d, caller %d\n",
-	       vcp->vc_inuse, vcp->vc_pid, current->pid);
-
-	if ( vcp->vc_pid != current->pid ) {
-		/* FIXME: this is broken. If venus does fork(), accounting goes wrong */
-		printk( "Closed by someone else than caller?\n" );
-		return 0;
-	}
+	CDEBUG(D_PSDEV, "psdev_release: inuse %d\n", vcp->vc_inuse);
+	if (--vcp->vc_inuse) return 0;
         
-	vcp->vc_pid = 0;
         /* Wakeup clients so they can return. */
 	CDEBUG(D_PSDEV, "wake up pending clients\n");
 	lh = vcp->vc_pending.next;
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 674c8cb3b..206c9d8b0 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -625,7 +625,7 @@ static inline unsigned long coda_waitfor_upcall(struct upc_req *vmp)
 			set_current_state(TASK_UNINTERRUPTIBLE);
 
                 /* venus died */
-                if ( !coda_upc_comm.vc_pid )
+                if ( !coda_upc_comm.vc_inuse )
                         break;
 
 		/* got a reply */
@@ -688,7 +688,7 @@ static int coda_upcall(struct coda_sb_info *sbi,
 ENTRY;
 
 	vcommp = &coda_upc_comm;
-	if ( !vcommp->vc_pid ) {
+	if ( !vcommp->vc_inuse ) {
 		printk("No pseudo device in upcall comms at %p\n", vcommp);
                 return -ENXIO;
 	}
@@ -733,7 +733,7 @@ ENTRY;
 	CDEBUG(D_UPCALL, 
 	       "..process %d woken up by Venus for req at %p, data at %p\n", 
 	       current->pid, req, req->uc_data);
-	if (vcommp->vc_pid) {      /* i.e. Venus is still alive */
+	if (vcommp->vc_inuse) {      /* i.e. Venus is still alive */
 	    /* Op went through, interrupt or not... */
 	    if (req->uc_flags & REQ_WRITE) {
 		out = (union outputArgs *)req->uc_data;
diff --git a/fs/exec.c b/fs/exec.c
index 2ab337341..ce1031e3b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -101,37 +101,54 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
  */
 asmlinkage long sys_uselib(const char * library)
 {
-	int fd, retval;
 	struct file * file;
+	struct nameidata nd;
+	int error;
 
-	fd = sys_open(library, 0, 0);
-	if (fd < 0)
-		return fd;
-	file = fget(fd);
-	retval = -ENOEXEC;
-	if (file) {
-		if(file->f_op && file->f_op->read) {
-			struct linux_binfmt * fmt;
+	error = user_path_walk(library, &nd);
+	if (error)
+		goto out;
 
-			read_lock(&binfmt_lock);
-			for (fmt = formats ; fmt ; fmt = fmt->next) {
-				if (!fmt->load_shlib)
-					continue;
-				if (!try_inc_mod_count(fmt->module))
-					continue;
-				read_unlock(&binfmt_lock);
-				retval = fmt->load_shlib(file);
-				read_lock(&binfmt_lock);
-				put_binfmt(fmt);
-				if (retval != -ENOEXEC)
-					break;
-			}
+	error = -EINVAL;
+	if (!S_ISREG(nd.dentry->d_inode->i_mode))
+		goto exit;
+
+	error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC);
+	if (error)
+		goto exit;
+
+	lock_kernel();
+	file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+	unlock_kernel();
+	error = PTR_ERR(file);
+	if (IS_ERR(file))
+		goto out;
+
+	error = -ENOEXEC;
+	if(file->f_op && file->f_op->read) {
+		struct linux_binfmt * fmt;
+
+		read_lock(&binfmt_lock);
+		for (fmt = formats ; fmt ; fmt = fmt->next) {
+			if (!fmt->load_shlib)
+				continue;
+			if (!try_inc_mod_count(fmt->module))
+				continue;
 			read_unlock(&binfmt_lock);
+			error = fmt->load_shlib(file);
+			read_lock(&binfmt_lock);
+			put_binfmt(fmt);
+			if (error != -ENOEXEC)
+				break;
 		}
-		fput(file);
+		read_unlock(&binfmt_lock);
 	}
-	sys_close(fd);
-  	return retval;
+	fput(file);
+out:
+  	return error;
+exit:
+	path_release(&nd);
+	goto out;
 }
 
 /*
@@ -319,6 +336,7 @@ int setup_arg_pages(struct linux_binprm *bprm)
 struct file *open_exec(const char *name)
 {
 	struct nameidata nd;
+	struct inode *inode;
 	struct file *file;
 	int err = 0;
 
@@ -328,14 +346,22 @@ struct file *open_exec(const char *name)
 	unlock_kernel();
 	file = ERR_PTR(err);
 	if (!err) {
+		inode = nd.dentry->d_inode;
 		file = ERR_PTR(-EACCES);
-		if (S_ISREG(nd.dentry->d_inode->i_mode)) {
-			int err = permission(nd.dentry->d_inode, MAY_EXEC);
+		if (!IS_NOEXEC(inode) && S_ISREG(inode->i_mode)) {
+			int err = permission(inode, MAY_EXEC);
 			file = ERR_PTR(err);
 			if (!err) {
 				lock_kernel();
 				file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
 				unlock_kernel();
+				if (!IS_ERR(file)) {
+					err = deny_write_access(file);
+					if (err) {
+						fput(file);
+						file = ERR_PTR(err);
+					}
+				}
 out:
 				return file;
 			}
@@ -540,23 +566,13 @@ static inline int must_not_trace_exec(struct task_struct * p)
 int prepare_binprm(struct linux_binprm *bprm)
 {
 	int mode;
-	int retval,id_change,cap_raised;
+	int id_change,cap_raised;
 	struct inode * inode = bprm->file->f_dentry->d_inode;
 
 	mode = inode->i_mode;
-	if (!S_ISREG(mode))			/* must be regular file */
-		return -EACCES;
-	if (!(mode & 0111))			/* with at least _one_ execute bit set */
+	/* Huh? We had already checked for MAY_EXEC, WTF do we check this? */
+	if (!(mode & 0111))	/* with at least _one_ execute bit set */
 		return -EACCES;
-	if (IS_NOEXEC(inode))			/* FS mustn't be mounted noexec */
-		return -EACCES;
-	if (!inode->i_sb)
-		return -EACCES;
-	if ((retval = permission(inode, MAY_EXEC)) != 0)
-		return retval;
-	/* better not execute files which are being written to */
-	if (atomic_read(&inode->i_writecount) > 0)
-		return -ETXTBSY;
 
 	bprm->e_uid = current->euid;
 	bprm->e_gid = current->egid;
@@ -728,6 +744,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 		char * dynloader[] = { "/sbin/loader" };
 		struct file * file;
 
+		allow_write_access(bprm->file);
 		fput(bprm->file);
 		bprm->file = NULL;
 
@@ -761,6 +778,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 			retval = fn(bprm, regs);
 			if (retval >= 0) {
 				put_binfmt(fmt);
+				allow_write_access(bprm->file);
 				if (bprm->file)
 					fput(bprm->file);
 				bprm->file = NULL;
@@ -822,11 +840,13 @@ int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs
 	bprm.loader = 0;
 	bprm.exec = 0;
 	if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
+		allow_write_access(file);
 		fput(file);
 		return bprm.argc;
 	}
 
 	if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
+		allow_write_access(file);
 		fput(file);
 		return bprm.envc;
 	}
@@ -855,6 +875,7 @@ int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs
 
 out:
 	/* Something went wrong, return the inode and free the argument pages*/
+	allow_write_access(bprm.file);
 	if (bprm.file)
 		fput(bprm.file);
 
diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c
index 52ffd6138..1e4478cc7 100644
--- a/fs/ext2/fsync.c
+++ b/fs/ext2/fsync.c
@@ -124,7 +124,7 @@ static int sync_tindirect(struct inode * inode, u32 * tiblock, int wait)
  *	even pass file to fsync ?
  */
 
-int ext2_sync_file(struct file * file, struct dentry *dentry)
+int ext2_sync_file(struct file * file, struct dentry *dentry, int datasync)
 {
 	int wait, err = 0;
 	struct inode *inode = dentry->d_inode;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 7e5263fb1..d999b2b4f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -904,7 +904,7 @@ static int ext2_update_inode(struct inode * inode, int do_sync)
 	return err;
 }
 
-void ext2_write_inode (struct inode * inode)
+void ext2_write_inode (struct inode * inode, int wait)
 {
 	lock_kernel();
 	ext2_update_inode (inode, 0);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index aa6a599fc..d3af3b992 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -593,7 +593,6 @@ struct super_block * ext2_read_super (struct super_block * sb, void * data,
 	/*
 	 * set up enough so that it can read an inode
 	 */
-	sb->s_dev = dev;
 	sb->s_op = &ext2_sops;
 	sb->s_root = d_alloc_root(iget(sb, EXT2_ROOT_INO));
 	if (!sb->s_root) {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 85cc4e1a6..bd8d0ae26 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -837,7 +837,7 @@ static void fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
 	MSDOS_I(inode)->i_ctime_ms = de->ctime_ms;
 }
 
-void fat_write_inode(struct inode *inode)
+void fat_write_inode(struct inode *inode, int wait)
 {
 	struct super_block *sb = inode->i_sb;
 	struct buffer_head *bh;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f6e4e1651..37e32a012 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -252,8 +252,8 @@ asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
 				err = sock_fcntl (filp, cmd, arg);
 			break;
 	}
-	fput(filp);
 	unlock_kernel();
+	fput(filp);
 out:
 	return err;
 }
diff --git a/fs/file_table.c b/fs/file_table.c
index ecaa46896..5c722143d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -16,9 +16,7 @@
 static kmem_cache_t *filp_cache;
 
 /* sysctl tunables... */
-int nr_files;		/* read only */
-int nr_free_files;	/* read only */
-int max_files = NR_FILE;/* tunable */
+struct files_stat_struct files_stat = {0, 0, NR_FILE};
 
 /* Here the new files go */
 static LIST_HEAD(anon_list);
@@ -53,11 +51,11 @@ struct file * get_empty_filp(void)
 	struct file * f;
 
 	file_list_lock();
-	if (nr_free_files > NR_RESERVED_FILES) {
+	if (files_stat.nr_free_files > NR_RESERVED_FILES) {
 	used_one:
 		f = list_entry(free_list.next, struct file, f_list);
 		list_del(&f->f_list);
-		nr_free_files--;
+		files_stat.nr_free_files--;
 	new_one:
 		file_list_unlock();
 		memset(f, 0, sizeof(*f));
@@ -73,25 +71,25 @@ struct file * get_empty_filp(void)
 	/*
 	 * Use a reserved one if we're the superuser
 	 */
-	if (nr_free_files && !current->euid)
+	if (files_stat.nr_free_files && !current->euid)
 		goto used_one;
 	/*
 	 * Allocate a new one if we're below the limit.
 	 */
-	if (nr_files < max_files) {
+	if (files_stat.nr_files < files_stat.max_files) {
 		file_list_unlock();
 		f = kmem_cache_alloc(filp_cache, SLAB_KERNEL);
 		file_list_lock();
 		if (f) {
-			nr_files++;
+			files_stat.nr_files++;
 			goto new_one;
 		}
 		/* Big problems... */
 		printk("VFS: filp allocation failed\n");
 
-	} else if (max_files > old_max) {
-		printk("VFS: file-max limit %d reached\n", max_files);
-		old_max = max_files;
+	} else if (files_stat.max_files > old_max) {
+		printk("VFS: file-max limit %d reached\n", files_stat.max_files);
+		old_max = files_stat.max_files;
 	}
 	file_list_unlock();
 	return NULL;
@@ -148,7 +146,7 @@ void _fput(struct file *file)
 	file_list_lock();
 	list_del(&file->f_list);
 	list_add(&file->f_list, &free_list);
-	nr_free_files++;
+	files_stat.nr_free_files++;
 	file_list_unlock();
 }
 
@@ -160,7 +158,7 @@ void put_filp(struct file *file)
 		file_list_lock();
 		list_del(&file->f_list);
 		list_add(&file->f_list, &free_list);
-		nr_free_files++;
+		files_stat.nr_free_files++;
 		file_list_unlock();
 	}
 }
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index c0707b52c..4a301f593 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -23,7 +23,7 @@ int hpfs_file_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-int hpfs_file_fsync(struct file *file, struct dentry *dentry)
+int hpfs_file_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
 	/*return file_fsync(file, dentry);*/
 	return 0; /* Don't fsync :-) */
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index a01140f1f..78341ca16 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -256,7 +256,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, char *, char *, int);
 
 int hpfs_file_release(struct inode *, struct file *);
 int hpfs_open(struct inode *, struct file *);
-int hpfs_file_fsync(struct file *, struct dentry *);
+int hpfs_file_fsync(struct file *, struct dentry *, int);
 secno hpfs_bmap(struct inode *, unsigned);
 void hpfs_truncate(struct inode *);
 int hpfs_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create);
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index b09ad98ea..5684801df 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -330,7 +330,15 @@ int hpfs_unlink(struct inode *dir, struct dentry *dentry)
 		struct iattr newattrs;
 		int err;
 		hpfs_unlock_2inodes(dir, inode);
-		if (rep || dentry->d_count > 1 || permission(inode, MAY_WRITE) || get_write_access(inode)) goto ret;
+		if (rep)
+			goto ret;
+		d_drop(dentry);
+		if (dentry->d_count > 1 ||
+		    permission(inode, MAY_WRITE) ||
+		    get_write_access(inode)) {
+			d_rehash(dentry);
+			goto ret;
+		}
 		/*printk("HPFS: truncating file before delete.\n");*/
 		down(&inode->i_sem);
 		newattrs.ia_size = 0;
diff --git a/fs/inode.c b/fs/inode.c
index e46359b03..3dbd9f54e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -162,10 +162,10 @@ static inline void wait_on_inode(struct inode *inode)
 }
 
 
-static inline void write_inode(struct inode *inode)
+static inline void write_inode(struct inode *inode, int sync)
 {
 	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
-		inode->i_sb->s_op->write_inode(inode);
+		inode->i_sb->s_op->write_inode(inode, sync);
 }
 
 static inline void __iget(struct inode * inode)
@@ -182,7 +182,7 @@ static inline void __iget(struct inode * inode)
 	inodes_stat.nr_unused--;
 }
 
-static inline void sync_one(struct inode *inode)
+static inline void sync_one(struct inode *inode, int sync)
 {
 	if (inode->i_state & I_LOCK) {
 		__iget(inode);
@@ -199,7 +199,7 @@ static inline void sync_one(struct inode *inode)
 		inode->i_state ^= I_DIRTY | I_LOCK;
 		spin_unlock(&inode_lock);
 
-		write_inode(inode);
+		write_inode(inode, sync);
 
 		spin_lock(&inode_lock);
 		inode->i_state &= ~I_LOCK;
@@ -212,7 +212,7 @@ static inline void sync_list(struct list_head *head)
 	struct list_head * tmp;
 
 	while ((tmp = head->prev) != head)
-		sync_one(list_entry(tmp, struct inode, i_list));
+		sync_one(list_entry(tmp, struct inode, i_list), 0);
 }
 
 /**
@@ -266,14 +266,14 @@ static void sync_all_inodes(void)
  *	dirty. This is primarily needed by knfsd.
  */
  
-void write_inode_now(struct inode *inode)
+void write_inode_now(struct inode *inode, int sync)
 {
 	struct super_block * sb = inode->i_sb;
 
 	if (sb) {
 		spin_lock(&inode_lock);
 		while (inode->i_state & I_DIRTY)
-			sync_one(inode);
+			sync_one(inode, sync);
 		spin_unlock(&inode_lock);
 	}
 	else
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 16ad5ec26..f02d766bd 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -107,8 +107,8 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
 			else if (filp->f_op && filp->f_op->ioctl)
 				error = filp->f_op->ioctl(filp->f_dentry->d_inode, filp, cmd, arg);
 	}
-	fput(filp);
 	unlock_kernel();
+	fput(filp);
 
 out:
 	return error;
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index f89188d12..a3a4f072f 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -162,8 +162,7 @@ reclaimer(void *ptr)
 {
 	struct nlm_host	  *host = (struct nlm_host *) ptr;
 	struct nlm_wait	  *block;
-	struct file_lock  *fl;
-	struct inode	  *inode;
+	struct list_head *tmp;
 
 	/* This one ensures that our parent doesn't terminate while the
 	 * reclaim is in progress */
@@ -171,19 +170,21 @@ reclaimer(void *ptr)
 	lockd_up();
 
 	/* First, reclaim all locks that have been granted previously. */
-	do {
-		for (fl = file_lock_table; fl; fl = fl->fl_nextlink) {
-			inode = fl->fl_file->f_dentry->d_inode;
-			if (inode->i_sb->s_magic == NFS_SUPER_MAGIC
-			 && nlm_cmp_addr(NFS_ADDR(inode), &host->h_addr)
-			 && fl->fl_u.nfs_fl.state != host->h_state
-			 && (fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) {
-				fl->fl_u.nfs_fl.flags &= ~ NFS_LCK_GRANTED;
-				nlmclnt_reclaim(host, fl);
-				break;
-			}
+restart:
+	tmp = file_lock_list.next;
+	while (tmp != &file_lock_list) {
+		struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
+		struct inode *inode = fl->fl_file->f_dentry->d_inode;
+		if (inode->i_sb->s_magic == NFS_SUPER_MAGIC &&
+				nlm_cmp_addr(NFS_ADDR(inode), &host->h_addr) &&
+				fl->fl_u.nfs_fl.state != host->h_state &&
+				(fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) {
+			fl->fl_u.nfs_fl.flags &= ~ NFS_LCK_GRANTED;
+			nlmclnt_reclaim(host, fl);
+			goto restart;
 		}
-	} while (fl);
+		tmp = tmp->next;
+	}
 
 	host->h_reclaiming = 0;
 	wake_up(&host->h_gracewait);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 279fcc3c1..56c8d8173 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -347,7 +347,7 @@ again:
 	/* Append to list of blocked */
 	nlmsvc_insert_block(block, NLM_NEVER);
 
-	if (!block->b_call.a_args.lock.fl.fl_prevblock) {
+	if (!list_empty(&block->b_call.a_args.lock.fl.fl_block)) {
 		/* Now add block to block list of the conflicting lock
 		   if we haven't done so. */
 		dprintk("lockd: blocking on this lock.\n");
diff --git a/fs/locks.c b/fs/locks.c
index 015b8e87a..6ce980735 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -108,530 +108,98 @@
 #include <linux/malloc.h>
 #include <linux/file.h>
 #include <linux/smp_lock.h>
+#include <linux/init.h>
 
 #include <asm/uaccess.h>
 
-static int flock_make_lock(struct file *filp, struct file_lock *fl,
-			       unsigned int cmd);
-static int posix_make_lock(struct file *filp, struct file_lock *fl,
-			       struct flock *l);
-static int flock_locks_conflict(struct file_lock *caller_fl,
-				struct file_lock *sys_fl);
-static int posix_locks_conflict(struct file_lock *caller_fl,
-				struct file_lock *sys_fl);
-static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl);
-static int flock_lock_file(struct file *filp, struct file_lock *caller,
-			   unsigned int wait);
-static int posix_locks_deadlock(struct file_lock *caller,
-				struct file_lock *blocker);
-
-static struct file_lock *locks_empty_lock(void);
-static struct file_lock *locks_init_lock(struct file_lock *,
-					 struct file_lock *);
-static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl);
-static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait);
-static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx);
-
-static void locks_insert_block(struct file_lock *blocker, struct file_lock *waiter);
-static void locks_delete_block(struct file_lock *blocker, struct file_lock *waiter);
-static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait);
-
-struct file_lock *file_lock_table = NULL;
-
-/* Allocate a new lock, and initialize its fields from fl.
- * The lock is not inserted into any lists until locks_insert_lock() or 
- * locks_insert_block() are called.
- */
-static inline struct file_lock *locks_alloc_lock(struct file_lock *fl)
-{
-	return locks_init_lock(locks_empty_lock(), fl);
-}
+LIST_HEAD(file_lock_list);
+static LIST_HEAD(blocked_list);
 
-/* Free lock not inserted in any queue.
- */
-static inline void locks_free_lock(struct file_lock *fl)
-{
-	if (waitqueue_active(&fl->fl_wait))
-		panic("Attempting to free lock with active wait queue");
-
-	if (fl->fl_nextblock != NULL || fl->fl_prevblock != NULL)
-		panic("Attempting to free lock with active block list");
-		
-	kfree(fl);
-	return;
-}
-
-/* Check if two locks overlap each other.
- */
-static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
-{
-	return ((fl1->fl_end >= fl2->fl_start) &&
-		(fl2->fl_end >= fl1->fl_start));
-}
-
-/*
- * Check whether two locks have the same owner
- * N.B. Do we need the test on PID as well as owner?
- * (Clone tasks should be considered as one "owner".)
- */
-static inline int
-locks_same_owner(struct file_lock *fl1, struct file_lock *fl2)
-{
-	return (fl1->fl_owner == fl2->fl_owner) &&
-	       (fl1->fl_pid   == fl2->fl_pid);
-}
+static kmem_cache_t *filelock_cache;
 
-/* Insert waiter into blocker's block list.
- * We use a circular list so that processes can be easily woken up in
- * the order they blocked. The documentation doesn't require this but
- * it seems like the reasonable thing to do.
- */
-static void locks_insert_block(struct file_lock *blocker, 
-			       struct file_lock *waiter)
+/* Allocate an empty lock structure. */
+static struct file_lock *locks_alloc_lock(void)
 {
-	struct file_lock *prevblock;
-
-	if (waiter->fl_prevblock) {
-		printk(KERN_ERR "locks_insert_block: remove duplicated lock "
-			"(pid=%d %Ld-%Ld type=%d)\n",
-			waiter->fl_pid, (long long)waiter->fl_start,
-			(long long)waiter->fl_end, waiter->fl_type);
-		locks_delete_block(waiter->fl_prevblock, waiter);
-	}
-
-	if (blocker->fl_prevblock == NULL)
-		/* No previous waiters - list is empty */
-		prevblock = blocker;
-	else
-		/* Previous waiters exist - add to end of list */
-		prevblock = blocker->fl_prevblock;
-
-	prevblock->fl_nextblock = waiter;
-	blocker->fl_prevblock = waiter;
-	waiter->fl_nextblock = blocker;
-	waiter->fl_prevblock = prevblock;
-	
-	return;
+	struct file_lock *fl;
+	fl = kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
+	return fl;
 }
 
-/* Remove waiter from blocker's block list.
- * When blocker ends up pointing to itself then the list is empty.
- */
-static void locks_delete_block(struct file_lock *blocker,
-			       struct file_lock *waiter)
+/* Free a lock which is not in use. */
+static inline void locks_free_lock(struct file_lock *fl)
 {
-	struct file_lock *nextblock;
-	struct file_lock *prevblock;
-	
-	nextblock = waiter->fl_nextblock;
-	prevblock = waiter->fl_prevblock;
-
-	if (nextblock == NULL)
+	if (fl == NULL) {
+		BUG();
 		return;
-	
-	nextblock->fl_prevblock = prevblock;
-	prevblock->fl_nextblock = nextblock;
-
-	waiter->fl_prevblock = waiter->fl_nextblock = NULL;
-	if (blocker->fl_nextblock == blocker)
-		/* No more locks on blocker's blocked list */
-		blocker->fl_prevblock = blocker->fl_nextblock = NULL;
-	return;
-}
-
-/* The following two are for the benefit of lockd.
- */
-void
-posix_block_lock(struct file_lock *blocker, struct file_lock *waiter)
-{
-	locks_insert_block(blocker, waiter);
-	return;
-}
-
-void
-posix_unblock_lock(struct file_lock *waiter)
-{
-	if (waiter->fl_prevblock)
-		locks_delete_block(waiter->fl_prevblock, waiter);
-	return;
-}
-
-/* Wake up processes blocked waiting for blocker.
- * If told to wait then schedule the processes until the block list
- * is empty, otherwise empty the block list ourselves.
- */
-static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait)
-{
-	struct file_lock *waiter;
-
-	while ((waiter = blocker->fl_nextblock) != NULL) {
-		/* N.B. Is it possible for the notify function to block?? */
-		if (waiter->fl_notify)
-			waiter->fl_notify(waiter);
-		wake_up(&waiter->fl_wait);
-		if (wait) {
-			/* Let the blocked process remove waiter from the
-			 * block list when it gets scheduled.
-			 */
-			current->policy |= SCHED_YIELD;
-			schedule();
-		} else {
-			/* Remove waiter from the block list, because by the
-			 * time it wakes up blocker won't exist any more.
-			 */
-			locks_delete_block(blocker, waiter);
-		}
 	}
-	return;
-}
-
-/* flock() system call entry point. Apply a FL_FLOCK style lock to
- * an open file descriptor.
- */
-asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
-{
-	struct file_lock file_lock;
-	struct file *filp;
-	int error;
-
-	lock_kernel();
-	error = -EBADF;
-	filp = fget(fd);
-	if (!filp)
-		goto out;
-	error = -EINVAL;
-	if (!flock_make_lock(filp, &file_lock, cmd))
-		goto out_putf;
-	error = -EBADF;
-	if ((file_lock.fl_type != F_UNLCK) && !(filp->f_mode & 3))
-		goto out_putf;
-	error = flock_lock_file(filp, &file_lock,
-				(cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1);
-out_putf:
-	fput(filp);
-out:
-	unlock_kernel();
-	return (error);
-}
-
-/* Report the first existing lock that would conflict with l.
- * This implements the F_GETLK command of fcntl().
- */
-int fcntl_getlk(unsigned int fd, struct flock *l)
-{
-	struct file *filp;
-	struct file_lock *fl,file_lock;
-	struct flock flock;
-	int error;
-
-	error = -EFAULT;
-	if (copy_from_user(&flock, l, sizeof(flock)))
-		goto out;
-	error = -EINVAL;
-	if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
-		goto out;
-
-	error = -EBADF;
-	filp = fget(fd);
-	if (!filp)
-		goto out;
-
-	if (!posix_make_lock(filp, &file_lock, &flock))
-		goto out_putf;
-
-	if (filp->f_op->lock) {
-		error = filp->f_op->lock(filp, F_GETLK, &file_lock);
-		if (error < 0)
-			goto out_putf;
-		else if (error == LOCK_USE_CLNT)
-		  /* Bypass for NFS with no locking - 2.0.36 compat */
-		  fl = posix_test_lock(filp, &file_lock);
-		else
-		  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
-	} else {
-		fl = posix_test_lock(filp, &file_lock);
-	}
- 
-	flock.l_type = F_UNLCK;
-	if (fl != NULL) {
-		flock.l_pid = fl->fl_pid;
-		flock.l_start = fl->fl_start;
-		flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
-			fl->fl_end - fl->fl_start + 1;
-		flock.l_whence = 0;
-		flock.l_type = fl->fl_type;
-	}
-	error = -EFAULT;
-	if (!copy_to_user(l, &flock, sizeof(flock)))
-		error = 0;
-  
-out_putf:
-	fput(filp);
-out:
-	return error;
-}
-
-/* Apply the lock described by l to an open file descriptor.
- * This implements both the F_SETLK and F_SETLKW commands of fcntl().
- */
-int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l)
-{
-	struct file *filp;
-	struct file_lock file_lock;
-	struct flock flock;
-	struct inode *inode;
-	int error;
-
-	/*
-	 * This might block, so we do it before checking the inode.
-	 */
-	error = -EFAULT;
-	if (copy_from_user(&flock, l, sizeof(flock)))
-		goto out;
-
-	/* Get arguments and validate them ...
-	 */
-
-	error = -EBADF;
-	filp = fget(fd);
-	if (!filp)
-		goto out;
 
-	error = -EINVAL;
-	inode = filp->f_dentry->d_inode;
-
-	/* Don't allow mandatory locks on files that may be memory mapped
-	 * and shared.
-	 */
-	if (IS_MANDLOCK(inode) &&
-	    (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) {
-		struct vm_area_struct *vma;
-		struct address_space *mapping = inode->i_mapping;
-		spin_lock(&mapping->i_shared_lock);
-		for(vma = mapping->i_mmap;vma;vma = vma->vm_next_share) {
-			if (!(vma->vm_flags & VM_MAYSHARE))
-				continue;
-			spin_unlock(&mapping->i_shared_lock);
-			error = -EAGAIN;
-			goto out_putf;
-		}
-		spin_unlock(&mapping->i_shared_lock);
-	}
+	if (waitqueue_active(&fl->fl_wait))
+		panic("Attempting to free lock with active wait queue");
 
-	error = -EINVAL;
-	if (!posix_make_lock(filp, &file_lock, &flock))
-		goto out_putf;
-	
-	error = -EBADF;
-	switch (flock.l_type) {
-	case F_RDLCK:
-		if (!(filp->f_mode & FMODE_READ))
-			goto out_putf;
-		break;
-	case F_WRLCK:
-		if (!(filp->f_mode & FMODE_WRITE))
-			goto out_putf;
-		break;
-	case F_UNLCK:
-		break;
-	case F_SHLCK:
-	case F_EXLCK:
-#ifdef __sparc__
-/* warn a bit for now, but don't overdo it */
-{
-	static int count = 0;
-	if (!count) {
-		count=1;
-		printk(KERN_WARNING
-		       "fcntl_setlk() called by process %d (%s) with broken flock() emulation\n",
-		       current->pid, current->comm);
-	}
-}
-		if (!(filp->f_mode & 3))
-			goto out_putf;
-		break;
-#endif
-	default:
-		error = -EINVAL;
-		goto out_putf;
-	}
+	if (!list_empty(&fl->fl_block))
+		panic("Attempting to free lock with active block list");
 
-	if (filp->f_op->lock != NULL) {
-		error = filp->f_op->lock(filp, cmd, &file_lock);
-		if (error < 0)
-			goto out_putf;
-	}
-	error = posix_lock_file(filp, &file_lock, cmd == F_SETLKW);
+	if (!list_empty(&fl->fl_link))
+		panic("Attempting to free lock on active lock list");
 
-out_putf:
-	fput(filp);
-out:
-	return error;
+	kmem_cache_free(filelock_cache, fl);
 }
 
 /*
- * This function is called when the file is being removed
- * from the task's fd array.
+ * Initialises the fields of the file lock which are invariant for
+ * free file_locks.
  */
-void locks_remove_posix(struct file *filp, fl_owner_t owner)
+static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags)
 {
-	struct inode * inode = filp->f_dentry->d_inode;
-	struct file_lock file_lock, *fl;
-	struct file_lock **before;
+	struct file_lock *lock = (struct file_lock *) foo;
 
-	/*
-	 * For POSIX locks we free all locks on this file for the given task.
-	 */
-repeat:
-	before = &inode->i_flock;
-	while ((fl = *before) != NULL) {
-		if ((fl->fl_flags & FL_POSIX) && fl->fl_owner == owner) {
-			int (*lock)(struct file *, int, struct file_lock *);
-			lock = filp->f_op->lock;
-			if (lock) {
-				file_lock = *fl;
-				file_lock.fl_type = F_UNLCK;
-			}
-			locks_delete_lock(before, 0);
-			if (lock) {
-				lock(filp, F_SETLK, &file_lock);
-				/* List may have changed: */
-				goto repeat;
-			}
-			continue;
-		}
-		before = &fl->fl_next;
-	}
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) !=
+					SLAB_CTOR_CONSTRUCTOR)
+		return;
+
+	lock->fl_next = NULL;
+	INIT_LIST_HEAD(&lock->fl_link);
+	INIT_LIST_HEAD(&lock->fl_block);
+	init_waitqueue_head(&lock->fl_wait);
 }
 
 /*
- * This function is called on the last close of an open file.
+ * Initialize a new lock from an existing file_lock structure.
  */
-void locks_remove_flock(struct file *filp)
-{
-	struct inode * inode = filp->f_dentry->d_inode; 
-	struct file_lock file_lock, *fl;
-	struct file_lock **before;
-
-repeat:
-	before = &inode->i_flock;
-	while ((fl = *before) != NULL) {
-		if ((fl->fl_flags & FL_FLOCK) && fl->fl_file == filp) {
-			int (*lock)(struct file *, int, struct file_lock *);
-			lock = NULL;
-			if (filp->f_op)
-				lock = filp->f_op->lock;
-			if (lock) {
-				file_lock = *fl;
-				file_lock.fl_type = F_UNLCK;
-			}
-			locks_delete_lock(before, 0);
-			if (lock) {
-				lock(filp, F_SETLK, &file_lock);
-				/* List may have changed: */
-				goto repeat;
-			}
-			continue;
-		}
-		before = &fl->fl_next;
-	}
-}
-
-struct file_lock *
-posix_test_lock(struct file *filp, struct file_lock *fl)
-{
-	struct file_lock *cfl;
-
-	for (cfl = filp->f_dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
-		if (!(cfl->fl_flags & FL_POSIX))
-			continue;
-		if (posix_locks_conflict(cfl, fl))
-			break;
-	}
-
-	return (cfl);
-}
-
-int locks_mandatory_locked(struct inode *inode)
-{
-	fl_owner_t owner = current->files;
-	struct file_lock *fl;
-
-	/*
-	 * Search the lock list for this inode for any POSIX locks.
-	 */
-	lock_kernel();
-	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
-		if (!(fl->fl_flags & FL_POSIX))
-			continue;
-		if (fl->fl_owner != owner)
-			break;
-	}
-	unlock_kernel();
-	return fl ? -EAGAIN : 0;
-}
-
-int locks_mandatory_area(int read_write, struct inode *inode,
-			 struct file *filp, loff_t offset,
-			 size_t count)
-{
-	struct file_lock *fl;
-	struct file_lock tfl;
-	int error;
-
-	memset(&tfl, 0, sizeof(tfl));
-
-	tfl.fl_file = filp;
-	tfl.fl_flags = FL_POSIX | FL_ACCESS;
-	tfl.fl_owner = current->files;
-	tfl.fl_pid = current->pid;
-	init_waitqueue_head(&tfl.fl_wait);
-	tfl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
-	tfl.fl_start = offset;
-	tfl.fl_end = offset + count - 1;
-
-	error = 0;
-	lock_kernel();
-
-repeat:
-	/* Search the lock list for this inode for locks that conflict with
-	 * the proposed read/write.
-	 */
-	for (fl = inode->i_flock; ; fl = fl->fl_next) {
-		error = 0;
-		if (!fl)
-			break;
-		if (!(fl->fl_flags & FL_POSIX))
-			continue;
-		/* Block for writes against a "read" lock,
-		 * and both reads and writes against a "write" lock.
-		 */
-		if (posix_locks_conflict(&tfl, fl)) {
-			error = -EAGAIN;
-			if (filp && (filp->f_flags & O_NONBLOCK))
-				break;
-			error = -ERESTARTSYS;
-			if (signal_pending(current))
-				break;
-			error = -EDEADLK;
-			if (posix_locks_deadlock(&tfl, fl))
-				break;
+static void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+	new->fl_owner = fl->fl_owner;
+	new->fl_pid = fl->fl_pid;
+	new->fl_file = fl->fl_file;
+	new->fl_flags = fl->fl_flags;
+	new->fl_type = fl->fl_type;
+	new->fl_start = fl->fl_start;
+	new->fl_end = fl->fl_end;
+	new->fl_notify = fl->fl_notify;
+	new->fl_insert = fl->fl_insert;
+	new->fl_remove = fl->fl_remove;
+	new->fl_u = fl->fl_u;
+}
 
-			locks_insert_block(fl, &tfl);
-			interruptible_sleep_on(&tfl.fl_wait);
-			locks_delete_block(fl, &tfl);
+/* Fill in a file_lock structure with an appropriate FLOCK lock. */
+static struct file_lock *flock_make_lock(struct file *filp, unsigned int type)
+{
+	struct file_lock *fl = locks_alloc_lock();
+	if (fl == NULL)
+		return NULL;
 
-			/*
-			 * If we've been sleeping someone might have
-			 * changed the permissions behind our back.
-			 */
-			if ((inode->i_mode & (S_ISGID | S_IXGRP)) != S_ISGID)
-				break;
-			goto repeat;
-		}
-	}
-	unlock_kernel();
-	return error;
+	fl->fl_owner = NULL;
+	fl->fl_file = filp;
+	fl->fl_pid = current->pid;
+	fl->fl_flags = FL_FLOCK;
+	fl->fl_type = type;
+	fl->fl_start = 0;
+	fl->fl_end = OFFSET_MAX;
+	fl->fl_notify = NULL;
+	fl->fl_insert = NULL;
+	fl->fl_remove = NULL;
+	
+	return fl;
 }
 
 /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX
@@ -642,21 +210,6 @@ static int posix_make_lock(struct file *filp, struct file_lock *fl,
 {
 	loff_t start;
 
-	memset(fl, 0, sizeof(*fl));
-	
-	init_waitqueue_head(&fl->fl_wait);
-	fl->fl_flags = FL_POSIX;
-
-	switch (l->l_type) {
-	case F_RDLCK:
-	case F_WRLCK:
-	case F_UNLCK:
-		fl->fl_type = l->l_type;
-		break;
-	default:
-		return (0);
-	}
-
 	switch (l->l_whence) {
 	case 0: /*SEEK_SET*/
 		start = 0;
@@ -680,44 +233,168 @@ static int posix_make_lock(struct file *filp, struct file_lock *fl,
 	if (l->l_len == 0)
 		fl->fl_end = OFFSET_MAX;
 	
-	fl->fl_file = filp;
 	fl->fl_owner = current->files;
 	fl->fl_pid = current->pid;
+	fl->fl_file = filp;
+	fl->fl_flags = FL_POSIX;
+	fl->fl_notify = NULL;
+	fl->fl_insert = NULL;
+	fl->fl_remove = NULL;
+
+	switch (l->l_type) {
+	case F_RDLCK:
+	case F_WRLCK:
+	case F_UNLCK:
+		fl->fl_type = l->l_type;
+		break;
+	default:
+		return (0);
+	}
 
 	return (1);
 }
 
-/* Verify a call to flock() and fill in a file_lock structure with
- * an appropriate FLOCK lock.
+/* Check if two locks overlap each other.
  */
-static int flock_make_lock(struct file *filp, struct file_lock *fl,
-			   unsigned int cmd)
+static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
 {
-	memset(fl, 0, sizeof(*fl));
+	return ((fl1->fl_end >= fl2->fl_start) &&
+		(fl2->fl_end >= fl1->fl_start));
+}
 
-	init_waitqueue_head(&fl->fl_wait);
+/*
+ * Check whether two locks have the same owner
+ * N.B. Do we need the test on PID as well as owner?
+ * (Clone tasks should be considered as one "owner".)
+ */
+static inline int
+locks_same_owner(struct file_lock *fl1, struct file_lock *fl2)
+{
+	return (fl1->fl_owner == fl2->fl_owner) &&
+	       (fl1->fl_pid   == fl2->fl_pid);
+}
 
-	switch (cmd & ~LOCK_NB) {
-	case LOCK_SH:
-		fl->fl_type = F_RDLCK;
-		break;
-	case LOCK_EX:
-		fl->fl_type = F_WRLCK;
-		break;
-	case LOCK_UN:
+/* Remove waiter from whichever block list it is queued on and reset
+ * both of its list links, so list_empty() on them is true afterwards.
+ */
+static void locks_delete_block(struct file_lock *waiter)
+{
+	list_del(&waiter->fl_block);
+	INIT_LIST_HEAD(&waiter->fl_block);
+	list_del(&waiter->fl_link);
+	INIT_LIST_HEAD(&waiter->fl_link);
+}
+
+/* Insert waiter into blocker's block list.
+ * We use a circular list so that processes can be easily woken up in
+ * the order they blocked. The documentation doesn't require this but
+ * it seems like the reasonable thing to do.
+ */
+static void locks_insert_block(struct file_lock *blocker, 
+			       struct file_lock *waiter)
+{
+	if (!list_empty(&waiter->fl_block)) {
+		printk(KERN_ERR "locks_insert_block: removing duplicated lock "
+			"(pid=%d %Ld-%Ld type=%d)\n", waiter->fl_pid,
+			waiter->fl_start, waiter->fl_end, waiter->fl_type);
+		locks_delete_block(waiter);
+	}
+	list_add_tail(&waiter->fl_block, &blocker->fl_block);
+//	list_add(&waiter->fl_link, &blocked_list);
+//	waiter->fl_next = blocker;
+}
+
+/* Wake up processes blocked waiting for blocker.
+ * If told to wait then schedule the processes until the block list
+ * is empty, otherwise empty the block list ourselves.
+ */
+static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait)
+{
+	while (!list_empty(&blocker->fl_block)) {
+		struct file_lock *waiter = list_entry(blocker->fl_block.next, struct file_lock, fl_block);
+		/* N.B. Is it possible for the notify function to block?? */
+		if (waiter->fl_notify)
+			waiter->fl_notify(waiter);
+		wake_up(&waiter->fl_wait);
+		if (wait) {
+			/* Let the blocked process remove waiter from the
+			 * block list when it gets scheduled.
+			 */
+			current->policy |= SCHED_YIELD;
+			schedule();
+		} else {
+			/* Remove waiter from the block list, because by the
+			 * time it wakes up blocker won't exist any more.
+			 */
+			locks_delete_block(waiter);
+		}
+	}
+}
+
+/* Insert file lock fl into an inode's lock list at the position indicated
+ * by pos. At the same time add the lock to the global file lock list.
+ */
+static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
+{
+	list_add(&fl->fl_link, &file_lock_list);
+
+	/* insert into file's list */
+	fl->fl_next = *pos;
+	*pos = fl;
+
+	if (fl->fl_insert)
+		fl->fl_insert(fl);
+}
+
+/* Delete a lock and free it.
+ * First remove our lock from the active lock lists. Then call
+ * locks_wake_up_blocks() to wake up processes that are blocked
+ * waiting for this lock. Finally free the lock structure.
+ */
+static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait)
+{
+	int (*lock)(struct file *, int, struct file_lock *);
+	struct file_lock *fl = *thisfl_p;
+
+	*thisfl_p = fl->fl_next;
+	fl->fl_next = NULL;
+
+	list_del(&fl->fl_link);
+	INIT_LIST_HEAD(&fl->fl_link);
+
+	if (fl->fl_remove)
+		fl->fl_remove(fl);
+
+	locks_wake_up_blocks(fl, wait);
+	lock = fl->fl_file->f_op->lock;
+	if (lock) {
 		fl->fl_type = F_UNLCK;
-		break;
-	default:
-		return (0);
+		lock(fl->fl_file, F_SETLK, fl);
 	}
+	locks_free_lock(fl);
+}
 
-	fl->fl_flags = FL_FLOCK;
-	fl->fl_start = 0;
-	fl->fl_end = OFFSET_MAX;
-	fl->fl_file = filp;
-	fl->fl_owner = NULL;
-	
-	return (1);
+/* Determine if lock sys_fl blocks lock caller_fl. Common functionality
+ * checks for overlapping locks and shared/exclusive status.
+ */
+static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+{
+	if (!locks_overlap(caller_fl, sys_fl))
+		return (0);
+
+	switch (caller_fl->fl_type) {
+	case F_RDLCK:
+		return (sys_fl->fl_type == F_WRLCK);
+		
+	case F_WRLCK:
+		return (1);
+
+	default:
+		printk("locks_conflict(): impossible lock type - %d\n",
+		       caller_fl->fl_type);
+		break;
+	}
+	return (0);	/* This should never happen */
 }
 
 /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
@@ -750,27 +427,19 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
 	return (locks_conflict(caller_fl, sys_fl));
 }
 
-/* Determine if lock sys_fl blocks lock caller_fl. Common functionality
- * checks for overlapping locks and shared/exclusive status.
- */
-static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+struct file_lock *
+posix_test_lock(struct file *filp, struct file_lock *fl)
 {
-	if (!locks_overlap(caller_fl, sys_fl))
-		return (0);
-
-	switch (caller_fl->fl_type) {
-	case F_RDLCK:
-		return (sys_fl->fl_type == F_WRLCK);
-		
-	case F_WRLCK:
-		return (1);
+	struct file_lock *cfl;
 
-	default:
-		printk("locks_conflict(): impossible lock type - %d\n",
-		       caller_fl->fl_type);
-		break;
+	for (cfl = filp->f_dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
+		if (!(cfl->fl_flags & FL_POSIX))
+			continue;
+		if (posix_locks_conflict(cfl, fl))
+			break;
 	}
-	return (0);	/* This should never happen */
+
+	return (cfl);
 }
 
 /* This function tests for deadlock condition before putting a process to
@@ -790,8 +459,7 @@ static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
 static int posix_locks_deadlock(struct file_lock *caller_fl,
 				struct file_lock *block_fl)
 {
-	struct file_lock *fl;
-	struct file_lock *bfl;
+	struct list_head *tmp;
 	void		 *caller_owner, *blocked_owner;
 	unsigned int	 caller_pid, blocked_pid;
 
@@ -802,11 +470,14 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
 
 next_task:
 	if (caller_owner == blocked_owner && caller_pid == blocked_pid)
-		return (1);
-	for (fl = file_lock_table; fl != NULL; fl = fl->fl_nextlink) {
-		if (fl->fl_owner == NULL || fl->fl_nextblock == NULL)
+		return 1;
+	list_for_each(tmp, &file_lock_list) {
+		struct list_head *btmp;
+		struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
+		if (fl->fl_owner == NULL || list_empty(&fl->fl_block))
 			continue;
-		for (bfl = fl->fl_nextblock; bfl != fl; bfl = bfl->fl_nextblock) {
+		list_for_each(btmp, &fl->fl_block) {
+			struct file_lock *bfl = list_entry(btmp, struct file_lock, fl_block); /* inner cursor: a waiter on fl */
 			if (bfl->fl_owner == blocked_owner &&
 			    bfl->fl_pid == blocked_pid) {
 				if (fl->fl_owner == caller_owner &&
@@ -819,14 +490,109 @@ next_task:
 			}
 		}
 	}
-	return (0);
+	return 0;
+}
+
+int locks_mandatory_locked(struct inode *inode)
+{
+	fl_owner_t owner = current->files;
+	struct file_lock *fl;
+
+	/*
+	 * Search the lock list for this inode for any POSIX locks.
+	 */
+	lock_kernel();
+	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
+		if (!(fl->fl_flags & FL_POSIX))
+			continue;
+		if (fl->fl_owner != owner)
+			break;
+	}
+	unlock_kernel();
+	return fl ? -EAGAIN : 0;
+}
+
+/* Check a read or write of count bytes at offset against the POSIX locks
+ * on inode.  On a conflict this sleeps interruptibly and rescans, unless
+ * filp is O_NONBLOCK (-EAGAIN), a signal is pending (-ERESTARTSYS) or
+ * posix_locks_deadlock() reports a deadlock (-EDEADLK).  Returns 0 when
+ * no conflicting lock is found, -ENOMEM if no file_lock can be allocated.
+ */
+int locks_mandatory_area(int read_write, struct inode *inode,
+			 struct file *filp, loff_t offset,
+			 size_t count)
+{
+	struct file_lock *fl;
+	struct file_lock *new_fl = locks_alloc_lock();
+	int error;
+
+	if (new_fl == NULL)
+		return -ENOMEM;
+
+	new_fl->fl_owner = current->files;
+	new_fl->fl_pid = current->pid;
+	new_fl->fl_file = filp;
+	new_fl->fl_flags = FL_POSIX | FL_ACCESS;
+	new_fl->fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
+	new_fl->fl_start = offset;
+	new_fl->fl_end = offset + count - 1;
+	/* Slab objects are not zeroed and init_once() leaves these alone;
+	 * locks_wake_up_blocks() calls fl_notify on a waiter when it is set.
+	 */
+	new_fl->fl_notify = NULL;
+	new_fl->fl_insert = NULL;
+	new_fl->fl_remove = NULL;
+
+	error = 0;
+	lock_kernel();
+
+repeat:
+	/* Search the lock list for this inode for locks that conflict with
+	 * the proposed read/write.
+	 */
+	for (fl = inode->i_flock; ; fl = fl->fl_next) {
+		error = 0;
+		if (!fl)
+			break;
+		if (!(fl->fl_flags & FL_POSIX))
+			continue;
+		/* Block for writes against a "read" lock,
+		 * and both reads and writes against a "write" lock.
+		 */
+		if (posix_locks_conflict(new_fl, fl)) {
+			error = -EAGAIN;
+			if (filp && (filp->f_flags & O_NONBLOCK))
+				break;
+			error = -ERESTARTSYS;
+			if (signal_pending(current))
+				break;
+			error = -EDEADLK;
+			if (posix_locks_deadlock(new_fl, fl))
+				break;
+
+			locks_insert_block(fl, new_fl);
+			interruptible_sleep_on(&new_fl->fl_wait);
+			locks_delete_block(new_fl);
+
+			/*
+			 * If we've been sleeping someone might have
+			 * changed the permissions behind our back.
+			 */
+			if ((inode->i_mode & (S_ISGID | S_IXGRP)) != S_ISGID)
+				break;
+			goto repeat;
+		}
+	}
+	unlock_kernel();
+	locks_free_lock(new_fl);
+	return error;
 }
 
 /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks at
  * the head of the list, but that's secret knowledge known only to the next
  * two functions.
  */
-static int flock_lock_file(struct file *filp, struct file_lock *caller,
+static int flock_lock_file(struct file *filp, unsigned int lock_type,
 			   unsigned int wait)
 {
 	struct file_lock *fl;
@@ -834,14 +585,14 @@ static int flock_lock_file(struct file *filp, struct file_lock *caller,
 	struct file_lock **before;
 	struct inode * inode = filp->f_dentry->d_inode;
 	int error, change;
-	int unlock = (caller->fl_type == F_UNLCK);
+	int unlock = (lock_type == F_UNLCK);
 
 	/*
 	 * If we need a new lock, get it in advance to avoid races.
 	 */
 	if (!unlock) {
 		error = -ENOLCK;
-		new_fl = locks_alloc_lock(caller);
+		new_fl = flock_make_lock(filp, lock_type);
 		if (!new_fl)
 			goto out;
 	}
@@ -851,8 +602,8 @@ search:
 	change = 0;
 	before = &inode->i_flock;
 	while (((fl = *before) != NULL) && (fl->fl_flags & FL_FLOCK)) {
-		if (caller->fl_file == fl->fl_file) {
-			if (caller->fl_type == fl->fl_type)
+		if (filp == fl->fl_file) {
+			if (lock_type == fl->fl_type)
 				goto out;
 			change = 1;
 			break;
@@ -888,7 +639,7 @@ repeat:
 			goto out;
 		locks_insert_block(fl, new_fl);
 		interruptible_sleep_on(&new_fl->fl_wait);
-		locks_delete_block(fl, new_fl);
+		locks_delete_block(new_fl);
 		goto repeat;
 	}
 	locks_insert_lock(&inode->i_flock, new_fl);
@@ -928,8 +679,8 @@ int posix_lock_file(struct file *filp, struct file_lock *caller,
 	 * We may need two file_lock structures for this operation,
 	 * so we get them in advance to avoid races.
 	 */
-	new_fl  = locks_empty_lock();
-	new_fl2 = locks_empty_lock();
+	new_fl  = locks_alloc_lock();
+	new_fl2 = locks_alloc_lock();
 	error = -ENOLCK; /* "no luck" */
 	if (!(new_fl && new_fl2))
 		goto out;
@@ -952,7 +703,7 @@ int posix_lock_file(struct file *filp, struct file_lock *caller,
 				goto out;
 			locks_insert_block(fl, caller);
 			interruptible_sleep_on(&caller->fl_wait);
-			locks_delete_block(fl, caller);
+			locks_delete_block(caller);
 			goto repeat;
   		}
   	}
@@ -1058,7 +809,7 @@ int posix_lock_file(struct file *filp, struct file_lock *caller,
 	if (!added) {
 		if (caller->fl_type == F_UNLCK)
 			goto out;
-		locks_init_lock(new_fl, caller);
+		locks_copy_lock(new_fl, caller);
 		locks_insert_lock(before, new_fl);
 		new_fl = NULL;
 	}
@@ -1068,8 +819,9 @@ int posix_lock_file(struct file *filp, struct file_lock *caller,
 			 * so we have to use the second new lock (in this
 			 * case, even F_UNLCK may fail!).
 			 */
-			left = locks_init_lock(new_fl2, right);
+			locks_copy_lock(new_fl2, right);
+			left = new_fl2;	/* insert the fresh copy, not the lock already on the list */
 			locks_insert_lock(before, left);
 			new_fl2 = NULL;
 		}
 		right->fl_start = caller->fl_end + 1;
@@ -1081,101 +833,288 @@ int posix_lock_file(struct file *filp, struct file_lock *caller,
 	}
 out:
 	/*
-	 * Free any unused locks.  (They haven't
-	 * ever been used, so we use kfree().)
+	 * Free any unused locks.
 	 */
 	if (new_fl)
-		kfree(new_fl);
+		locks_free_lock(new_fl);
 	if (new_fl2)
-		kfree(new_fl2);
+		locks_free_lock(new_fl2);
 	return error;
 }
 
-/*
- * Allocate an empty lock structure. We can use GFP_KERNEL now that
- * all allocations are done in advance.
+static inline int flock_translate_cmd(int cmd) {
+	switch (cmd &~ LOCK_NB) {
+	case LOCK_SH:
+		return F_RDLCK;
+	case LOCK_EX:
+		return F_WRLCK;
+	case LOCK_UN:
+		return F_UNLCK;
+	}
+	return -EINVAL;
+}
+
+/* flock() system call entry point. Apply a FL_FLOCK style lock to
+ * an open file descriptor.
  */
-static struct file_lock *locks_empty_lock(void)
+asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
 {
-	/* Okay, let's make a new file_lock structure... */
-	return ((struct file_lock *) kmalloc(sizeof(struct file_lock),
-						GFP_KERNEL));
+	struct file *filp;
+	int error, type;
+
+	error = -EBADF;
+	filp = fget(fd);
+	if (!filp)
+		goto out;
+
+	error = flock_translate_cmd(cmd);
+	if (error < 0)
+		goto out_putf;
+	type = error;
+
+	error = -EBADF;
+	if ((type != F_UNLCK) && !(filp->f_mode & 3))
+		goto out_putf;
+
+	lock_kernel();
+	error = flock_lock_file(filp, type,
+				(cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1);
+	unlock_kernel();
+
+out_putf:
+	fput(filp);
+out:
+	return error;
 }
 
-/*
- * Initialize a new lock from an existing file_lock structure.
+/* Report the first existing lock that would conflict with l.
+ * This implements the F_GETLK command of fcntl().
  */
-static struct file_lock *locks_init_lock(struct file_lock *new,
-					 struct file_lock *fl)
+int fcntl_getlk(unsigned int fd, struct flock *l)
 {
-	if (new) {
-		memset(new, 0, sizeof(*new));
-		new->fl_owner = fl->fl_owner;
-		new->fl_pid = fl->fl_pid;
-		init_waitqueue_head(&new->fl_wait);
-		new->fl_file = fl->fl_file;
-		new->fl_flags = fl->fl_flags;
-		new->fl_type = fl->fl_type;
-		new->fl_start = fl->fl_start;
-		new->fl_end = fl->fl_end;
-		new->fl_notify = fl->fl_notify;
-		new->fl_insert = fl->fl_insert;
-		new->fl_remove = fl->fl_remove;
-		new->fl_u = fl->fl_u;
+	struct file *filp;
+	struct file_lock *fl, *file_lock = locks_alloc_lock();
+	struct flock flock;
+	int error;
+
+	if (!file_lock)
+		return -ENOLCK;	/* allocation failed; nothing to release yet */
+	error = -EFAULT;
+	if (copy_from_user(&flock, l, sizeof(flock)))
+		goto out;
+	error = -EINVAL;
+	if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
+		goto out;
+	error = -EBADF;
+	filp = fget(fd);
+	if (!filp)
+		goto out;
+	error = -EINVAL;	/* a rejected struct flock is not a bad descriptor */
+	if (!posix_make_lock(filp, file_lock, &flock))
+		goto out_putf;
+	if (filp->f_op->lock) {
+		error = filp->f_op->lock(filp, F_GETLK, file_lock);
+		if (error < 0)
+			goto out_putf;
+		else if (error == LOCK_USE_CLNT)
+		  /* Bypass for NFS with no locking - 2.0.36 compat */
+		  fl = posix_test_lock(filp, file_lock);
+		else
+		  fl = (file_lock->fl_type == F_UNLCK ? NULL : file_lock);
+	} else {
+		fl = posix_test_lock(filp, file_lock);
+	}
+ 
+	flock.l_type = F_UNLCK;
+	if (fl != NULL) {
+		flock.l_pid = fl->fl_pid;
+		flock.l_start = fl->fl_start;
+		flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
+			fl->fl_end - fl->fl_start + 1;
+		flock.l_whence = 0;
+		flock.l_type = fl->fl_type;
 	}
-	return new;
+	error = -EFAULT;
+	if (!copy_to_user(l, &flock, sizeof(flock)))
+		error = 0;
+  
+out_putf:
+	fput(filp);
+out:
+	locks_free_lock(file_lock);
+	return error;
 }
 
-/* Insert file lock fl into an inode's lock list at the position indicated
- * by pos. At the same time add the lock to the global file lock list.
+/* Apply the lock described by l to an open file descriptor.
+ * This implements both the F_SETLK and F_SETLKW commands of fcntl().
  */
-static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
+int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l)
 {
-	fl->fl_nextlink = file_lock_table;
-	fl->fl_prevlink = NULL;
-	if (file_lock_table != NULL)
-		file_lock_table->fl_prevlink = fl;
-	file_lock_table = fl;
-	fl->fl_next = *pos;	/* insert into file's list */
-	*pos = fl;
+	struct file *filp;
+	struct file_lock *file_lock = locks_alloc_lock();
+	struct flock flock;
+	struct inode *inode;
+	int error;
 
-	if (fl->fl_insert)
-		fl->fl_insert(fl);
+	if (file_lock == NULL)
+		return -ENOLCK;	/* allocation failed; nothing to release yet */
+	/* This might block, so we do it before checking the inode. */
+	error = -EFAULT;
+	if (copy_from_user(&flock, l, sizeof(flock)))
+		goto out;
 
-	return;
+	/* Get arguments and validate them ...
+	 */
+
+	error = -EBADF;
+	filp = fget(fd);
+	if (!filp)
+		goto out;
+
+	error = -EINVAL;
+	inode = filp->f_dentry->d_inode;
+
+	/* Don't allow mandatory locks on files that may be memory mapped
+	 * and shared.
+	 */
+	if (IS_MANDLOCK(inode) &&
+	    (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) {
+		struct vm_area_struct *vma;
+		struct address_space *mapping = inode->i_mapping;
+		spin_lock(&mapping->i_shared_lock);
+		for(vma = mapping->i_mmap;vma;vma = vma->vm_next_share) {
+			if (!(vma->vm_flags & VM_MAYSHARE))
+				continue;
+			spin_unlock(&mapping->i_shared_lock);
+			error = -EAGAIN;
+			goto out_putf;
+		}
+		spin_unlock(&mapping->i_shared_lock);
+	}
+
+	error = -EINVAL;
+	if (!posix_make_lock(filp, file_lock, &flock))
+		goto out_putf;
+	
+	error = -EBADF;
+	switch (flock.l_type) {
+	case F_RDLCK:
+		if (!(filp->f_mode & FMODE_READ))
+			goto out_putf;
+		break;
+	case F_WRLCK:
+		if (!(filp->f_mode & FMODE_WRITE))
+			goto out_putf;
+		break;
+	case F_UNLCK:
+		break;
+	case F_SHLCK:
+	case F_EXLCK:
+#ifdef __sparc__
+/* warn a bit for now, but don't overdo it */
+{
+	static int count = 0;
+	if (!count) {
+		count=1;
+		printk(KERN_WARNING
+		       "fcntl_setlk() called by process %d (%s) with broken flock() emulation\n",
+		       current->pid, current->comm);
+	}
+}
+		if (!(filp->f_mode & 3))
+			goto out_putf;
+		break;
+#endif
+	default:
+		error = -EINVAL;
+		goto out_putf;
+	}
+
+	if (filp->f_op->lock != NULL) {
+		error = filp->f_op->lock(filp, cmd, file_lock);
+		if (error < 0)
+			goto out_putf;
+	}
+	error = posix_lock_file(filp, file_lock, cmd == F_SETLKW);
+
+out_putf:
+	fput(filp);
+out:
+	locks_free_lock(file_lock);
+	return error;
 }
 
-/* Delete a lock and free it.
- * First remove our lock from the active lock lists. Then call
- * locks_wake_up_blocks() to wake up processes that are blocked
- * waiting for this lock. Finally free the lock structure.
+/*
+ * This function is called when the file is being removed
+ * from the task's fd array.
  */
-static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait)
+void locks_remove_posix(struct file *filp, fl_owner_t owner)
 {
-	struct file_lock *thisfl;
-	struct file_lock *prevfl;
-	struct file_lock *nextfl;
-	
-	thisfl = *thisfl_p;
-	*thisfl_p = thisfl->fl_next;
+	struct inode * inode = filp->f_dentry->d_inode;
+	struct file_lock *fl;
+	struct file_lock **before;
 
-	prevfl = thisfl->fl_prevlink;
-	nextfl = thisfl->fl_nextlink;
+	/*
+	 * For POSIX locks we free all locks on this file for the given task.
+	 */
+repeat:
+	before = &inode->i_flock;
+	while ((fl = *before) != NULL) {
+		if ((fl->fl_flags & FL_POSIX) && fl->fl_owner == owner) {
+			locks_delete_lock(before, 0);
+			goto repeat;
+		}
+		before = &fl->fl_next;
+	}
+}
 
-	if (nextfl != NULL)
-		nextfl->fl_prevlink = prevfl;
+/*
+ * This function is called on the last close of an open file.
+ */
+void locks_remove_flock(struct file *filp)
+{
+	struct inode * inode = filp->f_dentry->d_inode; 
+	struct file_lock file_lock, *fl;
+	struct file_lock **before;
 
-	if (prevfl != NULL)
-		prevfl->fl_nextlink = nextfl;
-	else
-		file_lock_table = nextfl;
+repeat:
+	before = &inode->i_flock;
+	while ((fl = *before) != NULL) {
+		if ((fl->fl_flags & FL_FLOCK) && fl->fl_file == filp) {
+			int (*lock)(struct file *, int, struct file_lock *);
+			lock = NULL;
+			if (filp->f_op)
+				lock = filp->f_op->lock;
+			if (lock) {
+				file_lock = *fl;
+				file_lock.fl_type = F_UNLCK;
+			}
+			locks_delete_lock(before, 0);
+			if (lock) {
+				lock(filp, F_SETLK, &file_lock);
+				/* List may have changed: */
+				goto repeat;
+			}
+			continue;
+		}
+		before = &fl->fl_next;
+	}
+}
 
-	if (thisfl->fl_remove)
-		thisfl->fl_remove(thisfl);
-	
-	locks_wake_up_blocks(thisfl, wait);
-	locks_free_lock(thisfl);
+/* The following two are for the benefit of lockd.
+ */
+void
+posix_block_lock(struct file_lock *blocker, struct file_lock *waiter)
+{
+	lock_kernel();
+	locks_insert_block(blocker, waiter);
+	unlock_kernel();
+}
 
+void
+posix_unblock_lock(struct file_lock *waiter)
+{
+	locks_delete_block(waiter);
 	return;
 }
 
@@ -1202,8 +1141,8 @@ static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
 		     kdevname(inode->i_dev), inode->i_ino,
 		     (long long)fl->fl_start, (long long)fl->fl_end);
 	sprintf(out, "%08lx %08lx %08lx %08lx %08lx\n",
-		(long)fl, (long)fl->fl_prevlink, (long)fl->fl_nextlink,
-		(long)fl->fl_next, (long)fl->fl_nextblock);
+		(long)fl, (long)fl->fl_link.prev, (long)fl->fl_link.next,
+		(long)fl->fl_next, (long)fl->fl_block.next);
 }
 
 static void move_lock_status(char **p, off_t* pos, off_t offset)
@@ -1230,35 +1169,46 @@ static void move_lock_status(char **p, off_t* pos, off_t offset)
 
 int get_locks_status(char *buffer, char **start, off_t offset, int length)
 {
-	struct file_lock *fl;
-	struct file_lock *bfl;
+	struct list_head *tmp;
 	char *q = buffer;
 	off_t pos = 0;
-	int i;
+	int i = 0;
 
-	for (fl = file_lock_table, i = 1; fl != NULL; fl = fl->fl_nextlink, i++) {
-		lock_get_status(q, fl, i, "");
+	lock_kernel();
+	list_for_each(tmp, &file_lock_list) {
+		struct list_head *btmp;
+		struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
+		lock_get_status(q, fl, ++i, "");
 		move_lock_status(&q, &pos, offset);
 
 		if(pos >= offset+length)
 			goto done;
 
-		if ((bfl = fl->fl_nextblock) == NULL)
-			continue;
-		do {
+		list_for_each(btmp, &fl->fl_block) {
+			struct file_lock *bfl = list_entry(btmp,
+					struct file_lock, fl_block);
 			lock_get_status(q, bfl, i, " ->");
 			move_lock_status(&q, &pos, offset);
 
 			if(pos >= offset+length)
 				goto done;
-		} while ((bfl = bfl->fl_nextblock) != fl);
+		}
 	}
 done:
+	unlock_kernel();
 	*start = buffer;
 	if(q-buffer < length)
 		return (q-buffer);
 	return length;
 }
 
+static int __init filelock_init(void)
+{
+	filelock_cache = kmem_cache_create("file lock cache",
+			sizeof(struct file_lock), 0, 0, init_once, NULL);
+	if (!filelock_cache)
+		panic("cannot create file lock slab cache");
+	return 0;
+}
 
-
+module_init(filelock_init)
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 075574876..ca30b7753 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -276,16 +276,13 @@ struct inode * minix_new_inode(const struct inode * dir, int * error)
 	mark_inode_dirty(inode);
 
 	unlock_super(sb);
-printk("m_n_i: allocated inode ");
 	if(DQUOT_ALLOC_INODE(sb, inode)) {
-printk("fails quota test\n");
 		sb->dq_op->drop(inode);
 		inode->i_nlink = 0;
 		iput(inode);
 		*error = -EDQUOT;
 		return NULL;
 	}
-printk("is within quota\n");
 
 	*error = 0;
 	return inode;
diff --git a/fs/minix/fsync.c b/fs/minix/fsync.c
index 30794d27a..96e1ffa86 100644
--- a/fs/minix/fsync.c
+++ b/fs/minix/fsync.c
@@ -329,7 +329,7 @@ static int V2_minix_sync_file(struct inode * inode, struct file * file)
  *	NULL
  */
  
-int minix_sync_file(struct file * file, struct dentry *dentry)
+int minix_sync_file(struct file * file, struct dentry *dentry, int datasync)
 {
 	struct inode *inode = dentry->d_inode;
 	
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 6ddc278aa..fac903800 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -28,7 +28,7 @@
 #include <linux/minix_fs.h>
 
 static void minix_read_inode(struct inode * inode);
-static void minix_write_inode(struct inode * inode);
+static void minix_write_inode(struct inode * inode, int wait);
 static int minix_statfs(struct super_block *sb, struct statfs *buf);
 static int minix_remount (struct super_block * sb, int * flags, char * data);
 
@@ -1232,7 +1232,7 @@ static struct buffer_head *minix_update_inode(struct inode *inode)
 		return V2_minix_update_inode(inode);
 }
 
-static void minix_write_inode(struct inode * inode)
+static void minix_write_inode(struct inode * inode, int wait)
 {
 	struct buffer_head *bh;
 
diff --git a/fs/namei.c b/fs/namei.c
index 501000381..fcda2fd61 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -89,6 +89,12 @@
  *	if the pathname has trailing slashes - follow.
  *	otherwise - don't follow.
  * (applied in that order).
+ *
+ * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
+ * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
+ * During the 2.4 we need to fix the userland stuff depending on it -
+ * hopefully we will be able to get rid of that wart in 2.5. So far only
+ * XEmacs seems to be relying on it...
  */
 
 /* In order to reduce some races, while at the same time doing additional
@@ -191,21 +197,35 @@ int permission(struct inode * inode,int mask)
  * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
  * > 0: (i_writecount) users are writing to the file.
  *
- * WARNING: as soon as we will move get_write_access(), do_mmap() or
- * prepare_binfmt() out of the big lock we will need a spinlock protecting
- * the checks in all 3. For the time being it is not needed.
+ * Normally we operate on that counter with atomic_{inc,dec} and it's safe
+ * except for the cases where we don't hold i_writecount yet. Then we need to
+ * use {get,deny}_write_access() - these functions check the sign and refuse
+ * to do the change if sign is wrong. Exclusion between them is provided by
+ * spinlock (arbitration_lock) and I'll rip the second arsehole to the first
+ * who will try to move it in struct inode - just leave it here.
  */
+static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED;
 int get_write_access(struct inode * inode)
 {
-	if (atomic_read(&inode->i_writecount) < 0)
+	spin_lock(&arbitration_lock);
+	if (atomic_read(&inode->i_writecount) < 0) {
+		spin_unlock(&arbitration_lock);
 		return -ETXTBSY;
+	}
 	atomic_inc(&inode->i_writecount);
+	spin_unlock(&arbitration_lock);
 	return 0;
 }
-
-void put_write_access(struct inode * inode)
+int deny_write_access(struct file * file)
 {
-	atomic_dec(&inode->i_writecount);
+	spin_lock(&arbitration_lock);
+	if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
+		spin_unlock(&arbitration_lock);
+		return -ETXTBSY;
+	}
+	atomic_dec(&file->f_dentry->d_inode->i_writecount);
+	spin_unlock(&arbitration_lock);
+	return 0;
 }
 
 void path_release(struct nameidata *nd)
@@ -337,7 +357,34 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry)
 {
 	return __follow_down(mnt,dentry);
 }
-
+ 
+static inline void follow_dotdot(struct nameidata *nd)
+{
+	while(1) {
+		struct vfsmount *parent;
+		struct dentry *dentry;
+		if (nd->dentry == current->fs->root &&
+		    nd->mnt == current->fs->rootmnt)  {
+			break;
+		}
+		if (nd->dentry != nd->mnt->mnt_root) {
+			dentry = dget(nd->dentry->d_parent);
+			dput(nd->dentry);
+			nd->dentry = dentry;
+			break;
+		}
+		parent=nd->mnt->mnt_parent;
+		if (parent == nd->mnt) {
+			break;
+		}
+		mntget(parent);
+		dentry=dget(nd->mnt->mnt_mountpoint);
+		dput(nd->dentry);
+		nd->dentry = dentry;
+		mntput(nd->mnt);
+		nd->mnt = parent;
+	}
+}
 /*
  * Name resolution.
  *
@@ -403,19 +450,7 @@ int path_walk(const char * name, struct nameidata *nd)
 			case 2:	
 				if (this.name[1] != '.')
 					break;
-				while (1) {
-					if (nd->dentry == current->fs->root &&
-					    nd->mnt == current->fs->rootmnt)
-						break;
-					if (nd->dentry != nd->mnt->mnt_root) {
-						dentry = dget(nd->dentry->d_parent);
-						dput(nd->dentry);
-						nd->dentry = dentry;
-						break;
-					}
-					if (!__follow_up(&nd->mnt, &nd->dentry))
-						break;
-				}
+				follow_dotdot(nd);
 				inode = nd->dentry->d_inode;
 				/* fallthrough */
 			case 1:
@@ -483,19 +518,7 @@ last_component:
 			case 2:	
 				if (this.name[1] != '.')
 					break;
-				while (1) {
-					if (nd->dentry == current->fs->root &&
-					    nd->mnt == current->fs->rootmnt)
-						break;
-					if (nd->dentry != nd->mnt->mnt_root) {
-						dentry = dget(nd->dentry->d_parent);
-						dput(nd->dentry);
-						nd->dentry = dentry;
-						break;
-					}
-					if (!__follow_up(&nd->mnt, &nd->dentry))
-						break;
-				}
+				follow_dotdot(nd);
 				inode = nd->dentry->d_inode;
 				/* fallthrough */
 			case 1:
@@ -771,8 +794,6 @@ static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
 	int error;
 	if (!victim->d_inode || victim->d_parent->d_inode != dir)
 		return -ENOENT;
-	if (IS_DEADDIR(dir))
-		return -ENOENT;
 	error = permission(dir,MAY_WRITE | MAY_EXEC);
 	if (error)
 		return error;
@@ -786,8 +807,6 @@ static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
 			return -ENOTDIR;
 		if (IS_ROOT(victim))
 			return -EBUSY;
-		if (d_mountpoint(victim))
-			return -EBUSY;
 	} else if (S_ISDIR(victim->d_inode->i_mode))
 		return -EISDIR;
 	return 0;
@@ -872,83 +891,92 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 	int acc_mode, error = 0;
 	struct inode *inode;
 	struct dentry *dentry;
+	struct dentry *dir;
+	int count = 0;
 
 	acc_mode = ACC_MODE(flag);
+
+	/*
+	 * The simplest case - just a plain lookup.
+	 */
 	if (!(flag & O_CREAT)) {
 		if (path_init(pathname, lookup_flags(flag), nd))
 			error = path_walk(pathname, nd);
 		if (error)
 			return error;
-
 		dentry = nd->dentry;
-	} else {
-		struct dentry *dir;
+		goto ok;
+	}
 
-		if (path_init(pathname, LOOKUP_PARENT, nd))
-			error = path_walk(pathname, nd);
+	/*
+	 * Create - we need to know the parent.
+	 */
+	if (path_init(pathname, LOOKUP_PARENT, nd))
+		error = path_walk(pathname, nd);
+	if (error)
+		return error;
+
+	/*
+	 * We have the parent and last component. First of all, check
+	 * that we are not asked to creat(2) an obvious directory - that
+	 * will not do.
+	 */
+	error = -EISDIR;
+	if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
+		goto exit;
+
+	dir = nd->dentry;
+	down(&dir->d_inode->i_sem);
+	dentry = lookup_hash(&nd->last, nd->dentry);
+
+do_last:
+	error = PTR_ERR(dentry);
+	if (IS_ERR(dentry)) {
+		up(&dir->d_inode->i_sem);
+		goto exit;
+	}
+
+	/* Negative dentry, just create the file */
+	if (!dentry->d_inode) {
+		error = vfs_create(dir->d_inode, dentry, mode);
+		up(&dir->d_inode->i_sem);
+		dput(nd->dentry);
+		nd->dentry = dentry;
 		if (error)
-			return error;
-		/*
-		 * It's not obvious that open(".", O_CREAT, foo) should
-		 * fail, but it's even less obvious that it should succeed.
-		 * Since O_CREAT means an intention to create the thing and
-		 * open(2) had never created directories, count it as caller's
-		 * luserdom and let him sod off - -EISDIR it is.
-		 */
-		error = -EISDIR;
-		if (nd->last_type != LAST_NORM)
-			goto exit;
-		/* same for foo/ */
-		if (nd->last.name[nd->last.len])
 			goto exit;
+		/* Don't check for write permission, don't truncate */
+		acc_mode = 0;
+		flag &= ~O_TRUNC;
+		goto ok;
+	}
 
-		dir = nd->dentry;
-		down(&dir->d_inode->i_sem);
+	/*
+	 * It already exists.
+	 */
+	up(&dir->d_inode->i_sem);
 
-		dentry = lookup_hash(&nd->last, nd->dentry);
-		error = PTR_ERR(dentry);
-		if (IS_ERR(dentry)) {
-			up(&dir->d_inode->i_sem);
-			goto exit;
-		}
+	error = -EEXIST;
+	if (flag & O_EXCL)
+		goto exit_dput;
 
-		if (dentry->d_inode) {
-			up(&dir->d_inode->i_sem);
-			error = -EEXIST;
-			if (flag & O_EXCL)
-				goto exit_dput;
-			if (dentry->d_inode->i_op &&
-			    dentry->d_inode->i_op->follow_link) {
-				/*
-				 * With O_EXCL it would be -EEXIST.
-				 * If symlink is a dangling one it's -ENOENT.
-				 * Otherwise we open the object it points to.
-				 */
-				error = do_follow_link(dentry, nd);
-				dput(dentry);
-				if (error)
-					return error;
-				dentry = nd->dentry;
-			} else {
-				dput(nd->dentry);
-				nd->dentry = dentry;
-			}
-			error = -EISDIR;
-			if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
-				goto exit;
-		} else {
-			error = vfs_create(dir->d_inode, dentry, mode);
-			up(&dir->d_inode->i_sem);
-			/* Don't check for write permission, don't truncate */
-			acc_mode = 0;
-			flag &= ~O_TRUNC;
-			dput(nd->dentry);
-			nd->dentry = dentry;
-			if (error)
-				goto exit;
-		}
+	if (d_mountpoint(dentry)) {
+		error = -ELOOP;
+		if (flag & O_NOFOLLOW)
+			goto exit_dput;
+		do __follow_down(&nd->mnt,&dentry); while(d_mountpoint(dentry));
 	}
+	error = -ENOENT;
+	if (!dentry->d_inode)
+		goto exit_dput;
+	if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
+		goto do_link;
 
+	dput(nd->dentry);
+	nd->dentry = dentry;
+	error = -EISDIR;
+	if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
+		goto exit;
+ok:
 	error = -ENOENT;
 	inode = dentry->d_inode;
 	if (!inode)
@@ -1023,6 +1051,47 @@ exit_dput:
 exit:
 	path_release(nd);
 	return error;
+
+do_link:
+	error = -ELOOP;
+	if (flag & O_NOFOLLOW)
+		goto exit_dput;
+	/*
+	 * This is subtle. Instead of calling do_follow_link() we do the
+	 * thing by hands. The reason is that this way we have zero link_count
+	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
+	 * After that we have the parent and last component, i.e.
+	 * we are in the same situation as after the first path_walk().
+	 * Well, almost - if the last component is normal we get its copy
+	 * stored in nd->last.name and we will have to putname() it when we
+	 * are done. Procfs-like symlinks just set LAST_BIND.
+	 */
+	UPDATE_ATIME(dentry->d_inode);
+	error = dentry->d_inode->i_op->follow_link(dentry, nd);
+	dput(dentry);
+	if (error)
+		return error;
+	if (nd->last_type == LAST_BIND) {
+		dentry = nd->dentry;
+		goto ok;
+	}
+	error = -EISDIR;
+	if (nd->last_type != LAST_NORM)
+		goto exit;
+	if (nd->last.name[nd->last.len]) {
+		putname(nd->last.name);
+		goto exit;
+	}
+	if (count++==32) {
+		dentry = nd->dentry;
+		putname(nd->last.name);
+		goto ok;
+	}
+	dir = nd->dentry;
+	down(&dir->d_inode->i_sem);
+	dentry = lookup_hash(&nd->last, nd->dentry);
+	putname(nd->last.name);
+	goto do_last;
 }
 
 static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
@@ -1213,9 +1282,15 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 
 	double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
 	d_unhash(dentry);
-	error = dir->i_op->rmdir(dir, dentry);
-	if (!error)
-		dentry->d_inode->i_flags |= S_DEAD;
+	if (IS_DEADDIR(dir))
+		error = -ENOENT;
+	else if (d_mountpoint(dentry))
+		error = -EBUSY;
+	else {
+		error = dir->i_op->rmdir(dir, dentry);
+		if (!error)
+			dentry->d_inode->i_flags |= S_DEAD;
+	}
 	double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
 	if (!error)
 		d_delete(dentry);
@@ -1275,9 +1350,13 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 		error = -EPERM;
 		if (dir->i_op && dir->i_op->unlink) {
 			DQUOT_INIT(dir);
-			error = dir->i_op->unlink(dir, dentry);
-			if (!error)
-				d_delete(dentry);
+			if (d_mountpoint(dentry))
+				error = -EBUSY;
+			else {
+				error = dir->i_op->unlink(dir, dentry);
+				if (!error)
+					d_delete(dentry);
+			}
 		}
 	}
 	up(&dir->i_zombie);
@@ -1555,7 +1634,12 @@ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 	} else
 		double_down(&old_dir->i_zombie,
 			    &new_dir->i_zombie);
-	error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
+	if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir))
+		error = -ENOENT;
+	else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
+		error = -EBUSY;
+	else 
+		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
 	if (target) {
 		if (!error)
 			target->i_flags |= S_DEAD;
@@ -1603,7 +1687,10 @@ int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 	DQUOT_INIT(old_dir);
 	DQUOT_INIT(new_dir);
 	double_down(&old_dir->i_zombie, &new_dir->i_zombie);
-	error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
+	if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
+		error = -EBUSY;
+	else
+		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
 	double_up(&old_dir->i_zombie, &new_dir->i_zombie);
 	if (error)
 		return error;
@@ -1734,6 +1821,8 @@ out:
 static inline int
 __vfs_follow_link(struct nameidata *nd, const char *link)
 {
+	int res = 0;
+	char *name;
 	if (IS_ERR(link))
 		goto fail;
 
@@ -1741,10 +1830,25 @@ __vfs_follow_link(struct nameidata *nd, const char *link)
 		path_release(nd);
 		if (!walk_init_root(link, nd))
 			/* weird __emul_prefix() stuff did it */
-			return 0;
+			goto out;
 	}
-	return path_walk(link, nd);
-
+	res = path_walk(link, nd);
+out:
+	if (current->link_count || res || nd->last_type!=LAST_NORM)
+		return res;
+	/*
+	 * If it is an iterative symlinks resolution in open_namei() we
+	 * have to copy the last component. And all that crap because of
+	 * bloody create() on broken symlinks. Furrfu...
+	 */
+	name = __getname();
+	if (!name)	/* NOTE(review): __getname() is expected to return NULL, not ERR_PTR, on failure */
+		goto fail_name;
+	strcpy(name, nd->last.name);
+	nd->last.name = name;
+	return 0;
+fail_name:
+	link = ERR_PTR(-ENOMEM);
 fail:
 	path_release(nd);
 	return PTR_ERR(link);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 55daea198..11694e79b 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -973,7 +973,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
 	/*
 	 * Check whether to close the file ...
 	 */
-	if (inode && NCP_FINFO(inode)->opened) {
+	if (inode) {
 		PPRINTK("ncp_unlink: closing file\n");
 		ncp_make_closed(inode);
 	}
@@ -982,7 +982,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
 #ifdef CONFIG_NCPFS_STRONG
 	/* 9C is Invalid path.. It should be 8F, 90 - read only, but
 	   it is not :-( */
-	if (error == 0x9C && server->m.flags & NCP_MOUNT_STRONG) { /* R/O */
+	if ((error == 0x9C || error == 0x90) && server->m.flags & NCP_MOUNT_STRONG) { /* R/O */
 		error = ncp_force_unlink(dir, dentry);
 	}
 #endif
@@ -1051,7 +1051,7 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
 	error = ncp_ren_or_mov_file_or_subdir(server, old_dir, __old_name,
 						      new_dir, __new_name);
 #ifdef CONFIG_NCPFS_STRONG
-	if ((error == 0x90 || error == -EACCES) &&
+	if ((error == 0x90 || error == 0x8B || error == -EACCES) &&
 			server->m.flags & NCP_MOUNT_STRONG) {	/* RO */
 		error = ncp_force_rename(old_dir, old_dentry, __old_name,
 					 new_dir, new_dentry, __new_name);
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 6f8fd2d63..3442c3f9f 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -26,7 +26,7 @@ static inline unsigned int min(unsigned int a, unsigned int b)
 	return a < b ? a : b;
 }
 
-static int ncp_fsync(struct file *file, struct dentry *dentry)
+static int ncp_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
 	return 0;
 }
@@ -46,12 +46,12 @@ int ncp_make_open(struct inode *inode, int right)
 	}
 
 	DPRINTK("ncp_make_open: opened=%d, volume # %u, dir entry # %u\n",
-		NCP_FINFO(inode)->opened, 
+		atomic_read(&NCP_FINFO(inode)->opened), 
 		NCP_FINFO(inode)->volNumber, 
 		NCP_FINFO(inode)->dirEntNum);
 	error = -EACCES;
-	lock_super(inode->i_sb);
-	if (!NCP_FINFO(inode)->opened) {
+	down(&NCP_FINFO(inode)->open_sem);
+	if (!atomic_read(&NCP_FINFO(inode)->opened)) {
 		struct ncp_entry_info finfo;
 		int result;
 
@@ -88,15 +88,18 @@ int ncp_make_open(struct inode *inode, int right)
 		 */
 	update:
 		ncp_update_inode(inode, &finfo);
+		atomic_set(&NCP_FINFO(inode)->opened, 1);
 	}
 
 	access = NCP_FINFO(inode)->access;
 	PPRINTK("ncp_make_open: file open, access=%x\n", access);
-	if (access == right || access == O_RDWR)
+	if (access == right || access == O_RDWR) {
+		atomic_inc(&NCP_FINFO(inode)->opened);
 		error = 0;
+	}
 
 out_unlock:
-	unlock_super(inode->i_sb);
+	up(&NCP_FINFO(inode)->open_sem);
 out:
 	return error;
 }
@@ -153,7 +156,7 @@ ncp_file_read(struct file *file, char *buf, size_t count, loff_t *ppos)
 	freelen = ncp_read_bounce_size(bufsize);
 	freepage = kmalloc(freelen, GFP_NFS);
 	if (!freepage)
-		goto out;
+		goto outrel;
 	error = 0;
 	/* First read in as much as possible for each bufsize. */
 	while (already_read < count) {
@@ -166,9 +169,8 @@ ncp_file_read(struct file *file, char *buf, size_t count, loff_t *ppos)
 				pos, to_read, buf, &read_this_time, 
 				freepage, freelen);
 		if (error) {
-			kfree(freepage);
-			error = -EIO;	/* This is not exact, i know.. */
-			goto out;
+			error = -EIO;	/* NW errno -> Linux errno */
+			break;
 		}
 		pos += read_this_time;
 		buf += read_this_time;
@@ -188,6 +190,8 @@ ncp_file_read(struct file *file, char *buf, size_t count, loff_t *ppos)
 	
 	DPRINTK("ncp_file_read: exit %s/%s\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
+outrel:
+	ncp_inode_close(inode);		
 out:
 	return already_read ? already_read : error;
 }
@@ -236,8 +240,10 @@ ncp_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
 	already_written = 0;
 
 	bouncebuffer = kmalloc(bufsize, GFP_NFS);
-	if (!bouncebuffer)
-		return -EIO;	/* -ENOMEM */
+	if (!bouncebuffer) {
+		errno = -EIO;	/* -ENOMEM */
+		goto outrel;
+	}
 	while (already_written < count) {
 		int written_this_time;
 		size_t to_write = min(bufsize - (pos % bufsize),
@@ -271,15 +277,15 @@ ncp_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
 	}
 	DPRINTK("ncp_file_write: exit %s/%s\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
+outrel:
+	ncp_inode_close(inode);		
 out:
 	return already_written ? already_written : errno;
 }
 
 static int ncp_release(struct inode *inode, struct file *file) {
-	if (NCP_FINFO(inode)->opened) {
-		if (ncp_make_closed(inode)) {
-			DPRINTK("ncp_release: failed to close\n");
-		}
+	if (ncp_make_closed(inode)) {
+		DPRINTK("ncp_release: failed to close\n");
 	}
 	return 0;
 }
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index e885aed47..b6104831e 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -61,7 +61,6 @@ void ncp_update_inode(struct inode *inode, struct ncp_entry_info *nwinfo)
 #ifdef CONFIG_NCPFS_STRONG
 	NCP_FINFO(inode)->nwattr = nwinfo->i.attributes;
 #endif
-	NCP_FINFO(inode)->opened = nwinfo->opened;
 	NCP_FINFO(inode)->access = nwinfo->access;
 	NCP_FINFO(inode)->server_file_handle = nwinfo->server_file_handle;
 	memcpy(NCP_FINFO(inode)->file_handle, nwinfo->file_handle,
@@ -76,7 +75,7 @@ void ncp_update_inode2(struct inode* inode, struct ncp_entry_info *nwinfo)
 	struct nw_info_struct *nwi = &nwinfo->i;
 	struct ncp_server *server = NCP_SERVER(inode);
 
-	if (!NCP_FINFO(inode)->opened) {
+	if (!atomic_read(&NCP_FINFO(inode)->opened)) {
 #ifdef CONFIG_NCPFS_STRONG
 		NCP_FINFO(inode)->nwattr = nwi->attributes;
 #endif
@@ -216,6 +215,9 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
 
 	inode = get_empty_inode();
 	if (inode) {
+		init_MUTEX(&NCP_FINFO(inode)->open_sem);
+		atomic_set(&NCP_FINFO(inode)->opened, info->opened);
+
 		inode->i_sb = sb;
 		inode->i_dev = sb->s_dev;
 		inode->i_ino = info->ino;
@@ -245,7 +247,7 @@ ncp_delete_inode(struct inode *inode)
 		DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino);
 	}
 
-	if (NCP_FINFO(inode)->opened && ncp_make_closed(inode) != 0) {
+	if (ncp_make_closed(inode) != 0) {
 		/* We can't do anything but complain. */
 		printk(KERN_ERR "ncp_delete_inode: could not close\n");
 	}
@@ -259,7 +261,6 @@ ncp_read_super(struct super_block *sb, void *raw_data, int silent)
 	struct ncp_server *server;
 	struct file *ncp_filp;
 	struct inode *root_inode;
-	kdev_t dev = sb->s_dev;
 	int error;
 #ifdef CONFIG_NCPFS_PACKET_SIGNING
 	int options;
@@ -318,7 +319,6 @@ ncp_read_super(struct super_block *sb, void *raw_data, int silent)
 	sb->s_blocksize = 1024;	/* Eh...  Is this correct? */
 	sb->s_blocksize_bits = 10;
 	sb->s_magic = NCP_SUPER_MAGIC;
-	sb->s_dev = dev;
 	sb->s_op = &ncp_sops;
 
 	server = NCP_SBP(sb);
@@ -676,6 +676,7 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
 
 		/* According to ndir, the changes only take effect after
 		   closing the file */
+		ncp_inode_close(inode);
 		result = ncp_make_closed(inode);
 		if (!result)
 			vmtruncate(inode, attr->ia_size);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 26c95fc8f..24e616396 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -335,18 +335,12 @@ int ncp_ioctl(struct inode *inode, struct file *filp,
 			{
 				return result;
 			}
+			result = -EIO;
 			if (!ncp_conn_valid(server))
-			{
-				return -EIO;
-			}
+				goto outrel;
+			result = -EISDIR;
 			if (!S_ISREG(inode->i_mode))
-			{
-				return -EISDIR;
-			}
-			if (!NCP_FINFO(inode)->opened)
-			{
-				return -EBADFD;
-			}
+				goto outrel;
 			if (rqdata.cmd == NCP_LOCK_CLEAR)
 			{
 				result = ncp_ClearPhysicalRecord(NCP_SERVER(inode),
@@ -373,6 +367,8 @@ int ncp_ioctl(struct inode *inode, struct file *filp,
 							rqdata.timeout);
 				if (result > 0) result = -EAGAIN;
 			}
+outrel:			
+			ncp_inode_close(inode);
 			return result;
 		}
 #endif	/* CONFIG_NCPFS_IOCTL_LOCKING */
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 752ae1e1e..08d28d895 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -82,6 +82,7 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area,
 				break;
 			}
 		}
+		ncp_inode_close(inode);
 
 	}
 
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index 73afd107a..0353882b9 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -221,20 +221,23 @@ ncp_close_file(struct ncp_server *server, const char *file_id)
 	return result;
 }
 
-/*
- * Called with the superblock locked.
- */
 int
 ncp_make_closed(struct inode *inode)
 {
 	int err;
-	NCP_FINFO(inode)->opened = 0;
-	err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle);
 
-	if (!err)
-		PPRINTK("ncp_make_closed: volnum=%d, dirent=%u, error=%d\n",
-			NCP_FINFO(inode)->volNumber,
-			NCP_FINFO(inode)->dirEntNum, err);
+	err = 0;
+	down(&NCP_FINFO(inode)->open_sem);	
+	if (atomic_read(&NCP_FINFO(inode)->opened) == 1) {
+		atomic_set(&NCP_FINFO(inode)->opened, 0);
+		err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle);
+
+		if (!err)
+			PPRINTK("ncp_make_closed: volnum=%d, dirent=%u, error=%d\n",
+				NCP_FINFO(inode)->volNumber,
+				NCP_FINFO(inode)->dirEntNum, err);
+	}
+	up(&NCP_FINFO(inode)->open_sem);
 	return err;
 }
 
@@ -613,7 +616,8 @@ int ncp_open_create_file_or_subdir(struct ncp_server *server,
 
 	if ((result = ncp_request(server, 87)) != 0)
 		goto out;
-	target->opened = 1;
+	if (!(create_attributes & aDIR))
+		target->opened = 1;
 	target->server_file_handle = ncp_reply_dword(server, 0);
 	target->open_create_action = ncp_reply_byte(server, 4);
 
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 8b33a5c2e..31797a3c3 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -57,6 +57,10 @@ int ncp_read_kernel(struct ncp_server *, const char *, __u32, __u16,
 int ncp_write_kernel(struct ncp_server *, const char *, __u32, __u16,
 		const char *, int *);
 
+static inline void ncp_inode_close(struct inode *inode) {
+	atomic_dec(&NCP_FINFO(inode)->opened);
+}
+
 int ncp_obtain_info(struct ncp_server *server, struct inode *, char *,
 		struct nw_info_struct *target);
 int ncp_lookup_volume(struct ncp_server *, char *, struct nw_info_struct *);
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
index 46925eb6d..0962593da 100644
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -50,10 +50,6 @@ static int ncp_symlink_readpage(struct file *file, struct page *page)
 	char *link;
 	char *buf = (char*)kmap(page);
 
-	error = -EIO;
-	if (ncp_make_open(inode,O_RDONLY))
-		goto fail;
-
 	error = -ENOMEM;
 	for (cnt = 0; (link=(char *)kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_NFS))==NULL; cnt++) {
 		if (cnt > 10)
@@ -61,20 +57,22 @@ static int ncp_symlink_readpage(struct file *file, struct page *page)
 		schedule();
 	}
 
+	if (ncp_make_open(inode,O_RDONLY))
+		goto failEIO;
+
 	error=ncp_read_kernel(NCP_SERVER(inode),NCP_FINFO(inode)->file_handle,
                          0,NCP_MAX_SYMLINK_SIZE,link,&length);
 
-	if (error) {
-		kfree(link);
-		goto fail;
-	}
+	ncp_inode_close(inode);
+	/* Close file handle if no other users... */
+	ncp_make_closed(inode);
+	if (error)
+		goto failEIO;
+
 	if (length<NCP_MIN_SYMLINK_SIZE || 
 	    ((__u32 *)link)[0]!=NCP_SYMLINK_MAGIC0 ||
-	    ((__u32 *)link)[1]!=NCP_SYMLINK_MAGIC1) {
-		error = -EIO;
-		kfree(link);
-		goto fail;
-	}
+	    ((__u32 *)link)[1]!=NCP_SYMLINK_MAGIC1)
+	    	goto failEIO;
 
 	len = NCP_MAX_SYMLINK_SIZE;
 	error = ncp_vol2io(NCP_SERVER(inode), buf, &len, link+8, length-8, 0);
@@ -86,6 +84,9 @@ static int ncp_symlink_readpage(struct file *file, struct page *page)
 	UnlockPage(page);
 	return 0;
 
+failEIO:
+	error = -EIO;
+	kfree(link);
 fail:
 	SetPageError(page);
 	kunmap(page);
@@ -120,13 +121,15 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) {
 	if ((link=(char *)kmalloc(length+9,GFP_NFS))==NULL)
 		return -ENOMEM;
 
-	if (ncp_create_new(dir,dentry,0,aSHARED|aHIDDEN)) {
-		kfree(link);
-		return -EIO;
-	}
+	err = -EIO;
+	if (ncp_create_new(dir,dentry,0,aSHARED|aHIDDEN))
+		goto failfree;
 
 	inode=dentry->d_inode;
 
+	if (ncp_make_open(inode, O_WRONLY))
+		goto failfree;
+
 	((__u32 *)link)[0]=NCP_SYMLINK_MAGIC0;
 	((__u32 *)link)[1]=NCP_SYMLINK_MAGIC1;
 
@@ -134,19 +137,26 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) {
 	   symlink can point out of ncp filesystem */
 	length += 1;
 	err = ncp_io2vol(NCP_SERVER(inode),link+8,&length,symname,length-1,0);
-	if (err) {
-		kfree(link);
-		return err;
-	}
+	if (err)
+		goto fail;
 
 	if(ncp_write_kernel(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle, 
 	    		    0, length+8, link, &i) || i!=length+8) {
-		kfree(link);
-		return -EIO;
+		err = -EIO;
+		goto fail;
 	}
 
+	ncp_inode_close(inode);
+	ncp_make_closed(inode);
 	kfree(link);
 	return 0;
+
+fail:
+	ncp_inode_close(inode);
+	ncp_make_closed(inode);
+failfree:
+	kfree(link);
+	return err;	
 }
 #endif
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 62b37c8cf..06f067eea 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -38,7 +38,7 @@ static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
 static ssize_t nfs_file_read(struct file *, char *, size_t, loff_t *);
 static ssize_t nfs_file_write(struct file *, const char *, size_t, loff_t *);
 static int  nfs_file_flush(struct file *);
-static int  nfs_fsync(struct file *, struct dentry *dentry);
+static int  nfs_fsync(struct file *, struct dentry *dentry, int datasync);
 
 struct file_operations nfs_file_operations = {
 	read:		nfs_file_read,
@@ -123,7 +123,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
  * whether any write errors occurred for this process.
  */
 static int
-nfs_fsync(struct file *file, struct dentry *dentry)
+nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
 	struct inode *inode = dentry->d_inode;
 	int status;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index dee52dd8a..df2532048 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -9,8 +9,6 @@
  * creates a client control block and adds it to the hash
  * table. Then, you call NFSCTL_EXPORT for each fs.
  *
- * You cannot currently read the export information from the
- * kernel. It would be nice to have a /proc file though.
  *
  * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de>
  */
@@ -388,12 +386,10 @@ exp_rootfh(struct svc_client *clp, kdev_t dev, ino_t ino,
 
 	err = -EPERM;
 	if (path) {
-		err = 0;
-		if (path_init(path, LOOKUP_POSITIVE, &nd))
-			err = path_walk(path, &nd);
-		if (err) {
+		if (path_init(path, LOOKUP_POSITIVE, &nd) &&
+		    path_walk(path, &nd)) {
 			printk("nfsd: exp_rootfh path not found %s", path);
-			return -EPERM;
+			return err;
 		}
 		dev = nd.dentry->d_inode->i_dev;
 		ino = nd.dentry->d_inode->i_ino;
@@ -438,7 +434,8 @@ exp_rootfh(struct svc_client *clp, kdev_t dev, ino_t ino,
 	fh_put(&fh);
 
 out:
-	path_release(&nd);
+	if (path)
+		path_release(&nd);
 	return err;
 }
 
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 357a297f6..f5795583b 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -174,8 +174,9 @@ int
 nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
 {
 	struct svc_cacherep	*rh, *rp;
-	struct svc_client	*clp = rqstp->rq_client;
 	u32			xid = rqstp->rq_xid,
+				proto =  rqstp->rq_prot,
+				vers = rqstp->rq_vers,
 				proc = rqstp->rq_proc;
 	unsigned long		age;
 
@@ -189,7 +190,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
 	while ((rp = rp->c_hash_next) != rh) {
 		if (rp->c_state != RC_UNUSED &&
 		    xid == rp->c_xid && proc == rp->c_proc &&
-		    exp_checkaddr(clp, rp->c_client)) {
+		    proto == rp->c_prot && vers == rp->c_vers &&
+		    time_before(jiffies, rp->c_timestamp + 120*HZ) &&
+		    memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, rqstp->rq_addrlen)==0) {
 			nfsdstats.rchits++;
 			goto found_entry;
 		}
@@ -226,7 +229,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
 	rp->c_state = RC_INPROG;
 	rp->c_xid = xid;
 	rp->c_proc = proc;
-	rp->c_client = rqstp->rq_addr.sin_addr;
+	rp->c_addr = rqstp->rq_addr;
+	rp->c_prot = proto;
+	rp->c_vers = vers;
+	rp->c_timestamp = jiffies;
+
 	hash_refile(rp);
 
 	/* release any buffer */
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index c4e456185..913cbf5f8 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -5,7 +5,6 @@
  *
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
  */
-#define NFS_GETFH_NEW
 
 #include <linux/config.h>
 #include <linux/module.h>
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 85a98c874..78f399bd3 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -495,17 +495,15 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 
 	dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp));
 
-	if (!fhp->fh_dverified) {
+	if (!fhp->fh_dentry) {
 		kdev_t xdev;
 		ino_t xino;
 		__u32 *datap=NULL;
 		int data_left = fh->fh_size/4;
 		int nfsdev;
 		error = nfserr_stale;
-#if CONFIG_NFSD_V3		
 		if (rqstp->rq_vers == 3)
 			error = nfserr_badhandle;
-#endif
 		if (fh->fh_version == 1) {
 			
 			datap = fh->fh_auth;
@@ -562,10 +560,8 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 		 * Look up the dentry using the NFS file handle.
 		 */
 		error = nfserr_stale;
-#if CONFIG_NFSD_V3		
 		if (rqstp->rq_vers == 3)
 			error = nfserr_badhandle;
-#endif
 
 		if (fh->fh_version == 1) {
 			/* if fileid_type != 0, and super_operations provide fh_to_dentry lookup,
@@ -611,7 +607,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 
 		fhp->fh_dentry = dentry;
 		fhp->fh_export = exp;
-		fhp->fh_dverified = 1;
 		nfsd_nr_verified++;
 	} else {
 		/* just rechecking permissions
@@ -731,7 +726,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry)
 		parent->d_name.name, dentry->d_name.name,
 		(inode ? inode->i_ino : 0));
 
-	if (fhp->fh_dverified || fhp->fh_locked || fhp->fh_dentry) {
+	if (fhp->fh_locked || fhp->fh_dentry) {
 		printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n",
 			parent->d_name.name, dentry->d_name.name);
 	}
@@ -757,8 +752,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry)
 	fhp->fh_handle.fh_size = (datap-fhp->fh_handle.fh_auth+1)*4;
 
 
-	/* We stuck it there, we know it's good. */
-	fhp->fh_dverified = 1;
 	nfsd_nr_verified++;
 	if (fhp->fh_handle.fh_fileid_type == 255)
 		return nfserr_opnotsupp;
@@ -775,7 +768,7 @@ fh_update(struct svc_fh *fhp)
 	struct dentry *dentry;
 	__u32 *datap;
 	
-	if (!fhp->fh_dverified)
+	if (!fhp->fh_dentry)
 		goto out_bad;
 
 	dentry = fhp->fh_dentry;
@@ -811,10 +804,9 @@ void
 fh_put(struct svc_fh *fhp)
 {
 	struct dentry * dentry = fhp->fh_dentry;
-	if (fhp->fh_dverified) {
+	if (dentry) {
 		fh_unlock(fhp);
 		fhp->fh_dentry = NULL;
-		fhp->fh_dverified = 0;
 		dput(dentry);
 		nfsd_nr_put++;
 	}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 763970736..b5057d57b 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -239,7 +239,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
 		 * whether the file exists or not. Time to bail ...
 		 */
 		nfserr = nfserr_acces;
-		if (!newfhp->fh_dverified) {
+		if (!newfhp->fh_dentry) {
 			printk(KERN_WARNING 
 				"nfsd_proc_create: file handle not verified\n");
 			goto out_unlock;
@@ -415,7 +415,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
 
 	dprintk("nfsd: MKDIR    %s %s\n", SVCFH_fmt(&argp->fh), argp->name);
 
-	if (resp->fh.fh_dverified) {
+	if (resp->fh.fh_dentry) {
 		printk(KERN_WARNING
 			"nfsd_proc_mkdir: response already verified??\n");
 	}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index fb3b32f8d..9a4d12a7d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -275,7 +275,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
 	/* Encode result.
 	 * For NFSv2, additional info is never returned in case of an error.
 	 */
-#ifdef CONFIG_NFSD_V3
 	if (!(nfserr && rqstp->rq_vers == 2)) {
 		xdr = proc->pc_encode;
 		if (xdr && !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) {
@@ -286,17 +285,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
 			return 1;
 		}
 	}
-#else
-	xdr = proc->pc_encode;
-	if (!nfserr && xdr
-	 && !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) {
-		/* Failed to encode result. Release cache entry */
-		dprintk("nfsd: failed to encode result!\n");
-		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
-		*statp = rpc_system_err;
-		return 1;
-	}
-#endif /* CONFIG_NFSD_V3 */
 
 	/* Store reply in cache. */
 	nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 40f1ab85a..7a144d707 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -165,6 +165,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
 				dentry = mounts;
 			} else
 				dput(mounts);
+			mntput(mnt);
 		}
 	}
 	/*
@@ -253,8 +254,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap)
 			goto out_nfserr;
 
 		err = locks_verify_truncate(inode, NULL, iap->ia_size);
-		if (err)
+		if (err) {
+			put_write_access(inode);
 			goto out_nfserr;
+		}
 		DQUOT_INIT(inode);
 	}
 
@@ -314,11 +317,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap)
 	if (err)
 		goto out_nfserr;
 	if (EX_ISSYNC(fhp->fh_export))
-		write_inode_now(inode);
+		write_inode_now(inode, 0);
 	err = 0;
-
-	/* Don't unlock inode; the nfssvc_release functions are supposed
-	 * to do this. */
 out:
 	return err;
 
@@ -413,7 +413,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access)
  out:
 	return error;
 }
-#endif
+#endif /* CONFIG_NFSD_V3 */
 
 
 
@@ -512,7 +512,7 @@ nfsd_sync(struct file *filp)
 {
 	dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name);
 	down(&filp->f_dentry->d_inode->i_sem);
-	filp->f_op->fsync(filp, filp->f_dentry);
+	filp->f_op->fsync(filp, filp->f_dentry, 0);
 	up(&filp->f_dentry->d_inode->i_sem);
 }
 
@@ -520,10 +520,10 @@ void
 nfsd_sync_dir(struct dentry *dp)
 {
 	struct inode *inode = dp->d_inode;
-	int (*fsync) (struct file *, struct dentry *);
+	int (*fsync) (struct file *, struct dentry *, int);
 	
 	if (inode->i_fop && (fsync = inode->i_fop->fsync)) {
-		fsync(NULL, dp);
+		fsync(NULL, dp, 0);
 	}
 }
 
@@ -598,7 +598,6 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
 	oldfs = get_fs(); set_fs(KERNEL_DS);
 	err = file.f_op->read(&file, buf, *count, &file.f_pos);
 	set_fs(oldfs);
-	nfsdstats.io_read += *count;
 
 	/* Write back readahead params */
 	if (ra != NULL) {
@@ -614,6 +613,7 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
 	}
 
 	if (err >= 0) {
+		nfsdstats.io_read += err;
 		*count = err;
 		err = 0;
 	} else 
@@ -665,19 +665,16 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
 	 * When gathered writes have been configured for this volume,
 	 * flushing the data to disk is handled separately below.
 	 */
-#ifdef CONFIG_NFSD_V3
+
 	if (file.f_op->fsync == 0) {/* COMMIT3 cannot work */
 	       stable = 2;
 	       *stablep = 2; /* FILE_SYNC */
 	}
+
 	if (!EX_ISSYNC(exp))
 		stable = 0;
 	if (stable && !EX_WGATHER(exp))
 		file.f_flags |= O_SYNC;
-#else
-	if ((stable || (stable = EX_ISSYNC(exp))) && !EX_WGATHER(exp))
-		file.f_flags |= O_SYNC;
-#endif /* CONFIG_NFSD_V3 */
 
 	file.f_pos = offset;		/* set write offset */
 
@@ -692,7 +689,8 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
 #else
 	err = file.f_op->write(&file, buf, cnt, &file.f_pos);
 #endif
-	nfsdstats.io_write += cnt;
+	if (err >= 0)
+		nfsdstats.io_write += cnt;
 	set_fs(oldfs);
 
 	/* clear setuid/setgid flag after write */
@@ -734,7 +732,9 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
 #else
 			dprintk("nfsd: write defer %d\n", current->pid);
 /* FIXME: Olaf commented this out [gam3] */
+			set_current_state(TASK_UNINTERRUPTIBLE);
 			schedule_timeout((HZ+99)/100);
+			current->state = TASK_RUNNING;
 			dprintk("nfsd: write resume %d\n", current->pid);
 #endif
 		}
@@ -743,7 +743,9 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
 			dprintk("nfsd: write sync %d\n", current->pid);
 			nfsd_sync(&file);
 		}
+#if 0
 		wake_up(&inode->i_wait);
+#endif
 		last_ino = inode->i_ino;
 		last_dev = inode->i_dev;
 	}
@@ -762,11 +764,12 @@ out:
 
 #ifdef CONFIG_NFSD_V3
 /*
- * Commit all pendig writes to stable storage.
- * Strictly speaking, we could sync just indicated the file region here,
+ * Commit all pending writes to stable storage.
+ * Strictly speaking, we could sync just the indicated file region here,
  * but there's currently no way we can ask the VFS to do so.
  *
- * We lock the file to make sure we return full WCC data to the client.
+ * Unfortunately we cannot lock the file to make sure we return full WCC
+ * data to the client, as locking happens lower down in the filesystem.
  */
 int
 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
@@ -828,7 +831,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	 * Check whether the response file handle has been verified yet.
 	 * If it has, the parent directory should already be locked.
 	 */
-	if (!resfhp->fh_dverified) {
+	if (!resfhp->fh_dentry) {
 		/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
 		fh_lock(fhp);
 		dchild = lookup_one(fname, dentry);
@@ -891,7 +894,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 
 	if (EX_ISSYNC(fhp->fh_export)) {
 		nfsd_sync_dir(dentry);
-		write_inode_now(dchild->d_inode);
+		write_inode_now(dchild->d_inode, 0);
 	}
 
 
@@ -928,6 +931,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	struct dentry	*dentry, *dchild;
 	struct inode	*dirp;
 	int		err;
+	__u32		v_mtime=0, v_atime=0;
+	int		v_mode=0;
 
 	err = nfserr_perm;
 	if (!flen)
@@ -963,6 +968,19 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (err)
 		goto out;
 
+	if (createmode == NFS3_CREATE_EXCLUSIVE) {
+		/* while the verifier would fit in mtime+atime,
+		 * solaris7 gets confused (bugid 4218508) if these have
+		 * the high bit set, so we use the mode as well
+		 */
+		v_mtime = verifier[0]&0x7fffffff;
+		v_atime = verifier[1]&0x7fffffff;
+		v_mode  = S_IFREG
+			| ((verifier[0]&0x80000000) >> (32-7)) /* u+x */
+			| ((verifier[1]&0x80000000) >> (32-9)) /* u+r */
+			;
+	}
+	
 	if (dchild->d_inode) {
 		err = 0;
 
@@ -976,10 +994,10 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			}
 			break;
 		case NFS3_CREATE_EXCLUSIVE:
-			if (   dchild->d_inode->i_mtime == verifier[0]
-			    && dchild->d_inode->i_atime == verifier[1]
-			    && dchild->d_inode->i_mode == S_IFREG
-			    && dchild->d_inode->i_size == 0 )
+			if (   dchild->d_inode->i_mtime == v_mtime
+			    && dchild->d_inode->i_atime == v_atime
+			    && dchild->d_inode->i_mode  == v_mode
+			    && dchild->d_inode->i_size  == 0 )
 				break;
 			 /* fallthru */
 		case NFS3_CREATE_GUARDED:
@@ -1005,19 +1023,23 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		goto out;
 
 	if (createmode == NFS3_CREATE_EXCLUSIVE) {
-		/* Cram the verifier into atime/mtime */
-		iap->ia_valid = ATTR_MTIME|ATTR_ATIME|ATTR_MTIME_SET|ATTR_ATIME_SET;
-		iap->ia_mtime = verifier[0];
-		iap->ia_atime = verifier[1];
+		/* Cram the verifier into atime/mtime/mode */
+		iap->ia_valid = ATTR_MTIME|ATTR_ATIME
+			| ATTR_MTIME_SET|ATTR_ATIME_SET
+			| ATTR_MODE;
+		iap->ia_mtime = v_mtime;
+		iap->ia_atime = v_atime;
+		iap->ia_mode  = v_mode;
 	}
 
-	/* Set file attributes. Mode has already been set and
-	 * setting uid/gid works only for root. Irix appears to
-	 * send along the gid when it tries to implement setgid
-	 * directories via NFS. Clear out all that cruft.
+	/* Set file attributes.
+	 * Mode has already been set but we might need to reset it
+	 * for CREATE_EXCLUSIVE
+	 * Irix appears to send along the gid when it tries to
+	 * implement setgid directories via NFS. Clear out all that cruft.
 	 */
  set_attr:
-	if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0)
+	if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0)
  		err = nfsd_setattr(rqstp, resfhp, iap);
 
  out:
@@ -1118,7 +1140,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
 					| S_IFLNK;
 				err = notify_change(dnew, iap);
 				if (!err && EX_ISSYNC(fhp->fh_export))
-					write_inode_now(dentry->d_inode);
+					write_inode_now(dentry->d_inode, 0);
 		       }
 		}
 	} else
@@ -1178,7 +1200,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 	if (!err) {
 		if (EX_ISSYNC(ffhp->fh_export)) {
 			nfsd_sync_dir(ddir);
-			write_inode_now(dest);
+			write_inode_now(dest, 0);
 		}
 	} else {
 		if (err == -EXDEV && rqstp->rq_vers == 2)
@@ -1230,7 +1252,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 	if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
 		goto out;
 
+	/* cannot use fh_lock as we need deadlock protective ordering
+	 * so do it by hand */
 	double_down(&tdir->i_sem, &fdir->i_sem);
+	ffhp->fh_locked = tfhp->fh_locked = 1;
+	fill_pre_wcc(ffhp);
+	fill_pre_wcc(tfhp);
+
 	odentry = lookup_one(fname, fdentry);
 	err = PTR_ERR(odentry);
 	if (IS_ERR(odentry))
@@ -1245,39 +1273,31 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 	if (IS_ERR(ndentry))
 		goto out_dput_old;
 
-#ifdef CONFIG_NFSD_V3
-	/* Fill in the pre-op attr for the wcc data for both 
-	 * tdir and fdir
-	 */ 
-	fill_pre_wcc(ffhp);
-	fill_pre_wcc(tfhp);
-#endif /* CONFIG_NFSD_V3 */
 
 	err = vfs_rename(fdir, odentry, tdir, ndentry);
 	if (!err && EX_ISSYNC(tfhp->fh_export)) {
 		nfsd_sync_dir(tdentry);
 		nfsd_sync_dir(fdentry);
 	}
-#ifdef CONFIG_NFSD_V3
-        /* Fill in the post-op attr for the wcc data for both 
-         * tdir and fdir
-         */
-	fill_post_wcc(ffhp);
-	fill_post_wcc(tfhp);
-#endif /* CONFIG_NFSD_V3 */
-	double_up(&tdir->i_sem, &fdir->i_sem);
 	dput(ndentry);
 
-out_dput_old:
+ out_dput_old:
 	dput(odentry);
+ out_nfserr:
 	if (err)
-		goto out_nfserr;
+		err = nfserrno(err);
+
+	/* we cannot rely on fh_unlock on the two filehandles,
+	 * as that would do the wrong thing if the two directories
+	 * were the same, so again we do it by hand
+	 */
+	fill_post_wcc(ffhp);
+	fill_post_wcc(tfhp);
+	double_up(&tdir->i_sem, &fdir->i_sem);
+	ffhp->fh_locked = tfhp->fh_locked = 0;
+	
 out:
 	return err;
-
-out_nfserr:
-	err = nfserrno(err);
-	goto out;
 }
 
 /*
@@ -1320,17 +1340,13 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 		err = vfs_rmdir(dirp, rdentry);
 	}
 
-	fh_unlock(fhp);
-
 	dput(rdentry);
 
 	if (err)
 		goto out_nfserr;
-	if (EX_ISSYNC(fhp->fh_export)) {
-		down(&dentry->d_inode->i_sem);
+	if (EX_ISSYNC(fhp->fh_export)) 
 		nfsd_sync_dir(dentry);
-		up(&dentry->d_inode->i_sem);
-	}
+
 out:
 	return err;
 
@@ -1353,13 +1369,11 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
 	struct file	file;
 	struct readdir_cd cd;
 
-	err = 0;
-	if (offset > ~(u32) 0)
-		goto out;
-
 	err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file);
 	if (err)
 		goto out;
+	if (offset > ~(u32) 0)
+		goto out_close;
 
 	err = nfserr_notdir;
 	if (!file.f_op->readdir)
@@ -1402,11 +1416,9 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
 	eof = !cd.eob;
 
 	if (cd.offset) {
-#ifdef CONFIG_NFSD_V3
 		if (rqstp->rq_vers == 3)
 			(void)xdr_encode_hyper(cd.offset, file.f_pos);
 		else
-#endif /* CONFIG_NFSD_V3 */
 			*cd.offset = htonl(file.f_pos);
 	}
 
diff --git a/fs/ntfs/fs.c b/fs/ntfs/fs.c
index e0649ec7b..470f15c90 100644
--- a/fs/ntfs/fs.c
+++ b/fs/ntfs/fs.c
@@ -546,6 +546,7 @@ _linux_ntfs_mkdir(struct inode *dir, struct dentry* d, int mode)
 }
 #endif
 
+#if 0
 static int 
 ntfs_bmap(struct inode *ino,int block)
 {
@@ -554,6 +555,7 @@ ntfs_bmap(struct inode *ino,int block)
 	       ino->i_ino,block,ret);
 	return (ret==-1) ? 0:ret;
 }
+#endif
 
 /* It's fscking broken. */
 
diff --git a/fs/pipe.c b/fs/pipe.c
index b97851fab..a30985a53 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -607,6 +607,8 @@ static struct super_block * pipefs_read_super(struct super_block *sb, void *data
 	root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
 	root->i_uid = root->i_gid = 0;
 	root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
+	root->i_sb = sb;
+	root->i_dev = sb->s_dev;
 	sb->s_blocksize = 1024;
 	sb->s_blocksize_bits = 10;
 	sb->s_magic = PIPEFS_MAGIC;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index fb63722d5..01f5b22ea 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -408,6 +408,7 @@ static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 		goto out;
 
 	error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
+	nd->last_type = LAST_BIND;
 out:
 #ifdef NULL_VFSMNT
 	mntput(dummy);
@@ -706,6 +707,7 @@ static struct dentry_operations pid_base_dentry_operations =
 };
 
 /* Lookups */
+#define MAX_MULBY10	((~0U-9)/10)
 
 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry)
 {
@@ -726,10 +728,10 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry)
 		name++;
 		if (c > 9)
 			goto out;
+		if (fd >= MAX_MULBY10)
+			goto out;
 		fd *= 10;
 		fd += c;
-		if (fd & 0xffff8000)
-			goto out;
 	}
 
 	inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd);
@@ -940,12 +942,12 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
 		name++;
 		if (c > 9)
 			goto out;
+		if (pid >= MAX_MULBY10)
+			goto out;
 		pid *= 10;
 		pid += c;
 		if (!pid)
 			goto out;
-		if (pid & 0xffff0000)
-			goto out;
 	}
 
 	read_lock(&tasklist_lock);
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 60393eb91..3576482ca 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -340,7 +340,6 @@ static struct super_block *qnx4_read_super(struct super_block *s,
 	set_blocksize(dev, QNX4_BLOCK_SIZE);
 	s->s_blocksize = QNX4_BLOCK_SIZE;
 	s->s_blocksize_bits = QNX4_BLOCK_SIZE_BITS;
-	s->s_dev = dev;
 
 	/* Check the boot signature. Since the qnx4 code is
 	   dangerous, we should leave as quickly as possible
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index f87d30e0b..9bb7611c1 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -298,15 +298,9 @@ static struct inode_operations ramfs_dir_inode_operations = {
 	rename:		ramfs_rename,
 };
 
-static void ramfs_put_super(struct super_block *sb)
-{
-	d_genocide(sb->s_root);
-	shrink_dcache_parent(sb->s_root);
-}
-
 static struct super_operations ramfs_ops = {
-	put_super:	ramfs_put_super,
 	statfs:		ramfs_statfs,
+	put_inode:	force_delete,
 };
 
 static struct super_block *ramfs_read_super(struct super_block * sb, void * data, int silent)
@@ -331,7 +325,7 @@ static struct super_block *ramfs_read_super(struct super_block * sb, void * data
 	return sb;
 }
 
-static DECLARE_FSTYPE(ramfs_fs_type, "ramfs", ramfs_read_super, 0);
+static DECLARE_FSTYPE(ramfs_fs_type, "ramfs", ramfs_read_super, FS_LITTER);
 
 static int __init init_ramfs_fs(void)
 {
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index b47e236b0..49d47afa7 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -27,7 +27,7 @@
 /* #define pr_debug printk */
 
 static int
-smb_fsync(struct file *file, struct dentry * dentry)
+smb_fsync(struct file *file, struct dentry * dentry, int datasync)
 {
 #ifdef SMBFS_DEBUG_VERBOSE
 printk("smb_fsync: sync file %s/%s\n", 
diff --git a/fs/super.c b/fs/super.c
index 5b8974e5b..8def1c9c4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -260,7 +260,7 @@ int get_filesystem_list(char * buf)
 	return len;
 }
 
-static struct file_system_type *get_fs_type(const char *name)
+struct file_system_type *get_fs_type(const char *name)
 {
 	struct file_system_type *fs;
 	
@@ -281,14 +281,28 @@ static struct file_system_type *get_fs_type(const char *name)
 
 static LIST_HEAD(vfsmntlist);
 
-static struct vfsmount *add_vfsmnt(struct super_block *sb,
-				struct dentry *mountpoint,
+/**
+ *	add_vfsmnt - add a new mount node
+ *	@nd: location of mountpoint or %NULL if we want a root node
+ *	@root: root of (sub)tree to be mounted
+ *	@dev_name: device name to show in /proc/mounts
+ *
+ *	This is VFS idea of mount. New node is allocated, bound to a tree
+ *	we are mounting and optionally (OK, usually) registered as mounted
+ *	on a given mountpoint. Returns a pointer to new node or %NULL in
+ *	case of failure.
+ *
+ *	Potential reason for failure (aside of trivial lack of memory) is a
+ *	deleted mountpoint. Caller must hold ->i_zombie on mountpoint
+ *	dentry (if any).
+ */
+
+static struct vfsmount *add_vfsmnt(struct nameidata *nd,
 				struct dentry *root,
-				struct vfsmount *parent,
-				const char *dev_name,
-				const char *dir_name)
+				const char *dev_name)
 {
 	struct vfsmount *mnt;
+	struct super_block *sb = root->d_inode->i_sb;
 	char *name;
 
 	mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
@@ -296,13 +310,7 @@ static struct vfsmount *add_vfsmnt(struct super_block *sb,
 		goto out;
 	memset(mnt, 0, sizeof(struct vfsmount));
 
-	atomic_set(&mnt->mnt_count,1);
-	mnt->mnt_sb = sb;
-	mnt->mnt_mountpoint = dget(mountpoint);
-	mnt->mnt_root = dget(root);
-	mnt->mnt_parent = parent ? mntget(parent) : mnt;
-
-	/* N.B. Is it really OK to have a vfsmount without names? */
+	/* It may be NULL, but who cares? */
 	if (dev_name) {
 		name = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
 		if (name) {
@@ -310,51 +318,53 @@ static struct vfsmount *add_vfsmnt(struct super_block *sb,
 			mnt->mnt_devname = name;
 		}
 	}
-	name = kmalloc(strlen(dir_name)+1, GFP_KERNEL);
-	if (name) {
-		strcpy(name, dir_name);
-		mnt->mnt_dirname = name;
-	}
 	mnt->mnt_owner = current->uid;
+	atomic_set(&mnt->mnt_count,1);
+	mnt->mnt_sb = sb;
 
-	if (parent)
-		list_add(&mnt->mnt_child, &parent->mnt_mounts);
-	else
+	if (nd && !IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
+		goto fail;
+	mnt->mnt_root = dget(root);
+	mnt->mnt_mountpoint = nd ? dget(nd->dentry) : dget(root);
+	mnt->mnt_parent = nd ? mntget(nd->mnt) : mnt;
+
+	if (nd) {
+		list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+		list_add(&mnt->mnt_clash, &nd->dentry->d_vfsmnt);
+	} else {
 		INIT_LIST_HEAD(&mnt->mnt_child);
+		INIT_LIST_HEAD(&mnt->mnt_clash);
+	}
 	INIT_LIST_HEAD(&mnt->mnt_mounts);
 	list_add(&mnt->mnt_instances, &sb->s_mounts);
-	list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt);
 	list_add(&mnt->mnt_list, vfsmntlist.prev);
 out:
 	return mnt;
+fail:
+	kfree(mnt->mnt_devname);
+	kfree(mnt);
+	return NULL;
 }
 
 static void move_vfsmnt(struct vfsmount *mnt,
 			struct dentry *mountpoint,
 			struct vfsmount *parent,
-			const char *dev_name,
-			const char *dir_name)
+			const char *dev_name)
 {
-	struct dentry *old_mountpoint = mnt->mnt_mountpoint;
-	struct vfsmount *old_parent = mnt->mnt_parent;
-	char *new_devname = NULL, *new_dirname = NULL;
+	struct dentry *old_mountpoint;
+	struct vfsmount *old_parent;
+	char *new_devname = NULL;
 
 	if (dev_name) {
 		new_devname = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
 		if (new_devname)
 			strcpy(new_devname, dev_name);
 	}
-	if (dir_name) {
-		new_dirname = kmalloc(strlen(dir_name)+1, GFP_KERNEL);
-		if (new_dirname)
-			strcpy(new_dirname, dir_name);
-	}
+
+	old_mountpoint = mnt->mnt_mountpoint;
+	old_parent = mnt->mnt_parent;
 
 	/* flip names */
-	if (new_dirname) {
-		kfree(mnt->mnt_dirname);
-		mnt->mnt_dirname = new_dirname;
-	}
 	if (new_devname) {
 		kfree(mnt->mnt_devname);
 		mnt->mnt_devname = new_devname;
@@ -365,11 +375,13 @@ static void move_vfsmnt(struct vfsmount *mnt,
 	mnt->mnt_parent = parent ? mntget(parent) : mnt;
 	list_del(&mnt->mnt_clash);
 	list_del(&mnt->mnt_child);
-	list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt);
-	if (parent)
+	if (parent) {
 		list_add(&mnt->mnt_child, &parent->mnt_mounts);
-	else
+		list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt);
+	} else {
 		INIT_LIST_HEAD(&mnt->mnt_child);
+		INIT_LIST_HEAD(&mnt->mnt_clash);
+	}
 
 	/* put the old stuff */
 	dput(old_mountpoint);
@@ -391,7 +403,6 @@ static void remove_vfsmnt(struct vfsmount *mnt)
 	dput(mnt->mnt_mountpoint);
 	dput(mnt->mnt_root);
 	kfree(mnt->mnt_devname);
-	kfree(mnt->mnt_dirname);
 	kfree(mnt);
 }
 
@@ -738,10 +749,6 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 	/* Done with lookups, semaphore down */
 	down(&mount_sem);
 	dev = to_kdev_t(bdev->bd_dev);
-	check_disk_change(dev);
-	error = -EACCES;
-	if (!(flags & MS_RDONLY) && is_read_only(dev))
-		goto out;
 	sb = get_super(dev);
 	if (sb) {
 		if (fs_type == sb->s_type) {
@@ -755,6 +762,10 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 		error = blkdev_get(bdev, mode, 0, BDEV_FS);
 		if (error)
 			goto out;
+		check_disk_change(dev);
+		error = -EACCES;
+		if (!(flags & MS_RDONLY) && is_read_only(dev))
+			goto out1;
 		error = -EINVAL;
 		sb = read_super(dev, bdev, fs_type, flags, data, 0);
 		if (sb) {
@@ -762,6 +773,7 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 			path_release(&nd);
 			return sb;
 		}
+out1:
 		blkdev_put(bdev, BDEV_FS);
 	}
 out:
@@ -812,8 +824,14 @@ static struct block_device *kill_super(struct super_block *sb, int umount_root)
 {
 	struct block_device *bdev;
 	kdev_t dev;
-	dput(sb->s_root);
+	struct dentry *root = sb->s_root;
 	sb->s_root = NULL;
+	/* Need to clean up after the sucker */
+	if (sb->s_type->fs_flags & FS_LITTER)
+		d_genocide(root);
+	if (sb->s_type->fs_flags & (FS_SINGLE|FS_LITTER))
+		shrink_dcache_parent(root);
+	dput(root);
 	lock_super(sb);
 	if (sb->s_op) {
 		if (sb->s_op->write_super && sb->s_dirt)
@@ -895,7 +913,7 @@ struct vfsmount *kern_mount(struct file_system_type *type)
 		put_unnamed_dev(dev);
 		return ERR_PTR(-EINVAL);
 	}
-	mnt = add_vfsmnt(sb, sb->s_root, sb->s_root, NULL, "none", type->name);
+	mnt = add_vfsmnt(NULL, sb->s_root, "none");
 	if (!mnt) {
 		kill_super(sb, 0);
 		return ERR_PTR(-ENOMEM);
@@ -909,10 +927,7 @@ struct vfsmount *kern_mount(struct file_system_type *type)
 void kern_umount(struct vfsmount *mnt)
 {
 	struct super_block *sb = mnt->mnt_sb;
-	struct dentry *root = sb->s_root;
 	remove_vfsmnt(mnt);
-	dput(root);
-	sb->s_root = NULL;
 	kill_super(sb, 0);
 }
 
@@ -932,6 +947,16 @@ static int do_umount(struct vfsmount *mnt, int umount_root, int flags)
 {
 	struct super_block * sb = mnt->mnt_sb;
 
+	/*
+	 * No sense in grabbing the lock for this test, but the test itself
+	 * looks somewhat bogus. Suggestions for a better replacement?
+	 * Ho-hum... In principle, we might treat that as umount + switch
+	 * to rootfs. GC would eventually take care of the old vfsmount.
+	 * The problem being: we have to implement rootfs and GC for that ;-)
+	 * Actually it makes sense, especially if rootfs would contain a
+	 * /reboot - static binary that would close all descriptors and
+	 * call reboot(9). Then init(8) could umount root and exec /reboot.
+	 */
 	if (mnt == current->fs->rootmnt && !umount_root) {
 		int retval = 0;
 		/*
@@ -952,6 +977,7 @@ static int do_umount(struct vfsmount *mnt, int umount_root, int flags)
 	if (mnt->mnt_instances.next != mnt->mnt_instances.prev) {
 		if (sb->s_type->fs_flags & FS_SINGLE)
 			put_filesystem(sb->s_type);
+		/* We hold two references, so mntput() is safe */
 		mntput(mnt);
 		remove_vfsmnt(mnt);
 		return 0;
@@ -988,14 +1014,14 @@ static int do_umount(struct vfsmount *mnt, int umount_root, int flags)
 	shrink_dcache_sb(sb);
 	fsync_dev(sb->s_dev);
 
-	/* Something might grab it again - redo checks */
-
-	if (atomic_read(&mnt->mnt_count) > 2) {
+	if (sb->s_root->d_inode->i_state) {
 		mntput(mnt);
 		return -EBUSY;
 	}
 
-	if (sb->s_root->d_inode->i_state) {
+	/* Something might grab it again - redo checks */
+
+	if (atomic_read(&mnt->mnt_count) > 2) {
 		mntput(mnt);
 		return -EBUSY;
 	}
@@ -1067,6 +1093,8 @@ static int mount_is_safe(struct nameidata *nd)
 {
 	if (capable(CAP_SYS_ADMIN))
 		return 0;
+	return -EPERM;
+#ifdef notyet
 	if (S_ISLNK(nd->dentry->d_inode->i_mode))
 		return -EPERM;
 	if (nd->dentry->d_inode->i_mode & S_ISVTX) {
@@ -1076,6 +1104,7 @@ static int mount_is_safe(struct nameidata *nd)
 	if (permission(nd->dentry->d_inode, MAY_WRITE))
 		return -EPERM;
 	return 0;
+#endif
 }
 
 /*
@@ -1102,22 +1131,22 @@ static int do_loopback(char *old_name, char *new_name)
 	if (S_ISDIR(new_nd.dentry->d_inode->i_mode) !=
 	      S_ISDIR(old_nd.dentry->d_inode->i_mode))
 		goto out2;
-		
-	down(&mount_sem);
-	err = -ENOENT;
-	if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry))
-		goto out3;
-	if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry))
-		goto out3;
-	/* there we go */
+
 	err = -ENOMEM;
 	if (old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE)
 		get_filesystem(old_nd.mnt->mnt_sb->s_type);
-	if (add_vfsmnt(old_nd.mnt->mnt_sb, new_nd.dentry, old_nd.dentry,
-	               new_nd.mnt, old_nd.mnt->mnt_devname, new_name))
+		
+	down(&mount_sem);
+	/* there we go */
+	down(&new_nd.dentry->d_inode->i_zombie);
+	if (IS_DEADDIR(new_nd.dentry->d_inode))
+		err = -ENOENT;
+	else if (add_vfsmnt(&new_nd, old_nd.dentry, old_nd.mnt->mnt_devname))
 		err = 0;
-out3:
+	up(&new_nd.dentry->d_inode->i_zombie);
 	up(&mount_sem);
+	if (err && old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE)
+		put_filesystem(old_nd.mnt->mnt_sb->s_type);
 out2:
 	path_release(&new_nd);
 out1:
@@ -1215,7 +1244,7 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
 {
 	struct file_system_type * fstype;
 	struct nameidata nd;
-	struct vfsmount *mnt;
+	struct vfsmount *mnt = NULL;
 	struct super_block *sb;
 	int retval = 0;
 	unsigned long flags = 0;
@@ -1224,8 +1253,6 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
 
 	if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
 		return -EINVAL;
-	if (!type_page || !memchr(type_page, 0, PAGE_SIZE))
-		return -EINVAL;
 	if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
 		return -EINVAL;
 
@@ -1239,6 +1266,11 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
 	if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL)
 		flags = new_flags & ~MS_MGC_MSK;
 
+	/* For the rest we need the type */
+
+	if (!type_page || !memchr(type_page, 0, PAGE_SIZE))
+		return -EINVAL;
+
 	/* loopback mount? This is special - requires fewer capabilities */
 	if (strcmp(type_page, "bind")==0)
 		return do_loopback(dev_name, dir_name);
@@ -1272,16 +1304,18 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
 	if (IS_ERR(sb))
 		goto dput_out;
 
-	retval = -ENOENT;
-	if (d_unhashed(nd.dentry) && !IS_ROOT(nd.dentry))
-		goto fail;
-
 	/* Something was mounted here while we slept */
 	while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry))
 		;
-
-	retval = -ENOMEM;
-	mnt = add_vfsmnt(sb, nd.dentry, sb->s_root, nd.mnt, dev_name, dir_name);
+	retval = -ENOENT;
+	if (!nd.dentry->d_inode)
+		goto fail;
+	down(&nd.dentry->d_inode->i_zombie);
+	if (!IS_DEADDIR(nd.dentry->d_inode)) {
+		retval = -ENOMEM;
+		mnt = add_vfsmnt(&nd, sb->s_root, dev_name);
+	}
+	up(&nd.dentry->d_inode->i_zombie);
 	if (!mnt)
 		goto fail;
 	retval = 0;
@@ -1312,15 +1346,6 @@ asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type,
 	if (retval < 0)
 		return retval;
 
-	/* copy_mount_options allows a NULL user pointer,
-	 * and just returns zero in that case.  But if we
-	 * allow the type to be NULL we will crash.
-	 * Previously we did not check this case.
-	 */
-	if (type_page == 0)
-		return -EINVAL;
-
-	lock_kernel();
 	dir_page = getname(dir_name);
 	retval = PTR_ERR(dir_page);
 	if (IS_ERR(dir_page))
@@ -1331,8 +1356,10 @@ asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type,
 		goto out2;
 	retval = copy_mount_options (data, &data_page);
 	if (retval >= 0) {
+		lock_kernel();
 		retval = do_mount((char*)dev_page,dir_page,(char*)type_page,
 				      new_flags, (void*)data_page);
+		unlock_kernel();
 		free_page(data_page);
 	}
 	free_page(dev_page);
@@ -1340,7 +1367,6 @@ out2:
 	putname(dir_page);
 out1:
 	free_page(type_page);
-	unlock_kernel();
 	return retval;
 }
 
@@ -1490,12 +1516,11 @@ mount_it:
 				  path + 5 + path_start, 0,
 				  NULL, NULL);
 		memcpy (path + path_start, "/dev/", 5);
-		vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL,
-					path + path_start, "/");
+		vfsmnt = add_vfsmnt(NULL, sb->s_root, path + path_start);
 	}
 	else
-		vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL,
-					"/dev/root", "/");
+		vfsmnt = add_vfsmnt(NULL, sb->s_root, "/dev/root");
+	/* FIXME: if something tries to umount us right now... */
 	if (vfsmnt) {
 		set_fs_root(current->fs, vfsmnt, sb->s_root);
 		set_fs_pwd(current->fs, vfsmnt, sb->s_root);
@@ -1516,6 +1541,7 @@ static void chroot_fs_refs(struct dentry *old_root,
 
 	read_lock(&tasklist_lock);
 	for_each_task(p) {
+		/* FIXME - unprotected usage of ->fs + (harmless) race */
 		if (!p->fs) continue;
 		if (p->fs->root == old_root && p->fs->rootmnt == old_rootmnt)
 			set_fs_root(p->fs, new_rootmnt, new_root);
@@ -1576,7 +1602,10 @@ asmlinkage long sys_pivot_root(const char *new_root, const char *put_old)
 	root_mnt = mntget(current->fs->rootmnt);
 	root = dget(current->fs->root);
 	down(&mount_sem);
+	down(&old_nd.dentry->d_inode->i_zombie);
 	error = -ENOENT;
+	if (IS_DEADDIR(new_nd.dentry->d_inode))
+		goto out2;
 	if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry))
 		goto out2;
 	if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry))
@@ -1599,19 +1628,12 @@ asmlinkage long sys_pivot_root(const char *new_root, const char *put_old)
 	} else if (!is_subdir(old_nd.dentry, new_nd.dentry))
 		goto out2;
 
-	error = -ENOMEM;
-	name = __getname();
-	if (!name)
-		goto out2;
-
-	move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL, "/");
-	move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL,
-			__d_path(old_nd.dentry, old_nd.mnt, new_nd.dentry,
-				new_nd.mnt, name, PAGE_SIZE));
-	putname(name);
+	move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL);
+	move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL);
 	chroot_fs_refs(root,root_mnt,new_nd.dentry,new_nd.mnt);
 	error = 0;
 out2:
+	up(&old_nd.dentry->d_inode->i_zombie);
 	up(&mount_sem);
 	dput(root);
 	mntput(root_mnt);
@@ -1629,10 +1651,11 @@ out0:
 int __init change_root(kdev_t new_root_dev,const char *put_old)
 {
 	kdev_t old_root_dev = ROOT_DEV;
-	struct vfsmount *old_rootmnt = mntget(current->fs->rootmnt);
+	struct vfsmount *old_rootmnt;
 	struct nameidata devfs_nd, nd;
 	int error = 0;
 
+	old_rootmnt = mntget(current->fs->rootmnt);
 	/*  First unmount devfs if mounted  */
 	if (path_init("/dev", LOOKUP_FOLLOW|LOOKUP_POSITIVE, &devfs_nd))
 		error = path_walk("/dev", &devfs_nd);
@@ -1675,7 +1698,8 @@ int __init change_root(kdev_t new_root_dev,const char *put_old)
 		printk(KERN_ERR "error %ld\n",blivet);
 		return error;
 	}
-	move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old", put_old);
+	/* FIXME: we should hold i_zombie on nd.dentry */
+	move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old");
 	mntput(old_rootmnt);
 	path_release(&nd);
 	return 0;
diff --git a/fs/sysv/fsync.c b/fs/sysv/fsync.c
index 3c9871be6..091605cd1 100644
--- a/fs/sysv/fsync.c
+++ b/fs/sysv/fsync.c
@@ -178,7 +178,7 @@ static int sync_tindirect(struct inode *inode, u32 *tiblockp, int convert,
 	return err;
 }
 
-int sysv_sync_file(struct file * file, struct dentry *dentry)
+int sysv_sync_file(struct file * file, struct dentry *dentry, int datasync)
 {
 	int wait, err = 0;
 	struct inode *inode = dentry->d_inode;
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index d7cc12187..bbd88336c 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -142,7 +142,7 @@ struct inode * sysv_new_inode(const struct inode * dir)
 	/* Change directory entry: */
 	inode->i_mode = 0;		/* for sysv_write_inode() */
 	inode->i_size = 0;		/* ditto */
-	sysv_write_inode(inode);	/* ensure inode not allocated again */
+	sysv_write_inode(inode, 0);	/* ensure inode not allocated again */
 					/* FIXME: caller may call this too. */
 	mark_inode_dirty(inode);	/* cleared by sysv_write_inode() */
 	/* That's it. */
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 455818959..9ac81643b 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -496,7 +496,6 @@ static struct super_block *sysv_read_super(struct super_block *sb,
 	sb->s_blocksize = sb->sv_block_size;
 	sb->s_blocksize_bits = sb->sv_block_size_bits;
 	/* set up enough so that it can read an inode */
-	sb->s_dev = dev;
 	sb->s_op = &sysv_sops;
 	root_inode = iget(sb,SYSV_ROOT_INO);
 	sb->s_root = d_alloc_root(root_inode);
@@ -1154,7 +1153,7 @@ static struct buffer_head * sysv_update_inode(struct inode * inode)
 	return bh;
 }
 
-void sysv_write_inode(struct inode * inode)
+void sysv_write_inode(struct inode * inode, int wait)
 {
 	struct buffer_head *bh;
 	bh = sysv_update_inode(inode);
diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c
index e7d067e62..bc107046d 100644
--- a/fs/udf/fsync.c
+++ b/fs/udf/fsync.c
@@ -96,7 +96,7 @@ static int sync_all_extents(struct inode * inode, int wait)
  *	even pass file to fsync ?
  */
 
-int udf_sync_file(struct file * file, struct dentry *dentry)
+int udf_sync_file(struct file * file, struct dentry *dentry, int datasync)
 {
 	int wait, err = 0;
 	struct inode *inode = dentry->d_inode;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 8c38883c0..360c12ba0 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1213,10 +1213,10 @@ udf_convert_permissions(struct FileEntry *fe)
  *	Written, tested, and released.
  */
 
-void udf_write_inode(struct inode * inode)
+void udf_write_inode(struct inode * inode, int sync)
 {
 	lock_kernel();
-	udf_update_inode(inode, 0);
+	udf_update_inode(inode, sync);
 	unlock_kernel();
 }
 
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 5f76abbb0..f3f575d7e 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1419,7 +1419,6 @@ udf_read_super(struct super_block *sb, void *options, int silent)
 	return sb;
 
 error_out:
-	sb->s_dev = NODEV;
 	if (UDF_SB_VAT(sb))
 		iput(UDF_SB_VAT(sb));
 	if (!(sb->s_flags & MS_RDONLY))
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 7dd00bc19..22cdd2c43 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -134,7 +134,7 @@ extern struct buffer_head * udf_bread(struct inode *, int, int, int *);
 extern void udf_read_inode(struct inode *);
 extern void udf_put_inode(struct inode *);
 extern void udf_delete_inode(struct inode *);
-extern void udf_write_inode(struct inode *);
+extern void udf_write_inode(struct inode *, int);
 extern long udf_locked_block_map(struct inode *, long);
 extern long udf_block_map(struct inode *, long);
 extern int inode_bmap(struct inode *, int, lb_addr *, Uint32 *, lb_addr *, Uint32 *, Uint32 *, struct buffer_head **);
@@ -184,7 +184,7 @@ extern int udf_prealloc_blocks(const struct inode *, Uint16, Uint32, Uint32);
 extern int udf_new_block(const struct inode *, Uint16, Uint32, int *);
 
 /* fsync.c */
-extern int udf_sync_file(struct file *, struct dentry *);
+extern int udf_sync_file(struct file *, struct dentry *, int);
 
 /* directory.c */
 extern Uint8 * udf_filead_read(struct inode *, Uint8 *, Uint8, lb_addr, int *, int *, struct buffer_head **, int *);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7801add9a..eb1d86d18 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -136,6 +136,7 @@ int ufs_frag_map(struct inode *inode, int frag)
 		       ufs_block_bmap(bread(sb->s_dev, uspi->s_sbbase + i,
 					    sb->s_blocksize),
 				      frag & uspi->s_apbmask, uspi, swab));
+		goto out;
 	}
 	frag -= 1 << (uspi->s_apbshift + uspi->s_fpbshift);
 	if (frag < (1 << (uspi->s_2apbshift + uspi->s_fpbshift))) {
@@ -744,9 +745,9 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
 	return 0;
 }
 
-void ufs_write_inode (struct inode * inode)
+void ufs_write_inode (struct inode * inode, int wait)
 {
-	ufs_update_inode (inode, 0);
+	ufs_update_inode (inode, wait);
 }
 
 int ufs_sync_inode (struct inode *inode)
diff --git a/fs/umsdos/inode.c b/fs/umsdos/inode.c
index 14b23467d..8820a49dd 100644
--- a/fs/umsdos/inode.c
+++ b/fs/umsdos/inode.c
@@ -293,11 +293,11 @@ out:
 /*
  * Update the disk with the inode content
  */
-void UMSDOS_write_inode (struct inode *inode)
+void UMSDOS_write_inode (struct inode *inode, int wait)
 {
 	struct iattr newattrs;
 
-	fat_write_inode (inode);
+	fat_write_inode (inode, wait);
 	newattrs.ia_mtime = inode->i_mtime;
 	newattrs.ia_atime = inode->i_atime;
 	newattrs.ia_ctime = inode->i_ctime;
-- 
cgit v1.2.3