summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorRalf Baechle <ralf@linux-mips.org>1994-11-28 11:59:19 +0000
committer <ralf@linux-mips.org>1994-11-28 11:59:19 +0000
commit1513ff9b7899ab588401c89db0e99903dbf5f886 (patch)
treef69cc81a940a502ea23d664c3ffb2d215a479667 /fs
Import of Linus's Linux 1.1.68
Diffstat (limited to 'fs')
-rw-r--r--fs/ChangeLog10
-rw-r--r--fs/Makefile116
-rw-r--r--fs/binfmt_elf.c791
-rw-r--r--fs/block_dev.c313
-rw-r--r--fs/buffer.c1849
-rw-r--r--fs/dcache.c253
-rw-r--r--fs/devices.c276
-rw-r--r--fs/exec.c961
-rw-r--r--fs/ext/Makefile31
-rw-r--r--fs/ext/dir.c131
-rw-r--r--fs/ext/file.c258
-rw-r--r--fs/ext/freelists.c341
-rw-r--r--fs/ext/fsync.c185
-rw-r--r--fs/ext/inode.c444
-rw-r--r--fs/ext/namei.c893
-rw-r--r--fs/ext/symlink.c108
-rw-r--r--fs/ext/truncate.c252
-rw-r--r--fs/ext2/CHANGES140
-rw-r--r--fs/ext2/Makefile31
-rw-r--r--fs/ext2/acl.c50
-rw-r--r--fs/ext2/balloc.c582
-rw-r--r--fs/ext2/bitmap.c25
-rw-r--r--fs/ext2/dir.c227
-rw-r--r--fs/ext2/file.c354
-rw-r--r--fs/ext2/fsync.c198
-rw-r--r--fs/ext2/ialloc.c554
-rw-r--r--fs/ext2/inode.c667
-rw-r--r--fs/ext2/ioctl.c75
-rw-r--r--fs/ext2/namei.c1098
-rw-r--r--fs/ext2/super.c755
-rw-r--r--fs/ext2/symlink.c127
-rw-r--r--fs/ext2/truncate.c349
-rw-r--r--fs/fcntl.c188
-rw-r--r--fs/fifo.c161
-rw-r--r--fs/file_table.c90
-rw-r--r--fs/filesystems.c99
-rw-r--r--fs/hpfs/Makefile30
-rw-r--r--fs/hpfs/README25
-rw-r--r--fs/hpfs/hpfs.h498
-rw-r--r--fs/hpfs/hpfs_fs.c1727
-rw-r--r--fs/inode.c572
-rw-r--r--fs/ioctl.c99
-rw-r--r--fs/isofs/Makefile30
-rw-r--r--fs/isofs/dir.c258
-rw-r--r--fs/isofs/file.c260
-rw-r--r--fs/isofs/inode.c707
-rw-r--r--fs/isofs/namei.c268
-rw-r--r--fs/isofs/rock.c523
-rw-r--r--fs/isofs/rock.h111
-rw-r--r--fs/isofs/symlink.c106
-rw-r--r--fs/isofs/util.c131
-rw-r--r--fs/locks.c506
-rw-r--r--fs/minix/Makefile31
-rw-r--r--fs/minix/bitmap.c208
-rw-r--r--fs/minix/dir.c108
-rw-r--r--fs/minix/file.c246
-rw-r--r--fs/minix/fsync.c159
-rw-r--r--fs/minix/inode.c513
-rw-r--r--fs/minix/namei.c830
-rw-r--r--fs/minix/symlink.c102
-rw-r--r--fs/minix/truncate.c184
-rw-r--r--fs/msdos/Makefile34
-rw-r--r--fs/msdos/dir.c126
-rw-r--r--fs/msdos/fat.c291
-rw-r--r--fs/msdos/file.c315
-rw-r--r--fs/msdos/inode.c494
-rw-r--r--fs/msdos/misc.c515
-rw-r--r--fs/msdos/mmap.c102
-rw-r--r--fs/msdos/namei.c620
-rw-r--r--fs/namei.c849
-rw-r--r--fs/nfs/Makefile31
-rw-r--r--fs/nfs/dir.c609
-rw-r--r--fs/nfs/file.c237
-rw-r--r--fs/nfs/inode.c240
-rw-r--r--fs/nfs/mmap.c103
-rw-r--r--fs/nfs/proc.c931
-rw-r--r--fs/nfs/sock.c242
-rw-r--r--fs/nfs/symlink.c116
-rw-r--r--fs/open.c516
-rw-r--r--fs/pipe.c426
-rw-r--r--fs/proc/Makefile30
-rw-r--r--fs/proc/array.c598
-rw-r--r--fs/proc/base.c155
-rw-r--r--fs/proc/fd.c180
-rw-r--r--fs/proc/inode.c191
-rw-r--r--fs/proc/kmsg.c76
-rw-r--r--fs/proc/link.c195
-rw-r--r--fs/proc/mem.c260
-rw-r--r--fs/proc/net.c300
-rw-r--r--fs/proc/root.c184
-rw-r--r--fs/read_write.c171
-rw-r--r--fs/select.c258
-rw-r--r--fs/stat.c207
-rw-r--r--fs/super.c689
-rw-r--r--fs/sysv/INTRO183
-rw-r--r--fs/sysv/Makefile31
-rw-r--r--fs/sysv/README37
-rw-r--r--fs/sysv/balloc.c329
-rw-r--r--fs/sysv/dir.c144
-rw-r--r--fs/sysv/file.c263
-rw-r--r--fs/sysv/fsync.c197
-rw-r--r--fs/sysv/ialloc.c218
-rw-r--r--fs/sysv/inode.c951
-rw-r--r--fs/sysv/mmap.c85
-rw-r--r--fs/sysv/namei.c822
-rw-r--r--fs/sysv/symlink.c110
-rw-r--r--fs/sysv/truncate.c283
-rw-r--r--fs/umsdos/Makefile44
-rw-r--r--fs/umsdos/README84
-rw-r--r--fs/umsdos/check.c55
-rw-r--r--fs/umsdos/dir.c706
-rw-r--r--fs/umsdos/emd.c505
-rw-r--r--fs/umsdos/file.c103
-rw-r--r--fs/umsdos/inode.c513
-rw-r--r--fs/umsdos/ioctl.c259
-rw-r--r--fs/umsdos/mangle.c478
-rw-r--r--fs/umsdos/namei.c1043
-rw-r--r--fs/umsdos/notes17
-rw-r--r--fs/umsdos/rdir.c239
-rw-r--r--fs/umsdos/symlink.c145
-rw-r--r--fs/xiafs/Makefile31
-rw-r--r--fs/xiafs/bitmap.c388
-rw-r--r--fs/xiafs/dir.c135
-rw-r--r--fs/xiafs/file.c252
-rw-r--r--fs/xiafs/fsync.c159
-rw-r--r--fs/xiafs/inode.c502
-rw-r--r--fs/xiafs/namei.c848
-rw-r--r--fs/xiafs/symlink.c118
-rw-r--r--fs/xiafs/truncate.c197
-rw-r--r--fs/xiafs/xiafs_mac.h32
130 files changed, 42011 insertions, 0 deletions
diff --git a/fs/ChangeLog b/fs/ChangeLog
new file mode 100644
index 000000000..056d07a86
--- /dev/null
+++ b/fs/ChangeLog
@@ -0,0 +1,10 @@
+Mon Oct 24 23:27:42 1994 Theodore Y. Ts'o (tytso@rt-11)
+
+ * fcntl.c (sys_fcntl): Liberalize security checks which Alan Cox
+ put in.
+
+Thu Oct 20 23:44:22 1994 Theodore Y. Ts'o (tytso@rt-11)
+
+ * fcntl.c (sys_fcntl): Add more of a security check to the
+ F_SETOWN fcntl().
+
diff --git a/fs/Makefile b/fs/Makefile
new file mode 100644
index 000000000..78dd720ce
--- /dev/null
+++ b/fs/Makefile
@@ -0,0 +1,116 @@
+#
+# Makefile for the linux filesystem.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+# Every filesystem directory. Only the 'dep' target walks this list, so
+# dependencies are generated for all of them whatever the configuration.
+SUBDIRS = minix ext ext2 msdos proc isofs nfs xiafs umsdos hpfs sysv
+
+# FS_SUBDIRS collects the filesystems enabled through CONFIG_*; each one is
+# built and archived into filesystems.a below. msdos and umsdos are the only
+# ones with an 'else' branch: when not configured in they are added to
+# MODULE_FS_SUBDIRS and built by the modules_fs target instead.
+ifdef CONFIG_MINIX_FS
+FS_SUBDIRS := $(FS_SUBDIRS) minix
+endif
+ifdef CONFIG_EXT_FS
+FS_SUBDIRS := $(FS_SUBDIRS) ext
+endif
+ifdef CONFIG_EXT2_FS
+FS_SUBDIRS := $(FS_SUBDIRS) ext2
+endif
+ifdef CONFIG_MSDOS_FS
+FS_SUBDIRS := $(FS_SUBDIRS) msdos
+else
+MODULE_FS_SUBDIRS := $(MODULE_FS_SUBDIRS) msdos
+endif
+ifdef CONFIG_PROC_FS
+FS_SUBDIRS := $(FS_SUBDIRS) proc
+endif
+ifdef CONFIG_ISO9660_FS
+FS_SUBDIRS := $(FS_SUBDIRS) isofs
+endif
+ifdef CONFIG_NFS_FS
+FS_SUBDIRS := $(FS_SUBDIRS) nfs
+endif
+ifdef CONFIG_XIA_FS
+FS_SUBDIRS := $(FS_SUBDIRS) xiafs
+endif
+ifdef CONFIG_UMSDOS_FS
+FS_SUBDIRS := $(FS_SUBDIRS) umsdos
+else
+MODULE_FS_SUBDIRS := $(MODULE_FS_SUBDIRS) umsdos
+endif
+ifdef CONFIG_SYSV_FS
+FS_SUBDIRS := $(FS_SUBDIRS) sysv
+endif
+ifdef CONFIG_HPFS_FS
+FS_SUBDIRS := $(FS_SUBDIRS) hpfs
+endif
+
+# The ELF loader is linked into fs.o (via BINFMTS in OBJS) when configured,
+# and otherwise listed in MODULES so the 'modules' target builds it.
+ifdef CONFIG_BINFMT_ELF
+BINFMTS := $(BINFMTS) binfmt_elf.o
+else
+MODULES := $(MODULES) binfmt_elf.o
+endif
+
+.c.s:
+ $(CC) $(CFLAGS) -S $<
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+.s.o:
+ $(AS) -o $*.o $<
+
+OBJS= open.o read_write.o inode.o devices.o file_table.o buffer.o super.o \
+ block_dev.o stat.o exec.o pipe.o namei.o fcntl.o ioctl.o \
+ select.o fifo.o locks.o filesystems.o dcache.o $(BINFMTS)
+
+all: fs.o filesystems.a modules modules_fs
+
+fs.o: $(OBJS)
+ $(LD) -r -o fs.o $(OBJS)
+
+# Depends on the empty 'dummy' target, so the archive is removed and rebuilt
+# on every run. Configured directories that do not exist are skipped.
+filesystems.a: dummy
+ rm -f filesystems.a
+ set -e; for i in $(FS_SUBDIRS); do \
+ test ! -d $$i || \
+ { $(MAKE) -C $$i; $(AR) rcs filesystems.a $$i/$$i.o; }; done
+
+# Module objects are compiled with -DMODULE added to CFLAGS and then
+# symlinked into ../modules. With no MODULES the target is an empty rule.
+ifdef MODULES
+
+modules:
+ $(MAKE) CFLAGS="$(CFLAGS) -DMODULE" $(MODULES)
+ (cd ../modules;for i in $(MODULES); do ln -sf ../fs/$$i .; done)
+
+else
+
+modules:
+
+endif
+
+# Runs the default target in each filesystem directory that was left out of
+# the kernel image; missing directories are skipped.
+ifdef MODULE_FS_SUBDIRS
+
+modules_fs:
+ set -e; for i in $(MODULE_FS_SUBDIRS); do \
+ test ! -d $$i || \
+ { $(MAKE) -C $$i; }; done
+
+
+else
+
+modules_fs:
+
+endif
+
+# Regenerate .depend for this directory, then recurse into every directory
+# named in SUBDIRS that exists.
+depend dep:
+ $(CPP) -M *.c > .depend
+ set -e; for i in $(SUBDIRS); do \
+ test ! -d $$i || $(MAKE) -C $$i dep; done
+
+# Target with no prerequisites and no recipe; used as a prerequisite above.
+dummy:
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
new file mode 100644
index 000000000..c2dc5cbca
--- /dev/null
+++ b/fs/binfmt_elf.c
@@ -0,0 +1,791 @@
+/*
+ * linux/fs/binfmt_elf.c
+ *
+ * These are the functions used to load ELF format executables as used
+ * on SVr4 machines. Information on the format may be found in the book
+ * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
+ * Tools".
+ *
+ * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
+ */
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/a.out.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/binfmts.h>
+#include <linux/string.h>
+#include <linux/fcntl.h>
+#include <linux/ptrace.h>
+#include <linux/malloc.h>
+#include <linux/shm.h>
+#include <linux/personality.h>
+
+#include <asm/segment.h>
+
+#include <linux/config.h>
+
+#ifndef CONFIG_BINFMT_ELF
+#include <linux/module.h>
+#include "../tools/version.h"
+#endif
+
+#include <linux/unistd.h>
+typedef int (*sysfun_p)();
+extern sysfun_p sys_call_table[];
+#define SYS(name) (sys_call_table[__NR_##name])
+
+#define DLINFO_ITEMS 8
+
+#include <linux/elf.h>
+
+static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs);
+static int load_elf_library(int fd);
+
+/*
+ * Binary format descriptor for ELF. It is registered by init_module() when
+ * built as a module and installed as current->binfmt by load_elf_binary().
+ * The initializer is positional; the two function slots receive the
+ * executable loader and the library loader defined in this file.
+ * NOTE(review): the second slot is presumably the use_count pointer that
+ * load_elf_binary() adjusts through current->binfmt->use_count -- confirm
+ * against struct linux_binfmt in <linux/binfmts.h>.
+ */
+struct linux_binfmt elf_format = {
+#ifdef CONFIG_BINFMT_ELF
+ NULL, NULL, load_elf_binary, load_elf_library, NULL
+#else
+ NULL, &mod_use_count_, load_elf_binary, load_elf_library, NULL
+#endif
+};
+
+/*
+ * We need to explicitly zero any fractional page after the data section
+ * (i.e. the start of the bss). Those bytes would otherwise contain junk
+ * from the file that should not be visible in memory.
+ *
+ * elf_bss: user-space address one past the last byte of file data.
+ *          Nothing is done when it is already aligned to a 4K boundary.
+ *
+ * The bytes from elf_bss up to the next 4K boundary are cleared through
+ * put_fs_byte(). If verify_area() rejects that range nothing is written.
+ */
+static void padzero(int elf_bss)
+{
+	unsigned int fpnt = elf_bss;
+	unsigned int nbyte;
+
+	if (!(fpnt & 0xfff))
+		return;
+
+	nbyte = (PAGE_SIZE - (fpnt & 0xfff)) & 0xfff;
+
+	/*
+	 * The result of verify_area() used to be ignored, so the loop below
+	 * ran even when the range had just failed verification.
+	 */
+	if (verify_area(VERIFY_WRITE, (void *) fpnt, nbyte))
+		return;
+
+	while (fpnt & 0xfff)
+		put_fs_byte(0, fpnt++);
+}
+
+/*
+ * Lay out the argument, environment and (optionally) interpreter info
+ * tables on the new program's user stack, and record the stack as a vm
+ * area of the current task.
+ *
+ * p         user address of the first argument string; the argument
+ *           strings are followed directly by the environment strings.
+ * argc      number of argument strings.
+ * envc      number of environment strings.
+ * exec      ELF header of the executable. When non-NULL, DLINFO_ITEMS
+ *           (tag, value) pairs are written for the program interpreter;
+ *           when NULL no such block is reserved or written.
+ * load_addr added to e_phoff to form the value stored under tag 3.
+ * ibcs      when zero, pointers to argv[] and envp[] are pushed as well.
+ *
+ * Layout produced, from higher to lower addresses:
+ *   [DLINFO_ITEMS pairs] envp[envc+1] argv[argc+1] [envp ptr, argv ptr] argc
+ *
+ * Also fills in arg_start/arg_end/env_start/env_end of current->mm.
+ * Returns the new stack pointer, which addresses the argc word.
+ */
+unsigned long * create_elf_tables(char * p,int argc,int envc,struct elfhdr * exec, unsigned int load_addr, int ibcs)
+{
+	unsigned long *argv,*envp, *dlinfo;
+	unsigned long * sp;
+	struct vm_area_struct *mpnt;
+
+	/*
+	 * Describe the region from the page containing p up to TASK_SIZE.
+	 * If the allocation fails the tables are still built, only the vm
+	 * area is not recorded.
+	 */
+	mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL);
+	if (mpnt) {
+		mpnt->vm_task = current;
+		mpnt->vm_start = PAGE_MASK & (unsigned long) p;
+		mpnt->vm_end = TASK_SIZE;
+		mpnt->vm_page_prot = PAGE_PRIVATE|PAGE_DIRTY;
+#ifdef VM_STACK_FLAGS
+		mpnt->vm_flags = VM_STACK_FLAGS;
+		mpnt->vm_pte = 0;
+#else
+#  ifdef VM_GROWSDOWN
+		mpnt->vm_flags = VM_GROWSDOWN;
+#  endif
+#endif
+		mpnt->vm_share = NULL;
+		mpnt->vm_inode = NULL;
+		mpnt->vm_offset = 0;
+		mpnt->vm_ops = NULL;
+		insert_vm_struct(current, mpnt);
+#ifndef VM_GROWSDOWN
+		current->mm->stk_vma = mpnt;
+#endif
+	}
+
+	/* Carve the tables out below p, aligned down to a 4 byte boundary. */
+	sp = (unsigned long *) (0xfffffffc & (unsigned long) p);
+	if (exec)
+		sp -= DLINFO_ITEMS*2;
+	dlinfo = sp;
+	sp -= envc+1;
+	envp = sp;
+	sp -= argc+1;
+	argv = sp;
+	if (!ibcs) {
+		put_fs_long((unsigned long)envp,--sp);
+		put_fs_long((unsigned long)argv,--sp);
+	}
+
+	/* The constant numbers (0-9) that we are writing here are
+	   described in the header file sys/auxv.h on at least
+	   some versions of SVr4 */
+	if (exec) { /* Put this here for an ELF program interpreter */
+		/* 3: address of the program headers */
+		put_fs_long(3,dlinfo++); put_fs_long(load_addr + exec->e_phoff,dlinfo++);
+		/* 4: size of one program header entry */
+		put_fs_long(4,dlinfo++); put_fs_long(sizeof(struct elf_phdr),dlinfo++);
+		/* 5: number of program headers */
+		put_fs_long(5,dlinfo++); put_fs_long(exec->e_phnum,dlinfo++);
+		/* 9: entry point of the executable */
+		put_fs_long(9,dlinfo++); put_fs_long((unsigned long) exec->e_entry,dlinfo++);
+		/* 7: written as SHM_RANGE_START */
+		put_fs_long(7,dlinfo++); put_fs_long(SHM_RANGE_START,dlinfo++);
+		/* 8: written as zero */
+		put_fs_long(8,dlinfo++); put_fs_long(0,dlinfo++);
+		/* 6: page size */
+		put_fs_long(6,dlinfo++); put_fs_long(PAGE_SIZE,dlinfo++);
+		/* 0: terminating pair */
+		put_fs_long(0,dlinfo++); put_fs_long(0,dlinfo++);
+	}
+
+	put_fs_long((unsigned long)argc,--sp);
+
+	/* Walk the NUL terminated strings, storing a pointer to each one. */
+	current->mm->arg_start = (unsigned long) p;
+	while (argc-->0) {
+		put_fs_long((unsigned long) p,argv++);
+		while (get_fs_byte(p++)) /* nothing */ ;
+	}
+	put_fs_long(0,argv);
+	current->mm->arg_end = current->mm->env_start = (unsigned long) p;
+	while (envc-->0) {
+		put_fs_long((unsigned long) p,envp++);
+		while (get_fs_byte(p++)) /* nothing */ ;
+	}
+	put_fs_long(0,envp);
+	current->mm->env_end = (unsigned long) p;
+	return sp;
+}
+
+
+/* This is much more generalized than the library routine read function,
+ so we keep this separate. Technically the library read function
+ is only provided so that we can read a.out libraries that have
+ an ELF header */
+
+/*
+ * Map an ELF program interpreter into the current address space.
+ *
+ * interp_elf_ex      ELF header of the interpreter (already read in).
+ * interpreter_inode  inode of the interpreter file.
+ *
+ * Every PT_LOAD segment is mmap'ed from the file (MAP_FIXED only for
+ * ET_EXEC images; for ET_DYN the address returned by the first mapping
+ * becomes the load address), the tail of the last data page is zeroed
+ * and any remaining bss is mapped anonymously.
+ *
+ * Returns the interpreter entry point (e_entry plus the load address),
+ * or 0xffffffff on any failure.
+ */
+static unsigned int load_elf_interp(struct elfhdr * interp_elf_ex,
+			     struct inode * interpreter_inode)
+{
+	struct file * file;
+	struct elf_phdr *elf_phdata = NULL;
+	struct elf_phdr *eppnt;
+	unsigned int len;
+	unsigned int load_addr;
+	int elf_exec_fileno;
+	int elf_bss;
+	int old_fs, retval;
+	unsigned int last_bss;
+	int error;
+	int i, k;
+
+	elf_bss = 0;
+	last_bss = 0;
+	error = load_addr = 0;
+
+	/* First of all, some simple consistency checks */
+	if ((interp_elf_ex->e_type != ET_EXEC &&
+	     interp_elf_ex->e_type != ET_DYN) ||
+	    (interp_elf_ex->e_machine != EM_386 && interp_elf_ex->e_machine != EM_486) ||
+	    (!interpreter_inode->i_op ||
+	     !interpreter_inode->i_op->default_file_ops ||
+	     !interpreter_inode->i_op->default_file_ops->mmap)) {
+		return 0xffffffff;
+	}
+
+	/* Now read in all of the header information */
+
+	if (sizeof(struct elf_phdr) * interp_elf_ex->e_phnum > PAGE_SIZE)
+		return 0xffffffff;
+
+	elf_phdata = (struct elf_phdr *)
+		kmalloc(sizeof(struct elf_phdr) * interp_elf_ex->e_phnum, GFP_KERNEL);
+	if (!elf_phdata)
+		return 0xffffffff;
+
+	/* The destination is a kernel buffer, hence the segment switch. */
+	old_fs = get_fs();
+	set_fs(get_ds());
+	retval = read_exec(interpreter_inode, interp_elf_ex->e_phoff, (char *) elf_phdata,
+			   sizeof(struct elf_phdr) * interp_elf_ex->e_phnum);
+	set_fs(old_fs);
+
+	/* Do not interpret an uninitialised table if the read failed. */
+	if (retval < 0) {
+		kfree(elf_phdata);
+		return 0xffffffff;
+	}
+
+	elf_exec_fileno = open_inode(interpreter_inode, O_RDONLY);
+	if (elf_exec_fileno < 0) {
+		/* The header table used to be leaked on this path. */
+		kfree(elf_phdata);
+		return 0xffffffff;
+	}
+	file = current->files->fd[elf_exec_fileno];
+
+	/* Now use mmap to map the interpreter's segments into memory. */
+	eppnt = elf_phdata;
+	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++)
+		if (eppnt->p_type == PT_LOAD) {
+			error = do_mmap(file,
+					eppnt->p_vaddr & 0xfffff000,
+					eppnt->p_filesz + (eppnt->p_vaddr & 0xfff),
+					PROT_READ | PROT_WRITE | PROT_EXEC,
+					MAP_PRIVATE | MAP_DENYWRITE | (interp_elf_ex->e_type == ET_EXEC ? MAP_FIXED : 0),
+					eppnt->p_offset & 0xfffff000);
+
+			if (!load_addr && interp_elf_ex->e_type == ET_DYN)
+				load_addr = error;
+			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
+			if (k > elf_bss) elf_bss = k;
+			if (error < 0 && error > -1024) break;	/* Real error */
+			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
+			if (k > last_bss) last_bss = k;
+		}
+
+	SYS(close)(elf_exec_fileno);
+	if (error < 0 && error > -1024) {
+		kfree(elf_phdata);
+		return 0xffffffff;
+	}
+
+	padzero(elf_bss);
+	len = (elf_bss + 0xfff) & 0xfffff000; /* What we have mapped so far */
+
+	/* Map the last of the bss segment */
+	if (last_bss > len)
+		do_mmap(NULL, len, last_bss-len,
+			PROT_READ|PROT_WRITE|PROT_EXEC,
+			MAP_FIXED|MAP_PRIVATE, 0);
+	kfree(elf_phdata);
+
+	return ((unsigned int) interp_elf_ex->e_entry) + load_addr;
+}
+
+/*
+ * Load an a.out format program interpreter into the current address space.
+ *
+ * interp_ex          a.out header of the interpreter (already read in).
+ * interpreter_inode  inode of the interpreter file.
+ *
+ * end_code, end_data and brk of current->mm are set from the header first.
+ * Text and data are then read into an anonymous mapping at address 0 and
+ * a second anonymous mapping is made for the bss behind them.
+ *
+ * Returns a_entry, or 0xffffffff when the magic number is not one of
+ * OMAGIC/ZMAGIC/QMAGIC or when reading the image fails.
+ */
+static unsigned int load_aout_interp(struct exec * interp_ex,
+			     struct inode * interpreter_inode)
+{
+	unsigned long image_size;
+	unsigned long file_off;
+	unsigned int entry;
+	char *load_at;
+	int status;
+
+	current->mm->end_code = interp_ex->a_text;
+	current->mm->end_data = interp_ex->a_data + current->mm->end_code;
+	current->mm->brk = interp_ex->a_bss + current->mm->end_data;
+	entry = interp_ex->a_entry;
+
+	/* Work out where in the file the image starts and where it goes. */
+	switch (N_MAGIC(*interp_ex)) {
+	case OMAGIC:
+		file_off = 32;
+		load_at = (char *) 0;
+		break;
+	case ZMAGIC:
+	case QMAGIC:
+		file_off = N_TXTOFF(*interp_ex);
+		load_at = (char *) N_TXTADDR(*interp_ex);
+		break;
+	default:
+		/* Unknown flavour: nothing gets mapped. */
+		return 0xffffffff;
+	}
+
+	image_size = interp_ex->a_text+interp_ex->a_data;
+
+	/* Text and data: anonymous mapping at 0, filled from the file. */
+	do_mmap(NULL, 0, image_size,
+		PROT_READ|PROT_WRITE|PROT_EXEC,
+		MAP_FIXED|MAP_PRIVATE, 0);
+	status = read_exec(interpreter_inode, file_off, load_at, image_size);
+	if (status < 0)
+		return 0xffffffff;
+
+	/* The bss starts on the page boundary following text and data. */
+	do_mmap(NULL, (image_size + 0xfff) & 0xfffff000,
+		interp_ex->a_bss,
+		PROT_READ|PROT_WRITE|PROT_EXEC,
+		MAP_FIXED|MAP_PRIVATE, 0);
+
+	return entry;
+}
+
+/*
+ * These are the functions used to load ELF style executables and shared
+ * libraries. There is no binary dependent code anywhere else.
+ */
+
+#define INTERPRETER_NONE 0
+#define INTERPRETER_AOUT 1
+#define INTERPRETER_ELF 2
+
+
+/*
+ * Load an ELF executable into the current process.
+ *
+ * bprm  exec parameter block; bprm->buf holds the start of the file,
+ *       bprm->inode the file itself.
+ * regs  user registers; eip and esp are set for the new image.
+ *
+ * Phase one (may still fail cleanly): validate the header, read the
+ * program headers, and if there is a PT_INTERP entry read the interpreter
+ * name and header and decide whether it is a.out or ELF.
+ * Phase two (after flush_old_exec()): map the interpreter and every
+ * PT_LOAD segment, build the stack tables, set up brk/bss and start the
+ * program at the interpreter's or the executable's entry point.
+ *
+ * Returns 0 on success, and also 0 after raising SIGSEGV when the
+ * interpreter cannot be loaded past the point of no return. Otherwise a
+ * negative errno: -ENOEXEC when this is not a suitable ELF file, -ENOMEM,
+ * -ELIBBAD, -E2BIG, or whatever read_exec()/namei()/open_inode() returned.
+ *
+ * NOTE(review): interpreter_inode is not released on the error returns
+ * taken after namei() succeeded; fixing that needs it to be tracked from
+ * a known initial value.
+ */
+static int
+load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+{
+	struct elfhdr elf_ex;
+	struct elfhdr interp_elf_ex;
+	struct file * file;
+	struct exec interp_ex;
+	struct inode *interpreter_inode;
+	unsigned int load_addr;
+	unsigned int interpreter_type = INTERPRETER_NONE;
+	unsigned char ibcs2_interpreter;
+	int i;
+	int old_fs;
+	int error;
+	struct elf_phdr * elf_ppnt, *elf_phdata;
+	int elf_exec_fileno;
+	unsigned int elf_bss, k, elf_brk;
+	int retval;
+	char * elf_interpreter;
+	unsigned int elf_entry;
+	int status;
+	unsigned int start_code, end_code, end_data;
+	unsigned int elf_stack;
+	char passed_fileno[6];
+
+#ifndef CONFIG_BINFMT_ELF
+	MOD_INC_USE_COUNT;
+#endif
+
+	ibcs2_interpreter = 0;
+	status = 0;
+	load_addr = 0;
+	elf_ex = *((struct elfhdr *) bprm->buf);	  /* exec-header */
+
+	if (elf_ex.e_ident[0] != 0x7f ||
+	    strncmp(&elf_ex.e_ident[1], "ELF",3) != 0) {
+#ifndef CONFIG_BINFMT_ELF
+		MOD_DEC_USE_COUNT;
+#endif
+		return -ENOEXEC;
+	}
+
+
+	/* First of all, some simple consistency checks */
+	if (elf_ex.e_type != ET_EXEC ||
+	    (elf_ex.e_machine != EM_386 && elf_ex.e_machine != EM_486) ||
+	    (!bprm->inode->i_op || !bprm->inode->i_op->default_file_ops ||
+	     !bprm->inode->i_op->default_file_ops->mmap)) {
+#ifndef CONFIG_BINFMT_ELF
+		MOD_DEC_USE_COUNT;
+#endif
+		return -ENOEXEC;
+	}
+
+	/* Now read in all of the header information */
+
+	elf_phdata = (struct elf_phdr *) kmalloc(elf_ex.e_phentsize *
+						 elf_ex.e_phnum, GFP_KERNEL);
+	/* The allocation result used to be passed to read_exec() unchecked. */
+	if (!elf_phdata) {
+#ifndef CONFIG_BINFMT_ELF
+		MOD_DEC_USE_COUNT;
+#endif
+		return -ENOMEM;
+	}
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	retval = read_exec(bprm->inode, elf_ex.e_phoff, (char *) elf_phdata,
+			   elf_ex.e_phentsize * elf_ex.e_phnum);
+	set_fs(old_fs);
+	if (retval < 0) {
+		kfree (elf_phdata);
+#ifndef CONFIG_BINFMT_ELF
+		MOD_DEC_USE_COUNT;
+#endif
+		return retval;
+	}
+
+	elf_ppnt = elf_phdata;
+
+	elf_bss = 0;
+	elf_brk = 0;
+
+	elf_exec_fileno = open_inode(bprm->inode, O_RDONLY);
+
+	if (elf_exec_fileno < 0) {
+		kfree (elf_phdata);
+#ifndef CONFIG_BINFMT_ELF
+		MOD_DEC_USE_COUNT;
+#endif
+		return elf_exec_fileno;
+	}
+
+	file = current->files->fd[elf_exec_fileno];
+
+	elf_stack = 0xffffffff;
+	elf_interpreter = NULL;
+	start_code = 0;
+	end_code = 0;
+	end_data = 0;
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+
+	for (i = 0; i < elf_ex.e_phnum; i++) {
+		if (elf_ppnt->p_type == PT_INTERP) {
+			/* This is the program interpreter used for shared libraries -
+			   for now assume that this is an a.out format binary */
+
+			elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz,
+							   GFP_KERNEL);
+			if (!elf_interpreter) {
+				set_fs(old_fs);
+				SYS(close)(elf_exec_fileno);
+				kfree (elf_phdata);
+#ifndef CONFIG_BINFMT_ELF
+				MOD_DEC_USE_COUNT;
+#endif
+				return -ENOMEM;
+			}
+
+			retval = read_exec(bprm->inode,elf_ppnt->p_offset,elf_interpreter,
+					   elf_ppnt->p_filesz);
+			/* If the program interpreter is one of these two,
+			   then assume an iBCS2 image. Otherwise assume
+			   a native linux image. */
+			/* NOTE(review): nothing guarantees the name read from
+			   the file is NUL terminated, and it is compared even
+			   when the read above failed. */
+			if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
+			    strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
+				ibcs2_interpreter = 1;
+#if 0
+			printk("Using ELF interpreter %s\n", elf_interpreter);
+#endif
+			if (retval >= 0)
+				retval = namei(elf_interpreter, &interpreter_inode);
+			if (retval >= 0)
+				retval = read_exec(interpreter_inode,0,bprm->buf,128);
+
+			if (retval >= 0) {
+				interp_ex = *((struct exec *) bprm->buf);		/* exec-header */
+				interp_elf_ex = *((struct elfhdr *) bprm->buf);	  /* exec-header */
+			}
+			if (retval < 0) {
+				/* Do not return with the kernel data segment
+				   still selected, nor with the descriptor
+				   left open in the caller's file table. */
+				set_fs(old_fs);
+				SYS(close)(elf_exec_fileno);
+				kfree (elf_phdata);
+				kfree(elf_interpreter);
+#ifndef CONFIG_BINFMT_ELF
+				MOD_DEC_USE_COUNT;
+#endif
+				return retval;
+			}
+		}
+		elf_ppnt++;
+	}
+
+	set_fs(old_fs);
+
+	/* Some simple consistency checks for the interpreter */
+	if (elf_interpreter) {
+		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
+		if (retval < 0) {
+			SYS(close)(elf_exec_fileno);
+			kfree(elf_interpreter);
+			kfree(elf_phdata);
+#ifndef CONFIG_BINFMT_ELF
+			MOD_DEC_USE_COUNT;
+#endif
+			return -ELIBACC;
+		}
+		/* Now figure out which format our binary is */
+		if ((N_MAGIC(interp_ex) != OMAGIC) &&
+		    (N_MAGIC(interp_ex) != ZMAGIC) &&
+		    (N_MAGIC(interp_ex) != QMAGIC))
+			interpreter_type = INTERPRETER_ELF;
+
+		if (interp_elf_ex.e_ident[0] != 0x7f ||
+		    strncmp(&interp_elf_ex.e_ident[1], "ELF",3) != 0)
+			interpreter_type &= ~INTERPRETER_ELF;
+
+		if (!interpreter_type) {
+			SYS(close)(elf_exec_fileno);
+			kfree(elf_interpreter);
+			kfree(elf_phdata);
+#ifndef CONFIG_BINFMT_ELF
+			MOD_DEC_USE_COUNT;
+#endif
+			return -ELIBBAD;
+		}
+	}
+
+	/* OK, we are done with that, now set up the arg stuff,
+	   and then start this sucker up */
+
+	if (!bprm->sh_bang) {
+		char * passed_p;
+
+		/* An a.out interpreter is handed the descriptor of the
+		   executable as an extra leading argument. */
+		if (interpreter_type == INTERPRETER_AOUT) {
+			sprintf(passed_fileno, "%d", elf_exec_fileno);
+			passed_p = passed_fileno;
+
+			if (elf_interpreter) {
+				bprm->p = copy_strings(1,&passed_p,bprm->page,bprm->p,2);
+				bprm->argc++;
+			}
+		}
+		if (!bprm->p) {
+			SYS(close)(elf_exec_fileno);
+			if (elf_interpreter) {
+				kfree(elf_interpreter);
+			}
+			kfree (elf_phdata);
+#ifndef CONFIG_BINFMT_ELF
+			MOD_DEC_USE_COUNT;
+#endif
+			return -E2BIG;
+		}
+	}
+
+	/* OK, This is the point of no return */
+	flush_old_exec(bprm);
+
+	current->mm->end_data = 0;
+	current->mm->end_code = 0;
+	current->mm->start_mmap = ELF_START_MMAP;
+	current->mm->mmap = NULL;
+	elf_entry = (unsigned int) elf_ex.e_entry;
+
+	/* Do this so that we can load the interpreter, if need be. We will
+	   change some of these later */
+	current->mm->rss = 0;
+	bprm->p += change_ldt(0, bprm->page);
+	current->mm->start_stack = bprm->p;
+
+	/* Now we do a little grungy work by mmaping the ELF image into
+	   the correct location in memory. At this point, we assume that
+	   the image should be loaded at fixed address, not at a variable
+	   address. */
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+
+	elf_ppnt = elf_phdata;
+	for (i = 0; i < elf_ex.e_phnum; i++) {
+
+		if (elf_ppnt->p_type == PT_INTERP) {
+			/* Set these up so that we are able to load the interpreter */
+			/* Now load the interpreter into user address space */
+			set_fs(old_fs);
+
+			if (interpreter_type & 1) elf_entry =
+				load_aout_interp(&interp_ex, interpreter_inode);
+
+			if (interpreter_type & 2) elf_entry =
+				load_elf_interp(&interp_elf_ex, interpreter_inode);
+
+			old_fs = get_fs();
+			set_fs(get_ds());
+
+			iput(interpreter_inode);
+			kfree(elf_interpreter);
+
+			if (elf_entry == 0xffffffff) {
+				printk("Unable to load interpreter\n");
+				kfree(elf_phdata);
+				send_sig(SIGSEGV, current, 0);
+				/* Put the caller's segment back before
+				   leaving; it used to stay at get_ds(). */
+				set_fs(old_fs);
+#ifndef CONFIG_BINFMT_ELF
+				MOD_DEC_USE_COUNT;
+#endif
+				return 0;
+			}
+		}
+
+
+		if (elf_ppnt->p_type == PT_LOAD) {
+			error = do_mmap(file,
+					elf_ppnt->p_vaddr & 0xfffff000,
+					elf_ppnt->p_filesz + (elf_ppnt->p_vaddr & 0xfff),
+					PROT_READ | PROT_WRITE | PROT_EXEC,
+					MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+					elf_ppnt->p_offset & 0xfffff000);
+
+#ifdef LOW_ELF_STACK
+			/* '<' binds tighter than '&', so the mask needs its
+			   own parentheses. */
+			if ((elf_ppnt->p_vaddr & 0xfffff000) < elf_stack)
+				elf_stack = elf_ppnt->p_vaddr & 0xfffff000;
+#endif
+
+			if (!load_addr)
+				load_addr = elf_ppnt->p_vaddr - elf_ppnt->p_offset;
+			/* NOTE(review): this keeps the highest segment start
+			   as start_code -- confirm that is intended. */
+			k = elf_ppnt->p_vaddr;
+			if (k > start_code) start_code = k;
+			k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
+			if (k > elf_bss) elf_bss = k;
+			/* NOTE(review): '| PROT_WRITE' makes the first operand
+			   always true, so end_code tracks every segment; a
+			   flag test was presumably meant -- confirm. */
+			if ((elf_ppnt->p_flags | PROT_WRITE) && end_code < k)
+				end_code = k;
+			if (end_data < k) end_data = k;
+			k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
+			if (k > elf_brk) elf_brk = k;
+		}
+		elf_ppnt++;
+	}
+	set_fs(old_fs);
+
+	kfree(elf_phdata);
+
+	/* An a.out interpreter keeps the descriptor; see passed_fileno. */
+	if (interpreter_type != INTERPRETER_AOUT) SYS(close)(elf_exec_fileno);
+	current->personality = (ibcs2_interpreter ? PER_SVR4 : PER_LINUX);
+
+	/* Move the use counts from the old exec domain and binary format
+	   over to the new ones. */
+	if (current->exec_domain && current->exec_domain->use_count)
+		(*current->exec_domain->use_count)--;
+	if (current->binfmt && current->binfmt->use_count)
+		(*current->binfmt->use_count)--;
+	current->exec_domain = lookup_exec_domain(current->personality);
+	current->binfmt = &elf_format;
+	if (current->exec_domain && current->exec_domain->use_count)
+		(*current->exec_domain->use_count)++;
+	if (current->binfmt && current->binfmt->use_count)
+		(*current->binfmt->use_count)++;
+
+#ifndef VM_STACK_FLAGS
+	current->executable = bprm->inode;
+	bprm->inode->i_count++;
+#endif
+#ifdef LOW_ELF_STACK
+	current->start_stack = p = elf_stack - 4;
+#endif
+	bprm->p -= MAX_ARG_PAGES*PAGE_SIZE;
+	bprm->p = (unsigned long)
+		create_elf_tables((char *)bprm->p,
+				  bprm->argc,
+				  bprm->envc,
+				  (interpreter_type == INTERPRETER_ELF ? &elf_ex : NULL),
+				  load_addr,
+				  (interpreter_type == INTERPRETER_AOUT ? 0 : 1));
+	if (interpreter_type == INTERPRETER_AOUT)
+		current->mm->arg_start += strlen(passed_fileno) + 1;
+	current->mm->start_brk = current->mm->brk = elf_brk;
+	current->mm->end_code = end_code;
+	current->mm->start_code = start_code;
+	current->mm->end_data = end_data;
+	current->mm->start_stack = bprm->p;
+	current->suid = current->euid = current->fsuid = bprm->e_uid;
+	current->sgid = current->egid = current->fsgid = bprm->e_gid;
+
+	/* Calling sys_brk effectively mmaps the pages that we need for the bss and break
+	   sections */
+	current->mm->brk = (elf_bss + 0xfff) & 0xfffff000;
+	SYS(brk)((elf_brk + 0xfff) & 0xfffff000);
+
+	padzero(elf_bss);
+
+#if 0
+	printk("(start_brk) %x\n" , current->mm->start_brk);
+	printk("(end_code) %x\n" , current->mm->end_code);
+	printk("(start_code) %x\n" , current->mm->start_code);
+	printk("(end_data) %x\n" , current->mm->end_data);
+	printk("(start_stack) %x\n" , current->mm->start_stack);
+	printk("(brk) %x\n" , current->mm->brk);
+#endif
+
+	/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
+	   and some applications "depend" upon this behavior.
+	   Since we do not have the power to recompile these, we
+	   emulate the SVr4 behavior. Sigh. */
+	error = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC,
+			MAP_FIXED | MAP_PRIVATE, 0);
+
+	regs->eip = elf_entry;		/* eip, magic happens :-) */
+	regs->esp = bprm->p;			/* stack pointer */
+	if (current->flags & PF_PTRACED)
+		send_sig(SIGTRAP, current, 0);
+#ifndef CONFIG_BINFMT_ELF
+	MOD_DEC_USE_COUNT;
+#endif
+	return 0;
+}
+
+/*
+ * This is really simpleminded and specialized - we are loading an
+ * a.out library that is given an ELF header.
+ *
+ * fd  open descriptor of the library in the current process.
+ *
+ * The file must be an ET_EXEC image for the 386/486 with at most two
+ * program headers, exactly one of which is PT_LOAD. That segment is
+ * mapped at its fixed address, the tail of its last page is zeroed and
+ * any bss beyond it is mapped anonymously.
+ *
+ * Returns 0 on success, -EACCES if the header cannot be read, -ENOEXEC
+ * if the file is unsuitable, -ENOMEM, or the error from read_exec() or
+ * do_mmap().
+ */
+static int
+load_elf_library(int fd){
+	struct file * file;
+	struct elfhdr elf_ex;
+	struct elf_phdr *elf_phdata = NULL;
+	struct elf_phdr *eppnt;
+	struct inode * inode;
+	unsigned int len;
+	int elf_bss;
+	int old_fs, retval;
+	unsigned int bss;
+	int error;
+	int i,j, k;
+
+#ifndef CONFIG_BINFMT_ELF
+	MOD_INC_USE_COUNT;
+#endif
+
+	len = 0;
+	file = current->files->fd[fd];
+	inode = file->f_inode;
+	elf_bss = 0;
+
+	set_fs(KERNEL_DS);
+	if (file->f_op->read(inode, file, (char *) &elf_ex, sizeof(elf_ex)) != sizeof(elf_ex)) {
+		/* Undo the segment switch; it used to stay at KERNEL_DS. */
+		set_fs(USER_DS);
+		SYS(close)(fd);
+#ifndef CONFIG_BINFMT_ELF
+		MOD_DEC_USE_COUNT;
+#endif
+		return -EACCES;
+	}
+	set_fs(USER_DS);
+
+	if (elf_ex.e_ident[0] != 0x7f ||
+	    strncmp(&elf_ex.e_ident[1], "ELF",3) != 0) {
+#ifndef CONFIG_BINFMT_ELF
+		MOD_DEC_USE_COUNT;
+#endif
+		return -ENOEXEC;
+	}
+
+	/* First of all, some simple consistency checks */
+	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
+	    (elf_ex.e_machine != EM_386 && elf_ex.e_machine != EM_486) ||
+	    (!inode->i_op || !inode->i_op->default_file_ops ||
+	     !inode->i_op->default_file_ops->mmap)) {
+#ifndef CONFIG_BINFMT_ELF
+		MOD_DEC_USE_COUNT;
+#endif
+		return -ENOEXEC;
+	}
+
+	/* Now read in all of the header information */
+
+	if (sizeof(struct elf_phdr) * elf_ex.e_phnum > PAGE_SIZE) {
+#ifndef CONFIG_BINFMT_ELF
+		MOD_DEC_USE_COUNT;
+#endif
+		return -ENOEXEC;
+	}
+
+	elf_phdata = (struct elf_phdr *)
+		kmalloc(sizeof(struct elf_phdr) * elf_ex.e_phnum, GFP_KERNEL);
+	if (!elf_phdata) {
+#ifndef CONFIG_BINFMT_ELF
+		MOD_DEC_USE_COUNT;
+#endif
+		return -ENOMEM;
+	}
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	retval = read_exec(inode, elf_ex.e_phoff, (char *) elf_phdata,
+			   sizeof(struct elf_phdr) * elf_ex.e_phnum);
+	set_fs(old_fs);
+
+	/* Do not interpret an uninitialised table if the read failed. */
+	if (retval < 0) {
+		kfree(elf_phdata);
+#ifndef CONFIG_BINFMT_ELF
+		MOD_DEC_USE_COUNT;
+#endif
+		return retval;
+	}
+
+	j = 0;
+	for (i = 0; i < elf_ex.e_phnum; i++)
+		if ((elf_phdata + i)->p_type == PT_LOAD) j++;
+
+	if (j != 1) {
+		kfree(elf_phdata);
+#ifndef CONFIG_BINFMT_ELF
+		MOD_DEC_USE_COUNT;
+#endif
+		return -ENOEXEC;
+	}
+
+	/*
+	 * Find the single PT_LOAD entry with a separate cursor. The table
+	 * pointer itself used to be advanced, so the later kfree() was
+	 * handed an address inside the allocation whenever the loadable
+	 * segment was not the first entry.
+	 */
+	eppnt = elf_phdata;
+	while (eppnt->p_type != PT_LOAD) eppnt++;
+
+	/* Now use mmap to map the library into memory. */
+	error = do_mmap(file,
+			eppnt->p_vaddr & 0xfffff000,
+			eppnt->p_filesz + (eppnt->p_vaddr & 0xfff),
+			PROT_READ | PROT_WRITE | PROT_EXEC,
+			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+			eppnt->p_offset & 0xfffff000);
+
+	k = eppnt->p_vaddr + eppnt->p_filesz;
+	if (k > elf_bss) elf_bss = k;
+
+	SYS(close)(fd);
+	/*
+	 * '!=' binds tighter than '&': without the parentheses this tested
+	 * (error != p_vaddr) & 0xfffff000, which is always zero, so a failed
+	 * mapping was never noticed.
+	 */
+	if (error != (eppnt->p_vaddr & 0xfffff000)) {
+		kfree(elf_phdata);
+#ifndef CONFIG_BINFMT_ELF
+		MOD_DEC_USE_COUNT;
+#endif
+		return error;
+	}
+
+	padzero(elf_bss);
+
+	/* Map whatever part of the bss lies beyond the file-backed pages. */
+	len = (eppnt->p_filesz + eppnt->p_vaddr+ 0xfff) & 0xfffff000;
+	bss = eppnt->p_memsz + eppnt->p_vaddr;
+	if (bss > len)
+		do_mmap(NULL, len, bss-len,
+			PROT_READ|PROT_WRITE|PROT_EXEC,
+			MAP_FIXED|MAP_PRIVATE, 0);
+	kfree(elf_phdata);
+#ifndef CONFIG_BINFMT_ELF
+	MOD_DEC_USE_COUNT;
+#endif
+	return 0;
+}
+
+/* Module entry points; only compiled when the ELF loader is not built in. */
+#ifndef CONFIG_BINFMT_ELF
+char kernel_version[] = UTS_RELEASE;
+
+/* Module load hook: registers the ELF binary format. Always returns 0. */
+int init_module(void) {
+ /* Install the ELF loader (the only format registered here).
+ * N.B. We *rely* on the table being the right size with the
+ * right number of free slots...
+ */
+ register_binfmt(&elf_format);
+ return 0;
+}
+
+/* Module unload hook: unregisters the ELF binary format. */
+void cleanup_module( void) {
+
+ /* Only a message is logged when the module is still in use; the
+ * format is unregistered below regardless. */
+ if (MOD_IN_USE)
+ printk(KERN_INFO "iBCS: module is in use, remove delayed\n");
+
+ /* Remove the ELF loader. */
+ unregister_binfmt(&elf_format);
+}
+#endif
diff --git a/fs/block_dev.c b/fs/block_dev.c
new file mode 100644
index 000000000..d19af6fa0
--- /dev/null
+++ b/fs/block_dev.c
@@ -0,0 +1,313 @@
+/*
+ * linux/fs/block_dev.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/locks.h>
+#include <linux/fcntl.h>
+#include <asm/segment.h>
+#include <asm/system.h>
+
+extern int *blk_size[];
+extern int *blksize_size[];
+
+#define NBUF 64
+
+/*
+ * Write out the buffers an O_SYNC block_write() has queued, wait for each
+ * of them and release it.  Returns non-zero if any buffer is not uptodate
+ * after the write, zero otherwise (including when nr is zero).
+ */
+static int block_write_sync(struct buffer_head ** list, int nr)
+{
+	int i, failed = 0;
+
+	if (!nr)
+		return 0;
+	ll_rw_block(WRITE, nr, list);
+	for (i = 0; i < nr; i++) {
+		wait_on_buffer(list[i]);
+		if (!list[i]->b_uptodate)
+			failed = 1;
+		brelse(list[i]);
+	}
+	return failed;
+}
+
+/*
+ * Write 'count' bytes from the user buffer 'buf' to the block device
+ * inode->i_rdev, starting at filp->f_pos, and advance filp->f_pos.
+ *
+ * Returns the number of bytes written (possibly short when the end of
+ * the device is reached), -EPERM if the device is read-only, or -EIO on
+ * an I/O error (when nothing was written, or when an O_SYNC write-out
+ * failed).
+ *
+ * For O_SYNC files the dirtied buffers are collected in bufferlist[] and
+ * written out NBUF at a time.  Every exit path after the first buffer has
+ * been queued goes through the final flush, so queued buffers are always
+ * written and released.
+ */
+int block_write(struct inode * inode, struct file * filp, char * buf, int count)
+{
+	int blocksize, blocksize_bits, i, j, buffercount,write_error;
+	int block, blocks;
+	loff_t offset;
+	int chars;
+	int written = 0;
+	int cluster_list[8];
+	struct buffer_head * bhlist[NBUF];
+	int blocks_per_cluster;
+	unsigned int size;
+	unsigned int dev;
+	struct buffer_head * bh, *bufferlist[NBUF];
+	register char * p;
+	int excess;
+	int io_error = 0;	/* set when a buffer could not be obtained/read */
+
+	write_error = buffercount = 0;
+	dev = inode->i_rdev;
+	if ( is_read_only( inode->i_rdev ))
+		return -EPERM;
+	blocksize = BLOCK_SIZE;
+	if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
+		blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
+
+	/* Count the shifts needed to bring blocksize down to 1. */
+	i = blocksize;
+	blocksize_bits = 0;
+	while(i != 1) {
+		blocksize_bits++;
+		i >>= 1;
+	}
+
+	blocks_per_cluster = PAGE_SIZE / blocksize;
+
+	block = filp->f_pos >> blocksize_bits;
+	offset = filp->f_pos & (blocksize-1);
+
+	/* Device size in units of blocksize; unlimited if the major has no table. */
+	if (blk_size[MAJOR(dev)])
+		size = ((loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS) >> blocksize_bits;
+	else
+		size = INT_MAX;
+	while (count>0) {
+		/* End of device: stop, but still flush what is queued. */
+		if (block >= size)
+			break;
+		chars = blocksize - offset;
+		if (chars > count)
+			chars=count;
+
+		for(i=0; i<blocks_per_cluster; i++) cluster_list[i] = block+i;
+		if((block % blocks_per_cluster) == 0)
+			generate_cluster(dev, cluster_list, blocksize);
+		bh = getblk(dev, block, blocksize);
+
+		/* A partial overwrite needs the old block contents first. */
+		if (chars != blocksize && !bh->b_uptodate) {
+			if(!filp->f_reada ||
+			   !read_ahead[MAJOR(dev)]) {
+				/* We do this to force the read of a single buffer */
+				brelse(bh);
+				bh = bread(dev,block,blocksize);
+			} else {
+				/* Read-ahead before write */
+				blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2;
+				/* The target block itself must always be read. */
+				if (blocks < 1) blocks = 1;
+				if (block + blocks > size) blocks = size - block;
+				if (blocks > NBUF) blocks=NBUF;
+				excess = (block + blocks) % blocks_per_cluster;
+				if ( blocks > excess )
+					blocks -= excess;
+				bhlist[0] = bh;
+				for(i=1; i<blocks; i++){
+					if(((i+block) % blocks_per_cluster) == 0) {
+						for(j=0; j<blocks_per_cluster; j++) cluster_list[j] = block+i+j;
+						generate_cluster(dev, cluster_list, blocksize);
+					}
+					bhlist[i] = getblk (dev, block+i, blocksize);
+					if(!bhlist[i]){
+						while(i >= 0) brelse(bhlist[i--]);
+						io_error = 1;
+						goto out;
+					}
+				}
+				ll_rw_block(READ, blocks, bhlist);
+				for(i=1; i<blocks; i++) brelse(bhlist[i]);
+				wait_on_buffer(bh);
+				/* Same failure handling as bread(): drop the buffer. */
+				if (!bh->b_uptodate) {
+					brelse(bh);
+					bh = NULL;
+				}
+			}
+		}
+		block++;
+		if (!bh) {
+			io_error = 1;
+			break;
+		}
+		p = offset + bh->b_data;
+		offset = 0;
+		filp->f_pos += chars;
+		written += chars;
+		count -= chars;
+		memcpy_fromfs(p,buf,chars);
+		p += chars;
+		buf += chars;
+		bh->b_uptodate = 1;
+		mark_buffer_dirty(bh, 0);
+		if (filp->f_flags & O_SYNC)
+			bufferlist[buffercount++] = bh;
+		else
+			brelse(bh);
+		if (buffercount == NBUF){
+			if (block_write_sync(bufferlist, buffercount))
+				write_error = 1;
+			buffercount=0;
+		}
+		if(write_error)
+			break;
+	}
+out:
+	/* Write and release whatever is still queued for O_SYNC. */
+	if (block_write_sync(bufferlist, buffercount))
+		write_error = 1;
+	if (!io_error)
+		filp->f_reada = 1;
+	if(write_error)
+		return -EIO;
+	if (io_error && !written)
+		return -EIO;
+	return written;
+}
+
+int block_read(struct inode * inode, struct file * filp, char * buf, int count)
+{ /*
+ * Read up to 'count' bytes from the block device inode->i_rdev, starting
+ * at filp->f_pos, into 'buf' (filled with memcpy_tofs/put_fs_byte).
+ * filp->f_pos is advanced by the number of bytes delivered.
+ * Returns the number of bytes read, 0 when nothing is left to read at
+ * this position, or -EIO when no byte at all could be read.
+ */
+ unsigned int block;
+ loff_t offset;
+ int blocksize;
+ int blocksize_bits, i;
+ unsigned int blocks, rblocks, left;
+ int bhrequest, uptodate;
+ int cluster_list[8];
+ int blocks_per_cluster;
+ struct buffer_head ** bhb, ** bhe;
+ struct buffer_head * buflist[NBUF];
+ struct buffer_head * bhreq[NBUF];
+ unsigned int chars;
+ loff_t size;
+ unsigned int dev;
+ int read;
+ int excess;
+
+ dev = inode->i_rdev;
+ blocksize = BLOCK_SIZE;
+ if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
+ blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
+ i = blocksize; /* count the shifts needed to bring blocksize down to 1 */
+ blocksize_bits = 0;
+ while (i != 1) {
+ blocksize_bits++;
+ i >>= 1;
+ }
+
+ offset = filp->f_pos;
+ if (blk_size[MAJOR(dev)])
+ size = (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;
+ else
+ size = INT_MAX; /* no size table for this major */
+
+ blocks_per_cluster = PAGE_SIZE / blocksize;
+
+ if (offset > size) /* 'left' = bytes to deliver: limited by device size and by count */
+ left = 0;
+ else
+ left = size - offset;
+ if (left > count)
+ left = count;
+ if (left <= 0)
+ return 0;
+ read = 0;
+ block = offset >> blocksize_bits;
+ offset &= blocksize-1; /* from here on: offset within the first block */
+ size >>= blocksize_bits; /* from here on: size in blocks */
+ rblocks = blocks = (left + offset + blocksize - 1) >> blocksize_bits;
+ bhb = bhe = buflist; /* buflist is used as a ring: bhb = next slot to fill, bhe = next slot to consume */
+ if (filp->f_reada) {
+ if (blocks < read_ahead[MAJOR(dev)] / (blocksize >> 9))
+ blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9);
+ excess = (block + blocks) % blocks_per_cluster;
+ if ( blocks > excess )
+ blocks -= excess;
+ if (rblocks > blocks)
+ blocks = rblocks;
+
+ }
+ if (block + blocks > size)
+ blocks = size - block;
+
+ /* We do this in a two stage process. We first try and request
+ as many blocks as we can, then we wait for the first one to
+ complete, and then we try and wrap up as many as are actually
+ done. This routine is rather generic, in that it can be used
+ in a filesystem by substituting the appropriate function in
+ for getblk.
+
+ This routine is optimized to make maximum use of the various
+ buffers and caches. */
+
+ do {
+ bhrequest = 0;
+ uptodate = 1;
+ while (blocks) {
+ --blocks;
+#if 1
+ if((block % blocks_per_cluster) == 0) {
+ for(i=0; i<blocks_per_cluster; i++) cluster_list[i] = block+i;
+ generate_cluster(dev, cluster_list, blocksize);
+ }
+#endif
+ *bhb = getblk(dev, block++, blocksize);
+ if (*bhb && !(*bhb)->b_uptodate) {
+ uptodate = 0;
+ bhreq[bhrequest++] = *bhb; /* needs to be read from the device */
+ }
+
+ if (++bhb == &buflist[NBUF]) /* wrap the fill pointer */
+ bhb = buflist;
+
+ /* If the block we have on hand is uptodate, go ahead
+ and complete processing. */
+ if (uptodate)
+ break;
+ if (bhb == bhe) /* ring is full */
+ break;
+ }
+
+ /* Now request them all */
+ if (bhrequest) {
+ ll_rw_block(READ, bhrequest, bhreq);
+ refill_freelist(blocksize);
+ }
+
+ do { /* Finish off all I/O that has actually completed */
+ if (*bhe) {
+ wait_on_buffer(*bhe);
+ if (!(*bhe)->b_uptodate) { /* read error? */
+ brelse(*bhe);
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ left = 0; /* terminates both loops */
+ break;
+ }
+ }
+ if (left < blocksize - offset)
+ chars = left;
+ else
+ chars = blocksize - offset;
+ filp->f_pos += chars;
+ left -= chars;
+ read += chars;
+ if (*bhe) {
+ memcpy_tofs(buf,offset+(*bhe)->b_data,chars);
+ brelse(*bhe);
+ buf += chars;
+ } else { /* no buffer in this slot: hand zeros back to the caller */
+ while (chars-->0)
+ put_fs_byte(0,buf++);
+ }
+ offset = 0;
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ } while (left > 0 && bhe != bhb && (!*bhe || !(*bhe)->b_lock));
+ } while (left > 0);
+
+/* Release the read-ahead blocks */
+ while (bhe != bhb) {
+ brelse(*bhe);
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ };
+ if (!read)
+ return -EIO;
+ filp->f_reada = 1;
+ return read;
+}
+
+/*
+ * fsync() for a block special file: flush the device the inode refers to
+ * (i_rdev) through fsync_dev() and hand its result back.
+ */
+int block_fsync(struct inode *inode, struct file *filp)
+{
+	int result;
+
+	result = fsync_dev(inode->i_rdev);
+	return result;
+}
diff --git a/fs/buffer.c b/fs/buffer.c
new file mode 100644
index 000000000..6416a1f71
--- /dev/null
+++ b/fs/buffer.c
@@ -0,0 +1,1849 @@
+/*
+ * linux/fs/buffer.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'buffer.c' implements the buffer-cache functions. Race-conditions have
+ * been avoided by NEVER letting an interrupt change a buffer (except for the
+ * data, of course), but instead letting the caller do it.
+ */
+
+/*
+ * NOTE! There is one discordant note here: checking floppies for
+ * disk change. This is where it fits best, I think, as it should
+ * invalidate changed floppy-disk-caches.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/errno.h>
+#include <linux/malloc.h>
+
+#include <asm/system.h>
+#include <asm/segment.h>
+#include <asm/io.h>
+
+#define NR_SIZES 4 /* number of entries in bufferindex_size[] */
+static char buffersize_index[9] = {-1, 0, 1, -1, 2, -1, -1, -1, 3}; /* indexed by size>>9: 512->0, 1024->1, 2048->2, 4096->3, anything else -1 */
+static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096}; /* reverse mapping: index -> buffer size in bytes */
+
+#define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
+
+static int grow_buffers(int pri, int size);
+static int shrink_specific_buffers(unsigned int priority, int size);
+static int maybe_shrink_lav_buffers(int);
+
+static int nr_hash = 0; /* Size of hash table */
+static struct buffer_head ** hash_table;
+struct buffer_head ** buffer_pages;
+static struct buffer_head * lru_list[NR_LIST] = {NULL, };
+static struct buffer_head * free_list[NR_SIZES] = {NULL, };
+static struct buffer_head * unused_list = NULL;
+static struct wait_queue * buffer_wait = NULL;
+
+int nr_buffers = 0;
+int nr_buffers_type[NR_LIST] = {0,};
+int nr_buffers_size[NR_SIZES] = {0,};
+int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
+int buffer_usage[NR_SIZES] = {0,}; /* Usage counts used to determine load average */
+int buffers_lav[NR_SIZES] = {0,}; /* Load average of buffer usage */
+int nr_free[NR_SIZES] = {0,};
+int buffermem = 0;
+int nr_buffer_heads = 0;
+extern int *blksize_size[];
+
+/* Here is the parameter block for the bdflush process. */
+static void wakeup_bdflush(int);
+
+#define N_PARAM 9
+#define LAV
+
+static union bdflush_param{
+ struct {
+ int nfract; /* Percentage of buffer cache dirty to
+ activate bdflush */
+ int ndirty; /* Maximum number of dirty blocks to write out per
+ wake-cycle */
+ int nrefill; /* Number of clean buffers to try and obtain
+ each time we call refill */
+ int nref_dirt; /* Dirty buffer threshold for activating bdflush
+ when trying to refill buffers. */
+ int clu_nfract; /* Percentage of buffer cache to scan to
+ search for free clusters */
+ int age_buffer; /* Time for normal buffer to age before
+ we flush it */
+ int age_super; /* Time for superblock to age before we
+ flush it */
+ int lav_const; /* Constant used for load average (time
+ constant) */
+ int lav_ratio; /* Used to determine how low a lav for a
+ particular size can go before we start to
+ trim back the buffers */
+ } b_un;
+ unsigned int data[N_PARAM]; /* the same N_PARAM values viewed as a flat array */
+} bdf_prm = {{25, 500, 64, 256, 15, 3000, 500, 1884, 2}}; /* defaults, in the field order above */
+
+/* The lav constant is set for 1 minute, as long as the update process runs
+ every 5 seconds. If you change the frequency of update, the time
+ constant will also change. */
+
+
+/* These are the min and max parameter values that we will allow to be assigned */
+static int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 100, 100, 1, 1};
+static int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
+
+/*
+ * Rewrote the wait-routines to use the "new" wait-queue functionality,
+ * and getting rid of the cli-sti pairs. The wait-queue routines still
+ * need cli-sti, but now it's just a couple of 386 instructions or so.
+ *
+ * Note that the real wait_on_buffer() is an inline function that checks
+ * if 'b_wait' is set before calling this, so that the queues aren't set
+ * up unnecessarily.
+ */
+void __wait_on_buffer(struct buffer_head * bh)
+{
+	struct wait_queue wait = { current, NULL };
+
+	/* Hold a reference and queue ourselves on the buffer for the wait. */
+	bh->b_count++;
+	add_wait_queue(&bh->b_wait, &wait);
+
+	/*
+	 * The task state is set before b_lock is tested, on every round,
+	 * exactly as in the goto-based original.
+	 */
+	for (;;) {
+		current->state = TASK_UNINTERRUPTIBLE;
+		if (!bh->b_lock)
+			break;
+		schedule();
+	}
+
+	remove_wait_queue(&bh->b_wait, &wait);
+	bh->b_count--;
+	current->state = TASK_RUNNING;
+}
+
+/* Call sync_buffers with wait!=0 to ensure that the call does not
+ return until all buffer writes have completed. Sync() may return
+ before the writes have finished; fsync() may not. */
+
+
+/* Godamity-damn. Some buffers (bitmaps for filesystems)
+ spontaneously dirty themselves without ever brelse being called.
+ We will ultimately want to put these in a separate list, but for
+ now we search all of the lists for dirty buffers */
+
+static int sync_buffers(dev_t dev, int wait)
+{ /*
+ * Write out the dirty buffers of 'dev' (dev == 0 matches every device).
+ * With wait == 0 a single pass only starts the writes; with wait != 0
+ * up to three passes are made, as described below.
+ * Returns 1 if, while waiting, a requested buffer was found unlocked,
+ * clean and not uptodate; 0 otherwise.
+ */
+ int i, retry, pass = 0, err = 0;
+ int nlist, ncount;
+ struct buffer_head * bh, *next;
+
+ /* One pass for no-wait, three for wait:
+ 0) write out all dirty, unlocked buffers;
+ 1) write out all dirty buffers, waiting if locked;
+ 2) wait for completion by waiting for all buffers to unlock. */
+ repeat:
+ retry = 0;
+ repeat2:
+ ncount = 0;
+ /* We search all lists as a failsafe mechanism, not because we expect
+ there to be dirty buffers on any of the other lists. */
+ for(nlist = 0; nlist < NR_LIST; nlist++)
+ {
+ repeat1:
+ bh = lru_list[nlist];
+ if(!bh) continue;
+ for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
+ if(bh->b_list != nlist) goto repeat1; /* buffer is no longer on this list: restart the list */
+ next = bh->b_next_free;
+ if(!lru_list[nlist]) break;
+ if (dev && bh->b_dev != dev)
+ continue;
+ if (bh->b_lock)
+ {
+ /* Buffer is locked; skip it unless wait is
+ requested AND pass > 0. */
+ if (!wait || !pass) {
+ retry = 1;
+ continue;
+ }
+ wait_on_buffer (bh);
+ goto repeat2; /* rescan all lists from the start after waiting */
+ }
+ /* If an unlocked buffer is not uptodate, there has
+ been an IO error. Skip it. */
+ if (wait && bh->b_req && !bh->b_lock &&
+ !bh->b_dirt && !bh->b_uptodate) {
+ err = 1;
+ printk("Weird - unlocked, clean and not uptodate buffer on list %d %x %lu\n", nlist, bh->b_dev, bh->b_blocknr);
+ continue;
+ }
+ /* Don't write clean buffers. Don't write ANY buffers
+ on the third pass. */
+ if (!bh->b_dirt || pass>=2)
+ continue;
+ /* don't bother about locked buffers */
+ if (bh->b_lock)
+ continue;
+ bh->b_count++; /* hold a reference across ll_rw_block() */
+ bh->b_flushtime = 0;
+ ll_rw_block(WRITE, 1, &bh);
+
+ if(nlist != BUF_DIRTY) {
+ printk("[%d %x %ld] ", nlist, bh->b_dev, bh->b_blocknr);
+ ncount++;
+ };
+ bh->b_count--;
+ retry = 1;
+ }
+ }
+ if (ncount) printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
+
+ /* If we are waiting for the sync to succeed, and if any dirty
+ blocks were written, then repeat; on the second pass, only
+ wait for buffers being written (do not pass to write any
+ more buffers on the second pass). */
+ if (wait && retry && ++pass<=2)
+ goto repeat;
+ return err;
+}
+
+void sync_dev(dev_t dev)
+{ /* Start write-out of everything belonging to 'dev' without waiting (both sync_buffers() calls pass wait == 0). */
+ sync_buffers(dev, 0);
+ sync_supers(dev);
+ sync_inodes(dev);
+ sync_buffers(dev, 0); /* presumably picks up buffers dirtied by the super/inode sync above */
+}
+
+/*
+ * Like sync_dev(), but the final sync_buffers() call is made with
+ * wait != 0 and its result is handed back to the caller.
+ */
+int fsync_dev(dev_t dev)
+{
+	int err;
+
+	sync_buffers(dev, 0);
+	sync_supers(dev);
+	sync_inodes(dev);
+	err = sync_buffers(dev, 1);
+	return err;
+}
+
+asmlinkage int sys_sync(void)
+{ /* sync() system call: sync_dev() with device 0, which matches every device; always returns 0. */
+ sync_dev(0);
+ return 0;
+}
+
+/*
+ * Generic fsync() file operation: flush the whole device the inode lives
+ * on (i_dev) and return fsync_dev()'s result.
+ */
+int file_fsync (struct inode *inode, struct file *filp)
+{
+	dev_t dev = inode->i_dev;
+
+	return fsync_dev(dev);
+}
+
+/*
+ * fsync() system call.
+ * Returns -EBADF for a descriptor that is out of range, not open or has
+ * no inode, -EINVAL when the file has no fsync operation, -EIO when that
+ * operation reports a failure, and 0 otherwise.
+ */
+asmlinkage int sys_fsync(unsigned int fd)
+{
+	struct file * file;
+	struct inode * inode;
+
+	if (fd >= NR_OPEN)
+		return -EBADF;
+	file = current->files->fd[fd];
+	if (!file)
+		return -EBADF;
+	inode = file->f_inode;
+	if (!inode)
+		return -EBADF;
+
+	if (!file->f_op || !file->f_op->fsync)
+		return -EINVAL;
+
+	return file->f_op->fsync(inode,file) ? -EIO : 0;
+}
+
+/*
+ * Clear the flushtime, uptodate, dirty and req state of every buffer on
+ * the LRU lists that belongs to 'dev'.  Each matching buffer is waited on
+ * first and its device is checked again after the wait.
+ */
+void invalidate_buffers(dev_t dev)
+{
+	struct buffer_head * bh;
+	int nlist;
+	int left;
+
+	for (nlist = 0; nlist < NR_LIST; nlist++) {
+		bh = lru_list[nlist];
+		left = nr_buffers_type[nlist]*2;
+		while (--left > 0) {
+			if (bh->b_dev == dev) {
+				wait_on_buffer(bh);
+				if (bh->b_dev == dev)
+					bh->b_flushtime = bh->b_uptodate =
+						bh->b_dirt = bh->b_req = 0;
+			}
+			bh = bh->b_next_free;
+		}
+	}
+}
+
+#define _hashfn(dev,block) (((unsigned)(dev^block))%nr_hash)
+#define hash(dev,block) hash_table[_hashfn(dev,block)]
+
+/*
+ * Unlink bh from its hash chain and, if it is the first entry of its
+ * bucket, advance the bucket to the following entry.
+ */
+static inline void remove_from_hash_queue(struct buffer_head * bh)
+{
+	struct buffer_head * succ = bh->b_next;
+	struct buffer_head * pred = bh->b_prev;
+	struct buffer_head ** bucket = &hash(bh->b_dev,bh->b_blocknr);
+
+	if (succ)
+		succ->b_prev = pred;
+	if (pred)
+		pred->b_next = succ;
+	if (*bucket == bh)
+		*bucket = succ;
+	bh->b_next = NULL;
+	bh->b_prev = NULL;
+}
+
+/*
+ * Take bh out of the circular LRU list selected by bh->b_list.
+ * Panics if the list pointers are missing or if the buffer is marked as
+ * free (b_dev == 0xffff).
+ */
+static inline void remove_from_lru_list(struct buffer_head * bh)
+{
+	struct buffer_head ** head;
+
+	if (!bh->b_prev_free || !bh->b_next_free)
+		panic("VFS: LRU block list corrupted");
+	if (bh->b_dev == 0xffff)
+		panic("LRU list corrupted");
+
+	bh->b_prev_free->b_next_free = bh->b_next_free;
+	bh->b_next_free->b_prev_free = bh->b_prev_free;
+
+	head = &lru_list[bh->b_list];
+	if (*head == bh)
+		*head = bh->b_next_free;
+	/* Still pointing at bh: it was the only element, the list is empty now. */
+	if (*head == bh)
+		*head = NULL;
+
+	bh->b_next_free = NULL;
+	bh->b_prev_free = NULL;
+}
+
+/*
+ * Take bh out of the circular free list for its buffer size and decrement
+ * nr_free[] for that size.  Panics if the list pointers are missing, if
+ * the buffer is not marked as free (b_dev != 0xffff) or if the free list
+ * is empty.
+ */
+static inline void remove_from_free_list(struct buffer_head * bh)
+{
+	int isize = BUFSIZE_INDEX(bh->b_size);
+	struct buffer_head ** head = &free_list[isize];
+
+	if (!bh->b_prev_free || !bh->b_next_free)
+		panic("VFS: Free block list corrupted");
+	if (bh->b_dev != 0xffff)
+		panic("Free list corrupted");
+	if (!*head)
+		panic("Free list empty");
+
+	nr_free[isize]--;
+	if (bh->b_next_free != bh) {
+		bh->b_prev_free->b_next_free = bh->b_next_free;
+		bh->b_next_free->b_prev_free = bh->b_prev_free;
+		if (*head == bh)
+			*head = bh->b_next_free;
+	} else {
+		/* bh links to itself: it was the only entry */
+		*head = NULL;
+	}
+	bh->b_next_free = NULL;
+	bh->b_prev_free = NULL;
+}
+
+/*
+ * Remove bh from whatever lists it is on: the free list when it is marked
+ * free (b_dev == 0xffff), otherwise the hash queue and its LRU list, with
+ * the per-list counters adjusted.
+ */
+static inline void remove_from_queues(struct buffer_head * bh)
+{
+	if (bh->b_dev != 0xffff) {
+		nr_buffers_type[bh->b_list]--;
+		nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
+		remove_from_hash_queue(bh);
+		remove_from_lru_list(bh);
+	} else {
+		/* Free list entries should not be in the hash queue */
+		remove_from_free_list(bh);
+	}
+}
+
+/*
+ * Move bh to the tail of its LRU list.  If bh is currently the list head,
+ * advancing the head of the circular list is all that is needed.
+ */
+static inline void put_last_lru(struct buffer_head * bh)
+{
+	struct buffer_head ** head;
+	struct buffer_head * first, * last;
+
+	if (!bh)
+		return;
+
+	head = &lru_list[bh->b_list];
+	if (*head == bh) {
+		*head = bh->b_next_free;
+		return;
+	}
+	if (bh->b_dev == 0xffff)
+		panic("Wrong block for lru list");
+	remove_from_lru_list(bh);
+
+	/* add to back of the list */
+	if (!*head) {
+		*head = bh;
+		bh->b_prev_free = bh;
+	}
+	first = *head;
+	last = first->b_prev_free;
+	bh->b_next_free = first;
+	bh->b_prev_free = last;
+	last->b_next_free = bh;
+	first->b_prev_free = bh;
+}
+
+/*
+ * Mark bh as free (b_dev = 0xffff) and append it to the tail of the free
+ * list for its buffer size, incrementing nr_free[] for that size.
+ */
+static inline void put_last_free(struct buffer_head * bh)
+{
+	struct buffer_head ** head;
+	struct buffer_head * first, * last;
+	int isize;
+
+	if (!bh)
+		return;
+
+	isize = BUFSIZE_INDEX(bh->b_size);
+	bh->b_dev = 0xffff;	/* So it is obvious we are on the free list */
+
+	/* add to back of free list */
+	head = &free_list[isize];
+	if (!*head) {
+		*head = bh;
+		bh->b_prev_free = bh;
+	}
+	nr_free[isize]++;
+
+	first = *head;
+	last = first->b_prev_free;
+	bh->b_next_free = first;
+	bh->b_prev_free = last;
+	last->b_next_free = bh;
+	first->b_prev_free = bh;
+}
+
+/*
+ * Put bh on its lists: a free buffer (b_dev == 0xffff) goes to the tail
+ * of the free list; any other buffer goes to the tail of the LRU list
+ * named by bh->b_list and, if it has a device, to the front of its hash
+ * chain.
+ */
+static inline void insert_into_queues(struct buffer_head * bh)
+{
+	struct buffer_head ** lru;
+	struct buffer_head ** bucket;
+	struct buffer_head * first, * last;
+
+	if (bh->b_dev == 0xffff) {
+		put_last_free(bh);
+		return;
+	}
+
+	/* put at end of the LRU list */
+	lru = &lru_list[bh->b_list];
+	if (!*lru) {
+		*lru = bh;
+		bh->b_prev_free = bh;
+	}
+	if (bh->b_next_free)
+		panic("VFS: buffer LRU pointers corrupted");
+	first = *lru;
+	last = first->b_prev_free;
+	bh->b_next_free = first;
+	bh->b_prev_free = last;
+	last->b_next_free = bh;
+	first->b_prev_free = bh;
+	nr_buffers_type[bh->b_list]++;
+	nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
+
+	/* put the buffer in new hash-queue if it has a device */
+	bh->b_prev = NULL;
+	bh->b_next = NULL;
+	if (!bh->b_dev)
+		return;
+	bucket = &hash(bh->b_dev,bh->b_blocknr);
+	bh->b_next = *bucket;
+	*bucket = bh;
+	if (bh->b_next)
+		bh->b_next->b_prev = bh;
+}
+
+/*
+ * Look (dev, block) up in the hash table.  Returns the buffer when it is
+ * found with the requested size.  Returns NULL when there is no such
+ * buffer, and also - after logging a message - when the first matching
+ * buffer has a different size.
+ */
+static struct buffer_head * find_buffer(dev_t dev, int block, int size)
+{
+	struct buffer_head * bh = hash(dev,block);
+
+	while (bh != NULL) {
+		if (bh->b_dev == dev && bh->b_blocknr == block) {
+			if (bh->b_size == size)
+				return bh;
+			printk("VFS: Wrong blocksize on device %d/%d\n",
+			       MAJOR(dev), MINOR(dev));
+			return NULL;
+		}
+		bh = bh->b_next;
+	}
+	return NULL;
+}
+
+/*
+ * Why like this, I hear you say... The reason is race-conditions.
+ * As we don't lock buffers (unless we are reading them, that is),
+ * something might happen to it while we sleep (ie a read-error
+ * will force it bad). This shouldn't really happen currently, but
+ * the code is ready.
+ */
+struct buffer_head * get_hash_table(dev_t dev, int block, int size)
+{
+	struct buffer_head * bh;
+
+	/*
+	 * Take a reference, wait for the buffer to be unlocked, then check
+	 * that it still describes the block we asked for.  If it does not,
+	 * drop the reference and look it up again.
+	 */
+	while ((bh = find_buffer(dev,block,size)) != NULL) {
+		bh->b_count++;
+		wait_on_buffer(bh);
+		if (bh->b_dev == dev && bh->b_blocknr == block && bh->b_size == size)
+			return bh;
+		bh->b_count--;
+	}
+	return NULL;
+}
+
+void set_blocksize(dev_t dev, int size)
+{ /*
+ * Set the block size recorded for 'dev' to 'size' (512, 1024, 2048 or
+ * 4096; anything else panics).  Does nothing if the major has no
+ * blksize_size table or if the size is unchanged.  On a real change the
+ * device is synced first; then every buffer of the device whose size
+ * differs is waited on, has its state cleared (if it still belongs to the
+ * device) and is taken off the hash queue.
+ */
+ int i, nlist;
+ struct buffer_head * bh, *bhnext;
+
+ if (!blksize_size[MAJOR(dev)])
+ return;
+
+ switch(size) {
+ default: panic("Invalid blocksize passed to set_blocksize");
+ case 512: case 1024: case 2048: case 4096:;
+ }
+
+ if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) { /* nothing recorded yet and the default is requested: just record it */
+ blksize_size[MAJOR(dev)][MINOR(dev)] = size;
+ return;
+ }
+ if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
+ return;
+ sync_buffers(dev, 2); /* any non-zero second argument makes sync_buffers() wait */
+ blksize_size[MAJOR(dev)][MINOR(dev)] = size;
+
+ /* We need to be quite careful how we do this - we are moving entries
+ around on the free list, and we can get in a loop if we are not careful.*/
+
+ for(nlist = 0; nlist < NR_LIST; nlist++) {
+ bh = lru_list[nlist];
+ for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
+ if(!bh) break;
+ bhnext = bh->b_next_free; /* fetched before wait_on_buffer() below */
+ if (bh->b_dev != dev)
+ continue;
+ if (bh->b_size == size)
+ continue;
+
+ wait_on_buffer(bh);
+ if (bh->b_dev == dev && bh->b_size != size) { /* re-check after the wait */
+ bh->b_uptodate = bh->b_dirt = bh->b_req =
+ bh->b_flushtime = 0;
+ };
+ remove_from_hash_queue(bh);
+ }
+ }
+}
+
+#define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)
+
+/*
+ * Helper for refill_freelist(): starting at 'bh', walk at most *left
+ * entries of LRU list 'list' and return the first buffer that can be
+ * reclaimed for buffers of 'size' bytes, or NULL if there is none.
+ * *left is decremented for every buffer skipped and is set to 0 when the
+ * walk is abandoned because a locked buffer was met on a locked list.
+ * Buffers that are dirty or whose data page has a mem_map count other
+ * than 1 are refiled on the way.
+ */
+static struct buffer_head * refill_scan_list(struct buffer_head * bh, int list,
+	int size, int * left)
+{
+	struct buffer_head * next;
+
+	for (; *left > 0; bh = next, (*left)--) {
+		/* Check for the end of the chain before touching bh. */
+		if (!bh)
+			break;
+		next = bh->b_next_free;
+
+		if (mem_map[MAP_NR((unsigned long) bh->b_data)] != 1 ||
+		    bh->b_dirt) {
+			refile_buffer(bh);
+			continue;
+		}
+
+		if (bh->b_count || bh->b_size != size)
+			continue;
+
+		/* Buffers are written in the order they are placed
+		   on the locked list.  If we encounter a locked
+		   buffer here, this means that the rest of them
+		   are also locked */
+		if (bh->b_lock && (list == BUF_LOCKED || list == BUF_LOCKED1)) {
+			*left = 0;
+			break;
+		}
+
+		if (BADNESS(bh))
+			continue;
+		break;
+	}
+	if (!*left)
+		return NULL;	/* Nothing on this list */
+	if (bh && bh->b_count)
+		panic("Here is the problem");
+	return bh;
+}
+
+/*
+ * Make sure there are free buffers of 'size' bytes available.
+ *
+ * Returns immediately when more than 100 such buffers are already free.
+ * Otherwise it tries to obtain bdf_prm.b_un.nrefill * size bytes worth of
+ * buffers: first by growing the buffer cache, then by shrinking buffers
+ * of other sizes, then by reclaiming the least recently used clean,
+ * unlocked, unused buffers from the LRU lists, and finally by trying
+ * harder to grow the cache (waking bdflush if that fails).
+ */
+void refill_freelist(int size)
+{
+	struct buffer_head * bh;
+	struct buffer_head * candidate[NR_LIST];
+	unsigned int best_time, winner;
+	int isize = BUFSIZE_INDEX(size);
+	int buffers[NR_LIST];
+	int i;
+	int needed;
+
+	/* First see if we even need this.  Sometimes it is advantageous
+	 to request some blocks in a filesystem that we know that we will
+	 be needing ahead of time. */
+
+	if (nr_free[isize] > 100)
+		return;
+
+	/* We are going to try and locate this much memory */
+	needed =bdf_prm.b_un.nrefill * size;
+
+	while (nr_free_pages > min_free_pages*2 && needed > 0 &&
+	       grow_buffers(GFP_BUFFER, size)) {
+		needed -= PAGE_SIZE;
+	}
+
+	if(needed <= 0) return;
+
+	/* See if there are too many buffers of a different size.
+	   If so, victimize them */
+
+	while(maybe_shrink_lav_buffers(size))
+	 {
+		 if(!grow_buffers(GFP_BUFFER, size)) break;
+		 needed -= PAGE_SIZE;
+		 if(needed <= 0) return;
+	 }
+
+	/* OK, we cannot grow the buffer cache, now try and get some
+	   from the lru list */
+
+	/* First set the candidate pointers to usable buffers.  This
+	   should be quick nearly all of the time. */
+
+repeat0:
+	for(i=0; i<NR_LIST; i++){
+		if(i == BUF_DIRTY || i == BUF_SHARED ||
+		   nr_buffers_type[i] == 0) {
+			candidate[i] = NULL;
+			buffers[i] = 0;
+			continue;
+		}
+		buffers[i] = nr_buffers_type[i];
+		candidate[i] = refill_scan_list(lru_list[i], i, size, &buffers[i]);
+	}
+
+ repeat:
+	if(needed <= 0) return;
+
+	/* Now see which candidate wins the election */
+
+	winner = best_time = UINT_MAX;
+	for(i=0; i<NR_LIST; i++){
+		if(!candidate[i]) continue;
+		if(candidate[i]->b_lru_time < best_time){
+			best_time = candidate[i]->b_lru_time;
+			winner = i;
+		}
+	}
+
+	/* If we have a winner, use it, and then get a new candidate from that list */
+	if(winner != UINT_MAX) {
+		i = winner;
+		bh = candidate[i];
+		candidate[i] = bh->b_next_free;
+		if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
+		if (bh->b_count || bh->b_size != size)
+			 panic("Busy buffer in candidate list\n");
+		if (mem_map[MAP_NR((unsigned long) bh->b_data)] != 1)
+			 panic("Shared buffer in candidate list\n");
+		if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
+
+		if(bh->b_dev == 0xffff) panic("Wrong list");
+		remove_from_queues(bh);
+		bh->b_dev = 0xffff;
+		put_last_free(bh);
+		needed -= bh->b_size;
+		buffers[i]--;
+		if(buffers[i] < 0) panic("Here is the problem");
+
+		if(buffers[i] == 0) candidate[i] = NULL;
+
+		/* Now all we need to do is advance the candidate pointer
+		   from the winner list to the next usable buffer */
+		if(candidate[i] && buffers[i] > 0)
+			candidate[i] = refill_scan_list(candidate[i], i, size, &buffers[i]);
+
+		goto repeat;
+	}
+
+	if(needed <= 0) return;
+
+	/* Too bad, that was not enough. Try a little harder to grow some. */
+
+	if (nr_free_pages > 5) {
+		if (grow_buffers(GFP_BUFFER, size)) {
+			needed -= PAGE_SIZE;
+			goto repeat0;
+		}
+	}
+
+	/* and repeat until we find something good */
+	if (!grow_buffers(GFP_ATOMIC, size))
+		wakeup_bdflush(1);
+	needed -= PAGE_SIZE;
+	goto repeat0;
+}
+
+/*
+ * Ok, this is getblk, and it isn't very clear, again to hinder
+ * race-conditions. Most of the code is seldom used, (ie repeating),
+ * so it should be much more efficient than it looks.
+ *
+ * The algorithm is changed: hopefully better, and an elusive bug removed.
+ *
+ * 14.02.92: changed it to sync dirty buffers a bit: better performance
+ * when the filesystem starts to get full of dirty blocks (I hope).
+ */
+struct buffer_head * getblk(dev_t dev, int block, int size)
+{
+	struct buffer_head * bh;
+	int isize = BUFSIZE_INDEX(size);
+
+	/* Update this for the buffer size lav. */
+	buffer_usage[isize]++;
+
+	for (;;) {
+		/* Already in the cache?  Then it comes back with a reference held. */
+		bh = get_hash_table(dev, block, size);
+		if (bh) {
+			if (bh->b_uptodate && !bh->b_dirt)
+				put_last_lru(bh);
+			if (!bh->b_dirt)
+				bh->b_flushtime = 0;
+			return bh;
+		}
+
+		while (!free_list[isize])
+			refill_freelist(size);
+
+		/* Look again: start over if the block has shown up meanwhile. */
+		if (!find_buffer(dev,block,size))
+			break;
+	}
+
+	bh = free_list[isize];
+	remove_from_free_list(bh);
+
+	/* OK, FINALLY we know that this buffer is the only one of its kind, */
+	/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
+	bh->b_count = 1;
+	bh->b_dirt = 0;
+	bh->b_lock = 0;
+	bh->b_uptodate = 0;
+	bh->b_flushtime = 0;
+	bh->b_req = 0;
+	bh->b_dev = dev;
+	bh->b_blocknr = block;
+	insert_into_queues(bh);
+	return bh;
+}
+
+/*
+ * Set the flush deadline of a buffer.  A clean buffer gets flushtime 0.
+ * A dirty buffer gets jiffies plus age_super (flag != 0) or age_buffer
+ * (flag == 0), unless it already has an earlier non-zero deadline.
+ */
+void set_writetime(struct buffer_head * buf, int flag)
+{
+	int newtime;
+
+	if (!buf->b_dirt) {
+		buf->b_flushtime = 0;
+		return;
+	}
+
+	newtime = jiffies + (flag ? bdf_prm.b_un.age_super :
+				    bdf_prm.b_un.age_buffer);
+	if (!buf->b_flushtime || buf->b_flushtime > newtime)
+		buf->b_flushtime = newtime;
+}
+
+
+static char buffer_disposition[] = {BUF_CLEAN, BUF_SHARED, BUF_LOCKED, BUF_SHARED, /* target list, indexed by the 3-bit state refile_buffer() computes: */
+ BUF_DIRTY, BUF_DIRTY, BUF_DIRTY, BUF_DIRTY}; /* bit 0 = mem_map count of the data page != 1, bit 1 = b_lock, bit 2 = b_dirt */
+
+/*
+ * Move a buffer to the LRU list that matches its current state, as given
+ * by buffer_disposition[].  bdflush is woken when the move puts the
+ * buffer on the dirty list and the dirty list exceeds nfract percent of
+ * the non-shared buffers.
+ */
+void refile_buffer(struct buffer_head * buf)
+{
+	int state = 0;
+	int target;
+
+	if (buf->b_dev == 0xffff)
+		panic("Attempt to refile free buffer\n");
+
+	/* bit 0: mem_map count of the data page != 1, bit 1: locked, bit 2: dirty */
+	if (mem_map[MAP_NR((unsigned long) buf->b_data)] != 1)
+		state |= 1;
+	if (buf->b_lock)
+		state |= 2;
+	if (buf->b_dirt)
+		state |= 4;
+
+	target = buffer_disposition[state];
+	if (target == BUF_CLEAN && buf->b_list == BUF_SHARED)
+		target = BUF_UNSHARED;
+	if (target == -1)
+		panic("Bad buffer settings (%d)\n", state);
+	if (target == BUF_CLEAN)
+		buf->b_lru_time = jiffies;
+
+	/* Already on the right list: nothing to move. */
+	if (target == buf->b_list)
+		return;
+
+	if (target == BUF_DIRTY || target == BUF_UNSHARED)
+		buf->b_lru_time = jiffies;
+	if (target == BUF_LOCKED &&
+	    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
+		target = BUF_LOCKED1;
+	remove_from_queues(buf);
+	buf->b_list = target;
+	insert_into_queues(buf);
+	if (target == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] >
+	    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
+	    bdf_prm.b_un.nfract/100)
+		wakeup_bdflush(0);
+}
+
+/*
+ * Release one reference to a buffer (NULL is accepted and ignored).
+ * The buffer is waited on, its flush time is updated and it is refiled
+ * before the count is dropped; sleepers on buffer_wait are woken when the
+ * count reaches zero.  Releasing a buffer whose count is already zero
+ * only logs a message.
+ */
+void brelse(struct buffer_head * buf)
+{
+	if (!buf)
+		return;
+	wait_on_buffer(buf);
+
+	/* If dirty, mark the time this buffer should be written back */
+	set_writetime(buf, 0);
+	refile_buffer(buf);
+
+	if (!buf->b_count) {
+		printk("VFS: brelse: Trying to free free buffer\n");
+		return;
+	}
+	if (--buf->b_count == 0)
+		wake_up(&buffer_wait);
+}
+
+/*
+ * bread() reads a specified block and returns the buffer that contains
+ * it. It returns NULL if the block was unreadable.
+ */
+struct buffer_head * bread(dev_t dev, int block, int size)
+{
+	struct buffer_head * bh = getblk(dev, block, size);
+
+	if (!bh) {
+		printk("VFS: bread: READ error on device %d/%d\n",
+		       MAJOR(dev), MINOR(dev));
+		return NULL;
+	}
+
+	if (!bh->b_uptodate) {
+		/* Not cached yet: read it in and wait for the result. */
+		ll_rw_block(READ, 1, &bh);
+		wait_on_buffer(bh);
+		if (!bh->b_uptodate) {
+			brelse(bh);
+			return NULL;
+		}
+	}
+	return bh;
+}
+
+/*
+ * Ok, breada can be used as bread, but additionally to mark other
+ * blocks for reading as well. 'pos' and 'filesize' (the current file
+ * position and the file size) are used to decide how many to read.
+ */
+
+#define NBUF 16
+
+struct buffer_head * breada(dev_t dev, int block, int bufsize,
+	unsigned int pos, unsigned int filesize)
+{
+	/*
+	 * Returns the uptodate buffer for 'block', or NULL when pos is at or
+	 * beyond filesize, block is negative, or the read failed.  Read-ahead
+	 * is limited to the blocks that lie between pos and filesize, to
+	 * read_ahead[MAJOR(dev)] and to NBUF buffers.
+	 */
+	struct buffer_head * bhlist[NBUF];
+	unsigned int blocks;
+	struct buffer_head * bh;
+	int index;
+	int i, j;
+
+	if (pos >= filesize)
+		return NULL;
+
+	if (block < 0 || !(bh = getblk(dev,block,bufsize)))
+		return NULL;
+
+	index = BUFSIZE_INDEX(bh->b_size);
+
+	if (bh->b_uptodate)
+		return bh;
+
+	/*
+	 * Number of whole blocks left in the file from pos onwards.
+	 * pos < filesize was checked above, so the subtraction cannot wrap.
+	 */
+	blocks = (filesize - pos) >> (9+index);
+
+	if (blocks > (read_ahead[MAJOR(dev)] >> index))
+		blocks = read_ahead[MAJOR(dev)] >> index;
+	if (blocks > NBUF)
+		blocks = NBUF;
+
+	bhlist[0] = bh;
+	j = 1;
+	for(i=1; i<blocks; i++) {
+		bh = getblk(dev,block+i,bufsize);
+		/* Stop at the first block that is already cached. */
+		if (bh->b_uptodate) {
+			brelse(bh);
+			break;
+		}
+		bhlist[j++] = bh;
+	}
+
+	/* Request the read for these buffers, and then release them */
+	ll_rw_block(READ, j, bhlist);
+
+	for(i=1; i<j; i++)
+		brelse(bhlist[i]);
+
+	/* Wait for this buffer, and then continue on */
+	bh = bhlist[0];
+	wait_on_buffer(bh);
+	if (bh->b_uptodate)
+		return bh;
+	brelse(bh);
+	return NULL;
+}
+
+/*
+ * See fs/inode.c for the weird use of volatile..
+ */
+static void put_unused_buffer_head(struct buffer_head * bh)
+{ /* Clear a buffer head, keeping only its b_wait pointer, and push it onto unused_list. */
+ struct wait_queue * wait;
+
+ wait = ((volatile struct buffer_head *) bh)->b_wait; /* saved so it survives the memset below */
+ memset(bh,0,sizeof(*bh));
+ ((volatile struct buffer_head *) bh)->b_wait = wait;
+ bh->b_next_free = unused_list;
+ unused_list = bh;
+}
+
+/*
+ * If unused_list is empty, get one free page, carve it into buffer heads
+ * and chain all of them onto unused_list.  Silently does nothing when no
+ * page can be had.
+ */
+static void get_more_buffer_heads(void)
+{
+	struct buffer_head * bh;
+	int left;
+
+	if (unused_list)
+		return;
+
+	bh = (struct buffer_head*) get_free_page(GFP_BUFFER);
+	if (!bh)
+		return;
+
+	left = PAGE_SIZE/sizeof*bh;
+	nr_buffer_heads += left;
+	while (left > 0) {
+		bh->b_next_free = unused_list;	/* only make link */
+		unused_list = bh;
+		bh++;
+		left--;
+	}
+}
+
+/*
+ * Take one buffer head off the unused list, refilling the list first if
+ * needed.  Returns NULL when no head is available.
+ */
+static struct buffer_head * get_unused_buffer_head(void)
+{
+	struct buffer_head * fresh;
+
+	get_more_buffer_heads();
+
+	fresh = unused_list;
+	if (fresh) {
+		unused_list = fresh->b_next_free;
+		fresh->b_next_free = NULL;
+		fresh->b_data = NULL;
+		fresh->b_size = 0;
+		fresh->b_req = 0;
+	}
+	return fresh;
+}
+
+/*
+ * Create the appropriate buffers when given a page for data area and
+ * the size of each buffer.. Use the bh->b_this_page linked list to
+ * follow the buffers created. Return NULL if unable to create more
+ * buffers.
+ */
+static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
+{
+	struct buffer_head * list = NULL;
+	struct buffer_head * cur;
+	unsigned long off;
+
+	/*
+	 * Walk the page from its last buffer down to offset 0, so the list
+	 * ends up in ascending address order.  'off' is unsigned: stepping
+	 * below zero wraps to a huge value and ends the loop.
+	 */
+	for (off = PAGE_SIZE - size; off < PAGE_SIZE; off -= size) {
+		cur = get_unused_buffer_head();
+		if (!cur) {
+			/* Out of heads: give back everything taken so far */
+			while (list) {
+				cur = list;
+				list = list->b_this_page;
+				put_unused_buffer_head(cur);
+			}
+			return NULL;
+		}
+		cur->b_this_page = list;
+		list = cur;
+		cur->b_data = (char *) (page+off);
+		cur->b_size = size;
+		cur->b_dev = 0xffff;	/* Flag as unused */
+	}
+	return list;
+}
+
+/*
+ * Start reads for every buffer in bh[] that is present but not up to
+ * date, then wait on every present buffer.  NULL slots are skipped.
+ * The scratch array holds 8 entries, so nrbuf must not exceed 8.
+ */
+static void read_buffers(struct buffer_head * bh[], int nrbuf)
+{
+	struct buffer_head * pending[8];
+	int npending = 0;
+	int n;
+
+	for (n = 0; n < nrbuf; n++) {
+		if (!bh[n] || bh[n]->b_uptodate)
+			continue;
+		pending[npending++] = bh[n];
+	}
+
+	if (npending != 0)
+		ll_rw_block(READ, npending, pending);
+
+	for (n = 0; n < nrbuf; n++)
+		if (bh[n])
+			wait_on_buffer(bh[n]);
+}
+
+/*
+ * 'first' is an already cached buffer for block b[0].  Check whether the
+ * cached buffers for the remaining blocks of b[] sit back to back in the
+ * same page as 'first', starting at the page boundary.  If they do, make
+ * sure they are read in, free the caller's page at 'address' and return
+ * the buffer page instead.  Otherwise return 0.  The reference held on
+ * 'first' is released on every path.
+ */
+static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
+ dev_t dev, int *b, int size)
+{
+ struct buffer_head * bh[8];
+ unsigned long page;
+ unsigned long offset;
+ int block;
+ int nrbuf;
+
+ page = (unsigned long) first->b_data;
+ /* The first buffer must start exactly on a page boundary */
+ if (page & ~PAGE_MASK) {
+ brelse(first);
+ return 0;
+ }
+ /* Count one more user of the buffer page; presumably undone by free_page(page) at no_go */
+ mem_map[MAP_NR(page)]++;
+ bh[0] = first;
+ nrbuf = 1;
+ for (offset = size ; offset < PAGE_SIZE ; offset += size) {
+ block = *++b;
+ if (!block)
+ goto no_go;
+ first = get_hash_table(dev, block, size);
+ if (!first)
+ goto no_go;
+ /* Record it before the position check so no_go releases it too */
+ bh[nrbuf++] = first;
+ if (page+offset != (unsigned long) first->b_data)
+ goto no_go;
+ }
+ read_buffers(bh,nrbuf); /* make sure they are actually read correctly */
+ while (nrbuf-- > 0)
+ brelse(bh[nrbuf]);
+ /* The caller's own page is not needed: the buffer page is handed back instead */
+ free_page(address);
+ ++current->mm->min_flt;
+ return page;
+no_go:
+ while (nrbuf-- > 0)
+ brelse(bh[nrbuf]);
+ free_page(page);
+ return 0;
+}
+
+/*
+ * Turn the caller's page at 'address' into buffers for the blocks in b[]:
+ * create buffer heads over the page, enter them into the buffer queues
+ * and read the blocks in.  Returns 'address' on success.  Returns 0 (and
+ * gives the buffer heads back) if no heads could be created, if any block
+ * number is 0, or if any of the blocks is already in the buffer cache.
+ */
+static unsigned long try_to_load_aligned(unsigned long address,
+ dev_t dev, int b[], int size)
+{
+ struct buffer_head * bh, * tmp, * arr[8];
+ unsigned long offset;
+ int isize = BUFSIZE_INDEX(size);
+ int * p;
+ int block;
+
+ bh = create_buffers(address, size);
+ if (!bh)
+ return 0;
+ /* do any of the buffers already exist? punt if so.. */
+ p = b;
+ for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
+ block = *(p++);
+ if (!block)
+ goto not_aligned;
+ if (find_buffer(dev, block, size))
+ goto not_aligned;
+ }
+ tmp = bh;
+ p = b;
+ /* From here on 'block' is reused as the count of buffers stored in arr[] */
+ block = 0;
+ while (1) {
+ arr[block++] = bh;
+ bh->b_count = 1;
+ bh->b_dirt = 0;
+ bh->b_flushtime = 0;
+ bh->b_uptodate = 0;
+ bh->b_req = 0;
+ bh->b_dev = dev;
+ bh->b_blocknr = *(p++);
+ bh->b_list = BUF_CLEAN;
+ nr_buffers++;
+ nr_buffers_size[isize]++;
+ insert_into_queues(bh);
+ if (bh->b_this_page)
+ bh = bh->b_this_page;
+ else
+ break;
+ }
+ buffermem += PAGE_SIZE;
+ /* Close the per-page list into a ring: last buffer points back at the first */
+ bh->b_this_page = tmp;
+ mem_map[MAP_NR(address)]++;
+ buffer_pages[MAP_NR(address)] = bh;
+ read_buffers(arr,block);
+ while (block-- > 0)
+ brelse(arr[block]);
+ ++current->mm->maj_flt;
+ return address;
+not_aligned:
+ /* The list from create_buffers() is still NULL-terminated here */
+ while ((tmp = bh) != NULL) {
+ bh = bh->b_this_page;
+ put_unused_buffer_head(tmp);
+ }
+ return 0;
+}
+
+/*
+ * Try-to-share-buffers tries to minimize memory use by trying to keep
+ * both code pages and the buffer area in the same page. This is done by
+ * (a) checking if the buffers are already aligned correctly in memory and
+ * (b) if none of the buffer heads are in memory at all, trying to load
+ * them into memory the way we want them.
+ *
+ * This doesn't guarantee that the memory is shared, but should under most
+ * circumstances work very well indeed (ie >90% sharing of code pages on
+ * demand-loadable executables).
+ */
+static inline unsigned long try_to_share_buffers(unsigned long address,
+	dev_t dev, int *b, int size)
+{
+	struct buffer_head * cached;
+
+	/* Nothing to share if the first block is unmapped */
+	if (!b[0])
+		return 0;
+
+	/* Case (a) if the first block is cached already, case (b) otherwise */
+	cached = get_hash_table(dev, b[0], size);
+	return cached ? check_aligned(cached, address, dev, b, size)
+		      : try_to_load_aligned(address, dev, b, size);
+}
+
+/*
+ * bread_page reads a page's worth of buffers into memory at the desired address. It's
+ * a function of its own, as there is some speed to be got by reading them
+ * all at the same time, not waiting for one to be read, and then another
+ * etc. This also allows us to optimize memory usage by sharing code pages
+ * and filesystem buffers..
+ */
+unsigned long bread_page(unsigned long address, dev_t dev, int b[], int size, int no_share)
+{
+	struct buffer_head * bh[8];
+	unsigned long dest;
+	int nr, off;
+
+	/* Unless told otherwise, first try to share the page with the buffer cache */
+	if (!no_share) {
+		dest = try_to_share_buffers(address, dev, b, size);
+		if (dest)
+			return dest;
+	}
+	++current->mm->maj_flt;
+
+	/* Get a buffer for each mapped block; unmapped (zero) blocks stay NULL */
+	nr = 0;
+	for (off = 0; off < PAGE_SIZE; off += size) {
+		bh[nr] = NULL;
+		if (b[nr])
+			bh[nr] = getblk(dev, b[nr], size);
+		nr++;
+	}
+	read_buffers(bh, nr);
+
+	/* Copy whatever was read successfully into the page, slot by slot */
+	dest = address;
+	nr = 0;
+	for (off = 0; off < PAGE_SIZE; off += size) {
+		if (bh[nr]) {
+			if (bh[nr]->b_uptodate)
+				memcpy((void *) dest, bh[nr]->b_data, size);
+			brelse(bh[nr]);
+		}
+		nr++;
+		dest += size;
+	}
+	return address;
+}
+
+/*
+ * Try to increase the number of buffers available: the size argument
+ * is used to determine what kind of buffers we want.
+ */
+/*
+ * Allocate one page with priority 'pri', split it into buffers of 'size'
+ * bytes and add them to the free list for that size.  Returns 1 on
+ * success, 0 if the size is invalid or no page/buffer heads are available.
+ */
+static int grow_buffers(int pri, int size)
+{
+ unsigned long page;
+ struct buffer_head *bh, *tmp;
+ struct buffer_head * insert_point;
+ int isize;
+
+ /* Only multiples of 512 up to one page are accepted */
+ if ((size & 511) || (size > PAGE_SIZE)) {
+ printk("VFS: grow_buffers: size = %d\n",size);
+ return 0;
+ }
+
+ isize = BUFSIZE_INDEX(size);
+
+ if (!(page = __get_free_page(pri)))
+ return 0;
+ bh = create_buffers(page, size);
+ if (!bh) {
+ free_page(page);
+ return 0;
+ }
+
+ insert_point = free_list[isize];
+
+ tmp = bh;
+ while (1) {
+ nr_free[isize]++;
+ if (insert_point) {
+ /* Link tmp into the circular free list right after insert_point */
+ tmp->b_next_free = insert_point->b_next_free;
+ tmp->b_prev_free = insert_point;
+ insert_point->b_next_free->b_prev_free = tmp;
+ insert_point->b_next_free = tmp;
+ } else {
+ /* Empty free list: tmp becomes a one-element ring */
+ tmp->b_prev_free = tmp;
+ tmp->b_next_free = tmp;
+ }
+ insert_point = tmp;
+ ++nr_buffers;
+ if (tmp->b_this_page)
+ tmp = tmp->b_this_page;
+ else
+ break;
+ }
+ free_list[isize] = bh;
+ buffer_pages[MAP_NR(page)] = bh;
+ /* Close the per-page list into a ring */
+ tmp->b_this_page = bh;
+ wake_up(&buffer_wait);
+ buffermem += PAGE_SIZE;
+ return 1;
+}
+
+/*
+ * try_to_free() checks if all the buffers on this particular page
+ * are unused, and frees the page if so.
+ */
+/*
+ * bh:  any buffer on the page to be examined.
+ * bhp: set to bh on entry.  If bh itself gets freed, *bhp is moved to the
+ *      buffer before it on its list, or to NULL if bh was the only entry.
+ *
+ * Returns 0 without changing anything if some buffer on the page is in
+ * use, dirty, locked or has waiters.  Otherwise releases all the buffers
+ * and the page, and returns non-zero if mem_map shows no users left.
+ */
+static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp)
+{
+ unsigned long page;
+ struct buffer_head * tmp, * p;
+ int isize = BUFSIZE_INDEX(bh->b_size);
+
+ *bhp = bh;
+ page = (unsigned long) bh->b_data;
+ page &= PAGE_MASK;
+ /* First pass: check every buffer on the page, freeing nothing yet */
+ tmp = bh;
+ do {
+ if (!tmp)
+ return 0;
+ if (tmp->b_count || tmp->b_dirt || tmp->b_lock || tmp->b_wait)
+ return 0;
+ tmp = tmp->b_this_page;
+ } while (tmp != bh);
+ /* Second pass: unlink each buffer and return its head to the unused list */
+ tmp = bh;
+ do {
+ p = tmp;
+ tmp = tmp->b_this_page;
+ nr_buffers--;
+ nr_buffers_size[isize]--;
+ if (p == *bhp)
+ {
+ *bhp = p->b_prev_free;
+ if (p == *bhp) /* Was this the last in the list? */
+ *bhp = NULL;
+ }
+ remove_from_queues(p);
+ put_unused_buffer_head(p);
+ } while (tmp != bh);
+ buffermem -= PAGE_SIZE;
+ buffer_pages[MAP_NR(page)] = NULL;
+ free_page(page);
+ return !mem_map[MAP_NR(page)];
+}
+
+
+/*
+ * Consult the load average for buffers and decide whether or not
+ * we should shrink the buffers of one size or not. If we decide yes,
+ * do it and return 1. Else return 0. Do not attempt to shrink the size
+ * that is specified.
+ *
+ * I would prefer not to use a load average, but the way things are now it
+ * seems unavoidable. The way to get rid of it would be to force clustering
+ * universally, so that when we reclaim buffers we always reclaim an entire
+ * page. Doing this would mean that we all need to move towards QMAGIC.
+ */
+
+static int maybe_shrink_lav_buffers(int size)
+{
+ int nlist;
+ int isize;
+ int total_lav, total_n_buffers, n_sizes;
+
+ /* Do not consider the shared buffers since they would not tend
+ to have getblk called very often, and this would throw off
+ the lav. They are not easily reclaimable anyway (let the swapper
+ make the first move). */
+
+ /* Sum the load averages and the non-shared buffer counts over all sizes */
+ total_lav = total_n_buffers = n_sizes = 0;
+ for(nlist = 0; nlist < NR_SIZES; nlist++)
+ {
+ total_lav += buffers_lav[nlist];
+ if(nr_buffers_size[nlist]) n_sizes++;
+ total_n_buffers += nr_buffers_size[nlist];
+ total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED];
+ }
+
+ /* See if we have an excessive number of buffers of a particular
+ size - if so, victimize that bunch. */
+
+ /* size == 0 means no size is exempt (-1 matches no index) */
+ isize = (size ? BUFSIZE_INDEX(size) : -1);
+
+ /* With only one size in use there is nothing to rebalance */
+ if (n_sizes > 1)
+ for(nlist = 0; nlist < NR_SIZES; nlist++)
+ {
+ if(nlist == isize) continue;
+ /* Cross-multiplied comparison of this size's share of the load
+ average (scaled by lav_const) against its share of the buffers */
+ if(nr_buffers_size[nlist] &&
+ bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers <
+ total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
+ if(shrink_specific_buffers(6, bufferindex_size[nlist]))
+ return 1;
+ }
+ return 0;
+}
+/*
+ * Try to free up some pages by shrinking the buffer-cache
+ *
+ * Priority tells the routine how hard to try to shrink the
+ * buffers: 3 means "don't bother too much", while a value
+ * of 0 means "we'd better get some free pages now".
+ */
+int shrink_buffers(unsigned int priority)
+{
+	switch (priority) {
+	case 0:
+	case 1:
+		/* Urgent: write everything back ourselves */
+		sync_buffers(0,0);
+		break;
+	case 2:
+		/* Kick bdflush and wait for it */
+		wakeup_bdflush(1);
+		break;
+	default:
+		break;
+	}
+
+	/* Prefer shrinking a buffer size that is over-represented */
+	if (maybe_shrink_lav_buffers(0))
+		return 1;
+
+	/* No good candidate size - take any size we can find */
+	return shrink_specific_buffers(priority, 0);
+}
+
+/*
+ * Try to free one whole page of buffers.  'size' restricts the search to
+ * buffers of that size; 0 means any size.  'priority' bounds how much of
+ * each lru list is scanned (the buffer count shifted right by priority)
+ * and, when 0, lets us wait on locked buffers.
+ * Returns 1 as soon as try_to_free() reports success, 0 otherwise.
+ */
+static int shrink_specific_buffers(unsigned int priority, int size)
+{
+ struct buffer_head *bh;
+ int nlist;
+ int i, isize, isize1;
+
+#ifdef DEBUG
+ if(size) printk("Shrinking buffers of size %d\n", size);
+#endif
+ /* First try the free lists, and see if we can get a complete page
+ from here */
+ isize1 = (size ? BUFSIZE_INDEX(size) : -1);
+
+ for(isize = 0; isize<NR_SIZES; isize++){
+ if(isize1 != -1 && isize1 != isize) continue;
+ bh = free_list[isize];
+ if(!bh) continue;
+ /* '!i' lets the first pass through even though bh == free_list[isize] */
+ for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
+ if (bh->b_count || !bh->b_this_page)
+ continue;
+ if (try_to_free(bh, &bh))
+ return 1;
+ if(!bh) break; /* Some interrupt must have used it after we
+ freed the page. No big deal - keep looking */
+ }
+ }
+
+ /* Not enough in the free lists, now try the lru list */
+
+ for(nlist = 0; nlist < NR_LIST; nlist++) {
+ repeat1:
+ if(priority > 3 && nlist == BUF_SHARED) continue;
+ bh = lru_list[nlist];
+ if(!bh) continue;
+ i = nr_buffers_type[nlist] >> priority;
+ for ( ; i-- > 0 ; bh = bh->b_next_free) {
+ /* We may have stalled while waiting for I/O to complete. */
+ if(bh->b_list != nlist) goto repeat1;
+ if (bh->b_count || !bh->b_this_page)
+ continue;
+ if(size && bh->b_size != size) continue;
+ /* The else below pairs with 'if (priority)': locked buffers are
+ skipped unless priority is 0, in which case we wait for them */
+ if (bh->b_lock)
+ if (priority)
+ continue;
+ else
+ wait_on_buffer(bh);
+ /* Dirty: start a write and move on rather than freeing it now */
+ if (bh->b_dirt) {
+ bh->b_count++;
+ bh->b_flushtime = 0;
+ ll_rw_block(WRITEA, 1, &bh);
+ bh->b_count--;
+ continue;
+ }
+ if (try_to_free(bh, &bh))
+ return 1;
+ if(!bh) break;
+ }
+ }
+ return 0;
+}
+
+
+/*
+ * Print buffer cache statistics with printk: overall totals, one summary
+ * line per lru list, and one table row per buffer size.
+ */
+void show_buffers(void)
+{
+ struct buffer_head * bh;
+ int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
+ int shared;
+ int nlist, isize;
+
+ printk("Buffer memory: %6dkB\n",buffermem>>10);
+ printk("Buffer heads: %6d\n",nr_buffer_heads);
+ printk("Buffer blocks: %6d\n",nr_buffers);
+
+ for(nlist = 0; nlist < NR_LIST; nlist++) {
+ /* Counters are restarted for every list */
+ shared = found = locked = dirty = used = lastused = 0;
+ bh = lru_list[nlist];
+ if(!bh) continue;
+ do {
+ found++;
+ if (bh->b_lock)
+ locked++;
+ if (bh->b_dirt)
+ dirty++;
+ /* A page use count other than 1 is reported as shared */
+ if(mem_map[MAP_NR(((unsigned long) bh->b_data))] !=1) shared++;
+ /* lastused: position in the list of the last buffer that is in use */
+ if (bh->b_count)
+ used++, lastused = found;
+ bh = bh->b_next_free;
+ } while (bh != lru_list[nlist]);
+ printk("Buffer[%d] mem: %d buffers, %d used (last=%d), %d locked, %d dirty %d shrd\n",
+ nlist, found, used, lastused, locked, dirty, shared);
+ };
+ printk("Size [LAV] Free Clean Unshar Lck Lck1 Dirty Shared\n");
+ for(isize = 0; isize<NR_SIZES; isize++){
+ printk("%5d [%5d]: %7d ", bufferindex_size[isize],
+ buffers_lav[isize], nr_free[isize]);
+ for(nlist = 0; nlist < NR_LIST; nlist++)
+ printk("%7d ", nr_buffers_st[isize][nlist]);
+ printk("\n");
+ }
+}
+
+/*
+ * try_to_reassign() checks if all the buffers on this particular page
+ * are unused, and reassigns them to a new cluster if this is true.
+ */
+/*
+ * bh:             any buffer on the candidate page
+ * bhp:            set to bh on entry
+ * dev:            device the buffers are reassigned to
+ * starting_block: block number given to the buffer at the start of the
+ *                 page; the buffers after it get consecutive numbers
+ *
+ * Returns 1 if the page's buffers were reassigned.  Returns 0, changing
+ * nothing, if the page has a use count other than 1 or if any buffer on
+ * it is in use, dirty or locked.
+ */
+static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
+	dev_t dev, unsigned int starting_block)
+{
+	unsigned long page;
+	struct buffer_head * tmp, * p;
+
+	*bhp = bh;
+	page = (unsigned long) bh->b_data;
+	page &= PAGE_MASK;
+	if(mem_map[MAP_NR(page)] != 1) return 0;
+	tmp = bh;
+	do {
+		if (!tmp)
+			return 0;
+
+		if (tmp->b_count || tmp->b_dirt || tmp->b_lock)
+			return 0;
+		tmp = tmp->b_this_page;
+	} while (tmp != bh);
+	tmp = bh;
+
+	/*
+	 * Walk round the ring to the buffer whose data starts the page.
+	 * The pointer is cast to unsigned long, as everywhere else in this
+	 * file, rather than to unsigned int which may be narrower.
+	 */
+	while((unsigned long) tmp->b_data & (PAGE_SIZE - 1))
+		tmp = tmp->b_this_page;
+
+	/* This is the buffer at the head of the page */
+	bh = tmp;
+	do {
+		p = tmp;
+		tmp = tmp->b_this_page;
+		/* Requeue under the new identity: the queues are keyed on it */
+		remove_from_queues(p);
+		p->b_dev=dev;
+		p->b_uptodate = 0;
+		p->b_req = 0;
+		p->b_blocknr=starting_block++;
+		insert_into_queues(p);
+	} while (tmp != bh);
+	return 1;
+}
+
+/*
+ * Try to find a free cluster by locating a page where
+ * all of the buffers are unused. We would like this function
+ * to be atomic, so we do not call anything that might cause
+ * the process to sleep. The priority is somewhat similar to
+ * the priority used in shrink_buffers.
+ *
+ * My thinking is that the kernel should end up using whole
+ * pages for the buffer cache as much of the time as possible.
+ * This way the other buffers on a particular page are likely
+ * to be very near each other on the free list, and we will not
+ * be expiring data prematurely. For now we only cannibalize buffers
+ * of the same size to keep the code simpler.
+ */
+static int reassign_cluster(dev_t dev,
+ unsigned int starting_block, int size)
+{
+ struct buffer_head *bh;
+ int isize = BUFSIZE_INDEX(size);
+ int i;
+
+ /* We want to give ourselves a really good shot at generating
+ a cluster, and since we only take buffers from the free
+ list, we "overfill" it a little. */
+
+ while(nr_free[isize] < 32) refill_freelist(size);
+
+ bh = free_list[isize];
+ if(bh)
+ /* One full trip round the circular free list; '!i' admits the first entry */
+ for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
+ if (!bh->b_this_page) continue;
+ /* Same non-zero value that try_to_generate_cluster() returns on success */
+ if (try_to_reassign(bh, &bh, dev, starting_block))
+ return 4;
+ }
+ return 0;
+}
+
+/* This function tries to generate a new cluster of buffers
+ * from a new page in memory. We should only do this if we have
+ * not expanded the buffer cache to the maximum size that we allow.
+ */
+/*
+ * Returns 4 when a new page of buffers for 'block' and the blocks that
+ * follow it was created and queued; returns 0 if no page or buffer heads
+ * could be had, or if one of the blocks is already in the buffer cache.
+ */
+static unsigned long try_to_generate_cluster(dev_t dev, int block, int size)
+{
+ struct buffer_head * bh, * tmp, * arr[8];
+ int isize = BUFSIZE_INDEX(size);
+ unsigned long offset;
+ unsigned long page;
+ int nblock;
+
+ page = get_free_page(GFP_NOBUFFER);
+ if(!page) return 0;
+
+ bh = create_buffers(page, size);
+ if (!bh) {
+ free_page(page);
+ return 0;
+ };
+ /* Give up if any block of the would-be cluster is already cached */
+ nblock = block;
+ for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
+ if (find_buffer(dev, nblock++, size))
+ goto not_aligned;
+ }
+ tmp = bh;
+ /* nblock is reused from here on as the count of entries in arr[] */
+ nblock = 0;
+ while (1) {
+ arr[nblock++] = bh;
+ bh->b_count = 1;
+ bh->b_dirt = 0;
+ bh->b_flushtime = 0;
+ bh->b_lock = 0;
+ bh->b_uptodate = 0;
+ bh->b_req = 0;
+ bh->b_dev = dev;
+ bh->b_list = BUF_CLEAN;
+ bh->b_blocknr = block++;
+ nr_buffers++;
+ nr_buffers_size[isize]++;
+ insert_into_queues(bh);
+ if (bh->b_this_page)
+ bh = bh->b_this_page;
+ else
+ break;
+ }
+ buffermem += PAGE_SIZE;
+ buffer_pages[MAP_NR(page)] = bh;
+ /* Close the per-page list into a ring */
+ bh->b_this_page = tmp;
+ /* Drop the references taken above (b_count was set to 1); nothing is read here */
+ while (nblock-- > 0)
+ brelse(arr[nblock]);
+ return 4;
+not_aligned:
+ /* The list from create_buffers() is still NULL-terminated here */
+ while ((tmp = bh) != NULL) {
+ bh = bh->b_this_page;
+ put_unused_buffer_head(tmp);
+ }
+ free_page(page);
+ return 0;
+}
+
+/*
+ * Try to set up one page of buffers for the blocks in b[].  This is only
+ * attempted when the block numbers are consecutive and none of them is in
+ * the buffer cache yet.  Returns 0 if no cluster was made, otherwise the
+ * non-zero value from try_to_generate_cluster()/reassign_cluster().
+ */
+unsigned long generate_cluster(dev_t dev, int b[], int size)
+{
+	int idx = 0;
+	int off;
+
+	for (off = 0; off < PAGE_SIZE; off += size) {
+		/* No need to cluster unless the blocks are consecutive */
+		if (idx != 0 && b[idx-1] != b[idx]-1)
+			return 0;
+		if (find_buffer(dev, b[idx], size))
+			return 0;
+		idx++;
+	}
+
+	/* OK, we have a candidate for a new cluster */
+
+	/* See if one size of buffer is over-represented in the buffer cache,
+	   if so reduce the numbers of buffers */
+	if (maybe_shrink_lav_buffers(size)) {
+		int retval = try_to_generate_cluster(dev, b[0], size);
+		if (retval)
+			return retval;
+	}
+
+	/* Use a new page only while free memory is comfortable */
+	if (nr_free_pages <= min_free_pages*2)
+		return reassign_cluster(dev, b[0], size);
+	return try_to_generate_cluster(dev, b[0], size);
+}
+
+/*
+ * This initializes the initial buffer free list. nr_buffers_type is set
+ * to one less the actual number of buffers, as a sop to backwards
+ * compatibility --- the old code did this (I think unintentionally,
+ * but I'm not sure), and programs in the ps package expect it.
+ * - TYT 8/30/92
+ */
+/*
+ * Set up the buffer cache: pick a hash table size from the amount of
+ * memory, allocate and clear the hash table and the per-page buffer_pages
+ * table, and seed the free list with one page of BLOCK_SIZE buffers.
+ * Panics if either table or the first page of buffers cannot be had.
+ */
+void buffer_init(void)
+{
+	int i;
+	int isize = BUFSIZE_INDEX(BLOCK_SIZE);
+
+	/* Bigger machines get a bigger hash table */
+	if (high_memory >= 4*1024*1024) {
+		if(high_memory >= 16*1024*1024)
+			nr_hash = 16381;
+		else
+			nr_hash = 4093;
+	} else {
+		nr_hash = 997;
+	};
+
+	hash_table = (struct buffer_head **) vmalloc(nr_hash *
+		sizeof(struct buffer_head *));
+	/* Both tables are written to right below; do not run on with NULL */
+	if (!hash_table)
+		panic("VFS: Unable to allocate buffer hash table!");
+
+	buffer_pages = (struct buffer_head **) vmalloc(MAP_NR(high_memory) *
+		sizeof(struct buffer_head *));
+	if (!buffer_pages)
+		panic("VFS: Unable to allocate buffer page table!");
+
+	for (i = 0 ; i < MAP_NR(high_memory) ; i++)
+		buffer_pages[i] = NULL;
+
+	for (i = 0 ; i < nr_hash ; i++)
+		hash_table[i] = NULL;
+	lru_list[BUF_CLEAN] = 0;
+	grow_buffers(GFP_KERNEL, BLOCK_SIZE);
+	if (!free_list[isize])
+		panic("VFS: Unable to initialize buffer free list!");
+	return;
+}
+
+/* This is a simple kernel daemon, whose job it is to provide a dynamic
+ * response to dirty buffers. Once this process is activated, we write back
+ * a limited number of buffers to the disks and then go back to sleep again.
+ * In effect this is a process which never leaves kernel mode, and does not have
+ * any user memory associated with it except for the stack. There is also
+ * a kernel stack page, which obviously must be separate from the user stack.
+ */
+struct wait_queue * bdflush_wait = NULL;
+struct wait_queue * bdflush_done = NULL;
+
+static int bdflush_running = 0;
+
+/*
+ * Wake the bdflush daemon.  If 'wait' is non-zero, sleep until the daemon
+ * signals bdflush_done.  Without a running daemon, print a warning and
+ * call sync_buffers() directly instead.
+ */
+static void wakeup_bdflush(int wait)
+{
+	if (bdflush_running) {
+		wake_up(&bdflush_wait);
+		if (wait)
+			sleep_on(&bdflush_done);
+		return;
+	}
+
+	printk("Warning - bdflush not running\n");
+	sync_buffers(0,0);
+}
+
+
+
+/*
+ * Here we attempt to write back old buffers. We also try and flush inodes
+ * and supers as well, since this function is essentially "update", and
+ * otherwise there would be no way of ensuring that these quantities ever
+ * get written back. Ideally, we would have a timestamp on the inodes
+ * and superblocks so that we could write back only the old ones as well
+ */
+
+/*
+ * Write back superblocks and inodes, start writes for the dirty buffers
+ * whose flush time has been reached, and update the per-size buffer load
+ * averages.  Always returns 0.
+ */
+asmlinkage int sync_old_buffers(void)
+{
+ int i, isize;
+ int ndirty, nwritten;
+ int nlist;
+ int ncount;
+ struct buffer_head * bh, *next;
+
+ sync_supers(0);
+ sync_inodes(0);
+
+ ncount = 0;
+ /* Normally only the dirty list is scanned; DEBUG builds scan every list */
+#ifdef DEBUG
+ for(nlist = 0; nlist < NR_LIST; nlist++)
+#else
+ for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
+#endif
+ {
+ ndirty = 0;
+ nwritten = 0;
+ repeat:
+ bh = lru_list[nlist];
+ if(bh)
+ for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
+ /* We may have stalled while waiting for I/O to complete. */
+ if(bh->b_list != nlist) goto repeat;
+ /* Fetch the successor now: bh may be refiled below */
+ next = bh->b_next_free;
+ if(!lru_list[nlist]) {
+ printk("Dirty list empty %d\n", i);
+ break;
+ }
+
+ /* Clean buffer on dirty list? Refile it */
+ if (nlist == BUF_DIRTY && !bh->b_dirt && !bh->b_lock)
+ {
+ refile_buffer(bh);
+ continue;
+ }
+
+ if (bh->b_lock || !bh->b_dirt)
+ continue;
+ ndirty++;
+ /* Not old enough yet */
+ if(bh->b_flushtime > jiffies) continue;
+ nwritten++;
+ /* Hold a reference across the write request */
+ bh->b_count++;
+ bh->b_flushtime = 0;
+#ifdef DEBUG
+ if(nlist != BUF_DIRTY) ncount++;
+#endif
+ ll_rw_block(WRITE, 1, &bh);
+ bh->b_count--;
+ }
+ }
+#ifdef DEBUG
+ if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
+ printk("Wrote %d/%d buffers\n", nwritten, ndirty);
+#endif
+
+ /* We assume that we only come through here on a regular
+ schedule, like every 5 seconds. Now update load averages.
+ Shift usage counts to prevent overflow. */
+ for(isize = 0; isize<NR_SIZES; isize++){
+ CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
+ buffer_usage[isize] = 0;
+ };
+ return 0;
+}
+
+
+/* This is the interface to bdflush. As we get more sophisticated, we can
+ * pass tuning parameters to this "process", to adjust how it behaves. If you
+ * invoke this again after you have done this once, you would simply modify
+ * the tuning parameters. We would want to verify each parameter, however,
+ * to make sure that it is reasonable. */
+
+asmlinkage int sys_bdflush(int func, int data)
+{
+ int i, error;
+ int ndirty;
+ int nlist;
+ int ncount;
+ struct buffer_head * bh, *next;
+
+ if (!suser())
+ return -EPERM;
+
+ if (func == 1)
+ return sync_old_buffers();
+
+ /* func 0 starts the daemon and func 1 (handled above) runs
+ sync_old_buffers(). For func >= 2 the parameter index is (func-2)/2:
+ an even func copies the parameter to the user address in 'data',
+ an odd func sets the parameter to the value of 'data'. */
+ if (func >= 2) {
+ i = (func-2) >> 1;
+ if (i < 0 || i >= N_PARAM)
+ return -EINVAL;
+ if((func & 1) == 0) {
+ error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
+ if (error)
+ return error;
+ put_fs_long(bdf_prm.data[i], data);
+ return 0;
+ };
+ /* New values must lie within the per-parameter limits */
+ if (data < bdflush_min[i] || data > bdflush_max[i])
+ return -EINVAL;
+ bdf_prm.data[i] = data;
+ return 0;
+ };
+
+ if (bdflush_running)
+ return -EBUSY; /* Only one copy of this running at one time */
+ bdflush_running++;
+
+ /* OK, from here on is the daemon */
+
+ for (;;) {
+#ifdef DEBUG
+ printk("bdflush() activated...");
+#endif
+
+ ncount = 0;
+ /* Normally only the dirty list is scanned; DEBUG builds scan every list */
+#ifdef DEBUG
+ for(nlist = 0; nlist < NR_LIST; nlist++)
+#else
+ for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
+#endif
+ {
+ ndirty = 0;
+ repeat:
+ bh = lru_list[nlist];
+ if(bh)
+ /* Stop after bdf_prm.b_un.ndirty write requests per pass */
+ for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty;
+ bh = next) {
+ /* We may have stalled while waiting for I/O to complete. */
+ if(bh->b_list != nlist) goto repeat;
+ /* Fetch the successor now: bh may be refiled below */
+ next = bh->b_next_free;
+ if(!lru_list[nlist]) {
+ printk("Dirty list empty %d\n", i);
+ break;
+ }
+
+ /* Clean buffer on dirty list? Refile it */
+ if (nlist == BUF_DIRTY && !bh->b_dirt && !bh->b_lock)
+ {
+ refile_buffer(bh);
+ continue;
+ }
+
+ if (bh->b_lock || !bh->b_dirt)
+ continue;
+ /* Should we write back buffers that are shared or not??
+ currently dirty buffers are not shared, so it does not matter */
+ bh->b_count++;
+ ndirty++;
+ bh->b_flushtime = 0;
+ ll_rw_block(WRITE, 1, &bh);
+#ifdef DEBUG
+ if(nlist != BUF_DIRTY) ncount++;
+#endif
+ bh->b_count--;
+ }
+ }
+#ifdef DEBUG
+ if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
+ printk("sleeping again.\n");
+#endif
+ /* Release anyone sleeping in wakeup_bdflush(1) */
+ wake_up(&bdflush_done);
+
+ /* If there are still a lot of dirty buffers around, skip the sleep
+ and flush some more */
+
+ if(nr_buffers_type[BUF_DIRTY] < (nr_buffers - nr_buffers_type[BUF_SHARED]) *
+ bdf_prm.b_un.nfract/100) {
+ /* A pending SIGKILL ends the daemon; all other signals are discarded */
+ if (current->signal & (1 << (SIGKILL-1))) {
+ bdflush_running--;
+ return 0;
+ }
+ current->signal = 0;
+ interruptible_sleep_on(&bdflush_wait);
+ }
+ }
+}
+
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 8
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -8
+ * c-argdecl-indent: 8
+ * c-label-offset: -8
+ * c-continued-statement-offset: 8
+ * c-continued-brace-offset: 0
+ * End:
+ */
diff --git a/fs/dcache.c b/fs/dcache.c
new file mode 100644
index 000000000..a40bdf316
--- /dev/null
+++ b/fs/dcache.c
@@ -0,0 +1,253 @@
+/*
+ * linux/fs/dcache.c
+ *
+ * (C) Copyright 1994 Linus Torvalds
+ */
+
+/*
+ * The directory cache is a "two-level" cache, each level doing LRU on
+ * its entries. Adding new entries puts them at the end of the LRU
+ * queue on the first-level cache, while the second-level cache is
+ * fed by any cache hits.
+ *
+ * The idea is that new additions (from readdir(), for example) will not
+ * flush the cache of entries that have really been used.
+ *
+ * There is a global hash-table over both caches that hashes the entries
+ * based on the directory inode number and device as well as on a
+ * string-hash computed over the name.
+ */
+
+#include <stddef.h>
+
+#include <linux/fs.h>
+#include <linux/string.h>
+
+/*
+ * Don't bother caching long names.. They just take up space in the cache, and
+ * for a name cache you just want to cache the "normal" names anyway which tend
+ * to be short.
+ */
+#define DCACHE_NAME_LEN 15
+#define DCACHE_SIZE 64
+
+/*
+ * Link pair for a hash queue.  It is used both as the queue head
+ * (hash_table[]) and as the first member of every dir_cache_entry, which
+ * is what allows a head to be cast to a struct dir_cache_entry pointer.
+ */
+struct hash_list {
+ struct dir_cache_entry * next;
+ struct dir_cache_entry * prev;
+};
+
+/*
+ * The dir_cache_entry must be in this order: we do ugly things with the pointers
+ */
+struct dir_cache_entry {
+ /* hash queue links; must stay first so a hash_list head can stand in for an entry */
+ struct hash_list h;
+ /* dev .. name are copied as one chunk by COPYDATA: keep them contiguous and in order */
+ unsigned long dev; /* matched against dir->i_dev */
+ unsigned long dir; /* matched against dir->i_ino */
+ unsigned long version; /* matched against dir->i_version */
+ unsigned long ino; /* the cached result of the lookup */
+ unsigned char name_len;
+ char name[DCACHE_NAME_LEN]; /* name_len bytes, not NUL-terminated */
+ /* points at level1_head or level2_head, whichever LRU this entry is on */
+ struct dir_cache_entry ** lru_head;
+ struct dir_cache_entry * next_lru, * prev_lru;
+};
+
+/*
+ * Copy the payload of one entry (dev, dir, version, ino, name_len, name)
+ * into another with a single memcpy.  The length is spelled out by hand
+ * as four unsigned longs, one length byte and the name array, so it has
+ * to be kept in step with struct dir_cache_entry.
+ */
+#define COPYDATA(de, newde) \
+memcpy((void *) &newde->dev, (void *) &de->dev, \
+4*sizeof(unsigned long) + 1 + DCACHE_NAME_LEN)
+
+static struct dir_cache_entry level1_cache[DCACHE_SIZE];
+static struct dir_cache_entry level2_cache[DCACHE_SIZE];
+
+/*
+ * The LRU-lists are doubly-linked circular lists, and do not change in size
+ * so these pointers always have something to point to (after _init)
+ */
+static struct dir_cache_entry * level1_head;
+static struct dir_cache_entry * level2_head;
+
+/*
+ * The hash-queues are also doubly-linked circular lists, but the head is
+ * itself on the doubly-linked list, not just a pointer to the first entry.
+ */
+#define DCACHE_HASH_QUEUES 19
+#define hash_fn(dev,dir,namehash) (((dev) ^ (dir) ^ (namehash)) % DCACHE_HASH_QUEUES)
+
+static struct hash_list hash_table[DCACHE_HASH_QUEUES];
+
+/* Unlink de from its circular LRU list; de's own links are left as they were. */
+static inline void remove_lru(struct dir_cache_entry * de)
+{
+	struct dir_cache_entry * after = de->next_lru;
+	struct dir_cache_entry * before = de->prev_lru;
+
+	after->prev_lru = before;
+	before->next_lru = after;
+}
+
+/* Link de into the circular LRU list just before head, i.e. at the tail. */
+static inline void add_lru(struct dir_cache_entry * de, struct dir_cache_entry *head)
+{
+	struct dir_cache_entry * tail = head->prev_lru;
+
+	de->prev_lru = tail;
+	de->next_lru = head;
+	tail->next_lru = de;
+	head->prev_lru = de;
+}
+
+/*
+ * Move de to the tail of its LRU list.  If de is the current head it is
+ * enough to advance the head pointer: in a circular list the entry just
+ * before the head is the tail.
+ */
+static inline void update_lru(struct dir_cache_entry * de)
+{
+	struct dir_cache_entry ** headp = de->lru_head;
+
+	if (*headp != de) {
+		remove_lru(de);
+		add_lru(de, *headp);
+		return;
+	}
+	*headp = de->next_lru;
+}
+
+/*
+ * Stupid name"hash" algorithm. Write something better if you want to,
+ * but I doubt it matters that much
+ */
+static inline unsigned long namehash(const char * name, int len)
+{
+	/* The hash is the length times the first byte, taken as unsigned */
+	const unsigned char * first = (const unsigned char *) name;
+
+	return len * *first;
+}
+
+/*
+ * Hash queue manipulation. Look out for the casts..
+ */
+/*
+ * Take de off its hash queue, if it is on one.  A NULL h.next marks an
+ * entry that is not hashed, and is what this leaves behind.
+ */
+static inline void remove_hash(struct dir_cache_entry * de)
+{
+	struct dir_cache_entry * succ = de->h.next;
+	struct dir_cache_entry * pred;
+
+	if (!succ)
+		return;
+
+	pred = de->h.prev;
+	succ->h.prev = pred;
+	pred->h.next = succ;
+	de->h.next = NULL;
+}
+
+/* Insert de at the front of the hash queue, right after the queue head. */
+static inline void add_hash(struct dir_cache_entry * de, struct hash_list * hash)
+{
+	struct dir_cache_entry * old_first = hash->next;
+
+	de->h.next = old_first;
+	/* The head stands in for an entry; only its h links are ever touched */
+	de->h.prev = (struct dir_cache_entry *) hash;
+	old_first->h.prev = de;
+	hash->next = de;
+}
+
+/*
+ * Find a directory cache entry given all the necessary info.
+ */
+static struct dir_cache_entry * find_entry(struct inode * dir, const char * name, int len, struct hash_list * hash)
+{
+	/* The queue is circular: arriving back at the head ends the search */
+	struct dir_cache_entry * const end = (struct dir_cache_entry *) hash;
+	struct dir_cache_entry * cur;
+
+	for (cur = hash->next; cur != end; cur = cur->h.next) {
+		if (cur->dev == dir->i_dev &&
+		    cur->dir == dir->i_ino &&
+		    cur->version == dir->i_version &&
+		    cur->name_len == len &&
+		    !memcmp(cur->name, name, len))
+			return cur;
+	}
+	return NULL;
+}
+
+/*
+ * Move a successfully used entry to level2. If already at level2,
+ * move it to the end of the LRU queue..
+ */
+static inline void move_to_level2(struct dir_cache_entry * old_de, struct hash_list * hash)
+{
+	struct dir_cache_entry * victim;
+
+	if (old_de->lru_head != &level2_head) {
+		/* Reuse the entry at the head of the level2 LRU for a copy */
+		victim = level2_head;
+		level2_head = victim->next_lru;
+		remove_hash(victim);
+		COPYDATA(old_de, victim);
+		add_hash(victim, hash);
+	} else {
+		/* Already at level2: just move it to the end of the queue */
+		update_lru(old_de);
+	}
+}
+
+/*
+ * Look up 'name' (len bytes) in directory 'dir'.  On a hit, store the
+ * cached inode number in *ino, promote the entry to the second-level
+ * cache and return 1.  Returns 0 on a miss or when the name is longer
+ * than DCACHE_NAME_LEN.
+ */
+int dcache_lookup(struct inode * dir, const char * name, int len, unsigned long * ino)
+{
+	struct hash_list * bucket;
+	struct dir_cache_entry * hit;
+
+	if (len <= DCACHE_NAME_LEN) {
+		bucket = hash_table + hash_fn(dir->i_dev, dir->i_ino, namehash(name,len));
+		hit = find_entry(dir, name, len, bucket);
+		if (hit) {
+			*ino = hit->ino;
+			move_to_level2(hit, bucket);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Record that 'name' (len bytes) in directory 'dir' maps to inode 'ino'.
+ * An existing entry has its inode number updated and is moved to the end
+ * of its LRU queue; otherwise the entry at the head of the level1 LRU is
+ * reused.  Names longer than DCACHE_NAME_LEN are not cached.
+ */
+void dcache_add(struct inode * dir, const char * name, int len, unsigned long ino)
+{
+	struct hash_list * bucket;
+	struct dir_cache_entry * entry;
+
+	if (len > DCACHE_NAME_LEN)
+		return;
+
+	bucket = hash_table + hash_fn(dir->i_dev, dir->i_ino, namehash(name,len));
+	entry = find_entry(dir, name, len, bucket);
+	if (entry != NULL) {
+		entry->ino = ino;
+		update_lru(entry);
+		return;
+	}
+
+	/* Reuse the level1 head; advancing the head makes it the tail */
+	entry = level1_head;
+	level1_head = entry->next_lru;
+	remove_hash(entry);
+
+	entry->dev = dir->i_dev;
+	entry->dir = dir->i_ino;
+	entry->version = dir->i_version;
+	entry->ino = ino;
+	entry->name_len = len;
+	memcpy(entry->name, name, len);
+
+	add_hash(entry, bucket);
+}
+
+/*
+ * Set up the directory cache: chain every entry of level1_cache[] and of
+ * level2_cache[] into a circular LRU list, and make every hash queue
+ * empty.  No memory is taken from the range passed in: mem_start is
+ * returned unchanged and mem_end is not used.
+ */
+unsigned long name_cache_init(unsigned long mem_start, unsigned long mem_end)
+{
+	int i;
+	struct dir_cache_entry * p;
+
+	/*
+	 * Init level1 LRU lists..
+	 */
+	p = level1_cache;
+	do {
+		p[1].prev_lru = p;
+		p[0].next_lru = p+1;
+		p[0].lru_head = &level1_head;
+	} while (++p < level1_cache + DCACHE_SIZE-1);
+	/* p is now the last entry: close the ring */
+	level1_cache[0].prev_lru = p;
+	p[0].next_lru = &level1_cache[0];
+	p[0].lru_head = &level1_head;
+	level1_head = level1_cache;
+
+	/*
+	 * Init level2 LRU lists..
+	 */
+	p = level2_cache;
+	do {
+		p[1].prev_lru = p;
+		p[0].next_lru = p+1;
+		p[0].lru_head = &level2_head;
+	} while (++p < level2_cache + DCACHE_SIZE-1);
+	/* p is now the last entry: close the ring */
+	level2_cache[0].prev_lru = p;
+	p[0].next_lru = &level2_cache[0];
+	p[0].lru_head = &level2_head;
+	level2_head = level2_cache;
+
+	/*
+	 * Empty hash queues..  An empty queue has both links of its head
+	 * pointing back at the head itself.  (This used to assign .next
+	 * twice and never set .prev.)
+	 */
+	for (i = 0 ; i < DCACHE_HASH_QUEUES ; i++)
+		hash_table[i].next = hash_table[i].prev =
+			(struct dir_cache_entry *) &hash_table[i];
+	return mem_start;
+}
diff --git a/fs/devices.c b/fs/devices.c
new file mode 100644
index 000000000..e79ea07d5
--- /dev/null
+++ b/fs/devices.c
@@ -0,0 +1,276 @@
+/*
+ * linux/fs/devices.c
+ *
+ * (C) 1993 Matthias Urlichs -- collected common code and tables.
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/fs.h>
+#include <linux/major.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/ext_fs.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/errno.h>
+
+/* One driver table slot: the driver's name and its file operations. */
+struct device_struct {
+ const char * name;
+ struct file_operations * fops;
+};
+
+/* Character device drivers, indexed by major number; fops == NULL marks a free slot. */
+static struct device_struct chrdevs[MAX_CHRDEV] = {
+ { NULL, NULL },
+};
+
+/* Block device drivers, indexed by major number; fops == NULL marks a free slot. */
+static struct device_struct blkdevs[MAX_BLKDEV] = {
+ { NULL, NULL },
+};
+
+/*
+ * Format a listing of all registered character and block drivers
+ * ("major name" per line) into "page" and return the number of
+ * characters written.  No bound is checked on "page".
+ */
+int get_device_list(char * page)
+{
+	int major;
+	char * out = page;
+
+	out += sprintf(out, "Character devices:\n");
+	for (major = 0; major < MAX_CHRDEV ; major++) {
+		if (!chrdevs[major].fops)
+			continue;
+		out += sprintf(out, "%2d %s\n", major, chrdevs[major].name);
+	}
+	out += sprintf(out, "\nBlock devices:\n");
+	for (major = 0; major < MAX_BLKDEV ; major++) {
+		if (!blkdevs[major].fops)
+			continue;
+		out += sprintf(out, "%2d %s\n", major, blkdevs[major].name);
+	}
+	return out - page;
+}
+
+/*
+ * Return the file operations registered for block major "major",
+ * or NULL if the major is out of range or nothing is registered.
+ */
+struct file_operations * get_blkfops(unsigned int major)
+{
+	return (major < MAX_BLKDEV) ? blkdevs[major].fops : NULL;
+}
+
+/*
+ * Return the file operations registered for character major "major",
+ * or NULL if the major is out of range or nothing is registered.
+ */
+struct file_operations * get_chrfops(unsigned int major)
+{
+	return (major < MAX_CHRDEV) ? chrdevs[major].fops : NULL;
+}
+
+/*
+ * Register "fops" under the name "name" as a character driver.
+ *
+ * A non-zero "major" claims exactly that slot: 0 is returned on
+ * success, -EINVAL if the major is out of range and -EBUSY if the slot
+ * belongs to different file operations.
+ *
+ * major == 0 asks for a dynamic major: if "fops" is already registered
+ * its major is returned, otherwise the highest free major (never 0) is
+ * taken and returned.  -EBUSY means there is no free slot.
+ */
+int register_chrdev(unsigned int major, const char * name, struct file_operations *fops)
+{
+	unsigned int slot;
+
+	if (major != 0) {
+		if (major >= MAX_CHRDEV)
+			return -EINVAL;
+		if (chrdevs[major].fops != NULL && chrdevs[major].fops != fops)
+			return -EBUSY;
+		chrdevs[major].name = name;
+		chrdevs[major].fops = fops;
+		return 0;
+	}
+	/* already registered? then hand back the same major */
+	for (slot = MAX_CHRDEV-1; slot > 0; slot--)
+		if (chrdevs[slot].fops == fops)
+			return slot;
+	/* no: grab the highest free slot */
+	for (slot = MAX_CHRDEV-1; slot > 0; slot--) {
+		if (chrdevs[slot].fops != NULL)
+			continue;
+		chrdevs[slot].name = name;
+		chrdevs[slot].fops = fops;
+		return slot;
+	}
+	return -EBUSY;
+}
+
+/*
+ * Register "fops" under the name "name" as a block driver.
+ *
+ * A non-zero "major" claims exactly that slot: 0 is returned on
+ * success, -EINVAL if the major is out of range and -EBUSY if the slot
+ * belongs to different file operations.
+ *
+ * major == 0 asks for a dynamic major: if "fops" is already registered
+ * its major is returned, otherwise the highest free major (never 0) is
+ * taken and returned.  -EBUSY means there is no free slot.
+ */
+int register_blkdev(unsigned int major, const char * name, struct file_operations *fops)
+{
+	unsigned int slot;
+
+	if (major != 0) {
+		if (major >= MAX_BLKDEV)
+			return -EINVAL;
+		if (blkdevs[major].fops != NULL && blkdevs[major].fops != fops)
+			return -EBUSY;
+		blkdevs[major].name = name;
+		blkdevs[major].fops = fops;
+		return 0;
+	}
+	/* already registered? then hand back the same major */
+	for (slot = MAX_BLKDEV-1; slot > 0; slot--)
+		if (blkdevs[slot].fops == fops)
+			return slot;
+	/* no: grab the highest free slot */
+	for (slot = MAX_BLKDEV-1; slot > 0; slot--) {
+		if (blkdevs[slot].fops != NULL)
+			continue;
+		blkdevs[slot].name = name;
+		blkdevs[slot].fops = fops;
+		return slot;
+	}
+	return -EBUSY;
+}
+
+/*
+ * Release character major "major".  The slot must be in use and must
+ * have been registered under the same "name"; anything else gives
+ * -EINVAL.  Returns 0 on success.
+ */
+int unregister_chrdev(unsigned int major, const char * name)
+{
+	struct device_struct * slot;
+
+	if (major >= MAX_CHRDEV)
+		return -EINVAL;
+	slot = &chrdevs[major];
+	if (slot->fops == NULL || strcmp(slot->name, name) != 0)
+		return -EINVAL;
+	slot->name = NULL;
+	slot->fops = NULL;
+	return 0;
+}
+
+/*
+ * Release block major "major".  The slot must be in use and must
+ * have been registered under the same "name"; anything else gives
+ * -EINVAL.  Returns 0 on success.
+ */
+int unregister_blkdev(unsigned int major, const char * name)
+{
+	struct device_struct * slot;
+
+	if (major >= MAX_BLKDEV)
+		return -EINVAL;
+	slot = &blkdevs[major];
+	if (slot->fops == NULL || strcmp(slot->name, name) != 0)
+		return -EINVAL;
+	slot->name = NULL;
+	slot->fops = NULL;
+	return 0;
+}
+
+/*
+ * This routine checks whether a removable media has been changed,
+ * and invalidates all buffer-cache-entries in that case. This
+ * is a relatively slow routine, so we have to try to minimize using
+ * it. Thus it is called only upon a 'mount' or 'open'. This
+ * is the best way of combining speed and utility, I think.
+ * People changing diskettes in the middle of an operation deserve
+ * to lose :-)
+ *
+ * Returns 1 if a change was detected and handled, 0 otherwise
+ * (including when the driver has no check_media_change operation).
+ */
+int check_disk_change(dev_t dev)
+{
+	int major = MAJOR(dev);
+	int n;
+	struct file_operations * ops;
+
+	if (major >= MAX_BLKDEV)
+		return 0;
+	ops = blkdevs[major].fops;
+	if (ops == NULL || ops->check_media_change == NULL)
+		return 0;
+	if (!ops->check_media_change(dev))
+		return 0;
+
+	printk("VFS: Disk change detected on device %d/%d\n",
+		MAJOR(dev), MINOR(dev));
+	/* drop every superblock, inode and buffer that belongs to the device */
+	for (n = 0 ; n < NR_SUPER ; n++) {
+		if (super_blocks[n].s_dev != dev)
+			continue;
+		put_super(super_blocks[n].s_dev);
+	}
+	invalidate_inodes(dev);
+	invalidate_buffers(dev);
+
+	if (ops->revalidate)
+		ops->revalidate(dev);
+	return 1;
+}
+
+/*
+ * Called every time a block special file is opened: install the file
+ * operations registered for the inode's major number and pass the
+ * open on to the driver, if it has an open routine.  Returns -ENODEV
+ * when no driver is registered for that major.
+ */
+int blkdev_open(struct inode * inode, struct file * filp)
+{
+	int major = MAJOR(inode->i_rdev);
+	struct file_operations * ops;
+
+	if (major >= MAX_BLKDEV)
+		return -ENODEV;
+	ops = blkdevs[major].fops;
+	if (ops == NULL)
+		return -ENODEV;
+	filp->f_op = ops;
+	if (ops->open == NULL)
+		return 0;
+	return ops->open(inode,filp);
+}
+
+/*
+ * Dummy default file-operations: the only thing this does
+ * is contain the open that then fills in the correct operations
+ * depending on the special file...
+ */
+struct file_operations def_blk_fops = {
+ NULL, /* lseek */
+ NULL, /* read */
+ NULL, /* write */
+ NULL, /* readdir */
+ NULL, /* select */
+ NULL, /* ioctl */
+ NULL, /* mmap */
+ blkdev_open, /* open */
+ NULL, /* release */
+};
+
+/*
+ * Inode operations for block special files: nothing but the default
+ * file operations above is provided.
+ */
+struct inode_operations blkdev_inode_operations = {
+ &def_blk_fops, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * Called every time a character special file is opened: install the
+ * file operations registered for the inode's major number and pass
+ * the open on to the driver, if it has an open routine.  Returns
+ * -ENODEV when no driver is registered for that major.
+ */
+int chrdev_open(struct inode * inode, struct file * filp)
+{
+	int major = MAJOR(inode->i_rdev);
+	struct file_operations * ops;
+
+	if (major >= MAX_CHRDEV)
+		return -ENODEV;
+	ops = chrdevs[major].fops;
+	if (ops == NULL)
+		return -ENODEV;
+	filp->f_op = ops;
+	if (ops->open == NULL)
+		return 0;
+	return ops->open(inode,filp);
+}
+
+/*
+ * Dummy default file-operations: the only thing this does
+ * is contain the open that then fills in the correct operations
+ * depending on the special file...
+ */
+struct file_operations def_chr_fops = {
+ NULL, /* lseek */
+ NULL, /* read */
+ NULL, /* write */
+ NULL, /* readdir */
+ NULL, /* select */
+ NULL, /* ioctl */
+ NULL, /* mmap */
+ chrdev_open, /* open */
+ NULL, /* release */
+};
+
+/*
+ * Inode operations for character special files: nothing but the
+ * default file operations above is provided.
+ */
+struct inode_operations chrdev_inode_operations = {
+ &def_chr_fops, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
diff --git a/fs/exec.c b/fs/exec.c
new file mode 100644
index 000000000..586098cd0
--- /dev/null
+++ b/fs/exec.c
@@ -0,0 +1,961 @@
+/*
+ * linux/fs/exec.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * #!-checking implemented by tytso.
+ */
+
+/*
+ * Demand-loading implemented 01.12.91 - no need to read anything but
+ * the header into memory. The inode of the executable is put into
+ * "current->executable", and page faults do the actual loading. Clean.
+ *
+ * Once more I can proudly say that linux stood up to being changed: it
+ * was less than 2 hours work to get demand-loading completely implemented.
+ *
+ * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead,
+ * current->executable is only used by the procfs. This allows a dispatch
+ * table to check for several different types of binary formats. We keep
+ * trying until we recognize the file or we run out of supported binary
+ * formats.
+ */
+
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/a.out.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/malloc.h>
+#include <linux/binfmts.h>
+#include <linux/personality.h>
+
+#include <asm/system.h>
+#include <asm/segment.h>
+
+asmlinkage int sys_exit(int exit_code);
+asmlinkage int sys_brk(unsigned long);
+
+static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
+static int load_aout_library(int fd);
+static int aout_core_dump(long signr, struct pt_regs * regs);
+
+/*
+ * Here are the actual binaries that will be accepted:
+ * add more with "register_binfmt()"..
+ */
+extern struct linux_binfmt elf_format;
+
+/*
+ * The built-in a.out format.  With CONFIG_BINFMT_ELF the initializer
+ * starts with &elf_format instead of NULL -- presumably the "next"
+ * link, which would chain the ELF format behind a.out (confirm against
+ * the struct declaration in <linux/binfmts.h>).
+ */
+static struct linux_binfmt aout_format = {
+#ifndef CONFIG_BINFMT_ELF
+ NULL, NULL, load_aout_binary, load_aout_library, aout_core_dump
+#else
+ &elf_format, NULL, load_aout_binary, load_aout_library, aout_core_dump
+#endif
+};
+
+/* Head of the format list that do_execve() and sys_uselib() walk. */
+static struct linux_binfmt *formats = &aout_format;
+
+/*
+ * Append "fmt" to the end of the list of known binary formats.
+ * Returns 0 on success, -EINVAL for a NULL format and -EBUSY if the
+ * format already has a successor or is already on the list.
+ */
+int register_binfmt(struct linux_binfmt * fmt)
+{
+	struct linux_binfmt ** link;
+
+	if (fmt == NULL)
+		return -EINVAL;
+	if (fmt->next != NULL)
+		return -EBUSY;
+	for (link = &formats; *link != NULL; link = &(*link)->next) {
+		if (*link == fmt)
+			return -EBUSY;
+	}
+	*link = fmt;
+	return 0;
+}
+
+/*
+ * Take "fmt" off the list of known binary formats.
+ * Returns 0 on success, -EINVAL if the format is not on the list.
+ *
+ * The format's "next" link is cleared once it is unlinked:
+ * register_binfmt() refuses any format whose "next" is non-NULL, so a
+ * stale link would make it impossible to register the format again.
+ */
+int unregister_binfmt(struct linux_binfmt * fmt)
+{
+	struct linux_binfmt ** tmp = &formats;
+
+	while (*tmp) {
+		if (fmt == *tmp) {
+			*tmp = fmt->next;
+			fmt->next = NULL;
+			return 0;
+		}
+		tmp = &(*tmp)->next;
+	}
+	return -EINVAL;
+}
+
+/*
+ * Open "inode" on the lowest free file descriptor of the current
+ * process, using the inode's default file operations.
+ *
+ * "mode" is stored as the file flags; the access mode is derived from
+ * it as (mode+1) & O_ACCMODE.  On success the inode's use count is
+ * raised and the new descriptor is returned.  On failure a negative
+ * errno is returned: -EINVAL if the inode has no default file
+ * operations, -EMFILE if no file structure is available, -ENFILE if
+ * the descriptor table is full, or whatever the open routine reports.
+ */
+int open_inode(struct inode * inode, int mode)
+{
+	int error, fd;
+	struct file *f, **fpp;
+
+	if (!inode->i_op || !inode->i_op->default_file_ops)
+		return -EINVAL;
+	f = get_empty_filp();
+	if (!f)
+		return -EMFILE;
+	fd = 0;
+	fpp = current->files->fd;
+	for (;;) {
+		if (!*fpp)
+			break;
+		/*
+		 * Valid descriptors are 0..NR_OPEN-1 (the rest of this file
+		 * treats them that way), so stop BEFORE stepping onto slot
+		 * NR_OPEN, and give the file structure back instead of
+		 * leaking it.
+		 */
+		if (++fd >= NR_OPEN) {
+			f->f_count--;
+			return -ENFILE;
+		}
+		fpp++;
+	}
+	*fpp = f;
+	f->f_flags = mode;
+	f->f_mode = (mode+1) & O_ACCMODE;
+	f->f_inode = inode;
+	f->f_pos = 0;
+	f->f_reada = 0;
+	f->f_op = inode->i_op->default_file_ops;
+	if (f->f_op->open) {
+		error = f->f_op->open(inode,f);
+		if (error) {
+			/* undo the descriptor and the file structure */
+			*fpp = NULL;
+			f->f_count--;
+			return error;
+		}
+	}
+	/* the open file now uses the inode as well */
+	inode->i_count++;
+	return fd;
+}
+
+/*
+ * These are the only things you should do on a core-file: use only these
+ * macros to write out all the necessary info.
+ */
+#define DUMP_WRITE(addr,nr) \
+while (file.f_op->write(inode,&file,(char *)(addr),(nr)) != (nr)) goto close_coredump
+
+#define DUMP_SEEK(offset) \
+if (file.f_op->lseek) { \
+ if (file.f_op->lseek(inode,&file,(offset),0) != (offset)) \
+ goto close_coredump; \
+} else file.f_pos = (offset)
+
+/*
+ * Routine writes a core dump image in the current directory.
+ *
+ * The file is named "core" and holds, in this order: the "struct user"
+ * header, the data area, the stack area and finally the task
+ * structure. The data area, and then the stack area, are left out if
+ * they do not fit within the RLIMIT_CORE limit.
+ *
+ * Returns 1 once the core file has been opened and writing is about to
+ * start (even if one of the writes fails later on), 0 otherwise.
+ *
+ * Note that setuid/setgid files won't make a core-dump if the uid/gid
+ * changed due to the set[u|g]id. It's enforced by the "current->dumpable"
+ * field, which also makes sure the core-dumps won't be recursive if the
+ * dumping of the process results in another error..
+ */
+static int aout_core_dump(long signr, struct pt_regs * regs)
+{
+ struct inode * inode = NULL;
+ struct file file;
+ unsigned short fs;
+ int has_dumped = 0;
+ char corefile[6+sizeof(current->comm)];
+ int i;
+ register int dump_start, dump_size;
+ struct user dump;
+
+ if (!current->dumpable)
+ return 0;
+ current->dumpable = 0;
+
+/* See if we have enough room to write the upage. */
+ if (current->rlim[RLIMIT_CORE].rlim_cur < PAGE_SIZE)
+ return 0;
+ /* the file name and the dump header live in kernel space */
+ fs = get_fs();
+ set_fs(KERNEL_DS);
+ memcpy(corefile,"core.",5);
+#if 0
+ memcpy(corefile+5,current->comm,sizeof(current->comm));
+#else
+ /* overwrite the '.': the file is simply called "core" */
+ corefile[4] = '\0';
+#endif
+ if (open_namei(corefile,O_CREAT | 2 | O_TRUNC,0600,&inode,NULL)) {
+ inode = NULL;
+ goto end_coredump;
+ }
+ if (!S_ISREG(inode->i_mode))
+ goto end_coredump;
+ if (!inode->i_op || !inode->i_op->default_file_ops)
+ goto end_coredump;
+ if (get_write_access(inode))
+ goto end_coredump;
+ /* build a temporary file structure on the stack for the writes */
+ file.f_mode = 3;
+ file.f_flags = 0;
+ file.f_count = 1;
+ file.f_inode = inode;
+ file.f_pos = 0;
+ file.f_reada = 0;
+ file.f_op = inode->i_op->default_file_ops;
+ if (file.f_op->open)
+ if (file.f_op->open(inode,&file))
+ goto done_coredump;
+ if (!file.f_op->write)
+ goto close_coredump;
+ has_dumped = 1;
+/* changed the size calculations - should hopefully work better. lbt */
+ dump.magic = CMAGIC;
+ dump.start_code = 0;
+ dump.start_stack = regs->esp & ~(PAGE_SIZE - 1);
+ /* u_tsize, u_dsize and u_ssize are kept in units of 4096 bytes (>> 12) */
+ dump.u_tsize = ((unsigned long) current->mm->end_code) >> 12;
+ dump.u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> 12;
+ dump.u_dsize -= dump.u_tsize;
+ dump.u_ssize = 0;
+ for(i=0; i<8; i++) dump.u_debugreg[i] = current->debugreg[i];
+ if (dump.start_stack < TASK_SIZE)
+ dump.u_ssize = ((unsigned long) (TASK_SIZE - dump.start_stack)) >> 12;
+/* If the size of the dump file exceeds the rlimit, then see what would happen
+ if we wrote the stack, but not the data area. */
+ if ((dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE >
+ current->rlim[RLIMIT_CORE].rlim_cur)
+ dump.u_dsize = 0;
+/* Make sure we have enough room to write the stack and data areas. */
+ if ((dump.u_ssize+1) * PAGE_SIZE >
+ current->rlim[RLIMIT_CORE].rlim_cur)
+ dump.u_ssize = 0;
+ strncpy(dump.u_comm, current->comm, sizeof(current->comm));
+ /* u_ar0 is the offset of the saved registers within the dump header */
+ dump.u_ar0 = (struct pt_regs *)(((int)(&dump.regs)) -((int)(&dump)));
+ dump.signal = signr;
+ dump.regs = *regs;
+/* Flag indicating the math stuff is valid. We don't support this for the
+ soft-float routines yet */
+ if (hard_math) {
+ if ((dump.u_fpvalid = current->used_math) != 0) {
+ if (last_task_used_math == current)
+ __asm__("clts ; fnsave %0": :"m" (dump.i387));
+ else
+ memcpy(&dump.i387,&current->tss.i387.hard,sizeof(dump.i387));
+ }
+ } else {
+ /* we should dump the emulator state here, but we need to
+ convert it into standard 387 format first.. */
+ dump.u_fpvalid = 0;
+ }
+ set_fs(KERNEL_DS);
+/* struct user */
+ DUMP_WRITE(&dump,sizeof(dump));
+/* Now dump all of the user data. Include malloced stuff as well */
+ DUMP_SEEK(PAGE_SIZE);
+/* now we start writing out the user space info */
+ set_fs(USER_DS);
+/* Dump the data area */
+ if (dump.u_dsize != 0) {
+ dump_start = dump.u_tsize << 12;
+ dump_size = dump.u_dsize << 12;
+ DUMP_WRITE(dump_start,dump_size);
+ }
+/* Now prepare to dump the stack area */
+ if (dump.u_ssize != 0) {
+ dump_start = dump.start_stack;
+ dump_size = dump.u_ssize << 12;
+ DUMP_WRITE(dump_start,dump_size);
+ }
+/* Finally dump the task struct. Not be used by gdb, but could be useful */
+ set_fs(KERNEL_DS);
+ DUMP_WRITE(current,sizeof(*current));
+/* The labels below undo the setup above in reverse order */
+close_coredump:
+ if (file.f_op->release)
+ file.f_op->release(inode,&file);
+done_coredump:
+ put_write_access(inode);
+end_coredump:
+ set_fs(fs);
+ iput(inode);
+ return has_dumped;
+}
+
+/*
+ * Note that a shared library must be both readable and executable due to
+ * security reasons.
+ *
+ * Also note that we take the address to load from from the file itself.
+ *
+ * The library is opened and offered to the shared-library loader of
+ * each registered binary format in turn, until one of them returns
+ * something other than -ENOEXEC.  That result (or -ENOEXEC if nobody
+ * took the file) is returned; the descriptor is closed in any case.
+ */
+asmlinkage int sys_uselib(const char * library)
+{
+	int fd, retval;
+	struct file * file;
+	struct linux_binfmt * fmt;
+
+	fd = sys_open(library, 0, 0);
+	if (fd < 0)
+		return fd;
+	file = current->files->fd[fd];
+	retval = -ENOEXEC;
+	if (file && file->f_inode && file->f_op && file->f_op->read) {
+		for (fmt = formats ; fmt ; fmt = fmt->next) {
+			int (*fn)(int) = fmt->load_shlib;
+			/*
+			 * A format without a library loader must not hide
+			 * the formats behind it: skip it, don't stop.
+			 */
+			if (!fn)
+				continue;
+			retval = fn(fd);
+			if (retval != -ENOEXEC)
+				break;
+		}
+	}
+	sys_close(fd);
+	return retval;
+}
+
+/*
+ * create_tables() parses the env- and arg-strings in new user
+ * memory and creates the pointer tables from them, and puts their
+ * addresses on the "stack", returning the new stack pointer value.
+ *
+ * "p" points at the first argument string in user space; the strings
+ * are expected back to back, the "argc" argument strings first and
+ * the "envc" environment strings after them. If "ibcs" is non-zero
+ * the pointers to the argv and envp tables are not pushed, only argc.
+ */
+unsigned long * create_tables(char * p,int argc,int envc,int ibcs)
+{
+ unsigned long *argv,*envp;
+ unsigned long * sp;
+ struct vm_area_struct *mpnt;
+
+ /*
+ * Describe the stack area (from the page holding p up to TASK_SIZE).
+ * If the allocation fails the area is simply not recorded; the
+ * tables are built regardless.
+ */
+ mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL);
+ if (mpnt) {
+ mpnt->vm_task = current;
+ mpnt->vm_start = PAGE_MASK & (unsigned long) p;
+ mpnt->vm_end = TASK_SIZE;
+ mpnt->vm_page_prot = PAGE_PRIVATE|PAGE_DIRTY;
+ mpnt->vm_flags = VM_STACK_FLAGS;
+ mpnt->vm_share = NULL;
+ mpnt->vm_ops = NULL;
+ mpnt->vm_offset = 0;
+ mpnt->vm_inode = NULL;
+ mpnt->vm_pte = 0;
+ insert_vm_struct(current, mpnt);
+ }
+ /* round down to a 4-byte boundary, then reserve both tables (+1 for the NULL) */
+ sp = (unsigned long *) (0xfffffffc & (unsigned long) p);
+ sp -= envc+1;
+ envp = sp;
+ sp -= argc+1;
+ argv = sp;
+ if (!ibcs) {
+ put_fs_long((unsigned long)envp,--sp);
+ put_fs_long((unsigned long)argv,--sp);
+ }
+ put_fs_long((unsigned long)argc,--sp);
+ current->mm->arg_start = (unsigned long) p;
+ /* fill the argv table, stepping over each NUL-terminated string */
+ while (argc-->0) {
+ put_fs_long((unsigned long) p,argv++);
+ while (get_fs_byte(p++)) /* nothing */ ;
+ }
+ put_fs_long(0,argv);
+ current->mm->arg_end = current->mm->env_start = (unsigned long) p;
+ /* same again for the environment */
+ while (envc-->0) {
+ put_fs_long((unsigned long) p,envp++);
+ while (get_fs_byte(p++)) /* nothing */ ;
+ }
+ put_fs_long(0,envp);
+ current->mm->env_end = (unsigned long) p;
+ return sp;
+}
+
+/*
+ * count() counts the number of arguments/environment strings in a
+ * NULL-terminated user-space pointer vector.  A NULL vector counts as
+ * empty.
+ *
+ * We also do some limited EFAULT checking: this isn't complete, but
+ * it does cover most cases. I'll have to do this correctly some day..
+ * If one of the checks fails, the (negative) error is returned instead
+ * of the count.
+ */
+static int count(char ** argv)
+{
+	int err, n;
+	char * str;
+
+	if (argv == NULL)
+		return 0;
+	err = verify_area(VERIFY_READ, argv, sizeof(char *));
+	if (err)
+		return err;
+	for (n = 0; ; n++) {
+		str = (char *) get_fs_long((unsigned long *) (argv + n));
+		if (str == NULL)
+			break;
+		/* only the first byte of each string is checked */
+		err = verify_area(VERIFY_READ, str, 1);
+		if (err)
+			return err;
+	}
+	return n;
+}
+
+/*
+ * 'copy_strings()' copies argument/environment strings from user
+ * memory to free pages in kernel mem. These are in a format ready
+ * to be put directly into the top of new user memory.
+ *
+ * The strings are copied last one first, each of them backwards from
+ * offset "p" within the "page" array; pages are allocated as they are
+ * needed. The new value of "p" is returned, or 0 if "p" was 0 on
+ * entry, the strings do not fit, or no page could be allocated.
+ *
+ * Modified by TYT, 11/24/91 to add the from_kmem argument, which specifies
+ * whether the string and the string array are from user or kernel segments:
+ *
+ * from_kmem argv * argv **
+ * 0 user space user space
+ * 1 kernel space user space
+ * 2 kernel space kernel space
+ *
+ * We do this by playing games with the fs segment register. Since it
+ * is expensive to load a segment register, we try to avoid calling
+ * set_fs() unless we absolutely have to.
+ */
+unsigned long copy_strings(int argc,char ** argv,unsigned long *page,
+ unsigned long p, int from_kmem)
+{
+ char *tmp, *pag = NULL;
+ int len, offset = 0;
+ unsigned long old_fs, new_fs;
+
+ if (!p)
+ return 0; /* bullet-proofing */
+ new_fs = get_ds();
+ old_fs = get_fs();
+ if (from_kmem==2)
+ set_fs(new_fs);
+ while (argc-- > 0) {
+ /* fetch the pointer to string number argc */
+ if (from_kmem == 1)
+ set_fs(new_fs);
+ if (!(tmp = (char *)get_fs_long(((unsigned long *)argv)+argc)))
+ panic("VFS: argc is wrong");
+ if (from_kmem == 1)
+ set_fs(old_fs);
+ len=0; /* remember zero-padding */
+ do {
+ len++;
+ } while (get_fs_byte(tmp++));
+ if (p < len) { /* this shouldn't happen - 128kB */
+ set_fs(old_fs);
+ return 0;
+ }
+ /* tmp is one past the terminating NUL: copy backwards from there */
+ while (len) {
+ --p; --tmp; --len;
+ if (--offset < 0) {
+ /* moved onto another page: look it up, allocating it if needed */
+ offset = p % PAGE_SIZE;
+ if (from_kmem==2)
+ set_fs(old_fs);
+ if (!(pag = (char *) page[p/PAGE_SIZE]) &&
+ !(pag = (char *) page[p/PAGE_SIZE] =
+ (unsigned long *) get_free_page(GFP_USER)))
+ return 0;
+ if (from_kmem==2)
+ set_fs(new_fs);
+
+ }
+ *(pag + offset) = get_fs_byte(tmp);
+ }
+ }
+ if (from_kmem==2)
+ set_fs(old_fs);
+ return p;
+}
+
+/*
+ * Put the argument pages into the top of the address space: page[i]
+ * ends up MAX_ARG_PAGES-i pages below TASK_SIZE, and rss is raised for
+ * every page that is present.  Also resets mm->start_code to 0.
+ * "text_size" is not used.  Returns TASK_SIZE.
+ */
+unsigned long change_ldt(unsigned long text_size,unsigned long * page)
+{
+	unsigned long addr = TASK_SIZE;
+	int n = MAX_ARG_PAGES;
+
+	current->mm->start_code = 0;
+	while (--n >= 0) {
+		addr -= PAGE_SIZE;
+		if (!page[n])
+			continue;
+		current->mm->rss++;
+		put_dirty_page(current,page[n],addr);
+	}
+	return TASK_SIZE;
+}
+
+/*
+ * Read in the complete executable. This is used for "-N" files
+ * that aren't on a block boundary, and for files on filesystems
+ * without bmap support.
+ *
+ * Reads "count" bytes starting at file offset "offset" of "inode"
+ * into "addr", through a temporary file structure on the stack.
+ * Returns whatever the read routine returns, the verify_area() error
+ * if the fs segment is USER_DS and "addr" is not writable, or -ENOEXEC
+ * if the file cannot be opened, read or positioned.
+ */
+int read_exec(struct inode *inode, unsigned long offset,
+	char * addr, unsigned long count)
+{
+	struct file file;
+	int result = -ENOEXEC;
+
+	if (!inode->i_op || !inode->i_op->default_file_ops)
+		return result;
+	file.f_op = inode->i_op->default_file_ops;
+	file.f_inode = inode;
+	file.f_mode = 1;
+	file.f_flags = 0;
+	file.f_count = 1;
+	file.f_pos = 0;
+	file.f_reada = 0;
+	if (file.f_op->open && file.f_op->open(inode,&file))
+		return result;
+	if (!file.f_op || !file.f_op->read)
+		goto out_release;
+	/* position the file: through lseek if there is one, else by hand */
+	if (!file.f_op->lseek)
+		file.f_pos = offset;
+	else if (file.f_op->lseek(inode,&file,offset,0) != offset)
+		goto out_release;
+	if (get_fs() == USER_DS) {
+		result = verify_area(VERIFY_WRITE, addr, count);
+		if (result)
+			goto out_release;
+	}
+	result = file.f_op->read(inode, &file, addr, count);
+out_release:
+	if (file.f_op->release)
+		file.f_op->release(inode,&file);
+	return result;
+}
+
+
+/*
+ * This function flushes out all traces of the currently running executable so
+ * that a new one can be started
+ *
+ * In order: the command name is set from the last component of
+ * bprm->filename, the old memory mappings and any private ldt are
+ * released, the debug registers are cleared, "dumpable" is recomputed,
+ * pending signals and signal handlers are reset (ignored signals stay
+ * ignored), close-on-exec descriptors are closed, the page tables are
+ * cleared and the math state is dropped.
+ */
+
+void flush_old_exec(struct linux_binprm * bprm)
+{
+ int i;
+ int ch;
+ char * name;
+ struct vm_area_struct * mpnt, *mpnt1;
+
+ current->dumpable = 1;
+ name = bprm->filename;
+ /* keep what follows the last '/', at most 15 characters of it */
+ for (i=0; (ch = *(name++)) != '\0';) {
+ if (ch == '/')
+ i = 0;
+ else
+ if (i < 15)
+ current->comm[i++] = ch;
+ }
+ current->comm[i] = '\0';
+ /* Release all of the old mmap stuff. */
+
+ mpnt = current->mm->mmap;
+ current->mm->mmap = NULL;
+ while (mpnt) {
+ mpnt1 = mpnt->vm_next;
+ if (mpnt->vm_ops && mpnt->vm_ops->close)
+ mpnt->vm_ops->close(mpnt);
+ if (mpnt->vm_inode)
+ iput(mpnt->vm_inode);
+ kfree(mpnt);
+ mpnt = mpnt1;
+ }
+
+ /* Flush the old ldt stuff... */
+ if (current->ldt) {
+ free_page((unsigned long) current->ldt);
+ current->ldt = NULL;
+ for (i=1 ; i<NR_TASKS ; i++) {
+ if (task[i] == current) {
+ set_ldt_desc(gdt+(i<<1)+
+ FIRST_LDT_ENTRY,&default_ldt, 1);
+ load_ldt(i);
+ }
+ }
+ }
+
+ for (i=0 ; i<8 ; i++) current->debugreg[i] = 0;
+
+ /* no core dumps if the ids are about to change or the file is unreadable */
+ if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
+ !permission(bprm->inode,MAY_READ))
+ current->dumpable = 0;
+ current->signal = 0;
+ for (i=0 ; i<32 ; i++) {
+ current->sigaction[i].sa_mask = 0;
+ current->sigaction[i].sa_flags = 0;
+ if (current->sigaction[i].sa_handler != SIG_IGN)
+ current->sigaction[i].sa_handler = NULL;
+ }
+ for (i=0 ; i<NR_OPEN ; i++)
+ if (FD_ISSET(i,&current->files->close_on_exec))
+ sys_close(i);
+ FD_ZERO(&current->files->close_on_exec);
+ clear_page_tables(current);
+ if (last_task_used_math == current)
+ last_task_used_math = NULL;
+ current->used_math = 0;
+}
+
+/*
+ * do_execve() does the real work of sys_execve().
+ *
+ * "filename" is the pathname of the program (already in kernel space),
+ * "argv" and "envp" are the user-space argument and environment
+ * vectors, "regs" are the registers of the calling process.
+ *
+ * The file is checked, its first 128 bytes are read, one level of
+ * "#!" interpreter is resolved, the strings are copied into the
+ * argument pages and the result is offered to every registered binary
+ * format until one of them accepts it.  Returns the loader's result
+ * (>= 0) on success or a negative errno.
+ */
+int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
+{
+	struct linux_binprm bprm;
+	struct linux_binfmt * fmt;
+	unsigned long old_fs;
+	int i;
+	int retval;
+	int sh_bang = 0;
+
+	if (regs->cs != USER_CS)
+		return -EINVAL;
+	bprm.p = PAGE_SIZE*MAX_ARG_PAGES-4;
+	for (i=0 ; i<MAX_ARG_PAGES ; i++)	/* clear page-table */
+		bprm.page[i] = 0;
+	retval = open_namei(filename, 0, 0, &bprm.inode, NULL);
+	if (retval)
+		return retval;
+	bprm.filename = filename;
+	/*
+	 * From here on we hold bprm.inode, so a failure has to leave
+	 * through exec_error2 to release it again (a plain return here
+	 * used to leak the inode).
+	 */
+	if ((bprm.argc = count(argv)) < 0) {
+		retval = bprm.argc;
+		goto exec_error2;
+	}
+	if ((bprm.envc = count(envp)) < 0) {
+		retval = bprm.envc;
+		goto exec_error2;
+	}
+
+restart_interp:
+	if (!S_ISREG(bprm.inode->i_mode)) {	/* must be regular file */
+		retval = -EACCES;
+		goto exec_error2;
+	}
+	if (IS_NOEXEC(bprm.inode)) {		/* FS mustn't be mounted noexec */
+		retval = -EPERM;
+		goto exec_error2;
+	}
+	if (!bprm.inode->i_sb) {
+		retval = -EACCES;
+		goto exec_error2;
+	}
+	i = bprm.inode->i_mode;
+	if (IS_NOSUID(bprm.inode) &&
+	    (((i & S_ISUID) && bprm.inode->i_uid != current->euid) ||
+	     ((i & S_ISGID) && !in_group_p(bprm.inode->i_gid))) &&
+	    !suser()) {
+		retval = -EPERM;
+		goto exec_error2;
+	}
+	/* make sure we don't let suid, sgid files be ptraced. */
+	if (current->flags & PF_PTRACED) {
+		bprm.e_uid = current->euid;
+		bprm.e_gid = current->egid;
+	} else {
+		bprm.e_uid = (i & S_ISUID) ? bprm.inode->i_uid : current->euid;
+		bprm.e_gid = (i & S_ISGID) ? bprm.inode->i_gid : current->egid;
+	}
+	if (!permission(bprm.inode, MAY_EXEC) ||
+	    (!(bprm.inode->i_mode & 0111) && fsuser())) {
+		retval = -EACCES;
+		goto exec_error2;
+	}
+	/* better not execute files which are being written to */
+	if (bprm.inode->i_wcount > 0) {
+		retval = -ETXTBSY;
+		goto exec_error2;
+	}
+	/* read the start of the file into the kernel buffer bprm.buf */
+	memset(bprm.buf,0,sizeof(bprm.buf));
+	old_fs = get_fs();
+	set_fs(get_ds());
+	retval = read_exec(bprm.inode,0,bprm.buf,128);
+	set_fs(old_fs);
+	if (retval < 0)
+		goto exec_error2;
+	if ((bprm.buf[0] == '#') && (bprm.buf[1] == '!') && (!sh_bang)) {
+		/*
+		 * This section does the #! interpretation.
+		 * Sorta complicated, but hopefully it will work.  -TYT
+		 */
+
+		char *cp, *interp, *i_name, *i_arg;
+
+		/* the script's inode is dropped here: errors go to exec_error1 */
+		iput(bprm.inode);
+		bprm.buf[127] = '\0';
+		if ((cp = strchr(bprm.buf, '\n')) == NULL)
+			cp = bprm.buf+127;
+		*cp = '\0';
+		/* strip trailing blanks */
+		while (cp > bprm.buf) {
+			cp--;
+			if ((*cp == ' ') || (*cp == '\t'))
+				*cp = '\0';
+			else
+				break;
+		}
+		for (cp = bprm.buf+2; (*cp == ' ') || (*cp == '\t'); cp++);
+		if (*cp == '\0') {
+			retval = -ENOEXEC;	/* No interpreter name found */
+			goto exec_error1;
+		}
+		interp = i_name = cp;
+		i_arg = 0;
+		for ( ; *cp && (*cp != ' ') && (*cp != '\t'); cp++) {
+			if (*cp == '/')
+				i_name = cp+1;
+		}
+		while ((*cp == ' ') || (*cp == '\t'))
+			*cp++ = '\0';
+		if (*cp)
+			i_arg = cp;
+		/*
+		 * OK, we've parsed out the interpreter name and
+		 * (optional) argument.
+		 */
+		if (sh_bang++ == 0) {
+			bprm.p = copy_strings(bprm.envc, envp, bprm.page, bprm.p, 0);
+			bprm.p = copy_strings(--bprm.argc, argv+1, bprm.page, bprm.p, 0);
+		}
+		/*
+		 * Splice in (1) the interpreter's name for argv[0]
+		 *           (2) (optional) argument to interpreter
+		 *           (3) filename of shell script
+		 *
+		 * This is done in reverse order, because of how the
+		 * user environment and arguments are stored.
+		 */
+		bprm.p = copy_strings(1, &bprm.filename, bprm.page, bprm.p, 2);
+		bprm.argc++;
+		if (i_arg) {
+			bprm.p = copy_strings(1, &i_arg, bprm.page, bprm.p, 2);
+			bprm.argc++;
+		}
+		bprm.p = copy_strings(1, &i_name, bprm.page, bprm.p, 2);
+		bprm.argc++;
+		/* copy_strings() hands a 0 straight through, so one check will do */
+		if (!bprm.p) {
+			retval = -E2BIG;
+			goto exec_error1;
+		}
+		/*
+		 * OK, now restart the process with the interpreter's inode.
+		 * Note that we use open_namei() as the name is now in kernel
+		 * space, and we don't need to copy it.
+		 */
+		retval = open_namei(interp, 0, 0, &bprm.inode, NULL);
+		if (retval)
+			goto exec_error1;
+		goto restart_interp;
+	}
+	if (!sh_bang) {
+		bprm.p = copy_strings(bprm.envc,envp,bprm.page,bprm.p,0);
+		bprm.p = copy_strings(bprm.argc,argv,bprm.page,bprm.p,0);
+		if (!bprm.p) {
+			retval = -E2BIG;
+			goto exec_error2;
+		}
+	}
+
+	bprm.sh_bang = sh_bang;
+	for (fmt = formats ; fmt ; fmt = fmt->next) {
+		int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
+		/*
+		 * A format without a loader must not hide the formats
+		 * behind it: skip it, don't stop.
+		 */
+		if (!fn)
+			continue;
+		retval = fn(&bprm, regs);
+		if (retval >= 0) {
+			iput(bprm.inode);
+			current->did_exec = 1;
+			return retval;
+		}
+		if (retval != -ENOEXEC)
+			break;
+	}
+exec_error2:
+	iput(bprm.inode);
+exec_error1:
+	for (i=0 ; i<MAX_ARG_PAGES ; i++)
+		free_page(bprm.page[i]);
+	return(retval);
+}
+
+/*
+ * sys_execve() executes a new program.
+ *
+ * The pathname (ebx) is fetched with getname(), then do_execve() is
+ * called with it and with the argv (ecx) and envp (edx) pointers as
+ * they were passed in.
+ */
+asmlinkage int sys_execve(struct pt_regs regs)
+{
+	char * filename;
+	int retval = getname((char *) regs.ebx, &filename);
+
+	if (!retval) {
+		retval = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, &regs);
+		putname(filename);
+	}
+	return retval;
+}
+
+/*
+ * Map anonymous, private read/write/execute memory over the
+ * page-aligned range between "start" and "end".  Nothing is done if
+ * the aligned range is empty.
+ */
+static void set_brk(unsigned long start, unsigned long end)
+{
+	unsigned long first = PAGE_ALIGN(start);
+	unsigned long last = PAGE_ALIGN(end);
+
+	if (last > first)
+		do_mmap(NULL, first, last - first,
+			PROT_READ | PROT_WRITE | PROT_EXEC,
+			MAP_FIXED | MAP_PRIVATE, 0);
+}
+
+/*
+ * These are the functions used to load a.out style executables and shared
+ * libraries. There is no binary dependent code anywhere else.
+ */
+
+/*
+ * Load the a.out executable described by "bprm" into the current
+ * process and set up "regs" to start it.
+ *
+ * Returns -ENOEXEC while the file can still be refused (the old image
+ * is untouched then), 0 on success, or another error once the old
+ * image is already gone.
+ */
+static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+{
+ struct exec ex;
+ struct file * file;
+ int fd, error;
+ unsigned long p = bprm->p;
+ unsigned long fd_offset;
+
+ ex = *((struct exec *) bprm->buf); /* exec-header */
+ /* only ZMAGIC, OMAGIC and QMAGIC images without relocations, and not truncated */
+ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
+ N_MAGIC(ex) != QMAGIC) ||
+ ex.a_trsize || ex.a_drsize ||
+ bprm->inode->i_size < ex.a_text+ex.a_data+ex.a_syms+N_TXTOFF(ex)) {
+ return -ENOEXEC;
+ }
+
+ current->personality = PER_LINUX;
+ fd_offset = N_TXTOFF(ex);
+ if (N_MAGIC(ex) == ZMAGIC && fd_offset != BLOCK_SIZE) {
+ printk(KERN_NOTICE "N_TXTOFF != BLOCK_SIZE. See a.out.h.\n");
+ return -ENOEXEC;
+ }
+
+ if (N_MAGIC(ex) == ZMAGIC && ex.a_text &&
+ (fd_offset < bprm->inode->i_sb->s_blocksize)) {
+ printk(KERN_NOTICE "N_TXTOFF < BLOCK_SIZE. Please convert binary.\n");
+ return -ENOEXEC;
+ }
+
+ /* OK, This is the point of no return */
+ flush_old_exec(bprm);
+
+ /* lay out text, data and bss back to back, starting at N_TXTADDR */
+ current->mm->brk = ex.a_bss +
+ (current->mm->start_brk =
+ (current->mm->end_data = ex.a_data +
+ (current->mm->end_code = ex.a_text +
+ (current->mm->start_code = N_TXTADDR(ex)))));
+ current->mm->rss = 0;
+ current->mm->mmap = NULL;
+ current->suid = current->euid = current->fsuid = bprm->e_uid;
+ current->sgid = current->egid = current->fsgid = bprm->e_gid;
+ if (N_MAGIC(ex) == OMAGIC) {
+ /* OMAGIC: anonymous memory at 0, image read in from file offset 32 */
+ do_mmap(NULL, 0, ex.a_text+ex.a_data,
+ PROT_READ|PROT_WRITE|PROT_EXEC,
+ MAP_FIXED|MAP_PRIVATE, 0);
+ read_exec(bprm->inode, 32, (char *) 0, ex.a_text+ex.a_data);
+ } else {
+ if (ex.a_text & 0xfff || ex.a_data & 0xfff)
+ printk(KERN_NOTICE "executable not page aligned\n");
+
+ fd = open_inode(bprm->inode, O_RDONLY);
+
+ if (fd < 0)
+ return fd;
+ file = current->files->fd[fd];
+ /* no mmap on this file: fall back to reading the whole image in */
+ if (!file->f_op || !file->f_op->mmap) {
+ sys_close(fd);
+ do_mmap(NULL, 0, ex.a_text+ex.a_data,
+ PROT_READ|PROT_WRITE|PROT_EXEC,
+ MAP_FIXED|MAP_PRIVATE, 0);
+ read_exec(bprm->inode, fd_offset,
+ (char *) N_TXTADDR(ex), ex.a_text+ex.a_data);
+ goto beyond_if;
+ }
+
+ /* map the text segment (read/execute)... */
+ error = do_mmap(file, N_TXTADDR(ex), ex.a_text,
+ PROT_READ | PROT_EXEC,
+ MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
+ fd_offset);
+
+ if (error != N_TXTADDR(ex)) {
+ sys_close(fd);
+ send_sig(SIGKILL, current, 0);
+ return error;
+ }
+
+ /* ...and the data segment right behind it (read/write/execute) */
+ error = do_mmap(file, N_TXTADDR(ex) + ex.a_text, ex.a_data,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
+ fd_offset + ex.a_text);
+ sys_close(fd);
+ if (error != N_TXTADDR(ex) + ex.a_text) {
+ send_sig(SIGKILL, current, 0);
+ return error;
+ }
+ }
+beyond_if:
+ /* move the use counts from the old exec domain and format to the new ones */
+ if (current->exec_domain && current->exec_domain->use_count)
+ (*current->exec_domain->use_count)--;
+ if (current->binfmt && current->binfmt->use_count)
+ (*current->binfmt->use_count)--;
+ current->exec_domain = lookup_exec_domain(current->personality);
+ current->binfmt = &aout_format;
+ if (current->exec_domain && current->exec_domain->use_count)
+ (*current->exec_domain->use_count)++;
+ if (current->binfmt && current->binfmt->use_count)
+ (*current->binfmt->use_count)++;
+
+ set_brk(current->mm->start_brk, current->mm->brk);
+
+ /* turn the offset within the argument pages into a user address */
+ p += change_ldt(ex.a_text,bprm->page);
+ p -= MAX_ARG_PAGES*PAGE_SIZE;
+ p = (unsigned long)create_tables((char *)p,
+ bprm->argc, bprm->envc,
+ current->personality != PER_LINUX);
+ current->mm->start_stack = p;
+ regs->eip = ex.a_entry; /* eip, magic happens :-) */
+ regs->esp = p; /* stack pointer */
+ if (current->flags & PF_PTRACED)
+ send_sig(SIGTRAP, current, 0);
+ return 0;
+}
+
+
+/*
+ * Map the a.out shared library open on descriptor "fd" into the
+ * current process, at the address recorded in the library itself.
+ *
+ * Returns 0 on success, -EACCES if the header cannot be read,
+ * -ENOEXEC if the file is not an acceptable ZMAGIC/QMAGIC library, or
+ * the result of do_mmap() if the mapping does not end up at the
+ * requested address.
+ */
+static int load_aout_library(int fd)
+{
+	struct file * file;
+	struct exec ex;
+	struct inode * inode;
+	unsigned int len;
+	unsigned int bss;
+	unsigned int start_addr;
+	int error;
+
+	file = current->files->fd[fd];
+	inode = file->f_inode;
+
+	/*
+	 * The header is read into a kernel buffer.  Switch back to
+	 * USER_DS before looking at the result: returning with the
+	 * segment still set to KERNEL_DS must never happen.
+	 */
+	set_fs(KERNEL_DS);
+	error = file->f_op->read(inode, file, (char *) &ex, sizeof(ex));
+	set_fs(USER_DS);
+	if (error != sizeof(ex))
+		return -EACCES;
+
+	/* We come in here for the regular a.out style of shared libraries */
+	if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || ex.a_trsize ||
+	    ex.a_drsize || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
+	    inode->i_size < ex.a_text+ex.a_data+ex.a_syms+N_TXTOFF(ex)) {
+		return -ENOEXEC;
+	}
+	if (N_MAGIC(ex) == ZMAGIC && N_TXTOFF(ex) &&
+	    (N_TXTOFF(ex) < inode->i_sb->s_blocksize)) {
+		printk("N_TXTOFF < BLOCK_SIZE. Please convert library\n");
+		return -ENOEXEC;
+	}
+
+	if (N_FLAGS(ex)) return -ENOEXEC;
+
+	/* For QMAGIC, the starting address is 0x20 into the page.  We mask
+	   this off to get the starting address for the page */
+
+	start_addr = ex.a_entry & 0xfffff000;
+
+	/* Now use mmap to map the library into memory. */
+	error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
+			PROT_READ | PROT_WRITE | PROT_EXEC,
+			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+			N_TXTOFF(ex));
+	if (error != start_addr)
+		return error;
+	/* whatever part of the bss lies beyond the last file page gets anonymous memory */
+	len = PAGE_ALIGN(ex.a_text + ex.a_data);
+	bss = ex.a_text + ex.a_data + ex.a_bss;
+	if (bss > len)
+		do_mmap(NULL, start_addr + len, bss-len,
+			PROT_READ|PROT_WRITE|PROT_EXEC,
+			MAP_PRIVATE|MAP_FIXED, 0);
+	return 0;
+}
diff --git a/fs/ext/Makefile b/fs/ext/Makefile
new file mode 100644
index 000000000..5e23319c8
--- /dev/null
+++ b/fs/ext/Makefile
@@ -0,0 +1,31 @@
+#
+# Makefile for the linux ext-filesystem routines.
+#
+# Builds ext.o, a single relocatable object linked from the objects in OBJS.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (i.e. not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+.c.s:
+ $(CC) $(CFLAGS) -S $<
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+.s.o:
+ $(AS) -o $*.o $<
+
+OBJS= freelists.o truncate.o namei.o inode.o \
+ file.o dir.o symlink.o fsync.o
+
+ext.o: $(OBJS) # partial link (ld -r) of all ext objects into one file
+ $(LD) -r -o ext.o $(OBJS)
+
+dep: # regenerate .depend from every .c file in this directory
+ $(CPP) -M *.c > .depend
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/fs/ext/dir.c b/fs/ext/dir.c
new file mode 100644
index 000000000..10e30fafa
--- /dev/null
+++ b/fs/ext/dir.c
@@ -0,0 +1,131 @@
+/*
+ * linux/fs/ext/dir.c
+ *
+ * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
+ *
+ * from
+ *
+ * linux/fs/minix/dir.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext directory handling functions
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/ext_fs.h>
+#include <linux/stat.h>
+
+#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
+#define ROUND_UP(x) (((x)+3) & ~3)
+
+static int ext_dir_read(struct inode * inode, struct file * filp, char * buf, int count)
+{ /* read() on an ext directory is refused: every call fails with -EISDIR */
+ return -EISDIR;
+}
+
+static int ext_readdir(struct inode *, struct file *, struct dirent *, int);
+
+static struct file_operations ext_dir_operations = { /* file operations for ext directories; referenced by ext_dir_inode_operations */
+ NULL, /* lseek - default */
+ ext_dir_read, /* read */
+ NULL, /* write - bad */
+ ext_readdir, /* readdir */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ file_fsync /* fsync */
+};
+
+/*
+ * Inode operations for ext directories.  Directories can handle most
+ * operations: create, lookup, link, unlink, symlink, mkdir, rmdir, mknod,
+ * rename and truncate are all provided.  readlink, follow_link, bmap and
+ * permission are left NULL.
+ */
+struct inode_operations ext_dir_inode_operations = {
+ &ext_dir_operations, /* default directory file-ops */
+ ext_create, /* create */
+ ext_lookup, /* lookup */
+ ext_link, /* link */
+ ext_unlink, /* unlink */
+ ext_symlink, /* symlink */
+ ext_mkdir, /* mkdir */
+ ext_rmdir, /* rmdir */
+ ext_mknod, /* mknod */
+ ext_rename, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ ext_truncate, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * ext_readdir() - hand the next in-use directory entry to user space.
+ *
+ * Starting at filp->f_pos, walks the directory one 1024-byte block at a
+ * time and copies the first entry that has a non-zero inode number and a
+ * non-empty name into the user-space 'dirent' (d_ino, d_name, d_reclen).
+ * filp->f_pos is advanced past every entry that was examined.
+ *
+ * Returns the rounded-up size of the dirent that was filled in, 0 at the
+ * end of the directory, or -EBADF when 'inode' is not a directory or
+ * f_pos is not a multiple of 8.  'count' is not used.
+ *
+ * A corrupted entry makes the rest of its block unusable.  The original
+ * code did 'continue' in the inner loop at that point, which re-tested
+ * the same stale entry with a moved f_pos and so skipped (or misread) the
+ * remainder of the directory.  The inner loop is now left instead, so the
+ * buffer is released and scanning resumes with the following block.
+ *
+ * NOTE(review): if the resynchronisation scan steps past the end of the
+ * block (a huge rec_len), 'offset' ends up >= 1024, the inner loop is not
+ * entered and f_pos is not advanced; that pre-existing case is unchanged.
+ */
+static int ext_readdir(struct inode * inode, struct file * filp,
+	struct dirent * dirent, int count)
+{
+	unsigned int i;
+	unsigned int ret;
+	off_t offset;
+	char c;
+	struct buffer_head * bh;
+	struct ext_dir_entry * de;
+
+	if (!inode || !S_ISDIR(inode->i_mode))
+		return -EBADF;
+	if ((filp->f_pos & 7) != 0)
+		return -EBADF;
+	ret = 0;
+	while (!ret && filp->f_pos < inode->i_size) {
+		offset = filp->f_pos & 1023;
+		bh = ext_bread(inode,(filp->f_pos)>>BLOCK_SIZE_BITS,0);
+		if (!bh) {
+			/* block not available: move on to the next one */
+			filp->f_pos += 1024-offset;
+			continue;
+		}
+		/* walk the records from the block start to find the entry
+		   boundary at (or just before) the requested offset */
+		for (i = 0; i < 1024 && i < offset; ) {
+			de = (struct ext_dir_entry *) (bh->b_data + i);
+			if (!de->rec_len)
+				break;
+			i += de->rec_len;
+		}
+		offset = i;
+		de = (struct ext_dir_entry *) (offset + bh->b_data);
+		while (!ret && offset < 1024 && filp->f_pos < inode->i_size) {
+			if (de->rec_len < 8 || de->rec_len % 8 != 0 ||
+			    de->rec_len < de->name_len + 8 ||
+			    (de->rec_len + (off_t) filp->f_pos - 1) / 1024 > ((off_t) filp->f_pos / 1024)) {
+				printk ("ext_readdir: bad dir entry, skipping\n");
+				printk ("dev=%d, dir=%ld, offset=%ld, rec_len=%d, name_len=%d\n",
+					inode->i_dev, inode->i_ino, offset, de->rec_len, de->name_len);
+				filp->f_pos += 1024-offset;
+				if (filp->f_pos > inode->i_size)
+					filp->f_pos = inode->i_size;
+				/* give up on this block; the outer loop
+				   releases it and reads the next one */
+				break;
+			}
+			offset += de->rec_len;
+			filp->f_pos += de->rec_len;
+			if (de->inode) {
+				/* copy the name up to name_len or the first NUL */
+				for (i = 0; i < de->name_len; i++)
+					if ((c = de->name[i]) != 0)
+						put_fs_byte(c,i+dirent->d_name);
+					else
+						break;
+				if (i) {
+					put_fs_long(de->inode,&dirent->d_ino);
+					put_fs_byte(0,i+dirent->d_name);
+					put_fs_word(i,&dirent->d_reclen);
+					ret = ROUND_UP(NAME_OFFSET(dirent)+i+1);
+					break;
+				}
+			}
+			de = (struct ext_dir_entry *) ((char *) de
+				+ de->rec_len);
+		}
+		brelse(bh);
+	}
+	return ret;
+}
diff --git a/fs/ext/file.c b/fs/ext/file.c
new file mode 100644
index 000000000..f32cdd898
--- /dev/null
+++ b/fs/ext/file.c
@@ -0,0 +1,258 @@
+/*
+ * linux/fs/ext/file.c
+ *
+ * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
+ *
+ * from
+ *
+ * linux/fs/minix/file.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext regular file handling primitives
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/sched.h>
+#include <linux/ext_fs.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+
+#define NBUF 32
+
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#define MAX(a,b) (((a)>(b))?(a):(b))
+
+#include <linux/fs.h>
+#include <linux/ext_fs.h>
+
+static int ext_file_read(struct inode *, struct file *, char *, int);
+static int ext_file_write(struct inode *, struct file *, char *, int);
+
+/*
+ * File operations for regular ext files.  We have mostly NULL's here:
+ * the current defaults are ok for the ext filesystem.  Only read, write,
+ * mmap (generic_mmap) and fsync (ext_sync_file) are supplied.
+ */
+static struct file_operations ext_file_operations = {
+ NULL, /* lseek - default */
+ ext_file_read, /* read */
+ ext_file_write, /* write */
+ NULL, /* readdir - bad */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ generic_mmap, /* mmap */
+ NULL, /* no special open is needed */
+ NULL, /* release */
+ ext_sync_file /* fsync */
+};
+
+struct inode_operations ext_file_inode_operations = { /* inode operations for regular ext files: only bmap and truncate are provided */
+ &ext_file_operations, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ ext_bmap, /* bmap */
+ ext_truncate, /* truncate */
+ NULL /* permission */
+};
+
+static int ext_file_read(struct inode * inode, struct file * filp, char * buf, int count)
+{ /* Read up to 'count' bytes of a regular ext file, starting at
+ * filp->f_pos, into the user-space buffer 'buf'.
+ *
+ * Returns the number of bytes copied (f_pos is advanced by the same
+ * amount), 0 when nothing is left to read at f_pos, -EINVAL when
+ * 'inode' is NULL or not a regular file, and -EIO when no byte at
+ * all could be read.
+ *
+ * buflist[] is used as a ring of NBUF buffer heads: bhb is where the
+ * next requested block is stored, bhe is the next block to be
+ * consumed. */
+ int read,left,chars;
+ int block, blocks, offset;
+ int bhrequest, uptodate;
+ struct buffer_head ** bhb, ** bhe;
+ struct buffer_head * bhreq[NBUF];
+ struct buffer_head * buflist[NBUF];
+ unsigned int size;
+
+ if (!inode) {
+ printk("ext_file_read: inode = NULL\n");
+ return -EINVAL;
+ }
+ if (!S_ISREG(inode->i_mode)) {
+ printk("ext_file_read: mode = %07o\n",inode->i_mode);
+ return -EINVAL;
+ }
+ offset = filp->f_pos;
+ size = inode->i_size;
+ if (offset > size)
+ left = 0;
+ else
+ left = size - offset;
+ if (left > count)
+ left = count;
+ if (left <= 0)
+ return 0;
+ read = 0;
+ block = offset >> BLOCK_SIZE_BITS; /* first file block to read */
+ offset &= BLOCK_SIZE-1; /* from here on: byte offset inside the current block */
+ size = (size + (BLOCK_SIZE-1)) >> BLOCK_SIZE_BITS; /* from here on: file size in blocks */
+ blocks = (left + offset + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS; /* number of blocks the request touches */
+ bhb = bhe = buflist;
+ if (filp->f_reada) { /* read-ahead: ask for at least read_ahead[] worth of blocks, but not past the end of the file */
+ if(blocks < read_ahead[MAJOR(inode->i_dev)] / (BLOCK_SIZE >> 9))
+ blocks = read_ahead[MAJOR(inode->i_dev)] / (BLOCK_SIZE >> 9);
+ if (block + blocks > size)
+ blocks = size - block;
+ }
+
+ /* We do this in a two stage process. We first try and request
+ as many blocks as we can, then we wait for the first one to
+ complete, and then we try and wrap up as many as are actually
+ done. This routine is rather generic, in that it can be used
+ in a filesystem by substituting the appropriate function in
+ for getblk.
+
+ This routine is optimized to make maximum use of the various
+ buffers and caches. */
+
+ do {
+ bhrequest = 0;
+ uptodate = 1;
+ while (blocks) {
+ --blocks;
+ *bhb = ext_getblk(inode, block++, 0);
+ if (*bhb && !(*bhb)->b_uptodate) { /* needs I/O: queue it for ll_rw_block() below */
+ uptodate = 0;
+ bhreq[bhrequest++] = *bhb;
+ }
+
+ if (++bhb == &buflist[NBUF])
+ bhb = buflist;
+
+ /* If the block we have on hand is uptodate, go ahead
+ and complete processing. */
+ if (uptodate)
+ break;
+ if (bhb == bhe) /* the ring is full */
+ break;
+ }
+
+ /* Now request them all */
+ if (bhrequest)
+ ll_rw_block(READ, bhrequest, bhreq);
+
+ do { /* Finish off all I/O that has actually completed */
+ if (*bhe) {
+ wait_on_buffer(*bhe);
+ if (!(*bhe)->b_uptodate) { /* read error? */
+ brelse(*bhe);
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ left = 0; /* ends both loops; bytes already copied are still returned */
+ break;
+ }
+ }
+ if (left < BLOCK_SIZE - offset)
+ chars = left;
+ else
+ chars = BLOCK_SIZE - offset;
+ filp->f_pos += chars;
+ left -= chars;
+ read += chars;
+ if (*bhe) {
+ memcpy_tofs(buf,offset+(*bhe)->b_data,chars);
+ brelse(*bhe);
+ buf += chars;
+ } else { /* ext_getblk() returned no buffer for this block: the caller gets zero bytes */
+ while (chars-->0)
+ put_fs_byte(0,buf++);
+ }
+ offset = 0;
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ } while (left > 0 && bhe != bhb && (!*bhe || !(*bhe)->b_lock)); /* keep going only while the next buffer is not locked */
+ } while (left > 0);
+
+/* Release the read-ahead blocks */
+ while (bhe != bhb) {
+ brelse(*bhe);
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ };
+ if (!read)
+ return -EIO;
+ filp->f_reada = 1;
+ if (!IS_RDONLY(inode)) { /* access time is only updated when IS_RDONLY(inode) is false */
+ inode->i_atime = CURRENT_TIME;
+ inode->i_dirt = 1;
+ }
+ return read;
+}
+
+static int ext_file_write(struct inode * inode, struct file * filp, char * buf, int count)
+{ /* Write 'count' bytes from the user-space buffer 'buf' to a regular
+ * ext file, at the end of the file when O_APPEND is set and at
+ * filp->f_pos otherwise.
+ *
+ * Returns the number of bytes written; -ENOSPC or -EIO when the very
+ * first block cannot be obtained or read; -EINVAL when 'inode' is
+ * NULL or not a regular file.  A failure after some bytes were
+ * written returns the short count instead of an error. */
+ off_t pos;
+ int written,c;
+ struct buffer_head * bh;
+ char * p;
+
+ if (!inode) {
+ printk("ext_file_write: inode = NULL\n");
+ return -EINVAL;
+ }
+ if (!S_ISREG(inode->i_mode)) {
+ printk("ext_file_write: mode = %07o\n",inode->i_mode);
+ return -EINVAL;
+ }
+/*
+ * ok, append may not work when many processes are writing at the same time
+ * but so what. That way leads to madness anyway.
+ */
+ if (filp->f_flags & O_APPEND)
+ pos = inode->i_size;
+ else
+ pos = filp->f_pos;
+ written = 0;
+ while (written<count) {
+ bh = ext_getblk(inode,pos/BLOCK_SIZE,1); /* create == 1: allocate the block if it is missing */
+ if (!bh) {
+ if (!written)
+ written = -ENOSPC;
+ break;
+ }
+ c = BLOCK_SIZE - (pos % BLOCK_SIZE); /* bytes from pos to the end of this block */
+ if (c > count-written)
+ c = count-written;
+ if (c != BLOCK_SIZE && !bh->b_uptodate) { /* partial-block write: the existing contents must be read first */
+ ll_rw_block(READ, 1, &bh);
+ wait_on_buffer(bh);
+ if (!bh->b_uptodate) {
+ brelse(bh);
+ if (!written)
+ written = -EIO;
+ break;
+ }
+ }
+ p = (pos % BLOCK_SIZE) + bh->b_data;
+ pos += c;
+ if (pos > inode->i_size) { /* the write extends the file */
+ inode->i_size = pos;
+ inode->i_dirt = 1;
+ }
+ written += c;
+ memcpy_fromfs(p,buf,c);
+ buf += c;
+ bh->b_uptodate = 1;
+ mark_buffer_dirty(bh, 0);
+ brelse(bh);
+ }
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME; /* timestamps and f_pos are updated even when the loop ended on an error */
+ filp->f_pos = pos;
+ inode->i_dirt = 1;
+ return written;
+}
diff --git a/fs/ext/freelists.c b/fs/ext/freelists.c
new file mode 100644
index 000000000..29c4c4289
--- /dev/null
+++ b/fs/ext/freelists.c
@@ -0,0 +1,341 @@
+/*
+ * linux/fs/ext/freelists.c
+ *
+ * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
+ *
+ */
+
+/* freelists.c contains the code that handles the inode and block free lists */
+
+
+/*
+
+ The free blocks are managed by a linked list. The super block contains the
+ number of the first free block. This block contains 254 numbers of other
+ free blocks and the number of the next block in the list.
+
+ When an ext fs is mounted, the number of the first free block is stored
+ in s->u.ext_sb.s_firstfreeblocknumber and the block header is stored in
+ s->u.ext_sb.s_firstfreeblock. u.ext_sb.s_freeblockscount contains the count
+ of free blocks.
+
+ The free inodes are also managed by a linked list in a similar way. The
+ super block contains the number of the first free inode. This inode contains
+ 14 numbers of other free inodes and the number of the next inode in the list.
+
+ The number of the first free inode is stored in
+ s->u.ext_sb.s_firstfreeinodenumber and the header of the block containing
+ the inode is stored in s->u.ext_sb.s_firstfreeinodeblock.
+ u.ext_sb.s_freeinodescount contains the count of free inodes.
+
+*/
+
+#include <linux/sched.h>
+#include <linux/ext_fs.h>
+#include <linux/stat.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+
+/*
+ * ext_free_block() - return data block 'block' to the free list of 'sb'.
+ *
+ * The head of the free-block chain is cached in
+ * sb->u.ext_sb.s_firstfreeblock and holds up to 254 free block numbers.
+ * If there is no head, or the head is full, the freed block itself is
+ * read in and becomes the new (empty) head, linked to the previous one;
+ * otherwise its number is appended to the head's table.
+ *
+ * Complains and does nothing when 'sb' is NULL or when 'block' lies
+ * outside [s_firstdatazone, s_nzones).  The superblock is locked for the
+ * duration of the update and is released on every return path; the
+ * range-check failure used to return with the lock still held.
+ */
+void ext_free_block(struct super_block * sb, int block)
+{
+	struct buffer_head * bh;
+	struct ext_free_block * efb;
+
+	if (!sb) {
+		printk("trying to free block on non-existent device\n");
+		return;
+	}
+	lock_super (sb);
+	if (block < sb->u.ext_sb.s_firstdatazone ||
+	    block >= sb->u.ext_sb.s_nzones) {
+		printk("trying to free block not in datazone\n");
+		unlock_super (sb);
+		return;
+	}
+	/* clear the dirty flag on any cached copy of the freed block */
+	bh = get_hash_table(sb->s_dev, block, sb->s_blocksize);
+	if (bh)
+		bh->b_dirt=0;
+	brelse(bh);
+	/* efb is only read below when a head block exists (short-circuit) */
+	if (sb->u.ext_sb.s_firstfreeblock)
+		efb = (struct ext_free_block *) sb->u.ext_sb.s_firstfreeblock->b_data;
+	if (!sb->u.ext_sb.s_firstfreeblock || efb->count == 254) {
+#ifdef EXTFS_DEBUG
+printk("ext_free_block: block full, skipping to %d\n", block);
+#endif
+		if (sb->u.ext_sb.s_firstfreeblock)
+			brelse (sb->u.ext_sb.s_firstfreeblock);
+		if (!(sb->u.ext_sb.s_firstfreeblock = bread (sb->s_dev,
+			block, sb->s_blocksize)))
+			panic ("ext_free_block: unable to read block to free\n");
+		efb = (struct ext_free_block *) sb->u.ext_sb.s_firstfreeblock->b_data;
+		efb->next = sb->u.ext_sb.s_firstfreeblocknumber;
+		efb->count = 0;
+		sb->u.ext_sb.s_firstfreeblocknumber = block;
+	} else {
+		efb->free[efb->count++] = block;
+	}
+	sb->u.ext_sb.s_freeblockscount ++;
+	sb->s_dirt = 1;
+	mark_buffer_dirty(sb->u.ext_sb.s_firstfreeblock, 1);
+	unlock_super (sb);
+	return;
+}
+
+/*
+ * ext_new_block() - take one block off the free list of 'sb'.
+ *
+ * If the cached head of the free-block chain still lists free blocks,
+ * the last listed one is handed out; otherwise the head block itself is
+ * handed out and the next block of the chain is read in as the new head.
+ * The allocated block is zero-filled in the buffer cache and marked dirty.
+ *
+ * Returns the block number, or 0 when 'sb' is NULL, the free list is
+ * empty, the number taken from the list is outside the data zone, or no
+ * buffer could be obtained for the block.
+ *
+ * Changes from the original: the two failure returns taken while the
+ * superblock is locked now unlock it first, and the upper bound of the
+ * data-zone check is 'j >= s_nzones', the same bound ext_free_block()
+ * applies, instead of 'j > s_nzones'.
+ */
+int ext_new_block(struct super_block * sb)
+{
+	struct buffer_head * bh;
+	struct ext_free_block * efb;
+	int j;
+
+	if (!sb) {
+		printk("trying to get new block from non-existent device\n");
+		return 0;
+	}
+	if (!sb->u.ext_sb.s_firstfreeblock)
+		return 0;
+	lock_super (sb);
+	efb = (struct ext_free_block *) sb->u.ext_sb.s_firstfreeblock->b_data;
+	if (efb->count) {
+		j = efb->free[--efb->count];
+		mark_buffer_dirty(sb->u.ext_sb.s_firstfreeblock, 1);
+	} else {
+#ifdef EXTFS_DEBUG
+printk("ext_new_block: block empty, skipping to %d\n", efb->next);
+#endif
+		/* the head lists nothing: hand out the head block itself */
+		j = sb->u.ext_sb.s_firstfreeblocknumber;
+		sb->u.ext_sb.s_firstfreeblocknumber = efb->next;
+		brelse (sb->u.ext_sb.s_firstfreeblock);
+		if (!sb->u.ext_sb.s_firstfreeblocknumber) {
+			sb->u.ext_sb.s_firstfreeblock = NULL;
+		} else {
+			if (!(sb->u.ext_sb.s_firstfreeblock = bread (sb->s_dev,
+				sb->u.ext_sb.s_firstfreeblocknumber,
+				sb->s_blocksize)))
+				panic ("ext_new_block: unable to read next free block\n");
+		}
+	}
+	if (j < sb->u.ext_sb.s_firstdatazone || j >= sb->u.ext_sb.s_nzones) {
+		printk ("ext_new_block: blk = %d\n", j);
+		printk("allocating block not in data zone\n");
+		unlock_super (sb);
+		return 0;
+	}
+	sb->u.ext_sb.s_freeblockscount --;
+	sb->s_dirt = 1;
+
+	if (!(bh=getblk(sb->s_dev, j, sb->s_blocksize))) {
+		printk("new_block: cannot get block");
+		unlock_super (sb);
+		return 0;
+	}
+	memset(bh->b_data, 0, BLOCK_SIZE);
+	bh->b_uptodate = 1;
+	mark_buffer_dirty(bh, 1);
+	brelse(bh);
+#ifdef EXTFS_DEBUG
+printk("ext_new_block: allocating block %d\n", j);
+#endif
+	unlock_super (sb);
+	return j;
+}
+
+unsigned long ext_count_free_blocks(struct super_block *sb)
+{ /* Number of free blocks on 'sb'.  Without EXTFS_DEBUG this is simply
+ * the stored counter; with EXTFS_DEBUG the free-block chain is walked,
+ * the computed count is printed next to the stored one, and the
+ * computed count is returned. */
+#ifdef EXTFS_DEBUG
+ struct buffer_head * bh;
+ struct ext_free_block * efb;
+ unsigned long count, block;
+
+ lock_super (sb);
+ if (!sb->u.ext_sb.s_firstfreeblock)
+ count = 0;
+ else {
+ efb = (struct ext_free_block *) sb->u.ext_sb.s_firstfreeblock->b_data;
+ count = efb->count + 1; /* the listed blocks plus the chain block itself */
+ block = efb->next;
+ while (block) {
+ if (!(bh = bread (sb->s_dev, block, sb->s_blocksize))) {
+ printk ("ext_count_free: error while reading free blocks list\n");
+ block = 0; /* stop walking the chain */
+ } else {
+ efb = (struct ext_free_block *) bh->b_data;
+ count += efb->count + 1;
+ block = efb->next;
+ brelse (bh);
+ }
+ }
+ }
+printk("ext_count_free_blocks: stored = %d, computed = %d\n",
+ sb->u.ext_sb.s_freeblockscount, count);
+ unlock_super (sb);
+ return count;
+#else
+ return sb->u.ext_sb.s_freeblockscount;
+#endif
+}
+
+void ext_free_inode(struct inode * inode)
+{ /* Put 'inode' back on the free-inode list of its superblock.
+ *
+ * Refuses (with a message) when the inode has no device, has a use
+ * count other than 1, still has links, or has no superblock.  The
+ * in-core inode is cleared with clear_inode() before the free list
+ * is updated, so ino/dev/sb are saved in locals first. */
+ struct buffer_head * bh;
+ struct ext_free_inode * efi;
+ struct super_block * sb;
+ unsigned long block;
+ unsigned long ino;
+ dev_t dev;
+
+ if (!inode)
+ return;
+ if (!inode->i_dev) {
+ printk("free_inode: inode has no device\n");
+ return;
+ }
+ if (inode->i_count != 1) {
+ printk("free_inode: inode has count=%d\n",inode->i_count);
+ return;
+ }
+ if (inode->i_nlink) {
+ printk("free_inode: inode has nlink=%d\n",inode->i_nlink);
+ return;
+ }
+ if (!inode->i_sb) {
+ printk("free_inode: inode on non-existent device\n");
+ return;
+ }
+ sb = inode->i_sb;
+ ino = inode->i_ino;
+ dev = inode->i_dev;
+ clear_inode(inode);
+ lock_super (sb);
+ if (ino < 1 || ino > sb->u.ext_sb.s_ninodes) {
+ printk("free_inode: inode 0 or non-existent inode\n");
+ unlock_super (sb);
+ return;
+ }
+ if (sb->u.ext_sb.s_firstfreeinodeblock) /* efi is only read below when a head exists (short-circuit) */
+ efi = ((struct ext_free_inode *) sb->u.ext_sb.s_firstfreeinodeblock->b_data) +
+ (sb->u.ext_sb.s_firstfreeinodenumber-1)%EXT_INODES_PER_BLOCK;
+ if (!sb->u.ext_sb.s_firstfreeinodeblock || efi->count == 14) { /* no head, or head table full: the freed inode becomes the new head */
+#ifdef EXTFS_DEBUG
+printk("ext_free_inode: inode full, skipping to %d\n", ino);
+#endif
+ if (sb->u.ext_sb.s_firstfreeinodeblock)
+ brelse (sb->u.ext_sb.s_firstfreeinodeblock);
+ block = 2 + (ino - 1) / EXT_INODES_PER_BLOCK; /* inode table blocks start at block 2 */
+ if (!(bh = bread(dev, block, sb->s_blocksize)))
+ panic("ext_free_inode: unable to read inode block\n");
+ efi = ((struct ext_free_inode *) bh->b_data) +
+ (ino - 1) % EXT_INODES_PER_BLOCK;
+ efi->next = sb->u.ext_sb.s_firstfreeinodenumber;
+ efi->count = 0;
+ sb->u.ext_sb.s_firstfreeinodenumber = ino;
+ sb->u.ext_sb.s_firstfreeinodeblock = bh;
+ } else {
+ efi->free[efi->count++] = ino;
+ }
+ sb->u.ext_sb.s_freeinodescount ++;
+ sb->s_dirt = 1;
+ mark_buffer_dirty(sb->u.ext_sb.s_firstfreeinodeblock, 1);
+ unlock_super (sb);
+}
+
+/*
+ * ext_new_inode() - allocate a fresh inode on the filesystem of 'dir'.
+ *
+ * Takes an inode number off the free-inode list (the last number listed
+ * in the cached head, or the head inode itself when it lists nothing, in
+ * which case the next head is read in) and initialises an in-core inode
+ * for it: count 1, nlink 1, owner current->fsuid, group taken from 'dir'
+ * when the directory is setgid and from current->fsgid otherwise, all
+ * three timestamps set to CURRENT_TIME, i_op NULL.
+ *
+ * Returns the new inode, or NULL when 'dir' is NULL, no empty in-core
+ * inode is available, or the filesystem has no free inodes.  In the last
+ * case the inode obtained from get_empty_inode() is now released with
+ * iput(); it used to be leaked.
+ */
+struct inode * ext_new_inode(const struct inode * dir)
+{
+	struct super_block * sb;
+	struct inode * inode;
+	struct ext_free_inode * efi;
+	unsigned long block;
+	int j;
+
+	if (!dir || !(inode=get_empty_inode()))
+		return NULL;
+	sb = dir->i_sb;
+	inode->i_sb = sb;
+	inode->i_flags = sb->s_flags;
+	if (!sb->u.ext_sb.s_firstfreeinodeblock) {
+		/* no free inodes: give the in-core inode back */
+		iput(inode);
+		return NULL;
+	}
+	lock_super (sb);
+	efi = ((struct ext_free_inode *) sb->u.ext_sb.s_firstfreeinodeblock->b_data) +
+		(sb->u.ext_sb.s_firstfreeinodenumber-1)%EXT_INODES_PER_BLOCK;
+	if (efi->count) {
+		j = efi->free[--efi->count];
+		mark_buffer_dirty(sb->u.ext_sb.s_firstfreeinodeblock, 1);
+	} else {
+#ifdef EXTFS_DEBUG
+printk("ext_free_inode: inode empty, skipping to %d\n", efi->next);
+#endif
+		/* the head lists nothing: hand out the head inode itself */
+		j = sb->u.ext_sb.s_firstfreeinodenumber;
+		if (efi->next > sb->u.ext_sb.s_ninodes) {
+			printk ("efi->next = %ld\n", efi->next);
+			panic ("ext_new_inode: bad inode number in free list\n");
+		}
+		sb->u.ext_sb.s_firstfreeinodenumber = efi->next;
+		/* compute the next head's block before the buffer behind
+		   'efi' is released */
+		block = 2 + (((unsigned long) efi->next) - 1) / EXT_INODES_PER_BLOCK;
+		brelse (sb->u.ext_sb.s_firstfreeinodeblock);
+		if (!sb->u.ext_sb.s_firstfreeinodenumber) {
+			sb->u.ext_sb.s_firstfreeinodeblock = NULL;
+		} else {
+			if (!(sb->u.ext_sb.s_firstfreeinodeblock =
+			      bread(sb->s_dev, block, sb->s_blocksize)))
+				panic ("ext_new_inode: unable to read next free inode block\n");
+		}
+	}
+	sb->u.ext_sb.s_freeinodescount --;
+	sb->s_dirt = 1;
+	inode->i_count = 1;
+	inode->i_nlink = 1;
+	inode->i_dev = sb->s_dev;
+	inode->i_uid = current->fsuid;
+	inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
+	inode->i_dirt = 1;
+	inode->i_ino = j;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode->i_op = NULL;
+	inode->i_blocks = inode->i_blksize = 0;
+	insert_inode_hash(inode);
+#ifdef EXTFS_DEBUG
+printk("ext_new_inode : allocating inode %d\n", inode->i_ino);
+#endif
+	unlock_super (sb);
+	return inode;
+}
+
+/*
+ * ext_count_free_inodes() - number of free inodes on 'sb'.
+ *
+ * Without EXTFS_DEBUG this is the stored counter.  With EXTFS_DEBUG the
+ * free-inode chain is walked, the computed count is printed next to the
+ * stored one, and the computed count is returned.
+ *
+ * Changes from the original: a read error while walking the chain now
+ * ends the walk by clearing 'ino' (it used to clear 'block', which the
+ * loop never tests, so the loop could not terminate), and the panic
+ * message spells the function name correctly.
+ */
+unsigned long ext_count_free_inodes(struct super_block *sb)
+{
+#ifdef EXTFS_DEBUG
+	struct buffer_head * bh;
+	struct ext_free_inode * efi;
+	unsigned long count, block, ino;
+
+	lock_super (sb);
+	if (!sb->u.ext_sb.s_firstfreeinodeblock)
+		count = 0;
+	else {
+		efi = ((struct ext_free_inode *) sb->u.ext_sb.s_firstfreeinodeblock->b_data) +
+			((sb->u.ext_sb.s_firstfreeinodenumber-1)%EXT_INODES_PER_BLOCK);
+		/* the listed inodes plus the chain inode itself */
+		count = efi->count + 1;
+		ino = efi->next;
+		while (ino) {
+			if (ino < 1 || ino > sb->u.ext_sb.s_ninodes) {
+				printk ("u.ext_sb.s_firstfreeinodenumber = %d, ino = %d\n",
+					(int) sb->u.ext_sb.s_firstfreeinodenumber,ino);
+				panic ("ext_count_free_inodes: bad inode number in free list\n");
+			}
+			block = 2 + ((ino - 1) / EXT_INODES_PER_BLOCK);
+			if (!(bh = bread (sb->s_dev, block, sb->s_blocksize))) {
+				printk ("ext_count_free_inodes: error while reading free inodes list\n");
+				ino = 0;
+			} else {
+				efi = ((struct ext_free_inode *) bh->b_data) +
+					((ino - 1) % EXT_INODES_PER_BLOCK);
+				count += efi->count + 1;
+				ino = efi->next;
+				brelse (bh);
+			}
+		}
+	}
+printk("ext_count_free_inodes: stored = %d, computed = %d\n",
+	sb->u.ext_sb.s_freeinodescount, count);
+	unlock_super (sb);
+	return count;
+#else
+	return sb->u.ext_sb.s_freeinodescount;
+#endif
+}
diff --git a/fs/ext/fsync.c b/fs/ext/fsync.c
new file mode 100644
index 000000000..bb20383cc
--- /dev/null
+++ b/fs/ext/fsync.c
@@ -0,0 +1,185 @@
+
+/*
+ * linux/fs/ext/fsync.c
+ *
+ * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk)
+ * from
+ * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
+ * from
+ * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * extfs fsync primitive
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/locks.h>
+
+#include <linux/fs.h>
+#include <linux/ext_fs.h>
+
+
+#define blocksize BLOCK_SIZE
+#define addr_per_block 256
+
+static int sync_block (struct inode * inode, unsigned long * block, int wait)
+{ /* Sync the single block whose number is stored at *block.
+ * Returns 0 when there is nothing (more) to do, 1 when *block changed
+ * while the buffer was being looked up, and -1 on the wait pass when
+ * the buffer was requested but is not up to date. */
+ struct buffer_head * bh;
+ int tmp;
+
+ if (!*block)
+ return 0;
+ tmp = *block;
+ bh = get_hash_table(inode->i_dev, *block, blocksize);
+ if (!bh)
+ return 0; /* no buffer found for this block */
+ if (*block != tmp) { /* the block pointer changed in the meantime */
+ brelse (bh);
+ return 1;
+ }
+ if (wait && bh->b_req && !bh->b_uptodate) {
+ brelse(bh);
+ return -1;
+ }
+ if (wait || !bh->b_uptodate || !bh->b_dirt) /* wait pass, or nothing dirty to write */
+ {
+ brelse(bh);
+ return 0;
+ }
+ ll_rw_block(WRITE, 1, &bh); /* first pass (wait == 0): start the write */
+ bh->b_count--; /* drops the count directly rather than via brelse() — NOTE(review): presumably to avoid waiting on the buffer; confirm */
+ return 0;
+}
+
+static int sync_iblock (struct inode * inode, unsigned long * iblock,
+ struct buffer_head **bh, int wait)
+{ /* Sync the indirect block whose number is stored at *iblock and read
+ * it in so the caller can walk its entries.  On return *bh is the
+ * buffer (to be released by the caller) or NULL.  Returns 0 on
+ * success or when *iblock is 0, the non-zero result of sync_block()
+ * if it reports one, 1 when *iblock changed during the read, and -1
+ * when the block could not be read. */
+ int rc, tmp;
+
+ *bh = NULL;
+ tmp = *iblock;
+ if (!tmp)
+ return 0;
+ rc = sync_block (inode, iblock, wait);
+ if (rc)
+ return rc;
+ *bh = bread(inode->i_dev, tmp, blocksize);
+ if (tmp != *iblock) { /* the pointer changed while bread() ran */
+ brelse(*bh);
+ *bh = NULL;
+ return 1;
+ }
+ if (!*bh)
+ return -1;
+ return 0;
+}
+
+
+/*
+ * sync_direct() - run sync_block() over the nine direct block slots of
+ * 'inode'.  The walk stops early when sync_block() returns a positive
+ * value; the last negative value seen (if any) is returned, else 0.
+ */
+static int sync_direct(struct inode *inode, int wait)
+{
+	int status = 0;
+	int slot = 0;
+
+	while (slot < 9) {
+		int rc = sync_block (inode, &inode->u.ext_i.i_data[slot], wait);
+
+		slot++;
+		if (rc > 0)
+			break;
+		if (rc < 0)
+			status = rc;
+	}
+	return status;
+}
+
+/*
+ * sync_indirect() - sync the single-indirect block stored at *iblock and
+ * every data block it points to.  Returns whatever sync_iblock() reports
+ * when that is non-zero or yields no buffer; otherwise the last negative
+ * sync_block() result (0 if none).  A positive sync_block() result ends
+ * the walk early.
+ */
+static int sync_indirect(struct inode *inode, unsigned long *iblock, int wait)
+{
+	struct buffer_head * ind_bh;
+	unsigned long * entry;
+	int status = 0;
+	int rc, left;
+
+	rc = sync_iblock (inode, iblock, &ind_bh, wait);
+	if (rc != 0 || ind_bh == NULL)
+		return rc;
+
+	entry = (unsigned long *) ind_bh->b_data;
+	for (left = addr_per_block; left > 0; left--, entry++) {
+		rc = sync_block (inode, entry, wait);
+		if (rc > 0)
+			break;
+		if (rc < 0)
+			status = rc;
+	}
+	brelse(ind_bh);
+	return status;
+}
+
+/*
+ * sync_dindirect() - sync the double-indirect block stored at *diblock by
+ * running sync_indirect() over each of its entries.  Return convention is
+ * the same as for sync_indirect().
+ */
+static int sync_dindirect(struct inode *inode, unsigned long *diblock,
+	int wait)
+{
+	struct buffer_head * dind_bh;
+	unsigned long * entry;
+	int status = 0;
+	int rc, left;
+
+	rc = sync_iblock (inode, diblock, &dind_bh, wait);
+	if (rc != 0 || dind_bh == NULL)
+		return rc;
+
+	entry = (unsigned long *) dind_bh->b_data;
+	for (left = addr_per_block; left > 0; left--, entry++) {
+		rc = sync_indirect (inode, entry, wait);
+		if (rc > 0)
+			break;
+		if (rc < 0)
+			status = rc;
+	}
+	brelse(dind_bh);
+	return status;
+}
+
+/*
+ * sync_tindirect() - sync the triple-indirect block stored at *tiblock by
+ * running sync_dindirect() over each of its entries.  Return convention
+ * is the same as for sync_indirect().
+ */
+static int sync_tindirect(struct inode *inode, unsigned long *tiblock,
+	int wait)
+{
+	struct buffer_head * tind_bh;
+	unsigned long * entry;
+	int status = 0;
+	int rc, left;
+
+	rc = sync_iblock (inode, tiblock, &tind_bh, wait);
+	if (rc != 0 || tind_bh == NULL)
+		return rc;
+
+	entry = (unsigned long *) tind_bh->b_data;
+	for (left = addr_per_block; left > 0; left--, entry++) {
+		rc = sync_dindirect (inode, entry, wait);
+		if (rc > 0)
+			break;
+		if (rc < 0)
+			status = rc;
+	}
+	brelse(tind_bh);
+	return status;
+}
+
+/*
+ * ext_sync_file() - fsync for ext regular files, directories and symlinks.
+ *
+ * Makes two passes over every block of the inode (direct, single, double
+ * and triple indirect), the first with wait == 0 and the second with
+ * wait == 1, then syncs the inode itself with ext_sync_inode().
+ *
+ * Returns 0 on success, -EIO when the OR of all step results is negative,
+ * and -EINVAL for any other kind of inode.  'file' is not used.
+ */
+int ext_sync_file(struct inode * inode, struct file *file)
+{
+	int pass;
+	int err = 0;
+
+	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
+	    !S_ISLNK(inode->i_mode))
+		return -EINVAL;
+
+	for (pass = 0; pass < 2; pass++) {
+		err |= sync_direct(inode, pass);
+		err |= sync_indirect(inode, inode->u.ext_i.i_data+9, pass);
+		err |= sync_dindirect(inode, inode->u.ext_i.i_data+10, pass);
+		err |= sync_tindirect(inode, inode->u.ext_i.i_data+11, pass);
+	}
+	err |= ext_sync_inode (inode);
+	if (err < 0)
+		return -EIO;
+	return 0;
+}
diff --git a/fs/ext/inode.c b/fs/ext/inode.c
new file mode 100644
index 000000000..b3ca2e2cf
--- /dev/null
+++ b/fs/ext/inode.c
@@ -0,0 +1,444 @@
+/*
+ * linux/fs/ext/inode.c
+ *
+ * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
+ *
+ * from
+ *
+ * linux/fs/minix/inode.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/sched.h>
+#include <linux/ext_fs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+
+#include <asm/system.h>
+#include <asm/segment.h>
+
+/*
+ * ext_put_inode() - when the inode has no links left, truncate it to
+ * size 0 and return it to the free-inode list.  Inodes that still have
+ * links are left untouched.
+ */
+void ext_put_inode(struct inode *inode)
+{
+	if (!inode->i_nlink) {
+		inode->i_size = 0;
+		ext_truncate(inode);
+		ext_free_inode(inode);
+	}
+}
+
+/*
+ * ext_put_super() - release the superblock: clear s_dev and drop the two
+ * cached free-list buffers (free-inode block first, then free-block
+ * head), all under the superblock lock.
+ */
+void ext_put_super(struct super_block *sb)
+{
+	struct buffer_head * held;
+
+	lock_super(sb);
+	sb->s_dev = 0;
+
+	held = sb->u.ext_sb.s_firstfreeinodeblock;
+	if (held != NULL)
+		brelse (held);
+
+	held = sb->u.ext_sb.s_firstfreeblock;
+	if (held != NULL)
+		brelse (held);
+
+	unlock_super(sb);
+}
+
+static struct super_operations ext_sops = { /* installed as s->s_op by ext_read_super(); slot order must match struct super_operations, which is declared elsewhere — NOTE(review): the slot names hinted below are inferred from the function names only, confirm against the struct */
+ ext_read_inode, /* presumably read_inode */
+ NULL,
+ ext_write_inode, /* presumably write_inode */
+ ext_put_inode, /* presumably put_inode */
+ ext_put_super, /* presumably put_super */
+ ext_write_super, /* presumably write_super */
+ ext_statfs, /* presumably statfs */
+ NULL
+};
+
+/*
+ * ext_read_super() - mount-time setup of an ext filesystem superblock.
+ *
+ * Reads block 1 of the device, copies the on-disk geometry and free-list
+ * heads into s->u.ext_sb, verifies the magic number, reads in the first
+ * free block and the block holding the first free inode (when the lists
+ * are not empty), installs ext_sops and fetches the root inode.
+ *
+ * s:      superblock to fill in; s->s_dev names the device.
+ * data:   not used.
+ * silent: when non-zero, a wrong magic number is not reported.
+ *
+ * Returns 's' on success.  On failure s->s_dev is cleared and NULL is
+ * returned.  The failure path after iget() now also releases the two
+ * cached free-list buffers, which used to be leaked.
+ */
+struct super_block *ext_read_super(struct super_block *s,void *data,
+	int silent)
+{
+	struct buffer_head *bh;
+	struct ext_super_block *es;
+	int dev = s->s_dev,block;
+
+	lock_super(s);
+	set_blocksize(dev, BLOCK_SIZE);
+	if (!(bh = bread(dev, 1, BLOCK_SIZE))) {
+		s->s_dev=0;
+		unlock_super(s);
+		printk("EXT-fs: unable to read superblock\n");
+		return NULL;
+	}
+	es = (struct ext_super_block *) bh->b_data;
+	s->s_blocksize = 1024;
+	s->s_blocksize_bits = 10;
+	s->u.ext_sb.s_ninodes = es->s_ninodes;
+	s->u.ext_sb.s_nzones = es->s_nzones;
+	s->u.ext_sb.s_firstdatazone = es->s_firstdatazone;
+	s->u.ext_sb.s_log_zone_size = es->s_log_zone_size;
+	s->u.ext_sb.s_max_size = es->s_max_size;
+	s->s_magic = es->s_magic;
+	s->u.ext_sb.s_firstfreeblocknumber = es->s_firstfreeblock;
+	s->u.ext_sb.s_freeblockscount = es->s_freeblockscount;
+	s->u.ext_sb.s_firstfreeinodenumber = es->s_firstfreeinode;
+	s->u.ext_sb.s_freeinodescount = es->s_freeinodescount;
+	brelse(bh);
+	if (s->s_magic != EXT_SUPER_MAGIC) {
+		s->s_dev = 0;
+		unlock_super(s);
+		if (!silent)
+			printk("VFS: Can't find an extfs filesystem on dev 0x%04x.\n",
+				dev);
+		return NULL;
+	}
+	/* cache the head of the free-block chain, if there is one */
+	if (!s->u.ext_sb.s_firstfreeblocknumber)
+		s->u.ext_sb.s_firstfreeblock = NULL;
+	else
+		if (!(s->u.ext_sb.s_firstfreeblock = bread(dev,
+			s->u.ext_sb.s_firstfreeblocknumber, BLOCK_SIZE))) {
+			printk("ext_read_super: unable to read first free block\n");
+			s->s_dev = 0;
+			unlock_super(s);
+			return NULL;
+		}
+	/* cache the block holding the head of the free-inode chain */
+	if (!s->u.ext_sb.s_firstfreeinodenumber)
+		s->u.ext_sb.s_firstfreeinodeblock = NULL;
+	else {
+		block = 2 + (s->u.ext_sb.s_firstfreeinodenumber - 1) / EXT_INODES_PER_BLOCK;
+		if (!(s->u.ext_sb.s_firstfreeinodeblock = bread(dev, block, BLOCK_SIZE))) {
+			printk("ext_read_super: unable to read first free inode block\n");
+			brelse(s->u.ext_sb.s_firstfreeblock);
+			s->s_dev = 0;
+			unlock_super (s);
+			return NULL;
+		}
+	}
+	unlock_super(s);
+	/* set up enough so that it can read an inode */
+	s->s_dev = dev;
+	s->s_op = &ext_sops;
+	if (!(s->s_mounted = iget(s,EXT_ROOT_INO))) {
+		/* the mount failed: drop the free-list buffers cached above
+		   (either may be NULL, as in the error path further up) */
+		brelse(s->u.ext_sb.s_firstfreeinodeblock);
+		brelse(s->u.ext_sb.s_firstfreeblock);
+		s->s_dev=0;
+		printk("EXT-fs: get root inode failed\n");
+		return NULL;
+	}
+	return s;
+}
+
+/*
+ * ext_write_super() - copy the in-core free-list heads and free counts
+ * back into the on-disk superblock (block 1), mark that buffer dirty and
+ * clear sb->s_dirt.  If the superblock cannot be read, a message is
+ * printed and nothing else happens.
+ */
+void ext_write_super (struct super_block *sb)
+{
+	struct buffer_head * sb_bh = bread(sb->s_dev, 1, BLOCK_SIZE);
+	struct ext_super_block * disk;
+
+	if (sb_bh == NULL) {
+		printk ("ext_write_super: bread failed\n");
+		return;
+	}
+
+	disk = (struct ext_super_block *) sb_bh->b_data;
+	disk->s_firstfreeblock = sb->u.ext_sb.s_firstfreeblocknumber;
+	disk->s_freeblockscount = sb->u.ext_sb.s_freeblockscount;
+	disk->s_firstfreeinode = sb->u.ext_sb.s_firstfreeinodenumber;
+	disk->s_freeinodescount = sb->u.ext_sb.s_freeinodescount;
+
+	mark_buffer_dirty(sb_bh, 1);
+	brelse (sb_bh);
+	sb->s_dirt = 0;
+}
+
+void ext_statfs (struct super_block *sb, struct statfs *buf)
+{ /* Fill the user-space statfs buffer 'buf' for 'sb'; every field is
+ * stored with put_fs_long().  f_fsid is not written. */
+ long tmp;
+
+ put_fs_long(EXT_SUPER_MAGIC, &buf->f_type);
+ put_fs_long(1024, &buf->f_bsize);
+ put_fs_long(sb->u.ext_sb.s_nzones << sb->u.ext_sb.s_log_zone_size,
+ &buf->f_blocks);
+ tmp = ext_count_free_blocks(sb);
+ put_fs_long(tmp, &buf->f_bfree);
+ put_fs_long(tmp, &buf->f_bavail); /* f_bavail is reported as the same value as f_bfree */
+ put_fs_long(sb->u.ext_sb.s_ninodes, &buf->f_files);
+ put_fs_long(ext_count_free_inodes(sb), &buf->f_ffree);
+ put_fs_long(EXT_NAME_LEN, &buf->f_namelen);
+ /* Don't know what value to put in buf->f_fsid */
+}
+
+#define inode_bmap(inode,nr) ((inode)->u.ext_i.i_data[(nr)]) /* block number stored in slot 'nr' of the in-core inode */
+
+/*
+ * block_bmap() - return entry 'nr' of the block-number table held in
+ * 'bh' and release the buffer.  A NULL buffer yields 0.
+ */
+static int block_bmap(struct buffer_head * bh, int nr)
+{
+	int entry = 0;
+
+	if (bh) {
+		entry = ((unsigned long *) bh->b_data)[nr];
+		brelse(bh);
+	}
+	return entry;
+}
+
+/*
+ * ext_bmap() - map file block 'block' of 'inode' to a device block number.
+ *
+ * Slots 0-8 of the inode are direct blocks, slot 9 is a single-indirect
+ * block, slot 10 a double-indirect block and slot 11 a triple-indirect
+ * block; each indirect block holds 256 entries.
+ *
+ * Returns the device block number, or 0 when 'block' is out of range or
+ * any pointer on the way is 0.
+ */
+int ext_bmap(struct inode * inode,int block)
+{
+	int blk;
+
+	if (block<0) {
+		printk("ext_bmap: block<0");
+		return 0;
+	}
+	if (block >= 9+256+256*256+256*256*256) {
+		printk("ext_bmap: block>big");
+		return 0;
+	}
+
+	/* direct blocks */
+	if (block<9)
+		return inode_bmap(inode,block);
+	block -= 9;
+
+	/* single indirect */
+	if (block<256) {
+		blk = inode_bmap(inode,9);
+		if (blk)
+			blk = block_bmap(bread(inode->i_dev,blk,BLOCK_SIZE),block);
+		return blk;
+	}
+	block -= 256;
+
+	/* double indirect */
+	if (block<256*256) {
+		blk = inode_bmap(inode,10);
+		if (blk)
+			blk = block_bmap(bread(inode->i_dev,blk,BLOCK_SIZE),block>>8);
+		if (blk)
+			blk = block_bmap(bread(inode->i_dev,blk,BLOCK_SIZE),block & 255);
+		return blk;
+	}
+	block -= 256*256;
+
+	/* triple indirect */
+	blk = inode_bmap(inode,11);
+	if (blk)
+		blk = block_bmap(bread(inode->i_dev,blk,BLOCK_SIZE),block>>16);
+	if (blk)
+		blk = block_bmap(bread(inode->i_dev,blk,BLOCK_SIZE),(block>>8) & 255);
+	if (blk)
+		blk = block_bmap(bread(inode->i_dev,blk,BLOCK_SIZE),block & 255);
+	return blk;
+}
+
+static struct buffer_head * inode_getblk(struct inode * inode, int nr, int create)
+{ /* Return the buffer for the block referenced by slot 'nr' of the
+ * inode, allocating a new block when the slot is empty and 'create'
+ * is set.  Returns NULL when the slot is empty and either 'create'
+ * is 0 or no block could be allocated. */
+ int tmp;
+ unsigned long * p;
+ struct buffer_head * result;
+
+ p = inode->u.ext_i.i_data + nr;
+repeat:
+ tmp = *p;
+ if (tmp) {
+ result = getblk(inode->i_dev, tmp, BLOCK_SIZE);
+ if (tmp == *p) /* slot unchanged across getblk(): this is the right buffer */
+ return result;
+ brelse(result);
+ goto repeat;
+ }
+ if (!create)
+ return NULL;
+ tmp = ext_new_block(inode->i_sb);
+ if (!tmp)
+ return NULL;
+ result = getblk(inode->i_dev, tmp, BLOCK_SIZE);
+ if (*p) { /* the slot was filled in the meantime: give the new block back and retry */
+ ext_free_block(inode->i_sb,tmp);
+ brelse(result);
+ goto repeat;
+ }
+ *p = tmp;
+ inode->i_ctime = CURRENT_TIME;
+ inode->i_dirt = 1;
+ return result;
+}
+
+static struct buffer_head * block_getblk(struct inode * inode,
+ struct buffer_head * bh, int nr, int create)
+{ /* Return the buffer for the block referenced by entry 'nr' of the
+ * indirect block held in 'bh', allocating a new block when the entry
+ * is empty and 'create' is set.  'bh' is always released before
+ * returning.  Returns NULL when 'bh' is NULL or cannot be read, or
+ * when the entry is empty and either 'create' is 0 or no block could
+ * be allocated. */
+ int tmp;
+ unsigned long * p;
+ struct buffer_head * result;
+
+ if (!bh)
+ return NULL;
+ if (!bh->b_uptodate) { /* the indirect block has to be read before its entries can be used */
+ ll_rw_block(READ, 1, &bh);
+ wait_on_buffer(bh);
+ if (!bh->b_uptodate) {
+ brelse(bh);
+ return NULL;
+ }
+ }
+ p = nr + (unsigned long *) bh->b_data;
+repeat:
+ tmp = *p;
+ if (tmp) {
+ result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
+ if (tmp == *p) { /* entry unchanged across getblk(): this is the right buffer */
+ brelse(bh);
+ return result;
+ }
+ brelse(result);
+ goto repeat;
+ }
+ if (!create) {
+ brelse(bh);
+ return NULL;
+ }
+ tmp = ext_new_block(inode->i_sb);
+ if (!tmp) {
+ brelse(bh);
+ return NULL;
+ }
+ result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
+ if (*p) { /* the entry was filled in the meantime: give the new block back and retry */
+ ext_free_block(inode->i_sb,tmp);
+ brelse(result);
+ goto repeat;
+ }
+ *p = tmp;
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ return result;
+}
+
+/*
+ * ext_getblk() - return the buffer for file block 'block' of 'inode',
+ * walking the direct (0-8), single (slot 9), double (slot 10) or triple
+ * (slot 11) indirect path and creating missing blocks on the way when
+ * 'create' is set.  Returns NULL when 'block' is out of range or the
+ * block cannot be found or allocated.
+ */
+struct buffer_head * ext_getblk(struct inode * inode, int block, int create)
+{
+	struct buffer_head * level;
+
+	if (block<0) {
+		printk("ext_getblk: block<0\n");
+		return NULL;
+	}
+	if (block >= 9+256+256*256+256*256*256) {
+		printk("ext_getblk: block>big\n");
+		return NULL;
+	}
+
+	/* direct blocks */
+	if (block<9)
+		return inode_getblk(inode,block,create);
+	block -= 9;
+
+	/* single indirect */
+	if (block<256)
+		return block_getblk(inode,inode_getblk(inode,9,create),
+				    block,create);
+	block -= 256;
+
+	/* double indirect */
+	if (block<256*256) {
+		level = inode_getblk(inode,10,create);
+		level = block_getblk(inode,level,block>>8,create);
+		return block_getblk(inode,level,block & 255,create);
+	}
+	block -= 256*256;
+
+	/* triple indirect */
+	level = inode_getblk(inode,11,create);
+	level = block_getblk(inode,level,block>>16,create);
+	level = block_getblk(inode,level,(block>>8) & 255,create);
+	return block_getblk(inode,level,block & 255,create);
+}
+
+/*
+ * ext_bread() - like ext_getblk(), but the returned buffer is up to date:
+ * a buffer that is not is read from the device first.  Returns NULL when
+ * the block cannot be obtained or the read fails.
+ */
+struct buffer_head * ext_bread(struct inode * inode, int block, int create)
+{
+	struct buffer_head * bh = ext_getblk(inode,block,create);
+
+	if (bh == NULL)
+		return NULL;
+	if (!bh->b_uptodate) {
+		ll_rw_block(READ, 1, &bh);
+		wait_on_buffer(bh);
+		if (!bh->b_uptodate) {
+			brelse(bh);
+			return NULL;
+		}
+	}
+	return bh;
+}
+
+void ext_read_inode(struct inode * inode)
+{ /* Fill the in-core 'inode' from its on-disk ext inode and select the
+ * inode operations that match its file type.  Panics when the inode
+ * block cannot be read. */
+ struct buffer_head * bh;
+ struct ext_inode * raw_inode;
+ int block;
+
+ block = 2 + (inode->i_ino-1)/EXT_INODES_PER_BLOCK; /* inode numbers start at 1; the inode table starts at block 2 */
+ if (!(bh=bread(inode->i_dev, block, BLOCK_SIZE)))
+ panic("unable to read i-node block");
+ raw_inode = ((struct ext_inode *) bh->b_data) +
+ (inode->i_ino-1)%EXT_INODES_PER_BLOCK;
+ inode->i_mode = raw_inode->i_mode;
+ inode->i_uid = raw_inode->i_uid;
+ inode->i_gid = raw_inode->i_gid;
+ inode->i_nlink = raw_inode->i_nlinks;
+ inode->i_size = raw_inode->i_size;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = raw_inode->i_time; /* the disk inode has a single time stamp, used for all three */
+ inode->i_blocks = inode->i_blksize = 0;
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+ inode->i_rdev = raw_inode->i_zone[0]; /* device nodes keep their device number in zone 0 */
+ else for (block = 0; block < 12; block++) /* 'block' is reused here as the slot index */
+ inode->u.ext_i.i_data[block] = raw_inode->i_zone[block];
+ brelse(bh);
+ inode->i_op = NULL;
+ if (S_ISREG(inode->i_mode))
+ inode->i_op = &ext_file_inode_operations;
+ else if (S_ISDIR(inode->i_mode))
+ inode->i_op = &ext_dir_inode_operations;
+ else if (S_ISLNK(inode->i_mode))
+ inode->i_op = &ext_symlink_inode_operations;
+ else if (S_ISCHR(inode->i_mode))
+ inode->i_op = &chrdev_inode_operations;
+ else if (S_ISBLK(inode->i_mode))
+ inode->i_op = &blkdev_inode_operations;
+ else if (S_ISFIFO(inode->i_mode))
+ init_fifo(inode);
+}
+
+static struct buffer_head * ext_update_inode(struct inode * inode)
+{ /* Copy the in-core 'inode' into its on-disk slot, mark the buffer
+ * dirty, clear inode->i_dirt and return the buffer, which the caller
+ * must release.  Panics when the inode block cannot be read, so the
+ * return value is never NULL. */
+ struct buffer_head * bh;
+ struct ext_inode * raw_inode;
+ int block;
+
+ block = 2 + (inode->i_ino-1)/EXT_INODES_PER_BLOCK;
+ if (!(bh=bread(inode->i_dev, block, BLOCK_SIZE)))
+ panic("unable to read i-node block");
+ raw_inode = ((struct ext_inode *)bh->b_data) +
+ (inode->i_ino-1)%EXT_INODES_PER_BLOCK;
+ raw_inode->i_mode = inode->i_mode;
+ raw_inode->i_uid = inode->i_uid;
+ raw_inode->i_gid = inode->i_gid;
+ raw_inode->i_nlinks = inode->i_nlink;
+ raw_inode->i_size = inode->i_size;
+ raw_inode->i_time = inode->i_mtime; /* only the modification time is stored on disk */
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+ raw_inode->i_zone[0] = inode->i_rdev;
+ else for (block = 0; block < 12; block++) /* 'block' is reused here as the slot index */
+ raw_inode->i_zone[block] = inode->u.ext_i.i_data[block];
+ mark_buffer_dirty(bh, 1);
+ inode->i_dirt=0;
+ return bh;
+}
+
+/*
+ * ext_write_inode() - copy the in-core inode into its disk buffer with
+ * ext_update_inode() and release that buffer; the write-out itself is
+ * not started here.
+ */
+void ext_write_inode(struct inode * inode)
+{
+	struct buffer_head * inode_bh = ext_update_inode (inode);
+
+	brelse(inode_bh);
+}
+
+int ext_sync_inode (struct inode *inode)
+{ /* Copy the in-core inode into its disk buffer and write that buffer
+ * out synchronously.  Returns 0 on success and -1 when the buffer is
+ * not up to date after the write was requested. */
+ int err = 0;
+ struct buffer_head *bh;
+
+ bh = ext_update_inode(inode);
+ if (bh && bh->b_dirt)
+ {
+ ll_rw_block(WRITE, 1, &bh);
+ wait_on_buffer(bh);
+ if (bh->b_req && !bh->b_uptodate)
+ {
+ printk ("IO error syncing ext inode [%04x:%08lx]\n",
+ inode->i_dev, inode->i_ino);
+ err = -1;
+ }
+ }
+ else if (!bh) /* defensive: ext_update_inode() panics instead of returning NULL */
+ err = -1;
+ brelse (bh);
+ return err;
+}
+
diff --git a/fs/ext/namei.c b/fs/ext/namei.c
new file mode 100644
index 000000000..85a411e94
--- /dev/null
+++ b/fs/ext/namei.c
@@ -0,0 +1,893 @@
+/*
+ * linux/fs/ext/namei.c
+ *
+ * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
+ *
+ * from
+ *
+ * linux/fs/minix/namei.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/sched.h>
+#include <linux/ext_fs.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/errno.h>
+
+#include <asm/segment.h>
+
+/*
+ * comment out this line if you want names > EXT_NAME_LEN chars to be
+ * truncated. Else they will be disallowed.
+ */
+/* #define NO_TRUNCATE */
+
+/*
+ * EXT_DIR_PAD defines the directory entries boundaries
+ *
+ * NOTE: It must be a power of 2 and must be greater than or equal to 8
+ * because a directory entry needs 8 bytes for its fixed part
+ * (4 bytes for the inode, 2 bytes for the entry length and 2 bytes
+ * for the name length)
+ */
+#define EXT_DIR_PAD 8
+
+/*
+ *
+ * EXT_DIR_MIN_SIZE is the minimal size of a directory entry
+ *
+ * During allocations, a directory entry is split into 2 ones
+ * *ONLY* if the size of the unused part is greater than or
+ * equal to EXT_DIR_MIN_SIZE
+ */
+#define EXT_DIR_MIN_SIZE 12
+
+/*
+ * ok, we cannot use strncmp, as the name is not in our data space.
+ * Thus we'll have to use ext_match. No big problem. Match also makes
+ * some sanity tests.
+ *
+ * NOTE! unlike strncmp, ext_match returns 1 for success, 0 for failure.
+ */
+ static int ext_match(int len,const char * name,struct ext_dir_entry * de)
+ {
+ 	/* missing entries and unused slots (inode 0) never match */
+ 	if (de == NULL || de->inode == 0)
+ 		return 0;
+ 	if (len > EXT_NAME_LEN)
+ 		return 0;
+ 	/* "" means "." ---> so paths like "/usr/lib//libc.a" work */
+ 	if (len == 0 && de->name[0] == '.' && de->name[1] == '\0')
+ 		return 1;
+ 	if (de->name_len != len)
+ 		return 0;
+ 	return memcmp(name, de->name, len) == 0;
+ }
+
+/*
+ * ext_find_entry()
+ *
+ * finds an entry in the specified directory with the wanted name. It
+ * returns the cache buffer in which the entry was found, and the entry
+ * itself (as a parameter - res_dir). It does NOT read the inode of the
+ * entry - you'll have to do that yourself if you want to.
+ *
+ * addition for the ext file system : this function returns the previous
+ * and next directory entries in the parameters prev_dir and next_dir
+ */
+static struct buffer_head * ext_find_entry(struct inode * dir,
+ const char * name, int namelen, struct ext_dir_entry ** res_dir,
+ struct ext_dir_entry ** prev_dir, struct ext_dir_entry ** next_dir)
+{
+ long offset;
+ struct buffer_head * bh;
+ struct ext_dir_entry * de;
+
+ *res_dir = NULL;
+ if (!dir)
+ return NULL;
+#ifdef NO_TRUNCATE
+ if (namelen > EXT_NAME_LEN)
+ return NULL;
+#else
+ if (namelen > EXT_NAME_LEN)
+ namelen = EXT_NAME_LEN;
+#endif
+ bh = ext_bread(dir,0,0);
+ if (!bh)
+ return NULL;
+ if (prev_dir)
+ *prev_dir = NULL;
+ if (next_dir)
+ *next_dir = NULL;
+ offset = 0;
+ de = (struct ext_dir_entry *) bh->b_data;
+ while (offset < dir->i_size) {
+ if ((char *)de >= BLOCK_SIZE+bh->b_data) {
+ brelse(bh);
+ bh = NULL;
+ bh = ext_bread(dir,offset>>BLOCK_SIZE_BITS,0);
+ if (!bh)
+ continue;
+ de = (struct ext_dir_entry *) bh->b_data;
+ if (prev_dir)
+ *prev_dir = NULL;
+ }
+ if (de->rec_len < 8 || de->rec_len % 8 != 0 ||
+ de->rec_len < de->name_len + 8 ||
+ (((char *) de) + de->rec_len-1 >= BLOCK_SIZE+bh->b_data)) {
+ printk ("ext_find_entry: bad dir entry\n");
+ printk ("dev=%d, dir=%ld, offset=%ld, rec_len=%d, name_len=%d\n",
+ dir->i_dev, dir->i_ino, offset, de->rec_len, de->name_len);
+ de = (struct ext_dir_entry *) (bh->b_data+BLOCK_SIZE);
+ offset = ((offset / BLOCK_SIZE) + 1) * BLOCK_SIZE;
+ continue;
+/* brelse (bh);
+ return NULL; */
+ }
+ if (ext_match(namelen,name,de)) {
+ *res_dir = de;
+ if (next_dir)
+ if (offset + de->rec_len < dir->i_size &&
+ ((char *)de) + de->rec_len < BLOCK_SIZE+bh->b_data)
+ *next_dir = (struct ext_dir_entry *)
+ ((char *) de + de->rec_len);
+ else
+ *next_dir = NULL;
+ return bh;
+ }
+ offset += de->rec_len;
+ if (prev_dir)
+ *prev_dir = de;
+ de = (struct ext_dir_entry *) ((char *) de + de->rec_len);
+ }
+ brelse(bh);
+ return NULL;
+}
+
+ /*
+  * Look 'name' up in 'dir' and hand back its inode through *result.
+  * The reference on 'dir' is always dropped (when dir is non-NULL).
+  * Returns 0, -ENOENT (no such entry / not a directory) or -EACCES
+  * (the inode could not be obtained).
+  */
+ int ext_lookup(struct inode * dir,const char * name, int len,
+ 	struct inode ** result)
+ {
+ 	struct buffer_head * bh;
+ 	struct ext_dir_entry * de;
+ 	int ino;
+ 	int err;
+
+ 	*result = NULL;
+ 	if (!dir)
+ 		return -ENOENT;
+ 	err = -ENOENT;
+ 	if (!S_ISDIR(dir->i_mode))
+ 		goto out;
+ 	bh = ext_find_entry(dir,name,len,&de,NULL,NULL);
+ 	if (!bh)
+ 		goto out;
+ 	/* remember the inode number: 'de' points into the buffer */
+ 	ino = de->inode;
+ 	brelse(bh);
+ 	*result = iget(dir->i_sb,ino);
+ 	err = *result ? 0 : -EACCES;
+ out:
+ 	iput(dir);
+ 	return err;
+ }
+
+/*
+ * ext_add_entry()
+ *
+ * adds a file entry to the specified directory, using the same
+ * semantics as ext_find_entry(). It returns NULL if it failed.
+ *
+ * NOTE!! The inode part of 'de' is left at 0 - which means you
+ * may not sleep between calling this and putting something into
+ * the entry, as someone else might have used it while you slept.
+ */
+static struct buffer_head * ext_add_entry(struct inode * dir,
+ const char * name, int namelen, struct ext_dir_entry ** res_dir)
+{
+ int i;
+ long offset;
+ unsigned short rec_len;
+ struct buffer_head * bh;
+ struct ext_dir_entry * de, * de1;
+
+ *res_dir = NULL;
+ if (!dir)
+ return NULL;
+#ifdef NO_TRUNCATE
+ if (namelen > EXT_NAME_LEN)
+ return NULL;
+#else
+ if (namelen > EXT_NAME_LEN)
+ namelen = EXT_NAME_LEN;
+#endif
+ if (!namelen)
+ return NULL;
+ bh = ext_bread(dir,0,0);
+ if (!bh)
+ return NULL;
+ rec_len = ((8 + namelen + EXT_DIR_PAD - 1) / EXT_DIR_PAD) * EXT_DIR_PAD;
+ offset = 0;
+ de = (struct ext_dir_entry *) bh->b_data;
+ while (1) {
+ if ((char *)de >= BLOCK_SIZE+bh->b_data && offset < dir->i_size) {
+#ifdef EXTFS_DEBUG
+printk ("ext_add_entry: skipping to next block\n");
+#endif
+ brelse(bh);
+ bh = NULL;
+ bh = ext_bread(dir,offset>>BLOCK_SIZE_BITS,0);
+ if (!bh)
+ return NULL;
+ de = (struct ext_dir_entry *) bh->b_data;
+ }
+ if (offset >= dir->i_size) {
+ /* Check that the directory entry fits in the block */
+ if (offset % BLOCK_SIZE == 0 ||
+ (BLOCK_SIZE - (offset % BLOCK_SIZE)) < rec_len) {
+ if ((offset % BLOCK_SIZE) != 0) {
+ /* If the entry does not fit in the
+ block, the remainder of the block
+ becomes an unused entry */
+ de->inode = 0;
+ de->rec_len = BLOCK_SIZE
+ - (offset & (BLOCK_SIZE - 1));
+ de->name_len = 0;
+ offset += de->rec_len;
+ dir->i_size += de->rec_len;
+ dir->i_dirt = 1;
+#if 0
+ dir->i_ctime = CURRENT_TIME;
+#endif
+ mark_buffer_dirty(bh, 1);
+ }
+ brelse (bh);
+ bh = NULL;
+#ifdef EXTFS_DEBUG
+printk ("ext_add_entry : creating next block\n");
+#endif
+ bh = ext_bread(dir,offset>>BLOCK_SIZE_BITS,1);
+ if (!bh)
+ return NULL; /* Other thing to do ??? */
+ de = (struct ext_dir_entry *) bh->b_data;
+ }
+ /* Allocate the entry */
+ de->inode=0;
+ de->rec_len = rec_len;
+ dir->i_size += de->rec_len;
+ dir->i_dirt = 1;
+#if 0
+ dir->i_ctime = CURRENT_TIME;
+#endif
+ }
+ if (de->rec_len < 8 || de->rec_len % 4 != 0 ||
+ de->rec_len < de->name_len + 8 ||
+ (((char *) de) + de->rec_len-1 >= BLOCK_SIZE+bh->b_data)) {
+ printk ("ext_addr_entry: bad dir entry\n");
+ printk ("dev=%d, dir=%ld, offset=%ld, rec_len=%d, name_len=%d\n",
+ dir->i_dev, dir->i_ino, offset, de->rec_len, de->name_len);
+ brelse (bh);
+ return NULL;
+ }
+ if (!de->inode && de->rec_len >= rec_len) {
+ if (de->rec_len > rec_len
+ && de->rec_len - rec_len >= EXT_DIR_MIN_SIZE) {
+ /* The found entry is too big : it is split
+ into 2 ones :
+ - the 1st one will be used to hold the name,
+ - the 2nd one is unused */
+ de1 = (struct ext_dir_entry *) ((char *) de + rec_len);
+ de1->inode = 0;
+ de1->rec_len = de->rec_len - rec_len;
+ de1->name_len = 0;
+ de->rec_len = rec_len;
+ }
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ de->name_len = namelen;
+ for (i=0; i < namelen ; i++)
+ de->name[i] = name[i];
+ mark_buffer_dirty(bh, 1);
+ *res_dir = de;
+ return bh;
+ }
+ offset += de->rec_len;
+ de = (struct ext_dir_entry *) ((char *) de + de->rec_len);
+ }
+ brelse(bh);
+ return NULL;
+}
+
+ /*
+  * Create a regular file 'name' in 'dir' and return its inode through
+  * *result. Drops the reference on 'dir'. Returns 0, -ENOENT when dir
+  * is NULL, or -ENOSPC when no inode or directory slot could be had.
+  */
+ int ext_create(struct inode * dir,const char * name, int len, int mode,
+ 	struct inode ** result)
+ {
+ 	struct ext_dir_entry * slot;
+ 	struct buffer_head * dbuf;
+ 	struct inode * fresh;
+ 	int err = -ENOSPC;
+
+ 	*result = NULL;
+ 	if (!dir)
+ 		return -ENOENT;
+ 	fresh = ext_new_inode(dir);
+ 	if (!fresh)
+ 		goto out;
+ 	fresh->i_op = &ext_file_inode_operations;
+ 	fresh->i_mode = mode;
+ 	fresh->i_dirt = 1;
+ 	dbuf = ext_add_entry(dir,name,len,&slot);
+ 	if (!dbuf) {
+ 		/* no room in the directory: give the new inode back */
+ 		fresh->i_nlink--;
+ 		fresh->i_dirt = 1;
+ 		iput(fresh);
+ 		goto out;
+ 	}
+ 	slot->inode = fresh->i_ino;
+ 	mark_buffer_dirty(dbuf, 1);
+ 	brelse(dbuf);
+ 	*result = fresh;
+ 	err = 0;
+ out:
+ 	iput(dir);
+ 	return err;
+ }
+
+ /*
+  * Create a node of the type encoded in 'mode' named 'name' in 'dir'.
+  * 'rdev' is only recorded for character and block devices. Drops the
+  * reference on 'dir'. Returns 0, -ENOENT (dir is NULL), -EEXIST or
+  * -ENOSPC.
+  */
+ int ext_mknod(struct inode * dir, const char * name, int len, int mode, int rdev)
+ {
+ 	struct ext_dir_entry * slot;
+ 	struct buffer_head * dbuf;
+ 	struct inode * node;
+ 	int err;
+
+ 	if (!dir)
+ 		return -ENOENT;
+ 	dbuf = ext_find_entry(dir,name,len,&slot,NULL,NULL);
+ 	if (dbuf) {
+ 		brelse(dbuf);
+ 		err = -EEXIST;
+ 		goto out;
+ 	}
+ 	err = -ENOSPC;
+ 	node = ext_new_inode(dir);
+ 	if (!node)
+ 		goto out;
+ 	node->i_uid = current->fsuid;
+ 	node->i_mode = mode;
+ 	node->i_op = NULL;
+ 	/* pick the operations table that goes with the file type */
+ 	switch (mode & S_IFMT) {
+ 	case S_IFREG:
+ 		node->i_op = &ext_file_inode_operations;
+ 		break;
+ 	case S_IFDIR:
+ 		node->i_op = &ext_dir_inode_operations;
+ 		if (dir->i_mode & S_ISGID)
+ 			node->i_mode |= S_ISGID;
+ 		break;
+ 	case S_IFLNK:
+ 		node->i_op = &ext_symlink_inode_operations;
+ 		break;
+ 	case S_IFCHR:
+ 		node->i_op = &chrdev_inode_operations;
+ 		node->i_rdev = rdev;
+ 		break;
+ 	case S_IFBLK:
+ 		node->i_op = &blkdev_inode_operations;
+ 		node->i_rdev = rdev;
+ 		break;
+ 	case S_IFIFO:
+ 		init_fifo(node);
+ 		break;
+ 	}
+ 	node->i_dirt = 1;
+ 	dbuf = ext_add_entry(dir,name,len,&slot);
+ 	if (dbuf) {
+ 		slot->inode = node->i_ino;
+ 		mark_buffer_dirty(dbuf, 1);
+ 		brelse(dbuf);
+ 		err = 0;
+ 	} else {
+ 		/* no directory slot: release the inode again */
+ 		node->i_nlink--;
+ 		node->i_dirt = 1;
+ 	}
+ 	iput(node);
+ out:
+ 	iput(dir);
+ 	return err;
+ }
+
+ /*
+  * Create directory 'name' in 'dir': allocate an inode, write the "."
+  * and ".." entries into its first block, then link it into the parent.
+  * Drops the reference on 'dir'. Returns 0, -EEXIST or -ENOSPC.
+  */
+ int ext_mkdir(struct inode * dir, const char * name, int len, int mode)
+ {
+ 	struct ext_dir_entry * de, * dot, * dotdot;
+ 	struct buffer_head * bh, * first_block;
+ 	struct inode * inode;
+
+ 	bh = ext_find_entry(dir,name,len,&de,NULL,NULL);
+ 	if (bh) {
+ 		brelse(bh);
+ 		iput(dir);
+ 		return -EEXIST;
+ 	}
+ 	inode = ext_new_inode(dir);
+ 	if (!inode) {
+ 		iput(dir);
+ 		return -ENOSPC;
+ 	}
+ 	inode->i_op = &ext_dir_inode_operations;
+ 	/*
+ 	 * "." and ".." take 16 bytes each: 4 for the inode number, 2 for
+ 	 * the record length, 2 for the name length and 8 for the name.
+ 	 */
+ 	inode->i_size = 2 * 16;
+ 	first_block = ext_bread(inode,0,1);
+ 	if (!first_block) {
+ 		inode->i_nlink--;
+ 		inode->i_dirt = 1;
+ 		goto no_space;
+ 	}
+ 	dot = (struct ext_dir_entry *) first_block->b_data;
+ 	dot->inode = inode->i_ino;
+ 	dot->rec_len = 16;
+ 	dot->name_len = 1;
+ 	strcpy(dot->name,".");
+ 	dotdot = (struct ext_dir_entry *) ((char *) dot + 16);
+ 	dotdot->inode = dir->i_ino;
+ 	dotdot->rec_len = 16;
+ 	dotdot->name_len = 2;
+ 	strcpy(dotdot->name,"..");
+ 	inode->i_nlink = 2;
+ 	mark_buffer_dirty(first_block, 1);
+ 	brelse(first_block);
+ 	inode->i_mode = S_IFDIR | (mode & 0777 & ~current->fs->umask);
+ 	if (dir->i_mode & S_ISGID)
+ 		inode->i_mode |= S_ISGID;
+ 	inode->i_dirt = 1;
+ 	bh = ext_add_entry(dir,name,len,&de);
+ 	if (!bh) {
+ 		inode->i_nlink = 0;
+ 		goto no_space;
+ 	}
+ 	de->inode = inode->i_ino;
+ 	mark_buffer_dirty(bh, 1);
+ 	/* the new ".." entry is one more link to the parent */
+ 	dir->i_nlink++;
+ 	dir->i_dirt = 1;
+ 	iput(dir);
+ 	iput(inode);
+ 	brelse(bh);
+ 	return 0;
+
+ no_space:
+ 	iput(dir);
+ 	iput(inode);
+ 	return -ENOSPC;
+ }
+
+/*
+ * routine to check that the specified directory is empty (for rmdir)
+ */
+static int empty_dir(struct inode * inode)
+{
+ unsigned long offset;
+ struct buffer_head * bh;
+ struct ext_dir_entry * de, * de1;
+
+ if (inode->i_size < 2 * 12 || !(bh = ext_bread(inode,0,0))) {
+ printk("warning - bad directory on dev %04x\n",inode->i_dev);
+ return 1;
+ }
+ de = (struct ext_dir_entry *) bh->b_data;
+ de1 = (struct ext_dir_entry *) ((char *) de + de->rec_len);
+ if (de->inode != inode->i_ino || !de1->inode ||
+ strcmp(".",de->name) || strcmp("..",de1->name)) {
+ printk("warning - bad directory on dev %04x\n",inode->i_dev);
+ return 1;
+ }
+ offset = de->rec_len + de1->rec_len;
+ de = (struct ext_dir_entry *) ((char *) de1 + de1->rec_len);
+ while (offset < inode->i_size ) {
+ if ((void *) de >= (void *) (bh->b_data+BLOCK_SIZE)) {
+ brelse(bh);
+ bh = ext_bread(inode, offset >> BLOCK_SIZE_BITS,1);
+ if (!bh) {
+ offset += BLOCK_SIZE;
+ continue;
+ }
+ de = (struct ext_dir_entry *) bh->b_data;
+ }
+ if (de->rec_len < 8 || de->rec_len %4 != 0 ||
+ de->rec_len < de->name_len + 8) {
+ printk ("empty_dir: bad dir entry\n");
+ printk ("dev=%d, dir=%ld, offset=%ld, rec_len=%d, name_len=%d\n",
+ inode->i_dev, inode->i_ino, offset, de->rec_len, de->name_len);
+ brelse (bh);
+ return 1;
+ }
+ if (de->inode) {
+ brelse(bh);
+ return 0;
+ }
+ offset += de->rec_len;
+ de = (struct ext_dir_entry *) ((char *) de + de->rec_len);
+ }
+ brelse(bh);
+ return 1;
+}
+
+ /*
+  * Coalesce the entry 'de' with its neighbours: an unused next entry is
+  * folded into 'de', then 'de' is folded into an unused previous entry.
+  * 'pde' and 'nde' may be NULL.
+  */
+ static inline void ext_merge_entries (struct ext_dir_entry * de,
+ 	struct ext_dir_entry * pde, struct ext_dir_entry * nde)
+ {
+ 	/* successor first, so that a free predecessor absorbs both */
+ 	if (nde != NULL && nde->inode == 0)
+ 		de->rec_len = de->rec_len + nde->rec_len;
+ 	if (pde != NULL && pde->inode == 0)
+ 		pde->rec_len = pde->rec_len + de->rec_len;
+ }
+
+ /*
+  * Remove the empty directory 'name' from 'dir'. Drops the reference on
+  * 'dir'. Returns 0, -ENOENT, -EPERM, -ENOTDIR, -ENOTEMPTY or -EBUSY.
+  */
+ int ext_rmdir(struct inode * dir, const char * name, int len)
+ {
+ 	struct ext_dir_entry * de, * pde, * nde;
+ 	struct buffer_head * bh;
+ 	struct inode * victim = NULL;
+ 	int err = -ENOENT;
+
+ 	bh = ext_find_entry(dir,name,len,&de,&pde,&nde);
+ 	if (!bh)
+ 		goto out;
+ 	err = -EPERM;
+ 	victim = iget(dir->i_sb, de->inode);
+ 	if (!victim)
+ 		goto out;
+ 	/* sticky directory: only the owner of either inode (or fsuser) may remove */
+ 	if ((dir->i_mode & S_ISVTX) && !fsuser() &&
+ 	    current->fsuid != victim->i_uid &&
+ 	    current->fsuid != dir->i_uid)
+ 		goto out;
+ 	/* we may not delete ".", but "../dir" is ok */
+ 	if (victim->i_dev != dir->i_dev || victim == dir)
+ 		goto out;
+ 	err = -ENOTDIR;
+ 	if (!S_ISDIR(victim->i_mode))
+ 		goto out;
+ 	err = -ENOTEMPTY;
+ 	if (!empty_dir(victim))
+ 		goto out;
+ 	err = -EBUSY;
+ 	if (victim->i_count > 1)
+ 		goto out;
+ 	if (victim->i_nlink != 2)
+ 		printk("empty directory has nlink!=2 (%d)\n",victim->i_nlink);
+ 	/* free the slot and merge it with unused neighbours */
+ 	de->inode = 0;
+ 	de->name_len = 0;
+ 	ext_merge_entries (de, pde, nde);
+ 	mark_buffer_dirty(bh, 1);
+ 	victim->i_nlink=0;
+ 	victim->i_dirt=1;
+ 	dir->i_nlink--;
+ 	victim->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ 	dir->i_dirt=1;
+ 	err = 0;
+ out:
+ 	iput(dir);
+ 	iput(victim);
+ 	brelse(bh);
+ 	return err;
+ }
+
+ /*
+  * Remove the non-directory entry 'name' from 'dir' and drop one link
+  * of its inode. Drops the reference on 'dir'. Returns 0, -ENOENT or
+  * -EPERM (sticky-bit refusal, or the target is a directory).
+  */
+ int ext_unlink(struct inode * dir, const char * name, int len)
+ {
+ 	struct ext_dir_entry * de, * pde, * nde;
+ 	struct buffer_head * bh;
+ 	struct inode * target = NULL;
+ 	int err = -ENOENT;
+
+ 	bh = ext_find_entry(dir,name,len,&de,&pde,&nde);
+ 	if (!bh)
+ 		goto out;
+ 	target = iget(dir->i_sb, de->inode);
+ 	if (!target)
+ 		goto out;
+ 	err = -EPERM;
+ 	if ((dir->i_mode & S_ISVTX) && !fsuser() &&
+ 	    current->fsuid != target->i_uid &&
+ 	    current->fsuid != dir->i_uid)
+ 		goto out;
+ 	if (S_ISDIR(target->i_mode))
+ 		goto out;
+ 	if (target->i_nlink == 0) {
+ 		printk("Deleting nonexistent file (%04x:%ld), %d\n",
+ 			target->i_dev,target->i_ino,target->i_nlink);
+ 		/* pretend there was one link so the decrement below ends at 0 */
+ 		target->i_nlink=1;
+ 	}
+ 	de->inode = 0;
+ 	de->name_len = 0;
+ 	ext_merge_entries (de, pde, nde);
+ 	mark_buffer_dirty(bh, 1);
+ 	target->i_nlink--;
+ 	target->i_dirt = 1;
+ 	target->i_ctime = CURRENT_TIME;
+ 	dir->i_ctime = dir->i_mtime = target->i_ctime;
+ 	dir->i_dirt = 1;
+ 	err = 0;
+ out:
+ 	brelse(bh);
+ 	iput(target);
+ 	iput(dir);
+ 	return err;
+ }
+
+ /*
+  * Create the symbolic link 'name' in 'dir' pointing at 'symname'. The
+  * target (at most 1023 characters, NUL-terminated) is stored in the
+  * first data block of the new inode. Drops the reference on 'dir'.
+  * Returns 0, -EEXIST or -ENOSPC.
+  */
+ int ext_symlink(struct inode * dir, const char * name, int len, const char * symname)
+ {
+ 	struct ext_dir_entry * de;
+ 	struct buffer_head * bh = NULL, * name_block;
+ 	struct inode * inode;
+ 	int err = -ENOSPC;
+ 	int i;
+
+ 	inode = ext_new_inode(dir);
+ 	if (!inode) {
+ 		iput(dir);
+ 		return -ENOSPC;
+ 	}
+ 	inode->i_mode = S_IFLNK | 0777;
+ 	inode->i_op = &ext_symlink_inode_operations;
+ 	name_block = ext_bread(inode,0,1);
+ 	if (!name_block)
+ 		goto drop_inode;
+ 	for (i = 0; i < 1023 && symname[i] != '\0'; i++)
+ 		name_block->b_data[i] = symname[i];
+ 	name_block->b_data[i] = 0;
+ 	mark_buffer_dirty(name_block, 1);
+ 	brelse(name_block);
+ 	inode->i_size = i;
+ 	inode->i_dirt = 1;
+ 	bh = ext_find_entry(dir,name,len,&de,NULL,NULL);
+ 	if (bh) {
+ 		err = -EEXIST;
+ 		goto drop_inode;
+ 	}
+ 	bh = ext_add_entry(dir,name,len,&de);
+ 	if (!bh)
+ 		goto drop_inode;
+ 	de->inode = inode->i_ino;
+ 	mark_buffer_dirty(bh, 1);
+ 	brelse(bh);
+ 	iput(dir);
+ 	iput(inode);
+ 	return 0;
+
+ drop_inode:
+ 	/* undo the inode allocation; bh is only non-NULL in the -EEXIST case */
+ 	inode->i_nlink--;
+ 	inode->i_dirt = 1;
+ 	iput(inode);
+ 	brelse(bh);
+ 	iput(dir);
+ 	return err;
+ }
+
+ /*
+  * Add the hard link 'name' in 'dir' for 'oldinode'. Both inode
+  * references are dropped. Returns 0, -EPERM (directories cannot be
+  * linked), -EMLINK, -EEXIST or -ENOSPC.
+  */
+ int ext_link(struct inode * oldinode, struct inode * dir, const char * name, int len)
+ {
+ 	struct ext_dir_entry * de;
+ 	struct buffer_head * bh;
+ 	int err;
+
+ 	err = -EPERM;
+ 	if (S_ISDIR(oldinode->i_mode))
+ 		goto out;
+ 	err = -EMLINK;
+ 	if (oldinode->i_nlink > 32000)
+ 		goto out;
+ 	err = -EEXIST;
+ 	bh = ext_find_entry(dir,name,len,&de,NULL,NULL);
+ 	if (bh) {
+ 		brelse(bh);
+ 		goto out;
+ 	}
+ 	err = -ENOSPC;
+ 	bh = ext_add_entry(dir,name,len,&de);
+ 	if (!bh)
+ 		goto out;
+ 	de->inode = oldinode->i_ino;
+ 	mark_buffer_dirty(bh, 1);
+ 	brelse(bh);
+ 	oldinode->i_nlink++;
+ 	oldinode->i_ctime = CURRENT_TIME;
+ 	oldinode->i_dirt = 1;
+ 	err = 0;
+ out:
+ 	iput(dir);
+ 	iput(oldinode);
+ 	return err;
+ }
+
+ /*
+  * Returns 1 if 'old_inode' is 'new_inode' itself or is reached from it
+  * by following ".." entries, 0 otherwise. The walk stops at a device
+  * change, at a failed lookup, or when ".." leads back to the same inode
+  * number. The caller's reference on 'new_inode' is left untouched.
+  */
+ static int subdir(struct inode * new_inode, struct inode * old_inode)
+ {
+ 	int child_ino;
+ 	int found;
+
+ 	/* ext_lookup() consumes a reference on every step: take our own */
+ 	new_inode->i_count++;
+ 	while (!(found = (new_inode == old_inode))) {
+ 		if (new_inode->i_dev != old_inode->i_dev)
+ 			break;
+ 		child_ino = new_inode->i_ino;
+ 		if (ext_lookup(new_inode,"..",2,&new_inode) != 0)
+ 			break;
+ 		if (new_inode->i_ino == child_ino)
+ 			break;
+ 	}
+ 	iput(new_inode);
+ 	return found;
+ }
+
+ /*
+  * Accessors for the second entry of a directory block, i.e. the entry
+  * that follows the first one by its rec_len (".." in a block laid out
+  * by ext_mkdir). 'buffer' is the start of the block data. The argument
+  * and the whole expansion are parenthesized so that any expression can
+  * be passed; both macros still yield lvalues.
+  */
+ #define PARENT_INO(buffer) \
+ (((struct ext_dir_entry *) ((char *) (buffer) + \
+ ((struct ext_dir_entry *) (buffer))->rec_len))->inode)
+
+ #define PARENT_NAME(buffer) \
+ (((struct ext_dir_entry *) ((char *) (buffer) + \
+ ((struct ext_dir_entry *) (buffer))->rec_len))->name)
+
+/*
+ * rename uses retrying to avoid race-conditions: at least they should be minimal.
+ * it tries to allocate all the blocks, then sanity-checks, and if the sanity-
+ * checks fail, it tries to restart itself again. Very practical - no changes
+ * are done until we know everything works ok.. and then all the changes can be
+ * done in one fell swoop when we have claimed all the buffers needed.
+ *
+ * Anybody can rename anything with this: the permission checks are left to the
+ * higher-level routines.
+ */
+ static int do_ext_rename(struct inode * old_dir, const char * old_name, int old_len, /* moves old_dir/old_name to new_dir/new_name */
+ struct inode * new_dir, const char * new_name, int new_len) /* both directory references are dropped before returning; returns 0 or a negative errno */
+ {
+ struct inode * old_inode, * new_inode;
+ struct buffer_head * old_bh, * new_bh, * dir_bh; /* dir_bh: first block of old_inode, only read when a directory is moved */
+ struct ext_dir_entry * old_de, * new_de, * pde, * nde; /* pde/nde: neighbours of old_de, used for merging the freed slot */
+ int retval;
+
+ goto start_up;
+ try_again: /* a sanity check failed: release everything, yield the CPU, start over */
+ brelse(old_bh);
+ brelse(new_bh);
+ brelse(dir_bh);
+ iput(old_inode);
+ iput(new_inode);
+ current->counter = 0;
+ schedule();
+ start_up:
+ old_inode = new_inode = NULL;
+ old_bh = new_bh = dir_bh = NULL;
+ old_bh = ext_find_entry(old_dir,old_name,old_len,&old_de,&pde,&nde);
+ retval = -ENOENT;
+ if (!old_bh)
+ goto end_rename;
+ old_inode = __iget(old_dir->i_sb, old_de->inode,0); /* don't cross mnt-points */
+ if (!old_inode)
+ goto end_rename;
+ retval = -EPERM;
+ if ((old_dir->i_mode & S_ISVTX) && /* sticky source directory: owner of the file or of the directory, or fsuser, only */
+ current->fsuid != old_inode->i_uid &&
+ current->fsuid != old_dir->i_uid && !fsuser())
+ goto end_rename;
+ new_bh = ext_find_entry(new_dir,new_name,new_len,&new_de,NULL,NULL);
+ if (new_bh) { /* the target name already exists */
+ new_inode = __iget(new_dir->i_sb, new_de->inode,0); /* don't cross mnt-points */
+ if (!new_inode) {
+ brelse(new_bh);
+ new_bh = NULL;
+ }
+ }
+ if (new_inode == old_inode) { /* source and target are the same inode: nothing to do */
+ retval = 0;
+ goto end_rename;
+ }
+ if (new_inode && S_ISDIR(new_inode->i_mode)) { /* an existing directory is never replaced */
+ retval = -EEXIST;
+ goto end_rename;
+ }
+ retval = -EPERM;
+ if (new_inode && (new_dir->i_mode & S_ISVTX) && /* same sticky-bit rule for the entry being replaced */
+ current->fsuid != new_inode->i_uid &&
+ current->fsuid != new_dir->i_uid && !fsuser())
+ goto end_rename;
+ if (S_ISDIR(old_inode->i_mode)) { /* extra checks when a directory is moved */
+ retval = -EEXIST;
+ if (new_bh)
+ goto end_rename;
+ retval = -EACCES;
+ if (!permission(old_inode, MAY_WRITE))
+ goto end_rename;
+ retval = -EINVAL;
+ if (subdir(new_dir, old_inode)) /* refuse to move a directory below itself */
+ goto end_rename;
+ retval = -EIO;
+ dir_bh = ext_bread(old_inode,0,0);
+ if (!dir_bh)
+ goto end_rename;
+ if (PARENT_INO(dir_bh->b_data) != old_dir->i_ino) /* the second entry of the moved directory must point at old_dir */
+ goto end_rename;
+ }
+ if (!new_bh)
+ new_bh = ext_add_entry(new_dir,new_name,new_len,&new_de);
+ retval = -ENOSPC;
+ if (!new_bh)
+ goto end_rename;
+ /* sanity checking before doing the rename - avoid races */
+ if (new_inode && (new_de->inode != new_inode->i_ino))
+ goto try_again;
+ if (new_de->inode && !new_inode)
+ goto try_again;
+ if (old_de->inode != old_inode->i_ino)
+ goto try_again;
+ /* ok, that's it */
+ old_de->inode = 0;
+ old_de->name_len = 0;
+ new_de->inode = old_inode->i_ino; /* set before merging so a neighbour just claimed as new_de is not swallowed */
+ ext_merge_entries (old_de, pde, nde);
+ if (new_inode) { /* the replaced file loses the link held by the target name */
+ new_inode->i_nlink--;
+ new_inode->i_dirt = 1;
+ }
+ mark_buffer_dirty(old_bh, 1);
+ mark_buffer_dirty(new_bh, 1);
+ if (dir_bh) { /* a directory moved: repoint its parent entry and fix both link counts */
+ PARENT_INO(dir_bh->b_data) = new_dir->i_ino;
+ mark_buffer_dirty(dir_bh, 1);
+ old_dir->i_nlink--;
+ new_dir->i_nlink++;
+ old_dir->i_dirt = 1;
+ new_dir->i_dirt = 1;
+ }
+ retval = 0;
+ end_rename: /* common exit: every pointer here is either valid or NULL */
+ brelse(dir_bh);
+ brelse(old_bh);
+ brelse(new_bh);
+ iput(old_inode);
+ iput(new_inode);
+ iput(old_dir);
+ iput(new_dir);
+ return retval;
+ }
+
+/*
+ * Ok, rename also locks out other renames, as they can change the parent of
+ * a directory, and we don't want any races. Other races are checked for by
+ * "do_ext_rename()", which restarts if there are inconsistencies.
+ *
+ * Note that there is no race between different filesystems: it's only within
+ * the same device that races occur: many renames can happen at once, as long
+ * as they are on different partitions.
+ */
+ /*
+  * Serialising wrapper around do_ext_rename(): a single flag, shared by
+  * all callers, lets only one rename run at a time; the others sleep on
+  * the wait queue until it is cleared.
+  */
+ int ext_rename(struct inode * old_dir, const char * old_name, int old_len,
+ 	struct inode * new_dir, const char * new_name, int new_len)
+ {
+ 	static struct wait_queue * wait = NULL;
+ 	static int lock = 0;
+ 	int result;
+
+ 	for (;;) {
+ 		if (!lock)
+ 			break;
+ 		sleep_on(&wait);
+ 	}
+ 	lock = 1;
+ 	result = do_ext_rename(old_dir, old_name, old_len,
+ 		new_dir, new_name, new_len);
+ 	lock = 0;
+ 	wake_up(&wait);
+ 	return result;
+ }
diff --git a/fs/ext/symlink.c b/fs/ext/symlink.c
new file mode 100644
index 000000000..8c84bc622
--- /dev/null
+++ b/fs/ext/symlink.c
@@ -0,0 +1,108 @@
+/*
+ * linux/fs/ext/symlink.c
+ *
+ * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
+ *
+ * from
+ *
+ * linux/fs/minix/symlink.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext symlink handling code
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/ext_fs.h>
+#include <linux/stat.h>
+
+ static int ext_readlink(struct inode *, char *, int); /* defined further down in this file */
+ static int ext_follow_link(struct inode *, struct inode *, int, int, struct inode **); /* defined further down in this file */
+
+ /*
+ * Inode operations for ext symbolic links: only readlink and follow_link are provided, every other slot is NULL.
+ */
+ struct inode_operations ext_symlink_inode_operations = {
+ NULL, /* no file-operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ ext_readlink, /* readlink */
+ ext_follow_link, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+ };
+
+ /*
+  * Resolve 'inode' if it is a symbolic link: the link text in its first
+  * block is handed to open_namei(), starting from 'dir' (the process
+  * root when dir is NULL). An inode that is not a link is returned
+  * unchanged through *res_inode. Returns 0, -ENOENT, -ELOOP (more than
+  * five nested links), -EIO, or whatever open_namei() returns.
+  */
+ static int ext_follow_link(struct inode * dir, struct inode * inode,
+ 	int flag, int mode, struct inode ** res_inode)
+ {
+ 	struct buffer_head * bh;
+ 	int error;
+
+ 	*res_inode = NULL;
+ 	if (dir == NULL) {
+ 		dir = current->fs->root;
+ 		dir->i_count++;
+ 	}
+ 	if (inode == NULL) {
+ 		iput(dir);
+ 		return -ENOENT;
+ 	}
+ 	if (!S_ISLNK(inode->i_mode)) {
+ 		iput(dir);
+ 		*res_inode = inode;
+ 		return 0;
+ 	}
+ 	error = -ELOOP;
+ 	if (current->link_count > 5)
+ 		goto drop_both;
+ 	error = -EIO;
+ 	bh = ext_bread(inode, 0, 0);
+ 	if (bh == NULL)
+ 		goto drop_both;
+ 	/* the link text lives in the buffer: the inode is no longer needed */
+ 	iput(inode);
+ 	current->link_count++;
+ 	error = open_namei(bh->b_data,flag,mode,res_inode,dir);
+ 	current->link_count--;
+ 	brelse(bh);
+ 	return error;
+
+ drop_both:
+ 	iput(dir);
+ 	iput(inode);
+ 	return error;
+ }
+
+ /*
+  * Copy the target of the symbolic link 'inode' to 'buffer' with
+  * put_fs_byte(), at most min(buflen, 1023) bytes and without a
+  * terminating NUL. The inode reference is dropped. Returns the number
+  * of bytes copied (0 if the link block cannot be read) or -EINVAL when
+  * the inode is not a symbolic link.
+  */
+ static int ext_readlink(struct inode * inode, char * buffer, int buflen)
+ {
+ 	struct buffer_head * bh;
+ 	int i;
+ 	char c;
+
+ 	if (!S_ISLNK(inode->i_mode)) {
+ 		iput(inode);
+ 		return -EINVAL;
+ 	}
+ 	if (buflen > 1023)
+ 		buflen = 1023;
+ 	bh = ext_bread(inode, 0, 0);
+ 	iput(inode);
+ 	if (!bh)
+ 		return 0;
+ 	for (i = 0; i < buflen; i++) {
+ 		c = bh->b_data[i];
+ 		if (c == '\0')
+ 			break;
+ 		put_fs_byte(c, buffer + i);
+ 	}
+ 	brelse(bh);
+ 	return i;
+ }
diff --git a/fs/ext/truncate.c b/fs/ext/truncate.c
new file mode 100644
index 000000000..a2b485821
--- /dev/null
+++ b/fs/ext/truncate.c
@@ -0,0 +1,252 @@
+/*
+ * linux/fs/ext/truncate.c
+ *
+ * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
+ *
+ * from
+ *
+ * linux/fs/minix/truncate.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/sched.h>
+#include <linux/ext_fs.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/errno.h>
+
+/*
+ * Truncate has the most races in the whole filesystem: coding it is
+ * a pain in the a**. Especially as I don't do any locking...
+ *
+ * The code may look a bit weird, but that's just because I've tried to
+ * handle things like file-size changes in a somewhat graceful manner.
+ * Anyway, truncating a file at the same time somebody else writes to it
+ * is likely to result in pretty weird behaviour...
+ *
+ * The new code handles normal truncates (size = 0) as well as the more
+ * general case (size = XXX). I hope.
+ */
+
+ static int trunc_direct(struct inode * inode) /* frees the blocks in i_data slots DIRECT_BLOCK..8; returns 1 if a block had to be left for a later pass, else 0 */
+ {
+ int i, tmp;
+ unsigned long * p;
+ struct buffer_head * bh;
+ int retry = 0;
+ #define DIRECT_BLOCK ((inode->i_size + 1023) >> 10) /* i_size rounded up to 1K blocks = index of the first block past the size; re-reads i_size at every use */
+
+ repeat:
+ for (i = DIRECT_BLOCK ; i < 9 ; i++) {
+ p = inode->u.ext_i.i_data+i;
+ if (!(tmp = *p)) /* slot is empty: nothing to free */
+ continue;
+ bh = getblk(inode->i_dev,tmp,BLOCK_SIZE);
+ if (i < DIRECT_BLOCK) { /* i_size now covers slot i (DIRECT_BLOCK is re-evaluated): start over */
+ brelse(bh);
+ goto repeat;
+ }
+ if ((bh && bh->b_count != 1) || tmp != *p) { /* buffer has another user, or the slot changed meanwhile: skip and ask for a retry */
+ retry = 1;
+ brelse(bh);
+ continue;
+ }
+ *p = 0; /* unhook the block from the inode before handing it to ext_free_block() */
+ inode->i_dirt = 1;
+ brelse(bh);
+ ext_free_block(inode->i_sb,tmp);
+ }
+ return retry;
+ }
+
+ static int trunc_indirect(struct inode * inode, int offset, unsigned long * p) /* frees blocks listed in the indirect block *p; 'offset' is the file block number of its first entry; returns nonzero if a retry is needed */
+ {
+ int i, tmp;
+ struct buffer_head * bh;
+ struct buffer_head * ind_bh;
+ unsigned long * ind;
+ int retry = 0;
+ #define INDIRECT_BLOCK (DIRECT_BLOCK-offset) /* first entry of this indirect block lying past i_size; can be negative or above 255 */
+
+ tmp = *p;
+ if (!tmp) /* no indirect block allocated */
+ return 0;
+ ind_bh = bread(inode->i_dev, tmp, BLOCK_SIZE);
+ if (tmp != *p) { /* *p changed during bread(): let the caller retry */
+ brelse(ind_bh);
+ return 1;
+ }
+ if (!ind_bh) { /* unreadable indirect block: the reference is dropped, nothing is freed */
+ *p = 0;
+ return 0;
+ }
+ repeat:
+ for (i = INDIRECT_BLOCK ; i < 256 ; i++) {
+ if (i < 0) /* whole indirect block is past i_size: start at entry 0 */
+ i = 0;
+ if (i < INDIRECT_BLOCK) /* i_size now covers entry i: recompute the start */
+ goto repeat;
+ ind = i+(unsigned long *) ind_bh->b_data;
+ tmp = *ind;
+ if (!tmp)
+ continue;
+ bh = getblk(inode->i_dev,tmp,BLOCK_SIZE);
+ if (i < INDIRECT_BLOCK) {
+ brelse(bh);
+ goto repeat;
+ }
+ if ((bh && bh->b_count != 1) || tmp != *ind) { /* block in use elsewhere or entry changed: skip and ask for a retry */
+ retry = 1;
+ brelse(bh);
+ continue;
+ }
+ *ind = 0;
+ mark_buffer_dirty(ind_bh, 1);
+ brelse(bh);
+ ext_free_block(inode->i_sb,tmp);
+ }
+ ind = (unsigned long *) ind_bh->b_data;
+ for (i = 0; i < 256; i++) /* look for any entry that is still in use */
+ if (*(ind++))
+ break;
+ if (i >= 256) /* all 256 entries are zero: the indirect block itself can go */
+ if (ind_bh->b_count != 1) /* the else below pairs with this inner if */
+ retry = 1;
+ else {
+ tmp = *p;
+ *p = 0;
+ inode->i_dirt = 1;
+ ext_free_block(inode->i_sb,tmp);
+ }
+ brelse(ind_bh);
+ return retry;
+ }
+
+ static int trunc_dindirect(struct inode * inode, int offset, unsigned long * p) /* runs trunc_indirect() over the entries of the double-indirect block *p; 'offset' is the file block number of its first data block; returns nonzero if a retry is needed */
+ {
+ int i,tmp;
+ struct buffer_head * dind_bh;
+ unsigned long * dind;
+ int retry = 0;
+ #define DINDIRECT_BLOCK ((DIRECT_BLOCK-offset)>>8) /* first entry (each covering 256 blocks) that reaches past i_size */
+
+ tmp = *p;
+ if (!tmp) /* no double-indirect block allocated */
+ return 0;
+ dind_bh = bread(inode->i_dev, tmp, BLOCK_SIZE);
+ if (tmp != *p) { /* *p changed during bread(): let the caller retry */
+ brelse(dind_bh);
+ return 1;
+ }
+ if (!dind_bh) { /* unreadable block: the reference is dropped, nothing is freed */
+ *p = 0;
+ return 0;
+ }
+ repeat:
+ for (i = DINDIRECT_BLOCK ; i < 256 ; i ++) {
+ if (i < 0)
+ i = 0;
+ if (i < DINDIRECT_BLOCK) /* i_size now covers entry i: recompute the start */
+ goto repeat;
+ dind = i+(unsigned long *) dind_bh->b_data;
+ tmp = *dind;
+ if (!tmp)
+ continue;
+ retry |= trunc_indirect(inode,offset+(i<<8),dind); /* entry i maps file blocks starting at offset + i*256 */
+ mark_buffer_dirty(dind_bh, 1);
+ }
+ dind = (unsigned long *) dind_bh->b_data;
+ for (i = 0; i < 256; i++) /* look for any entry that is still in use */
+ if (*(dind++))
+ break;
+ if (i >= 256) /* block is all zero: free it as well */
+ if (dind_bh->b_count != 1) /* the else below pairs with this inner if */
+ retry = 1;
+ else {
+ tmp = *p;
+ *p = 0;
+ inode->i_dirt = 1;
+ ext_free_block(inode->i_sb,tmp);
+ }
+ brelse(dind_bh);
+ return retry;
+ }
+
+ static int trunc_tindirect(struct inode * inode) /* runs trunc_dindirect() over the entries of the triple-indirect block in i_data[11]; returns nonzero if a retry is needed */
+ {
+ int i,tmp;
+ struct buffer_head * tind_bh;
+ unsigned long * tind, * p;
+ int retry = 0;
+ #define TINDIRECT_BLOCK ((DIRECT_BLOCK-(256*256+256+9))>>16) /* first entry (each covering 65536 blocks) that reaches past i_size */
+
+ p = inode->u.ext_i.i_data+11;
+ if (!(tmp = *p)) /* no triple-indirect block allocated */
+ return 0;
+ tind_bh = bread(inode->i_dev, tmp, BLOCK_SIZE);
+ if (tmp != *p) { /* slot changed during bread(): let the caller retry */
+ brelse(tind_bh);
+ return 1;
+ }
+ if (!tind_bh) { /* unreadable block: the reference is dropped, nothing is freed */
+ *p = 0;
+ return 0;
+ }
+ repeat:
+ for (i = TINDIRECT_BLOCK ; i < 256 ; i ++) {
+ if (i < 0)
+ i = 0;
+ if (i < TINDIRECT_BLOCK) /* i_size now covers entry i: recompute the start */
+ goto repeat;
+ tind = i+(unsigned long *) tind_bh->b_data;
+ retry |= trunc_dindirect(inode,9+256+256*256+(i<<16),tind); /* empty entries are handled inside trunc_dindirect() */
+ mark_buffer_dirty(tind_bh, 1);
+ }
+ tind = (unsigned long *) tind_bh->b_data;
+ for (i = 0; i < 256; i++) /* look for any entry that is still in use */
+ if (*(tind++))
+ break;
+ if (i >= 256) /* block is all zero: free it as well */
+ if (tind_bh->b_count != 1) /* the else below pairs with this inner if */
+ retry = 1;
+ else {
+ tmp = *p;
+ *p = 0;
+ inode->i_dirt = 1;
+ ext_free_block(inode->i_sb,tmp);
+ }
+ brelse(tind_bh);
+ return retry;
+ }
+
+ /*
+  * Release every block lying past inode->i_size. Only regular files,
+  * directories and symbolic links are handled. The four passes are
+  * repeated, yielding the CPU in between, until none of them asks for
+  * a retry; mtime and ctime are then updated.
+  */
+ void ext_truncate(struct inode * inode)
+ {
+ 	int retry;
+
+ 	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
+ 	    !S_ISLNK(inode->i_mode))
+ 		return;
+ 	do {
+ 		retry = trunc_direct(inode);
+ 		retry |= trunc_indirect(inode,9,inode->u.ext_i.i_data+9);
+ 		retry |= trunc_dindirect(inode,9+256,inode->u.ext_i.i_data+10);
+ 		retry |= trunc_tindirect(inode);
+ 		if (retry) {
+ 			/* some block was busy: let others run, then try again */
+ 			current->counter = 0;
+ 			schedule();
+ 		}
+ 	} while (retry);
+ 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ 	inode->i_dirt = 1;
+ }
+
+/*
+ * Called when a inode is released. Note that this is different
+ * from ext_open: open gets called at every open, but release
+ * gets called only when /all/ the files are closed.
+ */
+void ext_release(struct inode * inode, struct file * filp)
+{
+ printk("ext_release not implemented\n");
+}
diff --git a/fs/ext2/CHANGES b/fs/ext2/CHANGES
new file mode 100644
index 000000000..b760d18c7
--- /dev/null
+++ b/fs/ext2/CHANGES
@@ -0,0 +1,140 @@
+Changes from version 0.5 to version 0.5a
+========================================
+ - Some cleanups in the error messages (some versions of syslog contain
+ a bug which truncates an error message if it contains '\n').
+ - Check that no data can be written to a file past the 2GB limit.
+ - The famous readdir() bug has been fixed by Stephen Tweedie.
+ - Added a revision level in the superblock.
+ - Full support for O_SYNC flag of the open system call.
+ - New mount options: `resuid=#uid' and `resgid=#gid'. `resuid' causes
+ ext2fs to consider user #uid like root for the reserved blocks.
+ `resgid' acts the same way with group #gid. New fields in the
+ superblock contain default values for resuid and resgid and can
+ be modified by tune2fs.
+ Idea comes from Rene Cougnenc <cougnenc@renux.frmug.fr.net>.
+ - New mount options: `bsddf' and `minixdf'. `bsddf' causes ext2fs
+ to remove the blocks used for FS structures from the total block
+ count in statfs. With `minixdf', ext2fs mimics Minix behavior
+ in statfs (i.e. it returns the total number of blocks on the
+ partition). This is intended to make bde happy :-)
+ - New file attributes:
+ - Immutable files cannot be modified. Data cannot be written to
+ these files. They cannot be removed, renamed and new links cannot
+ be created. Even root cannot modify the files. He has to remove
+ the immutable attribute first.
+ - Append-only files: data can only be written to them in append mode.
+ They cannot be removed, renamed and new links cannot be created.
+ Note: files may only be added to an append-only directory.
+ - No-dump files: the attribute is not used by the kernel. My port
+ of dump uses it to avoid backing up files which are not important.
+ - New check in ext2_check_dir_entry: the inode number is checked.
+ - Support for big file systems: the copy of the FS descriptor is now
+ dynamically allocated (previous versions used a fixed size array).
+ This allows file systems larger than 2GB to be mounted.
+ - Reorganization of the ext2_inode structure to allow other operating
+ systems to create specific fields if they use ext2fs as their native
+ file system. Currently, ext2fs is only implemented in Linux but
+ will soon be part of Gnu Hurd and of Masix.
+
+Changes from version 0.4b to version 0.5
+========================================
+ - New superblock fields: s_lastcheck and s_checkinterval added
+ by Uwe Ohse <uwe@tirka.gun.de> to implement time-dependent checks
+ of the file system
+ - Real random numbers for secure rm added by Pierre del Perugia
+ <delperug@gla.ecoledoc.ibp.fr>
+ - The mount warnings related to the state of a fs are not printed
+ if the fs is mounted read-only, idea by Nick Holloway
+ <alfie@dcs.warwick.ac.uk>
+
+Changes from version 0.4a to version 0.4b
+=========================================
+ - Copyrights changed to include the name of my laboratory.
+ - Clean up of balloc.c and ialloc.c.
+ - More consistency checks.
+ - Block preallocation added by Stephen Tweedie.
+ - Direct reads of directories disallowed.
+ - Readahead implemented in readdir by Stephen Tweedie.
+ - Bugs in block and inodes allocation fixed.
+ - Readahead implemented in ext2_find_entry by Chip Salzenberg.
+ - New mount options:
+ `check=none|normal|strict'
+ `debug'
+ `errors=continue|remount-ro|panic'
+ `grpid', `bsdgroups'
+ `nocheck'
+ `nogrpid', `sysvgroups'
+ - truncate() now tries to deallocate contiguous blocks in a single call
+ to ext2_free_blocks().
+ - lots of cosmetic changes.
+
+Changes from version 0.4 to version 0.4a
+========================================
+ - the `sync' option support is now complete. Version 0.4 did not
+ support it when truncating a file. I have tested the synchronous
+ writes and they work but they make the system very slow :-( I have
+ to work again on this to make it faster.
+ - when detecting an error on a mounted filesystem, version 0.4 used
+ to try to write a flag in the super block even if the filesystem had
+ been mounted read-only. This is fixed.
+ - the `sb=#' option now causes the kernel code to use the filesystem
+ descriptors located at block #+1. Version 0.4 used the superblock
+ backup located at block # but used the main copy of the descriptors.
+ - a new file attribute `S' is supported. This attribute causes
+ synchronous writes but is applied to a file not to the entire file
+ system (thanks to Michael Kraehe <kraehe@bakunin.north.de> for
+ suggesting it).
+ - the directory cache is inhibited by default. The cache management
+ code seems to be buggy and I have to look at it carefully before
+ using it again.
+ - deleting a file with the `s' attribute (secure deletion) causes its
+ blocks to be overwritten with random values not with zeros (thanks to
+ Michael A. Griffith <grif@cs.ucr.edu> for suggesting it).
+ - lots of cosmetic changes have been made.
+
+Changes from version 0.3 to version 0.4
+=======================================
+ - Three new mount options are supported: `check', `sync' and `sb=#'.
+ `check' tells the kernel code to make more consistency checks
+ when the file system is mounted. Currently, the kernel code checks
+ that the blocks and inodes bitmaps are consistent with the free
+ blocks and inodes counts. More checks will be added in future
+ releases.
+ `sync' tells the kernel code to use synchronous writes when updating
+ an inode, a bitmap, a directory entry or an indirect block. This
+ can make the file system much slower but can be a big win for file
+ recovery in case of a crash (and we can now say to the BSD folks
+ that Linux also supports synchronous updates :-).
+ `sb=#' tells the kernel code to use an alternate super block instead
+ of its master copy. `#' is the number of the block (counted in
+ 1024 bytes blocks) which contains the alternate super block.
+ An ext2 file system typically contains backups of the super block
+ at blocks 8193, 16385, and so on.
+ - I have changed the meaning of the valid flag used by e2fsck. It
+ now contains the state of the file system. If the kernel code
+ detects an inconsistency while the file system is mounted, it flags
+ it as erroneous and e2fsck will detect that on next run.
+ - The super block now contains a mount counter. This counter is
+ incremented each time the file system is mounted read/write. When
+ this counter becomes bigger than a maximal mount count (also stored
+ in the super block), e2fsck checks the file system, even if it had
+ been unmounted cleanly, and resets this counter to 0.
+ - File attributes are now supported. One can associate a set of
+ attributes to a file. Three attributes are defined:
+ `c': the file is marked for automatic compression,
+ `s': the file is marked for secure deletion: when the file is
+ deleted, its blocks are zeroed and written back to the disk,
+ `u': the file is marked for undeletion: when the file is deleted,
+ its contents are saved to allow a future undeletion.
+ Currently, only the `s' attribute is implemented in the kernel
+ code. Support for the other attributes will be added in a future
+ release.
+ - a few bugs related to time updates have been fixed by Bruce
+ Evans and me.
+ - a bug related to the links count of deleted inodes has been fixed.
+ Previous versions used to keep the links count set to 1 when a file
+ was deleted. The new version now sets links_count to 0 when deleting
+ the last link.
+ - a race condition when deallocating an inode has been fixed by
+ Stephen Tweedie.
+
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
new file mode 100644
index 000000000..599f2ad8f
--- /dev/null
+++ b/fs/ext2/Makefile
@@ -0,0 +1,31 @@
+#
+# Makefile for the linux ext2-filesystem routines.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (i.e. not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+.c.s:
+ $(CC) $(CFLAGS) -S $<
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+.s.o:
+ $(AS) -o $*.o $<
+
+OBJS= acl.o balloc.o bitmap.o dir.o file.o fsync.o ialloc.o \
+ inode.o ioctl.o namei.o super.o symlink.o truncate.o
+
+ext2.o: $(OBJS)
+ $(LD) -r -o ext2.o $(OBJS)
+
+dep:
+ $(CPP) -M *.c > .depend
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
new file mode 100644
index 000000000..91ef7c8cc
--- /dev/null
+++ b/fs/ext2/acl.c
@@ -0,0 +1,50 @@
+/*
+ * linux/fs/ext2/acl.c
+ *
+ * Copyright (C) 1993, 1994 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ */
+
+/*
+ * This file will contain the Access Control Lists management for the
+ * second extended file system.
+ */
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+
+/*
+ * ext2_permission ()
+ *
+ * Decide whether the current process may access `inode' in the way
+ * described by `mask' (a combination of the "other" permission bits).
+ * Returns 1 when access is granted and 0 when it is refused.
+ */
+int ext2_permission (struct inode * inode, int mask)
+{
+	unsigned short bits;
+
+	/* Write access to an immutable file is refused for everybody */
+	if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
+		return 0;
+
+	/* Root is always let through */
+	if (fsuser())
+		return 1;
+
+	/*
+	 * No ACL: pick the owner, group or other triplet of the file mode
+	 * by shifting it down into the low three bits.
+	 */
+	bits = inode->i_mode;
+	if (current->fsuid == inode->i_uid)
+		bits >>= 6;
+	else if (in_group_p (inode->i_gid))
+		bits >>= 3;
+
+	/* Every requested bit must be present in the selected triplet */
+	return (bits & mask & S_IRWXO) == mask;
+}
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
new file mode 100644
index 000000000..bc6faa7ed
--- /dev/null
+++ b/fs/ext2/balloc.c
@@ -0,0 +1,582 @@
+/*
+ * linux/fs/ext2/balloc.c
+ *
+ * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * Enhanced block allocation by Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
+ */
+
+/*
+ * balloc.c contains the blocks allocation and deallocation routines
+ */
+
+/*
+ * The free blocks are managed by bitmaps. A file system contains several
+ * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
+ * block for inodes, N blocks for the inode table and data blocks.
+ *
+ * The file system contains group descriptors which are located after the
+ * super block. Each descriptor contains the number of the bitmap block and
+ * the free blocks count in the block. The descriptors are loaded in memory
+ * when a file system is mounted (see ext2_read_super).
+ */
+
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/stat.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+
+#include <asm/bitops.h>
+
+#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) /* true when b lies in [first, first + len - 1]; b and first are evaluated twice */
+
+/*
+ * get_group_desc ()
+ *
+ * Return a pointer to the in-memory descriptor of group `block_group'.
+ * If `bh' is not NULL, *bh is set to the buffer that contains the
+ * descriptor.  ext2_panic() is called when the group number is out of
+ * range or the descriptor block is not loaded.
+ */
+static struct ext2_group_desc * get_group_desc (struct super_block * sb,
+						 unsigned int block_group,
+						 struct buffer_head ** bh)
+{
+	struct ext2_group_desc * table;
+	unsigned long blk_idx;		/* which descriptor block */
+	unsigned long in_blk;		/* which descriptor inside it */
+
+	if (block_group >= sb->u.ext2_sb.s_groups_count)
+		ext2_panic (sb, "get_group_desc",
+			    "block_group >= groups_count - "
+			    "block_group = %d, groups_count = %lu",
+			    block_group, sb->u.ext2_sb.s_groups_count);
+
+	blk_idx = block_group / EXT2_DESC_PER_BLOCK(sb);
+	in_blk = block_group % EXT2_DESC_PER_BLOCK(sb);
+
+	if (sb->u.ext2_sb.s_group_desc[blk_idx] == NULL)
+		ext2_panic (sb, "get_group_desc",
+			    "Group descriptor not loaded - "
+			    "block_group = %d, group_desc = %lu, desc = %lu",
+			    block_group, blk_idx, in_blk);
+
+	table = (struct ext2_group_desc *)
+		sb->u.ext2_sb.s_group_desc[blk_idx]->b_data;
+	if (bh != NULL)
+		*bh = sb->u.ext2_sb.s_group_desc[blk_idx];
+	return &table[in_blk];
+}
+
+/*
+ * read_block_bitmap ()
+ *
+ * Read the block bitmap of group `block_group' from disk and record the
+ * buffer, together with the group number, in slot `bitmap_nr' of the
+ * per-superblock bitmap arrays.  A read failure ends in ext2_panic().
+ */
+static void read_block_bitmap (struct super_block * sb,
+			       unsigned int block_group,
+			       unsigned long bitmap_nr)
+{
+	struct ext2_group_desc * desc = get_group_desc (sb, block_group, NULL);
+	struct buffer_head * bitmap_bh;
+
+	bitmap_bh = bread (sb->s_dev, desc->bg_block_bitmap, sb->s_blocksize);
+	if (bitmap_bh == NULL)
+		ext2_panic (sb, "read_block_bitmap",
+			    "Cannot read block bitmap - "
+			    "block_group = %d, block_bitmap = %lu",
+			    block_group, desc->bg_block_bitmap);
+
+	sb->u.ext2_sb.s_block_bitmap[bitmap_nr] = bitmap_bh;
+	sb->u.ext2_sb.s_block_bitmap_number[bitmap_nr] = block_group;
+}
+
+/*
+ * load_block_bitmap loads the block bitmap for a blocks group
+ *
+ * It maintains a cache for the last bitmaps loaded. This cache is managed
+ * with a LRU algorithm.
+ *
+ * Notes:
+ * 1/ There is one cache per mounted file system.
+ * 2/ If the file system contains no more than EXT2_MAX_GROUP_LOADED groups,
+ * this function reads the bitmap without maintaining a LRU cache.
+ *
+ * This is the slow path behind the inline load_block_bitmap() wrapper.
+ * It returns the index into sb->u.ext2_sb.s_block_bitmap[] of the buffer
+ * holding the bitmap: block_group itself in the non-LRU case, and 0 (the
+ * front of the LRU list) otherwise. An out-of-range group number ends in
+ * ext2_panic().
+ */
+static int load__block_bitmap (struct super_block * sb,
+ unsigned int block_group)
+{
+ int i, j;
+ unsigned long block_bitmap_number;
+ struct buffer_head * block_bitmap;
+
+ if (block_group >= sb->u.ext2_sb.s_groups_count)
+ ext2_panic (sb, "load_block_bitmap",
+ "block_group >= groups_count - "
+ "block_group = %d, groups_count = %lu",
+ block_group, sb->u.ext2_sb.s_groups_count);
+
+ if (sb->u.ext2_sb.s_groups_count <= EXT2_MAX_GROUP_LOADED) { /* few groups: slot index == group number, no LRU */
+ if (sb->u.ext2_sb.s_block_bitmap[block_group]) {
+ if (sb->u.ext2_sb.s_block_bitmap_number[block_group] !=
+ block_group)
+ ext2_panic (sb, "load_block_bitmap",
+ "block_group != block_bitmap_number");
+ else
+ return block_group;
+ } else {
+ read_block_bitmap (sb, block_group, block_group);
+ return block_group;
+ }
+ }
+
+ for (i = 0; i < sb->u.ext2_sb.s_loaded_block_bitmaps && /* search the LRU list for this group */
+ sb->u.ext2_sb.s_block_bitmap_number[i] != block_group; i++)
+ ;
+ if (i < sb->u.ext2_sb.s_loaded_block_bitmaps &&
+ sb->u.ext2_sb.s_block_bitmap_number[i] == block_group) {
+ block_bitmap_number = sb->u.ext2_sb.s_block_bitmap_number[i];
+ block_bitmap = sb->u.ext2_sb.s_block_bitmap[i];
+ for (j = i; j > 0; j--) { /* hit: shift entries 0..i-1 down one slot */
+ sb->u.ext2_sb.s_block_bitmap_number[j] =
+ sb->u.ext2_sb.s_block_bitmap_number[j - 1];
+ sb->u.ext2_sb.s_block_bitmap[j] =
+ sb->u.ext2_sb.s_block_bitmap[j - 1];
+ }
+ sb->u.ext2_sb.s_block_bitmap_number[0] = block_bitmap_number; /* ...and move the hit to the front */
+ sb->u.ext2_sb.s_block_bitmap[0] = block_bitmap;
+ } else {
+ if (sb->u.ext2_sb.s_loaded_block_bitmaps < EXT2_MAX_GROUP_LOADED) /* miss: grow the list if there is room... */
+ sb->u.ext2_sb.s_loaded_block_bitmaps++;
+ else
+ brelse (sb->u.ext2_sb.s_block_bitmap[EXT2_MAX_GROUP_LOADED - 1]); /* ...else release the buffer in the last slot */
+ for (j = sb->u.ext2_sb.s_loaded_block_bitmaps - 1; j > 0; j--) {
+ sb->u.ext2_sb.s_block_bitmap_number[j] =
+ sb->u.ext2_sb.s_block_bitmap_number[j - 1];
+ sb->u.ext2_sb.s_block_bitmap[j] =
+ sb->u.ext2_sb.s_block_bitmap[j - 1];
+ }
+ read_block_bitmap (sb, block_group, 0); /* the newly read bitmap goes in slot 0 */
+ }
+ return 0;
+}
+
+/*
+ * load_block_bitmap ()
+ *
+ * Inline front end for load__block_bitmap(): the two cheap cases are
+ * answered here without a function call.  Returns the slot in
+ * sb->u.ext2_sb.s_block_bitmap[] that holds the bitmap of `block_group'.
+ */
+static inline int load_block_bitmap (struct super_block * sb,
+				     unsigned int block_group)
+{
+	/* Case 1: the wanted bitmap already sits at the front of the list */
+	if (sb->u.ext2_sb.s_loaded_block_bitmaps > 0) {
+		if (sb->u.ext2_sb.s_block_bitmap_number[0] == block_group)
+			return 0;
+	}
+
+	/* Case 2: few groups, slots are indexed by group and this one is filled */
+	if (sb->u.ext2_sb.s_groups_count <= EXT2_MAX_GROUP_LOADED) {
+		if (sb->u.ext2_sb.s_block_bitmap_number[block_group] == block_group &&
+		    sb->u.ext2_sb.s_block_bitmap[block_group])
+			return block_group;
+	}
+
+	/* Otherwise take the slow path */
+	return load__block_bitmap (sb, block_group);
+}
+
+/*
+ * ext2_free_blocks ()
+ *
+ * Mark the `count' consecutive blocks starting at `block' as free in the
+ * block bitmap of their group and credit them to the free counts of the
+ * group descriptor and of the super block.
+ *
+ * The whole run must lie inside the data zone and inside one block group:
+ * the former is reported with ext2_error() and the call returns without
+ * freeing anything, the latter ends in ext2_panic().  With the
+ * CHECK_STRICT option set, a run that touches the group's bitmaps or
+ * inode table also ends in ext2_panic().
+ *
+ * A bit that is found already clear is reported with ext2_warning() and
+ * is not counted.  The super block is locked for the duration of the
+ * call.
+ */
+void ext2_free_blocks (struct super_block * sb, unsigned long block,
+		       unsigned long count)
+{
+	struct buffer_head * bh;
+	struct buffer_head * bh2;
+	unsigned long block_group;
+	unsigned long bit;
+	unsigned long i;
+	int bitmap_nr;
+	struct ext2_group_desc * gdp;
+	struct ext2_super_block * es;
+
+	if (!sb) {
+		printk ("ext2_free_blocks: nonexistent device");
+		return;
+	}
+	lock_super (sb);
+	es = sb->u.ext2_sb.s_es;
+	if (block < es->s_first_data_block ||
+	    (block + count) > es->s_blocks_count) {
+		ext2_error (sb, "ext2_free_blocks",
+			    "Freeing blocks not in datazone - "
+			    "block = %lu, count = %lu", block, count);
+		unlock_super (sb);
+		return;
+	}
+
+	ext2_debug ("freeing block %lu\n", block);
+
+	/* Locate the group and the first bit inside its bitmap */
+	block_group = (block - es->s_first_data_block) /
+		      EXT2_BLOCKS_PER_GROUP(sb);
+	bit = (block - es->s_first_data_block) % EXT2_BLOCKS_PER_GROUP(sb);
+	if (bit + count > EXT2_BLOCKS_PER_GROUP(sb))
+		ext2_panic (sb, "ext2_free_blocks",
+			    "Freeing blocks across group boundary - "
+			    "Block = %lu, count = %lu",
+			    block, count);
+	bitmap_nr = load_block_bitmap (sb, block_group);
+	bh = sb->u.ext2_sb.s_block_bitmap[bitmap_nr];
+	gdp = get_group_desc (sb, block_group, &bh2);
+
+	if (test_opt (sb, CHECK_STRICT) &&
+	    (in_range (gdp->bg_block_bitmap, block, count) ||
+	     in_range (gdp->bg_inode_bitmap, block, count) ||
+	     in_range (block, gdp->bg_inode_table,
+		       sb->u.ext2_sb.s_itb_per_group) ||
+	     in_range (block + count - 1, gdp->bg_inode_table,
+		       sb->u.ext2_sb.s_itb_per_group)))
+		ext2_panic (sb, "ext2_free_blocks",
+			    "Freeing blocks in system zones - "
+			    "Block = %lu, count = %lu",
+			    block, count);
+
+	for (i = 0; i < count; i++) {
+		if (!clear_bit (bit + i, bh->b_data))
+			/*
+			 * Report the block whose bit was actually found
+			 * clear, not the first block of the run.
+			 */
+			ext2_warning (sb, "ext2_free_blocks",
+				      "bit already cleared for block %lu",
+				      block + i);
+		else {
+			gdp->bg_free_blocks_count++;
+			es->s_free_blocks_count++;
+		}
+	}
+
+	/* bh2 holds the group descriptor, s_sbh the super block */
+	mark_buffer_dirty(bh2, 1);
+	mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1);
+
+	mark_buffer_dirty(bh, 1);
+	if (sb->s_flags & MS_SYNC) {
+		/* synchronous mount: write the bitmap out and wait for it */
+		ll_rw_block (WRITE, 1, &bh);
+		wait_on_buffer (bh);
+	}
+	sb->s_dirt = 1;
+	unlock_super (sb);
+	return;
+}
+
+/*
+ * ext2_new_block uses a goal block to assist allocation. If the goal is
+ * free, or there is a free block within 32 blocks of the goal, that block
+ * is allocated. Otherwise a forward search is made for a free block; within
+ * each block group the search first looks for an entire free byte in the block
+ * bitmap, and then for any free bit if that fails.
+ *
+ * Returns the number of the allocated block, or 0 on failure (sb is NULL,
+ * only reserved blocks remain and the caller may not use them, no group
+ * has free blocks, the buffer for the new block cannot be obtained, or an
+ * inconsistency is detected). The new block is zero-filled through the
+ * buffer cache before returning. Once sb has been checked, the super
+ * block stays locked until the function returns.
+ *
+ * When EXT2_PREALLOCATE is defined and prealloc_block is non-NULL, up to 7
+ * further consecutive free blocks of the same group are marked in use;
+ * *prealloc_block receives the number following the allocated block and
+ * *prealloc_count how many blocks were reserved that way.
+ */
+int ext2_new_block (struct super_block * sb, unsigned long goal,
+ unsigned long * prealloc_count,
+ unsigned long * prealloc_block)
+{
+ struct buffer_head * bh;
+ struct buffer_head * bh2;
+ char * p, * r;
+ int i, j, k, tmp;
+ unsigned long lmap;
+ int bitmap_nr;
+ struct ext2_group_desc * gdp;
+ struct ext2_super_block * es;
+
+#ifdef EXT2FS_DEBUG
+ static int goal_hits = 0, goal_attempts = 0;
+#endif
+ if (!sb) {
+ printk ("ext2_new_block: nonexistent device");
+ return 0;
+ }
+ lock_super (sb);
+ es = sb->u.ext2_sb.s_es;
+ if (es->s_free_blocks_count <= es->s_r_blocks_count &&
+ (!fsuser() && (sb->u.ext2_sb.s_resuid != current->fsuid) &&
+ (sb->u.ext2_sb.s_resgid == 0 ||
+ !in_group_p (sb->u.ext2_sb.s_resgid)))) {
+ unlock_super (sb);
+ return 0; /* only reserved blocks are left and the caller is not root, resuid or in resgid */
+ }
+
+ ext2_debug ("goal=%lu.\n", goal);
+
+repeat:
+ /*
+ * First, test whether the goal block is free.
+ */
+ if (goal < es->s_first_data_block || goal >= es->s_blocks_count)
+ goal = es->s_first_data_block;
+ i = (goal - es->s_first_data_block) / EXT2_BLOCKS_PER_GROUP(sb); /* i: group of the goal */
+ gdp = get_group_desc (sb, i, &bh2);
+ if (gdp->bg_free_blocks_count > 0) {
+ j = ((goal - es->s_first_data_block) % EXT2_BLOCKS_PER_GROUP(sb)); /* j: bit of the goal inside the group */
+#ifdef EXT2FS_DEBUG
+ if (j)
+ goal_attempts++;
+#endif
+ bitmap_nr = load_block_bitmap (sb, i);
+ bh = sb->u.ext2_sb.s_block_bitmap[bitmap_nr];
+
+ ext2_debug ("goal is at %d:%d.\n", i, j);
+
+ if (!test_bit(j, bh->b_data)) {
+#ifdef EXT2FS_DEBUG
+ goal_hits++;
+ ext2_debug ("goal bit allocated.\n");
+#endif
+ goto got_block;
+ }
+ if (j) {
+ /*
+ * The goal was occupied; search forward for a free
+ * block within the next 32 blocks
+ */
+ lmap = ((((unsigned long *) bh->b_data)[j >> 5]) >>
+ ((j & 31) + 1));
+ if (j < EXT2_BLOCKS_PER_GROUP(sb) - 32)
+ lmap |= (((unsigned long *) bh->b_data)[(j >> 5) + 1]) <<
+ (31 - (j & 31));
+ else
+ lmap |= 0xffffffff << (31 - (j & 31)); /* last word of the group: the missing bits count as in use */
+ if (lmap != 0xffffffffl) {
+ k = ffz(lmap) + 1;
+ if ((j + k) < EXT2_BLOCKS_PER_GROUP(sb)) {
+ j += k;
+ goto got_block;
+ }
+ }
+ }
+
+ ext2_debug ("Bit not found near goal\n");
+
+ /*
+ * There has been no free block found in the near vicinity
+ * of the goal: do a search forward through the block groups,
+ * searching in each group first for an entire free byte in
+ * the bitmap and then for any free bit.
+ *
+ * Search first in the remainder of the current group; then,
+ * cyclicly search through the rest of the groups.
+ */
+ p = ((char *) bh->b_data) + (j >> 3);
+ r = memscan(p, 0, (EXT2_BLOCKS_PER_GROUP(sb) - j + 7) >> 3);
+ k = (r - ((char *) bh->b_data)) << 3;
+ if (k < EXT2_BLOCKS_PER_GROUP(sb)) {
+ j = k;
+ goto search_back;
+ }
+ k = find_next_zero_bit ((unsigned long *) bh->b_data,
+ EXT2_BLOCKS_PER_GROUP(sb),
+ j);
+ if (k < EXT2_BLOCKS_PER_GROUP(sb)) {
+ j = k;
+ goto got_block;
+ }
+ }
+
+ ext2_debug ("Bit not found in block group %d.\n", i);
+
+ /*
+ * Now search the rest of the groups. We assume that
+ * i and gdp correctly point to the last group visited.
+ */
+ for (k = 0; k < sb->u.ext2_sb.s_groups_count; k++) {
+ i++;
+ if (i >= sb->u.ext2_sb.s_groups_count)
+ i = 0;
+ gdp = get_group_desc (sb, i, &bh2);
+ if (gdp->bg_free_blocks_count > 0)
+ break;
+ }
+ if (k >= sb->u.ext2_sb.s_groups_count) {
+ unlock_super (sb);
+ return 0; /* every group descriptor reports zero free blocks */
+ }
+ bitmap_nr = load_block_bitmap (sb, i);
+ bh = sb->u.ext2_sb.s_block_bitmap[bitmap_nr];
+ r = memscan(bh->b_data, 0, EXT2_BLOCKS_PER_GROUP(sb) >> 3);
+ j = (r - bh->b_data) << 3;
+ if (j < EXT2_BLOCKS_PER_GROUP(sb))
+ goto search_back;
+ else
+ j = find_first_zero_bit ((unsigned long *) bh->b_data,
+ EXT2_BLOCKS_PER_GROUP(sb));
+ if (j >= EXT2_BLOCKS_PER_GROUP(sb)) {
+ ext2_error (sb, "ext2_new_block",
+ "Free blocks count corrupted for block group %d", i);
+ unlock_super (sb);
+ return 0;
+ }
+
+search_back:
+ /*
+ * We have succeeded in finding a free byte in the block
+ * bitmap. Now search backwards up to 7 bits to find the
+ * start of this group of free blocks.
+ */
+ for (k = 0; k < 7 && j > 0 && !test_bit (j - 1, bh->b_data); k++, j--);
+
+got_block:
+
+ ext2_debug ("using block group %d(%d)\n", i, gdp->bg_free_blocks_count);
+
+ tmp = j + i * EXT2_BLOCKS_PER_GROUP(sb) + es->s_first_data_block; /* tmp: absolute block number for bit j of group i */
+
+ if (test_opt (sb, CHECK_STRICT) &&
+ (tmp == gdp->bg_block_bitmap ||
+ tmp == gdp->bg_inode_bitmap ||
+ in_range (tmp, gdp->bg_inode_table, sb->u.ext2_sb.s_itb_per_group)))
+ ext2_panic (sb, "ext2_new_block",
+ "Allocating block in system zone - "
+ "block = %u", tmp);
+
+ if (set_bit (j, bh->b_data)) {
+ ext2_warning (sb, "ext2_new_block",
+ "bit already set for block %d", j);
+ goto repeat; /* the bit was already set: start over from the goal */
+ }
+
+ ext2_debug ("found bit %d\n", j);
+
+ /*
+ * Do block preallocation now if required.
+ */
+#ifdef EXT2_PREALLOCATE
+ if (prealloc_block) {
+ *prealloc_count = 0;
+ *prealloc_block = tmp + 1;
+ for (k = 1;
+ k < 8 && (j + k) < EXT2_BLOCKS_PER_GROUP(sb); k++) {
+ if (set_bit (j + k, bh->b_data))
+ break;
+ (*prealloc_count)++;
+ }
+ gdp->bg_free_blocks_count -= *prealloc_count;
+ es->s_free_blocks_count -= *prealloc_count;
+ ext2_debug ("Preallocated a further %lu bits.\n",
+ *prealloc_count);
+ }
+#endif
+
+ j = tmp; /* from here on j is the absolute block number */
+
+ mark_buffer_dirty(bh, 1);
+ if (sb->s_flags & MS_SYNC) {
+ ll_rw_block (WRITE, 1, &bh);
+ wait_on_buffer (bh);
+ }
+
+ if (j >= es->s_blocks_count) {
+ ext2_error (sb, "ext2_new_block",
+ "block >= blocks count - "
+ "block_group = %d, block=%d", i, j);
+ unlock_super (sb);
+ return 0;
+ }
+ if (!(bh = getblk (sb->s_dev, j, sb->s_blocksize))) { /* bh now refers to the new block, no longer to the bitmap */
+ ext2_error (sb, "ext2_new_block", "cannot get block %d", j);
+ unlock_super (sb);
+ return 0;
+ }
+ memset(bh->b_data, 0, sb->s_blocksize);
+ bh->b_uptodate = 1;
+ mark_buffer_dirty(bh, 1);
+ brelse (bh);
+
+ ext2_debug ("allocating block %d. "
+ "Goal hits %d of %d.\n", j, goal_hits, goal_attempts);
+
+ gdp->bg_free_blocks_count--;
+ mark_buffer_dirty(bh2, 1); /* bh2: buffer holding the group descriptor */
+ es->s_free_blocks_count--;
+ mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1);
+ sb->s_dirt = 1;
+ unlock_super (sb);
+ return j;
+}
+
+/*
+ * ext2_count_free_blocks ()
+ *
+ * Without EXT2FS_DEBUG this simply returns the free blocks count stored
+ * in the super block.  With EXT2FS_DEBUG every group's bitmap is loaded
+ * and counted under the super block lock, the stored and counted values
+ * are printed, and the total counted from the bitmaps is returned.
+ */
+unsigned long ext2_count_free_blocks (struct super_block * sb)
+{
+#ifdef EXT2FS_DEBUG
+	struct ext2_group_desc * desc;
+	struct ext2_super_block * es;
+	unsigned long from_descs = 0;
+	unsigned long from_bitmaps = 0;
+	unsigned long counted;
+	int slot;
+	int group;
+
+	lock_super (sb);
+	es = sb->u.ext2_sb.s_es;
+	for (group = 0; group < sb->u.ext2_sb.s_groups_count; group++) {
+		desc = get_group_desc (sb, group, NULL);
+		from_descs += desc->bg_free_blocks_count;
+
+		slot = load_block_bitmap (sb, group);
+		counted = ext2_count_free (sb->u.ext2_sb.s_block_bitmap[slot],
+					   sb->s_blocksize);
+		printk ("group %d: stored = %d, counted = %lu\n",
+			group, desc->bg_free_blocks_count, counted);
+		from_bitmaps += counted;
+	}
+	printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
+	       es->s_free_blocks_count, from_descs, from_bitmaps);
+	unlock_super (sb);
+	return from_bitmaps;
+#else
+	return sb->u.ext2_sb.s_es->s_free_blocks_count;
+#endif
+}
+
+/*
+ * block_in_use ()
+ *
+ * Test, in the group bitmap `map', the bit that corresponds to the
+ * absolute block number `block'.
+ */
+static inline int block_in_use (unsigned long block,
+				struct super_block * sb,
+				unsigned char * map)
+{
+	unsigned long rel;
+
+	/* position of the block counted from the first data block */
+	rel = block - sb->u.ext2_sb.s_es->s_first_data_block;
+	return test_bit (rel % EXT2_BLOCKS_PER_GROUP(sb), map);
+}
+
+/*
+ * ext2_check_blocks_bitmap ()
+ *
+ * Consistency check of the block bitmaps, run under the super block lock.
+ * For every group it verifies that the bits for the super block, the
+ * descriptor blocks, both bitmaps and the inode table are set, and that
+ * the number of clear bits matches the free count in the descriptor.
+ * Finally the grand total is compared with the super block.  Every
+ * mismatch is reported through ext2_error().
+ */
+void ext2_check_blocks_bitmap (struct super_block * sb)
+{
+	struct ext2_super_block * es;
+	struct ext2_group_desc * desc;
+	struct buffer_head * bitmap;
+	unsigned long stored_total, counted_total, counted;
+	unsigned long desc_blocks;
+	int slot;
+	int group, n;
+
+	lock_super (sb);
+	es = sb->u.ext2_sb.s_es;
+	stored_total = 0;
+	counted_total = 0;
+	/* number of blocks needed to hold all the group descriptors */
+	desc_blocks = (sb->u.ext2_sb.s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
+		      EXT2_DESC_PER_BLOCK(sb);
+
+	for (group = 0; group < sb->u.ext2_sb.s_groups_count; group++) {
+		desc = get_group_desc (sb, group, NULL);
+		stored_total += desc->bg_free_blocks_count;
+		slot = load_block_bitmap (sb, group);
+		bitmap = sb->u.ext2_sb.s_block_bitmap[slot];
+
+		/* bit 0: super block */
+		if (!test_bit (0, bitmap->b_data))
+			ext2_error (sb, "ext2_check_blocks_bitmap",
+				    "Superblock in group %d is marked free", group);
+
+		/* bits 1..desc_blocks: group descriptors */
+		for (n = 0; n < desc_blocks; n++) {
+			if (!test_bit (n + 1, bitmap->b_data))
+				ext2_error (sb, "ext2_check_blocks_bitmap",
+					    "Descriptor block #%d in group "
+					    "%d is marked free", n, group);
+		}
+
+		if (!block_in_use (desc->bg_block_bitmap, sb, bitmap->b_data))
+			ext2_error (sb, "ext2_check_blocks_bitmap",
+				    "Block bitmap for group %d is marked free",
+				    group);
+
+		if (!block_in_use (desc->bg_inode_bitmap, sb, bitmap->b_data))
+			ext2_error (sb, "ext2_check_blocks_bitmap",
+				    "Inode bitmap for group %d is marked free",
+				    group);
+
+		for (n = 0; n < sb->u.ext2_sb.s_itb_per_group; n++) {
+			if (!block_in_use (desc->bg_inode_table + n, sb, bitmap->b_data))
+				ext2_error (sb, "ext2_check_blocks_bitmap",
+					    "Block #%d of the inode table in "
+					    "group %d is marked free", n, group);
+		}
+
+		counted = ext2_count_free (bitmap, sb->s_blocksize);
+		if (desc->bg_free_blocks_count != counted)
+			ext2_error (sb, "ext2_check_blocks_bitmap",
+				    "Wrong free blocks count for group %d, "
+				    "stored = %d, counted = %lu", group,
+				    desc->bg_free_blocks_count, counted);
+		counted_total += counted;
+	}
+
+	if (es->s_free_blocks_count != counted_total)
+		ext2_error (sb, "ext2_check_blocks_bitmap",
+			    "Wrong free blocks count in super block, "
+			    "stored = %lu, counted = %lu",
+			    es->s_free_blocks_count, counted_total);
+	unlock_super (sb);
+}
diff --git a/fs/ext2/bitmap.c b/fs/ext2/bitmap.c
new file mode 100644
index 000000000..1084da16d
--- /dev/null
+++ b/fs/ext2/bitmap.c
@@ -0,0 +1,25 @@
+/*
+ * linux/fs/ext2/bitmap.c
+ *
+ * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ */
+
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+
+/* nibblemap[v] is the number of zero bits in the 4-bit value v */
+static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
+
+/*
+ * ext2_count_free ()
+ *
+ * Count the zero bits in the first `numchars' bytes of the data of
+ * buffer `map'.  A NULL buffer counts as 0.
+ */
+unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
+{
+	unsigned long total = 0;
+	unsigned int left;
+	char * byte;
+
+	if (map == NULL)
+		return 0;
+
+	byte = map->b_data;
+	for (left = numchars; left > 0; left--, byte++) {
+		/* low nibble, then high nibble */
+		total += nibblemap[*byte & 0xf];
+		total += nibblemap[(*byte >> 4) & 0xf];
+	}
+	return total;
+}
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
new file mode 100644
index 000000000..c98139bc6
--- /dev/null
+++ b/fs/ext2/dir.c
@@ -0,0 +1,227 @@
+/*
+ * linux/fs/ext2/dir.c
+ *
+ * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/dir.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext2 directory handling functions
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+
+#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) /* byte offset of the d_name member from the start of *de */
+#define ROUND_UP(x) (((x)+3) & ~3) /* round x up to the next multiple of 4 */
+
+static int ext2_dir_read (struct inode * inode, struct file * filp,
+ char * buf, int count)
+{
+ return -EISDIR; /* reading a directory through read is always refused; the arguments are ignored */
+}
+
+static int ext2_readdir (struct inode *, struct file *, struct dirent *, int);
+
+static struct file_operations ext2_dir_operations = {
+ NULL, /* lseek - default */
+ ext2_dir_read, /* read - always fails with -EISDIR */
+ NULL, /* write - bad */
+ ext2_readdir, /* readdir */
+ NULL, /* select - default */
+ ext2_ioctl, /* ioctl */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ file_fsync, /* fsync */
+ NULL, /* fasync */
+ NULL, /* check_media_change */
+ NULL /* revalidate */
+};
+
+/*
+ * directories can handle most operations...
+ *
+ * Only the readlink, follow_link, bmap and smap slots are left NULL.
+ */
+struct inode_operations ext2_dir_inode_operations = {
+ &ext2_dir_operations, /* default directory file-ops */
+ ext2_create, /* create */
+ ext2_lookup, /* lookup */
+ ext2_link, /* link */
+ ext2_unlink, /* unlink */
+ ext2_symlink, /* symlink */
+ ext2_mkdir, /* mkdir */
+ ext2_rmdir, /* rmdir */
+ ext2_mknod, /* mknod */
+ ext2_rename, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ ext2_truncate, /* truncate */
+ ext2_permission, /* permission */
+ NULL /* smap */
+};
+
+/*
+ * ext2_check_dir_entry ()
+ *
+ * Sanity-check the directory entry `de', located `offset' bytes into its
+ * block and held in buffer `bh'.  `function' names the caller for the
+ * error report.
+ *
+ * The record length must be at least the minimal entry size, a multiple
+ * of 4 and large enough for the name.  When `dir' is given, the entry
+ * must also stay inside its block and its inode number must not exceed
+ * the inode count of the file system.
+ *
+ * Returns 1 when the entry looks valid and 0 otherwise.  A bad entry is
+ * reported through ext2_error(); if `dir' is NULL there is no super
+ * block to report against, so the same message goes to printk() instead
+ * of dereferencing a NULL pointer.
+ */
+int ext2_check_dir_entry (char * function, struct inode * dir,
+			  struct ext2_dir_entry * de, struct buffer_head * bh,
+			  unsigned long offset)
+{
+	char * error_msg = NULL;
+
+	if (de->rec_len < EXT2_DIR_REC_LEN(1))
+		error_msg = "rec_len is smaller than minimal";
+	else if (de->rec_len % 4 != 0)
+		error_msg = "rec_len % 4 != 0";
+	else if (de->rec_len < EXT2_DIR_REC_LEN(de->name_len))
+		error_msg = "rec_len is too small for name_len";
+	else if (dir && ((char *) de - bh->b_data) + de->rec_len >
+		 dir->i_sb->s_blocksize)
+		error_msg = "directory entry across blocks";
+	else if (dir && de->inode > dir->i_sb->u.ext2_sb.s_es->s_inodes_count)
+		error_msg = "inode out of bounds";
+
+	if (error_msg == NULL)
+		return 1;
+
+	if (dir)
+		ext2_error (dir->i_sb, function, "bad directory entry: %s\n"
+			    "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
+			    error_msg, offset, de->inode, de->rec_len,
+			    de->name_len);
+	else
+		/* the checks above allow dir == NULL, so the report must too */
+		printk ("%s: bad directory entry: %s\n"
+			"offset=%lu, inode=%lu, rec_len=%d, name_len=%d\n",
+			function, error_msg, offset, de->inode, de->rec_len,
+			de->name_len);
+	return 0;
+}
+
+/*
+ * ext2_readdir ()
+ *
+ * Copy directory entries of `inode', starting at filp->f_pos, into the
+ * user buffer `dirent' until `count' bytes are used up or the first
+ * block that yielded entries has been consumed.
+ *
+ * Returns the number of bytes stored in the user buffer (0 at end of
+ * directory), or -EBADF if `inode' is NULL or not a directory.
+ *
+ * filp->f_pos is advanced past every entry examined.  An unreadable
+ * block is skipped.  A corrupted entry ends the call after moving f_pos
+ * to the start of the following block.  Unless the inode is read-only,
+ * its access time is updated and it is marked dirty.
+ */
+static int ext2_readdir (struct inode * inode, struct file * filp,
+			 struct dirent * dirent, int count)
+{
+	unsigned long offset, blk;
+	int i, num, stored, dlen;
+	struct buffer_head * bh, * tmp, * bha[16];
+	struct ext2_dir_entry * de;
+	struct super_block * sb;
+	int err, version;
+
+	if (!inode || !S_ISDIR(inode->i_mode))
+		return -EBADF;
+	sb = inode->i_sb;
+
+	stored = 0;
+	bh = NULL;
+	offset = filp->f_pos & (sb->s_blocksize - 1);
+
+	while (count > 0 && !stored && filp->f_pos < inode->i_size) {
+		blk = (filp->f_pos) >> EXT2_BLOCK_SIZE_BITS(sb);
+		bh = ext2_bread (inode, blk, 0, &err);
+		if (!bh) {
+			/*
+			 * Skip the unreadable block.  f_pos now sits on a
+			 * block boundary, so the in-block offset has to be
+			 * reset as well or the next block would be parsed
+			 * from the middle.
+			 */
+			filp->f_pos += sb->s_blocksize - offset;
+			offset = 0;
+			continue;
+		}
+
+		/*
+		 * Do the readahead
+		 */
+		if (!offset) {
+			for (i = 16 >> (EXT2_BLOCK_SIZE_BITS(sb) - 9), num = 0;
+			     i > 0; i--) {
+				tmp = ext2_getblk (inode, ++blk, 0, &err);
+				if (tmp && !tmp->b_uptodate && !tmp->b_lock)
+					bha[num++] = tmp;
+				else
+					brelse (tmp);
+			}
+			if (num) {
+				ll_rw_block (READA, num, bha);
+				for (i = 0; i < num; i++)
+					brelse (bha[i]);
+			}
+		}
+
+revalidate:
+		/* If the dir block has changed since the last call to
+		 * readdir(2), then we might be pointing to an invalid
+		 * dirent right now.  Scan from the start of the block
+		 * to make sure. */
+		if (filp->f_version != inode->i_version) {
+			for (i = 0; i < sb->s_blocksize && i < offset; ) {
+				de = (struct ext2_dir_entry *)
+					(bh->b_data + i);
+				/* It's too expensive to do a full
+				 * dirent test each time round this
+				 * loop, but we do have to test at
+				 * least that it is non-zero.  A
+				 * failure will be detected in the
+				 * dirent test below. */
+				if (de->rec_len < EXT2_DIR_REC_LEN(1))
+					break;
+				i += de->rec_len;
+			}
+			offset = i;
+			filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
+				| offset;
+			filp->f_version = inode->i_version;
+		}
+
+		while (count > 0 && filp->f_pos < inode->i_size
+		       && offset < sb->s_blocksize) {
+			de = (struct ext2_dir_entry *) (bh->b_data + offset);
+			if (!ext2_check_dir_entry ("ext2_readdir", inode, de,
+						   bh, offset)) {
+				/* On error, skip the f_pos to the
+				 * next block: keep the block-number
+				 * bits, drop the in-block offset and
+				 * step one block forward. */
+				filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
+					+ sb->s_blocksize;
+				brelse (bh);
+				return stored;
+			}
+			if (de->inode) {
+				dlen = ROUND_UP(NAME_OFFSET(dirent)
+						+ de->name_len + 1);
+				/* Old libc libraries always use a
+				   count of 1. */
+				if (count == 1 && !stored)
+					count = dlen;
+				if (count < dlen) {
+					count = 0;
+					break;
+				}
+
+				/* We might block in the next section
+				 * if the data destination is
+				 * currently swapped out.  So, use a
+				 * version stamp to detect whether or
+				 * not the directory has been modified
+				 * during the copy operation. */
+				version = inode->i_version;
+				i = de->name_len;
+				memcpy_tofs (dirent->d_name, de->name, i);
+				put_fs_long (de->inode, &dirent->d_ino);
+				put_fs_byte (0, dirent->d_name + i);
+				put_fs_word (i, &dirent->d_reclen);
+				put_fs_long (dlen, &dirent->d_off);
+				if (version != inode->i_version)
+					goto revalidate;
+				dcache_add(inode, de->name, de->name_len,
+					   de->inode);
+
+				stored += dlen;
+				count -= dlen;
+				((char *) dirent) += dlen;
+			}
+			offset += de->rec_len;
+			filp->f_pos += de->rec_len;
+		}
+		offset = 0;
+		brelse (bh);
+	}
+	if (!IS_RDONLY(inode)) {
+		inode->i_atime = CURRENT_TIME;
+		inode->i_dirt = 1;
+	}
+	return stored;
+}
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
new file mode 100644
index 000000000..20628b349
--- /dev/null
+++ b/fs/ext2/file.c
@@ -0,0 +1,354 @@
+/*
+ * linux/fs/ext2/file.c
+ *
+ * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/file.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext2 fs regular file handling primitives
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+
+#define NBUF 32 /* length of the buffer_head pointer arrays declared in ext2_file_read and ext2_file_write */
+
+#define MIN(a,b) (((a)<(b))?(a):(b)) /* smaller of a and b; the selected argument is evaluated twice */
+#define MAX(a,b) (((a)>(b))?(a):(b)) /* larger of a and b; the selected argument is evaluated twice */
+
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+
+static int ext2_file_read (struct inode *, struct file *, char *, int); /* fills the read slot of ext2_file_operations */
+static int ext2_file_write (struct inode *, struct file *, char *, int); /* fills the write slot of ext2_file_operations */
+static void ext2_release_file (struct inode *, struct file *); /* fills the release slot of ext2_file_operations */
+
+/*
+ * File operations table for ext2 regular files; it is referenced by
+ * ext2_file_inode_operations below.  The initializer is positional: the
+ * comment at the end of each line names the slot that line fills.
+ *
+ * We have mostly NULL's here: the current defaults are ok for
+ * the ext2 filesystem.
+ */
+static struct file_operations ext2_file_operations = {
+ NULL, /* lseek - default */
+ ext2_file_read, /* read */
+ ext2_file_write, /* write */
+ NULL, /* readdir - bad */
+ NULL, /* select - default */
+ ext2_ioctl, /* ioctl */
+ generic_mmap, /* mmap */
+ NULL, /* no special open is needed */
+ ext2_release_file, /* release */
+ ext2_sync_file, /* fsync */
+ NULL, /* fasync */
+ NULL, /* check_media_change */
+ NULL /* revalidate */
+};
+
+struct inode_operations ext2_file_inode_operations = { /* inode operations for ext2 regular files; positional, only bmap, truncate and permission are set */
+ &ext2_file_operations,/* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ ext2_bmap, /* bmap */
+ ext2_truncate, /* truncate */
+ ext2_permission, /* permission */
+ NULL /* smap */
+};
+
+static int ext2_file_read (struct inode * inode, struct file * filp,
+ char * buf, int count)
+{ /*
+ * Read up to count bytes of the regular file inode, starting at
+ * filp->f_pos, into buf (filled with memcpy_tofs / put_fs_byte).
+ *
+ * Returns the number of bytes delivered and advances filp->f_pos by
+ * that amount.  Returns 0 when nothing lies between f_pos and i_size
+ * or count is not positive, -EINVAL for a NULL or non-regular inode,
+ * and -EIO when bytes were wanted but none could be delivered.
+ */
+ int read, left, chars; /* bytes delivered so far, bytes still wanted, bytes taken from the current block */
+ int block, blocks, offset; /* next block to request, blocks still to request, byte offset inside the current block */
+ int bhrequest, uptodate; /* entries used in bhreq[], and whether every block requested in this round was already uptodate */
+ int clusterblocks; /* countdown refilled from ext2_getcluster; not used for anything else here */
+ struct buffer_head ** bhb, ** bhe; /* ring cursors into buflist: bhb = next slot to fill, bhe = next slot to consume */
+ struct buffer_head * bhreq[NBUF]; /* buffers that still have to be read from the device */
+ struct buffer_head * buflist[NBUF]; /* ring of requested blocks; a NULL entry is delivered as zeros */
+ struct super_block * sb;
+ unsigned int size; /* first the file size in bytes, later the file size in blocks */
+ int err; /* receives the ext2_getblk error; not examined in this function */
+
+ if (!inode) {
+ printk ("ext2_file_read: inode = NULL\n");
+ return -EINVAL;
+ }
+ sb = inode->i_sb;
+ if (!S_ISREG(inode->i_mode)) {
+ ext2_warning (sb, "ext2_file_read", "mode = %07o",
+ inode->i_mode);
+ return -EINVAL;
+ }
+ offset = filp->f_pos;
+ size = inode->i_size;
+ if (offset > size)
+ left = 0;
+ else
+ left = size - offset;
+ if (left > count)
+ left = count;
+ if (left <= 0)
+ return 0;
+ read = 0;
+ block = offset >> EXT2_BLOCK_SIZE_BITS(sb);
+ offset &= (sb->s_blocksize - 1);
+ size = (size + sb->s_blocksize - 1) >> EXT2_BLOCK_SIZE_BITS(sb); /* from here on size is the file length in blocks, rounded up */
+ blocks = (left + offset + sb->s_blocksize - 1) >> EXT2_BLOCK_SIZE_BITS(sb);
+ bhb = bhe = buflist;
+ if (filp->f_reada) { /* read-ahead: request at least the per-major read_ahead amount, but never past the last block of the file */
+ if (blocks < read_ahead[MAJOR(inode->i_dev)] >> (EXT2_BLOCK_SIZE_BITS(sb) - 9)) /* NOTE(review): the shift suggests read_ahead[] is kept in 512-byte units - confirm */
+ blocks = read_ahead[MAJOR(inode->i_dev)] >> (EXT2_BLOCK_SIZE_BITS(sb) - 9);
+ if (block + blocks > size)
+ blocks = size - block;
+ }
+
+ /*
+ * We do this in a two stage process. We first try and request
+ * as many blocks as we can, then we wait for the first one to
+ * complete, and then we try and wrap up as many as are actually
+ * done. This routine is rather generic, in that it can be used
+ * in a filesystem by substituting the appropriate function in
+ * for getblk
+ *
+ * This routine is optimized to make maximum use of the various
+ * buffers and caches.
+ *
+ * buflist is used as a ring of NBUF slots: the inner request loop
+ * stores each block it gets at bhb, the copy loop consumes slots
+ * at bhe, and both cursors wrap back to buflist[0] at the end of
+ * the array.
+ */
+
+ clusterblocks = 0;
+
+ do {
+ bhrequest = 0;
+ uptodate = 1;
+ while (blocks) {
+ --blocks;
+#if 1
+ if(!clusterblocks) clusterblocks = ext2_getcluster(inode, block);
+ if(clusterblocks) clusterblocks--;
+#endif
+
+ *bhb = ext2_getblk (inode, block++, 0, &err);
+ if (*bhb && !(*bhb)->b_uptodate) { /* only buffers that are not uptodate are queued for reading */
+ uptodate = 0;
+ bhreq[bhrequest++] = *bhb;
+ }
+
+ if (++bhb == &buflist[NBUF])
+ bhb = buflist;
+
+ /*
+ * If the block we have on hand is uptodate, go ahead
+ * and complete processing
+ */
+ if (uptodate)
+ break;
+
+ if (bhb == bhe) /* the fill cursor caught up with the consume cursor: the ring is full */
+ break;
+ }
+
+ /*
+ * Now request them all
+ */
+ if (bhrequest)
+ ll_rw_block (READ, bhrequest, bhreq);
+
+ do {
+ /*
+ * Finish off all I/O that has actually completed
+ */
+ if (*bhe) {
+ wait_on_buffer (*bhe);
+ if (!(*bhe)->b_uptodate) { /* read error? */
+ brelse(*bhe);
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ left = 0; /* ends both loops; bytes already delivered are still returned */
+ break;
+ }
+ }
+ if (left < sb->s_blocksize - offset)
+ chars = left;
+ else
+ chars = sb->s_blocksize - offset;
+ filp->f_pos += chars;
+ left -= chars;
+ read += chars;
+ if (*bhe) {
+ memcpy_tofs (buf, offset + (*bhe)->b_data,
+ chars);
+ brelse (*bhe);
+ buf += chars;
+ } else { /* ext2_getblk returned no buffer for this block: deliver zeros */
+ while (chars-- > 0)
+ put_fs_byte (0, buf++);
+ }
+ offset = 0; /* every block after the first is read from its start */
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ } while (left > 0 && bhe != bhb && (!*bhe || !(*bhe)->b_lock)); /* keep consuming while the next slot is empty or its buffer is not locked */
+ } while (left > 0);
+
+ /*
+ * Release the read-ahead blocks
+ */
+ while (bhe != bhb) {
+ brelse (*bhe);
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ }
+ if (!read)
+ return -EIO;
+ filp->f_reada = 1; /* enables the read-ahead branch above on the next call */
+ if (!IS_RDONLY(inode)) {
+ inode->i_atime = CURRENT_TIME;
+ inode->i_dirt = 1;
+ }
+ return read;
+}
+
+/*
+ * Write count bytes from buf into the regular file inode.
+ *
+ * The write starts at filp->f_pos, or at the current i_size when the
+ * file was opened with O_APPEND, and proceeds one block at a time while
+ * holding inode->i_sem.  A block that is only partly overwritten and is
+ * not uptodate is read in first.  With O_SYNC the dirtied buffers are
+ * collected (at most NBUF at a time), submitted with ll_rw_block and
+ * waited for before the function returns.
+ *
+ * Returns the number of bytes written.  When nothing could be written
+ * the result is the error instead: -EINVAL (NULL or non-regular inode),
+ * -ENOSPC (file system is mounted read-only), -EFBIG (position beyond
+ * 2GB - 1), -EIO (a partly overwritten block could not be read) or the
+ * error reported by ext2_getblk.  -EIO is also returned when one of the
+ * synchronous (O_SYNC) buffer writes failed, because in that case the
+ * data counted in "written" did not all reach the device.
+ */
+static int ext2_file_write (struct inode * inode, struct file * filp,
+			    char * buf, int count)
+{
+	const loff_t two_gb = 2147483647;
+	loff_t pos;
+	off_t pos2;
+	int written, c;
+	struct buffer_head * bh, *bufferlist[NBUF];
+	char * p;
+	struct super_block * sb;
+	int err;
+	int i, buffercount, write_error;
+
+	write_error = buffercount = 0;
+	if (!inode) {
+		printk("ext2_file_write: inode = NULL\n");
+		return -EINVAL;
+	}
+	sb = inode->i_sb;
+	if (sb->s_flags & MS_RDONLY)
+		/*
+		 * This fs has been automatically remounted ro because of errors
+		 */
+		return -ENOSPC;
+
+	if (!S_ISREG(inode->i_mode)) {
+		ext2_warning (sb, "ext2_file_write", "mode = %07o",
+			      inode->i_mode);
+		return -EINVAL;
+	}
+	down(&inode->i_sem);
+	if (filp->f_flags & O_APPEND)
+		pos = inode->i_size;
+	else
+		pos = filp->f_pos;
+	/* pos2 mirrors pos in the narrower off_t used for block arithmetic */
+	pos2 = (off_t) pos;
+	/*
+	 * If a file has been opened in synchronous mode, we have to ensure
+	 * that meta-data will also be written synchronously.  Thus, we
+	 * set the i_osync field.  This field is tested by the allocation
+	 * routines.
+	 */
+	if (filp->f_flags & O_SYNC)
+		inode->u.ext2_i.i_osync++;
+	written = 0;
+	while (written < count) {
+		if (pos > two_gb) {
+			if (!written)
+				written = -EFBIG;
+			break;
+		}
+		bh = ext2_getblk (inode, pos2 / sb->s_blocksize, 1, &err);
+		if (!bh) {
+			if (!written)
+				written = err;
+			break;
+		}
+		/* c = bytes that fit between pos2 and the end of this block */
+		c = sb->s_blocksize - (pos2 % sb->s_blocksize);
+		if (c > count-written)
+			c = count - written;
+		/* a partial overwrite needs the rest of the block's contents */
+		if (c != sb->s_blocksize && !bh->b_uptodate) {
+			ll_rw_block (READ, 1, &bh);
+			wait_on_buffer (bh);
+			if (!bh->b_uptodate) {
+				brelse (bh);
+				if (!written)
+					written = -EIO;
+				break;
+			}
+		}
+		p = (pos2 % sb->s_blocksize) + bh->b_data;
+		pos2 += c;
+		pos += c;
+		written += c;
+		memcpy_fromfs (p, buf, c);
+		buf += c;
+		bh->b_uptodate = 1;
+		mark_buffer_dirty(bh, 0);
+		/* O_SYNC keeps the reference until the buffer has been written */
+		if (filp->f_flags & O_SYNC)
+			bufferlist[buffercount++] = bh;
+		else
+			brelse(bh);
+		if (buffercount == NBUF) {
+			ll_rw_block(WRITE, buffercount, bufferlist);
+			for (i = 0; i < buffercount; i++) {
+				wait_on_buffer(bufferlist[i]);
+				if (!bufferlist[i]->b_uptodate)
+					write_error = 1;
+				brelse(bufferlist[i]);
+			}
+			buffercount = 0;
+		}
+		if (write_error)
+			break;
+	}
+	/* flush whatever is still queued for a synchronous write */
+	if (buffercount) {
+		ll_rw_block(WRITE, buffercount, bufferlist);
+		for (i = 0; i < buffercount; i++) {
+			wait_on_buffer(bufferlist[i]);
+			if (!bufferlist[i]->b_uptodate)
+				write_error = 1;
+			brelse(bufferlist[i]);
+		}
+	}
+	if (pos > inode->i_size)
+		inode->i_size = pos;
+	if (filp->f_flags & O_SYNC)
+		inode->u.ext2_i.i_osync--;
+	up(&inode->i_sem);
+	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+	filp->f_pos = pos;
+	inode->i_dirt = 1;
+	/*
+	 * write_error is only ever set after at least one buffer was
+	 * submitted, so "written" is a positive byte count here.  Handing
+	 * that count back would report a failed synchronous write as a
+	 * success.
+	 */
+	if (write_error)
+		return -EIO;
+	return written;
+}
+
+/*
+ * Release hook for ext2 regular files.  Unlike an open hook, which runs
+ * on every open, release runs only once /all/ the files referring to the
+ * inode have been closed.
+ *
+ * Blocks preallocated for the inode are given back, but only when the
+ * file being released has bit value 2 set in f_mode (presumably the
+ * "opened for writing" bit - confirm against the f_mode definition).
+ */
+static void ext2_release_file (struct inode * inode, struct file * filp)
+{
+	if (!(filp->f_mode & 2))
+		return;
+	ext2_discard_prealloc (inode);
+}
diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c
new file mode 100644
index 000000000..2f79c4749
--- /dev/null
+++ b/fs/ext2/fsync.c
@@ -0,0 +1,198 @@
+/*
+ * linux/fs/ext2/fsync.c
+ *
+ * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk)
+ * from
+ * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ * from
+ * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext2fs fsync primitive
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+
+
+#define blocksize (EXT2_BLOCK_SIZE(inode->i_sb)) /* expands to an expression on a variable named inode, which must be in scope where the macro is used */
+#define addr_per_block (EXT2_ADDR_PER_BLOCK(inode->i_sb)) /* loop bound for the entries of an indirect block; also needs inode in scope */
+
+/*
+ * Handle one block pointer of the inode for ext2_sync_file.
+ *
+ * Nothing is done (result 0) when *block is zero or the block has no
+ * buffer in the hash table.  Otherwise:
+ *   - result 1 if *block no longer holds the value sampled before the
+ *     hash table lookup;
+ *   - wait != 0: result -1 if the buffer has b_req set but is not
+ *     uptodate, else 0;
+ *   - wait == 0: a buffer that is uptodate and dirty is submitted for
+ *     writing with ll_rw_block; result 0.
+ */
+static int sync_block (struct inode * inode, unsigned long * block, int wait)
+{
+	struct buffer_head * buf;
+	int snapshot;
+	int status;
+
+	if (*block == 0)
+		return 0;
+	snapshot = *block;
+	buf = get_hash_table (inode->i_dev, *block, blocksize);
+	if (buf == NULL)
+		return 0;
+
+	if (*block != snapshot)
+		status = 1;
+	else if (wait && buf->b_req && !buf->b_uptodate)
+		status = -1;
+	else if (wait || !buf->b_uptodate || !buf->b_dirt)
+		status = 0;
+	else {
+		/* the reference is dropped by hand here instead of brelse */
+		ll_rw_block (WRITE, 1, &buf);
+		buf->b_count--;
+		return 0;
+	}
+	brelse (buf);
+	return status;
+}
+
+/*
+ * Sync an indirect block itself and hand its buffer back through *bh so
+ * that the caller can walk the block numbers stored in it.
+ *
+ * Result 0 with *bh == NULL when *iblock is zero.  A non-zero result of
+ * sync_block is passed on unchanged.  Result 1 when *iblock changed
+ * while the block was being read, -1 when bread returned no buffer, and
+ * 0 with *bh set otherwise.  *bh is NULL for every non-zero result.
+ */
+static int sync_iblock (struct inode * inode, unsigned long * iblock,
+			struct buffer_head ** bh, int wait)
+{
+	int blocknr;
+	int status;
+
+	*bh = NULL;
+	blocknr = *iblock;
+	if (blocknr == 0)
+		return 0;
+
+	status = sync_block (inode, iblock, wait);
+	if (status != 0)
+		return status;
+
+	*bh = bread (inode->i_dev, blocknr, blocksize);
+	if (*iblock != blocknr) {
+		brelse (*bh);
+		*bh = NULL;
+		return 1;
+	}
+	return *bh ? 0 : -1;
+}
+
+
+/*
+ * Run sync_block over the first EXT2_NDIR_BLOCKS entries of the inode's
+ * i_data array.  The walk stops at the first positive result; the last
+ * negative result seen (or 0) is returned.
+ */
+static int sync_direct (struct inode * inode, int wait)
+{
+	int nr = 0;
+	int status, result = 0;
+
+	while (nr < EXT2_NDIR_BLOCKS) {
+		status = sync_block (inode, &inode->u.ext2_i.i_data[nr], wait);
+		nr++;
+		if (status > 0)
+			break;
+		if (status < 0)
+			result = status;
+	}
+	return result;
+}
+
+/*
+ * Sync the indirect block whose number is stored at *iblock, then every
+ * block it points to.  If sync_iblock reports a non-zero result or
+ * yields no buffer, its result is returned as is.  Otherwise the walk
+ * over the addr_per_block entries stops at the first positive
+ * sync_block result and the last negative result seen (or 0) is
+ * returned.
+ */
+static int sync_indirect (struct inode * inode, unsigned long * iblock,
+			  int wait)
+{
+	struct buffer_head * table;
+	int nr;
+	int status, result = 0;
+
+	status = sync_iblock (inode, iblock, &table, wait);
+	if (status != 0 || table == NULL)
+		return status;
+
+	nr = 0;
+	while (nr < addr_per_block) {
+		status = sync_block (inode,
+				     ((unsigned long *) table->b_data) + nr,
+				     wait);
+		nr++;
+		if (status > 0)
+			break;
+		if (status < 0)
+			result = status;
+	}
+	brelse (table);
+	return result;
+}
+
+/*
+ * Sync the double indirect block whose number is stored at *diblock and,
+ * through sync_indirect, everything below it.  Result handling is the
+ * same as in sync_indirect: a non-zero sync_iblock result (or a missing
+ * buffer) is returned directly, a positive result from an entry ends
+ * the walk, and the last negative result seen (or 0) is returned.
+ */
+static int sync_dindirect (struct inode * inode, unsigned long * diblock,
+			   int wait)
+{
+	struct buffer_head * table;
+	int nr;
+	int status, result = 0;
+
+	status = sync_iblock (inode, diblock, &table, wait);
+	if (status != 0 || table == NULL)
+		return status;
+
+	nr = 0;
+	while (nr < addr_per_block) {
+		status = sync_indirect (inode,
+					((unsigned long *) table->b_data) + nr,
+					wait);
+		nr++;
+		if (status > 0)
+			break;
+		if (status < 0)
+			result = status;
+	}
+	brelse (table);
+	return result;
+}
+
+/*
+ * Sync the triple indirect block whose number is stored at *tiblock and,
+ * through sync_dindirect, everything below it.  A non-zero sync_iblock
+ * result (or a missing buffer) is returned directly, a positive result
+ * from an entry ends the walk, and the last negative result seen (or 0)
+ * is returned.
+ */
+static int sync_tindirect (struct inode * inode, unsigned long * tiblock,
+			   int wait)
+{
+	struct buffer_head * table;
+	int nr;
+	int status, result = 0;
+
+	status = sync_iblock (inode, tiblock, &table, wait);
+	if (status != 0 || table == NULL)
+		return status;
+
+	nr = 0;
+	while (nr < addr_per_block) {
+		status = sync_dindirect (inode,
+					 ((unsigned long *) table->b_data) + nr,
+					 wait);
+		nr++;
+		if (status > 0)
+			break;
+		if (status < 0)
+			result = status;
+	}
+	brelse (table);
+	return result;
+}
+
+/*
+ * fsync entry point for ext2 regular files, directories and symbolic
+ * links; any other inode type yields -EINVAL.
+ *
+ * The block tree (direct, indirect, double and triple indirect) is
+ * walked twice, first with wait == 0 and then with wait == 1, and the
+ * inode itself is synced with ext2_sync_inode afterwards.  A symbolic
+ * link with i_blocks == 0 (a "fast" link) skips the block walk.
+ *
+ * All partial results are OR-ed together; the function returns -EIO if
+ * the combination is negative and 0 otherwise.
+ */
+int ext2_sync_file (struct inode * inode, struct file * file)
+{
+	int pass;
+	int status = 0;
+	int fast_link;
+
+	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
+	    !S_ISLNK(inode->i_mode))
+		return -EINVAL;
+
+	/* Don't sync fast links! */
+	fast_link = S_ISLNK(inode->i_mode) && !(inode->i_blocks);
+	if (!fast_link) {
+		for (pass = 0; pass < 2; pass++) {
+			status |= sync_direct (inode, pass);
+			status |= sync_indirect (inode,
+					inode->u.ext2_i.i_data+EXT2_IND_BLOCK,
+					pass);
+			status |= sync_dindirect (inode,
+					inode->u.ext2_i.i_data+EXT2_DIND_BLOCK,
+					pass);
+			status |= sync_tindirect (inode,
+					inode->u.ext2_i.i_data+EXT2_TIND_BLOCK,
+					pass);
+		}
+	}
+	status |= ext2_sync_inode (inode);
+	if (status < 0)
+		return -EIO;
+	return 0;
+}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
new file mode 100644
index 000000000..69c9e2224
--- /dev/null
+++ b/fs/ext2/ialloc.c
@@ -0,0 +1,554 @@
+/*
+ * linux/fs/ext2/ialloc.c
+ *
+ * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * BSD ufs-inspired inode and directory allocation by
+ * Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
+ */
+
+/*
+ * ialloc.c contains the inode allocation and deallocation routines
+ */
+
+/*
+ * The free inodes are managed by bitmaps. A file system contains several
+ * block groups. Each group contains 1 bitmap block for blocks, 1 bitmap
+ * block for inodes, N blocks for the inode table and data blocks.
+ *
+ * The file system contains group descriptors which are located after the
+ * super block. Each descriptor contains the number of the bitmap block and
+ * the free blocks count in the block. The descriptors are loaded in memory
+ * when a file system is mounted (see ext2_read_super).
+ */
+
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+
+#include <asm/bitops.h>
+
+/*
+ * Return the descriptor of block group block_group, taken from the
+ * descriptor buffers kept in sb->u.ext2_sb.s_group_desc[].  When bh is
+ * not NULL, *bh receives the buffer that holds the descriptor.
+ *
+ * ext2_panic is called when block_group is not below s_groups_count or
+ * when the buffer that should hold the descriptor is missing.
+ */
+static struct ext2_group_desc * get_group_desc (struct super_block * sb,
+						 unsigned int block_group,
+						 struct buffer_head ** bh)
+{
+	unsigned long table_block;
+	unsigned long slot;
+	struct buffer_head * table_bh;
+
+	if (block_group >= sb->u.ext2_sb.s_groups_count)
+		ext2_panic (sb, "get_group_desc",
+			    "block_group >= groups_count - "
+			    "block_group = %d, groups_count = %lu",
+			    block_group, sb->u.ext2_sb.s_groups_count);
+
+	/* which descriptor buffer, and which entry inside that buffer */
+	table_block = block_group / EXT2_DESC_PER_BLOCK(sb);
+	slot = block_group % EXT2_DESC_PER_BLOCK(sb);
+
+	table_bh = sb->u.ext2_sb.s_group_desc[table_block];
+	if (table_bh == NULL)
+		ext2_panic (sb, "get_group_desc",
+			    "Group descriptor not loaded - "
+			    "block_group = %d, group_desc = %lu, desc = %lu",
+			    block_group, table_block, slot);
+	if (bh != NULL)
+		*bh = table_bh;
+	return ((struct ext2_group_desc *) table_bh->b_data) + slot;
+}
+
+/*
+ * Read the inode bitmap block of block group block_group and record it
+ * in slot bitmap_nr of the per-superblock bitmap cache (the group number
+ * in s_inode_bitmap_number[], the buffer in s_inode_bitmap[]).
+ * ext2_panic is called when the bitmap block cannot be read.
+ */
+static void read_inode_bitmap (struct super_block * sb,
+			       unsigned long block_group,
+			       unsigned int bitmap_nr)
+{
+	struct ext2_group_desc * desc;
+	struct buffer_head * bitmap_bh;
+
+	desc = get_group_desc (sb, block_group, NULL);
+	bitmap_bh = bread (sb->s_dev, desc->bg_inode_bitmap, sb->s_blocksize);
+	if (bitmap_bh == NULL)
+		ext2_panic (sb, "read_inode_bitmap",
+			    "Cannot read inode bitmap - "
+			    "block_group = %lu, inode_bitmap = %lu",
+			    block_group, desc->bg_inode_bitmap);
+
+	sb->u.ext2_sb.s_inode_bitmap[bitmap_nr] = bitmap_bh;
+	sb->u.ext2_sb.s_inode_bitmap_number[bitmap_nr] = block_group;
+}
+
+/*
+ * load_inode_bitmap loads the inode bitmap for a block group
+ *
+ * It maintains a cache for the last bitmaps loaded. This cache is managed
+ * with a LRU algorithm.
+ *
+ * Notes:
+ * 1/ There is one cache per mounted file system.
+ * 2/ If the file system contains at most EXT2_MAX_GROUP_LOADED groups,
+ * this function reads the bitmap without maintaining a LRU cache.
+ *
+ * The return value is the index into sb->u.ext2_sb.s_inode_bitmap[] of
+ * the slot that holds the bitmap buffer of block_group.  In the case of
+ * note 2 the slot index is the group number itself; otherwise the
+ * requested bitmap is moved (or read) into slot 0 and 0 is returned.
+ * ext2_panic is called when block_group is not below s_groups_count.
+ */
+static int load_inode_bitmap (struct super_block * sb,
+ unsigned int block_group)
+{
+ int i, j;
+ unsigned long inode_bitmap_number;
+ struct buffer_head * inode_bitmap;
+
+ if (block_group >= sb->u.ext2_sb.s_groups_count)
+ ext2_panic (sb, "load_inode_bitmap",
+ "block_group >= groups_count - "
+ "block_group = %d, groups_count = %lu",
+ block_group, sb->u.ext2_sb.s_groups_count);
+ if (sb->u.ext2_sb.s_loaded_inode_bitmaps > 0 &&
+ sb->u.ext2_sb.s_inode_bitmap_number[0] == block_group) /* fast path: the wanted bitmap already sits in slot 0 */
+ return 0;
+ if (sb->u.ext2_sb.s_groups_count <= EXT2_MAX_GROUP_LOADED) { /* every group has its own slot: slot index == group number */
+ if (sb->u.ext2_sb.s_inode_bitmap[block_group]) {
+ if (sb->u.ext2_sb.s_inode_bitmap_number[block_group] != block_group)
+ ext2_panic (sb, "load_inode_bitmap",
+ "block_group != inode_bitmap_number");
+ else
+ return block_group;
+ } else {
+ read_inode_bitmap (sb, block_group, block_group);
+ return block_group;
+ }
+ }
+
+ for (i = 0; i < sb->u.ext2_sb.s_loaded_inode_bitmaps &&
+ sb->u.ext2_sb.s_inode_bitmap_number[i] != block_group;
+ i++) /* linear search of the loaded slots for block_group */
+ ;
+ if (i < sb->u.ext2_sb.s_loaded_inode_bitmaps &&
+ sb->u.ext2_sb.s_inode_bitmap_number[i] == block_group) { /* hit: shift slots 0..i-1 down by one and put the hit in slot 0 */
+ inode_bitmap_number = sb->u.ext2_sb.s_inode_bitmap_number[i];
+ inode_bitmap = sb->u.ext2_sb.s_inode_bitmap[i];
+ for (j = i; j > 0; j--) {
+ sb->u.ext2_sb.s_inode_bitmap_number[j] =
+ sb->u.ext2_sb.s_inode_bitmap_number[j - 1];
+ sb->u.ext2_sb.s_inode_bitmap[j] =
+ sb->u.ext2_sb.s_inode_bitmap[j - 1];
+ }
+ sb->u.ext2_sb.s_inode_bitmap_number[0] = inode_bitmap_number;
+ sb->u.ext2_sb.s_inode_bitmap[0] = inode_bitmap;
+ } else { /* miss: grow the cache, or release the buffer in the last slot when it is full */
+ if (sb->u.ext2_sb.s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED)
+ sb->u.ext2_sb.s_loaded_inode_bitmaps++;
+ else
+ brelse (sb->u.ext2_sb.s_inode_bitmap[EXT2_MAX_GROUP_LOADED - 1]);
+ for (j = sb->u.ext2_sb.s_loaded_inode_bitmaps - 1; j > 0; j--) { /* shift every slot down by one to free slot 0 */
+ sb->u.ext2_sb.s_inode_bitmap_number[j] =
+ sb->u.ext2_sb.s_inode_bitmap_number[j - 1];
+ sb->u.ext2_sb.s_inode_bitmap[j] =
+ sb->u.ext2_sb.s_inode_bitmap[j - 1];
+ }
+ read_inode_bitmap (sb, block_group, 0);
+ }
+ return 0;
+}
+
+/*
+ * Record the deletion of an inode in its on-disk copy: i_links_count is
+ * set to 0 and i_dtime to the current time.  The deletion time may be
+ * used one day by an 'undelete' program.
+ *
+ * gdp is the descriptor of the inode's block group; its bg_inode_table
+ * locates the inode table block.  ext2_panic is called when that block
+ * cannot be read.  For IS_SYNC inodes the block is written and waited
+ * for before the buffer is released.
+ */
+static void set_inode_dtime (struct inode * inode,
+			     struct ext2_group_desc * gdp)
+{
+	unsigned long index_in_group;
+	unsigned long table_block;
+	struct buffer_head * table_bh;
+	struct ext2_inode * disk_inode;
+
+	index_in_group = (inode->i_ino - 1) %
+		EXT2_INODES_PER_GROUP(inode->i_sb);
+	table_block = gdp->bg_inode_table +
+		(index_in_group / EXT2_INODES_PER_BLOCK(inode->i_sb));
+
+	table_bh = bread (inode->i_sb->s_dev, table_block,
+			  inode->i_sb->s_blocksize);
+	if (table_bh == NULL)
+		ext2_panic (inode->i_sb, "set_inode_dtime",
+			    "Cannot load inode table block - "
+			    "inode=%lu, inode_block=%lu",
+			    inode->i_ino, table_block);
+
+	disk_inode = (struct ext2_inode *) table_bh->b_data;
+	disk_inode += index_in_group % EXT2_INODES_PER_BLOCK(inode->i_sb);
+	disk_inode->i_links_count = 0;
+	disk_inode->i_dtime = CURRENT_TIME;
+	mark_buffer_dirty(table_bh, 1);
+	if (IS_SYNC(inode)) {
+		ll_rw_block (WRITE, 1, &table_bh);
+		wait_on_buffer (table_bh);
+	}
+	brelse (table_bh);
+}
+
+/*
+ * Give an inode back to the file system: clear its bit in the inode
+ * bitmap of its block group, adjust the free inode counts in the group
+ * descriptor and the super block (and the directory count of the group
+ * for a directory), stamp the on-disk inode with a deletion time and
+ * clear the in-core inode.
+ *
+ * The call is refused, with a message, for an inode without a device or
+ * super block, with i_count > 1, with a non-zero link count, or with an
+ * inode number below EXT2_FIRST_INO or above s_inodes_count.  A NULL
+ * inode is ignored silently.  If the bit was already clear only a
+ * warning is issued and the counts are left alone.
+ */
+void ext2_free_inode (struct inode * inode)
+{
+	struct super_block * sb;
+	struct ext2_super_block * es;
+	struct ext2_group_desc * desc;
+	struct buffer_head * bitmap_bh;
+	struct buffer_head * desc_bh;
+	unsigned long group;
+	unsigned long bit;
+	int slot;
+
+	if (!inode)
+		return;
+	if (!inode->i_dev) {
+		printk ("ext2_free_inode: inode has no device\n");
+		return;
+	}
+	if (inode->i_count > 1) {
+		printk ("ext2_free_inode: inode has count=%d\n",
+			inode->i_count);
+		return;
+	}
+	if (inode->i_nlink) {
+		printk ("ext2_free_inode: inode has nlink=%d\n",
+			inode->i_nlink);
+		return;
+	}
+	if (!inode->i_sb) {
+		printk("ext2_free_inode: inode on nonexistent device\n");
+		return;
+	}
+
+	ext2_debug ("freeing inode %lu\n", inode->i_ino);
+
+	sb = inode->i_sb;
+	lock_super (sb);
+	if (inode->i_ino < EXT2_FIRST_INO ||
+	    inode->i_ino > sb->u.ext2_sb.s_es->s_inodes_count) {
+		ext2_error (sb, "free_inode",
+			    "reserved inode or nonexistent inode");
+		unlock_super (sb);
+		return;
+	}
+	es = sb->u.ext2_sb.s_es;
+
+	/* inode numbers start at 1; locate the group and the bit in it */
+	group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(sb);
+	bit = (inode->i_ino - 1) % EXT2_INODES_PER_GROUP(sb);
+	slot = load_inode_bitmap (sb, group);
+	bitmap_bh = sb->u.ext2_sb.s_inode_bitmap[slot];
+
+	if (clear_bit (bit, bitmap_bh->b_data)) {
+		desc = get_group_desc (sb, group, &desc_bh);
+		desc->bg_free_inodes_count++;
+		if (S_ISDIR(inode->i_mode))
+			desc->bg_used_dirs_count--;
+		mark_buffer_dirty(desc_bh, 1);
+		es->s_free_inodes_count++;
+		mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1);
+		set_inode_dtime (inode, desc);
+	} else
+		ext2_warning (sb, "ext2_free_inode",
+			      "bit already cleared for inode %lu", inode->i_ino);
+
+	mark_buffer_dirty(bitmap_bh, 1);
+	if (sb->s_flags & MS_SYNC) {
+		ll_rw_block (WRITE, 1, &bitmap_bh);
+		wait_on_buffer (bitmap_bh);
+	}
+
+	sb->s_dirt = 1;
+	clear_inode (inode);
+	unlock_super (sb);
+}
+
+/*
+ * Increment the version number stored in the on-disk inode and copy the
+ * new value into the in-core inode.  This may be used one day by the
+ * NFS server.
+ *
+ * gdp is the descriptor of the inode's block group.  If the inode table
+ * block cannot be read an error is reported and the in-core version is
+ * set to 1.  The mode argument is not used.
+ */
+static void inc_inode_version (struct inode * inode,
+			       struct ext2_group_desc *gdp,
+			       int mode)
+{
+	unsigned long index_in_group;
+	unsigned long table_block;
+	struct buffer_head * table_bh;
+	struct ext2_inode * disk_inode;
+
+	index_in_group = (inode->i_ino - 1) %
+		EXT2_INODES_PER_GROUP(inode->i_sb);
+	table_block = gdp->bg_inode_table +
+		(index_in_group / EXT2_INODES_PER_BLOCK(inode->i_sb));
+
+	table_bh = bread (inode->i_sb->s_dev, table_block,
+			  inode->i_sb->s_blocksize);
+	if (table_bh == NULL) {
+		ext2_error (inode->i_sb, "inc_inode_version",
+			    "Cannot load inode table block - "
+			    "inode=%lu, inode_block=%lu\n",
+			    inode->i_ino, table_block);
+		inode->u.ext2_i.i_version = 1;
+		return;
+	}
+
+	disk_inode = (struct ext2_inode *) table_bh->b_data;
+	disk_inode += index_in_group % EXT2_INODES_PER_BLOCK(inode->i_sb);
+	disk_inode->i_version++;
+	inode->u.ext2_i.i_version = disk_inode->i_version;
+	mark_buffer_dirty(table_bh, 1);
+	brelse (table_bh);
+}
+
+/*
+ * Allocate a new inode for an object of type "mode" created in the
+ * directory "dir".  Returns the new in-core inode, or NULL when dir is
+ * NULL, no empty in-core inode is available, no block group with a free
+ * inode is found, the free inode counts turn out to be corrupted, or
+ * the inode number found is reserved or beyond s_inodes_count.
+ *
+ * There are two policies for choosing the block group.
+ *
+ * For a directory, all groups are scanned and, among the groups that
+ * have free inodes and at least the average number of free inodes, the
+ * one with the most free blocks is chosen.  (A first pass preferring a
+ * group with a low directory-to-inode ratio exists only as commented-out
+ * code below.)
+ *
+ * For other inodes, the parent directory's block group is tried first,
+ * then groups at growing power-of-two distances from it, and finally a
+ * linear search over the remaining groups.
+ */
+struct inode * ext2_new_inode (const struct inode * dir, int mode)
+{
+	struct super_block * sb;
+	struct buffer_head * bh;
+	struct buffer_head * bh2;
+	int i, j, avefreei;
+	struct inode * inode;
+	int bitmap_nr;
+	struct ext2_group_desc * gdp;
+	struct ext2_group_desc * tmp;
+	struct ext2_super_block * es;
+
+	if (!dir || !(inode = get_empty_inode ()))
+		return NULL;
+	sb = dir->i_sb;
+	inode->i_sb = sb;
+	inode->i_flags = sb->s_flags;
+	lock_super (sb);
+	es = sb->u.ext2_sb.s_es;
+repeat:
+	gdp = NULL; i=0;
+
+	if (S_ISDIR(mode)) {
+		avefreei = es->s_free_inodes_count /
+			sb->u.ext2_sb.s_groups_count;
+/* I am not yet convinced that this next bit is necessary.
+		i = dir->u.ext2_i.i_block_group;
+		for (j = 0; j < sb->u.ext2_sb.s_groups_count; j++) {
+			tmp = get_group_desc (sb, i, &bh2);
+			if ((tmp->bg_used_dirs_count << 8) <
+			    tmp->bg_free_inodes_count) {
+				gdp = tmp;
+				break;
+			}
+			else
+			i = ++i % sb->u.ext2_sb.s_groups_count;
+		}
+*/
+		if (!gdp) {
+			for (j = 0; j < sb->u.ext2_sb.s_groups_count; j++) {
+				tmp = get_group_desc (sb, j, &bh2);
+				if (tmp->bg_free_inodes_count &&
+					tmp->bg_free_inodes_count >= avefreei) {
+					if (!gdp ||
+					    (tmp->bg_free_blocks_count >
+					     gdp->bg_free_blocks_count)) {
+						i = j;
+						gdp = tmp;
+					}
+				}
+			}
+		}
+	}
+	else
+	{
+		/*
+		 * Try to place the inode in its parent directory
+		 */
+		i = dir->u.ext2_i.i_block_group;
+		tmp = get_group_desc (sb, i, &bh2);
+		if (tmp->bg_free_inodes_count)
+			gdp = tmp;
+		else
+		{
+			/*
+			 * Use a quadratic hash to find a group with a
+			 * free inode
+			 */
+			for (j = 1; j < sb->u.ext2_sb.s_groups_count; j <<= 1) {
+				i += j;
+				if (i >= sb->u.ext2_sb.s_groups_count)
+					i -= sb->u.ext2_sb.s_groups_count;
+				tmp = get_group_desc (sb, i, &bh2);
+				if (tmp->bg_free_inodes_count) {
+					gdp = tmp;
+					break;
+				}
+			}
+		}
+		if (!gdp) {
+			/*
+			 * That failed: try linear search for a free inode
+			 */
+			i = dir->u.ext2_i.i_block_group + 1;
+			for (j = 2; j < sb->u.ext2_sb.s_groups_count; j++) {
+				if (++i >= sb->u.ext2_sb.s_groups_count)
+					i = 0;
+				tmp = get_group_desc (sb, i, &bh2);
+				if (tmp->bg_free_inodes_count) {
+					gdp = tmp;
+					break;
+				}
+			}
+		}
+	}
+
+	if (!gdp) {
+		unlock_super (sb);
+		iput(inode);
+		return NULL;
+	}
+	/*
+	 * The directory scan above calls get_group_desc for every group, so
+	 * bh2 is left pointing at the descriptor buffer of the last group
+	 * examined rather than of the group that was chosen.  Fetch the
+	 * descriptor of group i again so that gdp and bh2 belong together
+	 * before bh2 is marked dirty below.
+	 */
+	gdp = get_group_desc (sb, i, &bh2);
+	bitmap_nr = load_inode_bitmap (sb, i);
+	bh = sb->u.ext2_sb.s_inode_bitmap[bitmap_nr];
+	if ((j = find_first_zero_bit ((unsigned long *) bh->b_data,
+				      EXT2_INODES_PER_GROUP(sb))) <
+	    EXT2_INODES_PER_GROUP(sb)) {
+		if (set_bit (j, bh->b_data)) {
+			ext2_warning (sb, "ext2_new_inode",
+				      "bit already set for inode %d", j);
+			goto repeat;
+		}
+		mark_buffer_dirty(bh, 1);
+		if (sb->s_flags & MS_SYNC) {
+			ll_rw_block (WRITE, 1, &bh);
+			wait_on_buffer (bh);
+		}
+	} else {
+		if (gdp->bg_free_inodes_count != 0) {
+			ext2_error (sb, "ext2_new_inode",
+				    "Free inodes count corrupted in group %d",
+				    i);
+			unlock_super (sb);
+			iput (inode);
+			return NULL;
+		}
+		goto repeat;
+	}
+	/* turn the bit index inside group i into an inode number (1-based) */
+	j += i * EXT2_INODES_PER_GROUP(sb) + 1;
+	if (j < EXT2_FIRST_INO || j > es->s_inodes_count) {
+		ext2_error (sb, "ext2_new_inode",
+			    "reserved inode or inode > inodes count - "
+			    "block_group = %d,inode=%d", i, j);
+		unlock_super (sb);
+		iput (inode);
+		return NULL;
+	}
+	gdp->bg_free_inodes_count--;
+	if (S_ISDIR(mode))
+		gdp->bg_used_dirs_count++;
+	mark_buffer_dirty(bh2, 1);
+	es->s_free_inodes_count--;
+	mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1);
+	sb->s_dirt = 1;
+	inode->i_sb = sb;
+	inode->i_count = 1;
+	inode->i_nlink = 1;
+	inode->i_dev = sb->s_dev;
+	inode->i_uid = current->fsuid;
+	if (test_opt (sb, GRPID))
+		inode->i_gid = dir->i_gid;
+	else if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		if (S_ISDIR(mode))
+			mode |= S_ISGID;
+	} else
+		inode->i_gid = current->fsgid;
+	/*
+	 * Store the mode only now: the set-group-ID bit inherited from the
+	 * parent directory has just been added to it.
+	 */
+	inode->i_mode = mode;
+	inode->i_dirt = 1;
+	inode->i_ino = j;
+	inode->i_blksize = sb->s_blocksize;
+	inode->i_blocks = 0;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode->u.ext2_i.i_flags = dir->u.ext2_i.i_flags;
+	if (S_ISLNK(mode))
+		inode->u.ext2_i.i_flags &= ~(EXT2_IMMUTABLE_FL | EXT2_APPEND_FL);
+	inode->u.ext2_i.i_faddr = 0;
+	inode->u.ext2_i.i_frag_no = 0;
+	inode->u.ext2_i.i_frag_size = 0;
+	inode->u.ext2_i.i_file_acl = 0;
+	inode->u.ext2_i.i_dir_acl = 0;
+	inode->u.ext2_i.i_dtime = 0;
+	inode->u.ext2_i.i_block_group = i;
+	inode->i_op = NULL;
+	if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL)
+		inode->i_flags |= MS_SYNC;
+	insert_inode_hash(inode);
+	inc_inode_version (inode, gdp, mode);
+
+	ext2_debug ("allocating inode %lu\n", inode->i_ino);
+
+	unlock_super (sb);
+	return inode;
+}
+
+unsigned long ext2_count_free_inodes (struct super_block * sb)
+{ /*
+ * Return the number of free inodes of the file system.  Without
+ * EXT2FS_DEBUG this is simply the count stored in the super block.
+ * With EXT2FS_DEBUG every group is visited under the super block
+ * lock, the stored and the bitmap-derived counts are printed, and
+ * the sum of the group descriptor counts is returned.
+ */
+#ifdef EXT2FS_DEBUG
+ struct ext2_super_block * es;
+ unsigned long desc_count, bitmap_count, x; /* sum of descriptor counts, sum of bitmap counts, bitmap count of the current group */
+ int bitmap_nr;
+ struct ext2_group_desc * gdp;
+ int i;
+
+ lock_super (sb);
+ es = sb->u.ext2_sb.s_es;
+ desc_count = 0;
+ bitmap_count = 0;
+ gdp = NULL;
+ for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) {
+ gdp = get_group_desc (sb, i, NULL);
+ desc_count += gdp->bg_free_inodes_count;
+ bitmap_nr = load_inode_bitmap (sb, i);
+ x = ext2_count_free (sb->u.ext2_sb.s_inode_bitmap[bitmap_nr],
+ EXT2_INODES_PER_GROUP(sb) / 8); /* second argument is the bitmap length in bytes (one bit per inode) */
+ printk ("group %d: stored = %d, counted = %lu\n",
+ i, gdp->bg_free_inodes_count, x);
+ bitmap_count += x;
+ }
+ printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n",
+ es->s_free_inodes_count, desc_count, bitmap_count);
+ unlock_super (sb);
+ return desc_count;
+#else
+ return sb->u.ext2_sb.s_es->s_free_inodes_count;
+#endif
+}
+
+/*
+ * Consistency check of the free inode accounting, run under the super
+ * block lock.  For every block group the free count stored in the group
+ * descriptor is compared with the count derived from the inode bitmap;
+ * afterwards the count stored in the super block is compared with the
+ * total derived from all bitmaps.  Every mismatch is reported through
+ * ext2_error.
+ */
+void ext2_check_inodes_bitmap (struct super_block * sb)
+{
+	struct ext2_super_block * es;
+	struct ext2_group_desc * desc;
+	unsigned long from_descs, from_bitmaps, counted;
+	int slot;
+	int group;
+
+	lock_super (sb);
+	es = sb->u.ext2_sb.s_es;
+	from_descs = from_bitmaps = 0;
+	desc = NULL;
+	group = 0;
+	while (group < sb->u.ext2_sb.s_groups_count) {
+		desc = get_group_desc (sb, group, NULL);
+		from_descs += desc->bg_free_inodes_count;
+		slot = load_inode_bitmap (sb, group);
+		counted = ext2_count_free (sb->u.ext2_sb.s_inode_bitmap[slot],
+					   EXT2_INODES_PER_GROUP(sb) / 8);
+		if (desc->bg_free_inodes_count != counted)
+			ext2_error (sb, "ext2_check_inodes_bitmap",
+				    "Wrong free inodes count in group %d, "
+				    "stored = %d, counted = %lu", group,
+				    desc->bg_free_inodes_count, counted);
+		from_bitmaps += counted;
+		group++;
+	}
+	if (es->s_free_inodes_count != from_bitmaps)
+		ext2_error (sb, "ext2_check_inodes_bitmap",
+			    "Wrong free inodes count in super block, "
+			    "stored = %lu, counted = %lu",
+			    es->s_free_inodes_count, from_bitmaps);
+	unlock_super (sb);
+}
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
new file mode 100644
index 000000000..633c33e4f
--- /dev/null
+++ b/fs/ext2/inode.c
@@ -0,0 +1,667 @@
+/*
+ * linux/fs/ext2/inode.c
+ *
+ * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/inode.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Goal-directed block allocation by Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+
+/*
+ * ext2_put_inode()
+ *
+ * Gives back any preallocated blocks, then, if the inode has no links
+ * left and is not one of the two ACL inodes, truncates it to zero length
+ * and frees the inode itself.
+ */
+void ext2_put_inode (struct inode * inode)
+{
+	int unreferenced;
+
+	ext2_discard_prealloc (inode);
+
+	unreferenced = !inode->i_nlink &&
+		       inode->i_ino != EXT2_ACL_IDX_INO &&
+		       inode->i_ino != EXT2_ACL_DATA_INO;
+	if (unreferenced) {
+		inode->i_size = 0;
+		if (inode->i_blocks)
+			ext2_truncate (inode);
+		ext2_free_inode (inode);
+	}
+}
+
+/* Block number stored in slot 'nr' of the inode's in-core block array. */
+#define inode_bmap(inode, nr) ((inode)->u.ext2_i.i_data[(nr)])
+
+/*
+ * block_bmap()
+ *
+ * Reads entry 'nr' of the block-number table held in 'bh' and releases
+ * the buffer.  A NULL buffer yields 0.
+ */
+static int block_bmap (struct buffer_head * bh, int nr)
+{
+	unsigned long * table;
+	int entry = 0;
+
+	if (bh) {
+		table = (unsigned long *) bh->b_data;
+		entry = table[nr];
+		brelse (bh);
+	}
+	return entry;
+}
+
+/*
+ * ext2_discard_prealloc and ext2_alloc_block are atomic with respect to
+ * the superblock in the same way as ext2_free_blocks and ext2_new_block.
+ * The super block is only waited on, not locked, here: ext2_new_block
+ * does the necessary locking itself and we can't block until then.
+ *
+ * ext2_discard_prealloc() returns the inode's remaining preallocated
+ * blocks to the free pool.  The count is zeroed before the blocks are
+ * freed.  Without EXT2_PREALLOCATE it does nothing.
+ */
+void ext2_discard_prealloc (struct inode * inode)
+{
+#ifdef EXT2_PREALLOCATE
+	int reserved = inode->u.ext2_i.i_prealloc_count;
+
+	if (!reserved)
+		return;
+	inode->u.ext2_i.i_prealloc_count = 0;
+	ext2_free_blocks (inode->i_sb, inode->u.ext2_i.i_prealloc_block,
+			  reserved);
+#endif
+}
+/*
+ * ext2_alloc_block()
+ *
+ * Hands out one block for the inode, using 'goal' as the placement hint,
+ * and returns its block number.
+ *
+ * With EXT2_PREALLOCATE defined, a request whose goal is the first
+ * preallocated block (or the block just before it) is served from the
+ * inode's preallocation window, and the block is zeroed through the
+ * buffer cache.  Any other goal discards the window and calls
+ * ext2_new_block(), which is handed the window fields only for regular
+ * files.  Without EXT2_PREALLOCATE, ext2_new_block() is always used.
+ *
+ * Returns 0 when getblk() fails on a preallocated block.  Callers in this
+ * file treat a 0 result as "no block".
+ *
+ * NOTE(review): alloc_hits/alloc_attempts exist only under EXT2FS_DEBUG
+ * but are named in ext2_debug() arguments; presumably ext2_debug()
+ * expands to nothing otherwise - confirm against ext2_fs.h.
+ */
+static int ext2_alloc_block (struct inode * inode, unsigned long goal)
+{
+#ifdef EXT2FS_DEBUG
+ static unsigned long alloc_hits = 0, alloc_attempts = 0;
+#endif
+ unsigned long result;
+ struct buffer_head * bh;
+
+ wait_on_super (inode->i_sb);
+
+#ifdef EXT2_PREALLOCATE
+ if (inode->u.ext2_i.i_prealloc_count &&
+ (goal == inode->u.ext2_i.i_prealloc_block ||
+ goal + 1 == inode->u.ext2_i.i_prealloc_block))
+ {
+ result = inode->u.ext2_i.i_prealloc_block++; /* take the first block of the window */
+ inode->u.ext2_i.i_prealloc_count--;
+ ext2_debug ("preallocation hit (%lu/%lu).\n",
+ ++alloc_hits, ++alloc_attempts);
+
+ /* It doesn't matter if we block in getblk() since
+ we have already atomically allocated the block, and
+ are only clearing it now. */
+ if (!(bh = getblk (inode->i_sb->s_dev, result,
+ inode->i_sb->s_blocksize))) {
+ ext2_error (inode->i_sb, "ext2_alloc_block",
+ "cannot get block %lu", result);
+ return 0;
+ }
+ memset(bh->b_data, 0, inode->i_sb->s_blocksize);
+ bh->b_uptodate = 1; /* zeroed contents are valid; no read needed */
+ mark_buffer_dirty(bh, 1);
+ brelse (bh);
+ } else {
+ ext2_discard_prealloc (inode); /* goal is outside the window: drop it */
+ ext2_debug ("preallocation miss (%lu/%lu).\n",
+ alloc_hits, ++alloc_attempts);
+ if (S_ISREG(inode->i_mode))
+ result = ext2_new_block
+ (inode->i_sb, goal,
+ &inode->u.ext2_i.i_prealloc_count,
+ &inode->u.ext2_i.i_prealloc_block);
+ else
+ result = ext2_new_block (inode->i_sb, goal, 0, 0);
+ }
+#else
+ result = ext2_new_block (inode->i_sb, goal, 0, 0);
+#endif
+
+ return result;
+}
+
+
+/*
+ * ext2_bmap()
+ *
+ * Translates a logical block index of the file into the block number
+ * recorded for it, following the direct, single, double or triple
+ * indirect chain as needed.  Nothing is allocated.  Returns 0 when the
+ * index is out of range (with a warning) or when any link of the chain
+ * is 0 or cannot be read.
+ */
+int ext2_bmap (struct inode * inode, int block)
+{
+	int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+	int path[3];	/* table index to use at each indirection level */
+	int root;	/* i_data slot that starts the chain */
+	int depth;	/* number of indirection levels to walk */
+	int level;
+	int i;
+
+	if (block < 0) {
+		ext2_warning (inode->i_sb, "ext2_bmap", "block < 0");
+		return 0;
+	}
+	if (block >= EXT2_NDIR_BLOCKS + addr_per_block +
+		     addr_per_block * addr_per_block +
+		     addr_per_block * addr_per_block * addr_per_block) {
+		ext2_warning (inode->i_sb, "ext2_bmap", "block > big");
+		return 0;
+	}
+	if (block < EXT2_NDIR_BLOCKS)
+		return inode_bmap (inode, block);
+
+	block -= EXT2_NDIR_BLOCKS;
+	if (block < addr_per_block) {
+		root = EXT2_IND_BLOCK;
+		depth = 1;
+		path[0] = block;
+	} else {
+		block -= addr_per_block;
+		if (block < addr_per_block * addr_per_block) {
+			root = EXT2_DIND_BLOCK;
+			depth = 2;
+			path[0] = block / addr_per_block;
+			path[1] = block & (addr_per_block - 1);
+		} else {
+			block -= addr_per_block * addr_per_block;
+			root = EXT2_TIND_BLOCK;
+			depth = 3;
+			path[0] = block / (addr_per_block * addr_per_block);
+			path[1] = (block / addr_per_block) & (addr_per_block - 1);
+			path[2] = block & (addr_per_block - 1);
+		}
+	}
+
+	/* Follow the chain; a zero link at any level ends the walk. */
+	i = inode_bmap (inode, root);
+	for (level = 0; i && level < depth; level++)
+		i = block_bmap (bread (inode->i_dev, i,
+				       inode->i_sb->s_blocksize),
+				path[level]);
+	return i;
+}
+/*
+ * inode_getblk()
+ *
+ * Returns the buffer for the block recorded in slot 'nr' of the inode's
+ * i_data array, allocating a block for the slot first when it is empty
+ * and 'create' is set.
+ *
+ * nr        - index into inode->u.ext2_i.i_data
+ * create    - non-zero to allocate when the slot is empty
+ * new_block - logical file block being mapped; checked against
+ *             RLIMIT_FSIZE and used to match the sequential-allocation
+ *             hint (i_next_alloc_block / i_next_alloc_goal)
+ * err       - set to -EFBIG when the slot is empty and either 'create'
+ *             is 0 or new_block is at/over the file size limit
+ *
+ * Returns NULL on that -EFBIG path and when ext2_alloc_block() yields 0
+ * (in which case *err is left as the caller set it).
+ *
+ * The slot is re-read after each call that hands back a buffer; if it
+ * changed in the meantime the work is undone and the lookup restarts.
+ */
+static struct buffer_head * inode_getblk (struct inode * inode, int nr,
+ int create, int new_block, int * err)
+{
+ int tmp, goal = 0;
+ unsigned long * p;
+ struct buffer_head * result;
+ int blocks = inode->i_sb->s_blocksize / 512; /* i_blocks is kept in 512-byte units */
+
+ p = inode->u.ext2_i.i_data + nr;
+repeat:
+ tmp = *p;
+ if (tmp) {
+ result = getblk (inode->i_dev, tmp, inode->i_sb->s_blocksize);
+ if (tmp == *p) /* slot unchanged while we were in getblk() */
+ return result;
+ brelse (result);
+ goto repeat;
+ }
+ if (!create || new_block >=
+ (current->rlim[RLIMIT_FSIZE].rlim_cur >>
+ EXT2_BLOCK_SIZE_BITS(inode->i_sb))) {
+ *err = -EFBIG; /* also reported for a plain lookup miss (create == 0) */
+ return NULL;
+ }
+ if (inode->u.ext2_i.i_next_alloc_block == new_block)
+ goal = inode->u.ext2_i.i_next_alloc_goal; /* sequential write: reuse the hint */
+
+ ext2_debug ("hint = %d,", goal);
+
+ if (!goal) {
+ for (tmp = nr - 1; tmp >= 0; tmp--) { /* nearest earlier slot that is in use */
+ if (inode->u.ext2_i.i_data[tmp]) {
+ goal = inode->u.ext2_i.i_data[tmp];
+ break;
+ }
+ }
+ if (!goal) /* nothing mapped yet: start of the inode's own block group */
+ goal = (inode->u.ext2_i.i_block_group *
+ EXT2_BLOCKS_PER_GROUP(inode->i_sb)) +
+ inode->i_sb->u.ext2_sb.s_es->s_first_data_block;
+ }
+
+ ext2_debug ("goal = %d.\n", goal);
+
+ tmp = ext2_alloc_block (inode, goal);
+ if (!tmp)
+ return NULL;
+ result = getblk (inode->i_dev, tmp, inode->i_sb->s_blocksize);
+ if (*p) { /* slot got filled meanwhile: give our block back and retry */
+ ext2_free_blocks (inode->i_sb, tmp, 1);
+ brelse (result);
+ goto repeat;
+ }
+ *p = tmp;
+ inode->u.ext2_i.i_next_alloc_block = new_block;
+ inode->u.ext2_i.i_next_alloc_goal = tmp;
+ inode->i_ctime = CURRENT_TIME;
+ inode->i_blocks += blocks;
+ if (IS_SYNC(inode) || inode->u.ext2_i.i_osync)
+ ext2_sync_inode (inode);
+ else
+ inode->i_dirt = 1;
+ return result;
+}
+/*
+ * block_getblk()
+ *
+ * Same job as inode_getblk(), but the block number is looked up in (and,
+ * when allocating, stored into) entry 'nr' of the indirect block held in
+ * 'bh'.
+ *
+ * bh is consumed: it is released on every path that gets past the
+ * initial NULL check, so the caller must not use it afterwards.  A NULL
+ * bh simply yields NULL, which lets callers chain calls without
+ * checking each level.
+ *
+ * Returns NULL if bh is NULL, if the indirect block cannot be read, on
+ * the -EFBIG path (entry empty and 'create' is 0 or new_block is at/over
+ * RLIMIT_FSIZE), or when ext2_alloc_block() yields 0.
+ */
+static struct buffer_head * block_getblk (struct inode * inode,
+ struct buffer_head * bh, int nr,
+ int create, int blocksize,
+ int new_block, int * err)
+{
+ int tmp, goal = 0;
+ unsigned long * p;
+ struct buffer_head * result;
+ int blocks = inode->i_sb->s_blocksize / 512; /* i_blocks is kept in 512-byte units */
+
+ if (!bh)
+ return NULL;
+ if (!bh->b_uptodate) { /* indirect block not read in yet */
+ ll_rw_block (READ, 1, &bh);
+ wait_on_buffer (bh);
+ if (!bh->b_uptodate) {
+ brelse (bh);
+ return NULL;
+ }
+ }
+ p = (unsigned long *) bh->b_data + nr;
+repeat:
+ tmp = *p;
+ if (tmp) {
+ result = getblk (bh->b_dev, tmp, blocksize);
+ if (tmp == *p) { /* entry unchanged while we were in getblk() */
+ brelse (bh);
+ return result;
+ }
+ brelse (result);
+ goto repeat;
+ }
+ if (!create || new_block >=
+ (current->rlim[RLIMIT_FSIZE].rlim_cur >>
+ EXT2_BLOCK_SIZE_BITS(inode->i_sb))) {
+ brelse (bh);
+ *err = -EFBIG; /* also reported for a plain lookup miss (create == 0) */
+ return NULL;
+ }
+ if (inode->u.ext2_i.i_next_alloc_block == new_block)
+ goal = inode->u.ext2_i.i_next_alloc_goal;
+ if (!goal) {
+ for (tmp = nr - 1; tmp >= 0; tmp--) { /* nearest earlier entry that is in use */
+ if (((unsigned long *) bh->b_data)[tmp]) {
+ goal = ((unsigned long *)bh->b_data)[tmp];
+ break;
+ }
+ }
+ if (!goal) /* table is empty so far: aim next to the indirect block itself */
+ goal = bh->b_blocknr;
+ }
+ tmp = ext2_alloc_block (inode, goal);
+ if (!tmp) {
+ brelse (bh);
+ return NULL;
+ }
+ result = getblk (bh->b_dev, tmp, blocksize);
+ if (*p) { /* entry got filled meanwhile: give our block back and retry */
+ ext2_free_blocks (inode->i_sb, tmp, 1);
+ brelse (result);
+ goto repeat;
+ }
+ *p = tmp;
+ mark_buffer_dirty(bh, 1);
+ if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) { /* synchronous: write the table out now */
+ ll_rw_block (WRITE, 1, &bh);
+ wait_on_buffer (bh);
+ }
+ inode->i_ctime = CURRENT_TIME;
+ inode->i_blocks += blocks;
+ inode->i_dirt = 1;
+ inode->u.ext2_i.i_next_alloc_block = new_block;
+ inode->u.ext2_i.i_next_alloc_goal = tmp;
+ brelse (bh);
+ return result;
+}
+
+/*
+ * block_getcluster()
+ *
+ * Checks whether the run of entries starting at index 'nr' of the
+ * indirect block in 'bh' qualifies for clustering: 'nr' must be a
+ * multiple of the number of blocks per page, and every entry of the run
+ * must be non-zero and exactly one greater than its predecessor.  If so,
+ * the run is passed to generate_cluster() and its result is returned;
+ * otherwise 0 is returned.
+ *
+ * bh is released before returning.  A NULL bh yields 0.
+ */
+static int block_getcluster (struct inode * inode, struct buffer_head * bh,
+			     int nr,
+			     int blocksize)
+{
+	unsigned long * run;
+	int first = 0;
+	int usable = 0;
+	int cluster = 0;
+	int k;
+
+	/* Check to see if clustering possible here. */
+	if (!bh)
+		return 0;
+
+	if (nr % (PAGE_SIZE / inode->i_sb->s_blocksize) == 0 &&
+	    !(nr + 3 > EXT2_ADDR_PER_BLOCK(inode->i_sb))) {
+		run = (unsigned long *) bh->b_data + nr;
+		usable = 1;
+		for (k = 0; k < (PAGE_SIZE / inode->i_sb->s_blocksize); k++) {
+			/* All blocks in cluster must already be allocated */
+			if (run[k] == 0) {
+				usable = 0;
+				break;
+			}
+			/* See if aligned correctly */
+			if (k == 0)
+				first = run[k];
+			else if (run[k] != first + k) {
+				usable = 0;
+				break;
+			}
+		}
+		if (usable)
+			cluster = generate_cluster (bh->b_dev, (int *) run,
+						    blocksize);
+	}
+
+	brelse (bh);
+	return cluster;
+}
+
+/*
+ * ext2_getblk()
+ *
+ * Returns the buffer for logical block 'block' of the file, walking the
+ * direct / single / double / triple indirect chain and, when 'create' is
+ * set, allocating any missing block on the way.
+ *
+ * *err is -EIO for an out-of-range index, otherwise it is preset to
+ * -ENOSPC and may be changed to -EFBIG by the helpers.  Returns NULL on
+ * failure.
+ *
+ * The upper bound is exclusive, as in ext2_bmap(): an index equal to the
+ * total number of mappable blocks would select one slot past the end of
+ * the top-level triple indirect table.
+ */
+struct buffer_head * ext2_getblk (struct inode * inode, long block,
+				  int create, int * err)
+{
+	struct buffer_head * bh;
+	unsigned long b;
+	unsigned long addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+
+	*err = -EIO;
+	if (block < 0) {
+		ext2_warning (inode->i_sb, "ext2_getblk", "block < 0");
+		return NULL;
+	}
+	if (block >= EXT2_NDIR_BLOCKS + addr_per_block +
+		     addr_per_block * addr_per_block +
+		     addr_per_block * addr_per_block * addr_per_block) {
+		ext2_warning (inode->i_sb, "ext2_getblk", "block > big");
+		return NULL;
+	}
+	/*
+	 * If this is a sequential block allocation, set the next_alloc_block
+	 * to this block now so that all the indblock and data block
+	 * allocations use the same goal zone
+	 */
+
+	ext2_debug ("block %lu, next %lu, goal %lu.\n", block,
+		    inode->u.ext2_i.i_next_alloc_block,
+		    inode->u.ext2_i.i_next_alloc_goal);
+
+	if (block == inode->u.ext2_i.i_next_alloc_block + 1) {
+		inode->u.ext2_i.i_next_alloc_block++;
+		inode->u.ext2_i.i_next_alloc_goal++;
+	}
+
+	*err = -ENOSPC;
+	b = block;	/* untranslated index, used for limit and hint checks */
+	if (block < EXT2_NDIR_BLOCKS)
+		return inode_getblk (inode, block, create, b, err);
+	block -= EXT2_NDIR_BLOCKS;
+	if (block < addr_per_block) {
+		bh = inode_getblk (inode, EXT2_IND_BLOCK, create, b, err);
+		return block_getblk (inode, bh, block, create,
+				     inode->i_sb->s_blocksize, b, err);
+	}
+	block -= addr_per_block;
+	if (block < addr_per_block * addr_per_block) {
+		bh = inode_getblk (inode, EXT2_DIND_BLOCK, create, b, err);
+		bh = block_getblk (inode, bh, block / addr_per_block, create,
+				   inode->i_sb->s_blocksize, b, err);
+		return block_getblk (inode, bh, block & (addr_per_block - 1),
+				     create, inode->i_sb->s_blocksize, b, err);
+	}
+	block -= addr_per_block * addr_per_block;
+	bh = inode_getblk (inode, EXT2_TIND_BLOCK, create, b, err);
+	bh = block_getblk (inode, bh, block/(addr_per_block * addr_per_block),
+			   create, inode->i_sb->s_blocksize, b, err);
+	bh = block_getblk (inode, bh, (block/addr_per_block) & (addr_per_block - 1),
+			   create, inode->i_sb->s_blocksize, b, err);
+	return block_getblk (inode, bh, block & (addr_per_block - 1), create,
+			     inode->i_sb->s_blocksize, b, err);
+}
+
+/*
+ * ext2_getcluster()
+ *
+ * Tries to build a cluster for the blocks starting at logical block
+ * 'block'.  Nothing is ever allocated (create is fixed at 0), and blocks
+ * mapped directly from the inode are not clustered.  Returns whatever
+ * block_getcluster() returns, or 0 when the index is out of range or in
+ * the direct range.
+ *
+ * The upper bound is exclusive, as in ext2_bmap(): an index equal to the
+ * total number of mappable blocks would select one slot past the end of
+ * the top-level triple indirect table.
+ */
+int ext2_getcluster (struct inode * inode, long block)
+{
+	struct buffer_head * bh;
+	int err, create;
+	unsigned long b;
+	unsigned long addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+
+	create = 0;
+	err = -EIO;
+	if (block < 0) {
+		ext2_warning (inode->i_sb, "ext2_getcluster", "block < 0");
+		return 0;
+	}
+	if (block >= EXT2_NDIR_BLOCKS + addr_per_block +
+		     addr_per_block * addr_per_block +
+		     addr_per_block * addr_per_block * addr_per_block) {
+		ext2_warning (inode->i_sb, "ext2_getcluster", "block > big");
+		return 0;
+	}
+
+	/* err is only a sink for the helpers; it is never returned. */
+	err = -ENOSPC;
+	b = block;
+	if (block < EXT2_NDIR_BLOCKS)
+		return 0;
+
+	block -= EXT2_NDIR_BLOCKS;
+
+	if (block < addr_per_block) {
+		bh = inode_getblk (inode, EXT2_IND_BLOCK, create, b, &err);
+		return block_getcluster (inode, bh, block,
+					 inode->i_sb->s_blocksize);
+	}
+	block -= addr_per_block;
+	if (block < addr_per_block * addr_per_block) {
+		bh = inode_getblk (inode, EXT2_DIND_BLOCK, create, b, &err);
+		bh = block_getblk (inode, bh, block / addr_per_block, create,
+				   inode->i_sb->s_blocksize, b, &err);
+		return block_getcluster (inode, bh, block & (addr_per_block - 1),
+					 inode->i_sb->s_blocksize);
+	}
+	block -= addr_per_block * addr_per_block;
+	bh = inode_getblk (inode, EXT2_TIND_BLOCK, create, b, &err);
+	bh = block_getblk (inode, bh, block/(addr_per_block * addr_per_block),
+			   create, inode->i_sb->s_blocksize, b, &err);
+	bh = block_getblk (inode, bh, (block/addr_per_block) & (addr_per_block - 1),
+			   create, inode->i_sb->s_blocksize, b, &err);
+	return block_getcluster (inode, bh, block & (addr_per_block - 1),
+				 inode->i_sb->s_blocksize);
+}
+
+/*
+ * ext2_bread()
+ *
+ * Like ext2_getblk(), but also makes sure the buffer contents are valid,
+ * reading the block in when necessary.  Returns NULL with *err as left
+ * by ext2_getblk() when the block cannot be mapped, or NULL with
+ * *err = -EIO when the read fails.
+ */
+struct buffer_head * ext2_bread (struct inode * inode, int block,
+				 int create, int *err)
+{
+	struct buffer_head * buf = ext2_getblk (inode, block, create, err);
+
+	if (buf && !buf->b_uptodate) {
+		ll_rw_block (READ, 1, &buf);
+		wait_on_buffer (buf);
+		if (!buf->b_uptodate) {
+			brelse (buf);
+			*err = -EIO;
+			buf = NULL;
+		}
+	}
+	return buf;
+}
+/*
+ * ext2_read_inode()
+ *
+ * Fills the in-core inode from its on-disk copy.
+ *
+ * The inode number must be the root, one of the two ACL inodes, or at
+ * least EXT2_FIRST_INO, and must not exceed s_inodes_count; otherwise an
+ * error is logged and the inode is left untouched.  The block group is
+ * derived from the inode number, its descriptor is looked up in the
+ * cached descriptor buffers, and the inode-table block is read.
+ *
+ * After copying the fields, i_op is chosen from the file type (none for
+ * the ACL inodes) and the ext2 SYNC/APPEND/IMMUTABLE attribute bits are
+ * mirrored into inode->i_flags.
+ *
+ * NOTE(review): the code dereferences bh right after each ext2_panic()
+ * call, so ext2_panic() is presumably expected not to return - confirm.
+ */
+void ext2_read_inode (struct inode * inode)
+{
+ struct buffer_head * bh;
+ struct ext2_inode * raw_inode;
+ unsigned long block_group;
+ unsigned long group_desc;
+ unsigned long desc;
+ unsigned long block;
+ struct ext2_group_desc * gdp;
+
+ if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO &&
+ inode->i_ino != EXT2_ACL_DATA_INO && inode->i_ino < EXT2_FIRST_INO) ||
+ inode->i_ino > inode->i_sb->u.ext2_sb.s_es->s_inodes_count) {
+ ext2_error (inode->i_sb, "ext2_read_inode",
+ "bad inode number: %lu", inode->i_ino);
+ return;
+ }
+ block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); /* inode numbers start at 1 */
+ if (block_group >= inode->i_sb->u.ext2_sb.s_groups_count)
+ ext2_panic (inode->i_sb, "ext2_read_inode",
+ "group >= groups count");
+ group_desc = block_group / EXT2_DESC_PER_BLOCK(inode->i_sb); /* which descriptor buffer */
+ desc = block_group % EXT2_DESC_PER_BLOCK(inode->i_sb); /* index inside that buffer */
+ bh = inode->i_sb->u.ext2_sb.s_group_desc[group_desc];
+ if (!bh)
+ ext2_panic (inode->i_sb, "ext2_read_inode",
+ "Descriptor not loaded");
+ gdp = (struct ext2_group_desc *) bh->b_data;
+ block = gdp[desc].bg_inode_table +
+ (((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb))
+ / EXT2_INODES_PER_BLOCK(inode->i_sb));
+ if (!(bh = bread (inode->i_dev, block, inode->i_sb->s_blocksize)))
+ ext2_panic (inode->i_sb, "ext2_read_inode",
+ "unable to read i-node block - "
+ "inode=%lu, block=%lu", inode->i_ino, block);
+ raw_inode = ((struct ext2_inode *) bh->b_data) +
+ (inode->i_ino - 1) % EXT2_INODES_PER_BLOCK(inode->i_sb);
+ inode->i_mode = raw_inode->i_mode;
+ inode->i_uid = raw_inode->i_uid;
+ inode->i_gid = raw_inode->i_gid;
+ inode->i_nlink = raw_inode->i_links_count;
+ inode->i_size = raw_inode->i_size;
+ inode->i_atime = raw_inode->i_atime;
+ inode->i_ctime = raw_inode->i_ctime;
+ inode->i_mtime = raw_inode->i_mtime;
+ inode->u.ext2_i.i_dtime = raw_inode->i_dtime;
+ inode->i_blksize = inode->i_sb->s_blocksize;
+ inode->i_blocks = raw_inode->i_blocks;
+ inode->i_version = ++event;
+ inode->u.ext2_i.i_flags = raw_inode->i_flags;
+ inode->u.ext2_i.i_faddr = raw_inode->i_faddr;
+ inode->u.ext2_i.i_frag_no = raw_inode->i_frag;
+ inode->u.ext2_i.i_frag_size = raw_inode->i_fsize;
+ inode->u.ext2_i.i_osync = 0;
+ inode->u.ext2_i.i_file_acl = raw_inode->i_file_acl;
+ inode->u.ext2_i.i_dir_acl = raw_inode->i_dir_acl;
+ inode->u.ext2_i.i_version = raw_inode->i_version;
+ inode->u.ext2_i.i_block_group = block_group;
+ inode->u.ext2_i.i_next_alloc_block = 0; /* no sequential-allocation hint yet */
+ inode->u.ext2_i.i_next_alloc_goal = 0;
+ if (inode->u.ext2_i.i_prealloc_count)
+ ext2_error (inode->i_sb, "ext2_read_inode",
+ "New inode has non-zero prealloc count!");
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+ inode->i_rdev = raw_inode->i_block[0]; /* device nodes keep the device number in i_block[0] */
+ else for (block = 0; block < EXT2_N_BLOCKS; block++)
+ inode->u.ext2_i.i_data[block] = raw_inode->i_block[block];
+ brelse (bh);
+ inode->i_op = NULL;
+ if (inode->i_ino == EXT2_ACL_IDX_INO ||
+ inode->i_ino == EXT2_ACL_DATA_INO)
+ /* Nothing to do */ ;
+ else if (S_ISREG(inode->i_mode))
+ inode->i_op = &ext2_file_inode_operations;
+ else if (S_ISDIR(inode->i_mode))
+ inode->i_op = &ext2_dir_inode_operations;
+ else if (S_ISLNK(inode->i_mode))
+ inode->i_op = &ext2_symlink_inode_operations;
+ else if (S_ISCHR(inode->i_mode))
+ inode->i_op = &chrdev_inode_operations;
+ else if (S_ISBLK(inode->i_mode))
+ inode->i_op = &blkdev_inode_operations;
+ else if (S_ISFIFO(inode->i_mode))
+ init_fifo(inode);
+ if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL)
+ inode->i_flags |= MS_SYNC;
+ if (inode->u.ext2_i.i_flags & EXT2_APPEND_FL)
+ inode->i_flags |= S_APPEND;
+ if (inode->u.ext2_i.i_flags & EXT2_IMMUTABLE_FL)
+ inode->i_flags |= S_IMMUTABLE;
+}
+/*
+ * ext2_update_inode()
+ *
+ * Copies the in-core inode into its slot in the inode-table block, marks
+ * that buffer dirty and clears inode->i_dirt.  The buffer is returned
+ * still held; both callers in this file release it with brelse().
+ *
+ * Returns 0 (no buffer) after logging an error when the inode number is
+ * out of range.  Messages are issued under the name "ext2_write_inode".
+ *
+ * NOTE(review): unlike ext2_read_inode(), the range check here has no
+ * exception for EXT2_ACL_IDX_INO / EXT2_ACL_DATA_INO; whether those
+ * inodes can reach this function is not visible here - confirm.
+ */
+static struct buffer_head * ext2_update_inode (struct inode * inode)
+{
+ struct buffer_head * bh;
+ struct ext2_inode * raw_inode;
+ unsigned long block_group;
+ unsigned long group_desc;
+ unsigned long desc;
+ unsigned long block;
+ struct ext2_group_desc * gdp;
+
+ if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino < EXT2_FIRST_INO) ||
+ inode->i_ino > inode->i_sb->u.ext2_sb.s_es->s_inodes_count) {
+ ext2_error (inode->i_sb, "ext2_write_inode",
+ "bad inode number: %lu", inode->i_ino);
+ return 0;
+ }
+ block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); /* inode numbers start at 1 */
+ if (block_group >= inode->i_sb->u.ext2_sb.s_groups_count)
+ ext2_panic (inode->i_sb, "ext2_write_inode",
+ "group >= groups count");
+ group_desc = block_group / EXT2_DESC_PER_BLOCK(inode->i_sb); /* which descriptor buffer */
+ desc = block_group % EXT2_DESC_PER_BLOCK(inode->i_sb); /* index inside that buffer */
+ bh = inode->i_sb->u.ext2_sb.s_group_desc[group_desc];
+ if (!bh)
+ ext2_panic (inode->i_sb, "ext2_write_inode",
+ "Descriptor not loaded");
+ gdp = (struct ext2_group_desc *) bh->b_data;
+ block = gdp[desc].bg_inode_table +
+ (((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb))
+ / EXT2_INODES_PER_BLOCK(inode->i_sb));
+ if (!(bh = bread (inode->i_dev, block, inode->i_sb->s_blocksize)))
+ ext2_panic (inode->i_sb, "ext2_write_inode",
+ "unable to read i-node block - "
+ "inode=%lu, block=%lu", inode->i_ino, block);
+ raw_inode = ((struct ext2_inode *)bh->b_data) +
+ (inode->i_ino - 1) % EXT2_INODES_PER_BLOCK(inode->i_sb);
+ raw_inode->i_mode = inode->i_mode;
+ raw_inode->i_uid = inode->i_uid;
+ raw_inode->i_gid = inode->i_gid;
+ raw_inode->i_links_count = inode->i_nlink;
+ raw_inode->i_size = inode->i_size;
+ raw_inode->i_atime = inode->i_atime;
+ raw_inode->i_ctime = inode->i_ctime;
+ raw_inode->i_mtime = inode->i_mtime;
+ raw_inode->i_blocks = inode->i_blocks;
+ raw_inode->i_dtime = inode->u.ext2_i.i_dtime;
+ raw_inode->i_flags = inode->u.ext2_i.i_flags;
+ raw_inode->i_faddr = inode->u.ext2_i.i_faddr;
+ raw_inode->i_frag = inode->u.ext2_i.i_frag_no;
+ raw_inode->i_fsize = inode->u.ext2_i.i_frag_size;
+ raw_inode->i_file_acl = inode->u.ext2_i.i_file_acl;
+ raw_inode->i_dir_acl = inode->u.ext2_i.i_dir_acl;
+ raw_inode->i_version = inode->u.ext2_i.i_version;
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+ raw_inode->i_block[0] = inode->i_rdev; /* device nodes keep the device number in i_block[0] */
+ else for (block = 0; block < EXT2_N_BLOCKS; block++)
+ raw_inode->i_block[block] = inode->u.ext2_i.i_data[block];
+ mark_buffer_dirty(bh, 1);
+ inode->i_dirt = 0;
+ return bh;
+}
+
+/*
+ * ext2_write_inode()
+ *
+ * Copies the in-core inode into its inode-table buffer and releases that
+ * buffer; the write to disk itself is not waited for here.
+ */
+void ext2_write_inode (struct inode * inode)
+{
+	struct buffer_head * table_buf = ext2_update_inode (inode);
+
+	brelse (table_buf);
+}
+
+/*
+ * ext2_sync_inode()
+ *
+ * Copies the in-core inode into its inode-table buffer and, if that
+ * buffer is dirty, writes it out and waits for completion.  Returns 0 on
+ * success, -1 when the inode could not be updated or the write left the
+ * buffer not up to date.
+ */
+int ext2_sync_inode (struct inode *inode)
+{
+	struct buffer_head *buf = ext2_update_inode (inode);
+	int status = 0;
+
+	if (!buf) {
+		status = -1;
+	} else if (buf->b_dirt) {
+		ll_rw_block (WRITE, 1, &buf);
+		wait_on_buffer (buf);
+		if (buf->b_req && !buf->b_uptodate) {
+			printk ("IO error syncing ext2 inode [%04x:%08lx]\n",
+				inode->i_dev, inode->i_ino);
+			status = -1;
+		}
+	}
+	brelse (buf);
+	return status;
+}
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
new file mode 100644
index 000000000..447968ef0
--- /dev/null
+++ b/fs/ext2/ioctl.c
@@ -0,0 +1,75 @@
+/*
+ * linux/fs/ext2/ioctl.c
+ *
+ * Copyright (C) 1993, 1994 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/ioctl.h>
+#include <linux/sched.h>
+
+/*
+ * ext2_ioctl()
+ *
+ * Handles the ext2-specific ioctls:
+ *
+ *   EXT2_IOC_GETFLAGS   - store the inode's ext2 attribute flags at arg
+ *   EXT2_IOC_SETFLAGS   - replace the attribute flags with the long at arg
+ *   EXT2_IOC_GETVERSION - store the inode's ext2 version at arg
+ *   EXT2_IOC_SETVERSION - replace the version with the long at arg
+ *
+ * Setting requires the caller to own the file or be the super-user
+ * (changing the IMMUTABLE flag always requires the super-user) and fails
+ * with -EROFS on a read-only inode.  Unknown commands return -EINVAL.
+ *
+ * SETFLAGS also refreshes the in-core i_flags bits derived from the
+ * attributes, so a change applies at once instead of only after the
+ * inode is read again.  This covers the sync attribute, which
+ * ext2_read_inode() maps to MS_SYNC.
+ */
+int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
+		unsigned long arg)
+{
+	int err;
+	unsigned long flags;
+
+	ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg);
+
+	switch (cmd) {
+	case EXT2_IOC_GETFLAGS:
+		if ((err = verify_area (VERIFY_WRITE, (long *) arg, sizeof(long))))
+			return err;
+		put_fs_long (inode->u.ext2_i.i_flags, (long *) arg);
+		return 0;
+	case EXT2_IOC_SETFLAGS:
+		flags = get_fs_long ((long *) arg);
+		/*
+		 * Only the super-user can change the IMMUTABLE flag
+		 */
+		if ((flags & EXT2_IMMUTABLE_FL) ^
+		    (inode->u.ext2_i.i_flags & EXT2_IMMUTABLE_FL)) {
+			/* This test looks nicer. Thanks to Pauline Middelink */
+			if (!fsuser())
+				return -EPERM;
+		} else
+			if ((current->fsuid != inode->i_uid) && !fsuser())
+				return -EPERM;
+		if (IS_RDONLY(inode))
+			return -EROFS;
+		inode->u.ext2_i.i_flags = flags;
+		/*
+		 * Keep the in-core sync bit in step with the attribute.  It
+		 * is cleared only when the filesystem itself is not mounted
+		 * synchronous, so a mount-wide setting is never overridden.
+		 */
+		if (flags & EXT2_SYNC_FL)
+			inode->i_flags |= MS_SYNC;
+		else if (!(inode->i_sb->s_flags & MS_SYNC))
+			inode->i_flags &= ~MS_SYNC;
+		if (flags & EXT2_APPEND_FL)
+			inode->i_flags |= S_APPEND;
+		else
+			inode->i_flags &= ~S_APPEND;
+		if (flags & EXT2_IMMUTABLE_FL)
+			inode->i_flags |= S_IMMUTABLE;
+		else
+			inode->i_flags &= ~S_IMMUTABLE;
+		inode->i_ctime = CURRENT_TIME;
+		inode->i_dirt = 1;
+		return 0;
+	case EXT2_IOC_GETVERSION:
+		if ((err = verify_area (VERIFY_WRITE, (long *) arg, sizeof(long))))
+			return err;
+		put_fs_long (inode->u.ext2_i.i_version, (long *) arg);
+		return 0;
+	case EXT2_IOC_SETVERSION:
+		if ((current->fsuid != inode->i_uid) && !fsuser())
+			return -EPERM;
+		if (IS_RDONLY(inode))
+			return -EROFS;
+		inode->u.ext2_i.i_version = get_fs_long ((long *) arg);
+		inode->i_ctime = CURRENT_TIME;
+		inode->i_dirt = 1;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
new file mode 100644
index 000000000..f56c5404e
--- /dev/null
+++ b/fs/ext2/namei.c
@@ -0,0 +1,1098 @@
+/*
+ * linux/fs/ext2/namei.c
+ *
+ * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/namei.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+
+/*
+ * comment out this line if you want names > EXT2_NAME_LEN chars to be
+ * truncated. Else they will be disallowed.
+ */
+/* #define NO_TRUNCATE */
+
+/*
+ * define how far ahead to read directories while searching them.
+ */
+#define NAMEI_RA_CHUNKS 2
+#define NAMEI_RA_BLOCKS 4
+#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
+#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
+
+/*
+ * ext2_match()
+ *
+ * Compares a name of 'len' bytes with a directory entry.
+ *
+ * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure.
+ * A NULL entry, an unused entry (inode 0) and a name longer than
+ * EXT2_NAME_LEN never match.
+ */
+static int ext2_match (int len, const char * const name,
+		       struct ext2_dir_entry * de)
+{
+	if (de == NULL || de->inode == 0)
+		return 0;
+	if (len > EXT2_NAME_LEN)
+		return 0;
+	/*
+	 * "" means "." ---> so paths like "/usr/lib//libc.a" work
+	 */
+	if (len == 0 && de->name_len == 1 && de->name[0] == '.' &&
+	    de->name[1] == '\0')
+		return 1;
+	return len == de->name_len && memcmp (name, de->name, len) == 0;
+}
+
+/*
+ * ext2_find_entry()
+ *
+ * finds an entry in the specified directory with the wanted name. It
+ * returns the cache buffer in which the entry was found, and the entry
+ * itself (as a parameter - res_dir). It does NOT read the inode of the
+ * entry - you'll have to do that yourself if you want to.
+ *
+ * Returns NULL (with *res_dir == NULL) when dir is NULL, when the name is
+ * not found, on a read error, or when a directory entry fails
+ * ext2_check_dir_entry().  Names longer than EXT2_NAME_LEN are rejected
+ * if NO_TRUNCATE is defined and cut to EXT2_NAME_LEN otherwise.
+ *
+ * The directory is scanned through a ring of NAMEI_RA_SIZE buffers
+ * (bh_use).  Buffers that still need to be read are collected in bh_read
+ * and submitted together each time the scan reaches a multiple of
+ * NAMEI_RA_BLOCKS.  When a block has been searched, its ring slot is
+ * refilled with the block NAMEI_RA_SIZE further on.  On return every
+ * ring buffer except the one handed back has been released.
+ *
+ * NOTE(review): a NULL ring slot causes ext2_panic(); that would happen
+ * whenever ext2_getblk() returns NULL for a block below i_size
+ * (presumably a hole in the directory) - confirm this is intended.
+ */
+static struct buffer_head * ext2_find_entry (struct inode * dir,
+ const char * const name, int namelen,
+ struct ext2_dir_entry ** res_dir)
+{
+ struct super_block * sb;
+ struct buffer_head * bh_use[NAMEI_RA_SIZE];
+ struct buffer_head * bh_read[NAMEI_RA_SIZE];
+ unsigned long offset;
+ int block, toread, i, err;
+
+ *res_dir = NULL;
+ if (!dir)
+ return NULL;
+ sb = dir->i_sb;
+
+#ifdef NO_TRUNCATE
+ if (namelen > EXT2_NAME_LEN)
+ return NULL;
+#else
+ if (namelen > EXT2_NAME_LEN)
+ namelen = EXT2_NAME_LEN;
+#endif
+
+ memset (bh_use, 0, sizeof (bh_use));
+ toread = 0;
+ for (block = 0; block < NAMEI_RA_SIZE; ++block) { /* prime the ring with the first blocks */
+ struct buffer_head * bh;
+
+ if ((block << EXT2_BLOCK_SIZE_BITS (sb)) >= dir->i_size)
+ break;
+ bh = ext2_getblk (dir, block, 0, &err);
+ bh_use[block] = bh;
+ if (bh && !bh->b_uptodate)
+ bh_read[toread++] = bh;
+ }
+
+ block = 0;
+ offset = 0;
+ while (offset < dir->i_size) {
+ struct buffer_head * bh;
+ struct ext2_dir_entry * de;
+ char * dlimit;
+
+ if ((block % NAMEI_RA_BLOCKS) == 0 && toread) { /* start the pending reads as one batch */
+ ll_rw_block (READ, toread, bh_read);
+ toread = 0;
+ }
+ bh = bh_use[block % NAMEI_RA_SIZE];
+ if (!bh)
+ ext2_panic (sb, "ext2_find_entry",
+ "buffer head pointer is NULL");
+ wait_on_buffer (bh);
+ if (!bh->b_uptodate) {
+ /*
+ * read error: all bets are off
+ */
+ break;
+ }
+
+ de = (struct ext2_dir_entry *) bh->b_data;
+ dlimit = bh->b_data + sb->s_blocksize;
+ while ((char *) de < dlimit) {
+ if (!ext2_check_dir_entry ("ext2_find_entry", dir,
+ de, bh, offset))
+ goto failure;
+ if (de->inode != 0 && ext2_match (namelen, name, de)) {
+ for (i = 0; i < NAMEI_RA_SIZE; ++i) { /* keep only the buffer being returned */
+ if (bh_use[i] != bh)
+ brelse (bh_use[i]);
+ }
+ *res_dir = de;
+ return bh;
+ }
+ offset += de->rec_len;
+ de = (struct ext2_dir_entry *)
+ ((char *) de + de->rec_len);
+ }
+
+ brelse (bh);
+ if (((block + NAMEI_RA_SIZE) << EXT2_BLOCK_SIZE_BITS (sb)) >=
+ dir->i_size)
+ bh = NULL; /* nothing left to read ahead */
+ else
+ bh = ext2_getblk (dir, block + NAMEI_RA_SIZE, 0, &err);
+ bh_use[block++ % NAMEI_RA_SIZE] = bh; /* reuse the slot of the block just searched */
+ if (bh && !bh->b_uptodate)
+ bh_read[toread++] = bh;
+ }
+
+failure:
+ for (i = 0; i < NAMEI_RA_SIZE; ++i)
+ brelse (bh_use[i]);
+ return NULL;
+}
+/*
+ * ext2_lookup()
+ *
+ * Resolves 'name' (of 'len' bytes) in directory 'dir' and stores the
+ * resulting inode in *result.
+ *
+ * Returns 0 on success, -ENOENT when dir is NULL, is not a directory or
+ * has no such entry, and -EACCES when iget() fails.  Except for the NULL
+ * case, the reference on dir is dropped with iput() on every path.
+ *
+ * The name cache is consulted first; a cached inode number of 0 is a
+ * remembered miss.  After a real search the outcome is added to the
+ * cache.  A miss is cached only if dir->i_version did not change during
+ * the search.
+ */
+int ext2_lookup (struct inode * dir, const char * name, int len,
+ struct inode ** result)
+{
+ unsigned long ino;
+ struct ext2_dir_entry * de;
+ struct buffer_head * bh;
+
+ *result = NULL;
+ if (!dir)
+ return -ENOENT;
+ if (!S_ISDIR(dir->i_mode)) {
+ iput (dir);
+ return -ENOENT;
+ }
+ if (dcache_lookup(dir, name, len, &ino)) {
+ if (!ino) { /* cached negative entry */
+ iput(dir);
+ return -ENOENT;
+ }
+ if (!(*result = iget (dir->i_sb, ino))) {
+ iput (dir);
+ return -EACCES;
+ }
+ iput (dir);
+ return 0;
+ }
+ ino = dir->i_version; /* 'ino' temporarily holds the directory version */
+ if (!(bh = ext2_find_entry (dir, name, len, &de))) {
+ if (ino == dir->i_version) /* directory unchanged during the search */
+ dcache_add(dir, name, len, 0);
+ iput (dir);
+ return -ENOENT;
+ }
+ ino = de->inode;
+ dcache_add(dir, name, len, ino);
+ brelse (bh);
+ if (!(*result = iget (dir->i_sb, ino))) {
+ iput (dir);
+ return -EACCES;
+ }
+ iput (dir);
+ return 0;
+}
+
+/*
+ * ext2_add_entry()
+ *
+ * adds a file entry to the specified directory, using the same
+ * semantics as ext2_find_entry(). It returns NULL if it failed.
+ *
+ * On success the buffer holding the new entry is returned, *res_dir
+ * points at the entry and *err is 0.  On failure *err is:
+ *   -EINVAL  dir is NULL, the name is empty, or (with NO_TRUNCATE) the
+ *            name is longer than EXT2_NAME_LEN
+ *   -ENOENT  the directory has size 0 (busy deleted directory) or an
+ *            entry fails ext2_check_dir_entry()
+ *   -EEXIST  an entry with this name already exists
+ *   other    whatever ext2_bread() reported
+ *
+ * The directory is scanned block by block for a free entry that is large
+ * enough, or a used entry with enough slack after its name to be split.
+ * When the end of the directory is reached a new block is appended.
+ * Every failure path releases the buffer it holds.
+ *
+ * NOTE!! The inode part of 'de' is left at 0 - which means you
+ * may not sleep between calling this and putting something into
+ * the entry, as someone else might have used it while you slept.
+ */
+static struct buffer_head * ext2_add_entry (struct inode * dir,
+					    const char * name, int namelen,
+					    struct ext2_dir_entry ** res_dir,
+					    int *err)
+{
+	unsigned long offset;
+	unsigned short rec_len;
+	struct buffer_head * bh;
+	struct ext2_dir_entry * de, * de1;
+	struct super_block * sb;
+
+	*err = -EINVAL;
+	*res_dir = NULL;
+	if (!dir)
+		return NULL;
+	sb = dir->i_sb;
+#ifdef NO_TRUNCATE
+	if (namelen > EXT2_NAME_LEN)
+		return NULL;
+#else
+	if (namelen > EXT2_NAME_LEN)
+		namelen = EXT2_NAME_LEN;
+#endif
+	if (!namelen)
+		return NULL;
+	/*
+	 * Is this a busy deleted directory? Can't create new files if so
+	 */
+	if (dir->i_size == 0)
+	{
+		*err = -ENOENT;
+		return NULL;
+	}
+	bh = ext2_bread (dir, 0, 0, err);
+	if (!bh)
+		return NULL;
+	rec_len = EXT2_DIR_REC_LEN(namelen);
+	offset = 0;
+	de = (struct ext2_dir_entry *) bh->b_data;
+	*err = -ENOSPC;
+	while (1) {
+		if ((char *)de >= sb->s_blocksize + bh->b_data) {
+			/* End of this block: move on, creating the next block if needed. */
+			brelse (bh);
+			bh = ext2_bread (dir, offset >> EXT2_BLOCK_SIZE_BITS(sb), 1, err);
+			if (!bh)
+				return NULL;
+			if (dir->i_size <= offset) {
+				if (dir->i_size == 0) {
+					/*
+					 * The directory was emptied while we
+					 * slept.  Drop the buffer we just got,
+					 * otherwise its reference leaks.
+					 */
+					brelse (bh);
+					*err = -ENOENT;
+					return NULL;
+				}
+
+				ext2_debug ("creating next block\n");
+
+				/* One free entry spanning the whole new block. */
+				de = (struct ext2_dir_entry *) bh->b_data;
+				de->inode = 0;
+				de->rec_len = sb->s_blocksize;
+				dir->i_size = offset + sb->s_blocksize;
+				dir->i_dirt = 1;
+			} else {
+
+				ext2_debug ("skipping to next block\n");
+
+				de = (struct ext2_dir_entry *) bh->b_data;
+			}
+		}
+		if (!ext2_check_dir_entry ("ext2_add_entry", dir, de, bh,
+					   offset)) {
+			*err = -ENOENT;
+			brelse (bh);
+			return NULL;
+		}
+		if (de->inode != 0 && ext2_match (namelen, name, de)) {
+			*err = -EEXIST;
+			brelse (bh);
+			return NULL;
+		}
+		if ((de->inode == 0 && de->rec_len >= rec_len) ||
+		    (de->rec_len >= EXT2_DIR_REC_LEN(de->name_len) + rec_len)) {
+			offset += de->rec_len;
+			if (de->inode) {
+				/* Split: the used entry keeps what it needs, we take the rest. */
+				de1 = (struct ext2_dir_entry *) ((char *) de +
+					EXT2_DIR_REC_LEN(de->name_len));
+				de1->rec_len = de->rec_len -
+					EXT2_DIR_REC_LEN(de->name_len);
+				de->rec_len = EXT2_DIR_REC_LEN(de->name_len);
+				de = de1;
+			}
+			de->inode = 0;
+			de->name_len = namelen;
+			memcpy (de->name, name, namelen);
+			/*
+			 * XXX shouldn't update any times until successful
+			 * completion of syscall, but too many callers depend
+			 * on this.
+			 *
+			 * XXX similarly, too many callers depend on
+			 * ext2_new_inode() setting the times, but error
+			 * recovery deletes the inode, so the worst that can
+			 * happen is that the times are slightly out of date
+			 * and/or different from the directory change time.
+			 */
+			dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+			dir->i_dirt = 1;
+			dir->i_version = ++event;
+			mark_buffer_dirty(bh, 1);
+			*res_dir = de;
+			*err = 0;
+			return bh;
+		}
+		offset += de->rec_len;
+		de = (struct ext2_dir_entry *) ((char *) de + de->rec_len);
+	}
+	/* Not reached: the loop above only exits by returning. */
+	brelse (bh);
+	return NULL;
+}
+
+/*
+ * ext2_delete_entry deletes a directory entry by merging it with the
+ * previous entry
+ *
+ * 'dir' is the entry to remove and 'bh' the buffer that contains it.
+ * The block is walked from its start so that the entry preceding 'dir'
+ * is known.  That entry, if there is one, absorbs dir's record length,
+ * and dir's inode number is cleared.  Returns 0 on success, -EIO if an
+ * entry fails ext2_check_dir_entry(), -ENOENT if 'dir' is not found in
+ * the block.
+ */
+static int ext2_delete_entry (struct ext2_dir_entry * dir,
+			      struct buffer_head * bh)
+{
+	struct ext2_dir_entry * cur = (struct ext2_dir_entry *) bh->b_data;
+	struct ext2_dir_entry * prev = NULL;
+	int pos;
+
+	/* 'prev' is the entry just stepped over, so pos advances by its length. */
+	for (pos = 0; pos < bh->b_size; pos += prev->rec_len) {
+		if (!ext2_check_dir_entry ("ext2_delete_entry", NULL,
+					   cur, bh, pos))
+			return -EIO;
+		if (cur == dir) {
+			if (prev)
+				prev->rec_len += dir->rec_len;
+			dir->inode = 0;
+			return 0;
+		}
+		prev = cur;
+		cur = (struct ext2_dir_entry *) ((char *) cur + cur->rec_len);
+	}
+	return -ENOENT;
+}
+/*
+ * ext2_create()
+ *
+ * Creates a new inode with the given mode, links it into 'dir' under
+ * 'name' and returns it in *result.
+ *
+ * Returns 0 on success, -ENOENT if dir is NULL, -ENOSPC if no inode
+ * could be obtained, or the error reported by ext2_add_entry().  Except
+ * for the NULL case, the reference on dir is dropped on every path.
+ *
+ * No check for an existing entry is made here beforehand;
+ * ext2_add_entry() itself fails with -EEXIST in that case.
+ */
+int ext2_create (struct inode * dir,const char * name, int len, int mode,
+ struct inode ** result)
+{
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct ext2_dir_entry * de;
+ int err;
+
+ *result = NULL;
+ if (!dir)
+ return -ENOENT;
+ inode = ext2_new_inode (dir, mode);
+ if (!inode) {
+ iput (dir);
+ return -ENOSPC;
+ }
+ inode->i_op = &ext2_file_inode_operations;
+ inode->i_mode = mode;
+ inode->i_dirt = 1;
+ bh = ext2_add_entry (dir, name, len, &de, &err);
+ if (!bh) {
+ inode->i_nlink--; /* undo the link; presumably the new inode started with one - confirm */
+ inode->i_dirt = 1;
+ iput (inode);
+ iput (dir);
+ return err;
+ }
+ de->inode = inode->i_ino; /* the entry becomes valid only now */
+ dir->i_version = ++event;
+ dcache_add(dir, de->name, de->name_len, de->inode);
+ mark_buffer_dirty(bh, 1);
+ if (IS_SYNC(dir)) { /* synchronous directory: write the entry out now */
+ ll_rw_block (WRITE, 1, &bh);
+ wait_on_buffer (bh);
+ }
+ brelse (bh);
+ iput (dir);
+ *result = inode;
+ return 0;
+}
+/*
+ * ext2_mknod()
+ *
+ * Creates a filesystem node of the type given by 'mode' in 'dir' under
+ * 'name'.  For block and character devices 'rdev' is stored as the
+ * device number.
+ *
+ * Returns 0 on success, -ENOENT if dir is NULL, -EEXIST if the name is
+ * already present, -ENOSPC if no inode could be obtained, or the error
+ * reported by ext2_add_entry().  Except for the NULL case, the reference
+ * on dir is dropped on every path; the new inode is released as well
+ * before returning.
+ */
+int ext2_mknod (struct inode * dir, const char * name, int len, int mode,
+ int rdev)
+{
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct ext2_dir_entry * de;
+ int err;
+
+ if (!dir)
+ return -ENOENT;
+ bh = ext2_find_entry (dir, name, len, &de);
+ if (bh) {
+ brelse (bh);
+ iput (dir);
+ return -EEXIST;
+ }
+ inode = ext2_new_inode (dir, mode);
+ if (!inode) {
+ iput (dir);
+ return -ENOSPC;
+ }
+ inode->i_uid = current->fsuid;
+ inode->i_mode = mode;
+ inode->i_op = NULL;
+ if (S_ISREG(inode->i_mode))
+ inode->i_op = &ext2_file_inode_operations;
+ else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = &ext2_dir_inode_operations;
+ if (dir->i_mode & S_ISGID) /* new directories inherit the parent's setgid bit */
+ inode->i_mode |= S_ISGID;
+ }
+ else if (S_ISLNK(inode->i_mode))
+ inode->i_op = &ext2_symlink_inode_operations;
+ else if (S_ISCHR(inode->i_mode))
+ inode->i_op = &chrdev_inode_operations;
+ else if (S_ISBLK(inode->i_mode))
+ inode->i_op = &blkdev_inode_operations;
+ else if (S_ISFIFO(inode->i_mode))
+ init_fifo(inode);
+ if (S_ISBLK(mode) || S_ISCHR(mode))
+ inode->i_rdev = rdev;
+ inode->i_dirt = 1;
+ bh = ext2_add_entry (dir, name, len, &de, &err);
+ if (!bh) {
+ inode->i_nlink--; /* undo the link; presumably the new inode started with one - confirm */
+ inode->i_dirt = 1;
+ iput (inode);
+ iput (dir);
+ return err;
+ }
+ de->inode = inode->i_ino; /* the entry becomes valid only now */
+ dir->i_version = ++event;
+ dcache_add(dir, de->name, de->name_len, de->inode);
+ mark_buffer_dirty(bh, 1);
+ if (IS_SYNC(dir)) { /* synchronous directory: write the entry out now */
+ ll_rw_block (WRITE, 1, &bh);
+ wait_on_buffer (bh);
+ }
+ brelse (bh);
+ iput (dir);
+ iput (inode);
+ return 0;
+}
+
+/*
+ * ext2_mkdir creates a new directory inode containing the "." and ".."
+ * entries and links it into the directory dir under the given name.
+ *
+ * dir  - parent directory; released with iput() on every path except the
+ *        early return taken when dir is NULL
+ * name - name of the new directory (len bytes)
+ * mode - permission bits; masked with S_IRWXUGO and the caller's umask
+ *
+ * Returns 0 on success, -ENOENT if dir is NULL, -EEXIST if the name is
+ * already present, -EMLINK if dir already has EXT2_LINK_MAX links, -ENOSPC
+ * if ext2_new_inode returns NULL, or the error reported through ext2_bread
+ * or ext2_add_entry.
+ */
+int ext2_mkdir (struct inode * dir, const char * name, int len, int mode)
+{
+ struct inode * inode;
+ struct buffer_head * bh, * dir_block;
+ struct ext2_dir_entry * de;
+ int err;
+
+ if (!dir)
+ return -ENOENT;
+ bh = ext2_find_entry (dir, name, len, &de);
+ if (bh) {
+ brelse (bh);
+ iput (dir);
+ return -EEXIST;
+ }
+ if (dir->i_nlink >= EXT2_LINK_MAX) {
+ iput (dir);
+ return -EMLINK;
+ }
+ inode = ext2_new_inode (dir, S_IFDIR);
+ if (!inode) {
+ iput (dir);
+ return -ENOSPC;
+ }
+ inode->i_op = &ext2_dir_inode_operations;
+ /* The new directory starts out exactly one block long */
+ inode->i_size = inode->i_sb->s_blocksize;
+ dir_block = ext2_bread (inode, 0, 1, &err);
+ if (!dir_block) {
+ iput (dir);
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ iput (inode);
+ return err;
+ }
+ inode->i_blocks = inode->i_sb->s_blocksize / 512;
+ /* First entry: "." referring to the new directory itself */
+ de = (struct ext2_dir_entry *) dir_block->b_data;
+ de->inode = inode->i_ino;
+ de->name_len = 1;
+ de->rec_len = EXT2_DIR_REC_LEN(de->name_len);
+ strcpy (de->name, ".");
+ /* Second entry: ".." referring to the parent; its record covers the rest of the block */
+ de = (struct ext2_dir_entry *) ((char *) de + de->rec_len);
+ de->inode = dir->i_ino;
+ de->rec_len = inode->i_sb->s_blocksize - EXT2_DIR_REC_LEN(1);
+ de->name_len = 2;
+ strcpy (de->name, "..");
+ inode->i_nlink = 2;
+ mark_buffer_dirty(dir_block, 1);
+ brelse (dir_block);
+ inode->i_mode = S_IFDIR | (mode & S_IRWXUGO & ~current->fs->umask);
+ /* The new directory takes over the parent's set-group-ID bit */
+ if (dir->i_mode & S_ISGID)
+ inode->i_mode |= S_ISGID;
+ inode->i_dirt = 1;
+ bh = ext2_add_entry (dir, name, len, &de, &err);
+ if (!bh) {
+ iput (dir);
+ /* Link count was already raised to 2 above, so reset it to 0 rather than decrementing */
+ inode->i_nlink = 0;
+ inode->i_dirt = 1;
+ iput (inode);
+ return err;
+ }
+ de->inode = inode->i_ino;
+ dir->i_version = ++event;
+ dcache_add(dir, de->name, de->name_len, de->inode);
+ mark_buffer_dirty(bh, 1);
+ /* Synchronous directory: start the write now and wait for it to finish */
+ if (IS_SYNC(dir)) {
+ ll_rw_block (WRITE, 1, &bh);
+ wait_on_buffer (bh);
+ }
+ /* The parent gains one link (the new directory's ".." entry refers to it) */
+ dir->i_nlink++;
+ dir->i_dirt = 1;
+ iput (dir);
+ iput (inode);
+ brelse (bh);
+ return 0;
+}
+
+/*
+ * routine to check that the specified directory is empty (for rmdir)
+ *
+ * Returns 0 as soon as an entry with a non-zero inode number is found after
+ * "." and "..", and 1 otherwise.  Note that 1 is also returned when the
+ * directory looks damaged (too short, first block unreadable, "." or ".."
+ * missing, or an entry rejected by ext2_check_dir_entry), so callers treat
+ * a damaged directory as empty.
+ *
+ * Every buffer obtained here is released before returning.
+ */
+static int empty_dir (struct inode * inode)
+{
+	unsigned long offset;
+	struct buffer_head * bh;
+	struct ext2_dir_entry * de, * de1;
+	struct super_block * sb;
+	int err;
+
+	sb = inode->i_sb;
+	if (inode->i_size < EXT2_DIR_REC_LEN(1) + EXT2_DIR_REC_LEN(2) ||
+	    !(bh = ext2_bread (inode, 0, 0, &err))) {
+		ext2_warning (inode->i_sb, "empty_dir",
+			      "bad directory (dir %lu)", inode->i_ino);
+		return 1;
+	}
+	de = (struct ext2_dir_entry *) bh->b_data;
+	de1 = (struct ext2_dir_entry *) ((char *) de + de->rec_len);
+	if (de->inode != inode->i_ino || !de1->inode ||
+	    strcmp (".", de->name) || strcmp ("..", de1->name)) {
+		ext2_warning (inode->i_sb, "empty_dir",
+			      "bad directory (dir %lu)", inode->i_ino);
+		/* bh was read successfully above and must not be leaked */
+		brelse (bh);
+		return 1;
+	}
+	/* Start scanning right after the "." and ".." records */
+	offset = de->rec_len + de1->rec_len;
+	de = (struct ext2_dir_entry *) ((char *) de1 + de1->rec_len);
+	while (offset < inode->i_size ) {
+		/*
+		 * Fetch the next block when the current one is exhausted, or
+		 * when the previous read failed and left bh NULL (in which
+		 * case bh->b_data must not be touched).
+		 */
+		if (!bh ||
+		    (void *) de >= (void *) (bh->b_data + sb->s_blocksize)) {
+			brelse (bh);
+			bh = ext2_bread (inode, offset >> EXT2_BLOCK_SIZE_BITS(sb), 1, &err);
+			if (!bh) {
+				/* Unreadable block: skip over it */
+				offset += sb->s_blocksize;
+				continue;
+			}
+			de = (struct ext2_dir_entry *) bh->b_data;
+		}
+		if (!ext2_check_dir_entry ("empty_dir", inode, de, bh,
+					   offset)) {
+			brelse (bh);
+			return 1;
+		}
+		if (de->inode) {
+			/* A live entry: the directory is not empty */
+			brelse (bh);
+			return 0;
+		}
+		offset += de->rec_len;
+		de = (struct ext2_dir_entry *) ((char *) de + de->rec_len);
+	}
+	brelse (bh);
+	return 1;
+}
+
+/*
+ * ext2_rmdir removes the empty directory called name from the directory dir.
+ *
+ * dir  - parent directory; released with iput() on every path except the
+ *        early return taken when dir is NULL
+ * name - name of the directory to remove (len bytes)
+ *
+ * Returns 0 on success, -ENOENT if dir is NULL or the entry is missing,
+ * -EPERM if the inode cannot be obtained, is on another device, is protected
+ * by the sticky-bit check or is dir itself, -ENOTDIR if the entry is not a
+ * directory, -ENOTEMPTY if empty_dir() reports live entries, or the error
+ * returned by ext2_delete_entry.
+ */
+int ext2_rmdir (struct inode * dir, const char * name, int len)
+{
+ int retval;
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct ext2_dir_entry * de;
+
+repeat:
+ if (!dir)
+ return -ENOENT;
+ inode = NULL;
+ bh = ext2_find_entry (dir, name, len, &de);
+ retval = -ENOENT;
+ if (!bh)
+ goto end_rmdir;
+ retval = -EPERM;
+ if (!(inode = iget (dir->i_sb, de->inode)))
+ goto end_rmdir;
+ if (inode->i_dev != dir->i_dev)
+ goto end_rmdir;
+ /*
+  * The entry no longer names the inode we obtained (presumably it changed
+  * while iget slept): drop everything, reschedule and start over.
+  */
+ if (de->inode != inode->i_ino) {
+ iput(inode);
+ brelse(bh);
+ current->counter = 0;
+ schedule();
+ goto repeat;
+ }
+ /* Sticky directory: only the superuser, the entry's owner or the directory's owner may remove */
+ if ((dir->i_mode & S_ISVTX) && !fsuser() &&
+ current->fsuid != inode->i_uid &&
+ current->fsuid != dir->i_uid)
+ goto end_rmdir;
+ if (inode == dir) /* we may not delete ".", but "../dir" is ok */
+ goto end_rmdir;
+ if (!S_ISDIR(inode->i_mode)) {
+ retval = -ENOTDIR;
+ goto end_rmdir;
+ }
+ /* The emptiness check and the entry removal are done while holding the victim's i_sem */
+ down(&inode->i_sem);
+ if (!empty_dir (inode))
+ retval = -ENOTEMPTY;
+ else if (de->inode != inode->i_ino)
+ retval = -ENOENT;
+ else {
+ if (inode->i_count > 1) {
+ /*
+ * Are we deleting the last instance of a busy directory?
+ * Better clean up if so.
+ *
+ * Make directory empty (it will be truncated when finally
+ * dereferenced). This also inhibits ext2_add_entry.
+ */
+ inode->i_size = 0;
+ }
+ retval = ext2_delete_entry (de, bh);
+ dir->i_version = ++event;
+ }
+ up(&inode->i_sem);
+ if (retval)
+ goto end_rmdir;
+ mark_buffer_dirty(bh, 1);
+ /* Synchronous directory: start the write now and wait for it to finish */
+ if (IS_SYNC(dir)) {
+ ll_rw_block (WRITE, 1, &bh);
+ wait_on_buffer (bh);
+ }
+ if (inode->i_nlink != 2)
+ ext2_warning (inode->i_sb, "ext2_rmdir",
+ "empty directory has nlink!=2 (%d)",
+ inode->i_nlink);
+ inode->i_version = ++event;
+ inode->i_nlink = 0;
+ inode->i_dirt = 1;
+ /* The parent loses one link along with the removed subdirectory */
+ dir->i_nlink--;
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ dir->i_dirt = 1;
+end_rmdir:
+ /* inode and bh may still be NULL here; this code relies on iput/brelse accepting NULL */
+ iput (dir);
+ iput (inode);
+ brelse (bh);
+ return retval;
+}
+
+/*
+ * ext2_unlink removes the non-directory entry called name from the
+ * directory dir and drops one link from the inode it referred to.
+ *
+ * dir  - parent directory; released with iput() on every path except the
+ *        early return taken when dir is NULL
+ * name - name of the entry to remove (len bytes)
+ *
+ * Returns 0 on success, -ENOENT if dir is NULL, the entry is missing or its
+ * inode cannot be obtained, -EPERM if the entry is a directory, is
+ * append-only or immutable, or is protected by the sticky-bit check, or the
+ * error returned by ext2_delete_entry.
+ */
+int ext2_unlink (struct inode * dir, const char * name, int len)
+{
+ int retval;
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct ext2_dir_entry * de;
+
+repeat:
+ if (!dir)
+ return -ENOENT;
+ retval = -ENOENT;
+ inode = NULL;
+ bh = ext2_find_entry (dir, name, len, &de);
+ if (!bh)
+ goto end_unlink;
+ if (!(inode = iget (dir->i_sb, de->inode)))
+ goto end_unlink;
+ retval = -EPERM;
+ if (S_ISDIR(inode->i_mode))
+ goto end_unlink;
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ goto end_unlink;
+ /*
+  * The entry no longer names the inode we obtained (presumably it changed
+  * while iget slept): drop everything, reschedule and start over.
+  */
+ if (de->inode != inode->i_ino) {
+ iput(inode);
+ brelse(bh);
+ current->counter = 0;
+ schedule();
+ goto repeat;
+ }
+ /* Sticky directory: only the superuser, the entry's owner or the directory's owner may remove */
+ if ((dir->i_mode & S_ISVTX) && !fsuser() &&
+ current->fsuid != inode->i_uid &&
+ current->fsuid != dir->i_uid)
+ goto end_unlink;
+ if (!inode->i_nlink) {
+ ext2_warning (inode->i_sb, "ext2_unlink",
+ "Deleting nonexistent file (%lu), %d",
+ inode->i_ino, inode->i_nlink);
+ /* Force the count to 1 so that the decrement below ends at 0 */
+ inode->i_nlink = 1;
+ }
+ retval = ext2_delete_entry (de, bh);
+ if (retval)
+ goto end_unlink;
+ dir->i_version = ++event;
+ mark_buffer_dirty(bh, 1);
+ /* Synchronous directory: start the write now and wait for it to finish */
+ if (IS_SYNC(dir)) {
+ ll_rw_block (WRITE, 1, &bh);
+ wait_on_buffer (bh);
+ }
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ dir->i_dirt = 1;
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ inode->i_ctime = dir->i_ctime;
+ retval = 0;
+end_unlink:
+ /* bh and inode may still be NULL here; this code relies on brelse/iput accepting NULL */
+ brelse (bh);
+ iput (inode);
+ iput (dir);
+ return retval;
+}
+
+/*
+ * ext2_symlink creates a symbolic link called name in the directory dir
+ * whose target is the string symname.
+ *
+ * A target shorter than EXT2_N_BLOCKS * sizeof (unsigned long) bytes is
+ * stored directly in the inode's i_data area ("fast symlink"); a longer one
+ * is stored in the first data block.  At most s_blocksize - 1 bytes of the
+ * target are copied.
+ *
+ * dir is released with iput() on every path.
+ *
+ * Returns 0 on success, -ENOSPC if ext2_new_inode returns NULL, -EEXIST if
+ * the name is already present, or the error reported through ext2_bread or
+ * ext2_add_entry.
+ *
+ * NOTE(review): unlike ext2_create/ext2_mknod/ext2_mkdir there is no NULL
+ * check on dir here - confirm that callers never pass NULL.
+ */
+int ext2_symlink (struct inode * dir, const char * name, int len,
+ const char * symname)
+{
+ struct ext2_dir_entry * de;
+ struct inode * inode = NULL;
+ struct buffer_head * bh = NULL, * name_block = NULL;
+ char * link;
+ int i, err;
+ int l;
+ char c;
+
+ if (!(inode = ext2_new_inode (dir, S_IFLNK))) {
+ iput (dir);
+ return -ENOSPC;
+ }
+ inode->i_mode = S_IFLNK | S_IRWXUGO;
+ inode->i_op = &ext2_symlink_inode_operations;
+ /* Measure the target, stopping at s_blocksize - 1 characters */
+ for (l = 0; l < inode->i_sb->s_blocksize - 1 &&
+ symname [l]; l++)
+ ;
+ if (l >= EXT2_N_BLOCKS * sizeof (unsigned long)) {
+
+ ext2_debug ("l=%d, normal symlink\n", l);
+
+ name_block = ext2_bread (inode, 0, 1, &err);
+ if (!name_block) {
+ iput (dir);
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ iput (inode);
+ return err;
+ }
+ link = name_block->b_data;
+ } else {
+ link = (char *) inode->u.ext2_i.i_data;
+
+ ext2_debug ("l=%d, fast symlink\n", l);
+
+ }
+ /* Copy the target into the chosen storage and NUL-terminate it */
+ i = 0;
+ while (i < inode->i_sb->s_blocksize - 1 && (c = *(symname++)))
+ link[i++] = c;
+ link[i] = 0;
+ if (name_block) {
+ mark_buffer_dirty(name_block, 1);
+ brelse (name_block);
+ }
+ inode->i_size = i;
+ inode->i_dirt = 1;
+ /* The name is only checked for existence after the inode has been set up */
+ bh = ext2_find_entry (dir, name, len, &de);
+ if (bh) {
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ iput (inode);
+ brelse (bh);
+ iput (dir);
+ return -EEXIST;
+ }
+ bh = ext2_add_entry (dir, name, len, &de, &err);
+ if (!bh) {
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ iput (inode);
+ iput (dir);
+ return err;
+ }
+ de->inode = inode->i_ino;
+ dir->i_version = ++event;
+ dcache_add(dir, de->name, de->name_len, de->inode);
+ mark_buffer_dirty(bh, 1);
+ /* Synchronous directory: start the write now and wait for it to finish */
+ if (IS_SYNC(dir)) {
+ ll_rw_block (WRITE, 1, &bh);
+ wait_on_buffer (bh);
+ }
+ brelse (bh);
+ iput (dir);
+ iput (inode);
+ return 0;
+}
+
+/*
+ * ext2_link adds a new name for oldinode in the directory dir.
+ *
+ * Both oldinode and dir are released with iput() on every path.
+ *
+ * Returns 0 on success, -EPERM if oldinode is a directory, append-only or
+ * immutable, -EMLINK if it already has EXT2_LINK_MAX links, -EEXIST if the
+ * name is already present in dir, or the error reported through
+ * ext2_add_entry.
+ */
+int ext2_link (struct inode * oldinode, struct inode * dir,
+	       const char * name, int len)
+{
+	struct ext2_dir_entry * entry;
+	struct buffer_head * block;
+	int err;
+
+	/* Checks that depend on the source inode only */
+	if (S_ISDIR(oldinode->i_mode) ||
+	    IS_APPEND(oldinode) || IS_IMMUTABLE(oldinode))
+		err = -EPERM;
+	else if (oldinode->i_nlink >= EXT2_LINK_MAX)
+		err = -EMLINK;
+	else
+		err = 0;
+	if (err) {
+		iput (oldinode);
+		iput (dir);
+		return err;
+	}
+
+	block = ext2_find_entry (dir, name, len, &entry);
+	if (block) {
+		brelse (block);
+		err = -EEXIST;
+		goto fail;
+	}
+	block = ext2_add_entry (dir, name, len, &entry, &err);
+	if (!block)
+		goto fail;
+
+	entry->inode = oldinode->i_ino;
+	dir->i_version = ++event;
+	dcache_add(dir, entry->name, entry->name_len, entry->inode);
+	mark_buffer_dirty(block, 1);
+	/* Synchronous directory: write the block out and wait for it */
+	if (IS_SYNC(dir)) {
+		ll_rw_block (WRITE, 1, &block);
+		wait_on_buffer (block);
+	}
+	brelse (block);
+	iput (dir);
+
+	/* Account for the new name on the inode itself */
+	oldinode->i_nlink++;
+	oldinode->i_ctime = CURRENT_TIME;
+	oldinode->i_dirt = 1;
+	iput (oldinode);
+	return 0;
+
+fail:
+	iput (dir);
+	iput (oldinode);
+	return err;
+}
+
+/*
+ * subdir follows ".." upwards from new_inode and returns 1 if old_inode is
+ * met on the way (new_inode is old_inode or lies below it), 0 otherwise.
+ *
+ * The walk stops without a match when it would leave old_inode's device,
+ * when the ".." lookup fails, or when ".." yields the same inode number
+ * again.  An extra reference is taken on new_inode before the walk, and
+ * whichever inode the walk ends on is released with iput().
+ */
+static int subdir (struct inode * new_inode, struct inode * old_inode)
+{
+	int last_ino;
+
+	new_inode->i_count++;
+	while (new_inode != old_inode) {
+		if (new_inode->i_dev != old_inode->i_dev)
+			goto not_below;
+		last_ino = new_inode->i_ino;
+		if (ext2_lookup (new_inode, "..", 2, &new_inode))
+			goto not_below;
+		if (new_inode->i_ino == last_ino)
+			goto not_below;
+	}
+	iput (new_inode);
+	return 1;
+
+not_below:
+	iput (new_inode);
+	return 0;
+}
+
+/*
+ * Given the start of a directory's first data block, PARENT_INO and
+ * PARENT_NAME yield the inode and name fields of the entry that follows the
+ * first one (found by skipping the first entry's rec_len).  ext2_mkdir
+ * writes ".." in that position.  PARENT_INO is also used as an lvalue.
+ *
+ * The argument is parenthesized so that an expression other than a plain
+ * identifier (e.g. "p + 1") is cast and dereferenced as a whole.
+ */
+#define PARENT_INO(buffer) \
+	((struct ext2_dir_entry *) ((char *) (buffer) + \
+	((struct ext2_dir_entry *) (buffer))->rec_len))->inode
+
+#define PARENT_NAME(buffer) \
+	((struct ext2_dir_entry *) ((char *) (buffer) + \
+	((struct ext2_dir_entry *) (buffer))->rec_len))->name
+
+/*
+ * rename uses retrying to avoid race-conditions: at least they should be
+ * minimal.
+ * it tries to allocate all the blocks, then sanity-checks, and if the sanity-
+ * checks fail, it tries to restart itself again. Very practical - no changes
+ * are done until we know everything works ok.. and then all the changes can be
+ * done in one fell swoop when we have claimed all the buffers needed.
+ *
+ * Anybody can rename anything with this: the permission checks are left to the
+ * higher-level routines.
+ *
+ * Both old_dir and new_dir are released with iput() before returning.
+ * Returns 0 on success (also when source and target are the same inode) or
+ * a negative errno value.
+ */
+static int do_ext2_rename (struct inode * old_dir, const char * old_name,
+ int old_len, struct inode * new_dir,
+ const char * new_name, int new_len)
+{
+ struct inode * old_inode, * new_inode;
+ struct buffer_head * old_bh, * new_bh, * dir_bh;
+ struct ext2_dir_entry * old_de, * new_de;
+ int retval;
+
+ goto start_up;
+try_again:
+ /*
+  * A sanity check failed: remove the target entry, release everything
+  * acquired in this attempt, reschedule and start from scratch.
+  * NOTE(review): the entry is removed whether it was just added or was
+  * found already present - confirm this is intended.
+  */
+ if (new_bh && new_de) {
+ ext2_delete_entry(new_de, new_bh);
+ new_dir->i_version = ++event;
+ }
+ brelse (old_bh);
+ brelse (new_bh);
+ brelse (dir_bh);
+ iput (old_inode);
+ iput (new_inode);
+ current->counter = 0;
+ schedule ();
+start_up:
+ old_inode = new_inode = NULL;
+ old_bh = new_bh = dir_bh = NULL;
+ new_de = NULL;
+ old_bh = ext2_find_entry (old_dir, old_name, old_len, &old_de);
+ retval = -ENOENT;
+ if (!old_bh)
+ goto end_rename;
+ old_inode = __iget (old_dir->i_sb, old_de->inode, 0); /* don't cross mnt-points */
+ if (!old_inode)
+ goto end_rename;
+ retval = -EPERM;
+ /* Sticky source directory: only the superuser, the entry's owner or the directory's owner may rename */
+ if ((old_dir->i_mode & S_ISVTX) &&
+ current->fsuid != old_inode->i_uid &&
+ current->fsuid != old_dir->i_uid && !fsuser())
+ goto end_rename;
+ if (IS_APPEND(old_inode) || IS_IMMUTABLE(old_inode))
+ goto end_rename;
+ new_bh = ext2_find_entry (new_dir, new_name, new_len, &new_de);
+ if (new_bh) {
+ new_inode = __iget (new_dir->i_sb, new_de->inode, 0); /* no mntp cross */
+ if (!new_inode) {
+ /* Treat an entry whose inode cannot be obtained as if no target existed */
+ brelse (new_bh);
+ new_bh = NULL;
+ }
+ }
+ /* Source and target are the same inode: nothing to do */
+ if (new_inode == old_inode) {
+ retval = 0;
+ goto end_rename;
+ }
+ /* An existing target directory may only be replaced by a directory, and only if it is empty and unused */
+ if (new_inode && S_ISDIR(new_inode->i_mode)) {
+ retval = -EISDIR;
+ if (!S_ISDIR(old_inode->i_mode))
+ goto end_rename;
+ retval = -EINVAL;
+ if (subdir (new_dir, old_inode))
+ goto end_rename;
+ retval = -ENOTEMPTY;
+ if (!empty_dir (new_inode))
+ goto end_rename;
+ retval = -EBUSY;
+ if (new_inode->i_count > 1)
+ goto end_rename;
+ }
+ retval = -EPERM;
+ /* Sticky target directory: the same ownership rule applies to the entry being replaced */
+ if (new_inode && (new_dir->i_mode & S_ISVTX) &&
+ current->fsuid != new_inode->i_uid &&
+ current->fsuid != new_dir->i_uid && !fsuser())
+ goto end_rename;
+ if (S_ISDIR(old_inode->i_mode)) {
+ retval = -ENOTDIR;
+ if (new_inode && !S_ISDIR(new_inode->i_mode))
+ goto end_rename;
+ retval = -EINVAL;
+ /* Refuse to move a directory into itself or into one of its own descendants */
+ if (subdir (new_dir, old_inode))
+ goto end_rename;
+ /*
+  * Read the moved directory's first block so that its ".." entry can
+  * be checked and later rewritten.
+  * NOTE(review): retval is passed as ext2_bread's error slot, so the
+  * value returned by the two gotos below is whatever ext2_bread left
+  * there - confirm.
+  */
+ dir_bh = ext2_bread (old_inode, 0, 0, &retval);
+ if (!dir_bh)
+ goto end_rename;
+ if (PARENT_INO(dir_bh->b_data) != old_dir->i_ino)
+ goto end_rename;
+ retval = -EMLINK;
+ if (!new_inode && new_dir->i_nlink >= EXT2_LINK_MAX)
+ goto end_rename;
+ }
+ /* No existing target entry: create one now */
+ if (!new_bh)
+ new_bh = ext2_add_entry (new_dir, new_name, new_len, &new_de,
+ &retval);
+ if (!new_bh)
+ goto end_rename;
+ new_dir->i_version = ++event;
+ /*
+ * sanity checking before doing the rename - avoid races
+ */
+ if (new_inode && (new_de->inode != new_inode->i_ino))
+ goto try_again;
+ if (new_de->inode && !new_inode)
+ goto try_again;
+ if (old_de->inode != old_inode->i_ino)
+ goto try_again;
+ /*
+ * ok, that's it
+ */
+ new_de->inode = old_inode->i_ino;
+ dcache_add(new_dir, new_de->name, new_de->name_len, new_de->inode);
+ retval = ext2_delete_entry (old_de, old_bh);
+ if (retval == -ENOENT)
+ goto try_again;
+ if (retval)
+ goto end_rename;
+ old_dir->i_version = ++event;
+ /* The replaced target loses the link held by the overwritten entry */
+ if (new_inode) {
+ new_inode->i_nlink--;
+ new_inode->i_ctime = CURRENT_TIME;
+ new_inode->i_dirt = 1;
+ }
+ old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
+ old_dir->i_dirt = 1;
+ /* A directory was moved: repoint its ".." entry and fix up the link counts */
+ if (dir_bh) {
+ PARENT_INO(dir_bh->b_data) = new_dir->i_ino;
+ dcache_add(old_inode, "..", 2, new_dir->i_ino);
+ mark_buffer_dirty(dir_bh, 1);
+ old_dir->i_nlink--;
+ old_dir->i_dirt = 1;
+ if (new_inode) {
+ /* Second decrement for a replaced directory (the first one is above) */
+ new_inode->i_nlink--;
+ new_inode->i_dirt = 1;
+ } else {
+ new_dir->i_nlink++;
+ new_dir->i_dirt = 1;
+ }
+ }
+ mark_buffer_dirty(old_bh, 1);
+ if (IS_SYNC(old_dir)) {
+ ll_rw_block (WRITE, 1, &old_bh);
+ wait_on_buffer (old_bh);
+ }
+ mark_buffer_dirty(new_bh, 1);
+ if (IS_SYNC(new_dir)) {
+ ll_rw_block (WRITE, 1, &new_bh);
+ wait_on_buffer (new_bh);
+ }
+ retval = 0;
+end_rename:
+ /* Any of the buffers and the two entry inodes may still be NULL here */
+ brelse (dir_bh);
+ brelse (old_bh);
+ brelse (new_bh);
+ iput (old_inode);
+ iput (new_inode);
+ iput (old_dir);
+ iput (new_dir);
+ return retval;
+}
+
+/*
+ * Ok, rename also locks out other renames, as they can change the parent of
+ * a directory, and we don't want any races. Other races are checked for by
+ * "do_ext2_rename()", which restarts if there are inconsistencies.
+ *
+ * Note that there is no race between different filesystems: it's only within
+ * the same device that races occur: many renames can happen at once, as long
+ * as they are on different partitions.
+ *
+ * In the second extended file system, we use a lock flag stored in the memory
+ * super-block. This way, we really lock other renames only if they occur
+ * on the same file system
+ *
+ * Returns whatever do_ext2_rename() returns.
+ */
+int ext2_rename (struct inode * old_dir, const char * old_name, int old_len,
+		 struct inode * new_dir, const char * new_name, int new_len)
+{
+	/*
+	 * do_ext2_rename() releases old_dir with iput() before it returns,
+	 * so the super-block pointer is fetched once up front and old_dir is
+	 * not dereferenced again afterwards.
+	 */
+	struct super_block * sb = old_dir->i_sb;
+	int result;
+
+	while (sb->u.ext2_sb.s_rename_lock)
+		sleep_on (&sb->u.ext2_sb.s_rename_wait);
+	sb->u.ext2_sb.s_rename_lock = 1;
+	result = do_ext2_rename (old_dir, old_name, old_len, new_dir,
+				 new_name, new_len);
+	sb->u.ext2_sb.s_rename_lock = 0;
+	wake_up (&sb->u.ext2_sb.s_rename_wait);
+	return result;
+}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
new file mode 100644
index 000000000..37fae41ad
--- /dev/null
+++ b/fs/ext2/super.c
@@ -0,0 +1,755 @@
+/*
+ * linux/fs/ext2/super.c
+ *
+ * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/inode.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <stdarg.h>
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/malloc.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+
+/*
+ * ext2_error reports a file system error found in `function'.
+ *
+ * Unless the file system is mounted read-only, the error flag is set in
+ * both the in-memory and the on-disk super-block state.  The message is
+ * formatted printf-style from fmt and the following arguments.  Depending on
+ * the mount options and on the super-block's s_errors field, the function
+ * then either panics, or logs the message and optionally flags the file
+ * system read-only.
+ */
+void ext2_error (struct super_block * sb, const char * function,
+		 const char * fmt, ...)
+{
+	char msg[1024];
+	va_list ap;
+	int want_panic;
+	int want_ro;
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		sb->u.ext2_sb.s_mount_state |= EXT2_ERROR_FS;
+		sb->u.ext2_sb.s_es->s_state |= EXT2_ERROR_FS;
+		mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1);
+		sb->s_dirt = 1;
+	}
+
+	va_start (ap, fmt);
+	vsprintf (msg, fmt, ap);
+	va_end (ap);
+
+	/* A mount option wins; otherwise fall back to the super-block default */
+	want_panic = test_opt (sb, ERRORS_PANIC) ||
+		(sb->u.ext2_sb.s_es->s_errors == EXT2_ERRORS_PANIC &&
+		 !test_opt (sb, ERRORS_CONT) && !test_opt (sb, ERRORS_RO));
+	if (want_panic)
+		panic ("EXT2-fs panic (device %d/%d): %s: %s\n",
+		       MAJOR(sb->s_dev), MINOR(sb->s_dev), function, msg);
+
+	printk (KERN_CRIT "EXT2-fs error (device %d/%d): %s: %s\n",
+		MAJOR(sb->s_dev), MINOR(sb->s_dev), function, msg);
+
+	want_ro = test_opt (sb, ERRORS_RO) ||
+		(sb->u.ext2_sb.s_es->s_errors == EXT2_ERRORS_RO &&
+		 !test_opt (sb, ERRORS_CONT) && !test_opt (sb, ERRORS_PANIC));
+	if (want_ro) {
+		printk ("Remounting filesystem read-only\n");
+		sb->s_flags |= MS_RDONLY;
+	}
+}
+
+/*
+ * ext2_panic flags the file system as erroneous (unless it is mounted
+ * read-only), formats the printf-style message and hands it to panic().
+ */
+NORET_TYPE void ext2_panic (struct super_block * sb, const char * function,
+			    const char * fmt, ...)
+{
+	char msg[1024];
+	va_list ap;
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		/* Record the error in memory and in the on-disk super-block */
+		sb->u.ext2_sb.s_mount_state |= EXT2_ERROR_FS;
+		sb->u.ext2_sb.s_es->s_state |= EXT2_ERROR_FS;
+		mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1);
+		sb->s_dirt = 1;
+	}
+
+	va_start (ap, fmt);
+	vsprintf (msg, fmt, ap);
+	va_end (ap);
+
+	panic ("EXT2-fs panic (device %d/%d): %s: %s\n",
+	       MAJOR(sb->s_dev), MINOR(sb->s_dev), function, msg);
+}
+
+/*
+ * ext2_warning formats the printf-style message and logs it at warning
+ * level together with the device numbers and the reporting function's name.
+ * The super-block state is left untouched.
+ */
+void ext2_warning (struct super_block * sb, const char * function,
+		   const char * fmt, ...)
+{
+	char msg[1024];
+	va_list ap;
+
+	va_start (ap, fmt);
+	vsprintf (msg, fmt, ap);
+	va_end (ap);
+
+	printk (KERN_WARNING "EXT2-fs warning (device %d/%d): %s: %s\n",
+		MAJOR(sb->s_dev), MINOR(sb->s_dev), function, msg);
+}
+
+/*
+ * ext2_put_super releases everything the in-memory super-block holds: the
+ * group descriptor buffers and their pointer array, the loaded inode and
+ * block bitmap buffers, and the super-block buffer itself.  On a writable
+ * file system the saved mount state is first copied back into the on-disk
+ * super-block.  All of this is done with the super-block locked.
+ */
+void ext2_put_super (struct super_block * sb)
+{
+	int nr_desc_blocks;
+	int n;
+
+	lock_super (sb);
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		sb->u.ext2_sb.s_es->s_state = sb->u.ext2_sb.s_mount_state;
+		mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1);
+	}
+	sb->s_dev = 0;
+
+	/* Group descriptor blocks, then the array that pointed at them */
+	nr_desc_blocks = sb->u.ext2_sb.s_db_per_group;
+	for (n = 0; n < nr_desc_blocks; n++) {
+		if (sb->u.ext2_sb.s_group_desc[n])
+			brelse (sb->u.ext2_sb.s_group_desc[n]);
+	}
+	kfree_s (sb->u.ext2_sb.s_group_desc,
+		 nr_desc_blocks * sizeof (struct buffer_head *));
+
+	/* Loaded inode bitmaps */
+	for (n = 0; n < EXT2_MAX_GROUP_LOADED; n++) {
+		if (sb->u.ext2_sb.s_inode_bitmap[n])
+			brelse (sb->u.ext2_sb.s_inode_bitmap[n]);
+	}
+
+	/* Loaded block bitmaps */
+	for (n = 0; n < EXT2_MAX_GROUP_LOADED; n++) {
+		if (sb->u.ext2_sb.s_block_bitmap[n])
+			brelse (sb->u.ext2_sb.s_block_bitmap[n]);
+	}
+
+	brelse (sb->u.ext2_sb.s_sbh);
+	unlock_super (sb);
+}
+
+/*
+ * Super-block operations table for ext2.  This is a positional initializer,
+ * so the order of the entries must match the field order of
+ * struct super_operations (declared elsewhere).  The second slot is left
+ * NULL.
+ */
+static struct super_operations ext2_sops = {
+ ext2_read_inode,
+ NULL,
+ ext2_write_inode,
+ ext2_put_inode,
+ ext2_put_super,
+ ext2_write_super,
+ ext2_statfs,
+ ext2_remount
+};
+
+#ifdef EXT2FS_PRE_02B_COMPAT
+
+/*
+ * convert_pre_02b_fs rewrites the group descriptors stored in block 2 of a
+ * pre-0.2b file system into the current struct ext2_group_desc layout and
+ * stamps the super-block (on disk and in memory) with EXT2_SUPER_MAGIC.
+ *
+ * sb - in-memory super-block being set up
+ * bh - buffer holding the on-disk super-block
+ *
+ * Returns 1 on success, 0 if the descriptor block cannot be read.
+ *
+ * NOTE(review): only a single BLOCK_SIZE block of old descriptors is copied
+ * and old_group_desc[] is indexed up to groups_count - this assumes the
+ * caller has already bounded the file system size; confirm.
+ */
+static int convert_pre_02b_fs (struct super_block * sb,
+ struct buffer_head * bh)
+{
+ struct ext2_super_block * es;
+ struct ext2_old_group_desc old_group_desc [BLOCK_SIZE / sizeof (struct ext2_old_group_desc)];
+ struct ext2_group_desc * gdp;
+ struct buffer_head * bh2;
+ int groups_count;
+ int i;
+
+ es = (struct ext2_super_block *) bh->b_data;
+ bh2 = bread (sb->s_dev, 2, BLOCK_SIZE);
+ if (!bh2) {
+ printk ("Cannot read descriptor blocks while converting !\n");
+ return 0;
+ }
+ /* Keep a copy of the old descriptors; the buffer is rewritten in place below */
+ memcpy (old_group_desc, bh2->b_data, BLOCK_SIZE);
+ /* Number of groups, rounding up, at EXT2_BLOCK_SIZE(sb) * 8 blocks per group */
+ groups_count = (sb->u.ext2_sb.s_blocks_count -
+ sb->u.ext2_sb.s_first_data_block +
+ (EXT2_BLOCK_SIZE(sb) * 8) - 1) /
+ (EXT2_BLOCK_SIZE(sb) * 8);
+ memset (bh2->b_data, 0, BLOCK_SIZE);
+ gdp = (struct ext2_group_desc *) bh2->b_data;
+ for (i = 0; i < groups_count; i++) {
+ gdp[i].bg_block_bitmap = old_group_desc[i].bg_block_bitmap;
+ gdp[i].bg_inode_bitmap = old_group_desc[i].bg_inode_bitmap;
+ gdp[i].bg_inode_table = old_group_desc[i].bg_inode_table;
+ gdp[i].bg_free_blocks_count = old_group_desc[i].bg_free_blocks_count;
+ gdp[i].bg_free_inodes_count = old_group_desc[i].bg_free_inodes_count;
+ }
+ mark_buffer_dirty(bh2, 1);
+ brelse (bh2);
+ es->s_magic = EXT2_SUPER_MAGIC;
+ mark_buffer_dirty(bh, 1);
+ sb->s_magic = EXT2_SUPER_MAGIC;
+ return 1;
+}
+
+#endif
+
+/*
+ * This function has been shamelessly adapted from the msdos fs
+ *
+ * parse_options walks the comma-separated mount option string and updates
+ * the outputs accordingly.  Each option may carry a value after '='.
+ *
+ * options       - option string, or NULL; it is modified in place (strtok,
+ *                 and each '=' is overwritten with a NUL)
+ * sb_block      - receives the value of "sb="
+ * resuid/resgid - receive the values of "resuid=" / "resgid="
+ * mount_options - option bit mask, updated with set_opt/clear_opt
+ *
+ * "check" accepts no value (normal checking), "none", "normal" or "strict";
+ * any other value is rejected.
+ *
+ * Returns 1 on success (including options == NULL) and 0, after printing a
+ * message, when an option is unknown, lacks a required argument or has an
+ * invalid one.
+ */
+static int parse_options (char * options, unsigned long * sb_block,
+			  unsigned short *resuid, unsigned short * resgid,
+			  unsigned long * mount_options)
+{
+	char * this_char;
+	char * value;
+
+	if (!options)
+		return 1;
+	for (this_char = strtok (options, ",");
+	     this_char != NULL;
+	     this_char = strtok (NULL, ",")) {
+		/* Split "name=value" into two NUL-terminated strings */
+		if ((value = strchr (this_char, '=')) != NULL)
+			*value++ = 0;
+		if (!strcmp (this_char, "bsddf"))
+			clear_opt (*mount_options, MINIX_DF);
+		else if (!strcmp (this_char, "check")) {
+			if (!value || !*value)
+				set_opt (*mount_options, CHECK_NORMAL);
+			else if (!strcmp (value, "none")) {
+				clear_opt (*mount_options, CHECK_NORMAL);
+				clear_opt (*mount_options, CHECK_STRICT);
+			}
+			/* strcmp returns 0 on a match, hence the negation */
+			else if (!strcmp (value, "normal"))
+				set_opt (*mount_options, CHECK_NORMAL);
+			else if (!strcmp (value, "strict")) {
+				set_opt (*mount_options, CHECK_NORMAL);
+				set_opt (*mount_options, CHECK_STRICT);
+			}
+			else {
+				printk ("EXT2-fs: Invalid check option: %s\n",
+					value);
+				return 0;
+			}
+		}
+		else if (!strcmp (this_char, "debug"))
+			set_opt (*mount_options, DEBUG);
+		else if (!strcmp (this_char, "errors")) {
+			if (!value || !*value) {
+				printk ("EXT2-fs: the errors option requires "
+					"an argument");
+				return 0;
+			}
+			/* The three error behaviours are mutually exclusive */
+			if (!strcmp (value, "continue")) {
+				clear_opt (*mount_options, ERRORS_RO);
+				clear_opt (*mount_options, ERRORS_PANIC);
+				set_opt (*mount_options, ERRORS_CONT);
+			}
+			else if (!strcmp (value, "remount-ro")) {
+				clear_opt (*mount_options, ERRORS_CONT);
+				clear_opt (*mount_options, ERRORS_PANIC);
+				set_opt (*mount_options, ERRORS_RO);
+			}
+			else if (!strcmp (value, "panic")) {
+				clear_opt (*mount_options, ERRORS_CONT);
+				clear_opt (*mount_options, ERRORS_RO);
+				set_opt (*mount_options, ERRORS_PANIC);
+			}
+			else {
+				printk ("EXT2-fs: Invalid errors option: %s\n",
+					value);
+				return 0;
+			}
+		}
+		else if (!strcmp (this_char, "grpid") ||
+			 !strcmp (this_char, "bsdgroups"))
+			set_opt (*mount_options, GRPID);
+		else if (!strcmp (this_char, "minixdf"))
+			set_opt (*mount_options, MINIX_DF);
+		else if (!strcmp (this_char, "nocheck")) {
+			clear_opt (*mount_options, CHECK_NORMAL);
+			clear_opt (*mount_options, CHECK_STRICT);
+		}
+		else if (!strcmp (this_char, "nogrpid") ||
+			 !strcmp (this_char, "sysvgroups"))
+			clear_opt (*mount_options, GRPID);
+		else if (!strcmp (this_char, "resgid")) {
+			if (!value || !*value) {
+				printk ("EXT2-fs: the resgid option requires "
+					"an argument");
+				return 0;
+			}
+			*resgid = simple_strtoul (value, &value, 0);
+			/* Anything left after the number makes it invalid */
+			if (*value) {
+				printk ("EXT2-fs: Invalid resgid option: %s\n",
+					value);
+				return 0;
+			}
+		}
+		else if (!strcmp (this_char, "resuid")) {
+			if (!value || !*value) {
+				printk ("EXT2-fs: the resuid option requires "
+					"an argument");
+				return 0;
+			}
+			*resuid = simple_strtoul (value, &value, 0);
+			if (*value) {
+				printk ("EXT2-fs: Invalid resuid option: %s\n",
+					value);
+				return 0;
+			}
+		}
+		else if (!strcmp (this_char, "sb")) {
+			if (!value || !*value) {
+				printk ("EXT2-fs: the sb option requires "
+					"an argument");
+				return 0;
+			}
+			*sb_block = simple_strtoul (value, &value, 0);
+			if (*value) {
+				printk ("EXT2-fs: Invalid sb option: %s\n",
+					value);
+				return 0;
+			}
+		}
+		else {
+			printk ("EXT2-fs: Unrecognized mount option %s\n", this_char);
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * ext2_setup_super finishes setting up a freshly read super-block.
+ *
+ * A file system whose revision level is newer than EXT2_CURRENT_REV is
+ * forced read-only.  On a writable mount the function prints at most one
+ * warning recommending e2fsck, clears the valid flag in the on-disk state,
+ * bumps the mount count, records the mount time and marks the super-block
+ * dirty.  With the debug option it also prints the file system geometry,
+ * and with a check option it verifies the block and inode bitmaps.
+ *
+ * sb - in-memory super-block
+ * es - on-disk super-block
+ */
+static void ext2_setup_super (struct super_block * sb,
+ struct ext2_super_block * es)
+{
+ if (es->s_rev_level > EXT2_CURRENT_REV) {
+ printk ("EXT2-fs warning: revision level too high, "
+ "forcing read/only mode\n");
+ sb->s_flags |= MS_RDONLY;
+ }
+ if (!(sb->s_flags & MS_RDONLY)) {
+ /* Only the first matching condition below produces a warning */
+ if (!(sb->u.ext2_sb.s_mount_state & EXT2_VALID_FS))
+ printk ("EXT2-fs warning: mounting unchecked fs, "
+ "running e2fsck is recommended\n");
+ else if ((sb->u.ext2_sb.s_mount_state & EXT2_ERROR_FS))
+ printk ("EXT2-fs warning: mounting fs with errors, "
+ "running e2fsck is recommended\n");
+ else if (es->s_max_mnt_count >= 0 &&
+ es->s_mnt_count >= (unsigned short) es->s_max_mnt_count)
+ printk ("EXT2-fs warning: maximal mount count reached, "
+ "running e2fsck is recommended\n");
+ else if (es->s_checkinterval &&
+ (es->s_lastcheck + es->s_checkinterval <= CURRENT_TIME))
+ printk ("EXT2-fs warning: checktime reached, "
+ "running e2fsck is recommended\n");
+ /* The valid flag is cleared in the on-disk state while mounted read-write (ext2_put_super writes the saved state back) */
+ es->s_state &= ~EXT2_VALID_FS;
+ if (!es->s_max_mnt_count)
+ es->s_max_mnt_count = EXT2_DFL_MAX_MNT_COUNT;
+ es->s_mnt_count++;
+ es->s_mtime = CURRENT_TIME;
+ mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1);
+ sb->s_dirt = 1;
+ if (test_opt (sb, DEBUG))
+ printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, "
+ "bpg=%lu, ipg=%lu, mo=%04lx]\n",
+ EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize,
+ sb->u.ext2_sb.s_frag_size,
+ sb->u.ext2_sb.s_groups_count,
+ EXT2_BLOCKS_PER_GROUP(sb),
+ EXT2_INODES_PER_GROUP(sb),
+ sb->u.ext2_sb.s_mount_opt);
+ if (test_opt (sb, CHECK)) {
+ ext2_check_blocks_bitmap (sb);
+ ext2_check_inodes_bitmap (sb);
+ }
+ }
+}
+
+/*
+ * ext2_check_descriptors verifies, for every block group, that the block
+ * bitmap, the inode bitmap and the inode table recorded in the group's
+ * descriptor lie inside that group's own range of blocks.
+ *
+ * Returns 1 if all descriptors pass, 0 (after calling ext2_error) at the
+ * first one that does not.
+ */
+static int ext2_check_descriptors (struct super_block * sb)
+{
+	struct ext2_group_desc * desc = NULL;
+	unsigned long first = sb->u.ext2_sb.s_es->s_first_data_block;
+	unsigned long limit;
+	int next_desc_block = 0;
+	int group;
+
+	ext2_debug ("Checking group descriptors");
+
+	for (group = 0; group < sb->u.ext2_sb.s_groups_count;
+	     group++, desc++, first += EXT2_BLOCKS_PER_GROUP(sb)) {
+		/* Switch to the next descriptor block when one is used up */
+		if ((group % EXT2_DESC_PER_BLOCK(sb)) == 0)
+			desc = (struct ext2_group_desc *) sb->u.ext2_sb.s_group_desc[next_desc_block++]->b_data;
+
+		/* This group owns the blocks in [first, limit) */
+		limit = first + EXT2_BLOCKS_PER_GROUP(sb);
+
+		if (desc->bg_block_bitmap < first ||
+		    desc->bg_block_bitmap >= limit) {
+			ext2_error (sb, "ext2_check_descriptors",
+				    "Block bitmap for group %d"
+				    " not in group (block %lu)!",
+				    group, desc->bg_block_bitmap);
+			return 0;
+		}
+		if (desc->bg_inode_bitmap < first ||
+		    desc->bg_inode_bitmap >= limit) {
+			ext2_error (sb, "ext2_check_descriptors",
+				    "Inode bitmap for group %d"
+				    " not in group (block %lu)!",
+				    group, desc->bg_inode_bitmap);
+			return 0;
+		}
+		if (desc->bg_inode_table < first ||
+		    desc->bg_inode_table + sb->u.ext2_sb.s_itb_per_group >=
+		    limit) {
+			ext2_error (sb, "ext2_check_descriptors",
+				    "Inode table for group %d"
+				    " not in group (block %lu)!",
+				    group, desc->bg_inode_table);
+			return 0;
+		}
+	}
+	return 1;
+}
+
+struct super_block * ext2_read_super (struct super_block * sb, void * data,
+ int silent)
+{
+ struct buffer_head * bh;
+ struct ext2_super_block * es;
+ unsigned long sb_block = 1;
+ unsigned short resuid = EXT2_DEF_RESUID;
+ unsigned short resgid = EXT2_DEF_RESGID;
+ unsigned long logic_sb_block = 1;
+ int dev = sb->s_dev;
+ int db_count;
+ int i, j;
+#ifdef EXT2FS_PRE_02B_COMPAT
+ int fs_converted = 0;
+#endif
+
+ set_opt (sb->u.ext2_sb.s_mount_opt, CHECK_NORMAL);
+ if (!parse_options ((char *) data, &sb_block, &resuid, &resgid,
+ &sb->u.ext2_sb.s_mount_opt)) {
+ sb->s_dev = 0;
+ return NULL;
+ }
+
+ lock_super (sb);
+ set_blocksize (dev, BLOCK_SIZE);
+ if (!(bh = bread (dev, sb_block, BLOCK_SIZE))) {
+ sb->s_dev = 0;
+ unlock_super (sb);
+ printk ("EXT2-fs: unable to read superblock\n");
+ return NULL;
+ }
+ /*
+ * Note: s_es must be initialized as soon as possible because
+ * some ext2 macro-instructions depend on its value
+ */
+ es = (struct ext2_super_block *) bh->b_data;
+ sb->u.ext2_sb.s_es = es;
+ sb->s_magic = es->s_magic;
+ if (sb->s_magic != EXT2_SUPER_MAGIC
+#ifdef EXT2FS_PRE_02B_COMPAT
+ && sb->s_magic != EXT2_PRE_02B_MAGIC
+#endif
+ ) {
+ sb->s_dev = 0;
+ unlock_super (sb);
+ brelse (bh);
+ if (!silent)
+ printk ("VFS: Can't find an ext2 filesystem on dev %d/%d.\n",
+ MAJOR(dev), MINOR(dev));
+ return NULL;
+ }
+ sb->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size;
+ sb->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(sb);
+ if (sb->s_blocksize != BLOCK_SIZE &&
+ (sb->s_blocksize == 1024 || sb->s_blocksize == 2048 ||
+ sb->s_blocksize == 4096)) {
+ unsigned long offset;
+
+ brelse (bh);
+ set_blocksize (dev, sb->s_blocksize);
+ logic_sb_block = (sb_block*BLOCK_SIZE) / sb->s_blocksize;
+ offset = (sb_block*BLOCK_SIZE) % sb->s_blocksize;
+ bh = bread (dev, logic_sb_block, sb->s_blocksize);
+ if(!bh)
+ return NULL;
+ es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
+ sb->u.ext2_sb.s_es = es;
+ if (es->s_magic != EXT2_SUPER_MAGIC) {
+ sb->s_dev = 0;
+ unlock_super (sb);
+ brelse (bh);
+ printk ("EXT2-fs: Magic mismatch, very weird !\n");
+ return NULL;
+ }
+ }
+ sb->u.ext2_sb.s_frag_size = EXT2_MIN_FRAG_SIZE <<
+ es->s_log_frag_size;
+ if (sb->u.ext2_sb.s_frag_size)
+ sb->u.ext2_sb.s_frags_per_block = sb->s_blocksize /
+ sb->u.ext2_sb.s_frag_size;
+ else
+ sb->s_magic = 0;
+ sb->u.ext2_sb.s_blocks_per_group = es->s_blocks_per_group;
+ sb->u.ext2_sb.s_frags_per_group = es->s_frags_per_group;
+ sb->u.ext2_sb.s_inodes_per_group = es->s_inodes_per_group;
+ sb->u.ext2_sb.s_inodes_per_block = sb->s_blocksize /
+ sizeof (struct ext2_inode);
+ sb->u.ext2_sb.s_itb_per_group = sb->u.ext2_sb.s_inodes_per_group /
+ sb->u.ext2_sb.s_inodes_per_block;
+ sb->u.ext2_sb.s_desc_per_block = sb->s_blocksize /
+ sizeof (struct ext2_group_desc);
+ sb->u.ext2_sb.s_sbh = bh;
+ sb->u.ext2_sb.s_es = es;
+ if (resuid != EXT2_DEF_RESUID)
+ sb->u.ext2_sb.s_resuid = resuid;
+ else
+ sb->u.ext2_sb.s_resuid = es->s_def_resuid;
+ if (resgid != EXT2_DEF_RESGID)
+ sb->u.ext2_sb.s_resgid = resgid;
+ else
+ sb->u.ext2_sb.s_resgid = es->s_def_resgid;
+ sb->u.ext2_sb.s_mount_state = es->s_state;
+ sb->u.ext2_sb.s_rename_lock = 0;
+ sb->u.ext2_sb.s_rename_wait = NULL;
+#ifdef EXT2FS_PRE_02B_COMPAT
+ if (sb->s_magic == EXT2_PRE_02B_MAGIC) {
+ if (es->s_blocks_count > 262144) {
+ /*
+ * fs > 256 MB can't be converted
+ */
+ sb->s_dev = 0;
+ unlock_super (sb);
+ brelse (bh);
+ printk ("EXT2-fs: trying to mount a pre-0.2b file"
+ "system which cannot be converted\n");
+ return NULL;
+ }
+ printk ("EXT2-fs: mounting a pre 0.2b file system, "
+ "will try to convert the structure\n");
+ if (!(sb->s_flags & MS_RDONLY)) {
+ sb->s_dev = 0;
+ unlock_super (sb);
+ brelse (bh);
+ printk ("EXT2-fs: cannot convert a read-only fs\n");
+ return NULL;
+ }
+ if (!convert_pre_02b_fs (sb, bh)) {
+ sb->s_dev = 0;
+ unlock_super (sb);
+ brelse (bh);
+ printk ("EXT2-fs: conversion failed !!!\n");
+ return NULL;
+ }
+ printk ("EXT2-fs: conversion succeeded !!!\n");
+ fs_converted = 1;
+ }
+#endif
+ if (sb->s_magic != EXT2_SUPER_MAGIC) {
+ sb->s_dev = 0;
+ unlock_super (sb);
+ brelse (bh);
+ if (!silent)
+ printk ("VFS: Can't find an ext2 filesystem on dev %d/%d.\n",
+ MAJOR(dev), MINOR(dev));
+ return NULL;
+ }
+ if (sb->s_blocksize != bh->b_size) {
+ sb->s_dev = 0;
+ unlock_super (sb);
+ brelse (bh);
+ if (!silent)
+ printk ("VFS: Unsupported blocksize on dev 0x%04x.\n",
+ dev);
+ return NULL;
+ }
+
+ if (sb->s_blocksize != sb->u.ext2_sb.s_frag_size) {
+ sb->s_dev = 0;
+ unlock_super (sb);
+ brelse (bh);
+ printk ("EXT2-fs: fragsize %lu != blocksize %lu (not supported yet)\n",
+ sb->u.ext2_sb.s_frag_size, sb->s_blocksize);
+ return NULL;
+ }
+
+ sb->u.ext2_sb.s_groups_count = (es->s_blocks_count -
+ es->s_first_data_block +
+ EXT2_BLOCKS_PER_GROUP(sb) - 1) /
+ EXT2_BLOCKS_PER_GROUP(sb);
+ db_count = (sb->u.ext2_sb.s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
+ EXT2_DESC_PER_BLOCK(sb);
+ sb->u.ext2_sb.s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL);
+ if (sb->u.ext2_sb.s_group_desc == NULL) {
+ sb->s_dev = 0;
+ unlock_super (sb);
+ brelse (bh);
+ printk ("EXT2-fs: not enough memory\n");
+ return NULL;
+ }
+ for (i = 0; i < db_count; i++) {
+ sb->u.ext2_sb.s_group_desc[i] = bread (dev, logic_sb_block + i + 1,
+ sb->s_blocksize);
+ if (!sb->u.ext2_sb.s_group_desc[i]) {
+ sb->s_dev = 0;
+ unlock_super (sb);
+ for (j = 0; j < i; j++)
+ brelse (sb->u.ext2_sb.s_group_desc[j]);
+ kfree_s (sb->u.ext2_sb.s_group_desc,
+ db_count * sizeof (struct buffer_head *));
+ brelse (bh);
+ printk ("EXT2-fs: unable to read group descriptors\n");
+ return NULL;
+ }
+ }
+ if (!ext2_check_descriptors (sb)) {
+ sb->s_dev = 0;
+ unlock_super (sb);
+ for (j = 0; j < db_count; j++)
+ brelse (sb->u.ext2_sb.s_group_desc[j]);
+ kfree_s (sb->u.ext2_sb.s_group_desc,
+ db_count * sizeof (struct buffer_head *));
+ brelse (bh);
+ printk ("EXT2-fs: group descriptors corrupted !\n");
+ return NULL;
+ }
+ for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) {
+ sb->u.ext2_sb.s_inode_bitmap_number[i] = 0;
+ sb->u.ext2_sb.s_inode_bitmap[i] = NULL;
+ sb->u.ext2_sb.s_block_bitmap_number[i] = 0;
+ sb->u.ext2_sb.s_block_bitmap[i] = NULL;
+ }
+ sb->u.ext2_sb.s_loaded_inode_bitmaps = 0;
+ sb->u.ext2_sb.s_loaded_block_bitmaps = 0;
+ sb->u.ext2_sb.s_db_per_group = db_count;
+ unlock_super (sb);
+ /*
+ * set up enough so that it can read an inode
+ */
+ sb->s_dev = dev;
+ sb->s_op = &ext2_sops;
+ if (!(sb->s_mounted = iget (sb, EXT2_ROOT_INO))) {
+ sb->s_dev = 0;
+ for (i = 0; i < db_count; i++)
+ if (sb->u.ext2_sb.s_group_desc[i])
+ brelse (sb->u.ext2_sb.s_group_desc[i]);
+ kfree_s (sb->u.ext2_sb.s_group_desc,
+ db_count * sizeof (struct buffer_head *));
+ brelse (bh);
+ printk ("EXT2-fs: get root inode failed\n");
+ return NULL;
+ }
+#ifdef EXT2FS_PRE_02B_COMPAT
+ if (fs_converted) {
+ for (i = 0; i < db_count; i++)
+ mark_buffer_dirty(sb->u.ext2_sb.s_group_desc[i], 1);
+ sb->s_dirt = 1;
+ }
+#endif
+ ext2_setup_super (sb, es);
+ return sb;
+}
+
+/*
+ * Record the current time as the super block write time, flag the
+ * buffer that maps the on-disk super block as dirty and clear the
+ * in-core "super block dirty" indicator.
+ */
+static void ext2_commit_super (struct super_block * sb,
+			       struct ext2_super_block * es)
+{
+	struct buffer_head * sbh = sb->u.ext2_sb.s_sbh;
+
+	es->s_wtime = CURRENT_TIME;
+	mark_buffer_dirty(sbh, 1);
+	sb->s_dirt = 0;
+}
+
+/*
+ * In the second extended file system, it is not necessary to
+ * write the super block since we use a mapping of the
+ * disk super block in a buffer.
+ *
+ * However, this function is still used to set the fs valid
+ * flags to 0. We need to set this flag to 0 since the fs
+ * may have been checked while mounted and e2fsck may have
+ * set s_state to EXT2_VALID_FS after some corrections.
+ */
+
+void ext2_write_super (struct super_block * sb)
+{
+	struct ext2_super_block * es;
+
+	/* Nothing is ever written back on a read-only mount. */
+	if (sb->s_flags & MS_RDONLY) {
+		sb->s_dirt = 0;
+		return;
+	}
+
+	es = sb->u.ext2_sb.s_es;
+
+	ext2_debug ("setting valid to 0\n");
+
+	/* Drop the "cleanly unmounted" bit while the fs is in use. */
+	if (es->s_state & EXT2_VALID_FS) {
+		es->s_state &= ~EXT2_VALID_FS;
+		es->s_mtime = CURRENT_TIME;
+	}
+	ext2_commit_super (sb, es);
+	sb->s_dirt = 0;
+}
+
+/*
+ * Apply a new set of mount options to a mounted ext2 file system and
+ * handle a switch between read-only and read-write.
+ *
+ * sb    - the mounted super block
+ * flags - requested mount flags; only MS_RDONLY is examined here
+ * data  - option string handed to parse_options()
+ *
+ * Returns 0 on success or -EINVAL when the option string is rejected.
+ * Nothing in *sb is modified when -EINVAL is returned.
+ */
+int ext2_remount (struct super_block * sb, int * flags, char * data)
+{
+	struct ext2_super_block * es;
+	unsigned short resuid = sb->u.ext2_sb.s_resuid;
+	unsigned short resgid = sb->u.ext2_sb.s_resgid;
+	unsigned long new_mount_opt = 0;
+	unsigned long tmp;
+
+	/*
+	 * Allow the "check" option to be passed as a remount option.
+	 *
+	 * The default is applied to the scratch copy, which must start
+	 * from a known value: it is stored into s_mount_opt below, and
+	 * parse_options() is presumed to only set/clear individual bits
+	 * (NOTE(review): confirm against parse_options()).
+	 */
+	set_opt (new_mount_opt, CHECK_NORMAL);
+	if (!parse_options (data, &tmp, &resuid, &resgid,
+			    &new_mount_opt))
+		return -EINVAL;
+
+	sb->u.ext2_sb.s_mount_opt = new_mount_opt;
+	sb->u.ext2_sb.s_resuid = resuid;
+	sb->u.ext2_sb.s_resgid = resgid;
+	es = sb->u.ext2_sb.s_es;
+	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+		return 0;
+	if (*flags & MS_RDONLY) {
+		/*
+		 * Nothing to restore if the on-disk state is already
+		 * valid, or if the fs was not valid when we mounted it.
+		 */
+		if (es->s_state & EXT2_VALID_FS ||
+		    !(sb->u.ext2_sb.s_mount_state & EXT2_VALID_FS))
+			return 0;
+		/*
+		 * OK, we are remounting a valid rw partition rdonly, so
+		 * mark the partition as valid again.  MS_RDONLY itself is
+		 * not set in sb->s_flags here.
+		 */
+		es->s_state = sb->u.ext2_sb.s_mount_state;
+		es->s_mtime = CURRENT_TIME;
+		mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1);
+		sb->s_dirt = 1;
+		ext2_commit_super (sb, es);
+	}
+	else {
+		/*
+		 * Mounting a RDONLY partition read-write, so reread and
+		 * store the current valid flag.  (It may have been changed
+		 * by e2fsck since we originally mounted the partition.)
+		 */
+		sb->u.ext2_sb.s_mount_state = es->s_state;
+		sb->s_flags &= ~MS_RDONLY;
+		ext2_setup_super (sb, es);
+	}
+	return 0;
+}
+
+/*
+ * Fill in a user-space struct statfs for an ext2 file system.
+ *
+ * Unless the MINIX_DF mount option is set, the blocks used by the file
+ * system's own structures are subtracted from the reported total.
+ */
+void ext2_statfs (struct super_block * sb, struct statfs * buf)
+{
+	struct ext2_super_block * es = sb->u.ext2_sb.s_es;
+	long free_blocks;
+	unsigned long overhead;
+	unsigned long overhead_per_group;
+
+	if (test_opt (sb, MINIX_DF))
+		overhead = 0;
+	else {
+		/*
+		 * Per group: super block, group descriptors, block
+		 * bitmap, inode bitmap and inode table.
+		 */
+		overhead_per_group = 1 +
+				     sb->u.ext2_sb.s_db_per_group +
+				     1 +
+				     1 +
+				     sb->u.ext2_sb.s_itb_per_group;
+		overhead = es->s_first_data_block +
+			   sb->u.ext2_sb.s_groups_count * overhead_per_group;
+	}
+
+	put_fs_long (EXT2_SUPER_MAGIC, &buf->f_type);
+	put_fs_long (sb->s_blocksize, &buf->f_bsize);
+	put_fs_long (es->s_blocks_count - overhead, &buf->f_blocks);
+
+	free_blocks = ext2_count_free_blocks (sb);
+	put_fs_long (free_blocks, &buf->f_bfree);
+	/* Blocks available once the reserved blocks are set aside. */
+	if (free_blocks >= es->s_r_blocks_count)
+		put_fs_long (free_blocks - es->s_r_blocks_count,
+			     &buf->f_bavail);
+	else
+		put_fs_long (0, &buf->f_bavail);
+
+	put_fs_long (es->s_inodes_count, &buf->f_files);
+	put_fs_long (ext2_count_free_inodes (sb), &buf->f_ffree);
+	put_fs_long (EXT2_NAME_LEN, &buf->f_namelen);
+	/* Don't know what value to put in buf->f_fsid */
+}
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
new file mode 100644
index 000000000..7d85ed74c
--- /dev/null
+++ b/fs/ext2/symlink.c
@@ -0,0 +1,127 @@
+/*
+ * linux/fs/ext2/symlink.c
+ *
+ * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/symlink.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext2 symlink handling code
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+
+static int ext2_readlink (struct inode *, char *, int);
+static int ext2_follow_link (struct inode *, struct inode *, int, int,
+ struct inode **);
+
+/*
+ * symlinks can't do much...
+ *
+ * Inode operation table for ext2 symbolic links.  The initializer is
+ * positional: only the readlink and follow_link slots are filled in,
+ * every other slot (including the default file operations) is NULL.
+ */
+struct inode_operations ext2_symlink_inode_operations = {
+ NULL, /* no file-operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ ext2_readlink, /* readlink */
+ ext2_follow_link, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL, /* permission */
+ NULL /* smap */
+};
+
+/*
+ * Resolve a symbolic link.
+ *
+ * dir       - directory the link is relative to; when NULL the current
+ *             process' root is used and gets an extra reference
+ * inode     - the inode to follow; returned untouched through *res_inode
+ *             when it is not a symlink
+ * flag/mode - passed through to open_namei()
+ * res_inode - receives the resolved inode, NULL on failure
+ *
+ * Returns 0 or the open_namei() result; -ENOENT without an inode,
+ * -ELOOP when more than 5 links are already being followed, -EIO when
+ * the block holding the target cannot be read.
+ */
+static int ext2_follow_link(struct inode * dir, struct inode * inode,
+			    int flag, int mode, struct inode ** res_inode)
+{
+	struct buffer_head * bh = NULL;
+	char * target;
+	int err;
+
+	*res_inode = NULL;
+	if (!dir) {
+		dir = current->fs->root;
+		dir->i_count++;
+	}
+	if (!inode) {
+		iput (dir);
+		return -ENOENT;
+	}
+	if (!S_ISLNK(inode->i_mode)) {
+		iput (dir);
+		*res_inode = inode;
+		return 0;
+	}
+	if (current->link_count > 5) {
+		iput (dir);
+		iput (inode);
+		return -ELOOP;
+	}
+
+	/* Without data blocks the target is read from i_data itself. */
+	target = (char *) inode->u.ext2_i.i_data;
+	if (inode->i_blocks) {
+		bh = ext2_bread (inode, 0, 0, &err);
+		if (!bh) {
+			iput (dir);
+			iput (inode);
+			return -EIO;
+		}
+		target = bh->b_data;
+	}
+
+	current->link_count++;
+	err = open_namei (target, flag, mode, res_inode, dir);
+	current->link_count--;
+
+	iput (inode);
+	if (bh)
+		brelse (bh);
+	return err;
+}
+
+/*
+ * Copy the target of a symbolic link into a user-space buffer.
+ *
+ * inode  - the symlink; its reference is dropped on every path
+ * buffer - user-space destination, written with put_fs_byte()
+ * buflen - size of the destination; clamped to blocksize - 1
+ *
+ * Returns the number of bytes copied (no terminating NUL is written),
+ * -EINVAL if the inode is not a symlink, or -EIO if the block holding
+ * the target cannot be read (the same error ext2_follow_link reports
+ * for that failure, instead of pretending the link is empty).
+ */
+static int ext2_readlink (struct inode * inode, char * buffer, int buflen)
+{
+	struct buffer_head * bh = NULL;
+	char * link;
+	int i, err;
+	char c;
+
+	if (!S_ISLNK(inode->i_mode)) {
+		iput (inode);
+		return -EINVAL;
+	}
+	if (buflen > inode->i_sb->s_blocksize - 1)
+		buflen = inode->i_sb->s_blocksize - 1;
+	if (inode->i_blocks) {
+		bh = ext2_bread (inode, 0, 0, &err);
+		if (!bh) {
+			iput (inode);
+			return -EIO;
+		}
+		link = bh->b_data;
+	}
+	else
+		/* Without data blocks the target is read from i_data. */
+		link = (char *) inode->u.ext2_i.i_data;
+	i = 0;
+	while (i < buflen && (c = link[i])) {
+		i++;
+		put_fs_byte (c, buffer++);
+	}
+	iput (inode);
+	if (bh)
+		brelse (bh);
+	return i;
+}
diff --git a/fs/ext2/truncate.c b/fs/ext2/truncate.c
new file mode 100644
index 000000000..10a1fd236
--- /dev/null
+++ b/fs/ext2/truncate.c
@@ -0,0 +1,349 @@
+/*
+ * linux/fs/ext2/truncate.c
+ *
+ * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/truncate.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * Real random numbers for secure rm added 94/02/18
+ * Idea from Pierre del Perugia <delperug@gla.ecoledoc.ibp.fr>
+ */
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+#include <linux/string.h>
+
+static int ext2_secrm_seed = 152; /* Random generator base */
+
+#define RANDOM_INT (ext2_secrm_seed = ext2_secrm_seed * 69069l +1)
+
+/*
+ * Truncate has the most races in the whole filesystem: coding it is
+ * a pain in the a**. Especially as I don't do any locking...
+ *
+ * The code may look a bit weird, but that's just because I've tried to
+ * handle things like file-size changes in a somewhat graceful manner.
+ * Anyway, truncating a file at the same time somebody else writes to it
+ * is likely to result in pretty weird behaviour...
+ *
+ * The new code handles normal truncates (size = 0) as well as the more
+ * general case (size = XXX). I hope.
+ */
+
+/*
+ * Release the direct blocks of an inode from block index DIRECT_BLOCK
+ * (i_size rounded up to whole blocks) up to EXT2_NDIR_BLOCKS.
+ *
+ * Returns non-zero when at least one block was left in place because
+ * its buffer was referenced elsewhere (b_count != 1) or the block
+ * pointer changed while it was being examined.
+ */
+static int trunc_direct (struct inode * inode)
+{
+ int i, tmp;
+ unsigned long * p;
+ struct buffer_head * bh;
+ unsigned long block_to_free = 0; /* first block of the pending run */
+ unsigned long free_count = 0; /* length of the pending run */
+ int retry = 0;
+ int blocks = inode->i_sb->s_blocksize / 512; /* subtracted from i_blocks per freed block */
+#define DIRECT_BLOCK ((inode->i_size + inode->i_sb->s_blocksize - 1) / \
+ inode->i_sb->s_blocksize)
+ int direct_block = DIRECT_BLOCK;
+
+repeat:
+ for (i = direct_block ; i < EXT2_NDIR_BLOCKS ; i++) {
+ p = inode->u.ext2_i.i_data + i;
+ tmp = *p;
+ if (!tmp)
+ continue;
+ /* Secure-rm needs a real buffer to overwrite; otherwise only look one up. */
+ if (inode->u.ext2_i.i_flags & EXT2_SECRM_FL)
+ bh = getblk (inode->i_dev, tmp,
+ inode->i_sb->s_blocksize);
+ else
+ bh = get_hash_table (inode->i_dev, tmp,
+ inode->i_sb->s_blocksize);
+ /* NOTE(review): direct_block is never reassigned here, so this test looks unable to fire. */
+ if (i < direct_block) {
+ brelse (bh);
+ goto repeat;
+ }
+ /* Buffer in use elsewhere, or the slot changed under us: leave it for a retry. */
+ if ((bh && bh->b_count != 1) || tmp != *p) {
+ retry = 1;
+ brelse (bh);
+ continue;
+ }
+ *p = 0;
+ inode->i_blocks -= blocks;
+ inode->i_dirt = 1;
+ if (inode->u.ext2_i.i_flags & EXT2_SECRM_FL) {
+ /* Overwrite the data with a value from the generator above. */
+ memset(bh->b_data, RANDOM_INT, inode->i_sb->s_blocksize);
+ mark_buffer_dirty(bh, 1);
+ }
+ brelse (bh);
+ /* Collect consecutive block numbers so they are freed with one call. */
+ if (free_count == 0) {
+ block_to_free = tmp;
+ free_count++;
+ } else if (free_count > 0 && block_to_free == tmp - free_count)
+ free_count++;
+ else {
+ ext2_free_blocks (inode->i_sb, block_to_free, free_count);
+ block_to_free = tmp;
+ free_count = 1;
+ }
+/* ext2_free_blocks (inode->i_sb, tmp, 1); */
+ }
+ /* Flush the last pending run. */
+ if (free_count > 0)
+ ext2_free_blocks (inode->i_sb, block_to_free, free_count);
+ return retry;
+}
+
+/*
+ * Release the data blocks referenced from one indirect block, and the
+ * indirect block itself once it no longer references anything.
+ *
+ * inode  - inode being truncated
+ * offset - subtracted from DIRECT_BLOCK to get the first slot to
+ *          release (presumably the file block number mapped by slot 0
+ *          of this indirect block -- confirm against callers)
+ * p      - location holding the indirect block's number; cleared when
+ *          the indirect block is freed
+ *
+ * Returns non-zero when something was left in place and the caller
+ * should try again.
+ */
+static int trunc_indirect (struct inode * inode, int offset, unsigned long * p)
+{
+ int i, tmp;
+ struct buffer_head * bh;
+ struct buffer_head * ind_bh;
+ unsigned long * ind;
+ unsigned long block_to_free = 0; /* first block of the pending run */
+ unsigned long free_count = 0; /* length of the pending run */
+ int retry = 0;
+ int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+ int blocks = inode->i_sb->s_blocksize / 512; /* subtracted from i_blocks per freed block */
+#define INDIRECT_BLOCK ((int)DIRECT_BLOCK - offset)
+ int indirect_block = INDIRECT_BLOCK;
+
+ tmp = *p;
+ if (!tmp)
+ return 0;
+ ind_bh = bread (inode->i_dev, tmp, inode->i_sb->s_blocksize);
+ /* The slot changed while bread() was running: ask for a retry. */
+ if (tmp != *p) {
+ brelse (ind_bh);
+ return 1;
+ }
+ /* Unreadable indirect block: the reference is dropped without freeing anything. */
+ if (!ind_bh) {
+ *p = 0;
+ return 0;
+ }
+repeat:
+ for (i = indirect_block ; i < addr_per_block ; i++) {
+ /* A negative start means the whole indirect block lies past the new size. */
+ if (i < 0)
+ i = 0;
+ if (i < indirect_block)
+ goto repeat;
+ ind = i + (unsigned long *) ind_bh->b_data;
+ tmp = *ind;
+ if (!tmp)
+ continue;
+ /* Secure-rm needs a real buffer to overwrite; otherwise only look one up. */
+ if (inode->u.ext2_i.i_flags & EXT2_SECRM_FL)
+ bh = getblk (inode->i_dev, tmp,
+ inode->i_sb->s_blocksize);
+ else
+ bh = get_hash_table (inode->i_dev, tmp,
+ inode->i_sb->s_blocksize);
+ if (i < indirect_block) {
+ brelse (bh);
+ goto repeat;
+ }
+ /* Buffer in use elsewhere, or the slot changed under us: leave it for a retry. */
+ if ((bh && bh->b_count != 1) || tmp != *ind) {
+ retry = 1;
+ brelse (bh);
+ continue;
+ }
+ *ind = 0;
+ mark_buffer_dirty(ind_bh, 1);
+ if (inode->u.ext2_i.i_flags & EXT2_SECRM_FL) {
+ memset(bh->b_data, RANDOM_INT, inode->i_sb->s_blocksize);
+ mark_buffer_dirty(bh, 1);
+ }
+ brelse (bh);
+ /* Collect consecutive block numbers so they are freed with one call. */
+ if (free_count == 0) {
+ block_to_free = tmp;
+ free_count++;
+ } else if (free_count > 0 && block_to_free == tmp - free_count)
+ free_count++;
+ else {
+ ext2_free_blocks (inode->i_sb, block_to_free, free_count);
+ block_to_free = tmp;
+ free_count = 1;
+ }
+/* ext2_free_blocks (inode->i_sb, tmp, 1); */
+ inode->i_blocks -= blocks;
+ inode->i_dirt = 1;
+ }
+ if (free_count > 0)
+ ext2_free_blocks (inode->i_sb, block_to_free, free_count);
+ /* Scan for any remaining reference; i reaches addr_per_block when there is none. */
+ ind = (unsigned long *) ind_bh->b_data;
+ for (i = 0; i < addr_per_block; i++)
+ if (*(ind++))
+ break;
+ /* Note: the else below pairs with the inner if (b_count test). */
+ if (i >= addr_per_block)
+ if (ind_bh->b_count != 1)
+ retry = 1;
+ else {
+ tmp = *p;
+ *p = 0;
+ inode->i_blocks -= blocks;
+ inode->i_dirt = 1;
+ ext2_free_blocks (inode->i_sb, tmp, 1);
+ }
+ /* Synchronous inodes get the modified indirect block written out right away. */
+ if (IS_SYNC(inode) && ind_bh->b_dirt) {
+ ll_rw_block (WRITE, 1, &ind_bh);
+ wait_on_buffer (ind_bh);
+ }
+ brelse (ind_bh);
+ return retry;
+}
+
+/*
+ * Walk one double-indirect block, truncating every indirect block it
+ * references with trunc_indirect(), and free the double-indirect block
+ * itself once it no longer references anything.
+ *
+ * inode  - inode being truncated
+ * offset - subtracted from DIRECT_BLOCK before dividing by the number
+ *          of addresses per block to find the first slot to visit
+ * p      - location holding the double-indirect block's number
+ *
+ * Returns non-zero when the caller should try again.
+ */
+static int trunc_dindirect (struct inode * inode, int offset,
+ unsigned long * p)
+{
+ int i, tmp;
+ struct buffer_head * dind_bh;
+ unsigned long * dind;
+ int retry = 0;
+ int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+ int blocks = inode->i_sb->s_blocksize / 512; /* subtracted from i_blocks per freed block */
+#define DINDIRECT_BLOCK (((int)DIRECT_BLOCK - offset) / addr_per_block)
+ int dindirect_block = DINDIRECT_BLOCK;
+
+ tmp = *p;
+ if (!tmp)
+ return 0;
+ dind_bh = bread (inode->i_dev, tmp, inode->i_sb->s_blocksize);
+ /* The slot changed while bread() was running: ask for a retry. */
+ if (tmp != *p) {
+ brelse (dind_bh);
+ return 1;
+ }
+ /* Unreadable block: the reference is dropped without freeing anything. */
+ if (!dind_bh) {
+ *p = 0;
+ return 0;
+ }
+repeat:
+ for (i = dindirect_block ; i < addr_per_block ; i++) {
+ if (i < 0)
+ i = 0;
+ if (i < dindirect_block)
+ goto repeat;
+ dind = i + (unsigned long *) dind_bh->b_data;
+ tmp = *dind;
+ if (!tmp)
+ continue;
+ /* Slot i covers addr_per_block file blocks starting at offset + i * addr_per_block. */
+ retry |= trunc_indirect (inode, offset + (i * addr_per_block),
+ dind);
+ mark_buffer_dirty(dind_bh, 1);
+ }
+ /* Scan for any remaining reference; i reaches addr_per_block when there is none. */
+ dind = (unsigned long *) dind_bh->b_data;
+ for (i = 0; i < addr_per_block; i++)
+ if (*(dind++))
+ break;
+ /* Note: the else below pairs with the inner if (b_count test). */
+ if (i >= addr_per_block)
+ if (dind_bh->b_count != 1)
+ retry = 1;
+ else {
+ tmp = *p;
+ *p = 0;
+ inode->i_blocks -= blocks;
+ inode->i_dirt = 1;
+ ext2_free_blocks (inode->i_sb, tmp, 1);
+ }
+ /* Synchronous inodes get the modified block written out right away. */
+ if (IS_SYNC(inode) && dind_bh->b_dirt) {
+ ll_rw_block (WRITE, 1, &dind_bh);
+ wait_on_buffer (dind_bh);
+ }
+ brelse (dind_bh);
+ return retry;
+}
+
+/*
+ * Walk the triple-indirect block of an inode (slot EXT2_TIND_BLOCK of
+ * i_data), truncating every double-indirect block it references with
+ * trunc_dindirect(), and free the triple-indirect block itself once it
+ * no longer references anything.
+ *
+ * Returns non-zero when the caller should try again.
+ */
+static int trunc_tindirect (struct inode * inode)
+{
+ int i, tmp;
+ struct buffer_head * tind_bh;
+ unsigned long * tind, * p;
+ int retry = 0;
+ int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+ int blocks = inode->i_sb->s_blocksize / 512; /* subtracted from i_blocks per freed block */
+#define TINDIRECT_BLOCK (((int)DIRECT_BLOCK - (addr_per_block * addr_per_block + \
+ addr_per_block + EXT2_NDIR_BLOCKS)) / \
+ (addr_per_block * addr_per_block))
+ int tindirect_block = TINDIRECT_BLOCK;
+
+ p = inode->u.ext2_i.i_data + EXT2_TIND_BLOCK;
+ if (!(tmp = *p))
+ return 0;
+ tind_bh = bread (inode->i_dev, tmp, inode->i_sb->s_blocksize);
+ /* The slot changed while bread() was running: ask for a retry. */
+ if (tmp != *p) {
+ brelse (tind_bh);
+ return 1;
+ }
+ /* Unreadable block: the reference is dropped without freeing anything. */
+ if (!tind_bh) {
+ *p = 0;
+ return 0;
+ }
+repeat:
+ for (i = tindirect_block ; i < addr_per_block ; i++) {
+ if (i < 0)
+ i = 0;
+ if (i < tindirect_block)
+ goto repeat;
+ tind = i + (unsigned long *) tind_bh->b_data;
+ /* Offset skips the direct, indirect and double-indirect ranges plus i earlier slots. */
+ retry |= trunc_dindirect(inode, EXT2_NDIR_BLOCKS +
+ addr_per_block + (i + 1) * addr_per_block * addr_per_block,
+ tind);
+ mark_buffer_dirty(tind_bh, 1);
+ }
+ /* Scan for any remaining reference; i reaches addr_per_block when there is none. */
+ tind = (unsigned long *) tind_bh->b_data;
+ for (i = 0; i < addr_per_block; i++)
+ if (*(tind++))
+ break;
+ /* Note: the else below pairs with the inner if (b_count test). */
+ if (i >= addr_per_block)
+ if (tind_bh->b_count != 1)
+ retry = 1;
+ else {
+ tmp = *p;
+ *p = 0;
+ inode->i_blocks -= blocks;
+ inode->i_dirt = 1;
+ ext2_free_blocks (inode->i_sb, tmp, 1);
+ }
+ /* Synchronous inodes get the modified block written out right away. */
+ if (IS_SYNC(inode) && tind_bh->b_dirt) {
+ ll_rw_block (WRITE, 1, &tind_bh);
+ wait_on_buffer (tind_bh);
+ }
+ brelse (tind_bh);
+ return retry;
+}
+
+/*
+ * Free every block of an inode that lies beyond its current i_size.
+ *
+ * Only regular files, directories and symlinks are handled, and
+ * append-only or immutable inodes are left alone.  Each pass runs the
+ * direct, indirect, double- and triple-indirect helpers under i_sem;
+ * while any of them reports unfinished work the CPU is given up and
+ * the pass is repeated.
+ */
+void ext2_truncate (struct inode * inode)
+{
+	int busy;
+
+	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
+	    !S_ISLNK(inode->i_mode))
+		return;
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return;
+	ext2_discard_prealloc(inode);
+	for (;;) {
+		down(&inode->i_sem);
+		busy = trunc_direct(inode);
+		busy |= trunc_indirect (inode, EXT2_IND_BLOCK,
+			(unsigned long *) &inode->u.ext2_i.i_data[EXT2_IND_BLOCK]);
+		busy |= trunc_dindirect (inode, EXT2_IND_BLOCK +
+			EXT2_ADDR_PER_BLOCK(inode->i_sb),
+			(unsigned long *) &inode->u.ext2_i.i_data[EXT2_DIND_BLOCK]);
+		busy |= trunc_tindirect (inode);
+		up(&inode->i_sem);
+		if (!busy)
+			break;
+		if (IS_SYNC(inode) && inode->i_dirt)
+			ext2_sync_inode (inode);
+		/* Give up the rest of the time slice before retrying. */
+		current->counter = 0;
+		schedule ();
+	}
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	inode->i_dirt = 1;
+}
diff --git a/fs/fcntl.c b/fs/fcntl.c
new file mode 100644
index 000000000..d3226eb01
--- /dev/null
+++ b/fs/fcntl.c
@@ -0,0 +1,188 @@
+/*
+ * linux/fs/fcntl.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <asm/segment.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/string.h>
+
+extern int fcntl_getlk(unsigned int, struct flock *);
+extern int fcntl_setlk(unsigned int, unsigned int, struct flock *);
+extern int sock_fcntl (struct file *, unsigned int cmd, unsigned long arg);
+
+/*
+ * Duplicate descriptor fd onto the lowest free descriptor that is
+ * greater than or equal to arg.
+ *
+ * Returns the new descriptor, -EBADF if fd is not open, -EINVAL if arg
+ * is out of range, or -EMFILE if no free descriptor is left.
+ */
+static int dupfd(unsigned int fd, unsigned int arg)
+{
+	if (fd >= NR_OPEN || !current->files->fd[fd])
+		return -EBADF;
+	if (arg >= NR_OPEN)
+		return -EINVAL;
+
+	/* Skip over descriptors that are already in use. */
+	for (; arg < NR_OPEN; arg++)
+		if (!current->files->fd[arg])
+			break;
+	if (arg >= NR_OPEN)
+		return -EMFILE;
+
+	/* The duplicate never inherits close-on-exec. */
+	FD_CLR(arg, &current->files->close_on_exec);
+	current->files->fd[arg] = current->files->fd[fd];
+	current->files->fd[arg]->f_count++;
+	return arg;
+}
+
+/*
+ * dup2() system call: make newfd refer to the same open file as oldfd.
+ *
+ * Returns newfd unchanged when both descriptors are equal, -EBADF when
+ * oldfd is not open or newfd is out of range, otherwise whatever
+ * dupfd() returns after newfd has been closed.
+ */
+asmlinkage int sys_dup2(unsigned int oldfd, unsigned int newfd)
+{
+ if (oldfd >= NR_OPEN || !current->files->fd[oldfd])
+ return -EBADF;
+ if (newfd == oldfd)
+ return newfd;
+ /*
+ * errno's for dup2() are slightly different than for fcntl(F_DUPFD)
+ * for historical reasons.
+ */
+ if (newfd > NR_OPEN) /* historical botch - should have been >= */
+ return -EBADF; /* dupfd() would return -EINVAL */
+#if 1
+ if (newfd == NR_OPEN)
+ return -EBADF; /* dupfd() does return -EINVAL and that may
+ * even be the standard! But that is too
+ * weird for now.
+ */
+#endif
+ /* The result of the close is ignored: newfd need not have been open. */
+ sys_close(newfd);
+ return dupfd(oldfd,newfd);
+}
+
+/*
+ * dup() system call: duplicate fildes onto the lowest free descriptor.
+ */
+asmlinkage int sys_dup(unsigned int fildes)
+{
+	int newfd;
+
+	newfd = dupfd(fildes, 0);
+	return newfd;
+}
+
+/*
+ * fcntl() system call.
+ *
+ * fd  - descriptor to operate on
+ * cmd - one of the F_* commands; unknown commands are handed to
+ *       sock_fcntl() when fd is a socket
+ * arg - command specific argument
+ *
+ * Returns the command's result, -EBADF if fd is not open, -EPERM or
+ * -EINVAL as documented at the individual commands.
+ */
+asmlinkage int sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+	struct file * filp;
+	struct task_struct *p;
+	int task_found = 0;
+
+	if (fd >= NR_OPEN || !(filp = current->files->fd[fd]))
+		return -EBADF;
+	switch (cmd) {
+		case F_DUPFD:
+			return dupfd(fd,arg);
+		case F_GETFD:
+			return FD_ISSET(fd, &current->files->close_on_exec);
+		case F_SETFD:
+			if (arg&1)
+				FD_SET(fd, &current->files->close_on_exec);
+			else
+				FD_CLR(fd, &current->files->close_on_exec);
+			return 0;
+		case F_GETFL:
+			return filp->f_flags;
+		case F_SETFL:
+			/*
+			 * In the case of an append-only file, O_APPEND
+			 * cannot be cleared
+			 */
+			if (IS_APPEND(filp->f_inode) && !(arg & O_APPEND))
+				return -EPERM;
+			/*
+			 * Tell the driver when FASYNC is switched on or
+			 * off.  A file may have no operations table at
+			 * all, so f_op is checked before f_op->fasync.
+			 */
+			if ((arg & FASYNC) && !(filp->f_flags & FASYNC) &&
+			    filp->f_op && filp->f_op->fasync)
+				filp->f_op->fasync(filp->f_inode, filp, 1);
+			if (!(arg & FASYNC) && (filp->f_flags & FASYNC) &&
+			    filp->f_op && filp->f_op->fasync)
+				filp->f_op->fasync(filp->f_inode, filp, 0);
+			/* Only these three flags can be changed here. */
+			filp->f_flags &= ~(O_APPEND | O_NONBLOCK | FASYNC);
+			filp->f_flags |= arg & (O_APPEND | O_NONBLOCK |
+						FASYNC);
+			return 0;
+		case F_GETLK:
+			return fcntl_getlk(fd, (struct flock *) arg);
+		case F_SETLK:
+		case F_SETLKW:
+			/* fcntl_setlk() receives cmd and handles both. */
+			return fcntl_setlk(fd, cmd, (struct flock *) arg);
+		case F_GETOWN:
+			/*
+			 * XXX If f_owner is a process group, the
+			 * negative return value will get converted
+			 * into an error.  Oops.  If we keep the
+			 * current syscall conventions, the only way
+			 * to fix this will be in libc.
+			 */
+			return filp->f_owner;
+		case F_SETOWN:
+			/*
+			 * Add the security checks - AC. Without
+			 * this there is a massive Linux security
+			 * hole here - consider what happens if
+			 * you do something like
+			 *
+			 *	fcntl(0,F_SETOWN,some_root_process);
+			 *	getchar();
+			 *
+			 * and input a line!
+			 *
+			 * BTW: Don't try this for fun. Several Unix
+			 * systems I tried this on fall for the
+			 * trick!
+			 *
+			 * I had to fix this botch job as Linux
+			 * kill_fasync asserts priv making it a
+			 * free all user process killer!
+			 *
+			 * Changed to make the security checks more
+			 * liberal.  -- TYT
+			 */
+			if (current->pgrp == -arg || current->pid == arg)
+				goto fasync_ok;
+
+			for_each_task(p) {
+				if ((p->pid == arg) || (p->pid == -arg) ||
+				    (p->pgrp == -arg)) {
+					task_found++;
+					/*
+					 * Refused only when session, uid
+					 * and euid all differ and the
+					 * caller is not the super-user.
+					 */
+					if ((p->session != current->session) &&
+					    (p->uid != current->uid) &&
+					    (p->euid != current->euid) &&
+					    !suser())
+						return -EPERM;
+					break;
+				}
+			}
+			if ((task_found == 0) && !suser())
+				return -EINVAL;
+		fasync_ok:
+			filp->f_owner = arg;
+			if (S_ISSOCK (filp->f_inode->i_mode))
+				sock_fcntl (filp, F_SETOWN, arg);
+			return 0;
+		default:
+			/* sockets need a few special fcntls. */
+			if (S_ISSOCK (filp->f_inode->i_mode))
+			{
+				return (sock_fcntl (filp, cmd, arg));
+			}
+			return -EINVAL;
+	}
+}
+
+/*
+ * Send sig to the owner of every file on a fasync list.
+ *
+ * A positive f_owner is signalled with kill_proc(); any other value is
+ * negated and signalled with kill_pg().  The walk stops at the first
+ * entry whose magic number is wrong.
+ */
+void kill_fasync(struct fasync_struct *fa, int sig)
+{
+	for (; fa; fa = fa->fa_next) {
+		if (fa->magic != FASYNC_MAGIC) {
+			printk("kill_fasync: bad magic number in "
+			       "fasync_struct!\n");
+			return;
+		}
+		if (fa->fa_file->f_owner > 0)
+			kill_proc(fa->fa_file->f_owner, sig, 1);
+		else
+			kill_pg(-fa->fa_file->f_owner, sig, 1);
+	}
+}
diff --git a/fs/fifo.c b/fs/fifo.c
new file mode 100644
index 000000000..ecd9bc232
--- /dev/null
+++ b/fs/fifo.c
@@ -0,0 +1,161 @@
+/*
+ * linux/fs/fifo.c
+ *
+ * written by Paul H. Hargrove
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+
+/*
+ * Open a FIFO.  Depending on f_mode (1 = read, 2 = write, 3 = both)
+ * the file's operations are switched to the matching fifo table, the
+ * reader/writer counts of the inode are updated, and a blocking open
+ * sleeps until the other end shows up.  The first successful open also
+ * allocates the page used as pipe buffer.
+ *
+ * Returns 0 on success, -ERESTARTSYS when a signal interrupts the
+ * wait, -ENXIO for a non-blocking write-only open without readers,
+ * -EINVAL for any other f_mode and -ENOMEM when no page is available.
+ */
+static int fifo_open(struct inode * inode,struct file * filp)
+{
+ int retval = 0;
+ unsigned long page;
+
+ switch( filp->f_mode ) {
+
+ case 1:
+ /*
+ * O_RDONLY
+ * POSIX.1 says that O_NONBLOCK means return with the FIFO
+ * opened, even when there is no process writing the FIFO.
+ */
+ filp->f_op = &connecting_fifo_fops;
+ /* First reader: wake anybody sleeping on the FIFO. */
+ if (!PIPE_READERS(*inode)++)
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+ if (!(filp->f_flags & O_NONBLOCK) && !PIPE_WRITERS(*inode)) {
+ PIPE_RD_OPENERS(*inode)++;
+ /* Sleep until a writer exists or a signal is pending. */
+ while (!PIPE_WRITERS(*inode)) {
+ if (current->signal & ~current->blocked) {
+ retval = -ERESTARTSYS;
+ break;
+ }
+ interruptible_sleep_on(&PIPE_WAIT(*inode));
+ }
+ if (!--PIPE_RD_OPENERS(*inode))
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+ }
+ /* Let writers that are still inside their open finish first. */
+ while (PIPE_WR_OPENERS(*inode))
+ interruptible_sleep_on(&PIPE_WAIT(*inode));
+ if (PIPE_WRITERS(*inode))
+ filp->f_op = &read_fifo_fops;
+ /* On failure undo the reader count taken above. */
+ if (retval && !--PIPE_READERS(*inode))
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+ break;
+
+ case 2:
+ /*
+ * O_WRONLY
+ * POSIX.1 says that O_NONBLOCK means return -1 with
+ * errno=ENXIO when there is no process reading the FIFO.
+ */
+ if ((filp->f_flags & O_NONBLOCK) && !PIPE_READERS(*inode)) {
+ retval = -ENXIO;
+ break;
+ }
+ filp->f_op = &write_fifo_fops;
+ /* First writer: wake anybody sleeping on the FIFO. */
+ if (!PIPE_WRITERS(*inode)++)
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+ if (!PIPE_READERS(*inode)) {
+ PIPE_WR_OPENERS(*inode)++;
+ /* Sleep until a reader exists or a signal is pending. */
+ while (!PIPE_READERS(*inode)) {
+ if (current->signal & ~current->blocked) {
+ retval = -ERESTARTSYS;
+ break;
+ }
+ interruptible_sleep_on(&PIPE_WAIT(*inode));
+ }
+ if (!--PIPE_WR_OPENERS(*inode))
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+ }
+ /* Let readers that are still inside their open finish first. */
+ while (PIPE_RD_OPENERS(*inode))
+ interruptible_sleep_on(&PIPE_WAIT(*inode));
+ /* On failure undo the writer count taken above. */
+ if (retval && !--PIPE_WRITERS(*inode))
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+ break;
+
+ case 3:
+ /*
+ * O_RDWR
+ * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
+ * This implementation will NEVER block on a O_RDWR open, since
+ * the process can at least talk to itself.
+ */
+ filp->f_op = &rdwr_fifo_fops;
+ if (!PIPE_READERS(*inode)++)
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+ while (PIPE_WR_OPENERS(*inode))
+ interruptible_sleep_on(&PIPE_WAIT(*inode));
+ if (!PIPE_WRITERS(*inode)++)
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+ while (PIPE_RD_OPENERS(*inode))
+ interruptible_sleep_on(&PIPE_WAIT(*inode));
+ break;
+
+ default:
+ retval = -EINVAL;
+ }
+ /* Done on error, or when the buffer page already exists. */
+ if (retval || PIPE_BASE(*inode))
+ return retval;
+ page = __get_free_page(GFP_KERNEL);
+ /* The buffer is tested again after the allocation; if it exists now, drop our page. */
+ if (PIPE_BASE(*inode)) {
+ free_page(page);
+ return 0;
+ }
+ /* NOTE(review): the reader/writer counts taken above are not undone on this path -- confirm. */
+ if (!page)
+ return -ENOMEM;
+ PIPE_LOCK(*inode) = 0;
+ PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
+ PIPE_BASE(*inode) = (char *) page;
+ return 0;
+}
+
+/*
+ * Dummy default file-operations: the only thing this does
+ * is contain the open that then fills in the correct operations
+ * depending on the access mode of the file...
+ */
+/* Positional initializer: every slot is NULL except the one holding fifo_open. */
+static struct file_operations def_fifo_fops = {
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ fifo_open, /* will set read or write pipe_fops */
+ NULL,
+ NULL
+};
+
+/*
+ * Inode operations for FIFOs: nothing but the default file operations,
+ * whose open routine installs the real ones.  The list stops after
+ * "permission"; any later member of the structure is zero-initialized
+ * by the usual C rules for partial initializers.
+ */
+static struct inode_operations fifo_inode_operations = {
+ &def_fifo_fops, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * Turn an inode into an unopened FIFO: install the fifo inode
+ * operations, mark it as a pipe and reset all pipe state (no buffer,
+ * no data, no readers, writers or openers, nobody waiting).
+ */
+void init_fifo(struct inode * inode)
+{
+	inode->i_op = &fifo_inode_operations;
+	inode->i_pipe = 1;
+
+	PIPE_LOCK(*inode) = 0;
+	PIPE_BASE(*inode) = NULL;
+	PIPE_LEN(*inode) = 0;
+	PIPE_START(*inode) = 0;
+	PIPE_WR_OPENERS(*inode) = 0;
+	PIPE_RD_OPENERS(*inode) = 0;
+	PIPE_WAIT(*inode) = NULL;
+	PIPE_WRITERS(*inode) = 0;
+	PIPE_READERS(*inode) = 0;
+}
diff --git a/fs/file_table.c b/fs/file_table.c
new file mode 100644
index 000000000..6438162a0
--- /dev/null
+++ b/fs/file_table.c
@@ -0,0 +1,90 @@
+/*
+ * linux/fs/file_table.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+
+struct file * first_file;
+int nr_files = 0;
+
+/*
+ * Link file into the circular list just before first_file and make it
+ * the new first_file.  The list must not be empty.
+ */
+static void insert_file_free(struct file *file)
+{
+	struct file * head = first_file;
+	struct file * tail = head->f_prev;
+
+	file->f_next = head;
+	file->f_prev = tail;
+	head->f_prev = file;
+	tail->f_next = file;
+	first_file = file;
+}
+
+/*
+ * Unlink file from the list, moving first_file on to the next entry
+ * when file was the first one, and clear file's own link pointers.
+ */
+static void remove_file_free(struct file *file)
+{
+	struct file * next = file->f_next;
+	struct file * prev = file->f_prev;
+
+	if (first_file == file)
+		first_file = next;
+	if (next)
+		next->f_prev = prev;
+	if (prev)
+		prev->f_next = next;
+	file->f_prev = NULL;
+	file->f_next = NULL;
+}
+
+/*
+ * Move file to the end of the circular list, i.e. directly in front of
+ * first_file, without changing first_file to point at it.
+ */
+static void put_last_free(struct file *file)
+{
+	struct file * tail;
+
+	remove_file_free(file);
+	tail = first_file->f_prev;
+	file->f_prev = tail;
+	tail->f_next = file;
+	file->f_next = first_file;
+	first_file->f_prev = file;
+}
+
+/*
+ * Add one page worth of file structures to the file list.
+ *
+ * Nothing happens (and nr_files is left unchanged) when no page can be
+ * allocated.  If the list is still empty, the first new structure
+ * becomes a one-element circular list; the remaining ones are linked
+ * in with insert_file_free().
+ */
+void grow_files(void)
+{
+	struct file * file;
+	int i;
+
+	file = (struct file *) get_free_page(GFP_KERNEL);
+
+	if (!file)
+		return;
+
+	i = PAGE_SIZE/sizeof(struct file);
+	nr_files += i;
+
+	if (!first_file) {
+		/*
+		 * Written as separate statements: the former one-line
+		 * form modified "file" and used it to form an lvalue
+		 * in the same expression with no sequencing in between.
+		 */
+		file->f_next = file;
+		file->f_prev = file;
+		first_file = file;
+		file++;
+		i--;
+	}
+
+	for (; i ; i--)
+		insert_file_free(file++);
+}
+
+/*
+ * Start with an empty file list.  The memory range passed in is not
+ * used: start is returned unchanged and end is ignored.
+ */
+unsigned long file_table_init(unsigned long start, unsigned long end)
+{
+ first_file = NULL;
+ return start;
+}
+
+/*
+ * Find an unused file structure (f_count == 0), clear it, move it to
+ * the end of the list and hand it out with f_count set to 1 and a
+ * fresh f_version.
+ *
+ * The list is grown a page at a time while fewer than NR_FILE
+ * structures exist.  Returns NULL when every structure is in use and
+ * the list cannot grow any further, either because NR_FILE has been
+ * reached or because grow_files() could not add anything.
+ */
+struct file * get_empty_filp(void)
+{
+	int i;
+	int nr_before;
+	struct file * f;
+
+	if (!first_file)
+		grow_files();
+	for (;;) {
+		for (f = first_file, i=0; i < nr_files; i++, f = f->f_next)
+			if (!f->f_count) {
+				remove_file_free(f);
+				memset(f,0,sizeof(*f));
+				put_last_free(f);
+				f->f_count = 1;
+				f->f_version = ++event;
+				return f;
+			}
+		if (nr_files >= NR_FILE)
+			return NULL;
+		/*
+		 * grow_files() reports nothing; an unchanged nr_files
+		 * means it failed, and retrying would loop forever.
+		 */
+		nr_before = nr_files;
+		grow_files();
+		if (nr_files == nr_before)
+			return NULL;
+	}
+}
diff --git a/fs/filesystems.c b/fs/filesystems.c
new file mode 100644
index 000000000..7bcc695c5
--- /dev/null
+++ b/fs/filesystems.c
@@ -0,0 +1,99 @@
+/*
+ * linux/fs/filesystems.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * table of configured filesystems
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+
+#include <linux/minix_fs.h>
+#include <linux/ext_fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/xia_fs.h>
+#include <linux/msdos_fs.h>
+#include <linux/umsdos_fs.h>
+#include <linux/proc_fs.h>
+#include <linux/nfs_fs.h>
+#include <linux/iso_fs.h>
+#include <linux/sysv_fs.h>
+#include <linux/hpfs_fs.h>
+
+extern void device_setup(void);
+
+/*
+ * setup() system call: initialise the devices, register every
+ * configured file system type and mount the root file system.
+ *
+ * This may be used only once, enforced by 'static int callable';
+ * later calls return -1.
+ *
+ * Each type is described by a static object, not a compound literal:
+ * a compound literal inside a function only lives as long as the
+ * enclosing block, and register_filesystem() is presumed to keep the
+ * pointer it is given (NOTE(review): confirm in fs/super.c).
+ */
+asmlinkage int sys_setup(void)
+{
+	static int callable = 1;
+
+	if (!callable)
+		return -1;
+	callable = 0;
+
+	device_setup();
+
+#ifdef CONFIG_MINIX_FS
+	{
+		static struct file_system_type minix_fs_type =
+			{minix_read_super, "minix", 1, NULL};
+		register_filesystem(&minix_fs_type);
+	}
+#endif
+
+#ifdef CONFIG_EXT_FS
+	{
+		static struct file_system_type ext_fs_type =
+			{ext_read_super, "ext", 1, NULL};
+		register_filesystem(&ext_fs_type);
+	}
+#endif
+
+#ifdef CONFIG_EXT2_FS
+	{
+		static struct file_system_type ext2_fs_type =
+			{ext2_read_super, "ext2", 1, NULL};
+		register_filesystem(&ext2_fs_type);
+	}
+#endif
+
+#ifdef CONFIG_XIA_FS
+	{
+		static struct file_system_type xiafs_fs_type =
+			{xiafs_read_super, "xiafs", 1, NULL};
+		register_filesystem(&xiafs_fs_type);
+	}
+#endif
+#ifdef CONFIG_UMSDOS_FS
+	{
+		static struct file_system_type umsdos_fs_type =
+			{UMSDOS_read_super, "umsdos", 1, NULL};
+		register_filesystem(&umsdos_fs_type);
+	}
+#endif
+
+#ifdef CONFIG_MSDOS_FS
+	{
+		static struct file_system_type msdos_fs_type =
+			{msdos_read_super, "msdos", 1, NULL};
+		register_filesystem(&msdos_fs_type);
+	}
+#endif
+
+#ifdef CONFIG_PROC_FS
+	{
+		static struct file_system_type proc_fs_type =
+			{proc_read_super, "proc", 0, NULL};
+		register_filesystem(&proc_fs_type);
+	}
+#endif
+
+#ifdef CONFIG_NFS_FS
+	{
+		static struct file_system_type nfs_fs_type =
+			{nfs_read_super, "nfs", 0, NULL};
+		register_filesystem(&nfs_fs_type);
+	}
+#endif
+
+#ifdef CONFIG_ISO9660_FS
+	{
+		static struct file_system_type iso9660_fs_type =
+			{isofs_read_super, "iso9660", 1, NULL};
+		register_filesystem(&iso9660_fs_type);
+	}
+#endif
+
+#ifdef CONFIG_SYSV_FS
+	{
+		/* One read_super routine serves three type names. */
+		static struct file_system_type xenix_fs_type =
+			{sysv_read_super, "xenix", 1, NULL};
+		static struct file_system_type sysv_fs_type =
+			{sysv_read_super, "sysv", 1, NULL};
+		static struct file_system_type coherent_fs_type =
+			{sysv_read_super, "coherent", 1, NULL};
+
+		register_filesystem(&xenix_fs_type);
+		register_filesystem(&sysv_fs_type);
+		register_filesystem(&coherent_fs_type);
+	}
+#endif
+
+#ifdef CONFIG_HPFS_FS
+	{
+		static struct file_system_type hpfs_fs_type =
+			{hpfs_read_super, "hpfs", 1, NULL};
+		register_filesystem(&hpfs_fs_type);
+	}
+#endif
+
+	mount_root();
+	return 0;
+}
diff --git a/fs/hpfs/Makefile b/fs/hpfs/Makefile
new file mode 100644
index 000000000..94ab74d5d
--- /dev/null
+++ b/fs/hpfs/Makefile
@@ -0,0 +1,30 @@
+#
+# Makefile for the linux HPFS filesystem routines.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (i.e. not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+.c.s:
+ $(CC) $(CFLAGS) -S $<
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+.s.o:
+ $(AS) -o $*.o $<
+
+OBJS= hpfs_fs.o
+
+hpfs.o: $(OBJS)
+ ln -f hpfs_fs.o hpfs.o
+
+dep:
+ $(CPP) -M *.c > .depend
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/fs/hpfs/README b/fs/hpfs/README
new file mode 100644
index 000000000..7e4fe88ca
--- /dev/null
+++ b/fs/hpfs/README
@@ -0,0 +1,25 @@
+Linux can read, but not write, OS/2 HPFS partitions.
+
+Mount options are the same as for msdos partitions.
+
+ uid=nnn All files in the partition will be owned by user id nnn.
+ gid=nnn All files in the partition will be in group nnn.
+ umask=nnn The permission mask (see umask(1)) for the partition.
+ conv=binary Data is returned exactly as is, with CRLF's. [default]
+ conv=text (Carriage return, line feed) is replaced with newline.
+ conv=auto Chooses, file by file, conv=binary or conv=text (by guessing)
+
+There is one mount option unique to HPFS.
+
+ case=lower Convert file names to lower case. [default]
+ case=asis Return file names as is, in mixed case.
+
+As in real HPFS, case is not significant in filename matching.
+
+
+Command line example
+ mkdir -p /os2/c
+ mount -t hpfs -o uid=100,gid=100 /dev/sda6 /os2/c
+
+/etc/fstab example
+ /dev/sdb5 /d/f hpfs ro,uid=402,gid=402,umask=002
diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h
new file mode 100644
index 000000000..3121a415d
--- /dev/null
+++ b/fs/hpfs/hpfs.h
@@ -0,0 +1,498 @@
+/* The paper
+
+ Duncan, Roy
+ Design goals and implementation of the new High Performance File System
+ Microsoft Systems Journal Sept 1989 v4 n5 p1(13)
+
+ describes what HPFS looked like when it was new, and it is the source
+ of most of the information given here. The rest is conjecture.
+
+ For definitive information on the Duncan paper, see it, not this file.
+ For definitive information on HPFS, ask somebody else -- this is guesswork.
+ There are certain to be many mistakes. */
+
+/* Notation */
+
+/* All four names are the same unsigned type; the aliases only record
+ which kind of on-disk structure a sector number is expected to address. */
+typedef unsigned secno; /* sector number, partition relative */
+
+typedef secno dnode_secno; /* sector number of a dnode */
+typedef secno fnode_secno; /* sector number of an fnode */
+typedef secno anode_secno; /* sector number of an anode */
+
+/* sector 0 */
+
+/* The boot block is very like a FAT boot block, except that the
+ 29h signature byte is 28h instead, and the ID string is "HPFS". */
+
+/* On-disk layout of sector 0.
+ NOTE(review): assuming 2-byte short, 4-byte int and no compiler padding,
+ the members total 512 bytes, with magic in the last two -- confirm for
+ the target ABI. Multi-byte fields that would be misaligned are declared
+ as unsigned char arrays. */
+struct hpfs_boot_block
+{
+ unsigned char jmp[3];
+ unsigned char oem_id[8];
+ unsigned char bytes_per_sector[2]; /* 512 */
+ unsigned char sectors_per_cluster;
+ unsigned char n_reserved_sectors[2];
+ unsigned char n_fats;
+ unsigned char n_rootdir_entries[2];
+ unsigned char n_sectors_s[2];
+ unsigned char media_byte;
+ unsigned short sectors_per_fat;
+ unsigned short sectors_per_track;
+ unsigned short heads_per_cyl;
+ unsigned int n_hidden_sectors;
+ unsigned int n_sectors_l; /* size of partition */
+ unsigned char drive_number;
+ unsigned char mbz;
+ unsigned char sig_28h; /* 28h */
+ unsigned char vol_serno[4];
+ unsigned char vol_label[11];
+ unsigned char sig_hpfs[8]; /* "HPFS " */
+ unsigned char pad[448];
+ unsigned short magic; /* aa55 */
+};
+
+
+/* sector 16 */
+
+/* The super block has the pointer to the root directory. */
+
+/* Expected value of hpfs_super_block.magic */
+#define SB_MAGIC 0xf995e849
+
+/* On-disk layout of sector 16.
+ NOTE(review): the members are 128 words; with 4-byte unsigned and
+ time_t that is one 512-byte sector -- confirm for the target ABI. */
+struct hpfs_super_block
+{
+ unsigned magic; /* f995 e849 */
+ unsigned magic1; /* fa53 e9c5, more magic? */
+ unsigned huh202; /* ?? 202 = N. of B. in 1.00390625 S.*/
+ fnode_secno root; /* fnode of root directory */
+ secno n_sectors; /* size of filesystem */
+ unsigned n_badblocks; /* number of bad blocks */
+ secno bitmaps; /* pointers to free space bit maps */
+ unsigned zero1; /* 0 */
+ secno badblocks; /* bad block list */
+ unsigned zero3; /* 0 */
+ time_t last_chkdsk; /* date last checked, 0 if never */
+ unsigned zero4; /* 0 */
+ secno n_dir_band; /* number of sectors in dir band */
+ secno dir_band_start; /* first sector in dir band */
+ secno dir_band_end; /* last sector in dir band */
+ secno dir_band_bitmap; /* free space map, 1 dnode per bit */
+ unsigned zero5[8]; /* 0 */
+ secno scratch_dnodes; /* ?? 8 preallocated sectors near dir
+ band, 4-aligned. */
+ unsigned zero6[103]; /* 0 */
+};
+
+
+/* sector 17 */
+
+/* The spare block has pointers to spare sectors. */
+
+/* Expected value of hpfs_spare_block.magic */
+#define SP_MAGIC 0xf9911849
+
+/* On-disk layout of sector 17.
+ NOTE(review): the three bit-fields add up to 32 bits, so the members
+ are 128 words; with 4-byte unsigned that is one 512-byte sector --
+ confirm bit-field packing for the target compiler. */
+struct hpfs_spare_block
+{
+ unsigned magic; /* f991 1849 */
+ unsigned magic1; /* fa52 29c5, more magic? */
+
+ unsigned dirty: 1; /* 0 clean, 1 "improperly stopped" */
+ unsigned flag1234: 4; /* unknown flags */
+ unsigned fast: 1; /* partition was fast formatted */
+ unsigned flag6to31: 26; /* unknown flags */
+
+ secno hotfix_map; /* info about remapped bad sectors */
+ unsigned n_spares_used; /* number of hotfixes */
+ unsigned n_spares; /* number of spares in hotfix map */
+ unsigned n_dnode_spares_free; /* spare dnodes unused */
+ unsigned n_dnode_spares; /* length of spare_dnodes[] list,
+ follows in this block*/
+ secno code_page_dir; /* code page directory block */
+ unsigned n_code_pages; /* number of code pages */
+ unsigned large_numbers[2]; /* ?? */
+ unsigned zero1[15];
+ dnode_secno spare_dnodes[20]; /* emergency free dnode list */
+ unsigned zero2[81]; /* room for more? */
+};
+
+/* The bad block list is 4 sectors long. The first word must be zero,
+ the remaining words give n_badblocks bad block numbers.
+ I bet you can see it coming... */
+
+#define BAD_MAGIC 0
+
+/* The hotfix map is 4 sectors long. It looks like
+
+ secno from[n_spares];
+ secno to[n_spares];
+
+ The to[] list is initialized to point to n_spares preallocated empty
+ sectors. The from[] list contains the sector numbers of bad blocks
+ which have been remapped to corresponding sectors in the to[] list.
+ n_spares_used gives the length of the from[] list. */
+
+
+/* Sectors 18 and 19 are preallocated and unused.
+ Maybe they're spares for 16 and 17, but simple substitution fails. */
+
+
+/* The code page info pointed to by the spare block consists of an index
+ block and blocks containing character maps. The following is pretty
+ sketchy, but Linux doesn't use code pages so it doesn't matter. */
+
+/* block pointed to by spareblock->code_page_dir */
+
+/* Expected value of code_page_directory.magic */
+#define CP_DIR_MAGIC 0x494521f7
+
+/* Index block for code pages: a 4-word header followed by an array of
+ per-code-page entries.
+ NOTE(review): 16 header bytes plus 31 entries of 16 bytes is 512 with
+ 2-byte short and 4-byte unsigned; the array bound looks chosen to fill
+ one sector -- confirm. */
+struct code_page_directory
+{
+ unsigned magic; /* 4945 21f7 */
+ unsigned n_code_pages; /* number of pointers following */
+ unsigned zero1[2];
+ struct {
+ unsigned short ix; /* index */
+ unsigned short code_page_number; /* code page number */
+ unsigned bounds; /* matches corresponding word
+ in data block */
+ secno code_page_data; /* sector number of a code_page_data
+ containing c.p. array */
+ unsigned index; /* index in c.p. array in that sector*/
+ } array[31]; /* unknown length */
+};
+
+/* blocks pointed to by code_page_directory */
+
+/* Expected value of code_page_data.magic */
+#define CP_DATA_MAGIC 0x894521f7
+
+/* Data block holding up to three code page character maps.
+ NOTE(review): 26 header bytes, 3 entries of 136 bytes and 78 trailing
+ bytes total 512 if no padding is inserted -- confirm for the target
+ ABI. */
+struct code_page_data
+{
+ unsigned magic; /* 8945 21f7 */
+ unsigned n_used; /* # elements used in c_p_data[] */
+ unsigned bounds[3]; /* looks a bit like
+ (beg1,end1), (beg2,end2)
+ one byte each */
+ unsigned short offs[3]; /* offsets from start of sector
+ to start of c_p_data[ix] */
+ struct {
+ unsigned short ix; /* index */
+ unsigned short code_page_number; /* code page number */
+ unsigned short zero1;
+ unsigned char map[128]; /* map for chars 80..ff */
+ unsigned short zero2;
+ } code_page[3];
+ unsigned char incognita[78];
+};
+
+
+/* Free space bitmaps are 4 sectors long, which is 16384 bits.
+ 16384 sectors is 8 meg, and each 8 meg band has a 4-sector bitmap.
+ Bit order in the maps is little-endian. 0 means taken, 1 means free.
+
+ Bit map sectors are marked allocated in the bit maps, and so are sectors
+ off the end of the partition.
+
+ Band 0 is sectors 0-3fff, its map is in sectors 18-1b.
+ Band 1 is 4000-7fff, its map is in 7ffc-7fff.
+ Band 2 is 8000-ffff, its map is in 8000-8003.
+ The remaining bands have maps in their first (even) or last (odd) 4 sectors
+ -- if the last, partial, band is odd its map is in its last 4 sectors.
+
+ The bitmap locations are given in a table pointed to by the super block.
+ No doubt they aren't constrained to be at 18, 7ffc, 8000, ...; that is
+ just where they usually are.
+
+ The "directory band" is a bunch of sectors preallocated for dnodes.
+ It has a 4-sector free space bitmap of its own. Each bit in the map
+ corresponds to one 4-sector dnode, bit 0 of the map corresponding to
+ the first 4 sectors of the directory band. The entire band is marked
+ allocated in the main bitmap. The super block gives the locations
+ of the directory band and its bitmap. ("band" doesn't mean it is
+ 8 meg long; it isn't.) */
+
+
+/* dnode: directory. 4 sectors long */
+
+/* A directory is a tree of dnodes. The fnode for a directory
+ contains one pointer, to the root dnode of the tree. The fnode
+ never moves, the dnodes do the B-tree thing, splitting and merging
+ as files are added and removed. */
+
+/* Expected value of dnode.magic */
+#define DNODE_MAGIC 0x77e40aae
+
+/* One directory tree node: a 5-word header followed by packed,
+ variable-length hpfs_dirent records stored in dirent[].
+ With 4-byte unsigned, 20 + 2028 bytes is 2048, i.e. four 512-byte
+ sectors. */
+struct dnode {
+ unsigned magic; /* 77e4 0aae */
+ unsigned first_free; /* offset from start of dnode to
+ first free dir entry */
+ unsigned increment_me; /* some kind of activity counter?
+ Neither HPFS.IFS nor CHKDSK cares
+ if you change this word */
+ secno up; /* (root dnode) directory's fnode
+ (nonroot) parent dnode */
+ dnode_secno self; /* pointer to this dnode */
+ unsigned char dirent[2028]; /* one or more dirents */
+};
+
+/* One directory entry inside a dnode. Entries are variable length:
+ `length' is the distance to the next entry, and name[] really holds
+ namelen bytes even though it is declared with one element. The sixteen
+ single-bit flags follow the 16-bit length. */
+struct hpfs_dirent {
+ unsigned short length; /* offset to next dirent */
+ unsigned first: 1; /* set on phony ^A^A (".") entry */
+ unsigned flag1: 1;
+ unsigned down: 1; /* down pointer present (after name) */
+ unsigned last: 1; /* set on phony \377 entry */
+ unsigned flag4: 1;
+ unsigned flag5: 1;
+ unsigned flag6: 1;
+ unsigned has_needea: 1; /* ?? some EA has NEEDEA set
+ I have no idea why this is
+ interesting in a dir entry */
+ unsigned read_only: 1; /* dos attrib */
+ unsigned hidden: 1; /* dos attrib */
+ unsigned system: 1; /* dos attrib */
+ unsigned flag11: 1; /* would be volume label dos attrib */
+ unsigned directory: 1; /* dos attrib */
+ unsigned archive: 1; /* dos attrib */
+ unsigned not_8x3: 1; /* name is not 8.3 */
+ unsigned flag15: 1;
+ fnode_secno fnode; /* fnode giving allocation info */
+ time_t write_date; /* mtime */
+ unsigned file_size; /* file length, bytes */
+ time_t read_date; /* atime */
+ time_t creation_date; /* ctime */
+ unsigned ea_size; /* total EA length, bytes */
+ unsigned char zero1;
+ unsigned char locality; /* 0=unk 1=seq 2=random 3=both */
+ unsigned char namelen, name[1]; /* file name */
+ /* dnode_secno down; btree down pointer, if present,
+ follows name on next word boundary, or maybe it's
+ precedes next dirent, which is on a word boundary. */
+};
+
+/* Fetch the b-tree down pointer of a dir entry: the last 4 bytes of the
+   entry, i.e. the word just before the following entry. */
+
+static inline dnode_secno de_down_pointer (struct hpfs_dirent *de)
+{
+	char *entry_end = (char *) de + de->length;
+
+	return *(dnode_secno *) (entry_end - 4);
+}
+
+/* The first dir entry in a dnode: the start of its dirent[] area */
+
+static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode)
+{
+	return (struct hpfs_dirent *) &dnode->dirent[0];
+}
+
+/* One past the last dir entry: first_free bytes from the dnode's start */
+
+static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode)
+{
+	char *base = (char *) dnode;
+
+	return (struct hpfs_dirent *) (base + dnode->first_free);
+}
+
+/* Step to the dir entry that follows de, using de's own length field */
+
+static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de)
+{
+	char *here = (char *) de;
+
+	return (struct hpfs_dirent *) (here + de->length);
+}
+
+
+/* B+ tree: allocation info in fnodes and anodes */
+
+/* dnodes point to fnodes which are responsible for listing the sectors
+ assigned to the file. This is done with trees of (length,address)
+ pairs. (Actually triples, of (length, file-address, disk-address)
+ which can represent holes. Find out if HPFS does that.)
+ At any rate, fnodes contain a small tree; if subtrees are needed
+ they occupy essentially a full block in anodes. A leaf-level tree node
+ has 3-word entries giving sector runs, a non-leaf node has 2-word
+ entries giving subtree pointers. A flag in the header says which. */
+
+/* Leaf-level tree entry (3 words): one run of consecutive sectors. */
+struct bplus_leaf_node
+{
+ unsigned file_secno; /* first file sector in extent */
+ unsigned length; /* length, sectors */
+ secno disk_secno; /* first corresponding disk sector */
+};
+
+/* Non-leaf tree entry (2 words): a labelled pointer to a subtree. */
+struct bplus_internal_node
+{
+ unsigned file_secno; /* subtree maps sectors < this */
+ anode_secno down; /* pointer to subtree */
+};
+
+/* Header that precedes the entry array of an allocation tree node.
+ The `internal' bit says which of the two entry formats follows. The
+ zero-length arrays in `u' take no space; struct fnode and struct anode
+ each declare the real entry storage immediately after their embedded
+ header. */
+struct bplus_header
+{
+ unsigned flag0: 1;
+ unsigned flag1: 1;
+ unsigned flag2: 1;
+ unsigned flag3: 1;
+ unsigned flag4: 1;
+ unsigned fnode_parent: 1; /* ? we're pointed to by an fnode,
+ the data btree or some ea or the
+ main ea bootage pointer ea_secno */
+ /* also can get set in fnodes, which
+ may be a chkdsk glitch or may mean
+ this bit is irrelevant in fnodes,
+ or this interpretation is all wet */
+ unsigned flag6: 1;
+ unsigned internal: 1; /* 1 -> (internal) tree of anodes
+ 0 -> (leaf) list of extents */
+ unsigned char fill[3];
+ unsigned char n_free_nodes; /* free nodes in following array */
+ unsigned char n_used_nodes; /* used nodes in following array */
+ unsigned short first_free; /* offset from start of header to
+ first free node in array */
+ union {
+ struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving
+ subtree pointers */
+ struct bplus_leaf_node external[0]; /* (external) 3-word entries giving
+ sector runs */
+ } u;
+};
+
+/* fnode: root of allocation b+ tree, and EA's */
+
+/* Every file and every directory has one fnode, pointed to by the directory
+ entry and pointing to the file's sectors or directory's root dnode. EA's
+ are also stored here, and there are said to be ACL's somewhere here too. */
+
+/* Expected value of fnode.magic */
+#define FNODE_MAGIC 0xf7e40aae
+
+/* On-disk fnode: naming info, EA bookkeeping, the root of the allocation
+ tree (header `btree' plus the entry storage in `u') and in-fnode EA
+ space.
+ NOTE(review): if the 16 flag bits pack into the two bytes after
+ ea_size_s, the members total 512 bytes -- confirm bit-field layout for
+ the target compiler. */
+struct fnode
+{
+ unsigned magic; /* f7e4 0aae */
+ unsigned zero1[2];
+ unsigned char len, name[15]; /* true length, truncated name */
+ fnode_secno up; /* pointer to file's directory fnode */
+ unsigned zero2[3];
+ unsigned ea_size_l; /* length of disk-resident ea's */
+ secno ea_secno; /* first sector of disk-resident ea's*/
+ unsigned short ea_size_s; /* length of fnode-resident ea's */
+
+ unsigned flag0: 1;
+ unsigned ea_anode: 1; /* 1 -> ea_secno is an anode */
+ unsigned flag2: 1;
+ unsigned flag3: 1;
+ unsigned flag4: 1;
+ unsigned flag5: 1;
+ unsigned flag6: 1;
+ unsigned flag7: 1;
+ unsigned dirflag: 1; /* 1 -> directory. first & only extent
+ points to dnode. */
+ unsigned flag9: 1;
+ unsigned flag10: 1;
+ unsigned flag11: 1;
+ unsigned flag12: 1;
+ unsigned flag13: 1;
+ unsigned flag14: 1;
+ unsigned flag15: 1;
+
+ struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */
+ union {
+ struct bplus_leaf_node external[8];
+ struct bplus_internal_node internal[12];
+ } u;
+
+ unsigned file_size; /* file length, bytes */
+ unsigned n_needea; /* number of EA's with NEEDEA set */
+ unsigned zero4[4];
+ unsigned ea_offs; /* offset from start of fnode
+ to first fnode-resident ea */
+ unsigned zero5[2];
+ unsigned char ea[316]; /* zero or more EA's, packed together
+ with no alignment padding.
+ (Do not use this name, get here
+ via fnode + ea_offs. I think.) */
+};
+
+
+/* anode: 99.44% pure allocation tree */
+
+/* Expected value of anode.magic */
+#define ANODE_MAGIC 0x37e40aae
+
+/* On-disk anode: a 3-word header, a tree header and the entry storage.
+ Both union arms are 480 bytes (40 * 12 and 60 * 8).
+ NOTE(review): with an 8-byte bplus_header the members total 512 bytes
+ -- confirm for the target ABI. */
+struct anode
+{
+ unsigned magic; /* 37e4 0aae */
+ anode_secno self; /* pointer to this anode */
+ secno up; /* parent anode or fnode */
+
+ struct bplus_header btree; /* b+tree, 40 extents or 60 subtrees */
+ union {
+ struct bplus_leaf_node external[40];
+ struct bplus_internal_node internal[60];
+ } u;
+
+ unsigned fill[3]; /* unused */
+};
+
+
+/* extended attributes.
+
+ A file's EA info is stored as a list of (name,value) pairs. It is
+ usually in the fnode, but (if it's large) it is moved to a single
+ sector run outside the fnode, or to multiple runs with an anode tree
+ that points to them.
+
+ The value of a single EA is stored along with the name, or (if large)
+ it is moved to a single sector run, or multiple runs pointed to by an
+ anode tree, pointed to by the value field of the (name,value) pair.
+
+ Flags in the EA tell whether the value is immediate, in a single sector
+ run, or in multiple runs. Flags in the fnode tell whether the EA list
+ is immediate, in a single run, or in multiple runs. */
+
+/* Fixed-size header of one extended attribute. The name, a NUL byte and
+ the value follow it directly in memory, as laid out in the comment at
+ the end of the struct; the ea_xxx() accessors compute those addresses
+ from sizeof this header, namelen and valuelen. */
+struct extended_attribute
+{
+ unsigned indirect: 1; /* 1 -> value gives sector number
+ where real value starts */
+ unsigned anode: 1; /* 1 -> sector is an anode
+ that points to fragmented value */
+ unsigned flag2: 1;
+ unsigned flag3: 1;
+ unsigned flag4: 1;
+ unsigned flag5: 1;
+ unsigned flag6: 1;
+ unsigned needea: 1; /* required ea */
+ unsigned char namelen; /* length of name, bytes */
+ unsigned short valuelen; /* length of value, bytes */
+ /*
+ unsigned char name[namelen]; ascii attrib name
+ unsigned char nul; terminating '\0', not counted
+ unsigned char value[valuelen]; value, arbitrary
+ if this.indirect, valuelen is 8 and the value is
+ unsigned length; real length of value, bytes
+ secno secno; sector address where it starts
+ if this.anode, the above sector number is the root of an anode tree
+ which points to the value.
+ */
+};
+
+/* Address of the attribute name: the byte right after the EA header */
+static inline unsigned char *ea_name (struct extended_attribute *ea)
+{
+	return (unsigned char *) (ea + 1);
+}
+
+/* Address of the attribute value: past the header, the name and its
+   terminating NUL */
+static inline unsigned char *ea_value (struct extended_attribute *ea)
+{
+	unsigned char *name = (unsigned char *) (ea + 1);
+
+	return name + ea->namelen + 1;
+}
+
+/* Address of the following EA: past this one's header, name, NUL and
+   value */
+static inline struct extended_attribute *
+ ea_next_ea (struct extended_attribute *ea)
+{
+	unsigned char *value = (unsigned char *) (ea + 1) + ea->namelen + 1;
+
+	return (struct extended_attribute *) (value + ea->valuelen);
+}
+
+/* First word of the value area: for an indirect EA, the real length */
+static inline unsigned ea_indirect_length (struct extended_attribute *ea)
+{
+	return ((unsigned *) ea_value (ea))[0];
+}
+
+/* Second word of the value area: for an indirect EA, the sector where
+   the real value starts */
+static inline secno ea_indirect_secno (struct extended_attribute *ea)
+{
+	return ((unsigned *) ea_value (ea))[1];
+}
+
+/*
+ Local Variables:
+ comment-column: 40
+ End:
+*/
diff --git a/fs/hpfs/hpfs_fs.c b/fs/hpfs/hpfs_fs.c
new file mode 100644
index 000000000..c05cf56ab
--- /dev/null
+++ b/fs/hpfs/hpfs_fs.c
@@ -0,0 +1,1727 @@
+/*
+ * linux/fs/hpfs/hpfs_fs.c
+ * read-only HPFS
+ * version 1.0
+ *
+ * Chris Smith 1993
+ *
+ * Sources & references:
+ * Duncan, _Design ... of HPFS_, MSJ 4(5) (C) 1989 Microsoft Corp
+ * linux/fs/minix Copyright (C) 1991, 1992, 1993 Linus Torvalds
+ * linux/fs/msdos Written 1992, 1993 by Werner Almesberger
+ * linux/fs/isofs Copyright (C) 1991 Eric Youngdale
+ */
+
+#include <linux/fs.h>
+#include <linux/hpfs_fs.h>
+#include <linux/errno.h>
+#include <linux/malloc.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/locks.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <asm/bitops.h>
+#include <asm/segment.h>
+
+#include "hpfs.h"
+
+/*
+ * HPFS is a mixture of 512-byte blocks and 2048-byte blocks. The 2k blocks
+ * are used for directories and bitmaps. For bmap to work, we must run the
+ * file system with 512-byte blocks. The 2k blocks are assembled in buffers
+ * obtained from kmalloc.
+ *
+ * For a file's i-number we use the sector number of its fnode, coded.
+ * (Directory ino's are even, file ino's are odd, and ino >> 1 is the
+ * sector address of the fnode. This is a hack to allow lookup() to
+ * tell read_inode() whether it is necessary to read the fnode.)
+ *
+ * The map_xxx routines all read something into a buffer and return a
+ * pointer somewhere in the buffer. The caller must do the brelse.
+ * The other routines are balanced.
+ *
+ * For details on the data structures see hpfs.h and the Duncan paper.
+ *
+ * Overview
+ *
+ * [ The names of these data structures, except fnode, are not Microsoft's
+ * or IBM's. I don't know what names they use. The semantics described
+ * here are those of this implementation, and any coincidence between it
+ * and real HPFS is to be hoped for but not guaranteed by me, and
+ * certainly not guaranteed by MS or IBM. Who know nothing about this. ]
+ *
+ * [ Also, the following will make little sense if you haven't read the
+ * Duncan paper, which is excellent. ]
+ *
+ * HPFS is a tree. There are 3 kinds of nodes. A directory is a tree
+ * of dnodes, and a file's allocation info is a tree of sector runs
+ * stored in fnodes and anodes.
+ *
+ * The top pointer is in the super block, it points to the fnode of the
+ * root directory.
+ *
+ * The root directory -- all directories -- gives file names, dates &c,
+ * and fnode addresses. If the directory fits in one dnode, that's it,
+ * otherwise the top dnode points to other dnodes, forming a tree. A
+ * dnode tree (one directory) might look like
+ *
+ * ((a b c) d (e f g) h (i j) k l (m n o p))
+ *
+ * The subtrees appear between the files. Each dir entry contains, along
+ * with the name and fnode, a dnode pointer to the subtree that precedes it
+ * (if there is one; a flag tells that). The first entry in every directory
+ * is ^A^A, the "." entry for the directory itself. The last entry in every
+ * dnode is \377, a fake entry whose only valid fields are the bit marking
+ * it last and the down pointer to the subtree preceding it, if any.
+ *
+ * The "value" field of directory entries is an fnode address. The fnode
+ * tells where the sectors of the file are. The fnode for a subdirectory
+ * contains one pointer, to the root dnode of the subdirectory. The fnode
+ * for a data file contains, in effect, a tiny anode. (Most of the space
+ * in fnodes is for extended attributes.)
+ *
+ * anodes and the anode part of fnodes are trees of extents. An extent
+ * is a (length, disk address) pair, labeled with the file address being
+ * mapped. E.g.,
+ *
+ * (0: 3@1000 3: 1@2000 4: 2@10)
+ *
+ * means the file:disk sector map (0:1000 1:1001 2:1002 3:2000 4:10 5:11).
+ *
+ * There is space for 8 file:len@disk triples in an fnode, or for 40 in an
+ * anode. If this is insufficient, subtrees are used, as in
+ *
+ * (6: (0: 3@1000 3: 1@2000 4: 2@10) 12: (6: 3@8000 9: 1@9000 10: 2@20))
+ *
+ * The label on a subtree is the first address *after* that tree. The
+ * subtrees are always anodes. The label:subtree pairs require only
+ * two words each, so non-leaf subtrees have a different format; there
+ * is room for 12 label:subtree pairs in an fnode, or 60 in an anode.
+ *
+ * Within a directory, each dnode contains a pointer up to its parent
+ * dnode. The root dnode points up to the directory's fnode.
+ *
+ * Each fnode contains a pointer to the directory that contains it
+ * (to the fnode of the directory). So this pointer in a directory
+ * fnode is "..".
+ *
+ * On the disk, dnodes are all together in the center of the partition,
+ * and HPFS even manages to put all the dnodes for a single directory
+ * together, generally. fnodes are out with the data. anodes are seldom
+ * seen -- in fact noncontiguous files are seldom seen. I think this is
+ * partly the open() call that lets programs specify the length of an
+ * output file when they know it, and partly because HPFS.IFS really is
+ * very good at resisting fragmentation.
+ */
+
+/* notation */
+
+/* Byte offset of the d_name member within the object de points to */
+#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
+/* Round x up to the next multiple of 4 */
+#define ROUND_UP(x) (((x)+3) & ~3)
+
+/* Read an unsigned short in place from the storage of x, with no byte
+ swapping. NOTE(review): presumably relies on a little-endian host
+ that tolerates unaligned loads -- confirm before porting. */
+#define little_ushort(x) (*(unsigned short *) &(x))
+/* Cast target used to drop const from the file_operations tables */
+typedef void nonconst;
+
+/* super block ops */
+
+static void hpfs_read_inode(struct inode *);
+static void hpfs_put_super(struct super_block *);
+static void hpfs_statfs(struct super_block *, struct statfs *);
+static int hpfs_remount_fs(struct super_block *, int *, char *);
+
+/* Positional initializer: entries must stay in the member order of
+ struct super_operations. NULL slots are operations this filesystem
+ does not supply. */
+static const struct super_operations hpfs_sops =
+{
+ hpfs_read_inode, /* read_inode */
+ NULL, /* notify_change */
+ NULL, /* write_inode */
+ NULL, /* put_inode */
+ hpfs_put_super, /* put_super */
+ NULL, /* write_super */
+ hpfs_statfs, /* statfs */
+ hpfs_remount_fs, /* remount_fs */
+};
+
+/* file ops */
+
+static int hpfs_file_read(struct inode *, struct file *, char *, int);
+static secno hpfs_bmap(struct inode *, unsigned);
+
+/* Operations on regular files. Positional initializer: entries must stay
+ in the member order of struct file_operations. There is no write
+ entry. */
+static const struct file_operations hpfs_file_ops =
+{
+ NULL, /* lseek - default */
+ hpfs_file_read, /* read */
+ NULL, /* write */
+ NULL, /* readdir - bad */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ generic_mmap, /* mmap */
+ NULL, /* no special open is needed */
+ NULL, /* release */
+ file_fsync, /* fsync */
+};
+
+/* Inode operations for regular files: only the default file operations
+ and bmap are supplied. hpfs_bmap is declared as
+ secno (struct inode *, unsigned), so it is cast to the slot's
+ int (struct inode *, int) type; the nonconst cast drops the const of
+ hpfs_file_ops. */
+static const struct inode_operations hpfs_file_iops =
+{
+ (nonconst *) & hpfs_file_ops, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ (int (*)(struct inode *, int))
+ &hpfs_bmap, /* bmap */
+ NULL, /* truncate */
+ NULL, /* permission */
+};
+
+/* directory ops */
+
+static int hpfs_dir_read(struct inode *inode, struct file *filp,
+ char *buf, int count);
+static int hpfs_readdir(struct inode *inode, struct file *filp,
+ struct dirent *dirent, int count);
+static int hpfs_lookup(struct inode *, const char *, int, struct inode **);
+
+/* Operations on open directories. Positional initializer: entries must
+ stay in the member order of struct file_operations. */
+static const struct file_operations hpfs_dir_ops =
+{
+ NULL, /* lseek - default */
+ hpfs_dir_read, /* read */
+ NULL, /* write - bad */
+ hpfs_readdir, /* readdir */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ file_fsync, /* fsync */
+};
+
+/* Inode operations for directories: only the default file operations
+ and lookup are supplied; every name-modifying slot is NULL. */
+static const struct inode_operations hpfs_dir_iops =
+{
+ (nonconst *) & hpfs_dir_ops, /* default directory file ops */
+ NULL, /* create */
+ hpfs_lookup, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL, /* permission */
+};
+
+/* Four 512-byte buffers and the 2k block obtained by concatenating them */
+
+struct quad_buffer_head {
+ struct buffer_head *bh[4]; /* the four sector buffers */
+ void *data; /* the assembled 2k block */
+};
+
+/* forwards */
+
+/* Prototypes for file-local helpers that are used before their
+ definitions appear. Per the overview comment at the top of this file,
+ the map_xxx routines return a pointer into a buffer that the caller
+ must release. */
+
+static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
+ int *lowercase, int *conv);
+static int check_warn(int not_ok,
+ const char *p1, const char *p2, const char *p3);
+static int zerop(void *addr, unsigned len);
+static void count_dnodes(struct inode *inode, dnode_secno dno,
+ unsigned *n_dnodes, unsigned *n_subdirs);
+static unsigned count_bitmap(struct super_block *s);
+static unsigned count_one_bitmap(dev_t dev, secno secno);
+static secno bplus_lookup(struct inode *inode, struct bplus_header *b,
+ secno file_secno, struct buffer_head **bhp);
+static struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno,
+ const unsigned char *name, unsigned len,
+ struct quad_buffer_head *qbh);
+static struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
+ struct quad_buffer_head *qbh);
+static void write_one_dirent(struct dirent *dirent, const unsigned char *name,
+ unsigned namelen, ino_t ino, int lowercase);
+static dnode_secno dir_subdno(struct inode *inode, unsigned pos);
+static struct hpfs_dirent *map_nth_dirent(dev_t dev, dnode_secno dno,
+ int n,
+ struct quad_buffer_head *qbh);
+static unsigned choose_conv(unsigned char *p, unsigned len);
+static unsigned convcpy_tofs(unsigned char *out, unsigned char *in,
+ unsigned len);
+static dnode_secno fnode_dno(dev_t dev, ino_t ino);
+static struct fnode *map_fnode(dev_t dev, ino_t ino,
+ struct buffer_head **bhp);
+static struct anode *map_anode(dev_t dev, unsigned secno,
+ struct buffer_head **bhp);
+static struct dnode *map_dnode(dev_t dev, unsigned secno,
+ struct quad_buffer_head *qbh);
+static void *map_sector(dev_t dev, unsigned secno, struct buffer_head **bhp);
+static void *map_4sectors(dev_t dev, unsigned secno,
+ struct quad_buffer_head *qbh);
+static void brelse4(struct quad_buffer_head *qbh);
+
+/*
+ * inode number for a file: the fnode sector shifted left one bit,
+ * with the low bit set
+ */
+
+static inline ino_t file_ino(fnode_secno secno)
+{
+	return (secno << 1) | 1;
+}
+
+/*
+ * inode number for a directory: the fnode sector shifted left one bit,
+ * low bit clear
+ */
+
+static inline ino_t dir_ino(fnode_secno secno)
+{
+	return (secno << 1);
+}
+
+/*
+ * recover the fnode sector from an inode number by dropping the
+ * file/directory bit
+ */
+
+static inline fnode_secno ino_secno(ino_t ino)
+{
+	return (ino >> 1);
+}
+
+/*
+ * directories have even inode numbers
+ */
+
+static inline int ino_is_dir(ino_t ino)
+{
+	return !(ino & 1);
+}
+
+/*
+ * conv= options
+ */
+
+#define CONV_BINARY 0 /* no conversion */
+#define CONV_TEXT 1 /* crlf->newline */
+#define CONV_AUTO 2 /* decide based on file contents */
+
+/*
+ * local time (HPFS) to GMT (Unix): add the kernel time zone's
+ * minutes-west offset, converted to seconds
+ */
+
+static inline time_t local_to_gmt(time_t t)
+{
+	extern struct timezone sys_tz;
+	int seconds_west = sys_tz.tz_minuteswest * 60;
+
+	return t + seconds_west;
+}
+
+/* super block ops */
+
+/*
+ * mount.  This gets one thing, the root directory inode.  It does a
+ * bunch of guessed-at consistency checks.
+ *
+ * s        super block being filled in; s->s_dev names the device
+ * options  mount option string passed to parse_opts(), may be null
+ * silent   not consulted here
+ *
+ * Returns s on success.  On failure returns 0 with s->s_dev cleared,
+ * the super block unlocked and every buffer and inode obtained here
+ * released.
+ */
+
+struct super_block *hpfs_read_super(struct super_block *s,
+				    void *options, int silent)
+{
+	struct hpfs_boot_block *bootblock;
+	struct hpfs_super_block *superblock;
+	struct hpfs_spare_block *spareblock;
+	struct hpfs_dirent *de = NULL;
+	struct buffer_head *bh0, *bh1, *bh2;
+	struct quad_buffer_head qbh;
+	dnode_secno root_dno;
+	dev_t dev;
+	uid_t uid;
+	gid_t gid;
+	umode_t umask;
+	int lowercase;
+	int conv;
+	int dubious;
+
+	/*
+	 * Get the mount options
+	 */
+
+	if (!parse_opts(options, &uid, &gid, &umask, &lowercase, &conv)) {
+		printk("HPFS: syntax error in mount options. Not mounted.\n");
+		s->s_dev = 0;
+		return 0;
+	}
+
+	/*
+	 * Fill in the super block struct
+	 */
+
+	lock_super(s);
+	dev = s->s_dev;
+	set_blocksize(dev, 512);
+
+	/*
+	 * fetch sectors 0, 16, 17
+	 */
+
+	bootblock = map_sector(dev, 0, &bh0);
+	if (!bootblock)
+		goto bail;
+
+	superblock = map_sector(dev, 16, &bh1);
+	if (!superblock)
+		goto bail0;
+
+	spareblock = map_sector(dev, 17, &bh2);
+	if (!spareblock)
+		goto bail1;
+
+	/*
+	 * Check that this fs looks enough like a known one that we can find
+	 * and read the root directory.
+	 *
+	 * sig_hpfs is an 8-byte field, "HPFS" padded with blanks, so the
+	 * literal must supply all 8 bytes that memcmp examines.
+	 */
+
+	if (bootblock->magic != 0xaa55
+	    || superblock->magic != SB_MAGIC
+	    || spareblock->magic != SP_MAGIC
+	    || bootblock->sig_28h != 0x28
+	    || memcmp(&bootblock->sig_hpfs, "HPFS    ", 8)
+	    || little_ushort(bootblock->bytes_per_sector) != 512) {
+		printk("HPFS: hpfs_read_super: Not HPFS\n");
+		goto bail2;
+	}
+
+	/*
+	 * Check for inconsistencies -- possibly wrong guesses here, possibly
+	 * filesystem problems.
+	 */
+
+	dubious = 0;
+
+	dubious |= check_warn(spareblock->dirty != 0,
+		       "`Improperly stopped'", "flag is set", "run CHKDSK");
+	dubious |= check_warn(spareblock->n_spares_used != 0,
+			      "Spare blocks", "may be in use", "run CHKDSK");
+
+	/*
+	 * Above errors mean we could get wrong answers if we proceed,
+	 * so don't
+	 */
+
+	if (dubious)
+		goto bail2;
+
+	dubious |= check_warn((spareblock->n_dnode_spares !=
+			       spareblock->n_dnode_spares_free),
+			      "Spare dnodes", "may be in use", "run CHKDSK");
+	dubious |= check_warn(superblock->zero1 != 0,
+			      "#1", "unknown word nonzero", "investigate");
+	dubious |= check_warn(superblock->zero3 != 0,
+			      "#3", "unknown word nonzero", "investigate");
+	dubious |= check_warn(superblock->zero4 != 0,
+			      "#4", "unknown word nonzero", "investigate");
+	dubious |= check_warn(!zerop(superblock->zero5,
+				     sizeof superblock->zero5),
+			      "#5", "unknown word nonzero", "investigate");
+	dubious |= check_warn(!zerop(superblock->zero6,
+				     sizeof superblock->zero6),
+			      "#6", "unknown word nonzero", "investigate");
+
+	if (dubious)
+		printk("HPFS: Proceeding, but operation may be unreliable\n");
+
+	/*
+	 * set fs read only
+	 */
+
+	s->s_flags |= MS_RDONLY;
+
+	/*
+	 * fill in standard stuff
+	 */
+
+	s->s_magic = HPFS_SUPER_MAGIC;
+	s->s_blocksize = 512;
+	s->s_blocksize_bits = 9;
+	s->s_op = (struct super_operations *) &hpfs_sops;
+
+	/*
+	 * fill in hpfs stuff
+	 */
+
+	s->s_hpfs_root = dir_ino(superblock->root);
+	s->s_hpfs_fs_size = superblock->n_sectors;
+	s->s_hpfs_dirband_size = superblock->n_dir_band / 4;
+	s->s_hpfs_dmap = superblock->dir_band_bitmap;
+	s->s_hpfs_bitmaps = superblock->bitmaps;
+	s->s_hpfs_uid = uid;
+	s->s_hpfs_gid = gid;
+	s->s_hpfs_mode = 0777 & ~umask;
+	s->s_hpfs_n_free = -1;
+	s->s_hpfs_n_free_dnodes = -1;
+	s->s_hpfs_lowercase = lowercase;
+	s->s_hpfs_conv = conv;
+
+	/*
+	 * done with the low blocks
+	 */
+
+	brelse(bh2);
+	brelse(bh1);
+	brelse(bh0);
+
+	/*
+	 * all set.  try it out.
+	 */
+
+	s->s_mounted = iget(s, s->s_hpfs_root);
+	unlock_super(s);
+
+	if (!s->s_mounted) {
+		printk("HPFS: hpfs_read_super: inode get failed\n");
+		s->s_dev = 0;
+		return 0;
+	}
+
+	/*
+	 * find the root directory's . pointer & finish filling in the inode
+	 */
+
+	root_dno = fnode_dno(dev, s->s_hpfs_root);
+	if (root_dno)
+		de = map_dirent(s->s_mounted, root_dno, "\001\001", 2, &qbh);
+	if (!root_dno || !de) {
+		printk("HPFS: "
+		       "hpfs_read_super: root dir isn't in the root dir\n");
+		/* give back the root inode obtained from iget() above */
+		iput(s->s_mounted);
+		s->s_mounted = NULL;
+		s->s_dev = 0;
+		return 0;
+	}
+
+	s->s_mounted->i_atime = local_to_gmt(de->read_date);
+	s->s_mounted->i_mtime = local_to_gmt(de->write_date);
+	s->s_mounted->i_ctime = local_to_gmt(de->creation_date);
+
+	brelse4(&qbh);
+	return s;
+
+	/*
+	 * error exits while the super block is still locked: release the
+	 * sector buffers in reverse order of acquisition
+	 */
+
+ bail2:
+	brelse(bh2);
+ bail1:
+	brelse(bh1);
+ bail0:
+	brelse(bh0);
+ bail:
+	s->s_dev = 0;
+	unlock_super(s);
+	return 0;
+}
+
+/*
+ * Report a suspicious condition found while examining the volume.
+ * When not_ok is nonzero, log "HPFS: <p1> <p2>. Please <p3>".
+ * The flag is handed back unchanged so callers can OR results together.
+ */
+static int check_warn(int not_ok,
+		      const char *p1, const char *p2, const char *p3)
+{
+	if (!not_ok)
+		return 0;
+
+	printk("HPFS: %s %s. Please %s\n", p1, p2, p3);
+	return not_ok;
+}
+
+/*
+ * Return nonzero iff the len bytes starting at addr are all zero.
+ * A zero-length region counts as all-zero and no memory is touched
+ * for it.
+ */
+static int zerop(void *addr, unsigned len)
+{
+	const unsigned char *p = addr;
+
+	while (len--)
+		if (*p++)
+			return 0;
+	return 1;
+}
+
+/*
+ * A tiny parser for option strings, stolen from dosfs.
+ *
+ * opts is a comma-separated list of "name" or "name=value" items.  It is
+ * modified in place: strtok() splits it and each '=' is overwritten with
+ * a NUL.  Recognized options:
+ *
+ *	uid=N, gid=N		parsed by simple_strtoul() with base 0
+ *	umask=N			parsed by simple_strtoul() with base 8
+ *	case=lower|asis		sets *lowercase to 1 or 0
+ *	conv=binary|text|auto	sets *conv to the matching CONV_ value
+ *
+ * All outputs are first set to defaults: uid, gid and umask of the
+ * current process, case=lower, conv=binary.  A null opts keeps them.
+ *
+ * Returns 1 on success, 0 for an unknown option or for a missing or
+ * malformed value.
+ */
+
+static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
+		      int *lowercase, int *conv)
+{
+	char *p, *rhs;
+
+	*uid = current->uid;
+	*gid = current->gid;
+	*umask = current->fs->umask;
+	*lowercase = 1;
+	*conv = CONV_BINARY;
+
+	if (!opts)
+		return 1;
+
+	for (p = strtok(opts, ","); p != 0; p = strtok(0, ",")) {
+		/* split "name=value"; rhs stays null for a bare "name" */
+		if ((rhs = strchr(p, '=')) != 0)
+			*rhs++ = '\0';
+		if (!strcmp(p, "uid")) {
+			if (!rhs || !*rhs)
+				return 0;
+			*uid = simple_strtoul(rhs, &rhs, 0);
+			/* trailing junk after the number */
+			if (*rhs)
+				return 0;
+		}
+		else if (!strcmp(p, "gid")) {
+			if (!rhs || !*rhs)
+				return 0;
+			*gid = simple_strtoul(rhs, &rhs, 0);
+			if (*rhs)
+				return 0;
+		}
+		else if (!strcmp(p, "umask")) {
+			if (!rhs || !*rhs)
+				return 0;
+			*umask = simple_strtoul(rhs, &rhs, 8);
+			if (*rhs)
+				return 0;
+		}
+		else if (!strcmp(p, "case")) {
+			/* bare "case" has no value to compare */
+			if (!rhs)
+				return 0;
+			if (!strcmp(rhs, "lower"))
+				*lowercase = 1;
+			else if (!strcmp(rhs, "asis"))
+				*lowercase = 0;
+			else
+				return 0;
+		}
+		else if (!strcmp(p, "conv")) {
+			/* bare "conv" has no value to compare */
+			if (!rhs)
+				return 0;
+			if (!strcmp(rhs, "binary"))
+				*conv = CONV_BINARY;
+			else if (!strcmp(rhs, "text"))
+				*conv = CONV_TEXT;
+			else if (!strcmp(rhs, "auto"))
+				*conv = CONV_AUTO;
+			else
+				return 0;
+		}
+		else
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * read_inode. This is called with exclusive access to a new inode that
+ * has only (i_dev,i_ino) set. It is responsible for filling in the rest.
+ * We leave the dates blank, to be filled in from the dir entry.
+ *
+ * NOTE that there must be no sleeping from the return in this routine
+ * until lookup() finishes filling in the inode, otherwise the partly
+ * completed inode would be visible during the sleep.
+ *
+ * It is done in this strange and sinful way because the alternative
+ * is to read the fnode, find the dir pointer in it, read that fnode
+ * to get the dnode pointer, search through that whole directory for
+ * the ino we're reading, and get the dates. It works that way, but
+ * ls sounds like fsck.
+ *
+ * On failure (bad inode number, or a directory fnode that cannot be
+ * mapped) the inode is left with i_op == 0 and i_mode == 0.
+ */
+
+static void hpfs_read_inode(struct inode *inode)
+{
+ struct super_block *s = inode->i_sb;
+
+ /* be ready to bail out */
+
+ inode->i_op = 0;
+ inode->i_mode = 0;
+
+ /* reject ino 0 and any ino whose sector lies beyond the volume size */
+ if (inode->i_ino == 0
+ || ino_secno(inode->i_ino) >= inode->i_sb->s_hpfs_fs_size) {
+ printk("HPFS: read_inode: bad ino\n");
+ return;
+ }
+
+ /*
+ * canned stuff
+ */
+
+ inode->i_uid = s->s_hpfs_uid;
+ inode->i_gid = s->s_hpfs_gid;
+ inode->i_mode = s->s_hpfs_mode;
+ inode->i_hpfs_conv = s->s_hpfs_conv;
+
+ /* start with empty sector-run and dnode-position caches */
+ inode->i_hpfs_dno = 0;
+ inode->i_hpfs_n_secs = 0;
+ inode->i_hpfs_file_sec = 0;
+ inode->i_hpfs_disk_sec = 0;
+ inode->i_hpfs_dpos = 0;
+ inode->i_hpfs_dsubdno = 0;
+
+ /*
+ * figure out whether we are looking at a directory or a file
+ */
+
+ if (ino_is_dir(inode->i_ino))
+ inode->i_mode |= S_IFDIR;
+ else {
+ inode->i_mode |= S_IFREG;
+ /* regular files never get execute permission */
+ inode->i_mode &= ~0111;
+ }
+
+ /*
+ * these fields must be filled in from the dir entry, which we don't
+ * have but lookup does. It will fill them in before letting the
+ * inode out of its grasp.
+ */
+
+ inode->i_atime = 0;
+ inode->i_mtime = 0;
+ inode->i_ctime = 0;
+ inode->i_size = 0;
+
+ /*
+ * fill in the rest
+ */
+
+ if (S_ISREG(inode->i_mode)) {
+
+ /* regular file: nothing is read from disk in this branch */
+ inode->i_op = (struct inode_operations *) &hpfs_file_iops;
+ inode->i_nlink = 1;
+ inode->i_blksize = 512;
+
+ }
+ else {
+ unsigned n_dnodes, n_subdirs;
+ struct buffer_head *bh0;
+ struct fnode *fnode = map_fnode(inode->i_dev,
+ inode->i_ino, &bh0);
+
+ if (!fnode) {
+ printk("HPFS: read_inode: no fnode\n");
+ inode->i_mode = 0;
+ return;
+ }
+
+ /* the fnode supplies the parent directory and the root dnode sector */
+ inode->i_hpfs_parent_dir = dir_ino(fnode->up);
+ inode->i_hpfs_dno = fnode->u.external[0].disk_secno;
+
+ brelse(bh0);
+
+ /* walk the whole dnode tree to size the directory and count subdirs */
+ n_dnodes = n_subdirs = 0;
+ count_dnodes(inode, inode->i_hpfs_dno, &n_dnodes, &n_subdirs);
+
+ inode->i_op = (struct inode_operations *) &hpfs_dir_iops;
+ inode->i_blksize = 512; /* 2048 here confuses ls & du & ... */
+ /* each dnode is accounted as four 512-byte blocks */
+ inode->i_blocks = 4 * n_dnodes;
+ inode->i_size = 512 * inode->i_blocks;
+ inode->i_nlink = 2 + n_subdirs;
+ }
+}
+
+/*
+ * unmount. Clears s_dev while holding the superblock lock; nothing
+ * else is released or written back here.
+ */
+
+static void hpfs_put_super(struct super_block *s)
+{
+ lock_super(s);
+ s->s_dev = 0;
+ unlock_super(s);
+}
+
+/*
+ * statfs. For free inode counts we report the count of dnodes in the
+ * directory band -- not exactly right but pretty analogous.
+ *
+ * The free-sector and free-dnode counts are computed on the first call
+ * and cached in the superblock; -1 in s_hpfs_n_free means "not counted
+ * yet". Block counts are reported in 512-byte units (f_bsize).
+ * buf is only written through put_fs_long(), so it is presumably a
+ * user-space address.
+ */
+
+static void hpfs_statfs(struct super_block *s, struct statfs *buf)
+{
+ /*
+ * count the bits in the bitmaps, unless we already have
+ */
+
+ if (s->s_hpfs_n_free == -1) {
+ s->s_hpfs_n_free = count_bitmap(s);
+ s->s_hpfs_n_free_dnodes =
+ count_one_bitmap(s->s_dev, s->s_hpfs_dmap);
+ }
+
+ /*
+ * fill in the user statfs struct
+ */
+
+ put_fs_long(s->s_magic, &buf->f_type);
+ put_fs_long(512, &buf->f_bsize);
+ put_fs_long(s->s_hpfs_fs_size, &buf->f_blocks);
+ /* f_bfree and f_bavail get the same figure */
+ put_fs_long(s->s_hpfs_n_free, &buf->f_bfree);
+ put_fs_long(s->s_hpfs_n_free, &buf->f_bavail);
+ put_fs_long(s->s_hpfs_dirband_size, &buf->f_files);
+ put_fs_long(s->s_hpfs_n_free_dnodes, &buf->f_ffree);
+ put_fs_long(254, &buf->f_namelen);
+}
+
+/*
+ * remount.  The requested flags must still include MS_RDONLY; anything
+ * else is refused with -EINVAL.  s and data are not examined.
+ */
+
+static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
+{
+	return (*flags & MS_RDONLY) ? 0 : -EINVAL;
+}
+
+/*
+ * Walk the dnode tree rooted at dno.  *n_dnodes gains one for every
+ * dnode that could be mapped; *n_subdirs gains one for every entry that
+ * is flagged as a directory and is not the dnode's "first" entry.
+ * A dnode that cannot be mapped is skipped (map_dnode reports it).
+ */
+
+static void count_dnodes(struct inode *inode, dnode_secno dno,
+			 unsigned *n_dnodes, unsigned *n_subdirs)
+{
+	struct quad_buffer_head qbh;
+	struct hpfs_dirent *cur, *limit;
+	struct dnode *node = map_dnode(inode->i_dev, dno, &qbh);
+
+	if (!node)
+		return;
+
+	cur = dnode_first_de(node);
+	limit = dnode_end_de(node);
+	*n_dnodes += 1;
+
+	while (cur < limit) {
+		/* descend first, then account for the entry itself */
+		if (cur->down)
+			count_dnodes(inode, de_down_pointer(cur),
+				     n_dnodes, n_subdirs);
+		if (cur->directory && !cur->first)
+			*n_subdirs += 1;
+		/* end marker, or a zero length that would never advance */
+		if (cur->last || cur->length == 0)
+			break;
+		cur = de_next_de(cur);
+	}
+
+	brelse4(&qbh);
+}
+
+/*
+ * count the bits in the free space bit maps
+ *
+ * Returns the sum of count_one_bitmap() over every band's bit map, or 0
+ * if the list of bit map pointers cannot be read.  Only the first 2048
+ * bytes of that list are mapped, so at most 2048 / sizeof(secno) bands
+ * are examined; a larger volume is reported and counted only up to
+ * that limit.
+ */
+
+static unsigned count_bitmap(struct super_block *s)
+{
+	unsigned n, count, n_bands, max_bands;
+	secno *bitmaps;
+	struct quad_buffer_head qbh;
+
+	/*
+	 * there is one bit map for each 16384 sectors
+	 */
+	n_bands = (s->s_hpfs_fs_size + 0x3fff) >> 14;
+
+	/*
+	 * their locations are given in an array pointed to by the super
+	 * block
+	 */
+	bitmaps = map_4sectors(s->s_dev, s->s_hpfs_bitmaps, &qbh);
+	if (!bitmaps)
+		return 0;
+
+	/*
+	 * never index past the 2048 bytes that were actually mapped
+	 */
+	max_bands = 2048 / sizeof *bitmaps;
+	if (n_bands > max_bands) {
+		printk("HPFS: count_bitmap: bit map list truncated\n");
+		n_bands = max_bands;
+	}
+
+	count = 0;
+
+	/*
+	 * map each one and count the free sectors
+	 */
+	for (n = 0; n < n_bands; n++)
+		if (bitmaps[n] == 0)
+			printk("HPFS: bit map pointer missing\n");
+		else
+			count += count_one_bitmap(s->s_dev, bitmaps[n]);
+
+	brelse4(&qbh);
+	return count;
+}
+
+/*
+ * Map the four-sector bit map at secno and return how many of its
+ * 8 * 2048 bits are set.  Returns 0 if the bit map cannot be mapped.
+ */
+
+static unsigned count_one_bitmap(dev_t dev, secno secno)
+{
+	struct quad_buffer_head qbh;
+	unsigned bit, n_set = 0;
+	char *map = map_4sectors(dev, secno, &qbh);
+
+	if (!map)
+		return 0;
+
+	for (bit = 0; bit < 8 * 2048; bit++)
+		if (test_bit(bit, map) != 0)
+			n_set++;
+
+	brelse4(&qbh);
+	return n_set;
+}
+
+/* file ops */
+
+/*
+ * read. Read the bytes, put them in buf, return the count.
+ *
+ * buf is only written through memcpy_tofs()/put_fs_byte(), so it is
+ * presumably a user-space address. f_pos always advances by the number
+ * of bytes consumed from the file; with text conversion the number of
+ * bytes stored (and returned) can be smaller because carriage returns
+ * are squeezed out.
+ *
+ * Returns -EINVAL for a null or non-regular inode and -EIO if a sector
+ * cannot be mapped.
+ */
+
+static int hpfs_file_read(struct inode *inode, struct file *filp,
+ char *buf, int count)
+{
+ unsigned q, r, n, n0;
+ struct buffer_head *bh;
+ char *block;
+ char *start;
+
+ if (inode == 0 || !S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ /*
+ * truncate count at EOF
+ */
+ if (count > inode->i_size - (off_t) filp->f_pos)
+ count = inode->i_size - filp->f_pos;
+
+ start = buf;
+ while (count > 0) {
+ /*
+ * get file sector number, offset in sector, length to end of
+ * sector
+ */
+ q = filp->f_pos >> 9;
+ r = filp->f_pos & 511;
+ n = 512 - r;
+
+ /*
+ * get length to copy to user buffer
+ */
+ if (n > count)
+ n = count;
+
+ /*
+ * read the sector, copy to user
+ *
+ * NOTE(review): hpfs_bmap() returns 0 when the fnode cannot be
+ * mapped, and that value is handed straight to map_sector().
+ * NOTE(review): on failure -EIO is returned even if earlier
+ * iterations already copied bytes to buf.
+ */
+ block = map_sector(inode->i_dev, hpfs_bmap(inode, q), &bh);
+ if (!block)
+ return -EIO;
+
+ /*
+ * but first decide if it has \r\n, if the mount option said
+ * to do that
+ *
+ * The verdict replaces CONV_AUTO in the inode, so it is made
+ * once, from the first chunk read through this inode.
+ */
+ if (inode->i_hpfs_conv == CONV_AUTO)
+ inode->i_hpfs_conv = choose_conv(block + r, n);
+
+ if (inode->i_hpfs_conv == CONV_BINARY) {
+ /*
+ * regular copy, output length is same as input
+ * length
+ */
+ memcpy_tofs(buf, block + r, n);
+ n0 = n;
+ }
+ else {
+ /*
+ * squeeze out \r, output length varies
+ */
+ n0 = convcpy_tofs(buf, block + r, n);
+ /*
+ * re-clamp count so that, once n0 is subtracted below,
+ * it does not exceed the bytes left in the file
+ */
+ if (count > inode->i_size - (off_t) filp->f_pos - n + n0)
+ count = inode->i_size - filp->f_pos - n + n0;
+ }
+
+ brelse(bh);
+
+ /*
+ * advance input n bytes, output n0 bytes
+ */
+ filp->f_pos += n;
+ buf += n0;
+ count -= n0;
+ }
+
+ return buf - start;
+}
+
+/*
+ * This routine implements conv=auto.  The len bytes at p vote:
+ *
+ *	CR immediately followed by LF	10 points for text
+ *	tab or newline			no vote
+ *	any other byte below ' ', or
+ *	any byte at or above 0177	5 points for binary
+ *	everything else			1 point for text
+ *
+ * A CR in the last position is never counted as a CR LF pair.
+ * Returns CONV_TEXT when text strictly outscores binary, otherwise
+ * CONV_BINARY.
+ */
+
+static unsigned choose_conv(unsigned char *p, unsigned len)
+{
+	unsigned text_score = 0, binary_score = 0;
+
+	while (len != 0) {
+		unsigned ch = *p++;
+
+		len--;		/* now the number of bytes after ch */
+
+		if (ch == '\r' && len != 0 && *p == '\n') {
+			text_score += 10;
+		} else if (ch == '\t' || ch == '\n') {
+			/* neutral */
+		} else if (ch < ' ' || ch >= '\177') {
+			binary_score += 5;
+		} else {
+			text_score++;
+		}
+	}
+
+	return text_score > binary_score ? CONV_TEXT : CONV_BINARY;
+}
+
+/*
+ * This routine implements conv=text. :s/crlf/nl/
+ *
+ * Copies len bytes from in to out with put_fs_byte(), dropping every CR
+ * that is followed by LF and also a CR that is the very last input byte.
+ * Returns the number of bytes stored.
+ */
+
+static unsigned convcpy_tofs(unsigned char *out, unsigned char *in,
+			     unsigned len)
+{
+	unsigned stored = 0;
+
+	while (len != 0) {
+		unsigned ch = *in++;
+
+		len--;		/* bytes still to come after ch */
+
+		if (ch == '\r' && (len == 0 || *in == '\n'))
+			continue;
+
+		put_fs_byte(ch, out + stored);
+		stored++;
+	}
+
+	return stored;
+}
+
+/*
+ * Translate a file sector number into a disk sector number.
+ *
+ * The inode caches one run of sectors (i_hpfs_file_sec, i_hpfs_disk_sec,
+ * i_hpfs_n_secs).  A hit is answered from the cache; otherwise the fnode
+ * is mapped and its allocation tree searched.  Returns 0 if the fnode
+ * cannot be mapped.
+ */
+
+static secno hpfs_bmap(struct inode *inode, unsigned file_secno)
+{
+	struct buffer_head *bh;
+	struct fnode *fnode;
+	unsigned found;
+	/* unsigned difference: a sector before the cached run wraps to a huge value */
+	unsigned offset = file_secno - inode->i_hpfs_file_sec;
+
+	if (offset < inode->i_hpfs_n_secs)
+		return inode->i_hpfs_disk_sec + offset;
+
+	fnode = map_fnode(inode->i_dev, inode->i_ino, &bh);
+	if (!fnode)
+		return 0;
+
+	/* bplus_lookup may swap bh for a subtree's buffer */
+	found = bplus_lookup(inode, &fnode->btree, file_secno, &bh);
+	brelse(bh);
+	return found;
+}
+
+/*
+ * Search allocation tree *b for the given file sector number and return
+ * the disk sector number. Buffer *bhp has the tree in it, and can be
+ * reused for subtrees when access to *b is no longer needed.
+ *
+ * *bhp is busy on entry. On exit it is either a busy buffer or null
+ * (when a subtree's anode could not be mapped), so the caller can pass
+ * it to brelse() unconditionally without releasing a buffer twice.
+ *
+ * A successful leaf lookup also stores the matching run in the inode's
+ * one-entry cache (i_hpfs_file_sec, i_hpfs_disk_sec, i_hpfs_n_secs).
+ * If the sector is not found, a message is logged and 15 is returned.
+ */
+
+static secno bplus_lookup(struct inode *inode, struct bplus_header *b,
+			  secno file_secno, struct buffer_head **bhp)
+{
+	int i;
+
+	/*
+	 * A leaf-level tree gives a list of sector runs. Find the one
+	 * containing the file sector we want, cache the map info in the
+	 * inode for later, and return the corresponding disk sector.
+	 */
+
+	if (!b->internal) {
+		struct bplus_leaf_node *n = b->u.external;
+		for (i = 0; i < b->n_used_nodes; i++) {
+			/* unsigned: wraps to a huge value if before this run */
+			unsigned t = file_secno - n[i].file_secno;
+			if (t < n[i].length) {
+				inode->i_hpfs_file_sec = n[i].file_secno;
+				inode->i_hpfs_disk_sec = n[i].disk_secno;
+				inode->i_hpfs_n_secs = n[i].length;
+				return n[i].disk_secno + t;
+			}
+		}
+	}
+
+	/*
+	 * A non-leaf tree gives a list of subtrees. Find the one containing
+	 * the file sector we want, read it in, and recurse to search it.
+	 */
+
+	else {
+		struct bplus_internal_node *n = b->u.internal;
+		for (i = 0; i < b->n_used_nodes; i++) {
+			if (file_secno < n[i].file_secno) {
+				struct anode *anode;
+				/* fetch the pointer before its buffer goes away */
+				anode_secno ano = n[i].down;
+				brelse(*bhp);
+				anode = map_anode(inode->i_dev, ano, bhp);
+				if (!anode) {
+					/*
+					 * map_anode holds no buffer now but
+					 * may have left a stale pointer in
+					 * *bhp; clear it for the caller.
+					 */
+					*bhp = 0;
+					break;
+				}
+				return bplus_lookup(inode, &anode->btree,
+						    file_secno, bhp);
+			}
+		}
+	}
+
+	/*
+	 * If we get here there was a hole in the file. As far as I know we
+	 * never do get here, but falling off the end would be indelicate. So
+	 * return a pointer to a handy all-zero sector. This is not a
+	 * reasonable way to handle files with holes if they really do
+	 * happen.
+	 */
+
+	printk("HPFS: bplus_lookup: sector not found\n");
+	return 15;
+}
+
+/* directory ops */
+
+/*
+ * lookup. Search the specified directory for the specified name, set
+ * *result to the corresponding inode.
+ *
+ * lookup uses the inode number to tell read_inode whether it is reading
+ * the inode of a directory or a file -- file ino's are odd, directory
+ * ino's are even. read_inode avoids i/o for file inodes; everything
+ * needed is up here in the directory. (And file fnodes are out in
+ * the boondocks.)
+ *
+ * Returns 0 with *result set, or -ENOENT with *result null. dir is
+ * released with iput() on every path except the dir == 0 check.
+ */
+
+static int hpfs_lookup(struct inode *dir, const char *name, int len,
+ struct inode **result)
+{
+ struct quad_buffer_head qbh;
+ struct hpfs_dirent *de;
+ struct inode *inode;
+ ino_t ino;
+
+ /* In case of madness */
+
+ *result = 0;
+ if (dir == 0)
+ return -ENOENT;
+ if (!S_ISDIR(dir->i_mode))
+ goto bail;
+
+ /*
+ * Read in the directory entry. "." is there under the name ^A^A .
+ * Always read the dir even for . and .. in case we need the dates.
+ *
+ * ".." is answered from the ^A^A entry of the parent directory's
+ * root dnode, found through the parent's fnode.
+ *
+ * NOTE(review): name[0] and name[1] are examined before len is
+ * compared -- confirm callers never pass len == 0.
+ */
+
+ if (name[0] == '.' && len == 1)
+ de = map_dirent(dir, dir->i_hpfs_dno, "\001\001", 2, &qbh);
+ else if (name[0] == '.' && name[1] == '.' && len == 2)
+ de = map_dirent(dir,
+ fnode_dno(dir->i_dev, dir->i_hpfs_parent_dir),
+ "\001\001", 2, &qbh);
+ else
+ de = map_dirent(dir, dir->i_hpfs_dno, name, len, &qbh);
+
+ /*
+ * This is not really a bailout, just means file not found.
+ */
+
+ if (!de)
+ goto bail;
+
+ /*
+ * Get inode number, what we're after.
+ */
+
+ if (de->directory)
+ ino = dir_ino(de->fnode);
+ else
+ ino = file_ino(de->fnode);
+
+ /*
+ * Go find or make an inode.
+ */
+
+ if (!(inode = iget(dir->i_sb, ino)))
+ goto bail1;
+
+ /*
+ * Fill in the info from the directory if this is a newly created
+ * inode.
+ *
+ * read_inode leaves i_atime at 0, which is what marks the inode as
+ * not yet filled in.
+ */
+
+ if (!inode->i_atime) {
+ inode->i_atime = local_to_gmt(de->read_date);
+ inode->i_mtime = local_to_gmt(de->write_date);
+ inode->i_ctime = local_to_gmt(de->creation_date);
+ if (de->read_only)
+ inode->i_mode &= ~0222;
+ if (!de->directory) {
+ inode->i_size = de->file_size;
+ /*
+ * i_blocks should count the fnode and any anodes.
+ * We count 1 for the fnode and don't bother about
+ * anodes -- the disk heads are on the directory band
+ * and we want them to stay there.
+ */
+ inode->i_blocks = 1 + ((inode->i_size + 511) >> 9);
+ }
+ }
+
+ brelse4(&qbh);
+
+ /*
+ * Made it.
+ */
+
+ *result = inode;
+ iput(dir);
+ return 0;
+
+ /*
+ * Didn't.
+ */
+ bail1:
+ brelse4(&qbh);
+ bail:
+ iput(dir);
+ return -ENOENT;
+}
+
+/*
+ * Compare the first n bytes of s1 and s2, treating 'a'..'z' as their
+ * upper-case equivalents (HPFS directory order sorts letters that way).
+ * Returns 0 if they match, otherwise the difference of the first pair
+ * of folded bytes that differ.  n == 0 compares equal.
+ */
+
+static inline int memcasecmp(const unsigned char *s1, const unsigned char *s2,
+			     unsigned n)
+{
+	for (; n != 0; n--) {
+		unsigned a = *s1++;
+		unsigned b = *s2++;
+		int diff;
+
+		if (a >= 'a' && a <= 'z')
+			a -= 'a' - 'A';
+		if (b >= 'a' && b <= 'z')
+			b -= 'a' - 'A';
+
+		diff = a - b;
+		if (diff != 0)
+			return diff;
+	}
+
+	return 0;
+}
+
+/*
+ * Search a directory for the given name, return a pointer to its dir entry
+ * and a pointer to the buffer containing it.
+ *
+ * dno is the dnode at the root of the (sub)tree to search; name/len is a
+ * counted string compared without regard to case.
+ *
+ * On success the entry lives in *qbh, which the caller must release with
+ * brelse4(). On failure 0 is returned and nothing is left mapped.
+ */
+
+static struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno,
+				      const unsigned char *name, unsigned len,
+				      struct quad_buffer_head *qbh)
+{
+	struct dnode *dnode;
+	struct hpfs_dirent *de;
+	struct hpfs_dirent *de_end;
+	int t, l;
+
+	/*
+	 * read the dnode at the root of our subtree
+	 */
+	dnode = map_dnode(inode->i_dev, dno, qbh);
+	if (!dnode)
+		return 0;
+
+	/*
+	 * get pointers to start and end+1 of dir entries
+	 */
+	de = dnode_first_de(dnode);
+	de_end = dnode_end_de(dnode);
+
+	/*
+	 * look through the entries for the name we're after
+	 */
+	for ( ; de < de_end; de = de_next_de(de)) {
+
+		/*
+		 * compare names
+		 */
+		l = len < de->namelen ? len : de->namelen;
+		t = memcasecmp(name, de->name, l);
+
+		/*
+		 * initial substring matches, compare lengths
+		 */
+		if (t == 0) {
+			t = len - de->namelen;
+			/* bingo */
+			if (t == 0)
+				return de;
+		}
+
+		/*
+		 * wanted name .lt. dir name => not present.
+		 */
+		if (t < 0) {
+			/*
+			 * if there is a subtree, search it.
+			 */
+			if (de->down) {
+				/* read the pointer before dropping its buffer */
+				dnode_secno sub_dno = de_down_pointer(de);
+				brelse4(qbh);
+				return map_dirent(inode, sub_dno,
+						  name, len, qbh);
+			}
+			else
+				break;
+		}
+
+		/*
+		 * de->last is set on the last name in the dnode (it's always
+		 * a "\377" pseudo entry). de->length == 0 means we're about
+		 * to infinite loop. This test does nothing in a well-formed
+		 * dnode.
+		 */
+		if (de->last || de->length == 0)
+			break;
+	}
+
+	/*
+	 * name not found. Callers do not release *qbh when they get 0,
+	 * so give the dnode back here.
+	 */
+
+	brelse4(qbh);
+	return 0;
+}
+
+/*
+ * readdir. Return exactly 1 dirent. (I tried and tried, but currently
+ * the interface with libc just does not permit more than 1. If it gets
+ * fixed, throw this out and just walk the tree and write records into
+ * the user buffer.)
+ *
+ * We keep track of our position in the dnode tree with a sort of
+ * dewey-decimal record of subtree locations. Like so:
+ *
+ * (1 (1.1 1.2 1.3) 2 3 (3.1 (3.1.1 3.1.2) 3.2 3.3 (3.3.1)) 4)
+ *
+ * Subtrees appear after their file, out of lexical order,
+ * which would be before their file. It's easier.
+ *
+ * A directory can't hold more than 56 files, so 6 bits are used for
+ * position numbers. If the tree is so deep that the position encoding
+ * doesn't fit, I'm sure something absolutely fascinating happens.
+ *
+ * The actual sequence of f_pos values is
+ * 0 => . -1 => .. 1 1.1 ... 8.9 9 => files -2 => eof
+ *
+ * The directory inode caches one position-to-dnode correspondence so
+ * we won't have to repeatedly scan the top levels of the tree.
+ *
+ * Returns -EBADF if inode is null, has no superblock or is not a
+ * directory; 0 at end of directory; otherwise
+ * ROUND_UP(NAME_OFFSET(dirent) + namelen + 1) for the entry written.
+ * The likely_story argument is not used.
+ */
+
+static int hpfs_readdir(struct inode *inode, struct file *filp,
+ struct dirent *dirent, int likely_story)
+{
+ struct quad_buffer_head qbh;
+ struct hpfs_dirent *de;
+ int namelen, lc;
+ ino_t ino;
+
+ if (inode == 0
+ || inode->i_sb == 0
+ || !S_ISDIR(inode->i_mode))
+ return -EBADF;
+
+ /* mount option: report names in lower case */
+ lc = inode->i_sb->s_hpfs_lowercase;
+
+ switch ((off_t) filp->f_pos) {
+ case 0:
+ /* "." is synthesized, not read from the dnode */
+ write_one_dirent(dirent, ".", 1, inode->i_ino, lc);
+ filp->f_pos = -1;
+ return ROUND_UP(NAME_OFFSET(dirent) + 2);
+
+ case -1:
+ /* ".." likewise, from the parent cached in the inode */
+ write_one_dirent(dirent, "..", 2,
+ inode->i_hpfs_parent_dir, lc);
+ filp->f_pos = 1;
+ return ROUND_UP(NAME_OFFSET(dirent) + 3);
+
+ case -2:
+ return 0;
+
+ default:
+ /* map_pos_dirent advances f_pos past the entry it returns */
+ de = map_pos_dirent(inode, &filp->f_pos, &qbh);
+ if (!de) {
+ filp->f_pos = -2;
+ return 0;
+ }
+
+ namelen = de->namelen;
+ if (de->directory)
+ ino = dir_ino(de->fnode);
+ else
+ ino = file_ino(de->fnode);
+ write_one_dirent(dirent, de->name, namelen, ino, lc);
+ brelse4(&qbh);
+
+ return ROUND_UP(NAME_OFFSET(dirent) + namelen + 1);
+ }
+}
+
+/*
+ * Store one directory record at *dirent using put_fs_long(),
+ * put_fs_word(), put_fs_byte() and memcpy_tofs(): the inode number, the
+ * name and a terminating NUL.  With lowercase set, 'A'..'Z' in the name
+ * are folded to lower case on the way out.
+ *
+ * Note that Linux d_reclen is the length of the file name, and has nothing
+ * to do with the length of the dirent record.
+ */
+
+static void write_one_dirent(struct dirent *dirent, const unsigned char *name,
+			     unsigned namelen, ino_t ino, int lowercase)
+{
+	put_fs_long(ino, &dirent->d_ino);
+	put_fs_word(namelen, &dirent->d_reclen);
+
+	if (!lowercase) {
+		memcpy_tofs(dirent->d_name, name, namelen);
+	} else {
+		unsigned idx = namelen;
+
+		/* walk from the last character back to the first */
+		while (idx != 0) {
+			unsigned ch;
+
+			idx--;
+			ch = name[idx];
+			if (ch >= 'A' && ch <= 'Z')
+				ch += 'a' - 'A';
+			put_fs_byte(ch, &dirent->d_name[idx]);
+		}
+	}
+
+	put_fs_byte(0, &dirent->d_name[namelen]);
+}
+
+/*
+ * Map the dir entry at subtree coordinates given by *posp, and
+ * increment *posp to point to the following dir entry.
+ *
+ * Returns 0 when the directory is exhausted or the dnode cannot be
+ * located. Otherwise the returned entry lives in *qbh, which the caller
+ * releases with brelse4() when done.
+ */
+
+static struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
+ struct quad_buffer_head *qbh)
+{
+ unsigned pos, q, r;
+ dnode_secno dno;
+ struct hpfs_dirent *de;
+
+ /*
+ * Get the position code and split off the rightmost index r
+ */
+
+ pos = *posp;
+ q = pos >> 6;
+ r = pos & 077;
+
+ /*
+ * Get the sector address of the dnode
+ * pointed to by the leading part q
+ */
+
+ dno = dir_subdno(inode, q);
+ if (!dno)
+ return 0;
+
+ /*
+ * Get the entry at index r in dnode q
+ */
+
+ de = map_nth_dirent(inode->i_dev, dno, r, qbh);
+
+ /*
+ * If none, we're out of files in this dnode. Ascend.
+ */
+
+ if (!de) {
+ /* q == 0 is the root dnode: nothing above it, so we are done */
+ if (q == 0)
+ return 0;
+ /* continue with the entry after the one we descended from */
+ *posp = q + 1;
+ return map_pos_dirent(inode, posp, qbh);
+ }
+
+ /*
+ * If a subtree is here, descend.
+ */
+
+ if (de->down)
+ /* next position is index 1 of the dnode below this entry */
+ *posp = pos << 6 | 1;
+ else
+ *posp = pos + 1;
+
+ /*
+ * Don't return the ^A^A and \377 entries.
+ */
+
+ if (de->first || de->last) {
+ brelse4(qbh);
+ return map_pos_dirent(inode, posp, qbh);
+ }
+ else
+ return de;
+}
+
+/*
+ * Return the address of the dnode with subtree coordinates given by pos.
+ *
+ * pos is a string of 6-bit indices: the low 6 bits select an entry in
+ * the dnode named by the remaining high bits, and the result is the
+ * dnode that entry points down to. Returns 0 if that entry does not
+ * exist or has no subtree. The most recent translation is cached in
+ * the inode (i_hpfs_dpos, i_hpfs_dsubdno).
+ */
+
+static dnode_secno dir_subdno(struct inode *inode, unsigned pos)
+{
+	struct hpfs_dirent *de;
+	struct quad_buffer_head qbh;
+
+	/*
+	 * 0 is the root dnode
+	 */
+
+	if (pos == 0)
+		return inode->i_hpfs_dno;
+
+	/*
+	 * we have one pos->dnode translation cached in the inode
+	 */
+
+	else if (pos == inode->i_hpfs_dpos)
+		return inode->i_hpfs_dsubdno;
+
+	/*
+	 * otherwise go look
+	 */
+
+	else {
+		unsigned q = pos >> 6;
+		unsigned r = pos & 077;
+		dnode_secno dno;
+
+		/*
+		 * dnode at position q
+		 */
+		dno = dir_subdno(inode, q);
+		if (dno == 0)
+			return 0;
+
+		/*
+		 * entry at index r; nothing stays mapped when this
+		 * returns null
+		 */
+		de = map_nth_dirent(inode->i_dev, dno, r, &qbh);
+		if (!de)
+			return 0;
+
+		/*
+		 * an entry without a subtree: give the dnode back before
+		 * reporting failure
+		 */
+		if (!de->down) {
+			brelse4(&qbh);
+			return 0;
+		}
+
+		/*
+		 * get the dnode down pointer
+		 */
+		dno = de_down_pointer(de);
+		brelse4(&qbh);
+
+		/*
+		 * cache it for next time
+		 */
+		inode->i_hpfs_dpos = pos;
+		inode->i_hpfs_dsubdno = dno;
+		return dno;
+	}
+}
+
+/*
+ * Return the dir entry at index n in dnode dno, or 0 if there isn't one
+ *
+ * Indices start at 1. On success the entry lives in *qbh and the caller
+ * must brelse4() it. On failure (dnode cannot be mapped, or it has
+ * fewer than n entries) nothing is left mapped.
+ */
+
+static struct hpfs_dirent *map_nth_dirent(dev_t dev, dnode_secno dno,
+					  int n,
+					  struct quad_buffer_head *qbh)
+{
+	int i;
+	struct hpfs_dirent *de, *de_end;
+	struct dnode *dnode = map_dnode(dev, dno, qbh);
+
+	/* map_dnode holds nothing when it fails, so just report it */
+	if (!dnode)
+		return 0;
+
+	de = dnode_first_de(dnode);
+	de_end = dnode_end_de(dnode);
+
+	for (i = 1; de < de_end; i++, de = de_next_de(de)) {
+		if (i == n)
+			return de;
+		/* end marker, or a zero length that would never advance */
+		if (de->last || de->length == 0)
+			break;
+	}
+
+	brelse4(qbh);
+	return 0;
+}
+
+/*
+ * read on a directory: always refused with -EISDIR. None of the
+ * arguments are examined.
+ */
+static int hpfs_dir_read(struct inode *inode, struct file *filp,
+ char *buf, int count)
+{
+ return -EISDIR;
+}
+
+/*
+ * Map the directory fnode ino and return the disk sector recorded in
+ * its first external allocation entry, which is the directory's dnode
+ * pointer.  Returns 0 if the fnode cannot be mapped.
+ */
+
+static dnode_secno fnode_dno(dev_t dev, ino_t ino)
+{
+	struct buffer_head *bh;
+	struct fnode *fn = map_fnode(dev, ino, &bh);
+	dnode_secno root = 0;
+
+	if (fn) {
+		root = fn->u.external[0].disk_secno;
+		brelse(bh);
+	}
+	return root;
+}
+
+/*
+ * Map the fnode for ino.  On success returns the fnode and leaves its
+ * buffer in *bhp for the caller to brelse().  Returns 0 for ino 0, for a
+ * read error, or when the sector lacks FNODE_MAGIC (the buffer is then
+ * released here).
+ */
+
+static struct fnode *map_fnode(dev_t dev, ino_t ino, struct buffer_head **bhp)
+{
+	struct fnode *fnode;
+
+	if (ino == 0) {
+		printk("HPFS: missing fnode\n");
+		return 0;
+	}
+
+	fnode = map_sector(dev, ino_secno(ino), bhp);
+	if (!fnode)
+		return 0;
+
+	if (fnode->magic == FNODE_MAGIC)
+		return fnode;
+
+	printk("HPFS: map_fnode: bad fnode pointer\n");
+	brelse(*bhp);
+	return 0;
+}
+
+/*
+ * Map the anode at sector secno.  On success returns the anode and
+ * leaves its buffer in *bhp for the caller to brelse().  Returns 0 for
+ * sector 0, for a read error, or when the sector lacks ANODE_MAGIC or
+ * its self pointer is not secno (the buffer is then released here).
+ */
+
+static struct anode *map_anode(dev_t dev, unsigned secno,
+			       struct buffer_head **bhp)
+{
+	struct anode *anode;
+
+	if (secno == 0) {
+		printk("HPFS: missing anode\n");
+		return 0;
+	}
+
+	anode = map_sector(dev, secno, bhp);
+	if (!anode)
+		return 0;
+
+	if (anode->magic == ANODE_MAGIC && anode->self == secno)
+		return anode;
+
+	printk("HPFS: map_anode: bad anode pointer\n");
+	brelse(*bhp);
+	return 0;
+}
+
+/*
+ * Map the four-sector dnode at secno.  On success returns the dnode and
+ * leaves *qbh mapped for the caller to brelse4().  Returns 0 for sector
+ * 0, when the four sectors cannot be mapped, or when the data lacks
+ * DNODE_MAGIC or its self pointer is not secno (*qbh is then released
+ * here).
+ */
+
+static struct dnode *map_dnode(dev_t dev, unsigned secno,
+			       struct quad_buffer_head *qbh)
+{
+	struct dnode *dnode;
+
+	if (secno == 0) {
+		printk("HPFS: missing dnode\n");
+		return 0;
+	}
+
+	dnode = map_4sectors(dev, secno, qbh);
+	if (!dnode)
+		return 0;
+
+	if (dnode->magic == DNODE_MAGIC && dnode->self == secno)
+		return dnode;
+
+	printk("HPFS: map_dnode: bad dnode pointer\n");
+	brelse4(qbh);
+	return 0;
+}
+
+/*
+ * bread() one 512-byte sector.  *bhp always receives the result of
+ * bread(); the return value is the buffer's data, or 0 (after logging)
+ * when the read failed.
+ */
+
+static void *map_sector(dev_t dev, unsigned secno, struct buffer_head **bhp)
+{
+	struct buffer_head *bh = bread(dev, secno, 512);
+
+	*bhp = bh;
+	if (!bh) {
+		printk("HPFS: map_sector: read error\n");
+		return 0;
+	}
+	return bh->b_data;
+}
+
+/*
+ * Map four consecutive 512-byte sectors starting at secno, which must
+ * be a multiple of 4.  The sectors are copied into one kmalloc'ed
+ * 2048-byte area; that area and the four buffer heads are recorded in
+ * *qbh for brelse4() to release.
+ *
+ * Returns the 2048-byte area, or 0 after logging if secno is unaligned,
+ * the allocation fails or any sector cannot be read.  On a failure
+ * after allocation, the buffers already obtained and the area are
+ * released here.
+ */
+
+static void *map_4sectors(dev_t dev, unsigned secno,
+			  struct quad_buffer_head *qbh)
+{
+	char *data;
+	int i;
+
+	if (secno & 3) {
+		printk("HPFS: map_4sectors: unaligned read\n");
+		return 0;
+	}
+
+	data = kmalloc(2048, GFP_KERNEL);
+	qbh->data = data;
+	if (!data)
+		goto read_error;
+
+	for (i = 0; i < 4; i++) {
+		struct buffer_head *bh;
+
+		/* the first sector goes through breada, the rest through bread */
+		if (i == 0)
+			bh = breada(dev, secno, 512, 0, UINT_MAX);
+		else
+			bh = bread(dev, secno + i, 512);
+
+		qbh->bh[i] = bh;
+		if (!bh) {
+			/* undo in reverse order what was obtained so far */
+			while (i-- > 0)
+				brelse(qbh->bh[i]);
+			kfree_s(data, 2048);
+			goto read_error;
+		}
+
+		memcpy(data + i * 512, bh->b_data, 512);
+	}
+
+	return data;
+
+ read_error:
+	printk("HPFS: map_4sectors: read error\n");
+	return 0;
+}
+
+/*
+ * Release everything recorded in *qbh: the four buffer heads, last one
+ * first, and then the 2048-byte data area.
+ */
+
+static void brelse4(struct quad_buffer_head *qbh)
+{
+	int i;
+
+	for (i = 3; i >= 0; i--)
+		brelse(qbh->bh[i]);
+	kfree_s(qbh->data, 2048);
+}
diff --git a/fs/inode.c b/fs/inode.c
new file mode 100644
index 000000000..7278b850e
--- /dev/null
+++ b/fs/inode.c
@@ -0,0 +1,572 @@
+/*
+ * linux/fs/inode.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/stat.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+
+#include <asm/system.h>
+
+/*
+ * One bucket of the inode hash table.  "inode" heads a chain linked through
+ * i_hash_next/i_hash_prev; "updating" is raised by __iget() while it is
+ * fetching an empty inode for this bucket and lowered again afterwards.
+ */
+static struct inode_hash_entry {
+ struct inode * inode;
+ int updating;
+} hash_table[NR_IHASH];
+
+static struct inode * first_inode;
+static struct wait_queue * inode_wait = NULL;
+static int nr_inodes = 0, nr_free_inodes = 0;
+
+/* Reduce a (device, inode number) pair to a bucket index of hash_table. */
+static inline int const hashfn(dev_t dev, unsigned int i)
+{
+	return (i ^ dev) % NR_IHASH;
+}
+
+/* Return the hash bucket that (dev, i) maps to. */
+static inline struct inode_hash_entry * const hash(dev_t dev, int i)
+{
+	return &hash_table[hashfn(dev, i)];
+}
+
+/*
+ * Link "inode" into the circular inode list just before the current
+ * first_inode and make it the new list head.  first_inode must be non-NULL.
+ * The neighbours are reached through the new inode's own i_next/i_prev
+ * fields, so the statement order must not be changed.
+ */
+static void insert_inode_free(struct inode *inode)
+{
+ inode->i_next = first_inode;
+ inode->i_prev = first_inode->i_prev;
+ inode->i_next->i_prev = inode;
+ inode->i_prev->i_next = inode;
+ first_inode = inode;
+}
+
+/*
+ * Unlink "inode" from the inode list.  If it is the list head, the head is
+ * first advanced to its successor.  The inode's own links end up NULL.
+ */
+static void remove_inode_free(struct inode *inode)
+{
+ if (first_inode == inode)
+ first_inode = first_inode->i_next;
+ if (inode->i_next)
+ inode->i_next->i_prev = inode->i_prev;
+ if (inode->i_prev)
+ inode->i_prev->i_next = inode->i_next;
+ inode->i_next = inode->i_prev = NULL;
+}
+
+void insert_inode_hash(struct inode *inode)
+{
+	struct inode_hash_entry *bucket = hash(inode->i_dev, inode->i_ino);
+	struct inode *old_head = bucket->inode;
+
+	/* Push the inode onto the front of its bucket's chain. */
+	inode->i_hash_prev = NULL;
+	inode->i_hash_next = old_head;
+	if (old_head)
+		old_head->i_hash_prev = inode;
+	bucket->inode = inode;
+}
+
+static void remove_inode_hash(struct inode *inode)
+{
+	struct inode_hash_entry *bucket = hash(inode->i_dev, inode->i_ino);
+	struct inode *after = inode->i_hash_next;
+	struct inode *before;
+
+	/* If the inode heads the chain, the bucket moves on to its successor. */
+	if (bucket->inode == inode)
+		bucket->inode = after;
+	if (after)
+		after->i_hash_prev = inode->i_hash_prev;
+	before = inode->i_hash_prev;
+	if (before)
+		before->i_hash_next = inode->i_hash_next;
+	/* Leave the removed inode with no hash links. */
+	inode->i_hash_next = inode->i_hash_prev = NULL;
+}
+
+/*
+ * Move "inode" to the tail of the circular inode list, i.e. directly before
+ * first_inode.  first_inode itself is not reassigned here; it only changes
+ * if "inode" was the head, in which case remove_inode_free() advances it.
+ */
+static void put_last_free(struct inode *inode)
+{
+ remove_inode_free(inode);
+ inode->i_prev = first_inode->i_prev;
+ inode->i_prev->i_next = inode;
+ inode->i_next = first_inode;
+ inode->i_next->i_prev = inode;
+}
+
+/*
+ * Add one page worth of inodes to the inode list.
+ *
+ * A page is taken with get_free_page(); if that fails the pool is left
+ * unchanged.  Both nr_inodes and nr_free_inodes grow by the number of
+ * inodes that fit into the page.  If the list is still empty, the first
+ * new inode becomes a one-element ring and the list head; every remaining
+ * inode is linked in with insert_inode_free().
+ */
+void grow_inodes(void)
+{
+	struct inode * inode;
+	int i;
+
+	if (!(inode = (struct inode*) get_free_page(GFP_KERNEL)))
+		return;
+
+	i=PAGE_SIZE / sizeof(struct inode);
+	nr_inodes += i;
+	nr_free_inodes += i;
+
+	/*
+	 * The ring is seeded in separate statements: the former one-liner
+	 * "inode->i_next = ... = inode++" both read and modified "inode"
+	 * without an intervening sequence point.
+	 */
+	if (!first_inode) {
+		inode->i_next = inode->i_prev = first_inode = inode;
+		inode++;
+		i--;
+	}
+
+	for ( ; i ; i-- )
+		insert_inode_free(inode++);
+}
+
+/*
+ * Reset the inode bookkeeping: empty inode list, all hash buckets cleared.
+ * No memory is claimed from the range, so "start" is returned unchanged and
+ * "end" is not used.
+ */
+unsigned long inode_init(unsigned long start, unsigned long end)
+{
+	first_inode = NULL;
+	memset(hash_table, 0, sizeof(hash_table));
+	return start;
+}
+
+static void __wait_on_inode(struct inode *);
+
+/* Return at once if the inode is unlocked; otherwise wait in __wait_on_inode(). */
+static inline void wait_on_inode(struct inode * inode)
+{
+	if (!inode->i_lock)
+		return;
+	__wait_on_inode(inode);
+}
+
+/* Wait until the inode is no longer locked, then set the lock flag. */
+static inline void lock_inode(struct inode * inode)
+{
+	if (inode->i_lock)
+		__wait_on_inode(inode);
+	inode->i_lock = 1;
+}
+
+/* Clear the inode's lock flag, then wake up its i_wait queue. */
+static inline void unlock_inode(struct inode * inode)
+{
+ inode->i_lock = 0;
+ wake_up(&inode->i_wait);
+}
+
+/*
+ * Note that we don't want to disturb any wait-queues when we discard
+ * an inode.
+ *
+ * Argghh. Got bitten by a gcc problem with inlining: no way to tell
+ * the compiler that the inline asm function 'memset' changes 'inode'.
+ * I've been searching for the bug for days, and was getting desperate.
+ * Finally looked at the assembler output... Grrr.
+ *
+ * The solution is the weird use of 'volatile'. Ho humm. Have to report
+ * it to the gcc lists, and hope we can do this more cleanly some day..
+ */
+void clear_inode(struct inode * inode)
+{
+ struct wait_queue * wait;
+
+ /* Wait for the lock to clear, then take the inode off both lists. */
+ wait_on_inode(inode);
+ remove_inode_hash(inode);
+ remove_inode_free(inode);
+ /* Save the wait queue through a volatile access; it must survive the memset. */
+ wait = ((volatile struct inode *) inode)->i_wait;
+ /* The memset zeroes i_count, so an inode that was in use is counted as free now. */
+ if (inode->i_count)
+ nr_free_inodes++;
+ memset(inode,0,sizeof(*inode));
+ ((volatile struct inode *) inode)->i_wait = wait;
+ /* Re-link the zeroed inode at the head of the inode list. */
+ insert_inode_free(inode);
+}
+
+/*
+ * Check whether "dev" may be mounted: returns 0 as soon as an inode of that
+ * device is found in use, dirty or locked, and 1 otherwise.  Idle inodes of
+ * the device met along the way are cleared.
+ */
+int fs_may_mount(dev_t dev)
+{
+	struct inode *cur, *following;
+	int remaining = nr_inodes;
+
+	following = first_inode;
+	while (remaining > 0) {
+		remaining--;
+		cur = following;
+		/* Fetch the successor first: clear_inode() relinks the list. */
+		following = cur->i_next;
+		if (cur->i_dev != dev)
+			continue;
+		if (cur->i_count || cur->i_dirt || cur->i_lock)
+			return 0;
+		clear_inode(cur);
+	}
+	return 1;
+}
+
+/*
+ * Check whether "dev" may be unmounted: returns 1 when the only in-use
+ * inode on the device (if any) is mount_root with a single reference,
+ * and 0 otherwise.
+ */
+int fs_may_umount(dev_t dev, struct inode * mount_root)
+{
+	struct inode *cur = first_inode;
+	int seen;
+
+	for (seen = 0; seen < nr_inodes; seen++, cur = cur->i_next) {
+		int in_use = (cur->i_dev == dev) && cur->i_count;
+
+		if (!in_use)
+			continue;
+		/* Anything but a singly-referenced mount root blocks the umount. */
+		if (cur != mount_root || cur->i_count != 1)
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Check whether "dev" may be remounted read-only: returns 0 if any open
+ * regular file on the device has bit 2 set in f_mode (open for writing),
+ * 1 otherwise.
+ */
+int fs_may_remount_ro(dev_t dev)
+{
+	struct file *fp = first_file;
+	int seen;
+
+	for (seen = 0; seen < nr_files; seen++, fp = fp->f_next) {
+		/* Only files that are open and live on this device matter. */
+		if (fp->f_count && fp->f_inode && fp->f_inode->i_dev == dev) {
+			if (S_ISREG(fp->f_inode->i_mode) && (fp->f_mode & 2))
+				return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * Write a dirty inode through the superblock's write_inode operation.
+ * If the filesystem provides none, the dirty flag is simply dropped.
+ */
+static void write_inode(struct inode * inode)
+{
+	struct super_block *sb;
+
+	if (!inode->i_dirt)
+		return;
+	wait_on_inode(inode);
+	/* Test again: the wait above may have slept. */
+	if (!inode->i_dirt)
+		return;
+
+	sb = inode->i_sb;
+	if (sb && sb->s_op && sb->s_op->write_inode) {
+		/* Hold the inode lock for the duration of the write. */
+		inode->i_lock = 1;
+		sb->s_op->write_inode(inode);
+		unlock_inode(inode);
+		return;
+	}
+	inode->i_dirt = 0;
+}
+
+/*
+ * Fill in an inode through the superblock's read_inode operation, if the
+ * filesystem has one, with the inode locked for the duration.
+ */
+static void read_inode(struct inode * inode)
+{
+	struct super_block *sb;
+
+	lock_inode(inode);
+	sb = inode->i_sb;
+	if (sb && sb->s_op && sb->s_op->read_inode)
+		sb->s_op->read_inode(inode);
+	unlock_inode(inode);
+}
+
+/* POSIX UID/GID verification for setting inode attributes */
+/*
+ * Check whether the current process may apply "attr" to "inode".
+ *
+ * Returns 0 if every requested change is permitted and -EPERM otherwise.
+ * fsuser() overrides every check.  As a side effect the setgid bit is
+ * stripped from attr->ia_mode when the caller is not in the file's
+ * (possibly new) group.
+ */
+int inode_change_ok(struct inode *inode, struct iattr *attr)
+{
+	/* Make sure a caller can chown: must own the file and keep the uid */
+	if ((attr->ia_valid & ATTR_UID) &&
+	    (current->fsuid != inode->i_uid ||
+	     attr->ia_uid != inode->i_uid) && !fsuser())
+		return -EPERM;
+
+	/*
+	 * Make sure caller can chgrp: must own the file, and the new group
+	 * must be one the caller belongs to (or the unchanged current group).
+	 * The ownership test used to be missing here, which let any member
+	 * of a group pull somebody else's file into that group.
+	 */
+	if ((attr->ia_valid & ATTR_GID) &&
+	    (current->fsuid != inode->i_uid ||
+	     (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) &&
+	    !fsuser())
+		return -EPERM;
+
+	/* Make sure a caller can chmod */
+	if (attr->ia_valid & ATTR_MODE) {
+		if ((current->fsuid != inode->i_uid) && !fsuser())
+			return -EPERM;
+		/* Also check the setgid bit! */
+		if (!fsuser() && !in_group_p((attr->ia_valid & ATTR_GID) ? attr->ia_gid :
+					     inode->i_gid))
+			attr->ia_mode &= ~S_ISGID;
+	}
+
+	/* Check for setting the inode time */
+	if ((attr->ia_valid & ATTR_ATIME_SET) &&
+	    ((current->fsuid != inode->i_uid) && !fsuser()))
+		return -EPERM;
+	if ((attr->ia_valid & ATTR_MTIME_SET) &&
+	    ((current->fsuid != inode->i_uid) && !fsuser()))
+		return -EPERM;
+
+
+	return 0;
+}
+
+/*
+ * Set the appropriate attributes from an attribute structure into
+ * the inode structure.
+ */
+void inode_setattr(struct inode *inode, struct iattr *attr)
+{
+	/* Timestamps */
+	if (attr->ia_valid & ATTR_ATIME) {
+		inode->i_atime = attr->ia_atime;
+	}
+	if (attr->ia_valid & ATTR_MTIME) {
+		inode->i_mtime = attr->ia_mtime;
+	}
+	if (attr->ia_valid & ATTR_CTIME) {
+		inode->i_ctime = attr->ia_ctime;
+	}
+
+	/* Size and ownership */
+	if (attr->ia_valid & ATTR_SIZE) {
+		inode->i_size = attr->ia_size;
+	}
+	if (attr->ia_valid & ATTR_UID) {
+		inode->i_uid = attr->ia_uid;
+	}
+	if (attr->ia_valid & ATTR_GID) {
+		inode->i_gid = attr->ia_gid;
+	}
+
+	/* Mode last: the setgid test below looks at the (possibly new) gid. */
+	if (attr->ia_valid & ATTR_MODE) {
+		inode->i_mode = attr->ia_mode;
+		if (!fsuser() && !in_group_p(inode->i_gid))
+			inode->i_mode &= ~S_ISGID;
+	}
+
+	/* The inode is marked dirty regardless of which fields were set. */
+	inode->i_dirt = 1;
+}
+
+/*
+ * notify_change is called for inode-changing operations such as
+ * chown, chmod, utime, and truncate. It is guaranteed (unlike
+ * write_inode) to be called from the context of the user requesting
+ * the change. It is not called for ordinary access-time updates.
+ * NFS uses this to get the authentication correct. -- jrs
+ */
+
+int notify_change(struct inode * inode, struct iattr *attr)
+{
+	int retval;
+
+	/* A filesystem with its own notify_change handles everything itself. */
+	if (inode->i_sb && inode->i_sb->s_op) {
+		if (inode->i_sb->s_op->notify_change)
+			return inode->i_sb->s_op->notify_change(inode, attr);
+	}
+
+	/* Generic path: permission check first, then apply the attributes. */
+	retval = inode_change_ok(inode, attr);
+	if (retval != 0)
+		return retval;
+
+	inode_setattr(inode, attr);
+	return 0;
+}
+
+/*
+ * bmap is needed for demand-loading and paging: if this function
+ * doesn't exist for a filesystem, then those things are impossible:
+ * executables cannot be run from the filesystem etc...
+ *
+ * This isn't as bad as it sounds: the read-routines might still work,
+ * so the filesystem would be otherwise ok (for example, you might have
+ * a DOS filesystem, which doesn't lend itself to bmap very well, but
+ * you could still transfer files to/from the filesystem)
+ */
+int bmap(struct inode * inode, int block)
+{
+	/* No bmap operation for this inode: report block 0. */
+	if (!inode->i_op || !inode->i_op->bmap)
+		return 0;
+	return inode->i_op->bmap(inode,block);
+}
+
+/*
+ * Clear every idle inode that belongs to "dev".  Inodes of the device that
+ * are still referenced, dirty or locked are reported and left alone.
+ */
+void invalidate_inodes(dev_t dev)
+{
+	struct inode *cur, *following;
+	int remaining = nr_inodes;
+
+	following = first_inode;
+	while (remaining > 0) {
+		remaining--;
+		cur = following;
+		/* Fetch the successor first: clear_inode() relinks the list. */
+		following = cur->i_next;
+		if (cur->i_dev != dev)
+			continue;
+		if (!cur->i_count && !cur->i_dirt && !cur->i_lock) {
+			clear_inode(cur);
+			continue;
+		}
+		printk("VFS: inode busy on removed device %d/%d\n", MAJOR(dev), MINOR(dev));
+	}
+}
+
+/*
+ * Write back dirty inodes of "dev", or of every device when dev is 0.
+ * The circular list is walked for twice nr_inodes steps.
+ */
+void sync_inodes(dev_t dev)
+{
+	struct inode *cur = first_inode;
+	int step;
+
+	for (step = 0; step < nr_inodes*2; step++, cur = cur->i_next) {
+		if (!dev || cur->i_dev == dev) {
+			wait_on_inode(cur);
+			if (cur->i_dirt)
+				write_inode(cur);
+		}
+	}
+}
+
+/*
+ * Drop one reference to "inode" (NULL is accepted and ignored).
+ *
+ * While other references remain, only i_count is decremented.  On the last
+ * reference the pipe page (if any) is freed, the filesystem's put_inode is
+ * called, a dirty inode is written back, and finally the inode is counted
+ * as free.
+ */
+void iput(struct inode * inode)
+{
+ if (!inode)
+ return;
+ wait_on_inode(inode);
+ /* Releasing an inode that has no references is reported, not acted on. */
+ if (!inode->i_count) {
+ printk("VFS: iput: trying to free free inode\n");
+ printk("VFS: device %d/%d, inode %lu, mode=0%07o\n",
+ MAJOR(inode->i_rdev), MINOR(inode->i_rdev),
+ inode->i_ino, inode->i_mode);
+ return;
+ }
+ if (inode->i_pipe)
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+repeat:
+ /* Not the last reference: just drop the count. */
+ if (inode->i_count>1) {
+ inode->i_count--;
+ return;
+ }
+ /* Last reference: wake tasks sleeping in get_empty_inode(). */
+ wake_up(&inode_wait);
+ if (inode->i_pipe) {
+ unsigned long page = (unsigned long) PIPE_BASE(*inode);
+ PIPE_BASE(*inode) = NULL;
+ free_page(page);
+ }
+ if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->put_inode) {
+ inode->i_sb->s_op->put_inode(inode);
+ /* With no links left nothing more is done here after put_inode. */
+ if (!inode->i_nlink)
+ return;
+ }
+ if (inode->i_dirt) {
+ write_inode(inode); /* we can sleep - so do again */
+ wait_on_inode(inode);
+ goto repeat;
+ }
+ inode->i_count--;
+ nr_free_inodes++;
+ return;
+}
+
+/*
+ * Return a cleared inode with i_count and i_nlink set to 1.
+ *
+ * The inode list is scanned for an unreferenced inode, preferring one that
+ * is neither dirty nor locked.  The pool is grown while it is below
+ * NR_INODE; when nothing is available the caller sleeps on inode_wait.
+ * Every step that can sleep restarts the search from scratch.
+ */
+struct inode * get_empty_inode(void)
+{
+ struct inode * inode, * best;
+ int i;
+
+ /* Grow early when fewer than a quarter of the inodes are free. */
+ if (nr_inodes < NR_INODE && nr_free_inodes < (nr_inodes >> 2))
+ grow_inodes();
+repeat:
+ inode = first_inode;
+ best = NULL;
+ for (i = 0; i<nr_inodes; inode = inode->i_next, i++) {
+ if (!inode->i_count) {
+ /* Remember the first unreferenced inode as a fallback ... */
+ if (!best)
+ best = inode;
+ /* ... but stop at once on one that is also clean and unlocked. */
+ if (!inode->i_dirt && !inode->i_lock) {
+ best = inode;
+ break;
+ }
+ }
+ }
+ /* No ideal candidate: grow the pool if allowed and search again. */
+ if (!best || best->i_dirt || best->i_lock)
+ if (nr_inodes < NR_INODE) {
+ grow_inodes();
+ goto repeat;
+ }
+ inode = best;
+ if (!inode) {
+ printk("VFS: No free inodes - contact Linus\n");
+ sleep_on(&inode_wait);
+ goto repeat;
+ }
+ if (inode->i_lock) {
+ wait_on_inode(inode);
+ goto repeat;
+ }
+ if (inode->i_dirt) {
+ write_inode(inode);
+ goto repeat;
+ }
+ /* Start over if the inode has picked up a reference in the meantime. */
+ if (inode->i_count)
+ goto repeat;
+ clear_inode(inode);
+ inode->i_count = 1;
+ inode->i_nlink = 1;
+ inode->i_version = ++event;
+ inode->i_sem.count = 1;
+ nr_free_inodes--;
+ /* Repair the counter rather than let it go negative. */
+ if (nr_free_inodes < 0) {
+ printk ("VFS: get_empty_inode: bad free inode count.\n");
+ nr_free_inodes = 0;
+ }
+ return inode;
+}
+
+/*
+ * Allocate an inode for a new pipe, together with its one-page data buffer.
+ * Returns NULL if either the inode or the page cannot be obtained.
+ */
+struct inode * get_pipe_inode(void)
+{
+	extern struct inode_operations pipe_inode_operations;
+	struct inode * inode;
+
+	inode = get_empty_inode();
+	if (!inode)
+		return NULL;
+
+	PIPE_BASE(*inode) = (char*) __get_free_page(GFP_USER);
+	if (!PIPE_BASE(*inode)) {
+		iput(inode);
+		return NULL;
+	}
+
+	inode->i_op = &pipe_inode_operations;
+	inode->i_count = 2;	/* sum of readers/writers */
+
+	/* Pipe state: empty buffer, one reader and one writer, unlocked. */
+	PIPE_WAIT(*inode) = NULL;
+	PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
+	PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
+	PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
+	PIPE_LOCK(*inode) = 0;
+	inode->i_pipe = 1;
+
+	/* Generic attributes: a FIFO owned by the caller, user read/write. */
+	inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR;
+	inode->i_uid = current->fsuid;
+	inode->i_gid = current->fsgid;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	inode->i_blksize = PAGE_SIZE;
+	return inode;
+}
+
+/*
+ * Look up inode number "nr" on superblock "sb" and return it with its
+ * reference count raised, reading it in through read_inode() if it is not
+ * in the hash table yet.  With "crossmntp" set, an inode that has i_mount
+ * set is replaced by that mounted inode.  Returns NULL only if no empty
+ * inode could be obtained; a NULL "sb" panics.
+ */
+struct inode * __iget(struct super_block * sb, int nr, int crossmntp)
+{
+ static struct wait_queue * update_wait = NULL;
+ struct inode_hash_entry * h;
+ struct inode * inode;
+ struct inode * empty = NULL;
+
+ if (!sb)
+ panic("VFS: iget with sb==NULL");
+ h = hash(sb->s_dev, nr);
+repeat:
+ for (inode = h->inode; inode ; inode = inode->i_hash_next)
+ if (inode->i_dev == sb->s_dev && inode->i_ino == nr)
+ goto found_it;
+ if (!empty) {
+ /*
+ * Not hashed: fetch an empty inode, then search the chain again.
+ * h->updating is raised while get_empty_inode() runs.
+ */
+ h->updating++;
+ empty = get_empty_inode();
+ if (!--h->updating)
+ wake_up(&update_wait);
+ if (empty)
+ goto repeat;
+ return (NULL);
+ }
+ /* Still not hashed after the second search: set up the empty inode. */
+ inode = empty;
+ inode->i_sb = sb;
+ inode->i_dev = sb->s_dev;
+ inode->i_ino = nr;
+ inode->i_flags = sb->s_flags;
+ put_last_free(inode);
+ insert_inode_hash(inode);
+ read_inode(inode);
+ goto return_it;
+
+found_it:
+ /* An inode with no references leaves the free count when taken. */
+ if (!inode->i_count)
+ nr_free_inodes--;
+ inode->i_count++;
+ wait_on_inode(inode);
+ /* Check the identity again after the wait. */
+ if (inode->i_dev != sb->s_dev || inode->i_ino != nr) {
+ printk("Whee.. inode changed from under us. Tell Linus\n");
+ iput(inode);
+ goto repeat;
+ }
+ if (crossmntp && inode->i_mount) {
+ struct inode * tmp = inode->i_mount;
+ tmp->i_count++;
+ iput(inode);
+ inode = tmp;
+ wait_on_inode(inode);
+ }
+ /* The spare inode fetched above turned out not to be needed. */
+ if (empty)
+ iput(empty);
+
+return_it:
+ /* Do not return while any __iget() still has this bucket marked as updating. */
+ while (h->updating)
+ sleep_on(&update_wait);
+ return inode;
+}
+
+/*
+ * The "new" scheduling primitives (new as of 0.97 or so) allow this to
+ * be done without disabling interrupts (other than in the actual queue
+ * updating things: only a couple of 386 instructions). This should be
+ * much better for interrupt latency.
+ */
+/* Sleep until inode->i_lock is clear. */
+static void __wait_on_inode(struct inode * inode)
+{
+ struct wait_queue wait = { current, NULL };
+
+ add_wait_queue(&inode->i_wait, &wait);
+repeat:
+ /* The task state is set before i_lock is tested, and again on every retry. */
+ current->state = TASK_UNINTERRUPTIBLE;
+ if (inode->i_lock) {
+ schedule();
+ goto repeat;
+ }
+ remove_wait_queue(&inode->i_wait, &wait);
+ current->state = TASK_RUNNING;
+}
diff --git a/fs/ioctl.c b/fs/ioctl.c
new file mode 100644
index 000000000..22d0f4d10
--- /dev/null
+++ b/fs/ioctl.c
@@ -0,0 +1,99 @@
+/*
+ * linux/fs/ioctl.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <asm/segment.h>
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/termios.h>
+#include <linux/fcntl.h> /* for f_flags values */
+
+/*
+ * ioctls handled generically for regular files: FIBMAP, FIGETBSZ and
+ * FIONREAD.  Any other command goes to the file's own ioctl operation, or
+ * fails with -EINVAL if there is none.
+ */
+static int file_ioctl(struct file *filp,unsigned int cmd,unsigned long arg)
+{
+	struct inode *inode = filp->f_inode;
+	int error;
+	int block;
+
+	switch (cmd) {
+		case FIBMAP:
+			/* Replace the block number at *arg by the result of bmap. */
+			if (inode->i_op == NULL)
+				return -EBADF;
+			if (inode->i_op->bmap == NULL)
+				return -EINVAL;
+			error = verify_area(VERIFY_WRITE,(void *) arg,4);
+			if (error)
+				return error;
+			block = get_fs_long((long *) arg);
+			block = inode->i_op->bmap(inode,block);
+			put_fs_long(block,(long *) arg);
+			return 0;
+		case FIGETBSZ:
+			/* Store the superblock's block size at *arg. */
+			if (inode->i_sb == NULL)
+				return -EBADF;
+			error = verify_area(VERIFY_WRITE,(void *) arg,4);
+			if (error)
+				return error;
+			put_fs_long(inode->i_sb->s_blocksize,
+				(long *) arg);
+			return 0;
+		case FIONREAD:
+			/* Store the distance from f_pos to the file size at *arg. */
+			error = verify_area(VERIFY_WRITE,(void *) arg,4);
+			if (error)
+				return error;
+			put_fs_long(inode->i_size - filp->f_pos,
+				(long *) arg);
+			return 0;
+	}
+	if (!filp->f_op || !filp->f_op->ioctl)
+		return -EINVAL;
+	return filp->f_op->ioctl(inode, filp, cmd,arg);
+}
+
+
+/*
+ * The ioctl system call.
+ *
+ * FIOCLEX/FIONCLEX set and clear the descriptor's close-on-exec bit,
+ * FIONBIO and FIOASYNC switch O_NONBLOCK and O_SYNC in f_flags according
+ * to the integer at *arg.  Everything else goes to file_ioctl() for
+ * regular files, or straight to the file's ioctl operation.
+ *
+ * Returns -EBADF for a bad descriptor, the verify_area() error for an
+ * unreadable argument, and -EINVAL when nobody handles the command.
+ */
+asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+	struct file * filp;
+	int on;
+	int error;
+
+	if (fd >= NR_OPEN || !(filp = current->files->fd[fd]))
+		return -EBADF;
+	switch (cmd) {
+		case FIOCLEX:
+			FD_SET(fd, &current->files->close_on_exec);
+			return 0;
+
+		case FIONCLEX:
+			FD_CLR(fd, &current->files->close_on_exec);
+			return 0;
+
+		case FIONBIO:
+			/* Check the user pointer before reading through it. */
+			error = verify_area(VERIFY_READ, (void *) arg,
+					    sizeof(unsigned long));
+			if (error)
+				return error;
+			on = get_fs_long((unsigned long *) arg);
+			if (on)
+				filp->f_flags |= O_NONBLOCK;
+			else
+				filp->f_flags &= ~O_NONBLOCK;
+			return 0;
+
+		case FIOASYNC: /* O_SYNC is not yet implemented,
+				  but it's here for completeness. */
+			/* Check the user pointer before reading through it. */
+			error = verify_area(VERIFY_READ, (void *) arg,
+					    sizeof(unsigned long));
+			if (error)
+				return error;
+			on = get_fs_long ((unsigned long *) arg);
+			if (on)
+				filp->f_flags |= O_SYNC;
+			else
+				filp->f_flags &= ~O_SYNC;
+			return 0;
+
+		default:
+			if (filp->f_inode && S_ISREG(filp->f_inode->i_mode))
+				return file_ioctl(filp,cmd,arg);
+
+			if (filp->f_op && filp->f_op->ioctl)
+				return filp->f_op->ioctl(filp->f_inode, filp, cmd,arg);
+
+			return -EINVAL;
+	}
+}
diff --git a/fs/isofs/Makefile b/fs/isofs/Makefile
new file mode 100644
index 000000000..a780af479
--- /dev/null
+++ b/fs/isofs/Makefile
@@ -0,0 +1,30 @@
+#
+# Makefile for the linux isofs-filesystem routines.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+.c.s:
+ $(CC) $(CFLAGS) -S $<
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+.s.o:
+ $(AS) -o $*.o $<
+
+OBJS= namei.o inode.o file.o dir.o util.o rock.o symlink.o
+
+isofs.o: $(OBJS)
+ $(LD) -r -o isofs.o $(OBJS)
+
+dep:
+ $(CPP) -M *.c > .depend
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
new file mode 100644
index 000000000..b1934db04
--- /dev/null
+++ b/fs/isofs/dir.c
@@ -0,0 +1,258 @@
+/*
+ * linux/fs/isofs/dir.c
+ *
+ * (C) 1992, 1993, 1994 Eric Youngdale Modified for ISO9660 filesystem.
+ *
+ * (C) 1991 Linus Torvalds - minix filesystem
+ *
+ * isofs directory handling functions
+ */
+
+#include <linux/errno.h>
+
+#include <asm/segment.h>
+
+#include <linux/fs.h>
+#include <linux/iso_fs.h>
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/malloc.h>
+#include <linux/sched.h>
+#include <linux/locks.h>
+
+#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
+#define ROUND_UP(x) (((x)+3) & ~3)
+
+static int isofs_readdir(struct inode *, struct file *, struct dirent *, int);
+
+/*
+ * File operations for isofs directories: only readdir is provided.
+ * The slot order follows isofs_file_operations in fs/isofs/file.c, where
+ * the seventh slot is mmap; the nine initializers here therefore end at
+ * release, and the fsync slot is left to implicit zero-initialization.
+ */
+static struct file_operations isofs_dir_operations = {
+ NULL, /* lseek - default */
+ NULL, /* read */
+ NULL, /* write - bad */
+ isofs_readdir, /* readdir */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL /* no special release code */
+};
+
+/*
+ * directories can handle most operations...
+ */
+/* Inode operations for isofs directories: only lookup and bmap are provided. */
+struct inode_operations isofs_dir_inode_operations = {
+ &isofs_dir_operations, /* default directory file-ops */
+ NULL, /* create */
+ isofs_lookup, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ isofs_bmap, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * Return the next reportable entry of an isofs directory.
+ *
+ * Starting at filp->f_pos, directory records are walked until one is found
+ * that is to be reported; hidden/associated files and Rock Ridge relocation
+ * directories are passed over.  The name, inode number and name length are
+ * stored into the user-space "dirent" and filp->f_pos is left just behind
+ * the record.  A record that straddles two buffers is first assembled in a
+ * temporary kmalloc'ed copy.
+ *
+ * Returns the rounded-up size of the entry written, 0 at the end of the
+ * directory or on any read/allocation failure, and -EBADF if "inode" is
+ * not a directory.  "count" is not used.
+ */
+static int isofs_readdir(struct inode * inode, struct file * filp,
+			 struct dirent * dirent, int count)
+{
+	unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
+	unsigned char bufbits = ISOFS_BUFFER_BITS(inode);
+	unsigned int block,offset,i, j;
+	char c = 0;
+	int inode_number;
+	struct buffer_head * bh;
+	void * cpnt = NULL;
+	unsigned int old_offset;
+	int dlen, rrflag;
+	int high_sierra = 0;
+	char * dpnt, *dpnt1;
+	struct iso_directory_record * de;
+
+	dpnt1 = NULL;
+	if (!inode || !S_ISDIR(inode->i_mode))
+		return -EBADF;
+
+	offset = filp->f_pos & (bufsize - 1);
+	block = isofs_bmap(inode,filp->f_pos>>bufbits);
+
+	if(!block) return 0;
+
+	if(!(bh = breada(inode->i_dev, block, bufsize, filp->f_pos, inode->i_size)))
+		return 0;
+
+	while (filp->f_pos < inode->i_size) {
+#ifdef DEBUG
+		printk("Block, offset, f_pos: %x %x %x\n",
+		       block, offset, filp->f_pos);
+#endif
+		de = (struct iso_directory_record *) (bh->b_data + offset);
+		inode_number = (block << bufbits) + (offset & (bufsize - 1));
+
+		/* If the length byte is zero, we should move on to the next
+		   CDROM sector.  If we are at the end of the directory, we
+		   kick out of the while loop. */
+
+		if (*((unsigned char *) de) == 0) {
+			brelse(bh);
+			offset = 0;
+			filp->f_pos = ((filp->f_pos & ~(ISOFS_BLOCK_SIZE - 1))
+				       + ISOFS_BLOCK_SIZE);
+			block = isofs_bmap(inode,(filp->f_pos)>>bufbits);
+			if (!block
+			    || !(bh = breada(inode->i_dev, block, bufsize, filp->f_pos,
+					     inode->i_size)))
+				return 0;
+			continue;
+		}
+
+		/* Make sure that the entire directory record is in the
+		   current bh block.
+		   If not, we malloc a buffer, and put the two halves together,
+		   so that we can cleanly read the block */
+
+		old_offset = offset;
+		offset += *((unsigned char *) de);
+		filp->f_pos += *((unsigned char *) de);
+
+		if (offset > bufsize) {
+			unsigned int frag1;
+			frag1 = bufsize - old_offset;
+			cpnt = kmalloc(*((unsigned char *) de),GFP_KERNEL);
+			/* bh is still held here, so leave through the
+			   common exit, which releases it. */
+			if (!cpnt) goto out;
+			memcpy(cpnt, bh->b_data + old_offset, frag1);
+			de = (struct iso_directory_record *) ((char *)cpnt);
+			brelse(bh);
+			offset = filp->f_pos & (bufsize - 1);
+			block = isofs_bmap(inode,(filp->f_pos)>> bufbits);
+			if (!block
+			    || !(bh = breada(inode->i_dev, block, bufsize,
+					     filp->f_pos, inode->i_size))) {
+				/* The old bh was released above and there is
+				   no new one, so only the copy is freed. */
+				kfree(cpnt);
+				return 0;
+			};
+			memcpy((char *)cpnt+frag1, bh->b_data, offset);
+		}
+
+		/* Handle the case of the '.' directory */
+
+		rrflag = 0;
+		i = 1;
+		if (de->name_len[0] == 1 && de->name[0] == 0) {
+			put_fs_byte('.',dirent->d_name);
+			inode_number = inode->i_ino;
+			dpnt = ".";
+		}
+
+		/* Handle the case of the '..' directory */
+
+		else if (de->name_len[0] == 1 && de->name[0] == 1) {
+			put_fs_byte('.',dirent->d_name);
+			put_fs_byte('.',dirent->d_name+1);
+			i = 2;
+			dpnt = "..";
+			if((inode->i_sb->u.isofs_sb.s_firstdatazone) != inode->i_ino)
+				inode_number = inode->u.isofs_i.i_backlink;
+			else
+				inode_number = inode->i_ino;
+
+			/* This should never happen, but who knows.  Try to be forgiving */
+			if(inode_number == -1) {
+				inode_number =
+					isofs_lookup_grandparent(inode,
+					     find_rock_ridge_relocation(de, inode));
+				if(inode_number == -1){ /* Should never happen */
+					printk("Backlink not properly set.\n");
+					goto out;
+				};
+			}
+		}
+
+		/* Handle everything else.  Do name translation if there
+		   is no Rock Ridge NM field. */
+
+		else {
+			/* Do not report hidden or associated files */
+			high_sierra = inode->i_sb->u.isofs_sb.s_high_sierra;
+			if (de->flags[-high_sierra] & 5) {
+				if (cpnt) {
+					kfree(cpnt);
+					cpnt = NULL;
+				};
+				continue;
+			}
+			dlen = de->name_len[0];
+			dpnt = de->name;
+			i = dlen;
+			rrflag = get_rock_ridge_filename(de, &dpnt, &dlen, inode);
+			if (rrflag) {
+				if (rrflag == -1) { /* This is a rock ridge reloc dir */
+					if (cpnt) {
+						kfree(cpnt);
+						cpnt = NULL;
+					};
+					continue;
+				};
+				i = dlen;
+			}
+			else
+				if(inode->i_sb->u.isofs_sb.s_mapping == 'n') {
+					dpnt1 = dpnt;
+					dpnt = kmalloc(dlen, GFP_KERNEL);
+					if (!dpnt) goto out;
+					for (i = 0; i < dlen && i < NAME_MAX; i++) {
+						if (!(c = dpnt1[i])) break;
+						if (c >= 'A' && c <= 'Z') c |= 0x20;  /* lower case */
+						if (c == '.' && i == dlen-3 && de->name[i+1] == ';' && de->name[i+2] == '1')
+							break;  /* Drop trailing '.;1' (ISO9660:1988 7.5.1 requires period) */
+						if (c == ';' && i == dlen-2 && de->name[i+1] == '1')
+							break;  /* Drop trailing ';1' */
+						if (c == ';') c = '.';  /* Convert remaining ';' to '.' */
+						dpnt[i] = c;
+					}
+				}
+			for(j=0; j<i; j++)
+				put_fs_byte(dpnt[j],j+dirent->d_name); /* And save it */
+			if(dpnt1) {
+				kfree(dpnt);
+				dpnt = dpnt1;
+				/* Forget the saved pointer: a stale value would
+				   make a later pass through this branch free
+				   and replace a name it does not own. */
+				dpnt1 = NULL;
+			}
+
+			dcache_add(inode, dpnt, i, inode_number);
+		};
+#if 0
+		printk("Nchar: %d\n",i);
+#endif
+
+		if (rrflag) kfree(dpnt);
+		if (cpnt) {
+			kfree(cpnt);
+			cpnt = NULL;
+		};
+
+		if (i) {
+			put_fs_long(inode_number, &dirent->d_ino);
+			put_fs_byte(0,i+dirent->d_name);
+			put_fs_word(i,&dirent->d_reclen);
+			brelse(bh);
+			return ROUND_UP(NAME_OFFSET(dirent) + i + 1);
+		}
+	}
+	/* We go here for any condition we cannot handle.  We also drop through
+	   to here at the end of the directory. */
+ out:
+	if (cpnt)
+		kfree(cpnt);
+	brelse(bh);
+	return 0;
+}
+
+
+
diff --git a/fs/isofs/file.c b/fs/isofs/file.c
new file mode 100644
index 000000000..ee0877d7b
--- /dev/null
+++ b/fs/isofs/file.c
@@ -0,0 +1,260 @@
+/*
+ * linux/fs/isofs/file.c
+ *
+ * (C) 1992, 1993, 1994 Eric Youngdale Modified for ISO9660 filesystem.
+ *
+ * (C) 1991 Linus Torvalds - minix filesystem
+ *
+ * isofs regular file handling primitives
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/sched.h>
+#include <linux/iso_fs.h>
+#include <linux/fcntl.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+
+#include <linux/dirent.h>
+
+#define NBUF 32
+
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#define MAX(a,b) (((a)>(b))?(a):(b))
+
+#include <linux/fs.h>
+#include <linux/iso_fs.h>
+
+static int isofs_file_read(struct inode *, struct file *, char *, int);
+
+/*
+ * We have mostly NULL's here: the current defaults are ok for
+ * the isofs filesystem.
+ */
+/* File operations for isofs regular files: only read and mmap are set. */
+static struct file_operations isofs_file_operations = {
+ NULL, /* lseek - default */
+ isofs_file_read, /* read */
+ NULL, /* write */
+ NULL, /* readdir - bad */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ generic_mmap, /* mmap */
+ NULL, /* no special open is needed */
+ NULL, /* release */
+ NULL /* fsync */
+};
+
+/* Inode operations for isofs regular files: only bmap is provided. */
+struct inode_operations isofs_file_inode_operations = {
+ &isofs_file_operations, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ isofs_bmap, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+/* This is a heuristic to determine if a file is text or binary. If it
+ * is text, then we translate all 0x0d characters to spaces. If the 0x0d
+ * character is not preceded or followed by a 0x0a, then we turn it into
+ * a 0x0a. A control-Z is also turned into a linefeed.
+ */
+
+/*
+ * Copy "chars" bytes from the kernel buffer to the user buffer "outbuf",
+ * translating DOS text conventions on the way: control-Z (0x1a) becomes a
+ * linefeed, and a carriage return (0x0d) becomes a space in
+ * ISOFS_FILE_TEXT mode or a linefeed in ISOFS_FILE_TEXT_M mode.
+ */
+static inline void unixify_to_fs(char * outbuf, char * buffer, int chars,
+				 int mode)
+{
+	while (chars--) {
+		char ch = *buffer++;
+
+		if (ch == 0x1a) {
+			ch = 0x0a;
+		} else if (ch == 0x0d) {
+			if (mode == ISOFS_FILE_TEXT)
+				ch = ' ';
+			else if (mode == ISOFS_FILE_TEXT_M)
+				ch = 0x0a;
+		}
+		put_fs_byte(ch, outbuf++);
+	}
+}
+
+/*This function determines if a given file has a DOS-like text format or not*/
+
+/*
+ * Classify a file from its first buffer and record the verdict in
+ * inode->u.isofs_i.i_file_format.  If the first block cannot be mapped or
+ * read, the format is left untouched.
+ */
+static void isofs_determine_filetype(struct inode * inode)
+{
+	struct buffer_head * bh;
+	unsigned char * pnt;
+	int block;
+	int result, i;
+
+	block = isofs_bmap(inode,0);
+	if (!block)
+		return;
+	bh = bread(inode->i_dev,block, ISOFS_BUFFER_SIZE(inode));
+	if (!bh)
+		return;
+
+	/* Text without any linefeed stays ISOFS_FILE_TEXT_M. */
+	result = ISOFS_FILE_TEXT_M;
+	pnt = (unsigned char *) bh->b_data;
+	for(i=0;i<(inode->i_size < ISOFS_BUFFER_SIZE(inode) ? inode->i_size : ISOFS_BUFFER_SIZE(inode));
+	    i++,pnt++){
+		unsigned char ch = *pnt;
+
+		if (ch & 0x80) {
+			/* High bit set: binary. */
+			result = ISOFS_FILE_BINARY;
+			break;
+		}
+		if (ch >= 0x20 || ch == 0x1a)
+			continue;
+		if (ch == 0x0a) {
+			/* A linefeed was seen. */
+			result = ISOFS_FILE_TEXT;
+			continue;
+		}
+		if (ch >= 0x9 && ch <= 0x0d)
+			continue;
+		/* Any other control character: binary. */
+		result = ISOFS_FILE_BINARY;
+		break;
+	}
+	brelse(bh);
+	inode->u.isofs_i.i_file_format = result;
+}
+
+/*
+ * Read up to "count" bytes from an isofs file at filp->f_pos into the
+ * user buffer "buf", advancing f_pos.
+ *
+ * Returns the number of bytes read, 0 at or beyond end of file, -EINVAL
+ * for a NULL inode or one that is neither a regular file nor a directory,
+ * and -EIO if nothing could be read.  Files classified as text are passed
+ * through unixify_to_fs() instead of being copied verbatim.
+ */
+static int isofs_file_read(struct inode * inode, struct file * filp, char * buf, int count)
+{
+ int read,left,chars;
+ int block, blocks, offset, total_blocks;
+ int bhrequest;
+ int ra_blocks, max_block, nextblock;
+ struct buffer_head ** bhb, ** bhe;
+ struct buffer_head * bhreq[NBUF];
+ struct buffer_head * buflist[NBUF];
+
+ if (!inode) {
+ printk("isofs_file_read: inode = NULL\n");
+ return -EINVAL;
+ }
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
+ printk("isofs_file_read: mode = %07o\n",inode->i_mode);
+ return -EINVAL;
+ }
+ /* Classify the file as text or binary on first use. */
+ if (inode->u.isofs_i.i_file_format == ISOFS_FILE_UNKNOWN)
+ isofs_determine_filetype(inode);
+ /* Clamp the request to what is left before end of file. */
+ if (filp->f_pos > inode->i_size)
+ left = 0;
+ else
+ left = inode->i_size - filp->f_pos;
+ if (left > count)
+ left = count;
+ if (left <= 0)
+ return 0;
+ read = 0;
+ block = filp->f_pos >> ISOFS_BUFFER_BITS(inode);
+ offset = (inode->u.isofs_i.i_first_extent + filp->f_pos)
+ & (ISOFS_BUFFER_SIZE(inode)-1);
+ blocks = (left + offset + ISOFS_BUFFER_SIZE(inode) - 1) / ISOFS_BUFFER_SIZE(inode);
+ /* buflist is a ring: bhb is where blocks are added, bhe where they are consumed. */
+ bhb = bhe = buflist;
+
+ ra_blocks = read_ahead[MAJOR(inode->i_dev)] / (BLOCK_SIZE >> 9);
+ if(ra_blocks > blocks) blocks = ra_blocks;
+
+ /*
+ * this is for stopping read ahead at EOF. It's important for
+ * reading PhotoCD's, because they have many small data tracks instead
+ * of one big. And between two data-tracks are some unreadable sectors.
+ * A read ahead after a EOF may try to read such an unreadable sector.
+ * kraxel@cs.tu-berlin.de (Gerd Knorr)
+ */
+ total_blocks = (inode->i_size + (1 << ISOFS_BUFFER_BITS(inode)) - 1)
+ >> ISOFS_BUFFER_BITS(inode);
+ if (block + blocks > total_blocks)
+ blocks = total_blocks - block;
+
+ /* max_block and nextblock are assigned here but not used below. */
+ max_block = (inode->i_size + BLOCK_SIZE - 1)/BLOCK_SIZE;
+ nextblock = -1;
+
+ /* We do this in a two stage process. We first try and request
+ as many blocks as we can, then we wait for the first one to
+ complete, and then we try and wrap up as many as are actually
+ done. This routine is rather generic, in that it can be used
+ in a filesystem by substituting the appropriate function in
+ for getblk.
+
+ This routine is optimized to make maximum use of the various
+ buffers and caches. */
+
+ do {
+ bhrequest = 0;
+ while (blocks) {
+ int uptodate;
+ --blocks;
+ *bhb = getblk(inode->i_dev,isofs_bmap(inode, block++), ISOFS_BUFFER_SIZE(inode));
+ uptodate = 1;
+ /* A buffer that is not up to date is queued for reading. */
+ if (*bhb && !(*bhb)->b_uptodate) {
+ uptodate = 0;
+ bhreq[bhrequest++] = *bhb;
+ };
+
+ if (++bhb == &buflist[NBUF])
+ bhb = buflist;
+
+ /* If the block we have on hand is uptodate, go ahead
+ and complete processing. */
+ if(uptodate) break;
+
+ /* The ring is full once the producer catches up with the consumer. */
+ if (bhb == bhe)
+ break;
+ }
+
+ /* Now request them all */
+ if (bhrequest)
+ ll_rw_block(READ, bhrequest, bhreq);
+
+ do{ /* Finish off all I/O that has actually completed */
+ if (*bhe) {/* test for valid buffer */
+ wait_on_buffer(*bhe);
+ /* Read error: drop the buffer and stop the whole read. */
+ if (!(*bhe)->b_uptodate) {
+ brelse(*bhe);
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ left = 0;
+ break;
+ }
+ }
+
+ if (left < ISOFS_BUFFER_SIZE(inode) - offset)
+ chars = left;
+ else
+ chars = ISOFS_BUFFER_SIZE(inode) - offset;
+ filp->f_pos += chars;
+ left -= chars;
+ read += chars;
+ if (*bhe) {
+ if (inode->u.isofs_i.i_file_format == ISOFS_FILE_TEXT ||
+ inode->u.isofs_i.i_file_format == ISOFS_FILE_TEXT_M)
+ unixify_to_fs(buf, offset+(*bhe)->b_data, chars,
+ inode->u.isofs_i.i_file_format);
+ else
+ memcpy_tofs(buf,offset+(*bhe)->b_data,chars);
+ brelse(*bhe);
+ buf += chars;
+ } else {
+ /* No buffer for this block: hand back zeroes. */
+ while (chars-->0)
+ put_fs_byte(0,buf++);
+ }
+ /* Only the first block is entered at a non-zero offset. */
+ offset = 0;
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ } while( bhe != bhb && (*bhe == 0 || !(*bhe)->b_lock) &&
+ (left > 0));
+ } while (left > 0);
+
+/* Release the read-ahead blocks */
+ while (bhe != bhb) {
+ if (*bhe) brelse(*bhe);
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ };
+
+ filp->f_reada = 1;
+
+ if (!read)
+ return -EIO;
+ return read;
+}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
new file mode 100644
index 000000000..c1754e337
--- /dev/null
+++ b/fs/isofs/inode.c
@@ -0,0 +1,707 @@
+/*
+ * linux/fs/isofs/inode.c
+ *
+ * (C) 1992, 1993, 1994 Eric Youngdale Modified for ISO9660 filesystem.
+ *
+ * (C) 1991 Linus Torvalds - minix filesystem
+ */
+
+#include <linux/stat.h>
+#include <linux/sched.h>
+#include <linux/iso_fs.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/malloc.h>
+#include <linux/errno.h>
+
+#include <asm/system.h>
+#include <asm/segment.h>
+
+#ifdef LEAK_CHECK
+/* Debug-only counters (built only with LEAK_CHECK defined): adjusted by the
+   leak_check_* wrappers in this file and printed by isofs_put_super(). */
+static int check_malloc = 0;
+static int check_bread = 0;
+#endif
+
+/*
+ * put_super operation for isofs (installed through isofs_sops).
+ *
+ * Under the superblock lock it clears sb->s_dev, the same "s_dev = 0"
+ * convention isofs_read_super() uses on its failure paths.  With
+ * LEAK_CHECK defined it first reports the allocation/buffer counters.
+ */
+void isofs_put_super(struct super_block *sb)
+{
+	lock_super(sb);
+#ifdef LEAK_CHECK
+	/* Whatever the leak_check_* wrappers still count as outstanding. */
+	printk("Outstanding mallocs:%d, outstanding buffers: %d\n", check_malloc, check_bread);
+#endif
+	sb->s_dev = 0;
+	unlock_super(sb);
+}
+
+/* Superblock operations table for isofs; isofs_read_super() installs it as
+   s->s_op.  The initialisers are positional, and the NULL slots are
+   operations this filesystem does not supply. */
+static struct super_operations isofs_sops = {
+ isofs_read_inode,
+ NULL, /* notify_change */
+ NULL, /* write_inode */
+ NULL, /* put_inode */
+ isofs_put_super,
+ NULL, /* write_super */
+ isofs_statfs,
+ NULL /* NOTE(review): unlabelled last slot - presumably remount_fs; confirm against struct super_operations */
+};
+
+/* Mount options as filled in by parse_options(); the defaults noted below
+   are the ones parse_options() assigns before reading the option string. */
+struct iso9660_options{
+ char map; /* "map=": 'n' (normal, default) or 'o' (off) */
+ char rock; /* 'y' by default, 'n' after "norock" */
+ char cruft; /* 'n' by default, 'y' after "cruft" */
+ unsigned char conversion; /* "conv=": 'b'inary, 't'ext, 'm'text or 'a'uto (default) */
+ unsigned int blocksize; /* "block=": 1024 (default) or 2048 */
+ gid_t gid; /* "gid=": decimal value, default 0 */
+ uid_t uid; /* "uid=": decimal value, default 0 */
+};
+
+/*
+ * Parse the comma-separated isofs mount option string into *popt.
+ *
+ * Every field of *popt is first set to its default (map 'n', rock 'y',
+ * cruft 'n', conversion 'a', blocksize 1024, uid/gid 0); a NULL option
+ * string leaves those defaults untouched.  The string is tokenised in
+ * place with strtok(), and "name=value" tokens are split by overwriting
+ * the '=' with a NUL, so the caller's buffer is modified.
+ *
+ * Returns 1 on success, 0 if an option is unknown or carries a bad value.
+ */
+static int parse_options(char *options, struct iso9660_options * popt)
+{
+	char *opt, *arg;
+
+	popt->map = 'n';
+	popt->rock = 'y';
+	popt->cruft = 'n';
+	popt->conversion = 'a';
+	popt->blocksize = 1024;
+	popt->gid = 0;
+	popt->uid = 0;
+
+	if (options == NULL)
+		return 1;
+
+	for (opt = strtok(options,","); opt != NULL; opt = strtok(NULL,",")) {
+		/* The two flag options are recognised by prefix only. */
+		if (!strncmp(opt,"norock",6)) {
+			popt->rock = 'n';
+			continue;
+		}
+		if (!strncmp(opt,"cruft",5)) {
+			popt->cruft = 'y';
+			continue;
+		}
+
+		/* Split "name=value"; every remaining option needs a value. */
+		arg = strchr(opt,'=');
+		if (arg != NULL)
+			*arg++ = 0;
+		if (arg == NULL)
+			return 0;
+
+		if (!strcmp(opt,"map")) {
+			/* Either the one-letter form or the spelled-out word. */
+			if ((arg[0] == 'o' || arg[0] == 'n') && arg[1] == 0)
+				popt->map = arg[0];
+			else if (!strcmp(arg,"off"))
+				popt->map = 'o';
+			else if (!strcmp(arg,"normal"))
+				popt->map = 'n';
+			else
+				return 0;
+			continue;
+		}
+
+		if (!strcmp(opt,"conv")) {
+			if ((arg[0] == 'b' || arg[0] == 't' ||
+			     arg[0] == 'm' || arg[0] == 'a') && arg[1] == 0)
+				popt->conversion = arg[0];
+			else if (!strcmp(arg,"binary"))
+				popt->conversion = 'b';
+			else if (!strcmp(arg,"text"))
+				popt->conversion = 't';
+			else if (!strcmp(arg,"mtext"))
+				popt->conversion = 'm';
+			else if (!strcmp(arg,"auto"))
+				popt->conversion = 'a';
+			else
+				return 0;
+			continue;
+		}
+
+		if (!strcmp(opt,"block") || !strcmp(opt,"uid") || !strcmp(opt,"gid")) {
+			unsigned int num = 0;
+			char *p;
+
+			/* Plain decimal; anything left over after the digits is an error. */
+			for (p = arg; *p >= '0' && *p <= '9'; p++)
+				num = num * 10 + (*p - '0');
+			if (*p)
+				return 0;
+
+			/* The first letter is enough to tell the three names apart. */
+			if (opt[0] == 'b') {
+				if (num != 1024 && num != 2048)
+					return 0;
+				popt->blocksize = num;
+			} else if (opt[0] == 'u') {
+				popt->uid = num;
+			} else {
+				popt->gid = num;
+			}
+			continue;
+		}
+
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * read_super entry point for isofs.
+ *
+ * s      - superblock being set up; s->s_dev names the device on entry.
+ * data   - mount option string handed to parse_options() (may be NULL).
+ * silent - when non-zero, the "Unable to identify CD-ROM format" message
+ *          is suppressed.
+ *
+ * Scans blocks 16..99 (in ISOFS_BLOCK_BITS-sized units) for a High Sierra
+ * or ISO 9660 volume descriptor, fills in s->u.isofs_sb from the primary
+ * descriptor, and reads the root inode into s->s_mounted.
+ *
+ * Returns s on success.  On any failure s->s_dev is set to 0 and NULL is
+ * returned.
+ *
+ * Everything needed from the descriptor buffer (including the root
+ * directory extent) is extracted before the buffer is released, and the
+ * superblock is unlocked exactly once on every path.
+ */
+struct super_block *isofs_read_super(struct super_block *s,void *data,
+				     int silent)
+{
+	struct buffer_head *bh;
+	int iso_blknum;
+	unsigned int blocksize_bits;
+	int high_sierra;
+	int dev=s->s_dev;
+	int root_ino;	/* byte offset of the root directory record, used as inode number */
+	struct iso_volume_descriptor *vdp;
+	struct hs_volume_descriptor *hdp;
+
+	struct iso_primary_descriptor *pri = NULL;
+	struct hs_primary_descriptor *h_pri = NULL;
+
+	struct iso_directory_record *rootp;
+
+	struct iso9660_options opt;
+
+	if (!parse_options((char *) data,&opt)) {
+		s->s_dev = 0;
+		return NULL;
+	}
+
+#if 0
+	printk("map = %c\n", opt.map);
+	printk("rock = %c\n", opt.rock);
+	printk("cruft = %c\n", opt.cruft);
+	printk("conversion = %c\n", opt.conversion);
+	printk("blocksize = %d\n", opt.blocksize);
+	printk("gid = %d\n", opt.gid);
+	printk("uid = %d\n", opt.uid);
+#endif
+
+	/* log2 of the block size; parse_options() only lets 1024 or 2048 through. */
+	blocksize_bits = 0;
+	{
+		int i = opt.blocksize;
+		while (i != 1){
+			blocksize_bits++;
+			i >>=1;
+		};
+	};
+	set_blocksize(dev, opt.blocksize);
+
+	lock_super(s);
+
+	s->u.isofs_sb.s_high_sierra = high_sierra = 0; /* default is iso9660 */
+
+	for (iso_blknum = 16; iso_blknum < 100; iso_blknum++) {
+		if (!(bh = bread(dev, iso_blknum << (ISOFS_BLOCK_BITS-blocksize_bits), opt.blocksize))) {
+			s->s_dev=0;
+			printk("isofs_read_super: bread failed, dev 0x%x iso_blknum %d\n",
+			       dev, iso_blknum);
+			unlock_super(s);
+			return NULL;
+		}
+
+		/* The same block is examined under both descriptor layouts. */
+		vdp = (struct iso_volume_descriptor *)bh->b_data;
+		hdp = (struct hs_volume_descriptor *)bh->b_data;
+
+
+		if (strncmp (hdp->id, HS_STANDARD_ID, sizeof hdp->id) == 0) {
+			/* NOTE(review): the ISO_VD_END test can only fire if it
+			   equals ISO_VD_PRIMARY; kept as in the original. */
+			if (isonum_711 (hdp->type) != ISO_VD_PRIMARY)
+				goto out;
+			if (isonum_711 (hdp->type) == ISO_VD_END)
+				goto out;
+
+			s->u.isofs_sb.s_high_sierra = 1;
+			high_sierra = 1;
+			opt.rock = 'n';
+			h_pri = (struct hs_primary_descriptor *)vdp;
+			break;
+		};
+
+		if (strncmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) == 0) {
+			if (isonum_711 (vdp->type) != ISO_VD_PRIMARY)
+				goto out;
+			if (isonum_711 (vdp->type) == ISO_VD_END)
+				goto out;
+
+			pri = (struct iso_primary_descriptor *)vdp;
+			break;
+		};
+
+		brelse(bh);
+	}
+	if(iso_blknum == 100) {
+		/* Loop ran to completion: every buffer was already released. */
+		if (!silent)
+			printk("Unable to identify CD-ROM format.\n");
+		s->s_dev = 0;
+		unlock_super(s);
+		return NULL;
+	};
+
+
+	if(high_sierra){
+		rootp = (struct iso_directory_record *) h_pri->root_directory_record;
+		if (isonum_723 (h_pri->volume_set_size) != 1) {
+			printk("Multi-volume disks not (yet) supported.\n");
+			goto out;
+		};
+		s->u.isofs_sb.s_nzones = isonum_733 (h_pri->volume_space_size);
+		s->u.isofs_sb.s_log_zone_size = isonum_723 (h_pri->logical_block_size);
+		s->u.isofs_sb.s_max_size = isonum_733(h_pri->volume_space_size);
+	} else {
+		rootp = (struct iso_directory_record *) pri->root_directory_record;
+		if (isonum_723 (pri->volume_set_size) != 1) {
+			printk("Multi-volume disks not (yet) supported.\n");
+			goto out;
+		};
+		s->u.isofs_sb.s_nzones = isonum_733 (pri->volume_space_size);
+		s->u.isofs_sb.s_log_zone_size = isonum_723 (pri->logical_block_size);
+		s->u.isofs_sb.s_max_size = isonum_733(pri->volume_space_size);
+	}
+
+	s->u.isofs_sb.s_ninodes = 0; /* No way to figure this out easily */
+
+	/* RDE: convert log zone size to bit shift */
+
+	switch (s -> u.isofs_sb.s_log_zone_size)
+	{ case 512: s -> u.isofs_sb.s_log_zone_size = 9; break;
+	  case 1024: s -> u.isofs_sb.s_log_zone_size = 10; break;
+	  case 2048: s -> u.isofs_sb.s_log_zone_size = 11; break;
+
+	  default:
+		printk("Bad logical zone size %ld\n", s -> u.isofs_sb.s_log_zone_size);
+		goto out;
+	}
+
+	/* RDE: data zone now byte offset! */
+
+	s->u.isofs_sb.s_firstdatazone = (isonum_733( rootp->extent)
+					 << s -> u.isofs_sb.s_log_zone_size);
+
+	/* rootp points into bh->b_data, so take the root extent now, while
+	   the buffer is still held. */
+	root_ino = isonum_733 (rootp->extent) << s -> u.isofs_sb.s_log_zone_size;
+
+	s->s_magic = ISOFS_SUPER_MAGIC;
+
+	/* The CDROM is read-only, has no nodes (devices) on it, and since
+	   all of the files appear to be owned by root, we really do not want
+	   to allow suid. (suid or devices will not show up unless we have
+	   Rock Ridge extensions) */
+
+	s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */;
+
+	brelse(bh);
+
+	printk("Max size:%ld Log zone size:%ld\n",
+	       s->u.isofs_sb.s_max_size,
+	       1UL << s->u.isofs_sb.s_log_zone_size);
+	printk("First datazone:%ld Root inode number %d\n",
+	       s->u.isofs_sb.s_firstdatazone >> s -> u.isofs_sb.s_log_zone_size,
+	       root_ino);
+	if(high_sierra) printk("Disc in High Sierra format.\n");
+	unlock_super(s);
+	/* set up enough so that it can read an inode */
+
+	s->s_dev = dev;
+	s->s_op = &isofs_sops;
+	s->u.isofs_sb.s_mapping = opt.map;
+	s->u.isofs_sb.s_rock = (opt.rock == 'y' ? 1 : 0);
+	s->u.isofs_sb.s_conversion = opt.conversion;
+	s->u.isofs_sb.s_cruft = opt.cruft;
+	s->u.isofs_sb.s_uid = opt.uid;
+	s->u.isofs_sb.s_gid = opt.gid;
+	s->s_blocksize = opt.blocksize;
+	s->s_blocksize_bits = blocksize_bits;
+	/* The superblock is already unlocked here; iget() runs unlocked. */
+	s->s_mounted = iget(s, root_ino);
+
+	if (!(s->s_mounted)) {
+		s->s_dev=0;
+		printk("get root inode failed\n");
+		return NULL;
+	}
+
+	if(!check_disk_change(s->s_dev)) return s;
+
+	/* Media changed under us.  The descriptor buffer and the lock were
+	   released above, so only the root inode is left to give back. */
+	iput(s->s_mounted);
+	s->s_mounted = NULL;
+	s->s_dev = 0;
+	return NULL;
+
+ out: /* Kick out for various error conditions; bh and the lock are held */
+	brelse(bh);
+	s->s_dev = 0;
+	unlock_super(s);
+	return NULL;
+}
+
+/*
+ * statfs operation for isofs: copies the filesystem summary to the
+ * user-space buffer `buf` with put_fs_long().
+ *
+ * The free-space and free-inode counts are reported as zero, and the
+ * block size is always 1 << ISOFS_BLOCK_BITS.  f_fsid is not written.
+ */
+void isofs_statfs (struct super_block *sb, struct statfs *buf)
+{
+	/* What kind of filesystem, and its block size. */
+	put_fs_long(ISOFS_SUPER_MAGIC, &buf->f_type);
+	put_fs_long(1 << ISOFS_BLOCK_BITS, &buf->f_bsize);
+
+	/* Block counts: total from the superblock, nothing free. */
+	put_fs_long(sb->u.isofs_sb.s_nzones, &buf->f_blocks);
+	put_fs_long(0, &buf->f_bfree);
+	put_fs_long(0, &buf->f_bavail);
+
+	/* Inode counts: total from the superblock, nothing free. */
+	put_fs_long(sb->u.isofs_sb.s_ninodes, &buf->f_files);
+	put_fs_long(0, &buf->f_ffree);
+
+	put_fs_long(NAME_MAX, &buf->f_namelen);
+	/* Don't know what value to put in buf->f_fsid */
+}
+
+/*
+ * Map a file-relative block number to a device block number.
+ *
+ * The result is the inode's first extent (stored as a byte offset) shifted
+ * down by ISOFS_BUFFER_BITS(inode), plus `block`.  A negative `block` is
+ * logged and mapped to 0.
+ */
+int isofs_bmap(struct inode * inode,int block)
+{
+	if (block >= 0)
+		return (inode->u.isofs_i.i_first_extent >> ISOFS_BUFFER_BITS(inode)) + block;
+
+	printk("_isofs_bmap: block<0");
+	return 0;
+}
+
+/*
+ * read_inode operation for isofs.
+ *
+ * inode->i_ino is the byte offset of the directory record on the device.
+ * The record is read (and reassembled in a kmalloc'ed buffer when it
+ * straddles two buffers), the generic inode fields are derived from it,
+ * and for non-High-Sierra discs parse_rock_ridge_inode() is then given the
+ * chance to override them.
+ *
+ * On a read or allocation failure the inode is filled in as an empty,
+ * unreadable regular file with i_op == NULL.
+ *
+ * The record length and the volume sequence number are copied out of the
+ * buffer before the buffer that holds them is released.
+ */
+void isofs_read_inode(struct inode * inode)
+{
+	unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
+	struct buffer_head * bh;
+	struct iso_directory_record * raw_inode;
+	unsigned char *pnt = NULL;
+	void *cpnt = NULL;
+	int high_sierra;
+	int block;
+	int i;
+	int reclen;		/* first byte of the record: its length */
+	int volume_seq_no;
+
+	block = inode->i_ino >> ISOFS_BUFFER_BITS(inode);
+	if (!(bh=bread(inode->i_dev,block, bufsize))) {
+		printk("unable to read i-node block");
+		goto fail;
+	}
+
+	pnt = ((unsigned char *) bh->b_data
+	       + (inode->i_ino & (bufsize - 1)));
+	raw_inode = ((struct iso_directory_record *) pnt);
+	high_sierra = inode->i_sb->u.isofs_sb.s_high_sierra;
+
+	/* Read the length once; pnt stops being valid when bh is released. */
+	reclen = *pnt;
+
+	if ((inode->i_ino & (bufsize - 1)) + reclen > bufsize){
+		/* The record runs into the next buffer: glue both parts together. */
+		int frag1, offset;
+
+		offset = (inode->i_ino & (bufsize - 1));
+		frag1 = bufsize - offset;
+		cpnt = kmalloc(reclen,GFP_KERNEL);
+		if (cpnt == NULL) {
+			printk(KERN_INFO "NoMem ISO inode %lu\n",inode->i_ino);
+			brelse(bh);
+			goto fail;
+		}
+		memcpy(cpnt, bh->b_data + offset, frag1);
+		brelse(bh);
+		if (!(bh = bread(inode->i_dev,++block, bufsize))) {
+			kfree(cpnt);
+			printk("unable to read i-node block");
+			goto fail;
+		}
+		/* offset becomes the number of bytes that live in the second buffer */
+		offset += reclen - bufsize;
+		memcpy((char *)cpnt+frag1, bh->b_data, offset);
+		pnt = ((unsigned char *) cpnt);
+		raw_inode = ((struct iso_directory_record *) pnt);
+	}
+
+	inode->i_mode = S_IRUGO; /* Everybody gets to read the file. */
+	inode->i_nlink = 1;
+
+	/* flags[-high_sierra]: the flag byte sits one byte earlier on High Sierra discs */
+	if (raw_inode->flags[-high_sierra] & 2) {
+		inode->i_mode = S_IRUGO | S_IXUGO | S_IFDIR;
+		inode->i_nlink = 1; /* Set to 1. We know there are 2, but
+				       the find utility tries to optimize
+				       if it is 2, and it screws up. It is
+				       easier to give 1 which tells find to
+				       do it the hard way. */
+	} else {
+		inode->i_mode = S_IRUGO; /* Everybody gets to read the file. */
+		inode->i_nlink = 1;
+		inode->i_mode |= S_IFREG;
+/* If there are no periods in the name, then set the execute permission bit */
+		for(i=0; i< raw_inode->name_len[0]; i++)
+			if(raw_inode->name[i]=='.' || raw_inode->name[i]==';')
+				break;
+		if(i == raw_inode->name_len[0] || raw_inode->name[i] == ';')
+			inode->i_mode |= S_IXUGO; /* execute permission */
+	}
+	inode->i_uid = inode->i_sb->u.isofs_sb.s_uid;
+	inode->i_gid = inode->i_sb->u.isofs_sb.s_gid;
+	inode->i_size = isonum_733 (raw_inode->size);
+
+	/* There are defective discs out there - we do this to protect
+	   ourselves. A cdrom will never contain more than 700Mb */
+	if((inode->i_size < 0 || inode->i_size > 700000000) &&
+	   inode->i_sb->u.isofs_sb.s_cruft == 'n') {
+		printk("Warning: defective cdrom. Enabling \"cruft\" mount option.\n");
+		inode->i_sb->u.isofs_sb.s_cruft = 'y';
+	}
+
+/* Some discs store another piece of information in the high byte of the
+   file length.  Catch this and mask it off.  WARNING: this will make
+   it impossible for a file to be > 16Mb on the CDROM!!!*/
+
+	if(inode->i_sb->u.isofs_sb.s_cruft == 'y' &&
+	   inode->i_size & 0xff000000){
+/*		printk("Illegal format on cdrom. Pester manufacturer.\n"); */
+		inode->i_size &= 0x00ffffff;
+	}
+
+	if (raw_inode->interleave[0]) {
+		printk("Interleaved files not (yet) supported.\n");
+		inode->i_size = 0;
+	}
+
+	/* I have no idea what file_unit_size is used for, so
+	   we will flag it for now */
+	if(raw_inode->file_unit_size[0] != 0){
+		printk("File unit size != 0 for ISO file (%ld).\n",inode->i_ino);
+	}
+
+	/* I have no idea what other flag bits are used for, so
+	   we will flag it for now */
+#ifdef DEBUG
+	if((raw_inode->flags[-high_sierra] & ~2)!= 0){
+		printk("Unusual flag settings for ISO file (%ld %x).\n",
+		       inode->i_ino, raw_inode->flags[-high_sierra]);
+	}
+#endif
+
+#ifdef DEBUG
+	printk("Get inode %d: %d %d: %d\n",inode->i_ino, block,
+	       ((int)pnt) & 0x3ff, inode->i_size);
+#endif
+
+	inode->i_mtime = inode->i_atime = inode->i_ctime =
+		iso_date(raw_inode->date, high_sierra);
+
+	inode->u.isofs_i.i_first_extent = (isonum_733 (raw_inode->extent) +
+					   isonum_711 (raw_inode->ext_attr_length))
+		<< inode -> i_sb -> u.isofs_sb.s_log_zone_size;
+
+	inode->u.isofs_i.i_backlink = 0xffffffff; /* Will be used for previous directory */
+	switch (inode->i_sb->u.isofs_sb.s_conversion){
+	case 'a':
+		inode->u.isofs_i.i_file_format = ISOFS_FILE_UNKNOWN; /* File type */
+		break;
+	case 'b':
+		inode->u.isofs_i.i_file_format = ISOFS_FILE_BINARY; /* File type */
+		break;
+	case 't':
+		inode->u.isofs_i.i_file_format = ISOFS_FILE_TEXT; /* File type */
+		break;
+	case 'm':
+		inode->u.isofs_i.i_file_format = ISOFS_FILE_TEXT_M; /* File type */
+		break;
+	}
+
+/* Now test for possible Rock Ridge extensions which will override some of
+   these numbers in the inode structure. */
+
+	if (!high_sierra)
+		parse_rock_ridge_inode(raw_inode, inode);
+
+#ifdef DEBUG
+	printk("Inode: %x extent: %x\n",inode->i_ino, inode->u.isofs_i.i_first_extent);
+#endif
+	/* Last read of the record: when it was not copied into cpnt,
+	   raw_inode still points into bh->b_data. */
+	volume_seq_no = isonum_723 (raw_inode->volume_sequence_number);
+
+	brelse(bh);
+
+	inode->i_op = NULL;
+
+	/* A volume number of 0 is nonsense. Disable checking if we see
+	   this */
+	if (inode->i_sb->u.isofs_sb.s_cruft == 'n' &&
+	    volume_seq_no == 0) {
+		printk("Warning: defective cdrom. Enabling \"cruft\" mount option.\n");
+		inode->i_sb->u.isofs_sb.s_cruft = 'y';
+	}
+
+	if (inode->i_sb->u.isofs_sb.s_cruft != 'y' &&
+	    volume_seq_no != 1) {
+		/* i_op stays NULL for such an inode */
+		printk("Multi volume CD somehow got mounted.\n");
+	} else {
+		if (S_ISREG(inode->i_mode))
+			inode->i_op = &isofs_file_inode_operations;
+		else if (S_ISDIR(inode->i_mode))
+			inode->i_op = &isofs_dir_inode_operations;
+		else if (S_ISLNK(inode->i_mode))
+			inode->i_op = &isofs_symlink_inode_operations;
+		else if (S_ISCHR(inode->i_mode))
+			inode->i_op = &chrdev_inode_operations;
+		else if (S_ISBLK(inode->i_mode))
+			inode->i_op = &blkdev_inode_operations;
+		else if (S_ISFIFO(inode->i_mode))
+			init_fifo(inode);
+	}
+	if (cpnt) {
+		kfree (cpnt);
+		cpnt = NULL;
+	}
+	return;
+ fail:
+	/* With a data error we return this information */
+	inode->i_mtime = inode->i_atime = inode->i_ctime = 0;
+	inode->u.isofs_i.i_first_extent = 0;
+	inode->u.isofs_i.i_backlink = 0xffffffff;
+	inode->i_size = 0;
+	inode->i_nlink = 1;
+	inode->i_uid = inode->i_gid = 0;
+	inode->i_mode = S_IFREG; /*Regular file, no one gets to read*/
+	inode->i_op = NULL;
+	return;
+}
+
+/* There are times when we need to know the inode number of a parent of
+ a particular directory. When control passes through a routine that
+ has access to the parent information, it fills it into the inode structure,
+ but sometimes the inode gets flushed out of the queue, and someone
+ remembers the number. When they try to open up again, we have lost
+ the information. The '..' entry on the disc points to the data area
+ for a particular inode, so we can follow these links back up, but since
+ we do not know the inode number, we do not actually know how large the
+ directory is. The disc is almost always correct, and there is
+ enough error checking on the drive itself, but an open ended search
+ makes me a little nervous.
+
+ The bsd iso filesystem uses the extent number for an inode, and this
+ would work really nicely for us except that the read_inode function
+ would not have any clean way of finding the actual directory record
+ that goes with the file. If we had such info, then it would pay
+ to change the inode numbers and eliminate this function.
+*/
+
+/*
+ * parent - any inode on the filesystem; supplies the device, the buffer
+ *          size and the superblock for the Rock Ridge helpers.
+ * extent - extent number of the directory whose inode number is wanted.
+ *
+ * Step 1 finds the ".." record in the first buffer of `extent` to learn
+ * where the enclosing directory starts and how large it is.  Step 2 walks
+ * that enclosing directory for the record whose (possibly relocated)
+ * extent equals `extent`.
+ *
+ * Returns the inode number (byte offset of that record) or -1 on any
+ * failure.  Every exit path releases the buffer it holds and frees the
+ * temporary record copy.
+ */
+int isofs_lookup_grandparent(struct inode * parent, int extent)
+{
+	unsigned long bufsize = ISOFS_BUFFER_SIZE(parent);
+	unsigned char bufbits = ISOFS_BUFFER_BITS(parent);
+	unsigned int block,offset;
+	int parent_dir, inode_number;
+	int old_offset;
+	void * cpnt = NULL;
+	int result;
+	int directory_size;
+	struct buffer_head * bh;
+	struct iso_directory_record * de;
+
+	offset = 0;
+	block = extent << (ISOFS_BLOCK_BITS - bufbits);
+	if (!(bh = bread(parent->i_dev, block, bufsize))) return -1;
+
+	while (1 == 1) {
+		de = (struct iso_directory_record *) (bh->b_data + offset);
+		/* A zero length byte means there are no more records here. */
+		if (*((unsigned char *) de) == 0)
+		{
+			brelse(bh);
+			return -1;
+		}
+
+		offset += *((unsigned char *) de);
+
+		if (offset >= bufsize)
+		{
+			printk(".. Directory not in first block"
+			       " of directory.\n");
+			brelse(bh);
+			return -1;
+		}
+
+		/* A one-byte name of value 1 is the ".." record. */
+		if (de->name_len[0] == 1 && de->name[0] == 1)
+		{
+			parent_dir = find_rock_ridge_relocation(de, parent);
+			directory_size = isonum_733 (de->size);
+			brelse(bh);
+			break;
+		}
+	}
+#ifdef DEBUG
+	printk("Parent dir:%x\n",parent_dir);
+#endif
+	/* Now we know the extent where the parent dir starts on. */
+
+	result = -1;
+
+	offset = 0;
+	block = parent_dir << (ISOFS_BLOCK_BITS - bufbits);
+	if (!block || !(bh = bread(parent->i_dev,block, bufsize)))
+		return -1;
+
+	for(;;)
+	{
+		de = (struct iso_directory_record *) (bh->b_data + offset);
+		inode_number = (block << bufbits)+(offset & (bufsize - 1));
+
+		/* If the length byte is zero, we should move on to the next
+		   CDROM sector. If we are at the end of the directory, we
+		   kick out of the while loop. */
+
+		if (*((unsigned char *) de) == 0)
+		{
+			brelse(bh);
+			offset = 0;
+			block++;
+			directory_size -= bufsize;
+			if(directory_size < 0) return -1;
+			if((block & 1) && (ISOFS_BLOCK_BITS - bufbits))
+				return -1;
+			if (!block
+			    || !(bh = bread(parent->i_dev,block, bufsize)))
+				return -1;
+			continue;
+		}
+
+		/* Make sure that the entire directory record is in the current
+		   bh block. If not, we malloc a buffer, and put the two
+		   halves together, so that we can cleanly read the block. */
+
+		old_offset = offset;
+		offset += *((unsigned char *) de);
+
+		if (offset >= bufsize)
+		{
+			unsigned int frag1;
+			frag1 = bufsize - old_offset;
+			cpnt = kmalloc(*((unsigned char *) de),GFP_KERNEL);
+			/* bh is still held and result is -1: the common exit
+			   releases the buffer for us. */
+			if (!cpnt) goto out;
+			memcpy(cpnt, bh->b_data + old_offset, frag1);
+			de = (struct iso_directory_record *) ((char *)cpnt);
+			brelse(bh);
+			offset -= bufsize;
+			directory_size -= bufsize;
+			if(directory_size < 0) {
+				/* no buffer held here, but the copy must go */
+				kfree(cpnt);
+				return -1;
+			}
+			block++;
+			if(!(bh = bread(parent->i_dev,block,bufsize))) {
+				kfree(cpnt);
+				return -1;
+			};
+			memcpy((char *)cpnt+frag1, bh->b_data, offset);
+		}
+
+		if (find_rock_ridge_relocation(de, parent) == extent){
+			result = inode_number;
+			goto out;
+		}
+
+		if (cpnt) {
+			kfree(cpnt);
+			cpnt = NULL;
+		}
+	}
+
+	/* Common exit while a buffer is held: reached on a match, or when
+	   the record copy could not be allocated. */
+
+ out:
+	if (cpnt) {
+		kfree(cpnt);
+		cpnt = NULL;
+	}
+	brelse(bh);
+#ifdef DEBUG
+	printk("Resultant Inode %d\n",result);
+#endif
+	return result;
+}
+
+#ifdef LEAK_CHECK
+/* Counting wrappers around the allocator and the buffer cache.  The
+   #undefs make the names below refer to the real routines again. */
+#undef malloc
+#undef free_s
+#undef bread
+#undef brelse
+
+/* kmalloc(GFP_KERNEL) that bumps the outstanding-allocation counter. */
+void * leak_check_malloc(unsigned int size){
+	check_malloc++;
+	return kmalloc(size, GFP_KERNEL);
+}
+
+/* kfree_s() that drops the outstanding-allocation counter. */
+void leak_check_free_s(void * obj, int size){
+	check_malloc--;
+	kfree_s(obj, size);
+}
+
+/* bread() that bumps the outstanding-buffer counter. */
+struct buffer_head * leak_check_bread(int dev, int block, int size){
+	check_bread++;
+	return bread(dev, block, size);
+}
+
+/* brelse() that drops the outstanding-buffer counter. */
+void leak_check_brelse(struct buffer_head * bh){
+	check_bread--;
+	brelse(bh);
+}
+
+#endif
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
new file mode 100644
index 000000000..1473b1b7f
--- /dev/null
+++ b/fs/isofs/namei.c
@@ -0,0 +1,268 @@
+/*
+ * linux/fs/isofs/namei.c
+ *
+ * (C) 1992 Eric Youngdale Modified for ISO9660 filesystem.
+ *
+ * (C) 1991 Linus Torvalds - minix filesystem
+ */
+
+#include <linux/sched.h>
+#include <linux/iso_fs.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <asm/segment.h>
+#include <linux/malloc.h>
+
+#include <linux/errno.h>
+
+/*
+ * Compare a looked-up name (name, len) with a directory entry name
+ * (compare, dlen).
+ *
+ * A one-byte entry name is special: a byte of 0 stands for "." (and also
+ * matches an empty lookup name), a byte of 1 stands for "..".
+ *
+ * NOTE! unlike strncmp, isofs_match returns 1 for a match and 0 otherwise
+ * (including when compare is NULL).
+ */
+static int isofs_match(int len,const char * name, char * compare, int dlen)
+{
+	if (compare == NULL)
+		return 0;
+
+	if (dlen == 1) {
+		switch (compare[0]) {
+		case 0:		/* "." */
+			if (len == 0)
+				return 1;
+			compare = ".";
+			break;
+		case 1:		/* ".." */
+			compare = "..";
+			dlen = 2;
+			break;
+		}
+	}
+#if 0
+	if (len <= 2) printk("Match: %d %d %s %d %d \n",len,dlen,compare,de->name[0], dlen);
+#endif
+
+	/* Lengths must agree before the bytes are worth comparing. */
+	return dlen == len && memcmp(name, compare, len) == 0;
+}
+
+/*
+ * isofs_find_entry()
+ *
+ * Searches directory `dir` for an entry called (name, namelen).
+ *
+ * On a match, *ino receives the entry's inode number, *ino_back the
+ * backlink to record for it (dir's inode number, or 0 for "." and ".."),
+ * and the buffer that was current when the match was found is returned
+ * still held - the caller must brelse() it.  It does NOT read the inode
+ * of the entry.
+ *
+ * Returns NULL (with *ino == 0) when nothing matches or on any error; in
+ * that case no buffer is left held and no temporary copy is left
+ * allocated.
+ */
+static struct buffer_head * isofs_find_entry(struct inode * dir,
+	const char * name, int namelen, unsigned long * ino, unsigned long * ino_back)
+{
+	unsigned long bufsize;
+	unsigned char bufbits;
+	unsigned int block, i, f_pos, offset, inode_number;
+	struct buffer_head * bh;
+	void * cpnt = NULL;
+	unsigned int old_offset;
+	unsigned int backlink;
+	int dlen, rrflag, match;
+	int high_sierra = 0;
+	char * dpnt;
+	struct iso_directory_record * de;
+	char c;
+
+	*ino = 0;
+	if (!dir) return NULL;
+
+	/* Only look at dir once it is known to be non-NULL. */
+	bufsize = ISOFS_BUFFER_SIZE(dir);
+	bufbits = ISOFS_BUFFER_BITS(dir);
+
+	if (!(block = dir->u.isofs_i.i_first_extent)) return NULL;
+
+	f_pos = 0;
+
+	offset = f_pos & (bufsize - 1);
+	block = isofs_bmap(dir,f_pos >> bufbits);
+
+	if (!block || !(bh = bread(dir->i_dev,block,bufsize))) return NULL;
+
+	while (f_pos < dir->i_size) {
+		de = (struct iso_directory_record *) (bh->b_data + offset);
+		backlink = dir->i_ino;
+		inode_number = (block << bufbits) + (offset & (bufsize - 1));
+
+		/* If byte is zero, this is the end of file, or time to move to
+		   the next sector. Usually 2048 byte boundaries. */
+
+		if (*((unsigned char *) de) == 0) {
+			brelse(bh);
+			offset = 0;
+			f_pos = ((f_pos & ~(ISOFS_BLOCK_SIZE - 1))
+				 + ISOFS_BLOCK_SIZE);
+			block = isofs_bmap(dir,f_pos>>bufbits);
+			if (!block || !(bh = bread(dir->i_dev,block,bufsize)))
+				return 0;
+			continue; /* Will kick out if past end of directory */
+		}
+
+		old_offset = offset;
+		offset += *((unsigned char *) de);
+		f_pos += *((unsigned char *) de);
+
+		/* Handle case where the directory entry spans two blocks.
+		   Usually 1024 byte boundaries */
+		if (offset >= bufsize) {
+			unsigned int frag1;
+			frag1 = bufsize - old_offset;
+			cpnt = kmalloc(*((unsigned char *) de),GFP_KERNEL);
+			/* bh is still held: leave through the common exit */
+			if (!cpnt) goto out;
+			memcpy(cpnt, bh->b_data + old_offset, frag1);
+
+			de = (struct iso_directory_record *) cpnt;
+			brelse(bh);
+			offset = f_pos & (bufsize - 1);
+			block = isofs_bmap(dir,f_pos>>bufbits);
+			if (!block || !(bh = bread(dir->i_dev,block,bufsize))) {
+				kfree(cpnt);
+				return 0;
+			};
+			memcpy((char *)cpnt+frag1, bh->b_data, offset);
+		}
+
+		/* Handle the '.' case */
+
+		if (de->name[0]==0 && de->name_len[0]==1) {
+			inode_number = dir->i_ino;
+			backlink = 0;
+		}
+
+		/* Handle the '..' case */
+
+		if (de->name[0]==1 && de->name_len[0]==1) {
+#if 0
+			printk("Doing .. (%d %d)",
+			       dir->i_sb->s_firstdatazone,
+			       dir->i_ino);
+#endif
+			if((dir->i_sb->u.isofs_sb.s_firstdatazone) != dir->i_ino)
+				inode_number = dir->u.isofs_i.i_backlink;
+			else
+				inode_number = dir->i_ino;
+			backlink = 0;
+		}
+
+		/* Do not report hidden or associated files */
+		high_sierra = dir->i_sb->u.isofs_sb.s_high_sierra;
+		if (de->flags[-high_sierra] & 5) {
+			if (cpnt) {
+				kfree(cpnt);
+				cpnt = NULL;
+			};
+			continue;
+		}
+
+		dlen = de->name_len[0];
+		dpnt = de->name;
+		/* Now convert the filename in the buffer to lower case */
+		rrflag = get_rock_ridge_filename(de, &dpnt, &dlen, dir);
+		if (rrflag) {
+			if (rrflag == -1) goto out; /* Relocated deep directory */
+		} else {
+			if(dir->i_sb->u.isofs_sb.s_mapping == 'n') {
+				/* Rewrites de->name in place: lower case, ';'
+				   becomes '.', and a trailing ";1" is dropped. */
+				for (i = 0; i < dlen; i++) {
+					c = dpnt[i];
+					if (c >= 'A' && c <= 'Z') c |= 0x20; /* lower case */
+					if (c == ';' && i == dlen-2 && dpnt[i+1] == '1') {
+						dlen -= 2;
+						break;
+					}
+					if (c == ';') c = '.';
+					de->name[i] = c;
+				}
+				/* This allows us to match with and without a trailing
+				   period. */
+				if(dpnt[dlen-1] == '.' && namelen == dlen-1)
+					dlen--;
+			}
+		}
+		match = isofs_match(namelen,name,dpnt,dlen);
+
+		/* A non-zero rrflag other than -1 means dpnt was handed back
+		   by get_rock_ridge_filename() and is ours to free. */
+		if(rrflag) kfree(dpnt);
+		if (match) {
+			if(inode_number == -1) {
+				/* Should only happen for the '..' entry.  de may
+				   point into cpnt, so cpnt is kept until after
+				   this call. */
+				inode_number =
+					isofs_lookup_grandparent(dir,
+					     find_rock_ridge_relocation(de,dir));
+				if(inode_number == -1){
+					/* Should never happen */
+					printk("Backlink not properly set.\n");
+					goto out;
+				}
+			}
+			if (cpnt)
+				kfree(cpnt);
+			*ino = inode_number;
+			*ino_back = backlink;
+			return bh;
+		}
+
+		if (cpnt) {
+			kfree(cpnt);
+			cpnt = NULL;
+		}
+	}
+ out:
+	if (cpnt)
+		kfree(cpnt);
+	brelse(bh);
+	return NULL;
+}
+
+/*
+ * lookup operation for isofs directories.
+ *
+ * Resolves (name, len) inside `dir`, consulting the dcache first and
+ * falling back to isofs_find_entry().  On success *result holds the inode
+ * obtained with iget() and 0 is returned; otherwise *result is NULL and
+ * the return value is -ENOENT (no directory, not a directory, or no such
+ * entry) or -EACCES (iget failed).
+ *
+ * `dir` is released with iput() on every path on which it is non-NULL.
+ */
+int isofs_lookup(struct inode * dir,const char * name, int len,
+	struct inode ** result)
+{
+	unsigned long ino, ino_back;
+	struct buffer_head * bh;
+	struct inode * found;
+	int retval;
+
+#ifdef DEBUG
+	printk("lookup: %x %d\n",dir->i_ino, len);
+#endif
+	*result = NULL;
+	if (!dir)
+		return -ENOENT;
+
+	retval = -ENOENT;
+	if (!S_ISDIR(dir->i_mode))
+		goto done;
+
+	/* Try the cache; a hit implies the entry's backlink is dir itself. */
+	ino = 0;
+	if (dcache_lookup(dir, name, len, &ino))
+		ino_back = dir->i_ino;
+
+	if (!ino) {
+		bh = isofs_find_entry(dir,name,len, &ino, &ino_back);
+		if (!bh)
+			goto done;
+		/* "." and ".." come back with a zero backlink and are not cached. */
+		if (ino_back == dir->i_ino)
+			dcache_add(dir, name, len, ino);
+		brelse(bh);
+	}
+
+	found = iget(dir->i_sb,ino);
+	*result = found;
+	if (!found) {
+		retval = -EACCES;
+		goto done;
+	}
+
+	/* We need this backlink for the ".." entry unless the name that we
+	   are looking up traversed a mount point (in which case the inode
+	   may not even be on an iso9660 filesystem, and writing to
+	   u.isofs_i would only cause memory corruption).
+	*/
+	if (ino_back && !found->i_pipe && found->i_sb == dir->i_sb)
+		found->u.isofs_i.i_backlink = ino_back;
+
+	retval = 0;
+ done:
+	iput(dir);
+	return retval;
+}
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
new file mode 100644
index 000000000..686c1d910
--- /dev/null
+++ b/fs/isofs/rock.c
@@ -0,0 +1,523 @@
+/*
+ * linux/fs/isofs/rock.c
+ *
+ * (C) 1992, 1993 Eric Youngdale
+ *
+ * Rock Ridge Extensions to iso9660
+ */
+#include <linux/config.h>
+#include <linux/stat.h>
+#include <linux/sched.h>
+#include <linux/iso_fs.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/malloc.h>
+
+#include "rock.h"
+
+/* These functions are designed to read the system areas of a directory record
+ * and extract relevant information. There are different functions provided
+ * depending upon what information we need at the time. One function fills
+ * out an inode structure, a second one extracts a filename, a third one
+ * returns a symbolic link name, and a fourth one returns the extent number
+ * for the file. */
+
+/* Pack a two character entry signature into one integer, first character
+   in the high byte.  The result is a constant expression, so it can be
+   used as a case label. */
+#define SIG(A,B) (((A) << 8) | (B))
+
+
+/* This is a way of ensuring that we have something in the system
+   use fields that is compatible with Rock Ridge.  Expects a local
+   "struct rock_ridge * rr"; FAIL is a statement (for example "goto out")
+   that is executed when either magic byte of the SP entry is wrong. */
+#define CHECK_SP(FAIL) \
+	do { \
+		if(rr->u.SP.magic[0] != 0xbe) FAIL; \
+		if(rr->u.SP.magic[1] != 0xef) FAIL; \
+	} while (0)
+
+/* We define a series of macros because each function must do exactly the
+   same thing in certain places.  We use the macros to ensure that everything
+   is done correctly */
+
+/* Locals shared by CHECK_CE and MAYBE_CONTINUE: where the continuation area
+   lives, and the buffer it is read into. */
+#define CONTINUE_DECLS \
+	int cont_extent = 0, cont_offset = 0, cont_size = 0; \
+	void * buffer = 0
+
+/* Remember the continuation area named by the CE entry "rr". */
+#define CHECK_CE \
+	do { \
+		cont_extent = isonum_733(rr->u.CE.extent); \
+		cont_offset = isonum_733(rr->u.CE.offset); \
+		cont_size = isonum_733(rr->u.CE.size); \
+	} while (0)
+
+/* Point CHR at the system use area that follows the (even-padded) name in
+   directory record DE, and set LEN to the number of bytes it occupies.
+   The first byte of DE is the total length of the record. */
+#define SETUP_ROCK_RIDGE(DE,CHR,LEN) \
+	do { \
+		(LEN) = sizeof(struct iso_directory_record) + (DE)->name_len[0]; \
+		if((LEN) & 1) (LEN)++; \
+		(CHR) = ((unsigned char *) (DE)) + (LEN); \
+		(LEN) = *((unsigned char *) (DE)) - (LEN); \
+	} while (0)
+
+/* Release the previous continuation buffer and, if a CE entry was seen
+   (cont_extent != 0), read the continuation area into a fresh buffer,
+   point chr/len at it and jump to LABEL to parse it.
+
+   Requires the CONTINUE_DECLS locals, locals "chr" and "len", and an
+   "out" label in the calling function (taken when kmalloc fails).  When a
+   block cannot be read the buffer is freed, a message is printed and
+   control falls out of the macro with buffer == NULL.
+
+   With 1024 byte buffers a 2048 byte logical block is two device blocks,
+   so the area may have to be assembled from two reads. */
+#define MAYBE_CONTINUE(LABEL,DEV) \
+	do { \
+		if (buffer) { \
+			kfree(buffer); \
+			buffer = NULL; \
+		} \
+		if (cont_extent) { \
+			int block, offset, offset1; \
+			struct buffer_head * bh; \
+			buffer = kmalloc(cont_size,GFP_KERNEL); \
+			if (!buffer) goto out; \
+			block = cont_extent; \
+			offset = cont_offset; \
+			offset1 = 0; \
+			if(ISOFS_BUFFER_SIZE(DEV) == 1024) { \
+				block <<= 1; \
+				if (offset >= 1024) block++; \
+				offset &= 1023; \
+				/* Only data that really crosses the block boundary needs two reads. */ \
+				if(offset + cont_size > 1024) { \
+					bh = bread((DEV)->i_dev, block++, ISOFS_BUFFER_SIZE(DEV)); \
+					if(!bh) { \
+						printk("Unable to read continuation Rock Ridge record\n"); \
+						kfree(buffer); \
+						buffer = NULL; \
+					} else { \
+						memcpy(buffer, bh->b_data + offset, 1024 - offset); \
+						brelse(bh); \
+						offset1 = 1024 - offset; \
+						offset = 0; \
+					} \
+				} \
+			} \
+			if(buffer) { \
+				bh = bread((DEV)->i_dev, block, ISOFS_BUFFER_SIZE(DEV)); \
+				if(bh){ \
+					memcpy(buffer + offset1, bh->b_data + offset, cont_size - offset1); \
+					brelse(bh); \
+					chr = (unsigned char *) buffer; \
+					len = cont_size; \
+					cont_extent = 0; \
+					cont_size = 0; \
+					cont_offset = 0; \
+					goto LABEL; \
+				} \
+				/* Do not leak the buffer when the read fails. */ \
+				kfree(buffer); \
+				buffer = NULL; \
+			} \
+			printk("Unable to read rock-ridge attributes\n"); \
+		} \
+	} while (0)
+
+/* Find the extent that a relocated directory entry really refers to.
+   For the '..' entry (name byte 1, length 1) we want the parent, recorded
+   in a PL entry; for every other entry we want the child, recorded in a CL
+   entry.  Returns that location, or the extent of the directory record
+   itself when Rock Ridge is not in use, the RR flags say no such entry
+   exists, or the system use area is damaged. */
+
+int find_rock_ridge_relocation(struct iso_directory_record * de,
+			       struct inode * inode) {
+	int flag;
+	int len;
+	int retval;
+	unsigned char * chr;
+	CONTINUE_DECLS;
+	flag = 0;
+
+	/* If this is a '..' then we are looking for the parent, otherwise we
+	   are looking for the child */
+
+	if (de->name[0]==1 && de->name_len[0]==1) flag = 1;
+	/* Return value if we do not find appropriate record. */
+	retval = isonum_733 (de->extent);
+
+	if (!inode->i_sb->u.isofs_sb.s_rock) return retval;
+
+	SETUP_ROCK_RIDGE(de, chr, len);
+ repeat:
+	{
+		int rrflag, sig;
+		struct rock_ridge * rr;
+
+		while (len > 1){ /* There may be one byte for padding somewhere */
+			rr = (struct rock_ridge *) chr;
+			/* An entry is at least signature plus length byte */
+			if (rr->len < 3) goto out; /* Something got screwed up here */
+			sig = (chr[0] << 8) + chr[1];
+			chr += rr->len;
+			len -= rr->len;
+			/* Never look at an entry that runs past the end of the area */
+			if (len < 0) goto out;
+
+			switch(sig){
+			case SIG('R','R'):
+				rrflag = rr->u.RR.flags[0];
+				if (flag && !(rrflag & RR_PL)) goto out;
+				if (!flag && !(rrflag & RR_CL)) goto out;
+				break;
+			case SIG('S','P'):
+				CHECK_SP(goto out);
+				break;
+			case SIG('C','L'):
+#ifdef DEBUG
+				printk("RR: CL\n");
+#endif
+				if (flag == 0) {
+					retval = isonum_733(rr->u.CL.location);
+					goto out;
+				};
+				break;
+			case SIG('P','L'):
+#ifdef DEBUG
+				printk("RR: PL\n");
+#endif
+				if (flag != 0) {
+					retval = isonum_733(rr->u.PL.location);
+					goto out;
+				};
+				break;
+			case SIG('C','E'):
+				CHECK_CE; /* This tells us if there is a continuation record */
+				break;
+			default:
+				break;
+			}
+		};
+	};
+	MAYBE_CONTINUE(repeat, inode);
+	return retval;
+ out:
+	if(buffer) kfree(buffer);
+	return retval;
+}
+
+/* Collect the Rock Ridge alternate name (NM entries) of directory record
+   "de".
+
+   Returns 1 and stores a kmalloc'd name in *name and its length in *namlen
+   when a name was found (the caller must kfree it), 0 when there is no NM
+   name (or Rock Ridge is not in use, or the area is damaged), and -1 when
+   the record carries an RE entry, i.e. it is a relocated directory. */
+int get_rock_ridge_filename(struct iso_directory_record * de,
+			   char ** name, int * namlen, struct inode * inode)
+{
+	int len;
+	unsigned char * chr;
+	CONTINUE_DECLS;
+	char * retname = NULL;
+	int retnamlen = 0, truncate=0;
+
+	if (!inode->i_sb->u.isofs_sb.s_rock) return 0;
+
+	SETUP_ROCK_RIDGE(de, chr, len);
+ repeat:
+	{
+		struct rock_ridge * rr;
+		int sig;
+
+		while (len > 1){ /* There may be one byte for padding somewhere */
+			rr = (struct rock_ridge *) chr;
+			/* An entry is at least signature plus length byte */
+			if (rr->len < 3) goto out; /* Something got screwed up here */
+			sig = (chr[0] << 8) + chr[1];
+			chr += rr->len;
+			len -= rr->len;
+			/* Never look at an entry that runs past the end of the area */
+			if (len < 0) goto out;
+
+			switch(sig){
+			case SIG('R','R'):
+				if((rr->u.RR.flags[0] & RR_NM) == 0) goto out;
+				break;
+			case SIG('S','P'):
+				CHECK_SP(goto out);
+				break;
+			case SIG('C','E'):
+				CHECK_CE;
+				break;
+			case SIG('N','M'):
+				if (truncate) break;
+				/* 4 header bytes + 1 flag byte come before the name.
+				   A shorter entry would hand strncat a negative
+				   count, so skip it. */
+				if (rr->len < 5) break;
+				if (rr->u.NM.flags & ~1) {
+					printk("Unsupported NM flag settings (%d)\n",rr->u.NM.flags);
+					break;
+				};
+				if (!retname){
+					retname = (char *) kmalloc (255,GFP_KERNEL);
+					/* This may be a waste, but we only
+					   need this for a moment.  The layers
+					   that call this function should
+					   deallocate the mem fairly soon
+					   after control is returned */
+
+					if (!retname) goto out;
+					*retname = 0; /* Zero length string */
+					retnamlen = 0;
+				};
+				/* Stop collecting once the 255 byte buffer would overflow */
+				if((strlen(retname) + rr->len - 5) >= 254) {
+					truncate = 1;
+					break;
+				};
+				strncat(retname, rr->u.NM.name, rr->len - 5);
+				retnamlen += rr->len - 5;
+				break;
+			case SIG('R','E'):
+#ifdef DEBUG
+				printk("RR: RE (%x)\n", inode->i_ino);
+#endif
+				if (buffer) kfree(buffer);
+				if (retname) kfree(retname);
+				return -1;
+			default:
+				break;
+			}
+		};
+	}
+	MAYBE_CONTINUE(repeat,inode);
+	if(retname){
+		*name = retname;
+		*namlen = retnamlen;
+		return 1;
+	};
+	return 0; /* This file did not have a NM field */
+ out:
+	if(buffer) kfree(buffer);
+	if (retname) kfree(retname);
+	return 0;
+}
+
+/* Override the fields of "inode" with whatever the Rock Ridge entries of
+   directory record "de" provide: POSIX attributes (PX), device numbers
+   (PN), time stamps (TF), the length of a symbolic link (SL) and, for a
+   relocated directory (CL), the attributes of the directory it points to.
+   Always returns 0; parsing simply stops when the area looks damaged. */
+int parse_rock_ridge_inode(struct iso_directory_record * de,
+			   struct inode * inode){
+	int len;
+	unsigned char * chr;
+	CONTINUE_DECLS;
+
+	if (!inode->i_sb->u.isofs_sb.s_rock) return 0;
+
+	SETUP_ROCK_RIDGE(de, chr, len);
+ repeat:
+	{
+		int cnt, sig;
+		struct inode * reloc;
+		struct rock_ridge * rr;
+		int rootflag;
+
+		while (len > 1){ /* There may be one byte for padding somewhere */
+			rr = (struct rock_ridge *) chr;
+			/* An entry is at least signature plus length byte */
+			if (rr->len < 3) goto out; /* Something got screwed up here */
+			sig = (chr[0] << 8) + chr[1];
+			chr += rr->len;
+			len -= rr->len;
+			/* Never look at an entry that runs past the end of the area */
+			if (len < 0) goto out;
+
+			switch(sig){
+			case SIG('R','R'):
+				if((rr->u.RR.flags[0] &
+				    (RR_PX | RR_TF | RR_SL | RR_CL)) == 0) goto out;
+				break;
+			case SIG('S','P'):
+				CHECK_SP(goto out);
+				break;
+			case SIG('C','E'):
+				CHECK_CE;
+				break;
+			case SIG('E','R'):
+				printk("ISO9660 Extensions: ");
+				{ int p;
+				  /* The identifier starts 8 bytes into the entry;
+				     do not print past the end of the entry. */
+				  for(p=0;p<rr->u.ER.len_id && p<rr->len-8;p++)
+					printk("%c",rr->u.ER.data[p]);
+				};
+				printk("\n");
+				break;
+			case SIG('P','X'):
+				inode->i_mode = isonum_733(rr->u.PX.mode);
+				inode->i_nlink = isonum_733(rr->u.PX.n_links);
+				inode->i_uid = isonum_733(rr->u.PX.uid);
+				inode->i_gid = isonum_733(rr->u.PX.gid);
+				break;
+			case SIG('P','N'):
+				{ int high, low;
+				  high = isonum_733(rr->u.PN.dev_high);
+				  low = isonum_733(rr->u.PN.dev_low);
+				  inode->i_rdev = ((high << 8) | (low & 0xff)) & 0xffff;
+				};
+				break;
+			case SIG('T','F'):
+				/* Some RRIP writers incorrectly place ctime in the TF_CREATE field.
+				   Try and handle this correctly for either case. */
+				cnt = 0; /* Rock ridge never appears on a High Sierra disk */
+				if(rr->u.TF.flags & TF_CREATE)
+					inode->i_ctime = iso_date(rr->u.TF.times[cnt++].time, 0);
+				if(rr->u.TF.flags & TF_MODIFY)
+					inode->i_mtime = iso_date(rr->u.TF.times[cnt++].time, 0);
+				if(rr->u.TF.flags & TF_ACCESS)
+					inode->i_atime = iso_date(rr->u.TF.times[cnt++].time, 0);
+				if(rr->u.TF.flags & TF_ATTRIBUTES)
+					inode->i_ctime = iso_date(rr->u.TF.times[cnt++].time, 0);
+				break;
+			case SIG('S','L'):
+				/* Work out how long the link text will be once
+				   get_rock_ridge_symlink() has assembled it; the
+				   two loops must stay in step. */
+				{int slen;
+				 struct SL_component * slp;
+				 slen = rr->len - 5;
+				 slp = &rr->u.SL.link;
+				 inode->i_size = 0;
+				 while (slen > 1){
+					rootflag = 0;
+					switch(slp->flags &~1){
+					case 0:
+						inode->i_size += slp->len;
+						break;
+					case 2:
+						inode->i_size += 1;
+						break;
+					case 4:
+						inode->i_size += 2;
+						break;
+					case 8:
+						rootflag = 1;
+						inode->i_size += 1;
+						break;
+					default:
+						printk("Symlink component flag not implemented\n");
+					};
+					slen -= slp->len + 2;
+					slp = (struct SL_component *) (((char *) slp) + slp->len + 2);
+
+					if(slen < 2) break;
+					/* one more for the '/' between components */
+					if(!rootflag) inode->i_size += 1;
+				 };
+				};
+				break;
+			case SIG('R','E'):
+				printk("Attempt to read inode for relocated directory\n");
+				goto out;
+			case SIG('C','L'):
+#ifdef DEBUG
+				printk("RR CL (%x)\n",inode->i_ino);
+#endif
+				inode->u.isofs_i.i_first_extent = isonum_733(rr->u.CL.location) <<
+					inode -> i_sb -> u.isofs_sb.s_log_zone_size;
+				reloc = iget(inode->i_sb, inode->u.isofs_i.i_first_extent);
+				/* iget can fail; leave the inode as it is in that case */
+				if (!reloc) goto out;
+				inode->i_mode = reloc->i_mode;
+				inode->i_nlink = reloc->i_nlink;
+				inode->i_uid = reloc->i_uid;
+				inode->i_gid = reloc->i_gid;
+				inode->i_rdev = reloc->i_rdev;
+				inode->i_size = reloc->i_size;
+				inode->i_atime = reloc->i_atime;
+				inode->i_ctime = reloc->i_ctime;
+				inode->i_mtime = reloc->i_mtime;
+				iput(reloc);
+				break;
+			default:
+				break;
+			}
+		};
+	}
+	MAYBE_CONTINUE(repeat,inode);
+	return 0;
+ out:
+	if(buffer) kfree(buffer);
+	return 0;
+}
+
+
+/* Returns the name of the file that this inode is symlinked to.  This is
+   in malloc'd memory, so it needs to be freed, once we are through with it.
+   Returns NULL when the directory record cannot be read, memory runs out,
+   or no SL entry is found.
+
+   The result buffer is inode->i_size + 1 bytes long; i_size is the length
+   computed from the same SL entry by parse_rock_ridge_inode(), so the two
+   component loops must stay in step. */
+
+char * get_rock_ridge_symlink(struct inode * inode)
+{
+	unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
+	unsigned char bufbits = ISOFS_BUFFER_BITS(inode);
+	struct buffer_head * bh;
+	unsigned char * pnt;
+	void * cpnt = NULL;
+	char * rpnt;
+	struct iso_directory_record * raw_inode;
+	CONTINUE_DECLS;
+	int block;
+	int sig;
+	int rootflag;
+	int len;
+	int reclen;
+	unsigned char * chr;
+	struct rock_ridge * rr;
+
+	if (!inode->i_sb->u.isofs_sb.s_rock)
+		panic("Cannot have symlink with high sierra variant of iso filesystem\n");
+
+	rpnt = 0;
+
+	/* The inode number is the byte position of the directory record */
+	block = inode->i_ino >> bufbits;
+	if (!(bh=bread(inode->i_dev,block, bufsize))) {
+		printk("unable to read i-node block");
+		return NULL;
+	};
+
+	pnt = ((unsigned char *) bh->b_data) + (inode->i_ino & (bufsize - 1));
+
+	raw_inode = ((struct iso_directory_record *) pnt);
+
+	/* Fetch the record length now: pnt points into bh, which is released
+	   below when the record straddles two blocks. */
+	reclen = *pnt;
+
+	if ((inode->i_ino & (bufsize - 1)) + reclen > bufsize){
+		int frag1, offset;
+
+		/* The record is split over two blocks: glue it together */
+		offset = (inode->i_ino & (bufsize - 1));
+		frag1 = bufsize - offset;
+		cpnt = kmalloc(reclen,GFP_KERNEL);
+		if(!cpnt) {
+			brelse(bh);
+			return NULL;
+		};
+		memcpy(cpnt, bh->b_data + offset, frag1);
+		brelse(bh);
+		if (!(bh = bread(inode->i_dev,++block, bufsize))) {
+			kfree(cpnt);
+			printk("unable to read i-node block");
+			return NULL;
+		};
+		offset += reclen - bufsize;
+		memcpy((char *)cpnt+frag1, bh->b_data, offset);
+		pnt = ((unsigned char *) cpnt);
+		raw_inode = ((struct iso_directory_record *) pnt);
+	};
+
+	/* Now test for possible Rock Ridge extensions which will override some of
+	   these numbers in the inode structure. */
+
+	SETUP_ROCK_RIDGE(raw_inode, chr, len);
+
+ repeat:
+	while (len > 1){ /* There may be one byte for padding somewhere */
+		if (rpnt) break;
+		rr = (struct rock_ridge *) chr;
+		/* An entry is at least signature plus length byte */
+		if (rr->len < 3) goto out; /* Something got screwed up here */
+		sig = (chr[0] << 8) + chr[1];
+		chr += rr->len;
+		len -= rr->len;
+		/* Never look at an entry that runs past the end of the area */
+		if (len < 0) goto out;
+
+		switch(sig){
+		case SIG('R','R'):
+			if((rr->u.RR.flags[0] & RR_SL) == 0) goto out;
+			break;
+		case SIG('S','P'):
+			CHECK_SP(goto out);
+			break;
+		case SIG('C','E'):
+			/* parse_rock_ridge_inode() follows continuation areas when
+			   it sizes the link, so we have to follow them as well. */
+			CHECK_CE;
+			break;
+		case SIG('S','L'):
+			{int slen;
+			 struct SL_component * slp;
+			 slen = rr->len - 5;
+			 slp = &rr->u.SL.link;
+			 while (slen > 1){
+				if (!rpnt){
+					rpnt = (char *) kmalloc (inode->i_size +1, GFP_KERNEL);
+					if (!rpnt) goto out;
+					*rpnt = 0;
+				};
+				rootflag = 0;
+				switch(slp->flags &~1){
+				case 0:
+					strncat(rpnt,slp->text, slp->len);
+					break;
+				case 2:
+					strcat(rpnt,".");
+					break;
+				case 4:
+					strcat(rpnt,"..");
+					break;
+				case 8:
+					rootflag = 1;
+					strcat(rpnt,"/");
+					break;
+				default:
+					printk("Symlink component flag not implemented (%d)\n",slen);
+				};
+				slen -= slp->len + 2;
+				slp = (struct SL_component *) (((char *) slp) + slp->len + 2);
+
+				if(slen < 2) break;
+				if(!rootflag) strcat(rpnt,"/");
+			 };
+			};
+			break;
+		default:
+			break;
+		}
+	};
+	MAYBE_CONTINUE(repeat,inode);
+	brelse(bh);
+
+	if (cpnt) {
+		kfree(cpnt);
+		cpnt = NULL;
+	};
+
+	return rpnt;
+ out:
+	/* Release everything we hold.  rpnt is NULL here unless the link was
+	   already complete when a later allocation failed. */
+	if(buffer) kfree(buffer);
+	brelse(bh);
+	if (cpnt) kfree(cpnt);
+	return rpnt;
+}
+
+
+
+
+
+
diff --git a/fs/isofs/rock.h b/fs/isofs/rock.h
new file mode 100644
index 000000000..36057b8fa
--- /dev/null
+++ b/fs/isofs/rock.h
@@ -0,0 +1,111 @@
+/* These structs are used by the system-use-sharing protocol, in which the
+ Rock Ridge extensions are embedded. It is quite possible that other
+ extensions are present on the disk, and this is fine as long as they
+ all use SUSP */
+
+/* SP entry.  rock.c (CHECK_SP) accepts it only when magic[] is 0xbe 0xef. */
+struct SU_SP{
+ unsigned char magic[2];
+ unsigned char skip;
+};
+
+/* CE entry: names a continuation area.  Each field is decoded with
+   isonum_733() by CHECK_CE in rock.c. */
+struct SU_CE{
+ char extent[8];
+ char offset[8];
+ char size[8];
+};
+
+/* ER entry.  rock.c prints the first len_id bytes of data[] as the name of
+   the extension. */
+struct SU_ER{
+ unsigned char len_id;
+ unsigned char len_des;
+ unsigned char len_src;
+ unsigned char ext_ver;
+ char data[0];
+};
+
+/* RR entry: flags[0] is a mask of the RR_* bits defined at the end of this
+   file. */
+struct RR_RR{
+ char flags[1];
+};
+
+/* PX entry: file mode, link count and owner, each decoded with
+   isonum_733(). */
+struct RR_PX{
+ char mode[8];
+ char n_links[8];
+ char uid[8];
+ char gid[8];
+};
+
+/* PN entry: the two halves of a device number, each decoded with
+   isonum_733(). */
+struct RR_PN{
+ char dev_high[8];
+ char dev_low[8];
+};
+
+
+/* One component of a symbolic link.  rock.c ignores bit 0 of flags; of the
+   remaining bits 0 means text[] holds len bytes of name, 2 means ".",
+   4 means ".." and 8 means the root directory.  The next component starts
+   len + 2 bytes further on. */
+struct SL_component{
+ unsigned char flags;
+ unsigned char len;
+ char text[0];
+};
+
+/* SL entry: a flag byte followed by the first of a run of components. */
+struct RR_SL{
+ unsigned char flags;
+ struct SL_component link;
+};
+
+/* NM entry: alternate name.  rock.c only accepts flag values 0 and 1; the
+   name fills the rest of the entry (entry length - 5 bytes) and is not
+   NUL terminated. */
+struct RR_NM{
+ unsigned char flags;
+ char name[0];
+};
+
+/* CL entry: location of a relocated child directory (isonum_733()). */
+struct RR_CL{
+ char location[8];
+};
+
+/* PL entry: location of the parent of a relocated directory
+   (isonum_733()). */
+struct RR_PL{
+ char location[8];
+};
+
+/* A 7 byte time stamp in the form iso_date() takes. */
+struct stamp{
+ char time[7];
+};
+
+/* TF entry.  times[] holds one stamp for every TF_* bit set in flags;
+   rock.c reads them in the order CREATE, MODIFY, ACCESS, ATTRIBUTES. */
+struct RR_TF{
+ char flags;
+ struct stamp times[0]; /* Variable number of these beasts */
+};
+
+/* These are the bits and their meanings for flags in the TF structure. */
+#define TF_CREATE 1
+#define TF_MODIFY 2
+#define TF_ACCESS 4
+#define TF_ATTRIBUTES 8
+#define TF_BACKUP 16
+#define TF_EXPIRATION 32
+#define TF_EFFECTIVE 64
+#define TF_LONG_FORM 128
+
+/* Common layout of every system use entry: a four byte header followed by
+   a body whose meaning depends on the signature. */
+struct rock_ridge{
+ char signature[2]; /* two letters, e.g. 'N','M'; selects the member of u */
+ unsigned char len; /* length of the whole entry, header included */
+ unsigned char version;
+ union{
+ struct SU_SP SP;
+ struct SU_CE CE;
+ struct SU_ER ER;
+ struct RR_RR RR;
+ struct RR_PX PX;
+ struct RR_PN PN;
+ struct RR_SL SL;
+ struct RR_NM NM;
+ struct RR_CL CL;
+ struct RR_PL PL;
+ struct RR_TF TF;
+ } u;
+};
+
+#define RR_PX 1 /* POSIX attributes */
+#define RR_PN 2 /* POSIX devices */
+#define RR_SL 4 /* Symbolic link */
+#define RR_NM 8 /* Alternate Name */
+#define RR_CL 16 /* Child link */
+#define RR_PL 32 /* Parent link */
+#define RR_RE 64 /* Relocation directory */
+#define RR_TF 128 /* Timestamps */
diff --git a/fs/isofs/symlink.c b/fs/isofs/symlink.c
new file mode 100644
index 000000000..fa4a45ba6
--- /dev/null
+++ b/fs/isofs/symlink.c
@@ -0,0 +1,106 @@
+/*
+ * linux/fs/isofs/symlink.c
+ *
+ * (C) 1992 Eric Youngdale Modified for ISO9660 filesystem.
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * isofs symlink handling code. This is only used with the Rock Ridge
+ * extensions to iso9660
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/iso_fs.h>
+#include <linux/stat.h>
+#include <linux/malloc.h>
+
+static int isofs_readlink(struct inode *, char *, int);
+static int isofs_follow_link(struct inode *, struct inode *, int, int, struct inode **);
+
+/*
+ * symlinks can't do much...
+ *
+ * Only readlink and follow_link are provided; every other slot is NULL.
+ */
+struct inode_operations isofs_symlink_inode_operations = {
+ NULL, /* no file-operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ isofs_readlink, /* readlink */
+ isofs_follow_link, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * Resolve the symbolic link "inode", starting from directory "dir" (the
+ * root of the current process when "dir" is NULL), and return the inode
+ * it leads to through "res_inode".
+ *
+ * Returns 0 with *res_inode == inode when "inode" is not a symlink,
+ * -ENOENT when "inode" is NULL, -ELOOP when more than five links are
+ * already being followed or the link text cannot be read, and otherwise
+ * whatever open_namei() returns.
+ */
+static int isofs_follow_link(struct inode * dir, struct inode * inode,
+			     int flag, int mode, struct inode ** res_inode)
+{
+	char * target = NULL;
+	int status;
+
+	if (dir == NULL) {
+		dir = current->fs->root;
+		dir->i_count++;
+	}
+	if (inode == NULL) {
+		status = -ENOENT;
+		goto fail;
+	}
+	if (!S_ISLNK(inode->i_mode)) {
+		/* Nothing to follow: hand the inode straight back. */
+		iput(dir);
+		*res_inode = inode;
+		return 0;
+	}
+
+	/* Only read the link text while we are within the nesting limit. */
+	if (current->link_count <= 5)
+		target = get_rock_ridge_symlink(inode);
+	if (target == NULL) {
+		status = -ELOOP;
+		goto fail;
+	}
+
+	iput(inode);
+	current->link_count++;
+	status = open_namei(target,flag,mode,res_inode,dir);
+	current->link_count--;
+	kfree(target);
+	return status;
+
+fail:
+	iput(dir);
+	if (inode != NULL)
+		iput(inode);
+	*res_inode = NULL;
+	return status;
+}
+
+/*
+ * Copy the text of the symbolic link "inode" into the user buffer
+ * "buffer", at most "buflen" (capped at 1023) bytes and without a
+ * terminating NUL.  Releases "inode".
+ *
+ * Returns the number of bytes copied, 0 when the link text cannot be
+ * read, or -EINVAL when "inode" is not a symbolic link.
+ */
+static int isofs_readlink(struct inode * inode, char * buffer, int buflen)
+{
+	char * link;
+	int copied;
+
+	if (!S_ISLNK(inode->i_mode)) {
+		iput(inode);
+		return -EINVAL;
+	}
+
+	if (buflen > 1023)
+		buflen = 1023;
+
+	link = get_rock_ridge_symlink(inode);
+	iput(inode);
+	if (link == NULL)
+		return 0;
+
+	for (copied = 0; copied < buflen && link[copied] != 0; copied++)
+		put_fs_byte(link[copied], buffer + copied);
+
+	kfree(link);
+	return copied;
+}
diff --git a/fs/isofs/util.c b/fs/isofs/util.c
new file mode 100644
index 000000000..dbeee868d
--- /dev/null
+++ b/fs/isofs/util.c
@@ -0,0 +1,131 @@
+/*
+ * linux/fs/isofs/util.c
+ *
+ * The special functions in the file are numbered according to the section
+ * of the iso 9660 standard in which they are described. isonum_733 will
+ * convert numbers according to section 7.3.3, etc.
+ *
+ * isofs special functions. This file was lifted in its entirety from
+ * the bsd386 iso9660 filesystem, by Pace Williamson.
+ */
+
+
+/* 7.1.1: unsigned 8 bit number. */
+int
+isonum_711 (char * p)
+{
+	return (*p & 0xff);
+}
+
+/* 7.1.2: signed 8 bit number.  Sign extended by hand so that the result
+   depends neither on the signedness of char nor on the width of int. */
+int
+isonum_712 (char * p)
+{
+	int val;
+
+	val = *p & 0xff;
+	if (val & 0x80)
+		val -= 0x100;
+	return (val);
+}
+
+/* 7.2.1: unsigned 16 bit number, least significant byte first. */
+int
+isonum_721 (char * p)
+{
+	return ((p[0] & 0xff) | ((p[1] & 0xff) << 8));
+}
+
+/* 7.2.2: unsigned 16 bit number, most significant byte first. */
+int
+isonum_722 (char * p)
+{
+	return (((p[0] & 0xff) << 8) | (p[1] & 0xff));
+}
+
+/* 7.2.3: 16 bit number recorded in both byte orders.  Only the little
+   endian copy is used; the two copies are not compared. */
+int
+isonum_723 (char * p)
+{
+	return (isonum_721 (p));
+}
+
+/* 7.3.1: 32 bit number, least significant byte first.  The value is
+   assembled as unsigned so that a set top bit is never shifted into the
+   sign bit of an int. */
+int
+isonum_731 (char * p)
+{
+	unsigned int val;
+
+	val = (unsigned int) (p[0] & 0xff)
+		| ((unsigned int) (p[1] & 0xff) << 8)
+		| ((unsigned int) (p[2] & 0xff) << 16)
+		| ((unsigned int) (p[3] & 0xff) << 24);
+	return ((int) val);
+}
+
+/* 7.3.2: 32 bit number, most significant byte first. */
+int
+isonum_732 (char * p)
+{
+	unsigned int val;
+
+	val = ((unsigned int) (p[0] & 0xff) << 24)
+		| ((unsigned int) (p[1] & 0xff) << 16)
+		| ((unsigned int) (p[2] & 0xff) << 8)
+		| (unsigned int) (p[3] & 0xff);
+	return ((int) val);
+}
+
+/* 7.3.3: 32 bit number recorded in both byte orders (8 bytes).  Only the
+   little endian copy is used; the two copies are not compared. */
+int
+isonum_733 (char * p)
+{
+	return (isonum_731 (p));
+}
+
+/* Convert a 7 byte iso9660 time stamp (years since 1900, month, day, hour,
+   minute, second, offset from GMT in 15 minute steps) to seconds since
+   1970.  "flag" is non-zero for High Sierra, which has no time zone byte.
+   We have to take into account leap years and all of that good stuff.
+
+   The first six bytes are unsigned; only the time zone byte is signed.
+   The stamp holds local time and the offset is what was added to GMT to
+   get it, so the offset is subtracted here.
+
+   Dates before 1970 give 0; dates too late for a signed 32 bit result
+   give 0x7fffffff. */
+int iso_date(char * p, int flag)
+{
+	static const int monlen[12] = {31,28,31,30,31,30,31,31,30,31,30,31};
+	int year, month, day, hour ,minute, second, tz;
+	int crtime, days, i;
+
+	year = (p[0] & 0xff) - 70;
+	month = p[1] & 0xff;
+	day = p[2] & 0xff;
+	hour = p[3] & 0xff;
+	minute = p[4] & 0xff;
+	second = p[5] & 0xff;
+	if (flag == 0) {
+		tz = p[6] & 0xff;
+		/* sign extend */
+		if (tz & 0x80)
+			tz -= 0x100;
+	} else
+		tz = 0; /* High sierra has no time zone */
+
+	if (year < 0)
+		return 0;
+
+	days = year * 365;
+	if (year > 2)
+		days += (year+1) / 4;
+	/* a bad month byte must not index past the table */
+	for (i = 1; i < month && i <= 12; i++)
+		days += monlen[i-1];
+	if (((year+2) % 4) == 0 && month > 2)
+		days++;
+	days += day - 1;
+
+	/* Leave room for the largest hour/minute/second bytes and time zone
+	   correction so that nothing below can overflow. */
+	if (days > (0x7fffffff - (255*3600 + 255*60 + 255 + 52*15*60)) / 86400)
+		return 0x7fffffff;
+
+	crtime = ((((days * 24) + hour) * 60 + minute) * 60)
+		+ second;
+
+	/* timezone offset is unreliable on some disks */
+	if (-48 <= tz && tz <= 52)
+		crtime -= tz * 15 * 60;
+
+	return crtime;
+}
+
diff --git a/fs/locks.c b/fs/locks.c
new file mode 100644
index 000000000..d1de73ad0
--- /dev/null
+++ b/fs/locks.c
@@ -0,0 +1,506 @@
+/*
+ * linux/fs/locks.c
+ *
+ * Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls.
+ * Doug Evans, 92Aug07, dje@sspiff.uucp.
+ *
+ * Deadlock Detection added by Kelly Carmichael, kelly@[142.24.8.65]
+ * September 17, 1994.
+ *
+ * FIXME: one thing isn't handled yet:
+ * - mandatory locks (requires lots of changes elsewhere)
+ *
+ * Edited by Kai Petzke, wpp@marie.physik.tu-berlin.de
+ *
+ * Converted file_lock_table to a linked list from an array, which eliminates
+ * the limits on how many active file locks are open - Chad Page
+ * (pageone@netcom.com), November 27, 1994
+ */
+
+#define DEADLOCK_DETECTION
+
+#include <asm/segment.h>
+
+#include <linux/malloc.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+
+#define OFFSET_MAX ((off_t)0x7fffffff) /* FIXME: move elsewhere? */
+
+static int copy_flock(struct file *filp, struct file_lock *fl, struct flock *l,
+ unsigned int fd);
+static int conflict(struct file_lock *caller_fl, struct file_lock *sys_fl);
+static int overlap(struct file_lock *fl1, struct file_lock *fl2);
+static int lock_it(struct file *filp, struct file_lock *caller, unsigned int fd);
+static struct file_lock *alloc_lock(struct file_lock **pos, struct file_lock *fl,
+ unsigned int fd);
+static void free_lock(struct file_lock **fl);
+#ifdef DEADLOCK_DETECTION
+int locks_deadlocked(int my_pid,int blocked_pid);
+#endif
+
+static struct file_lock *file_lock_table = NULL;
+static struct file_lock *file_lock_free_list = NULL;
+
+/*
+ * F_GETLK: report the first lock on the file behind "fd" that would block
+ * the lock described by the user's "struct flock" at "l".  That lock is
+ * written back to "l"; when nothing conflicts only l_type is changed, to
+ * F_UNLCK.
+ *
+ * Returns 0 on success, -EBADF for a bad descriptor, -EINVAL for an
+ * F_UNLCK or otherwise invalid request, or the verify_area() error.
+ */
+int fcntl_getlk(unsigned int fd, struct flock *l)
+{
+	struct file_lock request;
+	struct file_lock *holder;
+	struct flock flock;
+	struct file *filp;
+	int error;
+
+	if (fd >= NR_OPEN || !(filp = current->files->fd[fd]))
+		return -EBADF;
+	error = verify_area(VERIFY_WRITE,l, sizeof(*l));
+	if (error)
+		return error;
+	memcpy_fromfs(&flock, l, sizeof(flock));
+	if (flock.l_type == F_UNLCK)
+		return -EINVAL;
+	if (!copy_flock(filp, &request, &flock, fd))
+		return -EINVAL;
+
+	/* Stop at the first lock that gets in the way. */
+	for (holder = filp->f_inode->i_flock; holder != NULL; holder = holder->fl_next)
+		if (conflict(&request, holder))
+			break;
+
+	if (holder == NULL) {
+		flock.l_type = F_UNLCK; /* no conflict found */
+	} else {
+		flock.l_pid = holder->fl_owner->pid;
+		flock.l_start = holder->fl_start;
+		/* a lock that runs to OFFSET_MAX is reported with length 0 */
+		if (holder->fl_end == OFFSET_MAX)
+			flock.l_len = 0;
+		else
+			flock.l_len = holder->fl_end - holder->fl_start + 1;
+		flock.l_whence = holder->fl_whence;
+		flock.l_type = holder->fl_type;
+	}
+	memcpy_tofs(l, &flock, sizeof(flock));
+	return 0;
+}
+
+/*
+ * This function implements both F_SETLK and F_SETLKW.
+ *
+ * Returns what lock_it() returns once nothing conflicts, -EBADF for a bad
+ * descriptor or a file not opened in the mode the lock type needs,
+ * -EINVAL for an invalid request, -EAGAIN when F_SETLK meets a
+ * conflicting lock, and -ERESTARTSYS or -EDEADLOCK when an F_SETLKW wait
+ * is interrupted by a signal or would deadlock.
+ */
+
+int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l)
+{
+	struct file_lock request;
+	struct file_lock *holder;
+	struct flock flock;
+	struct file *filp;
+	int required_mode;
+	int error;
+
+	/*
+	 * Get arguments and validate them ...
+	 */
+
+	if (fd >= NR_OPEN || !(filp = current->files->fd[fd]))
+		return -EBADF;
+	error = verify_area(VERIFY_READ, l, sizeof(*l));
+	if (error)
+		return error;
+	memcpy_fromfs(&flock, l, sizeof(flock));
+	if (!copy_flock(filp, &request, &flock, fd))
+		return -EINVAL;
+
+	/*
+	 * Work out which f_mode bits the lock type needs (1 and 2 are the
+	 * bits the original tests for read and write locks), and turn the
+	 * BSD style types into their fcntl equivalents.
+	 */
+	required_mode = 0;
+	switch (request.fl_type) {
+	case F_RDLCK :
+		required_mode = 1;
+		break;
+	case F_WRLCK :
+		required_mode = 2;
+		break;
+	case F_SHLCK :
+		required_mode = 3;
+		request.fl_type = F_RDLCK;
+		break;
+	case F_EXLCK :
+		required_mode = 3;
+		request.fl_type = F_WRLCK;
+		break;
+	}
+	if (required_mode && !(filp->f_mode & required_mode))
+		return -EBADF;
+
+	/*
+	 * Scan for a conflicting lock; every time we have slept the list
+	 * is scanned again from the start.
+	 */
+
+	while (request.fl_type != F_UNLCK) {
+		for (holder = filp->f_inode->i_flock; holder != NULL; holder = holder->fl_next)
+			if (conflict(&request, holder))
+				break;
+		if (holder == NULL)
+			break;
+		/*
+		 * File is locked by another process.  If this is
+		 * F_SETLKW wait for the lock to be released.
+		 */
+		if (cmd != F_SETLKW)
+			return -EAGAIN;
+		if (current->signal & ~current->blocked)
+			return -ERESTARTSYS;
+#ifdef DEADLOCK_DETECTION
+		if (locks_deadlocked(request.fl_owner->pid,holder->fl_owner->pid))
+			return -EDEADLOCK;
+#endif
+		interruptible_sleep_on(&holder->fl_wait);
+		if (current->signal & ~current->blocked)
+			return -ERESTARTSYS;
+	}
+
+	/*
+	 * Lock doesn't conflict with any other lock ...
+	 */
+
+	return lock_it(filp, &request, fd);
+}
+
+#ifdef DEADLOCK_DETECTION
+/*
+ * This function tests for deadlock condition before putting a process to sleep
+ * this detection scheme is recursive... we may need some test as to make it
+ * exit if the function gets stuck due to bad lock data.
+ *
+ * my_pid is the process about to sleep, blocked_pid the owner of the lock
+ * it wants.  For every lock held by my_pid the tasks sleeping on that lock
+ * are examined: if blocked_pid is among them, or (recursively) is waiting
+ * for one of them, sleeping would close a cycle.
+ *
+ * Returns non-zero (-EDEADLOCK) if a deadlock would result, 0 otherwise.
+ */
+
+int locks_deadlocked(int my_pid,int blocked_pid)
+{
+	int ret_val;
+	struct wait_queue *dlock_wait;
+	struct file_lock *fl;
+	for (fl = file_lock_table; fl != NULL; fl = fl->fl_nextlink) {
+		if (fl->fl_owner == NULL) continue; /* not a used lock */
+		if (fl->fl_owner->pid != my_pid) continue;
+		if (fl->fl_wait == NULL) continue; /* no queues */
+		dlock_wait = fl->fl_wait;
+		do {
+			if (dlock_wait->task != NULL) {
+				if (dlock_wait->task->pid == blocked_pid) return -EDEADLOCK;
+				ret_val = locks_deadlocked(dlock_wait->task->pid,blocked_pid);
+				if (ret_val) return -EDEADLOCK;
+			}
+			dlock_wait = dlock_wait->next;
+			/*
+			 * Stop at the end of the list.  The list of waiters may
+			 * be linked in a ring (NOTE(review): confirm against
+			 * add_wait_queue()), in which case the end is the entry
+			 * we started from and a NULL test alone never triggers.
+			 */
+		} while (dlock_wait != NULL && dlock_wait != fl->fl_wait);
+	}
+	return 0;
+}
+#endif
+
+/*
+ * This function is called when the file is closed.
+ *
+ * It frees every lock on the file's inode that "task" holds through
+ * descriptor "fd".
+ */
+
+void fcntl_remove_locks(struct task_struct *task, struct file *filp,
+			unsigned int fd)
+{
+	struct file_lock **link = &filp->f_inode->i_flock;
+
+	/* Find first lock owned by caller ... */
+
+	for (; *link != NULL; link = &(*link)->fl_next)
+		if ((*link)->fl_owner == task && (*link)->fl_fd == fd)
+			break;
+
+	/* The list is sorted by owner and fd, so the caller's locks are
+	   adjacent; free_lock() moves the next one into *link. */
+
+	while (*link != NULL && (*link)->fl_owner == task && (*link)->fl_fd == fd)
+		free_lock(link);
+}
+
+/*
+ * Verify a "struct flock" and copy it to a "struct file_lock" ...
+ * Result is a boolean indicating success.
+ *
+ * The request is rejected when the lock type or l_whence is unknown, when
+ * the absolute start would be negative or beyond OFFSET_MAX, or when
+ * l_len is negative.  A length of 0, or one that would run past
+ * OFFSET_MAX, locks through to OFFSET_MAX.  All range checks are done
+ * before the arithmetic so that nothing depends on overflow wrapping.
+ */
+
+static int copy_flock(struct file *filp, struct file_lock *fl, struct flock *l,
+		      unsigned int fd)
+{
+	off_t start;
+
+	if (!filp->f_inode)	/* just in case */
+		return 0;
+	if (l->l_type != F_UNLCK && l->l_type != F_RDLCK && l->l_type != F_WRLCK
+	 && l->l_type != F_SHLCK && l->l_type != F_EXLCK)
+		return 0;
+	switch (l->l_whence) {
+	case 0 /*SEEK_SET*/ : start = 0; break;
+	case 1 /*SEEK_CUR*/ : start = filp->f_pos; break;
+	case 2 /*SEEK_END*/ : start = filp->f_inode->i_size; break;
+	default : return 0;
+	}
+	if (l->l_len < 0)
+		return 0;
+	if (l->l_start >= 0) {
+		if (start > OFFSET_MAX - l->l_start)
+			return 0;	/* would run past OFFSET_MAX */
+	} else if (start < 0) {
+		return 0;		/* both negative: negative position */
+	}
+	start += l->l_start;
+	if (start < 0)
+		return 0;
+	fl->fl_type = l->l_type;
+	fl->fl_start = start;	/* we record the absolute position */
+	fl->fl_whence = 0;	/* FIXME: do we record {l_start} as passed? */
+	if (l->l_len == 0 || l->l_len - 1 > OFFSET_MAX - start)
+		fl->fl_end = OFFSET_MAX;
+	else
+		fl->fl_end = start + l->l_len - 1;
+	fl->fl_owner = current;
+	fl->fl_fd = fd;
+	fl->fl_wait = NULL;	/* just for cleanliness */
+	return 1;
+}
+
+/*
+ * Determine if lock {sys_fl} blocks lock {caller_fl} ...
+ *
+ * Locks held by the same owner through the same descriptor never block
+ * each other, and neither do locks whose ranges do not overlap.  Beyond
+ * that a write lock is blocked by anything and a read lock by anything
+ * but another read lock.
+ */
+
+static int conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+{
+	int same_holder = caller_fl->fl_owner == sys_fl->fl_owner
+		       && caller_fl->fl_fd == sys_fl->fl_fd;
+
+	if (same_holder || !overlap(caller_fl, sys_fl))
+		return 0;
+	if (caller_fl->fl_type == F_WRLCK)
+		return 1;	/* overlapping region not owned by caller */
+	if (caller_fl->fl_type == F_RDLCK)
+		return sys_fl->fl_type != F_RDLCK;
+	return 0;	/* shouldn't get here, but just in case */
+}
+
+/* True when the byte ranges of the two locks share at least one byte. */
+static int overlap(struct file_lock *fl1, struct file_lock *fl2)
+{
+	/* they are disjoint only when one ends before the other starts */
+	return !(fl1->fl_end < fl2->fl_start || fl2->fl_end < fl1->fl_start);
+}
+
+/*
+ * Add a lock to a file ...
+ * Result is 0 for success or -ENOLCK.
+ *
+ * We merge adjacent locks whenever possible.
+ *
+ * WARNING: We assume the lock doesn't conflict with any other lock.
+ *
+ * "caller" describes the new lock (or, with type F_UNLCK, the region to
+ * unlock); it is only read and adjusted here, the entry that ends up on
+ * the inode's list is either an existing lock that is reused or a copy
+ * made by alloc_lock().
+ */
+
+/*
+ * Rewritten by Kai Petzke:
+ * We sort the lock list first by owner, then by the starting address.
+ *
+ * To make freeing a lock much faster, we keep a pointer to the lock before the
+ * actual one. But the real gain of the new coding was, that lock_it() and
+ * unlock_it() became one function.
+ *
+ * To all purists: Yes, I use a few goto's. Just pass on to the next function.
+ */
+
+static int lock_it(struct file *filp, struct file_lock *caller, unsigned int fd)
+{
+ struct file_lock *fl;
+ /* old lock of another type that sticks out below the new region */
+ struct file_lock *left = 0;
+ /* old lock of another type that sticks out above the new region */
+ struct file_lock *right = 0;
+ /* address of the pointer that links to the lock being examined */
+ struct file_lock **before;
+ /* set once the new region is represented in the list (or, for
+    F_UNLCK, once something was found to unlock) */
+ int added = 0;
+
+ /*
+ * Find the first old lock with the same owner as the new lock.
+ */
+
+ before = &filp->f_inode->i_flock;
+ while ((fl = *before) &&
+ (caller->fl_owner != fl->fl_owner ||
+ caller->fl_fd != fl->fl_fd))
+ before = &fl->fl_next;
+
+ /*
+ * Look up all locks of this owner.
+ */
+
+ while ( (fl = *before)
+ && caller->fl_owner == fl->fl_owner
+ && caller->fl_fd == fl->fl_fd) {
+ /*
+ * Detect adjacent or overlapping regions (if same lock type)
+ */
+ if (caller->fl_type == fl->fl_type) {
+ if (fl->fl_end < caller->fl_start - 1)
+ goto next_lock;
+ /*
+ * If the next lock in the list has entirely bigger
+ * addresses than the new one, insert the lock here.
+ */
+ if (fl->fl_start > caller->fl_end + 1)
+ break;
+
+ /*
+ * If we come here, the new and old lock are of the
+ * same type and adjacent or overlapping. Make one
+ * lock yielding from the lower start address of both
+ * locks to the higher end address.
+ */
+ if (fl->fl_start > caller->fl_start)
+ fl->fl_start = caller->fl_start;
+ else
+ caller->fl_start = fl->fl_start;
+ if (fl->fl_end < caller->fl_end)
+ fl->fl_end = caller->fl_end;
+ else
+ caller->fl_end = fl->fl_end;
+ /* already merged into an earlier lock: this one is redundant;
+    free_lock() moves the next lock into *before */
+ if (added) {
+ free_lock(before);
+ continue;
+ }
+ /* from now on the merged old lock stands for the new one */
+ caller = fl;
+ added = 1;
+ goto next_lock;
+ }
+ /*
+ * Processing for different lock types is a bit more complex.
+ */
+ if (fl->fl_end < caller->fl_start)
+ goto next_lock;
+ if (fl->fl_start > caller->fl_end)
+ break;
+ /* unlocking inserts nothing, so overlapping an old lock is enough */
+ if (caller->fl_type == F_UNLCK)
+ added = 1;
+ if (fl->fl_start < caller->fl_start)
+ left = fl;
+ /*
+ * If the next lock in the list has a higher end address than
+ * the new one, insert the new one here.
+ */
+ if (fl->fl_end > caller->fl_end) {
+ right = fl;
+ break;
+ }
+ if (fl->fl_start >= caller->fl_start) {
+ /*
+ * The new lock completely replaces an old one (This may
+ * happen several times).
+ */
+ if (added) {
+ free_lock(before);
+ continue;
+ }
+ /*
+ * Replace the old lock with the new one. Wake up
+ * anybody waiting for the old one, as the change in
+ * lock type might satisfy his needs.
+ */
+ wake_up(&fl->fl_wait);
+ fl->fl_start = caller->fl_start;
+ fl->fl_end = caller->fl_end;
+ fl->fl_type = caller->fl_type;
+ caller = fl;
+ added = 1;
+ }
+ /*
+ * Go on to next lock.
+ */
+next_lock:
+ before = &(*before)->fl_next;
+ }
+
+ if (! added) {
+ if (caller->fl_type == F_UNLCK) {
+/*
+ * XXX - under iBCS-2, attempting to unlock a not-locked region is
+ * not considered an error condition, although I'm not sure if this
+ * should be a default behavior (it makes porting to native Linux easy)
+ * or a personality option.
+ *
+ * Does Xopen/1170 say anything about this?
+ * - drew@Colorado.EDU
+ */
+#if 0
+ return -EINVAL;
+#else
+ return 0;
+#endif
+ }
+ /* no old lock could be reused: insert a fresh one at *before */
+ if (! (caller = alloc_lock(before, caller, fd)))
+ return -ENOLCK;
+ }
+ if (right) {
+ if (left == right) {
+ /*
+ * The new lock breaks the old one in two pieces, so we
+ * have to allocate one more lock (in this case, even
+ * F_UNLCK may fail!).
+ */
+ if (! (left = alloc_lock(before, right, fd))) {
+ if (! added)
+ free_lock(before);
+ return -ENOLCK;
+ }
+ }
+ /* trim the old lock on the right so it starts after the new region */
+ right->fl_start = caller->fl_end + 1;
+ }
+ /* trim the old lock on the left so it ends before the new region */
+ if (left)
+ left->fl_end = caller->fl_start - 1;
+ return 0;
+}
+
+/*
+ * alloc_lock() takes a lock structure off the free list (or allocates a
+ * new one when the free list is empty), fills it in from "fl" and inserts
+ * it at the position pos of the linked list.
+ *
+ * Returns the new lock, or NULL if no memory is available.
+ *
+ * Modified to create a new node if no free entries available - Chad Page
+ *
+ */
+
+static struct file_lock *alloc_lock(struct file_lock **pos,
+				    struct file_lock *fl,
+				    unsigned int fd)
+{
+	struct file_lock *tmp;
+	struct file_lock *nextlink;
+
+	tmp = file_lock_free_list;
+
+	if (tmp == NULL)
+	{
+		/* Okay, let's make a new file_lock structure... */
+		tmp = (struct file_lock *)kmalloc(sizeof(struct file_lock), GFP_KERNEL);
+		if (tmp == NULL)
+			return NULL;
+		tmp -> fl_owner = NULL;
+		tmp -> fl_next = file_lock_free_list;
+		/* every structure ever allocated stays on file_lock_table */
+		tmp -> fl_nextlink = file_lock_table;
+		file_lock_table = tmp;
+	}
+	else
+	{
+		/* remove from free list */
+		file_lock_free_list = tmp->fl_next;
+	}
+
+	if (tmp->fl_owner != NULL)
+		panic("alloc_lock: broken free list\n");
+
+	/*
+	 * Copy the description of the lock, but keep our own fl_nextlink:
+	 * "fl" is either a lock built on the caller's stack (whose
+	 * fl_nextlink was never set) or another entry of the table, and
+	 * taking its link would corrupt the chain locks_deadlocked() walks.
+	 */
+	nextlink = tmp->fl_nextlink;
+	*tmp = *fl;
+	tmp->fl_nextlink = nextlink;
+
+	tmp->fl_next = *pos;	/* insert into file's list */
+	*pos = tmp;
+
+	tmp->fl_owner = current;	/* FIXME: needed? */
+	tmp->fl_fd = fd;		/* FIXME: needed? */
+	tmp->fl_wait = NULL;
+	return tmp;
+}
+
+/*
+ * Add a lock to the free list ...
+ *
+ * *fl_p is the link that points at the lock; it is redirected to the
+ * lock's successor, and anybody sleeping on the lock is woken up.
+ */
+
+static void free_lock(struct file_lock **fl_p)
+{
+	struct file_lock *victim = *fl_p;
+
+	if (victim->fl_owner == NULL)		/* sanity check */
+		panic("free_lock: broken lock list\n");
+
+	/* unlink from the file's list */
+	*fl_p = victim->fl_next;
+
+	/* an entry without owner is a free entry */
+	victim->fl_owner = NULL;		/* for sanity checks */
+	victim->fl_next = file_lock_free_list;	/* add to free list */
+	file_lock_free_list = victim;
+
+	wake_up(&victim->fl_wait);
+}
diff --git a/fs/minix/Makefile b/fs/minix/Makefile
new file mode 100644
index 000000000..20e7f3dae
--- /dev/null
+++ b/fs/minix/Makefile
@@ -0,0 +1,31 @@
+#
+# Makefile for the linux minix-filesystem routines.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+# Suffix rules: C to assembly, C to object, assembly to object.
+.c.s:
+ $(CC) $(CFLAGS) -S $<
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+.s.o:
+ $(AS) -o $*.o $<
+
+# Objects that are linked together into minix.o.
+OBJS= bitmap.o truncate.o namei.o inode.o \
+ file.o dir.o symlink.o fsync.o
+
+# Relocatable link (-r) of all objects into one file.
+minix.o: $(OBJS)
+ $(LD) -r -o minix.o $(OBJS)
+
+# Regenerate the dependency file from every .c file in this directory.
+dep:
+ $(CPP) -M *.c > .depend
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
new file mode 100644
index 000000000..d42b86eea
--- /dev/null
+++ b/fs/minix/bitmap.c
@@ -0,0 +1,208 @@
+/*
+ * linux/fs/minix/bitmap.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/* bitmap.c contains the code that handles the inode and block bitmaps */
+
+#include <linux/sched.h>
+#include <linux/minix_fs.h>
+#include <linux/stat.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include <asm/bitops.h>
+
+/* Number of set bits in each possible 4-bit value. */
+static int nibblemap[] = { 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4 };
+
+/*
+ * count_used() counts the set bits among the first numbits bits of the
+ * bitmap held in the buffers map[0..numblocks-1].
+ * Returns 0 as soon as one of the needed buffers is missing.
+ */
+static unsigned long count_used(struct buffer_head *map[], unsigned numblocks,
+	unsigned numbits)
+{
+	unsigned blk, byte, nbytes, total = 0;
+	struct buffer_head *bh;
+
+	for (blk = 0; blk < numblocks && numbits; blk++) {
+		bh = map[blk];
+		if (!bh)
+			return(0);
+		if (numbits < (8*BLOCK_SIZE)) {
+			/* last, partly used block: whole bytes plus a bit tail */
+			int tail;
+
+			nbytes = numbits >> 3;
+			tail = bh->b_data[nbytes] & ((1 << (numbits & 0x7)) - 1);
+			total += nibblemap[tail & 0xf] + nibblemap[(tail >> 4) & 0xf];
+			numbits = 0;
+		} else {
+			/* every bit of this block counts */
+			nbytes = BLOCK_SIZE;
+			numbits -= 8*BLOCK_SIZE;
+		}
+		for (byte = 0; byte < nbytes; byte++)
+			total += nibblemap[bh->b_data[byte] & 0xf]
+				+ nibblemap[(bh->b_data[byte] >> 4) & 0xf];
+	}
+	return(total);
+}
+
+/*
+ * minix_free_block() clears the zone-map bit of a data block.  A copy of
+ * the block that is found in the buffer hash gets its dirty flag cleared.
+ * Problems are reported with printk() and otherwise ignored.
+ */
+void minix_free_block(struct super_block * sb, int block)
+{
+	struct buffer_head * bh;
+	unsigned int zone;
+
+	if (!sb) {
+		printk("trying to free block on nonexistent device\n");
+		return;
+	}
+	if (block < sb->u.minix_sb.s_firstdatazone ||
+	    block >= sb->u.minix_sb.s_nzones) {
+		printk("trying to free block not in datazone\n");
+		return;
+	}
+
+	/* the contents of a freed block need not be written any more */
+	bh = get_hash_table(sb->s_dev,block,BLOCK_SIZE);
+	if (bh != NULL)
+		bh->b_dirt = 0;
+	brelse(bh);
+
+	/* zone-map bit number; each map buffer is indexed by 13 bits */
+	zone = block - sb->u.minix_sb.s_firstdatazone + 1;
+	bh = sb->u.minix_sb.s_zmap[zone >> 13];
+	if (bh == NULL) {
+		printk("minix_free_block: nonexistent bitmap buffer\n");
+		return;
+	}
+	if (clear_bit(zone & 8191, bh->b_data) == 0)
+		printk("free_block (%04x:%d): bit already cleared\n",sb->s_dev,block);
+	mark_buffer_dirty(bh, 1);
+}
+
+/*
+ * minix_new_block() finds a clear bit in the zone map, sets it and hands
+ * out the corresponding block, zero-filled and marked dirty.
+ * Returns the block number, or 0 if no block could be allocated.
+ */
+int minix_new_block(struct super_block * sb)
+{
+	struct buffer_head * map;
+	struct buffer_head * blk;
+	int slot, bit, zone;
+
+	if (!sb) {
+		printk("trying to get new block from nonexistent device\n");
+		return 0;
+	}
+	for (;;) {
+		/* first zero bit in the first map buffer that has one */
+		bit = 8192;
+		for (slot = 0; slot < 8; slot++) {
+			map = sb->u.minix_sb.s_zmap[slot];
+			if (map == NULL)
+				continue;
+			bit = find_first_zero_bit(map->b_data, 8192);
+			if (bit < 8192)
+				break;
+		}
+		if (slot >= 8 || !map || bit >= 8192)
+			return 0;
+		if (!set_bit(bit, map->b_data))
+			break;
+		/* the bit was set by the time we got to it: search again */
+		printk("new_block: bit already set");
+	}
+	mark_buffer_dirty(map, 1);
+
+	zone = bit + (slot*8192 + sb->u.minix_sb.s_firstdatazone-1);
+	if (zone < sb->u.minix_sb.s_firstdatazone ||
+	    zone >= sb->u.minix_sb.s_nzones)
+		return 0;
+
+	blk = getblk(sb->s_dev, zone, BLOCK_SIZE);
+	if (!blk) {
+		printk("new_block: cannot get block");
+		return 0;
+	}
+	memset(blk->b_data, 0, BLOCK_SIZE);
+	blk->b_uptodate = 1;
+	mark_buffer_dirty(blk, 1);
+	brelse(blk);
+	return zone;
+}
+
+/*
+ * minix_count_free_blocks(): s_nzones minus the number of bits set in
+ * the zone map, shifted left by s_log_zone_size.
+ */
+unsigned long minix_count_free_blocks(struct super_block *sb)
+{
+	struct minix_sb_info * info = &sb->u.minix_sb;
+
+	return (info->s_nzones - count_used(info->s_zmap,info->s_zmap_blocks,info->s_nzones))
+		<< info->s_log_zone_size;
+}
+
+/*
+ * minix_free_inode() clears the in-core inode and its bit in the inode
+ * map.  Nothing is done (apart from a printk) unless the inode has a
+ * device and a superblock, a use count of exactly 1, no links left and
+ * an inode number in range.
+ */
+void minix_free_inode(struct inode * inode)
+{
+	struct buffer_head * map;
+	unsigned long ino;
+
+	if (!inode)
+		return;
+	if (!inode->i_dev) {
+		printk("free_inode: inode has no device\n");
+		return;
+	}
+	if (inode->i_count != 1) {
+		printk("free_inode: inode has count=%d\n",inode->i_count);
+		return;
+	}
+	if (inode->i_nlink) {
+		printk("free_inode: inode has nlink=%d\n",inode->i_nlink);
+		return;
+	}
+	if (!inode->i_sb) {
+		printk("free_inode: inode on nonexistent device\n");
+		return;
+	}
+	ino = inode->i_ino;
+	if (ino < 1 || ino >= inode->i_sb->u.minix_sb.s_ninodes) {
+		printk("free_inode: inode 0 or nonexistent inode\n");
+		return;
+	}
+	map = inode->i_sb->u.minix_sb.s_imap[ino >> 13];
+	if (!map) {
+		printk("free_inode: nonexistent imap in superblock\n");
+		return;
+	}
+	/* ino was saved above, so the inode itself may be cleared first */
+	clear_inode(inode);
+	if (clear_bit(ino & 8191, map->b_data) == 0)
+		printk("free_inode: bit %lu already cleared.\n",ino);
+	mark_buffer_dirty(map, 1);
+}
+
+/*
+ * minix_new_inode() allocates an inode on the filesystem of directory
+ * dir: it takes an empty in-core inode, reserves a clear bit in the inode
+ * map and fills in owner, times and inode number.  i_op is left NULL.
+ * Returns the new inode, or NULL on failure.
+ */
+struct inode * minix_new_inode(const struct inode * dir)
+{
+	struct super_block * sb;
+	struct inode * inode;
+	struct buffer_head * map;
+	int slot, bit;
+
+	if (!dir)
+		return NULL;
+	inode = get_empty_inode();
+	if (!inode)
+		return NULL;
+	sb = dir->i_sb;
+	inode->i_sb = sb;
+	inode->i_flags = sb->s_flags;
+
+	/* first zero bit in the first map buffer that has one */
+	bit = 8192;
+	for (slot = 0; slot < 8; slot++) {
+		map = sb->u.minix_sb.s_imap[slot];
+		if (map == NULL)
+			continue;
+		bit = find_first_zero_bit(map->b_data, 8192);
+		if (bit < 8192)
+			break;
+	}
+	if (!map || bit >= 8192) {
+		iput(inode);
+		return NULL;
+	}
+	if (set_bit(bit, map->b_data)) {	/* shouldn't happen */
+		printk("new_inode: bit already set");
+		iput(inode);
+		return NULL;
+	}
+	mark_buffer_dirty(map, 1);
+
+	bit += slot*8192;
+	if (!bit || bit >= sb->u.minix_sb.s_ninodes) {
+		iput(inode);
+		return NULL;
+	}
+
+	inode->i_count = 1;
+	inode->i_nlink = 1;
+	inode->i_dev = sb->s_dev;
+	inode->i_uid = current->fsuid;
+	/* a set-group-id directory passes its group on */
+	if (dir->i_mode & S_ISGID)
+		inode->i_gid = dir->i_gid;
+	else
+		inode->i_gid = current->fsgid;
+	inode->i_dirt = 1;
+	inode->i_ino = bit;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode->i_op = NULL;
+	inode->i_blocks = inode->i_blksize = 0;
+	insert_inode_hash(inode);
+	return inode;
+}
+
+/*
+ * minix_count_free_inodes(): s_ninodes minus the number of bits set in
+ * the inode map.
+ */
+unsigned long minix_count_free_inodes(struct super_block *sb)
+{
+	struct minix_sb_info * info = &sb->u.minix_sb;
+
+	return info->s_ninodes
+		- count_used(info->s_imap,info->s_imap_blocks,info->s_ninodes);
+}
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
new file mode 100644
index 000000000..6ece61971
--- /dev/null
+++ b/fs/minix/dir.c
@@ -0,0 +1,108 @@
+/*
+ * linux/fs/minix/dir.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * minix directory handling functions
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/minix_fs.h>
+#include <linux/stat.h>
+
+/* Byte offset of the d_name member within the structure de points to. */
+#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
+/* Round x up to the next multiple of 4. */
+#define ROUND_UP(x) (((x)+3) & ~3)
+
+/*
+ * read() on a directory: always refused with -EISDIR, none of the
+ * arguments is looked at.
+ */
+static int minix_dir_read(struct inode * inode, struct file * filp,
+	char * buf, int count)
+{
+	return -EISDIR;
+}
+
+/* Defined further down; declared here for the operations table. */
+static int minix_readdir(struct inode *, struct file *, struct dirent *, int);
+
+/*
+ * File operations for minix directories: only read (which always fails),
+ * readdir and fsync are filled in.
+ */
+static struct file_operations minix_dir_operations = {
+ NULL, /* lseek - default */
+ minix_dir_read, /* read */
+ NULL, /* write - bad */
+ minix_readdir, /* readdir */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ file_fsync /* default fsync */
+};
+
+/*
+ * directories can handle most operations...
+ * (all but readlink, follow_link, bmap and permission, which are NULL)
+ */
+struct inode_operations minix_dir_inode_operations = {
+ &minix_dir_operations, /* default directory file-ops */
+ minix_create, /* create */
+ minix_lookup, /* lookup */
+ minix_link, /* link */
+ minix_unlink, /* unlink */
+ minix_symlink, /* symlink */
+ minix_mkdir, /* mkdir */
+ minix_rmdir, /* rmdir */
+ minix_mknod, /* mknod */
+ minix_rename, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ minix_truncate, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * minix_readdir() hands the next in-use entry of a directory to the
+ * caller's dirent and advances filp->f_pos past it.
+ *
+ * Returns the rounded-up record size of the entry that was stored, 0 if
+ * the end of the directory was reached without finding one, or -EBADF if
+ * the inode is no directory or f_pos is not aligned to the entry size.
+ * The count argument is not used.
+ */
+static int minix_readdir(struct inode * inode, struct file * filp,
+ struct dirent * dirent, int count)
+{
+ unsigned int offset,i,ret;
+ int version;
+ char c;
+ struct buffer_head * bh;
+ struct minix_dir_entry * de;
+ struct minix_sb_info * info;
+
+ if (!inode || !inode->i_sb || !S_ISDIR(inode->i_mode))
+ return -EBADF;
+ info = &inode->i_sb->u.minix_sb;
+ /* f_pos must sit on an entry boundary */
+ if (filp->f_pos & (info->s_dirsize - 1))
+ return -EBADF;
+ ret = 0;
+ while (!ret && filp->f_pos < inode->i_size) {
+ /* position inside the current 1024-byte block */
+ offset = filp->f_pos & 1023;
+ bh = minix_bread(inode,(filp->f_pos)>>BLOCK_SIZE_BITS,0);
+ if (!bh) {
+ /* block not available: skip to the start of the next one */
+ filp->f_pos += 1024-offset;
+ continue;
+ }
+ while (!ret && offset < 1024 && filp->f_pos < inode->i_size) {
+ de = (struct minix_dir_entry *) (offset + bh->b_data);
+ /* step past this entry before looking at it */
+ offset += info->s_dirsize;
+ filp->f_pos += info->s_dirsize;
+retry:
+ /* entries with inode number 0 are unused */
+ if (de->inode) {
+ version = inode->i_version;
+ /* copy the name up to a NUL or s_namelen characters */
+ for (i = 0; i < info->s_namelen; i++)
+ if ((c = de->name[i]) != 0)
+ put_fs_byte(c,i+dirent->d_name);
+ else
+ break;
+ if (i) {
+ put_fs_long(de->inode,&dirent->d_ino);
+ put_fs_byte(0,i+dirent->d_name);
+ put_fs_word(i,&dirent->d_reclen);
+ /* i_version changed while copying: copy this entry again */
+ if (version != inode->i_version)
+ goto retry;
+ ret = ROUND_UP(NAME_OFFSET(dirent)+i+1);
+ }
+ }
+ }
+ brelse(bh);
+ }
+ return ret;
+}
diff --git a/fs/minix/file.c b/fs/minix/file.c
new file mode 100644
index 000000000..670fb5e75
--- /dev/null
+++ b/fs/minix/file.c
@@ -0,0 +1,246 @@
+/*
+ * linux/fs/minix/file.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * minix regular file handling primitives
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/sched.h>
+#include <linux/minix_fs.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+
+/* Number of slots in the buffer-head arrays of minix_file_read(). */
+#define NBUF 32
+
+/* Note: both macros evaluate their arguments more than once. */
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#define MAX(a,b) (((a)>(b))?(a):(b))
+
+#include <linux/fs.h>
+#include <linux/minix_fs.h>
+
+/* Defined further down; declared here for the operations table. */
+static int minix_file_read(struct inode *, struct file *, char *, int);
+static int minix_file_write(struct inode *, struct file *, char *, int);
+
+/*
+ * We have mostly NULL's here: the current defaults are ok for
+ * the minix filesystem.
+ * Only read, write, mmap and fsync are filled in.
+ */
+static struct file_operations minix_file_operations = {
+ NULL, /* lseek - default */
+ minix_file_read, /* read */
+ minix_file_write, /* write */
+ NULL, /* readdir - bad */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ generic_mmap, /* mmap */
+ NULL, /* no special open is needed */
+ NULL, /* release */
+ minix_sync_file /* fsync */
+};
+
+/*
+ * Inode operations for regular minix files: besides the file operations
+ * above only bmap and truncate are provided.
+ */
+struct inode_operations minix_file_inode_operations = {
+ &minix_file_operations, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ minix_bmap, /* bmap */
+ minix_truncate, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * minix_file_read() copies up to count bytes of a regular file, starting
+ * at filp->f_pos, to buf and advances f_pos.
+ *
+ * Returns the number of bytes read, 0 if nothing is left to read (f_pos
+ * at or past i_size, or count <= 0), -EINVAL for a NULL or non-regular
+ * inode, and -EIO if not a single byte could be read.
+ */
+static int minix_file_read(struct inode * inode, struct file * filp, char * buf, int count)
+{
+ int read,left,chars;
+ int block, blocks, offset;
+ int bhrequest, uptodate;
+ struct buffer_head ** bhb, ** bhe;
+ struct buffer_head * bhreq[NBUF];
+ struct buffer_head * buflist[NBUF];
+ unsigned int size;
+
+ if (!inode) {
+ printk("minix_file_read: inode = NULL\n");
+ return -EINVAL;
+ }
+ if (!S_ISREG(inode->i_mode)) {
+ printk("minix_file_read: mode = %07o\n",inode->i_mode);
+ return -EINVAL;
+ }
+ offset = filp->f_pos;
+ size = inode->i_size;
+ /* left = bytes to transfer: limited by end of file and by count */
+ if (offset > size)
+ left = 0;
+ else
+ left = size - offset;
+ if (left > count)
+ left = count;
+ if (left <= 0)
+ return 0;
+ read = 0;
+ block = offset >> BLOCK_SIZE_BITS;
+ offset &= BLOCK_SIZE-1;
+ /* from here on size is the file size in blocks, not bytes */
+ size = (size + (BLOCK_SIZE-1)) >> BLOCK_SIZE_BITS;
+ blocks = (left + offset + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
+ /* buflist is used as a ring: bhb is where blocks go in, bhe where they come out */
+ bhb = bhe = buflist;
+ if (filp->f_reada) {
+ /* read-ahead: ask for at least read_ahead[] worth of blocks, but not past the end of the file */
+ if(blocks < read_ahead[MAJOR(inode->i_dev)] / (BLOCK_SIZE >> 9))
+ blocks = read_ahead[MAJOR(inode->i_dev)] / (BLOCK_SIZE >> 9);
+ if (block + blocks > size)
+ blocks = size - block;
+ }
+
+ /* We do this in a two stage process. We first try and request
+ as many blocks as we can, then we wait for the first one to
+ complete, and then we try and wrap up as many as are actually
+ done. This routine is rather generic, in that it can be used
+ in a filesystem by substituting the appropriate function in
+ for getblk.
+
+ This routine is optimized to make maximum use of the various
+ buffers and caches. */
+
+ do {
+ bhrequest = 0;
+ uptodate = 1;
+ while (blocks) {
+ --blocks;
+ *bhb = minix_getblk(inode, block++, 0);
+ /* buffers that are not up to date are collected for one ll_rw_block() call */
+ if (*bhb && !(*bhb)->b_uptodate) {
+ uptodate = 0;
+ bhreq[bhrequest++] = *bhb;
+ }
+
+ if (++bhb == &buflist[NBUF])
+ bhb = buflist;
+
+ /* If the block we have on hand is uptodate, go ahead
+ and complete processing. */
+ if (uptodate)
+ break;
+ /* the ring is full */
+ if (bhb == bhe)
+ break;
+ }
+
+ /* Now request them all */
+ if (bhrequest)
+ ll_rw_block(READ, bhrequest, bhreq);
+
+ do { /* Finish off all I/O that has actually completed */
+ if (*bhe) {
+ wait_on_buffer(*bhe);
+ if (!(*bhe)->b_uptodate) { /* read error? */
+ brelse(*bhe);
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ left = 0;
+ break;
+ }
+ }
+ /* chars = what this block contributes */
+ if (left < BLOCK_SIZE - offset)
+ chars = left;
+ else
+ chars = BLOCK_SIZE - offset;
+ filp->f_pos += chars;
+ left -= chars;
+ read += chars;
+ if (*bhe) {
+ memcpy_tofs(buf,offset+(*bhe)->b_data,chars);
+ brelse(*bhe);
+ buf += chars;
+ } else {
+ /* minix_getblk() gave no buffer for this block: hand out zeroes */
+ while (chars-->0)
+ put_fs_byte(0,buf++);
+ }
+ offset = 0;
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ /* go on only with buffers that are not locked */
+ } while (left > 0 && bhe != bhb && (!*bhe || !(*bhe)->b_lock));
+ } while (left > 0);
+
+/* Release the read-ahead blocks */
+ while (bhe != bhb) {
+ brelse(*bhe);
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ };
+ if (!read)
+ return -EIO;
+ filp->f_reada = 1;
+ if (!IS_RDONLY(inode))
+ inode->i_atime = CURRENT_TIME;
+ return read;
+}
+
+/*
+ * minix_file_write() copies count bytes from buf into a regular file,
+ * starting at filp->f_pos (or at the end of the file for O_APPEND), block
+ * by block, under inode->i_sem.
+ *
+ * Returns the number of bytes written.  If the very first block already
+ * fails, the result is -ENOSPC (no block) or -EIO (block unreadable);
+ * -EINVAL is returned for a NULL or non-regular inode.
+ */
+static int minix_file_write(struct inode * inode, struct file * filp, char * buf, int count)
+{
+	off_t pos;
+	int done, chunk;
+	struct buffer_head * bh;
+	char * dest;
+
+	if (!inode) {
+		printk("minix_file_write: inode = NULL\n");
+		return -EINVAL;
+	}
+	if (!S_ISREG(inode->i_mode)) {
+		printk("minix_file_write: mode = %07o\n",inode->i_mode);
+		return -EINVAL;
+	}
+	down(&inode->i_sem);
+	pos = (filp->f_flags & O_APPEND) ? inode->i_size : filp->f_pos;
+	for (done = 0; done < count; ) {
+		bh = minix_getblk(inode,pos/BLOCK_SIZE,1);
+		if (!bh) {
+			if (!done)
+				done = -ENOSPC;
+			break;
+		}
+		/* what fits between pos and the end of this block ... */
+		chunk = BLOCK_SIZE - (pos % BLOCK_SIZE);
+		/* ... but no more than is left to write */
+		if (chunk > count-done)
+			chunk = count-done;
+		/* a partial write needs the old contents of the block */
+		if (chunk != BLOCK_SIZE && !bh->b_uptodate) {
+			ll_rw_block(READ, 1, &bh);
+			wait_on_buffer(bh);
+			if (!bh->b_uptodate) {
+				brelse(bh);
+				if (!done)
+					done = -EIO;
+				break;
+			}
+		}
+		dest = bh->b_data + (pos % BLOCK_SIZE);
+		pos += chunk;
+		done += chunk;
+		memcpy_fromfs(dest,buf,chunk);
+		buf += chunk;
+		bh->b_uptodate = 1;
+		mark_buffer_dirty(bh, 0);
+		brelse(bh);
+	}
+	if (pos > inode->i_size)
+		inode->i_size = pos;
+	up(&inode->i_sem);
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	filp->f_pos = pos;
+	inode->i_dirt = 1;
+	return done;
+}
diff --git a/fs/minix/fsync.c b/fs/minix/fsync.c
new file mode 100644
index 000000000..737a5bfcd
--- /dev/null
+++ b/fs/minix/fsync.c
@@ -0,0 +1,159 @@
+/*
+ * linux/fs/minix/fsync.c
+ *
+ * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk)
+ * from
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * minix fsync primitive
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/locks.h>
+
+#include <linux/fs.h>
+#include <linux/minix_fs.h>
+
+
+#define blocksize BLOCK_SIZE
+/* number of unsigned short block numbers walked per indirect block */
+#define addr_per_block 512
+
+/*
+ * sync_block() handles the one block whose number is stored at *block.
+ *
+ * With wait == 0 a buffer that is up to date and dirty is handed to
+ * ll_rw_block(WRITE); with wait != 0 nothing is written, the buffer is
+ * only checked.
+ *
+ * Returns 0 if there was nothing to do or all is fine, 1 if *block
+ * changed while the buffer was looked up, and -1 if wait is set and the
+ * buffer has b_req set but is not up to date.
+ */
+static int sync_block (struct inode * inode, unsigned short * block, int wait)
+{
+ struct buffer_head * bh;
+ unsigned short tmp;
+
+ if (!*block)
+ return 0;
+ tmp = *block;
+ /* only a buffer that is already in the hash table is of interest */
+ bh = get_hash_table(inode->i_dev, *block, blocksize);
+ if (!bh)
+ return 0;
+ /* the block number was changed in the meantime */
+ if (*block != tmp) {
+ brelse (bh);
+ return 1;
+ }
+ if (wait && bh->b_req && !bh->b_uptodate) {
+ brelse(bh);
+ return -1;
+ }
+ if (wait || !bh->b_uptodate || !bh->b_dirt)
+ {
+ brelse(bh);
+ return 0;
+ }
+ ll_rw_block(WRITE, 1, &bh);
+ /* the reference is dropped by hand here instead of through brelse() */
+ bh->b_count--;
+ return 0;
+}
+
+/*
+ * sync_iblock() runs sync_block() on the (indirect) block whose number is
+ * stored at *iblock and then reads that block in, so that the caller can
+ * walk the block numbers it contains.
+ *
+ * On return *bh is the buffer or NULL.  Returns 0 on success (also when
+ * *iblock is 0, with *bh == NULL), the non-zero result of sync_block(),
+ * 1 if *iblock changed during the read, or -1 if the read failed.
+ */
+static int sync_iblock (struct inode * inode, unsigned short * iblock,
+ struct buffer_head **bh, int wait)
+{
+ int rc;
+ unsigned short tmp;
+
+ *bh = NULL;
+ tmp = *iblock;
+ if (!tmp)
+ return 0;
+ rc = sync_block (inode, iblock, wait);
+ if (rc)
+ return rc;
+ *bh = bread(inode->i_dev, tmp, blocksize);
+ /* the block number was changed in the meantime */
+ if (tmp != *iblock) {
+ brelse(*bh);
+ *bh = NULL;
+ return 1;
+ }
+ if (!*bh)
+ return -1;
+ return 0;
+}
+
+
+/*
+ * sync_direct() runs sync_block() over the first 7 entries of i_data.
+ * A positive result ends the walk; the last negative result seen is
+ * returned (0 if there was none).
+ */
+static int sync_direct(struct inode *inode, int wait)
+{
+	int slot;
+	int rc, err = 0;
+
+	for (slot = 0; slot < 7; slot++) {
+		rc = sync_block (inode, &inode->u.minix_i.i_data[slot], wait);
+		if (rc > 0)
+			break;
+		if (rc < 0)
+			err = rc;
+	}
+	return err;
+}
+
+/*
+ * sync_indirect() syncs the indirect block whose number is stored at
+ * *iblock and then every block listed in it.
+ * Returns what sync_iblock() returned if that was non-zero or gave no
+ * buffer; otherwise the last negative sync_block() result (0 if none).
+ */
+static int sync_indirect(struct inode *inode, unsigned short *iblock, int wait)
+{
+	struct buffer_head * ind_bh;
+	unsigned short * entry;
+	int n;
+	int rc, err = 0;
+
+	rc = sync_iblock (inode, iblock, &ind_bh, wait);
+	if (rc || !ind_bh)
+		return rc;
+
+	entry = (unsigned short *) ind_bh->b_data;
+	for (n = 0; n < addr_per_block; n++) {
+		rc = sync_block (inode, entry + n, wait);
+		if (rc > 0)
+			break;
+		if (rc < 0)
+			err = rc;
+	}
+	brelse(ind_bh);
+	return err;
+}
+
+/*
+ * sync_dindirect() syncs the double indirect block whose number is stored
+ * at *diblock and runs sync_indirect() on every entry of it.
+ * Returns what sync_iblock() returned if that was non-zero or gave no
+ * buffer; otherwise the last negative sync_indirect() result (0 if none).
+ */
+static int sync_dindirect(struct inode *inode, unsigned short *diblock,
+	int wait)
+{
+	struct buffer_head * dind_bh;
+	unsigned short * entry;
+	int n;
+	int rc, err = 0;
+
+	rc = sync_iblock (inode, diblock, &dind_bh, wait);
+	if (rc || !dind_bh)
+		return rc;
+
+	entry = (unsigned short *) dind_bh->b_data;
+	for (n = 0; n < addr_per_block; n++) {
+		rc = sync_indirect (inode, entry + n, wait);
+		if (rc > 0)
+			break;
+		if (rc < 0)
+			err = rc;
+	}
+	brelse(dind_bh);
+	return err;
+}
+
+/*
+ * minix_sync_file() is the fsync operation: two passes (wait = 0, then
+ * wait = 1) over the direct, indirect and double indirect blocks of the
+ * inode, followed by minix_sync_inode().
+ * Returns 0, -EIO if the combined result is negative, or -EINVAL if the
+ * inode is neither a regular file, a directory nor a symlink.
+ */
+int minix_sync_file(struct inode * inode, struct file * file)
+{
+	int wait, err = 0;
+
+	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
+	    !S_ISLNK(inode->i_mode))
+		return -EINVAL;
+
+	for (wait = 0; wait < 2; wait++) {
+		err |= sync_direct(inode, wait);
+		err |= sync_indirect(inode, &inode->u.minix_i.i_data[7], wait);
+		err |= sync_dindirect(inode, &inode->u.minix_i.i_data[8], wait);
+	}
+	err |= minix_sync_inode (inode);
+	if (err < 0)
+		return -EIO;
+	return 0;
+}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
new file mode 100644
index 000000000..2aeb538ee
--- /dev/null
+++ b/fs/minix/inode.c
@@ -0,0 +1,513 @@
+/*
+ * linux/fs/minix/inode.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/sched.h>
+#include <linux/minix_fs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+
+#include <asm/system.h>
+#include <asm/segment.h>
+#include <asm/bitops.h>
+
+/*
+ * minix_put_inode(): an inode without any links left is truncated to
+ * size 0 and freed; one that still has links is left alone.
+ */
+void minix_put_inode(struct inode *inode)
+{
+	if (!inode->i_nlink) {
+		inode->i_size = 0;
+		minix_truncate(inode);
+		minix_free_inode(inode);
+	}
+}
+
+/*
+ * minix_commit_super() marks the superblock buffer dirty and clears the
+ * dirty flag of the in-core superblock.  The ms argument is not used.
+ */
+static void minix_commit_super (struct super_block * sb,
+	struct minix_super_block * ms)
+{
+	struct buffer_head * sbh = sb->u.minix_sb.s_sbh;
+
+	mark_buffer_dirty(sbh, 1);
+	sb->s_dirt = 0;
+}
+
+/*
+ * minix_write_super(): unless the filesystem is read-only, clear
+ * MINIX_VALID_FS in the on-disk state and commit the superblock.
+ * s_dirt is cleared in either case.
+ */
+void minix_write_super (struct super_block * sb)
+{
+	struct minix_super_block * ms;
+
+	if (sb->s_flags & MS_RDONLY) {
+		sb->s_dirt = 0;
+		return;
+	}
+	ms = sb->u.minix_sb.s_ms;
+	if (ms->s_state & MINIX_VALID_FS)
+		ms->s_state &= ~MINIX_VALID_FS;
+	minix_commit_super (sb, ms);
+	sb->s_dirt = 0;
+}
+
+
+/*
+ * minix_put_super(): on a read-write filesystem the state that was
+ * saved in s_mount_state is put back into the superblock, then the
+ * bitmap buffers and the superblock buffer are released.
+ */
+void minix_put_super(struct super_block *sb)
+{
+	struct minix_sb_info * info = &sb->u.minix_sb;
+	int slot;
+
+	lock_super(sb);
+	if (!(sb->s_flags & MS_RDONLY)) {
+		info->s_ms->s_state = info->s_mount_state;
+		mark_buffer_dirty(info->s_sbh, 1);
+	}
+	sb->s_dev = 0;
+	for (slot = 0; slot < MINIX_I_MAP_SLOTS; slot++)
+		brelse(info->s_imap[slot]);
+	for (slot = 0; slot < MINIX_Z_MAP_SLOTS; slot++)
+		brelse(info->s_zmap[slot]);
+	brelse (info->s_sbh);
+	unlock_super(sb);
+}
+
+/*
+ * Superblock operations of the minix filesystem; minix_read_super()
+ * installs this table as s_op.  The second slot is left empty.
+ */
+static struct super_operations minix_sops = {
+ minix_read_inode,
+ NULL,
+ minix_write_inode,
+ minix_put_inode,
+ minix_put_super,
+ minix_write_super,
+ minix_statfs,
+ minix_remount
+};
+
+/*
+ * minix_remount() deals with a change of the read-only flag.
+ *
+ * rw -> ro: the state saved in s_mount_state is put back into the
+ * superblock (unless the superblock is marked valid already, or the
+ * saved state was not valid) and the superblock is committed.
+ * ro -> rw: the current state is saved, MINIX_VALID_FS is cleared and a
+ * warning is printed if the filesystem was unchecked or had errors.
+ *
+ * Always returns 0; the data argument is not used.
+ */
+int minix_remount (struct super_block * sb, int * flags, char * data)
+{
+	struct minix_sb_info * info = &sb->u.minix_sb;
+	struct minix_super_block * ms = info->s_ms;
+
+	/* nothing to do unless the read-only flag changes */
+	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+		return 0;
+
+	if (*flags & MS_RDONLY) {
+		if ((ms->s_state & MINIX_VALID_FS) ||
+		    !(info->s_mount_state & MINIX_VALID_FS))
+			return 0;
+		/* Mounting a rw partition read-only. */
+		ms->s_state = info->s_mount_state;
+		mark_buffer_dirty(info->s_sbh, 1);
+		sb->s_dirt = 1;
+		minix_commit_super (sb, ms);
+		return 0;
+	}
+
+	/* Mount a partition which is read-only, read-write. */
+	info->s_mount_state = ms->s_state;
+	ms->s_state &= ~MINIX_VALID_FS;
+	mark_buffer_dirty(info->s_sbh, 1);
+	sb->s_dirt = 1;
+
+	if (!(info->s_mount_state & MINIX_VALID_FS))
+		printk ("MINIX-fs warning: remounting unchecked fs, "
+			"running fsck is recommended.\n");
+	else if (info->s_mount_state & MINIX_ERROR_FS)
+		printk ("MINIX-fs warning: remounting fs with errors, "
+			"running fsck is recommended.\n");
+	return 0;
+}
+
+
+/*
+ * minix_read_super() sets up the in-core superblock s from the minix
+ * superblock in block 1 of the device: it copies the geometry, picks the
+ * directory entry size from the magic number, reads the inode and zone
+ * bitmaps (which start at block 2) and gets the root inode.
+ *
+ * s:      superblock to fill in; s->s_dev names the device
+ * data:   not used
+ * silent: if set, a wrong magic number is not reported
+ *
+ * Returns s, or NULL on failure; in that case s->s_dev is 0 and all
+ * buffers obtained here have been released again.
+ */
+struct super_block *minix_read_super(struct super_block *s,void *data,
+				     int silent)
+{
+	struct buffer_head *bh;
+	struct minix_super_block *ms;
+	int i,dev=s->s_dev,block;
+
+	if (32 != sizeof (struct minix_inode))
+		panic("bad i-node size");
+	lock_super(s);
+	set_blocksize(dev, BLOCK_SIZE);
+	if (!(bh = bread(dev,1,BLOCK_SIZE))) {
+		s->s_dev=0;
+		unlock_super(s);
+		printk("MINIX-fs: unable to read superblock\n");
+		return NULL;
+	}
+	ms = (struct minix_super_block *) bh->b_data;
+	s->u.minix_sb.s_ms = ms;
+	s->u.minix_sb.s_sbh = bh;
+	s->u.minix_sb.s_mount_state = ms->s_state;
+	s->s_blocksize = 1024;
+	s->s_blocksize_bits = 10;
+	s->u.minix_sb.s_ninodes = ms->s_ninodes;
+	s->u.minix_sb.s_nzones = ms->s_nzones;
+	s->u.minix_sb.s_imap_blocks = ms->s_imap_blocks;
+	s->u.minix_sb.s_zmap_blocks = ms->s_zmap_blocks;
+	s->u.minix_sb.s_firstdatazone = ms->s_firstdatazone;
+	s->u.minix_sb.s_log_zone_size = ms->s_log_zone_size;
+	s->u.minix_sb.s_max_size = ms->s_max_size;
+	s->s_magic = ms->s_magic;
+	if (s->s_magic == MINIX_SUPER_MAGIC) {
+		s->u.minix_sb.s_dirsize = 16;
+		s->u.minix_sb.s_namelen = 14;
+	} else if (s->s_magic == MINIX_SUPER_MAGIC2) {
+		s->u.minix_sb.s_dirsize = 32;
+		s->u.minix_sb.s_namelen = 30;
+	} else {
+		s->s_dev = 0;
+		unlock_super(s);
+		brelse(bh);
+		if (!silent)
+			printk("VFS: Can't find a minix filesystem on dev 0x%04x.\n", dev);
+		return NULL;
+	}
+	/*
+	 * The bitmap sizes come straight from the disk.  They are the loop
+	 * bounds for filling s_imap[] and s_zmap[] below, and slot 0 of
+	 * both maps is dereferenced afterwards, so each map must have at
+	 * least one block and no more blocks than there are slots.
+	 */
+	if (s->u.minix_sb.s_imap_blocks < 1 ||
+	    s->u.minix_sb.s_imap_blocks > MINIX_I_MAP_SLOTS ||
+	    s->u.minix_sb.s_zmap_blocks < 1 ||
+	    s->u.minix_sb.s_zmap_blocks > MINIX_Z_MAP_SLOTS) {
+		s->s_dev = 0;
+		unlock_super(s);
+		brelse(bh);
+		printk("MINIX-fs: bad superblock or unable to read bitmaps\n");
+		return NULL;
+	}
+	for (i=0;i < MINIX_I_MAP_SLOTS;i++)
+		s->u.minix_sb.s_imap[i] = NULL;
+	for (i=0;i < MINIX_Z_MAP_SLOTS;i++)
+		s->u.minix_sb.s_zmap[i] = NULL;
+	/* the inode map starts at block 2, the zone map follows it */
+	block=2;
+	for (i=0 ; i < s->u.minix_sb.s_imap_blocks ; i++)
+		if ((s->u.minix_sb.s_imap[i]=bread(dev,block,BLOCK_SIZE)) != NULL)
+			block++;
+		else
+			break;
+	for (i=0 ; i < s->u.minix_sb.s_zmap_blocks ; i++)
+		if ((s->u.minix_sb.s_zmap[i]=bread(dev,block,BLOCK_SIZE)) != NULL)
+			block++;
+		else
+			break;
+	/* block was advanced once per bitmap block that could be read */
+	if (block != 2+s->u.minix_sb.s_imap_blocks+s->u.minix_sb.s_zmap_blocks) {
+		for(i=0;i<MINIX_I_MAP_SLOTS;i++)
+			brelse(s->u.minix_sb.s_imap[i]);
+		for(i=0;i<MINIX_Z_MAP_SLOTS;i++)
+			brelse(s->u.minix_sb.s_zmap[i]);
+		s->s_dev=0;
+		unlock_super(s);
+		brelse(bh);
+		printk("MINIX-fs: bad superblock or unable to read bitmaps\n");
+		return NULL;
+	}
+	/* bit 0 of either map is never handed out */
+	set_bit(0,s->u.minix_sb.s_imap[0]->b_data);
+	set_bit(0,s->u.minix_sb.s_zmap[0]->b_data);
+	unlock_super(s);
+	/* set up enough so that it can read an inode */
+	s->s_dev = dev;
+	s->s_op = &minix_sops;
+	s->s_mounted = iget(s,MINIX_ROOT_INO);
+	if (!s->s_mounted) {
+		/* give the bitmap buffers back as well, not just the superblock */
+		for(i=0;i<MINIX_I_MAP_SLOTS;i++)
+			brelse(s->u.minix_sb.s_imap[i]);
+		for(i=0;i<MINIX_Z_MAP_SLOTS;i++)
+			brelse(s->u.minix_sb.s_zmap[i]);
+		s->s_dev = 0;
+		brelse(bh);
+		printk("MINIX-fs: get root inode failed\n");
+		return NULL;
+	}
+	/* a filesystem mounted read-write is not "valid" until put_super */
+	if (!(s->s_flags & MS_RDONLY)) {
+		ms->s_state &= ~MINIX_VALID_FS;
+		mark_buffer_dirty(bh, 1);
+		s->s_dirt = 1;
+	}
+	if (!(s->u.minix_sb.s_mount_state & MINIX_VALID_FS))
+		printk ("MINIX-fs: mounting unchecked file system, "
+			"running fsck is recommended.\n");
+	else if (s->u.minix_sb.s_mount_state & MINIX_ERROR_FS)
+		printk ("MINIX-fs: mounting file system with errors, "
+			"running fsck is recommended.\n");
+	return s;
+}
+
+/*
+ * minix_statfs() fills in the caller's statfs buffer with put_fs_long():
+ * type, block size, number of data blocks, free blocks (reported as
+ * f_bfree and f_bavail alike), inode counts and maximum name length.
+ * f_fsid is left alone.
+ */
+void minix_statfs(struct super_block *sb, struct statfs *buf)
+{
+	struct minix_sb_info * info = &sb->u.minix_sb;
+	long nblocks, nfree;
+
+	put_fs_long(MINIX_SUPER_MAGIC, &buf->f_type);
+	put_fs_long(1024, &buf->f_bsize);
+
+	nblocks = info->s_nzones - info->s_firstdatazone;
+	nblocks <<= info->s_log_zone_size;
+	put_fs_long(nblocks, &buf->f_blocks);
+
+	nfree = minix_count_free_blocks(sb);
+	put_fs_long(nfree, &buf->f_bfree);
+	put_fs_long(nfree, &buf->f_bavail);
+
+	put_fs_long(info->s_ninodes, &buf->f_files);
+	put_fs_long(minix_count_free_inodes(sb), &buf->f_ffree);
+	put_fs_long(info->s_namelen, &buf->f_namelen);
+	/* Don't know what value to put in buf->f_fsid */
+}
+
+/* Entry nr of the block number array kept in the in-core inode. */
+#define inode_bmap(inode,nr) ((inode)->u.minix_i.i_data[(nr)])
+
+/*
+ * block_bmap() returns entry nr of the block number table held in bh and
+ * releases bh.  A NULL bh gives 0.
+ */
+static int block_bmap(struct buffer_head * bh, int nr)
+{
+	int zone = 0;
+
+	if (bh) {
+		zone = ((unsigned short *) bh->b_data)[nr];
+		brelse(bh);
+	}
+	return zone;
+}
+
+/*
+ * minix_bmap() maps block number `block' of a file to a block number on
+ * the device: 7 direct entries, then 512 through the indirect block
+ * (entry 7), then 512*512 through the double indirect block (entry 8).
+ * Returns 0 if the block is out of range or nothing is mapped there.
+ */
+int minix_bmap(struct inode * inode,int block)
+{
+	int zone;
+
+	if (block<0) {
+		printk("minix_bmap: block<0");
+		return 0;
+	}
+	if (block >= 7+512+512*512) {
+		printk("minix_bmap: block>big");
+		return 0;
+	}
+
+	/* direct */
+	if (block < 7)
+		return inode_bmap(inode,block);
+	block -= 7;
+
+	/* single indirect */
+	if (block < 512) {
+		zone = inode_bmap(inode,7);
+		if (zone)
+			zone = block_bmap(bread(inode->i_dev,zone,BLOCK_SIZE),block);
+		return zone;
+	}
+	block -= 512;
+
+	/* double indirect: high 9 bits pick the indirect block, low 9 the entry */
+	zone = inode_bmap(inode,8);
+	if (zone)
+		zone = block_bmap(bread(inode->i_dev,zone,BLOCK_SIZE),block>>9);
+	if (zone)
+		zone = block_bmap(bread(inode->i_dev,zone,BLOCK_SIZE),block & 511);
+	return zone;
+}
+
+/*
+ * inode_getblk() returns the buffer for the block whose number is kept
+ * in entry nr of the inode's i_data array.  If the entry is 0 and create
+ * is set, a new block is allocated and entered there.
+ *
+ * Returns NULL if the entry is 0 and create is not set, or if no block
+ * could be allocated.  The entry is re-read after each getblk() and the
+ * whole thing restarted if it was changed in the meantime.
+ */
+static struct buffer_head * inode_getblk(struct inode * inode, int nr, int create)
+{
+ int tmp;
+ unsigned short *p;
+ struct buffer_head * result;
+
+ p = inode->u.minix_i.i_data + nr;
+repeat:
+ tmp = *p;
+ if (tmp) {
+ result = getblk(inode->i_dev, tmp, BLOCK_SIZE);
+ /* still the same block number: done */
+ if (tmp == *p)
+ return result;
+ brelse(result);
+ goto repeat;
+ }
+ if (!create)
+ return NULL;
+ tmp = minix_new_block(inode->i_sb);
+ if (!tmp)
+ return NULL;
+ result = getblk(inode->i_dev, tmp, BLOCK_SIZE);
+ /* the entry got filled in meanwhile: give our block back, use that one */
+ if (*p) {
+ minix_free_block(inode->i_sb,tmp);
+ brelse(result);
+ goto repeat;
+ }
+ *p = tmp;
+ inode->i_ctime = CURRENT_TIME;
+ inode->i_dirt = 1;
+ return result;
+}
+
+/*
+ * block_getblk() is the counterpart of inode_getblk() for indirect
+ * blocks: it returns the buffer for the block whose number is kept in
+ * entry nr of the table in bh.  If the entry is 0 and create is set, a
+ * new block is allocated and entered there, and bh is marked dirty.
+ *
+ * bh is released on every path that returns.  Returns NULL if bh is
+ * NULL or cannot be read, if the entry is 0 and create is not set, or if
+ * no block could be allocated.
+ */
+static struct buffer_head * block_getblk(struct inode * inode,
+ struct buffer_head * bh, int nr, int create)
+{
+ int tmp;
+ unsigned short *p;
+ struct buffer_head * result;
+
+ if (!bh)
+ return NULL;
+ /* the table has to be read in before it can be looked at */
+ if (!bh->b_uptodate) {
+ ll_rw_block(READ, 1, &bh);
+ wait_on_buffer(bh);
+ if (!bh->b_uptodate) {
+ brelse(bh);
+ return NULL;
+ }
+ }
+ p = nr + (unsigned short *) bh->b_data;
+repeat:
+ tmp = *p;
+ if (tmp) {
+ result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
+ /* still the same block number: done */
+ if (tmp == *p) {
+ brelse(bh);
+ return result;
+ }
+ brelse(result);
+ goto repeat;
+ }
+ if (!create) {
+ brelse(bh);
+ return NULL;
+ }
+ tmp = minix_new_block(inode->i_sb);
+ if (!tmp) {
+ brelse(bh);
+ return NULL;
+ }
+ result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
+ /* the entry got filled in meanwhile: give our block back, use that one */
+ if (*p) {
+ minix_free_block(inode->i_sb,tmp);
+ brelse(result);
+ goto repeat;
+ }
+ *p = tmp;
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ return result;
+}
+
+/*
+ * minix_getblk() returns the buffer for block number `block' of a file,
+ * going through the direct entries, the indirect block (entry 7) or the
+ * double indirect block (entry 8) as needed.  With create set, missing
+ * blocks on the way are allocated.
+ * Returns NULL if block is out of range or no buffer could be had.
+ */
+struct buffer_head * minix_getblk(struct inode * inode, int block, int create)
+{
+	struct buffer_head * table;
+
+	if (block<0) {
+		printk("minix_getblk: block<0");
+		return NULL;
+	}
+	if (block >= 7+512+512*512) {
+		printk("minix_getblk: block>big");
+		return NULL;
+	}
+
+	/* direct */
+	if (block < 7)
+		return inode_getblk(inode,block,create);
+	block -= 7;
+
+	/* single indirect */
+	if (block < 512) {
+		table = inode_getblk(inode,7,create);
+		return block_getblk(inode, table, block, create);
+	}
+	block -= 512;
+
+	/* double indirect: high 9 bits pick the indirect block, low 9 the entry */
+	table = inode_getblk(inode,8,create);
+	table = block_getblk(inode, table, block>>9, create);
+	return block_getblk(inode, table, block & 511, create);
+}
+
+/*
+ * minix_bread() is minix_getblk() plus reading the block in if the
+ * buffer is not up to date.
+ * Returns the buffer, or NULL if there is none or the read failed.
+ */
+struct buffer_head * minix_bread(struct inode * inode, int block, int create)
+{
+	struct buffer_head * bh = minix_getblk(inode,block,create);
+
+	if (!bh)
+		return NULL;
+	if (!bh->b_uptodate) {
+		ll_rw_block(READ, 1, &bh);
+		wait_on_buffer(bh);
+		if (!bh->b_uptodate) {
+			brelse(bh);
+			return NULL;
+		}
+	}
+	return bh;
+}
+
+/*
+ * minix_read_inode() fills in the in-core inode from its on-disk copy
+ * and selects the inode operations that go with the file type.
+ * If the inode number is out of range or the block cannot be read, a
+ * message is printed and the inode is left with i_op NULL and i_mode 0.
+ */
+void minix_read_inode(struct inode * inode)
+{
+	struct minix_sb_info * info = &inode->i_sb->u.minix_sb;
+	struct buffer_head * bh;
+	struct minix_inode * raw;
+	int block, ino, n;
+
+	ino = inode->i_ino;
+	inode->i_op = NULL;
+	inode->i_mode = 0;
+	if (!ino || ino >= info->s_ninodes) {
+		printk("Bad inode number on dev 0x%04x: %d is out of range\n",
+			inode->i_dev, ino);
+		return;
+	}
+	/* the inode table follows the boot block, superblock and both bitmaps */
+	block = 2 + info->s_imap_blocks +
+		info->s_zmap_blocks +
+		(ino-1)/MINIX_INODES_PER_BLOCK;
+	bh = bread(inode->i_dev,block, BLOCK_SIZE);
+	if (!bh) {
+		printk("Major problem: unable to read inode from dev 0x%04x\n",
+			inode->i_dev);
+		return;
+	}
+	raw = (struct minix_inode *) bh->b_data;
+	raw += (ino-1)%MINIX_INODES_PER_BLOCK;
+
+	inode->i_mode = raw->i_mode;
+	inode->i_uid = raw->i_uid;
+	inode->i_gid = raw->i_gid;
+	inode->i_nlink = raw->i_nlinks;
+	inode->i_size = raw->i_size;
+	/* the disk keeps a single time stamp */
+	inode->i_mtime = inode->i_atime = inode->i_ctime = raw->i_time;
+	inode->i_blocks = inode->i_blksize = 0;
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+		/* device files keep the device number in the first zone slot */
+		inode->i_rdev = raw->i_zone[0];
+	} else {
+		for (n = 0; n < 9; n++)
+			inode->u.minix_i.i_data[n] = raw->i_zone[n];
+	}
+	brelse(bh);
+
+	if (S_ISREG(inode->i_mode))
+		inode->i_op = &minix_file_inode_operations;
+	else if (S_ISDIR(inode->i_mode))
+		inode->i_op = &minix_dir_inode_operations;
+	else if (S_ISLNK(inode->i_mode))
+		inode->i_op = &minix_symlink_inode_operations;
+	else if (S_ISCHR(inode->i_mode))
+		inode->i_op = &chrdev_inode_operations;
+	else if (S_ISBLK(inode->i_mode))
+		inode->i_op = &blkdev_inode_operations;
+	else if (S_ISFIFO(inode->i_mode))
+		init_fifo(inode);
+}
+
+/*
+ * minix_update_inode() copies the in-core inode into its on-disk copy in
+ * the buffer cache and marks that buffer dirty.
+ * Returns the buffer (to be released by the caller), or NULL if the
+ * inode number is out of range or the block cannot be read.  i_dirt is
+ * cleared in all cases.
+ */
+static struct buffer_head * minix_update_inode(struct inode * inode)
+{
+	struct minix_sb_info * info = &inode->i_sb->u.minix_sb;
+	struct buffer_head * bh;
+	struct minix_inode * raw;
+	int ino, block, n;
+
+	ino = inode->i_ino;
+	if (!ino || ino >= info->s_ninodes) {
+		printk("Bad inode number on dev 0x%04x: %d is out of range\n",
+			inode->i_dev, ino);
+		inode->i_dirt = 0;
+		return NULL;
+	}
+	/* the inode table follows the boot block, superblock and both bitmaps */
+	block = 2 + info->s_imap_blocks + info->s_zmap_blocks +
+		(ino-1)/MINIX_INODES_PER_BLOCK;
+	bh = bread(inode->i_dev, block, BLOCK_SIZE);
+	if (!bh) {
+		printk("unable to read i-node block\n");
+		inode->i_dirt = 0;
+		return NULL;
+	}
+	raw = (struct minix_inode *) bh->b_data;
+	raw += (ino-1)%MINIX_INODES_PER_BLOCK;
+
+	raw->i_mode = inode->i_mode;
+	raw->i_uid = inode->i_uid;
+	raw->i_gid = inode->i_gid;
+	raw->i_nlinks = inode->i_nlink;
+	raw->i_size = inode->i_size;
+	/* the disk keeps a single time stamp: the modification time */
+	raw->i_time = inode->i_mtime;
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+		raw->i_zone[0] = inode->i_rdev;
+	} else {
+		for (n = 0; n < 9; n++)
+			raw->i_zone[n] = inode->u.minix_i.i_data[n];
+	}
+	inode->i_dirt = 0;
+	mark_buffer_dirty(bh, 1);
+	return bh;
+}
+
+/*
+ * minix_write_inode() updates the on-disk copy of the inode in the
+ * buffer cache and lets go of the buffer; nothing is waited for.
+ */
+void minix_write_inode(struct inode * inode)
+{
+	brelse(minix_update_inode(inode));
+}
+
+/*
+ * minix_sync_inode() updates the on-disk copy of the inode and, if the
+ * buffer is dirty, writes it and waits for the write to finish.
+ * Returns 0 on success and -1 if the inode could not be updated or the
+ * buffer is not up to date after the write.
+ */
+int minix_sync_inode(struct inode * inode)
+{
+	struct buffer_head * bh = minix_update_inode(inode);
+	int err = 0;
+
+	if (!bh) {
+		err = -1;
+	} else if (bh->b_dirt) {
+		ll_rw_block(WRITE, 1, &bh);
+		wait_on_buffer(bh);
+		if (bh->b_req && !bh->b_uptodate) {
+			printk ("IO error syncing minix inode [%04x:%08lx]\n",
+				inode->i_dev, inode->i_ino);
+			err = -1;
+		}
+	}
+	brelse (bh);
+	return err;
+}
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
new file mode 100644
index 000000000..8246e3ce7
--- /dev/null
+++ b/fs/minix/namei.c
@@ -0,0 +1,830 @@
+/*
+ * linux/fs/minix/namei.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/sched.h>
+#include <linux/minix_fs.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/errno.h>
+
+#include <asm/segment.h>
+
+/*
+ * comment out this line if you want names > info->s_namelen chars to be
+ * truncated. Else they will be disallowed (ENAMETOOLONG).
+ */
+/* #define NO_TRUNCATE */
+
+static inline int namecompare(int len, int maxlen,
+ const char * name, const char * buffer)
+{ /* Returns nonzero when the len-byte name equals the name stored in buffer (a field of maxlen bytes), 0 otherwise. */
+ if (len > maxlen) /* cannot fit in the field at all */
+ return 0;
+ if (len < maxlen && buffer[len]) /* stored name continues past len bytes, so it is longer than name */
+ return 0;
+ return !memcmp(name, buffer, len);
+}
+
+/*
+ * ok, we cannot use strncmp, as the name is not in our data space.
+ * Thus we'll have to use minix_match. No big problem. Match also makes
+ * some sanity tests.
+ *
+ * NOTE! unlike strncmp, minix_match returns 1 for success, 0 for failure.
+ */
+static int minix_match(int len, const char * name,
+ struct buffer_head * bh, unsigned long * offset,
+ struct minix_sb_info * info)
+{ /* Tests the directory entry at bh->b_data + *offset against name. Always advances *offset by s_dirsize. Returns 1 on a match, 0 otherwise. */
+ struct minix_dir_entry * de;
+
+ de = (struct minix_dir_entry *) (bh->b_data + *offset);
+ *offset += info->s_dirsize; /* advanced before any test, so the caller's offset moves even on a mismatch */
+ if (!de->inode || len > info->s_namelen) /* an entry with inode 0 never matches; neither does an over-long name */
+ return 0;
+ /* "" means "." ---> so paths like "/usr/lib//libc.a" work */
+ if (!len && (de->name[0]=='.') && (de->name[1]=='\0'))
+ return 1;
+ return namecompare(len,info->s_namelen,name,de->name);
+}
+
+/*
+ * minix_find_entry()
+ *
+ * finds an entry in the specified directory with the wanted name. It
+ * returns the cache buffer in which the entry was found, and the entry
+ * itself (as a parameter - res_dir). It does NOT read the inode of the
+ * entry - you'll have to do that yourself if you want to.
+ */
+static struct buffer_head * minix_find_entry(struct inode * dir,
+ const char * name, int namelen, struct minix_dir_entry ** res_dir)
+{ /* Scans dir entry by entry up to i_size. On a hit returns the buffer (reference held) with *res_dir pointing at the entry; otherwise returns NULL with *res_dir == NULL. */
+ unsigned long block, offset;
+ struct buffer_head * bh;
+ struct minix_sb_info * info;
+
+ *res_dir = NULL;
+ if (!dir || !dir->i_sb)
+ return NULL;
+ info = &dir->i_sb->u.minix_sb;
+ if (namelen > info->s_namelen) {
+#ifdef NO_TRUNCATE
+ return NULL;
+#else
+ namelen = info->s_namelen; /* without NO_TRUNCATE only the first s_namelen bytes are compared */
+#endif
+ }
+ bh = NULL;
+ block = offset = 0;
+ while (block*BLOCK_SIZE+offset < dir->i_size) {
+ if (!bh) {
+ bh = minix_bread(dir,block,0);
+ if (!bh) { /* minix_bread() gave no buffer: skip this block */
+ block++;
+ continue;
+ }
+ }
+ *res_dir = (struct minix_dir_entry *) (bh->b_data + offset); /* set before the test; reset to NULL below if nothing matches */
+ if (minix_match(namelen,name,bh,&offset,info)) /* also advances offset to the next entry */
+ return bh;
+ if (offset < bh->b_size)
+ continue;
+ brelse(bh); /* end of this block: move on to the next one */
+ bh = NULL;
+ offset = 0;
+ block++;
+ }
+ brelse(bh);
+ *res_dir = NULL;
+ return NULL;
+}
+
+int minix_lookup(struct inode * dir,const char * name, int len,
+ struct inode ** result)
+{ /* Looks name up in dir and returns its inode through *result. A non-NULL dir is released with iput() on every path. Returns 0, -ENOENT or -EACCES. */
+ int ino;
+ struct minix_dir_entry * de;
+ struct buffer_head * bh;
+
+ *result = NULL; /* stays NULL on every error return */
+ if (!dir)
+ return -ENOENT;
+ if (!S_ISDIR(dir->i_mode)) {
+ iput(dir);
+ return -ENOENT;
+ }
+ if (!(bh = minix_find_entry(dir,name,len,&de))) {
+ iput(dir);
+ return -ENOENT;
+ }
+ ino = de->inode; /* copy the number out before the buffer is released */
+ brelse(bh);
+ if (!(*result = iget(dir->i_sb,ino))) {
+ iput(dir);
+ return -EACCES;
+ }
+ iput(dir);
+ return 0;
+}
+
+/*
+ * minix_add_entry()
+ *
+ * adds a file entry to the specified directory, returning a possible
+ * error value if it fails.
+ *
+ * NOTE!! The inode part of 'de' is left at 0 - which means you
+ * may not sleep between calling this and putting something into
+ * the entry, as someone else might have used it while you slept.
+ */
+static int minix_add_entry(struct inode * dir,
+ const char * name, int namelen,
+ struct buffer_head ** res_buf,
+ struct minix_dir_entry ** res_dir)
+{ /* Stores name in the first free slot (inode == 0) of dir, growing dir by one entry if none is free. On success returns 0 with *res_buf / *res_dir set (buffer still held, entry inode still 0). Errors: -ENOENT, -ENAMETOOLONG (NO_TRUNCATE only), -ENOSPC, -EEXIST. */
+ int i;
+ unsigned long block, offset;
+ struct buffer_head * bh;
+ struct minix_dir_entry * de;
+ struct minix_sb_info * info;
+
+ *res_buf = NULL;
+ *res_dir = NULL;
+ if (!dir || !dir->i_sb)
+ return -ENOENT;
+ info = &dir->i_sb->u.minix_sb;
+ if (namelen > info->s_namelen) {
+#ifdef NO_TRUNCATE
+ return -ENAMETOOLONG;
+#else
+ namelen = info->s_namelen; /* without NO_TRUNCATE the stored name is cut to s_namelen bytes */
+#endif
+ }
+ if (!namelen) /* an empty name cannot be added */
+ return -ENOENT;
+ bh = NULL;
+ block = offset = 0;
+ while (1) {
+ if (!bh) {
+ bh = minix_bread(dir,block,1); /* NOTE(review): the final argument 1 presumably asks for the block to be created if missing; confirm against minix_bread() */
+ if (!bh)
+ return -ENOSPC;
+ }
+ de = (struct minix_dir_entry *) (bh->b_data + offset);
+ offset += info->s_dirsize;
+ if (block*bh->b_size + offset > dir->i_size) { /* walked past the current end: grow the directory by this one entry */
+ de->inode = 0; /* so the new slot is taken by the free-slot branch below */
+ dir->i_size = block*bh->b_size + offset;
+ dir->i_dirt = 1;
+ }
+ if (de->inode) {
+ if (namecompare(namelen, info->s_namelen, name, de->name)) { /* name already present in a live entry */
+ brelse(bh);
+ return -EEXIST;
+ }
+ } else {
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ for (i = 0; i < info->s_namelen ; i++)
+ de->name[i] = (i < namelen) ? name[i] : 0; /* name is NUL-padded to the full s_namelen field */
+ dir->i_version = ++event;
+ mark_buffer_dirty(bh, 1);
+ *res_dir = de;
+ break; /* bh stays held and is handed back through *res_buf */
+ }
+ if (offset < bh->b_size)
+ continue;
+ brelse(bh);
+ bh = NULL;
+ offset = 0;
+ block++;
+ }
+ *res_buf = bh;
+ return 0;
+}
+
+int minix_create(struct inode * dir,const char * name, int len, int mode,
+ struct inode ** result)
+{ /* Allocates a new inode with the minix file operations and links it into dir under name. On success returns 0 and hands the inode (reference kept) back through *result; dir is released on every path where it is non-NULL. */
+ int error;
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct minix_dir_entry * de;
+
+ *result = NULL;
+ if (!dir)
+ return -ENOENT;
+ inode = minix_new_inode(dir);
+ if (!inode) {
+ iput(dir);
+ return -ENOSPC;
+ }
+ inode->i_op = &minix_file_inode_operations;
+ inode->i_mode = mode; /* mode is stored as passed in */
+ inode->i_dirt = 1;
+ error = minix_add_entry(dir,name,len, &bh ,&de); /* also reports -EEXIST; no separate lookup is done first */
+ if (error) {
+ inode->i_nlink--; /* undo the link count of the inode just allocated before dropping it */
+ inode->i_dirt = 1;
+ iput(inode);
+ iput(dir);
+ return error;
+ }
+ de->inode = inode->i_ino; /* fills in the slot minix_add_entry() left at 0 */
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ iput(dir);
+ *result = inode;
+ return 0;
+}
+
+int minix_mknod(struct inode * dir, const char * name, int len, int mode, int rdev)
+{ /* Creates a new inode of the type encoded in mode and links it into dir under name. Returns 0, -ENOENT, -EEXIST, -ENOSPC or the error from minix_add_entry(). Both dir and the new inode are released before returning. */
+ int error;
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct minix_dir_entry * de;
+
+ if (!dir)
+ return -ENOENT;
+ bh = minix_find_entry(dir,name,len,&de);
+ if (bh) { /* name already exists */
+ brelse(bh);
+ iput(dir);
+ return -EEXIST;
+ }
+ inode = minix_new_inode(dir);
+ if (!inode) {
+ iput(dir);
+ return -ENOSPC;
+ }
+ inode->i_uid = current->fsuid;
+ inode->i_mode = mode;
+ inode->i_op = NULL; /* stays NULL for any type not matched below */
+ if (S_ISREG(inode->i_mode))
+ inode->i_op = &minix_file_inode_operations;
+ else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = &minix_dir_inode_operations;
+ if (dir->i_mode & S_ISGID) /* a setgid parent passes the bit on to a new directory */
+ inode->i_mode |= S_ISGID;
+ }
+ else if (S_ISLNK(inode->i_mode))
+ inode->i_op = &minix_symlink_inode_operations;
+ else if (S_ISCHR(inode->i_mode))
+ inode->i_op = &chrdev_inode_operations;
+ else if (S_ISBLK(inode->i_mode))
+ inode->i_op = &blkdev_inode_operations;
+ else if (S_ISFIFO(inode->i_mode))
+ init_fifo(inode);
+ if (S_ISBLK(mode) || S_ISCHR(mode))
+ inode->i_rdev = rdev; /* rdev is only recorded for device nodes */
+ inode->i_dirt = 1;
+ error = minix_add_entry(dir, name, len, &bh, &de);
+ if (error) {
+ inode->i_nlink--; /* undo the link count of the inode just allocated before dropping it */
+ inode->i_dirt = 1;
+ iput(inode);
+ iput(dir);
+ return error;
+ }
+ de->inode = inode->i_ino; /* fills in the slot minix_add_entry() left at 0 */
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ iput(dir);
+ iput(inode);
+ return 0;
+}
+
+int minix_mkdir(struct inode * dir, const char * name, int len, int mode)
+{ /* Creates directory name inside dir: allocates an inode, writes its "." and ".." entries, then links it into dir. Returns 0, -EINVAL, -EEXIST, -EMLINK, -ENOSPC or the error from minix_add_entry(). dir is released on every path. */
+ int error;
+ struct inode * inode;
+ struct buffer_head * bh, *dir_block;
+ struct minix_dir_entry * de;
+ struct minix_sb_info * info;
+
+ if (!dir || !dir->i_sb) {
+ iput(dir);
+ return -EINVAL;
+ }
+ info = &dir->i_sb->u.minix_sb;
+ bh = minix_find_entry(dir,name,len,&de);
+ if (bh) { /* name already exists */
+ brelse(bh);
+ iput(dir);
+ return -EEXIST;
+ }
+ if (dir->i_nlink >= MINIX_LINK_MAX) { /* the child's ".." would add one more link to dir */
+ iput(dir);
+ return -EMLINK;
+ }
+ inode = minix_new_inode(dir);
+ if (!inode) {
+ iput(dir);
+ return -ENOSPC;
+ }
+ inode->i_op = &minix_dir_inode_operations;
+ inode->i_size = 2 * info->s_dirsize; /* room for exactly "." and ".." */
+ dir_block = minix_bread(inode,0,1);
+ if (!dir_block) {
+ iput(dir);
+ inode->i_nlink--; /* undo the link count of the inode just allocated before dropping it */
+ inode->i_dirt = 1;
+ iput(inode);
+ return -ENOSPC;
+ }
+ de = (struct minix_dir_entry *) dir_block->b_data; /* first entry: "." refers to the new directory itself */
+ de->inode=inode->i_ino;
+ strcpy(de->name,".");
+ de = (struct minix_dir_entry *) (dir_block->b_data + info->s_dirsize); /* second entry: ".." refers to the parent */
+ de->inode = dir->i_ino;
+ strcpy(de->name,"..");
+ inode->i_nlink = 2;
+ mark_buffer_dirty(dir_block, 1);
+ brelse(dir_block);
+ inode->i_mode = S_IFDIR | (mode & 0777 & ~current->fs->umask); /* only permission bits survive, filtered by the umask */
+ if (dir->i_mode & S_ISGID) /* a setgid parent passes the bit on */
+ inode->i_mode |= S_ISGID;
+ inode->i_dirt = 1;
+ error = minix_add_entry(dir, name, len, &bh, &de);
+ if (error) {
+ iput(dir);
+ inode->i_nlink=0; /* link count zeroed before the final iput() */
+ iput(inode);
+ return error;
+ }
+ de->inode = inode->i_ino; /* fills in the slot minix_add_entry() left at 0 */
+ mark_buffer_dirty(bh, 1);
+ dir->i_nlink++; /* accounts for the new ".." entry */
+ dir->i_dirt = 1;
+ iput(dir);
+ iput(inode);
+ brelse(bh);
+ return 0;
+}
+
+/*
+ * routine to check that the specified directory is empty (for rmdir)
+ */
+static int empty_dir(struct inode * inode)
+{ /* Returns 0 if the directory holds any live entry besides its first two, 1 otherwise. A malformed directory is logged and also reported as 1. */
+ unsigned int block, offset;
+ struct buffer_head * bh;
+ struct minix_dir_entry * de;
+ struct minix_sb_info * info;
+
+ if (!inode || !inode->i_sb)
+ return 1;
+ info = &inode->i_sb->u.minix_sb;
+ block = 0;
+ bh = NULL;
+ offset = 2*info->s_dirsize; /* the scan below starts after the first two entries */
+ if (inode->i_size & (info->s_dirsize-1)) /* size must be a whole number of entries; the mask test is exact only when s_dirsize is a power of two */
+ goto bad_dir;
+ if (inode->i_size < offset)
+ goto bad_dir;
+ bh = minix_bread(inode,0,0);
+ if (!bh)
+ goto bad_dir;
+ de = (struct minix_dir_entry *) bh->b_data;
+ if (!de->inode || strcmp(de->name,".")) /* first entry must be a live "." */
+ goto bad_dir;
+ de = (struct minix_dir_entry *) (bh->b_data + info->s_dirsize);
+ if (!de->inode || strcmp(de->name,"..")) /* second entry must be a live ".." */
+ goto bad_dir;
+ while (block*BLOCK_SIZE+offset < inode->i_size) {
+ if (!bh) {
+ bh = minix_bread(inode,block,0);
+ if (!bh) { /* no buffer for this block: skip it */
+ block++;
+ continue;
+ }
+ }
+ de = (struct minix_dir_entry *) (bh->b_data + offset);
+ offset += info->s_dirsize;
+ if (de->inode) { /* a live entry: not empty */
+ brelse(bh);
+ return 0;
+ }
+ if (offset < bh->b_size)
+ continue;
+ brelse(bh);
+ bh = NULL;
+ offset = 0;
+ block++;
+ }
+ brelse(bh);
+ return 1;
+bad_dir:
+ brelse(bh);
+ printk("Bad directory on device %04x\n",inode->i_dev);
+ return 1; /* callers therefore treat a bad directory as empty */
+}
+
+int minix_rmdir(struct inode * dir, const char * name, int len)
+{ /* Removes the empty directory name from dir. Returns 0 or one of -ENOENT, -EPERM, -ENOTDIR, -ENOTEMPTY, -EBUSY. dir, the victim inode and the entry buffer are all released at end_rmdir. */
+ int retval;
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct minix_dir_entry * de;
+
+ inode = NULL;
+ bh = minix_find_entry(dir,name,len,&de);
+ retval = -ENOENT;
+ if (!bh)
+ goto end_rmdir;
+ retval = -EPERM; /* result for the next four checks */
+ if (!(inode = iget(dir->i_sb, de->inode)))
+ goto end_rmdir;
+ if ((dir->i_mode & S_ISVTX) && !fsuser() && /* sticky directory: only fsuser() or the owner of the entry or of dir may remove */
+ current->fsuid != inode->i_uid &&
+ current->fsuid != dir->i_uid)
+ goto end_rmdir;
+ if (inode->i_dev != dir->i_dev)
+ goto end_rmdir;
+ if (inode == dir) /* we may not delete ".", but "../dir" is ok */
+ goto end_rmdir;
+ if (!S_ISDIR(inode->i_mode)) {
+ retval = -ENOTDIR;
+ goto end_rmdir;
+ }
+ if (!empty_dir(inode)) {
+ retval = -ENOTEMPTY;
+ goto end_rmdir;
+ }
+ if (de->inode != inode->i_ino) { /* entry no longer refers to the inode fetched above */
+ retval = -ENOENT;
+ goto end_rmdir;
+ }
+ if (inode->i_count > 1) { /* someone else still holds a reference */
+ retval = -EBUSY;
+ goto end_rmdir;
+ }
+ if (inode->i_nlink != 2) /* only a warning; removal continues */
+ printk("empty directory has nlink!=2 (%d)\n",inode->i_nlink);
+ de->inode = 0; /* frees the directory slot */
+ dir->i_version = ++event;
+ mark_buffer_dirty(bh, 1);
+ inode->i_nlink=0;
+ inode->i_dirt=1;
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ dir->i_nlink--; /* the removed directory's ".." no longer links to dir */
+ dir->i_dirt=1;
+ retval = 0;
+end_rmdir:
+ iput(dir);
+ iput(inode);
+ brelse(bh);
+ return retval;
+}
+
+int minix_unlink(struct inode * dir, const char * name, int len)
+{ /* Removes the non-directory entry name from dir and drops one link from its inode. Returns 0, -ENOENT or -EPERM. dir, the inode and the buffer are released at end_unlink. */
+ int retval;
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct minix_dir_entry * de;
+
+repeat:
+ retval = -ENOENT;
+ inode = NULL;
+ bh = minix_find_entry(dir,name,len,&de);
+ if (!bh)
+ goto end_unlink;
+ if (!(inode = iget(dir->i_sb, de->inode)))
+ goto end_unlink;
+ retval = -EPERM;
+ if (S_ISDIR(inode->i_mode)) /* directories are refused here */
+ goto end_unlink;
+ if (de->inode != inode->i_ino) { /* entry changed since it was found: release, give up the CPU and start over */
+ iput(inode);
+ brelse(bh);
+ current->counter = 0;
+ schedule();
+ goto repeat;
+ }
+ if ((dir->i_mode & S_ISVTX) && !fsuser() && /* sticky directory: only fsuser() or the owner of the entry or of dir may unlink */
+ current->fsuid != inode->i_uid &&
+ current->fsuid != dir->i_uid)
+ goto end_unlink;
+ if (de->inode != inode->i_ino) { /* same comparison as above, repeated after the permission check */
+ retval = -ENOENT;
+ goto end_unlink;
+ }
+ if (!inode->i_nlink) { /* inconsistent link count: warn and force it to 1 so the decrement below ends at 0 */
+ printk("Deleting nonexistent file (%04x:%lu), %d\n",
+ inode->i_dev,inode->i_ino,inode->i_nlink);
+ inode->i_nlink=1;
+ }
+ de->inode = 0; /* frees the directory slot */
+ dir->i_version = ++event;
+ mark_buffer_dirty(bh, 1);
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ dir->i_dirt = 1;
+ inode->i_nlink--;
+ inode->i_ctime = dir->i_ctime;
+ inode->i_dirt = 1;
+ retval = 0;
+end_unlink:
+ brelse(bh);
+ iput(inode);
+ iput(dir);
+ return retval;
+}
+
+int minix_symlink(struct inode * dir, const char * name, int len, const char * symname)
+{ /* Creates symlink name in dir whose target text is symname (at most 1023 bytes are kept). Returns 0, -ENOSPC, -EEXIST or the error from minix_add_entry(). dir and the new inode are released before returning. */
+ struct minix_dir_entry * de;
+ struct inode * inode = NULL;
+ struct buffer_head * bh = NULL, * name_block = NULL;
+ int i;
+ char c;
+
+ if (!(inode = minix_new_inode(dir))) {
+ iput(dir);
+ return -ENOSPC;
+ }
+ inode->i_mode = S_IFLNK | 0777;
+ inode->i_op = &minix_symlink_inode_operations;
+ name_block = minix_bread(inode,0,1); /* block 0 of the new inode holds the target text */
+ if (!name_block) {
+ iput(dir);
+ inode->i_nlink--; /* undo the link count of the inode just allocated before dropping it */
+ inode->i_dirt = 1;
+ iput(inode);
+ return -ENOSPC;
+ }
+ i = 0;
+ while (i < 1023 && (c=*(symname++))) /* copy up to 1023 bytes, stopping at the terminating NUL */
+ name_block->b_data[i++] = c;
+ name_block->b_data[i] = 0; /* stored text is always NUL-terminated */
+ mark_buffer_dirty(name_block, 1);
+ brelse(name_block);
+ inode->i_size = i; /* size is the target length without the NUL */
+ inode->i_dirt = 1;
+ bh = minix_find_entry(dir,name,len,&de); /* the existence check happens only after the inode and its block were set up */
+ if (bh) {
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ iput(inode);
+ brelse(bh);
+ iput(dir);
+ return -EEXIST;
+ }
+ i = minix_add_entry(dir, name, len, &bh, &de); /* i is reused as the error code */
+ if (i) {
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ iput(inode);
+ iput(dir);
+ return i;
+ }
+ de->inode = inode->i_ino; /* fills in the slot minix_add_entry() left at 0 */
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ iput(dir);
+ iput(inode);
+ return 0;
+}
+
+int minix_link(struct inode * oldinode, struct inode * dir, const char * name, int len)
+{ /* Adds a hard link name in dir referring to oldinode. Returns 0, -EPERM, -EMLINK, -EEXIST or the error from minix_add_entry(). Both oldinode and dir are released on every path. */
+ int error;
+ struct minix_dir_entry * de;
+ struct buffer_head * bh;
+
+ if (S_ISDIR(oldinode->i_mode)) { /* hard links to directories are refused */
+ iput(oldinode);
+ iput(dir);
+ return -EPERM;
+ }
+ if (oldinode->i_nlink >= MINIX_LINK_MAX) {
+ iput(oldinode);
+ iput(dir);
+ return -EMLINK;
+ }
+ bh = minix_find_entry(dir,name,len,&de);
+ if (bh) { /* name already exists */
+ brelse(bh);
+ iput(dir);
+ iput(oldinode);
+ return -EEXIST;
+ }
+ error = minix_add_entry(dir, name, len, &bh, &de);
+ if (error) {
+ iput(dir);
+ iput(oldinode);
+ return error;
+ }
+ de->inode = oldinode->i_ino; /* fills in the slot minix_add_entry() left at 0 */
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ iput(dir);
+ oldinode->i_nlink++;
+ oldinode->i_ctime = CURRENT_TIME;
+ oldinode->i_dirt = 1;
+ iput(oldinode);
+ return 0;
+}
+
+static int subdir(struct inode * new_inode, struct inode * old_inode)
+{ /* Walks ".." upwards from new_inode; returns 1 if old_inode is reached (new_inode is old_inode or lies below it), 0 otherwise. */
+ int ino;
+ int result;
+
+ new_inode->i_count++; /* extra reference: minix_lookup() releases the directory it is given, and the final iput() below drops whatever is current */
+ result = 0;
+ for (;;) {
+ if (new_inode == old_inode) {
+ result = 1;
+ break;
+ }
+ if (new_inode->i_dev != old_inode->i_dev) /* different device: stop searching */
+ break;
+ ino = new_inode->i_ino;
+ if (minix_lookup(new_inode,"..",2,&new_inode)) /* on failure minix_lookup() has already set new_inode to NULL */
+ break;
+ if (new_inode->i_ino == ino) /* ".." is the directory itself: stop climbing */
+ break;
+ }
+ iput(new_inode);
+ return result;
+}
+
+#define PARENT_INO(buffer) \
+(((struct minix_dir_entry *) ((buffer)+info->s_dirsize))->inode) /* inode field of the second entry in a directory block (where minix_mkdir() writes ".."); needs a variable named info in scope at the point of use */
+
+/*
+ * rename uses retrying to avoid race-conditions: at least they should be minimal.
+ * it tries to allocate all the blocks, then sanity-checks, and if the sanity-
+ * checks fail, it tries to restart itself again. Very practical - no changes
+ * are done until we know everything works ok.. and then all the changes can be
+ * done in one fell swoop when we have claimed all the buffers needed.
+ *
+ * Anybody can rename anything with this: the permission checks are left to the
+ * higher-level routines.
+ */
+static int do_minix_rename(struct inode * old_dir, const char * old_name, int old_len,
+ struct inode * new_dir, const char * new_name, int new_len)
+{ /* Moves entry old_name in old_dir to new_name in new_dir, replacing an existing target where allowed. Returns 0 or a negative errno. old_dir and new_dir are released at end_rename on every path. */
+ struct inode * old_inode, * new_inode;
+ struct buffer_head * old_bh, * new_bh, * dir_bh;
+ struct minix_dir_entry * old_de, * new_de;
+ struct minix_sb_info * info;
+ int retval;
+
+ info = &old_dir->i_sb->u.minix_sb; /* also used implicitly by the PARENT_INO() macro */
+ goto start_up;
+try_again: /* a sanity check failed: release everything except the two directories, yield, and redo the lookups */
+ brelse(old_bh);
+ brelse(new_bh);
+ brelse(dir_bh);
+ iput(old_inode);
+ iput(new_inode);
+ current->counter = 0;
+ schedule();
+start_up:
+ old_inode = new_inode = NULL;
+ old_bh = new_bh = dir_bh = NULL;
+ old_bh = minix_find_entry(old_dir,old_name,old_len,&old_de);
+ retval = -ENOENT;
+ if (!old_bh)
+ goto end_rename;
+ old_inode = __iget(old_dir->i_sb, old_de->inode,0); /* don't cross mnt-points */
+ if (!old_inode)
+ goto end_rename;
+ retval = -EPERM;
+ if ((old_dir->i_mode & S_ISVTX) && /* sticky source directory: caller must own the entry or the directory, or be fsuser() */
+ current->fsuid != old_inode->i_uid &&
+ current->fsuid != old_dir->i_uid && !fsuser())
+ goto end_rename;
+ new_bh = minix_find_entry(new_dir,new_name,new_len,&new_de);
+ if (new_bh) {
+ new_inode = __iget(new_dir->i_sb, new_de->inode, 0);
+ if (!new_inode) { /* target entry found but its inode is unavailable: treat the name as absent */
+ brelse(new_bh);
+ new_bh = NULL;
+ }
+ }
+ if (new_inode == old_inode) { /* both names already refer to the same inode: nothing to do */
+ retval = 0;
+ goto end_rename;
+ }
+ if (new_inode && S_ISDIR(new_inode->i_mode)) { /* replacing an existing directory */
+ retval = -EISDIR;
+ if (!S_ISDIR(old_inode->i_mode)) /* a non-directory may not replace a directory */
+ goto end_rename;
+ retval = -EINVAL;
+ if (subdir(new_dir, old_inode)) /* would move a directory into itself or one of its descendants */
+ goto end_rename;
+ retval = -ENOTEMPTY;
+ if (!empty_dir(new_inode))
+ goto end_rename;
+ retval = -EBUSY;
+ if (new_inode->i_count > 1) /* target directory is referenced elsewhere */
+ goto end_rename;
+ }
+ retval = -EPERM;
+ if (new_inode && (new_dir->i_mode & S_ISVTX) && /* sticky target directory: same ownership rule for the entry being replaced */
+ current->fsuid != new_inode->i_uid &&
+ current->fsuid != new_dir->i_uid && !fsuser())
+ goto end_rename;
+ if (S_ISDIR(old_inode->i_mode)) { /* moving a directory */
+ retval = -ENOTDIR;
+ if (new_inode && !S_ISDIR(new_inode->i_mode)) /* a directory may not replace a non-directory */
+ goto end_rename;
+ retval = -EINVAL;
+ if (subdir(new_dir, old_inode))
+ goto end_rename;
+ retval = -EIO;
+ dir_bh = minix_bread(old_inode,0,0); /* first block of the moved directory, holding its ".." entry */
+ if (!dir_bh)
+ goto end_rename;
+ if (PARENT_INO(dir_bh->b_data) != old_dir->i_ino) /* its ".." must currently point at old_dir */
+ goto end_rename;
+ retval = -EMLINK;
+ if (!new_inode && new_dir->i_nlink >= MINIX_LINK_MAX) /* new_dir would gain a link from the moved ".." */
+ goto end_rename;
+ }
+ if (!new_bh) { /* target name does not exist yet: create its slot */
+ retval = minix_add_entry(new_dir,new_name,new_len,&new_bh,&new_de);
+ if (retval)
+ goto end_rename;
+ }
+/* sanity checking before doing the rename - avoid races */
+ if (new_inode && (new_de->inode != new_inode->i_ino))
+ goto try_again;
+ if (new_de->inode && !new_inode)
+ goto try_again;
+ if (old_de->inode != old_inode->i_ino)
+ goto try_again;
+/* ok, that's it */
+ old_de->inode = 0; /* frees the old slot */
+ new_de->inode = old_inode->i_ino;
+ old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
+ old_dir->i_dirt = 1;
+ old_dir->i_version = ++event;
+ new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME;
+ new_dir->i_dirt = 1;
+ new_dir->i_version = ++event;
+ if (new_inode) { /* the replaced target loses the link from its old name */
+ new_inode->i_nlink--;
+ new_inode->i_ctime = CURRENT_TIME;
+ new_inode->i_dirt = 1;
+ }
+ mark_buffer_dirty(old_bh, 1);
+ mark_buffer_dirty(new_bh, 1);
+ if (dir_bh) { /* non-NULL only when a directory was moved */
+ PARENT_INO(dir_bh->b_data) = new_dir->i_ino; /* re-point the moved directory's ".." */
+ mark_buffer_dirty(dir_bh, 1);
+ old_dir->i_nlink--;
+ old_dir->i_dirt = 1;
+ if (new_inode) { /* second decrement for a replaced directory target */
+ new_inode->i_nlink--;
+ new_inode->i_dirt = 1;
+ } else {
+ new_dir->i_nlink++;
+ new_dir->i_dirt = 1;
+ }
+ }
+ retval = 0;
+end_rename:
+ brelse(dir_bh);
+ brelse(old_bh);
+ brelse(new_bh);
+ iput(old_inode);
+ iput(new_inode);
+ iput(old_dir);
+ iput(new_dir);
+ return retval;
+}
+
+/*
+ * Ok, rename also locks out other renames, as they can change the parent of
+ * a directory, and we don't want any races. Other races are checked for by
+ * "do_minix_rename()", which restarts if there are inconsistencies.
+ *
+ * Note that there is no race between different filesystems: it's only within
+ * the same device that races occur: many renames can happen at once, as long
+ * as they are on different partitions.
+ */
+int minix_rename(struct inode * old_dir, const char * old_name, int old_len,
+ struct inode * new_dir, const char * new_name, int new_len)
+{ /* Runs do_minix_rename() while holding a single function-local lock flag, so only one minix rename proceeds at a time; returns its result unchanged. */
+ static struct wait_queue * wait = NULL;
+ static int lock = 0; /* static: one flag shared by every call of this function */
+ int result;
+
+ while (lock) /* re-tested after each wake-up */
+ sleep_on(&wait);
+ lock = 1;
+ result = do_minix_rename(old_dir, old_name, old_len,
+ new_dir, new_name, new_len);
+ lock = 0;
+ wake_up(&wait);
+ return result;
+}
diff --git a/fs/minix/symlink.c b/fs/minix/symlink.c
new file mode 100644
index 000000000..bbd2b1f56
--- /dev/null
+++ b/fs/minix/symlink.c
@@ -0,0 +1,102 @@
+/*
+ * linux/fs/minix/symlink.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * minix symlink handling code
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/minix_fs.h>
+#include <linux/stat.h>
+
+static int minix_readlink(struct inode *, char *, int);
+static int minix_follow_link(struct inode *, struct inode *, int, int, struct inode **);
+
+/*
+ * symlinks can't do much...
+ */
+struct inode_operations minix_symlink_inode_operations = { /* operation table installed on minix symlink inodes: only readlink and follow_link are provided, every other slot is NULL */
+ NULL, /* no file-operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ minix_readlink, /* readlink */
+ minix_follow_link, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+static int minix_follow_link(struct inode * dir, struct inode * inode,
+ int flag, int mode, struct inode ** res_inode)
+{ /* If inode is a symlink, resolves its target text with open_namei() starting from dir and returns the result through *res_inode; a non-symlink inode is handed back unchanged. Returns 0, -ENOENT, -ELOOP, -EIO or the open_namei() result. */
+ int error;
+ struct buffer_head * bh;
+
+ *res_inode = NULL;
+ if (!dir) { /* no base directory given: use the process root and take a reference on it */
+ dir = current->fs->root;
+ dir->i_count++;
+ }
+ if (!inode) {
+ iput(dir);
+ return -ENOENT;
+ }
+ if (!S_ISLNK(inode->i_mode)) { /* not a symlink: nothing to follow */
+ iput(dir);
+ *res_inode = inode;
+ return 0;
+ }
+ if (current->link_count > 5) { /* nesting limit for links being followed */
+ iput(inode);
+ iput(dir);
+ return -ELOOP;
+ }
+ if (!(bh = minix_bread(inode, 0, 0))) { /* block 0 holds the target text */
+ iput(inode);
+ iput(dir);
+ return -EIO;
+ }
+ iput(inode); /* the symlink inode is dropped; bh keeps the text available */
+ current->link_count++;
+ error = open_namei(bh->b_data,flag,mode,res_inode,dir); /* NOTE(review): dir is not released here afterwards, so open_namei() presumably consumes it; confirm */
+ current->link_count--;
+ brelse(bh);
+ return error;
+}
+
+static int minix_readlink(struct inode * inode, char * buffer, int buflen)
+{ /* Copies the symlink's target text into buffer with put_fs_byte(), at most min(buflen, 1023) bytes and without a terminating NUL. Returns the number of bytes copied, -EINVAL if inode is not a symlink, or 0 if block 0 could not be obtained. The inode is released on every path. */
+ struct buffer_head * bh;
+ int i;
+ char c;
+
+ if (!S_ISLNK(inode->i_mode)) {
+ iput(inode);
+ return -EINVAL;
+ }
+ if (buflen > 1023) /* minix_symlink() stores at most 1023 bytes of target text */
+ buflen = 1023;
+ bh = minix_bread(inode, 0, 0);
+ iput(inode);
+ if (!bh) /* reported as an empty result rather than an error code */
+ return 0;
+ i = 0;
+ while (i<buflen && (c = bh->b_data[i])) { /* stop at buflen or at the stored NUL, whichever comes first */
+ i++;
+ put_fs_byte(c,buffer++);
+ }
+ brelse(bh);
+ return i;
+}
diff --git a/fs/minix/truncate.c b/fs/minix/truncate.c
new file mode 100644
index 000000000..0b127b9b8
--- /dev/null
+++ b/fs/minix/truncate.c
@@ -0,0 +1,184 @@
+/*
+ * linux/fs/minix/truncate.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/minix_fs.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+
+/*
+ * Truncate has the most races in the whole filesystem: coding it is
+ * a pain in the a**. Especially as I don't do any locking...
+ *
+ * The code may look a bit weird, but that's just because I've tried to
+ * handle things like file-size changes in a somewhat graceful manner.
+ * Anyway, truncating a file at the same time somebody else writes to it
+ * is likely to result in pretty weird behaviour...
+ *
+ * The new code handles normal truncates (size = 0) as well as the more
+ * general case (size = XXX). I hope.
+ */
+
+static int trunc_direct(struct inode * inode)
+{ /* Frees the direct zones (slots 0..6 of i_data) that lie at or beyond the current i_size. Returns 1 if some zone had to be left for a later retry, 0 otherwise. */
+ unsigned short * p;
+ struct buffer_head * bh;
+ int i, tmp;
+ int retry = 0;
+#define DIRECT_BLOCK ((inode->i_size + 1023) >> 10) /* number of 1024-byte blocks needed for i_size, rounded up; re-reads i_size at every use */
+
+repeat:
+ for (i = DIRECT_BLOCK ; i < 7 ; i++) {
+ p = i + inode->u.minix_i.i_data;
+ if (!(tmp = *p)) /* slot already empty */
+ continue;
+ bh = get_hash_table(inode->i_dev,tmp,BLOCK_SIZE);
+ if (i < DIRECT_BLOCK) { /* i_size grew in the meantime, so this block is now inside the file: start over */
+ brelse(bh);
+ goto repeat;
+ }
+ if ((bh && bh->b_count != 1) || tmp != *p) { /* buffer has another holder, or the slot changed: leave it and ask for a retry */
+ retry = 1;
+ brelse(bh);
+ continue;
+ }
+ *p = 0;
+ inode->i_dirt = 1;
+ brelse(bh);
+ minix_free_block(inode->i_sb,tmp);
+ }
+ return retry;
+}
+
+static int trunc_indirect(struct inode * inode, int offset, unsigned short * p)
+{ /* Frees the zones listed in the indirect block named by *p that lie at or beyond i_size; offset is the file block number of that block's first entry. Frees the indirect block itself once it is all zero and unshared. Returns 1 if a retry is needed, 0 otherwise. */
+ struct buffer_head * bh;
+ int i, tmp;
+ struct buffer_head * ind_bh;
+ unsigned short * ind;
+ int retry = 0;
+#define INDIRECT_BLOCK (DIRECT_BLOCK-offset) /* index of the first entry to free within this indirect block; may be negative */
+
+ tmp = *p;
+ if (!tmp) /* no indirect block allocated */
+ return 0;
+ ind_bh = bread(inode->i_dev, tmp, BLOCK_SIZE);
+ if (tmp != *p) { /* *p changed while the block was being read: ask for a retry */
+ brelse(ind_bh);
+ return 1;
+ }
+ if (!ind_bh) { /* block unreadable: just forget the pointer */
+ *p = 0;
+ return 0;
+ }
+repeat:
+ for (i = INDIRECT_BLOCK ; i < 512 ; i++) { /* the block is treated as 512 unsigned short zone numbers */
+ if (i < 0) /* the whole block lies beyond i_size: start at its first entry */
+ i = 0;
+ if (i < INDIRECT_BLOCK) /* i_size grew in the meantime: start over */
+ goto repeat;
+ ind = i+(unsigned short *) ind_bh->b_data;
+ tmp = *ind;
+ if (!tmp)
+ continue;
+ bh = get_hash_table(inode->i_dev,tmp,BLOCK_SIZE);
+ if (i < INDIRECT_BLOCK) {
+ brelse(bh);
+ goto repeat;
+ }
+ if ((bh && bh->b_count != 1) || tmp != *ind) { /* buffer has another holder, or the entry changed: leave it and ask for a retry */
+ retry = 1;
+ brelse(bh);
+ continue;
+ }
+ *ind = 0;
+ mark_buffer_dirty(ind_bh, 1);
+ brelse(bh);
+ minix_free_block(inode->i_sb,tmp);
+ }
+ ind = (unsigned short *) ind_bh->b_data;
+ for (i = 0; i < 512; i++) /* look for any entry still in use */
+ if (*(ind++))
+ break;
+ if (i >= 512) /* every entry is zero */
+ if (ind_bh->b_count != 1) /* someone else holds the indirect block: retry later */
+ retry = 1;
+ else { /* pairs with the b_count test directly above, not with the i >= 512 test */
+ tmp = *p;
+ *p = 0;
+ minix_free_block(inode->i_sb,tmp);
+ }
+ brelse(ind_bh);
+ return retry;
+}
+
+static int trunc_dindirect(struct inode * inode)
+{ /* Truncates through the double-indirect block (slot 8 of i_data) by calling trunc_indirect() on each of its entries from DINDIRECT_BLOCK on, then frees the double-indirect block itself once it is all zero and unshared. Returns 1 if a retry is needed, 0 otherwise. */
+ int i, tmp;
+ struct buffer_head * dind_bh;
+ unsigned short * dind, * p;
+ int retry = 0;
+#define DINDIRECT_BLOCK ((DIRECT_BLOCK-(512+7))>>9) /* index of the first double-indirect entry to process: blocks past the 7 direct and 512 single-indirect ones, in units of 512 */
+
+ p = 8 + inode->u.minix_i.i_data;
+ if (!(tmp = *p)) /* no double-indirect block allocated */
+ return 0;
+ dind_bh = bread(inode->i_dev, tmp, BLOCK_SIZE);
+ if (tmp != *p) { /* slot changed while the block was being read: ask for a retry */
+ brelse(dind_bh);
+ return 1;
+ }
+ if (!dind_bh) { /* block unreadable: just forget the pointer */
+ *p = 0;
+ return 0;
+ }
+repeat:
+ for (i = DINDIRECT_BLOCK ; i < 512 ; i ++) {
+ if (i < 0) /* everything under this block lies beyond i_size: start at its first entry */
+ i = 0;
+ if (i < DINDIRECT_BLOCK) /* i_size grew in the meantime: start over */
+ goto repeat;
+ dind = i+(unsigned short *) dind_bh->b_data;
+ retry |= trunc_indirect(inode,7+512+(i<<9),dind); /* second argument: file block number of the first block under entry i */
+ mark_buffer_dirty(dind_bh, 1);
+ }
+ dind = (unsigned short *) dind_bh->b_data;
+ for (i = 0; i < 512; i++) /* look for any entry still in use */
+ if (*(dind++))
+ break;
+ if (i >= 512) /* every entry is zero */
+ if (dind_bh->b_count != 1) /* someone else holds the block: retry later */
+ retry = 1;
+ else { /* pairs with the b_count test directly above, not with the i >= 512 test */
+ tmp = *p;
+ *p = 0;
+ inode->i_dirt = 1;
+ minix_free_block(inode->i_sb,tmp);
+ }
+ brelse(dind_bh);
+ return retry;
+}
+
+void minix_truncate(struct inode * inode)
+{ /* Frees every block of inode beyond its current i_size, repeating the direct, indirect and double-indirect passes until none of them asks for a retry, then stamps mtime/ctime. Does nothing unless inode is a regular file, directory or symlink. */
+ int retry;
+
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)))
+ return;
+ while (1) {
+ retry = trunc_direct(inode); /* i_data slots 0..6 */
+ retry |= trunc_indirect(inode,7,inode->u.minix_i.i_data+7); /* slot 7: single-indirect block, whose first entry is file block 7 */
+ retry |= trunc_dindirect(inode); /* slot 8: double-indirect block */
+ if (!retry)
+ break;
+ current->counter = 0; /* give up the rest of the time slice before trying again */
+ schedule();
+ }
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_dirt = 1;
+}
diff --git a/fs/msdos/Makefile b/fs/msdos/Makefile
new file mode 100644
index 000000000..2c690d3ea
--- /dev/null
+++ b/fs/msdos/Makefile
@@ -0,0 +1,34 @@
+#
+# Makefile for the linux MS-DOS-filesystem routines.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+ifndef CONFIG_MSDOS_FS # when CONFIG_MSDOS_FS is not defined, compile the sources with -DMODULE
+CFLAGS := $(CFLAGS) -DMODULE
+endif
+
+.c.s: # suffix rules: C to assembly, C to object, assembly to object
+ $(CC) $(CFLAGS) -S $<
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+.s.o:
+ $(AS) -o $*.o $<
+
+OBJS= namei.o inode.o file.o dir.o misc.o fat.o
+
+msdos.o: $(OBJS) # combine all objects into one relocatable msdos.o (ld -r)
+ $(LD) -r -o msdos.o $(OBJS)
+
+dep: # regenerate .depend from every .c file in this directory
+ $(CPP) -M *.c > .depend
+
+#
+# include the dependency file written by the 'dep' target, if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/fs/msdos/dir.c b/fs/msdos/dir.c
new file mode 100644
index 000000000..2138b8778
--- /dev/null
+++ b/fs/msdos/dir.c
@@ -0,0 +1,126 @@
+/*
+ * linux/fs/msdos/dir.c
+ *
+ * Written 1992,1993 by Werner Almesberger
+ *
+ * MS-DOS directory handling functions
+ */
+
+#include <asm/segment.h>
+
+#include <linux/fs.h>
+#include <linux/msdos_fs.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+
+#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) /* byte offset of the d_name member from the start of *de */
+#define ROUND_UP(x) (((x)+3) & ~3) /* rounds x up to the next multiple of 4 */
+
+
+#define PRINTK(X) /* expands to nothing, so PRINTK((...)) calls compile away */
+
+static int msdos_dir_read(struct inode * inode,struct file * filp, char * buf,int count)
+{ /* read handler installed for MS-DOS directories: ignores its arguments and always fails with -EISDIR */
+ return -EISDIR;
+}
+
+static struct file_operations msdos_dir_operations = { /* file operations for MS-DOS directories; referenced from msdos_dir_inode_operations */
+ NULL, /* lseek - default */
+ msdos_dir_read, /* read */
+ NULL, /* write - bad */
+ msdos_readdir, /* readdir */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ file_fsync /* fsync */
+};
+
+struct inode_operations msdos_dir_inode_operations = { /* inode operations for MS-DOS directories; link, symlink, mknod, readlink, follow_link, truncate and permission are left NULL */
+ &msdos_dir_operations, /* default directory file-ops */
+ msdos_create, /* create */
+ msdos_lookup, /* lookup */
+ NULL, /* link */
+ msdos_unlink, /* unlink */
+ NULL, /* symlink */
+ msdos_mkdir, /* mkdir */
+ msdos_rmdir, /* rmdir */
+ NULL, /* mknod */
+ msdos_rename, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ msdos_bmap, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+int msdos_readdir( /* Returns one directory entry per call: fills the user-space dirent with a lower-cased "name.ext" form and returns the rounded-up record size, 0 at end of directory, or -EBADF / -ENOENT on bad arguments. count is not used. */
+ struct inode *inode,
+ struct file *filp,
+ struct dirent *dirent, /* dirent in user space */
+ int count)
+{
+ int ino,i,i2,last;
+ char c,*walk;
+ struct buffer_head *bh;
+ struct msdos_dir_entry *de;
+
+ if (!inode || !S_ISDIR(inode->i_mode)) return -EBADF;
+ if (inode->i_ino == MSDOS_ROOT_INO) {
+/* Fake . and .. for the root directory. */
+ if (filp->f_pos == 2) filp->f_pos = 0; /* both fake entries delivered: continue with the real entries from position 0 */
+ else if (filp->f_pos < 2) {
+ walk = filp->f_pos++ ? ".." : "."; /* position 0 yields ".", position 1 yields ".." */
+ for (i = 0; *walk; walk++)
+ put_fs_byte(*walk,dirent->d_name+i++);
+ put_fs_long(MSDOS_ROOT_INO,&dirent->d_ino); /* both fake entries report the root inode number */
+ put_fs_byte(0,dirent->d_name+i);
+ put_fs_word(i,&dirent->d_reclen); /* d_reclen receives the name length */
+ return ROUND_UP(NAME_OFFSET(dirent) + i + 1);
+ }
+ }
+ if (filp->f_pos & (sizeof(struct msdos_dir_entry)-1)) return -ENOENT; /* position must fall on an entry boundary; the mask test is exact only for a power-of-two entry size */
+ bh = NULL;
+ while ((ino = msdos_get_entry(inode,&filp->f_pos,&bh,&de)) > -1) {
+ if (!IS_FREE(de->name) && !(de->attr & ATTR_VOLUME)) { /* skip free slots and volume-label entries */
+ char bufname[13]; /* 8 name + '.' + 3 ext + NUL */
+ char *ptname = bufname;
+ for (i = last = 0; i < 8; i++) {
+ if (!(c = de->name[i])) break;
+ if (c >= 'A' && c <= 'Z') c += 32; /* fold upper case to lower case */
+ if (c != ' ')
+ last = i+1; /* last ends up just past the final non-blank character */
+ ptname[i] = c;
+ }
+ i = last;
+ ptname[i] = '.';
+ i++;
+ for (i2 = 0; i2 < 3; i2++) {
+ if (!(c = de->ext[i2])) break;
+ if (c >= 'A' && c <= 'Z') c += 32;
+ if (c != ' ')
+ last = i+1; /* with a blank extension last is never moved, so the '.' written above is cut off */
+ ptname[i] = c;
+ i++;
+ }
+ if ((i = last) != 0) { /* i is now the final name length; entries with an empty name are skipped */
+ if (!strcmp(de->name,MSDOS_DOT))
+ ino = inode->i_ino; /* "." reports the directory's own inode number */
+ else if (!strcmp(de->name,MSDOS_DOTDOT))
+ ino = msdos_parent_ino(inode,0);
+ bufname[i] = '\0';
+ put_fs_long(ino,&dirent->d_ino);
+ memcpy_tofs(dirent->d_name,bufname,i+1); /* copies the name plus its NUL to user space */
+ put_fs_word(i,&dirent->d_reclen);
+ PRINTK (("readdir avant brelse\n"));
+ brelse(bh);
+ PRINTK (("readdir retourne %d\n",i));
+ return ROUND_UP(NAME_OFFSET(dirent) + i + 1);
+ }
+ }
+ }
+ if (bh) brelse(bh);
+ return 0; /* no further entries */
+}
diff --git a/fs/msdos/fat.c b/fs/msdos/fat.c
new file mode 100644
index 000000000..651e58b24
--- /dev/null
+++ b/fs/msdos/fat.c
@@ -0,0 +1,291 @@
+/*
+ * linux/fs/msdos/fat.c
+ *
+ * Written 1992,1993 by Werner Almesberger
+ */
+
+#include <linux/msdos_fs.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+
+
+static struct fat_cache *fat_cache,cache[FAT_CACHE]; /* fat_cache: head of the list threaded through cache[] by cache_init */
+
+/* Returns the nr'th FAT entry, -1 if it is an end-of-file entry. If
+ new_value is != -1, that FAT entry is replaced by it in every FAT copy. */
+
+int fat_access(struct super_block *sb,int nr,int new_value) /* also returns 0 if nr is out of range or a read fails */
+{
+ struct buffer_head *bh,*bh2,*c_bh,*c_bh2;
+ unsigned char *p_first,*p_last;
+ int first,last,next,copy;
+
+ if ((unsigned) (nr-2) >= MSDOS_SB(sb)->clusters) return 0; /* valid numbers are 2..clusters+1 */
+ if (MSDOS_SB(sb)->fat_bits == 16) first = last = nr*2; /* byte offset of the entry in the FAT */
+ else {
+ first = nr*3/2; /* 12-bit entries occupy one and a half bytes */
+ last = first+1;
+ }
+ if (!(bh = msdos_sread(sb->s_dev,MSDOS_SB(sb)->fat_start+(first >>
+ SECTOR_BITS)))) {
+ printk("bread in fat_access failed\n");
+ return 0;
+ }
+ if ((first >> SECTOR_BITS) == (last >> SECTOR_BITS))
+ bh2 = bh;
+ else { /* a 12-bit entry straddles two sectors: read the second one too */
+ if (!(bh2 = msdos_sread(sb->s_dev,MSDOS_SB(sb)->fat_start+(last
+ >> SECTOR_BITS)))) {
+ brelse(bh);
+ printk("bread in fat_access failed\n");
+ return 0;
+ }
+ }
+ if (MSDOS_SB(sb)->fat_bits == 16) {
+ p_first = p_last = NULL; /* GCC needs that stuff */
+ next = CF_LE_W(((unsigned short *) bh->b_data)[(first &
+ (SECTOR_SIZE-1)) >> 1]);
+ if (next >= 0xfff7) next = -1; /* values from 0xfff7 up are reported as end of chain */
+ }
+ else {
+ p_first = &((unsigned char *) bh->b_data)[first & (SECTOR_SIZE-1)];
+ p_last = &((unsigned char *) bh2->b_data)[(first+1) &
+ (SECTOR_SIZE-1)];
+ if (nr & 1) next = ((*p_first >> 4) | (*p_last << 4)) & 0xfff; /* odd entry: high nibble of first byte + second byte */
+ else next = (*p_first+(*p_last << 8)) & 0xfff; /* even entry: first byte + low nibble of second */
+ if (next >= 0xff7) next = -1;
+ }
+ if (new_value != -1) {
+ if (MSDOS_SB(sb)->fat_bits == 16)
+ ((unsigned short *) bh->b_data)[(first & (SECTOR_SIZE-1)) >>
+ 1] = CT_LE_W(new_value);
+ else {
+ if (nr & 1) {
+ *p_first = (*p_first & 0xf) | (new_value << 4);
+ *p_last = new_value >> 4;
+ }
+ else {
+ *p_first = new_value & 0xff;
+ *p_last = (*p_last & 0xf0) | (new_value >> 8);
+ }
+ mark_buffer_dirty(bh2, 1);
+ }
+ mark_buffer_dirty(bh, 1);
+ for (copy = 1; copy < MSDOS_SB(sb)->fats; copy++) { /* mirror the modified sector(s) into the other FATs */
+ if (!(c_bh = msdos_sread(sb->s_dev,MSDOS_SB(sb)->
+ fat_start+(first >> SECTOR_BITS)+MSDOS_SB(sb)->
+ fat_length*copy))) break;
+ memcpy(c_bh->b_data,bh->b_data,SECTOR_SIZE);
+ mark_buffer_dirty(c_bh, 1);
+ if (bh != bh2) {
+ if (!(c_bh2 = msdos_sread(sb->s_dev,MSDOS_SB(sb)->
+ fat_start+(first >> SECTOR_BITS)+MSDOS_SB(sb)->
+ fat_length*copy+1))) {
+ brelse(c_bh);
+ break;
+ }
+ memcpy(c_bh2->b_data,bh2->b_data,SECTOR_SIZE);
+ mark_buffer_dirty(c_bh2, 1); /* without this the second mirrored sector was never flagged for write-back */
+ brelse(c_bh2);
+ }
+ brelse(c_bh);
+ }
+ }
+ brelse(bh);
+ if (bh != bh2) brelse(bh2);
+ return next; /* the entry's value as it was before any replacement */
+}
+
+
+void cache_init(void) /* One-time setup: chain the cache[] slots into a list headed by fat_cache. */
+{
+ static int done = 0; /* non-zero once the list has been built */
+ struct fat_cache *slot;
+
+ if (done) return;
+ for (slot = cache; slot < cache+FAT_CACHE; slot++) {
+ slot->device = 0; /* device 0 marks an unused slot */
+ if (slot == cache+FAT_CACHE-1) slot->next = NULL;
+ else slot->next = slot+1;
+ }
+ fat_cache = cache;
+ done = 1;
+}
+
+
+void cache_lookup(struct inode *inode,int cluster,int *f_clu,int *d_clu) /* Find the best cached starting point for walking inode's chain to `cluster'. */
+{ /* In/out: *f_clu is a file cluster index, *d_clu its disk cluster; both are advanced on a hit. */
+ struct fat_cache *walk;
+
+#ifdef DEBUG
+printk("cache lookup: <%d,%d> %d (%d,%d) -> ",inode->i_dev,inode->i_ino,cluster,
+ *f_clu,*d_clu);
+#endif
+ for (walk = fat_cache; walk; walk = walk->next)
+ if (inode->i_dev == walk->device && walk->ino == inode->i_ino &&
+ walk->file_cluster <= cluster && walk->file_cluster >
+ *f_clu) { /* same file, not past the target, and closer than the best so far */
+ *d_clu = walk->disk_cluster;
+#ifdef DEBUG
+printk("cache hit: %d (%d)\n",walk->file_cluster,*d_clu);
+#endif
+ if ((*f_clu = walk->file_cluster) == cluster) return; /* exact hit: stop searching */
+ }
+#ifdef DEBUG
+printk("cache miss\n"); /* also reached after a partial (non-exact) hit */
+#endif
+}
+
+
+#ifdef DEBUG
+static void list_cache(void) /* Debug aid: print every cache slot in list order. */
+{
+ struct fat_cache *walk;
+
+ for (walk = fat_cache; walk; walk = walk->next) {
+ if (walk->device)
+ printk("<%d,%d>(%d,%d) ",walk->device,walk->ino,
+ walk->file_cluster,walk->disk_cluster);
+ else printk("-- "); /* device == 0: unused or invalidated slot */
+ }
+ printk("\n");
+}
+#endif
+
+
+void cache_add(struct inode *inode,int f_clu,int d_clu) /* Record that file cluster f_clu of inode lives in disk cluster d_clu. */
+{
+ struct fat_cache *walk,*last;
+
+#ifdef DEBUG
+printk("cache add: <%d,%d> %d (%d)\n",inode->i_dev,inode->i_ino,f_clu,d_clu);
+#endif
+ last = NULL;
+ for (walk = fat_cache; walk->next; walk = (last = walk)->next) /* stops ON the final element without testing it */
+ if (inode->i_dev == walk->device && walk->ino == inode->i_ino &&
+ walk->file_cluster == f_clu) {
+ if (walk->disk_cluster != d_clu) { /* cached mapping disagrees with the caller */
+ printk("FAT cache corruption");
+ cache_inval_inode(inode);
+ return;
+ }
+ /* update LRU */
+ if (last == NULL) return; /* already at the head */
+ last->next = walk->next; /* unlink, then re-insert at the head */
+ walk->next = fat_cache;
+ fat_cache = walk;
+#ifdef DEBUG
+list_cache();
+#endif
+ return;
+ }
+ walk->device = inode->i_dev; /* no match: reuse the final list element for the new mapping */
+ walk->ino = inode->i_ino;
+ walk->file_cluster = f_clu;
+ walk->disk_cluster = d_clu;
+ last->next = NULL; /* NOTE(review): last is still NULL here if the list has a single element (FAT_CACHE == 1) */
+ walk->next = fat_cache;
+ fat_cache = walk;
+#ifdef DEBUG
+list_cache();
+#endif
+}
+
+
+/* Cache invalidation occurs rarely, thus the LRU chain is not updated. It
+ fixes itself after a while. */
+
+void cache_inval_inode(struct inode *inode) /* Drop every cached cluster mapping that belongs to inode. */
+{
+ struct fat_cache *entry;
+
+ for (entry = fat_cache; entry != NULL; entry = entry->next) {
+ if (entry->ino == inode->i_ino && entry->device == inode->i_dev) entry->device = 0;
+ }
+}
+
+
+void cache_inval_dev(int device) /* Forget every cached cluster mapping of the given device. */
+{
+ struct fat_cache *entry = fat_cache;
+
+ for (; entry != NULL; entry = entry->next)
+ if (device == entry->device) entry->device = 0;
+}
+
+
+int get_cluster(struct inode *inode,int cluster) /* file cluster index -> disk cluster number, 0 if there is none */
+{
+ int disk_clu = MSDOS_I(inode)->i_start; /* first cluster of the file's chain */
+ int file_clu = 0; /* chain position that disk_clu corresponds to */
+
+ if (disk_clu == 0) return 0;
+ if (cluster == 0) return disk_clu;
+ cache_lookup(inode,cluster,&file_clu,&disk_clu); /* a cache hit moves both values forward */
+ for (; file_clu < cluster; file_clu++) {
+ disk_clu = fat_access(inode->i_sb,disk_clu,-1);
+ if (disk_clu == -1 || disk_clu == 0) return 0; /* chain ended before reaching the target */
+ }
+ cache_add(inode,cluster,disk_clu);
+ return disk_clu;
+}
+
+
+int msdos_smap(struct inode *inode,int sector) /* file-relative sector -> disk sector, 0 if it is not mapped */
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
+ int disk_clu,in_root;
+
+ in_root = inode->i_ino == MSDOS_ROOT_INO ||
+ (S_ISDIR(inode->i_mode) && !MSDOS_I(inode)->i_start);
+ if (in_root) /* root directory area: addressed from dir_start, no cluster chain */
+ return sector >= sbi->dir_entries >> MSDOS_DPS_BITS ? 0 :
+ sector+sbi->dir_start;
+ disk_clu = get_cluster(inode,sector/sbi->cluster_size);
+ if (disk_clu == 0) return 0;
+ /* the first data cluster carries the number 2 */
+ return (disk_clu-2)*sbi->cluster_size+sbi->data_start+
+ sector % sbi->cluster_size;
+}
+
+
+/* Free all clusters after the skip'th cluster. Doesn't use the cache,
+ because this way we get an additional sanity check. */
+
+int fat_free(struct inode *inode,int skip) /* Keep the first `skip' clusters of inode and free the rest; returns 0 or -EIO. */
+{
+ int nr,last;
+
+ if (!(nr = MSDOS_I(inode)->i_start)) return 0; /* file owns no clusters */
+ last = 0;
+ while (skip--) { /* walk past the clusters to keep; last = final kept cluster */
+ last = nr;
+ if ((nr = fat_access(inode->i_sb,nr,-1)) == -1) return 0; /* chain is not longer than skip: nothing to free */
+ if (!nr) {
+ printk("fat_free: skipped EOF\n");
+ return -EIO;
+ }
+ }
+ if (last) /* terminate the kept part with an end-of-file mark */
+ fat_access(inode->i_sb,last,MSDOS_SB(inode->i_sb)->fat_bits ==
+ 12 ? 0xff8 : 0xfff8);
+ else { /* skip == 0: the file loses all of its clusters */
+ MSDOS_I(inode)->i_start = 0;
+ inode->i_dirt = 1;
+ }
+ lock_fat(inode->i_sb);
+ while (nr != -1) { /* write 0 into each remaining entry; fat_access returns the old link */
+ if (!(nr = fat_access(inode->i_sb,nr,0))) {
+ fs_panic(inode->i_sb,"fat_free: deleting beyond EOF");
+ break;
+ }
+ if (MSDOS_SB(inode->i_sb)->free_clusters != -1) /* -1 means the free count is not known yet */
+ MSDOS_SB(inode->i_sb)->free_clusters++;
+ inode->i_blocks -= MSDOS_SB(inode->i_sb)->cluster_size;
+ }
+ unlock_fat(inode->i_sb);
+ cache_inval_inode(inode); /* cached cluster positions of this file are stale now */
+ return 0;
+}
diff --git a/fs/msdos/file.c b/fs/msdos/file.c
new file mode 100644
index 000000000..fb41fff21
--- /dev/null
+++ b/fs/msdos/file.c
@@ -0,0 +1,315 @@
+/*
+ * linux/fs/msdos/file.c
+ *
+ * Written 1992,1993 by Werner Almesberger
+ *
+ * MS-DOS regular file handling primitives
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/sched.h>
+#include <linux/locks.h>
+#include <linux/fs.h>
+#include <linux/msdos_fs.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+
+#define PRINTK(x)
+#define Printk(x) printk x
+
+static struct file_operations msdos_file_operations = { /* file methods for regular MS-DOS files */
+ NULL, /* lseek - default */
+ msdos_file_read, /* read */
+ msdos_file_write, /* write */
+ NULL, /* readdir - bad */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ generic_mmap, /* mmap */
+ NULL, /* no special open is needed */
+ NULL, /* release */
+ file_fsync /* fsync */
+};
+
+struct inode_operations msdos_file_inode_operations = { /* installed as i_op for non-directory inodes by msdos_read_inode */
+ &msdos_file_operations, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ msdos_bmap, /* bmap */
+ msdos_truncate, /* truncate */
+ NULL, /* permission */
+ NULL /* smap */
+};
+
+#define MSDOS_PREFETCH 32
+struct msdos_pre { /* prefetch window shared by msdos_file_read and msdos_prefetch */
+ int file_sector;/* Next sector to read in the prefetch table */
+ /* This is relative to the file, not the disk */
+ struct buffer_head *bhlist[MSDOS_PREFETCH]; /* All buffers needed */
+ int nblist; /* Number of buffers in bhlist */
+ int nolist; /* index in bhlist of the next buffer to consume */
+};
+/*
+ Queue up to nb more sectors of the file for reading and append their buffers to pre->bhlist.
+*/
+static void msdos_prefetch (
+ struct inode *inode,
+ struct msdos_pre *pre,
+ int nb) /* How many must be prefetch at once */
+{
+ struct buffer_head *bhreq[MSDOS_PREFETCH]; /* Buffers not */
+ /* already read */
+ int nbreq=0; /* Number of buffers in bhreq */
+ int i;
+ for (i=0; i<nb; i++){
+ int sector = msdos_smap(inode,pre->file_sector);
+ if (sector != 0){
+ struct buffer_head *bh;
+ PRINTK (("fsector2 %d -> %d\n",pre->file_sector-1,sector));
+ pre->file_sector++;
+ bh = getblk(inode->i_dev,sector,SECTOR_SIZE);
+ if (bh == NULL) break;
+ pre->bhlist[pre->nblist++] = bh; /* callers keep nblist+nb within MSDOS_PREFETCH */
+ if (!bh->b_uptodate) bhreq[nbreq++] = bh; /* only buffers without valid data need I/O */
+ }else{
+ break; /* msdos_smap returned 0: no mapped sector here, stop prefetching */
+ }
+ }
+ if (nbreq > 0) ll_rw_block (READ,nbreq,bhreq); /* one request for the whole batch */
+ for (i=pre->nblist; i<MSDOS_PREFETCH; i++) pre->bhlist[i] = NULL; /* unused tail entries read as "no buffer" */
+}
+
+/*
+ Read a file into user space. Returns the bytes stored in buf, 0 at EOF or for count <= 0, else -EINVAL or -EIO.
+*/
+int msdos_file_read(
+ struct inode *inode,
+ struct file *filp,
+ char *buf,
+ int count)
+{
+ char *start = buf;
+ char *end = buf + count;
+ int i;
+ int left_in_file;
+ struct msdos_pre pre;
+
+
+ if (!inode) {
+ printk("msdos_file_read: inode = NULL\n");
+ return -EINVAL;
+ }
+ /* S_ISLNK allows for UMSDOS. Should never happen for normal MSDOS */
+ if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) {
+ printk("msdos_file_read: mode = %07o\n",inode->i_mode);
+ return -EINVAL;
+ }
+ if (filp->f_pos >= inode->i_size || count <= 0) return 0;
+ /*
+ Tell the buffer cache which blocks we expect to read in advance.
+ Since stack space is limited, we preread at most MSDOS_PREFETCH
+ blocks, because we have to keep the result in the local
+ arrays pre.bhlist and bhreq.
+
+ Each time we process one block in bhlist, we replace
+ it by a new prefetch block if needed.
+ */
+ PRINTK (("#### ino %ld pos %ld size %ld count %d\n",inode->i_ino,filp->f_pos,inode->i_size,count));
+ {
+ /*
+ We must prefetch complete blocks, so we must
+ take into account the offset in the first block.
+ */
+ int count_max = (filp->f_pos & (SECTOR_SIZE-1)) + count;
+ int to_reada; /* How many block to read all at once */
+ pre.file_sector = filp->f_pos >> SECTOR_BITS;
+ to_reada = count_max / SECTOR_SIZE;
+ if (count_max & (SECTOR_SIZE-1)) to_reada++; /* round up to whole sectors */
+ if (filp->f_reada || !MSDOS_I(inode)->i_binary){
+ /* Doing a read ahead on ascii file make sure we always */
+ /* pre read enough, since we don't know how many blocks */
+ /* we really need */
+ int ahead = read_ahead[MAJOR(inode->i_dev)];
+ PRINTK (("to_reada %d ahead %d\n",to_reada,ahead));
+ if (ahead == 0) ahead = 8;
+ to_reada += ahead;
+ }
+ if (to_reada > MSDOS_PREFETCH) to_reada = MSDOS_PREFETCH;
+ pre.nblist = 0;
+ msdos_prefetch (inode,&pre,to_reada);
+ }
+ pre.nolist = 0;
+ PRINTK (("count %d ahead %d nblist %d\n",count,read_ahead[MAJOR(inode->i_dev)],pre.nblist));
+ while ((left_in_file = inode->i_size - filp->f_pos) > 0
+ && buf < end){
+ struct buffer_head *bh = pre.bhlist[pre.nolist];
+ char *data;
+ int size,offset;
+ if (bh == NULL) break; /* nothing (more) was prefetched */
+ pre.bhlist[pre.nolist] = NULL; /* this buffer is released below, not by the final cleanup loop */
+ pre.nolist++;
+ if (pre.nolist == MSDOS_PREFETCH/2){ /* first half consumed: slide the second half down and refill */
+ memcpy (pre.bhlist,pre.bhlist+MSDOS_PREFETCH/2
+ ,(MSDOS_PREFETCH/2)*sizeof(pre.bhlist[0]));
+ pre.nblist -= MSDOS_PREFETCH/2;
+ msdos_prefetch (inode,&pre,MSDOS_PREFETCH/2);
+ pre.nolist = 0;
+ }
+ PRINTK (("file_read pos %ld nblist %d %d %d\n",filp->f_pos,pre.nblist,pre.fetched,count));
+ wait_on_buffer(bh);
+ if (!bh->b_uptodate){
+ /* read error ? */
+ brelse (bh);
+ break;
+ }
+ offset = filp->f_pos & (SECTOR_SIZE-1);
+ data = bh->b_data + offset;
+ size = MIN(SECTOR_SIZE-offset,left_in_file);
+ if (MSDOS_I(inode)->i_binary) {
+ size = MIN(size,end-buf);
+ memcpy_tofs(buf,data,size);
+ buf += size;
+ filp->f_pos += size;
+ }else{ /* text conversion: drop '\r', treat byte 26 as end of file */
+ for (; size && buf < end; size--) {
+ char ch = *data++;
+ filp->f_pos++;
+ if (ch == 26){
+ filp->f_pos = inode->i_size;
+ break;
+ }else if (ch != '\r'){
+ put_fs_byte(ch,buf++);
+ }
+ }
+ }
+ brelse(bh);
+ }
+ PRINTK (("--- %d -> %d\n",count,(int)(buf-start)));
+ for (i=0; i<pre.nblist; i++) brelse (pre.bhlist[i]); /* NOTE(review): consumed slots are NULL here - presumably brelse accepts NULL; confirm */
+ if (start == buf) return -EIO; /* NOTE(review): also taken when text conversion produced no output bytes */
+ if (!IS_RDONLY(inode)) inode->i_atime = CURRENT_TIME;
+ filp->f_reada = 1; /* Will be reset if a lseek is done */
+ return buf-start;
+}
+
+/*
+ Write to a file from user space. Returns the bytes taken from buf, or a negative error code if none were.
+*/
+int msdos_file_write(
+ struct inode *inode,
+ struct file *filp,
+ char *buf,
+ int count)
+{
+ int sector,offset,size,left,written,error,carry;
+ char *start,*to,ch;
+ struct buffer_head *bh;
+ int binary_mode;
+
+ if (!inode) {
+ printk("msdos_file_write: inode = NULL\n");
+ return -EINVAL;
+ }
+ binary_mode = MSDOS_I(inode)->i_binary; /* read only after the NULL check above */
+ /* S_ISLNK allows for UMSDOS. Should never happen for normal MSDOS */
+ if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) {
+ printk("msdos_file_write: mode = %07o\n",inode->i_mode);
+ return -EINVAL;
+ }
+/*
+ * ok, append may not work when many processes are writing at the same time
+ * but so what. That way leads to madness anyway.
+ */
+ if (filp->f_flags & O_APPEND) filp->f_pos = inode->i_size;
+ if (count <= 0) return 0;
+ error = carry = 0; /* carry: a '\n' still owed after a '\r' that filled the previous sector */
+ for (start = buf; count || carry; count -= size) {
+ while (!(sector = msdos_smap(inode,filp->f_pos >> SECTOR_BITS))) /* extend the file until f_pos is mapped */
+ if ((error = msdos_add_cluster(inode)) < 0) break;
+ if (error) {
+ msdos_truncate(inode); /* trims the cluster chain back to i_size */
+ break;
+ }
+ offset = filp->f_pos & (SECTOR_SIZE-1);
+ size = MIN(SECTOR_SIZE-offset,MAX(carry,count));
+ if (binary_mode
+ && offset == 0
+ && (size == SECTOR_SIZE
+ || filp->f_pos + size >= inode->i_size)){
+ /* No need to read the block first since we will */
+ /* completely overwrite it */
+ /* or at least write past the end of file */
+ if (!(bh = getblk(inode->i_dev,sector,SECTOR_SIZE))){
+ error = -EIO;
+ break;
+ }
+ }else if (!(bh = msdos_sread(inode->i_dev,sector))) {
+ error = -EIO;
+ break;
+ }
+ if (binary_mode) {
+ memcpy_fromfs(bh->b_data+offset,buf,written = size);
+ buf += size;
+ }
+ else { /* text mode: every '\n' from the user is stored as "\r\n" */
+ written = left = SECTOR_SIZE-offset;
+ to = (char *) bh->b_data+(filp->f_pos & (SECTOR_SIZE-1));
+ if (carry) {
+ *to++ = '\n';
+ left--;
+ carry = 0;
+ }
+ for (size = 0; size < count && left; size++) { /* size counts the user bytes consumed in this sector */
+ if ((ch = get_fs_byte(buf++)) == '\n') {
+ *to++ = '\r';
+ left--;
+ }
+ if (!left) carry = 1; /* the '\r' used the last byte: emit the '\n' in the next sector */
+ else {
+ *to++ = ch;
+ left--;
+ }
+ }
+ written -= left; /* bytes actually placed in the sector */
+ }
+ filp->f_pos += written;
+ if (filp->f_pos > inode->i_size) {
+ inode->i_size = filp->f_pos;
+ inode->i_dirt = 1;
+ }
+ bh->b_uptodate = 1;
+ mark_buffer_dirty(bh, 0);
+ brelse(bh);
+ }
+ if (start == buf)
+ return error;
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
+ inode->i_dirt = 1;
+ return buf-start;
+}
+
+void msdos_truncate(struct inode *inode) /* Free the clusters lying beyond i_size. */
+{
+ int bytes_per_clu = SECTOR_SIZE*MSDOS_SB(inode->i_sb)->cluster_size;
+
+ /* keep i_size rounded up to whole clusters; fat_free's result is ignored */
+ (void) fat_free(inode,(inode->i_size+(bytes_per_clu-1))/bytes_per_clu);
+ inode->i_dirt = 1;
+ MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
+}
diff --git a/fs/msdos/inode.c b/fs/msdos/inode.c
new file mode 100644
index 000000000..e0577fbef
--- /dev/null
+++ b/fs/msdos/inode.c
@@ -0,0 +1,494 @@
+/*
+ * linux/fs/msdos/inode.c
+ *
+ * Written 1992,1993 by Werner Almesberger
+ */
+
+#include <linux/msdos_fs.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/major.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+
+#ifdef MODULE
+ #include <linux/module.h>
+ #include "../../tools/version.h"
+#endif
+
+#include <asm/segment.h>
+
+extern int *blksize_size[];
+
+void msdos_put_inode(struct inode *inode) /* put_inode hook (see msdos_sops): frees the clusters of an inode whose link count is 0. */
+{
+ struct inode *depend;
+ struct super_block *sb;
+
+ if (inode->i_nlink) {
+ if (MSDOS_I(inode)->i_busy) cache_inval_inode(inode);
+ return; /* still linked: nothing to free */
+ }
+ inode->i_size = 0;
+ msdos_truncate(inode); /* with i_size == 0 this releases the whole cluster chain */
+ depend = MSDOS_I(inode)->i_depend; /* fetched before clear_inode() is called */
+ sb = inode->i_sb;
+ clear_inode(inode);
+ if (depend) {
+ if (MSDOS_I(depend)->i_old != inode) { /* the dependent inode must point back at this one */
+ printk("Invalid link (0x%X): expected 0x%X, got 0x%X\n",
+ (int) depend,(int) inode,(int) MSDOS_I(depend)->
+ i_old);
+ fs_panic(sb,"...");
+ return;
+ }
+ MSDOS_I(depend)->i_old = NULL;
+ iput(depend);
+ }
+}
+
+
+void msdos_put_super(struct super_block *sb) /* put_super hook (see msdos_sops): undo what msdos_read_super set up. */
+{
+ cache_inval_dev(sb->s_dev); /* drop cached cluster mappings for this device */
+ set_blocksize (sb->s_dev,BLOCK_SIZE); /* msdos_read_super had switched the device to SECTOR_SIZE */
+ lock_super(sb);
+ sb->s_dev = 0;
+ unlock_super(sb);
+ #ifdef MODULE
+ MOD_DEC_USE_COUNT;
+ #endif
+ return;
+}
+
+
+static struct super_operations msdos_sops = { /* installed as s_op by msdos_read_super */
+ msdos_read_inode,
+ msdos_notify_change,
+ msdos_write_inode,
+ msdos_put_inode,
+ msdos_put_super,
+ NULL, /* added in 0.96c */
+ msdos_statfs,
+ NULL
+};
+
+
+static int parse_options(char *options,char *check,char *conversion,uid_t *uid, /* Parse the comma-separated mount options. */
+ gid_t *gid,int *umask,int *debug,int *fat,int *quiet) /* Returns 1 on success, 0 on any invalid option. */
+{
+ char *this_char,*value;
+
+ *check = 'n'; /* defaults, used when an option is not given */
+ *conversion = 'b';
+ *uid = current->uid;
+ *gid = current->gid;
+ *umask = current->fs->umask;
+ *debug = *fat = *quiet = 0;
+ if (!options) return 1;
+ for (this_char = strtok(options,","); this_char; this_char = strtok(NULL,",")) { /* strtok modifies options in place */
+ if ((value = strchr(this_char,'=')) != NULL)
+ *value++ = 0; /* split "name=value" into two strings */
+ if (!strcmp(this_char,"check") && value) {
+ if (value[0] && !value[1] && strchr("rns",*value)) /* single-letter form */
+ *check = *value;
+ else if (!strcmp(value,"relaxed")) *check = 'r';
+ else if (!strcmp(value,"normal")) *check = 'n';
+ else if (!strcmp(value,"strict")) *check = 's';
+ else return 0;
+ }
+ else if (!strcmp(this_char,"conv") && value) {
+ if (value[0] && !value[1] && strchr("bta",*value)) /* single-letter form */
+ *conversion = *value;
+ else if (!strcmp(value,"binary")) *conversion = 'b';
+ else if (!strcmp(value,"text")) *conversion = 't';
+ else if (!strcmp(value,"auto")) *conversion = 'a';
+ else return 0;
+ }
+ else if (!strcmp(this_char,"uid")) {
+ if (!value || !*value)
+ return 0;
+ *uid = simple_strtoul(value,&value,0);
+ if (*value) /* trailing characters after the number */
+ return 0;
+ }
+ else if (!strcmp(this_char,"gid")) {
+ if (!value || !*value)
+ return 0;
+ *gid = simple_strtoul(value,&value,0);
+ if (*value)
+ return 0;
+ }
+ else if (!strcmp(this_char,"umask")) {
+ if (!value || !*value)
+ return 0;
+ *umask = simple_strtoul(value,&value,8); /* umask is parsed as octal */
+ if (*value)
+ return 0;
+ }
+ else if (!strcmp(this_char,"debug")) {
+ if (value) return 0; /* flag option: no value allowed */
+ *debug = 1;
+ }
+ else if (!strcmp(this_char,"fat")) {
+ if (!value || !*value)
+ return 0;
+ *fat = simple_strtoul(value,&value,0);
+ if (*value || (*fat != 12 && *fat != 16))
+ return 0;
+ }
+ else if (!strcmp(this_char,"quiet")) {
+ if (value) return 0; /* flag option: no value allowed */
+ *quiet = 1;
+ }
+ else return 0; /* unknown option, or check/conv given without a value */
+ }
+ return 1;
+}
+
+
+/* Read the super block of an MS-DOS FS. Returns s, or NULL (with s->s_dev cleared) on failure. */
+
+struct super_block *msdos_read_super(struct super_block *s,void *data,
+ int silent)
+{
+ struct buffer_head *bh;
+ struct msdos_boot_sector *b;
+ int data_sectors,logical_sector_size,sector_mult;
+ int debug,error,fat,quiet;
+ char check,conversion;
+ uid_t uid;
+ gid_t gid;
+ int umask;
+
+ if (!parse_options((char *) data,&check,&conversion,&uid,&gid,&umask,
+ &debug,&fat,&quiet)) {
+ s->s_dev = 0;
+ return NULL;
+ }
+ cache_init();
+ lock_super(s);
+ set_blocksize(s->s_dev, SECTOR_SIZE);
+ bh = bread(s->s_dev, 0, SECTOR_SIZE); /* block 0 holds the boot sector */
+ unlock_super(s);
+ if (bh == NULL) {
+ s->s_dev = 0;
+ printk("MSDOS bread failed\n");
+ return NULL;
+ }
+ b = (struct msdos_boot_sector *) bh->b_data; /* b is valid only until brelse(bh) below */
+ s->s_blocksize = 512; /* Using this small block size solves the */
+ /* misfit between buffer cache and clusters, */
+ /* because (DOS) clusters are often aligned */
+ /* on an odd sector */
+ s->s_blocksize_bits = 9; /* we cannot handle anything else yet */
+/*
+ * The DOS3 partition size limit is *not* 32M as many people think.
+ * Instead, it is 64K sectors (with the usual sector size being
+ * 512 bytes, leading to a 32M limit).
+ *
+ * DOS 3 partition managers got around this problem by faking a
+ * larger sector size, ie treating multiple physical sectors as
+ * a single logical sector.
+ *
+ * We can accommodate this scheme by adjusting our cluster size,
+ * fat_start, and data_start by an appropriate value.
+ *
+ * (by Drew Eckhardt)
+ */
+
+#define ROUND_TO_MULTIPLE(n,m) ((n) && (m) ? (n)+(m)-1-((n)-1)%(m) : 0)
+ /* don't divide by zero */
+
+ logical_sector_size = CF_LE_W(*(unsigned short *) &b->sector_size);
+ sector_mult = logical_sector_size >> SECTOR_BITS; /* physical sectors per logical sector */
+ MSDOS_SB(s)->cluster_size = b->cluster_size*sector_mult;
+ MSDOS_SB(s)->fats = b->fats;
+ MSDOS_SB(s)->fat_start = CF_LE_W(b->reserved)*sector_mult;
+ MSDOS_SB(s)->fat_length = CF_LE_W(b->fat_length)*sector_mult;
+ MSDOS_SB(s)->dir_start = (CF_LE_W(b->reserved)+b->fats*CF_LE_W(
+ b->fat_length))*sector_mult;
+ MSDOS_SB(s)->dir_entries = CF_LE_W(*((unsigned short *) &b->dir_entries
+ ));
+ MSDOS_SB(s)->data_start = MSDOS_SB(s)->dir_start+ROUND_TO_MULTIPLE((
+ MSDOS_SB(s)->dir_entries << MSDOS_DIR_BITS) >> SECTOR_BITS,
+ sector_mult);
+ data_sectors = (CF_LE_W(*((unsigned short *) &b->sectors)) ?
+ CF_LE_W(*((unsigned short *) &b->sectors)) :
+ CF_LE_L(b->total_sect))*sector_mult-MSDOS_SB(s)->data_start; /* 16-bit count if non-zero, else the 32-bit one */
+ error = !b->cluster_size || !sector_mult;
+ if (!error) {
+ MSDOS_SB(s)->clusters = b->cluster_size ? data_sectors/
+ b->cluster_size/sector_mult : 0;
+ MSDOS_SB(s)->fat_bits = fat ? fat : MSDOS_SB(s)->clusters >
+ MSDOS_FAT12 ? 16 : 12; /* the fat= mount option overrides the size-based guess */
+ error = !MSDOS_SB(s)->fats || (MSDOS_SB(s)->dir_entries &
+ (MSDOS_DPS-1)) || MSDOS_SB(s)->clusters+2 > MSDOS_SB(s)->
+ fat_length*SECTOR_SIZE*8/MSDOS_SB(s)->fat_bits ||
+ (logical_sector_size & (SECTOR_SIZE-1)) || !b->secs_track ||
+ !b->heads;
+ }
+ if (error || debug) {
+ /* The MSDOS_CAN_BMAP is obsolete, but left just to remember */
+ printk("[MS-DOS FS Rel. 12,FAT %d,check=%c,conv=%c,"
+ "uid=%d,gid=%d,umask=%03o%s]\n",MSDOS_SB(s)->fat_bits,check,
+ conversion,uid,gid,umask,MSDOS_CAN_BMAP(MSDOS_SB(s)) ?
+ ",bmap" : "");
+ printk("[me=0x%x,cs=%d,#f=%d,fs=%d,fl=%d,ds=%d,de=%d,data=%d,"
+ "se=%d,ts=%ld,ls=%d]\n",b->media,MSDOS_SB(s)->cluster_size,
+ MSDOS_SB(s)->fats,MSDOS_SB(s)->fat_start,MSDOS_SB(s)->
+ fat_length,MSDOS_SB(s)->dir_start,MSDOS_SB(s)->dir_entries,
+ MSDOS_SB(s)->data_start,CF_LE_W(*(unsigned short *) &b->
+ sectors),b->total_sect,logical_sector_size);
+ }
+ brelse(bh); /* released only now: the printk above still reads the boot sector through b */
+ if (error) {
+ if (!silent)
+ printk("VFS: Can't find a valid MSDOS filesystem on dev 0x%04x.\n",
+ s->s_dev);
+ s->s_dev = 0;
+ return NULL;
+ }
+ s->s_magic = MSDOS_SUPER_MAGIC;
+ MSDOS_SB(s)->name_check = check;
+ MSDOS_SB(s)->conversion = conversion;
+ /* set up enough so that it can read an inode */
+ s->s_op = &msdos_sops;
+ MSDOS_SB(s)->fs_uid = uid;
+ MSDOS_SB(s)->fs_gid = gid;
+ MSDOS_SB(s)->fs_umask = umask;
+ MSDOS_SB(s)->quiet = quiet;
+ MSDOS_SB(s)->free_clusters = -1; /* don't know yet */
+ MSDOS_SB(s)->fat_wait = NULL;
+ MSDOS_SB(s)->fat_lock = 0;
+ MSDOS_SB(s)->prev_free = 0;
+ if (!(s->s_mounted = iget(s,MSDOS_ROOT_INO))) {
+ s->s_dev = 0;
+ printk("get root inode failed\n");
+ return NULL;
+ }
+ #ifdef MODULE
+ MOD_INC_USE_COUNT;
+ #endif
+ return s;
+}
+
+
+void msdos_statfs(struct super_block *sb,struct statfs *buf) /* Fill the user-space statfs buffer for this mount. */
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int unused,clu;
+
+ put_fs_long(sb->s_magic,&buf->f_type);
+ put_fs_long(sbi->cluster_size*SECTOR_SIZE,&buf->f_bsize);
+ put_fs_long(sbi->clusters,&buf->f_blocks);
+ lock_fat(sb);
+ if (sbi->free_clusters == -1) { /* not counted yet: scan the whole FAT once */
+ unused = 0;
+ for (clu = 2; clu < sbi->clusters+2; clu++)
+ if (fat_access(sb,clu,-1) == 0) unused++;
+ sbi->free_clusters = unused; /* remembered for later calls */
+ }
+ else unused = sbi->free_clusters;
+ unlock_fat(sb);
+ put_fs_long(unused,&buf->f_bfree);
+ put_fs_long(unused,&buf->f_bavail);
+ put_fs_long(0,&buf->f_files);
+ put_fs_long(0,&buf->f_ffree);
+ put_fs_long(12,&buf->f_namelen);
+}
+
+
+int msdos_bmap(struct inode *inode,int block) /* file-relative block -> disk block, 0 if it is not mapped */
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
+ int disk_clu;
+
+ /* root directory blocks are addressed directly from dir_start */
+ if (inode->i_ino == MSDOS_ROOT_INO)
+ return sbi->dir_start + block;
+ disk_clu = get_cluster(inode,block/sbi->cluster_size);
+ if (disk_clu == 0) return 0;
+ /* the first data cluster carries the number 2 */
+ return (disk_clu-2)*sbi->cluster_size+sbi->data_start+
+ block % sbi->cluster_size;
+}
+
+
+void msdos_read_inode(struct inode *inode) /* read_inode hook (see msdos_sops): fill inode from its directory entry. */
+{
+ struct buffer_head *bh;
+ struct msdos_dir_entry *raw_entry;
+ int nr;
+
+/* printk("read inode %d\n",inode->i_ino); */
+ MSDOS_I(inode)->i_busy = 0;
+ MSDOS_I(inode)->i_depend = MSDOS_I(inode)->i_old = NULL;
+ MSDOS_I(inode)->i_binary = 1;
+ inode->i_uid = MSDOS_SB(inode->i_sb)->fs_uid;
+ inode->i_gid = MSDOS_SB(inode->i_sb)->fs_gid;
+ if (inode->i_ino == MSDOS_ROOT_INO) { /* the root directory has no directory entry: synthesize everything */
+ inode->i_mode = (S_IRWXUGO & ~MSDOS_SB(inode->i_sb)->fs_umask) |
+ S_IFDIR;
+ inode->i_op = &msdos_dir_inode_operations;
+ inode->i_nlink = msdos_subdirs(inode)+2;
+ /* subdirs (neither . nor ..) plus . and "self" */
+ inode->i_size = MSDOS_SB(inode->i_sb)->dir_entries*
+ sizeof(struct msdos_dir_entry);
+ inode->i_blksize = MSDOS_SB(inode->i_sb)->cluster_size*
+ SECTOR_SIZE;
+ inode->i_blocks = (inode->i_size+inode->i_blksize-1)/
+ inode->i_blksize*MSDOS_SB(inode->i_sb)->cluster_size;
+ MSDOS_I(inode)->i_start = 0;
+ MSDOS_I(inode)->i_attrs = 0;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = 0;
+ return;
+ }
+ if (!(bh = bread(inode->i_dev,inode->i_ino >> MSDOS_DPB_BITS,
+ SECTOR_SIZE))) { /* i_ino encodes the entry position: high bits select the block, low bits the slot */
+ printk("dev = 0x%04X, ino = %ld\n",inode->i_dev,inode->i_ino);
+ panic("msdos_read_inode: unable to read i-node block");
+ }
+ raw_entry = &((struct msdos_dir_entry *) (bh->b_data))
+ [inode->i_ino & (MSDOS_DPB-1)];
+ if ((raw_entry->attr & ATTR_DIR) && !IS_FREE(raw_entry->name)) {
+ inode->i_mode = MSDOS_MKMODE(raw_entry->attr,S_IRWXUGO &
+ ~MSDOS_SB(inode->i_sb)->fs_umask) | S_IFDIR;
+ inode->i_op = &msdos_dir_inode_operations;
+ MSDOS_I(inode)->i_start = CF_LE_W(raw_entry->start);
+ inode->i_nlink = msdos_subdirs(inode);
+ /* includes .., compensating for "self" */
+#ifdef DEBUG
+ if (!inode->i_nlink) {
+ printk("directory %d: i_nlink == 0\n",inode->i_ino);
+ inode->i_nlink = 1;
+ }
+#endif
+ inode->i_size = 0;
+ if ((nr = CF_LE_W(raw_entry->start)) != 0)
+ while (nr != -1) { /* directory size = bytes in all clusters of its chain */
+ inode->i_size += SECTOR_SIZE*MSDOS_SB(inode->
+ i_sb)->cluster_size;
+ if (!(nr = fat_access(inode->i_sb,nr,-1))) {
+ printk("Directory %ld: bad FAT\n",
+ inode->i_ino);
+ break;
+ }
+ }
+ }
+ else {
+ inode->i_mode = MSDOS_MKMODE(raw_entry->attr,(IS_NOEXEC(inode)
+ ? S_IRUGO|S_IWUGO : S_IRWXUGO) & ~MSDOS_SB(inode->i_sb)->fs_umask) |
+ S_IFREG;
+ inode->i_op = &msdos_file_inode_operations; /* Now can always bmap */
+ MSDOS_I(inode)->i_start = CF_LE_W(raw_entry->start);
+ inode->i_nlink = 1;
+ inode->i_size = CF_LE_L(raw_entry->size);
+ }
+ MSDOS_I(inode)->i_binary = is_binary(MSDOS_SB(inode->i_sb)->conversion,
+ raw_entry->ext);
+ MSDOS_I(inode)->i_attrs = raw_entry->attr & ATTR_UNUSED;
+ /* this is as close to the truth as we can get ... */
+ inode->i_blksize = MSDOS_SB(inode->i_sb)->cluster_size*SECTOR_SIZE;
+ inode->i_blocks = (inode->i_size+inode->i_blksize-1)/
+ inode->i_blksize*MSDOS_SB(inode->i_sb)->cluster_size;
+ inode->i_mtime = inode->i_atime = inode->i_ctime =
+ date_dos2unix(CF_LE_W(raw_entry->time),CF_LE_W(raw_entry->date)); /* one on-disk timestamp feeds all three times */
+ brelse(bh);
+}
+
+
+void msdos_write_inode(struct inode *inode) /* write_inode hook (see msdos_sops): store the inode back into its directory entry. */
+{
+ struct buffer_head *bh;
+ struct msdos_dir_entry *raw_entry;
+
+ inode->i_dirt = 0;
+ if (inode->i_ino == MSDOS_ROOT_INO || !inode->i_nlink) return; /* root and unlinked inodes are not written back */
+ if (!(bh = bread(inode->i_dev,inode->i_ino >> MSDOS_DPB_BITS,
+ SECTOR_SIZE))) { /* i_ino encodes the entry position: high bits select the block, low bits the slot */
+ printk("dev = 0x%04X, ino = %ld\n",inode->i_dev,inode->i_ino);
+ panic("msdos_write_inode: unable to read i-node block");
+ }
+ raw_entry = &((struct msdos_dir_entry *) (bh->b_data))
+ [inode->i_ino & (MSDOS_DPB-1)];
+ if (S_ISDIR(inode->i_mode)) {
+ raw_entry->attr = ATTR_DIR;
+ raw_entry->size = 0; /* directories are stored with a size of 0 */
+ }
+ else {
+ raw_entry->attr = ATTR_NONE;
+ raw_entry->size = CT_LE_L(inode->i_size);
+ }
+ raw_entry->attr |= MSDOS_MKATTR(inode->i_mode) |
+ MSDOS_I(inode)->i_attrs;
+ raw_entry->start = CT_LE_W(MSDOS_I(inode)->i_start); /* 16-bit field: matches the CF_LE_W used when reading it */
+ date_unix2dos(inode->i_mtime,&raw_entry->time,&raw_entry->date);
+ raw_entry->time = CT_LE_W(raw_entry->time);
+ raw_entry->date = CT_LE_W(raw_entry->date);
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+}
+
+
+int msdos_notify_change(struct inode * inode,struct iattr * attr) /* Apply an attribute change; returns 0 or a negative error code. */
+{
+ int error;
+
+ error = inode_change_ok(inode, attr);
+ if (error)
+ return error;
+
+ if (((attr->ia_valid & ATTR_UID) &&
+ (attr->ia_uid != MSDOS_SB(inode->i_sb)->fs_uid)) ||
+ ((attr->ia_valid & ATTR_GID) &&
+ (attr->ia_gid != MSDOS_SB(inode->i_sb)->fs_gid)) ||
+ ((attr->ia_valid & ATTR_MODE) &&
+ (attr->ia_mode & ~MSDOS_VALID_MODE)))
+ error = -EPERM; /* owner/group are fixed per mount; only MSDOS_VALID_MODE bits may be set */
+
+ if (error)
+ return MSDOS_SB(inode->i_sb)->quiet ? 0 : error; /* "quiet" mounts report success without changing anything */
+
+ inode_setattr(inode, attr);
+
+ if (IS_NOEXEC(inode) && !S_ISDIR(inode->i_mode))
+ inode->i_mode &= S_IFMT | S_IRUGO | S_IWUGO;
+ else
+ inode->i_mode |= S_IXUGO;
+
+ inode->i_mode = ((inode->i_mode & S_IFMT) | ((((inode->i_mode & S_IRWXU
+ & ~MSDOS_SB(inode->i_sb)->fs_umask) | S_IRUSR) >> 6)*S_IXUGO)) &
+ ~MSDOS_SB(inode->i_sb)->fs_umask; /* replicate the owner bits (read always set) to group and other, then apply the umask */
+ return 0;
+}
+#ifdef MODULE
+
+char kernel_version[] = UTS_RELEASE;
+
+static struct file_system_type msdos_fs_type = { /* descriptor passed to register_filesystem()/unregister_filesystem() */
+ msdos_read_super, "msdos", 1, NULL
+};
+
+int init_module(void) /* Module load hook: register the "msdos" filesystem type. */
+{
+ register_filesystem(&msdos_fs_type); /* NOTE(review): any result of register_filesystem is not checked */
+ return 0;
+}
+
+void cleanup_module(void) /* Module unload hook: unregister the filesystem unless it is still in use. */
+{
+ if (MOD_IN_USE) {
+ /* something still holds the module: only report it */
+ printk("msdos: device busy, remove delayed\n");
+ return;
+ }
+ unregister_filesystem(&msdos_fs_type);
+}
+
+#endif
+
diff --git a/fs/msdos/misc.c b/fs/msdos/misc.c
new file mode 100644
index 000000000..630198afa
--- /dev/null
+++ b/fs/msdos/misc.c
@@ -0,0 +1,515 @@
+/*
+ * linux/fs/msdos/misc.c
+ *
+ * Written 1992,1993 by Werner Almesberger
+ */
+
+#include <linux/fs.h>
+#include <linux/msdos_fs.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+
+
+ #define PRINTK(x)
+ /*
+  * Well-known binary file extensions.
+  *
+  * The table is one flat string of fixed-width, three-character entries:
+  * is_binary() walks it three bytes at a time until it reaches the NUL.
+  * Every extension shorter than three characters must therefore be padded
+  * with spaces, and the total length must be a multiple of three; otherwise
+  * all following entries are misaligned and the walk steps over the
+  * terminator.
+  */
+
+ static char bin_extensions[] =
+     "EXECOMBINAPPSYSDRVOVLOVROBJLIBDLLPIF" /* program code */
+     "ARCZIPLHALZHZOOTARZ  ARJ" /* common archivers */
+     "TZ TAZTZPTPZ" /* abbreviations of tar.Z and tar.zip */
+     "GZ TGZDEB" /* .gz, .tar.gz and Debian packages */
+     "GIFBMPTIFGL JPGPCX" /* graphics */
+     "TFMVF GF PK PXLDVI"; /* TeX */
+
+
+/*
+ * fs_panic reports a severe file system problem and sets the file system
+ * read-only. The file system can be made writable again by remounting it.
+ */
+
+void fs_panic(struct super_block *s,char *msg)
+{
+ int not_ro;
+
+ not_ro = !(s->s_flags & MS_RDONLY);
+ if (not_ro) s->s_flags |= MS_RDONLY;
+ printk("Filesystem panic (dev 0x%04X, mounted on 0x%04X:%ld)\n %s\n",
+ s->s_dev,s->s_covered->i_dev,s->s_covered->i_ino,msg);
+ if (not_ro)
+ printk(" File system has been set read-only\n");
+}
+
+
+/*
+ * is_binary selects optional text conversion based on the conversion mode and
+ * the extension part of the file name.
+ */
+
+int is_binary(char conversion,char *extension)
+{
+ char *walk;
+
+ switch (conversion) {
+ case 'b':
+ return 1;
+ case 't':
+ return 0;
+ case 'a':
+ for (walk = bin_extensions; *walk; walk += 3)
+ if (!strncmp(extension,walk,3)) return 1;
+ return 0;
+ default:
+ printk("Invalid conversion mode - defaulting to "
+ "binary.\n");
+ return 1;
+ }
+}
+
+
+ /* File creation lock. This is system-wide to avoid deadlocks in rename. */
+ /* (rename might deadlock before detecting cross-FS moves.) */
+
+ static struct wait_queue *creation_wait = NULL; /* processes sleeping in lock_creation() */
+ static int creation_lock = 0; /* non-zero while the creation lock is held */
+
+
+ /*
+  * Take the system-wide creation lock: sleep on creation_wait for as long
+  * as creation_lock is set, then set it.
+  */
+ void lock_creation(void)
+ {
+     for (;;) {
+         if (!creation_lock)
+             break;
+         sleep_on(&creation_wait);
+     }
+     creation_lock = 1;
+ }
+
+
+ /*
+  * Release the system-wide creation lock and wake up everything sleeping
+  * on creation_wait.
+  */
+ void unlock_creation(void)
+ {
+     struct wait_queue **sleepers = &creation_wait;
+
+     creation_lock = 0;
+     wake_up(sleepers);
+ }
+
+
+ /*
+  * Take the per-super-block FAT lock: sleep on fat_wait for as long as
+  * fat_lock is set, then set it.
+  */
+ void lock_fat(struct super_block *sb)
+ {
+     for (;;) {
+         if (!MSDOS_SB(sb)->fat_lock)
+             break;
+         sleep_on(&MSDOS_SB(sb)->fat_wait);
+     }
+     MSDOS_SB(sb)->fat_lock = 1;
+ }
+
+
+ /*
+  * Release the per-super-block FAT lock (clear fat_lock) and wake up
+  * everything sleeping on fat_wait.
+  */
+ void unlock_fat(struct super_block *sb)
+ {
+     MSDOS_SB(sb)->fat_lock = 0;
+     wake_up(&(MSDOS_SB(sb)->fat_wait));
+ }
+
+
+/*
+ * msdos_add_cluster tries to allocate a new cluster and adds it to the file
+ * represented by inode. The cluster is zero-initialized.
+ */
+
+int msdos_add_cluster(struct inode *inode)
+{
+ int count,nr,limit,last,current,sector,last_sector;
+ struct buffer_head *bh;
+ int cluster_size = MSDOS_SB(inode->i_sb)->cluster_size;
+
+ if (inode->i_ino == MSDOS_ROOT_INO) return -ENOSPC;
+ if (!MSDOS_SB(inode->i_sb)->free_clusters) return -ENOSPC;
+ lock_fat(inode->i_sb);
+ limit = MSDOS_SB(inode->i_sb)->clusters;
+ nr = limit; /* to keep GCC happy */
+ for (count = 0; count < limit; count++) {
+ nr = ((count+MSDOS_SB(inode->i_sb)->prev_free) % limit)+2;
+ if (fat_access(inode->i_sb,nr,-1) == 0) break;
+ }
+#ifdef DEBUG
+printk("free cluster: %d\n",nr);
+#endif
+ MSDOS_SB(inode->i_sb)->prev_free = (count+MSDOS_SB(inode->i_sb)->
+ prev_free+1) % limit;
+ if (count >= limit) {
+ MSDOS_SB(inode->i_sb)->free_clusters = 0;
+ unlock_fat(inode->i_sb);
+ return -ENOSPC;
+ }
+ fat_access(inode->i_sb,nr,MSDOS_SB(inode->i_sb)->fat_bits == 12 ?
+ 0xff8 : 0xfff8);
+ if (MSDOS_SB(inode->i_sb)->free_clusters != -1)
+ MSDOS_SB(inode->i_sb)->free_clusters--;
+ unlock_fat(inode->i_sb);
+#ifdef DEBUG
+printk("set to %x\n",fat_access(inode->i_sb,nr,-1));
+#endif
+ last = 0;
+ if ((current = MSDOS_I(inode)->i_start) != 0) {
+ cache_lookup(inode,INT_MAX,&last,&current);
+ while (current && current != -1)
+ if (!(current = fat_access(inode->i_sb,
+ last = current,-1))) {
+ fs_panic(inode->i_sb,"File without EOF");
+ return -ENOSPC;
+ }
+ }
+#ifdef DEBUG
+printk("last = %d\n",last);
+#endif
+ if (last) fat_access(inode->i_sb,last,nr);
+ else {
+ MSDOS_I(inode)->i_start = nr;
+ inode->i_dirt = 1;
+ }
+#ifdef DEBUG
+if (last) printk("next set to %d\n",fat_access(inode->i_sb,last,-1));
+#endif
+ sector = MSDOS_SB(inode->i_sb)->data_start+(nr-2)*cluster_size;
+ last_sector = sector + cluster_size;
+ for ( ; sector < last_sector; sector++) {
+ #ifdef DEBUG
+ printk("zeroing sector %d\n",sector);
+ #endif
+ if (!(bh = getblk(inode->i_dev,sector,SECTOR_SIZE)))
+ printk("getblk failed\n");
+ else {
+ memset(bh->b_data,0,SECTOR_SIZE);
+ bh->b_uptodate = 1;
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ }
+ }
+ inode->i_blocks += cluster_size;
+ if (S_ISDIR(inode->i_mode)) {
+ if (inode->i_size & (SECTOR_SIZE-1)) {
+ fs_panic(inode->i_sb,"Odd directory size");
+ inode->i_size = (inode->i_size+SECTOR_SIZE) &
+ ~(SECTOR_SIZE-1);
+ }
+ inode->i_size += SECTOR_SIZE*cluster_size;
+#ifdef DEBUG
+printk("size is %d now (%x)\n",inode->i_size,inode);
+#endif
+ inode->i_dirt = 1;
+ }
+ return 0;
+}
+
+
+ /* Linear day numbers of the respective 1sts in non-leap years. The table has
+ * 16 slots (twelve months followed by four zeros); date_dos2unix indexes it
+ * with a value derived from the 4-bit month field of a DOS date. */
+
+ static int day_n[] = { 0,31,59,90,120,151,181,212,243,273,304,334,0,0,0,0 };
+ /* JanFebMarApr May Jun Jul Aug Sep Oct Nov Dec */
+
+
+ extern struct timezone sys_tz; /* tz_minuteswest is applied by the two date conversion routines below */
+
+
+ /*
+  * Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70).
+  *
+  * time holds seconds/2 in bits 0-4, minutes in bits 5-10 and hours in bits
+  * 11-15; date holds the day in bits 0-4, the month in bits 5-8 and the year
+  * in bits 9-15.  sys_tz.tz_minuteswest is added to the result.
+  *
+  * The month is decremented first and masked afterwards, so that a month
+  * field of 0 (for instance an all-zero date) selects the zero slot
+  * day_n[15] instead of reading day_n[-1].  Month fields 1..15 index the
+  * same slots as before.
+  */
+
+ int date_dos2unix(unsigned short time,unsigned short date)
+ {
+     int month,year,secs;
+
+     month = ((date >> 5)-1) & 15;
+     year = date >> 9;
+     secs = (time & 31)*2+60*((time >> 5) & 63)+(time >> 11)*3600+86400*
+         ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 &&
+         month < 2 ? 1 : 0)+3653);
+         /* days since 1.1.70 plus 80's leap day */
+     secs += sys_tz.tz_minuteswest*60;
+     return secs;
+ }
+
+
+ /*
+  * Convert linear UNIX date to a MS-DOS time/date pair.
+  *
+  * unix_date is first shifted by sys_tz.tz_minuteswest.  *time receives
+  * seconds/2, minutes and hours packed in bits 0-4, 5-10 and 11-15; *date
+  * receives day, month and year (counted from the 3652-day offset, i.e.
+  * 1 Jan 1980) packed in bits 0-4, 5-8 and 9-15.
+  *
+  * Dates before that offset cannot be represented.  They used to produce a
+  * negative day count and a read of day_n[-1]; they are now clamped to the
+  * earliest representable instant.
+  */
+
+ void date_unix2dos(int unix_date,unsigned short *time,
+     unsigned short *date)
+ {
+     int day,year,nl_day,month;
+
+     unix_date -= sys_tz.tz_minuteswest*60;
+     if (unix_date < 3652*86400) /* before 1 Jan 1980: clamp */
+         unix_date = 3652*86400;
+     *time = (unix_date % 60)/2+(((unix_date/60) % 60) << 5)+
+         (((unix_date/3600) % 24) << 11);
+     day = unix_date/86400-3652;
+     year = day/365;
+     if ((year+3)/4+365*year > day) year--;
+     day -= (year+3)/4+365*year;
+     if (day == 59 && !(year & 3)) {
+         /* 29 February of a leap year */
+         nl_day = day;
+         month = 2;
+     }
+     else {
+         /* nl_day is the day number as if the year had no leap day */
+         nl_day = (year & 3) || day <= 59 ? day : day-1;
+         for (month = 0; month < 12; month++)
+             if (day_n[month] > nl_day) break;
+     }
+     *date = nl_day-day_n[month-1]+1+(month << 5)+(year << 9);
+ }
+
+
+ /*
+  * Returns the inode number of the directory entry at offset *pos, or -1 when
+  * msdos_smap() reports no sector for that offset (-1 or 0, i.e. beyond EOF).
+  *
+  * A buffer passed in through *bh is brelse'd first; the buffer holding the
+  * entry is returned in *bh and *de points at the entry inside it.  *pos is
+  * advanced by one directory entry.  If reading the sector fails, a message
+  * is printed and the entry at the (already advanced) position is tried.
+  */
+
+ int msdos_get_entry(struct inode *dir, loff_t *pos,struct buffer_head **bh,
+     struct msdos_dir_entry **de)
+ {
+     int sector,offset,in_sector;
+
+     for (;;) {
+         offset = *pos;
+         PRINTK (("get_entry offset %d\n",offset));
+         sector = msdos_smap(dir,offset >> SECTOR_BITS);
+         if (sector == -1)
+             return -1;
+         PRINTK (("get_entry sector %d %p\n",sector,*bh));
+         if (sector == 0)
+             return -1; /* beyond EOF */
+         *pos += sizeof(struct msdos_dir_entry);
+         if (*bh)
+             brelse(*bh);
+         PRINTK (("get_entry sector apres brelse\n"));
+         *bh = msdos_sread(dir->i_dev,sector);
+         if (*bh)
+             break;
+         printk("Directory sread (sector %d) failed\n",sector);
+     }
+     PRINTK (("get_entry apres sread\n"));
+     in_sector = offset & (SECTOR_SIZE-1);
+     *de = (struct msdos_dir_entry *) ((*bh)->b_data+in_sector);
+     return (sector << MSDOS_DPS_BITS)+(in_sector >> MSDOS_DIR_BITS);
+ }
+
+
+/*
+ * Now an ugly part: this set of directory scan routines works on clusters
+ * rather than on inodes and sectors. They are necessary to locate the '..'
+ * directory "inode". raw_scan_sector operates in four modes:
+ *
+ * name number ino action
+ * -------- -------- -------- -------------------------------------------------
+ * non-NULL - X Find an entry with that name
+ * NULL non-NULL non-NULL Find an entry whose data starts at *number
+ * NULL non-NULL NULL Count subdirectories in *number. (*)
+ * NULL NULL non-NULL Find an empty entry
+ *
+ * (*) The return code should be ignored. It DOES NOT indicate success or
+ * failure. *number has to be initialized to zero.
+ *
+ * - = not used, X = a value is returned unless NULL
+ *
+ * If res_bh is non-NULL, the buffer is not deallocated but returned to the
+ * caller on success. res_de is set accordingly.
+ *
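+ * If cont is non-zero, raw_found continues with the entry after the one
+ * res_bh/res_de point to. (NOTE: none of the raw_scan routines in this file
+ * takes a cont argument and no raw_found routine exists here; this paragraph
+ * appears to describe an older interface.)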
+ */
+
+
+ #define RSS_NAME /* search for name: compare MSDOS_NAME bytes, never matching entries that have ATTR_VOLUME set; sets done */ \
+ done = !strncmp(data[entry].name,name,MSDOS_NAME) && \
+ !(data[entry].attr & ATTR_VOLUME);
+
+ #define RSS_START /* search for start cluster: a non-free entry whose start field equals *number; sets done */ \
+ done = !IS_FREE(data[entry].name) && CF_LE_W(data[entry].start) == *number;
+
+ #define RSS_FREE /* search for free entry; uses the caller's inode, sb and sector locals; sets done */ \
+ { \
+ done = IS_FREE(data[entry].name); \
+ if (done) { \
+ inode = iget(sb,sector*MSDOS_DPS+entry); \
+ if (inode) { \
+ /* Directory slots of busy deleted files aren't available yet. */ \
+ done = !MSDOS_I(inode)->i_busy; \
+ iput(inode); \
+ } \
+ } \
+ }
+
+ #define RSS_COUNT /* count subdirectories: done stays 0 so the scan never stops; *number is incremented per ATTR_DIR entry */ \
+ { \
+ done = 0; \
+ if (!IS_FREE(data[entry].name) && (data[entry].attr & ATTR_DIR)) \
+ (*number)++; \
+ }
+
+ /*
+  * Scan the directory entries of one sector.
+  *
+  * The search mode is selected by name/number/ino (see the table in the
+  * comment preceding the RSS_* macros).  On a match the entry's inode number
+  * is stored in *ino (when ino is non-NULL) and the entry's start cluster is
+  * returned; the buffer is released unless res_bh is non-NULL, in which case
+  * it is handed to the caller through *res_bh with *res_de pointing at the
+  * entry.  Returns -EIO if the sector cannot be read and -ENOENT if no entry
+  * of the sector matched.
+  */
+ static int raw_scan_sector(struct super_block *sb,int sector,char *name,
+     int *number,int *ino,struct buffer_head **res_bh,
+     struct msdos_dir_entry **res_de)
+ {
+     struct buffer_head *bh;
+     struct msdos_dir_entry *data;
+     struct inode *inode; /* scratch variable used by RSS_FREE */
+     int entry,start,done;
+
+     bh = msdos_sread(sb->s_dev,sector);
+     if (!bh)
+         return -EIO;
+     data = (struct msdos_dir_entry *) bh->b_data;
+     for (entry = 0; entry < MSDOS_DPS; entry++) {
+         /* each RSS_* macro sets `done' for data[entry] */
+         if (name) {
+             RSS_NAME
+         }
+         else if (!ino) RSS_COUNT
+         else if (number) {
+             RSS_START
+         }
+         else RSS_FREE
+         if (!done)
+             continue;
+         if (ino)
+             *ino = sector*MSDOS_DPS+entry;
+         start = CF_LE_W(data[entry].start);
+         if (res_bh) {
+             *res_bh = bh;
+             *res_de = &data[entry];
+         }
+         else
+             brelse(bh);
+         return start;
+     }
+     brelse(bh);
+     return -ENOENT;
+ }
+
+
+/*
+ * raw_scan_root performs raw_scan_sector on the root directory until the
+ * requested entry is found or the end of the directory is reached.
+ */
+
+static int raw_scan_root(struct super_block *sb,char *name,int *number,int *ino,
+ struct buffer_head **res_bh,struct msdos_dir_entry **res_de)
+{
+ int count,cluster;
+
+ for (count = 0; count < MSDOS_SB(sb)->dir_entries/MSDOS_DPS; count++) {
+ if ((cluster = raw_scan_sector(sb,MSDOS_SB(sb)->dir_start+count,
+ name,number,ino,res_bh,res_de)) >= 0) return cluster;
+ }
+ return -ENOENT;
+}
+
+
+/*
+ * raw_scan_nonroot performs raw_scan_sector on a non-root directory until the
+ * requested entry is found or the end of the directory is reached.
+ */
+
+static int raw_scan_nonroot(struct super_block *sb,int start,char *name,
+ int *number,int *ino,struct buffer_head **res_bh,struct msdos_dir_entry
+ **res_de)
+{
+ int count,cluster;
+
+#ifdef DEBUG
+ printk("raw_scan_nonroot: start=%d\n",start);
+#endif
+ do {
+ for (count = 0; count < MSDOS_SB(sb)->cluster_size; count++) {
+ if ((cluster = raw_scan_sector(sb,(start-2)*
+ MSDOS_SB(sb)->cluster_size+MSDOS_SB(sb)->data_start+
+ count,name,number,ino,res_bh,res_de)) >= 0)
+ return cluster;
+ }
+ if (!(start = fat_access(sb,start,-1))) {
+ fs_panic(sb,"FAT error");
+ break;
+ }
+#ifdef DEBUG
+ printk("next start: %d\n",start);
+#endif
+ }
+ while (start != -1);
+ return -ENOENT;
+}
+
+
+/*
+ * raw_scan performs raw_scan_sector on any sector.
+ *
+ * NOTE: raw_scan must not be used on a directory that is is the process of
+ * being created.
+ */
+
+static int raw_scan(struct super_block *sb,int start,char *name,int *number,
+ int *ino,struct buffer_head **res_bh,struct msdos_dir_entry **res_de)
+{
+ if (start)
+ return raw_scan_nonroot(sb,start,name,number,ino,res_bh,res_de);
+ else return raw_scan_root(sb,name,number,ino,res_bh,res_de);
+}
+
+
+/*
+ * msdos_parent_ino returns the inode number of the parent directory of dir.
+ * File creation has to be deferred while msdos_parent_ino is running to
+ * prevent renames.
+ */
+
+int msdos_parent_ino(struct inode *dir,int locked)
+{
+ static int zero = 0;
+ int error,current,prev,nr;
+
+ if (!S_ISDIR(dir->i_mode)) panic("Non-directory fed to m_p_i");
+ if (dir->i_ino == MSDOS_ROOT_INO) return dir->i_ino;
+ if (!locked) lock_creation(); /* prevent renames */
+ if ((current = raw_scan(dir->i_sb,MSDOS_I(dir)->i_start,MSDOS_DOTDOT,
+ &zero,NULL,NULL,NULL)) < 0) {
+ if (!locked) unlock_creation();
+ return current;
+ }
+ if (!current) nr = MSDOS_ROOT_INO;
+ else {
+ if ((prev = raw_scan(dir->i_sb,current,MSDOS_DOTDOT,&zero,NULL,
+ NULL,NULL)) < 0) {
+ if (!locked) unlock_creation();
+ return prev;
+ }
+ if ((error = raw_scan(dir->i_sb,prev,NULL,&current,&nr,NULL,
+ NULL)) < 0) {
+ if (!locked) unlock_creation();
+ return error;
+ }
+ }
+ if (!locked) unlock_creation();
+ return nr;
+}
+
+
+/*
+ * msdos_subdirs counts the number of sub-directories of dir. It can be run
+ * on directories being created.
+ */
+
+int msdos_subdirs(struct inode *dir)
+{
+ int count;
+
+ count = 0;
+ if (dir->i_ino == MSDOS_ROOT_INO)
+ (void) raw_scan_root(dir->i_sb,NULL,&count,NULL,NULL,NULL);
+ else {
+ if (!MSDOS_I(dir)->i_start) return 0; /* in mkdir */
+ else (void) raw_scan_nonroot(dir->i_sb,MSDOS_I(dir)->i_start,
+ NULL,&count,NULL,NULL,NULL);
+ }
+ return count;
+}
+
+
+/*
+ * Scans a directory for a given file (name points to its formatted name) or
+ * for an empty directory slot (name is NULL). Returns an error code or zero.
+ */
+
+int msdos_scan(struct inode *dir,char *name,struct buffer_head **res_bh,
+ struct msdos_dir_entry **res_de,int *ino)
+{
+ int res;
+
+ if (name)
+ res = raw_scan(dir->i_sb,MSDOS_I(dir)->i_start,name,NULL,ino,
+ res_bh,res_de);
+ else res = raw_scan(dir->i_sb,MSDOS_I(dir)->i_start,NULL,NULL,ino,
+ res_bh,res_de);
+ return res < 0 ? res : 0;
+}
diff --git a/fs/msdos/mmap.c b/fs/msdos/mmap.c
new file mode 100644
index 000000000..0e85584e9
--- /dev/null
+++ b/fs/msdos/mmap.c
@@ -0,0 +1,102 @@
+/*
+ * fs/msdos/mmap.c
+ *
+ * Written by Jacques Gelinas (jacques@solucorp.qc.ca)
+ * Inspired by fs/nfs/mmap.c (Jon Tombs 15 Aug 1993)
+ *
+ * msdos mmap handling
+ */
+#include <linux/stat.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/shm.h>
+#include <linux/errno.h>
+#include <linux/mman.h>
+#include <linux/string.h>
+#include <linux/malloc.h>
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <linux/msdos_fs.h>
+
+/*
+ * Fill in the supplied page for mmap
+ */
+static unsigned long msdos_file_mmap_nopage(
+ struct vm_area_struct * area,
+ unsigned long address,
+ unsigned long page,
+ int error_code)
+{
+ struct inode * inode = area->vm_inode;
+ unsigned int clear;
+ int pos;
+ long gap; /* distance from eof to pos */
+
+ address &= PAGE_MASK;
+ pos = address - area->vm_start + area->vm_offset;
+
+ clear = 0;
+ gap = inode->i_size - pos;
+ if (gap <= 0){
+ /* mmaping beyond end of file */
+ clear = PAGE_SIZE;
+ }else{
+ int cur_read;
+ int need_read;
+ struct file filp;
+ if (gap < PAGE_SIZE){
+ clear = PAGE_SIZE - gap;
+ }
+ filp.f_pos = pos;
+ need_read = PAGE_SIZE - clear;
+ {
+ unsigned long cur_fs = get_fs();
+ set_fs (KERNEL_DS);
+ cur_read = msdos_file_read (inode,&filp,(char*)page
+ ,need_read);
+ set_fs (cur_fs);
+ }
+ if (cur_read != need_read){
+ printk ("MSDOS: Error while reading an mmap file %d <> %d\n"
+ ,cur_read,need_read);
+ }
+ }
+ if (clear > 0){
+ memset ((char*)page+PAGE_SIZE-clear,0,clear);
+ }
+ return page;
+}
+
+ struct vm_operations_struct msdos_file_mmap = { /* VM operations installed by msdos_mmap(); only the nopage handler is provided */
+ NULL, /* open */
+ NULL, /* close */
+ msdos_file_mmap_nopage, /* nopage */
+ NULL, /* wppage */
+ NULL, /* share */
+ NULL, /* unmap */
+ };
+
+/*
+ * This is used for a general mmap of an msdos file
+ * Returns 0 if ok, or a negative error code if not.
+ */
+int msdos_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
+{
+ if (vma->vm_page_prot & PAGE_RW) /* only PAGE_COW or read-only supported now */
+ return -EINVAL;
+ if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
+ return -EINVAL;
+ if (!inode->i_sb || !S_ISREG(inode->i_mode))
+ return -EACCES;
+ if (!IS_RDONLY(inode)) {
+ inode->i_atime = CURRENT_TIME;
+ inode->i_dirt = 1;
+ }
+
+ vma->vm_inode = inode;
+ inode->i_count++;
+ vma->vm_ops = &msdos_file_mmap;
+ return 0;
+}
+
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
new file mode 100644
index 000000000..ad3b9c8bc
--- /dev/null
+++ b/fs/msdos/namei.c
@@ -0,0 +1,620 @@
+/*
+ * linux/fs/msdos/namei.c
+ *
+ * Written 1992,1993 by Werner Almesberger
+ */
+
+#include <asm/segment.h>
+
+#include <linux/sched.h>
+#include <linux/msdos_fs.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+
+#define PRINTK(x)
+
+ /*
+  * MS-DOS "device special files"
+  *
+  * Each entry is the 8-character, space-padded base name in the form that
+  * msdos_format_name() produces, because the check there compares exactly
+  * the first 8 bytes of the formatted name.  An entry shorter than 8
+  * characters would never match.  The list ends with NULL.
+  */
+
+ static char *reserved_names[] = {
+     "CON     ","PRN     ","NUL     ","AUX     ",
+     "LPT1    ","LPT2    ","LPT3    ","LPT4    ",
+     "COM1    ","COM2    ","COM3    ","COM4    ",
+     NULL };
+
+
+ /* Characters that are undesirable in an MS-DOS file name. msdos_format_name
+ * rejects the bad_chars set unless the check mode is 'r', and additionally
+ * rejects the bad_if_strict set when the check mode is 's'. */
+
+ static char bad_chars[] = "*?<>|\"";
+ static char bad_if_strict[] = "+=,; ";
+
+
+ /* Formats an MS-DOS file name. Rejects invalid names.
+ *
+ * name/len is the caller's name. res receives MSDOS_NAME bytes: the base name
+ * in the first 8 positions followed by the extension, with lower-case letters
+ * converted to upper case and unused positions filled with spaces (no
+ * terminating NUL is written).
+ * conv selects the strictness: 'r' skips the bad_chars test; 's' also rejects
+ * the bad_if_strict characters, upper-case letters in the input and base
+ * names or extensions that are too long; with any other value over-long
+ * parts are silently truncated.
+ * "." and ".." are formatted only when dot_dirs is non-zero, otherwise
+ * -EEXIST is returned. Returns 0 on success and -EINVAL for a rejected name
+ * (including the names listed in reserved_names). */
+
+ static int msdos_format_name(char conv,const char *name,int len,char *res,
+ int dot_dirs)
+ {
+ char *walk,**reserved;
+ unsigned char c;
+ int space;
+
+ if (IS_FREE(name)) return -EINVAL;
+ if (name[0] == '.' && (len == 1 || (len == 2 && name[1] == '.'))) {
+ if (!dot_dirs) return -EEXIST;
+ memset(res+1,' ',10);
+ while (len--) *res++ = '.';
+ return 0;
+ }
+ space = 1; /* disallow names starting with a dot */
+ c = 0;
+ for (walk = res; len && walk-res < 8; walk++) { /* base name, at most 8 characters */
+ c = *name++;
+ len--;
+ if (conv != 'r' && strchr(bad_chars,c)) return -EINVAL;
+ if (conv == 's' && strchr(bad_if_strict,c)) return -EINVAL;
+ if (c >= 'A' && c <= 'Z' && conv == 's') return -EINVAL;
+ if (c < ' ' || c == ':' || c == '\\') return -EINVAL;
+ if (c == '.') break;
+ space = c == ' '; /* remembers whether the last character stored was a space */
+ *walk = c >= 'a' && c <= 'z' ? c-32 : c;
+ }
+ if (space) return -EINVAL; /* empty base name or base name ending in a space */
+ if (conv == 's' && len && c != '.') { /* strict: a ninth character must be the dot */
+ c = *name++;
+ len--;
+ if (c != '.') return -EINVAL;
+ }
+ while (c != '.' && len--) c = *name++; /* skip the rest of an over-long base name */
+ if (c == '.') {
+ while (walk-res < 8) *walk++ = ' ';
+ while (len > 0 && walk-res < MSDOS_NAME) { /* extension */
+ c = *name++;
+ len--;
+ if (conv != 'r' && strchr(bad_chars,c)) return -EINVAL;
+ if (conv == 's' && strchr(bad_if_strict,c))
+ return -EINVAL;
+ if (c < ' ' || c == ':' || c == '\\' || c == '.')
+ return -EINVAL;
+ if (c >= 'A' && c <= 'Z' && conv == 's') return -EINVAL;
+ space = c == ' ';
+ *walk++ = c >= 'a' && c <= 'z' ? c-32 : c;
+ }
+ if (space) return -EINVAL;
+ if (conv == 's' && len) return -EINVAL; /* strict: extension too long */
+ }
+ while (walk-res < MSDOS_NAME) *walk++ = ' ';
+ for (reserved = reserved_names; *reserved; reserved++)
+ if (!strncmp(res,*reserved,8)) return -EINVAL;
+ return 0;
+ }
+
+
+ /*
+  * Locates a directory entry.
+  *
+  * name/len is formatted with the mount's name_check mode ("." and ".." are
+  * accepted) and the directory is then scanned for the formatted name.
+  * Returns the negative result of msdos_format_name() if the name is
+  * rejected, otherwise the result of msdos_scan().
+  */
+
+ static int msdos_find(struct inode *dir,const char *name,int len,
+     struct buffer_head **bh,struct msdos_dir_entry **de,int *ino)
+ {
+     char formatted[MSDOS_NAME];
+     int status;
+
+     status = msdos_format_name(MSDOS_SB(dir->i_sb)->name_check,name,len,
+         formatted,1);
+     if (status < 0)
+         return status;
+     return msdos_scan(dir,formatted,bh,de,ino);
+ }
+
+
+ /*
+  * Look up name (len bytes) in directory dir and return its inode in
+  * *result.
+  *
+  * The reference on dir is consumed: it is either handed back as *result
+  * (for ".") or released with iput().  Returns 0 on success, -ENOENT if dir
+  * is NULL or not a directory, if the entry belongs to a directory that is
+  * still being created, or if an i_old chain cannot be followed; -EACCES if
+  * the inode cannot be obtained; otherwise the error from
+  * msdos_parent_ino() or msdos_find().
+  *
+  * In the ".." case the super block pointer is saved before dir is
+  * released, so that dir is never touched after iput().
+  */
+ int msdos_lookup(struct inode *dir,const char *name,int len,
+     struct inode **result)
+ {
+     int ino,res;
+     struct msdos_dir_entry *de;
+     struct buffer_head *bh;
+     struct inode *next;
+     struct super_block *sb;
+
+     PRINTK (("msdos_lookup\n"));
+
+     *result = NULL;
+     if (!dir) return -ENOENT;
+     if (!S_ISDIR(dir->i_mode)) {
+         iput(dir);
+         return -ENOENT;
+     }
+     PRINTK (("msdos_lookup 2\n"));
+     if (len == 1 && name[0] == '.') {
+         *result = dir;
+         return 0;
+     }
+     if (len == 2 && name[0] == '.' && name[1] == '.') {
+         sb = dir->i_sb; /* must be read before dir is released */
+         ino = msdos_parent_ino(dir,0);
+         iput(dir);
+         if (ino < 0) return ino;
+         if (!(*result = iget(sb,ino))) return -EACCES;
+         return 0;
+     }
+     PRINTK (("msdos_lookup 3\n"));
+     if ((res = msdos_find(dir,name,len,&bh,&de,&ino)) < 0) {
+         iput(dir);
+         return res;
+     }
+     PRINTK (("msdos_lookup 4\n"));
+     if (bh) brelse(bh);
+     PRINTK (("msdos_lookup 4.5\n"));
+ /* printk("lookup: ino=%d\n",ino); */
+     if (!(*result = iget(dir->i_sb,ino))) {
+         iput(dir);
+         return -EACCES;
+     }
+     PRINTK (("msdos_lookup 5\n"));
+     if (MSDOS_I(*result)->i_busy) { /* mkdir in progress */
+         iput(*result);
+         iput(dir);
+         return -ENOENT;
+     }
+     PRINTK (("msdos_lookup 6\n"));
+     /* follow the i_old links left behind by rename_diff_dir */
+     while (MSDOS_I(*result)->i_old) {
+         next = MSDOS_I(*result)->i_old;
+         iput(*result);
+         if (!(*result = iget(next->i_sb,next->i_ino))) {
+             fs_panic(dir->i_sb,"msdos_lookup: Can't happen");
+             iput(dir);
+             return -ENOENT;
+         }
+     }
+     PRINTK (("msdos_lookup 7\n"));
+     iput(dir);
+     PRINTK (("msdos_lookup 8\n"));
+     return 0;
+ }
+
+
+ /*
+  * Creates a directory entry (name is already formatted).
+  *
+  * A free slot is looked for in dir; if there is none, a non-root directory
+  * is extended by one cluster and scanned again (the root directory yields
+  * -ENOSPC).  The slot is filled in with name, a directory or archive
+  * attribute depending on is_dir, a zero start cluster and size, and dir's
+  * modification time.  The inode of the new entry is returned in *result
+  * with all three times set to the current time.  Returns 0 on success,
+  * -EIO if the inode cannot be obtained, otherwise the error from
+  * msdos_scan() or msdos_add_cluster().
+  *
+  * The time and date words are converted with CT_LE_W after
+  * date_unix2dos(), matching the way the inode write-back code stores them
+  * in the raw directory entry.
+  */
+
+ static int msdos_create_entry(struct inode *dir,char *name,int is_dir,
+     struct inode **result)
+ {
+     struct buffer_head *bh;
+     struct msdos_dir_entry *de;
+     int res,ino;
+
+     if ((res = msdos_scan(dir,NULL,&bh,&de,&ino)) < 0) {
+         if (res != -ENOENT) return res;
+         if (dir->i_ino == MSDOS_ROOT_INO) return -ENOSPC;
+         if ((res = msdos_add_cluster(dir)) < 0) return res;
+         if ((res = msdos_scan(dir,NULL,&bh,&de,&ino)) < 0) return res;
+     }
+     /*
+      * XXX all times should be set by caller upon successful completion.
+      */
+     dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+     dir->i_dirt = 1;
+     memcpy(de->name,name,MSDOS_NAME);
+     memset(de->unused, 0, sizeof(de->unused));
+     de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
+     de->start = 0;
+     date_unix2dos(dir->i_mtime,&de->time,&de->date);
+     de->time = CT_LE_W(de->time);
+     de->date = CT_LE_W(de->date);
+     de->size = 0;
+     mark_buffer_dirty(bh, 1);
+     if ((*result = iget(dir->i_sb,ino)) != NULL)
+         msdos_read_inode(*result);
+     brelse(bh);
+     if (!*result) return -EIO;
+     (*result)->i_mtime = (*result)->i_atime = (*result)->i_ctime =
+         CURRENT_TIME;
+     (*result)->i_dirt = 1;
+     return 0;
+ }
+
+
+ /*
+  * Create a new entry called name (len bytes) in dir and return its inode
+  * in *result.
+  *
+  * The reference on dir is released.  Returns -ENOENT if dir is NULL, the
+  * error from msdos_format_name() for an unacceptable name ("." and ".."
+  * are not accepted here), -EEXIST if an entry with the formatted name
+  * already exists, and otherwise the result of msdos_create_entry().  The
+  * existence check and the creation run under the creation lock.
+  */
+ int msdos_create(struct inode *dir,const char *name,int len,int mode,
+     struct inode **result)
+ {
+     struct buffer_head *bh;
+     struct msdos_dir_entry *de;
+     char msdos_name[MSDOS_NAME];
+     int ino,res,exists;
+
+     if (!dir) return -ENOENT;
+     res = msdos_format_name(MSDOS_SB(dir->i_sb)->name_check,name,len,
+         msdos_name,0);
+     if (res < 0) {
+         iput(dir);
+         return res;
+     }
+     lock_creation();
+     exists = msdos_scan(dir,msdos_name,&bh,&de,&ino) >= 0;
+     if (!exists)
+         res = msdos_create_entry(dir,msdos_name,S_ISDIR(mode),result);
+     unlock_creation();
+     if (exists) {
+         brelse(bh);
+         res = -EEXIST;
+     }
+     iput(dir);
+     return res;
+ }
+
+
+#ifdef DEBUG
+
+ /*
+  * Debugging aid: print the cluster chain beginning at start, following
+  * fat_access() until it returns -1.  A 0 entry inside the chain is printed
+  * as ERROR.
+  */
+ static void dump_fat(struct super_block *sb,int start)
+ {
+     int cluster;
+
+     printk("[");
+     for (cluster = start; cluster; ) {
+         printk("%d ",cluster);
+         cluster = fat_access(sb,cluster,-1);
+         if (cluster == -1)
+             break;
+         if (cluster == 0)
+             printk("ERROR");
+     }
+     printk("]\n");
+ }
+
+#endif
+
+
+ /*
+  * Create the directory name (len bytes) in dir.
+  *
+  * The reference on dir is consumed on every path.  Under the creation
+  * lock the entry is created, marked busy so that lookups fail, given its
+  * first cluster and its "." and ".." entries, and then made visible.
+  * Returns 0 on success, -EEXIST if the name already exists, otherwise the
+  * error of the step that failed.
+  *
+  * On failure after the entry exists, it is removed again with
+  * msdos_rmdir(), which releases dir.  The super block pointer is therefore
+  * saved up front, so that the fs_panic() call for a failed removal does
+  * not read dir after it has been released.
+  */
+ int msdos_mkdir(struct inode *dir,const char *name,int len,int mode)
+ {
+     struct buffer_head *bh;
+     struct msdos_dir_entry *de;
+     struct inode *inode,*dot;
+     struct super_block *sb = dir->i_sb;
+     char msdos_name[MSDOS_NAME];
+     int ino,res;
+
+     if ((res = msdos_format_name(MSDOS_SB(dir->i_sb)->name_check,name,len,
+         msdos_name,0)) < 0) {
+         iput(dir);
+         return res;
+     }
+     lock_creation();
+     if (msdos_scan(dir,msdos_name,&bh,&de,&ino) >= 0) {
+         unlock_creation();
+         brelse(bh);
+         iput(dir);
+         return -EEXIST;
+     }
+     if ((res = msdos_create_entry(dir,msdos_name,1,&inode)) < 0) {
+         unlock_creation();
+         iput(dir);
+         return res;
+     }
+     dir->i_nlink++;
+     inode->i_nlink = 2; /* no need to mark them dirty */
+     MSDOS_I(inode)->i_busy = 1; /* prevent lookups */
+     if ((res = msdos_add_cluster(inode)) < 0) goto mkdir_error;
+     if ((res = msdos_create_entry(inode,MSDOS_DOT,1,&dot)) < 0)
+         goto mkdir_error;
+     dot->i_size = inode->i_size; /* doesn't grow in the 2nd create_entry */
+     MSDOS_I(dot)->i_start = MSDOS_I(inode)->i_start;
+     dot->i_nlink = inode->i_nlink;
+     dot->i_dirt = 1;
+     iput(dot);
+     if ((res = msdos_create_entry(inode,MSDOS_DOTDOT,1,&dot)) < 0)
+         goto mkdir_error;
+     unlock_creation();
+     dot->i_size = dir->i_size;
+     MSDOS_I(dot)->i_start = MSDOS_I(dir)->i_start;
+     dot->i_nlink = dir->i_nlink;
+     dot->i_dirt = 1;
+     MSDOS_I(inode)->i_busy = 0;
+     iput(dot);
+     iput(inode);
+     iput(dir);
+     return 0;
+ mkdir_error:
+     iput(inode);
+     /* msdos_rmdir() releases dir; only sb may be used afterwards */
+     if (msdos_rmdir(dir,name,len) < 0)
+         fs_panic(sb,"rmdir in mkdir failed");
+     unlock_creation();
+     return res;
+ }
+
+
+ /*
+  * Check whether directory dir may be removed.
+  *
+  * Returns -EBUSY if the inode has more than one user, -ENOTEMPTY if it
+  * contains an entry that is neither free nor "." nor "..", and 0
+  * otherwise.  A directory without a start cluster counts as empty.
+  */
+ static int msdos_empty(struct inode *dir)
+ {
+     loff_t pos;
+     struct buffer_head *bh;
+     struct msdos_dir_entry *de;
+
+     if (dir->i_count > 1)
+         return -EBUSY;
+     if (!MSDOS_I(dir)->i_start) /* may be zero in mkdir */
+         return 0;
+     pos = 0;
+     bh = NULL;
+     while (msdos_get_entry(dir,&pos,&bh,&de) > -1) {
+         if (IS_FREE(de->name))
+             continue;
+         if (!strncmp(de->name,MSDOS_DOT,MSDOS_NAME))
+             continue;
+         if (!strncmp(de->name,MSDOS_DOTDOT,MSDOS_NAME))
+             continue;
+         brelse(bh);
+         return -ENOTEMPTY;
+     }
+     if (bh)
+         brelse(bh);
+     return 0;
+ }
+
+
+ /*
+  * Remove the directory name (len bytes) from dir.
+  *
+  * The reference on dir is released on every path.  Returns -EPERM for "."
+  * and "..", the error from msdos_find() if the name is not found, -ENOENT
+  * if the inode cannot be obtained, -ENOTDIR if the entry is not a
+  * directory, -EBUSY if it is on another device or is dir itself, the
+  * result of msdos_empty() if that is non-zero, and 0 once the entry has
+  * been flagged as deleted.
+  */
+ int msdos_rmdir(struct inode *dir,const char *name,int len)
+ {
+     int res,ino;
+     struct buffer_head *bh;
+     struct msdos_dir_entry *de;
+     struct inode *inode;
+
+     bh = NULL;
+     inode = NULL;
+     if (name[0] == '.' && (len == 1 || (len == 2 && name[1] == '.'))) {
+         res = -EPERM;
+         goto rmdir_done;
+     }
+     res = msdos_find(dir,name,len,&bh,&de,&ino);
+     if (res < 0)
+         goto rmdir_done;
+     inode = iget(dir->i_sb,ino);
+     if (!inode) {
+         res = -ENOENT;
+         goto rmdir_done;
+     }
+     if (!S_ISDIR(inode->i_mode)) {
+         res = -ENOTDIR;
+         goto rmdir_done;
+     }
+     if (dir->i_dev != inode->i_dev || dir == inode) {
+         res = -EBUSY;
+         goto rmdir_done;
+     }
+     res = msdos_empty(inode);
+     if (res)
+         goto rmdir_done;
+     inode->i_nlink = 0;
+     inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+     dir->i_nlink--;
+     inode->i_dirt = dir->i_dirt = 1;
+     de->name[0] = DELETED_FLAG;
+     mark_buffer_dirty(bh, 1);
+     res = 0;
+ rmdir_done:
+     brelse(bh);
+     iput(dir);
+     iput(inode);
+     return res;
+ }
+
+
+ /*
+  * Common part of msdos_unlink() and msdos_unlink_umsdos().
+  *
+  * Removes the entry name (len bytes) from dir and releases the reference
+  * on dir.  With nospc non-zero only regular files may be removed and
+  * anything else yields -EPERM.  Returns the error from msdos_find() if the
+  * name is not found, -ENOENT if the inode cannot be obtained, and
+  * otherwise the (non-negative) msdos_find() result after the entry has
+  * been flagged as deleted and the inode marked busy.
+  */
+ static int msdos_unlinkx(
+     struct inode *dir,
+     const char *name,
+     int len,
+     int nospc) /* Flag special file ? */
+ {
+     int res,ino;
+     struct buffer_head *bh;
+     struct msdos_dir_entry *de;
+     struct inode *inode;
+
+     bh = NULL;
+     inode = NULL;
+     res = msdos_find(dir,name,len,&bh,&de,&ino);
+     if (res < 0)
+         goto unlink_done;
+     inode = iget(dir->i_sb,ino);
+     if (!inode) {
+         res = -ENOENT;
+         goto unlink_done;
+     }
+     if (nospc && !S_ISREG(inode->i_mode)) {
+         res = -EPERM;
+         goto unlink_done;
+     }
+     inode->i_nlink = 0;
+     inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+     MSDOS_I(inode)->i_busy = 1;
+     inode->i_dirt = dir->i_dirt = 1;
+     de->name[0] = DELETED_FLAG;
+     mark_buffer_dirty(bh, 1);
+ unlink_done:
+     brelse(bh);
+     iput(inode);
+     iput(dir);
+     return res;
+ }
+
+ /*
+  * Remove a regular file: msdos_unlinkx() with the special-file check
+  * enabled, so that anything but a regular file yields -EPERM.
+  */
+ int msdos_unlink(struct inode *dir,const char *name,int len)
+ {
+     int status;
+
+     status = msdos_unlinkx (dir,name,len,1);
+     return status;
+ }
+/*
+ Special entry for umsdos
+*/
+int msdos_unlink_umsdos(struct inode *dir,const char *name,int len)
+{
+ return msdos_unlinkx (dir,name,len,0);
+}
+
+ /*
+  * Rename an entry within one directory (both names already formatted).
+  *
+  * old_bh/old_de/old_ino describe the existing entry.  Identical names are
+  * a no-op.  If the old entry has been deleted meanwhile, -ENOENT is
+  * returned.  An existing target is replaced only if it has the same kind
+  * as the source (a target directory must also pass msdos_empty());
+  * otherwise -EPERM or the msdos_empty() error is returned.  The old entry
+  * is then renamed in place.  With conversion mode 'a' the inode is re-read
+  * to update its binary info.  Returns 0 on success, -EIO if the target's
+  * inode cannot be obtained.
+  */
+ static int rename_same_dir(struct inode *old_dir,char *old_name,
+     struct inode *new_dir,char *new_name,struct buffer_head *old_bh,
+     struct msdos_dir_entry *old_de,int old_ino)
+ {
+     struct buffer_head *new_bh;
+     struct msdos_dir_entry *new_de;
+     struct inode *new_inode,*old_inode;
+     int new_ino,exists,error;
+
+     if (strncmp(old_name,new_name,MSDOS_NAME) == 0)
+         return 0;
+     exists = msdos_scan(new_dir,new_name,&new_bh,&new_de,&new_ino) >= 0;
+     if (*(unsigned char *) old_de->name == DELETED_FLAG) {
+         if (exists)
+             brelse(new_bh);
+         return -ENOENT;
+     }
+     if (exists) {
+         new_inode = iget(new_dir->i_sb,new_ino);
+         if (!new_inode) {
+             brelse(new_bh);
+             return -EIO;
+         }
+         /* source and target must both be directories or both be files */
+         if (S_ISDIR(new_inode->i_mode))
+             error = (old_de->attr & ATTR_DIR) ?
+                 msdos_empty(new_inode) : -EPERM;
+         else
+             error = (old_de->attr & ATTR_DIR) ? -EPERM : 0;
+         if (error) {
+             iput(new_inode);
+             brelse(new_bh);
+             return error;
+         }
+         if (S_ISDIR(new_inode->i_mode)) {
+             new_dir->i_nlink--;
+             new_dir->i_dirt = 1;
+         }
+         new_inode->i_nlink = 0;
+         MSDOS_I(new_inode)->i_busy = 1;
+         new_inode->i_dirt = 1;
+         new_de->name[0] = DELETED_FLAG;
+         mark_buffer_dirty(new_bh, 1);
+         iput(new_inode);
+         brelse(new_bh);
+     }
+     memcpy(old_de->name,new_name,MSDOS_NAME);
+     mark_buffer_dirty(old_bh, 1);
+     if (MSDOS_SB(old_dir->i_sb)->conversion != 'a')
+         return 0;
+     /* update binary info */
+     old_inode = iget(old_dir->i_sb,old_ino);
+     if (old_inode != NULL) {
+         msdos_read_inode(old_inode);
+         iput(old_inode);
+     }
+     return 0;
+ }
+
+
+ /*
+  * Rename an entry into a different directory (both names already
+  * formatted).
+  *
+  * old_bh/old_de/old_ino describe the existing entry.  The move is refused
+  * with -EINVAL across devices and when new_dir is the entry itself or lies
+  * below it (checked by walking from new_dir up to the root).  A free slot
+  * is found in new_dir, extending the directory if necessary.  An existing
+  * target is replaced under the same rules as in rename_same_dir().  The
+  * old entry is copied into the free slot under the new name and flagged
+  * as deleted.  When a directory is moved, its ".." entry and the link
+  * counts of both parents are updated.  Returns 0 on success, -ENOENT if
+  * the old entry has been deleted meanwhile, -EIO if an inode cannot be
+  * obtained, or the error of the step that failed.
+  *
+  * Every error exit taken after the free slot and old_inode have been
+  * acquired releases both; previously free_bh leaked when the target inode
+  * could not be obtained or the target could not be replaced, and
+  * old_inode leaked when the free slot's inode could not be obtained.
+  */
+ static int rename_diff_dir(struct inode *old_dir,char *old_name,
+     struct inode *new_dir,char *new_name,struct buffer_head *old_bh,
+     struct msdos_dir_entry *old_de,int old_ino)
+ {
+     struct buffer_head *new_bh,*free_bh,*dotdot_bh;
+     struct msdos_dir_entry *new_de,*free_de,*dotdot_de;
+     struct inode *old_inode,*new_inode,*free_inode,*dotdot_inode,*walk;
+     int new_ino,free_ino,dotdot_ino;
+     int error,exists,ino;
+
+     if (old_dir->i_dev != new_dir->i_dev) return -EINVAL;
+     if (old_ino == new_dir->i_ino) return -EINVAL;
+     /* refuse to move a directory into its own subtree */
+     if (!(walk = iget(new_dir->i_sb,new_dir->i_ino))) return -EIO;
+     while (walk->i_ino != MSDOS_ROOT_INO) {
+         ino = msdos_parent_ino(walk,1);
+         iput(walk);
+         if (ino < 0) return ino;
+         if (ino == old_ino) return -EINVAL;
+         if (!(walk = iget(new_dir->i_sb,ino))) return -EIO;
+     }
+     iput(walk);
+     while ((error = msdos_scan(new_dir,NULL,&free_bh,&free_de,&free_ino)) <
+         0) {
+         if (error != -ENOENT) return error;
+         error = msdos_add_cluster(new_dir);
+         if (error) return error;
+     }
+     exists = msdos_scan(new_dir,new_name,&new_bh,&new_de,&new_ino) >= 0;
+     if (!(old_inode = iget(old_dir->i_sb,old_ino))) {
+         brelse(free_bh);
+         if (exists) brelse(new_bh);
+         return -EIO;
+     }
+     if (*(unsigned char *) old_de->name == DELETED_FLAG) {
+         iput(old_inode);
+         brelse(free_bh);
+         if (exists) brelse(new_bh);
+         return -ENOENT;
+     }
+     new_inode = NULL; /* to make GCC happy */
+     if (exists) {
+         if (!(new_inode = iget(new_dir->i_sb,new_ino))) {
+             iput(old_inode);
+             brelse(free_bh);
+             brelse(new_bh);
+             return -EIO;
+         }
+         error = S_ISDIR(new_inode->i_mode) ? (old_de->attr & ATTR_DIR) ?
+             msdos_empty(new_inode) : -EPERM : (old_de->attr & ATTR_DIR)
+             ? -EPERM : 0;
+         if (error) {
+             iput(new_inode);
+             iput(old_inode);
+             brelse(free_bh);
+             brelse(new_bh);
+             return error;
+         }
+         new_inode->i_nlink = 0;
+         MSDOS_I(new_inode)->i_busy = 1;
+         new_inode->i_dirt = 1;
+         new_de->name[0] = DELETED_FLAG;
+         mark_buffer_dirty(new_bh, 1);
+     }
+     memcpy(free_de,old_de,sizeof(struct msdos_dir_entry));
+     memcpy(free_de->name,new_name,MSDOS_NAME);
+     if (!(free_inode = iget(new_dir->i_sb,free_ino))) {
+         free_de->name[0] = DELETED_FLAG;
+ /* Don't mark free_bh as dirty. Both states are supposed to be equivalent. */
+         brelse(free_bh);
+         if (exists) {
+             iput(new_inode);
+             brelse(new_bh);
+         }
+         iput(old_inode);
+         return -EIO;
+     }
+     if (exists && S_ISDIR(new_inode->i_mode)) {
+         new_dir->i_nlink--;
+         new_dir->i_dirt = 1;
+     }
+     msdos_read_inode(free_inode);
+     MSDOS_I(old_inode)->i_busy = 1;
+     cache_inval_inode(old_inode);
+     old_inode->i_dirt = 1;
+     old_de->name[0] = DELETED_FLAG;
+     mark_buffer_dirty(old_bh, 1);
+     mark_buffer_dirty(free_bh, 1);
+     if (!exists) iput(free_inode);
+     else {
+         MSDOS_I(new_inode)->i_depend = free_inode;
+         MSDOS_I(free_inode)->i_old = new_inode;
+         /* free_inode is put when putting new_inode */
+         iput(new_inode);
+         brelse(new_bh);
+     }
+     if (S_ISDIR(old_inode->i_mode)) {
+         /* a moved directory needs its ".." entry pointed at new_dir */
+         if ((error = msdos_scan(old_inode,MSDOS_DOTDOT,&dotdot_bh,
+             &dotdot_de,&dotdot_ino)) < 0) goto rename_done;
+         if (!(dotdot_inode = iget(old_inode->i_sb,dotdot_ino))) {
+             brelse(dotdot_bh);
+             error = -EIO;
+             goto rename_done;
+         }
+         dotdot_de->start = MSDOS_I(dotdot_inode)->i_start =
+             MSDOS_I(new_dir)->i_start;
+         dotdot_inode->i_dirt = 1;
+         mark_buffer_dirty(dotdot_bh, 1);
+         old_dir->i_nlink--;
+         new_dir->i_nlink++;
+         /* no need to mark them dirty */
+         dotdot_inode->i_nlink = new_dir->i_nlink;
+         iput(dotdot_inode);
+         brelse(dotdot_bh);
+     }
+     error = 0;
+ rename_done:
+     brelse(free_bh);
+     iput(old_inode);
+     return error;
+ }
+
+
+ /*
+  * Rename old_name in old_dir to new_name in new_dir.
+  *
+  * Both names are formatted with the name_check mode of their directory's
+  * mount ("." and ".." are accepted only for the old name), the old entry
+  * is located, and the work is done under the creation lock by
+  * rename_same_dir() or rename_diff_dir().  The references on both
+  * directories are released on every path.  Returns the first negative
+  * result from formatting or scanning, otherwise the helper's result.
+  */
+ int msdos_rename(struct inode *old_dir,const char *old_name,int old_len,
+     struct inode *new_dir,const char *new_name,int new_len)
+ {
+     char old_msdos_name[MSDOS_NAME],new_msdos_name[MSDOS_NAME];
+     struct buffer_head *old_bh;
+     struct msdos_dir_entry *old_de;
+     int old_ino,error;
+
+     error = msdos_format_name(MSDOS_SB(old_dir->i_sb)->name_check,
+         old_name,old_len,old_msdos_name,1);
+     if (error < 0)
+         goto rename_done;
+     error = msdos_format_name(MSDOS_SB(new_dir->i_sb)->name_check,
+         new_name,new_len,new_msdos_name,0);
+     if (error < 0)
+         goto rename_done;
+     error = msdos_scan(old_dir,old_msdos_name,&old_bh,&old_de,&old_ino);
+     if (error < 0)
+         goto rename_done;
+     lock_creation();
+     if (old_dir != new_dir)
+         error = rename_diff_dir(old_dir,old_msdos_name,new_dir,
+             new_msdos_name,old_bh,old_de,old_ino);
+     else
+         error = rename_same_dir(old_dir,old_msdos_name,new_dir,
+             new_msdos_name,old_bh,old_de,old_ino);
+     unlock_creation();
+     brelse(old_bh);
+ rename_done:
+     iput(old_dir);
+     iput(new_dir);
+     return error;
+ }
diff --git a/fs/namei.c b/fs/namei.c
new file mode 100644
index 000000000..f5f8b5c14
--- /dev/null
+++ b/fs/namei.c
@@ -0,0 +1,849 @@
+/*
+ * linux/fs/namei.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * Some corrections by tytso.
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+
+#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
+
+/*
+ * Work out how many bytes of a filename may be read from user space
+ * starting at "address".
+ *
+ * Returns:
+ *	-EFAULT	the address is not inside a user-accessible area
+ *	0	no byte limit applies (kernel segment, more than a page
+ *		left in the area, or a user-accessible area follows
+ *		directly), so running out of room means ENAMETOOLONG
+ *	>0	bytes left in the area; reading past them means EFAULT
+ */
+static inline int get_max_filename(unsigned long address)
+{
+	struct vm_area_struct * area;
+	unsigned long room;
+
+	if (get_fs() == KERNEL_DS)
+		return 0;
+	/* first area that ends above the address */
+	area = current->mm->mmap;
+	while (area && area->vm_end <= address)
+		area = area->vm_next;
+	if (!area)
+		return -EFAULT;
+	if (address < area->vm_start || !(area->vm_page_prot & PAGE_USER))
+		return -EFAULT;
+	room = area->vm_end - address;
+	if (room > PAGE_SIZE)
+		return 0;
+	if (area->vm_next && area->vm_next->vm_start == area->vm_end &&
+	    (area->vm_next->vm_page_prot & PAGE_USER))
+		return 0;
+	return room;
+}
+
+/*
+ * Copy a filename from user space into a freshly allocated kernel page
+ * before it is used; this reduces races and allows extra checking.
+ *
+ * On success 0 is returned and *result points at the NUL-terminated
+ * copy, which the caller releases with putname().  Errors:
+ *	-ENOENT		empty pathname (POSIX.1 2.4)
+ *	-ENOMEM		no free page
+ *	-ENAMETOOLONG / -EFAULT
+ *			the terminator was not found within the limit
+ *			computed by get_max_filename()
+ */
+int getname(const char * filename, char **result)
+{
+	int limit, overflow_error;
+	unsigned long page;
+	char * dst, ch;
+
+	limit = get_max_filename((unsigned long) filename);
+	if (limit < 0)
+		return limit;
+	if (limit) {
+		overflow_error = -EFAULT;
+	} else {
+		overflow_error = -ENAMETOOLONG;
+		limit = PAGE_SIZE;
+	}
+	ch = get_fs_byte(filename++);
+	if (!ch)
+		return -ENOENT;
+	page = __get_free_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+	dst = (char *) page;
+	*result = dst;
+	for (--limit; limit; --limit) {
+		*dst++ = ch;
+		ch = get_fs_byte(filename++);
+		if (!ch) {
+			*dst = '\0';
+			return 0;
+		}
+	}
+	/* ran out of room before seeing the terminator */
+	free_page(page);
+	return overflow_error;
+}
+
+/*
+ * Release the page holding a filename obtained from getname().
+ */
+void putname(char * name)
+{
+	unsigned long page = (unsigned long) name;
+
+	free_page(page);
+}
+
+/*
+ * permission()
+ *
+ * Check whether the current process may access "inode" in the ways
+ * given by "mask".  Returns 1 when access is allowed and 0 when not;
+ * when the inode supplies its own permission operation, that
+ * operation's result is returned instead.
+ *
+ * The "fsuid" is used here, which allows arbitrary permissions for
+ * filesystem access without changing the "normal" uids that are used
+ * for other things.
+ */
+int permission(struct inode * inode,int mask)
+{
+	int bits;
+
+	/* a filesystem-specific check, when present, decides alone */
+	if (inode->i_op && inode->i_op->permission)
+		return inode->i_op->permission(inode, mask);
+	/* nobody gets write access to an immutable file */
+	if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
+		return 0;
+	/* pick the owner, group or other permission bits */
+	bits = inode->i_mode;
+	if (current->fsuid == inode->i_uid)
+		bits >>= 6;
+	else if (in_group_p(inode->i_gid))
+		bits >>= 3;
+	if ((bits & mask & 0007) == mask)
+		return 1;
+	return fsuser() ? 1 : 0;
+}
+
+/*
+ * get_write_access() gets write permission for a file.
+ * put_write_access() releases this write permission.
+ * This is used for regular files.
+ * Write (and maybe mmap read-write shared) accesses cannot be supported
+ * at the same time as MAP_DENYWRITE mappings, so the request fails with
+ * -ETXTBSY when any task maps the inode with VM_DENYWRITE.  Otherwise
+ * i_wcount is incremented and 0 is returned.
+ */
+int get_write_access(struct inode * inode)
+{
+	struct task_struct ** slot;
+	struct vm_area_struct * area;
+
+	/* shortcut: only scan for regular files with more than one user */
+	if ((inode->i_count > 1) && S_ISREG(inode->i_mode)) {
+		slot = &LAST_TASK;
+		while (slot > &FIRST_TASK) {
+			struct task_struct * task = *slot;
+
+			slot--;
+			if (!task)
+				continue;
+			for (area = task->mm->mmap; area; area = area->vm_next)
+				if (inode == area->vm_inode &&
+				    (area->vm_flags & VM_DENYWRITE))
+					return -ETXTBSY;
+		}
+	}
+	inode->i_wcount++;
+	return 0;
+}
+
+/*
+ * Drop one write-access count taken by get_write_access().
+ */
+void put_write_access(struct inode * inode)
+{
+	inode->i_wcount -= 1;
+}
+
+/*
+ * lookup() looks up one component of a pathname through the directory's
+ * own lookup operation.  It also handles ".." at the process root
+ * (stays at the root) and ".." at the root of a mounted filesystem
+ * (steps to the covered inode first).
+ *
+ * The reference on "dir" is consumed, except that it is handed back
+ * through *result for an empty name or for ".." at the root.
+ */
+int lookup(struct inode * dir,const char * name, int len,
+	struct inode ** result)
+{
+	int may_search, is_dotdot, error;
+
+	*result = NULL;
+	if (!dir)
+		return -ENOENT;
+	/* check permissions before traversing mount-points */
+	may_search = permission(dir,MAY_EXEC);
+	is_dotdot = (len==2 && name[0] == '.' && name[1] == '.');
+	if (is_dotdot && dir == current->fs->root) {
+		*result = dir;
+		return 0;
+	}
+	if (is_dotdot) {
+		struct super_block * sb = dir->i_sb;
+
+		if (sb && dir == sb->s_mounted) {
+			iput(dir);
+			dir = sb->s_covered;
+			if (!dir)
+				return -ENOENT;
+			dir->i_count++;
+		}
+	}
+	if (!dir->i_op || !dir->i_op->lookup) {
+		error = -ENOTDIR;
+		goto drop_dir;
+	}
+	if (!may_search) {
+		error = -EACCES;
+		goto drop_dir;
+	}
+	if (len == 0) {
+		*result = dir;
+		return 0;
+	}
+	return dir->i_op->lookup(dir,name,len,result);
+
+drop_dir:
+	iput(dir);
+	return error;
+}
+
+/*
+ * Resolve "inode" through its follow_link operation when it has one.
+ * Without such an operation the inode itself is returned in *res_inode
+ * and the reference on "dir" is dropped.  A NULL dir or inode releases
+ * both and yields -ENOENT.
+ */
+int follow_link(struct inode * dir, struct inode * inode,
+	int flag, int mode, struct inode ** res_inode)
+{
+	if (dir && inode) {
+		if (inode->i_op && inode->i_op->follow_link)
+			return inode->i_op->follow_link(dir,inode,flag,mode,
+				res_inode);
+		iput(dir);
+		*res_inode = inode;
+		return 0;
+	}
+	iput(dir);
+	iput(inode);
+	*res_inode = NULL;
+	return -ENOENT;
+}
+
+/*
+ * dir_namei()
+ *
+ * Walk every component of "pathname" except the last one.  On success
+ * 0 is returned, *res_inode is the directory holding the last
+ * component, and *name / *namelen describe that component inside
+ * pathname.  A NULL "base" means the current working directory; a
+ * leading '/' restarts from the process root.
+ */
+static int dir_namei(const char * pathname, int * namelen, const char ** name,
+	struct inode * base, struct inode ** res_inode)
+{
+	const char * component;
+	struct inode * next;
+	int len, error;
+	char ch;
+
+	*res_inode = NULL;
+	if (!base) {
+		base = current->fs->pwd;
+		base->i_count++;
+	}
+	ch = *pathname;
+	if (ch == '/') {
+		iput(base);
+		base = current->fs->root;
+		pathname++;
+		base->i_count++;
+	}
+	for (;;) {
+		/* measure the next component */
+		component = pathname;
+		len = 0;
+		while ((ch = *(pathname++)) && (ch != '/'))
+			len++;
+		if (!ch)
+			break;
+		base->i_count++;	/* lookup uses up one reference */
+		error = lookup(base,component,len,&next);
+		if (error) {
+			iput(base);
+			return error;
+		}
+		error = follow_link(base,next,0,0,&base);
+		if (error)
+			return error;
+	}
+	if (!base->i_op || !base->i_op->lookup) {
+		iput(base);
+		return -ENOTDIR;
+	}
+	*name = component;
+	*namelen = len;
+	*res_inode = base;
+	return 0;
+}
+
+/*
+ * Common part of namei() and lnamei(): resolve "pathname" relative to
+ * "base" and return the resulting inode in *res_inode.  The last
+ * component is passed through follow_link() only when "follow_links"
+ * is set.
+ */
+static int _namei(const char * pathname, struct inode * base,
+	int follow_links, struct inode ** res_inode)
+{
+	const char * last;
+	int last_len, error;
+	struct inode * dir, * inode;
+
+	*res_inode = NULL;
+	error = dir_namei(pathname,&last_len,&last,base,&dir);
+	if (error)
+		return error;
+	dir->i_count++;	/* lookup uses up dir */
+	error = lookup(dir,last,last_len,&inode);
+	if (error) {
+		iput(dir);
+		return error;
+	}
+	if (!follow_links) {
+		iput(dir);
+	} else {
+		error = follow_link(dir,inode,0,0,&inode);
+		if (error)
+			return error;
+	}
+	*res_inode = inode;
+	return 0;
+}
+
+/*
+ * Like namei(), but the last component is not passed through
+ * follow_link().
+ */
+int lnamei(const char * pathname, struct inode ** res_inode)
+{
+	char * kname;
+	int error;
+
+	error = getname(pathname,&kname);
+	if (error)
+		return error;
+	error = _namei(kname,NULL,0,res_inode);
+	putname(kname);
+	return error;
+}
+
+/*
+ * namei()
+ *
+ * Used by most simple commands to get the inode of a specified name.
+ * Open, link etc use their own routines, but this is enough for things
+ * like 'chmod' etc.  The pathname is copied from user space first, and
+ * the last component is passed through follow_link().
+ */
+int namei(const char * pathname, struct inode ** res_inode)
+{
+	char * kname;
+	int error;
+
+	error = getname(pathname,&kname);
+	if (error)
+		return error;
+	error = _namei(kname,NULL,1,res_inode);
+	putname(kname);
+	return error;
+}
+
+/*
+ * open_namei()
+ *
+ * namei for open - this is in fact almost the whole open-routine.
+ *
+ * Note that the low bits of "flag" aren't the same as in the open
+ * system call - they are 00 - no permissions needed
+ * 01 - read permission needed
+ * 10 - write permission needed
+ * 11 - read/write permissions needed
+ * which is a lot more logical, and also allows the "no perm" needed
+ * for symlinks (where the permissions are checked later).
+ *
+ * "pathname" is resolved relative to "base" by dir_namei(). On success
+ * 0 is returned and the inode is stored in *res_inode; otherwise a
+ * negative errno is returned. With O_CREAT a name that lookup() cannot
+ * find is created through the directory's create operation while
+ * dir->i_sem is held. With O_TRUNC the file is cut to zero length
+ * before it is returned.
+ */
+int open_namei(const char * pathname, int flag, int mode,
+ struct inode ** res_inode, struct inode * base)
+{
+ const char * basename;
+ int namelen,error;
+ struct inode * dir, *inode;
+
+ mode &= S_IALLUGO & ~current->fs->umask; /* apply the umask to the creation mode */
+ mode |= S_IFREG;
+ error = dir_namei(pathname,&namelen,&basename,base,&dir);
+ if (error)
+ return error;
+ if (!namelen) { /* special case: '/usr/' etc */
+ if (flag & 2) { /* write permission bit, see above */
+ iput(dir);
+ return -EISDIR;
+ }
+ /* thanks to Paul Pluzhnikov for noticing this was missing.. */
+ if (!permission(dir,ACC_MODE(flag))) {
+ iput(dir);
+ return -EACCES;
+ }
+ *res_inode=dir;
+ return 0;
+ }
+ dir->i_count++; /* lookup eats the dir */
+ if (flag & O_CREAT) {
+ down(&dir->i_sem);
+ error = lookup(dir,basename,namelen,&inode);
+ if (!error) { /* the name already exists */
+ if (flag & O_EXCL) {
+ iput(inode);
+ error = -EEXIST;
+ }
+ } else if (!permission(dir,MAY_WRITE | MAY_EXEC))
+ error = -EACCES;
+ else if (!dir->i_op || !dir->i_op->create)
+ error = -EACCES;
+ else if (IS_RDONLY(dir))
+ error = -EROFS;
+ else {
+ dir->i_count++; /* create eats the dir */
+ error = dir->i_op->create(dir,basename,namelen,mode,res_inode);
+ up(&dir->i_sem);
+ iput(dir);
+ return error;
+ }
+ up(&dir->i_sem);
+ } else
+ error = lookup(dir,basename,namelen,&inode);
+ if (error) {
+ iput(dir);
+ return error;
+ }
+ error = follow_link(dir,inode,flag,mode,&inode);
+ if (error)
+ return error;
+ if (S_ISDIR(inode->i_mode) && (flag & 2)) { /* no write access to directories */
+ iput(inode);
+ return -EISDIR;
+ }
+ if (!permission(inode,ACC_MODE(flag))) {
+ iput(inode);
+ return -EACCES;
+ }
+ if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
+ if (IS_NODEV(inode)) {
+ iput(inode);
+ return -EACCES;
+ }
+ flag &= ~O_TRUNC; /* device nodes are never truncated */
+ } else {
+ if (IS_RDONLY(inode) && (flag & 2)) {
+ iput(inode);
+ return -EROFS;
+ }
+ }
+ /*
+ * An append-only file must be opened in append mode for writing
+ */
+ if (IS_APPEND(inode) && ((flag & 2) && !(flag & O_APPEND))) {
+ iput(inode);
+ return -EPERM;
+ }
+ if (flag & O_TRUNC) {
+ struct iattr newattrs;
+
+ if ((error = get_write_access(inode))) {
+ iput(inode);
+ return error;
+ }
+ newattrs.ia_size = 0;
+ newattrs.ia_valid = ATTR_SIZE;
+ if ((error = notify_change(inode, &newattrs))) {
+ put_write_access(inode);
+ iput(inode);
+ return error;
+ }
+ inode->i_size = 0;
+ if (inode->i_op && inode->i_op->truncate)
+ inode->i_op->truncate(inode);
+ inode->i_dirt = 1;
+ put_write_access(inode); /* write access was only needed for the truncate */
+ }
+ *res_inode = inode;
+ return 0;
+}
+
+/*
+ * Create a filesystem node named "filename" (already in kernel space)
+ * with the given mode (after applying the umask) and device number.
+ * The directory's mknod operation is called with dir->i_sem held.
+ */
+int do_mknod(const char * filename, int mode, dev_t dev)
+{
+	const char * last;
+	int last_len, error;
+	struct inode * dir;
+
+	mode &= ~current->fs->umask;
+	error = dir_namei(filename,&last_len,&last, NULL, &dir);
+	if (error)
+		return error;
+	if (!last_len)
+		error = -ENOENT;
+	else if (IS_RDONLY(dir))
+		error = -EROFS;
+	else if (!permission(dir,MAY_WRITE | MAY_EXEC))
+		error = -EACCES;
+	else if (!dir->i_op || !dir->i_op->mknod)
+		error = -EPERM;
+	else {
+		dir->i_count++;	/* extra reference for the mknod operation */
+		down(&dir->i_sem);
+		error = dir->i_op->mknod(dir,last,last_len,mode,dev);
+		up(&dir->i_sem);
+	}
+	iput(dir);
+	return error;
+}
+
+/*
+ * mknod system call.  Directories are refused, and anything but a FIFO
+ * requires fsuser().  A mode without a file type means a regular file;
+ * only regular, character, block and FIFO types are accepted.
+ */
+asmlinkage int sys_mknod(const char * filename, int mode, dev_t dev)
+{
+	int error, type;
+	char * kname;
+
+	if (S_ISDIR(mode) || (!S_ISFIFO(mode) && !fsuser()))
+		return -EPERM;
+	type = mode & S_IFMT;
+	if (type == 0)
+		mode |= S_IFREG;
+	else if (type != S_IFREG && type != S_IFCHR &&
+		 type != S_IFBLK && type != S_IFIFO)
+		return -EINVAL;
+	error = getname(filename,&kname);
+	if (error)
+		return error;
+	error = do_mknod(kname,mode,dev);
+	putname(kname);
+	return error;
+}
+
+/*
+ * Create the directory "pathname" (already in kernel space).  The mode
+ * is limited to the 0777 bits and filtered by the umask; the parent's
+ * mkdir operation is called with its i_sem held.
+ */
+static int do_mkdir(const char * pathname, int mode)
+{
+	const char * last;
+	int last_len, error;
+	struct inode * dir;
+
+	error = dir_namei(pathname,&last_len,&last,NULL,&dir);
+	if (error)
+		return error;
+	if (!last_len)
+		error = -ENOENT;
+	else if (IS_RDONLY(dir))
+		error = -EROFS;
+	else if (!permission(dir,MAY_WRITE | MAY_EXEC))
+		error = -EACCES;
+	else if (!dir->i_op || !dir->i_op->mkdir)
+		error = -EPERM;
+	else {
+		dir->i_count++;	/* extra reference for the mkdir operation */
+		down(&dir->i_sem);
+		error = dir->i_op->mkdir(dir, last, last_len,
+			mode & 0777 & ~current->fs->umask);
+		up(&dir->i_sem);
+	}
+	iput(dir);
+	return error;
+}
+
+/*
+ * mkdir system call: copy the pathname in and hand it to do_mkdir().
+ */
+asmlinkage int sys_mkdir(const char * pathname, int mode)
+{
+	char * kname;
+	int error;
+
+	error = getname(pathname,&kname);
+	if (error)
+		return error;
+	error = do_mkdir(kname,mode);
+	putname(kname);
+	return error;
+}
+
+/*
+ * Remove the directory "name" (already in kernel space).  After the
+ * checks have passed, the parent's reference is handed to its rmdir
+ * operation; on a failed check it is dropped here.
+ */
+static int do_rmdir(const char * name)
+{
+	const char * last;
+	int last_len, error;
+	struct inode * dir;
+
+	error = dir_namei(name,&last_len,&last,NULL,&dir);
+	if (error)
+		return error;
+	if (!last_len)
+		error = -ENOENT;
+	else if (IS_RDONLY(dir))
+		error = -EROFS;
+	else if (!permission(dir,MAY_WRITE | MAY_EXEC))
+		error = -EACCES;
+	else if (IS_APPEND(dir))
+		/* a subdirectory cannot be removed from an append-only directory */
+		error = -EPERM;
+	else if (!dir->i_op || !dir->i_op->rmdir)
+		error = -EPERM;
+	else
+		return dir->i_op->rmdir(dir,last,last_len);
+	iput(dir);
+	return error;
+}
+
+/*
+ * rmdir system call: copy the pathname in and hand it to do_rmdir().
+ */
+asmlinkage int sys_rmdir(const char * pathname)
+{
+	char * kname;
+	int error;
+
+	error = getname(pathname,&kname);
+	if (error)
+		return error;
+	error = do_rmdir(kname);
+	putname(kname);
+	return error;
+}
+
+/*
+ * Remove the directory entry "name" (already in kernel space).  After
+ * the checks have passed, the parent's reference is handed to its
+ * unlink operation; on a failed check it is dropped here.
+ */
+static int do_unlink(const char * name)
+{
+	const char * last;
+	int last_len, error;
+	struct inode * dir;
+
+	error = dir_namei(name,&last_len,&last,NULL,&dir);
+	if (error)
+		return error;
+	if (!last_len)
+		error = -EPERM;
+	else if (IS_RDONLY(dir))
+		error = -EROFS;
+	else if (!permission(dir,MAY_WRITE | MAY_EXEC))
+		error = -EACCES;
+	else if (IS_APPEND(dir))
+		/* a file cannot be removed from an append-only directory */
+		error = -EPERM;
+	else if (!dir->i_op || !dir->i_op->unlink)
+		error = -EPERM;
+	else
+		return dir->i_op->unlink(dir,last,last_len);
+	iput(dir);
+	return error;
+}
+
+/*
+ * unlink system call: copy the pathname in and hand it to do_unlink().
+ */
+asmlinkage int sys_unlink(const char * pathname)
+{
+	char * kname;
+	int error;
+
+	error = getname(pathname,&kname);
+	if (error)
+		return error;
+	error = do_unlink(kname);
+	putname(kname);
+	return error;
+}
+
+/*
+ * Create "newname" as a symbolic link containing "oldname" (both
+ * already in kernel space).  The parent's symlink operation is called
+ * with its i_sem held.
+ */
+static int do_symlink(const char * oldname, const char * newname)
+{
+	const char * last;
+	int last_len, error;
+	struct inode * dir;
+
+	error = dir_namei(newname,&last_len,&last,NULL,&dir);
+	if (error)
+		return error;
+	if (!last_len)
+		error = -ENOENT;
+	else if (IS_RDONLY(dir))
+		error = -EROFS;
+	else if (!permission(dir,MAY_WRITE | MAY_EXEC))
+		error = -EACCES;
+	else if (!dir->i_op || !dir->i_op->symlink)
+		error = -EPERM;
+	else {
+		dir->i_count++;	/* extra reference for the symlink operation */
+		down(&dir->i_sem);
+		error = dir->i_op->symlink(dir,last,last_len,oldname);
+		up(&dir->i_sem);
+	}
+	iput(dir);
+	return error;
+}
+
+/*
+ * symlink system call: copy both names in and hand them to
+ * do_symlink().
+ */
+asmlinkage int sys_symlink(const char * oldname, const char * newname)
+{
+	char * from, * to;
+	int error;
+
+	error = getname(oldname,&from);
+	if (error)
+		return error;
+	error = getname(newname,&to);
+	if (!error) {
+		error = do_symlink(from,to);
+		putname(to);
+	}
+	putname(from);
+	return error;
+}
+
+/*
+ * Create "newname" (already in kernel space) as a new link to
+ * "oldinode".  The reference on oldinode is dropped here when a check
+ * fails and otherwise handed to the directory's link operation, which
+ * is called with dir->i_sem held.
+ */
+static int do_link(struct inode * oldinode, const char * newname)
+{
+	const char * last;
+	int last_len, error;
+	struct inode * dir;
+
+	error = dir_namei(newname,&last_len,&last,NULL,&dir);
+	if (error) {
+		iput(oldinode);
+		return error;
+	}
+	if (!last_len)
+		error = -EPERM;
+	else if (IS_RDONLY(dir))
+		error = -EROFS;
+	if (error) {
+		iput(oldinode);
+		iput(dir);
+		return error;
+	}
+	if (dir->i_dev != oldinode->i_dev)
+		error = -EXDEV;
+	else if (!permission(dir,MAY_WRITE | MAY_EXEC))
+		error = -EACCES;
+	else if (IS_APPEND(oldinode) || IS_IMMUTABLE(oldinode))
+		/* a link to an append-only or immutable file cannot be created */
+		error = -EPERM;
+	else if (!dir->i_op || !dir->i_op->link)
+		error = -EPERM;
+	if (error) {
+		iput(dir);
+		iput(oldinode);
+		return error;
+	}
+	dir->i_count++;	/* extra reference for the link operation */
+	down(&dir->i_sem);
+	error = dir->i_op->link(oldinode, dir, last, last_len);
+	up(&dir->i_sem);
+	iput(dir);
+	return error;
+}
+
+/*
+ * link system call: resolve the existing name to an inode with
+ * namei(), copy the new name in, and hand both to do_link().
+ */
+asmlinkage int sys_link(const char * oldname, const char * newname)
+{
+	struct inode * oldinode;
+	char * to;
+	int error;
+
+	error = namei(oldname, &oldinode);
+	if (!error) {
+		error = getname(newname,&to);
+		if (!error) {
+			error = do_link(oldinode,to);
+			putname(to);
+		} else
+			iput(oldinode);
+	}
+	return error;
+}
+
+/*
+ * True when the component is empty, "." or "..".
+ */
+static int rename_name_refused(const char * name, int len)
+{
+	if (!len)
+		return 1;
+	if (name[0] != '.')
+		return 0;
+	if (len == 1)
+		return 1;
+	return name[1] == '.' && len == 2;
+}
+
+/*
+ * Rename "oldname" to "newname" (both already in kernel space).  Both
+ * parent directories are resolved and checked; the rename operation of
+ * the old directory is then called with new_dir->i_sem held.
+ */
+static int do_rename(const char * oldname, const char * newname)
+{
+	struct inode * old_dir, * new_dir;
+	const char * old_base, * new_base;
+	int old_len, new_len, error;
+
+	error = dir_namei(oldname,&old_len,&old_base,NULL,&old_dir);
+	if (error)
+		return error;
+	if (!permission(old_dir,MAY_WRITE | MAY_EXEC)) {
+		error = -EACCES;
+		goto put_old;
+	}
+	if (rename_name_refused(old_base, old_len)) {
+		error = -EPERM;
+		goto put_old;
+	}
+	error = dir_namei(newname,&new_len,&new_base,NULL,&new_dir);
+	if (error)
+		goto put_old;
+	if (!permission(new_dir,MAY_WRITE | MAY_EXEC)) {
+		error = -EACCES;
+		goto put_both;
+	}
+	if (rename_name_refused(new_base, new_len)) {
+		error = -EPERM;
+		goto put_both;
+	}
+	if (new_dir->i_dev != old_dir->i_dev) {
+		error = -EXDEV;
+		goto put_both;
+	}
+	if (IS_RDONLY(new_dir) || IS_RDONLY(old_dir)) {
+		error = -EROFS;
+		goto put_both;
+	}
+	/* a file cannot be removed from an append-only directory */
+	if (IS_APPEND(old_dir)) {
+		error = -EPERM;
+		goto put_both;
+	}
+	if (!old_dir->i_op || !old_dir->i_op->rename) {
+		error = -EPERM;
+		goto put_both;
+	}
+	new_dir->i_count++;	/* extra reference for the rename operation */
+	down(&new_dir->i_sem);
+	error = old_dir->i_op->rename(old_dir, old_base, old_len,
+		new_dir, new_base, new_len);
+	up(&new_dir->i_sem);
+	iput(new_dir);
+	return error;
+
+put_both:
+	iput(old_dir);
+	iput(new_dir);
+	return error;
+put_old:
+	iput(old_dir);
+	return error;
+}
+
+/*
+ * rename system call: copy both names in and hand them to do_rename().
+ */
+asmlinkage int sys_rename(const char * oldname, const char * newname)
+{
+	char * from, * to;
+	int error;
+
+	error = getname(oldname,&from);
+	if (error)
+		return error;
+	error = getname(newname,&to);
+	if (!error) {
+		error = do_rename(from,to);
+		putname(to);
+	}
+	putname(from);
+	return error;
+}
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
new file mode 100644
index 000000000..8610c95b1
--- /dev/null
+++ b/fs/nfs/Makefile
@@ -0,0 +1,31 @@
+#
+# Makefile for the linux nfs-filesystem routines.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+.c.s: # C source to assembly
+ $(CC) $(CFLAGS) -S $<
+.c.o: # C source to object file
+ $(CC) $(CFLAGS) -c $<
+.s.o: # assembly to object file
+ $(AS) -o $*.o $<
+
+OBJS= proc.o sock.o inode.o file.o dir.o \
+ symlink.o mmap.o
+
+nfs.o: $(OBJS) # link all nfs objects into one relocatable object
+ $(LD) -r -o nfs.o $(OBJS)
+
+dep: # regenerate .depend from every .c file in this directory
+ $(CPP) -M *.c > .depend
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
new file mode 100644
index 000000000..62a3e0821
--- /dev/null
+++ b/fs/nfs/dir.c
@@ -0,0 +1,609 @@
+/*
+ * linux/fs/nfs/dir.c
+ *
+ * Copyright (C) 1992 Rick Sladkey
+ *
+ * nfs directory handling functions
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/nfs_fs.h>
+#include <linux/fcntl.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/malloc.h>
+#include <linux/mm.h>
+
+#include <asm/segment.h> /* for fs functions */
+
+#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
+#define ROUND_UP(x) (((x)+3) & ~3)
+
+static int nfs_dir_read(struct inode *, struct file *filp, char *buf,
+ int count);
+static int nfs_readdir(struct inode *, struct file *, struct dirent *, int);
+static int nfs_lookup(struct inode *dir, const char *name, int len,
+ struct inode **result);
+static int nfs_create(struct inode *dir, const char *name, int len, int mode,
+ struct inode **result);
+static int nfs_mkdir(struct inode *dir, const char *name, int len, int mode);
+static int nfs_rmdir(struct inode *dir, const char *name, int len);
+static int nfs_unlink(struct inode *dir, const char *name, int len);
+static int nfs_symlink(struct inode *inode, const char *name, int len,
+ const char *symname);
+static int nfs_link(struct inode *oldinode, struct inode *dir,
+ const char *name, int len);
+static int nfs_mknod(struct inode *dir, const char *name, int len, int mode,
+ int rdev);
+static int nfs_rename(struct inode *old_dir, const char *old_name,
+ int old_len, struct inode *new_dir, const char *new_name,
+ int new_len);
+
+static struct file_operations nfs_dir_operations = {
+ NULL, /* lseek - default */
+ nfs_dir_read, /* read - always fails with -EISDIR */
+ NULL, /* write - not provided for directories */
+ nfs_readdir, /* readdir - one entry per call, backed by a small cache */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ NULL /* fsync */
+};
+
+struct inode_operations nfs_dir_inode_operations = {
+ &nfs_dir_operations, /* default directory file-ops */
+ nfs_create, /* create */
+ nfs_lookup, /* lookup - uses the lookup cache unless mounted noac */
+ nfs_link, /* link */
+ nfs_unlink, /* unlink */
+ nfs_symlink, /* symlink */
+ nfs_mkdir, /* mkdir */
+ nfs_rmdir, /* rmdir */
+ nfs_mknod, /* mknod - sent to the server as a create request */
+ nfs_rename, /* rename */
+ NULL, /* readlink - not provided for directories */
+ NULL, /* follow_link - not provided for directories */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission - NULL, so permission() does its own mode-bit check */
+};
+
+/*
+ * read() on an NFS directory is refused: all arguments are ignored and
+ * -EISDIR is returned.
+ */
+static int nfs_dir_read(struct inode *inode, struct file *filp, char *buf,
+	int count)
+{
+	const int refused = -EISDIR;
+
+	return refused;
+}
+
+/*
+ * Free a readdir cache array together with the name buffers that have
+ * been attached to it so far (unset slots must be NULL).
+ */
+static void nfs_readdir_cache_free(struct nfs_entry *cache)
+{
+	int i;
+
+	for (i = 0; i < NFS_READDIR_CACHE_SIZE; i++)
+		if (cache[i].name)
+			kfree(cache[i].name);
+	kfree(cache);
+}
+
+/*
+ * We need to do caching of directory entries to prevent an
+ * incredible amount of RPC traffic. Only the most recent open
+ * directory is cached. This seems sufficient for most purposes.
+ * Technically, we ought to flush the cache on close but this is
+ * not a problem in practice.
+ *
+ * nfs_readdir() hands one directory entry per call to user space.
+ * It returns the rounded-up record size of that entry, 0 at the end
+ * of the directory, -EBADF for a NULL or non-directory inode,
+ * -ENOMEM when the cache cannot be allocated, or the negative result
+ * of nfs_proc_readdir().  filp->f_pos holds the cookie of the entry
+ * returned last.
+ */
+
+static int nfs_readdir(struct inode *inode, struct file *filp,
+	struct dirent *dirent, int count)
+{
+	static int c_dev = 0;
+	static int c_ino;
+	static int c_size;
+	static struct nfs_entry *c_entry = NULL;
+
+	int result;
+	int i;
+	struct nfs_entry *entry;
+
+	if (!inode || !S_ISDIR(inode->i_mode)) {
+		printk("nfs_readdir: inode is NULL or not a directory\n");
+		return -EBADF;
+	}
+
+	/* initialize cache memory if it hasn't been used before */
+
+	if (c_entry == NULL) {
+		struct nfs_entry *cache;
+
+		/*
+		 * Build the cache in a local first and publish it only
+		 * when it is complete: every allocation may fail, and
+		 * kmalloc(GFP_KERNEL) may sleep and let another process
+		 * in here.
+		 */
+		i = sizeof (struct nfs_entry)*NFS_READDIR_CACHE_SIZE;
+		cache = (struct nfs_entry *) kmalloc(i, GFP_KERNEL);
+		if (!cache)
+			return -ENOMEM;
+		for (i = 0; i < NFS_READDIR_CACHE_SIZE; i++)
+			cache[i].name = NULL;
+		for (i = 0; i < NFS_READDIR_CACHE_SIZE; i++) {
+			cache[i].name = (char *) kmalloc(NFS_MAXNAMLEN + 1,
+				GFP_KERNEL);
+			if (!cache[i].name) {
+				nfs_readdir_cache_free(cache);
+				return -ENOMEM;
+			}
+		}
+		if (c_entry == NULL)
+			c_entry = cache;
+		else	/* somebody else set the cache up while we slept */
+			nfs_readdir_cache_free(cache);
+	}
+	entry = NULL;
+
+	/* try to find it in the cache */
+
+	if (inode->i_dev == c_dev && inode->i_ino == c_ino) {
+		for (i = 0; i < c_size; i++) {
+			if (filp->f_pos == c_entry[i].cookie) {
+				if (i == c_size - 1) {
+					if (c_entry[i].eof)
+						return 0;
+				}
+				else
+					entry = c_entry + i + 1;
+				break;
+			}
+		}
+	}
+
+	/* if we didn't find it in the cache, revert to an nfs call */
+
+	if (!entry) {
+		result = nfs_proc_readdir(NFS_SERVER(inode), NFS_FH(inode),
+			filp->f_pos, NFS_READDIR_CACHE_SIZE, c_entry);
+		if (result < 0) {
+			c_dev = 0;	/* the cache contents are no longer valid */
+			return result;
+		}
+		if (result > 0) {
+			c_dev = inode->i_dev;
+			c_ino = inode->i_ino;
+			c_size = result;
+			entry = c_entry + 0;
+		}
+	}
+
+	/* if we found it in the cache or from an nfs call, return results */
+
+	if (entry) {
+		i = strlen(entry->name);
+		memcpy_tofs(dirent->d_name, entry->name, i + 1);
+		put_fs_long(entry->fileid, &dirent->d_ino);
+		put_fs_word(i, &dirent->d_reclen);
+		filp->f_pos = entry->cookie;
+		return ROUND_UP(NAME_OFFSET(dirent)+i+1);
+	}
+	return 0;
+}
+
+/*
+ * Lookup caching is a big win for performance but this is just
+ * a trial to see how well it works on a small scale.
+ * For example, bash does a lookup on ".." 13 times for each path
+ * element when running pwd. Yes, hard to believe but true.
+ * Try pwd in a filesystem mounted with noac.
+ *
+ * It trades a little cpu time and memory for a lot of network bandwidth.
+ * Since the cache is not hashed yet, it is a good idea not to make it too
+ * large because every lookup looks through the entire cache even
+ * though most of them will fail.
+ *
+ * Each slot is keyed by (dev, inode, filename) of the directory entry
+ * and stores the file handle and attributes the lookup produced.
+ */
+
+static struct nfs_lookup_cache_entry {
+ int dev; /* i_dev of the directory; set to 0 to invalidate the slot */
+ int inode; /* i_ino of the directory */
+ char filename[NFS_MAXNAMLEN + 1]; /* name looked up in that directory */
+ struct nfs_fh fhandle; /* cached file handle */
+ struct nfs_fattr fattr; /* cached attributes */
+ int expiration_date; /* jiffies value after which the slot is stale */
+} nfs_lookup_cache[NFS_LOOKUP_CACHE_SIZE];
+
+/*
+ * Find the cache slot for "filename" in directory "dir".  Returns the
+ * first slot whose device, directory inode number and name all match,
+ * or NULL when there is none.
+ */
+static struct nfs_lookup_cache_entry *nfs_lookup_cache_index(struct inode *dir,
+	const char *filename)
+{
+	struct nfs_lookup_cache_entry *slot = nfs_lookup_cache;
+	struct nfs_lookup_cache_entry *end =
+		nfs_lookup_cache + NFS_LOOKUP_CACHE_SIZE;
+
+	for (; slot < end; slot++) {
+		if (slot->dev != dir->i_dev)
+			continue;
+		if (slot->inode != dir->i_ino)
+			continue;
+		if (!strncmp(filename, slot->filename, NFS_MAXNAMLEN))
+			return slot;
+	}
+	return NULL;
+}
+
+/*
+ * Look "filename" of directory "dir" up in the cache.  Returns 1 and
+ * fills in *fhandle and *fattr on a hit; returns 0 on a miss or when
+ * the slot has expired (the slot is then invalidated).  The cache is
+ * zeroed on first use.
+ */
+static int nfs_lookup_cache_lookup(struct inode *dir, const char *filename,
+	struct nfs_fh *fhandle,
+	struct nfs_fattr *fattr)
+{
+	static int nfs_lookup_cache_in_use = 0;
+	struct nfs_lookup_cache_entry *hit;
+
+	if (!nfs_lookup_cache_in_use) {
+		memset(nfs_lookup_cache, 0, sizeof(nfs_lookup_cache));
+		nfs_lookup_cache_in_use = 1;
+	}
+	hit = nfs_lookup_cache_index(dir, filename);
+	if (!hit)
+		return 0;
+	if (jiffies > hit->expiration_date) {
+		hit->dev = 0;
+		return 0;
+	}
+	*fhandle = hit->fhandle;
+	*fattr = hit->fattr;
+	return 1;
+}
+
+/*
+ * Remember the handle and attributes of "filename" in directory "dir".
+ * An existing slot for the name is reused; otherwise the slots are
+ * taken in round-robin order.  The entry expires acdirmax (for
+ * directories) or acregmax (for everything else) jiffies from now.
+ */
+static void nfs_lookup_cache_add(struct inode *dir, const char *filename,
+	struct nfs_fh *fhandle,
+	struct nfs_fattr *fattr)
+{
+	static int nfs_lookup_cache_pos = 0;
+	struct nfs_lookup_cache_entry *slot;
+
+	/* compensate for bug in SGI NFS server */
+	if (fattr->size == -1 || fattr->uid == -1 || fattr->gid == -1
+	    || fattr->atime.seconds == -1 || fattr->mtime.seconds == -1)
+		return;
+	slot = nfs_lookup_cache_index(dir, filename);
+	if (!slot) {
+		slot = &nfs_lookup_cache[nfs_lookup_cache_pos];
+		nfs_lookup_cache_pos++;
+		if (nfs_lookup_cache_pos == NFS_LOOKUP_CACHE_SIZE)
+			nfs_lookup_cache_pos = 0;
+	}
+	slot->dev = dir->i_dev;
+	slot->inode = dir->i_ino;
+	strcpy(slot->filename, filename);
+	slot->fhandle = *fhandle;
+	slot->fattr = *fattr;
+	slot->expiration_date = jiffies + (S_ISDIR(fattr->mode)
+		? NFS_SERVER(dir)->acdirmax : NFS_SERVER(dir)->acregmax);
+}
+
+/*
+ * Invalidate every cache slot that refers to one file.  The file is
+ * given either directly by "inode" or, when inode is NULL, by the
+ * cached entry for "filename" in "dir"; nothing happens when neither
+ * identifies a file.
+ */
+static void nfs_lookup_cache_remove(struct inode *dir, struct inode *inode,
+	const char *filename)
+{
+	struct nfs_lookup_cache_entry *slot;
+	int dev;
+	int fileid;
+	int n;
+
+	if (inode) {
+		dev = inode->i_dev;
+		fileid = inode->i_ino;
+	} else {
+		slot = nfs_lookup_cache_index(dir, filename);
+		if (!slot)
+			return;
+		dev = slot->dev;
+		fileid = slot->fattr.fileid;
+	}
+	for (n = 0; n < NFS_LOOKUP_CACHE_SIZE; n++) {
+		slot = &nfs_lookup_cache[n];
+		if (slot->dev == dev && slot->fattr.fileid == fileid)
+			slot->dev = 0;
+	}
+}
+
+/*
+ * Store fresh attributes in every cache slot that refers to "file".
+ */
+static void nfs_lookup_cache_refresh(struct inode *file,
+	struct nfs_fattr *fattr)
+{
+	struct nfs_lookup_cache_entry *slot;
+	int dev = file->i_dev;
+	int fileid = file->i_ino;
+	int n;
+
+	for (n = 0; n < NFS_LOOKUP_CACHE_SIZE; n++) {
+		slot = &nfs_lookup_cache[n];
+		if (slot->dev != dev || slot->fattr.fileid != fileid)
+			continue;
+		slot->fattr = *fattr;
+	}
+}
+
+/*
+ * Look one name up in an NFS directory.  The name is copied into a
+ * NUL-terminated local buffer first.  "." returns the directory itself
+ * (keeping its reference); everything else is answered from the lookup
+ * cache or, on a miss or a noac mount, by the server.  The reference
+ * on "dir" is dropped on every other path.
+ */
+static int nfs_lookup(struct inode *dir, const char *__name, int len,
+	struct inode **result)
+{
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr;
+	char name[len > NFS_MAXNAMLEN? 1 : len+1];
+	int error, cached;
+
+	*result = NULL;
+	if (!dir || !S_ISDIR(dir->i_mode)) {
+		printk("nfs_lookup: inode is NULL or not a directory\n");
+		error = -ENOENT;
+		goto out;
+	}
+	if (len > NFS_MAXNAMLEN) {
+		error = -ENAMETOOLONG;
+		goto out;
+	}
+	memcpy(name,__name,len);
+	name[len] = '\0';
+	if (len == 1 && name[0] == '.') { /* cheat for "." */
+		*result = dir;
+		return 0;
+	}
+	cached = !(NFS_SERVER(dir)->flags & NFS_MOUNT_NOAC)
+		&& nfs_lookup_cache_lookup(dir, name, &fhandle, &fattr);
+	if (!cached) {
+		error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dir),
+			name, &fhandle, &fattr);
+		if (error)
+			goto out;
+		nfs_lookup_cache_add(dir, name, &fhandle, &fattr);
+	}
+	*result = nfs_fhget(dir->i_sb, &fhandle, &fattr);
+	error = *result ? 0 : -EACCES;
+out:
+	iput(dir);
+	return error;
+}
+
+/*
+ * Create a file on the server and return its inode in *result.  Only
+ * the mode is set in the attributes sent; all other fields are -1.
+ * The new entry is added to the lookup cache.  The reference on "dir"
+ * is always dropped.
+ */
+static int nfs_create(struct inode *dir, const char *name, int len, int mode,
+	struct inode **result)
+{
+	struct nfs_sattr sattr;
+	struct nfs_fattr fattr;
+	struct nfs_fh fhandle;
+	int error;
+
+	*result = NULL;
+	if (!dir || !S_ISDIR(dir->i_mode)) {
+		printk("nfs_create: inode is NULL or not a directory\n");
+		error = -ENOENT;
+		goto out;
+	}
+	if (len > NFS_MAXNAMLEN) {
+		error = -ENAMETOOLONG;
+		goto out;
+	}
+	sattr.mode = mode;
+	sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
+	sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
+	error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir),
+		name, &sattr, &fhandle, &fattr);
+	if (error)
+		goto out;
+	*result = nfs_fhget(dir->i_sb, &fhandle, &fattr);
+	if (!*result) {
+		error = -EACCES;
+		goto out;
+	}
+	nfs_lookup_cache_add(dir, name, &fhandle, &fattr);
+out:
+	iput(dir);
+	return error;
+}
+
+/*
+ * Create a special file through nfs_proc_create().  For character and
+ * block devices the device number is passed in the size field of the
+ * attributes.  A successful result is added to the lookup cache; the
+ * reference on "dir" is always dropped.
+ */
+static int nfs_mknod(struct inode *dir, const char *name, int len,
+	int mode, int rdev)
+{
+	struct nfs_sattr sattr;
+	struct nfs_fattr fattr;
+	struct nfs_fh fhandle;
+	int error;
+
+	if (!dir || !S_ISDIR(dir->i_mode)) {
+		printk("nfs_mknod: inode is NULL or not a directory\n");
+		error = -ENOENT;
+		goto out;
+	}
+	if (len > NFS_MAXNAMLEN) {
+		error = -ENAMETOOLONG;
+		goto out;
+	}
+	sattr.mode = mode;
+	sattr.uid = sattr.gid = (unsigned) -1;
+	if (!S_ISCHR(mode) && !S_ISBLK(mode))
+		sattr.size = (unsigned) -1;
+	else
+		sattr.size = rdev; /* get out your barf bag */
+	sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
+	error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir),
+		name, &sattr, &fhandle, &fattr);
+	if (!error)
+		nfs_lookup_cache_add(dir, name, &fhandle, &fattr);
+out:
+	iput(dir);
+	return error;
+}
+
+/*
+ * Create a directory on the server.  A successful result is added to
+ * the lookup cache; the reference on "dir" is always dropped.
+ */
+static int nfs_mkdir(struct inode *dir, const char *name, int len, int mode)
+{
+	struct nfs_sattr sattr;
+	struct nfs_fattr fattr;
+	struct nfs_fh fhandle;
+	int error;
+
+	if (!dir || !S_ISDIR(dir->i_mode)) {
+		printk("nfs_mkdir: inode is NULL or not a directory\n");
+		error = -ENOENT;
+		goto out;
+	}
+	if (len > NFS_MAXNAMLEN) {
+		error = -ENAMETOOLONG;
+		goto out;
+	}
+	sattr.mode = mode;
+	sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
+	sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
+	error = nfs_proc_mkdir(NFS_SERVER(dir), NFS_FH(dir),
+		name, &sattr, &fhandle, &fattr);
+	if (!error)
+		nfs_lookup_cache_add(dir, name, &fhandle, &fattr);
+out:
+	iput(dir);
+	return error;
+}
+
+/*
+ * Remove a directory on the server and, on success, invalidate its
+ * lookup cache entries.  The reference on "dir" is always dropped.
+ */
+static int nfs_rmdir(struct inode *dir, const char *name, int len)
+{
+	int error;
+
+	if (!dir || !S_ISDIR(dir->i_mode)) {
+		printk("nfs_rmdir: inode is NULL or not a directory\n");
+		error = -ENOENT;
+		goto out;
+	}
+	if (len > NFS_MAXNAMLEN) {
+		error = -ENAMETOOLONG;
+		goto out;
+	}
+	error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dir), name);
+	if (!error)
+		nfs_lookup_cache_remove(dir, NULL, name);
+out:
+	iput(dir);
+	return error;
+}
+
+/*
+ * Remove a file on the server and, on success, invalidate its lookup
+ * cache entries.  The reference on "dir" is always dropped.
+ */
+static int nfs_unlink(struct inode *dir, const char *name, int len)
+{
+	int error;
+
+	if (!dir || !S_ISDIR(dir->i_mode)) {
+		printk("nfs_unlink: inode is NULL or not a directory\n");
+		error = -ENOENT;
+		goto out;
+	}
+	if (len > NFS_MAXNAMLEN) {
+		error = -ENAMETOOLONG;
+		goto out;
+	}
+	error = nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dir), name);
+	if (!error)
+		nfs_lookup_cache_remove(dir, NULL, name);
+out:
+	iput(dir);
+	return error;
+}
+
+/*
+ * Create the symbolic link "name" in "dir" with contents "symname".
+ * Both the entry name and the link contents are length-checked before
+ * the request is sent.  The reference on "dir" is always dropped.
+ */
+static int nfs_symlink(struct inode *dir, const char *name, int len,
+	const char *symname)
+{
+	struct nfs_sattr sattr;
+	int error;
+
+	if (!dir || !S_ISDIR(dir->i_mode)) {
+		printk("nfs_symlink: inode is NULL or not a directory\n");
+		error = -ENOENT;
+		goto out;
+	}
+	if (len > NFS_MAXNAMLEN) {
+		error = -ENAMETOOLONG;
+		goto out;
+	}
+	if (strlen(symname) > NFS_MAXPATHLEN) {
+		error = -ENAMETOOLONG;
+		goto out;
+	}
+	sattr.mode = S_IFLNK | S_IRWXUGO; /* SunOS 4.1.2 crashes without this! */
+	sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
+	sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
+	error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dir),
+		name, symname, &sattr);
+out:
+	iput(dir);
+	return error;
+}
+
+/*
+ * Create "name" in "dir" as a hard link to "oldinode".  On success the
+ * lookup cache entries of oldinode are invalidated.  The references on
+ * both inodes are always dropped.
+ */
+static int nfs_link(struct inode *oldinode, struct inode *dir,
+	const char *name, int len)
+{
+	int error;
+
+	if (!oldinode) {
+		printk("nfs_link: old inode is NULL\n");
+		error = -ENOENT;
+		goto out;
+	}
+	if (!dir || !S_ISDIR(dir->i_mode)) {
+		printk("nfs_link: dir is NULL or not a directory\n");
+		error = -ENOENT;
+		goto out;
+	}
+	if (len > NFS_MAXNAMLEN) {
+		error = -ENAMETOOLONG;
+		goto out;
+	}
+	error = nfs_proc_link(NFS_SERVER(oldinode), NFS_FH(oldinode),
+		NFS_FH(dir), name);
+	if (!error)
+		nfs_lookup_cache_remove(dir, oldinode, NULL);
+out:
+	iput(oldinode);
+	iput(dir);
+	return error;
+}
+
+/*
+ * Rename old_name in old_dir to new_name in new_dir on the server.  On
+ * success the lookup cache entries for both names are invalidated.
+ * The references on both directories are always dropped.
+ */
+static int nfs_rename(struct inode *old_dir, const char *old_name, int old_len,
+	struct inode *new_dir, const char *new_name, int new_len)
+{
+	int error;
+
+	if (!old_dir || !S_ISDIR(old_dir->i_mode)) {
+		printk("nfs_rename: old inode is NULL or not a directory\n");
+		error = -ENOENT;
+		goto out;
+	}
+	if (!new_dir || !S_ISDIR(new_dir->i_mode)) {
+		printk("nfs_rename: new inode is NULL or not a directory\n");
+		error = -ENOENT;
+		goto out;
+	}
+	if (old_len > NFS_MAXNAMLEN || new_len > NFS_MAXNAMLEN) {
+		error = -ENAMETOOLONG;
+		goto out;
+	}
+	error = nfs_proc_rename(NFS_SERVER(old_dir),
+		NFS_FH(old_dir), old_name,
+		NFS_FH(new_dir), new_name);
+	if (!error) {
+		nfs_lookup_cache_remove(old_dir, NULL, old_name);
+		nfs_lookup_cache_remove(new_dir, NULL, new_name);
+	}
+out:
+	iput(old_dir);
+	iput(new_dir);
+	return error;
+}
+
+/*
+ * Many nfs protocol calls return the new file attributes after
+ * an operation. Here we update the inode to reflect the state
+ * of the server's inode.
+ *
+ * Nothing is done when either argument is NULL or when the inode
+ * number does not match fattr->fileid.  The inode operations are
+ * chosen by file type only when the inode had no mode before.  The
+ * lookup cache is refreshed with the same attributes.
+ */
+
+void nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+{
+	int first_fill;
+
+	if (!inode || !fattr) {
+		printk("nfs_refresh_inode: inode or fattr is NULL\n");
+		return;
+	}
+	if (inode->i_ino != fattr->fileid) {
+		printk("nfs_refresh_inode: inode number mismatch\n");
+		return;
+	}
+	first_fill = (inode->i_mode == 0);
+	inode->i_mode = fattr->mode;
+	inode->i_nlink = fattr->nlink;
+	inode->i_uid = fattr->uid;
+	inode->i_gid = fattr->gid;
+	inode->i_size = fattr->size;
+	inode->i_blksize = fattr->blocksize;
+	/* only device nodes carry a device number */
+	if (!S_ISCHR(inode->i_mode) && !S_ISBLK(inode->i_mode))
+		inode->i_rdev = 0;
+	else
+		inode->i_rdev = fattr->rdev;
+	inode->i_blocks = fattr->blocks;
+	inode->i_atime = fattr->atime.seconds;
+	inode->i_mtime = fattr->mtime.seconds;
+	inode->i_ctime = fattr->ctime.seconds;
+	if (first_fill) {
+		switch (inode->i_mode & S_IFMT) {
+		case S_IFREG:
+			inode->i_op = &nfs_file_inode_operations;
+			break;
+		case S_IFDIR:
+			inode->i_op = &nfs_dir_inode_operations;
+			break;
+		case S_IFLNK:
+			inode->i_op = &nfs_symlink_inode_operations;
+			break;
+		case S_IFCHR:
+			inode->i_op = &chrdev_inode_operations;
+			break;
+		case S_IFBLK:
+			inode->i_op = &blkdev_inode_operations;
+			break;
+		case S_IFIFO:
+			init_fifo(inode);
+			break;
+		default:
+			inode->i_op = NULL;
+			break;
+		}
+	}
+	nfs_lookup_cache_refresh(inode, fattr);
+}
+
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
new file mode 100644
index 000000000..e71d29483
--- /dev/null
+++ b/fs/nfs/file.c
@@ -0,0 +1,237 @@
+/*
+ * linux/fs/nfs/file.c
+ *
+ * Copyright (C) 1992 Rick Sladkey
+ *
+ * Changes Copyright (C) 1994 by Florian La Roche
+ * - Do not copy data too often around in the kernel.
+ * - In nfs_file_read the return value of kmalloc wasn't checked.
+ * - Put in a better version of read look-ahead buffering. Original idea
+ * and implementation by Wai S Kok elekokws@ee.nus.sg.
+ *
+ * Expire cache on write to a file by Wai S Kok (Oct 1994).
+ *
+ * nfs regular file handling functions
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/mm.h>
+#include <linux/nfs_fs.h>
+#include <linux/malloc.h>
+
+ static int nfs_file_read(struct inode *, struct file *, char *, int); /* read with look-ahead cache, defined below */
+ static int nfs_file_write(struct inode *, struct file *, char *, int); /* chunked write, defined below */
+ static int nfs_fsync(struct inode *, struct file *); /* always returns 0, defined below */
+
+ static struct file_operations nfs_file_operations = { /* file operations for regular NFS files; initializer is positional */
+ NULL, /* lseek - NULL selects the default */
+ nfs_file_read, /* read */
+ nfs_file_write, /* write */
+ NULL, /* readdir - bad: not meaningful for a regular file */
+ NULL, /* select - NULL selects the default */
+ NULL, /* ioctl - NULL selects the default */
+ nfs_mmap, /* mmap */
+ NULL, /* open - no special open is needed */
+ NULL, /* release */
+ nfs_fsync, /* fsync */
+ };
+
+ struct inode_operations nfs_file_inode_operations = { /* installed by nfs_refresh_inode() for S_ISREG inodes; initializer is positional */
+ &nfs_file_operations, /* default file operations */
+ NULL, /* create - unset for regular files */
+ NULL, /* lookup - unset for regular files */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL /* truncate */
+ };
+
+ /* One slot of the read look-ahead cache.  Once data is inserted, its buffer may only be freed or replaced while in_use == 0. */
+ struct read_cache {
+ int in_use; /* count of readers currently copying out of buf */
+ unsigned long inode_num; /* i_ino of the file the data belongs to */
+ off_t file_pos; /* file offset of buf[0]; -1 in a never-used slot */
+ int len; /* number of valid data bytes in buf */
+ unsigned long time; /* jiffies value when this entry was inserted */
+ char * buf; /* kmalloc'ed data buffer, NULL while the slot is empty */
+ int buf_size; /* allocated size of buf, passed to kfree_s() */
+ };
+
+ #define READ_CACHE_SIZE 5 /* number of slots in cache[] */
+ #define EXPIRE_CACHE (HZ * 3) /* keep no longer than 3 seconds */
+
+ unsigned long num_requests = 0; /* reads that got past the size clipping in nfs_file_read() */
+ unsigned long num_cache_hits = 0; /* of those, reads served from cache[] */
+
+ static int tail = 0; /* next cache slot to replace (wraps at READ_CACHE_SIZE) */
+
+ static struct read_cache cache[READ_CACHE_SIZE] = { /* fields: in_use, inode_num, file_pos, len, time, buf, buf_size */
+ { 0, 0, -1, 0, 0, NULL, 0 }, /* empty slot */
+ { 0, 0, -1, 0, 0, NULL, 0 }, /* empty slot */
+ { 0, 0, -1, 0, 0, NULL, 0 }, /* empty slot */
+ { 0, 0, -1, 0, 0, NULL, 0 }, /* empty slot */
+ { 0, 0, -1, 0, 0, NULL, 0 } }; /* empty slot */
+
+ static int nfs_fsync(struct inode *inode, struct file *file) /* fsync hook for regular NFS files */
+ {
+ return 0; /* nothing to flush here: nfs_file_write() already issues nfs_proc_write() for every chunk */
+ }
+
+ /*
+  * Read up to count bytes at file->f_pos into the user buffer buf.
+  *
+  * The request is first clipped to inode->i_size.  If a single cache[]
+  * slot covers the whole range and is at most EXPIRE_CACHE ticks old, the
+  * data is copied out of that slot.  Otherwise every rsize-sized chunk
+  * except the last is read directly into user space, and the last chunk
+  * is fetched as a full rsize read-ahead into a kernel buffer that is
+  * then offered to the cache.
+  *
+  * Returns the number of bytes read, 0 when nothing is left to read,
+  * -EINVAL for a NULL or non-regular inode, or the negative status of
+  * nfs_proc_read().
+  */
+ static int nfs_file_read(struct inode *inode, struct file *file, char *buf,
+ 	int count)
+ {
+ 	int result, hunk, i, n, fs;
+ 	struct nfs_fattr fattr;
+ 	char *data;
+ 	off_t pos;
+
+ 	if (!inode) {
+ 		printk("nfs_file_read: inode = NULL\n");
+ 		return -EINVAL;
+ 	}
+ 	if (!S_ISREG(inode->i_mode)) {
+ 		printk("nfs_file_read: read from non-file, mode %07o\n",
+ 			inode->i_mode);
+ 		return -EINVAL;
+ 	}
+ 	pos = file->f_pos;
+ 	if (pos + count > inode->i_size)
+ 		count = inode->i_size - pos;
+ 	if (count <= 0)
+ 		return 0;
+ 	++num_requests;
+
+ 	/* Look for a fresh slot that contains the whole requested range. */
+ 	cli();
+ 	for (i = 0; i < READ_CACHE_SIZE; i++)
+ 		if ((cache[i].inode_num == inode->i_ino)
+ 			&& (cache[i].file_pos <= pos)
+ 			&& (cache[i].file_pos + cache[i].len >= pos + count)
+ 			&& (abs(jiffies - cache[i].time) <= EXPIRE_CACHE))
+ 			break;
+ 	if (i < READ_CACHE_SIZE) {
+ 		/* Pin the slot: its buffer is only replaced while in_use == 0. */
+ 		++cache[i].in_use;
+ 		sti();
+ 		++num_cache_hits;
+ 		memcpy_tofs(buf, cache[i].buf + pos - cache[i].file_pos, count);
+ 		--cache[i].in_use;
+ 		file->f_pos += count;
+ 		return count;
+ 	}
+ 	sti();
+
+ 	/* All chunks but the last are decoded straight into user space. */
+ 	n = NFS_SERVER(inode)->rsize;
+ 	for (i = 0; i < count - n; i += n) {
+ 		result = nfs_proc_read(NFS_SERVER(inode), NFS_FH(inode),
+ 			pos, n, buf, &fattr, 1);
+ 		if (result < 0)
+ 			return result;
+ 		pos += result;
+ 		buf += result;
+ 		if (result < n) {
+ 			/* short read: the server has no more data */
+ 			file->f_pos = pos;
+ 			nfs_refresh_inode(inode, &fattr);
+ 			return i + result;
+ 		}
+ 	}
+
+ 	/* The loop leaves 0 < count - i <= n bytes still owed to the caller. */
+ 	hunk = count - i;
+ 	fs = 0;
+ 	if (!(data = (char *)kmalloc(n, GFP_KERNEL))) {
+ 		/* No kernel buffer: fall back to reading into user space. */
+ 		data = buf;
+ 		fs = 1;
+ 	}
+ 	/*
+ 	 * With a kernel buffer we read a full rsize chunk as look-ahead.
+ 	 * When the target is the user buffer we must not ask for more than
+ 	 * the caller still has room for, or the reply would be copied past
+ 	 * the end of the caller's buffer.
+ 	 */
+ 	result = nfs_proc_read(NFS_SERVER(inode), NFS_FH(inode),
+ 		pos, fs ? hunk : n, data, &fattr, fs);
+ 	if (result < 0) {
+ 		if (!fs)
+ 			kfree_s(data, n);
+ 		return result;
+ 	}
+ 	if (result < hunk)
+ 		hunk = result;
+ 	if (fs) {
+ 		/* Data already sits in user space; nothing to cache. */
+ 		file->f_pos = pos + hunk;
+ 		nfs_refresh_inode(inode, &fattr);
+ 		return i + hunk;
+ 	}
+ 	memcpy_tofs(buf, data, hunk);
+ 	file->f_pos = pos + hunk;
+ 	nfs_refresh_inode(inode, &fattr);
+
+ 	/* Hand the kernel buffer to the cache if the victim slot is idle. */
+ 	cli();
+ 	if (cache[tail].in_use == 0) {
+ 		if (cache[tail].buf)
+ 			kfree_s(cache[tail].buf, cache[tail].buf_size);
+ 		cache[tail].buf = data;
+ 		cache[tail].buf_size = n;
+ 		cache[tail].inode_num = inode->i_ino;
+ 		cache[tail].file_pos = pos;
+ 		cache[tail].len = result;
+ 		cache[tail].time = jiffies;
+ 		if (++tail >= READ_CACHE_SIZE)
+ 			tail = 0;
+ 	} else
+ 		kfree_s(data, n);
+ 	sti();
+ 	return i + hunk;
+ }
+
+ /*
+  * Write count bytes from the user buffer buf to the file, in chunks of
+  * at most wsize bytes, starting at file->f_pos (or at inode->i_size when
+  * O_APPEND is set).
+  *
+  * Every read-cache entry that belongs to this inode is expired first,
+  * because the write makes its contents stale.
+  *
+  * Returns the number of bytes written, 0 for count <= 0, -EINVAL for a
+  * NULL or non-regular inode, or the negative status of nfs_proc_write().
+  */
+ static int nfs_file_write(struct inode *inode, struct file *file, char *buf,
+ 	int count)
+ {
+ 	int result, hunk, i, n, pos;
+ 	struct nfs_fattr fattr;
+
+ 	if (!inode) {
+ 		printk("nfs_file_write: inode = NULL\n");
+ 		return -EINVAL;
+ 	}
+ 	if (!S_ISREG(inode->i_mode)) {
+ 		printk("nfs_file_write: write to non-file, mode %07o\n",
+ 			inode->i_mode);
+ 		return -EINVAL;
+ 	}
+ 	if (count <= 0)
+ 		return 0;
+
+ 	cli();
+ 	/*
+ 	 * If hit, cache is dirty and must be expired.  The reader accepts
+ 	 * entries whose age is <= EXPIRE_CACHE, so the stamp has to be set
+ 	 * strictly more than EXPIRE_CACHE ticks into the past.  Subtracting
+ 	 * only EXPIRE_CACHE would leave an entry inserted during the current
+ 	 * tick still valid.  Stamping relative to jiffies also keeps
+ 	 * repeated writes from pushing the stamp back without bound.
+ 	 */
+ 	for (i = 0; i < READ_CACHE_SIZE; i++)
+ 		if (cache[i].inode_num == inode->i_ino)
+ 			cache[i].time = jiffies - (EXPIRE_CACHE + 1);
+ 	sti();
+
+ 	pos = file->f_pos;
+ 	if (file->f_flags & O_APPEND)
+ 		pos = inode->i_size;
+ 	n = NFS_SERVER(inode)->wsize;
+ 	for (i = 0; i < count; i += n) {
+ 		hunk = count - i;
+ 		if (hunk >= n)
+ 			hunk = n;
+ 		result = nfs_proc_write(NFS_SERVER(inode), NFS_FH(inode),
+ 			pos, hunk, buf, &fattr);
+ 		if (result < 0)
+ 			return result;
+ 		pos += hunk;
+ 		buf += hunk;
+ 		if (hunk < n) {
+ 			/* final partial chunk: account for it and stop */
+ 			i += hunk;
+ 			break;
+ 		}
+ 	}
+ 	file->f_pos = pos;
+ 	/* fattr holds the attributes returned by the last chunk written. */
+ 	nfs_refresh_inode(inode, &fattr);
+ 	return i;
+ }
+
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
new file mode 100644
index 000000000..17f2cb6f0
--- /dev/null
+++ b/fs/nfs/inode.c
@@ -0,0 +1,240 @@
+/*
+ * linux/fs/nfs/inode.c
+ *
+ * Copyright (C) 1992 Rick Sladkey
+ *
+ * nfs inode and superblock handling functions
+ */
+
+#include <asm/system.h>
+#include <asm/segment.h>
+
+#include <linux/sched.h>
+#include <linux/nfs_fs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/locks.h>
+
+ extern int close_fp(struct file *filp, unsigned int fd); /* used by nfs_put_super() to release the server socket */
+
+ static int nfs_notify_change(struct inode *, struct iattr *); /* sends attribute changes via nfs_proc_setattr() */
+ static void nfs_put_inode(struct inode *); /* calls clear_inode() */
+ static void nfs_put_super(struct super_block *); /* closes the socket and clears s_dev */
+ static void nfs_statfs(struct super_block *, struct statfs *); /* fills a user statfs via nfs_proc_statfs() */
+
+ static struct super_operations nfs_sops = { /* installed as sb->s_op by nfs_read_super(); initializer is positional */
+ NULL, /* read inode - unset; inodes are obtained through nfs_fhget() */
+ nfs_notify_change, /* notify change */
+ NULL, /* write inode */
+ nfs_put_inode, /* put inode */
+ nfs_put_super, /* put superblock */
+ NULL, /* write superblock */
+ nfs_statfs, /* stat filesystem */
+ NULL /* last slot left unset - presumably remount; confirm against struct super_operations */
+ };
+
+ static void nfs_put_inode(struct inode * inode) /* put_inode hook from nfs_sops */
+ {
+ clear_inode(inode); /* the whole job is delegated to clear_inode() */
+ }
+
+ /*
+  * put_super hook: release the socket that nfs_read_super() stored in the
+  * superblock and mark the superblock unused by clearing s_dev.
+  */
+ void nfs_put_super(struct super_block *sb)
+ {
+ 	struct nfs_server *server = &sb->u.nfs_sb.s_server;
+
+ 	/* No locks should be open on this, so 0 should be safe as a fd. */
+ 	close_fp(server->file, 0);
+
+ 	lock_super(sb);
+ 	sb->s_dev = 0;
+ 	unlock_super(sb);
+ }
+
+/*
+ * The way this works is that the mount process passes a structure
+ * in the data argument which contains an open socket to the NFS
+ * server and the root file handle obtained from the server's mount
+ * daemon. We stash these away in the private superblock fields.
+ * Later we can add other mount parameters like caching values.
+ */
+
+ /*
+  * Fill in the superblock for an NFS mount.
+  *
+  * raw_data points to the struct nfs_mount_data supplied by the mount
+  * process: an open socket descriptor, the root file handle and the
+  * tuning parameters.  The socket's struct file gets an extra reference
+  * that is kept in the superblock for as long as the mount exists.
+  *
+  * Returns sb on success.  On failure sb->s_dev is cleared and NULL is
+  * returned.
+  */
+ struct super_block *nfs_read_super(struct super_block *sb, void *raw_data,
+ 	int silent)
+ {
+ 	struct nfs_mount_data *data = (struct nfs_mount_data *) raw_data;
+ 	struct nfs_server *server;
+ 	unsigned int fd;
+ 	struct file *filp;
+ 	dev_t dev = sb->s_dev;
+
+ 	if (!data) {
+ 		printk("nfs_read_super: missing data argument\n");
+ 		sb->s_dev = 0;
+ 		return NULL;
+ 	}
+ 	fd = data->fd;
+ 	/* A version mismatch is only reported, not rejected. */
+ 	if (data->version != NFS_MOUNT_VERSION) {
+ 		printk("nfs warning: mount version %s than kernel\n",
+ 			data->version < NFS_MOUNT_VERSION ? "older" : "newer");
+ 	}
+ 	if (fd >= NR_OPEN || !(filp = current->files->fd[fd])) {
+ 		printk("nfs_read_super: invalid file descriptor\n");
+ 		sb->s_dev = 0;
+ 		return NULL;
+ 	}
+ 	if (!S_ISSOCK(filp->f_inode->i_mode)) {
+ 		printk("nfs_read_super: not a socket\n");
+ 		sb->s_dev = 0;
+ 		return NULL;
+ 	}
+ 	/* Our own reference to the socket; dropped again on failure below. */
+ 	filp->f_count++;
+ 	lock_super(sb);
+ 	sb->s_blocksize = 1024;	/* XXX */
+ 	sb->s_blocksize_bits = 10;
+ 	sb->s_magic = NFS_SUPER_MAGIC;
+ 	sb->s_dev = dev;
+ 	sb->s_op = &nfs_sops;
+ 	server = &sb->u.nfs_sb.s_server;
+ 	server->file = filp;
+ 	server->lock = 0;
+ 	server->wait = NULL;
+ 	server->flags = data->flags;
+ 	/* Transfer sizes: non-positive selects the default, large values are capped. */
+ 	server->rsize = data->rsize;
+ 	if (server->rsize <= 0)
+ 		server->rsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
+ 	else if (server->rsize >= NFS_MAX_FILE_IO_BUFFER_SIZE)
+ 		server->rsize = NFS_MAX_FILE_IO_BUFFER_SIZE;
+ 	server->wsize = data->wsize;
+ 	if (server->wsize <= 0)
+ 		server->wsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
+ 	else if (server->wsize >= NFS_MAX_FILE_IO_BUFFER_SIZE)
+ 		server->wsize = NFS_MAX_FILE_IO_BUFFER_SIZE;
+ 	/* timeo is scaled by HZ/10, the attribute cache times by HZ. */
+ 	server->timeo = data->timeo*HZ/10;
+ 	server->retrans = data->retrans;
+ 	server->acregmin = data->acregmin*HZ;
+ 	server->acregmax = data->acregmax*HZ;
+ 	server->acdirmin = data->acdirmin*HZ;
+ 	server->acdirmax = data->acdirmax*HZ;
+ 	/*
+ 	 * The hostname comes from the mount data and is not guaranteed to
+ 	 * be terminated, so bound the copy by the destination and terminate
+ 	 * it explicitly.  (Assumes server->hostname is a char array.)
+ 	 */
+ 	strncpy(server->hostname, data->hostname,
+ 		sizeof(server->hostname) - 1);
+ 	server->hostname[sizeof(server->hostname) - 1] = '\0';
+ 	sb->u.nfs_sb.s_root = data->root;
+ 	unlock_super(sb);
+ 	if (!(sb->s_mounted = nfs_fhget(sb, &data->root, NULL))) {
+ 		/* Undo the reference taken above, or the socket is leaked. */
+ 		filp->f_count--;
+ 		sb->s_dev = 0;
+ 		printk("nfs_read_super: get root inode failed\n");
+ 		return NULL;
+ 	}
+ 	return sb;
+ }
+
+ /*
+  * statfs hook: ask the server for the filesystem totals of the mount's
+  * root handle and store them in the user-space statfs buffer.  If the
+  * call fails the error is logged and zeros are reported instead.
+  */
+ void nfs_statfs(struct super_block *sb, struct statfs *buf)
+ {
+ 	struct nfs_fsinfo info;
+ 	int err;
+
+ 	put_fs_long(NFS_SUPER_MAGIC, &buf->f_type);
+ 	err = nfs_proc_statfs(&sb->u.nfs_sb.s_server, &sb->u.nfs_sb.s_root,
+ 		&info);
+ 	if (err) {
+ 		printk("nfs_statfs: statfs error = %d\n", -err);
+ 		info.bavail = 0;
+ 		info.bfree = 0;
+ 		info.blocks = 0;
+ 		info.bsize = 0;
+ 	}
+ 	put_fs_long(info.bsize, &buf->f_bsize);
+ 	put_fs_long(info.blocks, &buf->f_blocks);
+ 	put_fs_long(info.bfree, &buf->f_bfree);
+ 	put_fs_long(info.bavail, &buf->f_bavail);
+ 	/* File counts are not obtained here; both are reported as 0. */
+ 	put_fs_long(0, &buf->f_files);
+ 	put_fs_long(0, &buf->f_ffree);
+ 	/* We should really try to interrogate the remote server to find
+ 	   its maximum name length here */
+ 	put_fs_long(NAME_MAX, &buf->f_namelen);
+ }
+
+/*
+ * This is our own version of iget that looks up inodes by file handle
+ * instead of inode number. We use this technique instead of using
+ * the vfs read_inode function because there is no way to pass the
+ * file handle or current attributes into the read_inode function.
+ * We just have to be careful not to subvert iget's special handling
+ * of mount points.
+ */
+
+ struct inode *nfs_fhget(struct super_block *sb, struct nfs_fh *fhandle,
+ 	struct nfs_fattr *fattr)
+ {
+ 	struct nfs_fattr fetched;
+ 	struct inode *ino;
+ 	int err;
+
+ 	if (!sb) {
+ 		printk("nfs_fhget: super block is NULL\n");
+ 		return NULL;
+ 	}
+
+ 	/* The caller may pass NULL; then ask the server for the attributes. */
+ 	if (!fattr) {
+ 		err = nfs_proc_getattr(&sb->u.nfs_sb.s_server, fhandle,
+ 			&fetched);
+ 		if (err) {
+ 			printk("nfs_fhget: getattr error = %d\n", -err);
+ 			return NULL;
+ 		}
+ 		fattr = &fetched;
+ 	}
+
+ 	ino = iget(sb, fattr->fileid);
+ 	if (!ino) {
+ 		printk("nfs_fhget: iget failed\n");
+ 		return NULL;
+ 	}
+
+ 	/* An inode on another device is returned untouched. */
+ 	if (ino->i_dev != sb->s_dev)
+ 		return ino;
+
+ 	if (ino->i_ino != fattr->fileid) {
+ 		printk("nfs_fhget: unexpected inode from iget\n");
+ 		return ino;
+ 	}
+
+ 	/* Remember the handle and load the attributes into the inode. */
+ 	*NFS_FH(ino) = *fhandle;
+ 	nfs_refresh_inode(ino, fattr);
+ 	return ino;
+ }
+
+ /*
+  * notify_change hook: translate the valid fields of *attr into an NFS
+  * sattr and send it to the server.  Fields that are not being changed
+  * are sent as (unsigned) -1.  On success the inode is refreshed from the
+  * attributes in the reply.  Returns the status of nfs_proc_setattr().
+  */
+ int nfs_notify_change(struct inode *inode, struct iattr *attr)
+ {
+ 	struct nfs_sattr sattr;
+ 	struct nfs_fattr fattr;
+ 	int error;
+
+ 	/* Start with "leave unchanged" everywhere ... */
+ 	sattr.mode = (unsigned) -1;
+ 	sattr.uid = (unsigned) -1;
+ 	sattr.gid = (unsigned) -1;
+ 	sattr.size = (unsigned) -1;
+ 	sattr.mtime.seconds = sattr.mtime.useconds = (unsigned) -1;
+ 	sattr.atime.seconds = sattr.atime.useconds = (unsigned) -1;
+
+ 	/* ... then fill in whatever the caller marked as valid. */
+ 	if (attr->ia_valid & ATTR_MODE)
+ 		sattr.mode = attr->ia_mode;
+ 	if (attr->ia_valid & ATTR_UID)
+ 		sattr.uid = attr->ia_uid;
+ 	if (attr->ia_valid & ATTR_GID)
+ 		sattr.gid = attr->ia_gid;
+ 	/* A size is only sent for regular files. */
+ 	if (attr->ia_valid & ATTR_SIZE)
+ 		sattr.size = S_ISREG(inode->i_mode) ? attr->ia_size : -1;
+ 	if (attr->ia_valid & ATTR_MTIME) {
+ 		sattr.mtime.seconds = attr->ia_mtime;
+ 		sattr.mtime.useconds = 0;
+ 	}
+ 	if (attr->ia_valid & ATTR_ATIME) {
+ 		sattr.atime.seconds = attr->ia_atime;
+ 		sattr.atime.useconds = 0;
+ 	}
+
+ 	error = nfs_proc_setattr(NFS_SERVER(inode), NFS_FH(inode),
+ 		&sattr, &fattr);
+ 	if (!error)
+ 		nfs_refresh_inode(inode, &fattr);
+ 	inode->i_dirt = 0;
+ 	return error;
+ }
diff --git a/fs/nfs/mmap.c b/fs/nfs/mmap.c
new file mode 100644
index 000000000..811176a69
--- /dev/null
+++ b/fs/nfs/mmap.c
@@ -0,0 +1,103 @@
+/*
+ * fs/nfs/mmap.c by Jon Tombs 15 Aug 1993
+ *
+ * This code is from
+ * linux/mm/mmap.c which was written by obz, Linus and Eric
+ * and
+ * linux/mm/memory.c by Linus Torvalds and others
+ *
+ * Copyright (C) 1993
+ *
+ */
+#include <linux/stat.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/shm.h>
+#include <linux/errno.h>
+#include <linux/mman.h>
+#include <linux/string.h>
+#include <linux/malloc.h>
+#include <linux/nfs_fs.h>
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+/*
+ * Fill in the supplied page for mmap
+ */
+static unsigned long nfs_file_mmap_nopage(struct vm_area_struct * area,
+ unsigned long address, unsigned long page, int no_share)
+{
+ struct inode * inode = area->vm_inode;
+ unsigned int clear;
+ unsigned long tmp;
+ int n;
+ int i;
+ int pos;
+ struct nfs_fattr fattr;
+
+ address &= PAGE_MASK;
+ pos = address - area->vm_start + area->vm_offset;
+
+ clear = 0;
+ if (address + PAGE_SIZE > area->vm_end) {
+ clear = address + PAGE_SIZE - area->vm_end;
+ }
+
+ n = NFS_SERVER(inode)->rsize; /* what we can read in one go */
+
+ for (i = 0; i < (PAGE_SIZE - clear); i += n) {
+ int hunk, result;
+
+ hunk = PAGE_SIZE - i;
+ if (hunk > n)
+ hunk = n;
+ result = nfs_proc_read(NFS_SERVER(inode), NFS_FH(inode),
+ pos, hunk, (char *) (page + i), &fattr, 0);
+ if (result < 0)
+ break;
+ pos += result;
+ if (result < n) {
+ i += result;
+ break;
+ }
+ }
+
+#ifdef doweneedthishere
+ nfs_refresh_inode(inode, &fattr);
+#endif
+
+ tmp = page + PAGE_SIZE;
+ while (clear--) {
+ *(char *)--tmp = 0;
+ }
+ return page;
+}
+ struct vm_operations_struct nfs_file_mmap = { /* installed as vma->vm_ops by nfs_mmap(); initializer is positional */
+ NULL, /* open */
+ NULL, /* close */
+ nfs_file_mmap_nopage, /* nopage - reads the page from the server */
+ NULL, /* wppage */
+ NULL, /* share */
+ NULL, /* unmap */
+ };
+
+
+ /*
+  * General mmap of an NFS file.  Writable (PAGE_RW) mappings are refused
+  * with -EINVAL; inodes without a superblock and non-regular files are
+  * refused with -EACCES.  Otherwise the vma is bound to the inode (taking
+  * an inode reference) and to the nfs_file_mmap operations; returns 0.
+  */
+ int nfs_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
+ {
+ 	/* only PAGE_COW or read-only supported now */
+ 	if (vma->vm_page_prot & PAGE_RW)
+ 		return -EINVAL;
+ 	if (!inode->i_sb)
+ 		return -EACCES;
+ 	if (!S_ISREG(inode->i_mode))
+ 		return -EACCES;
+
+ 	/* Record the access time unless the inode is read-only. */
+ 	if (!IS_RDONLY(inode)) {
+ 		inode->i_dirt = 1;
+ 		inode->i_atime = CURRENT_TIME;
+ 	}
+
+ 	inode->i_count++;
+ 	vma->vm_inode = inode;
+ 	vma->vm_ops = &nfs_file_mmap;
+ 	return 0;
+ }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
new file mode 100644
index 000000000..fd01dad99
--- /dev/null
+++ b/fs/nfs/proc.c
@@ -0,0 +1,931 @@
+/*
+ * linux/fs/nfs/proc.c
+ *
+ * Copyright (C) 1992, 1993, 1994 Rick Sladkey
+ *
+ * OS-independent nfs remote procedure call functions
+ *
+ * Tuned by Alan Cox <A.Cox@swansea.ac.uk> for >3K buffers
+ * so at last we can have decent(ish) throughput off a
+ * Sun server.
+ *
+ * Coding optimized and cleaned up by Florian La Roche.
+ * Note: Error returns are optimized for NFS_OK, which isn't translated via
+ * nfs_stat_to_errno(), but happens to be already the right return code.
+ *
+ * FixMe: We ought to define a sensible small max size for
+ * things like getattr that are tiny packets and use the
+ * old get_free_page stuff with it.
+ *
+ * Also, the code currently doesn't check the size of the packet, when
+ * it decodes the packet.
+ *
+ * Feel free to fix it and mail me the diffs if it worries you.
+ */
+
+/*
+ * Defining NFS_PROC_DEBUG causes a lookup of a file named
+ * "xyzzy" to toggle debugging. Just cd to an NFS-mounted
+ * filesystem and type 'ls xyzzy' to turn on debugging.
+ */
+
+#if 0
+#define NFS_PROC_DEBUG
+#endif
+
+#include <linux/config.h>
+#include <linux/param.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/malloc.h>
+#include <linux/nfs_fs.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <asm/segment.h>
+
+ #ifdef NFS_PROC_DEBUG
+
+ static int proc_debug = 0; /* toggled in nfs_proc_lookup() by a lookup of "xyzzy" */
+ #define PRINTK(format, args...) \
+ do { \
+ if (proc_debug) \
+ printk(format , ## args); \
+ } while (0)
+
+ #else /* !NFS_PROC_DEBUG: PRINTK expands to an empty statement */
+
+ #define PRINTK(format, args...) do ; while (0)
+
+ #endif /* !NFS_PROC_DEBUG */
+
+ /* Mapping from NFS error code to "errno" error code; used below as -errno_NFSERR_IO for undecodable replies. */
+ #define errno_NFSERR_IO EIO
+
+ static int *nfs_rpc_header(int *p, int procedure, int ruid); /* forward declaration: writes the call header at p */
+ static int *nfs_rpc_verify(int *p); /* forward declaration: callers treat a NULL return as an I/O error */
+ static int nfs_stat_to_errno(int stat); /* forward declaration: NFS status -> positive errno */
+
+/*
+ * Our memory allocation and release functions.
+ */
+
+#define NFS_SLACK_SPACE 1024 /* Total overkill */
+/* !!! Be careful, this constant is now also used in sock.c...
+ We should easily convert to not using it anymore for most cases... */
+
+ /*
+  * Allocate an RPC buffer of size bytes plus NFS_SLACK_SPACE.  Does not
+  * give up: whenever kmalloc() fails it calls schedule() and tries again.
+  */
+ static inline int *nfs_rpc_alloc(int size)
+ {
+ 	int *buf;
+
+ 	for (;;) {
+ 		buf = (int *)kmalloc(size+NFS_SLACK_SPACE,GFP_NFS);
+ 		if (buf)
+ 			return buf;
+ 		schedule();
+ 	}
+ }
+
+ static inline void nfs_rpc_free(int *p) /* release a buffer obtained from nfs_rpc_alloc() */
+ {
+ kfree((void *)p); /* plain kfree(): the allocation size is not passed back */
+ }
+
+/*
+ * Here are a bunch of xdr encode/decode functions that convert
+ * between machine dependent and xdr data formats.
+ */
+
+#define QUADLEN(len) (((len) + 3) >> 2)
+
+ /* Copy the file handle into the packet at p; returns the position after it. */
+ static inline int *xdr_encode_fhandle(int *p, struct nfs_fh *fhandle)
+ {
+ 	memcpy(p, fhandle, sizeof(*fhandle));
+ 	return p + QUADLEN(sizeof(*fhandle));
+ }
+
+ /* Copy a file handle out of the packet at p; returns the position after it. */
+ static inline int *xdr_decode_fhandle(int *p, struct nfs_fh *fhandle)
+ {
+ 	memcpy(fhandle, p, sizeof(*fhandle));
+ 	return p + QUADLEN(sizeof(*fhandle));
+ }
+
+ /*
+  * Encode a NUL-terminated kernel string as a length word followed by
+  * the bytes, padded with zeros to a 4-byte boundary.  Returns the
+  * position after the padded data.
+  */
+ static inline int *xdr_encode_string(int *p, const char *string)
+ {
+ 	int nbytes = strlen(string);
+ 	int nwords = QUADLEN(nbytes);
+ 	int *payload = p + 1;
+
+ 	/* Zero the last word first so the padding bytes are defined. */
+ 	p[nwords] = 0;
+ 	p[0] = htonl(nbytes);
+ 	memcpy(payload, string, nbytes);
+ 	return payload + nwords;
+ }
+
+ /*
+  * Decode a counted string into the caller's buffer and NUL-terminate
+  * it.  Returns NULL when the encoded length exceeds maxlen, otherwise
+  * the position after the padded data.
+  */
+ static inline int *xdr_decode_string(int *p, char *string, unsigned int maxlen)
+ {
+ 	unsigned int nbytes = ntohl(p[0]);
+ 	int *payload = p + 1;
+
+ 	if (nbytes > maxlen)
+ 		return NULL;
+ 	memcpy(string, payload, nbytes);
+ 	string[nbytes] = '\0';
+ 	return payload + QUADLEN(nbytes);
+ }
+
+ /*
+  * Like xdr_decode_string(), but without copying: *string is pointed at
+  * the bytes inside the packet and *len receives their count.  Returns
+  * NULL (with *len already set) when the length exceeds maxlen.
+  */
+ static inline int *xdr_decode_string2(int *p, char **string, unsigned int *len,
+ 	unsigned int maxlen)
+ {
+ 	int *payload = p + 1;
+
+ 	*len = ntohl(p[0]);
+ 	if (*len > maxlen)
+ 		return NULL;
+ 	*string = (char *) payload;
+ 	return payload + QUADLEN(*len);
+ }
+
+
+ /*
+  * Encode len bytes fetched with memcpy_fromfs() from data as a length
+  * word plus zero-padded bytes.  Returns the position after the data.
+  */
+ static inline int *xdr_encode_data(int *p, char *data, int len)
+ {
+ 	int nwords = QUADLEN(len);
+ 	int *payload = p + 1;
+
+ 	/* Zero the last word first so the padding bytes are defined. */
+ 	p[nwords] = 0;
+ 	p[0] = htonl(len);
+ 	memcpy_fromfs(payload, data, len);
+ 	return payload + nwords;
+ }
+
+ /*
+  * Decode a counted data block.  *lenp receives the encoded length; NULL
+  * is returned when it exceeds maxlen.  The bytes are copied with
+  * memcpy_tofs() when fs is non-zero and with memcpy() otherwise.
+  */
+ static inline int *xdr_decode_data(int *p, char *data, int *lenp, int maxlen,
+ 	int fs)
+ {
+ 	unsigned nbytes;
+
+ 	*lenp = ntohl(*p++);
+ 	nbytes = *lenp;
+ 	if (nbytes > maxlen)
+ 		return NULL;
+ 	if (!fs)
+ 		memcpy(data, p, nbytes);
+ 	else
+ 		memcpy_tofs(data, p, nbytes);
+ 	return p + QUADLEN(nbytes);
+ }
+
+ /*
+  * Decode the 17 consecutive 32-bit words of a file attribute block into
+  * *fattr.  Returns the position after the attributes.
+  */
+ static int *xdr_decode_fattr(int *p, struct nfs_fattr *fattr)
+ {
+ 	fattr->type = (enum nfs_ftype) ntohl(p[0]);
+ 	fattr->mode = ntohl(p[1]);
+ 	fattr->nlink = ntohl(p[2]);
+ 	fattr->uid = ntohl(p[3]);
+ 	fattr->gid = ntohl(p[4]);
+ 	fattr->size = ntohl(p[5]);
+ 	fattr->blocksize = ntohl(p[6]);
+ 	fattr->rdev = ntohl(p[7]);
+ 	fattr->blocks = ntohl(p[8]);
+ 	fattr->fsid = ntohl(p[9]);
+ 	fattr->fileid = ntohl(p[10]);
+ 	fattr->atime.seconds = ntohl(p[11]);
+ 	fattr->atime.useconds = ntohl(p[12]);
+ 	fattr->mtime.seconds = ntohl(p[13]);
+ 	fattr->mtime.useconds = ntohl(p[14]);
+ 	fattr->ctime.seconds = ntohl(p[15]);
+ 	fattr->ctime.useconds = ntohl(p[16]);
+ 	return p + 17;
+ }
+
+ /*
+  * Encode the 8 consecutive 32-bit words of a settable-attribute block
+  * from *sattr.  Returns the position after them.
+  */
+ static int *xdr_encode_sattr(int *p, struct nfs_sattr *sattr)
+ {
+ 	p[0] = htonl(sattr->mode);
+ 	p[1] = htonl(sattr->uid);
+ 	p[2] = htonl(sattr->gid);
+ 	p[3] = htonl(sattr->size);
+ 	p[4] = htonl(sattr->atime.seconds);
+ 	p[5] = htonl(sattr->atime.useconds);
+ 	p[6] = htonl(sattr->mtime.seconds);
+ 	p[7] = htonl(sattr->mtime.useconds);
+ 	return p + 8;
+ }
+
+ /*
+  * Decode one directory entry: file id, name (at most NFS_MAXNAMLEN
+  * bytes) and cookie.  eof is always cleared here.  Returns NULL when the
+  * name is too long.
+  */
+ static int *xdr_decode_entry(int *p, struct nfs_entry *entry)
+ {
+ 	int *cur;
+
+ 	entry->fileid = ntohl(p[0]);
+ 	cur = xdr_decode_string(p + 1, entry->name, NFS_MAXNAMLEN);
+ 	if (!cur)
+ 		return NULL;
+ 	entry->cookie = ntohl(cur[0]);
+ 	entry->eof = 0;
+ 	return cur + 1;
+ }
+
+ /*
+  * Decode the 5 consecutive 32-bit words of a statfs reply into *res.
+  * Returns the position after them.
+  */
+ static int *xdr_decode_fsinfo(int *p, struct nfs_fsinfo *res)
+ {
+ 	res->tsize = ntohl(p[0]);
+ 	res->bsize = ntohl(p[1]);
+ 	res->blocks = ntohl(p[2]);
+ 	res->bfree = ntohl(p[3]);
+ 	res->bavail = ntohl(p[4]);
+ 	return p + 5;
+ }
+
+/*
+ * One function for each procedure in the NFS protocol.
+ */
+
+ /*
+  * NFSPROC_GETATTR: fetch the attributes of fhandle into *fattr.
+  * Returns NFS_OK (0) on success, otherwise a negative errno.
+  */
+ int nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+ 	struct nfs_fattr *fattr)
+ {
+ 	int *buf, *cur;
+ 	int status;
+ 	int use_ruid = 0;
+
+ 	PRINTK("NFS call getattr\n");
+ 	buf = nfs_rpc_alloc(server->rsize);
+ 	if (!buf)
+ 		return -EIO;
+ 	for (;;) {
+ 		cur = nfs_rpc_header(buf, NFSPROC_GETATTR, use_ruid);
+ 		cur = xdr_encode_fhandle(cur, fhandle);
+ 		status = nfs_rpc_call(server, buf, cur, server->rsize);
+ 		if (status < 0)
+ 			break;
+ 		cur = nfs_rpc_verify(buf);
+ 		if (!cur) {
+ 			status = -errno_NFSERR_IO;
+ 			break;
+ 		}
+ 		status = ntohl(*cur++);
+ 		if (status == NFS_OK) {
+ 			cur = xdr_decode_fattr(cur, fattr);
+ 			PRINTK("NFS reply getattr\n");
+ 			break;
+ 		}
+ 		/* Retry once with ruid set when fsuid is 0 but uid is not. */
+ 		if (!use_ruid && current->fsuid == 0 && current->uid != 0) {
+ 			use_ruid = 1;
+ 			continue;
+ 		}
+ 		PRINTK("NFS reply getattr failed = %d\n", status);
+ 		status = -nfs_stat_to_errno(status);
+ 		break;
+ 	}
+ 	nfs_rpc_free(buf);
+ 	return status;
+ }
+
+ /*
+  * NFSPROC_SETATTR: apply *sattr to fhandle; the resulting attributes are
+  * returned in *fattr.  Returns NFS_OK (0) or a negative errno.
+  */
+ int nfs_proc_setattr(struct nfs_server *server, struct nfs_fh *fhandle,
+ 	struct nfs_sattr *sattr, struct nfs_fattr *fattr)
+ {
+ 	int *buf, *cur;
+ 	int status;
+ 	int use_ruid = 0;
+
+ 	PRINTK("NFS call setattr\n");
+ 	buf = nfs_rpc_alloc(server->wsize);
+ 	if (!buf)
+ 		return -EIO;
+ 	for (;;) {
+ 		cur = nfs_rpc_header(buf, NFSPROC_SETATTR, use_ruid);
+ 		cur = xdr_encode_fhandle(cur, fhandle);
+ 		cur = xdr_encode_sattr(cur, sattr);
+ 		status = nfs_rpc_call(server, buf, cur, server->wsize);
+ 		if (status < 0)
+ 			break;
+ 		cur = nfs_rpc_verify(buf);
+ 		if (!cur) {
+ 			status = -errno_NFSERR_IO;
+ 			break;
+ 		}
+ 		status = ntohl(*cur++);
+ 		if (status == NFS_OK) {
+ 			cur = xdr_decode_fattr(cur, fattr);
+ 			PRINTK("NFS reply setattr\n");
+ 			break;
+ 		}
+ 		/* Retry once with ruid set when fsuid is 0 but uid is not. */
+ 		if (!use_ruid && current->fsuid == 0 && current->uid != 0) {
+ 			use_ruid = 1;
+ 			continue;
+ 		}
+ 		PRINTK("NFS reply setattr failed = %d\n", status);
+ 		status = -nfs_stat_to_errno(status);
+ 		break;
+ 	}
+ 	nfs_rpc_free(buf);
+ 	return status;
+ }
+
+ /*
+  * NFSPROC_LOOKUP: look up name in directory dir; on success the file
+  * handle and attributes of the entry are stored in *fhandle and *fattr.
+  * Returns NFS_OK (0) or a negative errno.  With NFS_PROC_DEBUG defined,
+  * looking up "xyzzy" toggles debug output.
+  */
+ int nfs_proc_lookup(struct nfs_server *server, struct nfs_fh *dir, const char *name,
+ 	struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+ {
+ 	int *buf, *cur;
+ 	int status;
+ 	int use_ruid = 0;
+
+ 	PRINTK("NFS call lookup %s\n", name);
+ #ifdef NFS_PROC_DEBUG
+ 	if (!strcmp(name, "xyzzy"))
+ 		proc_debug = 1 - proc_debug;
+ #endif
+ 	buf = nfs_rpc_alloc(server->rsize);
+ 	if (!buf)
+ 		return -EIO;
+ 	for (;;) {
+ 		cur = nfs_rpc_header(buf, NFSPROC_LOOKUP, use_ruid);
+ 		cur = xdr_encode_fhandle(cur, dir);
+ 		cur = xdr_encode_string(cur, name);
+ 		status = nfs_rpc_call(server, buf, cur, server->rsize);
+ 		if (status < 0)
+ 			break;
+ 		cur = nfs_rpc_verify(buf);
+ 		if (!cur) {
+ 			status = -errno_NFSERR_IO;
+ 			break;
+ 		}
+ 		status = ntohl(*cur++);
+ 		if (status == NFS_OK) {
+ 			cur = xdr_decode_fhandle(cur, fhandle);
+ 			cur = xdr_decode_fattr(cur, fattr);
+ 			PRINTK("NFS reply lookup\n");
+ 			break;
+ 		}
+ 		/* Retry once with ruid set when fsuid is 0 but uid is not. */
+ 		if (!use_ruid && current->fsuid == 0 && current->uid != 0) {
+ 			use_ruid = 1;
+ 			continue;
+ 		}
+ 		PRINTK("NFS reply lookup failed = %d\n", status);
+ 		status = -nfs_stat_to_errno(status);
+ 		break;
+ 	}
+ 	nfs_rpc_free(buf);
+ 	return status;
+ }
+
+ /*
+  * NFSPROC_READLINK: read the target of the symbolic link fhandle.
+  *
+  * On success *string points at the link text inside the reply buffer
+  * and *len holds its length (at most maxlen).  The buffer is returned
+  * through *p0 and is never freed here, on any path.
+  * NOTE(review): the caller presumably frees *p0 - confirm at the call site.
+  * Returns NFS_OK (0) or a negative errno.
+  */
+ int nfs_proc_readlink(struct nfs_server *server, struct nfs_fh *fhandle,
+ 	int **p0, char **string, unsigned int *len, unsigned int maxlen)
+ {
+ 	int *cur;
+ 	int status;
+ 	int use_ruid = 0;
+
+ 	PRINTK("NFS call readlink\n");
+ 	*p0 = nfs_rpc_alloc(server->rsize);
+ 	if (!*p0)
+ 		return -EIO;
+ 	for (;;) {
+ 		cur = nfs_rpc_header(*p0, NFSPROC_READLINK, use_ruid);
+ 		cur = xdr_encode_fhandle(cur, fhandle);
+ 		status = nfs_rpc_call(server, *p0, cur, server->rsize);
+ 		if (status < 0)
+ 			break;
+ 		cur = nfs_rpc_verify(*p0);
+ 		if (!cur) {
+ 			status = -errno_NFSERR_IO;
+ 			break;
+ 		}
+ 		status = ntohl(*cur++);
+ 		if (status == NFS_OK) {
+ 			cur = xdr_decode_string2(cur, string, len, maxlen);
+ 			if (!cur) {
+ 				printk("nfs_proc_readlink: giant pathname\n");
+ 				status = -errno_NFSERR_IO;
+ 			} else {
+ 				PRINTK("NFS reply readlink\n");
+ 			}
+ 			break;
+ 		}
+ 		/* Retry once with ruid set when fsuid is 0 but uid is not. */
+ 		if (!use_ruid && current->fsuid == 0 && current->uid != 0) {
+ 			use_ruid = 1;
+ 			continue;
+ 		}
+ 		PRINTK("NFS reply readlink failed = %d\n", status);
+ 		status = -nfs_stat_to_errno(status);
+ 		break;
+ 	}
+ 	return status;
+ }
+
+ /*
+  * NFSPROC_READ: read up to count bytes at offset from fhandle into data.
+  * data is a user-space address when fs is non-zero and a kernel address
+  * otherwise.  *fattr receives the file's attributes.
+  *
+  * Returns the number of bytes read, or a negative errno.  A reply that
+  * carries more than count bytes is rejected as an I/O error.
+  */
+ int nfs_proc_read(struct nfs_server *server, struct nfs_fh *fhandle,
+ 	int offset, int count, char *data, struct nfs_fattr *fattr, int fs)
+ {
+ 	int *buf, *cur;
+ 	int status;
+ 	int use_ruid = 0;
+ 	int nread;
+
+ 	PRINTK("NFS call read %d @ %d\n", count, offset);
+ 	buf = nfs_rpc_alloc(server->rsize);
+ 	if (!buf)
+ 		return -EIO;
+ 	for (;;) {
+ 		cur = nfs_rpc_header(buf, NFSPROC_READ, use_ruid);
+ 		cur = xdr_encode_fhandle(cur, fhandle);
+ 		*cur++ = htonl(offset);
+ 		*cur++ = htonl(count);
+ 		*cur++ = htonl(count); /* traditional, could be any value */
+ 		status = nfs_rpc_call(server, buf, cur, server->rsize);
+ 		if (status < 0)
+ 			break;
+ 		cur = nfs_rpc_verify(buf);
+ 		if (!cur) {
+ 			status = -errno_NFSERR_IO;
+ 			break;
+ 		}
+ 		status = ntohl(*cur++);
+ 		if (status == NFS_OK) {
+ 			cur = xdr_decode_fattr(cur, fattr);
+ 			cur = xdr_decode_data(cur, data, &nread, count, fs);
+ 			if (!cur) {
+ 				printk("nfs_proc_read: giant data size\n");
+ 				status = -errno_NFSERR_IO;
+ 			} else {
+ 				status = nread;
+ 				PRINTK("NFS reply read %d\n", nread);
+ 			}
+ 			break;
+ 		}
+ 		/* Retry once with ruid set when fsuid is 0 but uid is not. */
+ 		if (!use_ruid && current->fsuid == 0 && current->uid != 0) {
+ 			use_ruid = 1;
+ 			continue;
+ 		}
+ 		PRINTK("NFS reply read failed = %d\n", status);
+ 		status = -nfs_stat_to_errno(status);
+ 		break;
+ 	}
+ 	nfs_rpc_free(buf);
+ 	return status;
+ }
+
+ /*
+  * NFSPROC_WRITE: write count bytes, fetched from data with
+  * memcpy_fromfs(), to fhandle at offset.  *fattr receives the file's
+  * attributes after the write.  Returns NFS_OK (0) or a negative errno.
+  */
+ int nfs_proc_write(struct nfs_server *server, struct nfs_fh *fhandle,
+ 	int offset, int count, char *data, struct nfs_fattr *fattr)
+ {
+ 	int *buf, *cur;
+ 	int status;
+ 	int use_ruid = 0;
+
+ 	PRINTK("NFS call write %d @ %d\n", count, offset);
+ 	buf = nfs_rpc_alloc(server->wsize);
+ 	if (!buf)
+ 		return -EIO;
+ 	for (;;) {
+ 		cur = nfs_rpc_header(buf, NFSPROC_WRITE, use_ruid);
+ 		cur = xdr_encode_fhandle(cur, fhandle);
+ 		*cur++ = htonl(offset); /* traditional, could be any value */
+ 		*cur++ = htonl(offset);
+ 		*cur++ = htonl(count); /* traditional, could be any value */
+ 		cur = xdr_encode_data(cur, data, count);
+ 		status = nfs_rpc_call(server, buf, cur, server->wsize);
+ 		if (status < 0)
+ 			break;
+ 		cur = nfs_rpc_verify(buf);
+ 		if (!cur) {
+ 			status = -errno_NFSERR_IO;
+ 			break;
+ 		}
+ 		status = ntohl(*cur++);
+ 		if (status == NFS_OK) {
+ 			cur = xdr_decode_fattr(cur, fattr);
+ 			PRINTK("NFS reply write\n");
+ 			break;
+ 		}
+ 		/* Retry once with ruid set when fsuid is 0 but uid is not. */
+ 		if (!use_ruid && current->fsuid == 0 && current->uid != 0) {
+ 			use_ruid = 1;
+ 			continue;
+ 		}
+ 		PRINTK("NFS reply write failed = %d\n", status);
+ 		status = -nfs_stat_to_errno(status);
+ 		break;
+ 	}
+ 	nfs_rpc_free(buf);
+ 	return status;
+ }
+
+ /*
+  * NFSPROC_CREATE: create name in directory dir with the attributes in
+  * *sattr.  On success the new handle and attributes are stored in
+  * *fhandle and *fattr.  Returns NFS_OK (0) or a negative errno.
+  */
+ int nfs_proc_create(struct nfs_server *server, struct nfs_fh *dir,
+ 	const char *name, struct nfs_sattr *sattr,
+ 	struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+ {
+ 	int *buf, *cur;
+ 	int status;
+ 	int use_ruid = 0;
+
+ 	PRINTK("NFS call create %s\n", name);
+ 	buf = nfs_rpc_alloc(server->wsize);
+ 	if (!buf)
+ 		return -EIO;
+ 	for (;;) {
+ 		cur = nfs_rpc_header(buf, NFSPROC_CREATE, use_ruid);
+ 		cur = xdr_encode_fhandle(cur, dir);
+ 		cur = xdr_encode_string(cur, name);
+ 		cur = xdr_encode_sattr(cur, sattr);
+ 		status = nfs_rpc_call(server, buf, cur, server->wsize);
+ 		if (status < 0)
+ 			break;
+ 		cur = nfs_rpc_verify(buf);
+ 		if (!cur) {
+ 			status = -errno_NFSERR_IO;
+ 			break;
+ 		}
+ 		status = ntohl(*cur++);
+ 		if (status == NFS_OK) {
+ 			cur = xdr_decode_fhandle(cur, fhandle);
+ 			cur = xdr_decode_fattr(cur, fattr);
+ 			PRINTK("NFS reply create\n");
+ 			break;
+ 		}
+ 		/* Retry once with ruid set when fsuid is 0 but uid is not. */
+ 		if (!use_ruid && current->fsuid == 0 && current->uid != 0) {
+ 			use_ruid = 1;
+ 			continue;
+ 		}
+ 		PRINTK("NFS reply create failed = %d\n", status);
+ 		status = -nfs_stat_to_errno(status);
+ 		break;
+ 	}
+ 	nfs_rpc_free(buf);
+ 	return status;
+ }
+
+ /*
+  * NFSPROC_REMOVE: remove the entry name from directory dir.
+  * Returns NFS_OK (0) or a negative errno.
+  */
+ int nfs_proc_remove(struct nfs_server *server, struct nfs_fh *dir, const char *name)
+ {
+ 	int *buf, *cur;
+ 	int status;
+ 	int use_ruid = 0;
+
+ 	PRINTK("NFS call remove %s\n", name);
+ 	buf = nfs_rpc_alloc(server->wsize);
+ 	if (!buf)
+ 		return -EIO;
+ 	for (;;) {
+ 		cur = nfs_rpc_header(buf, NFSPROC_REMOVE, use_ruid);
+ 		cur = xdr_encode_fhandle(cur, dir);
+ 		cur = xdr_encode_string(cur, name);
+ 		status = nfs_rpc_call(server, buf, cur, server->wsize);
+ 		if (status < 0)
+ 			break;
+ 		cur = nfs_rpc_verify(buf);
+ 		if (!cur) {
+ 			status = -errno_NFSERR_IO;
+ 			break;
+ 		}
+ 		status = ntohl(*cur++);
+ 		if (status == NFS_OK) {
+ 			PRINTK("NFS reply remove\n");
+ 			break;
+ 		}
+ 		/* Retry once with ruid set when fsuid is 0 but uid is not. */
+ 		if (!use_ruid && current->fsuid == 0 && current->uid != 0) {
+ 			use_ruid = 1;
+ 			continue;
+ 		}
+ 		PRINTK("NFS reply remove failed = %d\n", status);
+ 		status = -nfs_stat_to_errno(status);
+ 		break;
+ 	}
+ 	nfs_rpc_free(buf);
+ 	return status;
+ }
+
+ /*
+  * NFSPROC_RENAME: rename old_name in old_dir to new_name in new_dir.
+  * Returns NFS_OK (0) or a negative errno.
+  */
+ int nfs_proc_rename(struct nfs_server *server,
+ 	struct nfs_fh *old_dir, const char *old_name,
+ 	struct nfs_fh *new_dir, const char *new_name)
+ {
+ 	int *buf, *cur;
+ 	int status;
+ 	int use_ruid = 0;
+
+ 	PRINTK("NFS call rename %s -> %s\n", old_name, new_name);
+ 	buf = nfs_rpc_alloc(server->wsize);
+ 	if (!buf)
+ 		return -EIO;
+ 	for (;;) {
+ 		cur = nfs_rpc_header(buf, NFSPROC_RENAME, use_ruid);
+ 		cur = xdr_encode_fhandle(cur, old_dir);
+ 		cur = xdr_encode_string(cur, old_name);
+ 		cur = xdr_encode_fhandle(cur, new_dir);
+ 		cur = xdr_encode_string(cur, new_name);
+ 		status = nfs_rpc_call(server, buf, cur, server->wsize);
+ 		if (status < 0)
+ 			break;
+ 		cur = nfs_rpc_verify(buf);
+ 		if (!cur) {
+ 			status = -errno_NFSERR_IO;
+ 			break;
+ 		}
+ 		status = ntohl(*cur++);
+ 		if (status == NFS_OK) {
+ 			PRINTK("NFS reply rename\n");
+ 			break;
+ 		}
+ 		/* Retry once with ruid set when fsuid is 0 but uid is not. */
+ 		if (!use_ruid && current->fsuid == 0 && current->uid != 0) {
+ 			use_ruid = 1;
+ 			continue;
+ 		}
+ 		PRINTK("NFS reply rename failed = %d\n", status);
+ 		status = -nfs_stat_to_errno(status);
+ 		break;
+ 	}
+ 	nfs_rpc_free(buf);
+ 	return status;
+ }
+
+ /*
+  * NFSPROC_LINK: create a hard link called name in directory dir that
+  * refers to the file fhandle.  Returns NFS_OK (0) or a negative errno.
+  */
+ int nfs_proc_link(struct nfs_server *server, struct nfs_fh *fhandle,
+ 	struct nfs_fh *dir, const char *name)
+ {
+ 	int *buf, *cur;
+ 	int status;
+ 	int use_ruid = 0;
+
+ 	PRINTK("NFS call link %s\n", name);
+ 	buf = nfs_rpc_alloc(server->wsize);
+ 	if (!buf)
+ 		return -EIO;
+ 	for (;;) {
+ 		cur = nfs_rpc_header(buf, NFSPROC_LINK, use_ruid);
+ 		cur = xdr_encode_fhandle(cur, fhandle);
+ 		cur = xdr_encode_fhandle(cur, dir);
+ 		cur = xdr_encode_string(cur, name);
+ 		status = nfs_rpc_call(server, buf, cur, server->wsize);
+ 		if (status < 0)
+ 			break;
+ 		cur = nfs_rpc_verify(buf);
+ 		if (!cur) {
+ 			status = -errno_NFSERR_IO;
+ 			break;
+ 		}
+ 		status = ntohl(*cur++);
+ 		if (status == NFS_OK) {
+ 			PRINTK("NFS reply link\n");
+ 			break;
+ 		}
+ 		/* Retry once with ruid set when fsuid is 0 but uid is not. */
+ 		if (!use_ruid && current->fsuid == 0 && current->uid != 0) {
+ 			use_ruid = 1;
+ 			continue;
+ 		}
+ 		PRINTK("NFS reply link failed = %d\n", status);
+ 		status = -nfs_stat_to_errno(status);
+ 		break;
+ 	}
+ 	nfs_rpc_free(buf);
+ 	return status;
+ }
+
+ /*
+  * NFSPROC_SYMLINK: create a symbolic link called name in directory dir
+  * whose target is path, with the attributes in *sattr.
+  * Returns NFS_OK (0) or a negative errno.
+  */
+ int nfs_proc_symlink(struct nfs_server *server, struct nfs_fh *dir,
+ 	const char *name, const char *path, struct nfs_sattr *sattr)
+ {
+ 	int *buf, *cur;
+ 	int status;
+ 	int use_ruid = 0;
+
+ 	PRINTK("NFS call symlink %s -> %s\n", name, path);
+ 	buf = nfs_rpc_alloc(server->wsize);
+ 	if (!buf)
+ 		return -EIO;
+ 	for (;;) {
+ 		cur = nfs_rpc_header(buf, NFSPROC_SYMLINK, use_ruid);
+ 		cur = xdr_encode_fhandle(cur, dir);
+ 		cur = xdr_encode_string(cur, name);
+ 		cur = xdr_encode_string(cur, path);
+ 		cur = xdr_encode_sattr(cur, sattr);
+ 		status = nfs_rpc_call(server, buf, cur, server->wsize);
+ 		if (status < 0)
+ 			break;
+ 		cur = nfs_rpc_verify(buf);
+ 		if (!cur) {
+ 			status = -errno_NFSERR_IO;
+ 			break;
+ 		}
+ 		status = ntohl(*cur++);
+ 		if (status == NFS_OK) {
+ 			PRINTK("NFS reply symlink\n");
+ 			break;
+ 		}
+ 		/* Retry once with ruid set when fsuid is 0 but uid is not. */
+ 		if (!use_ruid && current->fsuid == 0 && current->uid != 0) {
+ 			use_ruid = 1;
+ 			continue;
+ 		}
+ 		PRINTK("NFS reply symlink failed = %d\n", status);
+ 		status = -nfs_stat_to_errno(status);
+ 		break;
+ 	}
+ 	nfs_rpc_free(buf);
+ 	return status;
+ }
+
+ /*
+  * NFSPROC_MKDIR: create directory name in dir with the attributes in
+  * *sattr.  On success the new handle and attributes are stored in
+  * *fhandle and *fattr.  Returns NFS_OK (0) or a negative errno.
+  */
+ int nfs_proc_mkdir(struct nfs_server *server, struct nfs_fh *dir,
+ 	const char *name, struct nfs_sattr *sattr,
+ 	struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+ {
+ 	int *buf, *cur;
+ 	int status;
+ 	int use_ruid = 0;
+
+ 	PRINTK("NFS call mkdir %s\n", name);
+ 	buf = nfs_rpc_alloc(server->wsize);
+ 	if (!buf)
+ 		return -EIO;
+ 	for (;;) {
+ 		cur = nfs_rpc_header(buf, NFSPROC_MKDIR, use_ruid);
+ 		cur = xdr_encode_fhandle(cur, dir);
+ 		cur = xdr_encode_string(cur, name);
+ 		cur = xdr_encode_sattr(cur, sattr);
+ 		status = nfs_rpc_call(server, buf, cur, server->wsize);
+ 		if (status < 0)
+ 			break;
+ 		cur = nfs_rpc_verify(buf);
+ 		if (!cur) {
+ 			status = -errno_NFSERR_IO;
+ 			break;
+ 		}
+ 		status = ntohl(*cur++);
+ 		if (status == NFS_OK) {
+ 			cur = xdr_decode_fhandle(cur, fhandle);
+ 			cur = xdr_decode_fattr(cur, fattr);
+ 			PRINTK("NFS reply mkdir\n");
+ 			break;
+ 		}
+ 		/* Retry once with ruid set when fsuid is 0 but uid is not. */
+ 		if (!use_ruid && current->fsuid == 0 && current->uid != 0) {
+ 			use_ruid = 1;
+ 			continue;
+ 		}
+ 		PRINTK("NFS reply mkdir failed = %d\n", status);
+ 		status = -nfs_stat_to_errno(status);
+ 		break;
+ 	}
+ 	nfs_rpc_free(buf);
+ 	return status;
+ }
+
+/*
+ * NFSPROC_RMDIR: remove directory `name' from directory `dir'.
+ *
+ * Returns NFS_OK on success, a negative errno otherwise.  A server-side
+ * failure seen while fsuid is 0 and the real uid is not triggers a single
+ * retry with the real uid in the credentials.
+ */
+int nfs_proc_rmdir(struct nfs_server *server, struct nfs_fh *dir, const char *name)
+{
+	int *buf, *cur;
+	int status;
+	int ruid = 0;
+
+	PRINTK("NFS call rmdir %s\n", name);
+	buf = nfs_rpc_alloc(server->wsize);
+	if (!buf)
+		return -EIO;
+	for (;;) {
+		cur = nfs_rpc_header(buf, NFSPROC_RMDIR, ruid);
+		cur = xdr_encode_fhandle(cur, dir);
+		cur = xdr_encode_string(cur, name);
+		status = nfs_rpc_call(server, buf, cur, server->wsize);
+		if (status < 0)
+			break;
+		cur = nfs_rpc_verify(buf);
+		if (!cur) {
+			status = -errno_NFSERR_IO;
+			break;
+		}
+		status = ntohl(*cur++);
+		if (status == NFS_OK) {
+			PRINTK("NFS reply rmdir\n");
+			break;
+		}
+		if (!ruid && current->fsuid == 0 && current->uid != 0) {
+			/* second and last attempt, using the real uid */
+			ruid = 1;
+			continue;
+		}
+		PRINTK("NFS reply rmdir failed = %d\n", status);
+		status = -nfs_stat_to_errno(status);
+		break;
+	}
+	nfs_rpc_free(buf);
+	return status;
+}
+
+/*
+ * NFSPROC_READDIR: read directory entries of `fhandle' starting at
+ * position `cookie'.  The server is asked for at most server->rsize
+ * bytes; at most `count' entries are decoded into entry[0..].
+ *
+ * Returns the number of entries decoded, or a negative errno.  When the
+ * reply signals end of directory, the eof field of the last decoded
+ * entry is set.  A server-side failure seen while fsuid is 0 and the
+ * real uid is not triggers a single retry with the real uid.
+ */
+int nfs_proc_readdir(struct nfs_server *server, struct nfs_fh *fhandle,
+ int cookie, int count, struct nfs_entry *entry)
+{
+ int *p, *p0;
+ int status;
+ int ruid = 0;
+ int i;
+ int size;
+ int eof;
+
+ PRINTK("NFS call readdir %d @ %d\n", count, cookie);
+ size = server->rsize;
+ if (!(p0 = nfs_rpc_alloc(server->rsize)))
+ return -EIO;
+retry:
+ p = nfs_rpc_header(p0, NFSPROC_READDIR, ruid);
+ p = xdr_encode_fhandle(p, fhandle);
+ *p++ = htonl(cookie);
+ *p++ = htonl(size);
+ if ((status = nfs_rpc_call(server, p0, p, server->rsize)) < 0) {
+ nfs_rpc_free(p0);
+ return status;
+ }
+ if (!(p = nfs_rpc_verify(p0)))
+ status = -errno_NFSERR_IO;
+ else if ((status = ntohl(*p++)) == NFS_OK) {
+ /* each entry is preceded by a non-zero word; a zero word ends the list
+ * (presumably the XDR "value follows" marker -- confirm against RFC 1094) */
+ for (i = 0; i < count && *p++; i++) {
+ if (!(p = xdr_decode_entry(p, entry++)))
+ break;
+ }
+ if (!p) {
+ printk("nfs_proc_readdir: giant filename\n");
+ status = -errno_NFSERR_IO;
+ }
+ else {
+ /*
+ * i == count: the marker word has not been consumed yet; eof only
+ * if it is zero and the word after it is non-zero.
+ * i < count: the loop already consumed the zero marker, so the
+ * next word alone decides.
+ */
+ eof = (i == count && !*p++ && *p++)
+ || (i < count && *p++);
+ /* entry was advanced past the last decoded slot, hence [-1] */
+ if (eof && i)
+ entry[-1].eof = 1;
+ PRINTK("NFS reply readdir %d %s\n", i,
+ eof ? "eof" : "");
+ status = i;
+ }
+ }
+ else {
+ if (!ruid && current->fsuid == 0 && current->uid != 0) {
+ ruid = 1;
+ goto retry;
+ }
+ PRINTK("NFS reply readdir failed = %d\n", status);
+ status = -nfs_stat_to_errno(status);
+ }
+ nfs_rpc_free(p0);
+ return status;
+}
+
+/*
+ * NFSPROC_STATFS: fetch filesystem information for the filesystem that
+ * contains `fhandle' and decode it into `res'.
+ *
+ * Returns NFS_OK on success, a negative errno otherwise.  A server-side
+ * failure seen while fsuid is 0 and the real uid is not triggers a single
+ * retry with the real uid in the credentials.
+ */
+int nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+	struct nfs_fsinfo *res)
+{
+	int *buf, *cur;
+	int status;
+	int ruid = 0;
+
+	PRINTK("NFS call statfs\n");
+	buf = nfs_rpc_alloc(server->rsize);
+	if (!buf)
+		return -EIO;
+	for (;;) {
+		cur = nfs_rpc_header(buf, NFSPROC_STATFS, ruid);
+		cur = xdr_encode_fhandle(cur, fhandle);
+		status = nfs_rpc_call(server, buf, cur, server->rsize);
+		if (status < 0)
+			break;
+		cur = nfs_rpc_verify(buf);
+		if (!cur) {
+			status = -errno_NFSERR_IO;
+			break;
+		}
+		status = ntohl(*cur++);
+		if (status == NFS_OK) {
+			cur = xdr_decode_fsinfo(cur, res);
+			PRINTK("NFS reply statfs\n");
+			break;
+		}
+		if (!ruid && current->fsuid == 0 && current->uid != 0) {
+			/* second and last attempt, using the real uid */
+			ruid = 1;
+			continue;
+		}
+		PRINTK("NFS reply statfs failed = %d\n", status);
+		status = -nfs_stat_to_errno(status);
+		break;
+	}
+	nfs_rpc_free(buf);
+	return status;
+}
+
+/*
+ * Here are a few RPC-assist functions.
+ */
+
+/*
+ * Write an RPC call header for NFS procedure `procedure' at `p' and
+ * return a pointer to the first word after it, where the caller appends
+ * the procedure arguments.
+ *
+ * The header carries an RPC_AUTH_UNIX credential (stamp, node name, uid,
+ * egid and up to 16 supplementary groups) followed by an empty
+ * RPC_AUTH_NULL verifier.  `ruid' non-zero selects current->uid instead
+ * of current->fsuid as the uid in the credential.
+ */
+static int *nfs_rpc_header(int *p, int procedure, int ruid)
+{
+ int *p1, *p2;
+ int i;
+ static int xid = 0;
+ unsigned char *sys = (unsigned char *) system_utsname.nodename;
+
+ /* first call: seed the transaction id from the clock and the node name */
+ if (xid == 0) {
+ xid = CURRENT_TIME;
+ xid ^= (sys[3]<<24) | (sys[2]<<16) | (sys[1]<<8) | sys[0];
+ }
+ *p++ = htonl(++xid);
+ *p++ = htonl(RPC_CALL);
+ *p++ = htonl(RPC_VERSION);
+ *p++ = htonl(NFS_PROGRAM);
+ *p++ = htonl(NFS_VERSION);
+ *p++ = htonl(procedure);
+ *p++ = htonl(RPC_AUTH_UNIX);
+ p1 = p++; /* credential length, filled in below */
+ *p++ = htonl(CURRENT_TIME); /* traditional, could be anything */
+ p = xdr_encode_string(p, (char *) sys);
+ *p++ = htonl(ruid ? current->uid : current->fsuid);
+ *p++ = htonl(current->egid);
+ p2 = p++; /* group count, filled in below */
+ for (i = 0; i < 16 && i < NGROUPS && current->groups[i] != NOGROUP; i++)
+ *p++ = htonl(current->groups[i]);
+ *p2 = htonl(i);
+ /* words written after the length slot, converted to bytes (4 per word) */
+ *p1 = htonl((p - (p1 + 1)) << 2);
+ *p++ = htonl(RPC_AUTH_NULL);
+ *p++ = htonl(0);
+ return p;
+}
+
+/*
+ * Check the RPC reply header at `p' and return a pointer to the first
+ * word after it (the procedure-specific result), or NULL when the reply
+ * is not an accepted, successful RPC reply with a sane verifier.
+ */
+static int *nfs_rpc_verify(int *p)
+{
+	unsigned int word;
+
+	p++;		/* first word of the reply is not examined here */
+
+	word = ntohl(*p++);
+	if (word != RPC_REPLY) {
+		printk("nfs_rpc_verify: not an RPC reply: %d\n", word);
+		return NULL;
+	}
+
+	word = ntohl(*p++);
+	if (word != RPC_MSG_ACCEPTED) {
+		printk("nfs_rpc_verify: RPC call rejected: %d\n", word);
+		return NULL;
+	}
+
+	/* verifier flavour */
+	word = ntohl(*p++);
+	if (word != RPC_AUTH_NULL && word != RPC_AUTH_UNIX
+	    && word != RPC_AUTH_SHORT) {
+		printk("nfs_rpc_verify: bad RPC authentication type: %d\n", word);
+		return NULL;
+	}
+
+	/* verifier length in bytes; skip its body */
+	word = ntohl(*p++);
+	if (word > 400) {
+		printk("nfs_rpc_verify: giant auth size\n");
+		return NULL;
+	}
+	p += QUADLEN(word);
+
+	word = ntohl(*p++);
+	if (word != RPC_SUCCESS) {
+		printk("nfs_rpc_verify: RPC call failed: %d\n", word);
+		return NULL;
+	}
+	return p;
+}
+
+/*
+ * We need to translate between nfs status return values and
+ * the local errno values which may not be the same.
+ *
+ * Each row maps one NFS status code to the (positive) local errno.
+ * The table is terminated by a row whose stat is -1; its errno (EIO)
+ * is what nfs_stat_to_errno() returns for a status it cannot find.
+ */
+
+static struct {
+ int stat;
+ int errno;
+} nfs_errtbl[] = {
+ { NFS_OK, 0 },
+ { NFSERR_PERM, EPERM },
+ { NFSERR_NOENT, ENOENT },
+ { NFSERR_IO, errno_NFSERR_IO },
+ { NFSERR_NXIO, ENXIO },
+ { NFSERR_ACCES, EACCES },
+ { NFSERR_EXIST, EEXIST },
+ { NFSERR_NODEV, ENODEV },
+ { NFSERR_NOTDIR, ENOTDIR },
+ { NFSERR_ISDIR, EISDIR },
+ { NFSERR_INVAL, EINVAL },
+ { NFSERR_FBIG, EFBIG },
+ { NFSERR_NOSPC, ENOSPC },
+ { NFSERR_ROFS, EROFS },
+ { NFSERR_NAMETOOLONG, ENAMETOOLONG },
+ { NFSERR_NOTEMPTY, ENOTEMPTY },
+ { NFSERR_DQUOT, EDQUOT },
+ { NFSERR_STALE, ESTALE },
+#ifdef EWFLUSH
+ { NFSERR_WFLUSH, EWFLUSH },
+#endif
+ { -1, EIO } /* sentinel: must stay last */
+};
+
+/*
+ * Map the NFS status `stat' to a positive local errno using nfs_errtbl.
+ * A status that is not in the table is logged and mapped to the errno
+ * stored in the table's terminating row.
+ */
+static int nfs_stat_to_errno(int stat)
+{
+	int idx = 0;
+
+	while (nfs_errtbl[idx].stat != -1) {
+		if (nfs_errtbl[idx].stat == stat)
+			return nfs_errtbl[idx].errno;
+		idx++;
+	}
+	/* idx now addresses the terminating row */
+	printk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
+	return nfs_errtbl[idx].errno;
+}
+
diff --git a/fs/nfs/sock.c b/fs/nfs/sock.c
new file mode 100644
index 000000000..2455d938a
--- /dev/null
+++ b/fs/nfs/sock.c
@@ -0,0 +1,242 @@
+/*
+ * linux/fs/nfs/sock.c
+ *
+ * Copyright (C) 1992, 1993 Rick Sladkey
+ *
+ * low-level nfs remote procedure call interface
+ *
+ * FIXES
+ *
+ * 2/7/94 James Bottomley and Jon Peatfield DAMTP, Cambridge University
+ *
+ * An xid mismatch no longer causes the request to be trashed.
+ *
+ * Peter Eriksson - incorrect XID used to confuse Linux
+ * Florian La Roche - use the correct max size, if reading a packet and
+ * also verify, if the whole packet has been read...
+ * more checks should be done in proc.c...
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/nfs_fs.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/fcntl.h>
+#include <asm/segment.h>
+#include <linux/in.h>
+#include <linux/net.h>
+#include <linux/mm.h>
+
+/* JEJB/JSP 2/7/94
+ * this must match the value of NFS_SLACK_SPACE in linux/fs/nfs/proc.c
+ * ***FIXME*** should probably put this in nfs_fs.h */
+#define NFS_SLACK_SPACE 1024
+
+
+extern struct socket *socki_lookup(struct inode *inode);
+
+#define _S(nr) (1<<((nr)-1))
+
+/*
+ * We violate some modularity principles here by poking around
+ * in some socket internals. Besides having to call socket
+ * functions from kernel-space instead of user space, the socket
+ * interface does not lend itself well to being cleanly called
+ * without a file descriptor. Since the nfs calls can run on
+ * behalf of any process, the superblock maintains a file pointer
+ * to the server socket.
+ */
+
+/*
+ * Send the RPC request stored in [start, end) over the server socket and
+ * wait for the reply whose first word equals the request's first word
+ * (the xid).  The reply is received into the same buffer, at `start';
+ * up to size + NFS_SLACK_SPACE bytes are read.
+ *
+ * While waiting, every signal is blocked except SIGKILL and, on
+ * NFS_MOUNT_INTR mounts, SIGINT/SIGQUIT when they have the default
+ * disposition.  The request is resent with a doubling timeout; after
+ * `retrans' resends a NFS_MOUNT_SOFT mount gives up with -EIO, any other
+ * mount keeps trying.
+ *
+ * Returns the value of the final recvfrom (a byte count) on success, or
+ * a negative errno: the send/recv error, -ERESTARTSYS when an unblocked
+ * signal arrived, -EIO on soft timeout or short read, -EBADF when the
+ * socket cannot be found.
+ */
+static int do_nfs_rpc_call(struct nfs_server *server, int *start, int *end, int size)
+{
+	struct file *file;
+	struct inode *inode;
+	struct socket *sock;
+	unsigned short fs;
+	int result;
+	int xid;
+	int len;
+	select_table wait_table;
+	struct select_table_entry entry;
+	int (*select) (struct inode *, struct file *, int, select_table *);
+	int init_timeout, max_timeout;
+	int timeout;
+	int retrans;
+	int major_timeout_seen;
+	char *server_name;
+	int n;
+	int addrlen;
+	unsigned long old_mask;
+	/* JEJB/JSP 2/7/94
+	 * This is for a 4 byte recv of the xid only */
+	int recv_xid;
+
+	xid = start[0];
+	len = ((char *) end) - ((char *) start);
+	file = server->file;
+	inode = file->f_inode;
+	select = file->f_op->select;
+	sock = socki_lookup(inode);
+	if (!sock) {
+		printk("nfs_rpc_call: socki_lookup failed\n");
+		return -EBADF;
+	}
+	init_timeout = server->timeo;
+	max_timeout = NFS_MAX_RPC_TIMEOUT*HZ/10;
+	retrans = server->retrans;
+	major_timeout_seen = 0;
+	server_name = server->hostname;
+	old_mask = current->blocked;
+	current->blocked |= ~(_S(SIGKILL)
+#if 0
+		| _S(SIGSTOP)
+#endif
+		| ((server->flags & NFS_MOUNT_INTR)
+		? ((current->sigaction[SIGINT - 1].sa_handler == SIG_DFL
+			? _S(SIGINT) : 0)
+		| (current->sigaction[SIGQUIT - 1].sa_handler == SIG_DFL
+			? _S(SIGQUIT) : 0))
+		: 0));
+	/* the socket calls below are handed kernel-space buffers */
+	fs = get_fs();
+	set_fs(get_ds());
+	for (n = 0, timeout = init_timeout; ; n++, timeout <<= 1) {
+		result = sock->ops->send(sock, (void *) start, len, 0, 0);
+		if (result < 0) {
+			printk("nfs_rpc_call: send error = %d\n", result);
+			break;
+		}
+	re_select:
+		wait_table.nr = 0;
+		wait_table.entry = &entry;
+		current->state = TASK_INTERRUPTIBLE;
+		if (!select(inode, file, SEL_IN, &wait_table)
+		    && !select(inode, file, SEL_IN, NULL)) {
+			if (timeout > max_timeout) {
+				/* JEJB/JSP 2/7/94
+				 * This is useful to see if the system is
+				 * hanging */
+				printk("NFS max timeout reached on %s\n",
+					server_name);
+				timeout = max_timeout;
+			}
+			current->timeout = jiffies + timeout;
+			schedule();
+			remove_wait_queue(entry.wait_address, &entry.wait);
+			current->state = TASK_RUNNING;
+			if (current->signal & ~current->blocked) {
+				current->timeout = 0;
+				result = -ERESTARTSYS;
+				break;
+			}
+			/* timeout cleared while we slept: the timer expired */
+			if (!current->timeout) {
+				if (n < retrans)
+					continue;
+				if (server->flags & NFS_MOUNT_SOFT) {
+					printk("NFS server %s not responding, "
+						"timed out\n", server_name);
+					result = -EIO;
+					break;
+				}
+				/* hard mount: start a new round with a
+				 * doubled initial timeout */
+				n = 0;
+				timeout = init_timeout;
+				init_timeout <<= 1;
+				if (!major_timeout_seen) {
+					printk("NFS server %s not responding, "
+						"still trying\n", server_name);
+				}
+				major_timeout_seen = 1;
+				continue;
+			}
+			else
+				current->timeout = 0;
+		}
+		else if (wait_table.nr)
+			remove_wait_queue(entry.wait_address, &entry.wait);
+		current->state = TASK_RUNNING;
+		addrlen = 0;
+		/* JEJB/JSP 2/7/94
+		 * Get the xid from the next packet using a peek, so keep it
+		 * on the recv queue. If it is wrong, it will be some reply
+		 * we don't now need, so discard it */
+		result = sock->ops->recvfrom(sock, (void *)&recv_xid,
+					sizeof(recv_xid), 1, MSG_PEEK,
+					NULL, &addrlen);
+		if (result < 0) {
+			if (result == -EAGAIN) {
+#if 0
+				printk("nfs_rpc_call: bad select ready\n");
+#endif
+				goto re_select;
+			}
+			if (result == -ECONNREFUSED) {
+#if 0
+				printk("nfs_rpc_call: server playing coy\n");
+#endif
+				goto re_select;
+			}
+			if (result != -ERESTARTSYS) {
+				printk("nfs_rpc_call: recv error = %d\n",
+					-result);
+			}
+			break;
+		}
+		if (recv_xid == xid) {
+			if (major_timeout_seen)
+				printk("NFS server %s OK\n", server_name);
+			break;
+		}
+		/* JEJB/JSP 2/7/94
+		 * we have xid mismatch, so discard the packet and start
+		 * again. What a hack! but I can't call recvfrom with
+		 * a null buffer yet. */
+		(void)sock->ops->recvfrom(sock, (void *)&recv_xid,
+					sizeof(recv_xid), 1, 0, NULL,
+					&addrlen);
+#if 0
+		printk("nfs_rpc_call: XID mismatch\n");
+#endif
+		goto re_select;
+	}
+	/* JEJB/JSP 2/7/94
+	 *
+	 * we have the correct xid, so read into the correct place and
+	 * return it
+	 *
+	 * Every error exit from the loop above leaves result < 0; only the
+	 * xid-match exit leaves it >= 0.  Read the reply only in that case,
+	 * so that an error code is not overwritten and an unrelated packet
+	 * is not consumed into the caller's buffer.
+	 */
+	if (result >= 0) {
+		result = sock->ops->recvfrom(sock, (void *)start,
+					size + NFS_SLACK_SPACE, 1, 0, NULL,
+					&addrlen);
+		if (result < 0) {
+			printk("NFS: notice message: result=%d\n", result);
+		} else if (result < addrlen) {
+			printk("NFS: just caught a too small read memory size..., email to NET channel\n");
+			printk("NFS: result=%d,addrlen=%d\n", result, addrlen);
+			result = -EIO;
+		}
+	}
+	current->blocked = old_mask;
+	set_fs(fs);
+	return result;
+}
+
+/*
+ * For now we lock out other simultaneous nfs calls for the same filesystem
+ * because we are single-threaded and don't want to get mismatched
+ * RPC replies.
+ *
+ * Sleeps until server->lock is clear, takes it, performs the call via
+ * do_nfs_rpc_call() and wakes up the waiters afterwards.  Returns
+ * whatever do_nfs_rpc_call() returned.
+ */
+
+int nfs_rpc_call(struct nfs_server *server, int *start, int *end, int size)
+{
+	int rc;
+
+	for (;;) {
+		if (!server->lock)
+			break;
+		sleep_on(&server->wait);
+	}
+	server->lock = 1;
+	rc = do_nfs_rpc_call(server, start, end, size);
+	server->lock = 0;
+	wake_up(&server->wait);
+	return rc;
+}
+
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
new file mode 100644
index 000000000..4cbe631c6
--- /dev/null
+++ b/fs/nfs/symlink.c
@@ -0,0 +1,116 @@
+/*
+ * linux/fs/nfs/symlink.c
+ *
+ * Copyright (C) 1992 Rick Sladkey
+ *
+ * Optimization changes Copyright (C) 1994 Florian La Roche
+ *
+ * nfs symlink handling code
+ */
+
+#include <asm/segment.h>
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/nfs_fs.h>
+#include <linux/stat.h>
+#include <linux/mm.h>
+#include <linux/malloc.h>
+#include <linux/string.h>
+
+static int nfs_readlink(struct inode *, char *, int);
+static int nfs_follow_link(struct inode *, struct inode *, int, int,
+ struct inode **);
+
+/*
+ * symlinks can't do much...
+ *
+ * Inode operations for NFS symbolic links: only readlink and
+ * follow_link are provided, every other slot is NULL.
+ */
+struct inode_operations nfs_symlink_inode_operations = {
+ NULL, /* no file-operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ nfs_readlink, /* readlink */
+ nfs_follow_link, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * Resolve `inode' if it is a symbolic link, relative to directory `dir'
+ * (the current root when `dir' is NULL), and store the resulting inode
+ * in *res_inode.  `flag' and `mode' are passed through to open_namei().
+ *
+ * Returns 0 on success, -ENOENT when `inode' is NULL, -ELOOP when
+ * current->link_count already exceeds 5, or the error from
+ * nfs_proc_readlink()/open_namei().  A non-symlink inode is returned
+ * unchanged in *res_inode.
+ */
+static int nfs_follow_link(struct inode *dir, struct inode *inode,
+ int flag, int mode, struct inode **res_inode)
+{
+ int error, *mem;
+ unsigned int len;
+ char *res, *res2;
+
+ *res_inode = NULL;
+ if (!dir) {
+ dir = current->fs->root;
+ dir->i_count++;
+ }
+ if (!inode) {
+ iput(dir);
+ return -ENOENT;
+ }
+ if (!S_ISLNK(inode->i_mode)) {
+ iput(dir);
+ *res_inode = inode;
+ return 0;
+ }
+ if (current->link_count > 5) {
+ iput(inode);
+ iput(dir);
+ return -ELOOP;
+ }
+ /* NOTE(review): `mem' is presumably the RPC buffer that `res' points
+ * into -- confirm against nfs_proc_readlink() */
+ error = nfs_proc_readlink(NFS_SERVER(inode), NFS_FH(inode), &mem,
+ &res, &len, NFS_MAXPATHLEN);
+ if (error) {
+ iput(inode);
+ iput(dir);
+ kfree(mem);
+ return error;
+ }
+ /* keep retrying until the allocation succeeds */
+ while ((res2 = (char *) kmalloc(NFS_MAXPATHLEN + 1, GFP_NFS)) == NULL) {
+ schedule();
+ }
+ /* private NUL-terminated copy of the link text */
+ memcpy(res2, res, len);
+ res2[len] = '\0';
+ kfree(mem);
+ iput(inode);
+ /* link_count is raised for the duration of the nested lookup */
+ current->link_count++;
+ /* NOTE(review): `dir' is not released here; presumably open_namei()
+ * consumes the reference -- confirm */
+ error = open_namei(res2, flag, mode, res_inode, dir);
+ current->link_count--;
+ kfree_s(res2, NFS_MAXPATHLEN + 1);
+ return error;
+}
+
+/*
+ * Copy the target of the symbolic link `inode' into the user-space
+ * buffer `buffer' of `buflen' bytes.  The reference on `inode' is
+ * released in all cases.
+ *
+ * Returns the number of bytes copied, -EINVAL when `inode' is not a
+ * symbolic link, or the error from nfs_proc_readlink().
+ *
+ * A terminating '\0' is appended only when it still fits inside the
+ * caller's buffer; writing it unconditionally would store one byte past
+ * the end of the buffer whenever the link text fills it completely.
+ */
+static int nfs_readlink(struct inode *inode, char *buffer, int buflen)
+{
+	int error, *mem;
+	unsigned int len;
+	unsigned int room;
+	char *res;
+
+	if (!S_ISLNK(inode->i_mode)) {
+		iput(inode);
+		return -EINVAL;
+	}
+	/* remember the caller's real buffer size before clamping */
+	room = buflen > 0 ? buflen : 0;
+	if (buflen > NFS_MAXPATHLEN)
+		buflen = NFS_MAXPATHLEN;
+	/* NOTE(review): assumes nfs_proc_readlink() never reports a len
+	 * larger than the maxlen passed in -- confirm */
+	error = nfs_proc_readlink(NFS_SERVER(inode), NFS_FH(inode), &mem,
+		&res, &len, buflen);
+	iput(inode);
+	if (! error) {
+		memcpy_tofs(buffer, res, len);
+		if (len < room)
+			put_fs_byte('\0', buffer + len);
+		error = len;
+	}
+	kfree(mem);
+	return error;
+}
diff --git a/fs/open.c b/fs/open.c
new file mode 100644
index 000000000..ff95d375f
--- /dev/null
+++ b/fs/open.c
@@ -0,0 +1,516 @@
+/*
+ * linux/fs/open.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/vfs.h>
+#include <linux/types.h>
+#include <linux/utime.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/tty.h>
+#include <linux/time.h>
+
+#include <asm/segment.h>
+
+extern void fcntl_remove_locks(struct task_struct *, struct file *, unsigned int fd);
+
+/* ustat() is not implemented: both arguments are ignored, always -ENOSYS. */
+asmlinkage int sys_ustat(int dev, struct ustat * ubuf)
+{
+ return -ENOSYS;
+}
+
+/*
+ * statfs(): fill the user buffer `buf' with information about the
+ * filesystem that holds `path'.
+ *
+ * Returns 0, the verify_area()/namei() error, or -ENOSYS when the
+ * filesystem has no statfs operation.
+ */
+asmlinkage int sys_statfs(const char * path, struct statfs * buf)
+{
+	struct inode *ino;
+	int rc;
+
+	rc = verify_area(VERIFY_WRITE, buf, sizeof(struct statfs));
+	if (rc)
+		return rc;
+	rc = namei(path, &ino);
+	if (rc)
+		return rc;
+	if (ino->i_sb->s_op->statfs)
+		ino->i_sb->s_op->statfs(ino->i_sb, buf);
+	else
+		rc = -ENOSYS;
+	iput(ino);
+	return rc;
+}
+
+/*
+ * fstatfs(): like statfs(), but for the filesystem of the open file
+ * descriptor `fd'.
+ *
+ * Returns 0, the verify_area() error, -EBADF for a bad descriptor,
+ * -ENOENT when the file has no inode, or -ENOSYS when the filesystem
+ * has no statfs operation.
+ */
+asmlinkage int sys_fstatfs(unsigned int fd, struct statfs * buf)
+{
+	struct inode *ino;
+	struct file *filp;
+	int rc;
+
+	rc = verify_area(VERIFY_WRITE, buf, sizeof(struct statfs));
+	if (rc)
+		return rc;
+	if (fd >= NR_OPEN)
+		return -EBADF;
+	filp = current->files->fd[fd];
+	if (!filp)
+		return -EBADF;
+	ino = filp->f_inode;
+	if (!ino)
+		return -ENOENT;
+	if (!ino->i_sb->s_op->statfs)
+		return -ENOSYS;
+	ino->i_sb->s_op->statfs(ino->i_sb, buf);
+	return 0;
+}
+
+/*
+ * truncate(): set the size of the file named `path' to `length'.
+ *
+ * Returns the namei() error, -EACCES for a directory or a file the
+ * caller may not write, -EROFS on a read-only filesystem, -EPERM for an
+ * immutable or append-only inode, the get_write_access() error, or the
+ * result of notify_change().
+ */
+asmlinkage int sys_truncate(const char * path, unsigned int length)
+{
+	struct inode *ino;
+	struct iattr attrs;
+	int rc;
+
+	rc = namei(path, &ino);
+	if (rc)
+		return rc;
+
+	/* checks in the original order; the first failure wins */
+	if (S_ISDIR(ino->i_mode) || !permission(ino, MAY_WRITE))
+		rc = -EACCES;
+	else if (IS_RDONLY(ino))
+		rc = -EROFS;
+	else if (IS_IMMUTABLE(ino) || IS_APPEND(ino))
+		rc = -EPERM;
+	else
+		rc = get_write_access(ino);
+	if (rc) {
+		iput(ino);
+		return rc;
+	}
+
+	attrs.ia_size = length;
+	ino->i_size = attrs.ia_size;
+	if (ino->i_op && ino->i_op->truncate)
+		ino->i_op->truncate(ino);
+	attrs.ia_mtime = CURRENT_TIME;
+	attrs.ia_ctime = attrs.ia_mtime;
+	attrs.ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME;
+	ino->i_dirt = 1;
+	rc = notify_change(ino, &attrs);
+	put_write_access(ino);
+	iput(ino);
+	return rc;
+}
+
+/*
+ * ftruncate(): set the size of the file open on `fd' to `length'.
+ *
+ * Returns -EBADF for a bad descriptor, -ENOENT when the file has no
+ * inode, -EACCES for a directory or a descriptor not open for writing,
+ * -EPERM for an immutable or append-only inode, or the result of
+ * notify_change().
+ */
+asmlinkage int sys_ftruncate(unsigned int fd, unsigned int length)
+{
+	struct inode *ino;
+	struct file *filp;
+	struct iattr attrs;
+
+	if (fd >= NR_OPEN)
+		return -EBADF;
+	filp = current->files->fd[fd];
+	if (!filp)
+		return -EBADF;
+	ino = filp->f_inode;
+	if (!ino)
+		return -ENOENT;
+	if (S_ISDIR(ino->i_mode) || !(filp->f_mode & 2))
+		return -EACCES;
+	if (IS_IMMUTABLE(ino) || IS_APPEND(ino))
+		return -EPERM;
+
+	attrs.ia_size = length;
+	ino->i_size = attrs.ia_size;
+	if (ino->i_op && ino->i_op->truncate)
+		ino->i_op->truncate(ino);
+	attrs.ia_mtime = CURRENT_TIME;
+	attrs.ia_ctime = attrs.ia_mtime;
+	attrs.ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME;
+	ino->i_dirt = 1;
+	return notify_change(ino, &attrs);
+}
+
+/* If times==NULL, set access and modification to current time,
+ * must be owner or have write permission.
+ * Else, update from *times, must be owner or super user.
+ *
+ * Returns the namei() error, -EROFS on a read-only filesystem, the
+ * verify_area() error when `times' is not a readable user buffer,
+ * -EACCES when times==NULL and the caller may not write the file, or
+ * the result of notify_change().
+ */
+asmlinkage int sys_utime(char * filename, struct utimbuf * times)
+{
+	struct inode * inode;
+	long actime,modtime;
+	int error;
+	unsigned int flags = 0;
+	struct iattr newattrs;
+
+	error = namei(filename,&inode);
+	if (error)
+		return error;
+	if (IS_RDONLY(inode)) {
+		iput(inode);
+		return -EROFS;
+	}
+	/* Don't worry, the checks are done in inode_change_ok() */
+	if (times) {
+		/* `times' is a user pointer: validate it before reading */
+		error = verify_area(VERIFY_READ, times, sizeof(*times));
+		if (error) {
+			iput(inode);
+			return error;
+		}
+		actime = get_fs_long((unsigned long *) &times->actime);
+		modtime = get_fs_long((unsigned long *) &times->modtime);
+		newattrs.ia_ctime = CURRENT_TIME;
+		flags = ATTR_ATIME_SET | ATTR_MTIME_SET;
+	} else {
+		if (!permission(inode,MAY_WRITE)) {
+			iput(inode);
+			return -EACCES;
+		}
+		actime = modtime = newattrs.ia_ctime = CURRENT_TIME;
+	}
+	newattrs.ia_atime = actime;
+	newattrs.ia_mtime = modtime;
+	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME | flags;
+	inode->i_dirt = 1;
+	error = notify_change(inode, &newattrs);
+	iput(inode);
+	return error;
+}
+
+/*
+ * access() needs to use the real uid/gid, not the effective uid/gid.
+ * We do this by temporarily setting fsuid/fsgid to the wanted values
+ *
+ * Returns -EINVAL when `mode' has bits outside S_IRWXO, the namei()
+ * error, -EACCES when permission() refuses, or 0.
+ */
+asmlinkage int sys_access(const char * filename, int mode)
+{
+	struct inode *ino;
+	int saved_fsuid, saved_fsgid;
+	int rc;
+
+	if ((mode & ~S_IRWXO) != 0)	/* where's F_OK, X_OK, W_OK, R_OK? */
+		return -EINVAL;
+
+	saved_fsuid = current->fsuid;
+	saved_fsgid = current->fsgid;
+	current->fsuid = current->uid;
+	current->fsgid = current->gid;
+
+	rc = namei(filename, &ino);
+	if (rc == 0) {
+		if (!permission(ino, mode))
+			rc = -EACCES;
+		iput(ino);
+	}
+
+	current->fsuid = saved_fsuid;
+	current->fsgid = saved_fsgid;
+	return rc;
+}
+
+/*
+ * chdir(): make the directory named `filename' the current working
+ * directory.  The inode reference obtained from namei() becomes the new
+ * pwd reference; the old pwd is released.
+ *
+ * Returns 0, the namei() error, -ENOTDIR or -EACCES.
+ */
+asmlinkage int sys_chdir(const char * filename)
+{
+	struct inode *target;
+	int rc;
+
+	rc = namei(filename, &target);
+	if (rc)
+		return rc;
+	if (!S_ISDIR(target->i_mode))
+		rc = -ENOTDIR;
+	else if (!permission(target, MAY_EXEC))
+		rc = -EACCES;
+	if (rc) {
+		iput(target);
+		return rc;
+	}
+	iput(current->fs->pwd);
+	current->fs->pwd = target;
+	return 0;
+}
+
+/*
+ * fchdir(): make the directory open on `fd' the current working
+ * directory.  The old pwd is released and the inode's count is raised
+ * for the new pwd reference.
+ *
+ * Returns 0, -EBADF, -ENOENT, -ENOTDIR or -EACCES.
+ */
+asmlinkage int sys_fchdir(unsigned int fd)
+{
+	struct inode *target;
+	struct file *filp;
+
+	if (fd >= NR_OPEN)
+		return -EBADF;
+	filp = current->files->fd[fd];
+	if (!filp)
+		return -EBADF;
+	target = filp->f_inode;
+	if (!target)
+		return -ENOENT;
+	if (!S_ISDIR(target->i_mode))
+		return -ENOTDIR;
+	if (!permission(target, MAY_EXEC))
+		return -EACCES;
+	iput(current->fs->pwd);
+	current->fs->pwd = target;
+	target->i_count++;
+	return 0;
+}
+
+/*
+ * chroot(): make the directory named `filename' the root directory of
+ * the current process.  The inode reference obtained from namei()
+ * becomes the new root reference; the old root is released.
+ *
+ * Returns 0, the namei() error, -ENOTDIR, or -EPERM when fsuser() is
+ * false.
+ */
+asmlinkage int sys_chroot(const char * filename)
+{
+	struct inode *target;
+	int rc;
+
+	rc = namei(filename, &target);
+	if (rc)
+		return rc;
+	if (!S_ISDIR(target->i_mode))
+		rc = -ENOTDIR;
+	else if (!fsuser())
+		rc = -EPERM;
+	if (rc) {
+		iput(target);
+		return rc;
+	}
+	iput(current->fs->root);
+	current->fs->root = target;
+	return 0;
+}
+
+/*
+ * fchmod(): change the permission bits of the file open on `fd'.
+ * A `mode' of (mode_t) -1 keeps the inode's current mode.  Only the
+ * S_IALLUGO bits are taken from `mode'; the rest come from the inode.
+ *
+ * Returns -EBADF, -ENOENT, -EROFS, or the result of notify_change().
+ */
+asmlinkage int sys_fchmod(unsigned int fd, mode_t mode)
+{
+	struct inode *ino;
+	struct file *filp;
+	struct iattr attrs;
+
+	if (fd >= NR_OPEN)
+		return -EBADF;
+	filp = current->files->fd[fd];
+	if (!filp)
+		return -EBADF;
+	ino = filp->f_inode;
+	if (!ino)
+		return -ENOENT;
+	if (IS_RDONLY(ino))
+		return -EROFS;
+	if (mode == (mode_t) -1)
+		mode = ino->i_mode;
+	attrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+	attrs.ia_ctime = CURRENT_TIME;
+	attrs.ia_mode = (mode & S_IALLUGO) | (ino->i_mode & ~S_IALLUGO);
+	ino->i_dirt = 1;
+	return notify_change(ino, &attrs);
+}
+
+/*
+ * chmod(): change the permission bits of the file named `filename'.
+ * A `mode' of (mode_t) -1 keeps the inode's current mode.  Only the
+ * S_IALLUGO bits are taken from `mode'; the rest come from the inode.
+ *
+ * Returns the namei() error, -EROFS, or the result of notify_change().
+ */
+asmlinkage int sys_chmod(const char * filename, mode_t mode)
+{
+	struct inode *ino;
+	struct iattr attrs;
+	int rc;
+
+	rc = namei(filename, &ino);
+	if (rc)
+		return rc;
+	if (IS_RDONLY(ino)) {
+		rc = -EROFS;
+	} else {
+		if (mode == (mode_t) -1)
+			mode = ino->i_mode;
+		attrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+		attrs.ia_ctime = CURRENT_TIME;
+		attrs.ia_mode = (mode & S_IALLUGO) | (ino->i_mode & ~S_IALLUGO);
+		ino->i_dirt = 1;
+		rc = notify_change(ino, &attrs);
+	}
+	iput(ino);
+	return rc;
+}
+
+/*
+ * fchown(): change owner and group of the file open on `fd'.
+ * A `user' or `group' of -1 keeps the inode's current value.  When the
+ * owner (group) really changes, a set setuid (setgid) bit is cleared.
+ *
+ * Returns -EBADF, -ENOENT, -EROFS, or the result of notify_change().
+ */
+asmlinkage int sys_fchown(unsigned int fd, uid_t user, gid_t group)
+{
+	struct inode *ino;
+	struct file *filp;
+	struct iattr attrs;
+
+	if (fd >= NR_OPEN)
+		return -EBADF;
+	filp = current->files->fd[fd];
+	if (!filp)
+		return -EBADF;
+	ino = filp->f_inode;
+	if (!ino)
+		return -ENOENT;
+	if (IS_RDONLY(ino))
+		return -EROFS;
+
+	if (user == (uid_t) -1)
+		user = ino->i_uid;
+	if (group == (gid_t) -1)
+		group = ino->i_gid;
+
+	attrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
+	attrs.ia_ctime = CURRENT_TIME;
+	attrs.ia_uid = user;
+	attrs.ia_gid = group;
+	attrs.ia_mode = ino->i_mode;
+
+	/* owner changed: drop the setuid bit */
+	if ((ino->i_mode & S_ISUID) && user != ino->i_uid) {
+		attrs.ia_mode &= ~S_ISUID;
+		attrs.ia_valid |= ATTR_MODE;
+	}
+	/* group changed: drop the setgid bit */
+	if ((ino->i_mode & S_ISGID) && group != ino->i_gid) {
+		attrs.ia_mode &= ~S_ISGID;
+		attrs.ia_valid |= ATTR_MODE;
+	}
+	ino->i_dirt = 1;
+	return notify_change(ino, &attrs);
+}
+
+/*
+ * chown(): change owner and group of the file named `filename', looked
+ * up with lnamei().  A `user' or `group' of -1 keeps the inode's current
+ * value.  When the owner (group) really changes, a set setuid (setgid)
+ * bit is cleared.
+ *
+ * Returns the lnamei() error, -EROFS, or the result of notify_change().
+ */
+asmlinkage int sys_chown(const char * filename, uid_t user, gid_t group)
+{
+	struct inode *ino;
+	struct iattr attrs;
+	int rc;
+
+	rc = lnamei(filename, &ino);
+	if (rc)
+		return rc;
+	if (IS_RDONLY(ino)) {
+		iput(ino);
+		return -EROFS;
+	}
+
+	if (user == (uid_t) -1)
+		user = ino->i_uid;
+	if (group == (gid_t) -1)
+		group = ino->i_gid;
+
+	attrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
+	attrs.ia_ctime = CURRENT_TIME;
+	attrs.ia_uid = user;
+	attrs.ia_gid = group;
+	attrs.ia_mode = ino->i_mode;
+
+	/* owner changed: drop the setuid bit */
+	if ((ino->i_mode & S_ISUID) && user != ino->i_uid) {
+		attrs.ia_mode &= ~S_ISUID;
+		attrs.ia_valid |= ATTR_MODE;
+	}
+	/* group changed: drop the setgid bit */
+	if ((ino->i_mode & S_ISGID) && group != ino->i_gid) {
+		attrs.ia_mode &= ~S_ISGID;
+		attrs.ia_valid |= ATTR_MODE;
+	}
+	ino->i_dirt = 1;
+	rc = notify_change(ino, &attrs);
+	iput(ino);
+	return rc;
+}
+
+/*
+ * Note that while the flag value (low two bits) for sys_open means:
+ * 00 - read-only
+ * 01 - write-only
+ * 10 - read-write
+ * 11 - special
+ * it is changed into
+ * 00 - no permissions needed
+ * 01 - read-permission
+ * 10 - write-permission
+ * 11 - read-write
+ * for the internal routines (ie open_namei()/follow_link() etc). 00 is
+ * used by symlinks.
+ *
+ * do_open() takes the lowest free descriptor of the current process,
+ * binds a fresh file structure to it and opens `filename' (a kernel-space
+ * name) with the given `flags' and creation `mode'.
+ *
+ * Returns the new descriptor, -EMFILE when no descriptor is free,
+ * -ENFILE when no file structure is available, or the error from
+ * open_namei(), get_write_access() or the file's open operation.  On
+ * every error path the descriptor slot, the file structure and any
+ * inode reference already obtained are released.
+ */
+int do_open(const char * filename,int flags,int mode)
+{
+	struct inode * inode;
+	struct file * f;
+	int flag,error,fd;
+
+	for(fd=0 ; fd<NR_OPEN ; fd++)
+		if (!current->files->fd[fd])
+			break;
+	if (fd>=NR_OPEN)
+		return -EMFILE;
+	FD_CLR(fd,&current->files->close_on_exec);
+	f = get_empty_filp();
+	if (!f)
+		return -ENFILE;
+	current->files->fd[fd] = f;
+	f->f_flags = flag = flags;
+	f->f_mode = (flag+1) & O_ACCMODE;
+	if (f->f_mode)
+		flag++;
+	if (flag & (O_TRUNC | O_CREAT))
+		flag |= 2;
+	error = open_namei(filename,flag,mode,&inode,NULL);
+	if (!error && (f->f_mode & 2)) {
+		error = get_write_access(inode);
+		/* open_namei() succeeded, so we hold an inode reference
+		 * that must not be leaked when write access is refused */
+		if (error)
+			iput(inode);
+	}
+	if (error) {
+		current->files->fd[fd]=NULL;
+		f->f_count--;
+		return error;
+	}
+
+	f->f_inode = inode;
+	f->f_pos = 0;
+	f->f_reada = 0;
+	f->f_op = NULL;
+	if (inode->i_op)
+		f->f_op = inode->i_op->default_file_ops;
+	if (f->f_op && f->f_op->open) {
+		error = f->f_op->open(inode,f);
+		if (error) {
+			if (f->f_mode & 2) put_write_access(inode);
+			iput(inode);
+			f->f_count--;
+			current->files->fd[fd]=NULL;
+			return error;
+		}
+	}
+	/* these flags only matter while opening */
+	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+	return (fd);
+}
+
+/*
+ * open(): fetch the file name with getname() and hand it to do_open().
+ * Returns the getname() error or whatever do_open() returns.
+ */
+asmlinkage int sys_open(const char * filename,int flags,int mode)
+{
+	char *kname;
+	int rc;
+
+	rc = getname(filename, &kname);
+	if (!rc) {
+		rc = do_open(kname, flags, mode);
+		putname(kname);
+	}
+	return rc;
+}
+
+/* creat(): sys_open() with O_CREAT | O_WRONLY | O_TRUNC. */
+asmlinkage int sys_creat(const char * pathname, int mode)
+{
+	int flags = O_CREAT | O_WRONLY | O_TRUNC;
+
+	return sys_open(pathname, flags, mode);
+}
+
+/*
+ * Drop one reference on `filp', which was installed as descriptor `fd'
+ * of the current process.  Locks are removed via fcntl_remove_locks()
+ * when the file has an inode.  When the last reference goes away the
+ * file's release operation is called, write access is given back for
+ * files opened for writing, and the inode reference is released.
+ *
+ * Always returns 0.
+ */
+int close_fp(struct file *filp, unsigned int fd)
+{
+	struct inode *ino;
+
+	if (!filp->f_count) {
+		printk("VFS: Close: file count is 0\n");
+		return 0;
+	}
+	ino = filp->f_inode;
+	if (ino != NULL)
+		fcntl_remove_locks(current, filp, fd);
+	if (filp->f_count <= 1) {
+		/* last reference: tear the file down */
+		if (filp->f_op && filp->f_op->release)
+			filp->f_op->release(ino, filp);
+		filp->f_count--;
+		filp->f_inode = NULL;
+		if (filp->f_mode & 2)
+			put_write_access(ino);
+		iput(ino);
+		return 0;
+	}
+	/* other references remain */
+	filp->f_count--;
+	return 0;
+}
+
+/*
+ * close(): clear the close-on-exec bit of `fd', detach the file from
+ * the descriptor table and release it with close_fp().
+ *
+ * Returns -EBADF for an out-of-range or unused descriptor, otherwise
+ * the result of close_fp().
+ */
+asmlinkage int sys_close(unsigned int fd)
+{
+	struct file *victim;
+
+	if (fd >= NR_OPEN)
+		return -EBADF;
+	FD_CLR(fd, &current->files->close_on_exec);
+	victim = current->files->fd[fd];
+	if (!victim)
+		return -EBADF;
+	current->files->fd[fd] = NULL;
+	return close_fp(victim, fd);
+}
+
+/*
+ * This routine simulates a hangup on the tty, to arrange that users
+ * are given clean terminals at login time.
+ *
+ * Returns -EPERM unless suser() is true, otherwise 0.
+ */
+asmlinkage int sys_vhangup(void)
+{
+	if (suser()) {
+		/* If there is a controlling tty, hang it up */
+		if (current->tty)
+			tty_vhangup(current->tty);
+		return 0;
+	}
+	return -EPERM;
+}
diff --git a/fs/pipe.c b/fs/pipe.c
new file mode 100644
index 000000000..bc557888e
--- /dev/null
+++ b/fs/pipe.c
@@ -0,0 +1,426 @@
+/*
+ * linux/fs/pipe.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <asm/segment.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/fcntl.h>
+#include <linux/termios.h>
+
+
+/* We don't use the head/tail construction any more. Now we use the start/len */
+/* construction, providing full use of PIPE_BUF (a multiple of PAGE_SIZE; the */
+/* multiple is currently 1). Florian Coosmann (FGC) */
+/* Additionally, we now use a locking technique. This prevents race conditions */
+/* in case of paging and multiple reads/writes on the same pipe. (FGC) */
+
+
+/*
+ * Read up to count bytes from the pipe into the user buffer buf.
+ *
+ * Returns the number of bytes copied; 0 when the pipe is empty and has
+ * no writers; -EAGAIN when nothing was read although writers exist (or,
+ * with O_NONBLOCK, when the pipe is locked); -ERESTARTSYS when a
+ * non-blocked signal is pending while waiting for data.
+ */
+static int pipe_read(struct inode * inode, struct file * filp, char * buf, int count)
+{
+ int chars = 0, size = 0, read = 0;
+ char *pipebuf;
+
+ if (filp->f_flags & O_NONBLOCK) {
+ /* non-blocking: never sleep, just report why nothing can be read */
+ if (PIPE_LOCK(*inode))
+ return -EAGAIN;
+ if (PIPE_EMPTY(*inode))
+ if (PIPE_WRITERS(*inode))
+ return -EAGAIN;
+ else
+ return 0;
+ } else while (PIPE_EMPTY(*inode) || PIPE_LOCK(*inode)) {
+ /* blocking: sleep until the pipe has data and is unlocked */
+ if (PIPE_EMPTY(*inode)) {
+ if (!PIPE_WRITERS(*inode))
+ return 0;
+ }
+ if (current->signal & ~current->blocked)
+ return -ERESTARTSYS;
+ interruptible_sleep_on(&PIPE_WAIT(*inode));
+ }
+ PIPE_LOCK(*inode)++;
+ /* copy out chunk by chunk until the request is met or the pipe is drained */
+ while (count>0 && (size = PIPE_SIZE(*inode))) {
+ chars = PIPE_MAX_RCHUNK(*inode);
+ if (chars > count)
+ chars = count;
+ if (chars > size)
+ chars = size;
+ read += chars;
+ pipebuf = PIPE_BASE(*inode)+PIPE_START(*inode);
+ PIPE_START(*inode) += chars;
+ /* keep the start offset inside the buffer */
+ PIPE_START(*inode) &= (PIPE_BUF-1);
+ PIPE_LEN(*inode) -= chars;
+ count -= chars;
+ memcpy_tofs(buf, pipebuf, chars );
+ buf += chars;
+ }
+ PIPE_LOCK(*inode)--;
+ /* let everybody sleeping on the pipe re-check its state */
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+ if (read)
+ return read;
+ if (PIPE_WRITERS(*inode))
+ return -EAGAIN;
+ return 0;
+}
+
+/*
+ * Write count bytes from the user buffer buf into the pipe.
+ *
+ * With no readers the caller is sent SIGPIPE and gets -EPIPE.  A write
+ * of at most PIPE_BUF bytes waits until the whole request fits; larger
+ * writes proceed as soon as any space is free.  Returns the number of
+ * bytes written; if nothing was written yet, returns -EPIPE,
+ * -ERESTARTSYS (pending signal) or -EAGAIN (O_NONBLOCK) instead.
+ */
+static int pipe_write(struct inode * inode, struct file * filp, char * buf, int count)
+{
+ int chars = 0, free = 0, written = 0;
+ char *pipebuf;
+
+ if (!PIPE_READERS(*inode)) { /* no readers */
+ send_sig(SIGPIPE,current,0);
+ return -EPIPE;
+ }
+/* if count <= PIPE_BUF, we have to make it atomic */
+ if (count <= PIPE_BUF)
+ free = count;
+ else
+ free = 1; /* can't do it atomically, wait for any free space */
+ while (count>0) {
+ /* wait for enough free space and for the pipe to be unlocked */
+ while ((PIPE_FREE(*inode) < free) || PIPE_LOCK(*inode)) {
+ if (!PIPE_READERS(*inode)) { /* no readers */
+ send_sig(SIGPIPE,current,0);
+ return written? :-EPIPE;
+ }
+ if (current->signal & ~current->blocked)
+ return written? :-ERESTARTSYS;
+ if (filp->f_flags & O_NONBLOCK)
+ return written? :-EAGAIN;
+ interruptible_sleep_on(&PIPE_WAIT(*inode));
+ }
+ PIPE_LOCK(*inode)++;
+ /* copy in chunk by chunk while there is data left and room in the pipe */
+ while (count>0 && (free = PIPE_FREE(*inode))) {
+ chars = PIPE_MAX_WCHUNK(*inode);
+ if (chars > count)
+ chars = count;
+ if (chars > free)
+ chars = free;
+ pipebuf = PIPE_BASE(*inode)+PIPE_END(*inode);
+ written += chars;
+ PIPE_LEN(*inode) += chars;
+ count -= chars;
+ memcpy_fromfs(pipebuf, buf, chars );
+ buf += chars;
+ }
+ PIPE_LOCK(*inode)--;
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+ /* whatever is left of the request no longer has to be atomic */
+ free = 1;
+ }
+ return written;
+}
+
+/* Seeking is not supported on pipes: always fails with -ESPIPE. */
+static int pipe_lseek(struct inode * inode, struct file * file, off_t offset, int orig)
+{
+ return -ESPIPE;
+}
+
+/* A pipe is not a directory: always fails with -ENOTDIR. */
+static int pipe_readdir(struct inode * inode, struct file * file, struct dirent * de, int count)
+{
+ return -ENOTDIR;
+}
+
+/*
+ * Placeholder installed in the read or write slot of the one-directional
+ * file_operations tables in this file: always fails with -EBADF.
+ */
+static int bad_pipe_rw(struct inode * inode, struct file * filp, char * buf, int count)
+{
+ return -EBADF;
+}
+
+/*
+ * ioctl() on a pipe.  Only FIONREAD is understood: it stores
+ * PIPE_SIZE() of the pipe in the user long that arg points to.
+ * Returns 0 on success, the verify_area() error if arg is not
+ * writable, and -EINVAL for every other command.
+ */
+static int pipe_ioctl(struct inode *pino, struct file * filp,
+	unsigned int cmd, unsigned long arg)
+{
+	int error;
+
+	if (cmd != FIONREAD)
+		return -EINVAL;
+	error = verify_area(VERIFY_WRITE, (void *) arg,4);
+	if (error)
+		return error;
+	put_fs_long(PIPE_SIZE(*pino),(unsigned long *) arg);
+	return 0;
+}
+
+/*
+ * select() support for pipes.  Returns 1 when the requested condition
+ * already holds, otherwise registers on a wait queue and returns 0.
+ *   SEL_IN:  ready when the pipe is not empty or has no writers.
+ *   SEL_OUT: ready when the pipe is not full or has no readers.
+ *   SEL_EX:  ready when the pipe has no readers or no writers.
+ */
+static int pipe_select(struct inode * inode, struct file * filp, int sel_type, select_table * wait)
+{
+	int ready;
+
+	switch (sel_type) {
+		case SEL_IN:
+			ready = !PIPE_EMPTY(*inode) || !PIPE_WRITERS(*inode);
+			if (!ready)
+				select_wait(&PIPE_WAIT(*inode), wait);
+			return ready;
+		case SEL_OUT:
+			ready = !PIPE_FULL(*inode) || !PIPE_READERS(*inode);
+			if (!ready)
+				select_wait(&PIPE_WAIT(*inode), wait);
+			return ready;
+		case SEL_EX:
+			ready = !PIPE_READERS(*inode) || !PIPE_WRITERS(*inode);
+			if (!ready)
+				select_wait(&inode->i_wait,wait);
+			return ready;
+	}
+	return 0;
+}
+
+/*
+ * Arggh. Why does SunOS have to have different select() behaviour
+ * for pipes and fifos? Hate-Hate-Hate. See difference in SEL_IN..
+ *
+ * Same as pipe_select(), except that SEL_IN is only ready when the
+ * fifo actually holds data; a missing writer does not make it readable.
+ */
+static int fifo_select(struct inode * inode, struct file * filp, int sel_type, select_table * wait)
+{
+	int ready;
+
+	switch (sel_type) {
+		case SEL_IN:
+			ready = !PIPE_EMPTY(*inode);
+			if (!ready)
+				select_wait(&PIPE_WAIT(*inode), wait);
+			return ready;
+		case SEL_OUT:
+			ready = !PIPE_FULL(*inode) || !PIPE_READERS(*inode);
+			if (!ready)
+				select_wait(&PIPE_WAIT(*inode), wait);
+			return ready;
+		case SEL_EX:
+			ready = !PIPE_READERS(*inode) || !PIPE_WRITERS(*inode);
+			if (!ready)
+				select_wait(&inode->i_wait,wait);
+			return ready;
+	}
+	return 0;
+}
+
+/*
+ * The 'connect_xxx()' functions are needed for named pipes when
+ * the open() code hasn't guaranteed a connection (O_NONBLOCK),
+ * and we need to act differently until we do get a writer..
+ */
+static int connect_read(struct inode * inode, struct file * filp, char * buf, int count)
+{
+ /* wait until the fifo holds data or a writer has shown up */
+ while (!PIPE_SIZE(*inode)) {
+ if (PIPE_WRITERS(*inode))
+ break;
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+ wake_up_interruptible(& PIPE_WAIT(*inode));
+ if (current->signal & ~current->blocked)
+ return -ERESTARTSYS;
+ interruptible_sleep_on(& PIPE_WAIT(*inode));
+ }
+ /* connected: from now on this file uses the ordinary fifo read operations */
+ filp->f_op = &read_fifo_fops;
+ return pipe_read(inode,filp,buf,count);
+}
+
+/*
+ * select() for a fifo reader that is still using connecting_fifo_fops.
+ * Differs from fifo_select() in that SEL_IN switches the file over to
+ * read_fifo_fops once data is present, and SEL_OUT does not look at the
+ * number of readers.
+ */
+static int connect_select(struct inode * inode, struct file * filp, int sel_type, select_table * wait)
+{
+ switch (sel_type) {
+ case SEL_IN:
+ if (!PIPE_EMPTY(*inode)) {
+ /* data has arrived: switch to the ordinary fifo read operations */
+ filp->f_op = &read_fifo_fops;
+ return 1;
+ }
+ select_wait(&PIPE_WAIT(*inode), wait);
+ return 0;
+ case SEL_OUT:
+ if (!PIPE_FULL(*inode))
+ return 1;
+ select_wait(&PIPE_WAIT(*inode), wait);
+ return 0;
+ case SEL_EX:
+ if (!PIPE_READERS(*inode) || !PIPE_WRITERS(*inode))
+ return 1;
+ select_wait(&inode->i_wait,wait);
+ return 0;
+ }
+ return 0;
+}
+
+/*
+ * Ok, these three routines NOW keep track of readers/writers,
+ * Linus previously did it with inode->i_count checking.
+ */
+static void pipe_read_release(struct inode * inode, struct file * filp)
+{
+ /* one reader less; wake sleepers so they can notice the change */
+ PIPE_READERS(*inode)--;
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+}
+
+static void pipe_write_release(struct inode * inode, struct file * filp)
+{
+ /* one writer less; wake sleepers so they can notice the change */
+ PIPE_WRITERS(*inode)--;
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+}
+
+static void pipe_rdwr_release(struct inode * inode, struct file * filp)
+{
+ /* a read/write file counts as both a reader and a writer */
+ PIPE_READERS(*inode)--;
+ PIPE_WRITERS(*inode)--;
+ wake_up_interruptible(&PIPE_WAIT(*inode));
+}
+
+/*
+ * The file_operations structs are not static because they
+ * are also used in linux/fs/fifo.c to do operations on fifo's.
+ */
+/* Read side of a fifo that has not seen a writer yet (see connect_read()). */
+struct file_operations connecting_fifo_fops = {
+ pipe_lseek, /* lseek */
+ connect_read, /* read */
+ bad_pipe_rw, /* write */
+ pipe_readdir, /* readdir */
+ connect_select, /* select */
+ pipe_ioctl, /* ioctl */
+ NULL, /* no mmap on pipes.. surprise */
+ NULL, /* no special open code */
+ pipe_read_release, /* release */
+ NULL /* fsync */
+};
+
+/* Read-only end of a fifo: writes are rejected by bad_pipe_rw(). */
+struct file_operations read_fifo_fops = {
+ pipe_lseek, /* lseek */
+ pipe_read, /* read */
+ bad_pipe_rw, /* write */
+ pipe_readdir, /* readdir */
+ fifo_select, /* select */
+ pipe_ioctl, /* ioctl */
+ NULL, /* no mmap on pipes.. surprise */
+ NULL, /* no special open code */
+ pipe_read_release, /* release */
+ NULL /* fsync */
+};
+
+/* Write-only end of a fifo: reads are rejected by bad_pipe_rw(). */
+struct file_operations write_fifo_fops = {
+ pipe_lseek, /* lseek */
+ bad_pipe_rw, /* read */
+ pipe_write, /* write */
+ pipe_readdir, /* readdir */
+ fifo_select, /* select */
+ pipe_ioctl, /* ioctl */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ pipe_write_release, /* release */
+ NULL /* fsync */
+};
+
+/* Fifo opened for both reading and writing. */
+struct file_operations rdwr_fifo_fops = {
+ pipe_lseek, /* lseek */
+ pipe_read, /* read */
+ pipe_write, /* write */
+ pipe_readdir, /* readdir */
+ fifo_select, /* select */
+ pipe_ioctl, /* ioctl */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ pipe_rdwr_release, /* release */
+ NULL /* fsync */
+};
+
+/* Read end of a pipe; installed on the first descriptor by sys_pipe(). */
+struct file_operations read_pipe_fops = {
+ pipe_lseek, /* lseek */
+ pipe_read, /* read */
+ bad_pipe_rw, /* write */
+ pipe_readdir, /* readdir */
+ pipe_select, /* select */
+ pipe_ioctl, /* ioctl */
+ NULL, /* no mmap on pipes.. surprise */
+ NULL, /* no special open code */
+ pipe_read_release, /* release */
+ NULL /* fsync */
+};
+
+/* Write end of a pipe; installed on the second descriptor by sys_pipe(). */
+struct file_operations write_pipe_fops = {
+ pipe_lseek, /* lseek */
+ bad_pipe_rw, /* read */
+ pipe_write, /* write */
+ pipe_readdir, /* readdir */
+ pipe_select, /* select */
+ pipe_ioctl, /* ioctl */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ pipe_write_release, /* release */
+ NULL /* fsync */
+};
+
+/* Pipe usable in both directions; the default file-ops of pipe_inode_operations. */
+struct file_operations rdwr_pipe_fops = {
+ pipe_lseek, /* lseek */
+ pipe_read, /* read */
+ pipe_write, /* write */
+ pipe_readdir, /* readdir */
+ pipe_select, /* select */
+ pipe_ioctl, /* ioctl */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ pipe_rdwr_release, /* release */
+ NULL /* fsync */
+};
+
+/*
+ * Inode operations for pipes: only the default file operations are
+ * provided, every inode-level operation is left NULL.
+ */
+struct inode_operations pipe_inode_operations = {
+ &rdwr_pipe_fops, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * pipe() system call: create a pipe and store two descriptors in the
+ * user array fildes - fildes[0] for the read end and fildes[1] for the
+ * write end.
+ *
+ * Returns 0 on success, the verify_area() error if fildes is not
+ * writable, -ENFILE if two file structures or a pipe inode cannot be
+ * obtained, and -EMFILE if the process does not have two free
+ * descriptor slots.  Every failure path undoes what was set up so far.
+ */
+asmlinkage int sys_pipe(unsigned long * fildes)
+{
+ struct inode * inode;
+ struct file * f[2];
+ int fd[2];
+ int i,j;
+
+ j = verify_area(VERIFY_WRITE,fildes,8);
+ if (j)
+ return j;
+ /* grab two file structures; j counts how many we got */
+ for(j=0 ; j<2 ; j++)
+ if (!(f[j] = get_empty_filp()))
+ break;
+ if (j==1)
+ f[0]->f_count--;
+ if (j<2)
+ return -ENFILE;
+ /* find the two lowest free descriptor slots and install the files */
+ j=0;
+ for(i=0;j<2 && i<NR_OPEN;i++)
+ if (!current->files->fd[i]) {
+ current->files->fd[ fd[j]=i ] = f[j];
+ j++;
+ }
+ /* only one slot was free: give it back */
+ if (j==1)
+ current->files->fd[fd[0]]=NULL;
+ if (j<2) {
+ f[0]->f_count--;
+ f[1]->f_count--;
+ return -EMFILE;
+ }
+ if (!(inode=get_pipe_inode())) {
+ current->files->fd[fd[0]] = NULL;
+ current->files->fd[fd[1]] = NULL;
+ f[0]->f_count--;
+ f[1]->f_count--;
+ return -ENFILE;
+ }
+ /* both files share the pipe inode; f[0] reads, f[1] writes */
+ f[0]->f_inode = f[1]->f_inode = inode;
+ f[0]->f_pos = f[1]->f_pos = 0;
+ f[0]->f_flags = O_RDONLY;
+ f[0]->f_op = &read_pipe_fops;
+ f[0]->f_mode = 1; /* read */
+ f[1]->f_flags = O_WRONLY;
+ f[1]->f_op = &write_pipe_fops;
+ f[1]->f_mode = 2; /* write */
+ put_fs_long(fd[0],0+fildes);
+ put_fs_long(fd[1],1+fildes);
+ return 0;
+}
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
new file mode 100644
index 000000000..71c62433c
--- /dev/null
+++ b/fs/proc/Makefile
@@ -0,0 +1,30 @@
+#
+# Makefile for the linux proc-filesystem routines.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+.c.s:
+ $(CC) $(CFLAGS) -S $<
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+.s.o:
+ $(AS) -o $*.o $<
+
+OBJS= inode.o root.o base.o mem.o link.o fd.o array.o kmsg.o net.o
+
+proc.o: $(OBJS)
+ $(LD) -r -o proc.o $(OBJS)
+
+dep:
+ $(CPP) -M *.c > .depend
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/fs/proc/array.c b/fs/proc/array.c
new file mode 100644
index 000000000..6fd7bccbe
--- /dev/null
+++ b/fs/proc/array.c
@@ -0,0 +1,598 @@
+/*
+ * linux/fs/proc/array.c
+ *
+ * Copyright (C) 1992 by Linus Torvalds
+ * based on ideas by Darren Senn
+ *
+ * Fixes:
+ * Michael. K. Johnson: stat,statm extensions.
+ * <johnsonm@stolaf.edu>
+ *
+ * Pauline Middelink : Made cmdline,envline only break at '\0's, to
+ * make sure SET_PROCTITLE works. Also removed
+ * bad '!' which forced address recalculation for
+ * EVERY character on the current page.
+ * <middelin@polyware.iaf.nl>
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/tty.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/string.h>
+#include <linux/mman.h>
+#include <linux/proc_fs.h>
+
+#include <asm/segment.h>
+#include <asm/io.h>
+
+/*
+ * Split a fixed-point value with FSHIFT fractional bits into its
+ * integer part and its first two decimal digits; used by get_loadavg()
+ * to print the avenrun[] values.
+ */
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+#ifdef CONFIG_DEBUG_MALLOC
+int get_malloc(char * buffer);
+#endif
+
+/*
+ * Read handler behind proc_kcore_operations.
+ *
+ * The file is laid out as: a struct user header (magic CMAGIC, u_dsize
+ * = high_memory >> 12) at offset 0, zero bytes up to offset
+ * 2*PAGE_SIZE, and from there on the contents of address
+ * (offset - PAGE_SIZE).  The total size is high_memory + PAGE_SIZE.
+ *
+ * Returns the number of bytes copied to buf (0 at or past the end) and
+ * advances file->f_pos by that amount; -EINVAL for a negative count.
+ */
+static int read_core(struct inode * inode, struct file * file,char * buf, int count)
+{
+ unsigned long p = file->f_pos;
+ int read;
+ int count1;
+ char * pnt;
+ struct user dump;
+
+ memset(&dump, 0, sizeof(struct user));
+ dump.magic = CMAGIC;
+ dump.u_dsize = high_memory >> 12;
+
+ if (count < 0)
+ return -EINVAL;
+ if (p >= high_memory + PAGE_SIZE)
+ return 0;
+ /* clamp the request to the end of the file */
+ if (count > high_memory + PAGE_SIZE - p)
+ count = high_memory + PAGE_SIZE - p;
+ read = 0;
+
+ /* part 1: whatever overlaps the struct user header */
+ if (p < sizeof(struct user) && count > 0) {
+ count1 = count;
+ if (p + count1 > sizeof(struct user))
+ count1 = sizeof(struct user)-p;
+ pnt = (char *) &dump + p;
+ memcpy_tofs(buf,(void *) pnt, count1);
+ buf += count1;
+ p += count1;
+ count -= count1;
+ read += count1;
+ }
+
+ /* part 2: zero fill up to offset 2*PAGE_SIZE */
+ while (p < 2*PAGE_SIZE && count > 0) {
+ put_fs_byte(0,buf);
+ buf++;
+ p++;
+ count--;
+ read++;
+ }
+ /* part 3: memory itself; file offset p maps to address p - PAGE_SIZE */
+ memcpy_tofs(buf,(void *) (p - PAGE_SIZE),count);
+ read += count;
+ file->f_pos += read;
+ return read;
+}
+
+/* File operations using read_core(); all remaining slots are left zero. */
+static struct file_operations proc_kcore_operations = {
+ NULL, /* lseek */
+ read_core, /* read */
+};
+
+/* Inode operations that only supply proc_kcore_operations as file-ops. */
+struct inode_operations proc_kcore_inode_operations = {
+ &proc_kcore_operations, /* default file operations */
+};
+
+/*
+ * Format the three avenrun[] values into buffer as "i.ff i.ff i.ff\n".
+ * FIXED_1/200 is added to each value before it is split so that the
+ * two printed decimals are rounded.  Returns the sprintf() length.
+ */
+static int get_loadavg(char * buffer)
+{
+	int load0 = avenrun[0] + (FIXED_1/200);
+	int load1 = avenrun[1] + (FIXED_1/200);
+	int load2 = avenrun[2] + (FIXED_1/200);
+
+	return sprintf(buffer,"%d.%02d %d.%02d %d.%02d\n",
+		LOAD_INT(load0), LOAD_FRAC(load0),
+		LOAD_INT(load1), LOAD_FRAC(load1),
+		LOAD_INT(load2), LOAD_FRAC(load2));
+}
+
+/*
+ * Format the global kstat counters into buffer: one line each for cpu,
+ * disk, page, swap, intr (total followed by the 16 per-interrupt
+ * counts), ctxt and btime.  Returns the number of characters written.
+ */
+static int get_kstat(char * buffer)
+{
+ int i, len;
+ unsigned sum = 0;
+
+ /* total number of interrupts over all 16 counters */
+ for (i = 0 ; i < 16 ; i++)
+ sum += kstat.interrupts[i];
+ len = sprintf(buffer,
+ "cpu %u %u %u %lu\n"
+ "disk %u %u %u %u\n"
+ "page %u %u\n"
+ "swap %u %u\n"
+ "intr %u",
+ kstat.cpu_user,
+ kstat.cpu_nice,
+ kstat.cpu_system,
+ jiffies - (kstat.cpu_user + kstat.cpu_nice + kstat.cpu_system),
+ kstat.dk_drive[0],
+ kstat.dk_drive[1],
+ kstat.dk_drive[2],
+ kstat.dk_drive[3],
+ kstat.pgpgin,
+ kstat.pgpgout,
+ kstat.pswpin,
+ kstat.pswpout,
+ sum);
+ for (i = 0 ; i < 16 ; i++)
+ len += sprintf(buffer + len, " %u", kstat.interrupts[i]);
+ /* btime: current time minus the time elapsed since jiffies started counting */
+ len += sprintf(buffer + len,
+ "\nctxt %u\n"
+ "btime %lu\n",
+ kstat.context_swtch,
+ xtime.tv_sec - jiffies / HZ);
+ return len;
+}
+
+
+/*
+ * Format "uptime idle\n" into buffer, both as seconds with two
+ * decimals.  uptime is jiffies; idle is utime + stime of task[0].
+ *
+ * The fraction is scaled to hundredths of a second, so the output is
+ * correct for any HZ and not only when HZ happens to be 100.
+ * Returns the sprintf() length.
+ */
+static int get_uptime(char * buffer)
+{
+	unsigned long uptime;
+	unsigned long idle;
+
+	uptime = jiffies;
+	idle = task[0]->utime + task[0]->stime;
+	return sprintf(buffer,"%lu.%02lu %lu.%02lu\n",
+		uptime / HZ,
+		(uptime % HZ) * 100 / HZ,
+		idle / HZ,
+		(idle % HZ) * 100 / HZ);
+}
+
+/*
+ * Format memory and swap usage into buffer: a header line, a "Mem:"
+ * line and a "Swap:" line.  The figures come from si_meminfo() and
+ * si_swapinfo(); "used" is computed as total minus free.
+ * Returns the sprintf() length.
+ */
+static int get_meminfo(char * buffer)
+{
+ struct sysinfo i;
+
+ si_meminfo(&i);
+ si_swapinfo(&i);
+ return sprintf(buffer, " total: used: free: shared: buffers:\n"
+ "Mem: %8lu %8lu %8lu %8lu %8lu\n"
+ "Swap: %8lu %8lu %8lu\n",
+ i.totalram, i.totalram-i.freeram, i.freeram, i.sharedram, i.bufferram,
+ i.totalswap, i.totalswap-i.freeswap, i.freeswap);
+}
+
+/* Copy linux_banner into buffer and return its length. */
+static int get_version(char * buffer)
+{
+ extern char *linux_banner;
+
+ strcpy(buffer, linux_banner);
+ return strlen(buffer);
+}
+
+/*
+ * Find the task[] slot holding the process with the given pid.
+ * The search starts at task[1]; task[0] is never returned.
+ * Returns a pointer to the slot, or NULL if no such process exists.
+ */
+static struct task_struct ** get_task(pid_t pid)
+{
+	struct task_struct ** slot;
+
+	for (slot = task + 1; slot < task+NR_TASKS; slot++)
+		if (*slot && (*slot)->pid == pid)
+			return slot;
+	return NULL;
+}
+
+/*
+ * Translate the address ptr of task *p by walking that task's page
+ * directory (tss.cr3) and page table by hand.
+ *
+ * Returns the resulting address, or 0 if p is NULL or empty, ptr is at
+ * or above TASK_SIZE, or either level is not marked PAGE_PRESENT.
+ */
+static unsigned long get_phys_addr(struct task_struct ** p, unsigned long ptr)
+{
+ unsigned long page;
+
+ if (!p || !*p || ptr >= TASK_SIZE)
+ return 0;
+ /* first level: page directory entry */
+ page = *PAGE_DIR_OFFSET((*p)->tss.cr3,ptr);
+ if (!(page & PAGE_PRESENT))
+ return 0;
+ /* second level: entry inside the page table that entry refers to */
+ page &= PAGE_MASK;
+ page += PAGE_PTR(ptr);
+ page = *(unsigned long *) page;
+ if (!(page & PAGE_PRESENT))
+ return 0;
+ /* page frame plus the offset of ptr within its page */
+ page &= PAGE_MASK;
+ page += ptr & ~PAGE_MASK;
+ return page;
+}
+
+/*
+ * Copy the bytes in [start, end) of task *p's address space into
+ * buffer, at most PAGE_SIZE of them.  Each page is translated with
+ * get_phys_addr(); copying stops at the first page that cannot be
+ * translated.
+ *
+ * The returned length only extends to the last '\0' seen, with any
+ * blanks immediately before it stripped, so the data is only ever cut
+ * at a string boundary.  Copying continues past end until a '\0' is
+ * found.
+ */
+static int get_array(struct task_struct ** p, unsigned long start, unsigned long end, char * buffer)
+{
+ unsigned long addr;
+ int size = 0, result = 0;
+ char c;
+
+ if (start >= end)
+ return result;
+ for (;;) {
+ addr = get_phys_addr(p, start);
+ if (!addr)
+ goto ready;
+ /* copy until the end of the current page, then translate again */
+ do {
+ c = *(char *) addr;
+ /* remember where the last complete string ended */
+ if (!c)
+ result = size;
+ if (size < PAGE_SIZE)
+ buffer[size++] = c;
+ else
+ goto ready;
+ addr++;
+ start++;
+ if (!c && start >= end)
+ goto ready;
+ } while (addr & ~PAGE_MASK);
+ }
+ready:
+ /* remove the trailing blanks, used to fill out argv,envp space */
+ while (result>0 && buffer[result-1]==' ')
+ result--;
+ return result;
+}
+
+/*
+ * Copy the environment area (mm->env_start .. mm->env_end) of process
+ * pid into buffer.  Returns the get_array() length, or 0 if there is
+ * no such process.
+ */
+static int get_env(int pid, char * buffer)
+{
+	struct task_struct ** tsk = get_task(pid);
+
+	if (tsk && *tsk)
+		return get_array(tsk, (*tsk)->mm->env_start, (*tsk)->mm->env_end, buffer);
+	return 0;
+}
+
+/*
+ * Copy the argument area (mm->arg_start .. mm->arg_end) of process pid
+ * into buffer.  Returns the get_array() length, or 0 if there is no
+ * such process.
+ */
+static int get_arg(int pid, char * buffer)
+{
+	struct task_struct ** tsk = get_task(pid);
+
+	if (tsk && *tsk)
+		return get_array(tsk, (*tsk)->mm->arg_start, (*tsk)->mm->arg_end, buffer);
+	return 0;
+}
+
+/*
+ * Work out where a sleeping task is waiting by following the chain of
+ * saved frame pointers on its kernel stack, starting at tss.ebp.
+ *
+ * Returns the first return address found that is not sleep_on or
+ * interruptible_sleep_on.  Returns 0 for a NULL, current or running
+ * task, for a task without a kernel stack page, when the chain leaves
+ * the stack page, or when the frame limit of the loop is reached.
+ */
+static unsigned long get_wchan(struct task_struct *p)
+{
+ unsigned long ebp, eip;
+ unsigned long stack_page;
+ int count = 0;
+
+ if (!p || p == current || p->state == TASK_RUNNING)
+ return 0;
+ stack_page = p->kernel_stack_page;
+ if (!stack_page)
+ return 0;
+ ebp = p->tss.ebp;
+ do {
+ /* never follow a frame pointer that leaves the kernel stack page */
+ if (ebp < stack_page || ebp >= 4092+stack_page)
+ return 0;
+ /* the return address is read from the word above the saved ebp */
+ eip = *(unsigned long *) (ebp+4);
+ if ((void *)eip != sleep_on &&
+ (void *)eip != interruptible_sleep_on)
+ return eip;
+ ebp = *(unsigned long *) ebp;
+ } while (count++ < 16);
+ return 0;
+}
+
+/*
+ * Fetch fixed long-sized slots (indices 1019 and 1022) from a kernel
+ * stack page.  NOTE(review): by their names these are presumably the
+ * saved user eip and esp - confirm against the entry code.
+ */
+#define KSTK_EIP(stack) (((unsigned long *)stack)[1019])
+#define KSTK_ESP(stack) (((unsigned long *)stack)[1022])
+
+/*
+ * Format the one-line status record of process pid into buffer.
+ *
+ * The line holds, in order: pid, (comm), state character, parent pid,
+ * pgrp, session, tty device, tty pgrp, flags, the four fault counters,
+ * utime, stime, cutime, cstime, counter, priority, timeout,
+ * it_real_value, start_time, vsize, rss, the RSS limit, start_code,
+ * end_code, start_stack, esp, eip, signal, blocked, sigignore,
+ * sigcatch and wchan.
+ *
+ * Returns the sprintf() length, or 0 if there is no such process.
+ */
+static int get_stat(int pid, char * buffer)
+{
+ struct task_struct ** p = get_task(pid);
+ unsigned long sigignore=0, sigcatch=0, bit=1, wchan;
+ unsigned long vsize, eip, esp;
+ int i,tty_pgrp;
+ char state;
+
+ if (!p || !*p)
+ return 0;
+ /* map the numeric task state to a letter; '.' for anything out of range */
+ if ((*p)->state < 0 || (*p)->state > 5)
+ state = '.';
+ else
+ state = "RSDZTD"[(*p)->state];
+ eip = esp = 0;
+ /* vsize first holds the kernel stack page, then the computed size */
+ vsize = (*p)->kernel_stack_page;
+ if (vsize) {
+ eip = KSTK_EIP(vsize);
+ esp = KSTK_ESP(vsize);
+ vsize = (*p)->mm->brk - (*p)->mm->start_code + PAGE_SIZE-1;
+ if (esp)
+ vsize += TASK_SIZE - esp;
+ }
+ wchan = get_wchan(*p);
+ /* build bitmasks of ignored (handler 1) and caught (any other non-zero handler) signals */
+ for(i=0; i<32; ++i) {
+ switch((int) (*p)->sigaction[i].sa_handler) {
+ case 1: sigignore |= bit; break;
+ case 0: break;
+ default: sigcatch |= bit;
+ } bit <<= 1;
+ }
+ if ((*p)->tty)
+ tty_pgrp = (*p)->tty->pgrp;
+ else
+ tty_pgrp = -1;
+ return sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
+%lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %u %lu %lu %lu %lu %lu %lu \
+%lu %lu %lu %lu\n",
+ pid,
+ (*p)->comm,
+ state,
+ (*p)->p_pptr->pid,
+ (*p)->pgrp,
+ (*p)->session,
+ (*p)->tty ? (*p)->tty->device : 0,
+ tty_pgrp,
+ (*p)->flags,
+ (*p)->mm->min_flt,
+ (*p)->mm->cmin_flt,
+ (*p)->mm->maj_flt,
+ (*p)->mm->cmaj_flt,
+ (*p)->utime,
+ (*p)->stime,
+ (*p)->cutime,
+ (*p)->cstime,
+ (*p)->counter, /* this is the kernel priority ---
+ subtract 30 in your user-level program. */
+ (*p)->priority, /* this is the nice value ---
+ subtract 15 in your user-level program. */
+ (*p)->timeout,
+ (*p)->it_real_value,
+ (*p)->start_time,
+ vsize,
+ (*p)->mm->rss, /* you might want to shift this left 3 */
+ (*p)->rlim[RLIMIT_RSS].rlim_cur,
+ (*p)->mm->start_code,
+ (*p)->mm->end_code,
+ (*p)->mm->start_stack,
+ esp,
+ eip,
+ (*p)->signal,
+ (*p)->blocked,
+ sigignore,
+ sigcatch,
+ wchan);
+}
+
+/*
+ * Format page-table statistics of process pid into buffer as seven
+ * numbers: size, resident, share, trs, lrs, drs, dt.  The counts are
+ * gathered by scanning the first 0x300 page directory entries of the
+ * task and every page table entry below them; a zombie is reported as
+ * all zeroes.
+ *
+ * Returns the sprintf() length, or 0 if there is no such process.
+ */
+static int get_statm(int pid, char * buffer)
+{
+ struct task_struct ** p = get_task(pid);
+ int i, tpag;
+ int size=0, resident=0, share=0, trs=0, lrs=0, drs=0, dt=0;
+ unsigned long ptbl, *buf, *pte, *pagedir, map_nr;
+
+ if (!p || !*p)
+ return 0;
+ /* tpag counts down the pages that still lie below end_code */
+ tpag = (*p)->mm->end_code / PAGE_SIZE;
+ if ((*p)->state != TASK_ZOMBIE) {
+ pagedir = (unsigned long *) (*p)->tss.cr3;
+ for (i = 0; i < 0x300; ++i) {
+ /* empty directory slot: skip a whole page table worth of pages */
+ if ((ptbl = pagedir[i]) == 0) {
+ tpag -= PTRS_PER_PAGE;
+ continue;
+ }
+ buf = (unsigned long *)(ptbl & PAGE_MASK);
+ for (pte = buf; pte < (buf + PTRS_PER_PAGE); ++pte) {
+ if (*pte != 0) {
+ ++size;
+ /* NOTE(review): bit 0 is presumably the present bit - confirm */
+ if (*pte & 1) {
+ ++resident;
+ if (tpag > 0)
+ ++trs;
+ else
+ ++drs;
+ /* NOTE(review): directory slots 15..0x2ef are counted as lrs, presumably the library area - confirm */
+ if (i >= 15 && i < 0x2f0) {
+ ++lrs;
+ /* NOTE(review): 0x40 is presumably the dirty bit - confirm */
+ if (*pte & 0x40)
+ ++dt;
+ else
+ --drs;
+ }
+ /* a page whose mem_map count exceeds 1 is counted as shared */
+ map_nr = MAP_NR(*pte);
+ if (map_nr < (high_memory / PAGE_SIZE) && mem_map[map_nr] > 1)
+ ++share;
+ }
+ }
+ --tpag;
+ }
+ }
+ }
+ return sprintf(buffer,"%d %d %d %d %d %d %d\n",
+ size, resident, share, trs, lrs, drs, dt);
+}
+
+/*
+ * Format the memory map list (mm->mmap) of process pid into buf, one
+ * line per area:
+ *
+ *   start-end perms offset major:minor inode
+ *
+ * perms is "rwx" plus 's' for shared or 'p' for private.  At most 80
+ * characters are budgeted per line.  When the next line might not fit
+ * into the page any more, the output is closed with "...\n"; that
+ * marker is now included in the returned length so the reader actually
+ * sees that the list was cut short.
+ *
+ * Returns the number of characters written, or 0 if there is no such
+ * process.
+ */
+static int get_maps(int pid, char *buf)
+{
+	int sz = 0;
+	struct task_struct **p = get_task(pid);
+	struct vm_area_struct *map;
+
+	if (!p || !*p)
+		return 0;
+
+	for(map = (*p)->mm->mmap; map != NULL; map = map->vm_next) {
+		char str[7], *cp = str;
+		int flags;
+		int end = sz + 80;	/* Length of line */
+		dev_t dev;
+		unsigned long ino;
+
+		flags = map->vm_flags;
+
+		*cp++ = flags & VM_READ ? 'r' : '-';
+		*cp++ = flags & VM_WRITE ? 'w' : '-';
+		*cp++ = flags & VM_EXEC ? 'x' : '-';
+		*cp++ = flags & VM_SHARED ? 's' : 'p';
+		*cp++ = 0;
+
+		/* out of room: emit the truncation marker and count it */
+		if (end >= PAGE_SIZE) {
+			sz += sprintf(buf+sz, "...\n");
+			break;
+		}
+
+		/* areas without an inode are reported as device 0, inode 0 */
+		if (map->vm_inode != NULL) {
+			dev = map->vm_inode->i_dev;
+			ino = map->vm_inode->i_ino;
+		} else {
+			dev = 0;
+			ino = 0;
+		}
+
+		sz += sprintf(buf+sz, "%08lx-%08lx %s %08lx %02x:%02x %lu\n",
+			map->vm_start, map->vm_end, str, map->vm_offset,
+			MAJOR(dev),MINOR(dev), ino);
+		/* a line longer than budgeted means the 80 above is wrong */
+		if (sz > end) {
+			printk("get_maps: end(%d) < sz(%d)\n", end, sz);
+			break;
+		}
+	}
+
+	return sz;
+}
+
+extern int get_module_list(char *);
+extern int get_device_list(char *);
+extern int get_filesystem_list(char *);
+extern int get_ksyms_list(char *);
+extern int get_irq_list(char *);
+extern int get_dma_list(char *);
+
+/*
+ * Fill page with the contents of the non-process file selected by
+ * type (one of the PROC_* values below) by calling the matching
+ * generator.  Returns the generator's result, or -EBADF for a type
+ * that is not handled here.
+ */
+static int get_root_array(char * page, int type)
+{
+ switch (type) {
+ case PROC_LOADAVG:
+ return get_loadavg(page);
+
+ case PROC_UPTIME:
+ return get_uptime(page);
+
+ case PROC_MEMINFO:
+ return get_meminfo(page);
+
+ case PROC_VERSION:
+ return get_version(page);
+
+#ifdef CONFIG_DEBUG_MALLOC
+ case PROC_MALLOC:
+ return get_malloc(page);
+#endif
+
+ case PROC_MODULES:
+ return get_module_list(page);
+
+ case PROC_STAT:
+ return get_kstat(page);
+
+ case PROC_DEVICES:
+ return get_device_list(page);
+
+ case PROC_INTERRUPTS:
+ return get_irq_list(page);
+
+ case PROC_FILESYSTEMS:
+ return get_filesystem_list(page);
+
+ case PROC_KSYMS:
+ return get_ksyms_list(page);
+
+ case PROC_DMA:
+ return get_dma_list(page);
+ }
+ return -EBADF;
+}
+
+/*
+ * Fill page with the contents of the per-process file selected by
+ * type for process pid.  Returns the generator's result, or -EBADF for
+ * a type that is not handled here.
+ */
+static int get_process_array(char * page, int pid, int type)
+{
+ switch (type) {
+ case PROC_PID_ENVIRON:
+ return get_env(pid, page);
+ case PROC_PID_CMDLINE:
+ return get_arg(pid, page);
+ case PROC_PID_STAT:
+ return get_stat(pid, page);
+ case PROC_PID_STATM:
+ return get_statm(pid, page);
+ case PROC_PID_MAPS:
+ return get_maps(pid, page);
+ }
+ return -EBADF;
+}
+
+
+/*
+ * Dispatch to the per-process generators when pid is non-zero and to
+ * the root generators otherwise.
+ */
+static inline int fill_array(char * page, int pid, int type)
+{
+	return pid ? get_process_array(page, pid, type)
+		   : get_root_array(page, type);
+}
+
+/*
+ * Read handler behind proc_array_operations.
+ *
+ * The inode number selects the content: its upper 16 bits are the pid
+ * (0 for the non-process files) and its lower 16 bits the file type.
+ * The whole content is regenerated into a freshly allocated page on
+ * every call, and the slice [f_pos, f_pos + count) of it is copied to
+ * the user buffer buf.
+ *
+ * Returns the number of bytes copied (0 at or past the end), -EINVAL
+ * for a negative count, -ENOMEM if no page is available, or the
+ * negative error returned by the generator.
+ */
+static int array_read(struct inode * inode, struct file * file,char * buf, int count)
+{
+	unsigned long page;
+	int length;
+	int end;
+	unsigned int type, pid;
+
+	if (count < 0)
+		return -EINVAL;
+	if (!(page = __get_free_page(GFP_KERNEL)))
+		return -ENOMEM;
+	type = inode->i_ino;
+	pid = type >> 16;
+	type &= 0x0000ffff;
+	length = fill_array((char *) page, pid, type);
+	if (length < 0) {
+		free_page(page);
+		return length;
+	}
+	if (file->f_pos >= length) {
+		free_page(page);
+		return 0;
+	}
+	/*
+	 * Clamp against the bytes that are left.  Written as a subtraction
+	 * because "count + f_pos" can overflow for a huge count, which
+	 * would skip the clamp and copy far beyond the page.
+	 */
+	if (count > length - file->f_pos)
+		count = length - file->f_pos;
+	end = count + file->f_pos;
+	memcpy_tofs(buf, (char *) page + file->f_pos, count);
+	free_page(page);
+	file->f_pos = end;
+	return count;
+}
+
+/* File operations for the generated files: only read (array_read) is provided. */
+static struct file_operations proc_array_operations = {
+ NULL, /* array_lseek */
+ array_read,
+ NULL, /* array_write */
+ NULL, /* array_readdir */
+ NULL, /* array_select */
+ NULL, /* array_ioctl */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ NULL /* can't fsync */
+};
+
+/*
+ * Inode operations for the generated files: they only supply
+ * proc_array_operations, every inode-level operation is left NULL.
+ */
+struct inode_operations proc_array_inode_operations = {
+ &proc_array_operations, /* default base directory file-ops */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
diff --git a/fs/proc/base.c b/fs/proc/base.c
new file mode 100644
index 000000000..3dcf0189b
--- /dev/null
+++ b/fs/proc/base.c
@@ -0,0 +1,155 @@
+/*
+ * linux/fs/proc/base.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * proc base directory handling functions
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+
+static int proc_readbase(struct inode *, struct file *, struct dirent *, int);
+static int proc_lookupbase(struct inode *,const char *,int,struct inode **);
+
+/* File operations of a per-process directory: only readdir is provided. */
+static struct file_operations proc_base_operations = {
+ NULL, /* lseek - default */
+ NULL, /* read - bad */
+ NULL, /* write - bad */
+ proc_readbase, /* readdir */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ NULL /* can't fsync */
+};
+
+/*
+ * proc directories can do almost nothing..
+ *
+ * Besides the default file operations, only lookup is implemented for
+ * a per-process directory.
+ */
+struct inode_operations proc_base_inode_operations = {
+ &proc_base_operations, /* default base directory file-ops */
+ NULL, /* create */
+ proc_lookupbase, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * Fixed contents of a per-process directory.  Judging by how
+ * proc_match() and proc_readbase() use the entries, each one is
+ * { low inode number, name length, name }.  The low inode number is
+ * combined with (pid << 16) to form the real inode number, except for
+ * the value 1, which is used unchanged.
+ */
+static struct proc_dir_entry base_dir[] = {
+ { PROC_PID_INO, 1, "." },
+ { PROC_ROOT_INO, 2, ".." },
+ { PROC_PID_MEM, 3, "mem" },
+ { PROC_PID_CWD, 3, "cwd" },
+ { PROC_PID_ROOT, 4, "root" },
+ { PROC_PID_EXE, 3, "exe" },
+ { PROC_PID_FD, 2, "fd" },
+ { PROC_PID_ENVIRON, 7, "environ" },
+ { PROC_PID_CMDLINE, 7, "cmdline" },
+ { PROC_PID_STAT, 4, "stat" },
+ { PROC_PID_STATM, 5, "statm" },
+ { PROC_PID_MAPS, 4, "maps" }
+};
+
+/* Number of entries in base_dir[]. */
+#define NR_BASE_DIRENTRY ((sizeof (base_dir))/(sizeof (base_dir[0])))
+
+/*
+ * Check whether the name of length len matches directory entry de.
+ * Returns 1 on a match and 0 otherwise; a NULL entry or an entry whose
+ * low_ino is 0 never matches.
+ */
+int proc_match(int len,const char * name,struct proc_dir_entry * de)
+{
+	if (!de || !de->low_ino)
+		return 0;
+	/* "" means "." ---> so paths like "/usr/lib//libc.a" work */
+	if (len == 0 && de->name[0] == '.' && de->name[1] == '\0')
+		return 1;
+	if (de->namelen == len)
+		return memcmp(name, de->name, len) == 0;
+	return 0;
+}
+
+/*
+ * lookup() for a per-process directory: resolve name (len characters)
+ * inside dir and store the inode found in *result.
+ *
+ * The pid is taken from the upper 16 bits of the directory's inode
+ * number.  Returns 0 on success and -ENOENT if dir is missing or not a
+ * directory, the name is not in base_dir[], the process no longer
+ * exists, or iget() fails.  dir is released with iput() on every path
+ * where it is non-NULL.
+ */
+static int proc_lookupbase(struct inode * dir,const char * name, int len,
+ struct inode ** result)
+{
+ unsigned int pid, ino;
+ int i;
+
+ *result = NULL;
+ if (!dir)
+ return -ENOENT;
+ if (!S_ISDIR(dir->i_mode)) {
+ iput(dir);
+ return -ENOENT;
+ }
+ ino = dir->i_ino;
+ pid = ino >> 16;
+ /* search base_dir[] from the last entry down; i ends at -1 when nothing matches */
+ i = NR_BASE_DIRENTRY;
+ while (i-- > 0 && !proc_match(len,name,base_dir+i))
+ /* nothing */;
+ if (i < 0) {
+ iput(dir);
+ return -ENOENT;
+ }
+ /* low inode 1 is used as is; everything else gets the pid in its upper bits */
+ if (base_dir[i].low_ino == 1)
+ ino = 1;
+ else
+ ino = (pid << 16) + base_dir[i].low_ino;
+ /* make sure the process still exists */
+ for (i = 0 ; i < NR_TASKS ; i++)
+ if (task[i] && task[i]->pid == pid)
+ break;
+ if (!pid || i >= NR_TASKS) {
+ iput(dir);
+ return -ENOENT;
+ }
+ if (!(*result = iget(dir->i_sb,ino))) {
+ iput(dir);
+ return -ENOENT;
+ }
+ iput(dir);
+ return 0;
+}
+
+/*
+ * readdir() for a per-process directory: return the base_dir[] entry
+ * selected by filp->f_pos in the user structure dirent and advance
+ * f_pos by one.
+ *
+ * Returns the length of the entry's name, 0 at the end of the
+ * directory or when the process no longer exists, and -EBADF if inode
+ * is missing or not a directory.
+ */
+static int proc_readbase(struct inode * inode, struct file * filp,
+ struct dirent * dirent, int count)
+{
+ struct proc_dir_entry * de;
+ unsigned int pid, ino;
+ int i,j;
+
+ if (!inode || !S_ISDIR(inode->i_mode))
+ return -EBADF;
+ ino = inode->i_ino;
+ pid = ino >> 16;
+ /* make sure the process still exists */
+ for (i = 0 ; i < NR_TASKS ; i++)
+ if (task[i] && task[i]->pid == pid)
+ break;
+ if (!pid || i >= NR_TASKS)
+ return 0;
+ if (((unsigned) filp->f_pos) < NR_BASE_DIRENTRY) {
+ de = base_dir + filp->f_pos;
+ filp->f_pos++;
+ i = de->namelen;
+ ino = de->low_ino;
+ /* low inode 1 is used as is; everything else gets the pid in its upper bits */
+ if (ino != 1)
+ ino |= (pid << 16);
+ put_fs_long(ino, &dirent->d_ino);
+ put_fs_word(i,&dirent->d_reclen);
+ /* store the terminating NUL first, then the name from its last character backwards */
+ put_fs_byte(0,i+dirent->d_name);
+ j = i;
+ while (i--)
+ put_fs_byte(de->name[i], i+dirent->d_name);
+ return j;
+ }
+ return 0;
+}
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
new file mode 100644
index 000000000..954540871
--- /dev/null
+++ b/fs/proc/fd.c
@@ -0,0 +1,180 @@
+/*
+ * linux/fs/proc/fd.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * proc fd directory handling functions
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+
+static int proc_readfd(struct inode *, struct file *, struct dirent *, int);
+static int proc_lookupfd(struct inode *,const char *,int,struct inode **);
+
+/* File operations of a per-process fd directory: only readdir is provided. */
+static struct file_operations proc_fd_operations = {
+ NULL, /* lseek - default */
+ NULL, /* read - bad */
+ NULL, /* write - bad */
+ proc_readfd, /* readdir */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ NULL /* can't fsync */
+};
+
+/*
+ * proc directories can do almost nothing..
+ *
+ * Besides the default file operations, only lookup is implemented for
+ * a per-process fd directory.
+ */
+struct inode_operations proc_fd_inode_operations = {
+ &proc_fd_operations, /* default base directory file-ops */
+ NULL, /* create */
+ proc_lookupfd, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * lookup() for a per-process fd directory: resolve name (len
+ * characters) inside dir and store the inode found in *result.
+ *
+ * "" and "." yield dir itself, ".." yields inode (pid << 16) + 2, and
+ * a decimal number yields inode (pid << 16) + 0x100 + fd, provided the
+ * process exists and has that descriptor open on a file with an inode.
+ *
+ * Returns 0 on success, -ENOENT otherwise.  dir is released with
+ * iput() on every path except the one that hands it back in *result.
+ *
+ * The NULL check on dir is done before dir is dereferenced; it used to
+ * come after the first read of dir->i_ino.
+ */
+static int proc_lookupfd(struct inode * dir,const char * name, int len,
+	struct inode ** result)
+{
+	unsigned int ino, pid, fd, c;
+	struct task_struct * p;
+	struct super_block * sb;
+	int i;
+
+	*result = NULL;
+	if (!dir)
+		return -ENOENT;
+	ino = dir->i_ino;
+	pid = ino >> 16;
+	ino &= 0x0000ffff;
+	/*
+	 * ino is zero after this only for low inode number 7.
+	 * NOTE(review): 7 is presumably PROC_PID_FD - confirm.
+	 */
+	ino -= 7;
+	sb = dir->i_sb;
+	if (!pid || ino || !S_ISDIR(dir->i_mode)) {
+		iput(dir);
+		return -ENOENT;
+	}
+	/* "", "." and ".." */
+	if (!len || (name[0] == '.' && (len == 1 ||
+	    (name[1] == '.' && len == 2)))) {
+		if (len < 2) {
+			*result = dir;
+			return 0;
+		}
+		if (!(*result = iget(sb,(pid << 16)+2))) {
+			iput(dir);
+			return -ENOENT;
+		}
+		iput(dir);
+		return 0;
+	}
+	iput(dir);
+	/* parse the name as a decimal number; 0xfffff marks "not a valid fd" */
+	fd = 0;
+	while (len-- > 0) {
+		c = *name - '0';
+		name++;
+		if (c > 9) {
+			fd = 0xfffff;
+			break;
+		}
+		fd *= 10;
+		fd += c;
+		if (fd & 0xffff0000) {
+			fd = 0xfffff;
+			break;
+		}
+	}
+	/* make sure the process still exists */
+	for (i = 0 ; i < NR_TASKS ; i++)
+		if ((p = task[i]) && p->pid == pid)
+			break;
+	if (!pid || i >= NR_TASKS)
+		return -ENOENT;
+
+	if (fd >= NR_OPEN || !p->files->fd[fd] || !p->files->fd[fd]->f_inode)
+		return -ENOENT;
+
+	ino = (pid << 16) + 0x100 + fd;
+
+	if (!(*result = iget(sb,ino)))
+		return -ENOENT;
+	return 0;
+}
+
+/*
+ * readdir() for a per-process fd directory.
+ *
+ * Positions 0 and 1 yield "." and ".."; position n + 2 stands for
+ * descriptor n.  Descriptors that are not open are skipped, so one
+ * call may advance filp->f_pos by more than one.  The entry name is
+ * the descriptor number in decimal and its inode number is
+ * (pid << 16) + 0x100 + fd.
+ *
+ * Returns the length of the name stored in dirent, 0 at the end of
+ * the directory or when the process no longer exists, and -EBADF if
+ * inode is missing or not a directory.
+ */
+static int proc_readfd(struct inode * inode, struct file * filp,
+ struct dirent * dirent, int count)
+{
+ struct task_struct * p;
+ unsigned int fd, pid, ino;
+ int i,j;
+
+ if (!inode || !S_ISDIR(inode->i_mode))
+ return -EBADF;
+ ino = inode->i_ino;
+ pid = ino >> 16;
+ ino &= 0x0000ffff;
+ /* NOTE(review): 7 is presumably PROC_PID_FD - confirm */
+ ino -= 7;
+ if (ino)
+ return 0;
+ while (1) {
+ fd = filp->f_pos;
+ filp->f_pos++;
+ /* positions 0 and 1: "." and "..", i.e. one or two dots */
+ if (fd < 2) {
+ i = j = fd+1;
+ if (!fd)
+ fd = inode->i_ino;
+ else
+ fd = (inode->i_ino & 0xffff0000) | 2;
+ put_fs_long(fd, &dirent->d_ino);
+ put_fs_word(i, &dirent->d_reclen);
+ put_fs_byte(0, i+dirent->d_name);
+ while (i--)
+ put_fs_byte('.', i+dirent->d_name);
+ return j;
+ }
+ fd -= 2;
+ /* make sure the process still exists */
+ for (i = 1 ; i < NR_TASKS ; i++)
+ if ((p = task[i]) && p->pid == pid)
+ break;
+ if (i >= NR_TASKS)
+ return 0;
+ if (fd >= NR_OPEN)
+ break;
+
+ /* descriptor not in use: try the next position */
+ if (!p->files->fd[fd] || !p->files->fd[fd]->f_inode)
+ continue;
+
+ /* i = number of decimal digits needed for fd */
+ j = 10;
+ i = 1;
+ while (fd >= j) {
+ j *= 10;
+ i++;
+ }
+ j = i;
+ ino = (pid << 16) + 0x100 + fd;
+
+ put_fs_long(ino, &dirent->d_ino);
+ put_fs_word(i, &dirent->d_reclen);
+ put_fs_byte(0, i+dirent->d_name);
+ /* write the digits from the least significant one backwards */
+ while (i--) {
+ put_fs_byte('0'+(fd % 10), i+dirent->d_name);
+ fd /= 10;
+ }
+ return j;
+ }
+ return 0;
+}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
new file mode 100644
index 000000000..0d0848b33
--- /dev/null
+++ b/fs/proc/inode.c
@@ -0,0 +1,191 @@
+/*
+ * linux/fs/proc/inode.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+#include <linux/limits.h>
+
+#include <asm/system.h>
+#include <asm/segment.h>
+
+/*
+ * put_inode hook registered in proc_sops: an inode that has no links
+ * left gets its size reset to zero; linked inodes are left untouched.
+ */
+void proc_put_inode(struct inode *inode)
+{
+	if (!inode->i_nlink)
+		inode->i_size = 0;
+}
+
+void proc_put_super(struct super_block *sb)
+{ /* put_super hook: clear the superblock's s_dev while holding the superblock lock */
+ lock_super(sb);
+ sb->s_dev = 0;
+ unlock_super(sb);
+}
+
+static struct super_operations proc_sops = { /* positional initializer: slot order is fixed by struct super_operations, which is declared outside this file; NULL slots are left unimplemented */
+ proc_read_inode,
+ NULL,
+ proc_write_inode,
+ proc_put_inode,
+ proc_put_super,
+ NULL,
+ proc_statfs,
+ NULL
+};
+
+/*
+ * Fill in a superblock for the proc filesystem and attach its root
+ * inode.  Returns s on success.  If the root inode cannot be obtained,
+ * s->s_dev is cleared, a message is printed and NULL is returned.
+ * The data and silent arguments are not used.
+ */
+struct super_block *proc_read_super(struct super_block *s,void *data,
+	int silent)
+{
+	struct inode *root;
+
+	lock_super(s);
+	s->s_blocksize = 1024;
+	s->s_blocksize_bits = 10;
+	s->s_magic = PROC_SUPER_MAGIC;
+	s->s_op = &proc_sops;
+	unlock_super(s);
+
+	root = iget(s, PROC_ROOT_INO);
+	s->s_mounted = root;
+	if (root)
+		return s;
+
+	s->s_dev = 0;
+	printk("get root inode failed\n");
+	return NULL;
+}
+
+void proc_statfs(struct super_block *sb, struct statfs *buf)
+{ /* statfs hook: fill the caller's statfs buffer through put_fs_long; sb is not consulted */
+ put_fs_long(PROC_SUPER_MAGIC, &buf->f_type);
+ put_fs_long(PAGE_SIZE/sizeof(long), &buf->f_bsize); /* NOTE(review): block size is derived from PAGE_SIZE here, while proc_read_super hard-codes 1024 */
+ put_fs_long(0, &buf->f_blocks); /* all block and file counters are reported as zero */
+ put_fs_long(0, &buf->f_bfree);
+ put_fs_long(0, &buf->f_bavail);
+ put_fs_long(0, &buf->f_files);
+ put_fs_long(0, &buf->f_ffree);
+ put_fs_long(NAME_MAX, &buf->f_namelen);
+ /* Don't know what value to put in buf->f_fsid */
+}
+
+void proc_read_inode(struct inode * inode)
+{ /*
+  * read_inode hook: derive mode, owner, link count, size and operation
+  * table for a proc inode purely from its inode number.  The upper 16
+  * bits of i_ino are a pid (0 for the global entries), the lower 16
+  * bits select the entry.  An inode that matches nothing is left with
+  * the defaults set below (mode 0, no i_op).
+  */
+ unsigned long ino, pid;
+ struct task_struct * p;
+ int i;
+
+ inode->i_op = NULL;
+ inode->i_mode = 0;
+ inode->i_uid = 0;
+ inode->i_gid = 0;
+ inode->i_nlink = 1;
+ inode->i_size = 0;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ inode->i_blocks = 0;
+ inode->i_blksize = 1024;
+ ino = inode->i_ino;
+ pid = ino >> 16;
+ p = task[0]; /* overwritten by the first loop iteration below */
+ for (i = 0; i < NR_TASKS ; i++)
+ if ((p = task[i]) && (p->pid == pid))
+ break;
+ if (!p || i >= NR_TASKS) /* no task with that pid: keep the defaults */
+ return;
+ if (ino == PROC_ROOT_INO) {
+ inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
+ inode->i_nlink = 2;
+ for (i = 1 ; i < NR_TASKS ; i++) /* one extra link per occupied task slot */
+ if (task[i])
+ inode->i_nlink++;
+ inode->i_op = &proc_root_inode_operations;
+ return;
+ }
+
+ /* files within /proc/net */
+ if ((ino >= PROC_NET_UNIX) && (ino < PROC_NET_LAST)) {
+ inode->i_mode = S_IFREG | S_IRUGO;
+ inode->i_op = &proc_net_inode_operations;
+ return;
+ }
+
+ if (!pid) { /* global (non per-process) entries */
+ switch (ino) {
+ case PROC_KMSG:
+ inode->i_mode = S_IFREG | S_IRUGO;
+ inode->i_op = &proc_kmsg_inode_operations;
+ break;
+ case PROC_NET:
+ inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
+ inode->i_nlink = 2;
+ inode->i_op = &proc_net_inode_operations;
+ break;
+ case PROC_KCORE:
+ inode->i_mode = S_IFREG | S_IRUSR;
+ inode->i_op = &proc_kcore_inode_operations;
+ inode->i_size = high_memory + PAGE_SIZE;
+ break;
+ default:
+ inode->i_mode = S_IFREG | S_IRUGO;
+ inode->i_op = &proc_array_inode_operations;
+ break;
+ }
+ return;
+ }
+ ino &= 0x0000ffff; /* from here on: per-process entries, owned by the task's effective ids */
+ inode->i_uid = p->euid;
+ inode->i_gid = p->egid;
+ switch (ino) {
+ case PROC_PID_INO:
+ inode->i_nlink = 4;
+ inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
+ inode->i_op = &proc_base_inode_operations;
+ return;
+ case PROC_PID_MEM:
+ inode->i_op = &proc_mem_inode_operations;
+ inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
+ return;
+ case PROC_PID_CWD:
+ case PROC_PID_ROOT:
+ case PROC_PID_EXE:
+ inode->i_op = &proc_link_inode_operations;
+ inode->i_size = 64;
+ inode->i_mode = S_IFLNK | S_IRWXU;
+ return;
+ case PROC_PID_FD:
+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
+ inode->i_op = &proc_fd_inode_operations;
+ inode->i_nlink = 2;
+ return;
+ case PROC_PID_ENVIRON:
+ case PROC_PID_CMDLINE:
+ case PROC_PID_STAT:
+ case PROC_PID_STATM:
+ case PROC_PID_MAPS:
+ inode->i_mode = S_IFREG | S_IRUGO;
+ inode->i_op = &proc_array_inode_operations;
+ return;
+ }
+ switch (ino >> 8) { /* entries of the fd directory: low byte is the descriptor number */
+ case PROC_PID_FD_DIR:
+ ino &= 0xff;
+ if (ino >= NR_OPEN || !p->files->fd[ino])
+ return;
+ inode->i_op = &proc_link_inode_operations;
+ inode->i_size = 64;
+ inode->i_mode = S_IFLNK;
+ if (p->files->fd[ino]->f_mode & 1) /* f_mode bit 0 set: owner read + search */
+ inode->i_mode |= S_IRUSR | S_IXUSR;
+ if (p->files->fd[ino]->f_mode & 2) /* f_mode bit 1 set: owner write + search */
+ inode->i_mode |= S_IWUSR | S_IXUSR;
+ return;
+ }
+ return;
+}
+
+void proc_write_inode(struct inode * inode)
+{ /* write_inode hook: nothing is written anywhere, the dirty flag is simply cleared */
+ inode->i_dirt=0;
+}
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
new file mode 100644
index 000000000..812ee3dd5
--- /dev/null
+++ b/fs/proc/kmsg.c
@@ -0,0 +1,76 @@
+/*
+ * linux/fs/proc/kmsg.c
+ *
+ * Copyright (C) 1992 by Linus Torvalds
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+
+#include <asm/segment.h>
+#include <asm/io.h>
+
+extern unsigned long log_size;
+extern struct wait_queue * log_wait;
+
+asmlinkage int sys_syslog(int type, char * bug, int count);
+
+static int kmsg_open(struct inode * inode, struct file * file)
+{ /* open hook: forwards to sys_syslog with command 1 and returns its result (command meanings are defined by sys_syslog, outside this file) */
+ return sys_syslog(1,NULL,0);
+}
+
+static void kmsg_release(struct inode * inode, struct file * file)
+{ /* release hook: forwards to sys_syslog with command 0; the result is deliberately discarded */
+ (void) sys_syslog(0,NULL,0);
+}
+
+static int kmsg_read(struct inode * inode, struct file * file,char * buf, int count)
+{ /* read hook: forwards buf and count to sys_syslog command 2 and returns whatever it returns */
+ return sys_syslog(2,buf,count);
+}
+
+/*
+ * select hook for the kmsg file.  Only SEL_IN is supported: it reports
+ * ready (1) when log_size is non-zero, otherwise the caller is queued
+ * on log_wait and 0 is returned.  Every other selection type yields 0.
+ */
+static int kmsg_select(struct inode *inode, struct file *file, int sel_type, select_table * wait)
+{
+	if (sel_type == SEL_IN) {
+		if (log_size)
+			return 1;
+		select_wait(&log_wait, wait);
+	}
+	return 0;
+}
+
+
+static struct file_operations proc_kmsg_operations = { /* file operations for the kmsg entry: read, select, open and release only */
+ NULL, /* kmsg_lseek */
+ kmsg_read,
+ NULL, /* kmsg_write */
+ NULL, /* kmsg_readdir */
+ kmsg_select, /* kmsg_select */
+ NULL, /* kmsg_ioctl */
+ NULL, /* mmap */
+ kmsg_open,
+ kmsg_release,
+ NULL /* can't fsync */
+};
+
+struct inode_operations proc_kmsg_inode_operations = { /* kmsg is a plain file: no inode-level operations at all */
+ &proc_kmsg_operations, /* default file operations for the kmsg entry */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
diff --git a/fs/proc/link.c b/fs/proc/link.c
new file mode 100644
index 000000000..769014f46
--- /dev/null
+++ b/fs/proc/link.c
@@ -0,0 +1,195 @@
+/*
+ * linux/fs/proc/link.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * /proc link-file handling code
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+
+static int proc_readlink(struct inode *, char *, int);
+static int proc_follow_link(struct inode *, struct inode *, int, int,
+ struct inode **);
+static int proc_fd_dupf(struct inode * inode, struct file * f);
+
+#define PLAN9_SEMANTICS
+
+/*
+ * links can't do much...
+ */
+static struct file_operations proc_fd_link_operations = { /* only the open slot is populated; everything else is default or unsupported */
+ NULL, /* lseek - default */
+ NULL, /* read - bad */
+ NULL, /* write - bad */
+ NULL, /* readdir - bad */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ proc_fd_dupf, /* very special open code */
+ NULL, /* no special release code */
+ NULL /* can't fsync */
+};
+
+struct inode_operations proc_link_inode_operations = { /* installed by proc_read_inode for the cwd/root/exe links and for the fd entries */
+ &proc_fd_link_operations,/* file-operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ proc_readlink, /* readlink */
+ proc_follow_link, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * This open routine is somewhat of a hack.... what we are doing is
+ * looking up the file structure of the newly opened proc fd file, and
+ * replacing it with the actual file structure of the process's file
+ * descriptor.  This allows plan 9 semantics, so that the returned
+ * file descriptor is a dup of the target file descriptor.
+ *
+ * inode is the proc fd-entry inode being opened, f the file structure
+ * the open path has just installed in current's descriptor table.
+ * Returns 0 on success, -ENOENT if f, the target task or the target
+ * descriptor cannot be found, and -EPERM if the target file carries a
+ * read/write access bit that this open did not ask for.
+ */
+static int proc_fd_dupf(struct inode * inode, struct file * f)
+{
+	unsigned int pid, ino;
+	int i, fd;
+	struct task_struct * p;
+	struct file *new_f;
+
+	/* find the descriptor slot that currently holds f */
+	for (fd = 0 ; fd < NR_OPEN ; fd++)
+		if (current->files->fd[fd] == f)
+			break;
+	if (fd >= NR_OPEN)
+		return -ENOENT;	/* should never happen */
+
+	ino = inode->i_ino;
+	pid = ino >> 16;
+	ino &= 0x0000ffff;
+
+	for (i = 0 ; i < NR_TASKS ; i++)
+		if ((p = task[i]) && p->pid == pid)
+			break;
+
+	/* must be an entry of the fd directory of a live task */
+	if (i >= NR_TASKS || (ino >> 8) != PROC_PID_FD_DIR)
+		return -ENOENT;
+
+	/* low byte is the descriptor number; bound it like the other users do */
+	ino &= 0xff;
+	if (ino >= NR_OPEN || !(new_f = p->files->fd[ino]))
+		return -ENOENT;
+
+	/*
+	 * The dup shares new_f, including its f_mode.  Refuse when new_f
+	 * grants read (1) or write (2) access that the caller's own open
+	 * mode lacks.  This has to be a bitwise test: the logical form
+	 * "a && !b && 3" only fired when f->f_mode was zero.
+	 */
+	if (new_f->f_mode & ~f->f_mode & 3)
+		return -EPERM;
+
+	new_f->f_count++;
+	current->files->fd[fd] = new_f;
+	if (!--f->f_count)
+		iput(f->f_inode);
+	return 0;
+}
+
+static int proc_follow_link(struct inode * dir, struct inode * inode,
+ int flag, int mode, struct inode ** res_inode)
+{ /*
+  * Resolve a per-process link inode (cwd, root, exe or an fd entry) to
+  * the inode it refers to in the target task.  On success 0 is returned
+  * and *res_inode holds that inode with its i_count incremented; on
+  * failure a negative errno is returned and *res_inode is NULL.  The
+  * references passed in on dir and inode are released here, except for
+  * the PLAN9_SEMANTICS early return below, which hands inode back.
+  * flag and mode are not used.
+  */
+ unsigned int pid, ino;
+ struct task_struct * p;
+ struct inode * new_inode;
+ int i;
+
+ *res_inode = NULL;
+ if (dir)
+ iput(dir); /* dir is only used as a non-NULL flag after this point */
+ if (!inode)
+ return -ENOENT;
+ if (!permission(inode, MAY_EXEC)) {
+ iput(inode);
+ return -EACCES;
+ }
+ ino = inode->i_ino;
+ pid = ino >> 16; /* upper half: pid, lower half: which link */
+ ino &= 0x0000ffff;
+ for (i = 0 ; i < NR_TASKS ; i++)
+ if ((p = task[i]) && p->pid == pid)
+ break;
+ if (i >= NR_TASKS) {
+ iput(inode);
+ return -ENOENT;
+ }
+ new_inode = NULL;
+ switch (ino) {
+ case PROC_PID_CWD:
+ new_inode = p->fs->pwd;
+ break;
+ case PROC_PID_ROOT:
+ new_inode = p->fs->root;
+ break;
+ case PROC_PID_EXE: { /* first mapping flagged VM_EXECUTABLE supplies the inode */
+ struct vm_area_struct * vma = p->mm->mmap;
+ while (vma) {
+ if (vma->vm_flags & VM_EXECUTABLE) {
+ new_inode = vma->vm_inode;
+ break;
+ }
+ vma = vma->vm_next;
+ }
+ break;
+ }
+ default:
+ switch (ino >> 8) {
+ case PROC_PID_FD_DIR:
+ ino &= 0xff;
+ if (ino < NR_OPEN && p->files->fd[ino]) {
+#ifdef PLAN9_SEMANTICS
+ if (dir) { /* called with a directory: return the link inode itself, reference and all (proc_readlink passes NULL and so never takes this path) */
+ *res_inode = inode;
+ return 0;
+ }
+#endif
+ new_inode = p->files->fd[ino]->f_inode;
+ }
+ break;
+ }
+ }
+ iput(inode);
+ if (!new_inode)
+ return -ENOENT;
+ *res_inode = new_inode;
+ new_inode->i_count++;
+ return 0;
+}
+
+/*
+ * readlink for the proc link inodes.  The target is resolved with
+ * proc_follow_link() and described as "[dev]:ino" (device in hex,
+ * inode number in decimal).  At most buflen bytes of that text are
+ * stored in the caller's buffer, without a terminating NUL; the number
+ * of bytes stored is returned.  Errors: -EINVAL if inode is not a
+ * symlink, whatever proc_follow_link() reports, or -EIO if it returns
+ * success without an inode.
+ */
+static int proc_readlink(struct inode * inode, char * buffer, int buflen)
+{
+	char text[64];
+	unsigned int dev, ino;
+	int err, len, n;
+
+	if (!S_ISLNK(inode->i_mode)) {
+		iput(inode);
+		return -EINVAL;
+	}
+
+	/* on success inode is replaced by the link target */
+	err = proc_follow_link(NULL, inode, 0, 0, &inode);
+	if (err)
+		return err;
+	if (!inode)
+		return -EIO;
+
+	dev = inode->i_dev;
+	ino = inode->i_ino;
+	iput(inode);
+
+	len = sprintf(text,"[%04x]:%u", dev, ino);
+	if (len < buflen)
+		buflen = len;
+	for (n = 0; n < buflen; n++)
+		put_fs_byte(text[n], buffer + n);
+	return n;
+}
diff --git a/fs/proc/mem.c b/fs/proc/mem.c
new file mode 100644
index 000000000..ae043bb0a
--- /dev/null
+++ b/fs/proc/mem.c
@@ -0,0 +1,260 @@
+/*
+ * linux/fs/proc/mem.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+#include <asm/page.h>
+#include <asm/segment.h>
+#include <asm/io.h>
+
+/*
+ * mem_write isn't really a good idea right now. It needs
+ * to check a lot more: if the process we try to write to
+ * dies in the middle right now, mem_write will overwrite
+ * kernel memory.. This disables it altogether.
+ */
+#define mem_write NULL
+
+static int mem_read(struct inode * inode, struct file * file,char * buf, int count)
+{ /*
+  * Read from another task's address space.  The target pid comes from
+  * the upper 16 bits of the inode number and file->f_pos is used as the
+  * address in the target.  Copying proceeds page by page by walking the
+  * target's page tables directly and stops at the first entry that is
+  * not present or when a signal is pending.  Returns the number of
+  * bytes copied (possibly 0), -EINVAL for a negative count, or -EACCES
+  * if no task with a non-zero tss.cr3 matches the pid.
+  */
+ unsigned long addr, pid, cr3;
+ char *tmp;
+ unsigned long pte, page;
+ int i;
+
+ if (count < 0)
+ return -EINVAL;
+ pid = inode->i_ino;
+ pid >>= 16;
+ cr3 = 0;
+ for (i = 1 ; i < NR_TASKS ; i++) /* slot 0 is never searched */
+ if (task[i] && task[i]->pid == pid) {
+ cr3 = task[i]->tss.cr3;
+ break;
+ }
+ if (!cr3)
+ return -EACCES;
+ addr = file->f_pos;
+ tmp = buf;
+ while (count > 0) {
+ if (current->signal & ~current->blocked) /* unblocked signal pending: stop early */
+ break;
+ pte = *PAGE_DIR_OFFSET(cr3,addr); /* first level: page directory entry */
+ if (!(pte & PAGE_PRESENT))
+ break;
+ pte &= PAGE_MASK;
+ pte += PAGE_PTR(addr);
+ page = *(unsigned long *) pte; /* second level: page table entry */
+ if (!(page & 1)) /* NOTE(review): literal 1 here where the check above uses PAGE_PRESENT */
+ break;
+ page &= PAGE_MASK;
+ page += addr & ~PAGE_MASK;
+ i = PAGE_SIZE-(addr & ~PAGE_MASK); /* bytes left in this page */
+ if (i > count)
+ i = count;
+ memcpy_tofs(tmp,(void *) page,i);
+ addr += i;
+ tmp += i;
+ count -= i;
+ }
+ file->f_pos = addr;
+ return tmp-buf;
+}
+
+#ifndef mem_write
+
+static int mem_write(struct inode * inode, struct file * file,char * buf, int count)
+{ /*
+  * Write counterpart of mem_read.  Not compiled at present: this
+  * function sits under "#ifndef mem_write" and mem_write is #defined
+  * to NULL near the top of the file.  NOTE(review): do_wp_page() is
+  * called with four arguments here but with three in mem_mmap(), so
+  * this body likely needs updating before it can be re-enabled.
+  */
+ unsigned long addr, pid, cr3;
+ char *tmp;
+ unsigned long pte, page;
+ int i;
+
+ if (count < 0)
+ return -EINVAL;
+ addr = file->f_pos;
+ pid = inode->i_ino;
+ pid >>= 16;
+ cr3 = 0;
+ for (i = 1 ; i < NR_TASKS ; i++)
+ if (task[i] && task[i]->pid == pid) {
+ cr3 = task[i]->tss.cr3;
+ break;
+ }
+ if (!cr3)
+ return -EACCES;
+ tmp = buf;
+ while (count > 0) {
+ if (current->signal & ~current->blocked)
+ break;
+ pte = *PAGE_DIR_OFFSET(cr3,addr);
+ if (!(pte & PAGE_PRESENT))
+ break;
+ pte &= PAGE_MASK;
+ pte += PAGE_PTR(addr);
+ page = *(unsigned long *) pte;
+ if (!(page & PAGE_PRESENT))
+ break;
+ if (!(page & 2)) { /* bit 1 clear: call do_wp_page and retry the same address */
+ do_wp_page(0,addr,current,0);
+ continue;
+ }
+ page &= PAGE_MASK;
+ page += addr & ~PAGE_MASK;
+ i = PAGE_SIZE-(addr & ~PAGE_MASK);
+ if (i > count)
+ i = count;
+ memcpy_fromfs((void *) page,tmp,i);
+ addr += i;
+ tmp += i;
+ count -= i;
+ }
+ file->f_pos = addr;
+ if (tmp != buf)
+ return tmp-buf;
+ if (current->signal & ~current->blocked) /* nothing written and a signal is pending */
+ return -ERESTARTSYS;
+ return 0;
+}
+
+#endif
+
+/*
+ * lseek for the mem file: origin 0 sets the position to offset,
+ * origin 1 moves it by offset, any other origin is rejected with
+ * -EINVAL.  The new position is returned; it is not range checked.
+ */
+static int mem_lseek(struct inode * inode, struct file * file, off_t offset, int orig)
+{
+	if (orig == 0)
+		file->f_pos = offset;
+	else if (orig == 1)
+		file->f_pos += offset;
+	else
+		return -EINVAL;
+	return file->f_pos;
+}
+
+int
+mem_mmap(struct inode * inode, struct file * file,
+ struct vm_area_struct * vma)
+{ /*
+  * mmap for the mem file: make the pages of another task, starting at
+  * vma->vm_offset in that task, visible in the caller's vma by copying
+  * the source page-table entries into the caller's page tables and
+  * incrementing each page's mem_map count.  A first pass validates the
+  * whole source range and returns -EINVAL without changing anything;
+  * -EACCES is returned if the target task cannot be found.
+  */
+ unsigned long *src_table, *dest_table, stmp, dtmp, cr3;
+ struct vm_area_struct *src_vma = 0;
+ int i;
+
+ /* Get the source's task information */
+
+ cr3 = 0;
+ for (i = 1 ; i < NR_TASKS ; i++)
+ if (task[i] && task[i]->pid == (inode->i_ino >> 16)) {
+ cr3 = task[i]->tss.cr3;
+ src_vma = task[i]->mm->mmap;
+ break;
+ }
+
+ if (!cr3)
+ return -EACCES;
+
+/* Ensure that we have a valid source area. (Has to be mmap'ed and
+ have valid page information.) We can't map shared memory at the
+ moment because working out the vm_area_struct & nattach stuff isn't
+ worth it. */
+
+ stmp = vma->vm_offset;
+ while (stmp < vma->vm_offset + (vma->vm_end - vma->vm_start)) {
+ while (src_vma && stmp > src_vma->vm_end) /* advance to the source area that can contain stmp */
+ src_vma = src_vma->vm_next;
+ if (!src_vma || (src_vma->vm_flags & VM_SHM))
+ return -EINVAL;
+
+ src_table = PAGE_DIR_OFFSET(cr3, stmp);
+ if (!*src_table) /* no page table for this address */
+ return -EINVAL;
+ src_table = (unsigned long *)((*src_table & PAGE_MASK) + PAGE_PTR(stmp));
+ if (!*src_table) /* empty page-table entry */
+ return -EINVAL;
+
+ if (stmp < src_vma->vm_start) { /* below the area: only acceptable for a grows-down area within the stack limit */
+ if (!(src_vma->vm_flags & VM_GROWSDOWN))
+ return -EINVAL;
+ if (src_vma->vm_end - stmp > current->rlim[RLIMIT_STACK].rlim_cur)
+ return -EINVAL;
+ }
+ stmp += PAGE_SIZE;
+ }
+
+ src_vma = task[i]->mm->mmap; /* second pass: i still indexes the task found above */
+ stmp = vma->vm_offset;
+ dtmp = vma->vm_start;
+
+ while (dtmp < vma->vm_end) {
+ while (src_vma && stmp > src_vma->vm_end)
+ src_vma = src_vma->vm_next;
+
+ src_table = PAGE_DIR_OFFSET(cr3, stmp);
+ src_table = (unsigned long *)((*src_table & PAGE_MASK) + PAGE_PTR(stmp));
+
+ dest_table = PAGE_DIR_OFFSET(current->tss.cr3, dtmp);
+
+ if (!*dest_table) { /* caller has no page table here yet: allocate one */
+ *dest_table = get_free_page(GFP_KERNEL);
+ if (!*dest_table) { oom(current); *dest_table=BAD_PAGE; }
+ else *dest_table |= PAGE_TABLE;
+ }
+
+ dest_table = (unsigned long *)((*dest_table & PAGE_MASK) + PAGE_PTR(dtmp));
+
+ if (!(*src_table & PAGE_PRESENT))
+ do_no_page(src_vma, stmp, PAGE_PRESENT);
+
+ if ((vma->vm_flags & VM_WRITE) && !(*src_table & PAGE_RW))
+ do_wp_page(src_vma, stmp, PAGE_RW | PAGE_PRESENT);
+
+ *src_table |= PAGE_DIRTY;
+ *dest_table = *src_table; /* share the page: same entry in both address spaces */
+ mem_map[MAP_NR(*src_table)]++;
+
+ stmp += PAGE_SIZE;
+ dtmp += PAGE_SIZE;
+ }
+
+ invalidate();
+ return 0;
+}
+
+static struct file_operations proc_mem_operations = { /* the write slot is mem_write, which is currently #defined to NULL in this file */
+ mem_lseek,
+ mem_read,
+ mem_write,
+ NULL, /* mem_readdir */
+ NULL, /* mem_select */
+ NULL, /* mem_ioctl */
+ mem_mmap, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ NULL /* can't fsync */
+};
+
+struct inode_operations proc_mem_inode_operations = { /* installed by proc_read_inode for PROC_PID_MEM; a plain file, so no inode-level operations */
+ &proc_mem_operations, /* default file operations for the mem entry */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
diff --git a/fs/proc/net.c b/fs/proc/net.c
new file mode 100644
index 000000000..601f590d3
--- /dev/null
+++ b/fs/proc/net.c
@@ -0,0 +1,300 @@
+/*
+ * linux/fs/proc/net.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * gjh 3/'93 heim@peanuts.informatik.uni-tuebingen.de (Gerald J. Heim)
+ * most of this file is stolen from base.c
+ * it works, but you shouldn't use it as a guideline
+ * for new proc-fs entries. once i'll make it better.
+ * fvk 3/'93 waltje@uwalt.nl.mugnet.org (Fred N. van Kempen)
+ * cleaned up the whole thing, moved "net" specific code to
+ * the NET kernel layer (where it belonged in the first place).
+ * Michael K. Johnson (johnsonm@stolaf.edu) 3/93
+ * Added support from my previous inet.c. Cleaned things up
+ * quite a bit, modularized the code.
+ * fvk 4/'93 waltje@uwalt.nl.mugnet.org (Fred N. van Kempen)
+ * Renamed "route_get_info()" to "rt_get_info()" for consistency.
+ * Alan Cox (gw4pts@gw4pts.ampr.org) 4/94
+ * Dusted off the code and added IPX. Fixed the 4K limit.
+ * Erik Schoenfelder (schoenfr@ibr.cs.tu-bs.de)
+ * /proc/net/snmp.
+ *
+ * proc net directory handling functions
+ */
+#include <linux/autoconf.h>
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+
+/* forward references */
+static int proc_readnet(struct inode * inode, struct file * file,
+ char * buf, int count);
+static int proc_readnetdir(struct inode *, struct file *,
+ struct dirent *, int);
+static int proc_lookupnet(struct inode *,const char *,int,struct inode **);
+
+/* the get_*_info() functions are in the net code, and are configured
+ in via the standard mechanism... */
+extern int unix_get_info(char *, char **, off_t, int);
+#ifdef CONFIG_INET
+extern int tcp_get_info(char *, char **, off_t, int);
+extern int udp_get_info(char *, char **, off_t, int);
+extern int raw_get_info(char *, char **, off_t, int);
+extern int arp_get_info(char *, char **, off_t, int);
+extern int rarp_get_info(char *, char **, off_t, int);
+extern int dev_get_info(char *, char **, off_t, int);
+extern int rt_get_info(char *, char **, off_t, int);
+extern int snmp_get_info(char *, char **, off_t, int);
+#endif /* CONFIG_INET */
+#ifdef CONFIG_IPX
+extern int ipx_get_info(char *, char **, off_t, int);
+extern int ipx_rt_get_info(char *, char **, off_t, int);
+#endif /* CONFIG_IPX */
+#ifdef CONFIG_AX25
+extern int ax25_get_info(char *, char **, off_t, int);
+extern int ax25_rt_get_info(char *, char **, off_t, int);
+#ifdef CONFIG_NETROM
+extern int nr_get_info(char *, char **, off_t, int);
+extern int nr_nodes_get_info(char *, char **, off_t, int);
+extern int nr_neigh_get_info(char *, char **, off_t, int);
+#endif /* CONFIG_NETROM */
+#endif /* CONFIG_AX25 */
+
+
+static struct file_operations proc_net_operations = { /* one table serves both the net directory (readdir) and the files inside it (read) */
+ NULL, /* lseek - default */
+ proc_readnet, /* read - for the files in the net directory */
+ NULL, /* write - bad */
+ proc_readnetdir, /* readdir */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ NULL /* can't fsync */
+};
+
+/*
+ * proc directories can do almost nothing..
+ */
+struct inode_operations proc_net_inode_operations = { /* proc_read_inode installs this on the net directory and on every file in it */
+ &proc_net_operations, /* default net directory file-ops */
+ NULL, /* create */
+ proc_lookupnet, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+static struct proc_dir_entry net_dir[] = { /* entries are { inode number, name length, name }; the length must match the string because readdir copies exactly that many bytes */
+ { PROC_NET, 1, "." },
+ { PROC_ROOT_INO, 2, ".." },
+ { PROC_NET_UNIX, 4, "unix" },
+#ifdef CONFIG_INET
+ { PROC_NET_ARP, 3, "arp" },
+ { PROC_NET_ROUTE, 5, "route" },
+ { PROC_NET_DEV, 3, "dev" },
+ { PROC_NET_RAW, 3, "raw" },
+ { PROC_NET_TCP, 3, "tcp" },
+ { PROC_NET_UDP, 3, "udp" },
+ { PROC_NET_SNMP, 4, "snmp" },
+#ifdef CONFIG_INET_RARP
+ { PROC_NET_RARP, 4, "rarp"},
+#endif
+#endif /* CONFIG_INET */
+#ifdef CONFIG_IPX
+ { PROC_NET_IPX_ROUTE, 9, "ipx_route" },
+ { PROC_NET_IPX, 3, "ipx" },
+#endif /* CONFIG_IPX */
+#ifdef CONFIG_AX25
+ { PROC_NET_AX25_ROUTE, 10, "ax25_route" },
+ { PROC_NET_AX25, 4, "ax25" },
+#ifdef CONFIG_NETROM
+ { PROC_NET_NR_NODES, 8, "nr_nodes" },
+ { PROC_NET_NR_NEIGH, 8, "nr_neigh" },
+ { PROC_NET_NR, 2, "nr" },
+#endif /* CONFIG_NETROM */
+#endif /* CONFIG_AX25 */
+ { 0, 0, NULL } /* sentinel: proc_lookupnet stops at the NULL name; NR_NET_DIRENTRY excludes it */
+};
+
+#define NR_NET_DIRENTRY ((sizeof (net_dir))/(sizeof (net_dir[0])) - 1)
+
+/*
+ * lookup for the net directory: find name (len bytes, not
+ * NUL-terminated) in net_dir and return the matching inode in *result.
+ *
+ * Returns 0 on success, -ENOENT if dir is NULL or not a directory, if
+ * no entry matches, or if iget() fails; *result is NULL on failure.
+ * The reference on dir is consumed on every path once dir is known to
+ * be non-NULL, as in the other proc lookup routines.
+ */
+static int proc_lookupnet(struct inode * dir,const char * name, int len,
+	struct inode ** result)
+{
+	struct proc_dir_entry *de;
+
+	*result = NULL;
+	if (!dir)
+		return -ENOENT;
+	if (!S_ISDIR(dir->i_mode)) {
+		iput(dir);
+		return -ENOENT;
+	}
+	for (de = net_dir ; de->name ; de++) {
+		if (!proc_match(len, name, de))
+			continue;
+		*result = iget(dir->i_sb, de->low_ino);
+		iput(dir);
+		if (!*result)
+			return -ENOENT;
+		return 0;
+	}
+	/* no entry matched: drop the directory reference here too */
+	iput(dir);
+	return -ENOENT;
+}
+
+/*
+ * readdir for the net directory.  filp->f_pos indexes net_dir; each
+ * call stores one entry in the caller's dirent and returns the length
+ * of its name, or 0 once the table is exhausted.  Returns -EBADF if
+ * inode is missing or not a directory.  count is not used.
+ */
+static int proc_readnetdir(struct inode * inode, struct file * filp,
+	struct dirent * dirent, int count)
+{
+	struct proc_dir_entry *de;
+	int len, k;
+
+	if (!inode || !S_ISDIR(inode->i_mode))
+		return -EBADF;
+	if (((unsigned) filp->f_pos) >= NR_NET_DIRENTRY)
+		return 0;
+
+	de = net_dir + filp->f_pos;
+	filp->f_pos++;
+	len = de->namelen;
+
+	put_fs_long(de->low_ino, &dirent->d_ino);
+	put_fs_word(len,&dirent->d_reclen);
+	put_fs_byte(0,len+dirent->d_name);
+	/* the name is copied back to front, like the other proc readdirs */
+	for (k = len - 1; k >= 0; k--)
+		put_fs_byte(de->name[k], k+dirent->d_name);
+	return len;
+}
+
+
+#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
+
+static int proc_readnet(struct inode * inode, struct file * file,
+ char * buf, int count)
+{ /*
+  * read handler shared by all files in the net directory.  The inode
+  * number selects a *_get_info() producer, which is asked repeatedly
+  * for at most PROC_BLOCK_SIZE bytes at the current file position; each
+  * chunk is copied to the caller's buffer until count bytes have been
+  * delivered or the producer returns less than was requested.
+  * Returns the number of bytes copied, -EINVAL for a negative count,
+  * -ENOMEM if no scratch page is available, or -EBADF for an inode
+  * number with no producer.
+  */
+ char * page;
+ int length;
+ unsigned int ino;
+ int bytes=count;
+ int thistime;
+ int copied=0;
+ char *start;
+
+ if (count < 0)
+ return -EINVAL;
+ if (!(page = (char*) __get_free_page(GFP_KERNEL)))
+ return -ENOMEM;
+ ino = inode->i_ino;
+
+ while(bytes>0)
+ {
+ thistime=bytes; /* request size for this round, capped below */
+ if(bytes>PROC_BLOCK_SIZE)
+ thistime=PROC_BLOCK_SIZE;
+
+ switch (ino)
+ {
+ case PROC_NET_UNIX:
+ length = unix_get_info(page,&start,file->f_pos,thistime);
+ break;
+#ifdef CONFIG_INET
+ case PROC_NET_ARP:
+ length = arp_get_info(page,&start,file->f_pos,thistime);
+ break;
+ case PROC_NET_ROUTE:
+ length = rt_get_info(page,&start,file->f_pos,thistime);
+ break;
+ case PROC_NET_DEV:
+ length = dev_get_info(page,&start,file->f_pos,thistime);
+ break;
+ case PROC_NET_RAW:
+ length = raw_get_info(page,&start,file->f_pos,thistime);
+ break;
+ case PROC_NET_TCP:
+ length = tcp_get_info(page,&start,file->f_pos,thistime);
+ break;
+ case PROC_NET_UDP:
+ length = udp_get_info(page,&start,file->f_pos,thistime);
+ break;
+ case PROC_NET_SNMP:
+ length = snmp_get_info(page, &start, file->f_pos,thistime);
+ break;
+#ifdef CONFIG_INET_RARP
+ case PROC_NET_RARP:
+ length = rarp_get_info(page,&start,file->f_pos,thistime);
+ break;
+#endif /* CONFIG_INET_RARP */
+#endif /* CONFIG_INET */
+#ifdef CONFIG_IPX
+ case PROC_NET_IPX_ROUTE:
+ length = ipx_rt_get_info(page,&start,file->f_pos,thistime);
+ break;
+ case PROC_NET_IPX:
+ length = ipx_get_info(page,&start,file->f_pos,thistime);
+ break;
+#endif /* CONFIG_IPX */
+#ifdef CONFIG_AX25
+ case PROC_NET_AX25_ROUTE:
+ length = ax25_rt_get_info(page,&start,file->f_pos,thistime);
+ break;
+ case PROC_NET_AX25:
+ length = ax25_get_info(page,&start,file->f_pos,thistime);
+ break;
+#ifdef CONFIG_NETROM
+ case PROC_NET_NR_NODES:
+ length = nr_nodes_get_info(page,&start,file->f_pos,thistime);
+ break;
+ case PROC_NET_NR_NEIGH:
+ length = nr_neigh_get_info(page,&start,file->f_pos,thistime);
+ break;
+ case PROC_NET_NR:
+ length = nr_get_info(page,&start,file->f_pos,thistime);
+ break;
+#endif /* CONFIG_NETROM */
+#endif /* CONFIG_AX25 */
+
+ default: /* unknown inode: release the scratch page before bailing out */
+ free_page((unsigned long) page);
+ return -EBADF;
+ }
+
+ /*
+ * We have been given a non page aligned block of
+ * the data we asked for + a bit. We have been given
+ * the start pointer and we know the length..
+ */
+
+ if (length <= 0)
+ break;
+ /*
+ * Copy the bytes
+ */
+ memcpy_tofs(buf+copied, start, length); /* NOTE(review): relies on the producer never returning more than thistime bytes */
+ file->f_pos+=length; /* Move down the file */
+ bytes-=length;
+ copied+=length;
+ if(length<thistime)
+ break; /* End of file */
+ }
+ free_page((unsigned long) page);
+ return copied;
+
+}
diff --git a/fs/proc/root.c b/fs/proc/root.c
new file mode 100644
index 000000000..97cf2ff25
--- /dev/null
+++ b/fs/proc/root.c
@@ -0,0 +1,184 @@
+/*
+ * linux/fs/proc/root.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * proc root directory handling functions
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/config.h>
+
+static int proc_readroot(struct inode *, struct file *, struct dirent *, int);
+static int proc_lookuproot(struct inode *,const char *,int,struct inode **);
+
+static struct file_operations proc_root_operations = { /* the root directory only supports readdir */
+ NULL, /* lseek - default */
+ NULL, /* read - bad */
+ NULL, /* write - bad */
+ proc_readroot, /* readdir */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ NULL /* no fsync */
+};
+
+/*
+ * proc directories can do almost nothing..
+ */
+struct inode_operations proc_root_inode_operations = { /* installed by proc_read_inode on PROC_ROOT_INO; lookup is the only inode operation */
+ &proc_root_operations, /* default base directory file-ops */
+ NULL, /* create */
+ proc_lookuproot, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+static struct proc_dir_entry root_dir[] = { /* fixed entries of the root directory as { inode number, name length, name }; no sentinel, NR_ROOT_DIRENTRY counts every element */
+ { PROC_ROOT_INO, 1, "." },
+ { PROC_ROOT_INO, 2, ".." },
+ { PROC_LOADAVG, 7, "loadavg" },
+ { PROC_UPTIME, 6, "uptime" },
+ { PROC_MEMINFO, 7, "meminfo" },
+ { PROC_KMSG, 4, "kmsg" },
+ { PROC_VERSION, 7, "version" },
+ { PROC_SELF, 4, "self" }, /* will change inode # */
+ { PROC_NET, 3, "net" },
+#ifdef CONFIG_DEBUG_MALLOC
+ { PROC_MALLOC, 6, "malloc" },
+#endif
+ { PROC_KCORE, 5, "kcore" },
+ { PROC_MODULES, 7, "modules" },
+ { PROC_STAT, 4, "stat" },
+ { PROC_DEVICES, 7, "devices" },
+ { PROC_INTERRUPTS, 10,"interrupts" },
+ { PROC_FILESYSTEMS, 11,"filesystems" },
+ { PROC_KSYMS, 5, "ksyms" },
+ { PROC_DMA, 3, "dma" },
+};
+
+#define NR_ROOT_DIRENTRY ((sizeof (root_dir))/(sizeof (root_dir[0])))
+
+static int proc_lookuproot(struct inode * dir,const char * name, int len,
+ struct inode ** result)
+{ /*
+  * lookup in the proc root directory.  A name found in root_dir maps to
+  * that entry's inode ("self" is redirected to the current task's
+  * directory); any other name is parsed as a decimal pid and must
+  * belong to an existing task.  Returns 0 with the inode in *result, or
+  * -ENOENT.  The reference on dir is released on every path except for
+  * "." and "..", where dir itself is handed back through *result.
+  */
+ unsigned int pid, c;
+ int i, ino;
+
+ *result = NULL;
+ if (!dir)
+ return -ENOENT;
+ if (!S_ISDIR(dir->i_mode)) {
+ iput(dir);
+ return -ENOENT;
+ }
+ i = NR_ROOT_DIRENTRY;
+ while (i-- > 0 && !proc_match(len,name,root_dir+i)) /* scan the table backwards; i ends at -1 when nothing matches */
+ /* nothing */;
+ if (i >= 0) {
+ ino = root_dir[i].low_ino;
+ if (ino == PROC_ROOT_INO) { /* "." or "..": the root directory itself */
+ *result = dir;
+ return 0;
+ }
+ if (ino == PROC_SELF) /* self modifying inode ... */
+ ino = (current->pid << 16) + 2;
+ } else {
+ pid = 0;
+ while (len-- > 0) { /* parse name as a decimal number; pid 0 marks failure */
+ c = *name - '0';
+ name++;
+ if (c > 9) { /* unsigned compare also rejects characters below '0' */
+ pid = 0;
+ break;
+ }
+ pid *= 10;
+ pid += c;
+ if (pid & 0xffff0000) { /* must fit in the 16 bits reserved in the inode number */
+ pid = 0;
+ break;
+ }
+ }
+ for (i = 0 ; i < NR_TASKS ; i++)
+ if (task[i] && task[i]->pid == pid)
+ break;
+ if (!pid || i >= NR_TASKS) {
+ iput(dir);
+ return -ENOENT;
+ }
+ ino = (pid << 16) + 2; /* low half 2: the per-process directory */
+ }
+ if (!(*result = iget(dir->i_sb,ino))) {
+ iput(dir);
+ return -ENOENT;
+ }
+ iput(dir);
+ return 0;
+}
+
+static int proc_readroot(struct inode * inode, struct file * filp,
+ struct dirent * dirent, int count)
+{ /*
+  * readdir for the proc root.  filp->f_pos first walks root_dir, then
+  * the task table: position NR_ROOT_DIRENTRY + n refers to task[n].
+  * One entry is stored per call and the length of its name returned;
+  * 0 means the end of the directory.  count is not used.
+  */
+ struct task_struct * p;
+ unsigned int nr,pid;
+ int i,j;
+
+ if (!inode || !S_ISDIR(inode->i_mode))
+ return -EBADF;
+repeat:
+ nr = filp->f_pos;
+ if (nr < NR_ROOT_DIRENTRY) {
+ struct proc_dir_entry * de = root_dir + nr;
+
+ filp->f_pos++;
+ i = de->namelen;
+ put_fs_long(de->low_ino, &dirent->d_ino);
+ put_fs_word(i,&dirent->d_reclen);
+ put_fs_byte(0,i+dirent->d_name);
+ j = i;
+ while (i--)
+ put_fs_byte(de->name[i], i+dirent->d_name);
+ return j;
+ }
+ nr -= NR_ROOT_DIRENTRY;
+ if (nr >= NR_TASKS)
+ return 0;
+ filp->f_pos++;
+ p = task[nr];
+ if (!p || !(pid = p->pid)) /* empty slot or pid 0: skip to the next position */
+ goto repeat;
+ if (pid & 0xffff0000) /* pid would not fit in the inode number: skip it */
+ goto repeat;
+ j = 10;
+ i = 1;
+ while (pid >= j) { /* i = number of decimal digits in pid */
+ j *= 10;
+ i++;
+ }
+ j = i;
+ put_fs_long((pid << 16)+2, &dirent->d_ino);
+ put_fs_word(i, &dirent->d_reclen);
+ put_fs_byte(0, i+dirent->d_name);
+ while (i--) { /* digits are written right to left */
+ put_fs_byte('0'+(pid % 10), i+dirent->d_name);
+ pid /= 10;
+ }
+ return j;
+}
diff --git a/fs/read_write.c b/fs/read_write.c
new file mode 100644
index 000000000..5f457b9cb
--- /dev/null
+++ b/fs/read_write.c
@@ -0,0 +1,171 @@
+/*
+ * linux/fs/read_write.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include <asm/segment.h>
+
+/*
+ * Count is now a supported feature, but currently only the ext2fs
+ * uses it. A count value of 1 is supported for compatibility with
+ * earlier libraries, but larger values are supported: count should
+ * indicate the total buffer space available for filling with dirents.
+ * The d_off entry in the dirents will then indicate the offset from
+ * each dirent to the next, and the return value will indicate the
+ * number of bytes written. All dirents will be written at
+ * word-aligned addresses. [sct Oct 1994]
+ */
+/*
+ * readdir system call.  Validates fd, checks that the caller's dirent
+ * buffer is writable (count bytes, or one struct dirent for the legacy
+ * count of 1) and hands the request to the file's readdir operation,
+ * passing count through unchanged.  Returns -EBADF for a bad
+ * descriptor, -ENOTDIR if the file has no readdir operation, the
+ * verify_area() error, or the operation's own result.
+ */
+asmlinkage int sys_readdir(unsigned int fd, struct dirent * dirent, unsigned int count)
+{
+	struct file * file;
+	struct inode * inode;
+	int size, error;
+
+	if (fd >= NR_OPEN)
+		return -EBADF;
+	file = current->files->fd[fd];
+	if (!file)
+		return -EBADF;
+	inode = file->f_inode;
+	if (!inode)
+		return -EBADF;
+
+	if (!file->f_op || !file->f_op->readdir)
+		return -ENOTDIR;
+
+	size = count;
+	if (count == 1)
+		size = sizeof(*dirent);
+	error = verify_area(VERIFY_WRITE, dirent, size);
+	if (error)
+		return error;
+	return file->f_op->readdir(inode,file,dirent,count);
+}
+
+/*
+ * lseek system call.  A file with its own lseek operation handles the
+ * request itself; otherwise the new position is computed here from the
+ * origin (0: absolute, 1: relative to f_pos, 2: relative to i_size).
+ * Returns the resulting position, -EBADF for a bad descriptor, or
+ * -EINVAL for an unknown origin or a negative result.
+ */
+asmlinkage int sys_lseek(unsigned int fd, off_t offset, unsigned int origin)
+{
+	struct file * file;
+	int pos;
+
+	if (fd >= NR_OPEN || !(file=current->files->fd[fd]) || !(file->f_inode))
+		return -EBADF;
+	if (origin > 2)
+		return -EINVAL;
+	if (file->f_op && file->f_op->lseek)
+		return file->f_op->lseek(file->f_inode,file,offset,origin);
+
+	/* default handler; f_inode was already checked to be non-NULL above */
+	if (origin == 0)
+		pos = offset;
+	else if (origin == 1)
+		pos = file->f_pos + offset;
+	else
+		pos = file->f_inode->i_size + offset;
+
+	if (pos < 0)
+		return -EINVAL;
+	if (pos != file->f_pos) {
+		/* position really moved: drop read-ahead state, bump the version */
+		file->f_pos = pos;
+		file->f_reada = 0;
+		file->f_version = ++event;
+	}
+	return file->f_pos;
+}
+
+asmlinkage int sys_llseek(unsigned int fd, unsigned long offset_high,
+ unsigned long offset_low, loff_t * result,
+ unsigned int origin)
+{ /*
+  * llseek system call: like sys_lseek, but the offset arrives as two
+  * halves and the new position is stored through the user pointer
+  * result instead of being returned.  The file's own lseek operation
+  * is never consulted.  Returns 0 on success, -EBADF, -EINVAL, or the
+  * verify_area() error for result.
+  */
+ struct file * file;
+ loff_t tmp = -1;
+ loff_t offset;
+ int err;
+
+ if (fd >= NR_OPEN || !(file=current->files->fd[fd]) || !(file->f_inode))
+ return -EBADF;
+ if (origin > 2)
+ return -EINVAL;
+ if ((err = verify_area(VERIFY_WRITE, result, sizeof(loff_t))))
+ return err;
+ offset = (loff_t) (((unsigned long long) offset_high << 32) | offset_low); /* widen before shifting so the high half is not lost */
+/* there is no fs specific llseek handler */
+ switch (origin) {
+ case 0:
+ tmp = offset;
+ break;
+ case 1:
+ tmp = file->f_pos + offset;
+ break;
+ case 2:
+ if (!file->f_inode) /* cannot trigger: f_inode was checked at the top */
+ return -EINVAL;
+ tmp = file->f_inode->i_size + offset;
+ break;
+ }
+ if (tmp < 0)
+ return -EINVAL;
+ file->f_pos = tmp; /* unlike sys_lseek, state is reset even if the position did not change */
+ file->f_reada = 0;
+ file->f_version = ++event;
+ memcpy_tofs(result, &file->f_pos, sizeof(loff_t));
+ return 0;
+}
+
+/*
+ * read system call.  Checks, in this order: that fd names an open file
+ * with an inode (-EBADF), that the file was opened for reading
+ * (-EBADF), that it has a read operation (-EINVAL), that count is
+ * non-zero (otherwise 0 is returned) and that the user buffer is
+ * writable (verify_area() error).  The file's read operation then does
+ * the work and its result is returned.
+ */
+asmlinkage int sys_read(unsigned int fd,char * buf,unsigned int count)
+{
+	struct file * file;
+	struct inode * inode;
+	int error;
+
+	if (fd >= NR_OPEN)
+		return -EBADF;
+	file = current->files->fd[fd];
+	if (!file)
+		return -EBADF;
+	inode = file->f_inode;
+	if (!inode)
+		return -EBADF;
+
+	if (!(file->f_mode & 1))
+		return -EBADF;
+	if (!file->f_op || !file->f_op->read)
+		return -EINVAL;
+	if (!count)
+		return 0;
+
+	error = verify_area(VERIFY_WRITE,buf,count);
+	if (error)
+		return error;
+	return file->f_op->read(inode,file,buf,count);
+}
+
+/*
+ * write system call.  Performs the same sequence of checks as
+ * sys_read (with the write-mode bit and VERIFY_READ on the buffer),
+ * then calls the file's write operation.  If data was written by a
+ * caller for whom suser() is false, the setuid and setgid bits are
+ * stripped from the inode through notify_change().  Returns the write
+ * operation's result.
+ */
+asmlinkage int sys_write(unsigned int fd,char * buf,unsigned int count)
+{
+	struct file * file;
+	struct inode * inode;
+	struct iattr newattrs;
+	int error, written;
+
+	if (fd >= NR_OPEN)
+		return -EBADF;
+	file = current->files->fd[fd];
+	if (!file)
+		return -EBADF;
+	inode = file->f_inode;
+	if (!inode)
+		return -EBADF;
+
+	if (!(file->f_mode & 2))
+		return -EBADF;
+	if (!file->f_op || !file->f_op->write)
+		return -EINVAL;
+	if (!count)
+		return 0;
+
+	error = verify_area(VERIFY_READ,buf,count);
+	if (error)
+		return error;
+
+	written = file->f_op->write(inode,file,buf,count);
+
+	/* nothing written, privileged writer, or no set-id bits: done */
+	if (written <= 0 || suser() || !(inode->i_mode & (S_ISUID | S_ISGID)))
+		return written;
+
+	/* data went into a set-id file: remove the setuid and setgid bits */
+	newattrs.ia_mode = inode->i_mode & ~(S_ISUID | S_ISGID);
+	newattrs.ia_valid = ATTR_MODE;
+	notify_change(inode, &newattrs);
+	return written;
+}
diff --git a/fs/select.c b/fs/select.c
new file mode 100644
index 000000000..e87ae07eb
--- /dev/null
+++ b/fs/select.c
@@ -0,0 +1,258 @@
+/*
+ * This file contains the procedures for the handling of select
+ *
+ * Created for Linux based loosely upon Mathius Lattner's minix
+ * patches by Peter MacDonald. Heavily edited by Linus.
+ *
+ * 4 February 1994
+ * COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
+ * flag set in its personality we do *not* modify the given timeout
+ * parameter to reflect time remaining.
+ */
+
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/personality.h>
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
+
+/*
+ * Ok, Peter made a complicated, but straightforward multiple_wait() function.
+ * I have rewritten this, taking some shortcuts: This code may not be easy to
+ * follow, but it should be free of race-conditions, and it's practical. If you
+ * understand what I'm doing here, then you understand how the linux
+ * sleep/wakeup mechanism works.
+ *
+ * Two very simple procedures, select_wait() and free_wait(), do all the work.
+ * select_wait() is an inline function defined in <linux/sched.h>, as all select
+ * functions have to call it to add an entry to the select table.
+ */
+
+/*
+ * I rewrote this again to make the select_table size variable, take some
+ * more shortcuts, improve responsiveness, and remove another race that
+ * Linus noticed. -- jrs
+ */
+
+/*
+ * Take the current task off every wait queue recorded in the select
+ * table, walking the entries from the last one back to the first and
+ * leaving p->nr at zero.
+ */
+static void free_wait(select_table * p)
+{
+	struct select_table_entry * cur;
+
+	for (cur = p->entry + p->nr; p->nr > 0; ) {
+		p->nr--;
+		cur--;
+		remove_wait_queue(cur->wait_address, &cur->wait);
+	}
+}
+
+/*
+ * The check function checks the ready status of a file using the vfs layer.
+ *
+ * If the file was not ready we were added to its wait queue. But in
+ * case it became ready just after the check and just before it called
+ * select_wait, we call it again, knowing we are already on its
+ * wait queue this time. The second call is not necessary if the
+ * select_table is NULL indicating an earlier file check was ready
+ * and we aren't going to sleep on the select_table. -- jrs
+ */
+
+static int check(int flag, select_table * wait, struct file * file)
+{
+	struct inode * ino = file->f_inode;
+	struct file_operations * ops = file->f_op;
+	int (*poll_fn) (struct inode *, struct file *, int, select_table *);
+
+	/* no select operation: only regular files count as always ready */
+	if (!ops || !(poll_fn = ops->select))
+		return S_ISREG(ino->i_mode) ? 1 : 0;
+
+	if (poll_fn(ino, file, flag, wait))
+		return 1;
+	/* without a wait table there is nothing to re-check */
+	if (!wait)
+		return 0;
+	/* we may have just been queued: ask once more, without queueing again */
+	return poll_fn(ino, file, flag, NULL) ? 1 : 0;
+}
+
+/*
+ * Core of select(): validate the requested descriptors, then poll them
+ * (sleeping between passes) until at least one is ready, the task's
+ * timeout has run out, or an unblocked signal is pending.
+ *
+ * n            - number of descriptors to examine (0 .. n-1)
+ * in/out/ex    - kernel copies of the requested sets
+ * res_*        - result sets, cleared here and filled with the ready fds
+ *
+ * Returns the number of ready (descriptor, condition) pairs, -EBADF if a
+ * requested descriptor is not open, or -ENOMEM if the wait table page
+ * cannot be allocated.
+ */
+static int do_select(int n, fd_set *in, fd_set *out, fd_set *ex,
+ fd_set *res_in, fd_set *res_out, fd_set *res_ex)
+{
+ int count;
+ select_table wait_table, *wait;
+ struct select_table_entry *entry;
+ unsigned long set;
+ int i,j;
+ int max = -1;
+
+ /*
+  * Validation pass: walk the sets one word at a time (j << 5 assumes 32
+  * descriptors per word) and remember the highest requested descriptor.
+  */
+ for (j = 0 ; j < __FDSET_LONGS ; j++) {
+ i = j << 5;
+ if (i >= n)
+ break;
+ set = in->fds_bits[j] | out->fds_bits[j] | ex->fds_bits[j];
+ for ( ; set ; i++,set >>= 1) {
+ if (i >= n)
+ goto end_check;
+ if (!(set & 1))
+ continue;
+ if (!current->files->fd[i])
+ return -EBADF;
+ if (!current->files->fd[i]->f_inode)
+ return -EBADF;
+ max = i;
+ }
+ }
+end_check:
+ /* no need to scan beyond the highest descriptor actually requested */
+ n = max + 1;
+ /* one page holds the wait-queue entries filled in via the select table */
+ if(!(entry = (struct select_table_entry*) __get_free_page(GFP_KERNEL)))
+ return -ENOMEM;
+ FD_ZERO(res_in);
+ FD_ZERO(res_out);
+ FD_ZERO(res_ex);
+ count = 0;
+ wait_table.nr = 0;
+ wait_table.entry = entry;
+ wait = &wait_table;
+repeat:
+ /*
+  * The state is set before polling; presumably so that a wakeup arriving
+  * between a check and schedule() is not lost -- NOTE(review): confirm
+  * against the scheduler.
+  */
+ current->state = TASK_INTERRUPTIBLE;
+ for (i = 0 ; i < n ; i++) {
+ /* once anything is ready we will not sleep, so stop queueing (wait = NULL) */
+ if (FD_ISSET(i,in) && check(SEL_IN,wait,current->files->fd[i])) {
+ FD_SET(i, res_in);
+ count++;
+ wait = NULL;
+ }
+ if (FD_ISSET(i,out) && check(SEL_OUT,wait,current->files->fd[i])) {
+ FD_SET(i, res_out);
+ count++;
+ wait = NULL;
+ }
+ if (FD_ISSET(i,ex) && check(SEL_EX,wait,current->files->fd[i])) {
+ FD_SET(i, res_ex);
+ count++;
+ wait = NULL;
+ }
+ }
+ /* later passes must not pass the wait table again */
+ wait = NULL;
+ /* sleep only if nothing is ready, the timeout is non-zero and no unblocked signal is pending */
+ if (!count && current->timeout && !(current->signal & ~current->blocked)) {
+ schedule();
+ goto repeat;
+ }
+ /* leave every wait queue we joined and release the table page */
+ free_wait(&wait_table);
+ free_page((unsigned long) entry);
+ current->state = TASK_RUNNING;
+ return count;
+}
+
+/*
+ * We do a VERIFY_WRITE here even though we are only reading this time:
+ * we'll write to it eventually..
+ */
+static int __get_fd_set(int nr, unsigned long * fs_pointer, unsigned long * fdset)
+{
+	int err;
+
+	/* a missing user set is treated as an empty one */
+	FD_ZERO(fdset);
+	if (fs_pointer == NULL)
+		return 0;
+	err = verify_area(VERIFY_WRITE, fs_pointer, sizeof(fd_set));
+	if (err)
+		return err;
+	/* copy one word per 32 descriptors */
+	for ( ; nr > 0; nr -= 32, fdset++, fs_pointer++)
+		*fdset = get_fs_long(fs_pointer);
+	return 0;
+}
+
+/*
+ * Copy the first nr bits of a kernel fd set back to user space, one word
+ * per 32 descriptors. Does nothing when the user passed no set.
+ */
+static void __set_fd_set(int nr, unsigned long * fs_pointer, unsigned long * fdset)
+{
+	if (fs_pointer == NULL)
+		return;
+	for ( ; nr > 0; nr -= 32, fdset++, fs_pointer++)
+		put_fs_long(*fdset, fs_pointer);
+}
+
+#define get_fd_set(nr,fsp,fdp) \
+__get_fd_set(nr, (unsigned long *) (fsp), (unsigned long *) (fdp))
+
+#define set_fd_set(nr,fsp,fdp) \
+__set_fd_set(nr, (unsigned long *) (fsp), (unsigned long *) (fdp))
+
+/*
+ * We can actually return ERESTARTSYS instead of EINTR, but I'd
+ * like to be certain this leads to no problems. So I return
+ * EINTR just for safety.
+ *
+ * Update: ERESTARTSYS breaks at least the xview clock binary, so
+ * I'm trying ERESTARTNOHAND, which restarts only when you want it to.
+ */
+/*
+ * select() system call entry. The single argument points to a user-space
+ * block of five longs: n, inp, outp, exp, tvp.
+ *
+ * Returns the number of ready descriptors, 0 on timeout, a negative error
+ * from argument validation or do_select(), or -ERESTARTNOHAND when nothing
+ * is ready and an unblocked signal is pending.
+ */
+asmlinkage int sys_select( unsigned long *buffer )
+{
+/* Perform the select(nd, in, out, ex, tv) system call. */
+ int i;
+ fd_set res_in, in, *inp;
+ fd_set res_out, out, *outp;
+ fd_set res_ex, ex, *exp;
+ int n;
+ struct timeval *tvp;
+ unsigned long timeout;
+
+ /* 20 bytes: the five argument words fetched below */
+ i = verify_area(VERIFY_READ, buffer, 20);
+ if (i)
+ return i;
+ n = get_fs_long(buffer++);
+ if (n < 0)
+ return -EINVAL;
+ /* silently clamp to the size of the per-process descriptor table */
+ if (n > NR_OPEN)
+ n = NR_OPEN;
+ inp = (fd_set *) get_fs_long(buffer++);
+ outp = (fd_set *) get_fs_long(buffer++);
+ exp = (fd_set *) get_fs_long(buffer++);
+ tvp = (struct timeval *) get_fs_long(buffer);
+ /* fetch the three request sets; a NULL user pointer yields an empty set */
+ if ((i = get_fd_set(n, inp, &in)) ||
+ (i = get_fd_set(n, outp, &out)) ||
+ (i = get_fd_set(n, exp, &ex))) return i;
+ /* no timeval: largest possible timeout value */
+ timeout = ~0UL;
+ if (tvp) {
+ i = verify_area(VERIFY_WRITE, tvp, sizeof(*tvp));
+ if (i)
+ return i;
+ /* convert to ticks, rounding the microseconds up */
+ timeout = ROUND_UP(get_fs_long((unsigned long *)&tvp->tv_usec),(1000000/HZ));
+ timeout += get_fs_long((unsigned long *)&tvp->tv_sec) * HZ;
+ /* a zero timeval stays 0, so do_select() polls once and never sleeps */
+ if (timeout)
+ timeout += jiffies + 1;
+ }
+ current->timeout = timeout;
+ i = do_select(n, &in, &out, &ex, &res_in, &res_out, &res_ex);
+ /* work out how many ticks of the timeout are left */
+ if (current->timeout > jiffies)
+ timeout = current->timeout - jiffies;
+ else
+ timeout = 0;
+ current->timeout = 0;
+ /* report the remaining time back unless the personality forbids it */
+ if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
+ put_fs_long(timeout/HZ, (unsigned long *) &tvp->tv_sec);
+ timeout %= HZ;
+ timeout *= (1000000/HZ);
+ put_fs_long(timeout, (unsigned long *) &tvp->tv_usec);
+ }
+ if (i < 0)
+ return i;
+ /* nothing ready and a signal is pending: user sets are left untouched */
+ if (!i && (current->signal & ~current->blocked))
+ return -ERESTARTNOHAND;
+ set_fd_set(n, inp, &res_in);
+ set_fd_set(n, outp, &res_out);
+ set_fd_set(n, exp, &res_ex);
+ return i;
+}
diff --git a/fs/stat.c b/fs/stat.c
new file mode 100644
index 000000000..70f5d166e
--- /dev/null
+++ b/fs/stat.c
@@ -0,0 +1,207 @@
+/*
+ * linux/fs/stat.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <asm/segment.h>
+
+/*
+ * Fill a struct old_stat from the inode and copy it to user space.
+ * The caller must already have verified statbuf for writing.
+ *
+ * Logs a warning naming the calling process, since the old stat layout
+ * is obsolete.
+ */
+static void cp_old_stat(struct inode * inode, struct old_stat * statbuf)
+{
+	struct old_stat tmp;
+
+	printk("VFS: Warning: %s using old stat() call. Recompile your binary.\n",
+		current->comm);
+	/*
+	 * Zero the whole structure first, as cp_new_stat() does: the complete
+	 * struct is copied to user space, so any padding or unassigned field
+	 * would otherwise expose stale kernel stack contents.
+	 */
+	memset(&tmp, 0, sizeof(tmp));
+	tmp.st_dev = inode->i_dev;
+	tmp.st_ino = inode->i_ino;
+	tmp.st_mode = inode->i_mode;
+	tmp.st_nlink = inode->i_nlink;
+	tmp.st_uid = inode->i_uid;
+	tmp.st_gid = inode->i_gid;
+	tmp.st_rdev = inode->i_rdev;
+	tmp.st_size = inode->i_size;
+	/* for pipes report the PIPE_SIZE() value instead of i_size */
+	if (inode->i_pipe)
+		tmp.st_size = PIPE_SIZE(*inode);
+	tmp.st_atime = inode->i_atime;
+	tmp.st_mtime = inode->i_mtime;
+	tmp.st_ctime = inode->i_ctime;
+	memcpy_tofs(statbuf,&tmp,sizeof(tmp));
+}
+
+/*
+ * Fill a struct new_stat from the inode and copy it to user space.
+ * The caller must already have verified statbuf for writing.
+ */
+static void cp_new_stat(struct inode * inode, struct new_stat * statbuf)
+{
+ struct new_stat tmp;
+ unsigned int blocks, indirect;
+
+ /* start from all zeroes: the whole struct is copied out below */
+ memset(&tmp, 0, sizeof(tmp));
+ tmp.st_dev = inode->i_dev;
+ tmp.st_ino = inode->i_ino;
+ tmp.st_mode = inode->i_mode;
+ tmp.st_nlink = inode->i_nlink;
+ tmp.st_uid = inode->i_uid;
+ tmp.st_gid = inode->i_gid;
+ tmp.st_rdev = inode->i_rdev;
+ tmp.st_size = inode->i_size;
+ /* for pipes report the PIPE_SIZE() value instead of i_size */
+ if (inode->i_pipe)
+ tmp.st_size = PIPE_SIZE(*inode);
+ tmp.st_atime = inode->i_atime;
+ tmp.st_mtime = inode->i_mtime;
+ tmp.st_ctime = inode->i_ctime;
+/*
+ * st_blocks and st_blksize are approximated with a simple algorithm if
+ * they aren't supported directly by the filesystem. The minix and msdos
+ * filesystems don't keep track of blocks, so they would either have to
+ * be counted explicitly (by delving into the file itself), or by using
+ * this simple algorithm to get a reasonable (although not 100% accurate)
+ * value.
+ */
+
+/*
+ * Use minix fs values for the number of direct and indirect blocks. The
+ * count is now exact for the minix fs except that it counts zero blocks.
+ * Everything is in BLOCK_SIZE'd units until the assignment to
+ * tmp.st_blksize.
+ */
+#define D_B 7
+#define I_B (BLOCK_SIZE / sizeof(unsigned short))
+
+ /* i_blksize == 0 means the filesystem did not supply block information */
+ if (!inode->i_blksize) {
+ /* data blocks, rounded up */
+ blocks = (tmp.st_size + BLOCK_SIZE - 1) / BLOCK_SIZE;
+ if (blocks > D_B) {
+ /* indirect blocks needed for the data beyond the direct ones */
+ indirect = (blocks - D_B + I_B - 1) / I_B;
+ blocks += indirect;
+ if (indirect > 1) {
+ /* second-level blocks for every indirect block after the first */
+ indirect = (indirect - 1 + I_B - 1) / I_B;
+ blocks += indirect;
+ if (indirect > 1)
+ blocks++;
+ }
+ }
+ /* st_blocks is expressed in 512-byte units */
+ tmp.st_blocks = (BLOCK_SIZE / 512) * blocks;
+ tmp.st_blksize = BLOCK_SIZE;
+ } else {
+ tmp.st_blocks = inode->i_blocks;
+ tmp.st_blksize = inode->i_blksize;
+ }
+ memcpy_tofs(statbuf,&tmp,sizeof(tmp));
+}
+
+/*
+ * Old-layout stat(): look filename up with namei() and copy the inode's
+ * attributes to statbuf as a struct old_stat.
+ * Returns 0, or the error from verify_area() or namei().
+ */
+asmlinkage int sys_stat(char * filename, struct old_stat * statbuf)
+{
+	struct inode * ino;
+	int err;
+
+	err = verify_area(VERIFY_WRITE, statbuf, sizeof(*statbuf));
+	if (!err)
+		err = namei(filename, &ino);
+	if (err)
+		return err;
+	cp_old_stat(ino, statbuf);
+	iput(ino);
+	return 0;
+}
+
+/*
+ * New-layout stat(): look filename up with namei() and copy the inode's
+ * attributes to statbuf as a struct new_stat.
+ * Returns 0, or the error from verify_area() or namei().
+ */
+asmlinkage int sys_newstat(char * filename, struct new_stat * statbuf)
+{
+	struct inode * ino;
+	int err;
+
+	err = verify_area(VERIFY_WRITE, statbuf, sizeof(*statbuf));
+	if (!err)
+		err = namei(filename, &ino);
+	if (err)
+		return err;
+	cp_new_stat(ino, statbuf);
+	iput(ino);
+	return 0;
+}
+
+/*
+ * Old-layout lstat(): like sys_stat() but the name is resolved with
+ * lnamei() instead of namei().
+ * Returns 0, or the error from verify_area() or lnamei().
+ */
+asmlinkage int sys_lstat(char * filename, struct old_stat * statbuf)
+{
+	struct inode * ino;
+	int err;
+
+	err = verify_area(VERIFY_WRITE, statbuf, sizeof(*statbuf));
+	if (!err)
+		err = lnamei(filename, &ino);
+	if (err)
+		return err;
+	cp_old_stat(ino, statbuf);
+	iput(ino);
+	return 0;
+}
+
+/*
+ * New-layout lstat(): like sys_newstat() but the name is resolved with
+ * lnamei() instead of namei().
+ * Returns 0, or the error from verify_area() or lnamei().
+ */
+asmlinkage int sys_newlstat(char * filename, struct new_stat * statbuf)
+{
+	struct inode * ino;
+	int err;
+
+	err = verify_area(VERIFY_WRITE, statbuf, sizeof(*statbuf));
+	if (!err)
+		err = lnamei(filename, &ino);
+	if (err)
+		return err;
+	cp_new_stat(ino, statbuf);
+	iput(ino);
+	return 0;
+}
+
+/*
+ * Old-layout fstat(): copy the attributes of the inode behind an open
+ * descriptor to statbuf as a struct old_stat.
+ * Returns 0, the verify_area() error, or -EBADF for a bad descriptor.
+ */
+asmlinkage int sys_fstat(unsigned int fd, struct old_stat * statbuf)
+{
+	struct file * filp;
+	int err;
+
+	err = verify_area(VERIFY_WRITE, statbuf, sizeof(*statbuf));
+	if (err)
+		return err;
+	if (fd >= NR_OPEN)
+		return -EBADF;
+	filp = current->files->fd[fd];
+	if (!filp || !filp->f_inode)
+		return -EBADF;
+	cp_old_stat(filp->f_inode, statbuf);
+	return 0;
+}
+
+/*
+ * New-layout fstat(): copy the attributes of the inode behind an open
+ * descriptor to statbuf as a struct new_stat.
+ * Returns 0, the verify_area() error, or -EBADF for a bad descriptor.
+ */
+asmlinkage int sys_newfstat(unsigned int fd, struct new_stat * statbuf)
+{
+	struct file * filp;
+	int err;
+
+	err = verify_area(VERIFY_WRITE, statbuf, sizeof(*statbuf));
+	if (err)
+		return err;
+	if (fd >= NR_OPEN)
+		return -EBADF;
+	filp = current->files->fd[fd];
+	if (!filp || !filp->f_inode)
+		return -EBADF;
+	cp_new_stat(filp->f_inode, statbuf);
+	return 0;
+}
+
+/*
+ * readlink() system call: resolve path with lnamei() and let the inode's
+ * readlink operation fill buf.
+ *
+ * Returns -EINVAL for a non-positive bufsiz or an inode without a
+ * readlink operation, the verify_area()/lnamei() error, otherwise the
+ * readlink operation's return value. The inode reference is dropped here
+ * only on the -EINVAL path; on success it is handed to the operation.
+ */
+asmlinkage int sys_readlink(const char * path, char * buf, int bufsiz)
+{
+	struct inode * ino;
+	int err;
+
+	if (bufsiz <= 0)
+		return -EINVAL;
+	err = verify_area(VERIFY_WRITE, buf, bufsiz);
+	if (err)
+		return err;
+	err = lnamei(path, &ino);
+	if (err)
+		return err;
+	if (ino->i_op && ino->i_op->readlink)
+		return ino->i_op->readlink(ino, buf, bufsiz);
+	iput(ino);
+	return -EINVAL;
+}
diff --git a/fs/super.c b/fs/super.c
new file mode 100644
index 000000000..9ead32a3e
--- /dev/null
+++ b/fs/super.c
@@ -0,0 +1,689 @@
+/*
+ * linux/fs/super.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * super.c contains code to handle the super-block tables.
+ */
+#include <stdarg.h>
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+
+#include <asm/system.h>
+#include <asm/segment.h>
+#include <asm/bitops.h>
+
+extern struct file_operations * get_blkfops(unsigned int);
+extern struct file_operations * get_chrfops(unsigned int);
+
+extern void wait_for_keypress(void);
+
+extern int root_mountflags;
+
+struct super_block super_blocks[NR_SUPER];
+
+static int do_remount_sb(struct super_block *sb, int flags, char * data);
+
+/* this is initialized in init/main.c */
+dev_t ROOT_DEV = 0;
+
+static struct file_system_type * file_systems = NULL;
+
+/*
+ * Append a filesystem type to the end of the global list.
+ *
+ * Returns 0 on success, -EINVAL for a NULL argument, and -EBUSY when the
+ * structure is already linked somewhere (fs->next set) or a type with the
+ * same name is already registered.
+ */
+int register_filesystem(struct file_system_type * fs)
+{
+	struct file_system_type ** link;
+
+	if (!fs)
+		return -EINVAL;
+	if (fs->next)
+		return -EBUSY;
+	for (link = &file_systems; *link; link = &(*link)->next) {
+		if (!strcmp((*link)->name, fs->name))
+			return -EBUSY;
+	}
+	/* link now points at the terminating NULL pointer */
+	*link = fs;
+	return 0;
+}
+
+/*
+ * Unlink a filesystem type from the global list.
+ * Returns 0 on success, -EINVAL if fs is not on the list.
+ */
+int unregister_filesystem(struct file_system_type * fs)
+{
+	struct file_system_type ** link;
+
+	for (link = &file_systems; *link; link = &(*link)->next) {
+		if (*link != fs)
+			continue;
+		*link = fs->next;
+		fs->next = NULL;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/*
+ * sysfs option 1: return the list position of the filesystem type whose
+ * name is given by the user-space string __name.
+ * Returns the index, the getname() error, or -EINVAL if no type matches.
+ */
+static int fs_index(const char * __name)
+{
+	struct file_system_type * fs;
+	char * name;
+	int err;
+	int pos;
+	int result = -EINVAL;
+
+	err = getname(__name, &name);
+	if (err)
+		return err;
+	for (pos = 0, fs = file_systems; fs; fs = fs->next, pos++) {
+		if (!strcmp(fs->name, name)) {
+			result = pos;
+			break;
+		}
+	}
+	putname(name);
+	return result;
+}
+
+/*
+ * sysfs option 2: copy the name (including its terminating NUL) of the
+ * filesystem type at list position index into the user buffer buf.
+ * Returns 0, -EINVAL for an index past the end of the list, or the
+ * verify_area() error.
+ */
+static int fs_name(unsigned int index, char * buf)
+{
+	struct file_system_type * fs = file_systems;
+	int err, len;
+
+	for ( ; fs && index > 0; index--)
+		fs = fs->next;
+	if (!fs)
+		return -EINVAL;
+	len = strlen(fs->name) + 1;
+	err = verify_area(VERIFY_WRITE, buf, len);
+	if (err)
+		return err;
+	memcpy_tofs(buf, fs->name, len);
+	return 0;
+}
+
+/*
+ * sysfs option 3: return the number of registered filesystem types.
+ */
+static int fs_maxindex(void)
+{
+	struct file_system_type * fs = file_systems;
+	int count = 0;
+
+	while (fs) {
+		count++;
+		fs = fs->next;
+	}
+	return count;
+}
+
+/*
+ * Whee.. Weird sysv syscall.
+ */
+asmlinkage int sys_sysfs(int option, ...)
+{
+	va_list args;
+	unsigned int index;
+	int retval = -EINVAL;	/* result for any unknown option */
+
+	va_start(args, option);
+	if (option == 1) {
+		/* name -> index */
+		retval = fs_index(va_arg(args, const char *));
+	} else if (option == 2) {
+		/* index -> name; the index argument comes first */
+		index = va_arg(args, unsigned int);
+		retval = fs_name(index, va_arg(args, char *));
+	} else if (option == 3) {
+		/* number of registered types */
+		retval = fs_maxindex();
+	}
+	va_end(args);
+	return retval;
+}
+
+/*
+ * Format the list of registered filesystem types into buf, one
+ * "<nodev or empty>\t<name>" line per type, and return the number of
+ * bytes written. Formatting stops once fewer than 80 bytes of a page
+ * remain.
+ */
+int get_filesystem_list(char * buf)
+{
+	struct file_system_type * fs;
+	int len = 0;
+
+	for (fs = file_systems; fs && len < PAGE_SIZE - 80; fs = fs->next) {
+		len += sprintf(buf+len, "%s\t%s\n",
+			fs->requires_dev ? "" : "nodev",
+			fs->name);
+	}
+	return len;
+}
+
+/*
+ * Look a filesystem type up by name.
+ * A NULL name selects the first registered type. Returns NULL when no
+ * type with that name is registered.
+ */
+struct file_system_type *get_fs_type(char *name)
+{
+	struct file_system_type * fs;
+
+	if (!name)
+		return file_systems;
+	for (fs = file_systems; fs; fs = fs->next) {
+		if (strcmp(name, fs->name) == 0)
+			return fs;
+	}
+	return NULL;
+}
+
+/*
+ * Sleep uninterruptibly on the superblock's wait queue until its lock
+ * flag (s_lock) is clear.
+ */
+void __wait_on_super(struct super_block * sb)
+{
+	struct wait_queue wait = { current, NULL };
+
+	add_wait_queue(&sb->s_wait, &wait);
+	for (;;) {
+		/* mark ourselves sleeping before testing the lock */
+		current->state = TASK_UNINTERRUPTIBLE;
+		if (!sb->s_lock)
+			break;
+		schedule();
+	}
+	remove_wait_queue(&sb->s_wait, &wait);
+	current->state = TASK_RUNNING;
+}
+
+/*
+ * Write back every dirty superblock, or only the one for dev when dev is
+ * non-zero.
+ */
+void sync_supers(dev_t dev)
+{
+	struct super_block * sb;
+	int slot;
+
+	for (slot = 0; slot < NR_SUPER; slot++) {
+		sb = &super_blocks[slot];
+		if (!sb->s_dev || (dev && sb->s_dev != dev))
+			continue;
+		wait_on_super(sb);
+		/* re-check everything: the slot may have changed while we waited */
+		if (!sb->s_dev || !sb->s_dirt)
+			continue;
+		if (dev && (dev != sb->s_dev))
+			continue;
+		if (sb->s_op && sb->s_op->write_super)
+			sb->s_op->write_super(sb);
+	}
+}
+
+/*
+ * Find the superblock table entry for dev.
+ * Returns NULL for dev 0 or when no entry carries that device number.
+ */
+static struct super_block * get_super(dev_t dev)
+{
+ struct super_block * s;
+
+ if (!dev)
+ return NULL;
+ s = 0+super_blocks;
+ while (s < NR_SUPER+super_blocks)
+ if (s->s_dev == dev) {
+ wait_on_super(s);
+ /* still ours after the wait? then it is the one */
+ if (s->s_dev == dev)
+ return s;
+ /* the entry changed while we waited: rescan from the start */
+ s = 0+super_blocks;
+ } else
+ s++;
+ return NULL;
+}
+
+/*
+ * Release the superblock of dev through the filesystem's put_super
+ * operation. Refuses (with a log message) for the root device and for a
+ * superblock that is still mounted; silently does nothing when dev has no
+ * superblock.
+ */
+void put_super(dev_t dev)
+{
+	struct super_block * sb;
+
+	if (dev == ROOT_DEV) {
+		printk("VFS: Root device %d/%d: prepare for armageddon\n",
+			MAJOR(dev), MINOR(dev));
+		return;
+	}
+	sb = get_super(dev);
+	if (sb == NULL)
+		return;
+	if (sb->s_covered) {
+		printk("VFS: Mounted device %d/%d - tssk, tssk\n",
+			MAJOR(dev), MINOR(dev));
+	} else if (sb->s_op && sb->s_op->put_super) {
+		sb->s_op->put_super(sb);
+	}
+}
+
+/*
+ * Return the superblock for dev, reading it in with the named filesystem
+ * type if it is not in the table yet.
+ *
+ * dev    - device to read; 0 yields NULL
+ * name   - filesystem type name passed to get_fs_type()
+ * flags  - stored in s_flags before the type's read_super runs
+ * data   - passed through to the type's read_super
+ * silent - passed through to the type's read_super
+ *
+ * Returns NULL if the type is unknown, the table is full, or the type's
+ * read_super fails.
+ */
+static struct super_block * read_super(dev_t dev,char *name,int flags,
+ void *data, int silent)
+{
+ struct super_block * s;
+ struct file_system_type *type;
+
+ if (!dev)
+ return NULL;
+ check_disk_change(dev);
+ /* already in the table: hand back the existing entry */
+ s = get_super(dev);
+ if (s)
+ return s;
+ if (!(type = get_fs_type(name))) {
+ printk("VFS: on device %d/%d: get_fs_type(%s) failed\n",
+ MAJOR(dev), MINOR(dev), name);
+ return NULL;
+ }
+ /* find a free slot (s_dev == 0); give up if the table is full */
+ for (s = 0+super_blocks ;; s++) {
+ if (s >= NR_SUPER+super_blocks)
+ return NULL;
+ if (!s->s_dev)
+ break;
+ }
+ s->s_dev = dev;
+ s->s_flags = flags;
+ if (!type->read_super(s,data, silent)) {
+ /* mark the slot free again */
+ s->s_dev = 0;
+ return NULL;
+ }
+ s->s_dev = dev;
+ s->s_covered = NULL;
+ s->s_rd_only = 0;
+ s->s_dirt = 0;
+ return s;
+}
+
+/*
+ * Unnamed block devices are dummy devices used by virtual
+ * filesystems which don't use real block-devices. -- jrs
+ */
+
+static char unnamed_dev_in_use[256/8] = { 0, };
+
+/*
+ * Allocate an unnamed device number: claim the first free minor in
+ * 1..255 and return it combined with UNNAMED_MAJOR, or 0 if all are in
+ * use.
+ */
+static dev_t get_unnamed_dev(void)
+{
+	int minor = 1;
+
+	while (minor < 256) {
+		/* set_bit() yields the previous value: 0 means the minor was free */
+		if (set_bit(minor, unnamed_dev_in_use) == 0)
+			return (UNNAMED_MAJOR << 8) | minor;
+		minor++;
+	}
+	return 0;
+}
+
+/*
+ * Give an unnamed device number back. Logs a message when dev is not an
+ * unnamed device or its minor was not marked in use; dev 0 is ignored.
+ */
+static void put_unnamed_dev(dev_t dev)
+{
+	if (!dev)
+		return;
+	if (MAJOR(dev) != UNNAMED_MAJOR ||
+	    !clear_bit(MINOR(dev), unnamed_dev_in_use))
+		printk("VFS: put_unnamed_dev: freeing unused device %d/%d\n",
+			MAJOR(dev), MINOR(dev));
+}
+
+/*
+ * Unmount the filesystem on dev.
+ *
+ * Returns 0 on success, -ENOENT if dev has no (mounted) superblock,
+ * -EBUSY if fs_may_umount() refuses, or the do_remount_sb() error when
+ * remounting the root read-only fails.
+ */
+static int do_umount(dev_t dev)
+{
+ struct super_block * sb;
+ int retval;
+
+ if (dev==ROOT_DEV) {
+ /* Special case for "unmounting" root. We just try to remount
+ it readonly, and sync() the device. */
+ if (!(sb=get_super(dev)))
+ return -ENOENT;
+ if (!(sb->s_flags & MS_RDONLY)) {
+ fsync_dev(dev);
+ retval = do_remount_sb(sb, MS_RDONLY, 0);
+ if (retval)
+ return retval;
+ }
+ return 0;
+ }
+ if (!(sb=get_super(dev)) || !(sb->s_covered))
+ return -ENOENT;
+ /* inconsistent state: warn but carry on */
+ if (!sb->s_covered->i_mount)
+ printk("VFS: umount(%d/%d): mounted inode has i_mount=NULL\n",
+ MAJOR(dev), MINOR(dev));
+ if (!fs_may_umount(dev, sb->s_mounted))
+ return -EBUSY;
+ /* detach from the mount point and drop both inode references */
+ sb->s_covered->i_mount = NULL;
+ iput(sb->s_covered);
+ sb->s_covered = NULL;
+ iput(sb->s_mounted);
+ sb->s_mounted = NULL;
+ /* flush a dirty superblock before releasing it */
+ if (sb->s_op && sb->s_op->write_super && sb->s_dirt)
+ sb->s_op->write_super(sb);
+ put_super(dev);
+ return 0;
+}
+
+/*
+ * Now umount can handle mount points as well as block devices.
+ * This is important for filesystems which use unnamed block devices.
+ *
+ * There is a little kludge here with the dummy_inode. The current
+ * vfs release functions only use the r_dev field in the inode so
+ * we give them the info they need without using a real inode.
+ * If any other fields are ever needed by any block device release
+ * functions, they should be faked here. -- jrs
+ */
+
+/*
+ * umount() system call. name may be a block device node or the root
+ * directory of a mounted filesystem.
+ *
+ * Returns 0 on success, -EPERM for non-superusers, the namei()/lnamei()
+ * error, -EACCES for a device node on a nodev filesystem, -EINVAL if name
+ * is neither a block device nor a mounted root, -ENXIO for an
+ * out-of-range major, or the do_umount() error.
+ */
+asmlinkage int sys_umount(char * name)
+{
+	struct inode * inode;
+	dev_t dev;
+	int retval;
+	struct inode dummy_inode;
+	struct file_operations * fops;
+
+	if (!suser())
+		return -EPERM;
+	retval = namei(name,&inode);
+	if (retval) {
+		retval = lnamei(name,&inode);
+		if (retval)
+			return retval;
+	}
+	if (S_ISBLK(inode->i_mode)) {
+		dev = inode->i_rdev;
+		if (IS_NODEV(inode)) {
+			iput(inode);
+			return -EACCES;
+		}
+	} else {
+		/*
+		 * inode cannot be NULL here: it was already dereferenced by
+		 * S_ISBLK() above, so only the superblock checks remain.
+		 */
+		if (!inode->i_sb || inode != inode->i_sb->s_mounted) {
+			iput(inode);
+			return -EINVAL;
+		}
+		dev = inode->i_sb->s_dev;
+		iput(inode);
+		/* fake inode carrying only i_rdev for the release function */
+		memset(&dummy_inode, 0, sizeof(dummy_inode));
+		dummy_inode.i_rdev = dev;
+		inode = &dummy_inode;
+	}
+	if (MAJOR(dev) >= MAX_BLKDEV) {
+		/* the on-stack dummy inode must never be passed to iput() */
+		if (inode != &dummy_inode)
+			iput(inode);
+		return -ENXIO;
+	}
+	if (!(retval = do_umount(dev)) && dev != ROOT_DEV) {
+		fops = get_blkfops(MAJOR(dev));
+		if (fops && fops->release)
+			fops->release(inode,NULL);
+		if (MAJOR(dev) == UNNAMED_MAJOR)
+			put_unnamed_dev(dev);
+	}
+	if (inode != &dummy_inode)
+		iput(inode);
+	if (retval)
+		return retval;
+	fsync_dev(dev);
+	return 0;
+}
+
+/*
+ * do_mount() does the actual mounting after sys_mount has done the ugly
+ * parameter parsing. When enough time has gone by, and everything uses the
+ * new mount() parameters, sys_mount() can then be cleaned up.
+ *
+ * We cannot mount a filesystem if it has active, used, or dirty inodes.
+ * We also have to flush all inode-data for this device, as the new mount
+ * might need new info.
+ */
+static int do_mount(dev_t dev, const char * dir, char * type, int flags, void * data)
+{
+	struct inode * mnt_dir;
+	struct super_block * sb;
+	int err;
+
+	err = namei(dir, &mnt_dir);
+	if (err)
+		return err;
+
+	if (mnt_dir->i_count != 1 || mnt_dir->i_mount) {
+		/* mount point in use elsewhere or already mounted on */
+		err = -EBUSY;
+	} else if (!S_ISDIR(mnt_dir->i_mode)) {
+		err = -ENOTDIR;
+	} else if (!fs_may_mount(dev)) {
+		err = -EBUSY;
+	} else if (!(sb = read_super(dev, type, flags, data, 0))) {
+		err = -EINVAL;
+	} else if (sb->s_covered) {
+		/* this superblock is already mounted somewhere */
+		err = -EBUSY;
+	} else {
+		sb->s_covered = mnt_dir;
+		mnt_dir->i_mount = sb->s_mounted;
+		/* the reference to mnt_dir is kept on purpose - see umount */
+		return 0;
+	}
+	iput(mnt_dir);
+	return err;
+}
+
+
+/*
+ * Alters the mount flags of a mounted file system. Only the mount point
+ * is used as a reference - file system type and the device are ignored.
+ * FS-specific mount options can't be altered by remounting.
+ */
+
+static int do_remount_sb(struct super_block *sb, int flags, char *data)
+{
+	int err;
+	int want_ro = flags & MS_RDONLY;
+
+	/* a read-only device cannot be remounted read-write */
+	if (!want_ro && sb->s_dev && is_read_only(sb->s_dev))
+		return -EACCES;
+	/* going from read-write to read-only: no files may be open for writing */
+	if (want_ro && !(sb->s_flags & MS_RDONLY) &&
+	    !fs_may_remount_ro(sb->s_dev))
+		return -EBUSY;
+	/* the filesystem may veto the change or adjust the flags */
+	if (sb->s_op && sb->s_op->remount_fs) {
+		err = sb->s_op->remount_fs(sb, &flags, data);
+		if (err)
+			return err;
+	}
+	/* only the bits in MS_RMT_MASK are taken from the new flags */
+	sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) |
+		(flags & MS_RMT_MASK);
+	return 0;
+}
+
+/*
+ * Remount the filesystem whose root directory is dir with new flags.
+ * Returns the namei() error, -EINVAL if dir is not the root of a mounted
+ * filesystem, otherwise the do_remount_sb() result.
+ */
+static int do_remount(const char *dir,int flags,char *data)
+{
+	struct inode * root;
+	int err;
+
+	err = namei(dir, &root);
+	if (err)
+		return err;
+	if (root == root->i_sb->s_mounted)
+		err = do_remount_sb(root->i_sb, flags, data);
+	else
+		err = -EINVAL;
+	iput(root);
+	return err;
+}
+
+/*
+ * Copy a block of user-space mount data into a freshly allocated kernel
+ * page.
+ *
+ * data  - user pointer, may be NULL
+ * where - receives the page address, or 0 when data is NULL or on error
+ *
+ * At most PAGE_SIZE-1 bytes are copied, fewer if the enclosing memory
+ * area ends sooner. The copy is always followed by a NUL byte, so callers
+ * that treat the page as a string (the filesystem type name, option
+ * strings) never read past the copied data.
+ *
+ * Returns 0 on success, -EFAULT if data lies in no mapped area, -ENOMEM
+ * if no page is available. The caller frees the page.
+ */
+static int copy_mount_options (const void * data, unsigned long *where)
+{
+	int i;
+	unsigned long page;
+	struct vm_area_struct * vma;
+
+	*where = 0;
+	if (!data)
+		return 0;
+
+	/* find the memory area containing data */
+	for (vma = current->mm->mmap ; ; ) {
+		if (!vma ||
+		    (unsigned long) data < vma->vm_start) {
+			return -EFAULT;
+		}
+		if ((unsigned long) data < vma->vm_end)
+			break;
+		vma = vma->vm_next;
+	}
+	/* copy up to the end of the area, capped at PAGE_SIZE-1 bytes */
+	i = vma->vm_end - (unsigned long) data;
+	if (PAGE_SIZE <= (unsigned long) i)
+		i = PAGE_SIZE-1;
+	if (!(page = __get_free_page(GFP_KERNEL))) {
+		return -ENOMEM;
+	}
+	memcpy_fromfs((void *) page,data,i);
+	/* i <= PAGE_SIZE-1, so the terminator always fits in the page */
+	((char *) page)[i] = '\0';
+	*where = page;
+	return 0;
+}
+
+/*
+ * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
+ * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
+ *
+ * data is a (void *) that can point to any structure up to
+ * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
+ * information (or be NULL).
+ *
+ * NOTE! As old versions of mount() didn't use this setup, the flags
+ * has to have a special 16-bit magic number in the high word:
+ * 0xC0ED. If this magic word isn't present, the flags and data info
+ * isn't used, as the syscall assumes we are talking to an older
+ * version that didn't understand them.
+ */
+asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type,
+	unsigned long new_flags, void * data)
+{
+	struct file_system_type * fstype;
+	struct inode * inode;
+	struct file_operations * fops;
+	dev_t dev;
+	int retval;
+	char * t;
+	unsigned long flags = 0;
+	unsigned long page = 0;
+
+	if (!suser())
+		return -EPERM;
+	/* remount request: only the mount point, flags and data are used */
+	if ((new_flags &
+	     (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) {
+		retval = copy_mount_options (data, &page);
+		if (retval < 0)
+			return retval;
+		retval = do_remount(dir_name,
+				    new_flags & ~MS_MGC_MSK & ~MS_REMOUNT,
+				    (char *) page);
+		free_page(page);
+		return retval;
+	}
+	/* fetch the type name from user space and look it up */
+	retval = copy_mount_options (type, &page);
+	if (retval < 0)
+		return retval;
+	fstype = get_fs_type((char *) page);
+	free_page(page);
+	if (!fstype)
+		return -ENODEV;
+	t = fstype->name;
+	if (fstype->requires_dev) {
+		retval = namei(dev_name,&inode);
+		if (retval)
+			return retval;
+		if (!S_ISBLK(inode->i_mode)) {
+			iput(inode);
+			return -ENOTBLK;
+		}
+		if (IS_NODEV(inode)) {
+			iput(inode);
+			return -EACCES;
+		}
+		dev = inode->i_rdev;
+		if (MAJOR(dev) >= MAX_BLKDEV) {
+			iput(inode);
+			return -ENXIO;
+		}
+	} else {
+		/* device-less filesystem: allocate an unnamed device number */
+		if (!(dev = get_unnamed_dev()))
+			return -EMFILE;
+		inode = NULL;
+	}
+	fops = get_blkfops(MAJOR(dev));
+	if (fops && fops->open) {
+		struct file dummy;	/* allows read-write or read-only flag */
+		memset(&dummy, 0, sizeof(dummy));
+		dummy.f_inode = inode;
+		dummy.f_mode = (new_flags & MS_RDONLY) ? 1 : 3;
+		retval = fops->open(inode, &dummy);
+		if (retval)
+			goto out_put;
+	}
+	page = 0;
+	/* flags and data are honoured only when the magic value is present */
+	if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) {
+		flags = new_flags & ~MS_MGC_MSK;
+		retval = copy_mount_options(data, &page);
+		/* the device is open at this point, so undo that as well */
+		if (retval < 0)
+			goto out_release;
+	}
+	retval = do_mount(dev,dir_name,t,flags,(void *) page);
+	free_page(page);
+out_release:
+	if (retval && fops && fops->release)
+		fops->release(inode, NULL);
+out_put:
+	/* a failed mount must hand its unnamed device number back */
+	if (retval && !fstype->requires_dev)
+		put_unnamed_dev(dev);
+	iput(inode);
+	return retval;
+}
+
+/*
+ * Mount the root filesystem on ROOT_DEV: clear the superblock table, open
+ * the root device, then try every registered device-backed filesystem
+ * type in turn until one reads the superblock. Panics if none does.
+ */
+void mount_root(void)
+{
+ struct file_system_type * fs_type;
+ struct super_block * sb;
+ struct inode * inode, d_inode;
+ struct file filp;
+ int retval;
+
+ memset(super_blocks, 0, sizeof(super_blocks));
+#ifdef CONFIG_BLK_DEV_FD
+ if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
+ printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
+ wait_for_keypress();
+ }
+#endif
+
+ /* fake file and inode, carrying just the device number and access mode */
+ memset(&filp, 0, sizeof(filp));
+ memset(&d_inode, 0, sizeof(d_inode));
+ d_inode.i_rdev = ROOT_DEV;
+ filp.f_inode = &d_inode;
+ if ( root_mountflags & MS_RDONLY)
+ filp.f_mode = 1; /* read only */
+ else
+ filp.f_mode = 3; /* read write */
+ retval = blkdev_open(&d_inode, &filp);
+ /* device refuses writing: fall back to a read-only root */
+ if(retval == -EROFS){
+ root_mountflags |= MS_RDONLY;
+ filp.f_mode = 1;
+ retval = blkdev_open(&d_inode, &filp);
+ }
+
+ for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
+ /* the device could not be opened: go straight to the panic below */
+ if(retval)
+ break;
+ if (!fs_type->requires_dev)
+ continue;
+ /* last argument 1 = silent probe */
+ sb = read_super(ROOT_DEV,fs_type->name,root_mountflags,NULL,1);
+ if (sb) {
+ inode = sb->s_mounted;
+ inode->i_count += 3 ; /* NOTE! it is logically used 4 times, not 1 */
+ sb->s_covered = inode;
+ sb->s_flags = root_mountflags;
+ current->fs->pwd = inode;
+ current->fs->root = inode;
+ printk ("VFS: Mounted root (%s filesystem)%s.\n",
+ fs_type->name,
+ (sb->s_flags & MS_RDONLY) ? " readonly" : "");
+ return;
+ }
+ }
+ panic("VFS: Unable to mount root fs on %02x:%02x",
+ MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
+}
diff --git a/fs/sysv/INTRO b/fs/sysv/INTRO
new file mode 100644
index 000000000..9e53cb317
--- /dev/null
+++ b/fs/sysv/INTRO
@@ -0,0 +1,183 @@
+This is the implementation of the SystemV/Coherent filesystem for Linux.
+It grew out of separate filesystem implementations
+
+ Xenix FS Doug Evans <dje@cygnus.com> June 1992
+ SystemV FS Paul B. Monday <pmonday@eecs.wsu.edu> March-June 1993
+ Coherent FS B. Haible <haible@ma2s2.mathematik.uni-karlsruhe.de> June 1993
+
+and was merged together in July 1993.
+
+These filesystems are rather similar. Here is a comparison with Minix FS:
+
+* Linux fdisk reports on partitions
+ - Minix FS 0x81 Linux/Minix
+ - Xenix FS ??
+ - SystemV FS ??
+ - Coherent FS 0x08 AIX bootable
+
+* Size of a block or zone (data allocation unit on disk)
+ - Minix FS 1024
+ - Xenix FS 1024 (also 512 ??)
+ - SystemV FS 1024 (also 512)
+ - Coherent FS 512
+
+* General layout: all have one boot block, one super block and
+ separate areas for inodes and for directories/data.
+ On SystemV Release 2 FS (e.g. Microport) the first track is reserved and
+ all the block numbers (including the super block) are offset by one track.
+
+* Byte ordering of "short" (16 bit entities) on disk:
+ - Minix FS little endian 0 1
+ - Xenix FS little endian 0 1
+ - SystemV FS little endian 0 1
+ - Coherent FS little endian 0 1
+ Of course, this affects only the file system, not the data of files on it!
+
+* Byte ordering of "long" (32 bit entities) on disk:
+ - Minix FS little endian 0 1 2 3
+ - Xenix FS little endian 0 1 2 3
+ - SystemV FS little endian 0 1 2 3
+ - Coherent FS PDP-11 2 3 0 1
+ Of course, this affects only the file system, not the data of files on it!
+
+* Inode on disk: "short", 0 means non-existent, the root dir ino is:
+ - Minix FS 1
+ - Xenix FS, SystemV FS, Coherent FS 2
+
+* Maximum number of hard links to a file:
+ - Minix FS 250
+ - Xenix FS ??
+ - SystemV FS ??
+ - Coherent FS >=10000
+
+* Free inode management:
+ - Minix FS a bitmap
+ - Xenix FS, SystemV FS, Coherent FS
+ There is a cache of a certain number of free inodes in the super-block.
+ When it is exhausted, new free inodes are found using a linear search.
+
+* Free block management:
+ - Minix FS a bitmap
+ - Xenix FS, SystemV FS, Coherent FS
+ Free blocks are organized in a "free list". Maybe a misleading term,
+ since it is not true that every free block contains a pointer to
+ the next free block. Rather, the free blocks are organized in chunks
+ of limited size, and every now and then a free block contains pointers
+ to the free blocks pertaining to the next chunk; the first of these
+ contains pointers and so on. The list terminates with a "block number"
+ 0 on Xenix FS and SystemV FS, with a block zeroed out on Coherent FS.
+
+* Super-block location:
+ - Minix FS block 1 = bytes 1024..2047
+ - Xenix FS block 1 = bytes 1024..2047
+ - SystemV FS bytes 512..1023
+ - Coherent FS block 1 = bytes 512..1023
+
+* Super-block layout:
+ - Minix FS
+ unsigned short s_ninodes;
+ unsigned short s_nzones;
+ unsigned short s_imap_blocks;
+ unsigned short s_zmap_blocks;
+ unsigned short s_firstdatazone;
+ unsigned short s_log_zone_size;
+ unsigned long s_max_size;
+ unsigned short s_magic;
+ - Xenix FS, SystemV FS, Coherent FS
+ unsigned short s_firstdatazone;
+ unsigned long s_nzones;
+ unsigned short s_fzone_count;
+ unsigned long s_fzones[NICFREE];
+ unsigned short s_finode_count;
+ unsigned short s_finodes[NICINOD];
+ char s_flock;
+ char s_ilock;
+ char s_modified;
+ char s_rdonly;
+ unsigned long s_time;
+ short s_dinfo[4]; -- SystemV FS only
+ unsigned long s_free_zones;
+ unsigned short s_free_inodes;
+ short s_dinfo[4]; -- Xenix FS only
+ unsigned short s_interleave_m,s_interleave_n; -- Coherent FS only
+ char s_fname[6];
+ char s_fpack[6];
+ then they differ considerably:
+ Xenix FS
+ char s_clean;
+ char s_fill[371];
+ long s_magic;
+ long s_type;
+ SystemV FS
+ long s_fill[12 or 14];
+ long s_state;
+ long s_magic;
+ long s_type;
+ Coherent FS
+ unsigned long s_unique;
+ Note that Coherent FS has no magic.
+
+* Inode layout:
+ - Minix FS
+ unsigned short i_mode;
+ unsigned short i_uid;
+ unsigned long i_size;
+ unsigned long i_time;
+ unsigned char i_gid;
+ unsigned char i_nlinks;
+ unsigned short i_zone[7+1+1];
+ - Xenix FS, SystemV FS, Coherent FS
+ unsigned short i_mode;
+ unsigned short i_nlink;
+ unsigned short i_uid;
+ unsigned short i_gid;
+ unsigned long i_size;
+ unsigned char i_zone[3*(10+1+1+1)];
+ unsigned long i_atime;
+ unsigned long i_mtime;
+ unsigned long i_ctime;
+
+* Regular file data blocks are organized as
+ - Minix FS
+ 7 direct blocks
+ 1 indirect block (pointers to blocks)
+ 1 double-indirect block (pointer to pointers to blocks)
+ - Xenix FS, SystemV FS, Coherent FS
+ 10 direct blocks
+ 1 indirect block (pointers to blocks)
+ 1 double-indirect block (pointer to pointers to blocks)
+ 1 triple-indirect block (pointer to pointers to pointers to blocks)
+
+* Inode size, inodes per block
+ - Minix FS 32 32
+ - Xenix FS 64 16
+ - SystemV FS 64 16
+ - Coherent FS 64 8
+
+* Directory entry on disk
+ - Minix FS
+ unsigned short inode;
+ char name[14/30];
+ - Xenix FS, SystemV FS, Coherent FS
+ unsigned short inode;
+ char name[14];
+
+* Dir entry size, dir entries per block
+ - Minix FS 16/32 64/32
+ - Xenix FS 16 64
+ - SystemV FS 16 64
+ - Coherent FS 16 32
+
+* How to implement symbolic links such that the host fsck doesn't scream:
+ - Minix FS normal
+ - Xenix FS kludge: as regular files with chmod 1000
+ - SystemV FS ??
+ - Coherent FS kludge: as regular files with chmod 1000
+
+
+Notation: We often speak of a "block" but mean a zone (the allocation unit)
+and not the disk driver's notion of "block".
+
+
+Bruno Haible <haible@ma2s2.mathematik.uni-karlsruhe.de>
+
diff --git a/fs/sysv/Makefile b/fs/sysv/Makefile
new file mode 100644
index 000000000..d4a6ecbd4
--- /dev/null
+++ b/fs/sysv/Makefile
@@ -0,0 +1,31 @@
+#
+# Makefile for the Linux SystemV/Coherent-filesystem routines.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+.c.s:
+ $(CC) $(CFLAGS) -S $<
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+.s.o:
+ $(AS) -o $*.o $<
+
+OBJS= ialloc.o balloc.o inode.o file.o dir.o symlink.o namei.o \
+ fsync.o truncate.o
+
+sysv.o: $(OBJS)
+ $(LD) -r -o sysv.o $(OBJS)
+
+dep:
+ $(CPP) -M *.c > .depend
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/fs/sysv/README b/fs/sysv/README
new file mode 100644
index 000000000..d318eb64b
--- /dev/null
+++ b/fs/sysv/README
@@ -0,0 +1,37 @@
+This is the implementation of the SystemV/Coherent filesystem for Linux.
+It implements all of
+ - Xenix FS,
+ - SystemV/386 FS,
+ - Coherent FS.
+
+This is version beta 4.
+
+To install:
+* Answer the 'System V and Coherent filesystem support' question with 'y'
+ when configuring the kernel.
+* To mount a disk or a partition, use
+ mount [-r] -t sysv device mountpoint
+ The file system type names
+ -t sysv
+ -t xenix
+ -t coherent
+ may be used interchangeably, but the last two will eventually disappear.
+
+Bugs in the present implementation:
+- Coherent FS:
+ - The "free list interleave" n:m is currently ignored.
+ - Only file systems with no filesystem name and no pack name are recognized.
+ (See Coherent "man mkfs" for a description of these features.)
+- SystemV Release 2 FS:
+ The superblock is only searched for in blocks 9, 15 and 18, which correspond to the
+ beginning of track 1 on floppy disks. No support for this FS on hard disk yet.
+
+
+Please report any bugs and suggestions to
+ Bruno Haible <haible@ma2s2.mathematik.uni-karlsruhe.de> or
+ Pascal Haible <haible@izfm.uni-stuttgart.de> .
+
+
+Bruno Haible
+<haible@ma2s2.mathematik.uni-karlsruhe.de>
+
diff --git a/fs/sysv/balloc.c b/fs/sysv/balloc.c
new file mode 100644
index 000000000..f0fb850be
--- /dev/null
+++ b/fs/sysv/balloc.c
@@ -0,0 +1,329 @@
+/*
+ * linux/fs/sysv/balloc.c
+ *
+ * minix/bitmap.c
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext/freelists.c
+ * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
+ *
+ * xenix/alloc.c
+ * Copyright (C) 1992 Doug Evans
+ *
+ * coh/alloc.c
+ * Copyright (C) 1993 Pascal Haible, Bruno Haible
+ *
+ * sysv/balloc.c
+ * Copyright (C) 1993 Bruno Haible
+ *
+ * This file contains code for allocating/freeing blocks.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/sysv_fs.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+
+/* We don't trust the value of
+ sb->sv_sbd2->s_tfree = *sb->sv_sb_total_free_blocks
+ but we nevertheless keep it up to date. */
+
+/*
+ * Return a data block to the free list.
+ *
+ * The head of the free list is a small array of block numbers kept in the
+ * super-block (sv_sb_flc_blocks, with *sv_sb_flc_count valid entries).
+ * - If that array is full, its contents are copied into the block being
+ *   freed and the array is emptied.
+ * - If that array is empty, the block being freed is zero-filled.
+ * - Otherwise any cached copy of the block is simply marked clean.
+ * In all three cases the block number is then appended to the array and
+ * the total free block counter is incremented.
+ *
+ * Problems are reported with printk() and the function returns without
+ * freeing anything.
+ */
+void sysv_free_block(struct super_block * sb, unsigned int block)
+{
+ struct buffer_head * bh;
+ char * bh_data;
+
+ if (!sb) {
+ printk("sysv_free_block: trying to free block on nonexistent device\n");
+ return;
+ }
+ /* Only blocks inside [sv_firstdatazone, sv_nzones) may be freed. */
+ if (block < sb->sv_firstdatazone || block >= sb->sv_nzones) {
+ printk("sysv_free_block: trying to free block not in datazone\n");
+ return;
+ }
+ lock_super(sb);
+ if (*sb->sv_sb_flc_count > sb->sv_flc_size) {
+ printk("sysv_free_block: flc_count > flc_size\n");
+ unlock_super(sb);
+ return;
+ }
+ /* If the free list head in super-block is full, it is copied
+ * into this block being freed:
+ */
+ if (*sb->sv_sb_flc_count == sb->sv_flc_size) {
+ unsigned short * flc_count;
+ unsigned long * flc_blocks;
+
+ bh = sv_getblk(sb, sb->s_dev, block);
+ if (!bh) {
+ printk("sysv_free_block: getblk() failed\n");
+ unlock_super(sb);
+ return;
+ }
+ bh_data = bh->b_data;
+ /* Locate the count and block array inside the chunk; the chunk
+ * structure differs per filesystem type.
+ */
+ switch (sb->sv_type) {
+ case FSTYPE_XENIX:
+ flc_count = &((struct xenix_freelist_chunk *) bh_data)->fl_nfree;
+ flc_blocks = &((struct xenix_freelist_chunk *) bh_data)->fl_free[0];
+ break;
+ case FSTYPE_SYSV4:
+ flc_count = &((struct sysv4_freelist_chunk *) bh_data)->fl_nfree;
+ flc_blocks = &((struct sysv4_freelist_chunk *) bh_data)->fl_free[0];
+ break;
+ case FSTYPE_SYSV2:
+ flc_count = &((struct sysv2_freelist_chunk *) bh_data)->fl_nfree;
+ flc_blocks = &((struct sysv2_freelist_chunk *) bh_data)->fl_free[0];
+ break;
+ case FSTYPE_COH:
+ flc_count = &((struct coh_freelist_chunk *) bh_data)->fl_nfree;
+ flc_blocks = &((struct coh_freelist_chunk *) bh_data)->fl_free[0];
+ break;
+ default: panic("sysv_free_block: invalid fs type\n");
+ }
+ *flc_count = *sb->sv_sb_flc_count; /* = sb->sv_flc_size */
+ memcpy(flc_blocks, sb->sv_sb_flc_blocks, *flc_count * sizeof(sysv_zone_t));
+ mark_buffer_dirty(bh, 1);
+ bh->b_uptodate = 1;
+ brelse(bh);
+ /* The super-block array is now empty and will receive this block below. */
+ *sb->sv_sb_flc_count = 0;
+ } else
+ /* If the free list head in super-block is empty, create a new head
+ * in this block being freed:
+ */
+ if (*sb->sv_sb_flc_count == 0) { /* Applies only to Coherent FS */
+ bh = sv_getblk(sb, sb->s_dev, block);
+ if (!bh) {
+ printk("sysv_free_block: getblk() failed\n");
+ unlock_super(sb);
+ return;
+ }
+ memset(bh->b_data, 0, sb->sv_block_size);
+ /* this implies ((struct ..._freelist_chunk *) bh->b_data)->flc_count = 0; */
+ mark_buffer_dirty(bh, 1);
+ bh->b_uptodate = 1;
+ brelse(bh);
+ /* still *sb->sv_sb_flc_count = 0 */
+ } else {
+ /* Throw away block's contents */
+ bh = sv_get_hash_table(sb, sb->s_dev, block);
+ if (bh)
+ bh->b_dirt = 0;
+ brelse(bh);
+ }
+ /* Append the block number to the super-block array, passing it
+ * through to_coh_ulong() first when sv_convert is set.
+ */
+ if (sb->sv_convert)
+ block = to_coh_ulong(block);
+ sb->sv_sb_flc_blocks[(*sb->sv_sb_flc_count)++] = block;
+ /* Keep the total free block counter up to date (same conversion rule). */
+ if (sb->sv_convert)
+ *sb->sv_sb_total_free_blocks =
+ to_coh_ulong(from_coh_ulong(*sb->sv_sb_total_free_blocks) + 1);
+ else
+ *sb->sv_sb_total_free_blocks = *sb->sv_sb_total_free_blocks + 1;
+ mark_buffer_dirty(sb->sv_bh1, 1); /* super-block has been modified */
+ if (sb->sv_bh1 != sb->sv_bh2) mark_buffer_dirty(sb->sv_bh2, 1);
+ sb->s_dirt = 1; /* and needs time stamp */
+ unlock_super(sb);
+}
+
+/*
+ * Allocate one data block from the free list.
+ *
+ * The last entry of the free list array in the super-block is taken.
+ * If that empties the array, the taken block is itself the next chunk of
+ * the free list: its count and block array are copied into the
+ * super-block before the block is handed out. The block is zero-filled
+ * in the buffer cache and the total free block counter is decremented.
+ *
+ * Returns the block number, or 0 if no block could be allocated.
+ */
+int sysv_new_block(struct super_block * sb)
+{
+	unsigned int block;
+	struct buffer_head * bh;
+	char * bh_data;
+
+	if (!sb) {
+		printk("sysv_new_block: trying to get new block from nonexistent device\n");
+		return 0;
+	}
+	lock_super(sb);
+	if (*sb->sv_sb_flc_count == 0) { /* Applies only to Coherent FS */
+		unlock_super(sb);
+		return 0; /* no blocks available */
+	}
+	block = sb->sv_sb_flc_blocks[(*sb->sv_sb_flc_count)-1];
+	if (sb->sv_convert)
+		block = from_coh_ulong(block);
+	if (block == 0) { /* Applies only to Xenix FS, SystemV FS */
+		unlock_super(sb);
+		return 0; /* no blocks available */
+	}
+	(*sb->sv_sb_flc_count)--;
+	if (block < sb->sv_firstdatazone || block >= sb->sv_nzones) {
+		printk("sysv_new_block: new block %d is not in data zone\n",block);
+		unlock_super(sb);
+		return 0;
+	}
+	if (*sb->sv_sb_flc_count == 0) { /* the last block continues the free list */
+		unsigned short * flc_count;
+		unsigned long * flc_blocks;
+
+		if (!(bh = sv_bread(sb, sb->s_dev, block))) {
+			printk("sysv_new_block: cannot read free-list block\n");
+			/* retry this same block next time */
+			(*sb->sv_sb_flc_count)++;
+			unlock_super(sb);
+			return 0;
+		}
+		bh_data = bh->b_data;
+		/* The chunk layout depends on the filesystem type. */
+		switch (sb->sv_type) {
+			case FSTYPE_XENIX:
+				flc_count = &((struct xenix_freelist_chunk *) bh_data)->fl_nfree;
+				flc_blocks = &((struct xenix_freelist_chunk *) bh_data)->fl_free[0];
+				break;
+			case FSTYPE_SYSV4:
+				flc_count = &((struct sysv4_freelist_chunk *) bh_data)->fl_nfree;
+				flc_blocks = &((struct sysv4_freelist_chunk *) bh_data)->fl_free[0];
+				break;
+			case FSTYPE_SYSV2:
+				flc_count = &((struct sysv2_freelist_chunk *) bh_data)->fl_nfree;
+				flc_blocks = &((struct sysv2_freelist_chunk *) bh_data)->fl_free[0];
+				break;
+			case FSTYPE_COH:
+				flc_count = &((struct coh_freelist_chunk *) bh_data)->fl_nfree;
+				flc_blocks = &((struct coh_freelist_chunk *) bh_data)->fl_free[0];
+				break;
+			default: panic("sysv_new_block: invalid fs type\n");
+		}
+		if (*flc_count > sb->sv_flc_size) {
+			printk("sysv_new_block: free-list block with >flc_size entries\n");
+			brelse(bh);
+			unlock_super(sb);
+			return 0;
+		}
+		*sb->sv_sb_flc_count = *flc_count;
+		memcpy(sb->sv_sb_flc_blocks, flc_blocks, *flc_count * sizeof(sysv_zone_t));
+		brelse(bh);
+	}
+	/* Now the free list head in the superblock is valid again. */
+	bh = sv_getblk(sb, sb->s_dev, block);
+	if (!bh) {
+		printk("sysv_new_block: getblk() failed\n");
+		unlock_super(sb);
+		return 0;
+	}
+	if (bh->b_count != 1) {
+		printk("sysv_new_block: block already in use\n");
+		/* Drop the reference sv_getblk() gave us; without this the
+		 * buffer's use count would never return to its old value.
+		 */
+		brelse(bh);
+		unlock_super(sb);
+		return 0;
+	}
+	/* Hand out the block zero-filled. */
+	memset(bh->b_data, 0, sb->sv_block_size);
+	mark_buffer_dirty(bh, 1);
+	bh->b_uptodate = 1;
+	brelse(bh);
+	if (sb->sv_convert)
+		*sb->sv_sb_total_free_blocks =
+			to_coh_ulong(from_coh_ulong(*sb->sv_sb_total_free_blocks) - 1);
+	else
+		*sb->sv_sb_total_free_blocks = *sb->sv_sb_total_free_blocks - 1;
+	mark_buffer_dirty(sb->sv_bh1, 1); /* super-block has been modified */
+	if (sb->sv_bh1 != sb->sv_bh2) mark_buffer_dirty(sb->sv_bh2, 1);
+	sb->s_dirt = 1; /* and needs time stamp */
+	unlock_super(sb);
+	return block;
+}
+
+/*
+ * Count the free blocks.
+ *
+ * With the "#if 1" branch enabled, the whole free list is walked: first
+ * the array in the super-block, then every chunk block it chains to.
+ * A block number of 0 terminates the list. If the result differs from
+ * the counter stored in the super-block, a message is printed and, unless
+ * the filesystem is mounted read-only, the stored counter is corrected.
+ *
+ * The "#else" branch just returns the stored counter.
+ */
+unsigned long sysv_count_free_blocks(struct super_block * sb)
+{
+#if 1 /* test */
+ int count, old_count;
+ unsigned int block;
+ struct buffer_head * bh;
+ char * bh_data;
+ int i;
+
+ /* this causes a lot of disk traffic ... */
+ count = 0;
+ lock_super(sb);
+ if (*sb->sv_sb_flc_count > 0) {
+ /* Walk the super-block array from its last entry down to entry 0. */
+ for (i = *sb->sv_sb_flc_count ; /* i > 0 */ ; ) {
+ block = sb->sv_sb_flc_blocks[--i];
+ if (sb->sv_convert)
+ block = from_coh_ulong(block);
+ if (block == 0) /* block 0 terminates list */
+ goto done;
+ count++;
+ if (i == 0)
+ break;
+ }
+ /* block = sb->sv_sb_flc_blocks[0], the last block continues the free list */
+ while (1) {
+ unsigned short * flc_count;
+ unsigned long * flc_blocks;
+
+ if (block < sb->sv_firstdatazone || block >= sb->sv_nzones) {
+ printk("sysv_count_free_blocks: new block %d is not in data zone\n",block);
+ break;
+ }
+ if (!(bh = sv_bread(sb, sb->s_dev, block))) {
+ printk("sysv_count_free_blocks: cannot read free-list block\n");
+ break;
+ }
+ bh_data = bh->b_data;
+ /* The chunk layout depends on the filesystem type. */
+ switch (sb->sv_type) {
+ case FSTYPE_XENIX:
+ flc_count = &((struct xenix_freelist_chunk *) bh_data)->fl_nfree;
+ flc_blocks = &((struct xenix_freelist_chunk *) bh_data)->fl_free[0];
+ break;
+ case FSTYPE_SYSV4:
+ flc_count = &((struct sysv4_freelist_chunk *) bh_data)->fl_nfree;
+ flc_blocks = &((struct sysv4_freelist_chunk *) bh_data)->fl_free[0];
+ break;
+ case FSTYPE_SYSV2:
+ flc_count = &((struct sysv2_freelist_chunk *) bh_data)->fl_nfree;
+ flc_blocks = &((struct sysv2_freelist_chunk *) bh_data)->fl_free[0];
+ break;
+ case FSTYPE_COH:
+ flc_count = &((struct coh_freelist_chunk *) bh_data)->fl_nfree;
+ flc_blocks = &((struct coh_freelist_chunk *) bh_data)->fl_free[0];
+ break;
+ default: panic("sysv_count_free_blocks: invalid fs type\n");
+ }
+ if (*flc_count > sb->sv_flc_size) {
+ printk("sysv_count_free_blocks: free-list block with >flc_size entries\n");
+ brelse(bh);
+ break;
+ }
+ if (*flc_count == 0) { /* Applies only to Coherent FS */
+ brelse(bh);
+ break;
+ }
+ /* Same downward walk as above, now over the chunk's array. */
+ for (i = *flc_count ; /* i > 0 */ ; ) {
+ block = flc_blocks[--i];
+ if (sb->sv_convert)
+ block = from_coh_ulong(block);
+ if (block == 0) /* block 0 terminates list */
+ break;
+ count++;
+ if (i == 0)
+ break;
+ }
+ /* block = flc_blocks[0], the last block continues the free list */
+ brelse(bh);
+ if (block == 0) /* Applies only to Xenix FS and SystemV FS */
+ break;
+ }
+ done: ;
+ }
+ /* Compare with the counter stored in the super-block. */
+ old_count = *sb->sv_sb_total_free_blocks;
+ if (sb->sv_convert)
+ old_count = from_coh_ulong(old_count);
+ if (count != old_count) {
+ printk("sysv_count_free_blocks: free block count was %d, correcting to %d\n",old_count,count);
+ if (!(sb->s_flags & MS_RDONLY)) {
+ *sb->sv_sb_total_free_blocks = (sb->sv_convert ? to_coh_ulong(count) : count);
+ mark_buffer_dirty(sb->sv_bh2, 1); /* super-block has been modified */
+ sb->s_dirt = 1; /* and needs time stamp */
+ }
+ }
+ unlock_super(sb);
+ return count;
+#else
+ int count;
+
+ count = *sb->sv_sb_total_free_blocks;
+ if (sb->sv_convert)
+ count = from_coh_ulong(count);
+ return count;
+#endif
+}
+
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
new file mode 100644
index 000000000..a4b019228
--- /dev/null
+++ b/fs/sysv/dir.c
@@ -0,0 +1,144 @@
+/*
+ * linux/fs/sysv/dir.c
+ *
+ * minix/dir.c
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * coh/dir.c
+ * Copyright (C) 1993 Pascal Haible, Bruno Haible
+ *
+ * sysv/dir.c
+ * Copyright (C) 1993 Bruno Haible
+ *
+ * SystemV/Coherent directory handling functions
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/sysv_fs.h>
+#include <linux/stat.h>
+
+#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
+#define ROUND_UP(x) (((x)+3) & ~3)
+
+/* read() on a directory is refused: this always returns -EISDIR. */
+static int sysv_dir_read(struct inode * inode, struct file * filp, char * buf, int count)
+{
+ return -EISDIR;
+}
+
+static int sysv_readdir(struct inode *, struct file *, struct dirent *, int);
+
+/*
+ * File operations for directories. Entries are positional; a NULL entry
+ * means this filesystem supplies no handler of its own for that slot.
+ */
+static struct file_operations sysv_dir_operations = {
+ NULL, /* lseek - default */
+ sysv_dir_read, /* read */
+ NULL, /* write - bad */
+ sysv_readdir, /* readdir */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ file_fsync /* default fsync */
+};
+
+/*
+ * Inode operations for directories. Directories can handle most
+ * operations; only readlink, follow_link, bmap and permission are left
+ * NULL here.
+ */
+struct inode_operations sysv_dir_inode_operations = {
+ &sysv_dir_operations, /* default directory file-ops */
+ sysv_create, /* create */
+ sysv_lookup, /* lookup */
+ sysv_link, /* link */
+ sysv_unlink, /* unlink */
+ sysv_symlink, /* symlink */
+ sysv_mkdir, /* mkdir */
+ sysv_rmdir, /* rmdir */
+ sysv_mknod, /* mknod */
+ sysv_rename, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ sysv_truncate, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * Deliver the next non-empty directory entry to user space.
+ *
+ * Starting at filp->f_pos, directory slots of SYSV_DIRSIZE bytes are
+ * scanned; slots whose inode field is 0 or whose name is empty are
+ * skipped. For the first usable slot the name (at most SYSV_NAMELEN
+ * bytes, NUL-terminated), the inode number and the name length are
+ * written into *dirent. filp->f_pos is advanced past every slot examined.
+ *
+ * Returns the rounded-up size of the stored entry, 0 at end of directory,
+ * or -EBADF if the inode is not a usable directory or f_pos is not a
+ * multiple of SYSV_DIRSIZE.
+ */
+static int sysv_readdir1 (struct inode * inode, struct file * filp,
+ struct dirent * dirent)
+{
+ struct super_block * sb;
+ unsigned int offset,i;
+ char c;
+ struct buffer_head * bh;
+ char* bh_data;
+ struct sysv_dir_entry * de;
+
+ if (!inode || !(sb = inode->i_sb) || !S_ISDIR(inode->i_mode))
+ return -EBADF;
+ if ((unsigned long)(filp->f_pos) % SYSV_DIRSIZE)
+ return -EBADF;
+ while (filp->f_pos < inode->i_size) {
+ /* offset = position inside the current block */
+ offset = filp->f_pos & sb->sv_block_size_1;
+ bh = sysv_file_bread(inode, filp->f_pos >> sb->sv_block_size_bits, 0);
+ if (!bh) {
+ /* No buffer for this block: skip to the start of the next one. */
+ filp->f_pos += sb->sv_block_size - offset;
+ continue;
+ }
+ bh_data = bh->b_data;
+ while (offset < sb->sv_block_size && filp->f_pos < inode->i_size) {
+ de = (struct sysv_dir_entry *) (offset + bh_data);
+ offset += SYSV_DIRSIZE;
+ filp->f_pos += SYSV_DIRSIZE;
+ if (de->inode) {
+ struct sysv_dir_entry sde;
+
+ /* Copy the directory entry first, because the directory
+ * might be modified while we sleep in put_fs_byte...
+ */
+ memcpy(&sde, de, sizeof(struct sysv_dir_entry));
+
+ /* Copy the name out; i ends up as the name length. */
+ for (i = 0; i < SYSV_NAMELEN; i++)
+ if ((c = sde.name[i]) != 0)
+ put_fs_byte(c,i+dirent->d_name);
+ else
+ break;
+ if (i) {
+ /* An out-of-range inode number is only reported, not rejected. */
+ if (sde.inode > inode->i_sb->sv_ninodes)
+ printk("sysv_readdir: Bad inode number on dev 0x%04x, ino %ld, offset 0x%04lx: %d is out of range\n",
+ inode->i_dev, inode->i_ino, (off_t) filp->f_pos - SYSV_DIRSIZE, sde.inode);
+ put_fs_long(sde.inode,&dirent->d_ino);
+ put_fs_byte(0,i+dirent->d_name);
+ put_fs_word(i,&dirent->d_reclen);
+ brelse(bh);
+ return ROUND_UP(NAME_OFFSET(dirent)+i+1);
+ }
+ }
+ }
+ brelse(bh);
+ }
+ return 0;
+}
+
+/*
+ * readdir entry point.
+ *
+ * count == 1 selects the old one-entry-per-call interface. Otherwise
+ * count is the size of the user buffer in bytes, and entries are packed
+ * into it back to back for as long as a full struct dirent still fits.
+ *
+ * Returns the number of bytes stored (0 at end of directory), or the
+ * negative error code from sysv_readdir1().
+ */
+static int sysv_readdir(struct inode * inode, struct file * filp,
+	struct dirent * dirent, int count)
+{
+	int total = 0;
+	int len;
+
+	/* compatibility */
+	if (count == 1)
+		return sysv_readdir1(inode, filp, dirent);
+
+	for (;;) {
+		if (count < sizeof(struct dirent))
+			break;
+		len = sysv_readdir1(inode, filp, dirent);
+		if (len < 0)
+			return len;
+		if (len == 0)
+			break;
+		total += len;
+		count -= len;
+		/* the next entry goes right behind the one just stored */
+		dirent = (struct dirent *)((char *) dirent + len);
+	}
+	return total;
+}
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
new file mode 100644
index 000000000..27f82d51a
--- /dev/null
+++ b/fs/sysv/file.c
@@ -0,0 +1,263 @@
+/*
+ * linux/fs/sysv/file.c
+ *
+ * minix/file.c
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * coh/file.c
+ * Copyright (C) 1993 Pascal Haible, Bruno Haible
+ *
+ * sysv/file.c
+ * Copyright (C) 1993 Bruno Haible
+ *
+ * SystemV/Coherent regular file handling primitives
+ */
+
+#include <asm/segment.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/sysv_fs.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+
+#define NBUF 32
+
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#define MAX(a,b) (((a)>(b))?(a):(b))
+
+#include <linux/fs.h>
+#include <linux/sysv_fs.h>
+
+static int sysv_file_write(struct inode *, struct file *, char *, int);
+
+/*
+ * File operations for regular files.
+ * We have mostly NULL's here: the current defaults are ok for
+ * the SystemV/Coherent filesystem.
+ */
+static struct file_operations sysv_file_operations = {
+ NULL, /* lseek - default */
+ sysv_file_read, /* read */
+ sysv_file_write, /* write */
+ NULL, /* readdir - bad */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ generic_mmap, /* mmap */
+ NULL, /* no special open is needed */
+ NULL, /* release */
+ sysv_sync_file /* fsync */
+};
+
+/*
+ * Inode operations for regular files: only bmap and truncate are
+ * provided, all directory-type operations are NULL.
+ */
+struct inode_operations sysv_file_inode_operations = {
+ &sysv_file_operations, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ sysv_bmap, /* bmap */
+ sysv_truncate, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * Read up to count bytes from a regular file, starting at filp->f_pos,
+ * into the user-space buffer buf.
+ *
+ * Blocks are requested through sysv_getblk() in batches of at most NBUF
+ * and copied out as they become up to date. A block for which
+ * sysv_getblk() returns NULL is delivered as zero bytes. When
+ * filp->f_reada is set, extra read-ahead blocks are requested, limited
+ * to the end of the file.
+ *
+ * Returns the number of bytes read, 0 if there is nothing to read at
+ * this position, -EINVAL for a NULL or non-regular inode, or -EIO if
+ * data was expected but none could be read.
+ */
+int sysv_file_read(struct inode * inode, struct file * filp, char * buf, int count)
+{
+	struct super_block * sb;
+	int read,left,chars;
+	unsigned int block;
+	int blocks, offset;
+	int bhrequest, uptodate;
+	struct buffer_head ** bhb, ** bhe;
+	struct buffer_head * bhreq[NBUF];
+	struct buffer_head * buflist[NBUF];
+	unsigned int size;
+
+	if (!inode) {
+		printk("sysv_file_read: inode = NULL\n");
+		return -EINVAL;
+	}
+	if (!S_ISREG(inode->i_mode)) {
+		printk("sysv_file_read: mode = %07o\n",inode->i_mode);
+		return -EINVAL;
+	}
+	/* Fetch the super-block only after inode has been checked for NULL. */
+	sb = inode->i_sb;
+	offset = filp->f_pos;
+	size = inode->i_size;
+	if (offset > size)
+		left = 0;
+	else
+		left = size - offset;
+	if (left > count)
+		left = count;
+	if (left <= 0)
+		return 0;
+	read = 0;
+	/* From here on: block = file block index, offset = offset inside it,
+	 * size = file size in blocks, blocks = number of blocks to fetch.
+	 */
+	block = offset >> sb->sv_block_size_bits;
+	offset &= sb->sv_block_size_1;
+	size = (size + sb->sv_block_size_1) >> sb->sv_block_size_bits;
+	blocks = (left + offset + sb->sv_block_size_1) >> sb->sv_block_size_bits;
+	bhb = bhe = buflist;
+	if (filp->f_reada) {
+		blocks += read_ahead[MAJOR(inode->i_dev)] >> (sb->sv_block_size_bits - 9);
+		if (block + blocks > size)
+			blocks = size - block;
+	}
+
+	/* We do this in a two stage process. We first try and request
+	   as many blocks as we can, then we wait for the first one to
+	   complete, and then we try and wrap up as many as are actually
+	   done. This routine is rather generic, in that it can be used
+	   in a filesystem by substituting the appropriate function in
+	   for getblk.
+
+	   This routine is optimized to make maximum use of the various
+	   buffers and caches.
+	 */
+
+	/* buflist is used as a ring: bhb is where the next requested buffer
+	   is stored, bhe is the next buffer to be consumed. */
+	do {
+		bhrequest = 0;
+		uptodate = 1;
+		while (blocks) {
+			--blocks;
+			*bhb = sysv_getblk(inode, block++, 0);
+			if (*bhb && !(*bhb)->b_uptodate) {
+				uptodate = 0;
+				bhreq[bhrequest++] = *bhb;
+			}
+
+			if (++bhb == &buflist[NBUF])
+				bhb = buflist;
+
+			/* If the block we have on hand is uptodate, go ahead
+			   and complete processing. */
+			if (uptodate)
+				break;
+			if (bhb == bhe)
+				break;
+		}
+
+		/* Now request them all */
+		if (bhrequest)
+			ll_rw_block(READ, bhrequest, bhreq);
+
+		do { /* Finish off all I/O that has actually completed */
+			if (*bhe) {
+				wait_on_buffer(*bhe);
+				if (!(*bhe)->b_uptodate) { /* read error? */
+					brelse(*bhe);
+					if (++bhe == &buflist[NBUF])
+						bhe = buflist;
+					left = 0;
+					break;
+				}
+			}
+			if (left < sb->sv_block_size - offset)
+				chars = left;
+			else
+				chars = sb->sv_block_size - offset;
+			filp->f_pos += chars;
+			left -= chars;
+			read += chars;
+			if (*bhe) {
+				memcpy_tofs(buf,offset+(*bhe)->b_data,chars);
+				brelse(*bhe);
+				buf += chars;
+			} else {
+				/* no buffer for this block: hand out zeros */
+				while (chars-- > 0)
+					put_fs_byte(0,buf++);
+			}
+			offset = 0;
+			if (++bhe == &buflist[NBUF])
+				bhe = buflist;
+		} while (left > 0 && bhe != bhb && (!*bhe || !(*bhe)->b_lock));
+	} while (left > 0);
+
+/* Release the read-ahead blocks */
+	while (bhe != bhb) {
+		brelse(*bhe);
+		if (++bhe == &buflist[NBUF])
+			bhe = buflist;
+	};
+	if (!read)
+		return -EIO;
+	filp->f_reada = 1;
+	if (!IS_RDONLY(inode))
+		inode->i_atime = CURRENT_TIME;
+	return read;
+}
+
+/*
+ * Write count bytes from the user-space buffer buf to a regular file.
+ *
+ * The write starts at the end of the file when O_APPEND is set in
+ * filp->f_flags, otherwise at filp->f_pos. Blocks are obtained with
+ * sysv_getblk(..., 1); a block that is only partly overwritten and not
+ * up to date is read from disk first. i_size grows as data is written.
+ *
+ * Returns the number of bytes written. If the very first block fails,
+ * returns -ENOSPC (no block obtained) or -EIO (read of a partial block
+ * failed). Returns -EINVAL for a NULL or non-regular inode.
+ */
+static int sysv_file_write(struct inode * inode, struct file * filp, char * buf, int count)
+{
+	struct super_block * sb;
+	off_t pos;
+	int written,c;
+	struct buffer_head * bh;
+	char * p;
+
+	if (!inode) {
+		printk("sysv_file_write: inode = NULL\n");
+		return -EINVAL;
+	}
+	if (!S_ISREG(inode->i_mode)) {
+		printk("sysv_file_write: mode = %07o\n",inode->i_mode);
+		return -EINVAL;
+	}
+	/* Fetch the super-block only after inode has been checked for NULL. */
+	sb = inode->i_sb;
+/*
+ * ok, append may not work when many processes are writing at the same time
+ * but so what. That way leads to madness anyway.
+ * But we need to protect against simultaneous truncate as we may end up
+ * writing our data into blocks that have meanwhile been incorporated into
+ * the freelist, thereby trashing the freelist.
+ */
+	if (filp->f_flags & O_APPEND)
+		pos = inode->i_size;
+	else
+		pos = filp->f_pos;
+	written = 0;
+	while (written<count) {
+		bh = sysv_getblk (inode, pos >> sb->sv_block_size_bits, 1);
+		if (!bh) {
+			if (!written)
+				written = -ENOSPC;
+			break;
+		}
+		/* c = number of bytes that go into this block */
+		c = sb->sv_block_size - (pos & sb->sv_block_size_1);
+		if (c > count-written)
+			c = count-written;
+		if (c != sb->sv_block_size && !bh->b_uptodate) {
+			ll_rw_block(READ, 1, &bh);
+			wait_on_buffer(bh);
+			if (!bh->b_uptodate) {
+				brelse(bh);
+				if (!written)
+					written = -EIO;
+				break;
+			}
+		}
+		/* now either c==sb->sv_block_size or bh->b_uptodate */
+		p = (pos & sb->sv_block_size_1) + bh->b_data;
+		pos += c;
+		if (pos > inode->i_size) {
+			inode->i_size = pos;
+			inode->i_dirt = 1;
+		}
+		written += c;
+		memcpy_fromfs(p,buf,c);
+		buf += c;
+		bh->b_uptodate = 1;
+		mark_buffer_dirty(bh, 0);
+		brelse(bh);
+	}
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	filp->f_pos = pos;
+	inode->i_dirt = 1;
+	return written;
+}
diff --git a/fs/sysv/fsync.c b/fs/sysv/fsync.c
new file mode 100644
index 000000000..9e105077d
--- /dev/null
+++ b/fs/sysv/fsync.c
@@ -0,0 +1,197 @@
+/*
+ * linux/fs/sysv/fsync.c
+ *
+ * minix/fsync.c
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk)
+ *
+ * coh/fsync.c
+ * Copyright (C) 1993 Pascal Haible, Bruno Haible
+ *
+ * sysv/fsync.c
+ * Copyright (C) 1993 Bruno Haible
+ *
+ * SystemV/Coherent fsync primitive
+ */
+
+#include <linux/errno.h>
+#include <linux/stat.h>
+
+#include <linux/fs.h>
+#include <linux/sysv_fs.h>
+
+
+/* return values: 0 means OK/done, 1 means redo, -1 means I/O error. */
+
+/*
+ * Sync a single block whose number is stored at *blockp
+ * (from_coh_ulong(*blockp) if convert is set, *blockp otherwise).
+ *
+ * With wait == 0 a dirty, up-to-date buffer is queued for writing.
+ * With wait != 0 nothing is written; a buffer that was requested but is
+ * not up to date is reported as an I/O error.
+ *
+ * Returns 0 when done, 1 when *blockp changed underneath us (redo),
+ * -1 on I/O error.
+ */
+static int sync_block (struct inode * inode, unsigned long * blockp, int convert, int wait)
+{
+	struct buffer_head * bh;
+	unsigned long raw, blocknr;
+	int status;
+
+	raw = *blockp;
+	blocknr = convert ? from_coh_ulong(raw) : raw;
+	if (blocknr == 0)
+		return 0;
+	bh = sv_get_hash_table(inode->i_sb, inode->i_dev, blocknr);
+	if (bh == NULL)
+		return 0;
+
+	if (*blockp != raw) {
+		/* the pointer was changed while we looked the buffer up */
+		status = 1;
+	} else if (wait && bh->b_req && !bh->b_uptodate) {
+		status = -1;
+	} else if (wait || !bh->b_uptodate || !bh->b_dirt) {
+		status = 0;
+	} else {
+		/* start the write; the reference is dropped by hand */
+		ll_rw_block(WRITE, 1, &bh);
+		bh->b_count--;
+		return 0;
+	}
+	brelse(bh);
+	return status;
+}
+
+/*
+ * Sync one block full of indirect pointers, then read it, because the
+ * caller needs its contents to descend further.
+ *
+ * On return *bh is the buffer that was read, or NULL. Return values are
+ * those of sync_block(): 0 done, 1 redo, -1 I/O error.
+ */
+static int sync_iblock (struct inode * inode, unsigned long * iblockp, int convert,
+	struct buffer_head * *bh, int wait)
+{
+	struct buffer_head * ind;
+	unsigned long raw, blocknr;
+	int status;
+
+	*bh = NULL;
+	raw = *iblockp;
+	blocknr = convert ? from_coh_ulong(raw) : raw;
+	if (blocknr == 0)
+		return 0;
+
+	status = sync_block (inode, iblockp, convert, wait);
+	if (status != 0)
+		return status;
+
+	ind = sv_bread(inode->i_sb, inode->i_dev, blocknr);
+	if (*iblockp != raw) {
+		/* pointer changed while we were reading: caller must redo */
+		brelse(ind);
+		return 1;
+	}
+	*bh = ind;
+	return ind ? 0 : -1;
+}
+
+
+/*
+ * Sync the 10 direct blocks of an inode.
+ * Stops early when sync_block() asks for a redo; returns the last
+ * negative status seen, or 0.
+ */
+static int sync_direct(struct inode *inode, int wait)
+{
+	int idx = 0;
+	int status, result = 0;
+
+	while (idx < 10) {
+		status = sync_block (inode, &inode->u.sysv_i.i_data[idx], 0, wait);
+		if (status > 0)
+			break;
+		if (status < 0)
+			result = status;
+		idx++;
+	}
+	return result;
+}
+
+/*
+ * Sync an indirect block and every data block it points to.
+ * Returns the status of sync_iblock() if the indirect block itself could
+ * not be obtained, otherwise the last negative status seen, or 0.
+ */
+static int sync_indirect(struct inode *inode, unsigned long *iblockp, int convert, int wait)
+{
+	struct super_block * sb;
+	struct buffer_head * ind_bh;
+	unsigned long * entry;
+	int idx = 0;
+	int status, result = 0;
+
+	status = sync_iblock (inode, iblockp, convert, &ind_bh, wait);
+	if (status || !ind_bh)
+		return status;
+
+	sb = inode->i_sb;
+	entry = (unsigned long *) ind_bh->b_data;
+	while (idx < sb->sv_ind_per_block) {
+		status = sync_block (inode, entry + idx, sb->sv_convert, wait);
+		if (status > 0)
+			break;
+		if (status < 0)
+			result = status;
+		idx++;
+	}
+	brelse(ind_bh);
+	return result;
+}
+
+/*
+ * Sync a double-indirect block and, through sync_indirect(), everything
+ * below it. Return value as for sync_indirect().
+ */
+static int sync_dindirect(struct inode *inode, unsigned long *diblockp, int convert,
+	int wait)
+{
+	struct super_block * sb;
+	struct buffer_head * dind_bh;
+	unsigned long * entry;
+	int idx = 0;
+	int status, result = 0;
+
+	status = sync_iblock (inode, diblockp, convert, &dind_bh, wait);
+	if (status || !dind_bh)
+		return status;
+
+	sb = inode->i_sb;
+	entry = (unsigned long *) dind_bh->b_data;
+	while (idx < sb->sv_ind_per_block) {
+		status = sync_indirect (inode, entry + idx, sb->sv_convert, wait);
+		if (status > 0)
+			break;
+		if (status < 0)
+			result = status;
+		idx++;
+	}
+	brelse(dind_bh);
+	return result;
+}
+
+/*
+ * Sync a triple-indirect block and, through sync_dindirect(), everything
+ * below it. Return value as for sync_indirect().
+ */
+static int sync_tindirect(struct inode *inode, unsigned long *tiblockp, int convert,
+	int wait)
+{
+	struct super_block * sb;
+	struct buffer_head * tind_bh;
+	unsigned long * entry;
+	int idx = 0;
+	int status, result = 0;
+
+	status = sync_iblock (inode, tiblockp, convert, &tind_bh, wait);
+	if (status || !tind_bh)
+		return status;
+
+	sb = inode->i_sb;
+	entry = (unsigned long *) tind_bh->b_data;
+	while (idx < sb->sv_ind_per_block) {
+		status = sync_dindirect (inode, entry + idx, sb->sv_convert, wait);
+		if (status > 0)
+			break;
+		if (status < 0)
+			result = status;
+		idx++;
+	}
+	brelse(tind_bh);
+	return result;
+}
+
+/*
+ * fsync for regular files, directories and symbolic links.
+ *
+ * Two passes are made over all block pointers of the inode: the first
+ * (wait == 0) queues dirty buffers for writing, the second (wait == 1)
+ * checks them. Finally the inode itself is synced.
+ *
+ * Returns 0 on success, -EIO if any step reported an error, or -EINVAL
+ * for other inode types.
+ */
+int sysv_sync_file(struct inode * inode, struct file * file)
+{
+	int pass;
+	int status = 0;
+
+	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
+	    !S_ISLNK(inode->i_mode))
+		return -EINVAL;
+
+	pass = 0;
+	do {
+		status |= sync_direct(inode, pass);
+		status |= sync_indirect(inode, inode->u.sysv_i.i_data+10, 0, pass);
+		status |= sync_dindirect(inode, inode->u.sysv_i.i_data+11, 0, pass);
+		status |= sync_tindirect(inode, inode->u.sysv_i.i_data+12, 0, pass);
+	} while (++pass <= 1);
+	status |= sysv_sync_inode (inode);
+	if (status < 0)
+		return -EIO;
+	return 0;
+}
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
new file mode 100644
index 000000000..f87009100
--- /dev/null
+++ b/fs/sysv/ialloc.c
@@ -0,0 +1,218 @@
+/*
+ * linux/fs/sysv/ialloc.c
+ *
+ * minix/bitmap.c
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext/freelists.c
+ * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
+ *
+ * xenix/alloc.c
+ * Copyright (C) 1992 Doug Evans
+ *
+ * coh/alloc.c
+ * Copyright (C) 1993 Pascal Haible, Bruno Haible
+ *
+ * sysv/ialloc.c
+ * Copyright (C) 1993 Bruno Haible
+ *
+ * This file contains code for allocating/freeing inodes.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/sysv_fs.h>
+#include <linux/stddef.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+
+/* We don't trust the value of
+ sb->sv_sbd2->s_tinode = *sb->sv_sb_total_free_inodes
+ but we nevertheless keep it up to date. */
+
+/* An inode on disk is considered free if both i_mode == 0 and i_nlink == 0. */
+
+/*
+ * Return the address of slot i of the free inode cache kept in the
+ * super-block, i.e. &sbd->s_inode[i].
+ *
+ * When the super-block lives in two separate buffers (sv_bh1 != sv_bh2,
+ * the 512 byte Xenix FS case), the slot is located by its byte offset in
+ * struct xenix_super_block: offsets below 512 are taken relative to
+ * sv_sbd1, all others relative to sv_sbd2.
+ */
+static inline sysv_ino_t * sv_sb_fic_inode (struct super_block * sb, unsigned int i)
+{
+	unsigned int off;
+
+	if (sb->sv_bh1 == sb->sv_bh2)
+		return &sb->sv_sb_fic_inodes[i];
+
+	/* 512 byte Xenix FS */
+	off = offsetof(struct xenix_super_block, s_inode[i]);
+	if (off >= 512)
+		return (sysv_ino_t*)(sb->sv_sbd2 + off);
+	return (sysv_ino_t*)(sb->sv_sbd1 + off);
+}
+
+/*
+ * Release an inode that is no longer linked anywhere.
+ *
+ * The inode must have a device, a use count of exactly 1, a link count
+ * of 0 and a number above SYSV_ROOT_INO but not above sv_ninodes;
+ * otherwise a message is printed and nothing happens. The on-disk inode
+ * is zeroed, the inode number is put into the super-block's free inode
+ * cache if there is room, the total free inode counter is incremented
+ * and the in-core inode is cleared.
+ */
+void sysv_free_inode(struct inode * inode)
+{
+	struct super_block * sb;
+	struct buffer_head * bh;
+	struct sysv_inode * disk_inode;
+	sysv_ino_t * slot;
+	unsigned int ino;
+
+	if (!inode)
+		return;
+	if (!inode->i_dev) {
+		printk("sysv_free_inode: inode has no device\n");
+		return;
+	}
+	if (inode->i_count != 1) {
+		printk("sysv_free_inode: inode has count=%d\n", inode->i_count);
+		return;
+	}
+	if (inode->i_nlink) {
+		printk("sysv_free_inode: inode has nlink=%d\n", inode->i_nlink);
+		return;
+	}
+	sb = inode->i_sb;
+	if (!sb) {
+		printk("sysv_free_inode: inode on nonexistent device\n");
+		return;
+	}
+	ino = inode->i_ino;
+	if (ino <= SYSV_ROOT_INO || ino > sb->sv_ninodes) {
+		printk("sysv_free_inode: inode 0,1,2 or nonexistent inode\n");
+		return;
+	}
+
+	/* read the inode table block that holds this inode */
+	bh = sv_bread(sb, inode->i_dev, sb->sv_firstinodezone + ((ino-1) >> sb->sv_inodes_per_block_bits));
+	if (!bh) {
+		printk("sysv_free_inode: unable to read inode block on device %d/%d\n",MAJOR(inode->i_dev),MINOR(inode->i_dev));
+		clear_inode(inode);
+		return;
+	}
+	disk_inode = (struct sysv_inode *) bh->b_data + ((ino-1) & sb->sv_inodes_per_block_1);
+
+	lock_super(sb);
+	if (*sb->sv_sb_fic_count < sb->sv_fic_size) {
+		/* room left in the cache: remember this inode as free */
+		slot = sv_sb_fic_inode(sb, *sb->sv_sb_fic_count);
+		*slot = ino;
+		(*sb->sv_sb_fic_count)++;
+	}
+	(*sb->sv_sb_total_free_inodes)++;
+	mark_buffer_dirty(sb->sv_bh1, 1); /* super-block has been modified */
+	if (sb->sv_bh1 != sb->sv_bh2)
+		mark_buffer_dirty(sb->sv_bh2, 1);
+	sb->s_dirt = 1; /* and needs time stamp */
+	memset(disk_inode, 0, sizeof(struct sysv_inode));
+	mark_buffer_dirty(bh, 1);
+	unlock_super(sb);
+
+	brelse(bh);
+	clear_inode(inode);
+}
+
+/*
+ * Allocate a new inode on the filesystem that holds dir.
+ *
+ * An inode number is taken from the free inode cache in the super-block.
+ * If the cache is empty (or its last entry is 0), it is first refilled by
+ * scanning the inode table for on-disk inodes with i_mode == 0 and
+ * i_nlink == 0. The new in-core inode is initialized, hashed and written
+ * out once so the number is not handed out again.
+ *
+ * Returns the new inode, or NULL if dir is NULL, no in-core inode could
+ * be obtained, or no free on-disk inode was found.
+ */
+struct inode * sysv_new_inode(const struct inode * dir)
+{
+ struct inode * inode;
+ struct super_block * sb;
+ struct buffer_head * bh;
+ struct sysv_inode * raw_inode;
+ int i,j,ino,block;
+
+ if (!dir || !(inode = get_empty_inode()))
+ return NULL;
+ sb = dir->i_sb;
+ inode->i_sb = sb;
+ inode->i_flags = inode->i_sb->s_flags;
+ lock_super(sb); /* protect against task switches */
+ if ((*sb->sv_sb_fic_count == 0)
+ || (*sv_sb_fic_inode(sb,(*sb->sv_sb_fic_count)-1) == 0) /* Applies only to SystemV2 FS */
+ ) {
+ /* Rebuild cache of free inodes: */
+ /* i : index into cache slot being filled */
+ /* ino : inode we are trying */
+ /* block : firstinodezone + (ino-1)/inodes_per_block */
+ /* j : (ino-1)%inodes_per_block */
+ /* bh : buffer for block */
+ /* raw_inode : pointer to inode ino in the block */
+ /* The scan starts at inode SYSV_ROOT_INO+1 and stops when the cache
+ * is full or the inode table (blocks below sv_firstdatazone) ends.
+ */
+ for (i = 0, ino = SYSV_ROOT_INO+1, block = sb->sv_firstinodezone, j = SYSV_ROOT_INO ; i < sb->sv_fic_size && block < sb->sv_firstdatazone ; block++, j = 0) {
+ if (!(bh = sv_bread(sb, sb->s_dev, block))) {
+ printk("sysv_new_inode: unable to read inode table\n");
+ break; /* go with what we've got */
+ /* FIXME: Perhaps try the next block? */
+ }
+ raw_inode = (struct sysv_inode *) bh->b_data + j;
+ for (; j < sb->sv_inodes_per_block && i < sb->sv_fic_size; ino++, j++, raw_inode++) {
+ if (raw_inode->i_mode == 0 && raw_inode->i_nlink == 0)
+ *sv_sb_fic_inode(sb,i++) = ino;
+ }
+ brelse(bh);
+ }
+ if (i == 0) {
+ iput(inode);
+ unlock_super(sb);
+ return NULL; /* no inodes available */
+ }
+ *sb->sv_sb_fic_count = i;
+ }
+ /* Now *sb->sv_sb_fic_count > 0. */
+ ino = *sv_sb_fic_inode(sb,--(*sb->sv_sb_fic_count));
+ mark_buffer_dirty(sb->sv_bh1, 1); /* super-block has been modified */
+ if (sb->sv_bh1 != sb->sv_bh2) mark_buffer_dirty(sb->sv_bh2, 1);
+ sb->s_dirt = 1; /* and needs time stamp */
+ /* Initialize the in-core inode. */
+ inode->i_count = 1;
+ inode->i_nlink = 1;
+ inode->i_dev = sb->s_dev;
+ inode->i_uid = current->fsuid;
+ /* The group is taken from the directory when its setgid bit is set. */
+ inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
+ inode->i_dirt = 1;
+ inode->i_ino = ino;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ inode->i_op = NULL;
+ inode->i_blocks = inode->i_blksize = 0;
+ insert_inode_hash(inode);
+ /* Write the inode out once: with i_nlink = 1 it no longer matches the
+ * "free" test (i_mode == 0 and i_nlink == 0) used by the scan above.
+ */
+ inode->i_mode = 0; /* for sysv_write_inode() */
+ inode->i_size = 0; /* ditto */
+ sysv_write_inode(inode); /* ensure inode not allocated again */
+ /* FIXME: caller may call this too. */
+ inode->i_dirt = 1; /* cleared by sysv_write_inode() */
+ /* That's it. */
+ (*sb->sv_sb_total_free_inodes)--;
+ mark_buffer_dirty(sb->sv_bh2, 1); /* super-block has been modified again */
+ sb->s_dirt = 1; /* and needs time stamp again */
+ unlock_super(sb);
+ return inode;
+}
+
+/*
+ * Count the free inodes.
+ *
+ * With the "#if 1" branch enabled, the whole inode table is scanned and
+ * every on-disk inode with i_mode == 0 and i_nlink == 0 is counted. If
+ * the result differs from the counter stored in the super-block, a
+ * message is printed and, unless the filesystem is mounted read-only,
+ * the stored counter is corrected.
+ *
+ * The "#else" branch just returns the stored counter.
+ */
+unsigned long sysv_count_free_inodes(struct super_block * sb)
+{
+#if 1 /* test */
+ struct buffer_head * bh;
+ struct sysv_inode * raw_inode;
+ int j,block,count;
+
+ /* this causes a lot of disk traffic ... */
+ count = 0;
+ lock_super(sb);
+ /* block : inode table block currently being scanned */
+ /* j : index of the inode within that block; the first block is */
+ /* entered at j = SYSV_ROOT_INO, so its first SYSV_ROOT_INO */
+ /* slots are not counted */
+ /* bh : buffer for block */
+ /* raw_inode : pointer to the on-disk inode being examined */
+ for (block = sb->sv_firstinodezone, j = SYSV_ROOT_INO ; block < sb->sv_firstdatazone ; block++, j = 0) {
+ if (!(bh = sv_bread(sb, sb->s_dev, block))) {
+ printk("sysv_count_free_inodes: unable to read inode table\n");
+ break; /* go with what we've got */
+ /* FIXME: Perhaps try the next block? */
+ }
+ raw_inode = (struct sysv_inode *) bh->b_data + j;
+ for (; j < sb->sv_inodes_per_block ; j++, raw_inode++)
+ if (raw_inode->i_mode == 0 && raw_inode->i_nlink == 0)
+ count++;
+ brelse(bh);
+ }
+ /* Compare with the counter stored in the super-block. */
+ if (count != *sb->sv_sb_total_free_inodes) {
+ printk("sysv_count_free_inodes: free inode count was %d, correcting to %d\n",(short)(*sb->sv_sb_total_free_inodes),count);
+ if (!(sb->s_flags & MS_RDONLY)) {
+ *sb->sv_sb_total_free_inodes = count;
+ mark_buffer_dirty(sb->sv_bh2, 1); /* super-block has been modified */
+ sb->s_dirt = 1; /* and needs time stamp */
+ }
+ }
+ unlock_super(sb);
+ return count;
+#else
+ return *sb->sv_sb_total_free_inodes;
+#endif
+}
+
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
new file mode 100644
index 000000000..0b61ae7c7
--- /dev/null
+++ b/fs/sysv/inode.c
@@ -0,0 +1,951 @@
+/*
+ * linux/fs/sysv/inode.c
+ *
+ * minix/inode.c
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * xenix/inode.c
+ * Copyright (C) 1992 Doug Evans
+ *
+ * coh/inode.c
+ * Copyright (C) 1993 Pascal Haible, Bruno Haible
+ *
+ * sysv/inode.c
+ * Copyright (C) 1993 Paul B. Monday
+ *
+ * sysv/inode.c
+ * Copyright (C) 1993 Bruno Haible
+ *
+ * This file contains code for allocating/freeing inodes and for read/writing
+ * the superblock.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/sysv_fs.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+
+#include <asm/segment.h>
+
+/*
+ * put_inode entry of sysv_sops.  An inode whose link count has dropped to
+ * zero gets its size cleared, is truncated and is then freed; an inode
+ * that still has links is left untouched.
+ */
+void sysv_put_inode(struct inode *inode)
+{
+	if (!inode->i_nlink) {
+		inode->i_size = 0;
+		sysv_truncate(inode);
+		sysv_free_inode(inode);
+	}
+}
+
+
+/*
+ * Super-block operation table; sysv_read_super() installs it as sb->s_op.
+ * The initializer is positional, so the order of the entries must match
+ * the declaration of struct super_operations.  The last slot is left
+ * unset (NULL).
+ */
+static struct super_operations sysv_sops = {
+ sysv_read_inode,
+ sysv_notify_change,
+ sysv_write_inode,
+ sysv_put_inode,
+ sysv_put_super,
+ sysv_write_super,
+ sysv_statfs,
+ NULL
+};
+
+/* The following functions try to recognize specific filesystems.
+ * We recognize:
+ * - Xenix FS by its magic number.
+ * - SystemV FS by its magic number.
+ * - Coherent FS by its funny fname/fpack field.
+ * We discriminate among SystemV4 and SystemV2 FS by the assumption that
+ * the time stamp is not < 01-01-1980.
+ */
+
+/*
+ * Fill in every block-size dependent field of sb for a filesystem that
+ * uses 512 byte blocks.  All values are constants derived from 512.
+ */
+static void detected_bs512 (struct super_block *sb)
+{
+	/* the block itself */
+	sb->sv_block_size_bits = 9;
+	sb->sv_block_size = 512;
+	sb->sv_block_size_1 = 512-1;
+	sb->sv_block_size_ratio_bits = 1;
+	sb->sv_block_size_ratio = 2;
+
+	/* inodes per block (512/64) */
+	sb->sv_inodes_per_block_bits = 9-6;
+	sb->sv_inodes_per_block = 512/64;
+	sb->sv_inodes_per_block_1 = 512/64-1;
+
+	/* block numbers per indirect block (512/4), squared and cubed */
+	sb->sv_ind_per_block_bits = 9-2;
+	sb->sv_ind_per_block_2_bits = 2 * sb->sv_ind_per_block_bits;
+	sb->sv_ind_per_block = 512/4;
+	sb->sv_ind_per_block_1 = 512/4-1;
+	sb->sv_ind_per_block_2 = (512/4)*(512/4);
+	sb->sv_ind_per_block_2_1 = (512/4)*(512/4)-1;
+	sb->sv_ind_per_block_3 = (512/4)*(512/4)*(512/4);
+
+	/* first block index beyond direct + all three indirection levels */
+	sb->sv_toobig_block = 10 + sb->sv_ind_per_block
+		+ sb->sv_ind_per_block_2 + sb->sv_ind_per_block_3;
+
+	/* bytes covered by one indirect / one double indirect block */
+	sb->sv_ind_per_block_block_size_bits = (9-2)+9;
+	sb->sv_ind_per_block_block_size_1 = (512/4)*512-1;
+	sb->sv_ind_per_block_2_block_size_bits = (9-2)+(9-2)+9;
+	sb->sv_ind_per_block_2_block_size_1 = (512/4)*(512/4)*512-1;
+
+	/* bytes reachable with 0, 1 and 2 levels of indirection */
+	sb->sv_ind0_size = 10 * 512;
+	sb->sv_ind1_size = (10 + (512/4))* 512;
+	sb->sv_ind2_size = (10 + (512/4) + (512/4)*(512/4)) * 512;
+}
+
+/*
+ * Fill in every block-size dependent field of sb for a filesystem that
+ * uses 1024 byte blocks.  All values are constants derived from 1024.
+ */
+static void detected_bs1024 (struct super_block *sb)
+{
+	/* the block itself */
+	sb->sv_block_size_bits = 10;
+	sb->sv_block_size = 1024;
+	sb->sv_block_size_1 = 1024-1;
+	sb->sv_block_size_ratio_bits = 0;
+	sb->sv_block_size_ratio = 1;
+
+	/* inodes per block (1024/64) */
+	sb->sv_inodes_per_block_bits = 10-6;
+	sb->sv_inodes_per_block = 1024/64;
+	sb->sv_inodes_per_block_1 = 1024/64-1;
+
+	/* block numbers per indirect block (1024/4), squared and cubed */
+	sb->sv_ind_per_block_bits = 10-2;
+	sb->sv_ind_per_block_2_bits = 2 * sb->sv_ind_per_block_bits;
+	sb->sv_ind_per_block = 1024/4;
+	sb->sv_ind_per_block_1 = 1024/4-1;
+	sb->sv_ind_per_block_2 = (1024/4)*(1024/4);
+	sb->sv_ind_per_block_2_1 = (1024/4)*(1024/4)-1;
+	sb->sv_ind_per_block_3 = (1024/4)*(1024/4)*(1024/4);
+
+	/* first block index beyond direct + all three indirection levels */
+	sb->sv_toobig_block = 10 + sb->sv_ind_per_block
+		+ sb->sv_ind_per_block_2 + sb->sv_ind_per_block_3;
+
+	/* bytes covered by one indirect / one double indirect block */
+	sb->sv_ind_per_block_block_size_bits = (10-2)+10;
+	sb->sv_ind_per_block_block_size_1 = (1024/4)*1024-1;
+	sb->sv_ind_per_block_2_block_size_bits = (10-2)+(10-2)+10;
+	sb->sv_ind_per_block_2_block_size_1 = (1024/4)*(1024/4)*1024-1;
+
+	/* bytes reachable with 0, 1 and 2 levels of indirection */
+	sb->sv_ind0_size = 10 * 1024;
+	sb->sv_ind1_size = (10 + (1024/4))* 1024;
+	sb->sv_ind2_size = (10 + (1024/4) + (1024/4)*(1024/4)) * 1024;
+}
+
+/*
+ * First stage of Xenix recognition.  bh is accepted when it starts with a
+ * super-block carrying the magic number 0x2b5544 and an s_type of 1 (512
+ * byte blocks) or 2 (1024 byte blocks).  On success the block size fields
+ * and sv_type of sb are set and a printable name is returned; otherwise
+ * NULL is returned.
+ */
+static const char* detect_xenix (struct super_block *sb, struct buffer_head *bh)
+{
+	struct xenix_super_block * xsb = (struct xenix_super_block *) bh->b_data;
+
+	if (xsb->s_magic != 0x2b5544)
+		return NULL;
+	if (xsb->s_type == 1)
+		detected_bs512(sb);
+	else if (xsb->s_type == 2)
+		detected_bs1024(sb);
+	else
+		return NULL;
+	sb->sv_type = FSTYPE_XENIX;
+	return "Xenix";
+}
+/*
+ * Second stage of Xenix recognition, run once the super-block has been
+ * read with the filesystem's own block size.  bh1 and bh2 hold the first
+ * and second half of the 1024 byte super-block; with 1024 byte blocks both
+ * are the same buffer.  Wires the pointers into the on-disk super-block
+ * and the layout fields into sb.  Returns sb, or NULL when the sanity
+ * check of the 512 byte case fails.
+ */
+static struct super_block * detected_xenix (struct super_block *sb, struct buffer_head *bh1, struct buffer_head *bh2)
+{
+	struct xenix_super_block * first = (struct xenix_super_block *) bh1->b_data;
+	struct xenix_super_block * second = first; /* block size = 1024, so bh1 = bh2 */
+
+	if (sb->sv_block_size != BLOCK_SIZE) {
+		/* block size = 512, so bh1 != bh2: bias the pointer so that the
+		   fields of the second half land inside bh2 */
+		second = (struct xenix_super_block *) (bh2->b_data - BLOCK_SIZE/2);
+		/* sanity check */
+		if (second->s_magic != 0x2b5544)
+			return NULL;
+	}
+
+	/* behaviour switches and limits */
+	sb->sv_convert = 0;
+	sb->sv_kludge_symlinks = 1;
+	sb->sv_truncate = 1;
+	sb->sv_link_max = XENIX_LINK_MAX;
+	sb->sv_fic_size = XENIX_NICINOD;
+	sb->sv_flc_size = XENIX_NICFREE;
+
+	/* buffers and the super-block halves inside them */
+	sb->sv_bh1 = bh1;
+	sb->sv_bh2 = bh2;
+	sb->sv_sbd1 = (char *) first;
+	sb->sv_sbd2 = (char *) second;
+
+	/* fields living in the first half */
+	sb->sv_sb_fic_count = &first->s_ninode;
+	sb->sv_sb_fic_inodes = &first->s_inode[0];
+	sb->sv_sb_flc_count = &first->s_nfree;
+	sb->sv_sb_flc_blocks = &first->s_free[0];
+
+	/* fields living in the second half */
+	sb->sv_sb_total_free_inodes = &second->s_tinode;
+	sb->sv_sb_total_free_blocks = &second->s_tfree;
+	sb->sv_sb_time = &second->s_time;
+
+	/* layout */
+	sb->sv_block_base = 0;
+	sb->sv_firstinodezone = 2;
+	sb->sv_firstdatazone = first->s_isize;
+	sb->sv_nzones = first->s_fsize;
+	sb->sv_ndatazones = sb->sv_nzones - sb->sv_firstdatazone;
+	return sb;
+}
+
+/*
+ * First stage of SystemV4 recognition.  The super-block is looked for in
+ * the second half of the 1024 byte buffer bh.  It needs the magic number
+ * 0xfd187e20, a time stamp that is not before 01-01-1980 and an s_type of
+ * 1 (512 byte blocks) or 2 (1024 byte blocks).  On success the block size
+ * fields and sv_type of sb are set and a printable name is returned;
+ * otherwise NULL is returned.
+ */
+static const char* detect_sysv4 (struct super_block *sb, struct buffer_head *bh)
+{
+	struct sysv4_super_block * s4 =
+		(struct sysv4_super_block *) (bh->b_data + BLOCK_SIZE/2);
+
+	/* a too old time stamp is likely to happen on SystemV2 FS */
+	if (s4->s_magic != 0xfd187e20 || s4->s_time < 315532800)
+		return NULL;
+	if (s4->s_type == 1)
+		detected_bs512(sb);
+	else if (s4->s_type == 2)
+		detected_bs1024(sb);
+	else
+		return NULL;
+	sb->sv_type = FSTYPE_SYSV4;
+	return "SystemV";
+}
+/*
+ * Second stage of SystemV4 recognition.  With 1024 byte blocks the
+ * super-block sits in the second half of bh; with 512 byte blocks bh
+ * holds exactly the super-block, which is then checked once more.  Wires
+ * the pointers into the on-disk super-block and the layout fields into
+ * sb.  Returns sb, or NULL when the re-check fails.
+ */
+static struct super_block * detected_sysv4 (struct super_block *sb, struct buffer_head *bh)
+{
+	struct sysv4_super_block * s4;
+
+	if (sb->sv_block_size != BLOCK_SIZE) {
+		s4 = (struct sysv4_super_block *) bh->b_data;
+		/* sanity check */
+		if (s4->s_magic != 0xfd187e20 || s4->s_time < 315532800)
+			return NULL;
+	} else {
+		s4 = (struct sysv4_super_block *) (bh->b_data + BLOCK_SIZE/2);
+	}
+
+	/* behaviour switches and limits */
+	sb->sv_convert = 0;
+	sb->sv_kludge_symlinks = 0; /* ?? */
+	sb->sv_truncate = 1;
+	sb->sv_link_max = SYSV_LINK_MAX;
+	sb->sv_fic_size = SYSV_NICINOD;
+	sb->sv_flc_size = SYSV_NICFREE;
+
+	/* a single buffer holds the whole super-block */
+	sb->sv_bh1 = sb->sv_bh2 = bh;
+	sb->sv_sbd1 = (char *) s4;
+	sb->sv_sbd2 = (char *) s4;
+
+	/* free inode cache, free block cache, totals, time stamp */
+	sb->sv_sb_fic_count = &s4->s_ninode;
+	sb->sv_sb_fic_inodes = &s4->s_inode[0];
+	sb->sv_sb_flc_count = &s4->s_nfree;
+	sb->sv_sb_flc_blocks = &s4->s_free[0];
+	sb->sv_sb_total_free_inodes = &s4->s_tinode;
+	sb->sv_sb_total_free_blocks = &s4->s_tfree;
+	sb->sv_sb_time = &s4->s_time;
+
+	/* layout */
+	sb->sv_block_base = 0;
+	sb->sv_firstinodezone = 2;
+	sb->sv_firstdatazone = s4->s_isize;
+	sb->sv_nzones = s4->s_fsize;
+	sb->sv_ndatazones = sb->sv_nzones - sb->sv_firstdatazone;
+	return sb;
+}
+
+/*
+ * First stage of SystemV Release 2 recognition.  Same tests as for
+ * SystemV4, but applied through the SystemV2 super-block layout: magic
+ * number 0xfd187e20, a time stamp that is not before 01-01-1980 and an
+ * s_type of 1 (512 byte blocks) or 2 (1024 byte blocks).  On success the
+ * block size fields and sv_type of sb are set and a printable name is
+ * returned; otherwise NULL is returned.
+ */
+static const char* detect_sysv2 (struct super_block *sb, struct buffer_head *bh)
+{
+	struct sysv2_super_block * s2 =
+		(struct sysv2_super_block *) (bh->b_data + BLOCK_SIZE/2);
+
+	/* a too old time stamp is likely to happen on SystemV4 FS */
+	if (s2->s_magic != 0xfd187e20 || s2->s_time < 315532800)
+		return NULL;
+	if (s2->s_type == 1)
+		detected_bs512(sb);
+	else if (s2->s_type == 2)
+		detected_bs1024(sb);
+	else
+		return NULL;
+	sb->sv_type = FSTYPE_SYSV2;
+	return "SystemV Release 2";
+}
+/*
+ * Second stage of SystemV Release 2 recognition.  With 1024 byte blocks
+ * the super-block sits in the second half of bh; with 512 byte blocks bh
+ * holds exactly the super-block, which is then checked once more.  Wires
+ * the pointers into the on-disk super-block and the layout fields into
+ * sb.  Returns sb, or NULL when the re-check fails.
+ */
+static struct super_block * detected_sysv2 (struct super_block *sb, struct buffer_head *bh)
+{
+	struct sysv2_super_block * s2;
+
+	if (sb->sv_block_size != BLOCK_SIZE) {
+		s2 = (struct sysv2_super_block *) bh->b_data;
+		/* sanity check */
+		if (s2->s_magic != 0xfd187e20 || s2->s_time < 315532800)
+			return NULL;
+	} else {
+		s2 = (struct sysv2_super_block *) (bh->b_data + BLOCK_SIZE/2);
+	}
+
+	/* behaviour switches and limits */
+	sb->sv_convert = 0;
+	sb->sv_kludge_symlinks = 0; /* ?? */
+	sb->sv_truncate = 1;
+	sb->sv_link_max = SYSV_LINK_MAX;
+	sb->sv_fic_size = SYSV_NICINOD;
+	sb->sv_flc_size = SYSV_NICFREE;
+
+	/* a single buffer holds the whole super-block */
+	sb->sv_bh1 = sb->sv_bh2 = bh;
+	sb->sv_sbd1 = (char *) s2;
+	sb->sv_sbd2 = (char *) s2;
+
+	/* free inode cache, free block cache, totals, time stamp */
+	sb->sv_sb_fic_count = &s2->s_ninode;
+	sb->sv_sb_fic_inodes = &s2->s_inode[0];
+	sb->sv_sb_flc_count = &s2->s_nfree;
+	sb->sv_sb_flc_blocks = &s2->s_free[0];
+	sb->sv_sb_total_free_inodes = &s2->s_tinode;
+	sb->sv_sb_total_free_blocks = &s2->s_tfree;
+	sb->sv_sb_time = &s2->s_time;
+
+	/* layout */
+	sb->sv_block_base = 0;
+	sb->sv_firstinodezone = 2;
+	sb->sv_firstdatazone = s2->s_isize;
+	sb->sv_nzones = s2->s_fsize;
+	sb->sv_ndatazones = sb->sv_nzones - sb->sv_firstdatazone;
+	return sb;
+}
+
+/*
+ * First stage of Coherent recognition.  There is no magic number; the
+ * super-block in the second half of bh is accepted when s_fname and
+ * s_fpack both start with one of their two known 6 byte values.  On
+ * success sb is set up for 512 byte blocks, sv_type is set and a
+ * printable name is returned; otherwise NULL is returned.
+ */
+static const char* detect_coherent (struct super_block *sb, struct buffer_head *bh)
+{
+	struct coh_super_block * csb =
+		(struct coh_super_block *) (bh->b_data + BLOCK_SIZE/2);
+	int fname_ok, fpack_ok;
+
+	fname_ok = !memcmp(csb->s_fname,"noname",6) || !memcmp(csb->s_fname,"xxxxx ",6);
+	fpack_ok = !memcmp(csb->s_fpack,"nopack",6) || !memcmp(csb->s_fpack,"xxxxx\n",6);
+	if (!fname_ok || !fpack_ok)
+		return NULL;
+	detected_bs512(sb);
+	sb->sv_type = FSTYPE_COH;
+	return "Coherent";
+}
+/*
+ * Second stage of Coherent recognition.  bh holds exactly the 512 byte
+ * block with the super-block, which is checked once more.  Wires the
+ * pointers into the on-disk super-block and the layout fields into sb;
+ * sv_convert is switched on and s_fsize goes through from_coh_ulong().
+ * Returns sb, or NULL when the re-check fails.
+ */
+static struct super_block * detected_coherent (struct super_block *sb, struct buffer_head *bh)
+{
+	struct coh_super_block * csb = (struct coh_super_block *) bh->b_data;
+	int fname_ok, fpack_ok;
+
+	/* sanity check */
+	fname_ok = !memcmp(csb->s_fname,"noname",6) || !memcmp(csb->s_fname,"xxxxx ",6);
+	fpack_ok = !memcmp(csb->s_fpack,"nopack",6) || !memcmp(csb->s_fpack,"xxxxx\n",6);
+	if (!fname_ok || !fpack_ok)
+		return NULL;
+
+	/* behaviour switches and limits */
+	sb->sv_convert = 1;
+	sb->sv_kludge_symlinks = 1;
+	sb->sv_truncate = 1;
+	sb->sv_link_max = COH_LINK_MAX;
+	sb->sv_fic_size = COH_NICINOD;
+	sb->sv_flc_size = COH_NICFREE;
+
+	/* a single buffer holds the whole super-block */
+	sb->sv_bh1 = sb->sv_bh2 = bh;
+	sb->sv_sbd1 = (char *) csb;
+	sb->sv_sbd2 = (char *) csb;
+
+	/* free inode cache, free block cache, totals, time stamp */
+	sb->sv_sb_fic_count = &csb->s_ninode;
+	sb->sv_sb_fic_inodes = &csb->s_inode[0];
+	sb->sv_sb_flc_count = &csb->s_nfree;
+	sb->sv_sb_flc_blocks = &csb->s_free[0];
+	sb->sv_sb_total_free_inodes = &csb->s_tinode;
+	sb->sv_sb_total_free_blocks = &csb->s_tfree;
+	sb->sv_sb_time = &csb->s_time;
+
+	/* layout */
+	sb->sv_block_base = 0;
+	sb->sv_firstinodezone = 2;
+	sb->sv_firstdatazone = csb->s_isize;
+	sb->sv_nzones = from_coh_ulong(csb->s_fsize);
+	sb->sv_ndatazones = sb->sv_nzones - sb->sv_firstdatazone;
+	return sb;
+}
+
+/*
+ * Read and recognize the super-block of a Xenix, SystemV or Coherent
+ * filesystem on sb->s_dev.
+ *
+ * The device is first probed with 1024 byte blocks: block 1 for Xenix,
+ * block 0 for SystemV4 / SystemV2 / Coherent, then blocks 9, 15 and 18
+ * for a SystemV filesystem that starts one track into the device.  The
+ * matching detect_*() routine selects the filesystem's block size; when
+ * that is not 1024 the super-block is read again with the new block size.
+ * The detected_*() routine then fills in sb.
+ *
+ * 'data' is not used.  'silent' suppresses the "unable to read" and the
+ * "Found a ... FS" messages.
+ *
+ * Returns sb with the root inode in sb->s_mounted, or NULL on failure.
+ */
+struct super_block *sysv_read_super(struct super_block *sb,void *data,
+				     int silent)
+{
+	struct buffer_head *bh;
+	const char *found;
+	int dev = sb->s_dev;
+	/* Block offset of the filesystem on the device.  It is kept in a
+	   local and stored only after detected_*() has run, because every
+	   detected_*() routine sets sb->sv_block_base to 0 and would
+	   otherwise wipe out the offset found by the track probe below. */
+	int block_base = 0;
+
+	/* The on-disk structures must have exactly their on-disk sizes. */
+	if (1024 != sizeof (struct xenix_super_block))
+		panic("Xenix FS: bad super-block size");
+	if ((512 != sizeof (struct sysv4_super_block))
+	    || (512 != sizeof (struct sysv2_super_block)))
+		panic("SystemV FS: bad super-block size");
+	if (500 != sizeof (struct coh_super_block))
+		panic("Coherent FS: bad super-block size");
+	if (64 != sizeof (struct sysv_inode))
+		panic("sysv fs: bad i-node size");
+	lock_super(sb);
+	set_blocksize(dev,BLOCK_SIZE);
+
+	/* Try to read Xenix superblock */
+	if ((bh = bread(dev, 1, BLOCK_SIZE)) != NULL) {
+		if ((found = detect_xenix(sb,bh)) != NULL)
+			goto ok;
+		brelse(bh);
+	}
+	if ((bh = bread(dev, 0, BLOCK_SIZE)) != NULL) {
+		/* Try to recognize SystemV superblock */
+		if ((found = detect_sysv4(sb,bh)) != NULL)
+			goto ok;
+		if ((found = detect_sysv2(sb,bh)) != NULL)
+			goto ok;
+		/* Try to recognize Coherent superblock */
+		if ((found = detect_coherent(sb,bh)) != NULL)
+			goto ok;
+		brelse(bh);
+	}
+	/* Try to recognize SystemV superblock */
+	/* Offset by 1 track, i.e. most probably 9, 15, or 18 kilobytes. */
+	{	static int offsets[] = { 9, 15, 18, };
+		int i;
+		for (i = 0; i < sizeof(offsets)/sizeof(offsets[0]); i++)
+			if ((bh = bread(dev, offsets[i], BLOCK_SIZE)) != NULL) {
+				/* Try to recognize SystemV superblock */
+				if ((found = detect_sysv4(sb,bh)) == NULL)
+					found = detect_sysv2(sb,bh);
+				if (found != NULL) {
+					/* offset in units of the filesystem's own blocks */
+					block_base = offsets[i] << sb->sv_block_size_ratio_bits;
+					goto ok;
+				}
+				brelse(bh);
+			}
+	}
+	sb->s_dev=0;
+	unlock_super(sb);
+	if (!silent)
+		printk("VFS: unable to read Xenix/SystemV/Coherent superblock on device %d/%d\n",MAJOR(dev),MINOR(dev));
+	return NULL;
+
+	ok:
+	if (sb->sv_block_size == BLOCK_SIZE) {
+		/* The 1 KB buffer bh can be used as it is. */
+		switch (sb->sv_type) {
+			case FSTYPE_XENIX:
+				if (!detected_xenix(sb,bh,bh))
+					goto bad_superblock;
+				break;
+			case FSTYPE_SYSV4:
+				if (!detected_sysv4(sb,bh))
+					goto bad_superblock;
+				break;
+			case FSTYPE_SYSV2:
+				if (!detected_sysv2(sb,bh))
+					goto bad_superblock;
+				break;
+			default:
+			bad_superblock:
+				brelse(bh);
+				sb->s_dev = 0;
+				unlock_super(sb);
+				printk("SysV FS: cannot read superblock in 1024 byte mode\n");
+				return NULL;
+		}
+	} else {
+		/* Switch to another block size. Unfortunately, we have to
+		   release the 1 KB block bh and read it in two parts again. */
+		struct buffer_head *bh1, *bh2;
+		unsigned long blocknr = bh->b_blocknr << sb->sv_block_size_ratio_bits;
+
+		brelse(bh);
+		set_blocksize(dev,sb->sv_block_size);
+		bh1 = NULL; bh2 = NULL;
+		switch (sb->sv_type) {
+			case FSTYPE_XENIX:
+				/* Xenix needs both halves of the old 1 KB block */
+				if ((bh1 = bread(dev, blocknr, sb->sv_block_size)) == NULL)
+					goto bad_superblock2;
+				if ((bh2 = bread(dev, blocknr+1, sb->sv_block_size)) == NULL)
+					goto bad_superblock2;
+				if (!detected_xenix(sb,bh1,bh2))
+					goto bad_superblock2;
+				break;
+			case FSTYPE_SYSV4:
+				/* the others only need the second half */
+				if ((bh2 = bread(dev, blocknr+1, sb->sv_block_size)) == NULL)
+					goto bad_superblock2;
+				if (!detected_sysv4(sb,bh2))
+					goto bad_superblock2;
+				break;
+			case FSTYPE_SYSV2:
+				if ((bh2 = bread(dev, blocknr+1, sb->sv_block_size)) == NULL)
+					goto bad_superblock2;
+				if (!detected_sysv2(sb,bh2))
+					goto bad_superblock2;
+				break;
+			case FSTYPE_COH:
+				if ((bh2 = bread(dev, blocknr+1, sb->sv_block_size)) == NULL)
+					goto bad_superblock2;
+				if (!detected_coherent(sb,bh2))
+					goto bad_superblock2;
+				break;
+			default:
+			bad_superblock2:
+				brelse(bh1);
+				brelse(bh2);
+				set_blocksize(sb->s_dev,BLOCK_SIZE);
+				sb->s_dev = 0;
+				unlock_super(sb);
+				printk("SysV FS: cannot read superblock in 512 byte mode\n");
+				return NULL;
+		}
+	}
+	/* detected_*() has reset sv_block_base to 0: store the real offset
+	   now, before the root inode is read. */
+	sb->sv_block_base = block_base;
+	sb->sv_ninodes = (sb->sv_firstdatazone - sb->sv_firstinodezone) << sb->sv_inodes_per_block_bits;
+	if (!silent)
+		printk("VFS: Found a %s FS (block size = %d) on device %d/%d\n",found,sb->sv_block_size,MAJOR(dev),MINOR(dev));
+	sb->s_magic = SYSV_MAGIC_BASE + sb->sv_type;
+	/* The buffer code now supports block size 512 as well as 1024. */
+	sb->s_blocksize = sb->sv_block_size;
+	sb->s_blocksize_bits = sb->sv_block_size_bits;
+	/* set up enough so that it can read an inode */
+	sb->s_dev = dev;
+	sb->s_op = &sysv_sops;
+	sb->s_mounted = iget(sb,SYSV_ROOT_INO);
+	unlock_super(sb);
+	if (!sb->s_mounted) {
+		/* sysv_put_super() releases the super-block buffers again */
+		sysv_put_super(sb);
+		printk("SysV FS: get root inode failed\n");
+		return NULL;
+	}
+	sb->s_dirt = 1;
+	/* brelse(bh); resp. brelse(bh1); brelse(bh2);
+	   occurs when the disk is unmounted. */
+	return sb;
+}
+
+/*
+ * This is only called on sync() and umount(), when s_dirt=1.
+ * If either super-block buffer is dirty, the current time (converted with
+ * to_coh_ulong() when sv_convert is set) is stored as the super-block
+ * time stamp and the second buffer is marked dirty.  s_dirt is cleared in
+ * any case.
+ */
+void sysv_write_super (struct super_block *sb)
+{
+	unsigned long now;
+	int must_stamp;
+
+	lock_super(sb);
+	must_stamp = sb->sv_bh1->b_dirt || sb->sv_bh2->b_dirt;
+	if (must_stamp) {
+		/* The super block is going to be written out,
+		   so attach the current time stamp. */
+		now = CURRENT_TIME;
+		if (sb->sv_convert)
+			now = to_coh_ulong(now);
+		*sb->sv_sb_time = now;
+		mark_buffer_dirty(sb->sv_bh2, 1);
+	}
+	sb->s_dirt = 0;
+	unlock_super(sb);
+}
+
+/*
+ * Give up the super-block: release the one or two buffers that hold it,
+ * switch the device back to BLOCK_SIZE if another block size was in use
+ * and clear sb->s_dev.
+ */
+void sysv_put_super(struct super_block *sb)
+{
+	/* we can assume sysv_write_super() has already been called */
+	lock_super(sb);
+	brelse(sb->sv_bh1);
+	if (sb->sv_bh2 != sb->sv_bh1) {
+		/* second buffer exists only when the two halves were read apart */
+		brelse(sb->sv_bh2);
+	}
+	/* switch back to default block size */
+	if (sb->s_blocksize != BLOCK_SIZE) {
+		set_blocksize(sb->s_dev,BLOCK_SIZE);
+	}
+	sb->s_dev = 0;
+	unlock_super(sb);
+}
+
+/*
+ * Fill in *buf with the filesystem statistics.  Every field is stored
+ * with put_fs_long().  The free block and free inode numbers come from
+ * sysv_count_free_blocks() and sysv_count_free_inodes(); f_bfree and
+ * f_bavail receive the same value.  f_fsid is not written.
+ */
+void sysv_statfs(struct super_block *sb, struct statfs *buf)
+{
+	long free_blocks;
+	unsigned long free_inodes;
+
+	/* type of filesystem, block size, total data blocks */
+	put_fs_long(sb->s_magic, &buf->f_type);
+	put_fs_long(sb->sv_block_size, &buf->f_bsize);
+	put_fs_long(sb->sv_ndatazones, &buf->f_blocks);
+
+	/* free blocks, also reported as available to non-superuser */
+	free_blocks = sysv_count_free_blocks(sb);
+	put_fs_long(free_blocks, &buf->f_bfree);
+	put_fs_long(free_blocks, &buf->f_bavail);
+
+	/* total and free file nodes */
+	put_fs_long(sb->sv_ninodes, &buf->f_files);
+	free_inodes = sysv_count_free_inodes(sb);
+	put_fs_long(free_inodes, &buf->f_ffree);
+
+	put_fs_long(SYSV_NAMELEN, &buf->f_namelen);
+	/* Don't know what value to put in buf->f_fsid */ /* file system id */
+}
+
+
+/* bmap support for running executables and shared libraries. */
+
+/*
+ * Look up slot nr of the inode's own block table.  Returns 0 for an empty
+ * slot, otherwise the stored block number plus sb->sv_block_base.
+ */
+static inline int inode_bmap(struct super_block * sb, struct inode * inode, int nr)
+{
+	int zone = inode->u.sysv_i.i_data[nr];
+
+	if (zone == 0)
+		return 0;
+	return zone + sb->sv_block_base;
+}
+
+/*
+ * Look up entry nr of the indirect block held in bh and release bh.
+ * A NULL bh yields 0.  With 'convert' set the entry goes through
+ * from_coh_ulong() first.  Returns 0 for an empty entry, otherwise the
+ * block number plus sb->sv_block_base.
+ */
+static int block_bmap(struct super_block * sb, struct buffer_head * bh, int nr, int convert)
+{
+	int zone;
+
+	if (bh == NULL)
+		return 0;
+	zone = ((sysv_zone_t *) bh->b_data) [nr];
+	if (convert)
+		zone = from_coh_ulong(zone);
+	/* the entry has been copied out, the buffer is no longer needed */
+	brelse(bh);
+	if (zone == 0)
+		return 0;
+	return zone + sb->sv_block_base;
+}
+
+/*
+ * Map file block block_nr of the inode to a block number on the device.
+ * Blocks 0..9 come straight from the inode; later blocks are reached
+ * through the single (slot 10), double (slot 11) or triple (slot 12)
+ * indirect block.  Returns 0 when the block is not allocated, when an
+ * indirect block cannot be read, or when block_nr is out of range.
+ */
+int sysv_bmap(struct inode * inode,int block_nr)
+{
+	struct super_block * sb = inode->i_sb;
+	unsigned int block = block_nr;
+	struct buffer_head * bh;
+	int convert;
+	int zone;
+
+	/* direct blocks */
+	if (block < 10)
+		return inode_bmap(sb,inode,block);
+	convert = sb->sv_convert;
+	block -= 10;
+
+	/* single indirection */
+	if (block < sb->sv_ind_per_block) {
+		zone = inode_bmap(sb,inode,10);
+		if (zone == 0)
+			return 0;
+		bh = bread(inode->i_dev,zone,sb->sv_block_size);
+		return block_bmap(sb, bh, block, convert);
+	}
+	block -= sb->sv_ind_per_block;
+
+	/* double indirection */
+	if (block < sb->sv_ind_per_block_2) {
+		zone = inode_bmap(sb,inode,11);
+		if (zone == 0)
+			return 0;
+		bh = bread(inode->i_dev,zone,sb->sv_block_size);
+		zone = block_bmap(sb, bh, block >> sb->sv_ind_per_block_bits, convert);
+		if (zone == 0)
+			return 0;
+		bh = bread(inode->i_dev,zone,sb->sv_block_size);
+		return block_bmap(sb, bh, block & sb->sv_ind_per_block_1, convert);
+	}
+	block -= sb->sv_ind_per_block_2;
+
+	/* triple indirection */
+	if (block < sb->sv_ind_per_block_3) {
+		zone = inode_bmap(sb,inode,12);
+		if (zone == 0)
+			return 0;
+		bh = bread(inode->i_dev,zone,sb->sv_block_size);
+		zone = block_bmap(sb, bh, block >> sb->sv_ind_per_block_2_bits, convert);
+		if (zone == 0)
+			return 0;
+		bh = bread(inode->i_dev,zone,sb->sv_block_size);
+		zone = block_bmap(sb, bh, (block >> sb->sv_ind_per_block_bits) & sb->sv_ind_per_block_1,convert);
+		if (zone == 0)
+			return 0;
+		bh = bread(inode->i_dev,zone,sb->sv_block_size);
+		return block_bmap(sb, bh, block & sb->sv_ind_per_block_1, convert);
+	}
+
+	/* out of range: say on which side */
+	if ((int)block<0)
+		printk("sysv_bmap: block<0");
+	else
+		printk("sysv_bmap: block>big");
+	return 0;
+}
+
+/* End of bmap support. */
+
+
+/* Access selected blocks of regular files (or directories) */
+
+/*
+ * Get the buffer for the block referenced by slot nr of the inode's own
+ * block table.  If the slot is empty and 'create' is set, a new block is
+ * allocated and entered into the slot.  Returns NULL when the slot is
+ * empty and 'create' is 0, or when no block could be allocated.
+ */
+static struct buffer_head * inode_getblk(struct inode * inode, int nr, int create)
+{
+ struct super_block *sb;
+ unsigned long tmp;
+ unsigned long *p;
+ struct buffer_head * result;
+
+ sb = inode->i_sb;
+ p = inode->u.sysv_i.i_data + nr;
+repeat:
+ tmp = *p;
+ if (tmp) {
+ result = sv_getblk(sb, inode->i_dev, tmp);
+ /* the slot is looked at again after sv_getblk(): if it changed in
+    the meantime, drop the buffer and start over */
+ if (tmp == *p)
+ return result;
+ brelse(result);
+ goto repeat;
+ }
+ if (!create)
+ return NULL;
+ tmp = sysv_new_block(sb);
+ if (!tmp)
+ return NULL;
+ result = sv_getblk(sb, inode->i_dev, tmp);
+ /* the slot got filled while we were allocating: give the new block
+    back and use the one that is now in the slot */
+ if (*p) {
+ sysv_free_block(sb,tmp);
+ brelse(result);
+ goto repeat;
+ }
+ *p = tmp;
+ /* the block table is part of the inode, so the inode is dirty now */
+ inode->i_ctime = CURRENT_TIME;
+ inode->i_dirt = 1;
+ return result;
+}
+
+/*
+ * Get the buffer for the block referenced by entry nr of the indirect
+ * block held in bh.  bh is always released before returning; a NULL bh
+ * yields NULL.  If the entry is empty and 'create' is set, a new block is
+ * allocated and entered, and bh is marked dirty.  Returns NULL when bh
+ * cannot be read, when the entry is empty and 'create' is 0, or when no
+ * block could be allocated.
+ */
+static struct buffer_head * block_getblk(struct inode * inode,
+ struct buffer_head * bh, int nr, int create)
+{
+ struct super_block *sb;
+ unsigned long tmp, block;
+ sysv_zone_t *p;
+ struct buffer_head * result;
+
+ if (!bh)
+ return NULL;
+ /* make sure the indirect block itself has been read in */
+ if (!bh->b_uptodate) {
+ ll_rw_block(READ, 1, &bh);
+ wait_on_buffer(bh);
+ if (!bh->b_uptodate) {
+ brelse(bh);
+ return NULL;
+ }
+ }
+ sb = inode->i_sb;
+ p = nr + (sysv_zone_t *) bh->b_data;
+repeat:
+ /* tmp keeps the entry exactly as stored, block is the converted value */
+ block = tmp = *p;
+ if (sb->sv_convert)
+ block = from_coh_ulong(block);
+ if (tmp) {
+ result = sv_getblk(sb, bh->b_dev, block);
+ /* the entry is looked at again after sv_getblk(): if it changed in
+    the meantime, drop the buffer and start over */
+ if (tmp == *p) {
+ brelse(bh);
+ return result;
+ }
+ brelse(result);
+ goto repeat;
+ }
+ if (!create) {
+ brelse(bh);
+ return NULL;
+ }
+ block = sysv_new_block(sb);
+ if (!block) {
+ brelse(bh);
+ return NULL;
+ }
+ result = sv_getblk(sb, bh->b_dev, block);
+ /* the entry got filled while we were allocating: give the new block
+    back and use the one that is now in the entry */
+ if (*p) {
+ sysv_free_block(sb,block);
+ brelse(result);
+ goto repeat;
+ }
+ /* store the new block number in the on-disk form */
+ *p = (sb->sv_convert ? to_coh_ulong(block) : block);
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ return result;
+}
+
+/*
+ * Get the buffer for file block 'block' of the inode, walking through
+ * the single (slot 10), double (slot 11) or triple (slot 12) indirect
+ * block where needed.  With 'create' set, missing blocks on the way are
+ * allocated.  Returns NULL when the block does not exist or cannot be
+ * created, or when 'block' is out of range.
+ */
+struct buffer_head * sysv_getblk(struct inode * inode, unsigned int block, int create)
+{
+	struct super_block * sb = inode->i_sb;
+	struct buffer_head * bh;
+
+	/* direct blocks */
+	if (block < 10)
+		return inode_getblk(inode,block,create);
+	block -= 10;
+
+	/* single indirection */
+	if (block < sb->sv_ind_per_block) {
+		bh = inode_getblk(inode,10,create);
+		return block_getblk(inode, bh, block, create);
+	}
+	block -= sb->sv_ind_per_block;
+
+	/* double indirection; block_getblk() copes with a NULL bh */
+	if (block < sb->sv_ind_per_block_2) {
+		bh = inode_getblk(inode,11,create);
+		bh = block_getblk(inode, bh, block >> sb->sv_ind_per_block_bits, create);
+		return block_getblk(inode, bh, block & sb->sv_ind_per_block_1, create);
+	}
+	block -= sb->sv_ind_per_block_2;
+
+	/* triple indirection */
+	if (block < sb->sv_ind_per_block_3) {
+		bh = inode_getblk(inode,12,create);
+		bh = block_getblk(inode, bh, block >> sb->sv_ind_per_block_2_bits, create);
+		bh = block_getblk(inode, bh, (block >> sb->sv_ind_per_block_bits) & sb->sv_ind_per_block_1, create);
+		return block_getblk(inode, bh, block & sb->sv_ind_per_block_1, create);
+	}
+
+	/* out of range: say on which side */
+	if ((int)block<0)
+		printk("sysv_getblk: block<0");
+	else
+		printk("sysv_getblk: block>big");
+	return NULL;
+}
+
+/*
+ * Like sysv_getblk(), but the returned buffer is up to date: a buffer
+ * that is not is read in first.  Returns NULL when sysv_getblk() returns
+ * NULL or when the buffer is still not up to date after the read.
+ */
+struct buffer_head * sysv_file_bread(struct inode * inode, int block, int create)
+{
+	struct buffer_head * bh = sysv_getblk(inode,block,create);
+
+	if (bh && !bh->b_uptodate) {
+		ll_rw_block(READ, 1, &bh);
+		wait_on_buffer(bh);
+		if (!bh->b_uptodate) {
+			/* the read did not bring the buffer up to date */
+			brelse(bh);
+			bh = NULL;
+		}
+	}
+	return bh;
+}
+
+
+/*
+ * Decode the 3 byte number stored at p: p[0] is the least significant
+ * byte, p[2] the most significant one.  The bytes are read one at a time,
+ * so the result depends neither on the alignment of p nor on the byte
+ * order of the host.
+ */
+static inline unsigned long read3byte (char * p)
+{
+	unsigned char * b = (unsigned char *) p;
+
+	return (unsigned long) b[0]
+		| (unsigned long) b[1] << 8
+		| (unsigned long) b[2] << 16;
+}
+
+/*
+ * Store the low 24 bits of val at p as a 3 byte number: p[0] receives the
+ * least significant byte, p[2] the most significant one.  The bytes are
+ * written one at a time, so the result depends neither on the alignment
+ * of p nor on the byte order of the host.
+ */
+static inline void write3byte (char * p, unsigned long val)
+{
+	unsigned char * b = (unsigned char *) p;
+
+	b[0] = (unsigned char) val;
+	b[1] = (unsigned char) (val >> 8);
+	b[2] = (unsigned char) (val >> 16);
+}
+
+/*
+ * Decode the 3 byte number stored at p in the Coherent byte order: p[0]
+ * is the most significant byte, followed by the low 16 bits with their
+ * least significant byte first (p[1], then p[2]).  The bytes are read one
+ * at a time, so the result depends neither on the alignment of p+1 nor on
+ * the byte order of the host.
+ */
+static inline unsigned long coh_read3byte (char * p)
+{
+	unsigned char * b = (unsigned char *) p;
+
+	return (unsigned long) b[0] << 16
+		| (unsigned long) b[1]
+		| (unsigned long) b[2] << 8;
+}
+
+/*
+ * Store the low 24 bits of val at p in the Coherent byte order: p[0]
+ * receives the most significant byte, p[1] the least significant one and
+ * p[2] the middle one.  The bytes are written one at a time, so the
+ * result depends neither on the alignment of p+1 nor on the byte order of
+ * the host.
+ */
+static inline void coh_write3byte (char * p, unsigned long val)
+{
+	unsigned char * b = (unsigned char *) p;
+
+	b[0] = (unsigned char) (val >> 16);
+	b[1] = (unsigned char) val;
+	b[2] = (unsigned char) (val >> 8);
+}
+
+/*
+ * Fill the in-core inode from its on-disk copy.  On a bad inode number or
+ * a read error a message is printed and the inode is left with i_op NULL
+ * and i_mode 0.  Otherwise mode, owner, link count, size and times are
+ * copied (converted where the super-block asks for it), followed by
+ * either the device number or the 13 block numbers, and i_op is chosen
+ * according to the file type.
+ */
+void sysv_read_inode(struct inode * inode)
+{
+	struct super_block * sb = inode->i_sb;
+	struct buffer_head * bh;
+	struct sysv_inode * raw_inode;
+	unsigned int zone, slot, ino;
+	umode_t mode;
+
+	ino = inode->i_ino;
+	inode->i_op = NULL;
+	inode->i_mode = 0;
+	if (!ino || ino > sb->sv_ninodes) {
+		printk("Bad inode number on dev 0x%04x: %d is out of range\n",
+			inode->i_dev, ino);
+		return;
+	}
+	/* block of the inode table holding inode ino */
+	zone = sb->sv_firstinodezone + ((ino-1) >> sb->sv_inodes_per_block_bits);
+	bh = sv_bread(sb,inode->i_dev,zone);
+	if (!bh) {
+		printk("Major problem: unable to read inode from dev 0x%04x\n",
+			inode->i_dev);
+		return;
+	}
+	raw_inode = (struct sysv_inode *) bh->b_data + ((ino-1) & sb->sv_inodes_per_block_1);
+
+	mode = raw_inode->i_mode;
+	if (sb->sv_kludge_symlinks)
+		mode = from_coh_imode(mode);
+	/* SystemV FS: kludge permissions if ino==SYSV_ROOT_INO ?? */
+	inode->i_mode = mode;
+	inode->i_uid = raw_inode->i_uid;
+	inode->i_gid = raw_inode->i_gid;
+	inode->i_nlink = raw_inode->i_nlink;
+	if (!sb->sv_convert) {
+		inode->i_size = raw_inode->i_size;
+		inode->i_atime = raw_inode->i_atime;
+		inode->i_mtime = raw_inode->i_mtime;
+		inode->i_ctime = raw_inode->i_ctime;
+	} else {
+		inode->i_size = from_coh_ulong(raw_inode->i_size);
+		inode->i_atime = from_coh_ulong(raw_inode->i_atime);
+		inode->i_mtime = from_coh_ulong(raw_inode->i_mtime);
+		inode->i_ctime = from_coh_ulong(raw_inode->i_ctime);
+	}
+	inode->i_blocks = inode->i_blksize = 0;
+
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+		/* device nodes carry a device number instead of block numbers */
+		inode->i_rdev = raw_inode->i_a.i_rdev;
+	} else {
+		/* 10 direct + 1 single + 1 double + 1 triple indirect slot,
+		   each stored in 3 bytes */
+		for (slot = 0; slot < 10+1+1+1; slot++) {
+			if (sb->sv_convert)
+				inode->u.sysv_i.i_data[slot] =
+					coh_read3byte(&raw_inode->i_a.i_addb[3*slot]);
+			else
+				inode->u.sysv_i.i_data[slot] =
+					read3byte(&raw_inode->i_a.i_addb[3*slot]);
+		}
+	}
+	brelse(bh);
+
+	/* pick the operations for this kind of file */
+	if (S_ISREG(inode->i_mode)) {
+		inode->i_op = &sysv_file_inode_operations;
+	} else if (S_ISDIR(inode->i_mode)) {
+		inode->i_op = &sysv_dir_inode_operations;
+	} else if (S_ISLNK(inode->i_mode)) {
+		inode->i_op = &sysv_symlink_inode_operations;
+	} else if (S_ISCHR(inode->i_mode)) {
+		inode->i_op = &chrdev_inode_operations;
+	} else if (S_ISBLK(inode->i_mode)) {
+		inode->i_op = &blkdev_inode_operations;
+	} else if (S_ISFIFO(inode->i_mode)) {
+		init_fifo(inode);
+	}
+}
+
+/*
+ * To avoid inconsistencies between inodes in memory and inodes on disk.
+ * After inode_change_ok() has accepted the change, a requested mode of
+ * COH_KLUDGE_SYMLINK_MODE is replaced by COH_KLUDGE_NOT_SYMLINK on
+ * filesystems that use the symlink kludge; then the attributes are
+ * applied.  Returns 0, or the error from inode_change_ok().
+ */
+extern int sysv_notify_change(struct inode *inode, struct iattr *attr)
+{
+	int error = inode_change_ok(inode, attr);
+
+	if (error != 0)
+		return error;
+
+	if ((attr->ia_valid & ATTR_MODE)
+	    && inode->i_sb->sv_kludge_symlinks
+	    && attr->ia_mode == COH_KLUDGE_SYMLINK_MODE)
+		attr->ia_mode = COH_KLUDGE_NOT_SYMLINK;
+
+	inode_setattr(inode, attr);
+
+	return 0;
+}
+
+/*
+ * Copy the in-core inode into its on-disk slot and mark the buffer dirty.
+ * i_dirt is cleared on every path.  Returns the buffer holding the
+ * on-disk inode, which the caller has to release, or 0 when the inode
+ * number is bad or the inode block cannot be read.
+ */
+static struct buffer_head * sysv_update_inode(struct inode * inode)
+{
+	struct super_block * sb = inode->i_sb;
+	struct buffer_head * bh;
+	struct sysv_inode * raw_inode;
+	unsigned int ino, zone, slot;
+	umode_t mode;
+
+	ino = inode->i_ino;
+	if (!ino || ino > sb->sv_ninodes) {
+		printk("Bad inode number on dev 0x%04x: %d is out of range\n",
+			inode->i_dev, ino);
+		inode->i_dirt = 0;
+		return 0;
+	}
+	/* block of the inode table holding inode ino */
+	zone = sb->sv_firstinodezone + ((ino-1) >> sb->sv_inodes_per_block_bits);
+	bh = sv_bread(sb,inode->i_dev,zone);
+	if (!bh) {
+		printk("unable to read i-node block\n");
+		inode->i_dirt = 0;
+		return 0;
+	}
+	raw_inode = (struct sysv_inode *) bh->b_data + ((ino-1) & sb->sv_inodes_per_block_1);
+
+	mode = inode->i_mode;
+	if (sb->sv_kludge_symlinks)
+		mode = to_coh_imode(mode);
+	raw_inode->i_mode = mode;
+	raw_inode->i_uid = inode->i_uid;
+	raw_inode->i_gid = inode->i_gid;
+	raw_inode->i_nlink = inode->i_nlink;
+	if (!sb->sv_convert) {
+		raw_inode->i_size = inode->i_size;
+		raw_inode->i_atime = inode->i_atime;
+		raw_inode->i_mtime = inode->i_mtime;
+		raw_inode->i_ctime = inode->i_ctime;
+	} else {
+		raw_inode->i_size = to_coh_ulong(inode->i_size);
+		raw_inode->i_atime = to_coh_ulong(inode->i_atime);
+		raw_inode->i_mtime = to_coh_ulong(inode->i_mtime);
+		raw_inode->i_ctime = to_coh_ulong(inode->i_ctime);
+	}
+
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+		raw_inode->i_a.i_rdev = inode->i_rdev; /* write 2 or 3 bytes ?? */
+	} else {
+		/* 10 direct + 1 single + 1 double + 1 triple indirect slot,
+		   each stored in 3 bytes */
+		for (slot = 0; slot < 10+1+1+1; slot++) {
+			if (sb->sv_convert)
+				coh_write3byte(&raw_inode->i_a.i_addb[3*slot],inode->u.sysv_i.i_data[slot]);
+			else
+				write3byte(&raw_inode->i_a.i_addb[3*slot],inode->u.sysv_i.i_data[slot]);
+		}
+	}
+	inode->i_dirt=0;
+	mark_buffer_dirty(bh, 1);
+	return bh;
+}
+
+/*
+ * write_inode entry of sysv_sops: copy the inode into its (now dirty)
+ * buffer via sysv_update_inode() and release that buffer without waiting
+ * for it to be written.
+ */
+void sysv_write_inode(struct inode * inode)
+{
+	struct buffer_head *inode_bh = sysv_update_inode(inode);
+
+	/* inode_bh is 0 when the update failed; it is handed on unchanged */
+	brelse(inode_bh);
+}
+
+/*
+ * Copy the inode into its buffer and, if that buffer is dirty, write it
+ * out and wait for the write to finish.  Returns 0 on success and -1 when
+ * the inode could not be updated or the write failed.
+ */
+int sysv_sync_inode(struct inode * inode)
+{
+	struct buffer_head *bh = sysv_update_inode(inode);
+	int err = 0;
+
+	if (!bh) {
+		err = -1;
+	} else if (bh->b_dirt) {
+		ll_rw_block(WRITE, 1, &bh);
+		wait_on_buffer(bh);
+		/* requested but not up to date afterwards: the write failed */
+		if (bh->b_req && !bh->b_uptodate) {
+			printk ("IO error syncing sysv inode [%04x:%08lx]\n",
+				inode->i_dev, inode->i_ino);
+			err = -1;
+		}
+	}
+	brelse (bh);
+	return err;
+}
+
diff --git a/fs/sysv/mmap.c b/fs/sysv/mmap.c
new file mode 100644
index 000000000..3ec3867a9
--- /dev/null
+++ b/fs/sysv/mmap.c
@@ -0,0 +1,85 @@
+/*
+ * linux/fs/sysv/mmap.c
+ *
+ * mm/memory.c, mm/mmap.c
+ * Copyright (C) 1991, 1992, 1993 Linus Torvalds
+ *
+ * nfs/mmap.c
+ * Copyright (C) 1993 Jon Tombs
+ *
+ * fs/msdos/mmap.c
+ * Copyright (C) 1994 Jacques Gelinas
+ *
+ * fs/sysv/mmap.c
+ * Copyright (C) 1994 Bruno Haible
+ *
+ * SystemV/Coherent mmap handling
+ */
+
+#include <asm/segment.h>
+
+#include <linux/fs.h>
+#include <linux/sysv_fs.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/malloc.h>
+
+/*
+ * Fill in the supplied page for mmap.
+ * The page containing 'address' is read from the file with
+ * sysv_file_read(); whatever the read does not deliver, up to the end of
+ * the page or of the mapping, is cleared.  An I/O error leaves that whole
+ * range cleared.  Returns 'page'.
+ */
+static unsigned long sysv_file_mmap_nopage (struct vm_area_struct * area,
+	unsigned long address, unsigned long page, int no_share)
+{
+	int wanted, got, old_fs;
+	struct file filp;
+
+	address &= PAGE_MASK;
+	/* at most one page, and never beyond the end of the mapping */
+	wanted = area->vm_end - address;
+	if (wanted > PAGE_SIZE)
+		wanted = PAGE_SIZE;
+	/* prepare a file pointer */
+	filp.f_reada = 0;
+	filp.f_pos = address - area->vm_start + area->vm_offset;
+	/* read from the file. page is in kernel space, not user space. */
+	old_fs = get_fs();
+	set_fs(get_ds());
+	got = sysv_file_read (area->vm_inode, &filp, (char *)page, wanted);
+	set_fs(old_fs);
+	if (got >= 0)
+		wanted -= got;
+	else
+		got = 0; /* do nothing on I/O error ?? */
+	/* clear what the read left untouched */
+	if (wanted > 0)
+		memset((char *)page + got, 0, wanted);
+	return page;
+}
+
+/*
+ * Operations for mapped SystemV/Coherent files; sysv_mmap() installs this
+ * table as vma->vm_ops.  Only the nopage handler is provided.
+ */
+static struct vm_operations_struct sysv_file_mmap = {
+ NULL, /* open */
+ NULL, /* close */
+ sysv_file_mmap_nopage, /* nopage */
+ NULL, /* wppage */
+ NULL, /* share */
+ NULL, /* unmap */
+};
+
+/*
+ * Set up a mapping of a regular file.
+ *
+ * Returns -EINVAL for a mapping with PAGE_RW set or with an offset that
+ * is not a multiple of the filesystem block size, -EACCES for an inode
+ * without super-block or one that is not a regular file, and 0 on
+ * success.  On success the access time is updated (unless IS_RDONLY()),
+ * the inode is attached to the vma with an extra reference and
+ * sysv_file_mmap becomes the vma's operation table.
+ */
+int sysv_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
+{
+	if (vma->vm_page_prot & PAGE_RW)	/* only PAGE_COW or read-only supported right now */
+		return -EINVAL;
+	/* This test has to come before the first use of inode->i_sb. */
+	if (!inode->i_sb)
+		return -EACCES;
+	if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
+		return -EINVAL;
+	if (!S_ISREG(inode->i_mode))
+		return -EACCES;
+	if (!IS_RDONLY(inode)) {
+		inode->i_atime = CURRENT_TIME;
+		inode->i_dirt = 1;
+	}
+
+	vma->vm_inode = inode;
+	inode->i_count++;
+	vma->vm_ops = &sysv_file_mmap;
+	return 0;
+}
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
new file mode 100644
index 000000000..c9fd77158
--- /dev/null
+++ b/fs/sysv/namei.c
@@ -0,0 +1,822 @@
+/*
+ * linux/fs/sysv/namei.c
+ *
+ * minix/namei.c
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * coh/namei.c
+ * Copyright (C) 1993 Pascal Haible, Bruno Haible
+ *
+ * sysv/namei.c
+ * Copyright (C) 1993 Bruno Haible
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/sysv_fs.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+
+/*
+ * Compare name[0..len-1] (not zero-terminated) with buffer[0..], a name
+ * that is padded with zeroes up to buffer[0..maxlen-1].
+ * Returns 1 if both spell the same name, 0 otherwise.
+ */
+static inline int namecompare(int len, int maxlen,
+	const char * name, const char * buffer)
+{
+	/* name does not fit, or the stored name goes on past 'len' bytes */
+	if (len > maxlen || (len < maxlen && buffer[len] != '\0'))
+		return 0;
+	return memcmp(name, buffer, len) == 0;
+}
+
+/*
+ * sysv_match() tells whether directory entry 'de' carries the name
+ * name[0..len-1]. The name is in our data space nowadays, so the
+ * comparison is plain memory comparison done by namecompare(); a few
+ * sanity tests are made first.
+ *
+ * NOTE! unlike strncmp, sysv_match returns 1 for success, 0 for failure.
+ */
+static int sysv_match(int len, const char * name, struct sysv_dir_entry * de)
+{
+	/* free slots (inode 0) never match */
+	if (de->inode == 0)
+		return 0;
+	/* neither do names longer than a directory slot can hold */
+	if (len > SYSV_NAMELEN)
+		return 0;
+	/* "" means "." ---> so paths like "/usr/lib//libc.a" work */
+	if (len == 0 && de->name[0] == '.' && de->name[1] == '\0')
+		return 1;
+	return namecompare(len, SYSV_NAMELEN, name, de->name);
+}
+
+/*
+ * sysv_find_entry()
+ *
+ * finds an entry in the specified directory with the wanted name. It
+ * returns the cache buffer in which the entry was found, and the entry
+ * itself (as a parameter - res_dir). It does NOT read the inode of the
+ * entry - you'll have to do that yourself if you want to.
+ *
+ * A name longer than SYSV_NAMELEN is cut to SYSV_NAMELEN when the
+ * superblock has sv_truncate set; otherwise it is never found.
+ * Directory blocks that cannot be read are skipped.
+ * On success the buffer is returned without being released here; on
+ * failure NULL is returned and *res_dir is NULL.
+ */
+static struct buffer_head * sysv_find_entry(struct inode * dir,
+ const char * name, int namelen, struct sysv_dir_entry ** res_dir)
+{
+ struct super_block * sb;
+ unsigned long pos, block, offset; /* pos = block * block_size + offset */
+ struct buffer_head * bh;
+
+ *res_dir = NULL;
+ if (!dir)
+ return NULL;
+ sb = dir->i_sb;
+ if (namelen > SYSV_NAMELEN)
+ if (sb->sv_truncate) /* the else below pairs with this inner if */
+ namelen = SYSV_NAMELEN;
+ else
+ return NULL;
+ bh = NULL;
+ pos = block = offset = 0;
+ while (pos < dir->i_size) {
+ if (!bh) {
+ bh = sysv_file_bread(dir,block,0);
+ if (!bh) {
+ /* offset = 0; */ block++;
+ pos += sb->sv_block_size;
+ continue;
+ }
+ }
+ if (sysv_match(namelen, name,
+ *res_dir = (struct sysv_dir_entry *) (bh->b_data + offset) ))
+ return bh;
+ pos += SYSV_DIRSIZE;
+ offset += SYSV_DIRSIZE;
+ if (offset < sb->sv_block_size)
+ continue;
+ brelse(bh); /* end of this block: go on with the next one */
+ bh = NULL;
+ offset = 0; block++;
+ }
+ brelse(bh);
+ *res_dir = NULL;
+ return NULL;
+}
+
+/*
+ * sysv_lookup()
+ *
+ * Looks up name[0..len-1] in directory 'dir' and hands back its inode
+ * through 'result' (NULL on any failure).
+ *
+ * Returns 0 on success, -ENOENT if 'dir' is NULL, is not a directory or
+ * does not contain the name, and -EACCES if iget() fails.
+ * Whenever 'dir' is non-NULL it is released with iput() before returning.
+ */
+int sysv_lookup(struct inode * dir,const char * name, int len,
+	struct inode ** result)
+{
+	struct buffer_head * bh;
+	struct sysv_dir_entry * de;
+	int ino;
+	int error = -ENOENT;
+
+	*result = NULL;
+	if (!dir)
+		return -ENOENT;
+	if (S_ISDIR(dir->i_mode)) {
+		bh = sysv_find_entry(dir, name, len, &de);
+		if (bh) {
+			/* copy the inode number before the buffer is released */
+			ino = de->inode;
+			brelse(bh);
+			*result = iget(dir->i_sb, ino);
+			error = *result ? 0 : -EACCES;
+		}
+	}
+	iput(dir);
+	return error;
+}
+
+/*
+ * sysv_add_entry()
+ *
+ * adds a file entry to the specified directory, returning a possible
+ * error value if it fails.
+ *
+ * NOTE!! The inode part of 'de' is left at 0 - which means you
+ * may not sleep between calling this and putting something into
+ * the entry, as someone else might have used it while you slept.
+ *
+ * The first slot whose inode field is 0 is used; if none exists the
+ * directory grows by one slot. On success 0 is returned, *res_buf holds
+ * the (not yet released) buffer and *res_dir the slot with its name
+ * filled in and zero-padded.
+ * Errors: -ENOENT (dir is NULL or the name is empty), -ENAMETOOLONG
+ * (name longer than SYSV_NAMELEN and sv_truncate not set), -ENOSPC
+ * (sysv_file_bread() failed), -EEXIST (a used slot has the same name).
+ * 'dir' is not released here.
+ */
+static int sysv_add_entry(struct inode * dir,
+ const char * name, int namelen,
+ struct buffer_head ** res_buf,
+ struct sysv_dir_entry ** res_dir)
+{
+ struct super_block * sb;
+ int i;
+ unsigned long pos, block, offset; /* pos = block * block_size + offset */
+ struct buffer_head * bh;
+ struct sysv_dir_entry * de;
+
+ *res_buf = NULL;
+ *res_dir = NULL;
+ if (!dir)
+ return -ENOENT;
+ sb = dir->i_sb;
+ if (namelen > SYSV_NAMELEN)
+ if (sb->sv_truncate) /* the else below pairs with this inner if */
+ namelen = SYSV_NAMELEN;
+ else
+ return -ENAMETOOLONG;
+ if (!namelen)
+ return -ENOENT;
+ bh = NULL;
+ pos = block = offset = 0;
+ while (1) {
+ if (!bh) {
+ bh = sysv_file_bread(dir,block,1);
+ if (!bh)
+ return -ENOSPC;
+ }
+ de = (struct sysv_dir_entry *) (bh->b_data + offset);
+ pos += SYSV_DIRSIZE;
+ offset += SYSV_DIRSIZE;
+ if (pos > dir->i_size) { /* past the end: extend the directory by this slot */
+ de->inode = 0;
+ dir->i_size = pos;
+ dir->i_dirt = 1;
+ }
+ if (de->inode) {
+ if (namecompare(namelen, SYSV_NAMELEN, name, de->name)) {
+ brelse(bh);
+ return -EEXIST;
+ }
+ } else {
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ for (i = 0; i < SYSV_NAMELEN ; i++)
+ de->name[i] = (i < namelen) ? name[i] : 0;
+ mark_buffer_dirty(bh, 1);
+ *res_dir = de;
+ break;
+ }
+ if (offset < sb->sv_block_size)
+ continue;
+ brelse(bh);
+ bh = NULL;
+ offset = 0; block++;
+ }
+ *res_buf = bh;
+ return 0;
+}
+/*
+ * sysv_create()
+ *
+ * Allocates a new inode with the file inode operations and the given
+ * mode, and enters it into 'dir' under name[0..len-1].
+ * On success returns 0 and passes the new inode back through 'result'.
+ * Errors: -ENOENT if 'dir' is NULL, -ENOSPC if no inode could be
+ * allocated, or whatever sysv_add_entry() reports (the new inode is then
+ * given up again). 'dir' is released with iput() on every path after the
+ * NULL check.
+ */
+int sysv_create(struct inode * dir,const char * name, int len, int mode,
+ struct inode ** result)
+{
+ int error;
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct sysv_dir_entry * de;
+
+ *result = NULL;
+ if (!dir)
+ return -ENOENT;
+ inode = sysv_new_inode(dir);
+ if (!inode) {
+ iput(dir);
+ return -ENOSPC;
+ }
+ inode->i_op = &sysv_file_inode_operations;
+ inode->i_mode = mode;
+ inode->i_dirt = 1;
+ error = sysv_add_entry(dir,name,len, &bh ,&de);
+ if (error) {
+ inode->i_nlink--; /* undo: the inode never got a directory entry */
+ inode->i_dirt = 1;
+ iput(inode);
+ iput(dir);
+ return error;
+ }
+ de->inode = inode->i_ino; /* fill the slot that sysv_add_entry left at 0 */
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ iput(dir);
+ *result = inode;
+ return 0;
+}
+/*
+ * sysv_mknod()
+ *
+ * Creates the entry name[0..len-1] in 'dir' with the given mode. The
+ * inode operations are chosen from the file type in 'mode'; 'rdev' is
+ * stored only for block and character devices.
+ * Returns 0 on success, -ENOENT if 'dir' is NULL, -EEXIST if the name is
+ * already present, -ENOSPC if no inode could be allocated, or the error
+ * from sysv_add_entry(). 'dir' is released with iput() on every path
+ * after the NULL check, and so is the new inode.
+ */
+int sysv_mknod(struct inode * dir, const char * name, int len, int mode, int rdev)
+{
+ int error;
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct sysv_dir_entry * de;
+
+ if (!dir)
+ return -ENOENT;
+ bh = sysv_find_entry(dir,name,len,&de);
+ if (bh) {
+ brelse(bh);
+ iput(dir);
+ return -EEXIST;
+ }
+ inode = sysv_new_inode(dir);
+ if (!inode) {
+ iput(dir);
+ return -ENOSPC;
+ }
+ inode->i_uid = current->fsuid;
+ inode->i_mode = mode;
+ inode->i_op = NULL; /* stays NULL for types not handled below */
+ if (S_ISREG(inode->i_mode))
+ inode->i_op = &sysv_file_inode_operations;
+ else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = &sysv_dir_inode_operations;
+ if (dir->i_mode & S_ISGID)
+ inode->i_mode |= S_ISGID;
+ }
+ else if (S_ISLNK(inode->i_mode))
+ inode->i_op = &sysv_symlink_inode_operations;
+ else if (S_ISCHR(inode->i_mode))
+ inode->i_op = &chrdev_inode_operations;
+ else if (S_ISBLK(inode->i_mode))
+ inode->i_op = &blkdev_inode_operations;
+ else if (S_ISFIFO(inode->i_mode))
+ init_fifo(inode);
+ if (S_ISBLK(mode) || S_ISCHR(mode))
+ inode->i_rdev = rdev;
+ inode->i_dirt = 1;
+ error = sysv_add_entry(dir, name, len, &bh, &de);
+ if (error) {
+ inode->i_nlink--; /* undo: the inode never got a directory entry */
+ inode->i_dirt = 1;
+ iput(inode);
+ iput(dir);
+ return error;
+ }
+ de->inode = inode->i_ino;
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ iput(dir);
+ iput(inode);
+ return 0;
+}
+/*
+ * sysv_mkdir()
+ *
+ * Creates the directory name[0..len-1] in 'dir'. The new directory's
+ * first block receives the "." and ".." entries, its link count is set
+ * to 2 and the parent's link count is incremented. The mode is
+ * S_IFDIR plus the permission bits of 'mode' after applying the umask;
+ * S_ISGID is inherited from the parent.
+ * Returns 0 on success, -EINVAL if 'dir' is NULL, -EEXIST if the name
+ * exists, -EMLINK if the parent already has sv_link_max links, -ENOSPC
+ * if no inode or first block could be allocated, or the error from
+ * sysv_add_entry(). 'dir' is released with iput() on every path.
+ */
+int sysv_mkdir(struct inode * dir, const char * name, int len, int mode)
+{
+ int error;
+ struct inode * inode;
+ struct buffer_head * bh, *dir_block;
+ struct sysv_dir_entry * de;
+
+ if (!dir) {
+ iput(dir);
+ return -EINVAL;
+ }
+ bh = sysv_find_entry(dir,name,len,&de);
+ if (bh) {
+ brelse(bh);
+ iput(dir);
+ return -EEXIST;
+ }
+ if (dir->i_nlink >= dir->i_sb->sv_link_max) {
+ iput(dir);
+ return -EMLINK;
+ }
+ inode = sysv_new_inode(dir);
+ if (!inode) {
+ iput(dir);
+ return -ENOSPC;
+ }
+ inode->i_op = &sysv_dir_inode_operations;
+ inode->i_size = 2 * SYSV_DIRSIZE; /* room for "." and ".." */
+ dir_block = sysv_file_bread(inode,0,1);
+ if (!dir_block) {
+ iput(dir);
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ iput(inode);
+ return -ENOSPC;
+ }
+ de = (struct sysv_dir_entry *) (dir_block->b_data + 0*SYSV_DIRSIZE);
+ de->inode = inode->i_ino;
+ strcpy(de->name,"."); /* rest of de->name is zero, see sysv_new_block */
+ de = (struct sysv_dir_entry *) (dir_block->b_data + 1*SYSV_DIRSIZE);
+ de->inode = dir->i_ino;
+ strcpy(de->name,".."); /* rest of de->name is zero, see sysv_new_block */
+ inode->i_nlink = 2;
+ mark_buffer_dirty(dir_block, 1);
+ brelse(dir_block);
+ inode->i_mode = S_IFDIR | (mode & 0777 & ~current->fs->umask);
+ if (dir->i_mode & S_ISGID)
+ inode->i_mode |= S_ISGID;
+ inode->i_dirt = 1;
+ error = sysv_add_entry(dir, name, len, &bh, &de);
+ if (error) {
+ iput(dir);
+ inode->i_nlink=0; /* no entry was made: drop the new directory */
+ iput(inode);
+ return error;
+ }
+ de->inode = inode->i_ino;
+ mark_buffer_dirty(bh, 1);
+ dir->i_nlink++; /* accounts for the ".." entry written above */
+ dir->i_dirt = 1;
+ iput(dir);
+ iput(inode);
+ brelse(bh);
+ return 0;
+}
+
+/*
+ * routine to check that the specified directory is empty (for rmdir)
+ *
+ * Returns 0 as soon as a slot after the first two has a non-zero inode
+ * number, 1 otherwise. 1 is also returned for a NULL inode and for a
+ * "bad directory" (size not a multiple of SYSV_DIRSIZE, smaller than two
+ * slots, unreadable first block, or missing "." / ".." entries), in
+ * which case a message is printed. Unreadable later blocks are skipped.
+ */
+static int empty_dir(struct inode * inode)
+{
+ struct super_block * sb;
+ unsigned long pos, block, offset; /* pos = block * block_size + offset */
+ struct buffer_head * bh;
+ struct sysv_dir_entry * de;
+
+ if (!inode)
+ return 1;
+ block = 0;
+ bh = NULL;
+ pos = offset = 2*SYSV_DIRSIZE; /* the scan starts after "." and ".." */
+ if (inode->i_size % SYSV_DIRSIZE)
+ goto bad_dir;
+ if (inode->i_size < pos)
+ goto bad_dir;
+ bh = sysv_file_bread(inode,0,0);
+ if (!bh)
+ goto bad_dir;
+ de = (struct sysv_dir_entry *) (bh->b_data + 0*SYSV_DIRSIZE);
+ if (!de->inode || strcmp(de->name,"."))
+ goto bad_dir;
+ de = (struct sysv_dir_entry *) (bh->b_data + 1*SYSV_DIRSIZE);
+ if (!de->inode || strcmp(de->name,".."))
+ goto bad_dir;
+ sb = inode->i_sb;
+ while (pos < inode->i_size) {
+ if (!bh) {
+ bh = sysv_file_bread(inode,block,0);
+ if (!bh) {
+ /* offset = 0; */ block++;
+ pos += sb->sv_block_size;
+ continue;
+ }
+ }
+ de = (struct sysv_dir_entry *) (bh->b_data + offset);
+ pos += SYSV_DIRSIZE;
+ offset += SYSV_DIRSIZE;
+ if (de->inode) { /* a slot in use: not empty */
+ brelse(bh);
+ return 0;
+ }
+ if (offset < sb->sv_block_size)
+ continue;
+ brelse(bh);
+ bh = NULL;
+ offset = 0; block++;
+ }
+ brelse(bh);
+ return 1;
+bad_dir:
+ brelse(bh);
+ printk("Bad directory on device %04x\n",inode->i_dev);
+ return 1;
+}
+/*
+ * sysv_rmdir()
+ *
+ * Removes the empty directory name[0..len-1] from 'dir': the entry's
+ * inode number is cleared, the victim's link count is set to 0 and the
+ * parent's link count is decremented.
+ * Returns 0 on success, -ENOENT (name not found, or the entry changed
+ * meanwhile), -EPERM (iget failed, sticky-bit restriction, different
+ * device, or the entry is 'dir' itself), -ENOTDIR, -ENOTEMPTY or -EBUSY
+ * (the directory inode has other users).
+ * 'dir', the victim inode and the buffer are released on every path.
+ */
+int sysv_rmdir(struct inode * dir, const char * name, int len)
+{
+ int retval;
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct sysv_dir_entry * de;
+
+ inode = NULL;
+ bh = sysv_find_entry(dir,name,len,&de);
+ retval = -ENOENT;
+ if (!bh)
+ goto end_rmdir;
+ retval = -EPERM;
+ if (!(inode = iget(dir->i_sb, de->inode)))
+ goto end_rmdir;
+ if ((dir->i_mode & S_ISVTX) && !fsuser() &&
+ current->fsuid != inode->i_uid &&
+ current->fsuid != dir->i_uid)
+ goto end_rmdir;
+ if (inode->i_dev != dir->i_dev)
+ goto end_rmdir;
+ if (inode == dir) /* we may not delete ".", but "../dir" is ok */
+ goto end_rmdir;
+ if (!S_ISDIR(inode->i_mode)) {
+ retval = -ENOTDIR;
+ goto end_rmdir;
+ }
+ if (!empty_dir(inode)) {
+ retval = -ENOTEMPTY;
+ goto end_rmdir;
+ }
+ if (de->inode != inode->i_ino) { /* the entry no longer names this inode */
+ retval = -ENOENT;
+ goto end_rmdir;
+ }
+ if (inode->i_count > 1) {
+ retval = -EBUSY;
+ goto end_rmdir;
+ }
+ if (inode->i_nlink != 2)
+ printk("empty directory has nlink!=2 (%d)\n",inode->i_nlink);
+ de->inode = 0;
+ mark_buffer_dirty(bh, 1);
+ inode->i_nlink=0;
+ inode->i_dirt=1;
+ dir->i_nlink--;
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ dir->i_dirt=1;
+ retval = 0;
+end_rmdir:
+ iput(dir);
+ iput(inode);
+ brelse(bh);
+ return retval;
+}
+/*
+ * sysv_unlink()
+ *
+ * Removes the non-directory entry name[0..len-1] from 'dir': the
+ * entry's inode number is cleared and the inode's link count is
+ * decremented. If the entry is found to name a different inode than
+ * the one just fetched, the whole lookup is repeated after schedule().
+ * Returns 0 on success, -ENOENT (not found or iget failed) or -EPERM
+ * (the entry is a directory, or the sticky-bit restriction applies).
+ * 'dir', the inode and the buffer are released on every path.
+ */
+int sysv_unlink(struct inode * dir, const char * name, int len)
+{
+ int retval;
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct sysv_dir_entry * de;
+
+repeat:
+ retval = -ENOENT;
+ inode = NULL;
+ bh = sysv_find_entry(dir,name,len,&de);
+ if (!bh)
+ goto end_unlink;
+ if (!(inode = iget(dir->i_sb, de->inode)))
+ goto end_unlink;
+ retval = -EPERM;
+ if (S_ISDIR(inode->i_mode))
+ goto end_unlink;
+ if (de->inode != inode->i_ino) { /* entry changed under us: start over */
+ iput(inode);
+ brelse(bh);
+ current->counter = 0;
+ schedule();
+ goto repeat;
+ }
+ if ((dir->i_mode & S_ISVTX) && !fsuser() &&
+ current->fsuid != inode->i_uid &&
+ current->fsuid != dir->i_uid)
+ goto end_unlink;
+ if (de->inode != inode->i_ino) {
+ retval = -ENOENT;
+ goto end_unlink;
+ }
+ if (!inode->i_nlink) {
+ printk("Deleting nonexistent file (%04x:%lu), %d\n",
+ inode->i_dev,inode->i_ino,inode->i_nlink);
+ inode->i_nlink=1; /* so that the decrement below ends at 0 */
+ }
+ de->inode = 0;
+ mark_buffer_dirty(bh, 1);
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ dir->i_dirt = 1;
+ inode->i_nlink--;
+ inode->i_ctime = dir->i_ctime;
+ inode->i_dirt = 1;
+ retval = 0;
+end_unlink:
+ brelse(bh);
+ iput(inode);
+ iput(dir);
+ return retval;
+}
+/*
+ * sysv_symlink()
+ *
+ * Creates the symbolic link name[0..len-1] in 'dir' pointing to
+ * 'symname'. The target is copied into the first block of the new
+ * inode, at most sv_block_size_1 characters of it, followed by a zero
+ * byte; i_size is the number of characters copied.
+ * Returns 0 on success, -ENOSPC if no inode or block could be
+ * allocated, -EEXIST if the name already exists, or the error from
+ * sysv_add_entry(). 'dir' is released with iput() on every path.
+ */
+int sysv_symlink(struct inode * dir, const char * name, int len, const char * symname)
+{
+ struct sysv_dir_entry * de;
+ struct inode * inode;
+ struct buffer_head * name_block;
+ char * name_block_data;
+ struct super_block * sb;
+ int i;
+ char c;
+ struct buffer_head * bh;
+
+ if (!(inode = sysv_new_inode(dir))) {
+ iput(dir);
+ return -ENOSPC;
+ }
+ inode->i_mode = S_IFLNK | 0777;
+ inode->i_op = &sysv_symlink_inode_operations;
+ name_block = sysv_file_bread(inode,0,1);
+ if (!name_block) {
+ iput(dir);
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ iput(inode);
+ return -ENOSPC;
+ }
+ sb = inode->i_sb;
+ name_block_data = name_block->b_data;
+ i = 0;
+ while (i < sb->sv_block_size_1 && (c = *(symname++)))
+ name_block_data[i++] = c;
+ name_block_data[i] = 0;
+ mark_buffer_dirty(name_block, 1);
+ brelse(name_block);
+ inode->i_size = i;
+ inode->i_dirt = 1;
+ bh = sysv_find_entry(dir,name,len,&de);
+ if (bh) { /* the name is already taken: give the new inode up again */
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ iput(inode);
+ brelse(bh);
+ iput(dir);
+ return -EEXIST;
+ }
+ i = sysv_add_entry(dir, name, len, &bh, &de); /* i now holds the error code */
+ if (i) {
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ iput(inode);
+ iput(dir);
+ return i;
+ }
+ de->inode = inode->i_ino;
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ iput(dir);
+ iput(inode);
+ return 0;
+}
+/*
+ * sysv_link()
+ *
+ * Adds the entry name[0..len-1] in 'dir' for the existing inode
+ * 'oldinode' and increments its link count.
+ * Returns 0 on success, -EPERM if 'oldinode' is a directory, -EMLINK if
+ * it already has sv_link_max links, -EEXIST if the name exists, or the
+ * error from sysv_add_entry(). Both 'dir' and 'oldinode' are released
+ * with iput() on every path.
+ */
+int sysv_link(struct inode * oldinode, struct inode * dir, const char * name, int len)
+{
+ int error;
+ struct sysv_dir_entry * de;
+ struct buffer_head * bh;
+
+ if (S_ISDIR(oldinode->i_mode)) {
+ iput(oldinode);
+ iput(dir);
+ return -EPERM;
+ }
+ if (oldinode->i_nlink >= oldinode->i_sb->sv_link_max) {
+ iput(oldinode);
+ iput(dir);
+ return -EMLINK;
+ }
+ bh = sysv_find_entry(dir,name,len,&de);
+ if (bh) {
+ brelse(bh);
+ iput(dir);
+ iput(oldinode);
+ return -EEXIST;
+ }
+ error = sysv_add_entry(dir, name, len, &bh, &de);
+ if (error) {
+ iput(dir);
+ iput(oldinode);
+ return error;
+ }
+ de->inode = oldinode->i_ino;
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ iput(dir);
+ oldinode->i_nlink++;
+ oldinode->i_ctime = CURRENT_TIME;
+ oldinode->i_dirt = 1;
+ iput(oldinode);
+ return 0;
+}
+
+/* return 1 if `new' is a subdir of `old' on the same device
+ *
+ * Walks upwards from new_inode through ".." lookups until old_inode is
+ * met (result 1), the device changes, a lookup fails, or ".." yields
+ * the same inode number again (all result 0).
+ * An extra reference is taken on new_inode first because sysv_lookup()
+ * releases the directory it is given; the inode reached last is
+ * released at the end.
+ */
+static int subdir(struct inode * new_inode, struct inode * old_inode)
+{
+ int ino;
+ int result;
+
+ new_inode->i_count++;
+ result = 0;
+ for (;;) {
+ if (new_inode == old_inode) {
+ result = 1;
+ break;
+ }
+ if (new_inode->i_dev != old_inode->i_dev)
+ break;
+ ino = new_inode->i_ino;
+ if (sysv_lookup(new_inode,"..",2,&new_inode)) /* on failure new_inode is NULL */
+ break;
+ if (new_inode->i_ino == ino) /* root dir reached ? */
+ break;
+ }
+ iput(new_inode);
+ return result;
+}
+/*
+ * Inode number held in the second slot of a directory block, which is
+ * where sysv_mkdir() puts the ".." entry. Usable as an lvalue.
+ */
+#define PARENT_INO(buffer) \
+(((struct sysv_dir_entry *) ((buffer) + 1*SYSV_DIRSIZE))->inode)
+
+/*
+ * rename uses retrying to avoid race-conditions: at least they should be minimal.
+ * it tries to allocate all the blocks, then sanity-checks, and if the sanity-
+ * checks fail, it tries to restart itself again. Very practical - no changes
+ * are done until we know everything works ok.. and then all the changes can be
+ * done in one fell swoop when we have claimed all the buffers needed.
+ *
+ * Anybody can rename anything with this: the permission checks are left to the
+ * higher-level routines.
+ *
+ * Returns 0 on success (also when old and new name already denote the
+ * same inode), otherwise one of -ENOENT, -EPERM, -EISDIR, -ENOTDIR,
+ * -EINVAL, -ENOTEMPTY, -EBUSY, -EIO, -EMLINK or the error from
+ * sysv_add_entry(). All buffers and inodes, including old_dir and
+ * new_dir, are released at end_rename.
+ */
+static int do_sysv_rename(struct inode * old_dir, const char * old_name, int old_len,
+ struct inode * new_dir, const char * new_name, int new_len)
+{
+ struct inode * old_inode, * new_inode;
+ struct buffer_head * old_bh, * new_bh, * dir_bh;
+ struct sysv_dir_entry * old_de, * new_de;
+ int retval;
+
+ goto start_up;
+try_again: /* drop everything claimed so far, reschedule, start over */
+ brelse(old_bh);
+ brelse(new_bh);
+ brelse(dir_bh);
+ iput(old_inode);
+ iput(new_inode);
+ current->counter = 0;
+ schedule();
+start_up:
+ old_inode = new_inode = NULL;
+ old_bh = new_bh = dir_bh = NULL;
+ old_bh = sysv_find_entry(old_dir,old_name,old_len,&old_de);
+ retval = -ENOENT;
+ if (!old_bh)
+ goto end_rename;
+ old_inode = __iget(old_dir->i_sb, old_de->inode, 0); /* don't cross mnt-points */
+ if (!old_inode)
+ goto end_rename;
+ retval = -EPERM;
+ if ((old_dir->i_mode & S_ISVTX) &&
+ current->fsuid != old_inode->i_uid &&
+ current->fsuid != old_dir->i_uid && !fsuser())
+ goto end_rename;
+ new_bh = sysv_find_entry(new_dir,new_name,new_len,&new_de);
+ if (new_bh) {
+ new_inode = __iget(new_dir->i_sb, new_de->inode, 0);
+ if (!new_inode) {
+ brelse(new_bh);
+ new_bh = NULL;
+ }
+ }
+ if (new_inode == old_inode) { /* both names already denote the same inode */
+ retval = 0;
+ goto end_rename;
+ }
+ if (new_inode && S_ISDIR(new_inode->i_mode)) {
+ retval = -EISDIR;
+ if (!S_ISDIR(old_inode->i_mode))
+ goto end_rename;
+ retval = -EINVAL;
+ if (subdir(new_dir, old_inode))
+ goto end_rename;
+ retval = -ENOTEMPTY;
+ if (!empty_dir(new_inode))
+ goto end_rename;
+ retval = -EBUSY;
+ if (new_inode->i_count > 1)
+ goto end_rename;
+ }
+ retval = -EPERM;
+ if (new_inode && (new_dir->i_mode & S_ISVTX) &&
+ current->fsuid != new_inode->i_uid &&
+ current->fsuid != new_dir->i_uid && !fsuser())
+ goto end_rename;
+ if (S_ISDIR(old_inode->i_mode)) {
+ retval = -ENOTDIR;
+ if (new_inode && !S_ISDIR(new_inode->i_mode))
+ goto end_rename;
+ retval = -EINVAL;
+ if (subdir(new_dir, old_inode))
+ goto end_rename;
+ retval = -EIO;
+ dir_bh = sysv_file_bread(old_inode,0,0); /* block holding the ".." entry */
+ if (!dir_bh)
+ goto end_rename;
+ if (PARENT_INO(dir_bh->b_data) != old_dir->i_ino)
+ goto end_rename;
+ retval = -EMLINK;
+ if (!new_inode && new_dir->i_nlink >= new_dir->i_sb->sv_link_max)
+ goto end_rename;
+ }
+ if (!new_bh) {
+ retval = sysv_add_entry(new_dir,new_name,new_len,&new_bh,&new_de);
+ if (retval)
+ goto end_rename;
+ }
+/* sanity checking before doing the rename - avoid races */
+ if (new_inode && (new_de->inode != new_inode->i_ino))
+ goto try_again;
+ if (new_de->inode && !new_inode)
+ goto try_again;
+ if (old_de->inode != old_inode->i_ino)
+ goto try_again;
+/* ok, that's it */
+ old_de->inode = 0;
+ new_de->inode = old_inode->i_ino;
+ old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
+ old_dir->i_dirt = 1;
+ new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME;
+ new_dir->i_dirt = 1;
+ if (new_inode) { /* the replaced target loses the link from its entry */
+ new_inode->i_nlink--;
+ new_inode->i_ctime = CURRENT_TIME;
+ new_inode->i_dirt = 1;
+ }
+ mark_buffer_dirty(old_bh, 1);
+ mark_buffer_dirty(new_bh, 1);
+ if (dir_bh) { /* a directory was moved: repoint its ".." and fix link counts */
+ PARENT_INO(dir_bh->b_data) = new_dir->i_ino;
+ mark_buffer_dirty(dir_bh, 1);
+ old_dir->i_nlink--;
+ old_dir->i_dirt = 1;
+ if (new_inode) {
+ new_inode->i_nlink--;
+ new_inode->i_dirt = 1;
+ } else {
+ new_dir->i_nlink++;
+ new_dir->i_dirt = 1;
+ }
+ }
+ retval = 0;
+end_rename:
+ brelse(dir_bh);
+ brelse(old_bh);
+ brelse(new_bh);
+ iput(old_inode);
+ iput(new_inode);
+ iput(old_dir);
+ iput(new_dir);
+ return retval;
+}
+
+/*
+ * Ok, rename also locks out other renames, as they can change the parent of
+ * a directory, and we don't want any races. Other races are checked for by
+ * do_sysv_rename(), which restarts if there are inconsistencies.
+ *
+ * The lock is one static flag plus a wait queue, shared by every caller
+ * of this function. The result of do_sysv_rename() is returned unchanged.
+ */
+int sysv_rename(struct inode * old_dir, const char * old_name, int old_len,
+	struct inode * new_dir, const char * new_name, int new_len)
+{
+	static struct wait_queue * rename_wait = NULL;
+	static int rename_busy = 0;
+	int retval;
+
+	/* sleep until no other rename is in progress, then claim the flag */
+	for (;;) {
+		if (!rename_busy)
+			break;
+		sleep_on(&rename_wait);
+	}
+	rename_busy = 1;
+	retval = do_sysv_rename(old_dir, old_name, old_len,
+				new_dir, new_name, new_len);
+	rename_busy = 0;
+	wake_up(&rename_wait);
+	return retval;
+}
diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c
new file mode 100644
index 000000000..d392816bc
--- /dev/null
+++ b/fs/sysv/symlink.c
@@ -0,0 +1,110 @@
+/*
+ * linux/fs/sysv/symlink.c
+ *
+ * minix/symlink.c
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * coh/symlink.c
+ * Copyright (C) 1993 Pascal Haible, Bruno Haible
+ *
+ * sysv/symlink.c
+ * Copyright (C) 1993 Bruno Haible
+ *
+ * SystemV/Coherent symlink handling code
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/sysv_fs.h>
+#include <linux/stat.h>
+
+static int sysv_readlink(struct inode *, char *, int);
+static int sysv_follow_link(struct inode *, struct inode *, int, int, struct inode **);
+
+/*
+ * symlinks can't do much...
+ *
+ * Only readlink and follow_link are implemented; every other slot of
+ * the operations table is NULL.
+ */
+struct inode_operations sysv_symlink_inode_operations = {
+ NULL, /* no file-operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ sysv_readlink, /* readlink */
+ sysv_follow_link, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+/*
+ * sysv_follow_link()
+ *
+ * Resolves the symbolic link 'inode' by handing the contents of its
+ * first block to open_namei(), with 'dir' as the starting directory
+ * (the current root if 'dir' is NULL). The result of open_namei() is
+ * returned, with the inode found passed back through 'res_inode'.
+ * If 'inode' is not a symbolic link it is passed back unchanged and 0
+ * is returned.
+ * Errors detected here: -ENOENT ('inode' is NULL), -ELOOP
+ * (current->link_count is above 5), -EIO (the block cannot be read).
+ */
+static int sysv_follow_link(struct inode * dir, struct inode * inode,
+ int flag, int mode, struct inode ** res_inode)
+{
+ int error;
+ struct buffer_head * bh;
+
+ *res_inode = NULL;
+ if (!dir) {
+ dir = current->fs->root;
+ dir->i_count++;
+ }
+ if (!inode) {
+ iput(dir);
+ return -ENOENT;
+ }
+ if (!S_ISLNK(inode->i_mode)) {
+ iput(dir);
+ *res_inode = inode;
+ return 0;
+ }
+ if (current->link_count > 5) {
+ iput(inode);
+ iput(dir);
+ return -ELOOP;
+ }
+ if (!(bh = sysv_file_bread(inode, 0, 0))) { /* is reading 1 block enough ?? */
+ iput(inode);
+ iput(dir);
+ return -EIO;
+ }
+ iput(inode);
+ current->link_count++; /* counts the nesting depth while open_namei runs */
+ error = open_namei(bh->b_data,flag,mode,res_inode,dir);
+ current->link_count--;
+ brelse(bh);
+ return error;
+}
+/*
+ * sysv_readlink()
+ *
+ * Copies the link target from the first block of 'inode' to 'buffer'
+ * with put_fs_byte(), stopping at the first zero byte or after 'buflen'
+ * bytes ('buflen' is first capped at sv_block_size_1). No terminating
+ * zero is written.
+ * Returns the number of bytes copied, -EINVAL if 'inode' is not a
+ * symbolic link, and 0 if the block cannot be read. 'inode' is released
+ * with iput() on every path.
+ */
+static int sysv_readlink(struct inode * inode, char * buffer, int buflen)
+{
+ struct buffer_head * bh;
+ char * bh_data;
+ int i;
+ char c;
+
+ if (!S_ISLNK(inode->i_mode)) {
+ iput(inode);
+ return -EINVAL;
+ }
+ if (buflen > inode->i_sb->sv_block_size_1)
+ buflen = inode->i_sb->sv_block_size_1;
+ bh = sysv_file_bread(inode, 0, 0);
+ iput(inode);
+ if (!bh)
+ return 0;
+ bh_data = bh->b_data;
+ i = 0;
+ while (i<buflen && (c = bh_data[i])) {
+ i++;
+ put_fs_byte(c,buffer++);
+ }
+ brelse(bh);
+ return i;
+}
diff --git a/fs/sysv/truncate.c b/fs/sysv/truncate.c
new file mode 100644
index 000000000..21451e6dd
--- /dev/null
+++ b/fs/sysv/truncate.c
@@ -0,0 +1,283 @@
+/*
+ * linux/fs/sysv/truncate.c
+ *
+ * minix/truncate.c
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * coh/truncate.c
+ * Copyright (C) 1993 Pascal Haible, Bruno Haible
+ *
+ * sysv/truncate.c
+ * Copyright (C) 1993 Bruno Haible
+ */
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/sysv_fs.h>
+#include <linux/stat.h>
+
+
+/* Linus' implementation of truncate.
+ * It doesn't need locking because it can tell from looking at bh->b_count
+ * whether a given block is in use elsewhere.
+ */
+
+/*
+ * Truncate has the most races in the whole filesystem: coding it is
+ * a pain in the a**. Especially as I don't do any locking...
+ *
+ * The code may look a bit weird, but that's just because I've tried to
+ * handle things like file-size changes in a somewhat graceful manner.
+ * Anyway, truncating a file at the same time somebody else writes to it
+ * is likely to result in pretty weird behaviour...
+ *
+ * The new code handles normal truncates (size = 0) as well as the more
+ * general case (size = XXX). I hope.
+ */
+
+/* We throw away any data beyond inode->i_size. */
+/*
+ * trunc_direct() frees the blocks named by the first 10 slots of the
+ * inode's i_data, starting at slot
+ * (i_size + sv_block_size_1) >> sv_block_size_bits.
+ * A block is left alone, and 1 is returned so that the caller tries
+ * again, when its buffer has a b_count other than 1 or its slot changed
+ * while we looked. If i_size has grown past the slot being examined the
+ * scan starts over. Returns 0 when nothing had to be skipped.
+ */
+static int trunc_direct(struct inode * inode)
+{
+ struct super_block * sb;
+ unsigned int i;
+ unsigned long * p;
+ unsigned long block;
+ struct buffer_head * bh;
+ int retry = 0;
+
+ sb = inode->i_sb;
+repeat:
+ for (i = ((unsigned long) inode->i_size + sb->sv_block_size_1) >> sb->sv_block_size_bits; i < 10; i++) {
+ p = inode->u.sysv_i.i_data + i;
+ block = *p;
+ if (!block)
+ continue;
+ bh = sv_get_hash_table(sb, inode->i_dev, block);
+ if ((i << sb->sv_block_size_bits) < inode->i_size) { /* the file grew meanwhile */
+ brelse(bh);
+ goto repeat;
+ }
+ if ((bh && bh->b_count != 1) || (block != *p)) { /* in use elsewhere, or slot changed */
+ retry = 1;
+ brelse(bh);
+ continue;
+ }
+ *p = 0;
+ inode->i_dirt = 1;
+ brelse(bh);
+ sysv_free_block(sb,block);
+ }
+ return retry;
+}
+/*
+ * trunc_indirect() frees the data blocks listed in one indirect block.
+ *
+ * 'p' points to the slot holding the indirect block's number; if
+ * 'convert' is set that number is passed through from_coh_ulong().
+ * 'offset' is subtracted from i_size to find the first entry to free.
+ * Entries are freed like in trunc_direct(); when no entry is left the
+ * indirect block itself is freed, *p is cleared and *dirt is set to 1.
+ * If the indirect block cannot be read, *p is cleared as well.
+ * Returns non-zero if something had to be skipped and the caller should
+ * try again, 0 otherwise.
+ */
+static int trunc_indirect(struct inode * inode, unsigned long offset, unsigned long * p, int convert, unsigned char * dirt)
+{
+ unsigned long indtmp, indblock;
+ struct super_block * sb;
+ struct buffer_head * indbh;
+ unsigned int i;
+ sysv_zone_t * ind;
+ unsigned long tmp, block;
+ struct buffer_head * bh;
+ int retry = 0;
+
+ indblock = indtmp = *p;
+ if (convert)
+ indblock = from_coh_ulong(indblock);
+ if (!indblock)
+ return 0;
+ sb = inode->i_sb;
+ indbh = sv_bread(sb, inode->i_dev, indblock);
+ if (indtmp != *p) { /* the slot changed while the block was read */
+ brelse(indbh);
+ return 1;
+ }
+ if (!indbh) {
+ *p = 0;
+ *dirt = 1;
+ return 0;
+ }
+repeat:
+ if (inode->i_size < offset)
+ i = 0;
+ else
+ i = (inode->i_size - offset + sb->sv_block_size_1) >> sb->sv_block_size_bits;
+ for (; i < sb->sv_ind_per_block; i++) {
+ ind = ((sysv_zone_t *) indbh->b_data) + i;
+ block = tmp = *ind;
+ if (sb->sv_convert)
+ block = from_coh_ulong(block);
+ if (!block)
+ continue;
+ bh = sv_get_hash_table(sb, inode->i_dev, block);
+ if ((i << sb->sv_block_size_bits) + offset < inode->i_size) { /* the file grew meanwhile */
+ brelse(bh);
+ goto repeat;
+ }
+ if ((bh && bh->b_count != 1) || (tmp != *ind)) { /* in use elsewhere, or entry changed */
+ retry = 1;
+ brelse(bh);
+ continue;
+ }
+ *ind = 0;
+ mark_buffer_dirty(indbh, 1);
+ brelse(bh);
+ sysv_free_block(sb,block);
+ }
+ for (i = 0; i < sb->sv_ind_per_block; i++) /* any entry still in use? */
+ if (((sysv_zone_t *) indbh->b_data)[i])
+ goto done;
+ if ((indbh->b_count != 1) || (indtmp != *p)) {
+ brelse(indbh);
+ return 1;
+ }
+ *p = 0;
+ *dirt = 1;
+ sysv_free_block(sb,indblock);
+done:
+ brelse(indbh);
+ return retry;
+}
+/*
+ * trunc_dindirect() truncates below one double indirect block.
+ *
+ * Arguments and return value are as for trunc_indirect(). Each entry
+ * from the first one affected onwards is handed to trunc_indirect(),
+ * with the b_dirt field of this block's buffer as its 'dirt' flag.
+ * When no entry is left the block itself is freed, *p is cleared and
+ * *dirt is set to 1.
+ */
+static int trunc_dindirect(struct inode * inode, unsigned long offset, unsigned long * p, int convert, unsigned char * dirt)
+{
+ unsigned long indtmp, indblock;
+ struct super_block * sb;
+ struct buffer_head * indbh;
+ unsigned int i;
+ sysv_zone_t * ind;
+ unsigned long tmp, block;
+ int retry = 0;
+
+ indblock = indtmp = *p;
+ if (convert)
+ indblock = from_coh_ulong(indblock);
+ if (!indblock)
+ return 0;
+ sb = inode->i_sb;
+ indbh = sv_bread(sb, inode->i_dev, indblock);
+ if (indtmp != *p) { /* the slot changed while the block was read */
+ brelse(indbh);
+ return 1;
+ }
+ if (!indbh) {
+ *p = 0;
+ *dirt = 1;
+ return 0;
+ }
+ if (inode->i_size < offset)
+ i = 0;
+ else
+ i = (inode->i_size - offset + sb->sv_ind_per_block_block_size_1) >> sb->sv_ind_per_block_block_size_bits;
+ for (; i < sb->sv_ind_per_block; i++) {
+ ind = ((sysv_zone_t *) indbh->b_data) + i;
+ block = tmp = *ind;
+ if (sb->sv_convert)
+ block = from_coh_ulong(block);
+ if (!block)
+ continue;
+ retry |= trunc_indirect(inode,offset+(i<<sb->sv_ind_per_block_bits),ind,sb->sv_convert,&indbh->b_dirt);
+ }
+ for (i = 0; i < sb->sv_ind_per_block; i++) /* any entry still in use? */
+ if (((sysv_zone_t *) indbh->b_data)[i])
+ goto done;
+ if ((indbh->b_count != 1) || (indtmp != *p)) {
+ brelse(indbh);
+ return 1;
+ }
+ *p = 0;
+ *dirt = 1;
+ sysv_free_block(sb,indblock);
+done:
+ brelse(indbh);
+ return retry;
+}
+/*
+ * trunc_tindirect() truncates below one triple indirect block.
+ *
+ * Arguments and return value are as for trunc_indirect(). Each entry
+ * from the first one affected onwards is handed to trunc_dindirect(),
+ * with the b_dirt field of this block's buffer as its 'dirt' flag.
+ * When no entry is left the block itself is freed, *p is cleared and
+ * *dirt is set to 1.
+ */
+static int trunc_tindirect(struct inode * inode, unsigned long offset, unsigned long * p, int convert, unsigned char * dirt)
+{
+ unsigned long indtmp, indblock;
+ struct super_block * sb;
+ struct buffer_head * indbh;
+ unsigned int i;
+ sysv_zone_t * ind;
+ unsigned long tmp, block;
+ int retry = 0;
+
+ indblock = indtmp = *p;
+ if (convert)
+ indblock = from_coh_ulong(indblock);
+ if (!indblock)
+ return 0;
+ sb = inode->i_sb;
+ indbh = sv_bread(sb, inode->i_dev, indblock);
+ if (indtmp != *p) { /* the slot changed while the block was read */
+ brelse(indbh);
+ return 1;
+ }
+ if (!indbh) {
+ *p = 0;
+ *dirt = 1;
+ return 0;
+ }
+ if (inode->i_size < offset)
+ i = 0;
+ else
+ i = (inode->i_size - offset + sb->sv_ind_per_block_2_block_size_1) >> sb->sv_ind_per_block_2_block_size_bits;
+ for (; i < sb->sv_ind_per_block; i++) {
+ ind = ((sysv_zone_t *) indbh->b_data) + i;
+ block = tmp = *ind;
+ if (sb->sv_convert)
+ block = from_coh_ulong(block);
+ if (!block)
+ continue;
+ retry |= trunc_dindirect(inode,offset+(i<<sb->sv_ind_per_block_2_bits),ind,sb->sv_convert,&indbh->b_dirt);
+ }
+ for (i = 0; i < sb->sv_ind_per_block; i++) /* any entry still in use? */
+ if (((sysv_zone_t *) indbh->b_data)[i])
+ goto done;
+ if ((indbh->b_count != 1) || (indtmp != *p)) {
+ brelse(indbh);
+ return 1;
+ }
+ *p = 0;
+ *dirt = 1;
+ sysv_free_block(sb,indblock);
+done:
+ brelse(indbh);
+ return retry;
+}
+
+/*
+ * trunc_all() makes one truncation pass over the whole inode: the
+ * direct blocks, then the single, double and triple indirect trees
+ * hanging off i_data[10], i_data[11] and i_data[12].
+ *
+ * All four stages always run. They are separate statements so that they
+ * execute in the order written; the operands of a single `|' expression
+ * may be evaluated in any order.
+ * Returns non-zero if any stage asks for another pass, 0 otherwise.
+ */
+static int trunc_all(struct inode * inode)
+{
+	struct super_block * sb;
+	int retry;
+
+	sb = inode->i_sb;
+	retry = trunc_direct(inode);
+	retry |= trunc_indirect(inode,sb->sv_ind0_size,&inode->u.sysv_i.i_data[10],0,&inode->i_dirt);
+	retry |= trunc_dindirect(inode,sb->sv_ind1_size,&inode->u.sysv_i.i_data[11],0,&inode->i_dirt);
+	retry |= trunc_tindirect(inode,sb->sv_ind2_size,&inode->u.sysv_i.i_data[12],0,&inode->i_dirt);
+	return retry;
+}
+
+
+/*
+ * sysv_truncate() throws away everything beyond inode->i_size.
+ *
+ * Only regular files, directories and symbolic links are touched; for
+ * a symbolic link a message is printed first. Passes of trunc_all()
+ * are repeated, with a schedule() in between, until one completes
+ * without asking for a retry. Afterwards mtime and ctime are updated
+ * and the inode is marked dirty.
+ */
+void sysv_truncate(struct inode * inode)
+{
+	/* If this is called from sysv_put_inode, we needn't worry about
+	 * races as we are just losing the last reference to the inode.
+	 * If this is called from another place, let's hope it's a regular
+	 * file.
+	 * Truncating symbolic links is strange. We assume we don't truncate
+	 * a directory we are just modifying. We ensure we don't truncate
+	 * a regular file we are just writing to, by use of a lock.
+	 */
+	if (!S_ISLNK(inode->i_mode)) {
+		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+			return;
+	} else {
+		printk("sysv_truncate: truncating symbolic link\n");
+	}
+	for (;;) {
+		if (!trunc_all(inode))
+			break;
+		current->counter = 0;
+		schedule();
+	}
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	inode->i_dirt = 1;
+}
diff --git a/fs/umsdos/Makefile b/fs/umsdos/Makefile
new file mode 100644
index 000000000..cfba11e63
--- /dev/null
+++ b/fs/umsdos/Makefile
@@ -0,0 +1,44 @@
+#
+# Makefile for the umsdos unix-like filesystem routines.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+ifndef CONFIG_UMSDOS_FS
+CFLAGS := $(CFLAGS) -DMODULE
+endif
+
+.c.s:
+ $(CC) $(CFLAGS) -S $<
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+.s.o:
+ $(AS) -o $*.o $<
+
+OBJS= dir.o emd.o file.o inode.o ioctl.o mangle.o namei.o\
+ rdir.o symlink.o #check.o
+
+umsdos.o: $(OBJS)
+ $(LD) -r -o umsdos.o $(OBJS)
+
+clean:
+ rm -f core *.o *.a *.s
+
+dep:
+ $(CPP) -M *.c > .depend
+
+p:
+ proto *.c >/usr/include/linux/umsdos_fs.p
+
+doc:
+ nadoc -i -p umsdos.doc - /tmp/umsdos.mpg
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/fs/umsdos/README b/fs/umsdos/README
new file mode 100644
index 000000000..4ce8b4148
--- /dev/null
+++ b/fs/umsdos/README
@@ -0,0 +1,84 @@
+Very short explanation for the impatient!!!
+
+Umsdos is a file system driver that runs on top of the MSDOS fs driver.
+It is written by Jacques Gelinas (jacques@solucorp.qc.ca)
+
+Umsdos is not a file system per se, but a twist to make a boring
+one into a useful one.
+
+It gives you:
+
+ long file name
+ Permissions and owner
+ Links
+ Special files (devices, pipe...)
+ All that is needed to be a linux root fs.
+
+There is plenty of documentation on it in the source. A formatted document
+made from those comments is available from
+sunsite.unc.edu:/pub/Linux/ALPHA/umsdos
+
+Mostly...
+
+You mount a DOS partition like this
+
+mount -t umsdos /dev/hda3 /mnt
+ ^
+---------|
+
+All options are passed to the msdos driver. Options like uid, gid etc. are
+given to msdos.
+
+The default behavior of Umsdos is to do the same thing as the msdos driver,
+mostly passing commands to it without much processing. Again, this is
+the default. After doing the mount on a DOS partition, nothing special
+happens. This is why all mount options are passed to the Msdos fs driver.
+
+Umsdos uses a special DOS file, --linux-.---, to store the information
+which can't be handled by the normal MsDOS file system. This is the trick.
+
+--linux-.--- is optional. There is one per directory.
+
+**** If --linux-.--- is missing, then Umsdos processes the directory the
+ same way the msdos driver does. Short file names, no goodies, default
+ owner and permissions. So each directory may or may not have this
+ --linux-.---
+
+Now, how to get those --linux-.---.
+
+\begin joke_section
+
+ Well send me a directory content
+ and I will send you one customised for you.
+ $5 per directory. Add any applicable taxes.
+\end joke_section
+
+A utility, umssync, creates those files and maintains them. It is available
+from the same directory above (sunsite) in the file umsdos_progs-0.3.tar.gz.
+A compiled version is available in umsdos-0.3a.bin.tar.gz.
+
+So in our example, after mounting mnt, we do
+
+umssync .
+
+This will promote this directory (a recursive option is available) to full
+umsdos capabilities (long names ...). An ls -l before and after won't show
+much difference however. The files which were there are still there. But now
+you can do all this:
+
+ chmod 644 *
+ chown you.your_groupe *
+ ls >THIS_IS.A.VERY.LONG.NAME
+ ln -s toto tata
+ ls -l
+
+Once a directory is promoted, all subdirectories created in it will inherit
+that promotion.
+
+What happens if you boot DOS and create files in those promoted directories?
+Umsdos won't notice new files, but will signal removed files (it won't crash).
+Using umssync in /etc/rc will make sure the DOS directory is in sync with
+the --linux-.---.
+
+Hope this helps!
+
diff --git a/fs/umsdos/check.c b/fs/umsdos/check.c
new file mode 100644
index 000000000..d1102b4ce
--- /dev/null
+++ b/fs/umsdos/check.c
@@ -0,0 +1,55 @@
+#include <asm/system.h>
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/head.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+
+extern unsigned long high_memory;
+
+/*
+ * Sanity check one slot of a page directory.
+ * Return 0 if the slot is empty or looks valid, 1 if it is suspicious
+ * (value at or above high_memory, or PAGE_PRESENT bit not set).
+ */
+static int check_one_table(unsigned long * page_dir)
+{
+	unsigned long slot = *page_dir;
+
+	/* An empty slot is always acceptable */
+	if (slot == 0)
+		return 0;
+	if (slot >= high_memory)
+		return 1;
+	if (!(slot & PAGE_PRESENT))
+		return 1;
+	return 0;
+}
+
+/*
+ * Debugging aid: check the page directory of the current process.
+ *
+ * It first reports how far the address of a local variable is from
+ * current->kernel_stack_page when that distance drops below 1500.
+ * It then walks the PTRS_PER_PAGE slots of the directory found in
+ * current->tss.cr3 and prints the index of each slot rejected by
+ * check_one_table(). Nothing is freed or modified.
+ *
+ * The walk is skipped when the mem_map count of the directory page is
+ * above 1, and also once an error has been recorded by an earlier call
+ * (err is static).
+ */
+void check_page_tables(void)
+{
+	unsigned long pg_dir;
+	static int err = 0;
+	int stack_level = (long)(&pg_dir)-current->kernel_stack_page;
+
+	if (stack_level < 1500) printk ("** %d ** ",stack_level);
+	pg_dir = current->tss.cr3;
+	if (mem_map[MAP_NR(pg_dir)] > 1) return;
+	if (err != 0) return;
+	{
+		unsigned long *base = (unsigned long *) pg_dir;
+		unsigned long *slot = base;
+		int i = 0;
+		while (i < PTRS_PER_PAGE){
+			if (check_one_table(slot)){
+				err++;
+				printk ("|%d| ",slot-base);
+			}
+			i++;
+			slot++;
+		}
+		if (err) printk ("Erreur MM %d\n",err);
+	}
+}
+
diff --git a/fs/umsdos/dir.c b/fs/umsdos/dir.c
new file mode 100644
index 000000000..2a668d102
--- /dev/null
+++ b/fs/umsdos/dir.c
@@ -0,0 +1,706 @@
+/*
+ * linux/fs/umsdos/dir.c
+ *
+ * Written 1993 by Jacques Gelinas
+ * Inspired from linux/fs/msdos/... : Werner Almesberger
+ *
+ * Extended MS-DOS directory handling functions
+ */
+
+#include <asm/segment.h>
+
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/msdos_fs.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/limits.h>
+#include <linux/umsdos_fs.h>
+#include <linux/malloc.h>
+
+#define PRINTK(x)
+#define Printk(x) printk x
+
+#define UMSDOS_SPECIAL_DIRFPOS 3
+extern struct inode *pseudo_root;
+/*
+	read() entry point for directories: it always fails with -EISDIR.
+	So grep * doesn't complain in the presence of directories.
+*/
+int UMSDOS_dir_read(
+	struct inode *inode,
+	struct file *filp,
+	char *buf,
+	int count)
+{
+	/* None of the arguments is looked at */
+	return -EISDIR;
+}
+/*
+ Read count directory entries from directory filp
+ Return a negative value from linux/errno.h.
+ Return > 0 if success (the length of the file name).
+
+ This function is used by the normal readdir VFS entry point and by
+ some function who try to find out info on a file from a pure MSDOS
+ inode. See umsdos_locate_ancestor() below.
+*/
+static int umsdos_readdir_x(
+ struct inode *dir, /* Directory being read */
+ struct file *filp, /* Holds the read position (f_pos) in dir */
+ struct dirent *dirent, /* Will hold the directory entry read */
+ int dirent_in_fs, /* dirent point in user's space ? */
+ int count,
+ struct umsdos_dirent *u_entry, /* Optional umsdos entry */
+ int follow_hlink,
+ off_t *pt_f_pos) /* will hold the offset of the entry in EMD */
+{
+ /*
+ Return value: the length of the name on success, 0 when
+ there is no more entry, a negative error code otherwise
+ (-EIO if the EMD file can't be read).
+
+ The whole operation is bracketed by umsdos_startlookup()
+ and umsdos_endlookup() on dir.
+ */
+ int ret = 0;
+
+ umsdos_startlookup(dir);
+ if (filp->f_pos == UMSDOS_SPECIAL_DIRFPOS
+ && dir == pseudo_root
+ && dirent_in_fs){
+ /*
+ We don't need to simulate this pseudo directory
+ when umsdos_readdir_x is called for internal operation
+ of umsdos. This is why dirent_in_fs is tested
+ */
+ /* #Specification: pseudo root / directory /DOS
+ When umsdos operates in pseudo root mode (C:\linux is the
+ linux root), it simulate a directory /DOS which points to
+ the real root of the file system.
+ */
+ put_fs_long(dir->i_sb->s_mounted->i_ino,&dirent->d_ino);
+ memcpy_tofs (dirent->d_name,"DOS",3);
+ put_fs_byte(0,dirent->d_name+3);
+ put_fs_word (3,&dirent->d_reclen);
+ if (u_entry != NULL) u_entry->flags = 0;
+ ret = 3;
+ filp->f_pos++;
+ }else if (filp->f_pos < 2
+ || (dir != dir->i_sb->s_mounted && filp->f_pos == 32)){
+ /* #Specification: readdir / . and ..
+ The msdos filesystem manage the . and .. entry properly
+ so the EMD file won't hold any info about it.
+
+ In readdir, we assume that for the root directory
+ the read position will be 0 for ".", 1 for "..". For
+ a non root directory, the read position will be 0 for "."
+ and 32 for "..".
+ */
+ /*
+ This is a trick used by the msdos file system (fs/msdos/dir.c)
+ to manage . and .. for the root directory of a file system.
+ Since there is no such entry in the root, fs/msdos/dir.c
+ use the following:
+
+ if f_pos == 0, return ".".
+ if f_pos == 1, return "..".
+
+ So let msdos handle it
+
+ Since umsdos entries are much larger, we share the same f_pos.
+ if f_pos is 0 or 1 or 32, we are clearly looking at . and
+ ..
+
+ As soon as we get f_pos == 2 or f_pos == 64, then back to
+ 0, but this time we are reading the EMD file.
+
+ Well, not so true. The problem, is that UMSDOS_REC_SIZE is
+ also 64, so as soon as we read the first record in the
+ EMD, we are back at offset 64. So we set the offset
+ to UMSDOS_SPECIAL_DIRFPOS(3) as soon as we have read the
+ .. entry from msdos.
+ */
+ ret = msdos_readdir(dir,filp,dirent,count);
+ if (filp->f_pos == 64) filp->f_pos = UMSDOS_SPECIAL_DIRFPOS;
+ if (u_entry != NULL) u_entry->flags = 0;
+ }else{
+ /* From here on, f_pos is an offset in the EMD file */
+ struct inode *emd_dir = umsdos_emd_dir_lookup(dir,0);
+ if (emd_dir != NULL){
+ /* Positions up to UMSDOS_SPECIAL_DIRFPOS+1 were used for */
+ /* . .. and the pseudo DOS entry: restart at the beginning */
+ /* of the EMD file */
+ if (filp->f_pos <= UMSDOS_SPECIAL_DIRFPOS+1) filp->f_pos = 0;
+ PRINTK (("f_pos %ld i_size %d\n",filp->f_pos,emd_dir->i_size));
+ ret = 0;
+ /* Loop until one entry can be returned, skipping empty */
+ /* records (name_len == 0) and the pseudo root */
+ while (filp->f_pos < emd_dir->i_size){
+ struct umsdos_dirent entry;
+ off_t cur_f_pos = filp->f_pos;
+ if (umsdos_emd_dir_readentry (emd_dir,filp,&entry)!=0){
+ ret = -EIO;
+ break;
+ }else if (entry.name_len != 0){
+ /* #Specification: umsdos / readdir
+ umsdos_readdir() should fill a struct dirent with
+ an inode number. The cheap way to get it is to
+ do a lookup in the MSDOS directory for each
+ entry processed by the readdir() function.
+ This is not very efficient, but very simple. The
+ other way around is to maintain a copy of the inode
+ number in the EMD file. This is a problem because
+ this has to be maintained in sync using tricks.
+ Remember that MSDOS (the OS) does not update the
+ modification time (mtime) of a directory. There is
+ no easy way to tell that a directory was modified
+ during a DOS session and synchronise the EMD file.
+
+ Suggestion welcome.
+
+ So the easy way is used!
+ */
+ struct umsdos_info info;
+ struct inode *inode;
+ int lret;
+ umsdos_parse (entry.name,entry.name_len,&info);
+ info.f_pos = cur_f_pos;
+ *pt_f_pos = cur_f_pos;
+ umsdos_manglename (&info);
+ lret = umsdos_real_lookup (dir,info.fake.fname
+ ,info.fake.len,&inode);
+ PRINTK (("Cherche inode de %s lret %d flags %d\n"
+ ,info.fake.fname,lret,entry.flags));
+ if (lret == 0
+ && (entry.flags & UMSDOS_HLINK)
+ && follow_hlink){
+ /* Replace the link file by the inode it points to */
+ struct inode *rinode;
+ lret = umsdos_hlink2inode (inode,&rinode);
+ inode = rinode;
+ }
+ if (lret == 0){
+ /* #Specification: pseudo root / reading real root
+ The pseudo root (/linux) is logically
+ erased from the real root. This mean that
+ ls /DOS, won't show "linux". This avoids
+ infinite recursion /DOS/linux/DOS/linux while
+ walking the file system.
+ */
+ if (inode != pseudo_root){
+ PRINTK (("Trouve ino %d ",inode->i_ino));
+ if (dirent_in_fs){
+ /* dirent is in user space */
+ put_fs_long(inode->i_ino,&dirent->d_ino);
+ memcpy_tofs (dirent->d_name,entry.name
+ ,entry.name_len);
+ put_fs_byte(0,dirent->d_name+entry.name_len);
+ put_fs_word (entry.name_len
+ ,&dirent->d_reclen);
+ /* In this case, the caller only needs */
+ /* flags */
+ if (u_entry != NULL){
+ u_entry->flags = entry.flags;
+ }
+ }else{
+ /* dirent is in kernel memory */
+ dirent->d_ino = inode->i_ino;
+ memcpy (dirent->d_name,entry.name
+ ,entry.name_len);
+ dirent->d_name[entry.name_len] = '\0';
+ dirent->d_reclen = entry.name_len;
+ if (u_entry != NULL) *u_entry = entry;
+ }
+ ret = entry.name_len;
+ iput (inode);
+ break;
+ }
+ /* The pseudo root is skipped: read the next entry */
+ iput (inode);
+ }else{
+ /* #Specification: umsdos / readdir / not in MSDOS
+ During a readdir operation, if the file is not
+ in the MSDOS directory anymore, the entry is
+ removed from the EMD file silently.
+ */
+ /* NOTE(review): this branch is also taken when */
+ /* umsdos_hlink2inode() fails, whatever the reason */
+ ret = umsdos_writeentry (dir,emd_dir,&info,1);
+ if (ret != 0){
+ break;
+ }
+ }
+ }
+ }
+ iput(emd_dir);
+ }
+ }
+ umsdos_endlookup(dir);
+ PRINTK (("read dir %p pos %d ret %d\n",dir,filp->f_pos,ret));
+ return ret;
+}
+/*
+	readdir entry point of the VFS for a umsdos directory.
+	The dirent is in user space and hard links are followed.
+	Entries carrying the UMSDOS_HIDDEN flag are skipped.
+
+	Return a negative value from linux/errno.h.
+	Return > 0 if success (the length of the file name).
+*/
+static int UMSDOS_readdir(
+	struct inode *dir,		/* Directory being read */
+	struct file *filp,		/* Holds the read position in dir */
+	struct dirent *dirent,	/* Will hold the directory entry */
+	int count)
+{
+	struct umsdos_dirent entry;
+	off_t f_pos;
+	int ret;
+	do{
+		ret = umsdos_readdir_x (dir,filp,dirent,1,count,&entry,1,&f_pos);
+		/* entry.flags is only looked at when an entry was read */
+	}while (ret > 0 && (entry.flags & UMSDOS_HIDDEN));
+	return ret;
+}
+/*
+ Complete the inode content with info from the EMD file
+*/
+void umsdos_lookup_patch (
+ struct inode *dir, /* Directory in which inode was found */
+ struct inode *inode, /* Inode to complete */
+ struct umsdos_dirent *entry, /* EMD entry describing inode */
+ off_t emd_pos) /* Passed unchanged to umsdos_patch_inode() */
+{
+ /*
+ This function modify the state of a dir inode. It decides
+ if the dir is a umsdos dir or a dos dir. This is done
+ deeper in umsdos_patch_inode() called at the end of this function.
+
+ umsdos_patch_inode() may block because it is doing disk access.
+ At the same time, another process may get here to initialise
+ the same dir inode. There is 3 cases.
+
+ 1-The inode is already initialised. We do nothing.
+ 2-The inode is not initialised. We lock access and do it.
+ 3-Like 2 but another process has lock the inode, so we try
+ to lock it and right after check if initialisation is still
+ needed.
+
+
+ Thanks to the mem option of the kernel command line, it was
+ possible to consistently reproduce this problem by limiting
+ my mem to 4 meg and running X.
+ */
+ /*
+ Do this only if the inode is freshly read, because we will lose
+ the current (updated) content.
+ */
+ /*
+ A lookup of a mount point directory yield the inode into
+ the other fs, so we don't care about initialising it. iget()
+ does this automatically.
+ */
+ if (inode->i_sb == dir->i_sb && !umsdos_isinit(inode)){
+ /* Only directories are locked; the test is redone once locked (case 3) */
+ if (S_ISDIR(inode->i_mode)) umsdos_lockcreate(inode);
+ if (!umsdos_isinit(inode)){
+ /* #Specification: umsdos / lookup / inode info
+ After successfully reading an inode from the MSDOS
+ filesystem, we use the EMD file to complete it.
+ We update the following field.
+
+ uid, gid, atime, ctime, mtime, mode.
+
+ We rely on MSDOS for mtime. If the file
+ was modified during an MSDOS session, at least
+ mtime will be meaningful. We do this only for regular
+ file.
+
+ We don't rely on MSDOS for mtime for directory because
+ the MSDOS directory date is creation time (strange
+ MSDOS behavior) which fit nowhere in the three UNIX
+ time stamp.
+ */
+ /* Note that this also writes the MSDOS mtime into *entry */
+ if (S_ISREG(entry->mode)) entry->mtime = inode->i_mtime;
+ inode->i_mode = entry->mode;
+ inode->i_rdev = entry->rdev;
+ inode->i_atime = entry->atime;
+ inode->i_ctime = entry->ctime;
+ inode->i_mtime = entry->mtime;
+ inode->i_uid = entry->uid;
+ inode->i_gid = entry->gid;
+ /* #Specification: umsdos / i_nlink
+ The nlink field of an inode is maintain by the MSDOS file system
+ for directory and by UMSDOS for other file. The logic is that
+ MSDOS is already figuring out what to do for directories and
+ does nothing for other files. For MSDOS, there are no hard link
+ so all file carry nlink==1. UMSDOS use some info in the
+ EMD file to plug the correct value.
+ */
+ if (!S_ISDIR(entry->mode)){
+ if (entry->nlink > 0){
+ inode->i_nlink = entry->nlink;
+ }else{
+ /* i_nlink is left as it was */
+ printk ("UMSDOS: lookup_patch entry->nlink < 1 ???\n");
+ }
+ }
+ umsdos_patch_inode(inode,dir,emd_pos);
+ }
+ /* i_mode may have been replaced above by entry->mode */
+ if (S_ISDIR(inode->i_mode)) umsdos_unlockcreate(inode);
+ /* Debugging check: presumably umsdos_patch_inode() should have set i_emd_owner - TODO confirm */
+if (inode->u.umsdos_i.i_emd_owner==0) printk ("emd_owner still 0 ???\n");
+ }
+}
+/*
+	Locate entry of an inode in a directory.
+	Return 0 or a negative error code (-ENOENT if inode can't be
+	found in dir).
+
+	dir is the directory to search, inode the inode to look for and
+	entry will hold the entry found. For a DOS directory (no EMD file)
+	only entry->name and entry->name_len are filled.
+
+	Normally, this function must succeed. It means a strange corruption
+	in the file system if not.
+*/
+int umsdos_inode2entry (
+	struct inode *dir,
+	struct inode *inode,
+	struct umsdos_dirent *entry)	/* Will hold the entry */
+{
+	int ret = -ENOENT;
+	if (inode == pseudo_root){
+		/*
+			Quick way to find the name.
+			Also umsdos_readdir_x won't show /linux anyway
+		*/
+		memcpy (entry->name,UMSDOS_PSDROOT_NAME,UMSDOS_PSDROOT_LEN+1);
+		entry->name_len = UMSDOS_PSDROOT_LEN;
+		ret = 0;
+	}else{
+		struct inode *emddir = umsdos_emd_dir_lookup(dir,0);
+		/* Only the pointer value is tested below, so release it now */
+		iput (emddir);
+		if (emddir == NULL){
+			/* This is a DOS directory */
+			struct file filp;
+			filp.f_reada = 1;
+			filp.f_pos = 0;
+			while (1){
+				struct dirent dirent;
+				if (umsdos_readdir_kmem (dir,&filp,&dirent,1) <= 0){
+					printk ("UMSDOS: can't locate inode %ld in DOS directory???\n"
+						,inode->i_ino);
+					/*
+						End of directory or error: stop here
+						with -ENOENT instead of looping forever.
+					*/
+					break;
+				}else if (dirent.d_ino == inode->i_ino){
+					ret = 0;
+					memcpy (entry->name,dirent.d_name,dirent.d_reclen);
+					entry->name[dirent.d_reclen] = '\0';
+					entry->name_len = dirent.d_reclen;
+					inode->u.umsdos_i.i_dir_owner = dir->i_ino;
+					inode->u.umsdos_i.i_emd_owner = 0;
+					umsdos_setup_dir_inode(inode);
+					break;
+				}
+			}
+		}else{
+			/* skip . and .. see umsdos_readdir_x() */
+			struct file filp;
+			filp.f_reada = 1;
+			filp.f_pos = UMSDOS_SPECIAL_DIRFPOS;
+			while (1){
+				struct dirent dirent;
+				off_t f_pos;
+				/* dirent is in kernel memory, hard links are not followed */
+				if (umsdos_readdir_x(dir,&filp,&dirent
+					,0,1,entry,0,&f_pos) <= 0){
+					printk ("UMSDOS: can't locate inode %ld in EMD file???\n"
+						,inode->i_ino);
+					break;
+				}else if (dirent.d_ino == inode->i_ino){
+					ret = 0;
+					umsdos_lookup_patch (dir,inode,entry,f_pos);
+					break;
+				}
+			}
+		}
+	}
+	return ret;
+}
+/*
+	Locate the parent of a directory and the info on that directory.
+	*result receives whatever the lookup of ".." in dir produced and
+	entry describes dir as seen from that parent.
+	Return 0 or a negative error code.
+*/
+static int umsdos_locate_ancestor (
+	struct inode *dir,
+	struct inode **result,
+	struct umsdos_dirent *entry)
+{
+	int err;
+	umsdos_patch_inode (dir,NULL,0);
+	err = umsdos_real_lookup (dir,"..",2,result);
+	PRINTK (("result %d %x ",err,*result));
+	/* With the parent in hand, search it for the entry of dir */
+	if (err == 0) err = umsdos_inode2entry (*result,dir,entry);
+	PRINTK (("\n"));
+	return err;
+}
+/*
+	Build the path name of an inode (relative to the file system).
+	This function is needed to set (pseudo) hard link.
+
+	It uses the same strategy as the standard getcwd(): the path is
+	built backward, from the end of a PATH_MAX buffer, one component
+	at a time while walking up to the root of the file system.
+
+	The result is copied to path.
+	Return 0 if ok, a negative error code if not: -ENOMEM if the work
+	buffer can't be allocated, -ENAMETOOLONG if the path does not fit
+	in PATH_MAX bytes, or the error of the failing lookup.
+	On error, path is not meaningful.
+*/
+int umsdos_locate_path (
+	struct inode *inode,
+	char *path)
+{
+	int ret = 0;
+	struct inode *dir = inode;
+	char *bpath = (char*)kmalloc(PATH_MAX,GFP_KERNEL);
+	if (bpath == NULL){
+		/*
+			No reference was acquired on dir (== inode) yet, so
+			there is nothing to release here.
+		*/
+		ret = -ENOMEM;
+	}else{
+		struct umsdos_dirent entry;
+		char *ptbpath = bpath+PATH_MAX-1;
+		*ptbpath = '\0';
+		PRINTK (("locate_path mode %x ",inode->i_mode));
+		if (!S_ISDIR(inode->i_mode)){
+			/* Start with the name of the file in its directory */
+			ret = umsdos_get_dirowner (inode,&dir);
+			PRINTK (("locate_path ret %d ",ret));
+			if (ret == 0){
+				ret = umsdos_inode2entry (dir,inode,&entry);
+				if (ret == 0){
+					if (entry.name_len > (int)(ptbpath - bpath)){
+						ret = -ENAMETOOLONG;
+					}else{
+						ptbpath -= entry.name_len;
+						memcpy (ptbpath,entry.name,entry.name_len);
+						PRINTK (("ptbpath :%s: ",ptbpath));
+					}
+				}
+			}
+		}else{
+			/* The loop below does iput() on each directory it leaves */
+			dir->i_count++;
+		}
+		if (ret == 0){
+			while (dir != dir->i_sb->s_mounted){
+				struct inode *adir;
+				ret = umsdos_locate_ancestor (dir,&adir,&entry);
+				iput (dir);
+				dir = NULL;
+				PRINTK (("ancestor %d ",ret));
+				if (ret != 0) break;
+				/* Room is needed for the name and the '/' */
+				if (entry.name_len + 1 > (int)(ptbpath - bpath)){
+					iput (adir);
+					ret = -ENAMETOOLONG;
+					break;
+				}
+				*--ptbpath = '/';
+				ptbpath -= entry.name_len;
+				memcpy (ptbpath,entry.name,entry.name_len);
+				dir = adir;
+				PRINTK (("ptbpath :%s: ",ptbpath));
+			}
+		}
+		strcpy (path,ptbpath);
+		kfree (bpath);
+		/* Release the last directory reached (may be NULL after an error) */
+		iput (dir);
+	}
+	PRINTK (("\n"));
+	return ret;
+}
+
+/*
+	Tell if a name designates the pseudo DOS entry of the pseudo root.
+	Return != 0 if dir is the pseudo root and name is exactly "DOS"
+	(len characters, no terminating 0 needed), 0 otherwise.
+*/
+int umsdos_is_pseudodos (
+	struct inode *dir,
+	const char *name,
+	int len)
+{
+	/* #Specification: pseudo root / DOS hard coded
+		The pseudo sub-directory DOS in the pseudo root is hard coded.
+		The name is DOS. This is done this way to help standardised
+		the umsdos layout. The idea is that from now on /DOS is
+		a reserved path and nobody will think of using such a path
+		for a package.
+	*/
+	if (dir != pseudo_root) return 0;
+	if (len != 3) return 0;
+	return name[0] == 'D' && name[1] == 'O' && name[2] == 'S';
+}
+/*
+ Check if a file exist in the current directory.
+ Return 0 if ok, negative error code if not (ex: -ENOENT).
+*/
+static int umsdos_lookup_x (
+ struct inode *dir, /* Directory to search; released (iput) before returning */
+ const char *name,
+ int len,
+ struct inode **result, /* Will hold inode of the file, if successful */
+ int nopseudo) /* Don't care about pseudo root mode */
+{
+ int ret = -ENOENT;
+ *result = NULL;
+ umsdos_startlookup(dir);
+ if (len == 1 && name[0] == '.'){
+ /* "." is dir itself, with one more reference */
+ *result = dir;
+ dir->i_count++;
+ ret = 0;
+ }else if (len == 2 && name[0] == '.' && name[1] == '.'){
+ if (pseudo_root != NULL && dir == pseudo_root->i_sb->s_mounted){
+ /* #Specification: pseudo root / .. in real root
+ Whenever a lookup is those in the real root for
+ the directory .., and pseudo root is active, the
+ pseudo root is returned.
+ */
+ ret = 0;
+ *result = pseudo_root;
+ pseudo_root->i_count++;
+ }else{
+ /* #Specification: locating .. / strategy
+ We use the msdos filesystem to locate the parent directory.
+ But it is more complicated than that.
+
+ We have to step back even further to
+ get the parent of the parent, so we can get the EMD
+ of the parent of the parent. Using the EMD file, we can
+ locate all the info on the parent, such a permissions
+ and owner.
+ */
+ ret = umsdos_real_lookup (dir,"..",2,result);
+ PRINTK (("ancestor ret %d dir %p *result %p ",ret,dir,*result));
+ if (ret == 0
+ && *result != dir->i_sb->s_mounted
+ && *result != pseudo_root){
+ struct inode *aadir;
+ struct umsdos_dirent entry;
+ /* Only the side effects on *result are wanted: */
+ /* the grand-parent and the entry are dropped */
+ /* NOTE(review): if this fails, *result is still set */
+ /* while ret != 0 - confirm how callers cope with that */
+ ret = umsdos_locate_ancestor (*result,&aadir,&entry);
+ iput (aadir);
+ }
+ }
+ }else if (umsdos_is_pseudodos(dir,name,len)){
+ /* #Specification: pseudo root / lookup(DOS)
+ A lookup of DOS in the pseudo root will always succeed
+ and return the inode of the real root.
+ */
+ *result = dir->i_sb->s_mounted;
+ (*result)->i_count++;
+ ret = 0;
+ }else{
+ struct umsdos_info info;
+ ret = umsdos_parse (name,len,&info);
+ if (ret == 0) ret = umsdos_findentry (dir,&info,0);
+ PRINTK (("lookup %s pos %d ret %d len %d ",info.fake.fname,info.f_pos,ret
+ ,info.fake.len));
+ if (ret == 0){
+ /* #Specification: umsdos / lookup
+ A lookup for a file is done in two step. First, we locate
+ the file in the EMD file. If not present, we return
+ an error code (-ENOENT). If it is there, we repeat the
+ operation on the msdos file system. If this fails, it means
+ that the file system is not in sync with the emd file.
+ We silently remove this entry from the emd file,
+ and return ENOENT.
+ */
+ struct inode *inode;
+ ret = umsdos_real_lookup (dir,info.fake.fname,info.fake.len,result);
+ inode = *result;
+ if (inode == NULL){
+ /* ret is whatever umsdos_real_lookup() returned */
+ printk ("UMSDOS: Erase entry %s, out of sync with MsDOS\n"
+ ,info.fake.fname);
+ umsdos_delentry (dir,&info,S_ISDIR(info.entry.mode));
+ }else{
+ umsdos_lookup_patch (dir,inode,&info.entry,info.f_pos);
+ PRINTK (("lookup ino %d flags %d\n",inode->i_ino
+ ,info.entry.flags));
+ if (info.entry.flags & UMSDOS_HLINK){
+ /* *result becomes the inode the link points to */
+ ret = umsdos_hlink2inode (inode,result);
+ }
+ if (*result == pseudo_root && !nopseudo){
+ /* #Specification: pseudo root / dir lookup
+ For the same reason as readdir, a lookup in /DOS for
+ the pseudo root directory (linux) will fail.
+ */
+ /*
+ This has to be allowed for resolving hard link
+ which are recorded independently of the pseudo-root
+ mode.
+ */
+ iput (pseudo_root);
+ *result = NULL;
+ ret = -ENOENT;
+ }
+ }
+ }
+ }
+ umsdos_endlookup(dir);
+ iput (dir);
+ return ret;
+}
+/*
+	lookup entry point of the VFS: check if a file exist in the
+	current directory, honoring the pseudo root mode.
+	Return 0 if ok, negative error code if not (ex: -ENOENT).
+*/
+int UMSDOS_lookup (
+	struct inode *dir,
+	const char *name,
+	int len,
+	struct inode **result)	/* Will hold inode of the file, if successful */
+{
+	int ret = umsdos_lookup_x(dir,name,len,result,0);
+	return ret;
+}
+/*
+	Locate the inode pointed by a (pseudo) hard link.
+	The content of the link file is a path, relative to the root of
+	the file system, which is walked one component at a time.
+
+	hlink is released (iput) in all cases.
+	*result will hold the inode found, or NULL.
+
+	Return 0 if ok, a negative error code if not: -ENOMEM if the
+	path buffer can't be allocated, -EIO if the link file can't be
+	read or is too large to hold a valid path, or the error of the
+	failing lookup.
+*/
+int umsdos_hlink2inode (struct inode *hlink, struct inode **result)
+{
+	int ret = -EIO;
+	char *path = (char*)kmalloc(PATH_MAX,GFP_KERNEL);
+	*result = NULL;
+	if (path == NULL){
+		ret = -ENOMEM;
+		iput (hlink);
+	}else{
+		struct file filp;
+		filp.f_reada = 1;
+		filp.f_pos = 0;
+		PRINTK (("hlink2inode "));
+		/*
+			The path and its terminating 0 must fit in the buffer:
+			a larger link file is refused instead of overrunning it.
+		*/
+		if (hlink->i_size < PATH_MAX
+			&& umsdos_file_read_kmem (hlink,&filp,path,hlink->i_size)
+				==hlink->i_size){
+			struct inode *dir;
+			char *pt = path;
+			dir = hlink->i_sb->s_mounted;
+			path[hlink->i_size] = '\0';
+			iput (hlink);
+			/* The lookup functions below release dir */
+			dir->i_count++;
+			while (1){
+				/* Isolate the next component of the path */
+				char *start = pt;
+				int len;
+				while (*pt != '\0' && *pt != '/') pt++;
+				len = (int)(pt - start);
+				if (*pt == '/') *pt++ = '\0';
+				if (dir->u.umsdos_i.i_emd_dir == 0){
+					/* This is a DOS directory */
+					ret = msdos_lookup(dir,start,len,result);
+				}else{
+					ret = umsdos_lookup_x(dir,start,len,result,1);
+				}
+				PRINTK (("h2n lookup :%s: -> %d ",start,ret));
+				if (ret == 0 && *pt != '\0'){
+					/* More components to go: step into the one found */
+					dir = *result;
+				}else{
+					break;
+				}
+			}
+		}else{
+			iput (hlink);
+		}
+		PRINTK (("hlink2inode ret = %d %p -> %p\n",ret,hlink,*result));
+		kfree (path);
+	}
+	return ret;
+}
+
+/* File operations on a umsdos directory. The order of the members matters. */
+static struct file_operations umsdos_dir_operations = {
+ NULL, /* lseek - default */
+ UMSDOS_dir_read, /* read - always fails with -EISDIR */
+ NULL, /* write - bad */
+ UMSDOS_readdir, /* readdir */
+ NULL, /* select - default */
+ UMSDOS_ioctl_dir, /* ioctl */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ NULL /* fsync */
+};
+
+/* Inode operations on a umsdos directory. The order of the members matters. */
+struct inode_operations umsdos_dir_inode_operations = {
+ &umsdos_dir_operations, /* default directory file-ops */
+ UMSDOS_create, /* create */
+ UMSDOS_lookup, /* lookup */
+ UMSDOS_link, /* link */
+ UMSDOS_unlink, /* unlink */
+ UMSDOS_symlink, /* symlink */
+ UMSDOS_mkdir, /* mkdir */
+ UMSDOS_rmdir, /* rmdir */
+ UMSDOS_mknod, /* mknod */
+ UMSDOS_rename, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+
+
+
+
+
+
+
+
+
diff --git a/fs/umsdos/emd.c b/fs/umsdos/emd.c
new file mode 100644
index 000000000..e4d6a9470
--- /dev/null
+++ b/fs/umsdos/emd.c
@@ -0,0 +1,505 @@
+/*
+ * linux/fs/umsdos/emd.c
+ *
+ * Written 1993 by Jacques Gelinas
+ *
+ * Extended MS-DOS directory handling functions
+ */
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/kernel.h>
+#include <asm/segment.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/msdos_fs.h>
+#include <linux/umsdos_fs.h>
+
+#define PRINTK(x)
+#define Printk(x) printk x
+
+/*
+	Call msdos_readdir() with a dirent located in kernel memory.
+	The fs segment is switched to KERNEL_DS for the call and restored
+	afterward. Return whatever msdos_readdir() returned.
+*/
+int umsdos_readdir_kmem(
+	struct inode *inode,
+	struct file *filp,
+	struct dirent *dirent,
+	int count)
+{
+	int saved_fs = get_fs();
+	int nb;
+	set_fs (KERNEL_DS);
+	nb = msdos_readdir(inode,filp,dirent,count);
+	set_fs (saved_fs);
+	return nb;
+}
+/*
+	Read a file into kernel space memory.
+	The fs segment is switched to KERNEL_DS around msdos_file_read()
+	and restored afterward. Return whatever msdos_file_read() returned.
+*/
+int umsdos_file_read_kmem(
+	struct inode *inode,
+	struct file *filp,
+	char *buf,
+	int count)
+{
+	int saved_fs = get_fs();
+	int nb;
+	set_fs (KERNEL_DS);
+	nb = msdos_file_read(inode,filp,buf,count);
+	set_fs (saved_fs);
+	return nb;
+}
+/*
+	Write to a file from kernel space.
+	The fs segment is switched to KERNEL_DS around msdos_file_write()
+	and restored afterward. Return whatever msdos_file_write() returned.
+*/
+int umsdos_file_write_kmem(
+	struct inode *inode,
+	struct file *filp,
+	char *buf,
+	int count)
+{
+	int saved_fs = get_fs();
+	int nb;
+	set_fs (KERNEL_DS);
+	nb = msdos_file_write(inode,filp,buf,count);
+	set_fs (saved_fs);
+	return nb;
+}
+
+
+/*
+	Write a block of bytes into one EMD file.
+	The block of data is NOT in user space.
+	filp->f_flags is reset to 0 before writing.
+
+	Return 0 if ok, -EIO unless exactly count bytes were written.
+*/
+int umsdos_emd_dir_write (
+	struct inode *emd_dir,
+	struct file *filp,
+	char *buf,	/* buffer in kernel memory, not in user space */
+	int count)
+{
+	filp->f_flags = 0;
+	if (umsdos_file_write_kmem (emd_dir,filp,buf,count) != count){
+		return -EIO;
+	}
+	return 0;
+}
+/*
+	Read a block of bytes from one EMD file.
+	The block of data is NOT in user space.
+	filp->f_flags is reset to 0 before reading.
+
+	Return 0 if ok, -EIO (with a message logged) unless exactly
+	count bytes were read.
+*/
+int umsdos_emd_dir_read (
+	struct inode *emd_dir,
+	struct file *filp,
+	char *buf,	/* buffer in kernel memory, not in user space */
+	int count)
+{
+	filp->f_flags = 0;
+	if (umsdos_file_read_kmem (emd_dir,filp,buf,count) == count){
+		return 0;
+	}
+	printk ("UMSDOS: problem with EMD file. Can't read\n");
+	return -EIO;
+}
+/*
+	Locate the EMD file in a directory and optionally (creat != 0),
+	creates it. The inode number of the EMD file is remembered in
+	dir->u.umsdos_i.i_emd_dir, so the search is done only once.
+
+	Return NULL if error or if there is no EMD file. If ok, return
+	the inode of the EMD file.
+*/
+struct inode *umsdos_emd_dir_lookup(struct inode *dir, int creat)
+{
+	struct inode *emd = NULL;
+	if (dir->u.umsdos_i.i_emd_dir == 0){
+		/* Not known yet: ask the msdos directory */
+		umsdos_real_lookup (dir,UMSDOS_EMD_FILE,UMSDOS_EMD_NAMELEN,&emd);
+		PRINTK (("emd_dir_lookup "));
+		if (emd == NULL){
+			if (creat){
+				int code;
+				PRINTK (("avant create "));
+				dir->i_count++;
+				code = msdos_create (dir,UMSDOS_EMD_FILE,UMSDOS_EMD_NAMELEN
+					,S_IFREG|0777,&emd);
+				PRINTK (("Creat EMD code %d ret %x ",code,emd));
+				if (emd == NULL){
+					printk ("UMSDOS: Can't create EMD file\n");
+				}else{
+					dir->u.umsdos_i.i_emd_dir = emd->i_ino;
+				}
+			}
+		}else{
+			PRINTK (("Find --linux "));
+			dir->u.umsdos_i.i_emd_dir = emd->i_ino;
+		}
+	}else{
+		emd = iget (dir->i_sb,dir->u.umsdos_i.i_emd_dir);
+		PRINTK (("deja trouve %d %x [%d] "
+			,dir->u.umsdos_i.i_emd_dir,emd,emd->i_count));
+	}
+	if (emd != NULL){
+		/* Disable UMSDOS_notify_change() for EMD file */
+		emd->u.umsdos_i.i_emd_owner = 0xffffffff;
+	}
+	return emd;
+}
+
+/*
+	Read an entry from the EMD file, at the current position of filp.
+	Support variable length record: the first UMSDOS_REC_SIZE bytes
+	are read, then the remainder if the name length asks for a
+	larger record.
+	Return -EIO if error, 0 if ok.
+*/
+int umsdos_emd_dir_readentry (
+	struct inode *emd_dir,
+	struct file *filp,
+	struct umsdos_dirent *entry)
+{
+	int recsize;
+	int ret = umsdos_emd_dir_read(emd_dir,filp,(char*)entry,UMSDOS_REC_SIZE);
+	if (ret != 0) return ret;
+	/* Variable size record. Maybe, we have to read some more */
+	recsize = umsdos_evalrecsize (entry->name_len);
+	if (recsize <= UMSDOS_REC_SIZE) return ret;
+	return umsdos_emd_dir_read(emd_dir,filp
+		,((char*)entry)+UMSDOS_REC_SIZE,recsize - UMSDOS_REC_SIZE);
+}
+/*
+	Write an entry in the EMD file, at offset info->f_pos, for
+	info->recsize bytes. On success, the ctime and mtime of dir are
+	updated and dir is marked dirty.
+	Return 0 if ok, -EIO if some error.
+*/
+int umsdos_writeentry (
+	struct inode *dir,
+	struct inode *emd_dir,
+	struct umsdos_info *info,
+	int free_entry)		/* This entry is deleted, so Write all 0's */
+{
+	struct file filp;
+	struct umsdos_dirent blank;
+	struct umsdos_dirent *rec = &info->entry;
+	int ret;
+	if (free_entry){
+		/* #Specification: EMD file / empty entries
+			Unused entry in the EMD file are identify
+			by the name_len field equal to 0. However to
+			help future extension (or bug correction :-( ),
+			empty entries are filled with 0.
+		*/
+		memset (&blank,0,sizeof(blank));
+		rec = &blank;
+	}else if (rec->name_len > 0){
+		/* Clear what follows the name in the name field */
+		memset (rec->name+rec->name_len,'\0'
+			,sizeof(rec->name)-rec->name_len);
+		/* #Specification: EMD file / spare bytes
+			10 bytes are unused in each record of the EMD. They
+			are set to 0 all the time. So it will be possible
+			to do new stuff and rely on the state of those
+			bytes in old EMD file around.
+		*/
+		memset (rec->spare,0,sizeof(rec->spare));
+	}
+	filp.f_pos = info->f_pos;
+	filp.f_reada = 0;
+	ret = umsdos_emd_dir_write(emd_dir,&filp,(char*)rec,info->recsize);
+	if (ret == 0){
+		dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+		dir->i_dirt = 1;
+	}else{
+		printk ("UMSDOS: problem with EMD file. Can't write\n");
+	}
+	return ret;
+}
+
+/* Size of the read buffer used to scan the EMD file: 8 records */
+#define CHUNK_SIZE (8*UMSDOS_REC_SIZE)
+/*
+ Read buffer over an EMD file, filled by umsdos_fillbuf().
+ filp.f_pos is the file offset just after the last byte read.
+*/
+struct find_buffer{
+ char buffer[CHUNK_SIZE];
+ int pos; /* read offset in buffer */
+ int size; /* Current size of buffer */
+ struct file filp;
+};
+
+/*
+	Fill the read buffer and take care of the byte remaining inside.
+	Unread bytes (from buf->pos to buf->size) are simply move to the
+	beginning, then the buffer is completed from the file, up to
+	CHUNK_SIZE bytes or the end of the file (inode->i_size).
+
+	Return -ENOENT if EOF (nothing left in the buffer and nothing to
+	read), 0 if ok, a negative error code if any problem.
+*/
+static int umsdos_fillbuf (
+	struct inode *inode,
+	struct find_buffer *buf)
+{
+	int ret = -ENOENT;
+	int mustmove = buf->size - buf->pos;
+	int mustread;
+	int remain;
+	if (mustmove > 0){
+		/*
+			Source and destination are in the same buffer and
+			may overlap, so memmove() is required here.
+		*/
+		memmove (buf->buffer,buf->buffer+buf->pos,mustmove);
+	}
+	buf->pos = 0;
+	mustread = CHUNK_SIZE - mustmove;
+	remain = inode->i_size - buf->filp.f_pos;
+	if (remain < mustread) mustread = remain;
+	if (mustread > 0){
+		ret = umsdos_emd_dir_read (inode,&buf->filp,buf->buffer+mustmove
+			,mustread);
+		if (ret == 0) buf->size = mustmove + mustread;
+	}else if (mustmove){
+		/*
+			Nothing more to read, but some bytes are left.
+			NOTE(review): 0 is returned although no new data was
+			added; confirm the caller can't loop on a truncated
+			record at the end of the EMD file.
+		*/
+		buf->size = mustmove;
+		ret = 0;
+	}
+	return ret;
+}
+
+/*
+	General search, locate a name in the EMD file or an empty slot to
+	store it. If info->entry.name_len == 0, the first empty record
+	is located and 0 is returned.
+
+	Caller must do iput on *pt_emd_dir.
+
+	Return 0 if found, -ENOENT if not found, another error code if
+	other problem. -EIO is returned if an entry extends past the end
+	of the EMD file or is too large for the read buffer.
+
+	When the name is not found, info->f_pos is set to the first empty
+	section large enough (info->recsize bytes) to hold the entry, or
+	to the end of the EMD file if there is none.
+
+	So this routine is used to either find an existing entry or to
+	create a new one, while making sure it is a new one. After you
+	get -ENOENT, you make sure the entry is stuffed correctly and
+	call umsdos_writeentry().
+
+	To delete an entry, you find it, zero out the entry (memset)
+	and call umsdos_writeentry().
+
+	All this to say that umsdos_writeentry must be called after this
+	function since it relies on the f_pos field of info.
+*/
+static int umsdos_find (
+	struct inode *dir,
+	struct umsdos_info *info,		/* Hold name and name_len */
+						/* Will hold the entry found */
+	struct inode **pt_emd_dir)		/* Will hold the emd_dir inode */
+						/* or NULL if not found */
+{
+	/* #Specification: EMD file structure
+		The EMD file uses a fairly simple layout. It is made of records
+		(UMSDOS_REC_SIZE == 64). When a name can't be written in a single
+		record, multiple contiguous records are allocated.
+	*/
+	int ret = -ENOENT;
+	struct inode *emd_dir = umsdos_emd_dir_lookup(dir,1);
+	if (emd_dir != NULL){
+		struct umsdos_dirent *entry = &info->entry;
+		int recsize = info->recsize;
+		struct {
+			off_t posok;	/* Position available to store the entry */
+			int found;	/* A valid empty position has been found */
+			off_t one;	/* One empty position -> maybe <- large enough */
+			int onesize;	/* size of empty region starting at one */
+		}empty;
+		/* Read several entries at a time to speed up the search */
+		struct find_buffer buf;
+		buf.pos = 0;
+		buf.size = 0;
+		buf.filp.f_pos = 0;
+		buf.filp.f_reada = 1;
+		empty.found = 0;
+		empty.posok = emd_dir->i_size;
+		empty.onesize = 0;
+		while (1){
+			struct umsdos_dirent *rentry = (struct umsdos_dirent*)
+				(buf.buffer + buf.pos);
+			/* Offset in the EMD file of the record at buf.pos */
+			int file_pos = buf.filp.f_pos - buf.size + buf.pos;
+			if (buf.pos == buf.size){
+				/* Buffer exhausted (or never filled) */
+				ret = umsdos_fillbuf (emd_dir,&buf);
+				if (ret < 0){
+					/* Not found, so note where it can be added */
+					info->f_pos = empty.posok;
+					break;
+				}
+			}else if (rentry->name_len == 0){
+				/* We are looking for an empty section at least */
+				/* recsize large */
+				if (entry->name_len == 0){
+					info->f_pos = file_pos;
+					ret = 0;
+					break;
+				}else if (!empty.found){
+					if (empty.onesize == 0){
+						/* This is the first empty record of a section */
+						empty.one = file_pos;
+					}
+					/* grow the empty section */
+					empty.onesize += UMSDOS_REC_SIZE;
+					if (empty.onesize == recsize){
+						/* here is a large enough section */
+						empty.posok = empty.one;
+						empty.found = 1;
+					}
+				}
+				buf.pos += UMSDOS_REC_SIZE;
+			}else{
+				int entry_size = umsdos_evalrecsize(rentry->name_len);
+				if (buf.pos+entry_size > buf.size){
+					/* The entry is only partly in the buffer */
+					ret = umsdos_fillbuf (emd_dir,&buf);
+					if (ret < 0){
+						/* Not found, so note where it can be added */
+						info->f_pos = empty.posok;
+						break;
+					}else if (entry_size > buf.size){
+						/*
+							The buffer is now either full or holds
+							everything up to the end of the EMD file and
+							the entry still does not fit. Another
+							umsdos_fillbuf() would change nothing, so
+							give up instead of looping forever on a
+							truncated or corrupted EMD file.
+						*/
+						info->f_pos = empty.posok;
+						ret = -EIO;
+						break;
+					}
+				}else{
+					empty.onesize = 0;	/* Reset the free slot search */
+					if (entry->name_len == rentry->name_len
+						&& memcmp(entry->name,rentry->name,rentry->name_len)
+							==0){
+						info->f_pos = file_pos;
+						*entry = *rentry;
+						ret = 0;
+						break;
+					}else{
+						buf.pos += entry_size;
+					}
+				}
+			}
+		}
+		/* info->f_pos is set on every exit of the loop */
+		umsdos_manglename(info);
+	}
+	*pt_emd_dir = emd_dir;
+	return ret;
+}
+/*
+	Add a new entry in the EMD file of a directory.
+
+	The name is first searched with umsdos_find(). If it is already
+	there, -EEXIST is returned. If it is missing (-ENOENT), the entry
+	is written with umsdos_writeentry() and its result is returned.
+	Any other error from the search is returned as is.
+
+	Complete the information missing in info.
+*/
+int umsdos_newentry (
+	struct inode *dir,
+	struct umsdos_info *info)
+{
+	struct inode *emd_dir;
+	int ret = umsdos_find (dir,info,&emd_dir);
+	switch (ret){
+	case 0:
+		/* The name exists already */
+		ret = -EEXIST;
+		break;
+	case -ENOENT:
+		ret = umsdos_writeentry(dir,emd_dir,info,0);
+		PRINTK (("umsdos_newentry EDM ret = %d\n",ret));
+		break;
+	default:
+		/* Search failure, reported unchanged */
+		break;
+	}
+	iput (emd_dir);
+	return ret;
+}
+/*
+	Build the name of a new hidden link.
+	An empty record is located in the EMD file (search done with
+	name_len == 0) and its offset is used to build the name.
+	Nothing is written in the EMD file here.
+
+	Return 0 if ok, an error code if not.
+*/
+int umsdos_newhidden (
+	struct inode *dir,
+	struct umsdos_info *info)
+{
+	struct inode *emd_dir;
+	int status;
+	umsdos_parse ("..LINK",6,info);
+	/* name_len == 0 tells umsdos_find to look for an empty record */
+	info->entry.name_len = 0;
+	status = umsdos_find (dir,info,&emd_dir);
+	iput (emd_dir);
+	if (status != -ENOENT && status != 0) return status;
+	/* #Specification: hard link / hidden name
+		When a hard link is created, the original file is renamed
+		to a hidden name. The name is "..LINKNNN" where NNN is a
+		number defined from the entry offset in the EMD file.
+	*/
+	info->entry.name_len = sprintf (info->entry.name,"..LINK%ld"
+		,info->f_pos);
+	return 0;
+}
+/*
+	Remove an entry from the EMD file.
+	isdir tells if the caller expects a directory (!= 0) or not.
+
+	Return 0 if ok, -EISDIR if the entry is a directory while the
+	caller expected something else, -ENOTDIR for the opposite, or
+	the error code of umsdos_find() or umsdos_writeentry().
+
+	Complete the information missing in info.
+*/
+int umsdos_delentry (
+	struct inode *dir,
+	struct umsdos_info *info,
+	int isdir)
+{
+	struct inode *emd_dir;
+	int ret = umsdos_find (dir,info,&emd_dir);
+	/* A search with an empty name only locates a free slot: */
+	/* nothing to remove in this case */
+	if (ret == 0 && info->entry.name_len != 0){
+		int entry_isdir = S_ISDIR(info->entry.mode) != 0;
+		int want_dir = isdir != 0;
+		if (want_dir == entry_isdir){
+			ret = umsdos_writeentry(dir,emd_dir,info,1);
+		}else{
+			ret = entry_isdir ? -EISDIR : -ENOTDIR;
+		}
+	}
+	iput(emd_dir);
+	return ret;
+}
+
+
+/*
+	Verify if an EMD directory is empty.
+	Return 0 if not empty (or if the EMD file can't be read)
+	       1 if empty
+	       2 if empty, no EMD file.
+*/
+int umsdos_isempty (struct inode *dir)
+{
+	struct inode *emd_dir = umsdos_emd_dir_lookup(dir,0);
+	struct file filp;
+	int ret;
+	/* If the EMD file does not exist, it is certainly empty :-) */
+	if (emd_dir == NULL) return 2;
+	filp.f_pos = 0;
+	filp.f_reada = 1;
+	filp.f_flags = O_RDONLY;
+	ret = 1;
+	/* Scan until a used entry (or a read error) shows up */
+	while (ret == 1 && filp.f_pos < emd_dir->i_size){
+		struct umsdos_dirent entry;
+		if (umsdos_emd_dir_readentry(emd_dir,&filp,&entry)!=0
+			|| entry.name_len != 0){
+			ret = 0;
+		}
+	}
+	iput (emd_dir);
+	return ret;
+}
+
+/*
+	Locate an entry in a EMD directory and optionally check its type.
+	Return 0 if ok, errcod if not, generally -ENOENT.
+	-EISDIR or -ENOTDIR is returned if the entry found does not
+	match expect.
+*/
+int umsdos_findentry (
+	struct inode *dir,
+	struct umsdos_info *info,
+	int expect)		/* 0: anything */
+				/* 1: file */
+				/* 2: directory */
+{
+	struct inode *emd_dir;
+	int ret = umsdos_find (dir,info,&emd_dir);
+	if (ret == 0 && expect != 0){
+		int isdir = S_ISDIR(info->entry.mode);
+		if (isdir && expect != 2){
+			ret = -EISDIR;
+		}else if (!isdir && expect == 2){
+			ret = -ENOTDIR;
+		}
+	}
+	iput (emd_dir);
+	return ret;
+}
+
diff --git a/fs/umsdos/file.c b/fs/umsdos/file.c
new file mode 100644
index 000000000..d292ea3c2
--- /dev/null
+++ b/fs/umsdos/file.c
@@ -0,0 +1,103 @@
+/*
+ * linux/fs/umsdos/file.c
+ *
+ * Written 1993 by Jacques Gelinas
+ * inspired from linux/fs/msdos/file.c Werner Almesberger
+ *
+ * Extended MS-DOS regular file handling primitives
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/msdos_fs.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/msdos_fs.h>
+#include <linux/umsdos_fs.h>
+
+
+#define PRINTK(x)
+#define Printk(x) printk x
+/*
+	Read a file into user space memory.
+	The job is done by msdos_file_read(); the access time is then
+	updated here (whatever the result of the read) and the inode is
+	marked dirty.
+	Return the value of msdos_file_read().
+*/
+static int UMSDOS_file_read(
+	struct inode *inode,
+	struct file *filp,
+	char *buf,
+	int count)
+{
+	int nread = msdos_file_read(inode,filp,buf,count);
+	/* We have to set the access time because msdos don't care */
+	inode->i_dirt = 1;
+	inode->i_atime = CURRENT_TIME;
+	return nread;
+}
+/*
+	Write a file from user space memory.
+	Simply hands the request to msdos_file_write() and returns
+	its result.
+*/
+static int UMSDOS_file_write(
+	struct inode *inode,
+	struct file *filp,
+	char *buf,
+	int count)
+{
+	int nwritten = msdos_file_write(inode,filp,buf,count);
+	return nwritten;
+}
+/*
+	Truncate a file.
+	msdos_truncate() does the job, then the change and modification
+	times are set to the current time and the inode is marked dirty.
+*/
+static void UMSDOS_truncate(struct inode *inode)
+{
+	PRINTK (("UMSDOS_truncate\n"));
+	msdos_truncate (inode);
+	inode->i_mtime = CURRENT_TIME;
+	inode->i_ctime = inode->i_mtime;
+	inode->i_dirt = 1;
+}
+/*
+ Function tables used for regular files in an umsdos directory.
+ See inode.c
+
+ umsdos_file_operations: only read and write go through umsdos
+ wrappers (UMSDOS_file_read, UMSDOS_file_write), mmap and fsync use
+ generic_mmap and file_fsync, all other entries are NULL.
+
+ umsdos_file_inode_operations: points to umsdos_file_operations and
+ only provides truncate (UMSDOS_truncate) and smap (msdos_smap).
+
+ Some entry points are filled dynamically with function pointers
+ from the msdos file_operations and file_inode_operations.
+ This is the case of bmap, which is NULL here and set by
+ umsdos_patch_inode() the first time a regular file is patched.
+
+ The idea is to have the code as independent as possible from
+ the msdos file system.
+*/
+
+struct file_operations umsdos_file_operations = {
+ NULL, /* lseek - default */
+ UMSDOS_file_read, /* read */
+ UMSDOS_file_write, /* write */
+ NULL, /* readdir - bad */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ generic_mmap, /* mmap */
+ NULL, /* no special open is needed */
+ NULL, /* release */
+ file_fsync /* fsync */
+};
+
+struct inode_operations umsdos_file_inode_operations = {
+ &umsdos_file_operations, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap - set at run time by umsdos_patch_inode() */
+ UMSDOS_truncate,/* truncate */
+ NULL, /* permission */
+ msdos_smap /* smap */
+};
+
+
diff --git a/fs/umsdos/inode.c b/fs/umsdos/inode.c
new file mode 100644
index 000000000..40f7feb68
--- /dev/null
+++ b/fs/umsdos/inode.c
@@ -0,0 +1,513 @@
+/*
+ * linux/fs/umsdos/inode.c
+ *
+ * Written 1993 by Jacques Gelinas
+ * Inspired from linux/fs/msdos/... by Werner Almesberger
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/msdos_fs.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <asm/segment.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/stat.h>
+#include <linux/umsdos_fs.h>
+
+#ifdef MODULE
+ #include <linux/module.h>
+ #include "../../tools/version.h"
+#endif
+
+struct inode *pseudo_root=NULL; /* Useful to simulate the pseudo DOS */
+ /* directory. See UMSDOS_readdir_x() */
+
+/* #Specification: convention / PRINTK Printk and printk
+ Here is the convention for the use of printk inside fs/umsdos
+
+ printk carries important messages (error or status).
+ Printk is for debugging (it is a macro defined at the beginning of
+ most source files).
+ PRINTK is a nulled Printk macro.
+
+ This convention makes the source easier to read, and Printk easier
+ to shut off.
+*/
+#define PRINTK(x)
+#define Printk(x) printk x
+
+
+/*
+	Release an inode.
+	Nothing specific to umsdos is done, msdos_put_inode() does it all.
+*/
+void UMSDOS_put_inode(struct inode *inode)
+{
+	PRINTK (("put inode %x owner %x pos %d dir %x\n",
+		inode,
+		inode->u.umsdos_i.i_emd_owner,
+		inode->u.umsdos_i.pos,
+		inode->u.umsdos_i.i_emd_dir));
+	msdos_put_inode(inode);
+}
+
+
+/*
+	Release the super block (done by msdos_put_super()).
+	When built as a module, the use count incremented by
+	UMSDOS_read_super() is decremented.
+*/
+void UMSDOS_put_super(struct super_block *sb)
+{
+	msdos_put_super(sb);
+#ifdef MODULE
+	MOD_DEC_USE_COUNT;
+#endif
+}
+
+
+/*
+	File system statistics: entirely handled by msdos_statfs().
+*/
+void UMSDOS_statfs(
+	struct super_block *sb,
+	struct statfs *buf)
+{
+	msdos_statfs(sb,buf);
+}
+
+
+/*
+	Lookup a name in the DOS directory using msdos_lookup().
+	The reference count of dir is incremented before the call
+	(presumably because msdos_lookup() releases dir; to be confirmed
+	in fs/msdos), so the caller still owns its reference on return.
+
+	Return 0 if ok, or a negative error code if not.
+*/
+int umsdos_real_lookup (
+	struct inode *dir,
+	const char *name,
+	int len,
+	struct inode **result)	/* Will hold inode of the file, if successful */
+{
+	dir->i_count++;
+	return msdos_lookup (dir,name,len,result);
+}
+/*
+	Complete the setup of a directory inode.
+	The EMD file of the directory is looked up (not created).
+	If it is there, the umsdos function table is plugged
+	(umsdos_dir_inode_operations), if not, the table
+	umsdos_rdir_inode_operations is used.
+*/
+void umsdos_setup_dir_inode (struct inode *inode)
+{
+	extern struct inode_operations umsdos_rdir_inode_operations;
+	struct inode *emd_dir;
+	inode->u.umsdos_i.i_emd_dir = 0;
+	emd_dir = umsdos_emd_dir_lookup (inode,0);
+	if (emd_dir != NULL){
+		inode->i_op = &umsdos_dir_inode_operations;
+	}else{
+		inode->i_op = &umsdos_rdir_inode_operations;
+	}
+	iput (emd_dir);
+}
+/*
+	Add some info into an inode so it can find its owner quickly:
+	the inode number of the owning directory, the inode number of
+	the EMD file of this directory and the offset of the entry in
+	the EMD file.
+
+	If the EMD file can't be obtained, i_emd_owner is left untouched
+	and a message is printed.
+*/
+void umsdos_set_dirinfo(
+	struct inode *inode,
+	struct inode *dir,
+	off_t f_pos)
+{
+	struct inode *emd_owner = umsdos_emd_dir_lookup(dir,1);
+	inode->u.umsdos_i.i_dir_owner = dir->i_ino;
+	inode->u.umsdos_i.pos = f_pos;
+	if (emd_owner != NULL){
+		inode->u.umsdos_i.i_emd_owner = emd_owner->i_ino;
+		iput (emd_owner);
+	}else{
+		/* umsdos_emd_dir_lookup() may fail, don't follow a NULL */
+		printk ("UMSDOS: umsdos_set_dirinfo emd_owner == NULL\n");
+	}
+}
+/*
+	Tells if an Umsdos inode has been "patched" once.
+	The test is based on i_emd_owner, which is set by
+	umsdos_set_dirinfo(). Earlier experiments based on i_atime or
+	i_count are not used.
+	Return != 0 if so.
+*/
+int umsdos_isinit (struct inode *inode)
+{
+	return inode->u.umsdos_i.i_emd_owner != 0;
+}
+/*
+	Connect the proper tables in the inode and add some info.
+
+	If the inode was never initialised (see umsdos_isinit()), its
+	function table is selected from its type and, if dir is not NULL,
+	the owner information is recorded (umsdos_set_dirinfo()).
+	If it was already initialised and dir is not NULL, the recorded
+	EMD owner is checked against the EMD file of dir and a message
+	is printed if they differ.
+*/
+void umsdos_patch_inode (
+	struct inode *inode,
+	struct inode *dir,		/* May be NULL */
+	off_t f_pos)
+{
+	/*
+		This function is called very early to setup the inode, somewhat
+		too early (called by UMSDOS_read_inode). At this point, we can't
+		do too much, such as looking up EMD files and so on. This causes
+		confusion in the kernel. This is why some initialisation
+		will be done when dir != NULL only.
+
+		UMSDOS do run piggy back on top of msdos fs. It looks like something
+		is missing in the VFS to accommodate stacked fs. Still unclear what
+		(quite honestly).
+
+		Well, maybe one! A new entry "may_unmount" which would allow
+		the stacked fs to allocate some inode permanently and release
+		them at the end. Doing that now introduce a problem. unmount
+		always fail because some inodes are in use.
+	*/
+	if (!umsdos_isinit(inode)){
+		inode->u.umsdos_i.i_emd_dir = 0;
+		if (S_ISREG(inode->i_mode)){
+			static char is_init = 0;
+			if (!is_init){
+				/*
+					I don't want to change the msdos file system code
+					so I get the address of some subroutine dynamically
+					once.
+				*/
+				umsdos_file_inode_operations.bmap = inode->i_op->bmap;
+				is_init = 1;
+			}
+			inode->i_op = &umsdos_file_inode_operations;
+		}else if (S_ISDIR(inode->i_mode)){
+			if (dir != NULL){
+				umsdos_setup_dir_inode(inode);
+			}
+		}else if (S_ISLNK(inode->i_mode)){
+			inode->i_op = &umsdos_symlink_inode_operations;
+		}else if (S_ISCHR(inode->i_mode)){
+			inode->i_op = &chrdev_inode_operations;
+		}else if (S_ISBLK(inode->i_mode)){
+			inode->i_op = &blkdev_inode_operations;
+		}else if (S_ISFIFO(inode->i_mode)){
+			init_fifo(inode);
+		}
+		if (dir != NULL){
+			/* #Specification: inode / umsdos info
+				The first time an inode is seen (inode->i_count == 1),
+				the inode number of the EMD file which control this inode
+				is tagged to this inode. It allows operation such
+				as notify_change to be handled.
+			*/
+			/*
+				This is done last because it also control the
+				status of umsdos_isinit()
+			*/
+			umsdos_set_dirinfo (inode,dir,f_pos);
+		}
+	}else if (dir != NULL){
+		/*
+			Test to see if the info is maintained.
+			This should be removed when the file system will be proven.
+		*/
+		struct inode *emd_owner = umsdos_emd_dir_lookup(dir,1);
+		if (emd_owner == NULL){
+			printk ("UMSDOS: umsdos_patch_inode emd_owner == NULL\n");
+		}else{
+			/* The inode must be examined before it is released */
+			if (emd_owner->i_ino != inode->u.umsdos_i.i_emd_owner){
+				printk ("UMSDOS: *** EMD_OWNER ??? *** ino = %ld %ld <> %ld "
+					,inode->i_ino,emd_owner->i_ino,inode->u.umsdos_i.i_emd_owner);
+			}
+			iput (emd_owner);
+		}
+	}
+}
+/*
+	Get the inode of the directory which owns this inode, using the
+	inode number recorded in i_dir_owner. The directory inode is
+	passed to umsdos_patch_inode() before it is returned.
+	Return 0 if ok, -EIO if error.
+*/
+int umsdos_get_dirowner(
+	struct inode *inode,
+	struct inode **result)	/* Hold NULL if any error */
+				/* else, the inode of the directory */
+{
+	unsigned long ino = inode->u.umsdos_i.i_dir_owner;
+	struct inode *dir;
+	*result = NULL;
+	if (ino == 0){
+		printk ("UMSDOS: umsdos_get_dirowner ino == 0\n");
+		return -EIO;
+	}
+	dir = iget(inode->i_sb,ino);
+	*result = dir;
+	if (dir == NULL) return -EIO;
+	umsdos_patch_inode (dir,NULL,0);
+	return 0;
+}
+/*
+	Load an inode from disk.
+	msdos_read_inode() does the reading, then the inode is patched
+	without owner information (see umsdos_patch_inode()).
+	A debug message is printed if the dir_info fields of a directory
+	inode are not all cleared at this point.
+*/
+void UMSDOS_read_inode(struct inode *inode)
+{
+	PRINTK (("read inode %x ino = %d ",inode,inode->i_ino));
+	msdos_read_inode(inode);
+	PRINTK (("ino = %d %d\n",inode->i_ino,inode->i_count));
+	if (S_ISDIR(inode->i_mode)){
+		if (inode->u.umsdos_i.u.dir_info.creating != 0
+			|| inode->u.umsdos_i.u.dir_info.looking != 0
+			|| inode->u.umsdos_i.u.dir_info.p != NULL){
+			Printk (("read inode %d %d %p\n"
+				,inode->u.umsdos_i.u.dir_info.creating
+				,inode->u.umsdos_i.u.dir_info.looking
+				,inode->u.umsdos_i.u.dir_info.p));
+		}
+	}
+	/* #Specification: Inode / post initialisation
+		To completely initialise an inode, we need access to the owner
+		directory, so we can locate more info in the EMD file. This is
+		not available the first time the inode is accessed, so we use
+		a value in the inode to tell if it has been finally initialised.
+
+		At first, we have tried testing i_count but it was causing
+		problems. It is possible that two or more processes use the
+		newly accessed inode. While the first one blocks during
+		the initialisation (probably while reading the EMD file), the
+		others believe all is well because i_count > 1. They go banana
+		with a broken inode. See umsdos_lookup_patch and umsdos_patch_inode.
+	*/
+	umsdos_patch_inode(inode,NULL,0);
+}
+
+/*
+	Update the disk with the inode content.
+	The msdos part is written by msdos_write_inode(), then the three
+	time stamps are sent to the EMD entry with UMSDOS_notify_change().
+	The result of UMSDOS_notify_change() is not checked.
+*/
+void UMSDOS_write_inode(struct inode *inode)
+{
+	struct iattr times;
+
+	PRINTK (("UMSDOS_write_inode emd %d\n",inode->u.umsdos_i.i_emd_owner));
+	msdos_write_inode(inode);
+	times.ia_valid = ATTR_MTIME | ATTR_ATIME | ATTR_CTIME;
+	times.ia_atime = inode->i_atime;
+	times.ia_mtime = inode->i_mtime;
+	times.ia_ctime = inode->i_ctime;
+	/*
+		UMSDOS_notify_change is convenient to call here
+		to update the EMD entry associated with this inode.
+		But it has the side effect to re"dirt" the inode,
+		so the dirty flag is cleared afterward.
+	*/
+	UMSDOS_notify_change (inode, &times);
+	inode->i_dirt = 0;
+}
+
+/*
+	Change the attributes of an inode.
+	After inode_change_ok() has accepted the change, the first record
+	of the EMD entry of the inode is read, updated with the fields
+	selected by attr->ia_valid (and with the current i_nlink) and
+	written back. Nothing is done in the EMD for the root inode, for
+	an inode with i_nlink == 0 or for an inode without a usable
+	i_emd_owner. inode_setattr() is called if no error occurred.
+
+	Return 0 if ok, a negative error code if not.
+*/
+int UMSDOS_notify_change(struct inode *inode, struct iattr *attr)
+{
+	int ret = inode_change_ok(inode, attr);
+
+	if (ret != 0)
+		return ret;
+
+	if (inode->i_nlink > 0){
+		/* #Specification: notify_change / i_nlink > 0
+			notify change is only done for inode with nlink > 0. An inode
+			with nlink == 0 is no longer associated with any entry in
+			the EMD file, so there is nothing to update.
+		*/
+		unsigned long i_emd_owner = inode->u.umsdos_i.i_emd_owner;
+		if (inode == inode->i_sb->s_mounted){
+			/* #Specification: root inode / attributes
+				I don't know yet how this should work. Normally
+				the attributes (permissions bits, owner, times) of
+				a directory are stored in the EMD file of its parent.
+
+				One thing we could do is store the attributes of the root
+				inode in its own EMD file. A simple entry named "." could
+				be used for this special case. It would be read once
+				when the file system is mounted and update in
+				UMSDOS_notify_change() (right here).
+
+				I am not sure of the behavior of the root inode for
+				a real UNIX file system. For now, this is a nop.
+			*/
+		}else if (i_emd_owner != 0xffffffff && i_emd_owner != 0){
+			/* This inode is not a EMD file nor an inode used internally
+				by MSDOS, so we can update its status.
+				See emd.c
+			*/
+			struct inode *emd_owner = iget (inode->i_sb,i_emd_owner);
+			PRINTK (("notify change %p ",inode));
+			if (emd_owner != NULL){
+				struct umsdos_dirent entry;
+				struct file filp;
+				filp.f_reada = 0;
+				filp.f_pos = inode->u.umsdos_i.pos;
+				PRINTK (("pos = %d ",filp.f_pos));
+				/* Read only the start of the entry since we don't touch */
+				/* the name */
+				ret = umsdos_emd_dir_read (emd_owner,&filp,(char*)&entry
+					,UMSDOS_REC_SIZE);
+				if (ret == 0){
+					unsigned int valid = attr->ia_valid;
+					if (valid & ATTR_UID) entry.uid = attr->ia_uid;
+					if (valid & ATTR_GID) entry.gid = attr->ia_gid;
+					if (valid & ATTR_MODE) entry.mode = attr->ia_mode;
+					if (valid & ATTR_ATIME) entry.atime = attr->ia_atime;
+					if (valid & ATTR_MTIME) entry.mtime = attr->ia_mtime;
+					if (valid & ATTR_CTIME) entry.ctime = attr->ia_ctime;
+
+					entry.nlink = inode->i_nlink;
+					/* Back to the start of the entry to overwrite it */
+					filp.f_pos = inode->u.umsdos_i.pos;
+					ret = umsdos_emd_dir_write (emd_owner,&filp,(char*)&entry
+						,UMSDOS_REC_SIZE);
+
+					PRINTK (("notify pos %d ret %d nlink %d "
+						,inode->u.umsdos_i.pos
+						,ret,entry.nlink));
+					/* #Specification: notify_change / msdos fs
+						notify_change operation are done only on the
+						EMD file. The msdos fs is not even called.
+					*/
+				}
+				iput (emd_owner);
+			}else{
+				printk ("UMSDOS: emd_owner = NULL ???");
+				ret = -EPERM;
+			}
+			PRINTK (("\n"));
+		}
+	}
+	if (ret == 0)
+		inode_setattr(inode, attr);
+	return ret;
+}
+
+/* #Specification: function name / convention
+ A simple convention for function name has been used in
+ the UMSDOS file system. First all function use the prefix
+ umsdos_ to avoid name clash with other part of the kernel.
+
+ And standard VFS entry point use the prefix UMSDOS (upper case)
+ so it's easier to tell them apart.
+*/
+
+static struct super_operations umsdos_sops = { /* Super block operations installed by UMSDOS_read_super(). Slot names below are presumed from the order of the initialisers; confirm against struct super_operations */
+ UMSDOS_read_inode, /* read_inode */
+ UMSDOS_notify_change, /* notify_change */
+ UMSDOS_write_inode, /* write_inode */
+ UMSDOS_put_inode, /* put_inode */
+ UMSDOS_put_super, /* put_super */
+ NULL, /* added in 0.96c (presumably write_super) */
+ UMSDOS_statfs, /* statfs */
+ NULL /* not used (presumably remount_fs) */
+};
+
+/*
+ Read the super block of an Extended MS-DOS FS.
+
+ msdos_read_super() does the real work. If it succeeds, the umsdos
+ super operations are installed and the root directory inode is
+ set up. If s is the first entry of super_blocks, the pseudo root
+ is searched and recorded in the global pseudo_root.
+
+ Return the super block returned by msdos_read_super() (NULL if
+ it failed).
+*/
+struct super_block *UMSDOS_read_super(
+ struct super_block *s,
+ void *data,
+ int silent)
+{
+ /* #Specification: mount / options
+ Umsdos runs on top of msdos. Currently, it supports no
+ mount option, but happily passes all options received to
+ the msdos driver. I am not sure if all msdos mount options
+ make sense with Umsdos. Here are at least those which
+ are useful.
+ uid=
+ gid=
+
+ These options affect the operation of umsdos in directories
+ which do not have an EMD file. They behave like normal
+ msdos directory, with all limitation of msdos.
+ */
+ struct super_block *sb = msdos_read_super(s,data,silent);
+ printk ("UMSDOS Alpha 0.5a (compatibility level %d.%d, fast msdos)\n"
+ ,UMSDOS_VERSION,UMSDOS_RELEASE); /* printed even if the mount failed */
+ if (sb != NULL){
+ sb->s_op = &umsdos_sops;
+ PRINTK (("umsdos_read_super %p\n",sb->s_mounted));
+ umsdos_setup_dir_inode (sb->s_mounted);
+ PRINTK (("End umsdos_read_super\n"));
+ if (s == super_blocks){
+ /* #Specification: pseudo root / mount
+ When a umsdos fs is mounted, a special handling is done
+ if it is the root partition. We check for the presence
+ of the file /linux/etc/init or /linux/etc/rc.
+ If one is there, we do a chroot("/linux").
+
+ We check both because (see init/main.c) the kernel
+ tries to exec init at different places and if it fails
+ it tries /bin/sh /etc/rc. To be consistent with
+ init/main.c, many more tests would have to be done
+ to locate init. Any complaint ?
+
+ The chroot is done manually in init/main.c but the
+ info (the inode) is located at mount time and stored
+ in a global variable (pseudo_root) which is used at
+ different places in the umsdos driver. There is no
+ need to store this variable elsewhere because it
+ will always be one, not one per mount.
+
+ This feature allows the installation
+ of a linux system within a DOS system in a subdirectory.
+
+ A user may install its linux stuff in c:\linux
+ avoiding any clash with existing DOS files and subdirectories.
+ When linux boots, it hides this fact, showing a normal
+ root directory with /etc /bin /tmp ...
+
+ The word "linux" is hardcoded in /usr/include/linux/umsdos_fs.h
+ in the macro UMSDOS_PSDROOT_NAME.
+ */
+
+ struct inode *pseudo;
+ Printk (("Mounting root\n"));
+ if (umsdos_real_lookup (sb->s_mounted,UMSDOS_PSDROOT_NAME
+ ,UMSDOS_PSDROOT_LEN,&pseudo)==0
+ && S_ISDIR(pseudo->i_mode)){
+ struct inode *etc = NULL;
+ struct inode *rc = NULL; /* stays NULL if the lookup of "rc" is not attempted */
+ Printk (("/%s is there\n",UMSDOS_PSDROOT_NAME));
+ if (umsdos_real_lookup (pseudo,"etc",3,&etc)==0
+ && S_ISDIR(etc->i_mode)){
+ struct inode *init; /* NOTE(review): not initialised here; assumes msdos_lookup() always stores something in *result, even on failure - confirm */
+ Printk (("/%s/etc is there\n",UMSDOS_PSDROOT_NAME));
+ if ((umsdos_real_lookup (etc,"init",4,&init)==0
+ && S_ISREG(init->i_mode))
+ || (umsdos_real_lookup (etc,"rc",2,&rc)==0
+ && S_ISREG(rc->i_mode))){
+ umsdos_setup_dir_inode (pseudo);
+ Printk (("Activating pseudo root /%s\n",UMSDOS_PSDROOT_NAME));
+ pseudo_root = pseudo;
+ pseudo->i_count++; /* one more reference on the pseudo root inode */
+ pseudo = NULL; /* so the iput (pseudo) below releases nothing */
+ }
+ iput (init);
+ iput (rc);
+ }
+ iput (etc);
+ }
+ iput (pseudo);
+ }
+ #ifdef MODULE
+ MOD_INC_USE_COUNT;
+ #endif
+ }
+ return sb;
+}
+
+
+#ifdef MODULE
+
+/* Set from UTS_RELEASE when the module is compiled */
+char kernel_version[] = UTS_RELEASE;
+
+/* Description of the file system given to register_filesystem() */
+static struct file_system_type umsdos_fs_type = {
+	UMSDOS_read_super,
+	"umsdos",
+	1,
+	NULL
+};
+
+/*
+	Module entry point: register the umsdos file system.
+	Always return 0.
+*/
+int init_module(void)
+{
+	register_filesystem(&umsdos_fs_type);
+	return 0;
+}
+
+/*
+	Module exit point: unregister the file system, unless the module
+	is still in use. In that case only a message is printed.
+*/
+void cleanup_module(void)
+{
+	if (!MOD_IN_USE){
+		unregister_filesystem(&umsdos_fs_type);
+	}else{
+		printk("Umsdos: file system in use, remove delayed\n");
+	}
+}
+
+#endif
+
diff --git a/fs/umsdos/ioctl.c b/fs/umsdos/ioctl.c
new file mode 100644
index 000000000..972571796
--- /dev/null
+++ b/fs/umsdos/ioctl.c
@@ -0,0 +1,259 @@
+/*
+ * linux/fs/umsdos/ioctl.c
+ *
+ * Written 1993 by Jacques Gelinas
+ *
+ * Extended MS-DOS ioctl directory handling functions
+ */
+#include <asm/segment.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/msdos_fs.h>
+#include <linux/umsdos_fs.h>
+
+#define PRINTK(x)
+#define Printk(x) printk x
+
+/*
+ Perform special function on a directory
+
+ dir is the directory inode, filp the open file on it, cmd one of
+ the UMSDOS_xxx commands tested below and data the user space
+ address of a struct umsdos_ioctl.
+
+ Return -EPERM if the caller is not root (effective id) and cmd is
+ not UMSDOS_GETVERSION, -EINVAL if cmd is not one of the commands
+ handled here, otherwise the result of the command (see each command).
+
+ NOTE(review): no verify_area() is visible in this function before
+ put_fs_byte(), memcpy_tofs() and memcpy_fromfs() are applied to
+ data. To be confirmed whether the caller checks it.
+*/
+int UMSDOS_ioctl_dir (
+ struct inode *dir,
+ struct file *filp,
+ unsigned int cmd,
+ unsigned long data)
+{
+ int ret = -EPERM;
+ /* #Specification: ioctl / acces
+ Only root (effective id) is allowed to do IOCTL on directory
+ in UMSDOS. EPERM is returned for other user.
+ UMSDOS_GETVERSION is the exception: any user may use it.
+ */
+ if (current->euid == 0
+ || cmd == UMSDOS_GETVERSION){
+ struct umsdos_ioctl *idata = (struct umsdos_ioctl *)data;
+ ret = -EINVAL;
+ /* #Specification: ioctl / prototypes
+ The official prototype for the umsdos ioctl on directory
+ is:
+
+ int ioctl (
+ int fd, // File handle of the directory
+ int cmd, // command
+ struct umsdos_ioctl *data)
+
+ The struct and the commands are defined in linux/umsdos_fs.h.
+
+ umsdos_progs/umsdosio.c provide an interface in C++ to all
+ these ioctl. umsdos_progs/udosctl is a small utility showing
+ all this.
+
+ These ioctl generally allow one to work on the EMD or the
+ DOS directory independently. These are essential to implement
+ the synchronise.
+ */
+ PRINTK (("ioctl %d ",cmd));
+ if (cmd == UMSDOS_GETVERSION){
+ /* #Specification: ioctl / UMSDOS_GETVERSION
+ The field version and release of the structure
+ umsdos_ioctl are filled with the version and release
+ number of the fs code in the kernel. This will allow
+ some form of checking. Users won't be able to run
+ incompatible utility such as the synchroniser (umssync).
+ umsdos_progs/umsdosio.c enforce this checking.
+
+ Return always 0.
+ */
+ put_fs_byte (UMSDOS_VERSION,&idata->version);
+ put_fs_byte (UMSDOS_RELEASE,&idata->release);
+ ret = 0;
+ }else if (cmd == UMSDOS_READDIR_DOS){
+ /* #Specification: ioctl / UMSDOS_READDIR_DOS
+ One entry is read from the DOS directory at the current
+ file position. The entry is put as is in the dos_dirent
+ field of struct umsdos_ioctl.
+
+ Return > 0 if success.
+ */
+ ret = msdos_readdir(dir,filp,&idata->dos_dirent,1);
+ }else if (cmd == UMSDOS_READDIR_EMD){
+ /* #Specification: ioctl / UMSDOS_READDIR_EMD
+ One entry is read from the EMD at the current
+ file position. The entry is put as is in the umsdos_dirent
+ field of struct umsdos_ioctl. The corresponding mangled
+ DOS entry name is put in the dos_dirent field.
+
+ All entries are read including hidden links. Blank
+ entries are skipped.
+
+ Return > 0 if success (the length of the name), 0 at
+ the end of the EMD file, < 0 if the entry can't be read.
+ */
+ struct inode *emd_dir = umsdos_emd_dir_lookup (dir,0);
+ if (emd_dir != NULL){
+ while (1){
+ if (filp->f_pos >= emd_dir->i_size){
+ ret = 0;
+ break;
+ }else{
+ struct umsdos_dirent entry;
+ off_t f_pos = filp->f_pos; /* offset of the entry, needed to mangle the name */
+ ret = umsdos_emd_dir_readentry (emd_dir,filp,&entry);
+ if (ret < 0){
+ break;
+ }else if (entry.name_len > 0){
+ struct umsdos_info info;
+ ret = entry.name_len;
+ umsdos_parse (entry.name,entry.name_len,&info);
+ info.f_pos = f_pos;
+ umsdos_manglename(&info);
+ memcpy_tofs(&idata->umsdos_dirent,&entry
+ ,sizeof(entry));
+ memcpy_tofs(&idata->dos_dirent.d_name
+ ,info.fake.fname,info.fake.len+1);
+ break;
+ }
+ }
+ }
+ iput (emd_dir);
+ }else{
+ /* The absence of the EMD is simply seen as an EOF */
+ ret = 0;
+ }
+ }else if (cmd == UMSDOS_INIT_EMD){
+ /* #Specification: ioctl / UMSDOS_INIT_EMD
+ The UMSDOS_INIT_EMD command make sure the EMD
+ exist for a directory. If it does not, it is
+ created. Also, it makes sure the directory functions
+ table (struct inode_operations) is set to the UMSDOS
+ semantic. This mean that umssync may be applied to
+ an "opened" msdos directory, and it will change behavior
+ on the fly.
+
+ Return 0 if success.
+
+ NOTE(review): the code below returns 1 when the EMD
+ inode is obtained and 0 when it is not, which does not
+ match the sentence above. To be checked against the
+ user space utilities before changing either.
+ */
+ extern struct inode_operations umsdos_rdir_inode_operations;
+ struct inode *emd_dir = umsdos_emd_dir_lookup (dir,1);
+ ret = emd_dir != NULL;
+ iput (emd_dir);
+
+ dir->i_op = ret
+ ? &umsdos_dir_inode_operations
+ : &umsdos_rdir_inode_operations;
+ }else{
+ struct umsdos_ioctl data; /* kernel copy of the user structure (hides the parameter data) */
+ memcpy_fromfs (&data,idata,sizeof(data));
+ if (cmd == UMSDOS_CREAT_EMD){
+ /* #Specification: ioctl / UMSDOS_CREAT_EMD
+ The umsdos_dirent field of the struct umsdos_ioctl is used
+ as is to create a new entry in the EMD of the directory.
+ The DOS directory is not modified.
+ No validation is done (yet).
+
+ Return 0 if success.
+ */
+ struct umsdos_info info;
+ /* This makes sure info.entry and info in general is correctly */
+ /* initialised */
+ memcpy (&info.entry,&data.umsdos_dirent
+ ,sizeof(data.umsdos_dirent));
+ umsdos_parse (data.umsdos_dirent.name
+ ,data.umsdos_dirent.name_len,&info);
+ ret = umsdos_newentry (dir,&info);
+ }else if (cmd == UMSDOS_UNLINK_EMD){
+ /* #Specification: ioctl / UMSDOS_UNLINK_EMD
+ The umsdos_dirent field of the struct umsdos_ioctl is used
+ as is to remove an entry from the EMD of the directory.
+ No validation is done (yet). The mode field is used
+ to validate S_ISDIR or S_ISREG.
+
+ Return 0 if success.
+ */
+ struct umsdos_info info;
+ /* This makes sure info.entry and info in general is correctly */
+ /* initialised */
+ memcpy (&info.entry,&data.umsdos_dirent
+ ,sizeof(data.umsdos_dirent));
+ umsdos_parse (data.umsdos_dirent.name
+ ,data.umsdos_dirent.name_len,&info);
+ ret = umsdos_delentry (dir,&info
+ ,S_ISDIR(data.umsdos_dirent.mode));
+ }else if (cmd == UMSDOS_UNLINK_DOS){
+ /* #Specification: ioctl / UMSDOS_UNLINK_DOS
+ The dos_dirent field of the struct umsdos_ioctl is used to
+ execute a msdos_unlink operation. The d_name and d_reclen
+ fields are used.
+
+ Return 0 if success.
+ */
+ dir->i_count++; /* presumably msdos_unlink() releases dir - confirm */
+ ret = msdos_unlink (dir,data.dos_dirent.d_name
+ ,data.dos_dirent.d_reclen);
+ }else if (cmd == UMSDOS_RMDIR_DOS){
+ /* #Specification: ioctl / UMSDOS_RMDIR_DOS
+ The dos_dirent field of the struct umsdos_ioctl is used to
+ execute a msdos_rmdir operation. The d_name and d_reclen
+ fields are used.
+
+ Return 0 if success.
+ */
+ dir->i_count++; /* presumably msdos_rmdir() releases dir - confirm */
+ ret = msdos_rmdir (dir,data.dos_dirent.d_name
+ ,data.dos_dirent.d_reclen);
+ }else if (cmd == UMSDOS_STAT_DOS){
+ /* #Specification: ioctl / UMSDOS_STAT_DOS
+ The dos_dirent field of the struct umsdos_ioctl is
+ used to execute a stat operation in the DOS directory.
+ The d_name and d_reclen fields are used.
+
+ The following field of umsdos_ioctl.stat are filled.
+
+ st_ino,st_mode,st_size,st_atime,st_mtime,st_ctime,
+ Return 0 if success.
+ */
+ struct inode *inode;
+ ret = umsdos_real_lookup (dir,data.dos_dirent.d_name
+ ,data.dos_dirent.d_reclen,&inode);
+ if (ret == 0){
+ data.stat.st_ino = inode->i_ino;
+ data.stat.st_mode = inode->i_mode;
+ data.stat.st_size = inode->i_size;
+ data.stat.st_atime = inode->i_atime;
+ data.stat.st_ctime = inode->i_ctime;
+ data.stat.st_mtime = inode->i_mtime;
+ memcpy_tofs (&idata->stat,&data.stat,sizeof(data.stat));
+ iput (inode);
+ }
+ }else if (cmd == UMSDOS_DOS_SETUP){
+ /* #Specification: ioctl / UMSDOS_DOS_SETUP
+ The UMSDOS_DOS_SETUP ioctl allow changing the
+ default permission of the MsDOS file system driver
+ on the fly. The MsDOS driver apply global permission
+ to every file and directory. Normally these permissions
+ are controlled by a mount option. This is not
+ available for root partition, so a special utility
+ (umssetup) is provided to do this, normally in
+ /etc/rc.local.
+
+ Be aware that this apply ONLY to MsDOS directory
+ (those without EMD --linux-.---). Umsdos directory
+ have independent (standard) permission for each
+ and every file.
+
+ The field umsdos_dirent provide the information needed.
+ umsdos_dirent.uid and gid sets the owner and group.
+ umsdos_dirent.mode set the permissions flags.
+
+ Return always 0.
+ */
+ dir->i_sb->u.msdos_sb.fs_uid = data.umsdos_dirent.uid;
+ dir->i_sb->u.msdos_sb.fs_gid = data.umsdos_dirent.gid;
+ dir->i_sb->u.msdos_sb.fs_umask = data.umsdos_dirent.mode;
+ ret = 0;
+ }
+ }
+ }
+ PRINTK (("ioctl return %d\n",ret));
+ return ret;
+}
+
+
+
diff --git a/fs/umsdos/mangle.c b/fs/umsdos/mangle.c
new file mode 100644
index 000000000..1f59447e9
--- /dev/null
+++ b/fs/umsdos/mangle.c
@@ -0,0 +1,478 @@
+/*
+ * linux/fs/umsdos/mangle.c
+ *
+ * Written 1993 by Jacques Gelinas
+ *
+ * Control the mangling of file name to fit msdos name space.
+ * Many optimisation by GLU == dglaude@is1.vub.ac.be (GLAUDE DAVID)
+*/
+#include <linux/errno.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/umsdos_fs.h>
+
+/*
+	Complete the mangling of the MSDOS fake name
+	based on the position of the entry in the EMD file.
+
+	Simply complete the job of umsdos_parse; fill the extension.
+
+	Beware that info->f_pos must be set.
+
+	Nothing is done unless info->msdos_reject is set. The flag is
+	cleared once the extension has been appended, so calling this
+	function a second time does not mangle twice.
+*/
+void umsdos_manglename (struct umsdos_info *info)
+{
+	if (info->msdos_reject){
+		/* #Specification: file name / non MSDOS conforming / mangling
+			Each non MSDOS conforming file has a special extension
+			build from the entry position in the EMD file.
+
+			This number is then transform in a base 32 number, where
+			each digit is expressed like hexadecimal number, using
+			digit and letter, except it uses 22 letters from 'a' to 'v'.
+			The number 32 comes from 2**5. It is faster to split a binary
+			number using a base which is a power of two. And I was 32
+			when I started this project. Pick your answer :-) .
+
+			If the result is '0', it is replace with '_', simply
+			to make it odd.
+
+			This is true for the first two character of the extension.
+			The last one is taken from a list of odd character, which
+			are:
+
+				{ } ( ) ! ` ^ & @
+
+			With this scheme, we can produce 9216 ( 9* 32 * 32)
+			different extensions which should not clash with any useful
+			extension already popular or meaningful. Since most directory
+			have much less than 32 * 32 files in it, the first character
+			of the extension of any mangle name will be {.
+
+			Here are the reason to do this (this kind of mangling).
+
+			-The mangling is deterministic. Just by the extension, we
+			 are able to locate the entry in the EMD file.
+
+			-By keeping to beginning of the file name almost unchanged,
+			 we are helping the MSDOS user.
+
+			-The mangling produces names not too ugly, so an msdos user
+			 may live with it (remember it, type it, etc...).
+
+			-The mangling produces names ugly enough so no one will
+			 ever think of using such a name in real life. This is not
+			 fool proof. I don't think there is a total solution to this.
+		*/
+		/*
+			The three 5 bit digits are extracted with shifts and masks.
+			Overlaying a bit-field struct on the entry number only gives
+			the same digits when the compiler allocates bit-fields
+			starting from the least significant bit, and that layout is
+			implementation defined.
+		*/
+		int entry_num;
+		unsigned int num1,num2,num3;
+		char *pt = info->fake.fname + info->fake.len;
+		/* lookup for encoding the last character of the extension */
+		/* It contain valid character after the ugly one to make sure */
+		/* even if someone overflow the 32 * 32 * 9 limit, it still do */
+		/* something */
+		#define SPECIAL_MANGLING '{','}','(',')','!','`','^','&','@'
+		static char lookup3[]={
+			SPECIAL_MANGLING,
+			/* This is the start of lookup12 */
+			'_','1','2','3','4','5','6','7','8','9',
+			'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
+			'p','q','r','s','t','u','v'
+		};
+		#define lookup12 (lookup3+9)
+		entry_num = info->f_pos / UMSDOS_REC_SIZE;
+		/*
+			Entries 0 to 9215 fit in the 9 * 32 * 32 special extensions.
+			Entry 9216 is the first one which spills into the plain
+			characters of lookup3, so it must trigger the warning too.
+		*/
+		if (entry_num >= (9* 32 * 32)){
+			printk ("UMSDOS: More than 9216 file in a directory.\n"
+				"This may break the mangling strategy.\n"
+				"Not a killer problem. See doc.\n");
+		}
+		num1 = (unsigned int)entry_num & 0x1f;		/* bits 0-4 */
+		num2 = ((unsigned int)entry_num >> 5) & 0x1f;	/* bits 5-9 */
+		num3 = ((unsigned int)entry_num >> 10) & 0x1f;	/* bits 10-14 */
+		*pt++ = '.';
+		*pt++ = lookup3 [num3];
+		*pt++ = lookup12[num2];
+		*pt++ = lookup12[num1];
+		*pt = '\0';		/* help doing printk */
+		info->fake.len += 4;
+		info->msdos_reject = 0;		/* Avoid mangling twice */
+	}
+}
+
+/*
+	Compute the size of the EMD record able to hold a name of len
+	characters: the part of struct umsdos_dirent located in front of
+	the name field, plus the name itself, expressed in whole records.
+	The value returned is a multiple of UMSDOS_REC_SIZE.
+*/
+int umsdos_evalrecsize (int len)
+{
+	struct umsdos_dirent dirent;
+	/* Only addresses are computed here, dirent is never read */
+	int last_byte = len-1+(dirent.name-(char*)&dirent);
+	/*
+		GLU This should be inlined or something to speed it up to the max.
+	*/
+	return (1 + last_byte / UMSDOS_REC_SIZE) * UMSDOS_REC_SIZE;
+}
+#ifdef TEST
+int umsdos_evalrecsize_old (int len)
+{
+	/*
+		Older, step by step computation of the record size. Only built
+		with TEST, where main() compares it with umsdos_evalrecsize().
+	*/
+	struct umsdos_dirent dirent;
+	int total = len + (dirent.name-(char*)&dirent);
+	int count = total / UMSDOS_REC_SIZE;
+	if (total % UMSDOS_REC_SIZE > 0){
+		/* A partially used record still takes a whole record */
+		count++;
+	}
+	return count * UMSDOS_REC_SIZE;
+}
+#endif
+/*
+	Fill the struct info with the full and msdos name of a file
+	Return 0 if all is ok, a negative error code otherwise.
+	(As written, every path ends up returning 0: names which are too
+	long are truncated, not refused.)
+
+	fname is described by (fname,len); nothing past the first len
+	characters is copied, so it does not have to be NUL terminated
+	at len.
+
+	On return:
+		info->fake.fname,info->fake.len
+			The MSDOS name. When info->msdos_reject is set, the
+			extension is still missing: see umsdos_manglename().
+		info->msdos_reject
+			1 if the name is not acceptable as is for MSDOS.
+		info->entry.name,info->entry.name_len
+			The (possibly truncated) original name.
+		info->recsize
+			Size of the EMD record needed for this name.
+*/
+int umsdos_parse (
+	const char *fname,
+	int len,
+	struct umsdos_info *info)
+{
+	int ret = -ENAMETOOLONG;
+	/* #Specification: file name / too long
+		If a file name exceed UMSDOS maxima, the file name is silently
+		truncated. This makes it conformant with the other file system
+		of Linux (minix and ext2 at least).
+	*/
+	if (len > UMSDOS_MAXNAME) len = UMSDOS_MAXNAME;
+	{
+		const char *firstpt=NULL;	/* First place we saw a . in fname */
+		/* #Specification: file name / non MSDOS conforming / base length 0
+			file name beginning with a period '.' are invalid for MsDOS.
+			It needs absolutely a base name. So the file name is mangled
+		*/
+		int ivldchar = fname[0] == '.';/* At least one invalid character */
+		int msdos_len = len;
+		int base_len;
+		/*
+			cardinal_per_size tells if there exist at least one
+			DOS pseudo devices on length n. See the test below.
+		*/
+		static const char cardinal_per_size[9]={
+			0, 0, 0, 1, 1, 0, 1, 0, 1
+		};
+		/*
+			lkp translate all character to acceptable character (for DOS).
+			When lkp[n] == n, it means also it is an acceptable one.
+			So it serve both as a flag and as a translator.
+		*/
+		static char lkp[256];
+		static char is_init=0;
+		if (!is_init){
+			/*
+				Initialisation of the array is easier and less error prone
+				like this.
+			*/
+			int i;
+			static char *spc = "\"*+,/:;<=>?[\\]|~";
+			is_init = 1;
+			for (i=0; i<=32; i++) lkp[i] = '#';
+			for (i=33; i<'A'; i++) lkp[i] = (char)i;
+			for (i='A'; i<='Z'; i++) lkp[i] = (char)(i+('a'-'A'));
+			for (i='Z'+1; i<127; i++) lkp[i] = (char)i;
+			/*
+				Start at 127, not 128: DEL is a control character too.
+				Left out, lkp[127] stays 0 and a DEL in a name would be
+				translated to a NUL in the middle of the MSDOS name.
+			*/
+			for (i=127; i<256; i++) lkp[i] = '#';
+
+			lkp['.'] = '_';
+			while (*spc != '\0') lkp[(unsigned char)(*spc++)] = '#';
+		}
+		/*	GLU
+			file name which are longer than 8+'.'+3 are invalid for MsDOS.
+			So the file name is to be mangled no more test needed.
+			This Speed Up for long and very long name.
+			The position of the last point is no more necessary anyway.
+		*/
+		if (len<=(8+1+3)){
+			const char *pt = fname;
+			const char *endpt = fname + len;
+			while (pt < endpt){
+				if (*pt == '.'){
+					if (firstpt != NULL){
+						/* 2 . in a file name. Reject */
+						ivldchar = 1;
+						break;
+					}else{
+						int extlen = (int)(endpt - pt);
+						firstpt = pt;
+						if (firstpt - fname > 8){
+							/* base name longer than 8: reject */
+							ivldchar = 1;
+							break;
+						}else if (extlen > 4){
+							/* Extension longer than 4 (including .): reject */
+							ivldchar = 1;
+							break;
+						}else if (extlen == 1){
+							/* #Specification: file name / non MSDOS conforming / last char == .
+								If the last character of a file name is
+								a period, mangling is applied. MsDOS do
+								not support those file name.
+							*/
+							ivldchar = 1;
+							break;
+						}else if (extlen == 4){
+							/* #Specification: file name / non MSDOS conforming / mangling clash
+								To avoid clash with the umsdos mangling, any file
+								with a special character as the first character
+								of the extension will be mangled. This solve the
+								following problem:
+
+								touch FILE
+								# FILE is invalid for DOS, so mangling is applied
+								# file.{_1 is created in the DOS directory
+								touch file.{_1
+								# To UMSDOS file point to a single DOS entry.
+								# So file.{_1 has to be mangled.
+							*/
+							static char special[]={
+								SPECIAL_MANGLING,'\0'
+							};
+							if (strchr(special,firstpt[1])!= NULL){
+								ivldchar = 1;
+								break;
+							}
+						}
+					}
+				}else if (lkp[(unsigned char)(*pt)] != *pt){
+					ivldchar = 1;
+					break;
+				}
+				pt++;
+			}
+		}else{
+			ivldchar = 1;
+		}
+		if (ivldchar
+			|| (firstpt == NULL && len > 8)
+			|| (len == UMSDOS_EMD_NAMELEN
+				&& memcmp(fname,UMSDOS_EMD_FILE,UMSDOS_EMD_NAMELEN)==0)){
+			/* #Specification: file name / --linux-.---
+				The name of the EMD file --linux-.--- is map to a mangled
+				name. So UMSDOS does not restrict its use.
+			*/
+			/* #Specification: file name / non MSDOS conforming / mangling
+				Non MSDOS conforming file name must use some alias to fit
+				in the MSDOS name space.
+
+				The strategy is simple. The name is simply truncated to
+				8 char. points are replace with underscore and a
+				number is given as an extension. This number correspond
+				to the entry number in the EMD file. The EMD file
+				only need to carry the real name.
+
+				Upper case is also convert to lower case.
+				Control character are converted to #.
+				Space are converted to #.
+				The following character are also converted to #.
+					" * + , / : ; < = > ? [ \ ] | ~
+
+				Sometime, the problem is not in MsDOS itself but in
+				command.com.
+			*/
+			int i;
+			char *pt = info->fake.fname;
+			base_len = msdos_len = (msdos_len>8) ? 8 : msdos_len;
+			/*
+				There is no '.' any more so we know for a fact that
+				the base length is the length.
+			*/
+			memcpy (info->fake.fname,fname,msdos_len);
+			for (i=0; i<msdos_len; i++, pt++) *pt = lkp[(unsigned char)(*pt)];
+			*pt = '\0';	/* GLU: make sure there is a 0 at the end */
+			info->msdos_reject = 1;
+			/*
+				The numeric extension is added only when we know
+				the position in the EMD file, in umsdos_newentry(),
+				umsdos_delentry(), and umsdos_findentry().
+				See umsdos_manglename().
+			*/
+		}else{
+			/* Conforming MSDOS file name */
+			/*
+				Copy exactly len characters (at most 8+1+3 here).
+				fname is given with an explicit length and may have
+				more characters after it, so strcpy() could run past
+				the end of fake.fname. The terminating 0 is written
+				below, once msdos_len is final.
+			*/
+			memcpy (info->fake.fname,fname,len);
+			info->msdos_reject = 0;
+			base_len = firstpt != NULL ? (int)(firstpt - fname) : len;
+		}
+		if (cardinal_per_size[base_len]){
+			/* #Specification: file name / MSDOS devices / mangling
+				To avoid unreachable file from MsDOS, any MsDOS conforming
+				file with a basename equal to one of the MsDOS pseudo
+				devices will be mangled.
+
+				If a file such as "prn" was created, it would be unreachable
+				under MsDOS because prn is assumed to be the printer, even
+				if the file does have an extension.
+
+				Since the extension is unimportant to MsDOS, we must patch
+				the basename also. We simply insert a minus '-'. To avoid
+				conflict with valid file with a minus in front (such as
+				"-prn"), we add an mangled extension like any other
+				mangled file name.
+
+				Here is the list of DOS pseudo devices:
+
+				"prn","con","aux","nul",
+				"lpt1","lpt2","lpt3","lpt4",
+				"com1","com2","com3","com4",
+				"clock$"
+
+				and some standard ones for common DOS programs
+
+				"emmxxxx0","xmsxxxx0","setverxx"
+
+				(Thanks to Chris Hall <CAH17@PHOENIX.CAMBRIDGE.AC.UK>
+				 for pointing these to me).
+
+				Is there one missing ?
+			*/
+			/* This table must be ordered by length */
+			static const char *tbdev[]={
+				"prn","con","aux","nul",
+				"lpt1","lpt2","lpt3","lpt4",
+				"com1","com2","com3","com4",
+				"clock$",
+				"emmxxxx0","xmsxxxx0","setverxx"
+			};
+			/* Tell where to find in tbdev[], the first name of */
+			/* a certain length */
+			static const char start_ind_dev[9]={
+				0, 0, 0, 4, 12, 12, 13, 13, 16
+			};
+			char basen[9];
+			int i;
+			for (i=start_ind_dev[base_len-1]; i<start_ind_dev[base_len]; i++){
+				if (memcmp(info->fake.fname,tbdev[i],base_len)==0){
+					memcpy (basen,info->fake.fname,base_len);
+					basen[base_len] = '\0';	/* GLU: make sure there is a 0 at the end */
+					/*
+						GLU This is only done when necessary; we try to
+						GLU stay simple in the general (most frequent) case.
+					*/
+					info->fake.fname[0] = '-';
+					strcpy (info->fake.fname+1,basen);	/* basen is 0 terminated just above */
+					msdos_len = (base_len==8) ? 8 : base_len + 1;
+					info->msdos_reject = 1;
+					break;
+				}
+			}
+		}
+		/*
+			Terminate the MSDOS name. This is required, not only a help
+			for printk: the conforming branch above does not write a 0,
+			and the device mangling may have to cut the name to 8.
+		*/
+		info->fake.fname[msdos_len] = '\0';
+		info->fake.len = msdos_len;
+		/* GLU Why not use info->fake.len everywhere ??? slower ? */
+		memcpy (info->entry.name,fname,len);
+		info->entry.name_len = len;
+		ret = 0;
+	}
+	/*
+		Evaluate how many record are needed to store this entry.
+	*/
+	info->recsize = umsdos_evalrecsize (len);
+	return ret;
+}
+
+#ifdef TEST
+
+struct MANG_TEST{ /* One test case for umsdos_parse(), see tb[] and main() */
+ char *fname; /* Name to validate (given to umsdos_parse) */
+ int msdos_reject; /* Expected msdos_reject flag (compared with info.msdos_reject) */
+ char *msname; /* Expected msdos name (compared with info.fake.fname) */
+};
+
+/*
+	Test cases for umsdos_parse(): name, expected msdos_reject flag,
+	expected MSDOS name (before umsdos_manglename() adds the extension).
+*/
+struct MANG_TEST tb[]={
+	"hello",		0,	"hello",
+	"hello.1",		0,	"hello.1",
+	"hello.1_",		0,	"hello.1_",
+	"prm",			0,	"prm",
+
+#ifdef PROPOSITION
+	"HELLO",		1,	"hello",
+	"Hello.1",		1,	"hello.1",
+	"Hello.c",		1,	"hello.c",
+#else
+/*
+	This must be #else. "#elseif" is not a preprocessor directive:
+	when PROPOSITION is not defined, it is ignored like any other
+	text of the skipped group and the three entries below are
+	silently dropped from the table.
+
+	GLU I find the three examples below very "unfortunate".
+	I propose to convert to lower case in a preliminary pass,
+	and to test afterwards if there are other "nasty" characters.
+	Well, I have not done it, because it is not that easy to
+	change. But it is for the principle.
+	Obviously this increases the chances of "collision",
+	for example between "HELLO" and "Hello", but those problems
+	can be handled elsewhere with the other collisions.
+*/
+	"HELLO",		1,	"hello",
+	"Hello.1",		1,	"hello_1",
+	"Hello.c",		1,	"hello_c",
+#endif
+
+	"hello.{_1",		1,	"hello_{_",
+	"hello\t",		1,	"hello#",
+	"hello.1.1",		1,	"hello_1_",
+	"hel,lo",		1,	"hel#lo",
+	"Salut.Tu.vas.bien?",	1,	"salut_tu",
+	".profile",		1,	"_profile",
+	".xv",			1,	"_xv",
+	"toto.",		1,	"toto_",
+	"clock$.x",		1,	"-clock$",
+	"emmxxxx0",		1,	"-emmxxxx",
+	"emmxxxx0.abcd",	1,	"-emmxxxx",
+	"aux",			1,	"-aux",
+	"prn",			1,	"-prn",
+	"prn.abc",		1,	"-prn",
+	"PRN",			1,	"-prn",
+/*
+GLU	WARNING: the results of the following ones differ between my
+GLU	version of the mangling and the original mangling.
+GLU	CAUSE: the way the variable baselen is computed.
+GLU		For you it is always 3
+GLU		For me it is respectively 7, 8 and 8
+*/
+	"PRN.abc",		1,	"prn_abc",
+	"Prn.abcd",		1,	"prn_abcd",
+	"prn.abcd",		1,	"prn_abcd",
+	"Prn.abcdefghij",	1,	"prn_abcd"
+};
+
+/*
+	Test driver (only built with TEST).
+	Run umsdos_parse() on every entry of tb[] and compare the new
+	umsdos_evalrecsize() with umsdos_evalrecsize_old() for every
+	length below UMSDOS_MAXNAME. Every mismatch is printed with a
+	leading "****".
+	Return 0 if everything matched, 1 otherwise.
+*/
+int main (int argc, char *argv[])
+{
+	int i,rold,rnew;
+	int nberr = 0;		/* Number of mismatches found */
+	printf ("Testing the umsdos_parse.\n");
+	for (i=0; i<(int)(sizeof(tb)/sizeof(tb[0])); i++){
+		struct MANG_TEST *pttb = tb+i;
+		struct umsdos_info info;
+		int ret = umsdos_parse (pttb->fname,strlen(pttb->fname),&info);
+		if (ret != 0){
+			/* info is not reliable if the parsing failed */
+			printf ("**** %s -> umsdos_parse error %d\n",pttb->fname,ret);
+			nberr++;
+		}else if (strcmp(info.fake.fname,pttb->msname)!=0){
+			printf ("**** %s -> ",pttb->fname);
+			printf ("%s <> %s\n",info.fake.fname,pttb->msname);
+			nberr++;
+		}else if (info.msdos_reject != pttb->msdos_reject){
+			printf ("**** %s -> %s ",pttb->fname,pttb->msname);
+			printf ("%d <> %d\n",info.msdos_reject,pttb->msdos_reject);
+			nberr++;
+		}else{
+			printf ("     %s -> %s %d\n",pttb->fname,pttb->msname
+				,pttb->msdos_reject);
+		}
+	}
+	printf ("Testing the new umsdos_evalrecsize.");
+	for (i=0; i<UMSDOS_MAXNAME ; i++){
+		rnew=umsdos_evalrecsize (i);
+		rold=umsdos_evalrecsize_old (i);
+		if (!(i%UMSDOS_REC_SIZE)){
+			printf ("\n%d:\t",i);
+		}
+		if (rnew!=rold){
+			printf ("**** %d newres: %d != %d \n", i, rnew, rold);
+			nberr++;
+		}else{
+			printf(".");
+		}
+	}
+	printf ("\nEnd of Testing.\n");
+
+	return nberr != 0;
+}
+
+#endif
diff --git a/fs/umsdos/namei.c b/fs/umsdos/namei.c
new file mode 100644
index 000000000..567039e14
--- /dev/null
+++ b/fs/umsdos/namei.c
@@ -0,0 +1,1043 @@
+/*
+ * linux/fs/umsdos/namei.c
+ *
+ * Written 1993 by Jacques Gelinas
+ * Inspired from linux/fs/msdos/... by Werner Almesberger
+ *
+ * Maintain and access the --linux alternate directory file.
+*/
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/msdos_fs.h>
+#include <linux/umsdos_fs.h>
+#include <linux/malloc.h>
+
+#define PRINTK(x)
+#define Printk(x) printk x
+
+#if 1
+/*
+	Wait for creation exclusivity.
+	Return 0 without sleeping if nobody is creating in dir, or if
+	the current process is the one recorded as the creator.
+	Otherwise sleep once on the directory wait queue and return 1.
+	A return of 1 only means a wait was done. It does not mean the
+	directory is available: the caller has to test again.
+*/
+static int umsdos_waitcreate(struct inode *dir)
+{
+	if (!dir->u.umsdos_i.u.dir_info.creating){
+		return 0;
+	}
+	if (dir->u.umsdos_i.u.dir_info.pid == current->pid){
+		/* We own the creation lock ourselves */
+		return 0;
+	}
+	sleep_on(&dir->u.umsdos_i.u.dir_info.p);
+	return 1;
+}
+/*
+	Sleep on the directory wait queue until the count of lookups
+	in progress (dir_info.looking) is back to 0.
+*/
+static void umsdos_waitlookup (struct inode *dir)
+{
+	for (;;){
+		if (!dir->u.umsdos_i.u.dir_info.looking) break;
+		sleep_on(&dir->u.umsdos_i.u.dir_info.p);
+	}
+}
+/*
+	Take the creation lock of a directory: lock all other process
+	out of it. See umsdos_unlockcreate() for the release.
+*/
+void umsdos_lockcreate (struct inode *dir)
+{
+	/* #Specification: file creation / not atomic
+		File creation is a two step process. First we create (allocate)
+		an entry in the EMD file and then (using the entry offset) we
+		build a unique name for MSDOS. We create this name in the msdos
+		space.
+
+		We have to use semaphore (sleep_on/wake_up) to prevent lookup
+		into a directory when we create a file or directory and to
+		prevent creation while a lookup is going on. Since many lookup
+		may happen at the same time, the semaphore is a counter.
+
+		Only one creation is allowed at the same time. This protection
+		may not be necessary. The problem arise mainly when a lookup
+		or a readdir is done while a file is partially created. The
+		lookup process see that as a "normal" problem and silently
+		erase the file from the EMD file. Normal because a file
+		may be erased during a MSDOS session, but not removed from
+		the EMD file.
+
+		The locking is done on a directory per directory basis. Each
+		directory inode has its wait_queue.
+
+		For some operation like hard link, things even get worse. Many
+		creation must occur at once (atomic). To simplify the design
+		a process is allowed to recursively lock the directory for
+		creation. The pid of the locking process is kept along with
+		a counter so a second level of locking is granted or not.
+	*/
+	int waited;
+	/*
+		Retry until no wait was needed: either nobody is creating
+		or we (the process) already own the lock.
+	*/
+	do{
+		waited = umsdos_waitcreate(dir);
+	}while (waited != 0);
+	/* Record one more level of locking and the owner */
+	dir->u.umsdos_i.u.dir_info.creating++;
+	dir->u.umsdos_i.u.dir_info.pid = current->pid;
+	/* Lookups already in progress must finish first */
+	umsdos_waitlookup (dir);
+}
+/*
+	Take the creation lock of two directories at once: lock all
+	other process out of both of them.
+*/
+static void umsdos_lockcreate2 (struct inode *dir1, struct inode *dir2)
+{
+	/*
+		We must check that both directory are available before
+		locking anyone of them. This is to avoid some deadlock.
+		Thanks to dglaude@is1.vub.ac.be (GLAUDE DAVID) for pointing
+		this to me.
+	*/
+	for (;;){
+		/* Any wait means we start again with dir1 */
+		if (umsdos_waitcreate(dir1)!=0) continue;
+		if (umsdos_waitcreate(dir2)!=0) continue;
+		break;
+	}
+	/* No wait was needed for either one: we own both now */
+	dir1->u.umsdos_i.u.dir_info.creating++;
+	dir1->u.umsdos_i.u.dir_info.pid = current->pid;
+	dir2->u.umsdos_i.u.dir_info.creating++;
+	dir2->u.umsdos_i.u.dir_info.pid = current->pid;
+	umsdos_waitlookup(dir1);
+	umsdos_waitlookup(dir2);
+}
+/*
+	Announce a lookup in this directory: wait until no creation is
+	pending (umsdos_waitcreate() returns 0), then count one more
+	lookup in progress. umsdos_endlookup() undoes the count.
+*/
+void umsdos_startlookup (struct inode *dir)
+{
+	int waited;
+	do{
+		waited = umsdos_waitcreate (dir);
+	}while (waited != 0);
+	dir->u.umsdos_i.u.dir_info.looking++;
+}
+void check_page_tables(void);
+
+/*
+	Unlock the directory: give back one level of the creation lock
+	and wake up whoever sleeps on the directory wait queue.
+	A count going below 0 is reported with printk.
+*/
+void umsdos_unlockcreate (struct inode *dir)
+{
+	if (--dir->u.umsdos_i.u.dir_info.creating < 0){
+		/* More unlock than lock */
+		printk ("UMSDOS: dir->u.umsdos_i.u.dir_info.creating < 0: %d"
+			,dir->u.umsdos_i.u.dir_info.creating);
+	}
+	wake_up (&dir->u.umsdos_i.u.dir_info.p);
+}
+/*
+	Tell directory lookup is over: count one lookup less and wake up
+	whoever sleeps on the directory wait queue.
+	A count going below 0 is reported with printk.
+*/
+void umsdos_endlookup (struct inode *dir)
+{
+	if (--dir->u.umsdos_i.u.dir_info.looking < 0){
+		/* More endlookup than startlookup */
+		printk ("UMSDOS: dir->u.umsdos_i.u.dir_info.looking < 0: %d"
+			,dir->u.umsdos_i.u.dir_info.looking);
+	}
+	wake_up (&dir->u.umsdos_i.u.dir_info.p);
+}
+#else
+/*
+	Empty replacements used when the locking above is compiled out.
+	They have the same linkage as the real functions: only
+	umsdos_lockcreate2() is static.
+*/
+void umsdos_lockcreate (struct inode *dir){}
+static void umsdos_lockcreate2 (struct inode *dir1, struct inode *dir2){}
+void umsdos_startlookup (struct inode *dir){}
+void umsdos_unlockcreate (struct inode *dir){}
+void umsdos_endlookup (struct inode *dir){}
+#endif
+/*
+	Tell if a name may never be created (nor removed) in a directory.
+	This is the case for the name accepted by umsdos_is_pseudodos()
+	and for "." and "..".
+	Return errcod for such a name, 0 for any other name.
+*/
+static int umsdos_nevercreat(
+	struct inode *dir,
+	const char *name,	/* Name of the file to add */
+	int len,		/* Length of the name */
+	int errcod)		/* Error code to return for a refused name */
+{
+	int ret = 0;
+	if (umsdos_is_pseudodos(dir,name,len)){
+		/* #Specification: pseudo root / any file creation /DOS
+			The pseudo sub-directory /DOS can't be created!
+			EEXIST is returned.
+
+			The pseudo sub-directory /DOS can't be removed!
+			EPERM is returned.
+		*/
+		/* The caller selects EEXIST or EPERM with errcod */
+		ret = errcod;
+	}else if (name[0] == '.'
+		&& (len == 1 || (len == 2 && name[1] == '.'))){
+		/* #Specification: create / . and ..
+			If one try to creates . or .., it always fail and return
+			EEXIST.
+
+			If one try to delete . or .., it always fail and return
+			EPERM.
+
+			This should be test at the VFS layer level to avoid
+			duplicating this in all file systems. Any comments ?
+		*/
+		ret = errcod;
+	}
+	return ret;
+}
+
+/*
+	Add a new file (ordinary or special) into the alternate directory.
+	An entry is first allocated in the EMD file (umsdos_newentry).
+	If successful, the file is created in the real MSDOS directory
+	under its MSDOS name. If this creation fails, the EMD entry is
+	removed again.
+
+	The directory is locked for creation during the operation and
+	dir is released (iput) before returning.
+
+	*result is set to NULL unless the creation succeeds.
+
+	Return the status of the operation. 0 mean success.
+*/
+static int umsdos_create_any (
+	struct inode *dir,
+	const char *name,	/* Name of the file to add */
+	int len,		/* Length of the name */
+	int mode,		/* Permission bit + file type ??? */
+	int rdev,		/* major, minor or 0 for ordinary file */
+				/* and symlinks */
+	char flags,
+	struct inode **result)	/* Will hold the inode of the newly created */
+				/* file */
+{
+	int ret;
+	/*
+		Done before any test, so the caller never gets an
+		uninitialised pointer back, even when the name is refused.
+	*/
+	*result = NULL;
+	ret = umsdos_nevercreat(dir,name,len,-EEXIST);
+	if (ret == 0){
+		struct umsdos_info info;
+		ret = umsdos_parse (name,len,&info);
+		if (ret == 0){
+			info.entry.mode = mode;
+			info.entry.rdev = rdev;
+			info.entry.flags = flags;
+			info.entry.uid = current->fsuid;
+			/* A set-gid directory gives its group to the new file */
+			info.entry.gid = (dir->i_mode & S_ISGID)
+				? dir->i_gid : current->fsgid;
+			info.entry.ctime = info.entry.atime = info.entry.mtime
+				= CURRENT_TIME;
+			info.entry.nlink = 1;
+			umsdos_lockcreate(dir);
+			ret = umsdos_newentry (dir,&info);
+			if (ret == 0){
+				/* NOTE(review): presumably msdos_create releases dir - confirm */
+				dir->i_count++;
+				ret = msdos_create (dir,info.fake.fname,info.fake.len
+					,S_IFREG|0777,result);
+				if (ret == 0){
+					struct inode *inode = *result;
+					umsdos_lookup_patch (dir,inode,&info.entry,info.f_pos);
+					PRINTK (("inode %p[%d] ",inode,inode->i_count));
+					PRINTK (("Creation OK: [%d] %s %d pos %d\n",dir->i_ino
+						,info.fake.fname,current->pid,info.f_pos));
+				}else{
+					/* #Specification: create / file exist in DOS
+						Here is a situation. Trying to create a file with
+						UMSDOS. The file is unknown to UMSDOS but already
+						exist in the DOS directory.
+
+						Here is what we are NOT doing:
+
+						We could silently assume that everything is fine
+						and allows the creation to succeed.
+
+						It is possible not all files in the partition
+						are mean to be visible from linux. By trying to create
+						those file in some directory, one user may get access
+						to those file without proper permissions. Looks like
+						a security hole to me. Off course sharing a file system
+						with DOS is some kind of security hole :-)
+
+						So ?
+
+						We return EEXIST in this case.
+						The same is true for directory creation.
+					*/
+					if (ret == -EEXIST){
+						printk ("UMSDOS: out of sync, Creation error [%ld], "
+							"deleting %s %d %d pos %ld\n",dir->i_ino
+							,info.fake.fname,-ret,current->pid,info.f_pos);
+					}
+					/* Give back the EMD entry allocated above */
+					umsdos_delentry (dir,&info,0);
+				}
+				PRINTK (("umsdos_create %s ret = %d pos %d\n"
+					,info.fake.fname,ret,info.f_pos));
+			}
+			umsdos_unlockcreate(dir);
+		}
+	}
+	iput (dir);
+	return ret;
+}
+/*
+	Prepare the entry of the new name of a rename operation: every
+	attribute is copied from the entry of the old name, except flags
+	which may be imposed by the caller.
+	(Only useful for umsdos_rename_f() below).
+*/
+static void umsdos_ren_init(
+	struct umsdos_info *new_info,
+	struct umsdos_info *old_info,
+	int flags)		/* 0 == copy flags from old_name */
+				/* != 0, this is the value of flags */
+{
+	/* Ownership and type */
+	new_info->entry.uid = old_info->entry.uid;
+	new_info->entry.gid = old_info->entry.gid;
+	new_info->entry.mode = old_info->entry.mode;
+	new_info->entry.rdev = old_info->entry.rdev;
+	new_info->entry.nlink = old_info->entry.nlink;
+	/* Time stamps */
+	new_info->entry.atime = old_info->entry.atime;
+	new_info->entry.mtime = old_info->entry.mtime;
+	new_info->entry.ctime = old_info->entry.ctime;
+	/* Flags: the caller's value wins when it is not 0 */
+	if (flags){
+		new_info->entry.flags = flags;
+	}else{
+		new_info->entry.flags = old_info->entry.flags;
+	}
+}
+
+#define chkstk() \
+ if (STACK_MAGIC != *(unsigned long *)current->kernel_stack_page){\
+ printk(KERN_ALERT "UMSDOS: %s magic %x != %lx ligne %d\n" \
+ , current->comm,STACK_MAGIC \
+ ,*(unsigned long *)current->kernel_stack_page \
+ ,__LINE__); \
+ }
+
+/*
+	Rename a file (move) in the file system.
+
+	The sequence is: find the old entry in the EMD file of old_dir,
+	allocate the new entry in the EMD file of new_dir, rename the
+	file in the MSDOS directory, then delete the old entry. If the
+	MSDOS rename fails, the new entry is deleted again.
+
+	Both directories are locked for creation during the operation.
+	old_dir and new_dir are released (iput) before returning.
+
+	Return 0 if ok, a negative error code otherwise.
+*/
+static int umsdos_rename_f(
+	struct inode * old_dir,
+	const char * old_name,
+	int old_len,
+	struct inode * new_dir,
+	const char * new_name,
+	int new_len,
+	int flags)		/* 0 == copy flags from old_name */
+				/* != 0, this is the value of flags */
+{
+	/* Error codes are negative: this is what is returned */
+	/* if one of the names can't be parsed */
+	int ret = -EPERM;
+	struct umsdos_info old_info;
+	int old_ret = umsdos_parse (old_name,old_len,&old_info);
+	struct umsdos_info new_info;
+	int new_ret = umsdos_parse (new_name,new_len,&new_info);
+chkstk();
+	PRINTK (("umsdos_rename %d %d ",old_ret,new_ret));
+	if (old_ret == 0 && new_ret == 0){
+		umsdos_lockcreate2(old_dir,new_dir);
+chkstk();
+		PRINTK (("old findentry "));
+		ret = umsdos_findentry(old_dir,&old_info,0);
+chkstk();
+		PRINTK (("ret %d ",ret));
+		if (ret == 0){
+			PRINTK (("new newentry "));
+			umsdos_ren_init(&new_info,&old_info,flags);
+			ret = umsdos_newentry (new_dir,&new_info);
+chkstk();
+			PRINTK (("ret %d %d ",ret,new_info.fake.len));
+			if (ret == 0){
+				PRINTK (("msdos_rename "));
+				old_dir->i_count++;
+				new_dir->i_count++;	/* Both inode are needed later */
+				ret = msdos_rename (old_dir
+					,old_info.fake.fname,old_info.fake.len
+					,new_dir
+					,new_info.fake.fname,new_info.fake.len);
+chkstk();
+				PRINTK (("after m_rename ret %d ",ret));
+				if (ret != 0){
+					/* Undo the allocation of the new entry */
+					umsdos_delentry (new_dir,&new_info
+						,S_ISDIR(new_info.entry.mode));
+chkstk();
+				}else{
+					ret = umsdos_delentry (old_dir,&old_info
+						,S_ISDIR(old_info.entry.mode));
+chkstk();
+					if (ret == 0){
+						/*
+							This UMSDOS_lookup does not look very useful.
+							It makes sure that the inode of the file will
+							be correctly setup (umsdos_patch_inode()) in
+							case it is already in use.
+
+							Not very efficient ...
+						*/
+						struct inode *inode;
+						new_dir->i_count++;
+						PRINTK (("rename lookup len %d %d -- ",new_len,new_info.entry.flags));
+						ret = UMSDOS_lookup (new_dir,new_name,new_len
+							,&inode);
+chkstk();
+						if (ret != 0){
+							printk ("UMSDOS: partial rename for file %s\n"
+								,new_info.entry.name);
+						}else{
+							/*
+								Update f_pos so notify_change will succeed
+								if the file was already in use.
+							*/
+							umsdos_set_dirinfo (inode,new_dir,new_info.f_pos);
+chkstk();
+							iput (inode);
+						}
+					}
+				}
+			}
+		}
+		umsdos_unlockcreate(old_dir);
+		umsdos_unlockcreate(new_dir);
+	}
+	iput (old_dir);
+	iput (new_dir);
+	PRINTK (("\n"));
+	return ret;
+}
+/*
+	Setup a symbolic link or a (pseudo) hard link.
+	The entry is created with umsdos_create_any() using the given
+	mode and flags, then symname is written as the content of the
+	file. If the write fails, the entry is removed again.
+
+	dir is released (iput) before returning.
+
+	Return a negative error code or 0 if ok.
+*/
+static int umsdos_symlink_x(
+	struct inode * dir,
+	const char * name,
+	int len,
+	const char * symname,	/* name will point to this path */
+	int mode,
+	char flags)
+{
+	/* #Specification: symbolic links / strategy
+		A symbolic link is simply a file which hold a path. It is
+		implemented as a normal MSDOS file (not very space efficient :-()
+
+		I see 2 different way to do it. One is to place the link data
+		in unused entry of the EMD file. The other is to have a separate
+		file dedicated to hold all symbolic links data.
+
+		Lets go for simplicity...
+	*/
+	struct inode *inode;
+	int ret;
+	dir->i_count++;		/* We keep the inode in case we need it */
+				/* later */
+	ret = umsdos_create_any (dir,name,len,mode,0,flags,&inode);
+	PRINTK (("umsdos_symlink ret %d ",ret));
+	if (ret == 0){
+		/*
+			Length of the link data. It must not be called len:
+			that would hide the parameter, and the UMSDOS_unlink()
+			below needs the length of name, not of symname.
+		*/
+		int symlen = strlen(symname);
+		struct file filp;
+		filp.f_pos = 0;
+		/* Make the inode acceptable to MSDOS */
+		ret = umsdos_file_write_kmem (inode,&filp,(char*)symname,symlen);
+		iput (inode);
+		if (ret >= 0){
+			if (ret != symlen){
+				ret = -EIO;
+				printk ("UMSDOS: "
+					"Can't write symbolic link data\n");
+			}else{
+				ret = 0;
+			}
+		}
+		if (ret != 0){
+			UMSDOS_unlink (dir,name,len);
+			/* NOTE(review): presumably UMSDOS_unlink released dir - confirm */
+			dir = NULL;
+		}
+	}
+	iput (dir);
+	PRINTK (("\n"));
+	return ret;
+}
+/*
+	Create the symbolic link name in dir, pointing to symname.
+	This is umsdos_symlink_x() with the mode of a symbolic link
+	(S_IFLNK, all permissions) and no special flags.
+	Return a negative error code or 0 if ok.
+*/
+int UMSDOS_symlink(
+	struct inode * dir,
+	const char * name,
+	int len,
+	const char * symname)	/* name will point to this path */
+{
+	const int mode = S_IFLNK|0777;
+	return umsdos_symlink_x (dir,name,len,symname,mode,0);
+}
+/*
+ Add a link to an inode in a directory
+
+ oldinode is the existing file, dir/name/len describe the new
+ entry to create. Both oldinode and dir are released (iput)
+ before returning.
+
+ Return 0 if ok, a negative error code otherwise.
+*/
+int UMSDOS_link (
+ struct inode * oldinode,
+ struct inode * dir,
+ const char * name,
+ int len)
+{
+ /* #Specification: hard link / strategy
+ Well ... hard link are difficult to implement on top of an
+ MsDOS fat file system. Unlike UNIX file systems, there are no
+ inode. A directory entry hold the functionality of the inode
+ and the entry.
+
+ We will used the same strategy as a normal Unix file system
+ (with inode) except we will do it symbolically (using paths).
+
+ Because anything can happen during a DOS session (defragment,
+ directory sorting, etc...), we can't rely on MsDOS pseudo
+ inode number to record the link. For this reason, the link
+ will be done using hidden symbolic links. The following
+ scenario illustrate how it work.
+
+ Given a file /foo/file
+
+ ln /foo/file /tmp/file2
+
+ become internally
+
+ mv /foo/file /foo/-LINK1
+ ln -s /foo/-LINK1 /foo/file
+ ln -s /foo/-LINK1 /tmp/file2
+
+ Using this strategy, we can operate on /foo/file or /foo/file2.
+ We can remove one and keep the other, like a normal Unix hard link.
+ We can rename /foo/file or /tmp/file2 independently.
+
+ The entry -LINK1 will be hidden. It will hold a link count.
+ When all link are erased, the hidden file is erased too.
+ */
+ /* #Specification: weakness / hard link
+ The strategy for hard link introduces a side effect that
+ may or may not be acceptable. Here is the sequence
+
+ mkdir subdir1
+ touch subdir1/file
+ mkdir subdir2
+ ln subdir1/file subdir2/file
+ rm subdir1/file
+ rmdir subdir1
+ rmdir: subdir1: Directory not empty
+
+ This happen because there is an invisible file (--link) in
+ subdir1 which is referenced by subdir2/file.
+
+ Any idea ?
+ */
+ /* #Specification: weakness / hard link / rename directory
+ Another weakness of hard link come from the fact that
+ it is based on hidden symbolic links. Here is an example.
+
+ mkdir /subdir1
+ touch /subdir1/file
+ mkdir /subdir2
+ ln /subdir1/file subdir2/file
+ mv /subdir1 subdir3
+ ls -l /subdir2/file
+
+ Since /subdir2/file is a hidden symbolic link
+ to /subdir1/..hlinkNNN, accessing it will fail since
+ /subdir1 does not exist anymore (has been renamed).
+ */
+ int ret = 0;
+ if (S_ISDIR(oldinode->i_mode)){
+ /* #Specification: hard link / directory
+ A hard link can't be made on a directory. EPERM is returned
+ in this case.
+ */
+ ret = -EPERM;
+ }else if ((ret = umsdos_nevercreat(dir,name,len,-EPERM))==0){
+ struct inode *olddir; /* Directory holding oldinode, set by umsdos_get_dirowner() */
+ ret = umsdos_get_dirowner(oldinode,&olddir);
+ PRINTK (("umsdos_link dir_owner = %d -> %p [%d] "
+ ,oldinode->u.umsdos_i.i_dir_owner,olddir,olddir->i_count));
+ if (ret == 0){
+ struct umsdos_dirent entry; /* EMD entry of oldinode in olddir */
+ umsdos_lockcreate2(dir,olddir);
+ ret = umsdos_inode2entry (olddir,oldinode,&entry);
+ if (ret == 0){
+ PRINTK (("umsdos_link :%s: ino %d flags %d "
+ ,entry.name
+ ,oldinode->i_ino,entry.flags));
+ if (!(entry.flags & UMSDOS_HIDDEN)){
+ /* #Specification: hard link / first hard link
+ The first time a hard link is done on a file, this
+ file must be renamed and hidden. Then an internal
+ symbolic link must be done on the hidden file.
+
+ The second link is done after on this hidden file.
+
+ It is expected that the Linux MSDOS file system
+ keeps the same pseudo inode when a rename operation
+ is done on a file in the same directory.
+ */
+ struct umsdos_info info;
+ ret = umsdos_newhidden (olddir,&info);
+ if (ret == 0){
+ olddir->i_count+=2; /* umsdos_rename_f() does an iput on both of its directories, and olddir is passed twice */
+ PRINTK (("olddir[%d] ",olddir->i_count));
+ ret = umsdos_rename_f (olddir,entry.name
+ ,entry.name_len
+ ,olddir,info.entry.name,info.entry.name_len
+ ,UMSDOS_HIDDEN);
+ if (ret == 0){
+ char *path = (char*)kmalloc(PATH_MAX,GFP_KERNEL);
+ if (path == NULL){
+ ret = -ENOMEM; /* NOTE(review): the file is already renamed and hidden at this point and is not renamed back */
+ }else{
+ PRINTK (("olddir[%d] ",olddir->i_count));
+ ret = umsdos_locate_path (oldinode,path);
+ PRINTK (("olddir[%d] ",olddir->i_count));
+ if (ret == 0){
+ olddir->i_count++; /* umsdos_symlink_x() does an iput on its directory */
+ ret = umsdos_symlink_x (olddir
+ ,entry.name
+ ,entry.name_len,path
+ ,S_IFREG|0777,UMSDOS_HLINK);
+ if (ret == 0){
+ dir->i_count++; /* same here: dir is still needed below */
+ ret = umsdos_symlink_x (dir,name,len
+ ,path
+ ,S_IFREG|0777,UMSDOS_HLINK);
+ }
+ }
+ kfree (path);
+ }
+ }
+ }
+ }else{
+ char *path = (char*)kmalloc(PATH_MAX,GFP_KERNEL); /* oldinode is already a hidden file: only the new link is needed */
+ if (path == NULL){
+ ret = -ENOMEM;
+ }else{
+ ret = umsdos_locate_path (oldinode,path);
+ if (ret == 0){
+ dir->i_count++; /* umsdos_symlink_x() does an iput on its directory */
+ ret = umsdos_symlink_x (dir,name,len,path
+ ,S_IFREG|0777,UMSDOS_HLINK);
+ }
+ kfree (path);
+ }
+ }
+ }
+ umsdos_unlockcreate(olddir);
+ umsdos_unlockcreate(dir);
+ }
+ iput (olddir); /* NOTE(review): also reached when umsdos_get_dirowner() failed - confirm olddir is always set (or NULL) in that case */
+ }
+ if (ret == 0){
+ struct iattr newattrs;
+ oldinode->i_nlink++;
+ newattrs.ia_valid = 0; /* No attribute is changed by the call itself */
+ ret = UMSDOS_notify_change(oldinode, &newattrs); /* NOTE(review): presumably this records the new link count in the EMD file - confirm */
+ }
+ iput (oldinode);
+ iput (dir);
+ PRINTK (("umsdos_link %d\n",ret));
+ return ret;
+}
+/*
+ Add a new file into the alternate directory.
+ The file is added to the real MSDOS directory. If successful, it
+ is then added to the EMD file.
+
+ This is a thin wrapper: all the work is delegated to
+ umsdos_create_any() with 0 for its two extra arguments (the first
+ of those is the rdev value UMSDOS_mknod passes; the meaning of the
+ second one is not visible from here).
+
+ Return the status of the operation. 0 means success.
+*/
+int UMSDOS_create (
+ struct inode *dir,
+ const char *name, /* Name of the file to add */
+ int len, /* Length of the name */
+ int mode, /* Permission bit + file type ??? */
+ struct inode **result) /* Will hold the inode of the newly created */
+ /* file */
+{
+ return umsdos_create_any (dir,name,len,mode,0,0,result);
+}
+/*
+	Add a sub-directory in a directory.
+
+	The new entry is first registered with umsdos_newentry(), then the
+	directory itself is created with msdos_mkdir(). If that fails, the
+	entry is removed again with umsdos_delentry(). If it succeeds, an
+	empty EMD file is created in the new sub-directory.
+
+	dir is released (iput) before returning.
+	Return 0 if ok, or a negative error code if not.
+*/
+int UMSDOS_mkdir(
+	struct inode * dir,
+	const char * name,
+	int len,
+	int mode)
+{
+	int ret = umsdos_nevercreat(dir,name,len,-EEXIST);
+	if (ret == 0){
+		struct umsdos_info info;
+		ret = umsdos_parse (name,len,&info);
+		PRINTK (("umsdos_mkdir %d\n",ret));
+		if (ret == 0){
+			info.entry.mode = mode | S_IFDIR;
+			info.entry.rdev = 0;
+			info.entry.uid = current->fsuid;
+			/* A set-gid directory passes its group to its children */
+			info.entry.gid = (dir->i_mode & S_ISGID)
+				? dir->i_gid : current->fsgid;
+			info.entry.ctime = info.entry.atime = info.entry.mtime
+				= CURRENT_TIME;
+			info.entry.flags = 0;
+			umsdos_lockcreate(dir);
+			info.entry.nlink = 1;
+			ret = umsdos_newentry (dir,&info);
+			PRINTK (("newentry %d ",ret));
+			if (ret == 0){
+				/* Extra reference for msdos_mkdir() */
+				dir->i_count++;
+				ret = msdos_mkdir (dir,info.fake.fname,info.fake.len,mode);
+				if (ret != 0){
+					umsdos_delentry (dir,&info,1);
+					/* #Specification: mkdir / Directory already exist in DOS
+						We do the same thing as for file creation.
+						For all user it is an error.
+					*/
+				}else{
+					/* #Specification: mkdir / umsdos directory / create EMD
+						When we created a new sub-directory in a UMSDOS
+						directory (one with full UMSDOS semantic), we
+						create immediately an EMD file in the new
+						sub-directory so it inherit UMSDOS semantic.
+					*/
+					/*
+						Both pointers start as NULL so that the iput()
+						calls below never see an uninitialised value
+						when a callee fails without storing an inode.
+					*/
+					struct inode *subdir = NULL;
+					ret = umsdos_real_lookup (dir,info.fake.fname
+						,info.fake.len,&subdir);
+					if (ret == 0){
+						struct inode *result = NULL;
+						ret = msdos_create (subdir,UMSDOS_EMD_FILE
+							,UMSDOS_EMD_NAMELEN,S_IFREG|0777,&result);
+						/* subdir was handed over to msdos_create() */
+						subdir = NULL;
+						iput (result);
+					}
+					if (ret < 0){
+						printk ("UMSDOS: Can't create empty --linux-.---\n");
+					}
+					iput (subdir);
+				}
+			}
+			umsdos_unlockcreate(dir);
+		}
+	}
+	PRINTK (("umsdos_mkdir %d\n",ret));
+	iput (dir);
+	return ret;
+}
+/*
+	Add a new device special file into a directory.
+
+	The file is created with umsdos_create_any(). The inode it hands
+	back is released at once, since nothing is returned to the caller
+	but the status.
+*/
+int UMSDOS_mknod(
+	struct inode * dir,
+	const char * name,
+	int len,
+	int mode,
+	int rdev)
+{
+	/* #Specification: Special files / strategy
+		Device special file, pipes, etc ... are created like normal
+		file in the msdos file system. Of course they remain empty.
+
+		One strategy was to create those files only in the EMD file
+		since they were not important for MSDOS. The problem with
+		that, is that there were not getting inode number allocated.
+		The MSDOS filesystems is playing a nice game to fake inode
+		number, so why not use it.
+
+		The absence of inode number compatible with those allocated
+		for ordinary files was causing major trouble with hard link
+		in particular and other parts of the kernel I guess.
+	*/
+	/* NULL so the iput() below is harmless when no inode was produced */
+	struct inode *inode = NULL;
+	int ret = umsdos_create_any (dir,name,len,mode,rdev,0,&inode);
+	iput (inode);
+	return ret;
+}
+
+/*
+ Remove a sub-directory.
+
+ The sub-directory is looked up, checked for being unused and empty,
+ its EMD file (if any) is unlinked, then the directory is removed
+ with msdos_rmdir() and its entry deleted with umsdos_delentry().
+
+ dir is released (iput) before returning.
+ Return 0 if ok, or a negative error code (-EBUSY, -ENOTEMPTY or
+ whatever a sub-function reported).
+*/
+int UMSDOS_rmdir(
+ struct inode * dir,
+ const char * name,
+ int len)
+{
+ /* #Specification: style / iput strategy
+ In the UMSDOS project, I am trying to apply a single
+ programming style regarding inode management. Many
+ entry point are receiving an inode to act on, and must
+ do an iput() as soon as they are finished with
+ the inode.
+
+ For simple case, there is no problem. When you introduce
+ error checking, you end up with many iput placed around the
+ code.
+
+ The coding style I use all around is one where I am trying
+ to provide independent flow logic (I don't know how to
+ name this). With this style, code is easier to understand
+ but you rapidly get iput() all around. Here is an example
+ of what I am trying to avoid.
+
+ if (a){
+ ...
+ if(b){
+ ...
+ }
+ ...
+ if (c){
+ // Complex state. Was b true ?
+ ...
+ }
+ ...
+ }
+ // Weird state
+ if (d){
+ // ...
+ }
+ // Was iput finally done ?
+ return status;
+
+ Here is the style I am using. Still sometime I do the
+ first when things are very simple (or very complicated :-( )
+
+ if (a){
+ if (b){
+ ...
+ }else if (c){
+ // A single state gets here
+ }
+ }else if (d){
+ ...
+ }
+ return status;
+
+ Again, while this help clarifying the code, I often get a lot
+ of iput(), unlike the first style, where I can place few
+ "strategic" iput(). "strategic" also mean, more difficult
+ to place.
+
+ So here is the style I will be using from now on in this project.
+ There is always an iput() at the end of a function (which has
+ to do an iput()). One iput by inode. There is also one iput()
+ at the places where a successful operation is achieved. This
+ iput() is often done by a sub-function (often from the msdos
+ file system). So I get one too many iput() ? At the place
+ where an iput() is done, the inode is simply nulled, disabling
+ the last one.
+
+ if (a){
+ if (b){
+ ...
+ }else if (c){
+ msdos_rmdir(dir,...);
+ dir = NULL;
+ }
+ }else if (d){
+ ...
+ }
+ iput (dir);
+ return status;
+
+ Note that the umsdos_lockcreate() and umsdos_unlockcreate() function
+ pair goes against this practice of "forgetting" the inode as soon
+ as possible.
+ */
+ int ret = umsdos_nevercreat(dir,name,len,-EPERM);
+ if (ret == 0){
+ struct inode *sdir;
+ dir->i_count++; /* extra reference, presumably released by UMSDOS_lookup() */
+ ret = UMSDOS_lookup (dir,name,len,&sdir);
+ PRINTK (("rmdir lookup %d ",ret));
+ if (ret == 0){
+ int empty;
+ umsdos_lockcreate(dir);
+ if (sdir->i_count > 1){ /* somebody else still uses the sub-directory */
+ ret = -EBUSY;
+ }else if ((empty = umsdos_isempty (sdir)) != 0){ /* non-zero: nothing but (maybe) the EMD file */
+ PRINTK (("isempty %d i_count %d ",empty,sdir->i_count));
+ if (empty == 1){
+ /* We have to remove the EMD file */
+ ret = msdos_unlink(sdir,UMSDOS_EMD_FILE
+ ,UMSDOS_EMD_NAMELEN);
+ sdir = NULL; /* handed over to msdos_unlink(), see iput strategy above */
+ }
+ /* sdir must be free before msdos_rmdir() */
+ iput (sdir);
+ sdir = NULL;
+ PRINTK (("isempty ret %d nlink %d ",ret,dir->i_nlink));
+ if (ret == 0){
+ struct umsdos_info info;
+ dir->i_count++; /* extra reference for msdos_rmdir() */
+ umsdos_parse (name,len,&info); /* NOTE(review): status not checked */
+ /* The findentry is there only to complete */
+ /* the mangling */
+ umsdos_findentry (dir,&info,2);
+ ret = msdos_rmdir (dir,info.fake.fname
+ ,info.fake.len);
+ if (ret == 0){
+ ret = umsdos_delentry (dir,&info,1);
+ }
+ }
+ }else{
+ /*
+ The subdirectory is not empty, so leave it there
+ */
+ ret = -ENOTEMPTY;
+ }
+ iput(sdir); /* only does something on the -EBUSY and -ENOTEMPTY paths, sdir is NULL otherwise */
+ umsdos_unlockcreate(dir);
+ }
+ }
+ iput (dir);
+ PRINTK (("umsdos_rmdir %d\n",ret));
+ return ret;
+}
+/*
+ Remove a file from the directory.
+
+ The entry is located with umsdos_findentry(). If it is a hard link
+ (UMSDOS_HLINK), the link count of the hidden file it points to is
+ decremented first, and the hidden file itself is unlinked (by a
+ recursive call) when the count reaches 0. Then the entry is deleted
+ with umsdos_delentry() and the file with msdos_unlink_umsdos().
+
+ dir is released (iput) before returning.
+ Return 0 if ok, or a negative error code if not.
+*/
+int UMSDOS_unlink (
+ struct inode * dir,
+ const char * name,
+ int len)
+{
+ struct umsdos_info info;
+ int ret = umsdos_nevercreat(dir,name,len,-EPERM);
+ if (ret == 0){
+ ret = umsdos_parse (name,len,&info);
+ if (ret == 0){
+ umsdos_lockcreate(dir);
+ ret = umsdos_findentry(dir,&info,1);
+ if (ret == 0){
+ PRINTK (("UMSDOS_unlink %s ",info.fake.fname));
+ if (info.entry.flags & UMSDOS_HLINK){
+ /* #Specification: hard link / deleting a link
+ When we deletes a file, and this file is a link
+ we must subtract 1 to the nlink field of the
+ hidden link.
+
+ If the count goes to 0, we delete this hidden
+ link too.
+ */
+ /*
+ First, get the inode of the hidden link
+ using the standard lookup function.
+ */
+ struct inode *inode;
+ dir->i_count++; /* extra reference, presumably released by UMSDOS_lookup() */
+ ret = UMSDOS_lookup (dir,name,len,&inode);
+ if (ret == 0){
+ PRINTK (("unlink nlink = %d ",inode->i_nlink));
+ inode->i_nlink--;
+ if (inode->i_nlink == 0){
+ /* Last link gone: remove the hidden file from the directory owning it */
+ struct inode *hdir = iget(inode->i_sb
+ ,inode->u.umsdos_i.i_dir_owner); /* NOTE(review): result not checked against NULL */
+ struct umsdos_dirent entry;
+ ret = umsdos_inode2entry (hdir,inode,&entry);
+ if (ret == 0){
+ ret = UMSDOS_unlink (hdir,entry.name
+ ,entry.name_len); /* the recursive call does the iput of hdir */
+ }else{
+ iput (hdir);
+ }
+ }else{
+ /* Other links remain: only record the new link count */
+ struct iattr newattrs;
+ newattrs.ia_valid = 0;
+ ret = UMSDOS_notify_change (inode, &newattrs);
+ }
+ iput (inode);
+ }
+ }
+ if (ret == 0){
+ ret = umsdos_delentry (dir,&info,0);
+ if (ret == 0){
+ PRINTK (("Avant msdos_unlink %s ",info.fake.fname));
+ dir->i_count++; /* extra reference for msdos_unlink_umsdos() */
+ ret = msdos_unlink_umsdos (dir,info.fake.fname
+ ,info.fake.len);
+ PRINTK (("msdos_unlink %s %o ret %d ",info.fake.fname
+ ,info.entry.mode,ret));
+ }
+ }
+ }
+ umsdos_unlockcreate(dir);
+ }
+ }
+ iput (dir);
+ PRINTK (("umsdos_unlink %d\n",ret));
+ return ret;
+}
+
+/*
+ Rename a file (move) in the file system.
+
+ umsdos_rename_f() is tried once. If it reports -EEXIST, the target
+ is removed (UMSDOS_unlink(), then UMSDOS_rmdir() if the target is a
+ directory) and umsdos_rename_f() is tried a second time.
+
+ Both old_dir and new_dir are released (iput) before returning.
+ Return 0 if ok, or a negative error code if not.
+*/
+int UMSDOS_rename(
+ struct inode * old_dir,
+ const char * old_name,
+ int old_len,
+ struct inode * new_dir,
+ const char * new_name,
+ int new_len)
+{
+ /* #Specification: weakness / rename
+ There is a case where UMSDOS rename has a different behavior
+ than normal UNIX file system. Renaming an open file across
+ directory boundary does not work. Renaming an open file within
+ a directory does work however.
+
+ The problem (not sure) is in the linux VFS msdos driver.
+ I believe this is not a bug but a design feature, because
+ an inode number represent some sort of directory address
+ in the MSDOS directory structure. So moving the file into
+ another directory does not preserve the inode number.
+ */
+ int ret = umsdos_nevercreat(new_dir,new_name,new_len,-EEXIST);
+ if (ret == 0){
+ /* umsdos_rename_f eat the inode and we may need those later */
+ old_dir->i_count++;
+ new_dir->i_count++;
+ ret = umsdos_rename_f (old_dir,old_name,old_len,new_dir,new_name
+ ,new_len,0);
+ if (ret == -EEXIST){
+ /* #Specification: rename / new name exist
+ If the destination name already exist, it will
+ silently be removed. EXT2 does it this way
+ and this is the spec of SUNOS. So does UMSDOS.
+
+ If the destination is an empty directory it will
+ also be removed.
+ */
+ /* #Specification: rename / new name exist / possible flaw
+ The code to handle the deletion of the target (file
+ and directory) use to be in umsdos_rename_f, surrounded
+ by proper directory locking. This was insuring that only
+ one process could achieve a rename (modification) operation
+ in the source and destination directory. This was also
+ insuring the operation was "atomic".
+
+ This has been changed because this was creating a kernel
+ stack overflow (stack is only 4k in the kernel). To avoid
+ the code doing the deletion of the target (if exist) has
+ been moved to a upper layer. umsdos_rename_f is tried
+ once and if it fails with EEXIST, the target is removed
+ and umsdos_rename_f is done again.
+
+ This makes the code cleaner and (not sure) solve a
+ deadlock problem one tester was experiencing.
+
+ The point is to mention that possibly, the semantic of
+ "rename" may be wrong. Anyone dare to check that :-)
+ Be aware that IF it is wrong, to produce the problem you
+ will need two process trying to rename a file to the
+ same target at the same time. Again, I am not sure it
+ is a problem at all.
+ */
+ /* This is not super efficient but should work */
+ new_dir->i_count++; /* extra reference for UMSDOS_unlink(), which does an iput */
+ ret = UMSDOS_unlink (new_dir,new_name,new_len);
+chkstk();
+ PRINTK (("rename unlink ret %d %d -- ",ret,new_len));
+ if (ret == -EISDIR){
+ new_dir->i_count++; /* extra reference for UMSDOS_rmdir(), which does an iput */
+ ret = UMSDOS_rmdir (new_dir,new_name,new_len);
+chkstk();
+ PRINTK (("rename rmdir ret %d -- ",ret));
+ }
+ if (ret == 0){
+ ret = umsdos_rename_f (old_dir,old_name,old_len
+ ,new_dir,new_name,new_len,0);
+ new_dir = old_dir = NULL; /* eaten by umsdos_rename_f, disable the iput below */
+ }
+ }
+ }
+ iput (new_dir);
+ iput (old_dir);
+ return ret;
+}
+
diff --git a/fs/umsdos/notes b/fs/umsdos/notes
new file mode 100644
index 000000000..3c47d1f4f
--- /dev/null
+++ b/fs/umsdos/notes
@@ -0,0 +1,17 @@
+This file contains ideas and things I don't want to forget
+
+Possible bug in fs/read_write.c
+Function sys_readdir()
+
+ There is a call to verify_area that does not take into account
+ the count parameter. I guess it should read
+
+ error = verify_area(VERIFY_WRITE, dirent, count*sizeof (*dirent));
+
+ instead of
+
+ error = verify_area(VERIFY_WRITE, dirent, sizeof (*dirent));
+
+ Of course, for now, count is always 1
+
+
diff --git a/fs/umsdos/rdir.c b/fs/umsdos/rdir.c
new file mode 100644
index 000000000..d7272ed96
--- /dev/null
+++ b/fs/umsdos/rdir.c
@@ -0,0 +1,239 @@
+/*
+ * linux/fs/umsdos/rdir.c
+ *
+ * Written 1994 by Jacques Gelinas
+ *
+ * Extended MS-DOS directory pure MS-DOS handling functions
+ * (For directory without EMD file).
+ */
+
+#include <asm/segment.h>
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/msdos_fs.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/limits.h>
+#include <linux/umsdos_fs.h>
+#include <linux/malloc.h>
+
+#define PRINTK(x)
+#define Printk(x) printk x
+
+
+extern struct inode *pseudo_root;
+
+/*
+	Read the next entry of a DOS directory (one without EMD file).
+
+	This is msdos_readdir() with two adjustments, both applied only
+	to the real root directory of the file system holding the pseudo
+	root:
+		-The entry of the pseudo root itself (UMSDOS_PSDROOT_NAME)
+		 is skipped, so it does not show.
+		-The inode number reported for ".." is the one of the
+		 pseudo root.
+
+	Return whatever msdos_readdir() returned for the entry delivered.
+*/
+static int UMSDOS_rreaddir (
+	struct inode *dir,
+	struct file *filp,
+	struct dirent *dirent,
+	int count)
+{
+	/*
+		Only the real root needs filtering. A file or directory which
+		happens to be named like the pseudo root, but sits anywhere
+		else in the file system, must show normally.
+	*/
+	int real_root = pseudo_root != NULL
+		&& dir == dir->i_sb->s_mounted
+		&& dir == pseudo_root->i_sb->s_mounted;
+	int ret;
+	for (;;){
+		ret = msdos_readdir(dir,filp,dirent,count);
+		if (!real_root) break;
+		/* The value is compared with name lengths, as it always was here */
+		if (ret == UMSDOS_PSDROOT_LEN){
+			/*
+				In pseudo root mode, we must eliminate logically
+				the directory linux from the real root.
+			*/
+			char name[UMSDOS_PSDROOT_LEN];
+			memcpy_fromfs (name,dirent->d_name,UMSDOS_PSDROOT_LEN);
+			if (memcmp(name,UMSDOS_PSDROOT_NAME,UMSDOS_PSDROOT_LEN)!=0) break;
+			/* It is the pseudo root: drop it and read the next entry */
+		}else{
+			if (ret == 2){
+				char name[2];
+				memcpy_fromfs (name,dirent->d_name,2);
+				if (name[0] == '.' && name[1] == '.'){
+					put_fs_long (pseudo_root->i_ino,&dirent->d_ino);
+				}
+			}
+			break;
+		}
+	}
+	return ret;
+}
+
+/*
+	Lookup a name in a DOS directory (one without EMD file).
+
+	dir is released (iput) before returning.
+	Return 0 if ok, or a negative error code if not.
+*/
+int UMSDOS_rlookup(
+	struct inode *dir,
+	const char *name,
+	int len,
+	struct inode **result)	/* Will hold inode of the file, if successful */
+{
+	int ret;
+	int dotdot_of_real_root = pseudo_root != NULL
+		&& len == 2
+		&& name[0] == '.'
+		&& name[1] == '.'
+		&& dir == dir->i_sb->s_mounted
+		&& dir == pseudo_root->i_sb->s_mounted;
+	if (dotdot_of_real_root){
+		/* #Specification: pseudo root / DOS/..
+			In the real root directory (c:\), the directory ..
+			is the pseudo root (c:\linux).
+		*/
+		pseudo_root->i_count++;
+		*result = pseudo_root;
+		ret = 0;
+	}else if ((ret = umsdos_real_lookup (dir,name,len,result)) == 0){
+		struct inode *found = *result;
+		if (found == pseudo_root){
+			/* #Specification: pseudo root / DOS/linux
+				Even in the real root directory (c:\), the directory
+				/linux won't show
+			*/
+			iput (pseudo_root);
+			*result = NULL;
+			ret = -ENOENT;
+		}else if (S_ISDIR(found->i_mode)){
+			/* We must place the proper function table */
+			/* depending if this is a MsDOS directory or an UMSDOS directory */
+			umsdos_setup_dir_inode(found);
+		}
+	}
+	iput (dir);
+	return ret;
+}
+
+static int UMSDOS_rrmdir (
+ struct inode *dir,
+ const char *name,
+ int len)
+{
+ /* #Specification: dual mode / rmdir in a DOS directory
+ In a DOS (not EMD in it) directory, we use a reverse strategy
+ compared with an Umsdos directory. We assume that a subdirectory
+ of a DOS directory is also a DOS directory. This is not always
+ true (umssync may be used anywhere), but make sense.
+
+ So we call msdos_rmdir() directly. If it failed with a -ENOTEMPTY
+ then we check if it is a Umsdos directory. We check if it is
+ really empty (only . .. and --linux-.--- in it). If it is true
+ we remove the EMD and do a msdos_rmdir() again.
+
+ In a Umsdos directory, we assume all subdirectory are also
+ Umsdos directory, so we check the EMD file first.
+ */
+ int ret; /* 0 if ok, or a negative error code; dir is always released (iput) at the end */
+ if (umsdos_is_pseudodos(dir,name,len)){
+ /* #Specification: pseudo root / rmdir /DOS
+ The pseudo sub-directory /DOS can't be removed!
+ This is done even if the pseudo root is not a Umsdos
+ directory anymore (very unlikely), but an accident (under
+ MsDOS) is always possible.
+
+ EPERM is returned.
+ */
+ ret = -EPERM;
+ }else{
+ umsdos_lockcreate (dir);
+ dir->i_count++; /* extra reference for msdos_rmdir() */
+ ret = msdos_rmdir (dir,name,len);
+ if (ret == -ENOTEMPTY){
+ struct inode *sdir;
+ dir->i_count++; /* extra reference for UMSDOS_rlookup(), which does an iput */
+ ret = UMSDOS_rlookup (dir,name,len,&sdir);
+ PRINTK (("rrmdir lookup %d ",ret));
+ if (ret == 0){
+ int empty;
+ if ((empty = umsdos_isempty (sdir)) != 0){
+ PRINTK (("isempty %d i_count %d ",empty,sdir->i_count));
+ if (empty == 2){
+ /*
+ Not a Umsdos directory, so the previous msdos_rmdir
+ was not lying :-)
+ */
+ ret = -ENOTEMPTY;
+ }else if (empty == 1){
+ /* We have to remove the EMD file */
+ ret = msdos_unlink(sdir,UMSDOS_EMD_FILE
+ ,UMSDOS_EMD_NAMELEN);
+ sdir = NULL; /* handed over to msdos_unlink(), disable the iput below */
+ if (ret == 0){
+ dir->i_count++; /* extra reference for the second msdos_rmdir() */
+ ret = msdos_rmdir (dir,name,len);
+ }
+ }
+ }else{
+ ret = -ENOTEMPTY;
+ }
+ iput (sdir);
+ }
+ }
+ umsdos_unlockcreate (dir);
+ }
+ iput (dir);
+ return ret;
+}
+
+/* #Specification: dual mode / introduction
+ One goal of UMSDOS is to allow a practical and simple coexistence
+ between MsDOS and Linux in a single partition. Using the EMD file
+ in each directory, UMSDOS add Unix semantics and capabilities to
+ normal DOS file system. To help and simplify coexistence, here is
+ the logic related to the EMD file.
+
+ If it is missing, then the directory is managed by the MsDOS driver.
+ The names are limited to DOS limits (8.3). No links, no device special
+ and pipe and so on.
+
+ If it is there, it is the directory. If it is there but empty, then
+ the directory looks empty. The utility umssync allows synchronisation
+ of the real DOS directory and the EMD.
+
+ Whenever umssync is applied to a directory without EMD, one is
+ created on the fly. The directory is promoted to full unix semantic.
+ Of course, the ls command will show exactly the same content as before
+ the umssync session.
+
+ It is believed that the user/admin will promote directories to unix
+ semantic as needed.
+
+ The strategy to implement this is to use two function table (struct
+ inode_operations). One for true UMSDOS directory and one for directory
+ with missing EMD.
+
+ Functions related to the DOS semantic (but aware of UMSDOS) generally
+ have a "r" prefix (r for real) such as UMSDOS_rlookup, to differentiate
+ from the one with full UMSDOS semantic.
+*/
+static struct file_operations umsdos_rdir_operations = {
+ NULL, /* lseek - default */
+ UMSDOS_dir_read, /* read */
+ NULL, /* write - bad */
+ UMSDOS_rreaddir, /* readdir - msdos_readdir with pseudo root filtering */
+ NULL, /* select - default */
+ UMSDOS_ioctl_dir, /* ioctl - UMSDOS directory ioctl, not the default */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ NULL /* fsync */
+};
+
+struct inode_operations umsdos_rdir_inode_operations = {
+ &umsdos_rdir_operations, /* default directory file-ops */
+ msdos_create, /* create - straight from the MsDOS driver */
+ UMSDOS_rlookup, /* lookup - aware of the pseudo root */
+ NULL, /* link - no links in a DOS directory */
+ msdos_unlink, /* unlink */
+ NULL, /* symlink - no links in a DOS directory */
+ msdos_mkdir, /* mkdir */
+ UMSDOS_rrmdir, /* rmdir - also handles a sub-directory holding only an EMD file */
+ NULL, /* mknod - no device special and pipe in a DOS directory */
+ msdos_rename, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+
diff --git a/fs/umsdos/symlink.c b/fs/umsdos/symlink.c
new file mode 100644
index 000000000..1b1e561c2
--- /dev/null
+++ b/fs/umsdos/symlink.c
@@ -0,0 +1,145 @@
+/*
+ * linux/fs/umsdos/symlink.c
+ *
+ * Written 1992 by Jacques Gelinas
+ * inspired from linux/fs/msdos/file.c Werner Almesberger
+ *
+ * Extended MS-DOS symbolic link handling primitives
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/msdos_fs.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/umsdos_fs.h>
+#include <linux/malloc.h>
+
+#define PRINTK(x)
+#define Printk(x) printk x
+/*
+	Read the data associated with the symlink.
+
+	At most bufsiz bytes of the file are read, from its start, using
+	the read function supplied by the caller.
+
+	Return length read in buffer or a negative error code (-EIO when
+	the read function did not deliver the length expected).
+*/
+static int umsdos_readlink_x (
+	struct inode *inode,
+	char *buffer,
+	int (*msdos_read)(struct inode *, struct file *, char *, int),
+	int bufsiz)
+{
+	struct file filp;
+	int len = inode->i_size;
+	if (len > bufsiz) len = bufsiz;
+	/* Only these two fields of the file structure are set up */
+	filp.f_pos = 0;
+	filp.f_reada = 0;
+	return (*msdos_read) (inode, &filp, buffer,len) == len ? len : -EIO;
+}
+/*
+	Follow a symbolic link chain by calling open_namei recursively
+	until an inode is found.
+
+	dir is the directory the link is relative to. If NULL, the root
+	directory of the current process is used.
+	inode is the (potential) symlink. If it is not a symlink, it is
+	simply passed back in *res_inode.
+	The nesting is limited: -ELOOP is returned once
+	current->link_count has reached 5.
+
+	Both inode and dir are released (iput) here or by a sub-function.
+	Return 0 if ok, or a negative error code if not.
+*/
+static int UMSDOS_follow_link(
+	struct inode * dir,
+	struct inode * inode,
+	int flag,
+	int mode,
+	struct inode ** res_inode)
+{
+	int ret = -ELOOP;
+	*res_inode = NULL;
+	if (current->link_count < 5) {
+		char *path = (char*)kmalloc(PATH_MAX,GFP_KERNEL);
+		if (path == NULL){
+			ret = -ENOMEM;
+		}else{
+			if (!dir) {
+				/*
+					The fs information of the task itself. This
+					used to read current->fs[1].root, which picks
+					whatever lies after that structure.
+				*/
+				dir = current->fs->root;
+				dir->i_count++;
+			}
+			if (!inode){
+				PRINTK (("symlink: inode = NULL\n"));
+				ret = -ENOENT;
+			}else if (!S_ISLNK(inode->i_mode)){
+				PRINTK (("symlink: Not ISLNK\n"));
+				/* Not a link: the inode goes to the caller */
+				*res_inode = inode;
+				inode = NULL;
+				ret = 0;
+			}else{
+				/* One byte is kept for the terminator added below */
+				ret = umsdos_readlink_x (inode,path
+					,umsdos_file_read_kmem,PATH_MAX-1);
+				if (ret > 0){
+					path[ret] = '\0';
+					PRINTK (("follow :%s: %d ",path,ret));
+					iput(inode);
+					inode = NULL;
+					current->link_count++;
+					ret = open_namei(path,flag,mode,res_inode,dir);
+					current->link_count--;
+					/* dir was handed over to open_namei() */
+					dir = NULL;
+				}else{
+					ret = -EIO;
+				}
+			}
+			kfree (path);
+		}
+	}
+	iput(inode);
+	iput(dir);
+	PRINTK (("follow_link ret %d\n",ret));
+	return ret;
+}
+
+/*
+	Copy the content of a symlink into buffer (at most buflen bytes),
+	using msdos_file_read.
+
+	inode is released (iput) before returning.
+	Return the length read, -EINVAL if inode is not a symlink, or the
+	error code of umsdos_readlink_x().
+*/
+static int UMSDOS_readlink(struct inode * inode, char * buffer, int buflen)
+{
+	int ret;
+	if (!S_ISLNK(inode->i_mode)){
+		ret = -EINVAL;
+	}else{
+		ret = umsdos_readlink_x (inode,buffer,msdos_file_read,buflen);
+	}
+	PRINTK (("readlink %d %x bufsiz %d\n",ret,inode->i_mode,buflen));
+	iput(inode);
+	return ret;
+}
+
+static struct file_operations umsdos_symlink_operations = { /* no file operation at all is supplied for a symlink */
+ NULL, /* lseek - default */
+ NULL, /* read */
+ NULL, /* write */
+ NULL, /* readdir - bad */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ NULL, /* no special open is needed */
+ NULL, /* release */
+ NULL /* fsync */
+};
+
+struct inode_operations umsdos_symlink_inode_operations = { /* only readlink and follow_link are supplied */
+ &umsdos_symlink_operations, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ UMSDOS_readlink, /* readlink - copy the link content to the caller */
+ UMSDOS_follow_link, /* follow_link - resolve the link with open_namei */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+
+
diff --git a/fs/xiafs/Makefile b/fs/xiafs/Makefile
new file mode 100644
index 000000000..097563244
--- /dev/null
+++ b/fs/xiafs/Makefile
@@ -0,0 +1,31 @@
+#
+# Makefile for the XIAFS filesystem routines.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+# Suffix rules: C to assembly, C to object, assembly to object.
+.c.s:
+ $(CC) $(CFLAGS) -S $<
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+.s.o:
+ $(AS) -o $*.o $<
+
+OBJS= bitmap.o truncate.o namei.o inode.o \
+ file.o dir.o symlink.o fsync.o
+
+# All the objects are combined into one relocatable object (ld -r).
+xiafs.o: $(OBJS)
+ $(LD) -r -o xiafs.o $(OBJS)
+
+# Rebuild the dependency file from every .c file of this directory.
+dep:
+ $(CPP) -M *.c > .depend
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/fs/xiafs/bitmap.c b/fs/xiafs/bitmap.c
new file mode 100644
index 000000000..4dee5cfbb
--- /dev/null
+++ b/fs/xiafs/bitmap.c
@@ -0,0 +1,388 @@
+/*
+ * linux/fs/xiafs/bitmap.c
+ *
+ * Copyright (C) Q. Frank Xia, 1993.
+ *
+ * Based on Linus' minix/bitmap.c
+ * Copyright (C) Linus Torvalds, 1991, 1992.
+ *
+ * This software may be redistributed per Linux Copyright.
+ */
+
+/* bitmap.c contains the code that handles the inode and block bitmaps */
+
+#include <linux/sched.h>
+#include <linux/locks.h>
+#include <linux/xia_fs.h>
+#include <linux/stat.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include <asm/bitops.h>
+
+#include "xiafs_mac.h"
+
+
+char internal_error_message[]="XIA-FS: internal error %s %d\n"; /* printk format taking a string and an int; presumably what INTERN_ERR (xiafs_mac.h) expands to - confirm */
+
+static int find_first_zero(struct buffer_head *bh, int start_bit, int end_bit)
+{
+    /* This routine searches first 0 bit from (start_bit) to (end_bit-1).
+     * If found the bit is set to 1 and the bit # is returned, otherwise,
+     * -1 is returned. Race condition is avoid by using "btsl" and
+     * "goto repeat". ---Frank.
+     *
+     * The bitmap is scanned one 32 bit word at a time (bmap is indexed
+     * with bit >> 5, so a 32 bit long is assumed, as before). The search
+     * never looks at a word beyond the one holding bit (end_bit-1), and
+     * never reports a bit at or above end_bit. An empty range
+     * (start_bit >= end_bit) gives -1.
+     *
+     * The "goto repeat" is now the outer while loop: when set_bit()
+     * reports that the bit was already set (somebody took it in the
+     * meantime), the search resumes just after that bit.
+     */
+
+    u_long *bmap = (u_long *) bh->b_data;
+    u_long free_bits;
+    int word, last_word, bit;
+
+    while (start_bit < end_bit) {
+        word = start_bit >> 5;
+	last_word = (end_bit - 1) >> 5;
+	/* first word: ignore the bits below start_bit */
+	free_bits = ~bmap[word] & (0xffffffff << (start_bit & 31));
+	while (!free_bits && word < last_word)
+	    free_bits = ~bmap[++word];
+	/* last word: ignore the bits at and above end_bit */
+	if (word == last_word && (end_bit & 31))
+	    free_bits &= (1UL << (end_bit & 31)) - 1;
+	if (!free_bits)
+	    return -1;
+	for (bit = 0; !(free_bits & (1UL << bit)); bit++)
+	    ;
+	if (!set_bit(bit, bmap + word)) {
+	    mark_buffer_dirty(bh, 1);
+	    return bit + (word << 5);
+	}
+	start_bit = bit + (word << 5) + 1;
+    }
+    return -1;
+}
+
+/*
+ * Fill the data area of a buffer (b_size bytes) with zeroes.
+ *
+ * memset() is given the size in bytes, so nothing is assumed about
+ * the size of a long any more (the previous loop stored b_size/4 longs).
+ */
+static void clear_buf(struct buffer_head * bh)
+{
+    memset(bh->b_data, 0, bh->b_size);
+}
+
+/*
+ * Move slot (pos) of the two parallel arrays to the front (slot 0).
+ * The slots that were before it are shifted down by one, keeping
+ * their order. The slots after (pos) are not touched.
+ */
+static void que(struct buffer_head * bmap[], int bznr[], int pos)
+{
+    struct buffer_head * front_bh = bmap[pos];
+    int front_znr = bznr[pos];
+
+    while (pos > 0) {
+        bmap[pos]=bmap[pos-1];
+	bznr[pos]=bznr[pos-1];
+	pos--;
+    }
+    bmap[0]=front_bh;
+    bznr[0]=front_znr;
+}
+
+/* get__map_zone() on the inode bitmap: its zones start at zone 1 of the device. */
+#define get_imap_zone(sb, bit_nr, not_que) \
+ get__map_zone((sb), (sb)->u.xiafs_sb.s_imap_buf, \
+ (sb)->u.xiafs_sb.s_imap_iznr, \
+ (sb)->u.xiafs_sb.s_imap_cached, 1, \
+ (sb)->u.xiafs_sb.s_imap_zones, _XIAFS_IMAP_SLOTS, \
+ bit_nr, not_que)
+
+/* get__map_zone() on the zone bitmap: its zones follow the inode bitmap zones. */
+#define get_zmap_zone(sb, bit_nr, not_que) \
+ get__map_zone((sb), (sb)->u.xiafs_sb.s_zmap_buf, \
+ (sb)->u.xiafs_sb.s_zmap_zznr, \
+ (sb)->u.xiafs_sb.s_zmap_cached, \
+ 1+(sb)->u.xiafs_sb.s_imap_zones, \
+ (sb)->u.xiafs_sb.s_zmap_zones, _XIAFS_ZMAP_SLOTS, \
+ bit_nr, not_que)
+
+/*
+ * Return the buffer holding the bitmap zone which contains bit (bit_nr),
+ * or NULL if bit_nr is out of range or the zone can't be read.
+ *
+ * When (cache) is 0, bmap_buf[] is indexed directly by the zone number
+ * and no lock is taken.
+ *
+ * When (cache) is not 0, bmap_buf[]/bznr[] are (slots) cache entries,
+ * bznr[i] being the zone number held by bmap_buf[i]. The super block is
+ * locked, and it is STILL LOCKED when a buffer is returned: the caller
+ * has to unlock it (see xiafs_unlock_super). It is unlocked here only
+ * when the read fails. On a miss the zone is read into the last slot,
+ * whose previous buffer is released.
+ *
+ * If (not_que) is NULL, the slot used is moved to the front of the cache
+ * with que(). Otherwise the cache order is left alone and the slot
+ * number is stored in *not_que. Nothing is stored there when (cache)
+ * is 0.
+ */
+static struct buffer_head *
+get__map_zone(struct super_block *sb, struct buffer_head * bmap_buf[],
+ int bznr[], u_char cache, int first_zone,
+ int bmap_zones, int slots, u_long bit_nr, int * not_que)
+{
+ struct buffer_head * tmp_bh;
+ int z_nr, i;
+
+ z_nr = bit_nr >> XIAFS_BITS_PER_Z_BITS(sb);
+ if (z_nr >= bmap_zones) {
+ printk("XIA-FS: bad inode/zone number (%s %d)\n", WHERE_ERR);
+ return NULL;
+ }
+ if (!cache)
+ return bmap_buf[z_nr];
+ lock_super(sb);
+ for (i=0; i < slots; i++)
+ if (bznr[i]==z_nr)
+ break;
+ if (i < slots) { /* cache hit */
+ if (not_que) {
+ *not_que=i;
+ return bmap_buf[i];
+ } else {
+ que(bmap_buf, bznr, i);
+ return bmap_buf[0];
+ }
+ }
+ tmp_bh=bread(sb->s_dev, z_nr+first_zone, XIAFS_ZSIZE(sb)); /* cache not hit */
+ if (!tmp_bh) {
+ printk("XIA-FS: read bitmap failed (%s %d)\n", WHERE_ERR);
+ unlock_super(sb);
+ return NULL;
+ }
+ brelse(bmap_buf[slots-1]);
+ bmap_buf[slots-1]=tmp_bh;
+ bznr[slots-1]=z_nr;
+ if (not_que)
+ *not_que=slots-1;
+ else
+ que(bmap_buf, bznr, slots-1);
+ return tmp_bh;
+}
+
+#define xiafs_unlock_super(sb, cache) if (cache) unlock_super(sb);
+
+#define get_free_ibit(sb, prev_bit) \
+ get_free__bit(sb, sb->u.xiafs_sb.s_imap_buf, \
+ sb->u.xiafs_sb.s_imap_iznr, \
+ sb->u.xiafs_sb.s_imap_cached, \
+ 1, sb->u.xiafs_sb.s_imap_zones, \
+ _XIAFS_IMAP_SLOTS, prev_bit);
+
+#define get_free_zbit(sb, prev_bit) \
+ get_free__bit(sb, sb->u.xiafs_sb.s_zmap_buf, \
+ sb->u.xiafs_sb.s_zmap_zznr, \
+ sb->u.xiafs_sb.s_zmap_cached, \
+ 1 + sb->u.xiafs_sb.s_imap_zones, \
+ sb->u.xiafs_sb.s_zmap_zones, \
+ _XIAFS_ZMAP_SLOTS, prev_bit);
+
+/*
+ * Find a free bit in a bitmap, set it, and return its number.
+ * 0 is returned when no bit could be obtained.
+ *
+ * The search starts at the bit following (prev_bit), goes zone by zone
+ * up to the end of the bitmap, wraps around to bit 0 and stops just
+ * before (prev_bit).
+ *
+ * The zones are obtained with get__map_zone(), which leaves the super
+ * block locked when the bitmap is cached. The lock is released here
+ * after each zone, according to (cache), i.e. the flag of the very
+ * bitmap being searched.
+ */
+static u_long
+get_free__bit(struct super_block *sb, struct buffer_head * bmap_buf[],
+	      int bznr[], u_char cache, int first_zone, int bmap_zones,
+	      int slots, u_long prev_bit)
+{
+    struct buffer_head * bh;
+    int not_done=0;
+    u_long pos, start_bit, end_bit, total_bits;
+    int z_nr, tmp;		/* z_nr receives the cache slot used */
+
+    total_bits=bmap_zones << XIAFS_BITS_PER_Z_BITS(sb);
+    if (prev_bit >= total_bits)
+        prev_bit=0;
+    pos=prev_bit+1;
+    end_bit=XIAFS_BITS_PER_Z(sb);
+
+    do {
+        if (pos >= total_bits)
+	    pos=0;
+	if (!not_done) {		/* first time */
+	    not_done=1;
+	    start_bit= pos & (end_bit-1);
+	} else
+	    start_bit=0;
+	if ( pos < prev_bit && pos+end_bit >= prev_bit) {   /* last time */
+	    not_done=0;
+	    end_bit=prev_bit & (end_bit-1);   /* only here end_bit modified */
+	}
+	bh = get__map_zone(sb, bmap_buf, bznr, cache, first_zone,
+			   bmap_zones, slots, pos, &z_nr);
+	if (!bh)
+	    return 0;		/* no lock is left behind on failure */
+	tmp=find_first_zero(bh, start_bit, end_bit);
+	if (tmp >= 0)
+	    break;		/* found: the lock is released below */
+	xiafs_unlock_super(sb, cache);
+	pos=(pos & ~(end_bit-1))+end_bit;
+    } while (not_done);
+
+    if (tmp < 0)
+        return 0;
+    if (cache)
+        que(bmap_buf, bznr, z_nr);
+    xiafs_unlock_super(sb, cache);
+    return (pos & ~(XIAFS_BITS_PER_Z(sb)-1))+tmp;
+}
+
+/*
+ * Give zone (d_addr) back: clear its bit in the zone bitmap.
+ *
+ * Only data zones are accepted (s_firstdatazone <= d_addr < s_nzones).
+ * A buffer found for the zone in the buffer hash table has its dirty
+ * flag dropped. A message is printed if the bit was already cleared.
+ */
+void xiafs_free_zone(struct super_block * sb, int d_addr)
+{
+    struct buffer_head * bh;
+    unsigned int zbit;
+
+    if (!sb) {
+        printk(INTERN_ERR);
+	return;
+    }
+    if (d_addr < sb->u.xiafs_sb.s_firstdatazone ||
+	d_addr >= sb->u.xiafs_sb.s_nzones) {
+        printk("XIA-FS: bad zone number (%s %d)\n", WHERE_ERR);
+	return;
+    }
+
+    bh = get_hash_table(sb->s_dev, d_addr, XIAFS_ZSIZE(sb));
+    if (bh)
+        bh->b_dirt=0;
+    brelse(bh);
+
+    /* bit 1 of the bitmap stands for the first data zone */
+    zbit = d_addr - sb->u.xiafs_sb.s_firstdatazone + 1;
+    bh = get_zmap_zone(sb, zbit, NULL);
+    if (!bh)
+        return;
+    if (!clear_bit(zbit & (XIAFS_BITS_PER_Z(sb) -1), bh->b_data))
+        printk("XIA-FS: dev %04x"
+	       " block bit %u (0x%x) already cleared (%s %d)\n",
+	       sb->s_dev, zbit, zbit, WHERE_ERR);
+    mark_buffer_dirty(bh, 1);
+    xiafs_unlock_super(sb, sb->u.xiafs_sb.s_zmap_cached);
+}
+
+/*
+ * Allocate a data zone, preferably after (prev_addr), and zero it.
+ *
+ * A (prev_addr) which is not a data zone is replaced by the first data
+ * zone. The zone is obtained from the zone bitmap, its buffer is
+ * cleared, flagged up to date and dirty, then released.
+ *
+ * Return the zone number, or 0 if nothing could be allocated.
+ */
+int xiafs_new_zone(struct super_block * sb, u_long prev_addr)
+{
+    struct buffer_head * bh;
+    int prev_znr, tmp;
+
+    if (!sb) {
+        printk(INTERN_ERR);
+	return 0;
+    }
+    if (prev_addr < sb->u.xiafs_sb.s_firstdatazone ||
+	prev_addr >= sb->u.xiafs_sb.s_nzones) {
+        prev_addr=sb->u.xiafs_sb.s_firstdatazone;
+    }
+    /* bit 1 of the bitmap stands for the first data zone */
+    prev_znr=prev_addr-sb->u.xiafs_sb.s_firstdatazone+1;
+    tmp=get_free_zbit(sb, prev_znr);
+    if (!tmp)
+        return 0;
+    tmp += sb->u.xiafs_sb.s_firstdatazone -1;
+    if (!(bh = getblk(sb->s_dev, tmp, XIAFS_ZSIZE(sb)))) {
+        printk("XIA-FS: I/O error (%s %d)\n", WHERE_ERR);
+	return 0;
+    }
+    if (bh->b_count != 1) {
+        /* A free zone should have no other user. Don't keep our */
+        /* reference on the buffer when giving up. */
+        printk(INTERN_ERR);
+	brelse(bh);
+	return 0;
+    }
+    clear_buf(bh);
+    bh->b_uptodate = 1;
+    mark_buffer_dirty(bh, 1);
+    brelse(bh);
+    return tmp;
+}
+
+/*
+ * Release an inode: clear the in-core inode and its bit in the inode
+ * bitmap.
+ *
+ * The inode must be unused by anybody else (i_count 1), without link
+ * left (i_nlink 0) and its number must be in the range 3..s_ninodes.
+ * Otherwise a message is printed and nothing is done.
+ */
+void xiafs_free_inode(struct inode * inode)
+{
+    struct buffer_head * bh;
+    struct super_block * sb;
+    unsigned long ino;
+
+    if (!inode)
+        return;
+    if (!inode->i_dev || inode->i_count!=1 || inode->i_nlink || !inode->i_sb ||
+	inode->i_ino < 3 || inode->i_ino > inode->i_sb->u.xiafs_sb.s_ninodes) {
+        printk("XIA-FS: bad inode (%s %d)\n", WHERE_ERR);
+	return;
+    }
+    /* Everything needed later is fetched now: the inode is not */
+    /* looked at any more once clear_inode() has been called. */
+    sb = inode->i_sb;
+    ino = inode->i_ino;
+    bh = get_imap_zone(sb, ino, NULL);
+    if (!bh)
+        return;
+    clear_inode(inode);
+    if (!clear_bit(ino & (XIAFS_BITS_PER_Z(sb)-1), bh->b_data))
+        printk("XIA-FS: dev %04x"
+	       " inode bit %ld (0x%lx) already cleared (%s %d)\n",
+	       sb->s_dev, ino, ino, WHERE_ERR);
+    mark_buffer_dirty(bh, 1);
+    xiafs_unlock_super(sb, sb->u.xiafs_sb.s_imap_cached);
+}
+
+/*
+ * Allocate a new inode in the file system of directory (dir).
+ *
+ * The inode number is obtained from the inode bitmap, the search
+ * starting after the number of (dir). The inode is returned with one
+ * reference and one link, owned by the current fsuid, in the group of
+ * (dir) if that one is set-gid, in the current fsgid otherwise. i_op
+ * is left NULL for the caller to fill in.
+ *
+ * Return the inode, or NULL if (dir) is NULL or nothing is available.
+ */
+struct inode * xiafs_new_inode(struct inode * dir)
+{
+    struct super_block * sb;
+    struct inode * inode;
+    ino_t tmp;
+
+    /* dir is checked before anything is read from it */
+    if (!dir || !(inode = get_empty_inode()))
+        return NULL;
+    sb = dir->i_sb;
+    inode->i_sb = sb;
+    inode->i_flags = inode->i_sb->s_flags;
+
+    tmp=get_free_ibit(sb, dir->i_ino);
+    if (!tmp) {
+        iput(inode);
+	return NULL;
+    }
+    inode->i_count = 1;
+    inode->i_nlink = 1;
+    inode->i_dev = sb->s_dev;
+    inode->i_uid = current->fsuid;
+    inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
+    inode->i_dirt = 1;
+    inode->i_ino = tmp;
+    inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+    inode->i_op = NULL;
+    inode->i_blocks = 0;
+    inode->i_blksize = XIAFS_ZSIZE(inode->i_sb);
+    insert_inode_hash(inode);
+    return inode;
+}
+
+/* nibblemap[n] is the number of bits set in the 4 bit value n */
+static int nibblemap[] = { 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4 };
+
+/*
+ * Return the number of bits set in the data area of a buffer.
+ * Each byte is counted as two nibbles looked up in nibblemap[].
+ */
+static u_long count_zone(struct buffer_head * bh)
+{
+    u_long nr_set = 0;
+    int idx = bh->b_size;
+
+    while (idx-- > 0) {
+        int byte = bh->b_data[idx] & 0xff;
+
+	nr_set += nibblemap[byte & 0xf] + nibblemap[byte >> 4];
+    }
+    return nr_set;
+}
+
+/*
+ * Return the number of bits of the inode bitmap which are not set:
+ * the total number of bits of its zones, minus the bits found set in
+ * the zones which could be obtained.
+ */
+unsigned long xiafs_count_free_inodes(struct super_block *sb)
+{
+    struct buffer_head * bh;
+    int nr_zones = sb->u.xiafs_sb.s_imap_zones;
+    int zone, slot;
+    u_long nr_set = 0;
+
+    for (zone = 0; zone < nr_zones; zone++) {
+        /* a non NULL slot pointer leaves the cache order alone */
+        bh = get_imap_zone(sb, zone << XIAFS_BITS_PER_Z_BITS(sb), &slot);
+	if (!bh)
+	    continue;
+	nr_set += count_zone(bh);
+	xiafs_unlock_super(sb, sb->u.xiafs_sb.s_imap_cached);
+    }
+    return (nr_zones << XIAFS_BITS_PER_Z_BITS(sb)) - nr_set;
+}
+
+/*
+ * Return the number of bits of the zone bitmap which are not set:
+ * the total number of bits of its zones, minus the bits found set in
+ * the zones which could be obtained.
+ */
+unsigned long xiafs_count_free_zones(struct super_block *sb)
+{
+    struct buffer_head * bh;
+    int nr_zones = sb->u.xiafs_sb.s_zmap_zones;
+    int zone, slot;
+    u_long nr_set = 0;
+
+    for (zone = 0; zone < nr_zones; zone++) {
+        /* a non NULL slot pointer leaves the cache order alone */
+        bh = get_zmap_zone(sb, zone << XIAFS_BITS_PER_Z_BITS(sb), &slot);
+	if (!bh)
+	    continue;
+	nr_set += count_zone(bh);
+	xiafs_unlock_super(sb, sb->u.xiafs_sb.s_zmap_cached);
+    }
+    return (nr_zones << XIAFS_BITS_PER_Z_BITS(sb)) - nr_set;
+}
diff --git a/fs/xiafs/dir.c b/fs/xiafs/dir.c
new file mode 100644
index 000000000..d9db56ddc
--- /dev/null
+++ b/fs/xiafs/dir.c
@@ -0,0 +1,135 @@
+/*
+ * linux/fs/xiafs/dir.c
+ *
+ * Copyright (C) Q. Frank Xia, 1993.
+ *
+ * Based on Linus' minix/dir.c
+ * Copyright (C) Linus Torvalds, 1991, 1992.
+ *
+ * This software may be redistributed per Linux Copyright.
+ */
+
+#include <asm/segment.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/xia_fs.h>
+#include <linux/stat.h>
+
+#include "xiafs_mac.h"
+
+#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
+#define ROUND_UP(x) (((x)+3) & ~3)
+
+static int xiafs_dir_read(struct inode *, struct file *, char *, int);
+static int xiafs_readdir(struct inode *, struct file *, struct dirent *, int);
+
+/*
+ * File operations installed for xiafs directories.  The table is filled
+ * in positionally: read always fails through xiafs_dir_read, entries are
+ * handed out by xiafs_readdir, and fsync uses file_fsync.  Every other
+ * slot is NULL.
+ */
+static struct file_operations xiafs_dir_operations = {
+ NULL, /* lseek - default */
+ xiafs_dir_read, /* read */
+ NULL, /* write - bad */
+ xiafs_readdir, /* readdir */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ file_fsync /* default fsync */
+};
+
+/*
+ * directories can handle most operations...
+ *
+ * Inode operations installed for xiafs directories (see
+ * xiafs_read_inode).  The table is positional; the slots left NULL here
+ * are readlink, follow_link, bmap and permission.
+ */
+struct inode_operations xiafs_dir_inode_operations = {
+ &xiafs_dir_operations, /* default directory file-ops */
+ xiafs_create, /* create */
+ xiafs_lookup, /* lookup */
+ xiafs_link, /* link */
+ xiafs_unlink, /* unlink */
+ xiafs_symlink, /* symlink */
+ xiafs_mkdir, /* mkdir */
+ xiafs_rmdir, /* rmdir */
+ xiafs_mknod, /* mknod */
+ xiafs_rename, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* bmap */
+ xiafs_truncate, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * read() handler for directories: no argument is examined, the call
+ * always fails with -EISDIR.
+ */
+static int xiafs_dir_read(struct inode * inode, struct file * filp,
+			  char * buf, int count)
+{
+	return -EISDIR;
+}
+
+/*
+ * Return the next in-use directory entry at or after filp->f_pos.
+ *
+ * The entry's name (NUL terminated), inode number and name length are
+ * written to the caller-supplied dirent with put_fs_byte/put_fs_long/
+ * put_fs_word, and filp->f_pos is advanced past every record consumed.
+ * The count argument is not used.
+ *
+ * Returns ROUND_UP(NAME_OFFSET(dirent) + name length + 1) when an entry
+ * was copied, 0 when the end of the directory is reached or a corrupt
+ * record/zone is detected (a message is printed in that case), and
+ * -EBADF when inode is not a usable directory or its size is not a
+ * multiple of the zone size.
+ */
+static int xiafs_readdir(struct inode * inode,
+ struct file * filp, struct dirent * dirent, int count)
+{
+ u_int offset, i,ret;
+ struct buffer_head * bh;
+ struct xiafs_direct * de;
+
+ if (!inode || !inode->i_sb || !S_ISDIR(inode->i_mode))
+ return -EBADF;
+ if (inode->i_size & (XIAFS_ZSIZE(inode->i_sb) - 1) )
+ return -EBADF;
+ ret = 0;
+ while (!ret && filp->f_pos < inode->i_size) {
+ /* offset of f_pos inside its zone */
+ offset = filp->f_pos & (XIAFS_ZSIZE(inode->i_sb) - 1);
+ bh = xiafs_bread(inode, filp->f_pos >> XIAFS_ZSIZE_BITS(inode->i_sb),0);
+ if (!bh) {
+ /* zone could not be read: skip to the start of the next zone */
+ filp->f_pos += XIAFS_ZSIZE(inode->i_sb)-offset;
+ continue;
+ }
+ /*
+ * Walk the records from the start of the zone until reaching or
+ * passing offset, so that de starts on a record boundary even if
+ * f_pos does not.  A zero d_rec_len stops the walk early.
+ */
+ for (i = 0; i < XIAFS_ZSIZE(inode->i_sb) && i < offset; ) {
+ de = (struct xiafs_direct *) (bh->b_data + i);
+ if (!de->d_rec_len)
+ break;
+ i += de->d_rec_len;
+ }
+ offset = i;
+ de = (struct xiafs_direct *) (offset + bh->b_data);
+
+ while (!ret && offset < XIAFS_ZSIZE(inode->i_sb) && filp->f_pos < inode->i_size) {
+ /* sanity-check the record before trusting any of its fields */
+ if (de->d_ino > inode->i_sb->u.xiafs_sb.s_ninodes ||
+ de->d_rec_len < 12 ||
+ (char *)de+de->d_rec_len > XIAFS_ZSIZE(inode->i_sb)+bh->b_data ||
+ de->d_name_len < 1 || de->d_name_len + 8 > de->d_rec_len ||
+ de->d_name_len > _XIAFS_NAME_LEN ||
+ de->d_name[de->d_name_len] ) {
+ printk("XIA-FS: bad directory entry (%s %d)\n", WHERE_ERR);
+ brelse(bh);
+ return 0;
+ }
+ offset += de->d_rec_len;
+ filp->f_pos += de->d_rec_len;
+ /* records with d_ino == 0 are stepped over without being returned */
+ if (de->d_ino) {
+ for (i = 0; i < de->d_name_len ; i++)
+ put_fs_byte(de->d_name[i],i+dirent->d_name);
+ put_fs_byte(0,i+dirent->d_name);
+ put_fs_long(de->d_ino,&dirent->d_ino);
+ /* i now equals the name length; that is what d_reclen receives */
+ put_fs_word(i,&dirent->d_reclen);
+ if (!IS_RDONLY (inode)) {
+ inode->i_atime=CURRENT_TIME;
+ inode->i_dirt=1;
+ }
+ ret = ROUND_UP(NAME_OFFSET(dirent)+i+1);
+ break;
+ }
+ de = (struct xiafs_direct *) (offset + bh->b_data);
+ }
+ brelse(bh);
+ /* records must end exactly on the zone boundary */
+ if (offset > XIAFS_ZSIZE(inode->i_sb)) {
+ printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR);
+ return 0;
+ }
+ }
+ if (!IS_RDONLY (inode)) {
+ inode->i_atime=CURRENT_TIME;
+ inode->i_dirt=1;
+ }
+ return ret;
+}
diff --git a/fs/xiafs/file.c b/fs/xiafs/file.c
new file mode 100644
index 000000000..5678ffd0b
--- /dev/null
+++ b/fs/xiafs/file.c
@@ -0,0 +1,252 @@
+/*
+ * linux/fs/xiafs/file.c
+ *
+ * Copyright (C) Q. Frank Xia, 1993.
+ *
+ * Based on Linus' minix/file.c
+ * Copyright (C) Linus Torvalds, 1991, 1992.
+ *
+ * This software may be redistributed per Linux Copyright.
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/xia_fs.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+
+#include "xiafs_mac.h"
+
+#define NBUF 32
+
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#define MAX(a,b) (((a)>(b))?(a):(b))
+
+static int xiafs_file_read(struct inode *, struct file *, char *, int);
+static int xiafs_file_write(struct inode *, struct file *, char *, int);
+
+/*
+ * We have mostly NULL's here: the current defaults are ok for
+ * the xiafs filesystem.
+ *
+ * File operations installed for regular xiafs files.  The table is
+ * positional; the slots that are filled in are read, write, mmap
+ * (generic_mmap) and fsync (xiafs_sync_file).
+ */
+static struct file_operations xiafs_file_operations = {
+ NULL, /* lseek - default */
+ xiafs_file_read, /* read */
+ xiafs_file_write, /* write */
+ NULL, /* readdir - bad */
+ NULL, /* select - default */
+ NULL, /* ioctl - default */
+ generic_mmap, /* mmap */
+ NULL, /* no special open is needed */
+ NULL, /* release */
+ xiafs_sync_file /* fsync */
+};
+
+/*
+ * Inode operations installed for regular xiafs files (see
+ * xiafs_read_inode).  Only the file-operations pointer, bmap and
+ * truncate are provided; all directory-style operations are NULL.
+ */
+struct inode_operations xiafs_file_inode_operations = {
+ &xiafs_file_operations, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ xiafs_bmap, /* bmap */
+ xiafs_truncate, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * Read up to count bytes from a regular file, starting at filp->f_pos,
+ * into the user buffer buf.
+ *
+ * Returns the number of bytes read, 0 when f_pos is at or beyond
+ * i_size (or count is not positive), -EINVAL when inode is NULL or not
+ * a regular file, and -EIO when nothing at all could be read.
+ * filp->f_pos is advanced by the number of bytes read; on success
+ * f_reada is set and, unless the inode is read-only, i_atime is
+ * updated and the inode marked dirty.
+ *
+ * buflist[] is used as a ring of NBUF buffer pointers: bhb is where the
+ * next requested zone is stored, bhe is the next zone to be copied out.
+ */
+static int
+xiafs_file_read(struct inode * inode, struct file * filp, char * buf, int count)
+{
+ int read, left, chars;
+ int zone_nr, zones, f_zones, offset;
+ int bhrequest, uptodate;
+ struct buffer_head ** bhb, ** bhe;
+ struct buffer_head * bhreq[NBUF];
+ struct buffer_head * buflist[NBUF];
+
+ if (!inode) {
+ printk("XIA-FS: inode = NULL (%s %d)\n", WHERE_ERR);
+ return -EINVAL;
+ }
+ if (!S_ISREG(inode->i_mode)) {
+ printk("XIA-FS: mode != regular (%s %d)\n", WHERE_ERR);
+ return -EINVAL;
+ }
+ offset = filp->f_pos;
+ /* never read past the end of the file */
+ left = inode->i_size - offset;
+ if (left > count)
+ left = count;
+ if (left <= 0)
+ return 0;
+ read = 0;
+ /* split the position into a zone number and an offset inside that zone */
+ zone_nr = offset >> XIAFS_ZSIZE_BITS(inode->i_sb);
+ offset &= XIAFS_ZSIZE(inode->i_sb) -1 ;
+ /* f_zones: zones covered by the file; zones: zones covered by this request */
+ f_zones =(inode->i_size+XIAFS_ZSIZE(inode->i_sb)-1)>>XIAFS_ZSIZE_BITS(inode->i_sb);
+ zones = (left+offset+XIAFS_ZSIZE(inode->i_sb)-1) >> XIAFS_ZSIZE_BITS(inode->i_sb);
+ bhb = bhe = buflist;
+ /*
+ * With read-ahead enabled, request at least the device's read_ahead
+ * amount (scaled to zones), but never beyond the last zone of the file.
+ */
+ if (filp->f_reada) {
+ if(zones < read_ahead[MAJOR(inode->i_dev)] >> (1+XIAFS_ZSHIFT(inode->i_sb)))
+ zones = read_ahead[MAJOR(inode->i_dev)] >> (1+XIAFS_ZSHIFT(inode->i_sb));
+ if (zone_nr + zones > f_zones)
+ zones = f_zones - zone_nr;
+ }
+
+ /* We do this in a two stage process. We first try and request
+ as many blocks as we can, then we wait for the first one to
+ complete, and then we try and wrap up as many as are actually
+ done. This routine is rather generic, in that it can be used
+ in a filesystem by substituting the appropriate function in
+ for getblk.
+
+ This routine is optimized to make maximum use of the various
+ buffers and caches. */
+
+ do {
+ bhrequest = 0;
+ uptodate = 1;
+ while (zones--) {
+ /* create == 0: a zone with no buffer yields a NULL slot */
+ *bhb = xiafs_getblk(inode, zone_nr++, 0);
+ if (*bhb && !(*bhb)->b_uptodate) {
+ uptodate = 0;
+ bhreq[bhrequest++] = *bhb;
+ }
+
+ if (++bhb == &buflist[NBUF])
+ bhb = buflist;
+
+ /* If the block we have on hand is uptodate, go ahead
+ and complete processing. */
+ if (uptodate)
+ break;
+ /* ring is full: stop requesting until some slots are consumed */
+ if (bhb == bhe)
+ break;
+ }
+
+ /* Now request them all */
+ if (bhrequest)
+ ll_rw_block(READ, bhrequest, bhreq);
+
+ do { /* Finish off all I/O that has actually completed */
+ if (*bhe) {
+ wait_on_buffer(*bhe);
+ if (!(*bhe)->b_uptodate) { /* read error? */
+ brelse(*bhe);
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ /* left = 0 terminates both loops */
+ left = 0;
+ break;
+ }
+ }
+ /* copy either the rest of this zone or the rest of the request */
+ if (left < XIAFS_ZSIZE(inode->i_sb) - offset)
+ chars = left;
+ else
+ chars = XIAFS_ZSIZE(inode->i_sb) - offset;
+ filp->f_pos += chars;
+ left -= chars;
+ read += chars;
+ if (*bhe) {
+ memcpy_tofs(buf,offset+(*bhe)->b_data,chars);
+ brelse(*bhe);
+ buf += chars;
+ } else {
+ /* no buffer for this zone: hand back zero bytes instead */
+ while (chars-->0)
+ put_fs_byte(0,buf++);
+ }
+ offset = 0;
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ } while (left > 0 && bhe != bhb && (!*bhe || !(*bhe)->b_lock));
+ } while (left > 0);
+
+/* Release the read-ahead blocks */
+ while (bhe != bhb) {
+ brelse(*bhe);
+ if (++bhe == &buflist[NBUF])
+ bhe = buflist;
+ };
+ if (!read)
+ return -EIO;
+ filp->f_reada = 1;
+ if (!IS_RDONLY (inode)) {
+ inode->i_atime = CURRENT_TIME;
+ inode->i_dirt = 1;
+ }
+ return read;
+}
+
+/*
+ * Write count bytes from the user buffer buf into a regular file.
+ *
+ * The starting position is i_size when O_APPEND is set, otherwise
+ * filp->f_pos.  Data is copied zone by zone; a zone that is only partly
+ * overwritten and not yet up to date is read in first.
+ *
+ * Returns the number of bytes written.  If the very first zone cannot
+ * be obtained the result is -ENOSPC, and if its read-in fails the result
+ * is -EIO; a failure after some data was written returns the short
+ * count.  Returns -EINVAL when inode is NULL or not a regular file.
+ * i_mtime/i_ctime, filp->f_pos and i_dirt are updated on every path
+ * that reaches the copy loop.
+ */
+static int
+xiafs_file_write(struct inode * inode, struct file * filp, char * buf, int count)
+{
+	off_t pos;
+	int done, chunk;
+	struct buffer_head * bh;
+	char * dst;
+
+	if (!inode) {
+		printk("XIA-FS: inode = NULL (%s %d)\n", WHERE_ERR);
+		return -EINVAL;
+	}
+	if (!S_ISREG(inode->i_mode)) {
+		printk("XIA-FS: mode != regular (%s %d)\n", WHERE_ERR);
+		return -EINVAL;
+	}
+	/*
+	 * Appending is not safe against several writers racing on the
+	 * same file; that is accepted here.
+	 */
+	if (!(filp->f_flags & O_APPEND))
+		pos = filp->f_pos;
+	else
+		pos = inode->i_size;
+
+	for (done = 0; done < count; ) {
+		bh = xiafs_getblk(inode, pos >> XIAFS_ZSIZE_BITS(inode->i_sb), 1);
+		if (!bh) {
+			if (!done)
+				done = -ENOSPC;
+			break;
+		}
+		/* bytes from pos to the end of its zone, capped by what is left */
+		chunk = XIAFS_ZSIZE(inode->i_sb) - (pos & (XIAFS_ZSIZE(inode->i_sb) - 1));
+		if (chunk > count-done)
+			chunk = count-done;
+		/* partial overwrite of a stale buffer: fetch the old contents */
+		if (chunk != XIAFS_ZSIZE(inode->i_sb) && !bh->b_uptodate) {
+			ll_rw_block(READ, 1, &bh);
+			wait_on_buffer(bh);
+			if (!bh->b_uptodate) {
+				brelse(bh);
+				if (!done)
+					done = -EIO;
+				break;
+			}
+		}
+		dst = (pos & (XIAFS_ZSIZE(inode->i_sb)-1)) + bh->b_data;
+		pos += chunk;
+		if (pos > inode->i_size) {
+			inode->i_size = pos;
+			inode->i_dirt = 1;
+		}
+		done += chunk;
+		memcpy_fromfs(dst,buf,chunk);
+		buf += chunk;
+		bh->b_uptodate = 1;
+		mark_buffer_dirty(bh, 0);
+		brelse(bh);
+	}
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	filp->f_pos = pos;
+	inode->i_dirt = 1;
+
+	return done;
+}
diff --git a/fs/xiafs/fsync.c b/fs/xiafs/fsync.c
new file mode 100644
index 000000000..67681b2c6
--- /dev/null
+++ b/fs/xiafs/fsync.c
@@ -0,0 +1,159 @@
+/*
+ * linux/fs/xiafs/fsync.c
+ *
+ * Changes Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk)
+ * from
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * xiafs fsync primitive
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/locks.h>
+
+#include <linux/fs.h>
+#include <linux/xia_fs.h>
+
+#include "xiafs_mac.h"
+
+
+#define blocksize (XIAFS_ZSIZE(inode->i_sb))
+#define addr_per_block (XIAFS_ADDRS_PER_Z(inode->i_sb))
+
+/*
+ * Handle one zone pointer for fsync.
+ *
+ * With wait == 0 a cached buffer that is up to date and dirty is handed
+ * to ll_rw_block(WRITE); with wait != 0 nothing is written and only the
+ * buffer's state is reported.
+ *
+ * Returns 0 when *block is zero, when get_hash_table() finds no buffer,
+ * or when there is nothing to report; 1 when *block changed while the
+ * buffer was being looked up; -1 when, on the wait pass, the buffer has
+ * b_req set but is not up to date.
+ */
+static int sync_block (struct inode * inode, unsigned long * block, int wait)
+{
+	struct buffer_head * bh;
+	int seen, rc;
+
+	if (!*block)
+		return 0;
+	seen = *block;
+	bh = get_hash_table(inode->i_dev, *block, blocksize);
+	if (!bh)
+		return 0;
+	if (*block != seen) {
+		brelse (bh);
+		return 1;
+	}
+	if (wait) {
+		/* second pass: report only, never start I/O */
+		rc = (bh->b_req && !bh->b_uptodate) ? -1 : 0;
+		brelse(bh);
+		return rc;
+	}
+	if (!bh->b_uptodate || !bh->b_dirt) {
+		brelse(bh);
+		return 0;
+	}
+	ll_rw_block(WRITE, 1, &bh);
+	/* drop our reference directly instead of going through brelse() */
+	bh->b_count--;
+	return 0;
+}
+
+/*
+ * Sync an indirect zone and then read it in so its entries can be walked.
+ *
+ * On return *bh is the buffer obtained with bread(), or NULL.  Returns 0
+ * on success (also when *iblock is zero, with *bh == NULL), whatever
+ * non-zero value sync_block() reported, 1 when *iblock changed across
+ * the bread(), and -1 when bread() failed.
+ */
+static int sync_iblock (struct inode * inode, unsigned long * iblock,
+			struct buffer_head **bh, int wait)
+{
+	int status, zone_nr;
+
+	*bh = NULL;
+	zone_nr = *iblock;
+	if (zone_nr == 0)
+		return 0;
+	status = sync_block (inode, iblock, wait);
+	if (status != 0)
+		return status;
+	*bh = bread(inode->i_dev, zone_nr, blocksize);
+	if (zone_nr != *iblock) {
+		brelse(*bh);
+		*bh = NULL;
+		return 1;
+	}
+	return *bh ? 0 : -1;
+}
+
+
+/*
+ * Run sync_block() over the inode's eight direct zone pointers.
+ *
+ * The walk stops as soon as sync_block() returns a positive value (a
+ * zone pointer changed underneath us).  Returns the last negative value
+ * reported by sync_block(), or 0 if there was none.
+ */
+static int sync_direct(struct inode *inode, int wait)
+{
+	int i;
+	int rc, err = 0;
+
+	for (i = 0; i < 8; i++) {
+		/*
+		 * Use xiafs's own member of the inode union.  The original
+		 * went through u.ext_i.i_data, which belongs to the ext
+		 * filesystem and is only right if both members happen to
+		 * share the same layout.
+		 */
+		rc = sync_block (inode, inode->u.xiafs_i.i_zone + i, wait);
+		if (rc > 0)
+			break;
+		if (rc)
+			err = rc;
+	}
+	return err;
+}
+
+/*
+ * Sync a single-indirect zone: first the indirect zone itself through
+ * sync_iblock(), then every zone pointer stored in it through
+ * sync_block().
+ *
+ * Returns sync_iblock()'s result when that is non-zero or yields no
+ * buffer; otherwise the last negative sync_block() result, or 0.  A
+ * positive sync_block() result ends the walk early.
+ */
+static int sync_indirect(struct inode *inode, unsigned long *iblock, int wait)
+{
+	struct buffer_head * ind_bh;
+	int idx, status, failed;
+
+	status = sync_iblock (inode, iblock, &ind_bh, wait);
+	if (status || !ind_bh)
+		return status;
+
+	failed = 0;
+	for (idx = 0; idx < addr_per_block; idx++) {
+		status = sync_block (inode,
+				     ((unsigned long *) ind_bh->b_data) + idx,
+				     wait);
+		if (status > 0)
+			break;
+		if (status < 0)
+			failed = status;
+	}
+	brelse(ind_bh);
+	return failed;
+}
+
+/*
+ * Sync a double-indirect zone: first the zone itself through
+ * sync_iblock(), then every single-indirect zone it points to through
+ * sync_indirect().
+ *
+ * Returns sync_iblock()'s result when that is non-zero or yields no
+ * buffer; otherwise the last negative sync_indirect() result, or 0.  A
+ * positive sync_indirect() result ends the walk early.
+ */
+static int sync_dindirect(struct inode *inode, unsigned long *diblock,
+			  int wait)
+{
+	struct buffer_head * dind_bh;
+	int idx, status, failed;
+
+	status = sync_iblock (inode, diblock, &dind_bh, wait);
+	if (status || !dind_bh)
+		return status;
+
+	failed = 0;
+	for (idx = 0; idx < addr_per_block; idx++) {
+		status = sync_indirect (inode,
+					((unsigned long *) dind_bh->b_data) + idx,
+					wait);
+		if (status > 0)
+			break;
+		if (status < 0)
+			failed = status;
+	}
+	brelse(dind_bh);
+	return failed;
+}
+
+/*
+ * fsync for xiafs files, directories and symlinks.
+ *
+ * Two passes are made over the direct, indirect and double-indirect
+ * zones: the first with wait == 0, the second with wait == 1.  The inode
+ * itself is then synced with xiafs_sync_inode().
+ *
+ * Returns -EINVAL for any other inode type, -EIO if the OR of all the
+ * individual results is negative, and 0 otherwise.
+ */
+int xiafs_sync_file(struct inode * inode, struct file * file)
+{
+	int pass, status = 0;
+
+	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
+	    !S_ISLNK(inode->i_mode))
+		return -EINVAL;
+	for (pass = 0; pass <= 1; pass++) {
+		status |= sync_direct(inode, pass);
+		status |= sync_indirect(inode, &inode->u.xiafs_i.i_ind_zone, pass);
+		status |= sync_dindirect(inode, &inode->u.xiafs_i.i_dind_zone, pass);
+	}
+	status |= xiafs_sync_inode (inode);
+	if (status < 0)
+		return -EIO;
+	return 0;
+}
diff --git a/fs/xiafs/inode.c b/fs/xiafs/inode.c
new file mode 100644
index 000000000..171499a95
--- /dev/null
+++ b/fs/xiafs/inode.c
@@ -0,0 +1,502 @@
+/*
+ * linux/fs/xiafs/inode.c
+ *
+ * Copyright (C) Q. Frank Xia, 1993.
+ *
+ * Based on Linus' minix/inode.c
+ * Copyright (C) Linus Torvalds, 1991, 1992.
+ *
+ * This software may be redistributed per Linux Copyright.
+ */
+
+#include <linux/sched.h>
+#include <linux/xia_fs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+#include <asm/system.h>
+#include <asm/segment.h>
+
+#include "xiafs_mac.h"
+
+static u_long random_nr;
+
+/*
+ * put_inode hook: once the link count has dropped to zero, truncate the
+ * inode to size 0 and free it.  Inodes that still have links are left
+ * untouched.
+ */
+void xiafs_put_inode(struct inode *inode)
+{
+	if (!inode->i_nlink) {
+		inode->i_size = 0;
+		xiafs_truncate(inode);
+		xiafs_free_inode(inode);
+	}
+}
+
+/*
+ * put_super hook: with the superblock locked, clear s_dev and release
+ * every inode-bitmap and zone-bitmap buffer slot.
+ */
+void xiafs_put_super(struct super_block *sb)
+{
+	int slot;
+
+	lock_super(sb);
+	sb->s_dev = 0;
+
+	slot = 0;
+	while (slot < _XIAFS_IMAP_SLOTS) {
+		brelse(sb->u.xiafs_sb.s_imap_buf[slot]);
+		slot++;
+	}
+	slot = 0;
+	while (slot < _XIAFS_ZMAP_SLOTS) {
+		brelse(sb->u.xiafs_sb.s_zmap_buf[slot]);
+		slot++;
+	}
+
+	unlock_super(sb);
+}
+
+/*
+ * Superblock operations for xiafs, installed by xiafs_read_super().
+ * The table is filled in positionally, so the order of the entries must
+ * match struct super_operations; the three NULL entries are slots xiafs
+ * leaves unset.
+ */
+static struct super_operations xiafs_sops = {
+ xiafs_read_inode,
+ NULL,
+ xiafs_write_inode,
+ xiafs_put_inode,
+ xiafs_put_super,
+ NULL,
+ xiafs_statfs,
+ NULL
+};
+
+/*
+ * Read and validate the xiafs superblock of s->s_dev and set up s.
+ *
+ * Block 0 is read with BLOCK_SIZE-sized blocks and its magic number is
+ * checked; if the filesystem's zone size is a different supported size
+ * (2048 or 4096) the device block size is switched and block 0 is read
+ * again.  The geometry fields are copied into s->u.xiafs_sb, the bitmap
+ * zones are read into the s_imap_buf/s_zmap_buf slots when they all fit
+ * (otherwise the corresponding *_cached flag is set to 1 and no bitmap
+ * zone is read here), and finally the root inode is fetched.
+ *
+ * silent suppresses the "Can't find a xiafs filesystem" message; data
+ * is not used.
+ *
+ * Returns s on success.  On every failure path NULL is returned with
+ * s->s_dev cleared and the superblock unlocked.
+ */
+struct super_block *xiafs_read_super(struct super_block *s, void *data,
+				     int silent)
+{
+	struct buffer_head *bh;
+	struct xiafs_super_block *sp;
+	int i, z, dev;
+
+	dev=s->s_dev;
+	lock_super(s);
+
+	set_blocksize(dev, BLOCK_SIZE);
+
+	if (!(bh = bread(dev, 0, BLOCK_SIZE))) {
+		s->s_dev=0;
+		unlock_super(s);
+		printk("XIA-FS: read super_block failed (%s %d)\n", WHERE_ERR);
+		return NULL;
+	}
+	sp = (struct xiafs_super_block *) bh->b_data;
+	s->s_magic = sp->s_magic;
+	if (s->s_magic != _XIAFS_SUPER_MAGIC) {
+		s->s_dev = 0;
+		unlock_super(s);
+		brelse(bh);
+		if (!silent)
+			printk("VFS: Can't find a xiafs filesystem on dev 0x%04x.\n",
+			       dev);
+		return NULL;
+	}
+	s->s_blocksize = sp->s_zone_size;
+	s->s_blocksize_bits = 10 + sp->s_zone_shift;
+	if (s->s_blocksize != BLOCK_SIZE &&
+	    (s->s_blocksize == 1024 || s->s_blocksize == 2048 ||
+	     s->s_blocksize == 4096)) {
+		brelse(bh);
+		set_blocksize(dev, s->s_blocksize);
+		bh = bread (dev, 0, s->s_blocksize);
+		if (!bh) {
+			/*
+			 * Clean up exactly like the first bread() failure.
+			 * The original returned here with the superblock
+			 * still locked and s_dev still set.
+			 */
+			s->s_dev = 0;
+			unlock_super(s);
+			printk("XIA-FS: read super_block failed (%s %d)\n", WHERE_ERR);
+			return NULL;
+		}
+		/*
+		 * NOTE(review): the first read took the superblock from the
+		 * start of block 0, but here it is taken BLOCK_SIZE bytes
+		 * into the re-read block 0.  Confirm against the on-disk
+		 * layout; left unchanged.
+		 */
+		sp = (struct xiafs_super_block *) (((char *)bh->b_data) + BLOCK_SIZE) ;
+	}
+	s->u.xiafs_sb.s_nzones = sp->s_nzones;
+	s->u.xiafs_sb.s_ninodes = sp->s_ninodes;
+	s->u.xiafs_sb.s_ndatazones = sp->s_ndatazones;
+	s->u.xiafs_sb.s_imap_zones = sp->s_imap_zones;
+	s->u.xiafs_sb.s_zmap_zones = sp->s_zmap_zones;
+	s->u.xiafs_sb.s_firstdatazone = sp->s_firstdatazone;
+	s->u.xiafs_sb.s_zone_shift = sp->s_zone_shift;
+	s->u.xiafs_sb.s_max_size = sp->s_max_size;
+	brelse(bh);
+
+	/* start with every bitmap slot empty so the failure path can brelse them all */
+	for (i=0;i < _XIAFS_IMAP_SLOTS;i++) {
+		s->u.xiafs_sb.s_imap_buf[i] = NULL;
+		s->u.xiafs_sb.s_imap_iznr[i] = -1;
+	}
+	for (i=0;i < _XIAFS_ZMAP_SLOTS;i++) {
+		s->u.xiafs_sb.s_zmap_buf[i] = NULL;
+		s->u.xiafs_sb.s_zmap_zznr[i] = -1;
+	}
+
+	/* z walks the zones after the superblock: inode bitmap first, then zone bitmap */
+	z=1;
+	if ( s->u.xiafs_sb.s_imap_zones > _XIAFS_IMAP_SLOTS )
+		s->u.xiafs_sb.s_imap_cached=1;
+	else {
+		s->u.xiafs_sb.s_imap_cached=0;
+		for (i=0 ; i < s->u.xiafs_sb.s_imap_zones ; i++) {
+			if (!(s->u.xiafs_sb.s_imap_buf[i]=bread(dev, z++, XIAFS_ZSIZE(s))))
+				goto xiafs_read_super_fail;
+			s->u.xiafs_sb.s_imap_iznr[i]=i;
+		}
+	}
+	if ( s->u.xiafs_sb.s_zmap_zones > _XIAFS_ZMAP_SLOTS )
+		s->u.xiafs_sb.s_zmap_cached=1;
+	else {
+		s->u.xiafs_sb.s_zmap_cached=0;
+		for (i=0 ; i < s->u.xiafs_sb.s_zmap_zones ; i++) {
+			if (!(s->u.xiafs_sb.s_zmap_buf[i]=bread(dev, z++, XIAFS_ZSIZE(s))))
+				goto xiafs_read_super_fail;
+			s->u.xiafs_sb.s_zmap_zznr[i]=i;
+		}
+	}
+	/* set up enough so that it can read an inode */
+	s->s_dev = dev;
+	s->s_op = &xiafs_sops;
+	s->s_mounted = iget(s, _XIAFS_ROOT_INO);
+	if (!s->s_mounted)
+		goto xiafs_read_super_fail;
+	unlock_super(s);
+	random_nr=CURRENT_TIME;
+	return s;
+
+xiafs_read_super_fail:
+	for(i=0; i < _XIAFS_IMAP_SLOTS; i++)
+		brelse(s->u.xiafs_sb.s_imap_buf[i]);
+	for(i=0; i < _XIAFS_ZMAP_SLOTS; i++)
+		brelse(s->u.xiafs_sb.s_zmap_buf[i]);
+	s->s_dev=0;
+	unlock_super(s);
+	printk("XIA-FS: read bitmaps failed (%s %d)\n", WHERE_ERR);
+	return NULL;
+}
+
+/*
+ * Fill in the user-space statfs buffer: magic number, zone size, number
+ * of data zones, free zones (reported as both f_bfree and f_bavail),
+ * number of inodes, free inodes and the maximum name length.  f_fsid is
+ * not written.
+ */
+void xiafs_statfs(struct super_block *sb, struct statfs *buf)
+{
+	long free_zones;
+
+	put_fs_long(_XIAFS_SUPER_MAGIC, &buf->f_type);
+	put_fs_long(XIAFS_ZSIZE(sb), &buf->f_bsize);
+	put_fs_long(sb->u.xiafs_sb.s_ndatazones, &buf->f_blocks);
+
+	free_zones = xiafs_count_free_zones(sb);
+	put_fs_long(free_zones, &buf->f_bfree);
+	put_fs_long(free_zones, &buf->f_bavail);
+
+	put_fs_long(sb->u.xiafs_sb.s_ninodes, &buf->f_files);
+	put_fs_long(xiafs_count_free_inodes(sb), &buf->f_ffree);
+	put_fs_long(_XIAFS_NAME_LEN, &buf->f_namelen);
+	/* don't know what should be put in buf->f_fsid */
+}
+
+/*
+ * Fetch entry nr from a zone of zone addresses and release the buffer.
+ * A NULL bh yields 0.
+ */
+static int zone_bmap(struct buffer_head * bh, int nr)
+{
+	int addr = 0;
+
+	if (bh) {
+		addr = ((u_long *) bh->b_data)[nr];
+		brelse(bh);
+	}
+	return addr;
+}
+
+/*
+ * Map a file-relative zone number to the zone address stored for it.
+ *
+ * Zones 0-7 come straight from the inode, the next ADDRS_PER_Z zones go
+ * through the single-indirect zone, and the rest through the
+ * double-indirect zone.  Returns 0 when the zone number is out of range
+ * (a message is printed), when a pointer along the way is zero, or when
+ * an indirect zone cannot be read.  Unless the inode is read-only,
+ * i_atime is updated and the inode is marked dirty.
+ */
+int xiafs_bmap(struct inode * inode,int zone)
+{
+	int addr;
+
+	if (zone < 0) {
+		printk("XIA-FS: block < 0 (%s %d)\n", WHERE_ERR);
+		return 0;
+	}
+	if (zone >= 8+(1+XIAFS_ADDRS_PER_Z(inode->i_sb))*XIAFS_ADDRS_PER_Z(inode->i_sb)) {
+		printk("XIA-FS: zone > big (%s %d)\n", WHERE_ERR);
+		return 0;
+	}
+	if (!IS_RDONLY (inode)) {
+		inode->i_atime = CURRENT_TIME;
+		inode->i_dirt = 1;
+	}
+
+	/* direct zones */
+	if (zone < 8)
+		return inode->u.xiafs_i.i_zone[zone];
+	zone -= 8;
+
+	/* single indirect */
+	if (zone < XIAFS_ADDRS_PER_Z(inode->i_sb)) {
+		addr = inode->u.xiafs_i.i_ind_zone;
+		if (!addr)
+			return addr;
+		return zone_bmap(bread(inode->i_dev, addr, XIAFS_ZSIZE(inode->i_sb)), zone);
+	}
+	zone -= XIAFS_ADDRS_PER_Z(inode->i_sb);
+
+	/* double indirect: first level picks the indirect zone, second the entry */
+	addr = inode->u.xiafs_i.i_dind_zone;
+	if (!addr)
+		return addr;
+	addr = zone_bmap(bread(inode->i_dev, addr, XIAFS_ZSIZE(inode->i_sb)),
+			 zone >> XIAFS_ADDRS_PER_Z_BITS(inode->i_sb));
+	if (!addr)
+		return addr;
+	return zone_bmap(bread(inode->i_dev, addr, XIAFS_ZSIZE(inode->i_sb)),
+			 zone & (XIAFS_ADDRS_PER_Z(inode->i_sb)-1));
+}
+
+/*
+ * Pick an address to pass to the zone allocator as prev_addr: the
+ * address of the nearest earlier zone of the file that is mapped, or,
+ * when there is none, a pseudo-random zone derived from random_nr.
+ */
+static u_long get_prev_addr(struct inode * inode, int zone)
+{
+	u_long addr;
+	int z;
+
+	if (zone > 0) {
+		/* only files with holes suffer */
+		for (z = zone - 1; z >= 0; z--) {
+			addr = xiafs_bmap(inode, z);
+			if (addr)
+				return addr;
+		}
+	}
+	random_nr = (random_nr+23) % inode->i_sb->u.xiafs_sb.s_ndatazones;
+	return random_nr + inode->i_sb->u.xiafs_sb.s_firstdatazone;
+}
+
+/*
+ * Get the buffer for the zone whose address is stored at *lp (a slot
+ * inside the in-core inode), allocating a new zone near prev_addr when
+ * the slot is empty and create is set.
+ *
+ * *lp is re-read after every getblk(); if it changed in the meantime the
+ * buffer (and any freshly allocated zone) is given back and the whole
+ * lookup starts over.  Returns NULL when the slot is empty and create is
+ * zero, or when no zone could be allocated.
+ */
+static struct buffer_head *
+dt_getblk(struct inode * inode, u_long *lp, int create, u_long prev_addr)
+{
+	struct buffer_head * bh;
+	int addr;
+
+	for (;;) {
+		addr = *lp;
+		if (addr) {
+			bh = getblk(inode->i_dev, addr, XIAFS_ZSIZE(inode->i_sb));
+			if (addr == *lp)
+				return bh;
+			/* slot changed under us: try again */
+			brelse(bh);
+			continue;
+		}
+		if (!create)
+			return NULL;
+		addr = xiafs_new_zone(inode->i_sb, prev_addr);
+		if (!addr)
+			return NULL;
+		bh = getblk(inode->i_dev, addr, XIAFS_ZSIZE(inode->i_sb));
+		if (*lp) {
+			/* slot was filled meanwhile: undo our allocation */
+			xiafs_free_zone(inode->i_sb, addr);
+			brelse(bh);
+			continue;
+		}
+		*lp = addr;
+		inode->i_blocks+=2 << XIAFS_ZSHIFT(inode->i_sb);
+		return bh;
+	}
+}
+
+/*
+ * Get the buffer for the zone whose address is stored in entry nr of
+ * the indirect zone held in bh, allocating a new zone near prev_addr
+ * when the entry is empty and create is set.
+ *
+ * bh is always released before returning (a NULL bh is passed straight
+ * through as a NULL result, which lets calls be chained).  Returns NULL
+ * when bh is NULL or cannot be read, when the entry is empty and create
+ * is zero, or when no zone could be allocated.
+ */
+static struct buffer_head *
+indt_getblk(struct inode * inode, struct buffer_head * bh,
+ int nr, int create, u_long prev_addr)
+{
+ int tmp;
+ u_long *lp;
+ struct buffer_head * result;
+
+ if (!bh)
+ return NULL;
+ /* the indirect zone itself must be valid before its entries are read */
+ if (!bh->b_uptodate) {
+ ll_rw_block(READ, 1, &bh);
+ wait_on_buffer(bh);
+ if (!bh->b_uptodate) {
+ brelse(bh);
+ return NULL;
+ }
+ }
+ lp = nr + (u_long *) bh->b_data;
+repeat:
+ if ((tmp=*lp)) {
+ result = getblk(bh->b_dev, tmp, XIAFS_ZSIZE(inode->i_sb));
+ /* re-check the entry: if it changed during getblk(), start over */
+ if (tmp == *lp) {
+ brelse(bh);
+ return result;
+ }
+ brelse(result);
+ goto repeat;
+ }
+ if (!create) {
+ brelse(bh);
+ return NULL;
+ }
+ tmp = xiafs_new_zone(inode->i_sb, prev_addr);
+ if (!tmp) {
+ brelse(bh);
+ return NULL;
+ }
+ result = getblk(bh->b_dev, tmp, XIAFS_ZSIZE(inode->i_sb));
+ /* entry was filled in meanwhile: give the new zone back and retry */
+ if (*lp) {
+ xiafs_free_zone(inode->i_sb, tmp);
+ brelse(result);
+ goto repeat;
+ }
+ *lp = tmp;
+ inode->i_blocks+=2 << XIAFS_ZSHIFT(inode->i_sb);
+ /* the indirect zone now holds a new address and must be written back */
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ return result;
+}
+
+/*
+ * Return the buffer for file-relative zone number zone, optionally
+ * allocating the zone (and any indirect zones leading to it) when
+ * create is set.
+ *
+ * Zones 0-7 are direct, the next ADDRS_PER_Z zones are reached through
+ * the single-indirect zone, the remainder through the double-indirect
+ * zone.  Returns NULL for an out-of-range zone number or when any step
+ * of the lookup fails.
+ */
+struct buffer_head * xiafs_getblk(struct inode * inode, int zone, int create)
+{
+	struct buffer_head * bh;
+	u_long hint = 0;
+
+	if (zone<0) {
+		printk("XIA-FS: zone < 0 (%s %d)\n", WHERE_ERR);
+		return NULL;
+	}
+	if (zone >= 8+(1+XIAFS_ADDRS_PER_Z(inode->i_sb))*XIAFS_ADDRS_PER_Z(inode->i_sb)) {
+		/* the message is only printed for lookups, not for creates */
+		if (!create)
+			printk("XIA-FS: zone > big (%s %d)\n", WHERE_ERR);
+		return NULL;
+	}
+	if (create)
+		hint = get_prev_addr(inode, zone);
+
+	/* direct */
+	if (zone < 8)
+		return dt_getblk(inode, &inode->u.xiafs_i.i_zone[zone], create, hint);
+	zone -= 8;
+
+	/* single indirect */
+	if (zone < XIAFS_ADDRS_PER_Z(inode->i_sb)) {
+		bh = dt_getblk(inode, &(inode->u.xiafs_i.i_ind_zone), create, hint);
+		return indt_getblk(inode, bh, zone, create, hint);
+	}
+	zone -= XIAFS_ADDRS_PER_Z(inode->i_sb);
+
+	/* double indirect */
+	bh = dt_getblk(inode, &(inode->u.xiafs_i.i_dind_zone), create, hint);
+	bh = indt_getblk(inode, bh, zone>>XIAFS_ADDRS_PER_Z_BITS(inode->i_sb),
+			 create, hint);
+	return indt_getblk(inode, bh, zone&(XIAFS_ADDRS_PER_Z(inode->i_sb)-1),
+			   create, hint);
+}
+
+/*
+ * Like xiafs_getblk(), but make sure the returned buffer is up to date
+ * by reading it in when necessary.  Returns NULL when there is no
+ * buffer or the read fails.
+ */
+struct buffer_head * xiafs_bread(struct inode * inode, int zone, int create)
+{
+	struct buffer_head * bh;
+
+	bh = xiafs_getblk(inode, zone, create);
+	if (bh && !bh->b_uptodate) {
+		ll_rw_block(READ, 1, &bh);
+		wait_on_buffer(bh);
+		if (!bh->b_uptodate) {
+			brelse(bh);
+			bh = NULL;
+		}
+	}
+	return bh;
+}
+
+/*
+ * read_inode hook: fill the in-core inode from its on-disk copy.
+ *
+ * The inode table starts after zone 0 and the inode and zone bitmaps;
+ * the zone holding inode ino and its slot within that zone are derived
+ * from (ino - 1).  If the inode number is out of range or the zone
+ * cannot be read, a message is printed and the inode is left with
+ * i_op == NULL and i_mode == 0.
+ *
+ * For character and block devices i_zone[0] carries the device number;
+ * for everything else only the low 24 bits of each zone word are taken
+ * as the zone address.  i_op is then chosen from the file type.
+ */
+void xiafs_read_inode(struct inode * inode)
+{
+ struct buffer_head * bh;
+ struct xiafs_inode * raw_inode;
+ int zone;
+ ino_t ino;
+
+ ino = inode->i_ino;
+ inode->i_op = NULL;
+ inode->i_mode=0;
+ if (!ino || ino > inode->i_sb->u.xiafs_sb.s_ninodes) {
+ printk("XIA-FS: bad inode number (%s %d)\n", WHERE_ERR);
+ return;
+ }
+ /* zone of the inode table that contains this inode */
+ zone = 1 + inode->i_sb->u.xiafs_sb.s_imap_zones +
+ inode->i_sb->u.xiafs_sb.s_zmap_zones +
+ (ino-1)/ XIAFS_INODES_PER_Z(inode->i_sb);
+ if (!(bh=bread(inode->i_dev, zone, XIAFS_ZSIZE(inode->i_sb)))) {
+ printk("XIA-FS: read i-node zone failed (%s %d)\n", WHERE_ERR);
+ return;
+ }
+ /* slot inside that zone; the mask relies on INODES_PER_Z being a power of two */
+ raw_inode = ((struct xiafs_inode *) bh->b_data) +
+ ((ino-1) & (XIAFS_INODES_PER_Z(inode->i_sb) - 1));
+ inode->i_mode = raw_inode->i_mode;
+ inode->i_uid = raw_inode->i_uid;
+ inode->i_gid = raw_inode->i_gid;
+ inode->i_nlink = raw_inode->i_nlinks;
+ inode->i_size = raw_inode->i_size;
+ inode->i_mtime = raw_inode->i_mtime;
+ inode->i_atime = raw_inode->i_atime;
+ inode->i_ctime = raw_inode->i_ctime;
+ inode->i_blksize = XIAFS_ZSIZE(inode->i_sb);
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+ inode->i_blocks=0;
+ inode->i_rdev = raw_inode->i_zone[0];
+ } else {
+ XIAFS_GET_BLOCKS(raw_inode, inode->i_blocks);
+ /* keep only the low 24 bits of every zone word as the address */
+ for (zone = 0; zone < 8; zone++)
+ inode->u.xiafs_i.i_zone[zone] = raw_inode->i_zone[zone] & 0xffffff;
+ inode->u.xiafs_i.i_ind_zone = raw_inode->i_ind_zone & 0xffffff;
+ inode->u.xiafs_i.i_dind_zone = raw_inode->i_dind_zone & 0xffffff;
+ }
+ brelse(bh);
+ /* pick the operations table by file type; other types keep i_op == NULL */
+ if (S_ISREG(inode->i_mode))
+ inode->i_op = &xiafs_file_inode_operations;
+ else if (S_ISDIR(inode->i_mode))
+ inode->i_op = &xiafs_dir_inode_operations;
+ else if (S_ISLNK(inode->i_mode))
+ inode->i_op = &xiafs_symlink_inode_operations;
+ else if (S_ISCHR(inode->i_mode))
+ inode->i_op = &chrdev_inode_operations;
+ else if (S_ISBLK(inode->i_mode))
+ inode->i_op = &blkdev_inode_operations;
+ else if (S_ISFIFO(inode->i_mode))
+ init_fifo(inode);
+}
+
+/*
+ * Copy the in-core inode into its on-disk slot and mark the containing
+ * buffer dirty.
+ *
+ * Returns the buffer holding the on-disk inode (the caller must release
+ * it), or 0 when the filesystem is read-only, the inode number is out
+ * of range, or the inode zone cannot be read.  i_dirt is cleared on
+ * every path, including the failures.
+ */
+static struct buffer_head * xiafs_update_inode(struct inode * inode)
+{
+ struct buffer_head * bh;
+ struct xiafs_inode * raw_inode;
+ int zone;
+ ino_t ino;
+
+ if (IS_RDONLY (inode)) {
+ printk("XIA-FS: write_inode on a read-only filesystem (%s %d)\n", WHERE_ERR);
+ inode->i_dirt = 0;
+ return 0;
+ }
+
+ ino = inode->i_ino;
+ if (!ino || ino > inode->i_sb->u.xiafs_sb.s_ninodes) {
+ printk("XIA-FS: bad inode number (%s %d)\n", WHERE_ERR);
+ inode->i_dirt=0;
+ return 0;
+ }
+ /* same zone/slot computation as in xiafs_read_inode */
+ zone = 1 + inode->i_sb->u.xiafs_sb.s_imap_zones +
+ inode->i_sb->u.xiafs_sb.s_zmap_zones +
+ (ino-1) / XIAFS_INODES_PER_Z(inode->i_sb);
+ if (!(bh=bread(inode->i_dev, zone, XIAFS_ZSIZE(inode->i_sb)))) {
+ printk("XIA-FS: read i-node zone failed (%s %d)\n", WHERE_ERR);
+ inode->i_dirt=0;
+ return 0;
+ }
+ raw_inode = ((struct xiafs_inode *)bh->b_data) +
+ ((ino-1) & (XIAFS_INODES_PER_Z(inode->i_sb) -1));
+ raw_inode->i_mode = inode->i_mode;
+ raw_inode->i_uid = inode->i_uid;
+ raw_inode->i_gid = inode->i_gid;
+ raw_inode->i_nlinks = inode->i_nlink;
+ raw_inode->i_size = inode->i_size;
+ raw_inode->i_atime = inode->i_atime;
+ raw_inode->i_ctime = inode->i_ctime;
+ raw_inode->i_mtime = inode->i_mtime;
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+ raw_inode->i_zone[0] = inode->i_rdev;
+ else {
+ XIAFS_PUT_BLOCKS(raw_inode, inode->i_blocks);
+ /*
+ * Replace only the low 24 bits (the zone address) of each word;
+ * the top 8 bits already on disk are preserved.
+ */
+ for (zone = 0; zone < 8; zone++)
+ raw_inode->i_zone[zone] = (raw_inode->i_zone[zone] & 0xff000000)
+ | (inode->u.xiafs_i.i_zone[zone] & 0xffffff);
+ raw_inode->i_ind_zone = (raw_inode->i_ind_zone & 0xff000000)
+ | (inode->u.xiafs_i.i_ind_zone & 0xffffff);
+ raw_inode->i_dind_zone = (raw_inode->i_dind_zone & 0xff000000)
+ | (inode->u.xiafs_i.i_dind_zone & 0xffffff);
+ }
+ inode->i_dirt=0;
+ mark_buffer_dirty(bh, 1);
+ return bh;
+}
+
+
+/*
+ * write_inode hook: copy the in-core inode into its buffer with
+ * xiafs_update_inode() and drop the buffer reference.  The buffer is
+ * only marked dirty, no write is started here.
+ */
+void xiafs_write_inode(struct inode * inode)
+{
+	struct buffer_head * inode_bh = xiafs_update_inode(inode);
+
+	brelse (inode_bh);
+}
+
+/*
+ * Update the on-disk inode and, if its buffer is dirty, write it out
+ * and wait for the write to finish.
+ *
+ * Returns 0 on success and -1 when xiafs_update_inode() yields no
+ * buffer or when the buffer has b_req set but is not up to date after
+ * the write.
+ */
+int xiafs_sync_inode (struct inode *inode)
+{
+	struct buffer_head *bh;
+	int status = 0;
+
+	bh = xiafs_update_inode(inode);
+	if (!bh) {
+		status = -1;
+	} else if (bh->b_dirt) {
+		ll_rw_block(WRITE, 1, &bh);
+		wait_on_buffer(bh);
+		if (bh->b_req && !bh->b_uptodate) {
+			printk ("IO error syncing xiafs inode [%04X:%lu]\n",
+				inode->i_dev, inode->i_ino);
+			status = -1;
+		}
+	}
+	brelse (bh);
+	return status;
+}
diff --git a/fs/xiafs/namei.c b/fs/xiafs/namei.c
new file mode 100644
index 000000000..0532b1754
--- /dev/null
+++ b/fs/xiafs/namei.c
@@ -0,0 +1,848 @@
+/*
+ * Linux/fs/xiafs/namei.c
+ *
+ * Copyright (C) Q. Frank Xia, 1993.
+ *
+ * Based on Linus' minix/namei.c
+ * Copyright (C) Linus Torvalds, 1991, 1992.
+ *
+ * This software may be redistributed per Linux Copyright.
+ */
+
+#include <linux/sched.h>
+#include <linux/xia_fs.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/errno.h>
+#include <asm/segment.h>
+
+#include "xiafs_mac.h"
+
+/* Round x up to the next multiple of 4; the result has type u_long. */
+#define RNDUP4(x) ((3+(u_long)(x)) & ~3)
+/*
+ * xiafs_match()
+ *
+ * Compare a name of 'len' bytes against the directory entry 'dep'.
+ * The comparison is done byte by byte and includes a few sanity tests:
+ * a NULL entry, an unused entry (d_ino == 0) or an over-long name never
+ * matches.
+ *
+ * NOTE! unlike strncmp, xiafs_match returns 1 for success, 0 for failure.
+ */
+static int xiafs_match(int len, const char * name, struct xiafs_direct * dep)
+{
+	int k;
+
+	if (!dep || !dep->d_ino)
+		return 0;
+	if (len > _XIAFS_NAME_LEN)
+		return 0;
+	/* "" means "." ---> so paths like "/usr/lib//libc.a" work */
+	if (len == 0 && dep->d_name[0] == '.' && dep->d_name[1] == '\0')
+		return 1;
+	if (dep->d_name_len != len)
+		return 0;
+	for (k = 0; k < len; k++) {
+		if (name[k] != dep->d_name[k])
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * xiafs_find_entry()
+ *
+ * finds an entry in the specified directory with the wanted name. It
+ * returns the cache buffer in which the entry was found, and the entry
+ * itself (as a parameter - res_dir). It does NOT read the inode of the
+ * entry - you'll have to do that yourself if you want to.
+ *
+ * If res_pre is non-NULL it receives the entry visited just before the
+ * match in the same zone (or the match itself when it is the first entry
+ * of the zone); it is only written when a match is found.
+ *
+ * Returns NULL (with *res_dir == NULL) when the directory is NULL, the
+ * name is longer than _XIAFS_NAME_LEN, the directory size is not a multiple
+ * of the zone size, a malformed entry is met, or nothing matches. On
+ * success the returned buffer is not released here.
+ */
+static struct buffer_head *
+xiafs_find_entry(struct inode * inode, const char * name, int namelen,
+ struct xiafs_direct ** res_dir, struct xiafs_direct ** res_pre)
+{
+ int i, zones, pos;
+ struct buffer_head * bh;
+ struct xiafs_direct * dep, * dep_pre;
+
+ *res_dir = NULL;
+ if (!inode)
+ return NULL;
+ if (namelen > _XIAFS_NAME_LEN)
+ return NULL;
+
+ /* a directory must consist of whole zones */
+ if (inode->i_size & (XIAFS_ZSIZE(inode->i_sb) - 1)) {
+ printk("XIA-FS: bad dir size (%s %d)\n", WHERE_ERR);
+ return NULL;
+ }
+ zones=inode->i_size >> XIAFS_ZSIZE_BITS(inode->i_sb);
+ for (i=0; i < zones; i++ ) {
+ bh = xiafs_bread(inode, i, 0);
+ /* a zone that cannot be read is skipped, not treated as an error */
+ if (!bh)
+ continue;
+ dep_pre=dep=(struct xiafs_direct *)bh->b_data;
+ /* the first zone must start with a 12-byte "." entry */
+ if (!i && (dep->d_rec_len != 12 || !dep->d_ino ||
+ dep->d_name_len != 1 || strcmp(dep->d_name, "."))) {
+ printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR);
+ brelse(bh);
+ return NULL;
+ }
+ pos = 0;
+ while ( pos < XIAFS_ZSIZE(inode->i_sb) ) {
+ /*
+ * Validate the entry before trusting it: inode number in range,
+ * record at least 12 bytes and inside the zone, name fits in the
+ * record, name non-empty and NUL-terminated. A bad entry ends the
+ * search silently.
+ */
+ if (dep->d_ino > inode->i_sb->u.xiafs_sb.s_ninodes ||
+ dep->d_rec_len < 12 ||
+ dep->d_rec_len+(char *)dep > bh->b_data+XIAFS_ZSIZE(inode->i_sb) ||
+ dep->d_name_len + 8 > dep->d_rec_len || dep->d_name_len <= 0 ||
+ dep->d_name[dep->d_name_len] ) {
+ brelse(bh);
+ return NULL;
+ }
+ if (xiafs_match(namelen, name, dep)) {
+ *res_dir=dep;
+ if (res_pre)
+ *res_pre=dep_pre;
+ return bh;
+ }
+ /* step to the next record of this zone */
+ pos += dep->d_rec_len;
+ dep_pre=dep;
+ dep=(struct xiafs_direct *)(bh->b_data + pos);
+ }
+ brelse(bh);
+ /* the records must end exactly on the zone boundary */
+ if (pos > XIAFS_ZSIZE(inode->i_sb)) {
+ printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR);
+ return NULL;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Look 'name' up in directory 'dir' and return its inode through *result.
+ *
+ * *result is cleared first. A NULL dir gives -ENOENT without any iput();
+ * on every other path iput(dir) is called exactly once before returning.
+ * Returns 0 on success, -ENOENT if dir is not a directory or the name is
+ * not found, and -EACCES if iget() yields no inode.
+ */
+int xiafs_lookup(struct inode * dir, const char * name, int len,
+		struct inode ** result)
+{
+	struct buffer_head * entry_bh;
+	struct xiafs_direct * entry;
+	int ino;
+	int err = -ENOENT;
+
+	*result = NULL;
+	if (!dir)
+		return -ENOENT;
+	if (!S_ISDIR(dir->i_mode))
+		goto out;
+	entry_bh = xiafs_find_entry(dir, name, len, &entry, NULL);
+	if (!entry_bh)
+		goto out;
+	/* remember the inode number, then let go of the directory block */
+	ino = entry->d_ino;
+	brelse(entry_bh);
+	*result = iget(dir->i_sb, ino);
+	err = *result ? 0 : -EACCES;
+out:
+	iput(dir);
+	return err;
+}
+
+/*
+ * xiafs_add_entry()
+ *
+ * adds a file entry to the specified directory, using the same
+ * semantics as xiafs_find_entry(). It returns NULL if it failed.
+ *
+ * The directory is scanned zone by zone for a free record large enough
+ * for the name (RNDUP4(namelen)+8 bytes). A used record with enough
+ * slack is split in two, and a new zone-sized free record is set up when
+ * the scan runs past the current directory size. On success the name is
+ * copied in, the buffer is marked dirty, *res_dir points at the new entry
+ * and, if res_pre is non-NULL, *res_pre at the record preceding it.
+ *
+ * NOTE!! The inode part of 'de' is left at 0 - which means you
+ * may not sleep between calling this and putting something into
+ * the entry, as someone else might have used it while you slept.
+ */
+static struct buffer_head * xiafs_add_entry(struct inode * dir,
+ const char * name, int namelen, struct xiafs_direct ** res_dir,
+ struct xiafs_direct ** res_pre)
+{
+ int i, pos, offset;
+ struct buffer_head * bh;
+ struct xiafs_direct * de, * de_pre;
+
+ *res_dir = NULL;
+ if (!dir || !namelen || namelen > _XIAFS_NAME_LEN)
+ return NULL;
+
+ if (dir->i_size & (XIAFS_ZSIZE(dir->i_sb) - 1)) {
+ printk("XIA-FS: bad dir size (%s %d)\n", WHERE_ERR);
+ return NULL;
+ }
+ /* pos is the byte offset of the zone being examined */
+ pos=0;
+ for ( ; ; ) {
+ /*
+ * Third argument is 0 for the first zone and 1 afterwards --
+ * presumably a "create if missing" flag; confirm against xiafs_bread.
+ */
+ bh = xiafs_bread(dir, pos >> XIAFS_ZSIZE_BITS(dir->i_sb), pos ? 1:0);
+ if (!bh)
+ return NULL;
+ de_pre=de=(struct xiafs_direct *)bh->b_data;
+ if (!pos) {
+ /* the first zone must start with a 12-byte "." entry; skip it */
+ if (de->d_rec_len != 12 || !de->d_ino || de->d_name_len != 1 ||
+ strcmp(de->d_name, ".")) {
+ printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR);
+ brelse(bh);
+ return NULL;
+ }
+ offset = 12;
+ de_pre=de=(struct xiafs_direct *)(bh->b_data+12);
+ } else
+ offset = 0;
+ while (offset < XIAFS_ZSIZE(dir->i_sb)) {
+ if (pos >= dir->i_size) {
+ /* zone lies beyond the directory: make it one big free record */
+ de->d_ino=0;
+ de->d_name_len=0;
+ de->d_name[0]=0;
+ de->d_rec_len=XIAFS_ZSIZE(dir->i_sb);
+ dir->i_size += XIAFS_ZSIZE(dir->i_sb);
+ dir->i_dirt = 1;
+ } else {
+ /* existing record: sanity-check it before use */
+ if (de->d_ino > dir->i_sb->u.xiafs_sb.s_ninodes ||
+ de->d_rec_len < 12 ||
+ (char *)de+de->d_rec_len > bh->b_data+XIAFS_ZSIZE(dir->i_sb) ||
+ de->d_name_len + 8 > de->d_rec_len ||
+ de->d_name[de->d_name_len]) {
+ printk("XIA-FS: bad directory entry (%s %d)\n", WHERE_ERR);
+ brelse(bh);
+ return NULL;
+ }
+ /*
+ * A used record with room to spare is split: it keeps
+ * RNDUP4(d_name_len)+8 bytes and the remainder becomes a
+ * free record that 'de' now points at.
+ */
+ if (de->d_ino &&
+ RNDUP4(de->d_name_len)+RNDUP4(namelen)+16<=de->d_rec_len) {
+ i=RNDUP4(de->d_name_len)+8;
+ de_pre=de;
+ de=(struct xiafs_direct *)(i+(u_char *)de_pre);
+ de->d_ino=0;
+ de->d_rec_len=de_pre->d_rec_len-i;
+ de_pre->d_rec_len=i;
+ }
+ }
+ /* free record that is big enough: claim it */
+ if (!de->d_ino && RNDUP4(namelen)+8 <= de->d_rec_len) {
+ /*
+ * XXX all times should be set by caller upon successful
+ * completion.
+ */
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ dir->i_dirt = 1;
+ memcpy(de->d_name, name, namelen);
+ de->d_name[namelen]=0;
+ de->d_name_len=namelen;
+ mark_buffer_dirty(bh, 1);
+ *res_dir = de;
+ if (res_pre)
+ *res_pre = de_pre;
+ return bh;
+ }
+ offset+=de->d_rec_len;
+ de_pre=de;
+ de=(struct xiafs_direct *)(bh->b_data+offset);
+ }
+ brelse(bh);
+ /* the records must end exactly on the zone boundary */
+ if (offset > XIAFS_ZSIZE(dir->i_sb)) {
+ printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR);
+ return NULL;
+ }
+ pos+=XIAFS_ZSIZE(dir->i_sb);
+ }
+ /* not reached: the loop above only exits through return */
+ return NULL;
+}
+
+/*
+ * Create a regular file called 'name' in 'dir' and return its inode
+ * through *result (which is cleared first).
+ *
+ * Returns 0 on success, -ENOENT when dir is NULL (no iput() then), and
+ * -ENOSPC when no inode or no directory slot can be obtained. On all
+ * paths with a non-NULL dir, iput(dir) is called once.
+ */
+int xiafs_create(struct inode * dir, const char * name, int len, int mode,
+		struct inode ** result)
+{
+	struct xiafs_direct * slot;
+	struct buffer_head * dir_bh;
+	struct inode * fresh;
+
+	*result = NULL;
+	if (!dir)
+		return -ENOENT;
+	fresh = xiafs_new_inode(dir);
+	if (!fresh)
+		goto no_space;
+	fresh->i_op = &xiafs_file_inode_operations;
+	fresh->i_mode = mode;
+	fresh->i_dirt = 1;
+	dir_bh = xiafs_add_entry(dir, name, len, &slot, NULL);
+	if (!dir_bh) {
+		/* no room in the directory: give the new inode back */
+		fresh->i_nlink--;
+		fresh->i_dirt = 1;
+		iput(fresh);
+		goto no_space;
+	}
+	slot->d_ino = fresh->i_ino;
+	mark_buffer_dirty(dir_bh, 1);
+	brelse(dir_bh);
+	iput(dir);
+	*result = fresh;
+	return 0;
+
+no_space:
+	iput(dir);
+	return -ENOSPC;
+}
+
+/*
+ * Create a filesystem node (regular file, directory inode, symlink,
+ * device node or FIFO, as selected by 'mode') called 'name' in 'dir'.
+ *
+ * Returns 0 on success, -ENOENT when dir is NULL (no iput() then),
+ * -EEXIST when the name is already present, and -ENOSPC when no inode or
+ * no directory slot can be obtained. On all paths with a non-NULL dir,
+ * iput(dir) is called once; the new inode is always put before returning.
+ */
+int xiafs_mknod(struct inode *dir, const char *name, int len, int mode, int rdev)
+{
+	struct inode * inode;
+	struct buffer_head * bh;
+	struct xiafs_direct * de;
+
+	if (!dir)
+		return -ENOENT;
+	bh = xiafs_find_entry(dir,name,len,&de, NULL);
+	if (bh) {
+		brelse(bh);
+		iput(dir);
+		return -EEXIST;
+	}
+	inode = xiafs_new_inode(dir);
+	if (!inode) {
+		iput(dir);
+		return -ENOSPC;
+	}
+	inode->i_uid = current->fsuid;
+	inode->i_mode = mode;
+	/* pick the operations table that matches the node type */
+	inode->i_op = NULL;
+	if (S_ISREG(inode->i_mode))
+		inode->i_op = &xiafs_file_inode_operations;
+	else if (S_ISDIR(inode->i_mode)) {
+		inode->i_op = &xiafs_dir_inode_operations;
+		/* a set-group-ID parent passes the bit on to new directories */
+		if (dir->i_mode & S_ISGID)
+			inode->i_mode |= S_ISGID;
+	}
+	else if (S_ISLNK(inode->i_mode))
+		inode->i_op = &xiafs_symlink_inode_operations;
+	else if (S_ISCHR(inode->i_mode))
+		inode->i_op = &chrdev_inode_operations;
+	else if (S_ISBLK(inode->i_mode))
+		inode->i_op = &blkdev_inode_operations;
+	else if (S_ISFIFO(inode->i_mode))
+		init_fifo(inode);
+	if (S_ISBLK(mode) || S_ISCHR(mode))
+		inode->i_rdev = rdev;
+	/*
+	 * Stamp all three times. The previous code assigned i_atime twice
+	 * and never touched i_mtime.
+	 */
+	inode->i_mtime = inode->i_ctime = inode->i_atime = CURRENT_TIME;
+	inode->i_dirt = 1;
+	bh = xiafs_add_entry(dir, name, len, &de, NULL);
+	if (!bh) {
+		/* no room in the directory: give the new inode back */
+		inode->i_nlink--;
+		inode->i_dirt = 1;
+		iput(inode);
+		iput(dir);
+		return -ENOSPC;
+	}
+	de->d_ino = inode->i_ino;
+	mark_buffer_dirty(bh, 1);
+	brelse(bh);
+	iput(dir);
+	iput(inode);
+	return 0;
+}
+
+/*
+ * Create directory 'name' inside 'dir'.
+ *
+ * A new inode is allocated, its first zone is filled with the "." and ".."
+ * records, and an entry for it is added to 'dir', whose link count is then
+ * incremented.
+ *
+ * Returns 0 on success, -EEXIST if the name is already present, -EMLINK if
+ * dir already has more than 64000 links, and -ENOSPC if an inode, the first
+ * zone or a directory slot cannot be obtained. iput(dir) is called on
+ * every path.
+ */
+int xiafs_mkdir(struct inode * dir, const char * name, int len, int mode)
+{
+ struct inode * inode;
+ struct buffer_head * bh, *dir_block;
+ struct xiafs_direct * de;
+
+ bh = xiafs_find_entry(dir,name,len,&de, NULL);
+ if (bh) {
+ brelse(bh);
+ iput(dir);
+ return -EEXIST;
+ }
+ if (dir->i_nlink > 64000) {
+ iput(dir);
+ return -EMLINK;
+ }
+ inode = xiafs_new_inode(dir);
+ if (!inode) {
+ iput(dir);
+ return -ENOSPC;
+ }
+ inode->i_op = &xiafs_dir_inode_operations;
+ inode->i_size = XIAFS_ZSIZE(dir->i_sb);
+ inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ dir_block = xiafs_bread(inode,0,1);
+ if (!dir_block) {
+ iput(dir);
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ iput(inode);
+ return -ENOSPC;
+ }
+ /* "." : fixed 12-byte record pointing at the new directory itself */
+ de = (struct xiafs_direct *) dir_block->b_data;
+ de->d_ino=inode->i_ino;
+ strcpy(de->d_name,".");
+ de->d_name_len=1;
+ de->d_rec_len=12;
+ /* ".." : points at the parent and owns the rest of the zone */
+ de =(struct xiafs_direct *)(12 + dir_block->b_data);
+ de->d_ino = dir->i_ino;
+ strcpy(de->d_name,"..");
+ de->d_name_len=2;
+ de->d_rec_len=XIAFS_ZSIZE(dir->i_sb)-12;
+ inode->i_nlink = 2;
+ mark_buffer_dirty(dir_block, 1);
+ brelse(dir_block);
+ inode->i_mode = S_IFDIR | (mode & S_IRWXUGO & ~current->fs->umask);
+ if (dir->i_mode & S_ISGID)
+ inode->i_mode |= S_ISGID;
+ inode->i_dirt = 1;
+ bh = xiafs_add_entry(dir, name, len, &de, NULL);
+ if (!bh) {
+ iput(dir);
+ /* link count 0 before iput(): the new directory is abandoned */
+ inode->i_nlink=0;
+ iput(inode);
+ return -ENOSPC;
+ }
+ de->d_ino = inode->i_ino;
+ mark_buffer_dirty(bh, 1);
+ /* the child's ".." is one more link to the parent */
+ dir->i_nlink++;
+ dir->i_dirt = 1;
+ iput(dir);
+ iput(inode);
+ brelse(bh);
+ return 0;
+}
+
+/*
+ * routine to check that the specified directory is empty (for rmdir)
+ *
+ * Returns 0 as soon as a record with a non-zero inode number is found
+ * after "." and "..", and 1 otherwise. Note that every "bad directory"
+ * condition detected here also returns 1, i.e. a damaged directory is
+ * reported as empty.
+ */
+static int empty_dir(struct inode * inode)
+{
+ int i, zones, offset;
+ struct buffer_head * bh;
+ struct xiafs_direct * de;
+
+ if (inode->i_size & (XIAFS_ZSIZE(inode->i_sb)-1) ) {
+ printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR);
+ return 1;
+ }
+
+ zones=inode->i_size >> XIAFS_ZSIZE_BITS(inode->i_sb);
+ for (i=0; i < zones; i++) {
+ bh = xiafs_bread(inode, i, 0);
+ if (!i) {
+ /* the first zone is mandatory and must start with "." and ".." */
+ if (!bh) {
+ printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR);
+ return 1;
+ }
+ de=(struct xiafs_direct *)bh->b_data;
+ if (de->d_ino != inode->i_ino || strcmp(".", de->d_name) ||
+ de->d_rec_len != 12 ) {
+ printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR);
+ brelse(bh);
+ return 1;
+ }
+ de=(struct xiafs_direct *)(12 + bh->b_data);
+ if (!de->d_ino || strcmp("..", de->d_name)) {
+ printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR);
+ brelse(bh);
+ return 1;
+ }
+ /* start scanning right after the ".." record */
+ offset=de->d_rec_len+12;
+ }
+ else
+ offset = 0;
+ /* later zones that cannot be read are skipped */
+ if (!bh)
+ continue;
+ while (offset < XIAFS_ZSIZE(inode->i_sb)) {
+ de=(struct xiafs_direct *)(bh->b_data+offset);
+ if (de->d_ino > inode->i_sb->u.xiafs_sb.s_ninodes ||
+ de->d_rec_len < 12 ||
+ (char *)de+de->d_rec_len > bh->b_data+XIAFS_ZSIZE(inode->i_sb) ||
+ de->d_name_len + 8 > de->d_rec_len ||
+ de->d_name[de->d_name_len]) {
+ printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR);
+ brelse(bh);
+ return 1;
+ }
+ /* a live entry: the directory is not empty */
+ if (de->d_ino) {
+ brelse(bh);
+ return 0;
+ }
+ offset+=de->d_rec_len;
+ }
+ brelse(bh);
+ }
+ return 1;
+}
+
+/*
+ * Remove directory record 'de'.
+ *
+ * When 'de' is its own predecessor it is simply marked unused
+ * (d_ino = 0). Otherwise the chain of records is followed from 'de_pre'
+ * until the record that ends exactly where 'de' starts is found, and
+ * that record absorbs de's length. If the chain is inconsistent a
+ * message is printed and nothing is changed.
+ */
+static void xiafs_rm_entry(struct xiafs_direct *de, struct xiafs_direct * de_pre)
+{
+	u_char * target = (u_char *)de;
+	u_char * prev_end;
+
+	if (de_pre == de) {
+		de->d_ino = 0;
+		return;
+	}
+	for (;;) {
+		prev_end = de_pre->d_rec_len + (u_char *)de_pre;
+		if (!(prev_end < target))
+			break;
+		if (de_pre->d_rec_len < 12) {
+			printk("XIA-FS: bad directory entry (%s %d)\n", WHERE_ERR);
+			return;
+		}
+		de_pre = (struct xiafs_direct *)prev_end;
+	}
+	/* the predecessor must end exactly at the start of 'de' */
+	if (prev_end > target) {
+		printk("XIA-FS: bad directory entry (%s %d)\n", WHERE_ERR);
+		return;
+	}
+	de_pre->d_rec_len += de->d_rec_len;
+}
+
+/*
+ * Remove the empty directory 'name' from 'dir'.
+ *
+ * Returns 0 on success, -ENOENT if the name is not found, -EPERM if the
+ * inode cannot be read, the sticky-bit check fails, the entry is on another
+ * device or refers to 'dir' itself, -ENOTDIR if it is not a directory,
+ * -ENOTEMPTY if empty_dir() says it has entries, and -EBUSY if its i_count
+ * is above 1. Every path leaves through end_rmdir, which calls iput() on
+ * dir and inode and brelse() on bh (inode and bh may still be NULL there).
+ */
+int xiafs_rmdir(struct inode * dir, const char * name, int len)
+{
+ int retval;
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct xiafs_direct * de, * de_pre;
+
+ inode = NULL;
+ bh = xiafs_find_entry(dir, name, len, &de, &de_pre);
+ retval = -ENOENT;
+ if (!bh)
+ goto end_rmdir;
+ retval = -EPERM;
+ if (!(inode = iget(dir->i_sb, de->d_ino)))
+ goto end_rmdir;
+ /* sticky directory: only fsuser, the file owner or the dir owner may remove */
+ if ((dir->i_mode & S_ISVTX) && !fsuser() &&
+ current->fsuid != inode->i_uid &&
+ current->fsuid != dir->i_uid)
+ goto end_rmdir;
+ if (inode->i_dev != dir->i_dev)
+ goto end_rmdir;
+ if (inode == dir) /* we may not delete ".", but "../dir" is ok */
+ goto end_rmdir;
+ if (!S_ISDIR(inode->i_mode)) {
+ retval = -ENOTDIR;
+ goto end_rmdir;
+ }
+ if (!empty_dir(inode)) {
+ retval = -ENOTEMPTY;
+ goto end_rmdir;
+ }
+ if (inode->i_count > 1) {
+ retval = -EBUSY;
+ goto end_rmdir;
+ }
+ /* only warn; the removal goes ahead regardless */
+ if (inode->i_nlink != 2)
+ printk("XIA-FS: empty directory has nlink!=2 (%s %d)\n", WHERE_ERR);
+ xiafs_rm_entry(de, de_pre);
+ mark_buffer_dirty(bh, 1);
+ inode->i_nlink=0;
+ inode->i_dirt=1;
+ /* the removed directory's ".." no longer links to the parent */
+ dir->i_nlink--;
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ dir->i_dirt=1;
+ retval = 0;
+end_rmdir:
+ iput(dir);
+ iput(inode);
+ brelse(bh);
+ return retval;
+}
+
+/*
+ * Remove the non-directory entry 'name' from 'dir' and drop one link of
+ * the inode it refers to.
+ *
+ * Returns 0 on success, -ENOENT if the name is not found or its inode
+ * cannot be read, and -EPERM if the entry is a directory or the sticky-bit
+ * check fails. Every final path leaves through end_unlink, which calls
+ * brelse() on bh and iput() on inode and dir.
+ */
+int xiafs_unlink(struct inode * dir, const char * name, int len)
+{
+ int retval;
+ struct inode * inode;
+ struct buffer_head * bh;
+ struct xiafs_direct * de, * de_pre;
+
+repeat:
+ retval = -ENOENT;
+ inode = NULL;
+ bh = xiafs_find_entry(dir, name, len, &de, &de_pre);
+ if (!bh)
+ goto end_unlink;
+ if (!(inode = iget(dir->i_sb, de->d_ino)))
+ goto end_unlink;
+ retval = -EPERM;
+ if (S_ISDIR(inode->i_mode))
+ goto end_unlink;
+ /*
+ * The entry no longer names the inode we fetched (presumably it changed
+ * while iget() slept): drop everything, yield the CPU and start over.
+ */
+ if (de->d_ino != inode->i_ino) {
+ iput(inode);
+ brelse(bh);
+ current->counter = 0;
+ schedule();
+ goto repeat;
+ }
+ /* sticky directory: only fsuser, the file owner or the dir owner may remove */
+ if ((dir->i_mode & S_ISVTX) && !fsuser() &&
+ current->fsuid != inode->i_uid &&
+ current->fsuid != dir->i_uid)
+ goto end_unlink;
+ /* a named inode with no links is inconsistent; fake one link so the
+ * decrement below ends at 0 */
+ if (!inode->i_nlink) {
+ printk("XIA-FS: Deleting nonexistent file (%s %d)\n", WHERE_ERR);
+ inode->i_nlink=1;
+ }
+ xiafs_rm_entry(de, de_pre);
+ mark_buffer_dirty(bh, 1);
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ dir->i_dirt = 1;
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ retval = 0;
+end_unlink:
+ brelse(bh);
+ iput(inode);
+ iput(dir);
+ return retval;
+}
+
+/*
+ * Create symbolic link 'name' in 'dir' whose target is 'symname'.
+ *
+ * The target string is stored in the first zone of a new inode, truncated
+ * to at most BLOCK_SIZE-1 bytes plus a terminating NUL; i_size is set to
+ * the number of bytes stored.
+ *
+ * Returns 0 on success, -EEXIST if the name is already present, and
+ * -ENOSPC if an inode, the data zone or a directory slot cannot be
+ * obtained. iput(dir) is called on every path.
+ */
+int xiafs_symlink(struct inode * dir, const char * name,
+ int len, const char * symname)
+{
+ struct xiafs_direct * de;
+ struct inode * inode = NULL;
+ struct buffer_head * bh = NULL, * name_block = NULL;
+ int i;
+ char c;
+
+ bh = xiafs_find_entry(dir,name,len, &de, NULL);
+ if (bh) {
+ brelse(bh);
+ iput(dir);
+ return -EEXIST;
+ }
+ if (!(inode = xiafs_new_inode(dir))) {
+ iput(dir);
+ return -ENOSPC;
+ }
+ inode->i_mode = S_IFLNK | S_IRWXUGO;
+ inode->i_op = &xiafs_symlink_inode_operations;
+ name_block = xiafs_bread(inode,0,1);
+ if (!name_block) {
+ iput(dir);
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ iput(inode);
+ return -ENOSPC;
+ }
+ /* copy the target, stopping at its NUL or after BLOCK_SIZE-1 bytes */
+ for (i = 0; i < BLOCK_SIZE-1 && (c=*symname++); i++)
+ name_block->b_data[i] = c;
+ name_block->b_data[i] = 0;
+ mark_buffer_dirty(name_block, 1);
+ brelse(name_block);
+ inode->i_size = i;
+ inode->i_dirt = 1;
+ bh = xiafs_add_entry(dir, name, len, &de, NULL);
+ if (!bh) {
+ /* no room in the directory: give the new inode back */
+ inode->i_nlink--;
+ inode->i_dirt = 1;
+ iput(inode);
+ iput(dir);
+ return -ENOSPC;
+ }
+ de->d_ino = inode->i_ino;
+ mark_buffer_dirty(bh, 1);
+ brelse(bh);
+ iput(dir);
+ iput(inode);
+ return 0;
+}
+
+/*
+ * Add a hard link called 'name' in 'dir' to 'oldinode'.
+ *
+ * Returns 0 on success, -EPERM if oldinode is a directory, -EMLINK if it
+ * already has more than 64000 links, -EEXIST if the name is already
+ * present, and -ENOSPC if no directory slot can be obtained. iput() is
+ * called on both dir and oldinode on every path.
+ */
+int xiafs_link(struct inode * oldinode, struct inode * dir,
+		const char * name, int len)
+{
+	struct buffer_head * slot_bh;
+	struct xiafs_direct * slot;
+	int err = 0;
+
+	/* checks on the inode being linked */
+	if (S_ISDIR(oldinode->i_mode))
+		err = -EPERM;
+	else if (oldinode->i_nlink > 64000)
+		err = -EMLINK;
+	if (err) {
+		iput(oldinode);
+		iput(dir);
+		return err;
+	}
+
+	/* the name must be new, and there must be room for it */
+	slot_bh = xiafs_find_entry(dir, name, len, &slot, NULL);
+	if (slot_bh) {
+		brelse(slot_bh);
+		err = -EEXIST;
+	} else {
+		slot_bh = xiafs_add_entry(dir, name, len, &slot, NULL);
+		if (!slot_bh)
+			err = -ENOSPC;
+	}
+	if (err) {
+		iput(dir);
+		iput(oldinode);
+		return err;
+	}
+
+	slot->d_ino = oldinode->i_ino;
+	mark_buffer_dirty(slot_bh, 1);
+	brelse(slot_bh);
+	iput(dir);
+	oldinode->i_nlink++;
+	oldinode->i_ctime = CURRENT_TIME;
+	oldinode->i_dirt = 1;
+	iput(oldinode);
+	return 0;
+}
+
+/*
+ * Return 1 if 'old_inode' is 'new_inode' itself or one of its ancestors,
+ * found by following ".." upwards from new_inode; return 0 otherwise.
+ *
+ * The walk stops without a match when it reaches another device, when a
+ * ".." lookup fails, or when ".." leads back to the same inode number.
+ */
+static int subdir(struct inode * new_inode, struct inode * old_inode)
+{
+ int ino;
+ int result;
+
+ /* take our own reference: the loop and the final iput() consume one */
+ new_inode->i_count++;
+ result = 0;
+ for (;;) {
+ if (new_inode == old_inode) {
+ result = 1;
+ break;
+ }
+ if (new_inode->i_dev != old_inode->i_dev)
+ break;
+ ino = new_inode->i_ino;
+ /*
+ * xiafs_lookup() calls iput() on the directory passed in and stores
+ * the parent (or NULL on failure) back into new_inode.
+ */
+ if (xiafs_lookup(new_inode,"..",2,&new_inode))
+ break;
+ /* ".." is the directory itself: nothing further up */
+ if (new_inode->i_ino == ino)
+ break;
+ }
+ iput(new_inode);
+ return result;
+}
+
+/*
+ * Inode number stored in the directory record that starts 12 bytes into
+ * 'buffer', i.e. right after the fixed-size "." record: the ".." entry of
+ * a directory's first zone. Usable as an lvalue.
+ */
+#define PARENT_INO(buffer) \
+ (((struct xiafs_direct *) ((u_char *)(buffer) + 12))->d_ino)
+
+/*
+ * rename uses retry to avoid race-conditions: at least they should be minimal.
+ * it tries to allocate all the blocks, then sanity-checks, and if the sanity-
+ * checks fail, it tries to restart itself again. Very practical - no changes
+ * are done until we know everything works ok.. and then all the changes can be
+ * done in one fell swoop when we have claimed all the buffers needed.
+ *
+ * Anybody can rename anything with this: the permission checks are left to the
+ * higher-level routines.
+ *
+ * Returns 0 on success (including when source and target are the same
+ * inode), or a negative errno: -ENOENT, -EPERM, -EEXIST, -EACCES, -EINVAL,
+ * -EIO, -EMLINK or -ENOSPC. Every final path leaves through end_rename,
+ * which releases all buffers and calls iput() on all four inodes.
+ */
+static int do_xiafs_rename(struct inode * old_dir, const char * old_name,
+		int old_len, struct inode * new_dir,
+		const char * new_name, int new_len)
+{
+	struct inode * old_inode, * new_inode;
+	struct buffer_head * old_bh, * new_bh, * dir_bh;
+	struct xiafs_direct * old_de, * old_de_pre, * new_de, * new_de_pre;
+	int retval;
+	int new_added;	/* set when this pass created the target entry */
+
+try_again:
+	old_inode = new_inode = NULL;
+	old_bh = new_bh = dir_bh = NULL;
+	new_de_pre = NULL;
+	new_added = 0;
+	old_bh = xiafs_find_entry(old_dir, old_name, old_len, &old_de, &old_de_pre);
+	retval = -ENOENT;
+	if (!old_bh)
+		goto end_rename;
+	old_inode = __iget(old_dir->i_sb, old_de->d_ino, 0); /* don't cross mnt-points */
+	if (!old_inode)
+		goto end_rename;
+	retval = -EPERM;
+	if ((old_dir->i_mode & S_ISVTX) &&
+	    current->fsuid != old_inode->i_uid &&
+	    current->fsuid != old_dir->i_uid && !fsuser())
+		goto end_rename;
+	new_bh = xiafs_find_entry(new_dir, new_name, new_len, &new_de, NULL);
+	if (new_bh) {
+		new_inode = __iget(new_dir->i_sb, new_de->d_ino, 0);
+		if (!new_inode) {
+			brelse(new_bh);
+			new_bh = NULL;
+		}
+	}
+	if (new_inode == old_inode) {
+		retval = 0;
+		goto end_rename;
+	}
+	if (new_inode && S_ISDIR(new_inode->i_mode)) {
+		retval = -EEXIST;
+		goto end_rename;
+	}
+	retval = -EPERM;
+	if (new_inode && (new_dir->i_mode & S_ISVTX) &&
+	    current->fsuid != new_inode->i_uid &&
+	    current->fsuid != new_dir->i_uid && !fsuser())
+		goto end_rename;
+	if (S_ISDIR(old_inode->i_mode)) {
+		/* moving a directory: extra checks, and grab its first zone
+		 * so that its ".." can be repointed below */
+		retval = -EEXIST;
+		if (new_bh)
+			goto end_rename;
+		retval = -EACCES;
+		if (!permission(old_inode, MAY_WRITE))
+			goto end_rename;
+		retval = -EINVAL;
+		if (subdir(new_dir, old_inode))
+			goto end_rename;
+		retval = -EIO;
+		dir_bh = xiafs_bread(old_inode,0,0);
+		if (!dir_bh)
+			goto end_rename;
+		if (PARENT_INO(dir_bh->b_data) != old_dir->i_ino)
+			goto end_rename;
+		retval = -EMLINK;
+		if (new_dir->i_nlink > 64000)
+			goto end_rename;
+	}
+	if (!new_bh) {
+		new_bh = xiafs_add_entry(new_dir, new_name, new_len, &new_de, &new_de_pre);
+		new_added = (new_bh != NULL);
+	}
+	retval = -ENOSPC;
+	if (!new_bh)
+		goto end_rename;
+	/* sanity checking */
+	if ( (new_inode && (new_de->d_ino != new_inode->i_ino))
+	    || (new_de->d_ino && !new_inode)
+	    || (old_de->d_ino != old_inode->i_ino)) {
+		/*
+		 * Undo only an entry created by this pass. A target found by
+		 * xiafs_find_entry() has no valid new_de_pre and must not be
+		 * removed just because we are retrying.
+		 */
+		if (new_added)
+			xiafs_rm_entry(new_de, new_de_pre);
+		brelse(old_bh);
+		brelse(new_bh);
+		brelse(dir_bh);
+		iput(old_inode);
+		iput(new_inode);
+		current->counter=0;
+		schedule();
+		goto try_again;
+	}
+	/* everything checked out: do all the changes in one go */
+	xiafs_rm_entry(old_de, old_de_pre);
+	new_de->d_ino = old_inode->i_ino;
+	if (new_inode) {
+		/* the overwritten target loses the link held by this name */
+		new_inode->i_nlink--;
+		new_inode->i_dirt = 1;
+	}
+	mark_buffer_dirty(old_bh, 1);
+	mark_buffer_dirty(new_bh, 1);
+	if (dir_bh) {
+		/* a moved directory's ".." now points at the new parent */
+		PARENT_INO(dir_bh->b_data) = new_dir->i_ino;
+		mark_buffer_dirty(dir_bh, 1);
+		old_dir->i_nlink--;
+		new_dir->i_nlink++;
+		old_dir->i_dirt = 1;
+		new_dir->i_dirt = 1;
+	}
+	retval = 0;
+end_rename:
+	brelse(dir_bh);
+	brelse(old_bh);
+	brelse(new_bh);
+	iput(old_inode);
+	iput(new_inode);
+	iput(old_dir);
+	iput(new_dir);
+	return retval;
+}
+
+/*
+ * Ok, rename also locks out other renames, as they can change the parent of
+ * a directory, and we don't want any races. Other races are checked for by
+ * "do_xiafs_rename()", which restarts if there are inconsistencies.
+ *
+ * The lock and wait queue below are function-local statics, so a single
+ * lock is shared by every call of this function.
+ *
+ * Returns whatever do_xiafs_rename() returns.
+ */
+int xiafs_rename(struct inode * old_dir, const char * old_name, int old_len,
+ struct inode * new_dir, const char * new_name, int new_len)
+{
+ static struct wait_queue * wait = NULL;
+ static int lock = 0;
+ int result;
+
+ /* wait until no other rename holds the lock, then take it */
+ while (lock)
+ sleep_on(&wait);
+ lock = 1;
+ result = do_xiafs_rename(old_dir, old_name, old_len,
+ new_dir, new_name, new_len);
+ /* release the lock and wake anybody sleeping on it */
+ lock = 0;
+ wake_up(&wait);
+ return result;
+}
diff --git a/fs/xiafs/symlink.c b/fs/xiafs/symlink.c
new file mode 100644
index 000000000..757ad5796
--- /dev/null
+++ b/fs/xiafs/symlink.c
@@ -0,0 +1,118 @@
+/*
+ * linux/fs/xiafs/symlink.c
+ *
+ * Copyright (C) Q. Frank Xia, 1993.
+ *
+ * Based on Linus' minix/symlink.c
+ * Copyright (C) Linus Torvalds, 1991, 1992.
+ *
+ * This software may be redistributed per Linux Copyright.
+ */
+
+#include <asm/segment.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/xia_fs.h>
+#include <linux/stat.h>
+
+/*
+ * Forward declarations: both functions are referenced by the operations
+ * table below and defined further down in this file.
+ */
+static int
+xiafs_readlink(struct inode *, char *, int);
+
+static int
+xiafs_follow_link(struct inode *, struct inode *, int, int, struct inode **);
+
+/*
+ * symlinks can't do much...
+ *
+ * Inode operations for xiafs symbolic links: only readlink and follow_link
+ * are provided, every other slot is NULL. The initializer is positional,
+ * so the order of the entries must match struct inode_operations.
+ */
+struct inode_operations xiafs_symlink_inode_operations = {
+ NULL, /* no file-operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ xiafs_readlink, /* readlink */
+ xiafs_follow_link, /* follow_link */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+/*
+ * Copy the target of symbolic link 'inode' into 'buffer' with
+ * put_fs_byte(), at most min(buflen, BLOCK_SIZE) bytes.
+ *
+ * Returns the number of bytes copied (the terminating NUL is not counted),
+ * 0 if the link's first zone cannot be read, or -EINVAL if the inode is not
+ * a symbolic link. iput(inode) is called on every path.
+ */
+static int xiafs_readlink(struct inode * inode, char * buffer, int buflen)
+{
+ struct buffer_head * bh;
+ int i;
+ char c;
+
+ if (!S_ISLNK(inode->i_mode)) {
+ iput(inode);
+ return -EINVAL;
+ }
+ if (buflen > BLOCK_SIZE)
+ buflen = BLOCK_SIZE;
+ bh = xiafs_bread(inode, 0, 0);
+ /* the access time is updated even when the read above failed */
+ if (!IS_RDONLY (inode)) {
+ inode->i_atime=CURRENT_TIME;
+ inode->i_dirt=1;
+ }
+ iput(inode);
+ if (!bh)
+ return 0;
+ /* copy until the stored NUL or until the caller's buffer is full */
+ for (i=0; i < buflen && (c=bh->b_data[i]); i++)
+ put_fs_byte(c, buffer++);
+ /* a NUL is appended only when at least two bytes of room remain */
+ if (i < buflen-1)
+ put_fs_byte((char)0, buffer);
+ brelse(bh);
+ return i;
+}
+
+/*
+ * Resolve 'inode' relative to 'dir' and return the result in *res_inode
+ * (cleared first).
+ *
+ * If inode is not a symbolic link it is returned unchanged. Otherwise the
+ * link text in its first zone is passed to open_namei() with 'dir' as the
+ * starting directory. A NULL dir is replaced by current->fs->root.
+ *
+ * Returns 0 or the value of open_namei() on the resolved path, -ENOENT if
+ * inode is NULL, -ELOOP if current->link_count is already above 5, and
+ * -EIO if the link zone cannot be read.
+ */
+static int xiafs_follow_link(struct inode * dir, struct inode * inode,
+ int flag, int mode, struct inode ** res_inode)
+{
+ int error;
+ struct buffer_head * bh;
+
+ *res_inode = NULL;
+ if (!dir) {
+ dir = current->fs->root;
+ dir->i_count++;
+ }
+ if (!inode) {
+ iput(dir);
+ return -ENOENT;
+ }
+ /* not a link: nothing to follow, hand the inode back as it is */
+ if (!S_ISLNK(inode->i_mode)) {
+ iput(dir);
+ *res_inode = inode;
+ return 0;
+ }
+ if (!IS_RDONLY (inode)) {
+ inode->i_atime=CURRENT_TIME;
+ inode->i_dirt=1;
+ }
+ /* bound the depth of nested symbolic links */
+ if (current->link_count > 5) {
+ iput(inode);
+ iput(dir);
+ return -ELOOP;
+ }
+ if (!(bh = xiafs_bread(inode, 0, 0))) {
+ iput(inode);
+ iput(dir);
+ return -EIO;
+ }
+ iput(inode);
+ /*
+ * dir is not put on this path -- presumably open_namei() consumes the
+ * reference; confirm against its definition.
+ */
+ current->link_count++;
+ error = open_namei(bh->b_data,flag,mode,res_inode,dir);
+ current->link_count--;
+ brelse(bh);
+ return error;
+}
+
+
+
diff --git a/fs/xiafs/truncate.c b/fs/xiafs/truncate.c
new file mode 100644
index 000000000..bdb9d39be
--- /dev/null
+++ b/fs/xiafs/truncate.c
@@ -0,0 +1,197 @@
+/*
+ * linux/fs/xiafs/truncate.c
+ *
+ * Copyright (C) Q. Frank Xia, 1993.
+ *
+ * Based on Linus' minix/truncate.c
+ * Copyright (C) Linus Torvalds, 1991, 1992.
+ *
+ * This software may be redistributed per Linux Copyright.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/xia_fs.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+
+#include "xiafs_mac.h"
+
+/*
+ * Linus' comment:
+ *
+ * Truncate has the most races in the whole filesystem: coding it is
+ * a pain in the a**. Especially as I don't do any locking...
+ *
+ * The code may look a bit weird, but that's just because I've tried to
+ * handle things like file-size changes in a somewhat graceful manner.
+ * Anyway, truncating a file at the same time somebody else writes to it
+ * is likely to result in pretty weird behaviour...
+ *
+ * The new code handles normal truncates (size = 0) as well as the more
+ * general case (size = XXX). I hope.
+ */
+
+/*
+ * Number of zones needed to hold i_size bytes (rounded up), i.e. the index
+ * of the first file zone that lies beyond the current size. Relies on a
+ * variable named 'inode' being in scope, and re-reads inode->i_size on
+ * every use.
+ */
+#define DT_ZONE ((inode->i_size + XIAFS_ZSIZE(inode->i_sb) - 1) \
+ >> XIAFS_ZSIZE_BITS(inode->i_sb) )
+
+/*
+ * Free the direct zones (slots 0..7 of i_zone) that lie at or beyond
+ * DT_ZONE.
+ *
+ * Returns non-zero if some zone could not be freed on this pass because
+ * its buffer is in use elsewhere or the slot changed underneath us; the
+ * caller is expected to try again.
+ */
+static int trunc_direct(struct inode * inode)
+{
+ u_long * lp;
+ struct buffer_head * bh;
+ int i, tmp;
+ int retry = 0;
+
+repeat:
+ for (i = DT_ZONE ; i < 8 ; i++) {
+ /*
+ * DT_ZONE is recomputed from i_size each time; if it has moved past
+ * i, start over (presumably the file grew in the meantime).
+ */
+ if (i < DT_ZONE)
+ goto repeat;
+ lp=i + inode->u.xiafs_i.i_zone;
+ if (!(tmp = *lp))
+ continue;
+ bh = getblk(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb));
+ /* same re-check after getblk() */
+ if (i < DT_ZONE) {
+ brelse(bh);
+ goto repeat;
+ }
+ /* buffer held by someone else, or slot changed: leave it for a retry */
+ if ((bh && bh->b_count != 1) || tmp != *lp)
+ retry = 1;
+ else {
+ *lp = 0;
+ inode->i_dirt = 1;
+ inode->i_blocks-=2 << XIAFS_ZSHIFT(inode->i_sb);
+ xiafs_free_zone(inode->i_sb, tmp);
+ }
+ brelse(bh);
+ }
+ return retry;
+}
+
+/*
+ * Free the zones referenced by the indirect block whose number is stored
+ * in *lp, as far as they lie at or beyond DT_ZONE, and free the indirect
+ * block itself once it holds no zone numbers at all.
+ *
+ * addr_off is subtracted from DT_ZONE to get the first slot to free
+ * (INDT_ZONE); callers pass the file-zone index of the block's slot 0.
+ *
+ * Returns 0 when nothing is left to do here and non-zero when a later
+ * retry is needed. If the indirect block cannot be read, *lp is simply
+ * cleared and 0 is returned.
+ */
+static int trunc_indirect(struct inode * inode, int addr_off, u_long * lp)
+{
+
+#define INDT_ZONE (DT_ZONE - addr_off)
+
+ struct buffer_head * bh, * ind_bh;
+ int i, tmp;
+ u_long * indp;
+ int retry = 0;
+
+ if ( !(tmp=*lp) )
+ return 0;
+ ind_bh = bread(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb));
+ /* the slot changed while we were reading: ask for a retry */
+ if (tmp != *lp) {
+ brelse(ind_bh);
+ return 1;
+ }
+ if (!ind_bh) {
+ *lp = 0;
+ return 0;
+ }
+repeat:
+ for (i = INDT_ZONE<0?0:INDT_ZONE; i < XIAFS_ADDRS_PER_Z(inode->i_sb); i++) {
+ /* INDT_ZONE is recomputed from i_size; restart if it moved past i */
+ if (i < INDT_ZONE)
+ goto repeat;
+ indp = i+(u_long *) ind_bh->b_data;
+ if (!(tmp=*indp))
+ continue;
+ bh = getblk(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb));
+ if (i < INDT_ZONE) {
+ brelse(bh);
+ goto repeat;
+ }
+ /* buffer held by someone else, or slot changed: leave it for a retry */
+ if ((bh && bh->b_count != 1) || tmp != *indp)
+ retry = 1;
+ else {
+ *indp = 0;
+ mark_buffer_dirty(ind_bh, 1);
+ inode->i_blocks-= 2 << XIAFS_ZSHIFT(inode->i_sb);
+ xiafs_free_zone(inode->i_sb, tmp);
+ }
+ brelse(bh);
+ }
+ /* if every slot of the indirect block is now zero, free the block too */
+ indp = (u_long *) ind_bh->b_data;
+ for (i = 0; i < XIAFS_ADDRS_PER_Z(inode->i_sb) && !(*indp++); i++) ;
+ if (i >= XIAFS_ADDRS_PER_Z(inode->i_sb)) {
+ if (ind_bh->b_count != 1)
+ retry = 1;
+ else {
+ tmp = *lp;
+ *lp = 0;
+ inode->i_blocks-= 2 << XIAFS_ZSHIFT(inode->i_sb);
+ xiafs_free_zone(inode->i_sb, tmp);
+ }
+ }
+ brelse(ind_bh);
+ return retry;
+}
+
+/*
+ * Truncate the double-indirect tree rooted at i_dind_zone: run
+ * trunc_indirect() on each slot from DINDT_ZONE onwards and free the
+ * double-indirect block itself once it holds no zone numbers at all.
+ *
+ * Returns 0 when nothing is left to do here and non-zero when a later
+ * retry is needed. If the block cannot be read, the pointer is simply
+ * cleared and 0 is returned.
+ */
+static int trunc_dindirect(struct inode * inode)
+{
+
+#define DINDT_ZONE \
+ ((DT_ZONE-XIAFS_ADDRS_PER_Z(inode->i_sb)-8)>>XIAFS_ADDRS_PER_Z_BITS(inode->i_sb))
+
+ int i, tmp;
+ struct buffer_head * dind_bh;
+ u_long * dindp, * lp;
+ int retry = 0;
+
+ lp = &(inode->u.xiafs_i.i_dind_zone);
+ if (!(tmp = *lp))
+ return 0;
+ dind_bh = bread(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb));
+ /* the pointer changed while we were reading: ask for a retry */
+ if (tmp != *lp) {
+ brelse(dind_bh);
+ return 1;
+ }
+ if (!dind_bh) {
+ *lp = 0;
+ return 0;
+ }
+repeat:
+ for (i=DINDT_ZONE<0?0:DINDT_ZONE ; i < XIAFS_ADDRS_PER_Z(inode->i_sb) ; i ++) {
+ /* DINDT_ZONE is recomputed from i_size; restart if it moved past i */
+ if (i < DINDT_ZONE)
+ goto repeat;
+ dindp = i+(u_long *) dind_bh->b_data;
+ /*
+ * Slot i covers the file zones starting at
+ * 8 + (1+i) * XIAFS_ADDRS_PER_Z: past the 8 direct zones and the
+ * single indirect block.
+ */
+ retry |= trunc_indirect(inode,
+ 8+((1+i)<<XIAFS_ADDRS_PER_Z_BITS(inode->i_sb)),
+ dindp);
+ mark_buffer_dirty(dind_bh, 1);
+ }
+ /* if every slot is now zero, free the double-indirect block too */
+ dindp = (u_long *) dind_bh->b_data;
+ for (i = 0; i < XIAFS_ADDRS_PER_Z(inode->i_sb) && !(*dindp++); i++);
+ if (i >= XIAFS_ADDRS_PER_Z(inode->i_sb)) {
+ if (dind_bh->b_count != 1)
+ retry = 1;
+ else {
+ tmp = *lp;
+ *lp = 0;
+ inode->i_dirt = 1;
+ inode->i_blocks-=2 << XIAFS_ZSHIFT(inode->i_sb);
+ xiafs_free_zone(inode->i_sb, tmp);
+ }
+ }
+ brelse(dind_bh);
+ return retry;
+}
+
+/*
+ * Release every zone of 'inode' that lies beyond its current i_size.
+ *
+ * Only regular files, directories and symbolic links are handled; other
+ * inode types are left untouched. The three truncation passes are
+ * repeated, yielding the CPU in between, until none of them asks for a
+ * retry. Finally ctime and mtime are stamped and the inode marked dirty.
+ */
+void xiafs_truncate(struct inode * inode)
+{
+	int busy;
+
+	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
+	    !S_ISLNK(inode->i_mode))
+		return;
+	do {
+		busy = trunc_direct(inode);
+		busy |= trunc_indirect(inode, 8, &(inode->u.xiafs_i.i_ind_zone));
+		busy |= trunc_dindirect(inode);
+		if (busy) {
+			/* something was in use: give up our time slice, then retry */
+			current->counter = 0;
+			schedule();
+		}
+	} while (busy);
+	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+	inode->i_dirt = 1;
+}
diff --git a/fs/xiafs/xiafs_mac.h b/fs/xiafs/xiafs_mac.h
new file mode 100644
index 000000000..05af6e42a
--- /dev/null
+++ b/fs/xiafs/xiafs_mac.h
@@ -0,0 +1,32 @@
+/*
+ * linux/fs/xiafs/xiafs_mac.h
+ *
+ * Copyright (C) Q. Frank Xia, 1993.
+ */
+
+/*
+ * Argument lists for printk-style error reports: INTERN_ERR supplies the
+ * shared message buffer plus file and line, WHERE_ERR just file and line
+ * (matching a "(%s %d)" format).
+ */
+extern char internal_error_message[];
+#define INTERN_ERR internal_error_message, __FILE__, __LINE__
+#define WHERE_ERR __FILE__, __LINE__
+
+/*
+ * Zone geometry, all derived from the super block's s_zone_shift
+ * ('sp' is a struct super_block pointer):
+ * ZSHIFT log2 of the number of blocks per zone
+ * ZSIZE / ZSIZE_BITS zone size in bytes / its log2
+ * ADDRS_PER_Z(_BITS) zone pointers per zone / its log2
+ * BITS_PER_Z(_BITS) bits per zone / its log2
+ * INODES_PER_Z on-disk inodes per zone
+ * NOTE(review): XIAFS_ADDRS_PER_Z shifts by (2 - ZSHIFT), which is only
+ * valid while s_zone_shift <= 2 -- confirm the mount code enforces that.
+ */
+#define XIAFS_ZSHIFT(sp) ((sp)->u.xiafs_sb.s_zone_shift)
+#define XIAFS_ZSIZE(sp) (BLOCK_SIZE << XIAFS_ZSHIFT(sp))
+#define XIAFS_ZSIZE_BITS(sp) (BLOCK_SIZE_BITS + XIAFS_ZSHIFT(sp))
+#define XIAFS_ADDRS_PER_Z(sp) (BLOCK_SIZE >> (2 - XIAFS_ZSHIFT(sp)))
+#define XIAFS_ADDRS_PER_Z_BITS(sp) (BLOCK_SIZE_BITS - 2 + XIAFS_ZSHIFT(sp))
+#define XIAFS_BITS_PER_Z(sp) (BLOCK_SIZE << (3 + XIAFS_ZSHIFT(sp)))
+#define XIAFS_BITS_PER_Z_BITS(sp) (BLOCK_SIZE_BITS + 3 + XIAFS_ZSHIFT(sp))
+#define XIAFS_INODES_PER_Z(sp) (_XIAFS_INODES_PER_BLOCK << XIAFS_ZSHIFT(sp))
+
+/* Use the most significant bytes of zone pointers to store block counter. */
+/* This is ugly, but it works. Note, we have another 7 bytes for "expansion". */
+
+/*
+ * XIAFS_GET_BLOCKS: rebuild the 24-bit block count from the top bytes of
+ * i_zone[0] (bits 0-7), i_zone[1] (bits 8-15) and i_zone[2] (bits 16-23)
+ * of the raw inode 'row_ip' and assign it to the lvalue 'blocks'.
+ */
+#define XIAFS_GET_BLOCKS(row_ip, blocks) \
+	(blocks)=((((row_ip)->i_zone[0] >> 24) & 0xff )|\
+		(((row_ip)->i_zone[1] >> 16) & 0xff00 )|\
+		(((row_ip)->i_zone[2] >> 8) & 0xff0000 ) )
+
+/*
+ * XIAFS_PUT_BLOCKS should be called before saving zone pointers: it
+ * overwrites i_zone[0..2] completely, leaving only the block-count byte
+ * in the top 8 bits of each.
+ *
+ * Wrapped in do { } while (0) so that the three assignments behave as one
+ * statement, e.g. under an unbraced if/else. 'row_ip' and 'blocks' are
+ * evaluated three times each.
+ */
+#define XIAFS_PUT_BLOCKS(row_ip, blocks) \
+	do { \
+		(row_ip)->i_zone[2]=((blocks)<< 8) & 0xff000000; \
+		(row_ip)->i_zone[1]=((blocks)<<16) & 0xff000000; \
+		(row_ip)->i_zone[0]=((blocks)<<24) & 0xff000000; \
+	} while (0)