diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1994-11-28 11:59:19 +0000 |
---|---|---|
committer | <ralf@linux-mips.org> | 1994-11-28 11:59:19 +0000 |
commit | 1513ff9b7899ab588401c89db0e99903dbf5f886 (patch) | |
tree | f69cc81a940a502ea23d664c3ffb2d215a479667 /fs |
Import of Linus's Linux 1.1.68
Diffstat (limited to 'fs')
130 files changed, 42011 insertions, 0 deletions
diff --git a/fs/ChangeLog b/fs/ChangeLog new file mode 100644 index 000000000..056d07a86 --- /dev/null +++ b/fs/ChangeLog @@ -0,0 +1,10 @@ +Mon Oct 24 23:27:42 1994 Theodore Y. Ts'o (tytso@rt-11) + + * fcntl.c (sys_fcntl): Liberalize security checks which Alan Cox + put in. + +Thu Oct 20 23:44:22 1994 Theodore Y. Ts'o (tytso@rt-11) + + * fcntl.c (sys_fcntl): Add more of a security check to the + F_SETOWN fcntl(). + diff --git a/fs/Makefile b/fs/Makefile new file mode 100644 index 000000000..78dd720ce --- /dev/null +++ b/fs/Makefile @@ -0,0 +1,116 @@ +# +# Makefile for the linux filesystem. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +SUBDIRS = minix ext ext2 msdos proc isofs nfs xiafs umsdos hpfs sysv + +ifdef CONFIG_MINIX_FS +FS_SUBDIRS := $(FS_SUBDIRS) minix +endif +ifdef CONFIG_EXT_FS +FS_SUBDIRS := $(FS_SUBDIRS) ext +endif +ifdef CONFIG_EXT2_FS +FS_SUBDIRS := $(FS_SUBDIRS) ext2 +endif +ifdef CONFIG_MSDOS_FS +FS_SUBDIRS := $(FS_SUBDIRS) msdos +else +MODULE_FS_SUBDIRS := $(MODULE_FS_SUBDIRS) msdos +endif +ifdef CONFIG_PROC_FS +FS_SUBDIRS := $(FS_SUBDIRS) proc +endif +ifdef CONFIG_ISO9660_FS +FS_SUBDIRS := $(FS_SUBDIRS) isofs +endif +ifdef CONFIG_NFS_FS +FS_SUBDIRS := $(FS_SUBDIRS) nfs +endif +ifdef CONFIG_XIA_FS +FS_SUBDIRS := $(FS_SUBDIRS) xiafs +endif +ifdef CONFIG_UMSDOS_FS +FS_SUBDIRS := $(FS_SUBDIRS) umsdos +else +MODULE_FS_SUBDIRS := $(MODULE_FS_SUBDIRS) umsdos +endif +ifdef CONFIG_SYSV_FS +FS_SUBDIRS := $(FS_SUBDIRS) sysv +endif +ifdef CONFIG_HPFS_FS +FS_SUBDIRS := $(FS_SUBDIRS) hpfs +endif + +ifdef CONFIG_BINFMT_ELF +BINFMTS := $(BINFMTS) binfmt_elf.o +else +MODULES := $(MODULES) binfmt_elf.o +endif + +.c.s: + $(CC) $(CFLAGS) -S $< +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< + +OBJS= open.o read_write.o inode.o devices.o 
file_table.o buffer.o super.o \ + block_dev.o stat.o exec.o pipe.o namei.o fcntl.o ioctl.o \ + select.o fifo.o locks.o filesystems.o dcache.o $(BINFMTS) + +all: fs.o filesystems.a modules modules_fs + +fs.o: $(OBJS) + $(LD) -r -o fs.o $(OBJS) + +filesystems.a: dummy + rm -f filesystems.a + set -e; for i in $(FS_SUBDIRS); do \ + test ! -d $$i || \ + { $(MAKE) -C $$i; $(AR) rcs filesystems.a $$i/$$i.o; }; done + +ifdef MODULES + +modules: + $(MAKE) CFLAGS="$(CFLAGS) -DMODULE" $(MODULES) + (cd ../modules;for i in $(MODULES); do ln -sf ../fs/$$i .; done) + +else + +modules: + +endif + +ifdef MODULE_FS_SUBDIRS + +modules_fs: + set -e; for i in $(MODULE_FS_SUBDIRS); do \ + test ! -d $$i || \ + { $(MAKE) -C $$i; }; done + + +else + +modules_fs: + +endif + +depend dep: + $(CPP) -M *.c > .depend + set -e; for i in $(SUBDIRS); do \ + test ! -d $$i || $(MAKE) -C $$i dep; done + +dummy: + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c new file mode 100644 index 000000000..c2dc5cbca --- /dev/null +++ b/fs/binfmt_elf.c @@ -0,0 +1,791 @@ +/* + * linux/fs/binfmt_elf.c + * + * These are the functions used to load ELF format executables as used + * on SVr4 machines. Information on the format may be found in the book + * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support + * Tools". + * + * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com). 
+ */ +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/a.out.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/binfmts.h> +#include <linux/string.h> +#include <linux/fcntl.h> +#include <linux/ptrace.h> +#include <linux/malloc.h> +#include <linux/shm.h> +#include <linux/personality.h> + +#include <asm/segment.h> + +#include <linux/config.h> + +#ifndef CONFIG_BINFMT_ELF +#include <linux/module.h> +#include "../tools/version.h" +#endif + +#include <linux/unistd.h> +typedef int (*sysfun_p)(); +extern sysfun_p sys_call_table[]; +#define SYS(name) (sys_call_table[__NR_##name]) + +#define DLINFO_ITEMS 8 + +#include <linux/elf.h> + +static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs); +static int load_elf_library(int fd); + +struct linux_binfmt elf_format = { +#ifdef CONFIG_BINFMT_ELF + NULL, NULL, load_elf_binary, load_elf_library, NULL +#else + NULL, &mod_use_count_, load_elf_binary, load_elf_library, NULL +#endif +}; + +/* We need to explicitly zero any fractional pages + after the data section (i.e. bss). 
This would + contain the junk from the file that should not + be in memory */ + + +static void padzero(int elf_bss){ + unsigned int fpnt, nbyte; + + if(elf_bss & 0xfff) { + + nbyte = (PAGE_SIZE - (elf_bss & 0xfff)) & 0xfff; + if(nbyte){ + verify_area(VERIFY_WRITE, (void *) elf_bss, nbyte); + + fpnt = elf_bss; + while(fpnt & 0xfff) put_fs_byte(0, fpnt++); + }; + }; +} + +unsigned long * create_elf_tables(char * p,int argc,int envc,struct elfhdr * exec, unsigned int load_addr, int ibcs) +{ + unsigned long *argv,*envp, *dlinfo; + unsigned long * sp; + struct vm_area_struct *mpnt; + + mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL); + if (mpnt) { + mpnt->vm_task = current; + mpnt->vm_start = PAGE_MASK & (unsigned long) p; + mpnt->vm_end = TASK_SIZE; + mpnt->vm_page_prot = PAGE_PRIVATE|PAGE_DIRTY; +#ifdef VM_STACK_FLAGS + mpnt->vm_flags = VM_STACK_FLAGS; + mpnt->vm_pte = 0; +#else +# ifdef VM_GROWSDOWN + mpnt->vm_flags = VM_GROWSDOWN; +# endif +#endif + mpnt->vm_share = NULL; + mpnt->vm_inode = NULL; + mpnt->vm_offset = 0; + mpnt->vm_ops = NULL; + insert_vm_struct(current, mpnt); +#ifndef VM_GROWSDOWN + current->mm->stk_vma = mpnt; +#endif + + } + sp = (unsigned long *) (0xfffffffc & (unsigned long) p); + if(exec) sp -= DLINFO_ITEMS*2; + dlinfo = sp; + sp -= envc+1; + envp = sp; + sp -= argc+1; + argv = sp; + if (!ibcs) { + put_fs_long((unsigned long)envp,--sp); + put_fs_long((unsigned long)argv,--sp); + } + + /* The constant numbers (0-9) that we are writing here are + described in the header file sys/auxv.h on at least + some versions of SVr4 */ + if(exec) { /* Put this here for an ELF program interpreter */ + struct elf_phdr * eppnt; + eppnt = (struct elf_phdr *) exec->e_phoff; + put_fs_long(3,dlinfo++); put_fs_long(load_addr + exec->e_phoff,dlinfo++); + put_fs_long(4,dlinfo++); put_fs_long(sizeof(struct elf_phdr),dlinfo++); + put_fs_long(5,dlinfo++); put_fs_long(exec->e_phnum,dlinfo++); + put_fs_long(9,dlinfo++); put_fs_long((unsigned long) 
exec->e_entry,dlinfo++); + put_fs_long(7,dlinfo++); put_fs_long(SHM_RANGE_START,dlinfo++); + put_fs_long(8,dlinfo++); put_fs_long(0,dlinfo++); + put_fs_long(6,dlinfo++); put_fs_long(PAGE_SIZE,dlinfo++); + put_fs_long(0,dlinfo++); put_fs_long(0,dlinfo++); + }; + + put_fs_long((unsigned long)argc,--sp); + current->mm->arg_start = (unsigned long) p; + while (argc-->0) { + put_fs_long((unsigned long) p,argv++); + while (get_fs_byte(p++)) /* nothing */ ; + } + put_fs_long(0,argv); + current->mm->arg_end = current->mm->env_start = (unsigned long) p; + while (envc-->0) { + put_fs_long((unsigned long) p,envp++); + while (get_fs_byte(p++)) /* nothing */ ; + } + put_fs_long(0,envp); + current->mm->env_end = (unsigned long) p; + return sp; +} + + +/* This is much more generalized than the library routine read function, + so we keep this separate. Technically the library read function + is only provided so that we can read a.out libraries that have + an ELF header */ + +static unsigned int load_elf_interp(struct elfhdr * interp_elf_ex, + struct inode * interpreter_inode) +{ + struct file * file; + struct elf_phdr *elf_phdata = NULL; + struct elf_phdr *eppnt; + unsigned int len; + unsigned int load_addr; + int elf_exec_fileno; + int elf_bss; + int old_fs, retval; + unsigned int last_bss; + int error; + int i, k; + + elf_bss = 0; + last_bss = 0; + error = load_addr = 0; + + /* First of all, some simple consistency checks */ + if((interp_elf_ex->e_type != ET_EXEC && + interp_elf_ex->e_type != ET_DYN) || + (interp_elf_ex->e_machine != EM_386 && interp_elf_ex->e_machine != EM_486) || + (!interpreter_inode->i_op || + !interpreter_inode->i_op->default_file_ops->mmap)){ + return 0xffffffff; + }; + + /* Now read in all of the header information */ + + if(sizeof(struct elf_phdr) * interp_elf_ex->e_phnum > PAGE_SIZE) + return 0xffffffff; + + elf_phdata = (struct elf_phdr *) + kmalloc(sizeof(struct elf_phdr) * interp_elf_ex->e_phnum, GFP_KERNEL); + if(!elf_phdata) return 0xffffffff; + + 
old_fs = get_fs(); + set_fs(get_ds()); + retval = read_exec(interpreter_inode, interp_elf_ex->e_phoff, (char *) elf_phdata, + sizeof(struct elf_phdr) * interp_elf_ex->e_phnum); + set_fs(old_fs); + + elf_exec_fileno = open_inode(interpreter_inode, O_RDONLY); + if (elf_exec_fileno < 0) return 0xffffffff; + file = current->files->fd[elf_exec_fileno]; + + eppnt = elf_phdata; + for(i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) + if(eppnt->p_type == PT_LOAD) { + error = do_mmap(file, + eppnt->p_vaddr & 0xfffff000, + eppnt->p_filesz + (eppnt->p_vaddr & 0xfff), + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_DENYWRITE | (interp_elf_ex->e_type == ET_EXEC ? MAP_FIXED : 0), + eppnt->p_offset & 0xfffff000); + + if(!load_addr && interp_elf_ex->e_type == ET_DYN) + load_addr = error; + k = load_addr + eppnt->p_vaddr + eppnt->p_filesz; + if(k > elf_bss) elf_bss = k; + if(error < 0 && error > -1024) break; /* Real error */ + k = load_addr + eppnt->p_memsz + eppnt->p_vaddr; + if(k > last_bss) last_bss = k; + } + + /* Now use mmap to map the library into memory. 
*/ + + + SYS(close)(elf_exec_fileno); + if(error < 0 && error > -1024) { + kfree(elf_phdata); + return 0xffffffff; + } + + padzero(elf_bss); + len = (elf_bss + 0xfff) & 0xfffff000; /* What we have mapped so far */ + + /* Map the last of the bss segment */ + if (last_bss > len) + do_mmap(NULL, len, last_bss-len, + PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE, 0); + kfree(elf_phdata); + + return ((unsigned int) interp_elf_ex->e_entry) + load_addr; +} + +static unsigned int load_aout_interp(struct exec * interp_ex, + struct inode * interpreter_inode) +{ + int retval; + unsigned int elf_entry; + + current->mm->brk = interp_ex->a_bss + + (current->mm->end_data = interp_ex->a_data + + (current->mm->end_code = interp_ex->a_text)); + elf_entry = interp_ex->a_entry; + + + if (N_MAGIC(*interp_ex) == OMAGIC) { + do_mmap(NULL, 0, interp_ex->a_text+interp_ex->a_data, + PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE, 0); + retval = read_exec(interpreter_inode, 32, (char *) 0, + interp_ex->a_text+interp_ex->a_data); + } else if (N_MAGIC(*interp_ex) == ZMAGIC || N_MAGIC(*interp_ex) == QMAGIC) { + do_mmap(NULL, 0, interp_ex->a_text+interp_ex->a_data, + PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE, 0); + retval = read_exec(interpreter_inode, + N_TXTOFF(*interp_ex) , + (char *) N_TXTADDR(*interp_ex), + interp_ex->a_text+interp_ex->a_data); + } else + retval = -1; + + if(retval >= 0) + do_mmap(NULL, (interp_ex->a_text + interp_ex->a_data + 0xfff) & + 0xfffff000, interp_ex->a_bss, + PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE, 0); + if(retval < 0) return 0xffffffff; + return elf_entry; +} + +/* + * These are the functions used to load ELF style executables and shared + * libraries. There is no binary dependent code anywhere else. 
+ */ + +#define INTERPRETER_NONE 0 +#define INTERPRETER_AOUT 1 +#define INTERPRETER_ELF 2 + + +static int +load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) +{ + struct elfhdr elf_ex; + struct elfhdr interp_elf_ex; + struct file * file; + struct exec interp_ex; + struct inode *interpreter_inode; + unsigned int load_addr; + unsigned int interpreter_type = INTERPRETER_NONE; + unsigned char ibcs2_interpreter; + int i; + int old_fs; + int error; + struct elf_phdr * elf_ppnt, *elf_phdata; + int elf_exec_fileno; + unsigned int elf_bss, k, elf_brk; + int retval; + char * elf_interpreter; + unsigned int elf_entry; + int status; + unsigned int start_code, end_code, end_data; + unsigned int elf_stack; + char passed_fileno[6]; + +#ifndef CONFIG_BINFMT_ELF + MOD_INC_USE_COUNT; +#endif + + ibcs2_interpreter = 0; + status = 0; + load_addr = 0; + elf_ex = *((struct elfhdr *) bprm->buf); /* exec-header */ + + if (elf_ex.e_ident[0] != 0x7f || + strncmp(&elf_ex.e_ident[1], "ELF",3) != 0) { +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return -ENOEXEC; + } + + + /* First of all, some simple consistency checks */ + if(elf_ex.e_type != ET_EXEC || + (elf_ex.e_machine != EM_386 && elf_ex.e_machine != EM_486) || + (!bprm->inode->i_op || !bprm->inode->i_op->default_file_ops || + !bprm->inode->i_op->default_file_ops->mmap)){ +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return -ENOEXEC; + }; + + /* Now read in all of the header information */ + + elf_phdata = (struct elf_phdr *) kmalloc(elf_ex.e_phentsize * + elf_ex.e_phnum, GFP_KERNEL); + + old_fs = get_fs(); + set_fs(get_ds()); + retval = read_exec(bprm->inode, elf_ex.e_phoff, (char *) elf_phdata, + elf_ex.e_phentsize * elf_ex.e_phnum); + set_fs(old_fs); + if (retval < 0) { + kfree (elf_phdata); +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return retval; + } + + elf_ppnt = elf_phdata; + + elf_bss = 0; + elf_brk = 0; + + elf_exec_fileno = open_inode(bprm->inode, O_RDONLY); + + if 
(elf_exec_fileno < 0) { + kfree (elf_phdata); +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return elf_exec_fileno; + } + + file = current->files->fd[elf_exec_fileno]; + + elf_stack = 0xffffffff; + elf_interpreter = NULL; + start_code = 0; + end_code = 0; + end_data = 0; + + old_fs = get_fs(); + set_fs(get_ds()); + + for(i=0;i < elf_ex.e_phnum; i++){ + if(elf_ppnt->p_type == PT_INTERP) { + /* This is the program interpreter used for shared libraries - + for now assume that this is an a.out format binary */ + + elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz, + GFP_KERNEL); + + retval = read_exec(bprm->inode,elf_ppnt->p_offset,elf_interpreter, + elf_ppnt->p_filesz); + /* If the program interpreter is one of these two, + then assume an iBCS2 image. Otherwise assume + a native linux image. */ + if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 || + strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0) + ibcs2_interpreter = 1; +#if 0 + printk("Using ELF interpreter %s\n", elf_interpreter); +#endif + if(retval >= 0) + retval = namei(elf_interpreter, &interpreter_inode); + if(retval >= 0) + retval = read_exec(interpreter_inode,0,bprm->buf,128); + + if(retval >= 0){ + interp_ex = *((struct exec *) bprm->buf); /* exec-header */ + interp_elf_ex = *((struct elfhdr *) bprm->buf); /* exec-header */ + + }; + if(retval < 0) { + kfree (elf_phdata); + kfree(elf_interpreter); +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return retval; + }; + }; + elf_ppnt++; + }; + + set_fs(old_fs); + + /* Some simple consistency checks for the interpreter */ + if(elf_interpreter){ + interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT; + if(retval < 0) { + kfree(elf_interpreter); + kfree(elf_phdata); +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return -ELIBACC; + }; + /* Now figure out which format our binary is */ + if((N_MAGIC(interp_ex) != OMAGIC) && + (N_MAGIC(interp_ex) != ZMAGIC) && + (N_MAGIC(interp_ex) != QMAGIC)) + interpreter_type = 
INTERPRETER_ELF; + + if (interp_elf_ex.e_ident[0] != 0x7f || + strncmp(&interp_elf_ex.e_ident[1], "ELF",3) != 0) + interpreter_type &= ~INTERPRETER_ELF; + + if(!interpreter_type) + { + kfree(elf_interpreter); + kfree(elf_phdata); +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return -ELIBBAD; + }; + } + + /* OK, we are done with that, now set up the arg stuff, + and then start this sucker up */ + + if (!bprm->sh_bang) { + char * passed_p; + + if(interpreter_type == INTERPRETER_AOUT) { + sprintf(passed_fileno, "%d", elf_exec_fileno); + passed_p = passed_fileno; + + if(elf_interpreter) { + bprm->p = copy_strings(1,&passed_p,bprm->page,bprm->p,2); + bprm->argc++; + }; + }; + if (!bprm->p) { + if(elf_interpreter) { + kfree(elf_interpreter); + } + kfree (elf_phdata); +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return -E2BIG; + } + } + + /* OK, This is the point of no return */ + flush_old_exec(bprm); + + current->mm->end_data = 0; + current->mm->end_code = 0; + current->mm->start_mmap = ELF_START_MMAP; + current->mm->mmap = NULL; + elf_entry = (unsigned int) elf_ex.e_entry; + + /* Do this so that we can load the interpreter, if need be. We will + change some of these later */ + current->mm->rss = 0; + bprm->p += change_ldt(0, bprm->page); + current->mm->start_stack = bprm->p; + + /* Now we do a little grungy work by mmaping the ELF image into + the correct location in memory. At this point, we assume that + the image should be loaded at fixed address, not at a variable + address. 
*/ + + old_fs = get_fs(); + set_fs(get_ds()); + + elf_ppnt = elf_phdata; + for(i=0;i < elf_ex.e_phnum; i++){ + + if(elf_ppnt->p_type == PT_INTERP) { + /* Set these up so that we are able to load the interpreter */ + /* Now load the interpreter into user address space */ + set_fs(old_fs); + + if(interpreter_type & 1) elf_entry = + load_aout_interp(&interp_ex, interpreter_inode); + + if(interpreter_type & 2) elf_entry = + load_elf_interp(&interp_elf_ex, interpreter_inode); + + old_fs = get_fs(); + set_fs(get_ds()); + + iput(interpreter_inode); + kfree(elf_interpreter); + + if(elf_entry == 0xffffffff) { + printk("Unable to load interpreter\n"); + kfree(elf_phdata); + send_sig(SIGSEGV, current, 0); +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return 0; + }; + }; + + + if(elf_ppnt->p_type == PT_LOAD) { + error = do_mmap(file, + elf_ppnt->p_vaddr & 0xfffff000, + elf_ppnt->p_filesz + (elf_ppnt->p_vaddr & 0xfff), + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, + elf_ppnt->p_offset & 0xfffff000); + +#ifdef LOW_ELF_STACK + if(elf_ppnt->p_vaddr & 0xfffff000 < elf_stack) + elf_stack = elf_ppnt->p_vaddr & 0xfffff000; +#endif + + if(!load_addr) + load_addr = elf_ppnt->p_vaddr - elf_ppnt->p_offset; + k = elf_ppnt->p_vaddr; + if(k > start_code) start_code = k; + k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz; + if(k > elf_bss) elf_bss = k; + if((elf_ppnt->p_flags | PROT_WRITE) && end_code < k) + end_code = k; + if(end_data < k) end_data = k; + k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz; + if(k > elf_brk) elf_brk = k; + }; + elf_ppnt++; + }; + set_fs(old_fs); + + kfree(elf_phdata); + + if(interpreter_type != INTERPRETER_AOUT) SYS(close)(elf_exec_fileno); + current->personality = (ibcs2_interpreter ? 
PER_SVR4 : PER_LINUX); + + if (current->exec_domain && current->exec_domain->use_count) + (*current->exec_domain->use_count)--; + if (current->binfmt && current->binfmt->use_count) + (*current->binfmt->use_count)--; + current->exec_domain = lookup_exec_domain(current->personality); + current->binfmt = &elf_format; + if (current->exec_domain && current->exec_domain->use_count) + (*current->exec_domain->use_count)++; + if (current->binfmt && current->binfmt->use_count) + (*current->binfmt->use_count)++; + +#ifndef VM_STACK_FLAGS + current->executable = bprm->inode; + bprm->inode->i_count++; +#endif +#ifdef LOW_ELF_STACK + current->start_stack = p = elf_stack - 4; +#endif + bprm->p -= MAX_ARG_PAGES*PAGE_SIZE; + bprm->p = (unsigned long) + create_elf_tables((char *)bprm->p, + bprm->argc, + bprm->envc, + (interpreter_type == INTERPRETER_ELF ? &elf_ex : NULL), + load_addr, + (interpreter_type == INTERPRETER_AOUT ? 0 : 1)); + if(interpreter_type == INTERPRETER_AOUT) + current->mm->arg_start += strlen(passed_fileno) + 1; + current->mm->start_brk = current->mm->brk = elf_brk; + current->mm->end_code = end_code; + current->mm->start_code = start_code; + current->mm->end_data = end_data; + current->mm->start_stack = bprm->p; + current->suid = current->euid = current->fsuid = bprm->e_uid; + current->sgid = current->egid = current->fsgid = bprm->e_gid; + + /* Calling sys_brk effectively mmaps the pages that we need for the bss and break + sections */ + current->mm->brk = (elf_bss + 0xfff) & 0xfffff000; + SYS(brk)((elf_brk + 0xfff) & 0xfffff000); + + padzero(elf_bss); + +#if 0 + printk("(start_brk) %x\n" , current->mm->start_brk); + printk("(end_code) %x\n" , current->mm->end_code); + printk("(start_code) %x\n" , current->mm->start_code); + printk("(end_data) %x\n" , current->mm->end_data); + printk("(start_stack) %x\n" , current->mm->start_stack); + printk("(brk) %x\n" , current->mm->brk); +#endif + + /* Why this, you ask??? 
Well SVr4 maps page 0 as read-only, + and some applications "depend" upon this behavior. + Since we do not have the power to recompile these, we + emulate the SVr4 behavior. Sigh. */ + error = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE, 0); + + regs->eip = elf_entry; /* eip, magic happens :-) */ + regs->esp = bprm->p; /* stack pointer */ + if (current->flags & PF_PTRACED) + send_sig(SIGTRAP, current, 0); +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return 0; +} + +/* This is really simpleminded and specialized - we are loading an + a.out library that is given an ELF header. */ + +static int +load_elf_library(int fd){ + struct file * file; + struct elfhdr elf_ex; + struct elf_phdr *elf_phdata = NULL; + struct inode * inode; + unsigned int len; + int elf_bss; + int old_fs, retval; + unsigned int bss; + int error; + int i,j, k; + +#ifndef CONFIG_BINFMT_ELF + MOD_INC_USE_COUNT; +#endif + + len = 0; + file = current->files->fd[fd]; + inode = file->f_inode; + elf_bss = 0; + + set_fs(KERNEL_DS); + if (file->f_op->read(inode, file, (char *) &elf_ex, sizeof(elf_ex)) != sizeof(elf_ex)) { + SYS(close)(fd); +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return -EACCES; + } + set_fs(USER_DS); + + if (elf_ex.e_ident[0] != 0x7f || + strncmp(&elf_ex.e_ident[1], "ELF",3) != 0) { +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return -ENOEXEC; + } + + /* First of all, some simple consistency checks */ + if(elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || + (elf_ex.e_machine != EM_386 && elf_ex.e_machine != EM_486) || + (!inode->i_op || !inode->i_op->default_file_ops->mmap)){ +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return -ENOEXEC; + }; + + /* Now read in all of the header information */ + + if(sizeof(struct elf_phdr) * elf_ex.e_phnum > PAGE_SIZE) { +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return -ENOEXEC; + } + + elf_phdata = (struct elf_phdr *) + kmalloc(sizeof(struct elf_phdr) * 
elf_ex.e_phnum, GFP_KERNEL); + + old_fs = get_fs(); + set_fs(get_ds()); + retval = read_exec(inode, elf_ex.e_phoff, (char *) elf_phdata, + sizeof(struct elf_phdr) * elf_ex.e_phnum); + set_fs(old_fs); + + j = 0; + for(i=0; i<elf_ex.e_phnum; i++) + if((elf_phdata + i)->p_type == PT_LOAD) j++; + + if(j != 1) { + kfree(elf_phdata); +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return -ENOEXEC; + }; + + while(elf_phdata->p_type != PT_LOAD) elf_phdata++; + + /* Now use mmap to map the library into memory. */ + error = do_mmap(file, + elf_phdata->p_vaddr & 0xfffff000, + elf_phdata->p_filesz + (elf_phdata->p_vaddr & 0xfff), + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, + elf_phdata->p_offset & 0xfffff000); + + k = elf_phdata->p_vaddr + elf_phdata->p_filesz; + if(k > elf_bss) elf_bss = k; + + SYS(close)(fd); + if (error != elf_phdata->p_vaddr & 0xfffff000) { + kfree(elf_phdata); +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return error; + } + + padzero(elf_bss); + + len = (elf_phdata->p_filesz + elf_phdata->p_vaddr+ 0xfff) & 0xfffff000; + bss = elf_phdata->p_memsz + elf_phdata->p_vaddr; + if (bss > len) + do_mmap(NULL, len, bss-len, + PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE, 0); + kfree(elf_phdata); +#ifndef CONFIG_BINFMT_ELF + MOD_DEC_USE_COUNT; +#endif + return 0; +} + +#ifndef CONFIG_BINFMT_ELF +char kernel_version[] = UTS_RELEASE; + +int init_module(void) { + /* Install the COFF, ELF and XOUT loaders. + * N.B. We *rely* on the table being the right size with the + * right number of free slots... + */ + register_binfmt(&elf_format); + return 0; +} + +void cleanup_module( void) { + + if (MOD_IN_USE) + printk(KERN_INFO "iBCS: module is in use, remove delayed\n"); + + /* Remove the COFF and ELF loaders. 
*/ + unregister_binfmt(&elf_format); +} +#endif diff --git a/fs/block_dev.c b/fs/block_dev.c new file mode 100644 index 000000000..d19af6fa0 --- /dev/null +++ b/fs/block_dev.c @@ -0,0 +1,313 @@ +/* + * linux/fs/block_dev.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/locks.h> +#include <linux/fcntl.h> +#include <asm/segment.h> +#include <asm/system.h> + +extern int *blk_size[]; +extern int *blksize_size[]; + +#define NBUF 64 + +int block_write(struct inode * inode, struct file * filp, char * buf, int count) +{ + int blocksize, blocksize_bits, i, j, buffercount,write_error; + int block, blocks; + loff_t offset; + int chars; + int written = 0; + int cluster_list[8]; + struct buffer_head * bhlist[NBUF]; + int blocks_per_cluster; + unsigned int size; + unsigned int dev; + struct buffer_head * bh, *bufferlist[NBUF]; + register char * p; + int excess; + + write_error = buffercount = 0; + dev = inode->i_rdev; + if ( is_read_only( inode->i_rdev )) + return -EPERM; + blocksize = BLOCK_SIZE; + if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)]) + blocksize = blksize_size[MAJOR(dev)][MINOR(dev)]; + + i = blocksize; + blocksize_bits = 0; + while(i != 1) { + blocksize_bits++; + i >>= 1; + } + + blocks_per_cluster = PAGE_SIZE / blocksize; + + block = filp->f_pos >> blocksize_bits; + offset = filp->f_pos & (blocksize-1); + + if (blk_size[MAJOR(dev)]) + size = ((loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS) >> blocksize_bits; + else + size = INT_MAX; + while (count>0) { + if (block >= size) + return written; + chars = blocksize - offset; + if (chars > count) + chars=count; + +#if 0 + if (chars == blocksize) + bh = getblk(dev, block, blocksize); + else + bh = breada(dev,block,block+1,block+2,-1); + +#else + for(i=0; i<blocks_per_cluster; i++) cluster_list[i] = block+i; + if((block % blocks_per_cluster) == 0) + generate_cluster(dev, cluster_list, 
blocksize); + bh = getblk(dev, block, blocksize); + + if (chars != blocksize && !bh->b_uptodate) { + if(!filp->f_reada || + !read_ahead[MAJOR(dev)]) { + /* We do this to force the read of a single buffer */ + brelse(bh); + bh = bread(dev,block,blocksize); + } else { + /* Read-ahead before write */ + blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2; + if (block + blocks > size) blocks = size - block; + if (blocks > NBUF) blocks=NBUF; + excess = (block + blocks) % blocks_per_cluster; + if ( blocks > excess ) + blocks -= excess; + bhlist[0] = bh; + for(i=1; i<blocks; i++){ + if(((i+block) % blocks_per_cluster) == 0) { + for(j=0; j<blocks_per_cluster; j++) cluster_list[j] = block+i+j; + generate_cluster(dev, cluster_list, blocksize); + }; + bhlist[i] = getblk (dev, block+i, blocksize); + if(!bhlist[i]){ + while(i >= 0) brelse(bhlist[i--]); + return written? written: -EIO; + }; + }; + ll_rw_block(READ, blocks, bhlist); + for(i=1; i<blocks; i++) brelse(bhlist[i]); + wait_on_buffer(bh); + + }; + }; +#endif + block++; + if (!bh) + return written?written:-EIO; + p = offset + bh->b_data; + offset = 0; + filp->f_pos += chars; + written += chars; + count -= chars; + memcpy_fromfs(p,buf,chars); + p += chars; + buf += chars; + bh->b_uptodate = 1; + mark_buffer_dirty(bh, 0); + if (filp->f_flags & O_SYNC) + bufferlist[buffercount++] = bh; + else + brelse(bh); + if (buffercount == NBUF){ + ll_rw_block(WRITE, buffercount, bufferlist); + for(i=0; i<buffercount; i++){ + wait_on_buffer(bufferlist[i]); + if (!bufferlist[i]->b_uptodate) + write_error=1; + brelse(bufferlist[i]); + } + buffercount=0; + } + if(write_error) + break; + } + if ( buffercount ){ + ll_rw_block(WRITE, buffercount, bufferlist); + for(i=0; i<buffercount; i++){ + wait_on_buffer(bufferlist[i]); + if (!bufferlist[i]->b_uptodate) + write_error=1; + brelse(bufferlist[i]); + } + } + filp->f_reada = 1; + if(write_error) + return -EIO; + return written; +} + +int block_read(struct inode * inode, struct file * filp, 
char * buf, int count) +{ + unsigned int block; + loff_t offset; + int blocksize; + int blocksize_bits, i; + unsigned int blocks, rblocks, left; + int bhrequest, uptodate; + int cluster_list[8]; + int blocks_per_cluster; + struct buffer_head ** bhb, ** bhe; + struct buffer_head * buflist[NBUF]; + struct buffer_head * bhreq[NBUF]; + unsigned int chars; + loff_t size; + unsigned int dev; + int read; + int excess; + + dev = inode->i_rdev; + blocksize = BLOCK_SIZE; + if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)]) + blocksize = blksize_size[MAJOR(dev)][MINOR(dev)]; + i = blocksize; + blocksize_bits = 0; + while (i != 1) { + blocksize_bits++; + i >>= 1; + } + + offset = filp->f_pos; + if (blk_size[MAJOR(dev)]) + size = (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS; + else + size = INT_MAX; + + blocks_per_cluster = PAGE_SIZE / blocksize; + + if (offset > size) + left = 0; + else + left = size - offset; + if (left > count) + left = count; + if (left <= 0) + return 0; + read = 0; + block = offset >> blocksize_bits; + offset &= blocksize-1; + size >>= blocksize_bits; + rblocks = blocks = (left + offset + blocksize - 1) >> blocksize_bits; + bhb = bhe = buflist; + if (filp->f_reada) { + if (blocks < read_ahead[MAJOR(dev)] / (blocksize >> 9)) + blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9); + excess = (block + blocks) % blocks_per_cluster; + if ( blocks > excess ) + blocks -= excess; + if (rblocks > blocks) + blocks = rblocks; + + } + if (block + blocks > size) + blocks = size - block; + + /* We do this in a two stage process. We first try and request + as many blocks as we can, then we wait for the first one to + complete, and then we try and wrap up as many as are actually + done. This routine is rather generic, in that it can be used + in a filesystem by substituting the appropriate function in + for getblk. + + This routine is optimized to make maximum use of the various + buffers and caches. 
*/ + + do { + bhrequest = 0; + uptodate = 1; + while (blocks) { + --blocks; +#if 1 + if((block % blocks_per_cluster) == 0) { + for(i=0; i<blocks_per_cluster; i++) cluster_list[i] = block+i; + generate_cluster(dev, cluster_list, blocksize); + } +#endif + *bhb = getblk(dev, block++, blocksize); + if (*bhb && !(*bhb)->b_uptodate) { + uptodate = 0; + bhreq[bhrequest++] = *bhb; + } + + if (++bhb == &buflist[NBUF]) + bhb = buflist; + + /* If the block we have on hand is uptodate, go ahead + and complete processing. */ + if (uptodate) + break; + if (bhb == bhe) + break; + } + + /* Now request them all */ + if (bhrequest) { + ll_rw_block(READ, bhrequest, bhreq); + refill_freelist(blocksize); + } + + do { /* Finish off all I/O that has actually completed */ + if (*bhe) { + wait_on_buffer(*bhe); + if (!(*bhe)->b_uptodate) { /* read error? */ + brelse(*bhe); + if (++bhe == &buflist[NBUF]) + bhe = buflist; + left = 0; + break; + } + } + if (left < blocksize - offset) + chars = left; + else + chars = blocksize - offset; + filp->f_pos += chars; + left -= chars; + read += chars; + if (*bhe) { + memcpy_tofs(buf,offset+(*bhe)->b_data,chars); + brelse(*bhe); + buf += chars; + } else { + while (chars-->0) + put_fs_byte(0,buf++); + } + offset = 0; + if (++bhe == &buflist[NBUF]) + bhe = buflist; + } while (left > 0 && bhe != bhb && (!*bhe || !(*bhe)->b_lock)); + } while (left > 0); + +/* Release the read-ahead blocks */ + while (bhe != bhb) { + brelse(*bhe); + if (++bhe == &buflist[NBUF]) + bhe = buflist; + }; + if (!read) + return -EIO; + filp->f_reada = 1; + return read; +} + +int block_fsync(struct inode *inode, struct file *filp) +{ + return fsync_dev (inode->i_rdev); +} diff --git a/fs/buffer.c b/fs/buffer.c new file mode 100644 index 000000000..6416a1f71 --- /dev/null +++ b/fs/buffer.c @@ -0,0 +1,1849 @@ +/* + * linux/fs/buffer.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * 'buffer.c' implements the buffer-cache functions. 
Race-conditions have + * been avoided by NEVER letting an interrupt change a buffer (except for the + * data, of course), but instead letting the caller do it. + */ + +/* + * NOTE! There is one discordant note here: checking floppies for + * disk change. This is where it fits best, I think, as it should + * invalidate changed floppy-disk-caches. + */ + +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/errno.h> +#include <linux/malloc.h> + +#include <asm/system.h> +#include <asm/segment.h> +#include <asm/io.h> + +#define NR_SIZES 4 +static char buffersize_index[9] = {-1, 0, 1, -1, 2, -1, -1, -1, 3}; +static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096}; + +#define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9]) + +static int grow_buffers(int pri, int size); +static int shrink_specific_buffers(unsigned int priority, int size); +static int maybe_shrink_lav_buffers(int); + +static int nr_hash = 0; /* Size of hash table */ +static struct buffer_head ** hash_table; +struct buffer_head ** buffer_pages; +static struct buffer_head * lru_list[NR_LIST] = {NULL, }; +static struct buffer_head * free_list[NR_SIZES] = {NULL, }; +static struct buffer_head * unused_list = NULL; +static struct wait_queue * buffer_wait = NULL; + +int nr_buffers = 0; +int nr_buffers_type[NR_LIST] = {0,}; +int nr_buffers_size[NR_SIZES] = {0,}; +int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},}; +int buffer_usage[NR_SIZES] = {0,}; /* Usage counts used to determine load average */ +int buffers_lav[NR_SIZES] = {0,}; /* Load average of buffer usage */ +int nr_free[NR_SIZES] = {0,}; +int buffermem = 0; +int nr_buffer_heads = 0; +extern int *blksize_size[]; + +/* Here is the parameter block for the bdflush process. 
*/ +static void wakeup_bdflush(int); + +#define N_PARAM 9 +#define LAV + +/* Tunable parameters: b_un names each of the N_PARAM values, data[] overlays the same storage as an array. The initializer of bdf_prm supplies the defaults. */ +static union bdflush_param{ + struct { + int nfract; /* Percentage of buffer cache dirty to + activate bdflush */ + int ndirty; /* Maximum number of dirty blocks to write out per + wake-cycle */ + int nrefill; /* Number of clean buffers to try and obtain + each time we call refill */ + int nref_dirt; /* Dirty buffer threshold for activating bdflush + when trying to refill buffers. */ + int clu_nfract; /* Percentage of buffer cache to scan to + search for free clusters */ + int age_buffer; /* Time for normal buffer to age before + we flush it */ + int age_super; /* Time for superblock to age before we + flush it */ + int lav_const; /* Constant used for load average (time + constant) */ + int lav_ratio; /* Used to determine how low a lav for a + particular size can go before we start to + trim back the buffers */ + } b_un; + unsigned int data[N_PARAM]; /* the same N_PARAM values, addressable by index */ +} bdf_prm = {{25, 500, 64, 256, 15, 3000, 500, 1884, 2}}; + +/* The lav constant is set for 1 minute, as long as the update process runs + every 5 seconds. If you change the frequency of update, the time + constant will also change. */ + + +/* These are the min and max parameter values that we will allow to be assigned; entry k of each table bounds data[k] above. */ +static int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 100, 100, 1, 1}; +static int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5}; + +/* + * Rewrote the wait-routines to use the "new" wait-queue functionality, + * and getting rid of the cli-sti pairs. The wait-queue routines still + * need cli-sti, but now it's just a couple of 386 instructions or so. + * + * Note that the real wait_on_buffer() is an inline function that checks + * if 'b_wait' is set before calling this, so that the queues aren't set + * up unnecessarily. 
+ */ +void __wait_on_buffer(struct buffer_head * bh) +{ + struct wait_queue wait = { current, NULL }; + + bh->b_count++; + add_wait_queue(&bh->b_wait, &wait); +repeat: + current->state = TASK_UNINTERRUPTIBLE; + if (bh->b_lock) { + schedule(); + goto repeat; + } + remove_wait_queue(&bh->b_wait, &wait); + bh->b_count--; + current->state = TASK_RUNNING; +} + +/* Call sync_buffers with wait!=0 to ensure that the call does not + return until all buffer writes have completed. Sync() may return + before the writes have finished; fsync() may not. */ + + +/* Godamity-damn. Some buffers (bitmaps for filesystems) + spontaneously dirty themselves without ever brelse being called. + We will ultimately want to put these in a separate list, but for + now we search all of the lists for dirty buffers */ + +static int sync_buffers(dev_t dev, int wait) +{ + int i, retry, pass = 0, err = 0; + int nlist, ncount; + struct buffer_head * bh, *next; + + /* One pass for no-wait, three for wait: + 0) write out all dirty, unlocked buffers; + 1) write out all dirty buffers, waiting if locked; + 2) wait for completion by waiting for all buffers to unlock. */ + repeat: + retry = 0; + repeat2: + ncount = 0; + /* We search all lists as a failsafe mechanism, not because we expect + there to be dirty buffers on any of the other lists. */ + for(nlist = 0; nlist < NR_LIST; nlist++) + { + repeat1: + bh = lru_list[nlist]; + if(!bh) continue; + for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) { + if(bh->b_list != nlist) goto repeat1; + next = bh->b_next_free; + if(!lru_list[nlist]) break; + if (dev && bh->b_dev != dev) + continue; + if (bh->b_lock) + { + /* Buffer is locked; skip it unless wait is + requested AND pass > 0. */ + if (!wait || !pass) { + retry = 1; + continue; + } + wait_on_buffer (bh); + goto repeat2; + } + /* If an unlocked buffer is not uptodate, there has + been an IO error. Skip it. 
*/ + if (wait && bh->b_req && !bh->b_lock && + !bh->b_dirt && !bh->b_uptodate) { + err = 1; + printk("Weird - unlocked, clean and not uptodate buffer on list %d %x %lu\n", nlist, bh->b_dev, bh->b_blocknr); + continue; + } + /* Don't write clean buffers. Don't write ANY buffers + on the third pass. */ + if (!bh->b_dirt || pass>=2) + continue; + /* don't bother about locked buffers */ + if (bh->b_lock) + continue; + bh->b_count++; + bh->b_flushtime = 0; + ll_rw_block(WRITE, 1, &bh); + + if(nlist != BUF_DIRTY) { + printk("[%d %x %ld] ", nlist, bh->b_dev, bh->b_blocknr); + ncount++; + }; + bh->b_count--; + retry = 1; + } + } + if (ncount) printk("sys_sync: %d dirty buffers not on dirty list\n", ncount); + + /* If we are waiting for the sync to succeed, and if any dirty + blocks were written, then repeat; on the second pass, only + wait for buffers being written (do not pass to write any + more buffers on the second pass). */ + if (wait && retry && ++pass<=2) + goto repeat; + return err; +} + +void sync_dev(dev_t dev) +{ + sync_buffers(dev, 0); + sync_supers(dev); + sync_inodes(dev); + sync_buffers(dev, 0); +} + +int fsync_dev(dev_t dev) +{ + sync_buffers(dev, 0); + sync_supers(dev); + sync_inodes(dev); + return sync_buffers(dev, 1); +} + +asmlinkage int sys_sync(void) +{ + sync_dev(0); + return 0; +} + +int file_fsync (struct inode *inode, struct file *filp) +{ + return fsync_dev(inode->i_dev); +} + +asmlinkage int sys_fsync(unsigned int fd) +{ + struct file * file; + struct inode * inode; + + if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode)) + return -EBADF; + if (!file->f_op || !file->f_op->fsync) + return -EINVAL; + if (file->f_op->fsync(inode,file)) + return -EIO; + return 0; +} + +void invalidate_buffers(dev_t dev) +{ + int i; + int nlist; + struct buffer_head * bh; + + for(nlist = 0; nlist < NR_LIST; nlist++) { + bh = lru_list[nlist]; + for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; + bh = bh->b_next_free) { + if (bh->b_dev != dev) + 
continue; + wait_on_buffer(bh); + if (bh->b_dev == dev) + bh->b_flushtime = bh->b_uptodate = + bh->b_dirt = bh->b_req = 0; + } + } +} + +#define _hashfn(dev,block) (((unsigned)(dev^block))%nr_hash) +#define hash(dev,block) hash_table[_hashfn(dev,block)] + +static inline void remove_from_hash_queue(struct buffer_head * bh) +{ + if (bh->b_next) + bh->b_next->b_prev = bh->b_prev; + if (bh->b_prev) + bh->b_prev->b_next = bh->b_next; + if (hash(bh->b_dev,bh->b_blocknr) == bh) + hash(bh->b_dev,bh->b_blocknr) = bh->b_next; + bh->b_next = bh->b_prev = NULL; +} + +static inline void remove_from_lru_list(struct buffer_head * bh) +{ + if (!(bh->b_prev_free) || !(bh->b_next_free)) + panic("VFS: LRU block list corrupted"); + if (bh->b_dev == 0xffff) panic("LRU list corrupted"); + bh->b_prev_free->b_next_free = bh->b_next_free; + bh->b_next_free->b_prev_free = bh->b_prev_free; + + if (lru_list[bh->b_list] == bh) + lru_list[bh->b_list] = bh->b_next_free; + if(lru_list[bh->b_list] == bh) + lru_list[bh->b_list] = NULL; + bh->b_next_free = bh->b_prev_free = NULL; +} + +static inline void remove_from_free_list(struct buffer_head * bh) +{ + int isize = BUFSIZE_INDEX(bh->b_size); + if (!(bh->b_prev_free) || !(bh->b_next_free)) + panic("VFS: Free block list corrupted"); + if(bh->b_dev != 0xffff) panic("Free list corrupted"); + if(!free_list[isize]) + panic("Free list empty"); + nr_free[isize]--; + if(bh->b_next_free == bh) + free_list[isize] = NULL; + else { + bh->b_prev_free->b_next_free = bh->b_next_free; + bh->b_next_free->b_prev_free = bh->b_prev_free; + if (free_list[isize] == bh) + free_list[isize] = bh->b_next_free; + }; + bh->b_next_free = bh->b_prev_free = NULL; +} + +static inline void remove_from_queues(struct buffer_head * bh) +{ + if(bh->b_dev == 0xffff) { + remove_from_free_list(bh); /* Free list entries should not be + in the hash queue */ + return; + }; + nr_buffers_type[bh->b_list]--; + nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--; + 
remove_from_hash_queue(bh); + remove_from_lru_list(bh); +} + +static inline void put_last_lru(struct buffer_head * bh) +{ + if (!bh) + return; + if (bh == lru_list[bh->b_list]) { + lru_list[bh->b_list] = bh->b_next_free; + return; + } + if(bh->b_dev == 0xffff) panic("Wrong block for lru list"); + remove_from_lru_list(bh); +/* add to back of free list */ + + if(!lru_list[bh->b_list]) { + lru_list[bh->b_list] = bh; + lru_list[bh->b_list]->b_prev_free = bh; + }; + + bh->b_next_free = lru_list[bh->b_list]; + bh->b_prev_free = lru_list[bh->b_list]->b_prev_free; + lru_list[bh->b_list]->b_prev_free->b_next_free = bh; + lru_list[bh->b_list]->b_prev_free = bh; +} + +static inline void put_last_free(struct buffer_head * bh) +{ + int isize; + if (!bh) + return; + + isize = BUFSIZE_INDEX(bh->b_size); + bh->b_dev = 0xffff; /* So it is obvious we are on the free list */ +/* add to back of free list */ + + if(!free_list[isize]) { + free_list[isize] = bh; + bh->b_prev_free = bh; + }; + + nr_free[isize]++; + bh->b_next_free = free_list[isize]; + bh->b_prev_free = free_list[isize]->b_prev_free; + free_list[isize]->b_prev_free->b_next_free = bh; + free_list[isize]->b_prev_free = bh; +} + +static inline void insert_into_queues(struct buffer_head * bh) +{ +/* put at end of free list */ + + if(bh->b_dev == 0xffff) { + put_last_free(bh); + return; + }; + if(!lru_list[bh->b_list]) { + lru_list[bh->b_list] = bh; + bh->b_prev_free = bh; + }; + if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted"); + bh->b_next_free = lru_list[bh->b_list]; + bh->b_prev_free = lru_list[bh->b_list]->b_prev_free; + lru_list[bh->b_list]->b_prev_free->b_next_free = bh; + lru_list[bh->b_list]->b_prev_free = bh; + nr_buffers_type[bh->b_list]++; + nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++; +/* put the buffer in new hash-queue if it has a device */ + bh->b_prev = NULL; + bh->b_next = NULL; + if (!bh->b_dev) + return; + bh->b_next = hash(bh->b_dev,bh->b_blocknr); + hash(bh->b_dev,bh->b_blocknr) 
= bh; + if (bh->b_next) + bh->b_next->b_prev = bh; +} + +static struct buffer_head * find_buffer(dev_t dev, int block, int size) +{ + struct buffer_head * tmp; + + for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next) + if (tmp->b_dev==dev && tmp->b_blocknr==block) + if (tmp->b_size == size) + return tmp; + else { + printk("VFS: Wrong blocksize on device %d/%d\n", + MAJOR(dev), MINOR(dev)); + return NULL; + } + return NULL; +} + +/* + * Why like this, I hear you say... The reason is race-conditions. + * As we don't lock buffers (unless we are reading them, that is), + * something might happen to it while we sleep (ie a read-error + * will force it bad). This shouldn't really happen currently, but + * the code is ready. + */ +struct buffer_head * get_hash_table(dev_t dev, int block, int size) +{ + struct buffer_head * bh; + + for (;;) { + if (!(bh=find_buffer(dev,block,size))) + return NULL; + bh->b_count++; + wait_on_buffer(bh); + if (bh->b_dev == dev && bh->b_blocknr == block && bh->b_size == size) + return bh; + bh->b_count--; + } +} + +void set_blocksize(dev_t dev, int size) +{ + int i, nlist; + struct buffer_head * bh, *bhnext; + + if (!blksize_size[MAJOR(dev)]) + return; + + switch(size) { + default: panic("Invalid blocksize passed to set_blocksize"); + case 512: case 1024: case 2048: case 4096:; + } + + if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) { + blksize_size[MAJOR(dev)][MINOR(dev)] = size; + return; + } + if (blksize_size[MAJOR(dev)][MINOR(dev)] == size) + return; + sync_buffers(dev, 2); + blksize_size[MAJOR(dev)][MINOR(dev)] = size; + + /* We need to be quite careful how we do this - we are moving entries + around on the free list, and we can get in a loop if we are not careful.*/ + + for(nlist = 0; nlist < NR_LIST; nlist++) { + bh = lru_list[nlist]; + for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) { + if(!bh) break; + bhnext = bh->b_next_free; + if (bh->b_dev != dev) + continue; + if (bh->b_size == size) + 
continue; + + wait_on_buffer(bh); + if (bh->b_dev == dev && bh->b_size != size) { + bh->b_uptodate = bh->b_dirt = bh->b_req = + bh->b_flushtime = 0; + }; + remove_from_hash_queue(bh); + } + } +} + +#define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock) + +void refill_freelist(int size) +{ + struct buffer_head * bh, * tmp; + struct buffer_head * candidate[NR_LIST]; + unsigned int best_time, winner; + int isize = BUFSIZE_INDEX(size); + int buffers[NR_LIST]; + int i; + int needed; + + /* First see if we even need this. Sometimes it is advantageous + to request some blocks in a filesystem that we know that we will + be needing ahead of time. */ + + if (nr_free[isize] > 100) + return; + + /* If there are too many dirty buffers, we wake up the update process + now so as to ensure that there are still clean buffers available + for user processes to use (and dirty) */ + + /* We are going to try and locate this much memory */ + needed =bdf_prm.b_un.nrefill * size; + + while (nr_free_pages > min_free_pages*2 && needed > 0 && + grow_buffers(GFP_BUFFER, size)) { + needed -= PAGE_SIZE; + } + + if(needed <= 0) return; + + /* See if there are too many buffers of a different size. + If so, victimize them */ + + while(maybe_shrink_lav_buffers(size)) + { + if(!grow_buffers(GFP_BUFFER, size)) break; + needed -= PAGE_SIZE; + if(needed <= 0) return; + }; + + /* OK, we cannot grow the buffer cache, now try and get some + from the lru list */ + + /* First set the candidate pointers to usable buffers. This + should be quick nearly all of the time. 
*/ + +repeat0: + for(i=0; i<NR_LIST; i++){ + if(i == BUF_DIRTY || i == BUF_SHARED || + nr_buffers_type[i] == 0) { + candidate[i] = NULL; + buffers[i] = 0; + continue; + } + buffers[i] = nr_buffers_type[i]; + for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--) + { + if(buffers[i] < 0) panic("Here is the problem"); + tmp = bh->b_next_free; + if (!bh) break; + + if (mem_map[MAP_NR((unsigned long) bh->b_data)] != 1 || + bh->b_dirt) { + refile_buffer(bh); + continue; + }; + + if (bh->b_count || bh->b_size != size) + continue; + + /* Buffers are written in the order they are placed + on the locked list. If we encounter a locked + buffer here, this means that the rest of them + are also locked */ + if(bh->b_lock && (i == BUF_LOCKED || i == BUF_LOCKED1)) { + buffers[i] = 0; + break; + } + + if (BADNESS(bh)) continue; + break; + }; + if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */ + else candidate[i] = bh; + if(candidate[i] && candidate[i]->b_count) panic("Here is the problem"); + } + + repeat: + if(needed <= 0) return; + + /* Now see which candidate wins the election */ + + winner = best_time = UINT_MAX; + for(i=0; i<NR_LIST; i++){ + if(!candidate[i]) continue; + if(candidate[i]->b_lru_time < best_time){ + best_time = candidate[i]->b_lru_time; + winner = i; + } + } + + /* If we have a winner, use it, and then get a new candidate from that list */ + if(winner != UINT_MAX) { + i = winner; + bh = candidate[i]; + candidate[i] = bh->b_next_free; + if(candidate[i] == bh) candidate[i] = NULL; /* Got last one */ + if (bh->b_count || bh->b_size != size) + panic("Busy buffer in candidate list\n"); + if (mem_map[MAP_NR((unsigned long) bh->b_data)] != 1) + panic("Shared buffer in candidate list\n"); + if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n"); + + if(bh->b_dev == 0xffff) panic("Wrong list"); + remove_from_queues(bh); + bh->b_dev = 0xffff; + put_last_free(bh); + needed -= bh->b_size; + buffers[i]--; + if(buffers[i] < 0) 
panic("Here is the problem"); + + if(buffers[i] == 0) candidate[i] = NULL; + + /* Now all we need to do is advance the candidate pointer + from the winner list to the next usable buffer */ + if(candidate[i] && buffers[i] > 0){ + if(buffers[i] <= 0) panic("Here is another problem"); + for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) { + if(buffers[i] < 0) panic("Here is the problem"); + tmp = bh->b_next_free; + if (!bh) break; + + if (mem_map[MAP_NR((unsigned long) bh->b_data)] != 1 || + bh->b_dirt) { + refile_buffer(bh); + continue; + }; + + if (bh->b_count || bh->b_size != size) + continue; + + /* Buffers are written in the order they are + placed on the locked list. If we encounter + a locked buffer here, this means that the + rest of them are also locked */ + if(bh->b_lock && (i == BUF_LOCKED || i == BUF_LOCKED1)) { + buffers[i] = 0; + break; + } + + if (BADNESS(bh)) continue; + break; + }; + if(!buffers[i]) candidate[i] = NULL; /* Nothing here */ + else candidate[i] = bh; + if(candidate[i] && candidate[i]->b_count) + panic("Here is the problem"); + } + + goto repeat; + } + + if(needed <= 0) return; + + /* Too bad, that was not enough. Try a little harder to grow some. */ + + if (nr_free_pages > 5) { + if (grow_buffers(GFP_BUFFER, size)) { + needed -= PAGE_SIZE; + goto repeat0; + }; + } + + /* and repeat until we find something good */ + if (!grow_buffers(GFP_ATOMIC, size)) + wakeup_bdflush(1); + needed -= PAGE_SIZE; + goto repeat0; +} + +/* + * Ok, this is getblk, and it isn't very clear, again to hinder + * race-conditions. Most of the code is seldom used, (ie repeating), + * so it should be much more efficient than it looks. + * + * The algorithm is changed: hopefully better, and an elusive bug removed. + * + * 14.02.92: changed it to sync dirty buffers a bit: better performance + * when the filesystem starts to get full of dirty blocks (I hope). 
+ */ +struct buffer_head * getblk(dev_t dev, int block, int size) +{ + struct buffer_head * bh; + int isize = BUFSIZE_INDEX(size); + + /* Update this for the buffer size lav. */ + buffer_usage[isize]++; + + /* If there are too many dirty buffers, we wake up the update process + now so as to ensure that there are still clean buffers available + for user processes to use (and dirty) */ +repeat: + bh = get_hash_table(dev, block, size); + if (bh) { + if (bh->b_uptodate && !bh->b_dirt) + put_last_lru(bh); + if(!bh->b_dirt) bh->b_flushtime = 0; + return bh; + } + + while(!free_list[isize]) refill_freelist(size); + + if (find_buffer(dev,block,size)) + goto repeat; + + bh = free_list[isize]; + remove_from_free_list(bh); + +/* OK, FINALLY we know that this buffer is the only one of it's kind, */ +/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */ + bh->b_count=1; + bh->b_dirt=0; + bh->b_lock=0; + bh->b_uptodate=0; + bh->b_flushtime = 0; + bh->b_req=0; + bh->b_dev=dev; + bh->b_blocknr=block; + insert_into_queues(bh); + return bh; +} + +void set_writetime(struct buffer_head * buf, int flag) +{ + int newtime; + + if (buf->b_dirt){ + /* Move buffer to dirty list if jiffies is clear */ + newtime = jiffies + (flag ? 
bdf_prm.b_un.age_super : + bdf_prm.b_un.age_buffer); + if(!buf->b_flushtime || buf->b_flushtime > newtime) + buf->b_flushtime = newtime; + } else { + buf->b_flushtime = 0; + } +} + + +static char buffer_disposition[] = {BUF_CLEAN, BUF_SHARED, BUF_LOCKED, BUF_SHARED, + BUF_DIRTY, BUF_DIRTY, BUF_DIRTY, BUF_DIRTY}; + +void refile_buffer(struct buffer_head * buf){ + int i, dispose; + i = 0; + if(buf->b_dev == 0xffff) panic("Attempt to refile free buffer\n"); + if(mem_map[MAP_NR((unsigned long) buf->b_data)] != 1) i = 1; + if(buf->b_lock) i |= 2; + if(buf->b_dirt) i |= 4; + dispose = buffer_disposition[i]; + if(buf->b_list == BUF_SHARED && dispose == BUF_CLEAN) + dispose = BUF_UNSHARED; + if(dispose == -1) panic("Bad buffer settings (%d)\n", i); + if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies; + if(dispose != buf->b_list) { + if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED) + buf->b_lru_time = jiffies; + if(dispose == BUF_LOCKED && + (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super) + dispose = BUF_LOCKED1; + remove_from_queues(buf); + buf->b_list = dispose; + insert_into_queues(buf); + if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > + (nr_buffers - nr_buffers_type[BUF_SHARED]) * + bdf_prm.b_un.nfract/100) + wakeup_bdflush(0); + } +} + +void brelse(struct buffer_head * buf) +{ + if (!buf) + return; + wait_on_buffer(buf); + + /* If dirty, mark the time this buffer should be written back */ + set_writetime(buf, 0); + refile_buffer(buf); + + if (buf->b_count) { + if (--buf->b_count) + return; + wake_up(&buffer_wait); + return; + } + printk("VFS: brelse: Trying to free free buffer\n"); +} + +/* + * bread() reads a specified block and returns the buffer that contains + * it. It returns NULL if the block was unreadable. 
+ */ +struct buffer_head * bread(dev_t dev, int block, int size) +{ + struct buffer_head * bh; + + if (!(bh = getblk(dev, block, size))) { + printk("VFS: bread: READ error on device %d/%d\n", + MAJOR(dev), MINOR(dev)); + return NULL; + } + if (bh->b_uptodate) + return bh; + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + if (bh->b_uptodate) + return bh; + brelse(bh); + return NULL; +} + +/* + * Ok, breada can be used as bread, but additionally to mark other + * blocks for reading as well. End the argument list with a negative + * number. + */ + +#define NBUF 16 + +struct buffer_head * breada(dev_t dev, int block, int bufsize, + unsigned int pos, unsigned int filesize) +{ + struct buffer_head * bhlist[NBUF]; + unsigned int blocks; + struct buffer_head * bh; + int index; + int i, j; + + if (pos >= filesize) + return NULL; + + if (block < 0 || !(bh = getblk(dev,block,bufsize))) + return NULL; + + index = BUFSIZE_INDEX(bh->b_size); + + if (bh->b_uptodate) + return bh; + + blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index); + + if (blocks > (read_ahead[MAJOR(dev)] >> index)) + blocks = read_ahead[MAJOR(dev)] >> index; + if (blocks > NBUF) + blocks = NBUF; + + bhlist[0] = bh; + j = 1; + for(i=1; i<blocks; i++) { + bh = getblk(dev,block+i,bufsize); + if (bh->b_uptodate) { + brelse(bh); + break; + } + bhlist[j++] = bh; + } + + /* Request the read for these buffers, and then release them */ + ll_rw_block(READ, j, bhlist); + + for(i=1; i<j; i++) + brelse(bhlist[i]); + + /* Wait for this buffer, and then continue on */ + bh = bhlist[0]; + wait_on_buffer(bh); + if (bh->b_uptodate) + return bh; + brelse(bh); + return NULL; +} + +/* + * See fs/inode.c for the weird use of volatile.. 
+ */ +static void put_unused_buffer_head(struct buffer_head * bh) +{ + struct wait_queue * wait; + + wait = ((volatile struct buffer_head *) bh)->b_wait; + memset(bh,0,sizeof(*bh)); + ((volatile struct buffer_head *) bh)->b_wait = wait; + bh->b_next_free = unused_list; + unused_list = bh; +} + +static void get_more_buffer_heads(void) +{ + int i; + struct buffer_head * bh; + + if (unused_list) + return; + + if (!(bh = (struct buffer_head*) get_free_page(GFP_BUFFER))) + return; + + for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) { + bh->b_next_free = unused_list; /* only make link */ + unused_list = bh++; + } +} + +static struct buffer_head * get_unused_buffer_head(void) +{ + struct buffer_head * bh; + + get_more_buffer_heads(); + if (!unused_list) + return NULL; + bh = unused_list; + unused_list = bh->b_next_free; + bh->b_next_free = NULL; + bh->b_data = NULL; + bh->b_size = 0; + bh->b_req = 0; + return bh; +} + +/* + * Create the appropriate buffers when given a page for data area and + * the size of each buffer.. Use the bh->b_this_page linked list to + * follow the buffers created. Return NULL if unable to create more + * buffers. + */ +static struct buffer_head * create_buffers(unsigned long page, unsigned long size) +{ + struct buffer_head *bh, *head; + unsigned long offset; + + head = NULL; + offset = PAGE_SIZE; + while ((offset -= size) < PAGE_SIZE) { + bh = get_unused_buffer_head(); + if (!bh) + goto no_grow; + bh->b_this_page = head; + head = bh; + bh->b_data = (char *) (page+offset); + bh->b_size = size; + bh->b_dev = 0xffff; /* Flag as unused */ + } + return head; +/* + * In case anything failed, we just free everything we got. 
+ */
+no_grow:
+	bh = head;
+	while (bh) {
+		head = bh;
+		bh = bh->b_this_page;
+		put_unused_buffer_head(head);
+	}
+	return NULL;
+}
+/*
+ * read_buffers() starts a read (a single ll_rw_block() call) for every
+ * entry of bh[] that is not yet up to date, then waits on each non-NULL
+ * entry.  NULL entries are skipped.  The local request array has room
+ * for 8 buffer heads, so nrbuf must not exceed 8.
+ */
+static void read_buffers(struct buffer_head * bh[], int nrbuf)
+{
+	int i;
+	int bhnum = 0;
+	struct buffer_head * bhr[8];
+
+	for (i = 0 ; i < nrbuf ; i++) {
+		if (bh[i] && !bh[i]->b_uptodate)
+			bhr[bhnum++] = bh[i];
+	}
+	if (bhnum)
+		ll_rw_block(READ, bhnum, bhr);
+	for (i = 0 ; i < nrbuf ; i++) {
+		if (bh[i]) {
+			wait_on_buffer(bh[i]);
+		}
+	}
+}
+/*
+ * check_aligned() is entered with "first", the cached buffer for b[0].
+ * It succeeds only if first->b_data is page aligned and every following
+ * block of b[] is already in the hash table with its data contiguous in
+ * that same page.  On success the page's mem_map count has been raised,
+ * the caller's page at "address" is freed, a minor fault is counted and
+ * the shared page is returned.  On any failure the buffers taken so far
+ * are released and 0 is returned.
+ */
+static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
+	dev_t dev, int *b, int size)
+{
+	struct buffer_head * bh[8];
+	unsigned long page;
+	unsigned long offset;
+	int block;
+	int nrbuf;
+
+	page = (unsigned long) first->b_data;
+	if (page & ~PAGE_MASK) {
+		brelse(first);
+		return 0;
+	}
+	mem_map[MAP_NR(page)]++;
+	bh[0] = first;
+	nrbuf = 1;
+	for (offset = size ; offset < PAGE_SIZE ; offset += size) {
+		block = *++b;
+		if (!block)
+			goto no_go;
+		first = get_hash_table(dev, block, size);
+		if (!first)
+			goto no_go;
+		bh[nrbuf++] = first;	/* recorded first so no_go releases it too */
+		if (page+offset != (unsigned long) first->b_data)
+			goto no_go;
+	}
+	read_buffers(bh,nrbuf);		/* make sure they are actually read correctly */
+	while (nrbuf-- > 0)
+		brelse(bh[nrbuf]);
+	free_page(address);
+	++current->mm->min_flt;
+	return page;
+no_go:
+	while (nrbuf-- > 0)
+		brelse(bh[nrbuf]);
+	free_page(page);	/* presumably undoes the mem_map increment above - confirm */
+	return 0;
+}
+/*
+ * try_to_load_aligned() builds fresh buffer heads over the page at
+ * "address" (create_buffers).  It gives up, returning 0, if any block in
+ * b[] is 0 or is already known to find_buffer().  Otherwise the new
+ * buffers are entered into the cache queues, read in, released, a major
+ * fault is counted and "address" is returned.
+ */
+static unsigned long try_to_load_aligned(unsigned long address,
+	dev_t dev, int b[], int size)
+{
+	struct buffer_head * bh, * tmp, * arr[8];
+	unsigned long offset;
+	int isize = BUFSIZE_INDEX(size);
+	int * p;
+	int block;
+
+	bh = create_buffers(address, size);
+	if (!bh)
+		return 0;
+	/* do any of the buffers already exist? punt if so.. */
+	p = b;
+	for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
+		block = *(p++);
+		if (!block)
+			goto not_aligned;
+		if (find_buffer(dev, block, size))
+			goto not_aligned;
+	}
+	tmp = bh;
+	p = b;
+	block = 0;	/* from here on "block" counts the entries stored in arr[] */
+	while (1) {
+		arr[block++] = bh;
+		bh->b_count = 1;
+		bh->b_dirt = 0;
+		bh->b_flushtime = 0;
+		bh->b_uptodate = 0;
+		bh->b_req = 0;
+		bh->b_dev = dev;
+		bh->b_blocknr = *(p++);
+		bh->b_list = BUF_CLEAN;
+		nr_buffers++;
+		nr_buffers_size[isize]++;
+		insert_into_queues(bh);
+		if (bh->b_this_page)
+			bh = bh->b_this_page;
+		else
+			break;
+	}
+	buffermem += PAGE_SIZE;
+	bh->b_this_page = tmp;	/* close the per-page ring: last buffer points back to the first */
+	mem_map[MAP_NR(address)]++;
+	buffer_pages[MAP_NR(address)] = bh;
+	read_buffers(arr,block);
+	while (block-- > 0)
+		brelse(arr[block]);
+	++current->mm->maj_flt;
+	return address;
+not_aligned:
+	while ((tmp = bh) != NULL) {
+		bh = bh->b_this_page;
+		put_unused_buffer_head(tmp);
+	}
+	return 0;
+}
+
+/*
+ * Try-to-share-buffers tries to minimize memory use by trying to keep
+ * both code pages and the buffer area in the same page. This is done by
+ * (a) checking if the buffers are already aligned correctly in memory and
+ * (b) if none of the buffer heads are in memory at all, trying to load
+ * them into memory the way we want them.
+ *
+ * This doesn't guarantee that the memory is shared, but should under most
+ * circumstances work very well indeed (ie >90% sharing of code pages on
+ * demand-loadable executables).
+ *
+ * Returns the address of the page to use, or 0 if b[0] is 0 or neither
+ * approach worked.
+ */
+static inline unsigned long try_to_share_buffers(unsigned long address,
+	dev_t dev, int *b, int size)
+{
+	struct buffer_head * bh;
+	int block;
+
+	block = b[0];
+	if (!block)
+		return 0;
+	bh = get_hash_table(dev, block, size);
+	if (bh)
+		return check_aligned(bh, address, dev, b, size);
+	return try_to_load_aligned(address, dev, b, size);
+}
+
+/*
+ * bread_page reads one page worth of buffers (PAGE_SIZE/size of them, at
+ * most 8) into memory at the desired address. It's
+ * a function of its own, as there is some speed to be got by reading them
+ * all at the same time, not waiting for one to be read, and then another
+ * etc. This also allows us to optimize memory usage by sharing code pages
+ * and filesystem buffers..
+ *
+ * b[] holds the block numbers; a 0 entry leaves that part of the page
+ * untouched, as does a buffer that is not up to date after the read.
+ * Unless no_share is set, try_to_share_buffers() is tried first and the
+ * page it yields is returned instead of "address".
+ */
+unsigned long bread_page(unsigned long address, dev_t dev, int b[], int size, int no_share)
+{
+	struct buffer_head * bh[8];
+	unsigned long where;
+	int i, j;
+
+	if (!no_share) {
+		where = try_to_share_buffers(address, dev, b, size);
+		if (where)
+			return where;
+	}
+	++current->mm->maj_flt;
+	for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
+		bh[i] = NULL;
+		if (b[i])
+			bh[i] = getblk(dev, b[i], size);
+	}
+	read_buffers(bh,i);
+	where = address;
+	for (i=0, j=0; j<PAGE_SIZE ; i++, j += size, where += size) {
+		if (bh[i]) {
+			if (bh[i]->b_uptodate)
+				memcpy((void *) where, bh[i]->b_data, size);
+			brelse(bh[i]);
+		}
+	}
+	return address;
+}
+
+/*
+ * Try to increase the number of buffers available: the size argument
+ * is used to determine what kind of buffers we want. 
+ */
+static int grow_buffers(int pri, int size)
+{
+	unsigned long page;
+	struct buffer_head *bh, *tmp;
+	struct buffer_head * insert_point;
+	int isize;
+
+	/* only multiples of 512 bytes up to one page are accepted */
+	if ((size & 511) || (size > PAGE_SIZE)) {
+		printk("VFS: grow_buffers: size = %d\n",size);
+		return 0;
+	}
+
+	isize = BUFSIZE_INDEX(size);
+
+	if (!(page = __get_free_page(pri)))
+		return 0;
+	bh = create_buffers(page, size);
+	if (!bh) {
+		free_page(page);
+		return 0;
+	}
+
+	insert_point = free_list[isize];
+
+	/* link every new buffer into the circular free list for this size */
+	tmp = bh;
+	while (1) {
+		nr_free[isize]++;
+		if (insert_point) {
+			tmp->b_next_free = insert_point->b_next_free;
+			tmp->b_prev_free = insert_point;
+			insert_point->b_next_free->b_prev_free = tmp;
+			insert_point->b_next_free = tmp;
+		} else {
+			tmp->b_prev_free = tmp;
+			tmp->b_next_free = tmp;
+		}
+		insert_point = tmp;
+		++nr_buffers;
+		if (tmp->b_this_page)
+			tmp = tmp->b_this_page;
+		else
+			break;
+	}
+	free_list[isize] = bh;
+	buffer_pages[MAP_NR(page)] = bh;
+	tmp->b_this_page = bh;	/* close the per-page ring */
+	wake_up(&buffer_wait);
+	buffermem += PAGE_SIZE;
+	return 1;
+}
+
+/*
+ * try_to_free() checks if all the buffers on this particular page
+ * are unused, and free's the page if so.
+ *
+ * *bhp is first set to bh; when the buffer it names is released it is
+ * moved to that buffer's b_prev_free, or set to NULL if the buffer was
+ * alone on its list.  The return value is nonzero only if the page's
+ * mem_map count is zero after the free_page() call.
+ */
+static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp)
+{
+	unsigned long page;
+	struct buffer_head * tmp, * p;
+	int isize = BUFSIZE_INDEX(bh->b_size);
+
+	*bhp = bh;
+	page = (unsigned long) bh->b_data;
+	page &= PAGE_MASK;
+	tmp = bh;
+	do {
+		if (!tmp)
+			return 0;
+		if (tmp->b_count || tmp->b_dirt || tmp->b_lock || tmp->b_wait)
+			return 0;
+		tmp = tmp->b_this_page;
+	} while (tmp != bh);
+	tmp = bh;
+	do {
+		p = tmp;
+		tmp = tmp->b_this_page;
+		nr_buffers--;
+		nr_buffers_size[isize]--;
+		if (p == *bhp)
+		  {
+		    *bhp = p->b_prev_free;
+		    if (p == *bhp) /* Was this the last in the list? */
+		      *bhp = NULL;
+		  }
+		remove_from_queues(p);
+		put_unused_buffer_head(p);
+	} while (tmp != bh);
+	buffermem -= PAGE_SIZE;
+	buffer_pages[MAP_NR(page)] = NULL;
+	free_page(page);
+	return !mem_map[MAP_NR(page)];
+}
+
+
+/*
+ * Consult the load average for buffers and decide whether or not
+ * we should shrink the buffers of one size or not.  If we decide yes,
+ * do it and return 1.  Else return 0.  Do not attempt to shrink size
+ * that is specified.
+ *
+ * I would prefer not to use a load average, but the way things are now it
+ * seems unavoidable.  The way to get rid of it would be to force clustering
+ * universally, so that when we reclaim buffers we always reclaim an entire
+ * page.  Doing this would mean that we all need to move towards QMAGIC.
+ */
+
+static int maybe_shrink_lav_buffers(int size)
+{
+	int nlist;
+	int isize;
+	int total_lav, total_n_buffers, n_sizes;
+
+	/* Do not consider the shared buffers since they would not tend
+	   to have getblk called very often, and this would throw off
+	   the lav.  They are not easily reclaimable anyway (let the swapper
+	   make the first move). */
+
+	total_lav = total_n_buffers = n_sizes = 0;
+	for(nlist = 0; nlist < NR_SIZES; nlist++)
+	 {
+		 total_lav += buffers_lav[nlist];
+		 if(nr_buffers_size[nlist]) n_sizes++;
+		 total_n_buffers += nr_buffers_size[nlist];
+		 total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED];
+	 }
+
+	/* See if we have an excessive number of buffers of a particular
+	   size - if so, victimize that bunch. */
+
+	isize = (size ? BUFSIZE_INDEX(size) : -1);	/* -1: no size is exempt */
+
+	if (n_sizes > 1)
+		for(nlist = 0; nlist < NR_SIZES; nlist++)
+		 {
+			 if(nlist == isize) continue;
+			 if(nr_buffers_size[nlist] &&
+			    bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers <
+			    total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
+				 if(shrink_specific_buffers(6, bufferindex_size[nlist]))
+					 return 1;
+		 }
+	return 0;
+}
+/*
+ * Try to free up some pages by shrinking the buffer-cache
+ *
+ * Priority tells the routine how hard to try to shrink the
+ * buffers: 3 means "don't bother too much", while a value
+ * of 0 means "we'd better get some free pages now".
+ *
+ * Priorities below 2 call sync_buffers() first; priority 2 wakes
+ * bdflush and waits for it.
+ */
+int shrink_buffers(unsigned int priority)
+{
+	if (priority < 2) {
+		sync_buffers(0,0);
+	}
+
+	if(priority == 2) wakeup_bdflush(1);
+
+	if(maybe_shrink_lav_buffers(0)) return 1;
+
+	/* No good candidate size - take any size we can find */
+        return shrink_specific_buffers(priority, 0);
+}
+/*
+ * shrink_specific_buffers() looks for one page whose buffers can all be
+ * released: first on the free lists, then on the LRU lists, where only
+ * nr_buffers_type[list] >> priority entries are examined.  A size of 0
+ * means buffers of any size.  Returns 1 as soon as try_to_free()
+ * reports success, otherwise 0.
+ */
+static int shrink_specific_buffers(unsigned int priority, int size)
+{
+	struct buffer_head *bh;
+	int nlist;
+	int i, isize, isize1;
+
+#ifdef DEBUG
+	if(size) printk("Shrinking buffers of size %d\n", size);
+#endif
+	/* First try the free lists, and see if we can get a complete page
+	   from here */
+	isize1 = (size ? BUFSIZE_INDEX(size) : -1);
+
+	for(isize = 0; isize<NR_SIZES; isize++){
+		if(isize1 != -1 && isize1 != isize) continue;
+		bh = free_list[isize];
+		if(!bh) continue;
+		for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
+			if (bh->b_count || !bh->b_this_page)
+				 continue;
+			if (try_to_free(bh, &bh))
+				 return 1;
+			if(!bh) break; /* Some interrupt must have used it after we
+					  freed the page.  No big deal - keep looking */
+		}
+	}
+
+	/* Not enough in the free lists, now try the lru list */
+
+	for(nlist = 0; nlist < NR_LIST; nlist++) {
+	repeat1:
+		if(priority > 3 && nlist == BUF_SHARED) continue;
+		bh = lru_list[nlist];
+		if(!bh) continue;
+		i = nr_buffers_type[nlist] >> priority;
+		for ( ; i-- > 0 ; bh = bh->b_next_free) {
+			/* We may have stalled while waiting for I/O to complete. */
+			if(bh->b_list != nlist) goto repeat1;
+			if (bh->b_count || !bh->b_this_page)
+				 continue;
+			if(size && bh->b_size != size) continue;
+			/* the else below pairs with "if (priority)": a locked
+			   buffer is skipped unless priority is 0, in which
+			   case we wait for it */
+			if (bh->b_lock)
+				 if (priority)
+					  continue;
+				 else
+					  wait_on_buffer(bh);
+			if (bh->b_dirt) {
+				bh->b_count++;	/* hold the buffer while the write is queued */
+				bh->b_flushtime = 0;
+				ll_rw_block(WRITEA, 1, &bh);
+				bh->b_count--;
+				continue;
+			}
+			if (try_to_free(bh, &bh))
+				 return 1;
+			if(!bh) break;
+		}
+	}
+	return 0;
+}
+
+/*
+ * show_buffers() prints buffer cache statistics with printk(): overall
+ * totals, one summary line per LRU list, then a per-size table.
+ */
+void show_buffers(void)
+{
+	struct buffer_head * bh;
+	int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
+	int shared;
+	int nlist, isize;
+
+	printk("Buffer memory: %6dkB\n",buffermem>>10);
+	printk("Buffer heads: %6d\n",nr_buffer_heads);
+	printk("Buffer blocks: %6d\n",nr_buffers);
+
+	for(nlist = 0; nlist < NR_LIST; nlist++) {
+	  shared = found = locked = dirty = used = lastused = 0;
+	  bh = lru_list[nlist];
+	  if(!bh) continue;
+	  do {
+		found++;
+		if (bh->b_lock)
+			locked++;
+		if (bh->b_dirt)
+			dirty++;
+		if(mem_map[MAP_NR(((unsigned long) bh->b_data))] !=1) shared++;
+		if (bh->b_count)
+			used++, lastused = found;
+		bh = bh->b_next_free;
+	      } while (bh != lru_list[nlist]);
+	printk("Buffer[%d] mem: %d buffers, %d used (last=%d), %d locked, %d dirty %d shrd\n",
+		nlist, found, used, lastused, locked, dirty, shared);
+	};
+	printk("Size [LAV] Free Clean Unshar Lck Lck1 Dirty Shared\n");
+	for(isize = 0; isize<NR_SIZES; isize++){
+		printk("%5d [%5d]: %7d ", bufferindex_size[isize],
+		       buffers_lav[isize], nr_free[isize]);
+		for(nlist = 0; nlist < NR_LIST; nlist++)
+			 printk("%7d ", nr_buffers_st[isize][nlist]);
+		printk("\n");
+	}
+}
+
+/*
+ * 
try_to_reassign() checks if all the buffers on this particular page
+ * are unused, and reassign to a new cluster them if this is true.
+ */
+static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
+			   dev_t dev, unsigned int starting_block)
+{
+	unsigned long page;
+	struct buffer_head * tmp, * p;
+
+	*bhp = bh;
+	page = (unsigned long) bh->b_data;
+	page &= PAGE_MASK;
+	if(mem_map[MAP_NR(page)] != 1) return 0;	/* page has other users */
+	tmp = bh;
+	do {
+		if (!tmp)
+			 return 0;
+
+		if (tmp->b_count || tmp->b_dirt || tmp->b_lock)
+			 return 0;
+		tmp = tmp->b_this_page;
+	} while (tmp != bh);
+	tmp = bh;
+
+	while((unsigned int) tmp->b_data & (PAGE_SIZE - 1))
+		 tmp = tmp->b_this_page;
+
+	/* This is the buffer at the head of the page */
+	bh = tmp;
+	do {
+		p = tmp;
+		tmp = tmp->b_this_page;
+		remove_from_queues(p);
+		p->b_dev=dev;
+		p->b_uptodate = 0;
+		p->b_req = 0;
+		p->b_blocknr=starting_block++;
+		insert_into_queues(p);
+	} while (tmp != bh);
+	return 1;
+}
+
+/*
+ * Try to find a free cluster by locating a page where
+ * all of the buffers are unused.  We would like this function
+ * to be atomic, so we do not call anything that might cause
+ * the process to sleep.  The priority is somewhat similar to
+ * the priority used in shrink_buffers.
+ *
+ * My thinking is that the kernel should end up using whole
+ * pages for the buffer cache as much of the time as possible.
+ * This way the other buffers on a particular page are likely
+ * to be very near each other on the free list, and we will not
+ * be expiring data prematurely.  For now we only cannibalize buffers
+ * of the same size to keep the code simpler.
+ *
+ * Returns 4 when a page was reassigned, 0 otherwise.
+ */
+static int reassign_cluster(dev_t dev,
+		     unsigned int starting_block, int size)
+{
+	struct buffer_head *bh;
+	int isize = BUFSIZE_INDEX(size);
+	int i;
+
+	/* We want to give ourselves a really good shot at generating
+	   a cluster, and since we only take buffers from the free
+	   list, we "overfill" it a little. */
+
+	while(nr_free[isize] < 32) refill_freelist(size);
+
+	bh = free_list[isize];
+	if(bh)
+		 for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
+			 if (!bh->b_this_page)	continue;
+			 if (try_to_reassign(bh, &bh, dev, starting_block))
+				 return 4;
+		 }
+	return 0;
+}
+
+/* This function tries to generate a new cluster of buffers
+ * from a new page in memory.  We should only do this if we have
+ * not expanded the buffer cache to the maximum size that we allow.
+ *
+ * Returns 4 on success.  Returns 0 if no page or buffer heads could be
+ * obtained, or if one of the blocks block, block+1, ... is already
+ * known to find_buffer().
+ */
+static unsigned long try_to_generate_cluster(dev_t dev, int block, int size)
+{
+	struct buffer_head * bh, * tmp, * arr[8];
+	int isize = BUFSIZE_INDEX(size);
+	unsigned long offset;
+	unsigned long page;
+	int nblock;
+
+	page = get_free_page(GFP_NOBUFFER);
+	if(!page) return 0;
+
+	bh = create_buffers(page, size);
+	if (!bh) {
+		free_page(page);
+		return 0;
+	};
+	nblock = block;
+	for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
+		if (find_buffer(dev, nblock++, size))
+			 goto not_aligned;
+	}
+	tmp = bh;
+	nblock = 0;	/* from here on nblock counts the entries stored in arr[] */
+	while (1) {
+		arr[nblock++] = bh;
+		bh->b_count = 1;
+		bh->b_dirt = 0;
+		bh->b_flushtime = 0;
+		bh->b_lock = 0;
+		bh->b_uptodate = 0;
+		bh->b_req = 0;
+		bh->b_dev = dev;
+		bh->b_list = BUF_CLEAN;
+		bh->b_blocknr = block++;
+		nr_buffers++;
+		nr_buffers_size[isize]++;
+		insert_into_queues(bh);
+		if (bh->b_this_page)
+			bh = bh->b_this_page;
+		else
+			break;
+	}
+	buffermem += PAGE_SIZE;
+	buffer_pages[MAP_NR(page)] = bh;
+	bh->b_this_page = tmp;	/* close the per-page ring */
+	while (nblock-- > 0)
+		brelse(arr[nblock]);
+	return 4;
+not_aligned:
+	while ((tmp = bh) != NULL) {
+		bh = bh->b_this_page;
+		put_unused_buffer_head(tmp);
+	}
+	free_page(page);
+	return 0;
+}
+/*
+ * generate_cluster() sets up one page of buffers for the consecutive
+ * blocks listed in b[].  It returns 0 at once if the blocks are not
+ * consecutive or if any of them is already cached; otherwise it returns
+ * whatever try_to_generate_cluster() or reassign_cluster() returns.
+ */
+unsigned long generate_cluster(dev_t dev, int b[], int size)
+{
+	int i, offset;
+
+	for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
+		if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
+		if(find_buffer(dev, b[i], size)) return 0;
+	};
+
+	/* OK, we have a candidate for a new cluster */
+
+	/* See if one size of buffer is over-represented in the buffer cache,
+	   if so reduce the numbers of buffers */
+	if(maybe_shrink_lav_buffers(size))
+	 {
+		 int retval;
+		 retval = try_to_generate_cluster(dev, b[0], size);
+		 if(retval) return retval;
+	 };
+
+	if (nr_free_pages > min_free_pages*2)
+		 return try_to_generate_cluster(dev, b[0], size);
+	else
+		 return reassign_cluster(dev, b[0], size);
+}
+
+/*
+ * This initializes the initial buffer free list.  nr_buffers_type is set
+ * to one less the actual number of buffers, as a sop to backwards
+ * compatibility --- the old code did this (I think unintentionally,
+ * but I'm not sure), and programs in the ps package expect it.
+ * 					- TYT 8/30/92
+ *
+ * NOTE(review): neither vmalloc() result below is checked before use.
+ */
+void buffer_init(void)
+{
+	int i;
+	int isize = BUFSIZE_INDEX(BLOCK_SIZE);
+
+	/* hash table size is chosen from the amount of memory */
+	if (high_memory >= 4*1024*1024) {
+		if(high_memory >= 16*1024*1024)
+			 nr_hash = 16381;
+		else
+			 nr_hash = 4093;
+	} else {
+		nr_hash = 997;
+	};
+
+	hash_table = (struct buffer_head **) vmalloc(nr_hash *
+						     sizeof(struct buffer_head *));
+
+
+	buffer_pages = (struct buffer_head **) vmalloc(MAP_NR(high_memory) *
+						     sizeof(struct buffer_head *));
+	for (i = 0 ; i < MAP_NR(high_memory) ; i++)
+		buffer_pages[i] = NULL;
+
+	for (i = 0 ; i < nr_hash ; i++)
+		hash_table[i] = NULL;
+	lru_list[BUF_CLEAN] = 0;
+	grow_buffers(GFP_KERNEL, BLOCK_SIZE);
+	if (!free_list[isize])
+		panic("VFS: Unable to initialize buffer free list!");
+	return;
+}
+
+/* This is a simple kernel daemon, whose job it is to provide a dynamic
+ * response to dirty buffers.  Once this process is activated, we write back
+ * a limited number of buffers to the disks and then go back to sleep again.
+ * In effect this is a process which never leaves kernel mode, and does not have
+ * any user memory associated with it except for the stack.  There is also
+ * a kernel stack page, which obviously must be separate from the user stack.
+ */
+struct wait_queue * bdflush_wait = NULL;
+struct wait_queue * bdflush_done = NULL;
+
+static int bdflush_running = 0;
+/*
+ * wakeup_bdflush() wakes the daemon and, if "wait" is nonzero, sleeps on
+ * bdflush_done.  When no daemon is running it prints a warning and
+ * calls sync_buffers() instead.
+ */
+static void wakeup_bdflush(int wait)
+{
+	if(!bdflush_running){
+		printk("Warning - bdflush not running\n");
+		sync_buffers(0,0);
+		return;
+	};
+	wake_up(&bdflush_wait);
+	if(wait) sleep_on(&bdflush_done);
+}
+
+
+
+/*
+ * Here we attempt to write back old buffers.  We also try and flush inodes
+ * and supers as well, since this function is essentially "update", and
+ * otherwise there would be no way of ensuring that these quantities ever
+ * get written back.  Ideally, we would have a timestamp on the inodes
+ * and superblocks so that we could write back only the old ones as well
+ *
+ * Only dirty, unlocked buffers whose b_flushtime is not later than
+ * jiffies are written.  Always returns 0.
+ */
+
+asmlinkage int sync_old_buffers(void)
+{
+	int i, isize;
+	int ndirty, nwritten;
+	int nlist;
+	int ncount;
+	struct buffer_head * bh, *next;
+
+	sync_supers(0);
+	sync_inodes(0);
+
+	ncount = 0;
+#ifdef DEBUG
+	for(nlist = 0; nlist < NR_LIST; nlist++)
+#else
+	for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
+#endif
+	{
+		ndirty = 0;
+		nwritten = 0;
+	repeat:
+		bh = lru_list[nlist];
+		if(bh)
+			 for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
+				 /* We may have stalled while waiting for I/O to complete. */
+				 if(bh->b_list != nlist) goto repeat;
+				 next = bh->b_next_free;
+				 if(!lru_list[nlist]) {
+					 printk("Dirty list empty %d\n", i);
+					 break;
+				 }
+
+				 /* Clean buffer on dirty list?  Refile it */
+				 if (nlist == BUF_DIRTY && !bh->b_dirt && !bh->b_lock)
+				  {
+					  refile_buffer(bh);
+					  continue;
+				  }
+
+				 if (bh->b_lock || !bh->b_dirt)
+					  continue;
+				 ndirty++;
+				 if(bh->b_flushtime > jiffies) continue;	/* not old enough yet */
+				 nwritten++;
+				 bh->b_count++;	/* hold the buffer while the write is queued */
+				 bh->b_flushtime = 0;
+#ifdef DEBUG
+				 if(nlist != BUF_DIRTY) ncount++;
+#endif
+				 ll_rw_block(WRITE, 1, &bh);
+				 bh->b_count--;
+			 }
+	}
+#ifdef DEBUG
+	if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
+	printk("Wrote %d/%d buffers\n", nwritten, ndirty);
+#endif
+
+	/* We assume that we only come through here on a regular
+	   schedule, like every 5 seconds.  Now update load averages.
+	   Shift usage counts to prevent overflow. */
+	for(isize = 0; isize<NR_SIZES; isize++){
+		CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
+		buffer_usage[isize] = 0;
+	};
+	return 0;
+}
+
+
+/* This is the interface to bdflush.  As we get more sophisticated, we can
+ * pass tuning parameters to this "process", to adjust how it behaves.  If you
+ * invoke this again after you have done this once, you would simply modify
+ * the tuning parameters.  We would want to verify each parameter, however,
+ * to make sure that it is reasonable. 
*/
+
+asmlinkage int sys_bdflush(int func, int data)
+{
+	int i, error;
+	int ndirty;
+	int nlist;
+	int ncount;
+	struct buffer_head * bh, *next;
+
+	if (!suser())
+		return -EPERM;
+
+	if (func == 1)
+		 return sync_old_buffers();
+
+	/* func 0 starts the daemon and func 1 (handled above) flushes old
+	   buffers.  For func >= 2 the parameter index is (func-2)>>1: an
+	   even func stores the current value at the user address "data",
+	   an odd func sets the parameter to "data" after a range check.
+	   So 2 reads param 0, 3 writes param 0, 4 reads param 1, etc */
+	if (func >= 2) {
+		i = (func-2) >> 1;
+		if (i < 0 || i >= N_PARAM)
+			return -EINVAL;
+		if((func & 1) == 0) {
+			error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
+			if (error)
+				return error;
+			put_fs_long(bdf_prm.data[i], data);
+			return 0;
+		};
+		if (data < bdflush_min[i] || data > bdflush_max[i])
+			return -EINVAL;
+		bdf_prm.data[i] = data;
+		return 0;
+	};
+
+	if (bdflush_running)
+		return -EBUSY; /* Only one copy of this running at one time */
+	bdflush_running++;
+
+	/* OK, from here on is the daemon */
+
+	for (;;) {
+#ifdef DEBUG
+		printk("bdflush() activated...");
+#endif
+
+		ncount = 0;
+#ifdef DEBUG
+		for(nlist = 0; nlist < NR_LIST; nlist++)
+#else
+		for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
+#endif
+		 {
+			 ndirty = 0;
+		 repeat:
+			 bh = lru_list[nlist];
+			 if(bh)
+				  for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty;
+				       bh = next) {
+					  /* We may have stalled while waiting for I/O to complete. */
+					  if(bh->b_list != nlist) goto repeat;
+					  next = bh->b_next_free;
+					  if(!lru_list[nlist]) {
+						  printk("Dirty list empty %d\n", i);
+						  break;
+					  }
+
+					  /* Clean buffer on dirty list?  Refile it */
+					  if (nlist == BUF_DIRTY && !bh->b_dirt && !bh->b_lock)
+					   {
+						   refile_buffer(bh);
+						   continue;
+					   }
+
+					  if (bh->b_lock || !bh->b_dirt)
+						   continue;
+					  /* Should we write back buffers that are shared or not??
+					     currently dirty buffers are not shared, so it does not matter */
+					  bh->b_count++;	/* hold the buffer while the write is queued */
+					  ndirty++;
+					  bh->b_flushtime = 0;
+					  ll_rw_block(WRITE, 1, &bh);
+#ifdef DEBUG
+					  if(nlist != BUF_DIRTY) ncount++;
+#endif
+					  bh->b_count--;
+				  }
+		 }
+#ifdef DEBUG
+		if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
+		printk("sleeping again.\n");
+#endif
+		wake_up(&bdflush_done);
+
+		/* If there are still a lot of dirty buffers around, skip the sleep
+		   and flush some more */
+
+		if(nr_buffers_type[BUF_DIRTY] < (nr_buffers - nr_buffers_type[BUF_SHARED]) *
+		   bdf_prm.b_un.nfract/100) {
+			/* a pending SIGKILL ends the daemon; every other
+			   pending signal is discarded before sleeping */
+			if (current->signal & (1 << (SIGKILL-1))) {
+				bdflush_running--;
+				return 0;
+			}
+			current->signal = 0;
+			interruptible_sleep_on(&bdflush_wait);
+		}
+	}
+}
+
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 8
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -8
+ * c-argdecl-indent: 8
+ * c-label-offset: -8
+ * c-continued-statement-offset: 8
+ * c-continued-brace-offset: 0
+ * End:
+ */
diff --git a/fs/dcache.c b/fs/dcache.c
new file mode 100644
index 000000000..a40bdf316
--- /dev/null
+++ b/fs/dcache.c
@@ -0,0 +1,253 @@
+/*
+ *  linux/fs/dcache.c
+ *
+ *  (C) Copyright 1994 Linus Torvalds
+ */
+
+/*
+ * The directory cache is a "two-level" cache, each level doing LRU on
+ * its entries.  Adding new entries puts them at the end of the LRU
+ * queue on the first-level cache, while the second-level cache is
+ * fed by any cache hits.
+ *
+ * The idea is that new additions (from readdir(), for example) will not
+ * flush the cache of entries that have really been used. 
+ *
+ * There is a global hash-table over both caches that hashes the entries
+ * based on the directory inode number and device as well as on a
+ * string-hash computed over the name.
+ */
+
+#include <stddef.h>
+
+#include <linux/fs.h>
+#include <linux/string.h>
+
+/*
+ * Don't bother caching long names.. They just take up space in the cache, and
+ * for a name cache you just want to cache the "normal" names anyway which tend
+ * to be short.
+ */
+#define DCACHE_NAME_LEN	15
+#define DCACHE_SIZE 64
+
+struct hash_list {
+	struct dir_cache_entry * next;
+	struct dir_cache_entry * prev;
+};
+
+/*
+ * The dir_cache_entry must be in this order: we do ugly things with the pointers
+ *
+ * "h" has to stay the first member because a hash queue head
+ * (struct hash_list) is cast to a struct dir_cache_entry pointer below.
+ * dev..name have to stay together and in this order because COPYDATA
+ * copies them with a single memcpy.
+ */
+struct dir_cache_entry {
+	struct hash_list h;
+	unsigned long dev;
+	unsigned long dir;
+	unsigned long version;
+	unsigned long ino;
+	unsigned char name_len;
+	char name[DCACHE_NAME_LEN];
+	struct dir_cache_entry ** lru_head;	/* head pointer of the LRU list this entry is on */
+	struct dir_cache_entry * next_lru,  * prev_lru;
+};
+/*
+ * COPYDATA copies everything from "dev" through "name" in one go:
+ * 4 unsigned longs + 1 length byte + DCACHE_NAME_LEN name bytes.
+ */
+#define COPYDATA(de, newde) \
+memcpy((void *) &newde->dev, (void *) &de->dev, \
+4*sizeof(unsigned long) + 1 + DCACHE_NAME_LEN)
+
+static struct dir_cache_entry level1_cache[DCACHE_SIZE];
+static struct dir_cache_entry level2_cache[DCACHE_SIZE];
+
+/*
+ * The LRU-lists are doubly-linked circular lists, and do not change in size
+ * so these pointers always have something to point to (after _init)
+ */
+static struct dir_cache_entry * level1_head;
+static struct dir_cache_entry * level2_head;
+
+/*
+ * The hash-queues are also doubly-linked circular lists, but the head is
+ * itself on the doubly-linked list, not just a pointer to the first entry.
+ */
+#define DCACHE_HASH_QUEUES 19
+#define hash_fn(dev,dir,namehash) (((dev) ^ (dir) ^ (namehash)) % DCACHE_HASH_QUEUES)
+
+static struct hash_list hash_table[DCACHE_HASH_QUEUES];
+/* Unlink de from its LRU ring; de's own pointers are left untouched. */
+static inline void remove_lru(struct dir_cache_entry * de)
+{
+	de->next_lru->prev_lru = de->prev_lru;
+	de->prev_lru->next_lru = de->next_lru;
+}
+/* Insert de just before head, i.e. at the tail of the ring. */
+static inline void add_lru(struct dir_cache_entry * de, struct dir_cache_entry *head)
+{
+	de->next_lru = head;
+	de->prev_lru = head->prev_lru;
+	de->prev_lru->next_lru = de;
+	head->prev_lru = de;
+}
+/*
+ * Move de to the tail of its LRU ring.  If de is the head itself,
+ * advancing the head pointer has the same effect without relinking.
+ */
+static inline void update_lru(struct dir_cache_entry * de)
+{
+	if (de == *de->lru_head)
+		*de->lru_head = de->next_lru;
+	else {
+		remove_lru(de);
+		add_lru(de,*de->lru_head);
+	}
+}
+
+/*
+ * Stupid name"hash" algorithm. Write something better if you want to,
+ * but I doubt it matters that much
+ *
+ * The value is len times the first byte of the name; name[0] is read
+ * even when len is 0.
+ */
+static inline unsigned long namehash(const char * name, int len)
+{
+	return len * *(unsigned char *) name;
+}
+
+/*
+ * Hash queue manipulation. Look out for the casts..
+ *
+ * An entry whose h.next is NULL is on no hash queue.
+ */
+static inline void remove_hash(struct dir_cache_entry * de)
+{
+	if (de->h.next) {
+		de->h.next->h.prev = de->h.prev;
+		de->h.prev->h.next = de->h.next;
+		de->h.next = NULL;
+	}
+}
+/* Insert de right after the queue head "hash". */
+static inline void add_hash(struct dir_cache_entry * de, struct hash_list * hash)
+{
+	de->h.next = hash->next;
+	de->h.prev = (struct dir_cache_entry *) hash;
+	hash->next->h.prev = de;
+	hash->next = de;
+}
+
+/*
+ * Find a directory cache entry given all the necessary info. 
+ */
+static struct dir_cache_entry * find_entry(struct inode * dir, const char * name, int len, struct hash_list * hash)
+{
+	struct dir_cache_entry * de;
+
+	/* the queue head is itself a list node, so the walk ends when we are back at it */
+	for (de = hash->next ; de != (struct dir_cache_entry *) hash ; de = de->h.next) {
+		if (de->dev != dir->i_dev)
+			continue;
+		if (de->dir != dir->i_ino)
+			continue;
+		if (de->version != dir->i_version)
+			continue;
+		if (de->name_len != len)
+			continue;
+		if (memcmp(de->name, name, len))
+			continue;
+		return de;
+	}
+	return NULL;
+}
+
+/*
+ * Move a successfully used entry to level2. If already at level2,
+ * move it to the end of the LRU queue..
+ *
+ * Otherwise the entry at the head of the level2 LRU list is recycled:
+ * it is taken off its hash queue, filled with a copy of old_de's data
+ * and put on the hash queue "hash".  old_de itself stays in level1.
+ */
+static inline void move_to_level2(struct dir_cache_entry * old_de, struct hash_list * hash)
+{
+	struct dir_cache_entry * de;
+
+	if (old_de->lru_head == &level2_head) {
+		update_lru(old_de);
+		return;
+	}
+	de = level2_head;
+	level2_head = de->next_lru;
+	remove_hash(de);
+	COPYDATA(old_de, de);
+	add_hash(de, hash);
+}
+
+/*
+ * Look up "name" (len bytes) in directory "dir".  Returns 1 and stores the
+ * inode number in *ino on a hit, 0 on a miss or if the name is longer than
+ * DCACHE_NAME_LEN.  A hit promotes the entry to the second-level cache.
+ */
+int dcache_lookup(struct inode * dir, const char * name, int len, unsigned long * ino)
+{
+	struct hash_list * hash;
+	struct dir_cache_entry *de;
+
+	if (len > DCACHE_NAME_LEN)
+		return 0;
+	hash = hash_table + hash_fn(dir->i_dev, dir->i_ino, namehash(name,len));
+	de = find_entry(dir, name, len, hash);
+	if (!de)
+		return 0;
+	*ino = de->ino;
+	move_to_level2(de, hash);
+	return 1;
+}
+
+/*
+ * Remember that "name" in directory "dir" maps to inode number "ino".
+ * Names longer than DCACHE_NAME_LEN are ignored.  An existing entry is
+ * updated in place; otherwise the entry at the head of the level1 LRU
+ * list is recycled.
+ */
+void dcache_add(struct inode * dir, const char * name, int len, unsigned long ino)
+{
+	struct hash_list * hash;
+	struct dir_cache_entry *de;
+
+	if (len > DCACHE_NAME_LEN)
+		return;
+	hash = hash_table + hash_fn(dir->i_dev, dir->i_ino, namehash(name,len));
+	if ((de = find_entry(dir, name, len, hash)) != NULL) {
+		de->ino = ino;
+		update_lru(de);
+		return;
+	}
+	de = level1_head;
+	level1_head = de->next_lru;
+	remove_hash(de);
+	de->dev = dir->i_dev;
+	de->dir = dir->i_ino;
+	de->version = dir->i_version;
+	de->ino = ino;
+	de->name_len = len;
+	memcpy(de->name, name, len);
+	add_hash(de, hash);
+}
+
+/*
+ * Set up both LRU rings and the hash queues.  No memory is taken from
+ * the range passed in: mem_start is returned unchanged and mem_end is
+ * not used.
+ */
+unsigned long name_cache_init(unsigned long mem_start, unsigned long mem_end)
+{
+	int i;
+	struct dir_cache_entry * p;
+
+	/*
+	 * Init level1 LRU lists..
+	 */
+	p = level1_cache;
+	do {
+		p[1].prev_lru = p;
+		p[0].next_lru = p+1;
+		p[0].lru_head = &level1_head;
+	} while (++p < level1_cache + DCACHE_SIZE-1);
+	/* p now points at the last entry: close the ring */
+	level1_cache[0].prev_lru = p;
+	p[0].next_lru = &level1_cache[0];
+	p[0].lru_head = &level1_head;
+	level1_head = level1_cache;
+
+	/*
+	 * Init level2 LRU lists..
+	 */
+	p = level2_cache;
+	do {
+		p[1].prev_lru = p;
+		p[0].next_lru = p+1;
+		p[0].lru_head = &level2_head;
+	} while (++p < level2_cache + DCACHE_SIZE-1);
+	level2_cache[0].prev_lru = p;
+	p[0].next_lru = &level2_cache[0];
+	p[0].lru_head = &level2_head;
+	level2_head = level2_cache;
+
+	/*
+	 * Empty hash queues..
+	 *
+	 * An empty queue has both next and prev pointing back at the head.
+	 * (This used to assign "next" twice and leave "prev" unset.)
+	 */
+	for (i = 0 ; i < DCACHE_HASH_QUEUES ; i++)
+		hash_table[i].next = hash_table[i].prev =
+			(struct dir_cache_entry *) &hash_table[i];
+	return mem_start;
+}
diff --git a/fs/devices.c b/fs/devices.c
new file mode 100644
index 000000000..e79ea07d5
--- /dev/null
+++ b/fs/devices.c
@@ -0,0 +1,276 @@
+/*
+ *  linux/fs/devices.c
+ *
+ * (C) 1993 Matthias Urlichs -- collected common code and tables. 
+ * 
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include <linux/fs.h>
+#include <linux/major.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/ext_fs.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/errno.h>
+
+/* One slot per major number; a slot is in use when fops is non-NULL. */
+struct device_struct {
+	const char * name;
+	struct file_operations * fops;
+};
+
+static struct device_struct chrdevs[MAX_CHRDEV] = {
+	{ NULL, NULL },
+};
+
+static struct device_struct blkdevs[MAX_BLKDEV] = {
+	{ NULL, NULL },
+};
+
+/*
+ * Write a listing of all registered character and block devices
+ * ("major name" per line) into "page" and return its length.
+ * NOTE(review): nothing here bounds the output to the size of "page".
+ */
+int get_device_list(char * page)
+{
+	int i;
+	int len;
+
+	len = sprintf(page, "Character devices:\n");
+	for (i = 0; i < MAX_CHRDEV ; i++) {
+		if (chrdevs[i].fops) {
+			len += sprintf(page+len, "%2d %s\n", i, chrdevs[i].name);
+		}
+	}
+	len += sprintf(page+len, "\nBlock devices:\n");
+	for (i = 0; i < MAX_BLKDEV ; i++) {
+		if (blkdevs[i].fops) {
+			len += sprintf(page+len, "%2d %s\n", i, blkdevs[i].name);
+		}
+	}
+	return len;
+}
+
+/* Return the file operations registered for a block major, or NULL. */
+struct file_operations * get_blkfops(unsigned int major)
+{
+	if (major >= MAX_BLKDEV)
+		return NULL;
+	return blkdevs[major].fops;
+}
+
+/* Return the file operations registered for a character major, or NULL. */
+struct file_operations * get_chrfops(unsigned int major)
+{
+	if (major >= MAX_CHRDEV)
+		return NULL;
+	return chrdevs[major].fops;
+}
+
+/*
+ * Common body of register_chrdev() and register_blkdev(); "devs" is the
+ * table to use and "max" its number of slots.
+ *
+ * major == 0 asks for a dynamic major: if "fops" is already registered
+ * its major is returned, otherwise the highest free major is claimed and
+ * returned, or -EBUSY if the table is full.  Major 0 itself is never
+ * handed out.
+ *
+ * major != 0 claims that slot and returns 0; -EINVAL if it is out of
+ * range, -EBUSY if the slot is held by different file operations.
+ */
+static int do_register_device(struct device_struct * devs, unsigned int max,
+	unsigned int major, const char * name, struct file_operations *fops)
+{
+	if (major == 0) {
+		for (major = max-1; major > 0; major--) {
+			if (devs[major].fops == fops)
+				return major;
+		}
+		for (major = max-1; major > 0; major--) {
+			if (devs[major].fops == NULL) {
+				devs[major].name = name;
+				devs[major].fops = fops;
+				return major;
+			}
+		}
+		return -EBUSY;
+	}
+	if (major >= max)
+		return -EINVAL;
+	if (devs[major].fops && devs[major].fops != fops)
+		return -EBUSY;
+	devs[major].name = name;
+	devs[major].fops = fops;
+	return 0;
+}
+
+/*
+ * Common body of unregister_chrdev() and unregister_blkdev(): clear the
+ * slot for "major".  Returns -EINVAL if major is out of range, the slot
+ * is empty, or it was registered under a different name.
+ */
+static int do_unregister_device(struct device_struct * devs, unsigned int max,
+	unsigned int major, const char * name)
+{
+	if (major >= max)
+		return -EINVAL;
+	if (!devs[major].fops)
+		return -EINVAL;
+	if (strcmp(devs[major].name, name))
+		return -EINVAL;
+	devs[major].name = NULL;
+	devs[major].fops = NULL;
+	return 0;
+}
+
+/* Register a character device; see do_register_device() for the rules. */
+int register_chrdev(unsigned int major, const char * name, struct file_operations *fops)
+{
+	return do_register_device(chrdevs, MAX_CHRDEV, major, name, fops);
+}
+
+/* Register a block device; see do_register_device() for the rules. */
+int register_blkdev(unsigned int major, const char * name, struct file_operations *fops)
+{
+	return do_register_device(blkdevs, MAX_BLKDEV, major, name, fops);
+}
+
+/* Unregister a character device; see do_unregister_device(). */
+int unregister_chrdev(unsigned int major, const char * name)
+{
+	return do_unregister_device(chrdevs, MAX_CHRDEV, major, name);
+}
+
+/* Unregister a block device; see do_unregister_device(). */
+int unregister_blkdev(unsigned int major, const char * name)
+{
+	return do_unregister_device(blkdevs, MAX_BLKDEV, major, name);
+}
+
+/*
+ * This routine checks whether a removable media has been changed,
+ * and invalidates all buffer-cache-entries in that case. This
+ * is a relatively slow routine, so we have to try to minimize using
+ * it. Thus it is called only upon a 'mount' or 'open'. This
+ * is the best way of combining speed and utility, I think. 
+ * People changing diskettes in the middle of an operation deserve
+ * to lose :-)
+ *
+ * Returns 1 if a change was detected and handled, 0 otherwise (also
+ * when the driver cannot report media changes at all).
+ */
+int check_disk_change(dev_t dev)
+{
+	struct file_operations * fops;
+	int major = MAJOR(dev);
+	int n;
+
+	if (major >= MAX_BLKDEV)
+		return 0;
+	fops = blkdevs[major].fops;
+	if (fops == NULL || fops->check_media_change == NULL)
+		return 0;
+	if (!fops->check_media_change(dev))
+		return 0;
+
+	printk("VFS: Disk change detected on device %d/%d\n",
+		MAJOR(dev), MINOR(dev));
+
+	/* drop every superblock that lives on this device ... */
+	for (n = 0 ; n < NR_SUPER ; n++) {
+		if (super_blocks[n].s_dev != dev)
+			continue;
+		put_super(super_blocks[n].s_dev);
+	}
+	/* ... then its cached inodes and buffers */
+	invalidate_inodes(dev);
+	invalidate_buffers(dev);
+
+	if (fops->revalidate != NULL)
+		fops->revalidate(dev);
+	return 1;
+}
+
+/*
+ * Called every time a block special file is opened
+ *
+ * Installs the driver's file operations in filp and calls the driver's
+ * own open() if there is one.  -ENODEV if no driver owns the major.
+ */
+int blkdev_open(struct inode * inode, struct file * filp)
+{
+	struct file_operations * fops;
+	int major = MAJOR(inode->i_rdev);
+
+	if (major >= MAX_BLKDEV)
+		return -ENODEV;
+	fops = blkdevs[major].fops;
+	if (fops == NULL)
+		return -ENODEV;
+	filp->f_op = fops;
+	if (fops->open == NULL)
+		return 0;
+	return fops->open(inode,filp);
+}	
+
+/*
+ * Dummy default file-operations: the only thing this does
+ * is contain the open that then fills in the correct operations
+ * depending on the special file... 
+ */
+struct file_operations def_blk_fops = {
+	NULL,		/* lseek */
+	NULL,		/* read */
+	NULL,		/* write */
+	NULL,		/* readdir */
+	NULL,		/* select */
+	NULL,		/* ioctl */
+	NULL,		/* mmap */
+	blkdev_open,	/* open */
+	NULL,		/* release */
+};
+/*
+ * Inode operations for block special files: only the default file
+ * operations above are supplied, every other slot is NULL.
+ */
+struct inode_operations blkdev_inode_operations = {
+	&def_blk_fops,		/* default file operations */
+	NULL,			/* create */
+	NULL,			/* lookup */
+	NULL,			/* link */
+	NULL,			/* unlink */
+	NULL,			/* symlink */
+	NULL,			/* mkdir */
+	NULL,			/* rmdir */
+	NULL,			/* mknod */
+	NULL,			/* rename */
+	NULL,			/* readlink */
+	NULL,			/* follow_link */
+	NULL,			/* bmap */
+	NULL,			/* truncate */
+	NULL			/* permission */
+};
+
+/*
+ * Called every time a character special file is opened
+ *
+ * Looks up the driver's file operations by major number, installs them
+ * in filp->f_op and calls the driver's own open() if it has one.
+ * Returns -ENODEV when no driver is registered for the major.
+ */
+int chrdev_open(struct inode * inode, struct file * filp)
+{
+	int i;
+
+	i = MAJOR(inode->i_rdev);
+	if (i >= MAX_CHRDEV || !chrdevs[i].fops)
+		return -ENODEV;
+	filp->f_op = chrdevs[i].fops;
+	if (filp->f_op->open)
+		return filp->f_op->open(inode,filp);
+	return 0;
+}
+
+/*
+ * Dummy default file-operations: the only thing this does
+ * is contain the open that then fills in the correct operations
+ * depending on the special file... 
+ */
+struct file_operations def_chr_fops = {
+	NULL,		/* lseek */
+	NULL,		/* read */
+	NULL,		/* write */
+	NULL,		/* readdir */
+	NULL,		/* select */
+	NULL,		/* ioctl */
+	NULL,		/* mmap */
+	chrdev_open,	/* open */
+	NULL,		/* release */
+};
+/*
+ * Inode operations for character special files: only the default file
+ * operations above are supplied, every other slot is NULL.
+ */
+struct inode_operations chrdev_inode_operations = {
+	&def_chr_fops,		/* default file operations */
+	NULL,			/* create */
+	NULL,			/* lookup */
+	NULL,			/* link */
+	NULL,			/* unlink */
+	NULL,			/* symlink */
+	NULL,			/* mkdir */
+	NULL,			/* rmdir */
+	NULL,			/* mknod */
+	NULL,			/* rename */
+	NULL,			/* readlink */
+	NULL,			/* follow_link */
+	NULL,			/* bmap */
+	NULL,			/* truncate */
+	NULL			/* permission */
+};
diff --git a/fs/exec.c b/fs/exec.c
new file mode 100644
index 000000000..586098cd0
--- /dev/null
+++ b/fs/exec.c
@@ -0,0 +1,961 @@
+/*
+ *  linux/fs/exec.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * #!-checking implemented by tytso.
+ */
+
+/*
+ * Demand-loading implemented 01.12.91 - no need to read anything but
+ * the header into memory. The inode of the executable is put into
+ * "current->executable", and page faults do the actual loading. Clean.
+ *
+ * Once more I can proudly say that linux stood up to being changed: it
+ * was less than 2 hours work to get demand-loading completely implemented.
+ *
+ * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
+ * current->executable is only used by the procfs.  This allows a dispatch
+ * table to check for several different types  of binary formats.  We keep
+ * trying until we recognize the file or we run out of supported binary
+ * formats. 
+ */ + +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/a.out.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/malloc.h> +#include <linux/binfmts.h> +#include <linux/personality.h> + +#include <asm/system.h> +#include <asm/segment.h> + +asmlinkage int sys_exit(int exit_code); +asmlinkage int sys_brk(unsigned long); + +static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); +static int load_aout_library(int fd); +static int aout_core_dump(long signr, struct pt_regs * regs); + +/* + * Here are the actual binaries that will be accepted: + * add more with "register_binfmt()".. + */ +extern struct linux_binfmt elf_format; + +static struct linux_binfmt aout_format = { +#ifndef CONFIG_BINFMT_ELF + NULL, NULL, load_aout_binary, load_aout_library, aout_core_dump +#else + &elf_format, NULL, load_aout_binary, load_aout_library, aout_core_dump +#endif +}; + +static struct linux_binfmt *formats = &aout_format; + +int register_binfmt(struct linux_binfmt * fmt) +{ + struct linux_binfmt ** tmp = &formats; + + if (!fmt) + return -EINVAL; + if (fmt->next) + return -EBUSY; + while (*tmp) { + if (fmt == *tmp) + return -EBUSY; + tmp = &(*tmp)->next; + } + *tmp = fmt; + return 0; +} + +int unregister_binfmt(struct linux_binfmt * fmt) +{ + struct linux_binfmt ** tmp = &formats; + + while (*tmp) { + if (fmt == *tmp) { + *tmp = fmt->next; + return 0; + } + tmp = &(*tmp)->next; + } + return -EINVAL; +} + +int open_inode(struct inode * inode, int mode) +{ + int error, fd; + struct file *f, **fpp; + + if (!inode->i_op || !inode->i_op->default_file_ops) + return -EINVAL; + f = get_empty_filp(); + if (!f) + return -EMFILE; + fd = 0; + fpp = current->files->fd; + for (;;) { + if (!*fpp) + break; + if (++fd > NR_OPEN) + return -ENFILE; 
+ fpp++; + } + *fpp = f; + f->f_flags = mode; + f->f_mode = (mode+1) & O_ACCMODE; + f->f_inode = inode; + f->f_pos = 0; + f->f_reada = 0; + f->f_op = inode->i_op->default_file_ops; + if (f->f_op->open) { + error = f->f_op->open(inode,f); + if (error) { + *fpp = NULL; + f->f_count--; + return error; + } + } + inode->i_count++; + return fd; +} + +/* + * These are the only things you should do on a core-file: use only these + * macros to write out all the necessary info. + */ +#define DUMP_WRITE(addr,nr) \ +while (file.f_op->write(inode,&file,(char *)(addr),(nr)) != (nr)) goto close_coredump + +#define DUMP_SEEK(offset) \ +if (file.f_op->lseek) { \ + if (file.f_op->lseek(inode,&file,(offset),0) != (offset)) \ + goto close_coredump; \ +} else file.f_pos = (offset) + +/* + * Routine writes a core dump image in the current directory. + * Currently only a stub-function. + * + * Note that setuid/setgid files won't make a core-dump if the uid/gid + * changed due to the set[u|g]id. It's enforced by the "current->dumpable" + * field, which also makes sure the core-dumps won't be recursive if the + * dumping of the process results in another error.. + */ +static int aout_core_dump(long signr, struct pt_regs * regs) +{ + struct inode * inode = NULL; + struct file file; + unsigned short fs; + int has_dumped = 0; + char corefile[6+sizeof(current->comm)]; + int i; + register int dump_start, dump_size; + struct user dump; + + if (!current->dumpable) + return 0; + current->dumpable = 0; + +/* See if we have enough room to write the upage. 
*/ + if (current->rlim[RLIMIT_CORE].rlim_cur < PAGE_SIZE) + return 0; + fs = get_fs(); + set_fs(KERNEL_DS); + memcpy(corefile,"core.",5); +#if 0 + memcpy(corefile+5,current->comm,sizeof(current->comm)); +#else + corefile[4] = '\0'; +#endif + if (open_namei(corefile,O_CREAT | 2 | O_TRUNC,0600,&inode,NULL)) { + inode = NULL; + goto end_coredump; + } + if (!S_ISREG(inode->i_mode)) + goto end_coredump; + if (!inode->i_op || !inode->i_op->default_file_ops) + goto end_coredump; + if (get_write_access(inode)) + goto end_coredump; + file.f_mode = 3; + file.f_flags = 0; + file.f_count = 1; + file.f_inode = inode; + file.f_pos = 0; + file.f_reada = 0; + file.f_op = inode->i_op->default_file_ops; + if (file.f_op->open) + if (file.f_op->open(inode,&file)) + goto done_coredump; + if (!file.f_op->write) + goto close_coredump; + has_dumped = 1; +/* changed the size calculations - should hopefully work better. lbt */ + dump.magic = CMAGIC; + dump.start_code = 0; + dump.start_stack = regs->esp & ~(PAGE_SIZE - 1); + dump.u_tsize = ((unsigned long) current->mm->end_code) >> 12; + dump.u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> 12; + dump.u_dsize -= dump.u_tsize; + dump.u_ssize = 0; + for(i=0; i<8; i++) dump.u_debugreg[i] = current->debugreg[i]; + if (dump.start_stack < TASK_SIZE) + dump.u_ssize = ((unsigned long) (TASK_SIZE - dump.start_stack)) >> 12; +/* If the size of the dump file exceeds the rlimit, then see what would happen + if we wrote the stack, but not the data area. */ + if ((dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE > + current->rlim[RLIMIT_CORE].rlim_cur) + dump.u_dsize = 0; +/* Make sure we have enough room to write the stack and data areas. 
*/ + if ((dump.u_ssize+1) * PAGE_SIZE > + current->rlim[RLIMIT_CORE].rlim_cur) + dump.u_ssize = 0; + strncpy(dump.u_comm, current->comm, sizeof(current->comm)); + dump.u_ar0 = (struct pt_regs *)(((int)(&dump.regs)) -((int)(&dump))); + dump.signal = signr; + dump.regs = *regs; +/* Flag indicating the math stuff is valid. We don't support this for the + soft-float routines yet */ + if (hard_math) { + if ((dump.u_fpvalid = current->used_math) != 0) { + if (last_task_used_math == current) + __asm__("clts ; fnsave %0": :"m" (dump.i387)); + else + memcpy(&dump.i387,&current->tss.i387.hard,sizeof(dump.i387)); + } + } else { + /* we should dump the emulator state here, but we need to + convert it into standard 387 format first.. */ + dump.u_fpvalid = 0; + } + set_fs(KERNEL_DS); +/* struct user */ + DUMP_WRITE(&dump,sizeof(dump)); +/* Now dump all of the user data. Include malloced stuff as well */ + DUMP_SEEK(PAGE_SIZE); +/* now we start writing out the user space info */ + set_fs(USER_DS); +/* Dump the data area */ + if (dump.u_dsize != 0) { + dump_start = dump.u_tsize << 12; + dump_size = dump.u_dsize << 12; + DUMP_WRITE(dump_start,dump_size); + } +/* Now prepare to dump the stack area */ + if (dump.u_ssize != 0) { + dump_start = dump.start_stack; + dump_size = dump.u_ssize << 12; + DUMP_WRITE(dump_start,dump_size); + } +/* Finally dump the task struct. Not be used by gdb, but could be useful */ + set_fs(KERNEL_DS); + DUMP_WRITE(current,sizeof(*current)); +close_coredump: + if (file.f_op->release) + file.f_op->release(inode,&file); +done_coredump: + put_write_access(inode); +end_coredump: + set_fs(fs); + iput(inode); + return has_dumped; +} + +/* + * Note that a shared library must be both readable and executable due to + * security reasons. + * + * Also note that we take the address to load from from the file itself. 
+ */ +asmlinkage int sys_uselib(const char * library) +{ + int fd, retval; + struct file * file; + struct linux_binfmt * fmt; + + fd = sys_open(library, 0, 0); + if (fd < 0) + return fd; + file = current->files->fd[fd]; + retval = -ENOEXEC; + if (file && file->f_inode && file->f_op && file->f_op->read) { + for (fmt = formats ; fmt ; fmt = fmt->next) { + int (*fn)(int) = fmt->load_shlib; + if (!fn) + break; + retval = fn(fd); + if (retval != -ENOEXEC) + break; + } + } + sys_close(fd); + return retval; +} + +/* + * create_tables() parses the env- and arg-strings in new user + * memory and creates the pointer tables from them, and puts their + * addresses on the "stack", returning the new stack pointer value. + */ +unsigned long * create_tables(char * p,int argc,int envc,int ibcs) +{ + unsigned long *argv,*envp; + unsigned long * sp; + struct vm_area_struct *mpnt; + + mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL); + if (mpnt) { + mpnt->vm_task = current; + mpnt->vm_start = PAGE_MASK & (unsigned long) p; + mpnt->vm_end = TASK_SIZE; + mpnt->vm_page_prot = PAGE_PRIVATE|PAGE_DIRTY; + mpnt->vm_flags = VM_STACK_FLAGS; + mpnt->vm_share = NULL; + mpnt->vm_ops = NULL; + mpnt->vm_offset = 0; + mpnt->vm_inode = NULL; + mpnt->vm_pte = 0; + insert_vm_struct(current, mpnt); + } + sp = (unsigned long *) (0xfffffffc & (unsigned long) p); + sp -= envc+1; + envp = sp; + sp -= argc+1; + argv = sp; + if (!ibcs) { + put_fs_long((unsigned long)envp,--sp); + put_fs_long((unsigned long)argv,--sp); + } + put_fs_long((unsigned long)argc,--sp); + current->mm->arg_start = (unsigned long) p; + while (argc-->0) { + put_fs_long((unsigned long) p,argv++); + while (get_fs_byte(p++)) /* nothing */ ; + } + put_fs_long(0,argv); + current->mm->arg_end = current->mm->env_start = (unsigned long) p; + while (envc-->0) { + put_fs_long((unsigned long) p,envp++); + while (get_fs_byte(p++)) /* nothing */ ; + } + put_fs_long(0,envp); + current->mm->env_end = (unsigned long) p; + return sp; 
+} + +/* + * count() counts the number of arguments/envelopes + * + * We also do some limited EFAULT checking: this isn't complete, but + * it does cover most cases. I'll have to do this correctly some day.. + */ +static int count(char ** argv) +{ + int error, i = 0; + char ** tmp, *p; + + if ((tmp = argv) != NULL) { + error = verify_area(VERIFY_READ, tmp, sizeof(char *)); + if (error) + return error; + while ((p = (char *) get_fs_long((unsigned long *) (tmp++))) != NULL) { + i++; + error = verify_area(VERIFY_READ, p, 1); + if (error) + return error; + } + } + return i; +} + +/* + * 'copy_string()' copies argument/envelope strings from user + * memory to free pages in kernel mem. These are in a format ready + * to be put directly into the top of new user memory. + * + * Modified by TYT, 11/24/91 to add the from_kmem argument, which specifies + * whether the string and the string array are from user or kernel segments: + * + * from_kmem argv * argv ** + * 0 user space user space + * 1 kernel space user space + * 2 kernel space kernel space + * + * We do this by playing games with the fs segment register. Since it + * is expensive to load a segment register, we try to avoid calling + * set_fs() unless we absolutely have to. 
+ */ +unsigned long copy_strings(int argc,char ** argv,unsigned long *page, + unsigned long p, int from_kmem) +{ + char *tmp, *pag = NULL; + int len, offset = 0; + unsigned long old_fs, new_fs; + + if (!p) + return 0; /* bullet-proofing */ + new_fs = get_ds(); + old_fs = get_fs(); + if (from_kmem==2) + set_fs(new_fs); + while (argc-- > 0) { + if (from_kmem == 1) + set_fs(new_fs); + if (!(tmp = (char *)get_fs_long(((unsigned long *)argv)+argc))) + panic("VFS: argc is wrong"); + if (from_kmem == 1) + set_fs(old_fs); + len=0; /* remember zero-padding */ + do { + len++; + } while (get_fs_byte(tmp++)); + if (p < len) { /* this shouldn't happen - 128kB */ + set_fs(old_fs); + return 0; + } + while (len) { + --p; --tmp; --len; + if (--offset < 0) { + offset = p % PAGE_SIZE; + if (from_kmem==2) + set_fs(old_fs); + if (!(pag = (char *) page[p/PAGE_SIZE]) && + !(pag = (char *) page[p/PAGE_SIZE] = + (unsigned long *) get_free_page(GFP_USER))) + return 0; + if (from_kmem==2) + set_fs(new_fs); + + } + *(pag + offset) = get_fs_byte(tmp); + } + } + if (from_kmem==2) + set_fs(old_fs); + return p; +} + +unsigned long change_ldt(unsigned long text_size,unsigned long * page) +{ + unsigned long code_limit,data_limit,code_base,data_base; + int i; + + code_limit = TASK_SIZE; + data_limit = TASK_SIZE; + code_base = data_base = 0; + current->mm->start_code = code_base; + data_base += data_limit; + for (i=MAX_ARG_PAGES-1 ; i>=0 ; i--) { + data_base -= PAGE_SIZE; + if (page[i]) { + current->mm->rss++; + put_dirty_page(current,page[i],data_base); + } + } + return data_limit; +} + +/* + * Read in the complete executable. This is used for "-N" files + * that aren't on a block boundary, and for files on filesystems + * without bmap support. 
+ */ +int read_exec(struct inode *inode, unsigned long offset, + char * addr, unsigned long count) +{ + struct file file; + int result = -ENOEXEC; + + if (!inode->i_op || !inode->i_op->default_file_ops) + goto end_readexec; + file.f_mode = 1; + file.f_flags = 0; + file.f_count = 1; + file.f_inode = inode; + file.f_pos = 0; + file.f_reada = 0; + file.f_op = inode->i_op->default_file_ops; + if (file.f_op->open) + if (file.f_op->open(inode,&file)) + goto end_readexec; + if (!file.f_op || !file.f_op->read) + goto close_readexec; + if (file.f_op->lseek) { + if (file.f_op->lseek(inode,&file,offset,0) != offset) + goto close_readexec; + } else + file.f_pos = offset; + if (get_fs() == USER_DS) { + result = verify_area(VERIFY_WRITE, addr, count); + if (result) + goto close_readexec; + } + result = file.f_op->read(inode, &file, addr, count); +close_readexec: + if (file.f_op->release) + file.f_op->release(inode,&file); +end_readexec: + return result; +} + + +/* + * This function flushes out all traces of the currently running executable so + * that a new one can be started + */ + +void flush_old_exec(struct linux_binprm * bprm) +{ + int i; + int ch; + char * name; + struct vm_area_struct * mpnt, *mpnt1; + + current->dumpable = 1; + name = bprm->filename; + for (i=0; (ch = *(name++)) != '\0';) { + if (ch == '/') + i = 0; + else + if (i < 15) + current->comm[i++] = ch; + } + current->comm[i] = '\0'; + /* Release all of the old mmap stuff. */ + + mpnt = current->mm->mmap; + current->mm->mmap = NULL; + while (mpnt) { + mpnt1 = mpnt->vm_next; + if (mpnt->vm_ops && mpnt->vm_ops->close) + mpnt->vm_ops->close(mpnt); + if (mpnt->vm_inode) + iput(mpnt->vm_inode); + kfree(mpnt); + mpnt = mpnt1; + } + + /* Flush the old ldt stuff... 
*/ + if (current->ldt) { + free_page((unsigned long) current->ldt); + current->ldt = NULL; + for (i=1 ; i<NR_TASKS ; i++) { + if (task[i] == current) { + set_ldt_desc(gdt+(i<<1)+ + FIRST_LDT_ENTRY,&default_ldt, 1); + load_ldt(i); + } + } + } + + for (i=0 ; i<8 ; i++) current->debugreg[i] = 0; + + if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || + !permission(bprm->inode,MAY_READ)) + current->dumpable = 0; + current->signal = 0; + for (i=0 ; i<32 ; i++) { + current->sigaction[i].sa_mask = 0; + current->sigaction[i].sa_flags = 0; + if (current->sigaction[i].sa_handler != SIG_IGN) + current->sigaction[i].sa_handler = NULL; + } + for (i=0 ; i<NR_OPEN ; i++) + if (FD_ISSET(i,&current->files->close_on_exec)) + sys_close(i); + FD_ZERO(&current->files->close_on_exec); + clear_page_tables(current); + if (last_task_used_math == current) + last_task_used_math = NULL; + current->used_math = 0; +} + +/* + * sys_execve() executes a new program. + */ +int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs) +{ + struct linux_binprm bprm; + struct linux_binfmt * fmt; + unsigned long old_fs; + int i; + int retval; + int sh_bang = 0; + + if (regs->cs != USER_CS) + return -EINVAL; + bprm.p = PAGE_SIZE*MAX_ARG_PAGES-4; + for (i=0 ; i<MAX_ARG_PAGES ; i++) /* clear page-table */ + bprm.page[i] = 0; + retval = open_namei(filename, 0, 0, &bprm.inode, NULL); + if (retval) + return retval; + bprm.filename = filename; + if ((bprm.argc = count(argv)) < 0) + return bprm.argc; + if ((bprm.envc = count(envp)) < 0) + return bprm.envc; + +restart_interp: + if (!S_ISREG(bprm.inode->i_mode)) { /* must be regular file */ + retval = -EACCES; + goto exec_error2; + } + if (IS_NOEXEC(bprm.inode)) { /* FS mustn't be mounted noexec */ + retval = -EPERM; + goto exec_error2; + } + if (!bprm.inode->i_sb) { + retval = -EACCES; + goto exec_error2; + } + i = bprm.inode->i_mode; + if (IS_NOSUID(bprm.inode) && (((i & S_ISUID) && bprm.inode->i_uid != current-> + euid) || ((i & 
S_ISGID) && !in_group_p(bprm.inode->i_gid))) && !suser()) { + retval = -EPERM; + goto exec_error2; + } + /* make sure we don't let suid, sgid files be ptraced. */ + if (current->flags & PF_PTRACED) { + bprm.e_uid = current->euid; + bprm.e_gid = current->egid; + } else { + bprm.e_uid = (i & S_ISUID) ? bprm.inode->i_uid : current->euid; + bprm.e_gid = (i & S_ISGID) ? bprm.inode->i_gid : current->egid; + } + if (!permission(bprm.inode, MAY_EXEC) || + (!(bprm.inode->i_mode & 0111) && fsuser())) { + retval = -EACCES; + goto exec_error2; + } + /* better not execute files which are being written to */ + if (bprm.inode->i_wcount > 0) { + retval = -ETXTBSY; + goto exec_error2; + } + memset(bprm.buf,0,sizeof(bprm.buf)); + old_fs = get_fs(); + set_fs(get_ds()); + retval = read_exec(bprm.inode,0,bprm.buf,128); + set_fs(old_fs); + if (retval < 0) + goto exec_error2; + if ((bprm.buf[0] == '#') && (bprm.buf[1] == '!') && (!sh_bang)) { + /* + * This section does the #! interpretation. + * Sorta complicated, but hopefully it will work. -TYT + */ + + char *cp, *interp, *i_name, *i_arg; + + iput(bprm.inode); + bprm.buf[127] = '\0'; + if ((cp = strchr(bprm.buf, '\n')) == NULL) + cp = bprm.buf+127; + *cp = '\0'; + while (cp > bprm.buf) { + cp--; + if ((*cp == ' ') || (*cp == '\t')) + *cp = '\0'; + else + break; + } + for (cp = bprm.buf+2; (*cp == ' ') || (*cp == '\t'); cp++); + if (!cp || *cp == '\0') { + retval = -ENOEXEC; /* No interpreter name found */ + goto exec_error1; + } + interp = i_name = cp; + i_arg = 0; + for ( ; *cp && (*cp != ' ') && (*cp != '\t'); cp++) { + if (*cp == '/') + i_name = cp+1; + } + while ((*cp == ' ') || (*cp == '\t')) + *cp++ = '\0'; + if (*cp) + i_arg = cp; + /* + * OK, we've parsed out the interpreter name and + * (optional) argument. 
+ */ + if (sh_bang++ == 0) { + bprm.p = copy_strings(bprm.envc, envp, bprm.page, bprm.p, 0); + bprm.p = copy_strings(--bprm.argc, argv+1, bprm.page, bprm.p, 0); + } + /* + * Splice in (1) the interpreter's name for argv[0] + * (2) (optional) argument to interpreter + * (3) filename of shell script + * + * This is done in reverse order, because of how the + * user environment and arguments are stored. + */ + bprm.p = copy_strings(1, &bprm.filename, bprm.page, bprm.p, 2); + bprm.argc++; + if (i_arg) { + bprm.p = copy_strings(1, &i_arg, bprm.page, bprm.p, 2); + bprm.argc++; + } + bprm.p = copy_strings(1, &i_name, bprm.page, bprm.p, 2); + bprm.argc++; + if (!bprm.p) { + retval = -E2BIG; + goto exec_error1; + } + /* + * OK, now restart the process with the interpreter's inode. + * Note that we use open_namei() as the name is now in kernel + * space, and we don't need to copy it. + */ + retval = open_namei(interp, 0, 0, &bprm.inode, NULL); + if (retval) + goto exec_error1; + goto restart_interp; + } + if (!sh_bang) { + bprm.p = copy_strings(bprm.envc,envp,bprm.page,bprm.p,0); + bprm.p = copy_strings(bprm.argc,argv,bprm.page,bprm.p,0); + if (!bprm.p) { + retval = -E2BIG; + goto exec_error2; + } + } + + bprm.sh_bang = sh_bang; + for (fmt = formats ; fmt ; fmt = fmt->next) { + int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary; + if (!fn) + break; + retval = fn(&bprm, regs); + if (retval >= 0) { + iput(bprm.inode); + current->did_exec = 1; + return retval; + } + if (retval != -ENOEXEC) + break; + } +exec_error2: + iput(bprm.inode); +exec_error1: + for (i=0 ; i<MAX_ARG_PAGES ; i++) + free_page(bprm.page[i]); + return(retval); +} + +/* + * sys_execve() executes a new program. 
+ */ +asmlinkage int sys_execve(struct pt_regs regs) +{ + int error; + char * filename; + + error = getname((char *) regs.ebx, &filename); + if (error) + return error; + error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, &regs); + putname(filename); + return error; +} + +static void set_brk(unsigned long start, unsigned long end) +{ + start = PAGE_ALIGN(start); + end = PAGE_ALIGN(end); + if (end <= start) + return; + do_mmap(NULL, start, end - start, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE, 0); +} + +/* + * These are the functions used to load a.out style executables and shared + * libraries. There is no binary dependent code anywhere else. + */ + +static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) +{ + struct exec ex; + struct file * file; + int fd, error; + unsigned long p = bprm->p; + unsigned long fd_offset; + + ex = *((struct exec *) bprm->buf); /* exec-header */ + if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && + N_MAGIC(ex) != QMAGIC) || + ex.a_trsize || ex.a_drsize || + bprm->inode->i_size < ex.a_text+ex.a_data+ex.a_syms+N_TXTOFF(ex)) { + return -ENOEXEC; + } + + current->personality = PER_LINUX; + fd_offset = N_TXTOFF(ex); + if (N_MAGIC(ex) == ZMAGIC && fd_offset != BLOCK_SIZE) { + printk(KERN_NOTICE "N_TXTOFF != BLOCK_SIZE. See a.out.h.\n"); + return -ENOEXEC; + } + + if (N_MAGIC(ex) == ZMAGIC && ex.a_text && + (fd_offset < bprm->inode->i_sb->s_blocksize)) { + printk(KERN_NOTICE "N_TXTOFF < BLOCK_SIZE. 
Please convert binary.\n"); + return -ENOEXEC; + } + + /* OK, This is the point of no return */ + flush_old_exec(bprm); + + current->mm->brk = ex.a_bss + + (current->mm->start_brk = + (current->mm->end_data = ex.a_data + + (current->mm->end_code = ex.a_text + + (current->mm->start_code = N_TXTADDR(ex))))); + current->mm->rss = 0; + current->mm->mmap = NULL; + current->suid = current->euid = current->fsuid = bprm->e_uid; + current->sgid = current->egid = current->fsgid = bprm->e_gid; + if (N_MAGIC(ex) == OMAGIC) { + do_mmap(NULL, 0, ex.a_text+ex.a_data, + PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE, 0); + read_exec(bprm->inode, 32, (char *) 0, ex.a_text+ex.a_data); + } else { + if (ex.a_text & 0xfff || ex.a_data & 0xfff) + printk(KERN_NOTICE "executable not page aligned\n"); + + fd = open_inode(bprm->inode, O_RDONLY); + + if (fd < 0) + return fd; + file = current->files->fd[fd]; + if (!file->f_op || !file->f_op->mmap) { + sys_close(fd); + do_mmap(NULL, 0, ex.a_text+ex.a_data, + PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE, 0); + read_exec(bprm->inode, fd_offset, + (char *) N_TXTADDR(ex), ex.a_text+ex.a_data); + goto beyond_if; + } + + error = do_mmap(file, N_TXTADDR(ex), ex.a_text, + PROT_READ | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, + fd_offset); + + if (error != N_TXTADDR(ex)) { + sys_close(fd); + send_sig(SIGKILL, current, 0); + return error; + } + + error = do_mmap(file, N_TXTADDR(ex) + ex.a_text, ex.a_data, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, + fd_offset + ex.a_text); + sys_close(fd); + if (error != N_TXTADDR(ex) + ex.a_text) { + send_sig(SIGKILL, current, 0); + return error; + } + } +beyond_if: + if (current->exec_domain && current->exec_domain->use_count) + (*current->exec_domain->use_count)--; + if (current->binfmt && current->binfmt->use_count) + (*current->binfmt->use_count)--; + current->exec_domain = 
lookup_exec_domain(current->personality); + current->binfmt = &aout_format; + if (current->exec_domain && current->exec_domain->use_count) + (*current->exec_domain->use_count)++; + if (current->binfmt && current->binfmt->use_count) + (*current->binfmt->use_count)++; + + set_brk(current->mm->start_brk, current->mm->brk); + + p += change_ldt(ex.a_text,bprm->page); + p -= MAX_ARG_PAGES*PAGE_SIZE; + p = (unsigned long)create_tables((char *)p, + bprm->argc, bprm->envc, + current->personality != PER_LINUX); + current->mm->start_stack = p; + regs->eip = ex.a_entry; /* eip, magic happens :-) */ + regs->esp = p; /* stack pointer */ + if (current->flags & PF_PTRACED) + send_sig(SIGTRAP, current, 0); + return 0; +} + + +static int load_aout_library(int fd) +{ + struct file * file; + struct exec ex; + struct inode * inode; + unsigned int len; + unsigned int bss; + unsigned int start_addr; + int error; + + file = current->files->fd[fd]; + inode = file->f_inode; + + set_fs(KERNEL_DS); + if (file->f_op->read(inode, file, (char *) &ex, sizeof(ex)) != sizeof(ex)) { + return -EACCES; + } + set_fs(USER_DS); + + /* We come in here for the regular a.out style of shared libraries */ + if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || ex.a_trsize || + ex.a_drsize || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || + inode->i_size < ex.a_text+ex.a_data+ex.a_syms+N_TXTOFF(ex)) { + return -ENOEXEC; + } + if (N_MAGIC(ex) == ZMAGIC && N_TXTOFF(ex) && + (N_TXTOFF(ex) < inode->i_sb->s_blocksize)) { + printk("N_TXTOFF < BLOCK_SIZE. Please convert library\n"); + return -ENOEXEC; + } + + if (N_FLAGS(ex)) return -ENOEXEC; + + /* For QMAGIC, the starting address is 0x20 into the page. We mask + this off to get the starting address for the page */ + + start_addr = ex.a_entry & 0xfffff000; + + /* Now use mmap to map the library into memory. 
*/ + error = do_mmap(file, start_addr, ex.a_text + ex.a_data, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, + N_TXTOFF(ex)); + if (error != start_addr) + return error; + len = PAGE_ALIGN(ex.a_text + ex.a_data); + bss = ex.a_text + ex.a_data + ex.a_bss; + if (bss > len) + do_mmap(NULL, start_addr + len, bss-len, + PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_PRIVATE|MAP_FIXED, 0); + return 0; +} diff --git a/fs/ext/Makefile b/fs/ext/Makefile new file mode 100644 index 000000000..5e23319c8 --- /dev/null +++ b/fs/ext/Makefile @@ -0,0 +1,31 @@ +# +# Makefile for the linux ext-filesystem routines. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +.c.s: + $(CC) $(CFLAGS) -S $< +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< + +OBJS= freelists.o truncate.o namei.o inode.o \ + file.o dir.o symlink.o fsync.o + +ext.o: $(OBJS) + $(LD) -r -o ext.o $(OBJS) + +dep: + $(CPP) -M *.c > .depend + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/fs/ext/dir.c b/fs/ext/dir.c new file mode 100644 index 000000000..10e30fafa --- /dev/null +++ b/fs/ext/dir.c @@ -0,0 +1,131 @@ +/* + * linux/fs/ext/dir.c + * + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * + * from + * + * linux/fs/minix/dir.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext directory handling functions + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/ext_fs.h> +#include <linux/stat.h> + +#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) +#define ROUND_UP(x) (((x)+3) & ~3) + +static int ext_dir_read(struct inode * inode, struct file * filp, char * buf, int count) +{ + return -EISDIR; +} + 
+static int ext_readdir(struct inode *, struct file *, struct dirent *, int); + +static struct file_operations ext_dir_operations = { + NULL, /* lseek - default */ + ext_dir_read, /* read */ + NULL, /* write - bad */ + ext_readdir, /* readdir */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + file_fsync /* fsync */ +}; + +/* + * directories can handle most operations... + */ +struct inode_operations ext_dir_inode_operations = { + &ext_dir_operations, /* default directory file-ops */ + ext_create, /* create */ + ext_lookup, /* lookup */ + ext_link, /* link */ + ext_unlink, /* unlink */ + ext_symlink, /* symlink */ + ext_mkdir, /* mkdir */ + ext_rmdir, /* rmdir */ + ext_mknod, /* mknod */ + ext_rename, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + ext_truncate, /* truncate */ + NULL /* permission */ +}; + +static int ext_readdir(struct inode * inode, struct file * filp, + struct dirent * dirent, int count) +{ + unsigned int i; + unsigned int ret; + off_t offset; + char c; + struct buffer_head * bh; + struct ext_dir_entry * de; + + if (!inode || !S_ISDIR(inode->i_mode)) + return -EBADF; + if ((filp->f_pos & 7) != 0) + return -EBADF; + ret = 0; + while (!ret && filp->f_pos < inode->i_size) { + offset = filp->f_pos & 1023; + bh = ext_bread(inode,(filp->f_pos)>>BLOCK_SIZE_BITS,0); + if (!bh) { + filp->f_pos += 1024-offset; + continue; + } + for (i = 0; i < 1024 && i < offset; ) { + de = (struct ext_dir_entry *) (bh->b_data + i); + if (!de->rec_len) + break; + i += de->rec_len; + } + offset = i; + de = (struct ext_dir_entry *) (offset + bh->b_data); + while (!ret && offset < 1024 && filp->f_pos < inode->i_size) { + if (de->rec_len < 8 || de->rec_len % 8 != 0 || + de->rec_len < de->name_len + 8 || + (de->rec_len + (off_t) filp->f_pos - 1) / 1024 > ((off_t) filp->f_pos / 1024)) { + printk ("ext_readdir: bad dir entry, skipping\n"); 
+ printk ("dev=%d, dir=%ld, offset=%ld, rec_len=%d, name_len=%d\n", + inode->i_dev, inode->i_ino, offset, de->rec_len, de->name_len); + filp->f_pos += 1024-offset; + if (filp->f_pos > inode->i_size) + filp->f_pos = inode->i_size; + continue; + } + offset += de->rec_len; + filp->f_pos += de->rec_len; + if (de->inode) { + for (i = 0; i < de->name_len; i++) + if ((c = de->name[i]) != 0) + put_fs_byte(c,i+dirent->d_name); + else + break; + if (i) { + put_fs_long(de->inode,&dirent->d_ino); + put_fs_byte(0,i+dirent->d_name); + put_fs_word(i,&dirent->d_reclen); + ret = ROUND_UP(NAME_OFFSET(dirent)+i+1); + break; + } + } + de = (struct ext_dir_entry *) ((char *) de + + de->rec_len); + } + brelse(bh); + } + return ret; +} diff --git a/fs/ext/file.c b/fs/ext/file.c new file mode 100644 index 000000000..f32cdd898 --- /dev/null +++ b/fs/ext/file.c @@ -0,0 +1,258 @@ +/* + * linux/fs/ext/file.c + * + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * + * from + * + * linux/fs/minix/file.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext regular file handling primitives + */ + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/sched.h> +#include <linux/ext_fs.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/locks.h> + +#define NBUF 32 + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +#include <linux/fs.h> +#include <linux/ext_fs.h> + +static int ext_file_read(struct inode *, struct file *, char *, int); +static int ext_file_write(struct inode *, struct file *, char *, int); + +/* + * We have mostly NULL's here: the current defaults are ok for + * the ext filesystem. 
/*
 * ext_file_read()
 *
 * Read up to 'count' bytes from the regular file 'inode', starting at
 * filp->f_pos, into 'buf' (written with memcpy_tofs/put_fs_byte, so
 * presumably a user-space pointer -- confirm against the caller).
 *
 * Returns the number of bytes copied, 0 when f_pos is at or past
 * i_size (or count <= 0), -EINVAL when 'inode' is NULL or not a regular
 * file, and -EIO when data was wanted but no byte could be delivered.
 *
 * Side effects: advances filp->f_pos, sets filp->f_reada, and stamps
 * i_atime (marking the inode dirty) unless IS_RDONLY(inode).
 */
static int ext_file_read(struct inode * inode, struct file * filp, char * buf, int count)
{
	int read,left,chars;
	int block, blocks, offset;
	int bhrequest, uptodate;
	/* buflist is used as a circular queue of NBUF buffer heads:
	   bhb is where the next fetched block is stored, bhe is the next
	   block to be consumed. */
	struct buffer_head ** bhb, ** bhe;
	struct buffer_head * bhreq[NBUF];
	struct buffer_head * buflist[NBUF];
	unsigned int size;

	if (!inode) {
		printk("ext_file_read: inode = NULL\n");
		return -EINVAL;
	}
	if (!S_ISREG(inode->i_mode)) {
		printk("ext_file_read: mode = %07o\n",inode->i_mode);
		return -EINVAL;
	}
	offset = filp->f_pos;
	size = inode->i_size;
	/* left = number of bytes we will actually try to deliver */
	if (offset > size)
		left = 0;
	else
		left = size - offset;
	if (left > count)
		left = count;
	if (left <= 0)
		return 0;
	read = 0;
	/* From here on 'block' is the file block index, 'offset' the byte
	   offset inside that block, and 'size' the file length in blocks. */
	block = offset >> BLOCK_SIZE_BITS;
	offset &= BLOCK_SIZE-1;
	size = (size + (BLOCK_SIZE-1)) >> BLOCK_SIZE_BITS;
	blocks = (left + offset + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
	bhb = bhe = buflist;
	if (filp->f_reada) {
		/* A previous read succeeded on this file: fetch at least the
		   device's read-ahead amount, but never past the end of the
		   file.  NOTE(review): the division by (BLOCK_SIZE >> 9)
		   suggests read_ahead[] is kept in 512-byte units -- confirm. */
		if(blocks < read_ahead[MAJOR(inode->i_dev)] / (BLOCK_SIZE >> 9))
			blocks = read_ahead[MAJOR(inode->i_dev)] / (BLOCK_SIZE >> 9);
		if (block + blocks > size)
			blocks = size - block;
	}

	/* We do this in a two stage process.  We first try and request
	   as many blocks as we can, then we wait for the first one to
	   complete, and then we try and wrap up as many as are actually
	   done.  This routine is rather generic, in that it can be used
	   in a filesystem by substituting the appropriate function in
	   for getblk.

	   This routine is optimized to make maximum use of the various
	   buffers and caches. */

	do {
		bhrequest = 0;
		uptodate = 1;
		while (blocks) {
			--blocks;
			/* create == 0: an unmapped block comes back as NULL and is
			   later delivered as zeros */
			*bhb = ext_getblk(inode, block++, 0);
			if (*bhb && !(*bhb)->b_uptodate) {
				uptodate = 0;
				bhreq[bhrequest++] = *bhb;
			}

			if (++bhb == &buflist[NBUF])
				bhb = buflist;

			/* If the block we have on hand is uptodate, go ahead
			   and complete processing. */
			if (uptodate)
				break;
			/* queue is full: producer caught up with consumer */
			if (bhb == bhe)
				break;
		}

		/* Now request them all */
		if (bhrequest)
			ll_rw_block(READ, bhrequest, bhreq);

		do { /* Finish off all I/O that has actually completed */
			if (*bhe) {
				wait_on_buffer(*bhe);
				if (!(*bhe)->b_uptodate) {	/* read error? */
					brelse(*bhe);
					if (++bhe == &buflist[NBUF])
						bhe = buflist;
					/* stop reading; what was copied so far is returned */
					left = 0;
					break;
				}
			}
			if (left < BLOCK_SIZE - offset)
				chars = left;
			else
				chars = BLOCK_SIZE - offset;
			filp->f_pos += chars;
			left -= chars;
			read += chars;
			if (*bhe) {
				memcpy_tofs(buf,offset+(*bhe)->b_data,chars);
				brelse(*bhe);
				buf += chars;
			} else {
				/* no buffer for this block: hand back zeros */
				while (chars-->0)
					put_fs_byte(0,buf++);
			}
			/* only the first block can start at a non-zero offset */
			offset = 0;
			if (++bhe == &buflist[NBUF])
				bhe = buflist;
		/* keep consuming only while the next queued buffer is not locked
		   (presumably: not still under I/O), so we never block here */
		} while (left > 0 && bhe != bhb && (!*bhe || !(*bhe)->b_lock));
	} while (left > 0);

/* Release the read-ahead blocks */
	while (bhe != bhb) {
		brelse(*bhe);
		if (++bhe == &buflist[NBUF])
			bhe = buflist;
	};
	if (!read)
		return -EIO;
	filp->f_reada = 1;
	if (!IS_RDONLY(inode)) {
		inode->i_atime = CURRENT_TIME;
		inode->i_dirt = 1;
	}
	return read;
}
+ */ + if (filp->f_flags & O_APPEND) + pos = inode->i_size; + else + pos = filp->f_pos; + written = 0; + while (written<count) { + bh = ext_getblk(inode,pos/BLOCK_SIZE,1); + if (!bh) { + if (!written) + written = -ENOSPC; + break; + } + c = BLOCK_SIZE - (pos % BLOCK_SIZE); + if (c > count-written) + c = count-written; + if (c != BLOCK_SIZE && !bh->b_uptodate) { + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + if (!bh->b_uptodate) { + brelse(bh); + if (!written) + written = -EIO; + break; + } + } + p = (pos % BLOCK_SIZE) + bh->b_data; + pos += c; + if (pos > inode->i_size) { + inode->i_size = pos; + inode->i_dirt = 1; + } + written += c; + memcpy_fromfs(p,buf,c); + buf += c; + bh->b_uptodate = 1; + mark_buffer_dirty(bh, 0); + brelse(bh); + } + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + filp->f_pos = pos; + inode->i_dirt = 1; + return written; +} diff --git a/fs/ext/freelists.c b/fs/ext/freelists.c new file mode 100644 index 000000000..29c4c4289 --- /dev/null +++ b/fs/ext/freelists.c @@ -0,0 +1,341 @@ +/* + * linux/fs/ext/freelists.c + * + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * + */ + +/* freelists.c contains the code that handles the inode and block free lists */ + + +/* + + The free blocks are managed by a linked list. The super block contains the + number of the first free block. This block contains 254 numbers of other + free blocks and the number of the next block in the list. + + When an ext fs is mounted, the number of the first free block is stored + in s->u.ext_sb.s_firstfreeblocknumber and the block header is stored in + s->u.ext_sb.s_firstfreeblock. u.ext_sb.s_freeblockscount contains the count + of free blocks. + + The free inodes are also managed by a linked list in a similar way. The + super block contains the number of the first free inode. This inode contains + 14 numbers of other free inodes and the number of the next inode in the list. 
+ + The number of the first free inode is stored in + s->u.ext_sb.s_firstfreeinodenumber and the header of the block containing + the inode is stored in s->u.ext_sb.s_firstfreeinodeblock. + u.ext_sb.s_freeinodescount contains the count of free inodes. + +*/ + +#include <linux/sched.h> +#include <linux/ext_fs.h> +#include <linux/stat.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/locks.h> + +void ext_free_block(struct super_block * sb, int block) +{ + struct buffer_head * bh; + struct ext_free_block * efb; + + if (!sb) { + printk("trying to free block on non-existent device\n"); + return; + } + lock_super (sb); + if (block < sb->u.ext_sb.s_firstdatazone || + block >= sb->u.ext_sb.s_nzones) { + printk("trying to free block not in datazone\n"); + return; + } + bh = get_hash_table(sb->s_dev, block, sb->s_blocksize); + if (bh) + bh->b_dirt=0; + brelse(bh); + if (sb->u.ext_sb.s_firstfreeblock) + efb = (struct ext_free_block *) sb->u.ext_sb.s_firstfreeblock->b_data; + if (!sb->u.ext_sb.s_firstfreeblock || efb->count == 254) { +#ifdef EXTFS_DEBUG +printk("ext_free_block: block full, skipping to %d\n", block); +#endif + if (sb->u.ext_sb.s_firstfreeblock) + brelse (sb->u.ext_sb.s_firstfreeblock); + if (!(sb->u.ext_sb.s_firstfreeblock = bread (sb->s_dev, + block, sb->s_blocksize))) + panic ("ext_free_block: unable to read block to free\n"); + efb = (struct ext_free_block *) sb->u.ext_sb.s_firstfreeblock->b_data; + efb->next = sb->u.ext_sb.s_firstfreeblocknumber; + efb->count = 0; + sb->u.ext_sb.s_firstfreeblocknumber = block; + } else { + efb->free[efb->count++] = block; + } + sb->u.ext_sb.s_freeblockscount ++; + sb->s_dirt = 1; + mark_buffer_dirty(sb->u.ext_sb.s_firstfreeblock, 1); + unlock_super (sb); + return; +} + +int ext_new_block(struct super_block * sb) +{ + struct buffer_head * bh; + struct ext_free_block * efb; + int j; + + if (!sb) { + printk("trying to get new block from non-existent device\n"); + return 0; + } + if 
(!sb->u.ext_sb.s_firstfreeblock) + return 0; + lock_super (sb); + efb = (struct ext_free_block *) sb->u.ext_sb.s_firstfreeblock->b_data; + if (efb->count) { + j = efb->free[--efb->count]; + mark_buffer_dirty(sb->u.ext_sb.s_firstfreeblock, 1); + } else { +#ifdef EXTFS_DEBUG +printk("ext_new_block: block empty, skipping to %d\n", efb->next); +#endif + j = sb->u.ext_sb.s_firstfreeblocknumber; + sb->u.ext_sb.s_firstfreeblocknumber = efb->next; + brelse (sb->u.ext_sb.s_firstfreeblock); + if (!sb->u.ext_sb.s_firstfreeblocknumber) { + sb->u.ext_sb.s_firstfreeblock = NULL; + } else { + if (!(sb->u.ext_sb.s_firstfreeblock = bread (sb->s_dev, + sb->u.ext_sb.s_firstfreeblocknumber, + sb->s_blocksize))) + panic ("ext_new_block: unable to read next free block\n"); + } + } + if (j < sb->u.ext_sb.s_firstdatazone || j > sb->u.ext_sb.s_nzones) { + printk ("ext_new_block: blk = %d\n", j); + printk("allocating block not in data zone\n"); + return 0; + } + sb->u.ext_sb.s_freeblockscount --; + sb->s_dirt = 1; + + if (!(bh=getblk(sb->s_dev, j, sb->s_blocksize))) { + printk("new_block: cannot get block"); + return 0; + } + memset(bh->b_data, 0, BLOCK_SIZE); + bh->b_uptodate = 1; + mark_buffer_dirty(bh, 1); + brelse(bh); +#ifdef EXTFS_DEBUG +printk("ext_new_block: allocating block %d\n", j); +#endif + unlock_super (sb); + return j; +} + +unsigned long ext_count_free_blocks(struct super_block *sb) +{ +#ifdef EXTFS_DEBUG + struct buffer_head * bh; + struct ext_free_block * efb; + unsigned long count, block; + + lock_super (sb); + if (!sb->u.ext_sb.s_firstfreeblock) + count = 0; + else { + efb = (struct ext_free_block *) sb->u.ext_sb.s_firstfreeblock->b_data; + count = efb->count + 1; + block = efb->next; + while (block) { + if (!(bh = bread (sb->s_dev, block, sb->s_blocksize))) { + printk ("ext_count_free: error while reading free blocks list\n"); + block = 0; + } else { + efb = (struct ext_free_block *) bh->b_data; + count += efb->count + 1; + block = efb->next; + brelse (bh); + } + } + } 
+printk("ext_count_free_blocks: stored = %d, computed = %d\n", + sb->u.ext_sb.s_freeblockscount, count); + unlock_super (sb); + return count; +#else + return sb->u.ext_sb.s_freeblockscount; +#endif +} + +void ext_free_inode(struct inode * inode) +{ + struct buffer_head * bh; + struct ext_free_inode * efi; + struct super_block * sb; + unsigned long block; + unsigned long ino; + dev_t dev; + + if (!inode) + return; + if (!inode->i_dev) { + printk("free_inode: inode has no device\n"); + return; + } + if (inode->i_count != 1) { + printk("free_inode: inode has count=%d\n",inode->i_count); + return; + } + if (inode->i_nlink) { + printk("free_inode: inode has nlink=%d\n",inode->i_nlink); + return; + } + if (!inode->i_sb) { + printk("free_inode: inode on non-existent device\n"); + return; + } + sb = inode->i_sb; + ino = inode->i_ino; + dev = inode->i_dev; + clear_inode(inode); + lock_super (sb); + if (ino < 1 || ino > sb->u.ext_sb.s_ninodes) { + printk("free_inode: inode 0 or non-existent inode\n"); + unlock_super (sb); + return; + } + if (sb->u.ext_sb.s_firstfreeinodeblock) + efi = ((struct ext_free_inode *) sb->u.ext_sb.s_firstfreeinodeblock->b_data) + + (sb->u.ext_sb.s_firstfreeinodenumber-1)%EXT_INODES_PER_BLOCK; + if (!sb->u.ext_sb.s_firstfreeinodeblock || efi->count == 14) { +#ifdef EXTFS_DEBUG +printk("ext_free_inode: inode full, skipping to %d\n", ino); +#endif + if (sb->u.ext_sb.s_firstfreeinodeblock) + brelse (sb->u.ext_sb.s_firstfreeinodeblock); + block = 2 + (ino - 1) / EXT_INODES_PER_BLOCK; + if (!(bh = bread(dev, block, sb->s_blocksize))) + panic("ext_free_inode: unable to read inode block\n"); + efi = ((struct ext_free_inode *) bh->b_data) + + (ino - 1) % EXT_INODES_PER_BLOCK; + efi->next = sb->u.ext_sb.s_firstfreeinodenumber; + efi->count = 0; + sb->u.ext_sb.s_firstfreeinodenumber = ino; + sb->u.ext_sb.s_firstfreeinodeblock = bh; + } else { + efi->free[efi->count++] = ino; + } + sb->u.ext_sb.s_freeinodescount ++; + sb->s_dirt = 1; + 
mark_buffer_dirty(sb->u.ext_sb.s_firstfreeinodeblock, 1); + unlock_super (sb); +} + +struct inode * ext_new_inode(const struct inode * dir) +{ + struct super_block * sb; + struct inode * inode; + struct ext_free_inode * efi; + unsigned long block; + int j; + + if (!dir || !(inode=get_empty_inode())) + return NULL; + sb = dir->i_sb; + inode->i_sb = sb; + inode->i_flags = sb->s_flags; + if (!sb->u.ext_sb.s_firstfreeinodeblock) + return 0; + lock_super (sb); + efi = ((struct ext_free_inode *) sb->u.ext_sb.s_firstfreeinodeblock->b_data) + + (sb->u.ext_sb.s_firstfreeinodenumber-1)%EXT_INODES_PER_BLOCK; + if (efi->count) { + j = efi->free[--efi->count]; + mark_buffer_dirty(sb->u.ext_sb.s_firstfreeinodeblock, 1); + } else { +#ifdef EXTFS_DEBUG +printk("ext_free_inode: inode empty, skipping to %d\n", efi->next); +#endif + j = sb->u.ext_sb.s_firstfreeinodenumber; + if (efi->next > sb->u.ext_sb.s_ninodes) { + printk ("efi->next = %ld\n", efi->next); + panic ("ext_new_inode: bad inode number in free list\n"); + } + sb->u.ext_sb.s_firstfreeinodenumber = efi->next; + block = 2 + (((unsigned long) efi->next) - 1) / EXT_INODES_PER_BLOCK; + brelse (sb->u.ext_sb.s_firstfreeinodeblock); + if (!sb->u.ext_sb.s_firstfreeinodenumber) { + sb->u.ext_sb.s_firstfreeinodeblock = NULL; + } else { + if (!(sb->u.ext_sb.s_firstfreeinodeblock = + bread(sb->s_dev, block, sb->s_blocksize))) + panic ("ext_new_inode: unable to read next free inode block\n"); + } + } + sb->u.ext_sb.s_freeinodescount --; + sb->s_dirt = 1; + inode->i_count = 1; + inode->i_nlink = 1; + inode->i_dev = sb->s_dev; + inode->i_uid = current->fsuid; + inode->i_gid = (dir->i_mode & S_ISGID) ? 
dir->i_gid : current->fsgid; + inode->i_dirt = 1; + inode->i_ino = j; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_op = NULL; + inode->i_blocks = inode->i_blksize = 0; + insert_inode_hash(inode); +#ifdef EXTFS_DEBUG +printk("ext_new_inode : allocating inode %d\n", inode->i_ino); +#endif + unlock_super (sb); + return inode; +} + +unsigned long ext_count_free_inodes(struct super_block *sb) +{ +#ifdef EXTFS_DEBUG + struct buffer_head * bh; + struct ext_free_inode * efi; + unsigned long count, block, ino; + + lock_super (sb); + if (!sb->u.ext_sb.s_firstfreeinodeblock) + count = 0; + else { + efi = ((struct ext_free_inode *) sb->u.ext_sb.s_firstfreeinodeblock->b_data) + + ((sb->u.ext_sb.s_firstfreeinodenumber-1)%EXT_INODES_PER_BLOCK); + count = efi->count + 1; + ino = efi->next; + while (ino) { + if (ino < 1 || ino > sb->u.ext_sb.s_ninodes) { + printk ("u.ext_sb.s_firstfreeinodenumber = %d, ino = %d\n", + (int) sb->u.ext_sb.s_firstfreeinodenumber,ino); + panic ("ext_count_fre_inodes: bad inode number in free list\n"); + } + block = 2 + ((ino - 1) / EXT_INODES_PER_BLOCK); + if (!(bh = bread (sb->s_dev, block, sb->s_blocksize))) { + printk ("ext_count_free_inodes: error while reading free inodes list\n"); + block = 0; + } else { + efi = ((struct ext_free_inode *) bh->b_data) + + ((ino - 1) % EXT_INODES_PER_BLOCK); + count += efi->count + 1; + ino = efi->next; + brelse (bh); + } + } + } +printk("ext_count_free_inodes: stored = %d, computed = %d\n", + sb->u.ext_sb.s_freeinodescount, count); + unlock_super (sb); + return count; +#else + return sb->u.ext_sb.s_freeinodescount; +#endif +} diff --git a/fs/ext/fsync.c b/fs/ext/fsync.c new file mode 100644 index 000000000..bb20383cc --- /dev/null +++ b/fs/ext/fsync.c @@ -0,0 +1,185 @@ + +/* + * linux/fs/ext/fsync.c + * + * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk) + * from + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * from + * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 
Linus Torvalds + * + * extfs fsync primitive + */ + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/locks.h> + +#include <linux/fs.h> +#include <linux/ext_fs.h> + + +#define blocksize BLOCK_SIZE +#define addr_per_block 256 + +static int sync_block (struct inode * inode, unsigned long * block, int wait) +{ + struct buffer_head * bh; + int tmp; + + if (!*block) + return 0; + tmp = *block; + bh = get_hash_table(inode->i_dev, *block, blocksize); + if (!bh) + return 0; + if (*block != tmp) { + brelse (bh); + return 1; + } + if (wait && bh->b_req && !bh->b_uptodate) { + brelse(bh); + return -1; + } + if (wait || !bh->b_uptodate || !bh->b_dirt) + { + brelse(bh); + return 0; + } + ll_rw_block(WRITE, 1, &bh); + bh->b_count--; + return 0; +} + +static int sync_iblock (struct inode * inode, unsigned long * iblock, + struct buffer_head **bh, int wait) +{ + int rc, tmp; + + *bh = NULL; + tmp = *iblock; + if (!tmp) + return 0; + rc = sync_block (inode, iblock, wait); + if (rc) + return rc; + *bh = bread(inode->i_dev, tmp, blocksize); + if (tmp != *iblock) { + brelse(*bh); + *bh = NULL; + return 1; + } + if (!*bh) + return -1; + return 0; +} + + +static int sync_direct(struct inode *inode, int wait) +{ + int i; + int rc, err = 0; + + for (i = 0; i < 9; i++) { + rc = sync_block (inode, inode->u.ext_i.i_data + i, wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + return err; +} + +static int sync_indirect(struct inode *inode, unsigned long *iblock, int wait) +{ + int i; + struct buffer_head * ind_bh; + int rc, err = 0; + + rc = sync_iblock (inode, iblock, &ind_bh, wait); + if (rc || !ind_bh) + return rc; + + for (i = 0; i < addr_per_block; i++) { + rc = sync_block (inode, + ((unsigned long *) ind_bh->b_data) + i, + wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + brelse(ind_bh); + return err; +} + +static int sync_dindirect(struct inode 
*inode, unsigned long *diblock, + int wait) +{ + int i; + struct buffer_head * dind_bh; + int rc, err = 0; + + rc = sync_iblock (inode, diblock, &dind_bh, wait); + if (rc || !dind_bh) + return rc; + + for (i = 0; i < addr_per_block; i++) { + rc = sync_indirect (inode, + ((unsigned long *) dind_bh->b_data) + i, + wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + brelse(dind_bh); + return err; +} + +static int sync_tindirect(struct inode *inode, unsigned long *tiblock, + int wait) +{ + int i; + struct buffer_head * tind_bh; + int rc, err = 0; + + rc = sync_iblock (inode, tiblock, &tind_bh, wait); + if (rc || !tind_bh) + return rc; + + for (i = 0; i < addr_per_block; i++) { + rc = sync_dindirect (inode, + ((unsigned long *) tind_bh->b_data) + i, + wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + brelse(tind_bh); + return err; +} + +int ext_sync_file(struct inode * inode, struct file *file) +{ + int wait, err = 0; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return -EINVAL; + for (wait=0; wait<=1; wait++) + { + err |= sync_direct(inode, wait); + err |= sync_indirect(inode, inode->u.ext_i.i_data+9, wait); + err |= sync_dindirect(inode, inode->u.ext_i.i_data+10, wait); + err |= sync_tindirect(inode, inode->u.ext_i.i_data+11, wait); + } + err |= ext_sync_inode (inode); + return (err < 0) ? 
-EIO : 0; +} diff --git a/fs/ext/inode.c b/fs/ext/inode.c new file mode 100644 index 000000000..b3ca2e2cf --- /dev/null +++ b/fs/ext/inode.c @@ -0,0 +1,444 @@ +/* + * linux/fs/ext/inode.c + * + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/sched.h> +#include <linux/ext_fs.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/locks.h> + +#include <asm/system.h> +#include <asm/segment.h> + +void ext_put_inode(struct inode *inode) +{ + if (inode->i_nlink) + return; + inode->i_size = 0; + ext_truncate(inode); + ext_free_inode(inode); +} + +void ext_put_super(struct super_block *sb) +{ + + lock_super(sb); + sb->s_dev = 0; + if (sb->u.ext_sb.s_firstfreeinodeblock) + brelse (sb->u.ext_sb.s_firstfreeinodeblock); + if (sb->u.ext_sb.s_firstfreeblock) + brelse (sb->u.ext_sb.s_firstfreeblock); + unlock_super(sb); + return; +} + +static struct super_operations ext_sops = { + ext_read_inode, + NULL, + ext_write_inode, + ext_put_inode, + ext_put_super, + ext_write_super, + ext_statfs, + NULL +}; + +struct super_block *ext_read_super(struct super_block *s,void *data, + int silent) +{ + struct buffer_head *bh; + struct ext_super_block *es; + int dev = s->s_dev,block; + + lock_super(s); + set_blocksize(dev, BLOCK_SIZE); + if (!(bh = bread(dev, 1, BLOCK_SIZE))) { + s->s_dev=0; + unlock_super(s); + printk("EXT-fs: unable to read superblock\n"); + return NULL; + } + es = (struct ext_super_block *) bh->b_data; + s->s_blocksize = 1024; + s->s_blocksize_bits = 10; + s->u.ext_sb.s_ninodes = es->s_ninodes; + s->u.ext_sb.s_nzones = es->s_nzones; + s->u.ext_sb.s_firstdatazone = es->s_firstdatazone; + s->u.ext_sb.s_log_zone_size = es->s_log_zone_size; + s->u.ext_sb.s_max_size = es->s_max_size; + s->s_magic = es->s_magic; + s->u.ext_sb.s_firstfreeblocknumber = es->s_firstfreeblock; + 
s->u.ext_sb.s_freeblockscount = es->s_freeblockscount; + s->u.ext_sb.s_firstfreeinodenumber = es->s_firstfreeinode; + s->u.ext_sb.s_freeinodescount = es->s_freeinodescount; + brelse(bh); + if (s->s_magic != EXT_SUPER_MAGIC) { + s->s_dev = 0; + unlock_super(s); + if (!silent) + printk("VFS: Can't find an extfs filesystem on dev 0x%04x.\n", + dev); + return NULL; + } + if (!s->u.ext_sb.s_firstfreeblocknumber) + s->u.ext_sb.s_firstfreeblock = NULL; + else + if (!(s->u.ext_sb.s_firstfreeblock = bread(dev, + s->u.ext_sb.s_firstfreeblocknumber, BLOCK_SIZE))) { + printk("ext_read_super: unable to read first free block\n"); + s->s_dev = 0; + unlock_super(s); + return NULL; + } + if (!s->u.ext_sb.s_firstfreeinodenumber) + s->u.ext_sb.s_firstfreeinodeblock = NULL; + else { + block = 2 + (s->u.ext_sb.s_firstfreeinodenumber - 1) / EXT_INODES_PER_BLOCK; + if (!(s->u.ext_sb.s_firstfreeinodeblock = bread(dev, block, BLOCK_SIZE))) { + printk("ext_read_super: unable to read first free inode block\n"); + brelse(s->u.ext_sb.s_firstfreeblock); + s->s_dev = 0; + unlock_super (s); + return NULL; + } + } + unlock_super(s); + /* set up enough so that it can read an inode */ + s->s_dev = dev; + s->s_op = &ext_sops; + if (!(s->s_mounted = iget(s,EXT_ROOT_INO))) { + s->s_dev=0; + printk("EXT-fs: get root inode failed\n"); + return NULL; + } + return s; +} + +void ext_write_super (struct super_block *sb) +{ + struct buffer_head * bh; + struct ext_super_block * es; + + if (!(bh = bread(sb->s_dev, 1, BLOCK_SIZE))) { + printk ("ext_write_super: bread failed\n"); + return; + } + es = (struct ext_super_block *) bh->b_data; + es->s_firstfreeblock = sb->u.ext_sb.s_firstfreeblocknumber; + es->s_freeblockscount = sb->u.ext_sb.s_freeblockscount; + es->s_firstfreeinode = sb->u.ext_sb.s_firstfreeinodenumber; + es->s_freeinodescount = sb->u.ext_sb.s_freeinodescount; + mark_buffer_dirty(bh, 1); + brelse (bh); + sb->s_dirt = 0; +} + +void ext_statfs (struct super_block *sb, struct statfs *buf) +{ + long 
tmp; + + put_fs_long(EXT_SUPER_MAGIC, &buf->f_type); + put_fs_long(1024, &buf->f_bsize); + put_fs_long(sb->u.ext_sb.s_nzones << sb->u.ext_sb.s_log_zone_size, + &buf->f_blocks); + tmp = ext_count_free_blocks(sb); + put_fs_long(tmp, &buf->f_bfree); + put_fs_long(tmp, &buf->f_bavail); + put_fs_long(sb->u.ext_sb.s_ninodes, &buf->f_files); + put_fs_long(ext_count_free_inodes(sb), &buf->f_ffree); + put_fs_long(EXT_NAME_LEN, &buf->f_namelen); + /* Don't know what value to put in buf->f_fsid */ +} + +#define inode_bmap(inode,nr) ((inode)->u.ext_i.i_data[(nr)]) + +static int block_bmap(struct buffer_head * bh, int nr) +{ + int tmp; + + if (!bh) + return 0; + tmp = ((unsigned long *) bh->b_data)[nr]; + brelse(bh); + return tmp; +} + +int ext_bmap(struct inode * inode,int block) +{ + int i; + + if (block<0) { + printk("ext_bmap: block<0"); + return 0; + } + if (block >= 9+256+256*256+256*256*256) { + printk("ext_bmap: block>big"); + return 0; + } + if (block<9) + return inode_bmap(inode,block); + block -= 9; + if (block<256) { + i = inode_bmap(inode,9); + if (!i) + return 0; + return block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),block); + } + block -= 256; + if (block<256*256) { + i = inode_bmap(inode,10); + if (!i) + return 0; + i = block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),block>>8); + if (!i) + return 0; + return block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),block & 255); + } + block -= 256*256; + i = inode_bmap(inode,11); + if (!i) + return 0; + i = block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),block>>16); + if (!i) + return 0; + i = block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),(block>>8) & 255); + if (!i) + return 0; + return block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),block & 255); +} + +static struct buffer_head * inode_getblk(struct inode * inode, int nr, int create) +{ + int tmp; + unsigned long * p; + struct buffer_head * result; + + p = inode->u.ext_i.i_data + nr; +repeat: + tmp = *p; + if (tmp) { + result = getblk(inode->i_dev, tmp, BLOCK_SIZE); + if (tmp == *p) + 
/*
 * block_getblk()
 *
 * Return the buffer for the block whose number is stored in entry 'nr'
 * of the indirect block held in 'bh', allocating a new block for that
 * entry when it is empty and 'create' is non-zero.
 *
 * 'bh' is consumed: it is released on every path once it was non-NULL,
 * so the caller must not use it afterwards.  Returns NULL when 'bh' is
 * NULL, when the indirect block cannot be read, when the entry is empty
 * and 'create' is zero, or when no block could be allocated.
 */
static struct buffer_head * block_getblk(struct inode * inode,
	struct buffer_head * bh, int nr, int create)
{
	int tmp;
	unsigned long * p;
	struct buffer_head * result;

	if (!bh)
		return NULL;
	/* make sure the indirect block's contents are valid before reading
	   an entry out of it */
	if (!bh->b_uptodate) {
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		if (!bh->b_uptodate) {
			brelse(bh);
			return NULL;
		}
	}
	p = nr + (unsigned long *) bh->b_data;
repeat:
	tmp = *p;
	if (tmp) {
		result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
		/* re-check the entry after getblk(): presumably getblk() can
		   sleep and the entry may have been changed meanwhile */
		if (tmp == *p) {
			brelse(bh);
			return result;
		}
		brelse(result);
		goto repeat;
	}
	if (!create) {
		brelse(bh);
		return NULL;
	}
	tmp = ext_new_block(inode->i_sb);
	if (!tmp) {
		brelse(bh);
		return NULL;
	}
	result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
	/* the entry got filled in while we were allocating: give our block
	   back and start over with the entry's current value */
	if (*p) {
		ext_free_block(inode->i_sb,tmp);
		brelse(result);
		goto repeat;
	}
	*p = tmp;
	mark_buffer_dirty(bh, 1);
	brelse(bh);
	return result;
}
block_getblk(inode,bh,block>>16,create); + bh = block_getblk(inode,bh,(block>>8) & 255,create); + return block_getblk(inode,bh,block & 255,create); +} + +struct buffer_head * ext_bread(struct inode * inode, int block, int create) +{ + struct buffer_head * bh; + + bh = ext_getblk(inode,block,create); + if (!bh || bh->b_uptodate) + return bh; + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + if (bh->b_uptodate) + return bh; + brelse(bh); + return NULL; +} + +void ext_read_inode(struct inode * inode) +{ + struct buffer_head * bh; + struct ext_inode * raw_inode; + int block; + + block = 2 + (inode->i_ino-1)/EXT_INODES_PER_BLOCK; + if (!(bh=bread(inode->i_dev, block, BLOCK_SIZE))) + panic("unable to read i-node block"); + raw_inode = ((struct ext_inode *) bh->b_data) + + (inode->i_ino-1)%EXT_INODES_PER_BLOCK; + inode->i_mode = raw_inode->i_mode; + inode->i_uid = raw_inode->i_uid; + inode->i_gid = raw_inode->i_gid; + inode->i_nlink = raw_inode->i_nlinks; + inode->i_size = raw_inode->i_size; + inode->i_mtime = inode->i_atime = inode->i_ctime = raw_inode->i_time; + inode->i_blocks = inode->i_blksize = 0; + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + inode->i_rdev = raw_inode->i_zone[0]; + else for (block = 0; block < 12; block++) + inode->u.ext_i.i_data[block] = raw_inode->i_zone[block]; + brelse(bh); + inode->i_op = NULL; + if (S_ISREG(inode->i_mode)) + inode->i_op = &ext_file_inode_operations; + else if (S_ISDIR(inode->i_mode)) + inode->i_op = &ext_dir_inode_operations; + else if (S_ISLNK(inode->i_mode)) + inode->i_op = &ext_symlink_inode_operations; + else if (S_ISCHR(inode->i_mode)) + inode->i_op = &chrdev_inode_operations; + else if (S_ISBLK(inode->i_mode)) + inode->i_op = &blkdev_inode_operations; + else if (S_ISFIFO(inode->i_mode)) + init_fifo(inode); +} + +static struct buffer_head * ext_update_inode(struct inode * inode) +{ + struct buffer_head * bh; + struct ext_inode * raw_inode; + int block; + + block = 2 + (inode->i_ino-1)/EXT_INODES_PER_BLOCK; 
+ if (!(bh=bread(inode->i_dev, block, BLOCK_SIZE))) + panic("unable to read i-node block"); + raw_inode = ((struct ext_inode *)bh->b_data) + + (inode->i_ino-1)%EXT_INODES_PER_BLOCK; + raw_inode->i_mode = inode->i_mode; + raw_inode->i_uid = inode->i_uid; + raw_inode->i_gid = inode->i_gid; + raw_inode->i_nlinks = inode->i_nlink; + raw_inode->i_size = inode->i_size; + raw_inode->i_time = inode->i_mtime; + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + raw_inode->i_zone[0] = inode->i_rdev; + else for (block = 0; block < 12; block++) + raw_inode->i_zone[block] = inode->u.ext_i.i_data[block]; + mark_buffer_dirty(bh, 1); + inode->i_dirt=0; + return bh; +} + +void ext_write_inode(struct inode * inode) +{ + struct buffer_head *bh; + bh = ext_update_inode (inode); + brelse(bh); +} + +int ext_sync_inode (struct inode *inode) +{ + int err = 0; + struct buffer_head *bh; + + bh = ext_update_inode(inode); + if (bh && bh->b_dirt) + { + ll_rw_block(WRITE, 1, &bh); + wait_on_buffer(bh); + if (bh->b_req && !bh->b_uptodate) + { + printk ("IO error syncing ext inode [%04x:%08lx]\n", + inode->i_dev, inode->i_ino); + err = -1; + } + } + else if (!bh) + err = -1; + brelse (bh); + return err; +} + diff --git a/fs/ext/namei.c b/fs/ext/namei.c new file mode 100644 index 000000000..85a411e94 --- /dev/null +++ b/fs/ext/namei.c @@ -0,0 +1,893 @@ +/* + * linux/fs/ext/namei.c + * + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * + * from + * + * linux/fs/minix/namei.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/sched.h> +#include <linux/ext_fs.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/errno.h> + +#include <asm/segment.h> + +/* + * comment out this line if you want names > EXT_NAME_LEN chars to be + * truncated. Else they will be disallowed. 
+ */ +/* #define NO_TRUNCATE */ + +/* + * EXT_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a power of 2 and must be greater or equal than 8 + * because a directory entry needs 8 bytes for its fixed part + * (4 bytes for the inode, 2 bytes for the entry length and 2 bytes + * for the name length) + */ +#define EXT_DIR_PAD 8 + +/* + * + * EXT_DIR_MIN_SIZE is the minimal size of a directory entry + * + * During allocations, a directory entry is split into 2 ones + * *ONLY* if the size of the unused part is greater than or + * equal to EXT_DIR_MIN_SIZE + */ +#define EXT_DIR_MIN_SIZE 12 + +/* + * ok, we cannot use strncmp, as the name is not in our data space. + * Thus we'll have to use ext_match. No big problem. Match also makes + * some sanity tests. + * + * NOTE! unlike strncmp, ext_match returns 1 for success, 0 for failure. + */ +static int ext_match(int len,const char * name,struct ext_dir_entry * de) +{ + if (!de || !de->inode || len > EXT_NAME_LEN) + return 0; + /* "" means "." ---> so paths like "/usr/lib//libc.a" work */ + if (!len && (de->name[0]=='.') && (de->name[1]=='\0')) + return 1; + if (len != de->name_len) + return 0; + return !memcmp(name, de->name, len); +} + +/* + * ext_find_entry() + * + * finds an entry in the specified directory with the wanted name. It + * returns the cache buffer in which the entry was found, and the entry + * itself (as a parameter - res_dir). It does NOT read the inode of the + * entry - you'll have to do that yourself if you want to. 
+ * + * addition for the ext file system : this function returns the previous + * and next directory entries in the parameters prev_dir and next_dir + */ +static struct buffer_head * ext_find_entry(struct inode * dir, + const char * name, int namelen, struct ext_dir_entry ** res_dir, + struct ext_dir_entry ** prev_dir, struct ext_dir_entry ** next_dir) +{ + long offset; + struct buffer_head * bh; + struct ext_dir_entry * de; + + *res_dir = NULL; + if (!dir) + return NULL; +#ifdef NO_TRUNCATE + if (namelen > EXT_NAME_LEN) + return NULL; +#else + if (namelen > EXT_NAME_LEN) + namelen = EXT_NAME_LEN; +#endif + bh = ext_bread(dir,0,0); + if (!bh) + return NULL; + if (prev_dir) + *prev_dir = NULL; + if (next_dir) + *next_dir = NULL; + offset = 0; + de = (struct ext_dir_entry *) bh->b_data; + while (offset < dir->i_size) { + if ((char *)de >= BLOCK_SIZE+bh->b_data) { + brelse(bh); + bh = NULL; + bh = ext_bread(dir,offset>>BLOCK_SIZE_BITS,0); + if (!bh) + continue; + de = (struct ext_dir_entry *) bh->b_data; + if (prev_dir) + *prev_dir = NULL; + } + if (de->rec_len < 8 || de->rec_len % 8 != 0 || + de->rec_len < de->name_len + 8 || + (((char *) de) + de->rec_len-1 >= BLOCK_SIZE+bh->b_data)) { + printk ("ext_find_entry: bad dir entry\n"); + printk ("dev=%d, dir=%ld, offset=%ld, rec_len=%d, name_len=%d\n", + dir->i_dev, dir->i_ino, offset, de->rec_len, de->name_len); + de = (struct ext_dir_entry *) (bh->b_data+BLOCK_SIZE); + offset = ((offset / BLOCK_SIZE) + 1) * BLOCK_SIZE; + continue; +/* brelse (bh); + return NULL; */ + } + if (ext_match(namelen,name,de)) { + *res_dir = de; + if (next_dir) + if (offset + de->rec_len < dir->i_size && + ((char *)de) + de->rec_len < BLOCK_SIZE+bh->b_data) + *next_dir = (struct ext_dir_entry *) + ((char *) de + de->rec_len); + else + *next_dir = NULL; + return bh; + } + offset += de->rec_len; + if (prev_dir) + *prev_dir = de; + de = (struct ext_dir_entry *) ((char *) de + de->rec_len); + } + brelse(bh); + return NULL; +} + +int 
ext_lookup(struct inode * dir,const char * name, int len, + struct inode ** result) +{ + int ino; + struct ext_dir_entry * de; + struct buffer_head * bh; + + *result = NULL; + if (!dir) + return -ENOENT; + if (!S_ISDIR(dir->i_mode)) { + iput(dir); + return -ENOENT; + } + if (!(bh = ext_find_entry(dir,name,len,&de,NULL,NULL))) { + iput(dir); + return -ENOENT; + } + ino = de->inode; + brelse(bh); + if (!(*result = iget(dir->i_sb,ino))) { + iput(dir); + return -EACCES; + } + iput(dir); + return 0; +} + +/* + * ext_add_entry() + * + * adds a file entry to the specified directory, using the same + * semantics as ext_find_entry(). It returns NULL if it failed. + * + * NOTE!! The inode part of 'de' is left at 0 - which means you + * may not sleep between calling this and putting something into + * the entry, as someone else might have used it while you slept. + */ +static struct buffer_head * ext_add_entry(struct inode * dir, + const char * name, int namelen, struct ext_dir_entry ** res_dir) +{ + int i; + long offset; + unsigned short rec_len; + struct buffer_head * bh; + struct ext_dir_entry * de, * de1; + + *res_dir = NULL; + if (!dir) + return NULL; +#ifdef NO_TRUNCATE + if (namelen > EXT_NAME_LEN) + return NULL; +#else + if (namelen > EXT_NAME_LEN) + namelen = EXT_NAME_LEN; +#endif + if (!namelen) + return NULL; + bh = ext_bread(dir,0,0); + if (!bh) + return NULL; + rec_len = ((8 + namelen + EXT_DIR_PAD - 1) / EXT_DIR_PAD) * EXT_DIR_PAD; + offset = 0; + de = (struct ext_dir_entry *) bh->b_data; + while (1) { + if ((char *)de >= BLOCK_SIZE+bh->b_data && offset < dir->i_size) { +#ifdef EXTFS_DEBUG +printk ("ext_add_entry: skipping to next block\n"); +#endif + brelse(bh); + bh = NULL; + bh = ext_bread(dir,offset>>BLOCK_SIZE_BITS,0); + if (!bh) + return NULL; + de = (struct ext_dir_entry *) bh->b_data; + } + if (offset >= dir->i_size) { + /* Check that the directory entry fits in the block */ + if (offset % BLOCK_SIZE == 0 || + (BLOCK_SIZE - (offset % BLOCK_SIZE)) < 
rec_len) { + if ((offset % BLOCK_SIZE) != 0) { + /* If the entry does not fit in the + block, the remainder of the block + becomes an unused entry */ + de->inode = 0; + de->rec_len = BLOCK_SIZE + - (offset & (BLOCK_SIZE - 1)); + de->name_len = 0; + offset += de->rec_len; + dir->i_size += de->rec_len; + dir->i_dirt = 1; +#if 0 + dir->i_ctime = CURRENT_TIME; +#endif + mark_buffer_dirty(bh, 1); + } + brelse (bh); + bh = NULL; +#ifdef EXTFS_DEBUG +printk ("ext_add_entry : creating next block\n"); +#endif + bh = ext_bread(dir,offset>>BLOCK_SIZE_BITS,1); + if (!bh) + return NULL; /* Other thing to do ??? */ + de = (struct ext_dir_entry *) bh->b_data; + } + /* Allocate the entry */ + de->inode=0; + de->rec_len = rec_len; + dir->i_size += de->rec_len; + dir->i_dirt = 1; +#if 0 + dir->i_ctime = CURRENT_TIME; +#endif + } + if (de->rec_len < 8 || de->rec_len % 4 != 0 || + de->rec_len < de->name_len + 8 || + (((char *) de) + de->rec_len-1 >= BLOCK_SIZE+bh->b_data)) { + printk ("ext_addr_entry: bad dir entry\n"); + printk ("dev=%d, dir=%ld, offset=%ld, rec_len=%d, name_len=%d\n", + dir->i_dev, dir->i_ino, offset, de->rec_len, de->name_len); + brelse (bh); + return NULL; + } + if (!de->inode && de->rec_len >= rec_len) { + if (de->rec_len > rec_len + && de->rec_len - rec_len >= EXT_DIR_MIN_SIZE) { + /* The found entry is too big : it is split + into 2 ones : + - the 1st one will be used to hold the name, + - the 2nd one is unused */ + de1 = (struct ext_dir_entry *) ((char *) de + rec_len); + de1->inode = 0; + de1->rec_len = de->rec_len - rec_len; + de1->name_len = 0; + de->rec_len = rec_len; + } + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + de->name_len = namelen; + for (i=0; i < namelen ; i++) + de->name[i] = name[i]; + mark_buffer_dirty(bh, 1); + *res_dir = de; + return bh; + } + offset += de->rec_len; + de = (struct ext_dir_entry *) ((char *) de + de->rec_len); + } + brelse(bh); + return NULL; +} + +int ext_create(struct inode * dir,const char * name, int len, int mode, + 
struct inode ** result) +{ + struct inode * inode; + struct buffer_head * bh; + struct ext_dir_entry * de; + + *result = NULL; + if (!dir) + return -ENOENT; + inode = ext_new_inode(dir); + if (!inode) { + iput(dir); + return -ENOSPC; + } + inode->i_op = &ext_file_inode_operations; + inode->i_mode = mode; + inode->i_dirt = 1; + bh = ext_add_entry(dir,name,len,&de); + if (!bh) { + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + iput(dir); + return -ENOSPC; + } + de->inode = inode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + *result = inode; + return 0; +} + +int ext_mknod(struct inode * dir, const char * name, int len, int mode, int rdev) +{ + struct inode * inode; + struct buffer_head * bh; + struct ext_dir_entry * de; + + if (!dir) + return -ENOENT; + bh = ext_find_entry(dir,name,len,&de,NULL,NULL); + if (bh) { + brelse(bh); + iput(dir); + return -EEXIST; + } + inode = ext_new_inode(dir); + if (!inode) { + iput(dir); + return -ENOSPC; + } + inode->i_uid = current->fsuid; + inode->i_mode = mode; + inode->i_op = NULL; + if (S_ISREG(inode->i_mode)) + inode->i_op = &ext_file_inode_operations; + else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &ext_dir_inode_operations; + if (dir->i_mode & S_ISGID) + inode->i_mode |= S_ISGID; + } + else if (S_ISLNK(inode->i_mode)) + inode->i_op = &ext_symlink_inode_operations; + else if (S_ISCHR(inode->i_mode)) + inode->i_op = &chrdev_inode_operations; + else if (S_ISBLK(inode->i_mode)) + inode->i_op = &blkdev_inode_operations; + else if (S_ISFIFO(inode->i_mode)) + init_fifo(inode); + if (S_ISBLK(mode) || S_ISCHR(mode)) + inode->i_rdev = rdev; +#if 0 + inode->i_mtime = inode->i_atime = CURRENT_TIME; +#endif + inode->i_dirt = 1; + bh = ext_add_entry(dir,name,len,&de); + if (!bh) { + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + iput(dir); + return -ENOSPC; + } + de->inode = inode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + iput(inode); + return 0; +} + +int ext_mkdir(struct 
inode * dir, const char * name, int len, int mode) +{ + struct inode * inode; + struct buffer_head * bh, *dir_block; + struct ext_dir_entry * de; + + bh = ext_find_entry(dir,name,len,&de,NULL,NULL); + if (bh) { + brelse(bh); + iput(dir); + return -EEXIST; + } + inode = ext_new_inode(dir); + if (!inode) { + iput(dir); + return -ENOSPC; + } + inode->i_op = &ext_dir_inode_operations; + inode->i_size = 2 * 16; /* Each entry is coded on 16 bytes for "." and ".." + - 4 bytes for the inode number, + - 2 bytes for the record length + - 2 bytes for the name length + - 8 bytes for the name */ +#if 0 + inode->i_mtime = inode->i_atime = CURRENT_TIME; +#endif + dir_block = ext_bread(inode,0,1); + if (!dir_block) { + iput(dir); + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + return -ENOSPC; + } + de = (struct ext_dir_entry *) dir_block->b_data; + de->inode=inode->i_ino; + de->rec_len=16; + de->name_len=1; + strcpy(de->name,"."); + de = (struct ext_dir_entry *) ((char *) de + de->rec_len); + de->inode = dir->i_ino; + de->rec_len=16; + de->name_len=2; + strcpy(de->name,".."); + inode->i_nlink = 2; + mark_buffer_dirty(dir_block, 1); + brelse(dir_block); + inode->i_mode = S_IFDIR | (mode & 0777 & ~current->fs->umask); + if (dir->i_mode & S_ISGID) + inode->i_mode |= S_ISGID; + inode->i_dirt = 1; + bh = ext_add_entry(dir,name,len,&de); + if (!bh) { + iput(dir); + inode->i_nlink=0; + iput(inode); + return -ENOSPC; + } + de->inode = inode->i_ino; + mark_buffer_dirty(bh, 1); + dir->i_nlink++; + dir->i_dirt = 1; + iput(dir); + iput(inode); + brelse(bh); + return 0; +} + +/* + * routine to check that the specified directory is empty (for rmdir) + */ +static int empty_dir(struct inode * inode) +{ + unsigned long offset; + struct buffer_head * bh; + struct ext_dir_entry * de, * de1; + + if (inode->i_size < 2 * 12 || !(bh = ext_bread(inode,0,0))) { + printk("warning - bad directory on dev %04x\n",inode->i_dev); + return 1; + } + de = (struct ext_dir_entry *) bh->b_data; + de1 = 
(struct ext_dir_entry *) ((char *) de + de->rec_len); + if (de->inode != inode->i_ino || !de1->inode || + strcmp(".",de->name) || strcmp("..",de1->name)) { + printk("warning - bad directory on dev %04x\n",inode->i_dev); + return 1; + } + offset = de->rec_len + de1->rec_len; + de = (struct ext_dir_entry *) ((char *) de1 + de1->rec_len); + while (offset < inode->i_size ) { + if ((void *) de >= (void *) (bh->b_data+BLOCK_SIZE)) { + brelse(bh); + bh = ext_bread(inode, offset >> BLOCK_SIZE_BITS,1); + if (!bh) { + offset += BLOCK_SIZE; + continue; + } + de = (struct ext_dir_entry *) bh->b_data; + } + if (de->rec_len < 8 || de->rec_len %4 != 0 || + de->rec_len < de->name_len + 8) { + printk ("empty_dir: bad dir entry\n"); + printk ("dev=%d, dir=%ld, offset=%ld, rec_len=%d, name_len=%d\n", + inode->i_dev, inode->i_ino, offset, de->rec_len, de->name_len); + brelse (bh); + return 1; + } + if (de->inode) { + brelse(bh); + return 0; + } + offset += de->rec_len; + de = (struct ext_dir_entry *) ((char *) de + de->rec_len); + } + brelse(bh); + return 1; +} + +static inline void ext_merge_entries (struct ext_dir_entry * de, + struct ext_dir_entry * pde, struct ext_dir_entry * nde) +{ + if (nde && !nde->inode) + de->rec_len += nde->rec_len; + if (pde && !pde->inode) + pde->rec_len += de->rec_len; +} + +int ext_rmdir(struct inode * dir, const char * name, int len) +{ + int retval; + struct inode * inode; + struct buffer_head * bh; + struct ext_dir_entry * de, * pde, * nde; + + inode = NULL; + bh = ext_find_entry(dir,name,len,&de,&pde,&nde); + retval = -ENOENT; + if (!bh) + goto end_rmdir; + retval = -EPERM; + if (!(inode = iget(dir->i_sb, de->inode))) + goto end_rmdir; + if ((dir->i_mode & S_ISVTX) && !fsuser() && + current->fsuid != inode->i_uid && + current->fsuid != dir->i_uid) + goto end_rmdir; + if (inode->i_dev != dir->i_dev) + goto end_rmdir; + if (inode == dir) /* we may not delete ".", but "../dir" is ok */ + goto end_rmdir; + if (!S_ISDIR(inode->i_mode)) { + retval = 
-ENOTDIR; + goto end_rmdir; + } + if (!empty_dir(inode)) { + retval = -ENOTEMPTY; + goto end_rmdir; + } + if (inode->i_count > 1) { + retval = -EBUSY; + goto end_rmdir; + } + if (inode->i_nlink != 2) + printk("empty directory has nlink!=2 (%d)\n",inode->i_nlink); + de->inode = 0; + de->name_len = 0; + ext_merge_entries (de, pde, nde); + mark_buffer_dirty(bh, 1); + inode->i_nlink=0; + inode->i_dirt=1; + dir->i_nlink--; + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + dir->i_dirt=1; + retval = 0; +end_rmdir: + iput(dir); + iput(inode); + brelse(bh); + return retval; +} + +int ext_unlink(struct inode * dir, const char * name, int len) +{ + int retval; + struct inode * inode; + struct buffer_head * bh; + struct ext_dir_entry * de, * pde, * nde; + + retval = -ENOENT; + inode = NULL; + bh = ext_find_entry(dir,name,len,&de,&pde,&nde); + if (!bh) + goto end_unlink; + if (!(inode = iget(dir->i_sb, de->inode))) + goto end_unlink; + retval = -EPERM; + if ((dir->i_mode & S_ISVTX) && !fsuser() && + current->fsuid != inode->i_uid && + current->fsuid != dir->i_uid) + goto end_unlink; + if (S_ISDIR(inode->i_mode)) + goto end_unlink; + if (!inode->i_nlink) { + printk("Deleting nonexistent file (%04x:%ld), %d\n", + inode->i_dev,inode->i_ino,inode->i_nlink); + inode->i_nlink=1; + } + de->inode = 0; + de->name_len = 0; + ext_merge_entries (de, pde, nde); + mark_buffer_dirty(bh, 1); + inode->i_nlink--; + inode->i_dirt = 1; + inode->i_ctime = CURRENT_TIME; + dir->i_ctime = dir->i_mtime = inode->i_ctime; + dir->i_dirt = 1; + retval = 0; +end_unlink: + brelse(bh); + iput(inode); + iput(dir); + return retval; +} + +int ext_symlink(struct inode * dir, const char * name, int len, const char * symname) +{ + struct ext_dir_entry * de; + struct inode * inode = NULL; + struct buffer_head * bh = NULL, * name_block = NULL; + int i; + char c; + + if (!(inode = ext_new_inode(dir))) { + iput(dir); + return -ENOSPC; + } + inode->i_mode = S_IFLNK | 0777; + inode->i_op = 
&ext_symlink_inode_operations; + name_block = ext_bread(inode,0,1); + if (!name_block) { + iput(dir); + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + return -ENOSPC; + } + i = 0; + while (i < 1023 && (c = *(symname++))) + name_block->b_data[i++] = c; + name_block->b_data[i] = 0; + mark_buffer_dirty(name_block, 1); + brelse(name_block); + inode->i_size = i; + inode->i_dirt = 1; + bh = ext_find_entry(dir,name,len,&de,NULL,NULL); + if (bh) { + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + brelse(bh); + iput(dir); + return -EEXIST; + } + bh = ext_add_entry(dir,name,len,&de); + if (!bh) { + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + iput(dir); + return -ENOSPC; + } + de->inode = inode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + iput(inode); + return 0; +} + +int ext_link(struct inode * oldinode, struct inode * dir, const char * name, int len) +{ + struct ext_dir_entry * de; + struct buffer_head * bh; + + if (S_ISDIR(oldinode->i_mode)) { + iput(oldinode); + iput(dir); + return -EPERM; + } + if (oldinode->i_nlink > 32000) { + iput(oldinode); + iput(dir); + return -EMLINK; + } + bh = ext_find_entry(dir,name,len,&de,NULL,NULL); + if (bh) { + brelse(bh); + iput(dir); + iput(oldinode); + return -EEXIST; + } + bh = ext_add_entry(dir,name,len,&de); + if (!bh) { + iput(dir); + iput(oldinode); + return -ENOSPC; + } + de->inode = oldinode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + oldinode->i_nlink++; + oldinode->i_ctime = CURRENT_TIME; + oldinode->i_dirt = 1; + iput(oldinode); + return 0; +} + +static int subdir(struct inode * new_inode, struct inode * old_inode) +{ + int ino; + int result; + + new_inode->i_count++; + result = 0; + for (;;) { + if (new_inode == old_inode) { + result = 1; + break; + } + if (new_inode->i_dev != old_inode->i_dev) + break; + ino = new_inode->i_ino; + if (ext_lookup(new_inode,"..",2,&new_inode)) + break; + if (new_inode->i_ino == ino) + break; + } + iput(new_inode); + return 
result; +} + +#define PARENT_INO(buffer) \ +((struct ext_dir_entry *) ((char *) buffer + \ +((struct ext_dir_entry *) buffer)->rec_len))->inode + +#define PARENT_NAME(buffer) \ +((struct ext_dir_entry *) ((char *) buffer + \ +((struct ext_dir_entry *) buffer)->rec_len))->name + +/* + * rename uses retrying to avoid race-conditions: at least they should be minimal. + * it tries to allocate all the blocks, then sanity-checks, and if the sanity- + * checks fail, it tries to restart itself again. Very practical - no changes + * are done until we know everything works ok.. and then all the changes can be + * done in one fell swoop when we have claimed all the buffers needed. + * + * Anybody can rename anything with this: the permission checks are left to the + * higher-level routines. + */ +static int do_ext_rename(struct inode * old_dir, const char * old_name, int old_len, + struct inode * new_dir, const char * new_name, int new_len) +{ + struct inode * old_inode, * new_inode; + struct buffer_head * old_bh, * new_bh, * dir_bh; + struct ext_dir_entry * old_de, * new_de, * pde, * nde; + int retval; + + goto start_up; +try_again: + brelse(old_bh); + brelse(new_bh); + brelse(dir_bh); + iput(old_inode); + iput(new_inode); + current->counter = 0; + schedule(); +start_up: + old_inode = new_inode = NULL; + old_bh = new_bh = dir_bh = NULL; + old_bh = ext_find_entry(old_dir,old_name,old_len,&old_de,&pde,&nde); + retval = -ENOENT; + if (!old_bh) + goto end_rename; + old_inode = __iget(old_dir->i_sb, old_de->inode,0); /* don't cross mnt-points */ + if (!old_inode) + goto end_rename; + retval = -EPERM; + if ((old_dir->i_mode & S_ISVTX) && + current->fsuid != old_inode->i_uid && + current->fsuid != old_dir->i_uid && !fsuser()) + goto end_rename; + new_bh = ext_find_entry(new_dir,new_name,new_len,&new_de,NULL,NULL); + if (new_bh) { + new_inode = __iget(new_dir->i_sb, new_de->inode,0); /* don't cross mnt-points */ + if (!new_inode) { + brelse(new_bh); + new_bh = NULL; + } + } + if 
(new_inode == old_inode) { + retval = 0; + goto end_rename; + } + if (new_inode && S_ISDIR(new_inode->i_mode)) { + retval = -EEXIST; + goto end_rename; + } + retval = -EPERM; + if (new_inode && (new_dir->i_mode & S_ISVTX) && + current->fsuid != new_inode->i_uid && + current->fsuid != new_dir->i_uid && !fsuser()) + goto end_rename; + if (S_ISDIR(old_inode->i_mode)) { + retval = -EEXIST; + if (new_bh) + goto end_rename; + retval = -EACCES; + if (!permission(old_inode, MAY_WRITE)) + goto end_rename; + retval = -EINVAL; + if (subdir(new_dir, old_inode)) + goto end_rename; + retval = -EIO; + dir_bh = ext_bread(old_inode,0,0); + if (!dir_bh) + goto end_rename; + if (PARENT_INO(dir_bh->b_data) != old_dir->i_ino) + goto end_rename; + } + if (!new_bh) + new_bh = ext_add_entry(new_dir,new_name,new_len,&new_de); + retval = -ENOSPC; + if (!new_bh) + goto end_rename; +/* sanity checking before doing the rename - avoid races */ + if (new_inode && (new_de->inode != new_inode->i_ino)) + goto try_again; + if (new_de->inode && !new_inode) + goto try_again; + if (old_de->inode != old_inode->i_ino) + goto try_again; +/* ok, that's it */ + old_de->inode = 0; + old_de->name_len = 0; + new_de->inode = old_inode->i_ino; + ext_merge_entries (old_de, pde, nde); + if (new_inode) { + new_inode->i_nlink--; + new_inode->i_dirt = 1; + } + mark_buffer_dirty(old_bh, 1); + mark_buffer_dirty(new_bh, 1); + if (dir_bh) { + PARENT_INO(dir_bh->b_data) = new_dir->i_ino; + mark_buffer_dirty(dir_bh, 1); + old_dir->i_nlink--; + new_dir->i_nlink++; + old_dir->i_dirt = 1; + new_dir->i_dirt = 1; + } + retval = 0; +end_rename: + brelse(dir_bh); + brelse(old_bh); + brelse(new_bh); + iput(old_inode); + iput(new_inode); + iput(old_dir); + iput(new_dir); + return retval; +} + +/* + * Ok, rename also locks out other renames, as they can change the parent of + * a directory, and we don't want any races. Other races are checked for by + * "do_rename()", which restarts if there are inconsistencies. 
+ * + * Note that there is no race between different filesystems: it's only within + * the same device that races occur: many renames can happen at once, as long + * as they are on different partitions. + */ +int ext_rename(struct inode * old_dir, const char * old_name, int old_len, + struct inode * new_dir, const char * new_name, int new_len) +{ + static struct wait_queue * wait = NULL; + static int lock = 0; + int result; + + while (lock) + sleep_on(&wait); + lock = 1; + result = do_ext_rename(old_dir, old_name, old_len, + new_dir, new_name, new_len); + lock = 0; + wake_up(&wait); + return result; +} diff --git a/fs/ext/symlink.c b/fs/ext/symlink.c new file mode 100644 index 000000000..8c84bc622 --- /dev/null +++ b/fs/ext/symlink.c @@ -0,0 +1,108 @@ +/* + * linux/fs/ext/symlink.c + * + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * + * from + * + * linux/fs/minix/symlink.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext symlink handling code + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/ext_fs.h> +#include <linux/stat.h> + +static int ext_readlink(struct inode *, char *, int); +static int ext_follow_link(struct inode *, struct inode *, int, int, struct inode **); + +/* + * symlinks can't do much... 
+ */ +struct inode_operations ext_symlink_inode_operations = { + NULL, /* no file-operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + ext_readlink, /* readlink */ + ext_follow_link, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static int ext_follow_link(struct inode * dir, struct inode * inode, + int flag, int mode, struct inode ** res_inode) +{ + int error; + struct buffer_head * bh; + + *res_inode = NULL; + if (!dir) { + dir = current->fs->root; + dir->i_count++; + } + if (!inode) { + iput(dir); + return -ENOENT; + } + if (!S_ISLNK(inode->i_mode)) { + iput(dir); + *res_inode = inode; + return 0; + } + if (current->link_count > 5) { + iput(dir); + iput(inode); + return -ELOOP; + } + if (!(bh = ext_bread(inode, 0, 0))) { + iput(inode); + iput(dir); + return -EIO; + } + iput(inode); + current->link_count++; + error = open_namei(bh->b_data,flag,mode,res_inode,dir); + current->link_count--; + brelse(bh); + return error; +} + +static int ext_readlink(struct inode * inode, char * buffer, int buflen) +{ + struct buffer_head * bh; + int i; + char c; + + if (!S_ISLNK(inode->i_mode)) { + iput(inode); + return -EINVAL; + } + if (buflen > 1023) + buflen = 1023; + bh = ext_bread(inode, 0, 0); + iput(inode); + if (!bh) + return 0; + i = 0; + while (i<buflen && (c = bh->b_data[i])) { + i++; + put_fs_byte(c,buffer++); + } + brelse(bh); + return i; +} diff --git a/fs/ext/truncate.c b/fs/ext/truncate.c new file mode 100644 index 000000000..a2b485821 --- /dev/null +++ b/fs/ext/truncate.c @@ -0,0 +1,252 @@ +/* + * linux/fs/ext/truncate.c + * + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * + * from + * + * linux/fs/minix/truncate.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/sched.h> +#include <linux/ext_fs.h> +#include <linux/stat.h> +#include 
<linux/fcntl.h> +#include <linux/errno.h> + +/* + * Truncate has the most races in the whole filesystem: coding it is + * a pain in the a**. Especially as I don't do any locking... + * + * The code may look a bit weird, but that's just because I've tried to + * handle things like file-size changes in a somewhat graceful manner. + * Anyway, truncating a file at the same time somebody else writes to it + * is likely to result in pretty weird behaviour... + * + * The new code handles normal truncates (size = 0) as well as the more + * general case (size = XXX). I hope. + */ + +static int trunc_direct(struct inode * inode) +{ + int i, tmp; + unsigned long * p; + struct buffer_head * bh; + int retry = 0; +#define DIRECT_BLOCK ((inode->i_size + 1023) >> 10) + +repeat: + for (i = DIRECT_BLOCK ; i < 9 ; i++) { + p = inode->u.ext_i.i_data+i; + if (!(tmp = *p)) + continue; + bh = getblk(inode->i_dev,tmp,BLOCK_SIZE); + if (i < DIRECT_BLOCK) { + brelse(bh); + goto repeat; + } + if ((bh && bh->b_count != 1) || tmp != *p) { + retry = 1; + brelse(bh); + continue; + } + *p = 0; + inode->i_dirt = 1; + brelse(bh); + ext_free_block(inode->i_sb,tmp); + } + return retry; +} + +static int trunc_indirect(struct inode * inode, int offset, unsigned long * p) +{ + int i, tmp; + struct buffer_head * bh; + struct buffer_head * ind_bh; + unsigned long * ind; + int retry = 0; +#define INDIRECT_BLOCK (DIRECT_BLOCK-offset) + + tmp = *p; + if (!tmp) + return 0; + ind_bh = bread(inode->i_dev, tmp, BLOCK_SIZE); + if (tmp != *p) { + brelse(ind_bh); + return 1; + } + if (!ind_bh) { + *p = 0; + return 0; + } +repeat: + for (i = INDIRECT_BLOCK ; i < 256 ; i++) { + if (i < 0) + i = 0; + if (i < INDIRECT_BLOCK) + goto repeat; + ind = i+(unsigned long *) ind_bh->b_data; + tmp = *ind; + if (!tmp) + continue; + bh = getblk(inode->i_dev,tmp,BLOCK_SIZE); + if (i < INDIRECT_BLOCK) { + brelse(bh); + goto repeat; + } + if ((bh && bh->b_count != 1) || tmp != *ind) { + retry = 1; + brelse(bh); + continue; + } + 
*ind = 0; + mark_buffer_dirty(ind_bh, 1); + brelse(bh); + ext_free_block(inode->i_sb,tmp); + } + ind = (unsigned long *) ind_bh->b_data; + for (i = 0; i < 256; i++) + if (*(ind++)) + break; + if (i >= 256) + if (ind_bh->b_count != 1) + retry = 1; + else { + tmp = *p; + *p = 0; + inode->i_dirt = 1; + ext_free_block(inode->i_sb,tmp); + } + brelse(ind_bh); + return retry; +} + +static int trunc_dindirect(struct inode * inode, int offset, unsigned long * p) +{ + int i,tmp; + struct buffer_head * dind_bh; + unsigned long * dind; + int retry = 0; +#define DINDIRECT_BLOCK ((DIRECT_BLOCK-offset)>>8) + + tmp = *p; + if (!tmp) + return 0; + dind_bh = bread(inode->i_dev, tmp, BLOCK_SIZE); + if (tmp != *p) { + brelse(dind_bh); + return 1; + } + if (!dind_bh) { + *p = 0; + return 0; + } +repeat: + for (i = DINDIRECT_BLOCK ; i < 256 ; i ++) { + if (i < 0) + i = 0; + if (i < DINDIRECT_BLOCK) + goto repeat; + dind = i+(unsigned long *) dind_bh->b_data; + tmp = *dind; + if (!tmp) + continue; + retry |= trunc_indirect(inode,offset+(i<<8),dind); + mark_buffer_dirty(dind_bh, 1); + } + dind = (unsigned long *) dind_bh->b_data; + for (i = 0; i < 256; i++) + if (*(dind++)) + break; + if (i >= 256) + if (dind_bh->b_count != 1) + retry = 1; + else { + tmp = *p; + *p = 0; + inode->i_dirt = 1; + ext_free_block(inode->i_sb,tmp); + } + brelse(dind_bh); + return retry; +} + +static int trunc_tindirect(struct inode * inode) +{ + int i,tmp; + struct buffer_head * tind_bh; + unsigned long * tind, * p; + int retry = 0; +#define TINDIRECT_BLOCK ((DIRECT_BLOCK-(256*256+256+9))>>16) + + p = inode->u.ext_i.i_data+11; + if (!(tmp = *p)) + return 0; + tind_bh = bread(inode->i_dev, tmp, BLOCK_SIZE); + if (tmp != *p) { + brelse(tind_bh); + return 1; + } + if (!tind_bh) { + *p = 0; + return 0; + } +repeat: + for (i = TINDIRECT_BLOCK ; i < 256 ; i ++) { + if (i < 0) + i = 0; + if (i < TINDIRECT_BLOCK) + goto repeat; + tind = i+(unsigned long *) tind_bh->b_data; + retry |= 
trunc_dindirect(inode,9+256+256*256+(i<<16),tind); + mark_buffer_dirty(tind_bh, 1); + } + tind = (unsigned long *) tind_bh->b_data; + for (i = 0; i < 256; i++) + if (*(tind++)) + break; + if (i >= 256) + if (tind_bh->b_count != 1) + retry = 1; + else { + tmp = *p; + *p = 0; + inode->i_dirt = 1; + ext_free_block(inode->i_sb,tmp); + } + brelse(tind_bh); + return retry; +} + +void ext_truncate(struct inode * inode) +{ + int retry; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + while (1) { + retry = trunc_direct(inode); + retry |= trunc_indirect(inode,9,inode->u.ext_i.i_data+9); + retry |= trunc_dindirect(inode,9+256,inode->u.ext_i.i_data+10); + retry |= trunc_tindirect(inode); + if (!retry) + break; + current->counter = 0; + schedule(); + } + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_dirt = 1; +} + +/* + * Called when a inode is released. Note that this is different + * from ext_open: open gets called at every open, but release + * gets called only when /all/ the files are closed. + */ +void ext_release(struct inode * inode, struct file * filp) +{ + printk("ext_release not implemented\n"); +} diff --git a/fs/ext2/CHANGES b/fs/ext2/CHANGES new file mode 100644 index 000000000..b760d18c7 --- /dev/null +++ b/fs/ext2/CHANGES @@ -0,0 +1,140 @@ +Changes from version 0.5 to version 0.5a +======================================== + - Some cleanups in the error messages (some versions of syslog contain + a bug which truncates an error message if it contains '\n'). + - Check that no data can be written to a file past the 2GB limit. + - The famous readdir() bug has been fixed by Stephen Tweedie. + - Added a revision level in the superblock. + - Full support for O_SYNC flag of the open system call. + - New mount options: `resuid=#uid' and `resgid=#gid'. `resuid' causes + ext2fs to consider user #uid like root for the reserved blocks. + `resgid' acts the same way with group #gid. 
New fields in the + superblock contain default values for resuid and resgid and can + be modified by tune2fs. + Idea comes from Rene Cougnenc <cougnenc@renux.frmug.fr.net>. + - New mount options: `bsddf' and `minixdf'. `bsddf' causes ext2fs + to remove the blocks used for FS structures from the total block + count in statfs. With `minixdf', ext2fs mimics Minix behavior + in statfs (i.e. it returns the total number of blocks on the + partition). This is intended to make bde happy :-) + - New file attributes: + - Immutable files cannot be modified. Data cannot be written to + these files. They cannot be removed, renamed and new links cannot + be created. Even root cannot modify the files. He has to remove + the immutable attribute first. + - Append-only files: can only be written in append-mode when writing. + They cannot be removed, renamed and new links cannot be created. + Note: files may only be added to an append-only directory. + - No-dump files: the attribute is not used by the kernel. My port + of dump uses it to avoid backing up files which are not important. + - New check in ext2_check_dir_entry: the inode number is checked. + - Support for big file systems: the copy of the FS descriptor is now + dynamically allocated (previous versions used a fixed size array). + This allows to mount 2GB+ FS. + - Reorganization of the ext2_inode structure to allow other operating + systems to create specific fields if they use ext2fs as their native + file system. Currently, ext2fs is only implemented in Linux but + will soon be part of Gnu Hurd and of Masix. 
+ +Changes from version 0.4b to version 0.5 +======================================== + - New superblock fields: s_lastcheck and s_checkinterval added + by Uwe Ohse <uwe@tirka.gun.de> to implement timedependent checks + of the file system + - Real random numbers for secure rm added by Pierre del Perugia + <delperug@gla.ecoledoc.ibp.fr> + - The mount warnings related to the state of a fs are not printed + if the fs is mounted read-only, idea by Nick Holloway + <alfie@dcs.warwick.ac.uk> + +Changes from version 0.4a to version 0.4b +========================================= + - Copyrights changed to include the name of my laboratory. + - Clean up of balloc.c and ialloc.c. + - More consistency checks. + - Block preallocation added by Stephen Tweedie. + - Direct reads of directories disallowed. + - Readahead implemented in readdir by Stephen Tweedie. + - Bugs in block and inodes allocation fixed. + - Readahead implemented in ext2_find_entry by Chip Salzenberg. + - New mount options: + `check=none|normal|strict' + `debug' + `errors=continue|remount-ro|panic' + `grpid', `bsdgroups' + `nocheck' + `nogrpid', `sysvgroups' + - truncate() now tries to deallocate contiguous blocks in a single call + to ext2_free_blocks(). + - lots of cosmetic changes. + +Changes from version 0.4 to version 0.4a +======================================== + - the `sync' option support is now complete. Version 0.4 was not + supporting it when truncating a file. I have tested the synchronous + writes and they work but they make the system very slow :-( I have + to work again on this to make it faster. + - when detecting an error on a mounted filesystem, version 0.4 used + to try to write a flag in the super block even if the filesystem had + been mounted read-only. This is fixed. + - the `sb=#' option now causes the kernel code to use the filesystem + descriptors located at block #+1. Version 0.4 used the superblock + backup located at block # but used the main copy of the descriptors. 
+ - a new file attribute `S' is supported. This attribute causes + synchronous writes but is applied to a file not to the entire file + system (thanks to Michael Kraehe <kraehe@bakunin.north.de> for + suggesting it). + - the directory cache is inhibited by default. The cache management + code seems to be buggy and I have to look at it carefully before + using it again. + - deleting a file with the `s' attribute (secure deletion) causes its + blocks to be overwritten with random values not with zeros (thanks to + Michael A. Griffith <grif@cs.ucr.edu> for suggesting it). + - lots of cosmetic changes have been made. + +Changes from version 0.3 to version 0.4 +======================================= + - Three new mount options are supported: `check', `sync' and `sb=#'. + `check' tells the kernel code to make more consistency checks + when the file system is mounted. Currently, the kernel code checks + that the blocks and inodes bitmaps are consistent with the free + blocks and inodes counts. More checks will be added in future + releases. + `sync' tells the kernel code to use synchronous writes when updating + an inode, a bitmap, a directory entry or an indirect block. This + can make the file system much slower but can be a big win for files + recovery in case of a crash (and we can now say to the BSD folks + that Linux also supports synchronous updates :-). + `sb=#' tells the kernel code to use an alternate super block instead + of its master copy. `#' is the number of the block (counted in + 1024-byte blocks) which contains the alternate super block. + An ext2 file system typically contains backups of the super block + at blocks 8193, 16385, and so on. + - I have changed the meaning of the valid flag used by e2fsck. It + now contains the state of the file system. If the kernel code + detects an inconsistency while the file system is mounted, it flags + it as erroneous and e2fsck will detect that on next run. + - The super block now contains a mount counter. 
This counter is + incremented each time the file system is mounted read/write. When + this counter becomes bigger than the maximal mount count (also stored + in the super block), e2fsck checks the file system, even if it had + been unmounted cleanly, and resets this counter to 0. + - File attributes are now supported. One can associate a set of + attributes with a file. Three attributes are defined: + `c': the file is marked for automatic compression, + `s': the file is marked for secure deletion: when the file is + deleted, its blocks are zeroed and written back to the disk, + `u': the file is marked for undeletion: when the file is deleted, + its contents are saved to allow a future undeletion. + Currently, only the `s' attribute is implemented in the kernel + code. Support for the other attributes will be added in a future + release. + - a few bugs related to time updates have been fixed by Bruce + Evans and me. + - a bug related to the links count of deleted inodes has been fixed. + Previous versions used to keep the links count set to 1 when a file + was deleted. The new version now sets links_count to 0 when deleting + the last link. + - a race condition when deallocating an inode has been fixed by + Stephen Tweedie. + diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile new file mode 100644 index 000000000..599f2ad8f --- /dev/null +++ b/fs/ext2/Makefile @@ -0,0 +1,31 @@ +# +# Makefile for the linux ext2-filesystem routines. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... 
+ +.c.s: + $(CC) $(CFLAGS) -S $< +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< + +OBJS= acl.o balloc.o bitmap.o dir.o file.o fsync.o ialloc.o \ + inode.o ioctl.o namei.o super.o symlink.o truncate.o + +ext2.o: $(OBJS) + $(LD) -r -o ext2.o $(OBJS) + +dep: + $(CPP) -M *.c > .depend + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c new file mode 100644 index 000000000..91ef7c8cc --- /dev/null +++ b/fs/ext2/acl.c @@ -0,0 +1,50 @@ +/* + * linux/fs/ext2/acl.c + * + * Copyright (C) 1993, 1994 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + */ + +/* + * This file will contain the Access Control Lists management for the + * second extended file system. + */ + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/sched.h> +#include <linux/stat.h> + +/* + * ext2_permission () + * + * Check for access rights + * + * Returns 1 when every permission bit requested in mask is granted and 0 + * otherwise. The owner or group bits of i_mode are shifted down so that + * they can be compared against mask in the S_IRWXO position. + */ +int ext2_permission (struct inode * inode, int mask) +{ + unsigned short mode = inode->i_mode; + + /* + * Nobody gets write access to an immutable file + */ + if ((mask & S_IWOTH) && IS_IMMUTABLE(inode)) + return 0; + /* + * Special case, access is always granted for root + */ + if (fsuser()) + return 1; + /* + * If no ACL, checks using the file mode + */ + else if (current->fsuid == inode->i_uid) + mode >>= 6; + else if (in_group_p (inode->i_gid)) + mode >>= 3; + if (((mode & mask & S_IRWXO) == mask)) + return 1; + else + return 0; +} diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c new file mode 100644 index 000000000..bc6faa7ed --- /dev/null +++ b/fs/ext2/balloc.c @@ -0,0 +1,582 @@ +/* + * linux/fs/ext2/balloc.c + * + * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * Enhanced block allocation by Stephen Tweedie 
(sct@dcs.ed.ac.uk), 1993 + */ + +/* + * balloc.c contains the blocks allocation and deallocation routines + */ + +/* + * The free blocks are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. The descriptors are loaded in memory + * when a file system is mounted (see ext2_read_super). + */ + +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/stat.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/locks.h> + +#include <asm/bitops.h> + +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + +static struct ext2_group_desc * get_group_desc (struct super_block * sb, + unsigned int block_group, + struct buffer_head ** bh) +{ + unsigned long group_desc; + unsigned long desc; + struct ext2_group_desc * gdp; + + if (block_group >= sb->u.ext2_sb.s_groups_count) + ext2_panic (sb, "get_group_desc", + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sb->u.ext2_sb.s_groups_count); + + group_desc = block_group / EXT2_DESC_PER_BLOCK(sb); + desc = block_group % EXT2_DESC_PER_BLOCK(sb); + if (!sb->u.ext2_sb.s_group_desc[group_desc]) + ext2_panic (sb, "get_group_desc", + "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, desc); + gdp = (struct ext2_group_desc *) + sb->u.ext2_sb.s_group_desc[group_desc]->b_data; + if (bh) + *bh = sb->u.ext2_sb.s_group_desc[group_desc]; + return gdp + desc; +} + +static void read_block_bitmap (struct super_block * sb, + unsigned int block_group, + unsigned long bitmap_nr) +{ + struct ext2_group_desc * gdp; + struct buffer_head * bh; + + gdp = 
get_group_desc (sb, block_group, NULL); + bh = bread (sb->s_dev, gdp->bg_block_bitmap, sb->s_blocksize); + if (!bh) + ext2_panic (sb, "read_block_bitmap", + "Cannot read block bitmap - " + "block_group = %d, block_bitmap = %lu", + block_group, gdp->bg_block_bitmap); + sb->u.ext2_sb.s_block_bitmap_number[bitmap_nr] = block_group; + sb->u.ext2_sb.s_block_bitmap[bitmap_nr] = bh; +} + +/* + * load_block_bitmap loads the block bitmap for a blocks group + * + * It maintains a cache for the last bitmaps loaded. This cache is managed + * with a LRU algorithm. + * + * Notes: + * 1/ There is one cache per mounted file system. + * 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups, + * this function reads the bitmap without maintaining a LRU cache. + */ +static int load__block_bitmap (struct super_block * sb, + unsigned int block_group) +{ + int i, j; + unsigned long block_bitmap_number; + struct buffer_head * block_bitmap; + + if (block_group >= sb->u.ext2_sb.s_groups_count) + ext2_panic (sb, "load_block_bitmap", + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sb->u.ext2_sb.s_groups_count); + + if (sb->u.ext2_sb.s_groups_count <= EXT2_MAX_GROUP_LOADED) { + if (sb->u.ext2_sb.s_block_bitmap[block_group]) { + if (sb->u.ext2_sb.s_block_bitmap_number[block_group] != + block_group) + ext2_panic (sb, "load_block_bitmap", + "block_group != block_bitmap_number"); + else + return block_group; + } else { + read_block_bitmap (sb, block_group, block_group); + return block_group; + } + } + + for (i = 0; i < sb->u.ext2_sb.s_loaded_block_bitmaps && + sb->u.ext2_sb.s_block_bitmap_number[i] != block_group; i++) + ; + if (i < sb->u.ext2_sb.s_loaded_block_bitmaps && + sb->u.ext2_sb.s_block_bitmap_number[i] == block_group) { + block_bitmap_number = sb->u.ext2_sb.s_block_bitmap_number[i]; + block_bitmap = sb->u.ext2_sb.s_block_bitmap[i]; + for (j = i; j > 0; j--) { + sb->u.ext2_sb.s_block_bitmap_number[j] = + 
sb->u.ext2_sb.s_block_bitmap_number[j - 1]; + sb->u.ext2_sb.s_block_bitmap[j] = + sb->u.ext2_sb.s_block_bitmap[j - 1]; + } + sb->u.ext2_sb.s_block_bitmap_number[0] = block_bitmap_number; + sb->u.ext2_sb.s_block_bitmap[0] = block_bitmap; + } else { + if (sb->u.ext2_sb.s_loaded_block_bitmaps < EXT2_MAX_GROUP_LOADED) + sb->u.ext2_sb.s_loaded_block_bitmaps++; + else + brelse (sb->u.ext2_sb.s_block_bitmap[EXT2_MAX_GROUP_LOADED - 1]); + for (j = sb->u.ext2_sb.s_loaded_block_bitmaps - 1; j > 0; j--) { + sb->u.ext2_sb.s_block_bitmap_number[j] = + sb->u.ext2_sb.s_block_bitmap_number[j - 1]; + sb->u.ext2_sb.s_block_bitmap[j] = + sb->u.ext2_sb.s_block_bitmap[j - 1]; + } + read_block_bitmap (sb, block_group, 0); + } + return 0; +} + +static inline int load_block_bitmap (struct super_block * sb, + unsigned int block_group) +{ + if (sb->u.ext2_sb.s_loaded_block_bitmaps > 0 && + sb->u.ext2_sb.s_block_bitmap_number[0] == block_group) + return 0; + + if (sb->u.ext2_sb.s_groups_count <= EXT2_MAX_GROUP_LOADED && + sb->u.ext2_sb.s_block_bitmap_number[block_group] == block_group && + sb->u.ext2_sb.s_block_bitmap[block_group]) + return block_group; + + return load__block_bitmap (sb, block_group); +} + +/* + * ext2_free_blocks () + * + * Marks the count blocks starting at block as free in the block bitmap of + * their group and adds them back to the free blocks counts of the group + * descriptor and of the super block. The run must lie inside the data + * zone and inside a single block group. + */ +void ext2_free_blocks (struct super_block * sb, unsigned long block, + unsigned long count) +{ + struct buffer_head * bh; + struct buffer_head * bh2; + unsigned long block_group; + unsigned long bit; + unsigned long i; + int bitmap_nr; + struct ext2_group_desc * gdp; + struct ext2_super_block * es; + + if (!sb) { + printk ("ext2_free_blocks: nonexistent device"); + return; + } + lock_super (sb); + es = sb->u.ext2_sb.s_es; + if (block < es->s_first_data_block || + (block + count) > es->s_blocks_count) { + ext2_error (sb, "ext2_free_blocks", + "Freeing blocks not in datazone - " + "block = %lu, count = %lu", block, count); + unlock_super (sb); + return; + } + + ext2_debug ("freeing block %lu\n", block); + + block_group = (block - es->s_first_data_block) / + EXT2_BLOCKS_PER_GROUP(sb); + bit 
= (block - es->s_first_data_block) % EXT2_BLOCKS_PER_GROUP(sb); + if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) + ext2_panic (sb, "ext2_free_blocks", + "Freeing blocks across group boundary - " + "Block = %lu, count = %lu", + block, count); + bitmap_nr = load_block_bitmap (sb, block_group); + bh = sb->u.ext2_sb.s_block_bitmap[bitmap_nr]; + gdp = get_group_desc (sb, block_group, &bh2); + + if (test_opt (sb, CHECK_STRICT) && + (in_range (gdp->bg_block_bitmap, block, count) || + in_range (gdp->bg_inode_bitmap, block, count) || + in_range (block, gdp->bg_inode_table, + sb->u.ext2_sb.s_itb_per_group) || + in_range (block + count - 1, gdp->bg_inode_table, + sb->u.ext2_sb.s_itb_per_group))) + ext2_panic (sb, "ext2_free_blocks", + "Freeing blocks in system zones - " + "Block = %lu, count = %lu", + block, count); + + for (i = 0; i < count; i++) { + if (!clear_bit (bit + i, bh->b_data)) + ext2_warning (sb, "ext2_free_blocks", + "bit already cleared for block %lu", + block + i); /* the block whose bit was clear, not the start of the run */ + else { + gdp->bg_free_blocks_count++; + es->s_free_blocks_count++; + } + } + + mark_buffer_dirty(bh2, 1); + mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); + + mark_buffer_dirty(bh, 1); + if (sb->s_flags & MS_SYNC) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } + sb->s_dirt = 1; + unlock_super (sb); + return; +} + +/* + * ext2_new_block uses a goal block to assist allocation. If the goal is + * free, or there is a free block within 32 blocks of the goal, that block + * is allocated. Otherwise a forward search is made for a free block; within + * each block group the search first looks for an entire free byte in the block + * bitmap, and then for any free bit if that fails. 
+ */ +int ext2_new_block (struct super_block * sb, unsigned long goal, + unsigned long * prealloc_count, + unsigned long * prealloc_block) +{ + struct buffer_head * bh; + struct buffer_head * bh2; + char * p, * r; + int i, j, k, tmp; + unsigned long lmap; + int bitmap_nr; + struct ext2_group_desc * gdp; + struct ext2_super_block * es; + +#ifdef EXT2FS_DEBUG + static int goal_hits = 0, goal_attempts = 0; +#endif + if (!sb) { + printk ("ext2_new_block: nonexistent device"); + return 0; + } + lock_super (sb); + es = sb->u.ext2_sb.s_es; + if (es->s_free_blocks_count <= es->s_r_blocks_count && + (!fsuser() && (sb->u.ext2_sb.s_resuid != current->fsuid) && + (sb->u.ext2_sb.s_resgid == 0 || + !in_group_p (sb->u.ext2_sb.s_resgid)))) { + unlock_super (sb); + return 0; + } + + ext2_debug ("goal=%lu.\n", goal); + +repeat: + /* + * First, test whether the goal block is free. + */ + if (goal < es->s_first_data_block || goal >= es->s_blocks_count) + goal = es->s_first_data_block; + i = (goal - es->s_first_data_block) / EXT2_BLOCKS_PER_GROUP(sb); + gdp = get_group_desc (sb, i, &bh2); + if (gdp->bg_free_blocks_count > 0) { + j = ((goal - es->s_first_data_block) % EXT2_BLOCKS_PER_GROUP(sb)); +#ifdef EXT2FS_DEBUG + if (j) + goal_attempts++; +#endif + bitmap_nr = load_block_bitmap (sb, i); + bh = sb->u.ext2_sb.s_block_bitmap[bitmap_nr]; + + ext2_debug ("goal is at %d:%d.\n", i, j); + + if (!test_bit(j, bh->b_data)) { +#ifdef EXT2FS_DEBUG + goal_hits++; + ext2_debug ("goal bit allocated.\n"); +#endif + goto got_block; + } + if (j) { + /* + * The goal was occupied; search forward for a free + * block within the next 32 blocks + */ + lmap = ((((unsigned long *) bh->b_data)[j >> 5]) >> + ((j & 31) + 1)); + if (j < EXT2_BLOCKS_PER_GROUP(sb) - 32) + lmap |= (((unsigned long *) bh->b_data)[(j >> 5) + 1]) << + (31 - (j & 31)); + else + lmap |= 0xffffffff << (31 - (j & 31)); + if (lmap != 0xffffffffl) { + k = ffz(lmap) + 1; + if ((j + k) < EXT2_BLOCKS_PER_GROUP(sb)) { + j += k; + goto 
got_block; + } + } + } + + ext2_debug ("Bit not found near goal\n"); + + /* + * There has been no free block found in the near vicinity + * of the goal: do a search forward through the block groups, + * searching in each group first for an entire free byte in + * the bitmap and then for any free bit. + * + * Search first in the remainder of the current group; then, + * cyclicly search through the rest of the groups. + */ + p = ((char *) bh->b_data) + (j >> 3); + r = memscan(p, 0, (EXT2_BLOCKS_PER_GROUP(sb) - j + 7) >> 3); + k = (r - ((char *) bh->b_data)) << 3; + if (k < EXT2_BLOCKS_PER_GROUP(sb)) { + j = k; + goto search_back; + } + k = find_next_zero_bit ((unsigned long *) bh->b_data, + EXT2_BLOCKS_PER_GROUP(sb), + j); + if (k < EXT2_BLOCKS_PER_GROUP(sb)) { + j = k; + goto got_block; + } + } + + ext2_debug ("Bit not found in block group %d.\n", i); + + /* + * Now search the rest of the groups. We assume that + * i and gdp correctly point to the last group visited. + */ + for (k = 0; k < sb->u.ext2_sb.s_groups_count; k++) { + i++; + if (i >= sb->u.ext2_sb.s_groups_count) + i = 0; + gdp = get_group_desc (sb, i, &bh2); + if (gdp->bg_free_blocks_count > 0) + break; + } + if (k >= sb->u.ext2_sb.s_groups_count) { + unlock_super (sb); + return 0; + } + bitmap_nr = load_block_bitmap (sb, i); + bh = sb->u.ext2_sb.s_block_bitmap[bitmap_nr]; + r = memscan(bh->b_data, 0, EXT2_BLOCKS_PER_GROUP(sb) >> 3); + j = (r - bh->b_data) << 3; + if (j < EXT2_BLOCKS_PER_GROUP(sb)) + goto search_back; + else + j = find_first_zero_bit ((unsigned long *) bh->b_data, + EXT2_BLOCKS_PER_GROUP(sb)); + if (j >= EXT2_BLOCKS_PER_GROUP(sb)) { + ext2_error (sb, "ext2_new_block", + "Free blocks count corrupted for block group %d", i); + unlock_super (sb); + return 0; + } + +search_back: + /* + * We have succeeded in finding a free byte in the block + * bitmap. Now search backwards up to 7 bits to find the + * start of this group of free blocks. 
+ */ + for (k = 0; k < 7 && j > 0 && !test_bit (j - 1, bh->b_data); k++, j--); + +got_block: + + ext2_debug ("using block group %d(%d)\n", i, gdp->bg_free_blocks_count); + + tmp = j + i * EXT2_BLOCKS_PER_GROUP(sb) + es->s_first_data_block; + + if (test_opt (sb, CHECK_STRICT) && + (tmp == gdp->bg_block_bitmap || + tmp == gdp->bg_inode_bitmap || + in_range (tmp, gdp->bg_inode_table, sb->u.ext2_sb.s_itb_per_group))) + ext2_panic (sb, "ext2_new_block", + "Allocating block in system zone - " + "block = %u", tmp); + + if (set_bit (j, bh->b_data)) { + ext2_warning (sb, "ext2_new_block", + "bit already set for block %d", j); + goto repeat; + } + + ext2_debug ("found bit %d\n", j); + + /* + * Do block preallocation now if required. + */ +#ifdef EXT2_PREALLOCATE + if (prealloc_block) { + *prealloc_count = 0; + *prealloc_block = tmp + 1; + for (k = 1; + k < 8 && (j + k) < EXT2_BLOCKS_PER_GROUP(sb); k++) { + if (set_bit (j + k, bh->b_data)) + break; + (*prealloc_count)++; + } + gdp->bg_free_blocks_count -= *prealloc_count; + es->s_free_blocks_count -= *prealloc_count; + ext2_debug ("Preallocated a further %lu bits.\n", + *prealloc_count); + } +#endif + + j = tmp; + + mark_buffer_dirty(bh, 1); + if (sb->s_flags & MS_SYNC) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } + + if (j >= es->s_blocks_count) { + ext2_error (sb, "ext2_new_block", + "block >= blocks count - " + "block_group = %d, block=%d", i, j); + unlock_super (sb); + return 0; + } + if (!(bh = getblk (sb->s_dev, j, sb->s_blocksize))) { + ext2_error (sb, "ext2_new_block", "cannot get block %d", j); + unlock_super (sb); + return 0; + } + memset(bh->b_data, 0, sb->s_blocksize); + bh->b_uptodate = 1; + mark_buffer_dirty(bh, 1); + brelse (bh); + + ext2_debug ("allocating block %d. 
" + "Goal hits %d of %d.\n", j, goal_hits, goal_attempts); + + gdp->bg_free_blocks_count--; + mark_buffer_dirty(bh2, 1); + es->s_free_blocks_count--; + mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); + sb->s_dirt = 1; + unlock_super (sb); + return j; +} + +unsigned long ext2_count_free_blocks (struct super_block * sb) +{ +#ifdef EXT2FS_DEBUG + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x; + int bitmap_nr; + struct ext2_group_desc * gdp; + int i; + + lock_super (sb); + es = sb->u.ext2_sb.s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) { + gdp = get_group_desc (sb, i, NULL); + desc_count += gdp->bg_free_blocks_count; + bitmap_nr = load_block_bitmap (sb, i); + x = ext2_count_free (sb->u.ext2_sb.s_block_bitmap[bitmap_nr], + sb->s_blocksize); + printk ("group %d: stored = %d, counted = %lu\n", + i, gdp->bg_free_blocks_count, x); + bitmap_count += x; + } + printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n", + es->s_free_blocks_count, desc_count, bitmap_count); + unlock_super (sb); + return bitmap_count; +#else + return sb->u.ext2_sb.s_es->s_free_blocks_count; +#endif +} + +static inline int block_in_use (unsigned long block, + struct super_block * sb, + unsigned char * map) +{ + return test_bit ((block - sb->u.ext2_sb.s_es->s_first_data_block) % + EXT2_BLOCKS_PER_GROUP(sb), map); +} + +void ext2_check_blocks_bitmap (struct super_block * sb) +{ + struct buffer_head * bh; + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x; + unsigned long desc_blocks; + int bitmap_nr; + struct ext2_group_desc * gdp; + int i, j; + + lock_super (sb); + es = sb->u.ext2_sb.s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + desc_blocks = (sb->u.ext2_sb.s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / + EXT2_DESC_PER_BLOCK(sb); + for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) { + gdp = get_group_desc (sb, i, NULL); + desc_count += 
gdp->bg_free_blocks_count; + bitmap_nr = load_block_bitmap (sb, i); + bh = sb->u.ext2_sb.s_block_bitmap[bitmap_nr]; + + if (!test_bit (0, bh->b_data)) + ext2_error (sb, "ext2_check_blocks_bitmap", + "Superblock in group %d is marked free", i); + + for (j = 0; j < desc_blocks; j++) + if (!test_bit (j + 1, bh->b_data)) + ext2_error (sb, "ext2_check_blocks_bitmap", + "Descriptor block #%d in group " + "%d is marked free", j, i); + + if (!block_in_use (gdp->bg_block_bitmap, sb, bh->b_data)) + ext2_error (sb, "ext2_check_blocks_bitmap", + "Block bitmap for group %d is marked free", + i); + + if (!block_in_use (gdp->bg_inode_bitmap, sb, bh->b_data)) + ext2_error (sb, "ext2_check_blocks_bitmap", + "Inode bitmap for group %d is marked free", + i); + + for (j = 0; j < sb->u.ext2_sb.s_itb_per_group; j++) + if (!block_in_use (gdp->bg_inode_table + j, sb, bh->b_data)) + ext2_error (sb, "ext2_check_blocks_bitmap", + "Block #%d of the inode table in " + "group %d is marked free", j, i); + + x = ext2_count_free (bh, sb->s_blocksize); + if (gdp->bg_free_blocks_count != x) + ext2_error (sb, "ext2_check_blocks_bitmap", + "Wrong free blocks count for group %d, " + "stored = %d, counted = %lu", i, + gdp->bg_free_blocks_count, x); + bitmap_count += x; + } + if (es->s_free_blocks_count != bitmap_count) + ext2_error (sb, "ext2_check_blocks_bitmap", + "Wrong free blocks count in super block, " + "stored = %lu, counted = %lu", + es->s_free_blocks_count, bitmap_count); + unlock_super (sb); +} diff --git a/fs/ext2/bitmap.c b/fs/ext2/bitmap.c new file mode 100644 index 000000000..1084da16d --- /dev/null +++ b/fs/ext2/bitmap.c @@ -0,0 +1,25 @@ +/* + * linux/fs/ext2/bitmap.c + * + * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + */ + +#include <linux/fs.h> +#include <linux/ext2_fs.h> + +static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; + +unsigned long 
ext2_count_free (struct buffer_head * map, unsigned int numchars) +{ + unsigned int i; + unsigned long sum = 0; + + if (!map) + return (0); + for (i = 0; i < numchars; i++) + sum += nibblemap[map->b_data[i] & 0xf] + + nibblemap[(map->b_data[i] >> 4) & 0xf]; + return (sum); +} diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c new file mode 100644 index 000000000..c98139bc6 --- /dev/null +++ b/fs/ext2/dir.c @@ -0,0 +1,227 @@ +/* + * linux/fs/ext2/dir.c + * + * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/dir.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2 directory handling functions + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/sched.h> +#include <linux/stat.h> + +#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) +#define ROUND_UP(x) (((x)+3) & ~3) + +static int ext2_dir_read (struct inode * inode, struct file * filp, + char * buf, int count) +{ + return -EISDIR; +} + +static int ext2_readdir (struct inode *, struct file *, struct dirent *, int); + +static struct file_operations ext2_dir_operations = { + NULL, /* lseek - default */ + ext2_dir_read, /* read */ + NULL, /* write - bad */ + ext2_readdir, /* readdir */ + NULL, /* select - default */ + ext2_ioctl, /* ioctl */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + file_fsync, /* fsync */ + NULL, /* fasync */ + NULL, /* check_media_change */ + NULL /* revalidate */ +}; + +/* + * directories can handle most operations... 
+ */ +struct inode_operations ext2_dir_inode_operations = { + &ext2_dir_operations, /* default directory file-ops */ + ext2_create, /* create */ + ext2_lookup, /* lookup */ + ext2_link, /* link */ + ext2_unlink, /* unlink */ + ext2_symlink, /* symlink */ + ext2_mkdir, /* mkdir */ + ext2_rmdir, /* rmdir */ + ext2_mknod, /* mknod */ + ext2_rename, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + ext2_truncate, /* truncate */ + ext2_permission, /* permission */ + NULL /* smap */ +}; + +int ext2_check_dir_entry (char * function, struct inode * dir, + struct ext2_dir_entry * de, struct buffer_head * bh, + unsigned long offset) +{ + char * error_msg = NULL; + + if (de->rec_len < EXT2_DIR_REC_LEN(1)) + error_msg = "rec_len is smaller than minimal"; + else if (de->rec_len % 4 != 0) + error_msg = "rec_len % 4 != 0"; + else if (de->rec_len < EXT2_DIR_REC_LEN(de->name_len)) + error_msg = "rec_len is too small for name_len"; + else if (dir && ((char *) de - bh->b_data) + de->rec_len > + dir->i_sb->s_blocksize) + error_msg = "directory entry across blocks"; + else if (dir && de->inode > dir->i_sb->u.ext2_sb.s_es->s_inodes_count) + error_msg = "inode out of bounds"; + + if (error_msg != NULL) + ext2_error (dir->i_sb, function, "bad directory entry: %s\n" + "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", + error_msg, offset, de->inode, de->rec_len, + de->name_len); + return error_msg == NULL ? 
1 : 0; +} + +static int ext2_readdir (struct inode * inode, struct file * filp, + struct dirent * dirent, int count) +{ + unsigned long offset, blk; + int i, num, stored, dlen; + struct buffer_head * bh, * tmp, * bha[16]; + struct ext2_dir_entry * de; + struct super_block * sb; + int err, version; + + if (!inode || !S_ISDIR(inode->i_mode)) + return -EBADF; + sb = inode->i_sb; + + stored = 0; + bh = NULL; + offset = filp->f_pos & (sb->s_blocksize - 1); + + while (count > 0 && !stored && filp->f_pos < inode->i_size) { + blk = (filp->f_pos) >> EXT2_BLOCK_SIZE_BITS(sb); + bh = ext2_bread (inode, blk, 0, &err); + if (!bh) { + filp->f_pos += sb->s_blocksize - offset; + continue; + } + + /* + * Do the readahead + */ + if (!offset) { + for (i = 16 >> (EXT2_BLOCK_SIZE_BITS(sb) - 9), num = 0; + i > 0; i--) { + tmp = ext2_getblk (inode, ++blk, 0, &err); + if (tmp && !tmp->b_uptodate && !tmp->b_lock) + bha[num++] = tmp; + else + brelse (tmp); + } + if (num) { + ll_rw_block (READA, num, bha); + for (i = 0; i < num; i++) + brelse (bha[i]); + } + } + +revalidate: + /* If the dir block has changed since the last call to + * readdir(2), then we might be pointing to an invalid + * dirent right now. Scan from the start of the block + * to make sure. */ + if (filp->f_version != inode->i_version) { + for (i = 0; i < sb->s_blocksize && i < offset; ) { + de = (struct ext2_dir_entry *) + (bh->b_data + i); + /* It's too expensive to do a full + * dirent test each time round this + * loop, but we do have to test at + * least that it is non-zero. A + * failure will be detected in the + * dirent test below. 
*/ + if (de->rec_len < EXT2_DIR_REC_LEN(1)) + break; + i += de->rec_len; + } + offset = i; + filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) + | offset; + filp->f_version = inode->i_version; + } + + while (count > 0 && filp->f_pos < inode->i_size + && offset < sb->s_blocksize) { + de = (struct ext2_dir_entry *) (bh->b_data + offset); + if (!ext2_check_dir_entry ("ext2_readdir", inode, de, + bh, offset)) { + /* On error, skip the f_pos to the + next block. */ + filp->f_pos = (filp->f_pos & (sb->s_blocksize - 1)) + + sb->s_blocksize; + brelse (bh); + return stored; + } + if (de->inode) { + dlen = ROUND_UP(NAME_OFFSET(dirent) + + de->name_len + 1); + /* Old libc libraries always use a + count of 1. */ + if (count == 1 && !stored) + count = dlen; + if (count < dlen) { + count = 0; + break; + } + + /* We might block in the next section + * if the data destination is + * currently swapped out. So, use a + * version stamp to detect whether or + * not the directory has been modified + * during the copy operation. 
*/ + version = inode->i_version; + i = de->name_len; + memcpy_tofs (dirent->d_name, de->name, i); + put_fs_long (de->inode, &dirent->d_ino); + put_fs_byte (0, dirent->d_name + i); + put_fs_word (i, &dirent->d_reclen); + put_fs_long (dlen, &dirent->d_off); + if (version != inode->i_version) + goto revalidate; + dcache_add(inode, de->name, de->name_len, + de->inode); + + stored += dlen; + count -= dlen; + ((char *) dirent) += dlen; + } + offset += de->rec_len; + filp->f_pos += de->rec_len; + } + offset = 0; + brelse (bh); + } + if (!IS_RDONLY(inode)) { + inode->i_atime = CURRENT_TIME; + inode->i_dirt = 1; + } + return stored; +} diff --git a/fs/ext2/file.c b/fs/ext2/file.c new file mode 100644 index 000000000..20628b349 --- /dev/null +++ b/fs/ext2/file.c @@ -0,0 +1,354 @@ +/* + * linux/fs/ext2/file.c + * + * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/file.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2 fs regular file handling primitives + */ + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/fcntl.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/locks.h> + +#define NBUF 32 + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +#include <linux/fs.h> +#include <linux/ext2_fs.h> + +static int ext2_file_read (struct inode *, struct file *, char *, int); +static int ext2_file_write (struct inode *, struct file *, char *, int); +static void ext2_release_file (struct inode *, struct file *); + +/* + * We have mostly NULL's here: the current defaults are ok for + * the ext2 filesystem. 
+ */ +static struct file_operations ext2_file_operations = { + NULL, /* lseek - default */ + ext2_file_read, /* read */ + ext2_file_write, /* write */ + NULL, /* readdir - bad */ + NULL, /* select - default */ + ext2_ioctl, /* ioctl */ + generic_mmap, /* mmap */ + NULL, /* no special open is needed */ + ext2_release_file, /* release */ + ext2_sync_file, /* fsync */ + NULL, /* fasync */ + NULL, /* check_media_change */ + NULL /* revalidate */ +}; + /* Inode operations paired with ext2_file_operations: only bmap, truncate and permission are filled in. */ +struct inode_operations ext2_file_inode_operations = { + &ext2_file_operations,/* default file operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + ext2_bmap, /* bmap */ + ext2_truncate, /* truncate */ + ext2_permission, /* permission */ + NULL /* smap */ +}; + /* Read up to count bytes starting at filp->f_pos into buf (copied out with memcpy_tofs / put_fs_byte). Returns the number of bytes read, 0 when f_pos is at or past i_size, -EINVAL for a NULL or non-regular inode, and -EIO when nothing at all could be read. On success f_reada is set and, unless the inode is read-only, i_atime is updated. */ +static int ext2_file_read (struct inode * inode, struct file * filp, + char * buf, int count) +{ + int read, left, chars; + int block, blocks, offset; + int bhrequest, uptodate; + int clusterblocks; + struct buffer_head ** bhb, ** bhe; + struct buffer_head * bhreq[NBUF]; + struct buffer_head * buflist[NBUF]; + struct super_block * sb; + unsigned int size; + int err; + + if (!inode) { + printk ("ext2_file_read: inode = NULL\n"); + return -EINVAL; + } + sb = inode->i_sb; + if (!S_ISREG(inode->i_mode)) { + ext2_warning (sb, "ext2_file_read", "mode = %07o", + inode->i_mode); + return -EINVAL; + } + offset = filp->f_pos; + size = inode->i_size; + if (offset > size) + left = 0; + else + left = size - offset; + if (left > count) + left = count; + if (left <= 0) + return 0; + read = 0; + block = offset >> EXT2_BLOCK_SIZE_BITS(sb); + offset &= (sb->s_blocksize - 1); /* offset is now the byte offset inside the first block */ + size = (size + sb->s_blocksize - 1) >> EXT2_BLOCK_SIZE_BITS(sb); /* size is now the file length in blocks, rounded up */ + blocks = (left + offset + sb->s_blocksize - 1) >> EXT2_BLOCK_SIZE_BITS(sb); + bhb = bhe = buflist; + if (filp->f_reada) { + if (blocks <
read_ahead[MAJOR(inode->i_dev)] >> (EXT2_BLOCK_SIZE_BITS(sb) - 9)) + blocks = read_ahead[MAJOR(inode->i_dev)] >> (EXT2_BLOCK_SIZE_BITS(sb) - 9); + if (block + blocks > size) + blocks = size - block; + } + + /* + * We do this in a two stage process. We first try and request + * as many blocks as we can, then we wait for the first one to + * complete, and then we try and wrap up as many as are actually + * done. This routine is rather generic, in that it can be used + * in a filesystem by substituting the appropriate function in + * for getblk + * + * This routine is optimized to make maximum use of the various + * buffers and caches. + */ + + clusterblocks = 0; + + do { + bhrequest = 0; + uptodate = 1; + while (blocks) { + --blocks; +#if 1 + if(!clusterblocks) clusterblocks = ext2_getcluster(inode, block); + if(clusterblocks) clusterblocks--; +#endif + + *bhb = ext2_getblk (inode, block++, 0, &err); + if (*bhb && !(*bhb)->b_uptodate) { + uptodate = 0; + bhreq[bhrequest++] = *bhb; + } + /* buflist is used as a ring: bhb advances (with wrap-around) as blocks are requested, bhe as they are consumed */ + if (++bhb == &buflist[NBUF]) + bhb = buflist; + + /* + * If the block we have on hand is uptodate, go ahead + * and complete processing + */ + if (uptodate) + break; + + if (bhb == bhe) + break; + } + + /* + * Now request them all + */ + if (bhrequest) + ll_rw_block (READ, bhrequest, bhreq); + + do { + /* + * Finish off all I/O that has actually completed + */ + if (*bhe) { + wait_on_buffer (*bhe); + if (!(*bhe)->b_uptodate) { /* read error?
*/ + brelse(*bhe); + if (++bhe == &buflist[NBUF]) + bhe = buflist; + left = 0; + break; + } + } + if (left < sb->s_blocksize - offset) + chars = left; + else + chars = sb->s_blocksize - offset; + filp->f_pos += chars; + left -= chars; + read += chars; + if (*bhe) { + memcpy_tofs (buf, offset + (*bhe)->b_data, + chars); + brelse (*bhe); + buf += chars; + } else { /* *bhe is NULL (ext2_getblk returned no buffer for this block): copy out zero bytes instead */ + while (chars-- > 0) + put_fs_byte (0, buf++); + } + offset = 0; + if (++bhe == &buflist[NBUF]) + bhe = buflist; + } while (left > 0 && bhe != bhb && (!*bhe || !(*bhe)->b_lock)); + } while (left > 0); + + /* + * Release the read-ahead blocks + */ + while (bhe != bhb) { + brelse (*bhe); + if (++bhe == &buflist[NBUF]) + bhe = buflist; + } + if (!read) + return -EIO; + filp->f_reada = 1; + if (!IS_RDONLY(inode)) { + inode->i_atime = CURRENT_TIME; + inode->i_dirt = 1; + } + return read; +} + /* Write count bytes from buf (copied in with memcpy_fromfs) at filp->f_pos, or at i_size when O_APPEND is set. inode->i_sem is held around the copy loop. With O_SYNC the dirtied buffers are collected in bufferlist and written out in batches of NBUF. Returns the number of bytes written, or a negative errno (-EINVAL, -ENOSPC, -EFBIG, -EIO or the error from ext2_getblk) when nothing was written. */ +static int ext2_file_write (struct inode * inode, struct file * filp, + char * buf, int count) +{ + const loff_t two_gb = 2147483647; + loff_t pos; + off_t pos2; + int written, c; + struct buffer_head * bh, *bufferlist[NBUF]; + char * p; + struct super_block * sb; + int err; + int i,buffercount,write_error; + + write_error = buffercount = 0; + if (!inode) { + printk("ext2_file_write: inode = NULL\n"); + return -EINVAL; + } + sb = inode->i_sb; + if (sb->s_flags & MS_RDONLY) + /* + * This fs has been automatically remounted ro because of errors + */ + return -ENOSPC; + + if (!S_ISREG(inode->i_mode)) { + ext2_warning (sb, "ext2_file_write", "mode = %07o", + inode->i_mode); + return -EINVAL; + } + down(&inode->i_sem); + if (filp->f_flags & O_APPEND) + pos = inode->i_size; + else + pos = filp->f_pos; + pos2 = (off_t) pos; + /* + * If a file has been opened in synchronous mode, we have to ensure + * that meta-data will also be written synchronously. Thus, we + * set the i_osync field. This field is tested by the allocation + * routines.
+ */ + if (filp->f_flags & O_SYNC) + inode->u.ext2_i.i_osync++; + written = 0; + while (written < count) { + if (pos > two_gb) { + if (!written) + written = -EFBIG; + break; + } + bh = ext2_getblk (inode, pos2 / sb->s_blocksize, 1, &err); + if (!bh) { + if (!written) + written = err; + break; + } + c = sb->s_blocksize - (pos2 % sb->s_blocksize); + if (c > count-written) + c = count - written; /* partial-block write into a buffer that is not up to date: read the block in first */ + if (c != sb->s_blocksize && !bh->b_uptodate) { + ll_rw_block (READ, 1, &bh); + wait_on_buffer (bh); + if (!bh->b_uptodate) { + brelse (bh); + if (!written) + written = -EIO; + break; + } + } + p = (pos2 % sb->s_blocksize) + bh->b_data; + pos2 += c; + pos += c; + written += c; + memcpy_fromfs (p, buf, c); + buf += c; + bh->b_uptodate = 1; + mark_buffer_dirty(bh, 0); + if (filp->f_flags & O_SYNC) + bufferlist[buffercount++] = bh; + else + brelse(bh); + if (buffercount == NBUF){ + ll_rw_block(WRITE, buffercount, bufferlist); + for(i=0; i<buffercount; i++){ + wait_on_buffer(bufferlist[i]); + if (!bufferlist[i]->b_uptodate) + write_error=1; + brelse(bufferlist[i]); + } + buffercount=0; + } /* NOTE(review): write_error only stops the loop; the value returned below is still the byte count in written */ + if(write_error) + break; + } + if ( buffercount ){ + ll_rw_block(WRITE, buffercount, bufferlist); + for(i=0; i<buffercount; i++){ + wait_on_buffer(bufferlist[i]); + if (!bufferlist[i]->b_uptodate) + write_error=1; + brelse(bufferlist[i]); + } + } + if (pos > inode->i_size) + inode->i_size = pos; + if (filp->f_flags & O_SYNC) + inode->u.ext2_i.i_osync--; + up(&inode->i_sem); + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + filp->f_pos = pos; + inode->i_dirt = 1; + return written; +} + +/* + * Called when an inode is released. Note that this is different + * from ext2_open: open gets called at every open, but release + * gets called only when /all/ the files are closed.
+ */ +static void ext2_release_file (struct inode * inode, struct file * filp) +{ /* f_mode bit 2 is presumably the "opened for writing" flag - TODO confirm; only then is the preallocation dropped */ + if (filp->f_mode & 2) + ext2_discard_prealloc (inode); +} diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c new file mode 100644 index 000000000..2f79c4749 --- /dev/null +++ b/fs/ext2/fsync.c @@ -0,0 +1,198 @@ +/* + * linux/fs/ext2/fsync.c + * + * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk) + * from + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * from + * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2fs fsync primitive + */ + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/fcntl.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/locks.h> + /* Both macros below expand to expressions on a variable named inode, so they are only usable where such a variable is in scope. */ + +#define blocksize (EXT2_BLOCK_SIZE(inode->i_sb)) +#define addr_per_block (EXT2_ADDR_PER_BLOCK(inode->i_sb)) + /* Handle one block pointer. Returns 0 when the pointer is zero, the buffer is not in the hash table, nothing needs doing, or a write was queued (wait == 0 and the buffer is up to date and dirty); 1 when *block changed while the buffer was looked up; -1 in the wait pass when the buffer has b_req set but is not up to date. */ +static int sync_block (struct inode * inode, unsigned long * block, int wait) +{ + struct buffer_head * bh; + int tmp; + + if (!*block) + return 0; + tmp = *block; + bh = get_hash_table (inode->i_dev, *block, blocksize); + if (!bh) + return 0; + if (*block != tmp) { + brelse (bh); + return 1; + } + if (wait && bh->b_req && !bh->b_uptodate) { + brelse (bh); + return -1; + } + if (wait || !bh->b_uptodate || !bh->b_dirt) { + brelse (bh); + return 0; + } + ll_rw_block (WRITE, 1, &bh); + bh->b_count--; + return 0; +} + /* Sync the indirect block *iblock with sync_block, then bread it into *bh so the caller can walk its entries. Returns 0 with *bh == NULL when *iblock is zero, sync_block's result when that is non-zero, 1 when *iblock changed during the read, -1 when the read failed, otherwise 0 with *bh set. */ +static int sync_iblock (struct inode * inode, unsigned long * iblock, + struct buffer_head ** bh, int wait) +{ + int rc, tmp; + + *bh = NULL; + tmp = *iblock; + if (!tmp) + return 0; + rc = sync_block (inode, iblock, wait); + if (rc) + return rc; + *bh = bread (inode->i_dev, tmp, blocksize); + if (tmp != *iblock) { + brelse (*bh); + *bh = NULL; + return 1; + } + if (!*bh) + return -1; + return 0; +} + + /* Run sync_block over the EXT2_NDIR_BLOCKS direct pointers in i_data; stops early on a positive result and returns the last negative result seen (0 if none). */ +static int sync_direct (struct inode * inode, int wait) +{ + int
i; + int rc, err = 0; + + for (i = 0; i < EXT2_NDIR_BLOCKS; i++) { + rc = sync_block (inode, inode->u.ext2_i.i_data + i, wait); + if (rc > 0) /* sync_block saw the block pointer change: stop this pass */ + break; + if (rc) + err = rc; + } + return err; +} + /* Same walk as sync_direct, over the addr_per_block entries of the indirect block *iblock. */ +static int sync_indirect (struct inode * inode, unsigned long * iblock, + int wait) +{ + int i; + struct buffer_head * ind_bh; + int rc, err = 0; + + rc = sync_iblock (inode, iblock, &ind_bh, wait); + if (rc || !ind_bh) + return rc; + + for (i = 0; i < addr_per_block; i++) { + rc = sync_block (inode, + ((unsigned long *) ind_bh->b_data) + i, + wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + brelse (ind_bh); + return err; +} + /* Double-indirect level: each entry of *diblock is handed to sync_indirect. */ +static int sync_dindirect (struct inode * inode, unsigned long * diblock, + int wait) +{ + int i; + struct buffer_head * dind_bh; + int rc, err = 0; + + rc = sync_iblock (inode, diblock, &dind_bh, wait); + if (rc || !dind_bh) + return rc; + + for (i = 0; i < addr_per_block; i++) { + rc = sync_indirect (inode, + ((unsigned long *) dind_bh->b_data) + i, + wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + brelse (dind_bh); + return err; +} + /* Triple-indirect level: each entry of *tiblock is handed to sync_dindirect. */ +static int sync_tindirect (struct inode * inode, unsigned long * tiblock, + int wait) +{ + int i; + struct buffer_head * tind_bh; + int rc, err = 0; + + rc = sync_iblock (inode, tiblock, &tind_bh, wait); + if (rc || !tind_bh) + return rc; + + for (i = 0; i < addr_per_block; i++) { + rc = sync_dindirect (inode, + ((unsigned long *) tind_bh->b_data) + i, + wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + brelse (tind_bh); + return err; +} + /* fsync entry point. Makes two passes (wait = 0, then wait = 1) over the direct, indirect, double- and triple-indirect blocks, then calls ext2_sync_inode. Returns -EINVAL for inodes that are not regular files, directories or symlinks, -EIO when the OR of all results is negative, otherwise 0. */ +int ext2_sync_file (struct inode * inode, struct file * file) +{ + int wait, err = 0; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return -EINVAL; + if (S_ISLNK(inode->i_mode) && !(inode->i_blocks)) + /* + * Don't sync fast links!
+ */ + goto skip; + + for (wait=0; wait<=1; wait++) + { + err |= sync_direct (inode, wait); + err |= sync_indirect (inode, + inode->u.ext2_i.i_data+EXT2_IND_BLOCK, + wait); + err |= sync_dindirect (inode, + inode->u.ext2_i.i_data+EXT2_DIND_BLOCK, + wait); + err |= sync_tindirect (inode, + inode->u.ext2_i.i_data+EXT2_TIND_BLOCK, + wait); + } +skip: + err |= ext2_sync_inode (inode); + return (err < 0) ? -EIO : 0; +} diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c new file mode 100644 index 000000000..69c9e2224 --- /dev/null +++ b/fs/ext2/ialloc.c @@ -0,0 +1,554 @@ +/* + * linux/fs/ext2/ialloc.c + * + * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * BSD ufs-inspired inode and directory allocation by + * Stephen Tweedie (sct@dcs.ed.ac.uk), 1993 + */ + +/* + * ialloc.c contains the inodes allocation and deallocation routines + */ + +/* + * The free inodes are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. The descriptors are loaded in memory + * when a file system is mounted (see ext2_read_super).
+ */ + +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> + +#include <asm/bitops.h> + /* Return a pointer to the descriptor of block_group inside the descriptor buffers held in s_group_desc[]. When bh is non-NULL, *bh receives the buffer that contains the descriptor. Calls ext2_panic when block_group is out of range or the descriptor block is not loaded. */ +static struct ext2_group_desc * get_group_desc (struct super_block * sb, + unsigned int block_group, + struct buffer_head ** bh) +{ + unsigned long group_desc; + unsigned long desc; + struct ext2_group_desc * gdp; + + if (block_group >= sb->u.ext2_sb.s_groups_count) + ext2_panic (sb, "get_group_desc", + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sb->u.ext2_sb.s_groups_count); + /* group_desc selects the descriptor block, desc the entry within it */ + group_desc = block_group / EXT2_DESC_PER_BLOCK(sb); + desc = block_group % EXT2_DESC_PER_BLOCK(sb); + if (!sb->u.ext2_sb.s_group_desc[group_desc]) + ext2_panic (sb, "get_group_desc", + "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, desc); + gdp = (struct ext2_group_desc *) + sb->u.ext2_sb.s_group_desc[group_desc]->b_data; + if (bh) + *bh = sb->u.ext2_sb.s_group_desc[group_desc]; + return gdp + desc; +} + /* bread the inode bitmap block of block_group and record both the group number and the buffer in cache slot bitmap_nr; ext2_panic when the read fails. */ +static void read_inode_bitmap (struct super_block * sb, + unsigned long block_group, + unsigned int bitmap_nr) +{ + struct ext2_group_desc * gdp; + struct buffer_head * bh; + + gdp = get_group_desc (sb, block_group, NULL); + bh = bread (sb->s_dev, gdp->bg_inode_bitmap, sb->s_blocksize); + if (!bh) + ext2_panic (sb, "read_inode_bitmap", + "Cannot read inode bitmap - " + "block_group = %lu, inode_bitmap = %lu", + block_group, gdp->bg_inode_bitmap); + sb->u.ext2_sb.s_inode_bitmap_number[bitmap_nr] = block_group; + sb->u.ext2_sb.s_inode_bitmap[bitmap_nr] = bh; +} + +/* + * load_inode_bitmap loads the inode bitmap for a blocks group + * + * It maintains a cache for the last bitmaps loaded. This cache is managed + * with a LRU algorithm. + * + * Notes: + * 1/ There is one cache per mounted file system.
+ * 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups, + * this function reads the bitmap without maintaining a LRU cache. + * + * The return value is the index into s_inode_bitmap[] at which the + * bitmap of block_group can be found. + */ +static int load_inode_bitmap (struct super_block * sb, + unsigned int block_group) +{ + int i, j; + unsigned long inode_bitmap_number; + struct buffer_head * inode_bitmap; + + if (block_group >= sb->u.ext2_sb.s_groups_count) + ext2_panic (sb, "load_inode_bitmap", + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sb->u.ext2_sb.s_groups_count); /* fast path: the wanted bitmap is already in slot 0 */ + if (sb->u.ext2_sb.s_loaded_inode_bitmaps > 0 && + sb->u.ext2_sb.s_inode_bitmap_number[0] == block_group) + return 0; /* few groups: slot number == group number, no LRU shuffling */ + if (sb->u.ext2_sb.s_groups_count <= EXT2_MAX_GROUP_LOADED) { + if (sb->u.ext2_sb.s_inode_bitmap[block_group]) { + if (sb->u.ext2_sb.s_inode_bitmap_number[block_group] != block_group) + ext2_panic (sb, "load_inode_bitmap", + "block_group != inode_bitmap_number"); + else + return block_group; + } else { + read_inode_bitmap (sb, block_group, block_group); + return block_group; + } + } + /* LRU case: search the loaded slots for the group */ + for (i = 0; i < sb->u.ext2_sb.s_loaded_inode_bitmaps && + sb->u.ext2_sb.s_inode_bitmap_number[i] != block_group; + i++) + ; + if (i < sb->u.ext2_sb.s_loaded_inode_bitmaps && + sb->u.ext2_sb.s_inode_bitmap_number[i] == block_group) { /* hit: shift slots 0..i-1 down by one and put the hit in slot 0 */ + inode_bitmap_number = sb->u.ext2_sb.s_inode_bitmap_number[i]; + inode_bitmap = sb->u.ext2_sb.s_inode_bitmap[i]; + for (j = i; j > 0; j--) { + sb->u.ext2_sb.s_inode_bitmap_number[j] = + sb->u.ext2_sb.s_inode_bitmap_number[j - 1]; + sb->u.ext2_sb.s_inode_bitmap[j] = + sb->u.ext2_sb.s_inode_bitmap[j - 1]; + } + sb->u.ext2_sb.s_inode_bitmap_number[0] = inode_bitmap_number; + sb->u.ext2_sb.s_inode_bitmap[0] = inode_bitmap; + } else { /* miss: grow the cache or release the last slot, shift everything down and read into slot 0 */ + if (sb->u.ext2_sb.s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED) + sb->u.ext2_sb.s_loaded_inode_bitmaps++; + else + brelse (sb->u.ext2_sb.s_inode_bitmap[EXT2_MAX_GROUP_LOADED - 1]); + for (j = sb->u.ext2_sb.s_loaded_inode_bitmaps - 1; j > 0; j--) { +
sb->u.ext2_sb.s_inode_bitmap_number[j] = + sb->u.ext2_sb.s_inode_bitmap_number[j - 1]; + sb->u.ext2_sb.s_inode_bitmap[j] = + sb->u.ext2_sb.s_inode_bitmap[j - 1]; + } + read_inode_bitmap (sb, block_group, 0); + } + return 0; +} + +/* + * This function sets the deletion time for the inode + * + * This may be used one day by an 'undelete' program + */ +static void set_inode_dtime (struct inode * inode, + struct ext2_group_desc * gdp) +{ + unsigned long inode_block; + struct buffer_head * bh; + struct ext2_inode * raw_inode; + /* block of the group's inode table that holds this inode */ + inode_block = gdp->bg_inode_table + (((inode->i_ino - 1) % + EXT2_INODES_PER_GROUP(inode->i_sb)) / + EXT2_INODES_PER_BLOCK(inode->i_sb)); + bh = bread (inode->i_sb->s_dev, inode_block, inode->i_sb->s_blocksize); + if (!bh) + ext2_panic (inode->i_sb, "set_inode_dtime", + "Cannot load inode table block - " + "inode=%lu, inode_block=%lu", + inode->i_ino, inode_block); + raw_inode = ((struct ext2_inode *) bh->b_data) + + (((inode->i_ino - 1) % + EXT2_INODES_PER_GROUP(inode->i_sb)) % + EXT2_INODES_PER_BLOCK(inode->i_sb)); + raw_inode->i_links_count = 0; + raw_inode->i_dtime = CURRENT_TIME; + mark_buffer_dirty(bh, 1); + if (IS_SYNC(inode)) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } + brelse (bh); +} + /* Free an in-core inode: under lock_super, clear its bit in the group's inode bitmap, increment the free-inode counts in the group descriptor and super block (and decrement the directory count for directories), record the deletion time, then clear_inode(). Inodes with no device, i_count > 1, non-zero i_nlink or no super block are refused with a printk; reserved or out-of-range inode numbers are reported through ext2_error. */ +void ext2_free_inode (struct inode * inode) +{ + struct super_block * sb; + struct buffer_head * bh; + struct buffer_head * bh2; + unsigned long block_group; + unsigned long bit; + int bitmap_nr; + struct ext2_group_desc * gdp; + struct ext2_super_block * es; + + if (!inode) + return; + if (!inode->i_dev) { + printk ("ext2_free_inode: inode has no device\n"); + return; + } + if (inode->i_count > 1) { + printk ("ext2_free_inode: inode has count=%d\n", + inode->i_count); + return; + } + if (inode->i_nlink) { + printk ("ext2_free_inode: inode has nlink=%d\n", + inode->i_nlink); + return; + } + if (!inode->i_sb) { + printk("ext2_free_inode: inode on nonexistent device\n"); + return; + } + + ext2_debug ("freeing inode %lu\n", inode->i_ino);
+ + sb = inode->i_sb; + lock_super (sb); + if (inode->i_ino < EXT2_FIRST_INO || + inode->i_ino > sb->u.ext2_sb.s_es->s_inodes_count) { + ext2_error (sb, "free_inode", + "reserved inode or nonexistent inode"); + unlock_super (sb); + return; + } + es = sb->u.ext2_sb.s_es; + block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(sb); + bit = (inode->i_ino - 1) % EXT2_INODES_PER_GROUP(sb); + bitmap_nr = load_inode_bitmap (sb, block_group); + bh = sb->u.ext2_sb.s_inode_bitmap[bitmap_nr]; + if (!clear_bit (bit, bh->b_data)) + ext2_warning (sb, "ext2_free_inode", + "bit already cleared for inode %lu", inode->i_ino); + else { + gdp = get_group_desc (sb, block_group, &bh2); + gdp->bg_free_inodes_count++; + if (S_ISDIR(inode->i_mode)) + gdp->bg_used_dirs_count--; + mark_buffer_dirty(bh2, 1); + es->s_free_inodes_count++; + mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); + set_inode_dtime (inode, gdp); + } + mark_buffer_dirty(bh, 1); + if (sb->s_flags & MS_SYNC) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } + + sb->s_dirt = 1; + clear_inode (inode); + unlock_super (sb); +} + +/* + * This function increments the inode version number + * + * This may be used one day by the NFS server + */ +static void inc_inode_version (struct inode * inode, + struct ext2_group_desc *gdp, + int mode) +{ + unsigned long inode_block; + struct buffer_head * bh; + struct ext2_inode * raw_inode; + + inode_block = gdp->bg_inode_table + (((inode->i_ino - 1) % + EXT2_INODES_PER_GROUP(inode->i_sb)) / + EXT2_INODES_PER_BLOCK(inode->i_sb)); + bh = bread (inode->i_sb->s_dev, inode_block, inode->i_sb->s_blocksize); + if (!bh) { + ext2_error (inode->i_sb, "inc_inode_version", + "Cannot load inode table block - " + "inode=%lu, inode_block=%lu\n", + inode->i_ino, inode_block); + inode->u.ext2_i.i_version = 1; + return; + } + raw_inode = ((struct ext2_inode *) bh->b_data) + + (((inode->i_ino - 1) % + EXT2_INODES_PER_GROUP(inode->i_sb)) % + EXT2_INODES_PER_BLOCK(inode->i_sb)); + raw_inode->i_version++;
+ inode->u.ext2_i.i_version = raw_inode->i_version; + mark_buffer_dirty(bh, 1); + brelse (bh); +} + +/* + * There are two policies for allocating an inode. If the new inode is + * a directory, then a forward search is made for a block group with both + * free space and a low directory-to-inode ratio; if that fails, then of + * the groups with above-average free space, that group with the fewest + * directories already is chosen. + * + * For other inodes, search forward from the parent directory's block + * group to find a free inode. + * + * Returns the new in-core inode, or NULL when dir is NULL, no empty + * inode is available, no group has a free inode, or an inconsistency + * is detected. + */ +struct inode * ext2_new_inode (const struct inode * dir, int mode) +{ + struct super_block * sb; + struct buffer_head * bh; + struct buffer_head * bh2; + int i, j, avefreei; + struct inode * inode; + int bitmap_nr; + struct ext2_group_desc * gdp; + struct ext2_group_desc * tmp; + struct ext2_super_block * es; + + if (!dir || !(inode = get_empty_inode ())) + return NULL; + sb = dir->i_sb; + inode->i_sb = sb; + inode->i_flags = sb->s_flags; + lock_super (sb); + es = sb->u.ext2_sb.s_es; +repeat: + gdp = NULL; i=0; + + if (S_ISDIR(mode)) { + avefreei = es->s_free_inodes_count / + sb->u.ext2_sb.s_groups_count; +/* I am not yet convinced that this next bit is necessary.
+ i = dir->u.ext2_i.i_block_group; + for (j = 0; j < sb->u.ext2_sb.s_groups_count; j++) { + tmp = get_group_desc (sb, i, &bh2); + if ((tmp->bg_used_dirs_count << 8) < + tmp->bg_free_inodes_count) { + gdp = tmp; + break; + } + else + i = ++i % sb->u.ext2_sb.s_groups_count; + } +*/ + if (!gdp) { /* among groups with at least the average number of free inodes, keep the one with the most free blocks */ + for (j = 0; j < sb->u.ext2_sb.s_groups_count; j++) { + tmp = get_group_desc (sb, j, &bh2); + if (tmp->bg_free_inodes_count && + tmp->bg_free_inodes_count >= avefreei) { + if (!gdp || + (tmp->bg_free_blocks_count > + gdp->bg_free_blocks_count)) { + i = j; + gdp = tmp; + } + } + } + } + } + else + { + /* + * Try to place the inode in its parent directory + */ + i = dir->u.ext2_i.i_block_group; + tmp = get_group_desc (sb, i, &bh2); + if (tmp->bg_free_inodes_count) + gdp = tmp; + else + { + /* + * Use a quadratic hash to find a group with a + * free inode + */ + for (j = 1; j < sb->u.ext2_sb.s_groups_count; j <<= 1) { + i += j; + if (i >= sb->u.ext2_sb.s_groups_count) + i -= sb->u.ext2_sb.s_groups_count; + tmp = get_group_desc (sb, i, &bh2); + if (tmp->bg_free_inodes_count) { + gdp = tmp; + break; + } + } + } + if (!gdp) { + /* + * That failed: try linear search for a free inode + */ + i = dir->u.ext2_i.i_block_group + 1; + for (j = 2; j < sb->u.ext2_sb.s_groups_count; j++) { + if (++i >= sb->u.ext2_sb.s_groups_count) + i = 0; + tmp = get_group_desc (sb, i, &bh2); + if (tmp->bg_free_inodes_count) { + gdp = tmp; + break; + } + } + } + } + + if (!gdp) { + unlock_super (sb); + iput(inode); + return NULL; + } + bitmap_nr = load_inode_bitmap (sb, i); + bh = sb->u.ext2_sb.s_inode_bitmap[bitmap_nr]; + if ((j = find_first_zero_bit ((unsigned long *) bh->b_data, + EXT2_INODES_PER_GROUP(sb))) < + EXT2_INODES_PER_GROUP(sb)) { + if (set_bit (j, bh->b_data)) { + ext2_warning (sb, "ext2_new_inode", + "bit already set for inode %d", j); + goto repeat; + } + mark_buffer_dirty(bh, 1); + if (sb->s_flags & MS_SYNC) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } + } else { /* bitmap is full: either the descriptor count is wrong (error) or it is zero and the search is restarted */ + if
(gdp->bg_free_inodes_count != 0) { + ext2_error (sb, "ext2_new_inode", + "Free inodes count corrupted in group %d", + i); + unlock_super (sb); + iput (inode); + return NULL; + } + goto repeat; + } + j += i * EXT2_INODES_PER_GROUP(sb) + 1; /* bit index within the group -> inode number */ + if (j < EXT2_FIRST_INO || j > es->s_inodes_count) { + ext2_error (sb, "ext2_new_inode", + "reserved inode or inode > inodes count - " + "block_group = %d,inode=%d", i, j); + unlock_super (sb); + iput (inode); + return NULL; + } + gdp->bg_free_inodes_count--; + if (S_ISDIR(mode)) + gdp->bg_used_dirs_count++; + mark_buffer_dirty(bh2, 1); + es->s_free_inodes_count--; + mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); + sb->s_dirt = 1; + inode->i_mode = mode; + inode->i_sb = sb; + inode->i_count = 1; + inode->i_nlink = 1; + inode->i_dev = sb->s_dev; + inode->i_uid = current->fsuid; + if (test_opt (sb, GRPID)) + inode->i_gid = dir->i_gid; + else if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; /* NOTE(review): inode->i_mode was already assigned from mode above, so this update of the local variable does not change inode->i_mode in this function */ + } else + inode->i_gid = current->fsgid; + inode->i_dirt = 1; + inode->i_ino = j; + inode->i_blksize = sb->s_blocksize; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->u.ext2_i.i_flags = dir->u.ext2_i.i_flags; + if (S_ISLNK(mode)) + inode->u.ext2_i.i_flags &= ~(EXT2_IMMUTABLE_FL | EXT2_APPEND_FL); + inode->u.ext2_i.i_faddr = 0; + inode->u.ext2_i.i_frag_no = 0; + inode->u.ext2_i.i_frag_size = 0; + inode->u.ext2_i.i_file_acl = 0; + inode->u.ext2_i.i_dir_acl = 0; + inode->u.ext2_i.i_dtime = 0; + inode->u.ext2_i.i_block_group = i; + inode->i_op = NULL; + if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) + inode->i_flags |= MS_SYNC; + insert_inode_hash(inode); + inc_inode_version (inode, gdp, mode); + + ext2_debug ("allocating inode %lu\n", inode->i_ino); + + unlock_super (sb); + return inode; +} + /* Without EXT2FS_DEBUG this just returns the super block's s_free_inodes_count. With EXT2FS_DEBUG it recounts the free inodes from the group descriptors and from the bitmaps, prints the per-group and total figures, and returns the descriptor total. */ +unsigned long ext2_count_free_inodes (struct super_block * sb) +{ +#ifdef EXT2FS_DEBUG + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x; +
int bitmap_nr; + struct ext2_group_desc * gdp; + int i; + + lock_super (sb); + es = sb->u.ext2_sb.s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) { + gdp = get_group_desc (sb, i, NULL); + desc_count += gdp->bg_free_inodes_count; + bitmap_nr = load_inode_bitmap (sb, i); + x = ext2_count_free (sb->u.ext2_sb.s_inode_bitmap[bitmap_nr], + EXT2_INODES_PER_GROUP(sb) / 8); + printk ("group %d: stored = %d, counted = %lu\n", + i, gdp->bg_free_inodes_count, x); + bitmap_count += x; + } + printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n", + es->s_free_inodes_count, desc_count, bitmap_count); + unlock_super (sb); + return desc_count; +#else + return sb->u.ext2_sb.s_es->s_free_inodes_count; +#endif +} + /* Consistency check: for every group, compare the descriptor's free-inode count with the count taken from the bitmap, then compare the super block total with the bitmap total; mismatches are reported through ext2_error. */ +void ext2_check_inodes_bitmap (struct super_block * sb) +{ + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x; + int bitmap_nr; + struct ext2_group_desc * gdp; + int i; + + lock_super (sb); + es = sb->u.ext2_sb.s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) { + gdp = get_group_desc (sb, i, NULL); + desc_count += gdp->bg_free_inodes_count; + bitmap_nr = load_inode_bitmap (sb, i); + x = ext2_count_free (sb->u.ext2_sb.s_inode_bitmap[bitmap_nr], + EXT2_INODES_PER_GROUP(sb) / 8); + if (gdp->bg_free_inodes_count != x) + ext2_error (sb, "ext2_check_inodes_bitmap", + "Wrong free inodes count in group %d, " + "stored = %d, counted = %lu", i, + gdp->bg_free_inodes_count, x); + bitmap_count += x; + } + if (es->s_free_inodes_count != bitmap_count) + ext2_error (sb, "ext2_check_inodes_bitmap", + "Wrong free inodes count in super block, " + "stored = %lu, counted = %lu", + es->s_free_inodes_count, bitmap_count); + unlock_super (sb); +} diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c new file mode 100644 index 000000000..633c33e4f --- /dev/null +++ b/fs/ext2/inode.c @@ -0,0 +1,667 @@ +/* + * linux/fs/ext2/inode.c + * + *
Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Goal-directed block allocation by Stephen Tweedie (sct@dcs.ed.ac.uk), 1993 + */ + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> + /* Drop any preallocated blocks; then, if the inode has no links left and is not one of the two ACL inodes, set its size to 0, truncate it when it still owns blocks, and free it. */ +void ext2_put_inode (struct inode * inode) +{ + ext2_discard_prealloc (inode); + if (inode->i_nlink || inode->i_ino == EXT2_ACL_IDX_INO || + inode->i_ino == EXT2_ACL_DATA_INO) + return; + inode->i_size = 0; + if (inode->i_blocks) + ext2_truncate (inode); + ext2_free_inode (inode); +} + /* entry nr of the block-pointer array stored in the in-core inode */ +#define inode_bmap(inode, nr) ((inode)->u.ext2_i.i_data[(nr)]) + /* Return entry nr of the block-number table held in bh and release bh; returns 0 when bh is NULL. */ +static int block_bmap (struct buffer_head * bh, int nr) +{ + int tmp; + + if (!bh) + return 0; + tmp = ((unsigned long *) bh->b_data)[nr]; + brelse (bh); + return tmp; +} + +/* + * ext2_discard_prealloc and ext2_alloc_block are atomic wrt. the + * superblock in the same manner as are ext2_free_blocks and + * ext2_new_block. We just wait on the super rather than locking it + * here, since ext2_new_block will do the necessary locking and we + * can't block until then.
+ */ +void ext2_discard_prealloc (struct inode * inode) +{ +#ifdef EXT2_PREALLOCATE + if (inode->u.ext2_i.i_prealloc_count) { + int i = inode->u.ext2_i.i_prealloc_count; + inode->u.ext2_i.i_prealloc_count = 0; /* the count is zeroed before ext2_free_blocks is called */ + ext2_free_blocks (inode->i_sb, + inode->u.ext2_i.i_prealloc_block, + i); + } +#endif +} + /* Allocate one block for inode near goal. With EXT2_PREALLOCATE, a preallocated block is consumed (and zero-filled) when goal or goal + 1 equals i_prealloc_block; otherwise the preallocation is discarded and ext2_new_block is called, asking for new preallocation only for regular files. Returns the block number, or 0 when getblk fails on a preallocated block; failures of ext2_new_block are passed through as its return value. */ +static int ext2_alloc_block (struct inode * inode, unsigned long goal) +{ +#ifdef EXT2FS_DEBUG + static unsigned long alloc_hits = 0, alloc_attempts = 0; +#endif + unsigned long result; + struct buffer_head * bh; + + wait_on_super (inode->i_sb); + +#ifdef EXT2_PREALLOCATE + if (inode->u.ext2_i.i_prealloc_count && + (goal == inode->u.ext2_i.i_prealloc_block || + goal + 1 == inode->u.ext2_i.i_prealloc_block)) + { + result = inode->u.ext2_i.i_prealloc_block++; + inode->u.ext2_i.i_prealloc_count--; + ext2_debug ("preallocation hit (%lu/%lu).\n", + ++alloc_hits, ++alloc_attempts); + + /* It doesn't matter if we block in getblk() since + we have already atomically allocated the block, and + are only clearing it now.
*/ + if (!(bh = getblk (inode->i_sb->s_dev, result, + inode->i_sb->s_blocksize))) { + ext2_error (inode->i_sb, "ext2_alloc_block", + "cannot get block %lu", result); + return 0; + } + memset(bh->b_data, 0, inode->i_sb->s_blocksize); + bh->b_uptodate = 1; + mark_buffer_dirty(bh, 1); + brelse (bh); + } else { + ext2_discard_prealloc (inode); + ext2_debug ("preallocation miss (%lu/%lu).\n", + alloc_hits, ++alloc_attempts); + if (S_ISREG(inode->i_mode)) + result = ext2_new_block + (inode->i_sb, goal, + &inode->u.ext2_i.i_prealloc_count, + &inode->u.ext2_i.i_prealloc_block); + else + result = ext2_new_block (inode->i_sb, goal, 0, 0); + } +#else + result = ext2_new_block (inode->i_sb, goal, 0, 0); +#endif + + return result; +} + + /* Map file-relative block number block to the block number stored for it, walking the direct pointers and then the single-, double- and triple-indirect tables. Returns 0 when block is negative or out of range, when a table pointer on the path is zero, or when bread fails (block_bmap returns 0 for a NULL buffer). */ +int ext2_bmap (struct inode * inode, int block) +{ + int i; + int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); + + if (block < 0) { + ext2_warning (inode->i_sb, "ext2_bmap", "block < 0"); + return 0; + } + if (block >= EXT2_NDIR_BLOCKS + addr_per_block + + addr_per_block * addr_per_block + + addr_per_block * addr_per_block * addr_per_block) { + ext2_warning (inode->i_sb, "ext2_bmap", "block > big"); + return 0; + } + if (block < EXT2_NDIR_BLOCKS) + return inode_bmap (inode, block); + block -= EXT2_NDIR_BLOCKS; + if (block < addr_per_block) { + i = inode_bmap (inode, EXT2_IND_BLOCK); + if (!i) + return 0; + return block_bmap (bread (inode->i_dev, i, + inode->i_sb->s_blocksize), block); + } + block -= addr_per_block; + if (block < addr_per_block * addr_per_block) { + i = inode_bmap (inode, EXT2_DIND_BLOCK); + if (!i) + return 0; + i = block_bmap (bread (inode->i_dev, i, + inode->i_sb->s_blocksize), + block / addr_per_block); + if (!i) + return 0; /* NOTE(review): masking with addr_per_block - 1 equals a modulo only if addr_per_block is a power of two - confirm */ + return block_bmap (bread (inode->i_dev, i, + inode->i_sb->s_blocksize), + block & (addr_per_block - 1)); + } + block -= addr_per_block * addr_per_block; + i = inode_bmap (inode, EXT2_TIND_BLOCK); + if (!i) + return 0; + i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), + block /
(addr_per_block * addr_per_block)); + if (!i) + return 0; + i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), + (block / addr_per_block) & (addr_per_block - 1)); + if (!i) + return 0; + return block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), + block & (addr_per_block - 1)); +} + /* Return the buffer for the block referenced by slot nr of the inode's i_data array, allocating a block for an empty slot when create is set. new_block is the file-relative block number, used for the RLIMIT_FSIZE check and for the allocation goal. Returns NULL with *err = -EFBIG when the slot is empty and create is 0 or the limit is reached, and NULL (without touching *err) when ext2_alloc_block fails. */ +static struct buffer_head * inode_getblk (struct inode * inode, int nr, + int create, int new_block, int * err) +{ + int tmp, goal = 0; + unsigned long * p; + struct buffer_head * result; + int blocks = inode->i_sb->s_blocksize / 512; + + p = inode->u.ext2_i.i_data + nr; +repeat: + tmp = *p; + if (tmp) { + result = getblk (inode->i_dev, tmp, inode->i_sb->s_blocksize); /* re-check *p after getblk; if it changed, drop the buffer and retry */ + if (tmp == *p) + return result; + brelse (result); + goto repeat; + } + if (!create || new_block >= + (current->rlim[RLIMIT_FSIZE].rlim_cur >> + EXT2_BLOCK_SIZE_BITS(inode->i_sb))) { + *err = -EFBIG; + return NULL; + } + if (inode->u.ext2_i.i_next_alloc_block == new_block) + goal = inode->u.ext2_i.i_next_alloc_goal; + + ext2_debug ("hint = %d,", goal); + + if (!goal) { /* no hint: use the nearest preceding non-zero slot, else the first data block of the inode's group */ + for (tmp = nr - 1; tmp >= 0; tmp--) { + if (inode->u.ext2_i.i_data[tmp]) { + goal = inode->u.ext2_i.i_data[tmp]; + break; + } + } + if (!goal) + goal = (inode->u.ext2_i.i_block_group * + EXT2_BLOCKS_PER_GROUP(inode->i_sb)) + + inode->i_sb->u.ext2_sb.s_es->s_first_data_block; + } + + ext2_debug ("goal = %d.\n", goal); + + tmp = ext2_alloc_block (inode, goal); + if (!tmp) + return NULL; + result = getblk (inode->i_dev, tmp, inode->i_sb->s_blocksize); + if (*p) { /* the slot was filled in the meantime: give the new block back and start over */ + ext2_free_blocks (inode->i_sb, tmp, 1); + brelse (result); + goto repeat; + } + *p = tmp; + inode->u.ext2_i.i_next_alloc_block = new_block; + inode->u.ext2_i.i_next_alloc_goal = tmp; + inode->i_ctime = CURRENT_TIME; + inode->i_blocks += blocks; /* i_blocks is kept in units of 512 bytes (blocks == s_blocksize / 512) */ + if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) + ext2_sync_inode (inode); + else + inode->i_dirt = 1; + return result; +} + +static struct buffer_head * block_getblk (struct inode * inode, + struct buffer_head * bh, int nr, +
int create, int blocksize, + int new_block, int * err) +{ + int tmp, goal = 0; + unsigned long * p; + struct buffer_head * result; + int blocks = inode->i_sb->s_blocksize / 512; + + if (!bh) + return NULL; + if (!bh->b_uptodate) { + ll_rw_block (READ, 1, &bh); + wait_on_buffer (bh); + if (!bh->b_uptodate) { + brelse (bh); + return NULL; + } + } + p = (unsigned long *) bh->b_data + nr; +repeat: + tmp = *p; + if (tmp) { + result = getblk (bh->b_dev, tmp, blocksize); + if (tmp == *p) { + brelse (bh); + return result; + } + brelse (result); + goto repeat; + } + if (!create || new_block >= + (current->rlim[RLIMIT_FSIZE].rlim_cur >> + EXT2_BLOCK_SIZE_BITS(inode->i_sb))) { + brelse (bh); + *err = -EFBIG; + return NULL; + } + if (inode->u.ext2_i.i_next_alloc_block == new_block) + goal = inode->u.ext2_i.i_next_alloc_goal; + if (!goal) { + for (tmp = nr - 1; tmp >= 0; tmp--) { + if (((unsigned long *) bh->b_data)[tmp]) { + goal = ((unsigned long *)bh->b_data)[tmp]; + break; + } + } + if (!goal) + goal = bh->b_blocknr; + } + tmp = ext2_alloc_block (inode, goal); + if (!tmp) { + brelse (bh); + return NULL; + } + result = getblk (bh->b_dev, tmp, blocksize); + if (*p) { + ext2_free_blocks (inode->i_sb, tmp, 1); + brelse (result); + goto repeat; + } + *p = tmp; + mark_buffer_dirty(bh, 1); + if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } + inode->i_ctime = CURRENT_TIME; + inode->i_blocks += blocks; + inode->i_dirt = 1; + inode->u.ext2_i.i_next_alloc_block = new_block; + inode->u.ext2_i.i_next_alloc_goal = tmp; + brelse (bh); + return result; +} + +static int block_getcluster (struct inode * inode, struct buffer_head * bh, + int nr, + int blocksize) +{ + unsigned long * p; + int firstblock = 0; + int result = 0; + int i; + + /* Check to see if clustering possible here. 
*/ + + if(!bh) return 0; + + if(nr % (PAGE_SIZE / inode->i_sb->s_blocksize) != 0) goto out; + if(nr + 3 > EXT2_ADDR_PER_BLOCK(inode->i_sb)) goto out; + + for(i=0; i< (PAGE_SIZE / inode->i_sb->s_blocksize); i++) { + p = (unsigned long *) bh->b_data + nr + i; + + /* All blocks in cluster must already be allocated */ + if(*p == 0) goto out; + + /* See if aligned correctly */ + if(i==0) firstblock = *p; + else if(*p != firstblock + i) goto out; + }; + + p = (unsigned long *) bh->b_data + nr; + result = generate_cluster(bh->b_dev, (int *) p, blocksize); + + out: + brelse(bh); + return result; +} + +struct buffer_head * ext2_getblk (struct inode * inode, long block, + int create, int * err) +{ + struct buffer_head * bh; + unsigned long b; + unsigned long addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); + + *err = -EIO; + if (block < 0) { + ext2_warning (inode->i_sb, "ext2_getblk", "block < 0"); + return NULL; + } + if (block > EXT2_NDIR_BLOCKS + addr_per_block + + addr_per_block * addr_per_block + + addr_per_block * addr_per_block * addr_per_block) { + ext2_warning (inode->i_sb, "ext2_getblk", "block > big"); + return NULL; + } + /* + * If this is a sequential block allocation, set the next_alloc_block + * to this block now so that all the indblock and data block + * allocations use the same goal zone + */ + + ext2_debug ("block %lu, next %lu, goal %lu.\n", block, + inode->u.ext2_i.i_next_alloc_block, + inode->u.ext2_i.i_next_alloc_goal); + + if (block == inode->u.ext2_i.i_next_alloc_block + 1) { + inode->u.ext2_i.i_next_alloc_block++; + inode->u.ext2_i.i_next_alloc_goal++; + } + + *err = -ENOSPC; + b = block; + if (block < EXT2_NDIR_BLOCKS) + return inode_getblk (inode, block, create, b, err); + block -= EXT2_NDIR_BLOCKS; + if (block < addr_per_block) { + bh = inode_getblk (inode, EXT2_IND_BLOCK, create, b, err); + return block_getblk (inode, bh, block, create, + inode->i_sb->s_blocksize, b, err); + } + block -= addr_per_block; + if (block < addr_per_block * 
addr_per_block) { + bh = inode_getblk (inode, EXT2_DIND_BLOCK, create, b, err); + bh = block_getblk (inode, bh, block / addr_per_block, create, + inode->i_sb->s_blocksize, b, err); + return block_getblk (inode, bh, block & (addr_per_block - 1), + create, inode->i_sb->s_blocksize, b, err); + } + block -= addr_per_block * addr_per_block; + bh = inode_getblk (inode, EXT2_TIND_BLOCK, create, b, err); + bh = block_getblk (inode, bh, block/(addr_per_block * addr_per_block), + create, inode->i_sb->s_blocksize, b, err); + bh = block_getblk (inode, bh, (block/addr_per_block) & (addr_per_block - 1), + create, inode->i_sb->s_blocksize, b, err); + return block_getblk (inode, bh, block & (addr_per_block - 1), create, + inode->i_sb->s_blocksize, b, err); +} + +int ext2_getcluster (struct inode * inode, long block) +{ + struct buffer_head * bh; + int err, create; + unsigned long b; + unsigned long addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); + + create = 0; + err = -EIO; + if (block < 0) { + ext2_warning (inode->i_sb, "ext2_getblk", "block < 0"); + return 0; + } + if (block > EXT2_NDIR_BLOCKS + addr_per_block + + addr_per_block * addr_per_block + + addr_per_block * addr_per_block * addr_per_block) { + ext2_warning (inode->i_sb, "ext2_getblk", "block > big"); + return 0; + } + + err = -ENOSPC; + b = block; + if (block < EXT2_NDIR_BLOCKS) return 0; + + block -= EXT2_NDIR_BLOCKS; + + if (block < addr_per_block) { + bh = inode_getblk (inode, EXT2_IND_BLOCK, create, b, &err); + return block_getcluster (inode, bh, block, + inode->i_sb->s_blocksize); + } + block -= addr_per_block; + if (block < addr_per_block * addr_per_block) { + bh = inode_getblk (inode, EXT2_DIND_BLOCK, create, b, &err); + bh = block_getblk (inode, bh, block / addr_per_block, create, + inode->i_sb->s_blocksize, b, &err); + return block_getcluster (inode, bh, block & (addr_per_block - 1), + inode->i_sb->s_blocksize); + } + block -= addr_per_block * addr_per_block; + bh = inode_getblk (inode, EXT2_TIND_BLOCK, 
create, b, &err); + bh = block_getblk (inode, bh, block/(addr_per_block * addr_per_block), + create, inode->i_sb->s_blocksize, b, &err); + bh = block_getblk (inode, bh, (block/addr_per_block) & (addr_per_block - 1), + create, inode->i_sb->s_blocksize, b, &err); + return block_getcluster (inode, bh, block & (addr_per_block - 1), + inode->i_sb->s_blocksize); +} + +struct buffer_head * ext2_bread (struct inode * inode, int block, + int create, int *err) +{ + struct buffer_head * bh; + + bh = ext2_getblk (inode, block, create, err); + if (!bh || bh->b_uptodate) + return bh; + ll_rw_block (READ, 1, &bh); + wait_on_buffer (bh); + if (bh->b_uptodate) + return bh; + brelse (bh); + *err = -EIO; + return NULL; +} + +void ext2_read_inode (struct inode * inode) +{ + struct buffer_head * bh; + struct ext2_inode * raw_inode; + unsigned long block_group; + unsigned long group_desc; + unsigned long desc; + unsigned long block; + struct ext2_group_desc * gdp; + + if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO && + inode->i_ino != EXT2_ACL_DATA_INO && inode->i_ino < EXT2_FIRST_INO) || + inode->i_ino > inode->i_sb->u.ext2_sb.s_es->s_inodes_count) { + ext2_error (inode->i_sb, "ext2_read_inode", + "bad inode number: %lu", inode->i_ino); + return; + } + block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); + if (block_group >= inode->i_sb->u.ext2_sb.s_groups_count) + ext2_panic (inode->i_sb, "ext2_read_inode", + "group >= groups count"); + group_desc = block_group / EXT2_DESC_PER_BLOCK(inode->i_sb); + desc = block_group % EXT2_DESC_PER_BLOCK(inode->i_sb); + bh = inode->i_sb->u.ext2_sb.s_group_desc[group_desc]; + if (!bh) + ext2_panic (inode->i_sb, "ext2_read_inode", + "Descriptor not loaded"); + gdp = (struct ext2_group_desc *) bh->b_data; + block = gdp[desc].bg_inode_table + + (((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) + / EXT2_INODES_PER_BLOCK(inode->i_sb)); + if (!(bh = bread (inode->i_dev, block, inode->i_sb->s_blocksize))) + 
ext2_panic (inode->i_sb, "ext2_read_inode", + "unable to read i-node block - " + "inode=%lu, block=%lu", inode->i_ino, block); + raw_inode = ((struct ext2_inode *) bh->b_data) + + (inode->i_ino - 1) % EXT2_INODES_PER_BLOCK(inode->i_sb); + inode->i_mode = raw_inode->i_mode; + inode->i_uid = raw_inode->i_uid; + inode->i_gid = raw_inode->i_gid; + inode->i_nlink = raw_inode->i_links_count; + inode->i_size = raw_inode->i_size; + inode->i_atime = raw_inode->i_atime; + inode->i_ctime = raw_inode->i_ctime; + inode->i_mtime = raw_inode->i_mtime; + inode->u.ext2_i.i_dtime = raw_inode->i_dtime; + inode->i_blksize = inode->i_sb->s_blocksize; + inode->i_blocks = raw_inode->i_blocks; + inode->i_version = ++event; + inode->u.ext2_i.i_flags = raw_inode->i_flags; + inode->u.ext2_i.i_faddr = raw_inode->i_faddr; + inode->u.ext2_i.i_frag_no = raw_inode->i_frag; + inode->u.ext2_i.i_frag_size = raw_inode->i_fsize; + inode->u.ext2_i.i_osync = 0; + inode->u.ext2_i.i_file_acl = raw_inode->i_file_acl; + inode->u.ext2_i.i_dir_acl = raw_inode->i_dir_acl; + inode->u.ext2_i.i_version = raw_inode->i_version; + inode->u.ext2_i.i_block_group = block_group; + inode->u.ext2_i.i_next_alloc_block = 0; + inode->u.ext2_i.i_next_alloc_goal = 0; + if (inode->u.ext2_i.i_prealloc_count) + ext2_error (inode->i_sb, "ext2_read_inode", + "New inode has non-zero prealloc count!"); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + inode->i_rdev = raw_inode->i_block[0]; + else for (block = 0; block < EXT2_N_BLOCKS; block++) + inode->u.ext2_i.i_data[block] = raw_inode->i_block[block]; + brelse (bh); + inode->i_op = NULL; + if (inode->i_ino == EXT2_ACL_IDX_INO || + inode->i_ino == EXT2_ACL_DATA_INO) + /* Nothing to do */ ; + else if (S_ISREG(inode->i_mode)) + inode->i_op = &ext2_file_inode_operations; + else if (S_ISDIR(inode->i_mode)) + inode->i_op = &ext2_dir_inode_operations; + else if (S_ISLNK(inode->i_mode)) + inode->i_op = &ext2_symlink_inode_operations; + else if (S_ISCHR(inode->i_mode)) + inode->i_op 
= &chrdev_inode_operations; + else if (S_ISBLK(inode->i_mode)) + inode->i_op = &blkdev_inode_operations; + else if (S_ISFIFO(inode->i_mode)) + init_fifo(inode); + if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) + inode->i_flags |= MS_SYNC; + if (inode->u.ext2_i.i_flags & EXT2_APPEND_FL) + inode->i_flags |= S_APPEND; + if (inode->u.ext2_i.i_flags & EXT2_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; +} + +static struct buffer_head * ext2_update_inode (struct inode * inode) +{ + struct buffer_head * bh; + struct ext2_inode * raw_inode; + unsigned long block_group; + unsigned long group_desc; + unsigned long desc; + unsigned long block; + struct ext2_group_desc * gdp; + + if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino < EXT2_FIRST_INO) || + inode->i_ino > inode->i_sb->u.ext2_sb.s_es->s_inodes_count) { + ext2_error (inode->i_sb, "ext2_write_inode", + "bad inode number: %lu", inode->i_ino); + return 0; + } + block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); + if (block_group >= inode->i_sb->u.ext2_sb.s_groups_count) + ext2_panic (inode->i_sb, "ext2_write_inode", + "group >= groups count"); + group_desc = block_group / EXT2_DESC_PER_BLOCK(inode->i_sb); + desc = block_group % EXT2_DESC_PER_BLOCK(inode->i_sb); + bh = inode->i_sb->u.ext2_sb.s_group_desc[group_desc]; + if (!bh) + ext2_panic (inode->i_sb, "ext2_write_inode", + "Descriptor not loaded"); + gdp = (struct ext2_group_desc *) bh->b_data; + block = gdp[desc].bg_inode_table + + (((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) + / EXT2_INODES_PER_BLOCK(inode->i_sb)); + if (!(bh = bread (inode->i_dev, block, inode->i_sb->s_blocksize))) + ext2_panic (inode->i_sb, "ext2_write_inode", + "unable to read i-node block - " + "inode=%lu, block=%lu", inode->i_ino, block); + raw_inode = ((struct ext2_inode *)bh->b_data) + + (inode->i_ino - 1) % EXT2_INODES_PER_BLOCK(inode->i_sb); + raw_inode->i_mode = inode->i_mode; + raw_inode->i_uid = inode->i_uid; + raw_inode->i_gid = inode->i_gid; + 
raw_inode->i_links_count = inode->i_nlink; + raw_inode->i_size = inode->i_size; + raw_inode->i_atime = inode->i_atime; + raw_inode->i_ctime = inode->i_ctime; + raw_inode->i_mtime = inode->i_mtime; + raw_inode->i_blocks = inode->i_blocks; + raw_inode->i_dtime = inode->u.ext2_i.i_dtime; + raw_inode->i_flags = inode->u.ext2_i.i_flags; + raw_inode->i_faddr = inode->u.ext2_i.i_faddr; + raw_inode->i_frag = inode->u.ext2_i.i_frag_no; + raw_inode->i_fsize = inode->u.ext2_i.i_frag_size; + raw_inode->i_file_acl = inode->u.ext2_i.i_file_acl; + raw_inode->i_dir_acl = inode->u.ext2_i.i_dir_acl; + raw_inode->i_version = inode->u.ext2_i.i_version; + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + raw_inode->i_block[0] = inode->i_rdev; + else for (block = 0; block < EXT2_N_BLOCKS; block++) + raw_inode->i_block[block] = inode->u.ext2_i.i_data[block]; + mark_buffer_dirty(bh, 1); + inode->i_dirt = 0; + return bh; +} + +void ext2_write_inode (struct inode * inode) +{ + struct buffer_head * bh; + bh = ext2_update_inode (inode); + brelse (bh); +} + +int ext2_sync_inode (struct inode *inode) +{ + int err = 0; + struct buffer_head *bh; + + bh = ext2_update_inode (inode); + if (bh && bh->b_dirt) + { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + if (bh->b_req && !bh->b_uptodate) + { + printk ("IO error syncing ext2 inode [%04x:%08lx]\n", + inode->i_dev, inode->i_ino); + err = -1; + } + } + else if (!bh) + err = -1; + brelse (bh); + return err; +} diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c new file mode 100644 index 000000000..447968ef0 --- /dev/null +++ b/fs/ext2/ioctl.c @@ -0,0 +1,75 @@ +/* + * linux/fs/ext2/ioctl.c + * + * Copyright (C) 1993, 1994 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/ioctl.h> +#include <linux/sched.h> + +int ext2_ioctl (struct inode * 
inode, struct file * filp, unsigned int cmd, + unsigned long arg) +{ + int err; + unsigned long flags; + + ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); + + switch (cmd) { + case EXT2_IOC_GETFLAGS: + if ((err = verify_area (VERIFY_WRITE, (long *) arg, sizeof(long)))) + return err; + put_fs_long (inode->u.ext2_i.i_flags, (long *) arg); + return 0; + case EXT2_IOC_SETFLAGS: + flags = get_fs_long ((long *) arg); + /* + * Only the super-user can change the IMMUTABLE flag + */ + if ((flags & EXT2_IMMUTABLE_FL) ^ + (inode->u.ext2_i.i_flags & EXT2_IMMUTABLE_FL)) { + /* This test looks nicer. Thanks to Pauline Middelink */ + if (!fsuser()) + return -EPERM; + } else + if ((current->fsuid != inode->i_uid) && !fsuser()) + return -EPERM; + if (IS_RDONLY(inode)) + return -EROFS; + inode->u.ext2_i.i_flags = flags; + if (flags & EXT2_APPEND_FL) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + if (flags & EXT2_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + inode->i_ctime = CURRENT_TIME; + inode->i_dirt = 1; + return 0; + case EXT2_IOC_GETVERSION: + if ((err = verify_area (VERIFY_WRITE, (long *) arg, sizeof(long)))) + return err; + put_fs_long (inode->u.ext2_i.i_version, (long *) arg); + return 0; + case EXT2_IOC_SETVERSION: + if ((current->fsuid != inode->i_uid) && !fsuser()) + return -EPERM; + if (IS_RDONLY(inode)) + return -EROFS; + inode->u.ext2_i.i_version = get_fs_long ((long *) arg); + inode->i_ctime = CURRENT_TIME; + inode->i_dirt = 1; + return 0; + default: + return -EINVAL; + } +} diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c new file mode 100644 index 000000000..f56c5404e --- /dev/null +++ b/fs/ext2/namei.c @@ -0,0 +1,1098 @@ +/* + * linux/fs/ext2/namei.c + * + * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/namei.c + * + * Copyright (C) 1991, 1992 Linus Torvalds 
+ */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/fcntl.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> + +/* + * comment out this line if you want names > EXT2_NAME_LEN chars to be + * truncated. Else they will be disallowed. + */ +/* #define NO_TRUNCATE */ + +/* + * define how far ahead to read directories while searching them. + */ +#define NAMEI_RA_CHUNKS 2 +#define NAMEI_RA_BLOCKS 4 +#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) +#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) + +/* + * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure. + */ +static int ext2_match (int len, const char * const name, + struct ext2_dir_entry * de) +{ + if (!de || !de->inode || len > EXT2_NAME_LEN) + return 0; + /* + * "" means "." ---> so paths like "/usr/lib//libc.a" work + */ + if (!len && de->name_len == 1 && (de->name[0] == '.') && + (de->name[1] == '\0')) + return 1; + if (len != de->name_len) + return 0; + return !memcmp(name, de->name, len); +} + +/* + * ext2_find_entry() + * + * finds an entry in the specified directory with the wanted name. It + * returns the cache buffer in which the entry was found, and the entry + * itself (as a parameter - res_dir). It does NOT read the inode of the + * entry - you'll have to do that yourself if you want to. 
+ */ +static struct buffer_head * ext2_find_entry (struct inode * dir, + const char * const name, int namelen, + struct ext2_dir_entry ** res_dir) +{ + struct super_block * sb; + struct buffer_head * bh_use[NAMEI_RA_SIZE]; + struct buffer_head * bh_read[NAMEI_RA_SIZE]; + unsigned long offset; + int block, toread, i, err; + + *res_dir = NULL; + if (!dir) + return NULL; + sb = dir->i_sb; + +#ifdef NO_TRUNCATE + if (namelen > EXT2_NAME_LEN) + return NULL; +#else + if (namelen > EXT2_NAME_LEN) + namelen = EXT2_NAME_LEN; +#endif + + memset (bh_use, 0, sizeof (bh_use)); + toread = 0; + for (block = 0; block < NAMEI_RA_SIZE; ++block) { + struct buffer_head * bh; + + if ((block << EXT2_BLOCK_SIZE_BITS (sb)) >= dir->i_size) + break; + bh = ext2_getblk (dir, block, 0, &err); + bh_use[block] = bh; + if (bh && !bh->b_uptodate) + bh_read[toread++] = bh; + } + + block = 0; + offset = 0; + while (offset < dir->i_size) { + struct buffer_head * bh; + struct ext2_dir_entry * de; + char * dlimit; + + if ((block % NAMEI_RA_BLOCKS) == 0 && toread) { + ll_rw_block (READ, toread, bh_read); + toread = 0; + } + bh = bh_use[block % NAMEI_RA_SIZE]; + if (!bh) + ext2_panic (sb, "ext2_find_entry", + "buffer head pointer is NULL"); + wait_on_buffer (bh); + if (!bh->b_uptodate) { + /* + * read error: all bets are off + */ + break; + } + + de = (struct ext2_dir_entry *) bh->b_data; + dlimit = bh->b_data + sb->s_blocksize; + while ((char *) de < dlimit) { + if (!ext2_check_dir_entry ("ext2_find_entry", dir, + de, bh, offset)) + goto failure; + if (de->inode != 0 && ext2_match (namelen, name, de)) { + for (i = 0; i < NAMEI_RA_SIZE; ++i) { + if (bh_use[i] != bh) + brelse (bh_use[i]); + } + *res_dir = de; + return bh; + } + offset += de->rec_len; + de = (struct ext2_dir_entry *) + ((char *) de + de->rec_len); + } + + brelse (bh); + if (((block + NAMEI_RA_SIZE) << EXT2_BLOCK_SIZE_BITS (sb)) >= + dir->i_size) + bh = NULL; + else + bh = ext2_getblk (dir, block + NAMEI_RA_SIZE, 0, &err); + bh_use[block++ 
% NAMEI_RA_SIZE] = bh; + if (bh && !bh->b_uptodate) + bh_read[toread++] = bh; + } + +failure: + for (i = 0; i < NAMEI_RA_SIZE; ++i) + brelse (bh_use[i]); + return NULL; +} + +int ext2_lookup (struct inode * dir, const char * name, int len, + struct inode ** result) +{ + unsigned long ino; + struct ext2_dir_entry * de; + struct buffer_head * bh; + + *result = NULL; + if (!dir) + return -ENOENT; + if (!S_ISDIR(dir->i_mode)) { + iput (dir); + return -ENOENT; + } + if (dcache_lookup(dir, name, len, &ino)) { + if (!ino) { + iput(dir); + return -ENOENT; + } + if (!(*result = iget (dir->i_sb, ino))) { + iput (dir); + return -EACCES; + } + iput (dir); + return 0; + } + ino = dir->i_version; + if (!(bh = ext2_find_entry (dir, name, len, &de))) { + if (ino == dir->i_version) + dcache_add(dir, name, len, 0); + iput (dir); + return -ENOENT; + } + ino = de->inode; + dcache_add(dir, name, len, ino); + brelse (bh); + if (!(*result = iget (dir->i_sb, ino))) { + iput (dir); + return -EACCES; + } + iput (dir); + return 0; +} + +/* + * ext2_add_entry() + * + * adds a file entry to the specified directory, using the same + * semantics as ext2_find_entry(). It returns NULL if it failed. + * + * NOTE!! The inode part of 'de' is left at 0 - which means you + * may not sleep between calling this and putting something into + * the entry, as someone else might have used it while you slept. + */ +static struct buffer_head * ext2_add_entry (struct inode * dir, + const char * name, int namelen, + struct ext2_dir_entry ** res_dir, + int *err) +{ + unsigned long offset; + unsigned short rec_len; + struct buffer_head * bh; + struct ext2_dir_entry * de, * de1; + struct super_block * sb; + + *err = -EINVAL; + *res_dir = NULL; + if (!dir) + return NULL; + sb = dir->i_sb; +#ifdef NO_TRUNCATE + if (namelen > EXT2_NAME_LEN) + return NULL; +#else + if (namelen > EXT2_NAME_LEN) + namelen = EXT2_NAME_LEN; +#endif + if (!namelen) + return NULL; + /* + * Is this a busy deleted directory? 
Can't create new files if so + */ + if (dir->i_size == 0) + { + *err = -ENOENT; + return NULL; + } + bh = ext2_bread (dir, 0, 0, err); + if (!bh) + return NULL; + rec_len = EXT2_DIR_REC_LEN(namelen); + offset = 0; + de = (struct ext2_dir_entry *) bh->b_data; + *err = -ENOSPC; + while (1) { + if ((char *)de >= sb->s_blocksize + bh->b_data) { + brelse (bh); + bh = NULL; + bh = ext2_bread (dir, offset >> EXT2_BLOCK_SIZE_BITS(sb), 1, err); + if (!bh) + return NULL; + if (dir->i_size <= offset) { + if (dir->i_size == 0) { + *err = -ENOENT; + return NULL; + } + + ext2_debug ("creating next block\n"); + + de = (struct ext2_dir_entry *) bh->b_data; + de->inode = 0; + de->rec_len = sb->s_blocksize; + dir->i_size = offset + sb->s_blocksize; + dir->i_dirt = 1; + } else { + + ext2_debug ("skipping to next block\n"); + + de = (struct ext2_dir_entry *) bh->b_data; + } + } + if (!ext2_check_dir_entry ("ext2_add_entry", dir, de, bh, + offset)) { + *err = -ENOENT; + brelse (bh); + return NULL; + } + if (de->inode != 0 && ext2_match (namelen, name, de)) { + *err = -EEXIST; + brelse (bh); + return NULL; + } + if ((de->inode == 0 && de->rec_len >= rec_len) || + (de->rec_len >= EXT2_DIR_REC_LEN(de->name_len) + rec_len)) { + offset += de->rec_len; + if (de->inode) { + de1 = (struct ext2_dir_entry *) ((char *) de + + EXT2_DIR_REC_LEN(de->name_len)); + de1->rec_len = de->rec_len - + EXT2_DIR_REC_LEN(de->name_len); + de->rec_len = EXT2_DIR_REC_LEN(de->name_len); + de = de1; + } + de->inode = 0; + de->name_len = namelen; + memcpy (de->name, name, namelen); + /* + * XXX shouldn't update any times until successful + * completion of syscall, but too many callers depend + * on this. + * + * XXX similarly, too many callers depend on + * ext2_new_inode() setting the times, but error + * recovery deletes the inode, so the worst that can + * happen is that the times are slightly out of date + * and/or different from the directory change time. 
+ */ + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + dir->i_dirt = 1; + dir->i_version = ++event; + mark_buffer_dirty(bh, 1); + *res_dir = de; + *err = 0; + return bh; + } + offset += de->rec_len; + de = (struct ext2_dir_entry *) ((char *) de + de->rec_len); + } + brelse (bh); + return NULL; +} + +/* + * ext2_delete_entry deletes a directory entry by merging it with the + * previous entry + */ +static int ext2_delete_entry (struct ext2_dir_entry * dir, + struct buffer_head * bh) +{ + struct ext2_dir_entry * de, * pde; + int i; + + i = 0; + pde = NULL; + de = (struct ext2_dir_entry *) bh->b_data; + while (i < bh->b_size) { + if (!ext2_check_dir_entry ("ext2_delete_entry", NULL, + de, bh, i)) + return -EIO; + if (de == dir) { + if (pde) + pde->rec_len += dir->rec_len; + dir->inode = 0; + return 0; + } + i += de->rec_len; + pde = de; + de = (struct ext2_dir_entry *) ((char *) de + de->rec_len); + } + return -ENOENT; +} + +int ext2_create (struct inode * dir,const char * name, int len, int mode, + struct inode ** result) +{ + struct inode * inode; + struct buffer_head * bh; + struct ext2_dir_entry * de; + int err; + + *result = NULL; + if (!dir) + return -ENOENT; + inode = ext2_new_inode (dir, mode); + if (!inode) { + iput (dir); + return -ENOSPC; + } + inode->i_op = &ext2_file_inode_operations; + inode->i_mode = mode; + inode->i_dirt = 1; + bh = ext2_add_entry (dir, name, len, &de, &err); + if (!bh) { + inode->i_nlink--; + inode->i_dirt = 1; + iput (inode); + iput (dir); + return err; + } + de->inode = inode->i_ino; + dir->i_version = ++event; + dcache_add(dir, de->name, de->name_len, de->inode); + mark_buffer_dirty(bh, 1); + if (IS_SYNC(dir)) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } + brelse (bh); + iput (dir); + *result = inode; + return 0; +} + +int ext2_mknod (struct inode * dir, const char * name, int len, int mode, + int rdev) +{ + struct inode * inode; + struct buffer_head * bh; + struct ext2_dir_entry * de; + int err; + + if (!dir) + return 
-ENOENT; + bh = ext2_find_entry (dir, name, len, &de); + if (bh) { + brelse (bh); + iput (dir); + return -EEXIST; + } + inode = ext2_new_inode (dir, mode); + if (!inode) { + iput (dir); + return -ENOSPC; + } + inode->i_uid = current->fsuid; + inode->i_mode = mode; + inode->i_op = NULL; + if (S_ISREG(inode->i_mode)) + inode->i_op = &ext2_file_inode_operations; + else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &ext2_dir_inode_operations; + if (dir->i_mode & S_ISGID) + inode->i_mode |= S_ISGID; + } + else if (S_ISLNK(inode->i_mode)) + inode->i_op = &ext2_symlink_inode_operations; + else if (S_ISCHR(inode->i_mode)) + inode->i_op = &chrdev_inode_operations; + else if (S_ISBLK(inode->i_mode)) + inode->i_op = &blkdev_inode_operations; + else if (S_ISFIFO(inode->i_mode)) + init_fifo(inode); + if (S_ISBLK(mode) || S_ISCHR(mode)) + inode->i_rdev = rdev; + inode->i_dirt = 1; + bh = ext2_add_entry (dir, name, len, &de, &err); + if (!bh) { + inode->i_nlink--; + inode->i_dirt = 1; + iput (inode); + iput (dir); + return err; + } + de->inode = inode->i_ino; + dir->i_version = ++event; + dcache_add(dir, de->name, de->name_len, de->inode); + mark_buffer_dirty(bh, 1); + if (IS_SYNC(dir)) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } + brelse (bh); + iput (dir); + iput (inode); + return 0; +} + +int ext2_mkdir (struct inode * dir, const char * name, int len, int mode) +{ + struct inode * inode; + struct buffer_head * bh, * dir_block; + struct ext2_dir_entry * de; + int err; + + if (!dir) + return -ENOENT; + bh = ext2_find_entry (dir, name, len, &de); + if (bh) { + brelse (bh); + iput (dir); + return -EEXIST; + } + if (dir->i_nlink >= EXT2_LINK_MAX) { + iput (dir); + return -EMLINK; + } + inode = ext2_new_inode (dir, S_IFDIR); + if (!inode) { + iput (dir); + return -ENOSPC; + } + inode->i_op = &ext2_dir_inode_operations; + inode->i_size = inode->i_sb->s_blocksize; + dir_block = ext2_bread (inode, 0, 1, &err); + if (!dir_block) { + iput (dir); + inode->i_nlink--; + 
inode->i_dirt = 1; + iput (inode); + return err; + } + inode->i_blocks = inode->i_sb->s_blocksize / 512; + de = (struct ext2_dir_entry *) dir_block->b_data; + de->inode = inode->i_ino; + de->name_len = 1; + de->rec_len = EXT2_DIR_REC_LEN(de->name_len); + strcpy (de->name, "."); + de = (struct ext2_dir_entry *) ((char *) de + de->rec_len); + de->inode = dir->i_ino; + de->rec_len = inode->i_sb->s_blocksize - EXT2_DIR_REC_LEN(1); + de->name_len = 2; + strcpy (de->name, ".."); + inode->i_nlink = 2; + mark_buffer_dirty(dir_block, 1); + brelse (dir_block); + inode->i_mode = S_IFDIR | (mode & S_IRWXUGO & ~current->fs->umask); + if (dir->i_mode & S_ISGID) + inode->i_mode |= S_ISGID; + inode->i_dirt = 1; + bh = ext2_add_entry (dir, name, len, &de, &err); + if (!bh) { + iput (dir); + inode->i_nlink = 0; + inode->i_dirt = 1; + iput (inode); + return err; + } + de->inode = inode->i_ino; + dir->i_version = ++event; + dcache_add(dir, de->name, de->name_len, de->inode); + mark_buffer_dirty(bh, 1); + if (IS_SYNC(dir)) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } + dir->i_nlink++; + dir->i_dirt = 1; + iput (dir); + iput (inode); + brelse (bh); + return 0; +} + +/* + * routine to check that the specified directory is empty (for rmdir) + */ +static int empty_dir (struct inode * inode) +{ + unsigned long offset; + struct buffer_head * bh; + struct ext2_dir_entry * de, * de1; + struct super_block * sb; + int err; + + sb = inode->i_sb; + if (inode->i_size < EXT2_DIR_REC_LEN(1) + EXT2_DIR_REC_LEN(2) || + !(bh = ext2_bread (inode, 0, 0, &err))) { + ext2_warning (inode->i_sb, "empty_dir", + "bad directory (dir %lu)", inode->i_ino); + return 1; + } + de = (struct ext2_dir_entry *) bh->b_data; + de1 = (struct ext2_dir_entry *) ((char *) de + de->rec_len); + if (de->inode != inode->i_ino || !de1->inode || + strcmp (".", de->name) || strcmp ("..", de1->name)) { + ext2_warning (inode->i_sb, "empty_dir", + "bad directory (dir %lu)", inode->i_ino); + return 1; + } + offset = 
de->rec_len + de1->rec_len; + de = (struct ext2_dir_entry *) ((char *) de1 + de1->rec_len); + while (offset < inode->i_size ) { + if ((void *) de >= (void *) (bh->b_data + sb->s_blocksize)) { + brelse (bh); + bh = ext2_bread (inode, offset >> EXT2_BLOCK_SIZE_BITS(sb), 1, &err); + if (!bh) { + offset += sb->s_blocksize; + continue; + } + de = (struct ext2_dir_entry *) bh->b_data; + } + if (!ext2_check_dir_entry ("empty_dir", inode, de, bh, + offset)) { + brelse (bh); + return 1; + } + if (de->inode) { + brelse (bh); + return 0; + } + offset += de->rec_len; + de = (struct ext2_dir_entry *) ((char *) de + de->rec_len); + } + brelse (bh); + return 1; +} + +int ext2_rmdir (struct inode * dir, const char * name, int len) +{ + int retval; + struct inode * inode; + struct buffer_head * bh; + struct ext2_dir_entry * de; + +repeat: + if (!dir) + return -ENOENT; + inode = NULL; + bh = ext2_find_entry (dir, name, len, &de); + retval = -ENOENT; + if (!bh) + goto end_rmdir; + retval = -EPERM; + if (!(inode = iget (dir->i_sb, de->inode))) + goto end_rmdir; + if (inode->i_dev != dir->i_dev) + goto end_rmdir; + if (de->inode != inode->i_ino) { + iput(inode); + brelse(bh); + current->counter = 0; + schedule(); + goto repeat; + } + if ((dir->i_mode & S_ISVTX) && !fsuser() && + current->fsuid != inode->i_uid && + current->fsuid != dir->i_uid) + goto end_rmdir; + if (inode == dir) /* we may not delete ".", but "../dir" is ok */ + goto end_rmdir; + if (!S_ISDIR(inode->i_mode)) { + retval = -ENOTDIR; + goto end_rmdir; + } + down(&inode->i_sem); + if (!empty_dir (inode)) + retval = -ENOTEMPTY; + else if (de->inode != inode->i_ino) + retval = -ENOENT; + else { + if (inode->i_count > 1) { + /* + * Are we deleting the last instance of a busy directory? + * Better clean up if so. + * + * Make directory empty (it will be truncated when finally + * dereferenced). This also inhibits ext2_add_entry. 
/*
 * Remove the directory entry `name` (length `len`) from `dir` and drop one
 * link from the inode it refers to.
 *
 * Returns 0 on success, -ENOENT if `dir` is NULL or the entry/inode cannot
 * be found, -EPERM for directories, append-only/immutable inodes and sticky
 * directory violations, or whatever error ext2_delete_entry() reports.
 *
 * Except for the `!dir` early return, every exit goes through end_unlink,
 * which releases the directory buffer and drops the references on both
 * `inode` and `dir`.
 */
int ext2_unlink (struct inode * dir, const char * name, int len)
{
	int retval;
	struct inode * inode;
	struct buffer_head * bh;
	struct ext2_dir_entry * de;

repeat:
	if (!dir)
		return -ENOENT;
	retval = -ENOENT;
	inode = NULL;
	bh = ext2_find_entry (dir, name, len, &de);
	if (!bh)
		goto end_unlink;
	if (!(inode = iget (dir->i_sb, de->inode)))
		goto end_unlink;
	retval = -EPERM;
	/* Directories are not removed through unlink. */
	if (S_ISDIR(inode->i_mode))
		goto end_unlink;
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		goto end_unlink;
	/*
	 * The entry no longer names the inode we just fetched: drop what we
	 * hold, give up the CPU and start over from the lookup.
	 */
	if (de->inode != inode->i_ino) {
		iput(inode);
		brelse(bh);
		current->counter = 0;
		schedule();
		goto repeat;
	}
	/*
	 * Sticky directory: only the file owner, the directory owner or a
	 * caller for whom fsuser() is true may remove the entry.
	 */
	if ((dir->i_mode & S_ISVTX) && !fsuser() &&
	    current->fsuid != inode->i_uid &&
	    current->fsuid != dir->i_uid)
		goto end_unlink;
	/*
	 * A zero link count on an inode that still has a name is
	 * inconsistent; warn and force it to 1 so the decrement below
	 * ends at 0.
	 */
	if (!inode->i_nlink) {
		ext2_warning (inode->i_sb, "ext2_unlink",
			      "Deleting nonexistent file (%lu), %d",
			      inode->i_ino, inode->i_nlink);
		inode->i_nlink = 1;
	}
	retval = ext2_delete_entry (de, bh);
	if (retval)
		goto end_unlink;
	dir->i_version = ++event;
	mark_buffer_dirty(bh, 1);
	/* Synchronous directory: write the block out before continuing. */
	if (IS_SYNC(dir)) {
		ll_rw_block (WRITE, 1, &bh);
		wait_on_buffer (bh);
	}
	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
	dir->i_dirt = 1;
	inode->i_nlink--;
	inode->i_dirt = 1;
	inode->i_ctime = dir->i_ctime;
	retval = 0;
end_unlink:
	/* bh and inode may still be NULL here; brelse/iput are called anyway. */
	brelse (bh);
	iput (inode);
	iput (dir);
	return retval;
}
} + if (IS_APPEND(oldinode) || IS_IMMUTABLE(oldinode)) { + iput (oldinode); + iput (dir); + return -EPERM; + } + if (oldinode->i_nlink >= EXT2_LINK_MAX) { + iput (oldinode); + iput (dir); + return -EMLINK; + } + bh = ext2_find_entry (dir, name, len, &de); + if (bh) { + brelse (bh); + iput (dir); + iput (oldinode); + return -EEXIST; + } + bh = ext2_add_entry (dir, name, len, &de, &err); + if (!bh) { + iput (dir); + iput (oldinode); + return err; + } + de->inode = oldinode->i_ino; + dir->i_version = ++event; + dcache_add(dir, de->name, de->name_len, de->inode); + mark_buffer_dirty(bh, 1); + if (IS_SYNC(dir)) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } + brelse (bh); + iput (dir); + oldinode->i_nlink++; + oldinode->i_ctime = CURRENT_TIME; + oldinode->i_dirt = 1; + iput (oldinode); + return 0; +} + +static int subdir (struct inode * new_inode, struct inode * old_inode) +{ + int ino; + int result; + + new_inode->i_count++; + result = 0; + for (;;) { + if (new_inode == old_inode) { + result = 1; + break; + } + if (new_inode->i_dev != old_inode->i_dev) + break; + ino = new_inode->i_ino; + if (ext2_lookup (new_inode, "..", 2, &new_inode)) + break; + if (new_inode->i_ino == ino) + break; + } + iput (new_inode); + return result; +} + +#define PARENT_INO(buffer) \ + ((struct ext2_dir_entry *) ((char *) buffer + \ + ((struct ext2_dir_entry *) buffer)->rec_len))->inode + +#define PARENT_NAME(buffer) \ + ((struct ext2_dir_entry *) ((char *) buffer + \ + ((struct ext2_dir_entry *) buffer)->rec_len))->name + +/* + * rename uses retrying to avoid race-conditions: at least they should be + * minimal. + * it tries to allocate all the blocks, then sanity-checks, and if the sanity- + * checks fail, it tries to restart itself again. Very practical - no changes + * are done until we know everything works ok.. and then all the changes can be + * done in one fell swoop when we have claimed all the buffers needed. 
+ * + * Anybody can rename anything with this: the permission checks are left to the + * higher-level routines. + */ +static int do_ext2_rename (struct inode * old_dir, const char * old_name, + int old_len, struct inode * new_dir, + const char * new_name, int new_len) +{ + struct inode * old_inode, * new_inode; + struct buffer_head * old_bh, * new_bh, * dir_bh; + struct ext2_dir_entry * old_de, * new_de; + int retval; + + goto start_up; +try_again: + if (new_bh && new_de) { + ext2_delete_entry(new_de, new_bh); + new_dir->i_version = ++event; + } + brelse (old_bh); + brelse (new_bh); + brelse (dir_bh); + iput (old_inode); + iput (new_inode); + current->counter = 0; + schedule (); +start_up: + old_inode = new_inode = NULL; + old_bh = new_bh = dir_bh = NULL; + new_de = NULL; + old_bh = ext2_find_entry (old_dir, old_name, old_len, &old_de); + retval = -ENOENT; + if (!old_bh) + goto end_rename; + old_inode = __iget (old_dir->i_sb, old_de->inode, 0); /* don't cross mnt-points */ + if (!old_inode) + goto end_rename; + retval = -EPERM; + if ((old_dir->i_mode & S_ISVTX) && + current->fsuid != old_inode->i_uid && + current->fsuid != old_dir->i_uid && !fsuser()) + goto end_rename; + if (IS_APPEND(old_inode) || IS_IMMUTABLE(old_inode)) + goto end_rename; + new_bh = ext2_find_entry (new_dir, new_name, new_len, &new_de); + if (new_bh) { + new_inode = __iget (new_dir->i_sb, new_de->inode, 0); /* no mntp cross */ + if (!new_inode) { + brelse (new_bh); + new_bh = NULL; + } + } + if (new_inode == old_inode) { + retval = 0; + goto end_rename; + } + if (new_inode && S_ISDIR(new_inode->i_mode)) { + retval = -EISDIR; + if (!S_ISDIR(old_inode->i_mode)) + goto end_rename; + retval = -EINVAL; + if (subdir (new_dir, old_inode)) + goto end_rename; + retval = -ENOTEMPTY; + if (!empty_dir (new_inode)) + goto end_rename; + retval = -EBUSY; + if (new_inode->i_count > 1) + goto end_rename; + } + retval = -EPERM; + if (new_inode && (new_dir->i_mode & S_ISVTX) && + current->fsuid != 
new_inode->i_uid && + current->fsuid != new_dir->i_uid && !fsuser()) + goto end_rename; + if (S_ISDIR(old_inode->i_mode)) { + retval = -ENOTDIR; + if (new_inode && !S_ISDIR(new_inode->i_mode)) + goto end_rename; + retval = -EINVAL; + if (subdir (new_dir, old_inode)) + goto end_rename; + dir_bh = ext2_bread (old_inode, 0, 0, &retval); + if (!dir_bh) + goto end_rename; + if (PARENT_INO(dir_bh->b_data) != old_dir->i_ino) + goto end_rename; + retval = -EMLINK; + if (!new_inode && new_dir->i_nlink >= EXT2_LINK_MAX) + goto end_rename; + } + if (!new_bh) + new_bh = ext2_add_entry (new_dir, new_name, new_len, &new_de, + &retval); + if (!new_bh) + goto end_rename; + new_dir->i_version = ++event; + /* + * sanity checking before doing the rename - avoid races + */ + if (new_inode && (new_de->inode != new_inode->i_ino)) + goto try_again; + if (new_de->inode && !new_inode) + goto try_again; + if (old_de->inode != old_inode->i_ino) + goto try_again; + /* + * ok, that's it + */ + new_de->inode = old_inode->i_ino; + dcache_add(new_dir, new_de->name, new_de->name_len, new_de->inode); + retval = ext2_delete_entry (old_de, old_bh); + if (retval == -ENOENT) + goto try_again; + if (retval) + goto end_rename; + old_dir->i_version = ++event; + if (new_inode) { + new_inode->i_nlink--; + new_inode->i_ctime = CURRENT_TIME; + new_inode->i_dirt = 1; + } + old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; + old_dir->i_dirt = 1; + if (dir_bh) { + PARENT_INO(dir_bh->b_data) = new_dir->i_ino; + dcache_add(old_inode, "..", 2, new_dir->i_ino); + mark_buffer_dirty(dir_bh, 1); + old_dir->i_nlink--; + old_dir->i_dirt = 1; + if (new_inode) { + new_inode->i_nlink--; + new_inode->i_dirt = 1; + } else { + new_dir->i_nlink++; + new_dir->i_dirt = 1; + } + } + mark_buffer_dirty(old_bh, 1); + if (IS_SYNC(old_dir)) { + ll_rw_block (WRITE, 1, &old_bh); + wait_on_buffer (old_bh); + } + mark_buffer_dirty(new_bh, 1); + if (IS_SYNC(new_dir)) { + ll_rw_block (WRITE, 1, &new_bh); + wait_on_buffer (new_bh); + } + 
retval = 0; +end_rename: + brelse (dir_bh); + brelse (old_bh); + brelse (new_bh); + iput (old_inode); + iput (new_inode); + iput (old_dir); + iput (new_dir); + return retval; +} + +/* + * Ok, rename also locks out other renames, as they can change the parent of + * a directory, and we don't want any races. Other races are checked for by + * "do_rename()", which restarts if there are inconsistencies. + * + * Note that there is no race between different filesystems: it's only within + * the same device that races occur: many renames can happen at once, as long + * as they are on different partitions. + * + * In the second extended file system, we use a lock flag stored in the memory + * super-block. This way, we really lock other renames only if they occur + * on the same file system + */ +int ext2_rename (struct inode * old_dir, const char * old_name, int old_len, + struct inode * new_dir, const char * new_name, int new_len) +{ + int result; + + while (old_dir->i_sb->u.ext2_sb.s_rename_lock) + sleep_on (&old_dir->i_sb->u.ext2_sb.s_rename_wait); + old_dir->i_sb->u.ext2_sb.s_rename_lock = 1; + result = do_ext2_rename (old_dir, old_name, old_len, new_dir, + new_name, new_len); + old_dir->i_sb->u.ext2_sb.s_rename_lock = 0; + wake_up (&old_dir->i_sb->u.ext2_sb.s_rename_wait); + return result; +} diff --git a/fs/ext2/super.c b/fs/ext2/super.c new file mode 100644 index 000000000..37fae41ad --- /dev/null +++ b/fs/ext2/super.c @@ -0,0 +1,755 @@ +/* + * linux/fs/ext2/super.c + * + * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <stdarg.h> + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/malloc.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include 
/*
 * Report a file system error found by `function`.
 *
 * On a writable mount the error flag is recorded both in the in-memory
 * mount state and in the on-disk super-block copy, and the super-block
 * buffer is marked dirty.  The message is then formatted and, depending on
 * the error policy, the kernel panics, just logs, or logs and switches the
 * file system to read-only.
 *
 * Policy selection: an explicit ERRORS_PANIC / ERRORS_RO mount option wins;
 * otherwise the s_errors field of the super-block is used, but only when no
 * conflicting errors= mount option is set.
 *
 * NOTE(review): the message is expanded with vsprintf() into a 1024-byte
 * stack buffer with no length limit, so callers must keep fmt and its
 * arguments short.
 */
void ext2_error (struct super_block * sb, const char * function,
		 const char * fmt, ...)
{
	char buf[1024];
	va_list args;

	/* Remember the error on disk only if we are allowed to write. */
	if (!(sb->s_flags & MS_RDONLY)) {
		sb->u.ext2_sb.s_mount_state |= EXT2_ERROR_FS;
		sb->u.ext2_sb.s_es->s_state |= EXT2_ERROR_FS;
		mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1);
		sb->s_dirt = 1;
	}
	va_start (args, fmt);
	vsprintf (buf, fmt, args);
	va_end (args);
	if (test_opt (sb, ERRORS_PANIC) ||
	    (sb->u.ext2_sb.s_es->s_errors == EXT2_ERRORS_PANIC &&
	     !test_opt (sb, ERRORS_CONT) && !test_opt (sb, ERRORS_RO)))
		panic ("EXT2-fs panic (device %d/%d): %s: %s\n",
		       MAJOR(sb->s_dev), MINOR(sb->s_dev), function, buf);
	printk (KERN_CRIT "EXT2-fs error (device %d/%d): %s: %s\n",
		MAJOR(sb->s_dev), MINOR(sb->s_dev), function, buf);
	if (test_opt (sb, ERRORS_RO) ||
	    (sb->u.ext2_sb.s_es->s_errors == EXT2_ERRORS_RO &&
	     !test_opt (sb, ERRORS_CONT) && !test_opt (sb, ERRORS_PANIC))) {
		printk ("Remounting filesystem read-only\n");
		sb->s_flags |= MS_RDONLY;
	}
}
+{ + char buf[1024]; + va_list args; + + va_start (args, fmt); + vsprintf (buf, fmt, args); + va_end (args); + printk (KERN_WARNING "EXT2-fs warning (device %d/%d): %s: %s\n", + MAJOR(sb->s_dev), MINOR(sb->s_dev), function, buf); +} + +void ext2_put_super (struct super_block * sb) +{ + int db_count; + int i; + + lock_super (sb); + if (!(sb->s_flags & MS_RDONLY)) { + sb->u.ext2_sb.s_es->s_state = sb->u.ext2_sb.s_mount_state; + mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); + } + sb->s_dev = 0; + db_count = sb->u.ext2_sb.s_db_per_group; + for (i = 0; i < db_count; i++) + if (sb->u.ext2_sb.s_group_desc[i]) + brelse (sb->u.ext2_sb.s_group_desc[i]); + kfree_s (sb->u.ext2_sb.s_group_desc, + db_count * sizeof (struct buffer_head *)); + for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) + if (sb->u.ext2_sb.s_inode_bitmap[i]) + brelse (sb->u.ext2_sb.s_inode_bitmap[i]); + for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) + if (sb->u.ext2_sb.s_block_bitmap[i]) + brelse (sb->u.ext2_sb.s_block_bitmap[i]); + brelse (sb->u.ext2_sb.s_sbh); + unlock_super (sb); + return; +} + +static struct super_operations ext2_sops = { + ext2_read_inode, + NULL, + ext2_write_inode, + ext2_put_inode, + ext2_put_super, + ext2_write_super, + ext2_statfs, + ext2_remount +}; + +#ifdef EXT2FS_PRE_02B_COMPAT + +static int convert_pre_02b_fs (struct super_block * sb, + struct buffer_head * bh) +{ + struct ext2_super_block * es; + struct ext2_old_group_desc old_group_desc [BLOCK_SIZE / sizeof (struct ext2_old_group_desc)]; + struct ext2_group_desc * gdp; + struct buffer_head * bh2; + int groups_count; + int i; + + es = (struct ext2_super_block *) bh->b_data; + bh2 = bread (sb->s_dev, 2, BLOCK_SIZE); + if (!bh2) { + printk ("Cannot read descriptor blocks while converting !\n"); + return 0; + } + memcpy (old_group_desc, bh2->b_data, BLOCK_SIZE); + groups_count = (sb->u.ext2_sb.s_blocks_count - + sb->u.ext2_sb.s_first_data_block + + (EXT2_BLOCK_SIZE(sb) * 8) - 1) / + (EXT2_BLOCK_SIZE(sb) * 8); + memset (bh2->b_data, 0, 
BLOCK_SIZE); + gdp = (struct ext2_group_desc *) bh2->b_data; + for (i = 0; i < groups_count; i++) { + gdp[i].bg_block_bitmap = old_group_desc[i].bg_block_bitmap; + gdp[i].bg_inode_bitmap = old_group_desc[i].bg_inode_bitmap; + gdp[i].bg_inode_table = old_group_desc[i].bg_inode_table; + gdp[i].bg_free_blocks_count = old_group_desc[i].bg_free_blocks_count; + gdp[i].bg_free_inodes_count = old_group_desc[i].bg_free_inodes_count; + } + mark_buffer_dirty(bh2, 1); + brelse (bh2); + es->s_magic = EXT2_SUPER_MAGIC; + mark_buffer_dirty(bh, 1); + sb->s_magic = EXT2_SUPER_MAGIC; + return 1; +} + +#endif + +/* + * This function has been shamelessly adapted from the msdos fs + */ +static int parse_options (char * options, unsigned long * sb_block, + unsigned short *resuid, unsigned short * resgid, + unsigned long * mount_options) +{ + char * this_char; + char * value; + + if (!options) + return 1; + for (this_char = strtok (options, ","); + this_char != NULL; + this_char = strtok (NULL, ",")) { + if ((value = strchr (this_char, '=')) != NULL) + *value++ = 0; + if (!strcmp (this_char, "bsddf")) + clear_opt (*mount_options, MINIX_DF); + else if (!strcmp (this_char, "check")) { + if (!value || !*value) + set_opt (*mount_options, CHECK_NORMAL); + else if (!strcmp (value, "none")) { + clear_opt (*mount_options, CHECK_NORMAL); + clear_opt (*mount_options, CHECK_STRICT); + } + else if (strcmp (value, "normal")) + set_opt (*mount_options, CHECK_NORMAL); + else if (strcmp (value, "strict")) { + set_opt (*mount_options, CHECK_NORMAL); + set_opt (*mount_options, CHECK_STRICT); + } + else { + printk ("EXT2-fs: Invalid check option: %s\n", + value); + return 0; + } + } + else if (!strcmp (this_char, "debug")) + set_opt (*mount_options, DEBUG); + else if (!strcmp (this_char, "errors")) { + if (!value || !*value) { + printk ("EXT2-fs: the errors option requires " + "an argument"); + return 0; + } + if (!strcmp (value, "continue")) { + clear_opt (*mount_options, ERRORS_RO); + clear_opt 
(*mount_options, ERRORS_PANIC); + set_opt (*mount_options, ERRORS_CONT); + } + else if (!strcmp (value, "remount-ro")) { + clear_opt (*mount_options, ERRORS_CONT); + clear_opt (*mount_options, ERRORS_PANIC); + set_opt (*mount_options, ERRORS_RO); + } + else if (!strcmp (value, "panic")) { + clear_opt (*mount_options, ERRORS_CONT); + clear_opt (*mount_options, ERRORS_RO); + set_opt (*mount_options, ERRORS_PANIC); + } + else { + printk ("EXT2-fs: Invalid errors option: %s\n", + value); + return 0; + } + } + else if (!strcmp (this_char, "grpid") || + !strcmp (this_char, "bsdgroups")) + set_opt (*mount_options, GRPID); + else if (!strcmp (this_char, "minixdf")) + set_opt (*mount_options, MINIX_DF); + else if (!strcmp (this_char, "nocheck")) { + clear_opt (*mount_options, CHECK_NORMAL); + clear_opt (*mount_options, CHECK_STRICT); + } + else if (!strcmp (this_char, "nogrpid") || + !strcmp (this_char, "sysvgroups")) + clear_opt (*mount_options, GRPID); + else if (!strcmp (this_char, "resgid")) { + if (!value || !*value) { + printk ("EXT2-fs: the resgid option requires " + "an argument"); + return 0; + } + *resgid = simple_strtoul (value, &value, 0); + if (*value) { + printk ("EXT2-fs: Invalid resgid option: %s\n", + value); + return 0; + } + } + else if (!strcmp (this_char, "resuid")) { + if (!value || !*value) { + printk ("EXT2-fs: the resuid option requires " + "an argument"); + return 0; + } + *resuid = simple_strtoul (value, &value, 0); + if (*value) { + printk ("EXT2-fs: Invalid resuid option: %s\n", + value); + return 0; + } + } + else if (!strcmp (this_char, "sb")) { + if (!value || !*value) { + printk ("EXT2-fs: the sb option requires " + "an argument"); + return 0; + } + *sb_block = simple_strtoul (value, &value, 0); + if (*value) { + printk ("EXT2-fs: Invalid sb option: %s\n", + value); + return 0; + } + } + else { + printk ("EXT2-fs: Unrecognized mount option %s\n", this_char); + return 0; + } + } + return 1; +} + +static void ext2_setup_super (struct 
super_block * sb, + struct ext2_super_block * es) +{ + if (es->s_rev_level > EXT2_CURRENT_REV) { + printk ("EXT2-fs warning: revision level too high, " + "forcing read/only mode\n"); + sb->s_flags |= MS_RDONLY; + } + if (!(sb->s_flags & MS_RDONLY)) { + if (!(sb->u.ext2_sb.s_mount_state & EXT2_VALID_FS)) + printk ("EXT2-fs warning: mounting unchecked fs, " + "running e2fsck is recommended\n"); + else if ((sb->u.ext2_sb.s_mount_state & EXT2_ERROR_FS)) + printk ("EXT2-fs warning: mounting fs with errors, " + "running e2fsck is recommended\n"); + else if (es->s_max_mnt_count >= 0 && + es->s_mnt_count >= (unsigned short) es->s_max_mnt_count) + printk ("EXT2-fs warning: maximal mount count reached, " + "running e2fsck is recommended\n"); + else if (es->s_checkinterval && + (es->s_lastcheck + es->s_checkinterval <= CURRENT_TIME)) + printk ("EXT2-fs warning: checktime reached, " + "running e2fsck is recommended\n"); + es->s_state &= ~EXT2_VALID_FS; + if (!es->s_max_mnt_count) + es->s_max_mnt_count = EXT2_DFL_MAX_MNT_COUNT; + es->s_mnt_count++; + es->s_mtime = CURRENT_TIME; + mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); + sb->s_dirt = 1; + if (test_opt (sb, DEBUG)) + printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, " + "bpg=%lu, ipg=%lu, mo=%04lx]\n", + EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize, + sb->u.ext2_sb.s_frag_size, + sb->u.ext2_sb.s_groups_count, + EXT2_BLOCKS_PER_GROUP(sb), + EXT2_INODES_PER_GROUP(sb), + sb->u.ext2_sb.s_mount_opt); + if (test_opt (sb, CHECK)) { + ext2_check_blocks_bitmap (sb); + ext2_check_inodes_bitmap (sb); + } + } +} + +static int ext2_check_descriptors (struct super_block * sb) +{ + int i; + int desc_block = 0; + unsigned long block = sb->u.ext2_sb.s_es->s_first_data_block; + struct ext2_group_desc * gdp = NULL; + + ext2_debug ("Checking group descriptors"); + + for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) + { + if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) + gdp = (struct ext2_group_desc *) 
sb->u.ext2_sb.s_group_desc[desc_block++]->b_data; + if (gdp->bg_block_bitmap < block || + gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) + { + ext2_error (sb, "ext2_check_descriptors", + "Block bitmap for group %d" + " not in group (block %lu)!", + i, gdp->bg_block_bitmap); + return 0; + } + if (gdp->bg_inode_bitmap < block || + gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) + { + ext2_error (sb, "ext2_check_descriptors", + "Inode bitmap for group %d" + " not in group (block %lu)!", + i, gdp->bg_inode_bitmap); + return 0; + } + if (gdp->bg_inode_table < block || + gdp->bg_inode_table + sb->u.ext2_sb.s_itb_per_group >= + block + EXT2_BLOCKS_PER_GROUP(sb)) + { + ext2_error (sb, "ext2_check_descriptors", + "Inode table for group %d" + " not in group (block %lu)!", + i, gdp->bg_inode_table); + return 0; + } + block += EXT2_BLOCKS_PER_GROUP(sb); + gdp++; + } + return 1; +} + +struct super_block * ext2_read_super (struct super_block * sb, void * data, + int silent) +{ + struct buffer_head * bh; + struct ext2_super_block * es; + unsigned long sb_block = 1; + unsigned short resuid = EXT2_DEF_RESUID; + unsigned short resgid = EXT2_DEF_RESGID; + unsigned long logic_sb_block = 1; + int dev = sb->s_dev; + int db_count; + int i, j; +#ifdef EXT2FS_PRE_02B_COMPAT + int fs_converted = 0; +#endif + + set_opt (sb->u.ext2_sb.s_mount_opt, CHECK_NORMAL); + if (!parse_options ((char *) data, &sb_block, &resuid, &resgid, + &sb->u.ext2_sb.s_mount_opt)) { + sb->s_dev = 0; + return NULL; + } + + lock_super (sb); + set_blocksize (dev, BLOCK_SIZE); + if (!(bh = bread (dev, sb_block, BLOCK_SIZE))) { + sb->s_dev = 0; + unlock_super (sb); + printk ("EXT2-fs: unable to read superblock\n"); + return NULL; + } + /* + * Note: s_es must be initialized s_es as soon as possible because + * some ext2 macro-instructions depend on its value + */ + es = (struct ext2_super_block *) bh->b_data; + sb->u.ext2_sb.s_es = es; + sb->s_magic = es->s_magic; + if (sb->s_magic != 
EXT2_SUPER_MAGIC +#ifdef EXT2FS_PRE_02B_COMPAT + && sb->s_magic != EXT2_PRE_02B_MAGIC +#endif + ) { + sb->s_dev = 0; + unlock_super (sb); + brelse (bh); + if (!silent) + printk ("VFS: Can't find an ext2 filesystem on dev %d/%d.\n", + MAJOR(dev), MINOR(dev)); + return NULL; + } + sb->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; + sb->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(sb); + if (sb->s_blocksize != BLOCK_SIZE && + (sb->s_blocksize == 1024 || sb->s_blocksize == 2048 || + sb->s_blocksize == 4096)) { + unsigned long offset; + + brelse (bh); + set_blocksize (dev, sb->s_blocksize); + logic_sb_block = (sb_block*BLOCK_SIZE) / sb->s_blocksize; + offset = (sb_block*BLOCK_SIZE) % sb->s_blocksize; + bh = bread (dev, logic_sb_block, sb->s_blocksize); + if(!bh) + return NULL; + es = (struct ext2_super_block *) (((char *)bh->b_data) + offset); + sb->u.ext2_sb.s_es = es; + if (es->s_magic != EXT2_SUPER_MAGIC) { + sb->s_dev = 0; + unlock_super (sb); + brelse (bh); + printk ("EXT2-fs: Magic mismatch, very weird !\n"); + return NULL; + } + } + sb->u.ext2_sb.s_frag_size = EXT2_MIN_FRAG_SIZE << + es->s_log_frag_size; + if (sb->u.ext2_sb.s_frag_size) + sb->u.ext2_sb.s_frags_per_block = sb->s_blocksize / + sb->u.ext2_sb.s_frag_size; + else + sb->s_magic = 0; + sb->u.ext2_sb.s_blocks_per_group = es->s_blocks_per_group; + sb->u.ext2_sb.s_frags_per_group = es->s_frags_per_group; + sb->u.ext2_sb.s_inodes_per_group = es->s_inodes_per_group; + sb->u.ext2_sb.s_inodes_per_block = sb->s_blocksize / + sizeof (struct ext2_inode); + sb->u.ext2_sb.s_itb_per_group = sb->u.ext2_sb.s_inodes_per_group / + sb->u.ext2_sb.s_inodes_per_block; + sb->u.ext2_sb.s_desc_per_block = sb->s_blocksize / + sizeof (struct ext2_group_desc); + sb->u.ext2_sb.s_sbh = bh; + sb->u.ext2_sb.s_es = es; + if (resuid != EXT2_DEF_RESUID) + sb->u.ext2_sb.s_resuid = resuid; + else + sb->u.ext2_sb.s_resuid = es->s_def_resuid; + if (resgid != EXT2_DEF_RESGID) + sb->u.ext2_sb.s_resgid = resgid; + else + 
sb->u.ext2_sb.s_resgid = es->s_def_resgid; + sb->u.ext2_sb.s_mount_state = es->s_state; + sb->u.ext2_sb.s_rename_lock = 0; + sb->u.ext2_sb.s_rename_wait = NULL; +#ifdef EXT2FS_PRE_02B_COMPAT + if (sb->s_magic == EXT2_PRE_02B_MAGIC) { + if (es->s_blocks_count > 262144) { + /* + * fs > 256 MB can't be converted + */ + sb->s_dev = 0; + unlock_super (sb); + brelse (bh); + printk ("EXT2-fs: trying to mount a pre-0.2b file" + "system which cannot be converted\n"); + return NULL; + } + printk ("EXT2-fs: mounting a pre 0.2b file system, " + "will try to convert the structure\n"); + if (!(sb->s_flags & MS_RDONLY)) { + sb->s_dev = 0; + unlock_super (sb); + brelse (bh); + printk ("EXT2-fs: cannot convert a read-only fs\n"); + return NULL; + } + if (!convert_pre_02b_fs (sb, bh)) { + sb->s_dev = 0; + unlock_super (sb); + brelse (bh); + printk ("EXT2-fs: conversion failed !!!\n"); + return NULL; + } + printk ("EXT2-fs: conversion succeeded !!!\n"); + fs_converted = 1; + } +#endif + if (sb->s_magic != EXT2_SUPER_MAGIC) { + sb->s_dev = 0; + unlock_super (sb); + brelse (bh); + if (!silent) + printk ("VFS: Can't find an ext2 filesystem on dev %d/%d.\n", + MAJOR(dev), MINOR(dev)); + return NULL; + } + if (sb->s_blocksize != bh->b_size) { + sb->s_dev = 0; + unlock_super (sb); + brelse (bh); + if (!silent) + printk ("VFS: Unsupported blocksize on dev 0x%04x.\n", + dev); + return NULL; + } + + if (sb->s_blocksize != sb->u.ext2_sb.s_frag_size) { + sb->s_dev = 0; + unlock_super (sb); + brelse (bh); + printk ("EXT2-fs: fragsize %lu != blocksize %lu (not supported yet)\n", + sb->u.ext2_sb.s_frag_size, sb->s_blocksize); + return NULL; + } + + sb->u.ext2_sb.s_groups_count = (es->s_blocks_count - + es->s_first_data_block + + EXT2_BLOCKS_PER_GROUP(sb) - 1) / + EXT2_BLOCKS_PER_GROUP(sb); + db_count = (sb->u.ext2_sb.s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / + EXT2_DESC_PER_BLOCK(sb); + sb->u.ext2_sb.s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL); + if 
(sb->u.ext2_sb.s_group_desc == NULL) { + sb->s_dev = 0; + unlock_super (sb); + brelse (bh); + printk ("EXT2-fs: not enough memory\n"); + return NULL; + } + for (i = 0; i < db_count; i++) { + sb->u.ext2_sb.s_group_desc[i] = bread (dev, logic_sb_block + i + 1, + sb->s_blocksize); + if (!sb->u.ext2_sb.s_group_desc[i]) { + sb->s_dev = 0; + unlock_super (sb); + for (j = 0; j < i; j++) + brelse (sb->u.ext2_sb.s_group_desc[j]); + kfree_s (sb->u.ext2_sb.s_group_desc, + db_count * sizeof (struct buffer_head *)); + brelse (bh); + printk ("EXT2-fs: unable to read group descriptors\n"); + return NULL; + } + } + if (!ext2_check_descriptors (sb)) { + sb->s_dev = 0; + unlock_super (sb); + for (j = 0; j < db_count; j++) + brelse (sb->u.ext2_sb.s_group_desc[j]); + kfree_s (sb->u.ext2_sb.s_group_desc, + db_count * sizeof (struct buffer_head *)); + brelse (bh); + printk ("EXT2-fs: group descriptors corrupted !\n"); + return NULL; + } + for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { + sb->u.ext2_sb.s_inode_bitmap_number[i] = 0; + sb->u.ext2_sb.s_inode_bitmap[i] = NULL; + sb->u.ext2_sb.s_block_bitmap_number[i] = 0; + sb->u.ext2_sb.s_block_bitmap[i] = NULL; + } + sb->u.ext2_sb.s_loaded_inode_bitmaps = 0; + sb->u.ext2_sb.s_loaded_block_bitmaps = 0; + sb->u.ext2_sb.s_db_per_group = db_count; + unlock_super (sb); + /* + * set up enough so that it can read an inode + */ + sb->s_dev = dev; + sb->s_op = &ext2_sops; + if (!(sb->s_mounted = iget (sb, EXT2_ROOT_INO))) { + sb->s_dev = 0; + for (i = 0; i < db_count; i++) + if (sb->u.ext2_sb.s_group_desc[i]) + brelse (sb->u.ext2_sb.s_group_desc[i]); + kfree_s (sb->u.ext2_sb.s_group_desc, + db_count * sizeof (struct buffer_head *)); + brelse (bh); + printk ("EXT2-fs: get root inode failed\n"); + return NULL; + } +#ifdef EXT2FS_PRE_02B_COMPAT + if (fs_converted) { + for (i = 0; i < db_count; i++) + mark_buffer_dirty(sb->u.ext2_sb.s_group_desc[i], 1); + sb->s_dirt = 1; + } +#endif + ext2_setup_super (sb, es); + return sb; +} + +static void 
ext2_commit_super (struct super_block * sb, + struct ext2_super_block * es) +{ + es->s_wtime = CURRENT_TIME; + mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); + sb->s_dirt = 0; +} + +/* + * In the second extended file system, it is not necessary to + * write the super block since we use a mapping of the + * disk super block in a buffer. + * + * However, this function is still used to set the fs valid + * flags to 0. We need to set this flag to 0 since the fs + * may have been checked while mounted and e2fsck may have + * set s_state to EXT2_VALID_FS after some corrections. + */ + +void ext2_write_super (struct super_block * sb) +{ + struct ext2_super_block * es; + + if (!(sb->s_flags & MS_RDONLY)) { + es = sb->u.ext2_sb.s_es; + + ext2_debug ("setting valid to 0\n"); + + if (es->s_state & EXT2_VALID_FS) { + es->s_state &= ~EXT2_VALID_FS; + es->s_mtime = CURRENT_TIME; + } + ext2_commit_super (sb, es); + } + sb->s_dirt = 0; +} + +int ext2_remount (struct super_block * sb, int * flags, char * data) +{ + struct ext2_super_block * es; + unsigned short resuid = sb->u.ext2_sb.s_resuid; + unsigned short resgid = sb->u.ext2_sb.s_resgid; + unsigned long new_mount_opt; + unsigned long tmp; + + /* + * Allow the "check" option to be passed as a remount option. + */ + set_opt (sb->u.ext2_sb.s_mount_opt, CHECK_NORMAL); + if (!parse_options (data, &tmp, &resuid, &resgid, + &new_mount_opt)) + return -EINVAL; + + sb->u.ext2_sb.s_mount_opt = new_mount_opt; + sb->u.ext2_sb.s_resuid = resuid; + sb->u.ext2_sb.s_resgid = resgid; + es = sb->u.ext2_sb.s_es; + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + return 0; + if (*flags & MS_RDONLY) { + if (es->s_state & EXT2_VALID_FS || + !(sb->u.ext2_sb.s_mount_state & EXT2_VALID_FS)) + return 0; + /* + * OK, we are remounting a valid rw partition rdonly, so set + * the rdonly flag and then mark the partition as valid again. 
+ */ + es->s_state = sb->u.ext2_sb.s_mount_state; + es->s_mtime = CURRENT_TIME; + mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); + sb->s_dirt = 1; + ext2_commit_super (sb, es); + } + else { + /* + * Mounting a RDONLY partition read-write, so reread and + * store the current valid flag. (It may have been changed + * by e2fsck since we originally mounted the partition.) + */ + sb->u.ext2_sb.s_mount_state = es->s_state; + sb->s_flags &= ~MS_RDONLY; + ext2_setup_super (sb, es); + } + return 0; +} + +void ext2_statfs (struct super_block * sb, struct statfs * buf) +{ + long tmp; + unsigned long overhead; + unsigned long overhead_per_group; + + if (test_opt (sb, MINIX_DF)) + overhead = 0; + else { + /* + * Compute the overhead (FS structures) + */ + overhead_per_group = 1 /* super block */ + + sb->u.ext2_sb.s_db_per_group /* descriptors */ + + 1 /* block bitmap */ + + 1 /* inode bitmap */ + + sb->u.ext2_sb.s_itb_per_group /* inode table */; + overhead = sb->u.ext2_sb.s_es->s_first_data_block + + sb->u.ext2_sb.s_groups_count * overhead_per_group; + } + + put_fs_long (EXT2_SUPER_MAGIC, &buf->f_type); + put_fs_long (sb->s_blocksize, &buf->f_bsize); + put_fs_long (sb->u.ext2_sb.s_es->s_blocks_count - overhead, + &buf->f_blocks); + tmp = ext2_count_free_blocks (sb); + put_fs_long (tmp, &buf->f_bfree); + if (tmp >= sb->u.ext2_sb.s_es->s_r_blocks_count) + put_fs_long (tmp - sb->u.ext2_sb.s_es->s_r_blocks_count, + &buf->f_bavail); + else + put_fs_long (0, &buf->f_bavail); + put_fs_long (sb->u.ext2_sb.s_es->s_inodes_count, &buf->f_files); + put_fs_long (ext2_count_free_inodes (sb), &buf->f_ffree); + put_fs_long (EXT2_NAME_LEN, &buf->f_namelen); + /* Don't know what value to put in buf->f_fsid */ +} diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c new file mode 100644 index 000000000..7d85ed74c --- /dev/null +++ b/fs/ext2/symlink.c @@ -0,0 +1,127 @@ +/* + * linux/fs/ext2/symlink.c + * + * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut 
/*
 * Resolve `inode` if it is a symbolic link, relative to directory `dir`.
 *
 *   dir        directory the link is resolved against; when NULL the
 *              current process root is used (and its count raised)
 *   inode      inode to follow, may be NULL
 *   flag,mode  passed through unchanged to open_namei()
 *   res_inode  receives the result; set to NULL first
 *
 * Returns 0 with *res_inode = inode when `inode` is not a symlink,
 * -ENOENT when `inode` is NULL, -ELOOP when more than five links are
 * already being followed, -EIO when the link block cannot be read, and
 * otherwise the result of open_namei() on the link text.
 *
 * `inode` is released on every path that does not hand it back through
 * *res_inode.  `dir` is released here on the early exits only; on the
 * open_namei() path it is passed on (presumably open_namei() consumes the
 * reference - confirm).
 */
static int ext2_follow_link(struct inode * dir, struct inode * inode,
			    int flag, int mode, struct inode ** res_inode)
{
	int error;
	struct buffer_head * bh = NULL;
	char * link;

	*res_inode = NULL;
	if (!dir) {
		dir = current->fs->root;
		dir->i_count++;
	}
	if (!inode) {
		iput (dir);
		return -ENOENT;
	}
	if (!S_ISLNK(inode->i_mode)) {
		iput (dir);
		*res_inode = inode;
		return 0;
	}
	/* Bound the nesting depth of symlink resolution. */
	if (current->link_count > 5) {
		iput (dir);
		iput (inode);
		return -ELOOP;
	}
	/*
	 * A link with allocated blocks keeps its text in block 0; otherwise
	 * the text is stored directly in the inode's i_data area.
	 */
	if (inode->i_blocks) {
		if (!(bh = ext2_bread (inode, 0, 0, &error))) {
			iput (dir);
			iput (inode);
			return -EIO;
		}
		link = bh->b_data;
	} else
		link = (char *) inode->u.ext2_i.i_data;
	current->link_count++;
	error = open_namei (link, flag, mode, res_inode, dir);
	current->link_count--;
	iput (inode);
	if (bh)
		brelse (bh);
	return error;
}
(!S_ISLNK(inode->i_mode)) { + iput (inode); + return -EINVAL; + } + if (buflen > inode->i_sb->s_blocksize - 1) + buflen = inode->i_sb->s_blocksize - 1; + if (inode->i_blocks) { + bh = ext2_bread (inode, 0, 0, &err); + if (!bh) { + iput (inode); + return 0; + } + link = bh->b_data; + } + else + link = (char *) inode->u.ext2_i.i_data; + i = 0; + while (i < buflen && (c = link[i])) { + i++; + put_fs_byte (c, buffer++); + } + iput (inode); + if (bh) + brelse (bh); + return i; +} diff --git a/fs/ext2/truncate.c b/fs/ext2/truncate.c new file mode 100644 index 000000000..10a1fd236 --- /dev/null +++ b/fs/ext2/truncate.c @@ -0,0 +1,349 @@ +/* + * linux/fs/ext2/truncate.c + * + * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/truncate.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * Real random numbers for secure rm added 94/02/18 + * Idea from Pierre del Perugia <delperug@gla.ecoledoc.ibp.fr> + */ + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/fcntl.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/locks.h> +#include <linux/string.h> + +static int ext2_secrm_seed = 152; /* Random generator base */ + +#define RANDOM_INT (ext2_secrm_seed = ext2_secrm_seed * 69069l +1) + +/* + * Truncate has the most races in the whole filesystem: coding it is + * a pain in the a**. Especially as I don't do any locking... + * + * The code may look a bit weird, but that's just because I've tried to + * handle things like file-size changes in a somewhat graceful manner. + * Anyway, truncating a file at the same time somebody else writes to it + * is likely to result in pretty weird behaviour... + * + * The new code handles normal truncates (size = 0) as well as the more + * general case (size = XXX). I hope. 
+ */ + +static int trunc_direct (struct inode * inode) +{ + int i, tmp; + unsigned long * p; + struct buffer_head * bh; + unsigned long block_to_free = 0; + unsigned long free_count = 0; + int retry = 0; + int blocks = inode->i_sb->s_blocksize / 512; +#define DIRECT_BLOCK ((inode->i_size + inode->i_sb->s_blocksize - 1) / \ + inode->i_sb->s_blocksize) + int direct_block = DIRECT_BLOCK; + +repeat: + for (i = direct_block ; i < EXT2_NDIR_BLOCKS ; i++) { + p = inode->u.ext2_i.i_data + i; + tmp = *p; + if (!tmp) + continue; + if (inode->u.ext2_i.i_flags & EXT2_SECRM_FL) + bh = getblk (inode->i_dev, tmp, + inode->i_sb->s_blocksize); + else + bh = get_hash_table (inode->i_dev, tmp, + inode->i_sb->s_blocksize); + if (i < direct_block) { + brelse (bh); + goto repeat; + } + if ((bh && bh->b_count != 1) || tmp != *p) { + retry = 1; + brelse (bh); + continue; + } + *p = 0; + inode->i_blocks -= blocks; + inode->i_dirt = 1; + if (inode->u.ext2_i.i_flags & EXT2_SECRM_FL) { + memset(bh->b_data, RANDOM_INT, inode->i_sb->s_blocksize); + mark_buffer_dirty(bh, 1); + } + brelse (bh); + if (free_count == 0) { + block_to_free = tmp; + free_count++; + } else if (free_count > 0 && block_to_free == tmp - free_count) + free_count++; + else { + ext2_free_blocks (inode->i_sb, block_to_free, free_count); + block_to_free = tmp; + free_count = 1; + } +/* ext2_free_blocks (inode->i_sb, tmp, 1); */ + } + if (free_count > 0) + ext2_free_blocks (inode->i_sb, block_to_free, free_count); + return retry; +} + +static int trunc_indirect (struct inode * inode, int offset, unsigned long * p) +{ + int i, tmp; + struct buffer_head * bh; + struct buffer_head * ind_bh; + unsigned long * ind; + unsigned long block_to_free = 0; + unsigned long free_count = 0; + int retry = 0; + int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); + int blocks = inode->i_sb->s_blocksize / 512; +#define INDIRECT_BLOCK ((int)DIRECT_BLOCK - offset) + int indirect_block = INDIRECT_BLOCK; + + tmp = *p; + if (!tmp) + return 0; + 
ind_bh = bread (inode->i_dev, tmp, inode->i_sb->s_blocksize); + if (tmp != *p) { + brelse (ind_bh); + return 1; + } + if (!ind_bh) { + *p = 0; + return 0; + } +repeat: + for (i = indirect_block ; i < addr_per_block ; i++) { + if (i < 0) + i = 0; + if (i < indirect_block) + goto repeat; + ind = i + (unsigned long *) ind_bh->b_data; + tmp = *ind; + if (!tmp) + continue; + if (inode->u.ext2_i.i_flags & EXT2_SECRM_FL) + bh = getblk (inode->i_dev, tmp, + inode->i_sb->s_blocksize); + else + bh = get_hash_table (inode->i_dev, tmp, + inode->i_sb->s_blocksize); + if (i < indirect_block) { + brelse (bh); + goto repeat; + } + if ((bh && bh->b_count != 1) || tmp != *ind) { + retry = 1; + brelse (bh); + continue; + } + *ind = 0; + mark_buffer_dirty(ind_bh, 1); + if (inode->u.ext2_i.i_flags & EXT2_SECRM_FL) { + memset(bh->b_data, RANDOM_INT, inode->i_sb->s_blocksize); + mark_buffer_dirty(bh, 1); + } + brelse (bh); + if (free_count == 0) { + block_to_free = tmp; + free_count++; + } else if (free_count > 0 && block_to_free == tmp - free_count) + free_count++; + else { + ext2_free_blocks (inode->i_sb, block_to_free, free_count); + block_to_free = tmp; + free_count = 1; + } +/* ext2_free_blocks (inode->i_sb, tmp, 1); */ + inode->i_blocks -= blocks; + inode->i_dirt = 1; + } + if (free_count > 0) + ext2_free_blocks (inode->i_sb, block_to_free, free_count); + ind = (unsigned long *) ind_bh->b_data; + for (i = 0; i < addr_per_block; i++) + if (*(ind++)) + break; + if (i >= addr_per_block) + if (ind_bh->b_count != 1) + retry = 1; + else { + tmp = *p; + *p = 0; + inode->i_blocks -= blocks; + inode->i_dirt = 1; + ext2_free_blocks (inode->i_sb, tmp, 1); + } + if (IS_SYNC(inode) && ind_bh->b_dirt) { + ll_rw_block (WRITE, 1, &ind_bh); + wait_on_buffer (ind_bh); + } + brelse (ind_bh); + return retry; +} + +static int trunc_dindirect (struct inode * inode, int offset, + unsigned long * p) +{ + int i, tmp; + struct buffer_head * dind_bh; + unsigned long * dind; + int retry = 0; + int 
addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); + int blocks = inode->i_sb->s_blocksize / 512; +#define DINDIRECT_BLOCK (((int)DIRECT_BLOCK - offset) / addr_per_block) + int dindirect_block = DINDIRECT_BLOCK; + + tmp = *p; + if (!tmp) + return 0; + dind_bh = bread (inode->i_dev, tmp, inode->i_sb->s_blocksize); + if (tmp != *p) { + brelse (dind_bh); + return 1; + } + if (!dind_bh) { + *p = 0; + return 0; + } +repeat: + for (i = dindirect_block ; i < addr_per_block ; i++) { + if (i < 0) + i = 0; + if (i < dindirect_block) + goto repeat; + dind = i + (unsigned long *) dind_bh->b_data; + tmp = *dind; + if (!tmp) + continue; + retry |= trunc_indirect (inode, offset + (i * addr_per_block), + dind); + mark_buffer_dirty(dind_bh, 1); + } + dind = (unsigned long *) dind_bh->b_data; + for (i = 0; i < addr_per_block; i++) + if (*(dind++)) + break; + if (i >= addr_per_block) + if (dind_bh->b_count != 1) + retry = 1; + else { + tmp = *p; + *p = 0; + inode->i_blocks -= blocks; + inode->i_dirt = 1; + ext2_free_blocks (inode->i_sb, tmp, 1); + } + if (IS_SYNC(inode) && dind_bh->b_dirt) { + ll_rw_block (WRITE, 1, &dind_bh); + wait_on_buffer (dind_bh); + } + brelse (dind_bh); + return retry; +} + +static int trunc_tindirect (struct inode * inode) +{ + int i, tmp; + struct buffer_head * tind_bh; + unsigned long * tind, * p; + int retry = 0; + int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); + int blocks = inode->i_sb->s_blocksize / 512; +#define TINDIRECT_BLOCK (((int)DIRECT_BLOCK - (addr_per_block * addr_per_block + \ + addr_per_block + EXT2_NDIR_BLOCKS)) / \ + (addr_per_block * addr_per_block)) + int tindirect_block = TINDIRECT_BLOCK; + + p = inode->u.ext2_i.i_data + EXT2_TIND_BLOCK; + if (!(tmp = *p)) + return 0; + tind_bh = bread (inode->i_dev, tmp, inode->i_sb->s_blocksize); + if (tmp != *p) { + brelse (tind_bh); + return 1; + } + if (!tind_bh) { + *p = 0; + return 0; + } +repeat: + for (i = tindirect_block ; i < addr_per_block ; i++) { + if (i < 0) + i = 0; + if (i < 
tindirect_block) + goto repeat; + tind = i + (unsigned long *) tind_bh->b_data; + retry |= trunc_dindirect(inode, EXT2_NDIR_BLOCKS + + addr_per_block + (i + 1) * addr_per_block * addr_per_block, + tind); + mark_buffer_dirty(tind_bh, 1); + } + tind = (unsigned long *) tind_bh->b_data; + for (i = 0; i < addr_per_block; i++) + if (*(tind++)) + break; + if (i >= addr_per_block) + if (tind_bh->b_count != 1) + retry = 1; + else { + tmp = *p; + *p = 0; + inode->i_blocks -= blocks; + inode->i_dirt = 1; + ext2_free_blocks (inode->i_sb, tmp, 1); + } + if (IS_SYNC(inode) && tind_bh->b_dirt) { + ll_rw_block (WRITE, 1, &tind_bh); + wait_on_buffer (tind_bh); + } + brelse (tind_bh); + return retry; +} + +void ext2_truncate (struct inode * inode) +{ + int retry; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + ext2_discard_prealloc(inode); + while (1) { + down(&inode->i_sem); + retry = trunc_direct(inode); + retry |= trunc_indirect (inode, EXT2_IND_BLOCK, + (unsigned long *) &inode->u.ext2_i.i_data[EXT2_IND_BLOCK]); + retry |= trunc_dindirect (inode, EXT2_IND_BLOCK + + EXT2_ADDR_PER_BLOCK(inode->i_sb), + (unsigned long *) &inode->u.ext2_i.i_data[EXT2_DIND_BLOCK]); + retry |= trunc_tindirect (inode); + up(&inode->i_sem); + if (!retry) + break; + if (IS_SYNC(inode) && inode->i_dirt) + ext2_sync_inode (inode); + current->counter = 0; + schedule (); + } + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_dirt = 1; +} diff --git a/fs/fcntl.c b/fs/fcntl.c new file mode 100644 index 000000000..d3226eb01 --- /dev/null +++ b/fs/fcntl.c @@ -0,0 +1,188 @@ +/* + * linux/fs/fcntl.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <asm/segment.h> + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/string.h> + +extern int fcntl_getlk(unsigned int, struct flock *); 
+extern int fcntl_setlk(unsigned int, unsigned int, struct flock *); +extern int sock_fcntl (struct file *, unsigned int cmd, unsigned long arg); + +static int dupfd(unsigned int fd, unsigned int arg) +{ + if (fd >= NR_OPEN || !current->files->fd[fd]) + return -EBADF; + if (arg >= NR_OPEN) + return -EINVAL; + while (arg < NR_OPEN) + if (current->files->fd[arg]) + arg++; + else + break; + if (arg >= NR_OPEN) + return -EMFILE; + FD_CLR(arg, &current->files->close_on_exec); + (current->files->fd[arg] = current->files->fd[fd])->f_count++; + return arg; +} + +asmlinkage int sys_dup2(unsigned int oldfd, unsigned int newfd) +{ + if (oldfd >= NR_OPEN || !current->files->fd[oldfd]) + return -EBADF; + if (newfd == oldfd) + return newfd; + /* + * errno's for dup2() are slightly different than for fcntl(F_DUPFD) + * for historical reasons. + */ + if (newfd > NR_OPEN) /* historical botch - should have been >= */ + return -EBADF; /* dupfd() would return -EINVAL */ +#if 1 + if (newfd == NR_OPEN) + return -EBADF; /* dupfd() does return -EINVAL and that may + * even be the standard! But that is too + * weird for now. 
+ */ +#endif + sys_close(newfd); + return dupfd(oldfd,newfd); +} + +asmlinkage int sys_dup(unsigned int fildes) +{ + return dupfd(fildes,0); +} + +asmlinkage int sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + struct file * filp; + struct task_struct *p; + int task_found = 0; + + if (fd >= NR_OPEN || !(filp = current->files->fd[fd])) + return -EBADF; + switch (cmd) { + case F_DUPFD: + return dupfd(fd,arg); + case F_GETFD: + return FD_ISSET(fd, &current->files->close_on_exec); + case F_SETFD: + if (arg&1) + FD_SET(fd, &current->files->close_on_exec); + else + FD_CLR(fd, &current->files->close_on_exec); + return 0; + case F_GETFL: + return filp->f_flags; + case F_SETFL: + /* + * In the case of an append-only file, O_APPEND + * cannot be cleared + */ + if (IS_APPEND(filp->f_inode) && !(arg & O_APPEND)) + return -EPERM; + if ((arg & FASYNC) && !(filp->f_flags & FASYNC) && + filp->f_op->fasync) + filp->f_op->fasync(filp->f_inode, filp, 1); + if (!(arg & FASYNC) && (filp->f_flags & FASYNC) && + filp->f_op->fasync) + filp->f_op->fasync(filp->f_inode, filp, 0); + filp->f_flags &= ~(O_APPEND | O_NONBLOCK | FASYNC); + filp->f_flags |= arg & (O_APPEND | O_NONBLOCK | + FASYNC); + return 0; + case F_GETLK: + return fcntl_getlk(fd, (struct flock *) arg); + case F_SETLK: + return fcntl_setlk(fd, cmd, (struct flock *) arg); + case F_SETLKW: + return fcntl_setlk(fd, cmd, (struct flock *) arg); + case F_GETOWN: + /* + * XXX If f_owner is a process group, the + * negative return value will get converted + * into an error. Oops. If we keep the + * current syscall conventions, the only way + * to fix this will be in libc. + */ + return filp->f_owner; + case F_SETOWN: + /* + * Add the security checks - AC. Without + * this there is a massive Linux security + * hole here - consider what happens if + * you do something like + * + * fcntl(0,F_SETOWN,some_root_process); + * getchar(); + * + * and input a line! + * + * BTW: Don't try this for fun. 
Several Unix + * systems I tried this on fall for the + * trick! + * + * I had to fix this botch job as Linux + * kill_fasync asserts priv making it a + * free all user process killer! + * + * Changed to make the security checks more + * liberal. -- TYT + */ + if (current->pgrp == -arg || current->pid == arg) + goto fasync_ok; + + for_each_task(p) { + if ((p->pid == arg) || (p->pid == -arg) || + (p->pgrp == -arg)) { + task_found++; + if ((p->session != current->session) && + (p->uid != current->uid) && + (p->euid != current->euid) && + !suser()) + return -EPERM; + break; + } + } + if ((task_found == 0) && !suser()) + return -EINVAL; + fasync_ok: + filp->f_owner = arg; + if (S_ISSOCK (filp->f_inode->i_mode)) + sock_fcntl (filp, F_SETOWN, arg); + return 0; + default: + /* sockets need a few special fcntls. */ + if (S_ISSOCK (filp->f_inode->i_mode)) + { + return (sock_fcntl (filp, cmd, arg)); + } + return -EINVAL; + } +} + +void kill_fasync(struct fasync_struct *fa, int sig) +{ + while (fa) { + if (fa->magic != FASYNC_MAGIC) { + printk("kill_fasync: bad magic number in " + "fasync_struct!\n"); + return; + } + if (fa->fa_file->f_owner > 0) + kill_proc(fa->fa_file->f_owner, sig, 1); + else + kill_pg(-fa->fa_file->f_owner, sig, 1); + fa = fa->fa_next; + } +} diff --git a/fs/fifo.c b/fs/fifo.c new file mode 100644 index 000000000..ecd9bc232 --- /dev/null +++ b/fs/fifo.c @@ -0,0 +1,161 @@ +/* + * linux/fs/fifo.c + * + * written by Paul H. Hargrove + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fcntl.h> + +static int fifo_open(struct inode * inode,struct file * filp) +{ + int retval = 0; + unsigned long page; + + switch( filp->f_mode ) { + + case 1: + /* + * O_RDONLY + * POSIX.1 says that O_NONBLOCK means return with the FIFO + * opened, even when there is no process writing the FIFO. 
+ */ + filp->f_op = &connecting_fifo_fops; + if (!PIPE_READERS(*inode)++) + wake_up_interruptible(&PIPE_WAIT(*inode)); + if (!(filp->f_flags & O_NONBLOCK) && !PIPE_WRITERS(*inode)) { + PIPE_RD_OPENERS(*inode)++; + while (!PIPE_WRITERS(*inode)) { + if (current->signal & ~current->blocked) { + retval = -ERESTARTSYS; + break; + } + interruptible_sleep_on(&PIPE_WAIT(*inode)); + } + if (!--PIPE_RD_OPENERS(*inode)) + wake_up_interruptible(&PIPE_WAIT(*inode)); + } + while (PIPE_WR_OPENERS(*inode)) + interruptible_sleep_on(&PIPE_WAIT(*inode)); + if (PIPE_WRITERS(*inode)) + filp->f_op = &read_fifo_fops; + if (retval && !--PIPE_READERS(*inode)) + wake_up_interruptible(&PIPE_WAIT(*inode)); + break; + + case 2: + /* + * O_WRONLY + * POSIX.1 says that O_NONBLOCK means return -1 with + * errno=ENXIO when there is no process reading the FIFO. + */ + if ((filp->f_flags & O_NONBLOCK) && !PIPE_READERS(*inode)) { + retval = -ENXIO; + break; + } + filp->f_op = &write_fifo_fops; + if (!PIPE_WRITERS(*inode)++) + wake_up_interruptible(&PIPE_WAIT(*inode)); + if (!PIPE_READERS(*inode)) { + PIPE_WR_OPENERS(*inode)++; + while (!PIPE_READERS(*inode)) { + if (current->signal & ~current->blocked) { + retval = -ERESTARTSYS; + break; + } + interruptible_sleep_on(&PIPE_WAIT(*inode)); + } + if (!--PIPE_WR_OPENERS(*inode)) + wake_up_interruptible(&PIPE_WAIT(*inode)); + } + while (PIPE_RD_OPENERS(*inode)) + interruptible_sleep_on(&PIPE_WAIT(*inode)); + if (retval && !--PIPE_WRITERS(*inode)) + wake_up_interruptible(&PIPE_WAIT(*inode)); + break; + + case 3: + /* + * O_RDWR + * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. + * This implementation will NEVER block on a O_RDWR open, since + * the process can at least talk to itself. 
+ */ + filp->f_op = &rdwr_fifo_fops; + if (!PIPE_READERS(*inode)++) + wake_up_interruptible(&PIPE_WAIT(*inode)); + while (PIPE_WR_OPENERS(*inode)) + interruptible_sleep_on(&PIPE_WAIT(*inode)); + if (!PIPE_WRITERS(*inode)++) + wake_up_interruptible(&PIPE_WAIT(*inode)); + while (PIPE_RD_OPENERS(*inode)) + interruptible_sleep_on(&PIPE_WAIT(*inode)); + break; + + default: + retval = -EINVAL; + } + if (retval || PIPE_BASE(*inode)) + return retval; + page = __get_free_page(GFP_KERNEL); + if (PIPE_BASE(*inode)) { + free_page(page); + return 0; + } + if (!page) + return -ENOMEM; + PIPE_LOCK(*inode) = 0; + PIPE_START(*inode) = PIPE_LEN(*inode) = 0; + PIPE_BASE(*inode) = (char *) page; + return 0; +} + +/* + * Dummy default file-operations: the only thing this does + * is contain the open that then fills in the correct operations + * depending on the access mode of the file... + */ +static struct file_operations def_fifo_fops = { + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + fifo_open, /* will set read or write pipe_fops */ + NULL, + NULL +}; + +static struct inode_operations fifo_inode_operations = { + &def_fifo_fops, /* default file operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +void init_fifo(struct inode * inode) +{ + inode->i_op = &fifo_inode_operations; + inode->i_pipe = 1; + PIPE_LOCK(*inode) = 0; + PIPE_BASE(*inode) = NULL; + PIPE_START(*inode) = PIPE_LEN(*inode) = 0; + PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0; + PIPE_WAIT(*inode) = NULL; + PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0; +} diff --git a/fs/file_table.c b/fs/file_table.c new file mode 100644 index 000000000..6438162a0 --- /dev/null +++ b/fs/file_table.c @@ -0,0 +1,90 @@ +/* + * linux/fs/file_table.c + * + * 
Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/fs.h> +#include <linux/string.h> +#include <linux/mm.h> + +struct file * first_file; +int nr_files = 0; + +static void insert_file_free(struct file *file) +{ + file->f_next = first_file; + file->f_prev = first_file->f_prev; + file->f_next->f_prev = file; + file->f_prev->f_next = file; + first_file = file; +} + +static void remove_file_free(struct file *file) +{ + if (first_file == file) + first_file = first_file->f_next; + if (file->f_next) + file->f_next->f_prev = file->f_prev; + if (file->f_prev) + file->f_prev->f_next = file->f_next; + file->f_next = file->f_prev = NULL; +} + +static void put_last_free(struct file *file) +{ + remove_file_free(file); + file->f_prev = first_file->f_prev; + file->f_prev->f_next = file; + file->f_next = first_file; + file->f_next->f_prev = file; +} + +void grow_files(void) +{ + struct file * file; + int i; + + file = (struct file *) get_free_page(GFP_KERNEL); + + if (!file) + return; + + nr_files+=i= PAGE_SIZE/sizeof(struct file); + + if (!first_file) + file->f_next = file->f_prev = first_file = file++, i--; + + for (; i ; i--) + insert_file_free(file++); +} + +unsigned long file_table_init(unsigned long start, unsigned long end) +{ + first_file = NULL; + return start; +} + +struct file * get_empty_filp(void) +{ + int i; + struct file * f; + + if (!first_file) + grow_files(); +repeat: + for (f = first_file, i=0; i < nr_files; i++, f = f->f_next) + if (!f->f_count) { + remove_file_free(f); + memset(f,0,sizeof(*f)); + put_last_free(f); + f->f_count = 1; + f->f_version = ++event; + return f; + } + if (nr_files < NR_FILE) { + grow_files(); + goto repeat; + } + return NULL; +} diff --git a/fs/filesystems.c b/fs/filesystems.c new file mode 100644 index 000000000..7bcc695c5 --- /dev/null +++ b/fs/filesystems.c @@ -0,0 +1,99 @@ +/* + * linux/fs/filesystems.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * table of configured filesystems + */ + +#include <linux/config.h> 
+#include <linux/fs.h> + +#include <linux/minix_fs.h> +#include <linux/ext_fs.h> +#include <linux/ext2_fs.h> +#include <linux/xia_fs.h> +#include <linux/msdos_fs.h> +#include <linux/umsdos_fs.h> +#include <linux/proc_fs.h> +#include <linux/nfs_fs.h> +#include <linux/iso_fs.h> +#include <linux/sysv_fs.h> +#include <linux/hpfs_fs.h> + +extern void device_setup(void); + +/* This may be used only once, enforced by 'static int callable' */ +asmlinkage int sys_setup(void) +{ + static int callable = 1; + + if (!callable) + return -1; + callable = 0; + + device_setup(); + +#ifdef CONFIG_MINIX_FS + register_filesystem(&(struct file_system_type) + {minix_read_super, "minix", 1, NULL}); +#endif + +#ifdef CONFIG_EXT_FS + register_filesystem(&(struct file_system_type) + {ext_read_super, "ext", 1, NULL}); +#endif + +#ifdef CONFIG_EXT2_FS + register_filesystem(&(struct file_system_type) + {ext2_read_super, "ext2", 1, NULL}); +#endif + +#ifdef CONFIG_XIA_FS + register_filesystem(&(struct file_system_type) + {xiafs_read_super, "xiafs", 1, NULL}); +#endif +#ifdef CONFIG_UMSDOS_FS + register_filesystem(&(struct file_system_type) + {UMSDOS_read_super, "umsdos", 1, NULL}); +#endif + +#ifdef CONFIG_MSDOS_FS + register_filesystem(&(struct file_system_type) + {msdos_read_super, "msdos", 1, NULL}); +#endif + +#ifdef CONFIG_PROC_FS + register_filesystem(&(struct file_system_type) + {proc_read_super, "proc", 0, NULL}); +#endif + +#ifdef CONFIG_NFS_FS + register_filesystem(&(struct file_system_type) + {nfs_read_super, "nfs", 0, NULL}); +#endif + +#ifdef CONFIG_ISO9660_FS + register_filesystem(&(struct file_system_type) + {isofs_read_super, "iso9660", 1, NULL}); +#endif + +#ifdef CONFIG_SYSV_FS + register_filesystem(&(struct file_system_type) + {sysv_read_super, "xenix", 1, NULL}); + + register_filesystem(&(struct file_system_type) + {sysv_read_super, "sysv", 1, NULL}); + + register_filesystem(&(struct file_system_type) + {sysv_read_super, "coherent", 1, NULL}); +#endif + +#ifdef 
CONFIG_HPFS_FS + register_filesystem(&(struct file_system_type) + {hpfs_read_super, "hpfs", 1, NULL}); +#endif + + mount_root(); + return 0; +} diff --git a/fs/hpfs/Makefile b/fs/hpfs/Makefile new file mode 100644 index 000000000..94ab74d5d --- /dev/null +++ b/fs/hpfs/Makefile @@ -0,0 +1,30 @@ +# +# Makefile for the linux HPFS filesystem routines. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +.c.s: + $(CC) $(CFLAGS) -S $< +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< + +OBJS= hpfs_fs.o + +hpfs.o: $(OBJS) + ln -f hpfs_fs.o hpfs.o + +dep: + $(CPP) -M *.c > .depend + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/fs/hpfs/README b/fs/hpfs/README new file mode 100644 index 000000000..7e4fe88ca --- /dev/null +++ b/fs/hpfs/README @@ -0,0 +1,25 @@ +Linux can read, but not write, OS/2 HPFS partitions. + +Mount options are the same as for msdos partitions. + + uid=nnn All files in the partition will be owned by user id nnn. + gid=nnn All files in the partition will be in group nnn. + umask=nnn The permission mask (see umask(1)) for the partition. + conv=binary Data is returned exactly as is, with CRLF's. [default] + conv=text (Carriage return, line feed) is replaced with newline. + conv=auto Chooses, file by file, conv=binary or conv=text (by guessing) + +There is one mount option unique to HPFS. + + case=lower Convert file names to lower case. [default] + case=asis Return file names as is, in mixed case. + +Case is not significant in filename matching, like real HPFS. 
+ + +Command line example + mkdir -p /os2/c + mount -t hpfs -o uid=100,gid=100 /dev/sda6 /os2/c + +/etc/fstab example + /dev/sdb5 /d/f hpfs ro,uid=402,gid=402,umask=002 diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h new file mode 100644 index 000000000..3121a415d --- /dev/null +++ b/fs/hpfs/hpfs.h @@ -0,0 +1,498 @@ +/* The paper + + Duncan, Roy + Design goals and implementation of the new High Performance File System + Microsoft Systems Journal Sept 1989 v4 n5 p1(13) + + describes what HPFS looked like when it was new, and it is the source + of most of the information given here. The rest is conjecture. + + For definitive information on the Duncan paper, see it, not this file. + For definitive information on HPFS, ask somebody else -- this is guesswork. + There are certain to be many mistakes. */ + +/* Notation */ + +typedef unsigned secno; /* sector number, partition relative */ + +typedef secno dnode_secno; /* sector number of a dnode */ +typedef secno fnode_secno; /* sector number of an fnode */ +typedef secno anode_secno; /* sector number of an anode */ + +/* sector 0 */ + +/* The boot block is very like a FAT boot block, except that the + 29h signature byte is 28h instead, and the ID string is "HPFS". 
*/ + +struct hpfs_boot_block +{ + unsigned char jmp[3]; + unsigned char oem_id[8]; + unsigned char bytes_per_sector[2]; /* 512 */ + unsigned char sectors_per_cluster; + unsigned char n_reserved_sectors[2]; + unsigned char n_fats; + unsigned char n_rootdir_entries[2]; + unsigned char n_sectors_s[2]; + unsigned char media_byte; + unsigned short sectors_per_fat; + unsigned short sectors_per_track; + unsigned short heads_per_cyl; + unsigned int n_hidden_sectors; + unsigned int n_sectors_l; /* size of partition */ + unsigned char drive_number; + unsigned char mbz; + unsigned char sig_28h; /* 28h */ + unsigned char vol_serno[4]; + unsigned char vol_label[11]; + unsigned char sig_hpfs[8]; /* "HPFS " */ + unsigned char pad[448]; + unsigned short magic; /* aa55 */ +}; + + +/* sector 16 */ + +/* The super block has the pointer to the root directory. */ + +#define SB_MAGIC 0xf995e849 + +struct hpfs_super_block +{ + unsigned magic; /* f995 e849 */ + unsigned magic1; /* fa53 e9c5, more magic? */ + unsigned huh202; /* ?? 202 = N. of B. in 1.00390625 S.*/ + fnode_secno root; /* fnode of root directory */ + secno n_sectors; /* size of filesystem */ + unsigned n_badblocks; /* number of bad blocks */ + secno bitmaps; /* pointers to free space bit maps */ + unsigned zero1; /* 0 */ + secno badblocks; /* bad block list */ + unsigned zero3; /* 0 */ + time_t last_chkdsk; /* date last checked, 0 if never */ + unsigned zero4; /* 0 */ + secno n_dir_band; /* number of sectors in dir band */ + secno dir_band_start; /* first sector in dir band */ + secno dir_band_end; /* last sector in dir band */ + secno dir_band_bitmap; /* free space map, 1 dnode per bit */ + unsigned zero5[8]; /* 0 */ + secno scratch_dnodes; /* ?? 8 preallocated sectors near dir + band, 4-aligned. */ + unsigned zero6[103]; /* 0 */ +}; + + +/* sector 17 */ + +/* The spare block has pointers to spare sectors. 
*/ + +#define SP_MAGIC 0xf9911849 + +struct hpfs_spare_block +{ + unsigned magic; /* f991 1849 */ + unsigned magic1; /* fa52 29c5, more magic? */ + + unsigned dirty: 1; /* 0 clean, 1 "improperly stopped" */ + unsigned flag1234: 4; /* unknown flags */ + unsigned fast: 1; /* partition was fast formatted */ + unsigned flag6to31: 26; /* unknown flags */ + + secno hotfix_map; /* info about remapped bad sectors */ + unsigned n_spares_used; /* number of hotfixes */ + unsigned n_spares; /* number of spares in hotfix map */ + unsigned n_dnode_spares_free; /* spare dnodes unused */ + unsigned n_dnode_spares; /* length of spare_dnodes[] list, + follows in this block*/ + secno code_page_dir; /* code page directory block */ + unsigned n_code_pages; /* number of code pages */ + unsigned large_numbers[2]; /* ?? */ + unsigned zero1[15]; + dnode_secno spare_dnodes[20]; /* emergency free dnode list */ + unsigned zero2[81]; /* room for more? */ +}; + +/* The bad block list is 4 sectors long. The first word must be zero, + the remaining words give n_badblocks bad block numbers. + I bet you can see it coming... */ + +#define BAD_MAGIC 0 + +/* The hotfix map is 4 sectors long. It looks like + + secno from[n_spares]; + secno to[n_spares]; + + The to[] list is initialized to point to n_spares preallocated empty + sectors. The from[] list contains the sector numbers of bad blocks + which have been remapped to corresponding sectors in the to[] list. + n_spares_used gives the length of the from[] list. */ + + +/* Sectors 18 and 19 are preallocated and unused. + Maybe they're spares for 16 and 17, but simple substitution fails. */ + + +/* The code page info pointed to by the spare block consists of an index + block and blocks containing character maps. The following is pretty + sketchy, but Linux doesn't use code pages so it doesn't matter. 
*/ + +/* block pointed to by spareblock->code_page_dir */ + +#define CP_DIR_MAGIC 0x494521f7 + +struct code_page_directory +{ + unsigned magic; /* 4945 21f7 */ + unsigned n_code_pages; /* number of pointers following */ + unsigned zero1[2]; + struct { + unsigned short ix; /* index */ + unsigned short code_page_number; /* code page number */ + unsigned bounds; /* matches corresponding word + in data block */ + secno code_page_data; /* sector number of a code_page_data + containing c.p. array */ + unsigned index; /* index in c.p. array in that sector*/ + } array[31]; /* unknown length */ +}; + +/* blocks pointed to by code_page_directory */ + +#define CP_DATA_MAGIC 0x894521f7 + +struct code_page_data +{ + unsigned magic; /* 8945 21f7 */ + unsigned n_used; /* # elements used in c_p_data[] */ + unsigned bounds[3]; /* looks a bit like + (beg1,end1), (beg2,end2) + one byte each */ + unsigned short offs[3]; /* offsets from start of sector + to start of c_p_data[ix] */ + struct { + unsigned short ix; /* index */ + unsigned short code_page_number; /* code page number */ + unsigned short zero1; + unsigned char map[128]; /* map for chars 80..ff */ + unsigned short zero2; + } code_page[3]; + unsigned char incognita[78]; +}; + + +/* Free space bitmaps are 4 sectors long, which is 16384 bits. + 16384 sectors is 8 meg, and each 8 meg band has a 4-sector bitmap. + Bit order in the maps is little-endian. 0 means taken, 1 means free. + + Bit map sectors are marked allocated in the bit maps, and so are sectors + off the end of the partition. + + Band 0 is sectors 0-3fff, its map is in sectors 18-1b. + Band 1 is 4000-7fff, its map is in 7ffc-7fff. + Band 2 is 8000-bfff, its map is in 8000-8003. + The remaining bands have maps in their first (even) or last (odd) 4 sectors + -- if the last, partial, band is odd its map is in its last 4 sectors. + + The bitmap locations are given in a table pointed to by the super block. 
+ No doubt they aren't constrained to be at 18, 7ffc, 8000, ...; that is + just where they usually are. + + The "directory band" is a bunch of sectors preallocated for dnodes. + It has a 4-sector free space bitmap of its own. Each bit in the map + corresponds to one 4-sector dnode, bit 0 of the map corresponding to + the first 4 sectors of the directory band. The entire band is marked + allocated in the main bitmap. The super block gives the locations + of the directory band and its bitmap. ("band" doesn't mean it is + 8 meg long; it isn't.) */ + + +/* dnode: directory. 4 sectors long */ + +/* A directory is a tree of dnodes. The fnode for a directory + contains one pointer, to the root dnode of the tree. The fnode + never moves, the dnodes do the B-tree thing, splitting and merging + as files are added and removed. */ + +#define DNODE_MAGIC 0x77e40aae + +struct dnode { + unsigned magic; /* 77e4 0aae */ + unsigned first_free; /* offset from start of dnode to + first free dir entry */ + unsigned increment_me; /* some kind of activity counter? + Neither HPFS.IFS nor CHKDSK cares + if you change this word */ + secno up; /* (root dnode) directory's fnode + (nonroot) parent dnode */ + dnode_secno self; /* pointer to this dnode */ + unsigned char dirent[2028]; /* one or more dirents */ +}; + +struct hpfs_dirent { + unsigned short length; /* offset to next dirent */ + unsigned first: 1; /* set on phony ^A^A (".") entry */ + unsigned flag1: 1; + unsigned down: 1; /* down pointer present (after name) */ + unsigned last: 1; /* set on phony \377 entry */ + unsigned flag4: 1; + unsigned flag5: 1; + unsigned flag6: 1; + unsigned has_needea: 1; /* ?? 
some EA has NEEDEA set + I have no idea why this is + interesting in a dir entry */ + unsigned read_only: 1; /* dos attrib */ + unsigned hidden: 1; /* dos attrib */ + unsigned system: 1; /* dos attrib */ + unsigned flag11: 1; /* would be volume label dos attrib */ + unsigned directory: 1; /* dos attrib */ + unsigned archive: 1; /* dos attrib */ + unsigned not_8x3: 1; /* name is not 8.3 */ + unsigned flag15: 1; + fnode_secno fnode; /* fnode giving allocation info */ + time_t write_date; /* mtime */ + unsigned file_size; /* file length, bytes */ + time_t read_date; /* atime */ + time_t creation_date; /* ctime */ + unsigned ea_size; /* total EA length, bytes */ + unsigned char zero1; + unsigned char locality; /* 0=unk 1=seq 2=random 3=both */ + unsigned char namelen, name[1]; /* file name */ + /* dnode_secno down; btree down pointer, if present, + follows name on next word boundary, or maybe it's + precedes next dirent, which is on a word boundary. */ +}; + +/* The b-tree down pointer from a dir entry */ + +static inline dnode_secno de_down_pointer (struct hpfs_dirent *de) +{ + return *(dnode_secno *) ((void *) de + de->length - 4); +} + +/* The first dir entry in a dnode */ + +static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode) +{ + return (void *) dnode->dirent; +} + +/* The end+1 of the dir entries */ + +static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode) +{ + return (void *) dnode + dnode->first_free; +} + +/* The dir entry after dir entry de */ + +static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de) +{ + return (void *) de + de->length; +} + + +/* B+ tree: allocation info in fnodes and anodes */ + +/* dnodes point to fnodes which are responsible for listing the sectors + assigned to the file. This is done with trees of (length,address) + pairs. (Actually triples, of (length, file-address, disk-address) + which can represent holes. Find out if HPFS does that.) 
+ At any rate, fnodes contain a small tree; if subtrees are needed + they occupy essentially a full block in anodes. A leaf-level tree node + has 3-word entries giving sector runs, a non-leaf node has 2-word + entries giving subtree pointers. A flag in the header says which. */ + +struct bplus_leaf_node +{ + unsigned file_secno; /* first file sector in extent */ + unsigned length; /* length, sectors */ + secno disk_secno; /* first corresponding disk sector */ +}; + +struct bplus_internal_node +{ + unsigned file_secno; /* subtree maps sectors < this */ + anode_secno down; /* pointer to subtree */ +}; + +struct bplus_header +{ + unsigned flag0: 1; + unsigned flag1: 1; + unsigned flag2: 1; + unsigned flag3: 1; + unsigned flag4: 1; + unsigned fnode_parent: 1; /* ? we're pointed to by an fnode, + the data btree or some ea or the + main ea bootage pointer ea_secno */ + /* also can get set in fnodes, which + may be a chkdsk glitch or may mean + this bit is irrelevant in fnodes, + or this interpretation is all wet */ + unsigned flag6: 1; + unsigned internal: 1; /* 1 -> (internal) tree of anodes + 0 -> (leaf) list of extents */ + unsigned char fill[3]; + unsigned char n_free_nodes; /* free nodes in following array */ + unsigned char n_used_nodes; /* used nodes in following array */ + unsigned short first_free; /* offset from start of header to + first free node in array */ + union { + struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving + subtree pointers */ + struct bplus_leaf_node external[0]; /* (external) 3-word entries giving + sector runs */ + } u; +}; + +/* fnode: root of allocation b+ tree, and EA's */ + +/* Every file and every directory has one fnode, pointed to by the directory + entry and pointing to the file's sectors or directory's root dnode. EA's + are also stored here, and there are said to be ACL's somewhere here too. 
*/ + +#define FNODE_MAGIC 0xf7e40aae + +struct fnode +{ + unsigned magic; /* f7e4 0aae */ + unsigned zero1[2]; + unsigned char len, name[15]; /* true length, truncated name */ + fnode_secno up; /* pointer to file's directory fnode */ + unsigned zero2[3]; + unsigned ea_size_l; /* length of disk-resident ea's */ + secno ea_secno; /* first sector of disk-resident ea's*/ + unsigned short ea_size_s; /* length of fnode-resident ea's */ + + unsigned flag0: 1; + unsigned ea_anode: 1; /* 1 -> ea_secno is an anode */ + unsigned flag2: 1; + unsigned flag3: 1; + unsigned flag4: 1; + unsigned flag5: 1; + unsigned flag6: 1; + unsigned flag7: 1; + unsigned dirflag: 1; /* 1 -> directory. first & only extent + points to dnode. */ + unsigned flag9: 1; + unsigned flag10: 1; + unsigned flag11: 1; + unsigned flag12: 1; + unsigned flag13: 1; + unsigned flag14: 1; + unsigned flag15: 1; + + struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */ + union { + struct bplus_leaf_node external[8]; + struct bplus_internal_node internal[12]; + } u; + + unsigned file_size; /* file length, bytes */ + unsigned n_needea; /* number of EA's with NEEDEA set */ + unsigned zero4[4]; + unsigned ea_offs; /* offset from start of fnode + to first fnode-resident ea */ + unsigned zero5[2]; + unsigned char ea[316]; /* zero or more EA's, packed together + with no alignment padding. + (Do not use this name, get here + via fnode + ea_offs. I think.) */ +}; + + +/* anode: 99.44% pure allocation tree */ + +#define ANODE_MAGIC 0x37e40aae + +struct anode +{ + unsigned magic; /* 37e4 0aae */ + anode_secno self; /* pointer to this anode */ + secno up; /* parent anode or fnode */ + + struct bplus_header btree; /* b+tree, 40 extents or 60 subtrees */ + union { + struct bplus_leaf_node external[40]; + struct bplus_internal_node internal[60]; + } u; + + unsigned fill[3]; /* unused */ +}; + + +/* extended attributes. + + A file's EA info is stored as a list of (name,value) pairs. 
It is + usually in the fnode, but (if it's large) it is moved to a single + sector run outside the fnode, or to multiple runs with an anode tree + that points to them. + + The value of a single EA is stored along with the name, or (if large) + it is moved to a single sector run, or multiple runs pointed to by an + anode tree, pointed to by the value field of the (name,value) pair. + + Flags in the EA tell whether the value is immediate, in a single sector + run, or in multiple runs. Flags in the fnode tell whether the EA list + is immediate, in a single run, or in multiple runs. */ + +struct extended_attribute +{ + unsigned indirect: 1; /* 1 -> value gives sector number + where real value starts */ + unsigned anode: 1; /* 1 -> sector is an anode + that points to fragmented value */ + unsigned flag2: 1; + unsigned flag3: 1; + unsigned flag4: 1; + unsigned flag5: 1; + unsigned flag6: 1; + unsigned needea: 1; /* required ea */ + unsigned char namelen; /* length of name, bytes */ + unsigned short valuelen; /* length of value, bytes */ + /* + unsigned char name[namelen]; ascii attrib name + unsigned char nul; terminating '\0', not counted + unsigned char value[valuelen]; value, arbitrary + if this.indirect, valuelen is 8 and the value is + unsigned length; real length of value, bytes + secno secno; sector address where it starts + if this.anode, the above sector number is the root of an anode tree + which points to the value. 
+ */ +}; + +static inline unsigned char *ea_name (struct extended_attribute *ea) +{ + return (void *) ea + sizeof *ea; +} + +static inline unsigned char *ea_value (struct extended_attribute *ea) +{ + return (void *) ea + sizeof *ea + ea->namelen + 1; +} + +static inline struct extended_attribute * + ea_next_ea (struct extended_attribute *ea) +{ + return (void *) ea + sizeof *ea + ea->namelen + 1 + ea->valuelen; +} + +static inline unsigned ea_indirect_length (struct extended_attribute *ea) +{ + unsigned *v = (void *) ea_value (ea); + return v[0]; +} + +static inline secno ea_indirect_secno (struct extended_attribute *ea) +{ + unsigned *v = (void *) ea_value (ea); + return v[1]; +} + +/* + Local Variables: + comment-column: 40 + End: +*/ diff --git a/fs/hpfs/hpfs_fs.c b/fs/hpfs/hpfs_fs.c new file mode 100644 index 000000000..c05cf56ab --- /dev/null +++ b/fs/hpfs/hpfs_fs.c @@ -0,0 +1,1727 @@ +/* + * linux/fs/hpfs/hpfs_fs.c + * read-only HPFS + * version 1.0 + * + * Chris Smith 1993 + * + * Sources & references: + * Duncan, _Design ... of HPFS_, MSJ 4(5) (C) 1989 Microsoft Corp + * linux/fs/minix Copyright (C) 1991, 1992, 1993 Linus Torvalds + * linux/fs/msdos Written 1992, 1993 by Werner Almesberger + * linux/fs/isofs Copyright (C) 1991 Eric Youngdale + */ + +#include <linux/fs.h> +#include <linux/hpfs_fs.h> +#include <linux/errno.h> +#include <linux/malloc.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/locks.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <asm/bitops.h> +#include <asm/segment.h> + +#include "hpfs.h" + +/* + * HPFS is a mixture of 512-byte blocks and 2048-byte blocks. The 2k blocks + * are used for directories and bitmaps. For bmap to work, we must run the + * file system with 512-byte blocks. The 2k blocks are assembled in buffers + * obtained from kmalloc. + * + * For a file's i-number we use the sector number of its fnode, coded. 
+ * (Directory ino's are even, file ino's are odd, and ino >> 1 is the + * sector address of the fnode. This is a hack to allow lookup() to + * tell read_inode() whether it is necessary to read the fnode.) + * + * The map_xxx routines all read something into a buffer and return a + * pointer somewhere in the buffer. The caller must do the brelse. + * The other routines are balanced. + * + * For details on the data structures see hpfs.h and the Duncan paper. + * + * Overview + * + * [ The names of these data structures, except fnode, are not Microsoft's + * or IBM's. I don't know what names they use. The semantics described + * here are those of this implementation, and any coincidence between it + * and real HPFS is to be hoped for but not guaranteed by me, and + * certainly not guaranteed by MS or IBM. Who know nothing about this. ] + * + * [ Also, the following will make little sense if you haven't read the + * Duncan paper, which is excellent. ] + * + * HPFS is a tree. There are 3 kinds of nodes. A directory is a tree + * of dnodes, and a file's allocation info is a tree of sector runs + * stored in fnodes and anodes. + * + * The top pointer is in the super block, it points to the fnode of the + * root directory. + * + * The root directory -- all directories -- gives file names, dates &c, + * and fnode addresses. If the directory fits in one dnode, that's it, + * otherwise the top dnode points to other dnodes, forming a tree. A + * dnode tree (one directory) might look like + * + * ((a b c) d (e f g) h (i j) k l (m n o p)) + * + * The subtrees appear between the files. Each dir entry contains, along + * with the name and fnode, a dnode pointer to the subtree that precedes it + * (if there is one; a flag tells that). The first entry in every directory + * is ^A^A, the "." entry for the directory itself. 
The last entry in every + * dnode is \377, a fake entry whose only valid fields are the bit marking + * it last and the down pointer to the subtree preceding it, if any. + * + * The "value" field of directory entries is an fnode address. The fnode + * tells where the sectors of the file are. The fnode for a subdirectory + * contains one pointer, to the root dnode of the subdirectory. The fnode + * for a data file contains, in effect, a tiny anode. (Most of the space + * in fnodes is for extended attributes.) + * + * anodes and the anode part of fnodes are trees of extents. An extent + * is a (length, disk address) pair, labeled with the file address being + * mapped. E.g., + * + * (0: 3@1000 3: 1@2000 4: 2@10) + * + * means the file:disk sector map (0:1000 1:1001 2:1002 3:2000 4:10 5:11). + * + * There is space for 8 file:len@disk triples in an fnode, or for 40 in an + * anode. If this is insufficient, subtrees are used, as in + * + * (6: (0: 3@1000 3: 1@2000 4: 2@10) 12: (6: 3@8000 9: 1@9000 10: 2@20)) + * + * The label on a subtree is the first address *after* that tree. The + * subtrees are always anodes. The label:subtree pairs require only + * two words each, so non-leaf subtrees have a different format; there + * is room for 12 label:subtree pairs in an fnode, or 60 in an anode. + * + * Within a directory, each dnode contains a pointer up to its parent + * dnode. The root dnode points up to the directory's fnode. + * + * Each fnode contains a pointer to the directory that contains it + * (to the fnode of the directory). So this pointer in a directory + * fnode is "..". + * + * On the disk, dnodes are all together in the center of the partition, + * and HPFS even manages to put all the dnodes for a single directory + * together, generally. fnodes are out with the data. anodes are seldom + * seen -- in fact noncontiguous files are seldom seen. 
I think this is + * partly the open() call that lets programs specify the length of an + * output file when they know it, and partly because HPFS.IFS really is + * very good at resisting fragmentation. + */ + +/* notation */ + +#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) +#define ROUND_UP(x) (((x)+3) & ~3) + +#define little_ushort(x) (*(unsigned short *) &(x)) +typedef void nonconst; + +/* super block ops */ + +static void hpfs_read_inode(struct inode *); +static void hpfs_put_super(struct super_block *); +static void hpfs_statfs(struct super_block *, struct statfs *); +static int hpfs_remount_fs(struct super_block *, int *, char *); + +static const struct super_operations hpfs_sops = +{ + hpfs_read_inode, /* read_inode */ + NULL, /* notify_change */ + NULL, /* write_inode */ + NULL, /* put_inode */ + hpfs_put_super, /* put_super */ + NULL, /* write_super */ + hpfs_statfs, /* statfs */ + hpfs_remount_fs, /* remount_fs */ +}; + +/* file ops */ + +static int hpfs_file_read(struct inode *, struct file *, char *, int); +static secno hpfs_bmap(struct inode *, unsigned); + +static const struct file_operations hpfs_file_ops = +{ + NULL, /* lseek - default */ + hpfs_file_read, /* read */ + NULL, /* write */ + NULL, /* readdir - bad */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + generic_mmap, /* mmap */ + NULL, /* no special open is needed */ + NULL, /* release */ + file_fsync, /* fsync */ +}; + +static const struct inode_operations hpfs_file_iops = +{ + (nonconst *) & hpfs_file_ops, /* default file operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + (int (*)(struct inode *, int)) + &hpfs_bmap, /* bmap */ + NULL, /* truncate */ + NULL, /* permission */ +}; + +/* directory ops */ + +static int hpfs_dir_read(struct inode *inode, struct file 
*filp, + char *buf, int count); +static int hpfs_readdir(struct inode *inode, struct file *filp, + struct dirent *dirent, int count); +static int hpfs_lookup(struct inode *, const char *, int, struct inode **); + +static const struct file_operations hpfs_dir_ops = +{ + NULL, /* lseek - default */ + hpfs_dir_read, /* read */ + NULL, /* write - bad */ + hpfs_readdir, /* readdir */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + file_fsync, /* fsync */ +}; + +static const struct inode_operations hpfs_dir_iops = +{ + (nonconst *) & hpfs_dir_ops, /* default directory file ops */ + NULL, /* create */ + hpfs_lookup, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL, /* permission */ +}; + +/* Four 512-byte buffers and the 2k block obtained by concatenating them */ + +struct quad_buffer_head { + struct buffer_head *bh[4]; + void *data; +}; + +/* forwards */ + +static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask, + int *lowercase, int *conv); +static int check_warn(int not_ok, + const char *p1, const char *p2, const char *p3); +static int zerop(void *addr, unsigned len); +static void count_dnodes(struct inode *inode, dnode_secno dno, + unsigned *n_dnodes, unsigned *n_subdirs); +static unsigned count_bitmap(struct super_block *s); +static unsigned count_one_bitmap(dev_t dev, secno secno); +static secno bplus_lookup(struct inode *inode, struct bplus_header *b, + secno file_secno, struct buffer_head **bhp); +static struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno, + const unsigned char *name, unsigned len, + struct quad_buffer_head *qbh); +static struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp, + struct 
quad_buffer_head *qbh); +static void write_one_dirent(struct dirent *dirent, const unsigned char *name, + unsigned namelen, ino_t ino, int lowercase); +static dnode_secno dir_subdno(struct inode *inode, unsigned pos); +static struct hpfs_dirent *map_nth_dirent(dev_t dev, dnode_secno dno, + int n, + struct quad_buffer_head *qbh); +static unsigned choose_conv(unsigned char *p, unsigned len); +static unsigned convcpy_tofs(unsigned char *out, unsigned char *in, + unsigned len); +static dnode_secno fnode_dno(dev_t dev, ino_t ino); +static struct fnode *map_fnode(dev_t dev, ino_t ino, + struct buffer_head **bhp); +static struct anode *map_anode(dev_t dev, unsigned secno, + struct buffer_head **bhp); +static struct dnode *map_dnode(dev_t dev, unsigned secno, + struct quad_buffer_head *qbh); +static void *map_sector(dev_t dev, unsigned secno, struct buffer_head **bhp); +static void *map_4sectors(dev_t dev, unsigned secno, + struct quad_buffer_head *qbh); +static void brelse4(struct quad_buffer_head *qbh); + +/* + * make inode number for a file + */ + +static inline ino_t file_ino(fnode_secno secno) +{ + return secno << 1 | 1; +} + +/* + * make inode number for a directory + */ + +static inline ino_t dir_ino(fnode_secno secno) +{ + return secno << 1; +} + +/* + * get fnode address from an inode number + */ + +static inline fnode_secno ino_secno(ino_t ino) +{ + return ino >> 1; +} + +/* + * test for directory's inode number + */ + +static inline int ino_is_dir(ino_t ino) +{ + return (ino & 1) == 0; +} + +/* + * conv= options + */ + +#define CONV_BINARY 0 /* no conversion */ +#define CONV_TEXT 1 /* crlf->newline */ +#define CONV_AUTO 2 /* decide based on file contents */ + +/* + * local time (HPFS) to GMT (Unix) + */ + +static inline time_t local_to_gmt(time_t t) +{ + extern struct timezone sys_tz; + return t + sys_tz.tz_minuteswest * 60; +} + +/* super block ops */ + +/* + * mount. This gets one thing, the root directory inode. 
It does a + * bunch of guessed-at consistency checks. + */ + +struct super_block *hpfs_read_super(struct super_block *s, + void *options, int silent) +{ + struct hpfs_boot_block *bootblock; + struct hpfs_super_block *superblock; + struct hpfs_spare_block *spareblock; + struct hpfs_dirent *de; + struct buffer_head *bh0, *bh1, *bh2; + struct quad_buffer_head qbh; + dnode_secno root_dno; + dev_t dev; + uid_t uid; + gid_t gid; + umode_t umask; + int lowercase; + int conv; + int dubious; + + /* + * Get the mount options + */ + + if (!parse_opts(options, &uid, &gid, &umask, &lowercase, &conv)) { + printk("HPFS: syntax error in mount options. Not mounted.\n"); + s->s_dev = 0; + return 0; + } + + /* + * Fill in the super block struct + */ + + lock_super(s); + dev = s->s_dev; + set_blocksize(dev, 512); + + /* + * fetch sectors 0, 16, 17 + */ + + bootblock = map_sector(dev, 0, &bh0); + if (!bootblock) + goto bail; + + superblock = map_sector(dev, 16, &bh1); + if (!superblock) + goto bail0; + + spareblock = map_sector(dev, 17, &bh2); + if (!spareblock) + goto bail1; + + /* + * Check that this fs looks enough like a known one that we can find + * and read the root directory. + */ + + if (bootblock->magic != 0xaa55 + || superblock->magic != SB_MAGIC + || spareblock->magic != SP_MAGIC + || bootblock->sig_28h != 0x28 + || memcmp(&bootblock->sig_hpfs, "HPFS ", 8) + || little_ushort(bootblock->bytes_per_sector) != 512) { + printk("HPFS: hpfs_read_super: Not HPFS\n"); + goto bail2; + } + + /* + * Check for inconsistencies -- possibly wrong guesses here, possibly + * filesystem problems. 
+ */ + + dubious = 0; + + dubious |= check_warn(spareblock->dirty != 0, + "`Improperly stopped'", "flag is set", "run CHKDSK"); + dubious |= check_warn(spareblock->n_spares_used != 0, + "Spare blocks", "may be in use", "run CHKDSK"); + + /* + * Above errors mean we could get wrong answers if we proceed, + * so don't + */ + + if (dubious) + goto bail2; + + dubious |= check_warn((spareblock->n_dnode_spares != + spareblock->n_dnode_spares_free), + "Spare dnodes", "may be in use", "run CHKDSK"); + dubious |= check_warn(superblock->zero1 != 0, + "#1", "unknown word nonzero", "investigate"); + dubious |= check_warn(superblock->zero3 != 0, + "#3", "unknown word nonzero", "investigate"); + dubious |= check_warn(superblock->zero4 != 0, + "#4", "unknown word nonzero", "investigate"); + dubious |= check_warn(!zerop(superblock->zero5, + sizeof superblock->zero5), + "#5", "unknown word nonzero", "investigate"); + dubious |= check_warn(!zerop(superblock->zero6, + sizeof superblock->zero6), + "#6", "unknown word nonzero", "investigate"); + + if (dubious) + printk("HPFS: Proceeding, but operation may be unreliable\n"); + + /* + * set fs read only + */ + + s->s_flags |= MS_RDONLY; + + /* + * fill in standard stuff + */ + + s->s_magic = HPFS_SUPER_MAGIC; + s->s_blocksize = 512; + s->s_blocksize_bits = 9; + s->s_op = (struct super_operations *) &hpfs_sops; + + /* + * fill in hpfs stuff + */ + + s->s_hpfs_root = dir_ino(superblock->root); + s->s_hpfs_fs_size = superblock->n_sectors; + s->s_hpfs_dirband_size = superblock->n_dir_band / 4; + s->s_hpfs_dmap = superblock->dir_band_bitmap; + s->s_hpfs_bitmaps = superblock->bitmaps; + s->s_hpfs_uid = uid; + s->s_hpfs_gid = gid; + s->s_hpfs_mode = 0777 & ~umask; + s->s_hpfs_n_free = -1; + s->s_hpfs_n_free_dnodes = -1; + s->s_hpfs_lowercase = lowercase; + s->s_hpfs_conv = conv; + + /* + * done with the low blocks + */ + + brelse(bh2); + brelse(bh1); + brelse(bh0); + + /* + * all set. try it out. 
+ */ + + s->s_mounted = iget(s, s->s_hpfs_root); + unlock_super(s); + + if (!s->s_mounted) { + printk("HPFS: hpfs_read_super: inode get failed\n"); + s->s_dev = 0; + return 0; + } + + /* + * find the root directory's . pointer & finish filling in the inode + */ + + root_dno = fnode_dno(dev, s->s_hpfs_root); + if (root_dno) + de = map_dirent(s->s_mounted, root_dno, "\001\001", 2, &qbh); + if (!root_dno || !de) { + printk("HPFS: " + "hpfs_read_super: root dir isn't in the root dir\n"); + s->s_dev = 0; + return 0; + } + + s->s_mounted->i_atime = local_to_gmt(de->read_date); + s->s_mounted->i_mtime = local_to_gmt(de->write_date); + s->s_mounted->i_ctime = local_to_gmt(de->creation_date); + + brelse4(&qbh); + return s; + + bail2: + brelse(bh2); + bail1: + brelse(bh1); + bail0: + brelse(bh0); + bail: + s->s_dev = 0; + unlock_super(s); + return 0; +} + +static int check_warn(int not_ok, + const char *p1, const char *p2, const char *p3) +{ + if (not_ok) + printk("HPFS: %s %s. Please %s\n", p1, p2, p3); + return not_ok; +} + +static int zerop(void *addr, unsigned len) +{ + unsigned char *p = addr; + return p[0] == 0 && memcmp(p, p + 1, len - 1) == 0; +} + +/* + * A tiny parser for option strings, stolen from dosfs. 
+ */ + +static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask, + int *lowercase, int *conv) +{ + char *p, *rhs; + + *uid = current->uid; + *gid = current->gid; + *umask = current->fs->umask; + *lowercase = 1; + *conv = CONV_BINARY; + + if (!opts) + return 1; + + for (p = strtok(opts, ","); p != 0; p = strtok(0, ",")) { + if ((rhs = strchr(p, '=')) != 0) + *rhs++ = '\0'; + if (!strcmp(p, "uid")) { + if (!rhs || !*rhs) + return 0; + *uid = simple_strtoul(rhs, &rhs, 0); + if (*rhs) + return 0; + } + else if (!strcmp(p, "gid")) { + if (!rhs || !*rhs) + return 0; + *gid = simple_strtoul(rhs, &rhs, 0); + if (*rhs) + return 0; + } + else if (!strcmp(p, "umask")) { + if (!rhs || !*rhs) + return 0; + *umask = simple_strtoul(rhs, &rhs, 8); + if (*rhs) + return 0; + } + else if (!strcmp(p, "case")) { + if (!strcmp(rhs, "lower")) + *lowercase = 1; + else if (!strcmp(rhs, "asis")) + *lowercase = 0; + else + return 0; + } + else if (!strcmp(p, "conv")) { + if (!strcmp(rhs, "binary")) + *conv = CONV_BINARY; + else if (!strcmp(rhs, "text")) + *conv = CONV_TEXT; + else if (!strcmp(rhs, "auto")) + *conv = CONV_AUTO; + else + return 0; + } + else + return 0; + } + + return 1; +} + +/* + * read_inode. This is called with exclusive access to a new inode that + * has only (i_dev,i_ino) set. It is responsible for filling in the rest. + * We leave the dates blank, to be filled in from the dir entry. + * + * NOTE that there must be no sleeping from the return in this routine + * until lookup() finishes filling in the inode, otherwise the partly + * completed inode would be visible during the sleep. + * + * It is done in this strange and sinful way because the alternative + * is to read the fnode, find the dir pointer in it, read that fnode + * to get the dnode pointer, search through that whole directory for + * the ino we're reading, and get the dates. It works that way, but + * ls sounds like fsck. 
+ */ + +static void hpfs_read_inode(struct inode *inode) +{ + struct super_block *s = inode->i_sb; + + /* be ready to bail out */ + + inode->i_op = 0; + inode->i_mode = 0; + + if (inode->i_ino == 0 + || ino_secno(inode->i_ino) >= inode->i_sb->s_hpfs_fs_size) { + printk("HPFS: read_inode: bad ino\n"); + return; + } + + /* + * canned stuff + */ + + inode->i_uid = s->s_hpfs_uid; + inode->i_gid = s->s_hpfs_gid; + inode->i_mode = s->s_hpfs_mode; + inode->i_hpfs_conv = s->s_hpfs_conv; + + inode->i_hpfs_dno = 0; + inode->i_hpfs_n_secs = 0; + inode->i_hpfs_file_sec = 0; + inode->i_hpfs_disk_sec = 0; + inode->i_hpfs_dpos = 0; + inode->i_hpfs_dsubdno = 0; + + /* + * figure out whether we are looking at a directory or a file + */ + + if (ino_is_dir(inode->i_ino)) + inode->i_mode |= S_IFDIR; + else { + inode->i_mode |= S_IFREG; + inode->i_mode &= ~0111; + } + + /* + * these fields must be filled in from the dir entry, which we don't + * have but lookup does. It will fill them in before letting the + * inode out of its grasp. + */ + + inode->i_atime = 0; + inode->i_mtime = 0; + inode->i_ctime = 0; + inode->i_size = 0; + + /* + * fill in the rest + */ + + if (S_ISREG(inode->i_mode)) { + + inode->i_op = (struct inode_operations *) &hpfs_file_iops; + inode->i_nlink = 1; + inode->i_blksize = 512; + + } + else { + unsigned n_dnodes, n_subdirs; + struct buffer_head *bh0; + struct fnode *fnode = map_fnode(inode->i_dev, + inode->i_ino, &bh0); + + if (!fnode) { + printk("HPFS: read_inode: no fnode\n"); + inode->i_mode = 0; + return; + } + + inode->i_hpfs_parent_dir = dir_ino(fnode->up); + inode->i_hpfs_dno = fnode->u.external[0].disk_secno; + + brelse(bh0); + + n_dnodes = n_subdirs = 0; + count_dnodes(inode, inode->i_hpfs_dno, &n_dnodes, &n_subdirs); + + inode->i_op = (struct inode_operations *) &hpfs_dir_iops; + inode->i_blksize = 512; /* 2048 here confuses ls & du & ... 
*/ + inode->i_blocks = 4 * n_dnodes; + inode->i_size = 512 * inode->i_blocks; + inode->i_nlink = 2 + n_subdirs; + } +} + +/* + * unmount. + */ + +static void hpfs_put_super(struct super_block *s) +{ + lock_super(s); + s->s_dev = 0; + unlock_super(s); +} + +/* + * statfs. For free inode counts we report the count of dnodes in the + * directory band -- not exactly right but pretty analogous. + */ + +static void hpfs_statfs(struct super_block *s, struct statfs *buf) +{ + /* + * count the bits in the bitmaps, unless we already have + */ + + if (s->s_hpfs_n_free == -1) { + s->s_hpfs_n_free = count_bitmap(s); + s->s_hpfs_n_free_dnodes = + count_one_bitmap(s->s_dev, s->s_hpfs_dmap); + } + + /* + * fill in the user statfs struct + */ + + put_fs_long(s->s_magic, &buf->f_type); + put_fs_long(512, &buf->f_bsize); + put_fs_long(s->s_hpfs_fs_size, &buf->f_blocks); + put_fs_long(s->s_hpfs_n_free, &buf->f_bfree); + put_fs_long(s->s_hpfs_n_free, &buf->f_bavail); + put_fs_long(s->s_hpfs_dirband_size, &buf->f_files); + put_fs_long(s->s_hpfs_n_free_dnodes, &buf->f_ffree); + put_fs_long(254, &buf->f_namelen); +} + +/* + * remount. Don't let read only be turned off. + */ + +static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) +{ + if (!(*flags & MS_RDONLY)) + return -EINVAL; + return 0; +} + +/* + * count the dnodes in a directory, and the subdirs. 
+ */ + +static void count_dnodes(struct inode *inode, dnode_secno dno, + unsigned *n_dnodes, unsigned *n_subdirs) +{ + struct quad_buffer_head qbh; + struct dnode *dnode; + struct hpfs_dirent *de; + struct hpfs_dirent *de_end; + + dnode = map_dnode(inode->i_dev, dno, &qbh); + if (!dnode) + return; + de = dnode_first_de(dnode); + de_end = dnode_end_de(dnode); + + (*n_dnodes)++; + + for (; de < de_end; de = de_next_de(de)) { + if (de->down) + count_dnodes(inode, de_down_pointer(de), + n_dnodes, n_subdirs); + if (de->directory && !de->first) + (*n_subdirs)++; + if (de->last || de->length == 0) + break; + } + + brelse4(&qbh); +} + +/* + * count the bits in the free space bit maps + */ + +static unsigned count_bitmap(struct super_block *s) +{ + unsigned n, count, n_bands; + secno *bitmaps; + struct quad_buffer_head qbh; + + /* + * there is one bit map for each 16384 sectors + */ + n_bands = (s->s_hpfs_fs_size + 0x3fff) >> 14; + + /* + * their locations are given in an array pointed to by the super + * block + */ + bitmaps = map_4sectors(s->s_dev, s->s_hpfs_bitmaps, &qbh); + if (!bitmaps) + return 0; + + count = 0; + + /* + * map each one and count the free sectors + */ + for (n = 0; n < n_bands; n++) + if (bitmaps[n] == 0) + printk("HPFS: bit map pointer missing\n"); + else + count += count_one_bitmap(s->s_dev, bitmaps[n]); + + brelse4(&qbh); + return count; +} + +/* + * Read in one bit map, count the bits, return the count. + */ + +static unsigned count_one_bitmap(dev_t dev, secno secno) +{ + struct quad_buffer_head qbh; + char *bits; + unsigned i, count; + + bits = map_4sectors(dev, secno, &qbh); + if (!bits) + return 0; + + count = 0; + + for (i = 0; i < 8 * 2048; i++) + count += (test_bit(i, bits) != 0); + brelse4(&qbh); + + return count; +} + +/* file ops */ + +/* + * read. Read the bytes, put them in buf, return the count. 
+ */ + +static int hpfs_file_read(struct inode *inode, struct file *filp, + char *buf, int count) +{ + unsigned q, r, n, n0; + struct buffer_head *bh; + char *block; + char *start; + + if (inode == 0 || !S_ISREG(inode->i_mode)) + return -EINVAL; + + /* + * truncate count at EOF + */ + if (count > inode->i_size - (off_t) filp->f_pos) + count = inode->i_size - filp->f_pos; + + start = buf; + while (count > 0) { + /* + * get file sector number, offset in sector, length to end of + * sector + */ + q = filp->f_pos >> 9; + r = filp->f_pos & 511; + n = 512 - r; + + /* + * get length to copy to user buffer + */ + if (n > count) + n = count; + + /* + * read the sector, copy to user + */ + block = map_sector(inode->i_dev, hpfs_bmap(inode, q), &bh); + if (!block) + return -EIO; + + /* + * but first decide if it has \r\n, if the mount option said + * to do that + */ + if (inode->i_hpfs_conv == CONV_AUTO) + inode->i_hpfs_conv = choose_conv(block + r, n); + + if (inode->i_hpfs_conv == CONV_BINARY) { + /* + * regular copy, output length is same as input + * length + */ + memcpy_tofs(buf, block + r, n); + n0 = n; + } + else { + /* + * squeeze out \r, output length varies + */ + n0 = convcpy_tofs(buf, block + r, n); + if (count > inode->i_size - (off_t) filp->f_pos - n + n0) + count = inode->i_size - filp->f_pos - n + n0; + } + + brelse(bh); + + /* + * advance input n bytes, output n0 bytes + */ + filp->f_pos += n; + buf += n0; + count -= n0; + } + + return buf - start; +} + +/* + * This routine implements conv=auto. Return CONV_BINARY or CONV_TEXT. 
+ */ + +static unsigned choose_conv(unsigned char *p, unsigned len) +{ + unsigned tvote, bvote; + unsigned c; + + tvote = bvote = 0; + + while (len--) { + c = *p++; + if (c < ' ') + if (c == '\r' && len && *p == '\n') + tvote += 10; + else if (c == '\t' || c == '\n'); + else + bvote += 5; + else if (c < '\177') + tvote++; + else + bvote += 5; + } + + if (tvote > bvote) + return CONV_TEXT; + else + return CONV_BINARY; +} + +/* + * This routine implements conv=text. :s/crlf/nl/ + */ + +static unsigned convcpy_tofs(unsigned char *out, unsigned char *in, + unsigned len) +{ + unsigned char *start = out; + + while (len--) { + unsigned c = *in++; + if (c == '\r' && (len == 0 || *in == '\n')); + else + put_fs_byte(c, out++); + } + + return out - start; +} + +/* + * Return the disk sector number containing a file sector. + */ + +static secno hpfs_bmap(struct inode *inode, unsigned file_secno) +{ + unsigned n, disk_secno; + struct fnode *fnode; + struct buffer_head *bh; + + /* + * There is one sector run cached in the inode. See if the sector is + * in it. + */ + + n = file_secno - inode->i_hpfs_file_sec; + if (n < inode->i_hpfs_n_secs) + return inode->i_hpfs_disk_sec + n; + + /* + * No, read the fnode and go find the sector. + */ + + else { + fnode = map_fnode(inode->i_dev, inode->i_ino, &bh); + if (!fnode) + return 0; + disk_secno = bplus_lookup(inode, &fnode->btree, + file_secno, &bh); + brelse(bh); + return disk_secno; + } +} + +/* + * Search allocation tree *b for the given file sector number and return + * the disk sector number. Buffer *bhp has the tree in it, and can be + * reused for subtrees when access to *b is no longer needed. + * *bhp is busy on entry and exit. + */ + +static secno bplus_lookup(struct inode *inode, struct bplus_header *b, + secno file_secno, struct buffer_head **bhp) +{ + int i; + + /* + * A leaf-level tree gives a list of sector runs. 
Find the one + * containing the file sector we want, cache the map info in the + * inode for later, and return the corresponding disk sector. + */ + + if (!b->internal) { + struct bplus_leaf_node *n = b->u.external; + for (i = 0; i < b->n_used_nodes; i++) { + unsigned t = file_secno - n[i].file_secno; + if (t < n[i].length) { + inode->i_hpfs_file_sec = n[i].file_secno; + inode->i_hpfs_disk_sec = n[i].disk_secno; + inode->i_hpfs_n_secs = n[i].length; + return n[i].disk_secno + t; + } + } + } + + /* + * A non-leaf tree gives a list of subtrees. Find the one containing + * the file sector we want, read it in, and recurse to search it. + */ + + else { + struct bplus_internal_node *n = b->u.internal; + for (i = 0; i < b->n_used_nodes; i++) { + if (file_secno < n[i].file_secno) { + struct anode *anode; + anode_secno ano = n[i].down; + brelse(*bhp); + anode = map_anode(inode->i_dev, ano, bhp); + if (!anode) + break; + return bplus_lookup(inode, &anode->btree, + file_secno, bhp); + } + } + } + + /* + * If we get here there was a hole in the file. As far as I know we + * never do get here, but falling off the end would be indelicate. So + * return a pointer to a handy all-zero sector. This is not a + * reasonable way to handle files with holes if they really do + * happen. + */ + + printk("HPFS: bplus_lookup: sector not found\n"); + return 15; +} + +/* directory ops */ + +/* + * lookup. Search the specified directory for the specified name, set + * *result to the corresponding inode. + * + * lookup uses the inode number to tell read_inode whether it is reading + * the inode of a directory or a file -- file ino's are odd, directory + * ino's are even. read_inode avoids i/o for file inodes; everything + * needed is up here in the directory. (And file fnodes are out in + * the boondocks.) 
+ */ + +static int hpfs_lookup(struct inode *dir, const char *name, int len, + struct inode **result) +{ + struct quad_buffer_head qbh; + struct hpfs_dirent *de; + struct inode *inode; + ino_t ino; + + /* In case of madness */ + + *result = 0; + if (dir == 0) + return -ENOENT; + if (!S_ISDIR(dir->i_mode)) + goto bail; + + /* + * Read in the directory entry. "." is there under the name ^A^A . + * Always read the dir even for . and .. in case we need the dates. + */ + + if (name[0] == '.' && len == 1) + de = map_dirent(dir, dir->i_hpfs_dno, "\001\001", 2, &qbh); + else if (name[0] == '.' && name[1] == '.' && len == 2) + de = map_dirent(dir, + fnode_dno(dir->i_dev, dir->i_hpfs_parent_dir), + "\001\001", 2, &qbh); + else + de = map_dirent(dir, dir->i_hpfs_dno, name, len, &qbh); + + /* + * This is not really a bailout, just means file not found. + */ + + if (!de) + goto bail; + + /* + * Get inode number, what we're after. + */ + + if (de->directory) + ino = dir_ino(de->fnode); + else + ino = file_ino(de->fnode); + + /* + * Go find or make an inode. + */ + + if (!(inode = iget(dir->i_sb, ino))) + goto bail1; + + /* + * Fill in the info from the directory if this is a newly created + * inode. + */ + + if (!inode->i_atime) { + inode->i_atime = local_to_gmt(de->read_date); + inode->i_mtime = local_to_gmt(de->write_date); + inode->i_ctime = local_to_gmt(de->creation_date); + if (de->read_only) + inode->i_mode &= ~0222; + if (!de->directory) { + inode->i_size = de->file_size; + /* + * i_blocks should count the fnode and any anodes. + * We count 1 for the fnode and don't bother about + * anodes -- the disk heads are on the directory band + * and we want them to stay there. + */ + inode->i_blocks = 1 + ((inode->i_size + 511) >> 9); + } + } + + brelse4(&qbh); + + /* + * Made it. + */ + + *result = inode; + iput(dir); + return 0; + + /* + * Didn't. + */ + bail1: + brelse4(&qbh); + bail: + iput(dir); + return -ENOENT; +} + +/* + * Compare two counted strings ignoring case. 
+ * HPFS directory order sorts letters as if they're upper case. + */ + +static inline int memcasecmp(const unsigned char *s1, const unsigned char *s2, + unsigned n) +{ + int t; + + if (n != 0) + do { + unsigned c1 = *s1++; + unsigned c2 = *s2++; + if (c1 - 'a' < 26) + c1 -= 040; + if (c2 - 'a' < 26) + c2 -= 040; + if ((t = c1 - c2) != 0) + return t; + } while (--n != 0); + + return 0; +} + +/* + * Search a directory for the given name, return a pointer to its dir entry + * and a pointer to the buffer containing it. + */ + +static struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno, + const unsigned char *name, unsigned len, + struct quad_buffer_head *qbh) +{ + struct dnode *dnode; + struct hpfs_dirent *de; + struct hpfs_dirent *de_end; + int t, l; + + /* + * read the dnode at the root of our subtree + */ + dnode = map_dnode(inode->i_dev, dno, qbh); + if (!dnode) + return 0; + + /* + * get pointers to start and end+1 of dir entries + */ + de = dnode_first_de(dnode); + de_end = dnode_end_de(dnode); + + /* + * look through the entries for the name we're after + */ + for ( ; de < de_end; de = de_next_de(de)) { + + /* + * compare names + */ + l = len < de->namelen ? len : de->namelen; + t = memcasecmp(name, de->name, l); + + /* + * initial substring matches, compare lengths + */ + if (t == 0) { + t = len - de->namelen; + /* bingo */ + if (t == 0) + return de; + } + + /* + * wanted name .lt. dir name => not present. + */ + if (t < 0) { + /* + * if there is a subtree, search it. + */ + if (de->down) { + dnode_secno sub_dno = de_down_pointer(de); + brelse4(qbh); + return map_dirent(inode, sub_dno, + name, len, qbh); + } + else + break; + } + + /* + * de->last is set on the last name in the dnode (it's always + * a "\377" pseudo entry). de->length == 0 means we're about + * to infinite loop. This test does nothing in a well-formed + * dnode. + */ + if (de->last || de->length == 0) + break; + } + + /* + * name not found. 
+ */ + + return 0; +} + +/* + * readdir. Return exactly 1 dirent. (I tried and tried, but currently + * the interface with libc just does not permit more than 1. If it gets + * fixed, throw this out and just walk the tree and write records into + * the user buffer.) + * + * We keep track of our position in the dnode tree with a sort of + * dewey-decimal record of subtree locations. Like so: + * + * (1 (1.1 1.2 1.3) 2 3 (3.1 (3.1.1 3.1.2) 3.2 3.3 (3.3.1)) 4) + * + * Subtrees appear after their file, out of lexical order, + * which would be before their file. It's easier. + * + * A directory can't hold more than 56 files, so 6 bits are used for + * position numbers. If the tree is so deep that the position encoding + * doesn't fit, I'm sure something absolutely fascinating happens. + * + * The actual sequence of f_pos values is + * 0 => . -1 => .. 1 1.1 ... 8.9 9 => files -2 => eof + * + * The directory inode caches one position-to-dnode correspondence so + * we won't have to repeatedly scan the top levels of the tree. 
+ */ + +static int hpfs_readdir(struct inode *inode, struct file *filp, + struct dirent *dirent, int likely_story) +{ + struct quad_buffer_head qbh; + struct hpfs_dirent *de; + int namelen, lc; + ino_t ino; + + if (inode == 0 + || inode->i_sb == 0 + || !S_ISDIR(inode->i_mode)) + return -EBADF; + + lc = inode->i_sb->s_hpfs_lowercase; + + switch ((off_t) filp->f_pos) { + case 0: + write_one_dirent(dirent, ".", 1, inode->i_ino, lc); + filp->f_pos = -1; + return ROUND_UP(NAME_OFFSET(dirent) + 2); + + case -1: + write_one_dirent(dirent, "..", 2, + inode->i_hpfs_parent_dir, lc); + filp->f_pos = 1; + return ROUND_UP(NAME_OFFSET(dirent) + 3); + + case -2: + return 0; + + default: + de = map_pos_dirent(inode, &filp->f_pos, &qbh); + if (!de) { + filp->f_pos = -2; + return 0; + } + + namelen = de->namelen; + if (de->directory) + ino = dir_ino(de->fnode); + else + ino = file_ino(de->fnode); + write_one_dirent(dirent, de->name, namelen, ino, lc); + brelse4(&qbh); + + return ROUND_UP(NAME_OFFSET(dirent) + namelen + 1); + } +} + +/* + * Send the given name and ino off to the user dirent struct at *dirent. + * Blam it to lowercase if the mount option said to. + * + * Note that Linux d_reclen is the length of the file name, and has nothing + * to do with the length of the dirent record. + */ + +static void write_one_dirent(struct dirent *dirent, const unsigned char *name, + unsigned namelen, ino_t ino, int lowercase) +{ + unsigned n; + + put_fs_long(ino, &dirent->d_ino); + put_fs_word(namelen, &dirent->d_reclen); + + if (lowercase) + for (n = namelen; n != 0;) { + unsigned t = name[--n]; + if (t - 'A' < 26) + t += 040; + put_fs_byte(t, &dirent->d_name[n]); + } + else + memcpy_tofs(dirent->d_name, name, namelen); + + put_fs_byte(0, &dirent->d_name[namelen]); +} + +/* + * Map the dir entry at subtree coordinates given by *posp, and + * increment *posp to point to the following dir entry. 
+ */ + +static struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp, + struct quad_buffer_head *qbh) +{ + unsigned pos, q, r; + dnode_secno dno; + struct hpfs_dirent *de; + + /* + * Get the position code and split off the rightmost index r + */ + + pos = *posp; + q = pos >> 6; + r = pos & 077; + + /* + * Get the sector address of the dnode + * pointed to by the leading part q + */ + + dno = dir_subdno(inode, q); + if (!dno) + return 0; + + /* + * Get the entry at index r in dnode q + */ + + de = map_nth_dirent(inode->i_dev, dno, r, qbh); + + /* + * If none, we're out of files in this dnode. Ascend. + */ + + if (!de) { + if (q == 0) + return 0; + *posp = q + 1; + return map_pos_dirent(inode, posp, qbh); + } + + /* + * If a subtree is here, descend. + */ + + if (de->down) + *posp = pos << 6 | 1; + else + *posp = pos + 1; + + /* + * Don't return the ^A^A and \377 entries. + */ + + if (de->first || de->last) { + brelse4(qbh); + return map_pos_dirent(inode, posp, qbh); + } + else + return de; +} + +/* + * Return the address of the dnode with subtree coordinates given by pos. 
+ */ + +static dnode_secno dir_subdno(struct inode *inode, unsigned pos) +{ + struct hpfs_dirent *de; + struct quad_buffer_head qbh; + + /* + * 0 is the root dnode + */ + + if (pos == 0) + return inode->i_hpfs_dno; + + /* + * we have one pos->dnode translation cached in the inode + */ + + else if (pos == inode->i_hpfs_dpos) + return inode->i_hpfs_dsubdno; + + /* + * otherwise go look + */ + + else { + unsigned q = pos >> 6; + unsigned r = pos & 077; + dnode_secno dno; + + /* + * dnode at position q + */ + dno = dir_subdno(inode, q); + if (dno == 0) + return 0; + + /* + * entry at index r + */ + de = map_nth_dirent(inode->i_dev, dno, r, &qbh); + if (!de || !de->down) + return 0; + + /* + * get the dnode down pointer + */ + dno = de_down_pointer(de); + brelse4(&qbh); + + /* + * cache it for next time + */ + inode->i_hpfs_dpos = pos; + inode->i_hpfs_dsubdno = dno; + return dno; + } +} + +/* + * Return the dir entry at index n in dnode dno, or 0 if there isn't one + */ + +static struct hpfs_dirent *map_nth_dirent(dev_t dev, dnode_secno dno, + int n, + struct quad_buffer_head *qbh) +{ + int i; + struct hpfs_dirent *de, *de_end; + struct dnode *dnode = map_dnode(dev, dno, qbh); + + de = dnode_first_de(dnode); + de_end = dnode_end_de(dnode); + + for (i = 1; de < de_end; i++, de = de_next_de(de)) { + if (i == n) + return de; + if (de->last || de->length == 0) + break; + } + + brelse4(qbh); + return 0; +} + +static int hpfs_dir_read(struct inode *inode, struct file *filp, + char *buf, int count) +{ + return -EISDIR; +} + +/* Return the dnode pointer in a directory fnode */ + +static dnode_secno fnode_dno(dev_t dev, ino_t ino) +{ + struct buffer_head *bh; + struct fnode *fnode; + dnode_secno dno; + + fnode = map_fnode(dev, ino, &bh); + if (!fnode) + return 0; + + dno = fnode->u.external[0].disk_secno; + brelse(bh); + return dno; +} + +/* Map an fnode into a buffer and return pointers to it and to the buffer. 
*/ + +static struct fnode *map_fnode(dev_t dev, ino_t ino, struct buffer_head **bhp) +{ + struct fnode *fnode; + + if (ino == 0) { + printk("HPFS: missing fnode\n"); + return 0; + } + + fnode = map_sector(dev, ino_secno(ino), bhp); + if (fnode) + if (fnode->magic != FNODE_MAGIC) { + printk("HPFS: map_fnode: bad fnode pointer\n"); + brelse(*bhp); + return 0; + } + return fnode; +} + +/* Map an anode into a buffer and return pointers to it and to the buffer. */ + +static struct anode *map_anode(dev_t dev, unsigned secno, + struct buffer_head **bhp) +{ + struct anode *anode; + + if (secno == 0) { + printk("HPFS: missing anode\n"); + return 0; + } + + anode = map_sector(dev, secno, bhp); + if (anode) + if (anode->magic != ANODE_MAGIC || anode->self != secno) { + printk("HPFS: map_anode: bad anode pointer\n"); + brelse(*bhp); + return 0; + } + return anode; +} + +/* Map a dnode into a buffer and return pointers to it and to the buffer. */ + +static struct dnode *map_dnode(dev_t dev, unsigned secno, + struct quad_buffer_head *qbh) +{ + struct dnode *dnode; + + if (secno == 0) { + printk("HPFS: missing dnode\n"); + return 0; + } + + dnode = map_4sectors(dev, secno, qbh); + if (dnode) + if (dnode->magic != DNODE_MAGIC || dnode->self != secno) { + printk("HPFS: map_dnode: bad dnode pointer\n"); + brelse4(qbh); + return 0; + } + return dnode; +} + +/* Map a sector into a buffer and return pointers to it and to the buffer. */ + +static void *map_sector(dev_t dev, unsigned secno, struct buffer_head **bhp) +{ + struct buffer_head *bh; + + if ((*bhp = bh = bread(dev, secno, 512)) != 0) + return bh->b_data; + else { + printk("HPFS: map_sector: read error\n"); + return 0; + } +} + +/* Map 4 sectors into a 4buffer and return pointers to it and to the buffer. 
*/ + +static void *map_4sectors(dev_t dev, unsigned secno, + struct quad_buffer_head *qbh) +{ + struct buffer_head *bh; + char *data; + + if (secno & 3) { + printk("HPFS: map_4sectors: unaligned read\n"); + return 0; + } + + qbh->data = data = kmalloc(2048, GFP_KERNEL); + if (!data) + goto bail; + + qbh->bh[0] = bh = breada(dev, secno, 512, 0, UINT_MAX); + if (!bh) + goto bail0; + memcpy(data, bh->b_data, 512); + + qbh->bh[1] = bh = bread(dev, secno + 1, 512); + if (!bh) + goto bail1; + memcpy(data + 512, bh->b_data, 512); + + qbh->bh[2] = bh = bread(dev, secno + 2, 512); + if (!bh) + goto bail2; + memcpy(data + 2 * 512, bh->b_data, 512); + + qbh->bh[3] = bh = bread(dev, secno + 3, 512); + if (!bh) + goto bail3; + memcpy(data + 3 * 512, bh->b_data, 512); + + return data; + + bail3: + brelse(qbh->bh[2]); + bail2: + brelse(qbh->bh[1]); + bail1: + brelse(qbh->bh[0]); + bail0: + kfree_s(data, 2048); + bail: + printk("HPFS: map_4sectors: read error\n"); + return 0; +} + +/* Deallocate a 4-buffer block */ + +static void brelse4(struct quad_buffer_head *qbh) +{ + brelse(qbh->bh[3]); + brelse(qbh->bh[2]); + brelse(qbh->bh[1]); + brelse(qbh->bh[0]); + kfree_s(qbh->data, 2048); +} diff --git a/fs/inode.c b/fs/inode.c new file mode 100644 index 000000000..7278b850e --- /dev/null +++ b/fs/inode.c @@ -0,0 +1,572 @@ +/* + * linux/fs/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/stat.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> + +#include <asm/system.h> + +static struct inode_hash_entry { + struct inode * inode; + int updating; +} hash_table[NR_IHASH]; + +static struct inode * first_inode; +static struct wait_queue * inode_wait = NULL; +static int nr_inodes = 0, nr_free_inodes = 0; + +static inline int const hashfn(dev_t dev, unsigned int i) +{ + return (dev ^ i) % NR_IHASH; +} + +static inline struct inode_hash_entry * const hash(dev_t dev, int i) +{ + return hash_table + hashfn(dev, 
i); +} + +static void insert_inode_free(struct inode *inode) +{ + inode->i_next = first_inode; + inode->i_prev = first_inode->i_prev; + inode->i_next->i_prev = inode; + inode->i_prev->i_next = inode; + first_inode = inode; +} + +static void remove_inode_free(struct inode *inode) +{ + if (first_inode == inode) + first_inode = first_inode->i_next; + if (inode->i_next) + inode->i_next->i_prev = inode->i_prev; + if (inode->i_prev) + inode->i_prev->i_next = inode->i_next; + inode->i_next = inode->i_prev = NULL; +} + +void insert_inode_hash(struct inode *inode) +{ + struct inode_hash_entry *h; + h = hash(inode->i_dev, inode->i_ino); + + inode->i_hash_next = h->inode; + inode->i_hash_prev = NULL; + if (inode->i_hash_next) + inode->i_hash_next->i_hash_prev = inode; + h->inode = inode; +} + +static void remove_inode_hash(struct inode *inode) +{ + struct inode_hash_entry *h; + h = hash(inode->i_dev, inode->i_ino); + + if (h->inode == inode) + h->inode = inode->i_hash_next; + if (inode->i_hash_next) + inode->i_hash_next->i_hash_prev = inode->i_hash_prev; + if (inode->i_hash_prev) + inode->i_hash_prev->i_hash_next = inode->i_hash_next; + inode->i_hash_prev = inode->i_hash_next = NULL; +} + +static void put_last_free(struct inode *inode) +{ + remove_inode_free(inode); + inode->i_prev = first_inode->i_prev; + inode->i_prev->i_next = inode; + inode->i_next = first_inode; + inode->i_next->i_prev = inode; +} + +void grow_inodes(void) +{ + struct inode * inode; + int i; + + if (!(inode = (struct inode*) get_free_page(GFP_KERNEL))) + return; + + i=PAGE_SIZE / sizeof(struct inode); + nr_inodes += i; + nr_free_inodes += i; + + if (!first_inode) + inode->i_next = inode->i_prev = first_inode = inode++, i--; + + for ( ; i ; i-- ) + insert_inode_free(inode++); +} + +unsigned long inode_init(unsigned long start, unsigned long end) +{ + memset(hash_table, 0, sizeof(hash_table)); + first_inode = NULL; + return start; +} + +static void __wait_on_inode(struct inode *); + +static inline void 
wait_on_inode(struct inode * inode) +{ + if (inode->i_lock) + __wait_on_inode(inode); +} + +static inline void lock_inode(struct inode * inode) +{ + wait_on_inode(inode); + inode->i_lock = 1; +} + +static inline void unlock_inode(struct inode * inode) +{ + inode->i_lock = 0; + wake_up(&inode->i_wait); +} + +/* + * Note that we don't want to disturb any wait-queues when we discard + * an inode. + * + * Argghh. Got bitten by a gcc problem with inlining: no way to tell + * the compiler that the inline asm function 'memset' changes 'inode'. + * I've been searching for the bug for days, and was getting desperate. + * Finally looked at the assembler output... Grrr. + * + * The solution is the weird use of 'volatile'. Ho humm. Have to report + * it to the gcc lists, and hope we can do this more cleanly some day.. + */ +void clear_inode(struct inode * inode) +{ + struct wait_queue * wait; + + wait_on_inode(inode); + remove_inode_hash(inode); + remove_inode_free(inode); + wait = ((volatile struct inode *) inode)->i_wait; + if (inode->i_count) + nr_free_inodes++; + memset(inode,0,sizeof(*inode)); + ((volatile struct inode *) inode)->i_wait = wait; + insert_inode_free(inode); +} + +int fs_may_mount(dev_t dev) +{ + struct inode * inode, * next; + int i; + + next = first_inode; + for (i = nr_inodes ; i > 0 ; i--) { + inode = next; + next = inode->i_next; /* clear_inode() changes the queues.. 
*/ + if (inode->i_dev != dev) + continue; + if (inode->i_count || inode->i_dirt || inode->i_lock) + return 0; + clear_inode(inode); + } + return 1; +} + +int fs_may_umount(dev_t dev, struct inode * mount_root) +{ + struct inode * inode; + int i; + + inode = first_inode; + for (i=0 ; i < nr_inodes ; i++, inode = inode->i_next) { + if (inode->i_dev != dev || !inode->i_count) + continue; + if (inode == mount_root && inode->i_count == 1) + continue; + return 0; + } + return 1; +} + +int fs_may_remount_ro(dev_t dev) +{ + struct file * file; + int i; + + /* Check that no files are currently opened for writing. */ + for (file = first_file, i=0; i<nr_files; i++, file=file->f_next) { + if (!file->f_count || !file->f_inode || + file->f_inode->i_dev != dev) + continue; + if (S_ISREG(file->f_inode->i_mode) && (file->f_mode & 2)) + return 0; + } + return 1; +} + +static void write_inode(struct inode * inode) +{ + if (!inode->i_dirt) + return; + wait_on_inode(inode); + if (!inode->i_dirt) + return; + if (!inode->i_sb || !inode->i_sb->s_op || !inode->i_sb->s_op->write_inode) { + inode->i_dirt = 0; + return; + } + inode->i_lock = 1; + inode->i_sb->s_op->write_inode(inode); + unlock_inode(inode); +} + +static void read_inode(struct inode * inode) +{ + lock_inode(inode); + if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->read_inode) + inode->i_sb->s_op->read_inode(inode); + unlock_inode(inode); +} + +/* POSIX UID/GID verification for setting inode attributes */ +int inode_change_ok(struct inode *inode, struct iattr *attr) +{ + /* Make sure a caller can chown */ + if ((attr->ia_valid & ATTR_UID) && + (current->fsuid != inode->i_uid || + attr->ia_uid != inode->i_uid) && !fsuser()) + return -EPERM; + + /* Make sure caller can chgrp */ + if ((attr->ia_valid & ATTR_GID) && + (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid) && + !fsuser()) + return -EPERM; + + /* Make sure a caller can chmod */ + if (attr->ia_valid & ATTR_MODE) { + if ((current->fsuid != inode->i_uid) 
&& !fsuser()) + return -EPERM; + /* Also check the setgid bit! */ + if (!fsuser() && !in_group_p((attr->ia_valid & ATTR_GID) ? attr->ia_gid : + inode->i_gid)) + attr->ia_mode &= ~S_ISGID; + } + + /* Check for setting the inode time */ + if ((attr->ia_valid & ATTR_ATIME_SET) && + ((current->fsuid != inode->i_uid) && !fsuser())) + return -EPERM; + if ((attr->ia_valid & ATTR_MTIME_SET) && + ((current->fsuid != inode->i_uid) && !fsuser())) + return -EPERM; + + + return 0; +} + +/* + * Set the appropriate attributes from an attribute structure into + * the inode structure. + */ +void inode_setattr(struct inode *inode, struct iattr *attr) +{ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + if (attr->ia_valid & ATTR_SIZE) + inode->i_size = attr->ia_size; + if (attr->ia_valid & ATTR_ATIME) + inode->i_atime = attr->ia_atime; + if (attr->ia_valid & ATTR_MTIME) + inode->i_mtime = attr->ia_mtime; + if (attr->ia_valid & ATTR_CTIME) + inode->i_ctime = attr->ia_ctime; + if (attr->ia_valid & ATTR_MODE) { + inode->i_mode = attr->ia_mode; + if (!fsuser() && !in_group_p(inode->i_gid)) + inode->i_mode &= ~S_ISGID; + } + inode->i_dirt = 1; +} + +/* + * notify_change is called for inode-changing operations such as + * chown, chmod, utime, and truncate. It is guaranteed (unlike + * write_inode) to be called from the context of the user requesting + * the change. It is not called for ordinary access-time updates. + * NFS uses this to get the authentication correct. 
-- jrs + */ + +int notify_change(struct inode * inode, struct iattr *attr) +{ + int retval; + + if (inode->i_sb && inode->i_sb->s_op && + inode->i_sb->s_op->notify_change) + return inode->i_sb->s_op->notify_change(inode, attr); + + if ((retval = inode_change_ok(inode, attr)) != 0) + return retval; + + inode_setattr(inode, attr); + return 0; +} + +/* + * bmap is needed for demand-loading and paging: if this function + * doesn't exist for a filesystem, then those things are impossible: + * executables cannot be run from the filesystem etc... + * + * This isn't as bad as it sounds: the read-routines might still work, + * so the filesystem would be otherwise ok (for example, you might have + * a DOS filesystem, which doesn't lend itself to bmap very well, but + * you could still transfer files to/from the filesystem) + */ +int bmap(struct inode * inode, int block) +{ + if (inode->i_op && inode->i_op->bmap) + return inode->i_op->bmap(inode,block); + return 0; +} + +void invalidate_inodes(dev_t dev) +{ + struct inode * inode, * next; + int i; + + next = first_inode; + for(i = nr_inodes ; i > 0 ; i--) { + inode = next; + next = inode->i_next; /* clear_inode() changes the queues.. 
*/ + if (inode->i_dev != dev) + continue; + if (inode->i_count || inode->i_dirt || inode->i_lock) { + printk("VFS: inode busy on removed device %d/%d\n", MAJOR(dev), MINOR(dev)); + continue; + } + clear_inode(inode); + } +} + +void sync_inodes(dev_t dev) +{ + int i; + struct inode * inode; + + inode = first_inode; + for(i = 0; i < nr_inodes*2; i++, inode = inode->i_next) { + if (dev && inode->i_dev != dev) + continue; + wait_on_inode(inode); + if (inode->i_dirt) + write_inode(inode); + } +} + +void iput(struct inode * inode) +{ + if (!inode) + return; + wait_on_inode(inode); + if (!inode->i_count) { + printk("VFS: iput: trying to free free inode\n"); + printk("VFS: device %d/%d, inode %lu, mode=0%07o\n", + MAJOR(inode->i_rdev), MINOR(inode->i_rdev), + inode->i_ino, inode->i_mode); + return; + } + if (inode->i_pipe) + wake_up_interruptible(&PIPE_WAIT(*inode)); +repeat: + if (inode->i_count>1) { + inode->i_count--; + return; + } + wake_up(&inode_wait); + if (inode->i_pipe) { + unsigned long page = (unsigned long) PIPE_BASE(*inode); + PIPE_BASE(*inode) = NULL; + free_page(page); + } + if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->put_inode) { + inode->i_sb->s_op->put_inode(inode); + if (!inode->i_nlink) + return; + } + if (inode->i_dirt) { + write_inode(inode); /* we can sleep - so do again */ + wait_on_inode(inode); + goto repeat; + } + inode->i_count--; + nr_free_inodes++; + return; +} + +struct inode * get_empty_inode(void) +{ + struct inode * inode, * best; + int i; + + if (nr_inodes < NR_INODE && nr_free_inodes < (nr_inodes >> 2)) + grow_inodes(); +repeat: + inode = first_inode; + best = NULL; + for (i = 0; i<nr_inodes; inode = inode->i_next, i++) { + if (!inode->i_count) { + if (!best) + best = inode; + if (!inode->i_dirt && !inode->i_lock) { + best = inode; + break; + } + } + } + if (!best || best->i_dirt || best->i_lock) + if (nr_inodes < NR_INODE) { + grow_inodes(); + goto repeat; + } + inode = best; + if (!inode) { + printk("VFS: No free inodes 
- contact Linus\n"); + sleep_on(&inode_wait); + goto repeat; + } + if (inode->i_lock) { + wait_on_inode(inode); + goto repeat; + } + if (inode->i_dirt) { + write_inode(inode); + goto repeat; + } + if (inode->i_count) + goto repeat; + clear_inode(inode); + inode->i_count = 1; + inode->i_nlink = 1; + inode->i_version = ++event; + inode->i_sem.count = 1; + nr_free_inodes--; + if (nr_free_inodes < 0) { + printk ("VFS: get_empty_inode: bad free inode count.\n"); + nr_free_inodes = 0; + } + return inode; +} + +struct inode * get_pipe_inode(void) +{ + struct inode * inode; + extern struct inode_operations pipe_inode_operations; + + if (!(inode = get_empty_inode())) + return NULL; + if (!(PIPE_BASE(*inode) = (char*) __get_free_page(GFP_USER))) { + iput(inode); + return NULL; + } + inode->i_op = &pipe_inode_operations; + inode->i_count = 2; /* sum of readers/writers */ + PIPE_WAIT(*inode) = NULL; + PIPE_START(*inode) = PIPE_LEN(*inode) = 0; + PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0; + PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; + PIPE_LOCK(*inode) = 0; + inode->i_pipe = 1; + inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_blksize = PAGE_SIZE; + return inode; +} + +struct inode * __iget(struct super_block * sb, int nr, int crossmntp) +{ + static struct wait_queue * update_wait = NULL; + struct inode_hash_entry * h; + struct inode * inode; + struct inode * empty = NULL; + + if (!sb) + panic("VFS: iget with sb==NULL"); + h = hash(sb->s_dev, nr); +repeat: + for (inode = h->inode; inode ; inode = inode->i_hash_next) + if (inode->i_dev == sb->s_dev && inode->i_ino == nr) + goto found_it; + if (!empty) { + h->updating++; + empty = get_empty_inode(); + if (!--h->updating) + wake_up(&update_wait); + if (empty) + goto repeat; + return (NULL); + } + inode = empty; + inode->i_sb = sb; + inode->i_dev = sb->s_dev; + inode->i_ino 
= nr; + inode->i_flags = sb->s_flags; + put_last_free(inode); + insert_inode_hash(inode); + read_inode(inode); + goto return_it; + +found_it: + if (!inode->i_count) + nr_free_inodes--; + inode->i_count++; + wait_on_inode(inode); + if (inode->i_dev != sb->s_dev || inode->i_ino != nr) { + printk("Whee.. inode changed from under us. Tell Linus\n"); + iput(inode); + goto repeat; + } + if (crossmntp && inode->i_mount) { + struct inode * tmp = inode->i_mount; + tmp->i_count++; + iput(inode); + inode = tmp; + wait_on_inode(inode); + } + if (empty) + iput(empty); + +return_it: + while (h->updating) + sleep_on(&update_wait); + return inode; +} + +/* + * The "new" scheduling primitives (new as of 0.97 or so) allow this to + * be done without disabling interrupts (other than in the actual queue + * updating things: only a couple of 386 instructions). This should be + * much better for interrupt latency. + */ +static void __wait_on_inode(struct inode * inode) +{ + struct wait_queue wait = { current, NULL }; + + add_wait_queue(&inode->i_wait, &wait); +repeat: + current->state = TASK_UNINTERRUPTIBLE; + if (inode->i_lock) { + schedule(); + goto repeat; + } + remove_wait_queue(&inode->i_wait, &wait); + current->state = TASK_RUNNING; +} diff --git a/fs/ioctl.c b/fs/ioctl.c new file mode 100644 index 000000000..22d0f4d10 --- /dev/null +++ b/fs/ioctl.c @@ -0,0 +1,99 @@ +/* + * linux/fs/ioctl.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <asm/segment.h> + +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/termios.h> +#include <linux/fcntl.h> /* for f_flags values */ + +static int file_ioctl(struct file *filp,unsigned int cmd,unsigned long arg) +{ + int error; + int block; + + switch (cmd) { + case FIBMAP: + if (filp->f_inode->i_op == NULL) + return -EBADF; + if (filp->f_inode->i_op->bmap == NULL) + return -EINVAL; + error = verify_area(VERIFY_WRITE,(void *) arg,4); + if (error) + return error; + 
block = get_fs_long((long *) arg); + block = filp->f_inode->i_op->bmap(filp->f_inode,block); + put_fs_long(block,(long *) arg); + return 0; + case FIGETBSZ: + if (filp->f_inode->i_sb == NULL) + return -EBADF; + error = verify_area(VERIFY_WRITE,(void *) arg,4); + if (error) + return error; + put_fs_long(filp->f_inode->i_sb->s_blocksize, + (long *) arg); + return 0; + case FIONREAD: + error = verify_area(VERIFY_WRITE,(void *) arg,4); + if (error) + return error; + put_fs_long(filp->f_inode->i_size - filp->f_pos, + (long *) arg); + return 0; + } + if (filp->f_op && filp->f_op->ioctl) + return filp->f_op->ioctl(filp->f_inode, filp, cmd,arg); + return -EINVAL; +} + + +asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + struct file * filp; + int on; + + if (fd >= NR_OPEN || !(filp = current->files->fd[fd])) + return -EBADF; + switch (cmd) { + case FIOCLEX: + FD_SET(fd, &current->files->close_on_exec); + return 0; + + case FIONCLEX: + FD_CLR(fd, &current->files->close_on_exec); + return 0; + + case FIONBIO: + on = get_fs_long((unsigned long *) arg); + if (on) + filp->f_flags |= O_NONBLOCK; + else + filp->f_flags &= ~O_NONBLOCK; + return 0; + + case FIOASYNC: /* O_SYNC is not yet implemented, + but it's here for completeness. */ + on = get_fs_long ((unsigned long *) arg); + if (on) + filp->f_flags |= O_SYNC; + else + filp->f_flags &= ~O_SYNC; + return 0; + + default: + if (filp->f_inode && S_ISREG(filp->f_inode->i_mode)) + return file_ioctl(filp,cmd,arg); + + if (filp->f_op && filp->f_op->ioctl) + return filp->f_op->ioctl(filp->f_inode, filp, cmd,arg); + + return -EINVAL; + } +} diff --git a/fs/isofs/Makefile b/fs/isofs/Makefile new file mode 100644 index 000000000..a780af479 --- /dev/null +++ b/fs/isofs/Makefile @@ -0,0 +1,30 @@ +# +# Makefile for the linux isofs-filesystem routines. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies.
DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +.c.s: + $(CC) $(CFLAGS) -S $< +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< + +OBJS= namei.o inode.o file.o dir.o util.o rock.o symlink.o + +isofs.o: $(OBJS) + $(LD) -r -o isofs.o $(OBJS) + +dep: + $(CPP) -M *.c > .depend + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c new file mode 100644 index 000000000..b1934db04 --- /dev/null +++ b/fs/isofs/dir.c @@ -0,0 +1,258 @@ +/* + * linux/fs/isofs/dir.c + * + * (C) 1992, 1993, 1994 Eric Youngdale Modified for ISO9660 filesystem. + * + * (C) 1991 Linus Torvalds - minix filesystem + * + * isofs directory handling functions + */ + +#include <linux/errno.h> + +#include <asm/segment.h> + +#include <linux/fs.h> +#include <linux/iso_fs.h> +#include <linux/kernel.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/malloc.h> +#include <linux/sched.h> +#include <linux/locks.h> + +#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) +#define ROUND_UP(x) (((x)+3) & ~3) + +static int isofs_readdir(struct inode *, struct file *, struct dirent *, int); + +static struct file_operations isofs_dir_operations = { + NULL, /* lseek - default */ + NULL, /* read */ + NULL, /* write - bad */ + isofs_readdir, /* readdir */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* no special open code */ + NULL, /* no special release code */ + NULL /* fsync */ +}; + +/* + * directories can handle most operations... 
+ */ +struct inode_operations isofs_dir_inode_operations = { + &isofs_dir_operations, /* default directory file-ops */ + NULL, /* create */ + isofs_lookup, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + isofs_bmap, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static int isofs_readdir(struct inode * inode, struct file * filp, + struct dirent * dirent, int count) +{ + unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); + unsigned char bufbits = ISOFS_BUFFER_BITS(inode); + unsigned int block,offset,i, j; + char c = 0; + int inode_number; + struct buffer_head * bh; + void * cpnt = NULL; + unsigned int old_offset; + int dlen, rrflag; + int high_sierra = 0; + char * dpnt, *dpnt1; + struct iso_directory_record * de; + + dpnt1 = NULL; + if (!inode || !S_ISDIR(inode->i_mode)) + return -EBADF; + + offset = filp->f_pos & (bufsize - 1); + block = isofs_bmap(inode,filp->f_pos>>bufbits); + + if(!block) return 0; + + if(!(bh = breada(inode->i_dev, block, bufsize, filp->f_pos, inode->i_size))) + return 0; + + while (filp->f_pos < inode->i_size) { +#ifdef DEBUG + printk("Block, offset, f_pos: %x %x %x\n", + block, offset, filp->f_pos); +#endif + de = (struct iso_directory_record *) (bh->b_data + offset); + inode_number = (block << bufbits) + (offset & (bufsize - 1)); + + /* If the length byte is zero, we should move on to the next + CDROM sector. If we are at the end of the directory, we + kick out of the while loop. 
*/ + + if (*((unsigned char *) de) == 0) { + brelse(bh); + offset = 0; + filp->f_pos = ((filp->f_pos & ~(ISOFS_BLOCK_SIZE - 1)) + + ISOFS_BLOCK_SIZE); + block = isofs_bmap(inode,(filp->f_pos)>>bufbits); + if (!block + || !(bh = breada(inode->i_dev, block, bufsize, filp->f_pos, + inode->i_size))) + return 0; + continue; + } + + /* Make sure that the entire directory record is in the + current bh block. + If not, we malloc a buffer, and put the two halves together, + so that we can cleanly read the block */ + + old_offset = offset; + offset += *((unsigned char *) de); + filp->f_pos += *((unsigned char *) de); + + if (offset > bufsize) { + unsigned int frag1; + frag1 = bufsize - old_offset; + cpnt = kmalloc(*((unsigned char *) de),GFP_KERNEL); + if (!cpnt) return 0; + memcpy(cpnt, bh->b_data + old_offset, frag1); + de = (struct iso_directory_record *) ((char *)cpnt); + brelse(bh); + offset = filp->f_pos & (bufsize - 1); + block = isofs_bmap(inode,(filp->f_pos)>> bufbits); + if (!block + || !(bh = breada(inode->i_dev, block, bufsize, + filp->f_pos, inode->i_size))) { + kfree(cpnt); + return 0; + }; + memcpy((char *)cpnt+frag1, bh->b_data, offset); + } + + /* Handle the case of the '.' directory */ + + rrflag = 0; + i = 1; + if (de->name_len[0] == 1 && de->name[0] == 0) { + put_fs_byte('.',dirent->d_name); + inode_number = inode->i_ino; + dpnt = "."; + } + + /* Handle the case of the '..' directory */ + + else if (de->name_len[0] == 1 && de->name[0] == 1) { + put_fs_byte('.',dirent->d_name); + put_fs_byte('.',dirent->d_name+1); + i = 2; + dpnt = ".."; + if((inode->i_sb->u.isofs_sb.s_firstdatazone) != inode->i_ino) + inode_number = inode->u.isofs_i.i_backlink; + else + inode_number = inode->i_ino; + + /* This should never happen, but who knows. 
Try to be forgiving */ + if(inode_number == -1) { + inode_number = + isofs_lookup_grandparent(inode, + find_rock_ridge_relocation(de, inode)); + if(inode_number == -1){ /* Should never happen */ + printk("Backlink not properly set.\n"); + goto out; + }; + } + } + + /* Handle everything else. Do name translation if there + is no Rock Ridge NM field. */ + + else { + /* Do not report hidden or associated files */ + high_sierra = inode->i_sb->u.isofs_sb.s_high_sierra; + if (de->flags[-high_sierra] & 5) { + if (cpnt) { + kfree(cpnt); + cpnt = NULL; + }; + continue; + } + dlen = de->name_len[0]; + dpnt = de->name; + i = dlen; + rrflag = get_rock_ridge_filename(de, &dpnt, &dlen, inode); + if (rrflag) { + if (rrflag == -1) { /* This is a rock ridge reloc dir */ + if (cpnt) { + kfree(cpnt); + cpnt = NULL; + }; + continue; + }; + i = dlen; + } + else + if(inode->i_sb->u.isofs_sb.s_mapping == 'n') { + dpnt1 = dpnt; + dpnt = kmalloc(dlen, GFP_KERNEL); + if (!dpnt) goto out; + for (i = 0; i < dlen && i < NAME_MAX; i++) { + if (!(c = dpnt1[i])) break; + if (c >= 'A' && c <= 'Z') c |= 0x20; /* lower case */ + if (c == '.' && i == dlen-3 && de->name[i+1] == ';' && de->name[i+2] == '1') + break; /* Drop trailing '.;1' (ISO9660:1988 7.5.1 requires period) */ + if (c == ';' && i == dlen-2 && de->name[i+1] == '1') + break; /* Drop trailing ';1' */ + if (c == ';') c = '.'; /* Convert remaining ';' to '.' */ + dpnt[i] = c; + } + } + for(j=0; j<i; j++) + put_fs_byte(dpnt[j],j+dirent->d_name); /* And save it */ + if(dpnt1) { + kfree(dpnt); + dpnt = dpnt1; + } + + dcache_add(inode, dpnt, i, inode_number); + }; +#if 0 + printk("Nchar: %d\n",i); +#endif + + if (rrflag) kfree(dpnt); + if (cpnt) { + kfree(cpnt); + cpnt = NULL; + }; + + if (i) { + put_fs_long(inode_number, &dirent->d_ino); + put_fs_byte(0,i+dirent->d_name); + put_fs_word(i,&dirent->d_reclen); + brelse(bh); + return ROUND_UP(NAME_OFFSET(dirent) + i + 1); + } + } + /* We go here for any condition we cannot handle. 
We also drop through + to here at the end of the directory. */ + out: + if (cpnt) + kfree(cpnt); + brelse(bh); + return 0; +} + + + diff --git a/fs/isofs/file.c b/fs/isofs/file.c new file mode 100644 index 000000000..ee0877d7b --- /dev/null +++ b/fs/isofs/file.c @@ -0,0 +1,260 @@ +/* + * linux/fs/isofs/file.c + * + * (C) 1992, 1993, 1994 Eric Youngdale Modified for ISO9660 filesystem. + * + * (C) 1991 Linus Torvalds - minix filesystem + * + * isofs regular file handling primitives + */ + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/sched.h> +#include <linux/iso_fs.h> +#include <linux/fcntl.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/stat.h> +#include <linux/locks.h> + +#include <linux/dirent.h> + +#define NBUF 32 + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +#include <linux/fs.h> +#include <linux/iso_fs.h> + +static int isofs_file_read(struct inode *, struct file *, char *, int); + +/* + * We have mostly NULL's here: the current defaults are ok for + * the isofs filesystem. + */ +static struct file_operations isofs_file_operations = { + NULL, /* lseek - default */ + isofs_file_read, /* read */ + NULL, /* write */ + NULL, /* readdir - bad */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + generic_mmap, /* mmap */ + NULL, /* no special open is needed */ + NULL, /* release */ + NULL /* fsync */ +}; + +struct inode_operations isofs_file_inode_operations = { + &isofs_file_operations, /* default file operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + isofs_bmap, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +/* This is a heuristic to determine if a file is text of binary. If it + * is text, then we translate all 0x0d characters to spaces. 
If the 0x0d + * character is not preceded or followed by a 0x0a, then we turn it into + * a 0x0a. A control-Z is also turned into a linefeed. + */ + +static inline void unixify_to_fs(char * outbuf, char * buffer, int chars, + int mode) +{ + char outchar; + + while(chars--){ + outchar = *buffer; + if(outchar == 0x1a) outchar = 0x0a; + if(outchar == 0x0d){ + if(mode == ISOFS_FILE_TEXT_M) outchar = 0x0a; + if(mode == ISOFS_FILE_TEXT) outchar = ' '; + } + put_fs_byte(outchar, outbuf++); + buffer++; + } +} + +/*This function determines if a given file has a DOS-like text format or not*/ + +static void isofs_determine_filetype(struct inode * inode) +{ + int block; + int result, i; + struct buffer_head * bh; + unsigned char * pnt; + + block = isofs_bmap(inode,0); + if (block && (bh = bread(inode->i_dev,block, ISOFS_BUFFER_SIZE(inode)))) { + pnt = (unsigned char *) bh->b_data; + result = ISOFS_FILE_TEXT_M; + for(i=0;i<(inode->i_size < ISOFS_BUFFER_SIZE(inode) ? inode->i_size : ISOFS_BUFFER_SIZE(inode)); + i++,pnt++){ + if(*pnt & 0x80) {result = ISOFS_FILE_BINARY; break;}; + if(*pnt >= 0x20 || *pnt == 0x1a) continue; + if(*pnt == 0x0a) {result = ISOFS_FILE_TEXT; continue;}; + if(*pnt >= 0x9 && *pnt <= 0x0d) continue; + result = ISOFS_FILE_BINARY; + break; + } + brelse(bh); + inode->u.isofs_i.i_file_format = result; + } +} + +static int isofs_file_read(struct inode * inode, struct file * filp, char * buf, int count) +{ + int read,left,chars; + int block, blocks, offset, total_blocks; + int bhrequest; + int ra_blocks, max_block, nextblock; + struct buffer_head ** bhb, ** bhe; + struct buffer_head * bhreq[NBUF]; + struct buffer_head * buflist[NBUF]; + + if (!inode) { + printk("isofs_file_read: inode = NULL\n"); + return -EINVAL; + } + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { + printk("isofs_file_read: mode = %07o\n",inode->i_mode); + return -EINVAL; + } + if (inode->u.isofs_i.i_file_format == ISOFS_FILE_UNKNOWN) + isofs_determine_filetype(inode); + if 
(filp->f_pos > inode->i_size) + left = 0; + else + left = inode->i_size - filp->f_pos; + if (left > count) + left = count; + if (left <= 0) + return 0; + read = 0; + block = filp->f_pos >> ISOFS_BUFFER_BITS(inode); + offset = (inode->u.isofs_i.i_first_extent + filp->f_pos) + & (ISOFS_BUFFER_SIZE(inode)-1); + blocks = (left + offset + ISOFS_BUFFER_SIZE(inode) - 1) / ISOFS_BUFFER_SIZE(inode); + bhb = bhe = buflist; + + ra_blocks = read_ahead[MAJOR(inode->i_dev)] / (BLOCK_SIZE >> 9); + if(ra_blocks > blocks) blocks = ra_blocks; + + /* + * this is for stopping read ahead at EOF. It's important for + * reading PhotoCD's, because they have many small data tracks instead + * of one big. And between two data-tracks are some unreadable sectors. + * A read ahead after a EOF may try to read such an unreadable sector. + * kraxel@cs.tu-berlin.de (Gerd Knorr) + */ + total_blocks = (inode->i_size + (1 << ISOFS_BUFFER_BITS(inode)) - 1) + >> ISOFS_BUFFER_BITS(inode); + if (block + blocks > total_blocks) + blocks = total_blocks - block; + + max_block = (inode->i_size + BLOCK_SIZE - 1)/BLOCK_SIZE; + nextblock = -1; + + /* We do this in a two stage process. We first try and request + as many blocks as we can, then we wait for the first one to + complete, and then we try and wrap up as many as are actually + done. This routine is rather generic, in that it can be used + in a filesystem by substituting the appropriate function in + for getblk. + + This routine is optimized to make maximum use of the various + buffers and caches. */ + + do { + bhrequest = 0; + while (blocks) { + int uptodate; + --blocks; + *bhb = getblk(inode->i_dev,isofs_bmap(inode, block++), ISOFS_BUFFER_SIZE(inode)); + uptodate = 1; + if (*bhb && !(*bhb)->b_uptodate) { + uptodate = 0; + bhreq[bhrequest++] = *bhb; + }; + + if (++bhb == &buflist[NBUF]) + bhb = buflist; + + /* If the block we have on hand is uptodate, go ahead + and complete processing. 
*/ + if(uptodate) break; + + if (bhb == bhe) + break; + } + + /* Now request them all */ + if (bhrequest) + ll_rw_block(READ, bhrequest, bhreq); + + do{ /* Finish off all I/O that has actually completed */ + if (*bhe) {/* test for valid buffer */ + wait_on_buffer(*bhe); + if (!(*bhe)->b_uptodate) { + brelse(*bhe); + if (++bhe == &buflist[NBUF]) + bhe = buflist; + left = 0; + break; + } + } + + if (left < ISOFS_BUFFER_SIZE(inode) - offset) + chars = left; + else + chars = ISOFS_BUFFER_SIZE(inode) - offset; + filp->f_pos += chars; + left -= chars; + read += chars; + if (*bhe) { + if (inode->u.isofs_i.i_file_format == ISOFS_FILE_TEXT || + inode->u.isofs_i.i_file_format == ISOFS_FILE_TEXT_M) + unixify_to_fs(buf, offset+(*bhe)->b_data, chars, + inode->u.isofs_i.i_file_format); + else + memcpy_tofs(buf,offset+(*bhe)->b_data,chars); + brelse(*bhe); + buf += chars; + } else { + while (chars-->0) + put_fs_byte(0,buf++); + } + offset = 0; + if (++bhe == &buflist[NBUF]) + bhe = buflist; + } while( bhe != bhb && (*bhe == 0 || !(*bhe)->b_lock) && + (left > 0)); + } while (left > 0); + +/* Release the read-ahead blocks */ + while (bhe != bhb) { + if (*bhe) brelse(*bhe); + if (++bhe == &buflist[NBUF]) + bhe = buflist; + }; + + filp->f_reada = 1; + + if (!read) + return -EIO; + return read; +} diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c new file mode 100644 index 000000000..c1754e337 --- /dev/null +++ b/fs/isofs/inode.c @@ -0,0 +1,707 @@ +/* + * linux/fs/isofs/inode.c + * + * (C) 1992, 1993, 1994 Eric Youngdale Modified for ISO9660 filesystem. 
/* Mount options understood by isofs, as filled in by parse_options(). */
struct iso9660_options{
	char map;			/* 'n' (normal) or 'o' (off) */
	char rock;			/* 'y' unless "norock" was given */
	char cruft;			/* 'y' once "cruft" was given */
	unsigned char conversion;	/* 'b', 't', 'm' or 'a' */
	unsigned int blocksize;		/* 1024 or 2048 */
	gid_t gid;
	uid_t uid;
};

/*
 * Parse a comma separated isofs mount option string into *popt.
 *
 * options - option string, split in place (separators are overwritten
 *           with NUL); NULL means "use the defaults"
 * popt    - always initialised to the defaults first: map 'n', rock 'y',
 *           cruft 'n', conversion 'a', blocksize 1024, uid 0, gid 0
 *
 * Recognised options: "norock", "cruft" (both matched by prefix),
 * "map=o|n|off|normal", "conv=b|t|m|a|binary|text|mtext|auto",
 * "block=1024|2048", "uid=N" and "gid=N" with N in decimal.
 *
 * Returns 1 on success and 0 for an unknown option, a missing value or a
 * malformed value.
 */
static int parse_options(char *options, struct iso9660_options * popt)
{
	char *opt, *val;

	popt->map = 'n';
	popt->rock = 'y';
	popt->cruft = 'n';
	popt->conversion = 'a';
	popt->blocksize = 1024;
	popt->gid = 0;
	popt->uid = 0;

	if (options == NULL)
		return 1;

	for (opt = strtok(options, ","); opt != NULL; opt = strtok(NULL, ",")) {
		/* the two flag options carry no value */
		if (strncmp(opt, "norock", 6) == 0) {
			popt->rock = 'n';
			continue;
		}
		if (strncmp(opt, "cruft", 5) == 0) {
			popt->cruft = 'y';
			continue;
		}

		/* everything else has the form name=value */
		val = strchr(opt, '=');
		if (val == NULL)
			return 0;
		*val++ = 0;

		if (strcmp(opt, "map") == 0) {
			if (val[0] != 0 && val[1] == 0 && strchr("on", val[0]) != NULL)
				popt->map = val[0];
			else if (strcmp(val, "off") == 0)
				popt->map = 'o';
			else if (strcmp(val, "normal") == 0)
				popt->map = 'n';
			else
				return 0;
		} else if (strcmp(opt, "conv") == 0) {
			if (val[0] != 0 && val[1] == 0 && strchr("btma", val[0]) != NULL)
				popt->conversion = val[0];
			else if (strcmp(val, "binary") == 0)
				popt->conversion = 'b';
			else if (strcmp(val, "text") == 0)
				popt->conversion = 't';
			else if (strcmp(val, "mtext") == 0)
				popt->conversion = 'm';
			else if (strcmp(val, "auto") == 0)
				popt->conversion = 'a';
			else
				return 0;
		} else if (strcmp(opt, "block") == 0 ||
			   strcmp(opt, "uid") == 0 ||
			   strcmp(opt, "gid") == 0) {
			unsigned int num = 0;
			char *p;

			for (p = val; *p >= '0' && *p <= '9'; p++)
				num = num * 10 + (*p - '0');
			if (*p != 0)
				return 0;	/* trailing junk after the digits */

			/* the first letter is enough to tell the three apart */
			if (opt[0] == 'b') {
				if (num != 1024 && num != 2048)
					return 0;
				popt->blocksize = num;
			} else if (opt[0] == 'u') {
				popt->uid = num;
			} else {
				popt->gid = num;
			}
		} else {
			return 0;
		}
	}
	return 1;
}
(ISOFS_BLOCK_BITS-blocksize_bits), opt.blocksize))) { + s->s_dev=0; + printk("isofs_read_super: bread failed, dev 0x%x iso_blknum %d\n", + dev, iso_blknum); + unlock_super(s); + return NULL; + } + + vdp = (struct iso_volume_descriptor *)bh->b_data; + hdp = (struct hs_volume_descriptor *)bh->b_data; + + + if (strncmp (hdp->id, HS_STANDARD_ID, sizeof hdp->id) == 0) { + if (isonum_711 (hdp->type) != ISO_VD_PRIMARY) + goto out; + if (isonum_711 (hdp->type) == ISO_VD_END) + goto out; + + s->u.isofs_sb.s_high_sierra = 1; + high_sierra = 1; + opt.rock = 'n'; + h_pri = (struct hs_primary_descriptor *)vdp; + break; + }; + + if (strncmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) == 0) { + if (isonum_711 (vdp->type) != ISO_VD_PRIMARY) + goto out; + if (isonum_711 (vdp->type) == ISO_VD_END) + goto out; + + pri = (struct iso_primary_descriptor *)vdp; + break; + }; + + brelse(bh); + } + if(iso_blknum == 100) { + if (!silent) + printk("Unable to identify CD-ROM format.\n"); + s->s_dev = 0; + unlock_super(s); + return NULL; + }; + + + if(high_sierra){ + rootp = (struct iso_directory_record *) h_pri->root_directory_record; + if (isonum_723 (h_pri->volume_set_size) != 1) { + printk("Multi-volume disks not (yet) supported.\n"); + goto out; + }; + s->u.isofs_sb.s_nzones = isonum_733 (h_pri->volume_space_size); + s->u.isofs_sb.s_log_zone_size = isonum_723 (h_pri->logical_block_size); + s->u.isofs_sb.s_max_size = isonum_733(h_pri->volume_space_size); + } else { + rootp = (struct iso_directory_record *) pri->root_directory_record; + if (isonum_723 (pri->volume_set_size) != 1) { + printk("Multi-volume disks not (yet) supported.\n"); + goto out; + }; + s->u.isofs_sb.s_nzones = isonum_733 (pri->volume_space_size); + s->u.isofs_sb.s_log_zone_size = isonum_723 (pri->logical_block_size); + s->u.isofs_sb.s_max_size = isonum_733(pri->volume_space_size); + } + + s->u.isofs_sb.s_ninodes = 0; /* No way to figure this out easily */ + + /* RDE: convert log zone size to bit shift */ + + switch (s -> 
u.isofs_sb.s_log_zone_size) + { case 512: s -> u.isofs_sb.s_log_zone_size = 9; break; + case 1024: s -> u.isofs_sb.s_log_zone_size = 10; break; + case 2048: s -> u.isofs_sb.s_log_zone_size = 11; break; + + default: + printk("Bad logical zone size %ld\n", s -> u.isofs_sb.s_log_zone_size); + goto out; + } + + /* RDE: data zone now byte offset! */ + + s->u.isofs_sb.s_firstdatazone = (isonum_733( rootp->extent) + << s -> u.isofs_sb.s_log_zone_size); + s->s_magic = ISOFS_SUPER_MAGIC; + + /* The CDROM is read-only, has no nodes (devices) on it, and since + all of the files appear to be owned by root, we really do not want + to allow suid. (suid or devices will not show up unless we have + Rock Ridge extensions) */ + + s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */; + + brelse(bh); + + printk("Max size:%ld Log zone size:%ld\n", + s->u.isofs_sb.s_max_size, + 1UL << s->u.isofs_sb.s_log_zone_size); + printk("First datazone:%ld Root inode number %d\n", + s->u.isofs_sb.s_firstdatazone >> s -> u.isofs_sb.s_log_zone_size, + isonum_733 (rootp->extent) << s -> u.isofs_sb.s_log_zone_size); + if(high_sierra) printk("Disc in High Sierra format.\n"); + unlock_super(s); + /* set up enough so that it can read an inode */ + + s->s_dev = dev; + s->s_op = &isofs_sops; + s->u.isofs_sb.s_mapping = opt.map; + s->u.isofs_sb.s_rock = (opt.rock == 'y' ? 
1 : 0); + s->u.isofs_sb.s_conversion = opt.conversion; + s->u.isofs_sb.s_cruft = opt.cruft; + s->u.isofs_sb.s_uid = opt.uid; + s->u.isofs_sb.s_gid = opt.gid; + s->s_blocksize = opt.blocksize; + s->s_blocksize_bits = blocksize_bits; + s->s_mounted = iget(s, isonum_733 (rootp->extent) << s -> u.isofs_sb.s_log_zone_size); + unlock_super(s); + + if (!(s->s_mounted)) { + s->s_dev=0; + printk("get root inode failed\n"); + return NULL; + } + + if(!check_disk_change(s->s_dev)) return s; + out: /* Kick out for various error conditions */ + brelse(bh); + s->s_dev = 0; + unlock_super(s); + return NULL; +} + +void isofs_statfs (struct super_block *sb, struct statfs *buf) +{ + put_fs_long(ISOFS_SUPER_MAGIC, &buf->f_type); + put_fs_long(1 << ISOFS_BLOCK_BITS, &buf->f_bsize); + put_fs_long(sb->u.isofs_sb.s_nzones, &buf->f_blocks); + put_fs_long(0, &buf->f_bfree); + put_fs_long(0, &buf->f_bavail); + put_fs_long(sb->u.isofs_sb.s_ninodes, &buf->f_files); + put_fs_long(0, &buf->f_ffree); + put_fs_long(NAME_MAX, &buf->f_namelen); + /* Don't know what value to put in buf->f_fsid */ +} + +int isofs_bmap(struct inode * inode,int block) +{ + + if (block<0) { + printk("_isofs_bmap: block<0"); + return 0; + } + return (inode->u.isofs_i.i_first_extent >> ISOFS_BUFFER_BITS(inode)) + block; +} + +void isofs_read_inode(struct inode * inode) +{ + unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); + struct buffer_head * bh; + struct iso_directory_record * raw_inode; + unsigned char *pnt = NULL; + void *cpnt = NULL; + int high_sierra; + int block; + int i; + + block = inode->i_ino >> ISOFS_BUFFER_BITS(inode); + if (!(bh=bread(inode->i_dev,block, bufsize))) { + printk("unable to read i-node block"); + goto fail; + } + + pnt = ((unsigned char *) bh->b_data + + (inode->i_ino & (bufsize - 1))); + raw_inode = ((struct iso_directory_record *) pnt); + high_sierra = inode->i_sb->u.isofs_sb.s_high_sierra; + + if ((inode->i_ino & (bufsize - 1)) + *pnt > bufsize){ + int frag1, offset; + + offset = 
(inode->i_ino & (bufsize - 1)); + frag1 = bufsize - offset; + cpnt = kmalloc(*pnt,GFP_KERNEL); + if (cpnt == NULL) { + printk(KERN_INFO "NoMem ISO inode %lu\n",inode->i_ino); + brelse(bh); + goto fail; + } + memcpy(cpnt, bh->b_data + offset, frag1); + brelse(bh); + if (!(bh = bread(inode->i_dev,++block, bufsize))) { + kfree(cpnt); + printk("unable to read i-node block"); + goto fail; + } + offset += *pnt - bufsize; + memcpy((char *)cpnt+frag1, bh->b_data, offset); + pnt = ((unsigned char *) cpnt); + raw_inode = ((struct iso_directory_record *) pnt); + } + + inode->i_mode = S_IRUGO; /* Everybody gets to read the file. */ + inode->i_nlink = 1; + + if (raw_inode->flags[-high_sierra] & 2) { + inode->i_mode = S_IRUGO | S_IXUGO | S_IFDIR; + inode->i_nlink = 1; /* Set to 1. We know there are 2, but + the find utility tries to optimize + if it is 2, and it screws up. It is + easier to give 1 which tells find to + do it the hard way. */ + } else { + inode->i_mode = S_IRUGO; /* Everybody gets to read the file. */ + inode->i_nlink = 1; + inode->i_mode |= S_IFREG; +/* If there are no periods in the name, then set the execute permission bit */ + for(i=0; i< raw_inode->name_len[0]; i++) + if(raw_inode->name[i]=='.' || raw_inode->name[i]==';') + break; + if(i == raw_inode->name_len[0] || raw_inode->name[i] == ';') + inode->i_mode |= S_IXUGO; /* execute permission */ + } + inode->i_uid = inode->i_sb->u.isofs_sb.s_uid; + inode->i_gid = inode->i_sb->u.isofs_sb.s_gid; + inode->i_size = isonum_733 (raw_inode->size); + + /* There are defective discs out there - we do this to protect + ourselves. A cdrom will never contain more than 700Mb */ + if((inode->i_size < 0 || inode->i_size > 700000000) && + inode->i_sb->u.isofs_sb.s_cruft == 'n') { + printk("Warning: defective cdrom. Enabling \"cruft\" mount option.\n"); + inode->i_sb->u.isofs_sb.s_cruft = 'y'; + } + +/* Some dipshit decided to store some other bit of information in the high + byte of the file length. Catch this and holler. 
WARNING: this will make + it impossible for a file to be > 16Mb on the CDROM!!!*/ + + if(inode->i_sb->u.isofs_sb.s_cruft == 'y' && + inode->i_size & 0xff000000){ +/* printk("Illegal format on cdrom. Pester manufacturer.\n"); */ + inode->i_size &= 0x00ffffff; + } + + if (raw_inode->interleave[0]) { + printk("Interleaved files not (yet) supported.\n"); + inode->i_size = 0; + } + + /* I have no idea what file_unit_size is used for, so + we will flag it for now */ + if(raw_inode->file_unit_size[0] != 0){ + printk("File unit size != 0 for ISO file (%ld).\n",inode->i_ino); + } + + /* I have no idea what other flag bits are used for, so + we will flag it for now */ +#ifdef DEBUG + if((raw_inode->flags[-high_sierra] & ~2)!= 0){ + printk("Unusual flag settings for ISO file (%ld %x).\n", + inode->i_ino, raw_inode->flags[-high_sierra]); + } +#endif + +#ifdef DEBUG + printk("Get inode %d: %d %d: %d\n",inode->i_ino, block, + ((int)pnt) & 0x3ff, inode->i_size); +#endif + + inode->i_mtime = inode->i_atime = inode->i_ctime = + iso_date(raw_inode->date, high_sierra); + + inode->u.isofs_i.i_first_extent = (isonum_733 (raw_inode->extent) + + isonum_711 (raw_inode->ext_attr_length)) + << inode -> i_sb -> u.isofs_sb.s_log_zone_size; + + inode->u.isofs_i.i_backlink = 0xffffffff; /* Will be used for previous directory */ + switch (inode->i_sb->u.isofs_sb.s_conversion){ + case 'a': + inode->u.isofs_i.i_file_format = ISOFS_FILE_UNKNOWN; /* File type */ + break; + case 'b': + inode->u.isofs_i.i_file_format = ISOFS_FILE_BINARY; /* File type */ + break; + case 't': + inode->u.isofs_i.i_file_format = ISOFS_FILE_TEXT; /* File type */ + break; + case 'm': + inode->u.isofs_i.i_file_format = ISOFS_FILE_TEXT_M; /* File type */ + break; + } + +/* Now test for possible Rock Ridge extensions which will override some of + these numbers in the inode structure. 
*/ + + if (!high_sierra) + parse_rock_ridge_inode(raw_inode, inode); + +#ifdef DEBUG + printk("Inode: %x extent: %x\n",inode->i_ino, inode->u.isofs_i.i_first_extent); +#endif + brelse(bh); + + inode->i_op = NULL; + + /* A volume number of 0 is nonsense. Disable checking if we see + this */ + if (inode->i_sb->u.isofs_sb.s_cruft == 'n' && + isonum_723 (raw_inode->volume_sequence_number) == 0) { + printk("Warning: defective cdrom. Enabling \"cruft\" mount option.\n"); + inode->i_sb->u.isofs_sb.s_cruft = 'y'; + } + + if (inode->i_sb->u.isofs_sb.s_cruft != 'y' && + isonum_723 (raw_inode->volume_sequence_number) != 1) { + printk("Multi volume CD somehow got mounted.\n"); + } else { + if (S_ISREG(inode->i_mode)) + inode->i_op = &isofs_file_inode_operations; + else if (S_ISDIR(inode->i_mode)) + inode->i_op = &isofs_dir_inode_operations; + else if (S_ISLNK(inode->i_mode)) + inode->i_op = &isofs_symlink_inode_operations; + else if (S_ISCHR(inode->i_mode)) + inode->i_op = &chrdev_inode_operations; + else if (S_ISBLK(inode->i_mode)) + inode->i_op = &blkdev_inode_operations; + else if (S_ISFIFO(inode->i_mode)) + init_fifo(inode); + } + if (cpnt) { + kfree (cpnt); + cpnt = NULL; + } + return; + fail: + /* With a data error we return this information */ + inode->i_mtime = inode->i_atime = inode->i_ctime = 0; + inode->u.isofs_i.i_first_extent = 0; + inode->u.isofs_i.i_backlink = 0xffffffff; + inode->i_size = 0; + inode->i_nlink = 1; + inode->i_uid = inode->i_gid = 0; + inode->i_mode = S_IFREG; /*Regular file, no one gets to read*/ + inode->i_op = NULL; + return; +} + +/* There are times when we need to know the inode number of a parent of + a particular directory. When control passes through a routine that + has access to the parent information, it fills it into the inode structure, + but sometimes the inode gets flushed out of the queue, and someone + remembers the number. When they try to open up again, we have lost + the information. The '..' 
entry on the disc points to the data area + for a particular inode, so we can follow these links back up, but since + we do not know the inode number, we do not actually know how large the + directory is. The disc is almost always correct, and there is + enough error checking on the drive itself, but an open ended search + makes me a little nervous. + + The bsd iso filesystem uses the extent number for an inode, and this + would work really nicely for us except that the read_inode function + would not have any clean way of finding the actual directory record + that goes with the file. If we had such info, then it would pay + to change the inode numbers and eliminate this function. +*/ + +int isofs_lookup_grandparent(struct inode * parent, int extent) +{ + unsigned long bufsize = ISOFS_BUFFER_SIZE(parent); + unsigned char bufbits = ISOFS_BUFFER_BITS(parent); + unsigned int block,offset; + int parent_dir, inode_number; + int old_offset; + void * cpnt = NULL; + int result; + int directory_size; + struct buffer_head * bh; + struct iso_directory_record * de; + + offset = 0; + block = extent << (ISOFS_BLOCK_BITS - bufbits); + if (!(bh = bread(parent->i_dev, block, bufsize))) return -1; + + while (1 == 1) { + de = (struct iso_directory_record *) (bh->b_data + offset); + if (*((unsigned char *) de) == 0) + { + brelse(bh); + return -1; + } + + offset += *((unsigned char *) de); + + if (offset >= bufsize) + { + printk(".. Directory not in first block" + " of directory.\n"); + brelse(bh); + return -1; + } + + if (de->name_len[0] == 1 && de->name[0] == 1) + { + parent_dir = find_rock_ridge_relocation(de, parent); + directory_size = isonum_733 (de->size); + brelse(bh); + break; + } + } +#ifdef DEBUG + printk("Parent dir:%x\n",parent_dir); +#endif + /* Now we know the extent where the parent dir starts on. 
*/ + + result = -1; + + offset = 0; + block = parent_dir << (ISOFS_BLOCK_BITS - bufbits); + if (!block || !(bh = bread(parent->i_dev,block, bufsize))) + return -1; + + for(;;) + { + de = (struct iso_directory_record *) (bh->b_data + offset); + inode_number = (block << bufbits)+(offset & (bufsize - 1)); + + /* If the length byte is zero, we should move on to the next + CDROM sector. If we are at the end of the directory, we + kick out of the while loop. */ + + if (*((unsigned char *) de) == 0) + { + brelse(bh); + offset = 0; + block++; + directory_size -= bufsize; + if(directory_size < 0) return -1; + if((block & 1) && (ISOFS_BLOCK_BITS - bufbits)) + return -1; + if (!block + || !(bh = bread(parent->i_dev,block, bufsize))) + return -1; + continue; + } + + /* Make sure that the entire directory record is in the current + bh block. If not, we malloc a buffer, and put the two + halves together, so that we can cleanly read the block. */ + + old_offset = offset; + offset += *((unsigned char *) de); + + if (offset >= bufsize) + { + unsigned int frag1; + frag1 = bufsize - old_offset; + cpnt = kmalloc(*((unsigned char *) de),GFP_KERNEL); + if (!cpnt) return -1; + memcpy(cpnt, bh->b_data + old_offset, frag1); + de = (struct iso_directory_record *) ((char *)cpnt); + brelse(bh); + offset -= bufsize; + directory_size -= bufsize; + if(directory_size < 0) return -1; + block++; + if(!(bh = bread(parent->i_dev,block,bufsize))) { + kfree(cpnt); + return -1; + }; + memcpy((char *)cpnt+frag1, bh->b_data, offset); + } + + if (find_rock_ridge_relocation(de, parent) == extent){ + result = inode_number; + goto out; + } + + if (cpnt) { + kfree(cpnt); + cpnt = NULL; + } + } + + /* We go here for any condition we cannot handle. + We also drop through to here at the end of the directory. 
*/ + + out: + if (cpnt) { + kfree(cpnt); + cpnt = NULL; + } + brelse(bh); +#ifdef DEBUG + printk("Resultant Inode %d\n",result); +#endif + return result; +} + +#ifdef LEAK_CHECK +#undef malloc +#undef free_s +#undef bread +#undef brelse + +void * leak_check_malloc(unsigned int size){ + void * tmp; + check_malloc++; + tmp = kmalloc(size, GFP_KERNEL); + return tmp; +} + +void leak_check_free_s(void * obj, int size){ + check_malloc--; + return kfree_s(obj, size); +} + +struct buffer_head * leak_check_bread(int dev, int block, int size){ + check_bread++; + return bread(dev, block, size); +} + +void leak_check_brelse(struct buffer_head * bh){ + check_bread--; + return brelse(bh); +} + +#endif diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c new file mode 100644 index 000000000..1473b1b7f --- /dev/null +++ b/fs/isofs/namei.c @@ -0,0 +1,268 @@ +/* + * linux/fs/isofs/namei.c + * + * (C) 1992 Eric Youngdale Modified for ISO9660 filesystem. + * + * (C) 1991 Linus Torvalds - minix filesystem + */ + +#include <linux/sched.h> +#include <linux/iso_fs.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <asm/segment.h> +#include <linux/malloc.h> + +#include <linux/errno.h> + +/* + * ok, we cannot use strncmp, as the name is not in our data space. + * Thus we'll have to use isofs_match. No big problem. Match also makes + * some sanity tests. + * + * NOTE! unlike strncmp, isofs_match returns 1 for success, 0 for failure. + */ +static int isofs_match(int len,const char * name, char * compare, int dlen) +{ + if (!compare) + return 0; + + /* check special "." and ".." files */ + if (dlen == 1) { + /* "." 
*/ + if (compare[0] == 0) { + if (!len) + return 1; + compare = "."; + } else if (compare[0] == 1) { + compare = ".."; + dlen = 2; + } + } +#if 0 + if (len <= 2) printk("Match: %d %d %s %d %d \n",len,dlen,compare,de->name[0], dlen); +#endif + + if (dlen != len) + return 0; + return !memcmp(name, compare, len); +} + +/* + * isofs_find_entry() + * + * finds an entry in the specified directory with the wanted name. It + * returns the cache buffer in which the entry was found, and the entry + * itself (as an inode number). It does NOT read the inode of the + * entry - you'll have to do that yourself if you want to. + */ +static struct buffer_head * isofs_find_entry(struct inode * dir, + const char * name, int namelen, unsigned long * ino, unsigned long * ino_back) +{ + unsigned long bufsize = ISOFS_BUFFER_SIZE(dir); + unsigned char bufbits = ISOFS_BUFFER_BITS(dir); + unsigned int block, i, f_pos, offset, inode_number; + struct buffer_head * bh; + void * cpnt = NULL; + unsigned int old_offset; + unsigned int backlink; + int dlen, rrflag, match; + int high_sierra = 0; + char * dpnt; + struct iso_directory_record * de; + char c; + + *ino = 0; + if (!dir) return NULL; + + if (!(block = dir->u.isofs_i.i_first_extent)) return NULL; + + f_pos = 0; + + offset = f_pos & (bufsize - 1); + block = isofs_bmap(dir,f_pos >> bufbits); + + if (!block || !(bh = bread(dir->i_dev,block,bufsize))) return NULL; + + while (f_pos < dir->i_size) { + de = (struct iso_directory_record *) (bh->b_data + offset); + backlink = dir->i_ino; + inode_number = (block << bufbits) + (offset & (bufsize - 1)); + + /* If byte is zero, this is the end of file, or time to move to + the next sector. Usually 2048 byte boundaries. 
*/ + + if (*((unsigned char *) de) == 0) { + brelse(bh); + offset = 0; + f_pos = ((f_pos & ~(ISOFS_BLOCK_SIZE - 1)) + + ISOFS_BLOCK_SIZE); + block = isofs_bmap(dir,f_pos>>bufbits); + if (!block || !(bh = bread(dir->i_dev,block,bufsize))) + return 0; + continue; /* Will kick out if past end of directory */ + } + + old_offset = offset; + offset += *((unsigned char *) de); + f_pos += *((unsigned char *) de); + + /* Handle case where the directory entry spans two blocks. + Usually 1024 byte boundaries */ + if (offset >= bufsize) { + unsigned int frag1; + frag1 = bufsize - old_offset; + cpnt = kmalloc(*((unsigned char *) de),GFP_KERNEL); + if (!cpnt) return 0; + memcpy(cpnt, bh->b_data + old_offset, frag1); + + de = (struct iso_directory_record *) cpnt; + brelse(bh); + offset = f_pos & (bufsize - 1); + block = isofs_bmap(dir,f_pos>>bufbits); + if (!block || !(bh = bread(dir->i_dev,block,bufsize))) { + kfree(cpnt); + return 0; + }; + memcpy((char *)cpnt+frag1, bh->b_data, offset); + } + + /* Handle the '.' case */ + + if (de->name[0]==0 && de->name_len[0]==1) { + inode_number = dir->i_ino; + backlink = 0; + } + + /* Handle the '..' case */ + + if (de->name[0]==1 && de->name_len[0]==1) { +#if 0 + printk("Doing .. 
(%d %d)", + dir->i_sb->s_firstdatazone, + dir->i_ino); +#endif + if((dir->i_sb->u.isofs_sb.s_firstdatazone) != dir->i_ino) + inode_number = dir->u.isofs_i.i_backlink; + else + inode_number = dir->i_ino; + backlink = 0; + } + + /* Do not report hidden or associated files */ + high_sierra = dir->i_sb->u.isofs_sb.s_high_sierra; + if (de->flags[-high_sierra] & 5) { + if (cpnt) { + kfree(cpnt); + cpnt = NULL; + }; + continue; + } + + dlen = de->name_len[0]; + dpnt = de->name; + /* Now convert the filename in the buffer to lower case */ + rrflag = get_rock_ridge_filename(de, &dpnt, &dlen, dir); + if (rrflag) { + if (rrflag == -1) goto out; /* Relocated deep directory */ + } else { + if(dir->i_sb->u.isofs_sb.s_mapping == 'n') { + for (i = 0; i < dlen; i++) { + c = dpnt[i]; + if (c >= 'A' && c <= 'Z') c |= 0x20; /* lower case */ + if (c == ';' && i == dlen-2 && dpnt[i+1] == '1') { + dlen -= 2; + break; + } + if (c == ';') c = '.'; + de->name[i] = c; + } + /* This allows us to match with and without a trailing + period. */ + if(dpnt[dlen-1] == '.' && namelen == dlen-1) + dlen--; + } + } + match = isofs_match(namelen,name,dpnt,dlen); + if (cpnt) { + kfree(cpnt); + cpnt = NULL; + } + + if(rrflag) kfree(dpnt); + if (match) { + if(inode_number == -1) { + /* Should only happen for the '..' 
entry */ + inode_number = + isofs_lookup_grandparent(dir, + find_rock_ridge_relocation(de,dir)); + if(inode_number == -1){ + /* Should never happen */ + printk("Backlink not properly set.\n"); + goto out; + } + } + *ino = inode_number; + *ino_back = backlink; + return bh; + } + } + out: + if (cpnt) + kfree(cpnt); + brelse(bh); + return NULL; +} + +int isofs_lookup(struct inode * dir,const char * name, int len, + struct inode ** result) +{ + unsigned long ino, ino_back; + struct buffer_head * bh; + +#ifdef DEBUG + printk("lookup: %x %d\n",dir->i_ino, len); +#endif + *result = NULL; + if (!dir) + return -ENOENT; + + if (!S_ISDIR(dir->i_mode)) { + iput(dir); + return -ENOENT; + } + + ino = 0; + + if (dcache_lookup(dir, name, len, &ino)) ino_back = dir->i_ino; + + if (!ino) { + if (!(bh = isofs_find_entry(dir,name,len, &ino, &ino_back))) { + iput(dir); + return -ENOENT; + } + if (ino_back == dir->i_ino) + dcache_add(dir, name, len, ino); + brelse(bh); + }; + + if (!(*result = iget(dir->i_sb,ino))) { + iput(dir); + return -EACCES; + } + + /* We need this backlink for the ".." entry unless the name that we + are looking up traversed a mount point (in which case the inode + may not even be on an iso9660 filesystem, and writing to + u.isofs_i would only cause memory corruption). 
+ */ + + if (ino_back && !(*result)->i_pipe && (*result)->i_sb == dir->i_sb) { + (*result)->u.isofs_i.i_backlink = ino_back; + } + + iput(dir); + return 0; +} diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c new file mode 100644 index 000000000..686c1d910 --- /dev/null +++ b/fs/isofs/rock.c @@ -0,0 +1,523 @@ +/* + * linux/fs/isofs/rock.c + * + * (C) 1992, 1993 Eric Youngdale + * + * Rock Ridge Extensions to iso9660 + */ +#include <linux/config.h> +#include <linux/stat.h> +#include <linux/sched.h> +#include <linux/iso_fs.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/malloc.h> + +#include "rock.h" + +/* These functions are designed to read the system areas of a directory record + * and extract relevant information. There are different functions provided + * depending upon what information we need at the time. One function fills + * out an inode structure, a second one extracts a filename, a third one + * returns a symbolic link name, and a fourth one returns the extent number + * for the file. */ + +#define SIG(A,B) ((A << 8) | B) + + +/* This is a way of ensuring that we have something in the system + use fields that is compatible with Rock Ridge */ +#define CHECK_SP(FAIL) \ + if(rr->u.SP.magic[0] != 0xbe) FAIL; \ + if(rr->u.SP.magic[1] != 0xef) FAIL; + +/* We define a series of macros because each function must do exactly the + same thing in certain places. 
We use the macros to ensure that everything + is done correctly */ + +#define CONTINUE_DECLS \ + int cont_extent = 0, cont_offset = 0, cont_size = 0; \ + void * buffer = 0 + +#define CHECK_CE \ + {cont_extent = isonum_733(rr->u.CE.extent); \ + cont_offset = isonum_733(rr->u.CE.offset); \ + cont_size = isonum_733(rr->u.CE.size);} + +#define SETUP_ROCK_RIDGE(DE,CHR,LEN) \ + {LEN= sizeof(struct iso_directory_record) + DE->name_len[0]; \ + if(LEN & 1) LEN++; \ + CHR = ((unsigned char *) DE) + LEN; \ + LEN = *((unsigned char *) DE) - LEN;} + +#define MAYBE_CONTINUE(LABEL,DEV) \ + {if (buffer) kfree(buffer); \ + if (cont_extent){ \ + int block, offset, offset1; \ + struct buffer_head * bh; \ + buffer = kmalloc(cont_size,GFP_KERNEL); \ + if (!buffer) goto out; \ + block = cont_extent; \ + offset = cont_offset; \ + offset1 = 0; \ + if(ISOFS_BUFFER_SIZE(DEV) == 1024) { \ + block <<= 1; \ + if (offset >= 1024) block++; \ + offset &= 1023; \ + if(offset + cont_size >= 1024) { \ + bh = bread(DEV->i_dev, block++, ISOFS_BUFFER_SIZE(DEV)); \ + if(!bh) {printk("Unable to read continuation Rock Ridge record\n"); \ + kfree(buffer); \ + buffer = NULL; } else { \ + memcpy(buffer, bh->b_data + offset, 1024 - offset); \ + brelse(bh); \ + offset1 = 1024 - offset; \ + offset = 0;} \ + } \ + }; \ + if(buffer) { \ + bh = bread(DEV->i_dev, block, ISOFS_BUFFER_SIZE(DEV)); \ + if(bh){ \ + memcpy(buffer + offset1, bh->b_data + offset, cont_size - offset1); \ + brelse(bh); \ + chr = (unsigned char *) buffer; \ + len = cont_size; \ + cont_extent = 0; \ + cont_size = 0; \ + cont_offset = 0; \ + goto LABEL; \ + }; \ + } \ + printk("Unable to read rock-ridge attributes\n"); \ + }} + +/* This is the inner layer of the get filename routine, and is called + for each system area and continuation record related to the file */ + +int find_rock_ridge_relocation(struct iso_directory_record * de, + struct inode * inode) { + int flag; + int len; + int retval; + unsigned char * chr; + CONTINUE_DECLS; + flag = 
0; + + /* If this is a '..' then we are looking for the parent, otherwise we + are looking for the child */ + + if (de->name[0]==1 && de->name_len[0]==1) flag = 1; + /* Return value if we do not find appropriate record. */ + retval = isonum_733 (de->extent); + + if (!inode->i_sb->u.isofs_sb.s_rock) return retval; + + SETUP_ROCK_RIDGE(de, chr, len); + repeat: + { + int rrflag, sig; + struct rock_ridge * rr; + + while (len > 1){ /* There may be one byte for padding somewhere */ + rr = (struct rock_ridge *) chr; + if (rr->len == 0) goto out; /* Something got screwed up here */ + sig = (chr[0] << 8) + chr[1]; + chr += rr->len; + len -= rr->len; + + switch(sig){ + case SIG('R','R'): + rrflag = rr->u.RR.flags[0]; + if (flag && !(rrflag & RR_PL)) goto out; + if (!flag && !(rrflag & RR_CL)) goto out; + break; + case SIG('S','P'): + CHECK_SP(goto out); + break; + case SIG('C','L'): +#ifdef DEBUG + printk("RR: CL\n"); +#endif + if (flag == 0) { + retval = isonum_733(rr->u.CL.location); + goto out; + }; + break; + case SIG('P','L'): +#ifdef DEBUG + printk("RR: PL\n"); +#endif + if (flag != 0) { + retval = isonum_733(rr->u.PL.location); + goto out; + }; + break; + case SIG('C','E'): + CHECK_CE; /* This tells is if there is a continuation record */ + break; + default: + break; + } + }; + }; + MAYBE_CONTINUE(repeat, inode); + return retval; + out: + if(buffer) kfree(buffer); + return retval; +} + +int get_rock_ridge_filename(struct iso_directory_record * de, + char ** name, int * namlen, struct inode * inode) +{ + int len; + unsigned char * chr; + CONTINUE_DECLS; + char * retname = NULL; + int retnamlen = 0, truncate=0; + + if (!inode->i_sb->u.isofs_sb.s_rock) return 0; + + SETUP_ROCK_RIDGE(de, chr, len); + repeat: + { + struct rock_ridge * rr; + int sig; + + while (len > 1){ /* There may be one byte for padding somewhere */ + rr = (struct rock_ridge *) chr; + if (rr->len == 0) goto out; /* Something got screwed up here */ + sig = (chr[0] << 8) + chr[1]; + chr += rr->len; + len 
-= rr->len; + + switch(sig){ + case SIG('R','R'): + if((rr->u.RR.flags[0] & RR_NM) == 0) goto out; + break; + case SIG('S','P'): + CHECK_SP(goto out); + break; + case SIG('C','E'): + CHECK_CE; + break; + case SIG('N','M'): + if (truncate) break; + if (rr->u.NM.flags & ~1) { + printk("Unsupported NM flag settings (%d)\n",rr->u.NM.flags); + break; + }; + if (!retname){ + retname = (char *) kmalloc (255,GFP_KERNEL); + /* This may be a waste, but we only + need this for a moment. The layers + that call this function should + deallocate the mem fairly soon + after control is returned */ + + if (!retname) goto out; + *retname = 0; /* Zero length string */ + retnamlen = 0; + }; + if((strlen(retname) + rr->len - 5) >= 254) { + truncate = 1; + break; + }; + strncat(retname, rr->u.NM.name, rr->len - 5); + retnamlen += rr->len - 5; + break; + case SIG('R','E'): +#ifdef DEBUG + printk("RR: RE (%x)\n", inode->i_ino); +#endif + if (buffer) kfree(buffer); + if (retname) kfree(retname); + return -1; + default: + break; + } + }; + } + MAYBE_CONTINUE(repeat,inode); + if(retname){ + *name = retname; + *namlen = retnamlen; + return 1; + }; + return 0; /* This file did not have a NM field */ + out: + if(buffer) kfree(buffer); + if (retname) kfree(retname); + return 0; +} + +int parse_rock_ridge_inode(struct iso_directory_record * de, + struct inode * inode){ + int len; + unsigned char * chr; + CONTINUE_DECLS; + + if (!inode->i_sb->u.isofs_sb.s_rock) return 0; + + SETUP_ROCK_RIDGE(de, chr, len); + repeat: + { + int cnt, sig; + struct inode * reloc; + struct rock_ridge * rr; + int rootflag; + + while (len > 1){ /* There may be one byte for padding somewhere */ + rr = (struct rock_ridge *) chr; + if (rr->len == 0) goto out; /* Something got screwed up here */ + sig = (chr[0] << 8) + chr[1]; + chr += rr->len; + len -= rr->len; + + switch(sig){ + case SIG('R','R'): + if((rr->u.RR.flags[0] & + (RR_PX | RR_TF | RR_SL | RR_CL)) == 0) goto out; + break; + case SIG('S','P'): + CHECK_SP(goto 
out); + break; + case SIG('C','E'): + CHECK_CE; + break; + case SIG('E','R'): + printk("ISO9660 Extensions: "); + { int p; + for(p=0;p<rr->u.ER.len_id;p++) printk("%c",rr->u.ER.data[p]); + }; + printk("\n"); + break; + case SIG('P','X'): + inode->i_mode = isonum_733(rr->u.PX.mode); + inode->i_nlink = isonum_733(rr->u.PX.n_links); + inode->i_uid = isonum_733(rr->u.PX.uid); + inode->i_gid = isonum_733(rr->u.PX.gid); + break; + case SIG('P','N'): + { int high, low; + high = isonum_733(rr->u.PN.dev_high); + low = isonum_733(rr->u.PN.dev_low); + inode->i_rdev = ((high << 8) | (low & 0xff)) & 0xffff; + }; + break; + case SIG('T','F'): + /* Some RRIP writers incorrectly place ctime in the TF_CREATE field. + Try and handle this correctly for either case. */ + cnt = 0; /* Rock ridge never appears on a High Sierra disk */ + if(rr->u.TF.flags & TF_CREATE) + inode->i_ctime = iso_date(rr->u.TF.times[cnt++].time, 0); + if(rr->u.TF.flags & TF_MODIFY) + inode->i_mtime = iso_date(rr->u.TF.times[cnt++].time, 0); + if(rr->u.TF.flags & TF_ACCESS) + inode->i_atime = iso_date(rr->u.TF.times[cnt++].time, 0); + if(rr->u.TF.flags & TF_ATTRIBUTES) + inode->i_ctime = iso_date(rr->u.TF.times[cnt++].time, 0); + break; + case SIG('S','L'): + {int slen; + struct SL_component * slp; + slen = rr->len - 5; + slp = &rr->u.SL.link; + inode->i_size = 0; + while (slen > 1){ + rootflag = 0; + switch(slp->flags &~1){ + case 0: + inode->i_size += slp->len; + break; + case 2: + inode->i_size += 1; + break; + case 4: + inode->i_size += 2; + break; + case 8: + rootflag = 1; + inode->i_size += 1; + break; + default: + printk("Symlink component flag not implemented\n"); + }; + slen -= slp->len + 2; + slp = (struct SL_component *) (((char *) slp) + slp->len + 2); + + if(slen < 2) break; + if(!rootflag) inode->i_size += 1; + }; + }; + break; + case SIG('R','E'): + printk("Attempt to read inode for relocated directory\n"); + goto out; + case SIG('C','L'): +#ifdef DEBUG + printk("RR CL (%x)\n",inode->i_ino); 
+#endif + inode->u.isofs_i.i_first_extent = isonum_733(rr->u.CL.location) << + inode -> i_sb -> u.isofs_sb.s_log_zone_size; + reloc = iget(inode->i_sb, inode->u.isofs_i.i_first_extent); + inode->i_mode = reloc->i_mode; + inode->i_nlink = reloc->i_nlink; + inode->i_uid = reloc->i_uid; + inode->i_gid = reloc->i_gid; + inode->i_rdev = reloc->i_rdev; + inode->i_size = reloc->i_size; + inode->i_atime = reloc->i_atime; + inode->i_ctime = reloc->i_ctime; + inode->i_mtime = reloc->i_mtime; + iput(reloc); + break; + default: + break; + } + }; + } + MAYBE_CONTINUE(repeat,inode); + return 0; + out: + if(buffer) kfree(buffer); + return 0; +} + + +/* Returns the name of the file that this inode is symlinked to. This is + in malloc'd memory, so it needs to be freed, once we are through with it */ + +char * get_rock_ridge_symlink(struct inode * inode) +{ + unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); + unsigned char bufbits = ISOFS_BUFFER_BITS(inode); + struct buffer_head * bh; + unsigned char * pnt; + void * cpnt = NULL; + char * rpnt; + struct iso_directory_record * raw_inode; + CONTINUE_DECLS; + int block; + int sig; + int rootflag; + int len; + unsigned char * chr; + struct rock_ridge * rr; + + if (!inode->i_sb->u.isofs_sb.s_rock) + panic("Cannot have symlink with high sierra variant of iso filesystem\n"); + + rpnt = 0; + + block = inode->i_ino >> bufbits; + if (!(bh=bread(inode->i_dev,block, bufsize))) { + printk("unable to read i-node block"); + return NULL; + }; + + pnt = ((unsigned char *) bh->b_data) + (inode->i_ino & (bufsize - 1)); + + raw_inode = ((struct iso_directory_record *) pnt); + + if ((inode->i_ino & (bufsize - 1)) + *pnt > bufsize){ + int frag1, offset; + + offset = (inode->i_ino & (bufsize - 1)); + frag1 = bufsize - offset; + cpnt = kmalloc(*pnt,GFP_KERNEL); + if(!cpnt) return NULL; + memcpy(cpnt, bh->b_data + offset, frag1); + brelse(bh); + if (!(bh = bread(inode->i_dev,++block, bufsize))) { + kfree(cpnt); + printk("unable to read i-node block"); + 
return NULL; + }; + offset += *pnt - bufsize; + memcpy((char *)cpnt+frag1, bh->b_data, offset); + pnt = ((unsigned char *) cpnt); + raw_inode = ((struct iso_directory_record *) pnt); + }; + + /* Now test for possible Rock Ridge extensions which will override some of + these numbers in the inode structure. */ + + SETUP_ROCK_RIDGE(raw_inode, chr, len); + + repeat: + while (len > 1){ /* There may be one byte for padding somewhere */ + if (rpnt) break; + rr = (struct rock_ridge *) chr; + if (rr->len == 0) goto out; /* Something got screwed up here */ + sig = (chr[0] << 8) + chr[1]; + chr += rr->len; + len -= rr->len; + + switch(sig){ + case SIG('R','R'): + if((rr->u.RR.flags[0] & RR_SL) == 0) goto out; + break; + case SIG('S','P'): + CHECK_SP(goto out); + break; + case SIG('S','L'): + {int slen; + struct SL_component * slp; + slen = rr->len - 5; + slp = &rr->u.SL.link; + while (slen > 1){ + if (!rpnt){ + rpnt = (char *) kmalloc (inode->i_size +1, GFP_KERNEL); + if (!rpnt) goto out; + *rpnt = 0; + }; + rootflag = 0; + switch(slp->flags &~1){ + case 0: + strncat(rpnt,slp->text, slp->len); + break; + case 2: + strcat(rpnt,"."); + break; + case 4: + strcat(rpnt,".."); + break; + case 8: + rootflag = 1; + strcat(rpnt,"/"); + break; + default: + printk("Symlink component flag not implemented (%d)\n",slen); + }; + slen -= slp->len + 2; + slp = (struct SL_component *) (((char *) slp) + slp->len + 2); + + if(slen < 2) break; + if(!rootflag) strcat(rpnt,"/"); + }; + break; + default: + break; + } + }; + }; + MAYBE_CONTINUE(repeat,inode); + brelse(bh); + + if (cpnt) { + kfree(cpnt); + cpnt = NULL; + }; + + return rpnt; + out: + if(buffer) kfree(buffer); + return 0; +} + + + + + + diff --git a/fs/isofs/rock.h b/fs/isofs/rock.h new file mode 100644 index 000000000..36057b8fa --- /dev/null +++ b/fs/isofs/rock.h @@ -0,0 +1,111 @@ +/* These structs are used by the system-use-sharing protocol, in which the + Rock Ridge extensions are embedded. 
It is quite possible that other + extensions are present on the disk, and this is fine as long as they + all use SUSP */ + +struct SU_SP{ + unsigned char magic[2]; + unsigned char skip; +}; + +struct SU_CE{ + char extent[8]; + char offset[8]; + char size[8]; +}; + +struct SU_ER{ + unsigned char len_id; + unsigned char len_des; + unsigned char len_src; + unsigned char ext_ver; + char data[0]; +}; + +struct RR_RR{ + char flags[1]; +}; + +struct RR_PX{ + char mode[8]; + char n_links[8]; + char uid[8]; + char gid[8]; +}; + +struct RR_PN{ + char dev_high[8]; + char dev_low[8]; +}; + + +struct SL_component{ + unsigned char flags; + unsigned char len; + char text[0]; +}; + +struct RR_SL{ + unsigned char flags; + struct SL_component link; +}; + +struct RR_NM{ + unsigned char flags; + char name[0]; +}; + +struct RR_CL{ + char location[8]; +}; + +struct RR_PL{ + char location[8]; +}; + +struct stamp{ + char time[7]; +}; + +struct RR_TF{ + char flags; + struct stamp times[0]; /* Variable number of these beasts */ +}; + +/* These are the bits and their meanings for flags in the TF structure. 
*/ +#define TF_CREATE 1 +#define TF_MODIFY 2 +#define TF_ACCESS 4 +#define TF_ATTRIBUTES 8 +#define TF_BACKUP 16 +#define TF_EXPIRATION 32 +#define TF_EFFECTIVE 64 +#define TF_LONG_FORM 128 + +struct rock_ridge{ + char signature[2]; + unsigned char len; + unsigned char version; + union{ + struct SU_SP SP; + struct SU_CE CE; + struct SU_ER ER; + struct RR_RR RR; + struct RR_PX PX; + struct RR_PN PN; + struct RR_SL SL; + struct RR_NM NM; + struct RR_CL CL; + struct RR_PL PL; + struct RR_TF TF; + } u; +}; + +#define RR_PX 1 /* POSIX attributes */ +#define RR_PN 2 /* POSIX devices */ +#define RR_SL 4 /* Symbolic link */ +#define RR_NM 8 /* Alternate Name */ +#define RR_CL 16 /* Child link */ +#define RR_PL 32 /* Parent link */ +#define RR_RE 64 /* Relocation directory */ +#define RR_TF 128 /* Timestamps */ diff --git a/fs/isofs/symlink.c b/fs/isofs/symlink.c new file mode 100644 index 000000000..fa4a45ba6 --- /dev/null +++ b/fs/isofs/symlink.c @@ -0,0 +1,106 @@ +/* + * linux/fs/isofs/symlink.c + * + * (C) 1992 Eric Youngdale Modified for ISO9660 filesystem. + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * isofs symlink handling code. This is only used with the Rock Ridge + * extensions to iso9660 + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/iso_fs.h> +#include <linux/stat.h> +#include <linux/malloc.h> + +static int isofs_readlink(struct inode *, char *, int); +static int isofs_follow_link(struct inode *, struct inode *, int, int, struct inode **); + +/* + * symlinks can't do much... 
+ */ +struct inode_operations isofs_symlink_inode_operations = { + NULL, /* no file-operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + isofs_readlink, /* readlink */ + isofs_follow_link, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static int isofs_follow_link(struct inode * dir, struct inode * inode, + int flag, int mode, struct inode ** res_inode) +{ + int error; + char * pnt; + + if (!dir) { + dir = current->fs->root; + dir->i_count++; + } + if (!inode) { + iput(dir); + *res_inode = NULL; + return -ENOENT; + } + if (!S_ISLNK(inode->i_mode)) { + iput(dir); + *res_inode = inode; + return 0; + } + if ((current->link_count > 5) || + !(pnt = get_rock_ridge_symlink(inode))) { + iput(dir); + iput(inode); + *res_inode = NULL; + return -ELOOP; + } + iput(inode); + current->link_count++; + error = open_namei(pnt,flag,mode,res_inode,dir); + current->link_count--; + kfree(pnt); + return error; +} + +static int isofs_readlink(struct inode * inode, char * buffer, int buflen) +{ + char * pnt; + int i; + char c; + + if (!S_ISLNK(inode->i_mode)) { + iput(inode); + return -EINVAL; + } + + if (buflen > 1023) + buflen = 1023; + pnt = get_rock_ridge_symlink(inode); + + iput(inode); + if (!pnt) + return 0; + i = 0; + + while (i<buflen && (c = pnt[i])) { + i++; + put_fs_byte(c,buffer++); + } + kfree(pnt); + return i; +} diff --git a/fs/isofs/util.c b/fs/isofs/util.c new file mode 100644 index 000000000..dbeee868d --- /dev/null +++ b/fs/isofs/util.c @@ -0,0 +1,131 @@ +/* + * linux/fs/isofs/util.c + * + * The special functions in the file are numbered according to the section + * of the iso 9660 standard in which they are described. isonum_733 will + * convert numbers according to section 7.3.3, etc. + * + * isofs special functions. 
/* This file was lifted in its entirety from
 * the bsd386 iso9660 filesystem, by Pace Williamson.
 */

/* Fetch byte idx of an on-disc field as a value in 0..255, regardless of
   whether plain char is signed on this compiler. */
static unsigned int iso_byte(const char * p, int idx)
{
	return (unsigned char) p[idx];
}

/* Turn a 32-bit pattern into the int with the same two's-complement bits,
   without shifting into the sign bit or narrowing an out-of-range value. */
static int iso_u32_to_int(unsigned int v)
{
	if (v <= 0x7fffffffU)
		return (int) v;
	return -(int) (~v & 0x7fffffffU) - 1;
}

/* 7.1.1: unsigned 8-bit value. */
int
isonum_711 (char * p)
{
	return (int) iso_byte(p, 0);
}

/* 7.1.2: signed 8-bit value (two's complement). */
int
isonum_712 (char * p)
{
	int val = (int) iso_byte(p, 0);

	return (val >= 0x80) ? val - 0x100 : val;
}

/* 7.2.1: 16-bit value, least significant byte first. */
int
isonum_721 (char * p)
{
	return (int) (iso_byte(p, 0) | (iso_byte(p, 1) << 8));
}

/* 7.2.2: 16-bit value, most significant byte first. */
int
isonum_722 (char * p)
{
	return (int) ((iso_byte(p, 0) << 8) | iso_byte(p, 1));
}

/* 7.2.3: 16-bit value stored in both byte orders (4 bytes).  Only the
   leading little-endian copy is read; the two copies are not compared. */
int
isonum_723 (char * p)
{
	return (isonum_721 (p));
}

/* 7.3.1: 32-bit value, least significant byte first.  The bytes are
   combined as unsigned so a top byte >= 0x80 never shifts into the sign
   bit of an int. */
int
isonum_731 (char * p)
{
	return iso_u32_to_int(iso_byte(p, 0)
		| (iso_byte(p, 1) << 8)
		| (iso_byte(p, 2) << 16)
		| (iso_byte(p, 3) << 24));
}

/* 7.3.2: 32-bit value, most significant byte first. */
int
isonum_732 (char * p)
{
	return iso_u32_to_int((iso_byte(p, 0) << 24)
		| (iso_byte(p, 1) << 16)
		| (iso_byte(p, 2) << 8)
		| iso_byte(p, 3));
}

/* 7.3.3: 32-bit value stored in both byte orders (8 bytes).  Only the
   leading little-endian copy is read; the two copies are not compared. */
int
isonum_733 (char * p)
{
	return (isonum_731 (p));
}

/* We have to convert from a MM/DD/YY format to the unix ctime format.  We have to
   take into account leap years and all of that good stuff.  Unfortunately, the kernel
   does not have the information on hand to take into account daylight savings time,
   so there will be cases (roughly half the time) where the dates are off by one hour. */
*/ +int iso_date(char * p, int flag) +{ + int year, month, day, hour ,minute, second, tz; + int crtime, days, i; + + year = p[0] - 70; + month = p[1]; + day = p[2]; + hour = p[3]; + minute = p[4]; + second = p[5]; + if (flag == 0) tz = p[6]; /* High sierra has no time zone */ + else tz = 0; + + if (year < 0) { + crtime = 0; + } else { + int monlen[12] = {31,28,31,30,31,30,31,31,30,31,30,31}; + days = year * 365; + if (year > 2) + days += (year+1) / 4; + for (i = 1; i < month; i++) + days += monlen[i-1]; + if (((year+2) % 4) == 0 && month > 2) + days++; + days += day - 1; + crtime = ((((days * 24) + hour) * 60 + minute) * 60) + + second; + + /* sign extend */ + if (tz & 0x80) + tz |= (-1 << 8); + + /* timezone offset is unreliable on some disks */ + if (-48 <= tz && tz <= 52) + crtime += tz * 15 * 60; + } + return crtime; +} + diff --git a/fs/locks.c b/fs/locks.c new file mode 100644 index 000000000..d1de73ad0 --- /dev/null +++ b/fs/locks.c @@ -0,0 +1,506 @@ +/* + * linux/fs/locks.c + * + * Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls. + * Doug Evans, 92Aug07, dje@sspiff.uucp. + * + * Deadlock Detection added by Kelly Carmichael, kelly@[142.24.8.65] + * September 17, 1994. + * + * FIXME: one thing isn't handled yet: + * - mandatory locks (requires lots of changes elsewhere) + * + * Edited by Kai Petzke, wpp@marie.physik.tu-berlin.de + * + * Converted file_lock_table to a linked list from an array, which eliminates + * the limits on how many active file locks are open - Chad Page + * (pageone@netcom.com), November 27, 1994 + */ + +#define DEADLOCK_DETECTION + +#include <asm/segment.h> + +#include <linux/malloc.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/stat.h> +#include <linux/fcntl.h> + +#define OFFSET_MAX ((off_t)0x7fffffff) /* FIXME: move elsewhere? 
*/ + +static int copy_flock(struct file *filp, struct file_lock *fl, struct flock *l, + unsigned int fd); +static int conflict(struct file_lock *caller_fl, struct file_lock *sys_fl); +static int overlap(struct file_lock *fl1, struct file_lock *fl2); +static int lock_it(struct file *filp, struct file_lock *caller, unsigned int fd); +static struct file_lock *alloc_lock(struct file_lock **pos, struct file_lock *fl, + unsigned int fd); +static void free_lock(struct file_lock **fl); +#ifdef DEADLOCK_DETECTION +int locks_deadlocked(int my_pid,int blocked_pid); +#endif + +static struct file_lock *file_lock_table = NULL; +static struct file_lock *file_lock_free_list = NULL; + +int fcntl_getlk(unsigned int fd, struct flock *l) +{ + int error; + struct flock flock; + struct file *filp; + struct file_lock *fl,file_lock; + + if (fd >= NR_OPEN || !(filp = current->files->fd[fd])) + return -EBADF; + error = verify_area(VERIFY_WRITE,l, sizeof(*l)); + if (error) + return error; + memcpy_fromfs(&flock, l, sizeof(flock)); + if (flock.l_type == F_UNLCK) + return -EINVAL; + if (!copy_flock(filp, &file_lock, &flock, fd)) + return -EINVAL; + + for (fl = filp->f_inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (conflict(&file_lock, fl)) { + flock.l_pid = fl->fl_owner->pid; + flock.l_start = fl->fl_start; + flock.l_len = fl->fl_end == OFFSET_MAX ? 0 : + fl->fl_end - fl->fl_start + 1; + flock.l_whence = fl->fl_whence; + flock.l_type = fl->fl_type; + memcpy_tofs(l, &flock, sizeof(flock)); + return 0; + } + } + + flock.l_type = F_UNLCK; /* no conflict found */ + memcpy_tofs(l, &flock, sizeof(flock)); + return 0; +} + +/* + * This function implements both F_SETLK and F_SETLKW. + */ + +int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) +{ + int error; + struct file *filp; + struct file_lock *fl,file_lock; + struct flock flock; + + /* + * Get arguments and validate them ... 
+ */ + + if (fd >= NR_OPEN || !(filp = current->files->fd[fd])) + return -EBADF; + error = verify_area(VERIFY_READ, l, sizeof(*l)); + if (error) + return error; + memcpy_fromfs(&flock, l, sizeof(flock)); + if (!copy_flock(filp, &file_lock, &flock, fd)) + return -EINVAL; + switch (file_lock.fl_type) { + case F_RDLCK : + if (!(filp->f_mode & 1)) + return -EBADF; + break; + case F_WRLCK : + if (!(filp->f_mode & 2)) + return -EBADF; + break; + case F_SHLCK : + if (!(filp->f_mode & 3)) + return -EBADF; + file_lock.fl_type = F_RDLCK; + break; + case F_EXLCK : + if (!(filp->f_mode & 3)) + return -EBADF; + file_lock.fl_type = F_WRLCK; + break; + case F_UNLCK : + break; + } + + /* + * Scan for a conflicting lock ... + */ + + if (file_lock.fl_type != F_UNLCK) { +repeat: + for (fl = filp->f_inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!conflict(&file_lock, fl)) + continue; + /* + * File is locked by another process. If this is + * F_SETLKW wait for the lock to be released. + */ + if (cmd == F_SETLKW) { + if (current->signal & ~current->blocked) + return -ERESTARTSYS; +#ifdef DEADLOCK_DETECTION + if (locks_deadlocked(file_lock.fl_owner->pid,fl->fl_owner->pid)) return -EDEADLOCK; +#endif + interruptible_sleep_on(&fl->fl_wait); + if (current->signal & ~current->blocked) + return -ERESTARTSYS; + goto repeat; + } + return -EAGAIN; + } + } + + /* + * Lock doesn't conflict with any other lock ... + */ + + return lock_it(filp, &file_lock, fd); +} + +#ifdef DEADLOCK_DETECTION +/* + * This function tests for deadlock condition before putting a process to sleep + * this detection scheme is recursive... we may need some test as to make it + * exit if the function gets stuck due to bad lock data. 
+ */ + +int locks_deadlocked(int my_pid,int blocked_pid) +{ + int ret_val; + struct wait_queue *dlock_wait; + struct file_lock *fl; + for (fl = file_lock_table; fl != NULL; fl = fl->fl_nextlink) { + if (fl->fl_owner == NULL) continue; /* not a used lock */ + if (fl->fl_owner->pid != my_pid) continue; + if (fl->fl_wait == NULL) continue; /* no queues */ + dlock_wait = fl->fl_wait; + do { + if (dlock_wait->task != NULL) { + if (dlock_wait->task->pid == blocked_pid) return -EDEADLOCK; + ret_val = locks_deadlocked(dlock_wait->task->pid,blocked_pid); + if (ret_val) return -EDEADLOCK; + } + dlock_wait = dlock_wait->next; + } while (dlock_wait != NULL); + } + return 0; +} +#endif + +/* + * This function is called when the file is closed. + */ + +void fcntl_remove_locks(struct task_struct *task, struct file *filp, + unsigned int fd) +{ + struct file_lock *fl; + struct file_lock **before; + + /* Find first lock owned by caller ... */ + + before = &filp->f_inode->i_flock; + while ((fl = *before) && (task != fl->fl_owner || fd != fl->fl_fd)) + before = &fl->fl_next; + + /* The list is sorted by owner and fd ... */ + + while ((fl = *before) && task == fl->fl_owner && fd == fl->fl_fd) + free_lock(before); +} + +/* + * Verify a "struct flock" and copy it to a "struct file_lock" ... + * Result is a boolean indicating success. 
+ */ + +static int copy_flock(struct file *filp, struct file_lock *fl, struct flock *l, + unsigned int fd) +{ + off_t start; + + if (!filp->f_inode) /* just in case */ + return 0; + if (l->l_type != F_UNLCK && l->l_type != F_RDLCK && l->l_type != F_WRLCK + && l->l_type != F_SHLCK && l->l_type != F_EXLCK) + return 0; + switch (l->l_whence) { + case 0 /*SEEK_SET*/ : start = 0; break; + case 1 /*SEEK_CUR*/ : start = filp->f_pos; break; + case 2 /*SEEK_END*/ : start = filp->f_inode->i_size; break; + default : return 0; + } + if ((start += l->l_start) < 0 || l->l_len < 0) + return 0; + fl->fl_type = l->l_type; + fl->fl_start = start; /* we record the absolute position */ + fl->fl_whence = 0; /* FIXME: do we record {l_start} as passed? */ + if (l->l_len == 0 || (fl->fl_end = start + l->l_len - 1) < 0) + fl->fl_end = OFFSET_MAX; + fl->fl_owner = current; + fl->fl_fd = fd; + fl->fl_wait = NULL; /* just for cleanliness */ + return 1; +} + +/* + * Determine if lock {sys_fl} blocks lock {caller_fl} ... + */ + +static int conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +{ + if ( caller_fl->fl_owner == sys_fl->fl_owner + && caller_fl->fl_fd == sys_fl->fl_fd) + return 0; + if (!overlap(caller_fl, sys_fl)) + return 0; + switch (caller_fl->fl_type) { + case F_RDLCK : + return sys_fl->fl_type != F_RDLCK; + case F_WRLCK : + return 1; /* overlapping region not owned by caller */ + } + return 0; /* shouldn't get here, but just in case */ +} + +static int overlap(struct file_lock *fl1, struct file_lock *fl2) +{ + return fl1->fl_end >= fl2->fl_start && fl2->fl_end >= fl1->fl_start; +} + +/* + * Add a lock to a file ... + * Result is 0 for success or -ENOLCK. + * + * We merge adjacent locks whenever possible. + * + * WARNING: We assume the lock doesn't conflict with any other lock. + */ + +/* + * Rewritten by Kai Petzke: + * We sort the lock list first by owner, then by the starting address. 
+ * + * To make freeing a lock much faster, we keep a pointer to the lock before the + * actual one. But the real gain of the new coding was, that lock_it() and + * unlock_it() became one function. + * + * To all purists: Yes, I use a few goto's. Just pass on to the next function. + */ + +static int lock_it(struct file *filp, struct file_lock *caller, unsigned int fd) +{ + struct file_lock *fl; + struct file_lock *left = 0; + struct file_lock *right = 0; + struct file_lock **before; + int added = 0; + + /* + * Find the first old lock with the same owner as the new lock. + */ + + before = &filp->f_inode->i_flock; + while ((fl = *before) && + (caller->fl_owner != fl->fl_owner || + caller->fl_fd != fl->fl_fd)) + before = &fl->fl_next; + + /* + * Look up all locks of this owner. + */ + + while ( (fl = *before) + && caller->fl_owner == fl->fl_owner + && caller->fl_fd == fl->fl_fd) { + /* + * Detect adjacent or overlapping regions (if same lock type) + */ + if (caller->fl_type == fl->fl_type) { + if (fl->fl_end < caller->fl_start - 1) + goto next_lock; + /* + * If the next lock in the list has entirely bigger + * addresses than the new one, insert the lock here. + */ + if (fl->fl_start > caller->fl_end + 1) + break; + + /* + * If we come here, the new and old lock are of the + * same type and adjacent or overlapping. Make one + * lock yielding from the lower start address of both + * locks to the higher end address. + */ + if (fl->fl_start > caller->fl_start) + fl->fl_start = caller->fl_start; + else + caller->fl_start = fl->fl_start; + if (fl->fl_end < caller->fl_end) + fl->fl_end = caller->fl_end; + else + caller->fl_end = fl->fl_end; + if (added) { + free_lock(before); + continue; + } + caller = fl; + added = 1; + goto next_lock; + } + /* + * Processing for different lock types is a bit more complex. 
+ */ + if (fl->fl_end < caller->fl_start) + goto next_lock; + if (fl->fl_start > caller->fl_end) + break; + if (caller->fl_type == F_UNLCK) + added = 1; + if (fl->fl_start < caller->fl_start) + left = fl; + /* + * If the next lock in the list has a higher end address than + * the new one, insert the new one here. + */ + if (fl->fl_end > caller->fl_end) { + right = fl; + break; + } + if (fl->fl_start >= caller->fl_start) { + /* + * The new lock completely replaces an old one (This may + * happen several times). + */ + if (added) { + free_lock(before); + continue; + } + /* + * Replace the old lock with the new one. Wake up + * anybody waiting for the old one, as the change in + * lock type might satisfy his needs. + */ + wake_up(&fl->fl_wait); + fl->fl_start = caller->fl_start; + fl->fl_end = caller->fl_end; + fl->fl_type = caller->fl_type; + caller = fl; + added = 1; + } + /* + * Go on to next lock. + */ +next_lock: + before = &(*before)->fl_next; + } + + if (! added) { + if (caller->fl_type == F_UNLCK) { +/* + * XXX - under iBCS-2, attempting to unlock a not-locked region is + * not considered an error condition, although I'm not sure if this + * should be a default behavior (it makes porting to native Linux easy) + * or a personality option. + * + * Does Xopen/1170 say anything about this? + * - drew@Colorado.EDU + */ +#if 0 + return -EINVAL; +#else + return 0; +#endif + } + if (! (caller = alloc_lock(before, caller, fd))) + return -ENOLCK; + } + if (right) { + if (left == right) { + /* + * The new lock breaks the old one in two pieces, so we + * have to allocate one more lock (in this case, even + * F_UNLCK may fail!). + */ + if (! (left = alloc_lock(before, right, fd))) { + if (! added) + free_lock(before); + return -ENOLCK; + } + } + right->fl_start = caller->fl_end + 1; + } + if (left) + left->fl_end = caller->fl_start - 1; + return 0; +} + +/* + * File_lock() inserts a lock at the position pos of the linked list. 
+ * + * Modified to create a new node if no free entries available - Chad Page + * + */ + +static struct file_lock *alloc_lock(struct file_lock **pos, + struct file_lock *fl, + unsigned int fd) +{ + struct file_lock *tmp; + + tmp = file_lock_free_list; + + if (tmp == NULL) + { + /* Okay, let's make a new file_lock structure... */ + tmp = (struct file_lock *)kmalloc(sizeof(struct file_lock), GFP_KERNEL); + tmp -> fl_owner = NULL; + tmp -> fl_next = file_lock_free_list; + tmp -> fl_nextlink = file_lock_table; + file_lock_table = tmp; + } + else + { + /* remove from free list */ + file_lock_free_list = tmp->fl_next; + } + + if (tmp->fl_owner != NULL) + panic("alloc_lock: broken free list\n"); + + *tmp = *fl; + + tmp->fl_next = *pos; /* insert into file's list */ + *pos = tmp; + + tmp->fl_owner = current; /* FIXME: needed? */ + tmp->fl_fd = fd; /* FIXME: needed? */ + tmp->fl_wait = NULL; + return tmp; +} + +/* + * Add a lock to the free list ... + */ + +static void free_lock(struct file_lock **fl_p) +{ + struct file_lock *fl; + + fl = *fl_p; + if (fl->fl_owner == NULL) /* sanity check */ + panic("free_lock: broken lock list\n"); + + *fl_p = (*fl_p)->fl_next; + + fl->fl_next = file_lock_free_list; /* add to free list */ + file_lock_free_list = fl; + fl->fl_owner = NULL; /* for sanity checks */ + + wake_up(&fl->fl_wait); +} diff --git a/fs/minix/Makefile b/fs/minix/Makefile new file mode 100644 index 000000000..20e7f3dae --- /dev/null +++ b/fs/minix/Makefile @@ -0,0 +1,31 @@ +# +# Makefile for the linux minix-filesystem routines. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... 
+ +.c.s: + $(CC) $(CFLAGS) -S $< +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< + +OBJS= bitmap.o truncate.o namei.o inode.o \ + file.o dir.o symlink.o fsync.o + +minix.o: $(OBJS) + $(LD) -r -o minix.o $(OBJS) + +dep: + $(CPP) -M *.c > .depend + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c new file mode 100644 index 000000000..d42b86eea --- /dev/null +++ b/fs/minix/bitmap.c @@ -0,0 +1,208 @@ +/* + * linux/fs/minix/bitmap.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* bitmap.c contains the code that handles the inode and block bitmaps */ + +#include <linux/sched.h> +#include <linux/minix_fs.h> +#include <linux/stat.h> +#include <linux/kernel.h> +#include <linux/string.h> + +#include <asm/bitops.h> + +static int nibblemap[] = { 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4 }; + +static unsigned long count_used(struct buffer_head *map[], unsigned numblocks, + unsigned numbits) +{ + unsigned i, j, end, sum = 0; + struct buffer_head *bh; + + for (i=0; (i<numblocks) && numbits; i++) { + if (!(bh=map[i])) + return(0); + if (numbits >= (8*BLOCK_SIZE)) { + end = BLOCK_SIZE; + numbits -= 8*BLOCK_SIZE; + } else { + int tmp; + end = numbits >> 3; + numbits &= 0x7; + tmp = bh->b_data[end] & ((1<<numbits)-1); + sum += nibblemap[tmp&0xf] + nibblemap[(tmp>>4)&0xf]; + numbits = 0; + } + for (j=0; j<end; j++) + sum += nibblemap[bh->b_data[j] & 0xf] + + nibblemap[(bh->b_data[j]>>4)&0xf]; + } + return(sum); +} + +void minix_free_block(struct super_block * sb, int block) +{ + struct buffer_head * bh; + unsigned int bit,zone; + + if (!sb) { + printk("trying to free block on nonexistent device\n"); + return; + } + if (block < sb->u.minix_sb.s_firstdatazone || + block >= sb->u.minix_sb.s_nzones) { + printk("trying to free block not in datazone\n"); + return; + } + bh = get_hash_table(sb->s_dev,block,BLOCK_SIZE); + if (bh) + bh->b_dirt=0; + brelse(bh); + zone = block 
- sb->u.minix_sb.s_firstdatazone + 1; + bit = zone & 8191; + zone >>= 13; + bh = sb->u.minix_sb.s_zmap[zone]; + if (!bh) { + printk("minix_free_block: nonexistent bitmap buffer\n"); + return; + } + if (!clear_bit(bit,bh->b_data)) + printk("free_block (%04x:%d): bit already cleared\n",sb->s_dev,block); + mark_buffer_dirty(bh, 1); + return; +} + +int minix_new_block(struct super_block * sb) +{ + struct buffer_head * bh; + int i,j; + + if (!sb) { + printk("trying to get new block from nonexistent device\n"); + return 0; + } +repeat: + j = 8192; + for (i=0 ; i<8 ; i++) + if ((bh=sb->u.minix_sb.s_zmap[i]) != NULL) + if ((j=find_first_zero_bit(bh->b_data, 8192)) < 8192) + break; + if (i>=8 || !bh || j>=8192) + return 0; + if (set_bit(j,bh->b_data)) { + printk("new_block: bit already set"); + goto repeat; + } + mark_buffer_dirty(bh, 1); + j += i*8192 + sb->u.minix_sb.s_firstdatazone-1; + if (j < sb->u.minix_sb.s_firstdatazone || + j >= sb->u.minix_sb.s_nzones) + return 0; + if (!(bh = getblk(sb->s_dev,j,BLOCK_SIZE))) { + printk("new_block: cannot get block"); + return 0; + } + memset(bh->b_data, 0, BLOCK_SIZE); + bh->b_uptodate = 1; + mark_buffer_dirty(bh, 1); + brelse(bh); + return j; +} + +unsigned long minix_count_free_blocks(struct super_block *sb) +{ + return (sb->u.minix_sb.s_nzones - count_used(sb->u.minix_sb.s_zmap,sb->u.minix_sb.s_zmap_blocks,sb->u.minix_sb.s_nzones)) + << sb->u.minix_sb.s_log_zone_size; +} + +void minix_free_inode(struct inode * inode) +{ + struct buffer_head * bh; + unsigned long ino; + + if (!inode) + return; + if (!inode->i_dev) { + printk("free_inode: inode has no device\n"); + return; + } + if (inode->i_count != 1) { + printk("free_inode: inode has count=%d\n",inode->i_count); + return; + } + if (inode->i_nlink) { + printk("free_inode: inode has nlink=%d\n",inode->i_nlink); + return; + } + if (!inode->i_sb) { + printk("free_inode: inode on nonexistent device\n"); + return; + } + if (inode->i_ino < 1 || inode->i_ino >= 
inode->i_sb->u.minix_sb.s_ninodes) { + printk("free_inode: inode 0 or nonexistent inode\n"); + return; + } + ino = inode->i_ino; + if (!(bh=inode->i_sb->u.minix_sb.s_imap[ino >> 13])) { + printk("free_inode: nonexistent imap in superblock\n"); + return; + } + clear_inode(inode); + if (!clear_bit(ino & 8191, bh->b_data)) + printk("free_inode: bit %lu already cleared.\n",ino); + mark_buffer_dirty(bh, 1); +} + +struct inode * minix_new_inode(const struct inode * dir) +{ + struct super_block * sb; + struct inode * inode; + struct buffer_head * bh; + int i,j; + + if (!dir || !(inode = get_empty_inode())) + return NULL; + sb = dir->i_sb; + inode->i_sb = sb; + inode->i_flags = inode->i_sb->s_flags; + j = 8192; + for (i=0 ; i<8 ; i++) + if ((bh = inode->i_sb->u.minix_sb.s_imap[i]) != NULL) + if ((j=find_first_zero_bit(bh->b_data, 8192)) < 8192) + break; + if (!bh || j >= 8192) { + iput(inode); + return NULL; + } + if (set_bit(j,bh->b_data)) { /* shouldn't happen */ + printk("new_inode: bit already set"); + iput(inode); + return NULL; + } + mark_buffer_dirty(bh, 1); + j += i*8192; + if (!j || j >= inode->i_sb->u.minix_sb.s_ninodes) { + iput(inode); + return NULL; + } + inode->i_count = 1; + inode->i_nlink = 1; + inode->i_dev = sb->s_dev; + inode->i_uid = current->fsuid; + inode->i_gid = (dir->i_mode & S_ISGID) ? 
dir->i_gid : current->fsgid; + inode->i_dirt = 1; + inode->i_ino = j; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_op = NULL; + inode->i_blocks = inode->i_blksize = 0; + insert_inode_hash(inode); + return inode; +} + +unsigned long minix_count_free_inodes(struct super_block *sb) +{ + return sb->u.minix_sb.s_ninodes - count_used(sb->u.minix_sb.s_imap,sb->u.minix_sb.s_imap_blocks,sb->u.minix_sb.s_ninodes); +} diff --git a/fs/minix/dir.c b/fs/minix/dir.c new file mode 100644 index 000000000..6ece61971 --- /dev/null +++ b/fs/minix/dir.c @@ -0,0 +1,108 @@ +/* + * linux/fs/minix/dir.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * minix directory handling functions + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/minix_fs.h> +#include <linux/stat.h> + +#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) +#define ROUND_UP(x) (((x)+3) & ~3) + +static int minix_dir_read(struct inode * inode, struct file * filp, char * buf, int count) +{ + return -EISDIR; +} + +static int minix_readdir(struct inode *, struct file *, struct dirent *, int); + +static struct file_operations minix_dir_operations = { + NULL, /* lseek - default */ + minix_dir_read, /* read */ + NULL, /* write - bad */ + minix_readdir, /* readdir */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + file_fsync /* default fsync */ +}; + +/* + * directories can handle most operations... 
+ */ +struct inode_operations minix_dir_inode_operations = { + &minix_dir_operations, /* default directory file-ops */ + minix_create, /* create */ + minix_lookup, /* lookup */ + minix_link, /* link */ + minix_unlink, /* unlink */ + minix_symlink, /* symlink */ + minix_mkdir, /* mkdir */ + minix_rmdir, /* rmdir */ + minix_mknod, /* mknod */ + minix_rename, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + minix_truncate, /* truncate */ + NULL /* permission */ +}; + +static int minix_readdir(struct inode * inode, struct file * filp, + struct dirent * dirent, int count) +{ + unsigned int offset,i,ret; + int version; + char c; + struct buffer_head * bh; + struct minix_dir_entry * de; + struct minix_sb_info * info; + + if (!inode || !inode->i_sb || !S_ISDIR(inode->i_mode)) + return -EBADF; + info = &inode->i_sb->u.minix_sb; + if (filp->f_pos & (info->s_dirsize - 1)) + return -EBADF; + ret = 0; + while (!ret && filp->f_pos < inode->i_size) { + offset = filp->f_pos & 1023; + bh = minix_bread(inode,(filp->f_pos)>>BLOCK_SIZE_BITS,0); + if (!bh) { + filp->f_pos += 1024-offset; + continue; + } + while (!ret && offset < 1024 && filp->f_pos < inode->i_size) { + de = (struct minix_dir_entry *) (offset + bh->b_data); + offset += info->s_dirsize; + filp->f_pos += info->s_dirsize; +retry: + if (de->inode) { + version = inode->i_version; + for (i = 0; i < info->s_namelen; i++) + if ((c = de->name[i]) != 0) + put_fs_byte(c,i+dirent->d_name); + else + break; + if (i) { + put_fs_long(de->inode,&dirent->d_ino); + put_fs_byte(0,i+dirent->d_name); + put_fs_word(i,&dirent->d_reclen); + if (version != inode->i_version) + goto retry; + ret = ROUND_UP(NAME_OFFSET(dirent)+i+1); + } + } + } + brelse(bh); + } + return ret; +} diff --git a/fs/minix/file.c b/fs/minix/file.c new file mode 100644 index 000000000..670fb5e75 --- /dev/null +++ b/fs/minix/file.c @@ -0,0 +1,246 @@ +/* + * linux/fs/minix/file.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * minix 
regular file handling primitives + */ + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/sched.h> +#include <linux/minix_fs.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/locks.h> + +#define NBUF 32 + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +#include <linux/fs.h> +#include <linux/minix_fs.h> + +static int minix_file_read(struct inode *, struct file *, char *, int); +static int minix_file_write(struct inode *, struct file *, char *, int); + +/* + * We have mostly NULL's here: the current defaults are ok for + * the minix filesystem. + */ +static struct file_operations minix_file_operations = { + NULL, /* lseek - default */ + minix_file_read, /* read */ + minix_file_write, /* write */ + NULL, /* readdir - bad */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + generic_mmap, /* mmap */ + NULL, /* no special open is needed */ + NULL, /* release */ + minix_sync_file /* fsync */ +}; + +struct inode_operations minix_file_inode_operations = { + &minix_file_operations, /* default file operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + minix_bmap, /* bmap */ + minix_truncate, /* truncate */ + NULL /* permission */ +}; + +static int minix_file_read(struct inode * inode, struct file * filp, char * buf, int count) +{ + int read,left,chars; + int block, blocks, offset; + int bhrequest, uptodate; + struct buffer_head ** bhb, ** bhe; + struct buffer_head * bhreq[NBUF]; + struct buffer_head * buflist[NBUF]; + unsigned int size; + + if (!inode) { + printk("minix_file_read: inode = NULL\n"); + return -EINVAL; + } + if (!S_ISREG(inode->i_mode)) { + printk("minix_file_read: mode = %07o\n",inode->i_mode); + return -EINVAL; + } + offset = 
filp->f_pos; + size = inode->i_size; + if (offset > size) + left = 0; + else + left = size - offset; + if (left > count) + left = count; + if (left <= 0) + return 0; + read = 0; + block = offset >> BLOCK_SIZE_BITS; + offset &= BLOCK_SIZE-1; + size = (size + (BLOCK_SIZE-1)) >> BLOCK_SIZE_BITS; + blocks = (left + offset + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS; + bhb = bhe = buflist; + if (filp->f_reada) { + if(blocks < read_ahead[MAJOR(inode->i_dev)] / (BLOCK_SIZE >> 9)) + blocks = read_ahead[MAJOR(inode->i_dev)] / (BLOCK_SIZE >> 9); + if (block + blocks > size) + blocks = size - block; + } + + /* We do this in a two stage process. We first try and request + as many blocks as we can, then we wait for the first one to + complete, and then we try and wrap up as many as are actually + done. This routine is rather generic, in that it can be used + in a filesystem by substituting the appropriate function in + for getblk. + + This routine is optimized to make maximum use of the various + buffers and caches. */ + + do { + bhrequest = 0; + uptodate = 1; + while (blocks) { + --blocks; + *bhb = minix_getblk(inode, block++, 0); + if (*bhb && !(*bhb)->b_uptodate) { + uptodate = 0; + bhreq[bhrequest++] = *bhb; + } + + if (++bhb == &buflist[NBUF]) + bhb = buflist; + + /* If the block we have on hand is uptodate, go ahead + and complete processing. */ + if (uptodate) + break; + if (bhb == bhe) + break; + } + + /* Now request them all */ + if (bhrequest) + ll_rw_block(READ, bhrequest, bhreq); + + do { /* Finish off all I/O that has actually completed */ + if (*bhe) { + wait_on_buffer(*bhe); + if (!(*bhe)->b_uptodate) { /* read error? 
*/ + brelse(*bhe); + if (++bhe == &buflist[NBUF]) + bhe = buflist; + left = 0; + break; + } + } + if (left < BLOCK_SIZE - offset) + chars = left; + else + chars = BLOCK_SIZE - offset; + filp->f_pos += chars; + left -= chars; + read += chars; + if (*bhe) { + memcpy_tofs(buf,offset+(*bhe)->b_data,chars); + brelse(*bhe); + buf += chars; + } else { + while (chars-->0) + put_fs_byte(0,buf++); + } + offset = 0; + if (++bhe == &buflist[NBUF]) + bhe = buflist; + } while (left > 0 && bhe != bhb && (!*bhe || !(*bhe)->b_lock)); + } while (left > 0); + +/* Release the read-ahead blocks */ + while (bhe != bhb) { + brelse(*bhe); + if (++bhe == &buflist[NBUF]) + bhe = buflist; + }; + if (!read) + return -EIO; + filp->f_reada = 1; + if (!IS_RDONLY(inode)) + inode->i_atime = CURRENT_TIME; + return read; +} + +static int minix_file_write(struct inode * inode, struct file * filp, char * buf, int count) +{ + off_t pos; + int written,c; + struct buffer_head * bh; + char * p; + + if (!inode) { + printk("minix_file_write: inode = NULL\n"); + return -EINVAL; + } + if (!S_ISREG(inode->i_mode)) { + printk("minix_file_write: mode = %07o\n",inode->i_mode); + return -EINVAL; + } + down(&inode->i_sem); + if (filp->f_flags & O_APPEND) + pos = inode->i_size; + else + pos = filp->f_pos; + written = 0; + while (written < count) { + bh = minix_getblk(inode,pos/BLOCK_SIZE,1); + if (!bh) { + if (!written) + written = -ENOSPC; + break; + } + c = BLOCK_SIZE - (pos % BLOCK_SIZE); + if (c > count-written) + c = count-written; + if (c != BLOCK_SIZE && !bh->b_uptodate) { + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + if (!bh->b_uptodate) { + brelse(bh); + if (!written) + written = -EIO; + break; + } + } + p = (pos % BLOCK_SIZE) + bh->b_data; + pos += c; + written += c; + memcpy_fromfs(p,buf,c); + buf += c; + bh->b_uptodate = 1; + mark_buffer_dirty(bh, 0); + brelse(bh); + } + if (pos > inode->i_size) + inode->i_size = pos; + up(&inode->i_sem); + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + 
filp->f_pos = pos; + inode->i_dirt = 1; + return written; +} diff --git a/fs/minix/fsync.c b/fs/minix/fsync.c new file mode 100644 index 000000000..737a5bfcd --- /dev/null +++ b/fs/minix/fsync.c @@ -0,0 +1,159 @@ +/* + * linux/fs/minix/fsync.c + * + * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk) + * from + * Copyright (C) 1991, 1992 Linus Torvalds + * + * minix fsync primitive + */ + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/locks.h> + +#include <linux/fs.h> +#include <linux/minix_fs.h> + + +#define blocksize BLOCK_SIZE +#define addr_per_block 512 + +static int sync_block (struct inode * inode, unsigned short * block, int wait) +{ + struct buffer_head * bh; + unsigned short tmp; + + if (!*block) + return 0; + tmp = *block; + bh = get_hash_table(inode->i_dev, *block, blocksize); + if (!bh) + return 0; + if (*block != tmp) { + brelse (bh); + return 1; + } + if (wait && bh->b_req && !bh->b_uptodate) { + brelse(bh); + return -1; + } + if (wait || !bh->b_uptodate || !bh->b_dirt) + { + brelse(bh); + return 0; + } + ll_rw_block(WRITE, 1, &bh); + bh->b_count--; + return 0; +} + +static int sync_iblock (struct inode * inode, unsigned short * iblock, + struct buffer_head **bh, int wait) +{ + int rc; + unsigned short tmp; + + *bh = NULL; + tmp = *iblock; + if (!tmp) + return 0; + rc = sync_block (inode, iblock, wait); + if (rc) + return rc; + *bh = bread(inode->i_dev, tmp, blocksize); + if (tmp != *iblock) { + brelse(*bh); + *bh = NULL; + return 1; + } + if (!*bh) + return -1; + return 0; +} + + +static int sync_direct(struct inode *inode, int wait) +{ + int i; + int rc, err = 0; + + for (i = 0; i < 7; i++) { + rc = sync_block (inode, inode->u.minix_i.i_data + i, wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + return err; +} + +static int sync_indirect(struct inode *inode, unsigned short *iblock, int wait) +{ + int i; + struct 
buffer_head * ind_bh; + int rc, err = 0; + + rc = sync_iblock (inode, iblock, &ind_bh, wait); + if (rc || !ind_bh) + return rc; + + for (i = 0; i < addr_per_block; i++) { + rc = sync_block (inode, + ((unsigned short *) ind_bh->b_data) + i, + wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + brelse(ind_bh); + return err; +} + +static int sync_dindirect(struct inode *inode, unsigned short *diblock, + int wait) +{ + int i; + struct buffer_head * dind_bh; + int rc, err = 0; + + rc = sync_iblock (inode, diblock, &dind_bh, wait); + if (rc || !dind_bh) + return rc; + + for (i = 0; i < addr_per_block; i++) { + rc = sync_indirect (inode, + ((unsigned short *) dind_bh->b_data) + i, + wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + brelse(dind_bh); + return err; +} + +int minix_sync_file(struct inode * inode, struct file * file) +{ + int wait, err = 0; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return -EINVAL; + + for (wait=0; wait<=1; wait++) + { + err |= sync_direct(inode, wait); + err |= sync_indirect(inode, inode->u.minix_i.i_data+7, wait); + err |= sync_dindirect(inode, inode->u.minix_i.i_data+8, wait); + } + err |= minix_sync_inode (inode); + return (err < 0) ? 
-EIO : 0; +} diff --git a/fs/minix/inode.c b/fs/minix/inode.c new file mode 100644 index 000000000..2aeb538ee --- /dev/null +++ b/fs/minix/inode.c @@ -0,0 +1,513 @@ +/* + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/sched.h> +#include <linux/minix_fs.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/locks.h> + +#include <asm/system.h> +#include <asm/segment.h> +#include <asm/bitops.h> + +void minix_put_inode(struct inode *inode) +{ + if (inode->i_nlink) + return; + inode->i_size = 0; + minix_truncate(inode); + minix_free_inode(inode); +} + +static void minix_commit_super (struct super_block * sb, + struct minix_super_block * ms) +{ + mark_buffer_dirty(sb->u.minix_sb.s_sbh, 1); + sb->s_dirt = 0; +} + +void minix_write_super (struct super_block * sb) +{ + struct minix_super_block * ms; + + if (!(sb->s_flags & MS_RDONLY)) { + ms = sb->u.minix_sb.s_ms; + + if (ms->s_state & MINIX_VALID_FS) + ms->s_state &= ~MINIX_VALID_FS; + minix_commit_super (sb, ms); + } + sb->s_dirt = 0; +} + + +void minix_put_super(struct super_block *sb) +{ + int i; + + lock_super(sb); + if (!(sb->s_flags & MS_RDONLY)) { + sb->u.minix_sb.s_ms->s_state = sb->u.minix_sb.s_mount_state; + mark_buffer_dirty(sb->u.minix_sb.s_sbh, 1); + } + sb->s_dev = 0; + for(i = 0 ; i < MINIX_I_MAP_SLOTS ; i++) + brelse(sb->u.minix_sb.s_imap[i]); + for(i = 0 ; i < MINIX_Z_MAP_SLOTS ; i++) + brelse(sb->u.minix_sb.s_zmap[i]); + brelse (sb->u.minix_sb.s_sbh); + unlock_super(sb); + return; +} + +static struct super_operations minix_sops = { + minix_read_inode, + NULL, + minix_write_inode, + minix_put_inode, + minix_put_super, + minix_write_super, + minix_statfs, + minix_remount +}; + +int minix_remount (struct super_block * sb, int * flags, char * data) +{ + struct minix_super_block * ms; + + ms = sb->u.minix_sb.s_ms; + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + return 0; + if (*flags 
& MS_RDONLY) { + if (ms->s_state & MINIX_VALID_FS || + !(sb->u.minix_sb.s_mount_state & MINIX_VALID_FS)) + return 0; + /* Mounting a rw partition read-only. */ + ms->s_state = sb->u.minix_sb.s_mount_state; + mark_buffer_dirty(sb->u.minix_sb.s_sbh, 1); + sb->s_dirt = 1; + minix_commit_super (sb, ms); + } + else { + /* Mount a partition which is read-only, read-write. */ + sb->u.minix_sb.s_mount_state = ms->s_state; + ms->s_state &= ~MINIX_VALID_FS; + mark_buffer_dirty(sb->u.minix_sb.s_sbh, 1); + sb->s_dirt = 1; + + if (!(sb->u.minix_sb.s_mount_state & MINIX_VALID_FS)) + printk ("MINIX-fs warning: remounting unchecked fs, " + "running fsck is recommended.\n"); + else if ((sb->u.minix_sb.s_mount_state & MINIX_ERROR_FS)) + printk ("MINIX-fs warning: remounting fs with errors, " + "running fsck is recommended.\n"); + } + return 0; +} + + +struct super_block *minix_read_super(struct super_block *s,void *data, + int silent) +{ + struct buffer_head *bh; + struct minix_super_block *ms; + int i,dev=s->s_dev,block; + + if (32 != sizeof (struct minix_inode)) + panic("bad i-node size"); + lock_super(s); + set_blocksize(dev, BLOCK_SIZE); + if (!(bh = bread(dev,1,BLOCK_SIZE))) { + s->s_dev=0; + unlock_super(s); + printk("MINIX-fs: unable to read superblock\n"); + return NULL; + } + ms = (struct minix_super_block *) bh->b_data; + s->u.minix_sb.s_ms = ms; + s->u.minix_sb.s_sbh = bh; + s->u.minix_sb.s_mount_state = ms->s_state; + s->s_blocksize = 1024; + s->s_blocksize_bits = 10; + s->u.minix_sb.s_ninodes = ms->s_ninodes; + s->u.minix_sb.s_nzones = ms->s_nzones; + s->u.minix_sb.s_imap_blocks = ms->s_imap_blocks; + s->u.minix_sb.s_zmap_blocks = ms->s_zmap_blocks; + s->u.minix_sb.s_firstdatazone = ms->s_firstdatazone; + s->u.minix_sb.s_log_zone_size = ms->s_log_zone_size; + s->u.minix_sb.s_max_size = ms->s_max_size; + s->s_magic = ms->s_magic; + if (s->s_magic == MINIX_SUPER_MAGIC) { + s->u.minix_sb.s_dirsize = 16; + s->u.minix_sb.s_namelen = 14; + } else if (s->s_magic == 
MINIX_SUPER_MAGIC2) { + s->u.minix_sb.s_dirsize = 32; + s->u.minix_sb.s_namelen = 30; + } else { + s->s_dev = 0; + unlock_super(s); + brelse(bh); + if (!silent) + printk("VFS: Can't find a minix filesystem on dev 0x%04x.\n", dev); + return NULL; + } + for (i=0;i < MINIX_I_MAP_SLOTS;i++) + s->u.minix_sb.s_imap[i] = NULL; + for (i=0;i < MINIX_Z_MAP_SLOTS;i++) + s->u.minix_sb.s_zmap[i] = NULL; + block=2; + for (i=0 ; i < s->u.minix_sb.s_imap_blocks ; i++) + if ((s->u.minix_sb.s_imap[i]=bread(dev,block,BLOCK_SIZE)) != NULL) + block++; + else + break; + for (i=0 ; i < s->u.minix_sb.s_zmap_blocks ; i++) + if ((s->u.minix_sb.s_zmap[i]=bread(dev,block,BLOCK_SIZE)) != NULL) + block++; + else + break; + if (block != 2+s->u.minix_sb.s_imap_blocks+s->u.minix_sb.s_zmap_blocks) { + for(i=0;i<MINIX_I_MAP_SLOTS;i++) + brelse(s->u.minix_sb.s_imap[i]); + for(i=0;i<MINIX_Z_MAP_SLOTS;i++) + brelse(s->u.minix_sb.s_zmap[i]); + s->s_dev=0; + unlock_super(s); + brelse(bh); + printk("MINIX-fs: bad superblock or unable to read bitmaps\n"); + return NULL; + } + set_bit(0,s->u.minix_sb.s_imap[0]->b_data); + set_bit(0,s->u.minix_sb.s_zmap[0]->b_data); + unlock_super(s); + /* set up enough so that it can read an inode */ + s->s_dev = dev; + s->s_op = &minix_sops; + s->s_mounted = iget(s,MINIX_ROOT_INO); + if (!s->s_mounted) { + s->s_dev = 0; + brelse(bh); + printk("MINIX-fs: get root inode failed\n"); + return NULL; + } + if (!(s->s_flags & MS_RDONLY)) { + ms->s_state &= ~MINIX_VALID_FS; + mark_buffer_dirty(bh, 1); + s->s_dirt = 1; + } + if (!(s->u.minix_sb.s_mount_state & MINIX_VALID_FS)) + printk ("MINIX-fs: mounting unchecked file system, " + "running fsck is recommended.\n"); + else if (s->u.minix_sb.s_mount_state & MINIX_ERROR_FS) + printk ("MINIX-fs: mounting file system with errors, " + "running fsck is recommended.\n"); + return s; +} + +void minix_statfs(struct super_block *sb, struct statfs *buf) +{ + long tmp; + + put_fs_long(MINIX_SUPER_MAGIC, &buf->f_type); + put_fs_long(1024, 
&buf->f_bsize); + tmp = sb->u.minix_sb.s_nzones - sb->u.minix_sb.s_firstdatazone; + tmp <<= sb->u.minix_sb.s_log_zone_size; + put_fs_long(tmp, &buf->f_blocks); + tmp = minix_count_free_blocks(sb); + put_fs_long(tmp, &buf->f_bfree); + put_fs_long(tmp, &buf->f_bavail); + put_fs_long(sb->u.minix_sb.s_ninodes, &buf->f_files); + put_fs_long(minix_count_free_inodes(sb), &buf->f_ffree); + put_fs_long(sb->u.minix_sb.s_namelen, &buf->f_namelen); + /* Don't know what value to put in buf->f_fsid */ +} + +#define inode_bmap(inode,nr) ((inode)->u.minix_i.i_data[(nr)]) + +static int block_bmap(struct buffer_head * bh, int nr) +{ + int tmp; + + if (!bh) + return 0; + tmp = ((unsigned short *) bh->b_data)[nr]; + brelse(bh); + return tmp; +} + +int minix_bmap(struct inode * inode,int block) +{ + int i; + + if (block<0) { + printk("minix_bmap: block<0"); + return 0; + } + if (block >= 7+512+512*512) { + printk("minix_bmap: block>big"); + return 0; + } + if (block < 7) + return inode_bmap(inode,block); + block -= 7; + if (block < 512) { + i = inode_bmap(inode,7); + if (!i) + return 0; + return block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),block); + } + block -= 512; + i = inode_bmap(inode,8); + if (!i) + return 0; + i = block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),block>>9); + if (!i) + return 0; + return block_bmap(bread(inode->i_dev,i,BLOCK_SIZE),block & 511); +} + +static struct buffer_head * inode_getblk(struct inode * inode, int nr, int create) +{ + int tmp; + unsigned short *p; + struct buffer_head * result; + + p = inode->u.minix_i.i_data + nr; +repeat: + tmp = *p; + if (tmp) { + result = getblk(inode->i_dev, tmp, BLOCK_SIZE); + if (tmp == *p) + return result; + brelse(result); + goto repeat; + } + if (!create) + return NULL; + tmp = minix_new_block(inode->i_sb); + if (!tmp) + return NULL; + result = getblk(inode->i_dev, tmp, BLOCK_SIZE); + if (*p) { + minix_free_block(inode->i_sb,tmp); + brelse(result); + goto repeat; + } + *p = tmp; + inode->i_ctime = CURRENT_TIME; + 
/*
 * block_getblk()
 *
 * Looks up entry 'nr' of the indirect block held in 'bh' and returns the
 * buffer for the block it names.  When the entry is empty and 'create' is
 * non-zero, a new block is allocated and recorded in the entry.
 *
 *	inode	file the indirect block belongs to (its superblock is used
 *		for block allocation)
 *	bh	buffer of the indirect block; may be NULL
 *	nr	index of the 16-bit entry inside the indirect block
 *	create	non-zero to allocate a block for an empty entry
 *
 * Returns the buffer of the target block, or NULL if 'bh' is NULL, the
 * indirect block cannot be read, the entry is empty and 'create' is zero,
 * or no block could be allocated.  A non-NULL 'bh' is released on every
 * return path.
 */
static struct buffer_head * block_getblk(struct inode * inode,
	struct buffer_head * bh, int nr, int create)
{
	int tmp;
	unsigned short *p;
	struct buffer_head * result;

	if (!bh)
		return NULL;
	/* make sure the indirect block contents are valid before reading them */
	if (!bh->b_uptodate) {
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		if (!bh->b_uptodate) {
			brelse(bh);
			return NULL;
		}
	}
	p = nr + (unsigned short *) bh->b_data;
repeat:
	tmp = *p;
	if (tmp) {
		result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
		/*
		 * Only trust the buffer if the entry still names the same
		 * block (presumably getblk() can sleep and the entry can
		 * change meanwhile - confirm); otherwise start over.
		 */
		if (tmp == *p) {
			brelse(bh);
			return result;
		}
		brelse(result);
		goto repeat;
	}
	if (!create) {
		brelse(bh);
		return NULL;
	}
	tmp = minix_new_block(inode->i_sb);
	if (!tmp) {
		brelse(bh);
		return NULL;
	}
	result = getblk(bh->b_dev, tmp, BLOCK_SIZE);
	/* the entry got filled in the meantime: give our block back and retry */
	if (*p) {
		minix_free_block(inode->i_sb,tmp);
		brelse(result);
		goto repeat;
	}
	*p = tmp;
	mark_buffer_dirty(bh, 1);
	brelse(bh);
	return result;
}
ino; + + ino = inode->i_ino; + inode->i_op = NULL; + inode->i_mode = 0; + if (!ino || ino >= inode->i_sb->u.minix_sb.s_ninodes) { + printk("Bad inode number on dev 0x%04x: %d is out of range\n", + inode->i_dev, ino); + return; + } + block = 2 + inode->i_sb->u.minix_sb.s_imap_blocks + + inode->i_sb->u.minix_sb.s_zmap_blocks + + (ino-1)/MINIX_INODES_PER_BLOCK; + if (!(bh=bread(inode->i_dev,block, BLOCK_SIZE))) { + printk("Major problem: unable to read inode from dev 0x%04x\n", + inode->i_dev); + return; + } + raw_inode = ((struct minix_inode *) bh->b_data) + + (ino-1)%MINIX_INODES_PER_BLOCK; + inode->i_mode = raw_inode->i_mode; + inode->i_uid = raw_inode->i_uid; + inode->i_gid = raw_inode->i_gid; + inode->i_nlink = raw_inode->i_nlinks; + inode->i_size = raw_inode->i_size; + inode->i_mtime = inode->i_atime = inode->i_ctime = raw_inode->i_time; + inode->i_blocks = inode->i_blksize = 0; + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + inode->i_rdev = raw_inode->i_zone[0]; + else for (block = 0; block < 9; block++) + inode->u.minix_i.i_data[block] = raw_inode->i_zone[block]; + brelse(bh); + if (S_ISREG(inode->i_mode)) + inode->i_op = &minix_file_inode_operations; + else if (S_ISDIR(inode->i_mode)) + inode->i_op = &minix_dir_inode_operations; + else if (S_ISLNK(inode->i_mode)) + inode->i_op = &minix_symlink_inode_operations; + else if (S_ISCHR(inode->i_mode)) + inode->i_op = &chrdev_inode_operations; + else if (S_ISBLK(inode->i_mode)) + inode->i_op = &blkdev_inode_operations; + else if (S_ISFIFO(inode->i_mode)) + init_fifo(inode); +} + +static struct buffer_head * minix_update_inode(struct inode * inode) +{ + struct buffer_head * bh; + struct minix_inode * raw_inode; + int ino, block; + + ino = inode->i_ino; + if (!ino || ino >= inode->i_sb->u.minix_sb.s_ninodes) { + printk("Bad inode number on dev 0x%04x: %d is out of range\n", + inode->i_dev, ino); + inode->i_dirt = 0; + return 0; + } + block = 2 + inode->i_sb->u.minix_sb.s_imap_blocks + 
inode->i_sb->u.minix_sb.s_zmap_blocks + + (ino-1)/MINIX_INODES_PER_BLOCK; + if (!(bh=bread(inode->i_dev, block, BLOCK_SIZE))) { + printk("unable to read i-node block\n"); + inode->i_dirt = 0; + return 0; + } + raw_inode = ((struct minix_inode *)bh->b_data) + + (ino-1)%MINIX_INODES_PER_BLOCK; + raw_inode->i_mode = inode->i_mode; + raw_inode->i_uid = inode->i_uid; + raw_inode->i_gid = inode->i_gid; + raw_inode->i_nlinks = inode->i_nlink; + raw_inode->i_size = inode->i_size; + raw_inode->i_time = inode->i_mtime; + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + raw_inode->i_zone[0] = inode->i_rdev; + else for (block = 0; block < 9; block++) + raw_inode->i_zone[block] = inode->u.minix_i.i_data[block]; + inode->i_dirt=0; + mark_buffer_dirty(bh, 1); + return bh; +} + +void minix_write_inode(struct inode * inode) +{ + struct buffer_head *bh; + bh = minix_update_inode(inode); + brelse(bh); +} + +int minix_sync_inode(struct inode * inode) +{ + int err = 0; + struct buffer_head *bh; + + bh = minix_update_inode(inode); + if (bh && bh->b_dirt) + { + ll_rw_block(WRITE, 1, &bh); + wait_on_buffer(bh); + if (bh->b_req && !bh->b_uptodate) + { + printk ("IO error syncing minix inode [%04x:%08lx]\n", + inode->i_dev, inode->i_ino); + err = -1; + } + } + else if (!bh) + err = -1; + brelse (bh); + return err; +} diff --git a/fs/minix/namei.c b/fs/minix/namei.c new file mode 100644 index 000000000..8246e3ce7 --- /dev/null +++ b/fs/minix/namei.c @@ -0,0 +1,830 @@ +/* + * linux/fs/minix/namei.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/sched.h> +#include <linux/minix_fs.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/errno.h> + +#include <asm/segment.h> + +/* + * comment out this line if you want names > info->s_namelen chars to be + * truncated. Else they will be disallowed (ENAMETOOLONG). 
/*
 * namecompare()
 *
 * Compares a name of 'len' bytes with the name stored in a fixed-width
 * directory slot of 'maxlen' bytes.  A stored name shorter than the slot
 * is NUL-terminated; one that fills the slot has no terminator.
 *
 *	len	length of 'name' in bytes
 *	maxlen	width of the name field in the directory entry
 *	name	name being looked for (need not be NUL-terminated)
 *	buffer	name field of the directory entry
 *
 * Returns 1 when the names are equal, 0 otherwise.  A negative 'len' never
 * matches; without that guard it would reach memcmp() as a huge size_t.
 */
static inline int namecompare(int len, int maxlen,
	const char * name, const char * buffer)
{
	if (len < 0 || len > maxlen)
		return 0;
	/* the stored name must end exactly where ours does */
	if (len < maxlen && buffer[len])
		return 0;
	return !memcmp(name, buffer, len);
}
+ */ +static struct buffer_head * minix_find_entry(struct inode * dir, + const char * name, int namelen, struct minix_dir_entry ** res_dir) +{ + unsigned long block, offset; + struct buffer_head * bh; + struct minix_sb_info * info; + + *res_dir = NULL; + if (!dir || !dir->i_sb) + return NULL; + info = &dir->i_sb->u.minix_sb; + if (namelen > info->s_namelen) { +#ifdef NO_TRUNCATE + return NULL; +#else + namelen = info->s_namelen; +#endif + } + bh = NULL; + block = offset = 0; + while (block*BLOCK_SIZE+offset < dir->i_size) { + if (!bh) { + bh = minix_bread(dir,block,0); + if (!bh) { + block++; + continue; + } + } + *res_dir = (struct minix_dir_entry *) (bh->b_data + offset); + if (minix_match(namelen,name,bh,&offset,info)) + return bh; + if (offset < bh->b_size) + continue; + brelse(bh); + bh = NULL; + offset = 0; + block++; + } + brelse(bh); + *res_dir = NULL; + return NULL; +} + +int minix_lookup(struct inode * dir,const char * name, int len, + struct inode ** result) +{ + int ino; + struct minix_dir_entry * de; + struct buffer_head * bh; + + *result = NULL; + if (!dir) + return -ENOENT; + if (!S_ISDIR(dir->i_mode)) { + iput(dir); + return -ENOENT; + } + if (!(bh = minix_find_entry(dir,name,len,&de))) { + iput(dir); + return -ENOENT; + } + ino = de->inode; + brelse(bh); + if (!(*result = iget(dir->i_sb,ino))) { + iput(dir); + return -EACCES; + } + iput(dir); + return 0; +} + +/* + * minix_add_entry() + * + * adds a file entry to the specified directory, returning a possible + * error value if it fails. + * + * NOTE!! The inode part of 'de' is left at 0 - which means you + * may not sleep between calling this and putting something into + * the entry, as someone else might have used it while you slept. 
/*
 * minix_add_entry()
 *
 * Finds (or creates by growing the directory) a free slot in 'dir' and
 * writes 'name' into it, zero-padded to the filesystem's name width.
 *
 *	dir	directory to add to
 *	name	name of the new entry
 *	namelen	length of 'name'; longer than s_namelen is truncated, or
 *		rejected with -ENAMETOOLONG when NO_TRUNCATE is defined
 *	res_buf	receives the buffer holding the new entry; the caller
 *		must release it
 *	res_dir	receives a pointer to the new entry inside that buffer
 *
 * Returns 0 on success, -ENOENT for a missing directory or empty name,
 * -ENOSPC when a directory block cannot be read or allocated, and
 * -EEXIST when an entry with the same name is already present.
 *
 * NOTE!! The inode part of the entry is left at 0 - the caller has to
 * fill it in, and may not sleep before doing so, as someone else might
 * claim the slot in the meantime.
 */
static int minix_add_entry(struct inode * dir,
	const char * name, int namelen,
	struct buffer_head ** res_buf,
	struct minix_dir_entry ** res_dir)
{
	int i;
	unsigned long block, offset;
	struct buffer_head * bh;
	struct minix_dir_entry * de;
	struct minix_sb_info * info;

	*res_buf = NULL;
	*res_dir = NULL;
	if (!dir || !dir->i_sb)
		return -ENOENT;
	info = &dir->i_sb->u.minix_sb;
	if (namelen > info->s_namelen) {
#ifdef NO_TRUNCATE
		return -ENAMETOOLONG;
#else
		namelen = info->s_namelen;
#endif
	}
	if (!namelen)
		return -ENOENT;
	bh = NULL;
	block = offset = 0;
	/* walk the directory slot by slot until a free one turns up */
	while (1) {
		if (!bh) {
			/* create=1: a missing directory block gets allocated */
			bh = minix_bread(dir,block,1);
			if (!bh)
				return -ENOSPC;
		}
		de = (struct minix_dir_entry *) (bh->b_data + offset);
		offset += info->s_dirsize;
		/* slot lies past the current end: extend the directory over it */
		if (block*bh->b_size + offset > dir->i_size) {
			de->inode = 0;
			dir->i_size = block*bh->b_size + offset;
			dir->i_dirt = 1;
		}
		if (de->inode) {
			/* slot in use: only interesting if it already has our name */
			if (namecompare(namelen, info->s_namelen, name, de->name)) {
				brelse(bh);
				return -EEXIST;
			}
		} else {
			/* free slot: store the name, zero-filling the rest of the field */
			dir->i_mtime = dir->i_ctime = CURRENT_TIME;
			for (i = 0; i < info->s_namelen ; i++)
				de->name[i] = (i < namelen) ? name[i] : 0;
			dir->i_version = ++event;
			mark_buffer_dirty(bh, 1);
			*res_dir = de;
			break;
		}
		if (offset < bh->b_size)
			continue;
		/* end of this block: move on to the next one */
		brelse(bh);
		bh = NULL;
		offset = 0;
		block++;
	}
	*res_buf = bh;
	return 0;
}
1; + error = minix_add_entry(dir, name, len, &bh, &de); + if (error) { + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + iput(dir); + return error; + } + de->inode = inode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + iput(inode); + return 0; +} + +int minix_mkdir(struct inode * dir, const char * name, int len, int mode) +{ + int error; + struct inode * inode; + struct buffer_head * bh, *dir_block; + struct minix_dir_entry * de; + struct minix_sb_info * info; + + if (!dir || !dir->i_sb) { + iput(dir); + return -EINVAL; + } + info = &dir->i_sb->u.minix_sb; + bh = minix_find_entry(dir,name,len,&de); + if (bh) { + brelse(bh); + iput(dir); + return -EEXIST; + } + if (dir->i_nlink >= MINIX_LINK_MAX) { + iput(dir); + return -EMLINK; + } + inode = minix_new_inode(dir); + if (!inode) { + iput(dir); + return -ENOSPC; + } + inode->i_op = &minix_dir_inode_operations; + inode->i_size = 2 * info->s_dirsize; + dir_block = minix_bread(inode,0,1); + if (!dir_block) { + iput(dir); + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + return -ENOSPC; + } + de = (struct minix_dir_entry *) dir_block->b_data; + de->inode=inode->i_ino; + strcpy(de->name,"."); + de = (struct minix_dir_entry *) (dir_block->b_data + info->s_dirsize); + de->inode = dir->i_ino; + strcpy(de->name,".."); + inode->i_nlink = 2; + mark_buffer_dirty(dir_block, 1); + brelse(dir_block); + inode->i_mode = S_IFDIR | (mode & 0777 & ~current->fs->umask); + if (dir->i_mode & S_ISGID) + inode->i_mode |= S_ISGID; + inode->i_dirt = 1; + error = minix_add_entry(dir, name, len, &bh, &de); + if (error) { + iput(dir); + inode->i_nlink=0; + iput(inode); + return error; + } + de->inode = inode->i_ino; + mark_buffer_dirty(bh, 1); + dir->i_nlink++; + dir->i_dirt = 1; + iput(dir); + iput(inode); + brelse(bh); + return 0; +} + +/* + * routine to check that the specified directory is empty (for rmdir) + */ +static int empty_dir(struct inode * inode) +{ + unsigned int block, offset; + struct buffer_head * 
bh; + struct minix_dir_entry * de; + struct minix_sb_info * info; + + if (!inode || !inode->i_sb) + return 1; + info = &inode->i_sb->u.minix_sb; + block = 0; + bh = NULL; + offset = 2*info->s_dirsize; + if (inode->i_size & (info->s_dirsize-1)) + goto bad_dir; + if (inode->i_size < offset) + goto bad_dir; + bh = minix_bread(inode,0,0); + if (!bh) + goto bad_dir; + de = (struct minix_dir_entry *) bh->b_data; + if (!de->inode || strcmp(de->name,".")) + goto bad_dir; + de = (struct minix_dir_entry *) (bh->b_data + info->s_dirsize); + if (!de->inode || strcmp(de->name,"..")) + goto bad_dir; + while (block*BLOCK_SIZE+offset < inode->i_size) { + if (!bh) { + bh = minix_bread(inode,block,0); + if (!bh) { + block++; + continue; + } + } + de = (struct minix_dir_entry *) (bh->b_data + offset); + offset += info->s_dirsize; + if (de->inode) { + brelse(bh); + return 0; + } + if (offset < bh->b_size) + continue; + brelse(bh); + bh = NULL; + offset = 0; + block++; + } + brelse(bh); + return 1; +bad_dir: + brelse(bh); + printk("Bad directory on device %04x\n",inode->i_dev); + return 1; +} + +int minix_rmdir(struct inode * dir, const char * name, int len) +{ + int retval; + struct inode * inode; + struct buffer_head * bh; + struct minix_dir_entry * de; + + inode = NULL; + bh = minix_find_entry(dir,name,len,&de); + retval = -ENOENT; + if (!bh) + goto end_rmdir; + retval = -EPERM; + if (!(inode = iget(dir->i_sb, de->inode))) + goto end_rmdir; + if ((dir->i_mode & S_ISVTX) && !fsuser() && + current->fsuid != inode->i_uid && + current->fsuid != dir->i_uid) + goto end_rmdir; + if (inode->i_dev != dir->i_dev) + goto end_rmdir; + if (inode == dir) /* we may not delete ".", but "../dir" is ok */ + goto end_rmdir; + if (!S_ISDIR(inode->i_mode)) { + retval = -ENOTDIR; + goto end_rmdir; + } + if (!empty_dir(inode)) { + retval = -ENOTEMPTY; + goto end_rmdir; + } + if (de->inode != inode->i_ino) { + retval = -ENOENT; + goto end_rmdir; + } + if (inode->i_count > 1) { + retval = -EBUSY; + goto 
end_rmdir; + } + if (inode->i_nlink != 2) + printk("empty directory has nlink!=2 (%d)\n",inode->i_nlink); + de->inode = 0; + dir->i_version = ++event; + mark_buffer_dirty(bh, 1); + inode->i_nlink=0; + inode->i_dirt=1; + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + dir->i_nlink--; + dir->i_dirt=1; + retval = 0; +end_rmdir: + iput(dir); + iput(inode); + brelse(bh); + return retval; +} + +int minix_unlink(struct inode * dir, const char * name, int len) +{ + int retval; + struct inode * inode; + struct buffer_head * bh; + struct minix_dir_entry * de; + +repeat: + retval = -ENOENT; + inode = NULL; + bh = minix_find_entry(dir,name,len,&de); + if (!bh) + goto end_unlink; + if (!(inode = iget(dir->i_sb, de->inode))) + goto end_unlink; + retval = -EPERM; + if (S_ISDIR(inode->i_mode)) + goto end_unlink; + if (de->inode != inode->i_ino) { + iput(inode); + brelse(bh); + current->counter = 0; + schedule(); + goto repeat; + } + if ((dir->i_mode & S_ISVTX) && !fsuser() && + current->fsuid != inode->i_uid && + current->fsuid != dir->i_uid) + goto end_unlink; + if (de->inode != inode->i_ino) { + retval = -ENOENT; + goto end_unlink; + } + if (!inode->i_nlink) { + printk("Deleting nonexistent file (%04x:%lu), %d\n", + inode->i_dev,inode->i_ino,inode->i_nlink); + inode->i_nlink=1; + } + de->inode = 0; + dir->i_version = ++event; + mark_buffer_dirty(bh, 1); + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + dir->i_dirt = 1; + inode->i_nlink--; + inode->i_ctime = dir->i_ctime; + inode->i_dirt = 1; + retval = 0; +end_unlink: + brelse(bh); + iput(inode); + iput(dir); + return retval; +} + +int minix_symlink(struct inode * dir, const char * name, int len, const char * symname) +{ + struct minix_dir_entry * de; + struct inode * inode = NULL; + struct buffer_head * bh = NULL, * name_block = NULL; + int i; + char c; + + if (!(inode = minix_new_inode(dir))) { + iput(dir); + return -ENOSPC; + } + inode->i_mode = S_IFLNK | 0777; + inode->i_op = &minix_symlink_inode_operations; + 
name_block = minix_bread(inode,0,1); + if (!name_block) { + iput(dir); + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + return -ENOSPC; + } + i = 0; + while (i < 1023 && (c=*(symname++))) + name_block->b_data[i++] = c; + name_block->b_data[i] = 0; + mark_buffer_dirty(name_block, 1); + brelse(name_block); + inode->i_size = i; + inode->i_dirt = 1; + bh = minix_find_entry(dir,name,len,&de); + if (bh) { + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + brelse(bh); + iput(dir); + return -EEXIST; + } + i = minix_add_entry(dir, name, len, &bh, &de); + if (i) { + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + iput(dir); + return i; + } + de->inode = inode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + iput(inode); + return 0; +} + +int minix_link(struct inode * oldinode, struct inode * dir, const char * name, int len) +{ + int error; + struct minix_dir_entry * de; + struct buffer_head * bh; + + if (S_ISDIR(oldinode->i_mode)) { + iput(oldinode); + iput(dir); + return -EPERM; + } + if (oldinode->i_nlink >= MINIX_LINK_MAX) { + iput(oldinode); + iput(dir); + return -EMLINK; + } + bh = minix_find_entry(dir,name,len,&de); + if (bh) { + brelse(bh); + iput(dir); + iput(oldinode); + return -EEXIST; + } + error = minix_add_entry(dir, name, len, &bh, &de); + if (error) { + iput(dir); + iput(oldinode); + return error; + } + de->inode = oldinode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + oldinode->i_nlink++; + oldinode->i_ctime = CURRENT_TIME; + oldinode->i_dirt = 1; + iput(oldinode); + return 0; +} + +static int subdir(struct inode * new_inode, struct inode * old_inode) +{ + int ino; + int result; + + new_inode->i_count++; + result = 0; + for (;;) { + if (new_inode == old_inode) { + result = 1; + break; + } + if (new_inode->i_dev != old_inode->i_dev) + break; + ino = new_inode->i_ino; + if (minix_lookup(new_inode,"..",2,&new_inode)) + break; + if (new_inode->i_ino == ino) + break; + } + iput(new_inode); + return result; 
/*
 * Inode number stored in the second directory entry of a directory block,
 * i.e. the ".." entry written by minix_mkdir().  Relies on a local
 * variable 'info' pointing at the minix superblock info.
 */
#define PARENT_INO(buffer) \
(((struct minix_dir_entry *) ((buffer)+info->s_dirsize))->inode)

/*
 * rename uses retrying to avoid race-conditions: at least they should be minimal.
 * it tries to allocate all the blocks, then sanity-checks, and if the sanity-
 * checks fail, it tries to restart itself again. Very practical - no changes
 * are done until we know everything works ok.. and then all the changes can be
 * done in one fell swoop when we have claimed all the buffers needed.
 *
 * Anybody can rename anything with this: the permission checks are left to the
 * higher-level routines.
 *
 * The references on old_dir and new_dir are dropped before returning, on
 * success and on failure alike.  Returns 0 on success or a negative errno:
 * -ENOENT (no source entry or inode), -EPERM (sticky-bit ownership test),
 * -EISDIR / -ENOTDIR (file/directory mismatch with an existing target),
 * -EINVAL (target directory lies below the source), -ENOTEMPTY, -EBUSY,
 * -EIO (source directory block unreadable or ".." not pointing at old_dir),
 * -EMLINK, or the error from minix_add_entry().
 */
static int do_minix_rename(struct inode * old_dir, const char * old_name, int old_len,
	struct inode * new_dir, const char * new_name, int new_len)
{
	struct inode * old_inode, * new_inode;
	struct buffer_head * old_bh, * new_bh, * dir_bh;
	struct minix_dir_entry * old_de, * new_de;
	struct minix_sb_info * info;
	int retval;

	info = &old_dir->i_sb->u.minix_sb;
	goto start_up;
try_again:
	/* a sanity check failed: drop all we hold except the two dirs, then retry */
	brelse(old_bh);
	brelse(new_bh);
	brelse(dir_bh);
	iput(old_inode);
	iput(new_inode);
	current->counter = 0;
	schedule();
start_up:
	old_inode = new_inode = NULL;
	old_bh = new_bh = dir_bh = NULL;
	old_bh = minix_find_entry(old_dir,old_name,old_len,&old_de);
	retval = -ENOENT;
	if (!old_bh)
		goto end_rename;
	old_inode = __iget(old_dir->i_sb, old_de->inode,0); /* don't cross mnt-points */
	if (!old_inode)
		goto end_rename;
	retval = -EPERM;
	/* sticky source directory: only the owner of file or directory, or fsuser */
	if ((old_dir->i_mode & S_ISVTX) &&
	    current->fsuid != old_inode->i_uid &&
	    current->fsuid != old_dir->i_uid && !fsuser())
		goto end_rename;
	new_bh = minix_find_entry(new_dir,new_name,new_len,&new_de);
	if (new_bh) {
		new_inode = __iget(new_dir->i_sb, new_de->inode, 0);
		/* entry without an obtainable inode is treated as no target at all */
		if (!new_inode) {
			brelse(new_bh);
			new_bh = NULL;
		}
	}
	/* source and target are already the same inode: nothing to do */
	if (new_inode == old_inode) {
		retval = 0;
		goto end_rename;
	}
	/* an existing target directory may only be replaced by a directory */
	if (new_inode && S_ISDIR(new_inode->i_mode)) {
		retval = -EISDIR;
		if (!S_ISDIR(old_inode->i_mode))
			goto end_rename;
		retval = -EINVAL;
		if (subdir(new_dir, old_inode))
			goto end_rename;
		retval = -ENOTEMPTY;
		if (!empty_dir(new_inode))
			goto end_rename;
		retval = -EBUSY;
		if (new_inode->i_count > 1)
			goto end_rename;
	}
	retval = -EPERM;
	/* same sticky-bit test for the entry being replaced */
	if (new_inode && (new_dir->i_mode & S_ISVTX) &&
	    current->fsuid != new_inode->i_uid &&
	    current->fsuid != new_dir->i_uid && !fsuser())
		goto end_rename;
	if (S_ISDIR(old_inode->i_mode)) {
		retval = -ENOTDIR;
		if (new_inode && !S_ISDIR(new_inode->i_mode))
			goto end_rename;
		retval = -EINVAL;
		if (subdir(new_dir, old_inode))
			goto end_rename;
		retval = -EIO;
		/* first block of the moved directory holds its ".." entry */
		dir_bh = minix_bread(old_inode,0,0);
		if (!dir_bh)
			goto end_rename;
		if (PARENT_INO(dir_bh->b_data) != old_dir->i_ino)
			goto end_rename;
		retval = -EMLINK;
		/* a brand-new subdirectory adds a link to new_dir */
		if (!new_inode && new_dir->i_nlink >= MINIX_LINK_MAX)
			goto end_rename;
	}
	if (!new_bh) {
		retval = minix_add_entry(new_dir,new_name,new_len,&new_bh,&new_de);
		if (retval)
			goto end_rename;
	}
/* sanity checking before doing the rename - avoid races */
	if (new_inode && (new_de->inode != new_inode->i_ino))
		goto try_again;
	if (new_de->inode && !new_inode)
		goto try_again;
	if (old_de->inode != old_inode->i_ino)
		goto try_again;
/* ok, that's it */
	old_de->inode = 0;
	new_de->inode = old_inode->i_ino;
	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
	old_dir->i_dirt = 1;
	old_dir->i_version = ++event;
	new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME;
	new_dir->i_dirt = 1;
	new_dir->i_version = ++event;
	/* the replaced inode loses the link its entry represented */
	if (new_inode) {
		new_inode->i_nlink--;
		new_inode->i_ctime = CURRENT_TIME;
		new_inode->i_dirt = 1;
	}
	mark_buffer_dirty(old_bh, 1);
	mark_buffer_dirty(new_bh, 1);
	/* dir_bh is only set when a directory is being moved */
	if (dir_bh) {
		PARENT_INO(dir_bh->b_data) = new_dir->i_ino;
		mark_buffer_dirty(dir_bh, 1);
		old_dir->i_nlink--;
		old_dir->i_dirt = 1;
		if (new_inode) {
			/* second decrement for a replaced directory */
			new_inode->i_nlink--;
			new_inode->i_dirt = 1;
		} else {
			new_dir->i_nlink++;
			new_dir->i_dirt = 1;
		}
	}
	retval = 0;
end_rename:
	brelse(dir_bh);
	brelse(old_bh);
	brelse(new_bh);
	iput(old_inode);
	iput(new_inode);
	iput(old_dir);
	iput(new_dir);
	return retval;
}
+ */ +struct inode_operations minix_symlink_inode_operations = { + NULL, /* no file-operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + minix_readlink, /* readlink */ + minix_follow_link, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static int minix_follow_link(struct inode * dir, struct inode * inode, + int flag, int mode, struct inode ** res_inode) +{ + int error; + struct buffer_head * bh; + + *res_inode = NULL; + if (!dir) { + dir = current->fs->root; + dir->i_count++; + } + if (!inode) { + iput(dir); + return -ENOENT; + } + if (!S_ISLNK(inode->i_mode)) { + iput(dir); + *res_inode = inode; + return 0; + } + if (current->link_count > 5) { + iput(inode); + iput(dir); + return -ELOOP; + } + if (!(bh = minix_bread(inode, 0, 0))) { + iput(inode); + iput(dir); + return -EIO; + } + iput(inode); + current->link_count++; + error = open_namei(bh->b_data,flag,mode,res_inode,dir); + current->link_count--; + brelse(bh); + return error; +} + +static int minix_readlink(struct inode * inode, char * buffer, int buflen) +{ + struct buffer_head * bh; + int i; + char c; + + if (!S_ISLNK(inode->i_mode)) { + iput(inode); + return -EINVAL; + } + if (buflen > 1023) + buflen = 1023; + bh = minix_bread(inode, 0, 0); + iput(inode); + if (!bh) + return 0; + i = 0; + while (i<buflen && (c = bh->b_data[i])) { + i++; + put_fs_byte(c,buffer++); + } + brelse(bh); + return i; +} diff --git a/fs/minix/truncate.c b/fs/minix/truncate.c new file mode 100644 index 000000000..0b127b9b8 --- /dev/null +++ b/fs/minix/truncate.c @@ -0,0 +1,184 @@ +/* + * linux/fs/truncate.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/minix_fs.h> +#include <linux/stat.h> +#include <linux/fcntl.h> + +/* + * Truncate has the most races in the whole 
/*
 * trunc_direct()
 *
 * Frees the direct blocks (inode slots 0-6) that lie beyond the current
 * file size.  A block whose buffer is referenced by someone else, or
 * whose slot changed while we looked, is left alone for now.
 *
 * Returns non-zero if at least one block had to be skipped, so that the
 * caller should try again later; 0 otherwise.
 */
static int trunc_direct(struct inode * inode)
{
	unsigned short * p;
	struct buffer_head * bh;
	int i, tmp;
	int retry = 0;
/*
 * Index of the first 1024-byte block wholly beyond i_size.  Being a macro
 * it re-reads inode->i_size on every use, so a size change is noticed.
 */
#define DIRECT_BLOCK ((inode->i_size + 1023) >> 10)

repeat:
	for (i = DIRECT_BLOCK ; i < 7 ; i++) {
		p = i + inode->u.minix_i.i_data;
		if (!(tmp = *p))
			continue;
		bh = get_hash_table(inode->i_dev,tmp,BLOCK_SIZE);
		/*
		 * The size grew past this block in the meantime (presumably
		 * get_hash_table() can sleep - confirm): start over.
		 */
		if (i < DIRECT_BLOCK) {
			brelse(bh);
			goto repeat;
		}
		/* buffer in use by somebody else, or slot changed: retry later */
		if ((bh && bh->b_count != 1) || tmp != *p) {
			retry = 1;
			brelse(bh);
			continue;
		}
		*p = 0;
		inode->i_dirt = 1;
		brelse(bh);
		minix_free_block(inode->i_sb,tmp);
	}
	return retry;
}
minix_free_block(inode->i_sb,tmp); + } + ind = (unsigned short *) ind_bh->b_data; + for (i = 0; i < 512; i++) + if (*(ind++)) + break; + if (i >= 512) + if (ind_bh->b_count != 1) + retry = 1; + else { + tmp = *p; + *p = 0; + minix_free_block(inode->i_sb,tmp); + } + brelse(ind_bh); + return retry; +} + +static int trunc_dindirect(struct inode * inode) +{ + int i, tmp; + struct buffer_head * dind_bh; + unsigned short * dind, * p; + int retry = 0; +#define DINDIRECT_BLOCK ((DIRECT_BLOCK-(512+7))>>9) + + p = 8 + inode->u.minix_i.i_data; + if (!(tmp = *p)) + return 0; + dind_bh = bread(inode->i_dev, tmp, BLOCK_SIZE); + if (tmp != *p) { + brelse(dind_bh); + return 1; + } + if (!dind_bh) { + *p = 0; + return 0; + } +repeat: + for (i = DINDIRECT_BLOCK ; i < 512 ; i ++) { + if (i < 0) + i = 0; + if (i < DINDIRECT_BLOCK) + goto repeat; + dind = i+(unsigned short *) dind_bh->b_data; + retry |= trunc_indirect(inode,7+512+(i<<9),dind); + mark_buffer_dirty(dind_bh, 1); + } + dind = (unsigned short *) dind_bh->b_data; + for (i = 0; i < 512; i++) + if (*(dind++)) + break; + if (i >= 512) + if (dind_bh->b_count != 1) + retry = 1; + else { + tmp = *p; + *p = 0; + inode->i_dirt = 1; + minix_free_block(inode->i_sb,tmp); + } + brelse(dind_bh); + return retry; +} + +void minix_truncate(struct inode * inode) +{ + int retry; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + while (1) { + retry = trunc_direct(inode); + retry |= trunc_indirect(inode,7,inode->u.minix_i.i_data+7); + retry |= trunc_dindirect(inode); + if (!retry) + break; + current->counter = 0; + schedule(); + } + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_dirt = 1; +} diff --git a/fs/msdos/Makefile b/fs/msdos/Makefile new file mode 100644 index 000000000..2c690d3ea --- /dev/null +++ b/fs/msdos/Makefile @@ -0,0 +1,34 @@ +# +# Makefile for the linux MS-DOS-filesystem routines. +# +# Note! 
Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +ifndef CONFIG_MSDOS_FS +CFLAGS := $(CFLAGS) -DMODULE +endif + +.c.s: + $(CC) $(CFLAGS) -S $< +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< + +OBJS= namei.o inode.o file.o dir.o misc.o fat.o + +msdos.o: $(OBJS) + $(LD) -r -o msdos.o $(OBJS) + +dep: + $(CPP) -M *.c > .depend + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/fs/msdos/dir.c b/fs/msdos/dir.c new file mode 100644 index 000000000..2138b8778 --- /dev/null +++ b/fs/msdos/dir.c @@ -0,0 +1,126 @@ +/* + * linux/fs/msdos/dir.c + * + * Written 1992,1993 by Werner Almesberger + * + * MS-DOS directory handling functions + */ + +#include <asm/segment.h> + +#include <linux/fs.h> +#include <linux/msdos_fs.h> +#include <linux/errno.h> +#include <linux/stat.h> +#include <linux/string.h> + +#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) +#define ROUND_UP(x) (((x)+3) & ~3) + + +#define PRINTK(X) + +static int msdos_dir_read(struct inode * inode,struct file * filp, char * buf,int count) +{ + return -EISDIR; +} + +static struct file_operations msdos_dir_operations = { + NULL, /* lseek - default */ + msdos_dir_read, /* read */ + NULL, /* write - bad */ + msdos_readdir, /* readdir */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + file_fsync /* fsync */ +}; + +struct inode_operations msdos_dir_inode_operations = { + &msdos_dir_operations, /* default directory file-ops */ + msdos_create, /* create */ + msdos_lookup, /* lookup */ + NULL, /* link */ + msdos_unlink, /* unlink */ + NULL, /* symlink */ + msdos_mkdir, /* mkdir */ + msdos_rmdir, /* rmdir */ + NULL, /* mknod */ 
+ msdos_rename, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + msdos_bmap, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +int msdos_readdir( + struct inode *inode, + struct file *filp, + struct dirent *dirent, /* dirent in user space */ + int count) +{ + int ino,i,i2,last; + char c,*walk; + struct buffer_head *bh; + struct msdos_dir_entry *de; + + if (!inode || !S_ISDIR(inode->i_mode)) return -EBADF; + if (inode->i_ino == MSDOS_ROOT_INO) { +/* Fake . and .. for the root directory. */ + if (filp->f_pos == 2) filp->f_pos = 0; + else if (filp->f_pos < 2) { + walk = filp->f_pos++ ? ".." : "."; + for (i = 0; *walk; walk++) + put_fs_byte(*walk,dirent->d_name+i++); + put_fs_long(MSDOS_ROOT_INO,&dirent->d_ino); + put_fs_byte(0,dirent->d_name+i); + put_fs_word(i,&dirent->d_reclen); + return ROUND_UP(NAME_OFFSET(dirent) + i + 1); + } + } + if (filp->f_pos & (sizeof(struct msdos_dir_entry)-1)) return -ENOENT; + bh = NULL; + while ((ino = msdos_get_entry(inode,&filp->f_pos,&bh,&de)) > -1) { + if (!IS_FREE(de->name) && !(de->attr & ATTR_VOLUME)) { + char bufname[13]; + char *ptname = bufname; + for (i = last = 0; i < 8; i++) { + if (!(c = de->name[i])) break; + if (c >= 'A' && c <= 'Z') c += 32; + if (c != ' ') + last = i+1; + ptname[i] = c; + } + i = last; + ptname[i] = '.'; + i++; + for (i2 = 0; i2 < 3; i2++) { + if (!(c = de->ext[i2])) break; + if (c >= 'A' && c <= 'Z') c += 32; + if (c != ' ') + last = i+1; + ptname[i] = c; + i++; + } + if ((i = last) != 0) { + if (!strcmp(de->name,MSDOS_DOT)) + ino = inode->i_ino; + else if (!strcmp(de->name,MSDOS_DOTDOT)) + ino = msdos_parent_ino(inode,0); + bufname[i] = '\0'; + put_fs_long(ino,&dirent->d_ino); + memcpy_tofs(dirent->d_name,bufname,i+1); + put_fs_word(i,&dirent->d_reclen); + PRINTK (("readdir avant brelse\n")); + brelse(bh); + PRINTK (("readdir retourne %d\n",i)); + return ROUND_UP(NAME_OFFSET(dirent) + i + 1); + } + } + } + if (bh) brelse(bh); + return 0; +} diff --git a/fs/msdos/fat.c 
b/fs/msdos/fat.c new file mode 100644 index 000000000..651e58b24 --- /dev/null +++ b/fs/msdos/fat.c @@ -0,0 +1,291 @@ +/* + * linux/fs/msdos/fat.c + * + * Written 1992,1993 by Werner Almesberger + */ + +#include <linux/msdos_fs.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/stat.h> + + +static struct fat_cache *fat_cache,cache[FAT_CACHE]; + +/* Returns the this'th FAT entry, -1 if it is an end-of-file entry. If + new_value is != -1, that FAT entry is replaced by it. */ + +int fat_access(struct super_block *sb,int nr,int new_value) +{ + struct buffer_head *bh,*bh2,*c_bh,*c_bh2; + unsigned char *p_first,*p_last; + int first,last,next,copy; + + if ((unsigned) (nr-2) >= MSDOS_SB(sb)->clusters) return 0; + if (MSDOS_SB(sb)->fat_bits == 16) first = last = nr*2; + else { + first = nr*3/2; + last = first+1; + } + if (!(bh = msdos_sread(sb->s_dev,MSDOS_SB(sb)->fat_start+(first >> + SECTOR_BITS)))) { + printk("bread in fat_access failed\n"); + return 0; + } + if ((first >> SECTOR_BITS) == (last >> SECTOR_BITS)) + bh2 = bh; + else { + if (!(bh2 = msdos_sread(sb->s_dev,MSDOS_SB(sb)->fat_start+(last + >> SECTOR_BITS)))) { + brelse(bh); + printk("bread in fat_access failed\n"); + return 0; + } + } + if (MSDOS_SB(sb)->fat_bits == 16) { + p_first = p_last = NULL; /* GCC needs that stuff */ + next = CF_LE_W(((unsigned short *) bh->b_data)[(first & + (SECTOR_SIZE-1)) >> 1]); + if (next >= 0xfff7) next = -1; + } + else { + p_first = &((unsigned char *) bh->b_data)[first & (SECTOR_SIZE-1)]; + p_last = &((unsigned char *) bh2->b_data)[(first+1) & + (SECTOR_SIZE-1)]; + if (nr & 1) next = ((*p_first >> 4) | (*p_last << 4)) & 0xfff; + else next = (*p_first+(*p_last << 8)) & 0xfff; + if (next >= 0xff7) next = -1; + } + if (new_value != -1) { + if (MSDOS_SB(sb)->fat_bits == 16) + ((unsigned short *) bh->b_data)[(first & (SECTOR_SIZE-1)) >> + 1] = CT_LE_W(new_value); + else { + if (nr & 1) { + *p_first = (*p_first & 0xf) | (new_value << 
4); + *p_last = new_value >> 4; + } + else { + *p_first = new_value & 0xff; + *p_last = (*p_last & 0xf0) | (new_value >> 8); + } + mark_buffer_dirty(bh2, 1); + } + mark_buffer_dirty(bh, 1); + for (copy = 1; copy < MSDOS_SB(sb)->fats; copy++) { + if (!(c_bh = msdos_sread(sb->s_dev,MSDOS_SB(sb)-> + fat_start+(first >> SECTOR_BITS)+MSDOS_SB(sb)-> + fat_length*copy))) break; + memcpy(c_bh->b_data,bh->b_data,SECTOR_SIZE); + mark_buffer_dirty(c_bh, 1); + if (bh != bh2) { + if (!(c_bh2 = msdos_sread(sb->s_dev, + MSDOS_SB(sb)->fat_start+(first >> + SECTOR_BITS)+MSDOS_SB(sb)->fat_length*copy + +1))) { + brelse(c_bh); + break; + } + memcpy(c_bh2->b_data,bh2->b_data,SECTOR_SIZE); + brelse(c_bh2); + } + brelse(c_bh); + } + } + brelse(bh); + if (bh != bh2) brelse(bh2); + return next; +} + + +void cache_init(void) +{ + static int initialized = 0; + int count; + + if (initialized) return; + fat_cache = &cache[0]; + for (count = 0; count < FAT_CACHE; count++) { + cache[count].device = 0; + cache[count].next = count == FAT_CACHE-1 ? 
NULL : + &cache[count+1]; + } + initialized = 1; +} + + +void cache_lookup(struct inode *inode,int cluster,int *f_clu,int *d_clu) +{ + struct fat_cache *walk; + +#ifdef DEBUG +printk("cache lookup: <%d,%d> %d (%d,%d) -> ",inode->i_dev,inode->i_ino,cluster, + *f_clu,*d_clu); +#endif + for (walk = fat_cache; walk; walk = walk->next) + if (inode->i_dev == walk->device && walk->ino == inode->i_ino && + walk->file_cluster <= cluster && walk->file_cluster > + *f_clu) { + *d_clu = walk->disk_cluster; +#ifdef DEBUG +printk("cache hit: %d (%d)\n",walk->file_cluster,*d_clu); +#endif + if ((*f_clu = walk->file_cluster) == cluster) return; + } +#ifdef DEBUG +printk("cache miss\n"); +#endif +} + + +#ifdef DEBUG +static void list_cache(void) +{ + struct fat_cache *walk; + + for (walk = fat_cache; walk; walk = walk->next) { + if (walk->device) + printk("<%d,%d>(%d,%d) ",walk->device,walk->ino, + walk->file_cluster,walk->disk_cluster); + else printk("-- "); + } + printk("\n"); +} +#endif + + +void cache_add(struct inode *inode,int f_clu,int d_clu) +{ + struct fat_cache *walk,*last; + +#ifdef DEBUG +printk("cache add: <%d,%d> %d (%d)\n",inode->i_dev,inode->i_ino,f_clu,d_clu); +#endif + last = NULL; + for (walk = fat_cache; walk->next; walk = (last = walk)->next) + if (inode->i_dev == walk->device && walk->ino == inode->i_ino && + walk->file_cluster == f_clu) { + if (walk->disk_cluster != d_clu) { + printk("FAT cache corruption"); + cache_inval_inode(inode); + return; + } + /* update LRU */ + if (last == NULL) return; + last->next = walk->next; + walk->next = fat_cache; + fat_cache = walk; +#ifdef DEBUG +list_cache(); +#endif + return; + } + walk->device = inode->i_dev; + walk->ino = inode->i_ino; + walk->file_cluster = f_clu; + walk->disk_cluster = d_clu; + last->next = NULL; + walk->next = fat_cache; + fat_cache = walk; +#ifdef DEBUG +list_cache(); +#endif +} + + +/* Cache invalidation occurs rarely, thus the LRU chain is not updated. It + fixes itself after a while. 
*/ + +void cache_inval_inode(struct inode *inode) +{ + struct fat_cache *walk; + + for (walk = fat_cache; walk; walk = walk->next) + if (walk->device == inode->i_dev && walk->ino == inode->i_ino) + walk->device = 0; +} + + +void cache_inval_dev(int device) +{ + struct fat_cache *walk; + + for (walk = fat_cache; walk; walk = walk->next) + if (walk->device == device) walk->device = 0; +} + + +int get_cluster(struct inode *inode,int cluster) +{ + int nr,count; + + if (!(nr = MSDOS_I(inode)->i_start)) return 0; + if (!cluster) return nr; + count = 0; + for (cache_lookup(inode,cluster,&count,&nr); count < cluster; + count++) { + if ((nr = fat_access(inode->i_sb,nr,-1)) == -1) return 0; + if (!nr) return 0; + } + cache_add(inode,cluster,nr); + return nr; +} + + +int msdos_smap(struct inode *inode,int sector) +{ + struct msdos_sb_info *sb; + int cluster,offset; + + sb = MSDOS_SB(inode->i_sb); + if (inode->i_ino == MSDOS_ROOT_INO || (S_ISDIR(inode->i_mode) && + !MSDOS_I(inode)->i_start)) { + if (sector >= sb->dir_entries >> MSDOS_DPS_BITS) return 0; + return sector+sb->dir_start; + } + cluster = sector/sb->cluster_size; + offset = sector % sb->cluster_size; + if (!(cluster = get_cluster(inode,cluster))) return 0; + return (cluster-2)*sb->cluster_size+sb->data_start+offset; +} + + +/* Free all clusters after the skip'th cluster. Doesn't use the cache, + because this way we get an additional sanity check. */ + +int fat_free(struct inode *inode,int skip) +{ + int nr,last; + + if (!(nr = MSDOS_I(inode)->i_start)) return 0; + last = 0; + while (skip--) { + last = nr; + if ((nr = fat_access(inode->i_sb,nr,-1)) == -1) return 0; + if (!nr) { + printk("fat_free: skipped EOF\n"); + return -EIO; + } + } + if (last) + fat_access(inode->i_sb,last,MSDOS_SB(inode->i_sb)->fat_bits == + 12 ? 
0xff8 : 0xfff8); + else { + MSDOS_I(inode)->i_start = 0; + inode->i_dirt = 1; + } + lock_fat(inode->i_sb); + while (nr != -1) { + if (!(nr = fat_access(inode->i_sb,nr,0))) { + fs_panic(inode->i_sb,"fat_free: deleting beyond EOF"); + break; + } + if (MSDOS_SB(inode->i_sb)->free_clusters != -1) + MSDOS_SB(inode->i_sb)->free_clusters++; + inode->i_blocks -= MSDOS_SB(inode->i_sb)->cluster_size; + } + unlock_fat(inode->i_sb); + cache_inval_inode(inode); + return 0; +} diff --git a/fs/msdos/file.c b/fs/msdos/file.c new file mode 100644 index 000000000..fb41fff21 --- /dev/null +++ b/fs/msdos/file.c @@ -0,0 +1,315 @@ +/* + * linux/fs/msdos/file.c + * + * Written 1992,1993 by Werner Almesberger + * + * MS-DOS regular file handling primitives + */ + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/sched.h> +#include <linux/locks.h> +#include <linux/fs.h> +#include <linux/msdos_fs.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/string.h> + +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) +#define MAX(a,b) (((a) > (b)) ? 
(a) : (b)) + +#define PRINTK(x) +#define Printk(x) printk x + +static struct file_operations msdos_file_operations = { + NULL, /* lseek - default */ + msdos_file_read, /* read */ + msdos_file_write, /* write */ + NULL, /* readdir - bad */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + generic_mmap, /* mmap */ + NULL, /* no special open is needed */ + NULL, /* release */ + file_fsync /* fsync */ +}; + +struct inode_operations msdos_file_inode_operations = { + &msdos_file_operations, /* default file operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + msdos_bmap, /* bmap */ + msdos_truncate, /* truncate */ + NULL, /* permission */ + NULL /* smap */ +}; + +#define MSDOS_PREFETCH 32 +struct msdos_pre { + int file_sector;/* Next sector to read in the prefetch table */ + /* This is relative to the file, not the disk */ + struct buffer_head *bhlist[MSDOS_PREFETCH]; /* All buffers needed */ + int nblist; /* Number of buffers in bhlist */ + int nolist; /* index in bhlist */ +}; +/* + Order the prefetch of more sectors. 
+*/ +static void msdos_prefetch ( + struct inode *inode, + struct msdos_pre *pre, + int nb) /* How many must be prefetch at once */ +{ + struct buffer_head *bhreq[MSDOS_PREFETCH]; /* Buffers not */ + /* already read */ + int nbreq=0; /* Number of buffers in bhreq */ + int i; + for (i=0; i<nb; i++){ + int sector = msdos_smap(inode,pre->file_sector); + if (sector != 0){ + struct buffer_head *bh; + PRINTK (("fsector2 %d -> %d\n",pre->file_sector-1,sector)); + pre->file_sector++; + bh = getblk(inode->i_dev,sector,SECTOR_SIZE); + if (bh == NULL) break; + pre->bhlist[pre->nblist++] = bh; + if (!bh->b_uptodate) bhreq[nbreq++] = bh; + }else{ + break; + } + } + if (nbreq > 0) ll_rw_block (READ,nbreq,bhreq); + for (i=pre->nblist; i<MSDOS_PREFETCH; i++) pre->bhlist[i] = NULL; +} + +/* + Read a file into user space +*/ +int msdos_file_read( + struct inode *inode, + struct file *filp, + char *buf, + int count) +{ + char *start = buf; + char *end = buf + count; + int i; + int left_in_file; + struct msdos_pre pre; + + + if (!inode) { + printk("msdos_file_read: inode = NULL\n"); + return -EINVAL; + } + /* S_ISLNK allows for UMSDOS. Should never happen for normal MSDOS */ + if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) { + printk("msdos_file_read: mode = %07o\n",inode->i_mode); + return -EINVAL; + } + if (filp->f_pos >= inode->i_size || count <= 0) return 0; + /* + Tell the buffer cache which block we expect to read in advance + Since we are limited with the stack, we preread only MSDOS_PREFETCH + because we have to keep the result into the local + arrays pre.bhlist and bhreq. + + Each time we process one block in bhlist, we replace + it by a new prefetch block if needed. + */ + PRINTK (("#### ino %ld pos %ld size %ld count %d\n",inode->i_ino,filp->f_pos,inode->i_size,count)); + { + /* + We must prefetch complete block, so we must + take in account the offset in the first block. 
+ */ + int count_max = (filp->f_pos & (SECTOR_SIZE-1)) + count; + int to_reada; /* How many block to read all at once */ + pre.file_sector = filp->f_pos >> SECTOR_BITS; + to_reada = count_max / SECTOR_SIZE; + if (count_max & (SECTOR_SIZE-1)) to_reada++; + if (filp->f_reada || !MSDOS_I(inode)->i_binary){ + /* Doing a read ahead on ascii file make sure we always */ + /* pre read enough, since we don't know how many blocks */ + /* we really need */ + int ahead = read_ahead[MAJOR(inode->i_dev)]; + PRINTK (("to_reada %d ahead %d\n",to_reada,ahead)); + if (ahead == 0) ahead = 8; + to_reada += ahead; + } + if (to_reada > MSDOS_PREFETCH) to_reada = MSDOS_PREFETCH; + pre.nblist = 0; + msdos_prefetch (inode,&pre,to_reada); + } + pre.nolist = 0; + PRINTK (("count %d ahead %d nblist %d\n",count,read_ahead[MAJOR(inode->i_dev)],pre.nblist)); + while ((left_in_file = inode->i_size - filp->f_pos) > 0 + && buf < end){ + struct buffer_head *bh = pre.bhlist[pre.nolist]; + char *data; + int size,offset; + if (bh == NULL) break; + pre.bhlist[pre.nolist] = NULL; + pre.nolist++; + if (pre.nolist == MSDOS_PREFETCH/2){ + memcpy (pre.bhlist,pre.bhlist+MSDOS_PREFETCH/2 + ,(MSDOS_PREFETCH/2)*sizeof(pre.bhlist[0])); + pre.nblist -= MSDOS_PREFETCH/2; + msdos_prefetch (inode,&pre,MSDOS_PREFETCH/2); + pre.nolist = 0; + } + PRINTK (("file_read pos %ld nblist %d %d %d\n",filp->f_pos,pre.nblist,pre.fetched,count)); + wait_on_buffer(bh); + if (!bh->b_uptodate){ + /* read error ? 
*/ + brelse (bh); + break; + } + offset = filp->f_pos & (SECTOR_SIZE-1); + data = bh->b_data + offset; + size = MIN(SECTOR_SIZE-offset,left_in_file); + if (MSDOS_I(inode)->i_binary) { + size = MIN(size,end-buf); + memcpy_tofs(buf,data,size); + buf += size; + filp->f_pos += size; + }else{ + for (; size && buf < end; size--) { + char ch = *data++; + filp->f_pos++; + if (ch == 26){ + filp->f_pos = inode->i_size; + break; + }else if (ch != '\r'){ + put_fs_byte(ch,buf++); + } + } + } + brelse(bh); + } + PRINTK (("--- %d -> %d\n",count,(int)(buf-start))); + for (i=0; i<pre.nblist; i++) brelse (pre.bhlist[i]); + if (start == buf) return -EIO; + if (!IS_RDONLY(inode)) inode->i_atime = CURRENT_TIME; + filp->f_reada = 1; /* Will be reset if a lseek is done */ + return buf-start; +} + +/* + Write to a file either from user space +*/ +int msdos_file_write( + struct inode *inode, + struct file *filp, + char *buf, + int count) +{ + int sector,offset,size,left,written; + int error,carry; + char *start,*to,ch; + struct buffer_head *bh; + int binary_mode = MSDOS_I(inode)->i_binary; + + if (!inode) { + printk("msdos_file_write: inode = NULL\n"); + return -EINVAL; + } + /* S_ISLNK allows for UMSDOS. Should never happen for normal MSDOS */ + if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) { + printk("msdos_file_write: mode = %07o\n",inode->i_mode); + return -EINVAL; + } +/* + * ok, append may not work when many processes are writing at the same time + * but so what. That way leads to madness anyway. 
+ */ + if (filp->f_flags & O_APPEND) filp->f_pos = inode->i_size; + if (count <= 0) return 0; + error = carry = 0; + for (start = buf; count || carry; count -= size) { + while (!(sector = msdos_smap(inode,filp->f_pos >> SECTOR_BITS))) + if ((error = msdos_add_cluster(inode)) < 0) break; + if (error) { + msdos_truncate(inode); + break; + } + offset = filp->f_pos & (SECTOR_SIZE-1); + size = MIN(SECTOR_SIZE-offset,MAX(carry,count)); + if (binary_mode + && offset == 0 + && (size == SECTOR_SIZE + || filp->f_pos + size >= inode->i_size)){ + /* No need to read the block first since we will */ + /* completely overwrite it */ + /* or at least write past the end of file */ + if (!(bh = getblk(inode->i_dev,sector,SECTOR_SIZE))){ + error = -EIO; + break; + } + }else if (!(bh = msdos_sread(inode->i_dev,sector))) { + error = -EIO; + break; + } + if (binary_mode) { + memcpy_fromfs(bh->b_data+offset,buf,written = size); + buf += size; + } + else { + written = left = SECTOR_SIZE-offset; + to = (char *) bh->b_data+(filp->f_pos & (SECTOR_SIZE-1)); + if (carry) { + *to++ = '\n'; + left--; + carry = 0; + } + for (size = 0; size < count && left; size++) { + if ((ch = get_fs_byte(buf++)) == '\n') { + *to++ = '\r'; + left--; + } + if (!left) carry = 1; + else { + *to++ = ch; + left--; + } + } + written -= left; + } + filp->f_pos += written; + if (filp->f_pos > inode->i_size) { + inode->i_size = filp->f_pos; + inode->i_dirt = 1; + } + bh->b_uptodate = 1; + mark_buffer_dirty(bh, 0); + brelse(bh); + } + if (start == buf) + return error; + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + MSDOS_I(inode)->i_attrs |= ATTR_ARCH; + inode->i_dirt = 1; + return buf-start; +} + +void msdos_truncate(struct inode *inode) +{ + int cluster; + + cluster = SECTOR_SIZE*MSDOS_SB(inode->i_sb)->cluster_size; + (void) fat_free(inode,(inode->i_size+(cluster-1))/cluster); + MSDOS_I(inode)->i_attrs |= ATTR_ARCH; + inode->i_dirt = 1; +} diff --git a/fs/msdos/inode.c b/fs/msdos/inode.c new file mode 100644 index 
000000000..e0577fbef --- /dev/null +++ b/fs/msdos/inode.c @@ -0,0 +1,494 @@ +/* + * linux/fs/msdos/inode.c + * + * Written 1992,1993 by Werner Almesberger + */ + +#include <linux/msdos_fs.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/major.h> +#include <linux/fs.h> +#include <linux/stat.h> +#include <linux/locks.h> + +#ifdef MODULE + #include <linux/module.h> + #include "../../tools/version.h" +#endif + +#include <asm/segment.h> + +extern int *blksize_size[]; + +void msdos_put_inode(struct inode *inode) +{ + struct inode *depend; + struct super_block *sb; + + if (inode->i_nlink) { + if (MSDOS_I(inode)->i_busy) cache_inval_inode(inode); + return; + } + inode->i_size = 0; + msdos_truncate(inode); + depend = MSDOS_I(inode)->i_depend; + sb = inode->i_sb; + clear_inode(inode); + if (depend) { + if (MSDOS_I(depend)->i_old != inode) { + printk("Invalid link (0x%X): expected 0x%X, got 0x%X\n", + (int) depend,(int) inode,(int) MSDOS_I(depend)-> + i_old); + fs_panic(sb,"..."); + return; + } + MSDOS_I(depend)->i_old = NULL; + iput(depend); + } +} + + +void msdos_put_super(struct super_block *sb) +{ + cache_inval_dev(sb->s_dev); + set_blocksize (sb->s_dev,BLOCK_SIZE); + lock_super(sb); + sb->s_dev = 0; + unlock_super(sb); + #ifdef MODULE + MOD_DEC_USE_COUNT; + #endif + return; +} + + +static struct super_operations msdos_sops = { + msdos_read_inode, + msdos_notify_change, + msdos_write_inode, + msdos_put_inode, + msdos_put_super, + NULL, /* added in 0.96c */ + msdos_statfs, + NULL +}; + + +static int parse_options(char *options,char *check,char *conversion,uid_t *uid, + gid_t *gid,int *umask,int *debug,int *fat,int *quiet) +{ + char *this_char,*value; + + *check = 'n'; + *conversion = 'b'; + *uid = current->uid; + *gid = current->gid; + *umask = current->fs->umask; + *debug = *fat = *quiet = 0; + if (!options) return 1; + for (this_char = strtok(options,","); this_char; 
this_char = strtok(NULL,",")) { + if ((value = strchr(this_char,'=')) != NULL) + *value++ = 0; + if (!strcmp(this_char,"check") && value) { + if (value[0] && !value[1] && strchr("rns",*value)) + *check = *value; + else if (!strcmp(value,"relaxed")) *check = 'r'; + else if (!strcmp(value,"normal")) *check = 'n'; + else if (!strcmp(value,"strict")) *check = 's'; + else return 0; + } + else if (!strcmp(this_char,"conv") && value) { + if (value[0] && !value[1] && strchr("bta",*value)) + *conversion = *value; + else if (!strcmp(value,"binary")) *conversion = 'b'; + else if (!strcmp(value,"text")) *conversion = 't'; + else if (!strcmp(value,"auto")) *conversion = 'a'; + else return 0; + } + else if (!strcmp(this_char,"uid")) { + if (!value || !*value) + return 0; + *uid = simple_strtoul(value,&value,0); + if (*value) + return 0; + } + else if (!strcmp(this_char,"gid")) { + if (!value || !*value) + return 0; + *gid = simple_strtoul(value,&value,0); + if (*value) + return 0; + } + else if (!strcmp(this_char,"umask")) { + if (!value || !*value) + return 0; + *umask = simple_strtoul(value,&value,8); + if (*value) + return 0; + } + else if (!strcmp(this_char,"debug")) { + if (value) return 0; + *debug = 1; + } + else if (!strcmp(this_char,"fat")) { + if (!value || !*value) + return 0; + *fat = simple_strtoul(value,&value,0); + if (*value || (*fat != 12 && *fat != 16)) + return 0; + } + else if (!strcmp(this_char,"quiet")) { + if (value) return 0; + *quiet = 1; + } + else return 0; + } + return 1; +} + + +/* Read the super block of an MS-DOS FS. 
*/ + +struct super_block *msdos_read_super(struct super_block *s,void *data, + int silent) +{ + struct buffer_head *bh; + struct msdos_boot_sector *b; + int data_sectors,logical_sector_size,sector_mult; + int debug,error,fat,quiet; + char check,conversion; + uid_t uid; + gid_t gid; + int umask; + + if (!parse_options((char *) data,&check,&conversion,&uid,&gid,&umask, + &debug,&fat,&quiet)) { + s->s_dev = 0; + return NULL; + } + cache_init(); + lock_super(s); + set_blocksize(s->s_dev, SECTOR_SIZE); + bh = bread(s->s_dev, 0, SECTOR_SIZE); + unlock_super(s); + if (bh == NULL) { + s->s_dev = 0; + printk("MSDOS bread failed\n"); + return NULL; + } + b = (struct msdos_boot_sector *) bh->b_data; + s->s_blocksize = 512; /* Using this small block size solve the */ + /* the misfit with buffer cache and cluster */ + /* because cluster (DOS) are often aligned */ + /* on odd sector */ + s->s_blocksize_bits = 9; /* we cannot handle anything else yet */ +/* + * The DOS3 partition size limit is *not* 32M as many people think. + * Instead, it is 64K sectors (with the usual sector size being + * 512 bytes, leading to a 32M limit). + * + * DOS 3 partition managers got around this problem by faking a + * larger sector size, ie treating multiple physical sectors as + * a single logical sector. + * + * We can accommodate this scheme by adjusting our cluster size, + * fat_start, and data_start by an appropriate value. + * + * (by Drew Eckhardt) + */ + +#define ROUND_TO_MULTIPLE(n,m) ((n) && (m) ? 
(n)+(m)-1-((n)-1)%(m) : 0) + /* don't divide by zero */ + + logical_sector_size = CF_LE_W(*(unsigned short *) &b->sector_size); + sector_mult = logical_sector_size >> SECTOR_BITS; + MSDOS_SB(s)->cluster_size = b->cluster_size*sector_mult; + MSDOS_SB(s)->fats = b->fats; + MSDOS_SB(s)->fat_start = CF_LE_W(b->reserved)*sector_mult; + MSDOS_SB(s)->fat_length = CF_LE_W(b->fat_length)*sector_mult; + MSDOS_SB(s)->dir_start = (CF_LE_W(b->reserved)+b->fats*CF_LE_W( + b->fat_length))*sector_mult; + MSDOS_SB(s)->dir_entries = CF_LE_W(*((unsigned short *) &b->dir_entries + )); + MSDOS_SB(s)->data_start = MSDOS_SB(s)->dir_start+ROUND_TO_MULTIPLE(( + MSDOS_SB(s)->dir_entries << MSDOS_DIR_BITS) >> SECTOR_BITS, + sector_mult); + data_sectors = (CF_LE_W(*((unsigned short *) &b->sectors)) ? + CF_LE_W(*((unsigned short *) &b->sectors)) : + CF_LE_L(b->total_sect))*sector_mult-MSDOS_SB(s)->data_start; + error = !b->cluster_size || !sector_mult; + if (!error) { + MSDOS_SB(s)->clusters = b->cluster_size ? data_sectors/ + b->cluster_size/sector_mult : 0; + MSDOS_SB(s)->fat_bits = fat ? fat : MSDOS_SB(s)->clusters > + MSDOS_FAT12 ? 16 : 12; + error = !MSDOS_SB(s)->fats || (MSDOS_SB(s)->dir_entries & + (MSDOS_DPS-1)) || MSDOS_SB(s)->clusters+2 > MSDOS_SB(s)-> + fat_length*SECTOR_SIZE*8/MSDOS_SB(s)->fat_bits || + (logical_sector_size & (SECTOR_SIZE-1)) || !b->secs_track || + !b->heads; + } + brelse(bh); + if (error || debug) { + /* The MSDOS_CAN_BMAP is obsolete, but left just to remember */ + printk("[MS-DOS FS Rel. 12,FAT %d,check=%c,conv=%c," + "uid=%d,gid=%d,umask=%03o%s]\n",MSDOS_SB(s)->fat_bits,check, + conversion,uid,gid,umask,MSDOS_CAN_BMAP(MSDOS_SB(s)) ? 
+ ",bmap" : ""); + printk("[me=0x%x,cs=%d,#f=%d,fs=%d,fl=%d,ds=%d,de=%d,data=%d," + "se=%d,ts=%ld,ls=%d]\n",b->media,MSDOS_SB(s)->cluster_size, + MSDOS_SB(s)->fats,MSDOS_SB(s)->fat_start,MSDOS_SB(s)-> + fat_length,MSDOS_SB(s)->dir_start,MSDOS_SB(s)->dir_entries, + MSDOS_SB(s)->data_start,CF_LE_W(*(unsigned short *) &b-> + sectors),b->total_sect,logical_sector_size); + } + if (error) { + if (!silent) + printk("VFS: Can't find a valid MSDOS filesystem on dev 0x%04x.\n", + s->s_dev); + s->s_dev = 0; + return NULL; + } + s->s_magic = MSDOS_SUPER_MAGIC; + MSDOS_SB(s)->name_check = check; + MSDOS_SB(s)->conversion = conversion; + /* set up enough so that it can read an inode */ + s->s_op = &msdos_sops; + MSDOS_SB(s)->fs_uid = uid; + MSDOS_SB(s)->fs_gid = gid; + MSDOS_SB(s)->fs_umask = umask; + MSDOS_SB(s)->quiet = quiet; + MSDOS_SB(s)->free_clusters = -1; /* don't know yet */ + MSDOS_SB(s)->fat_wait = NULL; + MSDOS_SB(s)->fat_lock = 0; + MSDOS_SB(s)->prev_free = 0; + if (!(s->s_mounted = iget(s,MSDOS_ROOT_INO))) { + s->s_dev = 0; + printk("get root inode failed\n"); + return NULL; + } + #ifdef MODULE + MOD_INC_USE_COUNT; + #endif + return s; +} + + +void msdos_statfs(struct super_block *sb,struct statfs *buf) +{ + int free,nr; + + put_fs_long(sb->s_magic,&buf->f_type); + put_fs_long(MSDOS_SB(sb)->cluster_size*SECTOR_SIZE,&buf->f_bsize); + put_fs_long(MSDOS_SB(sb)->clusters,&buf->f_blocks); + lock_fat(sb); + if (MSDOS_SB(sb)->free_clusters != -1) + free = MSDOS_SB(sb)->free_clusters; + else { + free = 0; + for (nr = 2; nr < MSDOS_SB(sb)->clusters+2; nr++) + if (!fat_access(sb,nr,-1)) free++; + MSDOS_SB(sb)->free_clusters = free; + } + unlock_fat(sb); + put_fs_long(free,&buf->f_bfree); + put_fs_long(free,&buf->f_bavail); + put_fs_long(0,&buf->f_files); + put_fs_long(0,&buf->f_ffree); + put_fs_long(12,&buf->f_namelen); +} + + +int msdos_bmap(struct inode *inode,int block) +{ + struct msdos_sb_info *sb; + int cluster,offset; + + sb = MSDOS_SB(inode->i_sb); + if (inode->i_ino 
== MSDOS_ROOT_INO) { + return sb->dir_start + block; + } + cluster = block/sb->cluster_size; + offset = block % sb->cluster_size; + if (!(cluster = get_cluster(inode,cluster))) return 0; + return (cluster-2)*sb->cluster_size+sb->data_start+offset; +} + + +void msdos_read_inode(struct inode *inode) +{ + struct buffer_head *bh; + struct msdos_dir_entry *raw_entry; + int nr; + +/* printk("read inode %d\n",inode->i_ino); */ + MSDOS_I(inode)->i_busy = 0; + MSDOS_I(inode)->i_depend = MSDOS_I(inode)->i_old = NULL; + MSDOS_I(inode)->i_binary = 1; + inode->i_uid = MSDOS_SB(inode->i_sb)->fs_uid; + inode->i_gid = MSDOS_SB(inode->i_sb)->fs_gid; + if (inode->i_ino == MSDOS_ROOT_INO) { + inode->i_mode = (S_IRWXUGO & ~MSDOS_SB(inode->i_sb)->fs_umask) | + S_IFDIR; + inode->i_op = &msdos_dir_inode_operations; + inode->i_nlink = msdos_subdirs(inode)+2; + /* subdirs (neither . nor ..) plus . and "self" */ + inode->i_size = MSDOS_SB(inode->i_sb)->dir_entries* + sizeof(struct msdos_dir_entry); + inode->i_blksize = MSDOS_SB(inode->i_sb)->cluster_size* + SECTOR_SIZE; + inode->i_blocks = (inode->i_size+inode->i_blksize-1)/ + inode->i_blksize*MSDOS_SB(inode->i_sb)->cluster_size; + MSDOS_I(inode)->i_start = 0; + MSDOS_I(inode)->i_attrs = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = 0; + return; + } + if (!(bh = bread(inode->i_dev,inode->i_ino >> MSDOS_DPB_BITS, + SECTOR_SIZE))) { + printk("dev = 0x%04X, ino = %ld\n",inode->i_dev,inode->i_ino); + panic("msdos_read_inode: unable to read i-node block"); + } + raw_entry = &((struct msdos_dir_entry *) (bh->b_data)) + [inode->i_ino & (MSDOS_DPB-1)]; + if ((raw_entry->attr & ATTR_DIR) && !IS_FREE(raw_entry->name)) { + inode->i_mode = MSDOS_MKMODE(raw_entry->attr,S_IRWXUGO & + ~MSDOS_SB(inode->i_sb)->fs_umask) | S_IFDIR; + inode->i_op = &msdos_dir_inode_operations; + MSDOS_I(inode)->i_start = CF_LE_W(raw_entry->start); + inode->i_nlink = msdos_subdirs(inode); + /* includes .., compensating for "self" */ +#ifdef DEBUG + if 
(!inode->i_nlink) { + printk("directory %d: i_nlink == 0\n",inode->i_ino); + inode->i_nlink = 1; + } +#endif + inode->i_size = 0; + if ((nr = CF_LE_W(raw_entry->start)) != 0) + while (nr != -1) { + inode->i_size += SECTOR_SIZE*MSDOS_SB(inode-> + i_sb)->cluster_size; + if (!(nr = fat_access(inode->i_sb,nr,-1))) { + printk("Directory %ld: bad FAT\n", + inode->i_ino); + break; + } + } + } + else { + inode->i_mode = MSDOS_MKMODE(raw_entry->attr,(IS_NOEXEC(inode) + ? S_IRUGO|S_IWUGO : S_IRWXUGO) & ~MSDOS_SB(inode->i_sb)->fs_umask) | + S_IFREG; + inode->i_op = &msdos_file_inode_operations; /* Now can always bmap */ + MSDOS_I(inode)->i_start = CF_LE_W(raw_entry->start); + inode->i_nlink = 1; + inode->i_size = CF_LE_L(raw_entry->size); + } + MSDOS_I(inode)->i_binary = is_binary(MSDOS_SB(inode->i_sb)->conversion, + raw_entry->ext); + MSDOS_I(inode)->i_attrs = raw_entry->attr & ATTR_UNUSED; + /* this is as close to the truth as we can get ... */ + inode->i_blksize = MSDOS_SB(inode->i_sb)->cluster_size*SECTOR_SIZE; + inode->i_blocks = (inode->i_size+inode->i_blksize-1)/ + inode->i_blksize*MSDOS_SB(inode->i_sb)->cluster_size; + inode->i_mtime = inode->i_atime = inode->i_ctime = + date_dos2unix(CF_LE_W(raw_entry->time),CF_LE_W(raw_entry->date)); + brelse(bh); +} + + +void msdos_write_inode(struct inode *inode) +{ + struct buffer_head *bh; + struct msdos_dir_entry *raw_entry; + + inode->i_dirt = 0; + if (inode->i_ino == MSDOS_ROOT_INO || !inode->i_nlink) return; + if (!(bh = bread(inode->i_dev,inode->i_ino >> MSDOS_DPB_BITS, + SECTOR_SIZE))) { + printk("dev = 0x%04X, ino = %ld\n",inode->i_dev,inode->i_ino); + panic("msdos_write_inode: unable to read i-node block"); + } + raw_entry = &((struct msdos_dir_entry *) (bh->b_data)) + [inode->i_ino & (MSDOS_DPB-1)]; + if (S_ISDIR(inode->i_mode)) { + raw_entry->attr = ATTR_DIR; + raw_entry->size = 0; + } + else { + raw_entry->attr = ATTR_NONE; + raw_entry->size = CT_LE_L(inode->i_size); + } + raw_entry->attr |= 
MSDOS_MKATTR(inode->i_mode) | + MSDOS_I(inode)->i_attrs; + raw_entry->start = CT_LE_L(MSDOS_I(inode)->i_start); + date_unix2dos(inode->i_mtime,&raw_entry->time,&raw_entry->date); + raw_entry->time = CT_LE_W(raw_entry->time); + raw_entry->date = CT_LE_W(raw_entry->date); + mark_buffer_dirty(bh, 1); + brelse(bh); +} + + +int msdos_notify_change(struct inode * inode,struct iattr * attr) +{ + int error; + + error = inode_change_ok(inode, attr); + if (error) + return error; + + if (((attr->ia_valid & ATTR_UID) && + (attr->ia_uid != MSDOS_SB(inode->i_sb)->fs_uid)) || + ((attr->ia_valid & ATTR_GID) && + (attr->ia_gid != MSDOS_SB(inode->i_sb)->fs_gid)) || + ((attr->ia_valid & ATTR_MODE) && + (attr->ia_mode & ~MSDOS_VALID_MODE))) + error = -EPERM; + + if (error) + return MSDOS_SB(inode->i_sb)->quiet ? 0 : error; + + inode_setattr(inode, attr); + + if (IS_NOEXEC(inode) && !S_ISDIR(inode->i_mode)) + inode->i_mode &= S_IFMT | S_IRUGO | S_IWUGO; + else + inode->i_mode |= S_IXUGO; + + inode->i_mode = ((inode->i_mode & S_IFMT) | ((((inode->i_mode & S_IRWXU + & ~MSDOS_SB(inode->i_sb)->fs_umask) | S_IRUSR) >> 6)*S_IXUGO)) & + ~MSDOS_SB(inode->i_sb)->fs_umask; + return 0; +} +#ifdef MODULE + +char kernel_version[] = UTS_RELEASE; + +static struct file_system_type msdos_fs_type = { + msdos_read_super, "msdos", 1, NULL +}; + +int init_module(void) +{ + register_filesystem(&msdos_fs_type); + return 0; +} + +void cleanup_module(void) +{ + if (MOD_IN_USE) + printk("msdos: device busy, remove delayed\n"); + else + { + unregister_filesystem(&msdos_fs_type); + } +} + +#endif + diff --git a/fs/msdos/misc.c b/fs/msdos/misc.c new file mode 100644 index 000000000..630198afa --- /dev/null +++ b/fs/msdos/misc.c @@ -0,0 +1,515 @@ +/* + * linux/fs/msdos/misc.c + * + * Written 1992,1993 by Werner Almesberger + */ + +#include <linux/fs.h> +#include <linux/msdos_fs.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/stat.h> + + 
+#define PRINTK(x) +/* Well-known binary file extensions */ + +static char bin_extensions[] = + "EXECOMBINAPPSYSDRVOVLOVROBJLIBDLLPIF" /* program code */ + "ARCZIPLHALZHZOOTARZ ARJ" /* common archivers */ + "TZ TAZTZPTPZ" /* abbreviations of tar.Z and tar.zip */ + "GZ TGZDEB" /* .gz, .tar.gz and Debian packages */ + "GIFBMPTIFGL JPGPCX" /* graphics */ + "TFMVF GF PK PXLDVI"; /* TeX */ + + +/* + * fs_panic reports a severe file system problem and sets the file system + * read-only. The file system can be made writable again by remounting it. + */ + +void fs_panic(struct super_block *s,char *msg) +{ + int not_ro; + + not_ro = !(s->s_flags & MS_RDONLY); + if (not_ro) s->s_flags |= MS_RDONLY; + printk("Filesystem panic (dev 0x%04X, mounted on 0x%04X:%ld)\n %s\n", + s->s_dev,s->s_covered->i_dev,s->s_covered->i_ino,msg); + if (not_ro) + printk(" File system has been set read-only\n"); +} + + +/* + * is_binary selects optional text conversion based on the conversion mode and + * the extension part of the file name. + */ + +int is_binary(char conversion,char *extension) +{ + char *walk; + + switch (conversion) { + case 'b': + return 1; + case 't': + return 0; + case 'a': + for (walk = bin_extensions; *walk; walk += 3) + if (!strncmp(extension,walk,3)) return 1; + return 0; + default: + printk("Invalid conversion mode - defaulting to " + "binary.\n"); + return 1; + } +} + + +/* File creation lock. This is system-wide to avoid deadlocks in rename. */ +/* (rename might deadlock before detecting cross-FS moves.) 
*/ + +static struct wait_queue *creation_wait = NULL; +static creation_lock = 0; + + +void lock_creation(void) +{ + while (creation_lock) sleep_on(&creation_wait); + creation_lock = 1; +} + + +void unlock_creation(void) +{ + creation_lock = 0; + wake_up(&creation_wait); +} + + +void lock_fat(struct super_block *sb) +{ + while (MSDOS_SB(sb)->fat_lock) sleep_on(&MSDOS_SB(sb)->fat_wait); + MSDOS_SB(sb)->fat_lock = 1; +} + + +void unlock_fat(struct super_block *sb) +{ + MSDOS_SB(sb)->fat_lock = 0; + wake_up(&MSDOS_SB(sb)->fat_wait); +} + + +/* + * msdos_add_cluster tries to allocate a new cluster and adds it to the file + * represented by inode. The cluster is zero-initialized. + */ + +int msdos_add_cluster(struct inode *inode) +{ + int count,nr,limit,last,current,sector,last_sector; + struct buffer_head *bh; + int cluster_size = MSDOS_SB(inode->i_sb)->cluster_size; + + if (inode->i_ino == MSDOS_ROOT_INO) return -ENOSPC; + if (!MSDOS_SB(inode->i_sb)->free_clusters) return -ENOSPC; + lock_fat(inode->i_sb); + limit = MSDOS_SB(inode->i_sb)->clusters; + nr = limit; /* to keep GCC happy */ + for (count = 0; count < limit; count++) { + nr = ((count+MSDOS_SB(inode->i_sb)->prev_free) % limit)+2; + if (fat_access(inode->i_sb,nr,-1) == 0) break; + } +#ifdef DEBUG +printk("free cluster: %d\n",nr); +#endif + MSDOS_SB(inode->i_sb)->prev_free = (count+MSDOS_SB(inode->i_sb)-> + prev_free+1) % limit; + if (count >= limit) { + MSDOS_SB(inode->i_sb)->free_clusters = 0; + unlock_fat(inode->i_sb); + return -ENOSPC; + } + fat_access(inode->i_sb,nr,MSDOS_SB(inode->i_sb)->fat_bits == 12 ? 
+ 0xff8 : 0xfff8); + if (MSDOS_SB(inode->i_sb)->free_clusters != -1) + MSDOS_SB(inode->i_sb)->free_clusters--; + unlock_fat(inode->i_sb); +#ifdef DEBUG +printk("set to %x\n",fat_access(inode->i_sb,nr,-1)); +#endif + last = 0; + if ((current = MSDOS_I(inode)->i_start) != 0) { + cache_lookup(inode,INT_MAX,&last,¤t); + while (current && current != -1) + if (!(current = fat_access(inode->i_sb, + last = current,-1))) { + fs_panic(inode->i_sb,"File without EOF"); + return -ENOSPC; + } + } +#ifdef DEBUG +printk("last = %d\n",last); +#endif + if (last) fat_access(inode->i_sb,last,nr); + else { + MSDOS_I(inode)->i_start = nr; + inode->i_dirt = 1; + } +#ifdef DEBUG +if (last) printk("next set to %d\n",fat_access(inode->i_sb,last,-1)); +#endif + sector = MSDOS_SB(inode->i_sb)->data_start+(nr-2)*cluster_size; + last_sector = sector + cluster_size; + for ( ; sector < last_sector; sector++) { + #ifdef DEBUG + printk("zeroing sector %d\n",sector); + #endif + if (!(bh = getblk(inode->i_dev,sector,SECTOR_SIZE))) + printk("getblk failed\n"); + else { + memset(bh->b_data,0,SECTOR_SIZE); + bh->b_uptodate = 1; + mark_buffer_dirty(bh, 1); + brelse(bh); + } + } + inode->i_blocks += cluster_size; + if (S_ISDIR(inode->i_mode)) { + if (inode->i_size & (SECTOR_SIZE-1)) { + fs_panic(inode->i_sb,"Odd directory size"); + inode->i_size = (inode->i_size+SECTOR_SIZE) & + ~(SECTOR_SIZE-1); + } + inode->i_size += SECTOR_SIZE*cluster_size; +#ifdef DEBUG +printk("size is %d now (%x)\n",inode->i_size,inode); +#endif + inode->i_dirt = 1; + } + return 0; +} + + +/* Linear day numbers of the respective 1sts in non-leap years. */ + +static int day_n[] = { 0,31,59,90,120,151,181,212,243,273,304,334,0,0,0,0 }; + /* JanFebMarApr May Jun Jul Aug Sep Oct Nov Dec */ + + +extern struct timezone sys_tz; + + +/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). 
*/ + +int date_dos2unix(unsigned short time,unsigned short date) +{ + int month,year,secs; + + month = ((date >> 5) & 15)-1; + year = date >> 9; + secs = (time & 31)*2+60*((time >> 5) & 63)+(time >> 11)*3600+86400* + ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && + month < 2 ? 1 : 0)+3653); + /* days since 1.1.70 plus 80's leap day */ + secs += sys_tz.tz_minuteswest*60; + return secs; +} + + +/* Convert linear UNIX date to a MS-DOS time/date pair. */ + +void date_unix2dos(int unix_date,unsigned short *time, + unsigned short *date) +{ + int day,year,nl_day,month; + + unix_date -= sys_tz.tz_minuteswest*60; + *time = (unix_date % 60)/2+(((unix_date/60) % 60) << 5)+ + (((unix_date/3600) % 24) << 11); + day = unix_date/86400-3652; + year = day/365; + if ((year+3)/4+365*year > day) year--; + day -= (year+3)/4+365*year; + if (day == 59 && !(year & 3)) { + nl_day = day; + month = 2; + } + else { + nl_day = (year & 3) || day <= 59 ? day : day-1; + for (month = 0; month < 12; month++) + if (day_n[month] > nl_day) break; + } + *date = nl_day-day_n[month-1]+1+(month << 5)+(year << 9); +} + + +/* Returns the inode number of the directory entry at offset pos. If bh is + non-NULL, it is brelse'd before. Pos is incremented. The buffer header is + returned in bh. 
*/ + +int msdos_get_entry(struct inode *dir, loff_t *pos,struct buffer_head **bh, + struct msdos_dir_entry **de) +{ + int sector,offset; + + while (1) { + offset = *pos; + PRINTK (("get_entry offset %d\n",offset)); + if ((sector = msdos_smap(dir,offset >> SECTOR_BITS)) == -1) + return -1; + PRINTK (("get_entry sector %d %p\n",sector,*bh)); + if (!sector) + return -1; /* beyond EOF */ + *pos += sizeof(struct msdos_dir_entry); + if (*bh) + brelse(*bh); + PRINTK (("get_entry sector apres brelse\n")); + if (!(*bh = msdos_sread(dir->i_dev,sector))) { + printk("Directory sread (sector %d) failed\n",sector); + continue; + } + PRINTK (("get_entry apres sread\n")); + *de = (struct msdos_dir_entry *) ((*bh)->b_data+(offset & + (SECTOR_SIZE-1))); + return (sector << MSDOS_DPS_BITS)+((offset & (SECTOR_SIZE-1)) >> + MSDOS_DIR_BITS); + } +} + + +/* + * Now an ugly part: this set of directory scan routines works on clusters + * rather than on inodes and sectors. They are necessary to locate the '..' + * directory "inode". raw_scan_sector operates in four modes: + * + * name number ino action + * -------- -------- -------- ------------------------------------------------- + * non-NULL - X Find an entry with that name + * NULL non-NULL non-NULL Find an entry whose data starts at *number + * NULL non-NULL NULL Count subdirectories in *number. (*) + * NULL NULL non-NULL Find an empty entry + * + * (*) The return code should be ignored. It DOES NOT indicate success or + * failure. *number has to be initialized to zero. + * + * - = not used, X = a value is returned unless NULL + * + * If res_bh is non-NULL, the buffer is not deallocated but returned to the + * caller on success. res_de is set accordingly. + * + * If cont is non-zero, raw_found continues with the entry after the one + * res_bh/res_de point to. 
+ */ + + +#define RSS_NAME /* search for name */ \ + done = !strncmp(data[entry].name,name,MSDOS_NAME) && \ + !(data[entry].attr & ATTR_VOLUME); + +#define RSS_START /* search for start cluster */ \ + done = !IS_FREE(data[entry].name) && CF_LE_W(data[entry].start) == *number; + +#define RSS_FREE /* search for free entry */ \ + { \ + done = IS_FREE(data[entry].name); \ + if (done) { \ + inode = iget(sb,sector*MSDOS_DPS+entry); \ + if (inode) { \ + /* Directory slots of busy deleted files aren't available yet. */ \ + done = !MSDOS_I(inode)->i_busy; \ + iput(inode); \ + } \ + } \ + } + +#define RSS_COUNT /* count subdirectories */ \ + { \ + done = 0; \ + if (!IS_FREE(data[entry].name) && (data[entry].attr & ATTR_DIR)) \ + (*number)++; \ + } + +static int raw_scan_sector(struct super_block *sb,int sector,char *name, + int *number,int *ino,struct buffer_head **res_bh, + struct msdos_dir_entry **res_de) +{ + struct buffer_head *bh; + struct msdos_dir_entry *data; + struct inode *inode; + int entry,start,done; + + if (!(bh = msdos_sread(sb->s_dev,sector))) return -EIO; + data = (struct msdos_dir_entry *) bh->b_data; + for (entry = 0; entry < MSDOS_DPS; entry++) { + if (name) RSS_NAME + else { + if (!ino) RSS_COUNT + else { + if (number) RSS_START + else RSS_FREE + } + } + if (done) { + if (ino) *ino = sector*MSDOS_DPS+entry; + start = CF_LE_W(data[entry].start); + if (!res_bh) brelse(bh); + else { + *res_bh = bh; + *res_de = &data[entry]; + } + return start; + } + } + brelse(bh); + return -ENOENT; +} + + +/* + * raw_scan_root performs raw_scan_sector on the root directory until the + * requested entry is found or the end of the directory is reached. 
+ */ + +static int raw_scan_root(struct super_block *sb,char *name,int *number,int *ino, + struct buffer_head **res_bh,struct msdos_dir_entry **res_de) +{ + int count,cluster; + + for (count = 0; count < MSDOS_SB(sb)->dir_entries/MSDOS_DPS; count++) { + if ((cluster = raw_scan_sector(sb,MSDOS_SB(sb)->dir_start+count, + name,number,ino,res_bh,res_de)) >= 0) return cluster; + } + return -ENOENT; +} + + +/* + * raw_scan_nonroot performs raw_scan_sector on a non-root directory until the + * requested entry is found or the end of the directory is reached. + */ + +static int raw_scan_nonroot(struct super_block *sb,int start,char *name, + int *number,int *ino,struct buffer_head **res_bh,struct msdos_dir_entry + **res_de) +{ + int count,cluster; + +#ifdef DEBUG + printk("raw_scan_nonroot: start=%d\n",start); +#endif + do { + for (count = 0; count < MSDOS_SB(sb)->cluster_size; count++) { + if ((cluster = raw_scan_sector(sb,(start-2)* + MSDOS_SB(sb)->cluster_size+MSDOS_SB(sb)->data_start+ + count,name,number,ino,res_bh,res_de)) >= 0) + return cluster; + } + if (!(start = fat_access(sb,start,-1))) { + fs_panic(sb,"FAT error"); + break; + } +#ifdef DEBUG + printk("next start: %d\n",start); +#endif + } + while (start != -1); + return -ENOENT; +} + + +/* + * raw_scan performs raw_scan_sector on any sector. + * + * NOTE: raw_scan must not be used on a directory that is is the process of + * being created. + */ + +static int raw_scan(struct super_block *sb,int start,char *name,int *number, + int *ino,struct buffer_head **res_bh,struct msdos_dir_entry **res_de) +{ + if (start) + return raw_scan_nonroot(sb,start,name,number,ino,res_bh,res_de); + else return raw_scan_root(sb,name,number,ino,res_bh,res_de); +} + + +/* + * msdos_parent_ino returns the inode number of the parent directory of dir. + * File creation has to be deferred while msdos_parent_ino is running to + * prevent renames. 
+ */ + +int msdos_parent_ino(struct inode *dir,int locked) +{ + static int zero = 0; + int error,current,prev,nr; + + if (!S_ISDIR(dir->i_mode)) panic("Non-directory fed to m_p_i"); + if (dir->i_ino == MSDOS_ROOT_INO) return dir->i_ino; + if (!locked) lock_creation(); /* prevent renames */ + if ((current = raw_scan(dir->i_sb,MSDOS_I(dir)->i_start,MSDOS_DOTDOT, + &zero,NULL,NULL,NULL)) < 0) { + if (!locked) unlock_creation(); + return current; + } + if (!current) nr = MSDOS_ROOT_INO; + else { + if ((prev = raw_scan(dir->i_sb,current,MSDOS_DOTDOT,&zero,NULL, + NULL,NULL)) < 0) { + if (!locked) unlock_creation(); + return prev; + } + if ((error = raw_scan(dir->i_sb,prev,NULL,¤t,&nr,NULL, + NULL)) < 0) { + if (!locked) unlock_creation(); + return error; + } + } + if (!locked) unlock_creation(); + return nr; +} + + +/* + * msdos_subdirs counts the number of sub-directories of dir. It can be run + * on directories being created. + */ + +int msdos_subdirs(struct inode *dir) +{ + int count; + + count = 0; + if (dir->i_ino == MSDOS_ROOT_INO) + (void) raw_scan_root(dir->i_sb,NULL,&count,NULL,NULL,NULL); + else { + if (!MSDOS_I(dir)->i_start) return 0; /* in mkdir */ + else (void) raw_scan_nonroot(dir->i_sb,MSDOS_I(dir)->i_start, + NULL,&count,NULL,NULL,NULL); + } + return count; +} + + +/* + * Scans a directory for a given file (name points to its formatted name) or + * for an empty directory slot (name is NULL). Returns an error code or zero. + */ + +int msdos_scan(struct inode *dir,char *name,struct buffer_head **res_bh, + struct msdos_dir_entry **res_de,int *ino) +{ + int res; + + if (name) + res = raw_scan(dir->i_sb,MSDOS_I(dir)->i_start,name,NULL,ino, + res_bh,res_de); + else res = raw_scan(dir->i_sb,MSDOS_I(dir)->i_start,NULL,NULL,ino, + res_bh,res_de); + return res < 0 ? 
res : 0; +} diff --git a/fs/msdos/mmap.c b/fs/msdos/mmap.c new file mode 100644 index 000000000..0e85584e9 --- /dev/null +++ b/fs/msdos/mmap.c @@ -0,0 +1,102 @@ +/* + * fs/msdos/mmap.c + * + * Written by Jacques Gelinas (jacques@solucorp.qc.ca) + * Inspired by fs/nfs/mmap.c (Jaon Tombs 15 Aug 1993) + * + * msdos mmap handling + */ +#include <linux/stat.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/shm.h> +#include <linux/errno.h> +#include <linux/mman.h> +#include <linux/string.h> +#include <linux/malloc.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/msdos_fs.h> + +/* + * Fill in the supplied page for mmap + */ +static unsigned long msdos_file_mmap_nopage( + struct vm_area_struct * area, + unsigned long address, + unsigned long page, + int error_code) +{ + struct inode * inode = area->vm_inode; + unsigned int clear; + int pos; + long gap; /* distance from eof to pos */ + + address &= PAGE_MASK; + pos = address - area->vm_start + area->vm_offset; + + clear = 0; + gap = inode->i_size - pos; + if (gap <= 0){ + /* mmaping beyond end of file */ + clear = PAGE_SIZE; + }else{ + int cur_read; + int need_read; + struct file filp; + if (gap < PAGE_SIZE){ + clear = PAGE_SIZE - gap; + } + filp.f_pos = pos; + need_read = PAGE_SIZE - clear; + { + unsigned long cur_fs = get_fs(); + set_fs (KERNEL_DS); + cur_read = msdos_file_read (inode,&filp,(char*)page + ,need_read); + set_fs (cur_fs); + } + if (cur_read != need_read){ + printk ("MSDOS: Error while reading an mmap file %d <> %d\n" + ,cur_read,need_read); + } + } + if (clear > 0){ + memset ((char*)page+PAGE_SIZE-clear,0,clear); + } + return page; +} + +struct vm_operations_struct msdos_file_mmap = { + NULL, /* open */ + NULL, /* close */ + msdos_file_mmap_nopage, /* nopage */ + NULL, /* wppage */ + NULL, /* share */ + NULL, /* unmap */ +}; + +/* + * This is used for a general mmap of an msdos file + * Returns 0 if ok, or a negative error code if not. 
+ */ +int msdos_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma) +{ + if (vma->vm_page_prot & PAGE_RW) /* only PAGE_COW or read-only supported now */ + return -EINVAL; + if (vma->vm_offset & (inode->i_sb->s_blocksize - 1)) + return -EINVAL; + if (!inode->i_sb || !S_ISREG(inode->i_mode)) + return -EACCES; + if (!IS_RDONLY(inode)) { + inode->i_atime = CURRENT_TIME; + inode->i_dirt = 1; + } + + vma->vm_inode = inode; + inode->i_count++; + vma->vm_ops = &msdos_file_mmap; + return 0; +} + diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c new file mode 100644 index 000000000..ad3b9c8bc --- /dev/null +++ b/fs/msdos/namei.c @@ -0,0 +1,620 @@ +/* + * linux/fs/msdos/namei.c + * + * Written 1992,1993 by Werner Almesberger + */ + +#include <asm/segment.h> + +#include <linux/sched.h> +#include <linux/msdos_fs.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/stat.h> + +#define PRINTK(x) + +/* MS-DOS "device special files" */ + +static char *reserved_names[] = { + "CON ","PRN ","NUL ","AUX ", + "LPT1 ","LPT2 ","LPT3 ","LPT4 ", + "COM1 ","COM2 ","COM3 ","COM4 ", + NULL }; + + +/* Characters that are undesirable in an MS-DOS file name */ + +static char bad_chars[] = "*?<>|\""; +static char bad_if_strict[] = "+=,; "; + + +/* Formats an MS-DOS file name. Rejects invalid names. */ + +static int msdos_format_name(char conv,const char *name,int len,char *res, + int dot_dirs) +{ + char *walk,**reserved; + unsigned char c; + int space; + + if (IS_FREE(name)) return -EINVAL; + if (name[0] == '.' 
&& (len == 1 || (len == 2 && name[1] == '.'))) { + if (!dot_dirs) return -EEXIST; + memset(res+1,' ',10); + while (len--) *res++ = '.'; + return 0; + } + space = 1; /* disallow names starting with a dot */ + c = 0; + for (walk = res; len && walk-res < 8; walk++) { + c = *name++; + len--; + if (conv != 'r' && strchr(bad_chars,c)) return -EINVAL; + if (conv == 's' && strchr(bad_if_strict,c)) return -EINVAL; + if (c >= 'A' && c <= 'Z' && conv == 's') return -EINVAL; + if (c < ' ' || c == ':' || c == '\\') return -EINVAL; + if (c == '.') break; + space = c == ' '; + *walk = c >= 'a' && c <= 'z' ? c-32 : c; + } + if (space) return -EINVAL; + if (conv == 's' && len && c != '.') { + c = *name++; + len--; + if (c != '.') return -EINVAL; + } + while (c != '.' && len--) c = *name++; + if (c == '.') { + while (walk-res < 8) *walk++ = ' '; + while (len > 0 && walk-res < MSDOS_NAME) { + c = *name++; + len--; + if (conv != 'r' && strchr(bad_chars,c)) return -EINVAL; + if (conv == 's' && strchr(bad_if_strict,c)) + return -EINVAL; + if (c < ' ' || c == ':' || c == '\\' || c == '.') + return -EINVAL; + if (c >= 'A' && c <= 'Z' && conv == 's') return -EINVAL; + space = c == ' '; + *walk++ = c >= 'a' && c <= 'z' ? c-32 : c; + } + if (space) return -EINVAL; + if (conv == 's' && len) return -EINVAL; + } + while (walk-res < MSDOS_NAME) *walk++ = ' '; + for (reserved = reserved_names; *reserved; reserved++) + if (!strncmp(res,*reserved,8)) return -EINVAL; + return 0; +} + + +/* Locates a directory entry. 
*/ + +static int msdos_find(struct inode *dir,const char *name,int len, + struct buffer_head **bh,struct msdos_dir_entry **de,int *ino) +{ + char msdos_name[MSDOS_NAME]; + int res; + + if ((res = msdos_format_name(MSDOS_SB(dir->i_sb)->name_check,name,len, + msdos_name,1)) < 0) return res; + return msdos_scan(dir,msdos_name,bh,de,ino); +} + + +int msdos_lookup(struct inode *dir,const char *name,int len, + struct inode **result) +{ + int ino,res; + struct msdos_dir_entry *de; + struct buffer_head *bh; + struct inode *next; + + PRINTK (("msdos_lookup\n")); + + *result = NULL; + if (!dir) return -ENOENT; + if (!S_ISDIR(dir->i_mode)) { + iput(dir); + return -ENOENT; + } + PRINTK (("msdos_lookup 2\n")); + if (len == 1 && name[0] == '.') { + *result = dir; + return 0; + } + if (len == 2 && name[0] == '.' && name[1] == '.') { + ino = msdos_parent_ino(dir,0); + iput(dir); + if (ino < 0) return ino; + if (!(*result = iget(dir->i_sb,ino))) return -EACCES; + return 0; + } + PRINTK (("msdos_lookup 3\n")); + if ((res = msdos_find(dir,name,len,&bh,&de,&ino)) < 0) { + iput(dir); + return res; + } + PRINTK (("msdos_lookup 4\n")); + if (bh) brelse(bh); + PRINTK (("msdos_lookup 4.5\n")); +/* printk("lookup: ino=%d\n",ino); */ + if (!(*result = iget(dir->i_sb,ino))) { + iput(dir); + return -EACCES; + } + PRINTK (("msdos_lookup 5\n")); + if (MSDOS_I(*result)->i_busy) { /* mkdir in progress */ + iput(*result); + iput(dir); + return -ENOENT; + } + PRINTK (("msdos_lookup 6\n")); + while (MSDOS_I(*result)->i_old) { + next = MSDOS_I(*result)->i_old; + iput(*result); + if (!(*result = iget(next->i_sb,next->i_ino))) { + fs_panic(dir->i_sb,"msdos_lookup: Can't happen"); + iput(dir); + return -ENOENT; + } + } + PRINTK (("msdos_lookup 7\n")); + iput(dir); + PRINTK (("msdos_lookup 8\n")); + return 0; +} + + +/* Creates a directory entry (name is already formatted). 
*/ + +static int msdos_create_entry(struct inode *dir,char *name,int is_dir, + struct inode **result) +{ + struct buffer_head *bh; + struct msdos_dir_entry *de; + int res,ino; + + if ((res = msdos_scan(dir,NULL,&bh,&de,&ino)) < 0) { + if (res != -ENOENT) return res; + if (dir->i_ino == MSDOS_ROOT_INO) return -ENOSPC; + if ((res = msdos_add_cluster(dir)) < 0) return res; + if ((res = msdos_scan(dir,NULL,&bh,&de,&ino)) < 0) return res; + } + /* + * XXX all times should be set by caller upon successful completion. + */ + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + dir->i_dirt = 1; + memcpy(de->name,name,MSDOS_NAME); + memset(de->unused, 0, sizeof(de->unused)); + de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; + de->start = 0; + date_unix2dos(dir->i_mtime,&de->time,&de->date); + de->size = 0; + mark_buffer_dirty(bh, 1); + if ((*result = iget(dir->i_sb,ino)) != NULL) + msdos_read_inode(*result); + brelse(bh); + if (!*result) return -EIO; + (*result)->i_mtime = (*result)->i_atime = (*result)->i_ctime = + CURRENT_TIME; + (*result)->i_dirt = 1; + return 0; +} + + +int msdos_create(struct inode *dir,const char *name,int len,int mode, + struct inode **result) +{ + struct buffer_head *bh; + struct msdos_dir_entry *de; + char msdos_name[MSDOS_NAME]; + int ino,res; + + if (!dir) return -ENOENT; + if ((res = msdos_format_name(MSDOS_SB(dir->i_sb)->name_check,name,len, + msdos_name,0)) < 0) { + iput(dir); + return res; + } + lock_creation(); + if (msdos_scan(dir,msdos_name,&bh,&de,&ino) >= 0) { + unlock_creation(); + brelse(bh); + iput(dir); + return -EEXIST; + } + res = msdos_create_entry(dir,msdos_name,S_ISDIR(mode),result); + unlock_creation(); + iput(dir); + return res; +} + + +#ifdef DEBUG + +static void dump_fat(struct super_block *sb,int start) +{ + printk("["); + while (start) { + printk("%d ",start); + start = fat_access(sb,start,-1); + if (!start) { + printk("ERROR"); + break; + } + if (start == -1) break; + } + printk("]\n"); +} + +#endif + + +int msdos_mkdir(struct inode 
*dir,const char *name,int len,int mode) +{ + struct buffer_head *bh; + struct msdos_dir_entry *de; + struct inode *inode,*dot; + char msdos_name[MSDOS_NAME]; + int ino,res; + + if ((res = msdos_format_name(MSDOS_SB(dir->i_sb)->name_check,name,len, + msdos_name,0)) < 0) { + iput(dir); + return res; + } + lock_creation(); + if (msdos_scan(dir,msdos_name,&bh,&de,&ino) >= 0) { + unlock_creation(); + brelse(bh); + iput(dir); + return -EEXIST; + } + if ((res = msdos_create_entry(dir,msdos_name,1,&inode)) < 0) { + unlock_creation(); + iput(dir); + return res; + } + dir->i_nlink++; + inode->i_nlink = 2; /* no need to mark them dirty */ + MSDOS_I(inode)->i_busy = 1; /* prevent lookups */ + if ((res = msdos_add_cluster(inode)) < 0) goto mkdir_error; + if ((res = msdos_create_entry(inode,MSDOS_DOT,1,&dot)) < 0) + goto mkdir_error; + dot->i_size = inode->i_size; /* doesn't grow in the 2nd create_entry */ + MSDOS_I(dot)->i_start = MSDOS_I(inode)->i_start; + dot->i_nlink = inode->i_nlink; + dot->i_dirt = 1; + iput(dot); + if ((res = msdos_create_entry(inode,MSDOS_DOTDOT,1,&dot)) < 0) + goto mkdir_error; + unlock_creation(); + dot->i_size = dir->i_size; + MSDOS_I(dot)->i_start = MSDOS_I(dir)->i_start; + dot->i_nlink = dir->i_nlink; + dot->i_dirt = 1; + MSDOS_I(inode)->i_busy = 0; + iput(dot); + iput(inode); + iput(dir); + return 0; +mkdir_error: + iput(inode); + if (msdos_rmdir(dir,name,len) < 0) + fs_panic(dir->i_sb,"rmdir in mkdir failed"); + unlock_creation(); + return res; +} + + +static int msdos_empty(struct inode *dir) +{ + loff_t pos; + struct buffer_head *bh; + struct msdos_dir_entry *de; + + if (dir->i_count > 1) + return -EBUSY; + if (MSDOS_I(dir)->i_start) { /* may be zero in mkdir */ + pos = 0; + bh = NULL; + while (msdos_get_entry(dir,&pos,&bh,&de) > -1) + if (!IS_FREE(de->name) && strncmp(de->name,MSDOS_DOT, + MSDOS_NAME) && strncmp(de->name,MSDOS_DOTDOT, + MSDOS_NAME)) { + brelse(bh); + return -ENOTEMPTY; + } + if (bh) + brelse(bh); + } + return 0; +} + + +int 
msdos_rmdir(struct inode *dir,const char *name,int len) +{ + int res,ino; + struct buffer_head *bh; + struct msdos_dir_entry *de; + struct inode *inode; + + bh = NULL; + inode = NULL; + res = -EPERM; + if (name[0] == '.' && (len == 1 || (len == 2 && name[1] == '.'))) + goto rmdir_done; + if ((res = msdos_find(dir,name,len,&bh,&de,&ino)) < 0) goto rmdir_done; + res = -ENOENT; + if (!(inode = iget(dir->i_sb,ino))) goto rmdir_done; + res = -ENOTDIR; + if (!S_ISDIR(inode->i_mode)) goto rmdir_done; + res = -EBUSY; + if (dir->i_dev != inode->i_dev || dir == inode) goto rmdir_done; + res = msdos_empty(inode); + if (res) + goto rmdir_done; + inode->i_nlink = 0; + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + dir->i_nlink--; + inode->i_dirt = dir->i_dirt = 1; + de->name[0] = DELETED_FLAG; + mark_buffer_dirty(bh, 1); + res = 0; +rmdir_done: + brelse(bh); + iput(dir); + iput(inode); + return res; +} + + +static int msdos_unlinkx( + struct inode *dir, + const char *name, + int len, + int nospc) /* Flag special file ? 
*/ +{ + int res,ino; + struct buffer_head *bh; + struct msdos_dir_entry *de; + struct inode *inode; + + bh = NULL; + inode = NULL; + if ((res = msdos_find(dir,name,len,&bh,&de,&ino)) < 0) + goto unlink_done; + if (!(inode = iget(dir->i_sb,ino))) { + res = -ENOENT; + goto unlink_done; + } + if (!S_ISREG(inode->i_mode) && nospc){ + res = -EPERM; + goto unlink_done; + } + inode->i_nlink = 0; + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + MSDOS_I(inode)->i_busy = 1; + inode->i_dirt = dir->i_dirt = 1; + de->name[0] = DELETED_FLAG; + mark_buffer_dirty(bh, 1); +unlink_done: + brelse(bh); + iput(inode); + iput(dir); + return res; +} + +int msdos_unlink(struct inode *dir,const char *name,int len) +{ + return msdos_unlinkx (dir,name,len,1); +} +/* + Special entry for umsdos +*/ +int msdos_unlink_umsdos(struct inode *dir,const char *name,int len) +{ + return msdos_unlinkx (dir,name,len,0); +} + +static int rename_same_dir(struct inode *old_dir,char *old_name, + struct inode *new_dir,char *new_name,struct buffer_head *old_bh, + struct msdos_dir_entry *old_de,int old_ino) +{ + struct buffer_head *new_bh; + struct msdos_dir_entry *new_de; + struct inode *new_inode,*old_inode; + int new_ino,exists,error; + + if (!strncmp(old_name,new_name,MSDOS_NAME)) return 0; + exists = msdos_scan(new_dir,new_name,&new_bh,&new_de,&new_ino) >= 0; + if (*(unsigned char *) old_de->name == DELETED_FLAG) { + if (exists) brelse(new_bh); + return -ENOENT; + } + if (exists) { + if (!(new_inode = iget(new_dir->i_sb,new_ino))) { + brelse(new_bh); + return -EIO; + } + error = S_ISDIR(new_inode->i_mode) ? (old_de->attr & ATTR_DIR) ? + msdos_empty(new_inode) : -EPERM : (old_de->attr & ATTR_DIR) + ? 
-EPERM : 0; + if (error) { + iput(new_inode); + brelse(new_bh); + return error; + } + if (S_ISDIR(new_inode->i_mode)) { + new_dir->i_nlink--; + new_dir->i_dirt = 1; + } + new_inode->i_nlink = 0; + MSDOS_I(new_inode)->i_busy = 1; + new_inode->i_dirt = 1; + new_de->name[0] = DELETED_FLAG; + mark_buffer_dirty(new_bh, 1); + iput(new_inode); + brelse(new_bh); + } + memcpy(old_de->name,new_name,MSDOS_NAME); + mark_buffer_dirty(old_bh, 1); + if (MSDOS_SB(old_dir->i_sb)->conversion == 'a') /* update binary info */ + if ((old_inode = iget(old_dir->i_sb,old_ino)) != NULL) { + msdos_read_inode(old_inode); + iput(old_inode); + } + return 0; +} + + +static int rename_diff_dir(struct inode *old_dir,char *old_name, + struct inode *new_dir,char *new_name,struct buffer_head *old_bh, + struct msdos_dir_entry *old_de,int old_ino) +{ + struct buffer_head *new_bh,*free_bh,*dotdot_bh; + struct msdos_dir_entry *new_de,*free_de,*dotdot_de; + struct inode *old_inode,*new_inode,*free_inode,*dotdot_inode,*walk; + int new_ino,free_ino,dotdot_ino; + int error,exists,ino; + + if (old_dir->i_dev != new_dir->i_dev) return -EINVAL; + if (old_ino == new_dir->i_ino) return -EINVAL; + if (!(walk = iget(new_dir->i_sb,new_dir->i_ino))) return -EIO; + while (walk->i_ino != MSDOS_ROOT_INO) { + ino = msdos_parent_ino(walk,1); + iput(walk); + if (ino < 0) return ino; + if (ino == old_ino) return -EINVAL; + if (!(walk = iget(new_dir->i_sb,ino))) return -EIO; + } + iput(walk); + while ((error = msdos_scan(new_dir,NULL,&free_bh,&free_de,&free_ino)) < + 0) { + if (error != -ENOENT) return error; + error = msdos_add_cluster(new_dir); + if (error) return error; + } + exists = msdos_scan(new_dir,new_name,&new_bh,&new_de,&new_ino) >= 0; + if (!(old_inode = iget(old_dir->i_sb,old_ino))) { + brelse(free_bh); + if (exists) brelse(new_bh); + return -EIO; + } + if (*(unsigned char *) old_de->name == DELETED_FLAG) { + iput(old_inode); + brelse(free_bh); + if (exists) brelse(new_bh); + return -ENOENT; + } + new_inode = 
NULL; /* to make GCC happy */ + if (exists) { + if (!(new_inode = iget(new_dir->i_sb,new_ino))) { + iput(old_inode); + brelse(new_bh); + return -EIO; + } + error = S_ISDIR(new_inode->i_mode) ? (old_de->attr & ATTR_DIR) ? + msdos_empty(new_inode) : -EPERM : (old_de->attr & ATTR_DIR) + ? -EPERM : 0; + if (error) { + iput(new_inode); + iput(old_inode); + brelse(new_bh); + return error; + } + new_inode->i_nlink = 0; + MSDOS_I(new_inode)->i_busy = 1; + new_inode->i_dirt = 1; + new_de->name[0] = DELETED_FLAG; + mark_buffer_dirty(new_bh, 1); + } + memcpy(free_de,old_de,sizeof(struct msdos_dir_entry)); + memcpy(free_de->name,new_name,MSDOS_NAME); + if (!(free_inode = iget(new_dir->i_sb,free_ino))) { + free_de->name[0] = DELETED_FLAG; +/* Don't mark free_bh as dirty. Both states are supposed to be equivalent. */ + brelse(free_bh); + if (exists) { + iput(new_inode); + brelse(new_bh); + } + return -EIO; + } + if (exists && S_ISDIR(new_inode->i_mode)) { + new_dir->i_nlink--; + new_dir->i_dirt = 1; + } + msdos_read_inode(free_inode); + MSDOS_I(old_inode)->i_busy = 1; + cache_inval_inode(old_inode); + old_inode->i_dirt = 1; + old_de->name[0] = DELETED_FLAG; + mark_buffer_dirty(old_bh, 1); + mark_buffer_dirty(free_bh, 1); + if (!exists) iput(free_inode); + else { + MSDOS_I(new_inode)->i_depend = free_inode; + MSDOS_I(free_inode)->i_old = new_inode; + /* free_inode is put when putting new_inode */ + iput(new_inode); + brelse(new_bh); + } + if (S_ISDIR(old_inode->i_mode)) { + if ((error = msdos_scan(old_inode,MSDOS_DOTDOT,&dotdot_bh, + &dotdot_de,&dotdot_ino)) < 0) goto rename_done; + if (!(dotdot_inode = iget(old_inode->i_sb,dotdot_ino))) { + brelse(dotdot_bh); + error = -EIO; + goto rename_done; + } + dotdot_de->start = MSDOS_I(dotdot_inode)->i_start = + MSDOS_I(new_dir)->i_start; + dotdot_inode->i_dirt = 1; + mark_buffer_dirty(dotdot_bh, 1); + old_dir->i_nlink--; + new_dir->i_nlink++; + /* no need to mark them dirty */ + dotdot_inode->i_nlink = new_dir->i_nlink; + 
iput(dotdot_inode); + brelse(dotdot_bh); + } + error = 0; +rename_done: + brelse(free_bh); + iput(old_inode); + return error; +} + + +int msdos_rename(struct inode *old_dir,const char *old_name,int old_len, + struct inode *new_dir,const char *new_name,int new_len) +{ + char old_msdos_name[MSDOS_NAME],new_msdos_name[MSDOS_NAME]; + struct buffer_head *old_bh; + struct msdos_dir_entry *old_de; + int old_ino,error; + + if ((error = msdos_format_name(MSDOS_SB(old_dir->i_sb)->name_check, + old_name,old_len,old_msdos_name,1)) < 0) goto rename_done; + if ((error = msdos_format_name(MSDOS_SB(new_dir->i_sb)->name_check, + new_name,new_len,new_msdos_name,0)) < 0) goto rename_done; + if ((error = msdos_scan(old_dir,old_msdos_name,&old_bh,&old_de, + &old_ino)) < 0) goto rename_done; + lock_creation(); + if (old_dir == new_dir) + error = rename_same_dir(old_dir,old_msdos_name,new_dir, + new_msdos_name,old_bh,old_de,old_ino); + else error = rename_diff_dir(old_dir,old_msdos_name,new_dir, + new_msdos_name,old_bh,old_de,old_ino); + unlock_creation(); + brelse(old_bh); +rename_done: + iput(old_dir); + iput(new_dir); + return error; +} diff --git a/fs/namei.c b/fs/namei.c new file mode 100644 index 000000000..f5f8b5c14 --- /dev/null +++ b/fs/namei.c @@ -0,0 +1,849 @@ +/* + * linux/fs/namei.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * Some corrections by tytso. + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/fcntl.h> +#include <linux/stat.h> + +#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) + +/* + * How long a filename can we get from user space? 
+ * -EFAULT if invalid area + * 0 if ok (ENAMETOOLONG before EFAULT) + * >0 EFAULT after xx bytes + */ +static inline int get_max_filename(unsigned long address) +{ + struct vm_area_struct * vma; + + if (get_fs() == KERNEL_DS) + return 0; + for (vma = current->mm->mmap ; ; vma = vma->vm_next) { + if (!vma) + return -EFAULT; + if (vma->vm_end > address) + break; + } + if (vma->vm_start > address || !(vma->vm_page_prot & PAGE_USER)) + return -EFAULT; + address = vma->vm_end - address; + if (address > PAGE_SIZE) + return 0; + if (vma->vm_next && vma->vm_next->vm_start == vma->vm_end && + (vma->vm_next->vm_page_prot & PAGE_USER)) + return 0; + return address; +} + +/* + * In order to reduce some races, while at the same time doing additional + * checking and hopefully speeding things up, we copy filenames to the + * kernel data space before using them.. + * + * POSIX.1 2.4: an empty pathname is invalid (ENOENT). + */ +int getname(const char * filename, char **result) +{ + int i, error; + unsigned long page; + char * tmp, c; + + i = get_max_filename((unsigned long) filename); + if (i < 0) + return i; + error = -EFAULT; + if (!i) { + error = -ENAMETOOLONG; + i = PAGE_SIZE; + } + c = get_fs_byte(filename++); + if (!c) + return -ENOENT; + if(!(page = __get_free_page(GFP_KERNEL))) + return -ENOMEM; + *result = tmp = (char *) page; + while (--i) { + *(tmp++) = c; + c = get_fs_byte(filename++); + if (!c) { + *tmp = '\0'; + return 0; + } + } + free_page(page); + return error; +} + +void putname(char * name) +{ + free_page((unsigned long) name); +} + +/* + * permission() + * + * is used to check for read/write/execute permissions on a file. + * We use "fsuid" for this, letting us set arbitrary permissions + * for filesystem access without changing the "normal" uids which + * are used for other things.. 
+ */ +int permission(struct inode * inode,int mask) +{ + int mode = inode->i_mode; + + if (inode->i_op && inode->i_op->permission) + return inode->i_op->permission(inode, mask); + else if ((mask & S_IWOTH) && IS_IMMUTABLE(inode)) + return 0; /* Nobody gets write access to an immutable file */ + else if (current->fsuid == inode->i_uid) + mode >>= 6; + else if (in_group_p(inode->i_gid)) + mode >>= 3; + if (((mode & mask & 0007) == mask) || fsuser()) + return 1; + return 0; +} + +/* + * get_write_access() gets write permission for a file. + * put_write_access() releases this write permission. + * This is used for regular files. + * We cannot support write (and maybe mmap read-write shared) accesses and + * MAP_DENYWRITE mmappings simultaneously. + */ +int get_write_access(struct inode * inode) +{ + struct task_struct ** p; + + if ((inode->i_count > 1) && S_ISREG(inode->i_mode)) /* shortcut */ + for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) { + struct vm_area_struct * mpnt; + if (!*p) + continue; + for(mpnt = (*p)->mm->mmap; mpnt; mpnt = mpnt->vm_next) { + if (inode != mpnt->vm_inode) + continue; + if (mpnt->vm_flags & VM_DENYWRITE) + return -ETXTBSY; + } + } + inode->i_wcount++; + return 0; +} + +void put_write_access(struct inode * inode) +{ + inode->i_wcount--; +} + +/* + * lookup() looks up one part of a pathname, using the fs-dependent + * routines (currently minix_lookup) for it. It also checks for + * fathers (pseudo-roots, mount-points) + */ +int lookup(struct inode * dir,const char * name, int len, + struct inode ** result) +{ + struct super_block * sb; + int perm; + + *result = NULL; + if (!dir) + return -ENOENT; +/* check permissions before traversing mount-points */ + perm = permission(dir,MAY_EXEC); + if (len==2 && name[0] == '.' 
&& name[1] == '.') { + if (dir == current->fs->root) { + *result = dir; + return 0; + } else if ((sb = dir->i_sb) && (dir == sb->s_mounted)) { + sb = dir->i_sb; + iput(dir); + dir = sb->s_covered; + if (!dir) + return -ENOENT; + dir->i_count++; + } + } + if (!dir->i_op || !dir->i_op->lookup) { + iput(dir); + return -ENOTDIR; + } + if (!perm) { + iput(dir); + return -EACCES; + } + if (!len) { + *result = dir; + return 0; + } + return dir->i_op->lookup(dir,name,len,result); +} + +int follow_link(struct inode * dir, struct inode * inode, + int flag, int mode, struct inode ** res_inode) +{ + if (!dir || !inode) { + iput(dir); + iput(inode); + *res_inode = NULL; + return -ENOENT; + } + if (!inode->i_op || !inode->i_op->follow_link) { + iput(dir); + *res_inode = inode; + return 0; + } + return inode->i_op->follow_link(dir,inode,flag,mode,res_inode); +} + +/* + * dir_namei() + * + * dir_namei() returns the inode of the directory of the + * specified name, and the name within that directory. + */ +static int dir_namei(const char * pathname, int * namelen, const char ** name, + struct inode * base, struct inode ** res_inode) +{ + char c; + const char * thisname; + int len,error; + struct inode * inode; + + *res_inode = NULL; + if (!base) { + base = current->fs->pwd; + base->i_count++; + } + if ((c = *pathname) == '/') { + iput(base); + base = current->fs->root; + pathname++; + base->i_count++; + } + while (1) { + thisname = pathname; + for(len=0;(c = *(pathname++))&&(c != '/');len++) + /* nothing */ ; + if (!c) + break; + base->i_count++; + error = lookup(base,thisname,len,&inode); + if (error) { + iput(base); + return error; + } + error = follow_link(base,inode,0,0,&base); + if (error) + return error; + } + if (!base->i_op || !base->i_op->lookup) { + iput(base); + return -ENOTDIR; + } + *name = thisname; + *namelen = len; + *res_inode = base; + return 0; +} + +static int _namei(const char * pathname, struct inode * base, + int follow_links, struct inode ** res_inode) +{ + 
const char * basename; + int namelen,error; + struct inode * inode; + + *res_inode = NULL; + error = dir_namei(pathname,&namelen,&basename,base,&base); + if (error) + return error; + base->i_count++; /* lookup uses up base */ + error = lookup(base,basename,namelen,&inode); + if (error) { + iput(base); + return error; + } + if (follow_links) { + error = follow_link(base,inode,0,0,&inode); + if (error) + return error; + } else + iput(base); + *res_inode = inode; + return 0; +} + +int lnamei(const char * pathname, struct inode ** res_inode) +{ + int error; + char * tmp; + + error = getname(pathname,&tmp); + if (!error) { + error = _namei(tmp,NULL,0,res_inode); + putname(tmp); + } + return error; +} + +/* + * namei() + * + * is used by most simple commands to get the inode of a specified name. + * Open, link etc use their own routines, but this is enough for things + * like 'chmod' etc. + */ +int namei(const char * pathname, struct inode ** res_inode) +{ + int error; + char * tmp; + + error = getname(pathname,&tmp); + if (!error) { + error = _namei(tmp,NULL,1,res_inode); + putname(tmp); + } + return error; +} + +/* + * open_namei() + * + * namei for open - this is in fact almost the whole open-routine. + * + * Note that the low bits of "flag" aren't the same as in the open + * system call - they are 00 - no permissions needed + * 01 - read permission needed + * 10 - write permission needed + * 11 - read/write permissions needed + * which is a lot more logical, and also allows the "no perm" needed + * for symlinks (where the permissions are checked later). 
+ */ +int open_namei(const char * pathname, int flag, int mode, + struct inode ** res_inode, struct inode * base) +{ + const char * basename; + int namelen,error; + struct inode * dir, *inode; + + mode &= S_IALLUGO & ~current->fs->umask; + mode |= S_IFREG; + error = dir_namei(pathname,&namelen,&basename,base,&dir); + if (error) + return error; + if (!namelen) { /* special case: '/usr/' etc */ + if (flag & 2) { + iput(dir); + return -EISDIR; + } + /* thanks to Paul Pluzhnikov for noticing this was missing.. */ + if (!permission(dir,ACC_MODE(flag))) { + iput(dir); + return -EACCES; + } + *res_inode=dir; + return 0; + } + dir->i_count++; /* lookup eats the dir */ + if (flag & O_CREAT) { + down(&dir->i_sem); + error = lookup(dir,basename,namelen,&inode); + if (!error) { + if (flag & O_EXCL) { + iput(inode); + error = -EEXIST; + } + } else if (!permission(dir,MAY_WRITE | MAY_EXEC)) + error = -EACCES; + else if (!dir->i_op || !dir->i_op->create) + error = -EACCES; + else if (IS_RDONLY(dir)) + error = -EROFS; + else { + dir->i_count++; /* create eats the dir */ + error = dir->i_op->create(dir,basename,namelen,mode,res_inode); + up(&dir->i_sem); + iput(dir); + return error; + } + up(&dir->i_sem); + } else + error = lookup(dir,basename,namelen,&inode); + if (error) { + iput(dir); + return error; + } + error = follow_link(dir,inode,flag,mode,&inode); + if (error) + return error; + if (S_ISDIR(inode->i_mode) && (flag & 2)) { + iput(inode); + return -EISDIR; + } + if (!permission(inode,ACC_MODE(flag))) { + iput(inode); + return -EACCES; + } + if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { + if (IS_NODEV(inode)) { + iput(inode); + return -EACCES; + } + flag &= ~O_TRUNC; + } else { + if (IS_RDONLY(inode) && (flag & 2)) { + iput(inode); + return -EROFS; + } + } + /* + * An append-only file must be opened in append mode for writing + */ + if (IS_APPEND(inode) && ((flag & 2) && !(flag & O_APPEND))) { + iput(inode); + return -EPERM; + } + if (flag & O_TRUNC) { + struct 
iattr newattrs; + + if ((error = get_write_access(inode))) { + iput(inode); + return error; + } + newattrs.ia_size = 0; + newattrs.ia_valid = ATTR_SIZE; + if ((error = notify_change(inode, &newattrs))) { + put_write_access(inode); + iput(inode); + return error; + } + inode->i_size = 0; + if (inode->i_op && inode->i_op->truncate) + inode->i_op->truncate(inode); + inode->i_dirt = 1; + put_write_access(inode); + } + *res_inode = inode; + return 0; +} + +int do_mknod(const char * filename, int mode, dev_t dev) +{ + const char * basename; + int namelen, error; + struct inode * dir; + + mode &= ~current->fs->umask; + error = dir_namei(filename,&namelen,&basename, NULL, &dir); + if (error) + return error; + if (!namelen) { + iput(dir); + return -ENOENT; + } + if (IS_RDONLY(dir)) { + iput(dir); + return -EROFS; + } + if (!permission(dir,MAY_WRITE | MAY_EXEC)) { + iput(dir); + return -EACCES; + } + if (!dir->i_op || !dir->i_op->mknod) { + iput(dir); + return -EPERM; + } + dir->i_count++; + down(&dir->i_sem); + error = dir->i_op->mknod(dir,basename,namelen,mode,dev); + up(&dir->i_sem); + iput(dir); + return error; +} + +asmlinkage int sys_mknod(const char * filename, int mode, dev_t dev) +{ + int error; + char * tmp; + + if (S_ISDIR(mode) || (!S_ISFIFO(mode) && !fsuser())) + return -EPERM; + switch (mode & S_IFMT) { + case 0: + mode |= S_IFREG; + break; + case S_IFREG: case S_IFCHR: case S_IFBLK: case S_IFIFO: + break; + default: + return -EINVAL; + } + error = getname(filename,&tmp); + if (!error) { + error = do_mknod(tmp,mode,dev); + putname(tmp); + } + return error; +} + +static int do_mkdir(const char * pathname, int mode) +{ + const char * basename; + int namelen, error; + struct inode * dir; + + error = dir_namei(pathname,&namelen,&basename,NULL,&dir); + if (error) + return error; + if (!namelen) { + iput(dir); + return -ENOENT; + } + if (IS_RDONLY(dir)) { + iput(dir); + return -EROFS; + } + if (!permission(dir,MAY_WRITE | MAY_EXEC)) { + iput(dir); + return -EACCES; + 
} + if (!dir->i_op || !dir->i_op->mkdir) { + iput(dir); + return -EPERM; + } + dir->i_count++; + down(&dir->i_sem); + error = dir->i_op->mkdir(dir, basename, namelen, mode & 0777 & ~current->fs->umask); + up(&dir->i_sem); + iput(dir); + return error; +} + +asmlinkage int sys_mkdir(const char * pathname, int mode) +{ + int error; + char * tmp; + + error = getname(pathname,&tmp); + if (!error) { + error = do_mkdir(tmp,mode); + putname(tmp); + } + return error; +} + +static int do_rmdir(const char * name) +{ + const char * basename; + int namelen, error; + struct inode * dir; + + error = dir_namei(name,&namelen,&basename,NULL,&dir); + if (error) + return error; + if (!namelen) { + iput(dir); + return -ENOENT; + } + if (IS_RDONLY(dir)) { + iput(dir); + return -EROFS; + } + if (!permission(dir,MAY_WRITE | MAY_EXEC)) { + iput(dir); + return -EACCES; + } + /* + * A subdirectory cannot be removed from an append-only directory + */ + if (IS_APPEND(dir)) { + iput(dir); + return -EPERM; + } + if (!dir->i_op || !dir->i_op->rmdir) { + iput(dir); + return -EPERM; + } + return dir->i_op->rmdir(dir,basename,namelen); +} + +asmlinkage int sys_rmdir(const char * pathname) +{ + int error; + char * tmp; + + error = getname(pathname,&tmp); + if (!error) { + error = do_rmdir(tmp); + putname(tmp); + } + return error; +} + +static int do_unlink(const char * name) +{ + const char * basename; + int namelen, error; + struct inode * dir; + + error = dir_namei(name,&namelen,&basename,NULL,&dir); + if (error) + return error; + if (!namelen) { + iput(dir); + return -EPERM; + } + if (IS_RDONLY(dir)) { + iput(dir); + return -EROFS; + } + if (!permission(dir,MAY_WRITE | MAY_EXEC)) { + iput(dir); + return -EACCES; + } + /* + * A file cannot be removed from an append-only directory + */ + if (IS_APPEND(dir)) { + iput(dir); + return -EPERM; + } + if (!dir->i_op || !dir->i_op->unlink) { + iput(dir); + return -EPERM; + } + return dir->i_op->unlink(dir,basename,namelen); +} + +asmlinkage int 
sys_unlink(const char * pathname) +{ + int error; + char * tmp; + + error = getname(pathname,&tmp); + if (!error) { + error = do_unlink(tmp); + putname(tmp); + } + return error; +} + +static int do_symlink(const char * oldname, const char * newname) +{ + struct inode * dir; + const char * basename; + int namelen, error; + + error = dir_namei(newname,&namelen,&basename,NULL,&dir); + if (error) + return error; + if (!namelen) { + iput(dir); + return -ENOENT; + } + if (IS_RDONLY(dir)) { + iput(dir); + return -EROFS; + } + if (!permission(dir,MAY_WRITE | MAY_EXEC)) { + iput(dir); + return -EACCES; + } + if (!dir->i_op || !dir->i_op->symlink) { + iput(dir); + return -EPERM; + } + dir->i_count++; + down(&dir->i_sem); + error = dir->i_op->symlink(dir,basename,namelen,oldname); + up(&dir->i_sem); + iput(dir); + return error; +} + +asmlinkage int sys_symlink(const char * oldname, const char * newname) +{ + int error; + char * from, * to; + + error = getname(oldname,&from); + if (!error) { + error = getname(newname,&to); + if (!error) { + error = do_symlink(from,to); + putname(to); + } + putname(from); + } + return error; +} + +static int do_link(struct inode * oldinode, const char * newname) +{ + struct inode * dir; + const char * basename; + int namelen, error; + + error = dir_namei(newname,&namelen,&basename,NULL,&dir); + if (error) { + iput(oldinode); + return error; + } + if (!namelen) { + iput(oldinode); + iput(dir); + return -EPERM; + } + if (IS_RDONLY(dir)) { + iput(oldinode); + iput(dir); + return -EROFS; + } + if (dir->i_dev != oldinode->i_dev) { + iput(dir); + iput(oldinode); + return -EXDEV; + } + if (!permission(dir,MAY_WRITE | MAY_EXEC)) { + iput(dir); + iput(oldinode); + return -EACCES; + } + /* + * A link to an append-only or immutable file cannot be created + */ + if (IS_APPEND(oldinode) || IS_IMMUTABLE(oldinode)) { + iput(dir); + iput(oldinode); + return -EPERM; + } + if (!dir->i_op || !dir->i_op->link) { + iput(dir); + iput(oldinode); + return -EPERM; + } 
+ dir->i_count++; + down(&dir->i_sem); + error = dir->i_op->link(oldinode, dir, basename, namelen); + up(&dir->i_sem); + iput(dir); + return error; +} + +asmlinkage int sys_link(const char * oldname, const char * newname) +{ + int error; + char * to; + struct inode * oldinode; + + error = namei(oldname, &oldinode); + if (error) + return error; + error = getname(newname,&to); + if (error) { + iput(oldinode); + return error; + } + error = do_link(oldinode,to); + putname(to); + return error; +} + +static int do_rename(const char * oldname, const char * newname) +{ + struct inode * old_dir, * new_dir; + const char * old_base, * new_base; + int old_len, new_len, error; + + error = dir_namei(oldname,&old_len,&old_base,NULL,&old_dir); + if (error) + return error; + if (!permission(old_dir,MAY_WRITE | MAY_EXEC)) { + iput(old_dir); + return -EACCES; + } + if (!old_len || (old_base[0] == '.' && + (old_len == 1 || (old_base[1] == '.' && + old_len == 2)))) { + iput(old_dir); + return -EPERM; + } + error = dir_namei(newname,&new_len,&new_base,NULL,&new_dir); + if (error) { + iput(old_dir); + return error; + } + if (!permission(new_dir,MAY_WRITE | MAY_EXEC)) { + iput(old_dir); + iput(new_dir); + return -EACCES; + } + if (!new_len || (new_base[0] == '.' && + (new_len == 1 || (new_base[1] == '.' 
&& + new_len == 2)))) { + iput(old_dir); + iput(new_dir); + return -EPERM; + } + if (new_dir->i_dev != old_dir->i_dev) { + iput(old_dir); + iput(new_dir); + return -EXDEV; + } + if (IS_RDONLY(new_dir) || IS_RDONLY(old_dir)) { + iput(old_dir); + iput(new_dir); + return -EROFS; + } + /* + * A file cannot be removed from an append-only directory + */ + if (IS_APPEND(old_dir)) { + iput(old_dir); + iput(new_dir); + return -EPERM; + } + if (!old_dir->i_op || !old_dir->i_op->rename) { + iput(old_dir); + iput(new_dir); + return -EPERM; + } + new_dir->i_count++; + down(&new_dir->i_sem); + error = old_dir->i_op->rename(old_dir, old_base, old_len, + new_dir, new_base, new_len); + up(&new_dir->i_sem); + iput(new_dir); + return error; +} + +asmlinkage int sys_rename(const char * oldname, const char * newname) +{ + int error; + char * from, * to; + + error = getname(oldname,&from); + if (!error) { + error = getname(newname,&to); + if (!error) { + error = do_rename(from,to); + putname(to); + } + putname(from); + } + return error; +} diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile new file mode 100644 index 000000000..8610c95b1 --- /dev/null +++ b/fs/nfs/Makefile @@ -0,0 +1,31 @@ +# +# Makefile for the linux nfs-filesystem routines. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... 
+ +.c.s: + $(CC) $(CFLAGS) -S $< +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< + +OBJS= proc.o sock.o inode.o file.o dir.o \ + symlink.o mmap.o + +nfs.o: $(OBJS) + $(LD) -r -o nfs.o $(OBJS) + +dep: + $(CPP) -M *.c > .depend + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c new file mode 100644 index 000000000..62a3e0821 --- /dev/null +++ b/fs/nfs/dir.c @@ -0,0 +1,609 @@ +/* + * linux/fs/nfs/dir.c + * + * Copyright (C) 1992 Rick Sladkey + * + * nfs directory handling functions + */ + +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/stat.h> +#include <linux/nfs_fs.h> +#include <linux/fcntl.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/malloc.h> +#include <linux/mm.h> + +#include <asm/segment.h> /* for fs functions */ + +#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) +#define ROUND_UP(x) (((x)+3) & ~3) + +static int nfs_dir_read(struct inode *, struct file *filp, char *buf, + int count); +static int nfs_readdir(struct inode *, struct file *, struct dirent *, int); +static int nfs_lookup(struct inode *dir, const char *name, int len, + struct inode **result); +static int nfs_create(struct inode *dir, const char *name, int len, int mode, + struct inode **result); +static int nfs_mkdir(struct inode *dir, const char *name, int len, int mode); +static int nfs_rmdir(struct inode *dir, const char *name, int len); +static int nfs_unlink(struct inode *dir, const char *name, int len); +static int nfs_symlink(struct inode *inode, const char *name, int len, + const char *symname); +static int nfs_link(struct inode *oldinode, struct inode *dir, + const char *name, int len); +static int nfs_mknod(struct inode *dir, const char *name, int len, int mode, + int rdev); +static int nfs_rename(struct inode *old_dir, const char *old_name, + int old_len, struct inode *new_dir, const char *new_name, + int new_len); + 
+static struct file_operations nfs_dir_operations = { + NULL, /* lseek - default */ + nfs_dir_read, /* read - bad */ + NULL, /* write - bad */ + nfs_readdir, /* readdir */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + NULL /* fsync */ +}; + +struct inode_operations nfs_dir_inode_operations = { + &nfs_dir_operations, /* default directory file-ops */ + nfs_create, /* create */ + nfs_lookup, /* lookup */ + nfs_link, /* link */ + nfs_unlink, /* unlink */ + nfs_symlink, /* symlink */ + nfs_mkdir, /* mkdir */ + nfs_rmdir, /* rmdir */ + nfs_mknod, /* mknod */ + nfs_rename, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static int nfs_dir_read(struct inode *inode, struct file *filp, char *buf, + int count) +{ + return -EISDIR; +} + +/* + * We need to do caching of directory entries to prevent an + * incredible amount of RPC traffic. Only the most recent open + * directory is cached. This seems sufficient for most purposes. + * Technically, we ought to flush the cache on close but this is + * not a problem in practice. 
+ */ + +static int nfs_readdir(struct inode *inode, struct file *filp, + struct dirent *dirent, int count) +{ + static int c_dev = 0; + static int c_ino; + static int c_size; + static struct nfs_entry *c_entry = NULL; + + int result; + int i; + struct nfs_entry *entry; + + if (!inode || !S_ISDIR(inode->i_mode)) { + printk("nfs_readdir: inode is NULL or not a directory\n"); + return -EBADF; + } + + /* initialize cache memory if it hasn't been used before */ + + if (c_entry == NULL) { + i = sizeof (struct nfs_entry)*NFS_READDIR_CACHE_SIZE; + c_entry = (struct nfs_entry *) kmalloc(i, GFP_KERNEL); + for (i = 0; i < NFS_READDIR_CACHE_SIZE; i++) { + c_entry[i].name = (char *) kmalloc(NFS_MAXNAMLEN + 1, + GFP_KERNEL); + } + } + entry = NULL; + + /* try to find it in the cache */ + + if (inode->i_dev == c_dev && inode->i_ino == c_ino) { + for (i = 0; i < c_size; i++) { + if (filp->f_pos == c_entry[i].cookie) { + if (i == c_size - 1) { + if (c_entry[i].eof) + return 0; + } + else + entry = c_entry + i + 1; + break; + } + } + } + + /* if we didn't find it in the cache, revert to an nfs call */ + + if (!entry) { + result = nfs_proc_readdir(NFS_SERVER(inode), NFS_FH(inode), + filp->f_pos, NFS_READDIR_CACHE_SIZE, c_entry); + if (result < 0) { + c_dev = 0; + return result; + } + if (result > 0) { + c_dev = inode->i_dev; + c_ino = inode->i_ino; + c_size = result; + entry = c_entry + 0; + } + } + + /* if we found it in the cache or from an nfs call, return results */ + + if (entry) { + i = strlen(entry->name); + memcpy_tofs(dirent->d_name, entry->name, i + 1); + put_fs_long(entry->fileid, &dirent->d_ino); + put_fs_word(i, &dirent->d_reclen); + filp->f_pos = entry->cookie; + return ROUND_UP(NAME_OFFSET(dirent)+i+1); + } + return 0; +} + +/* + * Lookup caching is a big win for performance but this is just + * a trial to see how well it works on a small scale. + * For example, bash does a lookup on ".." 13 times for each path + * element when running pwd. 
Yes, hard to believe but true. + * Try pwd in a filesystem mounted with noac. + * + * It trades a little cpu time and memory for a lot of network bandwidth. + * Since the cache is not hashed yet, it is a good idea not to make it too + * large because every lookup looks through the entire cache even + * though most of them will fail. + */ + +static struct nfs_lookup_cache_entry { + int dev; + int inode; + char filename[NFS_MAXNAMLEN + 1]; + struct nfs_fh fhandle; + struct nfs_fattr fattr; + int expiration_date; +} nfs_lookup_cache[NFS_LOOKUP_CACHE_SIZE]; + +static struct nfs_lookup_cache_entry *nfs_lookup_cache_index(struct inode *dir, + const char *filename) +{ + struct nfs_lookup_cache_entry *entry; + int i; + + for (i = 0; i < NFS_LOOKUP_CACHE_SIZE; i++) { + entry = nfs_lookup_cache + i; + if (entry->dev == dir->i_dev && entry->inode == dir->i_ino + && !strncmp(filename, entry->filename, NFS_MAXNAMLEN)) + return entry; + } + return NULL; +} + +static int nfs_lookup_cache_lookup(struct inode *dir, const char *filename, + struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + static int nfs_lookup_cache_in_use = 0; + + struct nfs_lookup_cache_entry *entry; + + if (!nfs_lookup_cache_in_use) { + memset(nfs_lookup_cache, 0, sizeof(nfs_lookup_cache)); + nfs_lookup_cache_in_use = 1; + } + if ((entry = nfs_lookup_cache_index(dir, filename))) { + if (jiffies > entry->expiration_date) { + entry->dev = 0; + return 0; + } + *fhandle = entry->fhandle; + *fattr = entry->fattr; + return 1; + } + return 0; +} + +static void nfs_lookup_cache_add(struct inode *dir, const char *filename, + struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + static int nfs_lookup_cache_pos = 0; + struct nfs_lookup_cache_entry *entry; + + /* compensate for bug in SGI NFS server */ + if (fattr->size == -1 || fattr->uid == -1 || fattr->gid == -1 + || fattr->atime.seconds == -1 || fattr->mtime.seconds == -1) + return; + if (!(entry = nfs_lookup_cache_index(dir, filename))) { + entry = 
nfs_lookup_cache + nfs_lookup_cache_pos++; + if (nfs_lookup_cache_pos == NFS_LOOKUP_CACHE_SIZE) + nfs_lookup_cache_pos = 0; + } + entry->dev = dir->i_dev; + entry->inode = dir->i_ino; + strcpy(entry->filename, filename); + entry->fhandle = *fhandle; + entry->fattr = *fattr; + entry->expiration_date = jiffies + (S_ISDIR(fattr->mode) + ? NFS_SERVER(dir)->acdirmax : NFS_SERVER(dir)->acregmax); +} + +static void nfs_lookup_cache_remove(struct inode *dir, struct inode *inode, + const char *filename) +{ + struct nfs_lookup_cache_entry *entry; + int dev; + int fileid; + int i; + + if (inode) { + dev = inode->i_dev; + fileid = inode->i_ino; + } + else if ((entry = nfs_lookup_cache_index(dir, filename))) { + dev = entry->dev; + fileid = entry->fattr.fileid; + } + else + return; + for (i = 0; i < NFS_LOOKUP_CACHE_SIZE; i++) { + entry = nfs_lookup_cache + i; + if (entry->dev == dev && entry->fattr.fileid == fileid) + entry->dev = 0; + } +} + +static void nfs_lookup_cache_refresh(struct inode *file, + struct nfs_fattr *fattr) +{ + struct nfs_lookup_cache_entry *entry; + int dev = file->i_dev; + int fileid = file->i_ino; + int i; + + for (i = 0; i < NFS_LOOKUP_CACHE_SIZE; i++) { + entry = nfs_lookup_cache + i; + if (entry->dev == dev && entry->fattr.fileid == fileid) + entry->fattr = *fattr; + } +} + +static int nfs_lookup(struct inode *dir, const char *__name, int len, + struct inode **result) +{ + struct nfs_fh fhandle; + struct nfs_fattr fattr; + char name[len > NFS_MAXNAMLEN? 1 : len+1]; + int error; + + *result = NULL; + if (!dir || !S_ISDIR(dir->i_mode)) { + printk("nfs_lookup: inode is NULL or not a directory\n"); + iput(dir); + return -ENOENT; + } + if (len > NFS_MAXNAMLEN) { + iput(dir); + return -ENAMETOOLONG; + } + memcpy(name,__name,len); + name[len] = '\0'; + if (len == 1 && name[0] == '.') { /* cheat for "." 
*/ + *result = dir; + return 0; + } + if ((NFS_SERVER(dir)->flags & NFS_MOUNT_NOAC) + || !nfs_lookup_cache_lookup(dir, name, &fhandle, &fattr)) { + if ((error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dir), + name, &fhandle, &fattr))) { + iput(dir); + return error; + } + nfs_lookup_cache_add(dir, name, &fhandle, &fattr); + } + if (!(*result = nfs_fhget(dir->i_sb, &fhandle, &fattr))) { + iput(dir); + return -EACCES; + } + iput(dir); + return 0; +} + +static int nfs_create(struct inode *dir, const char *name, int len, int mode, + struct inode **result) +{ + struct nfs_sattr sattr; + struct nfs_fattr fattr; + struct nfs_fh fhandle; + int error; + + *result = NULL; + if (!dir || !S_ISDIR(dir->i_mode)) { + printk("nfs_create: inode is NULL or not a directory\n"); + iput(dir); + return -ENOENT; + } + if (len > NFS_MAXNAMLEN) { + iput(dir); + return -ENAMETOOLONG; + } + sattr.mode = mode; + sattr.uid = sattr.gid = sattr.size = (unsigned) -1; + sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; + if ((error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir), + name, &sattr, &fhandle, &fattr))) { + iput(dir); + return error; + } + if (!(*result = nfs_fhget(dir->i_sb, &fhandle, &fattr))) { + iput(dir); + return -EACCES; + } + nfs_lookup_cache_add(dir, name, &fhandle, &fattr); + iput(dir); + return 0; +} + +static int nfs_mknod(struct inode *dir, const char *name, int len, + int mode, int rdev) +{ + struct nfs_sattr sattr; + struct nfs_fattr fattr; + struct nfs_fh fhandle; + int error; + + if (!dir || !S_ISDIR(dir->i_mode)) { + printk("nfs_mknod: inode is NULL or not a directory\n"); + iput(dir); + return -ENOENT; + } + if (len > NFS_MAXNAMLEN) { + iput(dir); + return -ENAMETOOLONG; + } + sattr.mode = mode; + sattr.uid = sattr.gid = (unsigned) -1; + if (S_ISCHR(mode) || S_ISBLK(mode)) + sattr.size = rdev; /* get out your barf bag */ + else + sattr.size = (unsigned) -1; + sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; + error = nfs_proc_create(NFS_SERVER(dir), 
NFS_FH(dir), + name, &sattr, &fhandle, &fattr); + if (!error) + nfs_lookup_cache_add(dir, name, &fhandle, &fattr); + iput(dir); + return error; +} + +static int nfs_mkdir(struct inode *dir, const char *name, int len, int mode) +{ + struct nfs_sattr sattr; + struct nfs_fattr fattr; + struct nfs_fh fhandle; + int error; + + if (!dir || !S_ISDIR(dir->i_mode)) { + printk("nfs_mkdir: inode is NULL or not a directory\n"); + iput(dir); + return -ENOENT; + } + if (len > NFS_MAXNAMLEN) { + iput(dir); + return -ENAMETOOLONG; + } + sattr.mode = mode; + sattr.uid = sattr.gid = sattr.size = (unsigned) -1; + sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; + error = nfs_proc_mkdir(NFS_SERVER(dir), NFS_FH(dir), + name, &sattr, &fhandle, &fattr); + if (!error) + nfs_lookup_cache_add(dir, name, &fhandle, &fattr); + iput(dir); + return error; +} + +static int nfs_rmdir(struct inode *dir, const char *name, int len) +{ + int error; + + if (!dir || !S_ISDIR(dir->i_mode)) { + printk("nfs_rmdir: inode is NULL or not a directory\n"); + iput(dir); + return -ENOENT; + } + if (len > NFS_MAXNAMLEN) { + iput(dir); + return -ENAMETOOLONG; + } + error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dir), name); + if (!error) + nfs_lookup_cache_remove(dir, NULL, name); + iput(dir); + return error; +} + +static int nfs_unlink(struct inode *dir, const char *name, int len) +{ + int error; + + if (!dir || !S_ISDIR(dir->i_mode)) { + printk("nfs_unlink: inode is NULL or not a directory\n"); + iput(dir); + return -ENOENT; + } + if (len > NFS_MAXNAMLEN) { + iput(dir); + return -ENAMETOOLONG; + } + error = nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dir), name); + if (!error) + nfs_lookup_cache_remove(dir, NULL, name); + iput(dir); + return error; +} + +static int nfs_symlink(struct inode *dir, const char *name, int len, + const char *symname) +{ + struct nfs_sattr sattr; + int error; + + if (!dir || !S_ISDIR(dir->i_mode)) { + printk("nfs_symlink: inode is NULL or not a directory\n"); + iput(dir); + return 
-ENOENT; + } + if (len > NFS_MAXNAMLEN) { + iput(dir); + return -ENAMETOOLONG; + } + if (strlen(symname) > NFS_MAXPATHLEN) { + iput(dir); + return -ENAMETOOLONG; + } + sattr.mode = S_IFLNK | S_IRWXUGO; /* SunOS 4.1.2 crashes without this! */ + sattr.uid = sattr.gid = sattr.size = (unsigned) -1; + sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; + error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dir), + name, symname, &sattr); + iput(dir); + return error; +} + +static int nfs_link(struct inode *oldinode, struct inode *dir, + const char *name, int len) +{ + int error; + + if (!oldinode) { + printk("nfs_link: old inode is NULL\n"); + iput(oldinode); + iput(dir); + return -ENOENT; + } + if (!dir || !S_ISDIR(dir->i_mode)) { + printk("nfs_link: dir is NULL or not a directory\n"); + iput(oldinode); + iput(dir); + return -ENOENT; + } + if (len > NFS_MAXNAMLEN) { + iput(oldinode); + iput(dir); + return -ENAMETOOLONG; + } + error = nfs_proc_link(NFS_SERVER(oldinode), NFS_FH(oldinode), + NFS_FH(dir), name); + if (!error) + nfs_lookup_cache_remove(dir, oldinode, NULL); + iput(oldinode); + iput(dir); + return error; +} + +static int nfs_rename(struct inode *old_dir, const char *old_name, int old_len, + struct inode *new_dir, const char *new_name, int new_len) +{ + int error; + + if (!old_dir || !S_ISDIR(old_dir->i_mode)) { + printk("nfs_rename: old inode is NULL or not a directory\n"); + iput(old_dir); + iput(new_dir); + return -ENOENT; + } + if (!new_dir || !S_ISDIR(new_dir->i_mode)) { + printk("nfs_rename: new inode is NULL or not a directory\n"); + iput(old_dir); + iput(new_dir); + return -ENOENT; + } + if (old_len > NFS_MAXNAMLEN || new_len > NFS_MAXNAMLEN) { + iput(old_dir); + iput(new_dir); + return -ENAMETOOLONG; + } + error = nfs_proc_rename(NFS_SERVER(old_dir), + NFS_FH(old_dir), old_name, + NFS_FH(new_dir), new_name); + if (!error) { + nfs_lookup_cache_remove(old_dir, NULL, old_name); + nfs_lookup_cache_remove(new_dir, NULL, new_name); + } + iput(old_dir); + 
iput(new_dir); + return error; +} + +/* + * Many nfs protocol calls return the new file attributes after + * an operation. Here we update the inode to reflect the state + * of the server's inode. + */ + +void nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + int was_empty; + + if (!inode || !fattr) { + printk("nfs_refresh_inode: inode or fattr is NULL\n"); + return; + } + if (inode->i_ino != fattr->fileid) { + printk("nfs_refresh_inode: inode number mismatch\n"); + return; + } + was_empty = inode->i_mode == 0; + inode->i_mode = fattr->mode; + inode->i_nlink = fattr->nlink; + inode->i_uid = fattr->uid; + inode->i_gid = fattr->gid; + inode->i_size = fattr->size; + inode->i_blksize = fattr->blocksize; + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + inode->i_rdev = fattr->rdev; + else + inode->i_rdev = 0; + inode->i_blocks = fattr->blocks; + inode->i_atime = fattr->atime.seconds; + inode->i_mtime = fattr->mtime.seconds; + inode->i_ctime = fattr->ctime.seconds; + if (was_empty) { + if (S_ISREG(inode->i_mode)) + inode->i_op = &nfs_file_inode_operations; + else if (S_ISDIR(inode->i_mode)) + inode->i_op = &nfs_dir_inode_operations; + else if (S_ISLNK(inode->i_mode)) + inode->i_op = &nfs_symlink_inode_operations; + else if (S_ISCHR(inode->i_mode)) + inode->i_op = &chrdev_inode_operations; + else if (S_ISBLK(inode->i_mode)) + inode->i_op = &blkdev_inode_operations; + else if (S_ISFIFO(inode->i_mode)) + init_fifo(inode); + else + inode->i_op = NULL; + } + nfs_lookup_cache_refresh(inode, fattr); +} + diff --git a/fs/nfs/file.c b/fs/nfs/file.c new file mode 100644 index 000000000..e71d29483 --- /dev/null +++ b/fs/nfs/file.c @@ -0,0 +1,237 @@ +/* + * linux/fs/nfs/file.c + * + * Copyright (C) 1992 Rick Sladkey + * + * Changes Copyright (C) 1994 by Florian La Roche + * - Do not copy data too often around in the kernel. + * - In nfs_file_read the return value of kmalloc wasn't checked. + * - Put in a better version of read look-ahead buffering. 
Original idea + * and implementation by Wai S Kok elekokws@ee.nus.sg. + * + * Expire cache on write to a file by Wai S Kok (Oct 1994). + * + * nfs regular file handling functions + */ + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/mm.h> +#include <linux/nfs_fs.h> +#include <linux/malloc.h> + +static int nfs_file_read(struct inode *, struct file *, char *, int); +static int nfs_file_write(struct inode *, struct file *, char *, int); +static int nfs_fsync(struct inode *, struct file *); + +static struct file_operations nfs_file_operations = { + NULL, /* lseek - default */ + nfs_file_read, /* read */ + nfs_file_write, /* write */ + NULL, /* readdir - bad */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + nfs_mmap, /* mmap */ + NULL, /* no special open is needed */ + NULL, /* release */ + nfs_fsync, /* fsync */ +}; + +struct inode_operations nfs_file_inode_operations = { + &nfs_file_operations, /* default file operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL /* truncate */ +}; + +/* Once data is inserted, it can only be deleted, if (in_use==0). */ +struct read_cache { + int in_use; /* currently in use? 
*/ + unsigned long inode_num; /* inode number */ + off_t file_pos; /* file position */ + int len; /* size of data */ + unsigned long time; /* time, this entry was inserted */ + char * buf; /* data */ + int buf_size; /* size of buffer */ +}; + +#define READ_CACHE_SIZE 5 +#define EXPIRE_CACHE (HZ * 3) /* keep no longer than 3 seconds */ + +unsigned long num_requests = 0; +unsigned long num_cache_hits = 0; + +static int tail = 0; /* next cache slot to replace */ + +static struct read_cache cache[READ_CACHE_SIZE] = { + { 0, 0, -1, 0, 0, NULL, 0 }, + { 0, 0, -1, 0, 0, NULL, 0 }, + { 0, 0, -1, 0, 0, NULL, 0 }, + { 0, 0, -1, 0, 0, NULL, 0 }, + { 0, 0, -1, 0, 0, NULL, 0 } }; + +static int nfs_fsync(struct inode *inode, struct file *file) +{ + return 0; +} + +static int nfs_file_read(struct inode *inode, struct file *file, char *buf, + int count) +{ + int result, hunk, i, n, fs; + struct nfs_fattr fattr; + char *data; + off_t pos; + + if (!inode) { + printk("nfs_file_read: inode = NULL\n"); + return -EINVAL; + } + if (!S_ISREG(inode->i_mode)) { + printk("nfs_file_read: read from non-file, mode %07o\n", + inode->i_mode); + return -EINVAL; + } + pos = file->f_pos; + if (pos + count > inode->i_size) + count = inode->i_size - pos; + if (count <= 0) + return 0; + ++num_requests; + cli(); + for (i = 0; i < READ_CACHE_SIZE; i++) + if ((cache[i].inode_num == inode->i_ino) + && (cache[i].file_pos <= pos) + && (cache[i].file_pos + cache[i].len >= pos + count) + && (abs(jiffies - cache[i].time) <= EXPIRE_CACHE)) + break; + if (i < READ_CACHE_SIZE) { + ++cache[i].in_use; + sti(); + ++num_cache_hits; + memcpy_tofs(buf, cache[i].buf + pos - cache[i].file_pos, count); + --cache[i].in_use; + file->f_pos += count; + return count; + } + sti(); + n = NFS_SERVER(inode)->rsize; + for (i = 0; i < count - n; i += n) { + result = nfs_proc_read(NFS_SERVER(inode), NFS_FH(inode), + pos, n, buf, &fattr, 1); + if (result < 0) + return result; + pos += result; + buf += result; + if (result < n) { + 
file->f_pos = pos; + nfs_refresh_inode(inode, &fattr); + return i + result; + } + } + fs = 0; + if (!(data = (char *)kmalloc(n, GFP_KERNEL))) { + data = buf; + fs = 1; + } + result = nfs_proc_read(NFS_SERVER(inode), NFS_FH(inode), + pos, n, data, &fattr, fs); + if (result < 0) { + if (!fs) + kfree_s(data, n); + return result; + } + hunk = count - i; + if (result < hunk) + hunk = result; + if (fs) { + file->f_pos = pos + hunk; + nfs_refresh_inode(inode, &fattr); + return i + hunk; + } + memcpy_tofs(buf, data, hunk); + file->f_pos = pos + hunk; + nfs_refresh_inode(inode, &fattr); + cli(); + if (cache[tail].in_use == 0) { + if (cache[tail].buf) + kfree_s(cache[tail].buf, cache[tail].buf_size); + cache[tail].buf = data; + cache[tail].buf_size = n; + cache[tail].inode_num = inode->i_ino; + cache[tail].file_pos = pos; + cache[tail].len = result; + cache[tail].time = jiffies; + if (++tail >= READ_CACHE_SIZE) + tail = 0; + } else + kfree_s(data, n); + sti(); + return i + hunk; +} + +static int nfs_file_write(struct inode *inode, struct file *file, char *buf, + int count) +{ + int result, hunk, i, n, pos; + struct nfs_fattr fattr; + + if (!inode) { + printk("nfs_file_write: inode = NULL\n"); + return -EINVAL; + } + if (!S_ISREG(inode->i_mode)) { + printk("nfs_file_write: write to non-file, mode %07o\n", + inode->i_mode); + return -EINVAL; + } + if (count <= 0) + return 0; + + cli(); + /* If hit, cache is dirty and must be expired. 
*/ + for (i = 0; i < READ_CACHE_SIZE; i++) + if(cache[i].inode_num == inode->i_ino) + cache[i].time -= EXPIRE_CACHE; + sti(); + + pos = file->f_pos; + if (file->f_flags & O_APPEND) + pos = inode->i_size; + n = NFS_SERVER(inode)->wsize; + for (i = 0; i < count; i += n) { + hunk = count - i; + if (hunk >= n) + hunk = n; + result = nfs_proc_write(NFS_SERVER(inode), NFS_FH(inode), + pos, hunk, buf, &fattr); + if (result < 0) + return result; + pos += hunk; + buf += hunk; + if (hunk < n) { + i += hunk; + break; + } + } + file->f_pos = pos; + nfs_refresh_inode(inode, &fattr); + return i; +} + diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c new file mode 100644 index 000000000..17f2cb6f0 --- /dev/null +++ b/fs/nfs/inode.c @@ -0,0 +1,240 @@ +/* + * linux/fs/nfs/inode.c + * + * Copyright (C) 1992 Rick Sladkey + * + * nfs inode and superblock handling functions + */ + +#include <asm/system.h> +#include <asm/segment.h> + +#include <linux/sched.h> +#include <linux/nfs_fs.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> + +extern int close_fp(struct file *filp, unsigned int fd); + +static int nfs_notify_change(struct inode *, struct iattr *); +static void nfs_put_inode(struct inode *); +static void nfs_put_super(struct super_block *); +static void nfs_statfs(struct super_block *, struct statfs *); + +static struct super_operations nfs_sops = { + NULL, /* read inode */ + nfs_notify_change, /* notify change */ + NULL, /* write inode */ + nfs_put_inode, /* put inode */ + nfs_put_super, /* put superblock */ + NULL, /* write superblock */ + nfs_statfs, /* stat filesystem */ + NULL +}; + +static void nfs_put_inode(struct inode * inode) +{ + clear_inode(inode); +} + +void nfs_put_super(struct super_block *sb) +{ + /* No locks should be open on this, so 0 should be safe as a fd. 
*/ + close_fp(sb->u.nfs_sb.s_server.file, 0); + lock_super(sb); + sb->s_dev = 0; + unlock_super(sb); +} + +/* + * The way this works is that the mount process passes a structure + * in the data argument which contains an open socket to the NFS + * server and the root file handle obtained from the server's mount + * daemon. We stash theses away in the private superblock fields. + * Later we can add other mount parameters like caching values. + */ + +struct super_block *nfs_read_super(struct super_block *sb, void *raw_data, + int silent) +{ + struct nfs_mount_data *data = (struct nfs_mount_data *) raw_data; + struct nfs_server *server; + unsigned int fd; + struct file *filp; + dev_t dev = sb->s_dev; + + if (!data) { + printk("nfs_read_super: missing data argument\n"); + sb->s_dev = 0; + return NULL; + } + fd = data->fd; + if (data->version != NFS_MOUNT_VERSION) { + printk("nfs warning: mount version %s than kernel\n", + data->version < NFS_MOUNT_VERSION ? "older" : "newer"); + } + if (fd >= NR_OPEN || !(filp = current->files->fd[fd])) { + printk("nfs_read_super: invalid file descriptor\n"); + sb->s_dev = 0; + return NULL; + } + if (!S_ISSOCK(filp->f_inode->i_mode)) { + printk("nfs_read_super: not a socket\n"); + sb->s_dev = 0; + return NULL; + } + filp->f_count++; + lock_super(sb); + sb->s_blocksize = 1024; /* XXX */ + sb->s_blocksize_bits = 10; + sb->s_magic = NFS_SUPER_MAGIC; + sb->s_dev = dev; + sb->s_op = &nfs_sops; + server = &sb->u.nfs_sb.s_server; + server->file = filp; + server->lock = 0; + server->wait = NULL; + server->flags = data->flags; + server->rsize = data->rsize; + if (server->rsize <= 0) + server->rsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + else if (server->rsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) + server->rsize = NFS_MAX_FILE_IO_BUFFER_SIZE; + server->wsize = data->wsize; + if (server->wsize <= 0) + server->wsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + else if (server->wsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) + server->wsize = NFS_MAX_FILE_IO_BUFFER_SIZE; + 
server->timeo = data->timeo*HZ/10; + server->retrans = data->retrans; + server->acregmin = data->acregmin*HZ; + server->acregmax = data->acregmax*HZ; + server->acdirmin = data->acdirmin*HZ; + server->acdirmax = data->acdirmax*HZ; + strcpy(server->hostname, data->hostname); + sb->u.nfs_sb.s_root = data->root; + unlock_super(sb); + if (!(sb->s_mounted = nfs_fhget(sb, &data->root, NULL))) { + sb->s_dev = 0; + printk("nfs_read_super: get root inode failed\n"); + return NULL; + } + return sb; +} + +void nfs_statfs(struct super_block *sb, struct statfs *buf) +{ + int error; + struct nfs_fsinfo res; + + put_fs_long(NFS_SUPER_MAGIC, &buf->f_type); + error = nfs_proc_statfs(&sb->u.nfs_sb.s_server, &sb->u.nfs_sb.s_root, + &res); + if (error) { + printk("nfs_statfs: statfs error = %d\n", -error); + res.bsize = res.blocks = res.bfree = res.bavail = 0; + } + put_fs_long(res.bsize, &buf->f_bsize); + put_fs_long(res.blocks, &buf->f_blocks); + put_fs_long(res.bfree, &buf->f_bfree); + put_fs_long(res.bavail, &buf->f_bavail); + put_fs_long(0, &buf->f_files); + put_fs_long(0, &buf->f_ffree); + /* We should really try to interrogate the remote server to find + it's maximum name length here */ + put_fs_long(NAME_MAX, &buf->f_namelen); +} + +/* + * This is our own version of iget that looks up inodes by file handle + * instead of inode number. We use this technique instead of using + * the vfs read_inode function because there is no way to pass the + * file handle or current attributes into the read_inode function. + * We just have to be careful not to subvert iget's special handling + * of mount points. 
+ */ + +struct inode *nfs_fhget(struct super_block *sb, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs_fattr newfattr; + int error; + struct inode *inode; + + if (!sb) { + printk("nfs_fhget: super block is NULL\n"); + return NULL; + } + if (!fattr) { + error = nfs_proc_getattr(&sb->u.nfs_sb.s_server, fhandle, + &newfattr); + if (error) { + printk("nfs_fhget: getattr error = %d\n", -error); + return NULL; + } + fattr = &newfattr; + } + if (!(inode = iget(sb, fattr->fileid))) { + printk("nfs_fhget: iget failed\n"); + return NULL; + } + if (inode->i_dev == sb->s_dev) { + if (inode->i_ino != fattr->fileid) { + printk("nfs_fhget: unexpected inode from iget\n"); + return inode; + } + *NFS_FH(inode) = *fhandle; + nfs_refresh_inode(inode, fattr); + } + return inode; +} + +int nfs_notify_change(struct inode *inode, struct iattr *attr) +{ + struct nfs_sattr sattr; + struct nfs_fattr fattr; + int error; + + if (attr->ia_valid & ATTR_MODE) + sattr.mode = attr->ia_mode; + else + sattr.mode = (unsigned) -1; + + if (attr->ia_valid & ATTR_UID) + sattr.uid = attr->ia_uid; + else + sattr.uid = (unsigned) -1; + + if (attr->ia_valid & ATTR_GID) + sattr.gid = attr->ia_gid; + else + sattr.gid = (unsigned) -1; + + if (attr->ia_valid & ATTR_SIZE) + sattr.size = S_ISREG(inode->i_mode) ? 
attr->ia_size : -1; + else + sattr.size = (unsigned) -1; + + if (attr->ia_valid & ATTR_MTIME) { + sattr.mtime.seconds = attr->ia_mtime; + sattr.mtime.useconds = 0; + } else + sattr.mtime.seconds = sattr.mtime.useconds = (unsigned) -1; + + if (attr->ia_valid & ATTR_ATIME) { + sattr.atime.seconds = attr->ia_atime; + sattr.atime.useconds = 0; + } else + sattr.atime.seconds = sattr.atime.useconds = (unsigned) -1; + + error = nfs_proc_setattr(NFS_SERVER(inode), NFS_FH(inode), + &sattr, &fattr); + if (!error) + nfs_refresh_inode(inode, &fattr); + inode->i_dirt = 0; + return error; +} diff --git a/fs/nfs/mmap.c b/fs/nfs/mmap.c new file mode 100644 index 000000000..811176a69 --- /dev/null +++ b/fs/nfs/mmap.c @@ -0,0 +1,103 @@ +/* + * fs/nfs/mmap.c by Jon Tombs 15 Aug 1993 + * + * This code is from + * linux/mm/mmap.c which was written by obz, Linus and Eric + * and + * linux/mm/memory.c by Linus Torvalds and others + * + * Copyright (C) 1993 + * + */ +#include <linux/stat.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/shm.h> +#include <linux/errno.h> +#include <linux/mman.h> +#include <linux/string.h> +#include <linux/malloc.h> +#include <linux/nfs_fs.h> + +#include <asm/segment.h> +#include <asm/system.h> + +/* + * Fill in the supplied page for mmap + */ +static unsigned long nfs_file_mmap_nopage(struct vm_area_struct * area, + unsigned long address, unsigned long page, int no_share) +{ + struct inode * inode = area->vm_inode; + unsigned int clear; + unsigned long tmp; + int n; + int i; + int pos; + struct nfs_fattr fattr; + + address &= PAGE_MASK; + pos = address - area->vm_start + area->vm_offset; + + clear = 0; + if (address + PAGE_SIZE > area->vm_end) { + clear = address + PAGE_SIZE - area->vm_end; + } + + n = NFS_SERVER(inode)->rsize; /* what we can read in one go */ + + for (i = 0; i < (PAGE_SIZE - clear); i += n) { + int hunk, result; + + hunk = PAGE_SIZE - i; + if (hunk > n) + hunk = n; + result = 
nfs_proc_read(NFS_SERVER(inode), NFS_FH(inode), + pos, hunk, (char *) (page + i), &fattr, 0); + if (result < 0) + break; + pos += result; + if (result < n) { + i += result; + break; + } + } + +#ifdef doweneedthishere + nfs_refresh_inode(inode, &fattr); +#endif + + tmp = page + PAGE_SIZE; + while (clear--) { + *(char *)--tmp = 0; + } + return page; +} +struct vm_operations_struct nfs_file_mmap = { + NULL, /* open */ + NULL, /* close */ + nfs_file_mmap_nopage, /* nopage */ + NULL, /* wppage */ + NULL, /* share */ + NULL, /* unmap */ +}; + + +/* This is used for a general mmap of a nfs file */ +int nfs_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma) +{ + if (vma->vm_page_prot & PAGE_RW) /* only PAGE_COW or read-only supported now */ + return -EINVAL; + if (!inode->i_sb || !S_ISREG(inode->i_mode)) + return -EACCES; + if (!IS_RDONLY(inode)) { + inode->i_atime = CURRENT_TIME; + inode->i_dirt = 1; + } + + vma->vm_inode = inode; + inode->i_count++; + vma->vm_ops = &nfs_file_mmap; + return 0; +} diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c new file mode 100644 index 000000000..fd01dad99 --- /dev/null +++ b/fs/nfs/proc.c @@ -0,0 +1,931 @@ +/* + * linux/fs/nfs/proc.c + * + * Copyright (C) 1992, 1993, 1994 Rick Sladkey + * + * OS-independent nfs remote procedure call functions + * + * Tuned by Alan Cox <A.Cox@swansea.ac.uk> for >3K buffers + * so at last we can have decent(ish) throughput off a + * Sun server. + * + * Coding optimized and cleaned up by Florian La Roche. + * Note: Error returns are optimized for NFS_OK, which isn't translated via + * nfs_stat_to_errno(), but happens to be already the right return code. + * + * FixMe: We ought to define a sensible small max size for + * things like getattr that are tiny packets and use the + * old get_free_page stuff with it. + * + * Also, the code currently doesn't check the size of the packet, when + * it decodes the packet. + * + * Feel free to fix it and mail me the diffs if it worries you. 
+ */ + +/* + * Defining NFS_PROC_DEBUG causes a lookup of a file named + * "xyzzy" to toggle debugging. Just cd to an NFS-mounted + * filesystem and type 'ls xyzzy' to turn on debugging. + */ + +#if 0 +#define NFS_PROC_DEBUG +#endif + +#include <linux/config.h> +#include <linux/param.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/malloc.h> +#include <linux/nfs_fs.h> +#include <linux/utsname.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/in.h> +#include <asm/segment.h> + +#ifdef NFS_PROC_DEBUG + +static int proc_debug = 0; +#define PRINTK(format, args...) \ + do { \ + if (proc_debug) \ + printk(format , ## args); \ + } while (0) + +#else /* !NFS_PROC_DEBUG */ + +#define PRINTK(format, args...) do ; while (0) + +#endif /* !NFS_PROC_DEBUG */ + +/* Mapping from NFS error code to "errno" error code. */ +#define errno_NFSERR_IO EIO + +static int *nfs_rpc_header(int *p, int procedure, int ruid); +static int *nfs_rpc_verify(int *p); +static int nfs_stat_to_errno(int stat); + +/* + * Our memory allocation and release functions. + */ + +#define NFS_SLACK_SPACE 1024 /* Total overkill */ +/* !!! Be careful, this constant is now also used in sock.c... + We should easily convert to not using it anymore for most cases... */ + +static inline int *nfs_rpc_alloc(int size) +{ + int *i; + + while (!(i = (int *)kmalloc(size+NFS_SLACK_SPACE,GFP_NFS))) { + schedule(); + } + return i; +} + +static inline void nfs_rpc_free(int *p) +{ + kfree((void *)p); +} + +/* + * Here are a bunch of xdr encode/decode functions that convert + * between machine dependent and xdr data formats. 
+ */ + +#define QUADLEN(len) (((len) + 3) >> 2) + +static inline int *xdr_encode_fhandle(int *p, struct nfs_fh *fhandle) +{ + *((struct nfs_fh *) p) = *fhandle; + return p + QUADLEN(sizeof(*fhandle)); +} + +static inline int *xdr_decode_fhandle(int *p, struct nfs_fh *fhandle) +{ + *fhandle = *((struct nfs_fh *) p); + return p + QUADLEN(sizeof(*fhandle)); +} + +static inline int *xdr_encode_string(int *p, const char *string) +{ + int len = strlen(string); + int quadlen = QUADLEN(len); + + p[quadlen] = 0; + *p++ = htonl(len); + memcpy(p, string, len); + return p + quadlen; +} + +static inline int *xdr_decode_string(int *p, char *string, unsigned int maxlen) +{ + unsigned int len = ntohl(*p++); + if (len > maxlen) + return NULL; + memcpy(string, p, len); + string[len] = '\0'; + return p + QUADLEN(len); +} + +static inline int *xdr_decode_string2(int *p, char **string, unsigned int *len, + unsigned int maxlen) +{ + *len = ntohl(*p++); + if (*len > maxlen) + return NULL; + *string = (char *) p; + return p + QUADLEN(*len); +} + + +static inline int *xdr_encode_data(int *p, char *data, int len) +{ + int quadlen = QUADLEN(len); + + p[quadlen] = 0; + *p++ = htonl(len); + memcpy_fromfs(p, data, len); + return p + quadlen; +} + +static inline int *xdr_decode_data(int *p, char *data, int *lenp, int maxlen, + int fs) +{ + unsigned len = *lenp = ntohl(*p++); + if (len > maxlen) + return NULL; + if (fs) + memcpy_tofs(data, p, len); + else + memcpy(data, p, len); + return p + QUADLEN(len); +} + +static int *xdr_decode_fattr(int *p, struct nfs_fattr *fattr) +{ + fattr->type = (enum nfs_ftype) ntohl(*p++); + fattr->mode = ntohl(*p++); + fattr->nlink = ntohl(*p++); + fattr->uid = ntohl(*p++); + fattr->gid = ntohl(*p++); + fattr->size = ntohl(*p++); + fattr->blocksize = ntohl(*p++); + fattr->rdev = ntohl(*p++); + fattr->blocks = ntohl(*p++); + fattr->fsid = ntohl(*p++); + fattr->fileid = ntohl(*p++); + fattr->atime.seconds = ntohl(*p++); + fattr->atime.useconds = ntohl(*p++); + 
fattr->mtime.seconds = ntohl(*p++); + fattr->mtime.useconds = ntohl(*p++); + fattr->ctime.seconds = ntohl(*p++); + fattr->ctime.useconds = ntohl(*p++); + return p; +} + +static int *xdr_encode_sattr(int *p, struct nfs_sattr *sattr) +{ + *p++ = htonl(sattr->mode); + *p++ = htonl(sattr->uid); + *p++ = htonl(sattr->gid); + *p++ = htonl(sattr->size); + *p++ = htonl(sattr->atime.seconds); + *p++ = htonl(sattr->atime.useconds); + *p++ = htonl(sattr->mtime.seconds); + *p++ = htonl(sattr->mtime.useconds); + return p; +} + +static int *xdr_decode_entry(int *p, struct nfs_entry *entry) +{ + entry->fileid = ntohl(*p++); + if (!(p = xdr_decode_string(p, entry->name, NFS_MAXNAMLEN))) + return NULL; + entry->cookie = ntohl(*p++); + entry->eof = 0; + return p; +} + +static int *xdr_decode_fsinfo(int *p, struct nfs_fsinfo *res) +{ + res->tsize = ntohl(*p++); + res->bsize = ntohl(*p++); + res->blocks = ntohl(*p++); + res->bfree = ntohl(*p++); + res->bavail = ntohl(*p++); + return p; +} + +/* + * One function for each procedure in the NFS protocol. 
+ */ + +int nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + int *p, *p0; + int status; + int ruid = 0; + + PRINTK("NFS call getattr\n"); + if (!(p0 = nfs_rpc_alloc(server->rsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_GETATTR, ruid); + p = xdr_encode_fhandle(p, fhandle); + if ((status = nfs_rpc_call(server, p0, p, server->rsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + p = xdr_decode_fattr(p, fattr); + PRINTK("NFS reply getattr\n"); + /* status = 0; */ + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + } + PRINTK("NFS reply getattr failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +int nfs_proc_setattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_sattr *sattr, struct nfs_fattr *fattr) +{ + int *p, *p0; + int status; + int ruid = 0; + + PRINTK("NFS call setattr\n"); + if (!(p0 = nfs_rpc_alloc(server->wsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_SETATTR, ruid); + p = xdr_encode_fhandle(p, fhandle); + p = xdr_encode_sattr(p, sattr); + if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + p = xdr_decode_fattr(p, fattr); + PRINTK("NFS reply setattr\n"); + /* status = 0; */ + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + } + PRINTK("NFS reply setattr failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +int nfs_proc_lookup(struct nfs_server *server, struct nfs_fh *dir, const char *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + int *p, *p0; + int status; + 
int ruid = 0; + + PRINTK("NFS call lookup %s\n", name); +#ifdef NFS_PROC_DEBUG + if (!strcmp(name, "xyzzy")) + proc_debug = 1 - proc_debug; +#endif + if (!(p0 = nfs_rpc_alloc(server->rsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_LOOKUP, ruid); + p = xdr_encode_fhandle(p, dir); + p = xdr_encode_string(p, name); + if ((status = nfs_rpc_call(server, p0, p, server->rsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + p = xdr_decode_fhandle(p, fhandle); + p = xdr_decode_fattr(p, fattr); + PRINTK("NFS reply lookup\n"); + /* status = 0; */ + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + } + PRINTK("NFS reply lookup failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +int nfs_proc_readlink(struct nfs_server *server, struct nfs_fh *fhandle, + int **p0, char **string, unsigned int *len, unsigned int maxlen) +{ + int *p; + int status, ruid = 0; + + PRINTK("NFS call readlink\n"); + if (!(*p0 = nfs_rpc_alloc(server->rsize))) + return -EIO; +retry: + p = nfs_rpc_header(*p0, NFSPROC_READLINK, ruid); + p = xdr_encode_fhandle(p, fhandle); + if ((status = nfs_rpc_call(server, *p0, p, server->rsize)) < 0) + return status; + if (!(p = nfs_rpc_verify(*p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + if (!(p = xdr_decode_string2(p, string, len, maxlen))) { + printk("nfs_proc_readlink: giant pathname\n"); + status = -errno_NFSERR_IO; + } + else /* status = 0, */ + PRINTK("NFS reply readlink\n"); + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + } + PRINTK("NFS reply readlink failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + return status; +} + +int nfs_proc_read(struct nfs_server *server, struct nfs_fh *fhandle, + int offset, int count, char 
*data, struct nfs_fattr *fattr, int fs) +{ + int *p, *p0; + int status; + int ruid = 0; + int len; + + PRINTK("NFS call read %d @ %d\n", count, offset); + if (!(p0 = nfs_rpc_alloc(server->rsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_READ, ruid); + p = xdr_encode_fhandle(p, fhandle); + *p++ = htonl(offset); + *p++ = htonl(count); + *p++ = htonl(count); /* traditional, could be any value */ + if ((status = nfs_rpc_call(server, p0, p, server->rsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + p = xdr_decode_fattr(p, fattr); + if (!(p = xdr_decode_data(p, data, &len, count, fs))) { + printk("nfs_proc_read: giant data size\n"); + status = -errno_NFSERR_IO; + } + else { + status = len; + PRINTK("NFS reply read %d\n", len); + } + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + } + PRINTK("NFS reply read failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +int nfs_proc_write(struct nfs_server *server, struct nfs_fh *fhandle, + int offset, int count, char *data, struct nfs_fattr *fattr) +{ + int *p, *p0; + int status; + int ruid = 0; + + PRINTK("NFS call write %d @ %d\n", count, offset); + if (!(p0 = nfs_rpc_alloc(server->wsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_WRITE, ruid); + p = xdr_encode_fhandle(p, fhandle); + *p++ = htonl(offset); /* traditional, could be any value */ + *p++ = htonl(offset); + *p++ = htonl(count); /* traditional, could be any value */ + p = xdr_encode_data(p, data, count); + if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + p = xdr_decode_fattr(p, fattr); + PRINTK("NFS reply write\n"); + /* status = 0; */ + } + else { + if 
(!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + } + PRINTK("NFS reply write failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +int nfs_proc_create(struct nfs_server *server, struct nfs_fh *dir, + const char *name, struct nfs_sattr *sattr, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + int *p, *p0; + int status; + int ruid = 0; + + PRINTK("NFS call create %s\n", name); + if (!(p0 = nfs_rpc_alloc(server->wsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_CREATE, ruid); + p = xdr_encode_fhandle(p, dir); + p = xdr_encode_string(p, name); + p = xdr_encode_sattr(p, sattr); + if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + p = xdr_decode_fhandle(p, fhandle); + p = xdr_decode_fattr(p, fattr); + PRINTK("NFS reply create\n"); + /* status = 0; */ + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + } + PRINTK("NFS reply create failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +int nfs_proc_remove(struct nfs_server *server, struct nfs_fh *dir, const char *name) +{ + int *p, *p0; + int status; + int ruid = 0; + + PRINTK("NFS call remove %s\n", name); + if (!(p0 = nfs_rpc_alloc(server->wsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_REMOVE, ruid); + p = xdr_encode_fhandle(p, dir); + p = xdr_encode_string(p, name); + if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + PRINTK("NFS reply remove\n"); + /* status = 0; */ + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + 
} + PRINTK("NFS reply remove failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +int nfs_proc_rename(struct nfs_server *server, + struct nfs_fh *old_dir, const char *old_name, + struct nfs_fh *new_dir, const char *new_name) +{ + int *p, *p0; + int status; + int ruid = 0; + + PRINTK("NFS call rename %s -> %s\n", old_name, new_name); + if (!(p0 = nfs_rpc_alloc(server->wsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_RENAME, ruid); + p = xdr_encode_fhandle(p, old_dir); + p = xdr_encode_string(p, old_name); + p = xdr_encode_fhandle(p, new_dir); + p = xdr_encode_string(p, new_name); + if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + PRINTK("NFS reply rename\n"); + /* status = 0; */ + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + } + PRINTK("NFS reply rename failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +int nfs_proc_link(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fh *dir, const char *name) +{ + int *p, *p0; + int status; + int ruid = 0; + + PRINTK("NFS call link %s\n", name); + if (!(p0 = nfs_rpc_alloc(server->wsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_LINK, ruid); + p = xdr_encode_fhandle(p, fhandle); + p = xdr_encode_fhandle(p, dir); + p = xdr_encode_string(p, name); + if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + PRINTK("NFS reply link\n"); + /* status = 0; */ + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + } + PRINTK("NFS reply link failed = %d\n", 
status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +int nfs_proc_symlink(struct nfs_server *server, struct nfs_fh *dir, + const char *name, const char *path, struct nfs_sattr *sattr) +{ + int *p, *p0; + int status; + int ruid = 0; + + PRINTK("NFS call symlink %s -> %s\n", name, path); + if (!(p0 = nfs_rpc_alloc(server->wsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_SYMLINK, ruid); + p = xdr_encode_fhandle(p, dir); + p = xdr_encode_string(p, name); + p = xdr_encode_string(p, path); + p = xdr_encode_sattr(p, sattr); + if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + PRINTK("NFS reply symlink\n"); + /* status = 0; */ + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + } + PRINTK("NFS reply symlink failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +int nfs_proc_mkdir(struct nfs_server *server, struct nfs_fh *dir, + const char *name, struct nfs_sattr *sattr, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + int *p, *p0; + int status; + int ruid = 0; + + PRINTK("NFS call mkdir %s\n", name); + if (!(p0 = nfs_rpc_alloc(server->wsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_MKDIR, ruid); + p = xdr_encode_fhandle(p, dir); + p = xdr_encode_string(p, name); + p = xdr_encode_sattr(p, sattr); + if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + p = xdr_decode_fhandle(p, fhandle); + p = xdr_decode_fattr(p, fattr); + PRINTK("NFS reply mkdir\n"); + /* status = 0; */ + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + 
} + PRINTK("NFS reply mkdir failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +int nfs_proc_rmdir(struct nfs_server *server, struct nfs_fh *dir, const char *name) +{ + int *p, *p0; + int status; + int ruid = 0; + + PRINTK("NFS call rmdir %s\n", name); + if (!(p0 = nfs_rpc_alloc(server->wsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_RMDIR, ruid); + p = xdr_encode_fhandle(p, dir); + p = xdr_encode_string(p, name); + if ((status = nfs_rpc_call(server, p0, p, server->wsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + PRINTK("NFS reply rmdir\n"); + /* status = 0; */ + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + } + PRINTK("NFS reply rmdir failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +int nfs_proc_readdir(struct nfs_server *server, struct nfs_fh *fhandle, + int cookie, int count, struct nfs_entry *entry) +{ + int *p, *p0; + int status; + int ruid = 0; + int i; + int size; + int eof; + + PRINTK("NFS call readdir %d @ %d\n", count, cookie); + size = server->rsize; + if (!(p0 = nfs_rpc_alloc(server->rsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_READDIR, ruid); + p = xdr_encode_fhandle(p, fhandle); + *p++ = htonl(cookie); + *p++ = htonl(size); + if ((status = nfs_rpc_call(server, p0, p, server->rsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + for (i = 0; i < count && *p++; i++) { + if (!(p = xdr_decode_entry(p, entry++))) + break; + } + if (!p) { + printk("nfs_proc_readdir: giant filename\n"); + status = -errno_NFSERR_IO; + } + else { + eof = (i == count && !*p++ && *p++) + || (i < count && *p++); + if (eof && i) + entry[-1].eof = 1; 
+ PRINTK("NFS reply readdir %d %s\n", i, + eof ? "eof" : ""); + status = i; + } + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + } + PRINTK("NFS reply readdir failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +int nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *res) +{ + int *p, *p0; + int status; + int ruid = 0; + + PRINTK("NFS call statfs\n"); + if (!(p0 = nfs_rpc_alloc(server->rsize))) + return -EIO; +retry: + p = nfs_rpc_header(p0, NFSPROC_STATFS, ruid); + p = xdr_encode_fhandle(p, fhandle); + if ((status = nfs_rpc_call(server, p0, p, server->rsize)) < 0) { + nfs_rpc_free(p0); + return status; + } + if (!(p = nfs_rpc_verify(p0))) + status = -errno_NFSERR_IO; + else if ((status = ntohl(*p++)) == NFS_OK) { + p = xdr_decode_fsinfo(p, res); + PRINTK("NFS reply statfs\n"); + /* status = 0; */ + } + else { + if (!ruid && current->fsuid == 0 && current->uid != 0) { + ruid = 1; + goto retry; + } + PRINTK("NFS reply statfs failed = %d\n", status); + status = -nfs_stat_to_errno(status); + } + nfs_rpc_free(p0); + return status; +} + +/* + * Here are a few RPC-assist functions. + */ + +static int *nfs_rpc_header(int *p, int procedure, int ruid) +{ + int *p1, *p2; + int i; + static int xid = 0; + unsigned char *sys = (unsigned char *) system_utsname.nodename; + + if (xid == 0) { + xid = CURRENT_TIME; + xid ^= (sys[3]<<24) | (sys[2]<<16) | (sys[1]<<8) | sys[0]; + } + *p++ = htonl(++xid); + *p++ = htonl(RPC_CALL); + *p++ = htonl(RPC_VERSION); + *p++ = htonl(NFS_PROGRAM); + *p++ = htonl(NFS_VERSION); + *p++ = htonl(procedure); + *p++ = htonl(RPC_AUTH_UNIX); + p1 = p++; + *p++ = htonl(CURRENT_TIME); /* traditional, could be anything */ + p = xdr_encode_string(p, (char *) sys); + *p++ = htonl(ruid ? 
current->uid : current->fsuid); + *p++ = htonl(current->egid); + p2 = p++; + for (i = 0; i < 16 && i < NGROUPS && current->groups[i] != NOGROUP; i++) + *p++ = htonl(current->groups[i]); + *p2 = htonl(i); + *p1 = htonl((p - (p1 + 1)) << 2); + *p++ = htonl(RPC_AUTH_NULL); + *p++ = htonl(0); + return p; +} + +static int *nfs_rpc_verify(int *p) +{ + unsigned int n; + + p++; + if ((n = ntohl(*p++)) != RPC_REPLY) { + printk("nfs_rpc_verify: not an RPC reply: %d\n", n); + return NULL; + } + if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { + printk("nfs_rpc_verify: RPC call rejected: %d\n", n); + return NULL; + } + switch (n = ntohl(*p++)) { + case RPC_AUTH_NULL: case RPC_AUTH_UNIX: case RPC_AUTH_SHORT: + break; + default: + printk("nfs_rpc_verify: bad RPC authentication type: %d\n", n); + return NULL; + } + if ((n = ntohl(*p++)) > 400) { + printk("nfs_rpc_verify: giant auth size\n"); + return NULL; + } + p += QUADLEN(n); + if ((n = ntohl(*p++)) != RPC_SUCCESS) { + printk("nfs_rpc_verify: RPC call failed: %d\n", n); + return NULL; + } + return p; +} + +/* + * We need to translate between nfs status return values and + * the local errno values which may not be the same. 
+ */ + +static struct { + int stat; + int errno; +} nfs_errtbl[] = { + { NFS_OK, 0 }, + { NFSERR_PERM, EPERM }, + { NFSERR_NOENT, ENOENT }, + { NFSERR_IO, errno_NFSERR_IO }, + { NFSERR_NXIO, ENXIO }, + { NFSERR_ACCES, EACCES }, + { NFSERR_EXIST, EEXIST }, + { NFSERR_NODEV, ENODEV }, + { NFSERR_NOTDIR, ENOTDIR }, + { NFSERR_ISDIR, EISDIR }, + { NFSERR_INVAL, EINVAL }, + { NFSERR_FBIG, EFBIG }, + { NFSERR_NOSPC, ENOSPC }, + { NFSERR_ROFS, EROFS }, + { NFSERR_NAMETOOLONG, ENAMETOOLONG }, + { NFSERR_NOTEMPTY, ENOTEMPTY }, + { NFSERR_DQUOT, EDQUOT }, + { NFSERR_STALE, ESTALE }, +#ifdef EWFLUSH + { NFSERR_WFLUSH, EWFLUSH }, +#endif + { -1, EIO } +}; + +static int nfs_stat_to_errno(int stat) +{ + int i; + + for (i = 0; nfs_errtbl[i].stat != -1; i++) { + if (nfs_errtbl[i].stat == stat) + return nfs_errtbl[i].errno; + } + printk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat); + return nfs_errtbl[i].errno; +} + diff --git a/fs/nfs/sock.c b/fs/nfs/sock.c new file mode 100644 index 000000000..2455d938a --- /dev/null +++ b/fs/nfs/sock.c @@ -0,0 +1,242 @@ +/* + * linux/fs/nfs/sock.c + * + * Copyright (C) 1992, 1993 Rick Sladkey + * + * low-level nfs remote procedure call interface + * + * FIXES + * + * 2/7/94 James Bottomley and Jon Peatfield DAMTP, Cambridge University + * + * An xid mismatch no longer causes the request to be trashed. + * + * Peter Eriksson - incorrect XID used to confuse Linux + * Florian La Roche - use the correct max size, if reading a packet and + * also verify, if the whole packet has been read... + * more checks should be done in proc.c... 
+ * + */ + +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/nfs_fs.h> +#include <linux/errno.h> +#include <linux/socket.h> +#include <linux/fcntl.h> +#include <asm/segment.h> +#include <linux/in.h> +#include <linux/net.h> +#include <linux/mm.h> + +/* JEJB/JSP 2/7/94 + * this must match the value of NFS_SLACK_SPACE in linux/fs/nfs/proc.c + * ***FIXME*** should probably put this in nfs_fs.h */ +#define NFS_SLACK_SPACE 1024 + + +extern struct socket *socki_lookup(struct inode *inode); + +#define _S(nr) (1<<((nr)-1)) + +/* + * We violate some modularity principles here by poking around + * in some socket internals. Besides having to call socket + * functions from kernel-space instead of user space, the socket + * interface does not lend itself well to being cleanly called + * without a file descriptor. Since the nfs calls can run on + * behalf of any process, the superblock maintains a file pointer + * to the server socket. + */ + +static int do_nfs_rpc_call(struct nfs_server *server, int *start, int *end, int size) +{ + struct file *file; + struct inode *inode; + struct socket *sock; + unsigned short fs; + int result; + int xid; + int len; + select_table wait_table; + struct select_table_entry entry; + int (*select) (struct inode *, struct file *, int, select_table *); + int init_timeout, max_timeout; + int timeout; + int retrans; + int major_timeout_seen; + char *server_name; + int n; + int addrlen; + unsigned long old_mask; + /* JEJB/JSP 2/7/94 + * This is for a 4 byte recv of the xid only */ + int recv_xid; + + xid = start[0]; + len = ((char *) end) - ((char *) start); + file = server->file; + inode = file->f_inode; + select = file->f_op->select; + sock = socki_lookup(inode); + if (!sock) { + printk("nfs_rpc_call: socki_lookup failed\n"); + return -EBADF; + } + init_timeout = server->timeo; + max_timeout = NFS_MAX_RPC_TIMEOUT*HZ/10; + retrans = server->retrans; + major_timeout_seen = 0; + server_name = server->hostname; + old_mask = 
current->blocked; + current->blocked |= ~(_S(SIGKILL) +#if 0 + | _S(SIGSTOP) +#endif + | ((server->flags & NFS_MOUNT_INTR) + ? ((current->sigaction[SIGINT - 1].sa_handler == SIG_DFL + ? _S(SIGINT) : 0) + | (current->sigaction[SIGQUIT - 1].sa_handler == SIG_DFL + ? _S(SIGQUIT) : 0)) + : 0)); + fs = get_fs(); + set_fs(get_ds()); + for (n = 0, timeout = init_timeout; ; n++, timeout <<= 1) { + result = sock->ops->send(sock, (void *) start, len, 0, 0); + if (result < 0) { + printk("nfs_rpc_call: send error = %d\n", result); + break; + } + re_select: + wait_table.nr = 0; + wait_table.entry = &entry; + current->state = TASK_INTERRUPTIBLE; + if (!select(inode, file, SEL_IN, &wait_table) + && !select(inode, file, SEL_IN, NULL)) { + if (timeout > max_timeout) { + /* JEJB/JSP 2/7/94 + * This is useful to see if the system is + * hanging */ + printk("NFS max timeout reached on %s\n", + server_name); + timeout = max_timeout; + } + current->timeout = jiffies + timeout; + schedule(); + remove_wait_queue(entry.wait_address, &entry.wait); + current->state = TASK_RUNNING; + if (current->signal & ~current->blocked) { + current->timeout = 0; + result = -ERESTARTSYS; + break; + } + if (!current->timeout) { + if (n < retrans) + continue; + if (server->flags & NFS_MOUNT_SOFT) { + printk("NFS server %s not responding, " + "timed out\n", server_name); + result = -EIO; + break; + } + n = 0; + timeout = init_timeout; + init_timeout <<= 1; + if (!major_timeout_seen) { + printk("NFS server %s not responding, " + "still trying\n", server_name); + } + major_timeout_seen = 1; + continue; + } + else + current->timeout = 0; + } + else if (wait_table.nr) + remove_wait_queue(entry.wait_address, &entry.wait); + current->state = TASK_RUNNING; + addrlen = 0; + /* JEJB/JSP 2/7/94 + * Get the xid from the next packet using a peek, so keep it + * on the recv queue. 
If it is wrong, it will be some reply + * we don't now need, so discard it */ + result = sock->ops->recvfrom(sock, (void *)&recv_xid, + sizeof(recv_xid), 1, MSG_PEEK, + NULL, &addrlen); + if (result < 0) { + if (result == -EAGAIN) { +#if 0 + printk("nfs_rpc_call: bad select ready\n"); +#endif + goto re_select; + } + if (result == -ECONNREFUSED) { +#if 0 + printk("nfs_rpc_call: server playing coy\n"); +#endif + goto re_select; + } + if (result != -ERESTARTSYS) { + printk("nfs_rpc_call: recv error = %d\n", + -result); + } + break; + } + if (recv_xid == xid) { + if (major_timeout_seen) + printk("NFS server %s OK\n", server_name); + break; + } + /* JEJB/JSP 2/7/94 + * we have xid mismatch, so discard the packet and start + * again. What a hack! but I can't call recvfrom with + * a null buffer yet. */ + (void)sock->ops->recvfrom(sock, (void *)&recv_xid, + sizeof(recv_xid), 1, 0, NULL, + &addrlen); +#if 0 + printk("nfs_rpc_call: XID mismatch\n"); +#endif + goto re_select; + } + /* JEJB/JSP 2/7/94 + * + * we have the correct xid, so read into the correct place and + * return it + * + */ + result=sock->ops->recvfrom(sock, (void *)start, + size + 1024, 1, 0, NULL, + /* Here is NFS_SLACK_SPACE..., hack */ + &addrlen); + if (result < 0) { + printk("NFS: notice message: result=%d\n", result); + } else if (result < addrlen) { + printk("NFS: just caught a too small read memory size..., email to NET channel\n"); + printk("NFS: result=%d,addrlen=%d\n", result, addrlen); + result = -EIO; + } + current->blocked = old_mask; + set_fs(fs); + return result; +} + +/* + * For now we lock out other simultaneous nfs calls for the same filesystem + * because we are single-threaded and don't want to get mismatched + * RPC replies. 
+ */ + +int nfs_rpc_call(struct nfs_server *server, int *start, int *end, int size) +{ + int result; + + while (server->lock) + sleep_on(&server->wait); + server->lock = 1; + result = do_nfs_rpc_call(server, start, end, size); + server->lock = 0; + wake_up(&server->wait); + return result; +} + diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c new file mode 100644 index 000000000..4cbe631c6 --- /dev/null +++ b/fs/nfs/symlink.c @@ -0,0 +1,116 @@ +/* + * linux/fs/nfs/symlink.c + * + * Copyright (C) 1992 Rick Sladkey + * + * Optimization changes Copyright (C) 1994 Florian La Roche + * + * nfs symlink handling code + */ + +#include <asm/segment.h> + +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/nfs_fs.h> +#include <linux/stat.h> +#include <linux/mm.h> +#include <linux/malloc.h> +#include <linux/string.h> + +static int nfs_readlink(struct inode *, char *, int); +static int nfs_follow_link(struct inode *, struct inode *, int, int, + struct inode **); + +/* + * symlinks can't do much... 
+ */ +struct inode_operations nfs_symlink_inode_operations = { + NULL, /* no file-operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + nfs_readlink, /* readlink */ + nfs_follow_link, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static int nfs_follow_link(struct inode *dir, struct inode *inode, + int flag, int mode, struct inode **res_inode) +{ + int error, *mem; + unsigned int len; + char *res, *res2; + + *res_inode = NULL; + if (!dir) { + dir = current->fs->root; + dir->i_count++; + } + if (!inode) { + iput(dir); + return -ENOENT; + } + if (!S_ISLNK(inode->i_mode)) { + iput(dir); + *res_inode = inode; + return 0; + } + if (current->link_count > 5) { + iput(inode); + iput(dir); + return -ELOOP; + } + error = nfs_proc_readlink(NFS_SERVER(inode), NFS_FH(inode), &mem, + &res, &len, NFS_MAXPATHLEN); + if (error) { + iput(inode); + iput(dir); + kfree(mem); + return error; + } + while ((res2 = (char *) kmalloc(NFS_MAXPATHLEN + 1, GFP_NFS)) == NULL) { + schedule(); + } + memcpy(res2, res, len); + res2[len] = '\0'; + kfree(mem); + iput(inode); + current->link_count++; + error = open_namei(res2, flag, mode, res_inode, dir); + current->link_count--; + kfree_s(res2, NFS_MAXPATHLEN + 1); + return error; +} + +static int nfs_readlink(struct inode *inode, char *buffer, int buflen) +{ + int error, *mem; + unsigned int len; + char *res; + + if (!S_ISLNK(inode->i_mode)) { + iput(inode); + return -EINVAL; + } + if (buflen > NFS_MAXPATHLEN) + buflen = NFS_MAXPATHLEN; + error = nfs_proc_readlink(NFS_SERVER(inode), NFS_FH(inode), &mem, + &res, &len, buflen); + iput(inode); + if (! 
error) { + memcpy_tofs(buffer, res, len); + put_fs_byte('\0', buffer + len); + error = len; + } + kfree(mem); + return error; +} diff --git a/fs/open.c b/fs/open.c new file mode 100644 index 000000000..ff95d375f --- /dev/null +++ b/fs/open.c @@ -0,0 +1,516 @@ +/* + * linux/fs/open.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/vfs.h> +#include <linux/types.h> +#include <linux/utime.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/tty.h> +#include <linux/time.h> + +#include <asm/segment.h> + +extern void fcntl_remove_locks(struct task_struct *, struct file *, unsigned int fd); + +asmlinkage int sys_ustat(int dev, struct ustat * ubuf) +{ + return -ENOSYS; +} + +asmlinkage int sys_statfs(const char * path, struct statfs * buf) +{ + struct inode * inode; + int error; + + error = verify_area(VERIFY_WRITE, buf, sizeof(struct statfs)); + if (error) + return error; + error = namei(path,&inode); + if (error) + return error; + if (!inode->i_sb->s_op->statfs) { + iput(inode); + return -ENOSYS; + } + inode->i_sb->s_op->statfs(inode->i_sb, buf); + iput(inode); + return 0; +} + +asmlinkage int sys_fstatfs(unsigned int fd, struct statfs * buf) +{ + struct inode * inode; + struct file * file; + int error; + + error = verify_area(VERIFY_WRITE, buf, sizeof(struct statfs)); + if (error) + return error; + if (fd >= NR_OPEN || !(file = current->files->fd[fd])) + return -EBADF; + if (!(inode = file->f_inode)) + return -ENOENT; + if (!inode->i_sb->s_op->statfs) + return -ENOSYS; + inode->i_sb->s_op->statfs(inode->i_sb, buf); + return 0; +} + +asmlinkage int sys_truncate(const char * path, unsigned int length) +{ + struct inode * inode; + int error; + struct iattr newattrs; + + error = namei(path,&inode); + if (error) + return error; + if (S_ISDIR(inode->i_mode) || !permission(inode,MAY_WRITE)) { + 
iput(inode); + return -EACCES; + } + if (IS_RDONLY(inode)) { + iput(inode); + return -EROFS; + } + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + iput(inode); + return -EPERM; + } + error = get_write_access(inode); + if (error) { + iput(inode); + return error; + } + inode->i_size = newattrs.ia_size = length; + if (inode->i_op && inode->i_op->truncate) + inode->i_op->truncate(inode); + newattrs.ia_ctime = newattrs.ia_mtime = CURRENT_TIME; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME; + inode->i_dirt = 1; + error = notify_change(inode, &newattrs); + put_write_access(inode); + iput(inode); + return error; +} + +asmlinkage int sys_ftruncate(unsigned int fd, unsigned int length) +{ + struct inode * inode; + struct file * file; + struct iattr newattrs; + + if (fd >= NR_OPEN || !(file = current->files->fd[fd])) + return -EBADF; + if (!(inode = file->f_inode)) + return -ENOENT; + if (S_ISDIR(inode->i_mode) || !(file->f_mode & 2)) + return -EACCES; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return -EPERM; + inode->i_size = newattrs.ia_size = length; + if (inode->i_op && inode->i_op->truncate) + inode->i_op->truncate(inode); + newattrs.ia_ctime = newattrs.ia_mtime = CURRENT_TIME; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME; + inode->i_dirt = 1; + return notify_change(inode, &newattrs); +} + +/* If times==NULL, set access and modification to current time, + * must be owner or have write permission. + * Else, update from *times, must be owner or super user. 
+ */ +asmlinkage int sys_utime(char * filename, struct utimbuf * times) +{ + struct inode * inode; + long actime,modtime; + int error; + unsigned int flags = 0; + struct iattr newattrs; + + error = namei(filename,&inode); + if (error) + return error; + if (IS_RDONLY(inode)) { + iput(inode); + return -EROFS; + } + /* Don't worry, the checks are done in inode_change_ok() */ + if (times) { + actime = get_fs_long((unsigned long *) ×->actime); + modtime = get_fs_long((unsigned long *) ×->modtime); + newattrs.ia_ctime = CURRENT_TIME; + flags = ATTR_ATIME_SET | ATTR_MTIME_SET; + } else { + if (!permission(inode,MAY_WRITE)) { + iput(inode); + return -EACCES; + } + actime = modtime = newattrs.ia_ctime = CURRENT_TIME; + } + newattrs.ia_atime = actime; + newattrs.ia_mtime = modtime; + newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME | flags; + inode->i_dirt = 1; + error = notify_change(inode, &newattrs); + iput(inode); + return error; +} + +/* + * access() needs to use the real uid/gid, not the effective uid/gid. + * We do this by temporarily setting fsuid/fsgid to the wanted values + */ +asmlinkage int sys_access(const char * filename, int mode) +{ + struct inode * inode; + int old_fsuid, old_fsgid; + int res; + + if (mode != (mode & S_IRWXO)) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ + return -EINVAL; + old_fsuid = current->fsuid; + old_fsgid = current->fsgid; + current->fsuid = current->uid; + current->fsgid = current->gid; + res = namei(filename,&inode); + if (!res) { + if (!permission(inode, mode)) + res = -EACCES; + iput(inode); + } + current->fsuid = old_fsuid; + current->fsgid = old_fsgid; + return res; +} + +asmlinkage int sys_chdir(const char * filename) +{ + struct inode * inode; + int error; + + error = namei(filename,&inode); + if (error) + return error; + if (!S_ISDIR(inode->i_mode)) { + iput(inode); + return -ENOTDIR; + } + if (!permission(inode,MAY_EXEC)) { + iput(inode); + return -EACCES; + } + iput(current->fs->pwd); + current->fs->pwd = inode; + return (0); +} + +asmlinkage int sys_fchdir(unsigned int fd) +{ + struct inode * inode; + struct file * file; + + if (fd >= NR_OPEN || !(file = current->files->fd[fd])) + return -EBADF; + if (!(inode = file->f_inode)) + return -ENOENT; + if (!S_ISDIR(inode->i_mode)) + return -ENOTDIR; + if (!permission(inode,MAY_EXEC)) + return -EACCES; + iput(current->fs->pwd); + current->fs->pwd = inode; + inode->i_count++; + return (0); +} + +asmlinkage int sys_chroot(const char * filename) +{ + struct inode * inode; + int error; + + error = namei(filename,&inode); + if (error) + return error; + if (!S_ISDIR(inode->i_mode)) { + iput(inode); + return -ENOTDIR; + } + if (!fsuser()) { + iput(inode); + return -EPERM; + } + iput(current->fs->root); + current->fs->root = inode; + return (0); +} + +asmlinkage int sys_fchmod(unsigned int fd, mode_t mode) +{ + struct inode * inode; + struct file * file; + struct iattr newattrs; + + if (fd >= NR_OPEN || !(file = current->files->fd[fd])) + return -EBADF; + if (!(inode = file->f_inode)) + return -ENOENT; + if (IS_RDONLY(inode)) + return -EROFS; + if (mode == (mode_t) -1) + mode = inode->i_mode; + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_ctime = CURRENT_TIME; + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; + 
inode->i_dirt = 1; + return notify_change(inode, &newattrs); +} + +asmlinkage int sys_chmod(const char * filename, mode_t mode) +{ + struct inode * inode; + int error; + struct iattr newattrs; + + error = namei(filename,&inode); + if (error) + return error; + if (IS_RDONLY(inode)) { + iput(inode); + return -EROFS; + } + if (mode == (mode_t) -1) + mode = inode->i_mode; + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_ctime = CURRENT_TIME; + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; + inode->i_dirt = 1; + error = notify_change(inode, &newattrs); + iput(inode); + return error; +} + +asmlinkage int sys_fchown(unsigned int fd, uid_t user, gid_t group) +{ + struct inode * inode; + struct file * file; + struct iattr newattrs; + + if (fd >= NR_OPEN || !(file = current->files->fd[fd])) + return -EBADF; + if (!(inode = file->f_inode)) + return -ENOENT; + if (IS_RDONLY(inode)) + return -EROFS; + if (user == (uid_t) -1) + user = inode->i_uid; + if (group == (gid_t) -1) + group = inode->i_gid; + newattrs.ia_mode = inode->i_mode; + newattrs.ia_uid = user; + newattrs.ia_gid = group; + newattrs.ia_ctime = CURRENT_TIME; + newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; + /* + * If the owner has been changed, remove the setuid bit + */ + if (user != inode->i_uid && (inode->i_mode & S_ISUID)) { + newattrs.ia_mode &= ~S_ISUID; + newattrs.ia_valid |= ATTR_MODE; + } + /* + * If the group has been changed, remove the setgid bit + */ + if (group != inode->i_gid && (inode->i_mode & S_ISGID)) { + newattrs.ia_mode &= ~S_ISGID; + newattrs.ia_valid |= ATTR_MODE; + } + inode->i_dirt = 1; + return notify_change(inode, &newattrs); +} + +asmlinkage int sys_chown(const char * filename, uid_t user, gid_t group) +{ + struct inode * inode; + int error; + struct iattr newattrs; + + error = lnamei(filename,&inode); + if (error) + return error; + if (IS_RDONLY(inode)) { + iput(inode); + return -EROFS; + } + if (user == (uid_t) -1) + user = inode->i_uid; + if 
(group == (gid_t) -1) + group = inode->i_gid; + newattrs.ia_mode = inode->i_mode; + newattrs.ia_uid = user; + newattrs.ia_gid = group; + newattrs.ia_ctime = CURRENT_TIME; + newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; + /* + * If the owner has been changed, remove the setuid bit + */ + if (user != inode->i_uid && (inode->i_mode & S_ISUID)) { + newattrs.ia_mode &= ~S_ISUID; + newattrs.ia_valid |= ATTR_MODE; + } + /* + * If the group has been changed, remove the setgid bit + */ + if (group != inode->i_gid && (inode->i_mode & S_ISGID)) { + newattrs.ia_mode &= ~S_ISGID; + newattrs.ia_valid |= ATTR_MODE; + } + inode->i_dirt = 1; + error = notify_change(inode, &newattrs); + iput(inode); + return(error); +} + +/* + * Note that while the flag value (low two bits) for sys_open means: + * 00 - read-only + * 01 - write-only + * 10 - read-write + * 11 - special + * it is changed into + * 00 - no permissions needed + * 01 - read-permission + * 10 - write-permission + * 11 - read-write + * for the internal routines (ie open_namei()/follow_link() etc). 00 is + * used by symlinks. 
+ */ +int do_open(const char * filename,int flags,int mode) +{ + struct inode * inode; + struct file * f; + int flag,error,fd; + + for(fd=0 ; fd<NR_OPEN ; fd++) + if (!current->files->fd[fd]) + break; + if (fd>=NR_OPEN) + return -EMFILE; + FD_CLR(fd,¤t->files->close_on_exec); + f = get_empty_filp(); + if (!f) + return -ENFILE; + current->files->fd[fd] = f; + f->f_flags = flag = flags; + f->f_mode = (flag+1) & O_ACCMODE; + if (f->f_mode) + flag++; + if (flag & (O_TRUNC | O_CREAT)) + flag |= 2; + error = open_namei(filename,flag,mode,&inode,NULL); + if (!error && (f->f_mode & 2)) + error = get_write_access(inode); + if (error) { + current->files->fd[fd]=NULL; + f->f_count--; + return error; + } + + f->f_inode = inode; + f->f_pos = 0; + f->f_reada = 0; + f->f_op = NULL; + if (inode->i_op) + f->f_op = inode->i_op->default_file_ops; + if (f->f_op && f->f_op->open) { + error = f->f_op->open(inode,f); + if (error) { + if (f->f_mode & 2) put_write_access(inode); + iput(inode); + f->f_count--; + current->files->fd[fd]=NULL; + return error; + } + } + f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); + return (fd); +} + +asmlinkage int sys_open(const char * filename,int flags,int mode) +{ + char * tmp; + int error; + + error = getname(filename, &tmp); + if (error) + return error; + error = do_open(tmp,flags,mode); + putname(tmp); + return error; +} + +asmlinkage int sys_creat(const char * pathname, int mode) +{ + return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); +} + +int close_fp(struct file *filp, unsigned int fd) +{ + struct inode *inode; + + if (filp->f_count == 0) { + printk("VFS: Close: file count is 0\n"); + return 0; + } + inode = filp->f_inode; + if (inode) + fcntl_remove_locks(current, filp, fd); + if (filp->f_count > 1) { + filp->f_count--; + return 0; + } + if (filp->f_op && filp->f_op->release) + filp->f_op->release(inode,filp); + filp->f_count--; + filp->f_inode = NULL; + if (filp->f_mode & 2) put_write_access(inode); + iput(inode); + return 
0; +} + +asmlinkage int sys_close(unsigned int fd) +{ + struct file * filp; + + if (fd >= NR_OPEN) + return -EBADF; + FD_CLR(fd, ¤t->files->close_on_exec); + if (!(filp = current->files->fd[fd])) + return -EBADF; + current->files->fd[fd] = NULL; + return (close_fp (filp, fd)); +} + +/* + * This routine simulates a hangup on the tty, to arrange that users + * are given clean terminals at login time. + */ +asmlinkage int sys_vhangup(void) +{ + if (!suser()) + return -EPERM; + /* If there is a controlling tty, hang it up */ + if (current->tty) + tty_vhangup(current->tty); + return 0; +} diff --git a/fs/pipe.c b/fs/pipe.c new file mode 100644 index 000000000..bc557888e --- /dev/null +++ b/fs/pipe.c @@ -0,0 +1,426 @@ +/* + * linux/fs/pipe.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <asm/segment.h> + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/fcntl.h> +#include <linux/termios.h> + + +/* We don't use the head/tail construction any more. Now we use the start/len*/ +/* construction providing full use of PIPE_BUF (multiple of PAGE_SIZE) */ +/* Florian Coosmann (FGC) ^ current = 1 */ +/* Additionally, we now use locking technique. This prevents race condition */ +/* in case of paging and multiple read/write on the same pipe. 
(FGC) */ + + +static int pipe_read(struct inode * inode, struct file * filp, char * buf, int count) +{ + int chars = 0, size = 0, read = 0; + char *pipebuf; + + if (filp->f_flags & O_NONBLOCK) { + if (PIPE_LOCK(*inode)) + return -EAGAIN; + if (PIPE_EMPTY(*inode)) + if (PIPE_WRITERS(*inode)) + return -EAGAIN; + else + return 0; + } else while (PIPE_EMPTY(*inode) || PIPE_LOCK(*inode)) { + if (PIPE_EMPTY(*inode)) { + if (!PIPE_WRITERS(*inode)) + return 0; + } + if (current->signal & ~current->blocked) + return -ERESTARTSYS; + interruptible_sleep_on(&PIPE_WAIT(*inode)); + } + PIPE_LOCK(*inode)++; + while (count>0 && (size = PIPE_SIZE(*inode))) { + chars = PIPE_MAX_RCHUNK(*inode); + if (chars > count) + chars = count; + if (chars > size) + chars = size; + read += chars; + pipebuf = PIPE_BASE(*inode)+PIPE_START(*inode); + PIPE_START(*inode) += chars; + PIPE_START(*inode) &= (PIPE_BUF-1); + PIPE_LEN(*inode) -= chars; + count -= chars; + memcpy_tofs(buf, pipebuf, chars ); + buf += chars; + } + PIPE_LOCK(*inode)--; + wake_up_interruptible(&PIPE_WAIT(*inode)); + if (read) + return read; + if (PIPE_WRITERS(*inode)) + return -EAGAIN; + return 0; +} + +static int pipe_write(struct inode * inode, struct file * filp, char * buf, int count) +{ + int chars = 0, free = 0, written = 0; + char *pipebuf; + + if (!PIPE_READERS(*inode)) { /* no readers */ + send_sig(SIGPIPE,current,0); + return -EPIPE; + } +/* if count <= PIPE_BUF, we have to make it atomic */ + if (count <= PIPE_BUF) + free = count; + else + free = 1; /* can't do it atomically, wait for any free space */ + while (count>0) { + while ((PIPE_FREE(*inode) < free) || PIPE_LOCK(*inode)) { + if (!PIPE_READERS(*inode)) { /* no readers */ + send_sig(SIGPIPE,current,0); + return written? :-EPIPE; + } + if (current->signal & ~current->blocked) + return written? :-ERESTARTSYS; + if (filp->f_flags & O_NONBLOCK) + return written? 
:-EAGAIN; + interruptible_sleep_on(&PIPE_WAIT(*inode)); + } + PIPE_LOCK(*inode)++; + while (count>0 && (free = PIPE_FREE(*inode))) { + chars = PIPE_MAX_WCHUNK(*inode); + if (chars > count) + chars = count; + if (chars > free) + chars = free; + pipebuf = PIPE_BASE(*inode)+PIPE_END(*inode); + written += chars; + PIPE_LEN(*inode) += chars; + count -= chars; + memcpy_fromfs(pipebuf, buf, chars ); + buf += chars; + } + PIPE_LOCK(*inode)--; + wake_up_interruptible(&PIPE_WAIT(*inode)); + free = 1; + } + return written; +} + +static int pipe_lseek(struct inode * inode, struct file * file, off_t offset, int orig) +{ + return -ESPIPE; +} + +static int pipe_readdir(struct inode * inode, struct file * file, struct dirent * de, int count) +{ + return -ENOTDIR; +} + +static int bad_pipe_rw(struct inode * inode, struct file * filp, char * buf, int count) +{ + return -EBADF; +} + +static int pipe_ioctl(struct inode *pino, struct file * filp, + unsigned int cmd, unsigned long arg) +{ + int error; + + switch (cmd) { + case FIONREAD: + error = verify_area(VERIFY_WRITE, (void *) arg,4); + if (!error) + put_fs_long(PIPE_SIZE(*pino),(unsigned long *) arg); + return error; + default: + return -EINVAL; + } +} + +static int pipe_select(struct inode * inode, struct file * filp, int sel_type, select_table * wait) +{ + switch (sel_type) { + case SEL_IN: + if (!PIPE_EMPTY(*inode) || !PIPE_WRITERS(*inode)) + return 1; + select_wait(&PIPE_WAIT(*inode), wait); + return 0; + case SEL_OUT: + if (!PIPE_FULL(*inode) || !PIPE_READERS(*inode)) + return 1; + select_wait(&PIPE_WAIT(*inode), wait); + return 0; + case SEL_EX: + if (!PIPE_READERS(*inode) || !PIPE_WRITERS(*inode)) + return 1; + select_wait(&inode->i_wait,wait); + return 0; + } + return 0; +} + +/* + * Arggh. Why does SunOS have to have different select() behaviour + * for pipes and fifos? Hate-Hate-Hate. See difference in SEL_IN.. 
+ */ +static int fifo_select(struct inode * inode, struct file * filp, int sel_type, select_table * wait) +{ + switch (sel_type) { + case SEL_IN: + if (!PIPE_EMPTY(*inode)) + return 1; + select_wait(&PIPE_WAIT(*inode), wait); + return 0; + case SEL_OUT: + if (!PIPE_FULL(*inode) || !PIPE_READERS(*inode)) + return 1; + select_wait(&PIPE_WAIT(*inode), wait); + return 0; + case SEL_EX: + if (!PIPE_READERS(*inode) || !PIPE_WRITERS(*inode)) + return 1; + select_wait(&inode->i_wait,wait); + return 0; + } + return 0; +} + +/* + * The 'connect_xxx()' functions are needed for named pipes when + * the open() code hasn't guaranteed a connection (O_NONBLOCK), + * and we need to act differently until we do get a writer.. + */ +static int connect_read(struct inode * inode, struct file * filp, char * buf, int count) +{ + while (!PIPE_SIZE(*inode)) { + if (PIPE_WRITERS(*inode)) + break; + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + wake_up_interruptible(& PIPE_WAIT(*inode)); + if (current->signal & ~current->blocked) + return -ERESTARTSYS; + interruptible_sleep_on(& PIPE_WAIT(*inode)); + } + filp->f_op = &read_fifo_fops; + return pipe_read(inode,filp,buf,count); +} + +static int connect_select(struct inode * inode, struct file * filp, int sel_type, select_table * wait) +{ + switch (sel_type) { + case SEL_IN: + if (!PIPE_EMPTY(*inode)) { + filp->f_op = &read_fifo_fops; + return 1; + } + select_wait(&PIPE_WAIT(*inode), wait); + return 0; + case SEL_OUT: + if (!PIPE_FULL(*inode)) + return 1; + select_wait(&PIPE_WAIT(*inode), wait); + return 0; + case SEL_EX: + if (!PIPE_READERS(*inode) || !PIPE_WRITERS(*inode)) + return 1; + select_wait(&inode->i_wait,wait); + return 0; + } + return 0; +} + +/* + * Ok, these three routines NOW keep track of readers/writers, + * Linus previously did it with inode->i_count checking. 
+ */ +static void pipe_read_release(struct inode * inode, struct file * filp) +{ + PIPE_READERS(*inode)--; + wake_up_interruptible(&PIPE_WAIT(*inode)); +} + +static void pipe_write_release(struct inode * inode, struct file * filp) +{ + PIPE_WRITERS(*inode)--; + wake_up_interruptible(&PIPE_WAIT(*inode)); +} + +static void pipe_rdwr_release(struct inode * inode, struct file * filp) +{ + PIPE_READERS(*inode)--; + PIPE_WRITERS(*inode)--; + wake_up_interruptible(&PIPE_WAIT(*inode)); +} + +/* + * The file_operations structs are not static because they + * are also used in linux/fs/fifo.c to do operations on fifo's. + */ +struct file_operations connecting_fifo_fops = { + pipe_lseek, + connect_read, + bad_pipe_rw, + pipe_readdir, + connect_select, + pipe_ioctl, + NULL, /* no mmap on pipes.. surprise */ + NULL, /* no special open code */ + pipe_read_release, + NULL +}; + +struct file_operations read_fifo_fops = { + pipe_lseek, + pipe_read, + bad_pipe_rw, + pipe_readdir, + fifo_select, + pipe_ioctl, + NULL, /* no mmap on pipes.. surprise */ + NULL, /* no special open code */ + pipe_read_release, + NULL +}; + +struct file_operations write_fifo_fops = { + pipe_lseek, + bad_pipe_rw, + pipe_write, + pipe_readdir, + fifo_select, + pipe_ioctl, + NULL, /* mmap */ + NULL, /* no special open code */ + pipe_write_release, + NULL +}; + +struct file_operations rdwr_fifo_fops = { + pipe_lseek, + pipe_read, + pipe_write, + pipe_readdir, + fifo_select, + pipe_ioctl, + NULL, /* mmap */ + NULL, /* no special open code */ + pipe_rdwr_release, + NULL +}; + +struct file_operations read_pipe_fops = { + pipe_lseek, + pipe_read, + bad_pipe_rw, + pipe_readdir, + pipe_select, + pipe_ioctl, + NULL, /* no mmap on pipes.. 
surprise */ + NULL, /* no special open code */ + pipe_read_release, + NULL +}; + +struct file_operations write_pipe_fops = { + pipe_lseek, + bad_pipe_rw, + pipe_write, + pipe_readdir, + pipe_select, + pipe_ioctl, + NULL, /* mmap */ + NULL, /* no special open code */ + pipe_write_release, + NULL +}; + +struct file_operations rdwr_pipe_fops = { + pipe_lseek, + pipe_read, + pipe_write, + pipe_readdir, + pipe_select, + pipe_ioctl, + NULL, /* mmap */ + NULL, /* no special open code */ + pipe_rdwr_release, + NULL +}; + +struct inode_operations pipe_inode_operations = { + &rdwr_pipe_fops, + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +asmlinkage int sys_pipe(unsigned long * fildes) +{ + struct inode * inode; + struct file * f[2]; + int fd[2]; + int i,j; + + j = verify_area(VERIFY_WRITE,fildes,8); + if (j) + return j; + for(j=0 ; j<2 ; j++) + if (!(f[j] = get_empty_filp())) + break; + if (j==1) + f[0]->f_count--; + if (j<2) + return -ENFILE; + j=0; + for(i=0;j<2 && i<NR_OPEN;i++) + if (!current->files->fd[i]) { + current->files->fd[ fd[j]=i ] = f[j]; + j++; + } + if (j==1) + current->files->fd[fd[0]]=NULL; + if (j<2) { + f[0]->f_count--; + f[1]->f_count--; + return -EMFILE; + } + if (!(inode=get_pipe_inode())) { + current->files->fd[fd[0]] = NULL; + current->files->fd[fd[1]] = NULL; + f[0]->f_count--; + f[1]->f_count--; + return -ENFILE; + } + f[0]->f_inode = f[1]->f_inode = inode; + f[0]->f_pos = f[1]->f_pos = 0; + f[0]->f_flags = O_RDONLY; + f[0]->f_op = &read_pipe_fops; + f[0]->f_mode = 1; /* read */ + f[1]->f_flags = O_WRONLY; + f[1]->f_op = &write_pipe_fops; + f[1]->f_mode = 2; /* write */ + put_fs_long(fd[0],0+fildes); + put_fs_long(fd[1],1+fildes); + return 0; +} diff --git a/fs/proc/Makefile b/fs/proc/Makefile 
new file mode 100644 index 000000000..71c62433c --- /dev/null +++ b/fs/proc/Makefile @@ -0,0 +1,30 @@ +# +# Makefile for the linux proc-filesystem routines. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +.c.s: + $(CC) $(CFLAGS) -S $< +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< + +OBJS= inode.o root.o base.o mem.o link.o fd.o array.o kmsg.o net.o + +proc.o: $(OBJS) + $(LD) -r -o proc.o $(OBJS) + +dep: + $(CPP) -M *.c > .depend + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/fs/proc/array.c b/fs/proc/array.c new file mode 100644 index 000000000..6fd7bccbe --- /dev/null +++ b/fs/proc/array.c @@ -0,0 +1,598 @@ +/* + * linux/fs/proc/array.c + * + * Copyright (C) 1992 by Linus Torvalds + * based on ideas by Darren Senn + * + * Fixes: + * Michael. K. Johnson: stat,statm extensions. + * <johnsonm@stolaf.edu> + * + * Pauline Middelink : Made cmdline,envline only break at '\0's, to + * make sure SET_PROCTITLE works. Also removed + * bad '!' which forced address recalculation for + * EVERY character on the current page. 
+ * <middelin@polyware.iaf.nl> + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/tty.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/string.h> +#include <linux/mman.h> +#include <linux/proc_fs.h> + +#include <asm/segment.h> +#include <asm/io.h> + +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) + +#ifdef CONFIG_DEBUG_MALLOC +int get_malloc(char * buffer); +#endif + +static int read_core(struct inode * inode, struct file * file,char * buf, int count) +{ + unsigned long p = file->f_pos; + int read; + int count1; + char * pnt; + struct user dump; + + memset(&dump, 0, sizeof(struct user)); + dump.magic = CMAGIC; + dump.u_dsize = high_memory >> 12; + + if (count < 0) + return -EINVAL; + if (p >= high_memory + PAGE_SIZE) + return 0; + if (count > high_memory + PAGE_SIZE - p) + count = high_memory + PAGE_SIZE - p; + read = 0; + + if (p < sizeof(struct user) && count > 0) { + count1 = count; + if (p + count1 > sizeof(struct user)) + count1 = sizeof(struct user)-p; + pnt = (char *) &dump + p; + memcpy_tofs(buf,(void *) pnt, count1); + buf += count1; + p += count1; + count -= count1; + read += count1; + } + + while (p < 2*PAGE_SIZE && count > 0) { + put_fs_byte(0,buf); + buf++; + p++; + count--; + read++; + } + memcpy_tofs(buf,(void *) (p - PAGE_SIZE),count); + read += count; + file->f_pos += read; + return read; +} + +static struct file_operations proc_kcore_operations = { + NULL, /* lseek */ + read_core, +}; + +struct inode_operations proc_kcore_inode_operations = { + &proc_kcore_operations, +}; + +static int get_loadavg(char * buffer) +{ + int a, b, c; + + a = avenrun[0] + (FIXED_1/200); + b = avenrun[1] + (FIXED_1/200); + c = avenrun[2] + (FIXED_1/200); + return sprintf(buffer,"%d.%02d %d.%02d %d.%02d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c)); +} 
+ +static int get_kstat(char * buffer) +{ + int i, len; + unsigned sum = 0; + + for (i = 0 ; i < 16 ; i++) + sum += kstat.interrupts[i]; + len = sprintf(buffer, + "cpu %u %u %u %lu\n" + "disk %u %u %u %u\n" + "page %u %u\n" + "swap %u %u\n" + "intr %u", + kstat.cpu_user, + kstat.cpu_nice, + kstat.cpu_system, + jiffies - (kstat.cpu_user + kstat.cpu_nice + kstat.cpu_system), + kstat.dk_drive[0], + kstat.dk_drive[1], + kstat.dk_drive[2], + kstat.dk_drive[3], + kstat.pgpgin, + kstat.pgpgout, + kstat.pswpin, + kstat.pswpout, + sum); + for (i = 0 ; i < 16 ; i++) + len += sprintf(buffer + len, " %u", kstat.interrupts[i]); + len += sprintf(buffer + len, + "\nctxt %u\n" + "btime %lu\n", + kstat.context_swtch, + xtime.tv_sec - jiffies / HZ); + return len; +} + + +static int get_uptime(char * buffer) +{ + unsigned long uptime; + unsigned long idle; + + uptime = jiffies; + idle = task[0]->utime + task[0]->stime; + return sprintf(buffer,"%lu.%02lu %lu.%02lu\n", + uptime / HZ, + uptime % HZ, + idle / HZ, + idle % HZ); +} + +static int get_meminfo(char * buffer) +{ + struct sysinfo i; + + si_meminfo(&i); + si_swapinfo(&i); + return sprintf(buffer, " total: used: free: shared: buffers:\n" + "Mem: %8lu %8lu %8lu %8lu %8lu\n" + "Swap: %8lu %8lu %8lu\n", + i.totalram, i.totalram-i.freeram, i.freeram, i.sharedram, i.bufferram, + i.totalswap, i.totalswap-i.freeswap, i.freeswap); +} + +static int get_version(char * buffer) +{ + extern char *linux_banner; + + strcpy(buffer, linux_banner); + return strlen(buffer); +} + +static struct task_struct ** get_task(pid_t pid) +{ + struct task_struct ** p; + + p = task; + while (++p < task+NR_TASKS) { + if (*p && (*p)->pid == pid) + return p; + } + return NULL; +} + +static unsigned long get_phys_addr(struct task_struct ** p, unsigned long ptr) +{ + unsigned long page; + + if (!p || !*p || ptr >= TASK_SIZE) + return 0; + page = *PAGE_DIR_OFFSET((*p)->tss.cr3,ptr); + if (!(page & PAGE_PRESENT)) + return 0; + page &= PAGE_MASK; + page += 
PAGE_PTR(ptr); + page = *(unsigned long *) page; + if (!(page & PAGE_PRESENT)) + return 0; + page &= PAGE_MASK; + page += ptr & ~PAGE_MASK; + return page; +} + +static int get_array(struct task_struct ** p, unsigned long start, unsigned long end, char * buffer) +{ + unsigned long addr; + int size = 0, result = 0; + char c; + + if (start >= end) + return result; + for (;;) { + addr = get_phys_addr(p, start); + if (!addr) + goto ready; + do { + c = *(char *) addr; + if (!c) + result = size; + if (size < PAGE_SIZE) + buffer[size++] = c; + else + goto ready; + addr++; + start++; + if (!c && start >= end) + goto ready; + } while (addr & ~PAGE_MASK); + } +ready: + /* remove the trailing blanks, used to fill out argv,envp space */ + while (result>0 && buffer[result-1]==' ') + result--; + return result; +} + +static int get_env(int pid, char * buffer) +{ + struct task_struct ** p = get_task(pid); + + if (!p || !*p) + return 0; + return get_array(p, (*p)->mm->env_start, (*p)->mm->env_end, buffer); +} + +static int get_arg(int pid, char * buffer) +{ + struct task_struct ** p = get_task(pid); + + if (!p || !*p) + return 0; + return get_array(p, (*p)->mm->arg_start, (*p)->mm->arg_end, buffer); +} + +static unsigned long get_wchan(struct task_struct *p) +{ + unsigned long ebp, eip; + unsigned long stack_page; + int count = 0; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + stack_page = p->kernel_stack_page; + if (!stack_page) + return 0; + ebp = p->tss.ebp; + do { + if (ebp < stack_page || ebp >= 4092+stack_page) + return 0; + eip = *(unsigned long *) (ebp+4); + if ((void *)eip != sleep_on && + (void *)eip != interruptible_sleep_on) + return eip; + ebp = *(unsigned long *) ebp; + } while (count++ < 16); + return 0; +} + +#define KSTK_EIP(stack) (((unsigned long *)stack)[1019]) +#define KSTK_ESP(stack) (((unsigned long *)stack)[1022]) + +static int get_stat(int pid, char * buffer) +{ + struct task_struct ** p = get_task(pid); + unsigned long sigignore=0, 
sigcatch=0, bit=1, wchan; + unsigned long vsize, eip, esp; + int i,tty_pgrp; + char state; + + if (!p || !*p) + return 0; + if ((*p)->state < 0 || (*p)->state > 5) + state = '.'; + else + state = "RSDZTD"[(*p)->state]; + eip = esp = 0; + vsize = (*p)->kernel_stack_page; + if (vsize) { + eip = KSTK_EIP(vsize); + esp = KSTK_ESP(vsize); + vsize = (*p)->mm->brk - (*p)->mm->start_code + PAGE_SIZE-1; + if (esp) + vsize += TASK_SIZE - esp; + } + wchan = get_wchan(*p); + for(i=0; i<32; ++i) { + switch((int) (*p)->sigaction[i].sa_handler) { + case 1: sigignore |= bit; break; + case 0: break; + default: sigcatch |= bit; + } bit <<= 1; + } + if ((*p)->tty) + tty_pgrp = (*p)->tty->pgrp; + else + tty_pgrp = -1; + return sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ +%lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %u %lu %lu %lu %lu %lu %lu \ +%lu %lu %lu %lu\n", + pid, + (*p)->comm, + state, + (*p)->p_pptr->pid, + (*p)->pgrp, + (*p)->session, + (*p)->tty ? (*p)->tty->device : 0, + tty_pgrp, + (*p)->flags, + (*p)->mm->min_flt, + (*p)->mm->cmin_flt, + (*p)->mm->maj_flt, + (*p)->mm->cmaj_flt, + (*p)->utime, + (*p)->stime, + (*p)->cutime, + (*p)->cstime, + (*p)->counter, /* this is the kernel priority --- + subtract 30 in your user-level program. */ + (*p)->priority, /* this is the nice value --- + subtract 15 in your user-level program. 
*/ + (*p)->timeout, + (*p)->it_real_value, + (*p)->start_time, + vsize, + (*p)->mm->rss, /* you might want to shift this left 3 */ + (*p)->rlim[RLIMIT_RSS].rlim_cur, + (*p)->mm->start_code, + (*p)->mm->end_code, + (*p)->mm->start_stack, + esp, + eip, + (*p)->signal, + (*p)->blocked, + sigignore, + sigcatch, + wchan); +} + +static int get_statm(int pid, char * buffer) +{ + struct task_struct ** p = get_task(pid); + int i, tpag; + int size=0, resident=0, share=0, trs=0, lrs=0, drs=0, dt=0; + unsigned long ptbl, *buf, *pte, *pagedir, map_nr; + + if (!p || !*p) + return 0; + tpag = (*p)->mm->end_code / PAGE_SIZE; + if ((*p)->state != TASK_ZOMBIE) { + pagedir = (unsigned long *) (*p)->tss.cr3; + for (i = 0; i < 0x300; ++i) { + if ((ptbl = pagedir[i]) == 0) { + tpag -= PTRS_PER_PAGE; + continue; + } + buf = (unsigned long *)(ptbl & PAGE_MASK); + for (pte = buf; pte < (buf + PTRS_PER_PAGE); ++pte) { + if (*pte != 0) { + ++size; + if (*pte & 1) { + ++resident; + if (tpag > 0) + ++trs; + else + ++drs; + if (i >= 15 && i < 0x2f0) { + ++lrs; + if (*pte & 0x40) + ++dt; + else + --drs; + } + map_nr = MAP_NR(*pte); + if (map_nr < (high_memory / PAGE_SIZE) && mem_map[map_nr] > 1) + ++share; + } + } + --tpag; + } + } + } + return sprintf(buffer,"%d %d %d %d %d %d %d\n", + size, resident, share, trs, lrs, drs, dt); +} + +static int get_maps(int pid, char *buf) +{ + int sz = 0; + struct task_struct **p = get_task(pid); + struct vm_area_struct *map; + + if (!p || !*p) + return 0; + + for(map = (*p)->mm->mmap; map != NULL; map = map->vm_next) { + char str[7], *cp = str; + int flags; + int end = sz + 80; /* Length of line */ + dev_t dev; + unsigned long ino; + + flags = map->vm_flags; + + *cp++ = flags & VM_READ ? 'r' : '-'; + *cp++ = flags & VM_WRITE ? 'w' : '-'; + *cp++ = flags & VM_EXEC ? 'x' : '-'; + *cp++ = flags & VM_SHARED ? 
's' : 'p'; + *cp++ = 0; + + if (end >= PAGE_SIZE) { + sprintf(buf+sz, "...\n"); + break; + } + + if (map->vm_inode != NULL) { + dev = map->vm_inode->i_dev; + ino = map->vm_inode->i_ino; + } else { + dev = 0; + ino = 0; + } + + sz += sprintf(buf+sz, "%08lx-%08lx %s %08lx %02x:%02x %lu\n", + map->vm_start, map->vm_end, str, map->vm_offset, + MAJOR(dev),MINOR(dev), ino); + if (sz > end) { + printk("get_maps: end(%d) < sz(%d)\n", end, sz); + break; + } + } + + return sz; +} + +extern int get_module_list(char *); +extern int get_device_list(char *); +extern int get_filesystem_list(char *); +extern int get_ksyms_list(char *); +extern int get_irq_list(char *); +extern int get_dma_list(char *); + +static int get_root_array(char * page, int type) +{ + switch (type) { + case PROC_LOADAVG: + return get_loadavg(page); + + case PROC_UPTIME: + return get_uptime(page); + + case PROC_MEMINFO: + return get_meminfo(page); + + case PROC_VERSION: + return get_version(page); + +#ifdef CONFIG_DEBUG_MALLOC + case PROC_MALLOC: + return get_malloc(page); +#endif + + case PROC_MODULES: + return get_module_list(page); + + case PROC_STAT: + return get_kstat(page); + + case PROC_DEVICES: + return get_device_list(page); + + case PROC_INTERRUPTS: + return get_irq_list(page); + + case PROC_FILESYSTEMS: + return get_filesystem_list(page); + + case PROC_KSYMS: + return get_ksyms_list(page); + + case PROC_DMA: + return get_dma_list(page); + } + return -EBADF; +} + +static int get_process_array(char * page, int pid, int type) +{ + switch (type) { + case PROC_PID_ENVIRON: + return get_env(pid, page); + case PROC_PID_CMDLINE: + return get_arg(pid, page); + case PROC_PID_STAT: + return get_stat(pid, page); + case PROC_PID_STATM: + return get_statm(pid, page); + case PROC_PID_MAPS: + return get_maps(pid, page); + } + return -EBADF; +} + + +static inline int fill_array(char * page, int pid, int type) +{ + if (pid) + return get_process_array(page, pid, type); + return get_root_array(page, type); +} + 
+static int array_read(struct inode * inode, struct file * file,char * buf, int count) +{ + unsigned long page; + int length; + int end; + unsigned int type, pid; + + if (count < 0) + return -EINVAL; + if (!(page = __get_free_page(GFP_KERNEL))) + return -ENOMEM; + type = inode->i_ino; + pid = type >> 16; + type &= 0x0000ffff; + length = fill_array((char *) page, pid, type); + if (length < 0) { + free_page(page); + return length; + } + if (file->f_pos >= length) { + free_page(page); + return 0; + } + if (count + file->f_pos > length) + count = length - file->f_pos; + end = count + file->f_pos; + memcpy_tofs(buf, (char *) page + file->f_pos, count); + free_page(page); + file->f_pos = end; + return count; +} + +static struct file_operations proc_array_operations = { + NULL, /* array_lseek */ + array_read, + NULL, /* array_write */ + NULL, /* array_readdir */ + NULL, /* array_select */ + NULL, /* array_ioctl */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + NULL /* can't fsync */ +}; + +struct inode_operations proc_array_inode_operations = { + &proc_array_operations, /* default base directory file-ops */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; diff --git a/fs/proc/base.c b/fs/proc/base.c new file mode 100644 index 000000000..3dcf0189b --- /dev/null +++ b/fs/proc/base.c @@ -0,0 +1,155 @@ +/* + * linux/fs/proc/base.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * proc base directory handling functions + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> + +static int proc_readbase(struct inode *, struct file *, struct dirent *, int); +static int proc_lookupbase(struct 
inode *,const char *,int,struct inode **); + +static struct file_operations proc_base_operations = { + NULL, /* lseek - default */ + NULL, /* read - bad */ + NULL, /* write - bad */ + proc_readbase, /* readdir */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + NULL /* can't fsync */ +}; + +/* + * proc directories can do almost nothing.. + */ +struct inode_operations proc_base_inode_operations = { + &proc_base_operations, /* default base directory file-ops */ + NULL, /* create */ + proc_lookupbase, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static struct proc_dir_entry base_dir[] = { + { PROC_PID_INO, 1, "." }, + { PROC_ROOT_INO, 2, ".." }, + { PROC_PID_MEM, 3, "mem" }, + { PROC_PID_CWD, 3, "cwd" }, + { PROC_PID_ROOT, 4, "root" }, + { PROC_PID_EXE, 3, "exe" }, + { PROC_PID_FD, 2, "fd" }, + { PROC_PID_ENVIRON, 7, "environ" }, + { PROC_PID_CMDLINE, 7, "cmdline" }, + { PROC_PID_STAT, 4, "stat" }, + { PROC_PID_STATM, 5, "statm" }, + { PROC_PID_MAPS, 4, "maps" } +}; + +#define NR_BASE_DIRENTRY ((sizeof (base_dir))/(sizeof (base_dir[0]))) + +int proc_match(int len,const char * name,struct proc_dir_entry * de) +{ + if (!de || !de->low_ino) + return 0; + /* "" means "." 
---> so paths like "/usr/lib//libc.a" work */ + if (!len && (de->name[0]=='.') && (de->name[1]=='\0')) + return 1; + if (de->namelen != len) + return 0; + return !memcmp(name, de->name, len); +} + +static int proc_lookupbase(struct inode * dir,const char * name, int len, + struct inode ** result) +{ + unsigned int pid, ino; + int i; + + *result = NULL; + if (!dir) + return -ENOENT; + if (!S_ISDIR(dir->i_mode)) { + iput(dir); + return -ENOENT; + } + ino = dir->i_ino; + pid = ino >> 16; + i = NR_BASE_DIRENTRY; + while (i-- > 0 && !proc_match(len,name,base_dir+i)) + /* nothing */; + if (i < 0) { + iput(dir); + return -ENOENT; + } + if (base_dir[i].low_ino == 1) + ino = 1; + else + ino = (pid << 16) + base_dir[i].low_ino; + for (i = 0 ; i < NR_TASKS ; i++) + if (task[i] && task[i]->pid == pid) + break; + if (!pid || i >= NR_TASKS) { + iput(dir); + return -ENOENT; + } + if (!(*result = iget(dir->i_sb,ino))) { + iput(dir); + return -ENOENT; + } + iput(dir); + return 0; +} + +static int proc_readbase(struct inode * inode, struct file * filp, + struct dirent * dirent, int count) +{ + struct proc_dir_entry * de; + unsigned int pid, ino; + int i,j; + + if (!inode || !S_ISDIR(inode->i_mode)) + return -EBADF; + ino = inode->i_ino; + pid = ino >> 16; + for (i = 0 ; i < NR_TASKS ; i++) + if (task[i] && task[i]->pid == pid) + break; + if (!pid || i >= NR_TASKS) + return 0; + if (((unsigned) filp->f_pos) < NR_BASE_DIRENTRY) { + de = base_dir + filp->f_pos; + filp->f_pos++; + i = de->namelen; + ino = de->low_ino; + if (ino != 1) + ino |= (pid << 16); + put_fs_long(ino, &dirent->d_ino); + put_fs_word(i,&dirent->d_reclen); + put_fs_byte(0,i+dirent->d_name); + j = i; + while (i--) + put_fs_byte(de->name[i], i+dirent->d_name); + return j; + } + return 0; +} diff --git a/fs/proc/fd.c b/fs/proc/fd.c new file mode 100644 index 000000000..954540871 --- /dev/null +++ b/fs/proc/fd.c @@ -0,0 +1,180 @@ +/* + * linux/fs/proc/fd.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * proc fd 
directory handling functions + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> + +static int proc_readfd(struct inode *, struct file *, struct dirent *, int); +static int proc_lookupfd(struct inode *,const char *,int,struct inode **); + +static struct file_operations proc_fd_operations = { + NULL, /* lseek - default */ + NULL, /* read - bad */ + NULL, /* write - bad */ + proc_readfd, /* readdir */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + NULL /* can't fsync */ +}; + +/* + * proc directories can do almost nothing.. + */ +struct inode_operations proc_fd_inode_operations = { + &proc_fd_operations, /* default base directory file-ops */ + NULL, /* create */ + proc_lookupfd, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +/* + * Look up one name in a per-process fd directory, i.e. a directory + * inode whose number is (pid << 16) + 7. An empty name or "." yields + * dir itself, ".." yields inode (pid << 16) + 2, and a decimal name + * yields inode (pid << 16) + 0x100 + fd when that descriptor is in use + * by the task. The reference on dir is released on every path except + * the one that returns dir through *result. Returns 0 or -ENOENT. + */ +static int proc_lookupfd(struct inode * dir,const char * name, int len, + struct inode ** result) +{ + unsigned int ino, pid, fd, c; + struct task_struct * p; + struct super_block * sb; + int i; + + *result = NULL; + /* dir must be tested before any of its fields are read */ + if (!dir) + return -ENOENT; + ino = dir->i_ino; + pid = ino >> 16; + ino &= 0x0000ffff; + ino -= 7; + sb = dir->i_sb; + if (!pid || ino || !S_ISDIR(dir->i_mode)) { + iput(dir); + return -ENOENT; + } + if (!len || (name[0] == '.' && (len == 1 || + (name[1] == '.' 
&& len == 2)))) { + if (len < 2) { + *result = dir; + return 0; + } + if (!(*result = iget(sb,(pid << 16)+2))) { + iput(dir); + return -ENOENT; + } + iput(dir); + return 0; + } + iput(dir); + fd = 0; + while (len-- > 0) { + c = *name - '0'; + name++; + if (c > 9) { + fd = 0xfffff; + break; + } + fd *= 10; + fd += c; + if (fd & 0xffff0000) { + fd = 0xfffff; + break; + } + } + for (i = 0 ; i < NR_TASKS ; i++) + if ((p = task[i]) && p->pid == pid) + break; + if (!pid || i >= NR_TASKS) + return -ENOENT; + + if (fd >= NR_OPEN || !p->files->fd[fd] || !p->files->fd[fd]->f_inode) + return -ENOENT; + + ino = (pid << 16) + 0x100 + fd; + + if (!(*result = iget(sb,ino))) + return -ENOENT; + return 0; +} + +static int proc_readfd(struct inode * inode, struct file * filp, + struct dirent * dirent, int count) +{ + struct task_struct * p; + unsigned int fd, pid, ino; + int i,j; + + if (!inode || !S_ISDIR(inode->i_mode)) + return -EBADF; + ino = inode->i_ino; + pid = ino >> 16; + ino &= 0x0000ffff; + ino -= 7; + if (ino) + return 0; + while (1) { + fd = filp->f_pos; + filp->f_pos++; + if (fd < 2) { + i = j = fd+1; + if (!fd) + fd = inode->i_ino; + else + fd = (inode->i_ino & 0xffff0000) | 2; + put_fs_long(fd, &dirent->d_ino); + put_fs_word(i, &dirent->d_reclen); + put_fs_byte(0, i+dirent->d_name); + while (i--) + put_fs_byte('.', i+dirent->d_name); + return j; + } + fd -= 2; + for (i = 1 ; i < NR_TASKS ; i++) + if ((p = task[i]) && p->pid == pid) + break; + if (i >= NR_TASKS) + return 0; + if (fd >= NR_OPEN) + break; + + if (!p->files->fd[fd] || !p->files->fd[fd]->f_inode) + continue; + + j = 10; + i = 1; + while (fd >= j) { + j *= 10; + i++; + } + j = i; + ino = (pid << 16) + 0x100 + fd; + + put_fs_long(ino, &dirent->d_ino); + put_fs_word(i, &dirent->d_reclen); + put_fs_byte(0, i+dirent->d_name); + while (i--) { + put_fs_byte('0'+(fd % 10), i+dirent->d_name); + fd /= 10; + } + return j; + } + return 0; +} diff --git a/fs/proc/inode.c b/fs/proc/inode.c new file mode 100644 index 
000000000..0d0848b33 --- /dev/null +++ b/fs/proc/inode.c @@ -0,0 +1,191 @@ +/* + * linux/fs/proc/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/sched.h> +#include <linux/proc_fs.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/locks.h> +#include <linux/limits.h> + +#include <asm/system.h> +#include <asm/segment.h> + +void proc_put_inode(struct inode *inode) +{ + if (inode->i_nlink) + return; + inode->i_size = 0; +} + +void proc_put_super(struct super_block *sb) +{ + lock_super(sb); + sb->s_dev = 0; + unlock_super(sb); +} + +static struct super_operations proc_sops = { + proc_read_inode, + NULL, + proc_write_inode, + proc_put_inode, + proc_put_super, + NULL, + proc_statfs, + NULL +}; + +struct super_block *proc_read_super(struct super_block *s,void *data, + int silent) +{ + lock_super(s); + s->s_blocksize = 1024; + s->s_blocksize_bits = 10; + s->s_magic = PROC_SUPER_MAGIC; + s->s_op = &proc_sops; + unlock_super(s); + if (!(s->s_mounted = iget(s,PROC_ROOT_INO))) { + s->s_dev = 0; + printk("get root inode failed\n"); + return NULL; + } + return s; +} + +void proc_statfs(struct super_block *sb, struct statfs *buf) +{ + put_fs_long(PROC_SUPER_MAGIC, &buf->f_type); + put_fs_long(PAGE_SIZE/sizeof(long), &buf->f_bsize); + put_fs_long(0, &buf->f_blocks); + put_fs_long(0, &buf->f_bfree); + put_fs_long(0, &buf->f_bavail); + put_fs_long(0, &buf->f_files); + put_fs_long(0, &buf->f_ffree); + put_fs_long(NAME_MAX, &buf->f_namelen); + /* Don't know what value to put in buf->f_fsid */ +} + +void proc_read_inode(struct inode * inode) +{ + unsigned long ino, pid; + struct task_struct * p; + int i; + + inode->i_op = NULL; + inode->i_mode = 0; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_nlink = 1; + inode->i_size = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_blocks = 0; + inode->i_blksize = 1024; + ino = inode->i_ino; + pid = ino >> 16; + 
p = task[0]; + for (i = 0; i < NR_TASKS ; i++) + if ((p = task[i]) && (p->pid == pid)) + break; + if (!p || i >= NR_TASKS) + return; + if (ino == PROC_ROOT_INO) { + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + inode->i_nlink = 2; + for (i = 1 ; i < NR_TASKS ; i++) + if (task[i]) + inode->i_nlink++; + inode->i_op = &proc_root_inode_operations; + return; + } + + /* files within /proc/net */ + if ((ino >= PROC_NET_UNIX) && (ino < PROC_NET_LAST)) { + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_op = &proc_net_inode_operations; + return; + } + + if (!pid) { + switch (ino) { + case PROC_KMSG: + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_op = &proc_kmsg_inode_operations; + break; + case PROC_NET: + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + inode->i_nlink = 2; + inode->i_op = &proc_net_inode_operations; + break; + case PROC_KCORE: + inode->i_mode = S_IFREG | S_IRUSR; + inode->i_op = &proc_kcore_inode_operations; + inode->i_size = high_memory + PAGE_SIZE; + break; + default: + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_op = &proc_array_inode_operations; + break; + } + return; + } + ino &= 0x0000ffff; + inode->i_uid = p->euid; + inode->i_gid = p->egid; + switch (ino) { + case PROC_PID_INO: + inode->i_nlink = 4; + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + inode->i_op = &proc_base_inode_operations; + return; + case PROC_PID_MEM: + inode->i_op = &proc_mem_inode_operations; + inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; + return; + case PROC_PID_CWD: + case PROC_PID_ROOT: + case PROC_PID_EXE: + inode->i_op = &proc_link_inode_operations; + inode->i_size = 64; + inode->i_mode = S_IFLNK | S_IRWXU; + return; + case PROC_PID_FD: + inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; + inode->i_op = &proc_fd_inode_operations; + inode->i_nlink = 2; + return; + case PROC_PID_ENVIRON: + case PROC_PID_CMDLINE: + case PROC_PID_STAT: + case PROC_PID_STATM: + case PROC_PID_MAPS: + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_op = &proc_array_inode_operations; + return; + } + 
switch (ino >> 8) { + case PROC_PID_FD_DIR: + ino &= 0xff; + if (ino >= NR_OPEN || !p->files->fd[ino]) + return; + inode->i_op = &proc_link_inode_operations; + inode->i_size = 64; + inode->i_mode = S_IFLNK; + if (p->files->fd[ino]->f_mode & 1) + inode->i_mode |= S_IRUSR | S_IXUSR; + if (p->files->fd[ino]->f_mode & 2) + inode->i_mode |= S_IWUSR | S_IXUSR; + return; + } + return; +} + +void proc_write_inode(struct inode * inode) +{ + inode->i_dirt=0; +} diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c new file mode 100644 index 000000000..812ee3dd5 --- /dev/null +++ b/fs/proc/kmsg.c @@ -0,0 +1,76 @@ +/* + * linux/fs/proc/kmsg.c + * + * Copyright (C) 1992 by Linus Torvalds + * + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> + +#include <asm/segment.h> +#include <asm/io.h> + +extern unsigned long log_size; +extern struct wait_queue * log_wait; + +asmlinkage int sys_syslog(int type, char * bug, int count); + +static int kmsg_open(struct inode * inode, struct file * file) +{ + return sys_syslog(1,NULL,0); +} + +static void kmsg_release(struct inode * inode, struct file * file) +{ + (void) sys_syslog(0,NULL,0); +} + +static int kmsg_read(struct inode * inode, struct file * file,char * buf, int count) +{ + return sys_syslog(2,buf,count); +} + +static int kmsg_select(struct inode *inode, struct file *file, int sel_type, select_table * wait) +{ + if (sel_type != SEL_IN) + return 0; + if (log_size) + return 1; + select_wait(&log_wait, wait); + return 0; +} + + +static struct file_operations proc_kmsg_operations = { + NULL, /* kmsg_lseek */ + kmsg_read, + NULL, /* kmsg_write */ + NULL, /* kmsg_readdir */ + kmsg_select, /* kmsg_select */ + NULL, /* kmsg_ioctl */ + NULL, /* mmap */ + kmsg_open, + kmsg_release, + NULL /* can't fsync */ +}; + +struct inode_operations proc_kmsg_inode_operations = { + &proc_kmsg_operations, /* default base directory file-ops */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + 
NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; diff --git a/fs/proc/link.c b/fs/proc/link.c new file mode 100644 index 000000000..769014f46 --- /dev/null +++ b/fs/proc/link.c @@ -0,0 +1,195 @@ +/* + * linux/fs/proc/link.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * /proc link-file handling code + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> + +static int proc_readlink(struct inode *, char *, int); +static int proc_follow_link(struct inode *, struct inode *, int, int, + struct inode **); +static int proc_fd_dupf(struct inode * inode, struct file * f); + +#define PLAN9_SEMANTICS + +/* + * links can't do much... + */ +static struct file_operations proc_fd_link_operations = { + NULL, /* lseek - default */ + NULL, /* read - bad */ + NULL, /* write - bad */ + NULL, /* readdir - bad */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + proc_fd_dupf, /* very special open code */ + NULL, /* no special release code */ + NULL /* can't fsync */ +}; + +struct inode_operations proc_link_inode_operations = { + &proc_fd_link_operations,/* file-operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + proc_readlink, /* readlink */ + proc_follow_link, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +/* + * This open routine is somewhat of a hack.... what we are doing is + * looking up the file structure of the newly opened proc fd file, and + * replacing it with the actual file structure of the process's file + * descriptor. 
This allows plan 9 semantics, so that the returned + * file descriptor is an dup of the target file descriptor. + */ +static int proc_fd_dupf(struct inode * inode, struct file * f) +{ + unsigned int pid, ino; + int i, fd; + struct task_struct * p; + struct file *new_f; + + for(fd=0 ; fd<NR_OPEN ; fd++) + if (current->files->fd[fd] == f) + break; + if (fd>=NR_OPEN) + return -ENOENT; /* should never happen */ + + ino = inode->i_ino; + pid = ino >> 16; + ino &= 0x0000ffff; + + for (i = 0 ; i < NR_TASKS ; i++) + if ((p = task[i]) && p->pid == pid) + break; + + if ((i >= NR_TASKS) || + ((ino >> 8) != 1) || !(new_f = p->files->fd[ino & 0x0ff])) + return -ENOENT; + + if (new_f->f_mode && !f->f_mode && 3) /* NOTE(review): logical && and ! applied to mode bits; '&& 3' is always true, so bitwise masking was presumably intended - confirm the intended check before changing */ + return -EPERM; + + new_f->f_count++; + current->files->fd[fd] = new_f; + if (!--f->f_count) + iput(f->f_inode); + return 0; +} + +static int proc_follow_link(struct inode * dir, struct inode * inode, + int flag, int mode, struct inode ** res_inode) +{ + unsigned int pid, ino; + struct task_struct * p; + struct inode * new_inode; + int i; + + *res_inode = NULL; + if (dir) + iput(dir); + if (!inode) + return -ENOENT; + if (!permission(inode, MAY_EXEC)) { + iput(inode); + return -EACCES; + } + ino = inode->i_ino; + pid = ino >> 16; + ino &= 0x0000ffff; + for (i = 0 ; i < NR_TASKS ; i++) + if ((p = task[i]) && p->pid == pid) + break; + if (i >= NR_TASKS) { + iput(inode); + return -ENOENT; + } + new_inode = NULL; + switch (ino) { + case PROC_PID_CWD: + new_inode = p->fs->pwd; + break; + case PROC_PID_ROOT: + new_inode = p->fs->root; + break; + case PROC_PID_EXE: { + struct vm_area_struct * vma = p->mm->mmap; + while (vma) { + if (vma->vm_flags & VM_EXECUTABLE) { + new_inode = vma->vm_inode; + break; + } + vma = vma->vm_next; + } + break; + } + default: + switch (ino >> 8) { + case PROC_PID_FD_DIR: + ino &= 0xff; + if (ino < NR_OPEN && p->files->fd[ino]) { +#ifdef PLAN9_SEMANTICS + if (dir) { + *res_inode = inode; + return 0; + } +#endif + new_inode = 
p->files->fd[ino]->f_inode; + } + break; + } + } + iput(inode); + if (!new_inode) + return -ENOENT; + *res_inode = new_inode; + new_inode->i_count++; + return 0; +} + +static int proc_readlink(struct inode * inode, char * buffer, int buflen) +{ + int i; + unsigned int dev,ino; + char buf[64]; + + if (!S_ISLNK(inode->i_mode)) { + iput(inode); + return -EINVAL; + } + i = proc_follow_link(NULL, inode, 0, 0, &inode); + if (i) + return i; + if (!inode) + return -EIO; + dev = inode->i_dev; + ino = inode->i_ino; + iput(inode); + i = sprintf(buf,"[%04x]:%u", dev, ino); + if (buflen > i) + buflen = i; + i = 0; + while (i < buflen) + put_fs_byte(buf[i++],buffer++); + return i; +} diff --git a/fs/proc/mem.c b/fs/proc/mem.c new file mode 100644 index 000000000..ae043bb0a --- /dev/null +++ b/fs/proc/mem.c @@ -0,0 +1,260 @@ +/* + * linux/fs/proc/mem.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> + +#include <asm/page.h> +#include <asm/segment.h> +#include <asm/io.h> + +/* + * mem_write isn't really a good idea right now. It needs + * to check a lot more: if the process we try to write to + * dies in the middle right now, mem_write will overwrite + * kernel memory.. This disables it altogether. 
+ */ +#define mem_write NULL + +static int mem_read(struct inode * inode, struct file * file,char * buf, int count) +{ + unsigned long addr, pid, cr3; + char *tmp; + unsigned long pte, page; + int i; + + if (count < 0) + return -EINVAL; + pid = inode->i_ino; + pid >>= 16; + cr3 = 0; + for (i = 1 ; i < NR_TASKS ; i++) + if (task[i] && task[i]->pid == pid) { + cr3 = task[i]->tss.cr3; + break; + } + if (!cr3) + return -EACCES; + addr = file->f_pos; + tmp = buf; + while (count > 0) { + if (current->signal & ~current->blocked) + break; + pte = *PAGE_DIR_OFFSET(cr3,addr); + if (!(pte & PAGE_PRESENT)) + break; + pte &= PAGE_MASK; + pte += PAGE_PTR(addr); + page = *(unsigned long *) pte; + if (!(page & 1)) + break; + page &= PAGE_MASK; + page += addr & ~PAGE_MASK; + i = PAGE_SIZE-(addr & ~PAGE_MASK); + if (i > count) + i = count; + memcpy_tofs(tmp,(void *) page,i); + addr += i; + tmp += i; + count -= i; + } + file->f_pos = addr; + return tmp-buf; +} + +#ifndef mem_write + +static int mem_write(struct inode * inode, struct file * file,char * buf, int count) +{ + unsigned long addr, pid, cr3; + char *tmp; + unsigned long pte, page; + int i; + + if (count < 0) + return -EINVAL; + addr = file->f_pos; + pid = inode->i_ino; + pid >>= 16; + cr3 = 0; + for (i = 1 ; i < NR_TASKS ; i++) + if (task[i] && task[i]->pid == pid) { + cr3 = task[i]->tss.cr3; + break; + } + if (!cr3) + return -EACCES; + tmp = buf; + while (count > 0) { + if (current->signal & ~current->blocked) + break; + pte = *PAGE_DIR_OFFSET(cr3,addr); + if (!(pte & PAGE_PRESENT)) + break; + pte &= PAGE_MASK; + pte += PAGE_PTR(addr); + page = *(unsigned long *) pte; + if (!(page & PAGE_PRESENT)) + break; + if (!(page & 2)) { + do_wp_page(0,addr,current,0); + continue; + } + page &= PAGE_MASK; + page += addr & ~PAGE_MASK; + i = PAGE_SIZE-(addr & ~PAGE_MASK); + if (i > count) + i = count; + memcpy_fromfs((void *) page,tmp,i); + addr += i; + tmp += i; + count -= i; + } + file->f_pos = addr; + if (tmp != buf) + return 
tmp-buf; + if (current->signal & ~current->blocked) + return -ERESTARTSYS; + return 0; +} + +#endif + +static int mem_lseek(struct inode * inode, struct file * file, off_t offset, int orig) +{ + switch (orig) { + case 0: + file->f_pos = offset; + return file->f_pos; + case 1: + file->f_pos += offset; + return file->f_pos; + default: + return -EINVAL; + } +} + +int +mem_mmap(struct inode * inode, struct file * file, + struct vm_area_struct * vma) +{ + unsigned long *src_table, *dest_table, stmp, dtmp, cr3; + struct vm_area_struct *src_vma = 0; + int i; + + /* Get the source's task information */ + + cr3 = 0; + for (i = 1 ; i < NR_TASKS ; i++) + if (task[i] && task[i]->pid == (inode->i_ino >> 16)) { + cr3 = task[i]->tss.cr3; + src_vma = task[i]->mm->mmap; + break; + } + + if (!cr3) + return -EACCES; + +/* Ensure that we have a valid source area. (Has to be mmap'ed and + have valid page information.) We can't map shared memory at the + moment because working out the vm_area_struct & nattach stuff isn't + worth it. 
*/ + + stmp = vma->vm_offset; + while (stmp < vma->vm_offset + (vma->vm_end - vma->vm_start)) { + while (src_vma && stmp > src_vma->vm_end) + src_vma = src_vma->vm_next; + if (!src_vma || (src_vma->vm_flags & VM_SHM)) + return -EINVAL; + + src_table = PAGE_DIR_OFFSET(cr3, stmp); + if (!*src_table) + return -EINVAL; + src_table = (unsigned long *)((*src_table & PAGE_MASK) + PAGE_PTR(stmp)); + if (!*src_table) + return -EINVAL; + + if (stmp < src_vma->vm_start) { + if (!(src_vma->vm_flags & VM_GROWSDOWN)) + return -EINVAL; + if (src_vma->vm_end - stmp > current->rlim[RLIMIT_STACK].rlim_cur) + return -EINVAL; + } + stmp += PAGE_SIZE; + } + + src_vma = task[i]->mm->mmap; + stmp = vma->vm_offset; + dtmp = vma->vm_start; + + while (dtmp < vma->vm_end) { + while (src_vma && stmp > src_vma->vm_end) + src_vma = src_vma->vm_next; + + src_table = PAGE_DIR_OFFSET(cr3, stmp); + src_table = (unsigned long *)((*src_table & PAGE_MASK) + PAGE_PTR(stmp)); + + dest_table = PAGE_DIR_OFFSET(current->tss.cr3, dtmp); + + if (!*dest_table) { + *dest_table = get_free_page(GFP_KERNEL); + if (!*dest_table) { oom(current); *dest_table=BAD_PAGE; } + else *dest_table |= PAGE_TABLE; + } + + dest_table = (unsigned long *)((*dest_table & PAGE_MASK) + PAGE_PTR(dtmp)); + + if (!(*src_table & PAGE_PRESENT)) + do_no_page(src_vma, stmp, PAGE_PRESENT); + + if ((vma->vm_flags & VM_WRITE) && !(*src_table & PAGE_RW)) + do_wp_page(src_vma, stmp, PAGE_RW | PAGE_PRESENT); + + *src_table |= PAGE_DIRTY; + *dest_table = *src_table; + mem_map[MAP_NR(*src_table)]++; + + stmp += PAGE_SIZE; + dtmp += PAGE_SIZE; + } + + invalidate(); + return 0; +} + +static struct file_operations proc_mem_operations = { + mem_lseek, + mem_read, + mem_write, + NULL, /* mem_readdir */ + NULL, /* mem_select */ + NULL, /* mem_ioctl */ + mem_mmap, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + NULL /* can't fsync */ +}; + +struct inode_operations proc_mem_inode_operations = { + &proc_mem_operations, 
/* default base directory file-ops */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; diff --git a/fs/proc/net.c b/fs/proc/net.c new file mode 100644 index 000000000..601f590d3 --- /dev/null +++ b/fs/proc/net.c @@ -0,0 +1,300 @@ +/* + * linux/fs/proc/net.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * gjh 3/'93 heim@peanuts.informatik.uni-tuebingen.de (Gerald J. Heim) + * most of this file is stolen from base.c + * it works, but you shouldn't use it as a guideline + * for new proc-fs entries. once i'll make it better. + * fvk 3/'93 waltje@uwalt.nl.mugnet.org (Fred N. van Kempen) + * cleaned up the whole thing, moved "net" specific code to + * the NET kernel layer (where it belonged in the first place). + * Michael K. Johnson (johnsonm@stolaf.edu) 3/93 + * Added support from my previous inet.c. Cleaned things up + * quite a bit, modularized the code. + * fvk 4/'93 waltje@uwalt.nl.mugnet.org (Fred N. van Kempen) + * Renamed "route_get_info()" to "rt_get_info()" for consistency. + * Alan Cox (gw4pts@gw4pts.ampr.org) 4/94 + * Dusted off the code and added IPX. Fixed the 4K limit. + * Erik Schoenfelder (schoenfr@ibr.cs.tu-bs.de) + * /proc/net/snmp. 
+ * + * proc net directory handling functions + */ +#include <linux/autoconf.h> + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> + +/* forward references */ +static int proc_readnet(struct inode * inode, struct file * file, + char * buf, int count); +static int proc_readnetdir(struct inode *, struct file *, + struct dirent *, int); +static int proc_lookupnet(struct inode *,const char *,int,struct inode **); + +/* the get_*_info() functions are in the net code, and are configured + in via the standard mechanism... */ +extern int unix_get_info(char *, char **, off_t, int); +#ifdef CONFIG_INET +extern int tcp_get_info(char *, char **, off_t, int); +extern int udp_get_info(char *, char **, off_t, int); +extern int raw_get_info(char *, char **, off_t, int); +extern int arp_get_info(char *, char **, off_t, int); +extern int rarp_get_info(char *, char **, off_t, int); +extern int dev_get_info(char *, char **, off_t, int); +extern int rt_get_info(char *, char **, off_t, int); +extern int snmp_get_info(char *, char **, off_t, int); +#endif /* CONFIG_INET */ +#ifdef CONFIG_IPX +extern int ipx_get_info(char *, char **, off_t, int); +extern int ipx_rt_get_info(char *, char **, off_t, int); +#endif /* CONFIG_IPX */ +#ifdef CONFIG_AX25 +extern int ax25_get_info(char *, char **, off_t, int); +extern int ax25_rt_get_info(char *, char **, off_t, int); +#ifdef CONFIG_NETROM +extern int nr_get_info(char *, char **, off_t, int); +extern int nr_nodes_get_info(char *, char **, off_t, int); +extern int nr_neigh_get_info(char *, char **, off_t, int); +#endif /* CONFIG_NETROM */ +#endif /* CONFIG_AX25 */ + + +static struct file_operations proc_net_operations = { + NULL, /* lseek - default */ + proc_readnet, /* read - bad */ + NULL, /* write - bad */ + proc_readnetdir, /* readdir */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* 
no special release code */ + NULL /* can't fsync */ +}; + +/* + * proc directories can do almost nothing.. + */ +struct inode_operations proc_net_inode_operations = { + &proc_net_operations, /* default net directory file-ops */ + NULL, /* create */ + proc_lookupnet, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static struct proc_dir_entry net_dir[] = { + { PROC_NET, 1, "." }, + { PROC_ROOT_INO, 2, ".." }, + { PROC_NET_UNIX, 4, "unix" }, +#ifdef CONFIG_INET + { PROC_NET_ARP, 3, "arp" }, + { PROC_NET_ROUTE, 5, "route" }, + { PROC_NET_DEV, 3, "dev" }, + { PROC_NET_RAW, 3, "raw" }, + { PROC_NET_TCP, 3, "tcp" }, + { PROC_NET_UDP, 3, "udp" }, + { PROC_NET_SNMP, 4, "snmp" }, +#ifdef CONFIG_INET_RARP + { PROC_NET_RARP, 4, "rarp"}, +#endif +#endif /* CONFIG_INET */ +#ifdef CONFIG_IPX + { PROC_NET_IPX_ROUTE, 9, "ipx_route" }, + { PROC_NET_IPX, 3, "ipx" }, +#endif /* CONFIG_IPX */ +#ifdef CONFIG_AX25 + { PROC_NET_AX25_ROUTE, 10, "ax25_route" }, + { PROC_NET_AX25, 4, "ax25" }, +#ifdef CONFIG_NETROM + { PROC_NET_NR_NODES, 8, "nr_nodes" }, + { PROC_NET_NR_NEIGH, 8, "nr_neigh" }, + { PROC_NET_NR, 2, "nr" }, +#endif /* CONFIG_NETROM */ +#endif /* CONFIG_AX25 */ + { 0, 0, NULL } +}; + +#define NR_NET_DIRENTRY ((sizeof (net_dir))/(sizeof (net_dir[0])) - 1) + +/* + * Look up one name in the net directory by scanning the net_dir table + * and return the matching inode through *result. The reference on dir + * is released on every path, including the one where no entry matches. + * Returns 0 on success and -ENOENT otherwise. + */ +static int proc_lookupnet(struct inode * dir,const char * name, int len, + struct inode ** result) +{ + struct proc_dir_entry *de; + + *result = NULL; + if (!dir) + return -ENOENT; + if (!S_ISDIR(dir->i_mode)) { + iput(dir); + return -ENOENT; + } + for (de = net_dir ; de->name ; de++) { + if (!proc_match(len, name, de)) + continue; + *result = iget(dir->i_sb, de->low_ino); + iput(dir); + if (!*result) + return -ENOENT; + return 0; + } + /* no entry matched: the reference on dir must still be dropped */ + iput(dir); + return -ENOENT; +} + +static int proc_readnetdir(struct inode * 
inode, struct file * filp, + struct dirent * dirent, int count) +{ + struct proc_dir_entry * de; + unsigned int ino; + int i,j; + + if (!inode || !S_ISDIR(inode->i_mode)) + return -EBADF; + ino = inode->i_ino; + if (((unsigned) filp->f_pos) < NR_NET_DIRENTRY) { + de = net_dir + filp->f_pos; + filp->f_pos++; + i = de->namelen; + ino = de->low_ino; + put_fs_long(ino, &dirent->d_ino); + put_fs_word(i,&dirent->d_reclen); + put_fs_byte(0,i+dirent->d_name); + j = i; + while (i--) + put_fs_byte(de->name[i], i+dirent->d_name); + return j; + } + return 0; +} + + +#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ + +static int proc_readnet(struct inode * inode, struct file * file, + char * buf, int count) +{ + char * page; + int length; + unsigned int ino; + int bytes=count; + int thistime; + int copied=0; + char *start; + + if (count < 0) + return -EINVAL; + if (!(page = (char*) __get_free_page(GFP_KERNEL))) + return -ENOMEM; + ino = inode->i_ino; + + while(bytes>0) + { + thistime=bytes; + if(bytes>PROC_BLOCK_SIZE) + thistime=PROC_BLOCK_SIZE; + + switch (ino) + { + case PROC_NET_UNIX: + length = unix_get_info(page,&start,file->f_pos,thistime); + break; +#ifdef CONFIG_INET + case PROC_NET_ARP: + length = arp_get_info(page,&start,file->f_pos,thistime); + break; + case PROC_NET_ROUTE: + length = rt_get_info(page,&start,file->f_pos,thistime); + break; + case PROC_NET_DEV: + length = dev_get_info(page,&start,file->f_pos,thistime); + break; + case PROC_NET_RAW: + length = raw_get_info(page,&start,file->f_pos,thistime); + break; + case PROC_NET_TCP: + length = tcp_get_info(page,&start,file->f_pos,thistime); + break; + case PROC_NET_UDP: + length = udp_get_info(page,&start,file->f_pos,thistime); + break; + case PROC_NET_SNMP: + length = snmp_get_info(page, &start, file->f_pos,thistime); + break; +#ifdef CONFIG_INET_RARP + case PROC_NET_RARP: + length = rarp_get_info(page,&start,file->f_pos,thistime); + break; +#endif /* 
CONFIG_INET_RARP */ +#endif /* CONFIG_INET */ +#ifdef CONFIG_IPX + case PROC_NET_IPX_ROUTE: + length = ipx_rt_get_info(page,&start,file->f_pos,thistime); + break; + case PROC_NET_IPX: + length = ipx_get_info(page,&start,file->f_pos,thistime); + break; +#endif /* CONFIG_IPX */ +#ifdef CONFIG_AX25 + case PROC_NET_AX25_ROUTE: + length = ax25_rt_get_info(page,&start,file->f_pos,thistime); + break; + case PROC_NET_AX25: + length = ax25_get_info(page,&start,file->f_pos,thistime); + break; +#ifdef CONFIG_NETROM + case PROC_NET_NR_NODES: + length = nr_nodes_get_info(page,&start,file->f_pos,thistime); + break; + case PROC_NET_NR_NEIGH: + length = nr_neigh_get_info(page,&start,file->f_pos,thistime); + break; + case PROC_NET_NR: + length = nr_get_info(page,&start,file->f_pos,thistime); + break; +#endif /* CONFIG_NETROM */ +#endif /* CONFIG_AX25 */ + + default: + free_page((unsigned long) page); + return -EBADF; + } + + /* + * We have been given a non page aligned block of + * the data we asked for + a bit. We have been given + * the start pointer and we know the length.. 
+ */ + + if (length <= 0) + break; + /* + * Copy the bytes + */ + memcpy_tofs(buf+copied, start, length); + file->f_pos+=length; /* Move down the file */ + bytes-=length; + copied+=length; + if(length<thistime) + break; /* End of file */ + } + free_page((unsigned long) page); + return copied; + +} diff --git a/fs/proc/root.c b/fs/proc/root.c new file mode 100644 index 000000000..97cf2ff25 --- /dev/null +++ b/fs/proc/root.c @@ -0,0 +1,184 @@ +/* + * linux/fs/proc/root.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * proc root directory handling functions + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/config.h> + +static int proc_readroot(struct inode *, struct file *, struct dirent *, int); +static int proc_lookuproot(struct inode *,const char *,int,struct inode **); + +static struct file_operations proc_root_operations = { + NULL, /* lseek - default */ + NULL, /* read - bad */ + NULL, /* write - bad */ + proc_readroot, /* readdir */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + NULL /* no fsync */ +}; + +/* + * proc directories can do almost nothing.. + */ +struct inode_operations proc_root_inode_operations = { + &proc_root_operations, /* default base directory file-ops */ + NULL, /* create */ + proc_lookuproot, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static struct proc_dir_entry root_dir[] = { + { PROC_ROOT_INO, 1, "." }, + { PROC_ROOT_INO, 2, ".." 
}, + { PROC_LOADAVG, 7, "loadavg" }, + { PROC_UPTIME, 6, "uptime" }, + { PROC_MEMINFO, 7, "meminfo" }, + { PROC_KMSG, 4, "kmsg" }, + { PROC_VERSION, 7, "version" }, + { PROC_SELF, 4, "self" }, /* will change inode # */ + { PROC_NET, 3, "net" }, +#ifdef CONFIG_DEBUG_MALLOC + { PROC_MALLOC, 6, "malloc" }, +#endif + { PROC_KCORE, 5, "kcore" }, + { PROC_MODULES, 7, "modules" }, + { PROC_STAT, 4, "stat" }, + { PROC_DEVICES, 7, "devices" }, + { PROC_INTERRUPTS, 10,"interrupts" }, + { PROC_FILESYSTEMS, 11,"filesystems" }, + { PROC_KSYMS, 5, "ksyms" }, + { PROC_DMA, 3, "dma" }, +}; + +#define NR_ROOT_DIRENTRY ((sizeof (root_dir))/(sizeof (root_dir[0]))) + +static int proc_lookuproot(struct inode * dir,const char * name, int len, + struct inode ** result) +{ + unsigned int pid, c; + int i, ino; + + *result = NULL; + if (!dir) + return -ENOENT; + if (!S_ISDIR(dir->i_mode)) { + iput(dir); + return -ENOENT; + } + i = NR_ROOT_DIRENTRY; + while (i-- > 0 && !proc_match(len,name,root_dir+i)) + /* nothing */; + if (i >= 0) { + ino = root_dir[i].low_ino; + if (ino == PROC_ROOT_INO) { + *result = dir; + return 0; + } + if (ino == PROC_SELF) /* self modifying inode ... 
*/ + ino = (current->pid << 16) + 2; + } else { + pid = 0; + while (len-- > 0) { + c = *name - '0'; + name++; + if (c > 9) { + pid = 0; + break; + } + pid *= 10; + pid += c; + if (pid & 0xffff0000) { + pid = 0; + break; + } + } + for (i = 0 ; i < NR_TASKS ; i++) + if (task[i] && task[i]->pid == pid) + break; + if (!pid || i >= NR_TASKS) { + iput(dir); + return -ENOENT; + } + ino = (pid << 16) + 2; + } + if (!(*result = iget(dir->i_sb,ino))) { + iput(dir); + return -ENOENT; + } + iput(dir); + return 0; +} + +static int proc_readroot(struct inode * inode, struct file * filp, + struct dirent * dirent, int count) +{ + struct task_struct * p; + unsigned int nr,pid; + int i,j; + + if (!inode || !S_ISDIR(inode->i_mode)) + return -EBADF; +repeat: + nr = filp->f_pos; + if (nr < NR_ROOT_DIRENTRY) { + struct proc_dir_entry * de = root_dir + nr; + + filp->f_pos++; + i = de->namelen; + put_fs_long(de->low_ino, &dirent->d_ino); + put_fs_word(i,&dirent->d_reclen); + put_fs_byte(0,i+dirent->d_name); + j = i; + while (i--) + put_fs_byte(de->name[i], i+dirent->d_name); + return j; + } + nr -= NR_ROOT_DIRENTRY; + if (nr >= NR_TASKS) + return 0; + filp->f_pos++; + p = task[nr]; + if (!p || !(pid = p->pid)) + goto repeat; + if (pid & 0xffff0000) + goto repeat; + j = 10; + i = 1; + while (pid >= j) { + j *= 10; + i++; + } + j = i; + put_fs_long((pid << 16)+2, &dirent->d_ino); + put_fs_word(i, &dirent->d_reclen); + put_fs_byte(0, i+dirent->d_name); + while (i--) { + put_fs_byte('0'+(pid % 10), i+dirent->d_name); + pid /= 10; + } + return j; +} diff --git a/fs/read_write.c b/fs/read_write.c new file mode 100644 index 000000000..5f457b9cb --- /dev/null +++ b/fs/read_write.c @@ -0,0 +1,171 @@ +/* + * linux/fs/read_write.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/stat.h> +#include <linux/kernel.h> +#include <linux/sched.h> + +#include <asm/segment.h> + +/* + * Count is now a supported feature, but currently 
only the ext2fs + * uses it. A count value of 1 is supported for compatibility with + * earlier libraries, but larger values are supported: count should + * indicate the total buffer space available for filling with dirents. + * The d_off entry in the dirents will then indicate the offset from + * each dirent to the next, and the return value will indicate the + * number of bytes written. All dirents will be written at + * word-aligned addresses. [sct Oct 1994] + */ +asmlinkage int sys_readdir(unsigned int fd, struct dirent * dirent, unsigned int count) +{ + int error; + struct file * file; + struct inode * inode; + + if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || + !(inode = file->f_inode)) + return -EBADF; + error = -ENOTDIR; + if (file->f_op && file->f_op->readdir) { + int size = count; + if (count == 1) + size = sizeof(*dirent); + error = verify_area(VERIFY_WRITE, dirent, size); + if (!error) + error = file->f_op->readdir(inode,file,dirent,count); + } + return error; +} + +asmlinkage int sys_lseek(unsigned int fd, off_t offset, unsigned int origin) +{ + struct file * file; + int tmp = -1; + + if (fd >= NR_OPEN || !(file=current->files->fd[fd]) || !(file->f_inode)) + return -EBADF; + if (origin > 2) + return -EINVAL; + if (file->f_op && file->f_op->lseek) + return file->f_op->lseek(file->f_inode,file,offset,origin); + +/* this is the default handler if no lseek handler is present */ + switch (origin) { + case 0: + tmp = offset; + break; + case 1: + tmp = file->f_pos + offset; + break; + case 2: + if (!file->f_inode) + return -EINVAL; + tmp = file->f_inode->i_size + offset; + break; + } + if (tmp < 0) + return -EINVAL; + if (tmp != file->f_pos) { + file->f_pos = tmp; + file->f_reada = 0; + file->f_version = ++event; + } + return file->f_pos; +} + +asmlinkage int sys_llseek(unsigned int fd, unsigned long offset_high, + unsigned long offset_low, loff_t * result, + unsigned int origin) +{ + struct file * file; + loff_t tmp = -1; + loff_t offset; + int err; 
+ + if (fd >= NR_OPEN || !(file=current->files->fd[fd]) || !(file->f_inode)) + return -EBADF; + if (origin > 2) + return -EINVAL; + if ((err = verify_area(VERIFY_WRITE, result, sizeof(loff_t)))) + return err; + offset = (loff_t) (((unsigned long long) offset_high << 32) | offset_low); +/* there is no fs specific llseek handler */ + switch (origin) { + case 0: + tmp = offset; + break; + case 1: + tmp = file->f_pos + offset; + break; + case 2: + if (!file->f_inode) + return -EINVAL; + tmp = file->f_inode->i_size + offset; + break; + } + if (tmp < 0) + return -EINVAL; + file->f_pos = tmp; + file->f_reada = 0; + file->f_version = ++event; + memcpy_tofs(result, &file->f_pos, sizeof(loff_t)); + return 0; +} + +asmlinkage int sys_read(unsigned int fd,char * buf,unsigned int count) +{ + int error; + struct file * file; + struct inode * inode; + + if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode)) + return -EBADF; + if (!(file->f_mode & 1)) + return -EBADF; + if (!file->f_op || !file->f_op->read) + return -EINVAL; + if (!count) + return 0; + error = verify_area(VERIFY_WRITE,buf,count); + if (error) + return error; + return file->f_op->read(inode,file,buf,count); +} + +asmlinkage int sys_write(unsigned int fd,char * buf,unsigned int count) +{ + int error; + struct file * file; + struct inode * inode; + int written; + + if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode)) + return -EBADF; + if (!(file->f_mode & 2)) + return -EBADF; + if (!file->f_op || !file->f_op->write) + return -EINVAL; + if (!count) + return 0; + error = verify_area(VERIFY_READ,buf,count); + if (error) + return error; + written = file->f_op->write(inode,file,buf,count); + /* + * If data has been written to the file, remove the setuid and + * the setgid bits + */ + if (written > 0 && !suser() && (inode->i_mode & (S_ISUID | S_ISGID))) { + struct iattr newattrs; + newattrs.ia_mode = inode->i_mode & ~(S_ISUID | S_ISGID); + newattrs.ia_valid = ATTR_MODE; + 
notify_change(inode, &newattrs); + } + return written; +} diff --git a/fs/select.c b/fs/select.c new file mode 100644 index 000000000..e87ae07eb --- /dev/null +++ b/fs/select.c @@ -0,0 +1,258 @@ +/* + * This file contains the procedures for the handling of select + * + * Created for Linux based loosely upon Mathius Lattner's minix + * patches by Peter MacDonald. Heavily edited by Linus. + * + * 4 February 1994 + * COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS + * flag set in its personality we do *not* modify the given timeout + * parameter to reflect time remaining. + */ + +#include <linux/types.h> +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/personality.h> + +#include <asm/segment.h> +#include <asm/system.h> + +#define ROUND_UP(x,y) (((x)+(y)-1)/(y)) + +/* + * Ok, Peter made a complicated, but straightforward multiple_wait() function. + * I have rewritten this, taking some shortcuts: This code may not be easy to + * follow, but it should be free of race-conditions, and it's practical. If you + * understand what I'm doing here, then you understand how the linux + * sleep/wakeup mechanism works. + * + * Two very simple procedures, select_wait() and free_wait() make all the work. + * select_wait() is a inline-function defined in <linux/sched.h>, as all select + * functions have to call it to add an entry to the select table. + */ + +/* + * I rewrote this again to make the select_table size variable, take some + * more shortcuts, improve responsiveness, and remove another race that + * Linus noticed. 
-- jrs + */ + +static void free_wait(select_table * p) +{ + struct select_table_entry * entry = p->entry + p->nr; + + while (p->nr > 0) { + p->nr--; + entry--; + remove_wait_queue(entry->wait_address,&entry->wait); + } +} + +/* + * The check function checks the ready status of a file using the vfs layer. + * + * If the file was not ready we were added to its wait queue. But in + * case it became ready just after the check and just before it called + * select_wait, we call it again, knowing we are already on its + * wait queue this time. The second call is not necessary if the + * select_table is NULL indicating an earlier file check was ready + * and we aren't going to sleep on the select_table. -- jrs + */ + +static int check(int flag, select_table * wait, struct file * file) +{ + struct inode * inode; + struct file_operations *fops; + int (*select) (struct inode *, struct file *, int, select_table *); + + inode = file->f_inode; + if ((fops = file->f_op) && (select = fops->select)) + return select(inode, file, flag, wait) + || (wait && select(inode, file, flag, NULL)); + if (S_ISREG(inode->i_mode)) + return 1; + return 0; +} + +static int do_select(int n, fd_set *in, fd_set *out, fd_set *ex, + fd_set *res_in, fd_set *res_out, fd_set *res_ex) +{ + int count; + select_table wait_table, *wait; + struct select_table_entry *entry; + unsigned long set; + int i,j; + int max = -1; + + for (j = 0 ; j < __FDSET_LONGS ; j++) { + i = j << 5; + if (i >= n) + break; + set = in->fds_bits[j] | out->fds_bits[j] | ex->fds_bits[j]; + for ( ; set ; i++,set >>= 1) { + if (i >= n) + goto end_check; + if (!(set & 1)) + continue; + if (!current->files->fd[i]) + return -EBADF; + if (!current->files->fd[i]->f_inode) + return -EBADF; + max = i; + } + } +end_check: + n = max + 1; + if(!(entry = (struct select_table_entry*) __get_free_page(GFP_KERNEL))) + return -ENOMEM; + FD_ZERO(res_in); + FD_ZERO(res_out); + FD_ZERO(res_ex); + count = 0; + wait_table.nr = 0; + wait_table.entry = entry; + 
wait = &wait_table; +repeat: + current->state = TASK_INTERRUPTIBLE; + for (i = 0 ; i < n ; i++) { + if (FD_ISSET(i,in) && check(SEL_IN,wait,current->files->fd[i])) { + FD_SET(i, res_in); + count++; + wait = NULL; + } + if (FD_ISSET(i,out) && check(SEL_OUT,wait,current->files->fd[i])) { + FD_SET(i, res_out); + count++; + wait = NULL; + } + if (FD_ISSET(i,ex) && check(SEL_EX,wait,current->files->fd[i])) { + FD_SET(i, res_ex); + count++; + wait = NULL; + } + } + wait = NULL; + if (!count && current->timeout && !(current->signal & ~current->blocked)) { + schedule(); + goto repeat; + } + free_wait(&wait_table); + free_page((unsigned long) entry); + current->state = TASK_RUNNING; + return count; +} + +/* + * We do a VERIFY_WRITE here even though we are only reading this time: + * we'll write to it eventually.. + */ +static int __get_fd_set(int nr, unsigned long * fs_pointer, unsigned long * fdset) +{ + int error; + + FD_ZERO(fdset); + if (!fs_pointer) + return 0; + error = verify_area(VERIFY_WRITE,fs_pointer,sizeof(fd_set)); + if (error) + return error; + while (nr > 0) { + *fdset = get_fs_long(fs_pointer); + fdset++; + fs_pointer++; + nr -= 32; + } + return 0; +} + +static void __set_fd_set(int nr, unsigned long * fs_pointer, unsigned long * fdset) +{ + if (!fs_pointer) + return; + while (nr > 0) { + put_fs_long(*fdset, fs_pointer); + fdset++; + fs_pointer++; + nr -= 32; + } +} + +#define get_fd_set(nr,fsp,fdp) \ +__get_fd_set(nr, (unsigned long *) (fsp), (unsigned long *) (fdp)) + +#define set_fd_set(nr,fsp,fdp) \ +__set_fd_set(nr, (unsigned long *) (fsp), (unsigned long *) (fdp)) + +/* + * We can actually return ERESTARTSYS instead of EINTR, but I'd + * like to be certain this leads to no problems. So I return + * EINTR just for safety. + * + * Update: ERESTARTSYS breaks at least the xview clock binary, so + * I'm trying ERESTARTNOHAND which restart only when you want to. 
+ */ +asmlinkage int sys_select( unsigned long *buffer ) +{ +/* Perform the select(nd, in, out, ex, tv) system call. */ + int i; + fd_set res_in, in, *inp; + fd_set res_out, out, *outp; + fd_set res_ex, ex, *exp; + int n; + struct timeval *tvp; + unsigned long timeout; + + i = verify_area(VERIFY_READ, buffer, 20); + if (i) + return i; + n = get_fs_long(buffer++); + if (n < 0) + return -EINVAL; + if (n > NR_OPEN) + n = NR_OPEN; + inp = (fd_set *) get_fs_long(buffer++); + outp = (fd_set *) get_fs_long(buffer++); + exp = (fd_set *) get_fs_long(buffer++); + tvp = (struct timeval *) get_fs_long(buffer); + if ((i = get_fd_set(n, inp, &in)) || + (i = get_fd_set(n, outp, &out)) || + (i = get_fd_set(n, exp, &ex))) return i; + timeout = ~0UL; + if (tvp) { + i = verify_area(VERIFY_WRITE, tvp, sizeof(*tvp)); + if (i) + return i; + timeout = ROUND_UP(get_fs_long((unsigned long *)&tvp->tv_usec),(1000000/HZ)); + timeout += get_fs_long((unsigned long *)&tvp->tv_sec) * HZ; + if (timeout) + timeout += jiffies + 1; + } + current->timeout = timeout; + i = do_select(n, &in, &out, &ex, &res_in, &res_out, &res_ex); + if (current->timeout > jiffies) + timeout = current->timeout - jiffies; + else + timeout = 0; + current->timeout = 0; + if (tvp && !(current->personality & STICKY_TIMEOUTS)) { + put_fs_long(timeout/HZ, (unsigned long *) &tvp->tv_sec); + timeout %= HZ; + timeout *= (1000000/HZ); + put_fs_long(timeout, (unsigned long *) &tvp->tv_usec); + } + if (i < 0) + return i; + if (!i && (current->signal & ~current->blocked)) + return -ERESTARTNOHAND; + set_fd_set(n, inp, &res_in); + set_fd_set(n, outp, &res_out); + set_fd_set(n, exp, &res_ex); + return i; +} diff --git a/fs/stat.c b/fs/stat.c new file mode 100644 index 000000000..70f5d166e --- /dev/null +++ b/fs/stat.c @@ -0,0 +1,207 @@ +/* + * linux/fs/stat.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/fs.h> +#include 
<linux/sched.h> +#include <linux/kernel.h> +#include <asm/segment.h> + +static void cp_old_stat(struct inode * inode, struct old_stat * statbuf) +{ + struct old_stat tmp; + + printk("VFS: Warning: %s using old stat() call. Recompile your binary.\n", + current->comm); + tmp.st_dev = inode->i_dev; + tmp.st_ino = inode->i_ino; + tmp.st_mode = inode->i_mode; + tmp.st_nlink = inode->i_nlink; + tmp.st_uid = inode->i_uid; + tmp.st_gid = inode->i_gid; + tmp.st_rdev = inode->i_rdev; + tmp.st_size = inode->i_size; + if (inode->i_pipe) + tmp.st_size = PIPE_SIZE(*inode); + tmp.st_atime = inode->i_atime; + tmp.st_mtime = inode->i_mtime; + tmp.st_ctime = inode->i_ctime; + memcpy_tofs(statbuf,&tmp,sizeof(tmp)); +} + +static void cp_new_stat(struct inode * inode, struct new_stat * statbuf) +{ + struct new_stat tmp; + unsigned int blocks, indirect; + + memset(&tmp, 0, sizeof(tmp)); + tmp.st_dev = inode->i_dev; + tmp.st_ino = inode->i_ino; + tmp.st_mode = inode->i_mode; + tmp.st_nlink = inode->i_nlink; + tmp.st_uid = inode->i_uid; + tmp.st_gid = inode->i_gid; + tmp.st_rdev = inode->i_rdev; + tmp.st_size = inode->i_size; + if (inode->i_pipe) + tmp.st_size = PIPE_SIZE(*inode); + tmp.st_atime = inode->i_atime; + tmp.st_mtime = inode->i_mtime; + tmp.st_ctime = inode->i_ctime; +/* + * st_blocks and st_blksize are approximated with a simple algorithm if + * they aren't supported directly by the filesystem. The minix and msdos + * filesystems don't keep track of blocks, so they would either have to + * be counted explicitly (by delving into the file itself), or by using + * this simple algorithm to get a reasonable (although not 100% accurate) + * value. + */ + +/* + * Use minix fs values for the number of direct and indirect blocks. The + * count is now exact for the minix fs except that it counts zero blocks. + * Everything is in BLOCK_SIZE'd units until the assignment to + * tmp.st_blksize. 
+ */ +#define D_B 7 +#define I_B (BLOCK_SIZE / sizeof(unsigned short)) + + if (!inode->i_blksize) { + blocks = (tmp.st_size + BLOCK_SIZE - 1) / BLOCK_SIZE; + if (blocks > D_B) { + indirect = (blocks - D_B + I_B - 1) / I_B; + blocks += indirect; + if (indirect > 1) { + indirect = (indirect - 1 + I_B - 1) / I_B; + blocks += indirect; + if (indirect > 1) + blocks++; + } + } + tmp.st_blocks = (BLOCK_SIZE / 512) * blocks; + tmp.st_blksize = BLOCK_SIZE; + } else { + tmp.st_blocks = inode->i_blocks; + tmp.st_blksize = inode->i_blksize; + } + memcpy_tofs(statbuf,&tmp,sizeof(tmp)); +} + +asmlinkage int sys_stat(char * filename, struct old_stat * statbuf) +{ + struct inode * inode; + int error; + + error = verify_area(VERIFY_WRITE,statbuf,sizeof (*statbuf)); + if (error) + return error; + error = namei(filename,&inode); + if (error) + return error; + cp_old_stat(inode,statbuf); + iput(inode); + return 0; +} + +asmlinkage int sys_newstat(char * filename, struct new_stat * statbuf) +{ + struct inode * inode; + int error; + + error = verify_area(VERIFY_WRITE,statbuf,sizeof (*statbuf)); + if (error) + return error; + error = namei(filename,&inode); + if (error) + return error; + cp_new_stat(inode,statbuf); + iput(inode); + return 0; +} + +asmlinkage int sys_lstat(char * filename, struct old_stat * statbuf) +{ + struct inode * inode; + int error; + + error = verify_area(VERIFY_WRITE,statbuf,sizeof (*statbuf)); + if (error) + return error; + error = lnamei(filename,&inode); + if (error) + return error; + cp_old_stat(inode,statbuf); + iput(inode); + return 0; +} + +asmlinkage int sys_newlstat(char * filename, struct new_stat * statbuf) +{ + struct inode * inode; + int error; + + error = verify_area(VERIFY_WRITE,statbuf,sizeof (*statbuf)); + if (error) + return error; + error = lnamei(filename,&inode); + if (error) + return error; + cp_new_stat(inode,statbuf); + iput(inode); + return 0; +} + +asmlinkage int sys_fstat(unsigned int fd, struct old_stat * statbuf) +{ + struct file * f; 
+ struct inode * inode; + int error; + + error = verify_area(VERIFY_WRITE,statbuf,sizeof (*statbuf)); + if (error) + return error; + if (fd >= NR_OPEN || !(f=current->files->fd[fd]) || !(inode=f->f_inode)) + return -EBADF; + cp_old_stat(inode,statbuf); + return 0; +} + +asmlinkage int sys_newfstat(unsigned int fd, struct new_stat * statbuf) +{ + struct file * f; + struct inode * inode; + int error; + + error = verify_area(VERIFY_WRITE,statbuf,sizeof (*statbuf)); + if (error) + return error; + if (fd >= NR_OPEN || !(f=current->files->fd[fd]) || !(inode=f->f_inode)) + return -EBADF; + cp_new_stat(inode,statbuf); + return 0; +} + +asmlinkage int sys_readlink(const char * path, char * buf, int bufsiz) +{ + struct inode * inode; + int error; + + if (bufsiz <= 0) + return -EINVAL; + error = verify_area(VERIFY_WRITE,buf,bufsiz); + if (error) + return error; + error = lnamei(path,&inode); + if (error) + return error; + if (!inode->i_op || !inode->i_op->readlink) { + iput(inode); + return -EINVAL; + } + return inode->i_op->readlink(inode,buf,bufsiz); +} diff --git a/fs/super.c b/fs/super.c new file mode 100644 index 000000000..9ead32a3e --- /dev/null +++ b/fs/super.c @@ -0,0 +1,689 @@ +/* + * linux/fs/super.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * super.c contains code to handle the super-block tables. 
+ */ +#include <stdarg.h> + +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/locks.h> + +#include <asm/system.h> +#include <asm/segment.h> +#include <asm/bitops.h> + +extern struct file_operations * get_blkfops(unsigned int); +extern struct file_operations * get_chrfops(unsigned int); + +extern void wait_for_keypress(void); + +extern int root_mountflags; + +struct super_block super_blocks[NR_SUPER]; + +static int do_remount_sb(struct super_block *sb, int flags, char * data); + +/* this is initialized in init/main.c */ +dev_t ROOT_DEV = 0; + +static struct file_system_type * file_systems = NULL; + +int register_filesystem(struct file_system_type * fs) +{ + struct file_system_type ** tmp; + + if (!fs) + return -EINVAL; + if (fs->next) + return -EBUSY; + tmp = &file_systems; + while (*tmp) { + if (strcmp((*tmp)->name, fs->name) == 0) + return -EBUSY; + tmp = &(*tmp)->next; + } + *tmp = fs; + return 0; +} + +int unregister_filesystem(struct file_system_type * fs) +{ + struct file_system_type ** tmp; + + tmp = &file_systems; + while (*tmp) { + if (fs == *tmp) { + *tmp = fs->next; + fs->next = NULL; + return 0; + } + tmp = &(*tmp)->next; + } + return -EINVAL; +} + +static int fs_index(const char * __name) +{ + struct file_system_type * tmp; + char * name; + int err, index; + + err = getname(__name, &name); + if (err) + return err; + index = 0; + for (tmp = file_systems ; tmp ; tmp = tmp->next) { + if (strcmp(tmp->name, name) == 0) { + putname(name); + return index; + } + index++; + } + putname(name); + return -EINVAL; +} + +static int fs_name(unsigned int index, char * buf) +{ + struct file_system_type * tmp; + int err, len; + + tmp = file_systems; + while (tmp && index > 0) { + tmp = tmp->next; + index--; + } + if (!tmp) + return -EINVAL; + len = strlen(tmp->name) + 1; + err = verify_area(VERIFY_WRITE, buf, len); + if 
(err) + return err; + memcpy_tofs(buf, tmp->name, len); + return 0; +} + +static int fs_maxindex(void) +{ + struct file_system_type * tmp; + int index; + + index = 0; + for (tmp = file_systems ; tmp ; tmp = tmp->next) + index++; + return index; +} + +/* + * Whee.. Weird sysv syscall. + */ +asmlinkage int sys_sysfs(int option, ...) +{ + va_list args; + int retval = -EINVAL; + unsigned int index; + + va_start(args, option); + switch (option) { + case 1: + retval = fs_index(va_arg(args, const char *)); + break; + + case 2: + index = va_arg(args, unsigned int); + retval = fs_name(index, va_arg(args, char *)); + break; + + case 3: + retval = fs_maxindex(); + break; + } + va_end(args); + return retval; +} + +int get_filesystem_list(char * buf) +{ + int len = 0; + struct file_system_type * tmp; + + tmp = file_systems; + while (tmp && len < PAGE_SIZE - 80) { + len += sprintf(buf+len, "%s\t%s\n", + tmp->requires_dev ? "" : "nodev", + tmp->name); + tmp = tmp->next; + } + return len; +} + +struct file_system_type *get_fs_type(char *name) +{ + struct file_system_type * fs = file_systems; + + if (!name) + return fs; + while (fs) { + if (!strcmp(name,fs->name)) + break; + fs = fs->next; + } + return fs; +} + +void __wait_on_super(struct super_block * sb) +{ + struct wait_queue wait = { current, NULL }; + + add_wait_queue(&sb->s_wait, &wait); +repeat: + current->state = TASK_UNINTERRUPTIBLE; + if (sb->s_lock) { + schedule(); + goto repeat; + } + remove_wait_queue(&sb->s_wait, &wait); + current->state = TASK_RUNNING; +} + +void sync_supers(dev_t dev) +{ + struct super_block * sb; + + for (sb = super_blocks + 0 ; sb < super_blocks + NR_SUPER ; sb++) { + if (!sb->s_dev) + continue; + if (dev && sb->s_dev != dev) + continue; + wait_on_super(sb); + if (!sb->s_dev || !sb->s_dirt) + continue; + if (dev && (dev != sb->s_dev)) + continue; + if (sb->s_op && sb->s_op->write_super) + sb->s_op->write_super(sb); + } +} + +static struct super_block * get_super(dev_t dev) +{ + struct super_block 
* s; + + if (!dev) + return NULL; + s = 0+super_blocks; + while (s < NR_SUPER+super_blocks) + if (s->s_dev == dev) { + wait_on_super(s); + if (s->s_dev == dev) + return s; + s = 0+super_blocks; + } else + s++; + return NULL; +} + +void put_super(dev_t dev) +{ + struct super_block * sb; + + if (dev == ROOT_DEV) { + printk("VFS: Root device %d/%d: prepare for armageddon\n", + MAJOR(dev), MINOR(dev)); + return; + } + if (!(sb = get_super(dev))) + return; + if (sb->s_covered) { + printk("VFS: Mounted device %d/%d - tssk, tssk\n", + MAJOR(dev), MINOR(dev)); + return; + } + if (sb->s_op && sb->s_op->put_super) + sb->s_op->put_super(sb); +} + +static struct super_block * read_super(dev_t dev,char *name,int flags, + void *data, int silent) +{ + struct super_block * s; + struct file_system_type *type; + + if (!dev) + return NULL; + check_disk_change(dev); + s = get_super(dev); + if (s) + return s; + if (!(type = get_fs_type(name))) { + printk("VFS: on device %d/%d: get_fs_type(%s) failed\n", + MAJOR(dev), MINOR(dev), name); + return NULL; + } + for (s = 0+super_blocks ;; s++) { + if (s >= NR_SUPER+super_blocks) + return NULL; + if (!s->s_dev) + break; + } + s->s_dev = dev; + s->s_flags = flags; + if (!type->read_super(s,data, silent)) { + s->s_dev = 0; + return NULL; + } + s->s_dev = dev; + s->s_covered = NULL; + s->s_rd_only = 0; + s->s_dirt = 0; + return s; +} + +/* + * Unnamed block devices are dummy devices used by virtual + * filesystems which don't use real block-devices. 
-- jrs + */ + +static char unnamed_dev_in_use[256/8] = { 0, }; + +static dev_t get_unnamed_dev(void) +{ + int i; + + for (i = 1; i < 256; i++) { + if (!set_bit(i,unnamed_dev_in_use)) + return (UNNAMED_MAJOR << 8) | i; + } + return 0; +} + +static void put_unnamed_dev(dev_t dev) +{ + if (!dev) + return; + if (MAJOR(dev) == UNNAMED_MAJOR && + clear_bit(MINOR(dev), unnamed_dev_in_use)) + return; + printk("VFS: put_unnamed_dev: freeing unused device %d/%d\n", + MAJOR(dev), MINOR(dev)); +} + +static int do_umount(dev_t dev) +{ + struct super_block * sb; + int retval; + + if (dev==ROOT_DEV) { + /* Special case for "unmounting" root. We just try to remount + it readonly, and sync() the device. */ + if (!(sb=get_super(dev))) + return -ENOENT; + if (!(sb->s_flags & MS_RDONLY)) { + fsync_dev(dev); + retval = do_remount_sb(sb, MS_RDONLY, 0); + if (retval) + return retval; + } + return 0; + } + if (!(sb=get_super(dev)) || !(sb->s_covered)) + return -ENOENT; + if (!sb->s_covered->i_mount) + printk("VFS: umount(%d/%d): mounted inode has i_mount=NULL\n", + MAJOR(dev), MINOR(dev)); + if (!fs_may_umount(dev, sb->s_mounted)) + return -EBUSY; + sb->s_covered->i_mount = NULL; + iput(sb->s_covered); + sb->s_covered = NULL; + iput(sb->s_mounted); + sb->s_mounted = NULL; + if (sb->s_op && sb->s_op->write_super && sb->s_dirt) + sb->s_op->write_super(sb); + put_super(dev); + return 0; +} + +/* + * Now umount can handle mount points as well as block devices. + * This is important for filesystems which use unnamed block devices. + * + * There is a little kludge here with the dummy_inode. The current + * vfs release functions only use the r_dev field in the inode so + * we give them the info they need without using a real inode. + * If any other fields are ever needed by any block device release + * functions, they should be faked here. 
-- jrs
 */

/*
 * umount(name): name may be a block device node or the root directory of
 * a mounted filesystem.  Only the superuser may unmount.  On success the
 * block driver's release operation is called, an unnamed device number is
 * handed back, and the device is synced.
 *
 * Returns 0 on success; -EPERM, the namei()/lnamei() error, -EACCES,
 * -EINVAL, -ENXIO, or the do_umount() error otherwise.
 */
asmlinkage int sys_umount(char * name)
{
	struct inode * inode;
	dev_t dev;
	int retval;
	struct inode dummy_inode;
	struct file_operations * fops;

	if (!suser())
		return -EPERM;
	retval = namei(name,&inode);
	if (retval) {
		retval = lnamei(name,&inode);
		if (retval)
			return retval;
	}
	if (S_ISBLK(inode->i_mode)) {
		dev = inode->i_rdev;
		if (IS_NODEV(inode)) {
			iput(inode);
			return -EACCES;
		}
	} else {
		/*
		 * inode has already been dereferenced by S_ISBLK() above,
		 * so only the superblock linkage needs checking here.
		 */
		if (!inode->i_sb || inode != inode->i_sb->s_mounted) {
			iput(inode);
			return -EINVAL;
		}
		dev = inode->i_sb->s_dev;
		iput(inode);
		/*
		 * From here on a zeroed stack inode carrying only i_rdev
		 * stands in for the real one when calling the driver's
		 * release operation.  It must never be passed to iput().
		 */
		memset(&dummy_inode, 0, sizeof(dummy_inode));
		dummy_inode.i_rdev = dev;
		inode = &dummy_inode;
	}
	if (MAJOR(dev) >= MAX_BLKDEV) {
		/* the real inode (if any) still holds a reference; the dummy does not */
		if (inode != &dummy_inode)
			iput(inode);
		return -ENXIO;
	}
	if (!(retval = do_umount(dev)) && dev != ROOT_DEV) {
		fops = get_blkfops(MAJOR(dev));
		if (fops && fops->release)
			fops->release(inode,NULL);
		if (MAJOR(dev) == UNNAMED_MAJOR)
			put_unnamed_dev(dev);
	}
	if (inode != &dummy_inode)
		iput(inode);
	if (retval)
		return retval;
	fsync_dev(dev);
	return 0;
}

/*
 * do_mount() does the actual mounting after sys_mount has done the ugly
 * parameter parsing. When enough time has gone by, and everything uses the
 * new mount() parameters, sys_mount() can then be cleaned up.
 *
 * We cannot mount a filesystem if it has active, used, or dirty inodes.
 * We also have to flush all inode-data for this device, as the new mount
 * might need new info.
+ */ +static int do_mount(dev_t dev, const char * dir, char * type, int flags, void * data) +{ + struct inode * dir_i; + struct super_block * sb; + int error; + + error = namei(dir,&dir_i); + if (error) + return error; + if (dir_i->i_count != 1 || dir_i->i_mount) { + iput(dir_i); + return -EBUSY; + } + if (!S_ISDIR(dir_i->i_mode)) { + iput(dir_i); + return -ENOTDIR; + } + if (!fs_may_mount(dev)) { + iput(dir_i); + return -EBUSY; + } + sb = read_super(dev,type,flags,data,0); + if (!sb) { + iput(dir_i); + return -EINVAL; + } + if (sb->s_covered) { + iput(dir_i); + return -EBUSY; + } + sb->s_covered = dir_i; + dir_i->i_mount = sb->s_mounted; + return 0; /* we don't iput(dir_i) - see umount */ +} + + +/* + * Alters the mount flags of a mounted file system. Only the mount point + * is used as a reference - file system type and the device are ignored. + * FS-specific mount options can't be altered by remounting. + */ + +static int do_remount_sb(struct super_block *sb, int flags, char *data) +{ + int retval; + + if (!(flags & MS_RDONLY ) && sb->s_dev && is_read_only(sb->s_dev)) + return -EACCES; + /*flags |= MS_RDONLY;*/ + /* If we are remounting RDONLY, make sure there are no rw files open */ + if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) + if (!fs_may_remount_ro(sb->s_dev)) + return -EBUSY; + if (sb->s_op && sb->s_op->remount_fs) { + retval = sb->s_op->remount_fs(sb, &flags, data); + if (retval) + return retval; + } + sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | + (flags & MS_RMT_MASK); + return 0; +} + +static int do_remount(const char *dir,int flags,char *data) +{ + struct inode *dir_i; + int retval; + + retval = namei(dir,&dir_i); + if (retval) + return retval; + if (dir_i != dir_i->i_sb->s_mounted) { + iput(dir_i); + return -EINVAL; + } + retval = do_remount_sb(dir_i->i_sb, flags, data); + iput(dir_i); + return retval; +} + +static int copy_mount_options (const void * data, unsigned long *where) +{ + int i; + unsigned long page; + struct vm_area_struct * 
vma; + + *where = 0; + if (!data) + return 0; + + for (vma = current->mm->mmap ; ; ) { + if (!vma || + (unsigned long) data < vma->vm_start) { + return -EFAULT; + } + if ((unsigned long) data < vma->vm_end) + break; + vma = vma->vm_next; + } + i = vma->vm_end - (unsigned long) data; + if (PAGE_SIZE <= (unsigned long) i) + i = PAGE_SIZE-1; + if (!(page = __get_free_page(GFP_KERNEL))) { + return -ENOMEM; + } + memcpy_fromfs((void *) page,data,i); + *where = page; + return 0; +} + +/* + * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to + * be given to the mount() call (ie: read-only, no-dev, no-suid etc). + * + * data is a (void *) that can point to any structure up to + * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent + * information (or be NULL). + * + * NOTE! As old versions of mount() didn't use this setup, the flags + * has to have a special 16-bit magic number in the hight word: + * 0xC0ED. If this magic word isn't present, the flags and data info + * isn't used, as the syscall assumes we are talking to an older + * version that didn't understand them. 
+ */ +asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type, + unsigned long new_flags, void * data) +{ + struct file_system_type * fstype; + struct inode * inode; + struct file_operations * fops; + dev_t dev; + int retval; + char * t; + unsigned long flags = 0; + unsigned long page = 0; + + if (!suser()) + return -EPERM; + if ((new_flags & + (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) { + retval = copy_mount_options (data, &page); + if (retval < 0) + return retval; + retval = do_remount(dir_name, + new_flags & ~MS_MGC_MSK & ~MS_REMOUNT, + (char *) page); + free_page(page); + return retval; + } + retval = copy_mount_options (type, &page); + if (retval < 0) + return retval; + fstype = get_fs_type((char *) page); + free_page(page); + if (!fstype) + return -ENODEV; + t = fstype->name; + if (fstype->requires_dev) { + retval = namei(dev_name,&inode); + if (retval) + return retval; + if (!S_ISBLK(inode->i_mode)) { + iput(inode); + return -ENOTBLK; + } + if (IS_NODEV(inode)) { + iput(inode); + return -EACCES; + } + dev = inode->i_rdev; + if (MAJOR(dev) >= MAX_BLKDEV) { + iput(inode); + return -ENXIO; + } + } else { + if (!(dev = get_unnamed_dev())) + return -EMFILE; + inode = NULL; + } + fops = get_blkfops(MAJOR(dev)); + if (fops && fops->open) { + struct file dummy; /* allows read-write or read-only flag */ + memset(&dummy, 0, sizeof(dummy)); + dummy.f_inode = inode; + dummy.f_mode = (new_flags & MS_RDONLY) ? 
1 : 3; + retval = fops->open(inode, &dummy); + if (retval) { + iput(inode); + return retval; + } + } + page = 0; + if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) { + flags = new_flags & ~MS_MGC_MSK; + retval = copy_mount_options(data, &page); + if (retval < 0) { + iput(inode); + return retval; + } + } + retval = do_mount(dev,dir_name,t,flags,(void *) page); + free_page(page); + if (retval && fops && fops->release) + fops->release(inode, NULL); + iput(inode); + return retval; +} + +void mount_root(void) +{ + struct file_system_type * fs_type; + struct super_block * sb; + struct inode * inode, d_inode; + struct file filp; + int retval; + + memset(super_blocks, 0, sizeof(super_blocks)); +#ifdef CONFIG_BLK_DEV_FD + if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) { + printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n"); + wait_for_keypress(); + } +#endif + + memset(&filp, 0, sizeof(filp)); + memset(&d_inode, 0, sizeof(d_inode)); + d_inode.i_rdev = ROOT_DEV; + filp.f_inode = &d_inode; + if ( root_mountflags & MS_RDONLY) + filp.f_mode = 1; /* read only */ + else + filp.f_mode = 3; /* read write */ + retval = blkdev_open(&d_inode, &filp); + if(retval == -EROFS){ + root_mountflags |= MS_RDONLY; + filp.f_mode = 1; + retval = blkdev_open(&d_inode, &filp); + } + + for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) { + if(retval) + break; + if (!fs_type->requires_dev) + continue; + sb = read_super(ROOT_DEV,fs_type->name,root_mountflags,NULL,1); + if (sb) { + inode = sb->s_mounted; + inode->i_count += 3 ; /* NOTE! it is logically used 4 times, not 1 */ + sb->s_covered = inode; + sb->s_flags = root_mountflags; + current->fs->pwd = inode; + current->fs->root = inode; + printk ("VFS: Mounted root (%s filesystem)%s.\n", + fs_type->name, + (sb->s_flags & MS_RDONLY) ? 
" readonly" : ""); + return; + } + } + panic("VFS: Unable to mount root fs on %02x:%02x", + MAJOR(ROOT_DEV), MINOR(ROOT_DEV)); +} diff --git a/fs/sysv/INTRO b/fs/sysv/INTRO new file mode 100644 index 000000000..9e53cb317 --- /dev/null +++ b/fs/sysv/INTRO @@ -0,0 +1,183 @@ +This is the implementation of the SystemV/Coherent filesystem for Linux. +It grew out of separate filesystem implementations + + Xenix FS Doug Evans <dje@cygnus.com> June 1992 + SystemV FS Paul B. Monday <pmonday@eecs.wsu.edu> March-June 1993 + Coherent FS B. Haible <haible@ma2s2.mathematik.uni-karlsruhe.de> June 1993 + +and was merged together in July 1993. + +These filesystems are rather similar. Here is a comparison with Minix FS: + +* Linux fdisk reports on partitions + - Minix FS 0x81 Linux/Minix + - Xenix FS ?? + - SystemV FS ?? + - Coherent FS 0x08 AIX bootable + +* Size of a block or zone (data allocation unit on disk) + - Minix FS 1024 + - Xenix FS 1024 (also 512 ??) + - SystemV FS 1024 (also 512) + - Coherent FS 512 + +* General layout: all have one boot block, one super block and + separate areas for inodes and for directories/data. + On SystemV Release 2 FS (e.g. Microport) the first track is reserved and + all the block numbers (including the super block) are offset by one track. + +* Byte ordering of "short" (16 bit entities) on disk: + - Minix FS little endian 0 1 + - Xenix FS little endian 0 1 + - SystemV FS little endian 0 1 + - Coherent FS little endian 0 1 + Of course, this affects only the file system, not the data of files on it! + +* Byte ordering of "long" (32 bit entities) on disk: + - Minix FS little endian 0 1 2 3 + - Xenix FS little endian 0 1 2 3 + - SystemV FS little endian 0 1 2 3 + - Coherent FS PDP-11 2 3 0 1 + Of course, this affects only the file system, not the data of files on it! 
+ +* Inode on disk: "short", 0 means non-existent, the root dir ino is: + - Minix FS 1 + - Xenix FS, SystemV FS, Coherent FS 2 + +* Maximum number of hard links to a file: + - Minix FS 250 + - Xenix FS ?? + - SystemV FS ?? + - Coherent FS >=10000 + +* Free inode management: + - Minix FS a bitmap + - Xenix FS, SystemV FS, Coherent FS + There is a cache of a certain number of free inodes in the super-block. + When it is exhausted, new free inodes are found using a linear search. + +* Free block management: + - Minix FS a bitmap + - Xenix FS, SystemV FS, Coherent FS + Free blocks are organized in a "free list". Maybe a misleading term, + since it is not true that every free block contains a pointer to + the next free block. Rather, the free blocks are organized in chunks + of limited size, and every now and then a free block contains pointers + to the free blocks pertaining to the next chunk; the first of these + contains pointers and so on. The list terminates with a "block number" + 0 on Xenix FS and SystemV FS, with a block zeroed out on Coherent FS. 
+ +* Super-block location: + - Minix FS block 1 = bytes 1024..2047 + - Xenix FS block 1 = bytes 1024..2047 + - SystemV FS bytes 512..1023 + - Coherent FS block 1 = bytes 512..1023 + +* Super-block layout: + - Minix FS + unsigned short s_ninodes; + unsigned short s_nzones; + unsigned short s_imap_blocks; + unsigned short s_zmap_blocks; + unsigned short s_firstdatazone; + unsigned short s_log_zone_size; + unsigned long s_max_size; + unsigned short s_magic; + - Xenix FS, SystemV FS, Coherent FS + unsigned short s_firstdatazone; + unsigned long s_nzones; + unsigned short s_fzone_count; + unsigned long s_fzones[NICFREE]; + unsigned short s_finode_count; + unsigned short s_finodes[NICINOD]; + char s_flock; + char s_ilock; + char s_modified; + char s_rdonly; + unsigned long s_time; + short s_dinfo[4]; -- SystemV FS only + unsigned long s_free_zones; + unsigned short s_free_inodes; + short s_dinfo[4]; -- Xenix FS only + unsigned short s_interleave_m,s_interleave_n; -- Coherent FS only + char s_fname[6]; + char s_fpack[6]; + then they differ considerably: + Xenix FS + char s_clean; + char s_fill[371]; + long s_magic; + long s_type; + SystemV FS + long s_fill[12 or 14]; + long s_state; + long s_magic; + long s_type; + Coherent FS + unsigned long s_unique; + Note that Coherent FS has no magic. 
+ +* Inode layout: + - Minix FS + unsigned short i_mode; + unsigned short i_uid; + unsigned long i_size; + unsigned long i_time; + unsigned char i_gid; + unsigned char i_nlinks; + unsigned short i_zone[7+1+1]; + - Xenix FS, SystemV FS, Coherent FS + unsigned short i_mode; + unsigned short i_nlink; + unsigned short i_uid; + unsigned short i_gid; + unsigned long i_size; + unsigned char i_zone[3*(10+1+1+1)]; + unsigned long i_atime; + unsigned long i_mtime; + unsigned long i_ctime; + +* Regular file data blocks are organized as + - Minix FS + 7 direct blocks + 1 indirect block (pointers to blocks) + 1 double-indirect block (pointer to pointers to blocks) + - Xenix FS, SystemV FS, Coherent FS + 10 direct blocks + 1 indirect block (pointers to blocks) + 1 double-indirect block (pointer to pointers to blocks) + 1 triple-indirect block (pointer to pointers to pointers to blocks) + +* Inode size, inodes per block + - Minix FS 32 32 + - Xenix FS 64 16 + - SystemV FS 64 16 + - Coherent FS 64 8 + +* Directory entry on disk + - Minix FS + unsigned short inode; + char name[14/30]; + - Xenix FS, SystemV FS, Coherent FS + unsigned short inode; + char name[14]; + +* Dir entry size, dir entries per block + - Minix FS 16/32 64/32 + - Xenix FS 16 64 + - SystemV FS 16 64 + - Coherent FS 16 32 + +* How to implement symbolic links such that the host fsck doesn't scream: + - Minix FS normal + - Xenix FS kludge: as regular files with chmod 1000 + - SystemV FS ?? + - Coherent FS kludge: as regular files with chmod 1000 + + +Notation: We often speak of a "block" but mean a zone (the allocation unit) +and not the disk driver's notion of "block". + + +Bruno Haible <haible@ma2s2.mathematik.uni-karlsruhe.de> + diff --git a/fs/sysv/Makefile b/fs/sysv/Makefile new file mode 100644 index 000000000..d4a6ecbd4 --- /dev/null +++ b/fs/sysv/Makefile @@ -0,0 +1,31 @@ +# +# Makefile for the Linux SystemV/Coherent-filesystem routines. +# +# Note! 
Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +.c.s: + $(CC) $(CFLAGS) -S $< +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< + +OBJS= ialloc.o balloc.o inode.o file.o dir.o symlink.o namei.o \ + fsync.o truncate.o + +sysv.o: $(OBJS) + $(LD) -r -o sysv.o $(OBJS) + +dep: + $(CPP) -M *.c > .depend + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/fs/sysv/README b/fs/sysv/README new file mode 100644 index 000000000..d318eb64b --- /dev/null +++ b/fs/sysv/README @@ -0,0 +1,37 @@ +This is the implementation of the SystemV/Coherent filesystem for Linux. +It implements all of + - Xenix FS, + - SystemV/386 FS, + - Coherent FS. + +This is version beta 4. + +To install: +* Answer the 'System V and Coherent filesystem support' question with 'y' + when configuring the kernel. +* To mount a disk or a partition, use + mount [-r] -t sysv device mountpoint + The file system type names + -t sysv + -t xenix + -t coherent + may be used interchangeably, but the last two will eventually disappear. + +Bugs in the present implementation: +- Coherent FS: + - The "free list interleave" n:m is currently ignored. + - Only file systems with no filesystem name and no pack name are recognized. + (See Coherent "man mkfs" for a description of these features.) +- SystemV Release 2 FS: + The superblock is only searched in the blocks 9, 15, 18, which correspond to the + beginning of track 1 on floppy disks. No support for this FS on hard disk yet. + + +Please report any bugs and suggestions to + Bruno Haible <haible@ma2s2.mathematik.uni-karlsruhe.de> or + Pascal Haible <haible@izfm.uni-stuttgart.de> . 
+ + +Bruno Haible +<haible@ma2s2.mathematik.uni-karlsruhe.de> + diff --git a/fs/sysv/balloc.c b/fs/sysv/balloc.c new file mode 100644 index 000000000..f0fb850be --- /dev/null +++ b/fs/sysv/balloc.c @@ -0,0 +1,329 @@ +/* + * linux/fs/sysv/balloc.c + * + * minix/bitmap.c + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext/freelists.c + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * + * xenix/alloc.c + * Copyright (C) 1992 Doug Evans + * + * coh/alloc.c + * Copyright (C) 1993 Pascal Haible, Bruno Haible + * + * sysv/balloc.c + * Copyright (C) 1993 Bruno Haible + * + * This file contains code for allocating/freeing blocks. + */ + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/sysv_fs.h> +#include <linux/string.h> +#include <linux/locks.h> + +/* We don't trust the value of + sb->sv_sbd2->s_tfree = *sb->sv_sb_total_free_blocks + but we nevertheless keep it up to date. */ + +void sysv_free_block(struct super_block * sb, unsigned int block) +{ + struct buffer_head * bh; + char * bh_data; + + if (!sb) { + printk("sysv_free_block: trying to free block on nonexistent device\n"); + return; + } + if (block < sb->sv_firstdatazone || block >= sb->sv_nzones) { + printk("sysv_free_block: trying to free block not in datazone\n"); + return; + } + lock_super(sb); + if (*sb->sv_sb_flc_count > sb->sv_flc_size) { + printk("sysv_free_block: flc_count > flc_size\n"); + unlock_super(sb); + return; + } + /* If the free list head in super-block is full, it is copied + * into this block being freed: + */ + if (*sb->sv_sb_flc_count == sb->sv_flc_size) { + unsigned short * flc_count; + unsigned long * flc_blocks; + + bh = sv_getblk(sb, sb->s_dev, block); + if (!bh) { + printk("sysv_free_block: getblk() failed\n"); + unlock_super(sb); + return; + } + bh_data = bh->b_data; + switch (sb->sv_type) { + case FSTYPE_XENIX: + flc_count = &((struct xenix_freelist_chunk *) bh_data)->fl_nfree; + flc_blocks = &((struct xenix_freelist_chunk *) bh_data)->fl_free[0]; + break; + 
case FSTYPE_SYSV4: + flc_count = &((struct sysv4_freelist_chunk *) bh_data)->fl_nfree; + flc_blocks = &((struct sysv4_freelist_chunk *) bh_data)->fl_free[0]; + break; + case FSTYPE_SYSV2: + flc_count = &((struct sysv2_freelist_chunk *) bh_data)->fl_nfree; + flc_blocks = &((struct sysv2_freelist_chunk *) bh_data)->fl_free[0]; + break; + case FSTYPE_COH: + flc_count = &((struct coh_freelist_chunk *) bh_data)->fl_nfree; + flc_blocks = &((struct coh_freelist_chunk *) bh_data)->fl_free[0]; + break; + default: panic("sysv_free_block: invalid fs type\n"); + } + *flc_count = *sb->sv_sb_flc_count; /* = sb->sv_flc_size */ + memcpy(flc_blocks, sb->sv_sb_flc_blocks, *flc_count * sizeof(sysv_zone_t)); + mark_buffer_dirty(bh, 1); + bh->b_uptodate = 1; + brelse(bh); + *sb->sv_sb_flc_count = 0; + } else + /* If the free list head in super-block is empty, create a new head + * in this block being freed: + */ + if (*sb->sv_sb_flc_count == 0) { /* Applies only to Coherent FS */ + bh = sv_getblk(sb, sb->s_dev, block); + if (!bh) { + printk("sysv_free_block: getblk() failed\n"); + unlock_super(sb); + return; + } + memset(bh->b_data, 0, sb->sv_block_size); + /* this implies ((struct ..._freelist_chunk *) bh->b_data)->flc_count = 0; */ + mark_buffer_dirty(bh, 1); + bh->b_uptodate = 1; + brelse(bh); + /* still *sb->sv_sb_flc_count = 0 */ + } else { + /* Throw away block's contents */ + bh = sv_get_hash_table(sb, sb->s_dev, block); + if (bh) + bh->b_dirt = 0; + brelse(bh); + } + if (sb->sv_convert) + block = to_coh_ulong(block); + sb->sv_sb_flc_blocks[(*sb->sv_sb_flc_count)++] = block; + if (sb->sv_convert) + *sb->sv_sb_total_free_blocks = + to_coh_ulong(from_coh_ulong(*sb->sv_sb_total_free_blocks) + 1); + else + *sb->sv_sb_total_free_blocks = *sb->sv_sb_total_free_blocks + 1; + mark_buffer_dirty(sb->sv_bh1, 1); /* super-block has been modified */ + if (sb->sv_bh1 != sb->sv_bh2) mark_buffer_dirty(sb->sv_bh2, 1); + sb->s_dirt = 1; /* and needs time stamp */ + unlock_super(sb); +} + +int 
sysv_new_block(struct super_block * sb) +{ + unsigned int block; + struct buffer_head * bh; + char * bh_data; + + if (!sb) { + printk("sysv_new_block: trying to get new block from nonexistent device\n"); + return 0; + } + lock_super(sb); + if (*sb->sv_sb_flc_count == 0) { /* Applies only to Coherent FS */ + unlock_super(sb); + return 0; /* no blocks available */ + } + block = sb->sv_sb_flc_blocks[(*sb->sv_sb_flc_count)-1]; + if (sb->sv_convert) + block = from_coh_ulong(block); + if (block == 0) { /* Applies only to Xenix FS, SystemV FS */ + unlock_super(sb); + return 0; /* no blocks available */ + } + (*sb->sv_sb_flc_count)--; + if (block < sb->sv_firstdatazone || block >= sb->sv_nzones) { + printk("sysv_new_block: new block %d is not in data zone\n",block); + unlock_super(sb); + return 0; + } + if (*sb->sv_sb_flc_count == 0) { /* the last block continues the free list */ + unsigned short * flc_count; + unsigned long * flc_blocks; + + if (!(bh = sv_bread(sb, sb->s_dev, block))) { + printk("sysv_new_block: cannot read free-list block\n"); + /* retry this same block next time */ + (*sb->sv_sb_flc_count)++; + unlock_super(sb); + return 0; + } + bh_data = bh->b_data; + switch (sb->sv_type) { + case FSTYPE_XENIX: + flc_count = &((struct xenix_freelist_chunk *) bh_data)->fl_nfree; + flc_blocks = &((struct xenix_freelist_chunk *) bh_data)->fl_free[0]; + break; + case FSTYPE_SYSV4: + flc_count = &((struct sysv4_freelist_chunk *) bh_data)->fl_nfree; + flc_blocks = &((struct sysv4_freelist_chunk *) bh_data)->fl_free[0]; + break; + case FSTYPE_SYSV2: + flc_count = &((struct sysv2_freelist_chunk *) bh_data)->fl_nfree; + flc_blocks = &((struct sysv2_freelist_chunk *) bh_data)->fl_free[0]; + break; + case FSTYPE_COH: + flc_count = &((struct coh_freelist_chunk *) bh_data)->fl_nfree; + flc_blocks = &((struct coh_freelist_chunk *) bh_data)->fl_free[0]; + break; + default: panic("sysv_new_block: invalid fs type\n"); + } + if (*flc_count > sb->sv_flc_size) { + printk("sysv_new_block: 
free-list block with >flc_size entries\n"); + brelse(bh); + unlock_super(sb); + return 0; + } + *sb->sv_sb_flc_count = *flc_count; + memcpy(sb->sv_sb_flc_blocks, flc_blocks, *flc_count * sizeof(sysv_zone_t)); + brelse(bh); + } + /* Now the free list head in the superblock is valid again. */ + bh = sv_getblk(sb, sb->s_dev, block); + if (!bh) { + printk("sysv_new_block: getblk() failed\n"); + unlock_super(sb); + return 0; + } + if (bh->b_count != 1) { + printk("sysv_new_block: block already in use\n"); + unlock_super(sb); + return 0; + } + memset(bh->b_data, 0, sb->sv_block_size); + mark_buffer_dirty(bh, 1); + bh->b_uptodate = 1; + brelse(bh); + if (sb->sv_convert) + *sb->sv_sb_total_free_blocks = + to_coh_ulong(from_coh_ulong(*sb->sv_sb_total_free_blocks) - 1); + else + *sb->sv_sb_total_free_blocks = *sb->sv_sb_total_free_blocks - 1; + mark_buffer_dirty(sb->sv_bh1, 1); /* super-block has been modified */ + if (sb->sv_bh1 != sb->sv_bh2) mark_buffer_dirty(sb->sv_bh2, 1); + sb->s_dirt = 1; /* and needs time stamp */ + unlock_super(sb); + return block; +} + +unsigned long sysv_count_free_blocks(struct super_block * sb) +{ +#if 1 /* test */ + int count, old_count; + unsigned int block; + struct buffer_head * bh; + char * bh_data; + int i; + + /* this causes a lot of disk traffic ... 
*/ + count = 0; + lock_super(sb); + if (*sb->sv_sb_flc_count > 0) { + for (i = *sb->sv_sb_flc_count ; /* i > 0 */ ; ) { + block = sb->sv_sb_flc_blocks[--i]; + if (sb->sv_convert) + block = from_coh_ulong(block); + if (block == 0) /* block 0 terminates list */ + goto done; + count++; + if (i == 0) + break; + } + /* block = sb->sv_sb_flc_blocks[0], the last block continues the free list */ + while (1) { + unsigned short * flc_count; + unsigned long * flc_blocks; + + if (block < sb->sv_firstdatazone || block >= sb->sv_nzones) { + printk("sysv_count_free_blocks: new block %d is not in data zone\n",block); + break; + } + if (!(bh = sv_bread(sb, sb->s_dev, block))) { + printk("sysv_count_free_blocks: cannot read free-list block\n"); + break; + } + bh_data = bh->b_data; + switch (sb->sv_type) { + case FSTYPE_XENIX: + flc_count = &((struct xenix_freelist_chunk *) bh_data)->fl_nfree; + flc_blocks = &((struct xenix_freelist_chunk *) bh_data)->fl_free[0]; + break; + case FSTYPE_SYSV4: + flc_count = &((struct sysv4_freelist_chunk *) bh_data)->fl_nfree; + flc_blocks = &((struct sysv4_freelist_chunk *) bh_data)->fl_free[0]; + break; + case FSTYPE_SYSV2: + flc_count = &((struct sysv2_freelist_chunk *) bh_data)->fl_nfree; + flc_blocks = &((struct sysv2_freelist_chunk *) bh_data)->fl_free[0]; + break; + case FSTYPE_COH: + flc_count = &((struct coh_freelist_chunk *) bh_data)->fl_nfree; + flc_blocks = &((struct coh_freelist_chunk *) bh_data)->fl_free[0]; + break; + default: panic("sysv_count_free_blocks: invalid fs type\n"); + } + if (*flc_count > sb->sv_flc_size) { + printk("sysv_count_free_blocks: free-list block with >flc_size entries\n"); + brelse(bh); + break; + } + if (*flc_count == 0) { /* Applies only to Coherent FS */ + brelse(bh); + break; + } + for (i = *flc_count ; /* i > 0 */ ; ) { + block = flc_blocks[--i]; + if (sb->sv_convert) + block = from_coh_ulong(block); + if (block == 0) /* block 0 terminates list */ + break; + count++; + if (i == 0) + break; + } + /* block = 
flc_blocks[0], the last block continues the free list */ + brelse(bh); + if (block == 0) /* Applies only to Xenix FS and SystemV FS */ + break; + } + done: ; + } + old_count = *sb->sv_sb_total_free_blocks; + if (sb->sv_convert) + old_count = from_coh_ulong(old_count); + if (count != old_count) { + printk("sysv_count_free_blocks: free block count was %d, correcting to %d\n",old_count,count); + if (!(sb->s_flags & MS_RDONLY)) { + *sb->sv_sb_total_free_blocks = (sb->sv_convert ? to_coh_ulong(count) : count); + mark_buffer_dirty(sb->sv_bh2, 1); /* super-block has been modified */ + sb->s_dirt = 1; /* and needs time stamp */ + } + } + unlock_super(sb); + return count; +#else + int count; + + count = *sb->sv_sb_total_free_blocks; + if (sb->sv_convert) + count = from_coh_ulong(count); + return count; +#endif +} + diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c new file mode 100644 index 000000000..a4b019228 --- /dev/null +++ b/fs/sysv/dir.c @@ -0,0 +1,144 @@ +/* + * linux/fs/sysv/dir.c + * + * minix/dir.c + * Copyright (C) 1991, 1992 Linus Torvalds + * + * coh/dir.c + * Copyright (C) 1993 Pascal Haible, Bruno Haible + * + * sysv/dir.c + * Copyright (C) 1993 Bruno Haible + * + * SystemV/Coherent directory handling functions + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/sysv_fs.h> +#include <linux/stat.h> + +#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) +#define ROUND_UP(x) (((x)+3) & ~3) + +static int sysv_dir_read(struct inode * inode, struct file * filp, char * buf, int count) +{ + return -EISDIR; +} + +static int sysv_readdir(struct inode *, struct file *, struct dirent *, int); + +static struct file_operations sysv_dir_operations = { + NULL, /* lseek - default */ + sysv_dir_read, /* read */ + NULL, /* write - bad */ + sysv_readdir, /* readdir */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + 
file_fsync /* default fsync */ +}; + +/* + * directories can handle most operations... + */ +struct inode_operations sysv_dir_inode_operations = { + &sysv_dir_operations, /* default directory file-ops */ + sysv_create, /* create */ + sysv_lookup, /* lookup */ + sysv_link, /* link */ + sysv_unlink, /* unlink */ + sysv_symlink, /* symlink */ + sysv_mkdir, /* mkdir */ + sysv_rmdir, /* rmdir */ + sysv_mknod, /* mknod */ + sysv_rename, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + sysv_truncate, /* truncate */ + NULL /* permission */ +}; + +static int sysv_readdir1 (struct inode * inode, struct file * filp, + struct dirent * dirent) +{ + struct super_block * sb; + unsigned int offset,i; + char c; + struct buffer_head * bh; + char* bh_data; + struct sysv_dir_entry * de; + + if (!inode || !(sb = inode->i_sb) || !S_ISDIR(inode->i_mode)) + return -EBADF; + if ((unsigned long)(filp->f_pos) % SYSV_DIRSIZE) + return -EBADF; + while (filp->f_pos < inode->i_size) { + offset = filp->f_pos & sb->sv_block_size_1; + bh = sysv_file_bread(inode, filp->f_pos >> sb->sv_block_size_bits, 0); + if (!bh) { + filp->f_pos += sb->sv_block_size - offset; + continue; + } + bh_data = bh->b_data; + while (offset < sb->sv_block_size && filp->f_pos < inode->i_size) { + de = (struct sysv_dir_entry *) (offset + bh_data); + offset += SYSV_DIRSIZE; + filp->f_pos += SYSV_DIRSIZE; + if (de->inode) { + struct sysv_dir_entry sde; + + /* Copy the directory entry first, because the directory + * might be modified while we sleep in put_fs_byte... 
+ */ + memcpy(&sde, de, sizeof(struct sysv_dir_entry)); + + for (i = 0; i < SYSV_NAMELEN; i++) + if ((c = sde.name[i]) != 0) + put_fs_byte(c,i+dirent->d_name); + else + break; + if (i) { + if (sde.inode > inode->i_sb->sv_ninodes) + printk("sysv_readdir: Bad inode number on dev 0x%04x, ino %ld, offset 0x%04lx: %d is out of range\n", + inode->i_dev, inode->i_ino, (off_t) filp->f_pos - SYSV_DIRSIZE, sde.inode); + put_fs_long(sde.inode,&dirent->d_ino); + put_fs_byte(0,i+dirent->d_name); + put_fs_word(i,&dirent->d_reclen); + brelse(bh); + return ROUND_UP(NAME_OFFSET(dirent)+i+1); + } + } + } + brelse(bh); + } + return 0; +} + +static int sysv_readdir(struct inode * inode, struct file * filp, + struct dirent * dirent, int count) +{ + int retval, stored; + + /* compatibility */ + if (count==1) + return sysv_readdir1(inode,filp,dirent); + + stored = 0; + while (count >= sizeof(struct dirent)) { + retval = sysv_readdir1(inode,filp,dirent); + if (retval < 0) + return retval; + if (!retval) + return stored; + dirent = (struct dirent *)((char *) dirent + retval); + stored += retval; + count -= retval; + } + return stored; +} diff --git a/fs/sysv/file.c b/fs/sysv/file.c new file mode 100644 index 000000000..27f82d51a --- /dev/null +++ b/fs/sysv/file.c @@ -0,0 +1,263 @@ +/* + * linux/fs/sysv/file.c + * + * minix/file.c + * Copyright (C) 1991, 1992 Linus Torvalds + * + * coh/file.c + * Copyright (C) 1993 Pascal Haible, Bruno Haible + * + * sysv/file.c + * Copyright (C) 1993 Bruno Haible + * + * SystemV/Coherent regular file handling primitives + */ + +#include <asm/segment.h> + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/sysv_fs.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> + +#define NBUF 32 + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +#include <linux/fs.h> +#include <linux/sysv_fs.h> + +static int sysv_file_write(struct inode *, 
struct file *, char *, int); + +/* + * We have mostly NULL's here: the current defaults are ok for + * the coh filesystem. + */ +static struct file_operations sysv_file_operations = { + NULL, /* lseek - default */ + sysv_file_read, /* read */ + sysv_file_write, /* write */ + NULL, /* readdir - bad */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + generic_mmap, /* mmap */ + NULL, /* no special open is needed */ + NULL, /* release */ + sysv_sync_file /* fsync */ +}; + +struct inode_operations sysv_file_inode_operations = { + &sysv_file_operations, /* default file operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + sysv_bmap, /* bmap */ + sysv_truncate, /* truncate */ + NULL /* permission */ +}; + +int sysv_file_read(struct inode * inode, struct file * filp, char * buf, int count) +{ + struct super_block * sb = inode->i_sb; + int read,left,chars; + unsigned int block; + int blocks, offset; + int bhrequest, uptodate; + struct buffer_head ** bhb, ** bhe; + struct buffer_head * bhreq[NBUF]; + struct buffer_head * buflist[NBUF]; + unsigned int size; + + if (!inode) { + printk("sysv_file_read: inode = NULL\n"); + return -EINVAL; + } + if (!S_ISREG(inode->i_mode)) { + printk("sysv_file_read: mode = %07o\n",inode->i_mode); + return -EINVAL; + } + offset = filp->f_pos; + size = inode->i_size; + if (offset > size) + left = 0; + else + left = size - offset; + if (left > count) + left = count; + if (left <= 0) + return 0; + read = 0; + block = offset >> sb->sv_block_size_bits; + offset &= sb->sv_block_size_1; + size = (size + sb->sv_block_size_1) >> sb->sv_block_size_bits; + blocks = (left + offset + sb->sv_block_size_1) >> sb->sv_block_size_bits; + bhb = bhe = buflist; + if (filp->f_reada) { + blocks += read_ahead[MAJOR(inode->i_dev)] >> (sb->sv_block_size_bits - 9); 
+ if (block + blocks > size) + blocks = size - block; + } + + /* We do this in a two stage process. We first try and request + as many blocks as we can, then we wait for the first one to + complete, and then we try and wrap up as many as are actually + done. This routine is rather generic, in that it can be used + in a filesystem by substituting the appropriate function in + for getblk. + + This routine is optimized to make maximum use of the various + buffers and caches. + */ + + do { + bhrequest = 0; + uptodate = 1; + while (blocks) { + --blocks; + *bhb = sysv_getblk(inode, block++, 0); + if (*bhb && !(*bhb)->b_uptodate) { + uptodate = 0; + bhreq[bhrequest++] = *bhb; + } + + if (++bhb == &buflist[NBUF]) + bhb = buflist; + + /* If the block we have on hand is uptodate, go ahead + and complete processing. */ + if (uptodate) + break; + if (bhb == bhe) + break; + } + + /* Now request them all */ + if (bhrequest) + ll_rw_block(READ, bhrequest, bhreq); + + do { /* Finish off all I/O that has actually completed */ + if (*bhe) { + wait_on_buffer(*bhe); + if (!(*bhe)->b_uptodate) { /* read error? 
*/ + brelse(*bhe); + if (++bhe == &buflist[NBUF]) + bhe = buflist; + left = 0; + break; + } + } + if (left < sb->sv_block_size - offset) + chars = left; + else + chars = sb->sv_block_size - offset; + filp->f_pos += chars; + left -= chars; + read += chars; + if (*bhe) { + memcpy_tofs(buf,offset+(*bhe)->b_data,chars); + brelse(*bhe); + buf += chars; + } else { + while (chars-- > 0) + put_fs_byte(0,buf++); + } + offset = 0; + if (++bhe == &buflist[NBUF]) + bhe = buflist; + } while (left > 0 && bhe != bhb && (!*bhe || !(*bhe)->b_lock)); + } while (left > 0); + +/* Release the read-ahead blocks */ + while (bhe != bhb) { + brelse(*bhe); + if (++bhe == &buflist[NBUF]) + bhe = buflist; + }; + if (!read) + return -EIO; + filp->f_reada = 1; + if (!IS_RDONLY(inode)) + inode->i_atime = CURRENT_TIME; + return read; +} + +static int sysv_file_write(struct inode * inode, struct file * filp, char * buf, int count) +{ + struct super_block * sb = inode->i_sb; + off_t pos; + int written,c; + struct buffer_head * bh; + char * p; + + if (!inode) { + printk("sysv_file_write: inode = NULL\n"); + return -EINVAL; + } + if (!S_ISREG(inode->i_mode)) { + printk("sysv_file_write: mode = %07o\n",inode->i_mode); + return -EINVAL; + } +/* + * ok, append may not work when many processes are writing at the same time + * but so what. That way leads to madness anyway. + * But we need to protect against simultaneous truncate as we may end up + * writing our data into blocks that have meanwhile been incorporated into + * the freelist, thereby trashing the freelist. 
+ */ + if (filp->f_flags & O_APPEND) + pos = inode->i_size; + else + pos = filp->f_pos; + written = 0; + while (written<count) { + bh = sysv_getblk (inode, pos >> sb->sv_block_size_bits, 1); + if (!bh) { + if (!written) + written = -ENOSPC; + break; + } + c = sb->sv_block_size - (pos & sb->sv_block_size_1); + if (c > count-written) + c = count-written; + if (c != sb->sv_block_size && !bh->b_uptodate) { + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + if (!bh->b_uptodate) { + brelse(bh); + if (!written) + written = -EIO; + break; + } + } + /* now either c==sb->sv_block_size or bh->b_uptodate */ + p = (pos & sb->sv_block_size_1) + bh->b_data; + pos += c; + if (pos > inode->i_size) { + inode->i_size = pos; + inode->i_dirt = 1; + } + written += c; + memcpy_fromfs(p,buf,c); + buf += c; + bh->b_uptodate = 1; + mark_buffer_dirty(bh, 0); + brelse(bh); + } + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + filp->f_pos = pos; + inode->i_dirt = 1; + return written; +} diff --git a/fs/sysv/fsync.c b/fs/sysv/fsync.c new file mode 100644 index 000000000..9e105077d --- /dev/null +++ b/fs/sysv/fsync.c @@ -0,0 +1,197 @@ +/* + * linux/fs/sysv/fsync.c + * + * minix/fsync.c + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk) + * + * coh/fsync.c + * Copyright (C) 1993 Pascal Haible, Bruno Haible + * + * sysv/fsync.c + * Copyright (C) 1993 Bruno Haible + * + * SystemV/Coherent fsync primitive + */ + +#include <linux/errno.h> +#include <linux/stat.h> + +#include <linux/fs.h> +#include <linux/sysv_fs.h> + + +/* return values: 0 means OK/done, 1 means redo, -1 means I/O error. */ + +/* Sync one block. The block number is + * from_coh_ulong(*blockp) if convert=1, *blockp if convert=0. 
+ */ +static int sync_block (struct inode * inode, unsigned long * blockp, int convert, int wait) +{ + struct buffer_head * bh; + unsigned long tmp, block; + struct super_block * sb; + + block = tmp = *blockp; + if (convert) + block = from_coh_ulong(block); + if (!block) + return 0; + sb = inode->i_sb; + bh = sv_get_hash_table(sb, inode->i_dev, block); + if (!bh) + return 0; + if (*blockp != tmp) { + brelse (bh); + return 1; + } + if (wait && bh->b_req && !bh->b_uptodate) { + brelse(bh); + return -1; + } + if (wait || !bh->b_uptodate || !bh->b_dirt) { + brelse(bh); + return 0; + } + ll_rw_block(WRITE, 1, &bh); + bh->b_count--; + return 0; +} + +/* Sync one block full of indirect pointers and read it because we'll need it. */ +static int sync_iblock (struct inode * inode, unsigned long * iblockp, int convert, + struct buffer_head * *bh, int wait) +{ + int rc; + unsigned long tmp, block; + + *bh = NULL; + block = tmp = *iblockp; + if (convert) + block = from_coh_ulong(block); + if (!block) + return 0; + rc = sync_block (inode, iblockp, convert, wait); + if (rc) + return rc; + *bh = sv_bread(inode->i_sb, inode->i_dev, block); + if (tmp != *iblockp) { + brelse(*bh); + *bh = NULL; + return 1; + } + if (!*bh) + return -1; + return 0; +} + + +static int sync_direct(struct inode *inode, int wait) +{ + int i; + int rc, err = 0; + + for (i = 0; i < 10; i++) { + rc = sync_block (inode, inode->u.sysv_i.i_data + i, 0, wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + return err; +} + +static int sync_indirect(struct inode *inode, unsigned long *iblockp, int convert, int wait) +{ + int i; + struct buffer_head * ind_bh; + int rc, err = 0; + struct super_block * sb; + + rc = sync_iblock (inode, iblockp, convert, &ind_bh, wait); + if (rc || !ind_bh) + return rc; + + sb = inode->i_sb; + for (i = 0; i < sb->sv_ind_per_block; i++) { + rc = sync_block (inode, + ((unsigned long *) ind_bh->b_data) + i, sb->sv_convert, + wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + 
brelse(ind_bh); + return err; +} + +static int sync_dindirect(struct inode *inode, unsigned long *diblockp, int convert, + int wait) +{ + int i; + struct buffer_head * dind_bh; + int rc, err = 0; + struct super_block * sb; + + rc = sync_iblock (inode, diblockp, convert, &dind_bh, wait); + if (rc || !dind_bh) + return rc; + + sb = inode->i_sb; + for (i = 0; i < sb->sv_ind_per_block; i++) { + rc = sync_indirect (inode, + ((unsigned long *) dind_bh->b_data) + i, sb->sv_convert, + wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + brelse(dind_bh); + return err; +} + +static int sync_tindirect(struct inode *inode, unsigned long *tiblockp, int convert, + int wait) +{ + int i; + struct buffer_head * tind_bh; + int rc, err = 0; + struct super_block * sb; + + rc = sync_iblock (inode, tiblockp, convert, &tind_bh, wait); + if (rc || !tind_bh) + return rc; + + sb = inode->i_sb; + for (i = 0; i < sb->sv_ind_per_block; i++) { + rc = sync_dindirect (inode, + ((unsigned long *) tind_bh->b_data) + i, sb->sv_convert, + wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + brelse(tind_bh); + return err; +} + +int sysv_sync_file(struct inode * inode, struct file * file) +{ + int wait, err = 0; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return -EINVAL; + + for (wait=0; wait<=1; wait++) { + err |= sync_direct(inode, wait); + err |= sync_indirect(inode, inode->u.sysv_i.i_data+10, 0, wait); + err |= sync_dindirect(inode, inode->u.sysv_i.i_data+11, 0, wait); + err |= sync_tindirect(inode, inode->u.sysv_i.i_data+12, 0, wait); + } + err |= sysv_sync_inode (inode); + return (err < 0) ? 
-EIO : 0; +} diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c new file mode 100644 index 000000000..f87009100 --- /dev/null +++ b/fs/sysv/ialloc.c @@ -0,0 +1,218 @@ +/* + * linux/fs/sysv/ialloc.c + * + * minix/bitmap.c + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext/freelists.c + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * + * xenix/alloc.c + * Copyright (C) 1992 Doug Evans + * + * coh/alloc.c + * Copyright (C) 1993 Pascal Haible, Bruno Haible + * + * sysv/ialloc.c + * Copyright (C) 1993 Bruno Haible + * + * This file contains code for allocating/freeing inodes. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/sysv_fs.h> +#include <linux/stddef.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> + +/* We don't trust the value of + sb->sv_sbd2->s_tinode = *sb->sv_sb_total_free_inodes + but we nevertheless keep it up to date. */ + +/* An inode on disk is considered free if both i_mode == 0 and i_nlink == 0. 
*/ + +/* return &sb->sv_sb_fic_inodes[i] = &sbd->s_inode[i]; */ +static inline sysv_ino_t * sv_sb_fic_inode (struct super_block * sb, unsigned int i) +{ + if (sb->sv_bh1 == sb->sv_bh2) + return &sb->sv_sb_fic_inodes[i]; + else { + /* 512 byte Xenix FS */ + unsigned int offset = offsetof(struct xenix_super_block, s_inode[i]); + if (offset < 512) + return (sysv_ino_t*)(sb->sv_sbd1 + offset); + else + return (sysv_ino_t*)(sb->sv_sbd2 + offset); + } +} + +void sysv_free_inode(struct inode * inode) +{ + struct super_block * sb; + unsigned int ino; + struct buffer_head * bh; + struct sysv_inode * raw_inode; + + if (!inode) + return; + if (!inode->i_dev) { + printk("sysv_free_inode: inode has no device\n"); + return; + } + if (inode->i_count != 1) { + printk("sysv_free_inode: inode has count=%d\n", inode->i_count); + return; + } + if (inode->i_nlink) { + printk("sysv_free_inode: inode has nlink=%d\n", inode->i_nlink); + return; + } + if (!(sb = inode->i_sb)) { + printk("sysv_free_inode: inode on nonexistent device\n"); + return; + } + ino = inode->i_ino; + if (ino <= SYSV_ROOT_INO || ino > sb->sv_ninodes) { + printk("sysv_free_inode: inode 0,1,2 or nonexistent inode\n"); + return; + } + if (!(bh = sv_bread(sb, inode->i_dev, sb->sv_firstinodezone + ((ino-1) >> sb->sv_inodes_per_block_bits)))) { + printk("sysv_free_inode: unable to read inode block on device %d/%d\n",MAJOR(inode->i_dev),MINOR(inode->i_dev)); + clear_inode(inode); + return; + } + raw_inode = (struct sysv_inode *) bh->b_data + ((ino-1) & sb->sv_inodes_per_block_1); + lock_super(sb); + if (*sb->sv_sb_fic_count < sb->sv_fic_size) + *sv_sb_fic_inode(sb,(*sb->sv_sb_fic_count)++) = ino; + (*sb->sv_sb_total_free_inodes)++; + mark_buffer_dirty(sb->sv_bh1, 1); /* super-block has been modified */ + if (sb->sv_bh1 != sb->sv_bh2) mark_buffer_dirty(sb->sv_bh2, 1); + sb->s_dirt = 1; /* and needs time stamp */ + memset(raw_inode, 0, sizeof(struct sysv_inode)); + mark_buffer_dirty(bh, 1); + unlock_super(sb); + brelse(bh); + 
clear_inode(inode);
+}
+/*
+ * Allocate an inode on the filesystem of directory dir.
+ *
+ * Returns NULL if dir is NULL, if get_empty_inode() fails, or if neither the
+ * free-inode cache nor a scan of the inode table yields a free inode.  With
+ * the superblock locked, an empty cache (or one whose last slot holds 0) is
+ * refilled by walking the inode table from inode SYSV_ROOT_INO+1 and
+ * collecting entries whose i_mode and i_nlink are both 0; the last cached
+ * number is then popped and used for the new inode.
+ */
+struct inode * sysv_new_inode(const struct inode * dir)
+{
+	struct inode * inode;
+	struct super_block * sb;
+	struct buffer_head * bh;
+	struct sysv_inode * raw_inode;
+	int i,j,ino,block;
+
+	if (!dir || !(inode = get_empty_inode()))
+		return NULL;
+	sb = dir->i_sb;
+	inode->i_sb = sb;
+	inode->i_flags = inode->i_sb->s_flags;
+	lock_super(sb); /* protect against task switches */
+	if ((*sb->sv_sb_fic_count == 0)
+	    || (*sv_sb_fic_inode(sb,(*sb->sv_sb_fic_count)-1) == 0) /* Applies only to SystemV2 FS */
+	   ) {
+		/* Rebuild cache of free inodes: */
+		/* i : index into cache slot being filled */
+		/* ino : inode we are trying */
+		/* block : firstinodezone + (ino-1)/inodes_per_block */
+		/* j : (ino-1)%inodes_per_block */
+		/* bh : buffer for block */
+		/* raw_inode : pointer to inode ino in the block */
+		for (i = 0, ino = SYSV_ROOT_INO+1, block = sb->sv_firstinodezone, j = SYSV_ROOT_INO ; i < sb->sv_fic_size && block < sb->sv_firstdatazone ; block++, j = 0) {
+			if (!(bh = sv_bread(sb, sb->s_dev, block))) {
+				printk("sysv_new_inode: unable to read inode table\n");
+				break; /* go with what we've got */
+				/* FIXME: Perhaps try the next block? */
+			}
+			raw_inode = (struct sysv_inode *) bh->b_data + j;
+			for (; j < sb->sv_inodes_per_block && i < sb->sv_fic_size; ino++, j++, raw_inode++) {
+				if (raw_inode->i_mode == 0 && raw_inode->i_nlink == 0) /* free on disk: cache its number */
+					*sv_sb_fic_inode(sb,i++) = ino;
+			}
+			brelse(bh);
+		}
+		if (i == 0) {
+			iput(inode);
+			unlock_super(sb);
+			return NULL; /* no inodes available */
+		}
+		*sb->sv_sb_fic_count = i;
+	}
+	/* Now *sb->sv_sb_fic_count > 0. */
+	ino = *sv_sb_fic_inode(sb,--(*sb->sv_sb_fic_count)); /* pop the last cached inode number */
+	mark_buffer_dirty(sb->sv_bh1, 1); /* super-block has been modified */
+	if (sb->sv_bh1 != sb->sv_bh2) mark_buffer_dirty(sb->sv_bh2, 1);
+	sb->s_dirt = 1; /* and needs time stamp */
+	inode->i_count = 1;
+	inode->i_nlink = 1;
+	inode->i_dev = sb->s_dev;
+	inode->i_uid = current->fsuid;
+	inode->i_gid = (dir->i_mode & S_ISGID) ?
dir->i_gid : current->fsgid;
+	inode->i_dirt = 1;
+	inode->i_ino = ino;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode->i_op = NULL;
+	inode->i_blocks = inode->i_blksize = 0;
+	insert_inode_hash(inode);
+	/* Change directory entry: */ /* NOTE(review): no directory entry is touched below; the code writes a blank inode to disk */
+	inode->i_mode = 0; /* for sysv_write_inode() */
+	inode->i_size = 0; /* ditto */
+	sysv_write_inode(inode); /* ensure inode not allocated again */
+	/* FIXME: caller may call this too. */
+	inode->i_dirt = 1; /* cleared by sysv_write_inode() */
+	/* That's it. */
+	(*sb->sv_sb_total_free_inodes)--;
+	mark_buffer_dirty(sb->sv_bh2, 1); /* super-block has been modified again */
+	sb->s_dirt = 1; /* and needs time stamp again */
+	unlock_super(sb);
+	return inode;
+}
+/*
+ * Count the free inodes of sb by scanning the on-disk inode table (the
+ * #if 1 branch); an entry counts as free when both its i_mode and its
+ * i_nlink are 0.  If the result disagrees with the total kept in the
+ * superblock a message is printed and, unless the filesystem is mounted
+ * read-only (MS_RDONLY), the stored total is corrected.  Returns the count.
+ * The disabled #else branch would simply return the stored total.
+ */
+unsigned long sysv_count_free_inodes(struct super_block * sb)
+{
+#if 1 /* test */
+	struct buffer_head * bh;
+	struct sysv_inode * raw_inode;
+	int j,block,count;
+
+	/* this causes a lot of disk traffic ... */
+	count = 0;
+	lock_super(sb);
+	/* (no cache slot index i is used in this function) */
+	/* (no inode number ino is tracked in this function) */
+	/* block : inode table block being scanned */
+	/* j : index of the next inode slot in that block; SYSV_ROOT_INO for the first block, 0 afterwards */
+	/* bh : buffer for block */
+	/* raw_inode : pointer to inode slot j in the block */
+	for (block = sb->sv_firstinodezone, j = SYSV_ROOT_INO ; block < sb->sv_firstdatazone ; block++, j = 0) {
+		if (!(bh = sv_bread(sb, sb->s_dev, block))) {
+			printk("sysv_count_free_inodes: unable to read inode table\n");
+			break; /* go with what we've got */
+			/* FIXME: Perhaps try the next block?
*/
+		}
+		raw_inode = (struct sysv_inode *) bh->b_data + j;
+		for (; j < sb->sv_inodes_per_block ; j++, raw_inode++)
+			if (raw_inode->i_mode == 0 && raw_inode->i_nlink == 0) /* same free test as the cache rebuild in sysv_new_inode() */
+				count++;
+		brelse(bh);
+	}
+	if (count != *sb->sv_sb_total_free_inodes) {
+		printk("sysv_count_free_inodes: free inode count was %d, correcting to %d\n",(short)(*sb->sv_sb_total_free_inodes),count);
+		if (!(sb->s_flags & MS_RDONLY)) { /* only repair the stored total on a writable mount */
+			*sb->sv_sb_total_free_inodes = count;
+			mark_buffer_dirty(sb->sv_bh2, 1); /* super-block has been modified */
+			sb->s_dirt = 1; /* and needs time stamp */
+		}
+	}
+	unlock_super(sb);
+	return count;
+#else
+	return *sb->sv_sb_total_free_inodes;
+#endif
+}
+
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
new file mode 100644
index 000000000..0b61ae7c7
--- /dev/null
+++ b/fs/sysv/inode.c
@@ -0,0 +1,951 @@
+/*
+ * linux/fs/sysv/inode.c
+ *
+ * minix/inode.c
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * xenix/inode.c
+ * Copyright (C) 1992 Doug Evans
+ *
+ * coh/inode.c
+ * Copyright (C) 1993 Pascal Haible, Bruno Haible
+ *
+ * sysv/inode.c
+ * Copyright (C) 1993 Paul B. Monday
+ *
+ * sysv/inode.c
+ * Copyright (C) 1993 Bruno Haible
+ *
+ * This file contains code for allocating/freeing inodes and for reading/writing
+ * the superblock.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/sysv_fs.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+
+#include <asm/segment.h>
+/*
+ * put_inode hook.  Inodes that still have links are left alone; an inode
+ * whose link count is 0 is truncated to size 0 and its on-disk inode is
+ * released with sysv_free_inode().
+ */
+void sysv_put_inode(struct inode *inode)
+{
+	if (inode->i_nlink)
+		return;
+	inode->i_size = 0;
+	sysv_truncate(inode);
+	sysv_free_inode(inode);
+}
+
+/*
+ * Superblock operations for this filesystem, installed as sb->s_op by
+ * sysv_read_super().  The initializer is positional; the meaning of each
+ * slot comes from struct super_operations, which is declared elsewhere.
+ */
+static struct super_operations sysv_sops = {
+	sysv_read_inode,
+	sysv_notify_change,
+	sysv_write_inode,
+	sysv_put_inode,
+	sysv_put_super,
+	sysv_write_super,
+	sysv_statfs,
+	NULL
+};
+
+/* The following functions try to recognize specific filesystems.
+ * We recognize:
+ * - Xenix FS by its magic number.
+ * - SystemV FS by its magic number.
+ * - Coherent FS by its funny fname/fpack field.
+ * We discriminate among SystemV4 and SystemV2 FS by the assumption that
+ * the time stamp is not < 01-01-1980.
+ */
+/*
+ * Record in sb every block-size dependent constant for a filesystem with
+ * 512-byte blocks: sizes, masks (the _1 fields) and shift counts (the
+ * _bits fields).  The 64 is the on-disk inode size that sysv_read_super()
+ * insists on; the 10 is the number of direct block slots that sysv_bmap()
+ * and sysv_getblk() consult before going through indirect blocks.
+ * NOTE(review): 512/4 presumably means 4-byte block numbers in an
+ * indirect block -- confirm against sysv_zone_t.
+ */
+static void detected_bs512 (struct super_block *sb)
+{
+	sb->sv_block_size = 512;
+	sb->sv_block_size_1 = 512-1;
+	sb->sv_block_size_bits = 9;
+	sb->sv_block_size_ratio = 2; /* 1024 / 512 */
+	sb->sv_block_size_ratio_bits = 1;
+	sb->sv_inodes_per_block = 512/64;
+	sb->sv_inodes_per_block_1 = 512/64-1;
+	sb->sv_inodes_per_block_bits = 9-6;
+	sb->sv_toobig_block = 10 +
+		(sb->sv_ind_per_block = 512/4) +
+		(sb->sv_ind_per_block_2 = (512/4)*(512/4)) +
+		(sb->sv_ind_per_block_3 = (512/4)*(512/4)*(512/4));
+	sb->sv_ind_per_block_1 = 512/4-1;
+	sb->sv_ind_per_block_2_1 = (512/4)*(512/4)-1;
+	sb->sv_ind_per_block_2_bits = 2 *
+		(sb->sv_ind_per_block_bits = 9-2);
+	sb->sv_ind_per_block_block_size_1 = (512/4)*512-1;
+	sb->sv_ind_per_block_block_size_bits = (9-2)+9;
+	sb->sv_ind_per_block_2_block_size_1 = (512/4)*(512/4)*512-1;
+	sb->sv_ind_per_block_2_block_size_bits = (9-2)+(9-2)+9;
+	sb->sv_ind0_size = 10 * 512; /* bytes covered by the direct slots */
+	sb->sv_ind1_size = (10 + (512/4))* 512; /* ... plus the single indirect tree */
+	sb->sv_ind2_size = (10 + (512/4) + (512/4)*(512/4)) * 512; /* ... plus the double indirect tree */
+}
+/*
+ * Same as detected_bs512(), with every constant computed for 1024-byte
+ * blocks.
+ */
+static void detected_bs1024 (struct super_block *sb)
+{
+	sb->sv_block_size = 1024;
+	sb->sv_block_size_1 = 1024-1;
+	sb->sv_block_size_bits = 10;
+	sb->sv_block_size_ratio = 1;
+	sb->sv_block_size_ratio_bits = 0;
+	sb->sv_inodes_per_block = 1024/64;
+	sb->sv_inodes_per_block_1 = 1024/64-1;
+	sb->sv_inodes_per_block_bits = 10-6;
+	sb->sv_toobig_block = 10 +
+		(sb->sv_ind_per_block = 1024/4) +
+		(sb->sv_ind_per_block_2 = (1024/4)*(1024/4)) +
+		(sb->sv_ind_per_block_3 = (1024/4)*(1024/4)*(1024/4));
+	sb->sv_ind_per_block_1 = 1024/4-1;
+	sb->sv_ind_per_block_2_1 = (1024/4)*(1024/4)-1;
+	sb->sv_ind_per_block_2_bits = 2 *
+		(sb->sv_ind_per_block_bits = 10-2);
+	sb->sv_ind_per_block_block_size_1 = (1024/4)*1024-1;
+	sb->sv_ind_per_block_block_size_bits = (10-2)+10;
+	sb->sv_ind_per_block_2_block_size_1 =
(1024/4)*(1024/4)*1024-1;
+	sb->sv_ind_per_block_2_block_size_bits = (10-2)+(10-2)+10;
+	sb->sv_ind0_size = 10 * 1024;
+	sb->sv_ind1_size = (10 + (1024/4))* 1024;
+	sb->sv_ind2_size = (10 + (1024/4) + (1024/4)*(1024/4)) * 1024;
+}
+/*
+ * Test whether the data in bh starts with a Xenix superblock (s_magic
+ * 0x2b5544).  s_type 1 selects 512-byte blocks, s_type 2 selects 1024-byte
+ * blocks; any other type is rejected.  On success the block-size constants
+ * and sv_type are stored in sb and a name for the log message is returned;
+ * otherwise NULL is returned.
+ */
+static const char* detect_xenix (struct super_block *sb, struct buffer_head *bh)
+{
+	struct xenix_super_block * sbd;
+
+	sbd = (struct xenix_super_block *) bh->b_data;
+	if (sbd->s_magic != 0x2b5544)
+		return NULL;
+	switch (sbd->s_type) {
+		case 1: detected_bs512(sb); break;
+		case 2: detected_bs1024(sb); break;
+		default: return NULL;
+	}
+	sb->sv_type = FSTYPE_XENIX;
+	return "Xenix";
+}
+static struct super_block * detected_xenix (struct super_block *sb, struct buffer_head *bh1, struct buffer_head *bh2)
+{	/*
+	 * Second stage for Xenix: point the sv_sb_* fields of sb at the
+	 * superblock data held in bh1/bh2 and derive the zone layout.  With
+	 * 512-byte blocks the superblock is split over two buffers; sbd2 is
+	 * biased by -BLOCK_SIZE/2 so that members lying in the second half
+	 * are reached through their ordinary struct offsets.  Returns sb, or
+	 * NULL if the magic seen through sbd2 is wrong.
+	 */
+	struct xenix_super_block * sbd1;
+	struct xenix_super_block * sbd2;
+
+	if (sb->sv_block_size == BLOCK_SIZE)
+		/* block size = 1024, so bh1 = bh2 */
+		sbd1 = sbd2 = (struct xenix_super_block *) bh1->b_data;
+	else {
+		/* block size = 512, so bh1 != bh2 */
+		sbd1 = (struct xenix_super_block *) bh1->b_data;
+		sbd2 = (struct xenix_super_block *) (bh2->b_data - BLOCK_SIZE/2);
+		/* sanity check */
+		if (sbd2->s_magic != 0x2b5544)
+			return NULL;
+	}
+
+	sb->sv_convert = 0;
+	sb->sv_kludge_symlinks = 1;
+	sb->sv_truncate = 1;
+	sb->sv_link_max = XENIX_LINK_MAX;
+	sb->sv_fic_size = XENIX_NICINOD;
+	sb->sv_flc_size = XENIX_NICFREE;
+	sb->sv_bh1 = bh1;
+	sb->sv_bh2 = bh2;
+	sb->sv_sbd1 = (char *) sbd1;
+	sb->sv_sbd2 = (char *) sbd2;
+	sb->sv_sb_fic_count = &sbd1->s_ninode;
+	sb->sv_sb_fic_inodes = &sbd1->s_inode[0];
+	sb->sv_sb_total_free_inodes = &sbd2->s_tinode;
+	sb->sv_sb_flc_count = &sbd1->s_nfree;
+	sb->sv_sb_flc_blocks = &sbd1->s_free[0];
+	sb->sv_sb_total_free_blocks = &sbd2->s_tfree;
+	sb->sv_sb_time = &sbd2->s_time;
+	sb->sv_block_base = 0;
+	sb->sv_firstinodezone = 2;
+	sb->sv_firstdatazone = sbd1->s_isize;
+	sb->sv_nzones = sbd1->s_fsize;
+	sb->sv_ndatazones = sb->sv_nzones - sb->sv_firstdatazone;
+	return sb;
+}
+
+static
const char* detect_sysv4 (struct super_block *sb, struct buffer_head *bh)
+{	/*
+	 * Test for a SystemV Release 4 superblock at byte offset
+	 * BLOCK_SIZE/2 of the data in bh: the magic must be 0xfd187e20, the
+	 * time stamp must not lie before 01-01-1980, and s_type must be 1
+	 * (512-byte blocks) or 2 (1024-byte blocks).  On success the
+	 * block-size constants and sv_type are stored in sb and a name for
+	 * the log message is returned; otherwise NULL.
+	 */
+	struct sysv4_super_block * sbd;
+
+	sbd = (struct sysv4_super_block *) (bh->b_data + BLOCK_SIZE/2);
+	if (sbd->s_magic != 0xfd187e20)
+		return NULL;
+	if (sbd->s_time < 315532800) /* this is likely to happen on SystemV2 FS */
+		return NULL;
+	switch (sbd->s_type) {
+		case 1: detected_bs512(sb); break;
+		case 2: detected_bs1024(sb); break;
+		default: return NULL;
+	}
+	sb->sv_type = FSTYPE_SYSV4;
+	return "SystemV";
+}
+static struct super_block * detected_sysv4 (struct super_block *sb, struct buffer_head *bh)
+{	/*
+	 * Second stage for SystemV Release 4: point the sv_sb_* fields of sb
+	 * at the superblock inside bh and derive the zone layout.  With 1 KB
+	 * blocks the superblock sits at offset BLOCK_SIZE/2 of bh; otherwise
+	 * bh is taken to start with the superblock, and magic and time stamp
+	 * are checked again.  Returns sb, or NULL if that check fails.
+	 */
+	struct sysv4_super_block * sbd;
+
+	if (sb->sv_block_size == BLOCK_SIZE)
+		sbd = (struct sysv4_super_block *) (bh->b_data + BLOCK_SIZE/2);
+	else {
+		sbd = (struct sysv4_super_block *) bh->b_data;
+		/* sanity check */
+		if (sbd->s_magic != 0xfd187e20)
+			return NULL;
+		if (sbd->s_time < 315532800) /* 315532800 s after the epoch = 01-01-1980 */
+			return NULL;
+	}
+
+	sb->sv_convert = 0;
+	sb->sv_kludge_symlinks = 0; /* ?? */
+	sb->sv_truncate = 1;
+	sb->sv_link_max = SYSV_LINK_MAX;
+	sb->sv_fic_size = SYSV_NICINOD;
+	sb->sv_flc_size = SYSV_NICFREE;
+	sb->sv_bh1 = bh; /* one buffer holds the whole superblock */
+	sb->sv_bh2 = bh;
+	sb->sv_sbd1 = (char *) sbd;
+	sb->sv_sbd2 = (char *) sbd;
+	sb->sv_sb_fic_count = &sbd->s_ninode;
+	sb->sv_sb_fic_inodes = &sbd->s_inode[0];
+	sb->sv_sb_total_free_inodes = &sbd->s_tinode;
+	sb->sv_sb_flc_count = &sbd->s_nfree;
+	sb->sv_sb_flc_blocks = &sbd->s_free[0];
+	sb->sv_sb_total_free_blocks = &sbd->s_tfree;
+	sb->sv_sb_time = &sbd->s_time;
+	sb->sv_block_base = 0;
+	sb->sv_firstinodezone = 2;
+	sb->sv_firstdatazone = sbd->s_isize;
+	sb->sv_nzones = sbd->s_fsize;
+	sb->sv_ndatazones = sb->sv_nzones - sb->sv_firstdatazone;
+	return sb;
+}
+/*
+ * Test for a SystemV Release 2 superblock; the checks mirror those of
+ * detect_sysv4() but go through struct sysv2_super_block.
+ * NOTE(review): whether the two layouts place s_magic, s_time and s_type
+ * at different offsets cannot be seen from here -- confirm in the header.
+ */
+static const char* detect_sysv2 (struct super_block *sb, struct buffer_head *bh)
+{
+	struct sysv2_super_block * sbd;
+
+	sbd = (struct sysv2_super_block *) (bh->b_data + BLOCK_SIZE/2);
+	if (sbd->s_magic != 0xfd187e20)
+		return NULL;
+	if (sbd->s_time < 315532800) /* this is likely to happen on
SystemV4 FS */
+		return NULL;
+	switch (sbd->s_type) {
+		case 1: detected_bs512(sb); break;
+		case 2: detected_bs1024(sb); break;
+		default: return NULL;
+	}
+	sb->sv_type = FSTYPE_SYSV2;
+	return "SystemV Release 2";
+}
+static struct super_block * detected_sysv2 (struct super_block *sb, struct buffer_head *bh)
+{	/*
+	 * Second stage for SystemV Release 2: point the sv_sb_* fields of sb
+	 * at the superblock inside bh and derive the zone layout.  With 1 KB
+	 * blocks the superblock sits at offset BLOCK_SIZE/2 of bh; otherwise
+	 * bh is taken to start with the superblock, and magic and time stamp
+	 * are checked again.  Returns sb, or NULL if that check fails.
+	 */
+	struct sysv2_super_block * sbd;
+
+	if (sb->sv_block_size == BLOCK_SIZE)
+		sbd = (struct sysv2_super_block *) (bh->b_data + BLOCK_SIZE/2);
+	else {
+		sbd = (struct sysv2_super_block *) bh->b_data;
+		/* sanity check */
+		if (sbd->s_magic != 0xfd187e20)
+			return NULL;
+		if (sbd->s_time < 315532800) /* 315532800 s after the epoch = 01-01-1980 */
+			return NULL;
+	}
+
+	sb->sv_convert = 0;
+	sb->sv_kludge_symlinks = 0; /* ?? */
+	sb->sv_truncate = 1;
+	sb->sv_link_max = SYSV_LINK_MAX;
+	sb->sv_fic_size = SYSV_NICINOD;
+	sb->sv_flc_size = SYSV_NICFREE;
+	sb->sv_bh1 = bh; /* one buffer holds the whole superblock */
+	sb->sv_bh2 = bh;
+	sb->sv_sbd1 = (char *) sbd;
+	sb->sv_sbd2 = (char *) sbd;
+	sb->sv_sb_fic_count = &sbd->s_ninode;
+	sb->sv_sb_fic_inodes = &sbd->s_inode[0];
+	sb->sv_sb_total_free_inodes = &sbd->s_tinode;
+	sb->sv_sb_flc_count = &sbd->s_nfree;
+	sb->sv_sb_flc_blocks = &sbd->s_free[0];
+	sb->sv_sb_total_free_blocks = &sbd->s_tfree;
+	sb->sv_sb_time = &sbd->s_time;
+	sb->sv_block_base = 0;
+	sb->sv_firstinodezone = 2;
+	sb->sv_firstdatazone = sbd->s_isize;
+	sb->sv_nzones = sbd->s_fsize;
+	sb->sv_ndatazones = sb->sv_nzones - sb->sv_firstdatazone;
+	return sb;
+}
+/*
+ * Test for a Coherent superblock at byte offset BLOCK_SIZE/2 of the data
+ * in bh.  There is no magic number to test; the first 6 bytes of s_fname
+ * and of s_fpack must each equal one of two known strings.  Coherent is
+ * always treated as a 512-byte block filesystem.  Returns a name for the
+ * log message, or NULL if the fields do not match.
+ */
+static const char* detect_coherent (struct super_block *sb, struct buffer_head *bh)
+{
+	struct coh_super_block * sbd;
+
+	sbd = (struct coh_super_block *) (bh->b_data + BLOCK_SIZE/2);
+	if ((memcmp(sbd->s_fname,"noname",6) && memcmp(sbd->s_fname,"xxxxx ",6))
+	    || (memcmp(sbd->s_fpack,"nopack",6) && memcmp(sbd->s_fpack,"xxxxx\n",6)))
+		return NULL;
+	detected_bs512(sb);
+	sb->sv_type = FSTYPE_COH;
+	return "Coherent";
+}
+static struct super_block * detected_coherent (struct super_block *sb, struct buffer_head *bh)
+{	/*
+	 * Second stage for Coherent: bh is taken to start with the
+	 * superblock; the fname/fpack test is repeated before sb is set up.
+	 */
+	struct coh_super_block * sbd;
+
+	sbd = (struct
coh_super_block *) bh->b_data; + /* sanity check */ + if ((memcmp(sbd->s_fname,"noname",6) && memcmp(sbd->s_fname,"xxxxx ",6)) + || (memcmp(sbd->s_fpack,"nopack",6) && memcmp(sbd->s_fpack,"xxxxx\n",6))) + return NULL; + + sb->sv_convert = 1; + sb->sv_kludge_symlinks = 1; + sb->sv_truncate = 1; + sb->sv_link_max = COH_LINK_MAX; + sb->sv_fic_size = COH_NICINOD; + sb->sv_flc_size = COH_NICFREE; + sb->sv_bh1 = bh; + sb->sv_bh2 = bh; + sb->sv_sbd1 = (char *) sbd; + sb->sv_sbd2 = (char *) sbd; + sb->sv_sb_fic_count = &sbd->s_ninode; + sb->sv_sb_fic_inodes = &sbd->s_inode[0]; + sb->sv_sb_total_free_inodes = &sbd->s_tinode; + sb->sv_sb_flc_count = &sbd->s_nfree; + sb->sv_sb_flc_blocks = &sbd->s_free[0]; + sb->sv_sb_total_free_blocks = &sbd->s_tfree; + sb->sv_sb_time = &sbd->s_time; + sb->sv_block_base = 0; + sb->sv_firstinodezone = 2; + sb->sv_firstdatazone = sbd->s_isize; + sb->sv_nzones = from_coh_ulong(sbd->s_fsize); + sb->sv_ndatazones = sb->sv_nzones - sb->sv_firstdatazone; + return sb; +} + +struct super_block *sysv_read_super(struct super_block *sb,void *data, + int silent) +{ + struct buffer_head *bh; + const char *found; + int dev = sb->s_dev; + + if (1024 != sizeof (struct xenix_super_block)) + panic("Xenix FS: bad super-block size"); + if ((512 != sizeof (struct sysv4_super_block)) + || (512 != sizeof (struct sysv2_super_block))) + panic("SystemV FS: bad super-block size"); + if (500 != sizeof (struct coh_super_block)) + panic("Coherent FS: bad super-block size"); + if (64 != sizeof (struct sysv_inode)) + panic("sysv fs: bad i-node size"); + lock_super(sb); + set_blocksize(dev,BLOCK_SIZE); + + /* Try to read Xenix superblock */ + if ((bh = bread(dev, 1, BLOCK_SIZE)) != NULL) { + if ((found = detect_xenix(sb,bh)) != NULL) + goto ok; + brelse(bh); + } + if ((bh = bread(dev, 0, BLOCK_SIZE)) != NULL) { + /* Try to recognize SystemV superblock */ + if ((found = detect_sysv4(sb,bh)) != NULL) + goto ok; + if ((found = detect_sysv2(sb,bh)) != NULL) + goto ok; + /* Try 
to recognize Coherent superblock */ + if ((found = detect_coherent(sb,bh)) != NULL) + goto ok; + brelse(bh); + } + /* Try to recognize SystemV superblock */ + /* Offset by 1 track, i.e. most probably 9, 15, or 18 kilobytes. */ + { static int offsets[] = { 9, 15, 18, }; + int i; + for (i = 0; i < sizeof(offsets)/sizeof(offsets[0]); i++) + if ((bh = bread(dev, offsets[i], BLOCK_SIZE)) != NULL) { + /* Try to recognize SystemV superblock */ + if ((found = detect_sysv4(sb,bh)) != NULL) { + sb->sv_block_base = offsets[i] << sb->sv_block_size_ratio_bits; + goto ok; + } + if ((found = detect_sysv2(sb,bh)) != NULL) { + sb->sv_block_base = offsets[i] << sb->sv_block_size_ratio_bits; + goto ok; + } + brelse(bh); + } + } + sb->s_dev=0; + unlock_super(sb); + if (!silent) + printk("VFS: unable to read Xenix/SystemV/Coherent superblock on device %d/%d\n",MAJOR(dev),MINOR(dev)); + return NULL; + + ok: + if (sb->sv_block_size == BLOCK_SIZE) { + switch (sb->sv_type) { + case FSTYPE_XENIX: + if (!detected_xenix(sb,bh,bh)) + goto bad_superblock; + break; + case FSTYPE_SYSV4: + if (!detected_sysv4(sb,bh)) + goto bad_superblock; + break; + case FSTYPE_SYSV2: + if (!detected_sysv2(sb,bh)) + goto bad_superblock; + break; + default: + bad_superblock: + brelse(bh); + sb->s_dev = 0; + unlock_super(sb); + printk("SysV FS: cannot read superblock in 1024 byte mode\n"); + return NULL; + } + } else { + /* Switch to another block size. Unfortunately, we have to + release the 1 KB block bh and read it in two parts again. 
*/ + struct buffer_head *bh1, *bh2; + unsigned long blocknr = bh->b_blocknr << sb->sv_block_size_ratio_bits; + + brelse(bh); + set_blocksize(dev,sb->sv_block_size); + bh1 = NULL; bh2 = NULL; + switch (sb->sv_type) { + case FSTYPE_XENIX: + if ((bh1 = bread(dev, blocknr, sb->sv_block_size)) == NULL) + goto bad_superblock2; + if ((bh2 = bread(dev, blocknr+1, sb->sv_block_size)) == NULL) + goto bad_superblock2; + if (!detected_xenix(sb,bh1,bh2)) + goto bad_superblock2; + break; + case FSTYPE_SYSV4: + if ((bh2 = bread(dev, blocknr+1, sb->sv_block_size)) == NULL) + goto bad_superblock2; + if (!detected_sysv4(sb,bh2)) + goto bad_superblock2; + break; + case FSTYPE_SYSV2: + if ((bh2 = bread(dev, blocknr+1, sb->sv_block_size)) == NULL) + goto bad_superblock2; + if (!detected_sysv2(sb,bh2)) + goto bad_superblock2; + break; + case FSTYPE_COH: + if ((bh2 = bread(dev, blocknr+1, sb->sv_block_size)) == NULL) + goto bad_superblock2; + if (!detected_coherent(sb,bh2)) + goto bad_superblock2; + break; + default: + bad_superblock2: + brelse(bh1); + brelse(bh2); + set_blocksize(sb->s_dev,BLOCK_SIZE); + sb->s_dev = 0; + unlock_super(sb); + printk("SysV FS: cannot read superblock in 512 byte mode\n"); + return NULL; + } + } + sb->sv_ninodes = (sb->sv_firstdatazone - sb->sv_firstinodezone) << sb->sv_inodes_per_block_bits; + if (!silent) + printk("VFS: Found a %s FS (block size = %d) on device %d/%d\n",found,sb->sv_block_size,MAJOR(dev),MINOR(dev)); + sb->s_magic = SYSV_MAGIC_BASE + sb->sv_type; + /* The buffer code now supports block size 512 as well as 1024. */ + sb->s_blocksize = sb->sv_block_size; + sb->s_blocksize_bits = sb->sv_block_size_bits; + /* set up enough so that it can read an inode */ + sb->s_dev = dev; + sb->s_op = &sysv_sops; + sb->s_mounted = iget(sb,SYSV_ROOT_INO); + unlock_super(sb); + if (!sb->s_mounted) { + sysv_put_super(sb); + printk("SysV FS: get root inode failed\n"); + return NULL; + } + sb->s_dirt = 1; + /* brelse(bh); resp. 
brelse(bh1); brelse(bh2);
+	   occurs when the disk is unmounted. */
+	return sb;
+}
+
+/* This is only called on sync() and umount(), when s_dirt=1.
+ * If either superblock buffer is dirty, the current time (run through
+ * to_coh_ulong() when sv_convert is set) is stored in the superblock time
+ * field and sv_bh2 is marked dirty.  s_dirt is cleared in every case. */
+void sysv_write_super (struct super_block *sb)
+{
+	lock_super(sb);
+	if (sb->sv_bh1->b_dirt || sb->sv_bh2->b_dirt) {
+		/* If we are going to write out the super block,
+		   then attach current time stamp. */
+		unsigned long time = CURRENT_TIME;
+		if (sb->sv_convert)
+			time = to_coh_ulong(time);
+		*sb->sv_sb_time = time;
+		mark_buffer_dirty(sb->sv_bh2, 1);
+	}
+	sb->s_dirt = 0;
+	unlock_super(sb);
+}
+/*
+ * Release the superblock buffer(s) held since sysv_read_super(), switch
+ * the device back to BLOCK_SIZE blocks if a different size was in use, and
+ * clear sb->s_dev.
+ */
+void sysv_put_super(struct super_block *sb)
+{
+	/* we can assume sysv_write_super() has already been called */
+	lock_super(sb);
+	brelse(sb->sv_bh1);
+	if (sb->sv_bh1 != sb->sv_bh2) brelse(sb->sv_bh2);
+	/* switch back to default block size */
+	if (sb->s_blocksize != BLOCK_SIZE)
+		set_blocksize(sb->s_dev,BLOCK_SIZE);
+	sb->s_dev = 0;
+	unlock_super(sb);
+}
+/*
+ * Fill in the statfs buffer buf through put_fs_long().  The same free
+ * block count is reported for f_bfree and f_bavail; f_fsid is not written.
+ * Both free counts come from sysv_count_free_blocks() and
+ * sysv_count_free_inodes().
+ */
+void sysv_statfs(struct super_block *sb, struct statfs *buf)
+{
+	long tmp;
+
+	put_fs_long(sb->s_magic, &buf->f_type); /* type of filesystem */
+	put_fs_long(sb->sv_block_size, &buf->f_bsize); /* block size */
+	put_fs_long(sb->sv_ndatazones, &buf->f_blocks); /* total data blocks in file system */
+	tmp = sysv_count_free_blocks(sb);
+	put_fs_long(tmp, &buf->f_bfree); /* free blocks in fs */
+	put_fs_long(tmp, &buf->f_bavail); /* free blocks available to non-superuser */
+	put_fs_long(sb->sv_ninodes, &buf->f_files); /* total file nodes in file system */
+	put_fs_long(sysv_count_free_inodes(sb), &buf->f_ffree); /* free file nodes in fs */
+	put_fs_long(SYSV_NAMELEN, &buf->f_namelen);
+	/* Don't know what value to put in buf->f_fsid */ /* file system id */
+}
+
+
+/* bmap support for running executables and shared libraries.
*/
+
+/*
+ * Device block number stored in slot nr of the inode's i_data array, with
+ * sv_block_base added; 0 if the slot is empty.
+ */
+static inline int inode_bmap(struct super_block * sb, struct inode * inode, int nr)
+{
+	int tmp = inode->u.sysv_i.i_data[nr];
+	if (!tmp)
+		return 0;
+	return tmp + sb->sv_block_base;
+}
+
+/*
+ * Device block number stored in entry nr of the indirect block held in bh,
+ * with sv_block_base added; 0 if bh is NULL or the entry is empty.  The
+ * entry is passed through from_coh_ulong() when convert is set.  bh is
+ * always released.
+ */
+static int block_bmap(struct super_block * sb, struct buffer_head * bh, int nr, int convert)
+{
+	int tmp;
+
+	if (!bh)
+		return 0;
+	tmp = ((sysv_zone_t *) bh->b_data) [nr];
+	if (convert)
+		tmp = from_coh_ulong(tmp);
+	brelse(bh);
+	if (!tmp)
+		return 0;
+	return tmp + sb->sv_block_base;
+}
+
+/*
+ * Map logical block block_nr of inode to a device block number.  Returns 0
+ * if the block is not allocated, if an indirect block cannot be read, or
+ * if block_nr is out of range (the last case is logged).  Slots 0-9 of
+ * i_data are direct blocks; slots 10, 11 and 12 head the single, double
+ * and triple indirect trees.
+ */
+int sysv_bmap(struct inode * inode,int block_nr)
+{
+	unsigned int block = block_nr;
+	struct super_block * sb = inode->i_sb;
+	int convert;
+	int i;
+	struct buffer_head * bh;
+
+	if (block < 10)
+		return inode_bmap(sb,inode,block);
+	block -= 10;
+	convert = sb->sv_convert;
+	if (block < sb->sv_ind_per_block) {
+		i = inode_bmap(sb,inode,10);
+		if (!i)
+			return 0;
+		bh = bread(inode->i_dev,i,sb->sv_block_size);
+		return block_bmap(sb, bh, block, convert);
+	}
+	block -= sb->sv_ind_per_block;
+	if (block < sb->sv_ind_per_block_2) {
+		i = inode_bmap(sb,inode,11);
+		if (!i)
+			return 0;
+		bh = bread(inode->i_dev,i,sb->sv_block_size);
+		i = block_bmap(sb, bh, block >> sb->sv_ind_per_block_bits, convert);
+		if (!i)
+			return 0;
+		bh = bread(inode->i_dev,i,sb->sv_block_size);
+		return block_bmap(sb, bh, block & sb->sv_ind_per_block_1, convert);
+	}
+	block -= sb->sv_ind_per_block_2;
+	if (block < sb->sv_ind_per_block_3) {
+		i = inode_bmap(sb,inode,12);
+		if (!i)
+			return 0;
+		bh = bread(inode->i_dev,i,sb->sv_block_size);
+		i = block_bmap(sb, bh, block >> sb->sv_ind_per_block_2_bits, convert);
+		if (!i)
+			return 0;
+		bh = bread(inode->i_dev,i,sb->sv_block_size);
+		i = block_bmap(sb, bh, (block >> sb->sv_ind_per_block_bits) & sb->sv_ind_per_block_1,convert);
+		if (!i)
+			return 0;
+		bh = bread(inode->i_dev,i,sb->sv_block_size);
+		return block_bmap(sb, bh, block & sb->sv_ind_per_block_1, convert);
+	}
+	/* messages end in a newline like every other printk in this file */
+	if ((int)block<0) {
+		printk("sysv_bmap: block<0\n");
+		return 0;
+	}
+	printk("sysv_bmap: block>big\n");
+	return 0;
+}
+
+/* End of bmap support. */
+
+
+/* Access selected blocks of regular files (or directories) */
+
+/*
+ * Buffer for the block referenced by slot nr of the inode's i_data array.
+ * An empty slot yields NULL unless create is set, in which case a block is
+ * taken from sysv_new_block() and stored in the slot.  The slot is read
+ * again after sv_getblk(); if it changed in the meantime the result is
+ * dropped (and a freshly allocated block freed) and the lookup restarts.
+ */
+static struct buffer_head * inode_getblk(struct inode * inode, int nr, int create)
+{
+	struct super_block *sb;
+	unsigned long tmp;
+	unsigned long *p;
+	struct buffer_head * result;
+
+	sb = inode->i_sb;
+	p = inode->u.sysv_i.i_data + nr;
+repeat:
+	tmp = *p;
+	if (tmp) {
+		result = sv_getblk(sb, inode->i_dev, tmp);
+		if (tmp == *p)
+			return result;
+		brelse(result);
+		goto repeat;
+	}
+	if (!create)
+		return NULL;
+	tmp = sysv_new_block(sb);
+	if (!tmp)
+		return NULL;
+	result = sv_getblk(sb, inode->i_dev, tmp);
+	if (*p) {
+		sysv_free_block(sb,tmp);
+		brelse(result);
+		goto repeat;
+	}
+	*p = tmp;
+	inode->i_ctime = CURRENT_TIME;
+	inode->i_dirt = 1;
+	return result;
+}
+
+/*
+ * Like inode_getblk(), but the slot is entry nr of the indirect block held
+ * in bh.  bh is read first if it is not up to date, and it is released on
+ * every path that gets past the NULL check.
+ */
+static struct buffer_head * block_getblk(struct inode * inode,
+	struct buffer_head * bh, int nr, int create)
+{
+	struct super_block *sb;
+	unsigned long tmp, block;
+	sysv_zone_t *p;
+	struct buffer_head * result;
+
+	if (!bh)
+		return NULL;
+	if (!bh->b_uptodate) {
+		ll_rw_block(READ, 1, &bh);
+		wait_on_buffer(bh);
+		if (!bh->b_uptodate) {
+			brelse(bh);
+			return NULL;
+		}
+	}
+	sb = inode->i_sb;
+	p = nr + (sysv_zone_t *) bh->b_data;
+repeat:
+	block = tmp = *p;
+	if (sb->sv_convert)
+		block = from_coh_ulong(block);
+	if (tmp) {
+		result = sv_getblk(sb, bh->b_dev, block);
+		if (tmp == *p) {
+			brelse(bh);
+			return result;
+		}
+		brelse(result);
+		goto repeat;
+	}
+	if (!create) {
+		brelse(bh);
+		return NULL;
+	}
+	block = sysv_new_block(sb);
+	if (!block) {
+		brelse(bh);
+		return NULL;
+	}
+	result = sv_getblk(sb, bh->b_dev, block);
+	if (*p) {
+		sysv_free_block(sb,block);
+		brelse(result);
+		goto repeat;
+	}
+	*p = (sb->sv_convert ?
to_coh_ulong(block) : block);
+	mark_buffer_dirty(bh, 1);
+	brelse(bh);
+	return result;
+}
+
+/*
+ * Buffer for logical block `block' of inode, walking the direct slots
+ * (0-9) or the single, double or triple indirect tree (slots 10, 11, 12).
+ * Missing blocks along the way are allocated when create is set.  Returns
+ * NULL if the block does not exist and create is 0, if an allocation or
+ * read fails, or if block is out of range (the last case is logged).
+ */
+struct buffer_head * sysv_getblk(struct inode * inode, unsigned int block, int create)
+{
+	struct super_block * sb = inode->i_sb;
+	struct buffer_head * bh;
+
+	if (block < 10)
+		return inode_getblk(inode,block,create);
+	block -= 10;
+	if (block < sb->sv_ind_per_block) {
+		bh = inode_getblk(inode,10,create);
+		return block_getblk(inode, bh, block, create);
+	}
+	block -= sb->sv_ind_per_block;
+	if (block < sb->sv_ind_per_block_2) {
+		bh = inode_getblk(inode,11,create);
+		bh = block_getblk(inode, bh, block >> sb->sv_ind_per_block_bits, create);
+		return block_getblk(inode, bh, block & sb->sv_ind_per_block_1, create);
+	}
+	block -= sb->sv_ind_per_block_2;
+	if (block < sb->sv_ind_per_block_3) {
+		bh = inode_getblk(inode,12,create);
+		bh = block_getblk(inode, bh, block >> sb->sv_ind_per_block_2_bits, create);
+		bh = block_getblk(inode, bh, (block >> sb->sv_ind_per_block_bits) & sb->sv_ind_per_block_1, create);
+		return block_getblk(inode, bh, block & sb->sv_ind_per_block_1, create);
+	}
+	/* messages end in a newline like every other printk in this file */
+	if ((int)block<0) {
+		printk("sysv_getblk: block<0\n");
+		return NULL;
+	}
+	printk("sysv_getblk: block>big\n");
+	return NULL;
+}
+
+/*
+ * sysv_getblk() followed by a read: returns an up-to-date buffer for the
+ * block, or NULL if the block cannot be obtained or the read fails.
+ */
+struct buffer_head * sysv_file_bread(struct inode * inode, int block, int create)
+{
+	struct buffer_head * bh;
+
+	bh = sysv_getblk(inode,block,create);
+	if (!bh || bh->b_uptodate)
+		return bh;
+	ll_rw_block(READ, 1, &bh);
+	wait_on_buffer(bh);
+	if (bh->b_uptodate)
+		return bh;
+	brelse(bh);
+	return NULL;
+}
+
+
+/*
+ * Assemble a 24-bit value from 3 bytes at p: a 16-bit load at p supplies
+ * bits 0-15 and the byte at p+2 supplies bits 16-23.
+ * NOTE(review): the 16-bit load is unaligned-capable and host-endian only
+ * on the machines this code was written for -- confirm before porting.
+ */
+static inline unsigned long read3byte (char * p)
+{
+	return (unsigned long)(*(unsigned short *)p)
+	     | (unsigned long)(*(unsigned char *)(p+2)) << 16;
+}
+
+/* Store the low 24 bits of val at p in the layout read3byte() expects. */
+static inline void write3byte (char * p, unsigned long val)
+{
+	*(unsigned short *)p = (unsigned short) val;
+	*(unsigned char *)(p+2) = val >> 16;
+}
+
+/*
+ * Coherent variant: the byte at p supplies bits 16-23 and a 16-bit load at
+ * p+1 supplies bits 0-15.
+ */
+static inline unsigned long coh_read3byte (char * p)
+{
+	return (unsigned long)(*(unsigned char *)p) << 16
+	     | (unsigned long)(*(unsigned
short *)(p+1));
+}
+/*
+ * Store the low 24 bits of val at p in the Coherent layout: bits 16-23 go
+ * into the byte at p, bits 0-15 into a 16-bit store at p+1.
+ */
+static inline void coh_write3byte (char * p, unsigned long val)
+{
+	*(unsigned char *)p = val >> 16;
+	*(unsigned short *)(p+1) = (unsigned short) val;
+}
+/*
+ * Fill the in-core inode from its on-disk copy.  i_op and i_mode are
+ * cleared first, so an inode whose number is out of range or whose table
+ * block cannot be read is left with mode 0 and no operations.
+ */
+void sysv_read_inode(struct inode * inode)
+{
+	struct super_block * sb = inode->i_sb;
+	struct buffer_head * bh;
+	struct sysv_inode * raw_inode;
+	unsigned int block, ino;
+	umode_t mode;
+
+	ino = inode->i_ino;
+	inode->i_op = NULL;
+	inode->i_mode = 0;
+	if (!ino || ino > sb->sv_ninodes) {
+		printk("Bad inode number on dev 0x%04x: %d is out of range\n",
+			inode->i_dev, ino);
+		return;
+	}
+	block = sb->sv_firstinodezone + ((ino-1) >> sb->sv_inodes_per_block_bits); /* table block holding inode ino */
+	if (!(bh = sv_bread(sb,inode->i_dev,block))) {
+		printk("Major problem: unable to read inode from dev 0x%04x\n",
+			inode->i_dev);
+		return;
+	}
+	raw_inode = (struct sysv_inode *) bh->b_data + ((ino-1) & sb->sv_inodes_per_block_1); /* slot inside that block */
+	mode = raw_inode->i_mode;
+	if (sb->sv_kludge_symlinks)
+		mode = from_coh_imode(mode);
+	/* SystemV FS: kludge permissions if ino==SYSV_ROOT_INO ??
*/
+	inode->i_mode = mode;
+	inode->i_uid = raw_inode->i_uid;
+	inode->i_gid = raw_inode->i_gid;
+	inode->i_nlink = raw_inode->i_nlink;
+	if (sb->sv_convert) { /* size and times pass through from_coh_ulong() */
+		inode->i_size = from_coh_ulong(raw_inode->i_size);
+		inode->i_atime = from_coh_ulong(raw_inode->i_atime);
+		inode->i_mtime = from_coh_ulong(raw_inode->i_mtime);
+		inode->i_ctime = from_coh_ulong(raw_inode->i_ctime);
+	} else {
+		inode->i_size = raw_inode->i_size;
+		inode->i_atime = raw_inode->i_atime;
+		inode->i_mtime = raw_inode->i_mtime;
+		inode->i_ctime = raw_inode->i_ctime;
+	}
+	inode->i_blocks = inode->i_blksize = 0;
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+		inode->i_rdev = raw_inode->i_a.i_rdev; /* device nodes keep a device number instead of block addresses */
+	else
+		if (sb->sv_convert)
+			for (block = 0; block < 10+1+1+1; block++) /* 10 direct slots + single, double, triple indirect */
+				inode->u.sysv_i.i_data[block] =
+					coh_read3byte(&raw_inode->i_a.i_addb[3*block]);
+		else
+			for (block = 0; block < 10+1+1+1; block++) /* each address takes 3 bytes on disk */
+				inode->u.sysv_i.i_data[block] =
+					read3byte(&raw_inode->i_a.i_addb[3*block]);
+	brelse(bh);
+	if (S_ISREG(inode->i_mode)) /* choose the operations table by file type */
+		inode->i_op = &sysv_file_inode_operations;
+	else if (S_ISDIR(inode->i_mode))
+		inode->i_op = &sysv_dir_inode_operations;
+	else if (S_ISLNK(inode->i_mode))
+		inode->i_op = &sysv_symlink_inode_operations;
+	else if (S_ISCHR(inode->i_mode))
+		inode->i_op = &chrdev_inode_operations;
+	else if (S_ISBLK(inode->i_mode))
+		inode->i_op = &blkdev_inode_operations;
+	else if (S_ISFIFO(inode->i_mode))
+		init_fifo(inode);
+}
+
+/* To avoid inconsistencies between inodes in memory and inodes on disk.
*/
+extern int sysv_notify_change(struct inode *inode, struct iattr *attr)
+{	/*
+	 * Returns the error from inode_change_ok() if the change is refused.
+	 * On filesystems with sv_kludge_symlinks set, a requested mode equal
+	 * to COH_KLUDGE_SYMLINK_MODE is replaced by COH_KLUDGE_NOT_SYMLINK
+	 * before the attributes are applied with inode_setattr().
+	 */
+	int error;
+
+	if ((error = inode_change_ok(inode, attr)) != 0)
+		return error;
+
+	if (attr->ia_valid & ATTR_MODE)
+		if (inode->i_sb->sv_kludge_symlinks)
+			if (attr->ia_mode == COH_KLUDGE_SYMLINK_MODE)
+				attr->ia_mode = COH_KLUDGE_NOT_SYMLINK;
+
+	inode_setattr(inode, attr);
+
+	return 0;
+}
+/*
+ * Copy the in-core inode into its slot of the on-disk inode table and mark
+ * that table block dirty.  Returns the buffer holding the block (the
+ * caller releases it), or 0 if the inode number is out of range or the
+ * block cannot be read.  i_dirt is cleared on every path.
+ */
+static struct buffer_head * sysv_update_inode(struct inode * inode)
+{
+	struct super_block * sb = inode->i_sb;
+	struct buffer_head * bh;
+	struct sysv_inode * raw_inode;
+	unsigned int ino, block;
+	umode_t mode;
+
+	ino = inode->i_ino;
+	if (!ino || ino > sb->sv_ninodes) {
+		printk("Bad inode number on dev 0x%04x: %d is out of range\n",
+			inode->i_dev, ino);
+		inode->i_dirt = 0;
+		return 0;
+	}
+	block = sb->sv_firstinodezone + ((ino-1) >> sb->sv_inodes_per_block_bits); /* table block holding inode ino */
+	if (!(bh = sv_bread(sb,inode->i_dev,block))) {
+		printk("unable to read i-node block\n");
+		inode->i_dirt = 0;
+		return 0;
+	}
+	raw_inode = (struct sysv_inode *) bh->b_data + ((ino-1) & sb->sv_inodes_per_block_1); /* slot inside that block */
+	mode = inode->i_mode;
+	if (sb->sv_kludge_symlinks)
+		mode = to_coh_imode(mode);
+	raw_inode->i_mode = mode;
+	raw_inode->i_uid = inode->i_uid;
+	raw_inode->i_gid = inode->i_gid;
+	raw_inode->i_nlink = inode->i_nlink;
+	if (sb->sv_convert) { /* size and times pass through to_coh_ulong() */
+		raw_inode->i_size = to_coh_ulong(inode->i_size);
+		raw_inode->i_atime = to_coh_ulong(inode->i_atime);
+		raw_inode->i_mtime = to_coh_ulong(inode->i_mtime);
+		raw_inode->i_ctime = to_coh_ulong(inode->i_ctime);
+	} else {
+		raw_inode->i_size = inode->i_size;
+		raw_inode->i_atime = inode->i_atime;
+		raw_inode->i_mtime = inode->i_mtime;
+		raw_inode->i_ctime = inode->i_ctime;
+	}
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+		raw_inode->i_a.i_rdev = inode->i_rdev; /* write 2 or 3 bytes ??
*/
+	else
+		if (sb->sv_convert)
+			for (block = 0; block < 10+1+1+1; block++) /* 10 direct slots + single, double, triple indirect */
+				coh_write3byte(&raw_inode->i_a.i_addb[3*block],inode->u.sysv_i.i_data[block]);
+		else
+			for (block = 0; block < 10+1+1+1; block++) /* each address takes 3 bytes on disk */
+				write3byte(&raw_inode->i_a.i_addb[3*block],inode->u.sysv_i.i_data[block]);
+	inode->i_dirt=0;
+	mark_buffer_dirty(bh, 1);
+	return bh;
+}
+/*
+ * Copy the inode into its table block (marked dirty by
+ * sysv_update_inode()) and drop the buffer; no write is started here.
+ */
+void sysv_write_inode(struct inode * inode)
+{
+	struct buffer_head *bh;
+	bh = sysv_update_inode(inode);
+	brelse(bh);
+}
+/*
+ * Like sysv_write_inode(), but a dirty table block is also written out and
+ * waited for.  Returns 0 on success, -1 if the inode could not be updated
+ * or the write left the buffer not up to date.
+ */
+int sysv_sync_inode(struct inode * inode)
+{
+	int err = 0;
+	struct buffer_head *bh;
+
+	bh = sysv_update_inode(inode);
+	if (bh && bh->b_dirt) {
+		ll_rw_block(WRITE, 1, &bh);
+		wait_on_buffer(bh);
+		if (bh->b_req && !bh->b_uptodate)
+		{
+			printk ("IO error syncing sysv inode [%04x:%08lx]\n",
+				inode->i_dev, inode->i_ino);
+			err = -1;
+		}
+	}
+	else if (!bh)
+		err = -1;
+	brelse (bh);
+	return err;
+}
+
diff --git a/fs/sysv/mmap.c b/fs/sysv/mmap.c
new file mode 100644
index 000000000..3ec3867a9
--- /dev/null
+++ b/fs/sysv/mmap.c
@@ -0,0 +1,85 @@
+/*
+ * linux/fs/sysv/mmap.c
+ *
+ * mm/memory.c, mm/mmap.c
+ * Copyright (C) 1991, 1992, 1993 Linus Torvalds
+ *
+ * nfs/mmap.c
+ * Copyright (C) 1993 Jon Tombs
+ *
+ * fs/msdos/mmap.c
+ * Copyright (C) 1994 Jacques Gelinas
+ *
+ * fs/sysv/mmap.c
+ * Copyright (C) 1994 Bruno Haible
+ *
+ * SystemV/Coherent mmap handling
+ */
+
+#include <asm/segment.h>
+
+#include <linux/fs.h>
+#include <linux/sysv_fs.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/malloc.h>
+
+/*
+ * Fill in the supplied page for mmap
+ *
+ * The faulting address is rounded down with PAGE_MASK and turned into a
+ * file position; at most one page, and never more than what is left of the
+ * mapping, is read with sysv_file_read().  Whatever the read did not
+ * deliver is zero-filled.  The page address is returned unchanged.
+ */
+static unsigned long sysv_file_mmap_nopage (struct vm_area_struct * area,
+	unsigned long address, unsigned long page, int no_share)
+{
+	int remaining, count, old_fs;
+	struct file filp;
+
+	address &= PAGE_MASK;
+	/* prepare a file pointer */
+	filp.f_pos = address - area->vm_start + area->vm_offset;
+	filp.f_reada = 0;
+	remaining = area->vm_end -
address; + if (remaining > PAGE_SIZE) + remaining = PAGE_SIZE; + /* read from the file. page is in kernel space, not user space. */ + old_fs = get_fs(); set_fs(get_ds()); + count = sysv_file_read (area->vm_inode, &filp, (char *)page, remaining); + set_fs(old_fs); + if (count < 0) + count = 0; /* do nothing on I/O error ?? */ + else + remaining -= count; + if (remaining > 0) + memset((char *)page + count, 0, remaining); + return page; +} + +static struct vm_operations_struct sysv_file_mmap = { + NULL, /* open */ + NULL, /* close */ + sysv_file_mmap_nopage, /* nopage */ + NULL, /* wppage */ + NULL, /* share */ + NULL, /* unmap */ +}; + +int sysv_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma) +{ + if (vma->vm_page_prot & PAGE_RW) /* only PAGE_COW or read-only supported right now */ + return -EINVAL; + if (vma->vm_offset & (inode->i_sb->s_blocksize - 1)) + return -EINVAL; + if (!inode->i_sb || !S_ISREG(inode->i_mode)) + return -EACCES; + if (!IS_RDONLY(inode)) { + inode->i_atime = CURRENT_TIME; + inode->i_dirt = 1; + } + + vma->vm_inode = inode; + inode->i_count++; + vma->vm_ops = &sysv_file_mmap; + return 0; +} diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c new file mode 100644 index 000000000..c9fd77158 --- /dev/null +++ b/fs/sysv/namei.c @@ -0,0 +1,822 @@ +/* + * linux/fs/sysv/namei.c + * + * minix/namei.c + * Copyright (C) 1991, 1992 Linus Torvalds + * + * coh/namei.c + * Copyright (C) 1993 Pascal Haible, Bruno Haible + * + * sysv/namei.c + * Copyright (C) 1993 Bruno Haible + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/sysv_fs.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> + +/* compare strings: name[0..len-1] (not zero-terminated) and + * buffer[0..] 
(filled with zeroes up to buffer[0..maxlen-1]) + */ +static inline int namecompare(int len, int maxlen, + const char * name, const char * buffer) +{ + if (len > maxlen) /* name is longer than the buffer field: no match */ + return 0; + if (len < maxlen && buffer[len]) /* buffer's name continues past len: no match */ + return 0; + return !memcmp(name, buffer, len); /* 1 if the first len bytes are equal, else 0 */ +} + +/* + * ok, we cannot use strncmp, as the name is not in our data space. [Now it is!] + * Thus we'll have to use sysv_match. No big problem. Match also makes + * some sanity tests. + * + * NOTE! unlike strncmp, sysv_match returns 1 for success, 0 for failure. + * + * An entry whose inode field is 0 never matches, and neither does a name + * longer than SYSV_NAMELEN. A zero-length name matches only the entry + * whose name is exactly ".". + */ +static int sysv_match(int len, const char * name, struct sysv_dir_entry * de) +{ + if (!de->inode || len > SYSV_NAMELEN) + return 0; + /* "" means "." ---> so paths like "/usr/lib//libc.a" work */ + if (!len && (de->name[0]=='.') && (de->name[1]=='\0')) + return 1; + return namecompare(len,SYSV_NAMELEN,name,de->name); +} + +/* + * sysv_find_entry() + * + * finds an entry in the specified directory with the wanted name. It + * returns the cache buffer in which the entry was found, and the entry + * itself (as a parameter - res_dir). It does NOT read the inode of the + * entry - you'll have to do that yourself if you want to. 
+ */ +static struct buffer_head * sysv_find_entry(struct inode * dir, + const char * name, int namelen, struct sysv_dir_entry ** res_dir) +{ + struct super_block * sb; + unsigned long pos, block, offset; /* pos = block * block_size + offset */ + struct buffer_head * bh; + + *res_dir = NULL; + if (!dir) + return NULL; + sb = dir->i_sb; + if (namelen > SYSV_NAMELEN) + if (sb->sv_truncate) + namelen = SYSV_NAMELEN; + else /* binds to the inner if: name too long and truncation is off */ + return NULL; + bh = NULL; + pos = block = offset = 0; + while (pos < dir->i_size) { + if (!bh) { + bh = sysv_file_bread(dir,block,0); + if (!bh) { /* block could not be read: skip over it */ + /* offset = 0; */ block++; + pos += sb->sv_block_size; + continue; + } + } + if (sysv_match(namelen, name, + *res_dir = (struct sysv_dir_entry *) (bh->b_data + offset) )) + return bh; /* found: bh is returned without being released */ + pos += SYSV_DIRSIZE; + offset += SYSV_DIRSIZE; + if (offset < sb->sv_block_size) /* more entries left in this block */ + continue; + brelse(bh); + bh = NULL; + offset = 0; block++; + } + brelse(bh); + *res_dir = NULL; /* not found: do not leave the last examined entry behind */ + return NULL; +} +/* + * sysv_lookup() + * + * looks up name in dir. On success the inode obtained with iget() is + * stored in *result and 0 is returned. Returns -ENOENT if dir is NULL, + * is not a directory, or has no matching entry, and -EACCES if iget() + * fails. A non-NULL dir is released with iput() on every path. + */ +int sysv_lookup(struct inode * dir,const char * name, int len, + struct inode ** result) +{ + int ino; + struct sysv_dir_entry * de; + struct buffer_head * bh; + + *result = NULL; + if (!dir) + return -ENOENT; + if (!S_ISDIR(dir->i_mode)) { + iput(dir); + return -ENOENT; + } + if (!(bh = sysv_find_entry(dir,name,len,&de))) { + iput(dir); + return -ENOENT; + } + ino = de->inode; /* copy the number out before the buffer is released */ + brelse(bh); + if (!(*result = iget(dir->i_sb,ino))) { + iput(dir); + return -EACCES; + } + iput(dir); + return 0; +} + +/* + * sysv_add_entry() + * + * adds a file entry to the specified directory, returning a possible + * error value if it fails. + * + * NOTE!! The inode part of 'de' is left at 0 - which means you + * may not sleep between calling this and putting something into + * the entry, as someone else might have used it while you slept. 
+ */ +static int sysv_add_entry(struct inode * dir, + const char * name, int namelen, + struct buffer_head ** res_buf, + struct sysv_dir_entry ** res_dir) +{ + struct super_block * sb; + int i; + unsigned long pos, block, offset; /* pos = block * block_size + offset */ + struct buffer_head * bh; + struct sysv_dir_entry * de; + + *res_buf = NULL; + *res_dir = NULL; + if (!dir) + return -ENOENT; + sb = dir->i_sb; + if (namelen > SYSV_NAMELEN) + if (sb->sv_truncate) + namelen = SYSV_NAMELEN; + else + return -ENAMETOOLONG; + if (!namelen) + return -ENOENT; + bh = NULL; + pos = block = offset = 0; + while (1) { + if (!bh) { + bh = sysv_file_bread(dir,block,1); + if (!bh) + return -ENOSPC; + } + de = (struct sysv_dir_entry *) (bh->b_data + offset); + pos += SYSV_DIRSIZE; + offset += SYSV_DIRSIZE; + if (pos > dir->i_size) { + de->inode = 0; + dir->i_size = pos; + dir->i_dirt = 1; + } + if (de->inode) { + if (namecompare(namelen, SYSV_NAMELEN, name, de->name)) { + brelse(bh); + return -EEXIST; + } + } else { + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + for (i = 0; i < SYSV_NAMELEN ; i++) + de->name[i] = (i < namelen) ? 
name[i] : 0; + mark_buffer_dirty(bh, 1); + *res_dir = de; + break; + } + if (offset < sb->sv_block_size) + continue; + brelse(bh); + bh = NULL; + offset = 0; block++; + } + *res_buf = bh; + return 0; +} + +int sysv_create(struct inode * dir,const char * name, int len, int mode, + struct inode ** result) +{ + int error; + struct inode * inode; + struct buffer_head * bh; + struct sysv_dir_entry * de; + + *result = NULL; + if (!dir) + return -ENOENT; + inode = sysv_new_inode(dir); + if (!inode) { + iput(dir); + return -ENOSPC; + } + inode->i_op = &sysv_file_inode_operations; + inode->i_mode = mode; + inode->i_dirt = 1; + error = sysv_add_entry(dir,name,len, &bh ,&de); + if (error) { + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + iput(dir); + return error; + } + de->inode = inode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + *result = inode; + return 0; +} + +int sysv_mknod(struct inode * dir, const char * name, int len, int mode, int rdev) +{ + int error; + struct inode * inode; + struct buffer_head * bh; + struct sysv_dir_entry * de; + + if (!dir) + return -ENOENT; + bh = sysv_find_entry(dir,name,len,&de); + if (bh) { + brelse(bh); + iput(dir); + return -EEXIST; + } + inode = sysv_new_inode(dir); + if (!inode) { + iput(dir); + return -ENOSPC; + } + inode->i_uid = current->fsuid; + inode->i_mode = mode; + inode->i_op = NULL; + if (S_ISREG(inode->i_mode)) + inode->i_op = &sysv_file_inode_operations; + else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &sysv_dir_inode_operations; + if (dir->i_mode & S_ISGID) + inode->i_mode |= S_ISGID; + } + else if (S_ISLNK(inode->i_mode)) + inode->i_op = &sysv_symlink_inode_operations; + else if (S_ISCHR(inode->i_mode)) + inode->i_op = &chrdev_inode_operations; + else if (S_ISBLK(inode->i_mode)) + inode->i_op = &blkdev_inode_operations; + else if (S_ISFIFO(inode->i_mode)) + init_fifo(inode); + if (S_ISBLK(mode) || S_ISCHR(mode)) + inode->i_rdev = rdev; + inode->i_dirt = 1; + error = sysv_add_entry(dir, 
name, len, &bh, &de); + if (error) { + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + iput(dir); + return error; + } + de->inode = inode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + iput(inode); + return 0; +} + +int sysv_mkdir(struct inode * dir, const char * name, int len, int mode) +{ + int error; + struct inode * inode; + struct buffer_head * bh, *dir_block; + struct sysv_dir_entry * de; + + if (!dir) { + iput(dir); + return -EINVAL; + } + bh = sysv_find_entry(dir,name,len,&de); + if (bh) { + brelse(bh); + iput(dir); + return -EEXIST; + } + if (dir->i_nlink >= dir->i_sb->sv_link_max) { + iput(dir); + return -EMLINK; + } + inode = sysv_new_inode(dir); + if (!inode) { + iput(dir); + return -ENOSPC; + } + inode->i_op = &sysv_dir_inode_operations; + inode->i_size = 2 * SYSV_DIRSIZE; + dir_block = sysv_file_bread(inode,0,1); + if (!dir_block) { + iput(dir); + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + return -ENOSPC; + } + de = (struct sysv_dir_entry *) (dir_block->b_data + 0*SYSV_DIRSIZE); + de->inode = inode->i_ino; + strcpy(de->name,"."); /* rest of de->name is zero, see sysv_new_block */ + de = (struct sysv_dir_entry *) (dir_block->b_data + 1*SYSV_DIRSIZE); + de->inode = dir->i_ino; + strcpy(de->name,".."); /* rest of de->name is zero, see sysv_new_block */ + inode->i_nlink = 2; + mark_buffer_dirty(dir_block, 1); + brelse(dir_block); + inode->i_mode = S_IFDIR | (mode & 0777 & ~current->fs->umask); + if (dir->i_mode & S_ISGID) + inode->i_mode |= S_ISGID; + inode->i_dirt = 1; + error = sysv_add_entry(dir, name, len, &bh, &de); + if (error) { + iput(dir); + inode->i_nlink=0; + iput(inode); + return error; + } + de->inode = inode->i_ino; + mark_buffer_dirty(bh, 1); + dir->i_nlink++; + dir->i_dirt = 1; + iput(dir); + iput(inode); + brelse(bh); + return 0; +} + +/* + * routine to check that the specified directory is empty (for rmdir) + */ +static int empty_dir(struct inode * inode) +{ + struct super_block * sb; + unsigned 
long pos, block, offset; /* pos = block * block_size + offset */ + struct buffer_head * bh; + struct sysv_dir_entry * de; + + if (!inode) + return 1; + block = 0; + bh = NULL; + pos = offset = 2*SYSV_DIRSIZE; + if (inode->i_size % SYSV_DIRSIZE) + goto bad_dir; + if (inode->i_size < pos) + goto bad_dir; + bh = sysv_file_bread(inode,0,0); + if (!bh) + goto bad_dir; + de = (struct sysv_dir_entry *) (bh->b_data + 0*SYSV_DIRSIZE); + if (!de->inode || strcmp(de->name,".")) + goto bad_dir; + de = (struct sysv_dir_entry *) (bh->b_data + 1*SYSV_DIRSIZE); + if (!de->inode || strcmp(de->name,"..")) + goto bad_dir; + sb = inode->i_sb; + while (pos < inode->i_size) { + if (!bh) { + bh = sysv_file_bread(inode,block,0); + if (!bh) { + /* offset = 0; */ block++; + pos += sb->sv_block_size; + continue; + } + } + de = (struct sysv_dir_entry *) (bh->b_data + offset); + pos += SYSV_DIRSIZE; + offset += SYSV_DIRSIZE; + if (de->inode) { + brelse(bh); + return 0; + } + if (offset < sb->sv_block_size) + continue; + brelse(bh); + bh = NULL; + offset = 0; block++; + } + brelse(bh); + return 1; +bad_dir: + brelse(bh); + printk("Bad directory on device %04x\n",inode->i_dev); + return 1; +} + +int sysv_rmdir(struct inode * dir, const char * name, int len) +{ + int retval; + struct inode * inode; + struct buffer_head * bh; + struct sysv_dir_entry * de; + + inode = NULL; + bh = sysv_find_entry(dir,name,len,&de); + retval = -ENOENT; + if (!bh) + goto end_rmdir; + retval = -EPERM; + if (!(inode = iget(dir->i_sb, de->inode))) + goto end_rmdir; + if ((dir->i_mode & S_ISVTX) && !fsuser() && + current->fsuid != inode->i_uid && + current->fsuid != dir->i_uid) + goto end_rmdir; + if (inode->i_dev != dir->i_dev) + goto end_rmdir; + if (inode == dir) /* we may not delete ".", but "../dir" is ok */ + goto end_rmdir; + if (!S_ISDIR(inode->i_mode)) { + retval = -ENOTDIR; + goto end_rmdir; + } + if (!empty_dir(inode)) { + retval = -ENOTEMPTY; + goto end_rmdir; + } + if (de->inode != inode->i_ino) { + retval = 
-ENOENT; + goto end_rmdir; + } + if (inode->i_count > 1) { + retval = -EBUSY; + goto end_rmdir; + } + if (inode->i_nlink != 2) + printk("empty directory has nlink!=2 (%d)\n",inode->i_nlink); + de->inode = 0; + mark_buffer_dirty(bh, 1); + inode->i_nlink=0; + inode->i_dirt=1; + dir->i_nlink--; + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + dir->i_dirt=1; + retval = 0; +end_rmdir: + iput(dir); + iput(inode); + brelse(bh); + return retval; +} + +int sysv_unlink(struct inode * dir, const char * name, int len) +{ + int retval; + struct inode * inode; + struct buffer_head * bh; + struct sysv_dir_entry * de; + +repeat: + retval = -ENOENT; + inode = NULL; + bh = sysv_find_entry(dir,name,len,&de); + if (!bh) + goto end_unlink; + if (!(inode = iget(dir->i_sb, de->inode))) + goto end_unlink; + retval = -EPERM; + if (S_ISDIR(inode->i_mode)) + goto end_unlink; + if (de->inode != inode->i_ino) { + iput(inode); + brelse(bh); + current->counter = 0; + schedule(); + goto repeat; + } + if ((dir->i_mode & S_ISVTX) && !fsuser() && + current->fsuid != inode->i_uid && + current->fsuid != dir->i_uid) + goto end_unlink; + if (de->inode != inode->i_ino) { + retval = -ENOENT; + goto end_unlink; + } + if (!inode->i_nlink) { + printk("Deleting nonexistent file (%04x:%lu), %d\n", + inode->i_dev,inode->i_ino,inode->i_nlink); + inode->i_nlink=1; + } + de->inode = 0; + mark_buffer_dirty(bh, 1); + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + dir->i_dirt = 1; + inode->i_nlink--; + inode->i_ctime = dir->i_ctime; + inode->i_dirt = 1; + retval = 0; +end_unlink: + brelse(bh); + iput(inode); + iput(dir); + return retval; +} + +int sysv_symlink(struct inode * dir, const char * name, int len, const char * symname) +{ + struct sysv_dir_entry * de; + struct inode * inode; + struct buffer_head * name_block; + char * name_block_data; + struct super_block * sb; + int i; + char c; + struct buffer_head * bh; + + if (!(inode = sysv_new_inode(dir))) { + iput(dir); + return -ENOSPC; + } + 
inode->i_mode = S_IFLNK | 0777; + inode->i_op = &sysv_symlink_inode_operations; + name_block = sysv_file_bread(inode,0,1); + if (!name_block) { + iput(dir); + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + return -ENOSPC; + } + sb = inode->i_sb; + name_block_data = name_block->b_data; + i = 0; + while (i < sb->sv_block_size_1 && (c = *(symname++))) + name_block_data[i++] = c; + name_block_data[i] = 0; + mark_buffer_dirty(name_block, 1); + brelse(name_block); + inode->i_size = i; + inode->i_dirt = 1; + bh = sysv_find_entry(dir,name,len,&de); + if (bh) { + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + brelse(bh); + iput(dir); + return -EEXIST; + } + i = sysv_add_entry(dir, name, len, &bh, &de); + if (i) { + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + iput(dir); + return i; + } + de->inode = inode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + iput(inode); + return 0; +} + +int sysv_link(struct inode * oldinode, struct inode * dir, const char * name, int len) +{ + int error; + struct sysv_dir_entry * de; + struct buffer_head * bh; + + if (S_ISDIR(oldinode->i_mode)) { + iput(oldinode); + iput(dir); + return -EPERM; + } + if (oldinode->i_nlink >= oldinode->i_sb->sv_link_max) { + iput(oldinode); + iput(dir); + return -EMLINK; + } + bh = sysv_find_entry(dir,name,len,&de); + if (bh) { + brelse(bh); + iput(dir); + iput(oldinode); + return -EEXIST; + } + error = sysv_add_entry(dir, name, len, &bh, &de); + if (error) { + iput(dir); + iput(oldinode); + return error; + } + de->inode = oldinode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + oldinode->i_nlink++; + oldinode->i_ctime = CURRENT_TIME; + oldinode->i_dirt = 1; + iput(oldinode); + return 0; +} + +/* return 1 if `new' is a subdir of `old' on the same device */ +static int subdir(struct inode * new_inode, struct inode * old_inode) +{ + int ino; + int result; + + new_inode->i_count++; + result = 0; + for (;;) { + if (new_inode == old_inode) { + result = 1; 
+ break; + } + if (new_inode->i_dev != old_inode->i_dev) + break; + ino = new_inode->i_ino; + if (sysv_lookup(new_inode,"..",2,&new_inode)) + break; + if (new_inode->i_ino == ino) /* root dir reached ? */ + break; + } + iput(new_inode); + return result; +} + +#define PARENT_INO(buffer) \ +(((struct sysv_dir_entry *) ((buffer) + 1*SYSV_DIRSIZE))->inode) + +/* + * rename uses retrying to avoid race-conditions: at least they should be minimal. + * it tries to allocate all the blocks, then sanity-checks, and if the sanity- + * checks fail, it tries to restart itself again. Very practical - no changes + * are done until we know everything works ok.. and then all the changes can be + * done in one fell swoop when we have claimed all the buffers needed. + * + * Anybody can rename anything with this: the permission checks are left to the + * higher-level routines. + */ +static int do_sysv_rename(struct inode * old_dir, const char * old_name, int old_len, + struct inode * new_dir, const char * new_name, int new_len) +{ + struct inode * old_inode, * new_inode; + struct buffer_head * old_bh, * new_bh, * dir_bh; + struct sysv_dir_entry * old_de, * new_de; + int retval; + + goto start_up; +try_again: + brelse(old_bh); + brelse(new_bh); + brelse(dir_bh); + iput(old_inode); + iput(new_inode); + current->counter = 0; + schedule(); +start_up: + old_inode = new_inode = NULL; + old_bh = new_bh = dir_bh = NULL; + old_bh = sysv_find_entry(old_dir,old_name,old_len,&old_de); + retval = -ENOENT; + if (!old_bh) + goto end_rename; + old_inode = __iget(old_dir->i_sb, old_de->inode, 0); /* don't cross mnt-points */ + if (!old_inode) + goto end_rename; + retval = -EPERM; + if ((old_dir->i_mode & S_ISVTX) && + current->fsuid != old_inode->i_uid && + current->fsuid != old_dir->i_uid && !fsuser()) + goto end_rename; + new_bh = sysv_find_entry(new_dir,new_name,new_len,&new_de); + if (new_bh) { + new_inode = __iget(new_dir->i_sb, new_de->inode, 0); + if (!new_inode) { + brelse(new_bh); + new_bh = 
NULL; + } + } + if (new_inode == old_inode) { + retval = 0; + goto end_rename; + } + if (new_inode && S_ISDIR(new_inode->i_mode)) { + retval = -EISDIR; + if (!S_ISDIR(old_inode->i_mode)) + goto end_rename; + retval = -EINVAL; + if (subdir(new_dir, old_inode)) + goto end_rename; + retval = -ENOTEMPTY; + if (!empty_dir(new_inode)) + goto end_rename; + retval = -EBUSY; + if (new_inode->i_count > 1) + goto end_rename; + } + retval = -EPERM; + if (new_inode && (new_dir->i_mode & S_ISVTX) && + current->fsuid != new_inode->i_uid && + current->fsuid != new_dir->i_uid && !fsuser()) + goto end_rename; + if (S_ISDIR(old_inode->i_mode)) { + retval = -ENOTDIR; + if (new_inode && !S_ISDIR(new_inode->i_mode)) + goto end_rename; + retval = -EINVAL; + if (subdir(new_dir, old_inode)) + goto end_rename; + retval = -EIO; + dir_bh = sysv_file_bread(old_inode,0,0); + if (!dir_bh) + goto end_rename; + if (PARENT_INO(dir_bh->b_data) != old_dir->i_ino) + goto end_rename; + retval = -EMLINK; + if (!new_inode && new_dir->i_nlink >= new_dir->i_sb->sv_link_max) + goto end_rename; + } + if (!new_bh) { + retval = sysv_add_entry(new_dir,new_name,new_len,&new_bh,&new_de); + if (retval) + goto end_rename; + } +/* sanity checking before doing the rename - avoid races */ + if (new_inode && (new_de->inode != new_inode->i_ino)) + goto try_again; + if (new_de->inode && !new_inode) + goto try_again; + if (old_de->inode != old_inode->i_ino) + goto try_again; +/* ok, that's it */ + old_de->inode = 0; + new_de->inode = old_inode->i_ino; + old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; + old_dir->i_dirt = 1; + new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME; + new_dir->i_dirt = 1; + if (new_inode) { + new_inode->i_nlink--; + new_inode->i_ctime = CURRENT_TIME; + new_inode->i_dirt = 1; + } + mark_buffer_dirty(old_bh, 1); + mark_buffer_dirty(new_bh, 1); + if (dir_bh) { + PARENT_INO(dir_bh->b_data) = new_dir->i_ino; + mark_buffer_dirty(dir_bh, 1); + old_dir->i_nlink--; + old_dir->i_dirt = 1; + if 
(new_inode) { + new_inode->i_nlink--; + new_inode->i_dirt = 1; + } else { + new_dir->i_nlink++; + new_dir->i_dirt = 1; + } + } + retval = 0; +end_rename: + brelse(dir_bh); + brelse(old_bh); + brelse(new_bh); + iput(old_inode); + iput(new_inode); + iput(old_dir); + iput(new_dir); + return retval; +} + +/* + * Ok, rename also locks out other renames, as they can change the parent of + * a directory, and we don't want any races. Other races are checked for by + * "do_rename()", which restarts if there are inconsistencies. + * + * Note that there is no race between different filesystems: it's only within + * the same device that races occur: many renames can happen at once, as long + * as they are on different partitions. + */ +int sysv_rename(struct inode * old_dir, const char * old_name, int old_len, + struct inode * new_dir, const char * new_name, int new_len) +{ + static struct wait_queue * wait = NULL; + static int lock = 0; + int result; + + while (lock) + sleep_on(&wait); + lock = 1; + result = do_sysv_rename(old_dir, old_name, old_len, + new_dir, new_name, new_len); + lock = 0; + wake_up(&wait); + return result; +} diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c new file mode 100644 index 000000000..d392816bc --- /dev/null +++ b/fs/sysv/symlink.c @@ -0,0 +1,110 @@ +/* + * linux/fs/sysv/symlink.c + * + * minix/symlink.c + * Copyright (C) 1991, 1992 Linus Torvalds + * + * coh/symlink.c + * Copyright (C) 1993 Pascal Haible, Bruno Haible + * + * sysv/symlink.c + * Copyright (C) 1993 Bruno Haible + * + * SystemV/Coherent symlink handling code + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/sysv_fs.h> +#include <linux/stat.h> + +static int sysv_readlink(struct inode *, char *, int); +static int sysv_follow_link(struct inode *, struct inode *, int, int, struct inode **); + +/* + * symlinks can't do much... 
+ */ +struct inode_operations sysv_symlink_inode_operations = { + NULL, /* no file-operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + sysv_readlink, /* readlink */ + sysv_follow_link, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; +/* + * sysv_follow_link() + * + * If inode is not a symbolic link it is handed back unchanged in + * *res_inode and 0 is returned. Otherwise the first block of the link + * is read and its contents are passed to open_namei() as the path, + * with dir as the starting directory (the process root, with an extra + * reference, when dir is NULL). + * + * Returns -ENOENT if inode is NULL, -ELOOP if current->link_count is + * already above 5, -EIO if the block cannot be read, and otherwise the + * result of open_namei(). inode is released with iput() on every + * symlink path; dir is released here on the early-return paths and is + * otherwise handed to open_namei() (presumably released there - TODO + * confirm). + */ +static int sysv_follow_link(struct inode * dir, struct inode * inode, + int flag, int mode, struct inode ** res_inode) +{ + int error; + struct buffer_head * bh; + + *res_inode = NULL; + if (!dir) { + dir = current->fs->root; + dir->i_count++; + } + if (!inode) { + iput(dir); + return -ENOENT; + } + if (!S_ISLNK(inode->i_mode)) { + iput(dir); + *res_inode = inode; + return 0; + } + if (current->link_count > 5) { + iput(inode); + iput(dir); + return -ELOOP; + } + if (!(bh = sysv_file_bread(inode, 0, 0))) { /* is reading 1 block enough ?? */ + iput(inode); + iput(dir); + return -EIO; + } + iput(inode); + current->link_count++; /* nesting depth, checked against the limit above */ + error = open_namei(bh->b_data,flag,mode,res_inode,dir); + current->link_count--; + brelse(bh); + return error; +} +/* + * sysv_readlink() + * + * copies the link target from the first block of inode into buffer with + * put_fs_byte() (so buffer is presumably a user-space address - TODO + * confirm), stopping at the first zero byte or after buflen bytes, + * where buflen is first clamped to sv_block_size_1. No terminating + * zero byte is written. + * + * Returns the number of bytes copied, -EINVAL if inode is not a + * symbolic link, and 0 if the block cannot be read. inode is released + * with iput() on every path. + */ +static int sysv_readlink(struct inode * inode, char * buffer, int buflen) +{ + struct buffer_head * bh; + char * bh_data; + int i; + char c; + + if (!S_ISLNK(inode->i_mode)) { + iput(inode); + return -EINVAL; + } + if (buflen > inode->i_sb->sv_block_size_1) + buflen = inode->i_sb->sv_block_size_1; + bh = sysv_file_bread(inode, 0, 0); + iput(inode); + if (!bh) + return 0; + bh_data = bh->b_data; + i = 0; + while (i<buflen && (c = bh_data[i])) { + i++; + put_fs_byte(c,buffer++); + } + brelse(bh); + return i; +} diff --git a/fs/sysv/truncate.c b/fs/sysv/truncate.c new file mode 100644 index 000000000..21451e6dd --- /dev/null +++ b/fs/sysv/truncate.c @@ -0,0 +1,283 @@ +/* + * linux/fs/sysv/truncate.c + * + * minix/truncate.c + * Copyright (C) 1991, 1992 Linus Torvalds + * + * coh/truncate.c + * Copyright (C) 1993 
Pascal Haible, Bruno Haible + * + * sysv/truncate.c + * Copyright (C) 1993 Bruno Haible + */ + +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/sysv_fs.h> +#include <linux/stat.h> + + +/* Linus' implementation of truncate. + * It doesn't need locking because it can tell from looking at bh->b_count + * whether a given block is in use elsewhere. + */ + +/* + * Truncate has the most races in the whole filesystem: coding it is + * a pain in the a**. Especially as I don't do any locking... + * + * The code may look a bit weird, but that's just because I've tried to + * handle things like file-size changes in a somewhat graceful manner. + * Anyway, truncating a file at the same time somebody else writes to it + * is likely to result in pretty weird behaviour... + * + * The new code handles normal truncates (size = 0) as well as the more + * general case (size = XXX). I hope. + */ + +/* We throw away any data beyond inode->i_size. */ + +static int trunc_direct(struct inode * inode) +{ + struct super_block * sb; + unsigned int i; + unsigned long * p; + unsigned long block; + struct buffer_head * bh; + int retry = 0; + + sb = inode->i_sb; +repeat: + for (i = ((unsigned long) inode->i_size + sb->sv_block_size_1) >> sb->sv_block_size_bits; i < 10; i++) { + p = inode->u.sysv_i.i_data + i; + block = *p; + if (!block) + continue; + bh = sv_get_hash_table(sb, inode->i_dev, block); + if ((i << sb->sv_block_size_bits) < inode->i_size) { + brelse(bh); + goto repeat; + } + if ((bh && bh->b_count != 1) || (block != *p)) { + retry = 1; + brelse(bh); + continue; + } + *p = 0; + inode->i_dirt = 1; + brelse(bh); + sysv_free_block(sb,block); + } + return retry; +} + +static int trunc_indirect(struct inode * inode, unsigned long offset, unsigned long * p, int convert, unsigned char * dirt) +{ + unsigned long indtmp, indblock; + struct super_block * sb; + struct buffer_head * indbh; + unsigned int i; + sysv_zone_t * ind; + unsigned long tmp, block; + struct buffer_head * bh; 
+ int retry = 0; + + indblock = indtmp = *p; + if (convert) + indblock = from_coh_ulong(indblock); + if (!indblock) + return 0; + sb = inode->i_sb; + indbh = sv_bread(sb, inode->i_dev, indblock); + if (indtmp != *p) { + brelse(indbh); + return 1; + } + if (!indbh) { + *p = 0; + *dirt = 1; + return 0; + } +repeat: + if (inode->i_size < offset) + i = 0; + else + i = (inode->i_size - offset + sb->sv_block_size_1) >> sb->sv_block_size_bits; + for (; i < sb->sv_ind_per_block; i++) { + ind = ((sysv_zone_t *) indbh->b_data) + i; + block = tmp = *ind; + if (sb->sv_convert) + block = from_coh_ulong(block); + if (!block) + continue; + bh = sv_get_hash_table(sb, inode->i_dev, block); + if ((i << sb->sv_block_size_bits) + offset < inode->i_size) { + brelse(bh); + goto repeat; + } + if ((bh && bh->b_count != 1) || (tmp != *ind)) { + retry = 1; + brelse(bh); + continue; + } + *ind = 0; + mark_buffer_dirty(indbh, 1); + brelse(bh); + sysv_free_block(sb,block); + } + for (i = 0; i < sb->sv_ind_per_block; i++) + if (((sysv_zone_t *) indbh->b_data)[i]) + goto done; + if ((indbh->b_count != 1) || (indtmp != *p)) { + brelse(indbh); + return 1; + } + *p = 0; + *dirt = 1; + sysv_free_block(sb,indblock); +done: + brelse(indbh); + return retry; +} + +static int trunc_dindirect(struct inode * inode, unsigned long offset, unsigned long * p, int convert, unsigned char * dirt) +{ + unsigned long indtmp, indblock; + struct super_block * sb; + struct buffer_head * indbh; + unsigned int i; + sysv_zone_t * ind; + unsigned long tmp, block; + int retry = 0; + + indblock = indtmp = *p; + if (convert) + indblock = from_coh_ulong(indblock); + if (!indblock) + return 0; + sb = inode->i_sb; + indbh = sv_bread(sb, inode->i_dev, indblock); + if (indtmp != *p) { + brelse(indbh); + return 1; + } + if (!indbh) { + *p = 0; + *dirt = 1; + return 0; + } + if (inode->i_size < offset) + i = 0; + else + i = (inode->i_size - offset + sb->sv_ind_per_block_block_size_1) >> sb->sv_ind_per_block_block_size_bits; + for 
(; i < sb->sv_ind_per_block; i++) { + ind = ((sysv_zone_t *) indbh->b_data) + i; + block = tmp = *ind; + if (sb->sv_convert) + block = from_coh_ulong(block); + if (!block) + continue; + retry |= trunc_indirect(inode,offset+(i<<sb->sv_ind_per_block_bits),ind,sb->sv_convert,&indbh->b_dirt); + } + for (i = 0; i < sb->sv_ind_per_block; i++) + if (((sysv_zone_t *) indbh->b_data)[i]) + goto done; + if ((indbh->b_count != 1) || (indtmp != *p)) { + brelse(indbh); + return 1; + } + *p = 0; + *dirt = 1; + sysv_free_block(sb,indblock); +done: + brelse(indbh); + return retry; +} + +static int trunc_tindirect(struct inode * inode, unsigned long offset, unsigned long * p, int convert, unsigned char * dirt) +{ + unsigned long indtmp, indblock; + struct super_block * sb; + struct buffer_head * indbh; + unsigned int i; + sysv_zone_t * ind; + unsigned long tmp, block; + int retry = 0; + + indblock = indtmp = *p; + if (convert) + indblock = from_coh_ulong(indblock); + if (!indblock) + return 0; + sb = inode->i_sb; + indbh = sv_bread(sb, inode->i_dev, indblock); + if (indtmp != *p) { + brelse(indbh); + return 1; + } + if (!indbh) { + *p = 0; + *dirt = 1; + return 0; + } + if (inode->i_size < offset) + i = 0; + else + i = (inode->i_size - offset + sb->sv_ind_per_block_2_block_size_1) >> sb->sv_ind_per_block_2_block_size_bits; + for (; i < sb->sv_ind_per_block; i++) { + ind = ((sysv_zone_t *) indbh->b_data) + i; + block = tmp = *ind; + if (sb->sv_convert) + block = from_coh_ulong(block); + if (!block) + continue; + retry |= trunc_dindirect(inode,offset+(i<<sb->sv_ind_per_block_2_bits),ind,sb->sv_convert,&indbh->b_dirt); + } + for (i = 0; i < sb->sv_ind_per_block; i++) + if (((sysv_zone_t *) indbh->b_data)[i]) + goto done; + if ((indbh->b_count != 1) || (indtmp != *p)) { + brelse(indbh); + return 1; + } + *p = 0; + *dirt = 1; + sysv_free_block(sb,indblock); +done: + brelse(indbh); + return retry; +} + +static int trunc_all(struct inode * inode) +{ + struct super_block * sb; + + sb = 
inode->i_sb; + return trunc_direct(inode) + | trunc_indirect(inode,sb->sv_ind0_size,&inode->u.sysv_i.i_data[10],0,&inode->i_dirt) + | trunc_dindirect(inode,sb->sv_ind1_size,&inode->u.sysv_i.i_data[11],0,&inode->i_dirt) + | trunc_tindirect(inode,sb->sv_ind2_size,&inode->u.sysv_i.i_data[12],0,&inode->i_dirt); +} + + +void sysv_truncate(struct inode * inode) +{ + /* If this is called from sysv_put_inode, we needn't worry about + * races as we are just losing the last reference to the inode. + * If this is called from another place, let's hope it's a regular + * file. + * Truncating symbolic links is strange. We assume we don't truncate + * a directory we are just modifying. We ensure we don't truncate + * a regular file we are just writing to, by use of a lock. + */ + if (S_ISLNK(inode->i_mode)) + printk("sysv_truncate: truncating symbolic link\n"); + else if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) + return; + while (trunc_all(inode)) { + current->counter = 0; + schedule(); + } + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_dirt = 1; +} diff --git a/fs/umsdos/Makefile b/fs/umsdos/Makefile new file mode 100644 index 000000000..cfba11e63 --- /dev/null +++ b/fs/umsdos/Makefile @@ -0,0 +1,44 @@ +# +# Makefile for the umsdos unix-like filesystem routines. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... 
+ +ifndef CONFIG_UMSDOS_FS +CFLAGS := $(CFLAGS) -DMODULE +endif + +.c.s: + $(CC) $(CFLAGS) -S $< +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< + +OBJS= dir.o emd.o file.o inode.o ioctl.o mangle.o namei.o\ + rdir.o symlink.o #check.o + +umsdos.o: $(OBJS) + $(LD) -r -o umsdos.o $(OBJS) + +clean: + rm -f core *.o *.a *.s + +dep: + $(CPP) -M *.c > .depend + +p: + proto *.c >/usr/include/linux/umsdos_fs.p + +doc: + nadoc -i -p umsdos.doc - /tmp/umsdos.mpg + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/fs/umsdos/README b/fs/umsdos/README new file mode 100644 index 000000000..4ce8b4148 --- /dev/null +++ b/fs/umsdos/README @@ -0,0 +1,84 @@ +Very short explanation for the impatient!!! + +Umsdos is a file system driver that runs on top of the MSDOS fs driver. +It is written by Jacques Gelinas (jacques@solucorp.qc.ca) + +Umsdos is not a file system per se, but a twist to make a boring +one into a useful one. + +It gives you: + + long file names + Permissions and owner + Links + Special files (devices, pipe...) + All that is needed to be a Linux root fs. + +There is plenty of documentation on it in the source. A formatted document +made from those comments is available from +sunsite.unc.edu:/pub/Linux/ALPHA/umsdos + +Mostly... + +You mount a DOS partition like this + +mount -t umsdos /dev/hda3 /mnt + ^ +---------| + +All options are passed to the msdos driver. Options like uid, gid etc. are +given to msdos. + +The default behavior of Umsdos is to do the same thing as the msdos driver, +mostly passing commands to it without much processing. Again, this is +the default. After doing the mount on a DOS partition, nothing special +happens. This is why all mount options are passed to the Msdos fs driver. + +Umsdos uses a special DOS file --linux-.--- to store the information +which can't be handled by the normal MsDOS file system. This is the trick. + +--linux-.--- is optional. There is one per directory. 
+ +**** If --linux-.--- is missing, then Umsdos processes the directory the + same way the msdos driver does. Short file names, no goodies, default + owner and permissions. So each directory may or may not have this + --linux-.--- + +Now, how to get those --linux-.---. + +\begin joke_section + + Well, send me a directory's content + and I will send you one customised for you. + $5 per directory. Add any applicable taxes. +\end joke_section + +A utility, umssync, creates those and maintains them. It is available +from the same directory above (sunsite) in the file umsdos_progs-0.3.tar.gz. +A compiled version is available in umsdos-0.3a.bin.tar.gz. + +So in our example, after mounting mnt, we do + +umssync . + +This will promote this directory (a recursive option is available) to full +umsdos capabilities (long names ...). An ls -l before and after won't show +much difference, however. The files which were there are still there. But now +you can do all this: + + chmod 644 * + chown you.your_groupe * + ls >THIS_IS.A.VERY.LONG.NAME + ln -s toto tata + ls -l + +Once a directory is promoted, all subdirectories created in it will inherit that +promotion. + +What happens if you boot DOS and create files in those promoted directories? +Umsdos won't notice new files, but will signal removed files (it won't crash). +Using umssync in /etc/rc will make sure the DOS directory is in sync with +the --linux-.---. + +Hope this helps! 
+ diff --git a/fs/umsdos/check.c b/fs/umsdos/check.c new file mode 100644 index 000000000..d1102b4ce --- /dev/null +++ b/fs/umsdos/check.c @@ -0,0 +1,55 @@ +#include <asm/system.h> + +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/head.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> + +extern unsigned long high_memory; + +static int check_one_table(unsigned long * page_dir) +{ + unsigned long pg_table = *page_dir; + + if (!pg_table) + return 0; + if (pg_table >= high_memory || !(pg_table & PAGE_PRESENT)) { + return 1; + } + return 0; +} + +/* + * This function frees up all page tables of a process when it exits. + */ +void check_page_tables(void) +{ + unsigned long pg_dir; + static int err = 0; + + int stack_level = (long)(&pg_dir)-current->kernel_stack_page; + if (stack_level < 1500) printk ("** %d ** ",stack_level); + pg_dir = current->tss.cr3; + if (mem_map[MAP_NR(pg_dir)] > 1) { + return; + } + if (err == 0){ + unsigned long *page_dir = (unsigned long *) pg_dir; + unsigned long *base = page_dir; + int i; + for (i = 0 ; i < PTRS_PER_PAGE ; i++,page_dir++){ + int notok = check_one_table(page_dir); + if (notok){ + err++; + printk ("|%d| ",page_dir-base); + } + } + if (err) printk ("Erreur MM %d\n",err); + } +} + diff --git a/fs/umsdos/dir.c b/fs/umsdos/dir.c new file mode 100644 index 000000000..2a668d102 --- /dev/null +++ b/fs/umsdos/dir.c @@ -0,0 +1,706 @@ +/* + * linux/fs/umsdos/dir.c + * + * Written 1993 by Jacques Gelinas + * Inspired from linux/fs/msdos/... 
: Werner Almesberger + * + * Extended MS-DOS directory handling functions + */ + +#include <asm/segment.h> + +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/msdos_fs.h> +#include <linux/errno.h> +#include <linux/stat.h> +#include <linux/limits.h> +#include <linux/umsdos_fs.h> +#include <linux/malloc.h> + +#define PRINTK(x) +#define Printk(x) printk x + +#define UMSDOS_SPECIAL_DIRFPOS 3 +extern struct inode *pseudo_root; +/* + So grep * doesn't complain in the presence of directories. +*/ +int UMSDOS_dir_read(struct inode *inode,struct file *filp,char *buf, + int count) +{ + return -EISDIR; +} +/* + Read count directory entries from directory filp + Return a negative value from linux/errno.h. + Return > 0 if success (the length of the file name). + + This function is used by the normal readdir VFS entry point and by + some function who try to find out info on a file from a pure MSDOS + inode. See umsdos_locate_ancestor() below. +*/ +static int umsdos_readdir_x( + struct inode *dir, /* Point to a description of the super block */ + struct file *filp, /* Point to a directory which is read */ + struct dirent *dirent, /* Will hold count directory entry */ + int dirent_in_fs, /* dirent point in user's space ? */ + int count, + struct umsdos_dirent *u_entry, /* Optional umsdos entry */ + int follow_hlink, + off_t *pt_f_pos) /* will hold the offset of the entry in EMD */ +{ + int ret = 0; + + umsdos_startlookup(dir); + if (filp->f_pos == UMSDOS_SPECIAL_DIRFPOS + && dir == pseudo_root + && dirent_in_fs){ + /* + We don't need to simulate this pseudo directory + when umsdos_readdir_x is called for internal operation + of umsdos. This is why dirent_in_fs is tested + */ + /* #Specification: pseudo root / directory /DOS + When umsdos operates in pseudo root mode (C:\linux is the + linux root), it simulate a directory /DOS which points to + the real root of the file system. 
+ */ + put_fs_long(dir->i_sb->s_mounted->i_ino,&dirent->d_ino); + memcpy_tofs (dirent->d_name,"DOS",3); + put_fs_byte(0,dirent->d_name+3); + put_fs_word (3,&dirent->d_reclen); + if (u_entry != NULL) u_entry->flags = 0; + ret = 3; + filp->f_pos++; + }else if (filp->f_pos < 2 + || (dir != dir->i_sb->s_mounted && filp->f_pos == 32)){ + /* #Specification: readdir / . and .. + The msdos filesystem manage the . and .. entry properly + so the EMD file won't hold any info about it. + + In readdir, we assume that for the root directory + the read position will be 0 for ".", 1 for "..". For + a non root directory, the read position will be 0 for "." + and 32 for "..". + */ + /* + This is a trick used by the msdos file system (fs/msdos/dir.c) + to manage . and .. for the root directory of a file system. + Since there is no such entry in the root, fs/msdos/dir.c + use the following: + + if f_pos == 0, return ".". + if f_pos == 1, return "..". + + So let msdos handle it + + Since umsdos entries are much larger, we share the same f_pos. + if f_pos is 0 or 1 or 32, we are clearly looking at . and + .. + + As soon as we get f_pos == 2 or f_pos == 64, then back to + 0, but this time we are reading the EMD file. + + Well, not so true. The problem, is that UMSDOS_REC_SIZE is + also 64, so as soon as we read the first record in the + EMD, we are back at offset 64. So we set the offset + to UMSDOS_SPECIAL_DIRFPOS(3) as soon as we have read the + .. entry from msdos. 
+ */ + ret = msdos_readdir(dir,filp,dirent,count); + if (filp->f_pos == 64) filp->f_pos = UMSDOS_SPECIAL_DIRFPOS; + if (u_entry != NULL) u_entry->flags = 0; + }else{ + struct inode *emd_dir = umsdos_emd_dir_lookup(dir,0); + if (emd_dir != NULL){ + if (filp->f_pos <= UMSDOS_SPECIAL_DIRFPOS+1) filp->f_pos = 0; + PRINTK (("f_pos %ld i_size %d\n",filp->f_pos,emd_dir->i_size)); + ret = 0; + while (filp->f_pos < emd_dir->i_size){ + struct umsdos_dirent entry; + off_t cur_f_pos = filp->f_pos; + if (umsdos_emd_dir_readentry (emd_dir,filp,&entry)!=0){ + ret = -EIO; + break; + }else if (entry.name_len != 0){ + /* #Specification: umsdos / readdir + umsdos_readdir() should fill a struct dirent with + an inode number. The cheap way to get it is to + do a lookup in the MSDOS directory for each + entry processed by the readdir() function. + This is not very efficient, but very simple. The + other way around is to maintain a copy of the inode + number in the EMD file. This is a problem because + this has to be maintained in sync using tricks. + Remember that MSDOS (the OS) does not update the + modification time (mtime) of a directory. There is + no easy way to tell that a directory was modified + during a DOS session and synchronise the EMD file. + + Suggestion welcome. + + So the easy way is used! + */ + struct umsdos_info info; + struct inode *inode; + int lret; + umsdos_parse (entry.name,entry.name_len,&info); + info.f_pos = cur_f_pos; + *pt_f_pos = cur_f_pos; + umsdos_manglename (&info); + lret = umsdos_real_lookup (dir,info.fake.fname + ,info.fake.len,&inode); + PRINTK (("Cherche inode de %s lret %d flags %d\n" + ,info.fake.fname,lret,entry.flags)); + if (lret == 0 + && (entry.flags & UMSDOS_HLINK) + && follow_hlink){ + struct inode *rinode; + lret = umsdos_hlink2inode (inode,&rinode); + inode = rinode; + } + if (lret == 0){ + /* #Specification: pseudo root / reading real root + The pseudo root (/linux) is logically + erased from the real root. 
This mean that + ls /DOS, won't show "linux". This avoids + infinite recursion /DOS/linux/DOS/linux while + walking the file system. + */ + if (inode != pseudo_root){ + PRINTK (("Trouve ino %d ",inode->i_ino)); + if (dirent_in_fs){ + put_fs_long(inode->i_ino,&dirent->d_ino); + memcpy_tofs (dirent->d_name,entry.name + ,entry.name_len); + put_fs_byte(0,dirent->d_name+entry.name_len); + put_fs_word (entry.name_len + ,&dirent->d_reclen); + /* In this case, the caller only needs */ + /* flags */ + if (u_entry != NULL){ + u_entry->flags = entry.flags; + } + }else{ + dirent->d_ino = inode->i_ino; + memcpy (dirent->d_name,entry.name + ,entry.name_len); + dirent->d_name[entry.name_len] = '\0'; + dirent->d_reclen = entry.name_len; + if (u_entry != NULL) *u_entry = entry; + } + ret = entry.name_len; + iput (inode); + break; + } + iput (inode); + }else{ + /* #Specification: umsdos / readdir / not in MSDOS + During a readdir operation, if the file is not + in the MSDOS directory anymore, the entry is + removed from the EMD file silently. + */ + ret = umsdos_writeentry (dir,emd_dir,&info,1); + if (ret != 0){ + break; + } + } + } + } + iput(emd_dir); + } + } + umsdos_endlookup(dir); + PRINTK (("read dir %p pos %d ret %d\n",dir,filp->f_pos,ret)); + return ret; +} +/* + Read count directory entries from directory filp + Return a negative value from linux/errno.h. + Return > 0 if success (the length of the file name). 
+*/ +static int UMSDOS_readdir( + struct inode *dir, /* Point to a description of the super block */ + struct file *filp, /* Point to a directory which is read */ + struct dirent *dirent, /* Will hold count directory entry */ + int count) +{ + int ret = -ENOENT; + while (1){ + struct umsdos_dirent entry; + off_t f_pos; + ret = umsdos_readdir_x (dir,filp,dirent,1,count,&entry,1,&f_pos); + if (ret <= 0 || !(entry.flags & UMSDOS_HIDDEN)) break; + } + return ret; +} +/* + Complete the inode content with info from the EMD file +*/ +void umsdos_lookup_patch ( + struct inode *dir, + struct inode *inode, + struct umsdos_dirent *entry, + off_t emd_pos) +{ + /* + This function modify the state of a dir inode. It decides + if the dir is a umsdos dir or a dos dir. This is done + deeper in umsdos_patch_inode() called at the end of this function. + + umsdos_patch_inode() may block because it is doing disk access. + At the same time, another process may get here to initialise + the same dir inode. There is 3 cases. + + 1-The inode is already initialised. We do nothing. + 2-The inode is not initialised. We lock access and do it. + 3-Like 2 but another process has lock the inode, so we try + to lock it and right after check if initialisation is still + needed. + + + Thanks to the mem option of the kernel command line, it was + possible to consistently reproduce this problem by limiting + my mem to 4 meg and running X. + */ + /* + Do this only if the inode is freshly read, because we will lose + the current (updated) content. + */ + /* + A lookup of a mount point directory yield the inode into + the other fs, so we don't care about initialising it. iget() + does this automatically. + */ + if (inode->i_sb == dir->i_sb && !umsdos_isinit(inode)){ + if (S_ISDIR(inode->i_mode)) umsdos_lockcreate(inode); + if (!umsdos_isinit(inode)){ + /* #Specification: umsdos / lookup / inode info + After successfully reading an inode from the MSDOS + filesystem, we use the EMD file to complete it. 
+ We update the following field. + + uid, gid, atime, ctime, mtime, mode. + + We rely on MSDOS for mtime. If the file + was modified during an MSDOS session, at least + mtime will be meaningful. We do this only for regular + file. + + We don't rely on MSDOS for mtime for directory because + the MSDOS directory date is creation time (strange + MSDOS behavior) which fit nowhere in the three UNIX + time stamp. + */ + if (S_ISREG(entry->mode)) entry->mtime = inode->i_mtime; + inode->i_mode = entry->mode; + inode->i_rdev = entry->rdev; + inode->i_atime = entry->atime; + inode->i_ctime = entry->ctime; + inode->i_mtime = entry->mtime; + inode->i_uid = entry->uid; + inode->i_gid = entry->gid; + /* #Specification: umsdos / i_nlink + The nlink field of an inode is maintain by the MSDOS file system + for directory and by UMSDOS for other file. The logic is that + MSDOS is already figuring out what to do for directories and + does nothing for other files. For MSDOS, there are no hard link + so all file carry nlink==1. UMSDOS use some info in the + EMD file to plug the correct value. + */ + if (!S_ISDIR(entry->mode)){ + if (entry->nlink > 0){ + inode->i_nlink = entry->nlink; + }else{ + printk ("UMSDOS: lookup_patch entry->nlink < 1 ???\n"); + } + } + umsdos_patch_inode(inode,dir,emd_pos); + } + if (S_ISDIR(inode->i_mode)) umsdos_unlockcreate(inode); +if (inode->u.umsdos_i.i_emd_owner==0) printk ("emd_owner still 0 ???\n"); + } +} +/* + Locate entry of an inode in a directory. + Return 0 or a negative error code. + + Normally, this function must succeed. It means a strange corruption + in the file system if not. +*/ +int umsdos_inode2entry ( + struct inode *dir, + struct inode *inode, + struct umsdos_dirent *entry) /* Will hold the entry */ +{ + int ret = -ENOENT; + if (inode == pseudo_root){ + /* + Quick way to find the name. 
+ Also umsdos_readdir_x won't show /linux anyway + */ + memcpy (entry->name,UMSDOS_PSDROOT_NAME,UMSDOS_PSDROOT_LEN+1); + entry->name_len = UMSDOS_PSDROOT_LEN; + ret = 0; + }else{ + struct inode *emddir = umsdos_emd_dir_lookup(dir,0); + iput (emddir); + if (emddir == NULL){ + /* This is a DOS directory */ + struct file filp; + filp.f_reada = 1; + filp.f_pos = 0; + while (1){ + struct dirent dirent; + if (umsdos_readdir_kmem (dir,&filp,&dirent,1) <= 0){ + printk ("UMSDOS: can't locate inode %ld in DOS directory???\n" + ,inode->i_ino); + }else if (dirent.d_ino == inode->i_ino){ + ret = 0; + memcpy (entry->name,dirent.d_name,dirent.d_reclen); + entry->name[dirent.d_reclen] = '\0'; + entry->name_len = dirent.d_reclen; + inode->u.umsdos_i.i_dir_owner = dir->i_ino; + inode->u.umsdos_i.i_emd_owner = 0; + umsdos_setup_dir_inode(inode); + break; + } + } + }else{ + /* skip . and .. see umsdos_readdir_x() */ + struct file filp; + filp.f_reada = 1; + filp.f_pos = UMSDOS_SPECIAL_DIRFPOS; + while (1){ + struct dirent dirent; + off_t f_pos; + if (umsdos_readdir_x(dir,&filp,&dirent + ,0,1,entry,0,&f_pos) <= 0){ + printk ("UMSDOS: can't locate inode %ld in EMD file???\n" + ,inode->i_ino); + break; + }else if (dirent.d_ino == inode->i_ino){ + ret = 0; + umsdos_lookup_patch (dir,inode,entry,f_pos); + break; + } + } + } + } + return ret; +} +/* + Locate the parent of a directory and the info on that directory + Return 0 or a negative error code. +*/ +static int umsdos_locate_ancestor ( + struct inode *dir, + struct inode **result, + struct umsdos_dirent *entry) +{ + int ret; + umsdos_patch_inode (dir,NULL,0); + ret = umsdos_real_lookup (dir,"..",2,result); + PRINTK (("result %d %x ",ret,*result)); + if (ret == 0){ + struct inode *adir = *result; + ret = umsdos_inode2entry (adir,dir,entry); + } + PRINTK (("\n")); + return ret; +} +/* + Build the path name of an inode (relative to the file system. + This function is need to set (pseudo) hard link. 
+ + It uses the same strategy as the standard getcwd(). +*/ +int umsdos_locate_path ( + struct inode *inode, + char *path) +{ + int ret = 0; + struct inode *dir = inode; + char *bpath = (char*)kmalloc(PATH_MAX,GFP_KERNEL); + if (bpath == NULL){ + ret = -ENOMEM; + }else{ + struct umsdos_dirent entry; + char *ptbpath = bpath+PATH_MAX-1; + *ptbpath = '\0'; + PRINTK (("locate_path mode %x ",inode->i_mode)); + if (!S_ISDIR(inode->i_mode)){ + ret = umsdos_get_dirowner (inode,&dir); + PRINTK (("locate_path ret %d ",ret)); + if (ret == 0){ + ret = umsdos_inode2entry (dir,inode,&entry); + if (ret == 0){ + ptbpath -= entry.name_len; + memcpy (ptbpath,entry.name,entry.name_len); + PRINTK (("ptbpath :%s: ",ptbpath)); + } + } + }else{ + dir->i_count++; + } + if (ret == 0){ + while (dir != dir->i_sb->s_mounted){ + struct inode *adir; + ret = umsdos_locate_ancestor (dir,&adir,&entry); + iput (dir); + dir = NULL; + PRINTK (("ancestor %d ",ret)); + if (ret == 0){ + *--ptbpath = '/'; + ptbpath -= entry.name_len; + memcpy (ptbpath,entry.name,entry.name_len); + dir = adir; + PRINTK (("ptbpath :%s: ",ptbpath)); + }else{ + break; + } + } + } + strcpy (path,ptbpath); + kfree (bpath); + } + PRINTK (("\n")); + iput (dir); + return ret; +} + +/* + Return != 0 if an entry is the pseudo DOS entry in the pseudo root. +*/ +int umsdos_is_pseudodos ( + struct inode *dir, + const char *name, + int len) +{ + /* #Specification: pseudo root / DOS hard coded + The pseudo sub-directory DOS in the pseudo root is hard coded. + The name is DOS. This is done this way to help standardised + the umsdos layout. The idea is that from now on /DOS is + a reserved path and nobody will think of using such a path + for a package. + */ + return dir == pseudo_root + && len == 3 + && name[0] == 'D' && name[1] == 'O' && name[2] == 'S'; +} +/* + Check if a file exist in the current directory. + Return 0 if ok, negative error code if not (ex: -ENOENT). 
+*/ +static int umsdos_lookup_x ( + struct inode *dir, + const char *name, + int len, + struct inode **result, /* Will hold inode of the file, if successful */ + int nopseudo) /* Don't care about pseudo root mode */ +{ + int ret = -ENOENT; + *result = NULL; + umsdos_startlookup(dir); + if (len == 1 && name[0] == '.'){ + *result = dir; + dir->i_count++; + ret = 0; + }else if (len == 2 && name[0] == '.' && name[1] == '.'){ + if (pseudo_root != NULL && dir == pseudo_root->i_sb->s_mounted){ + /* #Specification: pseudo root / .. in real root + Whenever a lookup is those in the real root for + the directory .., and pseudo root is active, the + pseudo root is returned. + */ + ret = 0; + *result = pseudo_root; + pseudo_root->i_count++; + }else{ + /* #Specification: locating .. / strategy + We use the msdos filesystem to locate the parent directory. + But it is more complicated than that. + + We have to step back even further to + get the parent of the parent, so we can get the EMD + of the parent of the parent. Using the EMD file, we can + locate all the info on the parent, such a permissions + and owner. + */ + ret = umsdos_real_lookup (dir,"..",2,result); + PRINTK (("ancestor ret %d dir %p *result %p ",ret,dir,*result)); + if (ret == 0 + && *result != dir->i_sb->s_mounted + && *result != pseudo_root){ + struct inode *aadir; + struct umsdos_dirent entry; + ret = umsdos_locate_ancestor (*result,&aadir,&entry); + iput (aadir); + } + } + }else if (umsdos_is_pseudodos(dir,name,len)){ + /* #Specification: pseudo root / lookup(DOS) + A lookup of DOS in the pseudo root will always succeed + and return the inode of the real root. 
+ */ + *result = dir->i_sb->s_mounted; + (*result)->i_count++; + ret = 0; + }else{ + struct umsdos_info info; + ret = umsdos_parse (name,len,&info); + if (ret == 0) ret = umsdos_findentry (dir,&info,0); + PRINTK (("lookup %s pos %d ret %d len %d ",info.fake.fname,info.f_pos,ret + ,info.fake.len)); + if (ret == 0){ + /* #Specification: umsdos / lookup + A lookup for a file is done in two step. First, we locate + the file in the EMD file. If not present, we return + an error code (-ENOENT). If it is there, we repeat the + operation on the msdos file system. If this fails, it means + that the file system is not in sync with the emd file. + We silently remove this entry from the emd file, + and return ENOENT. + */ + struct inode *inode; + ret = umsdos_real_lookup (dir,info.fake.fname,info.fake.len,result); + inode = *result; + if (inode == NULL){ + printk ("UMSDOS: Erase entry %s, out of sync with MsDOS\n" + ,info.fake.fname); + umsdos_delentry (dir,&info,S_ISDIR(info.entry.mode)); + }else{ + umsdos_lookup_patch (dir,inode,&info.entry,info.f_pos); + PRINTK (("lookup ino %d flags %d\n",inode->i_ino + ,info.entry.flags)); + if (info.entry.flags & UMSDOS_HLINK){ + ret = umsdos_hlink2inode (inode,result); + } + if (*result == pseudo_root && !nopseudo){ + /* #Specification: pseudo root / dir lookup + For the same reason as readdir, a lookup in /DOS for + the pseudo root directory (linux) will fail. + */ + /* + This has to be allowed for resolving hard link + which are recorded independently of the pseudo-root + mode. + */ + iput (pseudo_root); + *result = NULL; + ret = -ENOENT; + } + } + } + } + umsdos_endlookup(dir); + iput (dir); + return ret; +} +/* + Check if a file exist in the current directory. + Return 0 if ok, negative error code if not (ex: -ENOENT). 
+*/ +int UMSDOS_lookup ( + struct inode *dir, + const char *name, + int len, + struct inode **result) /* Will hold inode of the file, if successful */ +{ + return umsdos_lookup_x(dir,name,len,result,0); +} +/* + Locate the inode pointed by a (pseudo) hard link + Return 0 if ok, a negative error code if not. +*/ +int umsdos_hlink2inode (struct inode *hlink, struct inode **result) +{ + int ret = -EIO; + char *path = (char*)kmalloc(PATH_MAX,GFP_KERNEL); + *result = NULL; + if (path == NULL){ + ret = -ENOMEM; + iput (hlink); + }else{ + struct file filp; + filp.f_reada = 1; + filp.f_pos = 0; + PRINTK (("hlink2inode ")); + if (umsdos_file_read_kmem (hlink,&filp,path,hlink->i_size) + ==hlink->i_size){ + struct inode *dir; + char *pt = path; + dir = hlink->i_sb->s_mounted; + path[hlink->i_size] = '\0'; + iput (hlink); + dir->i_count++; + while (1){ + char *start = pt; + int len; + while (*pt != '\0' && *pt != '/') pt++; + len = (int)(pt - start); + if (*pt == '/') *pt++ = '\0'; + if (dir->u.umsdos_i.i_emd_dir == 0){ + /* This is a DOS directory */ + ret = msdos_lookup(dir,start,len,result); + }else{ + ret = umsdos_lookup_x(dir,start,len,result,1); + } + PRINTK (("h2n lookup :%s: -> %d ",start,ret)); + if (ret == 0 && *pt != '\0'){ + dir = *result; + }else{ + break; + } + } + }else{ + iput (hlink); + } + PRINTK (("hlink2inode ret = %d %p -> %p\n",ret,hlink,*result)); + kfree (path); + } + return ret; +} + +static struct file_operations umsdos_dir_operations = { + NULL, /* lseek - default */ + UMSDOS_dir_read, /* read */ + NULL, /* write - bad */ + UMSDOS_readdir, /* readdir */ + NULL, /* select - default */ + UMSDOS_ioctl_dir, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + NULL /* fsync */ +}; + +struct inode_operations umsdos_dir_inode_operations = { + &umsdos_dir_operations, /* default directory file-ops */ + UMSDOS_create, /* create */ + UMSDOS_lookup, /* lookup */ + UMSDOS_link, /* link */ + 
UMSDOS_unlink, /* unlink */ + UMSDOS_symlink, /* symlink */ + UMSDOS_mkdir, /* mkdir */ + UMSDOS_rmdir, /* rmdir */ + UMSDOS_mknod, /* mknod */ + UMSDOS_rename, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + + + + + + + + + + diff --git a/fs/umsdos/emd.c b/fs/umsdos/emd.c new file mode 100644 index 000000000..e4d6a9470 --- /dev/null +++ b/fs/umsdos/emd.c @@ -0,0 +1,505 @@ +/* + * linux/fs/umsdos/emd.c + * + * Written 1993 by Jacques Gelinas + * + * Extended MS-DOS directory handling functions + */ +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/kernel.h> +#include <asm/segment.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/msdos_fs.h> +#include <linux/umsdos_fs.h> + +#define PRINTK(x) +#define Printk(x) printk x + +int umsdos_readdir_kmem( + struct inode *inode, + struct file *filp, + struct dirent *dirent, + int count) +{ + int ret; + int old_fs = get_fs(); + set_fs (KERNEL_DS); + ret = msdos_readdir(inode,filp,dirent,count); + set_fs (old_fs); + return ret; +} +/* + Read a file into kernel space memory +*/ +int umsdos_file_read_kmem( + struct inode *inode, + struct file *filp, + char *buf, + int count) +{ + int ret; + int old_fs = get_fs(); + set_fs (KERNEL_DS); + ret = msdos_file_read(inode,filp,buf,count); + set_fs (old_fs); + return ret; +} +/* + Write to a file from kernel space +*/ +int umsdos_file_write_kmem( + struct inode *inode, + struct file *filp, + char *buf, + int count) +{ + int ret; + int old_fs = get_fs(); + set_fs (KERNEL_DS); + ret = msdos_file_write(inode,filp,buf,count); + set_fs (old_fs); + return ret; +} + + +/* + Write a block of bytes into one EMD file. + The block of data is NOT in user space. + + Return 0 if ok, a negative error code if not. 
+*/ +int umsdos_emd_dir_write ( + struct inode *emd_dir, + struct file *filp, + char *buf, /* buffer in kernel memory, not in user space */ + int count) +{ + int written; + filp->f_flags = 0; + written = umsdos_file_write_kmem (emd_dir,filp,buf,count); + return written != count ? -EIO : 0; +} +/* + Read a block of bytes from one EMD file. + The block of data is NOT in user space. + Return 0 if ok, -EIO if any error. +*/ +int umsdos_emd_dir_read ( + struct inode *emd_dir, + struct file *filp, + char *buf, /* buffer in kernel memory, not in user space */ + int count) +{ + int ret = 0; + int sizeread; + filp->f_flags = 0; + sizeread = umsdos_file_read_kmem (emd_dir,filp,buf,count); + if (sizeread != count){ + printk ("UMSDOS: problem with EMD file. Can't read\n"); + ret = -EIO; + } + return ret; + +} +/* + Locate the EMD file in a directory and optionally, creates it. + + Return NULL if error. If ok, dir->u.umsdos_i.emd_inode +*/ +struct inode *umsdos_emd_dir_lookup(struct inode *dir, int creat) +{ + struct inode *ret = NULL; + if (dir->u.umsdos_i.i_emd_dir != 0){ + ret = iget (dir->i_sb,dir->u.umsdos_i.i_emd_dir); + PRINTK (("deja trouve %d %x [%d] " + ,dir->u.umsdos_i.i_emd_dir,ret,ret->i_count)); + }else{ + umsdos_real_lookup (dir,UMSDOS_EMD_FILE,UMSDOS_EMD_NAMELEN,&ret); + PRINTK (("emd_dir_lookup ")); + if (ret != NULL){ + PRINTK (("Find --linux ")); + dir->u.umsdos_i.i_emd_dir = ret->i_ino; + }else if (creat){ + int code; + PRINTK (("avant create ")); + dir->i_count++; + code = msdos_create (dir,UMSDOS_EMD_FILE,UMSDOS_EMD_NAMELEN + ,S_IFREG|0777,&ret); + PRINTK (("Creat EMD code %d ret %x ",code,ret)); + if (ret != NULL){ + dir->u.umsdos_i.i_emd_dir = ret->i_ino; + }else{ + printk ("UMSDOS: Can't create EMD file\n"); + } + } + } + if (ret != NULL){ + /* Disable UMSDOS_notify_change() for EMD file */ + ret->u.umsdos_i.i_emd_owner = 0xffffffff; + } + return ret; +} + +/* + Read an entry from the EMD file. + Support variable length record. 
+ Return -EIO if error, 0 if ok. +*/ +int umsdos_emd_dir_readentry ( + struct inode *emd_dir, + struct file *filp, + struct umsdos_dirent *entry) +{ + int ret = umsdos_emd_dir_read(emd_dir,filp,(char*)entry,UMSDOS_REC_SIZE); + if (ret == 0){ + /* Variable size record. Maybe, we have to read some more */ + int recsize = umsdos_evalrecsize (entry->name_len); + if (recsize > UMSDOS_REC_SIZE){ + ret = umsdos_emd_dir_read(emd_dir,filp + ,((char*)entry)+UMSDOS_REC_SIZE,recsize - UMSDOS_REC_SIZE); + + } + } + return ret; +} +/* + Write an entry in the EMD file. + Return 0 if ok, -EIO if some error. +*/ +int umsdos_writeentry ( + struct inode *dir, + struct inode *emd_dir, + struct umsdos_info *info, + int free_entry) /* This entry is deleted, so Write all 0's */ +{ + int ret = 0; + struct file filp; + struct umsdos_dirent *entry = &info->entry; + struct umsdos_dirent entry0; + if (free_entry){ + /* #Specification: EMD file / empty entries + Unused entry in the EMD file are identify + by the name_len field equal to 0. However to + help future extension (or bug correction :-( ), + empty entries are filled with 0. + */ + memset (&entry0,0,sizeof(entry0)); + entry = &entry0; + }else if (entry->name_len > 0){ + memset (entry->name+entry->name_len,'\0' + ,sizeof(entry->name)-entry->name_len); + /* #Specification: EMD file / spare bytes + 10 bytes are unused in each record of the EMD. They + are set to 0 all the time. So it will be possible + to do new stuff and rely on the state of those + bytes in old EMD file around. + */ + memset (entry->spare,0,sizeof(entry->spare)); + } + filp.f_pos = info->f_pos; + filp.f_reada = 0; + ret = umsdos_emd_dir_write(emd_dir,&filp,(char*)entry,info->recsize); + if (ret != 0){ + printk ("UMSDOS: problem with EMD file. 
Can't write\n"); + }else{ + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + dir->i_dirt = 1; + } + return ret; +} + +#define CHUNK_SIZE (8*UMSDOS_REC_SIZE) +struct find_buffer{ + char buffer[CHUNK_SIZE]; + int pos; /* read offset in buffer */ + int size; /* Current size of buffer */ + struct file filp; +}; + +/* + Fill the read buffer and take care of the byte remaining inside. + Unread bytes are simply move to the beginning. + + Return -ENOENT if EOF, 0 if ok, a negative error code if any problem. +*/ +static int umsdos_fillbuf ( + struct inode *inode, + struct find_buffer *buf) +{ + int ret = -ENOENT; + int mustmove = buf->size - buf->pos; + int mustread; + int remain; + if (mustmove > 0){ + memcpy (buf->buffer,buf->buffer+buf->pos,mustmove); + } + buf->pos = 0; + mustread = CHUNK_SIZE - mustmove; + remain = inode->i_size - buf->filp.f_pos; + if (remain < mustread) mustread = remain; + if (mustread > 0){ + ret = umsdos_emd_dir_read (inode,&buf->filp,buf->buffer+mustmove + ,mustread); + if (ret == 0) buf->size = mustmove + mustread; + }else if (mustmove){ + buf->size = mustmove; + ret = 0; + } + return ret; +} + +/* + General search, locate a name in the EMD file or an empty slot to + store it. if info->entry.name_len == 0, search the first empty + slot (of the proper size). + + Caller must do iput on *pt_emd_dir. + + Return 0 if found, -ENOENT if not found, another error code if + other problem. + + So this routine is used to either find an existing entry or to + create a new one, while making sure it is a new one. After you + get -ENOENT, you make sure the entry is stuffed correctly and + call umsdos_writeentry(). + + To delete an entry, you find it, zero out the entry (memset) + and call umsdos_writeentry(). + + All this to say that umsdos_writeentry must be call after this + function since it rely on the f_pos field of info. 
+*/
+static int umsdos_find (
+	struct inode *dir,
+	struct umsdos_info *info,		/* Hold name and name_len */
+									/* Will hold the entry found */
+	struct inode **pt_emd_dir)		/* Will hold the emd_dir inode */
+									/* or NULL if the EMD lookup failed */
+{
+	/* #Specification: EMD file structure
+		The EMD file uses a fairly simple layout. It is made of records
+		(UMSDOS_REC_SIZE == 64). When a name can't be written in a single
+		record, multiple contiguous records are allocated.
+	*/
+	int ret = -ENOENT;
+	struct inode *emd_dir = umsdos_emd_dir_lookup(dir,1);
+	if (emd_dir != NULL){
+		struct umsdos_dirent *entry = &info->entry;
+		int recsize = info->recsize;
+		struct {
+			off_t posok;	/* Position available to store the entry */
+			int found;		/* A valid empty position has been found */
+			off_t one;		/* One empty position -> maybe <- large enough */
+			int onesize;	/* size of empty region starting at one */
+		}empty;
+		/* Read several entries at a time to speed up the search */
+		struct find_buffer buf;
+		buf.pos = 0;
+		buf.size = 0;
+		buf.filp.f_pos = 0;
+		buf.filp.f_reada = 1;
+		empty.found = 0;
+		/* Unless a hole is found, a new entry goes at the end of the EMD */
+		empty.posok = emd_dir->i_size;
+		empty.onesize = 0;
+		while (1){
+			struct umsdos_dirent *rentry = (struct umsdos_dirent*)
+				(buf.buffer + buf.pos);
+			/* Offset in the EMD file of the record at buf.pos */
+			int file_pos = buf.filp.f_pos - buf.size + buf.pos;
+			if (buf.pos == buf.size){
+				/* Buffer exhausted (or never filled): read the next chunk */
+				ret = umsdos_fillbuf (emd_dir,&buf);
+				if (ret < 0){
+					/* Not found, so note where it can be added */
+					info->f_pos = empty.posok;
+					break;
+				}
+			}else if (rentry->name_len == 0){
+				/* We are looking for an empty section at least */
+				/* recsize large */
+				if (entry->name_len == 0){
+					/* The caller asked for the first empty record */
+					info->f_pos = file_pos;
+					ret = 0;
+					break;
+				}else if (!empty.found){
+					if (empty.onesize == 0){
+						/* This is the first empty record of a section */
+						empty.one = file_pos;
+					}
+					/* grow the empty section */
+					empty.onesize += UMSDOS_REC_SIZE;
+					if (empty.onesize == recsize){
+						/* here is a large enough section */
+						empty.posok = empty.one;
+						empty.found = 1;
+					}
+				}
+				buf.pos += UMSDOS_REC_SIZE;
+			}else{
+				int entry_size = umsdos_evalrecsize(rentry->name_len);
+				if (buf.pos+entry_size > buf.size){
+					/* The entry runs past the buffered data: */
+					/* pull in the rest of it */
+					ret = umsdos_fillbuf (emd_dir,&buf);
+					if (ret == 0 && buf.pos+entry_size > buf.size){
+						/*
+							The refill did not supply the missing bytes:
+							the EMD file ends in the middle of this
+							entry. Another refill would leave the buffer
+							unchanged and this loop would never end, so
+							report the EMD file as damaged instead.
+						*/
+						ret = -EIO;
+					}
+					if (ret < 0){
+						/* Not found, so note where it can be added */
+						info->f_pos = empty.posok;
+						break;
+					}
+				}else{
+					empty.onesize = 0;	/* Reset the free slot search */
+					if (entry->name_len == rentry->name_len
+						&& memcmp(entry->name,rentry->name,rentry->name_len)
+							==0){
+						/* Found: hand back the complete EMD entry */
+						info->f_pos = file_pos;
+						*entry = *rentry;
+						ret = 0;
+						break;
+					}else{
+						buf.pos += entry_size;
+					}
+				}
+			}
+		}
+		/* info->f_pos is known on every exit path of the loop above */
+		umsdos_manglename(info);
+	}
+	*pt_emd_dir = emd_dir;
+	return ret;
+}
+/*
+	Add a new entry in the emd file
+	Return 0 if ok or a negative error code.
+	Return -EEXIST if the entry already exists.
+
+	Complete the information missing in info.
+*/
+int umsdos_newentry (
+	struct inode *dir,
+	struct umsdos_info *info)
+{
+	struct inode *emd_dir;
+	int ret = umsdos_find (dir,info,&emd_dir);
+	if (ret == 0){
+		ret = -EEXIST;
+	}else if (ret == -ENOENT){
+		/* umsdos_find left in info->f_pos the place to store the entry */
+		ret = umsdos_writeentry(dir,emd_dir,info,0);
+		PRINTK (("umsdos_newentry EDM ret = %d\n",ret));
+	}
+	iput (emd_dir);
+	return ret;
+}
+/*
+	Create a new hidden link.
+	Return 0 if ok, an error code if not.
+*/
+int umsdos_newhidden (
+	struct inode *dir,
+	struct umsdos_info *info)
+{
+	struct inode *emd_dir;
+	int ret;
+	umsdos_parse ("..LINK",6,info);
+	/* name_len == 0 makes umsdos_find look for the first empty record */
+	info->entry.name_len = 0;
+	ret = umsdos_find (dir,info,&emd_dir);
+	iput (emd_dir);
+	if (ret == -ENOENT || ret == 0){
+		/* #Specification: hard link / hidden name
+			When a hard link is created, the original file is renamed
+			to a hidden name. The name is "..LINKNNN" where NNN is a
+			number defined from the entry offset in the EMD file.
+		*/
+		info->entry.name_len = sprintf (info->entry.name,"..LINK%ld"
+			,info->f_pos);
+		ret = 0;
+	}
+	return ret;
+}
+/*
+	Remove an entry from the emd file
+	Return 0 if ok, a negative error code otherwise.
+
+	Complete the information missing in info.
+*/
+int umsdos_delentry (
+	struct inode *dir,
+	struct umsdos_info *info,
+	int isdir)
+{
+	/*
+		isdir tells which kind of entry the caller means to remove:
+		non zero for a directory, 0 for anything else. When it does not
+		agree with the mode stored in the EMD entry, nothing is written
+		and -EISDIR or -ENOTDIR is returned.
+	*/
+	struct inode *emd_dir;
+	int ret = umsdos_find (dir,info,&emd_dir);
+	if (ret == 0 && info->entry.name_len != 0){
+		int entry_isdir = S_ISDIR(info->entry.mode) != 0;
+		if ((isdir != 0) == entry_isdir){
+			/* Last argument 1: presumably the "free the entry" flag */
+			/* of umsdos_writeentry -- confirm there */
+			ret = umsdos_writeentry(dir,emd_dir,info,1);
+		}else if (entry_isdir){
+			ret = -EISDIR;
+		}else{
+			ret = -ENOTDIR;
+		}
+	}
+	iput(emd_dir);
+	return ret;
+}
+
+
+/*
+	Tell whether the EMD file of a directory holds any entry.
+	Return 0 if not empty (or if the EMD file can't be read)
+	       1 if empty
+	       2 if empty, no EMD file.
+*/
+int umsdos_isempty (struct inode *dir)
+{
+	struct inode *emd_dir = umsdos_emd_dir_lookup(dir,0);
+	struct file filp;
+	int ret;
+	/* If the EMD file does not exist, it is certainly empty :-) */
+	if (emd_dir == NULL) return 2;
+	filp.f_pos = 0;
+	filp.f_reada = 1;
+	filp.f_flags = O_RDONLY;
+	/* Walk the EMD until a used record, a read error or the end */
+	ret = 1;
+	while (ret == 1 && filp.f_pos < emd_dir->i_size){
+		struct umsdos_dirent entry;
+		if (umsdos_emd_dir_readentry(emd_dir,&filp,&entry)!=0
+			|| entry.name_len != 0){
+			ret = 0;
+		}
+	}
+	iput (emd_dir);
+	return ret;
+}
+
+/*
+	Locate an entry in a EMD directory.
+	Return 0 if ok, errcod if not, generally -ENOENT.
+*/
+int umsdos_findentry (
+	struct inode *dir,
+	struct umsdos_info *info,
+	int expect)		/* 0: anything */
+					/* 1: file */
+					/* 2: directory */
+{
+	struct inode *emd_dir;
+	int ret = umsdos_find (dir,info,&emd_dir);
+	if (ret == 0 && expect != 0){
+		/* The entry exists: check it is of the kind the caller wants */
+		int isdir = S_ISDIR(info->entry.mode);
+		if (isdir && expect != 2){
+			ret = -EISDIR;
+		}else if (!isdir && expect == 2){
+			ret = -ENOTDIR;
+		}
+	}
+	iput (emd_dir);
+	return ret;
+}
+
diff --git a/fs/umsdos/file.c b/fs/umsdos/file.c
new file mode 100644
index 000000000..d292ea3c2
--- /dev/null
+++ b/fs/umsdos/file.c
@@ -0,0 +1,103 @@
+/*
+ *  linux/fs/umsdos/file.c
+ *
+ *  Written 1993 by Jacques Gelinas
+ *	inspired from linux/fs/msdos/file.c Werner Almesberger
+ *
+ *  Extended MS-DOS regular file handling primitives
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/msdos_fs.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/msdos_fs.h>
+#include <linux/umsdos_fs.h>
+
+
+#define PRINTK(x)
+#define Printk(x)	printk x
+/*
+	Read a file into user space memory (msdos_file_read does the work)
+*/
+static int UMSDOS_file_read(
+	struct inode *inode,
+	struct file *filp,
+	char *buf,
+	int count)
+{
+	/* The access time is stamped here, whatever msdos_file_read returns */
+	int status = msdos_file_read(inode,filp,buf,count);
+	inode->i_dirt = 1;
+	inode->i_atime = CURRENT_TIME;
+	return status;
+}
+/*
+	Write a file from user space memory (plain msdos_file_write)
+*/
+static int UMSDOS_file_write(
+	struct inode *inode,
+	struct file *filp,
+	char *buf,
+	int count)
+{
+	return msdos_file_write(inode,filp,buf,count);
+}
+/*
+	Truncate a file through msdos_truncate, then stamp ctime and mtime.
+*/
+static void UMSDOS_truncate(struct inode *inode)
+{
+	PRINTK (("UMSDOS_truncate\n"));
+	msdos_truncate (inode);
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	inode->i_dirt = 1;
+}
+/*
+	See inode.c
+
+	Some entry point are filled dynamically with function pointers
+	from the msdos file_operations and file_inode_operations.
+ + The idea is to have the code as independent as possible from + the msdos file system. +*/ + +struct file_operations umsdos_file_operations = { + NULL, /* lseek - default */ + UMSDOS_file_read, /* read */ + UMSDOS_file_write, /* write */ + NULL, /* readdir - bad */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + generic_mmap, /* mmap */ + NULL, /* no special open is needed */ + NULL, /* release */ + file_fsync /* fsync */ +}; + +struct inode_operations umsdos_file_inode_operations = { + &umsdos_file_operations, /* default file operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + UMSDOS_truncate,/* truncate */ + NULL, /* permission */ + msdos_smap /* smap */ +}; + + diff --git a/fs/umsdos/inode.c b/fs/umsdos/inode.c new file mode 100644 index 000000000..40f7feb68 --- /dev/null +++ b/fs/umsdos/inode.c @@ -0,0 +1,513 @@ +/* + * linux/fs/umsdos/inode.c + * + * Written 1993 by Jacques Gelinas + * Inspired from linux/fs/msdos/... by Werner Almesberger + * + */ + +#include <linux/fs.h> +#include <linux/msdos_fs.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <asm/segment.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/stat.h> +#include <linux/umsdos_fs.h> + +#ifdef MODULE + #include <linux/module.h> + #include "../../tools/version.h" +#endif + +struct inode *pseudo_root=NULL; /* Useful to simulate the pseudo DOS */ + /* directory. See UMSDOS_readdir_x() */ + +/* #Specification: convention / PRINTK Printk and printk + Here is the convention for the use of printk inside fs/umsdos + + printk carry important message (error or status). + Printk is for debugging (it is a macro defined at the beginning of + most source. + PRINTK is a nulled Printk macro. 
+
+	This convention makes the source easier to read, and Printk easier
+	to shut off.
+*/
+#define PRINTK(x)
+#define Printk(x) printk x
+
+
+/*
+	VFS put_inode entry point: plain msdos_put_inode.
+*/
+void UMSDOS_put_inode(struct inode *inode)
+{
+	PRINTK (("put inode %x owner %x pos %d dir %x\n",inode
+		,inode->u.umsdos_i.i_emd_owner,inode->u.umsdos_i.pos
+		,inode->u.umsdos_i.i_emd_dir));
+	msdos_put_inode(inode);
+}
+
+
+/*
+	VFS put_super entry point: msdos_put_super, then drop the module
+	use count taken in UMSDOS_read_super.
+*/
+void UMSDOS_put_super(struct super_block *sb)
+{
+	msdos_put_super(sb);
+	#ifdef MODULE
+		MOD_DEC_USE_COUNT;
+	#endif
+}
+
+
+/*
+	VFS statfs entry point: plain msdos_statfs.
+*/
+void UMSDOS_statfs(struct super_block *sb,struct statfs *buf)
+{
+	msdos_statfs(sb,buf);
+}
+
+
+/*
+	Look up a name in the DOS directory with msdos_lookup.
+	The use count of dir is incremented before the call (presumably
+	because msdos_lookup releases one reference on dir -- confirm in
+	fs/msdos), so the caller still owns dir afterward.
+	Return 0 if ok, or a negative error code if not.
+*/
+int umsdos_real_lookup (
+	struct inode *dir,
+	const char *name,
+	int len,
+	struct inode **result)	/* Will hold inode of the file, if successful */
+{
+	int ret;
+	dir->i_count++;
+	ret = msdos_lookup (dir,name,len,result);
+	return ret;
+}
+/*
+	Complete the setup of a directory inode.
+	It locates the EMD file. If the EMD is there, then plug the
+	umsdos directory function table. If not, use the
+	umsdos_rdir_inode_operations one.
+*/
+void umsdos_setup_dir_inode (struct inode *inode)
+{
+	inode->u.umsdos_i.i_emd_dir = 0;
+	{
+		struct inode *emd_dir = umsdos_emd_dir_lookup (inode,0);
+		extern struct inode_operations umsdos_rdir_inode_operations;
+		inode->i_op = emd_dir != NULL
+			? &umsdos_dir_inode_operations
+			: &umsdos_rdir_inode_operations;
+		iput (emd_dir);
+	}
+}
+/*
+	Add some info into an inode so it can find its owner quickly:
+	the inode number of its directory, the inode number of the EMD
+	file of this directory and the offset of its entry in the EMD.
+*/
+void umsdos_set_dirinfo(
+	struct inode *inode,
+	struct inode *dir,
+	off_t f_pos)
+{
+	struct inode *emd_owner = umsdos_emd_dir_lookup(dir,1);
+	inode->u.umsdos_i.i_dir_owner = dir->i_ino;
+	if (emd_owner != NULL){
+		inode->u.umsdos_i.i_emd_owner = emd_owner->i_ino;
+		iput (emd_owner);
+	}else{
+		/*
+			The EMD file could not be obtained. i_emd_owner is left
+			untouched instead of dereferencing a NULL inode.
+		*/
+		printk ("UMSDOS: umsdos_set_dirinfo: no EMD file for dir %ld\n"
+			,dir->i_ino);
+	}
+	inode->u.umsdos_i.pos = f_pos;
+}
+/*
+	Tells if an Umsdos inode has been "patched" once.
+	Return != 0 if so.
+*/
+int umsdos_isinit (struct inode *inode)
+{
+	/* Only the first alternative is compiled: an inode is "patched" */
+	/* once umsdos_set_dirinfo has recorded its EMD owner */
+#if	1
+	return inode->u.umsdos_i.i_emd_owner != 0;
+#elif 0
+	return inode->i_atime != 0;
+#else
+	return inode->i_count > 1;
+#endif
+}
+/*
+	Connect the proper tables in the inode and add some info.
+*/
+void umsdos_patch_inode (
+	struct inode *inode,
+	struct inode *dir,		/* May be NULL */
+	off_t f_pos)
+{
+	/*
+		This function is called very early to setup the inode, somewhat
+		too early (called by UMSDOS_read_inode). At this point, we can't
+		do too much, such as looking up EMD files and so on. This causes
+		confusion in the kernel. This is why some initialisation
+		will be done when dir != NULL only.
+
+		UMSDOS do run piggy back on top of msdos fs. It looks like something
+		is missing in the VFS to accommodate stacked fs. Still unclear what
+		(quite honestly).
+
+		Well, maybe one! A new entry "may_unmount" which would allow
+		the stacked fs to allocate some inode permanently and release
+		them at the end. Doing that now introduce a problem. unmount
+		always fail because some inodes are in use.
+	*/
+	if (!umsdos_isinit(inode)){
+		inode->u.umsdos_i.i_emd_dir = 0;
+		if (S_ISREG(inode->i_mode)){
+			static char is_init = 0;
+			if (!is_init){
+				/*
+					I don't want to change the msdos file system code
+					so I get the address of some subroutine dynamically
+					once.
+				*/
+				umsdos_file_inode_operations.bmap = inode->i_op->bmap;
+				is_init = 1;
+			}
+			inode->i_op = &umsdos_file_inode_operations;
+		}else if (S_ISDIR(inode->i_mode)){
+			if (dir != NULL){
+				umsdos_setup_dir_inode(inode);
+			}
+		}else if (S_ISLNK(inode->i_mode)){
+			inode->i_op = &umsdos_symlink_inode_operations;
+		}else if (S_ISCHR(inode->i_mode)){
+			inode->i_op = &chrdev_inode_operations;
+		}else if (S_ISBLK(inode->i_mode)){
+			inode->i_op = &blkdev_inode_operations;
+		}else if (S_ISFIFO(inode->i_mode)){
+			init_fifo(inode);
+		}
+		if (dir != NULL){
+			/* #Specification: inode / umsdos info
+				The first time an inode is seen (inode->i_count == 1),
+				the inode number of the EMD file which control this inode
+				is tagged to this inode. It allows operation such
+				as notify_change to be handled.
+			*/
+			/*
+				This is done last because it also control the
+				status of umsdos_isinit()
+			*/
+			umsdos_set_dirinfo (inode,dir,f_pos);
+		}
+	}else if (dir != NULL){
+		/*
+			Test to see if the info is maintained.
+			This should be removed when the file system will be proven.
+		*/
+		struct inode *emd_owner = umsdos_emd_dir_lookup(dir,1);
+		/*
+			The EMD inode is examined before it is released, and only
+			if the lookup really produced one.
+		*/
+		if (emd_owner != NULL
+			&& emd_owner->i_ino != inode->u.umsdos_i.i_emd_owner){
+			printk ("UMSDOS: *** EMD_OWNER ??? *** ino = %ld %ld <> %ld "
+				,inode->i_ino,emd_owner->i_ino,inode->u.umsdos_i.i_emd_owner);
+		}
+		iput (emd_owner);
+	}
+}
+/*
+	Get the inode of the directory which owns this inode.
+	Return 0 if ok, -EIO if error.
+*/
+int umsdos_get_dirowner(
+	struct inode *inode,
+	struct inode **result)	/* Hold NULL if any error */
+							/* else, the inode of the directory */
+{
+	int ret = -EIO;
+	unsigned long ino = inode->u.umsdos_i.i_dir_owner;
+	*result = NULL;
+	if (ino == 0){
+		printk ("UMSDOS: umsdos_get_dirowner ino == 0\n");
+	}else{
+		struct inode *dir = *result = iget(inode->i_sb,ino);
+		if (dir != NULL){
+			/* dir == NULL here: only the function tables are set up */
+			umsdos_patch_inode (dir,NULL,0);
+			ret = 0;
+		}
+	}
+	return ret;
+}
+/*
+	Load an inode from disk.
+*/
+void UMSDOS_read_inode(struct inode *inode)
+{
+	PRINTK (("read inode %x ino = %d ",inode,inode->i_ino));
+	msdos_read_inode(inode);
+	PRINTK (("ino = %d %d\n",inode->i_ino,inode->i_count));
+	if (S_ISDIR(inode->i_mode)){
+		/* Debugging aid: report a directory inode which comes */
+		/* back with leftover dir_info state */
+		if (inode->u.umsdos_i.u.dir_info.creating != 0
+			|| inode->u.umsdos_i.u.dir_info.looking != 0
+			|| inode->u.umsdos_i.u.dir_info.p != NULL){
+			Printk (("read inode %d %d %p\n"
+				,inode->u.umsdos_i.u.dir_info.creating
+				,inode->u.umsdos_i.u.dir_info.looking
+				,inode->u.umsdos_i.u.dir_info.p));
+		}
+	}
+	/* #Specification: Inode / post initialisation
+		To completely initialise an inode, we need access to the owner
+		directory, so we can locate more info in the EMD file. This is
+		not available the first time the inode is accessed, so we use
+		a value in the inode to tell if it has been finally initialised.
+
+		At first, we have tried testing i_count but it was causing
+		problem. It is possible that two or more process use the
+		newly accessed inode. While the first one block during
+		the initialisation (probably while reading the EMD file), the
+		others believe all is well because i_count > 1. They go banana
+		with a broken inode. See umsdos_lookup_patch and umsdos_patch_inode.
+	*/
+	umsdos_patch_inode(inode,NULL,0);
+}
+
+/*
+	Update the disk with the inode content: first the msdos side,
+	then the three time stamps in the EMD entry.
+*/
+void UMSDOS_write_inode(struct inode *inode)
+{
+	struct iattr times;
+
+	PRINTK (("UMSDOS_write_inode emd %d\n",inode->u.umsdos_i.i_emd_owner));
+	msdos_write_inode(inode);
+	times.ia_valid = ATTR_MTIME | ATTR_ATIME | ATTR_CTIME;
+	times.ia_atime = inode->i_atime;
+	times.ia_mtime = inode->i_mtime;
+	times.ia_ctime = inode->i_ctime;
+	/*
+		UMSDOS_notify_change is convenient to call here
+		to update the EMD entry associated with this inode.
+		But it has the side effect to re"dirt" the inode,
+		so the dirty flag is cleared afterward.
+	*/
+	UMSDOS_notify_change (inode, &times);
+	inode->i_dirt = 0;
+}
+
+/*
+	Apply an attribute change to an inode and mirror it in the EMD entry.
+	Return 0 if ok, the inode_change_ok() error, -EPERM if the EMD inode
+	can't be obtained, or the error of the EMD read/write.
+	The inode itself is updated (inode_setattr) only when all went well.
+*/
+int UMSDOS_notify_change(struct inode *inode, struct iattr *attr)
+{
+	int ret = inode_change_ok(inode, attr);
+	if (ret != 0) return ret;
+
+	/* #Specification: notify_change / i_nlink > 0
+		notify change is only done for inode with nlink > 0. An inode
+		with nlink == 0 is no longer associated with any entry in
+		the EMD file, so there is nothing to update.
+	*/
+	/* #Specification: root inode / attributes
+		I don't know yet how this should work. Normally
+		the attributes (permissions bits, owner, times) of
+		a directory are stored in the EMD file of its parent.
+
+		One thing we could do is store the attributes of the root
+		inode in its own EMD file. A simple entry named "." could
+		be used for this special case. It would be read once
+		when the file system is mounted and update in
+		UMSDOS_notify_change() (right here).
+
+		I am not sure of the behavior of the root inode for
+		a real UNIX file system. For now, this is a nop.
+	*/
+	if (inode->i_nlink > 0 && inode != inode->i_sb->s_mounted){
+		unsigned long i_emd_owner = inode->u.umsdos_i.i_emd_owner;
+		if (i_emd_owner != 0 && i_emd_owner != 0xffffffff){
+			/* This inode is not a EMD file nor an inode used internally
+				by MSDOS, so we can update its status.
+				See emd.c
+			*/
+			struct inode *emd_owner = iget (inode->i_sb,i_emd_owner);
+			PRINTK (("notify change %p ",inode));
+			if (emd_owner != NULL){
+				struct umsdos_dirent entry;
+				struct file filp;
+				filp.f_reada = 0;
+				filp.f_pos = inode->u.umsdos_i.pos;
+				PRINTK (("pos = %d ",filp.f_pos));
+				/* Read only the start of the entry since we don't touch */
+				/* the name */
+				ret = umsdos_emd_dir_read (emd_owner,&filp,(char*)&entry
+					,UMSDOS_REC_SIZE);
+				if (ret == 0){
+					/* Only the attributes flagged in ia_valid change */
+					if (attr->ia_valid & ATTR_UID)
+						entry.uid = attr->ia_uid;
+					if (attr->ia_valid & ATTR_GID)
+						entry.gid = attr->ia_gid;
+					if (attr->ia_valid & ATTR_MODE)
+						entry.mode = attr->ia_mode;
+					if (attr->ia_valid & ATTR_ATIME)
+						entry.atime = attr->ia_atime;
+					if (attr->ia_valid & ATTR_MTIME)
+						entry.mtime = attr->ia_mtime;
+					if (attr->ia_valid & ATTR_CTIME)
+						entry.ctime = attr->ia_ctime;
+
+					entry.nlink = inode->i_nlink;
+					/* Rewind to the entry and write it back */
+					filp.f_pos = inode->u.umsdos_i.pos;
+					ret = umsdos_emd_dir_write (emd_owner,&filp,(char*)&entry
+						,UMSDOS_REC_SIZE);
+
+					PRINTK (("notify pos %d ret %d nlink %d "
+						,inode->u.umsdos_i.pos
+						,ret,entry.nlink));
+					/* #Specification: notify_change / msdos fs
+						notify_change operation are done only on the
+						EMD file. The msdos fs is not even called.
+					*/
+				}
+				iput (emd_owner);
+			}else{
+				printk ("UMSDOS: emd_owner = NULL ???");
+				ret = -EPERM;
+			}
+			PRINTK (("\n"));
+		}
+	}
+	if (ret != 0) return ret;
+	inode_setattr(inode, attr);
+	return 0;
+}
+
+/* #Specification: function name / convention
+	A simple convention for function name has been used in
+	the UMSDOS file system. First all function use the prefix
+	umsdos_ to avoid name clash with other part of the kernel.
+
+	And standard VFS entry point use the prefix UMSDOS (upper case)
+	so it's easier to tell them apart.
+*/
+
+/* Positional table: the order of the entries is the interface */
+static struct super_operations umsdos_sops = {
+	UMSDOS_read_inode,
+	UMSDOS_notify_change,
+	UMSDOS_write_inode,
+	UMSDOS_put_inode,
+	UMSDOS_put_super,
+	NULL, /* added in 0.96c */
+	UMSDOS_statfs,
+	NULL
+};
+
+/*
+	Read the super block of an Extended MS-DOS FS.
+	Return the super block handed back by msdos_read_super (NULL if
+	it failed).
+*/
+struct super_block *UMSDOS_read_super(
+	struct super_block *s,
+	void *data,
+	int silent)
+{
+	/* #Specification: mount / options
+		Umsdos run on top of msdos. Currently, it supports no
+		mount option, but happily pass all option received to
+		the msdos driver. I am not sure if all msdos mount option
+		make sense with Umsdos. Here are at least those who
+		are useful.
+			uid=
+			gid=
+
+		These options affect the operation of umsdos in directories
+		which do not have an EMD file. They behave like normal
+		msdos directory, with all limitation of msdos.
+	*/
+	struct super_block *sb = msdos_read_super(s,data,silent);
+	printk ("UMSDOS Alpha 0.5a (compatibility level %d.%d, fast msdos)\n"
+		,UMSDOS_VERSION,UMSDOS_RELEASE);
+	if (sb != NULL){
+		sb->s_op = &umsdos_sops;
+		PRINTK (("umsdos_read_super %p\n",sb->s_mounted));
+		umsdos_setup_dir_inode (sb->s_mounted);
+		PRINTK (("End umsdos_read_super\n"));
+		if (s == super_blocks){
+			/* #Specification: pseudo root / mount
+				When a umsdos fs is mounted, a special handling is done
+				if it is the root partition. We check for the presence
+				of the file /linux/etc/init or /linux/etc/rc.
+				If one is there, we do a chroot("/linux").
+
+				We check both because (see init/main.c) the kernel
+				try to exec init at different place and if it fails
+				it tries /bin/sh /etc/rc. To be consistent with
+				init/main.c, many more test would have to be done
+				to locate init. Any complain ?
+
+				The chroot is done manually in init/main.c but the
+				info (the inode) is located at mount time and store
+				in a global variable (pseudo_root) which is used at
+				different place in the umsdos driver. There is no
+				need to store this variable elsewhere because it
+				will always be one, not one per mount.
+
+				This feature allows the installation
+				of a linux system within a DOS system in a subdirectory.
+
+				A user may install its linux stuff in c:\linux
+				avoiding any clash with existing DOS file and subdirectory.
+				When linux boots, it hides this fact, showing a normal
+				root directory with /etc /bin /tmp ...
+
+				The word "linux" is hardcoded in /usr/include/linux/umsdos_fs.h
+				in the macro UMSDOS_PSDROOT_NAME.
+			*/
+
+			/*
+				Every inode pointer below goes through iput() at the
+				end of its scope, so each one starts as NULL: a lookup
+				which fails without storing a result can't leave
+				garbage behind.
+			*/
+			struct inode *pseudo = NULL;
+			Printk (("Mounting root\n"));
+			if (umsdos_real_lookup (sb->s_mounted,UMSDOS_PSDROOT_NAME
+					,UMSDOS_PSDROOT_LEN,&pseudo)==0
+				&& S_ISDIR(pseudo->i_mode)){
+				struct inode *etc = NULL;
+				struct inode *rc = NULL;
+				Printk (("/%s is there\n",UMSDOS_PSDROOT_NAME));
+				if (umsdos_real_lookup (pseudo,"etc",3,&etc)==0
+					&& S_ISDIR(etc->i_mode)){
+					struct inode *init = NULL;
+					Printk (("/%s/etc is there\n",UMSDOS_PSDROOT_NAME));
+					if ((umsdos_real_lookup (etc,"init",4,&init)==0
+							&& S_ISREG(init->i_mode))
+						|| (umsdos_real_lookup (etc,"rc",2,&rc)==0
+							&& S_ISREG(rc->i_mode))){
+						umsdos_setup_dir_inode (pseudo);
+						Printk (("Activating pseudo root /%s\n",UMSDOS_PSDROOT_NAME));
+						pseudo_root = pseudo;
+						pseudo->i_count++;
+						/* Kept in pseudo_root: must not be released below */
+						pseudo = NULL;
+					}
+					iput (init);
+					iput (rc);
+				}
+				iput (etc);
+			}
+			iput (pseudo);
+		}
+		#ifdef MODULE
+			MOD_INC_USE_COUNT;
+		#endif
+	}
+	return sb;
+}
+
+
+#ifdef MODULE
+
+char kernel_version[] = UTS_RELEASE;
+
+static struct file_system_type umsdos_fs_type = {
+	UMSDOS_read_super, "umsdos", 1, NULL
+};
+
+/* Module entry point: make the "umsdos" file system type known */
+int init_module(void)
+{
+	register_filesystem(&umsdos_fs_type);
+	return 0;
+}
+
+/* Module exit point: the type stays registered while it is in use */
+void cleanup_module(void)
+{
+	if (MOD_IN_USE)
+		printk("Umsdos: file system in use, remove delayed\n");
+	else
+	{
+		unregister_filesystem(&umsdos_fs_type);
+	}
+}
+
+#endif
+
diff --git a/fs/umsdos/ioctl.c b/fs/umsdos/ioctl.c
new file mode 100644
index 000000000..972571796
--- /dev/null
+++ b/fs/umsdos/ioctl.c
@@ -0,0 +1,259 @@
+/*
+ * linux/fs/umsdos/ioctl.c + * + * Written 1993 by Jacques Gelinas + * + * Extended MS-DOS ioctl directory handling functions + */ +#include <asm/segment.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/msdos_fs.h> +#include <linux/umsdos_fs.h> + +#define PRINTK(x) +#define Printk(x) printk x + +/* + Perform special function on a directory +*/ +int UMSDOS_ioctl_dir ( + struct inode *dir, + struct file *filp, + unsigned int cmd, + unsigned long data) +{ + int ret = -EPERM; + /* #Specification: ioctl / acces + Only root (effective id) is allowed to do IOCTL on directory + in UMSDOS. EPERM is returned for other user. + */ + if (current->euid == 0 + || cmd == UMSDOS_GETVERSION){ + struct umsdos_ioctl *idata = (struct umsdos_ioctl *)data; + ret = -EINVAL; + /* #Specification: ioctl / prototypes + The official prototype for the umsdos ioctl on directory + is: + + int ioctl ( + int fd, // File handle of the directory + int cmd, // command + struct umsdos_ioctl *data) + + The struct and the commands are defined in linux/umsdos_fs.h. + + umsdos_progs/umsdosio.c provide an interface in C++ to all + these ioctl. umsdos_progs/udosctl is a small utility showing + all this. + + These ioctl generally allow one to work on the EMD or the + DOS directory independently. These are essential to implement + the synchronise. + */ + PRINTK (("ioctl %d ",cmd)); + if (cmd == UMSDOS_GETVERSION){ + /* #Specification: ioctl / UMSDOS_GETVERSION + The field version and release of the structure + umsdos_ioctl are filled with the version and release + number of the fs code in the kernel. This will allow + some form of checking. Users won't be able to run + incompatible utility such as the synchroniser (umssync). + umsdos_progs/umsdosio.c enforce this checking. + + Return always 0. 
+ */ + put_fs_byte (UMSDOS_VERSION,&idata->version); + put_fs_byte (UMSDOS_RELEASE,&idata->release); + ret = 0; + }else if (cmd == UMSDOS_READDIR_DOS){ + /* #Specification: ioctl / UMSDOS_READDIR_DOS + One entry is read from the DOS directory at the current + file position. The entry is put as is in the dos_dirent + field of struct umsdos_ioctl. + + Return > 0 if success. + */ + ret = msdos_readdir(dir,filp,&idata->dos_dirent,1); + }else if (cmd == UMSDOS_READDIR_EMD){ + /* #Specification: ioctl / UMSDOS_READDIR_EMD + One entry is read from the EMD at the current + file position. The entry is put as is in the umsdos_dirent + field of struct umsdos_ioctl. The corresponding mangled + DOS entry name is put in the dos_dirent field. + + All entries are read including hidden links. Blank + entries are skipped. + + Return > 0 if success. + */ + struct inode *emd_dir = umsdos_emd_dir_lookup (dir,0); + if (emd_dir != NULL){ + while (1){ + if (filp->f_pos >= emd_dir->i_size){ + ret = 0; + break; + }else{ + struct umsdos_dirent entry; + off_t f_pos = filp->f_pos; + ret = umsdos_emd_dir_readentry (emd_dir,filp,&entry); + if (ret < 0){ + break; + }else if (entry.name_len > 0){ + struct umsdos_info info; + ret = entry.name_len; + umsdos_parse (entry.name,entry.name_len,&info); + info.f_pos = f_pos; + umsdos_manglename(&info); + memcpy_tofs(&idata->umsdos_dirent,&entry + ,sizeof(entry)); + memcpy_tofs(&idata->dos_dirent.d_name + ,info.fake.fname,info.fake.len+1); + break; + } + } + } + iput (emd_dir); + }else{ + /* The absence of the EMD is simply seen as an EOF */ + ret = 0; + } + }else if (cmd == UMSDOS_INIT_EMD){ + /* #Specification: ioctl / UMSDOS_INIT_EMD + The UMSDOS_INIT_EMD command make sure the EMD + exist for a directory. If it does not, it is + created. Also, it makes sure the directory functions + table (struct inode_operations) is set to the UMSDOS + semantic. 
This mean that umssync may be applied to + an "opened" msdos directory, and it will change behavior + on the fly. + + Return 0 if success. + */ + extern struct inode_operations umsdos_rdir_inode_operations; + struct inode *emd_dir = umsdos_emd_dir_lookup (dir,1); + ret = emd_dir != NULL; + iput (emd_dir); + + dir->i_op = ret + ? &umsdos_dir_inode_operations + : &umsdos_rdir_inode_operations; + }else{ + struct umsdos_ioctl data; + memcpy_fromfs (&data,idata,sizeof(data)); + if (cmd == UMSDOS_CREAT_EMD){ + /* #Specification: ioctl / UMSDOS_CREAT_EMD + The umsdos_dirent field of the struct umsdos_ioctl is used + as is to create a new entry in the EMD of the directory. + The DOS directory is not modified. + No validation is done (yet). + + Return 0 if success. + */ + struct umsdos_info info; + /* This makes sure info.entry and info in general is correctly */ + /* initialised */ + memcpy (&info.entry,&data.umsdos_dirent + ,sizeof(data.umsdos_dirent)); + umsdos_parse (data.umsdos_dirent.name + ,data.umsdos_dirent.name_len,&info); + ret = umsdos_newentry (dir,&info); + }else if (cmd == UMSDOS_UNLINK_EMD){ + /* #Specification: ioctl / UMSDOS_UNLINK_EMD + The umsdos_dirent field of the struct umsdos_ioctl is used + as is to remove an entry from the EMD of the directory. + No validation is done (yet). The mode field is used + to validate S_ISDIR or S_ISREG. + + Return 0 if success. + */ + struct umsdos_info info; + /* This makes sure info.entry and info in general is correctly */ + /* initialised */ + memcpy (&info.entry,&data.umsdos_dirent + ,sizeof(data.umsdos_dirent)); + umsdos_parse (data.umsdos_dirent.name + ,data.umsdos_dirent.name_len,&info); + ret = umsdos_delentry (dir,&info + ,S_ISDIR(data.umsdos_dirent.mode)); + }else if (cmd == UMSDOS_UNLINK_DOS){ + /* #Specification: ioctl / UMSDOS_UNLINK_DOS + The dos_dirent field of the struct umsdos_ioctl is used to + execute a msdos_unlink operation. The d_name and d_reclen + fields are used. + + Return 0 if success. 
+ */ + dir->i_count++; + ret = msdos_unlink (dir,data.dos_dirent.d_name + ,data.dos_dirent.d_reclen); + }else if (cmd == UMSDOS_RMDIR_DOS){ + /* #Specification: ioctl / UMSDOS_RMDIR_DOS + The dos_dirent field of the struct umsdos_ioctl is used to + execute a msdos_unlink operation. The d_name and d_reclen + fields are used. + + Return 0 if success. + */ + dir->i_count++; + ret = msdos_rmdir (dir,data.dos_dirent.d_name + ,data.dos_dirent.d_reclen); + }else if (cmd == UMSDOS_STAT_DOS){ + /* #Specification: ioctl / UMSDOS_STAT_DOS + The dos_dirent field of the struct umsdos_ioctl is + used to execute a stat operation in the DOS directory. + The d_name and d_reclen fields are used. + + The following field of umsdos_ioctl.stat are filled. + + st_ino,st_mode,st_size,st_atime,st_mtime,st_ctime, + Return 0 if success. + */ + struct inode *inode; + ret = umsdos_real_lookup (dir,data.dos_dirent.d_name + ,data.dos_dirent.d_reclen,&inode); + if (ret == 0){ + data.stat.st_ino = inode->i_ino; + data.stat.st_mode = inode->i_mode; + data.stat.st_size = inode->i_size; + data.stat.st_atime = inode->i_atime; + data.stat.st_ctime = inode->i_ctime; + data.stat.st_mtime = inode->i_mtime; + memcpy_tofs (&idata->stat,&data.stat,sizeof(data.stat)); + iput (inode); + } + }else if (cmd == UMSDOS_DOS_SETUP){ + /* #Specification: ioctl / UMSDOS_DOS_SETUP + The UMSDOS_DOS_SETUP ioctl allow changing the + default permission of the MsDOS file system driver + on the fly. The MsDOS driver apply global permission + to every file and directory. Normally these permissions + are controlled by a mount option. This is not + available for root partition, so a special utility + (umssetup) is provided to do this, normally in + /etc/rc.local. + + Be aware that this apply ONLY to MsDOS directory + (those without EMD --linux-.---). Umsdos directory + have independent (standard) permission for each + and every file. + + The field umsdos_dirent provide the information needed. 
+ umsdos_dirent.uid and gid sets the owner and group. + umsdos_dirent.mode set the permissions flags. + */ + dir->i_sb->u.msdos_sb.fs_uid = data.umsdos_dirent.uid; + dir->i_sb->u.msdos_sb.fs_gid = data.umsdos_dirent.gid; + dir->i_sb->u.msdos_sb.fs_umask = data.umsdos_dirent.mode; + ret = 0; + } + } + } + PRINTK (("ioctl return %d\n",ret)); + return ret; +} + + + diff --git a/fs/umsdos/mangle.c b/fs/umsdos/mangle.c new file mode 100644 index 000000000..1f59447e9 --- /dev/null +++ b/fs/umsdos/mangle.c @@ -0,0 +1,478 @@ +/* + * linux/fs/umsdos/mangle.c + * + * Written 1993 by Jacques Gelinas + * + * Control the mangling of file name to fit msdos name space. + * Many optimisation by GLU == dglaude@is1.vub.ac.be (GLAUDE DAVID) +*/ +#include <linux/errno.h> +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/umsdos_fs.h> + +/* + Complete the mangling of the MSDOS fake name + based on the position of the entry in the EMD file. + + Simply complete the job of umsdos_parse; fill the extension. + + Beware that info->f_pos must be set. +*/ +void umsdos_manglename (struct umsdos_info *info) +{ + if (info->msdos_reject){ + /* #Specification: file name / non MSDOS conforming / mangling + Each non MSDOS conforming file has a special extension + build from the entry position in the EMD file. + + This number is then transform in a base 32 number, where + each digit is expressed like hexadecimal number, using + digit and letter, except it uses 22 letters from 'a' to 'v'. + The number 32 comes from 2**5. It is faster to split a binary + number using a base which is a power of two. And I was 32 + when I started this project. Pick your answer :-) . + + If the result is '0', it is replace with '_', simply + to make it odd. + + This is true for the first two character of the extension. + The last one is taken from a list of odd character, which + are: + + { } ( ) ! 
` ^ & @ + + With this scheme, we can produce 9216 ( 9* 32 * 32) + different extensions which should not clash with any useful + extension already popular or meaningful. Since most directory + have much less than 32 * 32 files in it, the first character + of the extension of any mangle name will be {. + + Here are the reason to do this (this kind of mangling). + + -The mangling is deterministic. Just by the extension, we + are able to locate the entry in the EMD file. + + -By keeping to beginning of the file name almost unchanged, + we are helping the MSDOS user. + + -The mangling produces names not too ugly, so an msdos user + may live with it (remember it, type it, etc...). + + -The mangling produces names ugly enough so no one will + ever think of using such a name in real life. This is not + fool proof. I don't think there is a total solution to this. + */ + union { + int entry_num; + struct { + unsigned num1:5,num2:5,num3:5; + }num; + } u; + char *pt = info->fake.fname + info->fake.len; + /* lookup for encoding the last character of the extension */ + /* It contain valid character after the ugly one to make sure */ + /* even if someone overflow the 32 * 32 * 9 limit, it still do */ + /* something */ + #define SPECIAL_MANGLING '{','}','(',')','!','`','^','&','@' + static char lookup3[]={ + SPECIAL_MANGLING, + /* This is the start of lookup12 */ + '_','1','2','3','4','5','6','7','8','9', + 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o', + 'p','q','r','s','t','u','v' + }; + #define lookup12 (lookup3+9) + u.entry_num = info->f_pos / UMSDOS_REC_SIZE; + if (u.entry_num > (9* 32 * 32)){ + printk ("UMSDOS: More than 9216 file in a directory.\n" + "This may break the mangling strategy.\n" + "Not a killer problem. 
See doc.\n"); + } + *pt++ = '.'; + *pt++ = lookup3 [u.num.num3]; + *pt++ = lookup12[u.num.num2]; + *pt++ = lookup12[u.num.num1]; + *pt = '\0'; /* help doing printk */ + info->fake.len += 4; + info->msdos_reject = 0; /* Avoid mangling twice */ + } +} + +/* + Evaluate the record size needed to store of name of len character. + The value returned is a multiple of UMSDOS_REC_SIZE. +*/ +int umsdos_evalrecsize (int len) +{ + struct umsdos_dirent dirent; + int nbrec = 1+((len-1+(dirent.name-(char*)&dirent)) + / UMSDOS_REC_SIZE); + return nbrec * UMSDOS_REC_SIZE; + /* + GLU This should be inlined or something to speed it up to the max. + GLU nbrec is absolutely not needed to return the value. + */ +} +#ifdef TEST +int umsdos_evalrecsize_old (int len) +{ + struct umsdos_dirent dirent; + int size = len + (dirent.name-(char*)&dirent); + int nbrec = size / UMSDOS_REC_SIZE; + int extra = size % UMSDOS_REC_SIZE; + if (extra > 0) nbrec++; + return nbrec * UMSDOS_REC_SIZE; +} +#endif +/* + Fill the struct info with the full and msdos name of a file + Return 0 if all is ok, a negative error code otherwise. +*/ +int umsdos_parse ( + const char *fname, + int len, + struct umsdos_info *info) +{ + int ret = -ENAMETOOLONG; + /* #Specification: file name / too long + If a file name exceed UMSDOS maxima, the file name is silently + truncated. This makes it conformant with the other file system + of Linux (minix and ext2 at least). + */ + if (len > UMSDOS_MAXNAME) len = UMSDOS_MAXNAME; + { + const char *firstpt=NULL; /* First place we saw a . in fname */ + /* #Specification: file name / non MSDOS conforming / base length 0 + file name beginning with a period '.' are invalid for MsDOS. + It needs absolutely a base name. So the file name is mangled + */ + int ivldchar = fname[0] == '.';/* At least one invalid character */ + int msdos_len = len; + int base_len; + /* + cardinal_per_size tells if there exist at least one + DOS pseudo devices on length n. See the test below. 
+ */ + static const char cardinal_per_size[9]={ + 0, 0, 0, 1, 1, 0, 1, 0, 1 + }; + /* + lkp translate all character to acceptable character (for DOS). + When lkp[n] == n, it means also it is an acceptable one. + So it serve both as a flag and as a translator. + */ + static char lkp[256]; + static char is_init=0; + if (!is_init){ + /* + Initialisation of the array is easier and less error prone + like this. + */ + int i; + static char *spc = "\"*+,/:;<=>?[\\]|~"; + is_init = 1; + for (i=0; i<=32; i++) lkp[i] = '#'; + for (i=33; i<'A'; i++) lkp[i] = (char)i; + for (i='A'; i<='Z'; i++) lkp[i] = (char)(i+('a'-'A')); + for (i='Z'+1; i<127; i++) lkp[i] = (char)i; + for (i=128; i<256; i++) lkp[i] = '#'; + + lkp['.'] = '_'; + while (*spc != '\0') lkp[(unsigned char)(*spc++)] = '#'; + } + /* GLU + file name which are longer than 8+'.'+3 are invalid for MsDOS. + So the file name is to be mangled no more test needed. + This Speed Up for long and very long name. + The position of the last point is no more necessary anyway. + */ + if (len<=(8+1+3)){ + const char *pt = fname; + const char *endpt = fname + len; + while (pt < endpt){ + if (*pt == '.'){ + if (firstpt != NULL){ + /* 2 . in a file name. Reject */ + ivldchar = 1; + break; + }else{ + int extlen = (int)(endpt - pt); + firstpt = pt; + if (firstpt - fname > 8){ + /* base name longer than 8: reject */ + ivldchar = 1; + break; + }else if (extlen > 4){ + /* Extension longer than 4 (including .): reject */ + ivldchar = 1; + break; + }else if (extlen == 1){ + /* #Specification: file name / non MSDOS conforming / last char == . + If the last character of a file name is + a period, mangling is applied. MsDOS do + not support those file name. + */ + ivldchar = 1; + break; + }else if (extlen == 4){ + /* #Specification: file name / non MSDOS conforming / mangling clash + To avoid clash with the umsdos mangling, any file + with a special character as the first character + of the extension will be mangled. 
This solve the + following problem: + + touch FILE + # FILE is invalid for DOS, so mangling is applied + # file.{_1 is created in the DOS directory + touch file.{_1 + # To UMSDOS file point to a single DOS entry. + # So file.{_1 has to be mangled. + */ + static char special[]={ + SPECIAL_MANGLING,'\0' + }; + if (strchr(special,firstpt[1])!= NULL){ + ivldchar = 1; + break; + } + } + } + }else if (lkp[(unsigned char)(*pt)] != *pt){ + ivldchar = 1; + break; + } + pt++; + } + }else{ + ivldchar = 1; + } + if (ivldchar + || (firstpt == NULL && len > 8) + || (len == UMSDOS_EMD_NAMELEN + && memcmp(fname,UMSDOS_EMD_FILE,UMSDOS_EMD_NAMELEN)==0)){ + /* #Specification: file name / --linux-.--- + The name of the EMD file --linux-.--- is map to a mangled + name. So UMSDOS does not restrict its use. + */ + /* #Specification: file name / non MSDOS conforming / mangling + Non MSDOS conforming file name must use some alias to fit + in the MSDOS name space. + + The strategy is simple. The name is simply truncated to + 8 char. points are replace with underscore and a + number is given as an extension. This number correspond + to the entry number in the EMD file. The EMD file + only need to carry the real name. + + Upper case is also convert to lower case. + Control character are converted to #. + Space are converted to #. + The following character are also converted to #. + " * + , / : ; < = > ? [ \ ] | ~ + + Sometime, the problem is not in MsDOS itself but in + command.com. + */ + int i; + char *pt = info->fake.fname; + base_len = msdos_len = (msdos_len>8) ? 8 : msdos_len; + /* + There is no '.' any more so we know for a fact that + the base length is the length. 
+ */ + memcpy (info->fake.fname,fname,msdos_len); + for (i=0; i<msdos_len; i++, pt++) *pt = lkp[(unsigned char)(*pt)]; + *pt = '\0'; /* GLU We are sure to have a 0 at the end */ + info->msdos_reject = 1; + /* + The numeric extension is added only when we know + the position in the EMD file, in umsdos_newentry(), + umsdos_delentry(), and umsdos_findentry(). + See umsdos_manglename(). + */ + }else{ + /* Conforming MSDOS file name */ + /* + The name is described by (fname,len) everywhere else in + this function, so do not rely on a NUL being present at + fname[len]: copy exactly len bytes and terminate explicitly + instead of using strcpy(). len is at most 8+1+3 here. + */ + memcpy (info->fake.fname,fname,len); + info->fake.fname[len] = '\0'; + info->msdos_reject = 0; + base_len = firstpt != NULL ? (int)(firstpt - fname) : len; + } + if (cardinal_per_size[base_len]){ + /* #Specification: file name / MSDOS devices / mangling + To avoid unreachable file from MsDOS, any MsDOS conforming + file with a basename equal to one of the MsDOS pseudo + devices will be mangled. + + If a file such as "prn" was created, it would be unreachable + under MsDOS because prn is assumed to be the printer, even + if the file does have an extension. + + Since the extension is unimportant to MsDOS, we must patch + the basename also. We simply insert a minus '-'. To avoid + conflict with valid file with a minus in front (such as + "-prn"), we add an mangled extension like any other + mangled file name. + + Here is the list of DOS pseudo devices: + + "prn","con","aux","nul", + "lpt1","lpt2","lpt3","lpt4", + "com1","com2","com3","com4", + "clock$" + + and some standard ones for common DOS programs + + "emmxxxx0","xmsxxxx0","setverxx" + + (Thanks to Chris Hall <CAH17@PHOENIX.CAMBRIDGE.AC.UK> + for pointing these to me). + + Is there one missing ? 
+ */ + /* This table must be ordered by length */ + static const char *tbdev[]={ + "prn","con","aux","nul", + "lpt1","lpt2","lpt3","lpt4", + "com1","com2","com3","com4", + "clock$", + "emmxxxx0","xmsxxxx0","setverxx" + }; + /* Tell where to find in tbdev[], the first name of */ + /* a certain length */ + static const char start_ind_dev[9]={ + 0, 0, 0, 4, 12, 12, 13, 13, 16 + }; + char basen[9]; + int i; + for (i=start_ind_dev[base_len-1]; i<start_ind_dev[base_len]; i++){ + if (memcmp(info->fake.fname,tbdev[i],base_len)==0){ + memcpy (basen,info->fake.fname,base_len); + basen[base_len] = '\0'; /* GLU We are sure to have a 0 at the end */ + /* + GLU We only do this when necessary; we try to keep the + GLU general (most frequent) case simple. + */ + info->fake.fname[0] = '-'; + strcpy (info->fake.fname+1,basen); /* GLU We are sure to have a 0 at the end */ + msdos_len = (base_len==8) ? 8 : base_len + 1; + info->msdos_reject = 1; + break; + } + } + } + info->fake.fname[msdos_len] = '\0'; /* Help doing printk */ + /* GLU This zero should already be there ! (invariant ?) */ + info->fake.len = msdos_len; + /* Why not use info->fake.len everywhere ??? longer ?*/ + memcpy (info->entry.name,fname,len); + info->entry.name_len = len; + ret = 0; + } + /* + Evaluate how many record are needed to store this entry. + */ + info->recsize = umsdos_evalrecsize (len); + return ret; +} + +#ifdef TEST + +struct MANG_TEST{ + char *fname; /* Name to validate */ + int msdos_reject; /* Expected msdos_reject flag */ + char *msname; /* Expected msdos name */ +}; + +struct MANG_TEST tb[]={ + "hello", 0, "hello", + "hello.1", 0, "hello.1", + "hello.1_", 0, "hello.1_", + "prm", 0, "prm", + +#ifdef PROPOSITION + "HELLO", 1, "hello", + "Hello.1", 1, "hello.1", + "Hello.c", 1, "hello.c", +#else +/* + Je trouve les trois exemples ci-dessous tres "malheureux". + Je propose de mettre en minuscule dans un passe preliminaire, + et de tester apres si il y a d'autres caracters "mechants". 
+ Bon, je ne l'ai pas fait, parceque ce n'est pas si facilement + modifiable que ca. Mais c'est pour le principe. + Evidemment cela augmente les chances de "Collision", + par exemple: entre "HELLO" et "Hello", mais ces problemes + peuvent etre traiter ailleur avec les autres collisions. +*/ + "HELLO", 1, "hello", + "Hello.1", 1, "hello_1", + "Hello.c", 1, "hello_c", +#endif + + "hello.{_1", 1, "hello_{_", + "hello\t", 1, "hello#", + "hello.1.1", 1, "hello_1_", + "hel,lo", 1, "hel#lo", + "Salut.Tu.vas.bien?", 1, "salut_tu", + ".profile", 1, "_profile", + ".xv", 1, "_xv", + "toto.", 1, "toto_", + "clock$.x", 1, "-clock$", + "emmxxxx0", 1, "-emmxxxx", + "emmxxxx0.abcd", 1, "-emmxxxx", + "aux", 1, "-aux", + "prn", 1, "-prn", + "prn.abc", 1, "-prn", + "PRN", 1, "-prn", +/* +GLU ATTENTION : Le resultat de ceux-ci sont differents avec ma version +GLU du mangle par rapport au mangle originale. +GLU CAUSE: La maniere de calculer la variable baselen. +GLU Pour toi c'est toujours 3 +GLU Pour moi c'est respectivement 7, 8 et 8 +*/ + "PRN.abc", 1, "prn_abc", + "Prn.abcd", 1, "prn_abcd", + "prn.abcd", 1, "prn_abcd", + "Prn.abcdefghij", 1, "prn_abcd" +}; + +int main (int argc, char *argv[]) +{ + int i,rold,rnew; + printf ("Testing the umsdos_parse.\n"); + for (i=0; i<sizeof(tb)/sizeof(tb[0]); i++){ + struct MANG_TEST *pttb = tb+i; + struct umsdos_info info; + int ok = umsdos_parse (pttb->fname,strlen(pttb->fname),&info); + if (strcmp(info.fake.fname,pttb->msname)!=0){ + printf ("**** %s -> ",pttb->fname); + printf ("%s <> %s\n",info.fake.fname,pttb->msname); + }else if (info.msdos_reject != pttb->msdos_reject){ + printf ("**** %s -> %s ",pttb->fname,pttb->msname); + printf ("%d <> %d\n",info.msdos_reject,pttb->msdos_reject); + }else{ + printf (" %s -> %s %d\n",pttb->fname,pttb->msname + ,pttb->msdos_reject); + } + } + printf ("Testing the new umsdos_evalrecsize."); + for (i=0; i<UMSDOS_MAXNAME ; i++){ + rnew=umsdos_evalrecsize (i); + rold=umsdos_evalrecsize_old (i); + if 
(!(i%UMSDOS_REC_SIZE)){ + printf ("\n%d:\t",i); + } + if (rnew!=rold){ + printf ("**** %d newres: %d != %d \n", i, rnew, rold); + }else{ + printf("."); + } + } + printf ("\nEnd of Testing.\n"); + + return 0; +} + +#endif diff --git a/fs/umsdos/namei.c b/fs/umsdos/namei.c new file mode 100644 index 000000000..567039e14 --- /dev/null +++ b/fs/umsdos/namei.c @@ -0,0 +1,1043 @@ +/* + * linux/fs/umsdos/namei.c + * + * Written 1993 by Jacques Gelinas + * Inspired from linux/fs/msdos/... by Werner Almesberger + * + * Maintain and access the --linux alternate directory file. +*/ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/msdos_fs.h> +#include <linux/umsdos_fs.h> +#include <linux/malloc.h> + +#define PRINTK(x) +#define Printk(x) printk x + +#if 1 +/* + Wait for creation exclusivity. + Return 0 if the dir was already available. + Return 1 if a wait was necessary. + When 1 is return, it means a wait was done. It does not + mean the directory is available. +*/ +static int umsdos_waitcreate(struct inode *dir) +{ + int ret = 0; + if (dir->u.umsdos_i.u.dir_info.creating + && dir->u.umsdos_i.u.dir_info.pid != current->pid){ + sleep_on(&dir->u.umsdos_i.u.dir_info.p); + ret = 1; + } + return ret; +} +/* + Wait for any lookup process to finish +*/ +static void umsdos_waitlookup (struct inode *dir) +{ + while (dir->u.umsdos_i.u.dir_info.looking){ + sleep_on(&dir->u.umsdos_i.u.dir_info.p); + } +} +/* + Lock all other process out of this directory. +*/ +void umsdos_lockcreate (struct inode *dir) +{ + /* #Specification: file creation / not atomic + File creation is a two step process. First we create (allocate) + an entry in the EMD file and then (using the entry offset) we + build a unique name for MSDOS. We create this name in the msdos + space. 
+ + We have to use semaphore (sleep_on/wake_up) to prevent lookup + into a directory when we create a file or directory and to + prevent creation while a lookup is going on. Since many lookup + may happen at the same time, the semaphore is a counter. + + Only one creation is allowed at the same time. This protection + may not be necessary. The problem arise mainly when a lookup + or a readdir is done while a file is partially created. The + lookup process see that as a "normal" problem and silently + erase the file from the EMD file. Normal because a file + may be erased during a MSDOS session, but not removed from + the EMD file. + + The locking is done on a directory per directory basis. Each + directory inode has its wait_queue. + + For some operation like hard link, things even get worse. Many + creation must occur at once (atomic). To simplify the design + a process is allowed to recursively lock the directory for + creation. The pid of the locking process is kept along with + a counter so a second level of locking is granted or not. + */ + /* + Wait for any creation process to finish except + if we (the process) own the lock + */ + while (umsdos_waitcreate(dir)!=0); + dir->u.umsdos_i.u.dir_info.creating++; + dir->u.umsdos_i.u.dir_info.pid = current->pid; + umsdos_waitlookup (dir); +} +/* + Lock all other process out of those two directories. +*/ +static void umsdos_lockcreate2 (struct inode *dir1, struct inode *dir2) +{ + /* + We must check that both directory are available before + locking anyone of them. This is to avoid some deadlock. + Thanks to dglaude@is1.vub.ac.be (GLAUDE DAVID) for pointing + this to me. 
+ */ + while (1){ + if (umsdos_waitcreate(dir1)==0 + && umsdos_waitcreate(dir2)==0){ + /* We own both now */ + dir1->u.umsdos_i.u.dir_info.creating++; + dir1->u.umsdos_i.u.dir_info.pid = current->pid; + dir2->u.umsdos_i.u.dir_info.creating++; + dir2->u.umsdos_i.u.dir_info.pid = current->pid; + break; + } + } + umsdos_waitlookup(dir1); + umsdos_waitlookup(dir2); +} +/* + Wait until creation is finish in this directory. +*/ +void umsdos_startlookup (struct inode *dir) +{ + while (umsdos_waitcreate (dir) != 0); + dir->u.umsdos_i.u.dir_info.looking++; +} +void check_page_tables(void); + +/* + Unlock the directory. +*/ +void umsdos_unlockcreate (struct inode *dir) +{ + dir->u.umsdos_i.u.dir_info.creating--; + if (dir->u.umsdos_i.u.dir_info.creating < 0){ + printk ("UMSDOS: dir->u.umsdos_i.u.dir_info.creating < 0: %d" + ,dir->u.umsdos_i.u.dir_info.creating); + } + wake_up (&dir->u.umsdos_i.u.dir_info.p); +} +/* + Tell directory lookup is over. +*/ +void umsdos_endlookup (struct inode *dir) +{ + dir->u.umsdos_i.u.dir_info.looking--; + if (dir->u.umsdos_i.u.dir_info.looking < 0){ + printk ("UMSDOS: dir->u.umsdos_i.u.dir_info.looking < 0: %d" + ,dir->u.umsdos_i.u.dir_info.looking); + } + wake_up (&dir->u.umsdos_i.u.dir_info.p); +} +#else +static void umsdos_lockcreate (struct inode *dir){} +static void umsdos_lockcreate2 (struct inode *dir1, struct inode *dir2){} +void umsdos_startlookup (struct inode *dir){} +static void umsdos_unlockcreate (struct inode *dir){} +void umsdos_endlookup (struct inode *dir){} +#endif +static int umsdos_nevercreat( + struct inode *dir, + const char *name, /* Name of the file to add */ + int len, + int errcod) /* Length of the name */ +{ + int ret = 0; + if (umsdos_is_pseudodos(dir,name,len)){ + /* #Specification: pseudo root / any file creation /DOS + The pseudo sub-directory /DOS can't be created! + EEXIST is returned. + + The pseudo sub-directory /DOS can't be removed! + EPERM is returned. 
+ */ + ret = -EPERM; + ret = errcod; + }else if (name[0] == '.' + && (len == 1 || (len == 2 && name[1] == '.'))){ + /* #Specification: create / . and .. + If one try to creates . or .., it always fail and return + EEXIST. + + If one try to delete . or .., it always fail and return + EPERM. + + This should be test at the VFS layer level to avoid + duplicating this in all file systems. Any comments ? + */ + ret = errcod; + } + return ret; +} + +/* + Add a new file (ordinary or special) into the alternate directory. + The file is added to the real MSDOS directory. If successful, it + is then added to the EDM file. + + Return the status of the operation. 0 mean success. +*/ +static int umsdos_create_any ( + struct inode *dir, + const char *name, /* Name of the file to add */ + int len, /* Length of the name */ + int mode, /* Permission bit + file type ??? */ + int rdev, /* major, minor or 0 for ordinary file */ + /* and symlinks */ + char flags, + struct inode **result) /* Will hold the inode of the newly created */ + /* file */ +{ + int ret = umsdos_nevercreat(dir,name,len,-EEXIST); + if (ret == 0){ + struct umsdos_info info; + ret = umsdos_parse (name,len,&info); + *result = NULL; + if (ret == 0){ + info.entry.mode = mode; + info.entry.rdev = rdev; + info.entry.flags = flags; + info.entry.uid = current->fsuid; + info.entry.gid = (dir->i_mode & S_ISGID) + ? 
dir->i_gid : current->fsgid; + info.entry.ctime = info.entry.atime = info.entry.mtime + = CURRENT_TIME; + info.entry.nlink = 1; + umsdos_lockcreate(dir); + ret = umsdos_newentry (dir,&info); + if (ret == 0){ + dir->i_count++; + ret = msdos_create (dir,info.fake.fname,info.fake.len + ,S_IFREG|0777,result); + if (ret == 0){ + struct inode *inode = *result; + umsdos_lookup_patch (dir,inode,&info.entry,info.f_pos); + PRINTK (("inode %p[%d] ",inode,inode->i_count)); + PRINTK (("Creation OK: [%d] %s %d pos %d\n",dir->i_ino + ,info.fake.fname,current->pid,info.f_pos)); + }else{ + /* #Specification: create / file exist in DOS + Here is a situation. Trying to create a file with + UMSDOS. The file is unknown to UMSDOS but already + exist in the DOS directory. + + Here is what we are NOT doing: + + We could silently assume that everything is fine + and allows the creation to succeed. + + It is possible not all files in the partition + are mean to be visible from linux. By trying to create + those file in some directory, one user may get access + to those file without proper permissions. Looks like + a security hole to me. Off course sharing a file system + with DOS is some kind of security hole :-) + + So ? + + We return EEXIST in this case. + The same is true for directory creation. + */ + if (ret == -EEXIST){ + printk ("UMSDOS: out of sync, Creation error [%ld], " + "deleting %s %d %d pos %ld\n",dir->i_ino + ,info.fake.fname,-ret,current->pid,info.f_pos); + } + umsdos_delentry (dir,&info,0); + } + PRINTK (("umsdos_create %s ret = %d pos %d\n" + ,info.fake.fname,ret,info.f_pos)); + } + umsdos_unlockcreate(dir); + } + } + iput (dir); + return ret; +} +/* + Initialise the new_entry from the old for a rename operation. + (Only useful for umsdos_rename_f() below). 
+*/ +static void umsdos_ren_init( + struct umsdos_info *new_info, + struct umsdos_info *old_info, + int flags) /* 0 == copy flags from old_name */ + /* != 0, this is the value of flags */ +{ + new_info->entry.mode = old_info->entry.mode; + new_info->entry.rdev = old_info->entry.rdev; + new_info->entry.uid = old_info->entry.uid; + new_info->entry.gid = old_info->entry.gid; + new_info->entry.ctime = old_info->entry.ctime; + new_info->entry.atime = old_info->entry.atime; + new_info->entry.mtime = old_info->entry.mtime; + new_info->entry.flags = flags ? flags : old_info->entry.flags; + new_info->entry.nlink = old_info->entry.nlink; +} + +#define chkstk() \ + if (STACK_MAGIC != *(unsigned long *)current->kernel_stack_page){\ + printk(KERN_ALERT "UMSDOS: %s magic %x != %lx ligne %d\n" \ + , current->comm,STACK_MAGIC \ + ,*(unsigned long *)current->kernel_stack_page \ + ,__LINE__); \ + } + +/* + Rename a file (move) in the file system. +*/ +static int umsdos_rename_f( + struct inode * old_dir, + const char * old_name, + int old_len, + struct inode * new_dir, + const char * new_name, + int new_len, + int flags) /* 0 == copy flags from old_name */ + /* != 0, this is the value of flags */ +{ + int ret = EPERM; + struct umsdos_info old_info; + int old_ret = umsdos_parse (old_name,old_len,&old_info); + struct umsdos_info new_info; + int new_ret = umsdos_parse (new_name,new_len,&new_info); +chkstk(); + PRINTK (("umsdos_rename %d %d ",old_ret,new_ret)); + if (old_ret == 0 && new_ret == 0){ + umsdos_lockcreate2(old_dir,new_dir); +chkstk(); + PRINTK (("old findentry ")); + ret = umsdos_findentry(old_dir,&old_info,0); +chkstk(); + PRINTK (("ret %d ",ret)); + if (ret == 0){ + PRINTK (("new newentry ")); + umsdos_ren_init(&new_info,&old_info,flags); + ret = umsdos_newentry (new_dir,&new_info); +chkstk(); + PRINTK (("ret %d %d ",ret,new_info.fake.len)); + if (ret == 0){ + PRINTK (("msdos_rename ")); + old_dir->i_count++; + new_dir->i_count++; /* Both inode are needed later */ + ret = 
msdos_rename (old_dir + ,old_info.fake.fname,old_info.fake.len + ,new_dir + ,new_info.fake.fname,new_info.fake.len); +chkstk(); + PRINTK (("after m_rename ret %d ",ret)); + if (ret != 0){ + umsdos_delentry (new_dir,&new_info + ,S_ISDIR(new_info.entry.mode)); +chkstk(); + }else{ + ret = umsdos_delentry (old_dir,&old_info + ,S_ISDIR(old_info.entry.mode)); +chkstk(); + if (ret == 0){ + /* + This UMSDOS_lookup does not look very useful. + It makes sure that the inode of the file will + be correctly setup (umsdos_patch_inode()) in + case it is already in use. + + Not very efficient ... + */ + struct inode *inode; + new_dir->i_count++; + PRINTK (("rename lookup len %d %d -- ",new_len,new_info.entry.flags)); + ret = UMSDOS_lookup (new_dir,new_name,new_len + ,&inode); +chkstk(); + if (ret != 0){ + printk ("UMSDOS: partial rename for file %s\n" + ,new_info.entry.name); + }else{ + /* + Update f_pos so notify_change will succeed + if the file was already in use. + */ + umsdos_set_dirinfo (inode,new_dir,new_info.f_pos); +chkstk(); + iput (inode); + } + } + } + } + } + umsdos_unlockcreate(old_dir); + umsdos_unlockcreate(new_dir); + } + iput (old_dir); + iput (new_dir); + PRINTK (("\n")); + return ret; +} +/* + Setup un Symbolic link or a (pseudo) hard link + Return a negative error code or 0 if ok. +*/ +static int umsdos_symlink_x( + struct inode * dir, + const char * name, + int len, + const char * symname, /* name will point to this path */ + int mode, + char flags) +{ + /* #Specification: symbolic links / strategy + A symbolic link is simply a file which hold a path. It is + implemented as a normal MSDOS file (not very space efficient :-() + + I see 2 different way to do it. One is to place the link data + in unused entry of the EMD file. The other is to have a separate + file dedicated to hold all symbolic links data. + + Lets go for simplicity... 
+ */ + struct inode *inode; + int ret; + dir->i_count++; /* We keep the inode in case we need it */ + /* later */ + ret = umsdos_create_any (dir,name,len,mode,0,flags,&inode); + PRINTK (("umsdos_symlink ret %d ",ret)); + if (ret == 0){ + int len = strlen(symname); + struct file filp; + filp.f_pos = 0; + /* Make the inode acceptable to MSDOS */ + ret = umsdos_file_write_kmem (inode,&filp,(char*)symname,len); + iput (inode); + if (ret >= 0){ + if (ret != len){ + ret = -EIO; + printk ("UMSDOS: " + "Can't write symbolic link data\n"); + }else{ + ret = 0; + } + } + if (ret != 0){ + UMSDOS_unlink (dir,name,len); + dir = NULL; + } + } + iput (dir); + PRINTK (("\n")); + return ret; +} +/* + Setup un Symbolic link. + Return a negative error code or 0 if ok. +*/ +int UMSDOS_symlink( + struct inode * dir, + const char * name, + int len, + const char * symname) /* name will point to this path */ +{ + return umsdos_symlink_x (dir,name,len,symname,S_IFLNK|0777,0); +} +/* + Add a link to an inode in a directory +*/ +int UMSDOS_link ( + struct inode * oldinode, + struct inode * dir, + const char * name, + int len) +{ + /* #Specification: hard link / strategy + Well ... hard link are difficult to implement on top of an + MsDOS fat file system. Unlike UNIX file systems, there are no + inode. A directory entry hold the functionality of the inode + and the entry. + + We will used the same strategy as a normal Unix file system + (with inode) except we will do it symbolically (using paths). + + Because anything can happen during a DOS session (defragment, + directory sorting, etc...), we can't rely on MsDOS pseudo + inode number to record the link. For this reason, the link + will be done using hidden symbolic links. The following + scenario illustrate how it work. 
+ + Given a file /foo/file + + ln /foo/file /tmp/file2 + + become internally + + mv /foo/file /foo/-LINK1 + ln -s /foo/-LINK1 /foo/file + ln -s /foo/-LINK1 /tmp/file2 + + Using this strategy, we can operate on /foo/file or /foo/file2. + We can remove one and keep the other, like a normal Unix hard link. + We can rename /foo/file or /tmp/file2 independently. + + The entry -LINK1 will be hidden. It will hold a link count. + When all link are erased, the hidden file is erased too. + */ + /* #Specification: weakness / hard link + The strategy for hard link introduces a side effect that + may or may not be acceptable. Here is the sequence + + mkdir subdir1 + touch subdir1/file + mkdir subdir2 + ln subdir1/file subdir2/file + rm subdir1/file + rmdir subdir1 + rmdir: subdir1: Directory not empty + + This happen because there is an invisible file (--link) in + subdir1 which is referenced by subdir2/file. + + Any idea ? + */ + /* #Specification: weakness / hard link / rename directory + Another weakness of hard link come from the fact that + it is based on hidden symbolic links. Here is an example. + + mkdir /subdir1 + touch /subdir1/file + mkdir /subdir2 + ln /subdir1/file subdir2/file + mv /subdir1 subdir3 + ls -l /subdir2/file + + Since /subdir2/file is a hidden symbolic link + to /subdir1/..hlinkNNN, accessing it will fail since + /subdir1 does not exist anymore (has been renamed). + */ + int ret = 0; + if (S_ISDIR(oldinode->i_mode)){ + /* #Specification: hard link / directory + A hard link can't be made on a directory. EPERM is returned + in this case. 
+ */ + ret = -EPERM; + }else if ((ret = umsdos_nevercreat(dir,name,len,-EPERM))==0){ + struct inode *olddir; + ret = umsdos_get_dirowner(oldinode,&olddir); + PRINTK (("umsdos_link dir_owner = %d -> %p [%d] " + ,oldinode->u.umsdos_i.i_dir_owner,olddir,olddir->i_count)); + if (ret == 0){ + struct umsdos_dirent entry; + umsdos_lockcreate2(dir,olddir); + ret = umsdos_inode2entry (olddir,oldinode,&entry); + if (ret == 0){ + PRINTK (("umsdos_link :%s: ino %d flags %d " + ,entry.name + ,oldinode->i_ino,entry.flags)); + if (!(entry.flags & UMSDOS_HIDDEN)){ + /* #Specification: hard link / first hard link + The first time a hard link is done on a file, this + file must be renamed and hidden. Then an internal + symbolic link must be done on the hidden file. + + The second link is done after on this hidden file. + + It is expected that the Linux MSDOS file system + keeps the same pseudo inode when a rename operation + is done on a file in the same directory. + */ + struct umsdos_info info; + ret = umsdos_newhidden (olddir,&info); + if (ret == 0){ + olddir->i_count+=2; + PRINTK (("olddir[%d] ",olddir->i_count)); + ret = umsdos_rename_f (olddir,entry.name + ,entry.name_len + ,olddir,info.entry.name,info.entry.name_len + ,UMSDOS_HIDDEN); + if (ret == 0){ + char *path = (char*)kmalloc(PATH_MAX,GFP_KERNEL); + if (path == NULL){ + ret = -ENOMEM; + }else{ + PRINTK (("olddir[%d] ",olddir->i_count)); + ret = umsdos_locate_path (oldinode,path); + PRINTK (("olddir[%d] ",olddir->i_count)); + if (ret == 0){ + olddir->i_count++; + ret = umsdos_symlink_x (olddir + ,entry.name + ,entry.name_len,path + ,S_IFREG|0777,UMSDOS_HLINK); + if (ret == 0){ + dir->i_count++; + ret = umsdos_symlink_x (dir,name,len + ,path + ,S_IFREG|0777,UMSDOS_HLINK); + } + } + kfree (path); + } + } + } + }else{ + char *path = (char*)kmalloc(PATH_MAX,GFP_KERNEL); + if (path == NULL){ + ret = -ENOMEM; + }else{ + ret = umsdos_locate_path (oldinode,path); + if (ret == 0){ + dir->i_count++; + ret = umsdos_symlink_x 
(dir,name,len,path + ,S_IFREG|0777,UMSDOS_HLINK); + } + kfree (path); + } + } + } + umsdos_unlockcreate(olddir); + umsdos_unlockcreate(dir); + } + iput (olddir); + } + if (ret == 0){ + struct iattr newattrs; + oldinode->i_nlink++; + newattrs.ia_valid = 0; + ret = UMSDOS_notify_change(oldinode, &newattrs); + } + iput (oldinode); + iput (dir); + PRINTK (("umsdos_link %d\n",ret)); + return ret; +} +/* + Add a new file into the alternate directory. + The file is added to the real MSDOS directory. If successful, it + is then added to the EDM file. + + Return the status of the operation. 0 mean success. +*/ +int UMSDOS_create ( + struct inode *dir, + const char *name, /* Name of the file to add */ + int len, /* Length of the name */ + int mode, /* Permission bit + file type ??? */ + struct inode **result) /* Will hold the inode of the newly created */ + /* file */ +{ + return umsdos_create_any (dir,name,len,mode,0,0,result); +} +/* + Add a sub-directory in a directory +*/ +int UMSDOS_mkdir( + struct inode * dir, + const char * name, + int len, + int mode) +{ + int ret = umsdos_nevercreat(dir,name,len,-EEXIST); + if (ret == 0){ + struct umsdos_info info; + ret = umsdos_parse (name,len,&info); + PRINTK (("umsdos_mkdir %d\n",ret)); + if (ret == 0){ + info.entry.mode = mode | S_IFDIR; + info.entry.rdev = 0; + info.entry.uid = current->fsuid; + info.entry.gid = (dir->i_mode & S_ISGID) + ? dir->i_gid : current->fsgid; + info.entry.ctime = info.entry.atime = info.entry.mtime + = CURRENT_TIME; + info.entry.flags = 0; + umsdos_lockcreate(dir); + info.entry.nlink = 1; + ret = umsdos_newentry (dir,&info); + PRINTK (("newentry %d ",ret)); + if (ret == 0){ + dir->i_count++; + ret = msdos_mkdir (dir,info.fake.fname,info.fake.len,mode); + if (ret != 0){ + umsdos_delentry (dir,&info,1); + /* #Specification: mkdir / Directory already exist in DOS + We do the same thing as for file creation. + For all user it is an error. 
+ */ + }else{ + /* #Specification: mkdir / umsdos directory / create EMD + When we created a new sub-directory in a UMSDOS + directory (one with full UMSDOS semantic), we + create immediately an EMD file in the new + sub-directory so it inherit UMSDOS semantic. + */ + struct inode *subdir; + ret = umsdos_real_lookup (dir,info.fake.fname + ,info.fake.len,&subdir); + if (ret == 0){ + struct inode *result; + ret = msdos_create (subdir,UMSDOS_EMD_FILE + ,UMSDOS_EMD_NAMELEN,S_IFREG|0777,&result); + subdir = NULL; + iput (result); + } + if (ret < 0){ + printk ("UMSDOS: Can't create empty --linux-.---\n"); + } + iput (subdir); + } + } + umsdos_unlockcreate(dir); + } + } + PRINTK (("umsdos_mkdir %d\n",ret)); + iput (dir); + return ret; +} +/* + Add a new device special file into a directory. +*/ +int UMSDOS_mknod( + struct inode * dir, + const char * name, + int len, + int mode, + int rdev) +{ + /* #Specification: Special files / strategy + Device special file, pipes, etc ... are created like normal + file in the msdos file system. Of course they remain empty. + + One strategy was to create those files only in the EMD file + since they were not important for MSDOS. The problem with + that, is that there were not getting inode number allocated. + The MSDOS filesystems is playing a nice game to fake inode + number, so why not use it. + + The absence of inode number compatible with those allocated + for ordinary files was causing major trouble with hard link + in particular and other parts of the kernel I guess. + */ + struct inode *inode; + int ret = umsdos_create_any (dir,name,len,mode,rdev,0,&inode); + iput (inode); + return ret; +} + +/* + Remove a sub-directory. +*/ +int UMSDOS_rmdir( + struct inode * dir, + const char * name, + int len) +{ + /* #Specification: style / iput strategy + In the UMSDOS project, I am trying to apply a single + programming style regarding inode management. 
Many + entry points receive an inode to act on, and must + do an iput() as soon as they are finished with + the inode. + + For simple cases, there is no problem. When you introduce + error checking, you end up with many iput() calls placed around the + code. + + The coding style I use all around is one where I am trying + to provide independent flow logic (I don't know how to + name this). With this style, code is easier to understand + but you rapidly get iput() all around. Here is an example + of what I am trying to avoid. + + if (a){ + ... + if(b){ + ... + } + ... + if (c){ + // Complex state. Was b true ? + ... + } + ... + } + // Weird state + if (d){ + // ... + } + // Was iput finally done ? + return status; + + Here is the style I am using. Still, sometimes I do the + first when things are very simple (or very complicated :-( ) + + if (a){ + if (b){ + ... + }else if (c){ + // A single state gets here + } + }else if (d){ + ... + } + return status; + + Again, while this helps clarify the code, I often get a lot + of iput(), unlike the first style, where I can place a few + "strategic" iput(). "strategic" also means more difficult + to place. + + So here is the style I will be using from now on in this project. + There is always an iput() at the end of a function (which has + to do an iput()). One iput per inode. There is also one iput() + at the places where a successful operation is achieved. This + iput() is often done by a sub-function (often from the msdos + file system). So I get one too many iput() ? At the place + where an iput() is done, the inode is simply nulled, disabling + the last one. + + if (a){ + if (b){ + ... + }else if (c){ + msdos_rmdir(dir,...); + dir = NULL; + } + }else if (d){ + ... + } + iput (dir); + return status; + + Note that the umsdos_lockcreate() and umsdos_unlockcreate() function + pair goes against this practice of "forgetting" the inode as soon + as possible. 
+ */ + int ret = umsdos_nevercreat(dir,name,len,-EPERM); + if (ret == 0){ + struct inode *sdir; + dir->i_count++; + ret = UMSDOS_lookup (dir,name,len,&sdir); + PRINTK (("rmdir lookup %d ",ret)); + if (ret == 0){ + int empty; + umsdos_lockcreate(dir); + if (sdir->i_count > 1){ + ret = -EBUSY; + }else if ((empty = umsdos_isempty (sdir)) != 0){ + PRINTK (("isempty %d i_count %d ",empty,sdir->i_count)); + if (empty == 1){ + /* We have to removed the EMD file */ + ret = msdos_unlink(sdir,UMSDOS_EMD_FILE + ,UMSDOS_EMD_NAMELEN); + sdir = NULL; + } + /* sdir must be free before msdos_rmdir() */ + iput (sdir); + sdir = NULL; + PRINTK (("isempty ret %d nlink %d ",ret,dir->i_nlink)); + if (ret == 0){ + struct umsdos_info info; + dir->i_count++; + umsdos_parse (name,len,&info); + /* The findentry is there only to complete */ + /* the mangling */ + umsdos_findentry (dir,&info,2); + ret = msdos_rmdir (dir,info.fake.fname + ,info.fake.len); + if (ret == 0){ + ret = umsdos_delentry (dir,&info,1); + } + } + }else{ + /* + The subdirectory is not empty, so leave it there + */ + ret = -ENOTEMPTY; + } + iput(sdir); + umsdos_unlockcreate(dir); + } + } + iput (dir); + PRINTK (("umsdos_rmdir %d\n",ret)); + return ret; +} +/* + Remove a file from the directory. +*/ +int UMSDOS_unlink ( + struct inode * dir, + const char * name, + int len) +{ + struct umsdos_info info; + int ret = umsdos_nevercreat(dir,name,len,-EPERM); + if (ret == 0){ + ret = umsdos_parse (name,len,&info); + if (ret == 0){ + umsdos_lockcreate(dir); + ret = umsdos_findentry(dir,&info,1); + if (ret == 0){ + PRINTK (("UMSDOS_unlink %s ",info.fake.fname)); + if (info.entry.flags & UMSDOS_HLINK){ + /* #Specification: hard link / deleting a link + When we deletes a file, and this file is a link + we must subtract 1 to the nlink field of the + hidden link. + + If the count goes to 0, we delete this hidden + link too. + */ + /* + First, get the inode of the hidden link + using the standard lookup function. 
+ */ + struct inode *inode; + dir->i_count++; + ret = UMSDOS_lookup (dir,name,len,&inode); + if (ret == 0){ + PRINTK (("unlink nlink = %d ",inode->i_nlink)); + inode->i_nlink--; + if (inode->i_nlink == 0){ + struct inode *hdir = iget(inode->i_sb + ,inode->u.umsdos_i.i_dir_owner); + struct umsdos_dirent entry; + ret = umsdos_inode2entry (hdir,inode,&entry); + if (ret == 0){ + ret = UMSDOS_unlink (hdir,entry.name + ,entry.name_len); + }else{ + iput (hdir); + } + }else{ + struct iattr newattrs; + newattrs.ia_valid = 0; + ret = UMSDOS_notify_change (inode, &newattrs); + } + iput (inode); + } + } + if (ret == 0){ + ret = umsdos_delentry (dir,&info,0); + if (ret == 0){ + PRINTK (("Avant msdos_unlink %s ",info.fake.fname)); + dir->i_count++; + ret = msdos_unlink_umsdos (dir,info.fake.fname + ,info.fake.len); + PRINTK (("msdos_unlink %s %o ret %d ",info.fake.fname + ,info.entry.mode,ret)); + } + } + } + umsdos_unlockcreate(dir); + } + } + iput (dir); + PRINTK (("umsdos_unlink %d\n",ret)); + return ret; +} + +/* + Rename a file (move) in the file system. +*/ +int UMSDOS_rename( + struct inode * old_dir, + const char * old_name, + int old_len, + struct inode * new_dir, + const char * new_name, + int new_len) +{ + /* #Specification: weakness / rename + There is a case where UMSDOS rename has a different behavior + than normal UNIX file system. Renaming an open file across + directory boundary does not work. Renaming an open file within + a directory does work however. + + The problem (not sure) is in the linux VFS msdos driver. + I believe this is not a bug but a design feature, because + an inode number represent some sort of directory address + in the MSDOS directory structure. So moving the file into + another directory does not preserve the inode number. 
+ */ + int ret = umsdos_nevercreat(new_dir,new_name,new_len,-EEXIST); + if (ret == 0){ + /* umsdos_rename_f eat the inode and we may need those later */ + old_dir->i_count++; + new_dir->i_count++; + ret = umsdos_rename_f (old_dir,old_name,old_len,new_dir,new_name + ,new_len,0); + if (ret == -EEXIST){ + /* #Specification: rename / new name exist + If the destination name already exist, it will + silently be removed. EXT2 does it this way + and this is the spec of SUNOS. So does UMSDOS. + + If the destination is an empty directory it will + also be removed. + */ + /* #Specification: rename / new name exist / possible flaw + The code to handle the deletion of the target (file + and directory) use to be in umsdos_rename_f, surrounded + by proper directory locking. This was insuring that only + one process could achieve a rename (modification) operation + in the source and destination directory. This was also + insuring the operation was "atomic". + + This has been changed because this was creating a kernel + stack overflow (stack is only 4k in the kernel). To avoid + the code doing the deletion of the target (if exist) has + been moved to a upper layer. umsdos_rename_f is tried + once and if it fails with EEXIST, the target is removed + and umsdos_rename_f is done again. + + This makes the code cleaner and (not sure) solve a + deadlock problem one tester was experiencing. + + The point is to mention that possibly, the semantic of + "rename" may be wrong. Anyone dare to check that :-) + Be aware that IF it is wrong, to produce the problem you + will need two process trying to rename a file to the + same target at the same time. Again, I am not sure it + is a problem at all. 
+ */ + /* This is not super efficient but should work */ + new_dir->i_count++; + ret = UMSDOS_unlink (new_dir,new_name,new_len); +chkstk(); + PRINTK (("rename unlink ret %d %d -- ",ret,new_len)); + if (ret == -EISDIR){ + new_dir->i_count++; + ret = UMSDOS_rmdir (new_dir,new_name,new_len); +chkstk(); + PRINTK (("rename rmdir ret %d -- ",ret)); + } + if (ret == 0){ + ret = umsdos_rename_f (old_dir,old_name,old_len + ,new_dir,new_name,new_len,0); + new_dir = old_dir = NULL; + } + } + } + iput (new_dir); + iput (old_dir); + return ret; +} + diff --git a/fs/umsdos/notes b/fs/umsdos/notes new file mode 100644 index 000000000..3c47d1f4f --- /dev/null +++ b/fs/umsdos/notes @@ -0,0 +1,17 @@ +This file contain idea and things I don't want to forget + +Possible bug in fs/read_write.c +Function sys_readdir() + + There is a call the verify_area that does not take in account + the count parameter. I guess it should read + + error = verify_area(VERIFY_WRITE, dirent, count*sizeof (*dirent)); + + instead of + + error = verify_area(VERIFY_WRITE, dirent, sizeof (*dirent)); + + Of course, now , count is always 1 + + diff --git a/fs/umsdos/rdir.c b/fs/umsdos/rdir.c new file mode 100644 index 000000000..d7272ed96 --- /dev/null +++ b/fs/umsdos/rdir.c @@ -0,0 +1,239 @@ +/* + * linux/fs/umsdos/rdir.c + * + * Written 1994 by Jacques Gelinas + * + * Extended MS-DOS directory pure MS-DOS handling functions + * (For directory without EMD file). 
+ */ + +#include <asm/segment.h> + +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/msdos_fs.h> +#include <linux/errno.h> +#include <linux/stat.h> +#include <linux/limits.h> +#include <linux/umsdos_fs.h> +#include <linux/malloc.h> + +#define PRINTK(x) +#define Printk(x) printk x + + +extern struct inode *pseudo_root; + +static int UMSDOS_rreaddir ( + struct inode *dir, + struct file *filp, + struct dirent *dirent, + int count) +{ + int ret = 0; + while (1){ + ret = msdos_readdir(dir,filp,dirent,count); + if (ret == 5 + && pseudo_root != NULL + && dir->i_sb->s_mounted == pseudo_root->i_sb->s_mounted){ + /* + In pseudo root mode, we must eliminate logically + the directory linux from the real root. + */ + char name[5]; + memcpy_fromfs (name,dirent->d_name,5); + if (memcmp(name,UMSDOS_PSDROOT_NAME,UMSDOS_PSDROOT_LEN)!=0) break; + }else{ + if (pseudo_root != NULL + && ret == 2 + && dir == dir->i_sb->s_mounted + && dir == pseudo_root->i_sb->s_mounted){ + char name[2]; + memcpy_fromfs (name,dirent->d_name,2); + if (name[0] == '.' && name[1] == '.'){ + put_fs_long (pseudo_root->i_ino,&dirent->d_ino); + } + } + break; + } + } + return ret; +} + +int UMSDOS_rlookup( + struct inode *dir, + const char *name, + int len, + struct inode **result) /* Will hold inode of the file, if successful */ +{ + int ret; + if (pseudo_root != NULL + && len == 2 + && name[0] == '.' + && name[1] == '.' + && dir == dir->i_sb->s_mounted + && dir == pseudo_root->i_sb->s_mounted){ + *result = pseudo_root; + pseudo_root->i_count++; + ret = 0; + /* #Specification: pseudo root / DOS/.. + In the real root directory (c:\), the directory .. + is the pseudo root (c:\linux). 
+ */ + }else{ + ret = umsdos_real_lookup (dir,name,len,result); + if (ret == 0){ + struct inode *inode = *result; + if (inode == pseudo_root){ + /* #Specification: pseudo root / DOS/linux + Even in the real root directory (c:\), the directory + /linux won't show + */ + ret = -ENOENT; + iput (pseudo_root); + *result = NULL; + }else if (S_ISDIR(inode->i_mode)){ + /* We must place the proper function table */ + /* depending if this is a MsDOS directory or an UMSDOS directory */ + umsdos_setup_dir_inode(inode); + } + } + } + iput (dir); + return ret; +} + +static int UMSDOS_rrmdir ( + struct inode *dir, + const char *name, + int len) +{ + /* #Specification: dual mode / rmdir in a DOS directory + In a DOS (not EMD in it) directory, we use a reverse strategy + compared with an Umsdos directory. We assume that a subdirectory + of a DOS directory is also a DOS directory. This is not always + true (umssync may be used anywhere), but make sense. + + So we call msdos_rmdir() directly. If it failed with a -ENOTEMPTY + then we check if it is a Umsdos directory. We check if it is + really empty (only . .. and --linux-.--- in it). If it is true + we remove the EMD and do a msdos_rmdir() again. + + In a Umsdos directory, we assume all subdirectory are also + Umsdos directory, so we check the EMD file first. + */ + int ret; + if (umsdos_is_pseudodos(dir,name,len)){ + /* #Specification: pseudo root / rmdir /DOS + The pseudo sub-directory /DOS can't be removed! + This is done even if the pseudo root is not a Umsdos + directory anymore (very unlikely), but an accident (under + MsDOS) is always possible. + + EPERM is returned. 
+ */ + ret = -EPERM; + }else{ + umsdos_lockcreate (dir); + dir->i_count++; + ret = msdos_rmdir (dir,name,len); + if (ret == -ENOTEMPTY){ + struct inode *sdir; + dir->i_count++; + ret = UMSDOS_rlookup (dir,name,len,&sdir); + PRINTK (("rrmdir lookup %d ",ret)); + if (ret == 0){ + int empty; + if ((empty = umsdos_isempty (sdir)) != 0){ + PRINTK (("isempty %d i_count %d ",empty,sdir->i_count)); + if (empty == 2){ + /* + Not a Umsdos directory, so the previous msdos_rmdir + was not lying :-) + */ + ret = -ENOTEMPTY; + }else if (empty == 1){ + /* We have to removed the EMD file */ + ret = msdos_unlink(sdir,UMSDOS_EMD_FILE + ,UMSDOS_EMD_NAMELEN); + sdir = NULL; + if (ret == 0){ + dir->i_count++; + ret = msdos_rmdir (dir,name,len); + } + } + }else{ + ret = -ENOTEMPTY; + } + iput (sdir); + } + } + umsdos_unlockcreate (dir); + } + iput (dir); + return ret; +} + +/* #Specification: dual mode / introduction + One goal of UMSDOS is to allow a practical and simple coexistence + between MsDOS and Linux in a single partition. Using the EMD file + in each directory, UMSDOS add Unix semantics and capabilities to + normal DOS file system. To help and simplify coexistence, here is + the logic related to the EMD file. + + If it is missing, then the directory is managed by the MsDOS driver. + The names are limited to DOS limits (8.3). No links, no device special + and pipe and so on. + + If it is there, it is the directory. If it is there but empty, then + the directory looks empty. The utility umssync allows synchronisation + of the real DOS directory and the EMD. + + Whenever umssync is applied to a directory without EMD, one is + created on the fly. The directory is promoted to full unix semantic. + Of course, the ls command will show exactly the same content as before + the umssync session. + + It is believed that the user/admin will promote directories to unix + semantic as needed. + + The strategy to implement this is to use two function table (struct + inode_operations). 
One for true UMSDOS directories and one for directories + with a missing EMD. + + Functions related to the DOS semantic (but aware of UMSDOS) generally + have a "r" prefix (r for real) such as UMSDOS_rlookup, to differentiate + from the ones with full UMSDOS semantic. +*/ +static struct file_operations umsdos_rdir_operations = { + NULL, /* lseek - default */ + UMSDOS_dir_read, /* read */ + NULL, /* write - bad */ + UMSDOS_rreaddir, /* readdir */ + NULL, /* select - default */ + UMSDOS_ioctl_dir, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + NULL /* fsync */ +}; + +struct inode_operations umsdos_rdir_inode_operations = { + &umsdos_rdir_operations, /* default directory file-ops */ + msdos_create, /* create */ + UMSDOS_rlookup, /* lookup */ + NULL, /* link */ + msdos_unlink, /* unlink */ + NULL, /* symlink */ + msdos_mkdir, /* mkdir */ + UMSDOS_rrmdir, /* rmdir */ + NULL, /* mknod */ + msdos_rename, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + + diff --git a/fs/umsdos/symlink.c b/fs/umsdos/symlink.c new file mode 100644 index 000000000..1b1e561c2 --- /dev/null +++ b/fs/umsdos/symlink.c @@ -0,0 +1,145 @@ +/* + * linux/fs/umsdos/symlink.c + * + * Written 1992 by Jacques Gelinas + * inspired from linux/fs/msdos/file.c Werner Almesberger + * + * Extended MS-DOS symbolic link handling primitives + */ + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/msdos_fs.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/umsdos_fs.h> +#include <linux/malloc.h> + +#define PRINTK(x) +#define Printk(x) printk x +/* + Read the data associated with the symlink. + Return length read in buffer or a negative error code. 
+*/ +static int umsdos_readlink_x ( + struct inode *inode, + char *buffer, + int (*msdos_read)(struct inode *, struct file *, char *, int), + int bufsiz) +{ + int ret = inode->i_size; + struct file filp; + filp.f_pos = 0; + filp.f_reada = 0; + if (ret > bufsiz) ret = bufsiz; + if ((*msdos_read) (inode, &filp, buffer,ret) != ret){ + ret = -EIO; + } + return ret; +} +/* + Follow a symbolic link chain by calling open_namei recursively + until an inode is found. + + Return 0 if ok, or a negative error code if not. +*/ +static int UMSDOS_follow_link( + struct inode * dir, + struct inode * inode, + int flag, + int mode, + struct inode ** res_inode) +{ + int ret = -ELOOP; + *res_inode = NULL; + if (current->link_count < 5) { + char *path = (char*)kmalloc(PATH_MAX,GFP_KERNEL); + if (path == NULL){ + ret = -ENOMEM; + }else{ + if (!dir) { + dir = current->fs[1].root; + dir->i_count++; + } + if (!inode){ + PRINTK (("symlink: inode = NULL\n")); + ret = -ENOENT; + }else if (!S_ISLNK(inode->i_mode)){ + PRINTK (("symlink: Not ISLNK\n")); + *res_inode = inode; + inode = NULL; + ret = 0; + }else{ + ret = umsdos_readlink_x (inode,path + ,umsdos_file_read_kmem,PATH_MAX-1); + if (ret > 0){ + path[ret] = '\0'; + PRINTK (("follow :%s: %d ",path,ret)); + iput(inode); + inode = NULL; + current->link_count++; + ret = open_namei(path,flag,mode,res_inode,dir); + current->link_count--; + dir = NULL; + }else{ + ret = -EIO; + } + } + kfree (path); + } + } + iput(inode); + iput(dir); + PRINTK (("follow_link ret %d\n",ret)); + return ret; +} + +static int UMSDOS_readlink(struct inode * inode, char * buffer, int buflen) +{ + int ret = -EINVAL; + if (S_ISLNK(inode->i_mode)) { + ret = umsdos_readlink_x (inode,buffer,msdos_file_read,buflen); + } + PRINTK (("readlink %d %x bufsiz %d\n",ret,inode->i_mode,buflen)); + iput(inode); + return ret; + +} + +static struct file_operations umsdos_symlink_operations = { + NULL, /* lseek - default */ + NULL, /* read */ + NULL, /* write */ + NULL, /* readdir - bad 
*/ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open is needed */ + NULL, /* release */ + NULL /* fsync */ +}; + +struct inode_operations umsdos_symlink_inode_operations = { + &umsdos_symlink_operations, /* default file operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + UMSDOS_readlink, /* readlink */ + UMSDOS_follow_link, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + + + diff --git a/fs/xiafs/Makefile b/fs/xiafs/Makefile new file mode 100644 index 000000000..097563244 --- /dev/null +++ b/fs/xiafs/Makefile @@ -0,0 +1,31 @@ +# +# Makefile for the XIAFS filesystem routines. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +.c.s: + $(CC) $(CFLAGS) -S $< +.c.o: + $(CC) $(CFLAGS) -c $< +.s.o: + $(AS) -o $*.o $< + +OBJS= bitmap.o truncate.o namei.o inode.o \ + file.o dir.o symlink.o fsync.o + +xiafs.o: $(OBJS) + $(LD) -r -o xiafs.o $(OBJS) + +dep: + $(CPP) -M *.c > .depend + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/fs/xiafs/bitmap.c b/fs/xiafs/bitmap.c new file mode 100644 index 000000000..4dee5cfbb --- /dev/null +++ b/fs/xiafs/bitmap.c @@ -0,0 +1,388 @@ +/* + * linux/fs/xiafs/bitmap.c + * + * Copyright (C) Q. Frank Xia, 1993. + * + * Based on Linus' minix/bitmap.c + * Copyright (C) Linus Torvalds, 1991, 1992. + * + * This software may be redistributed per Linux Copyright. 
+ */ + +/* bitmap.c contains the code that handles the inode and block bitmaps */ + +#include <linux/sched.h> +#include <linux/locks.h> +#include <linux/xia_fs.h> +#include <linux/stat.h> +#include <linux/kernel.h> +#include <linux/string.h> + +#include <asm/bitops.h> + +#include "xiafs_mac.h" + + +char internal_error_message[]="XIA-FS: internal error %s %d\n"; + +static int find_first_zero(struct buffer_head *bh, int start_bit, int end_bit) +{ + /* This routine searches first 0 bit from (start_bit) to (end_bit-1). + * If found the bit is set to 1 and the bit # is returned, otherwise, + * -1 is returned. Race condition is avoid by using "btsl" and + * "goto repeat". ---Frank. + */ + + int end, i, j, tmp; + u_long *bmap; + + bmap=(u_long *)bh->b_data; + end = end_bit >> 5; + +repeat: + i=start_bit >> 5; + if ( (tmp=(~bmap[i]) & (0xffffffff << (start_bit & 31))) ) + goto zone_found; + while (++i < end) + if (~bmap[i]) { + tmp=~bmap[i]; + goto zone_found; + } + if ( !(tmp=~bmap[i] & ((1 << (end_bit & 31)) -1)) ) + return -1; +zone_found: + for (j=0; j < 32; j++) + if (tmp & (1 << j)) + break; + if (set_bit(j,bmap+i)) { + start_bit=j + (i << 5) + 1; + goto repeat; + } + mark_buffer_dirty(bh, 1); + return j + (i << 5); +} + +static void clear_buf(struct buffer_head * bh) +{ + register int i; + register long * lp; + + lp=(long *)bh->b_data; + for (i= bh->b_size >> 2; i-- > 0; ) + *lp++=0; +} + +static void que(struct buffer_head * bmap[], int bznr[], int pos) +{ + struct buffer_head * tbh; + int tmp; + int i; + + tbh=bmap[pos]; + tmp=bznr[pos]; + for (i=pos; i > 0; i--) { + bmap[i]=bmap[i-1]; + bznr[i]=bznr[i-1]; + } + bmap[0]=tbh; + bznr[0]=tmp; +} + +#define get_imap_zone(sb, bit_nr, not_que) \ + get__map_zone((sb), (sb)->u.xiafs_sb.s_imap_buf, \ + (sb)->u.xiafs_sb.s_imap_iznr, \ + (sb)->u.xiafs_sb.s_imap_cached, 1, \ + (sb)->u.xiafs_sb.s_imap_zones, _XIAFS_IMAP_SLOTS, \ + bit_nr, not_que) + +#define get_zmap_zone(sb, bit_nr, not_que) \ + get__map_zone((sb), 
(sb)->u.xiafs_sb.s_zmap_buf, \ + (sb)->u.xiafs_sb.s_zmap_zznr, \ + (sb)->u.xiafs_sb.s_zmap_cached, \ + 1+(sb)->u.xiafs_sb.s_imap_zones, \ + (sb)->u.xiafs_sb.s_zmap_zones, _XIAFS_ZMAP_SLOTS, \ + bit_nr, not_que) + +static struct buffer_head * +get__map_zone(struct super_block *sb, struct buffer_head * bmap_buf[], + int bznr[], u_char cache, int first_zone, + int bmap_zones, int slots, u_long bit_nr, int * not_que) +{ + struct buffer_head * tmp_bh; + int z_nr, i; + + z_nr = bit_nr >> XIAFS_BITS_PER_Z_BITS(sb); + if (z_nr >= bmap_zones) { + printk("XIA-FS: bad inode/zone number (%s %d)\n", WHERE_ERR); + return NULL; + } + if (!cache) + return bmap_buf[z_nr]; + lock_super(sb); + for (i=0; i < slots; i++) + if (bznr[i]==z_nr) + break; + if (i < slots) { /* cache hit */ + if (not_que) { + *not_que=i; + return bmap_buf[i]; + } else { + que(bmap_buf, bznr, i); + return bmap_buf[0]; + } + } + tmp_bh=bread(sb->s_dev, z_nr+first_zone, XIAFS_ZSIZE(sb)); /* cache not hit */ + if (!tmp_bh) { + printk("XIA-FS: read bitmap failed (%s %d)\n", WHERE_ERR); + unlock_super(sb); + return NULL; + } + brelse(bmap_buf[slots-1]); + bmap_buf[slots-1]=tmp_bh; + bznr[slots-1]=z_nr; + if (not_que) + *not_que=slots-1; + else + que(bmap_buf, bznr, slots-1); + return tmp_bh; +} + +#define xiafs_unlock_super(sb, cache) if (cache) unlock_super(sb); + +#define get_free_ibit(sb, prev_bit) \ + get_free__bit(sb, sb->u.xiafs_sb.s_imap_buf, \ + sb->u.xiafs_sb.s_imap_iznr, \ + sb->u.xiafs_sb.s_imap_cached, \ + 1, sb->u.xiafs_sb.s_imap_zones, \ + _XIAFS_IMAP_SLOTS, prev_bit); + +#define get_free_zbit(sb, prev_bit) \ + get_free__bit(sb, sb->u.xiafs_sb.s_zmap_buf, \ + sb->u.xiafs_sb.s_zmap_zznr, \ + sb->u.xiafs_sb.s_zmap_cached, \ + 1 + sb->u.xiafs_sb.s_imap_zones, \ + sb->u.xiafs_sb.s_zmap_zones, \ + _XIAFS_ZMAP_SLOTS, prev_bit); + +static u_long +get_free__bit(struct super_block *sb, struct buffer_head * bmap_buf[], + int bznr[], u_char cache, int first_zone, int bmap_zones, + int slots, u_long prev_bit) +{ 
+ struct buffer_head * bh; + int not_done=0; + u_long pos, start_bit, end_bit, total_bits; + int z_nr, tmp; + + total_bits=bmap_zones << XIAFS_BITS_PER_Z_BITS(sb); + if (prev_bit >= total_bits) + prev_bit=0; + pos=prev_bit+1; + end_bit=XIAFS_BITS_PER_Z(sb); + + do { + if (pos >= total_bits) + pos=0; + if (!not_done) { /* first time */ + not_done=1; + start_bit= pos & (end_bit-1); + } else + start_bit=0; + if ( pos < prev_bit && pos+end_bit >= prev_bit) { /* last time */ + not_done=0; + end_bit=prev_bit & (end_bit-1); /* only here end_bit modified */ + } + bh = get__map_zone(sb, bmap_buf, bznr, cache, first_zone, + bmap_zones, slots, pos, &z_nr); + if (!bh) + return 0; + tmp=find_first_zero(bh, start_bit, end_bit); + if (tmp >= 0) + break; + xiafs_unlock_super(sb, sb->u.xiafs_sb.s_zmap_cached); + pos=(pos & ~(end_bit-1))+end_bit; + } while (not_done); + + if (tmp < 0) + return 0; + if (cache) + que(bmap_buf, bznr, z_nr); + xiafs_unlock_super(sb, cache); + return (pos & ~(XIAFS_BITS_PER_Z(sb)-1))+tmp; +} + +void xiafs_free_zone(struct super_block * sb, int d_addr) +{ + struct buffer_head * bh; + unsigned int bit, offset; + + if (!sb) { + printk(INTERN_ERR); + return; + } + if (d_addr < sb->u.xiafs_sb.s_firstdatazone || + d_addr >= sb->u.xiafs_sb.s_nzones) { + printk("XIA-FS: bad zone number (%s %d)\n", WHERE_ERR); + return; + } + bh = get_hash_table(sb->s_dev, d_addr, XIAFS_ZSIZE(sb)); + if (bh) + bh->b_dirt=0; + brelse(bh); + bit=d_addr - sb->u.xiafs_sb.s_firstdatazone + 1; + bh = get_zmap_zone(sb, bit, NULL); + if (!bh) + return; + offset = bit & (XIAFS_BITS_PER_Z(sb) -1); + if (!clear_bit(offset, bh->b_data)) + printk("XIA-FS: dev %04x" + " block bit %u (0x%x) already cleared (%s %d)\n", + sb->s_dev, bit, bit, WHERE_ERR); + mark_buffer_dirty(bh, 1); + xiafs_unlock_super(sb, sb->u.xiafs_sb.s_zmap_cached); +} + +int xiafs_new_zone(struct super_block * sb, u_long prev_addr) +{ + struct buffer_head * bh; + int prev_znr, tmp; + + if (!sb) { + printk(INTERN_ERR); + 
return 0; + } + if (prev_addr < sb->u.xiafs_sb.s_firstdatazone || + prev_addr >= sb->u.xiafs_sb.s_nzones) { + prev_addr=sb->u.xiafs_sb.s_firstdatazone; + } + prev_znr=prev_addr-sb->u.xiafs_sb.s_firstdatazone+1; + tmp=get_free_zbit(sb, prev_znr); + if (!tmp) + return 0; + tmp += sb->u.xiafs_sb.s_firstdatazone -1; + if (!(bh = getblk(sb->s_dev, tmp, XIAFS_ZSIZE(sb)))) { + printk("XIA-FS: I/O error (%s %d)\n", WHERE_ERR); + return 0; + } + if (bh->b_count != 1) { + printk(INTERN_ERR); + return 0; + } + clear_buf(bh); + bh->b_uptodate = 1; + mark_buffer_dirty(bh, 1); + brelse(bh); + return tmp; +} + +void xiafs_free_inode(struct inode * inode) +{ + struct buffer_head * bh; + struct super_block * sb; + unsigned long ino; + + if (!inode) + return; + if (!inode->i_dev || inode->i_count!=1 || inode->i_nlink || !inode->i_sb || + inode->i_ino < 3 || inode->i_ino > inode->i_sb->u.xiafs_sb.s_ninodes) { + printk("XIA-FS: bad inode (%s %d)\n", WHERE_ERR); + return; + } + sb = inode->i_sb; + ino = inode->i_ino; + bh = get_imap_zone(sb, ino, NULL); + if (!bh) + return; + clear_inode(inode); + if (!clear_bit(ino & (XIAFS_BITS_PER_Z(sb)-1), bh->b_data)) + printk("XIA-FS: dev %04x" + "inode bit %ld (0x%lx) already cleared (%s %d)\n", + inode->i_dev, ino, ino, WHERE_ERR); + mark_buffer_dirty(bh, 1); + xiafs_unlock_super(sb, sb->u.xiafs_sb.s_imap_cached); +} + +struct inode * xiafs_new_inode(struct inode * dir) +{ + struct super_block * sb; + struct inode * inode; + ino_t tmp; + + sb = dir->i_sb; + if (!dir || !(inode = get_empty_inode())) + return NULL; + inode->i_sb = sb; + inode->i_flags = inode->i_sb->s_flags; + + tmp=get_free_ibit(sb, dir->i_ino); + if (!tmp) { + iput(inode); + return NULL; + } + inode->i_count = 1; + inode->i_nlink = 1; + inode->i_dev = sb->s_dev; + inode->i_uid = current->fsuid; + inode->i_gid = (dir->i_mode & S_ISGID) ? 
dir->i_gid : current->fsgid; + inode->i_dirt = 1; + inode->i_ino = tmp; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_op = NULL; + inode->i_blocks = 0; + inode->i_blksize = XIAFS_ZSIZE(inode->i_sb); + insert_inode_hash(inode); + return inode; +} + +static int nibblemap[] = { 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4 }; + +static u_long count_zone(struct buffer_head * bh) +{ + int i, tmp; + u_long sum; + + sum=0; + for (i=bh->b_size; i-- > 0; ) { + tmp=bh->b_data[i]; + sum += nibblemap[tmp & 0xf] + nibblemap[(tmp & 0xff) >> 4]; + } + return sum; +} + +unsigned long xiafs_count_free_inodes(struct super_block *sb) +{ + struct buffer_head * bh; + int izones, i, not_que; + u_long sum; + + sum=0; + izones=sb->u.xiafs_sb.s_imap_zones; + for (i=0; i < izones; i++) { + bh=get_imap_zone(sb, i << XIAFS_BITS_PER_Z_BITS(sb), &not_que); + if (bh) { + sum += count_zone(bh); + xiafs_unlock_super(sb, sb->u.xiafs_sb.s_imap_cached); + } + } + i=izones << XIAFS_BITS_PER_Z_BITS(sb); + return i - sum; +} + +unsigned long xiafs_count_free_zones(struct super_block *sb) +{ + struct buffer_head * bh; + int zzones, i, not_que; + u_long sum; + + sum=0; + zzones=sb->u.xiafs_sb.s_zmap_zones; + for (i=0; i < zzones; i++) { + bh=get_zmap_zone(sb, i << XIAFS_BITS_PER_Z_BITS(sb), &not_que); + if (bh) { + sum += count_zone(bh); + xiafs_unlock_super(sb, sb->u.xiafs_sb.s_zmap_cached); + } + } + i=zzones << XIAFS_BITS_PER_Z_BITS(sb); + return i - sum; +} diff --git a/fs/xiafs/dir.c b/fs/xiafs/dir.c new file mode 100644 index 000000000..d9db56ddc --- /dev/null +++ b/fs/xiafs/dir.c @@ -0,0 +1,135 @@ +/* + * linux/fs/xiafs/dir.c + * + * Copyright (C) Q. Frank Xia, 1993. + * + * Based on Linus' minix/dir.c + * Copyright (C) Linus Torvalds, 1991, 1992. + * + * This software may be redistributed per Linux Copyright. 
+ */ + +#include <asm/segment.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/xia_fs.h> +#include <linux/stat.h> + +#include "xiafs_mac.h" + +#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) +#define ROUND_UP(x) (((x)+3) & ~3) + +static int xiafs_dir_read(struct inode *, struct file *, char *, int); +static int xiafs_readdir(struct inode *, struct file *, struct dirent *, int); + +static struct file_operations xiafs_dir_operations = { + NULL, /* lseek - default */ + xiafs_dir_read, /* read */ + NULL, /* write - bad */ + xiafs_readdir, /* readdir */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + NULL, /* mmap */ + NULL, /* no special open code */ + NULL, /* no special release code */ + file_fsync /* default fsync */ +}; + +/* + * directories can handle most operations... + */ +struct inode_operations xiafs_dir_inode_operations = { + &xiafs_dir_operations, /* default directory file-ops */ + xiafs_create, /* create */ + xiafs_lookup, /* lookup */ + xiafs_link, /* link */ + xiafs_unlink, /* unlink */ + xiafs_symlink, /* symlink */ + xiafs_mkdir, /* mkdir */ + xiafs_rmdir, /* rmdir */ + xiafs_mknod, /* mknod */ + xiafs_rename, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + xiafs_truncate, /* truncate */ + NULL /* permission */ +}; + +static int xiafs_dir_read(struct inode * inode, + struct file * filp, char * buf, int count) +{ + return -EISDIR; +} + +static int xiafs_readdir(struct inode * inode, + struct file * filp, struct dirent * dirent, int count) +{ + u_int offset, i,ret; + struct buffer_head * bh; + struct xiafs_direct * de; + + if (!inode || !inode->i_sb || !S_ISDIR(inode->i_mode)) + return -EBADF; + if (inode->i_size & (XIAFS_ZSIZE(inode->i_sb) - 1) ) + return -EBADF; + ret = 0; + while (!ret && filp->f_pos < inode->i_size) { + offset = filp->f_pos & (XIAFS_ZSIZE(inode->i_sb) - 1); + bh = xiafs_bread(inode, filp->f_pos >> 
XIAFS_ZSIZE_BITS(inode->i_sb),0); + if (!bh) { /* zone could not be read: skip to the start of the next zone */ + filp->f_pos += XIAFS_ZSIZE(inode->i_sb)-offset; + continue; + } /* walk the records from the start of the zone so that offset lands on a record boundary at or after the requested position */ + for (i = 0; i < XIAFS_ZSIZE(inode->i_sb) && i < offset; ) { + de = (struct xiafs_direct *) (bh->b_data + i); + if (!de->d_rec_len) + break; + i += de->d_rec_len; + } + offset = i; + de = (struct xiafs_direct *) (offset + bh->b_data); + + while (!ret && offset < XIAFS_ZSIZE(inode->i_sb) && filp->f_pos < inode->i_size) { /* sanity-check the record before using any of its fields */ + if (de->d_ino > inode->i_sb->u.xiafs_sb.s_ninodes || + de->d_rec_len < 12 || + (char *)de+de->d_rec_len > XIAFS_ZSIZE(inode->i_sb)+bh->b_data || + de->d_name_len < 1 || de->d_name_len + 8 > de->d_rec_len || + de->d_name_len > _XIAFS_NAME_LEN || + de->d_name[de->d_name_len] ) { + printk("XIA-FS: bad directory entry (%s %d)\n", WHERE_ERR); + brelse(bh); + return 0; + } + offset += de->d_rec_len; + filp->f_pos += de->d_rec_len; + if (de->d_ino) { /* records with d_ino == 0 are skipped; this one is copied out to user space */ + for (i = 0; i < de->d_name_len ; i++) + put_fs_byte(de->d_name[i],i+dirent->d_name); + put_fs_byte(0,i+dirent->d_name); + put_fs_long(de->d_ino,&dirent->d_ino); + put_fs_word(i,&dirent->d_reclen); + if (!IS_RDONLY (inode)) { + inode->i_atime=CURRENT_TIME; + inode->i_dirt=1; + } + ret = ROUND_UP(NAME_OFFSET(dirent)+i+1); + break; + } + de = (struct xiafs_direct *) (offset + bh->b_data); + } + brelse(bh); + if (offset > XIAFS_ZSIZE(inode->i_sb)) { /* the records ran past the end of the zone */ + printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); + return 0; + } + } + if (!IS_RDONLY (inode)) { + inode->i_atime=CURRENT_TIME; + inode->i_dirt=1; + } + return ret; +} diff --git a/fs/xiafs/file.c b/fs/xiafs/file.c new file mode 100644 index 000000000..5678ffd0b --- /dev/null +++ b/fs/xiafs/file.c @@ -0,0 +1,252 @@ +/* + * linux/fs/xiafs/file.c + * + * Copyright (C) Q. Frank Xia, 1993. + * + * Based on Linus' minix/file.c + * Copyright (C) Linus Torvalds, 1991, 1992. + * + * This software may be redistributed per Linux Copyright. 
+ */ + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/xia_fs.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/locks.h> + +#include "xiafs_mac.h" + /* NBUF: number of slots in the circular buffer-head arrays used by xiafs_file_read */ +#define NBUF 32 + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +static int xiafs_file_read(struct inode *, struct file *, char *, int); +static int xiafs_file_write(struct inode *, struct file *, char *, int); + +/* + * We have mostly NULL's here: the current defaults are ok for + * the xiafs filesystem. + */ +static struct file_operations xiafs_file_operations = { + NULL, /* lseek - default */ + xiafs_file_read, /* read */ + xiafs_file_write, /* write */ + NULL, /* readdir - bad */ + NULL, /* select - default */ + NULL, /* ioctl - default */ + generic_mmap, /* mmap */ + NULL, /* no special open is needed */ + NULL, /* release */ + xiafs_sync_file /* fsync */ +}; + /* Inode operations for regular files: only bmap and truncate are supplied, every name-space operation is NULL. */ +struct inode_operations xiafs_file_inode_operations = { + &xiafs_file_operations, /* default file operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + xiafs_bmap, /* bmap */ + xiafs_truncate, /* truncate */ + NULL /* permission */ +}; + /* Read up to count bytes of a regular file, starting at filp->f_pos, into the user buffer buf. Zones without a buffer are returned as zero bytes. Returns the number of bytes read, 0 when f_pos is at or past the file size, -EINVAL for a NULL or non-regular inode and -EIO when nothing could be read. */ +static int +xiafs_file_read(struct inode * inode, struct file * filp, char * buf, int count) +{ + int read, left, chars; + int zone_nr, zones, f_zones, offset; + int bhrequest, uptodate; + struct buffer_head ** bhb, ** bhe; + struct buffer_head * bhreq[NBUF]; + struct buffer_head * buflist[NBUF]; + + if (!inode) { + printk("XIA-FS: inode = NULL (%s %d)\n", WHERE_ERR); + return -EINVAL; + } + if (!S_ISREG(inode->i_mode)) { + printk("XIA-FS: mode != regular (%s %d)\n", WHERE_ERR); + return -EINVAL; + } + offset = filp->f_pos; /* left = bytes between f_pos and end of file, capped at count below */ + left = inode->i_size -
offset; + if (left > count) + left = count; + if (left <= 0) + return 0; + read = 0; /* split the file position into a zone number and an offset inside that zone */ + zone_nr = offset >> XIAFS_ZSIZE_BITS(inode->i_sb); + offset &= XIAFS_ZSIZE(inode->i_sb) -1 ; + f_zones =(inode->i_size+XIAFS_ZSIZE(inode->i_sb)-1)>>XIAFS_ZSIZE_BITS(inode->i_sb); + zones = (left+offset+XIAFS_ZSIZE(inode->i_sb)-1) >> XIAFS_ZSIZE_BITS(inode->i_sb); /* bhb is the next buflist slot to fill, bhe the next slot to consume; both wrap at NBUF */ + bhb = bhe = buflist; + if (filp->f_reada) { /* read-ahead: widen the request to the per-device read_ahead setting, but never past the last zone of the file */ + if(zones < read_ahead[MAJOR(inode->i_dev)] >> (1+XIAFS_ZSHIFT(inode->i_sb))) + zones = read_ahead[MAJOR(inode->i_dev)] >> (1+XIAFS_ZSHIFT(inode->i_sb)); + if (zone_nr + zones > f_zones) + zones = f_zones - zone_nr; + } + + /* We do this in a two stage process. We first try and request + as many blocks as we can, then we wait for the first one to + complete, and then we try and wrap up as many as are actually + done. This routine is rather generic, in that it can be used + in a filesystem by substituting the appropriate function in + for getblk. + + This routine is optimized to make maximum use of the various + buffers and caches. */ + + do { + bhrequest = 0; + uptodate = 1; + while (zones--) { + *bhb = xiafs_getblk(inode, zone_nr++, 0); + if (*bhb && !(*bhb)->b_uptodate) { + uptodate = 0; + bhreq[bhrequest++] = *bhb; + } + + if (++bhb == &buflist[NBUF]) + bhb = buflist; + + /* If the block we have on hand is uptodate, go ahead + and complete processing. */ + if (uptodate) + break; + if (bhb == bhe) + break; + } + + /* Now request them all */ + if (bhrequest) + ll_rw_block(READ, bhrequest, bhreq); + + do { /* Finish off all I/O that has actually completed */ + if (*bhe) { + wait_on_buffer(*bhe); + if (!(*bhe)->b_uptodate) { /* read error? 
+ */ + brelse(*bhe); + if (++bhe == &buflist[NBUF]) + bhe = buflist; + left = 0; + break; + } + } /* copy no further than the end of the current zone */ + if (left < XIAFS_ZSIZE(inode->i_sb) - offset) + chars = left; + else + chars = XIAFS_ZSIZE(inode->i_sb) - offset; + filp->f_pos += chars; + left -= chars; + read += chars; + if (*bhe) { + memcpy_tofs(buf,offset+(*bhe)->b_data,chars); + brelse(*bhe); + buf += chars; + } else { /* no buffer for this zone: hand back zero bytes */ + while (chars-->0) + put_fs_byte(0,buf++); + } + offset = 0; + if (++bhe == &buflist[NBUF]) + bhe = buflist; + } while (left > 0 && bhe != bhb && (!*bhe || !(*bhe)->b_lock)); + } while (left > 0); + +/* Release the read-ahead blocks */ + while (bhe != bhb) { + brelse(*bhe); + if (++bhe == &buflist[NBUF]) + bhe = buflist; + }; + if (!read) + return -EIO; + filp->f_reada = 1; + if (!IS_RDONLY (inode)) { + inode->i_atime = CURRENT_TIME; + inode->i_dirt = 1; + } + return read; +} + /* Write count bytes from the user buffer buf into a regular file, starting at filp->f_pos, or at the current file size when O_APPEND is set. Zones are obtained with create=1. Returns the number of bytes written; -ENOSPC or -EIO is returned only when the failure happens before any byte was written; -EINVAL for a NULL or non-regular inode. */ +static int +xiafs_file_write(struct inode * inode, struct file * filp, char * buf, int count) +{ + off_t pos; + int written, c; + struct buffer_head * bh; + char * cp; + + if (!inode) { + printk("XIA-FS: inode = NULL (%s %d)\n", WHERE_ERR); + return -EINVAL; + } + if (!S_ISREG(inode->i_mode)) { + printk("XIA-FS: mode != regular (%s %d)\n", WHERE_ERR); + return -EINVAL; + } +/* + * ok, append may not work when many processes are writing at the same time + * but so what. That way leads to madness anyway. 
+ */ + if (filp->f_flags & O_APPEND) + pos = inode->i_size; + else + pos = filp->f_pos; + written = 0; + while (written < count) { + bh = xiafs_getblk(inode, pos >> XIAFS_ZSIZE_BITS(inode->i_sb), 1); + if (!bh) { + if (!written) + written = -ENOSPC; + break; + } /* c = bytes from pos to the end of this zone, capped at what is still to be written */ + c = XIAFS_ZSIZE(inode->i_sb) - (pos & (XIAFS_ZSIZE(inode->i_sb) - 1)); + if (c > count-written) + c = count-written; /* a partial-zone write into a buffer that is not up to date must read the zone first */ + if (c != XIAFS_ZSIZE(inode->i_sb) && !bh->b_uptodate) { + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + if (!bh->b_uptodate) { + brelse(bh); + if (!written) + written = -EIO; + break; + } + } + cp = (pos & (XIAFS_ZSIZE(inode->i_sb)-1)) + bh->b_data; + pos += c; + if (pos > inode->i_size) { + inode->i_size = pos; + inode->i_dirt = 1; + } + written += c; + memcpy_fromfs(cp,buf,c); + buf += c; + bh->b_uptodate = 1; + mark_buffer_dirty(bh, 0); + brelse(bh); + } + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + filp->f_pos = pos; + inode->i_dirt = 1; + + return written; +} diff --git a/fs/xiafs/fsync.c b/fs/xiafs/fsync.c new file mode 100644 index 000000000..67681b2c6 --- /dev/null +++ b/fs/xiafs/fsync.c @@ -0,0 +1,159 @@ +/* + * linux/fs/xiafs/fsync.c + * + * Changes Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk) + * from + * Copyright (C) 1991, 1992 Linus Torvalds + * + * xiafs fsync primitive + */ + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/locks.h> + +#include <linux/fs.h> +#include <linux/xia_fs.h> + +#include "xiafs_mac.h" + + /* both macros expand to expressions on a variable named inode, which must be in scope where they are used */ +#define blocksize (XIAFS_ZSIZE(inode->i_sb)) +#define addr_per_block (XIAFS_ADDRS_PER_Z(inode->i_sb)) + /* Sync the single zone whose number is stored at *block. Returns 0 when there is nothing to do (no zone, buffer not in the hash table, or nothing to write in this pass) or when the write was queued; 1 when *block changed while the buffer was looked up; -1 in the wait pass when the buffer was requested but is not up to date. */ +static int sync_block (struct inode * inode, unsigned long * block, int wait) +{ + struct buffer_head * bh; + int tmp; + + if (!*block) + return 0; + tmp = *block; + bh = get_hash_table(inode->i_dev, *block, blocksize); + if (!bh) + return 0; + if (*block != tmp) { + brelse (bh); + return 1; + } + if (wait && bh->b_req
&& !bh->b_uptodate) { + brelse(bh); + return -1; + } + if (wait || !bh->b_uptodate || !bh->b_dirt) + { + brelse(bh); + return 0; + } + ll_rw_block(WRITE, 1, &bh); + bh->b_count--; + return 0; +} + /* Sync the indirect zone whose number is stored at *iblock, then read it into *bh so the caller can walk its entries. Returns 0 with *bh NULL when *iblock is 0, the nonzero result of sync_block when that fails, 1 when *iblock changed during the read, -1 when the read failed, 0 with *bh set otherwise. */ +static int sync_iblock (struct inode * inode, unsigned long * iblock, + struct buffer_head **bh, int wait) +{ + int rc, tmp; + + *bh = NULL; + tmp = *iblock; + if (!tmp) + return 0; + rc = sync_block (inode, iblock, wait); + if (rc) + return rc; + *bh = bread(inode->i_dev, tmp, blocksize); + if (tmp != *iblock) { + brelse(*bh); + *bh = NULL; + return 1; + } + if (!*bh) + return -1; + return 0; +} + + /* Sync the 8 direct zones of the inode. The zone numbers are taken from the xiafs member of the inode union (u.xiafs_i.i_zone), the same array the other xiafs routines use. Stops early when sync_block returns a positive value; returns the last negative result seen, else 0. */ +static int sync_direct(struct inode *inode, int wait) +{ + int i; + int rc, err = 0; + + for (i = 0; i < 8; i++) { + rc = sync_block (inode, inode->u.xiafs_i.i_zone + i, wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + return err; +} + /* Sync every zone listed in the single-indirect zone whose number is at *iblock. Returns the result of sync_iblock when that is nonzero or yields no buffer, otherwise the last negative sync_block result, else 0. */ +static int sync_indirect(struct inode *inode, unsigned long *iblock, int wait) +{ + int i; + struct buffer_head * ind_bh; + int rc, err = 0; + + rc = sync_iblock (inode, iblock, &ind_bh, wait); + if (rc || !ind_bh) + return rc; + + for (i = 0; i < addr_per_block; i++) { + rc = sync_block (inode, + ((unsigned long *) ind_bh->b_data) + i, + wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + brelse(ind_bh); + return err; +} + /* Same as sync_indirect, one level up: every entry of the double-indirect zone at *diblock is handed to sync_indirect. */ +static int sync_dindirect(struct inode *inode, unsigned long *diblock, + int wait) +{ + int i; + struct buffer_head * dind_bh; + int rc, err = 0; + + rc = sync_iblock (inode, diblock, &dind_bh, wait); + if (rc || !dind_bh) + return rc; + + for (i = 0; i < addr_per_block; i++) { + rc = sync_indirect (inode, + ((unsigned long *) dind_bh->b_data) + i, + wait); + if (rc > 0) + break; + if (rc) + err = rc; + } + brelse(dind_bh); + return err; +} + /* fsync entry point for regular files, directories and symlinks. Makes two passes over the direct, indirect and double-indirect zones (wait = 0, then wait = 1) and finally syncs the inode itself. Returns -EINVAL for any other inode type, -EIO when the accumulated result is negative, else 0. The file argument is not used. */ +int xiafs_sync_file(struct inode * inode, struct file * file) +{ + int wait, err = 0; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return -EINVAL; + for (wait=0; wait<=1; wait++) + { + err |= sync_direct(inode, wait); + err
|= sync_indirect(inode, &inode->u.xiafs_i.i_ind_zone, wait); + err |= sync_dindirect(inode, &inode->u.xiafs_i.i_dind_zone, wait); + } + err |= xiafs_sync_inode (inode); + return (err < 0) ? -EIO : 0; +} diff --git a/fs/xiafs/inode.c b/fs/xiafs/inode.c new file mode 100644 index 000000000..171499a95 --- /dev/null +++ b/fs/xiafs/inode.c @@ -0,0 +1,502 @@ +/* + * linux/fs/xiafs/inode.c + * + * Copyright (C) Q. Frank Xia, 1993. + * + * Based on Linus' minix/inode.c + * Copyright (C) Linus Torvalds, 1991, 1992. + * + * This software may be redistributed per Linux Copyright. + */ + +#include <linux/sched.h> +#include <linux/xia_fs.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/locks.h> +#include <asm/system.h> +#include <asm/segment.h> + +#include "xiafs_mac.h" + /* rotating zone-allocation hint: set from CURRENT_TIME in xiafs_read_super, advanced by 23 modulo s_ndatazones in get_prev_addr */ +static u_long random_nr; + /* If the inode still has links, do nothing; otherwise set its size to 0, truncate it and free the inode. */ +void xiafs_put_inode(struct inode *inode) +{ + if (inode->i_nlink) + return; + inode->i_size = 0; + xiafs_truncate(inode); + xiafs_free_inode(inode); +} + /* Under the superblock lock: clear s_dev and release every inode-map and zone-map buffer slot. */ +void xiafs_put_super(struct super_block *sb) +{ + int i; + + lock_super(sb); + sb->s_dev = 0; + for(i = 0 ; i < _XIAFS_IMAP_SLOTS ; i++) + brelse(sb->u.xiafs_sb.s_imap_buf[i]); + for(i = 0 ; i < _XIAFS_ZMAP_SLOTS ; i++) + brelse(sb->u.xiafs_sb.s_zmap_buf[i]); + unlock_super(sb); +} + /* Superblock operations table for xiafs; the slots are positional and the NULL ones are not supplied by this filesystem. */ +static struct super_operations xiafs_sops = { + xiafs_read_inode, + NULL, + xiafs_write_inode, + xiafs_put_inode, + xiafs_put_super, + NULL, + xiafs_statfs, + NULL +}; + /* Read the xiafs superblock from device s->s_dev and set up s: checks the magic number, copies the on-disk geometry into s->u.xiafs_sb, loads the inode and zone bitmaps when they fit in the buffer slots and reads the root inode. Returns s on success and NULL on failure. The data argument is not used; silent suppresses the message printed when the magic number does not match. */ +struct super_block *xiafs_read_super(struct super_block *s, void *data, + int silent) +{ + struct buffer_head *bh; + struct xiafs_super_block *sp; + int i, z, dev; + + dev=s->s_dev; + lock_super(s); + + set_blocksize(dev, BLOCK_SIZE); + + if (!(bh = bread(dev, 0, BLOCK_SIZE))) { + s->s_dev=0; + unlock_super(s); + printk("XIA-FS: read super_block failed (%s %d)\n", WHERE_ERR); + return NULL; + } + sp = (struct xiafs_super_block *) bh->b_data; + s->s_magic = sp->s_magic; + if (s->s_magic != _XIAFS_SUPER_MAGIC)
{ + s->s_dev = 0; + unlock_super(s); + brelse(bh); + if (!silent) + printk("VFS: Can't find a xiafs filesystem on dev 0x%04x.\n", + dev); + return NULL; + } + s->s_blocksize = sp->s_zone_size; + s->s_blocksize_bits = 10 + sp->s_zone_shift; /* zone size differs from BLOCK_SIZE: switch the device block size and read the superblock again */ + if (s->s_blocksize != BLOCK_SIZE && + (s->s_blocksize == 1024 || s->s_blocksize == 2048 || + s->s_blocksize == 4096)) { + brelse(bh); + set_blocksize(dev, s->s_blocksize); + bh = bread (dev, 0, s->s_blocksize); /* on failure undo the state like the first read does: clear s_dev and drop the superblock lock before returning */ + if(!bh) { + s->s_dev = 0; + unlock_super(s); + printk("XIA-FS: read super_block failed (%s %d)\n", WHERE_ERR); + return NULL; + } /* NOTE(review): the first read took sp from offset 0 of block 0, this one takes it from offset BLOCK_SIZE of block 0 - confirm which offset is intended */ + sp = (struct xiafs_super_block *) (((char *)bh->b_data) + BLOCK_SIZE) ; + }; /* copy the on-disk geometry into the in-core superblock */ + s->u.xiafs_sb.s_nzones = sp->s_nzones; + s->u.xiafs_sb.s_ninodes = sp->s_ninodes; + s->u.xiafs_sb.s_ndatazones = sp->s_ndatazones; + s->u.xiafs_sb.s_imap_zones = sp->s_imap_zones; + s->u.xiafs_sb.s_zmap_zones = sp->s_zmap_zones; + s->u.xiafs_sb.s_firstdatazone = sp->s_firstdatazone; + s->u.xiafs_sb.s_zone_shift = sp->s_zone_shift; + s->u.xiafs_sb.s_max_size = sp->s_max_size; + brelse(bh); /* start with every bitmap slot empty so the failure path can brelse all of them */ + for (i=0;i < _XIAFS_IMAP_SLOTS;i++) { + s->u.xiafs_sb.s_imap_buf[i] = NULL; + s->u.xiafs_sb.s_imap_iznr[i] = -1; + } + for (i=0;i < _XIAFS_ZMAP_SLOTS;i++) { + s->u.xiafs_sb.s_zmap_buf[i] = NULL; + s->u.xiafs_sb.s_zmap_zznr[i] = -1; + } /* z counts zones from 1: the inode-map zones are read first, the zone-map zones follow */ + z=1; /* a bitmap with more zones than slots is only flagged as cached (nothing is read here); otherwise all of its zones are read now */ + if ( s->u.xiafs_sb.s_imap_zones > _XIAFS_IMAP_SLOTS ) + s->u.xiafs_sb.s_imap_cached=1; + else { + s->u.xiafs_sb.s_imap_cached=0; + for (i=0 ; i < s->u.xiafs_sb.s_imap_zones ; i++) { + if (!(s->u.xiafs_sb.s_imap_buf[i]=bread(dev, z++, XIAFS_ZSIZE(s)))) + goto xiafs_read_super_fail; + s->u.xiafs_sb.s_imap_iznr[i]=i; + } + } + if ( s->u.xiafs_sb.s_zmap_zones > _XIAFS_ZMAP_SLOTS ) + s->u.xiafs_sb.s_zmap_cached=1; + else { + s->u.xiafs_sb.s_zmap_cached=0; + for (i=0 ; i < s->u.xiafs_sb.s_zmap_zones ; i++) { + if (!(s->u.xiafs_sb.s_zmap_buf[i]=bread(dev, z++, XIAFS_ZSIZE(s)))) + goto xiafs_read_super_fail; + s->u.xiafs_sb.s_zmap_zznr[i]=i; + } + } + /* set up enough so that it can read an inode */ + s->s_dev = dev; + s->s_op = &xiafs_sops; + s->s_mounted = iget(s, _XIAFS_ROOT_INO); + if
(!s->s_mounted) + goto xiafs_read_super_fail; + unlock_super(s); + random_nr=CURRENT_TIME; + return s; + +xiafs_read_super_fail: /* common failure exit: release every bitmap slot, clear s_dev, unlock */ + for(i=0; i < _XIAFS_IMAP_SLOTS; i++) + brelse(s->u.xiafs_sb.s_imap_buf[i]); + for(i=0; i < _XIAFS_ZMAP_SLOTS; i++) + brelse(s->u.xiafs_sb.s_zmap_buf[i]); + s->s_dev=0; + unlock_super(s); + printk("XIA-FS: read bitmaps failed (%s %d)\n", WHERE_ERR); + return NULL; +} + /* Fill the user-space statfs buffer buf: magic, zone size, data-zone count, free zones (stored as both f_bfree and f_bavail), inode count, free inodes and maximum name length. f_fsid is not written. */ +void xiafs_statfs(struct super_block *sb, struct statfs *buf) +{ + long tmp; + + put_fs_long(_XIAFS_SUPER_MAGIC, &buf->f_type); + put_fs_long(XIAFS_ZSIZE(sb), &buf->f_bsize); + put_fs_long(sb->u.xiafs_sb.s_ndatazones, &buf->f_blocks); + tmp = xiafs_count_free_zones(sb); + put_fs_long(tmp, &buf->f_bfree); + put_fs_long(tmp, &buf->f_bavail); + put_fs_long(sb->u.xiafs_sb.s_ninodes, &buf->f_files); + put_fs_long(xiafs_count_free_inodes(sb), &buf->f_ffree); + put_fs_long(_XIAFS_NAME_LEN, &buf->f_namelen); + /* don't know what should be put in buf->f_fsid */ +} + /* Return entry nr of the u_long table held in bh and release bh; returns 0 when bh is NULL. */ +static int zone_bmap(struct buffer_head * bh, int nr) +{ + int tmp; + + if (!bh) + return 0; + tmp = ((u_long *) bh->b_data)[nr]; + brelse(bh); + return tmp; +} + /* Map a file-relative zone index to the zone number stored for it: indexes 0-7 come from the direct slots, the next XIAFS_ADDRS_PER_Z from the single-indirect zone, the rest from the double-indirect zone. Returns 0 for a negative or too large index, for an unset slot and when an indirect zone cannot be read. Also stamps i_atime unless the inode is read-only. */ +int xiafs_bmap(struct inode * inode,int zone) +{ + int i; + + if (zone < 0) { + printk("XIA-FS: block < 0 (%s %d)\n", WHERE_ERR); + return 0; + } + if (zone >= 8+(1+XIAFS_ADDRS_PER_Z(inode->i_sb))*XIAFS_ADDRS_PER_Z(inode->i_sb)) { + printk("XIA-FS: zone > big (%s %d)\n", WHERE_ERR); + return 0; + } + if (!IS_RDONLY (inode)) { + inode->i_atime = CURRENT_TIME; + inode->i_dirt = 1; + } + if (zone < 8) + return inode->u.xiafs_i.i_zone[zone]; + zone -= 8; + if (zone < XIAFS_ADDRS_PER_Z(inode->i_sb)) { + i = inode->u.xiafs_i.i_ind_zone; + if (i) + i = zone_bmap(bread(inode->i_dev, i, XIAFS_ZSIZE(inode->i_sb)), zone); + return i; + } + zone -= XIAFS_ADDRS_PER_Z(inode->i_sb); + i = inode->u.xiafs_i.i_dind_zone; /* first level: the high bits of zone select the indirect zone, the low bits select the entry inside it */ + if (i) + i = zone_bmap(bread(inode->i_dev, i, XIAFS_ZSIZE(inode->i_sb)), + zone >> XIAFS_ADDRS_PER_Z_BITS(inode->i_sb)); + if (i) + i=
zone_bmap(bread(inode->i_dev,i, XIAFS_ZSIZE(inode->i_sb)), + zone & (XIAFS_ADDRS_PER_Z(inode->i_sb)-1)); + return i; +} + /* Pick the allocation hint passed to xiafs_new_zone: the zone number mapped for the nearest earlier zone index of the file, or, when none is mapped, s_firstdatazone plus the rotating counter random_nr. */ +static u_long get_prev_addr(struct inode * inode, int zone) +{ + u_long tmp; + + if (zone > 0) + while (--zone >= 0) /* only files with holes suffer */ + if ((tmp=xiafs_bmap(inode, zone))) + return tmp; + random_nr=(random_nr+23)%inode->i_sb->u.xiafs_sb.s_ndatazones; + return random_nr + inode->i_sb->u.xiafs_sb.s_firstdatazone; +} + /* Get the buffer for the zone whose number is stored at *lp. When the slot is empty and create is set, a new zone is allocated with prev_addr as hint, stored in *lp and added to i_blocks. *lp is compared again after each getblk and the lookup is repeated when it changed. Returns NULL when the slot is empty and create is 0, or when no zone could be allocated. */ +static struct buffer_head * +dt_getblk(struct inode * inode, u_long *lp, int create, u_long prev_addr) +{ + int tmp; + struct buffer_head * result; + +repeat: + if ((tmp=*lp)) { + result = getblk(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); + if (tmp == *lp) + return result; + brelse(result); + goto repeat; + } + if (!create) + return NULL; + tmp = xiafs_new_zone(inode->i_sb, prev_addr); + if (!tmp) + return NULL; + result = getblk(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); + if (*lp) { /* the slot was filled in the meantime: give the new zone back and retry */ + xiafs_free_zone(inode->i_sb, tmp); + brelse(result); + goto repeat; + } + *lp = tmp; + inode->i_blocks+=2 << XIAFS_ZSHIFT(inode->i_sb); + return result; +} + /* Same job as dt_getblk, but the slot is entry nr of the indirect zone held in bh. bh is read first when it is not up to date, is marked dirty when a new zone number is stored in it, and is released on every path. Returns NULL when bh is NULL or cannot be read, when the slot is empty and create is 0, or when no zone could be allocated. */ +static struct buffer_head * +indt_getblk(struct inode * inode, struct buffer_head * bh, + int nr, int create, u_long prev_addr) +{ + int tmp; + u_long *lp; + struct buffer_head * result; + + if (!bh) + return NULL; + if (!bh->b_uptodate) { + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + if (!bh->b_uptodate) { + brelse(bh); + return NULL; + } + } + lp = nr + (u_long *) bh->b_data; +repeat: + if ((tmp=*lp)) { + result = getblk(bh->b_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); + if (tmp == *lp) { + brelse(bh); + return result; + } + brelse(result); + goto repeat; + } + if (!create) { + brelse(bh); + return NULL; + } + tmp = xiafs_new_zone(inode->i_sb, prev_addr); + if (!tmp) { + brelse(bh); + return NULL; + } + result = getblk(bh->b_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); + if (*lp) { + xiafs_free_zone(inode->i_sb, tmp); + brelse(result); + goto repeat; + } + *lp = tmp; +
inode->i_blocks+=2 << XIAFS_ZSHIFT(inode->i_sb); + mark_buffer_dirty(bh, 1); + brelse(bh); + return result; +} + /* Return the buffer for file-relative zone index zone, going through the direct slots, the single-indirect zone or the double-indirect zone. With create set, missing zones (including missing indirect zones) are allocated on the way. Returns NULL for a negative or too large index, for an unmapped zone when create is 0, and when allocation or reading of an indirect zone fails. The buffer contents are not read here. */ +struct buffer_head * xiafs_getblk(struct inode * inode, int zone, int create) +{ + struct buffer_head * bh; + u_long prev_addr=0; + + if (zone<0) { + printk("XIA-FS: zone < 0 (%s %d)\n", WHERE_ERR); + return NULL; + } + if (zone >= 8+(1+XIAFS_ADDRS_PER_Z(inode->i_sb))*XIAFS_ADDRS_PER_Z(inode->i_sb)) { + if (!create) + printk("XIA-FS: zone > big (%s %d)\n", WHERE_ERR); + return NULL; + } + if (create) + prev_addr=get_prev_addr(inode, zone); + if (zone < 8) + return dt_getblk(inode, zone+inode->u.xiafs_i.i_zone, create, prev_addr); + zone -= 8; + if (zone < XIAFS_ADDRS_PER_Z(inode->i_sb)) { + bh = dt_getblk(inode, &(inode->u.xiafs_i.i_ind_zone), create, prev_addr); + bh = indt_getblk(inode, bh, zone, create, prev_addr); + return bh; + } + zone -= XIAFS_ADDRS_PER_Z(inode->i_sb); + bh = dt_getblk(inode, &(inode->u.xiafs_i.i_dind_zone), create, prev_addr); + bh = indt_getblk(inode, bh, zone>>XIAFS_ADDRS_PER_Z_BITS(inode->i_sb), + create, prev_addr); + bh = indt_getblk(inode, bh, zone&(XIAFS_ADDRS_PER_Z(inode->i_sb)-1), + create, prev_addr); + return bh; +} + /* xiafs_getblk followed by a read when the buffer is not up to date. Returns the up-to-date buffer, or NULL when xiafs_getblk returns NULL or the read fails. */ +struct buffer_head * xiafs_bread(struct inode * inode, int zone, int create) +{ + struct buffer_head * bh; + + bh = xiafs_getblk(inode, zone, create); + if (!bh || bh->b_uptodate) + return bh; + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + if (bh->b_uptodate) + return bh; + brelse(bh); + return NULL; +} + /* Fill the in-core inode from its on-disk copy. i_op and i_mode are cleared first, so on a bad inode number or a failed read the inode is left with i_op NULL and i_mode 0. */ +void xiafs_read_inode(struct inode * inode) +{ + struct buffer_head * bh; + struct xiafs_inode * raw_inode; + int zone; + ino_t ino; + + ino = inode->i_ino; + inode->i_op = NULL; + inode->i_mode=0; + if (!ino || ino > inode->i_sb->u.xiafs_sb.s_ninodes) { + printk("XIA-FS: bad inode number (%s %d)\n", WHERE_ERR); + return; + } /* the inode table starts after zone 0, the inode-map zones and the zone-map zones; inode numbers start at 1 */ + zone = 1 + inode->i_sb->u.xiafs_sb.s_imap_zones + + inode->i_sb->u.xiafs_sb.s_zmap_zones + + (ino-1)/ XIAFS_INODES_PER_Z(inode->i_sb); + if (!(bh=bread(inode->i_dev,
zone, XIAFS_ZSIZE(inode->i_sb)))) { + printk("XIA-FS: read i-node zone failed (%s %d)\n", WHERE_ERR); + return; + } /* pick this inode out of the zone just read */ + raw_inode = ((struct xiafs_inode *) bh->b_data) + + ((ino-1) & (XIAFS_INODES_PER_Z(inode->i_sb) - 1)); + inode->i_mode = raw_inode->i_mode; + inode->i_uid = raw_inode->i_uid; + inode->i_gid = raw_inode->i_gid; + inode->i_nlink = raw_inode->i_nlinks; + inode->i_size = raw_inode->i_size; + inode->i_mtime = raw_inode->i_mtime; + inode->i_atime = raw_inode->i_atime; + inode->i_ctime = raw_inode->i_ctime; + inode->i_blksize = XIAFS_ZSIZE(inode->i_sb); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { /* device nodes keep the device number in the first zone slot */ + inode->i_blocks=0; + inode->i_rdev = raw_inode->i_zone[0]; + } else { /* only the low 24 bits of each on-disk slot are taken as the zone number; the top byte is masked off here */ + XIAFS_GET_BLOCKS(raw_inode, inode->i_blocks); + for (zone = 0; zone < 8; zone++) + inode->u.xiafs_i.i_zone[zone] = raw_inode->i_zone[zone] & 0xffffff; + inode->u.xiafs_i.i_ind_zone = raw_inode->i_ind_zone & 0xffffff; + inode->u.xiafs_i.i_dind_zone = raw_inode->i_dind_zone & 0xffffff; + } + brelse(bh); /* choose the operations table by file type; any other type keeps i_op NULL */ + if (S_ISREG(inode->i_mode)) + inode->i_op = &xiafs_file_inode_operations; + else if (S_ISDIR(inode->i_mode)) + inode->i_op = &xiafs_dir_inode_operations; + else if (S_ISLNK(inode->i_mode)) + inode->i_op = &xiafs_symlink_inode_operations; + else if (S_ISCHR(inode->i_mode)) + inode->i_op = &chrdev_inode_operations; + else if (S_ISBLK(inode->i_mode)) + inode->i_op = &blkdev_inode_operations; + else if (S_ISFIFO(inode->i_mode)) + init_fifo(inode); +} + /* Copy the in-core inode into its on-disk slot and mark that buffer dirty. Returns the buffer, still held, for the caller to release, or 0 when the filesystem is read-only, the inode number is out of range or the inode zone cannot be read. i_dirt is cleared on every path. */ +static struct buffer_head * xiafs_update_inode(struct inode * inode) +{ + struct buffer_head * bh; + struct xiafs_inode * raw_inode; + int zone; + ino_t ino; + + if (IS_RDONLY (inode)) { + printk("XIA-FS: write_inode on a read-only filesystem (%s %d)\n", WHERE_ERR); + inode->i_dirt = 0; + return 0; + } + + ino = inode->i_ino; + if (!ino || ino > inode->i_sb->u.xiafs_sb.s_ninodes) { + printk("XIA-FS: bad inode number (%s %d)\n", WHERE_ERR); + inode->i_dirt=0; + return 0; + } + zone = 1 + inode->i_sb->u.xiafs_sb.s_imap_zones + +
inode->i_sb->u.xiafs_sb.s_zmap_zones + + (ino-1) / XIAFS_INODES_PER_Z(inode->i_sb); + if (!(bh=bread(inode->i_dev, zone, XIAFS_ZSIZE(inode->i_sb)))) { + printk("XIA-FS: read i-node zone failed (%s %d)\n", WHERE_ERR); + inode->i_dirt=0; + return 0; + } + raw_inode = ((struct xiafs_inode *)bh->b_data) + + ((ino-1) & (XIAFS_INODES_PER_Z(inode->i_sb) -1)); + raw_inode->i_mode = inode->i_mode; + raw_inode->i_uid = inode->i_uid; + raw_inode->i_gid = inode->i_gid; + raw_inode->i_nlinks = inode->i_nlink; + raw_inode->i_size = inode->i_size; + raw_inode->i_atime = inode->i_atime; + raw_inode->i_ctime = inode->i_ctime; + raw_inode->i_mtime = inode->i_mtime; + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + raw_inode->i_zone[0] = inode->i_rdev; + else { /* the top byte of every on-disk zone slot is preserved; only the low 24 bits are replaced by the in-core zone number */ + XIAFS_PUT_BLOCKS(raw_inode, inode->i_blocks); + for (zone = 0; zone < 8; zone++) + raw_inode->i_zone[zone] = (raw_inode->i_zone[zone] & 0xff000000) + | (inode->u.xiafs_i.i_zone[zone] & 0xffffff); + raw_inode->i_ind_zone = (raw_inode->i_ind_zone & 0xff000000) + | (inode->u.xiafs_i.i_ind_zone & 0xffffff); + raw_inode->i_dind_zone = (raw_inode->i_dind_zone & 0xff000000) + | (inode->u.xiafs_i.i_dind_zone & 0xffffff); + } + inode->i_dirt=0; + mark_buffer_dirty(bh, 1); + return bh; +} + + /* Copy the inode into its on-disk buffer and release the buffer without waiting for it to be written. */ +void xiafs_write_inode(struct inode * inode) +{ + struct buffer_head * bh; + bh = xiafs_update_inode(inode); + brelse (bh); +} + /* Copy the inode into its on-disk buffer and, when that buffer is dirty, write it and wait for completion. Returns 0 on success and -1 when xiafs_update_inode yields no buffer or the buffer is not up to date after the write. */ +int xiafs_sync_inode (struct inode *inode) +{ + int err = 0; + struct buffer_head *bh; + + bh = xiafs_update_inode(inode); + if (bh && bh->b_dirt) + { + ll_rw_block(WRITE, 1, &bh); + wait_on_buffer(bh); + if (bh->b_req && !bh->b_uptodate) + { + printk ("IO error syncing xiafs inode [%04X:%lu]\n", + inode->i_dev, inode->i_ino); + err = -1; + } + } + else if (!bh) + err = -1; + brelse (bh); + return err; +} diff --git a/fs/xiafs/namei.c b/fs/xiafs/namei.c new file mode 100644 index 000000000..0532b1754 --- /dev/null +++ b/fs/xiafs/namei.c @@ -0,0 +1,848 @@ +/* + * Linux/fs/xiafs/namei.c + * + * Copyright (C) Q. 
Frank Xia, 1993. + * + * Based on Linus' minix/namei.c + * Copyright (C) Linus Torvalds, 1991, 1992. + * + * This software may be redistributed per Linux Copyright. + */ + +#include <linux/sched.h> +#include <linux/xia_fs.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/errno.h> +#include <asm/segment.h> + +#include "xiafs_mac.h" + /* RNDUP4(x): x converted to u_long and rounded up to a multiple of 4 */ +#define RNDUP4(x) ((3+(u_long)(x)) & ~3) +/* + * ok, we cannot use strncmp, as the name is not in our data space. + * Thus we'll have to use xiafs_match. No big problem. Match also makes + * some sanity tests. + * + * NOTE! unlike strncmp, xiafs_match returns 1 for success, 0 for failure. + * + * An entry with d_ino == 0, a NULL entry and a len larger than + * _XIAFS_NAME_LEN never match. + */ +static int xiafs_match(int len, const char * name, struct xiafs_direct * dep) +{ + int i; + + if (!dep || !dep->d_ino || len > _XIAFS_NAME_LEN) + return 0; + /* "" means "." ---> so paths like "/usr/lib//libc.a" work */ + if (!len && (dep->d_name[0]=='.') && (dep->d_name[1]=='\0')) + return 1; + if (len != dep->d_name_len) + return 0; + for (i=0; i < len; i++) + if (*name++ != dep->d_name[i]) + return 0; + return 1; +} + +/* + * xiafs_find_entry() + * + * finds an entry in the specified directory with the wanted name. It + * returns the cache buffer in which the entry was found, and the entry + * itself (as a parameter - res_dir). It does NOT read the inode of the + * entry - you'll have to do that yourself if you want to. + * + * res_pre, when not NULL, receives the entry that precedes the match in + * the same zone, or the match itself when it is the first entry of the + * zone. NULL is returned when the name is not found, when the directory + * size is not a multiple of the zone size, and when a corrupt entry is met. 
+ */ +static struct buffer_head * +xiafs_find_entry(struct inode * inode, const char * name, int namelen, + struct xiafs_direct ** res_dir, struct xiafs_direct ** res_pre) +{ + int i, zones, pos; + struct buffer_head * bh; + struct xiafs_direct * dep, * dep_pre; + + *res_dir = NULL; + if (!inode) + return NULL; + if (namelen > _XIAFS_NAME_LEN) + return NULL; + + if (inode->i_size & (XIAFS_ZSIZE(inode->i_sb) - 1)) { + printk("XIA-FS: bad dir size (%s %d)\n", WHERE_ERR); + return NULL; + } + zones=inode->i_size >> XIAFS_ZSIZE_BITS(inode->i_sb); + for (i=0; i < zones; i++ ) { + bh = xiafs_bread(inode, i, 0); + if (!bh) + continue; + dep_pre=dep=(struct xiafs_direct *)bh->b_data; /* the first record of zone 0 must be a 12-byte in-use entry named . */ + if (!i && (dep->d_rec_len != 12 || !dep->d_ino || + dep->d_name_len != 1 || strcmp(dep->d_name, "."))) { + printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); + brelse(bh); + return NULL; + } + pos = 0; + while ( pos < XIAFS_ZSIZE(inode->i_sb) ) { /* sanity-check the record before it is compared */ + if (dep->d_ino > inode->i_sb->u.xiafs_sb.s_ninodes || + dep->d_rec_len < 12 || + dep->d_rec_len+(char *)dep > bh->b_data+XIAFS_ZSIZE(inode->i_sb) || + dep->d_name_len + 8 > dep->d_rec_len || dep->d_name_len <= 0 || + dep->d_name[dep->d_name_len] ) { + brelse(bh); + return NULL; + } + if (xiafs_match(namelen, name, dep)) { + *res_dir=dep; + if (res_pre) + *res_pre=dep_pre; + return bh; + } + pos += dep->d_rec_len; + dep_pre=dep; + dep=(struct xiafs_direct *)(bh->b_data + pos); + } + brelse(bh); + if (pos > XIAFS_ZSIZE(inode->i_sb)) { + printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); + return NULL; + } + } + return NULL; +} + /* Look name up in directory dir and return the inode found in *result. dir is released with iput on every path where it is not NULL. Returns 0 on success, -ENOENT when dir is NULL, is not a directory or has no such entry, and -EACCES when iget fails. */ +int xiafs_lookup(struct inode * dir, const char * name, int len, + struct inode ** result) +{ + int ino; + struct xiafs_direct * dep; + struct buffer_head * bh; + + *result = NULL; + if (!dir) + return -ENOENT; + if (!S_ISDIR(dir->i_mode)) { + iput(dir); + return -ENOENT; + } + if (!(bh = xiafs_find_entry(dir, name, len, &dep, NULL))) { + iput(dir); + return -ENOENT; + } + ino = dep->d_ino; + brelse(bh); + if
(!(*result = iget(dir->i_sb, ino))) { + iput(dir); + return -EACCES; + } + iput(dir); + return 0; +} + +/* + * xiafs_add_entry() + * + * adds a file entry to the specified directory, using the same + * semantics as xiafs_find_entry(). It returns NULL if it failed. + * + * NOTE!! The inode part of 'de' is left at 0 - which means you + * may not sleep between calling this and putting something into + * the entry, as someone else might have used it while you slept. + * + * On success the buffer holding the new entry is returned, already + * marked dirty; the entry is stored in *res_dir and, when res_pre is + * not NULL, the entry preceding it in *res_pre. The directory grows + * by one zone when no existing zone has room. + */ +static struct buffer_head * xiafs_add_entry(struct inode * dir, + const char * name, int namelen, struct xiafs_direct ** res_dir, + struct xiafs_direct ** res_pre) +{ + int i, pos, offset; + struct buffer_head * bh; + struct xiafs_direct * de, * de_pre; + + *res_dir = NULL; + if (!dir || !namelen || namelen > _XIAFS_NAME_LEN) + return NULL; + + if (dir->i_size & (XIAFS_ZSIZE(dir->i_sb) - 1)) { + printk("XIA-FS: bad dir size (%s %d)\n", WHERE_ERR); + return NULL; + } + pos=0; /* pos is the byte position of the zone being scanned; zone 0 is read with create=0, later zones with create=1 */ + for ( ; ; ) { + bh = xiafs_bread(dir, pos >> XIAFS_ZSIZE_BITS(dir->i_sb), pos ?
1:0); + if (!bh) + return NULL; + de_pre=de=(struct xiafs_direct *)bh->b_data; + if (!pos) { /* zone 0 must start with a 12-byte in-use entry named . ; scanning starts right after it */ + if (de->d_rec_len != 12 || !de->d_ino || de->d_name_len != 1 || + strcmp(de->d_name, ".")) { + printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); + brelse(bh); + return NULL; + } + offset = 12; + de_pre=de=(struct xiafs_direct *)(bh->b_data+12); + } else + offset = 0; + while (offset < XIAFS_ZSIZE(dir->i_sb)) { + if (pos >= dir->i_size) { /* zone beyond the current directory size: set it up as one unused record spanning the whole zone and grow the directory */ + de->d_ino=0; + de->d_name_len=0; + de->d_name[0]=0; + de->d_rec_len=XIAFS_ZSIZE(dir->i_sb); + dir->i_size += XIAFS_ZSIZE(dir->i_sb); + dir->i_dirt = 1; + } else { + if (de->d_ino > dir->i_sb->u.xiafs_sb.s_ninodes || + de->d_rec_len < 12 || + (char *)de+de->d_rec_len > bh->b_data+XIAFS_ZSIZE(dir->i_sb) || + de->d_name_len + 8 > de->d_rec_len || + de->d_name[de->d_name_len]) { + printk("XIA-FS: bad directory entry (%s %d)\n", WHERE_ERR); + brelse(bh); + return NULL; + } /* an in-use record with enough slack is split: it keeps RNDUP4(name length)+8 bytes and the remainder becomes a new unused record */ + if (de->d_ino && + RNDUP4(de->d_name_len)+RNDUP4(namelen)+16<=de->d_rec_len) { + i=RNDUP4(de->d_name_len)+8; + de_pre=de; + de=(struct xiafs_direct *)(i+(u_char *)de_pre); + de->d_ino=0; + de->d_rec_len=de_pre->d_rec_len-i; + de_pre->d_rec_len=i; + } + } /* an unused record that is large enough takes the new name; d_ino is left at 0 for the caller to fill in */ + if (!de->d_ino && RNDUP4(namelen)+8 <= de->d_rec_len) { + /* + * XXX all times should be set by caller upon successful + * completion. 
+ */ + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + dir->i_dirt = 1; + memcpy(de->d_name, name, namelen); + de->d_name[namelen]=0; + de->d_name_len=namelen; + mark_buffer_dirty(bh, 1); + *res_dir = de; + if (res_pre) + *res_pre = de_pre; + return bh; + } + offset+=de->d_rec_len; + de_pre=de; + de=(struct xiafs_direct *)(bh->b_data+offset); + } + brelse(bh); + if (offset > XIAFS_ZSIZE(dir->i_sb)) { + printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); + return NULL; + } + pos+=XIAFS_ZSIZE(dir->i_sb); + } + return NULL; +} + /* Create a regular file called name in dir with the given mode and return its inode in *result. dir is released with iput on every path where it is not NULL. Returns 0 on success, -ENOENT when dir is NULL, and -ENOSPC when no inode or no directory entry could be obtained; in the latter case the link count of the new inode is dropped before it is released. */ +int xiafs_create(struct inode * dir, const char * name, int len, int mode, + struct inode ** result) +{ + struct inode * inode; + struct buffer_head * bh; + struct xiafs_direct * de; + + *result = NULL; + if (!dir) + return -ENOENT; + inode = xiafs_new_inode(dir); + if (!inode) { + iput(dir); + return -ENOSPC; + } + inode->i_op = &xiafs_file_inode_operations; + inode->i_mode = mode; + inode->i_dirt = 1; + bh = xiafs_add_entry(dir, name, len, &de, NULL); + if (!bh) { + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + iput(dir); + return -ENOSPC; + } + de->d_ino = inode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + *result = inode; + return 0; +} + /* Create a node called name in dir; mode selects the file type and so the operations table, and rdev is stored only for block and character devices. Both dir and the new inode are released before returning. Returns 0 on success, -ENOENT when dir is NULL, -EEXIST when the name is already present, and -ENOSPC when no inode or no directory entry could be obtained. */ +int xiafs_mknod(struct inode *dir, const char *name, int len, int mode, int rdev) +{ + struct inode * inode; + struct buffer_head * bh; + struct xiafs_direct * de; + + if (!dir) + return -ENOENT; + bh = xiafs_find_entry(dir,name,len,&de, NULL); + if (bh) { + brelse(bh); + iput(dir); + return -EEXIST; + } + inode = xiafs_new_inode(dir); + if (!inode) { + iput(dir); + return -ENOSPC; + } + inode->i_uid = current->fsuid; + inode->i_mode = mode; + inode->i_op = NULL; + if (S_ISREG(inode->i_mode)) + inode->i_op = &xiafs_file_inode_operations; + else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &xiafs_dir_inode_operations; + if (dir->i_mode & S_ISGID) + inode->i_mode |= S_ISGID; + } + else if (S_ISLNK(inode->i_mode)) + inode->i_op = &xiafs_symlink_inode_operations; + else if
(S_ISCHR(inode->i_mode)) + inode->i_op = &chrdev_inode_operations; + else if (S_ISBLK(inode->i_mode)) + inode->i_op = &blkdev_inode_operations; + else if (S_ISFIFO(inode->i_mode)) + init_fifo(inode); + if (S_ISBLK(mode) || S_ISCHR(mode)) + inode->i_rdev = rdev; /* stamp all three times of the new inode, as xiafs_mkdir does */ + inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME; + inode->i_dirt = 1; + bh = xiafs_add_entry(dir, name, len, &de, NULL); + if (!bh) { /* no room for the entry: drop the link count of the new inode and release both inodes */ + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + iput(dir); + return -ENOSPC; + } + de->d_ino = inode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + iput(inode); + return 0; +} + /* Create the directory name inside dir. The new directory gets one zone holding a 12-byte . entry and a .. entry that spans the rest of the zone, and a link count of 2; the link count of dir is raised by one. dir is released on every path. Returns 0 on success, -EEXIST when the name is already present, -EMLINK when dir has more than 64000 links, and -ENOSPC when no inode, zone or directory entry could be obtained. */ +int xiafs_mkdir(struct inode * dir, const char * name, int len, int mode) +{ + struct inode * inode; + struct buffer_head * bh, *dir_block; + struct xiafs_direct * de; + + bh = xiafs_find_entry(dir,name,len,&de, NULL); + if (bh) { + brelse(bh); + iput(dir); + return -EEXIST; + } + if (dir->i_nlink > 64000) { + iput(dir); + return -EMLINK; + } + inode = xiafs_new_inode(dir); + if (!inode) { + iput(dir); + return -ENOSPC; + } + inode->i_op = &xiafs_dir_inode_operations; + inode->i_size = XIAFS_ZSIZE(dir->i_sb); + inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME; + dir_block = xiafs_bread(inode,0,1); + if (!dir_block) { + iput(dir); + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + return -ENOSPC; + } /* first record: the directory itself */ + de = (struct xiafs_direct *) dir_block->b_data; + de->d_ino=inode->i_ino; + strcpy(de->d_name,"."); + de->d_name_len=1; + de->d_rec_len=12; /* second record: the parent, covering the rest of the zone */ + de =(struct xiafs_direct *)(12 + dir_block->b_data); + de->d_ino = dir->i_ino; + strcpy(de->d_name,".."); + de->d_name_len=2; + de->d_rec_len=XIAFS_ZSIZE(dir->i_sb)-12; + inode->i_nlink = 2; + mark_buffer_dirty(dir_block, 1); + brelse(dir_block); + inode->i_mode = S_IFDIR | (mode & S_IRWXUGO & ~current->fs->umask); + if (dir->i_mode & S_ISGID) + inode->i_mode |= S_ISGID; + inode->i_dirt = 1; + bh = xiafs_add_entry(dir, name, len, &de, NULL); + if (!bh) { + iput(dir); + inode->i_nlink=0; +
iput(inode); + return -ENOSPC; + } + de->d_ino = inode->i_ino; + mark_buffer_dirty(bh, 1); + dir->i_nlink++; + dir->i_dirt = 1; + iput(dir); + iput(inode); + brelse(bh); + return 0; +} + +/* + * routine to check that the specified directory is empty (for rmdir) + */ +static int empty_dir(struct inode * inode) +{ + int i, zones, offset; + struct buffer_head * bh; + struct xiafs_direct * de; + + if (inode->i_size & (XIAFS_ZSIZE(inode->i_sb)-1) ) { + printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); + return 1; + } + + zones=inode->i_size >> XIAFS_ZSIZE_BITS(inode->i_sb); + for (i=0; i < zones; i++) { + bh = xiafs_bread(inode, i, 0); + if (!i) { + if (!bh) { + printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); + return 1; + } + de=(struct xiafs_direct *)bh->b_data; + if (de->d_ino != inode->i_ino || strcmp(".", de->d_name) || + de->d_rec_len != 12 ) { + printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); + brelse(bh); + return 1; + } + de=(struct xiafs_direct *)(12 + bh->b_data); + if (!de->d_ino || strcmp("..", de->d_name)) { + printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); + brelse(bh); + return 1; + } + offset=de->d_rec_len+12; + } + else + offset = 0; + if (!bh) + continue; + while (offset < XIAFS_ZSIZE(inode->i_sb)) { + de=(struct xiafs_direct *)(bh->b_data+offset); + if (de->d_ino > inode->i_sb->u.xiafs_sb.s_ninodes || + de->d_rec_len < 12 || + (char *)de+de->d_rec_len > bh->b_data+XIAFS_ZSIZE(inode->i_sb) || + de->d_name_len + 8 > de->d_rec_len || + de->d_name[de->d_name_len]) { + printk("XIA-FS: bad directory (%s %d)\n", WHERE_ERR); + brelse(bh); + return 1; + } + if (de->d_ino) { + brelse(bh); + return 0; + } + offset+=de->d_rec_len; + } + brelse(bh); + } + return 1; +} + +static void xiafs_rm_entry(struct xiafs_direct *de, struct xiafs_direct * de_pre) +{ + if (de==de_pre) { + de->d_ino=0; + return; + } + while (de_pre->d_rec_len+(u_char *)de_pre < (u_char *)de) { + if (de_pre->d_rec_len < 12) { + printk("XIA-FS: bad directory entry (%s 
%d)\n", WHERE_ERR); + return; + } + de_pre=(struct xiafs_direct *)(de_pre->d_rec_len+(u_char *)de_pre); + } + if (de_pre->d_rec_len+(u_char *)de_pre > (u_char *)de) { + printk("XIA-FS: bad directory entry (%s %d)\n", WHERE_ERR); + return; + } + de_pre->d_rec_len+=de->d_rec_len; +} + +int xiafs_rmdir(struct inode * dir, const char * name, int len) +{ + int retval; + struct inode * inode; + struct buffer_head * bh; + struct xiafs_direct * de, * de_pre; + + inode = NULL; + bh = xiafs_find_entry(dir, name, len, &de, &de_pre); + retval = -ENOENT; + if (!bh) + goto end_rmdir; + retval = -EPERM; + if (!(inode = iget(dir->i_sb, de->d_ino))) + goto end_rmdir; + if ((dir->i_mode & S_ISVTX) && !fsuser() && + current->fsuid != inode->i_uid && + current->fsuid != dir->i_uid) + goto end_rmdir; + if (inode->i_dev != dir->i_dev) + goto end_rmdir; + if (inode == dir) /* we may not delete ".", but "../dir" is ok */ + goto end_rmdir; + if (!S_ISDIR(inode->i_mode)) { + retval = -ENOTDIR; + goto end_rmdir; + } + if (!empty_dir(inode)) { + retval = -ENOTEMPTY; + goto end_rmdir; + } + if (inode->i_count > 1) { + retval = -EBUSY; + goto end_rmdir; + } + if (inode->i_nlink != 2) + printk("XIA-FS: empty directory has nlink!=2 (%s %d)\n", WHERE_ERR); + xiafs_rm_entry(de, de_pre); + mark_buffer_dirty(bh, 1); + inode->i_nlink=0; + inode->i_dirt=1; + dir->i_nlink--; + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + dir->i_dirt=1; + retval = 0; +end_rmdir: + iput(dir); + iput(inode); + brelse(bh); + return retval; +} + +int xiafs_unlink(struct inode * dir, const char * name, int len) +{ + int retval; + struct inode * inode; + struct buffer_head * bh; + struct xiafs_direct * de, * de_pre; + +repeat: + retval = -ENOENT; + inode = NULL; + bh = xiafs_find_entry(dir, name, len, &de, &de_pre); + if (!bh) + goto end_unlink; + if (!(inode = iget(dir->i_sb, de->d_ino))) + goto end_unlink; + retval = -EPERM; + if (S_ISDIR(inode->i_mode)) + goto end_unlink; + if (de->d_ino != inode->i_ino) { 
+ iput(inode); + brelse(bh); + current->counter = 0; + schedule(); + goto repeat; + } + if ((dir->i_mode & S_ISVTX) && !fsuser() && + current->fsuid != inode->i_uid && + current->fsuid != dir->i_uid) + goto end_unlink; + if (!inode->i_nlink) { + printk("XIA-FS: Deleting nonexistent file (%s %d)\n", WHERE_ERR); + inode->i_nlink=1; + } + xiafs_rm_entry(de, de_pre); + mark_buffer_dirty(bh, 1); + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + dir->i_dirt = 1; + inode->i_nlink--; + inode->i_dirt = 1; + retval = 0; +end_unlink: + brelse(bh); + iput(inode); + iput(dir); + return retval; +} + +int xiafs_symlink(struct inode * dir, const char * name, + int len, const char * symname) +{ + struct xiafs_direct * de; + struct inode * inode = NULL; + struct buffer_head * bh = NULL, * name_block = NULL; + int i; + char c; + + bh = xiafs_find_entry(dir,name,len, &de, NULL); + if (bh) { + brelse(bh); + iput(dir); + return -EEXIST; + } + if (!(inode = xiafs_new_inode(dir))) { + iput(dir); + return -ENOSPC; + } + inode->i_mode = S_IFLNK | S_IRWXUGO; + inode->i_op = &xiafs_symlink_inode_operations; + name_block = xiafs_bread(inode,0,1); + if (!name_block) { + iput(dir); + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + return -ENOSPC; + } + for (i = 0; i < BLOCK_SIZE-1 && (c=*symname++); i++) + name_block->b_data[i] = c; + name_block->b_data[i] = 0; + mark_buffer_dirty(name_block, 1); + brelse(name_block); + inode->i_size = i; + inode->i_dirt = 1; + bh = xiafs_add_entry(dir, name, len, &de, NULL); + if (!bh) { + inode->i_nlink--; + inode->i_dirt = 1; + iput(inode); + iput(dir); + return -ENOSPC; + } + de->d_ino = inode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + iput(inode); + return 0; +} + +int xiafs_link(struct inode * oldinode, struct inode * dir, + const char * name, int len) +{ + struct xiafs_direct * de; + struct buffer_head * bh; + + if (S_ISDIR(oldinode->i_mode)) { + iput(oldinode); + iput(dir); + return -EPERM; + } + if 
(oldinode->i_nlink > 64000) { + iput(oldinode); + iput(dir); + return -EMLINK; + } + bh = xiafs_find_entry(dir, name, len, &de, NULL); + if (bh) { + brelse(bh); + iput(dir); + iput(oldinode); + return -EEXIST; + } + bh = xiafs_add_entry(dir, name, len, &de, NULL); + if (!bh) { + iput(dir); + iput(oldinode); + return -ENOSPC; + } + de->d_ino = oldinode->i_ino; + mark_buffer_dirty(bh, 1); + brelse(bh); + iput(dir); + oldinode->i_nlink++; + oldinode->i_ctime = CURRENT_TIME; + oldinode->i_dirt = 1; + iput(oldinode); + return 0; +} + +static int subdir(struct inode * new_inode, struct inode * old_inode) +{ + int ino; + int result; + + new_inode->i_count++; + result = 0; + for (;;) { + if (new_inode == old_inode) { + result = 1; + break; + } + if (new_inode->i_dev != old_inode->i_dev) + break; + ino = new_inode->i_ino; + if (xiafs_lookup(new_inode,"..",2,&new_inode)) + break; + if (new_inode->i_ino == ino) + break; + } + iput(new_inode); + return result; +} + +#define PARENT_INO(buffer) \ + (((struct xiafs_direct *) ((u_char *)(buffer) + 12))->d_ino) + +/* + * rename uses retry to avoid race-conditions: at least they should be minimal. + * it tries to allocate all the blocks, then sanity-checks, and if the sanity- + * checks fail, it tries to restart itself again. Very practical - no changes + * are done until we know everything works ok.. and then all the changes can be + * done in one fell swoop when we have claimed all the buffers needed. + * + * Anybody can rename anything with this: the permission checks are left to the + * higher-level routines. 
+ */ +static int do_xiafs_rename(struct inode * old_dir, const char * old_name, + int old_len, struct inode * new_dir, + const char * new_name, int new_len) +{ + struct inode * old_inode, * new_inode; + struct buffer_head * old_bh, * new_bh, * dir_bh; + struct xiafs_direct * old_de, * old_de_pre, * new_de, * new_de_pre; + int retval; + +try_again: + old_inode = new_inode = NULL; + old_bh = new_bh = dir_bh = NULL; + old_bh = xiafs_find_entry(old_dir, old_name, old_len, &old_de, &old_de_pre); + retval = -ENOENT; + if (!old_bh) + goto end_rename; + old_inode = __iget(old_dir->i_sb, old_de->d_ino, 0); /* don't cross mnt-points */ + if (!old_inode) + goto end_rename; + retval = -EPERM; + if ((old_dir->i_mode & S_ISVTX) && + current->fsuid != old_inode->i_uid && + current->fsuid != old_dir->i_uid && !fsuser()) + goto end_rename; + new_bh = xiafs_find_entry(new_dir, new_name, new_len, &new_de, NULL); + if (new_bh) { + new_inode = __iget(new_dir->i_sb, new_de->d_ino, 0); + if (!new_inode) { + brelse(new_bh); + new_bh = NULL; + } + } + if (new_inode == old_inode) { + retval = 0; + goto end_rename; + } + if (new_inode && S_ISDIR(new_inode->i_mode)) { + retval = -EEXIST; + goto end_rename; + } + retval = -EPERM; + if (new_inode && (new_dir->i_mode & S_ISVTX) && + current->fsuid != new_inode->i_uid && + current->fsuid != new_dir->i_uid && !fsuser()) + goto end_rename; + if (S_ISDIR(old_inode->i_mode)) { + retval = -EEXIST; + if (new_bh) + goto end_rename; + retval = -EACCES; + if (!permission(old_inode, MAY_WRITE)) + goto end_rename; + retval = -EINVAL; + if (subdir(new_dir, old_inode)) + goto end_rename; + retval = -EIO; + dir_bh = xiafs_bread(old_inode,0,0); + if (!dir_bh) + goto end_rename; + if (PARENT_INO(dir_bh->b_data) != old_dir->i_ino) + goto end_rename; + retval = -EMLINK; + if (new_dir->i_nlink > 64000) + goto end_rename; + } + if (!new_bh) + new_bh = xiafs_add_entry(new_dir, new_name, new_len, &new_de, &new_de_pre); + retval = -ENOSPC; + if (!new_bh) + goto 
end_rename; + /* sanity checking */ + if ( (new_inode && (new_de->d_ino != new_inode->i_ino)) + || (new_de->d_ino && !new_inode) + || (old_de->d_ino != old_inode->i_ino)) { + xiafs_rm_entry(new_de, new_de_pre); + brelse(old_bh); + brelse(new_bh); + brelse(dir_bh); + iput(old_inode); + iput(new_inode); + current->counter=0; + schedule(); + goto try_again; + } + xiafs_rm_entry(old_de, old_de_pre); + new_de->d_ino = old_inode->i_ino; + if (new_inode) { + new_inode->i_nlink--; + new_inode->i_dirt = 1; + } + mark_buffer_dirty(old_bh, 1); + mark_buffer_dirty(new_bh, 1); + if (dir_bh) { + PARENT_INO(dir_bh->b_data) = new_dir->i_ino; + mark_buffer_dirty(dir_bh, 1); + old_dir->i_nlink--; + new_dir->i_nlink++; + old_dir->i_dirt = 1; + new_dir->i_dirt = 1; + } + retval = 0; +end_rename: + brelse(dir_bh); + brelse(old_bh); + brelse(new_bh); + iput(old_inode); + iput(new_inode); + iput(old_dir); + iput(new_dir); + return retval; +} + +/* + * Ok, rename also locks out other renames, as they can change the parent of + * a directory, and we don't want any races. Other races are checked for by + * "do_rename()", which restarts if there are inconsistencies. + * + * Note that there is no race between different filesystems: it's only within + * the same device that races occur: many renames can happen at once, as long + * as they are on different partitions. + */ +int xiafs_rename(struct inode * old_dir, const char * old_name, int old_len, + struct inode * new_dir, const char * new_name, int new_len) +{ + static struct wait_queue * wait = NULL; + static int lock = 0; + int result; + + while (lock) + sleep_on(&wait); + lock = 1; + result = do_xiafs_rename(old_dir, old_name, old_len, + new_dir, new_name, new_len); + lock = 0; + wake_up(&wait); + return result; +} diff --git a/fs/xiafs/symlink.c b/fs/xiafs/symlink.c new file mode 100644 index 000000000..757ad5796 --- /dev/null +++ b/fs/xiafs/symlink.c @@ -0,0 +1,118 @@ +/* + * linux/fs/xiafs/symlink.c + * + * Copyright (C) Q. 
Frank Xia, 1993. + * + * Based on Linus' minix/symlink.c + * Copyright (C) Linus Torvalds, 1991, 1992. + * + * This software may be redistributed per Linux Copyright. + */ + +#include <asm/segment.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/xia_fs.h> +#include <linux/stat.h> + +static int +xiafs_readlink(struct inode *, char *, int); + +static int +xiafs_follow_link(struct inode *, struct inode *, int, int, struct inode **); + +/* + * symlinks can't do much... + */ +struct inode_operations xiafs_symlink_inode_operations = { + NULL, /* no file-operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + xiafs_readlink, /* readlink */ + xiafs_follow_link, /* follow_link */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static int xiafs_readlink(struct inode * inode, char * buffer, int buflen) +{ + struct buffer_head * bh; + int i; + char c; + + if (!S_ISLNK(inode->i_mode)) { + iput(inode); + return -EINVAL; + } + if (buflen > BLOCK_SIZE) + buflen = BLOCK_SIZE; + bh = xiafs_bread(inode, 0, 0); + if (!IS_RDONLY (inode)) { + inode->i_atime=CURRENT_TIME; + inode->i_dirt=1; + } + iput(inode); + if (!bh) + return 0; + for (i=0; i < buflen && (c=bh->b_data[i]); i++) + put_fs_byte(c, buffer++); + if (i < buflen-1) + put_fs_byte((char)0, buffer); + brelse(bh); + return i; +} + +static int xiafs_follow_link(struct inode * dir, struct inode * inode, + int flag, int mode, struct inode ** res_inode) +{ + int error; + struct buffer_head * bh; + + *res_inode = NULL; + if (!dir) { + dir = current->fs->root; + dir->i_count++; + } + if (!inode) { + iput(dir); + return -ENOENT; + } + if (!S_ISLNK(inode->i_mode)) { + iput(dir); + *res_inode = inode; + return 0; + } + if (!IS_RDONLY (inode)) { + inode->i_atime=CURRENT_TIME; + inode->i_dirt=1; + } + if (current->link_count 
> 5) { + iput(inode); + iput(dir); + return -ELOOP; + } + if (!(bh = xiafs_bread(inode, 0, 0))) { + iput(inode); + iput(dir); + return -EIO; + } + iput(inode); + current->link_count++; + error = open_namei(bh->b_data,flag,mode,res_inode,dir); + current->link_count--; + brelse(bh); + return error; +} + + + diff --git a/fs/xiafs/truncate.c b/fs/xiafs/truncate.c new file mode 100644 index 000000000..bdb9d39be --- /dev/null +++ b/fs/xiafs/truncate.c @@ -0,0 +1,197 @@ +/* + * linux/fs/xiafs/truncate.c + * + * Copyright (C) Q. Frank Xia, 1993. + * + * Based on Linus' minix/truncate.c + * Copyright (C) Linus Torvalds, 1991, 1992. + * + * This software may be redistributed per Linux Copyright. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/xia_fs.h> +#include <linux/stat.h> +#include <linux/fcntl.h> + +#include "xiafs_mac.h" + +/* + * Linus' comment: + * + * Truncate has the most races in the whole filesystem: coding it is + * a pain in the a**. Especially as I don't do any locking... + * + * The code may look a bit weird, but that's just because I've tried to + * handle things like file-size changes in a somewhat graceful manner. + * Anyway, truncating a file at the same time somebody else writes to it + * is likely to result in pretty weird behaviour... + * + * The new code handles normal truncates (size = 0) as well as the more + * general case (size = XXX). I hope. 
+ */ + +#define DT_ZONE ((inode->i_size + XIAFS_ZSIZE(inode->i_sb) - 1) \ + >> XIAFS_ZSIZE_BITS(inode->i_sb) ) + +static int trunc_direct(struct inode * inode) +{ + u_long * lp; + struct buffer_head * bh; + int i, tmp; + int retry = 0; + +repeat: + for (i = DT_ZONE ; i < 8 ; i++) { + if (i < DT_ZONE) + goto repeat; + lp=i + inode->u.xiafs_i.i_zone; + if (!(tmp = *lp)) + continue; + bh = getblk(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); + if (i < DT_ZONE) { + brelse(bh); + goto repeat; + } + if ((bh && bh->b_count != 1) || tmp != *lp) + retry = 1; + else { + *lp = 0; + inode->i_dirt = 1; + inode->i_blocks-=2 << XIAFS_ZSHIFT(inode->i_sb); + xiafs_free_zone(inode->i_sb, tmp); + } + brelse(bh); + } + return retry; +} + +static int trunc_indirect(struct inode * inode, int addr_off, u_long * lp) +{ + +#define INDT_ZONE (DT_ZONE - addr_off) + + struct buffer_head * bh, * ind_bh; + int i, tmp; + u_long * indp; + int retry = 0; + + if ( !(tmp=*lp) ) + return 0; + ind_bh = bread(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); + if (tmp != *lp) { + brelse(ind_bh); + return 1; + } + if (!ind_bh) { + *lp = 0; + return 0; + } +repeat: + for (i = INDT_ZONE<0?0:INDT_ZONE; i < XIAFS_ADDRS_PER_Z(inode->i_sb); i++) { + if (i < INDT_ZONE) + goto repeat; + indp = i+(u_long *) ind_bh->b_data; + if (!(tmp=*indp)) + continue; + bh = getblk(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); + if (i < INDT_ZONE) { + brelse(bh); + goto repeat; + } + if ((bh && bh->b_count != 1) || tmp != *indp) + retry = 1; + else { + *indp = 0; + mark_buffer_dirty(ind_bh, 1); + inode->i_blocks-= 2 << XIAFS_ZSHIFT(inode->i_sb); + xiafs_free_zone(inode->i_sb, tmp); + } + brelse(bh); + } + indp = (u_long *) ind_bh->b_data; + for (i = 0; i < XIAFS_ADDRS_PER_Z(inode->i_sb) && !(*indp++); i++) ; + if (i >= XIAFS_ADDRS_PER_Z(inode->i_sb)) { + if (ind_bh->b_count != 1) + retry = 1; + else { + tmp = *lp; + *lp = 0; + inode->i_blocks-= 2 << XIAFS_ZSHIFT(inode->i_sb); + xiafs_free_zone(inode->i_sb, tmp); + } + } + 
brelse(ind_bh); + return retry; +} + +static int trunc_dindirect(struct inode * inode) +{ + +#define DINDT_ZONE \ + ((DT_ZONE-XIAFS_ADDRS_PER_Z(inode->i_sb)-8)>>XIAFS_ADDRS_PER_Z_BITS(inode->i_sb)) + + int i, tmp; + struct buffer_head * dind_bh; + u_long * dindp, * lp; + int retry = 0; + + lp = &(inode->u.xiafs_i.i_dind_zone); + if (!(tmp = *lp)) + return 0; + dind_bh = bread(inode->i_dev, tmp, XIAFS_ZSIZE(inode->i_sb)); + if (tmp != *lp) { + brelse(dind_bh); + return 1; + } + if (!dind_bh) { + *lp = 0; + return 0; + } +repeat: + for (i=DINDT_ZONE<0?0:DINDT_ZONE ; i < XIAFS_ADDRS_PER_Z(inode->i_sb) ; i ++) { + if (i < DINDT_ZONE) + goto repeat; + dindp = i+(u_long *) dind_bh->b_data; + retry |= trunc_indirect(inode, + 8+((1+i)<<XIAFS_ADDRS_PER_Z_BITS(inode->i_sb)), + dindp); + mark_buffer_dirty(dind_bh, 1); + } + dindp = (u_long *) dind_bh->b_data; + for (i = 0; i < XIAFS_ADDRS_PER_Z(inode->i_sb) && !(*dindp++); i++); + if (i >= XIAFS_ADDRS_PER_Z(inode->i_sb)) { + if (dind_bh->b_count != 1) + retry = 1; + else { + tmp = *lp; + *lp = 0; + inode->i_dirt = 1; + inode->i_blocks-=2 << XIAFS_ZSHIFT(inode->i_sb); + xiafs_free_zone(inode->i_sb, tmp); + } + } + brelse(dind_bh); + return retry; +} + +void xiafs_truncate(struct inode * inode) +{ + int retry; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + while (1) { + retry = trunc_direct(inode); + retry |= trunc_indirect(inode, 8, &(inode->u.xiafs_i.i_ind_zone)); + retry |= trunc_dindirect(inode); + if (!retry) + break; + current->counter = 0; + schedule(); + } + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + inode->i_dirt = 1; +} diff --git a/fs/xiafs/xiafs_mac.h b/fs/xiafs/xiafs_mac.h new file mode 100644 index 000000000..05af6e42a --- /dev/null +++ b/fs/xiafs/xiafs_mac.h @@ -0,0 +1,32 @@ +/* + * linux/fs/xiafs/xiafs_mac.h + * + * Copyright (C) Q. Frank Xia, 1993. 
+ */ + +extern char internal_error_message[]; +#define INTERN_ERR internal_error_message, __FILE__, __LINE__ +#define WHERE_ERR __FILE__, __LINE__ + +#define XIAFS_ZSHIFT(sp) ((sp)->u.xiafs_sb.s_zone_shift) +#define XIAFS_ZSIZE(sp) (BLOCK_SIZE << XIAFS_ZSHIFT(sp)) +#define XIAFS_ZSIZE_BITS(sp) (BLOCK_SIZE_BITS + XIAFS_ZSHIFT(sp)) +#define XIAFS_ADDRS_PER_Z(sp) (BLOCK_SIZE >> (2 - XIAFS_ZSHIFT(sp))) +#define XIAFS_ADDRS_PER_Z_BITS(sp) (BLOCK_SIZE_BITS - 2 + XIAFS_ZSHIFT(sp)) +#define XIAFS_BITS_PER_Z(sp) (BLOCK_SIZE << (3 + XIAFS_ZSHIFT(sp))) +#define XIAFS_BITS_PER_Z_BITS(sp) (BLOCK_SIZE_BITS + 3 + XIAFS_ZSHIFT(sp)) +#define XIAFS_INODES_PER_Z(sp) (_XIAFS_INODES_PER_BLOCK << XIAFS_ZSHIFT(sp)) + +/* Use the most significant bytes of zone pointers to store block counter. */ +/* This is ugly, but it works. Note, We have another 7 bytes for "expansion". */ + +#define XIAFS_GET_BLOCKS(row_ip, blocks) \ + blocks=((((row_ip)->i_zone[0] >> 24) & 0xff )|\ + (((row_ip)->i_zone[1] >> 16) & 0xff00 )|\ + (((row_ip)->i_zone[2] >> 8) & 0xff0000 ) ) + +/* XIAFS_PUT_BLOCKS should be called before saving zone pointers */ +#define XIAFS_PUT_BLOCKS(row_ip, blocks) \ + (row_ip)->i_zone[2]=((blocks)<< 8) & 0xff000000;\ + (row_ip)->i_zone[1]=((blocks)<<16) & 0xff000000;\ + (row_ip)->i_zone[0]=((blocks)<<24) & 0xff000000 |