diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2001-01-31 22:22:27 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2001-01-31 22:22:27 +0000 |
commit | 825423e4c4f18289df2393951cfd2a7a31fc0464 (patch) | |
tree | 4ad80e981c3d9effa910d2247d118d254f9a5d09 /include/linux | |
parent | c4693dc4856ab907a5c02187a8d398861bebfc7e (diff) |
Merge with Linux 2.4.1.
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/acpi.h | 15 | ||||
-rw-r--r-- | include/linux/blk.h | 6 | ||||
-rw-r--r-- | include/linux/blkdev.h | 28 | ||||
-rw-r--r-- | include/linux/dn.h | 7 | ||||
-rw-r--r-- | include/linux/elevator.h | 78 | ||||
-rw-r--r-- | include/linux/fs.h | 73 | ||||
-rw-r--r-- | include/linux/lvm.h | 67 | ||||
-rw-r--r-- | include/linux/mm.h | 2 | ||||
-rw-r--r-- | include/linux/nfs_xdr.h | 4 | ||||
-rw-r--r-- | include/linux/raid/md_u.h | 1 | ||||
-rw-r--r-- | include/linux/reiserfs_fs.h | 2074 | ||||
-rw-r--r-- | include/linux/reiserfs_fs_i.h | 63 | ||||
-rw-r--r-- | include/linux/reiserfs_fs_sb.h | 398 | ||||
-rw-r--r-- | include/linux/rtc.h | 2 | ||||
-rw-r--r-- | include/linux/sched.h | 24 | ||||
-rw-r--r-- | include/linux/swap.h | 2 | ||||
-rw-r--r-- | include/linux/vt_kern.h | 2 |
17 files changed, 2675 insertions, 171 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ff1dcaf45..4fce3f847 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -28,21 +28,6 @@ #include <linux/wait.h> #endif /* __KERNEL__ */ -u64 acpi_get_rsdp_ptr(void); - -/* - * System sleep states - */ -enum -{ - ACPI_S0, /* working */ - ACPI_S1, /* sleep */ - ACPI_S2, /* sleep */ - ACPI_S3, /* sleep */ - ACPI_S4, /* non-volatile sleep */ - ACPI_S5, /* soft-off */ -}; - typedef int acpi_sstate_t; /* diff --git a/include/linux/blk.h b/include/linux/blk.h index 11db342a3..3f9c707c7 100644 --- a/include/linux/blk.h +++ b/include/linux/blk.h @@ -87,10 +87,6 @@ void initrd_init(void); static inline void blkdev_dequeue_request(struct request * req) { - if (req->e) { - req->e->dequeue_fn(req); - req->e = NULL; - } list_del(&req->queue); } @@ -322,7 +318,7 @@ static void floppy_off(unsigned int nr); #define DEVICE_NAME "ida" #define TIMEOUT_VALUE (25*HZ) -#define DEVICE_REQUEST do_ida_request0 +#define DEVICE_REQUEST do_ida_request #define DEVICE_NR(device) (MINOR(device) >> 4) #endif /* MAJOR_NR == whatever */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 851bf3c53..01cd38a83 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -23,8 +23,6 @@ struct request { int elevator_sequence; struct list_head table; - struct list_head *free_list; - volatile int rq_status; /* should split this into a few status bits */ #define RQ_INACTIVE (-1) #define RQ_ACTIVE 1 @@ -47,7 +45,6 @@ struct request { struct buffer_head * bh; struct buffer_head * bhtail; request_queue_t *q; - elevator_t *e; }; #include <linux/elevator.h> @@ -67,9 +64,10 @@ typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); typedef void (unplug_device_fn) (void *q); /* - * Default nr free requests per queue + * Default nr free requests per queue, ll_rw_blk will scale it down + * according to available RAM at init time */ -#define QUEUE_NR_REQUESTS 256 +#define QUEUE_NR_REQUESTS 8192 struct 
request_queue { @@ -77,6 +75,8 @@ struct request_queue * the queue request freelist, one for reads and one for writes */ struct list_head request_freelist[2]; + struct list_head pending_freelist[2]; + int pending_free[2]; /* * Together with queue_head for cacheline sharing @@ -116,7 +116,7 @@ struct request_queue * Is meant to protect the queue in the future instead of * io_request_lock */ - spinlock_t request_lock; + spinlock_t queue_lock; /* * Tasks wait here for free request @@ -152,6 +152,7 @@ extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, lon extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); extern void generic_make_request(int rw, struct buffer_head * bh); extern request_queue_t *blk_get_queue(kdev_t dev); +extern inline request_queue_t *__blk_get_queue(kdev_t dev); extern void blkdev_release_request(struct request *); /* @@ -162,6 +163,7 @@ extern void blk_cleanup_queue(request_queue_t *); extern void blk_queue_headactive(request_queue_t *, int); extern void blk_queue_pluggable(request_queue_t *, plug_device_fn *); extern void blk_queue_make_request(request_queue_t *, make_request_fn *); +extern void generic_unplug_device(void *); extern int * blk_size[MAX_BLKDEV]; @@ -175,9 +177,10 @@ extern int * max_sectors[MAX_BLKDEV]; extern int * max_segments[MAX_BLKDEV]; -#define MAX_SECTORS 254 +extern atomic_t queued_sectors; -#define MAX_SEGMENTS MAX_SECTORS +#define MAX_SEGMENTS 128 +#define MAX_SECTORS (MAX_SEGMENTS*8) #define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK) @@ -203,5 +206,14 @@ static inline int get_hardsect_size(kdev_t dev) return 512; } +#define blk_finished_io(nsects) \ + atomic_sub(nsects, &queued_sectors); \ + if (atomic_read(&queued_sectors) < 0) { \ + printk("block: queued_sectors < 0\n"); \ + atomic_set(&queued_sectors, 0); \ + } + +#define blk_started_io(nsects) \ + atomic_add(nsects, &queued_sectors); #endif diff --git 
a/include/linux/dn.h b/include/linux/dn.h index c7448158b..782cae49e 100644 --- a/include/linux/dn.h +++ b/include/linux/dn.h @@ -45,7 +45,12 @@ #define DSO_LINKINFO 7 /* Set/Get link information */ #define DSO_STREAM 8 /* Set socket type to stream */ #define DSO_SEQPACKET 9 /* Set socket type to sequenced packet */ -#define DSO_MAX 10 /* Maximum option number */ +#define DSO_MAXWINDOW 11 /* Maximum window size allowed */ +#define DSO_NODELAY 12 /* Turn off nagle */ +#define DSO_CORK 13 /* Wait for more data! */ +#define DSO_SERVICES 14 /* NSP Services field */ +#define DSO_INFO 15 /* NSP Info field */ +#define DSO_MAX 15 /* Maximum option number */ /* LINK States */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 16fa9cae6..9e9b27dfa 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -7,34 +7,32 @@ typedef void (elevator_fn) (struct request *, elevator_t *, struct list_head *, struct list_head *, int); -typedef int (elevator_merge_fn) (request_queue_t *, struct request **, - struct buffer_head *, int, int *, int *); +typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *, + struct buffer_head *, int, int, int); -typedef void (elevator_dequeue_fn) (struct request *); +typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int); + +typedef void (elevator_merge_req_fn) (struct request *, struct request *); struct elevator_s { - int sequence; - int read_latency; int write_latency; - int max_bomb_segments; - unsigned int nr_segments; - int read_pendings; - - elevator_fn * elevator_fn; elevator_merge_fn *elevator_merge_fn; - elevator_dequeue_fn *dequeue_fn; + elevator_merge_cleanup_fn *elevator_merge_cleanup_fn; + elevator_merge_req_fn *elevator_merge_req_fn; unsigned int queue_ID; }; -void elevator_noop(struct request *, elevator_t *, struct list_head *, struct list_head *, int); -int elevator_noop_merge(request_queue_t *, struct request **, struct buffer_head *, int, 
int *, int *); -void elevator_noop_dequeue(struct request *); -void elevator_linus(struct request *, elevator_t *, struct list_head *, struct list_head *, int); -int elevator_linus_merge(request_queue_t *, struct request **, struct buffer_head *, int, int *, int *); +int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int, int); +void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int); +void elevator_noop_merge_req(struct request *, struct request *); + +int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int, int); +void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int); +void elevator_linus_merge_req(struct request *, struct request *); typedef struct blkelv_ioctl_arg_s { int queue_ID; @@ -69,6 +67,10 @@ extern void elevator_init(elevator_t *, elevator_t); (s1)->sector < (s2)->sector)) || \ (s1)->rq_dev < (s2)->rq_dev) +#define BHRQ_IN_ORDER(bh, rq) \ + (((bh)->b_rdev == (rq)->rq_dev && \ + (bh)->b_rsector < (rq)->sector)) + static inline int elevator_request_latency(elevator_t * elevator, int rw) { int latency; @@ -80,36 +82,24 @@ static inline int elevator_request_latency(elevator_t * elevator, int rw) return latency; } -#define ELEVATOR_NOOP \ -((elevator_t) { \ - 0, /* sequence */ \ - \ - 0, /* read_latency */ \ - 0, /* write_latency */ \ - 0, /* max_bomb_segments */ \ - \ - 0, /* nr_segments */ \ - 0, /* read_pendings */ \ - \ - elevator_noop, /* elevator_fn */ \ - elevator_noop_merge, /* elevator_merge_fn */ \ - elevator_noop_dequeue, /* dequeue_fn */ \ +#define ELEVATOR_NOOP \ +((elevator_t) { \ + 0, /* read_latency */ \ + 0, /* write_latency */ \ + \ + elevator_noop_merge, /* elevator_merge_fn */ \ + elevator_noop_merge_cleanup, /* elevator_merge_cleanup_fn */ \ + elevator_noop_merge_req, /* elevator_merge_req_fn */ \ }) -#define ELEVATOR_LINUS \ -((elevator_t) { \ - 0, /* not used */ \ - \ - 1000000, 
/* read passovers */ \ - 2000000, /* write passovers */ \ - 0, /* max_bomb_segments */ \ - \ - 0, /* not used */ \ - 0, /* not used */ \ - \ - elevator_linus, /* elevator_fn */ \ - elevator_linus_merge, /* elevator_merge_fn */ \ - elevator_noop_dequeue, /* dequeue_fn */ \ +#define ELEVATOR_LINUS \ +((elevator_t) { \ + 8192, /* read passovers */ \ + 16384, /* write passovers */ \ + \ + elevator_linus_merge, /* elevator_merge_fn */ \ + elevator_linus_merge_cleanup, /* elevator_merge_cleanup_fn */ \ + elevator_linus_merge_req, /* elevator_merge_req_fn */ \ }) #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 8032db992..686aef31a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -288,6 +288,7 @@ extern void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long #include <linux/hfs_fs_i.h> #include <linux/adfs_fs_i.h> #include <linux/qnx4_fs_i.h> +#include <linux/reiserfs_fs_i.h> #include <linux/bfs_fs_i.h> #include <linux/udf_fs_i.h> #include <linux/ncp_fs_i.h> @@ -450,6 +451,7 @@ struct inode { struct hfs_inode_info hfs_i; struct adfs_inode_info adfs_i; struct qnx4_inode_info qnx4_i; + struct reiserfs_inode_info reiserfs_i; struct bfs_inode_info bfs_i; struct udf_inode_info udf_i; struct ncp_inode_info ncpfs_i; @@ -460,35 +462,6 @@ struct inode { } u; }; -/* Inode state bits.. 
*/ -#define I_DIRTY_SYNC 1 /* Not dirty enough for O_DATASYNC */ -#define I_DIRTY_DATASYNC 2 /* Data-related inode changes pending */ -#define I_DIRTY_PAGES 4 /* Data-related inode changes pending */ -#define I_LOCK 8 -#define I_FREEING 16 -#define I_CLEAR 32 - -#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) - -extern void __mark_inode_dirty(struct inode *, int); -static inline void mark_inode_dirty(struct inode *inode) -{ - if ((inode->i_state & I_DIRTY) != I_DIRTY) - __mark_inode_dirty(inode, I_DIRTY); -} - -static inline void mark_inode_dirty_sync(struct inode *inode) -{ - if (!(inode->i_state & I_DIRTY_SYNC)) - __mark_inode_dirty(inode, I_DIRTY_SYNC); -} - -static inline void mark_inode_dirty_pages(struct inode *inode) -{ - if (inode && !(inode->i_state & I_DIRTY_PAGES)) - __mark_inode_dirty(inode, I_DIRTY_PAGES); -} - struct fown_struct { int pid; /* pid or -pgrp where SIGIO should be sent */ uid_t uid, euid; /* uid/euid of process setting the owner */ @@ -654,6 +627,7 @@ struct quota_mount_options #include <linux/hfs_fs_sb.h> #include <linux/adfs_fs_sb.h> #include <linux/qnx4_fs_sb.h> +#include <linux/reiserfs_fs_sb.h> #include <linux/bfs_fs_sb.h> #include <linux/udf_fs_sb.h> #include <linux/ncp_fs_sb.h> @@ -702,6 +676,7 @@ struct super_block { struct hfs_sb_info hfs_sb; struct adfs_sb_info adfs_sb; struct qnx4_sb_info qnx4_sb; + struct reiserfs_sb_info reiserfs_sb; struct bfs_sb_info bfs_sb; struct udf_sb_info udf_sb; struct ncp_sb_info ncpfs_sb; @@ -815,17 +790,54 @@ struct inode_operations { */ struct super_operations { void (*read_inode) (struct inode *); + + /* reiserfs kludge. reiserfs needs 64 bits of information to + ** find an inode. We are using the read_inode2 call to get + ** that information. We don't like this, and are waiting on some + ** VFS changes for the real solution. 
+ ** iget4 calls read_inode2, iff it is defined + */ + void (*read_inode2) (struct inode *, void *) ; + void (*dirty_inode) (struct inode *); void (*write_inode) (struct inode *, int); void (*put_inode) (struct inode *); void (*delete_inode) (struct inode *); void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); + void (*write_super_lockfs) (struct super_block *); + void (*unlockfs) (struct super_block *); int (*statfs) (struct super_block *, struct statfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); void (*umount_begin) (struct super_block *); }; +/* Inode state bits.. */ +#define I_DIRTY_SYNC 1 /* Not dirty enough for O_DATASYNC */ +#define I_DIRTY_DATASYNC 2 /* Data-related inode changes pending */ +#define I_DIRTY_PAGES 4 /* Data-related inode changes pending */ +#define I_LOCK 8 +#define I_FREEING 16 +#define I_CLEAR 32 + +#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) + +extern void __mark_inode_dirty(struct inode *, int); +static inline void mark_inode_dirty(struct inode *inode) +{ + __mark_inode_dirty(inode, I_DIRTY); +} + +static inline void mark_inode_dirty_sync(struct inode *inode) +{ + __mark_inode_dirty(inode, I_DIRTY_SYNC); +} + +static inline void mark_inode_dirty_pages(struct inode *inode) +{ + __mark_inode_dirty(inode, I_DIRTY_PAGES); +} + struct dquot_operations { void (*initialize) (struct inode *, short); void (*drop) (struct inode *); @@ -988,6 +1000,9 @@ extern int fs_may_remount_ro(struct super_block *); extern int try_to_free_buffers(struct page *, int); extern void refile_buffer(struct buffer_head * buf); +/* reiserfs_writepage needs this */ +extern void set_buffer_async_io(struct buffer_head *bh) ; + #define BUF_CLEAN 0 #define BUF_LOCKED 1 /* Buffers scheduled for write */ #define BUF_DIRTY 2 /* Dirty buffers, not yet scheduled for write */ diff --git a/include/linux/lvm.h b/include/linux/lvm.h index 7b4ff94f9..4e95eb73d 100644 --- 
a/include/linux/lvm.h +++ b/include/linux/lvm.h @@ -9,6 +9,7 @@ * May-July 1998 * January-March,July,September,October,Dezember 1999 * January,February,July,November 2000 + * January 2001 * * lvm is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -57,6 +58,8 @@ * 26/06/2000 - implemented snapshot persistency and resizing support * 02/11/2000 - added hash table size member to lv structure * 12/11/2000 - removed unneeded timestamp definitions + * 24/12/2000 - removed LVM_TO_{CORE,DISK}*, use cpu_{from, to}_le* + * instead - Christoph Hellwig * */ @@ -64,11 +67,10 @@ #ifndef _LVM_H_INCLUDE #define _LVM_H_INCLUDE -#define _LVM_KERNEL_H_VERSION "LVM 0.9 (13/11/2000)" +#define _LVM_KERNEL_H_VERSION "LVM 0.9.1_beta2 (18/01/2001)" #include <linux/config.h> #include <linux/version.h> -#include <endian.h> /* * preprocessor definitions @@ -77,8 +79,7 @@ #define LVM_TOTAL_RESET #ifdef __KERNEL__ -#define LVM_GET_INODE -#undef LVM_HD_NAME /* display nice names in /proc/partitions */ +#undef LVM_HD_NAME /* display nice names in /proc/partitions */ /* lots of debugging output (see driver source) #define DEBUG_LVM_GET_INFO @@ -109,6 +110,7 @@ #ifdef __KERNEL__ #include <linux/spinlock.h> + #include <asm/semaphore.h> #endif /* #ifdef __KERNEL__ */ @@ -216,15 +218,13 @@ * * 1K volume group structure ~200 byte * - * 5K time stamp structure ~ - * * 6K namelist of physical volumes 128 byte each * - * 6k + n * 128byte n logical volume structures ~300 byte each + * 6k + n * ~300byte n logical volume structures ~300 byte each * - * + m * 328byte m physical extent alloc. structs 4 byte each + * + m * 4byte m physical extent alloc. 
structs 4 byte each * - * End of disk - first physical extent typical 4 megabyte + * End of disk - first physical extent typically 4 megabyte * PE total * * PE size * @@ -292,7 +292,7 @@ #define LVM_MAX_PE_SIZE ( 16L * 1024L * 1024L / SECTOR_SIZE * 1024) /* 16GB in sectors */ #define LVM_DEFAULT_PE_SIZE ( 4096L * 1024 / SECTOR_SIZE) /* 4 MB in sectors */ #define LVM_DEFAULT_STRIPE_SIZE 16L /* 16 KB */ -#define LVM_MIN_STRIPE_SIZE ( PAGE_SIZE>>9) /* PAGESIZE in sectors */ +#define LVM_MIN_STRIPE_SIZE ( PAGE_SIZE/SECTOR_SIZE) /* PAGESIZE in sectors */ #define LVM_MAX_STRIPE_SIZE ( 512L * 1024 / SECTOR_SIZE) /* 512 KB in sectors */ #define LVM_MAX_STRIPES 128 /* max # of stripes */ #define LVM_MAX_SIZE ( 1024LU * 1024 / SECTOR_SIZE * 1024 * 1024) /* 1TB[sectors] */ @@ -326,51 +326,6 @@ COW_table_entries_per_PE - COW_table_chunks_per_PE;}) -/* to disk and to core data conversion macros */ -#if __BYTE_ORDER == __BIG_ENDIAN - -#define LVM_TO_CORE16(x) ( \ - ((uint16_t)((((uint16_t)(x) & 0x00FFU) << 8) | \ - (((uint16_t)(x) & 0xFF00U) >> 8)))) - -#define LVM_TO_DISK16(x) LVM_TO_CORE16(x) - -#define LVM_TO_CORE32(x) ( \ - ((uint32_t)((((uint32_t)(x) & 0x000000FFU) << 24) | \ - (((uint32_t)(x) & 0x0000FF00U) << 8))) \ - (((uint32_t)(x) & 0x00FF0000U) >> 8))) \ - (((uint32_t)(x) & 0xFF000000U) >> 24)))) - -#define LVM_TO_DISK32(x) LVM_TO_CORE32(x) - -#define LVM_TO_CORE64(x) \ - ((uint64_t)((((uint64_t)(x) & 0x00000000000000FFULL) << 56) | \ - (((uint64_t)(x) & 0x000000000000FF00ULL) << 40) | \ - (((uint64_t)(x) & 0x0000000000FF0000ULL) << 24) | \ - (((uint64_t)(x) & 0x00000000FF000000ULL) << 8) | \ - (((uint64_t)(x) & 0x000000FF00000000ULL) >> 8) | \ - (((uint64_t)(x) & 0x0000FF0000000000ULL) >> 24) | \ - (((uint64_t)(x) & 0x00FF000000000000ULL) >> 40) | \ - (((uint64_t)(x) & 0xFF00000000000000ULL) >> 56))) - -#define LVM_TO_DISK64(x) LVM_TO_CORE64(x) - -#elif __BYTE_ORDER == __LITTLE_ENDIAN - -#define LVM_TO_CORE16(x) x -#define LVM_TO_DISK16(x) x -#define LVM_TO_CORE32(x) 
x -#define LVM_TO_DISK32(x) x -#define LVM_TO_CORE64(x) x -#define LVM_TO_DISK64(x) x - -#else - -#error "__BYTE_ORDER must be defined as __LITTLE_ENDIAN or __BIG_ENDIAN" - -#endif /* #if __BYTE_ORDER == __BIG_ENDIAN */ - - /* * ioctls */ @@ -687,6 +642,8 @@ typedef struct lv_v4 { wait_queue_head_t lv_snapshot_wait; int lv_snapshot_use_rate; void *vg; + + uint lv_allocated_snapshot_le; #else char dummy[200]; #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 245201241..713b01316 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -386,6 +386,7 @@ extern void clear_page_tables(struct mm_struct *, unsigned long, int); struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int no_share); struct file *shmem_file_setup(char * name, loff_t size); +extern void shmem_lock(struct file * file, int lock); extern int shmem_zero_setup(struct vm_area_struct *); extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size); @@ -464,6 +465,7 @@ extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int); #else #define __GFP_HIGHMEM 0x0 /* noop */ #endif +#define __GFP_VM 0x20 #define GFP_BUFFER (__GFP_HIGH | __GFP_WAIT) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 1ee4dd616..4c77c2081 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -74,7 +74,7 @@ struct nfs_readargs { struct nfs_readres { struct nfs_fattr * fattr; - unsigned int count; + __u32 count; int eof; }; @@ -84,7 +84,7 @@ struct nfs_readres { #define NFS_WRITE_MAXIOV 8 struct nfs_writeargs { struct nfs_fh * fh; - __u32 offset; + __u64 offset; __u32 count; enum nfs3_stable_how stable; unsigned int nriov; diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h index c96b0e404..22a154380 100644 --- a/include/linux/raid/md_u.h +++ b/include/linux/raid/md_u.h @@ -22,6 +22,7 @@ #define GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, mdu_array_info_t) #define GET_DISK_INFO _IOR (MD_MAJOR, 
0x12, mdu_disk_info_t) #define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13) +#define RAID_AUTORUN _IO (MD_MAJOR, 0x14) /* configuration */ #define CLEAR_ARRAY _IO (MD_MAJOR, 0x20) diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h new file mode 100644 index 000000000..277281d6d --- /dev/null +++ b/include/linux/reiserfs_fs.h @@ -0,0 +1,2074 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + + /* this file has an amazingly stupid + name, yura please fix it to be + reiserfs.h, and merge all the rest + of our .h files that are in this + directory into it. */ + + +#ifndef _LINUX_REISER_FS_H +#define _LINUX_REISER_FS_H + + +#include <linux/types.h> +#ifdef __KERNEL__ +#include <linux/malloc.h> +#include <linux/tqueue.h> +#endif + +/* + * include/linux/reiser_fs.h + * + * Reiser File System constants and structures + * + */ + +/* in reading the #defines, it may help to understand that they employ + the following abbreviations: + + B = Buffer + I = Item header + H = Height within the tree (should be changed to LEV) + N = Number of the item in the node + STAT = stat data + DEH = Directory Entry Header + EC = Entry Count + E = Entry number + UL = Unsigned Long + BLKH = BLocK Header + UNFM = UNForMatted node + DC = Disk Child + P = Path + + These #defines are named by concatenating these abbreviations, + where first comes the arguments, and last comes the return value, + of the macro. + +*/ + + /* Vladimir, what is the story with + new_get_new_buffer nowadays? I + want a complete explanation written + here. */ + +/* NEW_GET_NEW_BUFFER will try to allocate new blocks better */ +/*#define NEW_GET_NEW_BUFFER*/ +#define OLD_GET_NEW_BUFFER + + /* Vladimir, what about this one too? 
*/ +/* if this is undefined, all inode changes get into stat data immediately, if it can be found in RAM */ +#define DIRTY_LATER + +/* enable journalling */ +#define ENABLE_JOURNAL + +#ifdef __KERNEL__ + +/* #define REISERFS_CHECK */ + +#define REISERFS_PREALLOCATE +#endif +#define PREALLOCATION_SIZE 8 + +/* if this is undefined, all inode changes get into stat data + immediately, if it can be found in RAM */ +#define DIRTY_LATER + + +/*#define READ_LOCK_REISERFS*/ + + +/* n must be power of 2 */ +#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) + +// to be ok for alpha and others we have to align structures to 8 byte +// boundary. +// FIXME: do not change 4 by anything else: there is code which relies on that + /* what 4? -Hans */ +#define ROUND_UP(x) _ROUND_UP(x,8LL) + +/* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug +** messages. +*/ +#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ + +/* + * Disk Data Structures + */ + +/***************************************************************************/ +/* SUPER BLOCK */ +/***************************************************************************/ + +/* + * Structure of super block on disk, a version of which in RAM is often accessed as s->u.reiserfs_sb.s_rs + * the version in RAM is part of a larger structure containing fields never written to disk. + */ + + /* used by gcc */ +#define REISERFS_SUPER_MAGIC 0x52654973 + /* used by file system utilities that + look at the superblock, etc. */ +#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" +#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" + +extern inline int is_reiserfs_magic_string (struct reiserfs_super_block * rs) +{ + return (!strncmp (rs->s_magic, REISERFS_SUPER_MAGIC_STRING, + strlen ( REISERFS_SUPER_MAGIC_STRING)) || + !strncmp (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING, + strlen ( REISER2FS_SUPER_MAGIC_STRING))); +} + + /* ReiserFS leaves the first 64k unused, + so that partition labels have enough + space. 
If someone wants to write a + fancy bootloader that needs more than + 64k, let us know, and this will be + increased in size. This number must + be larger than the largest block + size on any platform, or code will + break. -Hans */ +#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) +#define REISERFS_FIRST_BLOCK unused_define + +/* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */ +#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024) + + +// reiserfs internal error code (used by search_by_key and fix_nodes) +#define CARRY_ON 0 +#define REPEAT_SEARCH -1 +#define IO_ERROR -2 +#define NO_DISK_SPACE -3 +#define NO_BALANCING_NEEDED (-4) +#define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5) + +//#define SCHEDULE_OCCURRED 1 +//#define PATH_INCORRECT 2 + +//#define NO_DISK_SPACE (-1) + + + +typedef unsigned long b_blocknr_t; +typedef __u32 unp_t; + + /* who is responsible for this + completely uncommented struct? */ +struct unfm_nodeinfo { + /* This is what? */ + unp_t unfm_nodenum; + /* now this I know what it is, and + most of the people on our project + know what it is, but I bet nobody + new I hire will have a clue. */ + unsigned short unfm_freespace; +}; + + +/* when reiserfs_file_write is called with a byte count >= MIN_PACK_ON_CLOSE, +** it sets the inode to pack on close, and when extending the file, will only +** use unformatted nodes. +** +** This is a big speed up for the journal, which is badly hurt by direct->indirect +** conversions (they must be logged).
+*/ +#define MIN_PACK_ON_CLOSE 512 + +/* the defines below say, that if file size is >= + DIRECT_TAIL_SUPPRESSION_SIZE * blocksize, then if tail is longer + than MAX_BYTES_SUPPRESS_DIRECT_TAIL, it will be stored in + unformatted node */ +#define DIRECT_TAIL_SUPPRESSION_SIZE 1024 +#define MAX_BYTES_SUPPRESS_DIRECT_TAIL 1024 + +#if 0 + +// +#define mark_file_with_tail(inode,offset) \ +{\ +inode->u.reiserfs_i.i_has_tail = 1;\ +} + +#define mark_file_without_tail(inode) \ +{\ +inode->u.reiserfs_i.i_has_tail = 0;\ +} + +#endif + +// this says about version of all items (but stat data) the object +// consists of +#define inode_items_version(inode) ((inode)->u.reiserfs_i.i_version) + + +/* We store tail in unformatted node if it is too big to fit into a + formatted node or if DIRECT_TAIL_SUPPRESSION_SIZE, + MAX_BYTES_SUPPRESS_DIRECT_TAIL and file size say that. */ +/* #define STORE_TAIL_IN_UNFM(n_file_size,n_tail_size,n_block_size) \ */ +/* ( ((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \ */ +/* ( ( (n_file_size) >= (n_block_size) * DIRECT_TAIL_SUPPRESSION_SIZE ) && \ */ +/* ( (n_tail_size) >= MAX_BYTES_SUPPRESS_DIRECT_TAIL ) ) ) */ + + /* This is an aggressive tail suppression policy, I am hoping it + improves our benchmarks. The principle behind it is that + percentage space saving is what matters, not absolute space + saving. This is non-intuitive, but it helps to understand it if + you consider that the cost to access 4 blocks is not much more + than the cost to access 1 block, if you have to do a seek and + rotate. A tail risks a non-linear disk access that is + significant as a percentage of total time cost for a 4 block file + and saves an amount of space that is less significant as a + percentage of space, or so goes the hypothesis. 
-Hans */ +#define STORE_TAIL_IN_UNFM(n_file_size,n_tail_size,n_block_size) \ +(\ + (!(n_tail_size)) || \ + (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \ + ( (n_file_size) >= (n_block_size) * 4 ) || \ + ( ( (n_file_size) >= (n_block_size) * 3 ) && \ + ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/4) ) || \ + ( ( (n_file_size) >= (n_block_size) * 2 ) && \ + ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/2) ) || \ + ( ( (n_file_size) >= (n_block_size) ) && \ + ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ +) + + +/* + * values for s_state field + */ +#define REISERFS_VALID_FS 1 +#define REISERFS_ERROR_FS 2 + + + +/***************************************************************************/ +/* KEY & ITEM HEAD */ +/***************************************************************************/ + +// +// we do support for old format of reiserfs: the problem is to +// distinguish keys with 32 bit offset and keys with 60 bit ones. On +// leaf level we use ih_version of struct item_head (was +// ih_reserved). For all old items it is set to 0 +// (ITEM_VERSION_1). For new items it is ITEM_VERSION_2. On internal +// levels we have to know version of item key belongs to.
+// +#define ITEM_VERSION_1 0 +#define ITEM_VERSION_2 1 + + +/* loff_t - long long */ + + +// +// directories use this key as well as old files +// +struct offset_v1 { + __u32 k_offset; + __u32 k_uniqueness; +} __attribute__ ((__packed__)); + +struct offset_v2 { + __u64 k_offset:60; + __u64 k_type: 4; +} __attribute__ ((__packed__)); + + + +/* Key of an item determines its location in the S+tree, and + is composed of 4 components */ +struct key { + __u32 k_dir_id; /* packing locality: by default parent + directory object id */ + __u32 k_objectid; /* object identifier */ + union { + struct offset_v1 k_offset_v1; + struct offset_v2 k_offset_v2; + } __attribute__ ((__packed__)) u; +} __attribute__ ((__packed__)); + + +struct cpu_key { + struct key on_disk_key; + int version; + int key_length; /* 3 in all cases but direct2indirect and + indirect2direct conversion */ +}; + + + + + + + + /* Our function for comparing keys can compare keys of different + lengths. It takes as a parameter the length of the keys it is to + compare. These defines are used in determining what is to be + passed to it as that parameter. 
*/ +#define REISERFS_FULL_KEY_LEN 4 + +#define REISERFS_SHORT_KEY_LEN 2 + +/* The result of the key compare */ +#define FIRST_GREATER 1 +#define SECOND_GREATER -1 +#define KEYS_IDENTICAL 0 +#define KEY_FOUND 1 +#define KEY_NOT_FOUND 0 + + +#define KEY_SIZE (sizeof(struct key)) +#define SHORT_KEY_SIZE (sizeof (__u32) + sizeof (__u32)) + +/* return values for search_by_key and clones */ +#define ITEM_FOUND 1 +#define ITEM_NOT_FOUND 0 +#define ENTRY_FOUND 1 +#define ENTRY_NOT_FOUND 0 +#define DIRECTORY_NOT_FOUND -1 +#define REGULAR_FILE_FOUND -2 +#define DIRECTORY_FOUND -3 +#define BYTE_FOUND 1 +#define BYTE_NOT_FOUND 0 +#define FILE_NOT_FOUND -1 + +#define POSITION_FOUND 1 +#define POSITION_NOT_FOUND 0 + +// return values for reiserfs_find_entry and search_by_entry_key +#define NAME_FOUND 1 +#define NAME_NOT_FOUND 0 +#define GOTO_PREVIOUS_ITEM 2 +#define NAME_FOUND_INVISIBLE 3 + + + +/* Everything in the filesystem is stored as a set of items. The + item head contains the key of the item, its free space (for + indirect items) and specifies the location of the item itself + within the block. */ + +struct item_head +{ + struct key ih_key; /* Everything in the tree is found by searching for it based on its key.*/ + + /* This is bloat, this should be part + of the item not the item + header. -Hans */ + union { + __u16 ih_free_space_reserved; /* The free space in the last unformatted node of an indirect item if this + is an indirect item. This equals 0xFFFF iff this is a direct item or + stat data item. Note that the key, not this field, is used to determine + the item type, and thus which field this union contains. */ + __u16 ih_entry_count; /* Iff this is a directory item, this field equals the number of directory + entries in the directory item. 
*/ + } __attribute__ ((__packed__)) u; + __u16 ih_item_len; /* total size of the item body */ + __u16 ih_item_location; /* an offset to the item body within the block */ + /* I thought we were going to use this + for having lots of item types? Why + don't you use this for item type + not item version. That is how you + talked me into this field a year + ago, remember? I am still not + convinced it needs to be 16 bits + (for at least many years), but at + least I can sympathize with that + hope. Change the name from version + to type, and tell people not to use + FFFF in case 16 bits is someday too + small and needs to be extended:-). */ + __u16 ih_version; /* 0 for all old items, 2 for new + ones. Highest bit is set by fsck + temporary, cleaned after all done */ +} __attribute__ ((__packed__)); +/* size of item header */ +#define IH_SIZE (sizeof(struct item_head)) + +#define ih_free_space(ih) le16_to_cpu((ih)->u.ih_free_space_reserved) +#define ih_version(ih) le16_to_cpu((ih)->ih_version) +#define ih_entry_count(ih) le16_to_cpu((ih)->u.ih_entry_count) +#define ih_location(ih) le16_to_cpu((ih)->ih_item_location) +#define ih_item_len(ih) le16_to_cpu((ih)->ih_item_len) + +#define put_ih_free_space(ih, val) do { (ih)->u.ih_free_space_reserved = cpu_to_le16(val); } while(0) +#define put_ih_version(ih, val) do { (ih)->ih_version = cpu_to_le16(val); } while (0) +#define put_ih_entry_count(ih, val) do { (ih)->u.ih_entry_count = cpu_to_le16(val); } while (0) +#define put_ih_location(ih, val) do { (ih)->ih_item_location = cpu_to_le16(val); } while (0) +#define put_ih_item_len(ih, val) do { (ih)->ih_item_len = cpu_to_le16(val); } while (0) + + +// FIXME: now would that work for other than i386 archs +#define unreachable_item(ih) (ih->ih_version & (1 << 15)) + +#define get_ih_free_space(ih) (ih_version (ih) == ITEM_VERSION_2 ? 0 : ih_free_space (ih)) +#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == ITEM_VERSION_2) ? 
0 : (val))) + + +// +// there are 5 item types currently +// +#define TYPE_STAT_DATA 0 +#define TYPE_INDIRECT 1 +#define TYPE_DIRECT 2 +#define TYPE_DIRENTRY 3 +#define TYPE_ANY 15 // FIXME: comment is required + +// +// in old version uniqueness field shows key type +// +#define V1_SD_UNIQUENESS 0 +#define V1_INDIRECT_UNIQUENESS 0xfffffffe +#define V1_DIRECT_UNIQUENESS 0xffffffff +#define V1_DIRENTRY_UNIQUENESS 500 +#define V1_ANY_UNIQUENESS 555 // FIXME: comment is required + +// +// here are conversion routines +// +extern inline int uniqueness2type (__u32 uniqueness) +{ + switch (uniqueness) { + case V1_SD_UNIQUENESS: return TYPE_STAT_DATA; + case V1_INDIRECT_UNIQUENESS: return TYPE_INDIRECT; + case V1_DIRECT_UNIQUENESS: return TYPE_DIRECT; + case V1_DIRENTRY_UNIQUENESS: return TYPE_DIRENTRY; + } +/* + if (uniqueness != V1_ANY_UNIQUENESS) { + printk ("uniqueness %d\n", uniqueness); + BUG (); + } +*/ + return TYPE_ANY; +} + +extern inline __u32 type2uniqueness (int type) +{ + switch (type) { + case TYPE_STAT_DATA: return V1_SD_UNIQUENESS; + case TYPE_INDIRECT: return V1_INDIRECT_UNIQUENESS; + case TYPE_DIRECT: return V1_DIRECT_UNIQUENESS; + case TYPE_DIRENTRY: return V1_DIRENTRY_UNIQUENESS; + } + /* + if (type != TYPE_ANY) + BUG (); + */ + return V1_ANY_UNIQUENESS; +} + + +// +// key is pointer to on disk key which is stored in le, result is cpu, +// there is no way to get version of object from key, so, provide +// version to these defines (both the v1 and the v2 branch convert byte order) +// +extern inline loff_t le_key_k_offset (int version, struct key * key) +{ + return (version == ITEM_VERSION_1) ? le32_to_cpu (key->u.k_offset_v1.k_offset) : + le64_to_cpu (key->u.k_offset_v2.k_offset); +} +extern inline loff_t le_ih_k_offset (struct item_head * ih) +{ + return le_key_k_offset (ih_version (ih), &(ih->ih_key)); +} + + +extern inline loff_t le_key_k_type (int version, struct key * key) +{ + return (version == ITEM_VERSION_1) ? 
uniqueness2type (le32_to_cpu (key->u.k_offset_v1.k_uniqueness)) : + le16_to_cpu (key->u.k_offset_v2.k_type); +} +extern inline loff_t le_ih_k_type (struct item_head * ih) +{ + return le_key_k_type (ih_version (ih), &(ih->ih_key)); +} + + +extern inline void set_le_key_k_offset (int version, struct key * key, loff_t offset) +{ + (version == ITEM_VERSION_1) ? (key->u.k_offset_v1.k_offset = cpu_to_le32 (offset)) : + (key->u.k_offset_v2.k_offset = cpu_to_le64 (offset)); +} +extern inline void set_le_ih_k_offset (struct item_head * ih, loff_t offset) +{ + set_le_key_k_offset (ih_version (ih), &(ih->ih_key), offset); +} + + + +extern inline void set_le_key_k_type (int version, struct key * key, int type) +{ + (version == ITEM_VERSION_1) ? (key->u.k_offset_v1.k_uniqueness = cpu_to_le32 (type2uniqueness (type))) : + (key->u.k_offset_v2.k_type = cpu_to_le16 (type)); +} +extern inline void set_le_ih_k_type (struct item_head * ih, int type) +{ + set_le_key_k_type (ih_version (ih), &(ih->ih_key), type); +} + + +#define is_direntry_le_key(version,key) (le_key_k_type (version, key) == TYPE_DIRENTRY) +#define is_direct_le_key(version,key) (le_key_k_type (version, key) == TYPE_DIRECT) +#define is_indirect_le_key(version,key) (le_key_k_type (version, key) == TYPE_INDIRECT) +#define is_statdata_le_key(version,key) (le_key_k_type (version, key) == TYPE_STAT_DATA) + +// +// item header has version. +// +#define is_direntry_le_ih(ih) is_direntry_le_key (ih_version (ih), &((ih)->ih_key)) +#define is_direct_le_ih(ih) is_direct_le_key (ih_version (ih), &((ih)->ih_key)) +#define is_indirect_le_ih(ih) is_indirect_le_key (ih_version(ih), &((ih)->ih_key)) +#define is_statdata_le_ih(ih) is_statdata_le_key (ih_version (ih), &((ih)->ih_key)) + + + +// +// key is pointer to cpu key, result is cpu +// +extern inline loff_t cpu_key_k_offset (struct cpu_key * key) +{ + return (key->version == ITEM_VERSION_1) ? 
key->on_disk_key.u.k_offset_v1.k_offset : + key->on_disk_key.u.k_offset_v2.k_offset; +} + +extern inline loff_t cpu_key_k_type (struct cpu_key * key) +{ + return (key->version == ITEM_VERSION_1) ? uniqueness2type (key->on_disk_key.u.k_offset_v1.k_uniqueness) : + key->on_disk_key.u.k_offset_v2.k_type; +} + +extern inline void set_cpu_key_k_offset (struct cpu_key * key, loff_t offset) +{ + (key->version == ITEM_VERSION_1) ? (key->on_disk_key.u.k_offset_v1.k_offset = offset) : + (key->on_disk_key.u.k_offset_v2.k_offset = offset); +} + + +extern inline void set_cpu_key_k_type (struct cpu_key * key, int type) +{ + (key->version == ITEM_VERSION_1) ? (key->on_disk_key.u.k_offset_v1.k_uniqueness = type2uniqueness (type)) : + (key->on_disk_key.u.k_offset_v2.k_type = type); +} + +extern inline void cpu_key_k_offset_dec (struct cpu_key * key) +{ + if (key->version == ITEM_VERSION_1) + key->on_disk_key.u.k_offset_v1.k_offset --; + else + key->on_disk_key.u.k_offset_v2.k_offset --; +} + + +#define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY) +#define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT) +#define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT) +#define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA) + + +/* are these used ? */ +#define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key))) +#define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key))) +#define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key))) +#define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key))) + + + + + +#define I_K_KEY_IN_ITEM(p_s_ih, p_s_key, n_blocksize) \ + ( ! 
COMP_SHORT_KEYS(p_s_ih, p_s_key) && \ + I_OFF_BYTE_IN_ITEM(p_s_ih, k_offset (p_s_key), n_blocksize) ) + +/* maximal length of item */ +#define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE) +#define MIN_ITEM_LEN 1 + + +/* object identifier for root dir */ +#define REISERFS_ROOT_OBJECTID 2 +#define REISERFS_ROOT_PARENT_OBJECTID 1 +extern struct key root_key; + + + + +/* + * Picture represents a leaf of the S+tree + * ______________________________________________________ + * | | Array of | | | + * |Block | Object-Item | F r e e | Objects- | + * | head | Headers | S p a c e | Items | + * |______|_______________|___________________|___________| + */ + +/* Header of a disk block. More precisely, header of a formatted leaf + or internal node, and not the header of an unformatted node. */ +struct block_head { + __u16 blk_level; /* Level of a block in the tree. */ + __u16 blk_nr_item; /* Number of keys/items in a block. */ + __u16 blk_free_space; /* Block free space in bytes. */ + __u16 blk_reserved; + /* dump this in v4/planA */ + struct key blk_right_delim_key; /* kept only for compatibility */ +}; + +#define BLKH_SIZE (sizeof(struct block_head)) + +/* + * values for blk_level field of the struct block_head + */ + +#define FREE_LEVEL 0 /* when node gets removed from the tree its + blk_level is set to FREE_LEVEL. It is then + used to see whether the node is still in the + tree */ + +#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level.*/ + +/* Given the buffer head of a formatted node, resolve to the block head of that node. */ +#define B_BLK_HEAD(p_s_bh) ((struct block_head *)((p_s_bh)->b_data)) +/* Number of items that are in buffer. 
*/ +#define B_NR_ITEMS(p_s_bh) (le16_to_cpu ( B_BLK_HEAD(p_s_bh)->blk_nr_item )) +#define B_LEVEL(bh) (le16_to_cpu ( B_BLK_HEAD(bh)->blk_level )) +#define B_FREE_SPACE(bh) (le16_to_cpu ( B_BLK_HEAD(bh)->blk_free_space )) + +#define PUT_B_NR_ITEMS(p_s_bh, val) do { B_BLK_HEAD(p_s_bh)->blk_nr_item = cpu_to_le16(val); } while (0) +#define PUT_B_LEVEL(bh, val) do { B_BLK_HEAD(bh)->blk_level = cpu_to_le16(val); } while (0) +#define PUT_B_FREE_SPACE(bh, val) do { B_BLK_HEAD(bh)->blk_free_space = cpu_to_le16(val); } while (0) + +/* Get right delimiting key. */ +#define B_PRIGHT_DELIM_KEY(p_s_bh) ( &(B_BLK_HEAD(p_s_bh)->blk_right_delim_key) ) + +/* Does the buffer contain a disk leaf. (blk_level is little-endian, so compare via B_LEVEL) */ +#define B_IS_ITEMS_LEVEL(p_s_bh) ( B_LEVEL(p_s_bh) == DISK_LEAF_NODE_LEVEL ) + +/* Does the buffer contain a disk internal node */ +#define B_IS_KEYS_LEVEL(p_s_bh) ( B_LEVEL(p_s_bh) > DISK_LEAF_NODE_LEVEL &&\ + B_LEVEL(p_s_bh) <= MAX_HEIGHT ) + + + + +/***************************************************************************/ +/* STAT DATA */ +/***************************************************************************/ + + +// +// old stat data is 32 bytes long. We are going to distinguish new one by +// different size +// +struct stat_data_v1 +{ + __u16 sd_mode; /* file type, permissions */ + __u16 sd_nlink; /* number of hard links */ + __u16 sd_uid; /* owner */ + __u16 sd_gid; /* group */ + __u32 sd_size; /* file size */ + __u32 sd_atime; /* time of last access */ + __u32 sd_mtime; /* time file was last modified */ + __u32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ + union { + __u32 sd_rdev; + __u32 sd_blocks; /* number of blocks file uses */ + } __attribute__ ((__packed__)) u; + __u32 sd_first_direct_byte; /* first byte of file which is stored + in a direct item: except that if it + equals 1 it is a symlink and if it + equals ~(__u32)0 there is no + direct item. 
The existence of this + field really grates on me. Let's + replace it with a macro based on + sd_size and our tail suppression + policy. Someday. -Hans */ +} __attribute__ ((__packed__)); + +#define SD_V1_SIZE (sizeof(struct stat_data_v1)) + + +/* Stat Data on disk (reiserfs version of UFS disk inode minus the + address blocks) */ +struct stat_data { + __u16 sd_mode; /* file type, permissions */ + __u16 sd_reserved; + __u32 sd_nlink; /* number of hard links */ + __u64 sd_size; /* file size */ + __u32 sd_uid; /* owner */ + __u32 sd_gid; /* group */ + __u32 sd_atime; /* time of last access */ + __u32 sd_mtime; /* time file was last modified */ + __u32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ + __u32 sd_blocks; + union { + __u32 sd_rdev; + //__u32 sd_first_direct_byte; + /* first byte of file which is stored in a + direct item: except that if it equals 1 + it is a symlink and if it equals + ~(__u32)0 there is no direct item. The + existence of this field really grates + on me. Let's replace it with a macro + based on sd_size and our tail + suppression policy? */ + } __attribute__ ((__packed__)) u; +} __attribute__ ((__packed__)); +// +// this is 40 bytes long +// +#define SD_SIZE (sizeof(struct stat_data)) + +#define stat_data_v1(ih) (ih_version (ih) == ITEM_VERSION_1) + + +/***************************************************************************/ +/* DIRECTORY STRUCTURE */ +/***************************************************************************/ +/* + Picture represents the structure of directory items + ________________________________________________ + | Array of | | | | | | + | directory |N-1| N-2 | .... | 1st |0th| + | entry headers | | | | | | + |_______________|___|_____|________|_______|___| + <---- directory entries ------> + + First directory item has k_offset component 1. We store "." and ".." + in one item, always, we never split "." and ".." into differing + items. 
This makes, among other things, the code for removing + directories simpler. */ +#define SD_OFFSET 0 +#define SD_UNIQUENESS 0 +#define DOT_OFFSET 1 +#define DOT_DOT_OFFSET 2 +#define DIRENTRY_UNIQUENESS 500 + +/* */ +#define FIRST_ITEM_OFFSET 1 + +/* + Q: How to get key of object pointed to by entry from entry? + + A: Each directory entry has its header. This header has deh_dir_id and deh_objectid fields, those are key + of object, entry points to */ + +/* NOT IMPLEMENTED: + Directory will someday contain stat data of object */ + + + +struct reiserfs_de_head +{ + __u32 deh_offset; /* third component of the directory entry key */ + __u32 deh_dir_id; /* objectid of the parent directory of the object, that is referenced + by directory entry */ + __u32 deh_objectid; /* objectid of the object, that is referenced by directory entry */ + __u16 deh_location; /* offset of name in the whole item */ + __u16 deh_state; /* whether 1) entry contains stat data (for future), and 2) whether + entry is hidden (unlinked) */ +} __attribute__ ((__packed__)); +#define DEH_SIZE sizeof(struct reiserfs_de_head) + +/* empty directory contains two entries "." and ".." and their headers */ +#define EMPTY_DIR_SIZE \ +(DEH_SIZE * 2 + ROUND_UP (strlen (".")) + ROUND_UP (strlen (".."))) + +/* old format directories have this size when empty */ +#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3) + +#define DEH_Statdata 0 /* not used now */ +#define DEH_Visible 2 + +/* bitops which deals with unaligned addrs; + needed for alpha port. 
--zam */ +#ifdef __alpha__ +# define ADDR_UNALIGNED_BITS (5) +#endif + +#ifdef ADDR_UNALIGNED_BITS + +# define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) +# define unaligned_offset(addr) (((int)((long)(addr) & ((1 << ADDR_UNALIGNED_BITS) - 1))) << 3) + +# define set_bit_unaligned(nr, addr) set_bit((nr) + unaligned_offset(addr), aligned_address(addr)) +# define clear_bit_unaligned(nr, addr) clear_bit((nr) + unaligned_offset(addr), aligned_address(addr)) +# define test_bit_unaligned(nr, addr) test_bit((nr) + unaligned_offset(addr), aligned_address(addr)) + +#else + +# define set_bit_unaligned(nr, addr) set_bit(nr, addr) +# define clear_bit_unaligned(nr, addr) clear_bit(nr, addr) +# define test_bit_unaligned(nr, addr) test_bit(nr, addr) + +#endif + +#define deh_dir_id(deh) (__le32_to_cpu ((deh)->deh_dir_id)) +#define deh_objectid(deh) (__le32_to_cpu ((deh)->deh_objectid)) +#define deh_offset(deh) (__le32_to_cpu ((deh)->deh_offset)) + + +#define mark_de_with_sd(deh) set_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) +#define mark_de_without_sd(deh) clear_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) +#define mark_de_visible(deh) set_bit_unaligned (DEH_Visible, &((deh)->deh_state)) +#define mark_de_hidden(deh) clear_bit_unaligned (DEH_Visible, &((deh)->deh_state)) + +#define de_with_sd(deh) test_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) +#define de_visible(deh) test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) +#define de_hidden(deh) !test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) + +/* compose directory item containing "." and ".." entries (entries are + not aligned to 4 byte boundary) */ +extern inline void make_empty_dir_item_v1 (char * body, __u32 dirid, __u32 objid, + __u32 par_dirid, __u32 par_objid) +{ + struct reiserfs_de_head * deh; + + memset (body, 0, EMPTY_DIR_SIZE_V1); + deh = (struct reiserfs_de_head *)body; + + /* direntry header of "." 
*/ + deh[0].deh_offset = cpu_to_le32 (DOT_OFFSET); + deh[0].deh_dir_id = cpu_to_le32 (dirid); + deh[0].deh_objectid = cpu_to_le32 (objid); + deh[0].deh_location = cpu_to_le16 (EMPTY_DIR_SIZE_V1 - strlen (".")); + deh[0].deh_state = 0; + mark_de_visible(&(deh[0])); + + /* direntry header of ".." */ + deh[1].deh_offset = cpu_to_le32 (DOT_DOT_OFFSET); + /* key of ".." for the root directory */ + deh[1].deh_dir_id = cpu_to_le32 (par_dirid); + deh[1].deh_objectid = cpu_to_le32 (par_objid); + deh[1].deh_location = cpu_to_le16 (le16_to_cpu (deh[0].deh_location) - strlen ("..")); + deh[1].deh_state = 0; + mark_de_visible(&(deh[1])); + + /* copy ".." and "." (deh_location was stored little-endian above, convert it back before using it as an offset) */ + memcpy (body + le16_to_cpu (deh[0].deh_location), ".", 1); + memcpy (body + le16_to_cpu (deh[1].deh_location), "..", 2); +} + +/* compose directory item containing "." and ".." entries */ +extern inline void make_empty_dir_item (char * body, __u32 dirid, __u32 objid, + __u32 par_dirid, __u32 par_objid) +{ + struct reiserfs_de_head * deh; + + memset (body, 0, EMPTY_DIR_SIZE); + deh = (struct reiserfs_de_head *)body; + + /* direntry header of "." */ + deh[0].deh_offset = cpu_to_le32 (DOT_OFFSET); + deh[0].deh_dir_id = cpu_to_le32 (dirid); + deh[0].deh_objectid = cpu_to_le32 (objid); + deh[0].deh_location = cpu_to_le16 (EMPTY_DIR_SIZE - ROUND_UP (strlen ("."))); + deh[0].deh_state = 0; + mark_de_visible(&(deh[0])); + + /* direntry header of ".." */ + deh[1].deh_offset = cpu_to_le32 (DOT_DOT_OFFSET); + /* key of ".." for the root directory */ + deh[1].deh_dir_id = cpu_to_le32 (par_dirid); + deh[1].deh_objectid = cpu_to_le32 (par_objid); + deh[1].deh_location = cpu_to_le16 (le16_to_cpu (deh[0].deh_location) - ROUND_UP (strlen (".."))); + deh[1].deh_state = 0; + mark_de_visible(&(deh[1])); + + /* copy ".." and "." 
*/ + memcpy (body + le16_to_cpu (deh[0].deh_location), ".", 1); + memcpy (body + le16_to_cpu (deh[1].deh_location), "..", 2); +} + + +/* array of the entry headers */ + /* get item body (ih_item_location is little-endian, read it via ih_location) */ +#define B_I_PITEM(bh,ih) ( (bh)->b_data + ih_location(ih) ) +#define B_I_DEH(bh,ih) ((struct reiserfs_de_head *)(B_I_PITEM(bh,ih))) + +/* length of the directory entry in directory item. This define + calculates length of i-th directory entry using directory entry + locations from dir entry head. When it calculates length of 0-th + directory entry, it uses length of whole item in place of entry + location of the non-existent following entry in the calculation. + See picture above.*/ +/* +#define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \ +((i) ? (((deh)-1)->deh_location - (deh)->deh_location) : ((ih)->ih_item_len) - (deh)->deh_location) +*/ +extern inline int entry_length (struct buffer_head * bh, struct item_head * ih, + int pos_in_item) +{ + struct reiserfs_de_head * deh; + + deh = B_I_DEH (bh, ih) + pos_in_item; + if (pos_in_item) + return (le16_to_cpu ((deh - 1)->deh_location) - le16_to_cpu (deh->deh_location)); + return (le16_to_cpu (ih->ih_item_len) - le16_to_cpu (deh->deh_location)); +} + + + +/* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. */ +#define I_ENTRY_COUNT(ih) ((ih)->u.ih_entry_count) + + +/* name by bh, ih and entry_num (both stored offsets are converted from little-endian) */ +#define B_I_E_NAME(bh,ih,entry_num) ((char *)((bh)->b_data + ih_location(ih) + le16_to_cpu ((B_I_DEH(bh,ih)+(entry_num))->deh_location))) + +// two entries per block (at least) +//#define REISERFS_MAX_NAME_LEN(block_size) +//((block_size - BLKH_SIZE - IH_SIZE - DEH_SIZE * 2) / 2) + +// two entries per block (at least) +#define REISERFS_MAX_NAME_LEN(block_size) 255 + + + + +/* this structure is used for operations on directory entries. It is + not a disk structure. 
*/ +/* When reiserfs_find_entry or search_by_entry_key find directory + entry, they return filled reiserfs_dir_entry structure */ +struct reiserfs_dir_entry +{ + struct buffer_head * de_bh; + int de_item_num; + struct item_head * de_ih; + int de_entry_num; + struct reiserfs_de_head * de_deh; + int de_entrylen; + int de_namelen; + char * de_name; + char * de_gen_number_bit_string; + + __u32 de_dir_id; + __u32 de_objectid; + + struct cpu_key de_entry_key; +}; + +/* these defines are useful when a particular member of a reiserfs_dir_entry is needed */ + +/* pointer to file name, stored in entry */ +#define B_I_DEH_ENTRY_FILE_NAME(bh,ih,deh) (B_I_PITEM (bh, ih) + (deh)->deh_location) + +/* length of name */ +#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \ +(I_DEH_N_ENTRY_LENGTH (ih, deh, entry_num) - (de_with_sd (deh) ? SD_SIZE : 0)) + + + +/* hash value occupies bits from 7 up to 30 */ +#define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL) +/* generation number occupies 7 bits starting from 0 up to 6 */ +#define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL) +#define MAX_GENERATION_NUMBER 127 + +#define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number)) + + +/* + * Picture represents an internal node of the reiserfs tree + * ______________________________________________________ + * | | Array of | Array of | Free | + * |block | keys | pointers | space | + * | head | N | N+1 | | + * |______|_______________|___________________|___________| + */ + +/***************************************************************************/ +/* DISK CHILD */ +/***************************************************************************/ +/* Disk child pointer: The pointer from an internal node of the tree + to a node that is on disk. */ +struct disk_child { + __u32 dc_block_number; /* Disk child's block number. */ + __u16 dc_size; /* Disk child's used space. 
*/ + __u16 dc_reserved; +}; + +#define DC_SIZE (sizeof(struct disk_child)) + +/* Get disk child by buffer header and position in the tree node. */ +#define B_N_CHILD(p_s_bh,n_pos) ((struct disk_child *)\ +((p_s_bh)->b_data+BLKH_SIZE+B_NR_ITEMS(p_s_bh)*KEY_SIZE+DC_SIZE*(n_pos))) + +/* Get disk child number by buffer header and position in the tree node. */ +#define B_N_CHILD_NUM(p_s_bh,n_pos) (le32_to_cpu (B_N_CHILD(p_s_bh,n_pos)->dc_block_number)) +#define PUT_B_N_CHILD_NUM(p_s_bh,n_pos, val) do { B_N_CHILD(p_s_bh,n_pos)->dc_block_number = cpu_to_le32(val); } while (0) + + /* maximal value of field child_size in structure disk_child */ + /* child size is the combined size of all items and their headers */ +#define MAX_CHILD_SIZE(bh) ((int)( (bh)->b_size - BLKH_SIZE )) + +/* amount of used space in buffer (not including block head) */ +#define B_CHILD_SIZE(cur) (MAX_CHILD_SIZE(cur)-(B_FREE_SPACE(cur))) + +/* max and min number of keys in internal node */ +#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) +#define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) + +/***************************************************************************/ +/* PATH STRUCTURES AND DEFINES */ +/***************************************************************************/ + + +/* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the + key. It uses reiserfs_bread to try to find buffers in the cache given their block number. If it + does not find them in the cache it reads them from disk. For each node search_by_key finds using + reiserfs_bread it then uses bin_search to look through that node. bin_search will find the + position of the block_number of the next node if it is looking through an internal node. If it + is looking through a leaf node bin_search will find the position of the item which has key either + equal to given key, or which is the maximal key less than the given key. 
*/ + +struct path_element { + struct buffer_head * pe_buffer; /* Pointer to the buffer at the path in the tree. */ + int pe_position; /* Position in the tree node which is placed in the */ + /* buffer above. */ +}; + +#define MAX_HEIGHT 5 /* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */ +#define EXTENDED_MAX_HEIGHT 7 /* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ +#define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. */ + +#define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ +#define MAX_FEB_SIZE 6 /* this MUST be MAX_HEIGHT + 1. See about FEB below */ + + + +/* We need to keep track of who the ancestors of nodes are. When we + perform a search we record which nodes were visited while + descending the tree looking for the node we searched for. This list + of nodes is called the path. This information is used while + performing balancing. Note that this path information may become + invalid, and this means we must check it when using it to see if it + is still valid. You'll need to read search_by_key and the comments + in it, especially about decrement_counters_in_path(), to understand + this structure. + +Paths make the code so much harder to work with and debug.... An +enormous number of bugs are due to them, and trying to write or modify +code that uses them just makes my head hurt. They are based on an +excessive effort to avoid disturbing the precious VFS code.:-( The +gods only know how we are going to SMP the code that uses them. +znodes are the way! */ + + +struct path { + int path_length; /* Length of the array above. */ + struct path_element path_elements[EXTENDED_MAX_HEIGHT]; /* Array of the path elements. */ + int pos_in_item; +}; + +#define pos_in_item(path) ((path)->pos_in_item) + +#define INITIALIZE_PATH(var) \ +struct path var = {ILLEGAL_PATH_ELEMENT_OFFSET, } + +/* Get path element by path and path position. 
*/ +#define PATH_OFFSET_PELEMENT(p_s_path,n_offset) ((p_s_path)->path_elements +(n_offset)) + +/* Get buffer header at the path by path and path position. */ +#define PATH_OFFSET_PBUFFER(p_s_path,n_offset) (PATH_OFFSET_PELEMENT(p_s_path,n_offset)->pe_buffer) + +/* Get position in the element at the path by path and path position. */ +#define PATH_OFFSET_POSITION(p_s_path,n_offset) (PATH_OFFSET_PELEMENT(p_s_path,n_offset)->pe_position) + + +#define PATH_PLAST_BUFFER(p_s_path) (PATH_OFFSET_PBUFFER((p_s_path), (p_s_path)->path_length)) + /* you know, to the person who didn't + write this the macro name does not + at first suggest what it does. + Maybe POSITION_FROM_PATH_END? Or + maybe we should just focus on + dumping paths... -Hans */ +#define PATH_LAST_POSITION(p_s_path) (PATH_OFFSET_POSITION((p_s_path), (p_s_path)->path_length)) + + +#define PATH_PITEM_HEAD(p_s_path) B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_path),PATH_LAST_POSITION(p_s_path)) + +/* in do_balance leaf has h == 0 in contrast with path structure, + where root has level == 0. 
That is why we need these defines */ +#define PATH_H_PBUFFER(p_s_path, h) PATH_OFFSET_PBUFFER (p_s_path, (p_s_path)->path_length - (h)) /* tb->S[h] */ +#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1) /* tb->F[h] or tb->S[0]->b_parent */ +#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, (path)->path_length - (h)) +#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, (h) + 1) /* tb->S[h]->b_item_order */ + +#define PATH_H_PATH_OFFSET(p_s_path, n_h) ((p_s_path)->path_length - (n_h)) + +#define get_bh(path) PATH_PLAST_BUFFER(path) +#define get_ih(path) PATH_PITEM_HEAD(path) +#define get_item_pos(path) PATH_LAST_POSITION(path) +#define get_item(path) ((void *)B_N_PITEM(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION (path))) +#define item_moved(ih,path) comp_items(ih, path) +#define path_changed(ih,path) comp_items (ih, path) + + +/***************************************************************************/ +/* MISC */ +/***************************************************************************/ + +/* Size of pointer to the unformatted node. */ +#define UNFM_P_SIZE (sizeof(unp_t)) + +// in in-core inode key is stored on le form +#define INODE_PKEY(inode) ((struct key *)((inode)->u.reiserfs_i.i_key)) +//#define mark_tail_converted(inode) (atomic_set(&((inode)->u.reiserfs_i.i_converted),1)) +//#define unmark_tail_converted(inode) (atomic_set(&((inode)->u.reiserfs_i.i_converted), 0)) +//#define is_tail_converted(inode) (atomic_read(&((inode)->u.reiserfs_i.i_converted))) + + + +#define MAX_UL_INT 0xffffffff +#define MAX_INT 0x7ffffff /* NOTE(review): only seven hex digits (2^27 - 1); looks like a dropped 'f' from 0x7fffffff - confirm users before changing */ +#define MAX_US_INT 0xffff + +///#define TOO_LONG_LENGTH (~0ULL) + +// reiserfs version 2 has max offset 60 bits. 
Version 1 - 32 bit offset +#define U32_MAX (~(__u32)0) +extern inline loff_t max_reiserfs_offset (struct inode * inode) +{ + if (inode_items_version (inode) == ITEM_VERSION_1) + return (loff_t)U32_MAX; + + return (loff_t)((~(__u64)0) >> 4); +} + + +/*#define MAX_KEY_UNIQUENESS MAX_UL_INT*/ +#define MAX_KEY_OBJECTID MAX_UL_INT + + +#define MAX_B_NUM MAX_UL_INT +#define MAX_FC_NUM MAX_US_INT + + +/* the purpose is to detect overflow of an unsigned short */ +#define REISERFS_LINK_MAX (MAX_US_INT - 1000) + + +/* The following defines are used in reiserfs_insert_item and reiserfs_append_item */ +#define REISERFS_KERNEL_MEM 0 /* reiserfs kernel memory mode */ +#define REISERFS_USER_MEM 1 /* reiserfs user memory mode */ + +#define fs_generation(s) ((s)->u.reiserfs_sb.s_generation_counter) +#define get_generation(s) atomic_read (&fs_generation(s)) +#define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) +#define fs_changed(gen,s) (gen != get_generation (s)) + + +/***************************************************************************/ +/* FIXATE NODES */ +/***************************************************************************/ + +//#define VI_TYPE_STAT_DATA 1 +//#define VI_TYPE_DIRECT 2 +//#define VI_TYPE_INDIRECT 4 +//#define VI_TYPE_DIRECTORY 8 +//#define VI_TYPE_FIRST_DIRECTORY_ITEM 16 +//#define VI_TYPE_INSERTED_DIRECTORY_ITEM 32 + +#define VI_TYPE_LEFT_MERGEABLE 1 +#define VI_TYPE_RIGHT_MERGEABLE 2 + +/* To make any changes in the tree we always first find node, that + contains item to be changed/deleted or place to insert a new + item. We call this node S. To do balancing we need to decide what + we will shift to left/right neighbor, or to a new node, where new + item will be etc. To make this analysis simpler we build virtual + node. Virtual node is an array of items, that will replace items of + node S. (For instance if we are going to delete an item, virtual + node does not contain it). 
Virtual node keeps information about + item sizes and types, mergeability of first and last items, sizes + of all entries in directory item. We use this array of items when + calculating what we can shift to neighbors and how many nodes we + have to have if we do not any shiftings, if we shift to left/right + neighbor or to both. */ +struct virtual_item +{ + int vi_index; // index in the array of item operations + unsigned short vi_type; // left/right mergeability + unsigned short vi_item_len; /* length of item that it will have after balancing */ + struct item_head * vi_ih; + const char * vi_item; // body of item (old or new) + const void * vi_new_data; // 0 always but paste mode + void * vi_uarea; // item specific area +}; + + +struct virtual_node +{ + char * vn_free_ptr; /* this is a pointer to the free space in the buffer */ + unsigned short vn_nr_item; /* number of items in virtual node */ + short vn_size; /* size of node , that node would have if it has unlimited size and no balancing is performed */ + short vn_mode; /* mode of balancing (paste, insert, delete, cut) */ + short vn_affected_item_num; + short vn_pos_in_item; + struct item_head * vn_ins_ih; /* item header of inserted item, 0 for other modes */ + const void * vn_data; + struct virtual_item * vn_vi; /* array of items (including a new one, excluding item to be deleted) */ +}; + + +/***************************************************************************/ +/* TREE BALANCE */ +/***************************************************************************/ + +/* This temporary structure is used in tree balance algorithms, and + constructed as we go to the extent that its various parts are + needed. It contains arrays of nodes that can potentially be + involved in the balancing of node S, and parameters that define how + each of the nodes must be balanced. 
Note that in these algorithms + for balancing the worst case is to need to balance the current node + S and the left and right neighbors and all of their parents plus + create a new node. We implement S1 balancing for the leaf nodes + and S0 balancing for the internal nodes (S1 and S0 are defined in + our papers.)*/ + +#define MAX_FREE_BLOCK 7 /* size of the array of buffers to free at end of do_balance */ + +/* maximum number of FEB blocknrs on a single level */ +#define MAX_AMOUNT_NEEDED 2 + +/* someday somebody will prefix every field in this struct with tb_ */ +struct tree_balance +{ + int tb_mode; + int need_balance_dirty; + struct super_block * tb_sb; + struct reiserfs_transaction_handle *transaction_handle ; + struct path * tb_path; + struct buffer_head * L[MAX_HEIGHT]; /* array of left neighbors of nodes in the path */ + struct buffer_head * R[MAX_HEIGHT]; /* array of right neighbors of nodes in the path*/ + struct buffer_head * FL[MAX_HEIGHT]; /* array of fathers of the left neighbors */ + struct buffer_head * FR[MAX_HEIGHT]; /* array of fathers of the right neighbors */ + struct buffer_head * CFL[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */ + struct buffer_head * CFR[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */ + + struct buffer_head * FEB[MAX_FEB_SIZE]; /* array of empty buffers. Number of buffers in array equals + cur_blknum. */ + struct buffer_head * used[MAX_FEB_SIZE]; + struct buffer_head * thrown[MAX_FEB_SIZE]; + int lnum[MAX_HEIGHT]; /* array of number of items which must be + shifted to the left in order to balance the + current node; for leaves includes item that + will be partially shifted; for internal + nodes, it is the number of child pointers + rather than items. It includes the new item + being created. The code sometimes subtracts + one to get the number of wholly shifted + items for other purposes. 
*/
+  int rnum[MAX_HEIGHT]; /* substitute right for left in comment above */
+  int lkey[MAX_HEIGHT]; /* array indexed by height h mapping the key delimiting L[h] and
+                           S[h] to its item number within the node CFL[h] */
+  int rkey[MAX_HEIGHT]; /* substitute r for l in comment above */
+  int insert_size[MAX_HEIGHT]; /* the number of bytes we are trying to add to or remove from
+                                  S[h]. A negative value means removing. */
+  int blknum[MAX_HEIGHT]; /* number of nodes that will replace node S[h] after
+                             balancing on the level h of the tree. If 0 then S is
+                             being deleted, if 1 then S remains and no new nodes
+                             are being created, if 2 or 3 then 1 or 2 new nodes are
+                             being created */
+
+  /* fields that are used only for balancing leaves of the tree */
+  int cur_blknum; /* number of empty blocks that have already been allocated */
+  int s0num; /* number of items that fall into the leftmost node when S[0] splits */
+  int s1num; /* number of items that fall into the first new node when S[0] splits */
+  int s2num; /* number of items that fall into the second new node when S[0] splits */
+  int lbytes; /* number of bytes which can flow to the left neighbor from the left */
+  /* most liquid item that cannot be shifted from S[0] entirely */
+  /* if -1 then nothing will be partially shifted */
+  int rbytes; /* number of bytes which will flow to the right neighbor from the right */
+  /* most liquid item that cannot be shifted from S[0] entirely */
+  /* if -1 then nothing will be partially shifted */
+  int s1bytes; /* number of bytes which flow to the first new node when S[0] splits */
+  /* note: if S[0] splits into 3 nodes, then items do not need to be cut */
+  int s2bytes;
+  struct buffer_head * buf_to_free[MAX_FREE_BLOCK]; /* buffers which are to be freed by unfix_nodes after do_balance finishes */
+  char * vn_buf; /* kmalloced memory.
Used to create + virtual node and keep map of + dirtied bitmap blocks */ + int vn_buf_size; /* size of the vn_buf */ + struct virtual_node * tb_vn; /* VN starts after bitmap of bitmap blocks */ + + int fs_gen; /* saved value of `reiserfs_generation' counter + see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ +} ; + + +#if 0 + /* when balancing we potentially affect a 3 node wide column of nodes + in the tree (the top of the column may be tapered). C is the nodes + at the center of this column, and L and R are the nodes to the + left and right. */ + struct seal * L_path_seals[MAX_HEIGHT]; + struct seal * C_path_seals[MAX_HEIGHT]; + struct seal * R_path_seals[MAX_HEIGHT]; + char L_path_lock_types[MAX_HEIGHT]; /* 'r', 'w', or 'n' for read, write, or none */ + char C_path_lock_types[MAX_HEIGHT]; + char R_path_lock_types[MAX_HEIGHT]; + + + struct seal_list_elem * C_seal[MAX_HEIGHT]; /* array of seals on nodes in the path */ + struct seal_list_elem * L_seal[MAX_HEIGHT]; /* array of seals on left neighbors of nodes in the path */ + struct seal_list_elem * R_seal[MAX_HEIGHT]; /* array of seals on right neighbors of nodes in the path*/ + struct seal_list_elem * FL_seal[MAX_HEIGHT]; /* array of seals on fathers of the left neighbors */ + struct seal_list_elem * FR_seal[MAX_HEIGHT]; /* array of seals on fathers of the right neighbors */ + struct seal_list_elem * CFL_seal[MAX_HEIGHT]; /* array of seals on common parents of center node and its left neighbor */ + struct seal_list_elem * CFR_seal[MAX_HEIGHT]; /* array of seals on common parents of center node and its right neighbor */ + + struct char C_desired_lock_type[MAX_HEIGHT]; /* 'r', 'w', or 'n' for read, write, or none */ + struct char L_desired_lock_type[MAX_HEIGHT]; + struct char R_desired_lock_type[MAX_HEIGHT]; + struct char FL_desired_lock_type[MAX_HEIGHT]; + struct char FR_desired_lock_type[MAX_HEIGHT]; + struct char CFL_desired_lock_type[MAX_HEIGHT]; + struct char CFR_desired_lock_type[MAX_HEIGHT]; +#endif + + + + 
+
+/* These are modes of balancing */
+
+/* When inserting an item. */
+#define M_INSERT 'i'
+/* When inserting into (directories only) or appending onto an already
+   existing item. */
+#define M_PASTE 'p'
+/* When deleting an item. */
+#define M_DELETE 'd'
+/* When truncating an item or removing an entry from a (directory) item. */
+#define M_CUT 'c'
+
+/* used when balancing on the leaf level is skipped (in reiserfsck) */
+#define M_INTERNAL 'n'
+
+/* When further balancing is not needed, then do_balance does not need
+   to be called. */
+#define M_SKIP_BALANCING 's'
+#define M_CONVERT 'v'
+
+/* modes of leaf_move_items */
+#define LEAF_FROM_S_TO_L 0
+#define LEAF_FROM_S_TO_R 1
+#define LEAF_FROM_R_TO_L 2
+#define LEAF_FROM_L_TO_R 3
+#define LEAF_FROM_S_TO_SNEW 4
+
+#define FIRST_TO_LAST 0
+#define LAST_TO_FIRST 1
+
+/* used in do_balance for passing parent-of-node information that has
+   been obtained from the tb struct */
+struct buffer_info {
+  struct tree_balance * tb;
+  struct buffer_head * bi_bh;
+  struct buffer_head * bi_parent;
+  int bi_position;
+};
+
+
+/* there are 4 types of items: stat data, directory item, indirect, direct.
++-------------------+------------+--------------+------------+
+|                   |  k_offset  | k_uniqueness | mergeable? 
|
++-------------------+------------+--------------+------------+
+| stat data         |     0      |      0       |   no       |
++-------------------+------------+--------------+------------+
+| 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS|   no  |
+| non 1st directory | hash value |              |   yes      |
+| item              |            |              |            |
++-------------------+------------+--------------+------------+
+| indirect item     | offset + 1 |TYPE_INDIRECT |   if this is not the first indirect item of the object
++-------------------+------------+--------------+------------+
+| direct item       | offset + 1 |TYPE_DIRECT   |   if this is not the first direct item of the object
++-------------------+------------+--------------+------------+
+*/
+
+struct item_operations {
+  int (*bytes_number) (struct item_head * ih, int block_size);
+  void (*decrement_key) (struct cpu_key *);
+  int (*is_left_mergeable) (struct key * ih, unsigned long bsize); /* NOTE(review): the parameter is a struct key although it is named ih */
+  void (*print_item) (struct item_head *, char * item);
+  void (*check_item) (struct item_head *, char * item);
+
+  int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi,
+                    int is_affected, int insert_size);
+  int (*check_left) (struct virtual_item * vi, int free,
+                     int start_skip, int end_skip);
+  int (*check_right) (struct virtual_item * vi, int free);
+  int (*part_size) (struct virtual_item * vi, int from, int to);
+  int (*unit_num) (struct virtual_item * vi);
+  void (*print_vi) (struct virtual_item * vi);
+};
+
+
+extern struct item_operations stat_data_ops, indirect_ops, direct_ops,
+  direntry_ops;
+extern struct item_operations * item_ops [4]; /* the op_* macros index this by item key type or by vi_index */
+
+#define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize)
+#define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize)
+#define op_print_item(ih,item) item_ops[le_ih_k_type (ih)]->print_item (ih, item)
+#define op_check_item(ih,item) item_ops[le_ih_k_type (ih)]->check_item (ih, item)
+#define op_create_vi(vn,vi,is_affected,insert_size) 
item_ops[le_ih_k_type ((vi)->vi_ih)]->create_vi (vn,vi,is_affected,insert_size) +#define op_check_left(vi,free,start_skip,end_skip) item_ops[(vi)->vi_index]->check_left (vi, free, start_skip, end_skip) +#define op_check_right(vi,free) item_ops[(vi)->vi_index]->check_right (vi, free) +#define op_part_size(vi,from,to) item_ops[(vi)->vi_index]->part_size (vi, from, to) +#define op_unit_num(vi) item_ops[(vi)->vi_index]->unit_num (vi) +#define op_print_vi(vi) item_ops[(vi)->vi_index]->print_vi (vi) + + + + + +#define COMP_KEYS comp_keys +#define COMP_SHORT_KEYS comp_short_keys +#define keys_of_same_object comp_short_keys + +/*#define COMP_KEYS(p_s_key1, p_s_key2) comp_keys((unsigned long *)(p_s_key1), (unsigned long *)(p_s_key2)) +#define COMP_SHORT_KEYS(p_s_key1, p_s_key2) comp_short_keys((unsigned long *)(p_s_key1), (unsigned long *)(p_s_key2))*/ + + +/* number of blocks pointed to by the indirect item */ +#define I_UNFM_NUM(p_s_ih) ( (p_s_ih)->ih_item_len / UNFM_P_SIZE ) + +/* the used space within the unformatted node corresponding to pos within the item pointed to by ih */ +#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? 
(size) - (ih)->u.ih_free_space : (size)) + +/* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */ + + +/* get the item header */ +#define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) ) + +/* get key */ +#define B_N_PDELIM_KEY(bh,item_num) ( (struct key * )((bh)->b_data + BLKH_SIZE) + (item_num) ) + +/* get the key */ +#define B_N_PKEY(bh,item_num) ( &(B_N_PITEM_HEAD(bh,item_num)->ih_key) ) + +/* get item body */ +#define B_N_PITEM(bh,item_num) ( (bh)->b_data + B_N_PITEM_HEAD((bh),(item_num))->ih_item_location) + +/* get the stat data by the buffer header and the item order */ +#define B_N_STAT_DATA(bh,nr) \ +( (struct stat_data *)((bh)->b_data+B_N_PITEM_HEAD((bh),(nr))->ih_item_location ) ) + + /* following defines use reiserfs buffer header and item header */ + +/* get stat-data */ +#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + (ih)->ih_item_location) ) + +// this is 3976 for size==4096 +#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) + +/* indirect items consist of entries which contain blocknrs, pos + indicates which entry, and B_I_POS_UNFM_POINTER resolves to the + blocknr contained by the entry pos points to */ +#define B_I_POS_UNFM_POINTER(bh,ih,pos) (*(((unp_t *)B_I_PITEM(bh,ih)) + (pos))) +#define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0) + +/* Reiserfs buffer cache statistics. */ +#ifdef REISERFS_CACHE_STAT + struct reiserfs_cache_stat + { + int nr_reiserfs_ll_r_block; /* Number of block reads. */ + int nr_reiserfs_ll_w_block; /* Number of block writes. */ + int nr_reiserfs_schedule; /* Number of locked buffers waits. 
*/ + unsigned long nr_reiserfs_bread; /* Number of calls to reiserfs_bread function */ + unsigned long nr_returns; /* Number of breads of buffers that were hoped to contain a key but did not after bread completed + (usually due to object shifting while bread was executing.) + In the code this manifests as the number + of times that the repeat variable is nonzero in search_by_key.*/ + unsigned long nr_fixed; /* number of calls of fix_nodes function */ + unsigned long nr_failed; /* number of calls of fix_nodes in which schedule occurred while the function worked */ + unsigned long nr_find1; /* How many times we access a child buffer using its direct pointer from an internal node.*/ + unsigned long nr_find2; /* Number of times there is neither a direct pointer to + nor any entry in the child list pointing to the buffer. */ + unsigned long nr_find3; /* When parent is locked (meaning that there are no direct pointers) + or parent is leaf and buffer to be found is an unformatted node. */ + } cache_stat; +#endif + +struct reiserfs_iget4_args { + __u32 objectid ; +} ; + +/***************************************************************************/ +/* FUNCTION DECLARATIONS */ +/***************************************************************************/ + +/*#ifdef __KERNEL__*/ + +/* journal.c see journal.c for all the comments here */ + +#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit structs at 4k */ + + +/* first block written in a commit. */ +struct reiserfs_journal_desc { + __u32 j_trans_id ; /* id of commit */ + __u32 j_len ; /* length of commit. 
len +1 is the commit block */ + __u32 j_mount_id ; /* mount id of this trans*/ + __u32 j_realblock[JOURNAL_TRANS_HALF] ; /* real locations for each block */ + char j_magic[12] ; +} ; + +/* last block written in a commit */ +struct reiserfs_journal_commit { + __u32 j_trans_id ; /* must match j_trans_id from the desc block */ + __u32 j_len ; /* ditto */ + __u32 j_realblock[JOURNAL_TRANS_HALF] ; /* real locations for each block */ + char j_digest[16] ; /* md5 sum of all the blocks involved, including desc and commit. not used, kill it */ +} ; + +/* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the +** last fully flushed transaction. fully flushed means all the log blocks and all the real blocks are on disk, +** and this transaction does not need to be replayed. +*/ +struct reiserfs_journal_header { + __u32 j_last_flush_trans_id ; /* id of last fully flushed transaction */ + __u32 j_first_unflushed_offset ; /* offset in the log of where to start replay after a crash */ + __u32 j_mount_id ; +} ; + +/* these are used to keep flush pages that contain converted direct items. +** if the page is not flushed before the transaction that converted it +** is committed, we risk losing data +** +** note, while a page is in this list, its counter is incremented. +*/ +struct reiserfs_page_list { + struct reiserfs_page_list *next ; + struct reiserfs_page_list *prev ; + struct page *page ; + unsigned long blocknr ; /* block number holding converted data */ + + /* if a transaction writer has the page locked the flush_page_list + ** function doesn't need to (and can't) get the lock while flushing + ** the page. do_not_lock needs to be set by anyone who calls journal_end + ** with a page lock held. They have to look in the inode and see + ** if the inode has the page they have locked in the flush list. + ** + ** this sucks. 
+ */ + int do_not_lock ; +} ; + +extern task_queue reiserfs_commit_thread_tq ; +extern wait_queue_head_t reiserfs_commit_thread_wait ; + +/* biggest tunable defines are right here */ +#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ +#define JOURNAL_MAX_BATCH 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */ +#define JOURNAL_MAX_COMMIT_AGE 30 +#define JOURNAL_MAX_TRANS_AGE 30 +#define JOURNAL_PER_BALANCE_CNT 12 /* must be >= (5 + 2 * (MAX_HEIGHT-2) + 1) */ + +/* both of these can be as low as 1, or as high as you want. The min is the +** number of 4k bitmap nodes preallocated on mount. New nodes are allocated +** as needed, and released when transactions are committed. On release, if +** the current number of nodes is > max, the node is freed, otherwise, +** it is put on a free list for faster use later. +*/ +#define REISERFS_MIN_BITMAP_NODES 10 +#define REISERFS_MAX_BITMAP_NODES 100 + +#define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */ +#define JBH_HASH_MASK 8191 + +/* After several hours of tedious analysis, the following hash + * function won. Do not mess with it... -DaveM + */ +#define _jhashfn(dev,block) \ + ((((dev)<<(JBH_HASH_SHIFT - 6)) ^ ((dev)<<(JBH_HASH_SHIFT - 9))) ^ \ + (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) +#define journal_hash(t,dev,block) ((t)[_jhashfn((dev),(block)) & JBH_HASH_MASK]) + +/* finds n'th buffer with 0 being the start of this commit. Needs to go away, j_ap_blocks has changed +** since I created this. 
One chunk of code in journal.c needs changing before deleting it +*/ +#define JOURNAL_BUFFER(j,n) ((j)->j_ap_blocks[((j)->j_start + (n)) % JOURNAL_BLOCK_COUNT]) + +void reiserfs_wait_on_write_block(struct super_block *s) ; +void reiserfs_block_writes(struct reiserfs_transaction_handle *th) ; +void reiserfs_allow_writes(struct super_block *s) ; +void reiserfs_check_lock_depth(char *caller) ; +void reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh, int wait) ; +void reiserfs_restore_prepared_buffer(struct super_block *, struct buffer_head *bh) ; +int journal_init(struct super_block *) ; +int journal_release(struct reiserfs_transaction_handle*, struct super_block *) ; +int journal_release_error(struct reiserfs_transaction_handle*, struct super_block *) ; +int journal_end(struct reiserfs_transaction_handle *, struct super_block *, unsigned long) ; +int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *, unsigned long) ; +int journal_mark_dirty_nolog(struct reiserfs_transaction_handle *, struct super_block *, struct buffer_head *bh) ; +int journal_mark_freed(struct reiserfs_transaction_handle *, struct super_block *, unsigned long blocknr) ; +int push_journal_writer(char *w) ; +int pop_journal_writer(int windex) ; +int journal_lock_dobalance(struct super_block *p_s_sb) ; +int journal_unlock_dobalance(struct super_block *p_s_sb) ; +int journal_transaction_should_end(struct reiserfs_transaction_handle *, int) ; +int reiserfs_in_journal(struct super_block *p_s_sb, kdev_t dev, unsigned long bl, int size, int searchall, unsigned long *next) ; +int journal_begin(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ; +int journal_join(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ; +struct super_block *reiserfs_get_super(kdev_t dev) ; +void flush_async_commits(struct super_block *p_s_sb) ; + +int remove_from_transaction(struct super_block *p_s_sb, unsigned long 
blocknr, int already_cleaned) ;
+int remove_from_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, struct buffer_head *bh, int remove_freed) ;
+
+int buffer_journaled(struct buffer_head *bh) ;
+int mark_buffer_journal_new(struct buffer_head *bh) ;
+int reiserfs_sync_all_buffers(kdev_t dev, int wait) ;
+int reiserfs_sync_buffers(kdev_t dev, int wait) ;
+int reiserfs_add_page_to_flush_list(struct reiserfs_transaction_handle *,
+                                    struct inode *, struct buffer_head *) ;
+int reiserfs_remove_page_from_flush_list(struct reiserfs_transaction_handle *,
+                                         struct inode *) ;
+
+int reiserfs_allocate_list_bitmaps(struct super_block *s, struct reiserfs_list_bitmap *, int) ;
+
+  /* why is this kerplunked right here? */
+/* returns 1 when bh is non-NULL and BH_JPrepared is set in bh->b_state,
+** 0 otherwise (including for a NULL bh)
+*/
+static inline int reiserfs_buffer_prepared(struct buffer_head *bh) {
+  if (!bh)
+    return 0 ;
+  return test_bit(BH_JPrepared, &bh->b_state) ? 1 : 0 ;
+}
+
+/* buffer was journaled, waiting to get to disk.
+** returns the test_bit() result for BH_JDirty_wait, or 0 for a NULL bh
+*/
+static inline int buffer_journal_dirty(struct buffer_head *bh) {
+  if (!bh)
+    return 0 ;
+  return test_bit(BH_JDirty_wait, &bh->b_state) ;
+}
+/* clears BH_JDirty_wait in bh->b_state; a NULL bh is ignored.  always returns 0 */
+static inline int mark_buffer_notjournal_dirty(struct buffer_head *bh) {
+  if (bh)
+    clear_bit(BH_JDirty_wait, &bh->b_state) ;
+  return 0 ;
+}
+/* clears BH_JNew in bh->b_state; a NULL bh is ignored.  always returns 0 */
+static inline int mark_buffer_notjournal_new(struct buffer_head *bh) {
+  if (bh)
+    clear_bit(BH_JNew, &bh->b_state) ;
+  return 0 ;
+}
+
+/* objectid.c */
+__u32 reiserfs_get_unused_objectid (struct reiserfs_transaction_handle *th);
+void reiserfs_release_objectid (struct reiserfs_transaction_handle *th, __u32 objectid_to_release);
+int reiserfs_convert_objectid_map_v1(struct super_block *) ;
+
+/* stree.c */
+int B_IS_IN_TREE(struct buffer_head *);
+extern inline void copy_key (void * to, void * from);
+extern inline void copy_short_key (void * to, void * from);
+extern inline void copy_item_head(void * p_v_to, void * p_v_from);
+
+// first key is in le form, second - in cpu form
+extern inline int comp_keys (struct key * le_key, struct cpu_key * 
cpu_key); +extern inline int comp_short_keys (struct key * le_key, struct cpu_key * cpu_key); +extern inline void le_key2cpu_key (struct cpu_key * to, struct key * from); + +// both are cpu keys +extern inline int comp_cpu_keys (struct cpu_key *, struct cpu_key *); +extern inline int comp_short_cpu_keys (struct cpu_key *, struct cpu_key *); +extern inline void cpu_key2cpu_key (struct cpu_key *, struct cpu_key *); + +// both are in le form +extern inline int comp_le_keys (struct key *, struct key *); +extern inline int comp_short_le_keys (struct key *, struct key *); + +// +// get key version from on disk key - kludge +// +extern inline int le_key_version (struct key * key) +{ + int type; + + type = le16_to_cpu (key->u.k_offset_v2.k_type); + if (type != TYPE_DIRECT && type != TYPE_INDIRECT && type != TYPE_DIRENTRY) + return ITEM_VERSION_1; + + return ITEM_VERSION_2; + +} + + +extern inline void copy_key (void * to, void * from) +{ + memcpy (to, from, KEY_SIZE); +} + + +int comp_items (struct item_head * p_s_ih, struct path * p_s_path); +struct key * get_rkey (struct path * p_s_chk_path, struct super_block * p_s_sb); +inline int bin_search (void * p_v_key, void * p_v_base, int p_n_num, int p_n_width, int * p_n_pos); +int search_by_key (struct super_block *, struct cpu_key *, struct path *, int); +#define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL) +int search_for_position_by_key (struct super_block * p_s_sb, struct cpu_key * p_s_cpu_key, struct path * p_s_search_path); +extern inline void decrement_bcount (struct buffer_head * p_s_bh); +void decrement_counters_in_path (struct path * p_s_search_path); +void pathrelse (struct path * p_s_search_path); +int reiserfs_check_path(struct path *p) ; +void pathrelse_and_restore (struct super_block *s, struct path * p_s_search_path); + +int reiserfs_insert_item (struct reiserfs_transaction_handle *th, + struct path * path, + struct cpu_key * key, + struct item_head * ih, const char * body); + +int 
reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, + struct path * path, + struct cpu_key * key, + const char * body, int paste_size); + +int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th, + struct path * path, + struct cpu_key * key, + struct inode * inode, + struct page *page, + loff_t new_file_size); + +int reiserfs_delete_item (struct reiserfs_transaction_handle *th, + struct path * path, + struct cpu_key * key, + struct inode * inode, + struct buffer_head * p_s_un_bh); + + +void reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * p_s_inode); +void reiserfs_do_truncate (struct reiserfs_transaction_handle *th, + struct inode * p_s_inode, struct page *, + int update_timestamps); +// +//void lock_inode_to_convert (struct inode * p_s_inode); +//void unlock_inode_after_convert (struct inode * p_s_inode); +//void increment_i_read_sync_counter (struct inode * p_s_inode); +//void decrement_i_read_sync_counter (struct inode * p_s_inode); + + +#define block_size(inode) ((inode)->i_sb->s_blocksize) +#define file_size(inode) ((inode)->i_size) +#define tail_size(inode) (file_size (inode) & (block_size (inode) - 1)) + +#define tail_has_to_be_packed(inode) (!dont_have_tails ((inode)->i_sb) &&\ +!STORE_TAIL_IN_UNFM(file_size (inode), tail_size(inode), block_size (inode))) + +/* +int get_buffer_by_range (struct super_block * p_s_sb, struct key * p_s_range_begin, struct key * p_s_range_end, + struct buffer_head ** pp_s_buf, unsigned long * p_n_objectid); +int get_buffers_from_range (struct super_block * p_s_sb, struct key * p_s_range_start, struct key * p_s_range_end, + struct buffer_head ** p_s_range_buffers, + int n_max_nr_buffers_to_return); +*/ + +#ifndef REISERFS_FSCK + +//inline int is_left_mergeable (struct item_head * ih, unsigned long bsize); + +#else + +int is_left_mergeable (struct super_block * s, struct path * path); +int is_right_mergeable (struct super_block * s, struct path * path); +int 
are_items_mergeable (struct item_head * left, struct item_head * right, int bsize); + +#endif +void padd_item (char * item, int total_length, int length); + + +/* inode.c */ + +int reiserfs_prepare_write(struct file *, struct page *, unsigned, unsigned) ; +void reiserfs_truncate_file(struct inode *, int update_timestamps) ; +void make_cpu_key (struct cpu_key * cpu_key, const struct inode * inode, loff_t offset, + int type, int key_length); +void make_le_item_head (struct item_head * ih, struct cpu_key * key, int version, + loff_t offset, int type, int length, int entry_count); +/*void store_key (struct key * key); +void forget_key (struct key * key);*/ +int reiserfs_get_block (struct inode * inode, long block, + struct buffer_head * bh_result, int create); +struct inode * reiserfs_iget (struct super_block * s, struct cpu_key * key); +void reiserfs_read_inode (struct inode * inode) ; +void reiserfs_read_inode2(struct inode * inode, void *p) ; +void reiserfs_delete_inode (struct inode * inode); +extern int reiserfs_notify_change(struct dentry * dentry, struct iattr * attr); +void reiserfs_write_inode (struct inode * inode, int) ; + +/* we don't mark inodes dirty, we just log them */ +void reiserfs_dirty_inode (struct inode * inode) ; + +struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th, const struct inode * dir, int mode, + const char * symname, int item_len, + struct dentry *dentry, struct inode *inode, int * err); +int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode); +void reiserfs_update_sd (struct reiserfs_transaction_handle *th, struct inode * inode); +int reiserfs_inode_setattr(struct dentry *, struct iattr * attr); + +/* namei.c */ +inline void set_de_name_and_namelen (struct reiserfs_dir_entry * de); +int search_by_entry_key (struct super_block * sb, struct cpu_key * key, struct path * path, + struct reiserfs_dir_entry * de); +struct dentry * reiserfs_lookup (struct inode * dir, struct dentry 
*dentry); +int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode); +int reiserfs_mknod (struct inode * dir_inode, struct dentry *dentry, int mode, int rdev); +int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode); +int reiserfs_rmdir (struct inode * dir, struct dentry *dentry); +int reiserfs_unlink (struct inode * dir, struct dentry *dentry); +int reiserfs_symlink (struct inode * dir, struct dentry *dentry, const char * symname); +int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct dentry *dentry); +int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir, struct dentry *new_dentry); + +/* super.c */ +inline void reiserfs_mark_buffer_dirty (struct buffer_head * bh, int flag); +inline void reiserfs_mark_buffer_clean (struct buffer_head * bh); +void reiserfs_panic (struct super_block * s, const char * fmt, ...); +void reiserfs_write_super (struct super_block * s); +void reiserfs_put_super (struct super_block * s); +int reiserfs_remount (struct super_block * s, int * flags, char * data); +/*int read_super_block (struct super_block * s, int size); +int read_bitmaps (struct super_block * s); +int read_old_bitmaps (struct super_block * s); +int read_old_super_block (struct super_block * s, int size);*/ +struct super_block * reiserfs_read_super (struct super_block * s, void * data, int silent); +int reiserfs_statfs (struct super_block * s, struct statfs * buf); + +/* dir.c */ +extern struct inode_operations reiserfs_dir_inode_operations; +extern struct file_operations reiserfs_dir_operations; + +/* tail_conversion.c */ +int direct2indirect (struct reiserfs_transaction_handle *, struct inode *, struct path *, struct buffer_head *, loff_t); +int indirect2direct (struct reiserfs_transaction_handle *, struct inode *, struct page *, struct path *, struct cpu_key *, loff_t, char *); +void reiserfs_unmap_buffer(struct buffer_head *) ; + + +/* file.c */ +extern struct 
inode_operations reiserfs_file_inode_operations; +extern struct file_operations reiserfs_file_operations; +extern struct address_space_operations reiserfs_address_space_operations ; +int get_new_buffer (struct reiserfs_transaction_handle *th, struct buffer_head *, + struct buffer_head **, struct path *); + + +/* buffer2.c */ +struct buffer_head * reiserfs_getblk (kdev_t n_dev, int n_block, int n_size); +void wait_buffer_until_released (struct buffer_head * bh); +struct buffer_head * reiserfs_bread (kdev_t n_dev, int n_block, int n_size); + + +/* fix_nodes.c */ +void * reiserfs_kmalloc (size_t size, int flags, struct super_block * s); +void reiserfs_kfree (const void * vp, size_t size, struct super_block * s); +int fix_nodes (int n_op_mode, struct tree_balance * p_s_tb, struct item_head * p_s_ins_ih, const void *); +void unfix_nodes (struct tree_balance *); +void free_buffers_in_tb (struct tree_balance * p_s_tb); + + +/* prints.c */ +void reiserfs_panic (struct super_block * s, const char * fmt, ...); +void reiserfs_warning (const char * fmt, ...); +void reiserfs_debug (struct super_block *s, int level, const char * fmt, ...); +void print_virtual_node (struct virtual_node * vn); +void print_indirect_item (struct buffer_head * bh, int item_num); +void store_print_tb (struct tree_balance * tb); +void print_cur_tb (char * mes); +void print_de (struct reiserfs_dir_entry * de); +void print_bi (struct buffer_info * bi, char * mes); +#define PRINT_LEAF_ITEMS 1 /* print all items */ +#define PRINT_DIRECTORY_ITEMS 2 /* print directory items */ +#define PRINT_DIRECT_ITEMS 4 /* print contents of direct items */ +void print_block (struct buffer_head * bh, ...); +void print_path (struct tree_balance * tb, struct path * path); +void print_bmap (struct super_block * s, int silent); +void print_bmap_block (int i, char * data, int size, int silent); +/*void print_super_block (struct super_block * s, char * mes);*/ +void print_objectid_map (struct super_block * s); +void 
print_block_head (struct buffer_head * bh, char * mes); +void check_leaf (struct buffer_head * bh); +void check_internal (struct buffer_head * bh); +void print_statistics (struct super_block * s); + +/* lbalance.c */ +int leaf_move_items (int shift_mode, struct tree_balance * tb, int mov_num, int mov_bytes, struct buffer_head * Snew); +int leaf_shift_left (struct tree_balance * tb, int shift_num, int shift_bytes); +int leaf_shift_right (struct tree_balance * tb, int shift_num, int shift_bytes); +void leaf_delete_items (struct buffer_info * cur_bi, int last_first, int first, int del_num, int del_bytes); +void leaf_insert_into_buf (struct buffer_info * bi, int before, + struct item_head * inserted_item_ih, const char * inserted_item_body, int zeros_number); +void leaf_paste_in_buffer (struct buffer_info * bi, int pasted_item_num, + int pos_in_item, int paste_size, const char * body, int zeros_number); +void leaf_cut_from_buffer (struct buffer_info * bi, int cut_item_num, int pos_in_item, + int cut_size); +void leaf_paste_entries (struct buffer_head * bh, int item_num, int before, + int new_entry_count, struct reiserfs_de_head * new_dehs, const char * records, int paste_size); +/* ibalance.c */ +int balance_internal (struct tree_balance * , int, int, struct item_head * , + struct buffer_head **); + +/* do_balance.c */ +inline void do_balance_mark_leaf_dirty (struct tree_balance * tb, + struct buffer_head * bh, int flag); +#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty +#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty + +void do_balance (struct tree_balance * tb, struct item_head * ih, + const char * body, int flag); +void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh); + +int get_left_neighbor_position (struct tree_balance * tb, int h); +int get_right_neighbor_position (struct tree_balance * tb, int h); +void replace_key (struct tree_balance * tb, struct buffer_head *, int, struct buffer_head *, int); +void 
replace_lkey (struct tree_balance *, int, struct item_head *); +void replace_rkey (struct tree_balance *, int, struct item_head *); +void make_empty_node (struct buffer_info *); +struct buffer_head * get_FEB (struct tree_balance *); + +/* bitmap.c */ +int is_reusable (struct super_block * s, unsigned long block, int bit_value); +void reiserfs_free_block (struct reiserfs_transaction_handle *th, unsigned long); +int reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, + unsigned long * pblocknrs, unsigned long start_from, int amount_needed); +int reiserfs_new_unf_blocknrs (struct reiserfs_transaction_handle *th, + unsigned long * pblocknr, unsigned long start_from); +#ifdef REISERFS_PREALLOCATE +int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th, + struct inode * inode, + unsigned long * pblocknr, + unsigned long start_from); + +void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, + struct inode * inode); +#endif + +/* hashes.c */ +__u32 keyed_hash (const char *msg, int len); +__u32 yura_hash (const char *msg, int len); +__u32 r5_hash (const char *msg, int len); + +/* version.c */ +char *reiserfs_get_version_string(void) ; + +/* the ext2 bit routines adjust for big or little endian as +** appropriate for the arch, so in our laziness we use them rather +** than using the bit routines they call more directly. These +** routines must be used when changing on disk bitmaps. 
*/ +#define reiserfs_test_and_set_le_bit ext2_set_bit +#define reiserfs_test_and_clear_le_bit ext2_clear_bit +#define reiserfs_test_le_bit ext2_test_bit +#define reiserfs_find_next_zero_le_bit ext2_find_next_zero_bit + + +// +// this was totally copied from linux's +// find_first_zero_bit and changed a bit +// + +#ifdef __i386__ + +extern __inline__ int +find_first_nonzero_bit(void * addr, unsigned size) { + int res; + int __d0; + void *__d1; + + + if (!size) { + return (0); + } + __asm__ __volatile__ ( + "cld\n\t" + "xorl %%eax,%%eax\n\t" + "repe; scasl\n\t" + "je 1f\n\t" + "movl -4(%%edi),%%eax\n\t" + "subl $4, %%edi\n\t" + "bsfl %%eax,%%eax\n\t" + "1:\tsubl %%edx,%%edi\n\t" + "shll $3,%%edi\n\t" + "addl %%edi,%%eax" + :"=a" (res), + "=c"(__d0), "=D"(__d1) + :"1" ((size + 31) >> 5), "d" (addr), "2" (addr)); + return (res); +} + +#else /* __i386__ */ + +/* Return the zero-based index of the first set bit at or after offset + in the size-bit bitmap at addr, read as 32-bit words; return size + when no such bit exists. Zero-based so that it agrees with the + bsfl-based i386 find_first_nonzero_bit above. */ +extern __inline__ int find_next_nonzero_bit(void * addr, unsigned size, unsigned offset) +{ + unsigned int * p = ((unsigned int *) addr) + (offset >> 5); + unsigned int result = offset & ~31UL; + unsigned int tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 31UL; + if (offset) { + tmp = *p++; + /* set to zero first offset bits; 32-bit mask so this also works where long is 64 bits */ + tmp &= ~0U << offset; + if (size < 32) + goto found_first; + if (tmp != 0U) + goto found_middle; + size -= 32; + result += 32; + } + while (size >= 32) { + if ((tmp = *p++) != 0U) + goto found_middle; + result += 32; + size -= 32; + } + if (!size) + return result; + tmp = *p; +found_first: + /* last, partial word: ignore bits at or beyond size */ + tmp &= ~0U >> (32 - size); + if (tmp == 0U) + return result + size; +found_middle: + /* ffs() numbers bits from 1 */ + return result + ffs(tmp) - 1; +} + +#define find_first_nonzero_bit(addr,size) find_next_nonzero_bit((addr), (size), 0) + +#endif /* __i386__ */ + +/* sometimes reiserfs_truncate may require to allocate few new blocks + to perform indirect2direct conversion. People probably used to + think, that truncate should work without problems on a filesystem + without free disk space. They may complain that they can not + truncate due to lack of free disk space.
This spare space allows us + to not worry about it. 500 is probably too much, but it should be + absolutely safe */ +#define SPARE_SPACE 500 + +extern inline unsigned long reiserfs_get_journal_block(struct super_block *s) { + return le32_to_cpu(SB_DISK_SUPER_BLOCK(s)->s_journal_block) ; +} +extern inline unsigned long reiserfs_get_journal_orig_size(struct super_block *s) { + return le32_to_cpu(SB_DISK_SUPER_BLOCK(s)->s_orig_journal_size) ; +} + +/* prototypes from ioctl.c */ +int reiserfs_ioctl (struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg); +int reiserfs_unpack (struct inode * inode, struct file * filp); + +/* ioctl's command */ +#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) + +#endif /* _LINUX_REISER_FS_H */ + + diff --git a/include/linux/reiserfs_fs_i.h b/include/linux/reiserfs_fs_i.h new file mode 100644 index 000000000..3a60b8667 --- /dev/null +++ b/include/linux/reiserfs_fs_i.h @@ -0,0 +1,63 @@ +#ifndef _REISER_FS_I +#define _REISER_FS_I + +/* these are used to keep track of the pages that need +** flushing before the current transaction can commit +*/ +struct reiserfs_page_list ; + +struct reiserfs_inode_info { + __u32 i_key [4];/* key is still 4 32 bit integers */ + + /* this comment will be totally + cryptic to readers not familiar + with 3.5/3.6 format conversion, and + it does not consider that that 3.6 + might not be the last version */ + int i_version; // this says whether file is old or new + + int i_pack_on_close ; // file might need tail packing on close + + __u32 i_first_direct_byte; // offset of first byte stored in direct item. + + /* pointer to the page that must be flushed before + ** the current transaction can commit. + ** + ** this pointer is only used when the tail is converted back into + ** a direct item, or the file is deleted + */ + struct reiserfs_page_list *i_converted_page ; + + /* we save the id of the transaction when we did the direct->indirect + ** conversion. 
That allows us to flush the buffers to disk + ** without having to update this inode to zero out the converted + ** page variable + */ + int i_conversion_trans_id ; + + /* My guess is this contains the first + unused block of a sequence of + blocks plus the length of the + sequence, which I think is always + at least two at the time of the + preallocation. I really prefer + allocate on flush conceptually..... + + You know, it really annoys me when + code is this badly commented that I + have to guess what it does. + Neither I nor anyone else has time + for guessing what your + datastructures mean. -Hans */ + //For preallocation + int i_prealloc_block; + int i_prealloc_count; + + /* I regret that you think the below + is a comment you should make.... -Hans */ + //nopack-attribute + int nopack; +}; + + +#endif diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h new file mode 100644 index 000000000..a6921dad0 --- /dev/null +++ b/include/linux/reiserfs_fs_sb.h @@ -0,0 +1,398 @@ +/* Copyright 1996-2000 Hans Reiser, see reiserfs/README for licensing + * and copyright details */ + +#ifndef _LINUX_REISER_FS_SB +#define _LINUX_REISER_FS_SB + +#ifdef __KERNEL__ +#include <linux/tqueue.h> +#endif + +// +// super block's field values +// +/*#define REISERFS_VERSION 0 undistributed bitmap */ +/*#define REISERFS_VERSION 1 distributed bitmap and resizer*/ +#define REISERFS_VERSION_2 2 /* distributed bitmap, resizer, 64-bit, etc*/ +#define UNSET_HASH 0 // read_super will guess about, what hash names + // in directories were sorted with +#define TEA_HASH 1 +#define YURA_HASH 2 +#define R5_HASH 3 +#define DEFAULT_HASH R5_HASH + +/* this is the on disk super block */ + +struct reiserfs_super_block +{ + __u32 s_block_count; + __u32 s_free_blocks; /* free blocks count */ + __u32 s_root_block; /* root block number */ + __u32 s_journal_block; /* journal block number */ + __u32 s_journal_dev; /* journal device number */ + + /* Since journal size is currently a 
#define in a header file, if + ** someone creates a disk with a 16MB journal and moves it to a + ** system with 32MB journal default, they will overflow their journal + ** when they mount the disk. s_orig_journal_size, plus some checks + ** while mounting (inside journal_init) prevent that from happening + */ + + /* great comment Chris. Thanks. -Hans */ + + __u32 s_orig_journal_size; + __u32 s_journal_trans_max ; /* max number of blocks in a transaction. */ + __u32 s_journal_block_count ; /* total size of the journal. can change over time */ + __u32 s_journal_max_batch ; /* max number of blocks to batch into a trans */ + __u32 s_journal_max_commit_age ; /* in seconds, how old can an async commit be */ + __u32 s_journal_max_trans_age ; /* in seconds, how old can a transaction be */ + __u16 s_blocksize; /* block size */ + __u16 s_oid_maxsize; /* max size of object id array, see get_objectid() commentary */ + __u16 s_oid_cursize; /* current size of object id array */ + __u16 s_state; /* valid or error */ + char s_magic[12]; /* reiserfs magic string indicates that file system is reiserfs */ + __u32 s_hash_function_code; /* indicate, what hash function is being use to sort names in a directory*/ + __u16 s_tree_height; /* height of disk tree */ + __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */ + __u16 s_version; /* I'd prefer it if this was a string, + something like "3.6.4", and maybe + 16 bytes long mostly unused. We + don't need to save bytes in the + superblock. 
-Hans */ + __u16 s_reserved; + char s_unused[128] ; /* zero filled by mkreiserfs */ +} __attribute__ ((__packed__)); + +#define SB_SIZE (sizeof(struct reiserfs_super_block)) + +/* this is the super from 3.5.X, where X >= 10 */ +struct reiserfs_super_block_v1 +{ + __u32 s_block_count; /* blocks count */ + __u32 s_free_blocks; /* free blocks count */ + __u32 s_root_block; /* root block number */ + __u32 s_journal_block; /* journal block number */ + __u32 s_journal_dev; /* journal device number */ + __u32 s_orig_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */ + __u32 s_journal_trans_max ; /* max number of blocks in a transaction. */ + __u32 s_journal_block_count ; /* total size of the journal. can change over time */ + __u32 s_journal_max_batch ; /* max number of blocks to batch into a trans */ + __u32 s_journal_max_commit_age ; /* in seconds, how old can an async commit be */ + __u32 s_journal_max_trans_age ; /* in seconds, how old can a transaction be */ + __u16 s_blocksize; /* block size */ + __u16 s_oid_maxsize; /* max size of object id array, see get_objectid() commentary */ + __u16 s_oid_cursize; /* current size of object id array */ + __u16 s_state; /* valid or error */ + char s_magic[16]; /* reiserfs magic string indicates that file system is reiserfs */ + __u16 s_tree_height; /* height of disk tree */ + __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */ + __u32 s_reserved; +} __attribute__ ((__packed__)); + +#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) + +/* LOGGING -- */ + +/* These all interelate for performance. +** +** If the journal block count is smaller than n transactions, you lose speed. +** I don't know what n is yet, I'm guessing 8-16. +** +** typical transaction size depends on the application, how often fsync is +** called, and how many metadata blocks you dirty in a 30 second period. 
+** The more small files (<16k) you use, the larger your transactions will +** be. +** +** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal +** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough +** to prevent wrapping before dirty meta blocks get to disk. +** +** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal +** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping. +** +** The larger the batch max age, the better the speed, and the more meta data changes you'll lose after a crash. +** +*/ + +/* don't mess with these for a while */ + /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ +#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ +#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ +#define JOURNAL_TRANS_MAX 1024 /* biggest possible single transaction, don't change for now (8/3/99) */ +#define JOURNAL_HASH_SIZE 8192 /* entries in each of the j_hash_table and j_list_hash_table arrays */ +#define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */ +#define JOURNAL_LIST_COUNT 64 /* entries in the j_journal_list array */ + +/* these are bh_state bit flag offset numbers, for use in the buffer head */ + +#define BH_JDirty 16 /* journal data needs to be written before buffer can be marked dirty */ +#define BH_JDirty_wait 18 /* commit is done, buffer marked dirty */ +#define BH_JNew 19 /* buffer allocated during this transaction, no need to write if freed during this trans too */ + +/* ugly. metadata blocks must be prepared before they can be logged. +** prepared means unlocked and cleaned. If the block is prepared, but not +** logged for some reason, any bits cleared while preparing it must be +** set again.
+*/ +#define BH_JPrepared 20 /* block has been prepared for the log */ +#define BH_JRestore_dirty 22 /* restore the dirty bit later */ + +/* One of these for every block in every transaction +** Each one is in two hash tables. First, a hash of the current transaction, and after journal_end, a +** hash of all the in memory transactions. +** next and prev are used by the current transaction (journal_hash). +** hnext and hprev are used by journal_list_hash. If a block is in more than one transaction, the journal_list_hash +** links it in multiple times. This allows flush_journal_list to remove just the cnode belonging +** to a given transaction. +*/ +struct reiserfs_journal_cnode { + struct buffer_head *bh ; /* real buffer head */ + kdev_t dev ; /* dev of real buffer head */ + unsigned long blocknr ; /* block number of real buffer head, == 0 when buffer on disk */ + int state ; + struct reiserfs_journal_list *jlist ; /* journal list this cnode lives in */ + struct reiserfs_journal_cnode *next ; /* next in transaction list */ + struct reiserfs_journal_cnode *prev ; /* prev in transaction list */ + struct reiserfs_journal_cnode *hprev ; /* prev in hash list */ + struct reiserfs_journal_cnode *hnext ; /* next in hash list */ +}; + +struct reiserfs_bitmap_node { + int id ; + char *data ; + struct list_head list ; +} ; + +struct reiserfs_list_bitmap { + struct reiserfs_journal_list *journal_list ; + struct reiserfs_bitmap_node **bitmaps ; +} ; + +/* +** transaction handle which is passed around for all journal calls +*/ +struct reiserfs_transaction_handle { + /* ifdef it. -Hans */ + char *t_caller ; /* debugging use */ + int t_blocks_logged ; /* number of blocks this writer has logged */ + int t_blocks_allocated ; /* number of blocks this writer allocated */ + unsigned long t_trans_id ; /* sanity check, equals the current trans id */ + struct super_block *t_super ; /* super for this FS when journal_begin was + called. 
saves calls to reiserfs_get_super */ + +} ; + +/* +** one of these for each transaction. The most important part here is the j_realblock. +** this list of cnodes is used to hash all the blocks in all the commits, to mark all the +** real buffer heads dirty once all the commits hit the disk, +** and to make sure every real block in a transaction is on disk before allowing the log area +** to be overwritten */ +struct reiserfs_journal_list { + unsigned long j_start ; + unsigned long j_len ; + atomic_t j_nonzerolen ; + atomic_t j_commit_left ; + atomic_t j_flushing ; + atomic_t j_commit_flushing ; + atomic_t j_older_commits_done ; /* all commits older than this on disk*/ + unsigned long j_trans_id ; + time_t j_timestamp ; + struct reiserfs_list_bitmap *j_list_bitmap ; + struct buffer_head *j_commit_bh ; /* commit buffer head */ + struct reiserfs_journal_cnode *j_realblock ; + struct reiserfs_journal_cnode *j_freedlist ; /* list of buffers that were freed during this trans. free each of these on flush */ + wait_queue_head_t j_commit_wait ; /* wait for all the commit blocks to be flushed */ + wait_queue_head_t j_flush_wait ; /* wait for all the real blocks to be flushed */ +} ; + +struct reiserfs_page_list ; /* defined in reiserfs_fs.h */ + +struct reiserfs_journal { + struct buffer_head ** j_ap_blocks ; /* journal blocks on disk */ + struct reiserfs_journal_cnode *j_last ; /* newest journal block */ + struct reiserfs_journal_cnode *j_first ; /* oldest journal block. start here for traverse */ + + int j_state ; + unsigned long j_trans_id ; + unsigned long j_mount_id ; + unsigned long j_start ; /* start of current waiting commit (index into j_ap_blocks) */ + unsigned long j_len ; /* lenght of current waiting commit */ + unsigned long j_len_alloc ; /* number of buffers requested by journal_begin() */ + atomic_t j_wcount ; /* count of writers for current commit */ + unsigned long j_bcount ; /* batch count. 
allows turning X transactions into 1 */ + unsigned long j_first_unflushed_offset ; /* first unflushed transactions offset */ + unsigned long j_last_flush_trans_id ; /* last fully flushed journal timestamp */ + struct buffer_head *j_header_bh ; + + /* j_flush_pages must be flushed before the current transaction can + ** commit + */ + struct reiserfs_page_list *j_flush_pages ; + time_t j_trans_start_time ; /* time this transaction started */ + wait_queue_head_t j_wait ; /* wait journal_end to finish I/O */ + atomic_t j_wlock ; /* lock for j_wait */ + wait_queue_head_t j_join_wait ; /* wait for current transaction to finish before starting new one */ + atomic_t j_jlock ; /* lock for j_join_wait */ + int j_journal_list_index ; /* journal list number of the current trans */ + int j_list_bitmap_index ; /* number of next list bitmap to use */ + int j_must_wait ; /* no more journal begins allowed. MUST sleep on j_join_wait */ + int j_next_full_flush ; /* next journal_end will flush all journal list */ + int j_next_async_flush ; /* next journal_end will flush all async commits */ + + int j_cnode_used ; /* number of cnodes on the used list */ + int j_cnode_free ; /* number of cnodes on the free list */ + + struct reiserfs_journal_cnode *j_cnode_free_list ; + struct reiserfs_journal_cnode *j_cnode_free_orig ; /* orig pointer returned from vmalloc */ + + int j_free_bitmap_nodes ; + int j_used_bitmap_nodes ; + struct list_head j_bitmap_nodes ; + struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS] ; /* array of bitmaps to record the deleted blocks */ + struct reiserfs_journal_list j_journal_list[JOURNAL_LIST_COUNT] ; /* array of all the journal lists */ + struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for real buffer heads in current trans */ + struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for all the real buffer heads in all + the transactions */ +}; + +#define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. 
magic string to find desc blocks in the journal */ + + +typedef __u32 (*hashf_t) (const char *, int); + +/* reiserfs union of in-core super block data */ +struct reiserfs_sb_info +{ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + /* both the comment and the choice of + name are unclear for s_rs -Hans */ + struct reiserfs_super_block * s_rs; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_ap_bitmap; /* array of buffers, holding block bitmap */ + struct reiserfs_journal *s_journal ; /* pointer to journal information */ + unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ + + /* Comment? -Hans */ + void (*end_io_handler)(struct buffer_head *, int); + hashf_t s_hash_function; /* pointer to function which is used + to sort names in directory. Set on + mount */ + unsigned long s_mount_opt; /* reiserfs's mount options are set + here (currently - NOTAIL, NOLOG, + REPLAYONLY) */ + + /* Comment? -Hans */ + wait_queue_head_t s_wait; + /* To be obsoleted soon by per buffer seals.. -Hans */ + atomic_t s_generation_counter; // increased by one every time the + // tree gets re-balanced + + /* session statistics */ + int s_kmallocs; + int s_disk_reads; + int s_disk_writes; + int s_fix_nodes; + int s_do_balance; + int s_unneeded_left_neighbor; + int s_good_search_by_key_reada; + int s_bmaps; + int s_bmaps_without_search; + int s_direct2indirect; + int s_indirect2direct; +}; + + +#define NOTAIL 0 /* -o notail: no tails will be created in a session */ +#define REPLAYONLY 3 /* replay journal and return 0. Use by fsck */ +#define REISERFS_NOLOG 4 /* -o nolog: turn journalling off */ +#define REISERFS_CONVERT 5 /* -o conv: causes conversion of old + format super block to the new + format. If not specified - old + partition will be dealt with in a + manner of 3.5.x */ + +/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting +** reiserfs disks from 3.5.19 or earlier. 
99% of the time, this option +** is not required. If the normal autodetection code can't determine which +** hash to use (because both hashes had the same value for a file) +** use this option to force a specific hash. It won't allow you to override +** the existing hash on the FS, so if you have a tea hash disk, and mount +** with -o hash=rupasov, the mount will fail. +*/ +#define FORCE_TEA_HASH 6 /* try to force tea hash on mount */ +#define FORCE_RUPASOV_HASH 7 /* try to force rupasov hash on mount */ +#define FORCE_R5_HASH 8 /* try to force r5 hash on mount */ +#define FORCE_HASH_DETECT 9 /* try to detect hash function on mount */ + + +/* used for testing experimental features, makes benchmarking new + features with and without more convenient, should never be used by + users in any code shipped to users (ideally) */ + +#define REISERFS_NO_BORDER 11 +#define REISERFS_NO_UNHASHED_RELOCATION 12 +#define REISERFS_HASHED_RELOCATION 13 +#define REISERFS_TEST4 14 + +/* REISERFS_TEST1..3 are alternate names for bit numbers 11..13 defined + just above; REISERFS_TEST4 is repeated here with the same value */ +#define REISERFS_TEST1 11 +#define REISERFS_TEST2 12 +#define REISERFS_TEST3 13 +#define REISERFS_TEST4 14 + +/* each predicate below tests one bit of s_mount_opt, using the option number as the bit position */ +#define reiserfs_r5_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_R5_HASH)) +#define reiserfs_rupasov_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_RUPASOV_HASH)) +#define reiserfs_tea_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_TEA_HASH)) +#define reiserfs_hash_detect(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_HASH_DETECT)) +#define reiserfs_no_border(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_BORDER)) +#define reiserfs_no_unhashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION)) +#define reiserfs_hashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_HASHED_RELOCATION)) +#define reiserfs_test4(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_TEST4)) + +#define dont_have_tails(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << NOTAIL)) +#define replay_only(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 <<
REPLAYONLY)) +#define reiserfs_dont_log(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NOLOG)) +/* true when the on-disk version is not REISERFS_VERSION_2 and the REISERFS_CONVERT mount option bit is not set */ +#define old_format_only(s) ((SB_VERSION(s) != REISERFS_VERSION_2) && !((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_CONVERT))) + + +void reiserfs_file_buffer (struct buffer_head * bh, int list); +int reiserfs_is_super(struct super_block *s) ; +int journal_mark_dirty(struct reiserfs_transaction_handle *, struct super_block *, struct buffer_head *bh) ; +int flush_old_commits(struct super_block *s, int) ; +int show_reiserfs_locks(void) ; +int reiserfs_resize(struct super_block *, unsigned long) ; + +#define CARRY_ON 0 +#define SCHEDULE_OCCURRED 1 + + +/* shortcuts into the in-core reiserfs_sb_info of super block s */ +#define SB_BUFFER_WITH_SB(s) ((s)->u.reiserfs_sb.s_sbh) +#define SB_JOURNAL(s) ((s)->u.reiserfs_sb.s_journal) +#define SB_JOURNAL_LIST(s) (SB_JOURNAL(s)->j_journal_list) +#define SB_JOURNAL_LIST_INDEX(s) (SB_JOURNAL(s)->j_journal_list_index) +/* NOTE(review): struct reiserfs_journal as declared in this header has no + j_journal_len_free member, so any use of SB_JOURNAL_LEN_FREE would not + compile - confirm it is unused and remove it */ +#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) +#define SB_AP_BITMAP(s) ((s)->u.reiserfs_sb.s_ap_bitmap) + + +// on-disk super block fields converted to cpu form +#define SB_DISK_SUPER_BLOCK(s) ((s)->u.reiserfs_sb.s_rs) +#define SB_BLOCK_COUNT(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_block_count)) +#define SB_FREE_BLOCKS(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_free_blocks)) +#define SB_REISERFS_MAGIC(s) (SB_DISK_SUPER_BLOCK(s)->s_magic) +#define SB_ROOT_BLOCK(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_root_block)) +#define SB_TREE_HEIGHT(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_tree_height)) +#define SB_REISERFS_STATE(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_state)) +#define SB_VERSION(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_version)) +#define SB_BMAP_NR(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_bmap_nr)) + +/* PUT_SB_*: store val into the on-disk super block, converting it with cpu_to_le16/cpu_to_le32 */ +#define PUT_SB_BLOCK_COUNT(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0) +#define PUT_SB_FREE_BLOCKS(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0) +#define
PUT_SB_ROOT_BLOCK(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0) +#define PUT_SB_TREE_HEIGHT(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0) +#define PUT_SB_REISERFS_STATE(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_state = cpu_to_le16(val); } while (0) +#define PUT_SB_VERSION(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0) +#define PUT_SB_BMAP_NR(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0) + +#endif /* _LINUX_REISER_FS_SB */ + + + diff --git a/include/linux/rtc.h b/include/linux/rtc.h index fba9111da..df958b66a 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -8,7 +8,7 @@ * Copyright (C) 1999 Hewlett-Packard Co. * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> */ -#ifndef _LINUX_RTC_H +#ifndef _LINUX_RTC_H_ #define _LINUX_RTC_H_ /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 25196fc53..a382b6736 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -219,13 +219,14 @@ struct mm_struct { unsigned long rss, total_vm, locked_vm; unsigned long def_flags; unsigned long cpu_vm_mask; - unsigned long swap_cnt; /* number of pages to swap on next pass */ unsigned long swap_address; /* Architecture-specific MM context */ mm_context_t context; }; +extern int mmlist_nr; + #define INIT_MM(name) \ { \ mmap: &init_mmap, \ @@ -542,8 +543,8 @@ extern unsigned long prof_shift; #define CURRENT_TIME (xtime.tv_sec) -extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)); -extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)); +extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr)); +extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)); extern void FASTCALL(sleep_on(wait_queue_head_t *q)); extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q, signed long 
timeout)); @@ -552,12 +553,17 @@ extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q, signed long timeout)); extern void FASTCALL(wake_up_process(struct task_struct * tsk)); -#define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,WQ_FLAG_EXCLUSIVE) -#define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,0) -#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,WQ_FLAG_EXCLUSIVE) -#define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE,WQ_FLAG_EXCLUSIVE) -#define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE,0) -#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE,WQ_FLAG_EXCLUSIVE) +/* The third argument is the nr handed to __wake_up()/__wake_up_sync(): 1 for the plain variants, 0 for the _all variants, and the caller's nr for the _nr variants. */ +#define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1) +#define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr) +#define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0) +#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1) +#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr) +#define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1) +#define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr) +#define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0) +#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1) +#define wake_up_interruptible_sync_nr(x, nr) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr) +asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru); extern int in_group_p(gid_t); extern int in_egroup_p(gid_t); diff --git a/include/linux/swap.h b/include/linux/swap.h index e001de887..afb1d96b3 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -107,7 +107,7 @@ extern wait_queue_head_t kreclaimd_wait; extern int page_launder(int, int); extern int free_shortage(void);
extern int inactive_shortage(void); -extern void wakeup_kswapd(int); +extern void wakeup_kswapd(void); extern int try_to_free_pages(unsigned int gfp_mask); /* linux/mm/page_io.c */ diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 80feae4c7..f24e4de6b 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -30,7 +30,7 @@ extern struct vt_struct { wait_queue_head_t paste_wait; } *vt_cons[MAX_NR_CONSOLES]; -void (*kd_mksound)(unsigned int hz, unsigned int ticks); +extern void (*kd_mksound)(unsigned int hz, unsigned int ticks); /* console.c */ |