summaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/Config.in4
-rw-r--r--drivers/block/DAC960.c68
-rw-r--r--drivers/block/Makefile15
-rw-r--r--drivers/block/README.lvm8
-rw-r--r--drivers/block/acsi.c24
-rw-r--r--drivers/block/acsi_slm.c13
-rw-r--r--drivers/block/amiflop.c4
-rw-r--r--drivers/block/ataflop.c10
-rw-r--r--drivers/block/cpqarray.c17
-rw-r--r--drivers/block/cs5530.c4
-rw-r--r--drivers/block/floppy.c83
-rw-r--r--drivers/block/genhd.c2
-rw-r--r--drivers/block/hd.c20
-rw-r--r--drivers/block/icside.c2
-rw-r--r--drivers/block/ide-cd.c8
-rw-r--r--drivers/block/ide-disk.c11
-rw-r--r--drivers/block/ide-dma.c11
-rw-r--r--drivers/block/ide-floppy.c9
-rw-r--r--drivers/block/ide-probe.c21
-rw-r--r--drivers/block/ide-tape.c22
-rw-r--r--drivers/block/ide.c49
-rw-r--r--drivers/block/ll_rw_blk.c396
-rw-r--r--drivers/block/loop.c342
-rw-r--r--drivers/block/lvm-snap.c434
-rw-r--r--drivers/block/lvm.c2556
-rw-r--r--drivers/block/md.c14
-rw-r--r--drivers/block/nbd.c82
-rw-r--r--drivers/block/paride/pcd.c2
-rw-r--r--drivers/block/paride/pd.c18
-rw-r--r--drivers/block/paride/pf.c6
-rw-r--r--drivers/block/paride/pg.c13
-rw-r--r--drivers/block/paride/pt.c17
-rw-r--r--drivers/block/ps2esdi.c24
-rw-r--r--drivers/block/raid1.c12
-rw-r--r--drivers/block/rd.c45
-rw-r--r--drivers/block/swim3.c2
-rw-r--r--drivers/block/swim_iop.c2
-rw-r--r--drivers/block/xd.c36
38 files changed, 3931 insertions, 475 deletions
diff --git a/drivers/block/Config.in b/drivers/block/Config.in
index 79bd8078e..01fc28943 100644
--- a/drivers/block/Config.in
+++ b/drivers/block/Config.in
@@ -206,8 +206,8 @@ bool 'Multiple devices driver support' CONFIG_BLK_DEV_MD
if [ "$CONFIG_BLK_DEV_MD" = "y" ]; then
tristate ' Linear (append) mode' CONFIG_MD_LINEAR
tristate ' RAID-0 (striping) mode' CONFIG_MD_STRIPED
- tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING
- tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5
+# tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING
+# tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5
fi
if [ "$CONFIG_MD_LINEAR" = "y" -o "$CONFIG_MD_STRIPED" = "y" ]; then
bool ' Boot support (linear, striped)' CONFIG_MD_BOOT
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 45e86000a..d912f8c08 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -1009,37 +1009,58 @@ static boolean DAC960_ReportDeviceConfiguration(DAC960_Controller_T *Controller)
}
-static int DAC_merge_fn(request_queue_t *q, struct request *req,
- struct buffer_head *bh)
+static inline int DAC_new_segment(request_queue_t *q, struct request *req,
+ int __max_segments)
{
int max_segments;
DAC960_Controller_T * Controller = q->queuedata;
max_segments = Controller->MaxSegmentsPerRequest[MINOR(req->rq_dev)];
+ if (__max_segments < max_segments)
+ max_segments = __max_segments;
- if (req->bhtail->b_data + req->bhtail->b_size != bh->b_data) {
- if (req->nr_segments < max_segments) {
- req->nr_segments++;
- return 1;
- }
- return 0;
+ if (req->nr_segments < max_segments) {
+ req->nr_segments++;
+ q->nr_segments++;
+ return 1;
}
+ return 0;
+}
- return 1;
+static int DAC_back_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int __max_segments)
+{
+ if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
+ return 1;
+ return DAC_new_segment(q, req, __max_segments);
+}
+
+static int DAC_front_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int __max_segments)
+{
+ if (bh->b_data + bh->b_size == req->bh->b_data)
+ return 1;
+ return DAC_new_segment(q, req, __max_segments);
}
static int DAC_merge_requests_fn(request_queue_t *q,
struct request *req,
- struct request *next)
+ struct request *next,
+ int __max_segments)
{
int max_segments;
DAC960_Controller_T * Controller = q->queuedata;
int total_segments = req->nr_segments + next->nr_segments;
max_segments = Controller->MaxSegmentsPerRequest[MINOR(req->rq_dev)];
+ if (__max_segments < max_segments)
+ max_segments = __max_segments;
if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data)
+ {
total_segments--;
+ q->nr_segments--;
+ }
if (total_segments > max_segments)
return 0;
@@ -1068,7 +1089,7 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
/*
Register the Block Device Major Number for this DAC960 Controller.
*/
- if (register_blkdev(MajorNumber, "rd", &DAC960_FileOperations) < 0)
+ if (devfs_register_blkdev(MajorNumber, "dac960", &DAC960_FileOperations) < 0)
{
DAC960_Error("UNABLE TO ACQUIRE MAJOR NUMBER %d - DETACHING\n",
Controller, MajorNumber);
@@ -1080,7 +1101,8 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
q = BLK_DEFAULT_QUEUE(MajorNumber);
blk_init_queue(q, RequestFunctions[Controller->ControllerNumber]);
blk_queue_headactive(q, 0);
- q->merge_fn = DAC_merge_fn;
+ q->back_merge_fn = DAC_back_merge_fn;
+ q->front_merge_fn = DAC_front_merge_fn;
q->merge_requests_fn = DAC_merge_requests_fn;
q->queuedata = (void *) Controller;
@@ -1108,12 +1130,13 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
Complete initialization of the Generic Disk Information structure.
*/
Controller->GenericDiskInfo.major = MajorNumber;
- Controller->GenericDiskInfo.major_name = "rd";
+ Controller->GenericDiskInfo.major_name = "dac960";
Controller->GenericDiskInfo.minor_shift = DAC960_MaxPartitionsBits;
Controller->GenericDiskInfo.max_p = DAC960_MaxPartitions;
Controller->GenericDiskInfo.nr_real = Controller->LogicalDriveCount;
Controller->GenericDiskInfo.real_devices = Controller;
Controller->GenericDiskInfo.next = NULL;
+ Controller->GenericDiskInfo.fops = &DAC960_FileOperations;
/*
Install the Generic Disk Information structure at the end of the list.
*/
@@ -1142,7 +1165,7 @@ static void DAC960_UnregisterBlockDevice(DAC960_Controller_T *Controller)
/*
Unregister the Block Device Major Number for this DAC960 Controller.
*/
- unregister_blkdev(MajorNumber, "rd");
+ devfs_unregister_blkdev(MajorNumber, "dac960");
/*
Remove the I/O Request Function.
*/
@@ -1156,7 +1179,6 @@ static void DAC960_UnregisterBlockDevice(DAC960_Controller_T *Controller)
blk_size[MajorNumber] = NULL;
blksize_size[MajorNumber] = NULL;
max_sectors[MajorNumber] = NULL;
- max_segments[MajorNumber] = NULL;
/*
Remove the Generic Disk Information structure from the list.
*/
@@ -1305,15 +1327,17 @@ static int DAC960_Finalize(NotifierBlock_T *NotifierBlock,
static boolean DAC960_ProcessRequest(DAC960_Controller_T *Controller,
boolean WaitForCommand)
{
- IO_Request_T **RequestQueuePointer =
- &blk_dev[DAC960_MAJOR + Controller->ControllerNumber].request_queue.current_request;
+ struct list_head * queue_head;
IO_Request_T *Request;
DAC960_Command_T *Command;
char *RequestBuffer;
+
+ queue_head = &blk_dev[DAC960_MAJOR + Controller->ControllerNumber].request_queue.queue_head;
while (true)
{
- Request = *RequestQueuePointer;
- if (Request == NULL || Request->rq_status == RQ_INACTIVE) return false;
+ if (list_empty(queue_head)) return false;
+ Request = blkdev_entry_next_request(queue_head);
+ if (Request->rq_status == RQ_INACTIVE) return false;
Command = DAC960_AllocateCommand(Controller);
if (Command != NULL) break;
if (!WaitForCommand) return false;
@@ -1335,7 +1359,7 @@ static boolean DAC960_ProcessRequest(DAC960_Controller_T *Controller,
Command->BufferHeader = Request->bh;
RequestBuffer = Request->buffer;
Request->rq_status = RQ_INACTIVE;
- *RequestQueuePointer = Request->next;
+ blkdev_dequeue_request(Request);
wake_up(&wait_for_request);
if (Command->SegmentCount == 1)
{
@@ -2565,8 +2589,8 @@ static int DAC960_IOCTL(Inode_T *Inode, File_T *File,
(long *) Argument);
case BLKRAGET:
/* Get Read-Ahead. */
- if ((int *) Argument == NULL) return -EINVAL;
- return put_user(read_ahead[MAJOR(Inode->i_rdev)], (int *) Argument);
+ if ((long *) Argument == NULL) return -EINVAL;
+ return put_user(read_ahead[MAJOR(Inode->i_rdev)], (long *) Argument);
case BLKRASET:
/* Set Read-Ahead. */
if (!capable(CAP_SYS_ADMIN)) return -EACCES;
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 3f9c5f85b..9f313de8f 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -326,6 +326,14 @@ else
endif
endif
+ifeq ($(CONFIG_BLK_DEV_LVM),y)
+L_OBJS += lvm.o lvm-snap.o
+else
+ ifeq ($(CONFIG_BLK_DEV_LVM),m)
+ M_OBJS += lvm-mod.o
+ endif
+endif
+
ifeq ($(CONFIG_BLK_DEV_MD),y)
LX_OBJS += md.o
@@ -354,13 +362,9 @@ else
endif
ifeq ($(CONFIG_MD_RAID5),y)
-LX_OBJS += xor.o
-CFLAGS_xor.o := $(PROFILING) -fomit-frame-pointer
L_OBJS += raid5.o
else
ifeq ($(CONFIG_MD_RAID5),m)
- LX_OBJS += xor.o
- CFLAGS_xor.o := $(PROFILING) -fomit-frame-pointer
M_OBJS += raid5.o
endif
endif
@@ -407,3 +411,6 @@ ide-mod.o: ide.o ide-features.o $(IDE_OBJS)
ide-probe-mod.o: ide-probe.o ide-geometry.o
$(LD) $(LD_RFLAG) -r -o $@ ide-probe.o ide-geometry.o
+
+lvm-mod.o: lvm.o lvm-snap.o
+ $(LD) -r -o $@ lvm.o lvm-snap.o
diff --git a/drivers/block/README.lvm b/drivers/block/README.lvm
new file mode 100644
index 000000000..3d652457f
--- /dev/null
+++ b/drivers/block/README.lvm
@@ -0,0 +1,8 @@
+
+This is the Logical Volume Manager driver for Linux.
+
+Tools and a library that manage logical volumes can be
+found at <http://linux.msede.com/lvm>.
+
+Current driver versions can also be obtained there.
+
diff --git a/drivers/block/acsi.c b/drivers/block/acsi.c
index ef9e3fa7c..f2a102cf2 100644
--- a/drivers/block/acsi.c
+++ b/drivers/block/acsi.c
@@ -54,6 +54,7 @@
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/genhd.h>
+#include <linux/devfs_fs_kernel.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/major.h>
@@ -769,7 +770,7 @@ static void unexpected_acsi_interrupt( void )
static void bad_rw_intr( void )
{
- if (!CURRENT)
+ if (QUEUE_EMPTY)
return;
if (++CURRENT->errors >= MAX_ERRORS)
@@ -843,7 +844,7 @@ static void acsi_times_out( unsigned long dummy )
DEVICE_INTR = NULL;
printk( KERN_ERR "ACSI timeout\n" );
- if (!CURRENT) return;
+ if (QUEUE_EMPTY) return;
if (++CURRENT->errors >= MAX_ERRORS) {
#ifdef DEBUG
printk( KERN_ERR "ACSI: too many errors.\n" );
@@ -953,7 +954,7 @@ static void redo_acsi_request( void )
unsigned long pbuffer;
struct buffer_head *bh;
- if (CURRENT && CURRENT->rq_status == RQ_INACTIVE) {
+ if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE) {
if (!DEVICE_INTR) {
ENABLE_IRQ();
stdma_release();
@@ -969,7 +970,7 @@ static void redo_acsi_request( void )
/* Another check here: An interrupt or timer event could have
* happened since the last check!
*/
- if (CURRENT && CURRENT->rq_status == RQ_INACTIVE) {
+ if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE) {
if (!DEVICE_INTR) {
ENABLE_IRQ();
stdma_release();
@@ -979,7 +980,7 @@ static void redo_acsi_request( void )
if (DEVICE_INTR)
return;
- if (!CURRENT) {
+ if (QUEUE_EMPTY) {
CLEAR_INTR;
ENABLE_IRQ();
stdma_release();
@@ -1385,6 +1386,8 @@ static int acsi_mode_sense( int target, int lun, SENSE_DATA *sd )
********************************************************************/
+extern struct block_device_operations acsi_fops;
+
static struct gendisk acsi_gendisk = {
MAJOR_NR, /* Major number */
"ad", /* Major name */
@@ -1394,7 +1397,8 @@ static struct gendisk acsi_gendisk = {
acsi_sizes, /* block sizes */
0, /* number */
(void *)acsi_info, /* internal */
- NULL /* next */
+ NULL, /* next */
+ &acsi_fops, /* file operations */
};
#define MAX_SCSI_DEVICE_CODE 10
@@ -1776,16 +1780,14 @@ int acsi_init( void )
int err = 0;
if (!MACH_IS_ATARI || !ATARIHW_PRESENT(ACSI))
return 0;
-
- if (register_blkdev( MAJOR_NR, "ad", &acsi_fops )) {
+ if (devfs_register_blkdev( MAJOR_NR, "ad", &acsi_fops )) {
printk( KERN_ERR "Unable to get major %d for ACSI\n", MAJOR_NR );
return -EBUSY;
}
-
if (!(acsi_buffer =
(char *)atari_stram_alloc( ACSI_BUFFER_SIZE, NULL, "acsi" ))) {
printk( KERN_ERR "Unable to get ACSI ST-Ram buffer.\n" );
- unregister_blkdev( MAJOR_NR, "ad" );
+ devfs_unregister_blkdev( MAJOR_NR, "ad" );
return -ENOMEM;
}
phys_acsi_buffer = virt_to_phys( acsi_buffer );
@@ -1824,7 +1826,7 @@ void cleanup_module(void)
blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
atari_stram_free( acsi_buffer );
- if (unregister_blkdev( MAJOR_NR, "ad" ) != 0)
+ if (devfs_unregister_blkdev( MAJOR_NR, "ad" ) != 0)
printk( KERN_ERR "acsi: cleanup_module failed\n");
for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next))
diff --git a/drivers/block/acsi_slm.c b/drivers/block/acsi_slm.c
index e4d343be3..88fa04ac6 100644
--- a/drivers/block/acsi_slm.c
+++ b/drivers/block/acsi_slm.c
@@ -65,6 +65,7 @@ not be guaranteed. There are several ways to assure this:
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/malloc.h>
+#include <linux/devfs_fs_kernel.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -987,23 +988,28 @@ int attach_slm( int target, int lun )
return( 1 );
}
+static devfs_handle_t devfs_handle = NULL;
int slm_init( void )
{
- if (register_chrdev( MAJOR_NR, "slm", &slm_fops )) {
+ if (devfs_register_chrdev( MAJOR_NR, "slm", &slm_fops )) {
printk( KERN_ERR "Unable to get major %d for ACSI SLM\n", MAJOR_NR );
return -EBUSY;
}
if (!(SLMBuffer = atari_stram_alloc( SLM_BUFFER_SIZE, NULL, "SLM" ))) {
printk( KERN_ERR "Unable to get SLM ST-Ram buffer.\n" );
- unregister_chrdev( MAJOR_NR, "slm" );
+ devfs_unregister_chrdev( MAJOR_NR, "slm" );
return -ENOMEM;
}
BufferP = SLMBuffer;
SLMState = IDLE;
+ devfs_handle = devfs_mk_dir (NULL, "slm", 3, NULL);
+ devfs_register_series (devfs_handle, "%u", MAX_SLM, DEVFS_FL_DEFAULT,
+ MAJOR_NR, 0, S_IFCHR | S_IRUSR | S_IWUSR, 0, 0,
+ &slm_fops, NULL);
return 0;
}
@@ -1026,7 +1032,8 @@ int init_module(void)
void cleanup_module(void)
{
- if (unregister_chrdev( MAJOR_NR, "slm" ) != 0)
+ devfs_unregister (devfs_handle);
+ if (devfs_unregister_chrdev( MAJOR_NR, "slm" ) != 0)
printk( KERN_ERR "acsi_slm: cleanup_module failed\n");
atari_stram_free( SLMBuffer );
}
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index e6bf5fa0c..0c7af176e 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1385,12 +1385,12 @@ static void redo_fd_request(void)
char *data;
unsigned long flags;
- if (CURRENT && CURRENT->rq_status == RQ_INACTIVE){
+ if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE){
return;
}
repeat:
- if (!CURRENT) {
+ if (QUEUE_EMPTY) {
/* Nothing left to do */
return;
}
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index b1e20b7d3..b7aa4241e 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -624,7 +624,7 @@ static void fd_error( void )
return;
}
- if (!CURRENT) return;
+ if (QUEUE_EMPTY) return;
CURRENT->errors++;
if (CURRENT->errors >= MAX_ERRORS) {
printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
@@ -1450,18 +1450,18 @@ static void redo_fd_request(void)
int device, drive, type;
DPRINT(("redo_fd_request: CURRENT=%08lx CURRENT->dev=%04x CURRENT->sector=%ld\n",
- (unsigned long)CURRENT, CURRENT ? CURRENT->rq_dev : 0,
- CURRENT ? CURRENT->sector : 0 ));
+ (unsigned long)CURRENT, !QUEUE_EMPTY ? CURRENT->rq_dev : 0,
+ !QUEUE_EMPTY ? CURRENT->sector : 0 ));
IsFormatting = 0;
- if (CURRENT && CURRENT->rq_status == RQ_INACTIVE){
+ if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE){
return;
}
repeat:
- if (!CURRENT)
+ if (QUEUE_EMPTY)
goto the_end;
if (MAJOR(CURRENT->rq_dev) != MAJOR_NR)
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 78269edf3..47291bef1 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -880,14 +880,16 @@ static void do_ida_request(int ctlr)
cmdlist_t *c;
int seg, sect;
char *lastdataend;
- request_queue_t * q;
+ struct list_head * queue_head;
struct buffer_head *bh;
struct request *creq;
- q = &blk_dev[MAJOR_NR+ctlr].request_queue;
+ queue_head = &blk_dev[MAJOR_NR+ctlr].request_queue.queue_head;
- creq = q->current_request;
- if (creq == NULL || creq->rq_status == RQ_INACTIVE)
+ if (list_empty(queue_head))
+ goto doreq_done;
+ creq = blkdev_entry_next_request(queue_head);
+ if (creq->rq_status == RQ_INACTIVE)
goto doreq_done;
if (ctlr != MAJOR(creq->rq_dev)-MAJOR_NR ||
@@ -961,10 +963,9 @@ DBGPX(
bh->b_reqnext = NULL;
DBGPX( printk("More to do on same request %p\n", creq); );
} else {
-DBGPX( printk("Done with %p, queueing %p\n", creq, creq->next); );
- creq->rq_status = RQ_INACTIVE;
- q->current_request = creq->next;
- wake_up(&wait_for_request);
+DBGPX( printk("Done with %p\n", creq); );
+ blkdev_dequeue_request(creq);
+ end_that_request_last(creq);
}
c->req.hdr.cmd = (creq->cmd == READ) ? IDA_READ : IDA_WRITE;
diff --git a/drivers/block/cs5530.c b/drivers/block/cs5530.c
index cf8b5fdda..3e26b8006 100644
--- a/drivers/block/cs5530.c
+++ b/drivers/block/cs5530.c
@@ -1,5 +1,5 @@
/*
- * linux/drivers/block/cs5530.c Version 0.2 Jan 30, 2000
+ * linux/drivers/block/cs5530.c Version 0.5 Feb 13, 2000
*
* Copyright (C) 2000 Mark Lord <mlord@pobox.com>
* May be copied or modified under the terms of the GNU General Public License
@@ -285,8 +285,6 @@ static int cs5530_config_dma (ide_drive_t *drive)
}
outb(inb(hwif->dma_base+2)|(unit?0x40:0x20), hwif->dma_base+2); /* set DMA_capable bit */
- if (!strcmp(drive->name, "hdc")) /* FIXME */
- return 0;
/*
* Finally, turn DMA on in software, and exit.
*/
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 7b956dfae..be7e25879 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -144,6 +144,10 @@ static int irqdma_allocated = 0;
#define FDPATCHES
#include <linux/fdreg.h>
+/*
+ * 1998/1/21 -- Richard Gooch <rgooch@atnf.csiro.au> -- devfs support
+ */
+
#include <linux/fd.h>
#include <linux/hdreg.h>
@@ -158,6 +162,7 @@ static int irqdma_allocated = 0;
#include <linux/ioport.h>
#include <linux/interrupt.h>
#include <linux/init.h>
+#include <linux/devfs_fs_kernel.h>
/*
* PS/2 floppies have much slower step rates than regular floppies.
@@ -196,7 +201,9 @@ static int use_virtual_dma=0;
static unsigned short virtual_dma_port=0x3f0;
void floppy_interrupt(int irq, void *dev_id, struct pt_regs * regs);
static int set_dor(int fdc, char mask, char data);
-static inline int __get_order(unsigned long size);
+static void register_devfs_entries (int drive);
+static devfs_handle_t devfs_handle = NULL;
+
#define K_64 0x10000 /* 64KB */
#include <asm/floppy.h>
@@ -213,26 +220,12 @@ static inline int __get_order(unsigned long size);
/* Dma Memory related stuff */
-/* Pure 2^n version of get_order */
-static inline int __get_order(unsigned long size)
-{
- int order;
-
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
-}
-
#ifndef fd_dma_mem_free
-#define fd_dma_mem_free(addr, size) free_pages(addr, __get_order(size))
+#define fd_dma_mem_free(addr, size) free_pages(addr, get_order(size))
#endif
#ifndef fd_dma_mem_alloc
-#define fd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,__get_order(size))
+#define fd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,get_order(size))
#endif
static inline void fallback_on_nodma_alloc(char **addr, size_t l)
@@ -2276,7 +2269,7 @@ static void request_done(int uptodate)
probing = 0;
reschedule_timeout(MAXTIMEOUT, "request done %d", uptodate);
- if (!CURRENT){
+ if (QUEUE_EMPTY){
DPRINT("request list destroyed in floppy request done\n");
return;
}
@@ -2290,14 +2283,14 @@ static void request_done(int uptodate)
DRS->maxtrack = 1;
/* unlock chained buffers */
- while (current_count_sectors && CURRENT &&
+ while (current_count_sectors && !QUEUE_EMPTY &&
current_count_sectors >= CURRENT->current_nr_sectors){
current_count_sectors -= CURRENT->current_nr_sectors;
CURRENT->nr_sectors -= CURRENT->current_nr_sectors;
CURRENT->sector += CURRENT->current_nr_sectors;
end_request(1);
}
- if (current_count_sectors && CURRENT){
+ if (current_count_sectors && !QUEUE_EMPTY){
/* "unlock" last subsector */
CURRENT->buffer += current_count_sectors <<9;
CURRENT->current_nr_sectors -= current_count_sectors;
@@ -2306,7 +2299,7 @@ static void request_done(int uptodate)
return;
}
- if (current_count_sectors && !CURRENT)
+ if (current_count_sectors && QUEUE_EMPTY)
DPRINT("request list destroyed in floppy request done\n");
} else {
@@ -2869,14 +2862,14 @@ static void redo_fd_request(void)
if (current_drive < N_DRIVE)
floppy_off(current_drive);
- if (CURRENT && CURRENT->rq_status == RQ_INACTIVE){
+ if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE){
CLEAR_INTR;
unlock_fdc();
return;
}
while(1){
- if (!CURRENT) {
+ if (QUEUE_EMPTY) {
CLEAR_INTR;
unlock_fdc();
return;
@@ -3631,6 +3624,7 @@ static void config_types(void)
first = 0;
}
printk("%s fd%d is %s", prepend, drive, name);
+ register_devfs_entries (drive);
}
*UDP = *params;
}
@@ -3844,6 +3838,37 @@ static struct block_device_operations floppy_fops = {
revalidate: floppy_revalidate,
};
+static void register_devfs_entries (int drive)
+{
+ int base_minor, i;
+ static char *table[] =
+ {"", "d360", "h1200", "u360", "u720", "h360", "h720",
+ "u1440", "u2880", "CompaQ", "h1440", "u1680", "h410",
+ "u820", "h1476", "u1722", "h420", "u830", "h1494", "u1743",
+ "h880", "u1040", "u1120", "h1600", "u1760", "u1920",
+ "u3200", "u3520", "u3840", "u1840", "u800", "u1600",
+ NULL
+ };
+ static int t360[] = {1,0}, t1200[] = {2,5,6,10,12,14,16,18,20,23,0},
+ t3in[] = {8,9,26,27,28, 7,11,15,19,24,25,29,31, 3,4,13,17,21,22,30,0};
+ static int *table_sup[] =
+ {NULL, t360, t1200, t3in+5+8, t3in+5, t3in, t3in};
+
+ base_minor = (drive < 4) ? drive : (124 + drive);
+ if (UDP->cmos <= NUMBER(default_drive_params)) {
+ i = 0;
+ do {
+ char name[16];
+
+ sprintf (name, "%d%s", drive, table[table_sup[UDP->cmos][i]]);
+ devfs_register (devfs_handle, name, 0, DEVFS_FL_DEFAULT, MAJOR_NR,
+ base_minor + (table_sup[UDP->cmos][i] << 2),
+ S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP |S_IWGRP,
+ 0, 0, &floppy_fops, NULL);
+ } while (table_sup[UDP->cmos][i++]);
+ }
+}
+
/*
* Floppy Driver initialization
* =============================
@@ -4066,7 +4091,8 @@ int __init floppy_init(void)
raw_cmd = 0;
- if (register_blkdev(MAJOR_NR,"fd",&floppy_fops)) {
+ devfs_handle = devfs_mk_dir (NULL, "floppy", 0, NULL);
+ if (devfs_register_blkdev(MAJOR_NR,"fd",&floppy_fops)) {
printk("Unable to get major %d for floppy\n",MAJOR_NR);
return -EBUSY;
}
@@ -4097,7 +4123,7 @@ int __init floppy_init(void)
use_virtual_dma = can_use_virtual_dma & 1;
fdc_state[0].address = FDC1;
if (fdc_state[0].address == -1) {
- unregister_blkdev(MAJOR_NR,"fd");
+ devfs_unregister_blkdev(MAJOR_NR,"fd");
del_timer(&fd_timeout);
return -ENODEV;
}
@@ -4109,7 +4135,7 @@ int __init floppy_init(void)
if (floppy_grab_irq_and_dma()){
del_timer(&fd_timeout);
blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
- unregister_blkdev(MAJOR_NR,"fd");
+ devfs_unregister_blkdev(MAJOR_NR,"fd");
del_timer(&fd_timeout);
return -EBUSY;
}
@@ -4175,7 +4201,7 @@ int __init floppy_init(void)
if (usage_count)
floppy_release_irq_and_dma();
blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
- unregister_blkdev(MAJOR_NR,"fd");
+ devfs_unregister_blkdev(MAJOR_NR,"fd");
}
for (drive = 0; drive < N_DRIVE; drive++) {
@@ -4413,7 +4439,8 @@ void cleanup_module(void)
{
int dummy;
- unregister_blkdev(MAJOR_NR, "fd");
+ devfs_unregister (devfs_handle);
+ devfs_unregister_blkdev(MAJOR_NR, "fd");
blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
/* eject disk, if any */
diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c
index 95999e273..3f3237e87 100644
--- a/drivers/block/genhd.c
+++ b/drivers/block/genhd.c
@@ -4,6 +4,8 @@
*
* Copyright (C) 1991-1998 Linus Torvalds
*
+ * devfs support - jj, rgooch, 980122
+ *
* Moved partition checking code to fs/partitions* - Russell King
* (linux@arm.uk.linux.org)
*/
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 05a17a0c1..5520c17b0 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -32,6 +32,7 @@
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/fs.h>
+#include <linux/devfs_fs_kernel.h>
#include <linux/kernel.h>
#include <linux/hdreg.h>
#include <linux/genhd.h>
@@ -145,7 +146,7 @@ static void dump_status (const char *msg, unsigned int stat)
unsigned long flags;
char devc;
- devc = CURRENT ? 'a' + DEVICE_NR(CURRENT->rq_dev) : '?';
+ devc = !QUEUE_EMPTY ? 'a' + DEVICE_NR(CURRENT->rq_dev) : '?';
save_flags (flags);
sti();
#ifdef VERBOSE_ERRORS
@@ -174,7 +175,7 @@ static void dump_status (const char *msg, unsigned int stat)
if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) {
printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL),
inb(HD_CURRENT) & 0xf, inb(HD_SECTOR));
- if (CURRENT)
+ if (!QUEUE_EMPTY)
printk(", sector=%ld", CURRENT->sector);
}
printk("\n");
@@ -351,7 +352,7 @@ static void bad_rw_intr(void)
{
int dev;
- if (!CURRENT)
+ if (QUEUE_EMPTY)
return;
dev = DEVICE_NR(CURRENT->rq_dev);
if (++CURRENT->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) {
@@ -414,7 +415,7 @@ ok_to_read:
#if (HD_DELAY > 0)
last_req = read_timer();
#endif
- if (CURRENT)
+ if (!QUEUE_EMPTY)
hd_request();
return;
}
@@ -475,7 +476,7 @@ static void hd_times_out(void)
unsigned int dev;
DEVICE_INTR = NULL;
- if (!CURRENT)
+ if (QUEUE_EMPTY)
return;
disable_irq(HD_IRQ);
sti();
@@ -522,7 +523,7 @@ static void hd_request(void)
{
unsigned int dev, block, nsect, sec, track, head, cyl;
- if (CURRENT && CURRENT->rq_status == RQ_INACTIVE) return;
+ if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE) return;
if (DEVICE_INTR)
return;
repeat:
@@ -662,6 +663,8 @@ static int hd_release(struct inode * inode, struct file * file)
return 0;
}
+extern struct block_device_operations hd_fops;
+
static struct gendisk hd_gendisk = {
MAJOR_NR, /* Major number */
"hd", /* Major name */
@@ -671,7 +674,8 @@ static struct gendisk hd_gendisk = {
hd_sizes, /* block sizes */
0, /* number */
NULL, /* internal use, not presently used */
- NULL /* next */
+ NULL, /* next */
+ &hd_fops, /* file operations */
};
static void hd_interrupt(int irq, void *dev_id, struct pt_regs *regs)
@@ -800,7 +804,7 @@ static void hd_geninit(void)
int __init hd_init(void)
{
- if (register_blkdev(MAJOR_NR,"hd",&hd_fops)) {
+ if (devfs_register_blkdev(MAJOR_NR,"hd",&hd_fops)) {
printk("hd: unable to get major %d for hard disk\n",MAJOR_NR);
return -1;
}
diff --git a/drivers/block/icside.c b/drivers/block/icside.c
index 166d29abf..d0e8f8328 100644
--- a/drivers/block/icside.c
+++ b/drivers/block/icside.c
@@ -24,6 +24,8 @@
#include <asm/ecard.h>
#include <asm/io.h>
+extern char *ide_xfer_verbose (byte xfer_rate);
+
/*
* Maximum number of interfaces per card
*/
diff --git a/drivers/block/ide-cd.c b/drivers/block/ide-cd.c
index 48cf87c81..0f032ac8c 100644
--- a/drivers/block/ide-cd.c
+++ b/drivers/block/ide-cd.c
@@ -299,7 +299,6 @@
* Generic packet command support and error handling routines.
*/
-
/* Mark that we've seen a media change, and invalidate our internal
buffers. */
static void cdrom_saw_media_change (ide_drive_t *drive)
@@ -2270,7 +2269,12 @@ static int ide_cdrom_register (ide_drive_t *drive, int nslots)
devinfo->mask |= CDC_PLAY_AUDIO;
if (!CDROM_CONFIG_FLAGS (drive)->close_tray)
devinfo->mask |= CDC_CLOSE_TRAY;
-
+
+ devinfo->de = devfs_register (drive->de, "cd", 2, DEVFS_FL_DEFAULT,
+ HWIF(drive)->major, minor,
+ S_IFBLK | S_IRUGO | S_IWUGO, 0, 0,
+ ide_fops, NULL);
+
return register_cdrom (devinfo);
}
diff --git a/drivers/block/ide-disk.c b/drivers/block/ide-disk.c
index 1209aa82a..e62295241 100644
--- a/drivers/block/ide-disk.c
+++ b/drivers/block/ide-disk.c
@@ -744,6 +744,8 @@ static int idedisk_cleanup (ide_drive_t *drive)
static void idedisk_setup (ide_drive_t *drive)
{
+ int i;
+
struct hd_driveid *id = drive->id;
unsigned long capacity;
@@ -764,6 +766,15 @@ static void idedisk_setup (ide_drive_t *drive)
drive->doorlocking = 1;
}
}
+ for (i = 0; i < MAX_DRIVES; ++i) {
+ ide_hwif_t *hwif = HWIF(drive);
+
+ if (drive != &hwif->drives[i]) continue;
+ hwif->gd->de_arr[i] = drive->de;
+ if (drive->removable)
+ hwif->gd->flags[i] |= GENHD_FL_REMOVABLE;
+ break;
+ }
/* Extract geometry if we did not already have one for the drive */
if (!drive->cyl || !drive->head || !drive->sect) {
diff --git a/drivers/block/ide-dma.c b/drivers/block/ide-dma.c
index 1e450b7e6..3b6f5e56a 100644
--- a/drivers/block/ide-dma.c
+++ b/drivers/block/ide-dma.c
@@ -214,6 +214,10 @@ static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq)
struct scatterlist *sg = hwif->sg_table;
int nents = 0;
+ if (rq->cmd == READ)
+ hwif->sg_dma_direction = PCI_DMA_FROMDEVICE;
+ else
+ hwif->sg_dma_direction = PCI_DMA_TODEVICE;
bh = rq->bh;
do {
unsigned char *virt_addr = bh->b_data;
@@ -230,7 +234,7 @@ static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq)
nents++;
} while (bh != NULL);
- return pci_map_sg(hwif->pci_dev, sg, nents);
+ return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction);
}
/*
@@ -265,7 +269,8 @@ int ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func)
printk("%s: DMA table too small\n", drive->name);
pci_unmap_sg(HWIF(drive)->pci_dev,
HWIF(drive)->sg_table,
- HWIF(drive)->sg_nents);
+ HWIF(drive)->sg_nents,
+ HWIF(drive)->sg_dma_direction);
return 0; /* revert to PIO for this request */
} else {
u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff);
@@ -301,7 +306,7 @@ void ide_destroy_dmatable (ide_drive_t *drive)
struct scatterlist *sg = HWIF(drive)->sg_table;
int nents = HWIF(drive)->sg_nents;
- pci_unmap_sg(dev, sg, nents);
+ pci_unmap_sg(dev, sg, nents, HWIF(drive)->sg_dma_direction);
}
/*
diff --git a/drivers/block/ide-floppy.c b/drivers/block/ide-floppy.c
index b24933637..e2977c754 100644
--- a/drivers/block/ide-floppy.c
+++ b/drivers/block/ide-floppy.c
@@ -1549,6 +1549,15 @@ static void idefloppy_setup (ide_drive_t *drive, idefloppy_floppy_t *floppy)
(void) idefloppy_get_capacity (drive);
idefloppy_add_settings(drive);
+ for (i = 0; i < MAX_DRIVES; ++i) {
+ ide_hwif_t *hwif = HWIF(drive);
+
+ if (drive != &hwif->drives[i]) continue;
+ hwif->gd->de_arr[i] = drive->de;
+ if (drive->removable)
+ hwif->gd->flags[i] |= GENHD_FL_REMOVABLE;
+ break;
+ }
}
static int idefloppy_cleanup (ide_drive_t *drive)
diff --git a/drivers/block/ide-probe.c b/drivers/block/ide-probe.c
index 33ca2900b..b57fa28da 100644
--- a/drivers/block/ide-probe.c
+++ b/drivers/block/ide-probe.c
@@ -406,7 +406,7 @@ static void probe_hwif (ide_hwif_t *hwif)
ide_ioreg_t ide_control_reg = hwif->io_ports[IDE_CONTROL_OFFSET];
ide_ioreg_t region_low = hwif->io_ports[IDE_DATA_OFFSET];
ide_ioreg_t region_high = region_low;
- ide_ioreg_t region_request = 8;
+ unsigned int region_request = 8;
int i;
if (hwif->noprobe)
@@ -699,13 +699,28 @@ static void init_gendisk (ide_hwif_t *hwif)
gd->nr_real = units; /* current num real drives */
gd->real_devices= hwif; /* ptr to internal data */
gd->next = NULL; /* linked list of major devs */
+ gd->fops = ide_fops; /* file operations */
+ gd->de_arr = kmalloc (sizeof *gd->de_arr * units, GFP_KERNEL);
+ gd->flags = kmalloc (sizeof *gd->flags * units, GFP_KERNEL);
+ if (gd->de_arr)
+ memset (gd->de_arr, 0, sizeof *gd->de_arr * units);
+ if (gd->flags)
+ memset (gd->flags, 0, sizeof *gd->flags * units);
for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) ;
hwif->gd = *gdp = gd; /* link onto tail of list */
for (unit = 0; unit < units; ++unit) {
- if (hwif->drives[unit].present)
+ if (hwif->drives[unit].present) {
+ char name[64];
+
ide_add_generic_settings(hwif->drives + unit);
+ sprintf (name, "ide/host%d/bus%d/target%d/lun%d",
+ hwif->channel ? hwif->mate->index : hwif->index,
+ hwif->channel, unit, 0);
+ hwif->drives[unit].de =
+ devfs_mk_dir (NULL, name, 0, NULL);
+ }
}
}
@@ -764,7 +779,7 @@ static int hwif_init (ide_hwif_t *hwif)
printk("%s: request_fn NOT DEFINED\n", hwif->name);
return (hwif->present = 0);
}
- if (register_blkdev (hwif->major, hwif->name, ide_fops)) {
+ if (devfs_register_blkdev (hwif->major, hwif->name, ide_fops)) {
printk("%s: UNABLE TO GET MAJOR NUMBER %d\n", hwif->name, hwif->major);
return (hwif->present = 0);
}
diff --git a/drivers/block/ide-tape.c b/drivers/block/ide-tape.c
index 9d2bc216f..1e1b6e44e 100644
--- a/drivers/block/ide-tape.c
+++ b/drivers/block/ide-tape.c
@@ -396,6 +396,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/major.h>
+#include <linux/devfs_fs_kernel.h>
#include <linux/errno.h>
#include <linux/genhd.h>
#include <linux/malloc.h>
@@ -794,6 +795,7 @@ typedef struct {
*/
typedef struct {
ide_drive_t *drive;
+ devfs_handle_t de_r, de_n;
/*
* Since a typical character device operation requires more
@@ -5770,11 +5772,13 @@ static int idetape_cleanup (ide_drive_t *drive)
DRIVER(drive)->busy = 0;
(void) ide_unregister_subdriver (drive);
drive->driver_data = NULL;
+ devfs_unregister (tape->de_r);
+ devfs_unregister (tape->de_n);
kfree (tape);
for (minor = 0; minor < MAX_HWIFS * MAX_DRIVES; minor++)
if (idetape_chrdevs[minor].drive != NULL)
return 0;
- unregister_chrdev (IDETAPE_MAJOR, "ht");
+ devfs_unregister_chrdev (IDETAPE_MAJOR, "ht");
idetape_chrdev_present = 0;
return 0;
}
@@ -5871,7 +5875,8 @@ int idetape_init (void)
#endif
return 0;
}
- if (!idetape_chrdev_present && register_chrdev (IDETAPE_MAJOR, "ht", &idetape_fops)) {
+ if (!idetape_chrdev_present &&
+ devfs_register_chrdev (IDETAPE_MAJOR, "ht", &idetape_fops)) {
printk (KERN_ERR "ide-tape: Failed to register character device interface\n");
MOD_DEC_USE_COUNT;
#if ONSTREAM_DEBUG
@@ -5905,10 +5910,21 @@ int idetape_init (void)
for (minor = 0; idetape_chrdevs[minor].drive != NULL; minor++);
idetape_setup (drive, tape, minor);
idetape_chrdevs[minor].drive = drive;
+ tape->de_r =
+ devfs_register (drive->de, "mt", 2, DEVFS_FL_DEFAULT,
+ HWIF(drive)->major, minor,
+ S_IFCHR | S_IRUGO | S_IWUGO, 0, 0,
+ &idetape_fops, NULL);
+ tape->de_n =
+ devfs_register (drive->de, "mtn", 3, DEVFS_FL_DEFAULT,
+ HWIF(drive)->major, minor + 128,
+ S_IFCHR | S_IRUGO | S_IWUGO, 0, 0,
+ &idetape_fops, NULL);
+ devfs_register_tape (tape->de_r);
supported++; failed--;
} while ((drive = ide_scan_devices (ide_tape, idetape_driver.name, NULL, failed++)) != NULL);
if (!idetape_chrdev_present && !supported) {
- unregister_chrdev (IDETAPE_MAJOR, "ht");
+ devfs_unregister_chrdev (IDETAPE_MAJOR, "ht");
} else
idetape_chrdev_present = 1;
ide_register_module (&idetape_module);
diff --git a/drivers/block/ide.c b/drivers/block/ide.c
index 06e1bbcc6..93da9bea2 100644
--- a/drivers/block/ide.c
+++ b/drivers/block/ide.c
@@ -501,8 +501,7 @@ void ide_end_request (byte uptodate, ide_hwgroup_t *hwgroup)
if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) {
add_blkdev_randomness(MAJOR(rq->rq_dev));
- hwgroup->drive->queue.current_request = rq->next;
- blk_dev[MAJOR(rq->rq_dev)].request_queue.current_request = NULL;
+ blkdev_dequeue_request(rq);
hwgroup->rq = NULL;
end_that_request_last(rq);
}
@@ -772,8 +771,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, byte stat, byte err)
}
}
spin_lock_irqsave(&io_request_lock, flags);
- drive->queue.current_request = rq->next;
- blk_dev[MAJOR(rq->rq_dev)].request_queue.current_request = NULL;
+ blkdev_dequeue_request(rq);
HWGROUP(drive)->rq = NULL;
rq->rq_status = RQ_INACTIVE;
spin_unlock_irqrestore(&io_request_lock, flags);
@@ -1076,7 +1074,7 @@ static ide_startstop_t start_request (ide_drive_t *drive)
{
ide_startstop_t startstop;
unsigned long block, blockend;
- struct request *rq = drive->queue.current_request;
+ struct request *rq = blkdev_entry_next_request(&drive->queue.queue_head);
unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS;
ide_hwif_t *hwif = HWIF(drive);
@@ -1159,13 +1157,12 @@ repeat:
best = NULL;
drive = hwgroup->drive;
do {
- if (drive->queue.current_request && (!drive->sleep || 0 <= (signed long)(jiffies - drive->sleep))) {
+ if (!list_empty(&drive->queue.queue_head) && (!drive->sleep || 0 <= (signed long)(jiffies - drive->sleep))) {
if (!best
|| (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep)))
|| (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive))))
{
- struct blk_dev_struct *bdev = &blk_dev[HWIF(drive)->major];
- if( !bdev->request_queue.plugged )
+ if( !drive->queue.plugged )
best = drive;
}
}
@@ -1229,7 +1226,6 @@ repeat:
*/
static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
{
- struct blk_dev_struct *bdev;
ide_drive_t *drive;
ide_hwif_t *hwif;
ide_startstop_t startstop;
@@ -1246,9 +1242,6 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
hwgroup->rq = NULL;
drive = hwgroup->drive;
do {
- bdev = &blk_dev[HWIF(drive)->major];
- if( !bdev->request_queue.plugged )
- bdev->request_queue.current_request = NULL; /* (broken since patch-2.1.15) */
if (drive->sleep && (!sleep || 0 < (signed long)(sleep - drive->sleep)))
sleep = drive->sleep;
} while ((drive = drive->next) != hwgroup->drive);
@@ -1285,10 +1278,9 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
drive->sleep = 0;
drive->service_start = jiffies;
- bdev = &blk_dev[hwif->major];
- if ( bdev->request_queue.plugged ) /* FIXME: paranoia */
+ if ( drive->queue.plugged ) /* paranoia */
printk("%s: Huh? nuking plugged queue\n", drive->name);
- bdev->request_queue.current_request = hwgroup->rq = drive->queue.current_request;
+ hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head);
/*
* Some systems have trouble with IDE IRQs arriving while
* the driver is still setting things up. So, here we disable
@@ -1670,7 +1662,7 @@ void ide_init_drive_cmd (struct request *rq)
rq->sem = NULL;
rq->bh = NULL;
rq->bhtail = NULL;
- rq->next = NULL;
+ rq->q = NULL;
}
/*
@@ -1703,7 +1695,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
unsigned long flags;
ide_hwgroup_t *hwgroup = HWGROUP(drive);
unsigned int major = HWIF(drive)->major;
- struct request *cur_rq;
+ struct list_head * queue_head;
DECLARE_MUTEX_LOCKED(sem);
#ifdef CONFIG_BLK_DEV_PDC4030
@@ -1716,20 +1708,17 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
if (action == ide_wait)
rq->sem = &sem;
spin_lock_irqsave(&io_request_lock, flags);
- cur_rq = drive->queue.current_request;
- if (cur_rq == NULL || action == ide_preempt) {
- rq->next = cur_rq;
- drive->queue.current_request = rq;
+ queue_head = &drive->queue.queue_head;
+ if (list_empty(queue_head) || action == ide_preempt) {
if (action == ide_preempt)
hwgroup->rq = NULL;
} else {
if (action == ide_wait || action == ide_end) {
- while (cur_rq->next != NULL) /* find end of list */
- cur_rq = cur_rq->next;
- }
- rq->next = cur_rq->next;
- cur_rq->next = rq;
+ queue_head = queue_head->prev;
+ } else
+ queue_head = queue_head->next;
}
+ list_add(&rq->queue, queue_head);
ide_do_request(hwgroup, 0);
spin_unlock_irqrestore(&io_request_lock, flags);
if (action == ide_wait) {
@@ -1989,6 +1978,10 @@ void ide_unregister (unsigned int index)
d = hwgroup->drive;
for (i = 0; i < MAX_DRIVES; ++i) {
drive = &hwif->drives[i];
+ if (drive->de) {
+ devfs_unregister (drive->de);
+ drive->de = NULL;
+ }
if (!drive->present)
continue;
while (hwgroup->drive->next != drive)
@@ -2037,6 +2030,10 @@ void ide_unregister (unsigned int index)
gd = *gdp; *gdp = gd->next;
kfree(gd->sizes);
kfree(gd->part);
+ if (gd->de_arr)
+ kfree (gd->de_arr);
+ if (gd->flags)
+ kfree (gd->flags);
kfree(gd);
}
old_hwif = *hwif;
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 3ed507694..808878b3e 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 1994, Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
*/
/*
@@ -27,6 +28,8 @@
#include <linux/module.h>
+#define DEBUG_ELEVATOR
+
/*
* MAC Floppy IWM hooks
*/
@@ -147,6 +150,18 @@ request_queue_t * blk_get_queue (kdev_t dev)
return ret;
}
+static inline int get_request_latency(elevator_t * elevator, int rw)
+{
+ int latency;
+
+ if (rw != READ)
+ latency = elevator->write_latency;
+ else
+ latency = elevator->read_latency;
+
+ return latency;
+}
+
void blk_cleanup_queue(request_queue_t * q)
{
memset(q, 0, sizeof(*q));
@@ -167,55 +182,49 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
q->make_request_fn = mfn;
}
-static int ll_merge_fn(request_queue_t *q, struct request *req,
- struct buffer_head *bh)
+static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
{
- if (req->bhtail->b_data + req->bhtail->b_size != bh->b_data) {
- if (req->nr_segments < MAX_SEGMENTS) {
- req->nr_segments++;
- return 1;
- }
- return 0;
+ if (req->nr_segments < max_segments) {
+ req->nr_segments++;
+ q->nr_segments++;
+ return 1;
}
- return 1;
+ return 0;
+}
+
+static int ll_back_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int max_segments)
+{
+ if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
+ return 1;
+ return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_front_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int max_segments)
+{
+ if (bh->b_data + bh->b_size == req->bh->b_data)
+ return 1;
+ return ll_new_segment(q, req, max_segments);
}
static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
- struct request *next)
+ struct request *next, int max_segments)
{
int total_segments = req->nr_segments + next->nr_segments;
- if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data)
+ if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) {
total_segments--;
+ q->nr_segments--;
+ }
- if (total_segments > MAX_SEGMENTS)
+ if (total_segments > max_segments)
return 0;
req->nr_segments = total_segments;
return 1;
}
-void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
-{
- q->request_fn = rfn;
- q->current_request = NULL;
- q->merge_fn = ll_merge_fn;
- q->merge_requests_fn = ll_merge_requests_fn;
- q->make_request_fn = NULL;
- q->plug_tq.sync = 0;
- q->plug_tq.routine = &generic_unplug_device;
- q->plug_tq.data = q;
- q->plugged = 0;
- /*
- * These booleans describe the queue properties. We set the
- * default (and most common) values here. Other drivers can
- * use the appropriate functions to alter the queue properties.
- * as appropriate.
- */
- q->plug_device_fn = NULL;
- q->head_active = 1;
-}
-
/*
* "plug" the device if there are no outstanding requests: this will
* force the transfer to start only after we have put all the requests
@@ -224,19 +233,44 @@ void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
* This is called with interrupts off and no requests on the queue.
* (and with the request spinlock aquired)
*/
-inline void generic_plug_device (request_queue_t *q, kdev_t dev)
+static void generic_plug_device (request_queue_t *q, kdev_t dev)
{
+#ifdef CONFIG_BLK_DEV_MD
if (MAJOR(dev) == MD_MAJOR) {
spin_unlock_irq(&io_request_lock);
BUG();
}
- if (q->current_request)
+#endif
+ if (!list_empty(&q->queue_head))
return;
q->plugged = 1;
queue_task(&q->plug_tq, &tq_disk);
}
+void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
+{
+ INIT_LIST_HEAD(&q->queue_head);
+ q->elevator = ELEVATOR_DEFAULTS;
+ q->request_fn = rfn;
+ q->back_merge_fn = ll_back_merge_fn;
+ q->front_merge_fn = ll_front_merge_fn;
+ q->merge_requests_fn = ll_merge_requests_fn;
+ q->make_request_fn = NULL;
+ q->plug_tq.sync = 0;
+ q->plug_tq.routine = &generic_unplug_device;
+ q->plug_tq.data = q;
+ q->plugged = 0;
+ /*
+ * These booleans describe the queue properties. We set the
+ * default (and most common) values here. Other drivers can
+ * use the appropriate functions to alter the queue properties.
+ * as appropriate.
+ */
+ q->plug_device_fn = generic_plug_device;
+ q->head_active = 1;
+}
+
/*
* remove the plug and let it rip..
*/
@@ -248,7 +282,7 @@ void generic_unplug_device(void * data)
spin_lock_irqsave(&io_request_lock,flags);
if (q->plugged) {
q->plugged = 0;
- if (q->current_request)
+ if (!list_empty(&q->queue_head))
(q->request_fn)(q);
}
spin_unlock_irqrestore(&io_request_lock,flags);
@@ -388,6 +422,119 @@ static inline void drive_stat_acct(struct request *req,
printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
}
+/* elevator */
+
+#define elevator_sequence_after(a,b) ((int)((b)-(a)) < 0)
+#define elevator_sequence_before(a,b) elevator_sequence_after(b,a)
+#define elevator_sequence_after_eq(a,b) ((int)((b)-(a)) <= 0)
+#define elevator_sequence_before_eq(a,b) elevator_sequence_after_eq(b,a)
+
+static inline struct list_head * seek_to_not_starving_chunk(request_queue_t * q,
+ int * lat, int * starving)
+{
+ int sequence = q->elevator.sequence;
+ struct list_head * entry = q->queue_head.prev;
+ int pos = 0;
+
+ do {
+ struct request * req = blkdev_entry_to_request(entry);
+ if (elevator_sequence_before(req->elevator_sequence, sequence)) {
+ *lat -= q->nr_segments - pos;
+ *starving = 1;
+ return entry;
+ }
+ pos += req->nr_segments;
+ } while ((entry = entry->prev) != &q->queue_head);
+
+ *starving = 0;
+
+ return entry->next;
+}
+
+static inline void elevator_merge_requests(elevator_t * e, struct request * req, struct request * next)
+{
+ if (elevator_sequence_before(next->elevator_sequence, req->elevator_sequence))
+ req->elevator_sequence = next->elevator_sequence;
+ if (req->cmd == READ)
+ e->read_pendings--;
+
+}
+
+static inline int elevator_sequence(elevator_t * e, int latency)
+{
+ return latency + e->sequence;
+}
+
+#define elevator_merge_before(q, req, lat) __elevator_merge((q), (req), (lat), 0)
+#define elevator_merge_after(q, req, lat) __elevator_merge((q), (req), (lat), 1)
+static inline void __elevator_merge(request_queue_t * q, struct request * req, int latency, int after)
+{
+ int sequence = elevator_sequence(&q->elevator, latency);
+ if (after)
+ sequence -= req->nr_segments;
+ if (elevator_sequence_before(sequence, req->elevator_sequence)) {
+ if (!after)
+ printk(KERN_WARNING __FUNCTION__
+ ": req latency %d req latency %d\n",
+ req->elevator_sequence - q->elevator.sequence,
+ sequence - q->elevator.sequence);
+ req->elevator_sequence = sequence;
+ }
+}
+
+static inline void elevator_queue(request_queue_t * q,
+ struct request * req,
+ struct list_head * entry,
+ int latency, int starving)
+{
+ struct request * tmp, * __tmp;
+ int __latency = latency;
+
+ __tmp = tmp = blkdev_entry_to_request(entry);
+
+ for (;; tmp = blkdev_next_request(tmp))
+ {
+ if ((latency -= tmp->nr_segments) <= 0)
+ {
+ tmp = __tmp;
+ latency = __latency;
+
+ if (starving)
+ break;
+
+ if (q->head_active && !q->plugged)
+ {
+ latency -= tmp->nr_segments;
+ break;
+ }
+
+ list_add(&req->queue, &q->queue_head);
+ goto after_link;
+ }
+
+ if (tmp->queue.next == &q->queue_head)
+ break;
+
+ {
+ const int after_current = IN_ORDER(tmp,req);
+ const int before_next = IN_ORDER(req,blkdev_next_request(tmp));
+
+ if (!IN_ORDER(tmp,blkdev_next_request(tmp))) {
+ if (after_current || before_next)
+ break;
+ } else {
+ if (after_current && before_next)
+ break;
+ }
+ }
+ }
+
+ list_add(&req->queue, &tmp->queue);
+
+ after_link:
+ req->elevator_sequence = elevator_sequence(&q->elevator, latency);
+}
+
/*
* add-request adds a request to the linked list.
* It disables interrupts (aquires the request spinlock) so that it can muck
@@ -398,32 +545,20 @@ static inline void drive_stat_acct(struct request *req,
* which is important for drive_stat_acct() above.
*/
-static inline void __add_request(request_queue_t * q, struct request * req)
+static inline void __add_request(request_queue_t * q, struct request * req,
+ int empty, struct list_head * entry,
+ int latency, int starving)
{
- int major = MAJOR(req->rq_dev);
- struct request * tmp;
+ int major;
drive_stat_acct(req, req->nr_sectors, 1);
- req->next = NULL;
- if (!(tmp = q->current_request)) {
- q->current_request = req;
+ if (empty) {
+ req->elevator_sequence = elevator_sequence(&q->elevator, latency);
+ list_add(&req->queue, &q->queue_head);
return;
}
- for ( ; tmp->next ; tmp = tmp->next) {
- const int after_current = IN_ORDER(tmp,req);
- const int before_next = IN_ORDER(req,tmp->next);
-
- if (!IN_ORDER(tmp,tmp->next)) {
- if (after_current || before_next)
- break;
- } else {
- if (after_current && before_next)
- break;
- }
- }
- req->next = tmp->next;
- tmp->next = req;
+ elevator_queue(q, req, entry, latency, starving);
/*
* FIXME(eric) I don't understand why there is a need for this
@@ -432,6 +567,7 @@ static inline void __add_request(request_queue_t * q, struct request * req)
* I am leaving this in here until I hear back from the COMPAQ
* people.
*/
+ major = MAJOR(req->rq_dev);
if (major >= COMPAQ_SMART2_MAJOR+0 && major <= COMPAQ_SMART2_MAJOR+7)
{
(q->request_fn)(q);
@@ -448,12 +584,14 @@ static inline void __add_request(request_queue_t * q, struct request * req)
*/
static inline void attempt_merge (request_queue_t * q,
struct request *req,
- int max_sectors)
+ int max_sectors,
+ int max_segments)
{
- struct request *next = req->next;
-
- if (!next)
+ struct request *next;
+
+ if (req->queue.next == &q->queue_head)
return;
+ next = blkdev_next_request(req);
if (req->sector + req->nr_sectors != next->sector)
return;
if (next->sem || req->cmd != next->cmd || req->rq_dev != next->rq_dev || req->nr_sectors + next->nr_sectors > max_sectors)
@@ -464,25 +602,79 @@ static inline void attempt_merge (request_queue_t * q,
* will have been updated to the appropriate number,
* and we shouldn't do it here too.
*/
- if(!(q->merge_requests_fn)(q, req, next))
+ if(!(q->merge_requests_fn)(q, req, next, max_segments))
return;
+ elevator_merge_requests(&q->elevator, req, next);
req->bhtail->b_reqnext = next->bh;
req->bhtail = next->bhtail;
req->nr_sectors += next->nr_sectors;
next->rq_status = RQ_INACTIVE;
- req->next = next->next;
+ list_del(&next->queue);
wake_up (&wait_for_request);
}
+static inline void elevator_debug(request_queue_t * q, kdev_t dev)
+{
+#ifdef DEBUG_ELEVATOR
+ int read_pendings = 0, nr_segments = 0;
+ elevator_t * elevator = &q->elevator;
+ struct list_head * entry = &q->queue_head;
+ static int counter;
+
+ if (counter++ % 100)
+ return;
+
+ while ((entry = entry->next) != &q->queue_head)
+ {
+ struct request * req;
+
+ req = blkdev_entry_to_request(entry);
+ if (!req->q)
+ continue;
+ if (req->cmd == READ)
+ read_pendings++;
+ nr_segments += req->nr_segments;
+ }
+
+ if (read_pendings != elevator->read_pendings)
+ {
+ printk(KERN_WARNING
+ "%s: elevator read_pendings %d should be %d\n",
+ kdevname(dev), elevator->read_pendings,
+ read_pendings);
+ elevator->read_pendings = read_pendings;
+ }
+ if (nr_segments != q->nr_segments)
+ {
+ printk(KERN_WARNING
+ "%s: elevator nr_segments %d should be %d\n",
+ kdevname(dev), q->nr_segments,
+ nr_segments);
+ q->nr_segments = nr_segments;
+ }
+#endif
+}
+
+static inline void elevator_account_request(request_queue_t * q, struct request * req)
+{
+ q->elevator.sequence++;
+ if (req->cmd == READ)
+ q->elevator.read_pendings++;
+ q->nr_segments++;
+}
+
static inline void __make_request(request_queue_t * q, int rw,
struct buffer_head * bh)
{
int major = MAJOR(bh->b_rdev);
unsigned int sector, count;
- struct request * req;
+ int max_segments = MAX_SEGMENTS;
+ struct request * req, * prev;
int rw_ahead, max_req, max_sectors;
unsigned long flags;
+ int orig_latency, latency, __latency, starving, __starving, empty;
+ struct list_head * entry, * __entry = NULL;
count = bh->b_size >> 9;
sector = bh->b_rsector;
@@ -569,21 +761,33 @@ static inline void __make_request(request_queue_t * q, int rw,
*/
max_sectors = get_max_sectors(bh->b_rdev);
+ __latency = orig_latency = get_request_latency(&q->elevator, rw);
+
/*
* Now we acquire the request spinlock, we have to be mega careful
* not to schedule or do something nonatomic
*/
spin_lock_irqsave(&io_request_lock,flags);
- req = q->current_request;
- if (!req) {
- /* MD and loop can't handle plugging without deadlocking */
- if (q->plug_device_fn)
- q->plug_device_fn(q, bh->b_rdev); /* is atomic */
- else
- generic_plug_device(q, bh->b_rdev); /* is atomic */
+ elevator_debug(q, bh->b_rdev);
+
+ empty = 0;
+ if (list_empty(&q->queue_head)) {
+ empty = 1;
+ q->plug_device_fn(q, bh->b_rdev); /* is atomic */
goto get_rq;
}
+ /* avoid write-bombs to not hurt interactiveness of reads */
+ if (rw != READ && q->elevator.read_pendings)
+ max_segments = q->elevator.max_bomb_segments;
+
+ entry = seek_to_not_starving_chunk(q, &__latency, &starving);
+
+ __entry = entry;
+ __starving = starving;
+
+ latency = __latency;
+
if (q->head_active && !q->plugged) {
/*
* The scsi disk and cdrom drivers completely remove the request
@@ -595,11 +799,18 @@ static inline void __make_request(request_queue_t * q, int rw,
* entry may be busy being processed and we thus can't change
* it.
*/
- if ((req = req->next) == NULL)
- goto get_rq;
+ if (entry == q->queue_head.next) {
+ latency -= blkdev_entry_to_request(entry)->nr_segments;
+ if ((entry = entry->next) == &q->queue_head)
+ goto get_rq;
+ starving = 0;
+ }
}
+ prev = NULL;
do {
+ req = blkdev_entry_to_request(entry);
+
if (req->sem)
continue;
if (req->cmd != rw)
@@ -610,6 +821,8 @@ static inline void __make_request(request_queue_t * q, int rw,
continue;
/* Can we add it to the end of this request? */
if (req->sector + req->nr_sectors == sector) {
+ if (latency - req->nr_segments < 0)
+ break;
/*
* The merge_fn is a more advanced way
* of accomplishing the same task. Instead
@@ -622,16 +835,21 @@ static inline void __make_request(request_queue_t * q, int rw,
* may suggest that we shouldn't merge
* this
*/
- if(!(q->merge_fn)(q, req, bh))
+ if(!(q->back_merge_fn)(q, req, bh, max_segments))
continue;
req->bhtail->b_reqnext = bh;
req->bhtail = bh;
req->nr_sectors += count;
drive_stat_acct(req, count, 0);
+
+ elevator_merge_after(q, req, latency);
+
/* Can we now merge this req with the next? */
- attempt_merge(q, req, max_sectors);
+ attempt_merge(q, req, max_sectors, max_segments);
/* or to the beginning? */
} else if (req->sector - count == sector) {
+ if (!prev && starving)
+ continue;
/*
* The merge_fn is a more advanced way
* of accomplishing the same task. Instead
@@ -644,7 +862,7 @@ static inline void __make_request(request_queue_t * q, int rw,
* may suggest that we shouldn't merge
* this
*/
- if(!(q->merge_fn)(q, req, bh))
+ if(!(q->front_merge_fn)(q, req, bh, max_segments))
continue;
bh->b_reqnext = req->bh;
req->bh = bh;
@@ -653,13 +871,21 @@ static inline void __make_request(request_queue_t * q, int rw,
req->sector = sector;
req->nr_sectors += count;
drive_stat_acct(req, count, 0);
+
+ elevator_merge_before(q, req, latency);
+
+ if (prev)
+ attempt_merge(q, prev, max_sectors, max_segments);
} else
continue;
+ q->elevator.sequence++;
spin_unlock_irqrestore(&io_request_lock,flags);
return;
- } while ((req = req->next) != NULL);
+ } while (prev = req,
+ (latency -= req->nr_segments) >= 0 &&
+ (entry = entry->next) != &q->queue_head);
/* find an unused request. */
get_rq:
@@ -675,6 +901,14 @@ get_rq:
goto end_io;
req = __get_request_wait(max_req, bh->b_rdev);
spin_lock_irqsave(&io_request_lock,flags);
+
+ /* lock got dropped so revalidate elevator */
+ empty = 1;
+ if (!list_empty(&q->queue_head)) {
+ empty = 0;
+ __latency = orig_latency;
+ __entry = seek_to_not_starving_chunk(q, &__latency, &__starving);
+ }
}
/*
* Dont start the IO if the buffer has been
@@ -707,8 +941,10 @@ get_rq:
req->sem = NULL;
req->bh = bh;
req->bhtail = bh;
- req->next = NULL;
- __add_request(q, req);
+ req->q = q;
+ __add_request(q, req, empty, __entry, __latency, __starving);
+ elevator_account_request(q, req);
+
spin_unlock_irqrestore(&io_request_lock, flags);
return;
@@ -867,6 +1103,8 @@ int end_that_request_first (struct request *req, int uptodate, char *name)
void end_that_request_last(struct request *req)
{
+ if (req->q)
+ BUG();
if (req->sem != NULL)
up(req->sem);
req->rq_status = RQ_INACTIVE;
@@ -886,7 +1124,6 @@ int __init blk_dev_init(void)
req = all_requests + NR_REQUEST;
while (--req >= all_requests) {
req->rq_status = RQ_INACTIVE;
- req->next = NULL;
}
memset(ro_bits,0,sizeof(ro_bits));
memset(max_readahead, 0, sizeof(max_readahead));
@@ -1001,4 +1238,5 @@ EXPORT_SYMBOL(blk_init_queue);
EXPORT_SYMBOL(blk_cleanup_queue);
EXPORT_SYMBOL(blk_queue_headactive);
EXPORT_SYMBOL(blk_queue_pluggable);
+EXPORT_SYMBOL(blk_queue_make_request);
EXPORT_SYMBOL(generic_make_request);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 587156935..3209aa46b 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -14,6 +14,8 @@
*
* Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
*
+ * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
+ *
* Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
*
* Loadable modules and other fixes by AK, 1998
@@ -40,6 +42,10 @@
* it passes the underlying device's block number instead of the
* offset. This makes it change for a given block when the file is
* moved/restored/copied and also doesn't work over NFS.
+ * AV, Feb 12, 2000: we pass the logical block number now. It fixes the
+ * problem above. Encryption modules that used to rely on the old scheme
+ * should just call ->i_mapping->bmap() to calculate the physical block
+ * number.
*/
#include <linux/module.h>
@@ -52,6 +58,7 @@
#include <linux/major.h>
#include <linux/init.h>
+#include <linux/devfs_fs_kernel.h>
#include <asm/uaccess.h>
@@ -73,14 +80,11 @@ static int max_loop = 8;
static struct loop_device *loop_dev;
static int *loop_sizes;
static int *loop_blksizes;
+static devfs_handle_t devfs_handle = NULL; /* For the directory */
#define FALSE 0
#define TRUE (!FALSE)
-/* Forward declaration of function to create missing blocks in the
- backing file (can happen if the backing file is sparse) */
-static int create_missing_block(struct loop_device *lo, int block, int blksize);
-
/*
* Transfer functions
*/
@@ -164,24 +168,133 @@ static void figure_loop_size(struct loop_device *lo)
loop_sizes[lo->lo_number] = size;
}
+static int lo_send(struct loop_device *lo, char *data, int len, loff_t pos,
+ int blksize)
+{
+ struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
+ struct address_space *mapping = lo->lo_dentry->d_inode->i_mapping;
+ struct address_space_operations *aops = mapping->a_ops;
+ struct page *page;
+ char *kaddr;
+ unsigned long index;
+ unsigned size, offset;
+
+ index = pos >> PAGE_CACHE_SHIFT;
+ offset = pos & (PAGE_CACHE_SIZE - 1);
+ while (len > 0) {
+ int IV = index * (PAGE_CACHE_SIZE/blksize) + offset/blksize;
+ size = PAGE_CACHE_SIZE - offset;
+ if (size > len)
+ size = len;
+
+ page = grab_cache_page(mapping, index);
+ if (!page)
+ goto fail;
+ if (aops->prepare_write(page, offset, offset+size))
+ goto unlock;
+ kaddr = (char*)page_address(page);
+ if ((lo->transfer)(lo, WRITE, kaddr+offset, data, size, IV))
+ goto write_fail;
+ if (aops->commit_write(file, page, offset, offset+size))
+ goto unlock;
+ data += size;
+ len -= size;
+ offset = 0;
+ index++;
+ pos += size;
+ UnlockPage(page);
+ page_cache_release(page);
+ }
+ return 0;
+
+write_fail:
+ printk(KERN_ERR "loop: transfer error block %ld\n", index);
+ ClearPageUptodate(page);
+ kunmap(page);
+unlock:
+ UnlockPage(page);
+ page_cache_release(page);
+fail:
+ return -1;
+}
+
+struct lo_read_data {
+ struct loop_device *lo;
+ char *data;
+ int blksize;
+};
+
+static int lo_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
+{
+ char *kaddr;
+ unsigned long count = desc->count;
+ struct lo_read_data *p = (struct lo_read_data*)desc->buf;
+ struct loop_device *lo = p->lo;
+ int IV = page->index * (PAGE_CACHE_SIZE/p->blksize) + offset/p->blksize;
+
+ if (size > count)
+ size = count;
+
+ kaddr = (char*)kmap(page);
+ if ((lo->transfer)(lo,READ,kaddr+offset,p->data,size,IV)) {
+ size = 0;
+ printk(KERN_ERR "loop: transfer error block %ld\n",page->index);
+ desc->error = -EINVAL;
+ }
+ kunmap(page);
+
+ desc->count = count - size;
+ desc->written += size;
+ p->data += size;
+ return size;
+}
+
+static int lo_receive(struct loop_device *lo, char *data, int len, loff_t pos,
+ int blksize)
+{
+ struct file *file = lo->lo_backing_file;
+ struct lo_read_data cookie;
+ read_descriptor_t desc;
+
+ cookie.lo = lo;
+ cookie.data = data;
+ cookie.blksize = blksize;
+ desc.written = 0;
+ desc.count = len;
+ desc.buf = (char*)&cookie;
+ desc.error = 0;
+ do_generic_file_read(file, &pos, &desc, lo_read_actor);
+ return desc.error;
+}
+
static void do_lo_request(request_queue_t * q)
{
- int real_block, block, offset, len, blksize, size;
+ int block, offset, len, blksize, size;
char *dest_addr;
struct loop_device *lo;
struct buffer_head *bh;
struct request *current_request;
- int block_present;
+ loff_t pos;
repeat:
INIT_REQUEST;
current_request=CURRENT;
- CURRENT=current_request->next;
+ blkdev_dequeue_request(current_request);
if (MINOR(current_request->rq_dev) >= max_loop)
goto error_out;
lo = &loop_dev[MINOR(current_request->rq_dev)];
if (!lo->lo_dentry || !lo->transfer)
goto error_out;
+ if (current_request->cmd == WRITE) {
+ if (lo->lo_flags & LO_FLAGS_READ_ONLY)
+ goto error_out;
+ } else if (current_request->cmd != READ) {
+ printk(KERN_ERR "unknown loop device command (%d)?!?", current_request->cmd);
+ goto error_out;
+ }
+
+ dest_addr = current_request->buffer;
+ len = current_request->current_nr_sectors << 9;
blksize = BLOCK_SIZE;
if (blksize_size[MAJOR(lo->lo_device)]) {
@@ -190,8 +303,9 @@ repeat:
blksize = BLOCK_SIZE;
}
- dest_addr = current_request->buffer;
-
+ if (lo->lo_flags & LO_FLAGS_DO_BMAP)
+ goto file_backed;
+
if (blksize < 512) {
block = current_request->sector * (512/blksize);
offset = 0;
@@ -201,155 +315,79 @@ repeat:
}
block += lo->lo_offset / blksize;
offset += lo->lo_offset % blksize;
- if (offset > blksize) {
+ if (offset >= blksize) {
block++;
offset -= blksize;
}
- len = current_request->current_nr_sectors << 9;
-
- if (current_request->cmd == WRITE) {
- if (lo->lo_flags & LO_FLAGS_READ_ONLY)
- goto error_out;
- } else if (current_request->cmd != READ) {
- printk(KERN_ERR "unknown loop device command (%d)?!?", current_request->cmd);
- goto error_out;
- }
spin_unlock_irq(&io_request_lock);
+
while (len > 0) {
size = blksize - offset;
if (size > len)
size = len;
- real_block = block;
- block_present = TRUE;
-
- if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
- real_block = bmap(lo->lo_dentry->d_inode, block);
- if (!real_block) {
-
- /* The backing file is a sparse file and this block
- doesn't exist. If reading, return zeros. If
- writing, force the underlying FS to create
- the block */
- if (current_request->cmd == READ) {
- memset(dest_addr, 0, size);
- block_present = FALSE;
- } else {
- if (!create_missing_block(lo, block, blksize)) {
- goto error_out_lock;
- }
- real_block = bmap(lo->lo_dentry->d_inode, block);
- }
-
- }
+ bh = getblk(lo->lo_device, block, blksize);
+ if (!bh) {
+ printk(KERN_ERR "loop: device %s: getblk(-, %d, %d) returned NULL",
+ kdevname(lo->lo_device),
+ block, blksize);
+ goto error_out_lock;
}
-
- if (block_present) {
- bh = getblk(lo->lo_device, real_block, blksize);
- if (!bh) {
- printk(KERN_ERR "loop: device %s: getblk(-, %d, %d) returned NULL",
- kdevname(lo->lo_device),
- block, blksize);
- goto error_out_lock;
- }
- if (!buffer_uptodate(bh) && ((current_request->cmd == READ) ||
- (offset || (len < blksize)))) {
- ll_rw_block(READ, 1, &bh);
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh)) {
- brelse(bh);
- goto error_out_lock;
- }
- }
-
- if ((lo->transfer)(lo, current_request->cmd, bh->b_data + offset,
- dest_addr, size, real_block)) {
- printk(KERN_ERR "loop: transfer error block %d\n", block);
+ if (!buffer_uptodate(bh) && ((current_request->cmd == READ) ||
+ (offset || (len < blksize)))) {
+ ll_rw_block(READ, 1, &bh);
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh)) {
brelse(bh);
goto error_out_lock;
}
+ }
- if (current_request->cmd == WRITE) {
- mark_buffer_uptodate(bh, 1);
- mark_buffer_dirty(bh, 1);
- }
+ if ((lo->transfer)(lo, current_request->cmd, bh->b_data + offset,
+ dest_addr, size, block)) {
+ printk(KERN_ERR "loop: transfer error block %d\n", block);
brelse(bh);
+ goto error_out_lock;
+ }
+
+ if (current_request->cmd == WRITE) {
+ mark_buffer_uptodate(bh, 1);
+ mark_buffer_dirty(bh, 1);
}
+ brelse(bh);
dest_addr += size;
len -= size;
offset = 0;
block++;
}
+ goto done;
+
+file_backed:
+ pos = ((loff_t)current_request->sector << 9) + lo->lo_offset;
+ spin_unlock_irq(&io_request_lock);
+ if (current_request->cmd == WRITE) {
+ if (lo_send(lo, dest_addr, len, pos, blksize))
+ goto error_out_lock;
+ } else {
+ if (lo_receive(lo, dest_addr, len, pos, blksize))
+ goto error_out_lock;
+ }
+done:
spin_lock_irq(&io_request_lock);
- current_request->next=CURRENT;
- CURRENT=current_request;
+ current_request->sector += current_request->current_nr_sectors;
+ current_request->nr_sectors -= current_request->current_nr_sectors;
+ list_add(&current_request->queue, &q->queue_head);
end_request(1);
goto repeat;
error_out_lock:
spin_lock_irq(&io_request_lock);
error_out:
- current_request->next=CURRENT;
- CURRENT=current_request;
+ list_add(&current_request->queue, &q->queue_head);
end_request(0);
goto repeat;
}
-static int create_missing_block(struct loop_device *lo, int block, int blksize)
-{
- struct file *file;
- loff_t new_offset;
- char zero_buf[1] = { 0 };
- ssize_t retval;
- mm_segment_t old_fs;
- struct inode *inode;
-
- file = lo->lo_backing_file;
- if (file == NULL) {
- printk(KERN_WARNING "loop: cannot create block - no backing file\n");
- return FALSE;
- }
-
- if (file->f_op == NULL) {
- printk(KERN_WARNING "loop: cannot create block - no file ops\n");
- return FALSE;
- }
-
- new_offset = block * blksize;
-
- if (file->f_op->llseek != NULL) {
- file->f_op->llseek(file, new_offset, 0);
- } else {
- /* Do what the default llseek() code would have done */
- file->f_pos = new_offset;
- file->f_reada = 0;
- file->f_version = ++event;
- }
-
- if (file->f_op->write == NULL) {
- printk(KERN_WARNING "loop: cannot create block - file not writeable\n");
- return FALSE;
- }
-
- old_fs = get_fs();
- set_fs(get_ds());
-
- inode = file->f_dentry->d_inode;
- down(&inode->i_sem);
- retval = file->f_op->write(file, zero_buf, 1, &file->f_pos);
- up(&inode->i_sem);
-
- set_fs(old_fs);
-
- if (retval < 0) {
- printk(KERN_WARNING "loop: cannot create block - FS write failed: code %Zi\n",
- retval);
- return FALSE;
- } else {
- return TRUE;
- }
-}
-
static int loop_set_fd(struct loop_device *lo, kdev_t dev, unsigned int arg)
{
struct file *file;
@@ -386,22 +424,13 @@ static int loop_set_fd(struct loop_device *lo, kdev_t dev, unsigned int arg)
a file structure */
lo->lo_backing_file = NULL;
} else if (S_ISREG(inode->i_mode)) {
- /*
- * Total crap. We should just use pagecache instead of trying
- * to redirect on block level.
- */
- if (!inode->i_mapping->a_ops->bmap) {
- printk(KERN_ERR "loop: device has no block access/not implemented\n");
- goto out_putf;
- }
-
- /* Backed by a regular file - we need to hold onto
- a file structure for this file. We'll use it to
- write to blocks that are not already present in
- a sparse file. We create a new file structure
- based on the one passed to us via 'arg'. This is
- to avoid changing the file structure that the
- caller is using */
+ struct address_space_operations *aops;
+ /* Backed by a regular file - we need to hold onto a file
+ structure for this file. Friggin' NFS can't live without
+ it on write and for reading we use do_generic_file_read(),
+ so... We create a new file structure based on the one
+ passed to us via 'arg'. This is to avoid changing the file
+ structure that the caller is using */
lo->lo_device = inode->i_dev;
lo->lo_flags = LO_FLAGS_DO_BMAP;
@@ -424,17 +453,23 @@ static int loop_set_fd(struct loop_device *lo, kdev_t dev, unsigned int arg)
lo->lo_backing_file = NULL;
}
}
+ aops = inode->i_mapping->a_ops;
+ /*
+ * If we can't read - sorry. If we only can't write - well,
+ * it's going to be read-only.
+ */
+ if (!aops->readpage)
+ error = -EINVAL;
+ else if (!aops->prepare_write || !aops->commit_write)
+ lo->lo_flags |= LO_FLAGS_READ_ONLY;
}
if (error)
goto out_putf;
- if (IS_RDONLY (inode) || is_read_only(lo->lo_device)) {
+ if (IS_RDONLY (inode) || is_read_only(lo->lo_device))
lo->lo_flags |= LO_FLAGS_READ_ONLY;
- set_device_ro(dev, 1);
- } else {
- vmtruncate (inode, 0);
- set_device_ro(dev, 0);
- }
+
+ set_device_ro(dev, (lo->lo_flags & LO_FLAGS_READ_ONLY)!=0);
lo->lo_dentry = dget(file->f_dentry);
lo->transfer = NULL;
@@ -719,11 +754,16 @@ int __init loop_init(void)
{
int i;
- if (register_blkdev(MAJOR_NR, "loop", &lo_fops)) {
+ if (devfs_register_blkdev(MAJOR_NR, "loop", &lo_fops)) {
printk(KERN_WARNING "Unable to get major number %d for loop device\n",
MAJOR_NR);
return -EIO;
}
+ devfs_handle = devfs_mk_dir (NULL, "loop", 0, NULL);
+ devfs_register_series (devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT,
+ MAJOR_NR, 0,
+ S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, 0, 0,
+ &lo_fops, NULL);
if ((max_loop < 1) || (max_loop > 255)) {
printk (KERN_WARNING "loop: invalid max_loop (must be between 1 and 255), using default (8)\n");
@@ -755,6 +795,7 @@ int __init loop_init(void)
}
blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST);
+ blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0);
for (i=0; i < max_loop; i++) {
memset(&loop_dev[i], 0, sizeof(struct loop_device));
loop_dev[i].lo_number = i;
@@ -772,7 +813,8 @@ int __init loop_init(void)
#ifdef MODULE
void cleanup_module(void)
{
- if (unregister_blkdev(MAJOR_NR, "loop") != 0)
+ devfs_unregister (devfs_handle);
+ if (devfs_unregister_blkdev(MAJOR_NR, "loop") != 0)
printk(KERN_WARNING "loop: cannot unregister blkdev\n");
kfree (loop_dev);
diff --git a/drivers/block/lvm-snap.c b/drivers/block/lvm-snap.c
new file mode 100644
index 000000000..77500cc43
--- /dev/null
+++ b/drivers/block/lvm-snap.c
@@ -0,0 +1,434 @@
+/*
+ * kernel/lvm-snap.c
+ *
+ * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ *
+ * LVM snapshot driver is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * LVM driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include <linux/blkdev.h>
+#include <linux/smp_lock.h>
+#include <linux/types.h>
+#include <linux/iobuf.h>
+#include <linux/lvm.h>
+
+
+static char *lvm_snap_version = "LVM 0.8final (15/02/2000)\n";
+
+extern const char *const lvm_name;
+extern int lvm_blocksizes[];
+
+void lvm_snapshot_release(lv_t *);
+
+#define hashfn(dev,block,mask,chunk_size) \
+ ((HASHDEV(dev)^((block)/(chunk_size))) & (mask))
+
+static inline lv_block_exception_t *
+lvm_find_exception_table(kdev_t org_dev, unsigned long org_start, lv_t * lv)
+{
+ struct list_head * hash_table = lv->lv_snapshot_hash_table, * next;
+ unsigned long mask = lv->lv_snapshot_hash_mask;
+ int chunk_size = lv->lv_chunk_size;
+ lv_block_exception_t * ret;
+ int i = 0;
+
+ hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)];
+ ret = NULL;
+ for (next = hash_table->next; next != hash_table; next = next->next)
+ {
+ lv_block_exception_t * exception;
+
+ exception = list_entry(next, lv_block_exception_t, hash);
+ if (exception->rsector_org == org_start &&
+ exception->rdev_org == org_dev)
+ {
+ if (i)
+ {
+ /* fun, isn't it? :) */
+ list_del(next);
+ list_add(next, hash_table);
+ }
+ ret = exception;
+ break;
+ }
+ i++;
+ }
+ return ret;
+}
+
+static inline void lvm_hash_link(lv_block_exception_t * exception,
+ kdev_t org_dev, unsigned long org_start,
+ lv_t * lv)
+{
+ struct list_head * hash_table = lv->lv_snapshot_hash_table;
+ unsigned long mask = lv->lv_snapshot_hash_mask;
+ int chunk_size = lv->lv_chunk_size;
+
+ hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)];
+ list_add(&exception->hash, hash_table);
+}
+
+int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector,
+ unsigned long pe_start, lv_t * lv)
+{
+ int ret;
+ unsigned long pe_off, pe_adjustment, __org_start;
+ kdev_t __org_dev;
+ int chunk_size = lv->lv_chunk_size;
+ lv_block_exception_t * exception;
+
+ pe_off = pe_start % chunk_size;
+ pe_adjustment = (*org_sector-pe_off) % chunk_size;
+ __org_start = *org_sector - pe_adjustment;
+ __org_dev = *org_dev;
+
+ ret = 0;
+ exception = lvm_find_exception_table(__org_dev, __org_start, lv);
+ if (exception)
+ {
+ *org_dev = exception->rdev_new;
+ *org_sector = exception->rsector_new + pe_adjustment;
+ ret = 1;
+ }
+ return ret;
+}
+
+static void lvm_drop_snapshot(lv_t * lv_snap, const char * reason)
+{
+ kdev_t last_dev;
+ int i;
+
+ /* no exception storage space available for this snapshot
+ or error on this snapshot --> release it */
+ invalidate_buffers(lv_snap->lv_dev);
+
+ for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) {
+ if ( lv_snap->lv_block_exception[i].rdev_new != last_dev) {
+ last_dev = lv_snap->lv_block_exception[i].rdev_new;
+ invalidate_buffers(last_dev);
+ }
+ }
+
+ lvm_snapshot_release(lv_snap);
+
+ printk(KERN_INFO
+ "%s -- giving up to snapshot %s on %s due %s\n",
+ lvm_name, lv_snap->lv_snapshot_org->lv_name, lv_snap->lv_name,
+ reason);
+}
+
+static inline void lvm_snapshot_prepare_blocks(unsigned long * blocks,
+ unsigned long start,
+ int nr_sectors,
+ int blocksize)
+{
+ int i, sectors_per_block, nr_blocks;
+
+ sectors_per_block = blocksize >> 9;
+ nr_blocks = nr_sectors / sectors_per_block;
+ start /= sectors_per_block;
+
+ for (i = 0; i < nr_blocks; i++)
+ blocks[i] = start++;
+}
+
+static inline int get_blksize(kdev_t dev)
+{
+ int correct_size = BLOCK_SIZE, i, major;
+
+ major = MAJOR(dev);
+ if (blksize_size[major])
+ {
+ i = blksize_size[major][MINOR(dev)];
+ if (i)
+ correct_size = i;
+ }
+ return correct_size;
+}
+
+#ifdef DEBUG_SNAPSHOT
+static inline void invalidate_snap_cache(unsigned long start, unsigned long nr,
+ kdev_t dev)
+{
+ struct buffer_head * bh;
+ int sectors_per_block, i, blksize, minor;
+
+ minor = MINOR(dev);
+ blksize = lvm_blocksizes[minor];
+ sectors_per_block = blksize >> 9;
+ nr /= sectors_per_block;
+ start /= sectors_per_block;
+
+ for (i = 0; i < nr; i++)
+ {
+ bh = get_hash_table(dev, start++, blksize);
+ if (bh)
+ bforget(bh);
+ }
+}
+#endif
+
+/*
+ * copy on write handler for one snapshot logical volume
+ *
+ * read the original blocks and store it/them on the new one(s).
+ * if there is no exception storage space free any longer --> release snapshot.
+ *
+ * this routine gets called for each _first_ write to a physical chunk.
+ */
+int lvm_snapshot_COW(kdev_t org_phys_dev,
+ unsigned long org_phys_sector,
+ unsigned long org_pe_start,
+ unsigned long org_virt_sector,
+ lv_t * lv_snap)
+{
+ const char * reason;
+ unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off;
+ int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size;
+ struct kiobuf * iobuf;
+ unsigned long blocks[KIO_MAX_SECTORS];
+ int blksize_snap, blksize_org, min_blksize, max_blksize;
+ int max_sectors, nr_sectors;
+
+ /* check if we are out of snapshot space */
+ if (idx >= lv_snap->lv_remap_end)
+ goto fail_out_of_space;
+
+ /* calculate physical boundaries of source chunk */
+ pe_off = org_pe_start % chunk_size;
+ org_start = org_phys_sector - ((org_phys_sector-pe_off) % chunk_size);
+ virt_start = org_virt_sector - (org_phys_sector - org_start);
+
+ /* calculate physical boundaries of destination chunk */
+ snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
+ snap_start = lv_snap->lv_block_exception[idx].rsector_new;
+
+#ifdef DEBUG_SNAPSHOT
+ printk(KERN_INFO
+ "%s -- COW: "
+ "org %02d:%02d faulting %lu start %lu, "
+ "snap %02d:%02d start %lu, "
+ "size %d, pe_start %lu pe_off %lu, virt_sec %lu\n",
+ lvm_name,
+ MAJOR(org_phys_dev), MINOR(org_phys_dev), org_phys_sector,
+ org_start,
+ MAJOR(snap_phys_dev), MINOR(snap_phys_dev), snap_start,
+ chunk_size,
+ org_pe_start, pe_off,
+ org_virt_sector);
+#endif
+
+ iobuf = lv_snap->lv_iobuf;
+
+ blksize_org = get_blksize(org_phys_dev);
+ blksize_snap = get_blksize(snap_phys_dev);
+ max_blksize = max(blksize_org, blksize_snap);
+ min_blksize = min(blksize_org, blksize_snap);
+ max_sectors = KIO_MAX_SECTORS * (min_blksize>>9);
+
+ if (chunk_size % (max_blksize>>9))
+ goto fail_blksize;
+
+ while (chunk_size)
+ {
+ nr_sectors = min(chunk_size, max_sectors);
+ chunk_size -= nr_sectors;
+
+ iobuf->length = nr_sectors << 9;
+
+ lvm_snapshot_prepare_blocks(blocks, org_start,
+ nr_sectors, blksize_org);
+ if (brw_kiovec(READ, 1, &iobuf, org_phys_dev,
+ blocks, blksize_org) != (nr_sectors<<9))
+ goto fail_raw_read;
+
+ lvm_snapshot_prepare_blocks(blocks, snap_start,
+ nr_sectors, blksize_snap);
+ if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
+ blocks, blksize_snap) != (nr_sectors<<9))
+ goto fail_raw_write;
+ }
+
+#ifdef DEBUG_SNAPSHOT
+	/* invalidate the logical snapshot buffer cache */
+ invalidate_snap_cache(virt_start, lv_snap->lv_chunk_size,
+ lv_snap->lv_dev);
+#endif
+
+	/* the original chunk is now stored on the snapshot volume
+	   so update the exception table */
+ lv_snap->lv_block_exception[idx].rdev_org = org_phys_dev;
+ lv_snap->lv_block_exception[idx].rsector_org = org_start;
+ lvm_hash_link(lv_snap->lv_block_exception + idx,
+ org_phys_dev, org_start, lv_snap);
+ lv_snap->lv_remap_ptr = idx + 1;
+ return 0;
+
+ /* slow path */
+ out:
+ lvm_drop_snapshot(lv_snap, reason);
+ return 1;
+
+ fail_out_of_space:
+ reason = "out of space";
+ goto out;
+ fail_raw_read:
+ reason = "read error";
+ goto out;
+ fail_raw_write:
+ reason = "write error";
+ goto out;
+ fail_blksize:
+ reason = "blocksize error";
+ goto out;
+}
+
+static int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors)
+{
+ int bytes, nr_pages, err, i;
+
+ bytes = sectors << 9;
+ nr_pages = (bytes + ~PAGE_MASK) >> PAGE_SHIFT;
+ err = expand_kiobuf(iobuf, nr_pages);
+ if (err)
+ goto out;
+
+ err = -ENOMEM;
+ iobuf->locked = 1;
+ iobuf->nr_pages = 0;
+ for (i = 0; i < nr_pages; i++)
+ {
+ struct page * page;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,27)
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ goto out;
+#else
+ {
+ unsigned long addr = __get_free_page(GFP_USER);
+ if (!addr)
+ goto out;
+ iobuf->pagelist[i] = addr;
+ page = mem_map + MAP_NR(addr);
+ }
+#endif
+
+ iobuf->maplist[i] = page;
+ /* the only point to lock the page here is to be allowed
+ to share unmap_kiobuf() in the fail-path */
+#ifndef LockPage
+#define LockPage(map) set_bit(PG_locked, &(map)->flags)
+#endif
+ LockPage(page);
+ iobuf->nr_pages++;
+ }
+ iobuf->offset = 0;
+
+ err = 0;
+ out:
+ return err;
+}
+
+static int calc_max_buckets(void)
+{
+ unsigned long mem;
+
+ mem = num_physpages << PAGE_SHIFT;
+ mem /= 100;
+ mem *= 2;
+ mem /= sizeof(struct list_head);
+
+ return mem;
+}
+
+static int lvm_snapshot_alloc_hash_table(lv_t * lv)
+{
+ int err;
+ unsigned long buckets, max_buckets, size;
+ struct list_head * hash;
+
+ buckets = lv->lv_remap_end;
+ max_buckets = calc_max_buckets();
+ buckets = min(buckets, max_buckets);
+ while (buckets & (buckets-1))
+ buckets &= (buckets-1);
+
+ size = buckets * sizeof(struct list_head);
+
+ err = -ENOMEM;
+ hash = vmalloc(size);
+ lv->lv_snapshot_hash_table = hash;
+
+ if (!hash)
+ goto out;
+
+ lv->lv_snapshot_hash_mask = buckets-1;
+ while (buckets--)
+ INIT_LIST_HEAD(hash+buckets);
+ err = 0;
+ out:
+ return err;
+}
+
+int lvm_snapshot_alloc(lv_t * lv_snap)
+{
+ int err, blocksize, max_sectors;
+
+ err = alloc_kiovec(1, &lv_snap->lv_iobuf);
+ if (err)
+ goto out;
+
+ blocksize = lvm_blocksizes[MINOR(lv_snap->lv_dev)];
+ max_sectors = KIO_MAX_SECTORS << (PAGE_SHIFT-9);
+
+ err = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors);
+ if (err)
+ goto out_free_kiovec;
+
+ err = lvm_snapshot_alloc_hash_table(lv_snap);
+ if (err)
+ goto out_free_kiovec;
+ out:
+ return err;
+
+ out_free_kiovec:
+ unmap_kiobuf(lv_snap->lv_iobuf);
+ free_kiovec(1, &lv_snap->lv_iobuf);
+ goto out;
+}
+
+void lvm_snapshot_release(lv_t * lv)
+{
+ if (lv->lv_block_exception)
+ {
+ vfree(lv->lv_block_exception);
+ lv->lv_block_exception = NULL;
+ }
+ if (lv->lv_snapshot_hash_table)
+ {
+ vfree(lv->lv_snapshot_hash_table);
+ lv->lv_snapshot_hash_table = NULL;
+ }
+ if (lv->lv_iobuf)
+ {
+ free_kiovec(1, &lv->lv_iobuf);
+ lv->lv_iobuf = NULL;
+ }
+}
diff --git a/drivers/block/lvm.c b/drivers/block/lvm.c
new file mode 100644
index 000000000..6d2f2743e
--- /dev/null
+++ b/drivers/block/lvm.c
@@ -0,0 +1,2556 @@
+/*
+ * kernel/lvm.c
+ *
+ * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Germany
+ *
+ * February-November 1997
+ * April-May,July-August,November 1998
+ * January-March,May,July,September,October 1999
+ * January,February 2000
+ *
+ *
+ * LVM driver is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * LVM driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+/*
+ * Changelog
+ *
+ * 09/11/1997 - added chr ioctls VG_STATUS_GET_COUNT
+ * and VG_STATUS_GET_NAMELIST
+ * 18/01/1998 - change lvm_chr_open/close lock handling
+ * 30/04/1998 - changed LV_STATUS ioctl to LV_STATUS_BYNAME and
+ * - added LV_STATUS_BYINDEX ioctl
+ * - used lvm_status_byname_req_t and
+ * lvm_status_byindex_req_t vars
+ * 04/05/1998 - added multiple device support
+ * 08/05/1998 - added support to set/clear extendable flag in volume group
+ * 09/05/1998 - changed output of lvm_proc_get_info() because of
+ * support for free (eg. longer) logical volume names
+ * 12/05/1998 - added spin_locks (thanks to Pascal van Dam
+ * <pascal@ramoth.xs4all.nl>)
+ * 25/05/1998 - fixed handling of locked PEs in lvm_map() and lvm_chr_ioctl()
+ * 26/05/1998 - reactivated verify_area by access_ok
+ * 07/06/1998 - used vmalloc/vfree instead of kmalloc/kfree to go
+ * beyond 128/256 KB max allocation limit per call
+ * - #ifdef blocked spin_lock calls to avoid compile errors
+ * with 2.0.x
+ * 11/06/1998 - another enhancement to spinlock code in lvm_chr_open()
+ * and use of LVM_VERSION_CODE instead of my own macros
+ * (thanks to Michael Marxmeier <mike@msede.com>)
+ * 07/07/1998 - added statistics in lvm_map()
+ * 08/07/1998 - saved statistics in lvm_do_lv_extend_reduce()
+ * 25/07/1998 - used __initfunc macro
+ * 02/08/1998 - changes for official char/block major numbers
+ * 07/08/1998 - avoided init_module() and cleanup_module() to be static
+ * 30/08/1998 - changed VG lv_open counter from sum of LV lv_open counters
+ * to sum of LVs open (no matter how often each is)
+ * 01/09/1998 - fixed lvm_gendisk.part[] index error
+ * 07/09/1998 - added copying of lv_current_pe-array
+ * in LV_STATUS_BYINDEX ioctl
+ * 17/11/1998 - added KERN_* levels to printk
+ * 13/01/1999 - fixed LV index bug in lvm_do_lv_create() which hit lvrename
+ * 07/02/1999 - fixed spinlock handling bug in case of LVM_RESET
+ * by moving spinlock code from lvm_chr_open()
+ * to lvm_chr_ioctl()
+ * - added LVM_LOCK_LVM ioctl to lvm_chr_ioctl()
+ * - allowed LVM_RESET and retrieval commands to go ahead;
+ * only other update ioctls are blocked now
+ * - fixed pv->pe to NULL for pv_status
+ * - using lv_req structure in lvm_chr_ioctl() now
+ * - fixed NULL ptr reference bug in lvm_do_lv_extend_reduce()
+ * caused by uncontiguous PV array in lvm_chr_ioctl(VG_REDUCE)
+ * 09/02/1999 - changed BLKRASET and BLKRAGET in lvm_chr_ioctl() to
+ *              handle logical volume private read ahead sector
+ * - implemented LV read_ahead handling with lvm_blk_read()
+ * and lvm_blk_write()
+ * 10/02/1999 - implemented 2.[12].* support function lvm_hd_name()
+ * to be used in drivers/block/genhd.c by disk_name()
+ * 12/02/1999 - fixed index bug in lvm_blk_ioctl(), HDIO_GETGEO
+ * - enhanced gendisk insert/remove handling
+ * 16/02/1999 - changed to dynamic block minor number allocation to
+ * have as much as 99 volume groups with 256 logical volumes
+ * as the grand total; this allows having 1 volume group with
+ * up to 256 logical volumes in it
+ * 21/02/1999 - added LV open count information to proc filesystem
+ * - substituted redundant LVM_RESET code by calls
+ * to lvm_do_vg_remove()
+ * 22/02/1999 - used schedule_timeout() to be more responsive
+ * in case of lvm_do_vg_remove() with lots of logical volumes
+ * 19/03/1999 - fixed NULL pointer bug in module_init/lvm_init
+ * 17/05/1999 - used DECLARE_WAIT_QUEUE_HEAD macro (>2.3.0)
+ * - enhanced lvm_hd_name support
+ * 03/07/1999 - avoided use of KERNEL_VERSION macro based ifdefs and
+ * memcpy_tofs/memcpy_fromfs macro redefinitions
+ * 06/07/1999 - corrected reads/writes statistic counter copy in case
+ * of striped logical volume
+ * 28/07/1999 - implemented snapshot logical volumes
+ * - lvm_chr_ioctl
+ * - LV_STATUS_BYINDEX
+ * - LV_STATUS_BYNAME
+ * - lvm_do_lv_create
+ * - lvm_do_lv_remove
+ * - lvm_map
+ * - new lvm_snapshot_remap_block
+ * - new lvm_snapshot_remap_new_block
+ * 08/10/1999 - implemented support for multiple snapshots per
+ * original logical volume
+ * 12/10/1999 - support for 2.3.19
+ * 11/11/1999 - support for 2.3.28
+ * 21/11/1999 - changed lvm_map() interface to buffer_head based
+ * 19/12/1999 - support for 2.3.33
+ * 01/01/2000 - changed locking concept in lvm_map(),
+ * lvm_do_vg_create() and lvm_do_lv_remove()
+ * 15/01/2000 - fixed PV_FLUSH bug in lvm_chr_ioctl()
+ * 24/01/2000 - ported to 2.3.40 including Alan Cox's pointer changes etc.
+ * 29/01/2000 - used kmalloc/kfree again for all small structures
+ * 20/01/2000 - cleaned up lvm_chr_ioctl by moving code
+ *                to separated functions
+ * - avoided "/dev/" in proc filesystem output
+ * - avoided inline strings functions lvm_strlen etc.
+ * 14/02/2000 - support for 2.3.43
+ *                - integrated Andrea Arcangeli's snapshot code
+ *
+ */
+
+
+static char *lvm_version = "LVM version 0.8final by Heinz Mauelshagen (15/02/2000)\n";
+static char *lvm_short_version = "version 0.8final (15/02/2000)";
+
+#define MAJOR_NR LVM_BLK_MAJOR
+#define DEVICE_OFF(device)
+
+#include <linux/config.h>
+#include <linux/version.h>
+
+#ifdef MODVERSIONS
+#undef MODULE
+#define MODULE
+#include <linux/modversions.h>
+#endif
+
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+
+#include <linux/hdreg.h>
+#include <linux/stat.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/blkdev.h>
+#include <linux/genhd.h>
+#include <linux/locks.h>
+#include <linux/smp_lock.h>
+#include <asm/ioctl.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_KERNELD
+#include <linux/kerneld.h>
+#endif
+
+#include <linux/blk.h>
+#include <linux/blkpg.h>
+
+#include <linux/errno.h>
+#include <linux/lvm.h>
+
+#define LVM_CORRECT_READ_AHEAD( a) \
+ if ( a < LVM_MIN_READ_AHEAD || \
+ a > LVM_MAX_READ_AHEAD) a = LVM_MAX_READ_AHEAD;
+
+#ifndef WRITEA
+# define WRITEA WRITE
+#endif
+
+/*
+ * External function prototypes
+ */
+#ifdef MODULE
+int init_module(void);
+void cleanup_module(void);
+#else
+extern int lvm_init(void);
+#endif
+
+static void lvm_dummy_device_request(request_queue_t *);
+#define DEVICE_REQUEST lvm_dummy_device_request
+
+static void lvm_make_request_fn(int, struct buffer_head*);
+
+static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong);
+static int lvm_blk_open(struct inode *, struct file *);
+
+static ssize_t lvm_blk_read(struct file *, char *, size_t, loff_t *);
+static ssize_t lvm_blk_write(struct file *, const char *, size_t, loff_t *);
+
+static int lvm_chr_open(struct inode *, struct file *);
+
+static int lvm_chr_close(struct inode *, struct file *);
+static int lvm_blk_close(struct inode *, struct file *);
+
+static int lvm_chr_ioctl(struct inode *, struct file *, uint, ulong);
+
+#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS
+static int lvm_proc_get_info(char *, char **, off_t, int);
+static int (*lvm_proc_get_info_ptr) (char *, char **, off_t, int) =
+&lvm_proc_get_info;
+#endif
+
+#ifdef LVM_HD_NAME
+void lvm_hd_name(char *, int);
+#endif
+/* End external function prototypes */
+
+
+/*
+ * Internal function prototypes
+ */
+static void lvm_init_vars(void);
+
+/* external snapshot calls */
+int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *);
+int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, lv_t *);
+int lvm_snapshot_alloc(lv_t *);
+void lvm_snapshot_release(lv_t *);
+
+#ifdef LVM_HD_NAME
+extern void (*lvm_hd_name_ptr) (char *, int);
+#endif
+static int lvm_map(struct buffer_head *, int);
+static int lvm_do_lock_lvm(void);
+static int lvm_do_le_remap(vg_t *, void *);
+static int lvm_do_pe_lock_unlock(vg_t *r, void *);
+static int lvm_do_vg_create(int, void *);
+static int lvm_do_vg_extend(vg_t *, void *);
+static int lvm_do_vg_reduce(vg_t *, void *);
+static int lvm_do_vg_remove(int);
+static int lvm_do_lv_create(int, char *, lv_t *);
+static int lvm_do_lv_remove(int, char *, int);
+static int lvm_do_lv_extend_reduce(int, char *, lv_t *);
+static int lvm_do_lv_status_byname(vg_t *r, void *);
+static int lvm_do_lv_status_byindex(vg_t *, void *arg);
+static int lvm_do_pv_change(vg_t*, void*);
+static int lvm_do_pv_status(vg_t *, void *);
+static void lvm_geninit(struct gendisk *);
+#ifdef LVM_GET_INODE
+static struct inode *lvm_get_inode(int);
+void lvm_clear_inode(struct inode *);
+#endif
+/* END Internal function prototypes */
+
+
+/* volume group descriptor area pointers */
+static vg_t *vg[ABS_MAX_VG];
+static pv_t *pvp = NULL;
+static lv_t *lvp = NULL;
+static pe_t *pep = NULL;
+static pe_t *pep1 = NULL;
+
+
+/* map from block minor number to VG and LV numbers */
+typedef struct {
+ int vg_number;
+ int lv_number;
+} vg_lv_map_t;
+static vg_lv_map_t vg_lv_map[ABS_MAX_LV];
+
+
+/* Request structures (lvm_chr_ioctl()) */
+static pv_change_req_t pv_change_req;
+static pv_flush_req_t pv_flush_req;
+static pv_status_req_t pv_status_req;
+static pe_lock_req_t pe_lock_req;
+static le_remap_req_t le_remap_req;
+static lv_req_t lv_req;
+
+#ifdef LVM_TOTAL_RESET
+static int lvm_reset_spindown = 0;
+#endif
+
+static char pv_name[NAME_LEN];
+/* static char rootvg[NAME_LEN] = { 0, }; */
+static uint lv_open = 0;
+static const char *const lvm_name = LVM_NAME;
+static int lock = 0;
+static int loadtime = 0;
+static uint vg_count = 0;
+static long lvm_chr_open_count = 0;
+static ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION;
+static DECLARE_WAIT_QUEUE_HEAD(lvm_snapshot_wait);
+static DECLARE_WAIT_QUEUE_HEAD(lvm_wait);
+static DECLARE_WAIT_QUEUE_HEAD(lvm_map_wait);
+
+static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
+
+static struct file_operations lvm_chr_fops =
+{
+ open: lvm_chr_open,
+ release: lvm_chr_close,
+ ioctl: lvm_chr_ioctl,
+};
+
+static struct file_operations lvm_blk_fops =
+{
+ open: lvm_blk_open,
+ release: blkdev_close,
+ read: lvm_blk_read,
+ write: lvm_blk_write,
+ ioctl: lvm_blk_ioctl,
+ fsync: block_fsync,
+};
+
+#define BLOCK_DEVICE_OPERATIONS
+/* block device operations structure needed for 2.3.38? and above */
+static struct block_device_operations lvm_blk_dops =
+{
+ open: lvm_blk_open,
+ release: lvm_blk_close,
+ ioctl: lvm_blk_ioctl
+};
+
+/* gendisk structures */
+static struct hd_struct lvm_hd_struct[MAX_LV];
+static int lvm_blocksizes[MAX_LV] =
+{0,};
+static int lvm_size[MAX_LV] =
+{0,};
+static struct gendisk lvm_gendisk =
+{
+ MAJOR_NR, /* major # */
+ LVM_NAME, /* name of major */
+ 0, /* number of times minor is shifted
+ to get real minor */
+ 1, /* maximum partitions per device */
+ lvm_hd_struct, /* partition table */
+ lvm_size, /* device size in blocks, copied
+ to block_size[] */
+ MAX_LV, /* number or real devices */
+ NULL, /* internal */
+ NULL, /* pointer to next gendisk struct (internal) */
+};
+
+
+#ifdef MODULE
+/*
+ * Module initialization...
+ */
+int init_module(void)
+#else
+/*
+ * Driver initialization...
+ */
+#ifdef __initfunc
+__initfunc(int lvm_init(void))
+#else
+int __init lvm_init(void)
+#endif
+#endif /* #ifdef MODULE */
+{
+ struct gendisk *gendisk_ptr = NULL;
+
+ if (register_chrdev(LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) {
+ printk(KERN_ERR "%s -- register_chrdev failed\n", lvm_name);
+ return -EIO;
+ }
+#ifdef BLOCK_DEVICE_OPERATIONS
+ if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0)
+#else
+ if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_fops) < 0)
+#endif
+ {
+ printk("%s -- register_blkdev failed\n", lvm_name);
+ if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
+ printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name);
+ return -EIO;
+ }
+#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS
+ create_proc_info_entry(LVM_NAME, S_IFREG | S_IRUGO,
+ &proc_root, lvm_proc_get_info_ptr);
+#endif
+
+ lvm_init_vars();
+ lvm_geninit(&lvm_gendisk);
+
+ /* insert our gendisk at the corresponding major */
+ if (gendisk_head != NULL) {
+ gendisk_ptr = gendisk_head;
+ while (gendisk_ptr->next != NULL &&
+ gendisk_ptr->major > lvm_gendisk.major) {
+ gendisk_ptr = gendisk_ptr->next;
+ }
+ lvm_gendisk.next = gendisk_ptr->next;
+ gendisk_ptr->next = &lvm_gendisk;
+ } else {
+ gendisk_head = &lvm_gendisk;
+ lvm_gendisk.next = NULL;
+ }
+
+#ifdef LVM_HD_NAME
+ /* reference from drivers/block/genhd.c */
+ lvm_hd_name_ptr = lvm_hd_name;
+#endif
+
+ blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST);
+ blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn);
+ /* optional read root VGDA */
+/*
+ if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg);
+*/
+
+ printk(KERN_INFO
+ "%s%s -- "
+#ifdef MODULE
+ "Module"
+#else
+ "Driver"
+#endif
+ " successfully initialized\n",
+ lvm_version, lvm_name);
+
+ return 0;
+} /* init_module() / lvm_init() */
+
+
+#ifdef MODULE
+/*
+ * Module cleanup...
+ */
+void cleanup_module(void)
+{
+ struct gendisk *gendisk_ptr = NULL, *gendisk_ptr_prev = NULL;
+
+ if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) {
+ printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name);
+ }
+ if (unregister_blkdev(MAJOR_NR, lvm_name) < 0) {
+ printk(KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name);
+ }
+ blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
+
+ gendisk_ptr = gendisk_ptr_prev = gendisk_head;
+ while (gendisk_ptr != NULL) {
+ if (gendisk_ptr == &lvm_gendisk)
+ break;
+ gendisk_ptr_prev = gendisk_ptr;
+ gendisk_ptr = gendisk_ptr->next;
+ }
+ /* delete our gendisk from chain */
+ if (gendisk_ptr == &lvm_gendisk)
+ gendisk_ptr_prev->next = gendisk_ptr->next;
+
+ blk_size[MAJOR_NR] = NULL;
+ blksize_size[MAJOR_NR] = NULL;
+
+#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS
+ remove_proc_entry(LVM_NAME, &proc_root);
+#endif
+
+#ifdef LVM_HD_NAME
+ /* reference from linux/drivers/block/genhd.c */
+ lvm_hd_name_ptr = NULL;
+#endif
+
+ printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name);
+
+ return;
+} /* void cleanup_module() */
+#endif /* #ifdef MODULE */
+
+
+/*
+ * support function to initialize lvm variables
+ */
+#ifdef __initfunc
+__initfunc(void lvm_init_vars(void))
+#else
+void __init lvm_init_vars(void)
+#endif
+{
+ int v;
+
+ loadtime = CURRENT_TIME;
+
+ lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
+
+ pe_lock_req.lock = UNLOCK_PE;
+ pe_lock_req.data.lv_dev = \
+ pe_lock_req.data.pv_dev = \
+ pe_lock_req.data.pv_offset = 0;
+
+ /* Initialize VG pointers */
+ for (v = 0; v < ABS_MAX_VG; v++) vg[v] = NULL;
+
+ /* Initialize LV -> VG association */
+ for (v = 0; v < ABS_MAX_LV; v++) {
+ /* index ABS_MAX_VG never used for real VG */
+ vg_lv_map[v].vg_number = ABS_MAX_VG;
+ vg_lv_map[v].lv_number = -1;
+ }
+
+ return;
+} /* lvm_init_vars() */
+
+
+/********************************************************************
+ *
+ * Character device functions
+ *
+ ********************************************************************/
+
+/*
+ * character device open routine
+ */
+static int lvm_chr_open(struct inode *inode,
+ struct file *file)
+{
+ int minor = MINOR(inode->i_rdev);
+
+#ifdef DEBUG
+ printk(KERN_DEBUG
+ "%s -- lvm_chr_open MINOR: %d VG#: %d mode: 0x%X lock: %d\n",
+ lvm_name, minor, VG_CHR(minor), file->f_mode, lock);
+#endif
+
+ /* super user validation */
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+
+ /* Group special file open */
+ if (VG_CHR(minor) > MAX_VG) return -ENXIO;
+
+ MOD_INC_USE_COUNT;
+
+ lvm_chr_open_count++;
+ return 0;
+} /* lvm_chr_open() */
+
+
+/*
+ * character device i/o-control routine
+ *
+ * Dispatcher for all LVM configuration ioctls on the control node.
+ * Only one changing process can do changing ioctl at one time,
+ * others will block (serialization via LVM_LOCK_LVM / lvm_do_lock_lvm).
+ *
+ * NOTE(review): lv_req and pv_flush_req are file-scope buffers shared
+ * by all callers -- concurrent configuration ioctls are only safe if
+ * the tools really take LVM_LOCK_LVM first; confirm against user space.
+ */
+static int lvm_chr_ioctl(struct inode *inode, struct file *file,
+			 uint command, ulong a)
+{
+	int minor = MINOR(inode->i_rdev);
+	uint extendable, l, v;
+	void *arg = (void *) a;
+	lv_t lv;
+	vg_t* vg_ptr = vg[VG_CHR(minor)];
+
+	/* otherwise cc will complain about unused variables */
+	(void) lvm_lock;
+
+
+#ifdef DEBUG_IOCTL
+	printk(KERN_DEBUG
+	       "%s -- lvm_chr_ioctl: command: 0x%X MINOR: %d "
+	       "VG#: %d mode: 0x%X\n",
+	       lvm_name, command, minor, VG_CHR(minor), file->f_mode);
+#endif
+
+#ifdef LVM_TOTAL_RESET
+	if (lvm_reset_spindown > 0) return -EACCES;
+#endif
+
+	/* Main command switch */
+	switch (command) {
+	case LVM_LOCK_LVM:
+		/* lock the LVM */
+		return lvm_do_lock_lvm();
+
+	case LVM_GET_IOP_VERSION:
+		/* check lvm version to ensure driver/tools+lib
+		   interoperability */
+		if (copy_to_user(arg, &lvm_iop_version, sizeof(ushort)) != 0)
+			return -EFAULT;
+		return 0;
+
+#ifdef LVM_TOTAL_RESET
+	case LVM_RESET:
+		/* lock reset function */
+		lvm_reset_spindown = 1;
+		for (v = 0; v < ABS_MAX_VG; v++) {
+			if (vg[v] != NULL) lvm_do_vg_remove(v);
+		}
+
+#ifdef MODULE
+		/* force the module use count back to exactly 1 */
+		while (GET_USE_COUNT(&__this_module) < 1)
+			MOD_INC_USE_COUNT;
+		while (GET_USE_COUNT(&__this_module) > 1)
+			MOD_DEC_USE_COUNT;
+#endif /* MODULE */
+		lock = 0;	/* release lock */
+		wake_up_interruptible(&lvm_wait);
+		return 0;
+#endif /* LVM_TOTAL_RESET */
+
+
+	case LE_REMAP:
+		/* remap a logical extent (after moving the physical extent) */
+		return lvm_do_le_remap(vg_ptr,arg);
+
+	case PE_LOCK_UNLOCK:
+		/* lock/unlock i/o to a physical extent to move it to another
+		   physical volume (move's done in user space's pvmove) */
+		return lvm_do_pe_lock_unlock(vg_ptr,arg);
+
+	case VG_CREATE:
+		/* create a VGDA */
+		return lvm_do_vg_create(minor, arg);
+
+	case VG_REMOVE:
+		/* remove an inactive VGDA */
+		return lvm_do_vg_remove(minor);
+
+	case VG_EXTEND:
+		/* extend a volume group */
+		return lvm_do_vg_extend(vg_ptr,arg);
+
+	case VG_REDUCE:
+		/* reduce a volume group */
+		return lvm_do_vg_reduce(vg_ptr,arg);
+
+
+	case VG_SET_EXTENDABLE:
+		/* set/clear extendability flag of volume group */
+		if (vg_ptr == NULL) return -ENXIO;
+		if (copy_from_user(&extendable, arg, sizeof(extendable)) != 0)
+			return -EFAULT;
+
+		/* only the two exact sentinel values are accepted */
+		if (extendable == VG_EXTENDABLE ||
+		    extendable == ~VG_EXTENDABLE) {
+			if (extendable == VG_EXTENDABLE)
+				vg_ptr->vg_status |= VG_EXTENDABLE;
+			else
+				vg_ptr->vg_status &= ~VG_EXTENDABLE;
+		} else return -EINVAL;
+		return 0;
+
+
+	case VG_STATUS:
+		/* get volume group data (only the vg_t struct) */
+		if (vg_ptr == NULL) return -ENXIO;
+		if (copy_to_user(arg, vg_ptr, sizeof(vg_t)) != 0)
+			return -EFAULT;
+		return 0;
+
+
+	case VG_STATUS_GET_COUNT:
+		/* get volume group count */
+		if (copy_to_user(arg, &vg_count, sizeof(vg_count)) != 0)
+			return -EFAULT;
+		return 0;
+
+
+	case VG_STATUS_GET_NAMELIST:
+		/* get volume group count */
+		/* (copies the name of every active VG, packed NAME_LEN
+		   apart, to the user buffer) */
+		for (l = v = 0; v < ABS_MAX_VG; v++) {
+			if (vg[v] != NULL) {
+				if (copy_to_user(arg + l++ * NAME_LEN,
+						 vg[v]->vg_name,
+						 NAME_LEN) != 0)
+					return -EFAULT;
+			}
+		}
+		return 0;
+
+
+	case LV_CREATE:
+	case LV_REMOVE:
+	case LV_EXTEND:
+	case LV_REDUCE:
+		/* create, remove, extend or reduce a logical volume */
+		if (vg_ptr == NULL) return -ENXIO;
+		if (copy_from_user(&lv_req, arg, sizeof(lv_req)) != 0)
+			return -EFAULT;
+
+		if (command != LV_REMOVE) {
+			if (copy_from_user(&lv, lv_req.lv, sizeof(lv_t)) != 0)
+				return -EFAULT;
+		}
+		/* every inner case returns, so control cannot fall
+		   through into LV_STATUS_BYNAME below */
+		switch (command) {
+		case LV_CREATE:
+			return lvm_do_lv_create(minor, lv_req.lv_name, &lv);
+
+		case LV_REMOVE:
+			return lvm_do_lv_remove(minor, lv_req.lv_name, -1);
+
+		case LV_EXTEND:
+		case LV_REDUCE:
+			return lvm_do_lv_extend_reduce(minor, lv_req.lv_name, &lv);
+		}
+
+
+	case LV_STATUS_BYNAME:
+		/* get status of a logical volume by name */
+		return lvm_do_lv_status_byname(vg_ptr,arg);
+
+	case LV_STATUS_BYINDEX:
+		/* get status of a logical volume by index */
+		return lvm_do_lv_status_byindex(vg_ptr,arg);
+
+	case PV_CHANGE:
+		/* change a physical volume */
+		return lvm_do_pv_change(vg_ptr,arg);
+
+	case PV_STATUS:
+		/* get physical volume data (pv_t structure only) */
+		return lvm_do_pv_status(vg_ptr,arg);
+
+	case PV_FLUSH:
+		/* physical volume buffer flush/invalidate */
+		if (copy_from_user(&pv_flush_req, arg,
+				   sizeof(pv_flush_req)) != 0)
+			return -EFAULT;
+
+		fsync_dev(pv_flush_req.pv_dev);
+		invalidate_buffers(pv_flush_req.pv_dev);
+		return 0;
+
+	default:
+		printk(KERN_WARNING
+		       "%s -- lvm_chr_ioctl: unknown command %x\n",
+		       lvm_name, command);
+		return -EINVAL;
+	}
+
+	return 0;
+} /* lvm_chr_ioctl */
+
+
+/*
+ * character device close routine
+ *
+ * Drops the open count, releases the global configuration lock if
+ * this pid holds it (so a dying tool cannot leave LVM locked), and
+ * drops the module reference taken in lvm_chr_open().
+ */
+static int lvm_chr_close(struct inode *inode, struct file *file)
+{
+#ifdef DEBUG
+	int minor = MINOR(inode->i_rdev);
+	printk(KERN_DEBUG
+	       "%s -- lvm_chr_close VG#: %d\n", lvm_name, VG_CHR(minor));
+#endif
+
+#ifdef LVM_TOTAL_RESET
+	/* after a total reset pretend exactly one open is left */
+	if (lvm_reset_spindown > 0) {
+		lvm_reset_spindown = 0;
+		lvm_chr_open_count = 1;
+	}
+#endif
+
+	if (lvm_chr_open_count > 0) lvm_chr_open_count--;
+	if (lock == current->pid) {
+		lock = 0;	/* release lock */
+		wake_up_interruptible(&lvm_wait);
+	}
+
+#ifdef MODULE
+	if (GET_USE_COUNT(&__this_module) > 0) MOD_DEC_USE_COUNT;
+#endif
+
+	return 0;
+} /* lvm_chr_close() */
+
+
+
+/********************************************************************
+ *
+ * Block device functions
+ *
+ ********************************************************************/
+
+/*
+ * block device open routine
+ *
+ * Validates the target LV (VG present and active, LV index in range,
+ * LV present and not spinning down), enforces inactive/read-only
+ * restrictions for writable opens, and maintains the per-LV and
+ * per-VG open counters plus the module reference.
+ *
+ * Fix: bounds-check LV_BLK(minor) BEFORE using it to index
+ * vg_ptr->lv[]; the original indexed first and validated afterwards,
+ * an out-of-bounds read for an out-of-range minor.
+ */
+static int lvm_blk_open(struct inode *inode, struct file *file)
+{
+	int minor = MINOR(inode->i_rdev);
+	lv_t *lv_ptr;
+	vg_t *vg_ptr = vg[VG_BLK(minor)];
+
+#ifdef DEBUG_LVM_BLK_OPEN
+	printk(KERN_DEBUG
+	       "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d mode: 0x%X\n",
+	       lvm_name, minor, VG_BLK(minor), LV_BLK(minor), file->f_mode);
+#endif
+
+#ifdef LVM_TOTAL_RESET
+	if (lvm_reset_spindown > 0)
+		return -EPERM;
+#endif
+
+	if (vg_ptr != NULL &&
+	    (vg_ptr->vg_status & VG_ACTIVE) &&
+	    LV_BLK(minor) >= 0 &&
+	    LV_BLK(minor) < vg_ptr->lv_max &&
+	    (lv_ptr = vg_ptr->lv[LV_BLK(minor)]) != NULL) {
+
+		/* Check parallel LV spindown (LV remove) */
+		if (lv_ptr->lv_status & LV_SPINDOWN) return -EPERM;
+
+		/* Check inactive LV and open for read/write */
+		/* NOTE(review): f_mode carries FMODE_* bits; this test
+		   only works because O_RDWR == FMODE_WRITE == 2 --
+		   consider FMODE_WRITE for clarity */
+		if (file->f_mode & O_RDWR) {
+			if (!(lv_ptr->lv_status & LV_ACTIVE)) return -EPERM;
+			if (!(lv_ptr->lv_access & LV_WRITE)) return -EACCES;
+		}
+
+#ifdef BLOCK_DEVICE_OPERATIONS
+		file->f_op = &lvm_blk_fops;
+#endif
+
+		/* be sure to increment VG counter */
+		if (lv_ptr->lv_open == 0) vg_ptr->lv_open++;
+		lv_ptr->lv_open++;
+
+		MOD_INC_USE_COUNT;
+
+#ifdef DEBUG_LVM_BLK_OPEN
+		printk(KERN_DEBUG
+		       "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d size: %d\n",
+		       lvm_name, minor, VG_BLK(minor), LV_BLK(minor),
+		       lv_ptr->lv_size);
+#endif
+
+		return 0;
+	}
+	return -ENXIO;
+} /* lvm_blk_open() */
+
+
+/*
+ * block device read: refresh the per-major read-ahead value from the
+ * target LV's setting, then hand off to the generic block-cache read.
+ */
+static ssize_t lvm_blk_read(struct file *file, char *buffer,
+			    size_t size, loff_t * offset)
+{
+	kdev_t dev = file->f_dentry->d_inode->i_rdev;
+	lv_t *lv_ptr = vg[VG_BLK(MINOR(dev))]->lv[LV_BLK(MINOR(dev))];
+
+	read_ahead[MAJOR(dev)] = lv_ptr->lv_read_ahead;
+	return block_read(file, buffer, size, offset);
+}
+
+
+/*
+ * block device write: refresh the per-major read-ahead value from the
+ * target LV's setting, then hand off to the generic block-cache write.
+ */
+static ssize_t lvm_blk_write(struct file *file, const char *buffer,
+			     size_t size, loff_t * offset)
+{
+	kdev_t dev = file->f_dentry->d_inode->i_rdev;
+	lv_t *lv_ptr = vg[VG_BLK(MINOR(dev))]->lv[LV_BLK(MINOR(dev))];
+
+	read_ahead[MAJOR(dev)] = lv_ptr->lv_read_ahead;
+	return block_write(file, buffer, size, offset);
+}
+
+
+/*
+ * block device i/o-control routine
+ *
+ * Handles the generic block ioctls (size, flush, read-ahead, fake
+ * geometry) plus the LVM-specific LV flag setters on an opened
+ * logical volume.  Returns 0 on success or a negative errno.
+ *
+ * Fixes: NULL-check vg_ptr/lv_ptr before dereferencing (the original
+ * oopsed on a stale minor); report -EFAULT when copy_to_user fails
+ * in BLKGETSIZE/BLKRAGET instead of ignoring the result; move the
+ * HDIO_GETGEO debug printk inside the scope of heads/sectors (the
+ * original did not compile with DEBUG_IOCTL defined).
+ */
+static int lvm_blk_ioctl(struct inode *inode, struct file *file,
+			 uint command, ulong a)
+{
+	int minor = MINOR(inode->i_rdev);
+	vg_t *vg_ptr = vg[VG_BLK(minor)];
+	lv_t *lv_ptr;
+	void *arg = (void *) a;
+	struct hd_geometry *hd = (struct hd_geometry *) a;
+
+	if (vg_ptr == NULL) return -ENXIO;
+	if ((lv_ptr = vg_ptr->lv[LV_BLK(minor)]) == NULL) return -ENXIO;
+
+#ifdef DEBUG_IOCTL
+	printk(KERN_DEBUG
+	       "%s -- lvm_blk_ioctl MINOR: %d command: 0x%X arg: %X "
+	       "VG#: %dl LV#: %d\n",
+	       lvm_name, minor, command, (ulong) arg,
+	       VG_BLK(minor), LV_BLK(minor));
+#endif
+
+	switch (command) {
+	case BLKGETSIZE:
+		/* return device size (in sectors) */
+#ifdef DEBUG_IOCTL
+		printk(KERN_DEBUG
+		       "%s -- lvm_blk_ioctl -- BLKGETSIZE: %u\n",
+		       lvm_name, lv_ptr->lv_size);
+#endif
+		if (copy_to_user((long *) arg, &lv_ptr->lv_size,
+				 sizeof(lv_ptr->lv_size)) != 0)
+			return -EFAULT;
+		break;
+
+
+	case BLKFLSBUF:
+		/* flush buffer cache */
+		if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+
+#ifdef DEBUG_IOCTL
+		printk(KERN_DEBUG
+		       "%s -- lvm_blk_ioctl -- BLKFLSBUF\n", lvm_name);
+#endif
+		fsync_dev(inode->i_rdev);
+		break;
+
+
+	case BLKRASET:
+		/* set read ahead for block device; the new value is
+		   passed directly in 'arg', not via a user pointer */
+		if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+
+#ifdef DEBUG_IOCTL
+		printk(KERN_DEBUG
+		       "%s -- lvm_blk_ioctl -- BLKRASET: %d sectors for %02X:%02X\n",
+		       lvm_name, (long) arg, MAJOR(inode->i_rdev), minor);
+#endif
+		if ((long) arg < LVM_MIN_READ_AHEAD ||
+		    (long) arg > LVM_MAX_READ_AHEAD)
+			return -EINVAL;
+		lv_ptr->lv_read_ahead = (long) arg;
+		break;
+
+
+	case BLKRAGET:
+		/* get current read ahead setting */
+#ifdef DEBUG_IOCTL
+		printk(KERN_DEBUG
+		       "%s -- lvm_blk_ioctl -- BLKRAGET\n", lvm_name);
+#endif
+		if (copy_to_user((long *) arg, &lv_ptr->lv_read_ahead,
+				 sizeof(lv_ptr->lv_read_ahead)) != 0)
+			return -EFAULT;
+		break;
+
+
+	case HDIO_GETGEO:
+		/* fake a disk geometry: 64 heads x 32 sectors, cylinder
+		   count derived from the LV size */
+#ifdef DEBUG_IOCTL
+		printk(KERN_DEBUG
+		       "%s -- lvm_blk_ioctl -- HDIO_GETGEO\n", lvm_name);
+#endif
+		if (hd == NULL)
+			return -EINVAL;
+		{
+			unsigned char heads = 64;
+			unsigned char sectors = 32;
+			long start = 0;
+			short cylinders = lv_ptr->lv_size / heads / sectors;
+
+			if (copy_to_user((char *) &hd->heads, &heads,
+					 sizeof(heads)) != 0 ||
+			    copy_to_user((char *) &hd->sectors, &sectors,
+					 sizeof(sectors)) != 0 ||
+			    copy_to_user((short *) &hd->cylinders,
+					 &cylinders, sizeof(cylinders)) != 0 ||
+			    copy_to_user((long *) &hd->start, &start,
+					 sizeof(start)) != 0)
+				return -EFAULT;
+
+#ifdef DEBUG_IOCTL
+			printk(KERN_DEBUG
+			       "%s -- lvm_blk_ioctl -- cylinders: %d\n",
+			       lvm_name, lv_ptr->lv_size / heads / sectors);
+#endif
+		}
+		break;
+
+
+	case LV_SET_ACCESS:
+		/* set access flags of a logical volume */
+		if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+		lv_ptr->lv_access = (ulong) arg;
+		break;
+
+
+	case LV_SET_STATUS:
+		/* set status flags of a logical volume; refuse to
+		   deactivate an LV that is open more than once */
+		if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+		if (!((ulong) arg & LV_ACTIVE) && lv_ptr->lv_open > 1)
+			return -EPERM;
+		lv_ptr->lv_status = (ulong) arg;
+		break;
+
+
+	case LV_SET_ALLOCATION:
+		/* set allocation flags of a logical volume */
+		if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+		lv_ptr->lv_allocation = (ulong) arg;
+		break;
+
+
+	default:
+		printk(KERN_WARNING
+		       "%s -- lvm_blk_ioctl: unknown command %d\n",
+		       lvm_name, command);
+		return -EINVAL;
+	}
+
+	return 0;
+} /* lvm_blk_ioctl() */
+
+
+/*
+ * block device close routine
+ *
+ * Syncs outstanding buffers for the device and reverses the open
+ * accounting done in lvm_blk_open(): the VG's lv_open drops when the
+ * last opener of this LV goes away.  No NULL checks -- a successful
+ * open guarantees vg/lv are still in place here.
+ */
+static int lvm_blk_close(struct inode *inode, struct file *file)
+{
+	int minor = MINOR(inode->i_rdev);
+	vg_t *vg_ptr = vg[VG_BLK(minor)];
+	lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];
+
+#ifdef DEBUG
+	printk(KERN_DEBUG
+	       "%s -- lvm_blk_close MINOR: %d VG#: %d LV#: %d\n",
+	       lvm_name, minor, VG_BLK(minor), LV_BLK(minor));
+#endif
+
+	sync_dev(inode->i_rdev);
+	if (lv_ptr->lv_open == 1) vg_ptr->lv_open--;
+	lv_ptr->lv_open--;
+
+	MOD_DEC_USE_COUNT;
+
+	return 0;
+} /* lvm_blk_close() */
+
+
+#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS
+/*
+ * Support function /proc-Filesystem
+ *
+ * Builds the whole /proc/lvm text once (when pos == 0), caches it in
+ * a static vmalloc'd buffer, serves slices out of it on subsequent
+ * reads, and frees it when the reader passes the end.
+ */
+/* first pass (i == 0) writes into a throw-away buffer just to count
+   bytes; second pass writes for real at &buf[sz] */
+#define LVM_PROC_BUF   ( i == 0 ? dummy_buf : &buf[sz])
+
+static int lvm_proc_get_info(char *page, char **start, off_t pos, int count)
+{
+	int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter,
+	 lv_open_total, pe_t_bytes, lv_block_exception_t_bytes, seconds;
+	static off_t sz;
+	off_t sz_last;
+	char allocation_flag, inactive_flag, rw_flag, stripes_flag;
+	char *lv_name, *pv_name;
+	static char *buf = NULL;
+	static char dummy_buf[160];	/* sized for 2 lines */
+	vg_t *vg_ptr;
+	lv_t *lv_ptr;
+	pv_t *pv_ptr;
+
+
+/* NOTE(review): 'whence' is not a parameter of this function -- this
+   printk does not compile when DEBUG_LVM_PROC_GET_INFO is defined */
+#ifdef DEBUG_LVM_PROC_GET_INFO
+	printk(KERN_DEBUG
+	       "%s - lvm_proc_get_info CALLED pos: %lu count: %d whence: %d\n",
+	       lvm_name, pos, count, whence);
+#endif
+
+	if (pos == 0 || buf == NULL) {
+		sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \
+		    lv_open_total = pe_t_bytes = lv_block_exception_t_bytes = 0;
+
+		/* search for activity */
+		for (v = 0; v < ABS_MAX_VG; v++) {
+			if ((vg_ptr = vg[v]) != NULL) {
+				vg_counter++;
+				pv_counter += vg_ptr->pv_cur;
+				lv_counter += vg_ptr->lv_cur;
+				if (vg_ptr->lv_cur > 0) {
+					for (l = 0; l < vg[v]->lv_max; l++) {
+						if ((lv_ptr = vg_ptr->lv[l]) != NULL) {
+							pe_t_bytes += lv_ptr->lv_allocated_le;
+							if (lv_ptr->lv_block_exception != NULL)
+								lv_block_exception_t_bytes += lv_ptr->lv_remap_end;
+							if (lv_ptr->lv_open > 0) {
+								lv_open_counter++;
+								lv_open_total += lv_ptr->lv_open;
+							}
+						}
+					}
+				}
+			}
+		}
+		/* convert counted elements to byte totals for the
+		   "bytes malloced" line below */
+		pe_t_bytes *= sizeof(pe_t);
+		lv_block_exception_t_bytes *= sizeof(lv_block_exception_t);
+
+		if (buf != NULL) {
+#ifdef DEBUG_KFREE
+			printk(KERN_DEBUG
+			       "%s -- kfree %d\n", lvm_name, __LINE__);
+#endif
+			kfree(buf);
+			buf = NULL;
+		}
+		/* 2 times: first to get size to allocate buffer,
+		   2nd to fill the malloced buffer */
+		for (i = 0; i < 2; i++) {
+			sz = 0;
+			sz += sprintf(LVM_PROC_BUF,
+				      "LVM "
+#ifdef MODULE
+				      "module"
+#else
+				      "driver"
+#endif
+				      " %s\n\n"
+				      "Total: %d VG%s %d PV%s %d LV%s ",
+				      lvm_short_version,
+				      vg_counter, vg_counter == 1 ? "" : "s",
+				      pv_counter, pv_counter == 1 ? "" : "s",
+				      lv_counter, lv_counter == 1 ? "" : "s");
+			sz += sprintf(LVM_PROC_BUF,
+				      "(%d LV%s open",
+				      lv_open_counter,
+				      lv_open_counter == 1 ? "" : "s");
+			if (lv_open_total > 0)
+				sz += sprintf(LVM_PROC_BUF,
+					      " %d times)\n",
+					      lv_open_total);
+			else
+				sz += sprintf(LVM_PROC_BUF, ")");
+			sz += sprintf(LVM_PROC_BUF,
+				      "\nGlobal: %lu bytes malloced IOP version: %d ",
+				      vg_counter * sizeof(vg_t) +
+				      pv_counter * sizeof(pv_t) +
+				      lv_counter * sizeof(lv_t) +
+				      pe_t_bytes + lv_block_exception_t_bytes + sz_last,
+				      lvm_iop_version);
+
+			/* uptime of the driver; clamp a negative delta
+			   (clock stepped backwards) */
+			seconds = CURRENT_TIME - loadtime;
+			if (seconds < 0)
+				loadtime = CURRENT_TIME + seconds;
+			if (seconds / 86400 > 0) {
+				sz += sprintf(LVM_PROC_BUF, "%d day%s ",
+					      seconds / 86400,
+					      seconds / 86400 == 0 ||
+					      seconds / 86400 > 1 ? "s" : "");
+			}
+			sz += sprintf(LVM_PROC_BUF, "%d:%02d:%02d active\n",
+				      (seconds % 86400) / 3600,
+				      (seconds % 3600) / 60,
+				      seconds % 60);
+
+			if (vg_counter > 0) {
+				for (v = 0; v < ABS_MAX_VG; v++) {
+					/* volume group */
+					if ((vg_ptr = vg[v]) != NULL) {
+						inactive_flag = ' ';
+						if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I';
+						sz += sprintf(LVM_PROC_BUF,
+							      "\nVG: %c%s [%d PV, %d LV/%d open] "
+							      " PE Size: %d KB\n"
+							      " Usage [KB/PE]: %d /%d total "
+							      "%d /%d used %d /%d free",
+							      inactive_flag,
+							      vg_ptr->vg_name,
+							      vg_ptr->pv_cur,
+							      vg_ptr->lv_cur,
+							      vg_ptr->lv_open,
+							      vg_ptr->pe_size >> 1,
+							      vg_ptr->pe_size * vg_ptr->pe_total >> 1,
+							      vg_ptr->pe_total,
+							      vg_ptr->pe_allocated * vg_ptr->pe_size >> 1,
+							      vg_ptr->pe_allocated,
+							      (vg_ptr->pe_total - vg_ptr->pe_allocated) *
+							      vg_ptr->pe_size >> 1,
+							      vg_ptr->pe_total - vg_ptr->pe_allocated);
+
+						/* physical volumes */
+						sz += sprintf(LVM_PROC_BUF,
+							      "\n PV%s ",
+							      vg_ptr->pv_cur == 1 ? ": " : "s:");
+						c = 0;
+						for (p = 0; p < vg_ptr->pv_max; p++) {
+							if ((pv_ptr = vg_ptr->pv[p]) != NULL) {
+								inactive_flag = 'A';
+								if (!(pv_ptr->pv_status & PV_ACTIVE))
+									inactive_flag = 'I';
+								allocation_flag = 'A';
+								if (!(pv_ptr->pv_allocatable & PV_ALLOCATABLE))
+									allocation_flag = 'N';
+								/* strip the leading "/dev/" style prefix */
+								pv_name = strchr(pv_ptr->pv_name+1,'/');
+								if ( pv_name == 0) pv_name = pv_ptr->pv_name;
+								else pv_name++;
+								sz += sprintf(LVM_PROC_BUF,
+									      "[%c%c] %-21s %8d /%-6d "
+									      "%8d /%-6d %8d /%-6d",
+									      inactive_flag,
+									      allocation_flag,
+									      pv_name,
+									      pv_ptr->pe_total *
+									      pv_ptr->pe_size >> 1,
+									      pv_ptr->pe_total,
+									      pv_ptr->pe_allocated *
+									      pv_ptr->pe_size >> 1,
+									      pv_ptr->pe_allocated,
+									      (pv_ptr->pe_total -
+									       pv_ptr->pe_allocated) *
+									      pv_ptr->pe_size >> 1,
+									      pv_ptr->pe_total -
+									      pv_ptr->pe_allocated);
+								c++;
+								if (c < vg_ptr->pv_cur)
+									sz += sprintf(LVM_PROC_BUF,
+										      "\n ");
+							}
+						}
+
+						/* logical volumes */
+						sz += sprintf(LVM_PROC_BUF,
+							      "\n LV%s ",
+							      vg_ptr->lv_cur == 1 ? ": " : "s:");
+						c = 0;
+						for (l = 0; l < vg[v]->lv_max; l++) {
+							if ((lv_ptr = vg_ptr->lv[l]) != NULL) {
+								inactive_flag = 'A';
+								if (!(lv_ptr->lv_status & LV_ACTIVE))
+									inactive_flag = 'I';
+								rw_flag = 'R';
+								if (lv_ptr->lv_access & LV_WRITE)
+									rw_flag = 'W';
+								allocation_flag = 'D';
+								if (lv_ptr->lv_allocation & LV_CONTIGUOUS)
+									allocation_flag = 'C';
+								stripes_flag = 'L';
+								if (lv_ptr->lv_stripes > 1)
+									stripes_flag = 'S';
+								sz += sprintf(LVM_PROC_BUF,
+									      "[%c%c%c%c",
+									      inactive_flag,
+									      rw_flag,
+									      allocation_flag,
+									      stripes_flag);
+								if (lv_ptr->lv_stripes > 1)
+									sz += sprintf(LVM_PROC_BUF, "%-2d",
+										      lv_ptr->lv_stripes);
+								else
+									sz += sprintf(LVM_PROC_BUF, " ");
+								lv_name = strrchr(lv_ptr->lv_name, '/');
+								if ( lv_name == 0) lv_name = lv_ptr->lv_name;
+								else lv_name++;
+								sz += sprintf(LVM_PROC_BUF, "] %-25s", lv_name);
+								if (strlen(lv_name) > 25)
+									sz += sprintf(LVM_PROC_BUF,
+										      "\n ");
+								sz += sprintf(LVM_PROC_BUF, "%9d /%-6d ",
+									      lv_ptr->lv_size >> 1,
+									      lv_ptr->lv_size / vg[v]->pe_size);
+
+								if (lv_ptr->lv_open == 0)
+									sz += sprintf(LVM_PROC_BUF, "close");
+								else
+									sz += sprintf(LVM_PROC_BUF, "%dx open",
+										      lv_ptr->lv_open);
+								c++;
+								if (c < vg_ptr->lv_cur)
+									sz += sprintf(LVM_PROC_BUF,
+										      "\n ");
+							}
+						}
+						if (vg_ptr->lv_cur == 0) sz += sprintf(LVM_PROC_BUF, "none");
+						sz += sprintf(LVM_PROC_BUF, "\n");
+					}
+				}
+			}
+			/* after the sizing pass, allocate the real buffer */
+			if (buf == NULL) {
+				if ((buf = vmalloc(sz)) == NULL) {
+					sz = 0;
+					return sprintf(page, "%s - vmalloc error at line %d\n",
+						       lvm_name, __LINE__);
+				}
+			}
+			sz_last = sz;
+		}
+	}
+	/* reader went past the end: release the cached image */
+	if (pos > sz - 1) {
+		vfree(buf);
+		buf = NULL;
+		return 0;
+	}
+	*start = &buf[pos];
+	if (sz - pos < count)
+		return sz - pos;
+	else
+		return count;
+} /* lvm_proc_get_info() */
+#endif /* #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS */
+
+
+/*
+ * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c
+ * (see init_module/lvm_init)
+ *
+ * Translates bh->b_rsector/b_rdev from the logical volume's address
+ * space to the backing physical volume, handling linear and striped
+ * layouts, physical extents locked by pvmove, and snapshot
+ * copy-on-write.  Returns 0 on success, -1 on error (inactive LV,
+ * write to a read-only LV, request beyond the LV size).
+ */
+static int lvm_map(struct buffer_head *bh, int rw)
+{
+	int minor = MINOR(bh->b_dev);
+	int ret = 0;
+	ulong index;
+	ulong pe_start;
+	ulong size = bh->b_size >> 9;	/* buffer size in 512-byte sectors */
+	ulong rsector_tmp = bh->b_blocknr * size;
+	ulong rsector_sav;
+	kdev_t rdev_tmp = bh->b_dev;
+	kdev_t rdev_sav;
+	lv_t *lv = vg[VG_BLK(minor)]->lv[LV_BLK(minor)];
+
+
+	if (!(lv->lv_status & LV_ACTIVE)) {
+		printk(KERN_ALERT
+		       "%s - lvm_map: ll_rw_blk for inactive LV %s\n",
+		       lvm_name, lv->lv_name);
+		return -1;
+	}
+/*
+   if ( lv->lv_access & LV_SNAPSHOT)
+   printk ( "%s -- %02d:%02d block: %lu rw: %d\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), bh->b_blocknr, rw);
+ */
+
+	/* take care of snapshot chunk writes before
+	   check for writable logical volume */
+	/* NOTE(review): this path only logs and returns 0 without
+	   remapping -- appears to be a trace/stub for the snapshot
+	   chunk-write case; confirm intended behavior */
+	if ((lv->lv_access & LV_SNAPSHOT) &&
+	    MAJOR(bh->b_rdev) != 0 &&
+	    MAJOR(bh->b_rdev) != MAJOR_NR &&
+	    (rw == WRITEA || rw == WRITE))
+	{
+		printk ( "%s -- doing snapshot write for %02d:%02d[%02d:%02d] b_blocknr: %lu b_rsector: %lu\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), MAJOR ( bh->b_rdev), MINOR ( bh->b_rdev), bh->b_blocknr, bh->b_rsector);
+		return 0;
+	}
+
+	if ((rw == WRITE || rw == WRITEA) &&
+	    !(lv->lv_access & LV_WRITE)) {
+		printk(KERN_CRIT
+		       "%s - lvm_map: ll_rw_blk write for readonly LV %s\n",
+		       lvm_name, lv->lv_name);
+		return -1;
+	}
+#ifdef DEBUG_MAP
+	printk(KERN_DEBUG
+	       "%s - lvm_map minor:%d *rdev: %02d:%02d *rsector: %lu "
+	       "size:%lu\n",
+	       lvm_name, minor,
+	       MAJOR(rdev_tmp),
+	       MINOR(rdev_tmp),
+	       rsector_tmp, size);
+#endif
+
+	if (rsector_tmp + size > lv->lv_size) {
+		printk(KERN_ALERT
+		       "%s - lvm_map *rsector: %lu or size: %lu wrong for"
+		       " minor: %2d\n", lvm_name, rsector_tmp, size, minor);
+		return -1;
+	}
+	/* saved so the remap can be redone after a PE-lock sleep */
+	rsector_sav = rsector_tmp;
+	rdev_sav = rdev_tmp;
+
+lvm_second_remap:
+	/* linear mapping */
+	if (lv->lv_stripes < 2) {
+		/* get the index */
+		index = rsector_tmp / vg[VG_BLK(minor)]->pe_size;
+		pe_start = lv->lv_current_pe[index].pe;
+		rsector_tmp = lv->lv_current_pe[index].pe +
+		    (rsector_tmp % vg[VG_BLK(minor)]->pe_size);
+		rdev_tmp = lv->lv_current_pe[index].dev;
+
+#ifdef DEBUG_MAP
+		printk(KERN_DEBUG
+		       "lv_current_pe[%ld].pe: %ld rdev: %02d:%02d rsector:%ld\n",
+		       index,
+		       lv->lv_current_pe[index].pe,
+		       MAJOR(rdev_tmp),
+		       MINOR(rdev_tmp),
+		       rsector_tmp);
+#endif
+
+		/* striped mapping */
+	} else {
+		ulong stripe_index;
+		ulong stripe_length;
+
+		stripe_length = vg[VG_BLK(minor)]->pe_size * lv->lv_stripes;
+		stripe_index = (rsector_tmp % stripe_length) / lv->lv_stripesize;
+		index = rsector_tmp / stripe_length +
+		    (stripe_index % lv->lv_stripes) *
+		    (lv->lv_allocated_le / lv->lv_stripes);
+		pe_start = lv->lv_current_pe[index].pe;
+		rsector_tmp = lv->lv_current_pe[index].pe +
+		    (rsector_tmp % stripe_length) -
+		    (stripe_index % lv->lv_stripes) * lv->lv_stripesize -
+		    stripe_index / lv->lv_stripes *
+		    (lv->lv_stripes - 1) * lv->lv_stripesize;
+		rdev_tmp = lv->lv_current_pe[index].dev;
+	}
+
+/* NOTE(review): stripe_length/stripe_index are scoped inside the
+   striped branch above -- this printk does not compile when
+   DEBUG_MAP is defined */
+#ifdef DEBUG_MAP
+	printk(KERN_DEBUG
+	       "lv_current_pe[%ld].pe: %ld rdev: %02d:%02d rsector:%ld\n"
+	       "stripe_length: %ld stripe_index: %ld\n",
+	       index,
+	       lv->lv_current_pe[index].pe,
+	       MAJOR(rdev_tmp),
+	       MINOR(rdev_tmp),
+	       rsector_tmp,
+	       stripe_length,
+	       stripe_index);
+#endif
+
+	/* handle physical extents on the move: if the target sector
+	   falls in the PE being moved by pvmove, sleep until it is
+	   unlocked, then redo the whole remap from the saved values */
+	if (pe_lock_req.lock == LOCK_PE) {
+		if (rdev_tmp == pe_lock_req.data.pv_dev &&
+		    rsector_tmp >= pe_lock_req.data.pv_offset &&
+		    rsector_tmp < (pe_lock_req.data.pv_offset +
+				   vg[VG_BLK(minor)]->pe_size)) {
+			sleep_on(&lvm_map_wait);
+			rsector_tmp = rsector_sav;
+			rdev_tmp = rdev_sav;
+			goto lvm_second_remap;
+		}
+	}
+	/* statistic */
+	if (rw == WRITE || rw == WRITEA)
+		lv->lv_current_pe[index].writes++;
+	else
+		lv->lv_current_pe[index].reads++;
+
+	/* snapshot volume exception handling on physical device address base */
+	if (lv->lv_access & (LV_SNAPSHOT | LV_SNAPSHOT_ORG)) {
+		/* original logical volume */
+		if (lv->lv_access & LV_SNAPSHOT_ORG) {
+			if (rw == WRITE || rw == WRITEA)
+			{
+				lv_t *lv_ptr;
+
+				/* start with first snapshot and loop thrugh all of them */
+				for (lv_ptr = lv->lv_snapshot_next;
+				     lv_ptr != NULL;
+				     lv_ptr = lv_ptr->lv_snapshot_next) {
+					down(&lv->lv_snapshot_org->lv_snapshot_sem);
+					/* do we still have exception storage for this snapshot free? */
+					if (lv_ptr->lv_block_exception != NULL) {
+						rdev_sav = rdev_tmp;
+						rsector_sav = rsector_tmp;
+						if (!lvm_snapshot_remap_block(&rdev_tmp,
+									      &rsector_tmp,
+									      pe_start,
+									      lv_ptr)) {
+							/* create a new mapping */
+							ret = lvm_snapshot_COW(rdev_tmp,
+									       rsector_tmp,
+									       pe_start,
+									       rsector_sav,
+									       lv_ptr);
+						}
+						rdev_tmp = rdev_sav;
+						rsector_tmp = rsector_sav;
+					}
+					up(&lv->lv_snapshot_org->lv_snapshot_sem);
+				}
+			}
+		} else {
+			/* remap snapshot logical volume */
+			down(&lv->lv_snapshot_sem);
+			if (lv->lv_block_exception != NULL)
+				lvm_snapshot_remap_block(&rdev_tmp, &rsector_tmp, pe_start, lv);
+			up(&lv->lv_snapshot_sem);
+		}
+	}
+	bh->b_rdev = rdev_tmp;
+	bh->b_rsector = rsector_tmp;
+
+	return ret;
+} /* lvm_map() */
+
+
+/*
+ * internal support functions
+ */
+
+#ifdef LVM_HD_NAME
+/*
+ * generate "hard disk" name
+ *
+ * Copies the LV's name minus its first 5 characters (the "/dev/"
+ * prefix) into buf.  Fix: clamp the computed length at 0 -- for a
+ * name shorter than 5 characters the original passed a negative
+ * length to memcpy (huge size_t after conversion).
+ */
+void lvm_hd_name(char *buf, int minor)
+{
+	int len = 0;
+	lv_t *lv_ptr;
+
+	if (vg[VG_BLK(minor)] == NULL ||
+	    (lv_ptr = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]) == NULL)
+		return;
+	len = strlen(lv_ptr->lv_name) - 5;
+	if (len < 0) len = 0;
+	memcpy(buf, &lv_ptr->lv_name[5], len);
+	buf[len] = 0;
+	return;
+}
+#endif
+
+
+/*
+ * this one never should be called...
+ * (request-queue stub: real i/o is remapped in lvm_make_request_fn,
+ * so a request landing here indicates a wiring bug)
+ */
+static void lvm_dummy_device_request(request_queue_t * t)
+{
+	printk(KERN_EMERG
+	       "%s -- oops, got lvm request for %02d:%02d [sector: %lu]\n",
+	       lvm_name,
+	       MAJOR(CURRENT->rq_dev), MINOR(CURRENT->rq_dev),
+	       CURRENT->sector);
+}
+
+
+/*
+ * make request function
+ *
+ * Remaps the buffer head onto the backing physical device via
+ * lvm_map() and forwards it down the stack.
+ *
+ * Fixes: a failed lvm_map() leaves b_rdev pointing back at the LVM
+ * device, so resubmitting it (as the original did by ignoring the
+ * return value) would recurse straight back into this function;
+ * and the MD check compared the full kdev_t b_rdev against the
+ * major number MD_MAJOR instead of comparing majors.
+ */
+static void lvm_make_request_fn(int rw, struct buffer_head *bh)
+{
+	if (lvm_map(bh, rw) < 0)
+		return;
+	if (MAJOR(bh->b_rdev) != MD_MAJOR)
+		generic_make_request(rw, bh);
+	return;
+}
+
+
+/********************************************************************
+ *
+ * Character device support functions
+ *
+ ********************************************************************/
+/*
+ * character device support function logical volume manager lock
+ *
+ * Grants the single global configuration lock ('lock' = pid) to the
+ * caller, sleeping interruptibly while another pid holds it; the
+ * holder releases it in lvm_chr_close() or via LVM_RESET.
+ *
+ * NOTE(review): the window between spin_unlock() and
+ * interruptible_sleep_on() can miss a wakeup from the releasing
+ * process -- verify against sleep_on semantics of this kernel.
+ */
+static int lvm_do_lock_lvm(void)
+{
+lock_try_again:
+	spin_lock(&lvm_lock);
+	if (lock != 0 && lock != current->pid) {
+#ifdef DEBUG_IOCTL
+		printk(KERN_INFO "lvm_do_lock_lvm: %s is locked by pid %d ...\n",
+		       lvm_name, lock);
+#endif
+		spin_unlock(&lvm_lock);
+		interruptible_sleep_on(&lvm_wait);
+		if (current->sigpending != 0)
+			return -EINTR;
+#ifdef LVM_TOTAL_RESET
+		if (lvm_reset_spindown > 0)
+			return -EACCES;
+#endif
+		goto lock_try_again;
+	}
+	lock = current->pid;
+	spin_unlock(&lvm_lock);
+	return 0;
+} /* lvm_do_lock_lvm */
+
+
+/*
+ * character device support function lock/unlock physical extend
+ *
+ * LOCK_PE: after verifying the target device really belongs to this
+ * VG, the lock flag is briefly cleared while fsync_dev() drains
+ * queued writes (lvm_map() sleeps on lvm_map_wait while LOCK_PE is
+ * set), then re-asserted.
+ * UNLOCK_PE: clears the request and wakes sleepers in lvm_map().
+ */
+static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg)
+{
+	uint p;
+
+	if (vg_ptr == NULL) return -ENXIO;
+	if (copy_from_user(&pe_lock_req, arg,
+			   sizeof(pe_lock_req_t)) != 0) return -EFAULT;
+
+	switch (pe_lock_req.lock) {
+	case LOCK_PE:
+		/* the PV being locked must be part of this VG */
+		for (p = 0; p < vg_ptr->pv_max; p++) {
+			if (vg_ptr->pv[p] != NULL &&
+			    pe_lock_req.data.pv_dev ==
+			    vg_ptr->pv[p]->pv_dev)
+				break;
+		}
+		if (p == vg_ptr->pv_max) return -ENXIO;
+
+		/* drop the lock while flushing so the flush itself
+		   cannot block in lvm_map() */
+		pe_lock_req.lock = UNLOCK_PE;
+		fsync_dev(pe_lock_req.data.lv_dev);
+		pe_lock_req.lock = LOCK_PE;
+		break;
+
+	case UNLOCK_PE:
+		pe_lock_req.lock = UNLOCK_PE;
+		pe_lock_req.data.lv_dev = \
+		pe_lock_req.data.pv_dev = \
+		pe_lock_req.data.pv_offset = 0;
+		wake_up(&lvm_map_wait);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+
+/*
+ * character device support function logical extend remap
+ *
+ * Looks up the LV named in le_remap_req and retargets the single
+ * logical extent currently mapped to (old_dev, old_pe) so that it
+ * points at (new_dev, new_pe).  Returns 0 on success, -EINVAL if the
+ * LV has no such extent, -ENXIO if the LV (or VG) does not exist.
+ */
+static int lvm_do_le_remap(vg_t *vg_ptr, void *arg)
+{
+	uint i, ext;
+	lv_t *lv;
+
+	if (vg_ptr == NULL) return -ENXIO;
+	if (copy_from_user(&le_remap_req, arg,
+			   sizeof(le_remap_req_t)) != 0)
+		return -EFAULT;
+
+	for (i = 0; i < vg_ptr->lv_max; i++) {
+		lv = vg_ptr->lv[i];
+		if (lv == NULL ||
+		    strcmp(lv->lv_name, le_remap_req.lv_name) != 0)
+			continue;
+		/* found the LV: scan its extent table for the old
+		   (dev, pe) pair and rewrite it in place */
+		for (ext = 0; ext < lv->lv_allocated_le; ext++) {
+			if (lv->lv_current_pe[ext].dev ==
+			    le_remap_req.old_dev &&
+			    lv->lv_current_pe[ext].pe ==
+			    le_remap_req.old_pe) {
+				lv->lv_current_pe[ext].dev =
+				    le_remap_req.new_dev;
+				lv->lv_current_pe[ext].pe =
+				    le_remap_req.new_pe;
+				return 0;
+			}
+		}
+		return -EINVAL;
+	}
+	return -ENXIO;
+} /* lvm_do_le_remap() */
+
+
+/*
+ * character device support function VGDA create
+ *
+ * Builds the in-kernel volume group descriptor from the user-space
+ * image: copies the vg_t, then each pv_t, creates each LV through
+ * lvm_do_lv_create(), and finally wires up snapshot chains in a
+ * second pass.  On failure the partially built VG is torn down.
+ *
+ * Fix: on the lv_max > ABS_MAX_LV error path the original freed
+ * vg_ptr but left vg[VG_CHR(minor)] pointing at the freed memory
+ * (dangling pointer); clear the slot like the pv_max path does.
+ */
+int lvm_do_vg_create(int minor, void *arg)
+{
+	int snaporg_minor = 0;
+	ulong l, p;
+	lv_t lv;
+	vg_t *vg_ptr;
+	pv_t *pv_ptr;
+	lv_t *lv_ptr;
+
+	if (vg[VG_CHR(minor)] != NULL) return -EPERM;
+
+	if ((vg_ptr = kmalloc(sizeof(vg_t),GFP_KERNEL)) == NULL) {
+		printk(KERN_CRIT
+		       "%s -- VG_CREATE: kmalloc error VG at line %d\n",
+		       lvm_name, __LINE__);
+		return -ENOMEM;
+	}
+	/* get the volume group structure */
+	if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) {
+		kfree(vg_ptr);
+		return -EFAULT;
+	}
+	/* we are not that active so far... */
+	vg_ptr->vg_status &= ~VG_ACTIVE;
+	vg[VG_CHR(minor)] = vg_ptr;
+
+	vg[VG_CHR(minor)]->pe_allocated = 0;
+	if (vg_ptr->pv_max > ABS_MAX_PV) {
+		printk(KERN_WARNING
+		       "%s -- Can't activate VG: ABS_MAX_PV too small\n",
+		       lvm_name);
+		kfree(vg_ptr);
+		vg[VG_CHR(minor)] = NULL;
+		return -EPERM;
+	}
+	if (vg_ptr->lv_max > ABS_MAX_LV) {
+		printk(KERN_WARNING
+		       "%s -- Can't activate VG: ABS_MAX_LV too small for %u\n",
+		       lvm_name, vg_ptr->lv_max);
+		kfree(vg_ptr);
+		/* clear the slot too -- the original left it dangling */
+		vg[VG_CHR(minor)] = NULL;
+		return -EPERM;
+	}
+	/* get the physical volume structures */
+	vg_ptr->pv_act = vg_ptr->pv_cur = 0;
+	for (p = 0; p < vg_ptr->pv_max; p++) {
+		/* user space address */
+		if ((pvp = vg_ptr->pv[p]) != NULL) {
+			pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL);
+			if (pv_ptr == NULL) {
+				printk(KERN_CRIT
+				       "%s -- VG_CREATE: kmalloc error PV at line %d\n",
+				       lvm_name, __LINE__);
+				lvm_do_vg_remove(minor);
+				return -ENOMEM;
+			}
+			if (copy_from_user(pv_ptr, pvp, sizeof(pv_t)) != 0) {
+				lvm_do_vg_remove(minor);
+				return -EFAULT;
+			}
+			/* We don't need the PE list
+			   in kernel space as with LVs pe_t list (see below) */
+			pv_ptr->pe = NULL;
+			pv_ptr->pe_allocated = 0;
+			pv_ptr->pv_status = PV_ACTIVE;
+			vg_ptr->pv_act++;
+			vg_ptr->pv_cur++;
+
+#ifdef LVM_GET_INODE
+			/* insert a dummy inode for fs_may_mount */
+			pv_ptr->inode = lvm_get_inode(pv_ptr->pv_dev);
+#endif
+		}
+	}
+
+	/* get the logical volume structures */
+	vg_ptr->lv_cur = 0;
+	for (l = 0; l < vg_ptr->lv_max; l++) {
+		/* user space address */
+		if ((lvp = vg_ptr->lv[l]) != NULL) {
+			if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
+				lvm_do_vg_remove(minor);
+				return -EFAULT;
+			}
+			/* lvm_do_lv_create() re-registers the LV in
+			   the slot it picks, so clear ours first */
+			vg_ptr->lv[l] = NULL;
+			if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
+				lvm_do_vg_remove(minor);
+				return -EFAULT;
+			}
+		}
+	}
+
+	/* Second path to correct snapshot logical volumes which are not
+	   in place during first path above */
+	for (l = 0; l < vg_ptr->lv_max; l++) {
+		if ((lv_ptr = vg_ptr->lv[l]) != NULL &&
+		    vg_ptr->lv[l]->lv_access & LV_SNAPSHOT) {
+			snaporg_minor = lv_ptr->lv_snapshot_minor;
+			if (vg_ptr->lv[LV_BLK(snaporg_minor)] != NULL) {
+				/* get pointer to original logical volume */
+				lv_ptr = vg_ptr->lv[l]->lv_snapshot_org =
+				    vg_ptr->lv[LV_BLK(snaporg_minor)];
+
+				/* set necessary fields of original logical volume */
+				lv_ptr->lv_access |= LV_SNAPSHOT_ORG;
+				lv_ptr->lv_snapshot_minor = 0;
+				lv_ptr->lv_snapshot_org = lv_ptr;
+				lv_ptr->lv_snapshot_prev = NULL;
+
+				/* find last snapshot logical volume in the chain */
+				while (lv_ptr->lv_snapshot_next != NULL)
+					lv_ptr = lv_ptr->lv_snapshot_next;
+
+				/* set back pointer to this last one in our new logical volume */
+				vg_ptr->lv[l]->lv_snapshot_prev = lv_ptr;
+
+				/* last logical volume now points to our new snapshot volume */
+				lv_ptr->lv_snapshot_next = vg_ptr->lv[l];
+
+				/* now point to the new one */
+				lv_ptr = lv_ptr->lv_snapshot_next;
+
+				/* set necessary fields of new snapshot logical volume */
+				lv_ptr->lv_snapshot_next = NULL;
+				lv_ptr->lv_current_pe =
+				    vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_current_pe;
+				lv_ptr->lv_allocated_le =
+				    vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_allocated_le;
+				lv_ptr->lv_current_le =
+				    vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_current_le;
+				lv_ptr->lv_size =
+				    vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_size;
+			}
+		}
+	}
+
+	vg_count++;
+
+	/* let's go active */
+	vg_ptr->vg_status |= VG_ACTIVE;
+
+	MOD_INC_USE_COUNT;
+
+	return 0;
+} /* lvm_do_vg_create() */
+
+
+/*
+ * character device support function VGDA extend
+ *
+ * Adds one physical volume (copied from user space) into the first
+ * free pv[] slot of the VG and updates the PV and PE accounting.
+ *
+ * NOTE(review): unlike lvm_do_vg_create(), the user-supplied
+ * pe_allocated is kept as-is here -- confirm the tools pass 0.
+ */
+static int lvm_do_vg_extend(vg_t *vg_ptr, void *arg)
+{
+	uint p;
+	pv_t *pv_ptr;
+
+	if (vg_ptr == NULL) return -ENXIO;
+	if (vg_ptr->pv_cur < vg_ptr->pv_max) {
+		for (p = 0; p < vg_ptr->pv_max; p++) {
+			if (vg_ptr->pv[p] == NULL) {
+				if ((pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL)) == NULL) {
+					printk(KERN_CRIT
+					       "%s -- VG_EXTEND: kmalloc error PV at line %d\n",
+					       lvm_name, __LINE__);
+					return -ENOMEM;
+				}
+				if (copy_from_user(pv_ptr, arg, sizeof(pv_t)) != 0) {
+					kfree(pv_ptr);
+					vg_ptr->pv[p] = NULL;
+					return -EFAULT;
+				}
+
+				pv_ptr->pv_status = PV_ACTIVE;
+				/* We don't need the PE list
+				   in kernel space like LVs pe_t list */
+				pv_ptr->pe = NULL;
+				vg_ptr->pv_cur++;
+				vg_ptr->pv_act++;
+				vg_ptr->pe_total +=
+				    pv_ptr->pe_total;
+#ifdef LVM_GET_INODE
+				/* insert a dummy inode for fs_may_mount */
+				pv_ptr->inode = lvm_get_inode(pv_ptr->pv_dev);
+#endif
+				return 0;
+			}
+		}
+	}
+	/* VG already holds pv_max physical volumes */
+return -EPERM;
+} /* lvm_do_vg_extend() */
+
+
+/*
+ * character device support function VGDA reduce
+ *
+ * Removes the physical volume named by the user from the VG (only if
+ * it carries no logical volumes), frees its kernel copy and compacts
+ * the pv[] pointer array.
+ *
+ * Fix: after the compaction loop p == pv_max - 1, so the original
+ * 'vg_ptr->pv[p + 1] = NULL' wrote one slot past the shifted range
+ * and left the duplicated last entry in place; clear pv[p] instead.
+ */
+static int lvm_do_vg_reduce(vg_t *vg_ptr, void *arg)
+{
+	uint p;
+	pv_t *pv_ptr;
+
+	if (vg_ptr == NULL) return -ENXIO;
+	if (copy_from_user(pv_name, arg, sizeof(pv_name)) != 0)
+		return -EFAULT;
+
+	for (p = 0; p < vg_ptr->pv_max; p++) {
+		pv_ptr = vg_ptr->pv[p];
+		if (pv_ptr != NULL &&
+		    strcmp(pv_ptr->pv_name,
+			   pv_name) == 0) {
+			/* a PV still carrying LVs cannot be removed */
+			if (pv_ptr->lv_cur > 0) return -EPERM;
+			vg_ptr->pe_total -=
+			    pv_ptr->pe_total;
+			vg_ptr->pv_cur--;
+			vg_ptr->pv_act--;
+#ifdef LVM_GET_INODE
+			lvm_clear_inode(pv_ptr->inode);
+#endif
+			kfree(pv_ptr);
+			/* Make PV pointer array contiguous */
+			for (; p < vg_ptr->pv_max - 1; p++)
+				vg_ptr->pv[p] = vg_ptr->pv[p + 1];
+			vg_ptr->pv[p] = NULL;
+			return 0;
+		}
+	}
+	return -ENXIO;
+} /* lvm_do_vg_reduce */
+
+
+/*
+ * character device support function VGDA remove
+ *
+ * Deactivates the VG and tears it down: snapshot LVs first, then the
+ * remaining LVs, then the PVs, and finally the vg_t itself.  Refuses
+ * if any LV is still open (unless a total reset is in progress).
+ */
+static int lvm_do_vg_remove(int minor)
+{
+	int i;
+	vg_t *vg_ptr = vg[VG_CHR(minor)];
+	pv_t *pv_ptr;
+
+	if (vg_ptr == NULL) return -ENXIO;
+
+#ifdef LVM_TOTAL_RESET
+	if (vg_ptr->lv_open > 0 && lvm_reset_spindown == 0)
+#else
+	if (vg_ptr->lv_open > 0)
+#endif
+		return -EPERM;
+
+	/* let's go inactive */
+	vg_ptr->vg_status &= ~VG_ACTIVE;
+
+	/* free LVs */
+	/* first free snapshot logical volumes */
+	for (i = 0; i < vg_ptr->lv_max; i++) {
+		if (vg_ptr->lv[i] != NULL &&
+		    vg_ptr->lv[i]->lv_access & LV_SNAPSHOT) {
+			lvm_do_lv_remove(minor, NULL, i);
+			/* yield briefly so pending i/o can settle
+			   before the next removal */
+			current->state = TASK_UNINTERRUPTIBLE;
+			schedule_timeout(1);
+		}
+	}
+	/* then free the rest of the LVs */
+	for (i = 0; i < vg_ptr->lv_max; i++) {
+		if (vg_ptr->lv[i] != NULL) {
+			lvm_do_lv_remove(minor, NULL, i);
+			current->state = TASK_UNINTERRUPTIBLE;
+			schedule_timeout(1);
+		}
+	}
+
+	/* free PVs */
+	for (i = 0; i < vg_ptr->pv_max; i++) {
+		if ((pv_ptr = vg_ptr->pv[i]) != NULL) {
+#ifdef DEBUG_KFREE
+			printk(KERN_DEBUG
+			       "%s -- kfree %d\n", lvm_name, __LINE__);
+#endif
+#ifdef LVM_GET_INODE
+			lvm_clear_inode(pv_ptr->inode);
+#endif
+			kfree(pv_ptr);
+			vg[VG_CHR(minor)]->pv[i] = NULL;
+		}
+	}
+
+#ifdef DEBUG_KFREE
+	printk(KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__);
+#endif
+	kfree(vg_ptr);
+	vg[VG_CHR(minor)] = NULL;
+
+	vg_count--;
+
+	MOD_DEC_USE_COUNT;
+
+	return 0;
+} /* lvm_do_vg_remove() */
+
+
+/*
+ * character device support function logical volume create
+ */
+static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv)
+{
+ int l, le, l_new, p, size;
+ ulong lv_status_save;
+ lv_block_exception_t *lvbe = lv->lv_block_exception;
+ vg_t *vg_ptr = vg[VG_CHR(minor)];
+ lv_t *lv_ptr = NULL;
+
+ if ((pep = lv->lv_current_pe) == NULL) return -EINVAL;
+ if (lv->lv_chunk_size > LVM_SNAPSHOT_MAX_CHUNK)
+ return -EINVAL;
+
+ for (l = 0; l < vg_ptr->lv_max; l++) {
+ if (vg_ptr->lv[l] != NULL &&
+ strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0)
+ return -EEXIST;
+ }
+
+ /* in case of lv_remove(), lv_create() pair; for eg. lvrename does this */
+ l_new = -1;
+ if (vg_ptr->lv[lv->lv_number] == NULL)
+ l_new = lv->lv_number;
+ else {
+ for (l = 0; l < vg_ptr->lv_max; l++) {
+ if (vg_ptr->lv[l] == NULL)
+ if (l_new == -1) l_new = l;
+ }
+ }
+ if (l_new == -1) return -EPERM;
+ else l = l_new;
+
+ if ((lv_ptr = kmalloc(sizeof(lv_t),GFP_KERNEL)) == NULL) {;
+ printk(KERN_CRIT "%s -- LV_CREATE: kmalloc error LV at line %d\n",
+ lvm_name, __LINE__);
+ return -ENOMEM;
+ }
+ /* copy preloaded LV */
+ memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t));
+
+ lv_status_save = lv_ptr->lv_status;
+ lv_ptr->lv_status &= ~LV_ACTIVE;
+ lv_ptr->lv_snapshot_org = \
+ lv_ptr->lv_snapshot_prev = \
+ lv_ptr->lv_snapshot_next = NULL;
+ lv_ptr->lv_block_exception = NULL;
+ init_MUTEX(&lv_ptr->lv_snapshot_sem);
+ vg_ptr->lv[l] = lv_ptr;
+
+ /* get the PE structures from user space if this
+ is no snapshot logical volume */
+ if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
+ size = lv_ptr->lv_allocated_le * sizeof(pe_t);
+ if ((lv_ptr->lv_current_pe = vmalloc(size)) == NULL) {
+ printk(KERN_CRIT
+ "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte "
+ "at line %d\n",
+ lvm_name, size, __LINE__);
+#ifdef DEBUG_KFREE
+ printk(KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__);
+#endif
+ kfree(lv_ptr);
+ vg[VG_CHR(minor)]->lv[l] = NULL;
+ return -ENOMEM;
+ }
+ if (copy_from_user(lv_ptr->lv_current_pe, pep, size)) {
+ vfree(lv_ptr->lv_current_pe);
+ kfree(lv_ptr);
+ vg_ptr->lv[l] = NULL;
+ return -EFAULT;
+ }
+ /* correct the PE count in PVs */
+ for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
+ vg_ptr->pe_allocated++;
+ for (p = 0; p < vg_ptr->pv_cur; p++) {
+ if (vg_ptr->pv[p]->pv_dev ==
+ lv_ptr->lv_current_pe[le].dev)
+ vg_ptr->pv[p]->pe_allocated++;
+ }
+ }
+ } else {
+ /* Get snapshot exception data and block list */
+ if (lvbe != NULL) {
+ lv_ptr->lv_snapshot_org =
+ vg_ptr->lv[LV_BLK(lv_ptr->lv_snapshot_minor)];
+ if (lv_ptr->lv_snapshot_org != NULL) {
+ size = lv_ptr->lv_remap_end * sizeof(lv_block_exception_t);
+ if ((lv_ptr->lv_block_exception = vmalloc(size)) == NULL) {
+ printk(KERN_CRIT
+ "%s -- lvm_do_lv_create: vmalloc error LV_BLOCK_EXCEPTION "
+ "of %d byte at line %d\n",
+ lvm_name, size, __LINE__);
+#ifdef DEBUG_KFREE
+ printk(KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__);
+#endif
+ kfree(lv_ptr);
+ vg_ptr->lv[l] = NULL;
+ return -ENOMEM;
+ }
+ if (copy_from_user(lv_ptr->lv_block_exception, lvbe, size)) {
+ vfree(lv_ptr->lv_block_exception);
+ kfree(lv_ptr);
+ vg[VG_CHR(minor)]->lv[l] = NULL;
+ return -EFAULT;
+ }
+ /* get pointer to original logical volume */
+ lv_ptr = lv_ptr->lv_snapshot_org;
+
+ lv_ptr->lv_snapshot_minor = 0;
+ lv_ptr->lv_snapshot_org = lv_ptr;
+ lv_ptr->lv_snapshot_prev = NULL;
+			/* walk through the snapshot list */
+ while (lv_ptr->lv_snapshot_next != NULL)
+ lv_ptr = lv_ptr->lv_snapshot_next;
+ /* now lv_ptr points to the last existing snapshot in the chain */
+ vg_ptr->lv[l]->lv_snapshot_prev = lv_ptr;
+ /* our new one now back points to the previous last in the chain */
+ lv_ptr = vg_ptr->lv[l];
+ /* now lv_ptr points to our new last snapshot logical volume */
+ lv_ptr->lv_snapshot_org = lv_ptr->lv_snapshot_prev->lv_snapshot_org;
+ lv_ptr->lv_snapshot_next = NULL;
+ lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe;
+ lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le;
+ lv_ptr->lv_current_le = lv_ptr->lv_snapshot_org->lv_current_le;
+ lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size;
+ lv_ptr->lv_stripes = lv_ptr->lv_snapshot_org->lv_stripes;
+ lv_ptr->lv_stripesize = lv_ptr->lv_snapshot_org->lv_stripesize;
+ {
+ int err = lvm_snapshot_alloc(lv_ptr);
+ if (err)
+ {
+ vfree(lv_ptr->lv_block_exception);
+ kfree(lv_ptr);
+ vg[VG_CHR(minor)]->lv[l] = NULL;
+ return err;
+ }
+ }
+ } else {
+ vfree(lv_ptr->lv_block_exception);
+ kfree(lv_ptr);
+ vg_ptr->lv[l] = NULL;
+ return -EFAULT;
+ }
+ } else {
+ kfree(vg_ptr->lv[l]);
+ vg_ptr->lv[l] = NULL;
+ return -EINVAL;
+ }
+ } /* if ( vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) */
+
+ lv_ptr = vg_ptr->lv[l];
+ lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0;
+ lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size;
+ lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1;
+ vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = vg_ptr->vg_number;
+ vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = lv_ptr->lv_number;
+ LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
+ vg_ptr->lv_cur++;
+ lv_ptr->lv_status = lv_status_save;
+
+ /* optionally add our new snapshot LV */
+ if (lv_ptr->lv_access & LV_SNAPSHOT) {
+ /* sync the original logical volume */
+ fsync_dev(lv_ptr->lv_snapshot_org->lv_dev);
+		/* put ourselves into the chain */
+ lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr;
+ lv_ptr->lv_snapshot_org->lv_access |= LV_SNAPSHOT_ORG;
+ }
+ return 0;
+} /* lvm_do_lv_create() */
+
+
+/*
+ * character device support function logical volume remove
+ */
+static int lvm_do_lv_remove(int minor, char *lv_name, int l)
+{
+ uint le, p;
+ vg_t *vg_ptr = vg[VG_CHR(minor)];
+ lv_t *lv_ptr;
+
+ if (l == -1) {
+ for (l = 0; l < vg_ptr->lv_max; l++) {
+ if (vg_ptr->lv[l] != NULL &&
+ strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0) {
+ break;
+ }
+ }
+ }
+ if (l == vg_ptr->lv_max) return -ENXIO;
+
+ lv_ptr = vg_ptr->lv[l];
+#ifdef LVM_TOTAL_RESET
+ if (lv_ptr->lv_open > 0 && lvm_reset_spindown == 0)
+#else
+ if (lv_ptr->lv_open > 0)
+#endif
+ return -EBUSY;
+
+ /* check for deletion of snapshot source while
+ snapshot volume still exists */
+ if ((lv_ptr->lv_access & LV_SNAPSHOT_ORG) &&
+ lv_ptr->lv_snapshot_next != NULL)
+ return -EPERM;
+
+ lv_ptr->lv_status |= LV_SPINDOWN;
+
+ /* sync the buffers */
+ fsync_dev(lv_ptr->lv_dev);
+
+ lv_ptr->lv_status &= ~LV_ACTIVE;
+
+ /* invalidate the buffers */
+ invalidate_buffers(lv_ptr->lv_dev);
+
+ /* reset generic hd */
+ lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1;
+ lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0;
+ lvm_size[MINOR(lv_ptr->lv_dev)] = 0;
+
+ /* reset VG/LV mapping */
+ vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = ABS_MAX_VG;
+ vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = -1;
+
+ /* correct the PE count in PVs if this is no snapshot logical volume */
+ if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
+ /* only if this is no snapshot logical volume because
+ we share the lv_current_pe[] structs with the
+ original logical volume */
+ for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
+ vg_ptr->pe_allocated--;
+ for (p = 0; p < vg_ptr->pv_cur; p++) {
+ if (vg_ptr->pv[p]->pv_dev ==
+ lv_ptr->lv_current_pe[le].dev)
+ vg_ptr->pv[p]->pe_allocated--;
+ }
+ }
+ vfree(lv_ptr->lv_current_pe);
+ /* LV_SNAPSHOT */
+ } else {
+ /* remove this snapshot logical volume from the chain */
+ lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next;
+ if (lv_ptr->lv_snapshot_next != NULL) {
+ lv_ptr->lv_snapshot_next->lv_snapshot_prev =
+ lv_ptr->lv_snapshot_prev;
+ }
+ /* no more snapshots? */
+ if (lv_ptr->lv_snapshot_org->lv_snapshot_next == NULL)
+ lv_ptr->lv_snapshot_org->lv_access &= ~LV_SNAPSHOT_ORG;
+ lvm_snapshot_release(lv_ptr);
+ }
+
+#ifdef DEBUG_KFREE
+ printk(KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__);
+#endif
+ kfree(lv_ptr);
+ vg_ptr->lv[l] = NULL;
+ vg_ptr->lv_cur--;
+ return 0;
+} /* lvm_do_lv_remove() */
+
+
+/*
+ * character device support function logical volume extend / reduce
+ */
+static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv)
+{
+ int l, le, p, size, old_allocated_le;
+ uint32_t end, lv_status_save;
+ vg_t *vg_ptr = vg[VG_CHR(minor)];
+ lv_t *lv_ptr;
+ pe_t *pe;
+
+ if ((pep = lv->lv_current_pe) == NULL) return -EINVAL;
+
+ for (l = 0; l < vg_ptr->lv_max; l++) {
+ if (vg_ptr->lv[l] != NULL &&
+ strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0)
+ break;
+ }
+ if (l == vg_ptr->lv_max) return -ENXIO;
+ lv_ptr = vg_ptr->lv[l];
+
+ /* check for active snapshot */
+ if (lv->lv_access & (LV_SNAPSHOT | LV_SNAPSHOT_ORG)) return -EPERM;
+
+ if ((pe = vmalloc(size = lv->lv_current_le * sizeof(pe_t))) == NULL) {
+ printk(KERN_CRIT
+ "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_CURRENT_PE "
+ "of %d Byte at line %d\n",
+ lvm_name, size, __LINE__);
+ return -ENOMEM;
+ }
+ /* get the PE structures from user space */
+ if (copy_from_user(pe, pep, size)) {
+ vfree(pe);
+ return -EFAULT;
+ }
+
+#ifdef DEBUG
+ printk(KERN_DEBUG
+ "%s -- fsync_dev and "
+ "invalidate_buffers for %s [%s] in %s\n",
+ lvm_name, lv_ptr->lv_name,
+ kdevname(lv_ptr->lv_dev),
+ vg_ptr->vg_name);
+#endif
+
+ lv_ptr->lv_status |= LV_SPINDOWN;
+ fsync_dev(lv_ptr->lv_dev);
+ lv_ptr->lv_status &= ~LV_ACTIVE;
+ invalidate_buffers(lv_ptr->lv_dev);
+
+ /* reduce allocation counters on PV(s) */
+ for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
+ vg_ptr->pe_allocated--;
+ for (p = 0; p < vg_ptr->pv_cur; p++) {
+ if (vg_ptr->pv[p]->pv_dev ==
+ lv_ptr->lv_current_pe[le].dev) {
+ vg_ptr->pv[p]->pe_allocated--;
+ break;
+ }
+ }
+ }
+
+
+ /* save pointer to "old" lv/pe pointer array */
+ pep1 = lv_ptr->lv_current_pe;
+ end = lv_ptr->lv_current_le;
+
+ /* save open counter */
+ lv_open = lv_ptr->lv_open;
+
+ /* save # of old allocated logical extents */
+ old_allocated_le = lv_ptr->lv_allocated_le;
+
+ /* copy preloaded LV */
+ lv_status_save = lv->lv_status;
+ lv->lv_status |= LV_SPINDOWN;
+ lv->lv_status &= ~LV_ACTIVE;
+ memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t));
+ lv_ptr->lv_current_pe = pe;
+ lv_ptr->lv_open = lv_open;
+
+	/* save available i/o statistics data */
+ /* linear logical volume */
+ if (lv_ptr->lv_stripes < 2) {
+ /* Check what last LE shall be used */
+ if (end > lv_ptr->lv_current_le) end = lv_ptr->lv_current_le;
+ for (le = 0; le < end; le++) {
+ lv_ptr->lv_current_pe[le].reads = pep1[le].reads;
+ lv_ptr->lv_current_pe[le].writes = pep1[le].writes;
+ }
+ /* striped logical volume */
+ } else {
+ uint i, j, source, dest, end, old_stripe_size, new_stripe_size;
+
+ old_stripe_size = old_allocated_le / lv_ptr->lv_stripes;
+ new_stripe_size = lv_ptr->lv_allocated_le / lv_ptr->lv_stripes;
+ end = old_stripe_size;
+ if (end > new_stripe_size) end = new_stripe_size;
+ for (i = source = dest = 0;
+ i < lv_ptr->lv_stripes; i++) {
+ for (j = 0; j < end; j++) {
+ lv_ptr->lv_current_pe[dest + j].reads =
+ pep1[source + j].reads;
+ lv_ptr->lv_current_pe[dest + j].writes =
+ pep1[source + j].writes;
+ }
+ source += old_stripe_size;
+ dest += new_stripe_size;
+ }
+ }
+ vfree(pep1);
+ pep1 = NULL;
+
+
+ /* extend the PE count in PVs */
+ for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
+ vg_ptr->pe_allocated++;
+ for (p = 0; p < vg_ptr->pv_cur; p++) {
+ if (vg_ptr->pv[p]->pv_dev ==
+ vg_ptr->lv[l]->lv_current_pe[le].dev) {
+ vg_ptr->pv[p]->pe_allocated++;
+ break;
+ }
+ }
+ }
+
+ lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0;
+ lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size;
+ lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1;
+ /* vg_lv_map array doesn't have to be changed here */
+
+ LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
+ lv_ptr->lv_status = lv_status_save;
+
+ return 0;
+} /* lvm_do_lv_extend_reduce() */
+
+
+/*
+ * character device support function logical volume status by name
+ */
+static int lvm_do_lv_status_byname(vg_t *vg_ptr, void *arg)
+{
+ uint l;
+ ulong size;
+ lv_t lv;
+ lv_t *lv_ptr;
+ lv_status_byname_req_t lv_status_byname_req;
+
+ if (vg_ptr == NULL) return -ENXIO;
+ if (copy_from_user(&lv_status_byname_req, arg,
+ sizeof(lv_status_byname_req_t)) != 0)
+ return -EFAULT;
+
+ if (lv_status_byname_req.lv == NULL) return -EINVAL;
+ if (copy_from_user(&lv, lv_status_byname_req.lv,
+ sizeof(lv_t)) != 0)
+ return -EFAULT;
+
+ for (l = 0; l < vg_ptr->lv_max; l++) {
+ lv_ptr = vg_ptr->lv[l];
+ if (lv_ptr != NULL &&
+ strcmp(lv_ptr->lv_name,
+ lv_status_byname_req.lv_name) == 0) {
+ if (copy_to_user(lv_status_byname_req.lv,
+ lv_ptr,
+ sizeof(lv_t)) != 0)
+ return -EFAULT;
+
+ if (lv.lv_current_pe != NULL) {
+ size = lv_ptr->lv_allocated_le *
+ sizeof(pe_t);
+ if (copy_to_user(lv.lv_current_pe,
+ lv_ptr->lv_current_pe,
+ size) != 0)
+ return -EFAULT;
+ }
+ return 0;
+ }
+ }
+ return -ENXIO;
+} /* lvm_do_lv_status_byname() */
+
+
+/*
+ * character device support function logical volume status by index
+ */
+static int lvm_do_lv_status_byindex(vg_t *vg_ptr,void *arg)
+{
+ ulong size;
+ lv_t lv;
+ lv_t *lv_ptr;
+ lv_status_byindex_req_t lv_status_byindex_req;
+
+ if (vg_ptr == NULL) return -ENXIO;
+ if (copy_from_user(&lv_status_byindex_req, arg,
+ sizeof(lv_status_byindex_req)) != 0)
+ return -EFAULT;
+
+ if ((lvp = lv_status_byindex_req.lv) == NULL)
+ return -EINVAL;
+ if ( ( lv_ptr = vg_ptr->lv[lv_status_byindex_req.lv_index]) == NULL)
+ return -ENXIO;
+
+ if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0)
+ return -EFAULT;
+
+ if (copy_to_user(lvp, lv_ptr, sizeof(lv_t)) != 0)
+ return -EFAULT;
+
+ if (lv.lv_current_pe != NULL) {
+ size = lv_ptr->lv_allocated_le * sizeof(pe_t);
+ if (copy_to_user(lv.lv_current_pe,
+ lv_ptr->lv_current_pe,
+ size) != 0)
+ return -EFAULT;
+ }
+ return 0;
+} /* lvm_do_lv_status_byindex() */
+
+
+/*
+ * character device support function physical volume change
+ */
+static int lvm_do_pv_change(vg_t *vg_ptr, void *arg)
+{
+ uint p;
+ pv_t *pv_ptr;
+#ifdef LVM_GET_INODE
+ struct inode *inode_sav;
+#endif
+
+ if (vg_ptr == NULL) return -ENXIO;
+ if (copy_from_user(&pv_change_req, arg,
+ sizeof(pv_change_req)) != 0)
+ return -EFAULT;
+
+ for (p = 0; p < vg_ptr->pv_max; p++) {
+ pv_ptr = vg_ptr->pv[p];
+ if (pv_ptr != NULL &&
+ strcmp(pv_ptr->pv_name,
+ pv_change_req.pv_name) == 0) {
+#ifdef LVM_GET_INODE
+ inode_sav = pv_ptr->inode;
+#endif
+ if (copy_from_user(pv_ptr,
+ pv_change_req.pv,
+ sizeof(pv_t)) != 0)
+ return -EFAULT;
+
+ /* We don't need the PE list
+ in kernel space as with LVs pe_t list */
+ pv_ptr->pe = NULL;
+#ifdef LVM_GET_INODE
+ pv_ptr->inode = inode_sav;
+#endif
+ return 0;
+ }
+ }
+ return -ENXIO;
+} /* lvm_do_pv_change() */
+
+/*
+ * character device support function get physical volume status
+ */
+static int lvm_do_pv_status(vg_t *vg_ptr, void *arg)
+{
+ uint p;
+ pv_t *pv_ptr;
+
+ if (vg_ptr == NULL) return -ENXIO;
+ if (copy_from_user(&pv_status_req, arg,
+ sizeof(pv_status_req)) != 0)
+ return -EFAULT;
+
+ for (p = 0; p < vg_ptr->pv_max; p++) {
+ pv_ptr = vg_ptr->pv[p];
+ if (pv_ptr != NULL &&
+ strcmp(pv_ptr->pv_name,
+ pv_status_req.pv_name) == 0) {
+ if (copy_to_user(pv_status_req.pv,
+ pv_ptr,
+ sizeof(pv_t)) != 0)
+ return -EFAULT;
+ return 0;
+ }
+ }
+ return -ENXIO;
+} /* lvm_do_pv_status() */
+
+
+/*
+ * support function initialize gendisk variables
+ */
+#ifdef __initfunc
+__initfunc(void lvm_geninit(struct gendisk *lvm_gdisk))
+#else
+void __init
+ lvm_geninit(struct gendisk *lvm_gdisk)
+#endif
+{
+ int i = 0;
+
+#ifdef DEBUG_GENDISK
+ printk(KERN_DEBUG "%s -- lvm_gendisk\n", lvm_name);
+#endif
+
+ for (i = 0; i < MAX_LV; i++) {
+ lvm_gendisk.part[i].start_sect = -1; /* avoid partition check */
+ lvm_size[i] = lvm_gendisk.part[i].nr_sects = 0;
+ lvm_blocksizes[i] = BLOCK_SIZE;
+ }
+
+ blksize_size[MAJOR_NR] = lvm_blocksizes;
+ blk_size[MAJOR_NR] = lvm_size;
+
+ return;
+} /* lvm_gen_init() */
+
+
+#ifdef LVM_GET_INODE
+/*
+ * support function to get an empty inode
+ *
+ * Gets an empty inode to be inserted into the inode hash,
+ * so that a physical volume can't be mounted.
+ * This is analogous to drivers/block/md.c
+ *
+ * Is this the real thing?
+ *
+ */
+struct inode *lvm_get_inode(int dev)
+{
+ struct inode *inode_this = NULL;
+
+ /* Lock the device by inserting a dummy inode. */
+ inode_this = get_empty_inode();
+ inode_this->i_dev = dev;
+ insert_inode_hash(inode_this);
+ return inode_this;
+}
+
+
+/*
+ * support function to clear an inode
+ *
+ */
+void lvm_clear_inode(struct inode *inode)
+{
+#ifdef I_FREEING
+ inode->i_state |= I_FREEING;
+#endif
+ clear_inode(inode);
+ return;
+}
+#endif /* #ifdef LVM_GET_INODE */
diff --git a/drivers/block/md.c b/drivers/block/md.c
index 752c7b0ab..b258fc6c5 100644
--- a/drivers/block/md.c
+++ b/drivers/block/md.c
@@ -11,6 +11,7 @@
- kerneld support by Boris Tobotras <boris@xtalk.msk.su>
- kmod support by: Cyrus Durgin
- RAID0 bugfixes: Mark Anthony Lisher <markal@iname.com>
+ - Devfs support by Richard Gooch <rgooch@atnf.csiro.au>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -24,6 +25,7 @@
#include <linux/config.h>
#include <linux/raid/md.h>
+#include <linux/devfs_fs_kernel.h>
#ifdef CONFIG_KMOD
#include <linux/kmod.h>
@@ -68,6 +70,9 @@ static mdk_thread_t *md_recovery_thread = NULL;
int md_size[MAX_MD_DEVS] = {0, };
+extern struct block_device_operations md_fops;
+static devfs_handle_t devfs_handle = NULL;
+
static struct gendisk md_gendisk=
{
MD_MAJOR,
@@ -78,7 +83,8 @@ static struct gendisk md_gendisk=
md_size,
MAX_MD_DEVS,
NULL,
- NULL
+ NULL,
+ &md_fops,
};
void md_plug_device (request_queue_t *mdqueue, kdev_t dev)
@@ -3302,11 +3308,15 @@ int md__init md_init (void)
MD_MAJOR_VERSION, MD_MINOR_VERSION,
MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MAX_REAL);
- if (register_blkdev (MD_MAJOR, "md", &md_fops))
+ if (devfs_register_blkdev (MD_MAJOR, "md", &md_fops))
{
printk (KERN_ALERT "Unable to get major %d for md\n", MD_MAJOR);
return (-1);
}
+ devfs_handle = devfs_mk_dir (NULL, "md", 0, NULL);
+ devfs_register_series (devfs_handle, "%u",MAX_MD_DEVS,DEVFS_FL_DEFAULT,
+ MAJOR_NR, 0, S_IFBLK | S_IRUSR | S_IWUSR, 0, 0,
+ &md_fops, NULL);
blk_dev[MD_MAJOR].queue = md_get_queue;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 0efcce8ed..abecb27c4 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -184,10 +184,10 @@ struct request *nbd_read_stat(struct nbd_device *lo)
DEBUG("reading control, ");
reply.magic = 0;
result = nbd_xmit(0, lo->sock, (char *) &reply, sizeof(reply));
- req = lo->tail;
if (result <= 0)
HARDFAIL("Recv control failed.");
memcpy(&xreq, reply.handle, sizeof(xreq));
+ req = blkdev_entry_prev_request(&lo->queue_head);
if (xreq != req)
FAIL("Unexpected handle received.\n");
@@ -216,47 +216,41 @@ void nbd_do_it(struct nbd_device *lo)
{
struct request *req;
- while (1) {
+ down (&lo->queue_lock);
+ while (!list_empty(&lo->queue_head)) {
req = nbd_read_stat(lo);
if (!req)
- return;
- down (&lo->queue_lock);
+ goto out;
#ifdef PARANOIA
- if (req != lo->tail) {
+ if (req != blkdev_entry_prev_request(&lo->queue_head)) {
printk(KERN_ALERT "NBD: I have problem...\n");
}
if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
printk(KERN_ALERT "NBD: request corrupted!\n");
- goto next;
+ continue;
}
if (lo->magic != LO_MAGIC) {
printk(KERN_ALERT "NBD: nbd_dev[] corrupted: Not enough magic\n");
- up (&lo->queue_lock);
- return;
+ goto out;
}
#endif
- nbd_end_request(req);
- if (lo->tail == lo->head) {
-#ifdef PARANOIA
- if (lo->tail->next)
- printk(KERN_ERR "NBD: I did not expect this\n");
-#endif
- lo->head = NULL;
- }
- lo->tail = lo->tail->next;
- next:
+ list_del(&req->queue);
up (&lo->queue_lock);
+
+ nbd_end_request(req);
+
+ down (&lo->queue_lock);
}
+ out:
+ up (&lo->queue_lock);
}
void nbd_clear_que(struct nbd_device *lo)
{
struct request *req;
- while (1) {
- req = lo->tail;
- if (!req)
- return;
+ while (!list_empty(&lo->queue_head)) {
+ req = blkdev_entry_prev_request(&lo->queue_head);
#ifdef PARANOIA
if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
printk(KERN_ALERT "NBD: request corrupted when clearing!\n");
@@ -268,15 +262,12 @@ void nbd_clear_que(struct nbd_device *lo)
}
#endif
req->errors++;
+ list_del(&req->queue);
+ up(&lo->queue_lock);
+
nbd_end_request(req);
- if (lo->tail == lo->head) {
-#ifdef PARANOIA
- if (lo->tail->next)
- printk(KERN_ERR "NBD: I did not assume this\n");
-#endif
- lo->head = NULL;
- }
- lo->tail = lo->tail->next;
+
+ down(&lo->queue_lock);
}
}
@@ -296,7 +287,7 @@ static void do_nbd_request(request_queue_t * q)
int dev;
struct nbd_device *lo;
- while (CURRENT) {
+ while (!QUEUE_EMPTY) {
req = CURRENT;
dev = MINOR(req->rq_dev);
#ifdef PARANOIA
@@ -314,28 +305,23 @@ static void do_nbd_request(request_queue_t * q)
requests_in++;
#endif
req->errors = 0;
- CURRENT = CURRENT->next;
- req->next = NULL;
-
+ blkdev_dequeue_request(req);
spin_unlock_irq(&io_request_lock);
- down (&lo->queue_lock);
- if (lo->head == NULL) {
- lo->head = req;
- lo->tail = req;
- } else {
- lo->head->next = req;
- lo->head = req;
- }
+ down (&lo->queue_lock);
+ list_add(&req->queue, &lo->queue_head);
nbd_send_req(lo->sock, req); /* Why does this block? */
up (&lo->queue_lock);
+
spin_lock_irq(&io_request_lock);
continue;
error_out:
req->errors++;
+ blkdev_dequeue_request(req);
+ spin_unlock(&io_request_lock);
nbd_end_request(req);
- CURRENT = CURRENT->next;
+ spin_lock(&io_request_lock);
}
return;
}
@@ -359,11 +345,14 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
lo = &nbd_dev[dev];
switch (cmd) {
case NBD_CLEAR_SOCK:
+ down(&lo->queue_lock);
nbd_clear_que(lo);
- if (lo->head || lo->tail) {
+ if (!list_empty(&lo->queue_head)) {
+ up(&lo->queue_lock);
printk(KERN_ERR "nbd: Some requests are in progress -> can not turn off.\n");
return -EBUSY;
}
+ up(&lo->queue_lock);
file = lo->file;
if (!file)
return -EINVAL;
@@ -415,8 +404,8 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
return 0;
#ifdef PARANOIA
case NBD_PRINT_DEBUG:
- printk(KERN_INFO "NBD device %d: head = %lx, tail = %lx. Global: in %d, out %d\n",
- dev, (long) lo->head, (long) lo->tail, requests_in, requests_out);
+ printk(KERN_INFO "NBD device %d: next = %p, prev = %p. Global: in %d, out %d\n",
+ dev, lo->queue_head.next, lo->queue_head.prev, requests_in, requests_out);
return 0;
#endif
case BLKGETSIZE:
@@ -480,6 +469,7 @@ int nbd_init(void)
blksize_size[MAJOR_NR] = nbd_blksizes;
blk_size[MAJOR_NR] = nbd_sizes;
blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request);
+ blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0);
for (i = 0; i < MAX_NBD; i++) {
nbd_dev[i].refcnt = 0;
nbd_dev[i].file = NULL;
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 7db6626f4..878709944 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -756,7 +756,7 @@ static void do_pcd_request (request_queue_t * q)
if (pcd_busy) return;
while (1) {
- if ((!CURRENT) || (CURRENT->rq_status == RQ_INACTIVE)) return;
+ if (QUEUE_EMPTY || (CURRENT->rq_status == RQ_INACTIVE)) return;
INIT_REQUEST;
if (CURRENT->cmd == READ) {
unit = MINOR(CURRENT->rq_dev);
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 577d1354c..f40958ecd 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -156,6 +156,7 @@ static int pd_drive_count;
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/devfs_fs_kernel.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/genhd.h>
@@ -339,6 +340,8 @@ static char *pd_errs[17] = { "ERR","INDEX","ECC","DRQ","SEEK","WRERR",
/* kernel glue structures */
+extern struct block_device_operations pd_fops;
+
static struct gendisk pd_gendisk = {
PD_MAJOR, /* Major number */
PD_NAME, /* Major name */
@@ -348,7 +351,8 @@ static struct gendisk pd_gendisk = {
pd_sizes, /* block sizes */
0, /* number */
NULL, /* internal */
- NULL /* next */
+ NULL, /* next */
+ &pd_fops, /* block device operations */
};
static struct block_device_operations pd_fops = {
@@ -386,8 +390,7 @@ int pd_init (void)
{ int i;
if (disable) return -1;
-
- if (register_blkdev(MAJOR_NR,name,&pd_fops)) {
+ if (devfs_register_blkdev(MAJOR_NR,name,&pd_fops)) {
printk("%s: unable to get major number %d\n",
name,major);
return -1;
@@ -592,8 +595,7 @@ void cleanup_module(void)
{ struct gendisk **gdp;
int unit;
- unregister_blkdev(MAJOR_NR,name);
-
+ devfs_unregister_blkdev(MAJOR_NR,name);
for(gdp=&gendisk_head;*gdp;gdp=&((*gdp)->next))
if (*gdp == &pd_gendisk) break;
if (*gdp) *gdp = (*gdp)->next;
@@ -868,7 +870,7 @@ static void do_pd_request (request_queue_t * q)
if (pd_busy) return;
repeat:
- if ((!CURRENT) || (CURRENT->rq_status == RQ_INACTIVE)) return;
+ if (QUEUE_EMPTY || (CURRENT->rq_status == RQ_INACTIVE)) return;
INIT_REQUEST;
pd_dev = MINOR(CURRENT->rq_dev);
@@ -890,7 +892,7 @@ repeat:
pd_cmd = CURRENT->cmd;
pd_run = pd_count;
while ((pd_run <= cluster) &&
- (req = req->next) &&
+ (req = blkdev_next_request(req)) &&
(pd_block+pd_run == req->sector) &&
(pd_cmd == req->cmd) &&
(pd_dev == MINOR(req->rq_dev)))
@@ -922,7 +924,7 @@ static void pd_next_buf( int unit )
/* paranoia */
- if ((!CURRENT) ||
+ if (QUEUE_EMPTY ||
(CURRENT->cmd != pd_cmd) ||
(MINOR(CURRENT->rq_dev) != pd_dev) ||
(CURRENT->rq_status == RQ_INACTIVE) ||
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index d658a0369..4e7a5aaf4 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -854,7 +854,7 @@ static void do_pf_request (request_queue_t * q)
if (pf_busy) return;
repeat:
- if ((!CURRENT) || (CURRENT->rq_status == RQ_INACTIVE)) return;
+ if (QUEUE_EMPTY || (CURRENT->rq_status == RQ_INACTIVE)) return;
INIT_REQUEST;
pf_unit = unit = DEVICE_NR(CURRENT->rq_dev);
@@ -874,7 +874,7 @@ repeat:
pf_cmd = CURRENT->cmd;
pf_run = pf_count;
while ((pf_run <= cluster) &&
- (req = req->next) &&
+ (req = blkdev_next_request(req)) &&
(pf_block+pf_run == req->sector) &&
(pf_cmd == req->cmd) &&
(pf_unit == DEVICE_NR(req->rq_dev)))
@@ -904,7 +904,7 @@ static void pf_next_buf( int unit )
/* paranoia */
- if ((!CURRENT) ||
+ if (QUEUE_EMPTY ||
(CURRENT->cmd != pf_cmd) ||
(DEVICE_NR(CURRENT->rq_dev) != pf_unit) ||
(CURRENT->rq_status == RQ_INACTIVE) ||
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index 434fac029..b4e36726c 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -164,6 +164,7 @@ static int pg_drive_count;
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/devfs_fs_kernel.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/malloc.h>
@@ -286,6 +287,8 @@ void pg_init_units( void )
}
}
+static devfs_handle_t devfs_handle = NULL;
+
int pg_init (void) /* preliminary initialisation */
{ int unit;
@@ -296,14 +299,17 @@ int pg_init (void) /* preliminary initialisation */
if (pg_detect()) return -1;
- if (register_chrdev(major,name,&pg_fops)) {
+ if (devfs_register_chrdev(major,name,&pg_fops)) {
printk("pg_init: unable to get major number %d\n",
major);
for (unit=0;unit<PG_UNITS;unit++)
if (PG.present) pi_release(PI);
return -1;
}
-
+ devfs_handle = devfs_mk_dir (NULL, "pg", 2, NULL);
+ devfs_register_series (devfs_handle, "%u", 4, DEVFS_FL_DEFAULT,
+ major, 0, S_IFCHR | S_IRUSR | S_IWUSR, 0, 0,
+ &pg_fops, NULL);
return 0;
}
@@ -332,7 +338,8 @@ void cleanup_module(void)
{ int unit;
- unregister_chrdev(major,name);
+ devfs_unregister (devfs_handle);
+ devfs_unregister_chrdev(major,name);
for (unit=0;unit<PG_UNITS;unit++)
if (PG.present) pi_release(PI);
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index ba24c9956..459ef7237 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -143,6 +143,7 @@ static int pt_drive_count;
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/devfs_fs_kernel.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/malloc.h>
@@ -290,6 +291,8 @@ void pt_init_units( void )
}
}
+static devfs_handle_t devfs_handle = NULL;
+
int pt_init (void) /* preliminary initialisation */
{ int unit;
@@ -300,7 +303,7 @@ int pt_init (void) /* preliminary initialisation */
if (pt_detect()) return -1;
- if (register_chrdev(major,name,&pt_fops)) {
+ if (devfs_register_chrdev(major,name,&pt_fops)) {
printk("pt_init: unable to get major number %d\n",
major);
for (unit=0;unit<PT_UNITS;unit++)
@@ -308,6 +311,13 @@ int pt_init (void) /* preliminary initialisation */
return -1;
}
+ devfs_handle = devfs_mk_dir (NULL, "pt", 2, NULL);
+ devfs_register_series (devfs_handle, "%u", 4, DEVFS_FL_DEFAULT,
+ major, 0, S_IFCHR | S_IRUSR | S_IWUSR, 0, 0,
+ &pt_fops, NULL);
+ devfs_register_series (devfs_handle, "%un", 4, DEVFS_FL_DEFAULT,
+ major, 128, S_IFCHR | S_IRUSR | S_IWUSR, 0, 0,
+ &pt_fops, NULL);
return 0;
}
@@ -334,9 +344,10 @@ int init_module(void)
void cleanup_module(void)
-{ int unit;
+{ int unit;
- unregister_chrdev(major,name);
+ devfs_unregister (devfs_handle);
+ devfs_unregister_chrdev(major,name);
for (unit=0;unit<PT_UNITS;unit++)
if (PT.present) pi_release(PI);
diff --git a/drivers/block/ps2esdi.c b/drivers/block/ps2esdi.c
index 9f68ebbfc..305c89a00 100644
--- a/drivers/block/ps2esdi.c
+++ b/drivers/block/ps2esdi.c
@@ -41,6 +41,7 @@
#include <linux/kernel.h>
#include <linux/genhd.h>
#include <linux/ps2esdi.h>
+#include <linux/devfs_fs_kernel.h>
#include <linux/blk.h>
#include <linux/blkpg.h>
#include <linux/mca.h>
@@ -164,7 +165,8 @@ static struct gendisk ps2esdi_gendisk =
ps2esdi_sizes, /* block sizes */
0, /* number */
(void *) ps2esdi_info, /* internal */
- NULL /* next */
+ NULL, /* next */
+ &ps2esdi_fops, /* file operations */
};
/* initialization routine called by ll_rw_blk.c */
@@ -173,7 +175,7 @@ int __init ps2esdi_init(void)
/* register the device - pass the name, major number and operations
vector . */
- if (register_blkdev(MAJOR_NR, "ed", &ps2esdi_fops)) {
+ if (devfs_register_blkdev(MAJOR_NR, "ed", &ps2esdi_fops)) {
printk("%s: Unable to get major number %d\n", DEVICE_NAME, MAJOR_NR);
return -1;
}
@@ -229,7 +231,7 @@ cleanup_module(void)
release_region(io_base, 4);
free_dma(dma_arb_level);
free_irq(PS2ESDI_IRQ, NULL)
- unregister_blkdev(MAJOR_NR, "ed");
+ devfs_unregister_blkdev(MAJOR_NR, "ed");
}
#endif /* MODULE */
@@ -476,7 +478,7 @@ static void do_ps2esdi_request(request_queue_t * q)
if (virt_to_bus(CURRENT->buffer + CURRENT->nr_sectors * 512) > 16 * MB) {
printk("%s: DMA above 16MB not supported\n", DEVICE_NAME);
end_request(FAIL);
- if (CURRENT)
+ if (!QUEUE_EMPTY)
do_ps2esdi_request(q);
return;
} /* check for above 16Mb dmas */
@@ -510,7 +512,7 @@ static void do_ps2esdi_request(request_queue_t * q)
default:
printk("%s: Unknown command\n", DEVICE_NAME);
end_request(FAIL);
- if (CURRENT)
+ if (!QUEUE_EMPTY)
do_ps2esdi_request(q);
break;
} /* handle different commands */
@@ -520,7 +522,7 @@ static void do_ps2esdi_request(request_queue_t * q)
printk("Grrr. error. ps2esdi_drives: %d, %lu %lu\n", ps2esdi_drives,
CURRENT->sector, ps2esdi[MINOR(CURRENT->rq_dev)].nr_sects);
end_request(FAIL);
- if (CURRENT)
+ if (!QUEUE_EMPTY)
do_ps2esdi_request(q);
}
@@ -591,7 +593,7 @@ static void ps2esdi_readwrite(int cmd, u_char drive, u_int block, u_int count)
return do_ps2esdi_request(NULL);
else {
end_request(FAIL);
- if (CURRENT)
+ if (!QUEUE_EMPTY)
do_ps2esdi_request(NULL);
}
}
@@ -894,7 +896,7 @@ static void ps2esdi_normal_interrupt_handler(u_int int_ret_code)
do_ps2esdi_request(NULL);
else {
end_request(FAIL);
- if (CURRENT)
+ if (!QUEUE_EMPTY)
do_ps2esdi_request(NULL);
}
break;
@@ -940,7 +942,7 @@ static void ps2esdi_normal_interrupt_handler(u_int int_ret_code)
do_ps2esdi_request(NULL);
else {
end_request(FAIL);
- if (CURRENT)
+ if (!QUEUE_EMPTY)
do_ps2esdi_request(NULL);
}
break;
@@ -950,7 +952,7 @@ static void ps2esdi_normal_interrupt_handler(u_int int_ret_code)
outb((int_ret_code & 0xe0) | ATT_EOI, ESDI_ATTN);
outb(CTRL_ENABLE_INTR, ESDI_CONTROL);
end_request(FAIL);
- if (CURRENT)
+ if (!QUEUE_EMPTY)
do_ps2esdi_request(NULL);
break;
@@ -986,7 +988,7 @@ static void ps2esdi_continue_request(void)
do_ps2esdi_request(NULL);
} else {
end_request(SUCCES);
- if (CURRENT)
+ if (!QUEUE_EMPTY)
do_ps2esdi_request(NULL);
}
}
diff --git a/drivers/block/raid1.c b/drivers/block/raid1.c
index 6671f83e8..057be0d64 100644
--- a/drivers/block/raid1.c
+++ b/drivers/block/raid1.c
@@ -211,7 +211,11 @@ raid1_make_request (struct md_dev *mddev, int rw, struct buffer_head * bh)
while (!( /* FIXME: now we are rather fault tolerant than nice */
r1_bh = kmalloc (sizeof (struct raid1_bh), GFP_KERNEL)
) )
+ {
printk ("raid1_make_request(#1): out of memory\n");
+ current->policy |= SCHED_YIELD;
+ schedule();
+ }
memset (r1_bh, 0, sizeof (struct raid1_bh));
/*
@@ -298,7 +302,11 @@ raid1_make_request (struct md_dev *mddev, int rw, struct buffer_head * bh)
while (!( /* FIXME: now we are rather fault tolerant than nice */
mirror_bh[i] = kmalloc (sizeof (struct buffer_head), GFP_KERNEL)
) )
+ {
printk ("raid1_make_request(#2): out of memory\n");
+ current->policy |= SCHED_YIELD;
+ schedule();
+ }
memset (mirror_bh[i], 0, sizeof (struct buffer_head));
/*
@@ -710,7 +718,11 @@ static int raid1_run (int minor, struct md_dev *mddev)
while (!( /* FIXME: now we are rather fault tolerant than nice */
mddev->private = kmalloc (sizeof (struct raid1_data), GFP_KERNEL)
) )
+ {
printk ("raid1_run(): out of memory\n");
+ current->policy |= SCHED_YIELD;
+ schedule();
+ }
raid_conf = mddev->private;
memset(raid_conf, 0, sizeof(*raid_conf));
diff --git a/drivers/block/rd.c b/drivers/block/rd.c
index 17a745d5b..19f485df0 100644
--- a/drivers/block/rd.c
+++ b/drivers/block/rd.c
@@ -58,6 +58,7 @@
#include <linux/fd.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/devfs_fs_kernel.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -97,6 +98,7 @@ static unsigned long rd_length[NUM_RAMDISKS]; /* Size of RAM disks in bytes */
static int rd_hardsec[NUM_RAMDISKS]; /* Size of real blocks in bytes */
static int rd_blocksizes[NUM_RAMDISKS]; /* Size of 1024 byte blocks :) */
static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */
+static devfs_handle_t devfs_handle = NULL;
/*
* Parameters for the boot-loading of the RAM disk. These are set by
@@ -180,6 +182,8 @@ __setup("ramdisk_size=", ramdisk_size2);
* deleted, and make that my Ramdisk. If the request is outside of the
* allocated size, we must get rid of it...
*
+ * 19-JAN-1998 Richard Gooch <rgooch@atnf.csiro.au> Added devfs support
+ *
*/
static void rd_request(request_queue_t * q)
{
@@ -362,6 +366,7 @@ static int rd_open(struct inode * inode, struct file * filp)
if (DEVICE_NR(inode->i_rdev) >= NUM_RAMDISKS)
return -ENXIO;
+ filp->f_op = &def_blk_fops;
MOD_INC_USE_COUNT;
return 0;
@@ -387,6 +392,7 @@ static void __exit rd_cleanup (void)
for (i = 0 ; i < NUM_RAMDISKS; i++)
destroy_buffers(MKDEV(MAJOR_NR, i));
+ devfs_unregister (devfs_handle);
unregister_blkdev( MAJOR_NR, "ramdisk" );
blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
}
@@ -418,6 +424,11 @@ int __init rd_init (void)
rd_blocksizes[i] = rd_blocksize;
rd_kbsize[i] = rd_size;
}
+ devfs_handle = devfs_mk_dir (NULL, "rd", 0, NULL);
+ devfs_register_series (devfs_handle, "%u", NUM_RAMDISKS,
+ DEVFS_FL_DEFAULT, MAJOR_NR, 0,
+ S_IFBLK | S_IRUSR | S_IWUSR, 0, 0,
+ &fd_fops, NULL);
hardsect_size[MAJOR_NR] = rd_hardsec; /* Size of the RAM disk blocks */
blksize_size[MAJOR_NR] = rd_blocksizes; /* Avoid set_blocksize() check */
@@ -560,7 +571,7 @@ done:
*/
static void __init rd_load_image(kdev_t device, int offset, int unit)
{
- struct inode inode, out_inode;
+ struct inode *inode, *out_inode;
struct file infile, outfile;
struct dentry in_dentry, out_dentry;
mm_segment_t fs;
@@ -574,25 +585,27 @@ static void __init rd_load_image(kdev_t device, int offset, int unit)
ram_device = MKDEV(MAJOR_NR, unit);
memset(&infile, 0, sizeof(infile));
- memset(&inode, 0, sizeof(inode));
memset(&in_dentry, 0, sizeof(in_dentry));
- inode.i_rdev = device;
- init_waitqueue_head(&inode.i_wait);
+ inode = get_empty_inode();
+ inode->i_rdev = device;
+ inode->i_bdev = bdget(kdev_t_to_nr(device));
infile.f_mode = 1; /* read only */
infile.f_dentry = &in_dentry;
- in_dentry.d_inode = &inode;
+ in_dentry.d_inode = inode;
memset(&outfile, 0, sizeof(outfile));
- memset(&out_inode, 0, sizeof(out_inode));
memset(&out_dentry, 0, sizeof(out_dentry));
- out_inode.i_rdev = ram_device;
- init_waitqueue_head(&out_inode.i_wait);
+ out_inode = get_empty_inode();
+ out_inode->i_rdev = ram_device;
+ out_inode->i_bdev = bdget(kdev_t_to_nr(ram_device));
outfile.f_mode = 3; /* read/write */
outfile.f_dentry = &out_dentry;
- out_dentry.d_inode = &out_inode;
+ out_dentry.d_inode = out_inode;
- if (blkdev_open(&inode, &infile) != 0) return;
- if (blkdev_open(&out_inode, &outfile) != 0) return;
+ if (blkdev_open(inode, &infile) != 0)
+ goto free_inodes;
+ if (blkdev_open(out_inode, &outfile) != 0)
+ goto free_inodes;
fs = get_fs();
set_fs(KERNEL_DS);
@@ -655,10 +668,10 @@ static void __init rd_load_image(kdev_t device, int offset, int unit)
rotate = 0;
invalidate_buffers(device);
if (infile.f_op->release)
- infile.f_op->release(&inode, &infile);
+ infile.f_op->release(inode, &infile);
printk("Please insert disk #%d and press ENTER\n", i/devblocks+1);
wait_for_keypress();
- if (blkdev_open(&inode, &infile) != 0) {
+ if (blkdev_open(inode, &infile) != 0) {
printk("Error opening disk.\n");
goto done;
}
@@ -678,11 +691,15 @@ static void __init rd_load_image(kdev_t device, int offset, int unit)
successful_load:
invalidate_buffers(device);
ROOT_DEV = MKDEV(MAJOR_NR, unit);
+ if (ROOT_DEVICE_NAME != NULL) strcpy (ROOT_DEVICE_NAME, "rd/0");
done:
if (infile.f_op->release)
- infile.f_op->release(&inode, &infile);
+ infile.f_op->release(inode, &infile);
set_fs(fs);
+free_inodes:
+ iput(inode);
+ iput(out_inode);
}
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 911bafe23..f38e10209 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -305,7 +305,7 @@ static void start_request(struct floppy_state *fs)
wake_up(&fs->wait);
return;
}
- while (CURRENT && fs->state == idle) {
+ while (!QUEUE_EMPTY && fs->state == idle) {
if (MAJOR(CURRENT->rq_dev) != MAJOR_NR)
panic(DEVICE_NAME ": request list destroyed");
if (CURRENT->bh && !buffer_locked(CURRENT->bh))
diff --git a/drivers/block/swim_iop.c b/drivers/block/swim_iop.c
index 467cda26d..d37059d38 100644
--- a/drivers/block/swim_iop.c
+++ b/drivers/block/swim_iop.c
@@ -550,7 +550,7 @@ static void start_request(struct floppy_state *fs)
wake_up(&fs->wait);
return;
}
- while (CURRENT && fs->state == idle) {
+ while (!QUEUE_EMPTY && fs->state == idle) {
if (MAJOR(CURRENT->rq_dev) != MAJOR_NR)
panic(DEVICE_NAME ": request list destroyed");
if (CURRENT->bh && !buffer_locked(CURRENT->bh))
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index b4c52d6a2..fde487ecd 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -41,6 +41,7 @@
#include <linux/hdreg.h>
#include <linux/ioport.h>
#include <linux/init.h>
+#include <linux/devfs_fs_kernel.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -87,21 +88,8 @@ XD_INFO xd_info[XD_MAXDRIVES];
should be able to detect your drive's geometry from this info. (eg: xd=0,5,0x320,3 is the "standard"). */
#include <asm/page.h>
-/* coppied from floppy.c */
-static inline int __get_order(unsigned long size)
-{
- int order;
-
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
-}
-#define xd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,__get_order(size))
-#define xd_dma_mem_free(addr, size) free_pages(addr, __get_order(size))
+#define xd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,get_order(size))
+#define xd_dma_mem_free(addr, size) free_pages(addr, get_order(size))
static char *xd_dma_buffer = 0;
static XD_SIGNATURE xd_sigs[] __initdata = {
@@ -130,6 +118,9 @@ static unsigned int xd_bases[] __initdata =
static struct hd_struct xd_struct[XD_MAXDRIVES << 6];
static int xd_sizes[XD_MAXDRIVES << 6], xd_access[XD_MAXDRIVES] = { 0, 0 };
static int xd_blocksizes[XD_MAXDRIVES << 6];
+
+extern struct block_device_operations xd_fops;
+
static struct gendisk xd_gendisk = {
MAJOR_NR, /* Major number */
"xd", /* Major name */
@@ -139,7 +130,8 @@ static struct gendisk xd_gendisk = {
xd_sizes, /* block sizes */
0, /* number */
(void *) xd_info, /* internal */
- NULL /* next */
+ NULL, /* next */
+ &xd_fops, /* file operations */
};
static struct block_device_operations xd_fops = {
open: xd_open,
@@ -164,13 +156,16 @@ static struct timer_list xd_timer = { NULL, NULL, 0, 0, (timeout_fn) xd_wakeup }
static volatile u_char xd_error;
static int nodma = XD_DONT_USE_DMA;
+static devfs_handle_t devfs_handle = NULL;
+
/* xd_init: register the block device number and set up pointer tables */
int __init xd_init (void)
{
- if (register_blkdev(MAJOR_NR,"xd",&xd_fops)) {
+ if (devfs_register_blkdev(MAJOR_NR,"xd",&xd_fops)) {
printk("xd: Unable to get major number %d\n",MAJOR_NR);
return -1;
}
+ devfs_handle = devfs_mk_dir (NULL, xd_gendisk.major_name, 0, NULL);
blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST);
read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read ahead */
xd_gendisk.next = gendisk_head;
@@ -287,7 +282,7 @@ static void do_xd_request (request_queue_t * q)
sti();
if (xdc_busy)
return;
- while (code = 0, CURRENT) {
+ while (code = 0, !QUEUE_EMPTY) {
INIT_REQUEST; /* do some checking on the request structure */
if (CURRENT_DEV < xd_drives
@@ -1162,7 +1157,7 @@ int init_module(void)
printk(KERN_INFO "XD: Loaded as a module.\n");
if (!xd_drives) {
/* no drives detected - unload module */
- unregister_blkdev(MAJOR_NR, "xd");
+ devfs_unregister_blkdev(MAJOR_NR, "xd");
xd_done();
return (-1);
}
@@ -1174,7 +1169,7 @@ void cleanup_module(void)
{
int partition,dev,start;
- unregister_blkdev(MAJOR_NR, "xd");
+ devfs_unregister_blkdev(MAJOR_NR, "xd");
for (dev = 0; dev < xd_drives; dev++) {
start = dev << xd_gendisk.minor_shift;
for (partition = xd_gendisk.max_p - 1; partition >= 0; partition--) {
@@ -1186,6 +1181,7 @@ void cleanup_module(void)
}
}
xd_done();
+ devfs_unregister (devfs_handle);
if (xd_drives) {
free_irq(xd_irq, NULL);
free_dma(xd_dma);