diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2000-02-24 00:12:35 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-02-24 00:12:35 +0000 |
commit | 482368b1a8e45430672c58c9a42e7d2004367126 (patch) | |
tree | ce2a1a567d4d62dee7c2e71a46a99cf72cf1d606 /drivers/block | |
parent | e4d0251c6f56ab2e191afb70f80f382793e23f74 (diff) |
Merge with 2.3.47. Guys, this is buggy as shit. You've been warned.
Diffstat (limited to 'drivers/block')
38 files changed, 3931 insertions, 475 deletions
diff --git a/drivers/block/Config.in b/drivers/block/Config.in index 79bd8078e..01fc28943 100644 --- a/drivers/block/Config.in +++ b/drivers/block/Config.in @@ -206,8 +206,8 @@ bool 'Multiple devices driver support' CONFIG_BLK_DEV_MD if [ "$CONFIG_BLK_DEV_MD" = "y" ]; then tristate ' Linear (append) mode' CONFIG_MD_LINEAR tristate ' RAID-0 (striping) mode' CONFIG_MD_STRIPED - tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING - tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 +# tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING +# tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 fi if [ "$CONFIG_MD_LINEAR" = "y" -o "$CONFIG_MD_STRIPED" = "y" ]; then bool ' Boot support (linear, striped)' CONFIG_MD_BOOT diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 45e86000a..d912f8c08 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -1009,37 +1009,58 @@ static boolean DAC960_ReportDeviceConfiguration(DAC960_Controller_T *Controller) } -static int DAC_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh) +static inline int DAC_new_segment(request_queue_t *q, struct request *req, + int __max_segments) { int max_segments; DAC960_Controller_T * Controller = q->queuedata; max_segments = Controller->MaxSegmentsPerRequest[MINOR(req->rq_dev)]; + if (__max_segments < max_segments) + max_segments = __max_segments; - if (req->bhtail->b_data + req->bhtail->b_size != bh->b_data) { - if (req->nr_segments < max_segments) { - req->nr_segments++; - return 1; - } - return 0; + if (req->nr_segments < max_segments) { + req->nr_segments++; + q->nr_segments++; + return 1; } + return 0; +} - return 1; +static int DAC_back_merge_fn(request_queue_t *q, struct request *req, + struct buffer_head *bh, int __max_segments) +{ + if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + return 1; + return DAC_new_segment(q, req, __max_segments); +} + +static int DAC_front_merge_fn(request_queue_t *q, struct request *req, + struct 
buffer_head *bh, int __max_segments) +{ + if (bh->b_data + bh->b_size == req->bh->b_data) + return 1; + return DAC_new_segment(q, req, __max_segments); } static int DAC_merge_requests_fn(request_queue_t *q, struct request *req, - struct request *next) + struct request *next, + int __max_segments) { int max_segments; DAC960_Controller_T * Controller = q->queuedata; int total_segments = req->nr_segments + next->nr_segments; max_segments = Controller->MaxSegmentsPerRequest[MINOR(req->rq_dev)]; + if (__max_segments < max_segments) + max_segments = __max_segments; if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + { total_segments--; + q->nr_segments--; + } if (total_segments > max_segments) return 0; @@ -1068,7 +1089,7 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) /* Register the Block Device Major Number for this DAC960 Controller. */ - if (register_blkdev(MajorNumber, "rd", &DAC960_FileOperations) < 0) + if (devfs_register_blkdev(MajorNumber, "dac960", &DAC960_FileOperations) < 0) { DAC960_Error("UNABLE TO ACQUIRE MAJOR NUMBER %d - DETACHING\n", Controller, MajorNumber); @@ -1080,7 +1101,8 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) q = BLK_DEFAULT_QUEUE(MajorNumber); blk_init_queue(q, RequestFunctions[Controller->ControllerNumber]); blk_queue_headactive(q, 0); - q->merge_fn = DAC_merge_fn; + q->back_merge_fn = DAC_back_merge_fn; + q->front_merge_fn = DAC_front_merge_fn; q->merge_requests_fn = DAC_merge_requests_fn; q->queuedata = (void *) Controller; @@ -1108,12 +1130,13 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) Complete initialization of the Generic Disk Information structure. 
*/ Controller->GenericDiskInfo.major = MajorNumber; - Controller->GenericDiskInfo.major_name = "rd"; + Controller->GenericDiskInfo.major_name = "dac960"; Controller->GenericDiskInfo.minor_shift = DAC960_MaxPartitionsBits; Controller->GenericDiskInfo.max_p = DAC960_MaxPartitions; Controller->GenericDiskInfo.nr_real = Controller->LogicalDriveCount; Controller->GenericDiskInfo.real_devices = Controller; Controller->GenericDiskInfo.next = NULL; + Controller->GenericDiskInfo.fops = &DAC960_FileOperations; /* Install the Generic Disk Information structure at the end of the list. */ @@ -1142,7 +1165,7 @@ static void DAC960_UnregisterBlockDevice(DAC960_Controller_T *Controller) /* Unregister the Block Device Major Number for this DAC960 Controller. */ - unregister_blkdev(MajorNumber, "rd"); + devfs_unregister_blkdev(MajorNumber, "dac960"); /* Remove the I/O Request Function. */ @@ -1156,7 +1179,6 @@ static void DAC960_UnregisterBlockDevice(DAC960_Controller_T *Controller) blk_size[MajorNumber] = NULL; blksize_size[MajorNumber] = NULL; max_sectors[MajorNumber] = NULL; - max_segments[MajorNumber] = NULL; /* Remove the Generic Disk Information structure from the list. 
*/ @@ -1305,15 +1327,17 @@ static int DAC960_Finalize(NotifierBlock_T *NotifierBlock, static boolean DAC960_ProcessRequest(DAC960_Controller_T *Controller, boolean WaitForCommand) { - IO_Request_T **RequestQueuePointer = - &blk_dev[DAC960_MAJOR + Controller->ControllerNumber].request_queue.current_request; + struct list_head * queue_head; IO_Request_T *Request; DAC960_Command_T *Command; char *RequestBuffer; + + queue_head = &blk_dev[DAC960_MAJOR + Controller->ControllerNumber].request_queue.queue_head; while (true) { - Request = *RequestQueuePointer; - if (Request == NULL || Request->rq_status == RQ_INACTIVE) return false; + if (list_empty(queue_head)) return false; + Request = blkdev_entry_next_request(queue_head); + if (Request->rq_status == RQ_INACTIVE) return false; Command = DAC960_AllocateCommand(Controller); if (Command != NULL) break; if (!WaitForCommand) return false; @@ -1335,7 +1359,7 @@ static boolean DAC960_ProcessRequest(DAC960_Controller_T *Controller, Command->BufferHeader = Request->bh; RequestBuffer = Request->buffer; Request->rq_status = RQ_INACTIVE; - *RequestQueuePointer = Request->next; + blkdev_dequeue_request(Request); wake_up(&wait_for_request); if (Command->SegmentCount == 1) { @@ -2565,8 +2589,8 @@ static int DAC960_IOCTL(Inode_T *Inode, File_T *File, (long *) Argument); case BLKRAGET: /* Get Read-Ahead. */ - if ((int *) Argument == NULL) return -EINVAL; - return put_user(read_ahead[MAJOR(Inode->i_rdev)], (int *) Argument); + if ((long *) Argument == NULL) return -EINVAL; + return put_user(read_ahead[MAJOR(Inode->i_rdev)], (long *) Argument); case BLKRASET: /* Set Read-Ahead. 
*/ if (!capable(CAP_SYS_ADMIN)) return -EACCES; diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 3f9c5f85b..9f313de8f 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -326,6 +326,14 @@ else endif endif +ifeq ($(CONFIG_BLK_DEV_LVM),y) +L_OBJS += lvm.o lvm-snap.o +else + ifeq ($(CONFIG_BLK_DEV_LVM),m) + M_OBJS += lvm-mod.o + endif +endif + ifeq ($(CONFIG_BLK_DEV_MD),y) LX_OBJS += md.o @@ -354,13 +362,9 @@ else endif ifeq ($(CONFIG_MD_RAID5),y) -LX_OBJS += xor.o -CFLAGS_xor.o := $(PROFILING) -fomit-frame-pointer L_OBJS += raid5.o else ifeq ($(CONFIG_MD_RAID5),m) - LX_OBJS += xor.o - CFLAGS_xor.o := $(PROFILING) -fomit-frame-pointer M_OBJS += raid5.o endif endif @@ -407,3 +411,6 @@ ide-mod.o: ide.o ide-features.o $(IDE_OBJS) ide-probe-mod.o: ide-probe.o ide-geometry.o $(LD) $(LD_RFLAG) -r -o $@ ide-probe.o ide-geometry.o + +lvm-mod.o: lvm.o lvm-snap.o + $(LD) -r -o $@ lvm.o lvm-snap.o diff --git a/drivers/block/README.lvm b/drivers/block/README.lvm new file mode 100644 index 000000000..3d652457f --- /dev/null +++ b/drivers/block/README.lvm @@ -0,0 +1,8 @@ + +This is the Logical Volume Manager driver for Linux, + +Tools, library that manage logical volumes can be found +at <http://linux.msede.com/lvm>. + +There you can obtain actual driver versions too. 
+ diff --git a/drivers/block/acsi.c b/drivers/block/acsi.c index ef9e3fa7c..f2a102cf2 100644 --- a/drivers/block/acsi.c +++ b/drivers/block/acsi.c @@ -54,6 +54,7 @@ #include <linux/fs.h> #include <linux/kernel.h> #include <linux/genhd.h> +#include <linux/devfs_fs_kernel.h> #include <linux/delay.h> #include <linux/mm.h> #include <linux/major.h> @@ -769,7 +770,7 @@ static void unexpected_acsi_interrupt( void ) static void bad_rw_intr( void ) { - if (!CURRENT) + if (QUEUE_EMPTY) return; if (++CURRENT->errors >= MAX_ERRORS) @@ -843,7 +844,7 @@ static void acsi_times_out( unsigned long dummy ) DEVICE_INTR = NULL; printk( KERN_ERR "ACSI timeout\n" ); - if (!CURRENT) return; + if (QUEUE_EMPTY) return; if (++CURRENT->errors >= MAX_ERRORS) { #ifdef DEBUG printk( KERN_ERR "ACSI: too many errors.\n" ); @@ -953,7 +954,7 @@ static void redo_acsi_request( void ) unsigned long pbuffer; struct buffer_head *bh; - if (CURRENT && CURRENT->rq_status == RQ_INACTIVE) { + if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE) { if (!DEVICE_INTR) { ENABLE_IRQ(); stdma_release(); @@ -969,7 +970,7 @@ static void redo_acsi_request( void ) /* Another check here: An interrupt or timer event could have * happened since the last check! 
*/ - if (CURRENT && CURRENT->rq_status == RQ_INACTIVE) { + if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE) { if (!DEVICE_INTR) { ENABLE_IRQ(); stdma_release(); @@ -979,7 +980,7 @@ static void redo_acsi_request( void ) if (DEVICE_INTR) return; - if (!CURRENT) { + if (QUEUE_EMPTY) { CLEAR_INTR; ENABLE_IRQ(); stdma_release(); @@ -1385,6 +1386,8 @@ static int acsi_mode_sense( int target, int lun, SENSE_DATA *sd ) ********************************************************************/ +extern struct block_device_operations acsi_fops; + static struct gendisk acsi_gendisk = { MAJOR_NR, /* Major number */ "ad", /* Major name */ @@ -1394,7 +1397,8 @@ static struct gendisk acsi_gendisk = { acsi_sizes, /* block sizes */ 0, /* number */ (void *)acsi_info, /* internal */ - NULL /* next */ + NULL, /* next */ + &acsi_fops, /* file operations */ }; #define MAX_SCSI_DEVICE_CODE 10 @@ -1776,16 +1780,14 @@ int acsi_init( void ) int err = 0; if (!MACH_IS_ATARI || !ATARIHW_PRESENT(ACSI)) return 0; - - if (register_blkdev( MAJOR_NR, "ad", &acsi_fops )) { + if (devfs_register_blkdev( MAJOR_NR, "ad", &acsi_fops )) { printk( KERN_ERR "Unable to get major %d for ACSI\n", MAJOR_NR ); return -EBUSY; } - if (!(acsi_buffer = (char *)atari_stram_alloc( ACSI_BUFFER_SIZE, NULL, "acsi" ))) { printk( KERN_ERR "Unable to get ACSI ST-Ram buffer.\n" ); - unregister_blkdev( MAJOR_NR, "ad" ); + devfs_unregister_blkdev( MAJOR_NR, "ad" ); return -ENOMEM; } phys_acsi_buffer = virt_to_phys( acsi_buffer ); @@ -1824,7 +1826,7 @@ void cleanup_module(void) blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); atari_stram_free( acsi_buffer ); - if (unregister_blkdev( MAJOR_NR, "ad" ) != 0) + if (devfs_unregister_blkdev( MAJOR_NR, "ad" ) != 0) printk( KERN_ERR "acsi: cleanup_module failed\n"); for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) diff --git a/drivers/block/acsi_slm.c b/drivers/block/acsi_slm.c index e4d343be3..88fa04ac6 100644 --- a/drivers/block/acsi_slm.c +++ b/drivers/block/acsi_slm.c @@ -65,6 
+65,7 @@ not be guaranteed. There are several ways to assure this: #include <linux/time.h> #include <linux/mm.h> #include <linux/malloc.h> +#include <linux/devfs_fs_kernel.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -987,23 +988,28 @@ int attach_slm( int target, int lun ) return( 1 ); } +static devfs_handle_t devfs_handle = NULL; int slm_init( void ) { - if (register_chrdev( MAJOR_NR, "slm", &slm_fops )) { + if (devfs_register_chrdev( MAJOR_NR, "slm", &slm_fops )) { printk( KERN_ERR "Unable to get major %d for ACSI SLM\n", MAJOR_NR ); return -EBUSY; } if (!(SLMBuffer = atari_stram_alloc( SLM_BUFFER_SIZE, NULL, "SLM" ))) { printk( KERN_ERR "Unable to get SLM ST-Ram buffer.\n" ); - unregister_chrdev( MAJOR_NR, "slm" ); + devfs_unregister_chrdev( MAJOR_NR, "slm" ); return -ENOMEM; } BufferP = SLMBuffer; SLMState = IDLE; + devfs_handle = devfs_mk_dir (NULL, "slm", 3, NULL); + devfs_register_series (devfs_handle, "%u", MAX_SLM, DEVFS_FL_DEFAULT, + MAJOR_NR, 0, S_IFCHR | S_IRUSR | S_IWUSR, 0, 0, + &slm_fops, NULL); return 0; } @@ -1026,7 +1032,8 @@ int init_module(void) void cleanup_module(void) { - if (unregister_chrdev( MAJOR_NR, "slm" ) != 0) + devfs_unregister (devfs_handle); + if (devfs_unregister_chrdev( MAJOR_NR, "slm" ) != 0) printk( KERN_ERR "acsi_slm: cleanup_module failed\n"); atari_stram_free( SLMBuffer ); } diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index e6bf5fa0c..0c7af176e 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1385,12 +1385,12 @@ static void redo_fd_request(void) char *data; unsigned long flags; - if (CURRENT && CURRENT->rq_status == RQ_INACTIVE){ + if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE){ return; } repeat: - if (!CURRENT) { + if (QUEUE_EMPTY) { /* Nothing left to do */ return; } diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index b1e20b7d3..b7aa4241e 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -624,7 +624,7 @@ static void fd_error( 
void ) return; } - if (!CURRENT) return; + if (QUEUE_EMPTY) return; CURRENT->errors++; if (CURRENT->errors >= MAX_ERRORS) { printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive ); @@ -1450,18 +1450,18 @@ static void redo_fd_request(void) int device, drive, type; DPRINT(("redo_fd_request: CURRENT=%08lx CURRENT->dev=%04x CURRENT->sector=%ld\n", - (unsigned long)CURRENT, CURRENT ? CURRENT->rq_dev : 0, - CURRENT ? CURRENT->sector : 0 )); + (unsigned long)CURRENT, !QUEUE_EMPTY ? CURRENT->rq_dev : 0, + !QUEUE_EMPTY ? CURRENT->sector : 0 )); IsFormatting = 0; - if (CURRENT && CURRENT->rq_status == RQ_INACTIVE){ + if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE){ return; } repeat: - if (!CURRENT) + if (QUEUE_EMPTY) goto the_end; if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 78269edf3..47291bef1 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -880,14 +880,16 @@ static void do_ida_request(int ctlr) cmdlist_t *c; int seg, sect; char *lastdataend; - request_queue_t * q; + struct list_head * queue_head; struct buffer_head *bh; struct request *creq; - q = &blk_dev[MAJOR_NR+ctlr].request_queue; + queue_head = &blk_dev[MAJOR_NR+ctlr].request_queue.queue_head; - creq = q->current_request; - if (creq == NULL || creq->rq_status == RQ_INACTIVE) + if (list_empty(queue_head)) + goto doreq_done; + creq = blkdev_entry_next_request(queue_head); + if (creq->rq_status == RQ_INACTIVE) goto doreq_done; if (ctlr != MAJOR(creq->rq_dev)-MAJOR_NR || @@ -961,10 +963,9 @@ DBGPX( bh->b_reqnext = NULL; DBGPX( printk("More to do on same request %p\n", creq); ); } else { -DBGPX( printk("Done with %p, queueing %p\n", creq, creq->next); ); - creq->rq_status = RQ_INACTIVE; - q->current_request = creq->next; - wake_up(&wait_for_request); +DBGPX( printk("Done with %p\n", creq); ); + blkdev_dequeue_request(creq); + end_that_request_last(creq); } c->req.hdr.cmd = (creq->cmd == READ) ? 
IDA_READ : IDA_WRITE; diff --git a/drivers/block/cs5530.c b/drivers/block/cs5530.c index cf8b5fdda..3e26b8006 100644 --- a/drivers/block/cs5530.c +++ b/drivers/block/cs5530.c @@ -1,5 +1,5 @@ /* - * linux/drivers/block/cs5530.c Version 0.2 Jan 30, 2000 + * linux/drivers/block/cs5530.c Version 0.5 Feb 13, 2000 * * Copyright (C) 2000 Mark Lord <mlord@pobox.com> * May be copied or modified under the terms of the GNU General Public License @@ -285,8 +285,6 @@ static int cs5530_config_dma (ide_drive_t *drive) } outb(inb(hwif->dma_base+2)|(unit?0x40:0x20), hwif->dma_base+2); /* set DMA_capable bit */ - if (!strcmp(drive->name, "hdc")) /* FIXME */ - return 0; /* * Finally, turn DMA on in software, and exit. */ diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 7b956dfae..be7e25879 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -144,6 +144,10 @@ static int irqdma_allocated = 0; #define FDPATCHES #include <linux/fdreg.h> +/* + * 1998/1/21 -- Richard Gooch <rgooch@atnf.csiro.au> -- devfs support + */ + #include <linux/fd.h> #include <linux/hdreg.h> @@ -158,6 +162,7 @@ static int irqdma_allocated = 0; #include <linux/ioport.h> #include <linux/interrupt.h> #include <linux/init.h> +#include <linux/devfs_fs_kernel.h> /* * PS/2 floppies have much slower step rates than regular floppies. 
@@ -196,7 +201,9 @@ static int use_virtual_dma=0; static unsigned short virtual_dma_port=0x3f0; void floppy_interrupt(int irq, void *dev_id, struct pt_regs * regs); static int set_dor(int fdc, char mask, char data); -static inline int __get_order(unsigned long size); +static void register_devfs_entries (int drive); +static devfs_handle_t devfs_handle = NULL; + #define K_64 0x10000 /* 64KB */ #include <asm/floppy.h> @@ -213,26 +220,12 @@ static inline int __get_order(unsigned long size); /* Dma Memory related stuff */ -/* Pure 2^n version of get_order */ -static inline int __get_order(unsigned long size) -{ - int order; - - size = (size-1) >> (PAGE_SHIFT-1); - order = -1; - do { - size >>= 1; - order++; - } while (size); - return order; -} - #ifndef fd_dma_mem_free -#define fd_dma_mem_free(addr, size) free_pages(addr, __get_order(size)) +#define fd_dma_mem_free(addr, size) free_pages(addr, get_order(size)) #endif #ifndef fd_dma_mem_alloc -#define fd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,__get_order(size)) +#define fd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,get_order(size)) #endif static inline void fallback_on_nodma_alloc(char **addr, size_t l) @@ -2276,7 +2269,7 @@ static void request_done(int uptodate) probing = 0; reschedule_timeout(MAXTIMEOUT, "request done %d", uptodate); - if (!CURRENT){ + if (QUEUE_EMPTY){ DPRINT("request list destroyed in floppy request done\n"); return; } @@ -2290,14 +2283,14 @@ static void request_done(int uptodate) DRS->maxtrack = 1; /* unlock chained buffers */ - while (current_count_sectors && CURRENT && + while (current_count_sectors && !QUEUE_EMPTY && current_count_sectors >= CURRENT->current_nr_sectors){ current_count_sectors -= CURRENT->current_nr_sectors; CURRENT->nr_sectors -= CURRENT->current_nr_sectors; CURRENT->sector += CURRENT->current_nr_sectors; end_request(1); } - if (current_count_sectors && CURRENT){ + if (current_count_sectors && !QUEUE_EMPTY){ /* "unlock" last subsector */ CURRENT->buffer += 
current_count_sectors <<9; CURRENT->current_nr_sectors -= current_count_sectors; @@ -2306,7 +2299,7 @@ static void request_done(int uptodate) return; } - if (current_count_sectors && !CURRENT) + if (current_count_sectors && QUEUE_EMPTY) DPRINT("request list destroyed in floppy request done\n"); } else { @@ -2869,14 +2862,14 @@ static void redo_fd_request(void) if (current_drive < N_DRIVE) floppy_off(current_drive); - if (CURRENT && CURRENT->rq_status == RQ_INACTIVE){ + if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE){ CLEAR_INTR; unlock_fdc(); return; } while(1){ - if (!CURRENT) { + if (QUEUE_EMPTY) { CLEAR_INTR; unlock_fdc(); return; @@ -3631,6 +3624,7 @@ static void config_types(void) first = 0; } printk("%s fd%d is %s", prepend, drive, name); + register_devfs_entries (drive); } *UDP = *params; } @@ -3844,6 +3838,37 @@ static struct block_device_operations floppy_fops = { revalidate: floppy_revalidate, }; +static void register_devfs_entries (int drive) +{ + int base_minor, i; + static char *table[] = + {"", "d360", "h1200", "u360", "u720", "h360", "h720", + "u1440", "u2880", "CompaQ", "h1440", "u1680", "h410", + "u820", "h1476", "u1722", "h420", "u830", "h1494", "u1743", + "h880", "u1040", "u1120", "h1600", "u1760", "u1920", + "u3200", "u3520", "u3840", "u1840", "u800", "u1600", + NULL + }; + static int t360[] = {1,0}, t1200[] = {2,5,6,10,12,14,16,18,20,23,0}, + t3in[] = {8,9,26,27,28, 7,11,15,19,24,25,29,31, 3,4,13,17,21,22,30,0}; + static int *table_sup[] = + {NULL, t360, t1200, t3in+5+8, t3in+5, t3in, t3in}; + + base_minor = (drive < 4) ? 
drive : (124 + drive); + if (UDP->cmos <= NUMBER(default_drive_params)) { + i = 0; + do { + char name[16]; + + sprintf (name, "%d%s", drive, table[table_sup[UDP->cmos][i]]); + devfs_register (devfs_handle, name, 0, DEVFS_FL_DEFAULT, MAJOR_NR, + base_minor + (table_sup[UDP->cmos][i] << 2), + S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP |S_IWGRP, + 0, 0, &floppy_fops, NULL); + } while (table_sup[UDP->cmos][i++]); + } +} + /* * Floppy Driver initialization * ============================= @@ -4066,7 +4091,8 @@ int __init floppy_init(void) raw_cmd = 0; - if (register_blkdev(MAJOR_NR,"fd",&floppy_fops)) { + devfs_handle = devfs_mk_dir (NULL, "floppy", 0, NULL); + if (devfs_register_blkdev(MAJOR_NR,"fd",&floppy_fops)) { printk("Unable to get major %d for floppy\n",MAJOR_NR); return -EBUSY; } @@ -4097,7 +4123,7 @@ int __init floppy_init(void) use_virtual_dma = can_use_virtual_dma & 1; fdc_state[0].address = FDC1; if (fdc_state[0].address == -1) { - unregister_blkdev(MAJOR_NR,"fd"); + devfs_unregister_blkdev(MAJOR_NR,"fd"); del_timer(&fd_timeout); return -ENODEV; } @@ -4109,7 +4135,7 @@ int __init floppy_init(void) if (floppy_grab_irq_and_dma()){ del_timer(&fd_timeout); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); - unregister_blkdev(MAJOR_NR,"fd"); + devfs_unregister_blkdev(MAJOR_NR,"fd"); del_timer(&fd_timeout); return -EBUSY; } @@ -4175,7 +4201,7 @@ int __init floppy_init(void) if (usage_count) floppy_release_irq_and_dma(); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); - unregister_blkdev(MAJOR_NR,"fd"); + devfs_unregister_blkdev(MAJOR_NR,"fd"); } for (drive = 0; drive < N_DRIVE; drive++) { @@ -4413,7 +4439,8 @@ void cleanup_module(void) { int dummy; - unregister_blkdev(MAJOR_NR, "fd"); + devfs_unregister (devfs_handle); + devfs_unregister_blkdev(MAJOR_NR, "fd"); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); /* eject disk, if any */ diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c index 95999e273..3f3237e87 100644 --- a/drivers/block/genhd.c +++ 
b/drivers/block/genhd.c @@ -4,6 +4,8 @@ * * Copyright (C) 1991-1998 Linus Torvalds * + * devfs support - jj, rgooch, 980122 + * * Moved partition checking code to fs/partitions* - Russell King * (linux@arm.uk.linux.org) */ diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 05a17a0c1..5520c17b0 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -32,6 +32,7 @@ #include <linux/sched.h> #include <linux/timer.h> #include <linux/fs.h> +#include <linux/devfs_fs_kernel.h> #include <linux/kernel.h> #include <linux/hdreg.h> #include <linux/genhd.h> @@ -145,7 +146,7 @@ static void dump_status (const char *msg, unsigned int stat) unsigned long flags; char devc; - devc = CURRENT ? 'a' + DEVICE_NR(CURRENT->rq_dev) : '?'; + devc = !QUEUE_EMPTY ? 'a' + DEVICE_NR(CURRENT->rq_dev) : '?'; save_flags (flags); sti(); #ifdef VERBOSE_ERRORS @@ -174,7 +175,7 @@ static void dump_status (const char *msg, unsigned int stat) if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) { printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL), inb(HD_CURRENT) & 0xf, inb(HD_SECTOR)); - if (CURRENT) + if (!QUEUE_EMPTY) printk(", sector=%ld", CURRENT->sector); } printk("\n"); @@ -351,7 +352,7 @@ static void bad_rw_intr(void) { int dev; - if (!CURRENT) + if (QUEUE_EMPTY) return; dev = DEVICE_NR(CURRENT->rq_dev); if (++CURRENT->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) { @@ -414,7 +415,7 @@ ok_to_read: #if (HD_DELAY > 0) last_req = read_timer(); #endif - if (CURRENT) + if (!QUEUE_EMPTY) hd_request(); return; } @@ -475,7 +476,7 @@ static void hd_times_out(void) unsigned int dev; DEVICE_INTR = NULL; - if (!CURRENT) + if (QUEUE_EMPTY) return; disable_irq(HD_IRQ); sti(); @@ -522,7 +523,7 @@ static void hd_request(void) { unsigned int dev, block, nsect, sec, track, head, cyl; - if (CURRENT && CURRENT->rq_status == RQ_INACTIVE) return; + if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE) return; if (DEVICE_INTR) return; repeat: @@ -662,6 +663,8 @@ static int hd_release(struct inode * 
inode, struct file * file) return 0; } +extern struct block_device_operations hd_fops; + static struct gendisk hd_gendisk = { MAJOR_NR, /* Major number */ "hd", /* Major name */ @@ -671,7 +674,8 @@ static struct gendisk hd_gendisk = { hd_sizes, /* block sizes */ 0, /* number */ NULL, /* internal use, not presently used */ - NULL /* next */ + NULL, /* next */ + &hd_fops, /* file operations */ }; static void hd_interrupt(int irq, void *dev_id, struct pt_regs *regs) @@ -800,7 +804,7 @@ static void hd_geninit(void) int __init hd_init(void) { - if (register_blkdev(MAJOR_NR,"hd",&hd_fops)) { + if (devfs_register_blkdev(MAJOR_NR,"hd",&hd_fops)) { printk("hd: unable to get major %d for hard disk\n",MAJOR_NR); return -1; } diff --git a/drivers/block/icside.c b/drivers/block/icside.c index 166d29abf..d0e8f8328 100644 --- a/drivers/block/icside.c +++ b/drivers/block/icside.c @@ -24,6 +24,8 @@ #include <asm/ecard.h> #include <asm/io.h> +extern char *ide_xfer_verbose (byte xfer_rate); + /* * Maximum number of interfaces per card */ diff --git a/drivers/block/ide-cd.c b/drivers/block/ide-cd.c index 48cf87c81..0f032ac8c 100644 --- a/drivers/block/ide-cd.c +++ b/drivers/block/ide-cd.c @@ -299,7 +299,6 @@ * Generic packet command support and error handling routines. */ - /* Mark that we've seen a media change, and invalidate our internal buffers. 
*/ static void cdrom_saw_media_change (ide_drive_t *drive) @@ -2270,7 +2269,12 @@ static int ide_cdrom_register (ide_drive_t *drive, int nslots) devinfo->mask |= CDC_PLAY_AUDIO; if (!CDROM_CONFIG_FLAGS (drive)->close_tray) devinfo->mask |= CDC_CLOSE_TRAY; - + + devinfo->de = devfs_register (drive->de, "cd", 2, DEVFS_FL_DEFAULT, + HWIF(drive)->major, minor, + S_IFBLK | S_IRUGO | S_IWUGO, 0, 0, + ide_fops, NULL); + return register_cdrom (devinfo); } diff --git a/drivers/block/ide-disk.c b/drivers/block/ide-disk.c index 1209aa82a..e62295241 100644 --- a/drivers/block/ide-disk.c +++ b/drivers/block/ide-disk.c @@ -744,6 +744,8 @@ static int idedisk_cleanup (ide_drive_t *drive) static void idedisk_setup (ide_drive_t *drive) { + int i; + struct hd_driveid *id = drive->id; unsigned long capacity; @@ -764,6 +766,15 @@ static void idedisk_setup (ide_drive_t *drive) drive->doorlocking = 1; } } + for (i = 0; i < MAX_DRIVES; ++i) { + ide_hwif_t *hwif = HWIF(drive); + + if (drive != &hwif->drives[i]) continue; + hwif->gd->de_arr[i] = drive->de; + if (drive->removable) + hwif->gd->flags[i] |= GENHD_FL_REMOVABLE; + break; + } /* Extract geometry if we did not already have one for the drive */ if (!drive->cyl || !drive->head || !drive->sect) { diff --git a/drivers/block/ide-dma.c b/drivers/block/ide-dma.c index 1e450b7e6..3b6f5e56a 100644 --- a/drivers/block/ide-dma.c +++ b/drivers/block/ide-dma.c @@ -214,6 +214,10 @@ static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq) struct scatterlist *sg = hwif->sg_table; int nents = 0; + if (rq->cmd == READ) + hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; + else + hwif->sg_dma_direction = PCI_DMA_TODEVICE; bh = rq->bh; do { unsigned char *virt_addr = bh->b_data; @@ -230,7 +234,7 @@ static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq) nents++; } while (bh != NULL); - return pci_map_sg(hwif->pci_dev, sg, nents); + return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); } /* @@ -265,7 +269,8 @@ int 
ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func) printk("%s: DMA table too small\n", drive->name); pci_unmap_sg(HWIF(drive)->pci_dev, HWIF(drive)->sg_table, - HWIF(drive)->sg_nents); + HWIF(drive)->sg_nents, + HWIF(drive)->sg_dma_direction); return 0; /* revert to PIO for this request */ } else { u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); @@ -301,7 +306,7 @@ void ide_destroy_dmatable (ide_drive_t *drive) struct scatterlist *sg = HWIF(drive)->sg_table; int nents = HWIF(drive)->sg_nents; - pci_unmap_sg(dev, sg, nents); + pci_unmap_sg(dev, sg, nents, HWIF(drive)->sg_dma_direction); } /* diff --git a/drivers/block/ide-floppy.c b/drivers/block/ide-floppy.c index b24933637..e2977c754 100644 --- a/drivers/block/ide-floppy.c +++ b/drivers/block/ide-floppy.c @@ -1549,6 +1549,15 @@ static void idefloppy_setup (ide_drive_t *drive, idefloppy_floppy_t *floppy) (void) idefloppy_get_capacity (drive); idefloppy_add_settings(drive); + for (i = 0; i < MAX_DRIVES; ++i) { + ide_hwif_t *hwif = HWIF(drive); + + if (drive != &hwif->drives[i]) continue; + hwif->gd->de_arr[i] = drive->de; + if (drive->removable) + hwif->gd->flags[i] |= GENHD_FL_REMOVABLE; + break; + } } static int idefloppy_cleanup (ide_drive_t *drive) diff --git a/drivers/block/ide-probe.c b/drivers/block/ide-probe.c index 33ca2900b..b57fa28da 100644 --- a/drivers/block/ide-probe.c +++ b/drivers/block/ide-probe.c @@ -406,7 +406,7 @@ static void probe_hwif (ide_hwif_t *hwif) ide_ioreg_t ide_control_reg = hwif->io_ports[IDE_CONTROL_OFFSET]; ide_ioreg_t region_low = hwif->io_ports[IDE_DATA_OFFSET]; ide_ioreg_t region_high = region_low; - ide_ioreg_t region_request = 8; + unsigned int region_request = 8; int i; if (hwif->noprobe) @@ -699,13 +699,28 @@ static void init_gendisk (ide_hwif_t *hwif) gd->nr_real = units; /* current num real drives */ gd->real_devices= hwif; /* ptr to internal data */ gd->next = NULL; /* linked list of major devs */ + gd->fops = ide_fops; /* file operations */ + gd->de_arr = 
kmalloc (sizeof *gd->de_arr * units, GFP_KERNEL); + gd->flags = kmalloc (sizeof *gd->flags * units, GFP_KERNEL); + if (gd->de_arr) + memset (gd->de_arr, 0, sizeof *gd->de_arr * units); + if (gd->flags) + memset (gd->flags, 0, sizeof *gd->flags * units); for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) ; hwif->gd = *gdp = gd; /* link onto tail of list */ for (unit = 0; unit < units; ++unit) { - if (hwif->drives[unit].present) + if (hwif->drives[unit].present) { + char name[64]; + ide_add_generic_settings(hwif->drives + unit); + sprintf (name, "ide/host%d/bus%d/target%d/lun%d", + hwif->channel ? hwif->mate->index : hwif->index, + hwif->channel, unit, 0); + hwif->drives[unit].de = + devfs_mk_dir (NULL, name, 0, NULL); + } } } @@ -764,7 +779,7 @@ static int hwif_init (ide_hwif_t *hwif) printk("%s: request_fn NOT DEFINED\n", hwif->name); return (hwif->present = 0); } - if (register_blkdev (hwif->major, hwif->name, ide_fops)) { + if (devfs_register_blkdev (hwif->major, hwif->name, ide_fops)) { printk("%s: UNABLE TO GET MAJOR NUMBER %d\n", hwif->name, hwif->major); return (hwif->present = 0); } diff --git a/drivers/block/ide-tape.c b/drivers/block/ide-tape.c index 9d2bc216f..1e1b6e44e 100644 --- a/drivers/block/ide-tape.c +++ b/drivers/block/ide-tape.c @@ -396,6 +396,7 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/major.h> +#include <linux/devfs_fs_kernel.h> #include <linux/errno.h> #include <linux/genhd.h> #include <linux/malloc.h> @@ -794,6 +795,7 @@ typedef struct { */ typedef struct { ide_drive_t *drive; + devfs_handle_t de_r, de_n; /* * Since a typical character device operation requires more @@ -5770,11 +5772,13 @@ static int idetape_cleanup (ide_drive_t *drive) DRIVER(drive)->busy = 0; (void) ide_unregister_subdriver (drive); drive->driver_data = NULL; + devfs_unregister (tape->de_r); + devfs_unregister (tape->de_n); kfree (tape); for (minor = 0; minor < MAX_HWIFS * MAX_DRIVES; minor++) if (idetape_chrdevs[minor].drive != NULL) return 
0; - unregister_chrdev (IDETAPE_MAJOR, "ht"); + devfs_unregister_chrdev (IDETAPE_MAJOR, "ht"); idetape_chrdev_present = 0; return 0; } @@ -5871,7 +5875,8 @@ int idetape_init (void) #endif return 0; } - if (!idetape_chrdev_present && register_chrdev (IDETAPE_MAJOR, "ht", &idetape_fops)) { + if (!idetape_chrdev_present && + devfs_register_chrdev (IDETAPE_MAJOR, "ht", &idetape_fops)) { printk (KERN_ERR "ide-tape: Failed to register character device interface\n"); MOD_DEC_USE_COUNT; #if ONSTREAM_DEBUG @@ -5905,10 +5910,21 @@ int idetape_init (void) for (minor = 0; idetape_chrdevs[minor].drive != NULL; minor++); idetape_setup (drive, tape, minor); idetape_chrdevs[minor].drive = drive; + tape->de_r = + devfs_register (drive->de, "mt", 2, DEVFS_FL_DEFAULT, + HWIF(drive)->major, minor, + S_IFCHR | S_IRUGO | S_IWUGO, 0, 0, + &idetape_fops, NULL); + tape->de_n = + devfs_register (drive->de, "mtn", 3, DEVFS_FL_DEFAULT, + HWIF(drive)->major, minor + 128, + S_IFCHR | S_IRUGO | S_IWUGO, 0, 0, + &idetape_fops, NULL); + devfs_register_tape (tape->de_r); supported++; failed--; } while ((drive = ide_scan_devices (ide_tape, idetape_driver.name, NULL, failed++)) != NULL); if (!idetape_chrdev_present && !supported) { - unregister_chrdev (IDETAPE_MAJOR, "ht"); + devfs_unregister_chrdev (IDETAPE_MAJOR, "ht"); } else idetape_chrdev_present = 1; ide_register_module (&idetape_module); diff --git a/drivers/block/ide.c b/drivers/block/ide.c index 06e1bbcc6..93da9bea2 100644 --- a/drivers/block/ide.c +++ b/drivers/block/ide.c @@ -501,8 +501,7 @@ void ide_end_request (byte uptodate, ide_hwgroup_t *hwgroup) if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) { add_blkdev_randomness(MAJOR(rq->rq_dev)); - hwgroup->drive->queue.current_request = rq->next; - blk_dev[MAJOR(rq->rq_dev)].request_queue.current_request = NULL; + blkdev_dequeue_request(rq); hwgroup->rq = NULL; end_that_request_last(rq); } @@ -772,8 +771,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, byte stat, byte err) } } 
spin_lock_irqsave(&io_request_lock, flags); - drive->queue.current_request = rq->next; - blk_dev[MAJOR(rq->rq_dev)].request_queue.current_request = NULL; + blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; rq->rq_status = RQ_INACTIVE; spin_unlock_irqrestore(&io_request_lock, flags); @@ -1076,7 +1074,7 @@ static ide_startstop_t start_request (ide_drive_t *drive) { ide_startstop_t startstop; unsigned long block, blockend; - struct request *rq = drive->queue.current_request; + struct request *rq = blkdev_entry_next_request(&drive->queue.queue_head); unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS; ide_hwif_t *hwif = HWIF(drive); @@ -1159,13 +1157,12 @@ repeat: best = NULL; drive = hwgroup->drive; do { - if (drive->queue.current_request && (!drive->sleep || 0 <= (signed long)(jiffies - drive->sleep))) { + if (!list_empty(&drive->queue.queue_head) && (!drive->sleep || 0 <= (signed long)(jiffies - drive->sleep))) { if (!best || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep))) || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive)))) { - struct blk_dev_struct *bdev = &blk_dev[HWIF(drive)->major]; - if( !bdev->request_queue.plugged ) + if( !drive->queue.plugged ) best = drive; } } @@ -1229,7 +1226,6 @@ repeat: */ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq) { - struct blk_dev_struct *bdev; ide_drive_t *drive; ide_hwif_t *hwif; ide_startstop_t startstop; @@ -1246,9 +1242,6 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq) hwgroup->rq = NULL; drive = hwgroup->drive; do { - bdev = &blk_dev[HWIF(drive)->major]; - if( !bdev->request_queue.plugged ) - bdev->request_queue.current_request = NULL; /* (broken since patch-2.1.15) */ if (drive->sleep && (!sleep || 0 < (signed long)(sleep - drive->sleep))) sleep = drive->sleep; } while ((drive = drive->next) != hwgroup->drive); @@ -1285,10 +1278,9 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq) 
drive->sleep = 0; drive->service_start = jiffies; - bdev = &blk_dev[hwif->major]; - if ( bdev->request_queue.plugged ) /* FIXME: paranoia */ + if ( drive->queue.plugged ) /* paranoia */ printk("%s: Huh? nuking plugged queue\n", drive->name); - bdev->request_queue.current_request = hwgroup->rq = drive->queue.current_request; + hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head); /* * Some systems have trouble with IDE IRQs arriving while * the driver is still setting things up. So, here we disable @@ -1670,7 +1662,7 @@ void ide_init_drive_cmd (struct request *rq) rq->sem = NULL; rq->bh = NULL; rq->bhtail = NULL; - rq->next = NULL; + rq->q = NULL; } /* @@ -1703,7 +1695,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned int major = HWIF(drive)->major; - struct request *cur_rq; + struct list_head * queue_head; DECLARE_MUTEX_LOCKED(sem); #ifdef CONFIG_BLK_DEV_PDC4030 @@ -1716,20 +1708,17 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio if (action == ide_wait) rq->sem = &sem; spin_lock_irqsave(&io_request_lock, flags); - cur_rq = drive->queue.current_request; - if (cur_rq == NULL || action == ide_preempt) { - rq->next = cur_rq; - drive->queue.current_request = rq; + queue_head = &drive->queue.queue_head; + if (list_empty(queue_head) || action == ide_preempt) { if (action == ide_preempt) hwgroup->rq = NULL; } else { if (action == ide_wait || action == ide_end) { - while (cur_rq->next != NULL) /* find end of list */ - cur_rq = cur_rq->next; - } - rq->next = cur_rq->next; - cur_rq->next = rq; + queue_head = queue_head->prev; + } else + queue_head = queue_head->next; } + list_add(&rq->queue, queue_head); ide_do_request(hwgroup, 0); spin_unlock_irqrestore(&io_request_lock, flags); if (action == ide_wait) { @@ -1989,6 +1978,10 @@ void ide_unregister (unsigned int index) d = hwgroup->drive; for (i = 0; i < MAX_DRIVES; ++i) { 
drive = &hwif->drives[i]; + if (drive->de) { + devfs_unregister (drive->de); + drive->de = NULL; + } if (!drive->present) continue; while (hwgroup->drive->next != drive) @@ -2037,6 +2030,10 @@ void ide_unregister (unsigned int index) gd = *gdp; *gdp = gd->next; kfree(gd->sizes); kfree(gd->part); + if (gd->de_arr) + kfree (gd->de_arr); + if (gd->flags) + kfree (gd->flags); kfree(gd); } old_hwif = *hwif; diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 3ed507694..808878b3e 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -3,6 +3,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1994, Karl Keyte: Added support for disk statistics + * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE */ /* @@ -27,6 +28,8 @@ #include <linux/module.h> +#define DEBUG_ELEVATOR + /* * MAC Floppy IWM hooks */ @@ -147,6 +150,18 @@ request_queue_t * blk_get_queue (kdev_t dev) return ret; } +static inline int get_request_latency(elevator_t * elevator, int rw) +{ + int latency; + + if (rw != READ) + latency = elevator->write_latency; + else + latency = elevator->read_latency; + + return latency; +} + void blk_cleanup_queue(request_queue_t * q) { memset(q, 0, sizeof(*q)); @@ -167,55 +182,49 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) q->make_request_fn = mfn; } -static int ll_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh) +static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) { - if (req->bhtail->b_data + req->bhtail->b_size != bh->b_data) { - if (req->nr_segments < MAX_SEGMENTS) { - req->nr_segments++; - return 1; - } - return 0; + if (req->nr_segments < max_segments) { + req->nr_segments++; + q->nr_segments++; + return 1; } - return 1; + return 0; +} + +static int ll_back_merge_fn(request_queue_t *q, struct request *req, + struct buffer_head *bh, int max_segments) +{ + if (req->bhtail->b_data + req->bhtail->b_size == 
bh->b_data) + return 1; + return ll_new_segment(q, req, max_segments); +} + +static int ll_front_merge_fn(request_queue_t *q, struct request *req, + struct buffer_head *bh, int max_segments) +{ + if (bh->b_data + bh->b_size == req->bh->b_data) + return 1; + return ll_new_segment(q, req, max_segments); } static int ll_merge_requests_fn(request_queue_t *q, struct request *req, - struct request *next) + struct request *next, int max_segments) { int total_segments = req->nr_segments + next->nr_segments; - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) { total_segments--; + q->nr_segments--; + } - if (total_segments > MAX_SEGMENTS) + if (total_segments > max_segments) return 0; req->nr_segments = total_segments; return 1; } -void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) -{ - q->request_fn = rfn; - q->current_request = NULL; - q->merge_fn = ll_merge_fn; - q->merge_requests_fn = ll_merge_requests_fn; - q->make_request_fn = NULL; - q->plug_tq.sync = 0; - q->plug_tq.routine = &generic_unplug_device; - q->plug_tq.data = q; - q->plugged = 0; - /* - * These booleans describe the queue properties. We set the - * default (and most common) values here. Other drivers can - * use the appropriate functions to alter the queue properties. - * as appropriate. - */ - q->plug_device_fn = NULL; - q->head_active = 1; -} - /* * "plug" the device if there are no outstanding requests: this will * force the transfer to start only after we have put all the requests @@ -224,19 +233,44 @@ void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) * This is called with interrupts off and no requests on the queue. 
* (and with the request spinlock aquired) */ -inline void generic_plug_device (request_queue_t *q, kdev_t dev) +static void generic_plug_device (request_queue_t *q, kdev_t dev) { +#ifdef CONFIG_BLK_DEV_MD if (MAJOR(dev) == MD_MAJOR) { spin_unlock_irq(&io_request_lock); BUG(); } - if (q->current_request) +#endif + if (!list_empty(&q->queue_head)) return; q->plugged = 1; queue_task(&q->plug_tq, &tq_disk); } +void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) +{ + INIT_LIST_HEAD(&q->queue_head); + q->elevator = ELEVATOR_DEFAULTS; + q->request_fn = rfn; + q->back_merge_fn = ll_back_merge_fn; + q->front_merge_fn = ll_front_merge_fn; + q->merge_requests_fn = ll_merge_requests_fn; + q->make_request_fn = NULL; + q->plug_tq.sync = 0; + q->plug_tq.routine = &generic_unplug_device; + q->plug_tq.data = q; + q->plugged = 0; + /* + * These booleans describe the queue properties. We set the + * default (and most common) values here. Other drivers can + * use the appropriate functions to alter the queue properties. + * as appropriate. + */ + q->plug_device_fn = generic_plug_device; + q->head_active = 1; +} + /* * remove the plug and let it rip.. 
*/ @@ -248,7 +282,7 @@ void generic_unplug_device(void * data) spin_lock_irqsave(&io_request_lock,flags); if (q->plugged) { q->plugged = 0; - if (q->current_request) + if (!list_empty(&q->queue_head)) (q->request_fn)(q); } spin_unlock_irqrestore(&io_request_lock,flags); @@ -388,6 +422,119 @@ static inline void drive_stat_acct(struct request *req, printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n"); } +/* elevator */ + +#define elevator_sequence_after(a,b) ((int)((b)-(a)) < 0) +#define elevator_sequence_before(a,b) elevator_sequence_after(b,a) +#define elevator_sequence_after_eq(a,b) ((int)((b)-(a)) <= 0) +#define elevator_sequence_before_eq(a,b) elevator_sequence_after_eq(b,a) + +static inline struct list_head * seek_to_not_starving_chunk(request_queue_t * q, + int * lat, int * starving) +{ + int sequence = q->elevator.sequence; + struct list_head * entry = q->queue_head.prev; + int pos = 0; + + do { + struct request * req = blkdev_entry_to_request(entry); + if (elevator_sequence_before(req->elevator_sequence, sequence)) { + *lat -= q->nr_segments - pos; + *starving = 1; + return entry; + } + pos += req->nr_segments; + } while ((entry = entry->prev) != &q->queue_head); + + *starving = 0; + + return entry->next; +} + +static inline void elevator_merge_requests(elevator_t * e, struct request * req, struct request * next) +{ + if (elevator_sequence_before(next->elevator_sequence, req->elevator_sequence)) + req->elevator_sequence = next->elevator_sequence; + if (req->cmd == READ) + e->read_pendings--; + +} + +static inline int elevator_sequence(elevator_t * e, int latency) +{ + return latency + e->sequence; +} + +#define elevator_merge_before(q, req, lat) __elevator_merge((q), (req), (lat), 0) +#define elevator_merge_after(q, req, lat) __elevator_merge((q), (req), (lat), 1) +static inline void __elevator_merge(request_queue_t * q, struct request * req, int latency, int after) +{ + int sequence = elevator_sequence(&q->elevator, latency); + if (after) + sequence -= 
req->nr_segments; + if (elevator_sequence_before(sequence, req->elevator_sequence)) { + if (!after) + printk(KERN_WARNING __FUNCTION__ + ": req latency %d req latency %d\n", + req->elevator_sequence - q->elevator.sequence, + sequence - q->elevator.sequence); + req->elevator_sequence = sequence; + } +} + +static inline void elevator_queue(request_queue_t * q, + struct request * req, + struct list_head * entry, + int latency, int starving) +{ + struct request * tmp, * __tmp; + int __latency = latency; + + __tmp = tmp = blkdev_entry_to_request(entry); + + for (;; tmp = blkdev_next_request(tmp)) + { + if ((latency -= tmp->nr_segments) <= 0) + { + tmp = __tmp; + latency = __latency; + + if (starving) + break; + + if (q->head_active && !q->plugged) + { + latency -= tmp->nr_segments; + break; + } + + list_add(&req->queue, &q->queue_head); + goto after_link; + } + + if (tmp->queue.next == &q->queue_head) + break; + + { + const int after_current = IN_ORDER(tmp,req); + const int before_next = IN_ORDER(req,blkdev_next_request(tmp)); + + if (!IN_ORDER(tmp,blkdev_next_request(tmp))) { + if (after_current || before_next) + break; + } else { + if (after_current && before_next) + break; + } + } + } + + list_add(&req->queue, &tmp->queue); + + after_link: + req->elevator_sequence = elevator_sequence(&q->elevator, latency); +} + /* * add-request adds a request to the linked list. * It disables interrupts (aquires the request spinlock) so that it can muck @@ -398,32 +545,20 @@ static inline void drive_stat_acct(struct request *req, * which is important for drive_stat_acct() above. 
*/ -static inline void __add_request(request_queue_t * q, struct request * req) +static inline void __add_request(request_queue_t * q, struct request * req, + int empty, struct list_head * entry, + int latency, int starving) { - int major = MAJOR(req->rq_dev); - struct request * tmp; + int major; drive_stat_acct(req, req->nr_sectors, 1); - req->next = NULL; - if (!(tmp = q->current_request)) { - q->current_request = req; + if (empty) { + req->elevator_sequence = elevator_sequence(&q->elevator, latency); + list_add(&req->queue, &q->queue_head); return; } - for ( ; tmp->next ; tmp = tmp->next) { - const int after_current = IN_ORDER(tmp,req); - const int before_next = IN_ORDER(req,tmp->next); - - if (!IN_ORDER(tmp,tmp->next)) { - if (after_current || before_next) - break; - } else { - if (after_current && before_next) - break; - } - } - req->next = tmp->next; - tmp->next = req; + elevator_queue(q, req, entry, latency, starving); /* * FIXME(eric) I don't understand why there is a need for this @@ -432,6 +567,7 @@ static inline void __add_request(request_queue_t * q, struct request * req) * I am leaving this in here until I hear back from the COMPAQ * people. 
*/ + major = MAJOR(req->rq_dev); if (major >= COMPAQ_SMART2_MAJOR+0 && major <= COMPAQ_SMART2_MAJOR+7) { (q->request_fn)(q); @@ -448,12 +584,14 @@ static inline void __add_request(request_queue_t * q, struct request * req) */ static inline void attempt_merge (request_queue_t * q, struct request *req, - int max_sectors) + int max_sectors, + int max_segments) { - struct request *next = req->next; - - if (!next) + struct request *next; + + if (req->queue.next == &q->queue_head) return; + next = blkdev_next_request(req); if (req->sector + req->nr_sectors != next->sector) return; if (next->sem || req->cmd != next->cmd || req->rq_dev != next->rq_dev || req->nr_sectors + next->nr_sectors > max_sectors) @@ -464,25 +602,79 @@ static inline void attempt_merge (request_queue_t * q, * will have been updated to the appropriate number, * and we shouldn't do it here too. */ - if(!(q->merge_requests_fn)(q, req, next)) + if(!(q->merge_requests_fn)(q, req, next, max_segments)) return; + elevator_merge_requests(&q->elevator, req, next); req->bhtail->b_reqnext = next->bh; req->bhtail = next->bhtail; req->nr_sectors += next->nr_sectors; next->rq_status = RQ_INACTIVE; - req->next = next->next; + list_del(&next->queue); wake_up (&wait_for_request); } +static inline void elevator_debug(request_queue_t * q, kdev_t dev) +{ +#ifdef DEBUG_ELEVATOR + int read_pendings = 0, nr_segments = 0; + elevator_t * elevator = &q->elevator; + struct list_head * entry = &q->queue_head; + static int counter; + + if (counter++ % 100) + return; + + while ((entry = entry->next) != &q->queue_head) + { + struct request * req; + + req = blkdev_entry_to_request(entry); + if (!req->q) + continue; + if (req->cmd == READ) + read_pendings++; + nr_segments += req->nr_segments; + } + + if (read_pendings != elevator->read_pendings) + { + printk(KERN_WARNING + "%s: elevator read_pendings %d should be %d\n", + kdevname(dev), elevator->read_pendings, + read_pendings); + elevator->read_pendings = read_pendings; + } + if 
(nr_segments != q->nr_segments) + { + printk(KERN_WARNING + "%s: elevator nr_segments %d should be %d\n", + kdevname(dev), q->nr_segments, + nr_segments); + q->nr_segments = nr_segments; + } +#endif +} + +static inline void elevator_account_request(request_queue_t * q, struct request * req) +{ + q->elevator.sequence++; + if (req->cmd == READ) + q->elevator.read_pendings++; + q->nr_segments++; +} + static inline void __make_request(request_queue_t * q, int rw, struct buffer_head * bh) { int major = MAJOR(bh->b_rdev); unsigned int sector, count; - struct request * req; + int max_segments = MAX_SEGMENTS; + struct request * req, * prev; int rw_ahead, max_req, max_sectors; unsigned long flags; + int orig_latency, latency, __latency, starving, __starving, empty; + struct list_head * entry, * __entry = NULL; count = bh->b_size >> 9; sector = bh->b_rsector; @@ -569,21 +761,33 @@ static inline void __make_request(request_queue_t * q, int rw, */ max_sectors = get_max_sectors(bh->b_rdev); + __latency = orig_latency = get_request_latency(&q->elevator, rw); + /* * Now we acquire the request spinlock, we have to be mega careful * not to schedule or do something nonatomic */ spin_lock_irqsave(&io_request_lock,flags); - req = q->current_request; - if (!req) { - /* MD and loop can't handle plugging without deadlocking */ - if (q->plug_device_fn) - q->plug_device_fn(q, bh->b_rdev); /* is atomic */ - else - generic_plug_device(q, bh->b_rdev); /* is atomic */ + elevator_debug(q, bh->b_rdev); + + empty = 0; + if (list_empty(&q->queue_head)) { + empty = 1; + q->plug_device_fn(q, bh->b_rdev); /* is atomic */ goto get_rq; } + /* avoid write-bombs to not hurt iteractiveness of reads */ + if (rw != READ && q->elevator.read_pendings) + max_segments = q->elevator.max_bomb_segments; + + entry = seek_to_not_starving_chunk(q, &__latency, &starving); + + __entry = entry; + __starving = starving; + + latency = __latency; + if (q->head_active && !q->plugged) { /* * The scsi disk and cdrom drivers 
completely remove the request @@ -595,11 +799,18 @@ static inline void __make_request(request_queue_t * q, int rw, * entry may be busy being processed and we thus can't change * it. */ - if ((req = req->next) == NULL) - goto get_rq; + if (entry == q->queue_head.next) { + latency -= blkdev_entry_to_request(entry)->nr_segments; + if ((entry = entry->next) == &q->queue_head) + goto get_rq; + starving = 0; + } } + prev = NULL; do { + req = blkdev_entry_to_request(entry); + if (req->sem) continue; if (req->cmd != rw) @@ -610,6 +821,8 @@ static inline void __make_request(request_queue_t * q, int rw, continue; /* Can we add it to the end of this request? */ if (req->sector + req->nr_sectors == sector) { + if (latency - req->nr_segments < 0) + break; /* * The merge_fn is a more advanced way * of accomplishing the same task. Instead @@ -622,16 +835,21 @@ static inline void __make_request(request_queue_t * q, int rw, * may suggest that we shouldn't merge * this */ - if(!(q->merge_fn)(q, req, bh)) + if(!(q->back_merge_fn)(q, req, bh, max_segments)) continue; req->bhtail->b_reqnext = bh; req->bhtail = bh; req->nr_sectors += count; drive_stat_acct(req, count, 0); + + elevator_merge_after(q, req, latency); + /* Can we now merge this req with the next? */ - attempt_merge(q, req, max_sectors); + attempt_merge(q, req, max_sectors, max_segments); /* or to the beginning? */ } else if (req->sector - count == sector) { + if (!prev && starving) + continue; /* * The merge_fn is a more advanced way * of accomplishing the same task. 
Instead @@ -644,7 +862,7 @@ static inline void __make_request(request_queue_t * q, int rw, * may suggest that we shouldn't merge * this */ - if(!(q->merge_fn)(q, req, bh)) + if(!(q->front_merge_fn)(q, req, bh, max_segments)) continue; bh->b_reqnext = req->bh; req->bh = bh; @@ -653,13 +871,21 @@ static inline void __make_request(request_queue_t * q, int rw, req->sector = sector; req->nr_sectors += count; drive_stat_acct(req, count, 0); + + elevator_merge_before(q, req, latency); + + if (prev) + attempt_merge(q, prev, max_sectors, max_segments); } else continue; + q->elevator.sequence++; spin_unlock_irqrestore(&io_request_lock,flags); return; - } while ((req = req->next) != NULL); + } while (prev = req, + (latency -= req->nr_segments) >= 0 && + (entry = entry->next) != &q->queue_head); /* find an unused request. */ get_rq: @@ -675,6 +901,14 @@ get_rq: goto end_io; req = __get_request_wait(max_req, bh->b_rdev); spin_lock_irqsave(&io_request_lock,flags); + + /* lock got dropped so revalidate elevator */ + empty = 1; + if (!list_empty(&q->queue_head)) { + empty = 0; + __latency = orig_latency; + __entry = seek_to_not_starving_chunk(q, &__latency, &__starving); + } } /* * Dont start the IO if the buffer has been @@ -707,8 +941,10 @@ get_rq: req->sem = NULL; req->bh = bh; req->bhtail = bh; - req->next = NULL; - __add_request(q, req); + req->q = q; + __add_request(q, req, empty, __entry, __latency, __starving); + elevator_account_request(q, req); + spin_unlock_irqrestore(&io_request_lock, flags); return; @@ -867,6 +1103,8 @@ int end_that_request_first (struct request *req, int uptodate, char *name) void end_that_request_last(struct request *req) { + if (req->q) + BUG(); if (req->sem != NULL) up(req->sem); req->rq_status = RQ_INACTIVE; @@ -886,7 +1124,6 @@ int __init blk_dev_init(void) req = all_requests + NR_REQUEST; while (--req >= all_requests) { req->rq_status = RQ_INACTIVE; - req->next = NULL; } memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, 
sizeof(max_readahead)); @@ -1001,4 +1238,5 @@ EXPORT_SYMBOL(blk_init_queue); EXPORT_SYMBOL(blk_cleanup_queue); EXPORT_SYMBOL(blk_queue_headactive); EXPORT_SYMBOL(blk_queue_pluggable); +EXPORT_SYMBOL(blk_queue_make_request); EXPORT_SYMBOL(generic_make_request); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 587156935..3209aa46b 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -14,6 +14,8 @@ * * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997 * + * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998 + * * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998 * * Loadable modules and other fixes by AK, 1998 @@ -40,6 +42,10 @@ * it passes the underlying device's block number instead of the * offset. This makes it change for a given block when the file is * moved/restored/copied and also doesn't work over NFS. + * AV, Feb 12, 2000: we pass the logical block number now. It fixes the + * problem above. Encryption modules that used to rely on the old scheme + * should just call ->i_mapping->bmap() to calculate the physical block + * number. 
*/ #include <linux/module.h> @@ -52,6 +58,7 @@ #include <linux/major.h> #include <linux/init.h> +#include <linux/devfs_fs_kernel.h> #include <asm/uaccess.h> @@ -73,14 +80,11 @@ static int max_loop = 8; static struct loop_device *loop_dev; static int *loop_sizes; static int *loop_blksizes; +static devfs_handle_t devfs_handle = NULL; /* For the directory */ #define FALSE 0 #define TRUE (!FALSE) -/* Forward declaration of function to create missing blocks in the - backing file (can happen if the backing file is sparse) */ -static int create_missing_block(struct loop_device *lo, int block, int blksize); - /* * Transfer functions */ @@ -164,24 +168,133 @@ static void figure_loop_size(struct loop_device *lo) loop_sizes[lo->lo_number] = size; } +static int lo_send(struct loop_device *lo, char *data, int len, loff_t pos, + int blksize) +{ + struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ + struct address_space *mapping = lo->lo_dentry->d_inode->i_mapping; + struct address_space_operations *aops = mapping->a_ops; + struct page *page; + char *kaddr; + unsigned long index; + unsigned size, offset; + + index = pos >> PAGE_CACHE_SHIFT; + offset = pos & (PAGE_CACHE_SIZE - 1); + while (len > 0) { + int IV = index * (PAGE_CACHE_SIZE/blksize) + offset/blksize; + size = PAGE_CACHE_SIZE - offset; + if (size > len) + size = len; + + page = grab_cache_page(mapping, index); + if (!page) + goto fail; + if (aops->prepare_write(page, offset, offset+size)) + goto unlock; + kaddr = (char*)page_address(page); + if ((lo->transfer)(lo, WRITE, kaddr+offset, data, size, IV)) + goto write_fail; + if (aops->commit_write(file, page, offset, offset+size)) + goto unlock; + data += size; + len -= size; + offset = 0; + index++; + pos += size; + UnlockPage(page); + page_cache_release(page); + } + return 0; + +write_fail: + printk(KERN_ERR "loop: transfer error block %ld\n", index); + ClearPageUptodate(page); + kunmap(page); +unlock: + UnlockPage(page); + page_cache_release(page); +fail: 
+ return -1; +} + +struct lo_read_data { + struct loop_device *lo; + char *data; + int blksize; +}; + +static int lo_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) +{ + char *kaddr; + unsigned long count = desc->count; + struct lo_read_data *p = (struct lo_read_data*)desc->buf; + struct loop_device *lo = p->lo; + int IV = page->index * (PAGE_CACHE_SIZE/p->blksize) + offset/p->blksize; + + if (size > count) + size = count; + + kaddr = (char*)kmap(page); + if ((lo->transfer)(lo,READ,kaddr+offset,p->data,size,IV)) { + size = 0; + printk(KERN_ERR "loop: transfer error block %ld\n",page->index); + desc->error = -EINVAL; + } + kunmap(page); + + desc->count = count - size; + desc->written += size; + p->data += size; + return size; +} + +static int lo_receive(struct loop_device *lo, char *data, int len, loff_t pos, + int blksize) +{ + struct file *file = lo->lo_backing_file; + struct lo_read_data cookie; + read_descriptor_t desc; + + cookie.lo = lo; + cookie.data = data; + cookie.blksize = blksize; + desc.written = 0; + desc.count = len; + desc.buf = (char*)&cookie; + desc.error = 0; + do_generic_file_read(file, &pos, &desc, lo_read_actor); + return desc.error; +} + static void do_lo_request(request_queue_t * q) { - int real_block, block, offset, len, blksize, size; + int block, offset, len, blksize, size; char *dest_addr; struct loop_device *lo; struct buffer_head *bh; struct request *current_request; - int block_present; + loff_t pos; repeat: INIT_REQUEST; current_request=CURRENT; - CURRENT=current_request->next; + blkdev_dequeue_request(current_request); if (MINOR(current_request->rq_dev) >= max_loop) goto error_out; lo = &loop_dev[MINOR(current_request->rq_dev)]; if (!lo->lo_dentry || !lo->transfer) goto error_out; + if (current_request->cmd == WRITE) { + if (lo->lo_flags & LO_FLAGS_READ_ONLY) + goto error_out; + } else if (current_request->cmd != READ) { + printk(KERN_ERR "unknown loop device command (%d)?!?", 
current_request->cmd); + goto error_out; + } + + dest_addr = current_request->buffer; + len = current_request->current_nr_sectors << 9; blksize = BLOCK_SIZE; if (blksize_size[MAJOR(lo->lo_device)]) { @@ -190,8 +303,9 @@ repeat: blksize = BLOCK_SIZE; } - dest_addr = current_request->buffer; - + if (lo->lo_flags & LO_FLAGS_DO_BMAP) + goto file_backed; + if (blksize < 512) { block = current_request->sector * (512/blksize); offset = 0; @@ -201,155 +315,79 @@ repeat: } block += lo->lo_offset / blksize; offset += lo->lo_offset % blksize; - if (offset > blksize) { + if (offset >= blksize) { block++; offset -= blksize; } - len = current_request->current_nr_sectors << 9; - - if (current_request->cmd == WRITE) { - if (lo->lo_flags & LO_FLAGS_READ_ONLY) - goto error_out; - } else if (current_request->cmd != READ) { - printk(KERN_ERR "unknown loop device command (%d)?!?", current_request->cmd); - goto error_out; - } spin_unlock_irq(&io_request_lock); + while (len > 0) { size = blksize - offset; if (size > len) size = len; - real_block = block; - block_present = TRUE; - - if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - real_block = bmap(lo->lo_dentry->d_inode, block); - if (!real_block) { - - /* The backing file is a sparse file and this block - doesn't exist. If reading, return zeros. 
If - writing, force the underlying FS to create - the block */ - if (current_request->cmd == READ) { - memset(dest_addr, 0, size); - block_present = FALSE; - } else { - if (!create_missing_block(lo, block, blksize)) { - goto error_out_lock; - } - real_block = bmap(lo->lo_dentry->d_inode, block); - } - - } + bh = getblk(lo->lo_device, block, blksize); + if (!bh) { + printk(KERN_ERR "loop: device %s: getblk(-, %d, %d) returned NULL", + kdevname(lo->lo_device), + block, blksize); + goto error_out_lock; } - - if (block_present) { - bh = getblk(lo->lo_device, real_block, blksize); - if (!bh) { - printk(KERN_ERR "loop: device %s: getblk(-, %d, %d) returned NULL", - kdevname(lo->lo_device), - block, blksize); - goto error_out_lock; - } - if (!buffer_uptodate(bh) && ((current_request->cmd == READ) || - (offset || (len < blksize)))) { - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - brelse(bh); - goto error_out_lock; - } - } - - if ((lo->transfer)(lo, current_request->cmd, bh->b_data + offset, - dest_addr, size, real_block)) { - printk(KERN_ERR "loop: transfer error block %d\n", block); + if (!buffer_uptodate(bh) && ((current_request->cmd == READ) || + (offset || (len < blksize)))) { + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { brelse(bh); goto error_out_lock; } + } - if (current_request->cmd == WRITE) { - mark_buffer_uptodate(bh, 1); - mark_buffer_dirty(bh, 1); - } + if ((lo->transfer)(lo, current_request->cmd, bh->b_data + offset, + dest_addr, size, block)) { + printk(KERN_ERR "loop: transfer error block %d\n", block); brelse(bh); + goto error_out_lock; + } + + if (current_request->cmd == WRITE) { + mark_buffer_uptodate(bh, 1); + mark_buffer_dirty(bh, 1); } + brelse(bh); dest_addr += size; len -= size; offset = 0; block++; } + goto done; + +file_backed: + pos = ((loff_t)current_request->sector << 9) + lo->lo_offset; + spin_unlock_irq(&io_request_lock); + if (current_request->cmd == WRITE) { + if 
(lo_send(lo, dest_addr, len, pos, blksize)) + goto error_out_lock; + } else { + if (lo_receive(lo, dest_addr, len, pos, blksize)) + goto error_out_lock; + } +done: spin_lock_irq(&io_request_lock); - current_request->next=CURRENT; - CURRENT=current_request; + current_request->sector += current_request->current_nr_sectors; + current_request->nr_sectors -= current_request->current_nr_sectors; + list_add(¤t_request->queue, &q->queue_head); end_request(1); goto repeat; error_out_lock: spin_lock_irq(&io_request_lock); error_out: - current_request->next=CURRENT; - CURRENT=current_request; + list_add(¤t_request->queue, &q->queue_head); end_request(0); goto repeat; } -static int create_missing_block(struct loop_device *lo, int block, int blksize) -{ - struct file *file; - loff_t new_offset; - char zero_buf[1] = { 0 }; - ssize_t retval; - mm_segment_t old_fs; - struct inode *inode; - - file = lo->lo_backing_file; - if (file == NULL) { - printk(KERN_WARNING "loop: cannot create block - no backing file\n"); - return FALSE; - } - - if (file->f_op == NULL) { - printk(KERN_WARNING "loop: cannot create block - no file ops\n"); - return FALSE; - } - - new_offset = block * blksize; - - if (file->f_op->llseek != NULL) { - file->f_op->llseek(file, new_offset, 0); - } else { - /* Do what the default llseek() code would have done */ - file->f_pos = new_offset; - file->f_reada = 0; - file->f_version = ++event; - } - - if (file->f_op->write == NULL) { - printk(KERN_WARNING "loop: cannot create block - file not writeable\n"); - return FALSE; - } - - old_fs = get_fs(); - set_fs(get_ds()); - - inode = file->f_dentry->d_inode; - down(&inode->i_sem); - retval = file->f_op->write(file, zero_buf, 1, &file->f_pos); - up(&inode->i_sem); - - set_fs(old_fs); - - if (retval < 0) { - printk(KERN_WARNING "loop: cannot create block - FS write failed: code %Zi\n", - retval); - return FALSE; - } else { - return TRUE; - } -} - static int loop_set_fd(struct loop_device *lo, kdev_t dev, unsigned int arg) { 
struct file *file; @@ -386,22 +424,13 @@ static int loop_set_fd(struct loop_device *lo, kdev_t dev, unsigned int arg) a file structure */ lo->lo_backing_file = NULL; } else if (S_ISREG(inode->i_mode)) { - /* - * Total crap. We should just use pagecache instead of trying - * to redirect on block level. - */ - if (!inode->i_mapping->a_ops->bmap) { - printk(KERN_ERR "loop: device has no block access/not implemented\n"); - goto out_putf; - } - - /* Backed by a regular file - we need to hold onto - a file structure for this file. We'll use it to - write to blocks that are not already present in - a sparse file. We create a new file structure - based on the one passed to us via 'arg'. This is - to avoid changing the file structure that the - caller is using */ + struct address_space_operations *aops; + /* Backed by a regular file - we need to hold onto a file + structure for this file. Friggin' NFS can't live without + it on write and for reading we use do_generic_file_read(), + so... We create a new file structure based on the one + passed to us via 'arg'. This is to avoid changing the file + structure that the caller is using */ lo->lo_device = inode->i_dev; lo->lo_flags = LO_FLAGS_DO_BMAP; @@ -424,17 +453,23 @@ static int loop_set_fd(struct loop_device *lo, kdev_t dev, unsigned int arg) lo->lo_backing_file = NULL; } } + aops = inode->i_mapping->a_ops; + /* + * If we can't read - sorry. If we only can't write - well, + * it's going to be read-only. 
+ */ + if (!aops->readpage) + error = -EINVAL; + else if (!aops->prepare_write || !aops->commit_write) + lo->lo_flags |= LO_FLAGS_READ_ONLY; } if (error) goto out_putf; - if (IS_RDONLY (inode) || is_read_only(lo->lo_device)) { + if (IS_RDONLY (inode) || is_read_only(lo->lo_device)) lo->lo_flags |= LO_FLAGS_READ_ONLY; - set_device_ro(dev, 1); - } else { - vmtruncate (inode, 0); - set_device_ro(dev, 0); - } + + set_device_ro(dev, (lo->lo_flags & LO_FLAGS_READ_ONLY)!=0); lo->lo_dentry = dget(file->f_dentry); lo->transfer = NULL; @@ -719,11 +754,16 @@ int __init loop_init(void) { int i; - if (register_blkdev(MAJOR_NR, "loop", &lo_fops)) { + if (devfs_register_blkdev(MAJOR_NR, "loop", &lo_fops)) { printk(KERN_WARNING "Unable to get major number %d for loop device\n", MAJOR_NR); return -EIO; } + devfs_handle = devfs_mk_dir (NULL, "loop", 0, NULL); + devfs_register_series (devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT, + MAJOR_NR, 0, + S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, 0, 0, + &lo_fops, NULL); if ((max_loop < 1) || (max_loop > 255)) { printk (KERN_WARNING "loop: invalid max_loop (must be between 1 and 255), using default (8)\n"); @@ -755,6 +795,7 @@ int __init loop_init(void) } blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i=0; i < max_loop; i++) { memset(&loop_dev[i], 0, sizeof(struct loop_device)); loop_dev[i].lo_number = i; @@ -772,7 +813,8 @@ int __init loop_init(void) #ifdef MODULE void cleanup_module(void) { - if (unregister_blkdev(MAJOR_NR, "loop") != 0) + devfs_unregister (devfs_handle); + if (devfs_unregister_blkdev(MAJOR_NR, "loop") != 0) printk(KERN_WARNING "loop: cannot unregister blkdev\n"); kfree (loop_dev); diff --git a/drivers/block/lvm-snap.c b/drivers/block/lvm-snap.c new file mode 100644 index 000000000..77500cc43 --- /dev/null +++ b/drivers/block/lvm-snap.c @@ -0,0 +1,434 @@ +/* + * kernel/lvm-snap.c + * + * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE + * + 
* LVM snapshot driver is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * LVM driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +#include <linux/kernel.h> +#include <linux/vmalloc.h> +#include <linux/blkdev.h> +#include <linux/smp_lock.h> +#include <linux/types.h> +#include <linux/iobuf.h> +#include <linux/lvm.h> + + +static char *lvm_snap_version = "LVM 0.8final (15/02/2000)\n"; + +extern const char *const lvm_name; +extern int lvm_blocksizes[]; + +void lvm_snapshot_release(lv_t *); + +#define hashfn(dev,block,mask,chunk_size) \ + ((HASHDEV(dev)^((block)/(chunk_size))) & (mask)) + +static inline lv_block_exception_t * +lvm_find_exception_table(kdev_t org_dev, unsigned long org_start, lv_t * lv) +{ + struct list_head * hash_table = lv->lv_snapshot_hash_table, * next; + unsigned long mask = lv->lv_snapshot_hash_mask; + int chunk_size = lv->lv_chunk_size; + lv_block_exception_t * ret; + int i = 0; + + hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)]; + ret = NULL; + for (next = hash_table->next; next != hash_table; next = next->next) + { + lv_block_exception_t * exception; + + exception = list_entry(next, lv_block_exception_t, hash); + if (exception->rsector_org == org_start && + exception->rdev_org == org_dev) + { + if (i) + { + /* fun, isn't it? 
:) */ + list_del(next); + list_add(next, hash_table); + } + ret = exception; + break; + } + i++; + } + return ret; +} + +static inline void lvm_hash_link(lv_block_exception_t * exception, + kdev_t org_dev, unsigned long org_start, + lv_t * lv) +{ + struct list_head * hash_table = lv->lv_snapshot_hash_table; + unsigned long mask = lv->lv_snapshot_hash_mask; + int chunk_size = lv->lv_chunk_size; + + hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)]; + list_add(&exception->hash, hash_table); +} + +int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector, + unsigned long pe_start, lv_t * lv) +{ + int ret; + unsigned long pe_off, pe_adjustment, __org_start; + kdev_t __org_dev; + int chunk_size = lv->lv_chunk_size; + lv_block_exception_t * exception; + + pe_off = pe_start % chunk_size; + pe_adjustment = (*org_sector-pe_off) % chunk_size; + __org_start = *org_sector - pe_adjustment; + __org_dev = *org_dev; + + ret = 0; + exception = lvm_find_exception_table(__org_dev, __org_start, lv); + if (exception) + { + *org_dev = exception->rdev_new; + *org_sector = exception->rsector_new + pe_adjustment; + ret = 1; + } + return ret; +} + +static void lvm_drop_snapshot(lv_t * lv_snap, const char * reason) +{ + kdev_t last_dev; + int i; + + /* no exception storage space available for this snapshot + or error on this snapshot --> release it */ + invalidate_buffers(lv_snap->lv_dev); + + for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) { + if ( lv_snap->lv_block_exception[i].rdev_new != last_dev) { + last_dev = lv_snap->lv_block_exception[i].rdev_new; + invalidate_buffers(last_dev); + } + } + + lvm_snapshot_release(lv_snap); + + printk(KERN_INFO + "%s -- giving up to snapshot %s on %s due %s\n", + lvm_name, lv_snap->lv_snapshot_org->lv_name, lv_snap->lv_name, + reason); +} + +static inline void lvm_snapshot_prepare_blocks(unsigned long * blocks, + unsigned long start, + int nr_sectors, + int blocksize) +{ + int i, sectors_per_block, nr_blocks; 
+ + sectors_per_block = blocksize >> 9; + nr_blocks = nr_sectors / sectors_per_block; + start /= sectors_per_block; + + for (i = 0; i < nr_blocks; i++) + blocks[i] = start++; +} + +static inline int get_blksize(kdev_t dev) +{ + int correct_size = BLOCK_SIZE, i, major; + + major = MAJOR(dev); + if (blksize_size[major]) + { + i = blksize_size[major][MINOR(dev)]; + if (i) + correct_size = i; + } + return correct_size; +} + +#ifdef DEBUG_SNAPSHOT +static inline void invalidate_snap_cache(unsigned long start, unsigned long nr, + kdev_t dev) +{ + struct buffer_head * bh; + int sectors_per_block, i, blksize, minor; + + minor = MINOR(dev); + blksize = lvm_blocksizes[minor]; + sectors_per_block = blksize >> 9; + nr /= sectors_per_block; + start /= sectors_per_block; + + for (i = 0; i < nr; i++) + { + bh = get_hash_table(dev, start++, blksize); + if (bh) + bforget(bh); + } +} +#endif + +/* + * copy on write handler for one snapshot logical volume + * + * read the original blocks and store it/them on the new one(s). + * if there is no exception storage space free any longer --> release snapshot. + * + * this routine gets called for each _first_ write to a physical chunk. 
+ */ +int lvm_snapshot_COW(kdev_t org_phys_dev, + unsigned long org_phys_sector, + unsigned long org_pe_start, + unsigned long org_virt_sector, + lv_t * lv_snap) +{ + const char * reason; + unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off; + int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size; + struct kiobuf * iobuf; + unsigned long blocks[KIO_MAX_SECTORS]; + int blksize_snap, blksize_org, min_blksize, max_blksize; + int max_sectors, nr_sectors; + + /* check if we are out of snapshot space */ + if (idx >= lv_snap->lv_remap_end) + goto fail_out_of_space; + + /* calculate physical boundaries of source chunk */ + pe_off = org_pe_start % chunk_size; + org_start = org_phys_sector - ((org_phys_sector-pe_off) % chunk_size); + virt_start = org_virt_sector - (org_phys_sector - org_start); + + /* calculate physical boundaries of destination chunk */ + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_start = lv_snap->lv_block_exception[idx].rsector_new; + +#ifdef DEBUG_SNAPSHOT + printk(KERN_INFO + "%s -- COW: " + "org %02d:%02d faulting %lu start %lu, " + "snap %02d:%02d start %lu, " + "size %d, pe_start %lu pe_off %lu, virt_sec %lu\n", + lvm_name, + MAJOR(org_phys_dev), MINOR(org_phys_dev), org_phys_sector, + org_start, + MAJOR(snap_phys_dev), MINOR(snap_phys_dev), snap_start, + chunk_size, + org_pe_start, pe_off, + org_virt_sector); +#endif + + iobuf = lv_snap->lv_iobuf; + + blksize_org = get_blksize(org_phys_dev); + blksize_snap = get_blksize(snap_phys_dev); + max_blksize = max(blksize_org, blksize_snap); + min_blksize = min(blksize_org, blksize_snap); + max_sectors = KIO_MAX_SECTORS * (min_blksize>>9); + + if (chunk_size % (max_blksize>>9)) + goto fail_blksize; + + while (chunk_size) + { + nr_sectors = min(chunk_size, max_sectors); + chunk_size -= nr_sectors; + + iobuf->length = nr_sectors << 9; + + lvm_snapshot_prepare_blocks(blocks, org_start, + nr_sectors, blksize_org); + if (brw_kiovec(READ, 1, &iobuf, 
org_phys_dev, + blocks, blksize_org) != (nr_sectors<<9)) + goto fail_raw_read; + + lvm_snapshot_prepare_blocks(blocks, snap_start, + nr_sectors, blksize_snap); + if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, + blocks, blksize_snap) != (nr_sectors<<9)) + goto fail_raw_write; + } + +#ifdef DEBUG_SNAPSHOT + /* invalidate the logcial snapshot buffer cache */ + invalidate_snap_cache(virt_start, lv_snap->lv_chunk_size, + lv_snap->lv_dev); +#endif + + /* the original chunk is now stored on the snapshot volume + so update the execption table */ + lv_snap->lv_block_exception[idx].rdev_org = org_phys_dev; + lv_snap->lv_block_exception[idx].rsector_org = org_start; + lvm_hash_link(lv_snap->lv_block_exception + idx, + org_phys_dev, org_start, lv_snap); + lv_snap->lv_remap_ptr = idx + 1; + return 0; + + /* slow path */ + out: + lvm_drop_snapshot(lv_snap, reason); + return 1; + + fail_out_of_space: + reason = "out of space"; + goto out; + fail_raw_read: + reason = "read error"; + goto out; + fail_raw_write: + reason = "write error"; + goto out; + fail_blksize: + reason = "blocksize error"; + goto out; +} + +static int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors) +{ + int bytes, nr_pages, err, i; + + bytes = sectors << 9; + nr_pages = (bytes + ~PAGE_MASK) >> PAGE_SHIFT; + err = expand_kiobuf(iobuf, nr_pages); + if (err) + goto out; + + err = -ENOMEM; + iobuf->locked = 1; + iobuf->nr_pages = 0; + for (i = 0; i < nr_pages; i++) + { + struct page * page; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,27) + page = alloc_page(GFP_KERNEL); + if (!page) + goto out; +#else + { + unsigned long addr = __get_free_page(GFP_USER); + if (!addr) + goto out; + iobuf->pagelist[i] = addr; + page = mem_map + MAP_NR(addr); + } +#endif + + iobuf->maplist[i] = page; + /* the only point to lock the page here is to be allowed + to share unmap_kiobuf() in the fail-path */ +#ifndef LockPage +#define LockPage(map) set_bit(PG_locked, &(map)->flags) +#endif + LockPage(page); + 
iobuf->nr_pages++; + } + iobuf->offset = 0; + + err = 0; + out: + return err; +} + +static int calc_max_buckets(void) +{ + unsigned long mem; + + mem = num_physpages << PAGE_SHIFT; + mem /= 100; + mem *= 2; + mem /= sizeof(struct list_head); + + return mem; +} + +static int lvm_snapshot_alloc_hash_table(lv_t * lv) +{ + int err; + unsigned long buckets, max_buckets, size; + struct list_head * hash; + + buckets = lv->lv_remap_end; + max_buckets = calc_max_buckets(); + buckets = min(buckets, max_buckets); + while (buckets & (buckets-1)) + buckets &= (buckets-1); + + size = buckets * sizeof(struct list_head); + + err = -ENOMEM; + hash = vmalloc(size); + lv->lv_snapshot_hash_table = hash; + + if (!hash) + goto out; + + lv->lv_snapshot_hash_mask = buckets-1; + while (buckets--) + INIT_LIST_HEAD(hash+buckets); + err = 0; + out: + return err; +} + +int lvm_snapshot_alloc(lv_t * lv_snap) +{ + int err, blocksize, max_sectors; + + err = alloc_kiovec(1, &lv_snap->lv_iobuf); + if (err) + goto out; + + blocksize = lvm_blocksizes[MINOR(lv_snap->lv_dev)]; + max_sectors = KIO_MAX_SECTORS << (PAGE_SHIFT-9); + + err = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors); + if (err) + goto out_free_kiovec; + + err = lvm_snapshot_alloc_hash_table(lv_snap); + if (err) + goto out_free_kiovec; + out: + return err; + + out_free_kiovec: + unmap_kiobuf(lv_snap->lv_iobuf); + free_kiovec(1, &lv_snap->lv_iobuf); + goto out; +} + +void lvm_snapshot_release(lv_t * lv) +{ + if (lv->lv_block_exception) + { + vfree(lv->lv_block_exception); + lv->lv_block_exception = NULL; + } + if (lv->lv_snapshot_hash_table) + { + vfree(lv->lv_snapshot_hash_table); + lv->lv_snapshot_hash_table = NULL; + } + if (lv->lv_iobuf) + { + free_kiovec(1, &lv->lv_iobuf); + lv->lv_iobuf = NULL; + } +} diff --git a/drivers/block/lvm.c b/drivers/block/lvm.c new file mode 100644 index 000000000..6d2f2743e --- /dev/null +++ b/drivers/block/lvm.c @@ -0,0 +1,2556 @@ +/* + * kernel/lvm.c + * + * Copyright (C) 1997 - 2000 
Heinz Mauelshagen, Germany + * + * February-November 1997 + * April-May,July-August,November 1998 + * January-March,May,July,September,October 1999 + * January,February 2000 + * + * + * LVM driver is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * LVM driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +/* + * Changelog + * + * 09/11/1997 - added chr ioctls VG_STATUS_GET_COUNT + * and VG_STATUS_GET_NAMELIST + * 18/01/1998 - change lvm_chr_open/close lock handling + * 30/04/1998 - changed LV_STATUS ioctl to LV_STATUS_BYNAME and + * - added LV_STATUS_BYINDEX ioctl + * - used lvm_status_byname_req_t and + * lvm_status_byindex_req_t vars + * 04/05/1998 - added multiple device support + * 08/05/1998 - added support to set/clear extendable flag in volume group + * 09/05/1998 - changed output of lvm_proc_get_info() because of + * support for free (eg. 
longer) logical volume names + * 12/05/1998 - added spin_locks (thanks to Pascal van Dam + * <pascal@ramoth.xs4all.nl>) + * 25/05/1998 - fixed handling of locked PEs in lvm_map() and lvm_chr_ioctl() + * 26/05/1998 - reactivated verify_area by access_ok + * 07/06/1998 - used vmalloc/vfree instead of kmalloc/kfree to go + * beyond 128/256 KB max allocation limit per call + * - #ifdef blocked spin_lock calls to avoid compile errors + * with 2.0.x + * 11/06/1998 - another enhancement to spinlock code in lvm_chr_open() + * and use of LVM_VERSION_CODE instead of my own macros + * (thanks to Michael Marxmeier <mike@msede.com>) + * 07/07/1998 - added statistics in lvm_map() + * 08/07/1998 - saved statistics in lvm_do_lv_extend_reduce() + * 25/07/1998 - used __initfunc macro + * 02/08/1998 - changes for official char/block major numbers + * 07/08/1998 - avoided init_module() and cleanup_module() to be static + * 30/08/1998 - changed VG lv_open counter from sum of LV lv_open counters + * to sum of LVs open (no matter how often each is) + * 01/09/1998 - fixed lvm_gendisk.part[] index error + * 07/09/1998 - added copying of lv_current_pe-array + * in LV_STATUS_BYINDEX ioctl + * 17/11/1998 - added KERN_* levels to printk + * 13/01/1999 - fixed LV index bug in lvm_do_lv_create() which hit lvrename + * 07/02/1999 - fixed spinlock handling bug in case of LVM_RESET + * by moving spinlock code from lvm_chr_open() + * to lvm_chr_ioctl() + * - added LVM_LOCK_LVM ioctl to lvm_chr_ioctl() + * - allowed LVM_RESET and retrieval commands to go ahead; + * only other update ioctls are blocked now + * - fixed pv->pe to NULL for pv_status + * - using lv_req structure in lvm_chr_ioctl() now + * - fixed NULL ptr reference bug in lvm_do_lv_extend_reduce() + * caused by uncontiguous PV array in lvm_chr_ioctl(VG_REDUCE) + * 09/02/1999 - changed BLKRASET and BLKRAGET in lvm_chr_ioctl() to + * handle lgoical volume private read ahead sector + * - implemented LV read_ahead handling with lvm_blk_read() 
+ * and lvm_blk_write() + * 10/02/1999 - implemented 2.[12].* support function lvm_hd_name() + * to be used in drivers/block/genhd.c by disk_name() + * 12/02/1999 - fixed index bug in lvm_blk_ioctl(), HDIO_GETGEO + * - enhanced gendisk insert/remove handling + * 16/02/1999 - changed to dynamic block minor number allocation to + * have as much as 99 volume groups with 256 logical volumes + * as the grand total; this allows having 1 volume group with + * up to 256 logical volumes in it + * 21/02/1999 - added LV open count information to proc filesystem + * - substituted redundant LVM_RESET code by calls + * to lvm_do_vg_remove() + * 22/02/1999 - used schedule_timeout() to be more responsive + * in case of lvm_do_vg_remove() with lots of logical volumes + * 19/03/1999 - fixed NULL pointer bug in module_init/lvm_init + * 17/05/1999 - used DECLARE_WAIT_QUEUE_HEAD macro (>2.3.0) + * - enhanced lvm_hd_name support + * 03/07/1999 - avoided use of KERNEL_VERSION macro based ifdefs and + * memcpy_tofs/memcpy_fromfs macro redefinitions + * 06/07/1999 - corrected reads/writes statistic counter copy in case + * of striped logical volume + * 28/07/1999 - implemented snapshot logical volumes + * - lvm_chr_ioctl + * - LV_STATUS_BYINDEX + * - LV_STATUS_BYNAME + * - lvm_do_lv_create + * - lvm_do_lv_remove + * - lvm_map + * - new lvm_snapshot_remap_block + * - new lvm_snapshot_remap_new_block + * 08/10/1999 - implemented support for multiple snapshots per + * original logical volume + * 12/10/1999 - support for 2.3.19 + * 11/11/1999 - support for 2.3.28 + * 21/11/1999 - changed lvm_map() interface to buffer_head based + * 19/12/1999 - support for 2.3.33 + * 01/01/2000 - changed locking concept in lvm_map(), + * lvm_do_vg_create() and lvm_do_lv_remove() + * 15/01/2000 - fixed PV_FLUSH bug in lvm_chr_ioctl() + * 24/01/2000 - ported to 2.3.40 including Alan Cox's pointer changes etc. 
+ * 29/01/2000 - used kmalloc/kfree again for all small structures + * 20/01/2000 - cleaned up lvm_chr_ioctl by moving code + * to seperated functions + * - avoided "/dev/" in proc filesystem output + * - avoided inline strings functions lvm_strlen etc. + * 14/02/2000 - support for 2.3.43 + * - integrated Andrea Arcagnelli's snapshot code + * + */ + + +static char *lvm_version = "LVM version 0.8final by Heinz Mauelshagen (15/02/2000)\n"; +static char *lvm_short_version = "version 0.8final (15/02/2000)"; + +#define MAJOR_NR LVM_BLK_MAJOR +#define DEVICE_OFF(device) + +#include <linux/config.h> +#include <linux/version.h> + +#ifdef MODVERSIONS +#undef MODULE +#define MODULE +#include <linux/modversions.h> +#endif + +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/vmalloc.h> +#include <linux/slab.h> +#include <linux/init.h> + +#include <linux/hdreg.h> +#include <linux/stat.h> +#include <linux/fs.h> +#include <linux/proc_fs.h> +#include <linux/blkdev.h> +#include <linux/genhd.h> +#include <linux/locks.h> +#include <linux/smp_lock.h> +#include <asm/ioctl.h> +#include <asm/segment.h> +#include <asm/uaccess.h> + +#ifdef CONFIG_KERNELD +#include <linux/kerneld.h> +#endif + +#include <linux/blk.h> +#include <linux/blkpg.h> + +#include <linux/errno.h> +#include <linux/lvm.h> + +#define LVM_CORRECT_READ_AHEAD( a) \ + if ( a < LVM_MIN_READ_AHEAD || \ + a > LVM_MAX_READ_AHEAD) a = LVM_MAX_READ_AHEAD; + +#ifndef WRITEA +# define WRITEA WRITE +#endif + +/* + * External function prototypes + */ +#ifdef MODULE +int init_module(void); +void cleanup_module(void); +#else +extern int lvm_init(void); +#endif + +static void lvm_dummy_device_request(request_queue_t *); +#define DEVICE_REQUEST lvm_dummy_device_request + +static void lvm_make_request_fn(int, struct buffer_head*); + +static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong); +static int lvm_blk_open(struct inode *, struct file *); + +static ssize_t lvm_blk_read(struct file *, char *, 
size_t, loff_t *); +static ssize_t lvm_blk_write(struct file *, const char *, size_t, loff_t *); + +static int lvm_chr_open(struct inode *, struct file *); + +static int lvm_chr_close(struct inode *, struct file *); +static int lvm_blk_close(struct inode *, struct file *); + +static int lvm_chr_ioctl(struct inode *, struct file *, uint, ulong); + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +static int lvm_proc_get_info(char *, char **, off_t, int); +static int (*lvm_proc_get_info_ptr) (char *, char **, off_t, int) = +&lvm_proc_get_info; +#endif + +#ifdef LVM_HD_NAME +void lvm_hd_name(char *, int); +#endif +/* End external function prototypes */ + + +/* + * Internal function prototypes + */ +static void lvm_init_vars(void); + +/* external snapshot calls */ +int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *); +int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, lv_t *); +int lvm_snapshot_alloc(lv_t *); +void lvm_snapshot_release(lv_t *); + +#ifdef LVM_HD_NAME +extern void (*lvm_hd_name_ptr) (char *, int); +#endif +static int lvm_map(struct buffer_head *, int); +static int lvm_do_lock_lvm(void); +static int lvm_do_le_remap(vg_t *, void *); +static int lvm_do_pe_lock_unlock(vg_t *r, void *); +static int lvm_do_vg_create(int, void *); +static int lvm_do_vg_extend(vg_t *, void *); +static int lvm_do_vg_reduce(vg_t *, void *); +static int lvm_do_vg_remove(int); +static int lvm_do_lv_create(int, char *, lv_t *); +static int lvm_do_lv_remove(int, char *, int); +static int lvm_do_lv_extend_reduce(int, char *, lv_t *); +static int lvm_do_lv_status_byname(vg_t *r, void *); +static int lvm_do_lv_status_byindex(vg_t *, void *arg); +static int lvm_do_pv_change(vg_t*, void*); +static int lvm_do_pv_status(vg_t *, void *); +static void lvm_geninit(struct gendisk *); +#ifdef LVM_GET_INODE +static struct inode *lvm_get_inode(int); +void lvm_clear_inode(struct inode *); +#endif +/* END Internal function prototypes */ + + +/* volume group descriptor area pointers 
*/ +static vg_t *vg[ABS_MAX_VG]; +static pv_t *pvp = NULL; +static lv_t *lvp = NULL; +static pe_t *pep = NULL; +static pe_t *pep1 = NULL; + + +/* map from block minor number to VG and LV numbers */ +typedef struct { + int vg_number; + int lv_number; +} vg_lv_map_t; +static vg_lv_map_t vg_lv_map[ABS_MAX_LV]; + + +/* Request structures (lvm_chr_ioctl()) */ +static pv_change_req_t pv_change_req; +static pv_flush_req_t pv_flush_req; +static pv_status_req_t pv_status_req; +static pe_lock_req_t pe_lock_req; +static le_remap_req_t le_remap_req; +static lv_req_t lv_req; + +#ifdef LVM_TOTAL_RESET +static int lvm_reset_spindown = 0; +#endif + +static char pv_name[NAME_LEN]; +/* static char rootvg[NAME_LEN] = { 0, }; */ +static uint lv_open = 0; +static const char *const lvm_name = LVM_NAME; +static int lock = 0; +static int loadtime = 0; +static uint vg_count = 0; +static long lvm_chr_open_count = 0; +static ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION; +static DECLARE_WAIT_QUEUE_HEAD(lvm_snapshot_wait); +static DECLARE_WAIT_QUEUE_HEAD(lvm_wait); +static DECLARE_WAIT_QUEUE_HEAD(lvm_map_wait); + +static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; + +static struct file_operations lvm_chr_fops = +{ + open: lvm_chr_open, + release: lvm_chr_close, + ioctl: lvm_chr_ioctl, +}; + +static struct file_operations lvm_blk_fops = +{ + open: lvm_blk_open, + release: blkdev_close, + read: lvm_blk_read, + write: lvm_blk_write, + ioctl: lvm_blk_ioctl, + fsync: block_fsync, +}; + +#define BLOCK_DEVICE_OPERATIONS +/* block device operations structure needed for 2.3.38? 
and above */ +static struct block_device_operations lvm_blk_dops = +{ + open: lvm_blk_open, + release: lvm_blk_close, + ioctl: lvm_blk_ioctl +}; + +/* gendisk structures */ +static struct hd_struct lvm_hd_struct[MAX_LV]; +static int lvm_blocksizes[MAX_LV] = +{0,}; +static int lvm_size[MAX_LV] = +{0,}; +static struct gendisk lvm_gendisk = +{ + MAJOR_NR, /* major # */ + LVM_NAME, /* name of major */ + 0, /* number of times minor is shifted + to get real minor */ + 1, /* maximum partitions per device */ + lvm_hd_struct, /* partition table */ + lvm_size, /* device size in blocks, copied + to block_size[] */ + MAX_LV, /* number or real devices */ + NULL, /* internal */ + NULL, /* pointer to next gendisk struct (internal) */ +}; + + +#ifdef MODULE +/* + * Module initialization... + */ +int init_module(void) +#else +/* + * Driver initialization... + */ +#ifdef __initfunc +__initfunc(int lvm_init(void)) +#else +int __init lvm_init(void) +#endif +#endif /* #ifdef MODULE */ +{ + struct gendisk *gendisk_ptr = NULL; + + if (register_chrdev(LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) { + printk(KERN_ERR "%s -- register_chrdev failed\n", lvm_name); + return -EIO; + } +#ifdef BLOCK_DEVICE_OPERATIONS + if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0) +#else + if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_fops) < 0) +#endif + { + printk("%s -- register_blkdev failed\n", lvm_name); + if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) + printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); + return -EIO; + } +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + create_proc_info_entry(LVM_NAME, S_IFREG | S_IRUGO, + &proc_root, lvm_proc_get_info_ptr); +#endif + + lvm_init_vars(); + lvm_geninit(&lvm_gendisk); + + /* insert our gendisk at the corresponding major */ + if (gendisk_head != NULL) { + gendisk_ptr = gendisk_head; + while (gendisk_ptr->next != NULL && + gendisk_ptr->major > lvm_gendisk.major) { + gendisk_ptr = gendisk_ptr->next; + } + 
lvm_gendisk.next = gendisk_ptr->next; + gendisk_ptr->next = &lvm_gendisk; + } else { + gendisk_head = &lvm_gendisk; + lvm_gendisk.next = NULL; + } + +#ifdef LVM_HD_NAME + /* reference from drivers/block/genhd.c */ + lvm_hd_name_ptr = lvm_hd_name; +#endif + + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn); + /* optional read root VGDA */ +/* + if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); +*/ + + printk(KERN_INFO + "%s%s -- " +#ifdef MODULE + "Module" +#else + "Driver" +#endif + " successfully initialized\n", + lvm_version, lvm_name); + + return 0; +} /* init_module() / lvm_init() */ + + +#ifdef MODULE +/* + * Module cleanup... + */ +void cleanup_module(void) +{ + struct gendisk *gendisk_ptr = NULL, *gendisk_ptr_prev = NULL; + + if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) { + printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); + } + if (unregister_blkdev(MAJOR_NR, lvm_name) < 0) { + printk(KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name); + } + blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); + + gendisk_ptr = gendisk_ptr_prev = gendisk_head; + while (gendisk_ptr != NULL) { + if (gendisk_ptr == &lvm_gendisk) + break; + gendisk_ptr_prev = gendisk_ptr; + gendisk_ptr = gendisk_ptr->next; + } + /* delete our gendisk from chain */ + if (gendisk_ptr == &lvm_gendisk) + gendisk_ptr_prev->next = gendisk_ptr->next; + + blk_size[MAJOR_NR] = NULL; + blksize_size[MAJOR_NR] = NULL; + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + remove_proc_entry(LVM_NAME, &proc_root); +#endif + +#ifdef LVM_HD_NAME + /* reference from linux/drivers/block/genhd.c */ + lvm_hd_name_ptr = NULL; +#endif + + printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name); + + return; +} /* void cleanup_module() */ +#endif /* #ifdef MODULE */ + + +/* + * support function to initialize lvm variables + */ +#ifdef __initfunc +__initfunc(void lvm_init_vars(void)) 
+#else +void __init lvm_init_vars(void) +#endif +{ + int v; + + loadtime = CURRENT_TIME; + + lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; + + pe_lock_req.lock = UNLOCK_PE; + pe_lock_req.data.lv_dev = \ + pe_lock_req.data.pv_dev = \ + pe_lock_req.data.pv_offset = 0; + + /* Initialize VG pointers */ + for (v = 0; v < ABS_MAX_VG; v++) vg[v] = NULL; + + /* Initialize LV -> VG association */ + for (v = 0; v < ABS_MAX_LV; v++) { + /* index ABS_MAX_VG never used for real VG */ + vg_lv_map[v].vg_number = ABS_MAX_VG; + vg_lv_map[v].lv_number = -1; + } + + return; +} /* lvm_init_vars() */ + + +/******************************************************************** + * + * Character device functions + * + ********************************************************************/ + +/* + * character device open routine + */ +static int lvm_chr_open(struct inode *inode, + struct file *file) +{ + int minor = MINOR(inode->i_rdev); + +#ifdef DEBUG + printk(KERN_DEBUG + "%s -- lvm_chr_open MINOR: %d VG#: %d mode: 0x%X lock: %d\n", + lvm_name, minor, VG_CHR(minor), file->f_mode, lock); +#endif + + /* super user validation */ + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + + /* Group special file open */ + if (VG_CHR(minor) > MAX_VG) return -ENXIO; + + MOD_INC_USE_COUNT; + + lvm_chr_open_count++; + return 0; +} /* lvm_chr_open() */ + + +/* + * character device i/o-control routine + * + * Only one changing process can do changing ioctl at one time, + * others will block. 
+ * + */ +static int lvm_chr_ioctl(struct inode *inode, struct file *file, + uint command, ulong a) +{ + int minor = MINOR(inode->i_rdev); + uint extendable, l, v; + void *arg = (void *) a; + lv_t lv; + vg_t* vg_ptr = vg[VG_CHR(minor)]; + + /* otherwise cc will complain about unused variables */ + (void) lvm_lock; + + +#ifdef DEBUG_IOCTL + printk(KERN_DEBUG + "%s -- lvm_chr_ioctl: command: 0x%X MINOR: %d " + "VG#: %d mode: 0x%X\n", + lvm_name, command, minor, VG_CHR(minor), file->f_mode); +#endif + +#ifdef LVM_TOTAL_RESET + if (lvm_reset_spindown > 0) return -EACCES; +#endif + + /* Main command switch */ + switch (command) { + case LVM_LOCK_LVM: + /* lock the LVM */ + return lvm_do_lock_lvm(); + + case LVM_GET_IOP_VERSION: + /* check lvm version to ensure driver/tools+lib + interoperability */ + if (copy_to_user(arg, &lvm_iop_version, sizeof(ushort)) != 0) + return -EFAULT; + return 0; + +#ifdef LVM_TOTAL_RESET + case LVM_RESET: + /* lock reset function */ + lvm_reset_spindown = 1; + for (v = 0; v < ABS_MAX_VG; v++) { + if (vg[v] != NULL) lvm_do_vg_remove(v); + } + +#ifdef MODULE + while (GET_USE_COUNT(&__this_module) < 1) + MOD_INC_USE_COUNT; + while (GET_USE_COUNT(&__this_module) > 1) + MOD_DEC_USE_COUNT; +#endif /* MODULE */ + lock = 0; /* release lock */ + wake_up_interruptible(&lvm_wait); + return 0; +#endif /* LVM_TOTAL_RESET */ + + + case LE_REMAP: + /* remap a logical extent (after moving the physical extent) */ + return lvm_do_le_remap(vg_ptr,arg); + + case PE_LOCK_UNLOCK: + /* lock/unlock i/o to a physical extent to move it to another + physical volume (move's done in user space's pvmove) */ + return lvm_do_pe_lock_unlock(vg_ptr,arg); + + case VG_CREATE: + /* create a VGDA */ + return lvm_do_vg_create(minor, arg); + + case VG_REMOVE: + /* remove an inactive VGDA */ + return lvm_do_vg_remove(minor); + + case VG_EXTEND: + /* extend a volume group */ + return lvm_do_vg_extend(vg_ptr,arg); + + case VG_REDUCE: + /* reduce a volume group */ + return 
lvm_do_vg_reduce(vg_ptr,arg); + + + case VG_SET_EXTENDABLE: + /* set/clear extendability flag of volume group */ + if (vg_ptr == NULL) return -ENXIO; + if (copy_from_user(&extendable, arg, sizeof(extendable)) != 0) + return -EFAULT; + + if (extendable == VG_EXTENDABLE || + extendable == ~VG_EXTENDABLE) { + if (extendable == VG_EXTENDABLE) + vg_ptr->vg_status |= VG_EXTENDABLE; + else + vg_ptr->vg_status &= ~VG_EXTENDABLE; + } else return -EINVAL; + return 0; + + + case VG_STATUS: + /* get volume group data (only the vg_t struct) */ + if (vg_ptr == NULL) return -ENXIO; + if (copy_to_user(arg, vg_ptr, sizeof(vg_t)) != 0) + return -EFAULT; + return 0; + + + case VG_STATUS_GET_COUNT: + /* get volume group count */ + if (copy_to_user(arg, &vg_count, sizeof(vg_count)) != 0) + return -EFAULT; + return 0; + + + case VG_STATUS_GET_NAMELIST: + /* get volume group count */ + for (l = v = 0; v < ABS_MAX_VG; v++) { + if (vg[v] != NULL) { + if (copy_to_user(arg + l++ * NAME_LEN, + vg[v]->vg_name, + NAME_LEN) != 0) + return -EFAULT; + } + } + return 0; + + + case LV_CREATE: + case LV_REMOVE: + case LV_EXTEND: + case LV_REDUCE: + /* create, remove, extend or reduce a logical volume */ + if (vg_ptr == NULL) return -ENXIO; + if (copy_from_user(&lv_req, arg, sizeof(lv_req)) != 0) + return -EFAULT; + + if (command != LV_REMOVE) { + if (copy_from_user(&lv, lv_req.lv, sizeof(lv_t)) != 0) + return -EFAULT; + } + switch (command) { + case LV_CREATE: + return lvm_do_lv_create(minor, lv_req.lv_name, &lv); + + case LV_REMOVE: + return lvm_do_lv_remove(minor, lv_req.lv_name, -1); + + case LV_EXTEND: + case LV_REDUCE: + return lvm_do_lv_extend_reduce(minor, lv_req.lv_name, &lv); + } + + + case LV_STATUS_BYNAME: + /* get status of a logical volume by name */ + return lvm_do_lv_status_byname(vg_ptr,arg); + + case LV_STATUS_BYINDEX: + /* get status of a logical volume by index */ + return lvm_do_lv_status_byindex(vg_ptr,arg); + + case PV_CHANGE: + /* change a physical volume */ + return 
lvm_do_pv_change(vg_ptr,arg); + + case PV_STATUS: + /* get physical volume data (pv_t structure only) */ + return lvm_do_pv_status(vg_ptr,arg); + + case PV_FLUSH: + /* physical volume buffer flush/invalidate */ + if (copy_from_user(&pv_flush_req, arg, + sizeof(pv_flush_req)) != 0) + return -EFAULT; + + fsync_dev(pv_flush_req.pv_dev); + invalidate_buffers(pv_flush_req.pv_dev); + return 0; + + default: + printk(KERN_WARNING + "%s -- lvm_chr_ioctl: unknown command %x\n", + lvm_name, command); + return -EINVAL; + } + + return 0; +} /* lvm_chr_ioctl */ + + +/* + * character device close routine + */ +static int lvm_chr_close(struct inode *inode, struct file *file) +{ +#ifdef DEBUG + int minor = MINOR(inode->i_rdev); + printk(KERN_DEBUG + "%s -- lvm_chr_close VG#: %d\n", lvm_name, VG_CHR(minor)); +#endif + +#ifdef LVM_TOTAL_RESET + if (lvm_reset_spindown > 0) { + lvm_reset_spindown = 0; + lvm_chr_open_count = 1; + } +#endif + + if (lvm_chr_open_count > 0) lvm_chr_open_count--; + if (lock == current->pid) { + lock = 0; /* release lock */ + wake_up_interruptible(&lvm_wait); + } + +#ifdef MODULE + if (GET_USE_COUNT(&__this_module) > 0) MOD_DEC_USE_COUNT; +#endif + + return 0; +} /* lvm_chr_close() */ + + + +/******************************************************************** + * + * Block device functions + * + ********************************************************************/ + +/* + * block device open routine + */ +static int lvm_blk_open(struct inode *inode, struct file *file) +{ + int minor = MINOR(inode->i_rdev); + lv_t *lv_ptr; + vg_t *vg_ptr = vg[VG_BLK(minor)]; + +#ifdef DEBUG_LVM_BLK_OPEN + printk(KERN_DEBUG + "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d mode: 0x%X\n", + lvm_name, minor, VG_BLK(minor), LV_BLK(minor), file->f_mode); +#endif + +#ifdef LVM_TOTAL_RESET + if (lvm_reset_spindown > 0) + return -EPERM; +#endif + + if (vg_ptr != NULL && + (vg_ptr->vg_status & VG_ACTIVE) && + (lv_ptr = vg_ptr->lv[LV_BLK(minor)]) != NULL && + LV_BLK(minor) >= 0 && + 
LV_BLK(minor) < vg_ptr->lv_max) { + + /* Check parallel LV spindown (LV remove) */ + if (lv_ptr->lv_status & LV_SPINDOWN) return -EPERM; + + /* Check inactive LV and open for read/write */ + if (file->f_mode & O_RDWR) { + if (!(lv_ptr->lv_status & LV_ACTIVE)) return -EPERM; + if (!(lv_ptr->lv_access & LV_WRITE)) return -EACCES; + } + +#ifdef BLOCK_DEVICE_OPERATIONS + file->f_op = &lvm_blk_fops; +#endif + + /* be sure to increment VG counter */ + if (lv_ptr->lv_open == 0) vg_ptr->lv_open++; + lv_ptr->lv_open++; + + MOD_INC_USE_COUNT; + +#ifdef DEBUG_LVM_BLK_OPEN + printk(KERN_DEBUG + "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d size: %d\n", + lvm_name, minor, VG_BLK(minor), LV_BLK(minor), + lv_ptr->lv_size); +#endif + + return 0; + } + return -ENXIO; +} /* lvm_blk_open() */ + + +/* + * block device read + */ +static ssize_t lvm_blk_read(struct file *file, char *buffer, + size_t size, loff_t * offset) +{ + int minor = MINOR(file->f_dentry->d_inode->i_rdev); + + read_ahead[MAJOR(file->f_dentry->d_inode->i_rdev)] = + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_read_ahead; + return block_read(file, buffer, size, offset); +} + + +/* + * block device write + */ +static ssize_t lvm_blk_write(struct file *file, const char *buffer, + size_t size, loff_t * offset) +{ + int minor = MINOR(file->f_dentry->d_inode->i_rdev); + + read_ahead[MAJOR(file->f_dentry->d_inode->i_rdev)] = + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_read_ahead; + return block_write(file, buffer, size, offset); +} + + +/* + * block device i/o-control routine + */ +static int lvm_blk_ioctl(struct inode *inode, struct file *file, + uint command, ulong a) +{ + int minor = MINOR(inode->i_rdev); + vg_t *vg_ptr = vg[VG_BLK(minor)]; + lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)]; + void *arg = (void *) a; + struct hd_geometry *hd = (struct hd_geometry *) a; + +#ifdef DEBUG_IOCTL + printk(KERN_DEBUG + "%s -- lvm_blk_ioctl MINOR: %d command: 0x%X arg: %X " + "VG#: %dl LV#: %d\n", + lvm_name, minor, command, (ulong) arg, + 
VG_BLK(minor), LV_BLK(minor)); +#endif + + switch (command) { + case BLKGETSIZE: + /* return device size */ +#ifdef DEBUG_IOCTL + printk(KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKGETSIZE: %u\n", + lvm_name, lv_ptr->lv_size); +#endif + copy_to_user((long *) arg, &lv_ptr->lv_size, + sizeof(lv_ptr->lv_size)); + break; + + + case BLKFLSBUF: + /* flush buffer cache */ + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + +#ifdef DEBUG_IOCTL + printk(KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKFLSBUF\n", lvm_name); +#endif + fsync_dev(inode->i_rdev); + break; + + + case BLKRASET: + /* set read ahead for block device */ + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + +#ifdef DEBUG_IOCTL + printk(KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKRASET: %d sectors for %02X:%02X\n", + lvm_name, (long) arg, MAJOR(inode->i_rdev), minor); +#endif + if ((long) arg < LVM_MIN_READ_AHEAD || + (long) arg > LVM_MAX_READ_AHEAD) + return -EINVAL; + lv_ptr->lv_read_ahead = (long) arg; + break; + + + case BLKRAGET: + /* get current read ahead setting */ +#ifdef DEBUG_IOCTL + printk(KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKRAGET\n", lvm_name); +#endif + copy_to_user((long *) arg, &lv_ptr->lv_read_ahead, + sizeof(lv_ptr->lv_read_ahead)); + break; + + + case HDIO_GETGEO: + /* get disk geometry */ +#ifdef DEBUG_IOCTL + printk(KERN_DEBUG + "%s -- lvm_blk_ioctl -- HDIO_GETGEO\n", lvm_name); +#endif + if (hd == NULL) + return -EINVAL; + { + unsigned char heads = 64; + unsigned char sectors = 32; + long start = 0; + short cylinders = lv_ptr->lv_size / heads / sectors; + + if (copy_to_user((char *) &hd->heads, &heads, + sizeof(heads)) != 0 || + copy_to_user((char *) &hd->sectors, &sectors, + sizeof(sectors)) != 0 || + copy_to_user((short *) &hd->cylinders, + &cylinders, sizeof(cylinders)) != 0 || + copy_to_user((long *) &hd->start, &start, + sizeof(start)) != 0) + return -EFAULT; + } + +#ifdef DEBUG_IOCTL + printk(KERN_DEBUG + "%s -- lvm_blk_ioctl -- cylinders: %d\n", + lvm_name, lv_ptr->lv_size / heads / sectors);
+#endif + break; + + + case LV_SET_ACCESS: + /* set access flags of a logical volume */ + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + lv_ptr->lv_access = (ulong) arg; + break; + + + case LV_SET_STATUS: + /* set status flags of a logical volume */ + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + if (!((ulong) arg & LV_ACTIVE) && lv_ptr->lv_open > 1) + return -EPERM; + lv_ptr->lv_status = (ulong) arg; + break; + + + case LV_SET_ALLOCATION: + /* set allocation flags of a logical volume */ + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + lv_ptr->lv_allocation = (ulong) arg; + break; + + + default: + printk(KERN_WARNING + "%s -- lvm_blk_ioctl: unknown command %d\n", + lvm_name, command); + return -EINVAL; + } + + return 0; +} /* lvm_blk_ioctl() */ + + +/* + * block device close routine + */ +static int lvm_blk_close(struct inode *inode, struct file *file) +{ + int minor = MINOR(inode->i_rdev); + vg_t *vg_ptr = vg[VG_BLK(minor)]; + lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)]; + +#ifdef DEBUG + printk(KERN_DEBUG + "%s -- lvm_blk_close MINOR: %d VG#: %d LV#: %d\n", + lvm_name, minor, VG_BLK(minor), LV_BLK(minor)); +#endif + + sync_dev(inode->i_rdev); + if (lv_ptr->lv_open == 1) vg_ptr->lv_open--; + lv_ptr->lv_open--; + + MOD_DEC_USE_COUNT; + + return 0; +} /* lvm_blk_close() */ + + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +/* + * Support function /proc-Filesystem + */ +#define LVM_PROC_BUF ( i == 0 ? 
dummy_buf : &buf[sz]) + +static int lvm_proc_get_info(char *page, char **start, off_t pos, int count) +{ + int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter, + lv_open_total, pe_t_bytes, lv_block_exception_t_bytes, seconds; + static off_t sz; + off_t sz_last; + char allocation_flag, inactive_flag, rw_flag, stripes_flag; + char *lv_name, *pv_name; + static char *buf = NULL; + static char dummy_buf[160]; /* sized for 2 lines */ + vg_t *vg_ptr; + lv_t *lv_ptr; + pv_t *pv_ptr; + + +#ifdef DEBUG_LVM_PROC_GET_INFO + printk(KERN_DEBUG + "%s - lvm_proc_get_info CALLED pos: %lu count: %d whence: %d\n", + lvm_name, pos, count, whence); +#endif + + if (pos == 0 || buf == NULL) { + sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \ + lv_open_total = pe_t_bytes = lv_block_exception_t_bytes = 0; + + /* search for activity */ + for (v = 0; v < ABS_MAX_VG; v++) { + if ((vg_ptr = vg[v]) != NULL) { + vg_counter++; + pv_counter += vg_ptr->pv_cur; + lv_counter += vg_ptr->lv_cur; + if (vg_ptr->lv_cur > 0) { + for (l = 0; l < vg[v]->lv_max; l++) { + if ((lv_ptr = vg_ptr->lv[l]) != NULL) { + pe_t_bytes += lv_ptr->lv_allocated_le; + if (lv_ptr->lv_block_exception != NULL) + lv_block_exception_t_bytes += lv_ptr->lv_remap_end; + if (lv_ptr->lv_open > 0) { + lv_open_counter++; + lv_open_total += lv_ptr->lv_open; + } + } + } + } + } + } + pe_t_bytes *= sizeof(pe_t); + lv_block_exception_t_bytes *= sizeof(lv_block_exception_t); + + if (buf != NULL) { +#ifdef DEBUG_KFREE + printk(KERN_DEBUG + "%s -- kfree %d\n", lvm_name, __LINE__); +#endif + kfree(buf); + buf = NULL; + } + /* 2 times: first to get size to allocate buffer, + 2nd to fill the malloced buffer */ + for (i = 0; i < 2; i++) { + sz = 0; + sz += sprintf(LVM_PROC_BUF, + "LVM " +#ifdef MODULE + "module" +#else + "driver" +#endif + " %s\n\n" + "Total: %d VG%s %d PV%s %d LV%s ", + lvm_short_version, + vg_counter, vg_counter == 1 ? "" : "s", + pv_counter, pv_counter == 1 ? 
"" : "s", + lv_counter, lv_counter == 1 ? "" : "s"); + sz += sprintf(LVM_PROC_BUF, + "(%d LV%s open", + lv_open_counter, + lv_open_counter == 1 ? "" : "s"); + if (lv_open_total > 0) + sz += sprintf(LVM_PROC_BUF, + " %d times)\n", + lv_open_total); + else + sz += sprintf(LVM_PROC_BUF, ")"); + sz += sprintf(LVM_PROC_BUF, + "\nGlobal: %lu bytes malloced IOP version: %d ", + vg_counter * sizeof(vg_t) + + pv_counter * sizeof(pv_t) + + lv_counter * sizeof(lv_t) + + pe_t_bytes + lv_block_exception_t_bytes + sz_last, + lvm_iop_version); + + seconds = CURRENT_TIME - loadtime; + if (seconds < 0) + loadtime = CURRENT_TIME + seconds; + if (seconds / 86400 > 0) { + sz += sprintf(LVM_PROC_BUF, "%d day%s ", + seconds / 86400, + seconds / 86400 == 0 || + seconds / 86400 > 1 ? "s" : ""); + } + sz += sprintf(LVM_PROC_BUF, "%d:%02d:%02d active\n", + (seconds % 86400) / 3600, + (seconds % 3600) / 60, + seconds % 60); + + if (vg_counter > 0) { + for (v = 0; v < ABS_MAX_VG; v++) { + /* volume group */ + if ((vg_ptr = vg[v]) != NULL) { + inactive_flag = ' '; + if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I'; + sz += sprintf(LVM_PROC_BUF, + "\nVG: %c%s [%d PV, %d LV/%d open] " + " PE Size: %d KB\n" + " Usage [KB/PE]: %d /%d total " + "%d /%d used %d /%d free", + inactive_flag, + vg_ptr->vg_name, + vg_ptr->pv_cur, + vg_ptr->lv_cur, + vg_ptr->lv_open, + vg_ptr->pe_size >> 1, + vg_ptr->pe_size * vg_ptr->pe_total >> 1, + vg_ptr->pe_total, + vg_ptr->pe_allocated * vg_ptr->pe_size >> 1, + vg_ptr->pe_allocated, + (vg_ptr->pe_total - vg_ptr->pe_allocated) * + vg_ptr->pe_size >> 1, + vg_ptr->pe_total - vg_ptr->pe_allocated); + + /* physical volumes */ + sz += sprintf(LVM_PROC_BUF, + "\n PV%s ", + vg_ptr->pv_cur == 1 ? 
": " : "s:"); + c = 0; + for (p = 0; p < vg_ptr->pv_max; p++) { + if ((pv_ptr = vg_ptr->pv[p]) != NULL) { + inactive_flag = 'A'; + if (!(pv_ptr->pv_status & PV_ACTIVE)) + inactive_flag = 'I'; + allocation_flag = 'A'; + if (!(pv_ptr->pv_allocatable & PV_ALLOCATABLE)) + allocation_flag = 'N'; + pv_name = strchr(pv_ptr->pv_name+1,'/'); + if ( pv_name == 0) pv_name = pv_ptr->pv_name; + else pv_name++; + sz += sprintf(LVM_PROC_BUF, + "[%c%c] %-21s %8d /%-6d " + "%8d /%-6d %8d /%-6d", + inactive_flag, + allocation_flag, + pv_name, + pv_ptr->pe_total * + pv_ptr->pe_size >> 1, + pv_ptr->pe_total, + pv_ptr->pe_allocated * + pv_ptr->pe_size >> 1, + pv_ptr->pe_allocated, + (pv_ptr->pe_total - + pv_ptr->pe_allocated) * + pv_ptr->pe_size >> 1, + pv_ptr->pe_total - + pv_ptr->pe_allocated); + c++; + if (c < vg_ptr->pv_cur) + sz += sprintf(LVM_PROC_BUF, + "\n "); + } + } + + /* logical volumes */ + sz += sprintf(LVM_PROC_BUF, + "\n LV%s ", + vg_ptr->lv_cur == 1 ? ": " : "s:"); + c = 0; + for (l = 0; l < vg[v]->lv_max; l++) { + if ((lv_ptr = vg_ptr->lv[l]) != NULL) { + inactive_flag = 'A'; + if (!(lv_ptr->lv_status & LV_ACTIVE)) + inactive_flag = 'I'; + rw_flag = 'R'; + if (lv_ptr->lv_access & LV_WRITE) + rw_flag = 'W'; + allocation_flag = 'D'; + if (lv_ptr->lv_allocation & LV_CONTIGUOUS) + allocation_flag = 'C'; + stripes_flag = 'L'; + if (lv_ptr->lv_stripes > 1) + stripes_flag = 'S'; + sz += sprintf(LVM_PROC_BUF, + "[%c%c%c%c", + inactive_flag, + rw_flag, + allocation_flag, + stripes_flag); + if (lv_ptr->lv_stripes > 1) + sz += sprintf(LVM_PROC_BUF, "%-2d", + lv_ptr->lv_stripes); + else + sz += sprintf(LVM_PROC_BUF, " "); + lv_name = strrchr(lv_ptr->lv_name, '/'); + if ( lv_name == 0) lv_name = lv_ptr->lv_name; + else lv_name++; + sz += sprintf(LVM_PROC_BUF, "] %-25s", lv_name); + if (strlen(lv_name) > 25) + sz += sprintf(LVM_PROC_BUF, + "\n "); + sz += sprintf(LVM_PROC_BUF, "%9d /%-6d ", + lv_ptr->lv_size >> 1, + lv_ptr->lv_size / vg[v]->pe_size); + + if (lv_ptr->lv_open == 0) + 
sz += sprintf(LVM_PROC_BUF, "close"); + else + sz += sprintf(LVM_PROC_BUF, "%dx open", + lv_ptr->lv_open); + c++; + if (c < vg_ptr->lv_cur) + sz += sprintf(LVM_PROC_BUF, + "\n "); + } + } + if (vg_ptr->lv_cur == 0) sz += sprintf(LVM_PROC_BUF, "none"); + sz += sprintf(LVM_PROC_BUF, "\n"); + } + } + } + if (buf == NULL) { + if ((buf = vmalloc(sz)) == NULL) { + sz = 0; + return sprintf(page, "%s - vmalloc error at line %d\n", + lvm_name, __LINE__); + } + } + sz_last = sz; + } + } + if (pos > sz - 1) { + vfree(buf); + buf = NULL; + return 0; + } + *start = &buf[pos]; + if (sz - pos < count) + return sz - pos; + else + return count; +} /* lvm_proc_get_info() */ +#endif /* #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS */ + + +/* + * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c + * (see init_module/lvm_init) + */ +static int lvm_map(struct buffer_head *bh, int rw) +{ + int minor = MINOR(bh->b_dev); + int ret = 0; + ulong index; + ulong pe_start; + ulong size = bh->b_size >> 9; + ulong rsector_tmp = bh->b_blocknr * size; + ulong rsector_sav; + kdev_t rdev_tmp = bh->b_dev; + kdev_t rdev_sav; + lv_t *lv = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]; + + + if (!(lv->lv_status & LV_ACTIVE)) { + printk(KERN_ALERT + "%s - lvm_map: ll_rw_blk for inactive LV %s\n", + lvm_name, lv->lv_name); + return -1; + } +/* + if ( lv->lv_access & LV_SNAPSHOT) + printk ( "%s -- %02d:%02d block: %lu rw: %d\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), bh->b_blocknr, rw); + */ + + /* take care of snapshot chunk writes before + check for writable logical volume */ + if ((lv->lv_access & LV_SNAPSHOT) && + MAJOR(bh->b_rdev) != 0 && + MAJOR(bh->b_rdev) != MAJOR_NR && + (rw == WRITEA || rw == WRITE)) + { + printk ( "%s -- doing snapshot write for %02d:%02d[%02d:%02d] b_blocknr: %lu b_rsector: %lu\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), MAJOR ( bh->b_rdev), MINOR ( bh->b_rdev), bh->b_blocknr, bh->b_rsector); + return 0; + } + + if ((rw == WRITE || 
rw == WRITEA) && + !(lv->lv_access & LV_WRITE)) { + printk(KERN_CRIT + "%s - lvm_map: ll_rw_blk write for readonly LV %s\n", + lvm_name, lv->lv_name); + return -1; + } +#ifdef DEBUG_MAP + printk(KERN_DEBUG + "%s - lvm_map minor:%d *rdev: %02d:%02d *rsector: %lu " + "size:%lu\n", + lvm_name, minor, + MAJOR(rdev_tmp), + MINOR(rdev_tmp), + rsector_tmp, size); +#endif + + if (rsector_tmp + size > lv->lv_size) { + printk(KERN_ALERT + "%s - lvm_map *rsector: %lu or size: %lu wrong for" + " minor: %2d\n", lvm_name, rsector_tmp, size, minor); + return -1; + } + rsector_sav = rsector_tmp; + rdev_sav = rdev_tmp; + +lvm_second_remap: + /* linear mapping */ + if (lv->lv_stripes < 2) { + /* get the index */ + index = rsector_tmp / vg[VG_BLK(minor)]->pe_size; + pe_start = lv->lv_current_pe[index].pe; + rsector_tmp = lv->lv_current_pe[index].pe + + (rsector_tmp % vg[VG_BLK(minor)]->pe_size); + rdev_tmp = lv->lv_current_pe[index].dev; + +#ifdef DEBUG_MAP + printk(KERN_DEBUG + "lv_current_pe[%ld].pe: %ld rdev: %02d:%02d rsector:%ld\n", + index, + lv->lv_current_pe[index].pe, + MAJOR(rdev_tmp), + MINOR(rdev_tmp), + rsector_tmp); +#endif + + /* striped mapping */ + } else { + ulong stripe_index; + ulong stripe_length; + + stripe_length = vg[VG_BLK(minor)]->pe_size * lv->lv_stripes; + stripe_index = (rsector_tmp % stripe_length) / lv->lv_stripesize; + index = rsector_tmp / stripe_length + + (stripe_index % lv->lv_stripes) * + (lv->lv_allocated_le / lv->lv_stripes); + pe_start = lv->lv_current_pe[index].pe; + rsector_tmp = lv->lv_current_pe[index].pe + + (rsector_tmp % stripe_length) - + (stripe_index % lv->lv_stripes) * lv->lv_stripesize - + stripe_index / lv->lv_stripes * + (lv->lv_stripes - 1) * lv->lv_stripesize; + rdev_tmp = lv->lv_current_pe[index].dev; + } + +#ifdef DEBUG_MAP + printk(KERN_DEBUG + "lv_current_pe[%ld].pe: %ld rdev: %02d:%02d rsector:%ld\n" + "stripe_length: %ld stripe_index: %ld\n", + index, + lv->lv_current_pe[index].pe, + MAJOR(rdev_tmp), + MINOR(rdev_tmp), + 
rsector_tmp, + stripe_length, + stripe_index); +#endif + + /* handle physical extents on the move */ + if (pe_lock_req.lock == LOCK_PE) { + if (rdev_tmp == pe_lock_req.data.pv_dev && + rsector_tmp >= pe_lock_req.data.pv_offset && + rsector_tmp < (pe_lock_req.data.pv_offset + + vg[VG_BLK(minor)]->pe_size)) { + sleep_on(&lvm_map_wait); + rsector_tmp = rsector_sav; + rdev_tmp = rdev_sav; + goto lvm_second_remap; + } + } + /* statistic */ + if (rw == WRITE || rw == WRITEA) + lv->lv_current_pe[index].writes++; + else + lv->lv_current_pe[index].reads++; + + /* snapshot volume exception handling on physical device address base */ + if (lv->lv_access & (LV_SNAPSHOT | LV_SNAPSHOT_ORG)) { + /* original logical volume */ + if (lv->lv_access & LV_SNAPSHOT_ORG) { + if (rw == WRITE || rw == WRITEA) + { + lv_t *lv_ptr; + + /* start with first snapshot and loop thrugh all of them */ + for (lv_ptr = lv->lv_snapshot_next; + lv_ptr != NULL; + lv_ptr = lv_ptr->lv_snapshot_next) { + down(&lv->lv_snapshot_org->lv_snapshot_sem); + /* do we still have exception storage for this snapshot free? 
*/ + if (lv_ptr->lv_block_exception != NULL) { + rdev_sav = rdev_tmp; + rsector_sav = rsector_tmp; + if (!lvm_snapshot_remap_block(&rdev_tmp, + &rsector_tmp, + pe_start, + lv_ptr)) { + /* create a new mapping */ + ret = lvm_snapshot_COW(rdev_tmp, + rsector_tmp, + pe_start, + rsector_sav, + lv_ptr); + } + rdev_tmp = rdev_sav; + rsector_tmp = rsector_sav; + } + up(&lv->lv_snapshot_org->lv_snapshot_sem); + } + } + } else { + /* remap snapshot logical volume */ + down(&lv->lv_snapshot_sem); + if (lv->lv_block_exception != NULL) + lvm_snapshot_remap_block(&rdev_tmp, &rsector_tmp, pe_start, lv); + up(&lv->lv_snapshot_sem); + } + } + bh->b_rdev = rdev_tmp; + bh->b_rsector = rsector_tmp; + + return ret; +} /* lvm_map() */ + + +/* + * internal support functions + */ + +#ifdef LVM_HD_NAME +/* + * generate "hard disk" name + */ +void lvm_hd_name(char *buf, int minor) +{ + int len = 0; + lv_t *lv_ptr; + + if (vg[VG_BLK(minor)] == NULL || + (lv_ptr = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]) == NULL) + return; + len = strlen(lv_ptr->lv_name) - 5; + memcpy(buf, &lv_ptr->lv_name[5], len); + buf[len] = 0; + return; +} +#endif + + +/* + * this one never should be called... 
+ */ +static void lvm_dummy_device_request(request_queue_t * t) +{ + printk(KERN_EMERG + "%s -- oops, got lvm request for %02d:%02d [sector: %lu]\n", + lvm_name, + MAJOR(CURRENT->rq_dev), + MINOR(CURRENT->rq_dev), + CURRENT->sector); + return; +} + + +/* + * make request function + */ +static void lvm_make_request_fn(int rw, struct buffer_head *bh) +{ + lvm_map(bh, rw); + if (bh->b_rdev != MD_MAJOR) generic_make_request(rw, bh); + return; +} + + +/******************************************************************** + * + * Character device support functions + * + ********************************************************************/ +/* + * character device support function logical volume manager lock + */ +static int lvm_do_lock_lvm(void) +{ +lock_try_again: + spin_lock(&lvm_lock); + if (lock != 0 && lock != current->pid) { +#ifdef DEBUG_IOCTL + printk(KERN_INFO "lvm_do_lock_lvm: %s is locked by pid %d ...\n", + lvm_name, lock); +#endif + spin_unlock(&lvm_lock); + interruptible_sleep_on(&lvm_wait); + if (current->sigpending != 0) + return -EINTR; +#ifdef LVM_TOTAL_RESET + if (lvm_reset_spindown > 0) + return -EACCES; +#endif + goto lock_try_again; + } + lock = current->pid; + spin_unlock(&lvm_lock); + return 0; +} /* lvm_do_lock_lvm */ + + +/* + * character device support function lock/unlock physical extend + */ +static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg) +{ + uint p; + + if (vg_ptr == NULL) return -ENXIO; + if (copy_from_user(&pe_lock_req, arg, + sizeof(pe_lock_req_t)) != 0) return -EFAULT; + + switch (pe_lock_req.lock) { + case LOCK_PE: + for (p = 0; p < vg_ptr->pv_max; p++) { + if (vg_ptr->pv[p] != NULL && + pe_lock_req.data.pv_dev == + vg_ptr->pv[p]->pv_dev) + break; + } + if (p == vg_ptr->pv_max) return -ENXIO; + + pe_lock_req.lock = UNLOCK_PE; + fsync_dev(pe_lock_req.data.lv_dev); + pe_lock_req.lock = LOCK_PE; + break; + + case UNLOCK_PE: + pe_lock_req.lock = UNLOCK_PE; + pe_lock_req.data.lv_dev = \ + pe_lock_req.data.pv_dev = \ + 
pe_lock_req.data.pv_offset = 0; + wake_up(&lvm_map_wait); + break; + + default: + return -EINVAL; + } + return 0; +} + + +/* + * character device support function logical extend remap + */ +static int lvm_do_le_remap(vg_t *vg_ptr, void *arg) +{ + uint l, le; + lv_t *lv_ptr; + + if (vg_ptr == NULL) return -ENXIO; + if (copy_from_user(&le_remap_req, arg, + sizeof(le_remap_req_t)) != 0) + return -EFAULT; + + for (l = 0; l < vg_ptr->lv_max; l++) { + lv_ptr = vg_ptr->lv[l]; + if (lv_ptr != NULL && + strcmp(lv_ptr->lv_name, + le_remap_req.lv_name) == 0) { + for (le = 0; le < lv_ptr->lv_allocated_le; + le++) { + if (lv_ptr->lv_current_pe[le].dev == + le_remap_req.old_dev && + lv_ptr->lv_current_pe[le].pe == + le_remap_req.old_pe) { + lv_ptr->lv_current_pe[le].dev = + le_remap_req.new_dev; + lv_ptr->lv_current_pe[le].pe = + le_remap_req.new_pe; + return 0; + } + } + return -EINVAL; + } + } + return -ENXIO; +} /* lvm_do_le_remap() */ + + +/* + * character device support function VGDA create + */ +int lvm_do_vg_create(int minor, void *arg) +{ + int snaporg_minor = 0; + ulong l, p; + lv_t lv; + vg_t *vg_ptr; + pv_t *pv_ptr; + lv_t *lv_ptr; + + if (vg[VG_CHR(minor)] != NULL) return -EPERM; + + if ((vg_ptr = kmalloc(sizeof(vg_t),GFP_KERNEL)) == NULL) { + printk(KERN_CRIT + "%s -- VG_CREATE: kmalloc error VG at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + /* get the volume group structure */ + if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) { + kfree(vg_ptr); + return -EFAULT; + } + /* we are not that active so far... 
*/ + vg_ptr->vg_status &= ~VG_ACTIVE; + vg[VG_CHR(minor)] = vg_ptr; + + vg[VG_CHR(minor)]->pe_allocated = 0; + if (vg_ptr->pv_max > ABS_MAX_PV) { + printk(KERN_WARNING + "%s -- Can't activate VG: ABS_MAX_PV too small\n", + lvm_name); + kfree(vg_ptr); + vg[VG_CHR(minor)] = NULL; + return -EPERM; + } + if (vg_ptr->lv_max > ABS_MAX_LV) { + printk(KERN_WARNING + "%s -- Can't activate VG: ABS_MAX_LV too small for %u\n", + lvm_name, vg_ptr->lv_max); + kfree(vg_ptr); + vg_ptr = NULL; + return -EPERM; + } + /* get the physical volume structures */ + vg_ptr->pv_act = vg_ptr->pv_cur = 0; + for (p = 0; p < vg_ptr->pv_max; p++) { + /* user space address */ + if ((pvp = vg_ptr->pv[p]) != NULL) { + pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL); + if (pv_ptr == NULL) { + printk(KERN_CRIT + "%s -- VG_CREATE: kmalloc error PV at line %d\n", + lvm_name, __LINE__); + lvm_do_vg_remove(minor); + return -ENOMEM; + } + if (copy_from_user(pv_ptr, pvp, sizeof(pv_t)) != 0) { + lvm_do_vg_remove(minor); + return -EFAULT; + } + /* We don't need the PE list + in kernel space as with LVs pe_t list (see below) */ + pv_ptr->pe = NULL; + pv_ptr->pe_allocated = 0; + pv_ptr->pv_status = PV_ACTIVE; + vg_ptr->pv_act++; + vg_ptr->pv_cur++; + +#ifdef LVM_GET_INODE + /* insert a dummy inode for fs_may_mount */ + pv_ptr->inode = lvm_get_inode(pv_ptr->pv_dev); +#endif + } + } + + /* get the logical volume structures */ + vg_ptr->lv_cur = 0; + for (l = 0; l < vg_ptr->lv_max; l++) { + /* user space address */ + if ((lvp = vg_ptr->lv[l]) != NULL) { + if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) { + lvm_do_vg_remove(minor); + return -EFAULT; + } + vg_ptr->lv[l] = NULL; + if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) { + lvm_do_vg_remove(minor); + return -EFAULT; + } + } + } + + /* Second path to correct snapshot logical volumes which are not + in place during first path above */ + for (l = 0; l < vg_ptr->lv_max; l++) { + if ((lv_ptr = vg_ptr->lv[l]) != NULL && + vg_ptr->lv[l]->lv_access & 
LV_SNAPSHOT) { + snaporg_minor = lv_ptr->lv_snapshot_minor; + if (vg_ptr->lv[LV_BLK(snaporg_minor)] != NULL) { + /* get pointer to original logical volume */ + lv_ptr = vg_ptr->lv[l]->lv_snapshot_org = + vg_ptr->lv[LV_BLK(snaporg_minor)]; + + /* set necessary fields of original logical volume */ + lv_ptr->lv_access |= LV_SNAPSHOT_ORG; + lv_ptr->lv_snapshot_minor = 0; + lv_ptr->lv_snapshot_org = lv_ptr; + lv_ptr->lv_snapshot_prev = NULL; + + /* find last snapshot logical volume in the chain */ + while (lv_ptr->lv_snapshot_next != NULL) + lv_ptr = lv_ptr->lv_snapshot_next; + + /* set back pointer to this last one in our new logical volume */ + vg_ptr->lv[l]->lv_snapshot_prev = lv_ptr; + + /* last logical volume now points to our new snapshot volume */ + lv_ptr->lv_snapshot_next = vg_ptr->lv[l]; + + /* now point to the new one */ + lv_ptr = lv_ptr->lv_snapshot_next; + + /* set necessary fields of new snapshot logical volume */ + lv_ptr->lv_snapshot_next = NULL; + lv_ptr->lv_current_pe = + vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_current_pe; + lv_ptr->lv_allocated_le = + vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_allocated_le; + lv_ptr->lv_current_le = + vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_current_le; + lv_ptr->lv_size = + vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_size; + } + } + } + + vg_count++; + + /* let's go active */ + vg_ptr->vg_status |= VG_ACTIVE; + + MOD_INC_USE_COUNT; + + return 0; +} /* lvm_do_vg_create() */ + + +/* + * character device support function VGDA extend + */ +static int lvm_do_vg_extend(vg_t *vg_ptr, void *arg) +{ + uint p; + pv_t *pv_ptr; + + if (vg_ptr == NULL) return -ENXIO; + if (vg_ptr->pv_cur < vg_ptr->pv_max) { + for (p = 0; p < vg_ptr->pv_max; p++) { + if (vg_ptr->pv[p] == NULL) { + if ((pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL)) == NULL) { + printk(KERN_CRIT + "%s -- VG_EXTEND: kmalloc error PV at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + if (copy_from_user(pv_ptr, arg, sizeof(pv_t)) != 0) { + kfree(pv_ptr); + 
vg_ptr->pv[p] = NULL; + return -EFAULT; + } + + pv_ptr->pv_status = PV_ACTIVE; + /* We don't need the PE list + in kernel space like LVs pe_t list */ + pv_ptr->pe = NULL; + vg_ptr->pv_cur++; + vg_ptr->pv_act++; + vg_ptr->pe_total += + pv_ptr->pe_total; +#ifdef LVM_GET_INODE + /* insert a dummy inode for fs_may_mount */ + pv_ptr->inode = lvm_get_inode(pv_ptr->pv_dev); +#endif + return 0; + } + } + } +return -EPERM; +} /* lvm_do_vg_extend() */ + + +/* + * character device support function VGDA reduce + */ +static int lvm_do_vg_reduce(vg_t *vg_ptr, void *arg) +{ + uint p; + pv_t *pv_ptr; + + if (vg_ptr == NULL) return -ENXIO; + if (copy_from_user(pv_name, arg, sizeof(pv_name)) != 0) + return -EFAULT; + + for (p = 0; p < vg_ptr->pv_max; p++) { + pv_ptr = vg_ptr->pv[p]; + if (pv_ptr != NULL && + strcmp(pv_ptr->pv_name, + pv_name) == 0) { + if (pv_ptr->lv_cur > 0) return -EPERM; + vg_ptr->pe_total -= + pv_ptr->pe_total; + vg_ptr->pv_cur--; + vg_ptr->pv_act--; +#ifdef LVM_GET_INODE + lvm_clear_inode(pv_ptr->inode); +#endif + kfree(pv_ptr); + /* Make PV pointer array contiguous */ + for (; p < vg_ptr->pv_max - 1; p++) + vg_ptr->pv[p] = vg_ptr->pv[p + 1]; + vg_ptr->pv[p + 1] = NULL; + return 0; + } + } + return -ENXIO; +} /* lvm_do_vg_reduce */ + + +/* + * character device support function VGDA remove + */ +static int lvm_do_vg_remove(int minor) +{ + int i; + vg_t *vg_ptr = vg[VG_CHR(minor)]; + pv_t *pv_ptr; + + if (vg_ptr == NULL) return -ENXIO; + +#ifdef LVM_TOTAL_RESET + if (vg_ptr->lv_open > 0 && lvm_reset_spindown == 0) +#else + if (vg_ptr->lv_open > 0) +#endif + return -EPERM; + + /* let's go inactive */ + vg_ptr->vg_status &= ~VG_ACTIVE; + + /* free LVs */ + /* first free snapshot logical volumes */ + for (i = 0; i < vg_ptr->lv_max; i++) { + if (vg_ptr->lv[i] != NULL && + vg_ptr->lv[i]->lv_access & LV_SNAPSHOT) { + lvm_do_lv_remove(minor, NULL, i); + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(1); + } + } + /* then free the rest of the LVs */ + for (i = 
0; i < vg_ptr->lv_max; i++) { + if (vg_ptr->lv[i] != NULL) { + lvm_do_lv_remove(minor, NULL, i); + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(1); + } + } + + /* free PVs */ + for (i = 0; i < vg_ptr->pv_max; i++) { + if ((pv_ptr = vg_ptr->pv[i]) != NULL) { +#ifdef DEBUG_KFREE + printk(KERN_DEBUG + "%s -- kfree %d\n", lvm_name, __LINE__); +#endif +#ifdef LVM_GET_INODE + lvm_clear_inode(pv_ptr->inode); +#endif + kfree(pv_ptr); + vg[VG_CHR(minor)]->pv[i] = NULL; + } + } + +#ifdef DEBUG_KFREE + printk(KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); +#endif + kfree(vg_ptr); + vg[VG_CHR(minor)] = NULL; + + vg_count--; + + MOD_DEC_USE_COUNT; + + return 0; +} /* lvm_do_vg_remove() */ + + +/* + * character device support function logical volume create + */ +static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) +{ + int l, le, l_new, p, size; + ulong lv_status_save; + lv_block_exception_t *lvbe = lv->lv_block_exception; + vg_t *vg_ptr = vg[VG_CHR(minor)]; + lv_t *lv_ptr = NULL; + + if ((pep = lv->lv_current_pe) == NULL) return -EINVAL; + if (lv->lv_chunk_size > LVM_SNAPSHOT_MAX_CHUNK) + return -EINVAL; + + for (l = 0; l < vg_ptr->lv_max; l++) { + if (vg_ptr->lv[l] != NULL && + strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0) + return -EEXIST; + } + + /* in case of lv_remove(), lv_create() pair; for eg. 
lvrename does this */ + l_new = -1; + if (vg_ptr->lv[lv->lv_number] == NULL) + l_new = lv->lv_number; + else { + for (l = 0; l < vg_ptr->lv_max; l++) { + if (vg_ptr->lv[l] == NULL) + if (l_new == -1) l_new = l; + } + } + if (l_new == -1) return -EPERM; + else l = l_new; + + if ((lv_ptr = kmalloc(sizeof(lv_t),GFP_KERNEL)) == NULL) {; + printk(KERN_CRIT "%s -- LV_CREATE: kmalloc error LV at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + /* copy preloaded LV */ + memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t)); + + lv_status_save = lv_ptr->lv_status; + lv_ptr->lv_status &= ~LV_ACTIVE; + lv_ptr->lv_snapshot_org = \ + lv_ptr->lv_snapshot_prev = \ + lv_ptr->lv_snapshot_next = NULL; + lv_ptr->lv_block_exception = NULL; + init_MUTEX(&lv_ptr->lv_snapshot_sem); + vg_ptr->lv[l] = lv_ptr; + + /* get the PE structures from user space if this + is no snapshot logical volume */ + if (!(lv_ptr->lv_access & LV_SNAPSHOT)) { + size = lv_ptr->lv_allocated_le * sizeof(pe_t); + if ((lv_ptr->lv_current_pe = vmalloc(size)) == NULL) { + printk(KERN_CRIT + "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte " + "at line %d\n", + lvm_name, size, __LINE__); +#ifdef DEBUG_KFREE + printk(KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); +#endif + kfree(lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -ENOMEM; + } + if (copy_from_user(lv_ptr->lv_current_pe, pep, size)) { + vfree(lv_ptr->lv_current_pe); + kfree(lv_ptr); + vg_ptr->lv[l] = NULL; + return -EFAULT; + } + /* correct the PE count in PVs */ + for (le = 0; le < lv_ptr->lv_allocated_le; le++) { + vg_ptr->pe_allocated++; + for (p = 0; p < vg_ptr->pv_cur; p++) { + if (vg_ptr->pv[p]->pv_dev == + lv_ptr->lv_current_pe[le].dev) + vg_ptr->pv[p]->pe_allocated++; + } + } + } else { + /* Get snapshot exception data and block list */ + if (lvbe != NULL) { + lv_ptr->lv_snapshot_org = + vg_ptr->lv[LV_BLK(lv_ptr->lv_snapshot_minor)]; + if (lv_ptr->lv_snapshot_org != NULL) { + size = lv_ptr->lv_remap_end * 
sizeof(lv_block_exception_t); + if ((lv_ptr->lv_block_exception = vmalloc(size)) == NULL) { + printk(KERN_CRIT + "%s -- lvm_do_lv_create: vmalloc error LV_BLOCK_EXCEPTION " + "of %d byte at line %d\n", + lvm_name, size, __LINE__); +#ifdef DEBUG_KFREE + printk(KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); +#endif + kfree(lv_ptr); + vg_ptr->lv[l] = NULL; + return -ENOMEM; + } + if (copy_from_user(lv_ptr->lv_block_exception, lvbe, size)) { + vfree(lv_ptr->lv_block_exception); + kfree(lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -EFAULT; + } + /* get pointer to original logical volume */ + lv_ptr = lv_ptr->lv_snapshot_org; + + lv_ptr->lv_snapshot_minor = 0; + lv_ptr->lv_snapshot_org = lv_ptr; + lv_ptr->lv_snapshot_prev = NULL; + /* walk thrugh the snapshot list */ + while (lv_ptr->lv_snapshot_next != NULL) + lv_ptr = lv_ptr->lv_snapshot_next; + /* now lv_ptr points to the last existing snapshot in the chain */ + vg_ptr->lv[l]->lv_snapshot_prev = lv_ptr; + /* our new one now back points to the previous last in the chain */ + lv_ptr = vg_ptr->lv[l]; + /* now lv_ptr points to our new last snapshot logical volume */ + lv_ptr->lv_snapshot_org = lv_ptr->lv_snapshot_prev->lv_snapshot_org; + lv_ptr->lv_snapshot_next = NULL; + lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe; + lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le; + lv_ptr->lv_current_le = lv_ptr->lv_snapshot_org->lv_current_le; + lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size; + lv_ptr->lv_stripes = lv_ptr->lv_snapshot_org->lv_stripes; + lv_ptr->lv_stripesize = lv_ptr->lv_snapshot_org->lv_stripesize; + { + int err = lvm_snapshot_alloc(lv_ptr); + if (err) + { + vfree(lv_ptr->lv_block_exception); + kfree(lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return err; + } + } + } else { + vfree(lv_ptr->lv_block_exception); + kfree(lv_ptr); + vg_ptr->lv[l] = NULL; + return -EFAULT; + } + } else { + kfree(vg_ptr->lv[l]); + vg_ptr->lv[l] = NULL; + return -EINVAL; + } + } /* if 
( vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) */ + + lv_ptr = vg_ptr->lv[l]; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; + lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; + vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = vg_ptr->vg_number; + vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = lv_ptr->lv_number; + LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead); + vg_ptr->lv_cur++; + lv_ptr->lv_status = lv_status_save; + + /* optionally add our new snapshot LV */ + if (lv_ptr->lv_access & LV_SNAPSHOT) { + /* sync the original logical volume */ + fsync_dev(lv_ptr->lv_snapshot_org->lv_dev); + /* put ourselve into the chain */ + lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr; + lv_ptr->lv_snapshot_org->lv_access |= LV_SNAPSHOT_ORG; + } + return 0; +} /* lvm_do_lv_create() */ + + +/* + * character device support function logical volume remove + */ +static int lvm_do_lv_remove(int minor, char *lv_name, int l) +{ + uint le, p; + vg_t *vg_ptr = vg[VG_CHR(minor)]; + lv_t *lv_ptr; + + if (l == -1) { + for (l = 0; l < vg_ptr->lv_max; l++) { + if (vg_ptr->lv[l] != NULL && + strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0) { + break; + } + } + } + if (l == vg_ptr->lv_max) return -ENXIO; + + lv_ptr = vg_ptr->lv[l]; +#ifdef LVM_TOTAL_RESET + if (lv_ptr->lv_open > 0 && lvm_reset_spindown == 0) +#else + if (lv_ptr->lv_open > 0) +#endif + return -EBUSY; + + /* check for deletion of snapshot source while + snapshot volume still exists */ + if ((lv_ptr->lv_access & LV_SNAPSHOT_ORG) && + lv_ptr->lv_snapshot_next != NULL) + return -EPERM; + + lv_ptr->lv_status |= LV_SPINDOWN; + + /* sync the buffers */ + fsync_dev(lv_ptr->lv_dev); + + lv_ptr->lv_status &= ~LV_ACTIVE; + + /* invalidate the buffers */ + invalidate_buffers(lv_ptr->lv_dev); + + /* reset generic hd */ + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0; + 
lvm_size[MINOR(lv_ptr->lv_dev)] = 0; + + /* reset VG/LV mapping */ + vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = ABS_MAX_VG; + vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = -1; + + /* correct the PE count in PVs if this is no snapshot logical volume */ + if (!(lv_ptr->lv_access & LV_SNAPSHOT)) { + /* only if this is no snapshot logical volume because + we share the lv_current_pe[] structs with the + original logical volume */ + for (le = 0; le < lv_ptr->lv_allocated_le; le++) { + vg_ptr->pe_allocated--; + for (p = 0; p < vg_ptr->pv_cur; p++) { + if (vg_ptr->pv[p]->pv_dev == + lv_ptr->lv_current_pe[le].dev) + vg_ptr->pv[p]->pe_allocated--; + } + } + vfree(lv_ptr->lv_current_pe); + /* LV_SNAPSHOT */ + } else { + /* remove this snapshot logical volume from the chain */ + lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next; + if (lv_ptr->lv_snapshot_next != NULL) { + lv_ptr->lv_snapshot_next->lv_snapshot_prev = + lv_ptr->lv_snapshot_prev; + } + /* no more snapshots? */ + if (lv_ptr->lv_snapshot_org->lv_snapshot_next == NULL) + lv_ptr->lv_snapshot_org->lv_access &= ~LV_SNAPSHOT_ORG; + lvm_snapshot_release(lv_ptr); + } + +#ifdef DEBUG_KFREE + printk(KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); +#endif + kfree(lv_ptr); + vg_ptr->lv[l] = NULL; + vg_ptr->lv_cur--; + return 0; +} /* lvm_do_lv_remove() */ + + +/* + * character device support function logical volume extend / reduce + */ +static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv) +{ + int l, le, p, size, old_allocated_le; + uint32_t end, lv_status_save; + vg_t *vg_ptr = vg[VG_CHR(minor)]; + lv_t *lv_ptr; + pe_t *pe; + + if ((pep = lv->lv_current_pe) == NULL) return -EINVAL; + + for (l = 0; l < vg_ptr->lv_max; l++) { + if (vg_ptr->lv[l] != NULL && + strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0) + break; + } + if (l == vg_ptr->lv_max) return -ENXIO; + lv_ptr = vg_ptr->lv[l]; + + /* check for active snapshot */ + if (lv->lv_access & (LV_SNAPSHOT | LV_SNAPSHOT_ORG)) return 
-EPERM; + + if ((pe = vmalloc(size = lv->lv_current_le * sizeof(pe_t))) == NULL) { + printk(KERN_CRIT + "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_CURRENT_PE " + "of %d Byte at line %d\n", + lvm_name, size, __LINE__); + return -ENOMEM; + } + /* get the PE structures from user space */ + if (copy_from_user(pe, pep, size)) { + vfree(pe); + return -EFAULT; + } + +#ifdef DEBUG + printk(KERN_DEBUG + "%s -- fsync_dev and " + "invalidate_buffers for %s [%s] in %s\n", + lvm_name, lv_ptr->lv_name, + kdevname(lv_ptr->lv_dev), + vg_ptr->vg_name); +#endif + + lv_ptr->lv_status |= LV_SPINDOWN; + fsync_dev(lv_ptr->lv_dev); + lv_ptr->lv_status &= ~LV_ACTIVE; + invalidate_buffers(lv_ptr->lv_dev); + + /* reduce allocation counters on PV(s) */ + for (le = 0; le < lv_ptr->lv_allocated_le; le++) { + vg_ptr->pe_allocated--; + for (p = 0; p < vg_ptr->pv_cur; p++) { + if (vg_ptr->pv[p]->pv_dev == + lv_ptr->lv_current_pe[le].dev) { + vg_ptr->pv[p]->pe_allocated--; + break; + } + } + } + + + /* save pointer to "old" lv/pe pointer array */ + pep1 = lv_ptr->lv_current_pe; + end = lv_ptr->lv_current_le; + + /* save open counter */ + lv_open = lv_ptr->lv_open; + + /* save # of old allocated logical extents */ + old_allocated_le = lv_ptr->lv_allocated_le; + + /* copy preloaded LV */ + lv_status_save = lv->lv_status; + lv->lv_status |= LV_SPINDOWN; + lv->lv_status &= ~LV_ACTIVE; + memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t)); + lv_ptr->lv_current_pe = pe; + lv_ptr->lv_open = lv_open; + + /* save availiable i/o statistic data */ + /* linear logical volume */ + if (lv_ptr->lv_stripes < 2) { + /* Check what last LE shall be used */ + if (end > lv_ptr->lv_current_le) end = lv_ptr->lv_current_le; + for (le = 0; le < end; le++) { + lv_ptr->lv_current_pe[le].reads = pep1[le].reads; + lv_ptr->lv_current_pe[le].writes = pep1[le].writes; + } + /* striped logical volume */ + } else { + uint i, j, source, dest, end, old_stripe_size, new_stripe_size; + + old_stripe_size = old_allocated_le / 
lv_ptr->lv_stripes; + new_stripe_size = lv_ptr->lv_allocated_le / lv_ptr->lv_stripes; + end = old_stripe_size; + if (end > new_stripe_size) end = new_stripe_size; + for (i = source = dest = 0; + i < lv_ptr->lv_stripes; i++) { + for (j = 0; j < end; j++) { + lv_ptr->lv_current_pe[dest + j].reads = + pep1[source + j].reads; + lv_ptr->lv_current_pe[dest + j].writes = + pep1[source + j].writes; + } + source += old_stripe_size; + dest += new_stripe_size; + } + } + vfree(pep1); + pep1 = NULL; + + + /* extend the PE count in PVs */ + for (le = 0; le < lv_ptr->lv_allocated_le; le++) { + vg_ptr->pe_allocated++; + for (p = 0; p < vg_ptr->pv_cur; p++) { + if (vg_ptr->pv[p]->pv_dev == + vg_ptr->lv[l]->lv_current_pe[le].dev) { + vg_ptr->pv[p]->pe_allocated++; + break; + } + } + } + + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; + lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; + /* vg_lv_map array doesn't have to be changed here */ + + LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead); + lv_ptr->lv_status = lv_status_save; + + return 0; +} /* lvm_do_lv_extend_reduce() */ + + +/* + * character device support function logical volume status by name + */ +static int lvm_do_lv_status_byname(vg_t *vg_ptr, void *arg) +{ + uint l; + ulong size; + lv_t lv; + lv_t *lv_ptr; + lv_status_byname_req_t lv_status_byname_req; + + if (vg_ptr == NULL) return -ENXIO; + if (copy_from_user(&lv_status_byname_req, arg, + sizeof(lv_status_byname_req_t)) != 0) + return -EFAULT; + + if (lv_status_byname_req.lv == NULL) return -EINVAL; + if (copy_from_user(&lv, lv_status_byname_req.lv, + sizeof(lv_t)) != 0) + return -EFAULT; + + for (l = 0; l < vg_ptr->lv_max; l++) { + lv_ptr = vg_ptr->lv[l]; + if (lv_ptr != NULL && + strcmp(lv_ptr->lv_name, + lv_status_byname_req.lv_name) == 0) { + if (copy_to_user(lv_status_byname_req.lv, + lv_ptr, + sizeof(lv_t)) != 0) + return -EFAULT; + + if (lv.lv_current_pe != NULL) { + size = 
lv_ptr->lv_allocated_le * + sizeof(pe_t); + if (copy_to_user(lv.lv_current_pe, + lv_ptr->lv_current_pe, + size) != 0) + return -EFAULT; + } + return 0; + } + } + return -ENXIO; +} /* lvm_do_lv_status_byname() */ + + +/* + * character device support function logical volume status by index + */ +static int lvm_do_lv_status_byindex(vg_t *vg_ptr,void *arg) +{ + ulong size; + lv_t lv; + lv_t *lv_ptr; + lv_status_byindex_req_t lv_status_byindex_req; + + if (vg_ptr == NULL) return -ENXIO; + if (copy_from_user(&lv_status_byindex_req, arg, + sizeof(lv_status_byindex_req)) != 0) + return -EFAULT; + + if ((lvp = lv_status_byindex_req.lv) == NULL) + return -EINVAL; + if ( ( lv_ptr = vg_ptr->lv[lv_status_byindex_req.lv_index]) == NULL) + return -ENXIO; + + if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) + return -EFAULT; + + if (copy_to_user(lvp, lv_ptr, sizeof(lv_t)) != 0) + return -EFAULT; + + if (lv.lv_current_pe != NULL) { + size = lv_ptr->lv_allocated_le * sizeof(pe_t); + if (copy_to_user(lv.lv_current_pe, + lv_ptr->lv_current_pe, + size) != 0) + return -EFAULT; + } + return 0; +} /* lvm_do_lv_status_byindex() */ + + +/* + * character device support function physical volume change + */ +static int lvm_do_pv_change(vg_t *vg_ptr, void *arg) +{ + uint p; + pv_t *pv_ptr; +#ifdef LVM_GET_INODE + struct inode *inode_sav; +#endif + + if (vg_ptr == NULL) return -ENXIO; + if (copy_from_user(&pv_change_req, arg, + sizeof(pv_change_req)) != 0) + return -EFAULT; + + for (p = 0; p < vg_ptr->pv_max; p++) { + pv_ptr = vg_ptr->pv[p]; + if (pv_ptr != NULL && + strcmp(pv_ptr->pv_name, + pv_change_req.pv_name) == 0) { +#ifdef LVM_GET_INODE + inode_sav = pv_ptr->inode; +#endif + if (copy_from_user(pv_ptr, + pv_change_req.pv, + sizeof(pv_t)) != 0) + return -EFAULT; + + /* We don't need the PE list + in kernel space as with LVs pe_t list */ + pv_ptr->pe = NULL; +#ifdef LVM_GET_INODE + pv_ptr->inode = inode_sav; +#endif + return 0; + } + } + return -ENXIO; +} /* lvm_do_pv_change() */ + +/* + * 
character device support function get physical volume status + */ +static int lvm_do_pv_status(vg_t *vg_ptr, void *arg) +{ + uint p; + pv_t *pv_ptr; + + if (vg_ptr == NULL) return -ENXIO; + if (copy_from_user(&pv_status_req, arg, + sizeof(pv_status_req)) != 0) + return -EFAULT; + + for (p = 0; p < vg_ptr->pv_max; p++) { + pv_ptr = vg_ptr->pv[p]; + if (pv_ptr != NULL && + strcmp(pv_ptr->pv_name, + pv_status_req.pv_name) == 0) { + if (copy_to_user(pv_status_req.pv, + pv_ptr, + sizeof(pv_t)) != 0) + return -EFAULT; + return 0; + } + } + return -ENXIO; +} /* lvm_do_pv_status() */ + + +/* + * support function initialize gendisk variables + */ +#ifdef __initfunc +__initfunc(void lvm_geninit(struct gendisk *lvm_gdisk)) +#else +void __init + lvm_geninit(struct gendisk *lvm_gdisk) +#endif +{ + int i = 0; + +#ifdef DEBUG_GENDISK + printk(KERN_DEBUG "%s -- lvm_gendisk\n", lvm_name); +#endif + + for (i = 0; i < MAX_LV; i++) { + lvm_gendisk.part[i].start_sect = -1; /* avoid partition check */ + lvm_size[i] = lvm_gendisk.part[i].nr_sects = 0; + lvm_blocksizes[i] = BLOCK_SIZE; + } + + blksize_size[MAJOR_NR] = lvm_blocksizes; + blk_size[MAJOR_NR] = lvm_size; + + return; +} /* lvm_gen_init() */ + + +#ifdef LVM_GET_INODE +/* + * support function to get an empty inode + * + * Gets an empty inode to be inserted into the inode hash, + * so that a physical volume can't be mounted. + * This is analog to drivers/block/md.c + * + * Is this the real thing? + * + */ +struct inode *lvm_get_inode(int dev) +{ + struct inode *inode_this = NULL; + + /* Lock the device by inserting a dummy inode. 
*/ + inode_this = get_empty_inode(); + inode_this->i_dev = dev; + insert_inode_hash(inode_this); + return inode_this; +} + + +/* + * support function to clear an inode + * + */ +void lvm_clear_inode(struct inode *inode) +{ +#ifdef I_FREEING + inode->i_state |= I_FREEING; +#endif + clear_inode(inode); + return; +} +#endif /* #ifdef LVM_GET_INODE */ diff --git a/drivers/block/md.c b/drivers/block/md.c index 752c7b0ab..b258fc6c5 100644 --- a/drivers/block/md.c +++ b/drivers/block/md.c @@ -11,6 +11,7 @@ - kerneld support by Boris Tobotras <boris@xtalk.msk.su> - kmod support by: Cyrus Durgin - RAID0 bugfixes: Mark Anthony Lisher <markal@iname.com> + - Devfs support by Richard Gooch <rgooch@atnf.csiro.au> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,6 +25,7 @@ #include <linux/config.h> #include <linux/raid/md.h> +#include <linux/devfs_fs_kernel.h> #ifdef CONFIG_KMOD #include <linux/kmod.h> @@ -68,6 +70,9 @@ static mdk_thread_t *md_recovery_thread = NULL; int md_size[MAX_MD_DEVS] = {0, }; +extern struct block_device_operations md_fops; +static devfs_handle_t devfs_handle = NULL; + static struct gendisk md_gendisk= { MD_MAJOR, @@ -78,7 +83,8 @@ static struct gendisk md_gendisk= md_size, MAX_MD_DEVS, NULL, - NULL + NULL, + &md_fops, }; void md_plug_device (request_queue_t *mdqueue, kdev_t dev) @@ -3302,11 +3308,15 @@ int md__init md_init (void) MD_MAJOR_VERSION, MD_MINOR_VERSION, MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MAX_REAL); - if (register_blkdev (MD_MAJOR, "md", &md_fops)) + if (devfs_register_blkdev (MD_MAJOR, "md", &md_fops)) { printk (KERN_ALERT "Unable to get major %d for md\n", MD_MAJOR); return (-1); } + devfs_handle = devfs_mk_dir (NULL, "md", 0, NULL); + devfs_register_series (devfs_handle, "%u",MAX_MD_DEVS,DEVFS_FL_DEFAULT, + MAJOR_NR, 0, S_IFBLK | S_IRUSR | S_IWUSR, 0, 0, + &md_fops, NULL); blk_dev[MD_MAJOR].queue = md_get_queue; diff --git a/drivers/block/nbd.c 
b/drivers/block/nbd.c index 0efcce8ed..abecb27c4 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -184,10 +184,10 @@ struct request *nbd_read_stat(struct nbd_device *lo) DEBUG("reading control, "); reply.magic = 0; result = nbd_xmit(0, lo->sock, (char *) &reply, sizeof(reply)); - req = lo->tail; if (result <= 0) HARDFAIL("Recv control failed."); memcpy(&xreq, reply.handle, sizeof(xreq)); + req = blkdev_entry_prev_request(&lo->queue_head); if (xreq != req) FAIL("Unexpected handle received.\n"); @@ -216,47 +216,41 @@ void nbd_do_it(struct nbd_device *lo) { struct request *req; - while (1) { + down (&lo->queue_lock); + while (!list_empty(&lo->queue_head)) { req = nbd_read_stat(lo); if (!req) - return; - down (&lo->queue_lock); + goto out; #ifdef PARANOIA - if (req != lo->tail) { + if (req != blkdev_entry_prev_request(&lo->queue_head)) { printk(KERN_ALERT "NBD: I have problem...\n"); } if (lo != &nbd_dev[MINOR(req->rq_dev)]) { printk(KERN_ALERT "NBD: request corrupted!\n"); - goto next; + continue; } if (lo->magic != LO_MAGIC) { printk(KERN_ALERT "NBD: nbd_dev[] corrupted: Not enough magic\n"); - up (&lo->queue_lock); - return; + goto out; } #endif - nbd_end_request(req); - if (lo->tail == lo->head) { -#ifdef PARANOIA - if (lo->tail->next) - printk(KERN_ERR "NBD: I did not expect this\n"); -#endif - lo->head = NULL; - } - lo->tail = lo->tail->next; - next: + list_del(&req->queue); up (&lo->queue_lock); + + nbd_end_request(req); + + down (&lo->queue_lock); } + out: + up (&lo->queue_lock); } void nbd_clear_que(struct nbd_device *lo) { struct request *req; - while (1) { - req = lo->tail; - if (!req) - return; + while (!list_empty(&lo->queue_head)) { + req = blkdev_entry_prev_request(&lo->queue_head); #ifdef PARANOIA if (lo != &nbd_dev[MINOR(req->rq_dev)]) { printk(KERN_ALERT "NBD: request corrupted when clearing!\n"); @@ -268,15 +262,12 @@ void nbd_clear_que(struct nbd_device *lo) } #endif req->errors++; + list_del(&req->queue); + up(&lo->queue_lock); + 
nbd_end_request(req); - if (lo->tail == lo->head) { -#ifdef PARANOIA - if (lo->tail->next) - printk(KERN_ERR "NBD: I did not assume this\n"); -#endif - lo->head = NULL; - } - lo->tail = lo->tail->next; + + down(&lo->queue_lock); } } @@ -296,7 +287,7 @@ static void do_nbd_request(request_queue_t * q) int dev; struct nbd_device *lo; - while (CURRENT) { + while (!QUEUE_EMPTY) { req = CURRENT; dev = MINOR(req->rq_dev); #ifdef PARANOIA @@ -314,28 +305,23 @@ static void do_nbd_request(request_queue_t * q) requests_in++; #endif req->errors = 0; - CURRENT = CURRENT->next; - req->next = NULL; - + blkdev_dequeue_request(req); spin_unlock_irq(&io_request_lock); - down (&lo->queue_lock); - if (lo->head == NULL) { - lo->head = req; - lo->tail = req; - } else { - lo->head->next = req; - lo->head = req; - } + down (&lo->queue_lock); + list_add(&req->queue, &lo->queue_head); nbd_send_req(lo->sock, req); /* Why does this block? */ up (&lo->queue_lock); + spin_lock_irq(&io_request_lock); continue; error_out: req->errors++; + blkdev_dequeue_request(req); + spin_unlock(&io_request_lock); nbd_end_request(req); - CURRENT = CURRENT->next; + spin_lock(&io_request_lock); } return; } @@ -359,11 +345,14 @@ static int nbd_ioctl(struct inode *inode, struct file *file, lo = &nbd_dev[dev]; switch (cmd) { case NBD_CLEAR_SOCK: + down(&lo->queue_lock); nbd_clear_que(lo); - if (lo->head || lo->tail) { + if (!list_empty(&lo->queue_head)) { + up(&lo->queue_lock); printk(KERN_ERR "nbd: Some requests are in progress -> can not turn off.\n"); return -EBUSY; } + up(&lo->queue_lock); file = lo->file; if (!file) return -EINVAL; @@ -415,8 +404,8 @@ static int nbd_ioctl(struct inode *inode, struct file *file, return 0; #ifdef PARANOIA case NBD_PRINT_DEBUG: - printk(KERN_INFO "NBD device %d: head = %lx, tail = %lx. Global: in %d, out %d\n", - dev, (long) lo->head, (long) lo->tail, requests_in, requests_out); + printk(KERN_INFO "NBD device %d: next = %p, prev = %p. 
Global: in %d, out %d\n", + dev, lo->queue_head.next, lo->queue_head.prev, requests_in, requests_out); return 0; #endif case BLKGETSIZE: @@ -480,6 +469,7 @@ int nbd_init(void) blksize_size[MAJOR_NR] = nbd_blksizes; blk_size[MAJOR_NR] = nbd_sizes; blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request); + blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i = 0; i < MAX_NBD; i++) { nbd_dev[i].refcnt = 0; nbd_dev[i].file = NULL; diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 7db6626f4..878709944 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -756,7 +756,7 @@ static void do_pcd_request (request_queue_t * q) if (pcd_busy) return; while (1) { - if ((!CURRENT) || (CURRENT->rq_status == RQ_INACTIVE)) return; + if (QUEUE_EMPTY || (CURRENT->rq_status == RQ_INACTIVE)) return; INIT_REQUEST; if (CURRENT->cmd == READ) { unit = MINOR(CURRENT->rq_dev); diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 577d1354c..f40958ecd 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -156,6 +156,7 @@ static int pd_drive_count; #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/devfs_fs_kernel.h> #include <linux/kernel.h> #include <linux/delay.h> #include <linux/genhd.h> @@ -339,6 +340,8 @@ static char *pd_errs[17] = { "ERR","INDEX","ECC","DRQ","SEEK","WRERR", /* kernel glue structures */ +extern struct block_device_operations pd_fops; + static struct gendisk pd_gendisk = { PD_MAJOR, /* Major number */ PD_NAME, /* Major name */ @@ -348,7 +351,8 @@ static struct gendisk pd_gendisk = { pd_sizes, /* block sizes */ 0, /* number */ NULL, /* internal */ - NULL /* next */ + NULL, /* next */ + &pd_fops, /* block device operations */ }; static struct block_device_operations pd_fops = { @@ -386,8 +390,7 @@ int pd_init (void) { int i; if (disable) return -1; - - if (register_blkdev(MAJOR_NR,name,&pd_fops)) { + if 
(devfs_register_blkdev(MAJOR_NR,name,&pd_fops)) { printk("%s: unable to get major number %d\n", name,major); return -1; @@ -592,8 +595,7 @@ void cleanup_module(void) { struct gendisk **gdp; int unit; - unregister_blkdev(MAJOR_NR,name); - + devfs_unregister_blkdev(MAJOR_NR,name); for(gdp=&gendisk_head;*gdp;gdp=&((*gdp)->next)) if (*gdp == &pd_gendisk) break; if (*gdp) *gdp = (*gdp)->next; @@ -868,7 +870,7 @@ static void do_pd_request (request_queue_t * q) if (pd_busy) return; repeat: - if ((!CURRENT) || (CURRENT->rq_status == RQ_INACTIVE)) return; + if (QUEUE_EMPTY || (CURRENT->rq_status == RQ_INACTIVE)) return; INIT_REQUEST; pd_dev = MINOR(CURRENT->rq_dev); @@ -890,7 +892,7 @@ repeat: pd_cmd = CURRENT->cmd; pd_run = pd_count; while ((pd_run <= cluster) && - (req = req->next) && + (req = blkdev_next_request(req)) && (pd_block+pd_run == req->sector) && (pd_cmd == req->cmd) && (pd_dev == MINOR(req->rq_dev))) @@ -922,7 +924,7 @@ static void pd_next_buf( int unit ) /* paranoia */ - if ((!CURRENT) || + if (QUEUE_EMPTY || (CURRENT->cmd != pd_cmd) || (MINOR(CURRENT->rq_dev) != pd_dev) || (CURRENT->rq_status == RQ_INACTIVE) || diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index d658a0369..4e7a5aaf4 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -854,7 +854,7 @@ static void do_pf_request (request_queue_t * q) if (pf_busy) return; repeat: - if ((!CURRENT) || (CURRENT->rq_status == RQ_INACTIVE)) return; + if (QUEUE_EMPTY || (CURRENT->rq_status == RQ_INACTIVE)) return; INIT_REQUEST; pf_unit = unit = DEVICE_NR(CURRENT->rq_dev); @@ -874,7 +874,7 @@ repeat: pf_cmd = CURRENT->cmd; pf_run = pf_count; while ((pf_run <= cluster) && - (req = req->next) && + (req = blkdev_next_request(req)) && (pf_block+pf_run == req->sector) && (pf_cmd == req->cmd) && (pf_unit == DEVICE_NR(req->rq_dev))) @@ -904,7 +904,7 @@ static void pf_next_buf( int unit ) /* paranoia */ - if ((!CURRENT) || + if (QUEUE_EMPTY || (CURRENT->cmd != pf_cmd) || 
(DEVICE_NR(CURRENT->rq_dev) != pf_unit) || (CURRENT->rq_status == RQ_INACTIVE) || diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index 434fac029..b4e36726c 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c @@ -164,6 +164,7 @@ static int pg_drive_count; #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/devfs_fs_kernel.h> #include <linux/kernel.h> #include <linux/delay.h> #include <linux/malloc.h> @@ -286,6 +287,8 @@ void pg_init_units( void ) } } +static devfs_handle_t devfs_handle = NULL; + int pg_init (void) /* preliminary initialisation */ { int unit; @@ -296,14 +299,17 @@ int pg_init (void) /* preliminary initialisation */ if (pg_detect()) return -1; - if (register_chrdev(major,name,&pg_fops)) { + if (devfs_register_chrdev(major,name,&pg_fops)) { printk("pg_init: unable to get major number %d\n", major); for (unit=0;unit<PG_UNITS;unit++) if (PG.present) pi_release(PI); return -1; } - + devfs_handle = devfs_mk_dir (NULL, "pg", 2, NULL); + devfs_register_series (devfs_handle, "%u", 4, DEVFS_FL_DEFAULT, + major, 0, S_IFCHR | S_IRUSR | S_IWUSR, 0, 0, + &pg_fops, NULL); return 0; } @@ -332,7 +338,8 @@ void cleanup_module(void) { int unit; - unregister_chrdev(major,name); + devfs_unregister (devfs_handle); + devfs_unregister_chrdev(major,name); for (unit=0;unit<PG_UNITS;unit++) if (PG.present) pi_release(PI); diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index ba24c9956..459ef7237 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -143,6 +143,7 @@ static int pt_drive_count; #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/devfs_fs_kernel.h> #include <linux/kernel.h> #include <linux/delay.h> #include <linux/malloc.h> @@ -290,6 +291,8 @@ void pt_init_units( void ) } } +static devfs_handle_t devfs_handle = NULL; + int pt_init (void) /* preliminary initialisation */ { int unit; @@ -300,7 +303,7 @@ int pt_init 
(void) /* preliminary initialisation */ if (pt_detect()) return -1; - if (register_chrdev(major,name,&pt_fops)) { + if (devfs_register_chrdev(major,name,&pt_fops)) { printk("pt_init: unable to get major number %d\n", major); for (unit=0;unit<PT_UNITS;unit++) @@ -308,6 +311,13 @@ int pt_init (void) /* preliminary initialisation */ return -1; } + devfs_handle = devfs_mk_dir (NULL, "pt", 2, NULL); + devfs_register_series (devfs_handle, "%u", 4, DEVFS_FL_DEFAULT, + major, 0, S_IFCHR | S_IRUSR | S_IWUSR, 0, 0, + &pt_fops, NULL); + devfs_register_series (devfs_handle, "%un", 4, DEVFS_FL_DEFAULT, + major, 128, S_IFCHR | S_IRUSR | S_IWUSR, 0, 0, + &pt_fops, NULL); return 0; } @@ -334,9 +344,10 @@ int init_module(void) void cleanup_module(void) -{ int unit; +{ int unit; - unregister_chrdev(major,name); + devfs_unregister (devfs_handle); + devfs_unregister_chrdev(major,name); for (unit=0;unit<PT_UNITS;unit++) if (PT.present) pi_release(PI); diff --git a/drivers/block/ps2esdi.c b/drivers/block/ps2esdi.c index 9f68ebbfc..305c89a00 100644 --- a/drivers/block/ps2esdi.c +++ b/drivers/block/ps2esdi.c @@ -41,6 +41,7 @@ #include <linux/kernel.h> #include <linux/genhd.h> #include <linux/ps2esdi.h> +#include <linux/devfs_fs_kernel.h> #include <linux/blk.h> #include <linux/blkpg.h> #include <linux/mca.h> @@ -164,7 +165,8 @@ static struct gendisk ps2esdi_gendisk = ps2esdi_sizes, /* block sizes */ 0, /* number */ (void *) ps2esdi_info, /* internal */ - NULL /* next */ + NULL, /* next */ + &ps2esdi_fops, /* file operations */ }; /* initialization routine called by ll_rw_blk.c */ @@ -173,7 +175,7 @@ int __init ps2esdi_init(void) /* register the device - pass the name, major number and operations vector . 
*/ - if (register_blkdev(MAJOR_NR, "ed", &ps2esdi_fops)) { + if (devfs_register_blkdev(MAJOR_NR, "ed", &ps2esdi_fops)) { printk("%s: Unable to get major number %d\n", DEVICE_NAME, MAJOR_NR); return -1; } @@ -229,7 +231,7 @@ cleanup_module(void) release_region(io_base, 4); free_dma(dma_arb_level); free_irq(PS2ESDI_IRQ, NULL) - unregister_blkdev(MAJOR_NR, "ed"); + devfs_unregister_blkdev(MAJOR_NR, "ed"); } #endif /* MODULE */ @@ -476,7 +478,7 @@ static void do_ps2esdi_request(request_queue_t * q) if (virt_to_bus(CURRENT->buffer + CURRENT->nr_sectors * 512) > 16 * MB) { printk("%s: DMA above 16MB not supported\n", DEVICE_NAME); end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(q); return; } /* check for above 16Mb dmas */ @@ -510,7 +512,7 @@ static void do_ps2esdi_request(request_queue_t * q) default: printk("%s: Unknown command\n", DEVICE_NAME); end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(q); break; } /* handle different commands */ @@ -520,7 +522,7 @@ static void do_ps2esdi_request(request_queue_t * q) printk("Grrr. error. 
ps2esdi_drives: %d, %lu %lu\n", ps2esdi_drives, CURRENT->sector, ps2esdi[MINOR(CURRENT->rq_dev)].nr_sects); end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(q); } @@ -591,7 +593,7 @@ static void ps2esdi_readwrite(int cmd, u_char drive, u_int block, u_int count) return do_ps2esdi_request(NULL); else { end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(NULL); } } @@ -894,7 +896,7 @@ static void ps2esdi_normal_interrupt_handler(u_int int_ret_code) do_ps2esdi_request(NULL); else { end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(NULL); } break; @@ -940,7 +942,7 @@ static void ps2esdi_normal_interrupt_handler(u_int int_ret_code) do_ps2esdi_request(NULL); else { end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(NULL); } break; @@ -950,7 +952,7 @@ static void ps2esdi_normal_interrupt_handler(u_int int_ret_code) outb((int_ret_code & 0xe0) | ATT_EOI, ESDI_ATTN); outb(CTRL_ENABLE_INTR, ESDI_CONTROL); end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(NULL); break; @@ -986,7 +988,7 @@ static void ps2esdi_continue_request(void) do_ps2esdi_request(NULL); } else { end_request(SUCCES); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(NULL); } } diff --git a/drivers/block/raid1.c b/drivers/block/raid1.c index 6671f83e8..057be0d64 100644 --- a/drivers/block/raid1.c +++ b/drivers/block/raid1.c @@ -211,7 +211,11 @@ raid1_make_request (struct md_dev *mddev, int rw, struct buffer_head * bh) while (!( /* FIXME: now we are rather fault tolerant than nice */ r1_bh = kmalloc (sizeof (struct raid1_bh), GFP_KERNEL) ) ) + { printk ("raid1_make_request(#1): out of memory\n"); + current->policy |= SCHED_YIELD; + schedule(); + } memset (r1_bh, 0, sizeof (struct raid1_bh)); /* @@ -298,7 +302,11 @@ raid1_make_request (struct md_dev *mddev, int rw, struct buffer_head * bh) while (!( /* FIXME: now we are rather fault tolerant than nice */ mirror_bh[i] = kmalloc (sizeof (struct 
buffer_head), GFP_KERNEL) ) ) + { printk ("raid1_make_request(#2): out of memory\n"); + current->policy |= SCHED_YIELD; + schedule(); + } memset (mirror_bh[i], 0, sizeof (struct buffer_head)); /* @@ -710,7 +718,11 @@ static int raid1_run (int minor, struct md_dev *mddev) while (!( /* FIXME: now we are rather fault tolerant than nice */ mddev->private = kmalloc (sizeof (struct raid1_data), GFP_KERNEL) ) ) + { printk ("raid1_run(): out of memory\n"); + current->policy |= SCHED_YIELD; + schedule(); + } raid_conf = mddev->private; memset(raid_conf, 0, sizeof(*raid_conf)); diff --git a/drivers/block/rd.c b/drivers/block/rd.c index 17a745d5b..19f485df0 100644 --- a/drivers/block/rd.c +++ b/drivers/block/rd.c @@ -58,6 +58,7 @@ #include <linux/fd.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/devfs_fs_kernel.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -97,6 +98,7 @@ static unsigned long rd_length[NUM_RAMDISKS]; /* Size of RAM disks in bytes */ static int rd_hardsec[NUM_RAMDISKS]; /* Size of real blocks in bytes */ static int rd_blocksizes[NUM_RAMDISKS]; /* Size of 1024 byte blocks :) */ static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ +static devfs_handle_t devfs_handle = NULL; /* * Parameters for the boot-loading of the RAM disk. These are set by @@ -180,6 +182,8 @@ __setup("ramdisk_size=", ramdisk_size2); * deleted, and make that my Ramdisk. If the request is outside of the * allocated size, we must get rid of it... 
* + * 19-JAN-1998 Richard Gooch <rgooch@atnf.csiro.au> Added devfs support + * */ static void rd_request(request_queue_t * q) { @@ -362,6 +366,7 @@ static int rd_open(struct inode * inode, struct file * filp) if (DEVICE_NR(inode->i_rdev) >= NUM_RAMDISKS) return -ENXIO; + filp->f_op = &def_blk_fops; MOD_INC_USE_COUNT; return 0; @@ -387,6 +392,7 @@ static void __exit rd_cleanup (void) for (i = 0 ; i < NUM_RAMDISKS; i++) destroy_buffers(MKDEV(MAJOR_NR, i)); + devfs_unregister (devfs_handle); unregister_blkdev( MAJOR_NR, "ramdisk" ); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); } @@ -418,6 +424,11 @@ int __init rd_init (void) rd_blocksizes[i] = rd_blocksize; rd_kbsize[i] = rd_size; } + devfs_handle = devfs_mk_dir (NULL, "rd", 0, NULL); + devfs_register_series (devfs_handle, "%u", NUM_RAMDISKS, + DEVFS_FL_DEFAULT, MAJOR_NR, 0, + S_IFBLK | S_IRUSR | S_IWUSR, 0, 0, + &fd_fops, NULL); hardsect_size[MAJOR_NR] = rd_hardsec; /* Size of the RAM disk blocks */ blksize_size[MAJOR_NR] = rd_blocksizes; /* Avoid set_blocksize() check */ @@ -560,7 +571,7 @@ done: */ static void __init rd_load_image(kdev_t device, int offset, int unit) { - struct inode inode, out_inode; + struct inode *inode, *out_inode; struct file infile, outfile; struct dentry in_dentry, out_dentry; mm_segment_t fs; @@ -574,25 +585,27 @@ static void __init rd_load_image(kdev_t device, int offset, int unit) ram_device = MKDEV(MAJOR_NR, unit); memset(&infile, 0, sizeof(infile)); - memset(&inode, 0, sizeof(inode)); memset(&in_dentry, 0, sizeof(in_dentry)); - inode.i_rdev = device; - init_waitqueue_head(&inode.i_wait); + inode = get_empty_inode(); + inode->i_rdev = device; + inode->i_bdev = bdget(kdev_t_to_nr(device)); infile.f_mode = 1; /* read only */ infile.f_dentry = &in_dentry; - in_dentry.d_inode = &inode; + in_dentry.d_inode = inode; memset(&outfile, 0, sizeof(outfile)); - memset(&out_inode, 0, sizeof(out_inode)); memset(&out_dentry, 0, sizeof(out_dentry)); - out_inode.i_rdev = ram_device; - 
init_waitqueue_head(&out_inode.i_wait); + out_inode = get_empty_inode(); + out_inode->i_rdev = ram_device; + out_inode->i_bdev = bdget(kdev_t_to_nr(ram_device)); outfile.f_mode = 3; /* read/write */ outfile.f_dentry = &out_dentry; - out_dentry.d_inode = &out_inode; + out_dentry.d_inode = out_inode; - if (blkdev_open(&inode, &infile) != 0) return; - if (blkdev_open(&out_inode, &outfile) != 0) return; + if (blkdev_open(inode, &infile) != 0) + goto free_inodes; + if (blkdev_open(out_inode, &outfile) != 0) + goto free_inodes; fs = get_fs(); set_fs(KERNEL_DS); @@ -655,10 +668,10 @@ static void __init rd_load_image(kdev_t device, int offset, int unit) rotate = 0; invalidate_buffers(device); if (infile.f_op->release) - infile.f_op->release(&inode, &infile); + infile.f_op->release(inode, &infile); printk("Please insert disk #%d and press ENTER\n", i/devblocks+1); wait_for_keypress(); - if (blkdev_open(&inode, &infile) != 0) { + if (blkdev_open(inode, &infile) != 0) { printk("Error opening disk.\n"); goto done; } @@ -678,11 +691,15 @@ static void __init rd_load_image(kdev_t device, int offset, int unit) successful_load: invalidate_buffers(device); ROOT_DEV = MKDEV(MAJOR_NR, unit); + if (ROOT_DEVICE_NAME != NULL) strcpy (ROOT_DEVICE_NAME, "rd/0"); done: if (infile.f_op->release) - infile.f_op->release(&inode, &infile); + infile.f_op->release(inode, &infile); set_fs(fs); +free_inodes: + iput(inode); + iput(out_inode); } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 911bafe23..f38e10209 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -305,7 +305,7 @@ static void start_request(struct floppy_state *fs) wake_up(&fs->wait); return; } - while (CURRENT && fs->state == idle) { + while (!QUEUE_EMPTY && fs->state == idle) { if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) panic(DEVICE_NAME ": request list destroyed"); if (CURRENT->bh && !buffer_locked(CURRENT->bh)) diff --git a/drivers/block/swim_iop.c b/drivers/block/swim_iop.c index 467cda26d..d37059d38 
100644 --- a/drivers/block/swim_iop.c +++ b/drivers/block/swim_iop.c @@ -550,7 +550,7 @@ static void start_request(struct floppy_state *fs) wake_up(&fs->wait); return; } - while (CURRENT && fs->state == idle) { + while (!QUEUE_EMPTY && fs->state == idle) { if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) panic(DEVICE_NAME ": request list destroyed"); if (CURRENT->bh && !buffer_locked(CURRENT->bh)) diff --git a/drivers/block/xd.c b/drivers/block/xd.c index b4c52d6a2..fde487ecd 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -41,6 +41,7 @@ #include <linux/hdreg.h> #include <linux/ioport.h> #include <linux/init.h> +#include <linux/devfs_fs_kernel.h> #include <asm/system.h> #include <asm/io.h> @@ -87,21 +88,8 @@ XD_INFO xd_info[XD_MAXDRIVES]; should be able to detect your drive's geometry from this info. (eg: xd=0,5,0x320,3 is the "standard"). */ #include <asm/page.h> -/* coppied from floppy.c */ -static inline int __get_order(unsigned long size) -{ - int order; - - size = (size-1) >> (PAGE_SHIFT-1); - order = -1; - do { - size >>= 1; - order++; - } while (size); - return order; -} -#define xd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,__get_order(size)) -#define xd_dma_mem_free(addr, size) free_pages(addr, __get_order(size)) +#define xd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,get_order(size)) +#define xd_dma_mem_free(addr, size) free_pages(addr, get_order(size)) static char *xd_dma_buffer = 0; static XD_SIGNATURE xd_sigs[] __initdata = { @@ -130,6 +118,9 @@ static unsigned int xd_bases[] __initdata = static struct hd_struct xd_struct[XD_MAXDRIVES << 6]; static int xd_sizes[XD_MAXDRIVES << 6], xd_access[XD_MAXDRIVES] = { 0, 0 }; static int xd_blocksizes[XD_MAXDRIVES << 6]; + +extern struct block_device_operations xd_fops; + static struct gendisk xd_gendisk = { MAJOR_NR, /* Major number */ "xd", /* Major name */ @@ -139,7 +130,8 @@ static struct gendisk xd_gendisk = { xd_sizes, /* block sizes */ 0, /* number */ (void *) xd_info, /* internal */ - NULL /* 
next */ + NULL, /* next */ + &xd_fops, /* file operations */ }; static struct block_device_operations xd_fops = { open: xd_open, @@ -164,13 +156,16 @@ static struct timer_list xd_timer = { NULL, NULL, 0, 0, (timeout_fn) xd_wakeup } static volatile u_char xd_error; static int nodma = XD_DONT_USE_DMA; +static devfs_handle_t devfs_handle = NULL; + /* xd_init: register the block device number and set up pointer tables */ int __init xd_init (void) { - if (register_blkdev(MAJOR_NR,"xd",&xd_fops)) { + if (devfs_register_blkdev(MAJOR_NR,"xd",&xd_fops)) { printk("xd: Unable to get major number %d\n",MAJOR_NR); return -1; } + devfs_handle = devfs_mk_dir (NULL, xd_gendisk.major_name, 0, NULL); blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read ahead */ xd_gendisk.next = gendisk_head; @@ -287,7 +282,7 @@ static void do_xd_request (request_queue_t * q) sti(); if (xdc_busy) return; - while (code = 0, CURRENT) { + while (code = 0, !QUEUE_EMPTY) { INIT_REQUEST; /* do some checking on the request structure */ if (CURRENT_DEV < xd_drives @@ -1162,7 +1157,7 @@ int init_module(void) printk(KERN_INFO "XD: Loaded as a module.\n"); if (!xd_drives) { /* no drives detected - unload module */ - unregister_blkdev(MAJOR_NR, "xd"); + devfs_unregister_blkdev(MAJOR_NR, "xd"); xd_done(); return (-1); } @@ -1174,7 +1169,7 @@ void cleanup_module(void) { int partition,dev,start; - unregister_blkdev(MAJOR_NR, "xd"); + devfs_unregister_blkdev(MAJOR_NR, "xd"); for (dev = 0; dev < xd_drives; dev++) { start = dev << xd_gendisk.minor_shift; for (partition = xd_gendisk.max_p - 1; partition >= 0; partition--) { @@ -1186,6 +1181,7 @@ void cleanup_module(void) } } xd_done(); + devfs_unregister (devfs_handle); if (xd_drives) { free_irq(xd_irq, NULL); free_dma(xd_dma); |