diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2001-01-31 22:22:27 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2001-01-31 22:22:27 +0000 |
commit | 825423e4c4f18289df2393951cfd2a7a31fc0464 (patch) | |
tree | 4ad80e981c3d9effa910d2247d118d254f9a5d09 /drivers/block | |
parent | c4693dc4856ab907a5c02187a8d398861bebfc7e (diff) |
Merge with Linux 2.4.1.
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/DAC960.c | 8 | ||||
-rw-r--r-- | drivers/block/cciss.c | 1 | ||||
-rw-r--r-- | drivers/block/cpqarray.c | 158 | ||||
-rw-r--r-- | drivers/block/elevator.c | 130 | ||||
-rw-r--r-- | drivers/block/ll_rw_blk.c | 335 | ||||
-rw-r--r-- | drivers/block/paride/pd.c | 2 | ||||
-rw-r--r-- | drivers/block/paride/pf.c | 2 |
7 files changed, 348 insertions, 288 deletions
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 5e8dc19f9..d0aac4c7d 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -1820,7 +1820,6 @@ static int DAC960_BackMergeFunction(RequestQueue_T *RequestQueue, Request->nr_segments < Controller->DriverScatterGatherLimit) { Request->nr_segments++; - RequestQueue->elevator.nr_segments++; return true; } return false; @@ -1844,7 +1843,6 @@ static int DAC960_FrontMergeFunction(RequestQueue_T *RequestQueue, Request->nr_segments < Controller->DriverScatterGatherLimit) { Request->nr_segments++; - RequestQueue->elevator.nr_segments++; return true; } return false; @@ -1864,17 +1862,12 @@ static int DAC960_MergeRequestsFunction(RequestQueue_T *RequestQueue, DAC960_Controller_T *Controller = (DAC960_Controller_T *) RequestQueue->queuedata; int TotalSegments = Request->nr_segments + NextRequest->nr_segments; - int SameSegment = 0; if (Request->bhtail->b_data + Request->bhtail->b_size == NextRequest->bh->b_data) - { TotalSegments--; - SameSegment = 1; - } if (TotalSegments > MaxSegments || TotalSegments > Controller->DriverScatterGatherLimit) return false; - RequestQueue->elevator.nr_segments -= SameSegment; Request->nr_segments = TotalSegments; return true; } @@ -2834,6 +2827,7 @@ static void DAC960_RequestFunction(RequestQueue_T *RequestQueue) static inline void DAC960_ProcessCompletedBuffer(BufferHeader_T *BufferHeader, boolean SuccessfulIO) { + blk_finished_io(BufferHeader->b_size >> 9); BufferHeader->b_end_io(BufferHeader, SuccessfulIO); } diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 35afe2a7f..47937edcc 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1086,6 +1086,7 @@ static inline void complete_buffers( struct buffer_head *bh, int status) { xbh = bh->b_reqnext; bh->b_reqnext = NULL; + blk_finished_io(bh->b_size >> 9); bh->b_end_io(bh, status); bh = xbh; } diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index afbf60ddc..7016cfff4 100644 
--- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -140,23 +140,7 @@ static int ida_release(struct inode *inode, struct file *filep); static int ida_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, unsigned long arg); static int ida_ctlr_ioctl(int ctlr, int dsk, ida_ioctl_t *io); -static void do_ida_request(int i); -/* - * This is a hack. This driver eats a major number for each controller, and - * sets blkdev[xxx].request_fn to each one of these so the real request - * function knows what controller its working with. - */ -#define DO_IDA_REQUEST(x) { do_ida_request(x); } - -static void do_ida_request0(request_queue_t * q) DO_IDA_REQUEST(0); -static void do_ida_request1(request_queue_t * q) DO_IDA_REQUEST(1); -static void do_ida_request2(request_queue_t * q) DO_IDA_REQUEST(2); -static void do_ida_request3(request_queue_t * q) DO_IDA_REQUEST(3); -static void do_ida_request4(request_queue_t * q) DO_IDA_REQUEST(4); -static void do_ida_request5(request_queue_t * q) DO_IDA_REQUEST(5); -static void do_ida_request6(request_queue_t * q) DO_IDA_REQUEST(6); -static void do_ida_request7(request_queue_t * q) DO_IDA_REQUEST(7); - +static void do_ida_request(request_queue_t *q); static void start_io(ctlr_info_t *h); static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c); @@ -362,6 +346,47 @@ void cleanup_module(void) } #endif /* MODULE */ +static inline int cpq_new_segment(request_queue_t *q, struct request *rq, + int max_segments) +{ + if (rq->nr_segments < SG_MAX) { + rq->nr_segments++; + return 1; + } + return 0; +} + +static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, + struct buffer_head *bh, int max_segments) +{ + if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) + return 1; + return cpq_new_segment(q, rq, max_segments); +} + +static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, + struct buffer_head *bh, int max_segments) +{ + if (bh->b_data + bh->b_size == rq->bh->b_data) + return 1; + return 
cpq_new_segment(q, rq, max_segments); +} + +static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, + struct request *nxt, int max_segments) +{ + int total_segments = rq->nr_segments + nxt->nr_segments; + + if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) + total_segments--; + + if (total_segments > SG_MAX) + return 0; + + rq->nr_segments = total_segments; + return 1; +} + /* * This is it. Find all the controllers and register them. I really hate * stealing all these major device numbers. @@ -369,12 +394,7 @@ void cleanup_module(void) */ int __init cpqarray_init(void) { - void (*request_fns[MAX_CTLR])(request_queue_t *) = { - do_ida_request0, do_ida_request1, - do_ida_request2, do_ida_request3, - do_ida_request4, do_ida_request5, - do_ida_request6, do_ida_request7, - }; + request_queue_t *q; int i,j; int num_cntlrs_reg = 0; @@ -495,16 +515,20 @@ int __init cpqarray_init(void) hba[i]->access.set_intr_mask(hba[i], FIFO_NOT_EMPTY); - ida_procinit(i); - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + i), - request_fns[i]); - blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR + i), 0); + q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); + q->queuedata = hba[i]; + blk_init_queue(q, do_ida_request); + blk_queue_headactive(q, 0); blksize_size[MAJOR_NR+i] = ida_blocksizes + (i*256); hardsect_size[MAJOR_NR+i] = ida_hardsizes + (i*256); read_ahead[MAJOR_NR+i] = READ_AHEAD; + q->back_merge_fn = cpq_back_merge_fn; + q->front_merge_fn = cpq_front_merge_fn; + q->merge_requests_fn = cpq_merge_requests_fn; + ida_gendisk[i].major = MAJOR_NR + i; ida_gendisk[i].major_name = "ida"; ida_gendisk[i].minor_shift = NWD_SHIFT; @@ -872,37 +896,30 @@ static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c) * are in here (either via the dummy do_ida_request functions or by being * called from the interrupt handler */ -static void do_ida_request(int ctlr) +static void do_ida_request(request_queue_t *q) { - ctlr_info_t *h = hba[ctlr]; + ctlr_info_t *h = q->queuedata; 
cmdlist_t *c; int seg, sect; char *lastdataend; - struct list_head * queue_head; + struct list_head * queue_head = &q->queue_head; struct buffer_head *bh; struct request *creq; - queue_head = &blk_dev[MAJOR_NR+ctlr].request_queue.queue_head; - - if (list_empty(queue_head)) - { + if (q->plugged || list_empty(queue_head)) { start_io(h); return; } creq = blkdev_entry_next_request(queue_head); - if (creq->rq_status == RQ_INACTIVE) - { - start_io(h); - return; - } - + if (creq->nr_segments > SG_MAX) + BUG(); - if (ctlr != MAJOR(creq->rq_dev)-MAJOR_NR || - ctlr > nr_ctlr || h == NULL) + if (h->ctlr != MAJOR(creq->rq_dev)-MAJOR_NR || h->ctlr > nr_ctlr) { printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", - ctlr, creq->rq_dev, creq); + h->ctlr, creq->rq_dev, creq); + blkdev_dequeue_request(creq); complete_buffers(creq->bh, 0); start_io(h); return; @@ -916,12 +933,12 @@ static void do_ida_request(int ctlr) bh = creq->bh; - c->ctlr = ctlr; + c->ctlr = h->ctlr; c->hdr.unit = MINOR(creq->rq_dev) >> NWD_SHIFT; c->hdr.size = sizeof(rblk_t) >> 2; c->size += sizeof(rblk_t); - c->req.hdr.blk = ida[(ctlr<<CTLR_SHIFT) + MINOR(creq->rq_dev)].start_sect + creq->sector; + c->req.hdr.blk = ida[(h->ctlr<<CTLR_SHIFT) + MINOR(creq->rq_dev)].start_sect + creq->sector; c->bh = bh; DBGPX( if (bh == NULL) @@ -933,21 +950,16 @@ DBGPX( sect = 0; while(bh) { sect += bh->b_size/512; -DBGPX( - if (bh->b_size % 512) { - printk("Oh damn. 
%d+%d, size = %d\n", creq->sector, sect, bh->b_size); - panic("b_size %% 512 != 0"); - } -); if (bh->b_data == lastdataend) { c->req.sg[seg-1].size += bh->b_size; lastdataend += bh->b_size; } else { + if (seg == SG_MAX) + BUG(); c->req.sg[seg].size = bh->b_size; c->req.sg[seg].addr = (__u32)virt_to_bus(bh->b_data); lastdataend = bh->b_data + bh->b_size; - if (++seg == SG_MAX) - break; + seg++; } bh = bh->b_reqnext; } @@ -955,30 +967,24 @@ DBGPX( printk("Submitting %d sectors in %d segments\n", sect, seg); ); c->req.hdr.sg_cnt = seg; c->req.hdr.blk_cnt = sect; - creq->sector += sect; - creq->nr_sectors -= sect; - - /* Ready the next request: - * Fix up creq if we still have more buffers in the buffer chain, or - * mark this request as done and ready the next one. + /* + * Since we control our own merging, we know that this request + * is now fully setup and there's nothing left. */ - if (creq->nr_sectors) { -DBGPX( - if (bh==NULL) { - printk("sector=%d, nr_sectors=%d, sect=%d, seg=%d\n", - creq->sector, creq->nr_sectors, sect, seg); - panic("mother..."); - } -); - creq->bh = bh->b_reqnext; - bh->b_reqnext = NULL; -DBGPX( printk("More to do on same request %p\n", creq); ); - } else { -DBGPX( printk("Done with %p\n", creq); ); - blkdev_dequeue_request(creq); - end_that_request_last(creq); + if (creq->nr_sectors != sect) { + printk("ida: %ld != %d sectors\n", creq->nr_sectors, sect); + BUG(); } + blkdev_dequeue_request(creq); + + /* + * ehh, we can't really end the request here since it's not + * even started yet. for now it shouldn't hurt though + */ +DBGPX( printk("Done with %p\n", creq); ); + end_that_request_last(creq); + c->req.hdr.cmd = (creq->cmd == READ) ? 
IDA_READ : IDA_WRITE; c->type = CMD_RWREQ; @@ -1025,6 +1031,7 @@ static inline void complete_buffers(struct buffer_head *bh, int ok) xbh = bh->b_reqnext; bh->b_reqnext = NULL; + blk_finished_io(bh->b_size >> 9); bh->b_end_io(bh, ok); bh = xbh; @@ -1072,7 +1079,6 @@ static void do_ida_intr(int irq, void *dev_id, struct pt_regs *regs) unsigned long flags; __u32 a,a1; - istat = h->access.intr_pending(h); /* Is this interrupt for us? */ if (istat == 0) @@ -1116,7 +1122,7 @@ static void do_ida_intr(int irq, void *dev_id, struct pt_regs *regs) /* * See if we can queue up some more IO */ - do_ida_request(h->ctlr); + do_ida_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); spin_unlock_irqrestore(&io_request_lock, flags); } diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index 1200773c2..9917ad055 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -24,125 +24,115 @@ #include <linux/blkdev.h> #include <linux/elevator.h> #include <linux/blk.h> +#include <linux/module.h> #include <asm/uaccess.h> -/* - * Order ascending, but only allow a request to be skipped a certain - * number of times - */ -void elevator_linus(struct request *req, elevator_t *elevator, - struct list_head *real_head, - struct list_head *head, int orig_latency) -{ - struct list_head *entry = real_head; - struct request *tmp; - - req->elevator_sequence = orig_latency; - - while ((entry = entry->prev) != head) { - tmp = blkdev_entry_to_request(entry); - if (IN_ORDER(tmp, req)) - break; - if (!tmp->elevator_sequence) - break; - tmp->elevator_sequence--; - } - list_add(&req->queue, entry); -} - int elevator_linus_merge(request_queue_t *q, struct request **req, + struct list_head * head, struct buffer_head *bh, int rw, - int *max_sectors, int *max_segments) + int max_sectors, int max_segments) { - struct list_head *entry, *head = &q->queue_head; + struct list_head *entry = &q->queue_head; unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE; - entry = head; - if 
(q->head_active && !q->plugged) - head = head->next; - while ((entry = entry->prev) != head) { - struct request *__rq = *req = blkdev_entry_to_request(entry); + struct request *__rq = blkdev_entry_to_request(entry); + + /* + * simply "aging" of requests in queue + */ + if (__rq->elevator_sequence-- <= 0) { + *req = __rq; + break; + } + if (__rq->sem) continue; if (__rq->cmd != rw) continue; - if (__rq->nr_sectors + count > *max_sectors) - continue; if (__rq->rq_dev != bh->b_rdev) continue; + if (__rq->nr_sectors + count > max_sectors) + continue; + if (__rq->elevator_sequence < count) + break; if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { ret = ELEVATOR_BACK_MERGE; + *req = __rq; break; - } - if (!__rq->elevator_sequence) - break; - if (__rq->sector - count == bh->b_rsector) { - __rq->elevator_sequence--; + } else if (__rq->sector - count == bh->b_rsector) { ret = ELEVATOR_FRONT_MERGE; + __rq->elevator_sequence -= count; + *req = __rq; break; - } + } else if (!*req && BHRQ_IN_ORDER(bh, __rq)) + *req = __rq; } + return ret; +} + +void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count) +{ + struct list_head *entry = &req->queue, *head = &q->queue_head; + /* * second pass scan of requests that got passed over, if any */ - if (ret != ELEVATOR_NO_MERGE && *req) { - while ((entry = entry->next) != &q->queue_head) { - struct request *tmp = blkdev_entry_to_request(entry); - tmp->elevator_sequence--; - } + while ((entry = entry->next) != head) { + struct request *tmp = blkdev_entry_to_request(entry); + tmp->elevator_sequence -= count; } - - return ret; } -/* - * No request sorting, just add it to the back of the list - */ -void elevator_noop(struct request *req, elevator_t *elevator, - struct list_head *real_head, struct list_head *head, - int orig_latency) +void elevator_linus_merge_req(struct request *req, struct request *next) { - list_add_tail(&req->queue, real_head); + if (next->elevator_sequence < req->elevator_sequence) + 
req->elevator_sequence = next->elevator_sequence; } /* - * See if we can find a request that is buffer can be coalesced with. + * See if we can find a request that this buffer can be coalesced with. */ int elevator_noop_merge(request_queue_t *q, struct request **req, + struct list_head * head, struct buffer_head *bh, int rw, - int *max_sectors, int *max_segments) + int max_sectors, int max_segments) { - struct list_head *entry, *head = &q->queue_head; + struct list_head *entry; unsigned int count = bh->b_size >> 9; - if (q->head_active && !q->plugged) - head = head->next; + if (list_empty(&q->queue_head)) + return ELEVATOR_NO_MERGE; - entry = head; + entry = &q->queue_head; while ((entry = entry->prev) != head) { - struct request *__rq = *req = blkdev_entry_to_request(entry); - if (__rq->sem) - continue; + struct request *__rq = blkdev_entry_to_request(entry); + if (__rq->cmd != rw) continue; - if (__rq->nr_sectors + count > *max_sectors) - continue; if (__rq->rq_dev != bh->b_rdev) continue; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) + if (__rq->nr_sectors + count > max_sectors) + continue; + if (__rq->sem) + continue; + if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { + *req = __rq; return ELEVATOR_BACK_MERGE; - if (__rq->sector - count == bh->b_rsector) + } else if (__rq->sector - count == bh->b_rsector) { + *req = __rq; return ELEVATOR_FRONT_MERGE; + } } + + *req = blkdev_entry_to_request(q->queue_head.prev); return ELEVATOR_NO_MERGE; } -/* - * The noop "elevator" does not do any accounting - */ -void elevator_noop_dequeue(struct request *req) {} +void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {} + +void elevator_noop_merge_req(struct request *req, struct request *next) {} int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg) { diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index ef71dddc7..836a89f1e 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c 
@@ -19,6 +19,7 @@ #include <linux/config.h> #include <linux/locks.h> #include <linux/mm.h> +#include <linux/swap.h> #include <linux/init.h> #include <linux/smp_lock.h> @@ -38,8 +39,6 @@ extern int mac_floppy_init(void); #endif -extern int lvm_init(void); - /* * For the allocated request tables */ @@ -118,6 +117,19 @@ int * max_readahead[MAX_BLKDEV]; */ int * max_sectors[MAX_BLKDEV]; +/* + * queued sectors for all devices, used to make sure we don't fill all + * of memory with locked buffers + */ +atomic_t queued_sectors; + +/* + * high and low watermark for above + */ +static int high_queued_sectors, low_queued_sectors; +static int batch_requests, queue_nr_requests; +static DECLARE_WAIT_QUEUE_HEAD(blk_buffers_wait); + static inline int get_max_sectors(kdev_t dev) { if (!max_sectors[MAJOR(dev)]) @@ -125,7 +137,7 @@ static inline int get_max_sectors(kdev_t dev) return max_sectors[MAJOR(dev)][MINOR(dev)]; } -static inline request_queue_t *__blk_get_queue(kdev_t dev) +inline request_queue_t *__blk_get_queue(kdev_t dev) { struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); @@ -153,17 +165,14 @@ request_queue_t *blk_get_queue(kdev_t dev) static int __blk_cleanup_queue(struct list_head *head) { - struct list_head *entry; struct request *rq; int i = 0; if (list_empty(head)) return 0; - entry = head->next; do { - rq = list_entry(entry, struct request, table); - entry = entry->next; + rq = list_entry(head->next, struct request, table); list_del(&rq->table); kmem_cache_free(request_cachep, rq); i++; @@ -188,10 +197,12 @@ static int __blk_cleanup_queue(struct list_head *head) **/ void blk_cleanup_queue(request_queue_t * q) { - int count = QUEUE_NR_REQUESTS; + int count = queue_nr_requests; count -= __blk_cleanup_queue(&q->request_freelist[READ]); count -= __blk_cleanup_queue(&q->request_freelist[WRITE]); + count -= __blk_cleanup_queue(&q->pending_freelist[READ]); + count -= __blk_cleanup_queue(&q->pending_freelist[WRITE]); if (count) printk("blk_cleanup_queue: leaked requests 
(%d)\n", count); @@ -290,7 +301,6 @@ static inline int ll_new_segment(request_queue_t *q, struct request *req, int ma { if (req->nr_segments < max_segments) { req->nr_segments++; - q->elevator.nr_segments++; return 1; } return 0; @@ -316,18 +326,13 @@ static int ll_merge_requests_fn(request_queue_t *q, struct request *req, struct request *next, int max_segments) { int total_segments = req->nr_segments + next->nr_segments; - int same_segment; - same_segment = 0; - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) { + if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) total_segments--; - same_segment = 1; - } if (total_segments > max_segments) return 0; - q->elevator.nr_segments -= same_segment; req->nr_segments = total_segments; return 1; } @@ -364,7 +369,7 @@ static inline void __generic_unplug_device(request_queue_t *q) } } -static void generic_unplug_device(void *data) +void generic_unplug_device(void *data) { request_queue_t *q = (request_queue_t *) data; unsigned long flags; @@ -379,19 +384,24 @@ static void blk_init_free_list(request_queue_t *q) struct request *rq; int i; + INIT_LIST_HEAD(&q->request_freelist[READ]); + INIT_LIST_HEAD(&q->request_freelist[WRITE]); + INIT_LIST_HEAD(&q->pending_freelist[READ]); + INIT_LIST_HEAD(&q->pending_freelist[WRITE]); + q->pending_free[READ] = q->pending_free[WRITE] = 0; + /* - * Divide requests in half between read and write. This used to - * be a 2/3 advantage for reads, but now reads can steal from - * the write free list. 
+ * Divide requests in half between read and write */ - for (i = 0; i < QUEUE_NR_REQUESTS; i++) { + for (i = 0; i < queue_nr_requests; i++) { rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); + memset(rq, 0, sizeof(struct request)); rq->rq_status = RQ_INACTIVE; list_add(&rq->table, &q->request_freelist[i & 1]); } init_waitqueue_head(&q->wait_for_request); - spin_lock_init(&q->request_lock); + spin_lock_init(&q->queue_lock); } static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); @@ -426,14 +436,12 @@ static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); * blk_queue_headactive(). * * Note: - * blk_init_queue() must be paired with a blk_cleanup-queue() call + * blk_init_queue() must be paired with a blk_cleanup_queue() call * when the block device is deactivated (such as at module unload). **/ void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) { INIT_LIST_HEAD(&q->queue_head); - INIT_LIST_HEAD(&q->request_freelist[READ]); - INIT_LIST_HEAD(&q->request_freelist[WRITE]); elevator_init(&q->elevator, ELEVATOR_LINUS); blk_init_free_list(q); q->request_fn = rfn; @@ -455,7 +463,6 @@ void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) q->head_active = 1; } - #define blkdev_free_rq(list) list_entry((list)->next, struct request, table); /* * Get a free request. io_request_lock must be held and interrupts @@ -463,37 +470,16 @@ void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) */ static inline struct request *get_request(request_queue_t *q, int rw) { - struct list_head *list = &q->request_freelist[rw]; - struct request *rq; - - /* - * Reads get preferential treatment and are allowed to steal - * from the write free list if necessary. - */ - if (!list_empty(list)) { - rq = blkdev_free_rq(list); - goto got_rq; - } + struct request *rq = NULL; - /* - * if the WRITE list is non-empty, we know that rw is READ - * and that the READ list is empty. 
allow reads to 'steal' - * from the WRITE list. - */ - if (!list_empty(&q->request_freelist[WRITE])) { - list = &q->request_freelist[WRITE]; - rq = blkdev_free_rq(list); - goto got_rq; + if (!list_empty(&q->request_freelist[rw])) { + rq = blkdev_free_rq(&q->request_freelist[rw]); + list_del(&rq->table); + rq->rq_status = RQ_ACTIVE; + rq->special = NULL; + rq->q = q; } - return NULL; - -got_rq: - list_del(&rq->table); - rq->free_list = list; - rq->rq_status = RQ_ACTIVE; - rq->special = NULL; - rq->q = q; return rq; } @@ -581,40 +567,42 @@ inline void drive_stat_acct (kdev_t dev, int rw, /* * add-request adds a request to the linked list. - * It disables interrupts (acquires the request spinlock) so that it can muck - * with the request-lists in peace. Thus it should be called with no spinlocks - * held. + * io_request_lock is held and interrupts disabled, as we muck with the + * request queue list. * * By this point, req->cmd is always either READ/WRITE, never READA, * which is important for drive_stat_acct() above. */ - static inline void add_request(request_queue_t * q, struct request * req, - struct list_head *head, int lat) + struct list_head *insert_here) { int major; drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); + if (!q->plugged && q->head_active && insert_here == &q->queue_head) { + spin_unlock_irq(&io_request_lock); + BUG(); + } + /* - * let selected elevator insert the request + * elevator indicated where it wants this request to be + * inserted at elevator_merge time */ - q->elevator.elevator_fn(req, &q->elevator, &q->queue_head, head, lat); - - /* - * FIXME(eric) I don't understand why there is a need for this - * special case code. It clearly doesn't fit any more with - * the new queueing architecture, and it got added in 2.3.10. - * I am leaving this in here until I hear back from the COMPAQ - * people. 
- */ + list_add(&req->queue, insert_here); + major = MAJOR(req->rq_dev); - if (major >= COMPAQ_SMART2_MAJOR+0 && major <= COMPAQ_SMART2_MAJOR+7) - (q->request_fn)(q); - if (major >= COMPAQ_CISS_MAJOR+0 && major <= COMPAQ_CISS_MAJOR+7) - (q->request_fn)(q); if (major >= DAC960_MAJOR+0 && major <= DAC960_MAJOR+7) - (q->request_fn)(q); + q->request_fn(q); +} + +void inline blk_refill_freelist(request_queue_t *q, int rw) +{ + if (q->pending_free[rw]) { + list_splice(&q->pending_freelist[rw], &q->request_freelist[rw]); + INIT_LIST_HEAD(&q->pending_freelist[rw]); + q->pending_free[rw] = 0; + } } /* @@ -622,15 +610,34 @@ static inline void add_request(request_queue_t * q, struct request * req, */ void inline blkdev_release_request(struct request *req) { + request_queue_t *q = req->q; + int rw = req->cmd; + req->rq_status = RQ_INACTIVE; + req->q = NULL; /* - * Request may not have originated from ll_rw_blk + * Request may not have originated from ll_rw_blk. if not, + * assume it has free buffers and check waiters */ - if (req->free_list) { - list_add(&req->table, req->free_list); - req->free_list = NULL; - wake_up(&req->q->wait_for_request); + if (q) { + /* + * we've released enough buffers to start I/O again + */ + if (waitqueue_active(&blk_buffers_wait) + && atomic_read(&queued_sectors) < low_queued_sectors) + wake_up(&blk_buffers_wait); + + /* + * Add to pending free list and batch wakeups + */ + list_add(&req->table, &q->pending_freelist[rw]); + + if (++q->pending_free[rw] >= batch_requests) { + int wake_up = q->pending_free[rw]; + blk_refill_freelist(q, rw); + wake_up_nr(&q->wait_for_request, wake_up); + } } } @@ -658,9 +665,10 @@ static void attempt_merge(request_queue_t * q, * will have been updated to the appropriate number, * and we shouldn't do it here too. 
*/ - if(!(q->merge_requests_fn)(q, req, next, max_segments)) + if (!q->merge_requests_fn(q, req, next, max_segments)) return; + q->elevator.elevator_merge_req_fn(req, next); req->bhtail->b_reqnext = next->bh; req->bhtail = next->bhtail; req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; @@ -699,7 +707,7 @@ static int __make_request(request_queue_t * q, int rw, int max_segments = MAX_SEGMENTS; struct request * req = NULL, *freereq = NULL; int rw_ahead, max_sectors, el_ret; - struct list_head *head; + struct list_head *head, *insert_here; int latency; elevator_t *elevator = &q->elevator; @@ -713,6 +721,7 @@ static int __make_request(request_queue_t * q, int rw, rw = READ; /* drop into READ */ case READ: case WRITE: + latency = elevator_request_latency(elevator, rw); break; default: BUG(); @@ -741,38 +750,33 @@ static int __make_request(request_queue_t * q, int rw, */ max_sectors = get_max_sectors(bh->b_rdev); - latency = elevator_request_latency(elevator, rw); - +again: + head = &q->queue_head; /* * Now we acquire the request spinlock, we have to be mega careful * not to schedule or do something nonatomic */ -again: spin_lock_irq(&io_request_lock); - /* - * skip first entry, for devices with active queue head - */ - head = &q->queue_head; - if (q->head_active && !q->plugged) - head = head->next; - + insert_here = head->prev; if (list_empty(head)) { q->plug_device_fn(q, bh->b_rdev); /* is atomic */ goto get_rq; - } + } else if (q->head_active && !q->plugged) + head = head->next; - el_ret = elevator->elevator_merge_fn(q, &req, bh, rw, - &max_sectors, &max_segments); + el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw, + max_sectors, max_segments); switch (el_ret) { case ELEVATOR_BACK_MERGE: if (!q->back_merge_fn(q, req, bh, max_segments)) break; + elevator->elevator_merge_cleanup_fn(q, req, count); req->bhtail->b_reqnext = bh; req->bhtail = bh; req->nr_sectors = req->hard_nr_sectors += count; - req->e = elevator; + blk_started_io(count); 
drive_stat_acct(req->rq_dev, req->cmd, count, 0); attempt_back_merge(q, req, max_sectors, max_segments); goto out; @@ -780,20 +784,29 @@ again: case ELEVATOR_FRONT_MERGE: if (!q->front_merge_fn(q, req, bh, max_segments)) break; + elevator->elevator_merge_cleanup_fn(q, req, count); bh->b_reqnext = req->bh; req->bh = bh; req->buffer = bh->b_data; req->current_nr_sectors = count; req->sector = req->hard_sector = sector; req->nr_sectors = req->hard_nr_sectors += count; - req->e = elevator; + blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); attempt_front_merge(q, head, req, max_sectors, max_segments); goto out; + /* * elevator says don't/can't merge. get new request */ case ELEVATOR_NO_MERGE: + /* + * use elevator hints as to where to insert the + * request. if no hints, just add it to the back + * of the queue + */ + if (req) + insert_here = &req->queue; break; default: @@ -802,10 +815,9 @@ again: } /* - * Grab a free request from the freelist. Read first try their - * own queue - if that is empty, we steal from the write list. - * Writes must block if the write list is empty, and read aheads - * are not crucial. + * Grab a free request from the freelist - if that is empty, check + * if we are doing read ahead and abort instead of blocking for + * a free slot. 
*/ get_rq: if (freereq) { @@ -821,6 +833,7 @@ get_rq: } /* fill up the request-info, and add it to the queue */ + req->elevator_sequence = latency; req->cmd = rw; req->errors = 0; req->hard_sector = req->sector = sector; @@ -833,13 +846,13 @@ get_rq: req->bh = bh; req->bhtail = bh; req->rq_dev = bh->b_rdev; - req->e = elevator; - add_request(q, req, head, latency); + blk_started_io(count); + add_request(q, req, insert_here); out: - if (!q->plugged) - (q->request_fn)(q); if (freereq) blkdev_release_request(freereq); + if (!q->plugged) + q->request_fn(q); spin_unlock_irq(&io_request_lock); return 0; end_io: @@ -886,13 +899,13 @@ void generic_make_request (int rw, struct buffer_head * bh) int major = MAJOR(bh->b_rdev); request_queue_t *q; - if (!bh->b_end_io) BUG(); + if (!bh->b_end_io) + BUG(); + if (blk_size[major]) { unsigned long maxsector = (blk_size[major][MINOR(bh->b_rdev)] << 1) + 1; - unsigned int sector, count; - - count = bh->b_size >> 9; - sector = bh->b_rsector; + unsigned long sector = bh->b_rsector; + unsigned int count = bh->b_size >> 9; if (maxsector < count || maxsector - count < sector) { bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); @@ -903,7 +916,7 @@ void generic_make_request (int rw, struct buffer_head * bh) when mounting a device. 
*/ printk(KERN_INFO "attempt to access beyond end of device\n"); - printk(KERN_INFO "%s: rw=%d, want=%d, limit=%d\n", + printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", kdevname(bh->b_rdev), rw, (sector + count)>>1, blk_size[major][MINOR(bh->b_rdev)]); @@ -930,15 +943,13 @@ void generic_make_request (int rw, struct buffer_head * bh) buffer_IO_error(bh); break; } - - } - while (q->make_request_fn(q, rw, bh)); + } while (q->make_request_fn(q, rw, bh)); } /** * submit_bh: submit a buffer_head to the block device later for I/O - * @rw: whether to %READ or %WRITE, or mayve to %READA (read ahead) + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) * @bh: The &struct buffer_head which describes the I/O * * submit_bh() is very similar in purpose to generic_make_request(), and @@ -961,7 +972,7 @@ void submit_bh(int rw, struct buffer_head * bh) * further remap this. */ bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * (bh->b_size>>9); + bh->b_rsector = bh->b_blocknr * (bh->b_size >> 9); generic_make_request(rw, bh); @@ -1021,6 +1032,9 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) int correct_size; int i; + if (!nr) + return; + major = MAJOR(bhs[0]->b_dev); /* Determine correct block size for this device. */ @@ -1033,9 +1047,8 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) /* Verify requested block sizes. */ for (i = 0; i < nr; i++) { - struct buffer_head *bh; - bh = bhs[i]; - if (bh->b_size != correct_size) { + struct buffer_head *bh = bhs[i]; + if (bh->b_size % correct_size) { printk(KERN_NOTICE "ll_rw_block: device %s: " "only %d-char blocks implemented (%u)\n", kdevname(bhs[0]->b_dev), @@ -1051,8 +1064,17 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) } for (i = 0; i < nr; i++) { - struct buffer_head *bh; - bh = bhs[i]; + struct buffer_head *bh = bhs[i]; + + /* + * don't lock any more buffers if we are above the high + * water mark. instead start I/O on the queued stuff. 
+ */ + if (atomic_read(&queued_sectors) >= high_queued_sectors) { + run_task_queue(&tq_disk); + wait_event(blk_buffers_wait, + atomic_read(&queued_sectors) < low_queued_sectors); + } /* Only one thread can actually submit the I/O. */ if (test_and_set_bit(BH_Lock, &bh->b_state)) @@ -1096,12 +1118,25 @@ sorry: extern int stram_device_init (void); #endif -/* - * First step of what used to be end_request + +/** + * end_that_request_first - end I/O on one buffer. + * @req: the request being processed + * @uptodate: 0 for I/O error + * @name: the name printed for an I/O error * - * 0 means continue with end_that_request_last, - * 1 means we are done - */ + * Description: + * Ends I/O on the first buffer attached to @req, and sets it up + * for the next buffer_head (if any) in the cluster. + * + * Return: + * 0 - we are done with this request, call end_that_request_last() + * 1 - still buffers pending for this request + * + * Caveat: + * Drivers implementing their own end_request handling must call + * blk_finished_io() appropriately. 
+ **/ int end_that_request_first (struct request *req, int uptodate, char *name) { @@ -1115,6 +1150,7 @@ int end_that_request_first (struct request *req, int uptodate, char *name) if ((bh = req->bh) != NULL) { nsect = bh->b_size >> 9; + blk_finished_io(nsect); req->bh = bh->b_reqnext; bh->b_reqnext = NULL; bh->b_end_io(bh, uptodate); @@ -1138,19 +1174,18 @@ int end_that_request_first (struct request *req, int uptodate, char *name) void end_that_request_last(struct request *req) { - if (req->e) { - printk("end_that_request_last called with non-dequeued req\n"); - BUG(); - } if (req->sem != NULL) up(req->sem); blkdev_release_request(req); } +#define MB(kb) ((kb) << 10) + int __init blk_dev_init(void) { struct blk_dev_struct *dev; + int total_ram; request_cachep = kmem_cache_create("blkdev_requests", sizeof(struct request), @@ -1165,6 +1200,44 @@ int __init blk_dev_init(void) memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); memset(max_sectors, 0, sizeof(max_sectors)); + + atomic_set(&queued_sectors, 0); + total_ram = nr_free_pages() << (PAGE_SHIFT - 10); + + /* + * Try to keep 128MB max hysteris. 
If not possible, + * use half of RAM + */ + high_queued_sectors = (total_ram * 2) / 3; + low_queued_sectors = high_queued_sectors / 3; + if (high_queued_sectors - low_queued_sectors > MB(128)) + low_queued_sectors = high_queued_sectors - MB(128); + + + /* + * make it sectors (512b) + */ + high_queued_sectors <<= 1; + low_queued_sectors <<= 1; + + /* + * Scale free request slots per queue too + */ + total_ram = (total_ram + MB(32) - 1) & ~(MB(32) - 1); + if ((queue_nr_requests = total_ram >> 9) > QUEUE_NR_REQUESTS) + queue_nr_requests = QUEUE_NR_REQUESTS; + + /* + * adjust batch frees according to queue length, with upper limit + */ + if ((batch_requests = queue_nr_requests >> 3) > 32) + batch_requests = 32; + + printk("block: queued sectors max/low %dkB/%dkB, %d slots per queue\n", + high_queued_sectors / 2, + low_queued_sectors / 2, + queue_nr_requests); + #ifdef CONFIG_AMIGA_Z2RAM z2_init(); #endif @@ -1268,9 +1341,6 @@ int __init blk_dev_init(void) #ifdef CONFIG_SUN_JSFLASH jsfd_init(); #endif -#ifdef CONFIG_BLK_DEV_LVM - lvm_init(); -#endif return 0; }; @@ -1279,9 +1349,12 @@ EXPORT_SYMBOL(end_that_request_first); EXPORT_SYMBOL(end_that_request_last); EXPORT_SYMBOL(blk_init_queue); EXPORT_SYMBOL(blk_get_queue); +EXPORT_SYMBOL(__blk_get_queue); EXPORT_SYMBOL(blk_cleanup_queue); EXPORT_SYMBOL(blk_queue_headactive); EXPORT_SYMBOL(blk_queue_pluggable); EXPORT_SYMBOL(blk_queue_make_request); EXPORT_SYMBOL(generic_make_request); EXPORT_SYMBOL(blkdev_release_request); +EXPORT_SYMBOL(generic_unplug_device); +EXPORT_SYMBOL(queued_sectors); diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 9b1ff3a32..172acadbe 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -392,7 +392,6 @@ static inline int pd_new_segment(request_queue_t *q, struct request *req, int ma if (req->nr_segments < max_segments) { req->nr_segments++; - q->elevator.nr_segments++; return 1; } return 0; @@ -432,7 +431,6 @@ static int 
pd_merge_requests_fn(request_queue_t *q, struct request *req, if (total_segments > max_segments) return 0; - q->elevator.nr_segments -= same_segment; req->nr_segments = total_segments; return 1; } diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 53f6ca3df..b71703f9b 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -346,7 +346,6 @@ static inline int pf_new_segment(request_queue_t *q, struct request *req, int ma if (req->nr_segments < max_segments) { req->nr_segments++; - q->elevator.nr_segments++; return 1; } return 0; @@ -386,7 +385,6 @@ static int pf_merge_requests_fn(request_queue_t *q, struct request *req, if (total_segments > max_segments) return 0; - q->elevator.nr_segments -= same_segment; req->nr_segments = total_segments; return 1; } |