Merge with Linux 2.4.1.

author: Ralf Baechle <ralf@linux-mips.org> 2001-01-31 22:22:27 +0000
committer: Ralf Baechle <ralf@linux-mips.org> 2001-01-31 22:22:27 +0000
commit: 825423e4c4f18289df2393951cfd2a7a31fc0464 (patch)
tree: 4ad80e981c3d9effa910d2247d118d254f9a5d09 /drivers/block
parent: c4693dc4856ab907a5c02187a8d398861bebfc7e (diff)
7 files changed, 348 insertions, 288 deletions
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 5e8dc19f9..d0aac4c7d 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -1820,7 +1820,6 @@ static int DAC960_BackMergeFunction(RequestQueue_T *RequestQueue,
       Request->nr_segments < Controller->DriverScatterGatherLimit)
     {
       Request->nr_segments++;
-      RequestQueue->elevator.nr_segments++;
       return true;
     }
   return false;
@@ -1844,7 +1843,6 @@ static int DAC960_FrontMergeFunction(RequestQueue_T *RequestQueue,
       Request->nr_segments < Controller->DriverScatterGatherLimit)
     {
       Request->nr_segments++;
-      RequestQueue->elevator.nr_segments++;
       return true;
     }
   return false;
@@ -1864,17 +1862,12 @@ static int DAC960_MergeRequestsFunction(RequestQueue_T *RequestQueue,
   DAC960_Controller_T *Controller =
     (DAC960_Controller_T *) RequestQueue->queuedata;
   int TotalSegments = Request->nr_segments + NextRequest->nr_segments;
-  int SameSegment = 0;
   if (Request->bhtail->b_data + Request->bhtail->b_size
       == NextRequest->bh->b_data)
-    {
       TotalSegments--;
-      SameSegment = 1;
-    }
   if (TotalSegments > MaxSegments ||
       TotalSegments > Controller->DriverScatterGatherLimit)
     return false;
-  RequestQueue->elevator.nr_segments -= SameSegment;
   Request->nr_segments = TotalSegments;
   return true;
 }
@@ -2834,6 +2827,7 @@ static void DAC960_RequestFunction(RequestQueue_T *RequestQueue)
 static inline void DAC960_ProcessCompletedBuffer(BufferHeader_T *BufferHeader,
 						 boolean SuccessfulIO)
 {
+  blk_finished_io(BufferHeader->b_size >> 9);
   BufferHeader->b_end_io(BufferHeader, SuccessfulIO);
 }
 
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 35afe2a7f..47937edcc 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1086,6 +1086,7 @@ static inline void complete_buffers( struct buffer_head *bh, int status)
 	{
 		xbh = bh->b_reqnext; 
 		bh->b_reqnext = NULL; 
+		blk_finished_io(bh->b_size >> 9);
 		bh->b_end_io(bh, status);
 		bh = xbh;
 	}
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index afbf60ddc..7016cfff4 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -140,23 +140,7 @@ static int ida_release(struct inode *inode, struct file *filep);
 static int ida_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, unsigned long arg);
 static int ida_ctlr_ioctl(int ctlr, int dsk, ida_ioctl_t *io);
 
-static void do_ida_request(int i);
-/*
- * This is a hack.  This driver eats a major number for each controller, and
- * sets blkdev[xxx].request_fn to each one of these so the real request
- * function knows what controller its working with.
- */
-#define DO_IDA_REQUEST(x) { do_ida_request(x); }
-
-static void do_ida_request0(request_queue_t * q) DO_IDA_REQUEST(0);
-static void do_ida_request1(request_queue_t * q) DO_IDA_REQUEST(1);
-static void do_ida_request2(request_queue_t * q) DO_IDA_REQUEST(2);
-static void do_ida_request3(request_queue_t * q) DO_IDA_REQUEST(3);
-static void do_ida_request4(request_queue_t * q) DO_IDA_REQUEST(4);
-static void do_ida_request5(request_queue_t * q) DO_IDA_REQUEST(5);
-static void do_ida_request6(request_queue_t * q) DO_IDA_REQUEST(6);
-static void do_ida_request7(request_queue_t * q) DO_IDA_REQUEST(7);
-
+static void do_ida_request(request_queue_t *q);
 static void start_io(ctlr_info_t *h);
 
 static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c);
@@ -362,6 +346,47 @@ void cleanup_module(void)
 }
 #endif /* MODULE */
 
+static inline int cpq_new_segment(request_queue_t *q, struct request *rq,
+				  int max_segments)
+{
+	if (rq->nr_segments < SG_MAX) {
+		rq->nr_segments++;
+		return 1;
+	}
+	return 0;
+}
+
+static int cpq_back_merge_fn(request_queue_t *q, struct request *rq,
+			     struct buffer_head *bh, int max_segments)
+{
+	if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data)
+		return 1;
+	return cpq_new_segment(q, rq, max_segments);
+}
+
+static int cpq_front_merge_fn(request_queue_t *q, struct request *rq,
+			     struct buffer_head *bh, int max_segments)
+{
+	if (bh->b_data + bh->b_size == rq->bh->b_data)
+		return 1;
+	return cpq_new_segment(q, rq, max_segments);
+}
+
+static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq,
+				 struct request *nxt, int max_segments)
+{
+	int total_segments = rq->nr_segments + nxt->nr_segments;
+
+	if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data)
+		total_segments--;
+
+	if (total_segments > SG_MAX)
+		return 0;
+
+	rq->nr_segments = total_segments;
+	return 1;
+}
+
 /*
  *  This is it.  Find all the controllers and register them.  I really hate
  *  stealing all these major device numbers.
@@ -369,12 +394,7 @@ void cleanup_module(void)
  */
 int __init cpqarray_init(void)
 {
-	void (*request_fns[MAX_CTLR])(request_queue_t *) = {
-		do_ida_request0, do_ida_request1,
-		do_ida_request2, do_ida_request3,
-		do_ida_request4, do_ida_request5,
-		do_ida_request6, do_ida_request7,
-	};
+	request_queue_t *q;
 	int i,j;
 	int num_cntlrs_reg = 0;
 
@@ -495,16 +515,20 @@ int __init cpqarray_init(void)
 
 		hba[i]->access.set_intr_mask(hba[i], FIFO_NOT_EMPTY);
 
-
 		ida_procinit(i);
 
-		blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + i), 
-			request_fns[i]);		
-		blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR + i), 0);
+		q = BLK_DEFAULT_QUEUE(MAJOR_NR + i);
+		q->queuedata = hba[i];
+		blk_init_queue(q, do_ida_request);
+		blk_queue_headactive(q, 0);
 		blksize_size[MAJOR_NR+i] = ida_blocksizes + (i*256);
 		hardsect_size[MAJOR_NR+i] = ida_hardsizes + (i*256);
 		read_ahead[MAJOR_NR+i] = READ_AHEAD;
 
+		q->back_merge_fn = cpq_back_merge_fn;
+		q->front_merge_fn = cpq_front_merge_fn;
+		q->merge_requests_fn = cpq_merge_requests_fn;
+
 		ida_gendisk[i].major = MAJOR_NR + i;
 		ida_gendisk[i].major_name = "ida";
 		ida_gendisk[i].minor_shift = NWD_SHIFT;
@@ -872,37 +896,30 @@ static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c)
  * are in here (either via the dummy do_ida_request functions or by being
  * called from the interrupt handler
  */
-static void do_ida_request(int ctlr)
+static void do_ida_request(request_queue_t *q)
 {
-	ctlr_info_t *h = hba[ctlr];
+	ctlr_info_t *h = q->queuedata;
 	cmdlist_t *c;
 	int seg, sect;
 	char *lastdataend;
-	struct list_head * queue_head;
+	struct list_head * queue_head = &q->queue_head;
 	struct buffer_head *bh;
 	struct request *creq;
 
-	queue_head = &blk_dev[MAJOR_NR+ctlr].request_queue.queue_head;
-
-	if (list_empty(queue_head))
-	{
+	if (q->plugged || list_empty(queue_head)) {
 		start_io(h);
 		return;
 	}
 
 	creq = blkdev_entry_next_request(queue_head);
-	if (creq->rq_status == RQ_INACTIVE)
-	{	
-                start_io(h);
-                return;
-        }
-
+	if (creq->nr_segments > SG_MAX)
+		BUG();
 
-	if (ctlr != MAJOR(creq->rq_dev)-MAJOR_NR ||
-		ctlr > nr_ctlr || h == NULL) 
+	if (h->ctlr != MAJOR(creq->rq_dev)-MAJOR_NR || h->ctlr > nr_ctlr)
 	{
 		printk(KERN_WARNING "doreq cmd for %d, %x at %p\n",
-				ctlr, creq->rq_dev, creq);
+				h->ctlr, creq->rq_dev, creq);
+		blkdev_dequeue_request(creq);
 		complete_buffers(creq->bh, 0);
 		start_io(h);
                 return;
@@ -916,12 +933,12 @@ static void do_ida_request(int ctlr)
 
 	bh = creq->bh;
 
-	c->ctlr = ctlr;
+	c->ctlr = h->ctlr;
 	c->hdr.unit = MINOR(creq->rq_dev) >> NWD_SHIFT;
 	c->hdr.size = sizeof(rblk_t) >> 2;
 	c->size += sizeof(rblk_t);
 
-	c->req.hdr.blk = ida[(ctlr<<CTLR_SHIFT) + MINOR(creq->rq_dev)].start_sect + creq->sector;
+	c->req.hdr.blk = ida[(h->ctlr<<CTLR_SHIFT) + MINOR(creq->rq_dev)].start_sect + creq->sector;
 	c->bh = bh;
 DBGPX(
 	if (bh == NULL)
@@ -933,21 +950,16 @@ DBGPX(
 	sect = 0;
 	while(bh) {
 		sect += bh->b_size/512;
-DBGPX(
-		if (bh->b_size % 512) {
-			printk("Oh damn.  %d+%d, size = %d\n", creq->sector, sect, bh->b_size);
-			panic("b_size %% 512 != 0");
-		}
-);
 		if (bh->b_data == lastdataend) {
 			c->req.sg[seg-1].size += bh->b_size;
 			lastdataend += bh->b_size;
 		} else {
+			if (seg == SG_MAX)
+				BUG();
 			c->req.sg[seg].size = bh->b_size;
 			c->req.sg[seg].addr = (__u32)virt_to_bus(bh->b_data);
 			lastdataend = bh->b_data + bh->b_size;
-			if (++seg == SG_MAX)
-				break;
+			seg++;
 		}
 		bh = bh->b_reqnext;
 	}
@@ -955,30 +967,24 @@ DBGPX(	printk("Submitting %d sectors in %d segments\n", sect, seg); );
 	c->req.hdr.sg_cnt = seg;
 	c->req.hdr.blk_cnt = sect;
 
-	creq->sector += sect;
-	creq->nr_sectors -= sect;
-
-	/* Ready the next request:
-	 * Fix up creq if we still have more buffers in the buffer chain, or
-	 * mark this request as done and ready the next one.
+	/*
+	 * Since we control our own merging, we know that this request
+	 * is now fully setup and there's nothing left.
          */
-	if (creq->nr_sectors) {
-DBGPX(
-		if (bh==NULL) {
-			printk("sector=%d, nr_sectors=%d, sect=%d, seg=%d\n",
-				creq->sector, creq->nr_sectors, sect, seg);
-			panic("mother...");
-		}
-);
-		creq->bh = bh->b_reqnext;
-		bh->b_reqnext = NULL;
-DBGPX(		printk("More to do on same request %p\n", creq); );
-	} else {
-DBGPX(		printk("Done with %p\n", creq); );
-		blkdev_dequeue_request(creq);
-		end_that_request_last(creq);
+	if (creq->nr_sectors != sect) {
+		printk("ida: %ld != %d sectors\n", creq->nr_sectors, sect);
+		BUG();
 	}
 
+	blkdev_dequeue_request(creq);
+
+	/*
+	 * ehh, we can't really end the request here since it's not
+	 * even started yet. for now it shouldn't hurt though
+	 */
+DBGPX(	printk("Done with %p\n", creq); );
+	end_that_request_last(creq);
+
 	c->req.hdr.cmd = (creq->cmd == READ) ? IDA_READ : IDA_WRITE;
 	c->type = CMD_RWREQ;
 
@@ -1025,6 +1031,7 @@ static inline void complete_buffers(struct buffer_head *bh, int ok)
 		xbh = bh->b_reqnext;
 		bh->b_reqnext = NULL;
 		
+		blk_finished_io(bh->b_size >> 9);
 		bh->b_end_io(bh, ok);
 
 		bh = xbh;
@@ -1072,7 +1079,6 @@ static void do_ida_intr(int irq, void *dev_id, struct pt_regs *regs)
 	unsigned long flags;
 	__u32 a,a1;
 
-
 	istat = h->access.intr_pending(h);
 	/* Is this interrupt for us? */
 	if (istat == 0)
@@ -1116,7 +1122,7 @@ static void do_ida_intr(int irq, void *dev_id, struct pt_regs *regs)
 	/*
 	 * See if we can queue up some more IO
 	 */
-	do_ida_request(h->ctlr);
+	do_ida_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr));
 	spin_unlock_irqrestore(&io_request_lock, flags);
 }
 
diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c
index 1200773c2..9917ad055 100644
--- a/drivers/block/elevator.c
+++ b/drivers/block/elevator.c
@@ -24,125 +24,115 @@
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
 #include <linux/blk.h>
+#include <linux/module.h>
 #include <asm/uaccess.h>
 
-/*
- * Order ascending, but only allow a request to be skipped a certain
- * number of times
- */
-void elevator_linus(struct request *req, elevator_t *elevator,
-		    struct list_head *real_head,
-		    struct list_head *head, int orig_latency)
-{
-	struct list_head *entry = real_head;
-	struct request *tmp;
-
-	req->elevator_sequence = orig_latency;
-
-	while ((entry = entry->prev) != head) {
-		tmp = blkdev_entry_to_request(entry);
-		if (IN_ORDER(tmp, req))
-			break;
-		if (!tmp->elevator_sequence)
-			break;
-		tmp->elevator_sequence--;
-	}
-	list_add(&req->queue, entry);
-}
-
 int elevator_linus_merge(request_queue_t *q, struct request **req,
+			 struct list_head * head,
 			 struct buffer_head *bh, int rw,
-			 int *max_sectors, int *max_segments)
+			 int max_sectors, int max_segments)
 {
-	struct list_head *entry, *head = &q->queue_head;
+	struct list_head *entry = &q->queue_head;
 	unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE;
 
-	entry = head;
-	if (q->head_active && !q->plugged)
-		head = head->next;
-
 	while ((entry = entry->prev) != head) {
-		struct request *__rq = *req = blkdev_entry_to_request(entry);
+		struct request *__rq = blkdev_entry_to_request(entry);
+
+		/*
+		 * simple "aging" of requests in queue
+		 */
+		if (__rq->elevator_sequence-- <= 0) {
+			*req = __rq;
+			break;
+		}
+
 		if (__rq->sem)
 			continue;
 		if (__rq->cmd != rw)
 			continue;
-		if (__rq->nr_sectors + count > *max_sectors)
-			continue;
 		if (__rq->rq_dev != bh->b_rdev)
 			continue;
+		if (__rq->nr_sectors + count > max_sectors)
+			continue;
+		if (__rq->elevator_sequence < count)
+			break;
 		if (__rq->sector + __rq->nr_sectors == bh->b_rsector) {
 			ret = ELEVATOR_BACK_MERGE;
+			*req = __rq;
 			break;
-		}
-		if (!__rq->elevator_sequence)
-			break;
-		if (__rq->sector - count == bh->b_rsector) {
-			__rq->elevator_sequence--;
+		} else if (__rq->sector - count == bh->b_rsector) {
 			ret = ELEVATOR_FRONT_MERGE;
+			__rq->elevator_sequence -= count;
+			*req = __rq;
 			break;
-		}
+		} else if (!*req && BHRQ_IN_ORDER(bh, __rq))
+			*req = __rq;
 	}
 
+	return ret;
+}
+
+void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count)
+{
+	struct list_head *entry = &req->queue, *head = &q->queue_head;
+
 	/*
 	 * second pass scan of requests that got passed over, if any
 	 */
-	if (ret != ELEVATOR_NO_MERGE && *req) {
-		while ((entry = entry->next) != &q->queue_head) {
-			struct request *tmp = blkdev_entry_to_request(entry);
-			tmp->elevator_sequence--;
-		}
+	while ((entry = entry->next) != head) {
+		struct request *tmp = blkdev_entry_to_request(entry);
+		tmp->elevator_sequence -= count;
 	}
-
-	return ret;
 }
 
-/*
- * No request sorting, just add it to the back of the list
- */
-void elevator_noop(struct request *req, elevator_t *elevator,
-		   struct list_head *real_head, struct list_head *head,
-		   int orig_latency)
+void elevator_linus_merge_req(struct request *req, struct request *next)
 {
-	list_add_tail(&req->queue, real_head);
+	if (next->elevator_sequence < req->elevator_sequence)
+		req->elevator_sequence = next->elevator_sequence;
 }
 
 /*
- * See if we can find a request that is buffer can be coalesced with.
+ * See if we can find a request that this buffer can be coalesced with.
  */
 int elevator_noop_merge(request_queue_t *q, struct request **req,
+			struct list_head * head,
 			struct buffer_head *bh, int rw,
-			int *max_sectors, int *max_segments)
+			int max_sectors, int max_segments)
 {
-	struct list_head *entry, *head = &q->queue_head;
+	struct list_head *entry;
 	unsigned int count = bh->b_size >> 9;
 
-	if (q->head_active && !q->plugged)
-		head = head->next;
+	if (list_empty(&q->queue_head))
+		return ELEVATOR_NO_MERGE;
 
-	entry = head;
+	entry = &q->queue_head;
 	while ((entry = entry->prev) != head) {
-		struct request *__rq = *req = blkdev_entry_to_request(entry);
-		if (__rq->sem)
-			continue;
+		struct request *__rq = blkdev_entry_to_request(entry);
+
 		if (__rq->cmd != rw)
 			continue;
-		if (__rq->nr_sectors + count > *max_sectors)
-			continue;
 		if (__rq->rq_dev != bh->b_rdev)
 			continue;
-		if (__rq->sector + __rq->nr_sectors == bh->b_rsector)
+		if (__rq->nr_sectors + count > max_sectors)
+			continue;
+		if (__rq->sem)
+			continue;
+		if (__rq->sector + __rq->nr_sectors == bh->b_rsector) {
+			*req = __rq;
 			return ELEVATOR_BACK_MERGE;
-		if (__rq->sector - count == bh->b_rsector)
+		} else if (__rq->sector - count == bh->b_rsector) {
+			*req = __rq;
 			return ELEVATOR_FRONT_MERGE;
+		}
 	}
+
+	*req = blkdev_entry_to_request(q->queue_head.prev);
 	return ELEVATOR_NO_MERGE;
 }
 
-/*
- * The noop "elevator" does not do any accounting
- */
-void elevator_noop_dequeue(struct request *req) {}
+void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {}
+
+void elevator_noop_merge_req(struct request *req, struct request *next) {}
 
 int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg)
 {
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index ef71dddc7..836a89f1e 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -19,6 +19,7 @@
 #include <linux/config.h>
 #include <linux/locks.h>
 #include <linux/mm.h>
+#include <linux/swap.h>
 #include <linux/init.h>
 #include <linux/smp_lock.h>
 
@@ -38,8 +39,6 @@
 extern int mac_floppy_init(void);
 #endif
 
-extern int lvm_init(void);
-
 /*
  * For the allocated request tables
  */
@@ -118,6 +117,19 @@ int * max_readahead[MAX_BLKDEV];
  */
 int * max_sectors[MAX_BLKDEV];
 
+/*
+ * queued sectors for all devices, used to make sure we don't fill all
+ * of memory with locked buffers
+ */
+atomic_t queued_sectors;
+
+/*
+ * high and low watermark for above
+ */
+static int high_queued_sectors, low_queued_sectors;
+static int batch_requests, queue_nr_requests;
+static DECLARE_WAIT_QUEUE_HEAD(blk_buffers_wait);
+
 static inline int get_max_sectors(kdev_t dev)
 {
 	if (!max_sectors[MAJOR(dev)])
@@ -125,7 +137,7 @@ static inline int get_max_sectors(kdev_t dev)
 	return max_sectors[MAJOR(dev)][MINOR(dev)];
 }
 
-static inline request_queue_t *__blk_get_queue(kdev_t dev)
+inline request_queue_t *__blk_get_queue(kdev_t dev)
 {
 	struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
 
@@ -153,17 +165,14 @@ request_queue_t *blk_get_queue(kdev_t dev)
 
 static int __blk_cleanup_queue(struct list_head *head)
 {
-	struct list_head *entry;
 	struct request *rq;
 	int i = 0;
 
 	if (list_empty(head))
 		return 0;
 
-	entry = head->next;
 	do {
-		rq = list_entry(entry, struct request, table);
-		entry = entry->next;
+		rq = list_entry(head->next, struct request, table);
 		list_del(&rq->table);
 		kmem_cache_free(request_cachep, rq);
 		i++;
@@ -188,10 +197,12 @@ static int __blk_cleanup_queue(struct list_head *head)
  **/
 void blk_cleanup_queue(request_queue_t * q)
 {
-	int count = QUEUE_NR_REQUESTS;
+	int count = queue_nr_requests;
 
 	count -= __blk_cleanup_queue(&q->request_freelist[READ]);
 	count -= __blk_cleanup_queue(&q->request_freelist[WRITE]);
+	count -= __blk_cleanup_queue(&q->pending_freelist[READ]);
+	count -= __blk_cleanup_queue(&q->pending_freelist[WRITE]);
 
 	if (count)
 		printk("blk_cleanup_queue: leaked requests (%d)\n", count);
@@ -290,7 +301,6 @@ static inline int ll_new_segment(request_queue_t *q, struct request *req, int ma
 {
 	if (req->nr_segments < max_segments) {
 		req->nr_segments++;
-		q->elevator.nr_segments++;
 		return 1;
 	}
 	return 0;
@@ -316,18 +326,13 @@ static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
 				struct request *next, int max_segments)
 {
 	int total_segments = req->nr_segments + next->nr_segments;
-	int same_segment;
 
-	same_segment = 0;
-	if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) {
+	if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data)
 		total_segments--;
-		same_segment = 1;
-	}
     
 	if (total_segments > max_segments)
 		return 0;
 
-	q->elevator.nr_segments -= same_segment;
 	req->nr_segments = total_segments;
 	return 1;
 }
@@ -364,7 +369,7 @@ static inline void __generic_unplug_device(request_queue_t *q)
 	}
 }
 
-static void generic_unplug_device(void *data)
+void generic_unplug_device(void *data)
 {
 	request_queue_t *q = (request_queue_t *) data;
 	unsigned long flags;
@@ -379,19 +384,24 @@ static void blk_init_free_list(request_queue_t *q)
 	struct request *rq;
 	int i;
 
+	INIT_LIST_HEAD(&q->request_freelist[READ]);
+	INIT_LIST_HEAD(&q->request_freelist[WRITE]);
+	INIT_LIST_HEAD(&q->pending_freelist[READ]);
+	INIT_LIST_HEAD(&q->pending_freelist[WRITE]);
+	q->pending_free[READ] = q->pending_free[WRITE] = 0;
+
 	/*
-	 * Divide requests in half between read and write. This used to
-	 * be a 2/3 advantage for reads, but now reads can steal from
-	 * the write free list.
+	 * Divide requests in half between read and write
 	 */
-	for (i = 0; i < QUEUE_NR_REQUESTS; i++) {
+	for (i = 0; i < queue_nr_requests; i++) {
 		rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL);
+		memset(rq, 0, sizeof(struct request));
 		rq->rq_status = RQ_INACTIVE;
 		list_add(&rq->table, &q->request_freelist[i & 1]);
 	}
 
 	init_waitqueue_head(&q->wait_for_request);
-	spin_lock_init(&q->request_lock);
+	spin_lock_init(&q->queue_lock);
 }
 
 static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
@@ -426,14 +436,12 @@ static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
  *    blk_queue_headactive().
  *
  * Note:
- *    blk_init_queue() must be paired with a blk_cleanup-queue() call
+ *    blk_init_queue() must be paired with a blk_cleanup_queue() call
  *    when the block device is deactivated (such as at module unload).
  **/
 void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
 {
 	INIT_LIST_HEAD(&q->queue_head);
-	INIT_LIST_HEAD(&q->request_freelist[READ]);
-	INIT_LIST_HEAD(&q->request_freelist[WRITE]);
 	elevator_init(&q->elevator, ELEVATOR_LINUS);
 	blk_init_free_list(q);
 	q->request_fn     	= rfn;
@@ -455,7 +463,6 @@ void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
 	q->head_active    	= 1;
 }
 
-
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, table);
 /*
  * Get a free request. io_request_lock must be held and interrupts
@@ -463,37 +470,16 @@ void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
  */
 static inline struct request *get_request(request_queue_t *q, int rw)
 {
-	struct list_head *list = &q->request_freelist[rw];
-	struct request *rq;
-
-	/*
-	 * Reads get preferential treatment and are allowed to steal
-	 * from the write free list if necessary.
-	 */
-	if (!list_empty(list)) {
-		rq = blkdev_free_rq(list);
-		goto got_rq;
-	}
+	struct request *rq = NULL;
 
-	/*
-	 * if the WRITE list is non-empty, we know that rw is READ
-	 * and that the READ list is empty. allow reads to 'steal'
-	 * from the WRITE list.
-	 */
-	if (!list_empty(&q->request_freelist[WRITE])) {
-		list = &q->request_freelist[WRITE];
-		rq = blkdev_free_rq(list);
-		goto got_rq;
+	if (!list_empty(&q->request_freelist[rw])) {
+		rq = blkdev_free_rq(&q->request_freelist[rw]);
+		list_del(&rq->table);
+		rq->rq_status = RQ_ACTIVE;
+		rq->special = NULL;
+		rq->q = q;
 	}
 
-	return NULL;
-
-got_rq:
-	list_del(&rq->table);
-	rq->free_list = list;
-	rq->rq_status = RQ_ACTIVE;
-	rq->special = NULL;
-	rq->q = q;
 	return rq;
 }
 
@@ -581,40 +567,42 @@ inline void drive_stat_acct (kdev_t dev, int rw,
 
 /*
  * add-request adds a request to the linked list.
- * It disables interrupts (acquires the request spinlock) so that it can muck
- * with the request-lists in peace. Thus it should be called with no spinlocks
- * held.
+ * io_request_lock is held and interrupts disabled, as we muck with the
+ * request queue list.
  *
  * By this point, req->cmd is always either READ/WRITE, never READA,
  * which is important for drive_stat_acct() above.
  */
-
 static inline void add_request(request_queue_t * q, struct request * req,
-			       struct list_head *head, int lat)
+			       struct list_head *insert_here)
 {
 	int major;
 
 	drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
 
+	if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
+		spin_unlock_irq(&io_request_lock);
+		BUG();
+	}
+
 	/*
-	 * let selected elevator insert the request
+	 * elevator indicated where it wants this request to be
+	 * inserted at elevator_merge time
 	 */
-	q->elevator.elevator_fn(req, &q->elevator, &q->queue_head, head, lat);
-
-        /*
-	 * FIXME(eric) I don't understand why there is a need for this
-	 * special case code.  It clearly doesn't fit any more with
-	 * the new queueing architecture, and it got added in 2.3.10.
-	 * I am leaving this in here until I hear back from the COMPAQ
-	 * people.
-         */
+	list_add(&req->queue, insert_here);
+
 	major = MAJOR(req->rq_dev);
-	if (major >= COMPAQ_SMART2_MAJOR+0 && major <= COMPAQ_SMART2_MAJOR+7)
-		(q->request_fn)(q);
-	if (major >= COMPAQ_CISS_MAJOR+0 && major <= COMPAQ_CISS_MAJOR+7)
-                (q->request_fn)(q);
 	if (major >= DAC960_MAJOR+0 && major <= DAC960_MAJOR+7)
-		(q->request_fn)(q);
+		q->request_fn(q);
+}
+
+void inline blk_refill_freelist(request_queue_t *q, int rw)
+{
+	if (q->pending_free[rw]) {
+		list_splice(&q->pending_freelist[rw], &q->request_freelist[rw]);
+		INIT_LIST_HEAD(&q->pending_freelist[rw]);
+		q->pending_free[rw] = 0;
+	}
 }
 
 /*
@@ -622,15 +610,34 @@ static inline void add_request(request_queue_t * q, struct request * req,
  */
 void inline blkdev_release_request(struct request *req)
 {
+	request_queue_t *q = req->q;
+	int rw = req->cmd;
+
 	req->rq_status = RQ_INACTIVE;
+	req->q = NULL;
 
 	/*
-	 * Request may not have originated from ll_rw_blk
+	 * Request may not have originated from ll_rw_blk. if not,
+	 * assume it has free buffers and check waiters
 	 */
-	if (req->free_list) {
-		list_add(&req->table, req->free_list);
-		req->free_list = NULL;
-		wake_up(&req->q->wait_for_request);
+	if (q) {
+		/*
+		 * we've released enough buffers to start I/O again
+		 */
+		if (waitqueue_active(&blk_buffers_wait)
+		    && atomic_read(&queued_sectors) < low_queued_sectors)
+			wake_up(&blk_buffers_wait);
+
+		/*
+		 * Add to pending free list and batch wakeups
+		 */
+		list_add(&req->table, &q->pending_freelist[rw]);
+
+		if (++q->pending_free[rw] >= batch_requests) {
+			int wake_up = q->pending_free[rw];
+			blk_refill_freelist(q, rw);
+			wake_up_nr(&q->wait_for_request, wake_up);
+		}
 	}
 }
 
@@ -658,9 +665,10 @@ static void attempt_merge(request_queue_t * q,
 	 * will have been updated to the appropriate number,
 	 * and we shouldn't do it here too.
 	 */
-	if(!(q->merge_requests_fn)(q, req, next, max_segments))
+	if (!q->merge_requests_fn(q, req, next, max_segments))
 		return;
 
+	q->elevator.elevator_merge_req_fn(req, next);
 	req->bhtail->b_reqnext = next->bh;
 	req->bhtail = next->bhtail;
 	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
@@ -699,7 +707,7 @@ static int __make_request(request_queue_t * q, int rw,
 	int max_segments = MAX_SEGMENTS;
 	struct request * req = NULL, *freereq = NULL;
 	int rw_ahead, max_sectors, el_ret;
-	struct list_head *head;
+	struct list_head *head, *insert_here;
 	int latency;
 	elevator_t *elevator = &q->elevator;
 
@@ -713,6 +721,7 @@ static int __make_request(request_queue_t * q, int rw,
 			rw = READ;	/* drop into READ */
 		case READ:
 		case WRITE:
+			latency = elevator_request_latency(elevator, rw);
 			break;
 		default:
 			BUG();
@@ -741,38 +750,33 @@ static int __make_request(request_queue_t * q, int rw,
 	 */
 	max_sectors = get_max_sectors(bh->b_rdev);
 
-	latency = elevator_request_latency(elevator, rw);
-
+again:
+	head = &q->queue_head;
 	/*
 	 * Now we acquire the request spinlock, we have to be mega careful
 	 * not to schedule or do something nonatomic
 	 */
-again:
 	spin_lock_irq(&io_request_lock);
 
-	/*
-	 * skip first entry, for devices with active queue head
-	 */
-	head = &q->queue_head;
-	if (q->head_active && !q->plugged)
-		head = head->next;
-
+	insert_here = head->prev;
 	if (list_empty(head)) {
 		q->plug_device_fn(q, bh->b_rdev); /* is atomic */
 		goto get_rq;
-	}
+	} else if (q->head_active && !q->plugged)
+		head = head->next;
 
-	el_ret = elevator->elevator_merge_fn(q, &req, bh, rw,
-					     &max_sectors, &max_segments);
+	el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,
+					     max_sectors, max_segments);
 	switch (el_ret) {
 
 		case ELEVATOR_BACK_MERGE:
 			if (!q->back_merge_fn(q, req, bh, max_segments))
 				break;
+			elevator->elevator_merge_cleanup_fn(q, req, count);
 			req->bhtail->b_reqnext = bh;
 			req->bhtail = bh;
 			req->nr_sectors = req->hard_nr_sectors += count;
-			req->e = elevator;
+			blk_started_io(count);
 			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
 			attempt_back_merge(q, req, max_sectors, max_segments);
 			goto out;
@@ -780,20 +784,29 @@ again:
 		case ELEVATOR_FRONT_MERGE:
 			if (!q->front_merge_fn(q, req, bh, max_segments))
 				break;
+			elevator->elevator_merge_cleanup_fn(q, req, count);
 			bh->b_reqnext = req->bh;
 			req->bh = bh;
 			req->buffer = bh->b_data;
 			req->current_nr_sectors = count;
 			req->sector = req->hard_sector = sector;
 			req->nr_sectors = req->hard_nr_sectors += count;
-			req->e = elevator;
+			blk_started_io(count);
 			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
 			attempt_front_merge(q, head, req, max_sectors, max_segments);
 			goto out;
+
 		/*
 		 * elevator says don't/can't merge. get new request
 		 */
 		case ELEVATOR_NO_MERGE:
+			/*
+			 * use elevator hints as to where to insert the
+			 * request. if no hints, just add it to the back
+			 * of the queue
+			 */
+			if (req)
+				insert_here = &req->queue;
 			break;
 
 		default:
@@ -802,10 +815,9 @@ again:
 	}
 		
 	/*
-	 * Grab a free request from the freelist. Read first try their
-	 * own queue - if that is empty, we steal from the write list.
-	 * Writes must block if the write list is empty, and read aheads
-	 * are not crucial.
+	 * Grab a free request from the freelist - if that is empty, check
+	 * if we are doing read ahead and abort instead of blocking for
+	 * a free slot.
 	 */
 get_rq:
 	if (freereq) {
@@ -821,6 +833,7 @@ get_rq:
 	}
 
 /* fill up the request-info, and add it to the queue */
+	req->elevator_sequence = latency;
 	req->cmd = rw;
 	req->errors = 0;
 	req->hard_sector = req->sector = sector;
@@ -833,13 +846,13 @@ get_rq:
 	req->bh = bh;
 	req->bhtail = bh;
 	req->rq_dev = bh->b_rdev;
-	req->e = elevator;
-	add_request(q, req, head, latency);
+	blk_started_io(count);
+	add_request(q, req, insert_here);
 out:
-	if (!q->plugged)
-		(q->request_fn)(q);
 	if (freereq)
 		blkdev_release_request(freereq);
+	if (!q->plugged)
+		q->request_fn(q);
 	spin_unlock_irq(&io_request_lock);
 	return 0;
 end_io:
@@ -886,13 +899,13 @@ void generic_make_request (int rw, struct buffer_head * bh)
 	int major = MAJOR(bh->b_rdev);
 	request_queue_t *q;
 
-	if (!bh->b_end_io) BUG();
+	if (!bh->b_end_io)
+		BUG();
+
 	if (blk_size[major]) {
 		unsigned long maxsector = (blk_size[major][MINOR(bh->b_rdev)] << 1) + 1;
-		unsigned int sector, count;
-
-		count = bh->b_size >> 9;
-		sector = bh->b_rsector;
+		unsigned long sector = bh->b_rsector;
+		unsigned int count = bh->b_size >> 9;
 
 		if (maxsector < count || maxsector - count < sector) {
 			bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped);
@@ -903,7 +916,7 @@ void generic_make_request (int rw, struct buffer_head * bh)
 				   when mounting a device. */
 				printk(KERN_INFO
 				       "attempt to access beyond end of device\n");
-				printk(KERN_INFO "%s: rw=%d, want=%d, limit=%d\n",
+				printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
 				       kdevname(bh->b_rdev), rw,
 				       (sector + count)>>1,
 				       blk_size[major][MINOR(bh->b_rdev)]);
@@ -930,15 +943,13 @@ void generic_make_request (int rw, struct buffer_head * bh)
 			buffer_IO_error(bh);
 			break;
 		}
-
-	}
-	while (q->make_request_fn(q, rw, bh));
+	} while (q->make_request_fn(q, rw, bh));
 }
 
 
 /**
  * submit_bh: submit a buffer_head to the block device later for I/O
- * @rw: whether to %READ or %WRITE, or mayve to %READA (read ahead)
+ * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
  * @bh: The &struct buffer_head which describes the I/O
  *
  * submit_bh() is very similar in purpose to generic_make_request(), and
@@ -961,7 +972,7 @@ void submit_bh(int rw, struct buffer_head * bh)
 	 * further remap this.
 	 */
 	bh->b_rdev = bh->b_dev;
-	bh->b_rsector = bh->b_blocknr * (bh->b_size>>9);
+	bh->b_rsector = bh->b_blocknr * (bh->b_size >> 9);
 
 	generic_make_request(rw, bh);
 
@@ -1021,6 +1032,9 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
 	int correct_size;
 	int i;
 
+	if (!nr)
+		return;
+
 	major = MAJOR(bhs[0]->b_dev);
 
 	/* Determine correct block size for this device. */
@@ -1033,9 +1047,8 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
 
 	/* Verify requested block sizes. */
 	for (i = 0; i < nr; i++) {
-		struct buffer_head *bh;
-		bh = bhs[i];
-		if (bh->b_size != correct_size) {
+		struct buffer_head *bh = bhs[i];
+		if (bh->b_size % correct_size) {
 			printk(KERN_NOTICE "ll_rw_block: device %s: "
 			       "only %d-char blocks implemented (%u)\n",
 			       kdevname(bhs[0]->b_dev),
@@ -1051,8 +1064,17 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
 	}
 
 	for (i = 0; i < nr; i++) {
-		struct buffer_head *bh;
-		bh = bhs[i];
+		struct buffer_head *bh = bhs[i];
+
+		/*
+		 * Don't lock any more buffers if we are above the high
+		 * water mark; instead, start I/O on the queued stuff.
+		 */
+		if (atomic_read(&queued_sectors) >= high_queued_sectors) {
+			run_task_queue(&tq_disk);
+			wait_event(blk_buffers_wait,
+			 atomic_read(&queued_sectors) < low_queued_sectors);
+		}
 
 		/* Only one thread can actually submit the I/O. */
 		if (test_and_set_bit(BH_Lock, &bh->b_state))
@@ -1096,12 +1118,25 @@ sorry:
 extern int stram_device_init (void);
 #endif
 
-/*
- * First step of what used to be end_request
+
+/**
+ * end_that_request_first - end I/O on one buffer.
+ * @req:      the request being processed
+ * @uptodate: 0 for I/O error
+ * @name:     the name printed for an I/O error
  *
- * 0 means continue with end_that_request_last,
- * 1 means we are done
- */
+ * Description:
+ *     Ends I/O on the first buffer attached to @req, and sets it up
+ *     for the next buffer_head (if any) in the cluster.
+ *     
+ * Return:
+ *     0 - we are done with this request, call end_that_request_last()
+ *     1 - still buffers pending for this request
+ *
+ * Caveat: 
+ *     Drivers implementing their own end_request handling must call
+ *     blk_finished_io() appropriately.
+ **/
 
 int end_that_request_first (struct request *req, int uptodate, char *name)
 {
@@ -1115,6 +1150,7 @@ int end_that_request_first (struct request *req, int uptodate, char *name)
 
 	if ((bh = req->bh) != NULL) {
 		nsect = bh->b_size >> 9;
+		blk_finished_io(nsect);
 		req->bh = bh->b_reqnext;
 		bh->b_reqnext = NULL;
 		bh->b_end_io(bh, uptodate);
@@ -1138,19 +1174,18 @@ int end_that_request_first (struct request *req, int uptodate, char *name)
 
 void end_that_request_last(struct request *req)
 {
-	if (req->e) {
-		printk("end_that_request_last called with non-dequeued req\n");
-		BUG();
-	}
 	if (req->sem != NULL)
 		up(req->sem);
 
 	blkdev_release_request(req);
 }
 
+#define MB(kb)	((kb) << 10)
+
 int __init blk_dev_init(void)
 {
 	struct blk_dev_struct *dev;
+	int total_ram;
 
 	request_cachep = kmem_cache_create("blkdev_requests",
 					   sizeof(struct request),
@@ -1165,6 +1200,44 @@ int __init blk_dev_init(void)
 	memset(ro_bits,0,sizeof(ro_bits));
 	memset(max_readahead, 0, sizeof(max_readahead));
 	memset(max_sectors, 0, sizeof(max_sectors));
+
+	atomic_set(&queued_sectors, 0);
+	total_ram = nr_free_pages() << (PAGE_SHIFT - 10);
+
+	/*
+	 * Try to keep 128MB max hysteresis. If not possible,
+	 * use half of RAM
+	 */
+	high_queued_sectors = (total_ram * 2) / 3;
+	low_queued_sectors = high_queued_sectors / 3;
+	if (high_queued_sectors - low_queued_sectors > MB(128))
+		low_queued_sectors = high_queued_sectors - MB(128);
+
+
+	/*
+	 * Convert the limits from kB to 512-byte sectors
+	 */
+	high_queued_sectors <<= 1;
+	low_queued_sectors <<= 1;
+
+	/*
+	 * Scale free request slots per queue too
+	 */
+	total_ram = (total_ram + MB(32) - 1) & ~(MB(32) - 1);
+	if ((queue_nr_requests = total_ram >> 9) > QUEUE_NR_REQUESTS)
+		queue_nr_requests = QUEUE_NR_REQUESTS;
+
+	/*
+	 * adjust batch frees according to queue length, with upper limit
+	 */
+	if ((batch_requests = queue_nr_requests >> 3) > 32)
+		batch_requests = 32;
+
+	printk("block: queued sectors max/low %dkB/%dkB, %d slots per queue\n",
+						high_queued_sectors / 2,
+						low_queued_sectors / 2,
+						queue_nr_requests);
+
 #ifdef CONFIG_AMIGA_Z2RAM
 	z2_init();
 #endif
@@ -1268,9 +1341,6 @@ int __init blk_dev_init(void)
 #ifdef CONFIG_SUN_JSFLASH
 	jsfd_init();
 #endif
-#ifdef CONFIG_BLK_DEV_LVM
-	lvm_init();
-#endif
 	return 0;
 };
 
@@ -1279,9 +1349,12 @@ EXPORT_SYMBOL(end_that_request_first);
 EXPORT_SYMBOL(end_that_request_last);
 EXPORT_SYMBOL(blk_init_queue);
 EXPORT_SYMBOL(blk_get_queue);
+EXPORT_SYMBOL(__blk_get_queue);
 EXPORT_SYMBOL(blk_cleanup_queue);
 EXPORT_SYMBOL(blk_queue_headactive);
 EXPORT_SYMBOL(blk_queue_pluggable);
 EXPORT_SYMBOL(blk_queue_make_request);
 EXPORT_SYMBOL(generic_make_request);
 EXPORT_SYMBOL(blkdev_release_request);
+EXPORT_SYMBOL(generic_unplug_device);
+EXPORT_SYMBOL(queued_sectors);
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 9b1ff3a32..172acadbe 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -392,7 +392,6 @@ static inline int pd_new_segment(request_queue_t *q, struct request *req, int ma
 
 	if (req->nr_segments < max_segments) {
 		req->nr_segments++;
-		q->elevator.nr_segments++;
 		return 1;
 	}
 	return 0;
@@ -432,7 +431,6 @@ static int pd_merge_requests_fn(request_queue_t *q, struct request *req,
 	if (total_segments > max_segments)
 		return 0;
 
-	q->elevator.nr_segments -= same_segment;
 	req->nr_segments = total_segments;
 	return 1;
 }
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 53f6ca3df..b71703f9b 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -346,7 +346,6 @@ static inline int pf_new_segment(request_queue_t *q, struct request *req, int ma
 
 	if (req->nr_segments < max_segments) {
 		req->nr_segments++;
-		q->elevator.nr_segments++;
 		return 1;
 	}
 	return 0;
@@ -386,7 +385,6 @@ static int pf_merge_requests_fn(request_queue_t *q, struct request *req,
 	if (total_segments > max_segments)
 		return 0;
 
-	q->elevator.nr_segments -= same_segment;
 	req->nr_segments = total_segments;
 	return 1;
 }
author	Ralf Baechle <ralf@linux-mips.org>	2001-01-31 22:22:27 +0000
committer	Ralf Baechle <ralf@linux-mips.org>	2001-01-31 22:22:27 +0000
commit	825423e4c4f18289df2393951cfd2a7a31fc0464 (patch)
tree	4ad80e981c3d9effa910d2247d118d254f9a5d09 /drivers/block
parent	c4693dc4856ab907a5c02187a8d398861bebfc7e (diff)