From e61220e49edf2de540661b2798f5af969671f9c2 Mon Sep 17 00:00:00 2001
From: Tatyana Brokhman
Date: Thu, 20 Sep 2012 10:46:10 +0300
Subject: [PATCH 1/2] block: Adding ROW scheduling algorithm

This patch adds the implementation of a new scheduling algorithm - ROW.
The policy of this algorithm is to prioritize READ requests over WRITE
requests as much as possible without starving the WRITE requests.

Change-Id: I4ed52ea21d43b0e7c0769b2599779a3d3869c519
Signed-off-by: Tatyana Brokhman
---
 Documentation/block/row-iosched.txt | 117 +++++
 block/Kconfig.iosched               |  46 ++
 block/Makefile                      |   6 +
 block/row-iosched.c                 | 683 ++++++++++++++++++++++++++++
 4 files changed, 852 insertions(+)
 create mode 100644 Documentation/block/row-iosched.txt
 create mode 100644 block/row-iosched.c

diff --git a/Documentation/block/row-iosched.txt b/Documentation/block/row-iosched.txt
new file mode 100644
index 000000000000..987bd883444d
--- /dev/null
+++ b/Documentation/block/row-iosched.txt
@@ -0,0 +1,117 @@
+Introduction
+============
+
+The ROW scheduling algorithm will be used in mobile devices as the
+default block layer IO scheduling algorithm. ROW stands for "READ Over
+WRITE", which is the main request dispatch policy of this algorithm.
+
+The ROW IO scheduler was developed with the needs of mobile devices in
+mind. In mobile devices we favor user experience above everything else,
+thus we want to give READ IO requests as much priority as possible.
+The main idea of the ROW scheduling policy is:
+if there are READ requests in the pipe, dispatch them, but don't starve
+the WRITE requests too much.
+
+Software description
+====================
+The requests are kept in queues according to their priority. The
+dispatching of requests is done in a Round Robin manner with a
+different slice for each queue. The dispatch quantum for a specific
+queue is defined according to the queue's priority. READ queues are
+given a bigger dispatch quantum than the WRITE queues, within a
+dispatch cycle.
+
+At the moment there are 7 types of queues the requests are
+distributed to:
+- High priority READ queue
+- High priority Synchronous WRITE queue
+- Regular priority READ queue
+- Regular priority Synchronous WRITE queue
+- Regular priority WRITE queue
+- Low priority READ queue
+- Low priority Synchronous WRITE queue
+
+If in a certain dispatch cycle one of the queues was empty and didn't
+use its quantum, that queue will be marked as "un-served". If we're in
+the middle of a dispatch cycle dispatching from queue Y and a request
+arrives for queue X that was un-served in the previous cycle, and X's
+priority is higher than Y's, queue Y will be preempted in favor of
+queue X. This doesn't mean that the cycle is restarted: the
+"dispatched" counter of queue Y will remain unchanged. Once queue X
+uses up its quantum (or there are no more requests left in it) we'll
+switch back to queue Y and allow it to finish its quantum.
+
+For READ request queues we allow idling within a dispatch quantum in
+order to give the application a chance to insert more requests. Idling
+means adding some extra time for serving a certain queue even if the
+queue is empty. The idling is enabled if we identify that the
+application is inserting requests at a high frequency.
+
+For idling on READ queues we use a timer mechanism. When the timer
+expires, if there are requests in the scheduler we will signal the
+underlying driver (for example the MMC driver) to fetch another request
+for dispatch.
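+
+The following user-space style sketch (an illustration only, not the
+scheduler code itself; names and units are simplified) shows the
+heuristic used to decide whether to arm idling: idling is armed when
+two insertions to an idling-enabled queue arrive closer together than
+read_idle_freq (freq_ms below):
+
+	/* Illustrative sketch, msec instead of jiffies */
+	static unsigned long idle_trigger_time;
+	static int begin_idling;
+
+	void on_request_added(unsigned long now_ms, unsigned long freq_ms)
+	{
+		/* Arm idling only if the previous insertion was recent */
+		begin_idling = (now_ms < idle_trigger_time);
+		idle_trigger_time = now_ms + freq_ms;
+	}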
+
+The ROW algorithm takes the scheduling policy one step further, making
+it a bit more "user-needs oriented", by allowing the application to
+hint at the urgency of its requests. For example: even among the READ
+requests several requests may be more urgent for completion than others.
+Such requests will go to the High priority READ queue, which is given a
+bigger dispatch quantum than any other queue.
+
+The ROW scheduler will support special services for block devices that
+support High Priority Requests. That is, the scheduler may inform the
+device of urgent requests using a new callback, make_urgent_request.
+In addition it will support rescheduling of requests that were
+interrupted. For example, if the device is serving a long write request
+and a high priority read request suddenly arrives, the scheduler will
+inform the device about the urgent request, so the device can stop the
+current write request and serve the high priority read request. In such
+a case the device may also send back to the scheduler the remainder of
+the interrupted write request, such that the scheduler may continue
+sending high priority requests without the need to interrupt the
+ongoing write again and again. The write remainder will be sent later on
+according to the scheduler policy.
+
+Design
+======
+Existing algorithms (cfq, deadline) sort the io requests according to
+LBA. When deciding on the next request to dispatch they choose the
+closest request to the current disk head position (known from handling
+the last dispatched request). This is done in order to reduce the disk
+head movement to a minimum.
+We feel that this functionality isn't really needed in mobile devices.
+Usually applications that write/read large chunks of data insert the
+requests in already sorted LBA order. Thus dealing with sort trees adds
+unnecessary complexity.
+
+We're planning to try this enhancement in the future to check if the
+performance is influenced by it.
+
+SMP/multi-core
+==============
+At the moment the code is accessed from 2 contexts:
+- Application context (from block/elevator layer): adding the requests.
+- Underlying driver context (for example the mmc driver thread): dispatching
+  the requests and notifying on completion.
+
+One lock is used to synchronize between the two. This lock is provided
+by the underlying driver along with the dispatch queue.
+
+Config options
+==============
+1. hp_read_quantum: dispatch quantum for the high priority READ queue
+2. rp_read_quantum: dispatch quantum for the regular priority READ queue
+3. hp_swrite_quantum: dispatch quantum for the high priority Synchronous
+   WRITE queue
+4. rp_swrite_quantum: dispatch quantum for the regular priority
+   Synchronous WRITE queue
+5. rp_write_quantum: dispatch quantum for the regular priority WRITE
+   queue
+6. lp_read_quantum: dispatch quantum for the low priority READ queue
+7. lp_swrite_quantum: dispatch quantum for the low priority Synchronous
+   WRITE queue
+8. read_idle: how long to idle on the read queue, in msec (in case
+   idling is enabled on that queue).
+9. read_idle_freq: frequency of inserting READ requests that will
+   trigger idling. This is the time in msec between inserting two READ
+   requests.
+
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 2f2158e05a91..4251217c2522 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -44,6 +44,52 @@ config BFQ_CGROUP_DEBUG
 	  Enable some debugging help. Currently it exports additional stat
 	  files in a cgroup which can be useful for debugging.
 
+config IOSCHED_ROW + tristate "ROW I/O scheduler" + default y + ---help--- + The ROW I/O scheduler gives priority to READ requests over the + WRITE requests when dispatching, without starving WRITE requests. + Requests are kept in priority queues. Dispatching is done in a RR + manner when the dispatch quantum for each queue is calculated + according to queue priority. + Most suitable for mobile devices. + +choice + prompt "Default I/O scheduler" + default DEFAULT_CFQ + help + Select the I/O scheduler which will be used by default for all + block devices. + + config DEFAULT_DEADLINE + bool "Deadline" if IOSCHED_DEADLINE=y + + config DEFAULT_ROW + bool "ROW" if IOSCHED_ROW=y + help + The ROW I/O scheduler gives priority to READ requests + over the WRITE requests when dispatching, without starving + WRITE requests. Requests are kept in priority queues. + Dispatching is done in a RR manner when the dispatch quantum + for each queue is defined according to queue priority. + Most suitable for mobile devices. + + config DEFAULT_CFQ + bool "CFQ" if IOSCHED_CFQ=y + + config DEFAULT_NOOP + bool "No-op" + +endchoice + +config DEFAULT_IOSCHED + string + default "deadline" if DEFAULT_DEADLINE + default "row" if DEFAULT_ROW + default "cfq" if DEFAULT_CFQ + default "noop" if DEFAULT_NOOP + endmenu endif diff --git a/block/Makefile b/block/Makefile index 8d841f5f986f..f09bf0d75e14 100644 --- a/block/Makefile +++ b/block/Makefile @@ -24,6 +24,12 @@ obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o obj-$(CONFIG_IOSCHED_BFQ) += bfq.o +obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o +obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o +obj-$(CONFIG_IOSCHED_ROW) += row-iosched.o +obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o +obj-$(CONFIG_IOSCHED_TEST) += test-iosched.o + obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o obj-$(CONFIG_BLK_DEV_INTEGRITY_T10) += t10-pi.o diff --git a/block/row-iosched.c b/block/row-iosched.c new file mode 100644 index 000000000000..ad797387f168 --- /dev/null +++ b/block/row-iosched.c @@ -0,0 +1,683 @@ +/* + * ROW (Read Over Write) I/O scheduler. + * + * Copyright (c) 2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* See Documentation/block/row-iosched.txt */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * enum row_queue_prio - Priorities of the ROW queues + * + * This enum defines the priorities (and the number of queues) + * the requests will be disptributed to. The higher priority - + * the bigger is the dispatch quantum given to that queue. + * ROWQ_PRIO_HIGH_READ - is the higher priority queue. 
+ * + */ +enum row_queue_prio { + ROWQ_PRIO_HIGH_READ = 0, + ROWQ_PRIO_REG_READ, + ROWQ_PRIO_HIGH_SWRITE, + ROWQ_PRIO_REG_SWRITE, + ROWQ_PRIO_REG_WRITE, + ROWQ_PRIO_LOW_READ, + ROWQ_PRIO_LOW_SWRITE, + ROWQ_MAX_PRIO, +}; + +/* Flags indicating whether idling is enabled on the queue */ +static const bool queue_idling_enabled[] = { + true, /* ROWQ_PRIO_HIGH_READ */ + true, /* ROWQ_PRIO_REG_READ */ + false, /* ROWQ_PRIO_HIGH_SWRITE */ + false, /* ROWQ_PRIO_REG_SWRITE */ + false, /* ROWQ_PRIO_REG_WRITE */ + false, /* ROWQ_PRIO_LOW_READ */ + false, /* ROWQ_PRIO_LOW_SWRITE */ +}; + +/* Default values for row queues quantums in each dispatch cycle */ +static const int queue_quantum[] = { + 100, /* ROWQ_PRIO_HIGH_READ */ + 100, /* ROWQ_PRIO_REG_READ */ + 2, /* ROWQ_PRIO_HIGH_SWRITE */ + 1, /* ROWQ_PRIO_REG_SWRITE */ + 1, /* ROWQ_PRIO_REG_WRITE */ + 1, /* ROWQ_PRIO_LOW_READ */ + 1 /* ROWQ_PRIO_LOW_SWRITE */ +}; + +/* Default values for idling on read queues */ +#define ROW_IDLE_TIME 50 /* 5 msec */ +#define ROW_READ_FREQ 70 /* 7 msec */ + +/** + * struct rowq_idling_data - parameters for idling on the queue + * @idle_trigger_time: time (in jiffies). If a new request was + * inserted before this time value, idling + * will be enabled. + * @begin_idling: flag indicating wether we should idle + * + */ +struct rowq_idling_data { + unsigned long idle_trigger_time; + bool begin_idling; +}; + +/** + * struct row_queue - requests grouping structure + * @rdata: parent row_data structure + * @fifo: fifo of requests + * @prio: queue priority (enum row_queue_prio) + * @nr_dispatched: number of requests already dispatched in + * the current dispatch cycle + * @slice: number of requests to dispatch in a cycle + * @idle_data: data for idling on queues + * + */ +struct row_queue { + struct row_data *rdata; + struct list_head fifo; + enum row_queue_prio prio; + + unsigned int nr_dispatched; + unsigned int slice; + + /* used only for READ queues */ + struct rowq_idling_data idle_data; +}; + +/** + * struct idling_data - data for idling on empty rqueue + * @idle_time: idling duration (msec) + * @freq: min time between two requests that + * triger idling (msec) + * @idle_work: pointer to struct delayed_work + * + */ +struct idling_data { + unsigned long idle_time; + unsigned long freq; + + struct workqueue_struct *idle_workqueue; + struct delayed_work idle_work; +}; + +/** + * struct row_queue - Per block device rqueue structure + * @dispatch_queue: dispatch rqueue + * @row_queues: array of priority request queues with + * dispatch quantum per rqueue + * @curr_queue: index in the row_queues array of the + * currently serviced rqueue + * @read_idle: data for idling after READ request + * @nr_reqs: nr_reqs[0] holds the number of all READ requests in + * scheduler, nr_reqs[1] holds the number of all WRITE + * requests in scheduler + * @cycle_flags: used for marking unserved queueus + * + */ +struct row_data { + struct request_queue *dispatch_queue; + + struct { + struct row_queue rqueue; + int disp_quantum; + } row_queues[ROWQ_MAX_PRIO]; + + enum row_queue_prio curr_queue; + + struct idling_data read_idle; + unsigned int nr_reqs[2]; + + unsigned int cycle_flags; +}; + +#define RQ_ROWQ(rq) ((struct row_queue *) ((rq)->elv.priv[0])) + +#define row_log(q, fmt, args...) \ + blk_add_trace_msg(q, "%s():" fmt , __func__, ##args) +#define row_log_rowq(rdata, rowq_id, fmt, args...) 
\ + blk_add_trace_msg(rdata->dispatch_queue, "rowq%d " fmt, \ + rowq_id, ##args) + +static inline void row_mark_rowq_unserved(struct row_data *rd, + enum row_queue_prio qnum) +{ + rd->cycle_flags |= (1 << qnum); +} + +static inline void row_clear_rowq_unserved(struct row_data *rd, + enum row_queue_prio qnum) +{ + rd->cycle_flags &= ~(1 << qnum); +} + +static inline int row_rowq_unserved(struct row_data *rd, + enum row_queue_prio qnum) +{ + return rd->cycle_flags & (1 << qnum); +} + +/******************** Static helper functions ***********************/ +/* + * kick_queue() - Wake up device driver queue thread + * @work: pointer to struct work_struct + * + * This is a idling delayed work function. It's purpose is to wake up the + * device driver in order for it to start fetching requests. + * + */ +static void kick_queue(struct work_struct *work) +{ + struct delayed_work *idle_work = to_delayed_work(work); + struct idling_data *read_data = + container_of(idle_work, struct idling_data, idle_work); + struct row_data *rd = + container_of(read_data, struct row_data, read_idle); + + row_log_rowq(rd, rd->curr_queue, "Performing delayed work"); + /* Mark idling process as done */ + rd->row_queues[rd->curr_queue].rqueue.idle_data.begin_idling = false; + + if (!(rd->nr_reqs[0] + rd->nr_reqs[1])) + row_log(rd->dispatch_queue, "No requests in scheduler"); + else { + spin_lock_irq(rd->dispatch_queue->queue_lock); + __blk_run_queue(rd->dispatch_queue); + spin_unlock_irq(rd->dispatch_queue->queue_lock); + } +} + +/* + * row_restart_disp_cycle() - Restart the dispatch cycle + * @rd: pointer to struct row_data + * + * This function restarts the dispatch cycle by: + * - Setting current queue to ROWQ_PRIO_HIGH_READ + * - For each queue: reset the number of requests dispatched in + * the cycle + */ +static inline void row_restart_disp_cycle(struct row_data *rd) +{ + int i; + + for (i = 0; i < ROWQ_MAX_PRIO; i++) + rd->row_queues[i].rqueue.nr_dispatched = 0; + + rd->curr_queue = ROWQ_PRIO_HIGH_READ; + row_log(rd->dispatch_queue, "Restarting cycle"); +} + +static inline void row_get_next_queue(struct row_data *rd) +{ + rd->curr_queue++; + if (rd->curr_queue == ROWQ_MAX_PRIO) + row_restart_disp_cycle(rd); +} + +/******************* Elevator callback functions *********************/ + +/* + * row_add_request() - Add request to the scheduler + * @q: requests queue + * @rq: request to add + * + */ +static void row_add_request(struct request_queue *q, + struct request *rq) +{ + struct row_data *rd = (struct row_data *)q->elevator->elevator_data; + struct row_queue *rqueue = RQ_ROWQ(rq); + + list_add_tail(&rq->queuelist, &rqueue->fifo); + rd->nr_reqs[rq_data_dir(rq)]++; + rq_set_fifo_time(rq, jiffies); /* for statistics*/ + + if (queue_idling_enabled[rqueue->prio]) { + if (delayed_work_pending(&rd->read_idle.idle_work)) + (void)cancel_delayed_work( + &rd->read_idle.idle_work); + if (time_before(jiffies, rqueue->idle_data.idle_trigger_time)) { + rqueue->idle_data.begin_idling = true; + row_log_rowq(rd, rqueue->prio, "Enable idling"); + } else + rqueue->idle_data.begin_idling = false; + + rqueue->idle_data.idle_trigger_time = + jiffies + msecs_to_jiffies(rd->read_idle.freq); + } + row_log_rowq(rd, rqueue->prio, "added request"); +} + +/* + * row_remove_request() - Remove given request from scheduler + * @q: requests queue + * @rq: request to remove + * + */ +static void row_remove_request(struct request_queue *q, + struct request *rq) +{ + struct row_data *rd = (struct row_data *)q->elevator->elevator_data; + + 
rq_fifo_clear(rq); + rd->nr_reqs[rq_data_dir(rq)]--; +} + +/* + * row_dispatch_insert() - move request to dispatch queue + * @rd: pointer to struct row_data + * + * This function moves the next request to dispatch from + * rd->curr_queue to the dispatch queue + * + */ +static void row_dispatch_insert(struct row_data *rd) +{ + struct request *rq; + + rq = rq_entry_fifo(rd->row_queues[rd->curr_queue].rqueue.fifo.next); + row_remove_request(rd->dispatch_queue, rq); + elv_dispatch_add_tail(rd->dispatch_queue, rq); + rd->row_queues[rd->curr_queue].rqueue.nr_dispatched++; + row_clear_rowq_unserved(rd, rd->curr_queue); + row_log_rowq(rd, rd->curr_queue, " Dispatched request nr_disp = %d", + rd->row_queues[rd->curr_queue].rqueue.nr_dispatched); +} + +/* + * row_choose_queue() - choose the next queue to dispatch from + * @rd: pointer to struct row_data + * + * Updates rd->curr_queue. Returns 1 if there are requests to + * dispatch, 0 if there are no requests in scheduler + * + */ +static int row_choose_queue(struct row_data *rd) +{ + int prev_curr_queue = rd->curr_queue; + + if (!(rd->nr_reqs[0] + rd->nr_reqs[1])) { + row_log(rd->dispatch_queue, "No more requests in scheduler"); + return 0; + } + + row_get_next_queue(rd); + + /* + * Loop over all queues to find the next queue that is not empty. + * Stop when you get back to curr_queue + */ + while (list_empty(&rd->row_queues[rd->curr_queue].rqueue.fifo) + && rd->curr_queue != prev_curr_queue) { + /* Mark rqueue as unserved */ + row_mark_rowq_unserved(rd, rd->curr_queue); + row_get_next_queue(rd); + } + + return 1; +} + +/* + * row_dispatch_requests() - selects the next request to dispatch + * @q: requests queue + * @force: ignored + * + * Return 0 if no requests were moved to the dispatch queue. + * 1 otherwise + * + */ +static int row_dispatch_requests(struct request_queue *q, int force) +{ + struct row_data *rd = (struct row_data *)q->elevator->elevator_data; + int ret = 0, currq, i; + + currq = rd->curr_queue; + + /* + * Find the first unserved queue (with higher priority then currq) + * that is not empty + */ + for (i = 0; i < currq; i++) { + if (row_rowq_unserved(rd, i) && + !list_empty(&rd->row_queues[i].rqueue.fifo)) { + row_log_rowq(rd, currq, + " Preemting for unserved rowq%d", i); + rd->curr_queue = i; + row_dispatch_insert(rd); + ret = 1; + goto done; + } + } + + if (rd->row_queues[currq].rqueue.nr_dispatched >= + rd->row_queues[currq].disp_quantum) { + rd->row_queues[currq].rqueue.nr_dispatched = 0; + row_log_rowq(rd, currq, "Expiring rqueue"); + ret = row_choose_queue(rd); + if (ret) + row_dispatch_insert(rd); + goto done; + } + + /* Dispatch from curr_queue */ + if (list_empty(&rd->row_queues[currq].rqueue.fifo)) { + /* check idling */ + if (delayed_work_pending(&rd->read_idle.idle_work)) { + row_log_rowq(rd, currq, + "Delayed work pending. Exiting"); + goto done; + } + + if (queue_idling_enabled[currq] && + rd->row_queues[currq].rqueue.idle_data.begin_idling) { + if (!queue_delayed_work(rd->read_idle.idle_workqueue, + &rd->read_idle.idle_work, + jiffies + + msecs_to_jiffies(rd->read_idle.idle_time))) { + row_log_rowq(rd, currq, + "Work already on queue!"); + pr_err("ROW_BUG: Work already on queue!"); + } else + row_log_rowq(rd, currq, + "Scheduled delayed work. exiting"); + goto done; + } else { + row_log_rowq(rd, currq, + "Currq empty. 
Choose next queue"); + ret = row_choose_queue(rd); + if (!ret) + goto done; + } + } + + ret = 1; + row_dispatch_insert(rd); + +done: + return ret; +} + +/* + * row_init_queue() - Init scheduler data structures + * @q: requests queue + * + * Return pointer to struct row_data to be saved in elevator for + * this dispatch queue + * + */ +static int row_init_queue(struct request_queue *q) +{ + + struct row_data *rdata; + int i; + + rdata = kmalloc_node(sizeof(*rdata), + GFP_KERNEL | __GFP_ZERO, q->node); + if (!rdata) + return -ENOMEM; + + for (i = 0; i < ROWQ_MAX_PRIO; i++) { + INIT_LIST_HEAD(&rdata->row_queues[i].rqueue.fifo); + rdata->row_queues[i].disp_quantum = queue_quantum[i]; + rdata->row_queues[i].rqueue.rdata = rdata; + rdata->row_queues[i].rqueue.prio = i; + rdata->row_queues[i].rqueue.idle_data.begin_idling = false; + } + + /* + * Currently idling is enabled only for READ queues. If we want to + * enable it for write queues also, note that idling frequency will + * be the same in both cases + */ + rdata->read_idle.idle_time = ROW_IDLE_TIME; + rdata->read_idle.freq = ROW_READ_FREQ; + rdata->read_idle.idle_workqueue = alloc_workqueue("row_idle_work", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + if (!rdata->read_idle.idle_workqueue) + panic("Failed to create idle workqueue\n"); + INIT_DELAYED_WORK(&rdata->read_idle.idle_work, kick_queue); + + rdata->curr_queue = ROWQ_PRIO_HIGH_READ; + rdata->dispatch_queue = q; + q->elevator->elevator_data = rdata; + + rdata->nr_reqs[READ] = rdata->nr_reqs[WRITE] = 0; + + return 0; +} + +/* + * row_exit_queue() - called on unloading the RAW scheduler + * @e: poiner to struct elevator_queue + * + */ +static void row_exit_queue(struct elevator_queue *e) +{ + struct row_data *rd = (struct row_data *)e->elevator_data; + int i; + + for (i = 0; i < ROWQ_MAX_PRIO; i++) + BUG_ON(!list_empty(&rd->row_queues[i].rqueue.fifo)); + (void)cancel_delayed_work_sync(&rd->read_idle.idle_work); + kfree(rd); +} + +/* + * row_merged_requests() - Called when 2 requests are merged + * @q: requests queue + * @rq: request the two requests were merged into + * @next: request that was merged + */ +static void row_merged_requests(struct request_queue *q, struct request *rq, + struct request *next) +{ + struct row_queue *rqueue = RQ_ROWQ(next); + + list_del_init(&next->queuelist); + + rqueue->rdata->nr_reqs[rq_data_dir(rq)]--; +} + +/* + * get_queue_type() - Get queue type for a given request + * + * This is a helping function which purpose is to determine what + * ROW queue the given request should be added to (and + * dispatched from leter on) + * + * TODO: Right now only 3 queues are used REG_READ, REG_WRITE + * and REG_SWRITE + */ +static enum row_queue_prio get_queue_type(struct request *rq) +{ + const int data_dir = rq_data_dir(rq); + const bool is_sync = rq_is_sync(rq); + + if (data_dir == READ) + return ROWQ_PRIO_REG_READ; + else if (is_sync) + return ROWQ_PRIO_REG_SWRITE; + else + return ROWQ_PRIO_REG_WRITE; +} + +/* + * row_set_request() - Set ROW data structures associated with this request. 
+ * @q: requests queue + * @rq: pointer to the request + * @gfp_mask: ignored + * + */ +static int +row_set_request(struct request_queue *q, struct request *rq, struct bio *bio, + gfp_t gfp_mask) +{ + struct row_data *rd = (struct row_data *)q->elevator->elevator_data; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + rq->elv.priv[0] = + (void *)(&rd->row_queues[get_queue_type(rq)]); + spin_unlock_irqrestore(q->queue_lock, flags); + + return 0; +} + +/********** Helping sysfs functions/defenitions for ROW attributes ******/ +static ssize_t row_var_show(int var, char *page) +{ + return snprintf(page, 100, "%d\n", var); +} + +static ssize_t row_var_store(int *var, const char *page, size_t count) +{ + int err; + err = kstrtoul(page, 10, (unsigned long *)var); + + return count; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ + struct row_data *rowd = e->elevator_data; \ + int __data = __VAR; \ + if (__CONV) \ + __data = jiffies_to_msecs(__data); \ + return row_var_show(__data, (page)); \ +} +SHOW_FUNCTION(row_hp_read_quantum_show, + rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum, 0); +SHOW_FUNCTION(row_rp_read_quantum_show, + rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum, 0); +SHOW_FUNCTION(row_hp_swrite_quantum_show, + rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum, 0); +SHOW_FUNCTION(row_rp_swrite_quantum_show, + rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum, 0); +SHOW_FUNCTION(row_rp_write_quantum_show, + rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum, 0); +SHOW_FUNCTION(row_lp_read_quantum_show, + rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum, 0); +SHOW_FUNCTION(row_lp_swrite_quantum_show, + rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum, 0); +SHOW_FUNCTION(row_read_idle_show, rowd->read_idle.idle_time, 1); +SHOW_FUNCTION(row_read_idle_freq_show, rowd->read_idle.freq, 1); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, \ + const char *page, size_t count) \ +{ \ + struct row_data *rowd = e->elevator_data; \ + int __data; \ + int ret = row_var_store(&__data, (page), count); \ + if (__CONV) \ + __data = (int)msecs_to_jiffies(__data); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + *(__PTR) = __data; \ + return ret; \ +} +STORE_FUNCTION(row_hp_read_quantum_store, + &rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum, 0, + INT_MAX, 0); +STORE_FUNCTION(row_rp_read_quantum_store, + &rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum, 0, + INT_MAX, 0); +STORE_FUNCTION(row_hp_swrite_quantum_store, + &rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum, 0, + INT_MAX, 0); +STORE_FUNCTION(row_rp_swrite_quantum_store, + &rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum, 0, + INT_MAX, 0); +STORE_FUNCTION(row_rp_write_quantum_store, + &rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum, 0, + INT_MAX, 0); +STORE_FUNCTION(row_lp_read_quantum_store, + &rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum, 0, + INT_MAX, 0); +STORE_FUNCTION(row_lp_swrite_quantum_store, + &rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum, 0, + INT_MAX, 1); +STORE_FUNCTION(row_read_idle_store, &rowd->read_idle.idle_time, 1, INT_MAX, 1); +STORE_FUNCTION(row_read_idle_freq_store, &rowd->read_idle.freq, + 1, INT_MAX, 1); + +#undef STORE_FUNCTION + +#define ROW_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, row_##name##_show, \ + row_##name##_store) + +static struct 
elv_fs_entry row_attrs[] = { + ROW_ATTR(hp_read_quantum), + ROW_ATTR(rp_read_quantum), + ROW_ATTR(hp_swrite_quantum), + ROW_ATTR(rp_swrite_quantum), + ROW_ATTR(rp_write_quantum), + ROW_ATTR(lp_read_quantum), + ROW_ATTR(lp_swrite_quantum), + ROW_ATTR(read_idle), + ROW_ATTR(read_idle_freq), + __ATTR_NULL +}; + +static struct elevator_type iosched_row = { + .ops = { + .elevator_merge_req_fn = row_merged_requests, + .elevator_dispatch_fn = row_dispatch_requests, + .elevator_add_req_fn = row_add_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, + .elevator_set_req_fn = row_set_request, + .elevator_init_fn = row_init_queue, + .elevator_exit_fn = row_exit_queue, + }, + + .elevator_attrs = row_attrs, + .elevator_name = "row", + .elevator_owner = THIS_MODULE, +}; + +static int __init row_init(void) +{ + elv_register(&iosched_row); + return 0; +} + +static void __exit row_exit(void) +{ + elv_unregister(&iosched_row); +} + +module_init(row_init); +module_exit(row_exit); + +MODULE_LICENSE("GPLv2"); +MODULE_DESCRIPTION("Read Over Write IO scheduler"); -- Gitee From 987c1e1c53725f44379ad36d9c239df89de462b4 Mon Sep 17 00:00:00 2001 From: Eirlys Date: Thu, 29 Sep 2022 23:05:00 +0800 Subject: [PATCH 2/2] adopt row to openeuler --- block/row-iosched.c | 390 +++++++++++++------------------------------- 1 file changed, 114 insertions(+), 276 deletions(-) diff --git a/block/row-iosched.c b/block/row-iosched.c index ad797387f168..9a7a37c8e7c6 100644 --- a/block/row-iosched.c +++ b/block/row-iosched.c @@ -1,20 +1,3 @@ -/* - * ROW (Read Over Write) I/O scheduler. - * - * Copyright (c) 2012, The Linux Foundation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -/* See Documentation/block/row-iosched.txt */ - #include #include #include @@ -26,16 +9,9 @@ #include #include #include +#include +#include -/* - * enum row_queue_prio - Priorities of the ROW queues - * - * This enum defines the priorities (and the number of queues) - * the requests will be disptributed to. The higher priority - - * the bigger is the dispatch quantum given to that queue. - * ROWQ_PRIO_HIGH_READ - is the higher priority queue. - * - */ enum row_queue_prio { ROWQ_PRIO_HIGH_READ = 0, ROWQ_PRIO_REG_READ, @@ -47,7 +23,7 @@ enum row_queue_prio { ROWQ_MAX_PRIO, }; -/* Flags indicating whether idling is enabled on the queue */ +/* whether idling is enabled on the queue */ static const bool queue_idling_enabled[] = { true, /* ROWQ_PRIO_HIGH_READ */ true, /* ROWQ_PRIO_REG_READ */ @@ -73,30 +49,12 @@ static const int queue_quantum[] = { #define ROW_IDLE_TIME 50 /* 5 msec */ #define ROW_READ_FREQ 70 /* 7 msec */ -/** - * struct rowq_idling_data - parameters for idling on the queue - * @idle_trigger_time: time (in jiffies). If a new request was - * inserted before this time value, idling - * will be enabled. 
- * @begin_idling: flag indicating wether we should idle - * - */ + struct rowq_idling_data { - unsigned long idle_trigger_time; - bool begin_idling; + unsigned long idle_trigger_time; + bool begin_idling; }; -/** - * struct row_queue - requests grouping structure - * @rdata: parent row_data structure - * @fifo: fifo of requests - * @prio: queue priority (enum row_queue_prio) - * @nr_dispatched: number of requests already dispatched in - * the current dispatch cycle - * @slice: number of requests to dispatch in a cycle - * @idle_data: data for idling on queues - * - */ struct row_queue { struct row_data *rdata; struct list_head fifo; @@ -109,106 +67,58 @@ struct row_queue { struct rowq_idling_data idle_data; }; -/** - * struct idling_data - data for idling on empty rqueue - * @idle_time: idling duration (msec) - * @freq: min time between two requests that - * triger idling (msec) - * @idle_work: pointer to struct delayed_work - * - */ struct idling_data { - unsigned long idle_time; - unsigned long freq; + unsigned long idle_time; + unsigned long freq; - struct workqueue_struct *idle_workqueue; - struct delayed_work idle_work; + struct delayed_work idle_work; }; -/** - * struct row_queue - Per block device rqueue structure - * @dispatch_queue: dispatch rqueue - * @row_queues: array of priority request queues with - * dispatch quantum per rqueue - * @curr_queue: index in the row_queues array of the - * currently serviced rqueue - * @read_idle: data for idling after READ request - * @nr_reqs: nr_reqs[0] holds the number of all READ requests in - * scheduler, nr_reqs[1] holds the number of all WRITE - * requests in scheduler - * @cycle_flags: used for marking unserved queueus - * - */ + struct row_data { - struct request_queue *dispatch_queue; + struct list_head dispatch_queue; struct { struct row_queue rqueue; int disp_quantum; - } row_queues[ROWQ_MAX_PRIO]; + } row_queues[ROWQ_MAX_PRIO]; - enum row_queue_prio curr_queue; + enum row_queue_prio curr_queue; - struct idling_data read_idle; - unsigned int nr_reqs[2]; + struct idling_data read_idle; + unsigned int nr_reqs[2]; //0-read,1-write unsigned int cycle_flags; + + spinlock_t lock; }; #define RQ_ROWQ(rq) ((struct row_queue *) ((rq)->elv.priv[0])) -#define row_log(q, fmt, args...) \ - blk_add_trace_msg(q, "%s():" fmt , __func__, ##args) -#define row_log_rowq(rdata, rowq_id, fmt, args...) \ - blk_add_trace_msg(rdata->dispatch_queue, "rowq%d " fmt, \ - rowq_id, ##args) -static inline void row_mark_rowq_unserved(struct row_data *rd, - enum row_queue_prio qnum) -{ +static inline void row_mark_rowq_unserved(struct row_data *rd, enum row_queue_prio qnum) { rd->cycle_flags |= (1 << qnum); } -static inline void row_clear_rowq_unserved(struct row_data *rd, - enum row_queue_prio qnum) -{ +static inline void row_clear_rowq_unserved(struct row_data *rd, enum row_queue_prio qnum) { rd->cycle_flags &= ~(1 << qnum); } -static inline int row_rowq_unserved(struct row_data *rd, - enum row_queue_prio qnum) -{ +static inline int row_rowq_unserved(struct row_data *rd, enum row_queue_prio qnum) { return rd->cycle_flags & (1 << qnum); } -/******************** Static helper functions ***********************/ -/* - * kick_queue() - Wake up device driver queue thread - * @work: pointer to struct work_struct - * - * This is a idling delayed work function. It's purpose is to wake up the - * device driver in order for it to start fetching requests. 
- * - */ -static void kick_queue(struct work_struct *work) -{ + +static void kick_queue(struct work_struct *work) { struct delayed_work *idle_work = to_delayed_work(work); struct idling_data *read_data = container_of(idle_work, struct idling_data, idle_work); struct row_data *rd = container_of(read_data, struct row_data, read_idle); - row_log_rowq(rd, rd->curr_queue, "Performing delayed work"); /* Mark idling process as done */ rd->row_queues[rd->curr_queue].rqueue.idle_data.begin_idling = false; - if (!(rd->nr_reqs[0] + rd->nr_reqs[1])) - row_log(rd->dispatch_queue, "No requests in scheduler"); - else { - spin_lock_irq(rd->dispatch_queue->queue_lock); - __blk_run_queue(rd->dispatch_queue); - spin_unlock_irq(rd->dispatch_queue->queue_lock); - } } /* @@ -220,19 +130,16 @@ static void kick_queue(struct work_struct *work) * - For each queue: reset the number of requests dispatched in * the cycle */ -static inline void row_restart_disp_cycle(struct row_data *rd) -{ +static inline void row_restart_disp_cycle(struct row_data *rd) { int i; for (i = 0; i < ROWQ_MAX_PRIO; i++) rd->row_queues[i].rqueue.nr_dispatched = 0; rd->curr_queue = ROWQ_PRIO_HIGH_READ; - row_log(rd->dispatch_queue, "Restarting cycle"); } -static inline void row_get_next_queue(struct row_data *rd) -{ +static inline void row_get_next_queue(struct row_data *rd) { rd->curr_queue++; if (rd->curr_queue == ROWQ_MAX_PRIO) row_restart_disp_cycle(rd); @@ -240,21 +147,13 @@ static inline void row_get_next_queue(struct row_data *rd) /******************* Elevator callback functions *********************/ -/* - * row_add_request() - Add request to the scheduler - * @q: requests queue - * @rq: request to add - * - */ -static void row_add_request(struct request_queue *q, - struct request *rq) -{ +static void row_add_request(struct request_queue *q, struct request *rq) { struct row_data *rd = (struct row_data *)q->elevator->elevator_data; struct row_queue *rqueue = RQ_ROWQ(rq); list_add_tail(&rq->queuelist, &rqueue->fifo); rd->nr_reqs[rq_data_dir(rq)]++; - rq_set_fifo_time(rq, jiffies); /* for statistics*/ + if (queue_idling_enabled[rqueue->prio]) { if (delayed_work_pending(&rd->read_idle.idle_work)) @@ -262,66 +161,62 @@ static void row_add_request(struct request_queue *q, &rd->read_idle.idle_work); if (time_before(jiffies, rqueue->idle_data.idle_trigger_time)) { rqueue->idle_data.begin_idling = true; - row_log_rowq(rd, rqueue->prio, "Enable idling"); + } else rqueue->idle_data.begin_idling = false; rqueue->idle_data.idle_trigger_time = jiffies + msecs_to_jiffies(rd->read_idle.freq); } - row_log_rowq(rd, rqueue->prio, "added request"); + } -/* - * row_remove_request() - Remove given request from scheduler - * @q: requests queue - * @rq: request to remove - * - */ -static void row_remove_request(struct request_queue *q, - struct request *rq) -{ +static void row_add_requests(struct blk_mq_hw_ctx *hctx, struct list_head *list, bool at_head) { + + struct request_queue *q = hctx->queue; + struct row_data *rd = (struct row_data *)q->elevator->elevator_data; + + spin_lock(&rd->lock); + while (!list_empty(list)) { + struct request *rq; + + rq = list_first_entry(list, struct request, queuelist); // 将request取出 + list_del_init(&rq->queuelist); + row_add_request(q, rq); + } + spin_unlock(&rd->lock); +} + + +static void row_remove_request(struct request_queue *q, struct request *rq) { struct row_data *rd = (struct row_data *)q->elevator->elevator_data; rq_fifo_clear(rq); + list_del_init(&rq->queuelist); rd->nr_reqs[rq_data_dir(rq)]--; + elv_rqhash_del(q, 
rq); } -/* - * row_dispatch_insert() - move request to dispatch queue - * @rd: pointer to struct row_data - * - * This function moves the next request to dispatch from - * rd->curr_queue to the dispatch queue - * - */ -static void row_dispatch_insert(struct row_data *rd) -{ - struct request *rq; + +static struct request * row_dispatch_insert(struct row_data *rd) { + + struct request *rq; rq = rq_entry_fifo(rd->row_queues[rd->curr_queue].rqueue.fifo.next); - row_remove_request(rd->dispatch_queue, rq); - elv_dispatch_add_tail(rd->dispatch_queue, rq); + row_remove_request(rq->q, rq); + // elv_dispatch_add_tail(rd->dispatch_queue, rq); + list_add_tail(&rq->queuelist, &rd->dispatch_queue); rd->row_queues[rd->curr_queue].rqueue.nr_dispatched++; row_clear_rowq_unserved(rd, rd->curr_queue); - row_log_rowq(rd, rd->curr_queue, " Dispatched request nr_disp = %d", - rd->row_queues[rd->curr_queue].rqueue.nr_dispatched); + + return rq; } -/* - * row_choose_queue() - choose the next queue to dispatch from - * @rd: pointer to struct row_data - * - * Updates rd->curr_queue. Returns 1 if there are requests to - * dispatch, 0 if there are no requests in scheduler - * - */ -static int row_choose_queue(struct row_data *rd) -{ +static int row_choose_queue(struct row_data *rd) { int prev_curr_queue = rd->curr_queue; if (!(rd->nr_reqs[0] + rd->nr_reqs[1])) { - row_log(rd->dispatch_queue, "No more requests in scheduler"); + return 0; } @@ -341,19 +236,12 @@ static int row_choose_queue(struct row_data *rd) return 1; } -/* - * row_dispatch_requests() - selects the next request to dispatch - * @q: requests queue - * @force: ignored - * - * Return 0 if no requests were moved to the dispatch queue. - * 1 otherwise - * - */ -static int row_dispatch_requests(struct request_queue *q, int force) -{ +static struct request *row_dispatch_requests(struct blk_mq_hw_ctx *hctx) { + + struct request_queue *q = hctx->queue; struct row_data *rd = (struct row_data *)q->elevator->elevator_data; - int ret = 0, currq, i; + int currq, i; + struct request *rq = NULL; currq = rd->curr_queue; @@ -364,11 +252,9 @@ static int row_dispatch_requests(struct request_queue *q, int force) for (i = 0; i < currq; i++) { if (row_rowq_unserved(rd, i) && !list_empty(&rd->row_queues[i].rqueue.fifo)) { - row_log_rowq(rd, currq, - " Preemting for unserved rowq%d", i); + rd->curr_queue = i; - row_dispatch_insert(rd); - ret = 1; + rq = row_dispatch_insert(rd); goto done; } } @@ -376,10 +262,9 @@ static int row_dispatch_requests(struct request_queue *q, int force) if (rd->row_queues[currq].rqueue.nr_dispatched >= rd->row_queues[currq].disp_quantum) { rd->row_queues[currq].rqueue.nr_dispatched = 0; - row_log_rowq(rd, currq, "Expiring rqueue"); - ret = row_choose_queue(rd); - if (ret) - row_dispatch_insert(rd); + + if (row_choose_queue(rd)) + rq = row_dispatch_insert(rd); goto done; } @@ -387,59 +272,50 @@ static int row_dispatch_requests(struct request_queue *q, int force) if (list_empty(&rd->row_queues[currq].rqueue.fifo)) { /* check idling */ if (delayed_work_pending(&rd->read_idle.idle_work)) { - row_log_rowq(rd, currq, - "Delayed work pending. Exiting"); + goto done; } if (queue_idling_enabled[currq] && rd->row_queues[currq].rqueue.idle_data.begin_idling) { - if (!queue_delayed_work(rd->read_idle.idle_workqueue, - &rd->read_idle.idle_work, - jiffies + - msecs_to_jiffies(rd->read_idle.idle_time))) { - row_log_rowq(rd, currq, - "Work already on queue!"); + if (! 
schedule_delayed_work(&rd->read_idle.idle_work, jiffies + + msecs_to_jiffies(rd->read_idle.idle_time))) { + pr_err("ROW_BUG: Work already on queue!"); } else - row_log_rowq(rd, currq, - "Scheduled delayed work. exiting"); + goto done; } else { - row_log_rowq(rd, currq, - "Currq empty. Choose next queue"); - ret = row_choose_queue(rd); - if (!ret) + + if (!row_choose_queue(rd)) goto done; } } - ret = 1; - row_dispatch_insert(rd); + rq = row_dispatch_insert(rd); done: - return ret; + return rq; } -/* - * row_init_queue() - Init scheduler data structures - * @q: requests queue - * - * Return pointer to struct row_data to be saved in elevator for - * this dispatch queue - * - */ -static int row_init_queue(struct request_queue *q) -{ + +static int row_init_queue(struct request_queue *q, struct elevator_type *e) { struct row_data *rdata; int i; + struct elevator_queue *eq; rdata = kmalloc_node(sizeof(*rdata), GFP_KERNEL | __GFP_ZERO, q->node); if (!rdata) return -ENOMEM; + eq = elevator_alloc(q, e); + if (!eq) + return -ENOMEM; + eq->elevator_data = rdata; // 关联row_data和elevator_queue + + for (i = 0; i < ROWQ_MAX_PRIO; i++) { INIT_LIST_HEAD(&rdata->row_queues[i].rqueue.fifo); rdata->row_queues[i].disp_quantum = queue_quantum[i]; @@ -455,28 +331,22 @@ static int row_init_queue(struct request_queue *q) */ rdata->read_idle.idle_time = ROW_IDLE_TIME; rdata->read_idle.freq = ROW_READ_FREQ; - rdata->read_idle.idle_workqueue = alloc_workqueue("row_idle_work", - WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); - if (!rdata->read_idle.idle_workqueue) - panic("Failed to create idle workqueue\n"); INIT_DELAYED_WORK(&rdata->read_idle.idle_work, kick_queue); rdata->curr_queue = ROWQ_PRIO_HIGH_READ; - rdata->dispatch_queue = q; - q->elevator->elevator_data = rdata; + //rdata->dispatch_queue = q; + INIT_LIST_HEAD(&rdata->dispatch_queue); + + spin_lock_init(&rdata->lock); rdata->nr_reqs[READ] = rdata->nr_reqs[WRITE] = 0; + q->elevator = eq; // 关联elevator_queue和request_queue + return 0; } -/* - * row_exit_queue() - called on unloading the RAW scheduler - * @e: poiner to struct elevator_queue - * - */ -static void row_exit_queue(struct elevator_queue *e) -{ +static void row_exit_queue(struct elevator_queue *e) { struct row_data *rd = (struct row_data *)e->elevator_data; int i; @@ -486,34 +356,17 @@ static void row_exit_queue(struct elevator_queue *e) kfree(rd); } -/* - * row_merged_requests() - Called when 2 requests are merged - * @q: requests queue - * @rq: request the two requests were merged into - * @next: request that was merged - */ -static void row_merged_requests(struct request_queue *q, struct request *rq, - struct request *next) -{ - struct row_queue *rqueue = RQ_ROWQ(next); +static void row_merged_requests(struct request_queue *q, struct request *rq, struct request *next) { + + struct row_queue *rqueue = RQ_ROWQ(next); list_del_init(&next->queuelist); rqueue->rdata->nr_reqs[rq_data_dir(rq)]--; } -/* - * get_queue_type() - Get queue type for a given request - * - * This is a helping function which purpose is to determine what - * ROW queue the given request should be added to (and - * dispatched from leter on) - * - * TODO: Right now only 3 queues are used REG_READ, REG_WRITE - * and REG_SWRITE - */ -static enum row_queue_prio get_queue_type(struct request *rq) -{ + +static enum row_queue_prio get_queue_type(struct request *rq) { const int data_dir = rq_data_dir(rq); const bool is_sync = rq_is_sync(rq); @@ -525,36 +378,22 @@ static enum row_queue_prio get_queue_type(struct request *rq) return ROWQ_PRIO_REG_WRITE; } 
-/* - * row_set_request() - Set ROW data structures associated with this request. - * @q: requests queue - * @rq: pointer to the request - * @gfp_mask: ignored - * - */ -static int -row_set_request(struct request_queue *q, struct request *rq, struct bio *bio, - gfp_t gfp_mask) -{ - struct row_data *rd = (struct row_data *)q->elevator->elevator_data; - unsigned long flags; +static void row_set_request(struct request *rq) { - spin_lock_irqsave(q->queue_lock, flags); + struct row_queue *q = RQ_ROWQ(rq); + struct row_data *rd = (struct row_data *)q->rdata; + rq->elv.priv[0] = (void *)(&rd->row_queues[get_queue_type(rq)]); - spin_unlock_irqrestore(q->queue_lock, flags); - return 0; } /********** Helping sysfs functions/defenitions for ROW attributes ******/ -static ssize_t row_var_show(int var, char *page) -{ +static ssize_t row_var_show(int var, char *page) { return snprintf(page, 100, "%d\n", var); } -static ssize_t row_var_store(int *var, const char *page, size_t count) -{ +static ssize_t row_var_store(int *var, const char *page, size_t count) { int err; err = kstrtoul(page, 10, (unsigned long *)var); @@ -650,16 +489,15 @@ static struct elv_fs_entry row_attrs[] = { static struct elevator_type iosched_row = { .ops = { - .elevator_merge_req_fn = row_merged_requests, - .elevator_dispatch_fn = row_dispatch_requests, - .elevator_add_req_fn = row_add_request, - .elevator_former_req_fn = elv_rb_former_request, - .elevator_latter_req_fn = elv_rb_latter_request, - .elevator_set_req_fn = row_set_request, - .elevator_init_fn = row_init_queue, - .elevator_exit_fn = row_exit_queue, + .requests_merged = row_merged_requests, + .dispatch_request = row_dispatch_requests, + .insert_requests = row_add_requests, + .former_request = elv_rb_former_request, + .next_request = elv_rb_latter_request, + .prepare_request = row_set_request, + .init_sched = row_init_queue, + .exit_sched = row_exit_queue, }, - .elevator_attrs = row_attrs, .elevator_name = "row", .elevator_owner = THIS_MODULE, @@ -679,5 +517,5 @@ static void __exit row_exit(void) module_init(row_init); module_exit(row_exit); -MODULE_LICENSE("GPLv2"); +MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Read Over Write IO scheduler"); -- Gitee
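
For reference, the dispatch-cycle accounting described in the documentation can
be reproduced with the small standalone user-space program below. It is an
illustration only and not part of either patch: it assumes every queue is
always backlogged and leaves out idling, preemption of un-served queues and the
blk-mq adaptation, so it only shows how the default quanta translate into a
READ-over-WRITE dispatch ratio.

/* row-sim.c - illustrative simulation of the ROW dispatch quanta.
 * Build: gcc -Wall -o row-sim row-sim.c
 */
#include <stdio.h>

enum {
	Q_HP_READ, Q_RP_READ, Q_HP_SWRITE, Q_RP_SWRITE,
	Q_RP_WRITE, Q_LP_READ, Q_LP_SWRITE, Q_MAX
};

/* Default dispatch quanta, as in queue_quantum[] in the patch */
static const int quantum[Q_MAX] = { 100, 100, 2, 1, 1, 1, 1 };
static const int is_read[Q_MAX] = { 1, 1, 0, 0, 0, 1, 0 };
static const char *const name[Q_MAX] = {
	"hp_read", "rp_read", "hp_swrite", "rp_swrite",
	"rp_write", "lp_read", "lp_swrite"
};

int main(void)
{
	int dispatched[Q_MAX] = { 0 };
	int curr = Q_HP_READ;		/* each cycle starts at the HP READ queue */
	int in_quantum = 0;
	int total = 0, reads = 0;
	const int cycles = 10, per_cycle = 100 + 100 + 2 + 1 + 1 + 1 + 1;

	while (total < cycles * per_cycle) {
		if (in_quantum < quantum[curr]) {
			/* dispatch one request from the current queue */
			in_quantum++;
			dispatched[curr]++;
			total++;
			reads += is_read[curr];
		} else {
			/* quantum exhausted: round-robin to the next queue */
			in_quantum = 0;
			curr = (curr + 1) % Q_MAX;
		}
	}

	for (curr = 0; curr < Q_MAX; curr++)
		printf("%-10s %5d\n", name[curr], dispatched[curr]);
	printf("READ requests: %d of %d dispatched\n", reads, total);
	return 0;
}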