author    aszlig <aszlig@redmoonstudios.org>    2017-04-19 21:52:04 +0200
committer aszlig <aszlig@redmoonstudios.org>    2017-04-19 21:55:19 +0200
commit    ea04b4fb4a7f2db3351522066b3e08a77f7d35da (patch)
tree      25c4158aa4f86a9e52bb13b251db73ae7c549f8b
parent    10d0f0c02e50552e9129608340ab43ec0ff50ea0 (diff)
aszlig/kernel: Fix BFQ patch for kernel 4.11-rc7
Previously, in d6848012b86088cbfd70666a0cfae95c567e7199, I simply rebased
the patch I had for kernel 4.10 onto kernel 4.11, but that didn't work
out so well.

So this is now a rebase against the new branch from Paolo Valente at:

https://github.com/linusw/linux-bfq/tree/bfq-v8

Hopefully this time it will compile ;-)
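
For reference, a standalone patch like this can be regenerated from that
branch roughly as follows; the base tag and path filter below are
assumptions, not the literal commands used:

    # Sketch only: bfq-v8 is the branch linked above, v4.11-rc7 an assumed base.
    git clone https://github.com/linusw/linux-bfq.git
    cd linux-bfq
    # Diff the BFQ branch against the mainline tag it was rebased onto,
    # keeping only the BFQ-related paths (everything here is under block/):
    git diff v4.11-rc7..origin/bfq-v8 -- block/ > bfq.patch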

Signed-off-by: aszlig <aszlig@redmoonstudios.org>
-rw-r--r--  modules/user/aszlig/system/bfq.patch  |  398
1 file changed, 257 insertions(+), 141 deletions(-)
diff --git a/modules/user/aszlig/system/bfq.patch b/modules/user/aszlig/system/bfq.patch
index 6e3fe250..54f82499 100644
--- a/modules/user/aszlig/system/bfq.patch
+++ b/modules/user/aszlig/system/bfq.patch
@@ -602,23 +602,23 @@ index 58fc8684788d..99a42261677a 100644
  
  config MQ_IOSCHED_DEADLINE
 diff --git a/block/Makefile b/block/Makefile
-index 081bb680789b..6defc8b5dca0 100644
+index 081bb680789b..91869f2ef2dc 100644
 --- a/block/Makefile
 +++ b/block/Makefile
-@@ -19,6 +19,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
- obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
+@@ -20,6 +20,7 @@ obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
  obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadline-iosched.o
  obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
-+obj-$(CONFIG_IOSCHED_BFQ)	+= bfq-iosched.o
  obj-$(CONFIG_MQ_IOSCHED_DEADLINE)	+= mq-deadline.o
++obj-$(CONFIG_IOSCHED_BFQ)	+= bfq-iosched.o
  
  obj-$(CONFIG_BLOCK_COMPAT)	+= compat_ioctl.o
+ obj-$(CONFIG_BLK_CMDLINE_PARSER)	+= cmdline-parser.o
 diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
 new file mode 100644
-index 000000000000..c59227288ae0
+index 000000000000..39daaf405dc6
 --- /dev/null
 +++ b/block/bfq-cgroup.c
-@@ -0,0 +1,1194 @@
+@@ -0,0 +1,1190 @@
 +/*
 + * BFQ: CGROUPS support.
 + *
@@ -1390,7 +1390,6 @@ index 000000000000..c59227288ae0
 +
 +	__bfq_deactivate_entity(entity, false);
 +	bfq_put_async_queues(bfqd, bfqg);
-+	BUG_ON(entity->tree);
 +
 +	/*
 +	 * @blkg is going offline and will be ignored by
@@ -1759,6 +1758,9 @@ index 000000000000..c59227288ae0
 +static inline void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
 +static inline void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }
 +
++static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++			  struct bfq_group *bfqg) {}
++
 +static void bfq_init_entity(struct bfq_entity *entity,
 +			    struct bfq_group *bfqg)
 +{
@@ -1773,13 +1775,7 @@ index 000000000000..c59227288ae0
 +	entity->sched_data = &bfqg->sched_data;
 +}
 +
-+static struct bfq_group *
-+bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
-+{
-+	struct bfq_data *bfqd = bic_to_bfqd(bic);
-+
-+	return bfqd->root_group;
-+}
++static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {}
 +
 +static void bfq_end_wr_async(struct bfq_data *bfqd)
 +{
@@ -1857,10 +1853,10 @@ index 000000000000..fb7bb8f08b75
 +}
 diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
 new file mode 100644
-index 000000000000..f3c1db326a4e
+index 000000000000..6d1f54fb9c2b
 --- /dev/null
 +++ b/block/bfq-iosched.c
-@@ -0,0 +1,5297 @@
+@@ -0,0 +1,5336 @@
 +/*
 + * Budget Fair Queueing (BFQ) I/O scheduler.
 + *
@@ -1872,7 +1868,7 @@ index 000000000000..f3c1db326a4e
 + *
 + * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
 + *
-+ * Copyright (C) 2016 Paolo Valente <paolo.valente@linaro.org>
++ * Copyright (C) 2017 Paolo Valente <paolo.valente@linaro.org>
 + *
 + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
 + * file.
@@ -1939,8 +1935,8 @@ index 000000000000..f3c1db326a4e
 +#include <linux/jiffies.h>
 +#include <linux/rbtree.h>
 +#include <linux/ioprio.h>
-+#include "bfq.h"
 +#include "blk.h"
++#include "bfq.h"
 +
 +/* Expiration time of sync (0) and async (1) requests, in ns. */
 +static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
@@ -1970,7 +1966,7 @@ index 000000000000..f3c1db326a4e
 +/* Default timeout values, in jiffies, approximating CFQ defaults. */
 +static const int bfq_timeout = (HZ / 8);
 +
-+struct kmem_cache *bfq_pool;
++static struct kmem_cache *bfq_pool;
 +
 +/* Below this threshold (in ns), we consider thinktime immediate. */
 +#define BFQ_MIN_TT		(2 * NSEC_PER_MSEC)
@@ -2337,6 +2333,22 @@ index 000000000000..f3c1db326a4e
 +
 +	entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
 +					 GFP_ATOMIC);
++
++	/*
++	 * In the unlucky event of an allocation failure, we just
++	 * exit. This will cause the weight of entity to not be
++	 * considered in bfq_differentiated_weights, which, in its
++	 * turn, causes the scenario to be deemed wrongly symmetric in
++	 * case entity's weight would have been the only weight making
++	 * the scenario asymmetric. On the bright side, no unbalance
++	 * will however occur when entity becomes inactive again (the
++	 * invocation of this function is triggered by an activation
++	 * of entity). In fact, bfq_weights_tree_remove does nothing
++	 * if !entity->weight_counter.
++	 */
++	if (unlikely(!entity->weight_counter))
++		return;
++
 +	entity->weight_counter->weight = entity->weight;
 +	rb_link_node(&entity->weight_counter->weights_node, parent, new);
 +	rb_insert_color(&entity->weight_counter->weights_node, root);
@@ -2373,13 +2385,45 @@ index 000000000000..f3c1db326a4e
 +	entity->weight_counter = NULL;
 +}
 +
++/*
++ * Return expired entry, or NULL to just start from scratch in rbtree.
++ */
++static struct request *bfq_check_fifo(struct bfq_queue *bfqq,
++				      struct request *last)
++{
++	struct request *rq;
++
++	if (bfq_bfqq_fifo_expire(bfqq))
++		return NULL;
++
++	bfq_mark_bfqq_fifo_expire(bfqq);
++
++	rq = rq_entry_fifo(bfqq->fifo.next);
++
++	if (rq == last || ktime_get_ns() < rq->fifo_time)
++		return NULL;
++
++	bfq_log_bfqq(bfqq->bfqd, bfqq, "check_fifo: returned %p", rq);
++	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
++	return rq;
++}
++
 +static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
 +					struct bfq_queue *bfqq,
 +					struct request *last)
 +{
 +	struct rb_node *rbnext = rb_next(&last->rb_node);
 +	struct rb_node *rbprev = rb_prev(&last->rb_node);
-+	struct request *next = NULL, *prev = NULL;
++	struct request *next, *prev = NULL;
++
++	BUG_ON(list_empty(&bfqq->fifo));
++
++	/* Follow expired path, else get first next available. */
++	next = bfq_check_fifo(bfqq, last);
++	if (next) {
++		BUG_ON(next == last);
++		return next;
++	}
 +
 +	BUG_ON(RB_EMPTY_NODE(&last->rb_node));
 +
@@ -3212,7 +3256,6 @@ index 000000000000..f3c1db326a4e
 +
 +		bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
 +				false, BFQ_BFQQ_PREEMPTED);
-+		BUG_ON(in_serv->entity.budget < 0);
 +	}
 +}
 +
@@ -3376,12 +3419,15 @@ index 000000000000..f3c1db326a4e
 +	elv_rb_del(&bfqq->sort_list, rq);
 +
 +	if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
++		bfqq->next_rq = NULL;
++
 +		BUG_ON(bfqq->entity.budget < 0);
 +
 +		if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) {
++			BUG_ON(bfqq->ref < 2); /* referred by rq and on tree */
 +			bfq_del_bfqq_busy(bfqd, bfqq, false);
-+
-+			/* bfqq emptied. In normal operation, when
++			/*
++			 * bfqq emptied. In normal operation, when
 +			 * bfqq is empty, bfqq->entity.service and
 +			 * bfqq->entity.budget must contain,
 +			 * respectively, the service received and the
@@ -3390,7 +3436,8 @@ index 000000000000..f3c1db326a4e
 +			 * this last removal occurred while bfqq is
 +			 * not in service. To avoid inconsistencies,
 +			 * reset both bfqq->entity.service and
-+			 * bfqq->entity.budget.
++			 * bfqq->entity.budget, if bfqq has still a
++			 * process that may issue I/O requests to it.
 +			 */
 +			bfqq->entity.budget = bfqq->entity.service = 0;
 +		}
@@ -3411,8 +3458,8 @@ index 000000000000..f3c1db326a4e
 +	bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags);
 +}
 +
-+static int bfq_merge(struct request_queue *q, struct request **req,
-+		     struct bio *bio)
++static enum elv_merge bfq_merge(struct request_queue *q, struct request **req,
++				struct bio *bio)
 +{
 +	struct bfq_data *bfqd = q->elevator->elevator_data;
 +	struct request *__rq;
@@ -3427,7 +3474,7 @@ index 000000000000..f3c1db326a4e
 +}
 +
 +static void bfq_merged_request(struct request_queue *q, struct request *req,
-+			       int type)
++			       enum elv_merge type)
 +{
 +	if (type == ELEVATOR_FRONT_MERGE &&
 +	    rb_prev(&req->rb_node) &&
@@ -3718,7 +3765,7 @@ index 000000000000..f3c1db326a4e
 + * positives. In case bfqq is weight-raised, such false positives
 + * would evidently degrade latency guarantees for bfqq.
 + */
-+bool wr_from_too_long(struct bfq_queue *bfqq)
++static bool wr_from_too_long(struct bfq_queue *bfqq)
 +{
 +	return bfqq->wr_coeff > 1 &&
 +		time_is_before_jiffies(bfqq->last_wr_start_finish +
@@ -3879,7 +3926,8 @@ index 000000000000..f3c1db326a4e
 +		new_bfqq->wr_coeff = bfqq->wr_coeff;
 +		new_bfqq->wr_cur_max_time = bfqq->wr_cur_max_time;
 +		new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish;
-+		new_bfqq->wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt;
++		new_bfqq->wr_start_at_switch_to_srt =
++			bfqq->wr_start_at_switch_to_srt;
 +		if (bfq_bfqq_busy(new_bfqq))
 +			bfqd->wr_busy_queues++;
 +		new_bfqq->entity.prio_changed = 1;
@@ -3922,6 +3970,7 @@ index 000000000000..f3c1db326a4e
 +	 */
 +	new_bfqq->bic = NULL;
 +	bfqq->bic = NULL;
++	/* release process reference to bfqq */
 +	bfq_put_queue(bfqq);
 +}
 +
@@ -4149,7 +4198,7 @@ index 000000000000..f3c1db326a4e
 + * function of the estimated peak rate. See comments on
 + * bfq_calc_max_budget(), and on T_slow and T_fast arrays.
 + */
-+void update_thr_responsiveness_params(struct bfq_data *bfqd)
++static void update_thr_responsiveness_params(struct bfq_data *bfqd)
 +{
 +	int dev_type = blk_queue_nonrot(bfqd->queue);
 +
@@ -4184,7 +4233,7 @@ index 000000000000..f3c1db326a4e
 +		BFQ_RATE_SHIFT);
 +}
 +
-+void bfq_reset_rate_computation(struct bfq_data *bfqd, struct request *rq)
++static void bfq_reset_rate_computation(struct bfq_data *bfqd, struct request *rq)
 +{
 +	if (rq != NULL) { /* new rq dispatch now, reset accordingly */
 +		bfqd->last_dispatch = bfqd->first_dispatch = ktime_get_ns() ;
@@ -4201,7 +4250,7 @@ index 000000000000..f3c1db326a4e
 +		bfqd->tot_sectors_dispatched);
 +}
 +
-+void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq)
++static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq)
 +{
 +	u32 rate, weight, divisor;
 +
@@ -4251,7 +4300,7 @@ index 000000000000..f3c1db326a4e
 +	 *   total, and rate is below the current estimated peak rate
 +	 * - rate is unreasonably high (> 20M sectors/sec)
 +	 */
-+	if ((bfqd->peak_rate_samples > (3 * bfqd->sequential_samples)>>2 &&
++	if ((bfqd->sequential_samples < (3 * bfqd->peak_rate_samples)>>2 &&
 +	     rate <= bfqd->peak_rate) ||
 +		rate > 20<<BFQ_RATE_SHIFT) {
 +		bfq_log(bfqd,
@@ -4366,7 +4415,7 @@ index 000000000000..f3c1db326a4e
 + * of the observed dispatch rate. The function assumes to be invoked
 + * on every request dispatch.
 + */
-+void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq)
++static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq)
 +{
 +	u64 now_ns = ktime_get_ns();
 +
@@ -4467,29 +4516,6 @@ index 000000000000..f3c1db326a4e
 +	elv_dispatch_sort(q, rq);
 +}
 +
-+/*
-+ * Return expired entry, or NULL to just start from scratch in rbtree.
-+ */
-+static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
-+{
-+	struct request *rq = NULL;
-+
-+	if (bfq_bfqq_fifo_expire(bfqq))
-+		return NULL;
-+
-+	bfq_mark_bfqq_fifo_expire(bfqq);
-+
-+	if (list_empty(&bfqq->fifo))
-+		return NULL;
-+
-+	rq = rq_entry_fifo(bfqq->fifo.next);
-+
-+	if (ktime_get_ns() < rq->fifo_time)
-+		return NULL;
-+
-+	return rq;
-+}
-+
 +static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 +{
 +	BUG_ON(bfqq != bfqd->in_service_queue);
@@ -4755,8 +4781,8 @@ index 000000000000..f3c1db326a4e
 +	delta_ktime = ktime_sub(delta_ktime, bfqd->last_budget_start);
 +	delta_usecs = ktime_to_us(delta_ktime);
 +
-+	/* don't trust short/unrealistic values. */
-+	if (delta_usecs < 1000 || delta_usecs >= LONG_MAX) {
++	/* don't use too short time intervals */
++	if (delta_usecs < 1000) {
 +		if (blk_queue_nonrot(bfqd->queue))
 +			 /*
 +			  * give same worst-case guarantees as idling
@@ -4766,7 +4792,7 @@ index 000000000000..f3c1db326a4e
 +		else /* charge at least one seek */
 +			*delta_ms = bfq_slice_idle / NSEC_PER_MSEC;
 +
-+		bfq_log(bfqd, "bfq_bfqq_is_slow: unrealistic %u", delta_usecs);
++		bfq_log(bfqd, "bfq_bfqq_is_slow: too short %u", delta_usecs);
 +
 +		return slow;
 +	}
@@ -4918,6 +4944,7 @@ index 000000000000..f3c1db326a4e
 +	bool slow;
 +	unsigned long delta = 0;
 +	struct bfq_entity *entity = &bfqq->entity;
++	int ref;
 +
 +	BUG_ON(bfqq != bfqd->in_service_queue);
 +
@@ -5025,12 +5052,15 @@ index 000000000000..f3c1db326a4e
 +	__bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
 +	BUG_ON(bfqq->next_rq == NULL &&
 +	       bfqq->entity.budget < bfqq->entity.service);
++	ref = bfqq->ref;
 +	__bfq_bfqq_expire(bfqd, bfqq);
 +
-+	BUG_ON(!bfq_bfqq_busy(bfqq) && reason == BFQ_BFQQ_BUDGET_EXHAUSTED &&
++	BUG_ON(ref > 1 &&
++	       !bfq_bfqq_busy(bfqq) && reason == BFQ_BFQQ_BUDGET_EXHAUSTED &&
 +		!bfq_class_idle(bfqq));
 +
-+	if (!bfq_bfqq_busy(bfqq) &&
++	/* mark bfqq as waiting a request only if a bic still points to it */
++	if (ref > 1 && !bfq_bfqq_busy(bfqq) &&
 +	    reason != BFQ_BFQQ_BUDGET_TIMEOUT &&
 +	    reason != BFQ_BFQQ_BUDGET_EXHAUSTED)
 +		bfq_mark_bfqq_non_blocking_wait_rq(bfqq);
@@ -5355,14 +5385,29 @@ index 000000000000..f3c1db326a4e
 +	    !bfq_bfqq_must_idle(bfqq))
 +		goto expire;
 +
++check_queue:
++	/*
++	 * This loop is rarely executed more than once. Even when it
++	 * happens, it is much more convenient to re-execute this loop
++	 * than to return NULL and trigger a new dispatch to get a
++	 * request served.
++	 */
 +	next_rq = bfqq->next_rq;
 +	/*
 +	 * If bfqq has requests queued and it has enough budget left to
 +	 * serve them, keep the queue, otherwise expire it.
 +	 */
 +	if (next_rq) {
++		BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
++
 +		if (bfq_serv_to_charge(next_rq, bfqq) >
 +			bfq_bfqq_budget_left(bfqq)) {
++			/*
++			 * Expire the queue for budget exhaustion,
++			 * which makes sure that the next budget is
++			 * enough to serve the next request, even if
++			 * it comes from the fifo expired path.
++			 */
 +			reason = BFQ_BFQQ_BUDGET_EXHAUSTED;
 +			goto expire;
 +		} else {
@@ -5410,9 +5455,16 @@ index 000000000000..f3c1db326a4e
 +	bfq_bfqq_expire(bfqd, bfqq, false, reason);
 +new_queue:
 +	bfqq = bfq_set_in_service_queue(bfqd);
-+	bfq_log(bfqd, "select_queue: new queue %d returned",
-+		bfqq ? bfqq->pid : 0);
++	if (bfqq) {
++		bfq_log_bfqq(bfqd, bfqq, "select_queue: checking new queue");
++		goto check_queue;
++	}
 +keep_queue:
++	if (bfqq)
++		bfq_log_bfqq(bfqd, bfqq, "select_queue: returned this queue");
++	else
++		bfq_log(bfqd, "select_queue: no queue returned");
++
 +	return bfqq;
 +}
 +
@@ -5478,45 +5530,17 @@ index 000000000000..f3c1db326a4e
 +				struct bfq_queue *bfqq)
 +{
 +	int dispatched = 0;
-+	struct request *rq;
++	struct request *rq = bfqq->next_rq;
 +	unsigned long service_to_charge;
 +
 +	BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
-+
-+	/* Follow expired path, else get first next available. */
-+	rq = bfq_check_fifo(bfqq);
-+	if (!rq)
-+		rq = bfqq->next_rq;
++	BUG_ON(!rq);
 +	service_to_charge = bfq_serv_to_charge(rq, bfqq);
 +
-+	if (service_to_charge > bfq_bfqq_budget_left(bfqq)) {
-+		/*
-+		 * This may happen if the next rq is chosen in fifo order
-+		 * instead of sector order. The budget is properly
-+		 * dimensioned to be always sufficient to serve the next
-+		 * request only if it is chosen in sector order. The reason
-+		 * is that it would be quite inefficient and little useful
-+		 * to always make sure that the budget is large enough to
-+		 * serve even the possible next rq in fifo order.
-+		 * In fact, requests are seldom served in fifo order.
-+		 *
-+		 * Expire the queue for budget exhaustion, and make sure
-+		 * that the next act_budget is enough to serve the next
-+		 * request, even if it comes from the fifo expired path.
-+		 */
-+		bfqq->next_rq = rq;
-+		/*
-+		 * Since this dispatch is failed, make sure that
-+		 * a new one will be performed
-+		 */
-+		if (!bfqd->rq_in_driver)
-+			bfq_schedule_dispatch(bfqd);
-+		BUG_ON(bfqq->entity.budget < bfqq->entity.service);
-+		goto expire;
-+	}
++	BUG_ON(service_to_charge > bfq_bfqq_budget_left(bfqq));
 +
 +	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
-+	/* Finally, insert request into driver dispatch list. */
++
 +	bfq_bfqq_served(bfqq, service_to_charge);
 +
 +	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
@@ -5656,7 +5680,8 @@ index 000000000000..f3c1db326a4e
 + * Task holds one reference to the queue, dropped when task exits.  Each rq
 + * in-flight on this queue also holds a reference, dropped when rq is freed.
 + *
-+ * Queue lock must be held here.
++ * Queue lock must be held here. Recall not to use bfqq after calling
++ * this function on it.
 + */
 +static void bfq_put_queue(struct bfq_queue *bfqq)
 +{
@@ -5725,7 +5750,7 @@ index 000000000000..f3c1db326a4e
 +
 +	bfq_put_cooperator(bfqq);
 +
-+	bfq_put_queue(bfqq);
++	bfq_put_queue(bfqq); /* release process reference */
 +}
 +
 +static void bfq_init_icq(struct io_cq *icq)
@@ -5769,7 +5794,7 @@ index 000000000000..f3c1db326a4e
 +	ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
 +	switch (ioprio_class) {
 +	default:
-+		dev_err(bfqq->bfqd->queue->backing_dev_info.dev,
++		dev_err(bfqq->bfqd->queue->backing_dev_info->dev,
 +			"bfq: bad prio class %d\n", ioprio_class);
 +	case IOPRIO_CLASS_NONE:
 +		/*
@@ -5824,6 +5849,7 @@ index 000000000000..f3c1db326a4e
 +
 +	bfqq = bic_to_bfqq(bic, false);
 +	if (bfqq) {
++		/* release process reference on this queue */
 +		bfq_put_queue(bfqq);
 +		bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic);
 +		bic_set_bfqq(bic, bfqq, false);
@@ -5945,14 +5971,20 @@ index 000000000000..f3c1db326a4e
 +	 * prune it.
 +	 */
 +	if (async_bfqq) {
-+		bfqq->ref++;
++		bfqq->ref++; /*
++			      * Extra group reference, w.r.t. sync
++			      * queue. This extra reference is removed
++			      * only if bfqq->bfqg disappears, to
++			      * guarantee that this queue is not freed
++			      * until its group goes away.
++			      */
 +		bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d",
 +			     bfqq, bfqq->ref);
 +		*async_bfqq = bfqq;
 +	}
 +
 +out:
-+	bfqq->ref++;
++	bfqq->ref++; /* get a process reference to this queue */
 +	bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref);
 +	rcu_read_unlock();
 +	return bfqq;
@@ -6126,10 +6158,14 @@ index 000000000000..f3c1db326a4e
 +			bfqq->allocated[rq_data_dir(rq)]--;
 +			new_bfqq->ref++;
 +			bfq_clear_bfqq_just_created(bfqq);
-+			bfq_put_queue(bfqq);
 +			if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
 +				bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
 +						bfqq, new_bfqq);
++			/*
++			 * rq is about to be enqueued into new_bfqq,
++			 * release rq reference on bfqq
++			 */
++			bfq_put_queue(bfqq);
 +			rq->elv.priv[1] = new_bfqq;
 +			bfqq = new_bfqq;
 +		}
@@ -6549,9 +6585,8 @@ index 000000000000..f3c1db326a4e
 +	cancel_work_sync(&bfqd->unplug_work);
 +}
 +
-+#ifdef CONFIG_BFQ_GROUP_IOSCHED
 +static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
-+					struct bfq_queue **bfqq_ptr)
++				 struct bfq_queue **bfqq_ptr)
 +{
 +	struct bfq_group *root_group = bfqd->root_group;
 +	struct bfq_queue *bfqq = *bfqq_ptr;
@@ -6582,7 +6617,6 @@ index 000000000000..f3c1db326a4e
 +
 +	__bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
 +}
-+#endif
 +
 +static void bfq_exit_queue(struct elevator_queue *e)
 +{
@@ -6607,6 +6641,7 @@ index 000000000000..f3c1db326a4e
 +#ifdef CONFIG_BFQ_GROUP_IOSCHED
 +	blkcg_deactivate_policy(q, &blkcg_policy_bfq);
 +#else
++	bfq_put_async_queues(bfqd, bfqd->root_group);
 +	kfree(bfqd->root_group);
 +#endif
 +
@@ -7029,7 +7064,7 @@ index 000000000000..f3c1db326a4e
 +};
 +
 +static struct elevator_type iosched_bfq = {
-+	.ops = {
++	.ops.sq = {
 +		.elevator_merge_fn =		bfq_merge,
 +		.elevator_merged_fn =		bfq_merged_request,
 +		.elevator_merge_req_fn =	bfq_merged_requests,
@@ -7081,7 +7116,7 @@ index 000000000000..f3c1db326a4e
 +static int __init bfq_init(void)
 +{
 +	int ret;
-+	char msg[60] = "BFQ I/O-scheduler: v8r7";
++	char msg[60] = "BFQ I/O-scheduler: v8r10-rc1";
 +
 +#ifdef CONFIG_BFQ_GROUP_IOSCHED
 +	ret = blkcg_policy_register(&blkcg_policy_bfq);
@@ -7107,7 +7142,7 @@ index 000000000000..f3c1db326a4e
 +	 * be run for a long time.
 +	 */
 +	T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */
-+	T_slow[1] = msecs_to_jiffies(1000); /* actually 1.5 sec */
++	T_slow[1] = msecs_to_jiffies(6000); /* actually 6.5 sec */
 +	T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */
 +	T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */
 +
@@ -7160,10 +7195,10 @@ index 000000000000..f3c1db326a4e
 +MODULE_LICENSE("GPL");
 diff --git a/block/bfq-sched.c b/block/bfq-sched.c
 new file mode 100644
-index 000000000000..797bce75db01
+index 000000000000..6ab75b6bfd96
 --- /dev/null
 +++ b/block/bfq-sched.c
-@@ -0,0 +1,1933 @@
+@@ -0,0 +1,2014 @@
 +/*
 + * BFQ: Hierarchical B-WF2Q+ scheduler.
 + *
@@ -7291,7 +7326,8 @@ index 000000000000..797bce75db01
 +	if (next_in_service) {
 +		parent_sched_may_change = !sd->next_in_service ||
 +			bfq_update_parent_budget(next_in_service);
-+	}
++	} else
++		parent_sched_may_change = sd->next_in_service;
 +
 +	sd->next_in_service = next_in_service;
 +
@@ -7320,7 +7356,13 @@ index 000000000000..797bce75db01
 +#define for_each_entity(entity)				\
 +	for (; entity ; entity = entity->parent)
 +
-+#define for_each_entity_safe(entity, parent) \
++/*
++ * For each iteration, compute parent in advance, so as to be safe if
++ * entity is deallocated during the iteration. Such a deallocation may
++ * happen as a consequence of a bfq_put_queue that frees the bfq_queue
++ * containing entity.
++ */
++#define for_each_entity_safe(entity, parent)				\
 +	for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
 +
 +/*
@@ -7857,27 +7899,31 @@ index 000000000000..797bce75db01
 +}
 +
 +/**
-+ * bfq_forget_entity - remove an entity from the wfq trees.
++ * bfq_forget_entity - do not consider entity any longer for scheduling
 + * @st: the service tree.
 + * @entity: the entity being removed.
-+ *
-+ * Update the device status and forget everything about @entity, putting
-+ * the device reference to it, if it is a queue.  Entities belonging to
-+ * groups are not refcounted.
++ * @is_in_service: true if entity is currently the in-service entity.
++ *
++ * Forget everything about @entity. In addition, if entity represents
++ * a queue, and the latter is not in service, then release the service
++ * reference to the queue (the one taken through bfq_get_entity). In
++ * fact, in this case, there is really no more service reference to
++ * the queue, as the latter is also outside any service tree. If,
++ * instead, the queue is in service, then __bfq_bfqd_reset_in_service
++ * will take care of putting the reference when the queue finally
++ * stops being served.
 + */
 +static void bfq_forget_entity(struct bfq_service_tree *st,
-+			      struct bfq_entity *entity)
++			      struct bfq_entity *entity,
++			      bool is_in_service)
 +{
 +	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
-+	struct bfq_sched_data *sd;
-+
 +	BUG_ON(!entity->on_st);
 +
 +	entity->on_st = false;
 +	st->wsum -= entity->weight;
-+	if (bfqq) {
-+		sd = entity->sched_data;
-+		bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d",
++	if (bfqq && !is_in_service) {
++		bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity (before): %p %d",
 +			     bfqq, bfqq->ref);
 +		bfq_put_queue(bfqq);
 +	}
@@ -7892,7 +7938,8 @@ index 000000000000..797bce75db01
 +				struct bfq_entity *entity)
 +{
 +	bfq_idle_extract(st, entity);
-+	bfq_forget_entity(st, entity);
++	bfq_forget_entity(st, entity,
++			  entity == entity->sched_data->in_service_entity);
 +}
 +
 +/**
@@ -8248,6 +8295,12 @@ index 000000000000..797bce75db01
 +		 */
 +		entity->start = min_vstart;
 +		st->wsum += entity->weight;
++		/*
++		 * entity is about to be inserted into a service tree,
++		 * and then set in service: get a reference to make
++		 * sure entity does not disappear until it is no
++		 * longer in service or scheduled for service.
++		 */
 +		bfq_get_entity(entity);
 +
 +		BUG_ON(entity->on_st && bfqq);
@@ -8430,27 +8483,27 @@ index 000000000000..797bce75db01
 +{
 +	struct bfq_sched_data *sd = entity->sched_data;
 +	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
-+	bool was_in_service = entity == sd->in_service_entity;
++	bool is_in_service = entity == sd->in_service_entity;
 +
 +	if (!entity->on_st) { /* entity never activated, or already inactive */
 +		BUG_ON(entity == entity->sched_data->in_service_entity);
 +		return false;
 +	}
 +
-+	BUG_ON(was_in_service && entity->tree && entity->tree != &st->active);
++	BUG_ON(is_in_service && entity->tree && entity->tree != &st->active);
 +
-+	if (was_in_service)
++	if (is_in_service)
 +		bfq_calc_finish(entity, entity->service);
 +
 +	if (entity->tree == &st->active)
 +		bfq_active_extract(st, entity);
-+	else if (!was_in_service && entity->tree == &st->idle)
++	else if (!is_in_service && entity->tree == &st->idle)
 +		bfq_idle_extract(st, entity);
 +	else if (entity->tree)
 +		BUG();
 +
 +	if (!ins_into_idle_tree || !bfq_gt(entity->finish, st->vtime))
-+		bfq_forget_entity(st, entity);
++		bfq_forget_entity(st, entity, is_in_service);
 +	else
 +		bfq_idle_insert(st, entity);
 +
@@ -8467,7 +8520,7 @@ index 000000000000..797bce75db01
 +				  bool expiration)
 +{
 +	struct bfq_sched_data *sd;
-+	struct bfq_entity *parent;
++	struct bfq_entity *parent = NULL;
 +
 +	for_each_entity_safe(entity, parent) {
 +		sd = entity->sched_data;
@@ -8486,8 +8539,8 @@ index 000000000000..797bce75db01
 +
 +		if (!__bfq_deactivate_entity(entity, ins_into_idle_tree)) {
 +			/*
-+			 * Entity is not any tree any more, so, this
-+			 * deactivation is a no-op, and there is
++			 * entity is not in any tree any more, so
++			 * this deactivation is a no-op, and there is
 +			 * nothing to change for upper-level entities
 +			 * (in case of expiration, this can never
 +			 * happen).
@@ -8987,14 +9040,16 @@ index 000000000000..797bce75db01
 +
 +static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
 +{
-+	struct bfq_entity *entity = &bfqd->in_service_queue->entity;
++	struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
++	struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity;
++	struct bfq_entity *entity = in_serv_entity;
 +
 +	if (bfqd->in_service_bic) {
 +		put_io_context(bfqd->in_service_bic->icq.ioc);
 +		bfqd->in_service_bic = NULL;
 +	}
 +
-+	bfq_clear_bfqq_wait_request(bfqd->in_service_queue);
++	bfq_clear_bfqq_wait_request(in_serv_bfqq);
 +	hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
 +	bfqd->in_service_queue = NULL;
 +
@@ -9006,6 +9061,66 @@ index 000000000000..797bce75db01
 +	 */
 +	for_each_entity(entity)
 +		entity->sched_data->in_service_entity = NULL;
++
++	/*
++	 * in_serv_entity is no longer in service, so, if it is in no
++	 * service tree either, then release the service reference to
++	 * the queue it represents (taken with bfq_get_entity).
++	 */
++	if (!in_serv_entity->on_st)
++		bfq_put_queue(in_serv_bfqq);
++}
++
++static void set_next_in_service_bfqq(struct bfq_data *bfqd)
++{
++	struct bfq_entity *entity = NULL;
++	struct bfq_queue *bfqq;
++	struct bfq_sched_data *sd = &bfqd->root_group->sched_data;
++
++	BUG_ON(!sd);
++
++	/* Traverse the path from the root to the in-service leaf entity */
++	for (; sd ; sd = entity->my_sched_data) {
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++		if (entity) {
++			struct bfq_group *bfqg =
++				container_of(entity, struct bfq_group, entity);
++
++			bfq_log_bfqg(bfqd, bfqg,
++			"set_next_in_service_bfqq: lookup in this group");
++		} else
++			bfq_log_bfqg(bfqd, bfqd->root_group,
++			"set_next_in_service_bfqq: lookup in root group");
++#endif
++
++		entity = sd->next_in_service;
++
++		if (!entity) {
++			bfqd->next_in_service_queue = NULL;
++			return;
++		}
++
++		/* Log some information */
++		bfqq = bfq_entity_to_bfqq(entity);
++		if (bfqq)
++			bfq_log_bfqq(bfqd, bfqq,
++			"set_next_in_service_bfqq: this queue, finish %llu",
++				(((entity->finish>>10)*1000)>>10)>>2);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++		else {
++			struct bfq_group *bfqg =
++				container_of(entity, struct bfq_group, entity);
++
++			bfq_log_bfqg(bfqd, bfqg,
++			"set_next_in_service_bfqq: this entity, finish %llu",
++				(((entity->finish>>10)*1000)>>10)>>2);
++		}
++#endif
++
++	}
++	BUG_ON(!bfq_entity_to_bfqq(entity));
++
++	bfqd->next_in_service_queue = bfq_entity_to_bfqq(entity);
 +}
 +
 +static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
@@ -9014,6 +9129,7 @@ index 000000000000..797bce75db01
 +	struct bfq_entity *entity = &bfqq->entity;
 +
 +	bfq_deactivate_entity(entity, ins_into_idle_tree, expiration);
++	set_next_in_service_bfqq(bfqd);
 +}
 +
 +static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
@@ -9028,6 +9144,7 @@ index 000000000000..797bce75db01
 +	bfq_activate_requeue_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq),
 +				    false);
 +	bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
++	set_next_in_service_bfqq(bfqd);
 +}
 +
 +static void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
@@ -9036,6 +9153,7 @@ index 000000000000..797bce75db01
 +
 +	bfq_activate_requeue_entity(entity, false,
 +				    bfqq == bfqd->in_service_queue);
++	set_next_in_service_bfqq(bfqd);
 +}
 +
 +static void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
@@ -9070,8 +9188,6 @@ index 000000000000..797bce75db01
 +	BUG_ON(bfqq->entity.budget < 0);
 +
 +	bfq_deactivate_bfqq(bfqd, bfqq, true, expiration);
-+
-+	BUG_ON(bfqq->entity.budget < 0);
 +}
 +
 +/*
@@ -9099,12 +9215,12 @@ index 000000000000..797bce75db01
 +}
 diff --git a/block/bfq.h b/block/bfq.h
 new file mode 100644
-index 000000000000..bef8244cc03f
+index 000000000000..67d56670e678
 --- /dev/null
 +++ b/block/bfq.h
 @@ -0,0 +1,933 @@
 +/*
-+ * BFQ v8r7 for 4.9.0: data structures and common functions prototypes.
++ * BFQ v8r10-rc1 for 4.11.0: data structures and common functions prototypes.
 + *
 + * Based on ideas and code from CFQ:
 + * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
@@ -9114,7 +9230,7 @@ index 000000000000..bef8244cc03f
 + *
 + * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
 + *
-+ * Copyright (C) 2016 Paolo Valente <paolo.valente@linaro.org>
++ * Copyright (C) 2017 Paolo Valente <paolo.valente@linaro.org>
 + */
 +
 +#ifndef _BFQ_H
@@ -9122,8 +9238,6 @@ index 000000000000..bef8244cc03f
 +
 +#include <linux/blktrace_api.h>
 +#include <linux/hrtimer.h>
-+#include <linux/ioprio.h>
-+#include <linux/rbtree.h>
 +#include <linux/blk-cgroup.h>
 +
 +#define BFQ_IOPRIO_CLASSES	3
@@ -9545,6 +9659,8 @@ index 000000000000..bef8244cc03f
 +
 +	/* bfq_queue in service */
 +	struct bfq_queue *in_service_queue;
++	/* candidate bfq_queue to become the next in-service queue */
++	struct bfq_queue *next_in_service_queue;
 +	/* bfq_io_cq (bic) associated with the @in_service_queue */
 +	struct bfq_io_cq *in_service_bic;
 +