From ea04b4fb4a7f2db3351522066b3e08a77f7d35da Mon Sep 17 00:00:00 2001 From: aszlig Date: Wed, 19 Apr 2017 21:52:04 +0200 Subject: aszlig/kernel: Fix BFQ patch for kernel 4.11-rc7 Previously in d6848012b86088cbfd70666a0cfae95c567e7199 I've just rebased the patch I had against 4.10 against kernel 4.11, but that didn't work out so well. So this is now a rebase against the new branch from Paolo Valente at: https://github.com/linusw/linux-bfq/tree/bfq-v8 Hopefully this time it will compile ;-) Signed-off-by: aszlig --- modules/user/aszlig/system/bfq.patch | 398 ++++++++++++++++++++++------------- 1 file changed, 257 insertions(+), 141 deletions(-) (limited to 'modules/user') diff --git a/modules/user/aszlig/system/bfq.patch b/modules/user/aszlig/system/bfq.patch index 6e3fe250..54f82499 100644 --- a/modules/user/aszlig/system/bfq.patch +++ b/modules/user/aszlig/system/bfq.patch @@ -602,23 +602,23 @@ index 58fc8684788d..99a42261677a 100644 config MQ_IOSCHED_DEADLINE diff --git a/block/Makefile b/block/Makefile -index 081bb680789b..6defc8b5dca0 100644 +index 081bb680789b..91869f2ef2dc 100644 --- a/block/Makefile +++ b/block/Makefile -@@ -19,6 +19,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o - obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o +@@ -20,6 +20,7 @@ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o -+obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o ++obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o + obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c new file mode 100644 -index 000000000000..c59227288ae0 +index 000000000000..39daaf405dc6 --- /dev/null +++ b/block/bfq-cgroup.c -@@ -0,0 +1,1194 @@ +@@ -0,0 +1,1190 @@ +/* + * BFQ: CGROUPS support. + * @@ -1390,7 +1390,6 @@ index 000000000000..c59227288ae0 + + __bfq_deactivate_entity(entity, false); + bfq_put_async_queues(bfqd, bfqg); -+ BUG_ON(entity->tree); + + /* + * @blkg is going offline and will be ignored by @@ -1759,6 +1758,9 @@ index 000000000000..c59227288ae0 +static inline void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { } +static inline void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { } + ++static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ struct bfq_group *bfqg) {} ++ +static void bfq_init_entity(struct bfq_entity *entity, + struct bfq_group *bfqg) +{ @@ -1773,13 +1775,7 @@ index 000000000000..c59227288ae0 + entity->sched_data = &bfqg->sched_data; +} + -+static struct bfq_group * -+bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) -+{ -+ struct bfq_data *bfqd = bic_to_bfqd(bic); -+ -+ return bfqd->root_group; -+} ++static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {} + +static void bfq_end_wr_async(struct bfq_data *bfqd) +{ @@ -1857,10 +1853,10 @@ index 000000000000..fb7bb8f08b75 +} diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c new file mode 100644 -index 000000000000..f3c1db326a4e +index 000000000000..6d1f54fb9c2b --- /dev/null +++ b/block/bfq-iosched.c -@@ -0,0 +1,5297 @@ +@@ -0,0 +1,5336 @@ +/* + * Budget Fair Queueing (BFQ) I/O scheduler. 
+ * @@ -1872,7 +1868,7 @@ index 000000000000..f3c1db326a4e + * + * Copyright (C) 2015 Paolo Valente + * -+ * Copyright (C) 2016 Paolo Valente ++ * Copyright (C) 2017 Paolo Valente + * + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ + * file. @@ -1939,8 +1935,8 @@ index 000000000000..f3c1db326a4e +#include +#include +#include -+#include "bfq.h" +#include "blk.h" ++#include "bfq.h" + +/* Expiration time of sync (0) and async (1) requests, in ns. */ +static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 }; @@ -1970,7 +1966,7 @@ index 000000000000..f3c1db326a4e +/* Default timeout values, in jiffies, approximating CFQ defaults. */ +static const int bfq_timeout = (HZ / 8); + -+struct kmem_cache *bfq_pool; ++static struct kmem_cache *bfq_pool; + +/* Below this threshold (in ns), we consider thinktime immediate. */ +#define BFQ_MIN_TT (2 * NSEC_PER_MSEC) @@ -2337,6 +2333,22 @@ index 000000000000..f3c1db326a4e + + entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), + GFP_ATOMIC); ++ ++ /* ++ * In the unlucky event of an allocation failure, we just ++ * exit. This will cause the weight of entity to not be ++ * considered in bfq_differentiated_weights, which, in its ++ * turn, causes the scenario to be deemed wrongly symmetric in ++ * case entity's weight would have been the only weight making ++ * the scenario asymmetric. On the bright side, no unbalance ++ * will however occur when entity becomes inactive again (the ++ * invocation of this function is triggered by an activation ++ * of entity). In fact, bfq_weights_tree_remove does nothing ++ * if !entity->weight_counter. ++ */ ++ if (unlikely(!entity->weight_counter)) ++ return; ++ + entity->weight_counter->weight = entity->weight; + rb_link_node(&entity->weight_counter->weights_node, parent, new); + rb_insert_color(&entity->weight_counter->weights_node, root); @@ -2373,13 +2385,45 @@ index 000000000000..f3c1db326a4e + entity->weight_counter = NULL; +} + ++/* ++ * Return expired entry, or NULL to just start from scratch in rbtree. ++ */ ++static struct request *bfq_check_fifo(struct bfq_queue *bfqq, ++ struct request *last) ++{ ++ struct request *rq; ++ ++ if (bfq_bfqq_fifo_expire(bfqq)) ++ return NULL; ++ ++ bfq_mark_bfqq_fifo_expire(bfqq); ++ ++ rq = rq_entry_fifo(bfqq->fifo.next); ++ ++ if (rq == last || ktime_get_ns() < rq->fifo_time) ++ return NULL; ++ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "check_fifo: returned %p", rq); ++ BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); ++ return rq; ++} ++ +static struct request *bfq_find_next_rq(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct request *last) +{ + struct rb_node *rbnext = rb_next(&last->rb_node); + struct rb_node *rbprev = rb_prev(&last->rb_node); -+ struct request *next = NULL, *prev = NULL; ++ struct request *next, *prev = NULL; ++ ++ BUG_ON(list_empty(&bfqq->fifo)); ++ ++ /* Follow expired path, else get first next available. 
*/ ++ next = bfq_check_fifo(bfqq, last); ++ if (next) { ++ BUG_ON(next == last); ++ return next; ++ } + + BUG_ON(RB_EMPTY_NODE(&last->rb_node)); + @@ -3212,7 +3256,6 @@ index 000000000000..f3c1db326a4e + + bfq_bfqq_expire(bfqd, bfqd->in_service_queue, + false, BFQ_BFQQ_PREEMPTED); -+ BUG_ON(in_serv->entity.budget < 0); + } +} + @@ -3376,12 +3419,15 @@ index 000000000000..f3c1db326a4e + elv_rb_del(&bfqq->sort_list, rq); + + if (RB_EMPTY_ROOT(&bfqq->sort_list)) { ++ bfqq->next_rq = NULL; ++ + BUG_ON(bfqq->entity.budget < 0); + + if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) { ++ BUG_ON(bfqq->ref < 2); /* referred by rq and on tree */ + bfq_del_bfqq_busy(bfqd, bfqq, false); -+ -+ /* bfqq emptied. In normal operation, when ++ /* ++ * bfqq emptied. In normal operation, when + * bfqq is empty, bfqq->entity.service and + * bfqq->entity.budget must contain, + * respectively, the service received and the @@ -3390,7 +3436,8 @@ index 000000000000..f3c1db326a4e + * this last removal occurred while bfqq is + * not in service. To avoid inconsistencies, + * reset both bfqq->entity.service and -+ * bfqq->entity.budget. ++ * bfqq->entity.budget, if bfqq has still a ++ * process that may issue I/O requests to it. + */ + bfqq->entity.budget = bfqq->entity.service = 0; + } @@ -3411,8 +3458,8 @@ index 000000000000..f3c1db326a4e + bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags); +} + -+static int bfq_merge(struct request_queue *q, struct request **req, -+ struct bio *bio) ++static enum elv_merge bfq_merge(struct request_queue *q, struct request **req, ++ struct bio *bio) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct request *__rq; @@ -3427,7 +3474,7 @@ index 000000000000..f3c1db326a4e +} + +static void bfq_merged_request(struct request_queue *q, struct request *req, -+ int type) ++ enum elv_merge type) +{ + if (type == ELEVATOR_FRONT_MERGE && + rb_prev(&req->rb_node) && @@ -3718,7 +3765,7 @@ index 000000000000..f3c1db326a4e + * positives. In case bfqq is weight-raised, such false positives + * would evidently degrade latency guarantees for bfqq. + */ -+bool wr_from_too_long(struct bfq_queue *bfqq) ++static bool wr_from_too_long(struct bfq_queue *bfqq) +{ + return bfqq->wr_coeff > 1 && + time_is_before_jiffies(bfqq->last_wr_start_finish + @@ -3879,7 +3926,8 @@ index 000000000000..f3c1db326a4e + new_bfqq->wr_coeff = bfqq->wr_coeff; + new_bfqq->wr_cur_max_time = bfqq->wr_cur_max_time; + new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish; -+ new_bfqq->wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt; ++ new_bfqq->wr_start_at_switch_to_srt = ++ bfqq->wr_start_at_switch_to_srt; + if (bfq_bfqq_busy(new_bfqq)) + bfqd->wr_busy_queues++; + new_bfqq->entity.prio_changed = 1; @@ -3922,6 +3970,7 @@ index 000000000000..f3c1db326a4e + */ + new_bfqq->bic = NULL; + bfqq->bic = NULL; ++ /* release process reference to bfqq */ + bfq_put_queue(bfqq); +} + @@ -4149,7 +4198,7 @@ index 000000000000..f3c1db326a4e + * function of the estimated peak rate. See comments on + * bfq_calc_max_budget(), and on T_slow and T_fast arrays. 
+ */ -+void update_thr_responsiveness_params(struct bfq_data *bfqd) ++static void update_thr_responsiveness_params(struct bfq_data *bfqd) +{ + int dev_type = blk_queue_nonrot(bfqd->queue); + @@ -4184,7 +4233,7 @@ index 000000000000..f3c1db326a4e + BFQ_RATE_SHIFT); +} + -+void bfq_reset_rate_computation(struct bfq_data *bfqd, struct request *rq) ++static void bfq_reset_rate_computation(struct bfq_data *bfqd, struct request *rq) +{ + if (rq != NULL) { /* new rq dispatch now, reset accordingly */ + bfqd->last_dispatch = bfqd->first_dispatch = ktime_get_ns() ; @@ -4201,7 +4250,7 @@ index 000000000000..f3c1db326a4e + bfqd->tot_sectors_dispatched); +} + -+void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) ++static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) +{ + u32 rate, weight, divisor; + @@ -4251,7 +4300,7 @@ index 000000000000..f3c1db326a4e + * total, and rate is below the current estimated peak rate + * - rate is unreasonably high (> 20M sectors/sec) + */ -+ if ((bfqd->peak_rate_samples > (3 * bfqd->sequential_samples)>>2 && ++ if ((bfqd->sequential_samples < (3 * bfqd->peak_rate_samples)>>2 && + rate <= bfqd->peak_rate) || + rate > 20<fifo)) -+ return NULL; -+ -+ rq = rq_entry_fifo(bfqq->fifo.next); -+ -+ if (ktime_get_ns() < rq->fifo_time) -+ return NULL; -+ -+ return rq; -+} -+ +static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + BUG_ON(bfqq != bfqd->in_service_queue); @@ -4755,8 +4781,8 @@ index 000000000000..f3c1db326a4e + delta_ktime = ktime_sub(delta_ktime, bfqd->last_budget_start); + delta_usecs = ktime_to_us(delta_ktime); + -+ /* don't trust short/unrealistic values. */ -+ if (delta_usecs < 1000 || delta_usecs >= LONG_MAX) { ++ /* don't use too short time intervals */ ++ if (delta_usecs < 1000) { + if (blk_queue_nonrot(bfqd->queue)) + /* + * give same worst-case guarantees as idling @@ -4766,7 +4792,7 @@ index 000000000000..f3c1db326a4e + else /* charge at least one seek */ + *delta_ms = bfq_slice_idle / NSEC_PER_MSEC; + -+ bfq_log(bfqd, "bfq_bfqq_is_slow: unrealistic %u", delta_usecs); ++ bfq_log(bfqd, "bfq_bfqq_is_slow: too short %u", delta_usecs); + + return slow; + } @@ -4918,6 +4944,7 @@ index 000000000000..f3c1db326a4e + bool slow; + unsigned long delta = 0; + struct bfq_entity *entity = &bfqq->entity; ++ int ref; + + BUG_ON(bfqq != bfqd->in_service_queue); + @@ -5025,12 +5052,15 @@ index 000000000000..f3c1db326a4e + __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); + BUG_ON(bfqq->next_rq == NULL && + bfqq->entity.budget < bfqq->entity.service); ++ ref = bfqq->ref; + __bfq_bfqq_expire(bfqd, bfqq); + -+ BUG_ON(!bfq_bfqq_busy(bfqq) && reason == BFQ_BFQQ_BUDGET_EXHAUSTED && ++ BUG_ON(ref > 1 && ++ !bfq_bfqq_busy(bfqq) && reason == BFQ_BFQQ_BUDGET_EXHAUSTED && + !bfq_class_idle(bfqq)); + -+ if (!bfq_bfqq_busy(bfqq) && ++ /* mark bfqq as waiting a request only if a bic still points to it */ ++ if (ref > 1 && !bfq_bfqq_busy(bfqq) && + reason != BFQ_BFQQ_BUDGET_TIMEOUT && + reason != BFQ_BFQQ_BUDGET_EXHAUSTED) + bfq_mark_bfqq_non_blocking_wait_rq(bfqq); @@ -5355,14 +5385,29 @@ index 000000000000..f3c1db326a4e + !bfq_bfqq_must_idle(bfqq)) + goto expire; + ++check_queue: ++ /* ++ * This loop is rarely executed more than once. Even when it ++ * happens, it is much more convenient to re-execute this loop ++ * than to return NULL and trigger a new dispatch to get a ++ * request served. 
++ */ + next_rq = bfqq->next_rq; + /* + * If bfqq has requests queued and it has enough budget left to + * serve them, keep the queue, otherwise expire it. + */ + if (next_rq) { ++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); ++ + if (bfq_serv_to_charge(next_rq, bfqq) > + bfq_bfqq_budget_left(bfqq)) { ++ /* ++ * Expire the queue for budget exhaustion, ++ * which makes sure that the next budget is ++ * enough to serve the next request, even if ++ * it comes from the fifo expired path. ++ */ + reason = BFQ_BFQQ_BUDGET_EXHAUSTED; + goto expire; + } else { @@ -5410,9 +5455,16 @@ index 000000000000..f3c1db326a4e + bfq_bfqq_expire(bfqd, bfqq, false, reason); +new_queue: + bfqq = bfq_set_in_service_queue(bfqd); -+ bfq_log(bfqd, "select_queue: new queue %d returned", -+ bfqq ? bfqq->pid : 0); ++ if (bfqq) { ++ bfq_log_bfqq(bfqd, bfqq, "select_queue: checking new queue"); ++ goto check_queue; ++ } +keep_queue: ++ if (bfqq) ++ bfq_log_bfqq(bfqd, bfqq, "select_queue: returned this queue"); ++ else ++ bfq_log(bfqd, "select_queue: no queue returned"); ++ + return bfqq; +} + @@ -5478,45 +5530,17 @@ index 000000000000..f3c1db326a4e + struct bfq_queue *bfqq) +{ + int dispatched = 0; -+ struct request *rq; ++ struct request *rq = bfqq->next_rq; + unsigned long service_to_charge; + + BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); -+ -+ /* Follow expired path, else get first next available. */ -+ rq = bfq_check_fifo(bfqq); -+ if (!rq) -+ rq = bfqq->next_rq; ++ BUG_ON(!rq); + service_to_charge = bfq_serv_to_charge(rq, bfqq); + -+ if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { -+ /* -+ * This may happen if the next rq is chosen in fifo order -+ * instead of sector order. The budget is properly -+ * dimensioned to be always sufficient to serve the next -+ * request only if it is chosen in sector order. The reason -+ * is that it would be quite inefficient and little useful -+ * to always make sure that the budget is large enough to -+ * serve even the possible next rq in fifo order. -+ * In fact, requests are seldom served in fifo order. -+ * -+ * Expire the queue for budget exhaustion, and make sure -+ * that the next act_budget is enough to serve the next -+ * request, even if it comes from the fifo expired path. -+ */ -+ bfqq->next_rq = rq; -+ /* -+ * Since this dispatch is failed, make sure that -+ * a new one will be performed -+ */ -+ if (!bfqd->rq_in_driver) -+ bfq_schedule_dispatch(bfqd); -+ BUG_ON(bfqq->entity.budget < bfqq->entity.service); -+ goto expire; -+ } ++ BUG_ON(service_to_charge > bfq_bfqq_budget_left(bfqq)); + + BUG_ON(bfqq->entity.budget < bfqq->entity.service); -+ /* Finally, insert request into driver dispatch list. */ ++ + bfq_bfqq_served(bfqq, service_to_charge); + + BUG_ON(bfqq->entity.budget < bfqq->entity.service); @@ -5656,7 +5680,8 @@ index 000000000000..f3c1db326a4e + * Task holds one reference to the queue, dropped when task exits. Each rq + * in-flight on this queue also holds a reference, dropped when rq is freed. + * -+ * Queue lock must be held here. ++ * Queue lock must be held here. Recall not to use bfqq after calling ++ * this function on it. 
+ */ +static void bfq_put_queue(struct bfq_queue *bfqq) +{ @@ -5725,7 +5750,7 @@ index 000000000000..f3c1db326a4e + + bfq_put_cooperator(bfqq); + -+ bfq_put_queue(bfqq); ++ bfq_put_queue(bfqq); /* release process reference */ +} + +static void bfq_init_icq(struct io_cq *icq) @@ -5769,7 +5794,7 @@ index 000000000000..f3c1db326a4e + ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); + switch (ioprio_class) { + default: -+ dev_err(bfqq->bfqd->queue->backing_dev_info.dev, ++ dev_err(bfqq->bfqd->queue->backing_dev_info->dev, + "bfq: bad prio class %d\n", ioprio_class); + case IOPRIO_CLASS_NONE: + /* @@ -5824,6 +5849,7 @@ index 000000000000..f3c1db326a4e + + bfqq = bic_to_bfqq(bic, false); + if (bfqq) { ++ /* release process reference on this queue */ + bfq_put_queue(bfqq); + bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic); + bic_set_bfqq(bic, bfqq, false); @@ -5945,14 +5971,20 @@ index 000000000000..f3c1db326a4e + * prune it. + */ + if (async_bfqq) { -+ bfqq->ref++; ++ bfqq->ref++; /* ++ * Extra group reference, w.r.t. sync ++ * queue. This extra reference is removed ++ * only if bfqq->bfqg disappears, to ++ * guarantee that this queue is not freed ++ * until its group goes away. ++ */ + bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", + bfqq, bfqq->ref); + *async_bfqq = bfqq; + } + +out: -+ bfqq->ref++; ++ bfqq->ref++; /* get a process reference to this queue */ + bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref); + rcu_read_unlock(); + return bfqq; @@ -6126,10 +6158,14 @@ index 000000000000..f3c1db326a4e + bfqq->allocated[rq_data_dir(rq)]--; + new_bfqq->ref++; + bfq_clear_bfqq_just_created(bfqq); -+ bfq_put_queue(bfqq); + if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq) + bfq_merge_bfqqs(bfqd, RQ_BIC(rq), + bfqq, new_bfqq); ++ /* ++ * rq is about to be enqueued into new_bfqq, ++ * release rq reference on bfqq ++ */ ++ bfq_put_queue(bfqq); + rq->elv.priv[1] = new_bfqq; + bfqq = new_bfqq; + } @@ -6549,9 +6585,8 @@ index 000000000000..f3c1db326a4e + cancel_work_sync(&bfqd->unplug_work); +} + -+#ifdef CONFIG_BFQ_GROUP_IOSCHED +static void __bfq_put_async_bfqq(struct bfq_data *bfqd, -+ struct bfq_queue **bfqq_ptr) ++ struct bfq_queue **bfqq_ptr) +{ + struct bfq_group *root_group = bfqd->root_group; + struct bfq_queue *bfqq = *bfqq_ptr; @@ -6582,7 +6617,6 @@ index 000000000000..f3c1db326a4e + + __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); +} -+#endif + +static void bfq_exit_queue(struct elevator_queue *e) +{ @@ -6607,6 +6641,7 @@ index 000000000000..f3c1db326a4e +#ifdef CONFIG_BFQ_GROUP_IOSCHED + blkcg_deactivate_policy(q, &blkcg_policy_bfq); +#else ++ bfq_put_async_queues(bfqd, bfqd->root_group); + kfree(bfqd->root_group); +#endif + @@ -7029,7 +7064,7 @@ index 000000000000..f3c1db326a4e +}; + +static struct elevator_type iosched_bfq = { -+ .ops = { ++ .ops.sq = { + .elevator_merge_fn = bfq_merge, + .elevator_merged_fn = bfq_merged_request, + .elevator_merge_req_fn = bfq_merged_requests, @@ -7081,7 +7116,7 @@ index 000000000000..f3c1db326a4e +static int __init bfq_init(void) +{ + int ret; -+ char msg[60] = "BFQ I/O-scheduler: v8r7"; ++ char msg[60] = "BFQ I/O-scheduler: v8r10-rc1"; + +#ifdef CONFIG_BFQ_GROUP_IOSCHED + ret = blkcg_policy_register(&blkcg_policy_bfq); @@ -7107,7 +7142,7 @@ index 000000000000..f3c1db326a4e + * be run for a long time. 
+ */ + T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */ -+ T_slow[1] = msecs_to_jiffies(1000); /* actually 1.5 sec */ ++ T_slow[1] = msecs_to_jiffies(6000); /* actually 6.5 sec */ + T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */ + T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */ + @@ -7160,10 +7195,10 @@ index 000000000000..f3c1db326a4e +MODULE_LICENSE("GPL"); diff --git a/block/bfq-sched.c b/block/bfq-sched.c new file mode 100644 -index 000000000000..797bce75db01 +index 000000000000..6ab75b6bfd96 --- /dev/null +++ b/block/bfq-sched.c -@@ -0,0 +1,1933 @@ +@@ -0,0 +1,2014 @@ +/* + * BFQ: Hierarchical B-WF2Q+ scheduler. + * @@ -7291,7 +7326,8 @@ index 000000000000..797bce75db01 + if (next_in_service) { + parent_sched_may_change = !sd->next_in_service || + bfq_update_parent_budget(next_in_service); -+ } ++ } else ++ parent_sched_may_change = sd->next_in_service; + + sd->next_in_service = next_in_service; + @@ -7320,7 +7356,13 @@ index 000000000000..797bce75db01 +#define for_each_entity(entity) \ + for (; entity ; entity = entity->parent) + -+#define for_each_entity_safe(entity, parent) \ ++/* ++ * For each iteration, compute parent in advance, so as to be safe if ++ * entity is deallocated during the iteration. Such a deallocation may ++ * happen as a consequence of a bfq_put_queue that frees the bfq_queue ++ * containing entity. ++ */ ++#define for_each_entity_safe(entity, parent) \ + for (; entity && ({ parent = entity->parent; 1; }); entity = parent) + +/* @@ -7857,27 +7899,31 @@ index 000000000000..797bce75db01 +} + +/** -+ * bfq_forget_entity - remove an entity from the wfq trees. ++ * bfq_forget_entity - do not consider entity any longer for scheduling + * @st: the service tree. + * @entity: the entity being removed. -+ * -+ * Update the device status and forget everything about @entity, putting -+ * the device reference to it, if it is a queue. Entities belonging to -+ * groups are not refcounted. ++ * @is_in_service: true if entity is currently the in-service entity. ++ * ++ * Forget everything about @entity. In addition, if entity represents ++ * a queue, and the latter is not in service, then release the service ++ * reference to the queue (the one taken through bfq_get_entity). In ++ * fact, in this case, there is really no more service reference to ++ * the queue, as the latter is also outside any service tree. If, ++ * instead, the queue is in service, then __bfq_bfqd_reset_in_service ++ * will take care of putting the reference when the queue finally ++ * stops being served. 
+ */ +static void bfq_forget_entity(struct bfq_service_tree *st, -+ struct bfq_entity *entity) ++ struct bfq_entity *entity, ++ bool is_in_service) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct bfq_sched_data *sd; -+ + BUG_ON(!entity->on_st); + + entity->on_st = false; + st->wsum -= entity->weight; -+ if (bfqq) { -+ sd = entity->sched_data; -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", ++ if (bfqq && !is_in_service) { ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity (before): %p %d", + bfqq, bfqq->ref); + bfq_put_queue(bfqq); + } @@ -7892,7 +7938,8 @@ index 000000000000..797bce75db01 + struct bfq_entity *entity) +{ + bfq_idle_extract(st, entity); -+ bfq_forget_entity(st, entity); ++ bfq_forget_entity(st, entity, ++ entity == entity->sched_data->in_service_entity); +} + +/** @@ -8248,6 +8295,12 @@ index 000000000000..797bce75db01 + */ + entity->start = min_vstart; + st->wsum += entity->weight; ++ /* ++ * entity is about to be inserted into a service tree, ++ * and then set in service: get a reference to make ++ * sure entity does not disappear until it is no ++ * longer in service or scheduled for service. ++ */ + bfq_get_entity(entity); + + BUG_ON(entity->on_st && bfqq); @@ -8430,27 +8483,27 @@ index 000000000000..797bce75db01 +{ + struct bfq_sched_data *sd = entity->sched_data; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); -+ bool was_in_service = entity == sd->in_service_entity; ++ bool is_in_service = entity == sd->in_service_entity; + + if (!entity->on_st) { /* entity never activated, or already inactive */ + BUG_ON(entity == entity->sched_data->in_service_entity); + return false; + } + -+ BUG_ON(was_in_service && entity->tree && entity->tree != &st->active); ++ BUG_ON(is_in_service && entity->tree && entity->tree != &st->active); + -+ if (was_in_service) ++ if (is_in_service) + bfq_calc_finish(entity, entity->service); + + if (entity->tree == &st->active) + bfq_active_extract(st, entity); -+ else if (!was_in_service && entity->tree == &st->idle) ++ else if (!is_in_service && entity->tree == &st->idle) + bfq_idle_extract(st, entity); + else if (entity->tree) + BUG(); + + if (!ins_into_idle_tree || !bfq_gt(entity->finish, st->vtime)) -+ bfq_forget_entity(st, entity); ++ bfq_forget_entity(st, entity, is_in_service); + else + bfq_idle_insert(st, entity); + @@ -8467,7 +8520,7 @@ index 000000000000..797bce75db01 + bool expiration) +{ + struct bfq_sched_data *sd; -+ struct bfq_entity *parent; ++ struct bfq_entity *parent = NULL; + + for_each_entity_safe(entity, parent) { + sd = entity->sched_data; @@ -8486,8 +8539,8 @@ index 000000000000..797bce75db01 + + if (!__bfq_deactivate_entity(entity, ins_into_idle_tree)) { + /* -+ * Entity is not any tree any more, so, this -+ * deactivation is a no-op, and there is ++ * entity is not in any tree any more, so ++ * this deactivation is a no-op, and there is + * nothing to change for upper-level entities + * (in case of expiration, this can never + * happen). 
@@ -8987,14 +9040,16 @@ index 000000000000..797bce75db01 + +static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) +{ -+ struct bfq_entity *entity = &bfqd->in_service_queue->entity; ++ struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue; ++ struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity; ++ struct bfq_entity *entity = in_serv_entity; + + if (bfqd->in_service_bic) { + put_io_context(bfqd->in_service_bic->icq.ioc); + bfqd->in_service_bic = NULL; + } + -+ bfq_clear_bfqq_wait_request(bfqd->in_service_queue); ++ bfq_clear_bfqq_wait_request(in_serv_bfqq); + hrtimer_try_to_cancel(&bfqd->idle_slice_timer); + bfqd->in_service_queue = NULL; + @@ -9006,6 +9061,66 @@ index 000000000000..797bce75db01 + */ + for_each_entity(entity) + entity->sched_data->in_service_entity = NULL; ++ ++ /* ++ * in_serv_entity is no longer in service, so, if it is in no ++ * service tree either, then release the service reference to ++ * the queue it represents (taken with bfq_get_entity). ++ */ ++ if (!in_serv_entity->on_st) ++ bfq_put_queue(in_serv_bfqq); ++} ++ ++static void set_next_in_service_bfqq(struct bfq_data *bfqd) ++{ ++ struct bfq_entity *entity = NULL; ++ struct bfq_queue *bfqq; ++ struct bfq_sched_data *sd = &bfqd->root_group->sched_data; ++ ++ BUG_ON(!sd); ++ ++ /* Traverse the path from the root to the in-service leaf entity */ ++ for (; sd ; sd = entity->my_sched_data) { ++#ifdef CONFIG_BFQ_GROUP_IOSCHED ++ if (entity) { ++ struct bfq_group *bfqg = ++ container_of(entity, struct bfq_group, entity); ++ ++ bfq_log_bfqg(bfqd, bfqg, ++ "set_next_in_service_bfqq: lookup in this group"); ++ } else ++ bfq_log_bfqg(bfqd, bfqd->root_group, ++ "set_next_in_service_bfqq: lookup in root group"); ++#endif ++ ++ entity = sd->next_in_service; ++ ++ if (!entity) { ++ bfqd->next_in_service_queue = NULL; ++ return; ++ } ++ ++ /* Log some information */ ++ bfqq = bfq_entity_to_bfqq(entity); ++ if (bfqq) ++ bfq_log_bfqq(bfqd, bfqq, ++ "set_next_in_service_bfqq: this queue, finish %llu", ++ (((entity->finish>>10)*1000)>>10)>>2); ++#ifdef CONFIG_BFQ_GROUP_IOSCHED ++ else { ++ struct bfq_group *bfqg = ++ container_of(entity, struct bfq_group, entity); ++ ++ bfq_log_bfqg(bfqd, bfqg, ++ "set_next_in_service_bfqq: this entity, finish %llu", ++ (((entity->finish>>10)*1000)>>10)>>2); ++ } ++#endif ++ ++ } ++ BUG_ON(!bfq_entity_to_bfqq(entity)); ++ ++ bfqd->next_in_service_queue = bfq_entity_to_bfqq(entity); +} + +static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, @@ -9014,6 +9129,7 @@ index 000000000000..797bce75db01 + struct bfq_entity *entity = &bfqq->entity; + + bfq_deactivate_entity(entity, ins_into_idle_tree, expiration); ++ set_next_in_service_bfqq(bfqd); +} + +static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) @@ -9028,6 +9144,7 @@ index 000000000000..797bce75db01 + bfq_activate_requeue_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq), + false); + bfq_clear_bfqq_non_blocking_wait_rq(bfqq); ++ set_next_in_service_bfqq(bfqd); +} + +static void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) @@ -9036,6 +9153,7 @@ index 000000000000..797bce75db01 + + bfq_activate_requeue_entity(entity, false, + bfqq == bfqd->in_service_queue); ++ set_next_in_service_bfqq(bfqd); +} + +static void bfqg_stats_update_dequeue(struct bfq_group *bfqg); @@ -9070,8 +9188,6 @@ index 000000000000..797bce75db01 + BUG_ON(bfqq->entity.budget < 0); + + bfq_deactivate_bfqq(bfqd, bfqq, true, expiration); -+ -+ BUG_ON(bfqq->entity.budget < 0); +} + +/* @@ -9099,12 
+9215,12 @@ index 000000000000..797bce75db01 +} diff --git a/block/bfq.h b/block/bfq.h new file mode 100644 -index 000000000000..bef8244cc03f +index 000000000000..67d56670e678 --- /dev/null +++ b/block/bfq.h @@ -0,0 +1,933 @@ +/* -+ * BFQ v8r7 for 4.9.0: data structures and common functions prototypes. ++ * BFQ v8r10-rc1 for 4.11.0: data structures and common functions prototypes. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe @@ -9114,7 +9230,7 @@ index 000000000000..bef8244cc03f + * + * Copyright (C) 2015 Paolo Valente + * -+ * Copyright (C) 2016 Paolo Valente ++ * Copyright (C) 2017 Paolo Valente + */ + +#ifndef _BFQ_H @@ -9122,8 +9238,6 @@ index 000000000000..bef8244cc03f + +#include +#include -+#include -+#include +#include + +#define BFQ_IOPRIO_CLASSES 3 @@ -9545,6 +9659,8 @@ index 000000000000..bef8244cc03f + + /* bfq_queue in service */ + struct bfq_queue *in_service_queue; ++ /* candidate bfq_queue to become the next in-service queue */ ++ struct bfq_queue *next_in_service_queue; + /* bfq_io_cq (bic) associated with the @in_service_queue */ + struct bfq_io_cq *in_service_bic; + -- cgit 1.4.1
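
For reference, a patch like this can be regenerated straight from Paolo
Valente's branch rather than rebased hunk by hunk. A minimal sketch of that
workflow (the base tag v4.11-rc7, the origin/bfq-v8 ref spelling and the
output path are assumptions, not recorded in this commit):

  # Clone the BFQ development tree referenced in the commit message.
  git clone https://github.com/linusw/linux-bfq
  cd linux-bfq

  # Diff the merge base of the mainline tag and the BFQ branch against the
  # branch tip, so only the BFQ changes end up in the patch.
  git diff v4.11-rc7...origin/bfq-v8 > bfq.patch

The three-dot form diffs from the common ancestor rather than the tag
itself, which avoids pulling unrelated mainline changes into the patch when
the branch is based on an older release candidate.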