From 6a97e4d55a55a82d517e1540376d5014e43004e3 Mon Sep 17 00:00:00 2001 From: Ruben Rodriguez <ruben@trisquel.info> Date: Fri, 9 Feb 2018 16:40:31 -0500 Subject: [PATCH] Updated linux-hwe for v4.13 --- ...onfig-build-bits-for-BFQ-v7r11-4.10..patch | 103 - ...e-the-BFQ-v7r11-I-O-sched-for-4.10.0.patch | 7109 ------------- ...rly-Queue-Merge-EQM-to-BFQ-v7r11-for.patch | 1101 -- ...for-4.10.0-into-BFQ-v8r11-for-4.10.0.patch | 9308 ----------------- .../linux-hwe/{deblob-4.10 => deblob-4.13} | 169 +- helpers/DATA/linux-hwe/deblob-check | 210 +- .../linux-hwe/silent-accept-firmware.patch | 862 +- .../linux-hwe/silent-accept-firmware.patch.1 | 990 ++ helpers/make-linux-hwe | 5 +- 9 files changed, 1769 insertions(+), 18088 deletions(-) delete mode 100644 helpers/DATA/linux-hwe/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.10..patch delete mode 100644 helpers/DATA/linux-hwe/0002-block-introduce-the-BFQ-v7r11-I-O-sched-for-4.10.0.patch delete mode 100644 helpers/DATA/linux-hwe/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for.patch delete mode 100644 helpers/DATA/linux-hwe/0004-Turn-BFQ-v7r11-for-4.10.0-into-BFQ-v8r11-for-4.10.0.patch rename helpers/DATA/linux-hwe/{deblob-4.10 => deblob-4.13} (95%) create mode 100644 helpers/DATA/linux-hwe/silent-accept-firmware.patch.1 diff --git a/helpers/DATA/linux-hwe/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.10..patch b/helpers/DATA/linux-hwe/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.10..patch deleted file mode 100644 index 45f4fd2e..00000000 --- a/helpers/DATA/linux-hwe/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.10..patch +++ /dev/null @@ -1,103 +0,0 @@ -From 8500f47272575b4616beb487c483019248d8c501 Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@unimore.it> -Date: Tue, 7 Apr 2015 13:39:12 +0200 -Subject: [PATCH 1/4] block: cgroups, kconfig, build bits for BFQ-v7r11-4.10.0 - -Update Kconfig.iosched and do the related Makefile changes to include -kernel configuration options for BFQ. Also increase the number of -policies supported by the blkio controller so that BFQ can add its -own. - -Signed-off-by: Paolo Valente <paolo.valente@unimore.it> -Signed-off-by: Arianna Avanzini <avanzini@google.com> ---- - block/Kconfig.iosched | 32 ++++++++++++++++++++++++++++++++ - block/Makefile | 1 + - include/linux/blkdev.h | 2 +- - 3 files changed, 34 insertions(+), 1 deletion(-) - -diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched -index 421bef9..0ee5f0f 100644 ---- a/block/Kconfig.iosched -+++ b/block/Kconfig.iosched -@@ -39,6 +39,27 @@ config CFQ_GROUP_IOSCHED - ---help--- - Enable group IO scheduling in CFQ. - -+config IOSCHED_BFQ -+ tristate "BFQ I/O scheduler" -+ default n -+ ---help--- -+ The BFQ I/O scheduler tries to distribute bandwidth among -+ all processes according to their weights. -+ It aims at distributing the bandwidth as desired, independently of -+ the disk parameters and with any workload. It also tries to -+ guarantee low latency to interactive and soft real-time -+ applications. If compiled built-in (saying Y here), BFQ can -+ be configured to support hierarchical scheduling. -+ -+config CGROUP_BFQIO -+ bool "BFQ hierarchical scheduling support" -+ depends on CGROUPS && IOSCHED_BFQ=y -+ default n -+ ---help--- -+ Enable hierarchical scheduling in BFQ, using the cgroups -+ filesystem interface. The name of the subsystem will be -+ bfqio. -+ - choice - prompt "Default I/O scheduler" - default DEFAULT_CFQ -@@ -52,6 +73,16 @@ choice - config DEFAULT_CFQ - bool "CFQ" if IOSCHED_CFQ=y - -+ config DEFAULT_BFQ -+ bool "BFQ" if IOSCHED_BFQ=y -+ help -+ Selects BFQ as the default I/O scheduler which will be -+ used by default for all block devices. -+ The BFQ I/O scheduler aims at distributing the bandwidth -+ as desired, independently of the disk parameters and with -+ any workload. It also tries to guarantee low latency to -+ interactive and soft real-time applications. -+ - config DEFAULT_NOOP - bool "No-op" - -@@ -61,6 +92,7 @@ config DEFAULT_IOSCHED - string - default "deadline" if DEFAULT_DEADLINE - default "cfq" if DEFAULT_CFQ -+ default "bfq" if DEFAULT_BFQ - default "noop" if DEFAULT_NOOP - - endmenu -diff --git a/block/Makefile b/block/Makefile -index a827f98..3b14703 100644 ---- a/block/Makefile -+++ b/block/Makefile -@@ -18,6 +18,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o - obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o - obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o - obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o -+obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o - - obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o - obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o -diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 1ca8e8f..8e2d6ed 100644 ---- a/include/linux/blkdev.h -+++ b/include/linux/blkdev.h -@@ -47,7 +47,7 @@ struct rq_wb; - * Maximum number of blkcg policies allowed to be registered concurrently. - * Defined here to simplify include dependency. - */ --#define BLKCG_MAX_POLS 2 -+#define BLKCG_MAX_POLS 3 - - typedef void (rq_end_io_fn)(struct request *, int); - --- -2.10.0 - diff --git a/helpers/DATA/linux-hwe/0002-block-introduce-the-BFQ-v7r11-I-O-sched-for-4.10.0.patch b/helpers/DATA/linux-hwe/0002-block-introduce-the-BFQ-v7r11-I-O-sched-for-4.10.0.patch deleted file mode 100644 index 0812a579..00000000 --- a/helpers/DATA/linux-hwe/0002-block-introduce-the-BFQ-v7r11-I-O-sched-for-4.10.0.patch +++ /dev/null @@ -1,7109 +0,0 @@ -From 2f56e91506b329ffc29d0f184924ad0123c9ba9e Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@unimore.it> -Date: Thu, 9 May 2013 19:10:02 +0200 -Subject: [PATCH 2/4] block: introduce the BFQ-v7r11 I/O sched for 4.10.0 - -The general structure is borrowed from CFQ, as much of the code for -handling I/O contexts. Over time, several useful features have been -ported from CFQ as well (details in the changelog in README.BFQ). A -(bfq_)queue is associated to each task doing I/O on a device, and each -time a scheduling decision has to be made a queue is selected and served -until it expires. - - - Slices are given in the service domain: tasks are assigned - budgets, measured in number of sectors. Once got the disk, a task - must however consume its assigned budget within a configurable - maximum time (by default, the maximum possible value of the - budgets is automatically computed to comply with this timeout). - This allows the desired latency vs "throughput boosting" tradeoff - to be set. - - - Budgets are scheduled according to a variant of WF2Q+, implemented - using an augmented rb-tree to take eligibility into account while - preserving an O(log N) overall complexity. - - - A low-latency tunable is provided; if enabled, both interactive - and soft real-time applications are guaranteed a very low latency. - - - Latency guarantees are preserved also in the presence of NCQ. - - - Also with flash-based devices, a high throughput is achieved - while still preserving latency guarantees. - - - BFQ features Early Queue Merge (EQM), a sort of fusion of the - cooperating-queue-merging and the preemption mechanisms present - in CFQ. EQM is in fact a unified mechanism that tries to get a - sequential read pattern, and hence a high throughput, with any - set of processes performing interleaved I/O over a contiguous - sequence of sectors. - - - BFQ supports full hierarchical scheduling, exporting a cgroups - interface. Since each node has a full scheduler, each group can - be assigned its own weight. - - - If the cgroups interface is not used, only I/O priorities can be - assigned to processes, with ioprio values mapped to weights - with the relation weight = IOPRIO_BE_NR - ioprio. - - - ioprio classes are served in strict priority order, i.e., lower - priority queues are not served as long as there are higher - priority queues. Among queues in the same class the bandwidth is - distributed in proportion to the weight of each queue. A very - thin extra bandwidth is however guaranteed to the Idle class, to - prevent it from starving. - -Signed-off-by: Paolo Valente <paolo.valente@unimore.it> -Signed-off-by: Arianna Avanzini <avanzini@google.com> ---- - block/Kconfig.iosched | 6 +- - block/bfq-cgroup.c | 1186 ++++++++++++++++ - block/bfq-ioc.c | 36 + - block/bfq-iosched.c | 3763 +++++++++++++++++++++++++++++++++++++++++++++++++ - block/bfq-sched.c | 1199 ++++++++++++++++ - block/bfq.h | 801 +++++++++++ - 6 files changed, 6987 insertions(+), 4 deletions(-) - create mode 100644 block/bfq-cgroup.c - create mode 100644 block/bfq-ioc.c - create mode 100644 block/bfq-iosched.c - create mode 100644 block/bfq-sched.c - create mode 100644 block/bfq.h - -diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched -index 0ee5f0f..f78cd1a 100644 ---- a/block/Kconfig.iosched -+++ b/block/Kconfig.iosched -@@ -51,14 +51,12 @@ config IOSCHED_BFQ - applications. If compiled built-in (saying Y here), BFQ can - be configured to support hierarchical scheduling. - --config CGROUP_BFQIO -+config BFQ_GROUP_IOSCHED - bool "BFQ hierarchical scheduling support" - depends on CGROUPS && IOSCHED_BFQ=y - default n - ---help--- -- Enable hierarchical scheduling in BFQ, using the cgroups -- filesystem interface. The name of the subsystem will be -- bfqio. -+ Enable hierarchical scheduling in BFQ, using the blkio controller. - - choice - prompt "Default I/O scheduler" -diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c -new file mode 100644 -index 0000000..8b08a57 ---- /dev/null -+++ b/block/bfq-cgroup.c -@@ -0,0 +1,1186 @@ -+/* -+ * BFQ: CGROUPS support. -+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> -+ * -+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> -+ * Paolo Valente <paolo.valente@unimore.it> -+ * -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it> -+ * -+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ -+ * file. -+ */ -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ -+/* bfqg stats flags */ -+enum bfqg_stats_flags { -+ BFQG_stats_waiting = 0, -+ BFQG_stats_idling, -+ BFQG_stats_empty, -+}; -+ -+#define BFQG_FLAG_FNS(name) \ -+static void bfqg_stats_mark_##name(struct bfqg_stats *stats) \ -+{ \ -+ stats->flags |= (1 << BFQG_stats_##name); \ -+} \ -+static void bfqg_stats_clear_##name(struct bfqg_stats *stats) \ -+{ \ -+ stats->flags &= ~(1 << BFQG_stats_##name); \ -+} \ -+static int bfqg_stats_##name(struct bfqg_stats *stats) \ -+{ \ -+ return (stats->flags & (1 << BFQG_stats_##name)) != 0; \ -+} \ -+ -+BFQG_FLAG_FNS(waiting) -+BFQG_FLAG_FNS(idling) -+BFQG_FLAG_FNS(empty) -+#undef BFQG_FLAG_FNS -+ -+/* This should be called with the queue_lock held. */ -+static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats) -+{ -+ unsigned long long now; -+ -+ if (!bfqg_stats_waiting(stats)) -+ return; -+ -+ now = sched_clock(); -+ if (time_after64(now, stats->start_group_wait_time)) -+ blkg_stat_add(&stats->group_wait_time, -+ now - stats->start_group_wait_time); -+ bfqg_stats_clear_waiting(stats); -+} -+ -+/* This should be called with the queue_lock held. */ -+static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, -+ struct bfq_group *curr_bfqg) -+{ -+ struct bfqg_stats *stats = &bfqg->stats; -+ -+ if (bfqg_stats_waiting(stats)) -+ return; -+ if (bfqg == curr_bfqg) -+ return; -+ stats->start_group_wait_time = sched_clock(); -+ bfqg_stats_mark_waiting(stats); -+} -+ -+/* This should be called with the queue_lock held. */ -+static void bfqg_stats_end_empty_time(struct bfqg_stats *stats) -+{ -+ unsigned long long now; -+ -+ if (!bfqg_stats_empty(stats)) -+ return; -+ -+ now = sched_clock(); -+ if (time_after64(now, stats->start_empty_time)) -+ blkg_stat_add(&stats->empty_time, -+ now - stats->start_empty_time); -+ bfqg_stats_clear_empty(stats); -+} -+ -+static void bfqg_stats_update_dequeue(struct bfq_group *bfqg) -+{ -+ blkg_stat_add(&bfqg->stats.dequeue, 1); -+} -+ -+static void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) -+{ -+ struct bfqg_stats *stats = &bfqg->stats; -+ -+ if (blkg_rwstat_total(&stats->queued)) -+ return; -+ -+ /* -+ * group is already marked empty. This can happen if bfqq got new -+ * request in parent group and moved to this group while being added -+ * to service tree. Just ignore the event and move on. -+ */ -+ if (bfqg_stats_empty(stats)) -+ return; -+ -+ stats->start_empty_time = sched_clock(); -+ bfqg_stats_mark_empty(stats); -+} -+ -+static void bfqg_stats_update_idle_time(struct bfq_group *bfqg) -+{ -+ struct bfqg_stats *stats = &bfqg->stats; -+ -+ if (bfqg_stats_idling(stats)) { -+ unsigned long long now = sched_clock(); -+ -+ if (time_after64(now, stats->start_idle_time)) -+ blkg_stat_add(&stats->idle_time, -+ now - stats->start_idle_time); -+ bfqg_stats_clear_idling(stats); -+ } -+} -+ -+static void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) -+{ -+ struct bfqg_stats *stats = &bfqg->stats; -+ -+ stats->start_idle_time = sched_clock(); -+ bfqg_stats_mark_idling(stats); -+} -+ -+static void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) -+{ -+ struct bfqg_stats *stats = &bfqg->stats; -+ -+ blkg_stat_add(&stats->avg_queue_size_sum, -+ blkg_rwstat_total(&stats->queued)); -+ blkg_stat_add(&stats->avg_queue_size_samples, 1); -+ bfqg_stats_update_group_wait_time(stats); -+} -+ -+static struct blkcg_policy blkcg_policy_bfq; -+ -+/* -+ * blk-cgroup policy-related handlers -+ * The following functions help in converting between blk-cgroup -+ * internal structures and BFQ-specific structures. -+ */ -+ -+static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd) -+{ -+ return pd ? container_of(pd, struct bfq_group, pd) : NULL; -+} -+ -+static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg) -+{ -+ return pd_to_blkg(&bfqg->pd); -+} -+ -+static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg) -+{ -+ struct blkg_policy_data *pd = blkg_to_pd(blkg, &blkcg_policy_bfq); -+ -+ BUG_ON(!pd); -+ -+ return pd_to_bfqg(pd); -+} -+ -+/* -+ * bfq_group handlers -+ * The following functions help in navigating the bfq_group hierarchy -+ * by allowing to find the parent of a bfq_group or the bfq_group -+ * associated to a bfq_queue. -+ */ -+ -+static struct bfq_group *bfqg_parent(struct bfq_group *bfqg) -+{ -+ struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent; -+ -+ return pblkg ? blkg_to_bfqg(pblkg) : NULL; -+} -+ -+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *group_entity = bfqq->entity.parent; -+ -+ return group_entity ? container_of(group_entity, struct bfq_group, -+ entity) : -+ bfqq->bfqd->root_group; -+} -+ -+/* -+ * The following two functions handle get and put of a bfq_group by -+ * wrapping the related blk-cgroup hooks. -+ */ -+ -+static void bfqg_get(struct bfq_group *bfqg) -+{ -+ return blkg_get(bfqg_to_blkg(bfqg)); -+} -+ -+static void bfqg_put(struct bfq_group *bfqg) -+{ -+ return blkg_put(bfqg_to_blkg(bfqg)); -+} -+ -+static void bfqg_stats_update_io_add(struct bfq_group *bfqg, -+ struct bfq_queue *bfqq, -+ int rw) -+{ -+ blkg_rwstat_add(&bfqg->stats.queued, rw, 1); -+ bfqg_stats_end_empty_time(&bfqg->stats); -+ if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue)) -+ bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq)); -+} -+ -+static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw) -+{ -+ blkg_rwstat_add(&bfqg->stats.queued, rw, -1); -+} -+ -+static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw) -+{ -+ blkg_rwstat_add(&bfqg->stats.merged, rw, 1); -+} -+ -+static void bfqg_stats_update_dispatch(struct bfq_group *bfqg, -+ uint64_t bytes, int rw) -+{ -+ blkg_stat_add(&bfqg->stats.sectors, bytes >> 9); -+ blkg_rwstat_add(&bfqg->stats.serviced, rw, 1); -+ blkg_rwstat_add(&bfqg->stats.service_bytes, rw, bytes); -+} -+ -+static void bfqg_stats_update_completion(struct bfq_group *bfqg, -+ uint64_t start_time, uint64_t io_start_time, int rw) -+{ -+ struct bfqg_stats *stats = &bfqg->stats; -+ unsigned long long now = sched_clock(); -+ -+ if (time_after64(now, io_start_time)) -+ blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); -+ if (time_after64(io_start_time, start_time)) -+ blkg_rwstat_add(&stats->wait_time, rw, -+ io_start_time - start_time); -+} -+ -+/* @stats = 0 */ -+static void bfqg_stats_reset(struct bfqg_stats *stats) -+{ -+ if (!stats) -+ return; -+ -+ /* queued stats shouldn't be cleared */ -+ blkg_rwstat_reset(&stats->service_bytes); -+ blkg_rwstat_reset(&stats->serviced); -+ blkg_rwstat_reset(&stats->merged); -+ blkg_rwstat_reset(&stats->service_time); -+ blkg_rwstat_reset(&stats->wait_time); -+ blkg_stat_reset(&stats->time); -+ blkg_stat_reset(&stats->unaccounted_time); -+ blkg_stat_reset(&stats->avg_queue_size_sum); -+ blkg_stat_reset(&stats->avg_queue_size_samples); -+ blkg_stat_reset(&stats->dequeue); -+ blkg_stat_reset(&stats->group_wait_time); -+ blkg_stat_reset(&stats->idle_time); -+ blkg_stat_reset(&stats->empty_time); -+} -+ -+/* @to += @from */ -+static void bfqg_stats_merge(struct bfqg_stats *to, struct bfqg_stats *from) -+{ -+ if (!to || !from) -+ return; -+ -+ /* queued stats shouldn't be cleared */ -+ blkg_rwstat_add_aux(&to->service_bytes, &from->service_bytes); -+ blkg_rwstat_add_aux(&to->serviced, &from->serviced); -+ blkg_rwstat_add_aux(&to->merged, &from->merged); -+ blkg_rwstat_add_aux(&to->service_time, &from->service_time); -+ blkg_rwstat_add_aux(&to->wait_time, &from->wait_time); -+ blkg_stat_add_aux(&from->time, &from->time); -+ blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time); -+ blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum); -+ blkg_stat_add_aux(&to->avg_queue_size_samples, -+ &from->avg_queue_size_samples); -+ blkg_stat_add_aux(&to->dequeue, &from->dequeue); -+ blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time); -+ blkg_stat_add_aux(&to->idle_time, &from->idle_time); -+ blkg_stat_add_aux(&to->empty_time, &from->empty_time); -+} -+ -+/* -+ * Transfer @bfqg's stats to its parent's dead_stats so that the ancestors' -+ * recursive stats can still account for the amount used by this bfqg after -+ * it's gone. -+ */ -+static void bfqg_stats_xfer_dead(struct bfq_group *bfqg) -+{ -+ struct bfq_group *parent; -+ -+ if (!bfqg) /* root_group */ -+ return; -+ -+ parent = bfqg_parent(bfqg); -+ -+ lockdep_assert_held(bfqg_to_blkg(bfqg)->q->queue_lock); -+ -+ if (unlikely(!parent)) -+ return; -+ -+ bfqg_stats_merge(&parent->dead_stats, &bfqg->stats); -+ bfqg_stats_merge(&parent->dead_stats, &bfqg->dead_stats); -+ bfqg_stats_reset(&bfqg->stats); -+ bfqg_stats_reset(&bfqg->dead_stats); -+} -+ -+static void bfq_init_entity(struct bfq_entity *entity, -+ struct bfq_group *bfqg) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ -+ entity->weight = entity->new_weight; -+ entity->orig_weight = entity->new_weight; -+ if (bfqq) { -+ bfqq->ioprio = bfqq->new_ioprio; -+ bfqq->ioprio_class = bfqq->new_ioprio_class; -+ bfqg_get(bfqg); -+ } -+ entity->parent = bfqg->my_entity; -+ entity->sched_data = &bfqg->sched_data; -+} -+ -+static void bfqg_stats_exit(struct bfqg_stats *stats) -+{ -+ blkg_rwstat_exit(&stats->service_bytes); -+ blkg_rwstat_exit(&stats->serviced); -+ blkg_rwstat_exit(&stats->merged); -+ blkg_rwstat_exit(&stats->service_time); -+ blkg_rwstat_exit(&stats->wait_time); -+ blkg_rwstat_exit(&stats->queued); -+ blkg_stat_exit(&stats->sectors); -+ blkg_stat_exit(&stats->time); -+ blkg_stat_exit(&stats->unaccounted_time); -+ blkg_stat_exit(&stats->avg_queue_size_sum); -+ blkg_stat_exit(&stats->avg_queue_size_samples); -+ blkg_stat_exit(&stats->dequeue); -+ blkg_stat_exit(&stats->group_wait_time); -+ blkg_stat_exit(&stats->idle_time); -+ blkg_stat_exit(&stats->empty_time); -+} -+ -+static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp) -+{ -+ if (blkg_rwstat_init(&stats->service_bytes, gfp) || -+ blkg_rwstat_init(&stats->serviced, gfp) || -+ blkg_rwstat_init(&stats->merged, gfp) || -+ blkg_rwstat_init(&stats->service_time, gfp) || -+ blkg_rwstat_init(&stats->wait_time, gfp) || -+ blkg_rwstat_init(&stats->queued, gfp) || -+ blkg_stat_init(&stats->sectors, gfp) || -+ blkg_stat_init(&stats->time, gfp) || -+ blkg_stat_init(&stats->unaccounted_time, gfp) || -+ blkg_stat_init(&stats->avg_queue_size_sum, gfp) || -+ blkg_stat_init(&stats->avg_queue_size_samples, gfp) || -+ blkg_stat_init(&stats->dequeue, gfp) || -+ blkg_stat_init(&stats->group_wait_time, gfp) || -+ blkg_stat_init(&stats->idle_time, gfp) || -+ blkg_stat_init(&stats->empty_time, gfp)) { -+ bfqg_stats_exit(stats); -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd) -+{ -+ return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL; -+} -+ -+static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg) -+{ -+ return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq)); -+} -+ -+static void bfq_cpd_init(struct blkcg_policy_data *cpd) -+{ -+ struct bfq_group_data *d = cpd_to_bfqgd(cpd); -+ -+ d->weight = BFQ_DEFAULT_GRP_WEIGHT; -+} -+ -+static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) -+{ -+ struct bfq_group *bfqg; -+ -+ bfqg = kzalloc_node(sizeof(*bfqg), gfp, node); -+ if (!bfqg) -+ return NULL; -+ -+ if (bfqg_stats_init(&bfqg->stats, gfp) || -+ bfqg_stats_init(&bfqg->dead_stats, gfp)) { -+ kfree(bfqg); -+ return NULL; -+ } -+ -+ return &bfqg->pd; -+} -+ -+static void bfq_group_set_parent(struct bfq_group *bfqg, -+ struct bfq_group *parent) -+{ -+ struct bfq_entity *entity; -+ -+ BUG_ON(!parent); -+ BUG_ON(!bfqg); -+ BUG_ON(bfqg == parent); -+ -+ entity = &bfqg->entity; -+ entity->parent = parent->my_entity; -+ entity->sched_data = &parent->sched_data; -+} -+ -+static void bfq_pd_init(struct blkg_policy_data *pd) -+{ -+ struct blkcg_gq *blkg = pd_to_blkg(pd); -+ struct bfq_group *bfqg = blkg_to_bfqg(blkg); -+ struct bfq_data *bfqd = blkg->q->elevator->elevator_data; -+ struct bfq_entity *entity = &bfqg->entity; -+ struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg); -+ -+ entity->orig_weight = entity->weight = entity->new_weight = d->weight; -+ entity->my_sched_data = &bfqg->sched_data; -+ bfqg->my_entity = entity; /* -+ * the root_group's will be set to NULL -+ * in bfq_init_queue() -+ */ -+ bfqg->bfqd = bfqd; -+ bfqg->active_entities = 0; -+} -+ -+static void bfq_pd_free(struct blkg_policy_data *pd) -+{ -+ struct bfq_group *bfqg = pd_to_bfqg(pd); -+ -+ bfqg_stats_exit(&bfqg->stats); -+ bfqg_stats_exit(&bfqg->dead_stats); -+ -+ return kfree(bfqg); -+} -+ -+/* offset delta from bfqg->stats to bfqg->dead_stats */ -+static const int dead_stats_off_delta = offsetof(struct bfq_group, dead_stats) - -+ offsetof(struct bfq_group, stats); -+ -+/* to be used by recursive prfill, sums live and dead stats recursively */ -+static u64 bfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off) -+{ -+ u64 sum = 0; -+ -+ sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off); -+ sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, -+ off + dead_stats_off_delta); -+ return sum; -+} -+ -+/* to be used by recursive prfill, sums live and dead rwstats recursively */ -+static struct blkg_rwstat -+bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd, int off) -+{ -+ struct blkg_rwstat a, b; -+ -+ a = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off); -+ b = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, -+ off + dead_stats_off_delta); -+ blkg_rwstat_add_aux(&a, &b); -+ return a; -+} -+ -+static void bfq_pd_reset_stats(struct blkg_policy_data *pd) -+{ -+ struct bfq_group *bfqg = pd_to_bfqg(pd); -+ -+ bfqg_stats_reset(&bfqg->stats); -+ bfqg_stats_reset(&bfqg->dead_stats); -+} -+ -+static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, -+ struct blkcg *blkcg) -+{ -+ struct request_queue *q = bfqd->queue; -+ struct bfq_group *bfqg = NULL, *parent; -+ struct bfq_entity *entity = NULL; -+ -+ assert_spin_locked(bfqd->queue->queue_lock); -+ -+ /* avoid lookup for the common case where there's no blkcg */ -+ if (blkcg == &blkcg_root) { -+ bfqg = bfqd->root_group; -+ } else { -+ struct blkcg_gq *blkg; -+ -+ blkg = blkg_lookup_create(blkcg, q); -+ if (!IS_ERR(blkg)) -+ bfqg = blkg_to_bfqg(blkg); -+ else /* fallback to root_group */ -+ bfqg = bfqd->root_group; -+ } -+ -+ BUG_ON(!bfqg); -+ -+ /* -+ * Update chain of bfq_groups as we might be handling a leaf group -+ * which, along with some of its relatives, has not been hooked yet -+ * to the private hierarchy of BFQ. -+ */ -+ entity = &bfqg->entity; -+ for_each_entity(entity) { -+ bfqg = container_of(entity, struct bfq_group, entity); -+ BUG_ON(!bfqg); -+ if (bfqg != bfqd->root_group) { -+ parent = bfqg_parent(bfqg); -+ if (!parent) -+ parent = bfqd->root_group; -+ BUG_ON(!parent); -+ bfq_group_set_parent(bfqg, parent); -+ } -+ } -+ -+ return bfqg; -+} -+ -+/** -+ * bfq_bfqq_move - migrate @bfqq to @bfqg. -+ * @bfqd: queue descriptor. -+ * @bfqq: the queue to move. -+ * @entity: @bfqq's entity. -+ * @bfqg: the group to move to. -+ * -+ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating -+ * it on the new one. Avoid putting the entity on the old group idle tree. -+ * -+ * Must be called under the queue lock; the cgroup owning @bfqg must -+ * not disappear (by now this just means that we are called under -+ * rcu_read_lock()). -+ */ -+static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ struct bfq_entity *entity, struct bfq_group *bfqg) -+{ -+ int busy, resume; -+ -+ busy = bfq_bfqq_busy(bfqq); -+ resume = !RB_EMPTY_ROOT(&bfqq->sort_list); -+ -+ BUG_ON(resume && !entity->on_st); -+ BUG_ON(busy && !resume && entity->on_st && -+ bfqq != bfqd->in_service_queue); -+ -+ if (busy) { -+ BUG_ON(atomic_read(&bfqq->ref) < 2); -+ -+ if (!resume) -+ bfq_del_bfqq_busy(bfqd, bfqq, 0); -+ else -+ bfq_deactivate_bfqq(bfqd, bfqq, 0); -+ } else if (entity->on_st) -+ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); -+ bfqg_put(bfqq_group(bfqq)); -+ -+ /* -+ * Here we use a reference to bfqg. We don't need a refcounter -+ * as the cgroup reference will not be dropped, so that its -+ * destroy() callback will not be invoked. -+ */ -+ entity->parent = bfqg->my_entity; -+ entity->sched_data = &bfqg->sched_data; -+ bfqg_get(bfqg); -+ -+ if (busy) { -+ if (resume) -+ bfq_activate_bfqq(bfqd, bfqq); -+ } -+ -+ if (!bfqd->in_service_queue && !bfqd->rq_in_driver) -+ bfq_schedule_dispatch(bfqd); -+} -+ -+/** -+ * __bfq_bic_change_cgroup - move @bic to @cgroup. -+ * @bfqd: the queue descriptor. -+ * @bic: the bic to move. -+ * @blkcg: the blk-cgroup to move to. -+ * -+ * Move bic to blkcg, assuming that bfqd->queue is locked; the caller -+ * has to make sure that the reference to cgroup is valid across the call. -+ * -+ * NOTE: an alternative approach might have been to store the current -+ * cgroup in bfqq and getting a reference to it, reducing the lookup -+ * time here, at the price of slightly more complex code. -+ */ -+static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, -+ struct bfq_io_cq *bic, -+ struct blkcg *blkcg) -+{ -+ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0); -+ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1); -+ struct bfq_group *bfqg; -+ struct bfq_entity *entity; -+ -+ lockdep_assert_held(bfqd->queue->queue_lock); -+ -+ bfqg = bfq_find_alloc_group(bfqd, blkcg); -+ if (async_bfqq) { -+ entity = &async_bfqq->entity; -+ -+ if (entity->sched_data != &bfqg->sched_data) { -+ bic_set_bfqq(bic, NULL, 0); -+ bfq_log_bfqq(bfqd, async_bfqq, -+ "bic_change_group: %p %d", -+ async_bfqq, atomic_read(&async_bfqq->ref)); -+ bfq_put_queue(async_bfqq); -+ } -+ } -+ -+ if (sync_bfqq) { -+ entity = &sync_bfqq->entity; -+ if (entity->sched_data != &bfqg->sched_data) -+ bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); -+ } -+ -+ return bfqg; -+} -+ -+static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) -+{ -+ struct bfq_data *bfqd = bic_to_bfqd(bic); -+ struct blkcg *blkcg; -+ struct bfq_group *bfqg = NULL; -+ uint64_t id; -+ -+ rcu_read_lock(); -+ blkcg = bio_blkcg(bio); -+ id = blkcg->css.serial_nr; -+ rcu_read_unlock(); -+ -+ /* -+ * Check whether blkcg has changed. The condition may trigger -+ * spuriously on a newly created cic but there's no harm. -+ */ -+ if (unlikely(!bfqd) || likely(bic->blkcg_id == id)) -+ return; -+ -+ bfqg = __bfq_bic_change_cgroup(bfqd, bic, blkcg); -+ BUG_ON(!bfqg); -+ bic->blkcg_id = id; -+} -+ -+/** -+ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. -+ * @st: the service tree being flushed. -+ */ -+static void bfq_flush_idle_tree(struct bfq_service_tree *st) -+{ -+ struct bfq_entity *entity = st->first_idle; -+ -+ for (; entity ; entity = st->first_idle) -+ __bfq_deactivate_entity(entity, 0); -+} -+ -+/** -+ * bfq_reparent_leaf_entity - move leaf entity to the root_group. -+ * @bfqd: the device data structure with the root group. -+ * @entity: the entity to move. -+ */ -+static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ -+ BUG_ON(!bfqq); -+ bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); -+} -+ -+/** -+ * bfq_reparent_active_entities - move to the root group all active -+ * entities. -+ * @bfqd: the device data structure with the root group. -+ * @bfqg: the group to move from. -+ * @st: the service tree with the entities. -+ * -+ * Needs queue_lock to be taken and reference to be valid over the call. -+ */ -+static void bfq_reparent_active_entities(struct bfq_data *bfqd, -+ struct bfq_group *bfqg, -+ struct bfq_service_tree *st) -+{ -+ struct rb_root *active = &st->active; -+ struct bfq_entity *entity = NULL; -+ -+ if (!RB_EMPTY_ROOT(&st->active)) -+ entity = bfq_entity_of(rb_first(active)); -+ -+ for (; entity ; entity = bfq_entity_of(rb_first(active))) -+ bfq_reparent_leaf_entity(bfqd, entity); -+ -+ if (bfqg->sched_data.in_service_entity) -+ bfq_reparent_leaf_entity(bfqd, -+ bfqg->sched_data.in_service_entity); -+} -+ -+/** -+ * bfq_destroy_group - destroy @bfqg. -+ * @bfqg: the group being destroyed. -+ * -+ * Destroy @bfqg, making sure that it is not referenced from its parent. -+ * blkio already grabs the queue_lock for us, so no need to use RCU-based magic -+ */ -+static void bfq_pd_offline(struct blkg_policy_data *pd) -+{ -+ struct bfq_service_tree *st; -+ struct bfq_group *bfqg; -+ struct bfq_data *bfqd; -+ struct bfq_entity *entity; -+ int i; -+ -+ BUG_ON(!pd); -+ bfqg = pd_to_bfqg(pd); -+ BUG_ON(!bfqg); -+ bfqd = bfqg->bfqd; -+ BUG_ON(bfqd && !bfqd->root_group); -+ -+ entity = bfqg->my_entity; -+ -+ if (!entity) /* root group */ -+ return; -+ -+ /* -+ * Empty all service_trees belonging to this group before -+ * deactivating the group itself. -+ */ -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { -+ BUG_ON(!bfqg->sched_data.service_tree); -+ st = bfqg->sched_data.service_tree + i; -+ /* -+ * The idle tree may still contain bfq_queues belonging -+ * to exited task because they never migrated to a different -+ * cgroup from the one being destroyed now. No one else -+ * can access them so it's safe to act without any lock. -+ */ -+ bfq_flush_idle_tree(st); -+ -+ /* -+ * It may happen that some queues are still active -+ * (busy) upon group destruction (if the corresponding -+ * processes have been forced to terminate). We move -+ * all the leaf entities corresponding to these queues -+ * to the root_group. -+ * Also, it may happen that the group has an entity -+ * in service, which is disconnected from the active -+ * tree: it must be moved, too. -+ * There is no need to put the sync queues, as the -+ * scheduler has taken no reference. -+ */ -+ bfq_reparent_active_entities(bfqd, bfqg, st); -+ BUG_ON(!RB_EMPTY_ROOT(&st->active)); -+ BUG_ON(!RB_EMPTY_ROOT(&st->idle)); -+ } -+ BUG_ON(bfqg->sched_data.next_in_service); -+ BUG_ON(bfqg->sched_data.in_service_entity); -+ -+ __bfq_deactivate_entity(entity, 0); -+ bfq_put_async_queues(bfqd, bfqg); -+ BUG_ON(entity->tree); -+ -+ bfqg_stats_xfer_dead(bfqg); -+} -+ -+static void bfq_end_wr_async(struct bfq_data *bfqd) -+{ -+ struct blkcg_gq *blkg; -+ -+ list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) { -+ struct bfq_group *bfqg = blkg_to_bfqg(blkg); -+ -+ bfq_end_wr_async_queues(bfqd, bfqg); -+ } -+ bfq_end_wr_async_queues(bfqd, bfqd->root_group); -+} -+ -+static u64 bfqio_cgroup_weight_read(struct cgroup_subsys_state *css, -+ struct cftype *cftype) -+{ -+ struct blkcg *blkcg = css_to_blkcg(css); -+ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); -+ int ret = -EINVAL; -+ -+ spin_lock_irq(&blkcg->lock); -+ ret = bfqgd->weight; -+ spin_unlock_irq(&blkcg->lock); -+ -+ return ret; -+} -+ -+static int bfqio_cgroup_weight_read_dfl(struct seq_file *sf, void *v) -+{ -+ struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); -+ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); -+ -+ spin_lock_irq(&blkcg->lock); -+ seq_printf(sf, "%u\n", bfqgd->weight); -+ spin_unlock_irq(&blkcg->lock); -+ -+ return 0; -+} -+ -+static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css, -+ struct cftype *cftype, -+ u64 val) -+{ -+ struct blkcg *blkcg = css_to_blkcg(css); -+ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); -+ struct blkcg_gq *blkg; -+ int ret = -EINVAL; -+ -+ if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT) -+ return ret; -+ -+ ret = 0; -+ spin_lock_irq(&blkcg->lock); -+ bfqgd->weight = (unsigned short)val; -+ hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { -+ struct bfq_group *bfqg = blkg_to_bfqg(blkg); -+ -+ if (!bfqg) -+ continue; -+ /* -+ * Setting the prio_changed flag of the entity -+ * to 1 with new_weight == weight would re-set -+ * the value of the weight to its ioprio mapping. -+ * Set the flag only if necessary. -+ */ -+ if ((unsigned short)val != bfqg->entity.new_weight) { -+ bfqg->entity.new_weight = (unsigned short)val; -+ /* -+ * Make sure that the above new value has been -+ * stored in bfqg->entity.new_weight before -+ * setting the prio_changed flag. In fact, -+ * this flag may be read asynchronously (in -+ * critical sections protected by a different -+ * lock than that held here), and finding this -+ * flag set may cause the execution of the code -+ * for updating parameters whose value may -+ * depend also on bfqg->entity.new_weight (in -+ * __bfq_entity_update_weight_prio). -+ * This barrier makes sure that the new value -+ * of bfqg->entity.new_weight is correctly -+ * seen in that code. -+ */ -+ smp_wmb(); -+ bfqg->entity.prio_changed = 1; -+ } -+ } -+ spin_unlock_irq(&blkcg->lock); -+ -+ return ret; -+} -+ -+static ssize_t bfqio_cgroup_weight_write_dfl(struct kernfs_open_file *of, -+ char *buf, size_t nbytes, -+ loff_t off) -+{ -+ /* First unsigned long found in the file is used */ -+ return bfqio_cgroup_weight_write(of_css(of), NULL, -+ simple_strtoull(strim(buf), NULL, 0)); -+} -+ -+static int bfqg_print_stat(struct seq_file *sf, void *v) -+{ -+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat, -+ &blkcg_policy_bfq, seq_cft(sf)->private, false); -+ return 0; -+} -+ -+static int bfqg_print_rwstat(struct seq_file *sf, void *v) -+{ -+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat, -+ &blkcg_policy_bfq, seq_cft(sf)->private, true); -+ return 0; -+} -+ -+static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, -+ struct blkg_policy_data *pd, int off) -+{ -+ u64 sum = bfqg_stat_pd_recursive_sum(pd, off); -+ -+ return __blkg_prfill_u64(sf, pd, sum); -+} -+ -+static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf, -+ struct blkg_policy_data *pd, int off) -+{ -+ struct blkg_rwstat sum = bfqg_rwstat_pd_recursive_sum(pd, off); -+ -+ return __blkg_prfill_rwstat(sf, pd, &sum); -+} -+ -+static int bfqg_print_stat_recursive(struct seq_file *sf, void *v) -+{ -+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), -+ bfqg_prfill_stat_recursive, &blkcg_policy_bfq, -+ seq_cft(sf)->private, false); -+ return 0; -+} -+ -+static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v) -+{ -+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), -+ bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq, -+ seq_cft(sf)->private, true); -+ return 0; -+} -+ -+static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf, -+ struct blkg_policy_data *pd, int off) -+{ -+ struct bfq_group *bfqg = pd_to_bfqg(pd); -+ u64 samples = blkg_stat_read(&bfqg->stats.avg_queue_size_samples); -+ u64 v = 0; -+ -+ if (samples) { -+ v = blkg_stat_read(&bfqg->stats.avg_queue_size_sum); -+ v = div64_u64(v, samples); -+ } -+ __blkg_prfill_u64(sf, pd, v); -+ return 0; -+} -+ -+/* print avg_queue_size */ -+static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v) -+{ -+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), -+ bfqg_prfill_avg_queue_size, &blkcg_policy_bfq, -+ 0, false); -+ return 0; -+} -+ -+static struct bfq_group * -+bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) -+{ -+ int ret; -+ -+ ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq); -+ if (ret) -+ return NULL; -+ -+ return blkg_to_bfqg(bfqd->queue->root_blkg); -+} -+ -+static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp) -+{ -+ struct bfq_group_data *bgd; -+ -+ bgd = kzalloc(sizeof(*bgd), GFP_KERNEL); -+ if (!bgd) -+ return NULL; -+ return &bgd->pd; -+} -+ -+static void bfq_cpd_free(struct blkcg_policy_data *cpd) -+{ -+ kfree(cpd_to_bfqgd(cpd)); -+} -+ -+static struct cftype bfqio_files_dfl[] = { -+ { -+ .name = "weight", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .seq_show = bfqio_cgroup_weight_read_dfl, -+ .write = bfqio_cgroup_weight_write_dfl, -+ }, -+ {} /* terminate */ -+}; -+ -+static struct cftype bfqio_files[] = { -+ { -+ .name = "bfq.weight", -+ .read_u64 = bfqio_cgroup_weight_read, -+ .write_u64 = bfqio_cgroup_weight_write, -+ }, -+ /* statistics, cover only the tasks in the bfqg */ -+ { -+ .name = "bfq.time", -+ .private = offsetof(struct bfq_group, stats.time), -+ .seq_show = bfqg_print_stat, -+ }, -+ { -+ .name = "bfq.sectors", -+ .private = offsetof(struct bfq_group, stats.sectors), -+ .seq_show = bfqg_print_stat, -+ }, -+ { -+ .name = "bfq.io_service_bytes", -+ .private = offsetof(struct bfq_group, stats.service_bytes), -+ .seq_show = bfqg_print_rwstat, -+ }, -+ { -+ .name = "bfq.io_serviced", -+ .private = offsetof(struct bfq_group, stats.serviced), -+ .seq_show = bfqg_print_rwstat, -+ }, -+ { -+ .name = "bfq.io_service_time", -+ .private = offsetof(struct bfq_group, stats.service_time), -+ .seq_show = bfqg_print_rwstat, -+ }, -+ { -+ .name = "bfq.io_wait_time", -+ .private = offsetof(struct bfq_group, stats.wait_time), -+ .seq_show = bfqg_print_rwstat, -+ }, -+ { -+ .name = "bfq.io_merged", -+ .private = offsetof(struct bfq_group, stats.merged), -+ .seq_show = bfqg_print_rwstat, -+ }, -+ { -+ .name = "bfq.io_queued", -+ .private = offsetof(struct bfq_group, stats.queued), -+ .seq_show = bfqg_print_rwstat, -+ }, -+ -+ /* the same statictics which cover the bfqg and its descendants */ -+ { -+ .name = "bfq.time_recursive", -+ .private = offsetof(struct bfq_group, stats.time), -+ .seq_show = bfqg_print_stat_recursive, -+ }, -+ { -+ .name = "bfq.sectors_recursive", -+ .private = offsetof(struct bfq_group, stats.sectors), -+ .seq_show = bfqg_print_stat_recursive, -+ }, -+ { -+ .name = "bfq.io_service_bytes_recursive", -+ .private = offsetof(struct bfq_group, stats.service_bytes), -+ .seq_show = bfqg_print_rwstat_recursive, -+ }, -+ { -+ .name = "bfq.io_serviced_recursive", -+ .private = offsetof(struct bfq_group, stats.serviced), -+ .seq_show = bfqg_print_rwstat_recursive, -+ }, -+ { -+ .name = "bfq.io_service_time_recursive", -+ .private = offsetof(struct bfq_group, stats.service_time), -+ .seq_show = bfqg_print_rwstat_recursive, -+ }, -+ { -+ .name = "bfq.io_wait_time_recursive", -+ .private = offsetof(struct bfq_group, stats.wait_time), -+ .seq_show = bfqg_print_rwstat_recursive, -+ }, -+ { -+ .name = "bfq.io_merged_recursive", -+ .private = offsetof(struct bfq_group, stats.merged), -+ .seq_show = bfqg_print_rwstat_recursive, -+ }, -+ { -+ .name = "bfq.io_queued_recursive", -+ .private = offsetof(struct bfq_group, stats.queued), -+ .seq_show = bfqg_print_rwstat_recursive, -+ }, -+ { -+ .name = "bfq.avg_queue_size", -+ .seq_show = bfqg_print_avg_queue_size, -+ }, -+ { -+ .name = "bfq.group_wait_time", -+ .private = offsetof(struct bfq_group, stats.group_wait_time), -+ .seq_show = bfqg_print_stat, -+ }, -+ { -+ .name = "bfq.idle_time", -+ .private = offsetof(struct bfq_group, stats.idle_time), -+ .seq_show = bfqg_print_stat, -+ }, -+ { -+ .name = "bfq.empty_time", -+ .private = offsetof(struct bfq_group, stats.empty_time), -+ .seq_show = bfqg_print_stat, -+ }, -+ { -+ .name = "bfq.dequeue", -+ .private = offsetof(struct bfq_group, stats.dequeue), -+ .seq_show = bfqg_print_stat, -+ }, -+ { -+ .name = "bfq.unaccounted_time", -+ .private = offsetof(struct bfq_group, stats.unaccounted_time), -+ .seq_show = bfqg_print_stat, -+ }, -+ { } /* terminate */ -+}; -+ -+static struct blkcg_policy blkcg_policy_bfq = { -+ .dfl_cftypes = bfqio_files_dfl, -+ .legacy_cftypes = bfqio_files, -+ -+ .pd_alloc_fn = bfq_pd_alloc, -+ .pd_init_fn = bfq_pd_init, -+ .pd_offline_fn = bfq_pd_offline, -+ .pd_free_fn = bfq_pd_free, -+ .pd_reset_stats_fn = bfq_pd_reset_stats, -+ -+ .cpd_alloc_fn = bfq_cpd_alloc, -+ .cpd_init_fn = bfq_cpd_init, -+ .cpd_bind_fn = bfq_cpd_init, -+ .cpd_free_fn = bfq_cpd_free, -+}; -+ -+#else -+ -+static void bfq_init_entity(struct bfq_entity *entity, -+ struct bfq_group *bfqg) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ -+ entity->weight = entity->new_weight; -+ entity->orig_weight = entity->new_weight; -+ if (bfqq) { -+ bfqq->ioprio = bfqq->new_ioprio; -+ bfqq->ioprio_class = bfqq->new_ioprio_class; -+ } -+ entity->sched_data = &bfqg->sched_data; -+} -+ -+static struct bfq_group * -+bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) -+{ -+ struct bfq_data *bfqd = bic_to_bfqd(bic); -+ -+ return bfqd->root_group; -+} -+ -+static void bfq_bfqq_move(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ struct bfq_entity *entity, -+ struct bfq_group *bfqg) -+{ -+} -+ -+static void bfq_end_wr_async(struct bfq_data *bfqd) -+{ -+ bfq_end_wr_async_queues(bfqd, bfqd->root_group); -+} -+ -+static void bfq_disconnect_groups(struct bfq_data *bfqd) -+{ -+ bfq_put_async_queues(bfqd, bfqd->root_group); -+} -+ -+static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, -+ struct blkcg *blkcg) -+{ -+ return bfqd->root_group; -+} -+ -+static struct bfq_group * -+bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) -+{ -+ struct bfq_group *bfqg; -+ int i; -+ -+ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); -+ if (!bfqg) -+ return NULL; -+ -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) -+ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; -+ -+ return bfqg; -+} -+#endif -diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c -new file mode 100644 -index 0000000..fb7bb8f ---- /dev/null -+++ b/block/bfq-ioc.c -@@ -0,0 +1,36 @@ -+/* -+ * BFQ: I/O context handling. -+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> -+ * -+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> -+ * Paolo Valente <paolo.valente@unimore.it> -+ * -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it> -+ */ -+ -+/** -+ * icq_to_bic - convert iocontext queue structure to bfq_io_cq. -+ * @icq: the iocontext queue. -+ */ -+static struct bfq_io_cq *icq_to_bic(struct io_cq *icq) -+{ -+ /* bic->icq is the first member, %NULL will convert to %NULL */ -+ return container_of(icq, struct bfq_io_cq, icq); -+} -+ -+/** -+ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd. -+ * @bfqd: the lookup key. -+ * @ioc: the io_context of the process doing I/O. -+ * -+ * Queue lock must be held. -+ */ -+static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd, -+ struct io_context *ioc) -+{ -+ if (ioc) -+ return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue)); -+ return NULL; -+} -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -new file mode 100644 -index 0000000..85e2169 ---- /dev/null -+++ b/block/bfq-iosched.c -@@ -0,0 +1,3763 @@ -+/* -+ * Budget Fair Queueing (BFQ) disk scheduler. -+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> -+ * -+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> -+ * Paolo Valente <paolo.valente@unimore.it> -+ * -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it> -+ * -+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ -+ * file. -+ * -+ * BFQ is a proportional-share storage-I/O scheduling algorithm based on -+ * the slice-by-slice service scheme of CFQ. But BFQ assigns budgets, -+ * measured in number of sectors, to processes instead of time slices. The -+ * device is not granted to the in-service process for a given time slice, -+ * but until it has exhausted its assigned budget. This change from the time -+ * to the service domain allows BFQ to distribute the device throughput -+ * among processes as desired, without any distortion due to ZBR, workload -+ * fluctuations or other factors. BFQ uses an ad hoc internal scheduler, -+ * called B-WF2Q+, to schedule processes according to their budgets. More -+ * precisely, BFQ schedules queues associated to processes. Thanks to the -+ * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to -+ * I/O-bound processes issuing sequential requests (to boost the -+ * throughput), and yet guarantee a low latency to interactive and soft -+ * real-time applications. -+ * -+ * BFQ is described in [1], where also a reference to the initial, more -+ * theoretical paper on BFQ can be found. The interested reader can find -+ * in the latter paper full details on the main algorithm, as well as -+ * formulas of the guarantees and formal proofs of all the properties. -+ * With respect to the version of BFQ presented in these papers, this -+ * implementation adds a few more heuristics, such as the one that -+ * guarantees a low latency to soft real-time applications, and a -+ * hierarchical extension based on H-WF2Q+. -+ * -+ * B-WF2Q+ is based on WF2Q+, that is described in [2], together with -+ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) -+ * complexity derives from the one introduced with EEVDF in [3]. -+ * -+ * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness -+ * with the BFQ Disk I/O Scheduler'', -+ * Proceedings of the 5th Annual International Systems and Storage -+ * Conference (SYSTOR '12), June 2012. -+ * -+ * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf -+ * -+ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing -+ * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689, -+ * Oct 1997. -+ * -+ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz -+ * -+ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline -+ * First: A Flexible and Accurate Mechanism for Proportional Share -+ * Resource Allocation,'' technical report. -+ * -+ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf -+ */ -+#include <linux/module.h> -+#include <linux/slab.h> -+#include <linux/blkdev.h> -+#include <linux/cgroup.h> -+#include <linux/elevator.h> -+#include <linux/jiffies.h> -+#include <linux/rbtree.h> -+#include <linux/ioprio.h> -+#include "bfq.h" -+#include "blk.h" -+ -+/* Expiration time of sync (0) and async (1) requests, in jiffies. */ -+static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; -+ -+/* Maximum backwards seek, in KiB. */ -+static const int bfq_back_max = 16 * 1024; -+ -+/* Penalty of a backwards seek, in number of sectors. */ -+static const int bfq_back_penalty = 2; -+ -+/* Idling period duration, in jiffies. */ -+static int bfq_slice_idle = HZ / 125; -+ -+/* Minimum number of assigned budgets for which stats are safe to compute. */ -+static const int bfq_stats_min_budgets = 194; -+ -+/* Default maximum budget values, in sectors and number of requests. */ -+static const int bfq_default_max_budget = 16 * 1024; -+static const int bfq_max_budget_async_rq = 4; -+ -+/* -+ * Async to sync throughput distribution is controlled as follows: -+ * when an async request is served, the entity is charged the number -+ * of sectors of the request, multiplied by the factor below -+ */ -+static const int bfq_async_charge_factor = 10; -+ -+/* Default timeout values, in jiffies, approximating CFQ defaults. */ -+static const int bfq_timeout_sync = HZ / 8; -+static int bfq_timeout_async = HZ / 25; -+ -+struct kmem_cache *bfq_pool; -+ -+/* Below this threshold (in ms), we consider thinktime immediate. */ -+#define BFQ_MIN_TT 2 -+ -+/* hw_tag detection: parallel requests threshold and min samples needed. */ -+#define BFQ_HW_QUEUE_THRESHOLD 4 -+#define BFQ_HW_QUEUE_SAMPLES 32 -+ -+#define BFQQ_SEEK_THR (sector_t)(8 * 1024) -+#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) -+ -+/* Min samples used for peak rate estimation (for autotuning). */ -+#define BFQ_PEAK_RATE_SAMPLES 32 -+ -+/* Shift used for peak rate fixed precision calculations. */ -+#define BFQ_RATE_SHIFT 16 -+ -+/* -+ * By default, BFQ computes the duration of the weight raising for -+ * interactive applications automatically, using the following formula: -+ * duration = (R / r) * T, where r is the peak rate of the device, and -+ * R and T are two reference parameters. -+ * In particular, R is the peak rate of the reference device (see below), -+ * and T is a reference time: given the systems that are likely to be -+ * installed on the reference device according to its speed class, T is -+ * about the maximum time needed, under BFQ and while reading two files in -+ * parallel, to load typical large applications on these systems. -+ * In practice, the slower/faster the device at hand is, the more/less it -+ * takes to load applications with respect to the reference device. -+ * Accordingly, the longer/shorter BFQ grants weight raising to interactive -+ * applications. -+ * -+ * BFQ uses four different reference pairs (R, T), depending on: -+ * . whether the device is rotational or non-rotational; -+ * . whether the device is slow, such as old or portable HDDs, as well as -+ * SD cards, or fast, such as newer HDDs and SSDs. -+ * -+ * The device's speed class is dynamically (re)detected in -+ * bfq_update_peak_rate() every time the estimated peak rate is updated. -+ * -+ * In the following definitions, R_slow[0]/R_fast[0] and T_slow[0]/T_fast[0] -+ * are the reference values for a slow/fast rotational device, whereas -+ * R_slow[1]/R_fast[1] and T_slow[1]/T_fast[1] are the reference values for -+ * a slow/fast non-rotational device. Finally, device_speed_thresh are the -+ * thresholds used to switch between speed classes. -+ * Both the reference peak rates and the thresholds are measured in -+ * sectors/usec, left-shifted by BFQ_RATE_SHIFT. -+ */ -+static int R_slow[2] = {1536, 10752}; -+static int R_fast[2] = {17415, 34791}; -+/* -+ * To improve readability, a conversion function is used to initialize the -+ * following arrays, which entails that they can be initialized only in a -+ * function. -+ */ -+static int T_slow[2]; -+static int T_fast[2]; -+static int device_speed_thresh[2]; -+ -+#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ -+ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) -+ -+#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0]) -+#define RQ_BFQQ(rq) ((rq)->elv.priv[1]) -+ -+static void bfq_schedule_dispatch(struct bfq_data *bfqd); -+ -+#include "bfq-ioc.c" -+#include "bfq-sched.c" -+#include "bfq-cgroup.c" -+ -+#define bfq_class_idle(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_IDLE) -+#define bfq_class_rt(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_RT) -+ -+#define bfq_sample_valid(samples) ((samples) > 80) -+ -+/* -+ * We regard a request as SYNC, if either it's a read or has the SYNC bit -+ * set (in which case it could also be a direct WRITE). -+ */ -+static int bfq_bio_sync(struct bio *bio) -+{ -+ if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) -+ return 1; -+ -+ return 0; -+} -+ -+/* -+ * Scheduler run of queue, if there are requests pending and no one in the -+ * driver that will restart queueing. -+ */ -+static void bfq_schedule_dispatch(struct bfq_data *bfqd) -+{ -+ if (bfqd->queued != 0) { -+ bfq_log(bfqd, "schedule dispatch"); -+ kblockd_schedule_work(&bfqd->unplug_work); -+ } -+} -+ -+/* -+ * Lifted from AS - choose which of rq1 and rq2 that is best served now. -+ * We choose the request that is closesr to the head right now. Distance -+ * behind the head is penalized and only allowed to a certain extent. -+ */ -+static struct request *bfq_choose_req(struct bfq_data *bfqd, -+ struct request *rq1, -+ struct request *rq2, -+ sector_t last) -+{ -+ sector_t s1, s2, d1 = 0, d2 = 0; -+ unsigned long back_max; -+#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */ -+#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */ -+ unsigned int wrap = 0; /* bit mask: requests behind the disk head? */ -+ -+ if (!rq1 || rq1 == rq2) -+ return rq2; -+ if (!rq2) -+ return rq1; -+ -+ if (rq_is_sync(rq1) && !rq_is_sync(rq2)) -+ return rq1; -+ else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) -+ return rq2; -+ if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) -+ return rq1; -+ else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META)) -+ return rq2; -+ -+ s1 = blk_rq_pos(rq1); -+ s2 = blk_rq_pos(rq2); -+ -+ /* -+ * By definition, 1KiB is 2 sectors. -+ */ -+ back_max = bfqd->bfq_back_max * 2; -+ -+ /* -+ * Strict one way elevator _except_ in the case where we allow -+ * short backward seeks which are biased as twice the cost of a -+ * similar forward seek. -+ */ -+ if (s1 >= last) -+ d1 = s1 - last; -+ else if (s1 + back_max >= last) -+ d1 = (last - s1) * bfqd->bfq_back_penalty; -+ else -+ wrap |= BFQ_RQ1_WRAP; -+ -+ if (s2 >= last) -+ d2 = s2 - last; -+ else if (s2 + back_max >= last) -+ d2 = (last - s2) * bfqd->bfq_back_penalty; -+ else -+ wrap |= BFQ_RQ2_WRAP; -+ -+ /* Found required data */ -+ -+ /* -+ * By doing switch() on the bit mask "wrap" we avoid having to -+ * check two variables for all permutations: --> faster! -+ */ -+ switch (wrap) { -+ case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ -+ if (d1 < d2) -+ return rq1; -+ else if (d2 < d1) -+ return rq2; -+ -+ if (s1 >= s2) -+ return rq1; -+ else -+ return rq2; -+ -+ case BFQ_RQ2_WRAP: -+ return rq1; -+ case BFQ_RQ1_WRAP: -+ return rq2; -+ case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */ -+ default: -+ /* -+ * Since both rqs are wrapped, -+ * start with the one that's further behind head -+ * (--> only *one* back seek required), -+ * since back seek takes more time than forward. -+ */ -+ if (s1 <= s2) -+ return rq1; -+ else -+ return rq2; -+ } -+} -+ -+/* -+ * Tell whether there are active queues or groups with differentiated weights. -+ */ -+static bool bfq_differentiated_weights(struct bfq_data *bfqd) -+{ -+ /* -+ * For weights to differ, at least one of the trees must contain -+ * at least two nodes. -+ */ -+ return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) && -+ (bfqd->queue_weights_tree.rb_node->rb_left || -+ bfqd->queue_weights_tree.rb_node->rb_right) -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ ) || -+ (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) && -+ (bfqd->group_weights_tree.rb_node->rb_left || -+ bfqd->group_weights_tree.rb_node->rb_right) -+#endif -+ ); -+} -+ -+/* -+ * The following function returns true if every queue must receive the -+ * same share of the throughput (this condition is used when deciding -+ * whether idling may be disabled, see the comments in the function -+ * bfq_bfqq_may_idle()). -+ * -+ * Such a scenario occurs when: -+ * 1) all active queues have the same weight, -+ * 2) all active groups at the same level in the groups tree have the same -+ * weight, -+ * 3) all active groups at the same level in the groups tree have the same -+ * number of children. -+ * -+ * Unfortunately, keeping the necessary state for evaluating exactly the -+ * above symmetry conditions would be quite complex and time-consuming. -+ * Therefore this function evaluates, instead, the following stronger -+ * sub-conditions, for which it is much easier to maintain the needed -+ * state: -+ * 1) all active queues have the same weight, -+ * 2) all active groups have the same weight, -+ * 3) all active groups have at most one active child each. -+ * In particular, the last two conditions are always true if hierarchical -+ * support and the cgroups interface are not enabled, thus no state needs -+ * to be maintained in this case. -+ */ -+static bool bfq_symmetric_scenario(struct bfq_data *bfqd) -+{ -+ return -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ !bfqd->active_numerous_groups && -+#endif -+ !bfq_differentiated_weights(bfqd); -+} -+ -+/* -+ * If the weight-counter tree passed as input contains no counter for -+ * the weight of the input entity, then add that counter; otherwise just -+ * increment the existing counter. -+ * -+ * Note that weight-counter trees contain few nodes in mostly symmetric -+ * scenarios. For example, if all queues have the same weight, then the -+ * weight-counter tree for the queues may contain at most one node. -+ * This holds even if low_latency is on, because weight-raised queues -+ * are not inserted in the tree. -+ * In most scenarios, the rate at which nodes are created/destroyed -+ * should be low too. -+ */ -+static void bfq_weights_tree_add(struct bfq_data *bfqd, -+ struct bfq_entity *entity, -+ struct rb_root *root) -+{ -+ struct rb_node **new = &(root->rb_node), *parent = NULL; -+ -+ /* -+ * Do not insert if the entity is already associated with a -+ * counter, which happens if: -+ * 1) the entity is associated with a queue, -+ * 2) a request arrival has caused the queue to become both -+ * non-weight-raised, and hence change its weight, and -+ * backlogged; in this respect, each of the two events -+ * causes an invocation of this function, -+ * 3) this is the invocation of this function caused by the -+ * second event. This second invocation is actually useless, -+ * and we handle this fact by exiting immediately. More -+ * efficient or clearer solutions might possibly be adopted. -+ */ -+ if (entity->weight_counter) -+ return; -+ -+ while (*new) { -+ struct bfq_weight_counter *__counter = container_of(*new, -+ struct bfq_weight_counter, -+ weights_node); -+ parent = *new; -+ -+ if (entity->weight == __counter->weight) { -+ entity->weight_counter = __counter; -+ goto inc_counter; -+ } -+ if (entity->weight < __counter->weight) -+ new = &((*new)->rb_left); -+ else -+ new = &((*new)->rb_right); -+ } -+ -+ entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), -+ GFP_ATOMIC); -+ entity->weight_counter->weight = entity->weight; -+ rb_link_node(&entity->weight_counter->weights_node, parent, new); -+ rb_insert_color(&entity->weight_counter->weights_node, root); -+ -+inc_counter: -+ entity->weight_counter->num_active++; -+} -+ -+/* -+ * Decrement the weight counter associated with the entity, and, if the -+ * counter reaches 0, remove the counter from the tree. -+ * See the comments to the function bfq_weights_tree_add() for considerations -+ * about overhead. -+ */ -+static void bfq_weights_tree_remove(struct bfq_data *bfqd, -+ struct bfq_entity *entity, -+ struct rb_root *root) -+{ -+ if (!entity->weight_counter) -+ return; -+ -+ BUG_ON(RB_EMPTY_ROOT(root)); -+ BUG_ON(entity->weight_counter->weight != entity->weight); -+ -+ BUG_ON(!entity->weight_counter->num_active); -+ entity->weight_counter->num_active--; -+ if (entity->weight_counter->num_active > 0) -+ goto reset_entity_pointer; -+ -+ rb_erase(&entity->weight_counter->weights_node, root); -+ kfree(entity->weight_counter); -+ -+reset_entity_pointer: -+ entity->weight_counter = NULL; -+} -+ -+static struct request *bfq_find_next_rq(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ struct request *last) -+{ -+ struct rb_node *rbnext = rb_next(&last->rb_node); -+ struct rb_node *rbprev = rb_prev(&last->rb_node); -+ struct request *next = NULL, *prev = NULL; -+ -+ BUG_ON(RB_EMPTY_NODE(&last->rb_node)); -+ -+ if (rbprev) -+ prev = rb_entry_rq(rbprev); -+ -+ if (rbnext) -+ next = rb_entry_rq(rbnext); -+ else { -+ rbnext = rb_first(&bfqq->sort_list); -+ if (rbnext && rbnext != &last->rb_node) -+ next = rb_entry_rq(rbnext); -+ } -+ -+ return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last)); -+} -+ -+/* see the definition of bfq_async_charge_factor for details */ -+static unsigned long bfq_serv_to_charge(struct request *rq, -+ struct bfq_queue *bfqq) -+{ -+ return blk_rq_sectors(rq) * -+ (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->wr_coeff == 1) * -+ bfq_async_charge_factor)); -+} -+ -+/** -+ * bfq_updated_next_req - update the queue after a new next_rq selection. -+ * @bfqd: the device data the queue belongs to. -+ * @bfqq: the queue to update. -+ * -+ * If the first request of a queue changes we make sure that the queue -+ * has enough budget to serve at least its first request (if the -+ * request has grown). We do this because if the queue has not enough -+ * budget for its first request, it has to go through two dispatch -+ * rounds to actually get it dispatched. -+ */ -+static void bfq_updated_next_req(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); -+ struct request *next_rq = bfqq->next_rq; -+ unsigned long new_budget; -+ -+ if (!next_rq) -+ return; -+ -+ if (bfqq == bfqd->in_service_queue) -+ /* -+ * In order not to break guarantees, budgets cannot be -+ * changed after an entity has been selected. -+ */ -+ return; -+ -+ BUG_ON(entity->tree != &st->active); -+ BUG_ON(entity == entity->sched_data->in_service_entity); -+ -+ new_budget = max_t(unsigned long, bfqq->max_budget, -+ bfq_serv_to_charge(next_rq, bfqq)); -+ if (entity->budget != new_budget) { -+ entity->budget = new_budget; -+ bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", -+ new_budget); -+ bfq_activate_bfqq(bfqd, bfqq); -+ } -+} -+ -+static unsigned int bfq_wr_duration(struct bfq_data *bfqd) -+{ -+ u64 dur; -+ -+ if (bfqd->bfq_wr_max_time > 0) -+ return bfqd->bfq_wr_max_time; -+ -+ dur = bfqd->RT_prod; -+ do_div(dur, bfqd->peak_rate); -+ -+ return dur; -+} -+ -+/* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */ -+static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ struct bfq_queue *item; -+ struct hlist_node *n; -+ -+ hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node) -+ hlist_del_init(&item->burst_list_node); -+ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); -+ bfqd->burst_size = 1; -+} -+ -+/* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */ -+static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ /* Increment burst size to take into account also bfqq */ -+ bfqd->burst_size++; -+ -+ if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) { -+ struct bfq_queue *pos, *bfqq_item; -+ struct hlist_node *n; -+ -+ /* -+ * Enough queues have been activated shortly after each -+ * other to consider this burst as large. -+ */ -+ bfqd->large_burst = true; -+ -+ /* -+ * We can now mark all queues in the burst list as -+ * belonging to a large burst. -+ */ -+ hlist_for_each_entry(bfqq_item, &bfqd->burst_list, -+ burst_list_node) -+ bfq_mark_bfqq_in_large_burst(bfqq_item); -+ bfq_mark_bfqq_in_large_burst(bfqq); -+ -+ /* -+ * From now on, and until the current burst finishes, any -+ * new queue being activated shortly after the last queue -+ * was inserted in the burst can be immediately marked as -+ * belonging to a large burst. So the burst list is not -+ * needed any more. Remove it. -+ */ -+ hlist_for_each_entry_safe(pos, n, &bfqd->burst_list, -+ burst_list_node) -+ hlist_del_init(&pos->burst_list_node); -+ } else /* burst not yet large: add bfqq to the burst list */ -+ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); -+} -+ -+/* -+ * If many queues happen to become active shortly after each other, then, -+ * to help the processes associated to these queues get their job done as -+ * soon as possible, it is usually better to not grant either weight-raising -+ * or device idling to these queues. In this comment we describe, firstly, -+ * the reasons why this fact holds, and, secondly, the next function, which -+ * implements the main steps needed to properly mark these queues so that -+ * they can then be treated in a different way. -+ * -+ * As for the terminology, we say that a queue becomes active, i.e., -+ * switches from idle to backlogged, either when it is created (as a -+ * consequence of the arrival of an I/O request), or, if already existing, -+ * when a new request for the queue arrives while the queue is idle. -+ * Bursts of activations, i.e., activations of different queues occurring -+ * shortly after each other, are typically caused by services or applications -+ * that spawn or reactivate many parallel threads/processes. Examples are -+ * systemd during boot or git grep. -+ * -+ * These services or applications benefit mostly from a high throughput: -+ * the quicker the requests of the activated queues are cumulatively served, -+ * the sooner the target job of these queues gets completed. As a consequence, -+ * weight-raising any of these queues, which also implies idling the device -+ * for it, is almost always counterproductive: in most cases it just lowers -+ * throughput. -+ * -+ * On the other hand, a burst of activations may be also caused by the start -+ * of an application that does not consist in a lot of parallel I/O-bound -+ * threads. In fact, with a complex application, the burst may be just a -+ * consequence of the fact that several processes need to be executed to -+ * start-up the application. To start an application as quickly as possible, -+ * the best thing to do is to privilege the I/O related to the application -+ * with respect to all other I/O. Therefore, the best strategy to start as -+ * quickly as possible an application that causes a burst of activations is -+ * to weight-raise all the queues activated during the burst. This is the -+ * exact opposite of the best strategy for the other type of bursts. -+ * -+ * In the end, to take the best action for each of the two cases, the two -+ * types of bursts need to be distinguished. Fortunately, this seems -+ * relatively easy to do, by looking at the sizes of the bursts. In -+ * particular, we found a threshold such that bursts with a larger size -+ * than that threshold are apparently caused only by services or commands -+ * such as systemd or git grep. For brevity, hereafter we call just 'large' -+ * these bursts. BFQ *does not* weight-raise queues whose activations occur -+ * in a large burst. In addition, for each of these queues BFQ performs or -+ * does not perform idling depending on which choice boosts the throughput -+ * most. The exact choice depends on the device and request pattern at -+ * hand. -+ * -+ * Turning back to the next function, it implements all the steps needed -+ * to detect the occurrence of a large burst and to properly mark all the -+ * queues belonging to it (so that they can then be treated in a different -+ * way). This goal is achieved by maintaining a special "burst list" that -+ * holds, temporarily, the queues that belong to the burst in progress. The -+ * list is then used to mark these queues as belonging to a large burst if -+ * the burst does become large. The main steps are the following. -+ * -+ * . when the very first queue is activated, the queue is inserted into the -+ * list (as it could be the first queue in a possible burst) -+ * -+ * . if the current burst has not yet become large, and a queue Q that does -+ * not yet belong to the burst is activated shortly after the last time -+ * at which a new queue entered the burst list, then the function appends -+ * Q to the burst list -+ * -+ * . if, as a consequence of the previous step, the burst size reaches -+ * the large-burst threshold, then -+ * -+ * . all the queues in the burst list are marked as belonging to a -+ * large burst -+ * -+ * . the burst list is deleted; in fact, the burst list already served -+ * its purpose (keeping temporarily track of the queues in a burst, -+ * so as to be able to mark them as belonging to a large burst in the -+ * previous sub-step), and now is not needed any more -+ * -+ * . the device enters a large-burst mode -+ * -+ * . if a queue Q that does not belong to the burst is activated while -+ * the device is in large-burst mode and shortly after the last time -+ * at which a queue either entered the burst list or was marked as -+ * belonging to the current large burst, then Q is immediately marked -+ * as belonging to a large burst. -+ * -+ * . if a queue Q that does not belong to the burst is activated a while -+ * later, i.e., not shortly after, than the last time at which a queue -+ * either entered the burst list or was marked as belonging to the -+ * current large burst, then the current burst is deemed as finished and: -+ * -+ * . the large-burst mode is reset if set -+ * -+ * . the burst list is emptied -+ * -+ * . Q is inserted in the burst list, as Q may be the first queue -+ * in a possible new burst (then the burst list contains just Q -+ * after this step). -+ */ -+static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ bool idle_for_long_time) -+{ -+ /* -+ * If bfqq happened to be activated in a burst, but has been idle -+ * for at least as long as an interactive queue, then we assume -+ * that, in the overall I/O initiated in the burst, the I/O -+ * associated to bfqq is finished. So bfqq does not need to be -+ * treated as a queue belonging to a burst anymore. Accordingly, -+ * we reset bfqq's in_large_burst flag if set, and remove bfqq -+ * from the burst list if it's there. We do not decrement instead -+ * burst_size, because the fact that bfqq does not need to belong -+ * to the burst list any more does not invalidate the fact that -+ * bfqq may have been activated during the current burst. -+ */ -+ if (idle_for_long_time) { -+ hlist_del_init(&bfqq->burst_list_node); -+ bfq_clear_bfqq_in_large_burst(bfqq); -+ } -+ -+ /* -+ * If bfqq is already in the burst list or is part of a large -+ * burst, then there is nothing else to do. -+ */ -+ if (!hlist_unhashed(&bfqq->burst_list_node) || -+ bfq_bfqq_in_large_burst(bfqq)) -+ return; -+ -+ /* -+ * If bfqq's activation happens late enough, then the current -+ * burst is finished, and related data structures must be reset. -+ * -+ * In this respect, consider the special case where bfqq is the very -+ * first queue being activated. In this case, last_ins_in_burst is -+ * not yet significant when we get here. But it is easy to verify -+ * that, whether or not the following condition is true, bfqq will -+ * end up being inserted into the burst list. In particular the -+ * list will happen to contain only bfqq. And this is exactly what -+ * has to happen, as bfqq may be the first queue in a possible -+ * burst. -+ */ -+ if (time_is_before_jiffies(bfqd->last_ins_in_burst + -+ bfqd->bfq_burst_interval)) { -+ bfqd->large_burst = false; -+ bfq_reset_burst_list(bfqd, bfqq); -+ return; -+ } -+ -+ /* -+ * If we get here, then bfqq is being activated shortly after the -+ * last queue. So, if the current burst is also large, we can mark -+ * bfqq as belonging to this large burst immediately. -+ */ -+ if (bfqd->large_burst) { -+ bfq_mark_bfqq_in_large_burst(bfqq); -+ return; -+ } -+ -+ /* -+ * If we get here, then a large-burst state has not yet been -+ * reached, but bfqq is being activated shortly after the last -+ * queue. Then we add bfqq to the burst. -+ */ -+ bfq_add_to_burst(bfqd, bfqq); -+} -+ -+static void bfq_add_request(struct request *rq) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ struct bfq_entity *entity = &bfqq->entity; -+ struct bfq_data *bfqd = bfqq->bfqd; -+ struct request *next_rq, *prev; -+ unsigned long old_wr_coeff = bfqq->wr_coeff; -+ bool interactive = false; -+ -+ bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq)); -+ bfqq->queued[rq_is_sync(rq)]++; -+ bfqd->queued++; -+ -+ elv_rb_add(&bfqq->sort_list, rq); -+ -+ /* -+ * Check if this request is a better next-serve candidate. -+ */ -+ prev = bfqq->next_rq; -+ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); -+ BUG_ON(!next_rq); -+ bfqq->next_rq = next_rq; -+ -+ if (!bfq_bfqq_busy(bfqq)) { -+ bool soft_rt, in_burst, -+ idle_for_long_time = time_is_before_jiffies( -+ bfqq->budget_timeout + -+ bfqd->bfq_wr_min_idle_time); -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, -+ rq->cmd_flags); -+#endif -+ if (bfq_bfqq_sync(bfqq)) { -+ bool already_in_burst = -+ !hlist_unhashed(&bfqq->burst_list_node) || -+ bfq_bfqq_in_large_burst(bfqq); -+ bfq_handle_burst(bfqd, bfqq, idle_for_long_time); -+ /* -+ * If bfqq was not already in the current burst, -+ * then, at this point, bfqq either has been -+ * added to the current burst or has caused the -+ * current burst to terminate. In particular, in -+ * the second case, bfqq has become the first -+ * queue in a possible new burst. -+ * In both cases last_ins_in_burst needs to be -+ * moved forward. -+ */ -+ if (!already_in_burst) -+ bfqd->last_ins_in_burst = jiffies; -+ } -+ -+ in_burst = bfq_bfqq_in_large_burst(bfqq); -+ soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && -+ !in_burst && -+ time_is_before_jiffies(bfqq->soft_rt_next_start); -+ interactive = !in_burst && idle_for_long_time; -+ entity->budget = max_t(unsigned long, bfqq->max_budget, -+ bfq_serv_to_charge(next_rq, bfqq)); -+ -+ if (!bfq_bfqq_IO_bound(bfqq)) { -+ if (time_before(jiffies, -+ RQ_BIC(rq)->ttime.last_end_request + -+ bfqd->bfq_slice_idle)) { -+ bfqq->requests_within_timer++; -+ if (bfqq->requests_within_timer >= -+ bfqd->bfq_requests_within_timer) -+ bfq_mark_bfqq_IO_bound(bfqq); -+ } else -+ bfqq->requests_within_timer = 0; -+ } -+ -+ if (!bfqd->low_latency) -+ goto add_bfqq_busy; -+ -+ /* -+ * If the queue: -+ * - is not being boosted, -+ * - has been idle for enough time, -+ * - is not a sync queue or is linked to a bfq_io_cq (it is -+ * shared "for its nature" or it is not shared and its -+ * requests have not been redirected to a shared queue) -+ * start a weight-raising period. -+ */ -+ if (old_wr_coeff == 1 && (interactive || soft_rt) && -+ (!bfq_bfqq_sync(bfqq) || bfqq->bic)) { -+ bfqq->wr_coeff = bfqd->bfq_wr_coeff; -+ if (interactive) -+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -+ else -+ bfqq->wr_cur_max_time = -+ bfqd->bfq_wr_rt_max_time; -+ bfq_log_bfqq(bfqd, bfqq, -+ "wrais starting at %lu, rais_max_time %u", -+ jiffies, -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } else if (old_wr_coeff > 1) { -+ if (interactive) -+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -+ else if (in_burst || -+ (bfqq->wr_cur_max_time == -+ bfqd->bfq_wr_rt_max_time && -+ !soft_rt)) { -+ bfqq->wr_coeff = 1; -+ bfq_log_bfqq(bfqd, bfqq, -+ "wrais ending at %lu, rais_max_time %u", -+ jiffies, -+ jiffies_to_msecs(bfqq-> -+ wr_cur_max_time)); -+ } else if (time_before( -+ bfqq->last_wr_start_finish + -+ bfqq->wr_cur_max_time, -+ jiffies + -+ bfqd->bfq_wr_rt_max_time) && -+ soft_rt) { -+ /* -+ * -+ * The remaining weight-raising time is lower -+ * than bfqd->bfq_wr_rt_max_time, which means -+ * that the application is enjoying weight -+ * raising either because deemed soft-rt in -+ * the near past, or because deemed interactive -+ * a long ago. -+ * In both cases, resetting now the current -+ * remaining weight-raising time for the -+ * application to the weight-raising duration -+ * for soft rt applications would not cause any -+ * latency increase for the application (as the -+ * new duration would be higher than the -+ * remaining time). -+ * -+ * In addition, the application is now meeting -+ * the requirements for being deemed soft rt. -+ * In the end we can correctly and safely -+ * (re)charge the weight-raising duration for -+ * the application with the weight-raising -+ * duration for soft rt applications. -+ * -+ * In particular, doing this recharge now, i.e., -+ * before the weight-raising period for the -+ * application finishes, reduces the probability -+ * of the following negative scenario: -+ * 1) the weight of a soft rt application is -+ * raised at startup (as for any newly -+ * created application), -+ * 2) since the application is not interactive, -+ * at a certain time weight-raising is -+ * stopped for the application, -+ * 3) at that time the application happens to -+ * still have pending requests, and hence -+ * is destined to not have a chance to be -+ * deemed soft rt before these requests are -+ * completed (see the comments to the -+ * function bfq_bfqq_softrt_next_start() -+ * for details on soft rt detection), -+ * 4) these pending requests experience a high -+ * latency because the application is not -+ * weight-raised while they are pending. -+ */ -+ bfqq->last_wr_start_finish = jiffies; -+ bfqq->wr_cur_max_time = -+ bfqd->bfq_wr_rt_max_time; -+ } -+ } -+ if (old_wr_coeff != bfqq->wr_coeff) -+ entity->prio_changed = 1; -+add_bfqq_busy: -+ bfqq->last_idle_bklogged = jiffies; -+ bfqq->service_from_backlogged = 0; -+ bfq_clear_bfqq_softrt_update(bfqq); -+ bfq_add_bfqq_busy(bfqd, bfqq); -+ } else { -+ if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) && -+ time_is_before_jiffies( -+ bfqq->last_wr_start_finish + -+ bfqd->bfq_wr_min_inter_arr_async)) { -+ bfqq->wr_coeff = bfqd->bfq_wr_coeff; -+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -+ -+ bfqd->wr_busy_queues++; -+ entity->prio_changed = 1; -+ bfq_log_bfqq(bfqd, bfqq, -+ "non-idle wrais starting at %lu, rais_max_time %u", -+ jiffies, -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } -+ if (prev != bfqq->next_rq) -+ bfq_updated_next_req(bfqd, bfqq); -+ } -+ -+ if (bfqd->low_latency && -+ (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive)) -+ bfqq->last_wr_start_finish = jiffies; -+} -+ -+static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, -+ struct bio *bio) -+{ -+ struct task_struct *tsk = current; -+ struct bfq_io_cq *bic; -+ struct bfq_queue *bfqq; -+ -+ bic = bfq_bic_lookup(bfqd, tsk->io_context); -+ if (!bic) -+ return NULL; -+ -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); -+ if (bfqq) -+ return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio)); -+ -+ return NULL; -+} -+ -+static void bfq_activate_request(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ -+ bfqd->rq_in_driver++; -+ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); -+ bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", -+ (unsigned long long) bfqd->last_position); -+} -+ -+static void bfq_deactivate_request(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ -+ BUG_ON(bfqd->rq_in_driver == 0); -+ bfqd->rq_in_driver--; -+} -+ -+static void bfq_remove_request(struct request *rq) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ struct bfq_data *bfqd = bfqq->bfqd; -+ const int sync = rq_is_sync(rq); -+ -+ if (bfqq->next_rq == rq) { -+ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); -+ bfq_updated_next_req(bfqd, bfqq); -+ } -+ -+ if (rq->queuelist.prev != &rq->queuelist) -+ list_del_init(&rq->queuelist); -+ BUG_ON(bfqq->queued[sync] == 0); -+ bfqq->queued[sync]--; -+ bfqd->queued--; -+ elv_rb_del(&bfqq->sort_list, rq); -+ -+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { -+ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) -+ bfq_del_bfqq_busy(bfqd, bfqq, 1); -+ /* -+ * Remove queue from request-position tree as it is empty. -+ */ -+ if (bfqq->pos_root) { -+ rb_erase(&bfqq->pos_node, bfqq->pos_root); -+ bfqq->pos_root = NULL; -+ } -+ } -+ -+ if (rq->cmd_flags & REQ_META) { -+ BUG_ON(bfqq->meta_pending == 0); -+ bfqq->meta_pending--; -+ } -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags); -+#endif -+} -+ -+static int bfq_merge(struct request_queue *q, struct request **req, -+ struct bio *bio) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct request *__rq; -+ -+ __rq = bfq_find_rq_fmerge(bfqd, bio); -+ if (__rq && elv_rq_merge_ok(__rq, bio)) { -+ *req = __rq; -+ return ELEVATOR_FRONT_MERGE; -+ } -+ -+ return ELEVATOR_NO_MERGE; -+} -+ -+static void bfq_merged_request(struct request_queue *q, struct request *req, -+ int type) -+{ -+ if (type == ELEVATOR_FRONT_MERGE && -+ rb_prev(&req->rb_node) && -+ blk_rq_pos(req) < -+ blk_rq_pos(container_of(rb_prev(&req->rb_node), -+ struct request, rb_node))) { -+ struct bfq_queue *bfqq = RQ_BFQQ(req); -+ struct bfq_data *bfqd = bfqq->bfqd; -+ struct request *prev, *next_rq; -+ -+ /* Reposition request in its sort_list */ -+ elv_rb_del(&bfqq->sort_list, req); -+ elv_rb_add(&bfqq->sort_list, req); -+ /* Choose next request to be served for bfqq */ -+ prev = bfqq->next_rq; -+ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, req, -+ bfqd->last_position); -+ BUG_ON(!next_rq); -+ bfqq->next_rq = next_rq; -+ } -+} -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+static void bfq_bio_merged(struct request_queue *q, struct request *req, -+ struct bio *bio) -+{ -+ bfqg_stats_update_io_merged(bfqq_group(RQ_BFQQ(req)), bio->bi_rw); -+} -+#endif -+ -+static void bfq_merged_requests(struct request_queue *q, struct request *rq, -+ struct request *next) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next); -+ -+ /* -+ * If next and rq belong to the same bfq_queue and next is older -+ * than rq, then reposition rq in the fifo (by substituting next -+ * with rq). Otherwise, if next and rq belong to different -+ * bfq_queues, never reposition rq: in fact, we would have to -+ * reposition it with respect to next's position in its own fifo, -+ * which would most certainly be too expensive with respect to -+ * the benefits. -+ */ -+ if (bfqq == next_bfqq && -+ !list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && -+ time_before(next->fifo_time, rq->fifo_time)) { -+ list_del_init(&rq->queuelist); -+ list_replace_init(&next->queuelist, &rq->queuelist); -+ rq->fifo_time = next->fifo_time; -+ } -+ -+ if (bfqq->next_rq == next) -+ bfqq->next_rq = rq; -+ -+ bfq_remove_request(next); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags); -+#endif -+} -+ -+/* Must be called with bfqq != NULL */ -+static void bfq_bfqq_end_wr(struct bfq_queue *bfqq) -+{ -+ BUG_ON(!bfqq); -+ if (bfq_bfqq_busy(bfqq)) -+ bfqq->bfqd->wr_busy_queues--; -+ bfqq->wr_coeff = 1; -+ bfqq->wr_cur_max_time = 0; -+ /* Trigger a weight change on the next activation of the queue */ -+ bfqq->entity.prio_changed = 1; -+} -+ -+static void bfq_end_wr_async_queues(struct bfq_data *bfqd, -+ struct bfq_group *bfqg) -+{ -+ int i, j; -+ -+ for (i = 0; i < 2; i++) -+ for (j = 0; j < IOPRIO_BE_NR; j++) -+ if (bfqg->async_bfqq[i][j]) -+ bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]); -+ if (bfqg->async_idle_bfqq) -+ bfq_bfqq_end_wr(bfqg->async_idle_bfqq); -+} -+ -+static void bfq_end_wr(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq; -+ -+ spin_lock_irq(bfqd->queue->queue_lock); -+ -+ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) -+ bfq_bfqq_end_wr(bfqq); -+ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) -+ bfq_bfqq_end_wr(bfqq); -+ bfq_end_wr_async(bfqd); -+ -+ spin_unlock_irq(bfqd->queue->queue_lock); -+} -+ -+static int bfq_allow_merge(struct request_queue *q, struct request *rq, -+ struct bio *bio) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_io_cq *bic; -+ -+ /* -+ * Disallow merge of a sync bio into an async request. -+ */ -+ if (bfq_bio_sync(bio) && !rq_is_sync(rq)) -+ return 0; -+ -+ /* -+ * Lookup the bfqq that this bio will be queued with. Allow -+ * merge only if rq is queued there. -+ * Queue lock is held here. -+ */ -+ bic = bfq_bic_lookup(bfqd, current->io_context); -+ if (!bic) -+ return 0; -+ -+ return bic_to_bfqq(bic, bfq_bio_sync(bio)) == RQ_BFQQ(rq); -+} -+ -+static void __bfq_set_in_service_queue(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ if (bfqq) { -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ bfqg_stats_update_avg_queue_size(bfqq_group(bfqq)); -+#endif -+ bfq_mark_bfqq_must_alloc(bfqq); -+ bfq_mark_bfqq_budget_new(bfqq); -+ bfq_clear_bfqq_fifo_expire(bfqq); -+ -+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "set_in_service_queue, cur-budget = %d", -+ bfqq->entity.budget); -+ } -+ -+ bfqd->in_service_queue = bfqq; -+} -+ -+/* -+ * Get and set a new queue for service. -+ */ -+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq = bfq_get_next_queue(bfqd); -+ -+ __bfq_set_in_service_queue(bfqd, bfqq); -+ return bfqq; -+} -+ -+/* -+ * If enough samples have been computed, return the current max budget -+ * stored in bfqd, which is dynamically updated according to the -+ * estimated disk peak rate; otherwise return the default max budget -+ */ -+static int bfq_max_budget(struct bfq_data *bfqd) -+{ -+ if (bfqd->budgets_assigned < bfq_stats_min_budgets) -+ return bfq_default_max_budget; -+ else -+ return bfqd->bfq_max_budget; -+} -+ -+/* -+ * Return min budget, which is a fraction of the current or default -+ * max budget (trying with 1/32) -+ */ -+static int bfq_min_budget(struct bfq_data *bfqd) -+{ -+ if (bfqd->budgets_assigned < bfq_stats_min_budgets) -+ return bfq_default_max_budget / 32; -+ else -+ return bfqd->bfq_max_budget / 32; -+} -+ -+static void bfq_arm_slice_timer(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq = bfqd->in_service_queue; -+ struct bfq_io_cq *bic; -+ unsigned long sl; -+ -+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); -+ -+ /* Processes have exited, don't wait. */ -+ bic = bfqd->in_service_bic; -+ if (!bic || atomic_read(&bic->icq.ioc->active_ref) == 0) -+ return; -+ -+ bfq_mark_bfqq_wait_request(bfqq); -+ -+ /* -+ * We don't want to idle for seeks, but we do want to allow -+ * fair distribution of slice time for a process doing back-to-back -+ * seeks. So allow a little bit of time for him to submit a new rq. -+ * -+ * To prevent processes with (partly) seeky workloads from -+ * being too ill-treated, grant them a small fraction of the -+ * assigned budget before reducing the waiting time to -+ * BFQ_MIN_TT. This happened to help reduce latency. -+ */ -+ sl = bfqd->bfq_slice_idle; -+ /* -+ * Unless the queue is being weight-raised or the scenario is -+ * asymmetric, grant only minimum idle time if the queue either -+ * has been seeky for long enough or has already proved to be -+ * constantly seeky. -+ */ -+ if (bfq_sample_valid(bfqq->seek_samples) && -+ ((BFQQ_SEEKY(bfqq) && bfqq->entity.service > -+ bfq_max_budget(bfqq->bfqd) / 8) || -+ bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1 && -+ bfq_symmetric_scenario(bfqd)) -+ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); -+ else if (bfqq->wr_coeff > 1) -+ sl = sl * 3; -+ bfqd->last_idling_start = ktime_get(); -+ mod_timer(&bfqd->idle_slice_timer, jiffies + sl); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ bfqg_stats_set_start_idle_time(bfqq_group(bfqq)); -+#endif -+ bfq_log(bfqd, "arm idle: %u/%u ms", -+ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); -+} -+ -+/* -+ * Set the maximum time for the in-service queue to consume its -+ * budget. This prevents seeky processes from lowering the disk -+ * throughput (always guaranteed with a time slice scheme as in CFQ). -+ */ -+static void bfq_set_budget_timeout(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq = bfqd->in_service_queue; -+ unsigned int timeout_coeff; -+ -+ if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time) -+ timeout_coeff = 1; -+ else -+ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; -+ -+ bfqd->last_budget_start = ktime_get(); -+ -+ bfq_clear_bfqq_budget_new(bfqq); -+ bfqq->budget_timeout = jiffies + -+ bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; -+ -+ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", -+ jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * -+ timeout_coeff)); -+} -+ -+/* -+ * Move request from internal lists to the request queue dispatch list. -+ */ -+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ -+ /* -+ * For consistency, the next instruction should have been executed -+ * after removing the request from the queue and dispatching it. -+ * We execute instead this instruction before bfq_remove_request() -+ * (and hence introduce a temporary inconsistency), for efficiency. -+ * In fact, in a forced_dispatch, this prevents two counters related -+ * to bfqq->dispatched to risk to be uselessly decremented if bfqq -+ * is not in service, and then to be incremented again after -+ * incrementing bfqq->dispatched. -+ */ -+ bfqq->dispatched++; -+ bfq_remove_request(rq); -+ elv_dispatch_sort(q, rq); -+ -+ if (bfq_bfqq_sync(bfqq)) -+ bfqd->sync_flight++; -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ bfqg_stats_update_dispatch(bfqq_group(bfqq), blk_rq_bytes(rq), -+ rq->cmd_flags); -+#endif -+} -+ -+/* -+ * Return expired entry, or NULL to just start from scratch in rbtree. -+ */ -+static struct request *bfq_check_fifo(struct bfq_queue *bfqq) -+{ -+ struct request *rq = NULL; -+ -+ if (bfq_bfqq_fifo_expire(bfqq)) -+ return NULL; -+ -+ bfq_mark_bfqq_fifo_expire(bfqq); -+ -+ if (list_empty(&bfqq->fifo)) -+ return NULL; -+ -+ rq = rq_entry_fifo(bfqq->fifo.next); -+ -+ if (time_before(jiffies, rq->fifo_time)) -+ return NULL; -+ -+ return rq; -+} -+ -+static int bfq_bfqq_budget_left(struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ return entity->budget - entity->service; -+} -+ -+static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ BUG_ON(bfqq != bfqd->in_service_queue); -+ -+ __bfq_bfqd_reset_in_service(bfqd); -+ -+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { -+ /* -+ * Overloading budget_timeout field to store the time -+ * at which the queue remains with no backlog; used by -+ * the weight-raising mechanism. -+ */ -+ bfqq->budget_timeout = jiffies; -+ bfq_del_bfqq_busy(bfqd, bfqq, 1); -+ } else -+ bfq_activate_bfqq(bfqd, bfqq); -+} -+ -+/** -+ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior. -+ * @bfqd: device data. -+ * @bfqq: queue to update. -+ * @reason: reason for expiration. -+ * -+ * Handle the feedback on @bfqq budget at queue expiration. -+ * See the body for detailed comments. -+ */ -+static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ enum bfqq_expiration reason) -+{ -+ struct request *next_rq; -+ int budget, min_budget; -+ -+ budget = bfqq->max_budget; -+ min_budget = bfq_min_budget(bfqd); -+ -+ BUG_ON(bfqq != bfqd->in_service_queue); -+ -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d", -+ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %d, min budg %d", -+ budget, bfq_min_budget(bfqd)); -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", -+ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); -+ -+ if (bfq_bfqq_sync(bfqq)) { -+ switch (reason) { -+ /* -+ * Caveat: in all the following cases we trade latency -+ * for throughput. -+ */ -+ case BFQ_BFQQ_TOO_IDLE: -+ /* -+ * This is the only case where we may reduce -+ * the budget: if there is no request of the -+ * process still waiting for completion, then -+ * we assume (tentatively) that the timer has -+ * expired because the batch of requests of -+ * the process could have been served with a -+ * smaller budget. Hence, betting that -+ * process will behave in the same way when it -+ * becomes backlogged again, we reduce its -+ * next budget. As long as we guess right, -+ * this budget cut reduces the latency -+ * experienced by the process. -+ * -+ * However, if there are still outstanding -+ * requests, then the process may have not yet -+ * issued its next request just because it is -+ * still waiting for the completion of some of -+ * the still outstanding ones. So in this -+ * subcase we do not reduce its budget, on the -+ * contrary we increase it to possibly boost -+ * the throughput, as discussed in the -+ * comments to the BUDGET_TIMEOUT case. -+ */ -+ if (bfqq->dispatched > 0) /* still outstanding reqs */ -+ budget = min(budget * 2, bfqd->bfq_max_budget); -+ else { -+ if (budget > 5 * min_budget) -+ budget -= 4 * min_budget; -+ else -+ budget = min_budget; -+ } -+ break; -+ case BFQ_BFQQ_BUDGET_TIMEOUT: -+ /* -+ * We double the budget here because: 1) it -+ * gives the chance to boost the throughput if -+ * this is not a seeky process (which may have -+ * bumped into this timeout because of, e.g., -+ * ZBR), 2) together with charge_full_budget -+ * it helps give seeky processes higher -+ * timestamps, and hence be served less -+ * frequently. -+ */ -+ budget = min(budget * 2, bfqd->bfq_max_budget); -+ break; -+ case BFQ_BFQQ_BUDGET_EXHAUSTED: -+ /* -+ * The process still has backlog, and did not -+ * let either the budget timeout or the disk -+ * idling timeout expire. Hence it is not -+ * seeky, has a short thinktime and may be -+ * happy with a higher budget too. So -+ * definitely increase the budget of this good -+ * candidate to boost the disk throughput. -+ */ -+ budget = min(budget * 4, bfqd->bfq_max_budget); -+ break; -+ case BFQ_BFQQ_NO_MORE_REQUESTS: -+ /* -+ * Leave the budget unchanged. -+ */ -+ default: -+ return; -+ } -+ } else -+ /* -+ * Async queues get always the maximum possible budget -+ * (their ability to dispatch is limited by -+ * @bfqd->bfq_max_budget_async_rq). -+ */ -+ budget = bfqd->bfq_max_budget; -+ -+ bfqq->max_budget = budget; -+ -+ if (bfqd->budgets_assigned >= bfq_stats_min_budgets && -+ !bfqd->bfq_user_max_budget) -+ bfqq->max_budget = min(bfqq->max_budget, bfqd->bfq_max_budget); -+ -+ /* -+ * Make sure that we have enough budget for the next request. -+ * Since the finish time of the bfqq must be kept in sync with -+ * the budget, be sure to call __bfq_bfqq_expire() after the -+ * update. -+ */ -+ next_rq = bfqq->next_rq; -+ if (next_rq) -+ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget, -+ bfq_serv_to_charge(next_rq, bfqq)); -+ else -+ bfqq->entity.budget = bfqq->max_budget; -+ -+ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %d", -+ next_rq ? blk_rq_sectors(next_rq) : 0, -+ bfqq->entity.budget); -+} -+ -+static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout) -+{ -+ unsigned long max_budget; -+ -+ /* -+ * The max_budget calculated when autotuning is equal to the -+ * amount of sectors transfered in timeout_sync at the -+ * estimated peak rate. -+ */ -+ max_budget = (unsigned long)(peak_rate * 1000 * -+ timeout >> BFQ_RATE_SHIFT); -+ -+ return max_budget; -+} -+ -+/* -+ * In addition to updating the peak rate, checks whether the process -+ * is "slow", and returns 1 if so. This slow flag is used, in addition -+ * to the budget timeout, to reduce the amount of service provided to -+ * seeky processes, and hence reduce their chances to lower the -+ * throughput. See the code for more details. -+ */ -+static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ bool compensate, enum bfqq_expiration reason) -+{ -+ u64 bw, usecs, expected, timeout; -+ ktime_t delta; -+ int update = 0; -+ -+ if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq)) -+ return false; -+ -+ if (compensate) -+ delta = bfqd->last_idling_start; -+ else -+ delta = ktime_get(); -+ delta = ktime_sub(delta, bfqd->last_budget_start); -+ usecs = ktime_to_us(delta); -+ -+ /* Don't trust short/unrealistic values. */ -+ if (usecs < 100 || usecs >= LONG_MAX) -+ return false; -+ -+ /* -+ * Calculate the bandwidth for the last slice. We use a 64 bit -+ * value to store the peak rate, in sectors per usec in fixed -+ * point math. We do so to have enough precision in the estimate -+ * and to avoid overflows. -+ */ -+ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; -+ do_div(bw, (unsigned long)usecs); -+ -+ timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); -+ -+ /* -+ * Use only long (> 20ms) intervals to filter out spikes for -+ * the peak rate estimation. -+ */ -+ if (usecs > 20000) { -+ if (bw > bfqd->peak_rate || -+ (!BFQQ_SEEKY(bfqq) && -+ reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { -+ bfq_log(bfqd, "measured bw =%llu", bw); -+ /* -+ * To smooth oscillations use a low-pass filter with -+ * alpha=7/8, i.e., -+ * new_rate = (7/8) * old_rate + (1/8) * bw -+ */ -+ do_div(bw, 8); -+ if (bw == 0) -+ return 0; -+ bfqd->peak_rate *= 7; -+ do_div(bfqd->peak_rate, 8); -+ bfqd->peak_rate += bw; -+ update = 1; -+ bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); -+ } -+ -+ update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; -+ -+ if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) -+ bfqd->peak_rate_samples++; -+ -+ if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && -+ update) { -+ int dev_type = blk_queue_nonrot(bfqd->queue); -+ -+ if (bfqd->bfq_user_max_budget == 0) { -+ bfqd->bfq_max_budget = -+ bfq_calc_max_budget(bfqd->peak_rate, -+ timeout); -+ bfq_log(bfqd, "new max_budget=%d", -+ bfqd->bfq_max_budget); -+ } -+ if (bfqd->device_speed == BFQ_BFQD_FAST && -+ bfqd->peak_rate < device_speed_thresh[dev_type]) { -+ bfqd->device_speed = BFQ_BFQD_SLOW; -+ bfqd->RT_prod = R_slow[dev_type] * -+ T_slow[dev_type]; -+ } else if (bfqd->device_speed == BFQ_BFQD_SLOW && -+ bfqd->peak_rate > device_speed_thresh[dev_type]) { -+ bfqd->device_speed = BFQ_BFQD_FAST; -+ bfqd->RT_prod = R_fast[dev_type] * -+ T_fast[dev_type]; -+ } -+ } -+ } -+ -+ /* -+ * If the process has been served for a too short time -+ * interval to let its possible sequential accesses prevail on -+ * the initial seek time needed to move the disk head on the -+ * first sector it requested, then give the process a chance -+ * and for the moment return false. -+ */ -+ if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) -+ return false; -+ -+ /* -+ * A process is considered ``slow'' (i.e., seeky, so that we -+ * cannot treat it fairly in the service domain, as it would -+ * slow down too much the other processes) if, when a slice -+ * ends for whatever reason, it has received service at a -+ * rate that would not be high enough to complete the budget -+ * before the budget timeout expiration. -+ */ -+ expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; -+ -+ /* -+ * Caveat: processes doing IO in the slower disk zones will -+ * tend to be slow(er) even if not seeky. And the estimated -+ * peak rate will actually be an average over the disk -+ * surface. Hence, to not be too harsh with unlucky processes, -+ * we keep a budget/3 margin of safety before declaring a -+ * process slow. -+ */ -+ return expected > (4 * bfqq->entity.budget) / 3; -+} -+ -+/* -+ * To be deemed as soft real-time, an application must meet two -+ * requirements. First, the application must not require an average -+ * bandwidth higher than the approximate bandwidth required to playback or -+ * record a compressed high-definition video. -+ * The next function is invoked on the completion of the last request of a -+ * batch, to compute the next-start time instant, soft_rt_next_start, such -+ * that, if the next request of the application does not arrive before -+ * soft_rt_next_start, then the above requirement on the bandwidth is met. -+ * -+ * The second requirement is that the request pattern of the application is -+ * isochronous, i.e., that, after issuing a request or a batch of requests, -+ * the application stops issuing new requests until all its pending requests -+ * have been completed. After that, the application may issue a new batch, -+ * and so on. -+ * For this reason the next function is invoked to compute -+ * soft_rt_next_start only for applications that meet this requirement, -+ * whereas soft_rt_next_start is set to infinity for applications that do -+ * not. -+ * -+ * Unfortunately, even a greedy application may happen to behave in an -+ * isochronous way if the CPU load is high. In fact, the application may -+ * stop issuing requests while the CPUs are busy serving other processes, -+ * then restart, then stop again for a while, and so on. In addition, if -+ * the disk achieves a low enough throughput with the request pattern -+ * issued by the application (e.g., because the request pattern is random -+ * and/or the device is slow), then the application may meet the above -+ * bandwidth requirement too. To prevent such a greedy application to be -+ * deemed as soft real-time, a further rule is used in the computation of -+ * soft_rt_next_start: soft_rt_next_start must be higher than the current -+ * time plus the maximum time for which the arrival of a request is waited -+ * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle. -+ * This filters out greedy applications, as the latter issue instead their -+ * next request as soon as possible after the last one has been completed -+ * (in contrast, when a batch of requests is completed, a soft real-time -+ * application spends some time processing data). -+ * -+ * Unfortunately, the last filter may easily generate false positives if -+ * only bfqd->bfq_slice_idle is used as a reference time interval and one -+ * or both the following cases occur: -+ * 1) HZ is so low that the duration of a jiffy is comparable to or higher -+ * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with -+ * HZ=100. -+ * 2) jiffies, instead of increasing at a constant rate, may stop increasing -+ * for a while, then suddenly 'jump' by several units to recover the lost -+ * increments. This seems to happen, e.g., inside virtual machines. -+ * To address this issue, we do not use as a reference time interval just -+ * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In -+ * particular we add the minimum number of jiffies for which the filter -+ * seems to be quite precise also in embedded systems and KVM/QEMU virtual -+ * machines. -+ */ -+static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ return max(bfqq->last_idle_bklogged + -+ HZ * bfqq->service_from_backlogged / -+ bfqd->bfq_wr_max_softrt_rate, -+ jiffies + bfqq->bfqd->bfq_slice_idle + 4); -+} -+ -+/* -+ * Return the largest-possible time instant such that, for as long as possible, -+ * the current time will be lower than this time instant according to the macro -+ * time_is_before_jiffies(). -+ */ -+static unsigned long bfq_infinity_from_now(unsigned long now) -+{ -+ return now + ULONG_MAX / 2; -+} -+ -+/** -+ * bfq_bfqq_expire - expire a queue. -+ * @bfqd: device owning the queue. -+ * @bfqq: the queue to expire. -+ * @compensate: if true, compensate for the time spent idling. -+ * @reason: the reason causing the expiration. -+ * -+ * -+ * If the process associated to the queue is slow (i.e., seeky), or in -+ * case of budget timeout, or, finally, if it is async, we -+ * artificially charge it an entire budget (independently of the -+ * actual service it received). As a consequence, the queue will get -+ * higher timestamps than the correct ones upon reactivation, and -+ * hence it will be rescheduled as if it had received more service -+ * than what it actually received. In the end, this class of processes -+ * will receive less service in proportion to how slowly they consume -+ * their budgets (and hence how seriously they tend to lower the -+ * throughput). -+ * -+ * In contrast, when a queue expires because it has been idling for -+ * too much or because it exhausted its budget, we do not touch the -+ * amount of service it has received. Hence when the queue will be -+ * reactivated and its timestamps updated, the latter will be in sync -+ * with the actual service received by the queue until expiration. -+ * -+ * Charging a full budget to the first type of queues and the exact -+ * service to the others has the effect of using the WF2Q+ policy to -+ * schedule the former on a timeslice basis, without violating the -+ * service domain guarantees of the latter. -+ */ -+static void bfq_bfqq_expire(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ bool compensate, -+ enum bfqq_expiration reason) -+{ -+ bool slow; -+ -+ BUG_ON(bfqq != bfqd->in_service_queue); -+ -+ /* -+ * Update disk peak rate for autotuning and check whether the -+ * process is slow (see bfq_update_peak_rate). -+ */ -+ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); -+ -+ /* -+ * As above explained, 'punish' slow (i.e., seeky), timed-out -+ * and async queues, to favor sequential sync workloads. -+ * -+ * Processes doing I/O in the slower disk zones will tend to be -+ * slow(er) even if not seeky. Hence, since the estimated peak -+ * rate is actually an average over the disk surface, these -+ * processes may timeout just for bad luck. To avoid punishing -+ * them we do not charge a full budget to a process that -+ * succeeded in consuming at least 2/3 of its budget. -+ */ -+ if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT && -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)) -+ bfq_bfqq_charge_full_budget(bfqq); -+ -+ bfqq->service_from_backlogged += bfqq->entity.service; -+ -+ if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT && -+ !bfq_bfqq_constantly_seeky(bfqq)) { -+ bfq_mark_bfqq_constantly_seeky(bfqq); -+ if (!blk_queue_nonrot(bfqd->queue)) -+ bfqd->const_seeky_busy_in_flight_queues++; -+ } -+ -+ if (reason == BFQ_BFQQ_TOO_IDLE && -+ bfqq->entity.service <= 2 * bfqq->entity.budget / 10) -+ bfq_clear_bfqq_IO_bound(bfqq); -+ -+ if (bfqd->low_latency && bfqq->wr_coeff == 1) -+ bfqq->last_wr_start_finish = jiffies; -+ -+ if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 && -+ RB_EMPTY_ROOT(&bfqq->sort_list)) { -+ /* -+ * If we get here, and there are no outstanding requests, -+ * then the request pattern is isochronous (see the comments -+ * to the function bfq_bfqq_softrt_next_start()). Hence we -+ * can compute soft_rt_next_start. If, instead, the queue -+ * still has outstanding requests, then we have to wait -+ * for the completion of all the outstanding requests to -+ * discover whether the request pattern is actually -+ * isochronous. -+ */ -+ if (bfqq->dispatched == 0) -+ bfqq->soft_rt_next_start = -+ bfq_bfqq_softrt_next_start(bfqd, bfqq); -+ else { -+ /* -+ * The application is still waiting for the -+ * completion of one or more requests: -+ * prevent it from possibly being incorrectly -+ * deemed as soft real-time by setting its -+ * soft_rt_next_start to infinity. In fact, -+ * without this assignment, the application -+ * would be incorrectly deemed as soft -+ * real-time if: -+ * 1) it issued a new request before the -+ * completion of all its in-flight -+ * requests, and -+ * 2) at that time, its soft_rt_next_start -+ * happened to be in the past. -+ */ -+ bfqq->soft_rt_next_start = -+ bfq_infinity_from_now(jiffies); -+ /* -+ * Schedule an update of soft_rt_next_start to when -+ * the task may be discovered to be isochronous. -+ */ -+ bfq_mark_bfqq_softrt_update(bfqq); -+ } -+ } -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, -+ slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); -+ -+ /* -+ * Increase, decrease or leave budget unchanged according to -+ * reason. -+ */ -+ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); -+ __bfq_bfqq_expire(bfqd, bfqq); -+} -+ -+/* -+ * Budget timeout is not implemented through a dedicated timer, but -+ * just checked on request arrivals and completions, as well as on -+ * idle timer expirations. -+ */ -+static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) -+{ -+ if (bfq_bfqq_budget_new(bfqq) || -+ time_before(jiffies, bfqq->budget_timeout)) -+ return false; -+ return true; -+} -+ -+/* -+ * If we expire a queue that is waiting for the arrival of a new -+ * request, we may prevent the fictitious timestamp back-shifting that -+ * allows the guarantees of the queue to be preserved (see [1] for -+ * this tricky aspect). Hence we return true only if this condition -+ * does not hold, or if the queue is slow enough to deserve only to be -+ * kicked off for preserving a high throughput. -+*/ -+static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) -+{ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "may_budget_timeout: wait_request %d left %d timeout %d", -+ bfq_bfqq_wait_request(bfqq), -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, -+ bfq_bfqq_budget_timeout(bfqq)); -+ -+ return (!bfq_bfqq_wait_request(bfqq) || -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3) -+ && -+ bfq_bfqq_budget_timeout(bfqq); -+} -+ -+/* -+ * For a queue that becomes empty, device idling is allowed only if -+ * this function returns true for that queue. As a consequence, since -+ * device idling plays a critical role for both throughput boosting -+ * and service guarantees, the return value of this function plays a -+ * critical role as well. -+ * -+ * In a nutshell, this function returns true only if idling is -+ * beneficial for throughput or, even if detrimental for throughput, -+ * idling is however necessary to preserve service guarantees (low -+ * latency, desired throughput distribution, ...). In particular, on -+ * NCQ-capable devices, this function tries to return false, so as to -+ * help keep the drives' internal queues full, whenever this helps the -+ * device boost the throughput without causing any service-guarantee -+ * issue. -+ * -+ * In more detail, the return value of this function is obtained by, -+ * first, computing a number of boolean variables that take into -+ * account throughput and service-guarantee issues, and, then, -+ * combining these variables in a logical expression. Most of the -+ * issues taken into account are not trivial. We discuss these issues -+ * while introducing the variables. -+ */ -+static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) -+{ -+ struct bfq_data *bfqd = bfqq->bfqd; -+ bool idling_boosts_thr, idling_boosts_thr_without_issues, -+ all_queues_seeky, on_hdd_and_not_all_queues_seeky, -+ idling_needed_for_service_guarantees, -+ asymmetric_scenario; -+ -+ /* -+ * The next variable takes into account the cases where idling -+ * boosts the throughput. -+ * -+ * The value of the variable is computed considering, first, that -+ * idling is virtually always beneficial for the throughput if: -+ * (a) the device is not NCQ-capable, or -+ * (b) regardless of the presence of NCQ, the device is rotational -+ * and the request pattern for bfqq is I/O-bound and sequential. -+ * -+ * Secondly, and in contrast to the above item (b), idling an -+ * NCQ-capable flash-based device would not boost the -+ * throughput even with sequential I/O; rather it would lower -+ * the throughput in proportion to how fast the device -+ * is. Accordingly, the next variable is true if any of the -+ * above conditions (a) and (b) is true, and, in particular, -+ * happens to be false if bfqd is an NCQ-capable flash-based -+ * device. -+ */ -+ idling_boosts_thr = !bfqd->hw_tag || -+ (!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) && -+ bfq_bfqq_idle_window(bfqq)); -+ -+ /* -+ * The value of the next variable, -+ * idling_boosts_thr_without_issues, is equal to that of -+ * idling_boosts_thr, unless a special case holds. In this -+ * special case, described below, idling may cause problems to -+ * weight-raised queues. -+ * -+ * When the request pool is saturated (e.g., in the presence -+ * of write hogs), if the processes associated with -+ * non-weight-raised queues ask for requests at a lower rate, -+ * then processes associated with weight-raised queues have a -+ * higher probability to get a request from the pool -+ * immediately (or at least soon) when they need one. Thus -+ * they have a higher probability to actually get a fraction -+ * of the device throughput proportional to their high -+ * weight. This is especially true with NCQ-capable drives, -+ * which enqueue several requests in advance, and further -+ * reorder internally-queued requests. -+ * -+ * For this reason, we force to false the value of -+ * idling_boosts_thr_without_issues if there are weight-raised -+ * busy queues. In this case, and if bfqq is not weight-raised, -+ * this guarantees that the device is not idled for bfqq (if, -+ * instead, bfqq is weight-raised, then idling will be -+ * guaranteed by another variable, see below). Combined with -+ * the timestamping rules of BFQ (see [1] for details), this -+ * behavior causes bfqq, and hence any sync non-weight-raised -+ * queue, to get a lower number of requests served, and thus -+ * to ask for a lower number of requests from the request -+ * pool, before the busy weight-raised queues get served -+ * again. This often mitigates starvation problems in the -+ * presence of heavy write workloads and NCQ, thereby -+ * guaranteeing a higher application and system responsiveness -+ * in these hostile scenarios. -+ */ -+ idling_boosts_thr_without_issues = idling_boosts_thr && -+ bfqd->wr_busy_queues == 0; -+ -+ /* -+ * There are then two cases where idling must be performed not -+ * for throughput concerns, but to preserve service -+ * guarantees. In the description of these cases, we say, for -+ * short, that a queue is sequential/random if the process -+ * associated to the queue issues sequential/random requests -+ * (in the second case the queue may be tagged as seeky or -+ * even constantly_seeky). -+ * -+ * To introduce the first case, we note that, since -+ * bfq_bfqq_idle_window(bfqq) is false if the device is -+ * NCQ-capable and bfqq is random (see -+ * bfq_update_idle_window()), then, from the above two -+ * assignments it follows that -+ * idling_boosts_thr_without_issues is false if the device is -+ * NCQ-capable and bfqq is random. Therefore, for this case, -+ * device idling would never be allowed if we used just -+ * idling_boosts_thr_without_issues to decide whether to allow -+ * it. And, beneficially, this would imply that throughput -+ * would always be boosted also with random I/O on NCQ-capable -+ * HDDs. -+ * -+ * But we must be careful on this point, to avoid an unfair -+ * treatment for bfqq. In fact, because of the same above -+ * assignments, idling_boosts_thr_without_issues is, on the -+ * other hand, true if 1) the device is an HDD and bfqq is -+ * sequential, and 2) there are no busy weight-raised -+ * queues. As a consequence, if we used just -+ * idling_boosts_thr_without_issues to decide whether to idle -+ * the device, then with an HDD we might easily bump into a -+ * scenario where queues that are sequential and I/O-bound -+ * would enjoy idling, whereas random queues would not. The -+ * latter might then get a low share of the device throughput, -+ * simply because the former would get many requests served -+ * after being set as in service, while the latter would not. -+ * -+ * To address this issue, we start by setting to true a -+ * sentinel variable, on_hdd_and_not_all_queues_seeky, if the -+ * device is rotational and not all queues with pending or -+ * in-flight requests are constantly seeky (i.e., there are -+ * active sequential queues, and bfqq might then be mistreated -+ * if it does not enjoy idling because it is random). -+ */ -+ all_queues_seeky = bfq_bfqq_constantly_seeky(bfqq) && -+ bfqd->busy_in_flight_queues == -+ bfqd->const_seeky_busy_in_flight_queues; -+ -+ on_hdd_and_not_all_queues_seeky = -+ !blk_queue_nonrot(bfqd->queue) && !all_queues_seeky; -+ -+ /* -+ * To introduce the second case where idling needs to be -+ * performed to preserve service guarantees, we can note that -+ * allowing the drive to enqueue more than one request at a -+ * time, and hence delegating de facto final scheduling -+ * decisions to the drive's internal scheduler, causes loss of -+ * control on the actual request service order. In particular, -+ * the critical situation is when requests from different -+ * processes happens to be present, at the same time, in the -+ * internal queue(s) of the drive. In such a situation, the -+ * drive, by deciding the service order of the -+ * internally-queued requests, does determine also the actual -+ * throughput distribution among these processes. But the -+ * drive typically has no notion or concern about per-process -+ * throughput distribution, and makes its decisions only on a -+ * per-request basis. Therefore, the service distribution -+ * enforced by the drive's internal scheduler is likely to -+ * coincide with the desired device-throughput distribution -+ * only in a completely symmetric scenario where: -+ * (i) each of these processes must get the same throughput as -+ * the others; -+ * (ii) all these processes have the same I/O pattern -+ * (either sequential or random). -+ * In fact, in such a scenario, the drive will tend to treat -+ * the requests of each of these processes in about the same -+ * way as the requests of the others, and thus to provide -+ * each of these processes with about the same throughput -+ * (which is exactly the desired throughput distribution). In -+ * contrast, in any asymmetric scenario, device idling is -+ * certainly needed to guarantee that bfqq receives its -+ * assigned fraction of the device throughput (see [1] for -+ * details). -+ * -+ * We address this issue by controlling, actually, only the -+ * symmetry sub-condition (i), i.e., provided that -+ * sub-condition (i) holds, idling is not performed, -+ * regardless of whether sub-condition (ii) holds. In other -+ * words, only if sub-condition (i) holds, then idling is -+ * allowed, and the device tends to be prevented from queueing -+ * many requests, possibly of several processes. The reason -+ * for not controlling also sub-condition (ii) is that, first, -+ * in the case of an HDD, the asymmetry in terms of types of -+ * I/O patterns is already taken in to account in the above -+ * sentinel variable -+ * on_hdd_and_not_all_queues_seeky. Secondly, in the case of a -+ * flash-based device, we prefer however to privilege -+ * throughput (and idling lowers throughput for this type of -+ * devices), for the following reasons: -+ * 1) differently from HDDs, the service time of random -+ * requests is not orders of magnitudes lower than the service -+ * time of sequential requests; thus, even if processes doing -+ * sequential I/O get a preferential treatment with respect to -+ * others doing random I/O, the consequences are not as -+ * dramatic as with HDDs; -+ * 2) if a process doing random I/O does need strong -+ * throughput guarantees, it is hopefully already being -+ * weight-raised, or the user is likely to have assigned it a -+ * higher weight than the other processes (and thus -+ * sub-condition (i) is likely to be false, which triggers -+ * idling). -+ * -+ * According to the above considerations, the next variable is -+ * true (only) if sub-condition (i) holds. To compute the -+ * value of this variable, we not only use the return value of -+ * the function bfq_symmetric_scenario(), but also check -+ * whether bfqq is being weight-raised, because -+ * bfq_symmetric_scenario() does not take into account also -+ * weight-raised queues (see comments to -+ * bfq_weights_tree_add()). -+ * -+ * As a side note, it is worth considering that the above -+ * device-idling countermeasures may however fail in the -+ * following unlucky scenario: if idling is (correctly) -+ * disabled in a time period during which all symmetry -+ * sub-conditions hold, and hence the device is allowed to -+ * enqueue many requests, but at some later point in time some -+ * sub-condition stops to hold, then it may become impossible -+ * to let requests be served in the desired order until all -+ * the requests already queued in the device have been served. -+ */ -+ asymmetric_scenario = bfqq->wr_coeff > 1 || -+ !bfq_symmetric_scenario(bfqd); -+ -+ /* -+ * Finally, there is a case where maximizing throughput is the -+ * best choice even if it may cause unfairness toward -+ * bfqq. Such a case is when bfqq became active in a burst of -+ * queue activations. Queues that became active during a large -+ * burst benefit only from throughput, as discussed in the -+ * comments to bfq_handle_burst. Thus, if bfqq became active -+ * in a burst and not idling the device maximizes throughput, -+ * then the device must no be idled, because not idling the -+ * device provides bfqq and all other queues in the burst with -+ * maximum benefit. Combining this and the two cases above, we -+ * can now establish when idling is actually needed to -+ * preserve service guarantees. -+ */ -+ idling_needed_for_service_guarantees = -+ (on_hdd_and_not_all_queues_seeky || asymmetric_scenario) && -+ !bfq_bfqq_in_large_burst(bfqq); -+ -+ /* -+ * We have now all the components we need to compute the return -+ * value of the function, which is true only if both the following -+ * conditions hold: -+ * 1) bfqq is sync, because idling make sense only for sync queues; -+ * 2) idling either boosts the throughput (without issues), or -+ * is necessary to preserve service guarantees. -+ */ -+ return bfq_bfqq_sync(bfqq) && -+ (idling_boosts_thr_without_issues || -+ idling_needed_for_service_guarantees); -+} -+ -+/* -+ * If the in-service queue is empty but the function bfq_bfqq_may_idle -+ * returns true, then: -+ * 1) the queue must remain in service and cannot be expired, and -+ * 2) the device must be idled to wait for the possible arrival of a new -+ * request for the queue. -+ * See the comments to the function bfq_bfqq_may_idle for the reasons -+ * why performing device idling is the best choice to boost the throughput -+ * and preserve service guarantees when bfq_bfqq_may_idle itself -+ * returns true. -+ */ -+static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) -+{ -+ struct bfq_data *bfqd = bfqq->bfqd; -+ -+ return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 && -+ bfq_bfqq_may_idle(bfqq); -+} -+ -+/* -+ * Select a queue for service. If we have a current queue in service, -+ * check whether to continue servicing it, or retrieve and set a new one. -+ */ -+static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq; -+ struct request *next_rq; -+ enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; -+ -+ bfqq = bfqd->in_service_queue; -+ if (!bfqq) -+ goto new_queue; -+ -+ bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); -+ -+ if (bfq_may_expire_for_budg_timeout(bfqq) && -+ !timer_pending(&bfqd->idle_slice_timer) && -+ !bfq_bfqq_must_idle(bfqq)) -+ goto expire; -+ -+ next_rq = bfqq->next_rq; -+ /* -+ * If bfqq has requests queued and it has enough budget left to -+ * serve them, keep the queue, otherwise expire it. -+ */ -+ if (next_rq) { -+ if (bfq_serv_to_charge(next_rq, bfqq) > -+ bfq_bfqq_budget_left(bfqq)) { -+ reason = BFQ_BFQQ_BUDGET_EXHAUSTED; -+ goto expire; -+ } else { -+ /* -+ * The idle timer may be pending because we may -+ * not disable disk idling even when a new request -+ * arrives. -+ */ -+ if (timer_pending(&bfqd->idle_slice_timer)) { -+ /* -+ * If we get here: 1) at least a new request -+ * has arrived but we have not disabled the -+ * timer because the request was too small, -+ * 2) then the block layer has unplugged -+ * the device, causing the dispatch to be -+ * invoked. -+ * -+ * Since the device is unplugged, now the -+ * requests are probably large enough to -+ * provide a reasonable throughput. -+ * So we disable idling. -+ */ -+ bfq_clear_bfqq_wait_request(bfqq); -+ del_timer(&bfqd->idle_slice_timer); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ bfqg_stats_update_idle_time(bfqq_group(bfqq)); -+#endif -+ } -+ goto keep_queue; -+ } -+ } -+ -+ /* -+ * No requests pending. However, if the in-service queue is idling -+ * for a new request, or has requests waiting for a completion and -+ * may idle after their completion, then keep it anyway. -+ */ -+ if (timer_pending(&bfqd->idle_slice_timer) || -+ (bfqq->dispatched != 0 && bfq_bfqq_may_idle(bfqq))) { -+ bfqq = NULL; -+ goto keep_queue; -+ } -+ -+ reason = BFQ_BFQQ_NO_MORE_REQUESTS; -+expire: -+ bfq_bfqq_expire(bfqd, bfqq, false, reason); -+new_queue: -+ bfqq = bfq_set_in_service_queue(bfqd); -+ bfq_log(bfqd, "select_queue: new queue %d returned", -+ bfqq ? bfqq->pid : 0); -+keep_queue: -+ return bfqq; -+} -+ -+static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */ -+ bfq_log_bfqq(bfqd, bfqq, -+ "raising period dur %u/%u msec, old coeff %u, w %d(%d)", -+ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), -+ jiffies_to_msecs(bfqq->wr_cur_max_time), -+ bfqq->wr_coeff, -+ bfqq->entity.weight, bfqq->entity.orig_weight); -+ -+ BUG_ON(bfqq != bfqd->in_service_queue && entity->weight != -+ entity->orig_weight * bfqq->wr_coeff); -+ if (entity->prio_changed) -+ bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); -+ -+ /* -+ * If the queue was activated in a burst, or -+ * too much time has elapsed from the beginning -+ * of this weight-raising period, then end weight -+ * raising. -+ */ -+ if (bfq_bfqq_in_large_burst(bfqq) || -+ time_is_before_jiffies(bfqq->last_wr_start_finish + -+ bfqq->wr_cur_max_time)) { -+ bfqq->last_wr_start_finish = jiffies; -+ bfq_log_bfqq(bfqd, bfqq, -+ "wrais ending at %lu, rais_max_time %u", -+ bfqq->last_wr_start_finish, -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ bfq_bfqq_end_wr(bfqq); -+ } -+ } -+ /* Update weight both if it must be raised and if it must be lowered */ -+ if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1)) -+ __bfq_entity_update_weight_prio( -+ bfq_entity_service_tree(entity), -+ entity); -+} -+ -+/* -+ * Dispatch one request from bfqq, moving it to the request queue -+ * dispatch list. -+ */ -+static int bfq_dispatch_request(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ int dispatched = 0; -+ struct request *rq; -+ unsigned long service_to_charge; -+ -+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); -+ -+ /* Follow expired path, else get first next available. */ -+ rq = bfq_check_fifo(bfqq); -+ if (!rq) -+ rq = bfqq->next_rq; -+ service_to_charge = bfq_serv_to_charge(rq, bfqq); -+ -+ if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { -+ /* -+ * This may happen if the next rq is chosen in fifo order -+ * instead of sector order. The budget is properly -+ * dimensioned to be always sufficient to serve the next -+ * request only if it is chosen in sector order. The reason -+ * is that it would be quite inefficient and little useful -+ * to always make sure that the budget is large enough to -+ * serve even the possible next rq in fifo order. -+ * In fact, requests are seldom served in fifo order. -+ * -+ * Expire the queue for budget exhaustion, and make sure -+ * that the next act_budget is enough to serve the next -+ * request, even if it comes from the fifo expired path. -+ */ -+ bfqq->next_rq = rq; -+ /* -+ * Since this dispatch is failed, make sure that -+ * a new one will be performed -+ */ -+ if (!bfqd->rq_in_driver) -+ bfq_schedule_dispatch(bfqd); -+ goto expire; -+ } -+ -+ /* Finally, insert request into driver dispatch list. */ -+ bfq_bfqq_served(bfqq, service_to_charge); -+ bfq_dispatch_insert(bfqd->queue, rq); -+ -+ bfq_update_wr_data(bfqd, bfqq); -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "dispatched %u sec req (%llu), budg left %d", -+ blk_rq_sectors(rq), -+ (unsigned long long) blk_rq_pos(rq), -+ bfq_bfqq_budget_left(bfqq)); -+ -+ dispatched++; -+ -+ if (!bfqd->in_service_bic) { -+ atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount); -+ bfqd->in_service_bic = RQ_BIC(rq); -+ } -+ -+ if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) && -+ dispatched >= bfqd->bfq_max_budget_async_rq) || -+ bfq_class_idle(bfqq))) -+ goto expire; -+ -+ return dispatched; -+ -+expire: -+ bfq_bfqq_expire(bfqd, bfqq, false, BFQ_BFQQ_BUDGET_EXHAUSTED); -+ return dispatched; -+} -+ -+static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq) -+{ -+ int dispatched = 0; -+ -+ while (bfqq->next_rq) { -+ bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq); -+ dispatched++; -+ } -+ -+ BUG_ON(!list_empty(&bfqq->fifo)); -+ return dispatched; -+} -+ -+/* -+ * Drain our current requests. -+ * Used for barriers and when switching io schedulers on-the-fly. -+ */ -+static int bfq_forced_dispatch(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq, *n; -+ struct bfq_service_tree *st; -+ int dispatched = 0; -+ -+ bfqq = bfqd->in_service_queue; -+ if (bfqq) -+ __bfq_bfqq_expire(bfqd, bfqq); -+ -+ /* -+ * Loop through classes, and be careful to leave the scheduler -+ * in a consistent state, as feedback mechanisms and vtime -+ * updates cannot be disabled during the process. -+ */ -+ list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) { -+ st = bfq_entity_service_tree(&bfqq->entity); -+ -+ dispatched += __bfq_forced_dispatch_bfqq(bfqq); -+ bfqq->max_budget = bfq_max_budget(bfqd); -+ -+ bfq_forget_idle(st); -+ } -+ -+ BUG_ON(bfqd->busy_queues != 0); -+ -+ return dispatched; -+} -+ -+static int bfq_dispatch_requests(struct request_queue *q, int force) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_queue *bfqq; -+ int max_dispatch; -+ -+ bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); -+ if (bfqd->busy_queues == 0) -+ return 0; -+ -+ if (unlikely(force)) -+ return bfq_forced_dispatch(bfqd); -+ -+ bfqq = bfq_select_queue(bfqd); -+ if (!bfqq) -+ return 0; -+ -+ if (bfq_class_idle(bfqq)) -+ max_dispatch = 1; -+ -+ if (!bfq_bfqq_sync(bfqq)) -+ max_dispatch = bfqd->bfq_max_budget_async_rq; -+ -+ if (!bfq_bfqq_sync(bfqq) && bfqq->dispatched >= max_dispatch) { -+ if (bfqd->busy_queues > 1) -+ return 0; -+ if (bfqq->dispatched >= 4 * max_dispatch) -+ return 0; -+ } -+ -+ if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq)) -+ return 0; -+ -+ bfq_clear_bfqq_wait_request(bfqq); -+ BUG_ON(timer_pending(&bfqd->idle_slice_timer)); -+ -+ if (!bfq_dispatch_request(bfqd, bfqq)) -+ return 0; -+ -+ bfq_log_bfqq(bfqd, bfqq, "dispatched %s request", -+ bfq_bfqq_sync(bfqq) ? "sync" : "async"); -+ -+ return 1; -+} -+ -+/* -+ * Task holds one reference to the queue, dropped when task exits. Each rq -+ * in-flight on this queue also holds a reference, dropped when rq is freed. -+ * -+ * Queue lock must be held here. -+ */ -+static void bfq_put_queue(struct bfq_queue *bfqq) -+{ -+ struct bfq_data *bfqd = bfqq->bfqd; -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ struct bfq_group *bfqg = bfqq_group(bfqq); -+#endif -+ -+ BUG_ON(atomic_read(&bfqq->ref) <= 0); -+ -+ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, -+ atomic_read(&bfqq->ref)); -+ if (!atomic_dec_and_test(&bfqq->ref)) -+ return; -+ -+ BUG_ON(rb_first(&bfqq->sort_list)); -+ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); -+ BUG_ON(bfqq->entity.tree); -+ BUG_ON(bfq_bfqq_busy(bfqq)); -+ BUG_ON(bfqd->in_service_queue == bfqq); -+ -+ if (bfq_bfqq_sync(bfqq)) -+ /* -+ * The fact that this queue is being destroyed does not -+ * invalidate the fact that this queue may have been -+ * activated during the current burst. As a consequence, -+ * although the queue does not exist anymore, and hence -+ * needs to be removed from the burst list if there, -+ * the burst size has not to be decremented. -+ */ -+ hlist_del_init(&bfqq->burst_list_node); -+ -+ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); -+ -+ kmem_cache_free(bfq_pool, bfqq); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ bfqg_put(bfqg); -+#endif -+} -+ -+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ if (bfqq == bfqd->in_service_queue) { -+ __bfq_bfqq_expire(bfqd, bfqq); -+ bfq_schedule_dispatch(bfqd); -+ } -+ -+ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, -+ atomic_read(&bfqq->ref)); -+ -+ bfq_put_queue(bfqq); -+} -+ -+static void bfq_init_icq(struct io_cq *icq) -+{ -+ struct bfq_io_cq *bic = icq_to_bic(icq); -+ -+ bic->ttime.last_end_request = jiffies; -+} -+ -+static void bfq_exit_icq(struct io_cq *icq) -+{ -+ struct bfq_io_cq *bic = icq_to_bic(icq); -+ struct bfq_data *bfqd = bic_to_bfqd(bic); -+ -+ if (bic->bfqq[BLK_RW_ASYNC]) { -+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]); -+ bic->bfqq[BLK_RW_ASYNC] = NULL; -+ } -+ -+ if (bic->bfqq[BLK_RW_SYNC]) { -+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]); -+ bic->bfqq[BLK_RW_SYNC] = NULL; -+ } -+} -+ -+/* -+ * Update the entity prio values; note that the new values will not -+ * be used until the next (re)activation. -+ */ -+static void -+bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) -+{ -+ struct task_struct *tsk = current; -+ int ioprio_class; -+ -+ ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); -+ switch (ioprio_class) { -+ default: -+ dev_err(bfqq->bfqd->queue->backing_dev_info.dev, -+ "bfq: bad prio class %d\n", ioprio_class); -+ case IOPRIO_CLASS_NONE: -+ /* -+ * No prio set, inherit CPU scheduling settings. -+ */ -+ bfqq->new_ioprio = task_nice_ioprio(tsk); -+ bfqq->new_ioprio_class = task_nice_ioclass(tsk); -+ break; -+ case IOPRIO_CLASS_RT: -+ bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio); -+ bfqq->new_ioprio_class = IOPRIO_CLASS_RT; -+ break; -+ case IOPRIO_CLASS_BE: -+ bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio); -+ bfqq->new_ioprio_class = IOPRIO_CLASS_BE; -+ break; -+ case IOPRIO_CLASS_IDLE: -+ bfqq->new_ioprio_class = IOPRIO_CLASS_IDLE; -+ bfqq->new_ioprio = 7; -+ bfq_clear_bfqq_idle_window(bfqq); -+ break; -+ } -+ -+ if (bfqq->new_ioprio < 0 || bfqq->new_ioprio >= IOPRIO_BE_NR) { -+ pr_crit("bfq_set_next_ioprio_data: new_ioprio %d\n", -+ bfqq->new_ioprio); -+ BUG(); -+ } -+ -+ bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio); -+ bfqq->entity.prio_changed = 1; -+} -+ -+static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio) -+{ -+ struct bfq_data *bfqd; -+ struct bfq_queue *bfqq, *new_bfqq; -+ unsigned long uninitialized_var(flags); -+ int ioprio = bic->icq.ioc->ioprio; -+ -+ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), -+ &flags); -+ /* -+ * This condition may trigger on a newly created bic, be sure to -+ * drop the lock before returning. -+ */ -+ if (unlikely(!bfqd) || likely(bic->ioprio == ioprio)) -+ goto out; -+ -+ bic->ioprio = ioprio; -+ -+ bfqq = bic->bfqq[BLK_RW_ASYNC]; -+ if (bfqq) { -+ new_bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic, -+ GFP_ATOMIC); -+ if (new_bfqq) { -+ bic->bfqq[BLK_RW_ASYNC] = new_bfqq; -+ bfq_log_bfqq(bfqd, bfqq, -+ "check_ioprio_change: bfqq %p %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ bfq_put_queue(bfqq); -+ } -+ } -+ -+ bfqq = bic->bfqq[BLK_RW_SYNC]; -+ if (bfqq) -+ bfq_set_next_ioprio_data(bfqq, bic); -+ -+out: -+ bfq_put_bfqd_unlock(bfqd, &flags); -+} -+ -+static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ struct bfq_io_cq *bic, pid_t pid, int is_sync) -+{ -+ RB_CLEAR_NODE(&bfqq->entity.rb_node); -+ INIT_LIST_HEAD(&bfqq->fifo); -+ INIT_HLIST_NODE(&bfqq->burst_list_node); -+ -+ atomic_set(&bfqq->ref, 0); -+ bfqq->bfqd = bfqd; -+ -+ if (bic) -+ bfq_set_next_ioprio_data(bfqq, bic); -+ -+ if (is_sync) { -+ if (!bfq_class_idle(bfqq)) -+ bfq_mark_bfqq_idle_window(bfqq); -+ bfq_mark_bfqq_sync(bfqq); -+ } else -+ bfq_clear_bfqq_sync(bfqq); -+ bfq_mark_bfqq_IO_bound(bfqq); -+ -+ /* Tentative initial value to trade off between thr and lat */ -+ bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; -+ bfqq->pid = pid; -+ -+ bfqq->wr_coeff = 1; -+ bfqq->last_wr_start_finish = 0; -+ /* -+ * Set to the value for which bfqq will not be deemed as -+ * soft rt when it becomes backlogged. -+ */ -+ bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies); -+} -+ -+static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, -+ struct bio *bio, int is_sync, -+ struct bfq_io_cq *bic, -+ gfp_t gfp_mask) -+{ -+ struct bfq_group *bfqg; -+ struct bfq_queue *bfqq, *new_bfqq = NULL; -+ struct blkcg *blkcg; -+ -+retry: -+ rcu_read_lock(); -+ -+ blkcg = bio_blkcg(bio); -+ bfqg = bfq_find_alloc_group(bfqd, blkcg); -+ /* bic always exists here */ -+ bfqq = bic_to_bfqq(bic, is_sync); -+ -+ /* -+ * Always try a new alloc if we fall back to the OOM bfqq -+ * originally, since it should just be a temporary situation. -+ */ -+ if (!bfqq || bfqq == &bfqd->oom_bfqq) { -+ bfqq = NULL; -+ if (new_bfqq) { -+ bfqq = new_bfqq; -+ new_bfqq = NULL; -+ } else if (gfpflags_allow_blocking(gfp_mask)) { -+ rcu_read_unlock(); -+ spin_unlock_irq(bfqd->queue->queue_lock); -+ new_bfqq = kmem_cache_alloc_node(bfq_pool, -+ gfp_mask | __GFP_ZERO, -+ bfqd->queue->node); -+ spin_lock_irq(bfqd->queue->queue_lock); -+ if (new_bfqq) -+ goto retry; -+ } else { -+ bfqq = kmem_cache_alloc_node(bfq_pool, -+ gfp_mask | __GFP_ZERO, -+ bfqd->queue->node); -+ } -+ -+ if (bfqq) { -+ bfq_init_bfqq(bfqd, bfqq, bic, current->pid, -+ is_sync); -+ bfq_init_entity(&bfqq->entity, bfqg); -+ bfq_log_bfqq(bfqd, bfqq, "allocated"); -+ } else { -+ bfqq = &bfqd->oom_bfqq; -+ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); -+ } -+ } -+ -+ if (new_bfqq) -+ kmem_cache_free(bfq_pool, new_bfqq); -+ -+ rcu_read_unlock(); -+ -+ return bfqq; -+} -+ -+static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, -+ struct bfq_group *bfqg, -+ int ioprio_class, int ioprio) -+{ -+ switch (ioprio_class) { -+ case IOPRIO_CLASS_RT: -+ return &bfqg->async_bfqq[0][ioprio]; -+ case IOPRIO_CLASS_NONE: -+ ioprio = IOPRIO_NORM; -+ /* fall through */ -+ case IOPRIO_CLASS_BE: -+ return &bfqg->async_bfqq[1][ioprio]; -+ case IOPRIO_CLASS_IDLE: -+ return &bfqg->async_idle_bfqq; -+ default: -+ BUG(); -+ } -+} -+ -+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, -+ struct bio *bio, int is_sync, -+ struct bfq_io_cq *bic, gfp_t gfp_mask) -+{ -+ const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio); -+ const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); -+ struct bfq_queue **async_bfqq = NULL; -+ struct bfq_queue *bfqq = NULL; -+ -+ if (!is_sync) { -+ struct blkcg *blkcg; -+ struct bfq_group *bfqg; -+ -+ rcu_read_lock(); -+ blkcg = bio_blkcg(bio); -+ rcu_read_unlock(); -+ bfqg = bfq_find_alloc_group(bfqd, blkcg); -+ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, -+ ioprio); -+ bfqq = *async_bfqq; -+ } -+ -+ if (!bfqq) -+ bfqq = bfq_find_alloc_queue(bfqd, bio, is_sync, bic, gfp_mask); -+ -+ /* -+ * Pin the queue now that it's allocated, scheduler exit will -+ * prune it. -+ */ -+ if (!is_sync && !(*async_bfqq)) { -+ atomic_inc(&bfqq->ref); -+ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ *async_bfqq = bfqq; -+ } -+ -+ atomic_inc(&bfqq->ref); -+ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, -+ atomic_read(&bfqq->ref)); -+ return bfqq; -+} -+ -+static void bfq_update_io_thinktime(struct bfq_data *bfqd, -+ struct bfq_io_cq *bic) -+{ -+ unsigned long elapsed = jiffies - bic->ttime.last_end_request; -+ unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); -+ -+ bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8; -+ bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8; -+ bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) / -+ bic->ttime.ttime_samples; -+} -+ -+static void bfq_update_io_seektime(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ struct request *rq) -+{ -+ sector_t sdist; -+ u64 total; -+ -+ if (bfqq->last_request_pos < blk_rq_pos(rq)) -+ sdist = blk_rq_pos(rq) - bfqq->last_request_pos; -+ else -+ sdist = bfqq->last_request_pos - blk_rq_pos(rq); -+ -+ /* -+ * Don't allow the seek distance to get too large from the -+ * odd fragment, pagein, etc. -+ */ -+ if (bfqq->seek_samples == 0) /* first request, not really a seek */ -+ sdist = 0; -+ else if (bfqq->seek_samples <= 60) /* second & third seek */ -+ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024); -+ else -+ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64); -+ -+ bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8; -+ bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8; -+ total = bfqq->seek_total + (bfqq->seek_samples/2); -+ do_div(total, bfqq->seek_samples); -+ bfqq->seek_mean = (sector_t)total; -+ -+ bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist, -+ (u64)bfqq->seek_mean); -+} -+ -+/* -+ * Disable idle window if the process thinks too long or seeks so much that -+ * it doesn't matter. -+ */ -+static void bfq_update_idle_window(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ struct bfq_io_cq *bic) -+{ -+ int enable_idle; -+ -+ /* Don't idle for async or idle io prio class. */ -+ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) -+ return; -+ -+ enable_idle = bfq_bfqq_idle_window(bfqq); -+ -+ if (atomic_read(&bic->icq.ioc->active_ref) == 0 || -+ bfqd->bfq_slice_idle == 0 || -+ (bfqd->hw_tag && BFQQ_SEEKY(bfqq) && -+ bfqq->wr_coeff == 1)) -+ enable_idle = 0; -+ else if (bfq_sample_valid(bic->ttime.ttime_samples)) { -+ if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle && -+ bfqq->wr_coeff == 1) -+ enable_idle = 0; -+ else -+ enable_idle = 1; -+ } -+ bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d", -+ enable_idle); -+ -+ if (enable_idle) -+ bfq_mark_bfqq_idle_window(bfqq); -+ else -+ bfq_clear_bfqq_idle_window(bfqq); -+} -+ -+/* -+ * Called when a new fs request (rq) is added to bfqq. Check if there's -+ * something we should do about it. -+ */ -+static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ struct request *rq) -+{ -+ struct bfq_io_cq *bic = RQ_BIC(rq); -+ -+ if (rq->cmd_flags & REQ_META) -+ bfqq->meta_pending++; -+ -+ bfq_update_io_thinktime(bfqd, bic); -+ bfq_update_io_seektime(bfqd, bfqq, rq); -+ if (!BFQQ_SEEKY(bfqq) && bfq_bfqq_constantly_seeky(bfqq)) { -+ bfq_clear_bfqq_constantly_seeky(bfqq); -+ if (!blk_queue_nonrot(bfqd->queue)) { -+ BUG_ON(!bfqd->const_seeky_busy_in_flight_queues); -+ bfqd->const_seeky_busy_in_flight_queues--; -+ } -+ } -+ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || -+ !BFQQ_SEEKY(bfqq)) -+ bfq_update_idle_window(bfqd, bfqq, bic); -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", -+ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), -+ (unsigned long long) bfqq->seek_mean); -+ -+ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); -+ -+ if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) { -+ bool small_req = bfqq->queued[rq_is_sync(rq)] == 1 && -+ blk_rq_sectors(rq) < 32; -+ bool budget_timeout = bfq_bfqq_budget_timeout(bfqq); -+ -+ /* -+ * There is just this request queued: if the request -+ * is small and the queue is not to be expired, then -+ * just exit. -+ * -+ * In this way, if the disk is being idled to wait for -+ * a new request from the in-service queue, we avoid -+ * unplugging the device and committing the disk to serve -+ * just a small request. On the contrary, we wait for -+ * the block layer to decide when to unplug the device: -+ * hopefully, new requests will be merged to this one -+ * quickly, then the device will be unplugged and -+ * larger requests will be dispatched. -+ */ -+ if (small_req && !budget_timeout) -+ return; -+ -+ /* -+ * A large enough request arrived, or the queue is to -+ * be expired: in both cases disk idling is to be -+ * stopped, so clear wait_request flag and reset -+ * timer. -+ */ -+ bfq_clear_bfqq_wait_request(bfqq); -+ del_timer(&bfqd->idle_slice_timer); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ bfqg_stats_update_idle_time(bfqq_group(bfqq)); -+#endif -+ -+ /* -+ * The queue is not empty, because a new request just -+ * arrived. Hence we can safely expire the queue, in -+ * case of budget timeout, without risking that the -+ * timestamps of the queue are not updated correctly. -+ * See [1] for more details. -+ */ -+ if (budget_timeout) -+ bfq_bfqq_expire(bfqd, bfqq, false, -+ BFQ_BFQQ_BUDGET_TIMEOUT); -+ -+ /* -+ * Let the request rip immediately, or let a new queue be -+ * selected if bfqq has just been expired. -+ */ -+ __blk_run_queue(bfqd->queue); -+ } -+} -+ -+static void bfq_insert_request(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ -+ assert_spin_locked(bfqd->queue->queue_lock); -+ -+ bfq_add_request(rq); -+ -+ rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; -+ list_add_tail(&rq->queuelist, &bfqq->fifo); -+ -+ bfq_rq_enqueued(bfqd, bfqq, rq); -+} -+ -+static void bfq_update_hw_tag(struct bfq_data *bfqd) -+{ -+ bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver, -+ bfqd->rq_in_driver); -+ -+ if (bfqd->hw_tag == 1) -+ return; -+ -+ /* -+ * This sample is valid if the number of outstanding requests -+ * is large enough to allow a queueing behavior. Note that the -+ * sum is not exact, as it's not taking into account deactivated -+ * requests. -+ */ -+ if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD) -+ return; -+ -+ if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) -+ return; -+ -+ bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; -+ bfqd->max_rq_in_driver = 0; -+ bfqd->hw_tag_samples = 0; -+} -+ -+static void bfq_completed_request(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ struct bfq_data *bfqd = bfqq->bfqd; -+ bool sync = bfq_bfqq_sync(bfqq); -+ -+ bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)", -+ blk_rq_sectors(rq), sync); -+ -+ bfq_update_hw_tag(bfqd); -+ -+ BUG_ON(!bfqd->rq_in_driver); -+ BUG_ON(!bfqq->dispatched); -+ bfqd->rq_in_driver--; -+ bfqq->dispatched--; -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ bfqg_stats_update_completion(bfqq_group(bfqq), -+ rq_start_time_ns(rq), -+ rq_io_start_time_ns(rq), rq->cmd_flags); -+#endif -+ -+ if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) { -+ bfq_weights_tree_remove(bfqd, &bfqq->entity, -+ &bfqd->queue_weights_tree); -+ if (!blk_queue_nonrot(bfqd->queue)) { -+ BUG_ON(!bfqd->busy_in_flight_queues); -+ bfqd->busy_in_flight_queues--; -+ if (bfq_bfqq_constantly_seeky(bfqq)) { -+ BUG_ON(!bfqd-> -+ const_seeky_busy_in_flight_queues); -+ bfqd->const_seeky_busy_in_flight_queues--; -+ } -+ } -+ } -+ -+ if (sync) { -+ bfqd->sync_flight--; -+ RQ_BIC(rq)->ttime.last_end_request = jiffies; -+ } -+ -+ /* -+ * If we are waiting to discover whether the request pattern of the -+ * task associated with the queue is actually isochronous, and -+ * both requisites for this condition to hold are satisfied, then -+ * compute soft_rt_next_start (see the comments to the function -+ * bfq_bfqq_softrt_next_start()). -+ */ -+ if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 && -+ RB_EMPTY_ROOT(&bfqq->sort_list)) -+ bfqq->soft_rt_next_start = -+ bfq_bfqq_softrt_next_start(bfqd, bfqq); -+ -+ /* -+ * If this is the in-service queue, check if it needs to be expired, -+ * or if we want to idle in case it has no pending requests. -+ */ -+ if (bfqd->in_service_queue == bfqq) { -+ if (bfq_bfqq_budget_new(bfqq)) -+ bfq_set_budget_timeout(bfqd); -+ -+ if (bfq_bfqq_must_idle(bfqq)) { -+ bfq_arm_slice_timer(bfqd); -+ goto out; -+ } else if (bfq_may_expire_for_budg_timeout(bfqq)) -+ bfq_bfqq_expire(bfqd, bfqq, false, -+ BFQ_BFQQ_BUDGET_TIMEOUT); -+ else if (RB_EMPTY_ROOT(&bfqq->sort_list) && -+ (bfqq->dispatched == 0 || -+ !bfq_bfqq_may_idle(bfqq))) -+ bfq_bfqq_expire(bfqd, bfqq, false, -+ BFQ_BFQQ_NO_MORE_REQUESTS); -+ } -+ -+ if (!bfqd->rq_in_driver) -+ bfq_schedule_dispatch(bfqd); -+ -+out: -+ return; -+} -+ -+static int __bfq_may_queue(struct bfq_queue *bfqq) -+{ -+ if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) { -+ bfq_clear_bfqq_must_alloc(bfqq); -+ return ELV_MQUEUE_MUST; -+ } -+ -+ return ELV_MQUEUE_MAY; -+} -+ -+static int bfq_may_queue(struct request_queue *q, int rw) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct task_struct *tsk = current; -+ struct bfq_io_cq *bic; -+ struct bfq_queue *bfqq; -+ -+ /* -+ * Don't force setup of a queue from here, as a call to may_queue -+ * does not necessarily imply that a request actually will be -+ * queued. So just lookup a possibly existing queue, or return -+ * 'may queue' if that fails. -+ */ -+ bic = bfq_bic_lookup(bfqd, tsk->io_context); -+ if (!bic) -+ return ELV_MQUEUE_MAY; -+ -+ bfqq = bic_to_bfqq(bic, rw_is_sync(rw)); -+ if (bfqq) -+ return __bfq_may_queue(bfqq); -+ -+ return ELV_MQUEUE_MAY; -+} -+ -+/* -+ * Queue lock held here. -+ */ -+static void bfq_put_request(struct request *rq) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ -+ if (bfqq) { -+ const int rw = rq_data_dir(rq); -+ -+ BUG_ON(!bfqq->allocated[rw]); -+ bfqq->allocated[rw]--; -+ -+ rq->elv.priv[0] = NULL; -+ rq->elv.priv[1] = NULL; -+ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ bfq_put_queue(bfqq); -+ } -+} -+ -+/* -+ * Allocate bfq data structures associated with this request. -+ */ -+static int bfq_set_request(struct request_queue *q, struct request *rq, -+ struct bio *bio, gfp_t gfp_mask) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq); -+ const int rw = rq_data_dir(rq); -+ const int is_sync = rq_is_sync(rq); -+ struct bfq_queue *bfqq; -+ unsigned long flags; -+ -+ might_sleep_if(gfpflags_allow_blocking(gfp_mask)); -+ -+ bfq_check_ioprio_change(bic, bio); -+ -+ spin_lock_irqsave(q->queue_lock, flags); -+ -+ if (!bic) -+ goto queue_fail; -+ -+ bfq_bic_update_cgroup(bic, bio); -+ -+ bfqq = bic_to_bfqq(bic, is_sync); -+ if (!bfqq || bfqq == &bfqd->oom_bfqq) { -+ bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, gfp_mask); -+ bic_set_bfqq(bic, bfqq, is_sync); -+ if (is_sync) { -+ if (bfqd->large_burst) -+ bfq_mark_bfqq_in_large_burst(bfqq); -+ else -+ bfq_clear_bfqq_in_large_burst(bfqq); -+ } -+ } -+ -+ bfqq->allocated[rw]++; -+ atomic_inc(&bfqq->ref); -+ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, -+ atomic_read(&bfqq->ref)); -+ -+ rq->elv.priv[0] = bic; -+ rq->elv.priv[1] = bfqq; -+ -+ spin_unlock_irqrestore(q->queue_lock, flags); -+ -+ return 0; -+ -+queue_fail: -+ bfq_schedule_dispatch(bfqd); -+ spin_unlock_irqrestore(q->queue_lock, flags); -+ -+ return 1; -+} -+ -+static void bfq_kick_queue(struct work_struct *work) -+{ -+ struct bfq_data *bfqd = -+ container_of(work, struct bfq_data, unplug_work); -+ struct request_queue *q = bfqd->queue; -+ -+ spin_lock_irq(q->queue_lock); -+ __blk_run_queue(q); -+ spin_unlock_irq(q->queue_lock); -+} -+ -+/* -+ * Handler of the expiration of the timer running if the in-service queue -+ * is idling inside its time slice. -+ */ -+static void bfq_idle_slice_timer(unsigned long data) -+{ -+ struct bfq_data *bfqd = (struct bfq_data *)data; -+ struct bfq_queue *bfqq; -+ unsigned long flags; -+ enum bfqq_expiration reason; -+ -+ spin_lock_irqsave(bfqd->queue->queue_lock, flags); -+ -+ bfqq = bfqd->in_service_queue; -+ /* -+ * Theoretical race here: the in-service queue can be NULL or -+ * different from the queue that was idling if the timer handler -+ * spins on the queue_lock and a new request arrives for the -+ * current queue and there is a full dispatch cycle that changes -+ * the in-service queue. This can hardly happen, but in the worst -+ * case we just expire a queue too early. -+ */ -+ if (bfqq) { -+ bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); -+ if (bfq_bfqq_budget_timeout(bfqq)) -+ /* -+ * Also here the queue can be safely expired -+ * for budget timeout without wasting -+ * guarantees -+ */ -+ reason = BFQ_BFQQ_BUDGET_TIMEOUT; -+ else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) -+ /* -+ * The queue may not be empty upon timer expiration, -+ * because we may not disable the timer when the -+ * first request of the in-service queue arrives -+ * during disk idling. -+ */ -+ reason = BFQ_BFQQ_TOO_IDLE; -+ else -+ goto schedule_dispatch; -+ -+ bfq_bfqq_expire(bfqd, bfqq, true, reason); -+ } -+ -+schedule_dispatch: -+ bfq_schedule_dispatch(bfqd); -+ -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); -+} -+ -+static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) -+{ -+ del_timer_sync(&bfqd->idle_slice_timer); -+ cancel_work_sync(&bfqd->unplug_work); -+} -+ -+static void __bfq_put_async_bfqq(struct bfq_data *bfqd, -+ struct bfq_queue **bfqq_ptr) -+{ -+ struct bfq_group *root_group = bfqd->root_group; -+ struct bfq_queue *bfqq = *bfqq_ptr; -+ -+ bfq_log(bfqd, "put_async_bfqq: %p", bfqq); -+ if (bfqq) { -+ bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); -+ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ bfq_put_queue(bfqq); -+ *bfqq_ptr = NULL; -+ } -+} -+ -+/* -+ * Release all the bfqg references to its async queues. If we are -+ * deallocating the group these queues may still contain requests, so -+ * we reparent them to the root cgroup (i.e., the only one that will -+ * exist for sure until all the requests on a device are gone). -+ */ -+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) -+{ -+ int i, j; -+ -+ for (i = 0; i < 2; i++) -+ for (j = 0; j < IOPRIO_BE_NR; j++) -+ __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]); -+ -+ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); -+} -+ -+static void bfq_exit_queue(struct elevator_queue *e) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ struct request_queue *q = bfqd->queue; -+ struct bfq_queue *bfqq, *n; -+ -+ bfq_shutdown_timer_wq(bfqd); -+ -+ spin_lock_irq(q->queue_lock); -+ -+ BUG_ON(bfqd->in_service_queue); -+ list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) -+ bfq_deactivate_bfqq(bfqd, bfqq, 0); -+ -+ spin_unlock_irq(q->queue_lock); -+ -+ bfq_shutdown_timer_wq(bfqd); -+ -+ synchronize_rcu(); -+ -+ BUG_ON(timer_pending(&bfqd->idle_slice_timer)); -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ blkcg_deactivate_policy(q, &blkcg_policy_bfq); -+#else -+ kfree(bfqd->root_group); -+#endif -+ -+ kfree(bfqd); -+} -+ -+static void bfq_init_root_group(struct bfq_group *root_group, -+ struct bfq_data *bfqd) -+{ -+ int i; -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ root_group->entity.parent = NULL; -+ root_group->my_entity = NULL; -+ root_group->bfqd = bfqd; -+#endif -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) -+ root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; -+} -+ -+static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) -+{ -+ struct bfq_data *bfqd; -+ struct elevator_queue *eq; -+ -+ eq = elevator_alloc(q, e); -+ if (!eq) -+ return -ENOMEM; -+ -+ bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node); -+ if (!bfqd) { -+ kobject_put(&eq->kobj); -+ return -ENOMEM; -+ } -+ eq->elevator_data = bfqd; -+ -+ /* -+ * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. -+ * Grab a permanent reference to it, so that the normal code flow -+ * will not attempt to free it. -+ */ -+ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0); -+ atomic_inc(&bfqd->oom_bfqq.ref); -+ bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO; -+ bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE; -+ bfqd->oom_bfqq.entity.new_weight = -+ bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio); -+ /* -+ * Trigger weight initialization, according to ioprio, at the -+ * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio -+ * class won't be changed any more. -+ */ -+ bfqd->oom_bfqq.entity.prio_changed = 1; -+ -+ bfqd->queue = q; -+ -+ spin_lock_irq(q->queue_lock); -+ q->elevator = eq; -+ spin_unlock_irq(q->queue_lock); -+ -+ bfqd->root_group = bfq_create_group_hierarchy(bfqd, q->node); -+ if (!bfqd->root_group) -+ goto out_free; -+ bfq_init_root_group(bfqd->root_group, bfqd); -+ bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ bfqd->active_numerous_groups = 0; -+#endif -+ -+ init_timer(&bfqd->idle_slice_timer); -+ bfqd->idle_slice_timer.function = bfq_idle_slice_timer; -+ bfqd->idle_slice_timer.data = (unsigned long)bfqd; -+ -+ bfqd->queue_weights_tree = RB_ROOT; -+ bfqd->group_weights_tree = RB_ROOT; -+ -+ INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); -+ -+ INIT_LIST_HEAD(&bfqd->active_list); -+ INIT_LIST_HEAD(&bfqd->idle_list); -+ INIT_HLIST_HEAD(&bfqd->burst_list); -+ -+ bfqd->hw_tag = -1; -+ -+ bfqd->bfq_max_budget = bfq_default_max_budget; -+ -+ bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0]; -+ bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1]; -+ bfqd->bfq_back_max = bfq_back_max; -+ bfqd->bfq_back_penalty = bfq_back_penalty; -+ bfqd->bfq_slice_idle = bfq_slice_idle; -+ bfqd->bfq_class_idle_last_service = 0; -+ bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; -+ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; -+ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; -+ -+ bfqd->bfq_requests_within_timer = 120; -+ -+ bfqd->bfq_large_burst_thresh = 11; -+ bfqd->bfq_burst_interval = msecs_to_jiffies(500); -+ -+ bfqd->low_latency = true; -+ -+ bfqd->bfq_wr_coeff = 20; -+ bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300); -+ bfqd->bfq_wr_max_time = 0; -+ bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000); -+ bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500); -+ bfqd->bfq_wr_max_softrt_rate = 7000; /* -+ * Approximate rate required -+ * to playback or record a -+ * high-definition compressed -+ * video. -+ */ -+ bfqd->wr_busy_queues = 0; -+ bfqd->busy_in_flight_queues = 0; -+ bfqd->const_seeky_busy_in_flight_queues = 0; -+ -+ /* -+ * Begin by assuming, optimistically, that the device peak rate is -+ * equal to the highest reference rate. -+ */ -+ bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] * -+ T_fast[blk_queue_nonrot(bfqd->queue)]; -+ bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)]; -+ bfqd->device_speed = BFQ_BFQD_FAST; -+ -+ return 0; -+ -+out_free: -+ kfree(bfqd); -+ kobject_put(&eq->kobj); -+ return -ENOMEM; -+} -+ -+static void bfq_slab_kill(void) -+{ -+ kmem_cache_destroy(bfq_pool); -+} -+ -+static int __init bfq_slab_setup(void) -+{ -+ bfq_pool = KMEM_CACHE(bfq_queue, 0); -+ if (!bfq_pool) -+ return -ENOMEM; -+ return 0; -+} -+ -+static ssize_t bfq_var_show(unsigned int var, char *page) -+{ -+ return sprintf(page, "%d\n", var); -+} -+ -+static ssize_t bfq_var_store(unsigned long *var, const char *page, -+ size_t count) -+{ -+ unsigned long new_val; -+ int ret = kstrtoul(page, 10, &new_val); -+ -+ if (ret == 0) -+ *var = new_val; -+ -+ return count; -+} -+ -+static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ -+ return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ? -+ jiffies_to_msecs(bfqd->bfq_wr_max_time) : -+ jiffies_to_msecs(bfq_wr_duration(bfqd))); -+} -+ -+static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) -+{ -+ struct bfq_queue *bfqq; -+ struct bfq_data *bfqd = e->elevator_data; -+ ssize_t num_char = 0; -+ -+ num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n", -+ bfqd->queued); -+ -+ spin_lock_irq(bfqd->queue->queue_lock); -+ -+ num_char += sprintf(page + num_char, "Active:\n"); -+ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { -+ num_char += sprintf(page + num_char, -+ "pid%d: weight %hu, nr_queued %d %d, ", -+ bfqq->pid, -+ bfqq->entity.weight, -+ bfqq->queued[0], -+ bfqq->queued[1]); -+ num_char += sprintf(page + num_char, -+ "dur %d/%u\n", -+ jiffies_to_msecs( -+ jiffies - -+ bfqq->last_wr_start_finish), -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } -+ -+ num_char += sprintf(page + num_char, "Idle:\n"); -+ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { -+ num_char += sprintf(page + num_char, -+ "pid%d: weight %hu, dur %d/%u\n", -+ bfqq->pid, -+ bfqq->entity.weight, -+ jiffies_to_msecs(jiffies - -+ bfqq->last_wr_start_finish), -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } -+ -+ spin_unlock_irq(bfqd->queue->queue_lock); -+ -+ return num_char; -+} -+ -+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ -+static ssize_t __FUNC(struct elevator_queue *e, char *page) \ -+{ \ -+ struct bfq_data *bfqd = e->elevator_data; \ -+ unsigned int __data = __VAR; \ -+ if (__CONV) \ -+ __data = jiffies_to_msecs(__data); \ -+ return bfq_var_show(__data, (page)); \ -+} -+SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); -+SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); -+SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); -+SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); -+SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); -+SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); -+SHOW_FUNCTION(bfq_max_budget_async_rq_show, -+ bfqd->bfq_max_budget_async_rq, 0); -+SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); -+SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); -+SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); -+SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0); -+SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1); -+SHOW_FUNCTION(bfq_wr_min_idle_time_show, bfqd->bfq_wr_min_idle_time, 1); -+SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async, -+ 1); -+SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0); -+#undef SHOW_FUNCTION -+ -+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ -+static ssize_t \ -+__FUNC(struct elevator_queue *e, const char *page, size_t count) \ -+{ \ -+ struct bfq_data *bfqd = e->elevator_data; \ -+ unsigned long uninitialized_var(__data); \ -+ int ret = bfq_var_store(&__data, (page), count); \ -+ if (__data < (MIN)) \ -+ __data = (MIN); \ -+ else if (__data > (MAX)) \ -+ __data = (MAX); \ -+ if (__CONV) \ -+ *(__PTR) = msecs_to_jiffies(__data); \ -+ else \ -+ *(__PTR) = __data; \ -+ return ret; \ -+} -+STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, -+ INT_MAX, 1); -+STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, -+ INT_MAX, 1); -+STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); -+STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, -+ INT_MAX, 0); -+STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); -+STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, -+ 1, INT_MAX, 0); -+STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, -+ INT_MAX, 1); -+STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0); -+STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1); -+STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX, -+ 1); -+STORE_FUNCTION(bfq_wr_min_idle_time_store, &bfqd->bfq_wr_min_idle_time, 0, -+ INT_MAX, 1); -+STORE_FUNCTION(bfq_wr_min_inter_arr_async_store, -+ &bfqd->bfq_wr_min_inter_arr_async, 0, INT_MAX, 1); -+STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0, -+ INT_MAX, 0); -+#undef STORE_FUNCTION -+ -+/* do nothing for the moment */ -+static ssize_t bfq_weights_store(struct elevator_queue *e, -+ const char *page, size_t count) -+{ -+ return count; -+} -+ -+static unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) -+{ -+ u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); -+ -+ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) -+ return bfq_calc_max_budget(bfqd->peak_rate, timeout); -+ else -+ return bfq_default_max_budget; -+} -+ -+static ssize_t bfq_max_budget_store(struct elevator_queue *e, -+ const char *page, size_t count) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ unsigned long uninitialized_var(__data); -+ int ret = bfq_var_store(&__data, (page), count); -+ -+ if (__data == 0) -+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); -+ else { -+ if (__data > INT_MAX) -+ __data = INT_MAX; -+ bfqd->bfq_max_budget = __data; -+ } -+ -+ bfqd->bfq_user_max_budget = __data; -+ -+ return ret; -+} -+ -+static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, -+ const char *page, size_t count) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ unsigned long uninitialized_var(__data); -+ int ret = bfq_var_store(&__data, (page), count); -+ -+ if (__data < 1) -+ __data = 1; -+ else if (__data > INT_MAX) -+ __data = INT_MAX; -+ -+ bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); -+ if (bfqd->bfq_user_max_budget == 0) -+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); -+ -+ return ret; -+} -+ -+static ssize_t bfq_low_latency_store(struct elevator_queue *e, -+ const char *page, size_t count) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ unsigned long uninitialized_var(__data); -+ int ret = bfq_var_store(&__data, (page), count); -+ -+ if (__data > 1) -+ __data = 1; -+ if (__data == 0 && bfqd->low_latency != 0) -+ bfq_end_wr(bfqd); -+ bfqd->low_latency = __data; -+ -+ return ret; -+} -+ -+#define BFQ_ATTR(name) \ -+ __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store) -+ -+static struct elv_fs_entry bfq_attrs[] = { -+ BFQ_ATTR(fifo_expire_sync), -+ BFQ_ATTR(fifo_expire_async), -+ BFQ_ATTR(back_seek_max), -+ BFQ_ATTR(back_seek_penalty), -+ BFQ_ATTR(slice_idle), -+ BFQ_ATTR(max_budget), -+ BFQ_ATTR(max_budget_async_rq), -+ BFQ_ATTR(timeout_sync), -+ BFQ_ATTR(timeout_async), -+ BFQ_ATTR(low_latency), -+ BFQ_ATTR(wr_coeff), -+ BFQ_ATTR(wr_max_time), -+ BFQ_ATTR(wr_rt_max_time), -+ BFQ_ATTR(wr_min_idle_time), -+ BFQ_ATTR(wr_min_inter_arr_async), -+ BFQ_ATTR(wr_max_softrt_rate), -+ BFQ_ATTR(weights), -+ __ATTR_NULL -+}; -+ -+static struct elevator_type iosched_bfq = { -+ .ops = { -+ .elevator_merge_fn = bfq_merge, -+ .elevator_merged_fn = bfq_merged_request, -+ .elevator_merge_req_fn = bfq_merged_requests, -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ .elevator_bio_merged_fn = bfq_bio_merged, -+#endif -+ .elevator_allow_merge_fn = bfq_allow_merge, -+ .elevator_dispatch_fn = bfq_dispatch_requests, -+ .elevator_add_req_fn = bfq_insert_request, -+ .elevator_activate_req_fn = bfq_activate_request, -+ .elevator_deactivate_req_fn = bfq_deactivate_request, -+ .elevator_completed_req_fn = bfq_completed_request, -+ .elevator_former_req_fn = elv_rb_former_request, -+ .elevator_latter_req_fn = elv_rb_latter_request, -+ .elevator_init_icq_fn = bfq_init_icq, -+ .elevator_exit_icq_fn = bfq_exit_icq, -+ .elevator_set_req_fn = bfq_set_request, -+ .elevator_put_req_fn = bfq_put_request, -+ .elevator_may_queue_fn = bfq_may_queue, -+ .elevator_init_fn = bfq_init_queue, -+ .elevator_exit_fn = bfq_exit_queue, -+ }, -+ .icq_size = sizeof(struct bfq_io_cq), -+ .icq_align = __alignof__(struct bfq_io_cq), -+ .elevator_attrs = bfq_attrs, -+ .elevator_name = "bfq", -+ .elevator_owner = THIS_MODULE, -+}; -+ -+static int __init bfq_init(void) -+{ -+ int ret; -+ -+ /* -+ * Can be 0 on HZ < 1000 setups. -+ */ -+ if (bfq_slice_idle == 0) -+ bfq_slice_idle = 1; -+ -+ if (bfq_timeout_async == 0) -+ bfq_timeout_async = 1; -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ ret = blkcg_policy_register(&blkcg_policy_bfq); -+ if (ret) -+ return ret; -+#endif -+ -+ ret = -ENOMEM; -+ if (bfq_slab_setup()) -+ goto err_pol_unreg; -+ -+ /* -+ * Times to load large popular applications for the typical systems -+ * installed on the reference devices (see the comments before the -+ * definitions of the two arrays). -+ */ -+ T_slow[0] = msecs_to_jiffies(2600); -+ T_slow[1] = msecs_to_jiffies(1000); -+ T_fast[0] = msecs_to_jiffies(5500); -+ T_fast[1] = msecs_to_jiffies(2000); -+ -+ /* -+ * Thresholds that determine the switch between speed classes (see -+ * the comments before the definition of the array). -+ */ -+ device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2; -+ device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2; -+ -+ ret = elv_register(&iosched_bfq); -+ if (ret) -+ goto err_pol_unreg; -+ -+ pr_info("BFQ I/O-scheduler: v7r11"); -+ -+ return 0; -+ -+err_pol_unreg: -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ blkcg_policy_unregister(&blkcg_policy_bfq); -+#endif -+ return ret; -+} -+ -+static void __exit bfq_exit(void) -+{ -+ elv_unregister(&iosched_bfq); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ blkcg_policy_unregister(&blkcg_policy_bfq); -+#endif -+ bfq_slab_kill(); -+} -+ -+module_init(bfq_init); -+module_exit(bfq_exit); -+ -+MODULE_AUTHOR("Arianna Avanzini, Fabio Checconi, Paolo Valente"); -+MODULE_LICENSE("GPL"); -diff --git a/block/bfq-sched.c b/block/bfq-sched.c -new file mode 100644 -index 0000000..a5ed694 ---- /dev/null -+++ b/block/bfq-sched.c -@@ -0,0 +1,1199 @@ -+/* -+ * BFQ: Hierarchical B-WF2Q+ scheduler. -+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> -+ * -+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> -+ * Paolo Valente <paolo.valente@unimore.it> -+ * -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it> -+ */ -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+#define for_each_entity(entity) \ -+ for (; entity ; entity = entity->parent) -+ -+#define for_each_entity_safe(entity, parent) \ -+ for (; entity && ({ parent = entity->parent; 1; }); entity = parent) -+ -+ -+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, -+ int extract, -+ struct bfq_data *bfqd); -+ -+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); -+ -+static void bfq_update_budget(struct bfq_entity *next_in_service) -+{ -+ struct bfq_entity *bfqg_entity; -+ struct bfq_group *bfqg; -+ struct bfq_sched_data *group_sd; -+ -+ BUG_ON(!next_in_service); -+ -+ group_sd = next_in_service->sched_data; -+ -+ bfqg = container_of(group_sd, struct bfq_group, sched_data); -+ /* -+ * bfq_group's my_entity field is not NULL only if the group -+ * is not the root group. We must not touch the root entity -+ * as it must never become an in-service entity. -+ */ -+ bfqg_entity = bfqg->my_entity; -+ if (bfqg_entity) -+ bfqg_entity->budget = next_in_service->budget; -+} -+ -+static int bfq_update_next_in_service(struct bfq_sched_data *sd) -+{ -+ struct bfq_entity *next_in_service; -+ -+ if (sd->in_service_entity) -+ /* will update/requeue at the end of service */ -+ return 0; -+ -+ /* -+ * NOTE: this can be improved in many ways, such as returning -+ * 1 (and thus propagating upwards the update) only when the -+ * budget changes, or caching the bfqq that will be scheduled -+ * next from this subtree. By now we worry more about -+ * correctness than about performance... -+ */ -+ next_in_service = bfq_lookup_next_entity(sd, 0, NULL); -+ sd->next_in_service = next_in_service; -+ -+ if (next_in_service) -+ bfq_update_budget(next_in_service); -+ -+ return 1; -+} -+ -+static void bfq_check_next_in_service(struct bfq_sched_data *sd, -+ struct bfq_entity *entity) -+{ -+ BUG_ON(sd->next_in_service != entity); -+} -+#else -+#define for_each_entity(entity) \ -+ for (; entity ; entity = NULL) -+ -+#define for_each_entity_safe(entity, parent) \ -+ for (parent = NULL; entity ; entity = parent) -+ -+static int bfq_update_next_in_service(struct bfq_sched_data *sd) -+{ -+ return 0; -+} -+ -+static void bfq_check_next_in_service(struct bfq_sched_data *sd, -+ struct bfq_entity *entity) -+{ -+} -+ -+static void bfq_update_budget(struct bfq_entity *next_in_service) -+{ -+} -+#endif -+ -+/* -+ * Shift for timestamp calculations. This actually limits the maximum -+ * service allowed in one timestamp delta (small shift values increase it), -+ * the maximum total weight that can be used for the queues in the system -+ * (big shift values increase it), and the period of virtual time -+ * wraparounds. -+ */ -+#define WFQ_SERVICE_SHIFT 22 -+ -+/** -+ * bfq_gt - compare two timestamps. -+ * @a: first ts. -+ * @b: second ts. -+ * -+ * Return @a > @b, dealing with wrapping correctly. -+ */ -+static int bfq_gt(u64 a, u64 b) -+{ -+ return (s64)(a - b) > 0; -+} -+ -+static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = NULL; -+ -+ BUG_ON(!entity); -+ -+ if (!entity->my_sched_data) -+ bfqq = container_of(entity, struct bfq_queue, entity); -+ -+ return bfqq; -+} -+ -+ -+/** -+ * bfq_delta - map service into the virtual time domain. -+ * @service: amount of service. -+ * @weight: scale factor (weight of an entity or weight sum). -+ */ -+static u64 bfq_delta(unsigned long service, unsigned long weight) -+{ -+ u64 d = (u64)service << WFQ_SERVICE_SHIFT; -+ -+ do_div(d, weight); -+ return d; -+} -+ -+/** -+ * bfq_calc_finish - assign the finish time to an entity. -+ * @entity: the entity to act upon. -+ * @service: the service to be charged to the entity. -+ */ -+static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ -+ BUG_ON(entity->weight == 0); -+ -+ entity->finish = entity->start + -+ bfq_delta(service, entity->weight); -+ -+ if (bfqq) { -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "calc_finish: serv %lu, w %d", -+ service, entity->weight); -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "calc_finish: start %llu, finish %llu, delta %llu", -+ entity->start, entity->finish, -+ bfq_delta(service, entity->weight)); -+ } -+} -+ -+/** -+ * bfq_entity_of - get an entity from a node. -+ * @node: the node field of the entity. -+ * -+ * Convert a node pointer to the relative entity. This is used only -+ * to simplify the logic of some functions and not as the generic -+ * conversion mechanism because, e.g., in the tree walking functions, -+ * the check for a %NULL value would be redundant. -+ */ -+static struct bfq_entity *bfq_entity_of(struct rb_node *node) -+{ -+ struct bfq_entity *entity = NULL; -+ -+ if (node) -+ entity = rb_entry(node, struct bfq_entity, rb_node); -+ -+ return entity; -+} -+ -+/** -+ * bfq_extract - remove an entity from a tree. -+ * @root: the tree root. -+ * @entity: the entity to remove. -+ */ -+static void bfq_extract(struct rb_root *root, struct bfq_entity *entity) -+{ -+ BUG_ON(entity->tree != root); -+ -+ entity->tree = NULL; -+ rb_erase(&entity->rb_node, root); -+} -+ -+/** -+ * bfq_idle_extract - extract an entity from the idle tree. -+ * @st: the service tree of the owning @entity. -+ * @entity: the entity being removed. -+ */ -+static void bfq_idle_extract(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct rb_node *next; -+ -+ BUG_ON(entity->tree != &st->idle); -+ -+ if (entity == st->first_idle) { -+ next = rb_next(&entity->rb_node); -+ st->first_idle = bfq_entity_of(next); -+ } -+ -+ if (entity == st->last_idle) { -+ next = rb_prev(&entity->rb_node); -+ st->last_idle = bfq_entity_of(next); -+ } -+ -+ bfq_extract(&st->idle, entity); -+ -+ if (bfqq) -+ list_del(&bfqq->bfqq_list); -+} -+ -+/** -+ * bfq_insert - generic tree insertion. -+ * @root: tree root. -+ * @entity: entity to insert. -+ * -+ * This is used for the idle and the active tree, since they are both -+ * ordered by finish time. -+ */ -+static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) -+{ -+ struct bfq_entity *entry; -+ struct rb_node **node = &root->rb_node; -+ struct rb_node *parent = NULL; -+ -+ BUG_ON(entity->tree); -+ -+ while (*node) { -+ parent = *node; -+ entry = rb_entry(parent, struct bfq_entity, rb_node); -+ -+ if (bfq_gt(entry->finish, entity->finish)) -+ node = &parent->rb_left; -+ else -+ node = &parent->rb_right; -+ } -+ -+ rb_link_node(&entity->rb_node, parent, node); -+ rb_insert_color(&entity->rb_node, root); -+ -+ entity->tree = root; -+} -+ -+/** -+ * bfq_update_min - update the min_start field of a entity. -+ * @entity: the entity to update. -+ * @node: one of its children. -+ * -+ * This function is called when @entity may store an invalid value for -+ * min_start due to updates to the active tree. The function assumes -+ * that the subtree rooted at @node (which may be its left or its right -+ * child) has a valid min_start value. -+ */ -+static void bfq_update_min(struct bfq_entity *entity, struct rb_node *node) -+{ -+ struct bfq_entity *child; -+ -+ if (node) { -+ child = rb_entry(node, struct bfq_entity, rb_node); -+ if (bfq_gt(entity->min_start, child->min_start)) -+ entity->min_start = child->min_start; -+ } -+} -+ -+/** -+ * bfq_update_active_node - recalculate min_start. -+ * @node: the node to update. -+ * -+ * @node may have changed position or one of its children may have moved, -+ * this function updates its min_start value. The left and right subtrees -+ * are assumed to hold a correct min_start value. -+ */ -+static void bfq_update_active_node(struct rb_node *node) -+{ -+ struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); -+ -+ entity->min_start = entity->start; -+ bfq_update_min(entity, node->rb_right); -+ bfq_update_min(entity, node->rb_left); -+} -+ -+/** -+ * bfq_update_active_tree - update min_start for the whole active tree. -+ * @node: the starting node. -+ * -+ * @node must be the deepest modified node after an update. This function -+ * updates its min_start using the values held by its children, assuming -+ * that they did not change, and then updates all the nodes that may have -+ * changed in the path to the root. The only nodes that may have changed -+ * are the ones in the path or their siblings. -+ */ -+static void bfq_update_active_tree(struct rb_node *node) -+{ -+ struct rb_node *parent; -+ -+up: -+ bfq_update_active_node(node); -+ -+ parent = rb_parent(node); -+ if (!parent) -+ return; -+ -+ if (node == parent->rb_left && parent->rb_right) -+ bfq_update_active_node(parent->rb_right); -+ else if (parent->rb_left) -+ bfq_update_active_node(parent->rb_left); -+ -+ node = parent; -+ goto up; -+} -+ -+static void bfq_weights_tree_add(struct bfq_data *bfqd, -+ struct bfq_entity *entity, -+ struct rb_root *root); -+ -+static void bfq_weights_tree_remove(struct bfq_data *bfqd, -+ struct bfq_entity *entity, -+ struct rb_root *root); -+ -+ -+/** -+ * bfq_active_insert - insert an entity in the active tree of its -+ * group/device. -+ * @st: the service tree of the entity. -+ * @entity: the entity being inserted. -+ * -+ * The active tree is ordered by finish time, but an extra key is kept -+ * per each node, containing the minimum value for the start times of -+ * its children (and the node itself), so it's possible to search for -+ * the eligible node with the lowest finish time in logarithmic time. -+ */ -+static void bfq_active_insert(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct rb_node *node = &entity->rb_node; -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ struct bfq_sched_data *sd = NULL; -+ struct bfq_group *bfqg = NULL; -+ struct bfq_data *bfqd = NULL; -+#endif -+ -+ bfq_insert(&st->active, entity); -+ -+ if (node->rb_left) -+ node = node->rb_left; -+ else if (node->rb_right) -+ node = node->rb_right; -+ -+ bfq_update_active_tree(node); -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ sd = entity->sched_data; -+ bfqg = container_of(sd, struct bfq_group, sched_data); -+ BUG_ON(!bfqg); -+ bfqd = (struct bfq_data *)bfqg->bfqd; -+#endif -+ if (bfqq) -+ list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ else { /* bfq_group */ -+ BUG_ON(!bfqd); -+ bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree); -+ } -+ if (bfqg != bfqd->root_group) { -+ BUG_ON(!bfqg); -+ BUG_ON(!bfqd); -+ bfqg->active_entities++; -+ if (bfqg->active_entities == 2) -+ bfqd->active_numerous_groups++; -+ } -+#endif -+} -+ -+/** -+ * bfq_ioprio_to_weight - calc a weight from an ioprio. -+ * @ioprio: the ioprio value to convert. -+ */ -+static unsigned short bfq_ioprio_to_weight(int ioprio) -+{ -+ BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); -+ return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - ioprio; -+} -+ -+/** -+ * bfq_weight_to_ioprio - calc an ioprio from a weight. -+ * @weight: the weight value to convert. -+ * -+ * To preserve as much as possible the old only-ioprio user interface, -+ * 0 is used as an escape ioprio value for weights (numerically) equal or -+ * larger than IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF. -+ */ -+static unsigned short bfq_weight_to_ioprio(int weight) -+{ -+ BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); -+ return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight < 0 ? -+ 0 : IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight; -+} -+ -+static void bfq_get_entity(struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ -+ if (bfqq) { -+ atomic_inc(&bfqq->ref); -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ } -+} -+ -+/** -+ * bfq_find_deepest - find the deepest node that an extraction can modify. -+ * @node: the node being removed. -+ * -+ * Do the first step of an extraction in an rb tree, looking for the -+ * node that will replace @node, and returning the deepest node that -+ * the following modifications to the tree can touch. If @node is the -+ * last node in the tree return %NULL. -+ */ -+static struct rb_node *bfq_find_deepest(struct rb_node *node) -+{ -+ struct rb_node *deepest; -+ -+ if (!node->rb_right && !node->rb_left) -+ deepest = rb_parent(node); -+ else if (!node->rb_right) -+ deepest = node->rb_left; -+ else if (!node->rb_left) -+ deepest = node->rb_right; -+ else { -+ deepest = rb_next(node); -+ if (deepest->rb_right) -+ deepest = deepest->rb_right; -+ else if (rb_parent(deepest) != node) -+ deepest = rb_parent(deepest); -+ } -+ -+ return deepest; -+} -+ -+/** -+ * bfq_active_extract - remove an entity from the active tree. -+ * @st: the service_tree containing the tree. -+ * @entity: the entity being removed. -+ */ -+static void bfq_active_extract(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct rb_node *node; -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ struct bfq_sched_data *sd = NULL; -+ struct bfq_group *bfqg = NULL; -+ struct bfq_data *bfqd = NULL; -+#endif -+ -+ node = bfq_find_deepest(&entity->rb_node); -+ bfq_extract(&st->active, entity); -+ -+ if (node) -+ bfq_update_active_tree(node); -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ sd = entity->sched_data; -+ bfqg = container_of(sd, struct bfq_group, sched_data); -+ BUG_ON(!bfqg); -+ bfqd = (struct bfq_data *)bfqg->bfqd; -+#endif -+ if (bfqq) -+ list_del(&bfqq->bfqq_list); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ else { /* bfq_group */ -+ BUG_ON(!bfqd); -+ bfq_weights_tree_remove(bfqd, entity, -+ &bfqd->group_weights_tree); -+ } -+ if (bfqg != bfqd->root_group) { -+ BUG_ON(!bfqg); -+ BUG_ON(!bfqd); -+ BUG_ON(!bfqg->active_entities); -+ bfqg->active_entities--; -+ if (bfqg->active_entities == 1) { -+ BUG_ON(!bfqd->active_numerous_groups); -+ bfqd->active_numerous_groups--; -+ } -+ } -+#endif -+} -+ -+/** -+ * bfq_idle_insert - insert an entity into the idle tree. -+ * @st: the service tree containing the tree. -+ * @entity: the entity to insert. -+ */ -+static void bfq_idle_insert(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct bfq_entity *first_idle = st->first_idle; -+ struct bfq_entity *last_idle = st->last_idle; -+ -+ if (!first_idle || bfq_gt(first_idle->finish, entity->finish)) -+ st->first_idle = entity; -+ if (!last_idle || bfq_gt(entity->finish, last_idle->finish)) -+ st->last_idle = entity; -+ -+ bfq_insert(&st->idle, entity); -+ -+ if (bfqq) -+ list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list); -+} -+ -+/** -+ * bfq_forget_entity - remove an entity from the wfq trees. -+ * @st: the service tree. -+ * @entity: the entity being removed. -+ * -+ * Update the device status and forget everything about @entity, putting -+ * the device reference to it, if it is a queue. Entities belonging to -+ * groups are not refcounted. -+ */ -+static void bfq_forget_entity(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct bfq_sched_data *sd; -+ -+ BUG_ON(!entity->on_st); -+ -+ entity->on_st = 0; -+ st->wsum -= entity->weight; -+ if (bfqq) { -+ sd = entity->sched_data; -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ bfq_put_queue(bfqq); -+ } -+} -+ -+/** -+ * bfq_put_idle_entity - release the idle tree ref of an entity. -+ * @st: service tree for the entity. -+ * @entity: the entity being released. -+ */ -+static void bfq_put_idle_entity(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ bfq_idle_extract(st, entity); -+ bfq_forget_entity(st, entity); -+} -+ -+/** -+ * bfq_forget_idle - update the idle tree if necessary. -+ * @st: the service tree to act upon. -+ * -+ * To preserve the global O(log N) complexity we only remove one entry here; -+ * as the idle tree will not grow indefinitely this can be done safely. -+ */ -+static void bfq_forget_idle(struct bfq_service_tree *st) -+{ -+ struct bfq_entity *first_idle = st->first_idle; -+ struct bfq_entity *last_idle = st->last_idle; -+ -+ if (RB_EMPTY_ROOT(&st->active) && last_idle && -+ !bfq_gt(last_idle->finish, st->vtime)) { -+ /* -+ * Forget the whole idle tree, increasing the vtime past -+ * the last finish time of idle entities. -+ */ -+ st->vtime = last_idle->finish; -+ } -+ -+ if (first_idle && !bfq_gt(first_idle->finish, st->vtime)) -+ bfq_put_idle_entity(st, first_idle); -+} -+ -+static struct bfq_service_tree * -+__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_service_tree *new_st = old_st; -+ -+ if (entity->prio_changed) { -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ unsigned short prev_weight, new_weight; -+ struct bfq_data *bfqd = NULL; -+ struct rb_root *root; -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ struct bfq_sched_data *sd; -+ struct bfq_group *bfqg; -+#endif -+ -+ if (bfqq) -+ bfqd = bfqq->bfqd; -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ else { -+ sd = entity->my_sched_data; -+ bfqg = container_of(sd, struct bfq_group, sched_data); -+ BUG_ON(!bfqg); -+ bfqd = (struct bfq_data *)bfqg->bfqd; -+ BUG_ON(!bfqd); -+ } -+#endif -+ -+ BUG_ON(old_st->wsum < entity->weight); -+ old_st->wsum -= entity->weight; -+ -+ if (entity->new_weight != entity->orig_weight) { -+ if (entity->new_weight < BFQ_MIN_WEIGHT || -+ entity->new_weight > BFQ_MAX_WEIGHT) { -+ pr_crit("update_weight_prio: new_weight %d\n", -+ entity->new_weight); -+ BUG(); -+ } -+ entity->orig_weight = entity->new_weight; -+ if (bfqq) -+ bfqq->ioprio = -+ bfq_weight_to_ioprio(entity->orig_weight); -+ } -+ -+ if (bfqq) -+ bfqq->ioprio_class = bfqq->new_ioprio_class; -+ entity->prio_changed = 0; -+ -+ /* -+ * NOTE: here we may be changing the weight too early, -+ * this will cause unfairness. The correct approach -+ * would have required additional complexity to defer -+ * weight changes to the proper time instants (i.e., -+ * when entity->finish <= old_st->vtime). -+ */ -+ new_st = bfq_entity_service_tree(entity); -+ -+ prev_weight = entity->weight; -+ new_weight = entity->orig_weight * -+ (bfqq ? bfqq->wr_coeff : 1); -+ /* -+ * If the weight of the entity changes, remove the entity -+ * from its old weight counter (if there is a counter -+ * associated with the entity), and add it to the counter -+ * associated with its new weight. -+ */ -+ if (prev_weight != new_weight) { -+ root = bfqq ? &bfqd->queue_weights_tree : -+ &bfqd->group_weights_tree; -+ bfq_weights_tree_remove(bfqd, entity, root); -+ } -+ entity->weight = new_weight; -+ /* -+ * Add the entity to its weights tree only if it is -+ * not associated with a weight-raised queue. -+ */ -+ if (prev_weight != new_weight && -+ (bfqq ? bfqq->wr_coeff == 1 : 1)) -+ /* If we get here, root has been initialized. */ -+ bfq_weights_tree_add(bfqd, entity, root); -+ -+ new_st->wsum += entity->weight; -+ -+ if (new_st != old_st) -+ entity->start = new_st->vtime; -+ } -+ -+ return new_st; -+} -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+static void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg); -+#endif -+ -+/** -+ * bfq_bfqq_served - update the scheduler status after selection for -+ * service. -+ * @bfqq: the queue being served. -+ * @served: bytes to transfer. -+ * -+ * NOTE: this can be optimized, as the timestamps of upper level entities -+ * are synchronized every time a new bfqq is selected for service. By now, -+ * we keep it to better check consistency. -+ */ -+static void bfq_bfqq_served(struct bfq_queue *bfqq, int served) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ struct bfq_service_tree *st; -+ -+ for_each_entity(entity) { -+ st = bfq_entity_service_tree(entity); -+ -+ entity->service += served; -+ BUG_ON(entity->service > entity->budget); -+ BUG_ON(st->wsum == 0); -+ -+ st->vtime += bfq_delta(served, st->wsum); -+ bfq_forget_idle(st); -+ } -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ bfqg_stats_set_start_empty_time(bfqq_group(bfqq)); -+#endif -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served); -+} -+ -+/** -+ * bfq_bfqq_charge_full_budget - set the service to the entity budget. -+ * @bfqq: the queue that needs a service update. -+ * -+ * When it's not possible to be fair in the service domain, because -+ * a queue is not consuming its budget fast enough (the meaning of -+ * fast depends on the timeout parameter), we charge it a full -+ * budget. In this way we should obtain a sort of time-domain -+ * fairness among all the seeky/slow queues. -+ */ -+static void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); -+ -+ bfq_bfqq_served(bfqq, entity->budget - entity->service); -+} -+ -+/** -+ * __bfq_activate_entity - activate an entity. -+ * @entity: the entity being activated. -+ * -+ * Called whenever an entity is activated, i.e., it is not active and one -+ * of its children receives a new request, or has to be reactivated due to -+ * budget exhaustion. It uses the current budget of the entity (and the -+ * service received if @entity is active) of the queue to calculate its -+ * timestamps. -+ */ -+static void __bfq_activate_entity(struct bfq_entity *entity) -+{ -+ struct bfq_sched_data *sd = entity->sched_data; -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); -+ -+ if (entity == sd->in_service_entity) { -+ BUG_ON(entity->tree); -+ /* -+ * If we are requeueing the current entity we have -+ * to take care of not charging to it service it has -+ * not received. -+ */ -+ bfq_calc_finish(entity, entity->service); -+ entity->start = entity->finish; -+ sd->in_service_entity = NULL; -+ } else if (entity->tree == &st->active) { -+ /* -+ * Requeueing an entity due to a change of some -+ * next_in_service entity below it. We reuse the -+ * old start time. -+ */ -+ bfq_active_extract(st, entity); -+ } else if (entity->tree == &st->idle) { -+ /* -+ * Must be on the idle tree, bfq_idle_extract() will -+ * check for that. -+ */ -+ bfq_idle_extract(st, entity); -+ entity->start = bfq_gt(st->vtime, entity->finish) ? -+ st->vtime : entity->finish; -+ } else { -+ /* -+ * The finish time of the entity may be invalid, and -+ * it is in the past for sure, otherwise the queue -+ * would have been on the idle tree. -+ */ -+ entity->start = st->vtime; -+ st->wsum += entity->weight; -+ bfq_get_entity(entity); -+ -+ BUG_ON(entity->on_st); -+ entity->on_st = 1; -+ } -+ -+ st = __bfq_entity_update_weight_prio(st, entity); -+ bfq_calc_finish(entity, entity->budget); -+ bfq_active_insert(st, entity); -+} -+ -+/** -+ * bfq_activate_entity - activate an entity and its ancestors if necessary. -+ * @entity: the entity to activate. -+ * -+ * Activate @entity and all the entities on the path from it to the root. -+ */ -+static void bfq_activate_entity(struct bfq_entity *entity) -+{ -+ struct bfq_sched_data *sd; -+ -+ for_each_entity(entity) { -+ __bfq_activate_entity(entity); -+ -+ sd = entity->sched_data; -+ if (!bfq_update_next_in_service(sd)) -+ /* -+ * No need to propagate the activation to the -+ * upper entities, as they will be updated when -+ * the in-service entity is rescheduled. -+ */ -+ break; -+ } -+} -+ -+/** -+ * __bfq_deactivate_entity - deactivate an entity from its service tree. -+ * @entity: the entity to deactivate. -+ * @requeue: if false, the entity will not be put into the idle tree. -+ * -+ * Deactivate an entity, independently from its previous state. If the -+ * entity was not on a service tree just return, otherwise if it is on -+ * any scheduler tree, extract it from that tree, and if necessary -+ * and if the caller did not specify @requeue, put it on the idle tree. -+ * -+ * Return %1 if the caller should update the entity hierarchy, i.e., -+ * if the entity was in service or if it was the next_in_service for -+ * its sched_data; return %0 otherwise. -+ */ -+static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) -+{ -+ struct bfq_sched_data *sd = entity->sched_data; -+ struct bfq_service_tree *st; -+ int was_in_service; -+ int ret = 0; -+ -+ if (sd == NULL || !entity->on_st) /* never activated, or inactive */ -+ return 0; -+ -+ st = bfq_entity_service_tree(entity); -+ was_in_service = entity == sd->in_service_entity; -+ -+ BUG_ON(was_in_service && entity->tree); -+ -+ if (was_in_service) { -+ bfq_calc_finish(entity, entity->service); -+ sd->in_service_entity = NULL; -+ } else if (entity->tree == &st->active) -+ bfq_active_extract(st, entity); -+ else if (entity->tree == &st->idle) -+ bfq_idle_extract(st, entity); -+ else if (entity->tree) -+ BUG(); -+ -+ if (was_in_service || sd->next_in_service == entity) -+ ret = bfq_update_next_in_service(sd); -+ -+ if (!requeue || !bfq_gt(entity->finish, st->vtime)) -+ bfq_forget_entity(st, entity); -+ else -+ bfq_idle_insert(st, entity); -+ -+ BUG_ON(sd->in_service_entity == entity); -+ BUG_ON(sd->next_in_service == entity); -+ -+ return ret; -+} -+ -+/** -+ * bfq_deactivate_entity - deactivate an entity. -+ * @entity: the entity to deactivate. -+ * @requeue: true if the entity can be put on the idle tree -+ */ -+static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) -+{ -+ struct bfq_sched_data *sd; -+ struct bfq_entity *parent; -+ -+ for_each_entity_safe(entity, parent) { -+ sd = entity->sched_data; -+ -+ if (!__bfq_deactivate_entity(entity, requeue)) -+ /* -+ * The parent entity is still backlogged, and -+ * we don't need to update it as it is still -+ * in service. -+ */ -+ break; -+ -+ if (sd->next_in_service) -+ /* -+ * The parent entity is still backlogged and -+ * the budgets on the path towards the root -+ * need to be updated. -+ */ -+ goto update; -+ -+ /* -+ * If we reach there the parent is no more backlogged and -+ * we want to propagate the dequeue upwards. -+ */ -+ requeue = 1; -+ } -+ -+ return; -+ -+update: -+ entity = parent; -+ for_each_entity(entity) { -+ __bfq_activate_entity(entity); -+ -+ sd = entity->sched_data; -+ if (!bfq_update_next_in_service(sd)) -+ break; -+ } -+} -+ -+/** -+ * bfq_update_vtime - update vtime if necessary. -+ * @st: the service tree to act upon. -+ * -+ * If necessary update the service tree vtime to have at least one -+ * eligible entity, skipping to its start time. Assumes that the -+ * active tree of the device is not empty. -+ * -+ * NOTE: this hierarchical implementation updates vtimes quite often, -+ * we may end up with reactivated processes getting timestamps after a -+ * vtime skip done because we needed a ->first_active entity on some -+ * intermediate node. -+ */ -+static void bfq_update_vtime(struct bfq_service_tree *st) -+{ -+ struct bfq_entity *entry; -+ struct rb_node *node = st->active.rb_node; -+ -+ entry = rb_entry(node, struct bfq_entity, rb_node); -+ if (bfq_gt(entry->min_start, st->vtime)) { -+ st->vtime = entry->min_start; -+ bfq_forget_idle(st); -+ } -+} -+ -+/** -+ * bfq_first_active_entity - find the eligible entity with -+ * the smallest finish time -+ * @st: the service tree to select from. -+ * -+ * This function searches the first schedulable entity, starting from the -+ * root of the tree and going on the left every time on this side there is -+ * a subtree with at least one eligible (start >= vtime) entity. The path on -+ * the right is followed only if a) the left subtree contains no eligible -+ * entities and b) no eligible entity has been found yet. -+ */ -+static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) -+{ -+ struct bfq_entity *entry, *first = NULL; -+ struct rb_node *node = st->active.rb_node; -+ -+ while (node) { -+ entry = rb_entry(node, struct bfq_entity, rb_node); -+left: -+ if (!bfq_gt(entry->start, st->vtime)) -+ first = entry; -+ -+ BUG_ON(bfq_gt(entry->min_start, st->vtime)); -+ -+ if (node->rb_left) { -+ entry = rb_entry(node->rb_left, -+ struct bfq_entity, rb_node); -+ if (!bfq_gt(entry->min_start, st->vtime)) { -+ node = node->rb_left; -+ goto left; -+ } -+ } -+ if (first) -+ break; -+ node = node->rb_right; -+ } -+ -+ BUG_ON(!first && !RB_EMPTY_ROOT(&st->active)); -+ return first; -+} -+ -+/** -+ * __bfq_lookup_next_entity - return the first eligible entity in @st. -+ * @st: the service tree. -+ * -+ * Update the virtual time in @st and return the first eligible entity -+ * it contains. -+ */ -+static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, -+ bool force) -+{ -+ struct bfq_entity *entity, *new_next_in_service = NULL; -+ -+ if (RB_EMPTY_ROOT(&st->active)) -+ return NULL; -+ -+ bfq_update_vtime(st); -+ entity = bfq_first_active_entity(st); -+ BUG_ON(bfq_gt(entity->start, st->vtime)); -+ -+ /* -+ * If the chosen entity does not match with the sched_data's -+ * next_in_service and we are forcedly serving the IDLE priority -+ * class tree, bubble up budget update. -+ */ -+ if (unlikely(force && entity != entity->sched_data->next_in_service)) { -+ new_next_in_service = entity; -+ for_each_entity(new_next_in_service) -+ bfq_update_budget(new_next_in_service); -+ } -+ -+ return entity; -+} -+ -+/** -+ * bfq_lookup_next_entity - return the first eligible entity in @sd. -+ * @sd: the sched_data. -+ * @extract: if true the returned entity will be also extracted from @sd. -+ * -+ * NOTE: since we cache the next_in_service entity at each level of the -+ * hierarchy, the complexity of the lookup can be decreased with -+ * absolutely no effort just returning the cached next_in_service value; -+ * we prefer to do full lookups to test the consistency of * the data -+ * structures. -+ */ -+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, -+ int extract, -+ struct bfq_data *bfqd) -+{ -+ struct bfq_service_tree *st = sd->service_tree; -+ struct bfq_entity *entity; -+ int i = 0; -+ -+ BUG_ON(sd->in_service_entity); -+ -+ if (bfqd && -+ jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { -+ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, -+ true); -+ if (entity) { -+ i = BFQ_IOPRIO_CLASSES - 1; -+ bfqd->bfq_class_idle_last_service = jiffies; -+ sd->next_in_service = entity; -+ } -+ } -+ for (; i < BFQ_IOPRIO_CLASSES; i++) { -+ entity = __bfq_lookup_next_entity(st + i, false); -+ if (entity) { -+ if (extract) { -+ bfq_check_next_in_service(sd, entity); -+ bfq_active_extract(st + i, entity); -+ sd->in_service_entity = entity; -+ sd->next_in_service = NULL; -+ } -+ break; -+ } -+ } -+ -+ return entity; -+} -+ -+/* -+ * Get next queue for service. -+ */ -+static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) -+{ -+ struct bfq_entity *entity = NULL; -+ struct bfq_sched_data *sd; -+ struct bfq_queue *bfqq; -+ -+ BUG_ON(bfqd->in_service_queue); -+ -+ if (bfqd->busy_queues == 0) -+ return NULL; -+ -+ sd = &bfqd->root_group->sched_data; -+ for (; sd ; sd = entity->my_sched_data) { -+ entity = bfq_lookup_next_entity(sd, 1, bfqd); -+ BUG_ON(!entity); -+ entity->service = 0; -+ } -+ -+ bfqq = bfq_entity_to_bfqq(entity); -+ BUG_ON(!bfqq); -+ -+ return bfqq; -+} -+ -+static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) -+{ -+ if (bfqd->in_service_bic) { -+ put_io_context(bfqd->in_service_bic->icq.ioc); -+ bfqd->in_service_bic = NULL; -+ } -+ -+ bfqd->in_service_queue = NULL; -+ del_timer(&bfqd->idle_slice_timer); -+} -+ -+static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ int requeue) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ if (bfqq == bfqd->in_service_queue) -+ __bfq_bfqd_reset_in_service(bfqd); -+ -+ bfq_deactivate_entity(entity, requeue); -+} -+ -+static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ bfq_activate_entity(entity); -+} -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+static void bfqg_stats_update_dequeue(struct bfq_group *bfqg); -+#endif -+ -+/* -+ * Called when the bfqq no longer has requests pending, remove it from -+ * the service tree. -+ */ -+static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ int requeue) -+{ -+ BUG_ON(!bfq_bfqq_busy(bfqq)); -+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); -+ -+ bfq_log_bfqq(bfqd, bfqq, "del from busy"); -+ -+ bfq_clear_bfqq_busy(bfqq); -+ -+ BUG_ON(bfqd->busy_queues == 0); -+ bfqd->busy_queues--; -+ -+ if (!bfqq->dispatched) { -+ bfq_weights_tree_remove(bfqd, &bfqq->entity, -+ &bfqd->queue_weights_tree); -+ if (!blk_queue_nonrot(bfqd->queue)) { -+ BUG_ON(!bfqd->busy_in_flight_queues); -+ bfqd->busy_in_flight_queues--; -+ if (bfq_bfqq_constantly_seeky(bfqq)) { -+ BUG_ON(!bfqd-> -+ const_seeky_busy_in_flight_queues); -+ bfqd->const_seeky_busy_in_flight_queues--; -+ } -+ } -+ } -+ if (bfqq->wr_coeff > 1) -+ bfqd->wr_busy_queues--; -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ bfqg_stats_update_dequeue(bfqq_group(bfqq)); -+#endif -+ -+ bfq_deactivate_bfqq(bfqd, bfqq, requeue); -+} -+ -+/* -+ * Called when an inactive queue receives a new request. -+ */ -+static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ BUG_ON(bfq_bfqq_busy(bfqq)); -+ BUG_ON(bfqq == bfqd->in_service_queue); -+ -+ bfq_log_bfqq(bfqd, bfqq, "add to busy"); -+ -+ bfq_activate_bfqq(bfqd, bfqq); -+ -+ bfq_mark_bfqq_busy(bfqq); -+ bfqd->busy_queues++; -+ -+ if (!bfqq->dispatched) { -+ if (bfqq->wr_coeff == 1) -+ bfq_weights_tree_add(bfqd, &bfqq->entity, -+ &bfqd->queue_weights_tree); -+ if (!blk_queue_nonrot(bfqd->queue)) { -+ bfqd->busy_in_flight_queues++; -+ if (bfq_bfqq_constantly_seeky(bfqq)) -+ bfqd->const_seeky_busy_in_flight_queues++; -+ } -+ } -+ if (bfqq->wr_coeff > 1) -+ bfqd->wr_busy_queues++; -+} -diff --git a/block/bfq.h b/block/bfq.h -new file mode 100644 -index 0000000..2bf54ae ---- /dev/null -+++ b/block/bfq.h -@@ -0,0 +1,801 @@ -+/* -+ * BFQ-v7r11 for 4.5.0: data structures and common functions prototypes. -+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> -+ * -+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> -+ * Paolo Valente <paolo.valente@unimore.it> -+ * -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it> -+ */ -+ -+#ifndef _BFQ_H -+#define _BFQ_H -+ -+#include <linux/blktrace_api.h> -+#include <linux/hrtimer.h> -+#include <linux/ioprio.h> -+#include <linux/rbtree.h> -+#include <linux/blk-cgroup.h> -+ -+#define BFQ_IOPRIO_CLASSES 3 -+#define BFQ_CL_IDLE_TIMEOUT (HZ/5) -+ -+#define BFQ_MIN_WEIGHT 1 -+#define BFQ_MAX_WEIGHT 1000 -+#define BFQ_WEIGHT_CONVERSION_COEFF 10 -+ -+#define BFQ_DEFAULT_QUEUE_IOPRIO 4 -+ -+#define BFQ_DEFAULT_GRP_WEIGHT 10 -+#define BFQ_DEFAULT_GRP_IOPRIO 0 -+#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE -+ -+struct bfq_entity; -+ -+/** -+ * struct bfq_service_tree - per ioprio_class service tree. -+ * @active: tree for active entities (i.e., those backlogged). -+ * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i). -+ * @first_idle: idle entity with minimum F_i. -+ * @last_idle: idle entity with maximum F_i. -+ * @vtime: scheduler virtual time. -+ * @wsum: scheduler weight sum; active and idle entities contribute to it. -+ * -+ * Each service tree represents a B-WF2Q+ scheduler on its own. Each -+ * ioprio_class has its own independent scheduler, and so its own -+ * bfq_service_tree. All the fields are protected by the queue lock -+ * of the containing bfqd. -+ */ -+struct bfq_service_tree { -+ struct rb_root active; -+ struct rb_root idle; -+ -+ struct bfq_entity *first_idle; -+ struct bfq_entity *last_idle; -+ -+ u64 vtime; -+ unsigned long wsum; -+}; -+ -+/** -+ * struct bfq_sched_data - multi-class scheduler. -+ * @in_service_entity: entity in service. -+ * @next_in_service: head-of-the-line entity in the scheduler. -+ * @service_tree: array of service trees, one per ioprio_class. -+ * -+ * bfq_sched_data is the basic scheduler queue. It supports three -+ * ioprio_classes, and can be used either as a toplevel queue or as -+ * an intermediate queue on a hierarchical setup. -+ * @next_in_service points to the active entity of the sched_data -+ * service trees that will be scheduled next. -+ * -+ * The supported ioprio_classes are the same as in CFQ, in descending -+ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. -+ * Requests from higher priority queues are served before all the -+ * requests from lower priority queues; among requests of the same -+ * queue requests are served according to B-WF2Q+. -+ * All the fields are protected by the queue lock of the containing bfqd. -+ */ -+struct bfq_sched_data { -+ struct bfq_entity *in_service_entity; -+ struct bfq_entity *next_in_service; -+ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; -+}; -+ -+/** -+ * struct bfq_weight_counter - counter of the number of all active entities -+ * with a given weight. -+ * @weight: weight of the entities that this counter refers to. -+ * @num_active: number of active entities with this weight. -+ * @weights_node: weights tree member (see bfq_data's @queue_weights_tree -+ * and @group_weights_tree). -+ */ -+struct bfq_weight_counter { -+ short int weight; -+ unsigned int num_active; -+ struct rb_node weights_node; -+}; -+ -+/** -+ * struct bfq_entity - schedulable entity. -+ * @rb_node: service_tree member. -+ * @weight_counter: pointer to the weight counter associated with this entity. -+ * @on_st: flag, true if the entity is on a tree (either the active or -+ * the idle one of its service_tree). -+ * @finish: B-WF2Q+ finish timestamp (aka F_i). -+ * @start: B-WF2Q+ start timestamp (aka S_i). -+ * @tree: tree the entity is enqueued into; %NULL if not on a tree. -+ * @min_start: minimum start time of the (active) subtree rooted at -+ * this entity; used for O(log N) lookups into active trees. -+ * @service: service received during the last round of service. -+ * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. -+ * @weight: weight of the queue -+ * @parent: parent entity, for hierarchical scheduling. -+ * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the -+ * associated scheduler queue, %NULL on leaf nodes. -+ * @sched_data: the scheduler queue this entity belongs to. -+ * @ioprio: the ioprio in use. -+ * @new_weight: when a weight change is requested, the new weight value. -+ * @orig_weight: original weight, used to implement weight boosting -+ * @prio_changed: flag, true when the user requested a weight, ioprio or -+ * ioprio_class change. -+ * -+ * A bfq_entity is used to represent either a bfq_queue (leaf node in the -+ * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each -+ * entity belongs to the sched_data of the parent group in the cgroup -+ * hierarchy. Non-leaf entities have also their own sched_data, stored -+ * in @my_sched_data. -+ * -+ * Each entity stores independently its priority values; this would -+ * allow different weights on different devices, but this -+ * functionality is not exported to userspace by now. Priorities and -+ * weights are updated lazily, first storing the new values into the -+ * new_* fields, then setting the @prio_changed flag. As soon as -+ * there is a transition in the entity state that allows the priority -+ * update to take place the effective and the requested priority -+ * values are synchronized. -+ * -+ * Unless cgroups are used, the weight value is calculated from the -+ * ioprio to export the same interface as CFQ. When dealing with -+ * ``well-behaved'' queues (i.e., queues that do not spend too much -+ * time to consume their budget and have true sequential behavior, and -+ * when there are no external factors breaking anticipation) the -+ * relative weights at each level of the cgroups hierarchy should be -+ * guaranteed. All the fields are protected by the queue lock of the -+ * containing bfqd. -+ */ -+struct bfq_entity { -+ struct rb_node rb_node; -+ struct bfq_weight_counter *weight_counter; -+ -+ int on_st; -+ -+ u64 finish; -+ u64 start; -+ -+ struct rb_root *tree; -+ -+ u64 min_start; -+ -+ int service, budget; -+ unsigned short weight, new_weight; -+ unsigned short orig_weight; -+ -+ struct bfq_entity *parent; -+ -+ struct bfq_sched_data *my_sched_data; -+ struct bfq_sched_data *sched_data; -+ -+ int prio_changed; -+}; -+ -+struct bfq_group; -+ -+/** -+ * struct bfq_queue - leaf schedulable entity. -+ * @ref: reference counter. -+ * @bfqd: parent bfq_data. -+ * @new_ioprio: when an ioprio change is requested, the new ioprio value. -+ * @ioprio_class: the ioprio_class in use. -+ * @new_ioprio_class: when an ioprio_class change is requested, the new -+ * ioprio_class value. -+ * @new_bfqq: shared bfq_queue if queue is cooperating with -+ * one or more other queues. -+ * @sort_list: sorted list of pending requests. -+ * @next_rq: if fifo isn't expired, next request to serve. -+ * @queued: nr of requests queued in @sort_list. -+ * @allocated: currently allocated requests. -+ * @meta_pending: pending metadata requests. -+ * @fifo: fifo list of requests in sort_list. -+ * @entity: entity representing this queue in the scheduler. -+ * @max_budget: maximum budget allowed from the feedback mechanism. -+ * @budget_timeout: budget expiration (in jiffies). -+ * @dispatched: number of requests on the dispatch list or inside driver. -+ * @flags: status flags. -+ * @bfqq_list: node for active/idle bfqq list inside our bfqd. -+ * @burst_list_node: node for the device's burst list. -+ * @seek_samples: number of seeks sampled -+ * @seek_total: sum of the distances of the seeks sampled -+ * @seek_mean: mean seek distance -+ * @last_request_pos: position of the last request enqueued -+ * @requests_within_timer: number of consecutive pairs of request completion -+ * and arrival, such that the queue becomes idle -+ * after the completion, but the next request arrives -+ * within an idle time slice; used only if the queue's -+ * IO_bound has been cleared. -+ * @pid: pid of the process owning the queue, used for logging purposes. -+ * @last_wr_start_finish: start time of the current weight-raising period if -+ * the @bfq-queue is being weight-raised, otherwise -+ * finish time of the last weight-raising period -+ * @wr_cur_max_time: current max raising time for this queue -+ * @soft_rt_next_start: minimum time instant such that, only if a new -+ * request is enqueued after this time instant in an -+ * idle @bfq_queue with no outstanding requests, then -+ * the task associated with the queue it is deemed as -+ * soft real-time (see the comments to the function -+ * bfq_bfqq_softrt_next_start()) -+ * @last_idle_bklogged: time of the last transition of the @bfq_queue from -+ * idle to backlogged -+ * @service_from_backlogged: cumulative service received from the @bfq_queue -+ * since the last transition from idle to -+ * backlogged -+ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the -+ * queue is shared -+ * -+ * A bfq_queue is a leaf request queue; it can be associated with an -+ * io_context or more, if it is async or shared between cooperating -+ * processes. @cgroup holds a reference to the cgroup, to be sure that it -+ * does not disappear while a bfqq still references it (mostly to avoid -+ * races between request issuing and task migration followed by cgroup -+ * destruction). -+ * All the fields are protected by the queue lock of the containing bfqd. -+ */ -+struct bfq_queue { -+ atomic_t ref; -+ struct bfq_data *bfqd; -+ -+ unsigned short ioprio, new_ioprio; -+ unsigned short ioprio_class, new_ioprio_class; -+ -+ /* fields for cooperating queues handling */ -+ struct bfq_queue *new_bfqq; -+ struct rb_node pos_node; -+ struct rb_root *pos_root; -+ -+ struct rb_root sort_list; -+ struct request *next_rq; -+ int queued[2]; -+ int allocated[2]; -+ int meta_pending; -+ struct list_head fifo; -+ -+ struct bfq_entity entity; -+ -+ int max_budget; -+ unsigned long budget_timeout; -+ -+ int dispatched; -+ -+ unsigned int flags; -+ -+ struct list_head bfqq_list; -+ -+ struct hlist_node burst_list_node; -+ -+ unsigned int seek_samples; -+ u64 seek_total; -+ sector_t seek_mean; -+ sector_t last_request_pos; -+ -+ unsigned int requests_within_timer; -+ -+ pid_t pid; -+ struct bfq_io_cq *bic; -+ -+ /* weight-raising fields */ -+ unsigned long wr_cur_max_time; -+ unsigned long soft_rt_next_start; -+ unsigned long last_wr_start_finish; -+ unsigned int wr_coeff; -+ unsigned long last_idle_bklogged; -+ unsigned long service_from_backlogged; -+}; -+ -+/** -+ * struct bfq_ttime - per process thinktime stats. -+ * @ttime_total: total process thinktime -+ * @ttime_samples: number of thinktime samples -+ * @ttime_mean: average process thinktime -+ */ -+struct bfq_ttime { -+ unsigned long last_end_request; -+ -+ unsigned long ttime_total; -+ unsigned long ttime_samples; -+ unsigned long ttime_mean; -+}; -+ -+/** -+ * struct bfq_io_cq - per (request_queue, io_context) structure. -+ * @icq: associated io_cq structure -+ * @bfqq: array of two process queues, the sync and the async -+ * @ttime: associated @bfq_ttime struct -+ * @ioprio: per (request_queue, blkcg) ioprio. -+ * @blkcg_id: id of the blkcg the related io_cq belongs to. -+ */ -+struct bfq_io_cq { -+ struct io_cq icq; /* must be the first member */ -+ struct bfq_queue *bfqq[2]; -+ struct bfq_ttime ttime; -+ int ioprio; -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ uint64_t blkcg_id; /* the current blkcg ID */ -+#endif -+}; -+ -+enum bfq_device_speed { -+ BFQ_BFQD_FAST, -+ BFQ_BFQD_SLOW, -+}; -+ -+/** -+ * struct bfq_data - per device data structure. -+ * @queue: request queue for the managed device. -+ * @root_group: root bfq_group for the device. -+ * @active_numerous_groups: number of bfq_groups containing more than one -+ * active @bfq_entity. -+ * @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by -+ * weight. Used to keep track of whether all @bfq_queues -+ * have the same weight. The tree contains one counter -+ * for each distinct weight associated to some active -+ * and not weight-raised @bfq_queue (see the comments to -+ * the functions bfq_weights_tree_[add|remove] for -+ * further details). -+ * @group_weights_tree: rbtree of non-queue @bfq_entity weight counters, sorted -+ * by weight. Used to keep track of whether all -+ * @bfq_groups have the same weight. The tree contains -+ * one counter for each distinct weight associated to -+ * some active @bfq_group (see the comments to the -+ * functions bfq_weights_tree_[add|remove] for further -+ * details). -+ * @busy_queues: number of bfq_queues containing requests (including the -+ * queue in service, even if it is idling). -+ * @busy_in_flight_queues: number of @bfq_queues containing pending or -+ * in-flight requests, plus the @bfq_queue in -+ * service, even if idle but waiting for the -+ * possible arrival of its next sync request. This -+ * field is updated only if the device is rotational, -+ * but used only if the device is also NCQ-capable. -+ * The reason why the field is updated also for non- -+ * NCQ-capable rotational devices is related to the -+ * fact that the value of @hw_tag may be set also -+ * later than when busy_in_flight_queues may need to -+ * be incremented for the first time(s). Taking also -+ * this possibility into account, to avoid unbalanced -+ * increments/decrements, would imply more overhead -+ * than just updating busy_in_flight_queues -+ * regardless of the value of @hw_tag. -+ * @const_seeky_busy_in_flight_queues: number of constantly-seeky @bfq_queues -+ * (that is, seeky queues that expired -+ * for budget timeout at least once) -+ * containing pending or in-flight -+ * requests, including the in-service -+ * @bfq_queue if constantly seeky. This -+ * field is updated only if the device -+ * is rotational, but used only if the -+ * device is also NCQ-capable (see the -+ * comments to @busy_in_flight_queues). -+ * @wr_busy_queues: number of weight-raised busy @bfq_queues. -+ * @queued: number of queued requests. -+ * @rq_in_driver: number of requests dispatched and waiting for completion. -+ * @sync_flight: number of sync requests in the driver. -+ * @max_rq_in_driver: max number of reqs in driver in the last -+ * @hw_tag_samples completed requests. -+ * @hw_tag_samples: nr of samples used to calculate hw_tag. -+ * @hw_tag: flag set to one if the driver is showing a queueing behavior. -+ * @budgets_assigned: number of budgets assigned. -+ * @idle_slice_timer: timer set when idling for the next sequential request -+ * from the queue in service. -+ * @unplug_work: delayed work to restart dispatching on the request queue. -+ * @in_service_queue: bfq_queue in service. -+ * @in_service_bic: bfq_io_cq (bic) associated with the @in_service_queue. -+ * @last_position: on-disk position of the last served request. -+ * @last_budget_start: beginning of the last budget. -+ * @last_idling_start: beginning of the last idle slice. -+ * @peak_rate: peak transfer rate observed for a budget. -+ * @peak_rate_samples: number of samples used to calculate @peak_rate. -+ * @bfq_max_budget: maximum budget allotted to a bfq_queue before -+ * rescheduling. -+ * @active_list: list of all the bfq_queues active on the device. -+ * @idle_list: list of all the bfq_queues idle on the device. -+ * @bfq_fifo_expire: timeout for async/sync requests; when it expires -+ * requests are served in fifo order. -+ * @bfq_back_penalty: weight of backward seeks wrt forward ones. -+ * @bfq_back_max: maximum allowed backward seek. -+ * @bfq_slice_idle: maximum idling time. -+ * @bfq_user_max_budget: user-configured max budget value -+ * (0 for auto-tuning). -+ * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to -+ * async queues. -+ * @bfq_timeout: timeout for bfq_queues to consume their budget; used to -+ * to prevent seeky queues to impose long latencies to well -+ * behaved ones (this also implies that seeky queues cannot -+ * receive guarantees in the service domain; after a timeout -+ * they are charged for the whole allocated budget, to try -+ * to preserve a behavior reasonably fair among them, but -+ * without service-domain guarantees). -+ * @bfq_coop_thresh: number of queue merges after which a @bfq_queue is -+ * no more granted any weight-raising. -+ * @bfq_failed_cooperations: number of consecutive failed cooperation -+ * chances after which weight-raising is restored -+ * to a queue subject to more than bfq_coop_thresh -+ * queue merges. -+ * @bfq_requests_within_timer: number of consecutive requests that must be -+ * issued within the idle time slice to set -+ * again idling to a queue which was marked as -+ * non-I/O-bound (see the definition of the -+ * IO_bound flag for further details). -+ * @last_ins_in_burst: last time at which a queue entered the current -+ * burst of queues being activated shortly after -+ * each other; for more details about this and the -+ * following parameters related to a burst of -+ * activations, see the comments to the function -+ * @bfq_handle_burst. -+ * @bfq_burst_interval: reference time interval used to decide whether a -+ * queue has been activated shortly after -+ * @last_ins_in_burst. -+ * @burst_size: number of queues in the current burst of queue activations. -+ * @bfq_large_burst_thresh: maximum burst size above which the current -+ * queue-activation burst is deemed as 'large'. -+ * @large_burst: true if a large queue-activation burst is in progress. -+ * @burst_list: head of the burst list (as for the above fields, more details -+ * in the comments to the function bfq_handle_burst). -+ * @low_latency: if set to true, low-latency heuristics are enabled. -+ * @bfq_wr_coeff: maximum factor by which the weight of a weight-raised -+ * queue is multiplied. -+ * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies). -+ * @bfq_wr_rt_max_time: maximum duration for soft real-time processes. -+ * @bfq_wr_min_idle_time: minimum idle period after which weight-raising -+ * may be reactivated for a queue (in jiffies). -+ * @bfq_wr_min_inter_arr_async: minimum period between request arrivals -+ * after which weight-raising may be -+ * reactivated for an already busy queue -+ * (in jiffies). -+ * @bfq_wr_max_softrt_rate: max service-rate for a soft real-time queue, -+ * sectors per seconds. -+ * @RT_prod: cached value of the product R*T used for computing the maximum -+ * duration of the weight raising automatically. -+ * @device_speed: device-speed class for the low-latency heuristic. -+ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions. -+ * -+ * All the fields are protected by the @queue lock. -+ */ -+struct bfq_data { -+ struct request_queue *queue; -+ -+ struct bfq_group *root_group; -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ int active_numerous_groups; -+#endif -+ -+ struct rb_root queue_weights_tree; -+ struct rb_root group_weights_tree; -+ -+ int busy_queues; -+ int busy_in_flight_queues; -+ int const_seeky_busy_in_flight_queues; -+ int wr_busy_queues; -+ int queued; -+ int rq_in_driver; -+ int sync_flight; -+ -+ int max_rq_in_driver; -+ int hw_tag_samples; -+ int hw_tag; -+ -+ int budgets_assigned; -+ -+ struct timer_list idle_slice_timer; -+ struct work_struct unplug_work; -+ -+ struct bfq_queue *in_service_queue; -+ struct bfq_io_cq *in_service_bic; -+ -+ sector_t last_position; -+ -+ ktime_t last_budget_start; -+ ktime_t last_idling_start; -+ int peak_rate_samples; -+ u64 peak_rate; -+ int bfq_max_budget; -+ -+ struct list_head active_list; -+ struct list_head idle_list; -+ -+ unsigned int bfq_fifo_expire[2]; -+ unsigned int bfq_back_penalty; -+ unsigned int bfq_back_max; -+ unsigned int bfq_slice_idle; -+ u64 bfq_class_idle_last_service; -+ -+ int bfq_user_max_budget; -+ int bfq_max_budget_async_rq; -+ unsigned int bfq_timeout[2]; -+ -+ unsigned int bfq_coop_thresh; -+ unsigned int bfq_failed_cooperations; -+ unsigned int bfq_requests_within_timer; -+ -+ unsigned long last_ins_in_burst; -+ unsigned long bfq_burst_interval; -+ int burst_size; -+ unsigned long bfq_large_burst_thresh; -+ bool large_burst; -+ struct hlist_head burst_list; -+ -+ bool low_latency; -+ -+ /* parameters of the low_latency heuristics */ -+ unsigned int bfq_wr_coeff; -+ unsigned int bfq_wr_max_time; -+ unsigned int bfq_wr_rt_max_time; -+ unsigned int bfq_wr_min_idle_time; -+ unsigned long bfq_wr_min_inter_arr_async; -+ unsigned int bfq_wr_max_softrt_rate; -+ u64 RT_prod; -+ enum bfq_device_speed device_speed; -+ -+ struct bfq_queue oom_bfqq; -+}; -+ -+enum bfqq_state_flags { -+ BFQ_BFQQ_FLAG_busy = 0, /* has requests or is in service */ -+ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ -+ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ -+ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ -+ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ -+ BFQ_BFQQ_FLAG_sync, /* synchronous queue */ -+ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ -+ BFQ_BFQQ_FLAG_IO_bound, /* -+ * bfqq has timed-out at least once -+ * having consumed at most 2/10 of -+ * its budget -+ */ -+ BFQ_BFQQ_FLAG_in_large_burst, /* -+ * bfqq activated in a large burst, -+ * see comments to bfq_handle_burst. -+ */ -+ BFQ_BFQQ_FLAG_constantly_seeky, /* -+ * bfqq has proved to be slow and -+ * seeky until budget timeout -+ */ -+ BFQ_BFQQ_FLAG_softrt_update, /* -+ * may need softrt-next-start -+ * update -+ */ -+}; -+ -+#define BFQ_BFQQ_FNS(name) \ -+static void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ -+{ \ -+ (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \ -+} \ -+static void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \ -+{ \ -+ (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \ -+} \ -+static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ -+{ \ -+ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ -+} -+ -+BFQ_BFQQ_FNS(busy); -+BFQ_BFQQ_FNS(wait_request); -+BFQ_BFQQ_FNS(must_alloc); -+BFQ_BFQQ_FNS(fifo_expire); -+BFQ_BFQQ_FNS(idle_window); -+BFQ_BFQQ_FNS(sync); -+BFQ_BFQQ_FNS(budget_new); -+BFQ_BFQQ_FNS(IO_bound); -+BFQ_BFQQ_FNS(in_large_burst); -+BFQ_BFQQ_FNS(constantly_seeky); -+BFQ_BFQQ_FNS(softrt_update); -+#undef BFQ_BFQQ_FNS -+ -+/* Logging facilities. */ -+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ -+ blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) -+ -+#define bfq_log(bfqd, fmt, args...) \ -+ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) -+ -+/* Expiration reasons. */ -+enum bfqq_expiration { -+ BFQ_BFQQ_TOO_IDLE = 0, /* -+ * queue has been idling for -+ * too long -+ */ -+ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ -+ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ -+ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ -+}; -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ -+struct bfqg_stats { -+ /* total bytes transferred */ -+ struct blkg_rwstat service_bytes; -+ /* total IOs serviced, post merge */ -+ struct blkg_rwstat serviced; -+ /* number of ios merged */ -+ struct blkg_rwstat merged; -+ /* total time spent on device in ns, may not be accurate w/ queueing */ -+ struct blkg_rwstat service_time; -+ /* total time spent waiting in scheduler queue in ns */ -+ struct blkg_rwstat wait_time; -+ /* number of IOs queued up */ -+ struct blkg_rwstat queued; -+ /* total sectors transferred */ -+ struct blkg_stat sectors; -+ /* total disk time and nr sectors dispatched by this group */ -+ struct blkg_stat time; -+ /* time not charged to this cgroup */ -+ struct blkg_stat unaccounted_time; -+ /* sum of number of ios queued across all samples */ -+ struct blkg_stat avg_queue_size_sum; -+ /* count of samples taken for average */ -+ struct blkg_stat avg_queue_size_samples; -+ /* how many times this group has been removed from service tree */ -+ struct blkg_stat dequeue; -+ /* total time spent waiting for it to be assigned a timeslice. */ -+ struct blkg_stat group_wait_time; -+ /* time spent idling for this blkcg_gq */ -+ struct blkg_stat idle_time; -+ /* total time with empty current active q with other requests queued */ -+ struct blkg_stat empty_time; -+ /* fields after this shouldn't be cleared on stat reset */ -+ uint64_t start_group_wait_time; -+ uint64_t start_idle_time; -+ uint64_t start_empty_time; -+ uint16_t flags; -+}; -+ -+/* -+ * struct bfq_group_data - per-blkcg storage for the blkio subsystem. -+ * -+ * @ps: @blkcg_policy_storage that this structure inherits -+ * @weight: weight of the bfq_group -+ */ -+struct bfq_group_data { -+ /* must be the first member */ -+ struct blkcg_policy_data pd; -+ -+ unsigned short weight; -+}; -+ -+/** -+ * struct bfq_group - per (device, cgroup) data structure. -+ * @entity: schedulable entity to insert into the parent group sched_data. -+ * @sched_data: own sched_data, to contain child entities (they may be -+ * both bfq_queues and bfq_groups). -+ * @bfqd: the bfq_data for the device this group acts upon. -+ * @async_bfqq: array of async queues for all the tasks belonging to -+ * the group, one queue per ioprio value per ioprio_class, -+ * except for the idle class that has only one queue. -+ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). -+ * @my_entity: pointer to @entity, %NULL for the toplevel group; used -+ * to avoid too many special cases during group creation/ -+ * migration. -+ * @active_entities: number of active entities belonging to the group; -+ * unused for the root group. Used to know whether there -+ * are groups with more than one active @bfq_entity -+ * (see the comments to the function -+ * bfq_bfqq_must_not_expire()). -+ * -+ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup -+ * there is a set of bfq_groups, each one collecting the lower-level -+ * entities belonging to the group that are acting on the same device. -+ * -+ * Locking works as follows: -+ * o @bfqd is protected by the queue lock, RCU is used to access it -+ * from the readers. -+ * o All the other fields are protected by the @bfqd queue lock. -+ */ -+struct bfq_group { -+ /* must be the first member */ -+ struct blkg_policy_data pd; -+ -+ struct bfq_entity entity; -+ struct bfq_sched_data sched_data; -+ -+ void *bfqd; -+ -+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; -+ struct bfq_queue *async_idle_bfqq; -+ -+ struct bfq_entity *my_entity; -+ -+ int active_entities; -+ -+ struct bfqg_stats stats; -+ struct bfqg_stats dead_stats; /* stats pushed from dead children */ -+}; -+ -+#else -+struct bfq_group { -+ struct bfq_sched_data sched_data; -+ -+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; -+ struct bfq_queue *async_idle_bfqq; -+}; -+#endif -+ -+static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity); -+ -+static struct bfq_service_tree * -+bfq_entity_service_tree(struct bfq_entity *entity) -+{ -+ struct bfq_sched_data *sched_data = entity->sched_data; -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ unsigned int idx = bfqq ? bfqq->ioprio_class - 1 : -+ BFQ_DEFAULT_GRP_CLASS; -+ -+ BUG_ON(idx >= BFQ_IOPRIO_CLASSES); -+ BUG_ON(sched_data == NULL); -+ -+ return sched_data->service_tree + idx; -+} -+ -+static struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync) -+{ -+ return bic->bfqq[is_sync]; -+} -+ -+static void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, -+ bool is_sync) -+{ -+ bic->bfqq[is_sync] = bfqq; -+} -+ -+static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic) -+{ -+ return bic->icq.q->elevator->elevator_data; -+} -+ -+/** -+ * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. -+ * @ptr: a pointer to a bfqd. -+ * @flags: storage for the flags to be saved. -+ * -+ * This function allows bfqg->bfqd to be protected by the -+ * queue lock of the bfqd they reference; the pointer is dereferenced -+ * under RCU, so the storage for bfqd is assured to be safe as long -+ * as the RCU read side critical section does not end. After the -+ * bfqd->queue->queue_lock is taken the pointer is rechecked, to be -+ * sure that no other writer accessed it. If we raced with a writer, -+ * the function returns NULL, with the queue unlocked, otherwise it -+ * returns the dereferenced pointer, with the queue locked. -+ */ -+static struct bfq_data *bfq_get_bfqd_locked(void **ptr, unsigned long *flags) -+{ -+ struct bfq_data *bfqd; -+ -+ rcu_read_lock(); -+ bfqd = rcu_dereference(*(struct bfq_data **)ptr); -+ -+ if (bfqd != NULL) { -+ spin_lock_irqsave(bfqd->queue->queue_lock, *flags); -+ if (ptr == NULL) -+ printk(KERN_CRIT "get_bfqd_locked pointer NULL\n"); -+ else if (*ptr == bfqd) -+ goto out; -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); -+ } -+ -+ bfqd = NULL; -+out: -+ rcu_read_unlock(); -+ return bfqd; -+} -+ -+static void bfq_put_bfqd_unlock(struct bfq_data *bfqd, unsigned long *flags) -+{ -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); -+} -+ -+static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio); -+static void bfq_put_queue(struct bfq_queue *bfqq); -+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); -+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, -+ struct bio *bio, int is_sync, -+ struct bfq_io_cq *bic, gfp_t gfp_mask); -+static void bfq_end_wr_async_queues(struct bfq_data *bfqd, -+ struct bfq_group *bfqg); -+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); -+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); -+ -+#endif /* _BFQ_H */ --- -2.10.0 - diff --git a/helpers/DATA/linux-hwe/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for.patch b/helpers/DATA/linux-hwe/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for.patch deleted file mode 100644 index 28eeb1f7..00000000 --- a/helpers/DATA/linux-hwe/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for.patch +++ /dev/null @@ -1,1101 +0,0 @@ -From e4d9bed2dfdec562b23491e44602c89c4a2a5ea4 Mon Sep 17 00:00:00 2001 -From: Mauro Andreolini <mauro.andreolini@unimore.it> -Date: Sun, 6 Sep 2015 16:09:05 +0200 -Subject: [PATCH 3/4] block, bfq: add Early Queue Merge (EQM) to BFQ-v7r11 for - 4.10.0 - -A set of processes may happen to perform interleaved reads, i.e.,requests -whose union would give rise to a sequential read pattern. There are two -typical cases: in the first case, processes read fixed-size chunks of -data at a fixed distance from each other, while in the second case processes -may read variable-size chunks at variable distances. The latter case occurs -for example with QEMU, which splits the I/O generated by the guest into -multiple chunks, and lets these chunks be served by a pool of cooperating -processes, iteratively assigning the next chunk of I/O to the first -available process. CFQ uses actual queue merging for the first type of -rocesses, whereas it uses preemption to get a sequential read pattern out -of the read requests performed by the second type of processes. In the end -it uses two different mechanisms to achieve the same goal: boosting the -throughput with interleaved I/O. - -This patch introduces Early Queue Merge (EQM), a unified mechanism to get a -sequential read pattern with both types of processes. The main idea is -checking newly arrived requests against the next request of the active queue -both in case of actual request insert and in case of request merge. By doing -so, both the types of processes can be handled by just merging their queues. -EQM is then simpler and more compact than the pair of mechanisms used in -CFQ. - -Finally, EQM also preserves the typical low-latency properties of BFQ, by -properly restoring the weight-raising state of a queue when it gets back to -a non-merged state. - -Signed-off-by: Mauro Andreolini <mauro.andreolini@unimore.it> -Signed-off-by: Arianna Avanzini <avanzini@google.com> -Signed-off-by: Paolo Valente <paolo.valente@unimore.it> -Signed-off-by: Linus Walleij <linus.walleij@linaro.org> ---- - block/bfq-cgroup.c | 5 + - block/bfq-iosched.c | 685 +++++++++++++++++++++++++++++++++++++++++++++++++++- - block/bfq.h | 66 +++++ - 3 files changed, 743 insertions(+), 13 deletions(-) - -diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c -index 8b08a57..0367996 100644 ---- a/block/bfq-cgroup.c -+++ b/block/bfq-cgroup.c -@@ -440,6 +440,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd) - */ - bfqg->bfqd = bfqd; - bfqg->active_entities = 0; -+ bfqg->rq_pos_tree = RB_ROOT; - } - - static void bfq_pd_free(struct blkg_policy_data *pd) -@@ -533,6 +534,9 @@ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, - return bfqg; - } - -+static void bfq_pos_tree_add_move(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq); -+ - /** - * bfq_bfqq_move - migrate @bfqq to @bfqg. - * @bfqd: queue descriptor. -@@ -580,6 +584,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, - bfqg_get(bfqg); - - if (busy) { -+ bfq_pos_tree_add_move(bfqd, bfqq); - if (resume) - bfq_activate_bfqq(bfqd, bfqq); - } -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index 85e2169..cf3e9b1 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -295,6 +295,72 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd, - } - } - -+static struct bfq_queue * -+bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, -+ sector_t sector, struct rb_node **ret_parent, -+ struct rb_node ***rb_link) -+{ -+ struct rb_node **p, *parent; -+ struct bfq_queue *bfqq = NULL; -+ -+ parent = NULL; -+ p = &root->rb_node; -+ while (*p) { -+ struct rb_node **n; -+ -+ parent = *p; -+ bfqq = rb_entry(parent, struct bfq_queue, pos_node); -+ -+ /* -+ * Sort strictly based on sector. Smallest to the left, -+ * largest to the right. -+ */ -+ if (sector > blk_rq_pos(bfqq->next_rq)) -+ n = &(*p)->rb_right; -+ else if (sector < blk_rq_pos(bfqq->next_rq)) -+ n = &(*p)->rb_left; -+ else -+ break; -+ p = n; -+ bfqq = NULL; -+ } -+ -+ *ret_parent = parent; -+ if (rb_link) -+ *rb_link = p; -+ -+ bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", -+ (unsigned long long) sector, -+ bfqq ? bfqq->pid : 0); -+ -+ return bfqq; -+} -+ -+static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ struct rb_node **p, *parent; -+ struct bfq_queue *__bfqq; -+ -+ if (bfqq->pos_root) { -+ rb_erase(&bfqq->pos_node, bfqq->pos_root); -+ bfqq->pos_root = NULL; -+ } -+ -+ if (bfq_class_idle(bfqq)) -+ return; -+ if (!bfqq->next_rq) -+ return; -+ -+ bfqq->pos_root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree; -+ __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, -+ blk_rq_pos(bfqq->next_rq), &parent, &p); -+ if (!__bfqq) { -+ rb_link_node(&bfqq->pos_node, parent, p); -+ rb_insert_color(&bfqq->pos_node, bfqq->pos_root); -+ } else -+ bfqq->pos_root = NULL; -+} -+ - /* - * Tell whether there are active queues or groups with differentiated weights. - */ -@@ -527,6 +593,57 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd) - return dur; - } - -+static unsigned int bfq_bfqq_cooperations(struct bfq_queue *bfqq) -+{ -+ return bfqq->bic ? bfqq->bic->cooperations : 0; -+} -+ -+static void -+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) -+{ -+ if (bic->saved_idle_window) -+ bfq_mark_bfqq_idle_window(bfqq); -+ else -+ bfq_clear_bfqq_idle_window(bfqq); -+ if (bic->saved_IO_bound) -+ bfq_mark_bfqq_IO_bound(bfqq); -+ else -+ bfq_clear_bfqq_IO_bound(bfqq); -+ /* Assuming that the flag in_large_burst is already correctly set */ -+ if (bic->wr_time_left && bfqq->bfqd->low_latency && -+ !bfq_bfqq_in_large_burst(bfqq) && -+ bic->cooperations < bfqq->bfqd->bfq_coop_thresh) { -+ /* -+ * Start a weight raising period with the duration given by -+ * the raising_time_left snapshot. -+ */ -+ if (bfq_bfqq_busy(bfqq)) -+ bfqq->bfqd->wr_busy_queues++; -+ bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff; -+ bfqq->wr_cur_max_time = bic->wr_time_left; -+ bfqq->last_wr_start_finish = jiffies; -+ bfqq->entity.prio_changed = 1; -+ } -+ /* -+ * Clear wr_time_left to prevent bfq_bfqq_save_state() from -+ * getting confused about the queue's need of a weight-raising -+ * period. -+ */ -+ bic->wr_time_left = 0; -+} -+ -+static int bfqq_process_refs(struct bfq_queue *bfqq) -+{ -+ int process_refs, io_refs; -+ -+ lockdep_assert_held(bfqq->bfqd->queue->queue_lock); -+ -+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; -+ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; -+ BUG_ON(process_refs < 0); -+ return process_refs; -+} -+ - /* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */ - static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq) - { -@@ -763,8 +880,14 @@ static void bfq_add_request(struct request *rq) - BUG_ON(!next_rq); - bfqq->next_rq = next_rq; - -+ /* -+ * Adjust priority tree position, if next_rq changes. -+ */ -+ if (prev != bfqq->next_rq) -+ bfq_pos_tree_add_move(bfqd, bfqq); -+ - if (!bfq_bfqq_busy(bfqq)) { -- bool soft_rt, in_burst, -+ bool soft_rt, coop_or_in_burst, - idle_for_long_time = time_is_before_jiffies( - bfqq->budget_timeout + - bfqd->bfq_wr_min_idle_time); -@@ -792,11 +915,12 @@ static void bfq_add_request(struct request *rq) - bfqd->last_ins_in_burst = jiffies; - } - -- in_burst = bfq_bfqq_in_large_burst(bfqq); -+ coop_or_in_burst = bfq_bfqq_in_large_burst(bfqq) || -+ bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh; - soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && -- !in_burst && -+ !coop_or_in_burst && - time_is_before_jiffies(bfqq->soft_rt_next_start); -- interactive = !in_burst && idle_for_long_time; -+ interactive = !coop_or_in_burst && idle_for_long_time; - entity->budget = max_t(unsigned long, bfqq->max_budget, - bfq_serv_to_charge(next_rq, bfqq)); - -@@ -815,6 +939,9 @@ static void bfq_add_request(struct request *rq) - if (!bfqd->low_latency) - goto add_bfqq_busy; - -+ if (bfq_bfqq_just_split(bfqq)) -+ goto set_prio_changed; -+ - /* - * If the queue: - * - is not being boosted, -@@ -839,7 +966,7 @@ static void bfq_add_request(struct request *rq) - } else if (old_wr_coeff > 1) { - if (interactive) - bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -- else if (in_burst || -+ else if (coop_or_in_burst || - (bfqq->wr_cur_max_time == - bfqd->bfq_wr_rt_max_time && - !soft_rt)) { -@@ -904,6 +1031,7 @@ static void bfq_add_request(struct request *rq) - bfqd->bfq_wr_rt_max_time; - } - } -+set_prio_changed: - if (old_wr_coeff != bfqq->wr_coeff) - entity->prio_changed = 1; - add_bfqq_busy: -@@ -1046,6 +1174,15 @@ static void bfq_merged_request(struct request_queue *q, struct request *req, - bfqd->last_position); - BUG_ON(!next_rq); - bfqq->next_rq = next_rq; -+ /* -+ * If next_rq changes, update both the queue's budget to -+ * fit the new request and the queue's position in its -+ * rq_pos_tree. -+ */ -+ if (prev != bfqq->next_rq) { -+ bfq_updated_next_req(bfqd, bfqq); -+ bfq_pos_tree_add_move(bfqd, bfqq); -+ } - } - } - -@@ -1128,11 +1265,346 @@ static void bfq_end_wr(struct bfq_data *bfqd) - spin_unlock_irq(bfqd->queue->queue_lock); - } - -+static sector_t bfq_io_struct_pos(void *io_struct, bool request) -+{ -+ if (request) -+ return blk_rq_pos(io_struct); -+ else -+ return ((struct bio *)io_struct)->bi_iter.bi_sector; -+} -+ -+static int bfq_rq_close_to_sector(void *io_struct, bool request, -+ sector_t sector) -+{ -+ return abs(bfq_io_struct_pos(io_struct, request) - sector) <= -+ BFQQ_SEEK_THR; -+} -+ -+static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ sector_t sector) -+{ -+ struct rb_root *root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree; -+ struct rb_node *parent, *node; -+ struct bfq_queue *__bfqq; -+ -+ if (RB_EMPTY_ROOT(root)) -+ return NULL; -+ -+ /* -+ * First, if we find a request starting at the end of the last -+ * request, choose it. -+ */ -+ __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL); -+ if (__bfqq) -+ return __bfqq; -+ -+ /* -+ * If the exact sector wasn't found, the parent of the NULL leaf -+ * will contain the closest sector (rq_pos_tree sorted by -+ * next_request position). -+ */ -+ __bfqq = rb_entry(parent, struct bfq_queue, pos_node); -+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) -+ return __bfqq; -+ -+ if (blk_rq_pos(__bfqq->next_rq) < sector) -+ node = rb_next(&__bfqq->pos_node); -+ else -+ node = rb_prev(&__bfqq->pos_node); -+ if (!node) -+ return NULL; -+ -+ __bfqq = rb_entry(node, struct bfq_queue, pos_node); -+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) -+ return __bfqq; -+ -+ return NULL; -+} -+ -+static struct bfq_queue *bfq_find_close_cooperator(struct bfq_data *bfqd, -+ struct bfq_queue *cur_bfqq, -+ sector_t sector) -+{ -+ struct bfq_queue *bfqq; -+ -+ /* -+ * We shall notice if some of the queues are cooperating, -+ * e.g., working closely on the same area of the device. In -+ * that case, we can group them together and: 1) don't waste -+ * time idling, and 2) serve the union of their requests in -+ * the best possible order for throughput. -+ */ -+ bfqq = bfqq_find_close(bfqd, cur_bfqq, sector); -+ if (!bfqq || bfqq == cur_bfqq) -+ return NULL; -+ -+ return bfqq; -+} -+ -+static struct bfq_queue * -+bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) -+{ -+ int process_refs, new_process_refs; -+ struct bfq_queue *__bfqq; -+ -+ /* -+ * If there are no process references on the new_bfqq, then it is -+ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain -+ * may have dropped their last reference (not just their last process -+ * reference). -+ */ -+ if (!bfqq_process_refs(new_bfqq)) -+ return NULL; -+ -+ /* Avoid a circular list and skip interim queue merges. */ -+ while ((__bfqq = new_bfqq->new_bfqq)) { -+ if (__bfqq == bfqq) -+ return NULL; -+ new_bfqq = __bfqq; -+ } -+ -+ process_refs = bfqq_process_refs(bfqq); -+ new_process_refs = bfqq_process_refs(new_bfqq); -+ /* -+ * If the process for the bfqq has gone away, there is no -+ * sense in merging the queues. -+ */ -+ if (process_refs == 0 || new_process_refs == 0) -+ return NULL; -+ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", -+ new_bfqq->pid); -+ -+ /* -+ * Merging is just a redirection: the requests of the process -+ * owning one of the two queues are redirected to the other queue. -+ * The latter queue, in its turn, is set as shared if this is the -+ * first time that the requests of some process are redirected to -+ * it. -+ * -+ * We redirect bfqq to new_bfqq and not the opposite, because we -+ * are in the context of the process owning bfqq, hence we have -+ * the io_cq of this process. So we can immediately configure this -+ * io_cq to redirect the requests of the process to new_bfqq. -+ * -+ * NOTE, even if new_bfqq coincides with the in-service queue, the -+ * io_cq of new_bfqq is not available, because, if the in-service -+ * queue is shared, bfqd->in_service_bic may not point to the -+ * io_cq of the in-service queue. -+ * Redirecting the requests of the process owning bfqq to the -+ * currently in-service queue is in any case the best option, as -+ * we feed the in-service queue with new requests close to the -+ * last request served and, by doing so, hopefully increase the -+ * throughput. -+ */ -+ bfqq->new_bfqq = new_bfqq; -+ atomic_add(process_refs, &new_bfqq->ref); -+ return new_bfqq; -+} -+ -+static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, -+ struct bfq_queue *new_bfqq) -+{ -+ if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) || -+ (bfqq->ioprio_class != new_bfqq->ioprio_class)) -+ return false; -+ -+ /* -+ * If either of the queues has already been detected as seeky, -+ * then merging it with the other queue is unlikely to lead to -+ * sequential I/O. -+ */ -+ if (BFQQ_SEEKY(bfqq) || BFQQ_SEEKY(new_bfqq)) -+ return false; -+ -+ /* -+ * Interleaved I/O is known to be done by (some) applications -+ * only for reads, so it does not make sense to merge async -+ * queues. -+ */ -+ if (!bfq_bfqq_sync(bfqq) || !bfq_bfqq_sync(new_bfqq)) -+ return false; -+ -+ return true; -+} -+ -+/* -+ * Attempt to schedule a merge of bfqq with the currently in-service queue -+ * or with a close queue among the scheduled queues. -+ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue -+ * structure otherwise. -+ * -+ * The OOM queue is not allowed to participate to cooperation: in fact, since -+ * the requests temporarily redirected to the OOM queue could be redirected -+ * again to dedicated queues at any time, the state needed to correctly -+ * handle merging with the OOM queue would be quite complex and expensive -+ * to maintain. Besides, in such a critical condition as an out of memory, -+ * the benefits of queue merging may be little relevant, or even negligible. -+ */ -+static struct bfq_queue * -+bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ void *io_struct, bool request) -+{ -+ struct bfq_queue *in_service_bfqq, *new_bfqq; -+ -+ if (bfqq->new_bfqq) -+ return bfqq->new_bfqq; -+ if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) -+ return NULL; -+ /* If device has only one backlogged bfq_queue, don't search. */ -+ if (bfqd->busy_queues == 1) -+ return NULL; -+ -+ in_service_bfqq = bfqd->in_service_queue; -+ -+ if (!in_service_bfqq || in_service_bfqq == bfqq || -+ !bfqd->in_service_bic || -+ unlikely(in_service_bfqq == &bfqd->oom_bfqq)) -+ goto check_scheduled; -+ -+ if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && -+ bfqq->entity.parent == in_service_bfqq->entity.parent && -+ bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) { -+ new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq); -+ if (new_bfqq) -+ return new_bfqq; -+ } -+ /* -+ * Check whether there is a cooperator among currently scheduled -+ * queues. The only thing we need is that the bio/request is not -+ * NULL, as we need it to establish whether a cooperator exists. -+ */ -+check_scheduled: -+ new_bfqq = bfq_find_close_cooperator(bfqd, bfqq, -+ bfq_io_struct_pos(io_struct, request)); -+ -+ BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent); -+ -+ if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) && -+ bfq_may_be_close_cooperator(bfqq, new_bfqq)) -+ return bfq_setup_merge(bfqq, new_bfqq); -+ -+ return NULL; -+} -+ -+static void bfq_bfqq_save_state(struct bfq_queue *bfqq) -+{ -+ /* -+ * If !bfqq->bic, the queue is already shared or its requests -+ * have already been redirected to a shared queue; both idle window -+ * and weight raising state have already been saved. Do nothing. -+ */ -+ if (!bfqq->bic) -+ return; -+ if (bfqq->bic->wr_time_left) -+ /* -+ * This is the queue of a just-started process, and would -+ * deserve weight raising: we set wr_time_left to the full -+ * weight-raising duration to trigger weight-raising when -+ * and if the queue is split and the first request of the -+ * queue is enqueued. -+ */ -+ bfqq->bic->wr_time_left = bfq_wr_duration(bfqq->bfqd); -+ else if (bfqq->wr_coeff > 1) { -+ unsigned long wr_duration = -+ jiffies - bfqq->last_wr_start_finish; -+ /* -+ * It may happen that a queue's weight raising period lasts -+ * longer than its wr_cur_max_time, as weight raising is -+ * handled only when a request is enqueued or dispatched (it -+ * does not use any timer). If the weight raising period is -+ * about to end, don't save it. -+ */ -+ if (bfqq->wr_cur_max_time <= wr_duration) -+ bfqq->bic->wr_time_left = 0; -+ else -+ bfqq->bic->wr_time_left = -+ bfqq->wr_cur_max_time - wr_duration; -+ /* -+ * The bfq_queue is becoming shared or the requests of the -+ * process owning the queue are being redirected to a shared -+ * queue. Stop the weight raising period of the queue, as in -+ * both cases it should not be owned by an interactive or -+ * soft real-time application. -+ */ -+ bfq_bfqq_end_wr(bfqq); -+ } else -+ bfqq->bic->wr_time_left = 0; -+ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); -+ bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); -+ bfqq->bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); -+ bfqq->bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); -+ bfqq->bic->cooperations++; -+ bfqq->bic->failed_cooperations = 0; -+} -+ -+static void bfq_get_bic_reference(struct bfq_queue *bfqq) -+{ -+ /* -+ * If bfqq->bic has a non-NULL value, the bic to which it belongs -+ * is about to begin using a shared bfq_queue. -+ */ -+ if (bfqq->bic) -+ atomic_long_inc(&bfqq->bic->icq.ioc->refcount); -+} -+ -+static void -+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, -+ struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) -+{ -+ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", -+ (unsigned long) new_bfqq->pid); -+ /* Save weight raising and idle window of the merged queues */ -+ bfq_bfqq_save_state(bfqq); -+ bfq_bfqq_save_state(new_bfqq); -+ if (bfq_bfqq_IO_bound(bfqq)) -+ bfq_mark_bfqq_IO_bound(new_bfqq); -+ bfq_clear_bfqq_IO_bound(bfqq); -+ /* -+ * Grab a reference to the bic, to prevent it from being destroyed -+ * before being possibly touched by a bfq_split_bfqq(). -+ */ -+ bfq_get_bic_reference(bfqq); -+ bfq_get_bic_reference(new_bfqq); -+ /* -+ * Merge queues (that is, let bic redirect its requests to new_bfqq) -+ */ -+ bic_set_bfqq(bic, new_bfqq, 1); -+ bfq_mark_bfqq_coop(new_bfqq); -+ /* -+ * new_bfqq now belongs to at least two bics (it is a shared queue): -+ * set new_bfqq->bic to NULL. bfqq either: -+ * - does not belong to any bic any more, and hence bfqq->bic must -+ * be set to NULL, or -+ * - is a queue whose owning bics have already been redirected to a -+ * different queue, hence the queue is destined to not belong to -+ * any bic soon and bfqq->bic is already NULL (therefore the next -+ * assignment causes no harm). -+ */ -+ new_bfqq->bic = NULL; -+ bfqq->bic = NULL; -+ bfq_put_queue(bfqq); -+} -+ -+static void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq) -+{ -+ struct bfq_io_cq *bic = bfqq->bic; -+ struct bfq_data *bfqd = bfqq->bfqd; -+ -+ if (bic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) { -+ bic->failed_cooperations++; -+ if (bic->failed_cooperations >= bfqd->bfq_failed_cooperations) -+ bic->cooperations = 0; -+ } -+} -+ - static int bfq_allow_merge(struct request_queue *q, struct request *rq, - struct bio *bio) - { - struct bfq_data *bfqd = q->elevator->elevator_data; - struct bfq_io_cq *bic; -+ struct bfq_queue *bfqq, *new_bfqq; - - /* - * Disallow merge of a sync bio into an async request. -@@ -1149,7 +1621,26 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq, - if (!bic) - return 0; - -- return bic_to_bfqq(bic, bfq_bio_sync(bio)) == RQ_BFQQ(rq); -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); -+ /* -+ * We take advantage of this function to perform an early merge -+ * of the queues of possible cooperating processes. -+ */ -+ if (bfqq) { -+ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false); -+ if (new_bfqq) { -+ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq); -+ /* -+ * If we get here, the bio will be queued in the -+ * shared queue, i.e., new_bfqq, so use new_bfqq -+ * to decide whether bio and rq can be merged. -+ */ -+ bfqq = new_bfqq; -+ } else -+ bfq_bfqq_increase_failed_cooperations(bfqq); -+ } -+ -+ return bfqq == RQ_BFQQ(rq); - } - - static void __bfq_set_in_service_queue(struct bfq_data *bfqd, -@@ -1350,6 +1841,15 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) - - __bfq_bfqd_reset_in_service(bfqd); - -+ /* -+ * If this bfqq is shared between multiple processes, check -+ * to make sure that those processes are still issuing I/Os -+ * within the mean seek distance. If not, it may be time to -+ * break the queues apart again. -+ */ -+ if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) -+ bfq_mark_bfqq_split_coop(bfqq); -+ - if (RB_EMPTY_ROOT(&bfqq->sort_list)) { - /* - * Overloading budget_timeout field to store the time -@@ -1358,8 +1858,13 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) - */ - bfqq->budget_timeout = jiffies; - bfq_del_bfqq_busy(bfqd, bfqq, 1); -- } else -+ } else { - bfq_activate_bfqq(bfqd, bfqq); -+ /* -+ * Resort priority tree of potential close cooperators. -+ */ -+ bfq_pos_tree_add_move(bfqd, bfqq); -+ } - } - - /** -@@ -2246,10 +2751,12 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) - /* - * If the queue was activated in a burst, or - * too much time has elapsed from the beginning -- * of this weight-raising period, then end weight -- * raising. -+ * of this weight-raising period, or the queue has -+ * exceeded the acceptable number of cooperations, -+ * then end weight raising. - */ - if (bfq_bfqq_in_large_burst(bfqq) || -+ bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh || - time_is_before_jiffies(bfqq->last_wr_start_finish + - bfqq->wr_cur_max_time)) { - bfqq->last_wr_start_finish = jiffies; -@@ -2478,6 +2985,25 @@ static void bfq_put_queue(struct bfq_queue *bfqq) - #endif - } - -+static void bfq_put_cooperator(struct bfq_queue *bfqq) -+{ -+ struct bfq_queue *__bfqq, *next; -+ -+ /* -+ * If this queue was scheduled to merge with another queue, be -+ * sure to drop the reference taken on that queue (and others in -+ * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs. -+ */ -+ __bfqq = bfqq->new_bfqq; -+ while (__bfqq) { -+ if (__bfqq == bfqq) -+ break; -+ next = __bfqq->new_bfqq; -+ bfq_put_queue(__bfqq); -+ __bfqq = next; -+ } -+} -+ - static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) - { - if (bfqq == bfqd->in_service_queue) { -@@ -2488,6 +3014,8 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) - bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, - atomic_read(&bfqq->ref)); - -+ bfq_put_cooperator(bfqq); -+ - bfq_put_queue(bfqq); - } - -@@ -2496,6 +3024,25 @@ static void bfq_init_icq(struct io_cq *icq) - struct bfq_io_cq *bic = icq_to_bic(icq); - - bic->ttime.last_end_request = jiffies; -+ /* -+ * A newly created bic indicates that the process has just -+ * started doing I/O, and is probably mapping into memory its -+ * executable and libraries: it definitely needs weight raising. -+ * There is however the possibility that the process performs, -+ * for a while, I/O close to some other process. EQM intercepts -+ * this behavior and may merge the queue corresponding to the -+ * process with some other queue, BEFORE the weight of the queue -+ * is raised. Merged queues are not weight-raised (they are assumed -+ * to belong to processes that benefit only from high throughput). -+ * If the merge is basically the consequence of an accident, then -+ * the queue will be split soon and will get back its old weight. -+ * It is then important to write down somewhere that this queue -+ * does need weight raising, even if it did not make it to get its -+ * weight raised before being merged. To this purpose, we overload -+ * the field raising_time_left and assign 1 to it, to mark the queue -+ * as needing weight raising. -+ */ -+ bic->wr_time_left = 1; - } - - static void bfq_exit_icq(struct io_cq *icq) -@@ -2509,6 +3056,13 @@ static void bfq_exit_icq(struct io_cq *icq) - } - - if (bic->bfqq[BLK_RW_SYNC]) { -+ /* -+ * If the bic is using a shared queue, put the reference -+ * taken on the io_context when the bic started using a -+ * shared bfq_queue. -+ */ -+ if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC])) -+ put_io_context(icq->ioc); - bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]); - bic->bfqq[BLK_RW_SYNC] = NULL; - } -@@ -2814,6 +3368,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, - if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) - return; - -+ /* Idle window just restored, statistics are meaningless. */ -+ if (bfq_bfqq_just_split(bfqq)) -+ return; -+ - enable_idle = bfq_bfqq_idle_window(bfqq); - - if (atomic_read(&bic->icq.ioc->active_ref) == 0 || -@@ -2861,6 +3419,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, - if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || - !BFQQ_SEEKY(bfqq)) - bfq_update_idle_window(bfqd, bfqq, bic); -+ bfq_clear_bfqq_just_split(bfqq); - - bfq_log_bfqq(bfqd, bfqq, - "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", -@@ -2925,12 +3484,47 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, - static void bfq_insert_request(struct request_queue *q, struct request *rq) - { - struct bfq_data *bfqd = q->elevator->elevator_data; -- struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq; - - assert_spin_locked(bfqd->queue->queue_lock); - -+ /* -+ * An unplug may trigger a requeue of a request from the device -+ * driver: make sure we are in process context while trying to -+ * merge two bfq_queues. -+ */ -+ if (!in_interrupt()) { -+ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true); -+ if (new_bfqq) { -+ if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq) -+ new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1); -+ /* -+ * Release the request's reference to the old bfqq -+ * and make sure one is taken to the shared queue. -+ */ -+ new_bfqq->allocated[rq_data_dir(rq)]++; -+ bfqq->allocated[rq_data_dir(rq)]--; -+ atomic_inc(&new_bfqq->ref); -+ bfq_put_queue(bfqq); -+ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq) -+ bfq_merge_bfqqs(bfqd, RQ_BIC(rq), -+ bfqq, new_bfqq); -+ rq->elv.priv[1] = new_bfqq; -+ bfqq = new_bfqq; -+ } else -+ bfq_bfqq_increase_failed_cooperations(bfqq); -+ } -+ - bfq_add_request(rq); - -+ /* -+ * Here a newly-created bfq_queue has already started a weight-raising -+ * period: clear raising_time_left to prevent bfq_bfqq_save_state() -+ * from assigning it a full weight-raising period. See the detailed -+ * comments about this field in bfq_init_icq(). -+ */ -+ if (bfqq->bic) -+ bfqq->bic->wr_time_left = 0; - rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; - list_add_tail(&rq->queuelist, &bfqq->fifo); - -@@ -3099,6 +3693,32 @@ static void bfq_put_request(struct request *rq) - } - - /* -+ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this -+ * was the last process referring to said bfqq. -+ */ -+static struct bfq_queue * -+bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) -+{ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); -+ -+ put_io_context(bic->icq.ioc); -+ -+ if (bfqq_process_refs(bfqq) == 1) { -+ bfqq->pid = current->pid; -+ bfq_clear_bfqq_coop(bfqq); -+ bfq_clear_bfqq_split_coop(bfqq); -+ return bfqq; -+ } -+ -+ bic_set_bfqq(bic, NULL, 1); -+ -+ bfq_put_cooperator(bfqq); -+ -+ bfq_put_queue(bfqq); -+ return NULL; -+} -+ -+/* - * Allocate bfq data structures associated with this request. - */ - static int bfq_set_request(struct request_queue *q, struct request *rq, -@@ -3110,6 +3730,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, - const int is_sync = rq_is_sync(rq); - struct bfq_queue *bfqq; - unsigned long flags; -+ bool split = false; - - might_sleep_if(gfpflags_allow_blocking(gfp_mask)); - -@@ -3122,15 +3743,30 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, - - bfq_bic_update_cgroup(bic, bio); - -+new_queue: - bfqq = bic_to_bfqq(bic, is_sync); - if (!bfqq || bfqq == &bfqd->oom_bfqq) { - bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, gfp_mask); - bic_set_bfqq(bic, bfqq, is_sync); -- if (is_sync) { -- if (bfqd->large_burst) -+ if (split && is_sync) { -+ if ((bic->was_in_burst_list && bfqd->large_burst) || -+ bic->saved_in_large_burst) - bfq_mark_bfqq_in_large_burst(bfqq); -- else -+ else { - bfq_clear_bfqq_in_large_burst(bfqq); -+ if (bic->was_in_burst_list) -+ hlist_add_head(&bfqq->burst_list_node, -+ &bfqd->burst_list); -+ } -+ } -+ } else { -+ /* If the queue was seeky for too long, break it apart. */ -+ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { -+ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); -+ bfqq = bfq_split_bfqq(bic, bfqq); -+ split = true; -+ if (!bfqq) -+ goto new_queue; - } - } - -@@ -3142,6 +3778,26 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, - rq->elv.priv[0] = bic; - rq->elv.priv[1] = bfqq; - -+ /* -+ * If a bfq_queue has only one process reference, it is owned -+ * by only one bfq_io_cq: we can set the bic field of the -+ * bfq_queue to the address of that structure. Also, if the -+ * queue has just been split, mark a flag so that the -+ * information is available to the other scheduler hooks. -+ */ -+ if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) { -+ bfqq->bic = bic; -+ if (split) { -+ bfq_mark_bfqq_just_split(bfqq); -+ /* -+ * If the queue has just been split from a shared -+ * queue, restore the idle window and the possible -+ * weight raising period. -+ */ -+ bfq_bfqq_resume_state(bfqq, bic); -+ } -+ } -+ - spin_unlock_irqrestore(q->queue_lock, flags); - - return 0; -@@ -3295,6 +3951,7 @@ static void bfq_init_root_group(struct bfq_group *root_group, - root_group->my_entity = NULL; - root_group->bfqd = bfqd; - #endif -+ root_group->rq_pos_tree = RB_ROOT; - for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) - root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; - } -@@ -3375,6 +4032,8 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) - bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; - bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; - -+ bfqd->bfq_coop_thresh = 2; -+ bfqd->bfq_failed_cooperations = 7000; - bfqd->bfq_requests_within_timer = 120; - - bfqd->bfq_large_burst_thresh = 11; -diff --git a/block/bfq.h b/block/bfq.h -index 2bf54ae..fcce855 100644 ---- a/block/bfq.h -+++ b/block/bfq.h -@@ -183,6 +183,8 @@ struct bfq_group; - * ioprio_class value. - * @new_bfqq: shared bfq_queue if queue is cooperating with - * one or more other queues. -+ * @pos_node: request-position tree member (see bfq_group's @rq_pos_tree). -+ * @pos_root: request-position tree root (see bfq_group's @rq_pos_tree). - * @sort_list: sorted list of pending requests. - * @next_rq: if fifo isn't expired, next request to serve. - * @queued: nr of requests queued in @sort_list. -@@ -304,6 +306,26 @@ struct bfq_ttime { - * @ttime: associated @bfq_ttime struct - * @ioprio: per (request_queue, blkcg) ioprio. - * @blkcg_id: id of the blkcg the related io_cq belongs to. -+ * @wr_time_left: snapshot of the time left before weight raising ends -+ * for the sync queue associated to this process; this -+ * snapshot is taken to remember this value while the weight -+ * raising is suspended because the queue is merged with a -+ * shared queue, and is used to set @raising_cur_max_time -+ * when the queue is split from the shared queue and its -+ * weight is raised again -+ * @saved_idle_window: same purpose as the previous field for the idle -+ * window -+ * @saved_IO_bound: same purpose as the previous two fields for the I/O -+ * bound classification of a queue -+ * @saved_in_large_burst: same purpose as the previous fields for the -+ * value of the field keeping the queue's belonging -+ * to a large burst -+ * @was_in_burst_list: true if the queue belonged to a burst list -+ * before its merge with another cooperating queue -+ * @cooperations: counter of consecutive successful queue merges underwent -+ * by any of the process' @bfq_queues -+ * @failed_cooperations: counter of consecutive failed queue merges of any -+ * of the process' @bfq_queues - */ - struct bfq_io_cq { - struct io_cq icq; /* must be the first member */ -@@ -314,6 +336,16 @@ struct bfq_io_cq { - #ifdef CONFIG_BFQ_GROUP_IOSCHED - uint64_t blkcg_id; /* the current blkcg ID */ - #endif -+ -+ unsigned int wr_time_left; -+ bool saved_idle_window; -+ bool saved_IO_bound; -+ -+ bool saved_in_large_burst; -+ bool was_in_burst_list; -+ -+ unsigned int cooperations; -+ unsigned int failed_cooperations; - }; - - enum bfq_device_speed { -@@ -557,6 +589,9 @@ enum bfqq_state_flags { - * may need softrt-next-start - * update - */ -+ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ -+ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */ -+ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */ - }; - - #define BFQ_BFQQ_FNS(name) \ -@@ -583,6 +618,9 @@ BFQ_BFQQ_FNS(budget_new); - BFQ_BFQQ_FNS(IO_bound); - BFQ_BFQQ_FNS(in_large_burst); - BFQ_BFQQ_FNS(constantly_seeky); -+BFQ_BFQQ_FNS(coop); -+BFQ_BFQQ_FNS(split_coop); -+BFQ_BFQQ_FNS(just_split); - BFQ_BFQQ_FNS(softrt_update); - #undef BFQ_BFQQ_FNS - -@@ -675,6 +713,9 @@ struct bfq_group_data { - * are groups with more than one active @bfq_entity - * (see the comments to the function - * bfq_bfqq_must_not_expire()). -+ * @rq_pos_tree: rbtree sorted by next_request position, used when -+ * determining if two or more queues have interleaving -+ * requests (see bfq_find_close_cooperator()). - * - * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup - * there is a set of bfq_groups, each one collecting the lower-level -@@ -701,6 +742,8 @@ struct bfq_group { - - int active_entities; - -+ struct rb_root rq_pos_tree; -+ - struct bfqg_stats stats; - struct bfqg_stats dead_stats; /* stats pushed from dead children */ - }; -@@ -711,6 +754,8 @@ struct bfq_group { - - struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; - struct bfq_queue *async_idle_bfqq; -+ -+ struct rb_root rq_pos_tree; - }; - #endif - -@@ -787,6 +832,27 @@ static void bfq_put_bfqd_unlock(struct bfq_data *bfqd, unsigned long *flags) - spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); - } - -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ -+static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *group_entity = bfqq->entity.parent; -+ -+ if (!group_entity) -+ group_entity = &bfqq->bfqd->root_group->entity; -+ -+ return container_of(group_entity, struct bfq_group, entity); -+} -+ -+#else -+ -+static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq) -+{ -+ return bfqq->bfqd->root_group; -+} -+ -+#endif -+ - static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio); - static void bfq_put_queue(struct bfq_queue *bfqq); - static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); --- -2.10.0 - diff --git a/helpers/DATA/linux-hwe/0004-Turn-BFQ-v7r11-for-4.10.0-into-BFQ-v8r11-for-4.10.0.patch b/helpers/DATA/linux-hwe/0004-Turn-BFQ-v7r11-for-4.10.0-into-BFQ-v8r11-for-4.10.0.patch deleted file mode 100644 index 86ff3b63..00000000 --- a/helpers/DATA/linux-hwe/0004-Turn-BFQ-v7r11-for-4.10.0-into-BFQ-v8r11-for-4.10.0.patch +++ /dev/null @@ -1,9308 +0,0 @@ -From a97fff52b333556bc4f2c990b4548667b4ac8af1 Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@linaro.org> -Date: Mon, 16 May 2016 11:16:17 +0200 -Subject: [PATCH 4/4] Turn BFQ-v7r11 for 4.10.0 into BFQ-v8r11 for 4.10.0 - -Signed-off-by: Paolo Valente <paolo.valente@linaro.org> ---- - Documentation/block/00-INDEX | 2 + - Documentation/block/bfq-iosched.txt | 530 ++++++ - block/Kconfig.iosched | 18 +- - block/bfq-cgroup.c | 511 +++--- - block/bfq-iosched.c | 3448 ++++++++++++++++++++++------------- - block/bfq-sched.c | 1344 +++++++++++--- - block/bfq.h | 800 ++++---- - 7 files changed, 4467 insertions(+), 2186 deletions(-) - create mode 100644 Documentation/block/bfq-iosched.txt - -diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX -index e55103a..8d55b4b 100644 ---- a/Documentation/block/00-INDEX -+++ b/Documentation/block/00-INDEX -@@ -1,5 +1,7 @@ - 00-INDEX - - This file -+bfq-iosched.txt -+ - BFQ IO scheduler and its tunables - biodoc.txt - - Notes on the Generic Block Layer Rewrite in Linux 2.5 - biovecs.txt -diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt -new file mode 100644 -index 0000000..13b5248 ---- /dev/null -+++ b/Documentation/block/bfq-iosched.txt -@@ -0,0 +1,530 @@ -+BFQ (Budget Fair Queueing) -+========================== -+ -+BFQ is a proportional-share I/O scheduler, with some extra -+low-latency capabilities. In addition to cgroups support (blkio or io -+controllers), BFQ's main features are: -+- BFQ guarantees a high system and application responsiveness, and a -+ low latency for time-sensitive applications, such as audio or video -+ players; -+- BFQ distributes bandwidth, and not just time, among processes or -+ groups (switching back to time distribution when needed to keep -+ throughput high). -+ -+On average CPUs, the current version of BFQ can handle devices -+performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a -+reference, 30-50 KIOPS correspond to very high bandwidths with -+sequential I/O (e.g., 8-12 GB/s if I/O requests are 256 KB large), and -+to 120-200 MB/s with 4KB random I/O. -+ -+The table of contents follow. Impatients can just jump to Section 3. -+ -+CONTENTS -+ -+1. When may BFQ be useful? -+ 1-1 Personal systems -+ 1-2 Server systems -+2. How does BFQ work? -+3. What are BFQ's tunable? -+4. BFQ group scheduling -+ 4-1 Service guarantees provided -+ 4-2 Interface -+ -+1. When may BFQ be useful? -+========================== -+ -+BFQ provides the following benefits on personal and server systems. -+ -+1-1 Personal systems -+-------------------- -+ -+Low latency for interactive applications -+ -+Regardless of the actual background workload, BFQ guarantees that, for -+interactive tasks, the storage device is virtually as responsive as if -+it was idle. For example, even if one or more of the following -+background workloads are being executed: -+- one or more large files are being read, written or copied, -+- a tree of source files is being compiled, -+- one or more virtual machines are performing I/O, -+- a software update is in progress, -+- indexing daemons are scanning filesystems and updating their -+ databases, -+starting an application or loading a file from within an application -+takes about the same time as if the storage device was idle. As a -+comparison, with CFQ, NOOP or DEADLINE, and in the same conditions, -+applications experience high latencies, or even become unresponsive -+until the background workload terminates (also on SSDs). -+ -+Low latency for soft real-time applications -+ -+Also soft real-time applications, such as audio and video -+players/streamers, enjoy a low latency and a low drop rate, regardless -+of the background I/O workload. As a consequence, these applications -+do not suffer from almost any glitch due to the background workload. -+ -+Higher speed for code-development tasks -+ -+If some additional workload happens to be executed in parallel, then -+BFQ executes the I/O-related components of typical code-development -+tasks (compilation, checkout, merge, ...) much more quickly than CFQ, -+NOOP or DEADLINE. -+ -+High throughput -+ -+On hard disks, BFQ achieves up to 30% higher throughput than CFQ, and -+up to 150% higher throughput than DEADLINE and NOOP, with all the -+sequential workloads considered in our tests. With random workloads, -+and with all the workloads on flash-based devices, BFQ achieves, -+instead, about the same throughput as the other schedulers. -+ -+Strong fairness, bandwidth and delay guarantees -+ -+BFQ distributes the device throughput, and not just the device time, -+among I/O-bound applications in proportion their weights, with any -+workload and regardless of the device parameters. From these bandwidth -+guarantees, it is possible to compute tight per-I/O-request delay -+guarantees by a simple formula. If not configured for strict service -+guarantees, BFQ switches to time-based resource sharing (only) for -+applications that would otherwise cause a throughput loss. -+ -+1-2 Server systems -+------------------ -+ -+Most benefits for server systems follow from the same service -+properties as above. In particular, regardless of whether additional, -+possibly heavy workloads are being served, BFQ guarantees: -+ -+. audio and video-streaming with zero or very low jitter and drop -+ rate; -+ -+. fast retrieval of WEB pages and embedded objects; -+ -+. real-time recording of data in live-dumping applications (e.g., -+ packet logging); -+ -+. responsiveness in local and remote access to a server. -+ -+ -+2. How does BFQ work? -+===================== -+ -+BFQ is a proportional-share I/O scheduler, whose general structure, -+plus a lot of code, are borrowed from CFQ. -+ -+- Each process doing I/O on a device is associated with a weight and a -+ (bfq_)queue. -+ -+- BFQ grants exclusive access to the device, for a while, to one queue -+ (process) at a time, and implements this service model by -+ associating every queue with a budget, measured in number of -+ sectors. -+ -+ - After a queue is granted access to the device, the budget of the -+ queue is decremented, on each request dispatch, by the size of the -+ request. -+ -+ - The in-service queue is expired, i.e., its service is suspended, -+ only if one of the following events occurs: 1) the queue finishes -+ its budget, 2) the queue empties, 3) a "budget timeout" fires. -+ -+ - The budget timeout prevents processes doing random I/O from -+ holding the device for too long and dramatically reducing -+ throughput. -+ -+ - Actually, as in CFQ, a queue associated with a process issuing -+ sync requests may not be expired immediately when it empties. In -+ contrast, BFQ may idle the device for a short time interval, -+ giving the process the chance to go on being served if it issues -+ a new request in time. Device idling typically boosts the -+ throughput on rotational devices, if processes do synchronous -+ and sequential I/O. In addition, under BFQ, device idling is -+ also instrumental in guaranteeing the desired throughput -+ fraction to processes issuing sync requests (see the description -+ of the slice_idle tunable in this document, or [1, 2], for more -+ details). -+ -+ - With respect to idling for service guarantees, if several -+ processes are competing for the device at the same time, but -+ all processes (and groups, after the following commit) have -+ the same weight, then BFQ guarantees the expected throughput -+ distribution without ever idling the device. Throughput is -+ thus as high as possible in this common scenario. -+ -+ - If low-latency mode is enabled (default configuration), BFQ -+ executes some special heuristics to detect interactive and soft -+ real-time applications (e.g., video or audio players/streamers), -+ and to reduce their latency. The most important action taken to -+ achieve this goal is to give to the queues associated with these -+ applications more than their fair share of the device -+ throughput. For brevity, we call just "weight-raising" the whole -+ sets of actions taken by BFQ to privilege these queues. In -+ particular, BFQ provides a milder form of weight-raising for -+ interactive applications, and a stronger form for soft real-time -+ applications. -+ -+ - BFQ automatically deactivates idling for queues born in a burst of -+ queue creations. In fact, these queues are usually associated with -+ the processes of applications and services that benefit mostly -+ from a high throughput. Examples are systemd during boot, or git -+ grep. -+ -+ - As CFQ, BFQ merges queues performing interleaved I/O, i.e., -+ performing random I/O that becomes mostly sequential if -+ merged. Differently from CFQ, BFQ achieves this goal with a more -+ reactive mechanism, called Early Queue Merge (EQM). EQM is so -+ responsive in detecting interleaved I/O (cooperating processes), -+ that it enables BFQ to achieve a high throughput, by queue -+ merging, even for queues for which CFQ needs a different -+ mechanism, preemption, to get a high throughput. As such EQM is a -+ unified mechanism to achieve a high throughput with interleaved -+ I/O. -+ -+ - Queues are scheduled according to a variant of WF2Q+, named -+ B-WF2Q+, and implemented using an augmented rb-tree to preserve an -+ O(log N) overall complexity. See [2] for more details. B-WF2Q+ is -+ also ready for hierarchical scheduling. However, for a cleaner -+ logical breakdown, the code that enables and completes -+ hierarchical support is provided in the next commit, which focuses -+ exactly on this feature. -+ -+ - B-WF2Q+ guarantees a tight deviation with respect to an ideal, -+ perfectly fair, and smooth service. In particular, B-WF2Q+ -+ guarantees that each queue receives a fraction of the device -+ throughput proportional to its weight, even if the throughput -+ fluctuates, and regardless of: the device parameters, the current -+ workload and the budgets assigned to the queue. -+ -+ - The last, budget-independence, property (although probably -+ counterintuitive in the first place) is definitely beneficial, for -+ the following reasons: -+ -+ - First, with any proportional-share scheduler, the maximum -+ deviation with respect to an ideal service is proportional to -+ the maximum budget (slice) assigned to queues. As a consequence, -+ BFQ can keep this deviation tight not only because of the -+ accurate service of B-WF2Q+, but also because BFQ *does not* -+ need to assign a larger budget to a queue to let the queue -+ receive a higher fraction of the device throughput. -+ -+ - Second, BFQ is free to choose, for every process (queue), the -+ budget that best fits the needs of the process, or best -+ leverages the I/O pattern of the process. In particular, BFQ -+ updates queue budgets with a simple feedback-loop algorithm that -+ allows a high throughput to be achieved, while still providing -+ tight latency guarantees to time-sensitive applications. When -+ the in-service queue expires, this algorithm computes the next -+ budget of the queue so as to: -+ -+ - Let large budgets be eventually assigned to the queues -+ associated with I/O-bound applications performing sequential -+ I/O: in fact, the longer these applications are served once -+ got access to the device, the higher the throughput is. -+ -+ - Let small budgets be eventually assigned to the queues -+ associated with time-sensitive applications (which typically -+ perform sporadic and short I/O), because, the smaller the -+ budget assigned to a queue waiting for service is, the sooner -+ B-WF2Q+ will serve that queue (Subsec 3.3 in [2]). -+ -+- If several processes are competing for the device at the same time, -+ but all processes and groups have the same weight, then BFQ -+ guarantees the expected throughput distribution without ever idling -+ the device. It uses preemption instead. Throughput is then much -+ higher in this common scenario. -+ -+- ioprio classes are served in strict priority order, i.e., -+ lower-priority queues are not served as long as there are -+ higher-priority queues. Among queues in the same class, the -+ bandwidth is distributed in proportion to the weight of each -+ queue. A very thin extra bandwidth is however guaranteed to -+ the Idle class, to prevent it from starving. -+ -+ -+3. What are BFQ's tunable? -+========================== -+ -+The tunables back_seek-max, back_seek_penalty, fifo_expire_async and -+fifo_expire_sync below are the same as in CFQ. Their description is -+just copied from that for CFQ. Some considerations in the description -+of slice_idle are copied from CFQ too. -+ -+per-process ioprio and weight -+----------------------------- -+ -+Unless the cgroups interface is used (see "4. BFQ group scheduling"), -+weights can be assigned to processes only indirectly, through I/O -+priorities, and according to the relation: -+weight = (IOPRIO_BE_NR - ioprio) * 10. -+ -+Beware that, if low-latency is set, then BFQ automatically raises the -+weight of the queues associated with interactive and soft real-time -+applications. Unset this tunable if you need/want to control weights. -+ -+slice_idle -+---------- -+ -+This parameter specifies how long BFQ should idle for next I/O -+request, when certain sync BFQ queues become empty. By default -+slice_idle is a non-zero value. Idling has a double purpose: boosting -+throughput and making sure that the desired throughput distribution is -+respected (see the description of how BFQ works, and, if needed, the -+papers referred there). -+ -+As for throughput, idling can be very helpful on highly seeky media -+like single spindle SATA/SAS disks where we can cut down on overall -+number of seeks and see improved throughput. -+ -+Setting slice_idle to 0 will remove all the idling on queues and one -+should see an overall improved throughput on faster storage devices -+like multiple SATA/SAS disks in hardware RAID configuration. -+ -+So depending on storage and workload, it might be useful to set -+slice_idle=0. In general for SATA/SAS disks and software RAID of -+SATA/SAS disks keeping slice_idle enabled should be useful. For any -+configurations where there are multiple spindles behind single LUN -+(Host based hardware RAID controller or for storage arrays), setting -+slice_idle=0 might end up in better throughput and acceptable -+latencies. -+ -+Idling is however necessary to have service guarantees enforced in -+case of differentiated weights or differentiated I/O-request lengths. -+To see why, suppose that a given BFQ queue A must get several I/O -+requests served for each request served for another queue B. Idling -+ensures that, if A makes a new I/O request slightly after becoming -+empty, then no request of B is dispatched in the middle, and thus A -+does not lose the possibility to get more than one request dispatched -+before the next request of B is dispatched. Note that idling -+guarantees the desired differentiated treatment of queues only in -+terms of I/O-request dispatches. To guarantee that the actual service -+order then corresponds to the dispatch order, the strict_guarantees -+tunable must be set too. -+ -+There is an important flipside for idling: apart from the above cases -+where it is beneficial also for throughput, idling can severely impact -+throughput. One important case is random workload. Because of this -+issue, BFQ tends to avoid idling as much as possible, when it is not -+beneficial also for throughput. As a consequence of this behavior, and -+of further issues described for the strict_guarantees tunable, -+short-term service guarantees may be occasionally violated. And, in -+some cases, these guarantees may be more important than guaranteeing -+maximum throughput. For example, in video playing/streaming, a very -+low drop rate may be more important than maximum throughput. In these -+cases, consider setting the strict_guarantees parameter. -+ -+strict_guarantees -+----------------- -+ -+If this parameter is set (default: unset), then BFQ -+ -+- always performs idling when the in-service queue becomes empty; -+ -+- forces the device to serve one I/O request at a time, by dispatching a -+ new request only if there is no outstanding request. -+ -+In the presence of differentiated weights or I/O-request sizes, both -+the above conditions are needed to guarantee that every BFQ queue -+receives its allotted share of the bandwidth. The first condition is -+needed for the reasons explained in the description of the slice_idle -+tunable. The second condition is needed because all modern storage -+devices reorder internally-queued requests, which may trivially break -+the service guarantees enforced by the I/O scheduler. -+ -+Setting strict_guarantees may evidently affect throughput. -+ -+back_seek_max -+------------- -+ -+This specifies, given in Kbytes, the maximum "distance" for backward seeking. -+The distance is the amount of space from the current head location to the -+sectors that are backward in terms of distance. -+ -+This parameter allows the scheduler to anticipate requests in the "backward" -+direction and consider them as being the "next" if they are within this -+distance from the current head location. -+ -+back_seek_penalty -+----------------- -+ -+This parameter is used to compute the cost of backward seeking. If the -+backward distance of request is just 1/back_seek_penalty from a "front" -+request, then the seeking cost of two requests is considered equivalent. -+ -+So scheduler will not bias toward one or the other request (otherwise scheduler -+will bias toward front request). Default value of back_seek_penalty is 2. -+ -+fifo_expire_async -+----------------- -+ -+This parameter is used to set the timeout of asynchronous requests. Default -+value of this is 248ms. -+ -+fifo_expire_sync -+---------------- -+ -+This parameter is used to set the timeout of synchronous requests. Default -+value of this is 124ms. In case to favor synchronous requests over asynchronous -+one, this value should be decreased relative to fifo_expire_async. -+ -+low_latency -+----------- -+ -+This parameter is used to enable/disable BFQ's low latency mode. By -+default, low latency mode is enabled. If enabled, interactive and soft -+real-time applications are privileged and experience a lower latency, -+as explained in more detail in the description of how BFQ works. -+ -+DO NOT enable this mode if you need full control on bandwidth -+distribution. In fact, if it is enabled, then BFQ automatically -+increases the bandwidth share of privileged applications, as the main -+means to guarantee a lower latency to them. -+ -+timeout_sync -+------------ -+ -+Maximum amount of device time that can be given to a task (queue) once -+it has been selected for service. On devices with costly seeks, -+increasing this time usually increases maximum throughput. On the -+opposite end, increasing this time coarsens the granularity of the -+short-term bandwidth and latency guarantees, especially if the -+following parameter is set to zero. -+ -+max_budget -+---------- -+ -+Maximum amount of service, measured in sectors, that can be provided -+to a BFQ queue once it is set in service (of course within the limits -+of the above timeout). According to what said in the description of -+the algorithm, larger values increase the throughput in proportion to -+the percentage of sequential I/O requests issued. The price of larger -+values is that they coarsen the granularity of short-term bandwidth -+and latency guarantees. -+ -+The default value is 0, which enables auto-tuning: BFQ sets max_budget -+to the maximum number of sectors that can be served during -+timeout_sync, according to the estimated peak rate. -+ -+weights -+------- -+ -+Read-only parameter, used to show the weights of the currently active -+BFQ queues. -+ -+ -+wr_ tunables -+------------ -+ -+BFQ exports a few parameters to control/tune the behavior of -+low-latency heuristics. -+ -+wr_coeff -+ -+Factor by which the weight of a weight-raised queue is multiplied. If -+the queue is deemed soft real-time, then the weight is further -+multiplied by an additional, constant factor. -+ -+wr_max_time -+ -+Maximum duration of a weight-raising period for an interactive task -+(ms). If set to zero (default value), then this value is computed -+automatically, as a function of the peak rate of the device. In any -+case, when the value of this parameter is read, it always reports the -+current duration, regardless of whether it has been set manually or -+computed automatically. -+ -+wr_max_softrt_rate -+ -+Maximum service rate below which a queue is deemed to be associated -+with a soft real-time application, and is then weight-raised -+accordingly (sectors/sec). -+ -+wr_min_idle_time -+ -+Minimum idle period after which interactive weight-raising may be -+reactivated for a queue (in ms). -+ -+wr_rt_max_time -+ -+Maximum weight-raising duration for soft real-time queues (in ms). The -+start time from which this duration is considered is automatically -+moved forward if the queue is detected to be still soft real-time -+before the current soft real-time weight-raising period finishes. -+ -+wr_min_inter_arr_async -+ -+Minimum period between I/O request arrivals after which weight-raising -+may be reactivated for an already busy async queue (in ms). -+ -+ -+4. Group scheduling with BFQ -+============================ -+ -+BFQ supports both cgroups-v1 and cgroups-v2 io controllers, namely -+blkio and io. In particular, BFQ supports weight-based proportional -+share. To activate cgroups support, set BFQ_GROUP_IOSCHED. -+ -+4-1 Service guarantees provided -+------------------------------- -+ -+With BFQ, proportional share means true proportional share of the -+device bandwidth, according to group weights. For example, a group -+with weight 200 gets twice the bandwidth, and not just twice the time, -+of a group with weight 100. -+ -+BFQ supports hierarchies (group trees) of any depth. Bandwidth is -+distributed among groups and processes in the expected way: for each -+group, the children of the group share the whole bandwidth of the -+group in proportion to their weights. In particular, this implies -+that, for each leaf group, every process of the group receives the -+same share of the whole group bandwidth, unless the ioprio of the -+process is modified. -+ -+The resource-sharing guarantee for a group may partially or totally -+switch from bandwidth to time, if providing bandwidth guarantees to -+the group lowers the throughput too much. This switch occurs on a -+per-process basis: if a process of a leaf group causes throughput loss -+if served in such a way to receive its share of the bandwidth, then -+BFQ switches back to just time-based proportional share for that -+process. -+ -+4-2 Interface -+------------- -+ -+To get proportional sharing of bandwidth with BFQ for a given device, -+BFQ must of course be the active scheduler for that device. -+ -+Within each group directory, the names of the files associated with -+BFQ-specific cgroup parameters and stats begin with the "bfq." -+prefix. So, with cgroups-v1 or cgroups-v2, the full prefix for -+BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group -+parameter to set the weight of a group with BFQ is blkio.bfq.weight -+or io.bfq.weight. -+ -+Parameters to set -+----------------- -+ -+For each group, there is only the following parameter to set. -+ -+weight (namely blkio.bfq.weight or io.bfq-weight): the weight of the -+group inside its parent. Available values: 1..10000 (default 100). The -+linear mapping between ioprio and weights, described at the beginning -+of the tunable section, is still valid, but all weights higher than -+IOPRIO_BE_NR*10 are mapped to ioprio 0. -+ -+Recall that, if low-latency is set, then BFQ automatically raises the -+weight of the queues associated with interactive and soft real-time -+applications. Unset this tunable if you need/want to control weights. -+ -+ -+[1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O -+ Scheduler", Proceedings of the First Workshop on Mobile System -+ Technologies (MST-2015), May 2015. -+ http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf -+ -+[2] P. Valente and M. Andreolini, "Improving Application -+ Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of -+ the 5th Annual International Systems and Storage Conference -+ (SYSTOR '12), June 2012. -+ Slightly extended version: -+ http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite- -+ results.pdf -diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched -index f78cd1a..f2cd945 100644 ---- a/block/Kconfig.iosched -+++ b/block/Kconfig.iosched -@@ -43,20 +43,20 @@ config IOSCHED_BFQ - tristate "BFQ I/O scheduler" - default n - ---help--- -- The BFQ I/O scheduler tries to distribute bandwidth among -- all processes according to their weights. -- It aims at distributing the bandwidth as desired, independently of -- the disk parameters and with any workload. It also tries to -- guarantee low latency to interactive and soft real-time -- applications. If compiled built-in (saying Y here), BFQ can -- be configured to support hierarchical scheduling. -+ The BFQ I/O scheduler distributes bandwidth among all -+ processes according to their weights, regardless of the -+ device parameters and with any workload. It also guarantees -+ a low latency to interactive and soft real-time applications. -+ Details in Documentation/block/bfq-iosched.txt - - config BFQ_GROUP_IOSCHED - bool "BFQ hierarchical scheduling support" -- depends on CGROUPS && IOSCHED_BFQ=y -+ depends on IOSCHED_BFQ && BLK_CGROUP - default n - ---help--- -- Enable hierarchical scheduling in BFQ, using the blkio controller. -+ -+ Enable hierarchical scheduling in BFQ, using the blkio -+ (cgroups-v1) or io (cgroups-v2) controller. - - choice - prompt "Default I/O scheduler" -diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c -index 0367996..39daaf4 100644 ---- a/block/bfq-cgroup.c -+++ b/block/bfq-cgroup.c -@@ -7,7 +7,9 @@ - * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> - * Paolo Valente <paolo.valente@unimore.it> - * -- * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it> -+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it> -+ * -+ * Copyright (C) 2016 Paolo Valente <paolo.valente@linaro.org> - * - * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ - * file. -@@ -163,8 +165,6 @@ static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg) - { - struct blkg_policy_data *pd = blkg_to_pd(blkg, &blkcg_policy_bfq); - -- BUG_ON(!pd); -- - return pd_to_bfqg(pd); - } - -@@ -208,59 +208,47 @@ static void bfqg_put(struct bfq_group *bfqg) - - static void bfqg_stats_update_io_add(struct bfq_group *bfqg, - struct bfq_queue *bfqq, -- int rw) -+ unsigned int op) - { -- blkg_rwstat_add(&bfqg->stats.queued, rw, 1); -+ blkg_rwstat_add(&bfqg->stats.queued, op, 1); - bfqg_stats_end_empty_time(&bfqg->stats); - if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue)) - bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq)); - } - --static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw) --{ -- blkg_rwstat_add(&bfqg->stats.queued, rw, -1); --} -- --static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw) -+static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) - { -- blkg_rwstat_add(&bfqg->stats.merged, rw, 1); -+ blkg_rwstat_add(&bfqg->stats.queued, op, -1); - } - --static void bfqg_stats_update_dispatch(struct bfq_group *bfqg, -- uint64_t bytes, int rw) -+static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) - { -- blkg_stat_add(&bfqg->stats.sectors, bytes >> 9); -- blkg_rwstat_add(&bfqg->stats.serviced, rw, 1); -- blkg_rwstat_add(&bfqg->stats.service_bytes, rw, bytes); -+ blkg_rwstat_add(&bfqg->stats.merged, op, 1); - } - - static void bfqg_stats_update_completion(struct bfq_group *bfqg, -- uint64_t start_time, uint64_t io_start_time, int rw) -+ uint64_t start_time, uint64_t io_start_time, -+ unsigned int op) - { - struct bfqg_stats *stats = &bfqg->stats; - unsigned long long now = sched_clock(); - - if (time_after64(now, io_start_time)) -- blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); -+ blkg_rwstat_add(&stats->service_time, op, -+ now - io_start_time); - if (time_after64(io_start_time, start_time)) -- blkg_rwstat_add(&stats->wait_time, rw, -+ blkg_rwstat_add(&stats->wait_time, op, - io_start_time - start_time); - } - - /* @stats = 0 */ - static void bfqg_stats_reset(struct bfqg_stats *stats) - { -- if (!stats) -- return; -- - /* queued stats shouldn't be cleared */ -- blkg_rwstat_reset(&stats->service_bytes); -- blkg_rwstat_reset(&stats->serviced); - blkg_rwstat_reset(&stats->merged); - blkg_rwstat_reset(&stats->service_time); - blkg_rwstat_reset(&stats->wait_time); - blkg_stat_reset(&stats->time); -- blkg_stat_reset(&stats->unaccounted_time); - blkg_stat_reset(&stats->avg_queue_size_sum); - blkg_stat_reset(&stats->avg_queue_size_samples); - blkg_stat_reset(&stats->dequeue); -@@ -270,19 +258,16 @@ static void bfqg_stats_reset(struct bfqg_stats *stats) - } - - /* @to += @from */ --static void bfqg_stats_merge(struct bfqg_stats *to, struct bfqg_stats *from) -+static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from) - { - if (!to || !from) - return; - - /* queued stats shouldn't be cleared */ -- blkg_rwstat_add_aux(&to->service_bytes, &from->service_bytes); -- blkg_rwstat_add_aux(&to->serviced, &from->serviced); - blkg_rwstat_add_aux(&to->merged, &from->merged); - blkg_rwstat_add_aux(&to->service_time, &from->service_time); - blkg_rwstat_add_aux(&to->wait_time, &from->wait_time); - blkg_stat_add_aux(&from->time, &from->time); -- blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time); - blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum); - blkg_stat_add_aux(&to->avg_queue_size_samples, - &from->avg_queue_size_samples); -@@ -311,10 +296,8 @@ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg) - if (unlikely(!parent)) - return; - -- bfqg_stats_merge(&parent->dead_stats, &bfqg->stats); -- bfqg_stats_merge(&parent->dead_stats, &bfqg->dead_stats); -+ bfqg_stats_add_aux(&parent->stats, &bfqg->stats); - bfqg_stats_reset(&bfqg->stats); -- bfqg_stats_reset(&bfqg->dead_stats); - } - - static void bfq_init_entity(struct bfq_entity *entity, -@@ -329,21 +312,17 @@ static void bfq_init_entity(struct bfq_entity *entity, - bfqq->ioprio_class = bfqq->new_ioprio_class; - bfqg_get(bfqg); - } -- entity->parent = bfqg->my_entity; -+ entity->parent = bfqg->my_entity; /* NULL for root group */ - entity->sched_data = &bfqg->sched_data; - } - - static void bfqg_stats_exit(struct bfqg_stats *stats) - { -- blkg_rwstat_exit(&stats->service_bytes); -- blkg_rwstat_exit(&stats->serviced); - blkg_rwstat_exit(&stats->merged); - blkg_rwstat_exit(&stats->service_time); - blkg_rwstat_exit(&stats->wait_time); - blkg_rwstat_exit(&stats->queued); -- blkg_stat_exit(&stats->sectors); - blkg_stat_exit(&stats->time); -- blkg_stat_exit(&stats->unaccounted_time); - blkg_stat_exit(&stats->avg_queue_size_sum); - blkg_stat_exit(&stats->avg_queue_size_samples); - blkg_stat_exit(&stats->dequeue); -@@ -354,15 +333,11 @@ static void bfqg_stats_exit(struct bfqg_stats *stats) - - static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp) - { -- if (blkg_rwstat_init(&stats->service_bytes, gfp) || -- blkg_rwstat_init(&stats->serviced, gfp) || -- blkg_rwstat_init(&stats->merged, gfp) || -+ if (blkg_rwstat_init(&stats->merged, gfp) || - blkg_rwstat_init(&stats->service_time, gfp) || - blkg_rwstat_init(&stats->wait_time, gfp) || - blkg_rwstat_init(&stats->queued, gfp) || -- blkg_stat_init(&stats->sectors, gfp) || - blkg_stat_init(&stats->time, gfp) || -- blkg_stat_init(&stats->unaccounted_time, gfp) || - blkg_stat_init(&stats->avg_queue_size_sum, gfp) || - blkg_stat_init(&stats->avg_queue_size_samples, gfp) || - blkg_stat_init(&stats->dequeue, gfp) || -@@ -386,11 +361,27 @@ static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg) - return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq)); - } - -+static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp) -+{ -+ struct bfq_group_data *bgd; -+ -+ bgd = kzalloc(sizeof(*bgd), gfp); -+ if (!bgd) -+ return NULL; -+ return &bgd->pd; -+} -+ - static void bfq_cpd_init(struct blkcg_policy_data *cpd) - { - struct bfq_group_data *d = cpd_to_bfqgd(cpd); - -- d->weight = BFQ_DEFAULT_GRP_WEIGHT; -+ d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ? -+ CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL; -+} -+ -+static void bfq_cpd_free(struct blkcg_policy_data *cpd) -+{ -+ kfree(cpd_to_bfqgd(cpd)); - } - - static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) -@@ -401,8 +392,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) - if (!bfqg) - return NULL; - -- if (bfqg_stats_init(&bfqg->stats, gfp) || -- bfqg_stats_init(&bfqg->dead_stats, gfp)) { -+ if (bfqg_stats_init(&bfqg->stats, gfp)) { - kfree(bfqg); - return NULL; - } -@@ -410,27 +400,20 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) - return &bfqg->pd; - } - --static void bfq_group_set_parent(struct bfq_group *bfqg, -- struct bfq_group *parent) -+static void bfq_pd_init(struct blkg_policy_data *pd) - { -+ struct blkcg_gq *blkg; -+ struct bfq_group *bfqg; -+ struct bfq_data *bfqd; - struct bfq_entity *entity; -+ struct bfq_group_data *d; - -- BUG_ON(!parent); -- BUG_ON(!bfqg); -- BUG_ON(bfqg == parent); -- -+ blkg = pd_to_blkg(pd); -+ BUG_ON(!blkg); -+ bfqg = blkg_to_bfqg(blkg); -+ bfqd = blkg->q->elevator->elevator_data; - entity = &bfqg->entity; -- entity->parent = parent->my_entity; -- entity->sched_data = &parent->sched_data; --} -- --static void bfq_pd_init(struct blkg_policy_data *pd) --{ -- struct blkcg_gq *blkg = pd_to_blkg(pd); -- struct bfq_group *bfqg = blkg_to_bfqg(blkg); -- struct bfq_data *bfqd = blkg->q->elevator->elevator_data; -- struct bfq_entity *entity = &bfqg->entity; -- struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg); -+ d = blkcg_to_bfqgd(blkg->blkcg); - - entity->orig_weight = entity->weight = entity->new_weight = d->weight; - entity->my_sched_data = &bfqg->sched_data; -@@ -448,70 +431,53 @@ static void bfq_pd_free(struct blkg_policy_data *pd) - struct bfq_group *bfqg = pd_to_bfqg(pd); - - bfqg_stats_exit(&bfqg->stats); -- bfqg_stats_exit(&bfqg->dead_stats); -- - return kfree(bfqg); - } - --/* offset delta from bfqg->stats to bfqg->dead_stats */ --static const int dead_stats_off_delta = offsetof(struct bfq_group, dead_stats) - -- offsetof(struct bfq_group, stats); -- --/* to be used by recursive prfill, sums live and dead stats recursively */ --static u64 bfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off) -+static void bfq_pd_reset_stats(struct blkg_policy_data *pd) - { -- u64 sum = 0; -+ struct bfq_group *bfqg = pd_to_bfqg(pd); - -- sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off); -- sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, -- off + dead_stats_off_delta); -- return sum; -+ bfqg_stats_reset(&bfqg->stats); - } - --/* to be used by recursive prfill, sums live and dead rwstats recursively */ --static struct blkg_rwstat --bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd, int off) -+static void bfq_group_set_parent(struct bfq_group *bfqg, -+ struct bfq_group *parent) - { -- struct blkg_rwstat a, b; -+ struct bfq_entity *entity; - -- a = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off); -- b = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, -- off + dead_stats_off_delta); -- blkg_rwstat_add_aux(&a, &b); -- return a; -+ BUG_ON(!parent); -+ BUG_ON(!bfqg); -+ BUG_ON(bfqg == parent); -+ -+ entity = &bfqg->entity; -+ entity->parent = parent->my_entity; -+ entity->sched_data = &parent->sched_data; - } - --static void bfq_pd_reset_stats(struct blkg_policy_data *pd) -+static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd, -+ struct blkcg *blkcg) - { -- struct bfq_group *bfqg = pd_to_bfqg(pd); -+ struct blkcg_gq *blkg; - -- bfqg_stats_reset(&bfqg->stats); -- bfqg_stats_reset(&bfqg->dead_stats); -+ blkg = blkg_lookup(blkcg, bfqd->queue); -+ if (likely(blkg)) -+ return blkg_to_bfqg(blkg); -+ return NULL; - } - --static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, -- struct blkcg *blkcg) -+static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, -+ struct blkcg *blkcg) - { -- struct request_queue *q = bfqd->queue; -- struct bfq_group *bfqg = NULL, *parent; -- struct bfq_entity *entity = NULL; -+ struct bfq_group *bfqg, *parent; -+ struct bfq_entity *entity; - - assert_spin_locked(bfqd->queue->queue_lock); - -- /* avoid lookup for the common case where there's no blkcg */ -- if (blkcg == &blkcg_root) { -- bfqg = bfqd->root_group; -- } else { -- struct blkcg_gq *blkg; -- -- blkg = blkg_lookup_create(blkcg, q); -- if (!IS_ERR(blkg)) -- bfqg = blkg_to_bfqg(blkg); -- else /* fallback to root_group */ -- bfqg = bfqd->root_group; -- } -+ bfqg = bfq_lookup_bfqg(bfqd, blkcg); - -- BUG_ON(!bfqg); -+ if (unlikely(!bfqg)) -+ return NULL; - - /* - * Update chain of bfq_groups as we might be handling a leaf group -@@ -537,11 +503,15 @@ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, - static void bfq_pos_tree_add_move(struct bfq_data *bfqd, - struct bfq_queue *bfqq); - -+static void bfq_bfqq_expire(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ bool compensate, -+ enum bfqq_expiration reason); -+ - /** - * bfq_bfqq_move - migrate @bfqq to @bfqg. - * @bfqd: queue descriptor. - * @bfqq: the queue to move. -- * @entity: @bfqq's entity. - * @bfqg: the group to move to. - * - * Move @bfqq to @bfqg, deactivating it from its old group and reactivating -@@ -552,26 +522,40 @@ static void bfq_pos_tree_add_move(struct bfq_data *bfqd, - * rcu_read_lock()). - */ - static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, -- struct bfq_entity *entity, struct bfq_group *bfqg) -+ struct bfq_group *bfqg) - { -- int busy, resume; -- -- busy = bfq_bfqq_busy(bfqq); -- resume = !RB_EMPTY_ROOT(&bfqq->sort_list); -+ struct bfq_entity *entity = &bfqq->entity; - -- BUG_ON(resume && !entity->on_st); -- BUG_ON(busy && !resume && entity->on_st && -+ BUG_ON(!bfq_bfqq_busy(bfqq) && !RB_EMPTY_ROOT(&bfqq->sort_list)); -+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list) && !entity->on_st); -+ BUG_ON(bfq_bfqq_busy(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list) -+ && entity->on_st && - bfqq != bfqd->in_service_queue); -+ BUG_ON(!bfq_bfqq_busy(bfqq) && bfqq == bfqd->in_service_queue); -+ -+ /* If bfqq is empty, then bfq_bfqq_expire also invokes -+ * bfq_del_bfqq_busy, thereby removing bfqq and its entity -+ * from data structures related to current group. Otherwise we -+ * need to remove bfqq explicitly with bfq_deactivate_bfqq, as -+ * we do below. -+ */ -+ if (bfqq == bfqd->in_service_queue) -+ bfq_bfqq_expire(bfqd, bfqd->in_service_queue, -+ false, BFQ_BFQQ_PREEMPTED); -+ -+ BUG_ON(entity->on_st && !bfq_bfqq_busy(bfqq) -+ && &bfq_entity_service_tree(entity)->idle != -+ entity->tree); - -- if (busy) { -- BUG_ON(atomic_read(&bfqq->ref) < 2); -+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq)); - -- if (!resume) -- bfq_del_bfqq_busy(bfqd, bfqq, 0); -- else -- bfq_deactivate_bfqq(bfqd, bfqq, 0); -- } else if (entity->on_st) -+ if (bfq_bfqq_busy(bfqq)) -+ bfq_deactivate_bfqq(bfqd, bfqq, false, false); -+ else if (entity->on_st) { -+ BUG_ON(&bfq_entity_service_tree(entity)->idle != -+ entity->tree); - bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); -+ } - bfqg_put(bfqq_group(bfqq)); - - /* -@@ -583,14 +567,17 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, - entity->sched_data = &bfqg->sched_data; - bfqg_get(bfqg); - -- if (busy) { -+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq)); -+ if (bfq_bfqq_busy(bfqq)) { - bfq_pos_tree_add_move(bfqd, bfqq); -- if (resume) -- bfq_activate_bfqq(bfqd, bfqq); -+ bfq_activate_bfqq(bfqd, bfqq); - } - - if (!bfqd->in_service_queue && !bfqd->rq_in_driver) - bfq_schedule_dispatch(bfqd); -+ BUG_ON(entity->on_st && !bfq_bfqq_busy(bfqq) -+ && &bfq_entity_service_tree(entity)->idle != -+ entity->tree); - } - - /** -@@ -617,7 +604,11 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, - - lockdep_assert_held(bfqd->queue->queue_lock); - -- bfqg = bfq_find_alloc_group(bfqd, blkcg); -+ bfqg = bfq_find_set_group(bfqd, blkcg); -+ -+ if (unlikely(!bfqg)) -+ bfqg = bfqd->root_group; -+ - if (async_bfqq) { - entity = &async_bfqq->entity; - -@@ -625,7 +616,8 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, - bic_set_bfqq(bic, NULL, 0); - bfq_log_bfqq(bfqd, async_bfqq, - "bic_change_group: %p %d", -- async_bfqq, atomic_read(&async_bfqq->ref)); -+ async_bfqq, -+ async_bfqq->ref); - bfq_put_queue(async_bfqq); - } - } -@@ -633,7 +625,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, - if (sync_bfqq) { - entity = &sync_bfqq->entity; - if (entity->sched_data != &bfqg->sched_data) -- bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); -+ bfq_bfqq_move(bfqd, sync_bfqq, bfqg); - } - - return bfqg; -@@ -642,25 +634,23 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, - static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) - { - struct bfq_data *bfqd = bic_to_bfqd(bic); -- struct blkcg *blkcg; - struct bfq_group *bfqg = NULL; -- uint64_t id; -+ uint64_t serial_nr; - - rcu_read_lock(); -- blkcg = bio_blkcg(bio); -- id = blkcg->css.serial_nr; -- rcu_read_unlock(); -+ serial_nr = bio_blkcg(bio)->css.serial_nr; - - /* - * Check whether blkcg has changed. The condition may trigger - * spuriously on a newly created cic but there's no harm. - */ -- if (unlikely(!bfqd) || likely(bic->blkcg_id == id)) -- return; -+ if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) -+ goto out; - -- bfqg = __bfq_bic_change_cgroup(bfqd, bic, blkcg); -- BUG_ON(!bfqg); -- bic->blkcg_id = id; -+ bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio)); -+ bic->blkcg_serial_nr = serial_nr; -+out: -+ rcu_read_unlock(); - } - - /** -@@ -672,7 +662,7 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st) - struct bfq_entity *entity = st->first_idle; - - for (; entity ; entity = st->first_idle) -- __bfq_deactivate_entity(entity, 0); -+ __bfq_deactivate_entity(entity, false); - } - - /** -@@ -686,7 +676,7 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - - BUG_ON(!bfqq); -- bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); -+ bfq_bfqq_move(bfqd, bfqq, bfqd->root_group); - } - - /** -@@ -717,11 +707,12 @@ static void bfq_reparent_active_entities(struct bfq_data *bfqd, - } - - /** -- * bfq_destroy_group - destroy @bfqg. -- * @bfqg: the group being destroyed. -+ * bfq_pd_offline - deactivate the entity associated with @pd, -+ * and reparent its children entities. -+ * @pd: descriptor of the policy going offline. - * -- * Destroy @bfqg, making sure that it is not referenced from its parent. -- * blkio already grabs the queue_lock for us, so no need to use RCU-based magic -+ * blkio already grabs the queue_lock for us, so no need to use -+ * RCU-based magic - */ - static void bfq_pd_offline(struct blkg_policy_data *pd) - { -@@ -776,10 +767,15 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) - BUG_ON(bfqg->sched_data.next_in_service); - BUG_ON(bfqg->sched_data.in_service_entity); - -- __bfq_deactivate_entity(entity, 0); -+ __bfq_deactivate_entity(entity, false); - bfq_put_async_queues(bfqd, bfqg); -- BUG_ON(entity->tree); - -+ /* -+ * @blkg is going offline and will be ignored by -+ * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so -+ * that they don't get lost. If IOs complete after this point, the -+ * stats for them will be lost. Oh well... -+ */ - bfqg_stats_xfer_dead(bfqg); - } - -@@ -789,46 +785,35 @@ static void bfq_end_wr_async(struct bfq_data *bfqd) - - list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) { - struct bfq_group *bfqg = blkg_to_bfqg(blkg); -+ BUG_ON(!bfqg); - - bfq_end_wr_async_queues(bfqd, bfqg); - } - bfq_end_wr_async_queues(bfqd, bfqd->root_group); - } - --static u64 bfqio_cgroup_weight_read(struct cgroup_subsys_state *css, -- struct cftype *cftype) --{ -- struct blkcg *blkcg = css_to_blkcg(css); -- struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); -- int ret = -EINVAL; -- -- spin_lock_irq(&blkcg->lock); -- ret = bfqgd->weight; -- spin_unlock_irq(&blkcg->lock); -- -- return ret; --} -- --static int bfqio_cgroup_weight_read_dfl(struct seq_file *sf, void *v) -+static int bfq_io_show_weight(struct seq_file *sf, void *v) - { - struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); - struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); -+ unsigned int val = 0; - -- spin_lock_irq(&blkcg->lock); -- seq_printf(sf, "%u\n", bfqgd->weight); -- spin_unlock_irq(&blkcg->lock); -+ if (bfqgd) -+ val = bfqgd->weight; -+ -+ seq_printf(sf, "%u\n", val); - - return 0; - } - --static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css, -- struct cftype *cftype, -- u64 val) -+static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css, -+ struct cftype *cftype, -+ u64 val) - { - struct blkcg *blkcg = css_to_blkcg(css); - struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); - struct blkcg_gq *blkg; -- int ret = -EINVAL; -+ int ret = -ERANGE; - - if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT) - return ret; -@@ -873,13 +858,18 @@ static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css, - return ret; - } - --static ssize_t bfqio_cgroup_weight_write_dfl(struct kernfs_open_file *of, -- char *buf, size_t nbytes, -- loff_t off) -+static ssize_t bfq_io_set_weight(struct kernfs_open_file *of, -+ char *buf, size_t nbytes, -+ loff_t off) - { -+ u64 weight; - /* First unsigned long found in the file is used */ -- return bfqio_cgroup_weight_write(of_css(of), NULL, -- simple_strtoull(strim(buf), NULL, 0)); -+ int ret = kstrtoull(strim(buf), 0, &weight); -+ -+ if (ret) -+ return ret; -+ -+ return bfq_io_set_weight_legacy(of_css(of), NULL, weight); - } - - static int bfqg_print_stat(struct seq_file *sf, void *v) -@@ -899,16 +889,17 @@ static int bfqg_print_rwstat(struct seq_file *sf, void *v) - static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, - struct blkg_policy_data *pd, int off) - { -- u64 sum = bfqg_stat_pd_recursive_sum(pd, off); -- -+ u64 sum = blkg_stat_recursive_sum(pd_to_blkg(pd), -+ &blkcg_policy_bfq, off); - return __blkg_prfill_u64(sf, pd, sum); - } - - static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf, - struct blkg_policy_data *pd, int off) - { -- struct blkg_rwstat sum = bfqg_rwstat_pd_recursive_sum(pd, off); -- -+ struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd_to_blkg(pd), -+ &blkcg_policy_bfq, -+ off); - return __blkg_prfill_rwstat(sf, pd, &sum); - } - -@@ -928,6 +919,41 @@ static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v) - return 0; - } - -+static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd, -+ int off) -+{ -+ u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes); -+ -+ return __blkg_prfill_u64(sf, pd, sum >> 9); -+} -+ -+static int bfqg_print_stat_sectors(struct seq_file *sf, void *v) -+{ -+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), -+ bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false); -+ return 0; -+} -+ -+static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf, -+ struct blkg_policy_data *pd, int off) -+{ -+ struct blkg_rwstat tmp = blkg_rwstat_recursive_sum(pd->blkg, NULL, -+ offsetof(struct blkcg_gq, stat_bytes)); -+ u64 sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + -+ atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]); -+ -+ return __blkg_prfill_u64(sf, pd, sum >> 9); -+} -+ -+static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v) -+{ -+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), -+ bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0, -+ false); -+ return 0; -+} -+ -+ - static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf, - struct blkg_policy_data *pd, int off) - { -@@ -964,38 +990,15 @@ bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) - return blkg_to_bfqg(bfqd->queue->root_blkg); - } - --static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp) --{ -- struct bfq_group_data *bgd; -- -- bgd = kzalloc(sizeof(*bgd), GFP_KERNEL); -- if (!bgd) -- return NULL; -- return &bgd->pd; --} -- --static void bfq_cpd_free(struct blkcg_policy_data *cpd) --{ -- kfree(cpd_to_bfqgd(cpd)); --} -- --static struct cftype bfqio_files_dfl[] = { -+static struct cftype bfq_blkcg_legacy_files[] = { - { -- .name = "weight", -+ .name = "bfq.weight", - .flags = CFTYPE_NOT_ON_ROOT, -- .seq_show = bfqio_cgroup_weight_read_dfl, -- .write = bfqio_cgroup_weight_write_dfl, -+ .seq_show = bfq_io_show_weight, -+ .write_u64 = bfq_io_set_weight_legacy, - }, -- {} /* terminate */ --}; - --static struct cftype bfqio_files[] = { -- { -- .name = "bfq.weight", -- .read_u64 = bfqio_cgroup_weight_read, -- .write_u64 = bfqio_cgroup_weight_write, -- }, -- /* statistics, cover only the tasks in the bfqg */ -+ /* statistics, covers only the tasks in the bfqg */ - { - .name = "bfq.time", - .private = offsetof(struct bfq_group, stats.time), -@@ -1003,18 +1006,17 @@ static struct cftype bfqio_files[] = { - }, - { - .name = "bfq.sectors", -- .private = offsetof(struct bfq_group, stats.sectors), -- .seq_show = bfqg_print_stat, -+ .seq_show = bfqg_print_stat_sectors, - }, - { - .name = "bfq.io_service_bytes", -- .private = offsetof(struct bfq_group, stats.service_bytes), -- .seq_show = bfqg_print_rwstat, -+ .private = (unsigned long)&blkcg_policy_bfq, -+ .seq_show = blkg_print_stat_bytes, - }, - { - .name = "bfq.io_serviced", -- .private = offsetof(struct bfq_group, stats.serviced), -- .seq_show = bfqg_print_rwstat, -+ .private = (unsigned long)&blkcg_policy_bfq, -+ .seq_show = blkg_print_stat_ios, - }, - { - .name = "bfq.io_service_time", -@@ -1045,18 +1047,17 @@ static struct cftype bfqio_files[] = { - }, - { - .name = "bfq.sectors_recursive", -- .private = offsetof(struct bfq_group, stats.sectors), -- .seq_show = bfqg_print_stat_recursive, -+ .seq_show = bfqg_print_stat_sectors_recursive, - }, - { - .name = "bfq.io_service_bytes_recursive", -- .private = offsetof(struct bfq_group, stats.service_bytes), -- .seq_show = bfqg_print_rwstat_recursive, -+ .private = (unsigned long)&blkcg_policy_bfq, -+ .seq_show = blkg_print_stat_bytes_recursive, - }, - { - .name = "bfq.io_serviced_recursive", -- .private = offsetof(struct bfq_group, stats.serviced), -- .seq_show = bfqg_print_rwstat_recursive, -+ .private = (unsigned long)&blkcg_policy_bfq, -+ .seq_show = blkg_print_stat_ios_recursive, - }, - { - .name = "bfq.io_service_time_recursive", -@@ -1102,31 +1103,42 @@ static struct cftype bfqio_files[] = { - .private = offsetof(struct bfq_group, stats.dequeue), - .seq_show = bfqg_print_stat, - }, -- { -- .name = "bfq.unaccounted_time", -- .private = offsetof(struct bfq_group, stats.unaccounted_time), -- .seq_show = bfqg_print_stat, -- }, - { } /* terminate */ - }; - --static struct blkcg_policy blkcg_policy_bfq = { -- .dfl_cftypes = bfqio_files_dfl, -- .legacy_cftypes = bfqio_files, -- -- .pd_alloc_fn = bfq_pd_alloc, -- .pd_init_fn = bfq_pd_init, -- .pd_offline_fn = bfq_pd_offline, -- .pd_free_fn = bfq_pd_free, -- .pd_reset_stats_fn = bfq_pd_reset_stats, -- -- .cpd_alloc_fn = bfq_cpd_alloc, -- .cpd_init_fn = bfq_cpd_init, -- .cpd_bind_fn = bfq_cpd_init, -- .cpd_free_fn = bfq_cpd_free, -+static struct cftype bfq_blkg_files[] = { -+ { -+ .name = "bfq.weight", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .seq_show = bfq_io_show_weight, -+ .write = bfq_io_set_weight, -+ }, -+ {} /* terminate */ - }; - --#else -+#else /* CONFIG_BFQ_GROUP_IOSCHED */ -+ -+static inline void bfqg_stats_update_io_add(struct bfq_group *bfqg, -+ struct bfq_queue *bfqq, unsigned int op) { } -+static inline void -+bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { } -+static inline void -+bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { } -+static inline void bfqg_stats_update_completion(struct bfq_group *bfqg, -+ uint64_t start_time, uint64_t io_start_time, -+ unsigned int op) { } -+static inline void -+bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, -+ struct bfq_group *curr_bfqg) { } -+static inline void bfqg_stats_end_empty_time(struct bfqg_stats *stats) { } -+static inline void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { } -+static inline void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { } -+static inline void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { } -+static inline void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { } -+static inline void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { } -+ -+static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ struct bfq_group *bfqg) {} - - static void bfq_init_entity(struct bfq_entity *entity, - struct bfq_group *bfqg) -@@ -1142,35 +1154,22 @@ static void bfq_init_entity(struct bfq_entity *entity, - entity->sched_data = &bfqg->sched_data; - } - --static struct bfq_group * --bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) --{ -- struct bfq_data *bfqd = bic_to_bfqd(bic); -- -- return bfqd->root_group; --} -- --static void bfq_bfqq_move(struct bfq_data *bfqd, -- struct bfq_queue *bfqq, -- struct bfq_entity *entity, -- struct bfq_group *bfqg) --{ --} -+static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {} - - static void bfq_end_wr_async(struct bfq_data *bfqd) - { - bfq_end_wr_async_queues(bfqd, bfqd->root_group); - } - --static void bfq_disconnect_groups(struct bfq_data *bfqd) -+static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, -+ struct blkcg *blkcg) - { -- bfq_put_async_queues(bfqd, bfqd->root_group); -+ return bfqd->root_group; - } - --static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, -- struct blkcg *blkcg) -+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq) - { -- return bfqd->root_group; -+ return bfqq->bfqd->root_group; - } - - static struct bfq_group * -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index cf3e9b1..6d06c3c 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -1,5 +1,5 @@ - /* -- * Budget Fair Queueing (BFQ) disk scheduler. -+ * Budget Fair Queueing (BFQ) I/O scheduler. - * - * Based on ideas and code from CFQ: - * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> -@@ -7,25 +7,34 @@ - * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> - * Paolo Valente <paolo.valente@unimore.it> - * -- * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it> -+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it> -+ * -+ * Copyright (C) 2017 Paolo Valente <paolo.valente@linaro.org> - * - * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ - * file. - * -- * BFQ is a proportional-share storage-I/O scheduling algorithm based on -- * the slice-by-slice service scheme of CFQ. But BFQ assigns budgets, -- * measured in number of sectors, to processes instead of time slices. The -- * device is not granted to the in-service process for a given time slice, -- * but until it has exhausted its assigned budget. This change from the time -- * to the service domain allows BFQ to distribute the device throughput -- * among processes as desired, without any distortion due to ZBR, workload -- * fluctuations or other factors. BFQ uses an ad hoc internal scheduler, -- * called B-WF2Q+, to schedule processes according to their budgets. More -- * precisely, BFQ schedules queues associated to processes. Thanks to the -- * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to -- * I/O-bound processes issuing sequential requests (to boost the -- * throughput), and yet guarantee a low latency to interactive and soft -- * real-time applications. -+ * BFQ is a proportional-share I/O scheduler, with some extra -+ * low-latency capabilities. BFQ also supports full hierarchical -+ * scheduling through cgroups. Next paragraphs provide an introduction -+ * on BFQ inner workings. Details on BFQ benefits and usage can be -+ * found in Documentation/block/bfq-iosched.txt. -+ * -+ * BFQ is a proportional-share storage-I/O scheduling algorithm based -+ * on the slice-by-slice service scheme of CFQ. But BFQ assigns -+ * budgets, measured in number of sectors, to processes instead of -+ * time slices. The device is not granted to the in-service process -+ * for a given time slice, but until it has exhausted its assigned -+ * budget. This change from the time to the service domain enables BFQ -+ * to distribute the device throughput among processes as desired, -+ * without any distortion due to throughput fluctuations, or to device -+ * internal queueing. BFQ uses an ad hoc internal scheduler, called -+ * B-WF2Q+, to schedule processes according to their budgets. More -+ * precisely, BFQ schedules queues associated with processes. Thanks to -+ * the accurate policy of B-WF2Q+, BFQ can afford to assign high -+ * budgets to I/O-bound processes issuing sequential requests (to -+ * boost the throughput), and yet guarantee a low latency to -+ * interactive and soft real-time applications. - * - * BFQ is described in [1], where also a reference to the initial, more - * theoretical paper on BFQ can be found. The interested reader can find -@@ -40,10 +49,10 @@ - * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) - * complexity derives from the one introduced with EEVDF in [3]. - * -- * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness -- * with the BFQ Disk I/O Scheduler'', -- * Proceedings of the 5th Annual International Systems and Storage -- * Conference (SYSTOR '12), June 2012. -+ * [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O -+ * Scheduler", Proceedings of the First Workshop on Mobile System -+ * Technologies (MST-2015), May 2015. -+ * http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf - * - * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf - * -@@ -70,24 +79,23 @@ - #include "bfq.h" - #include "blk.h" - --/* Expiration time of sync (0) and async (1) requests, in jiffies. */ --static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; -+/* Expiration time of sync (0) and async (1) requests, in ns. */ -+static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 }; - - /* Maximum backwards seek, in KiB. */ --static const int bfq_back_max = 16 * 1024; -+static const int bfq_back_max = (16 * 1024); - - /* Penalty of a backwards seek, in number of sectors. */ - static const int bfq_back_penalty = 2; - --/* Idling period duration, in jiffies. */ --static int bfq_slice_idle = HZ / 125; -+/* Idling period duration, in ns. */ -+static u32 bfq_slice_idle = (NSEC_PER_SEC / 125); - - /* Minimum number of assigned budgets for which stats are safe to compute. */ - static const int bfq_stats_min_budgets = 194; - - /* Default maximum budget values, in sectors and number of requests. */ --static const int bfq_default_max_budget = 16 * 1024; --static const int bfq_max_budget_async_rq = 4; -+static const int bfq_default_max_budget = (16 * 1024); - - /* - * Async to sync throughput distribution is controlled as follows: -@@ -97,23 +105,28 @@ static const int bfq_max_budget_async_rq = 4; - static const int bfq_async_charge_factor = 10; - - /* Default timeout values, in jiffies, approximating CFQ defaults. */ --static const int bfq_timeout_sync = HZ / 8; --static int bfq_timeout_async = HZ / 25; -+static const int bfq_timeout = (HZ / 8); - --struct kmem_cache *bfq_pool; -+static struct kmem_cache *bfq_pool; - --/* Below this threshold (in ms), we consider thinktime immediate. */ --#define BFQ_MIN_TT 2 -+/* Below this threshold (in ns), we consider thinktime immediate. */ -+#define BFQ_MIN_TT (2 * NSEC_PER_MSEC) - - /* hw_tag detection: parallel requests threshold and min samples needed. */ - #define BFQ_HW_QUEUE_THRESHOLD 4 - #define BFQ_HW_QUEUE_SAMPLES 32 - --#define BFQQ_SEEK_THR (sector_t)(8 * 1024) --#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) -+#define BFQQ_SEEK_THR (sector_t)(8 * 100) -+#define BFQQ_SECT_THR_NONROT (sector_t)(2 * 32) -+#define BFQQ_CLOSE_THR (sector_t)(8 * 1024) -+#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8) - --/* Min samples used for peak rate estimation (for autotuning). */ --#define BFQ_PEAK_RATE_SAMPLES 32 -+/* Min number of samples required to perform peak-rate update */ -+#define BFQ_RATE_MIN_SAMPLES 32 -+/* Min observation time interval required to perform a peak-rate update (ns) */ -+#define BFQ_RATE_MIN_INTERVAL (300*NSEC_PER_MSEC) -+/* Target observation time interval for a peak-rate update (ns) */ -+#define BFQ_RATE_REF_INTERVAL NSEC_PER_SEC - - /* Shift used for peak rate fixed precision calculations. */ - #define BFQ_RATE_SHIFT 16 -@@ -141,16 +154,24 @@ struct kmem_cache *bfq_pool; - * The device's speed class is dynamically (re)detected in - * bfq_update_peak_rate() every time the estimated peak rate is updated. - * -- * In the following definitions, R_slow[0]/R_fast[0] and T_slow[0]/T_fast[0] -- * are the reference values for a slow/fast rotational device, whereas -- * R_slow[1]/R_fast[1] and T_slow[1]/T_fast[1] are the reference values for -- * a slow/fast non-rotational device. Finally, device_speed_thresh are the -- * thresholds used to switch between speed classes. -+ * In the following definitions, R_slow[0]/R_fast[0] and -+ * T_slow[0]/T_fast[0] are the reference values for a slow/fast -+ * rotational device, whereas R_slow[1]/R_fast[1] and -+ * T_slow[1]/T_fast[1] are the reference values for a slow/fast -+ * non-rotational device. Finally, device_speed_thresh are the -+ * thresholds used to switch between speed classes. The reference -+ * rates are not the actual peak rates of the devices used as a -+ * reference, but slightly lower values. The reason for using these -+ * slightly lower values is that the peak-rate estimator tends to -+ * yield slightly lower values than the actual peak rate (it can yield -+ * the actual peak rate only if there is only one process doing I/O, -+ * and the process does sequential I/O). -+ * - * Both the reference peak rates and the thresholds are measured in - * sectors/usec, left-shifted by BFQ_RATE_SHIFT. - */ --static int R_slow[2] = {1536, 10752}; --static int R_fast[2] = {17415, 34791}; -+static int R_slow[2] = {1000, 10700}; -+static int R_fast[2] = {14000, 33000}; - /* - * To improve readability, a conversion function is used to initialize the - * following arrays, which entails that they can be initialized only in a -@@ -178,18 +199,6 @@ static void bfq_schedule_dispatch(struct bfq_data *bfqd); - #define bfq_sample_valid(samples) ((samples) > 80) - - /* -- * We regard a request as SYNC, if either it's a read or has the SYNC bit -- * set (in which case it could also be a direct WRITE). -- */ --static int bfq_bio_sync(struct bio *bio) --{ -- if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) -- return 1; -- -- return 0; --} -- --/* - * Scheduler run of queue, if there are requests pending and no one in the - * driver that will restart queueing. - */ -@@ -409,11 +418,7 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd) - */ - static bool bfq_symmetric_scenario(struct bfq_data *bfqd) - { -- return --#ifdef CONFIG_BFQ_GROUP_IOSCHED -- !bfqd->active_numerous_groups && --#endif -- !bfq_differentiated_weights(bfqd); -+ return !bfq_differentiated_weights(bfqd); - } - - /* -@@ -469,6 +474,22 @@ static void bfq_weights_tree_add(struct bfq_data *bfqd, - - entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), - GFP_ATOMIC); -+ -+ /* -+ * In the unlucky event of an allocation failure, we just -+ * exit. This will cause the weight of entity to not be -+ * considered in bfq_differentiated_weights, which, in its -+ * turn, causes the scenario to be deemed wrongly symmetric in -+ * case entity's weight would have been the only weight making -+ * the scenario asymmetric. On the bright side, no unbalance -+ * will however occur when entity becomes inactive again (the -+ * invocation of this function is triggered by an activation -+ * of entity). In fact, bfq_weights_tree_remove does nothing -+ * if !entity->weight_counter. -+ */ -+ if (unlikely(!entity->weight_counter)) -+ return; -+ - entity->weight_counter->weight = entity->weight; - rb_link_node(&entity->weight_counter->weights_node, parent, new); - rb_insert_color(&entity->weight_counter->weights_node, root); -@@ -505,13 +526,45 @@ static void bfq_weights_tree_remove(struct bfq_data *bfqd, - entity->weight_counter = NULL; - } - -+/* -+ * Return expired entry, or NULL to just start from scratch in rbtree. -+ */ -+static struct request *bfq_check_fifo(struct bfq_queue *bfqq, -+ struct request *last) -+{ -+ struct request *rq; -+ -+ if (bfq_bfqq_fifo_expire(bfqq)) -+ return NULL; -+ -+ bfq_mark_bfqq_fifo_expire(bfqq); -+ -+ rq = rq_entry_fifo(bfqq->fifo.next); -+ -+ if (rq == last || ktime_get_ns() < rq->fifo_time) -+ return NULL; -+ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "check_fifo: returned %p", rq); -+ BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); -+ return rq; -+} -+ - static struct request *bfq_find_next_rq(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - struct request *last) - { - struct rb_node *rbnext = rb_next(&last->rb_node); - struct rb_node *rbprev = rb_prev(&last->rb_node); -- struct request *next = NULL, *prev = NULL; -+ struct request *next, *prev = NULL; -+ -+ BUG_ON(list_empty(&bfqq->fifo)); -+ -+ /* Follow expired path, else get first next available. */ -+ next = bfq_check_fifo(bfqq, last); -+ if (next) { -+ BUG_ON(next == last); -+ return next; -+ } - - BUG_ON(RB_EMPTY_NODE(&last->rb_node)); - -@@ -533,9 +586,19 @@ static struct request *bfq_find_next_rq(struct bfq_data *bfqd, - static unsigned long bfq_serv_to_charge(struct request *rq, - struct bfq_queue *bfqq) - { -- return blk_rq_sectors(rq) * -- (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->wr_coeff == 1) * -- bfq_async_charge_factor)); -+ if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1) -+ return blk_rq_sectors(rq); -+ -+ /* -+ * If there are no weight-raised queues, then amplify service -+ * by just the async charge factor; otherwise amplify service -+ * by twice the async charge factor, to further reduce latency -+ * for weight-raised queues. -+ */ -+ if (bfqq->bfqd->wr_busy_queues == 0) -+ return blk_rq_sectors(rq) * bfq_async_charge_factor; -+ -+ return blk_rq_sectors(rq) * 2 * bfq_async_charge_factor; - } - - /** -@@ -576,7 +639,7 @@ static void bfq_updated_next_req(struct bfq_data *bfqd, - entity->budget = new_budget; - bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", - new_budget); -- bfq_activate_bfqq(bfqd, bfqq); -+ bfq_requeue_bfqq(bfqd, bfqq); - } - } - -@@ -590,12 +653,23 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd) - dur = bfqd->RT_prod; - do_div(dur, bfqd->peak_rate); - -- return dur; --} -+ /* -+ * Limit duration between 3 and 13 seconds. Tests show that -+ * higher values than 13 seconds often yield the opposite of -+ * the desired result, i.e., worsen responsiveness by letting -+ * non-interactive and non-soft-real-time applications -+ * preserve weight raising for a too long time interval. -+ * -+ * On the other end, lower values than 3 seconds make it -+ * difficult for most interactive tasks to complete their jobs -+ * before weight-raising finishes. -+ */ -+ if (dur > msecs_to_jiffies(13000)) -+ dur = msecs_to_jiffies(13000); -+ else if (dur < msecs_to_jiffies(3000)) -+ dur = msecs_to_jiffies(3000); - --static unsigned int bfq_bfqq_cooperations(struct bfq_queue *bfqq) --{ -- return bfqq->bic ? bfqq->bic->cooperations : 0; -+ return dur; - } - - static void -@@ -605,31 +679,31 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) - bfq_mark_bfqq_idle_window(bfqq); - else - bfq_clear_bfqq_idle_window(bfqq); -+ - if (bic->saved_IO_bound) - bfq_mark_bfqq_IO_bound(bfqq); - else - bfq_clear_bfqq_IO_bound(bfqq); -- /* Assuming that the flag in_large_burst is already correctly set */ -- if (bic->wr_time_left && bfqq->bfqd->low_latency && -- !bfq_bfqq_in_large_burst(bfqq) && -- bic->cooperations < bfqq->bfqd->bfq_coop_thresh) { -- /* -- * Start a weight raising period with the duration given by -- * the raising_time_left snapshot. -- */ -- if (bfq_bfqq_busy(bfqq)) -- bfqq->bfqd->wr_busy_queues++; -- bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff; -- bfqq->wr_cur_max_time = bic->wr_time_left; -- bfqq->last_wr_start_finish = jiffies; -- bfqq->entity.prio_changed = 1; -+ -+ bfqq->wr_coeff = bic->saved_wr_coeff; -+ bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt; -+ BUG_ON(time_is_after_jiffies(bfqq->wr_start_at_switch_to_srt)); -+ bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish; -+ bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time; -+ BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); -+ -+ if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) || -+ time_is_before_jiffies(bfqq->last_wr_start_finish + -+ bfqq->wr_cur_max_time))) { -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "resume state: switching off wr (%lu + %lu < %lu)", -+ bfqq->last_wr_start_finish, bfqq->wr_cur_max_time, -+ jiffies); -+ -+ bfqq->wr_coeff = 1; - } -- /* -- * Clear wr_time_left to prevent bfq_bfqq_save_state() from -- * getting confused about the queue's need of a weight-raising -- * period. -- */ -- bic->wr_time_left = 0; -+ /* make sure weight will be updated, however we got here */ -+ bfqq->entity.prio_changed = 1; - } - - static int bfqq_process_refs(struct bfq_queue *bfqq) -@@ -639,7 +713,7 @@ static int bfqq_process_refs(struct bfq_queue *bfqq) - lockdep_assert_held(bfqq->bfqd->queue->queue_lock); - - io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; -- process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; -+ process_refs = bfqq->ref - io_refs - bfqq->entity.on_st; - BUG_ON(process_refs < 0); - return process_refs; - } -@@ -654,6 +728,7 @@ static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq) - hlist_del_init(&item->burst_list_node); - hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); - bfqd->burst_size = 1; -+ bfqd->burst_parent_entity = bfqq->entity.parent; - } - - /* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */ -@@ -662,6 +737,10 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) - /* Increment burst size to take into account also bfqq */ - bfqd->burst_size++; - -+ bfq_log_bfqq(bfqd, bfqq, "add_to_burst %d", bfqd->burst_size); -+ -+ BUG_ON(bfqd->burst_size > bfqd->bfq_large_burst_thresh); -+ - if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) { - struct bfq_queue *pos, *bfqq_item; - struct hlist_node *n; -@@ -671,15 +750,19 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) - * other to consider this burst as large. - */ - bfqd->large_burst = true; -+ bfq_log_bfqq(bfqd, bfqq, "add_to_burst: large burst started"); - - /* - * We can now mark all queues in the burst list as - * belonging to a large burst. - */ - hlist_for_each_entry(bfqq_item, &bfqd->burst_list, -- burst_list_node) -+ burst_list_node) { - bfq_mark_bfqq_in_large_burst(bfqq_item); -+ bfq_log_bfqq(bfqd, bfqq_item, "marked in large burst"); -+ } - bfq_mark_bfqq_in_large_burst(bfqq); -+ bfq_log_bfqq(bfqd, bfqq, "marked in large burst"); - - /* - * From now on, and until the current burst finishes, any -@@ -691,67 +774,79 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) - hlist_for_each_entry_safe(pos, n, &bfqd->burst_list, - burst_list_node) - hlist_del_init(&pos->burst_list_node); -- } else /* burst not yet large: add bfqq to the burst list */ -+ } else /* -+ * Burst not yet large: add bfqq to the burst list. Do -+ * not increment the ref counter for bfqq, because bfqq -+ * is removed from the burst list before freeing bfqq -+ * in put_queue. -+ */ - hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); - } - - /* -- * If many queues happen to become active shortly after each other, then, -- * to help the processes associated to these queues get their job done as -- * soon as possible, it is usually better to not grant either weight-raising -- * or device idling to these queues. In this comment we describe, firstly, -- * the reasons why this fact holds, and, secondly, the next function, which -- * implements the main steps needed to properly mark these queues so that -- * they can then be treated in a different way. -+ * If many queues belonging to the same group happen to be created -+ * shortly after each other, then the processes associated with these -+ * queues have typically a common goal. In particular, bursts of queue -+ * creations are usually caused by services or applications that spawn -+ * many parallel threads/processes. Examples are systemd during boot, -+ * or git grep. To help these processes get their job done as soon as -+ * possible, it is usually better to not grant either weight-raising -+ * or device idling to their queues. - * -- * As for the terminology, we say that a queue becomes active, i.e., -- * switches from idle to backlogged, either when it is created (as a -- * consequence of the arrival of an I/O request), or, if already existing, -- * when a new request for the queue arrives while the queue is idle. -- * Bursts of activations, i.e., activations of different queues occurring -- * shortly after each other, are typically caused by services or applications -- * that spawn or reactivate many parallel threads/processes. Examples are -- * systemd during boot or git grep. -+ * In this comment we describe, firstly, the reasons why this fact -+ * holds, and, secondly, the next function, which implements the main -+ * steps needed to properly mark these queues so that they can then be -+ * treated in a different way. - * -- * These services or applications benefit mostly from a high throughput: -- * the quicker the requests of the activated queues are cumulatively served, -- * the sooner the target job of these queues gets completed. As a consequence, -- * weight-raising any of these queues, which also implies idling the device -- * for it, is almost always counterproductive: in most cases it just lowers -- * throughput. -+ * The above services or applications benefit mostly from a high -+ * throughput: the quicker the requests of the activated queues are -+ * cumulatively served, the sooner the target job of these queues gets -+ * completed. As a consequence, weight-raising any of these queues, -+ * which also implies idling the device for it, is almost always -+ * counterproductive. In most cases it just lowers throughput. - * -- * On the other hand, a burst of activations may be also caused by the start -- * of an application that does not consist in a lot of parallel I/O-bound -- * threads. In fact, with a complex application, the burst may be just a -- * consequence of the fact that several processes need to be executed to -- * start-up the application. To start an application as quickly as possible, -- * the best thing to do is to privilege the I/O related to the application -- * with respect to all other I/O. Therefore, the best strategy to start as -- * quickly as possible an application that causes a burst of activations is -- * to weight-raise all the queues activated during the burst. This is the -+ * On the other hand, a burst of queue creations may be caused also by -+ * the start of an application that does not consist of a lot of -+ * parallel I/O-bound threads. In fact, with a complex application, -+ * several short processes may need to be executed to start-up the -+ * application. In this respect, to start an application as quickly as -+ * possible, the best thing to do is in any case to privilege the I/O -+ * related to the application with respect to all other -+ * I/O. Therefore, the best strategy to start as quickly as possible -+ * an application that causes a burst of queue creations is to -+ * weight-raise all the queues created during the burst. This is the - * exact opposite of the best strategy for the other type of bursts. - * -- * In the end, to take the best action for each of the two cases, the two -- * types of bursts need to be distinguished. Fortunately, this seems -- * relatively easy to do, by looking at the sizes of the bursts. In -- * particular, we found a threshold such that bursts with a larger size -- * than that threshold are apparently caused only by services or commands -- * such as systemd or git grep. For brevity, hereafter we call just 'large' -- * these bursts. BFQ *does not* weight-raise queues whose activations occur -- * in a large burst. In addition, for each of these queues BFQ performs or -- * does not perform idling depending on which choice boosts the throughput -- * most. The exact choice depends on the device and request pattern at -+ * In the end, to take the best action for each of the two cases, the -+ * two types of bursts need to be distinguished. Fortunately, this -+ * seems relatively easy, by looking at the sizes of the bursts. In -+ * particular, we found a threshold such that only bursts with a -+ * larger size than that threshold are apparently caused by -+ * services or commands such as systemd or git grep. For brevity, -+ * hereafter we call just 'large' these bursts. BFQ *does not* -+ * weight-raise queues whose creation occurs in a large burst. In -+ * addition, for each of these queues BFQ performs or does not perform -+ * idling depending on which choice boosts the throughput more. The -+ * exact choice depends on the device and request pattern at - * hand. - * -- * Turning back to the next function, it implements all the steps needed -- * to detect the occurrence of a large burst and to properly mark all the -- * queues belonging to it (so that they can then be treated in a different -- * way). This goal is achieved by maintaining a special "burst list" that -- * holds, temporarily, the queues that belong to the burst in progress. The -- * list is then used to mark these queues as belonging to a large burst if -- * the burst does become large. The main steps are the following. -+ * Unfortunately, false positives may occur while an interactive task -+ * is starting (e.g., an application is being started). The -+ * consequence is that the queues associated with the task do not -+ * enjoy weight raising as expected. Fortunately these false positives -+ * are very rare. They typically occur if some service happens to -+ * start doing I/O exactly when the interactive task starts. -+ * -+ * Turning back to the next function, it implements all the steps -+ * needed to detect the occurrence of a large burst and to properly -+ * mark all the queues belonging to it (so that they can then be -+ * treated in a different way). This goal is achieved by maintaining a -+ * "burst list" that holds, temporarily, the queues that belong to the -+ * burst in progress. The list is then used to mark these queues as -+ * belonging to a large burst if the burst does become large. The main -+ * steps are the following. - * -- * . when the very first queue is activated, the queue is inserted into the -+ * . when the very first queue is created, the queue is inserted into the - * list (as it could be the first queue in a possible burst) - * - * . if the current burst has not yet become large, and a queue Q that does -@@ -772,13 +867,13 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) - * - * . the device enters a large-burst mode - * -- * . if a queue Q that does not belong to the burst is activated while -+ * . if a queue Q that does not belong to the burst is created while - * the device is in large-burst mode and shortly after the last time - * at which a queue either entered the burst list or was marked as - * belonging to the current large burst, then Q is immediately marked - * as belonging to a large burst. - * -- * . if a queue Q that does not belong to the burst is activated a while -+ * . if a queue Q that does not belong to the burst is created a while - * later, i.e., not shortly after, than the last time at which a queue - * either entered the burst list or was marked as belonging to the - * current large burst, then the current burst is deemed as finished and: -@@ -791,52 +886,44 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) - * in a possible new burst (then the burst list contains just Q - * after this step). - */ --static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq, -- bool idle_for_long_time) -+static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) - { - /* -- * If bfqq happened to be activated in a burst, but has been idle -- * for at least as long as an interactive queue, then we assume -- * that, in the overall I/O initiated in the burst, the I/O -- * associated to bfqq is finished. So bfqq does not need to be -- * treated as a queue belonging to a burst anymore. Accordingly, -- * we reset bfqq's in_large_burst flag if set, and remove bfqq -- * from the burst list if it's there. We do not decrement instead -- * burst_size, because the fact that bfqq does not need to belong -- * to the burst list any more does not invalidate the fact that -- * bfqq may have been activated during the current burst. -- */ -- if (idle_for_long_time) { -- hlist_del_init(&bfqq->burst_list_node); -- bfq_clear_bfqq_in_large_burst(bfqq); -- } -- -- /* - * If bfqq is already in the burst list or is part of a large -- * burst, then there is nothing else to do. -+ * burst, or finally has just been split, then there is -+ * nothing else to do. - */ - if (!hlist_unhashed(&bfqq->burst_list_node) || -- bfq_bfqq_in_large_burst(bfqq)) -+ bfq_bfqq_in_large_burst(bfqq) || -+ time_is_after_eq_jiffies(bfqq->split_time + -+ msecs_to_jiffies(10))) - return; - - /* -- * If bfqq's activation happens late enough, then the current -- * burst is finished, and related data structures must be reset. -+ * If bfqq's creation happens late enough, or bfqq belongs to -+ * a different group than the burst group, then the current -+ * burst is finished, and related data structures must be -+ * reset. - * -- * In this respect, consider the special case where bfqq is the very -- * first queue being activated. In this case, last_ins_in_burst is -- * not yet significant when we get here. But it is easy to verify -- * that, whether or not the following condition is true, bfqq will -- * end up being inserted into the burst list. In particular the -- * list will happen to contain only bfqq. And this is exactly what -- * has to happen, as bfqq may be the first queue in a possible -+ * In this respect, consider the special case where bfqq is -+ * the very first queue created after BFQ is selected for this -+ * device. In this case, last_ins_in_burst and -+ * burst_parent_entity are not yet significant when we get -+ * here. But it is easy to verify that, whether or not the -+ * following condition is true, bfqq will end up being -+ * inserted into the burst list. In particular the list will -+ * happen to contain only bfqq. And this is exactly what has -+ * to happen, as bfqq may be the first queue of the first - * burst. - */ - if (time_is_before_jiffies(bfqd->last_ins_in_burst + -- bfqd->bfq_burst_interval)) { -+ bfqd->bfq_burst_interval) || -+ bfqq->entity.parent != bfqd->burst_parent_entity) { - bfqd->large_burst = false; - bfq_reset_burst_list(bfqd, bfqq); -- return; -+ bfq_log_bfqq(bfqd, bfqq, -+ "handle_burst: late activation or different group"); -+ goto end; - } - - /* -@@ -845,8 +932,9 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq, - * bfqq as belonging to this large burst immediately. - */ - if (bfqd->large_burst) { -+ bfq_log_bfqq(bfqd, bfqq, "handle_burst: marked in burst"); - bfq_mark_bfqq_in_large_burst(bfqq); -- return; -+ goto end; - } - - /* -@@ -855,25 +943,489 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq, - * queue. Then we add bfqq to the burst. - */ - bfq_add_to_burst(bfqd, bfqq); -+end: -+ /* -+ * At this point, bfqq either has been added to the current -+ * burst or has caused the current burst to terminate and a -+ * possible new burst to start. In particular, in the second -+ * case, bfqq has become the first queue in the possible new -+ * burst. In both cases last_ins_in_burst needs to be moved -+ * forward. -+ */ -+ bfqd->last_ins_in_burst = jiffies; -+ -+} -+ -+static int bfq_bfqq_budget_left(struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ return entity->budget - entity->service; -+} -+ -+/* -+ * If enough samples have been computed, return the current max budget -+ * stored in bfqd, which is dynamically updated according to the -+ * estimated disk peak rate; otherwise return the default max budget -+ */ -+static int bfq_max_budget(struct bfq_data *bfqd) -+{ -+ if (bfqd->budgets_assigned < bfq_stats_min_budgets) -+ return bfq_default_max_budget; -+ else -+ return bfqd->bfq_max_budget; -+} -+ -+/* -+ * Return min budget, which is a fraction of the current or default -+ * max budget (trying with 1/32) -+ */ -+static int bfq_min_budget(struct bfq_data *bfqd) -+{ -+ if (bfqd->budgets_assigned < bfq_stats_min_budgets) -+ return bfq_default_max_budget / 32; -+ else -+ return bfqd->bfq_max_budget / 32; -+} -+ -+static void bfq_bfqq_expire(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ bool compensate, -+ enum bfqq_expiration reason); -+ -+/* -+ * The next function, invoked after the input queue bfqq switches from -+ * idle to busy, updates the budget of bfqq. The function also tells -+ * whether the in-service queue should be expired, by returning -+ * true. The purpose of expiring the in-service queue is to give bfqq -+ * the chance to possibly preempt the in-service queue, and the reason -+ * for preempting the in-service queue is to achieve one of the two -+ * goals below. -+ * -+ * 1. Guarantee to bfqq its reserved bandwidth even if bfqq has -+ * expired because it has remained idle. In particular, bfqq may have -+ * expired for one of the following two reasons: -+ * -+ * - BFQ_BFQQ_NO_MORE_REQUEST bfqq did not enjoy any device idling and -+ * did not make it to issue a new request before its last request -+ * was served; -+ * -+ * - BFQ_BFQQ_TOO_IDLE bfqq did enjoy device idling, but did not issue -+ * a new request before the expiration of the idling-time. -+ * -+ * Even if bfqq has expired for one of the above reasons, the process -+ * associated with the queue may be however issuing requests greedily, -+ * and thus be sensitive to the bandwidth it receives (bfqq may have -+ * remained idle for other reasons: CPU high load, bfqq not enjoying -+ * idling, I/O throttling somewhere in the path from the process to -+ * the I/O scheduler, ...). But if, after every expiration for one of -+ * the above two reasons, bfqq has to wait for the service of at least -+ * one full budget of another queue before being served again, then -+ * bfqq is likely to get a much lower bandwidth or resource time than -+ * its reserved ones. To address this issue, two countermeasures need -+ * to be taken. -+ * -+ * First, the budget and the timestamps of bfqq need to be updated in -+ * a special way on bfqq reactivation: they need to be updated as if -+ * bfqq did not remain idle and did not expire. In fact, if they are -+ * computed as if bfqq expired and remained idle until reactivation, -+ * then the process associated with bfqq is treated as if, instead of -+ * being greedy, it stopped issuing requests when bfqq remained idle, -+ * and restarts issuing requests only on this reactivation. In other -+ * words, the scheduler does not help the process recover the "service -+ * hole" between bfqq expiration and reactivation. As a consequence, -+ * the process receives a lower bandwidth than its reserved one. In -+ * contrast, to recover this hole, the budget must be updated as if -+ * bfqq was not expired at all before this reactivation, i.e., it must -+ * be set to the value of the remaining budget when bfqq was -+ * expired. Along the same line, timestamps need to be assigned the -+ * value they had the last time bfqq was selected for service, i.e., -+ * before last expiration. Thus timestamps need to be back-shifted -+ * with respect to their normal computation (see [1] for more details -+ * on this tricky aspect). -+ * -+ * Secondly, to allow the process to recover the hole, the in-service -+ * queue must be expired too, to give bfqq the chance to preempt it -+ * immediately. In fact, if bfqq has to wait for a full budget of the -+ * in-service queue to be completed, then it may become impossible to -+ * let the process recover the hole, even if the back-shifted -+ * timestamps of bfqq are lower than those of the in-service queue. If -+ * this happens for most or all of the holes, then the process may not -+ * receive its reserved bandwidth. In this respect, it is worth noting -+ * that, being the service of outstanding requests unpreemptible, a -+ * little fraction of the holes may however be unrecoverable, thereby -+ * causing a little loss of bandwidth. -+ * -+ * The last important point is detecting whether bfqq does need this -+ * bandwidth recovery. In this respect, the next function deems the -+ * process associated with bfqq greedy, and thus allows it to recover -+ * the hole, if: 1) the process is waiting for the arrival of a new -+ * request (which implies that bfqq expired for one of the above two -+ * reasons), and 2) such a request has arrived soon. The first -+ * condition is controlled through the flag non_blocking_wait_rq, -+ * while the second through the flag arrived_in_time. If both -+ * conditions hold, then the function computes the budget in the -+ * above-described special way, and signals that the in-service queue -+ * should be expired. Timestamp back-shifting is done later in -+ * __bfq_activate_entity. -+ * -+ * 2. Reduce latency. Even if timestamps are not backshifted to let -+ * the process associated with bfqq recover a service hole, bfqq may -+ * however happen to have, after being (re)activated, a lower finish -+ * timestamp than the in-service queue. That is, the next budget of -+ * bfqq may have to be completed before the one of the in-service -+ * queue. If this is the case, then preempting the in-service queue -+ * allows this goal to be achieved, apart from the unpreemptible, -+ * outstanding requests mentioned above. -+ * -+ * Unfortunately, regardless of which of the above two goals one wants -+ * to achieve, service trees need first to be updated to know whether -+ * the in-service queue must be preempted. To have service trees -+ * correctly updated, the in-service queue must be expired and -+ * rescheduled, and bfqq must be scheduled too. This is one of the -+ * most costly operations (in future versions, the scheduling -+ * mechanism may be re-designed in such a way to make it possible to -+ * know whether preemption is needed without needing to update service -+ * trees). In addition, queue preemptions almost always cause random -+ * I/O, and thus loss of throughput. Because of these facts, the next -+ * function adopts the following simple scheme to avoid both costly -+ * operations and too frequent preemptions: it requests the expiration -+ * of the in-service queue (unconditionally) only for queues that need -+ * to recover a hole, or that either are weight-raised or deserve to -+ * be weight-raised. -+ */ -+static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ bool arrived_in_time, -+ bool wr_or_deserves_wr) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ if (bfq_bfqq_non_blocking_wait_rq(bfqq) && arrived_in_time) { -+ /* -+ * We do not clear the flag non_blocking_wait_rq here, as -+ * the latter is used in bfq_activate_bfqq to signal -+ * that timestamps need to be back-shifted (and is -+ * cleared right after). -+ */ -+ -+ /* -+ * In next assignment we rely on that either -+ * entity->service or entity->budget are not updated -+ * on expiration if bfqq is empty (see -+ * __bfq_bfqq_recalc_budget). Thus both quantities -+ * remain unchanged after such an expiration, and the -+ * following statement therefore assigns to -+ * entity->budget the remaining budget on such an -+ * expiration. For clarity, entity->service is not -+ * updated on expiration in any case, and, in normal -+ * operation, is reset only when bfqq is selected for -+ * service (see bfq_get_next_queue). -+ */ -+ BUG_ON(bfqq->max_budget < 0); -+ entity->budget = min_t(unsigned long, -+ bfq_bfqq_budget_left(bfqq), -+ bfqq->max_budget); -+ -+ BUG_ON(entity->budget < 0); -+ return true; -+ } -+ -+ BUG_ON(bfqq->max_budget < 0); -+ entity->budget = max_t(unsigned long, bfqq->max_budget, -+ bfq_serv_to_charge(bfqq->next_rq, bfqq)); -+ BUG_ON(entity->budget < 0); -+ -+ bfq_clear_bfqq_non_blocking_wait_rq(bfqq); -+ return wr_or_deserves_wr; -+} -+ -+static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ unsigned int old_wr_coeff, -+ bool wr_or_deserves_wr, -+ bool interactive, -+ bool in_burst, -+ bool soft_rt) -+{ -+ if (old_wr_coeff == 1 && wr_or_deserves_wr) { -+ /* start a weight-raising period */ -+ if (interactive) { -+ bfqq->wr_coeff = bfqd->bfq_wr_coeff; -+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -+ } else { -+ bfqq->wr_start_at_switch_to_srt = jiffies; -+ bfqq->wr_coeff = bfqd->bfq_wr_coeff * -+ BFQ_SOFTRT_WEIGHT_FACTOR; -+ bfqq->wr_cur_max_time = -+ bfqd->bfq_wr_rt_max_time; -+ } -+ /* -+ * If needed, further reduce budget to make sure it is -+ * close to bfqq's backlog, so as to reduce the -+ * scheduling-error component due to a too large -+ * budget. Do not care about throughput consequences, -+ * but only about latency. Finally, do not assign a -+ * too small budget either, to avoid increasing -+ * latency by causing too frequent expirations. -+ */ -+ bfqq->entity.budget = min_t(unsigned long, -+ bfqq->entity.budget, -+ 2 * bfq_min_budget(bfqd)); -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "wrais starting at %lu, rais_max_time %u", -+ jiffies, -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } else if (old_wr_coeff > 1) { -+ if (interactive) { /* update wr coeff and duration */ -+ bfqq->wr_coeff = bfqd->bfq_wr_coeff; -+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -+ } else if (in_burst) { -+ bfqq->wr_coeff = 1; -+ bfq_log_bfqq(bfqd, bfqq, -+ "wrais ending at %lu, rais_max_time %u", -+ jiffies, -+ jiffies_to_msecs(bfqq-> -+ wr_cur_max_time)); -+ } else if (soft_rt) { -+ /* -+ * The application is now or still meeting the -+ * requirements for being deemed soft rt. We -+ * can then correctly and safely (re)charge -+ * the weight-raising duration for the -+ * application with the weight-raising -+ * duration for soft rt applications. -+ * -+ * In particular, doing this recharge now, i.e., -+ * before the weight-raising period for the -+ * application finishes, reduces the probability -+ * of the following negative scenario: -+ * 1) the weight of a soft rt application is -+ * raised at startup (as for any newly -+ * created application), -+ * 2) since the application is not interactive, -+ * at a certain time weight-raising is -+ * stopped for the application, -+ * 3) at that time the application happens to -+ * still have pending requests, and hence -+ * is destined to not have a chance to be -+ * deemed soft rt before these requests are -+ * completed (see the comments to the -+ * function bfq_bfqq_softrt_next_start() -+ * for details on soft rt detection), -+ * 4) these pending requests experience a high -+ * latency because the application is not -+ * weight-raised while they are pending. -+ */ -+ if (bfqq->wr_cur_max_time != -+ bfqd->bfq_wr_rt_max_time) { -+ bfqq->wr_start_at_switch_to_srt = -+ bfqq->last_wr_start_finish; -+ BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); -+ -+ bfqq->wr_cur_max_time = -+ bfqd->bfq_wr_rt_max_time; -+ bfqq->wr_coeff = bfqd->bfq_wr_coeff * -+ BFQ_SOFTRT_WEIGHT_FACTOR; -+ bfq_log_bfqq(bfqd, bfqq, -+ "switching to soft_rt wr"); -+ } else -+ bfq_log_bfqq(bfqd, bfqq, -+ "moving forward soft_rt wr duration"); -+ bfqq->last_wr_start_finish = jiffies; -+ } -+ } -+} -+ -+static bool bfq_bfqq_idle_for_long_time(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ return bfqq->dispatched == 0 && -+ time_is_before_jiffies( -+ bfqq->budget_timeout + -+ bfqd->bfq_wr_min_idle_time); -+} -+ -+static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ int old_wr_coeff, -+ struct request *rq, -+ bool *interactive) -+{ -+ bool soft_rt, in_burst, wr_or_deserves_wr, -+ bfqq_wants_to_preempt, -+ idle_for_long_time = bfq_bfqq_idle_for_long_time(bfqd, bfqq), -+ /* -+ * See the comments on -+ * bfq_bfqq_update_budg_for_activation for -+ * details on the usage of the next variable. -+ */ -+ arrived_in_time = ktime_get_ns() <= -+ RQ_BIC(rq)->ttime.last_end_request + -+ bfqd->bfq_slice_idle * 3; -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "bfq_add_request non-busy: " -+ "jiffies %lu, in_time %d, idle_long %d busyw %d " -+ "wr_coeff %u", -+ jiffies, arrived_in_time, -+ idle_for_long_time, -+ bfq_bfqq_non_blocking_wait_rq(bfqq), -+ old_wr_coeff); -+ -+ BUG_ON(bfqq->entity.budget < bfqq->entity.service); -+ -+ BUG_ON(bfqq == bfqd->in_service_queue); -+ bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, rq->cmd_flags); -+ -+ /* -+ * bfqq deserves to be weight-raised if: -+ * - it is sync, -+ * - it does not belong to a large burst, -+ * - it has been idle for enough time or is soft real-time, -+ * - is linked to a bfq_io_cq (it is not shared in any sense) -+ */ -+ in_burst = bfq_bfqq_in_large_burst(bfqq); -+ soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && -+ !in_burst && -+ time_is_before_jiffies(bfqq->soft_rt_next_start); -+ *interactive = -+ !in_burst && -+ idle_for_long_time; -+ wr_or_deserves_wr = bfqd->low_latency && -+ (bfqq->wr_coeff > 1 || -+ (bfq_bfqq_sync(bfqq) && -+ bfqq->bic && (*interactive || soft_rt))); -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "bfq_add_request: " -+ "in_burst %d, " -+ "soft_rt %d (next %lu), inter %d, bic %p", -+ bfq_bfqq_in_large_burst(bfqq), soft_rt, -+ bfqq->soft_rt_next_start, -+ *interactive, -+ bfqq->bic); -+ -+ /* -+ * Using the last flag, update budget and check whether bfqq -+ * may want to preempt the in-service queue. -+ */ -+ bfqq_wants_to_preempt = -+ bfq_bfqq_update_budg_for_activation(bfqd, bfqq, -+ arrived_in_time, -+ wr_or_deserves_wr); -+ -+ /* -+ * If bfqq happened to be activated in a burst, but has been -+ * idle for much more than an interactive queue, then we -+ * assume that, in the overall I/O initiated in the burst, the -+ * I/O associated with bfqq is finished. So bfqq does not need -+ * to be treated as a queue belonging to a burst -+ * anymore. Accordingly, we reset bfqq's in_large_burst flag -+ * if set, and remove bfqq from the burst list if it's -+ * there. We do not decrement burst_size, because the fact -+ * that bfqq does not need to belong to the burst list any -+ * more does not invalidate the fact that bfqq was created in -+ * a burst. -+ */ -+ if (likely(!bfq_bfqq_just_created(bfqq)) && -+ idle_for_long_time && -+ time_is_before_jiffies( -+ bfqq->budget_timeout + -+ msecs_to_jiffies(10000))) { -+ hlist_del_init(&bfqq->burst_list_node); -+ bfq_clear_bfqq_in_large_burst(bfqq); -+ } -+ -+ bfq_clear_bfqq_just_created(bfqq); -+ -+ if (!bfq_bfqq_IO_bound(bfqq)) { -+ if (arrived_in_time) { -+ bfqq->requests_within_timer++; -+ if (bfqq->requests_within_timer >= -+ bfqd->bfq_requests_within_timer) -+ bfq_mark_bfqq_IO_bound(bfqq); -+ } else -+ bfqq->requests_within_timer = 0; -+ bfq_log_bfqq(bfqd, bfqq, "requests in time %d", -+ bfqq->requests_within_timer); -+ } -+ -+ if (bfqd->low_latency) { -+ if (unlikely(time_is_after_jiffies(bfqq->split_time))) -+ /* wraparound */ -+ bfqq->split_time = -+ jiffies - bfqd->bfq_wr_min_idle_time - 1; -+ -+ if (time_is_before_jiffies(bfqq->split_time + -+ bfqd->bfq_wr_min_idle_time)) { -+ bfq_update_bfqq_wr_on_rq_arrival(bfqd, bfqq, -+ old_wr_coeff, -+ wr_or_deserves_wr, -+ *interactive, -+ in_burst, -+ soft_rt); -+ -+ if (old_wr_coeff != bfqq->wr_coeff) -+ bfqq->entity.prio_changed = 1; -+ } -+ } -+ -+ bfqq->last_idle_bklogged = jiffies; -+ bfqq->service_from_backlogged = 0; -+ bfq_clear_bfqq_softrt_update(bfqq); -+ -+ bfq_add_bfqq_busy(bfqd, bfqq); -+ -+ /* -+ * Expire in-service queue only if preemption may be needed -+ * for guarantees. In this respect, the function -+ * next_queue_may_preempt just checks a simple, necessary -+ * condition, and not a sufficient condition based on -+ * timestamps. In fact, for the latter condition to be -+ * evaluated, timestamps would need first to be updated, and -+ * this operation is quite costly (see the comments on the -+ * function bfq_bfqq_update_budg_for_activation). -+ */ -+ if (bfqd->in_service_queue && bfqq_wants_to_preempt && -+ bfqd->in_service_queue->wr_coeff < bfqq->wr_coeff && -+ next_queue_may_preempt(bfqd)) { -+ struct bfq_queue *in_serv = -+ bfqd->in_service_queue; -+ BUG_ON(in_serv == bfqq); -+ -+ bfq_bfqq_expire(bfqd, bfqd->in_service_queue, -+ false, BFQ_BFQQ_PREEMPTED); -+ } - } - - static void bfq_add_request(struct request *rq) - { - struct bfq_queue *bfqq = RQ_BFQQ(rq); -- struct bfq_entity *entity = &bfqq->entity; - struct bfq_data *bfqd = bfqq->bfqd; - struct request *next_rq, *prev; -- unsigned long old_wr_coeff = bfqq->wr_coeff; -+ unsigned int old_wr_coeff = bfqq->wr_coeff; - bool interactive = false; - -- bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq)); -+ bfq_log_bfqq(bfqd, bfqq, "add_request: size %u %s", -+ blk_rq_sectors(rq), rq_is_sync(rq) ? "S" : "A"); -+ -+ if (bfqq->wr_coeff > 1) /* queue is being weight-raised */ -+ bfq_log_bfqq(bfqd, bfqq, -+ "raising period dur %u/%u msec, old coeff %u, w %d(%d)", -+ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), -+ jiffies_to_msecs(bfqq->wr_cur_max_time), -+ bfqq->wr_coeff, -+ bfqq->entity.weight, bfqq->entity.orig_weight); -+ - bfqq->queued[rq_is_sync(rq)]++; - bfqd->queued++; - - elv_rb_add(&bfqq->sort_list, rq); - - /* -- * Check if this request is a better next-serve candidate. -+ * Check if this request is a better next-to-serve candidate. - */ - prev = bfqq->next_rq; - next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); -@@ -886,160 +1438,10 @@ static void bfq_add_request(struct request *rq) - if (prev != bfqq->next_rq) - bfq_pos_tree_add_move(bfqd, bfqq); - -- if (!bfq_bfqq_busy(bfqq)) { -- bool soft_rt, coop_or_in_burst, -- idle_for_long_time = time_is_before_jiffies( -- bfqq->budget_timeout + -- bfqd->bfq_wr_min_idle_time); -- --#ifdef CONFIG_BFQ_GROUP_IOSCHED -- bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, -- rq->cmd_flags); --#endif -- if (bfq_bfqq_sync(bfqq)) { -- bool already_in_burst = -- !hlist_unhashed(&bfqq->burst_list_node) || -- bfq_bfqq_in_large_burst(bfqq); -- bfq_handle_burst(bfqd, bfqq, idle_for_long_time); -- /* -- * If bfqq was not already in the current burst, -- * then, at this point, bfqq either has been -- * added to the current burst or has caused the -- * current burst to terminate. In particular, in -- * the second case, bfqq has become the first -- * queue in a possible new burst. -- * In both cases last_ins_in_burst needs to be -- * moved forward. -- */ -- if (!already_in_burst) -- bfqd->last_ins_in_burst = jiffies; -- } -- -- coop_or_in_burst = bfq_bfqq_in_large_burst(bfqq) || -- bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh; -- soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && -- !coop_or_in_burst && -- time_is_before_jiffies(bfqq->soft_rt_next_start); -- interactive = !coop_or_in_burst && idle_for_long_time; -- entity->budget = max_t(unsigned long, bfqq->max_budget, -- bfq_serv_to_charge(next_rq, bfqq)); -- -- if (!bfq_bfqq_IO_bound(bfqq)) { -- if (time_before(jiffies, -- RQ_BIC(rq)->ttime.last_end_request + -- bfqd->bfq_slice_idle)) { -- bfqq->requests_within_timer++; -- if (bfqq->requests_within_timer >= -- bfqd->bfq_requests_within_timer) -- bfq_mark_bfqq_IO_bound(bfqq); -- } else -- bfqq->requests_within_timer = 0; -- } -- -- if (!bfqd->low_latency) -- goto add_bfqq_busy; -- -- if (bfq_bfqq_just_split(bfqq)) -- goto set_prio_changed; -- -- /* -- * If the queue: -- * - is not being boosted, -- * - has been idle for enough time, -- * - is not a sync queue or is linked to a bfq_io_cq (it is -- * shared "for its nature" or it is not shared and its -- * requests have not been redirected to a shared queue) -- * start a weight-raising period. -- */ -- if (old_wr_coeff == 1 && (interactive || soft_rt) && -- (!bfq_bfqq_sync(bfqq) || bfqq->bic)) { -- bfqq->wr_coeff = bfqd->bfq_wr_coeff; -- if (interactive) -- bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -- else -- bfqq->wr_cur_max_time = -- bfqd->bfq_wr_rt_max_time; -- bfq_log_bfqq(bfqd, bfqq, -- "wrais starting at %lu, rais_max_time %u", -- jiffies, -- jiffies_to_msecs(bfqq->wr_cur_max_time)); -- } else if (old_wr_coeff > 1) { -- if (interactive) -- bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -- else if (coop_or_in_burst || -- (bfqq->wr_cur_max_time == -- bfqd->bfq_wr_rt_max_time && -- !soft_rt)) { -- bfqq->wr_coeff = 1; -- bfq_log_bfqq(bfqd, bfqq, -- "wrais ending at %lu, rais_max_time %u", -- jiffies, -- jiffies_to_msecs(bfqq-> -- wr_cur_max_time)); -- } else if (time_before( -- bfqq->last_wr_start_finish + -- bfqq->wr_cur_max_time, -- jiffies + -- bfqd->bfq_wr_rt_max_time) && -- soft_rt) { -- /* -- * -- * The remaining weight-raising time is lower -- * than bfqd->bfq_wr_rt_max_time, which means -- * that the application is enjoying weight -- * raising either because deemed soft-rt in -- * the near past, or because deemed interactive -- * a long ago. -- * In both cases, resetting now the current -- * remaining weight-raising time for the -- * application to the weight-raising duration -- * for soft rt applications would not cause any -- * latency increase for the application (as the -- * new duration would be higher than the -- * remaining time). -- * -- * In addition, the application is now meeting -- * the requirements for being deemed soft rt. -- * In the end we can correctly and safely -- * (re)charge the weight-raising duration for -- * the application with the weight-raising -- * duration for soft rt applications. -- * -- * In particular, doing this recharge now, i.e., -- * before the weight-raising period for the -- * application finishes, reduces the probability -- * of the following negative scenario: -- * 1) the weight of a soft rt application is -- * raised at startup (as for any newly -- * created application), -- * 2) since the application is not interactive, -- * at a certain time weight-raising is -- * stopped for the application, -- * 3) at that time the application happens to -- * still have pending requests, and hence -- * is destined to not have a chance to be -- * deemed soft rt before these requests are -- * completed (see the comments to the -- * function bfq_bfqq_softrt_next_start() -- * for details on soft rt detection), -- * 4) these pending requests experience a high -- * latency because the application is not -- * weight-raised while they are pending. -- */ -- bfqq->last_wr_start_finish = jiffies; -- bfqq->wr_cur_max_time = -- bfqd->bfq_wr_rt_max_time; -- } -- } --set_prio_changed: -- if (old_wr_coeff != bfqq->wr_coeff) -- entity->prio_changed = 1; --add_bfqq_busy: -- bfqq->last_idle_bklogged = jiffies; -- bfqq->service_from_backlogged = 0; -- bfq_clear_bfqq_softrt_update(bfqq); -- bfq_add_bfqq_busy(bfqd, bfqq); -- } else { -+ if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */ -+ bfq_bfqq_handle_idle_busy_switch(bfqd, bfqq, old_wr_coeff, -+ rq, &interactive); -+ else { - if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) && - time_is_before_jiffies( - bfqq->last_wr_start_finish + -@@ -1048,16 +1450,43 @@ static void bfq_add_request(struct request *rq) - bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); - - bfqd->wr_busy_queues++; -- entity->prio_changed = 1; -+ bfqq->entity.prio_changed = 1; - bfq_log_bfqq(bfqd, bfqq, -- "non-idle wrais starting at %lu, rais_max_time %u", -- jiffies, -- jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ "non-idle wrais starting, " -+ "wr_max_time %u wr_busy %d", -+ jiffies_to_msecs(bfqq->wr_cur_max_time), -+ bfqd->wr_busy_queues); - } - if (prev != bfqq->next_rq) - bfq_updated_next_req(bfqd, bfqq); - } - -+ /* -+ * Assign jiffies to last_wr_start_finish in the following -+ * cases: -+ * -+ * . if bfqq is not going to be weight-raised, because, for -+ * non weight-raised queues, last_wr_start_finish stores the -+ * arrival time of the last request; as of now, this piece -+ * of information is used only for deciding whether to -+ * weight-raise async queues -+ * -+ * . if bfqq is not weight-raised, because, if bfqq is now -+ * switching to weight-raised, then last_wr_start_finish -+ * stores the time when weight-raising starts -+ * -+ * . if bfqq is interactive, because, regardless of whether -+ * bfqq is currently weight-raised, the weight-raising -+ * period must start or restart (this case is considered -+ * separately because it is not detected by the above -+ * conditions, if bfqq is already weight-raised) -+ * -+ * last_wr_start_finish has to be updated also if bfqq is soft -+ * real-time, because the weight-raising period is constantly -+ * restarted on idle-to-busy transitions for these queues, but -+ * this is already done in bfq_bfqq_handle_idle_busy_switch if -+ * needed. -+ */ - if (bfqd->low_latency && - (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive)) - bfqq->last_wr_start_finish = jiffies; -@@ -1074,21 +1503,31 @@ static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, - if (!bic) - return NULL; - -- bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); -+ bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf)); - if (bfqq) - return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio)); - - return NULL; - } - --static void bfq_activate_request(struct request_queue *q, struct request *rq) -+static sector_t get_sdist(sector_t last_pos, struct request *rq) - { -- struct bfq_data *bfqd = q->elevator->elevator_data; -+ sector_t sdist = 0; -+ -+ if (last_pos) { -+ if (last_pos < blk_rq_pos(rq)) -+ sdist = blk_rq_pos(rq) - last_pos; -+ else -+ sdist = last_pos - blk_rq_pos(rq); -+ } -+ -+ return sdist; -+} - -+static void bfq_activate_request(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; - bfqd->rq_in_driver++; -- bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); -- bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", -- (unsigned long long) bfqd->last_position); - } - - static void bfq_deactivate_request(struct request_queue *q, struct request *rq) -@@ -1105,6 +1544,9 @@ static void bfq_remove_request(struct request *rq) - struct bfq_data *bfqd = bfqq->bfqd; - const int sync = rq_is_sync(rq); - -+ BUG_ON(bfqq->entity.service > bfqq->entity.budget && -+ bfqq == bfqd->in_service_queue); -+ - if (bfqq->next_rq == rq) { - bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); - bfq_updated_next_req(bfqd, bfqq); -@@ -1118,8 +1560,29 @@ static void bfq_remove_request(struct request *rq) - elv_rb_del(&bfqq->sort_list, rq); - - if (RB_EMPTY_ROOT(&bfqq->sort_list)) { -- if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) -- bfq_del_bfqq_busy(bfqd, bfqq, 1); -+ bfqq->next_rq = NULL; -+ -+ BUG_ON(bfqq->entity.budget < 0); -+ -+ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) { -+ BUG_ON(bfqq->ref < 2); /* referred by rq and on tree */ -+ bfq_del_bfqq_busy(bfqd, bfqq, false); -+ /* -+ * bfqq emptied. In normal operation, when -+ * bfqq is empty, bfqq->entity.service and -+ * bfqq->entity.budget must contain, -+ * respectively, the service received and the -+ * budget used last time bfqq emptied. These -+ * facts do not hold in this case, as at least -+ * this last removal occurred while bfqq is -+ * not in service. To avoid inconsistencies, -+ * reset both bfqq->entity.service and -+ * bfqq->entity.budget, if bfqq has still a -+ * process that may issue I/O requests to it. -+ */ -+ bfqq->entity.budget = bfqq->entity.service = 0; -+ } -+ - /* - * Remove queue from request-position tree as it is empty. - */ -@@ -1133,9 +1596,7 @@ static void bfq_remove_request(struct request *rq) - BUG_ON(bfqq->meta_pending == 0); - bfqq->meta_pending--; - } --#ifdef CONFIG_BFQ_GROUP_IOSCHED - bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags); --#endif - } - - static int bfq_merge(struct request_queue *q, struct request **req, -@@ -1145,7 +1606,7 @@ static int bfq_merge(struct request_queue *q, struct request **req, - struct request *__rq; - - __rq = bfq_find_rq_fmerge(bfqd, bio); -- if (__rq && elv_rq_merge_ok(__rq, bio)) { -+ if (__rq && elv_bio_merge_ok(__rq, bio)) { - *req = __rq; - return ELEVATOR_FRONT_MERGE; - } -@@ -1190,7 +1651,7 @@ static void bfq_merged_request(struct request_queue *q, struct request *req, - static void bfq_bio_merged(struct request_queue *q, struct request *req, - struct bio *bio) - { -- bfqg_stats_update_io_merged(bfqq_group(RQ_BFQQ(req)), bio->bi_rw); -+ bfqg_stats_update_io_merged(bfqq_group(RQ_BFQQ(req)), bio->bi_opf); - } - #endif - -@@ -1210,7 +1671,7 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq, - */ - if (bfqq == next_bfqq && - !list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && -- time_before(next->fifo_time, rq->fifo_time)) { -+ next->fifo_time < rq->fifo_time) { - list_del_init(&rq->queuelist); - list_replace_init(&next->queuelist, &rq->queuelist); - rq->fifo_time = next->fifo_time; -@@ -1220,21 +1681,30 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq, - bfqq->next_rq = rq; - - bfq_remove_request(next); --#ifdef CONFIG_BFQ_GROUP_IOSCHED - bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags); --#endif - } - - /* Must be called with bfqq != NULL */ - static void bfq_bfqq_end_wr(struct bfq_queue *bfqq) - { - BUG_ON(!bfqq); -+ - if (bfq_bfqq_busy(bfqq)) - bfqq->bfqd->wr_busy_queues--; - bfqq->wr_coeff = 1; - bfqq->wr_cur_max_time = 0; -- /* Trigger a weight change on the next activation of the queue */ -+ bfqq->last_wr_start_finish = jiffies; -+ /* -+ * Trigger a weight change on the next invocation of -+ * __bfq_entity_update_weight_prio. -+ */ - bfqq->entity.prio_changed = 1; -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "end_wr: wrais ending at %lu, rais_max_time %u", -+ bfqq->last_wr_start_finish, -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "end_wr: wr_busy %d", -+ bfqq->bfqd->wr_busy_queues); - } - - static void bfq_end_wr_async_queues(struct bfq_data *bfqd, -@@ -1277,7 +1747,7 @@ static int bfq_rq_close_to_sector(void *io_struct, bool request, - sector_t sector) - { - return abs(bfq_io_struct_pos(io_struct, request) - sector) <= -- BFQQ_SEEK_THR; -+ BFQQ_CLOSE_THR; - } - - static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd, -@@ -1399,7 +1869,7 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) - * throughput. - */ - bfqq->new_bfqq = new_bfqq; -- atomic_add(process_refs, &new_bfqq->ref); -+ new_bfqq->ref += process_refs; - return new_bfqq; - } - -@@ -1430,9 +1900,23 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, - } - - /* -- * Attempt to schedule a merge of bfqq with the currently in-service queue -- * or with a close queue among the scheduled queues. -- * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue -+ * If this function returns true, then bfqq cannot be merged. The idea -+ * is that true cooperation happens very early after processes start -+ * to do I/O. Usually, late cooperations are just accidental false -+ * positives. In case bfqq is weight-raised, such false positives -+ * would evidently degrade latency guarantees for bfqq. -+ */ -+static bool wr_from_too_long(struct bfq_queue *bfqq) -+{ -+ return bfqq->wr_coeff > 1 && -+ time_is_before_jiffies(bfqq->last_wr_start_finish + -+ msecs_to_jiffies(100)); -+} -+ -+/* -+ * Attempt to schedule a merge of bfqq with the currently in-service -+ * queue or with a close queue among the scheduled queues. Return -+ * NULL if no merge was scheduled, a pointer to the shared bfq_queue - * structure otherwise. - * - * The OOM queue is not allowed to participate to cooperation: in fact, since -@@ -1441,6 +1925,18 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, - * handle merging with the OOM queue would be quite complex and expensive - * to maintain. Besides, in such a critical condition as an out of memory, - * the benefits of queue merging may be little relevant, or even negligible. -+ * -+ * Weight-raised queues can be merged only if their weight-raising -+ * period has just started. In fact cooperating processes are usually -+ * started together. Thus, with this filter we avoid false positives -+ * that would jeopardize low-latency guarantees. -+ * -+ * WARNING: queue merging may impair fairness among non-weight raised -+ * queues, for at least two reasons: 1) the original weight of a -+ * merged queue may change during the merged state, 2) even being the -+ * weight the same, a merged queue may be bloated with many more -+ * requests than the ones produced by its originally-associated -+ * process. - */ - static struct bfq_queue * - bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, -@@ -1450,16 +1946,32 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, - - if (bfqq->new_bfqq) - return bfqq->new_bfqq; -- if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) -+ -+ if (io_struct && wr_from_too_long(bfqq) && -+ likely(bfqq != &bfqd->oom_bfqq)) -+ bfq_log_bfqq(bfqd, bfqq, -+ "would have looked for coop, but bfq%d wr", -+ bfqq->pid); -+ -+ if (!io_struct || -+ wr_from_too_long(bfqq) || -+ unlikely(bfqq == &bfqd->oom_bfqq)) - return NULL; -- /* If device has only one backlogged bfq_queue, don't search. */ -+ -+ /* If there is only one backlogged queue, don't search. */ - if (bfqd->busy_queues == 1) - return NULL; - - in_service_bfqq = bfqd->in_service_queue; - -+ if (in_service_bfqq && in_service_bfqq != bfqq && -+ bfqd->in_service_bic && wr_from_too_long(in_service_bfqq) -+ && likely(in_service_bfqq == &bfqd->oom_bfqq)) -+ bfq_log_bfqq(bfqd, bfqq, -+ "would have tried merge with in-service-queue, but wr"); -+ - if (!in_service_bfqq || in_service_bfqq == bfqq || -- !bfqd->in_service_bic || -+ !bfqd->in_service_bic || wr_from_too_long(in_service_bfqq) || - unlikely(in_service_bfqq == &bfqd->oom_bfqq)) - goto check_scheduled; - -@@ -1481,7 +1993,15 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, - - BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent); - -- if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) && -+ if (new_bfqq && wr_from_too_long(new_bfqq) && -+ likely(new_bfqq != &bfqd->oom_bfqq) && -+ bfq_may_be_close_cooperator(bfqq, new_bfqq)) -+ bfq_log_bfqq(bfqd, bfqq, -+ "would have merged with bfq%d, but wr", -+ new_bfqq->pid); -+ -+ if (new_bfqq && !wr_from_too_long(new_bfqq) && -+ likely(new_bfqq != &bfqd->oom_bfqq) && - bfq_may_be_close_cooperator(bfqq, new_bfqq)) - return bfq_setup_merge(bfqq, new_bfqq); - -@@ -1490,53 +2010,25 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, - - static void bfq_bfqq_save_state(struct bfq_queue *bfqq) - { -+ struct bfq_io_cq *bic = bfqq->bic; -+ - /* - * If !bfqq->bic, the queue is already shared or its requests - * have already been redirected to a shared queue; both idle window - * and weight raising state have already been saved. Do nothing. - */ -- if (!bfqq->bic) -+ if (!bic) - return; -- if (bfqq->bic->wr_time_left) -- /* -- * This is the queue of a just-started process, and would -- * deserve weight raising: we set wr_time_left to the full -- * weight-raising duration to trigger weight-raising when -- * and if the queue is split and the first request of the -- * queue is enqueued. -- */ -- bfqq->bic->wr_time_left = bfq_wr_duration(bfqq->bfqd); -- else if (bfqq->wr_coeff > 1) { -- unsigned long wr_duration = -- jiffies - bfqq->last_wr_start_finish; -- /* -- * It may happen that a queue's weight raising period lasts -- * longer than its wr_cur_max_time, as weight raising is -- * handled only when a request is enqueued or dispatched (it -- * does not use any timer). If the weight raising period is -- * about to end, don't save it. -- */ -- if (bfqq->wr_cur_max_time <= wr_duration) -- bfqq->bic->wr_time_left = 0; -- else -- bfqq->bic->wr_time_left = -- bfqq->wr_cur_max_time - wr_duration; -- /* -- * The bfq_queue is becoming shared or the requests of the -- * process owning the queue are being redirected to a shared -- * queue. Stop the weight raising period of the queue, as in -- * both cases it should not be owned by an interactive or -- * soft real-time application. -- */ -- bfq_bfqq_end_wr(bfqq); -- } else -- bfqq->bic->wr_time_left = 0; -- bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); -- bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); -- bfqq->bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); -- bfqq->bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); -- bfqq->bic->cooperations++; -- bfqq->bic->failed_cooperations = 0; -+ -+ bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); -+ bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); -+ bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); -+ bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); -+ bic->saved_wr_coeff = bfqq->wr_coeff; -+ bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt; -+ bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish; -+ bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time; -+ BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); - } - - static void bfq_get_bic_reference(struct bfq_queue *bfqq) -@@ -1561,6 +2053,41 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, - if (bfq_bfqq_IO_bound(bfqq)) - bfq_mark_bfqq_IO_bound(new_bfqq); - bfq_clear_bfqq_IO_bound(bfqq); -+ -+ /* -+ * If bfqq is weight-raised, then let new_bfqq inherit -+ * weight-raising. To reduce false positives, neglect the case -+ * where bfqq has just been created, but has not yet made it -+ * to be weight-raised (which may happen because EQM may merge -+ * bfqq even before bfq_add_request is executed for the first -+ * time for bfqq). Handling this case would however be very -+ * easy, thanks to the flag just_created. -+ */ -+ if (new_bfqq->wr_coeff == 1 && bfqq->wr_coeff > 1) { -+ new_bfqq->wr_coeff = bfqq->wr_coeff; -+ new_bfqq->wr_cur_max_time = bfqq->wr_cur_max_time; -+ new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish; -+ new_bfqq->wr_start_at_switch_to_srt = -+ bfqq->wr_start_at_switch_to_srt; -+ if (bfq_bfqq_busy(new_bfqq)) -+ bfqd->wr_busy_queues++; -+ new_bfqq->entity.prio_changed = 1; -+ bfq_log_bfqq(bfqd, new_bfqq, -+ "wr start after merge with %d, rais_max_time %u", -+ bfqq->pid, -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } -+ -+ if (bfqq->wr_coeff > 1) { /* bfqq has given its wr to new_bfqq */ -+ bfqq->wr_coeff = 1; -+ bfqq->entity.prio_changed = 1; -+ if (bfq_bfqq_busy(bfqq)) -+ bfqd->wr_busy_queues--; -+ } -+ -+ bfq_log_bfqq(bfqd, new_bfqq, "merge_bfqqs: wr_busy %d", -+ bfqd->wr_busy_queues); -+ - /* - * Grab a reference to the bic, to prevent it from being destroyed - * before being possibly touched by a bfq_split_bfqq(). -@@ -1584,33 +2111,23 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, - */ - new_bfqq->bic = NULL; - bfqq->bic = NULL; -+ /* release process reference to bfqq */ - bfq_put_queue(bfqq); - } - --static void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq) --{ -- struct bfq_io_cq *bic = bfqq->bic; -- struct bfq_data *bfqd = bfqq->bfqd; -- -- if (bic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) { -- bic->failed_cooperations++; -- if (bic->failed_cooperations >= bfqd->bfq_failed_cooperations) -- bic->cooperations = 0; -- } --} -- --static int bfq_allow_merge(struct request_queue *q, struct request *rq, -- struct bio *bio) -+static int bfq_allow_bio_merge(struct request_queue *q, struct request *rq, -+ struct bio *bio) - { - struct bfq_data *bfqd = q->elevator->elevator_data; -+ bool is_sync = op_is_sync(bio->bi_opf); - struct bfq_io_cq *bic; - struct bfq_queue *bfqq, *new_bfqq; - - /* - * Disallow merge of a sync bio into an async request. - */ -- if (bfq_bio_sync(bio) && !rq_is_sync(rq)) -- return 0; -+ if (is_sync && !rq_is_sync(rq)) -+ return false; - - /* - * Lookup the bfqq that this bio will be queued with. Allow -@@ -1619,9 +2136,9 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq, - */ - bic = bfq_bic_lookup(bfqd, current->io_context); - if (!bic) -- return 0; -+ return false; - -- bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); -+ bfqq = bic_to_bfqq(bic, is_sync); - /* - * We take advantage of this function to perform an early merge - * of the queues of possible cooperating processes. -@@ -1636,30 +2153,111 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq, - * to decide whether bio and rq can be merged. - */ - bfqq = new_bfqq; -- } else -- bfq_bfqq_increase_failed_cooperations(bfqq); -+ } - } - - return bfqq == RQ_BFQQ(rq); - } - -+static int bfq_allow_rq_merge(struct request_queue *q, struct request *rq, -+ struct request *next) -+{ -+ return RQ_BFQQ(rq) == RQ_BFQQ(next); -+} -+ -+/* -+ * Set the maximum time for the in-service queue to consume its -+ * budget. This prevents seeky processes from lowering the throughput. -+ * In practice, a time-slice service scheme is used with seeky -+ * processes. -+ */ -+static void bfq_set_budget_timeout(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ unsigned int timeout_coeff; -+ -+ if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time) -+ timeout_coeff = 1; -+ else -+ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; -+ -+ bfqd->last_budget_start = ktime_get(); -+ -+ bfqq->budget_timeout = jiffies + -+ bfqd->bfq_timeout * timeout_coeff; -+ -+ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", -+ jiffies_to_msecs(bfqd->bfq_timeout * timeout_coeff)); -+} -+ - static void __bfq_set_in_service_queue(struct bfq_data *bfqd, - struct bfq_queue *bfqq) - { - if (bfqq) { --#ifdef CONFIG_BFQ_GROUP_IOSCHED - bfqg_stats_update_avg_queue_size(bfqq_group(bfqq)); --#endif - bfq_mark_bfqq_must_alloc(bfqq); -- bfq_mark_bfqq_budget_new(bfqq); - bfq_clear_bfqq_fifo_expire(bfqq); - - bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; - -+ BUG_ON(bfqq == bfqd->in_service_queue); -+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); -+ -+ if (time_is_before_jiffies(bfqq->last_wr_start_finish) && -+ bfqq->wr_coeff > 1 && -+ bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time && -+ time_is_before_jiffies(bfqq->budget_timeout)) { -+ /* -+ * For soft real-time queues, move the start -+ * of the weight-raising period forward by the -+ * time the queue has not received any -+ * service. Otherwise, a relatively long -+ * service delay is likely to cause the -+ * weight-raising period of the queue to end, -+ * because of the short duration of the -+ * weight-raising period of a soft real-time -+ * queue. It is worth noting that this move -+ * is not so dangerous for the other queues, -+ * because soft real-time queues are not -+ * greedy. -+ * -+ * To not add a further variable, we use the -+ * overloaded field budget_timeout to -+ * determine for how long the queue has not -+ * received service, i.e., how much time has -+ * elapsed since the queue expired. However, -+ * this is a little imprecise, because -+ * budget_timeout is set to jiffies if bfqq -+ * not only expires, but also remains with no -+ * request. -+ */ -+ if (time_after(bfqq->budget_timeout, -+ bfqq->last_wr_start_finish)) -+ bfqq->last_wr_start_finish += -+ jiffies - bfqq->budget_timeout; -+ else -+ bfqq->last_wr_start_finish = jiffies; -+ -+ if (time_is_after_jiffies(bfqq->last_wr_start_finish)) { -+ pr_crit( -+ "BFQ WARNING:last %lu budget %lu jiffies %lu", -+ bfqq->last_wr_start_finish, -+ bfqq->budget_timeout, -+ jiffies); -+ pr_crit("diff %lu", jiffies - -+ max_t(unsigned long, -+ bfqq->last_wr_start_finish, -+ bfqq->budget_timeout)); -+ bfqq->last_wr_start_finish = jiffies; -+ } -+ } -+ -+ bfq_set_budget_timeout(bfqd, bfqq); - bfq_log_bfqq(bfqd, bfqq, - "set_in_service_queue, cur-budget = %d", - bfqq->entity.budget); -- } -+ } else -+ bfq_log(bfqd, "set_in_service_queue: NULL"); - - bfqd->in_service_queue = bfqq; - } -@@ -1675,36 +2273,11 @@ static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd) - return bfqq; - } - --/* -- * If enough samples have been computed, return the current max budget -- * stored in bfqd, which is dynamically updated according to the -- * estimated disk peak rate; otherwise return the default max budget -- */ --static int bfq_max_budget(struct bfq_data *bfqd) --{ -- if (bfqd->budgets_assigned < bfq_stats_min_budgets) -- return bfq_default_max_budget; -- else -- return bfqd->bfq_max_budget; --} -- --/* -- * Return min budget, which is a fraction of the current or default -- * max budget (trying with 1/32) -- */ --static int bfq_min_budget(struct bfq_data *bfqd) --{ -- if (bfqd->budgets_assigned < bfq_stats_min_budgets) -- return bfq_default_max_budget / 32; -- else -- return bfqd->bfq_max_budget / 32; --} -- - static void bfq_arm_slice_timer(struct bfq_data *bfqd) - { - struct bfq_queue *bfqq = bfqd->in_service_queue; - struct bfq_io_cq *bic; -- unsigned long sl; -+ u32 sl; - - BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); - -@@ -1728,119 +2301,366 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd) - sl = bfqd->bfq_slice_idle; - /* - * Unless the queue is being weight-raised or the scenario is -- * asymmetric, grant only minimum idle time if the queue either -- * has been seeky for long enough or has already proved to be -- * constantly seeky. -+ * asymmetric, grant only minimum idle time if the queue -+ * is seeky. A long idling is preserved for a weight-raised -+ * queue, or, more in general, in an asymemtric scenario, -+ * because a long idling is needed for guaranteeing to a queue -+ * its reserved share of the throughput (in particular, it is -+ * needed if the queue has a higher weight than some other -+ * queue). - */ -- if (bfq_sample_valid(bfqq->seek_samples) && -- ((BFQQ_SEEKY(bfqq) && bfqq->entity.service > -- bfq_max_budget(bfqq->bfqd) / 8) || -- bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1 && -+ if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 && - bfq_symmetric_scenario(bfqd)) -- sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); -- else if (bfqq->wr_coeff > 1) -- sl = sl * 3; -+ sl = min_t(u32, sl, BFQ_MIN_TT); -+ - bfqd->last_idling_start = ktime_get(); -- mod_timer(&bfqd->idle_slice_timer, jiffies + sl); --#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ hrtimer_start(&bfqd->idle_slice_timer, ns_to_ktime(sl), -+ HRTIMER_MODE_REL); - bfqg_stats_set_start_idle_time(bfqq_group(bfqq)); --#endif -- bfq_log(bfqd, "arm idle: %u/%u ms", -- jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); -+ bfq_log(bfqd, "arm idle: %ld/%ld ms", -+ sl / NSEC_PER_MSEC, bfqd->bfq_slice_idle / NSEC_PER_MSEC); - } - - /* -- * Set the maximum time for the in-service queue to consume its -- * budget. This prevents seeky processes from lowering the disk -- * throughput (always guaranteed with a time slice scheme as in CFQ). -+ * In autotuning mode, max_budget is dynamically recomputed as the -+ * amount of sectors transferred in timeout at the estimated peak -+ * rate. This enables BFQ to utilize a full timeslice with a full -+ * budget, even if the in-service queue is served at peak rate. And -+ * this maximises throughput with sequential workloads. - */ --static void bfq_set_budget_timeout(struct bfq_data *bfqd) -+static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd) - { -- struct bfq_queue *bfqq = bfqd->in_service_queue; -- unsigned int timeout_coeff; -+ return (u64)bfqd->peak_rate * USEC_PER_MSEC * -+ jiffies_to_msecs(bfqd->bfq_timeout)>>BFQ_RATE_SHIFT; -+} - -- if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time) -- timeout_coeff = 1; -- else -- timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; -+/* -+ * Update parameters related to throughput and responsiveness, as a -+ * function of the estimated peak rate. See comments on -+ * bfq_calc_max_budget(), and on T_slow and T_fast arrays. -+ */ -+static void update_thr_responsiveness_params(struct bfq_data *bfqd) -+{ -+ int dev_type = blk_queue_nonrot(bfqd->queue); -+ -+ if (bfqd->bfq_user_max_budget == 0) { -+ bfqd->bfq_max_budget = -+ bfq_calc_max_budget(bfqd); -+ BUG_ON(bfqd->bfq_max_budget < 0); -+ bfq_log(bfqd, "new max_budget = %d", -+ bfqd->bfq_max_budget); -+ } - -- bfqd->last_budget_start = ktime_get(); -+ if (bfqd->device_speed == BFQ_BFQD_FAST && -+ bfqd->peak_rate < device_speed_thresh[dev_type]) { -+ bfqd->device_speed = BFQ_BFQD_SLOW; -+ bfqd->RT_prod = R_slow[dev_type] * -+ T_slow[dev_type]; -+ } else if (bfqd->device_speed == BFQ_BFQD_SLOW && -+ bfqd->peak_rate > device_speed_thresh[dev_type]) { -+ bfqd->device_speed = BFQ_BFQD_FAST; -+ bfqd->RT_prod = R_fast[dev_type] * -+ T_fast[dev_type]; -+ } - -- bfq_clear_bfqq_budget_new(bfqq); -- bfqq->budget_timeout = jiffies + -- bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; -+ bfq_log(bfqd, -+"dev_type %s dev_speed_class = %s (%llu sects/sec), thresh %llu setcs/sec", -+ dev_type == 0 ? "ROT" : "NONROT", -+ bfqd->device_speed == BFQ_BFQD_FAST ? "FAST" : "SLOW", -+ bfqd->device_speed == BFQ_BFQD_FAST ? -+ (USEC_PER_SEC*(u64)R_fast[dev_type])>>BFQ_RATE_SHIFT : -+ (USEC_PER_SEC*(u64)R_slow[dev_type])>>BFQ_RATE_SHIFT, -+ (USEC_PER_SEC*(u64)device_speed_thresh[dev_type])>> -+ BFQ_RATE_SHIFT); -+} - -- bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", -- jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * -- timeout_coeff)); -+static void bfq_reset_rate_computation(struct bfq_data *bfqd, struct request *rq) -+{ -+ if (rq != NULL) { /* new rq dispatch now, reset accordingly */ -+ bfqd->last_dispatch = bfqd->first_dispatch = ktime_get_ns() ; -+ bfqd->peak_rate_samples = 1; -+ bfqd->sequential_samples = 0; -+ bfqd->tot_sectors_dispatched = bfqd->last_rq_max_size = -+ blk_rq_sectors(rq); -+ } else /* no new rq dispatched, just reset the number of samples */ -+ bfqd->peak_rate_samples = 0; /* full re-init on next disp. */ -+ -+ bfq_log(bfqd, -+ "reset_rate_computation at end, sample %u/%u tot_sects %llu", -+ bfqd->peak_rate_samples, bfqd->sequential_samples, -+ bfqd->tot_sectors_dispatched); - } - --/* -- * Move request from internal lists to the request queue dispatch list. -- */ --static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) -+static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) - { -- struct bfq_data *bfqd = q->elevator->elevator_data; -- struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ u32 rate, weight, divisor; - - /* -- * For consistency, the next instruction should have been executed -- * after removing the request from the queue and dispatching it. -- * We execute instead this instruction before bfq_remove_request() -- * (and hence introduce a temporary inconsistency), for efficiency. -- * In fact, in a forced_dispatch, this prevents two counters related -- * to bfqq->dispatched to risk to be uselessly decremented if bfqq -- * is not in service, and then to be incremented again after -- * incrementing bfqq->dispatched. -+ * For the convergence property to hold (see comments on -+ * bfq_update_peak_rate()) and for the assessment to be -+ * reliable, a minimum number of samples must be present, and -+ * a minimum amount of time must have elapsed. If not so, do -+ * not compute new rate. Just reset parameters, to get ready -+ * for a new evaluation attempt. - */ -- bfqq->dispatched++; -- bfq_remove_request(rq); -- elv_dispatch_sort(q, rq); -+ if (bfqd->peak_rate_samples < BFQ_RATE_MIN_SAMPLES || -+ bfqd->delta_from_first < BFQ_RATE_MIN_INTERVAL) { -+ bfq_log(bfqd, -+ "update_rate_reset: only resetting, delta_first %lluus samples %d", -+ bfqd->delta_from_first>>10, bfqd->peak_rate_samples); -+ goto reset_computation; -+ } - -- if (bfq_bfqq_sync(bfqq)) -- bfqd->sync_flight++; --#ifdef CONFIG_BFQ_GROUP_IOSCHED -- bfqg_stats_update_dispatch(bfqq_group(bfqq), blk_rq_bytes(rq), -- rq->cmd_flags); --#endif -+ /* -+ * If a new request completion has occurred after last -+ * dispatch, then, to approximate the rate at which requests -+ * have been served by the device, it is more precise to -+ * extend the observation interval to the last completion. -+ */ -+ bfqd->delta_from_first = -+ max_t(u64, bfqd->delta_from_first, -+ bfqd->last_completion - bfqd->first_dispatch); -+ -+ BUG_ON(bfqd->delta_from_first == 0); -+ /* -+ * Rate computed in sects/usec, and not sects/nsec, for -+ * precision issues. -+ */ -+ rate = div64_ul(bfqd->tot_sectors_dispatched<<BFQ_RATE_SHIFT, -+ div_u64(bfqd->delta_from_first, NSEC_PER_USEC)); -+ -+ bfq_log(bfqd, -+"update_rate_reset: tot_sects %llu delta_first %lluus rate %llu sects/s (%d)", -+ bfqd->tot_sectors_dispatched, bfqd->delta_from_first>>10, -+ ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), -+ rate > 20<<BFQ_RATE_SHIFT); -+ -+ /* -+ * Peak rate not updated if: -+ * - the percentage of sequential dispatches is below 3/4 of the -+ * total, and rate is below the current estimated peak rate -+ * - rate is unreasonably high (> 20M sectors/sec) -+ */ -+ if ((bfqd->sequential_samples < (3 * bfqd->peak_rate_samples)>>2 && -+ rate <= bfqd->peak_rate) || -+ rate > 20<<BFQ_RATE_SHIFT) { -+ bfq_log(bfqd, -+ "update_rate_reset: goto reset, samples %u/%u rate/peak %llu/%llu", -+ bfqd->peak_rate_samples, bfqd->sequential_samples, -+ ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), -+ ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); -+ goto reset_computation; -+ } else { -+ bfq_log(bfqd, -+ "update_rate_reset: do update, samples %u/%u rate/peak %llu/%llu", -+ bfqd->peak_rate_samples, bfqd->sequential_samples, -+ ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), -+ ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); -+ } -+ -+ /* -+ * We have to update the peak rate, at last! To this purpose, -+ * we use a low-pass filter. We compute the smoothing constant -+ * of the filter as a function of the 'weight' of the new -+ * measured rate. -+ * -+ * As can be seen in next formulas, we define this weight as a -+ * quantity proportional to how sequential the workload is, -+ * and to how long the observation time interval is. -+ * -+ * The weight runs from 0 to 8. The maximum value of the -+ * weight, 8, yields the minimum value for the smoothing -+ * constant. At this minimum value for the smoothing constant, -+ * the measured rate contributes for half of the next value of -+ * the estimated peak rate. -+ * -+ * So, the first step is to compute the weight as a function -+ * of how sequential the workload is. Note that the weight -+ * cannot reach 9, because bfqd->sequential_samples cannot -+ * become equal to bfqd->peak_rate_samples, which, in its -+ * turn, holds true because bfqd->sequential_samples is not -+ * incremented for the first sample. -+ */ -+ weight = (9 * bfqd->sequential_samples) / bfqd->peak_rate_samples; -+ -+ /* -+ * Second step: further refine the weight as a function of the -+ * duration of the observation interval. -+ */ -+ weight = min_t(u32, 8, -+ div_u64(weight * bfqd->delta_from_first, -+ BFQ_RATE_REF_INTERVAL)); -+ -+ /* -+ * Divisor ranging from 10, for minimum weight, to 2, for -+ * maximum weight. -+ */ -+ divisor = 10 - weight; -+ BUG_ON(divisor == 0); -+ -+ /* -+ * Finally, update peak rate: -+ * -+ * peak_rate = peak_rate * (divisor-1) / divisor + rate / divisor -+ */ -+ bfqd->peak_rate *= divisor-1; -+ bfqd->peak_rate /= divisor; -+ rate /= divisor; /* smoothing constant alpha = 1/divisor */ -+ -+ bfq_log(bfqd, -+ "update_rate_reset: divisor %d tmp_peak_rate %llu tmp_rate %u", -+ divisor, -+ ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT), -+ (u32)((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT)); -+ -+ BUG_ON(bfqd->peak_rate == 0); -+ BUG_ON(bfqd->peak_rate > 20<<BFQ_RATE_SHIFT); -+ -+ bfqd->peak_rate += rate; -+ update_thr_responsiveness_params(bfqd); -+ BUG_ON(bfqd->peak_rate > 20<<BFQ_RATE_SHIFT); -+ -+reset_computation: -+ bfq_reset_rate_computation(bfqd, rq); - } - - /* -- * Return expired entry, or NULL to just start from scratch in rbtree. -+ * Update the read/write peak rate (the main quantity used for -+ * auto-tuning, see update_thr_responsiveness_params()). -+ * -+ * It is not trivial to estimate the peak rate (correctly): because of -+ * the presence of sw and hw queues between the scheduler and the -+ * device components that finally serve I/O requests, it is hard to -+ * say exactly when a given dispatched request is served inside the -+ * device, and for how long. As a consequence, it is hard to know -+ * precisely at what rate a given set of requests is actually served -+ * by the device. -+ * -+ * On the opposite end, the dispatch time of any request is trivially -+ * available, and, from this piece of information, the "dispatch rate" -+ * of requests can be immediately computed. So, the idea in the next -+ * function is to use what is known, namely request dispatch times -+ * (plus, when useful, request completion times), to estimate what is -+ * unknown, namely in-device request service rate. -+ * -+ * The main issue is that, because of the above facts, the rate at -+ * which a certain set of requests is dispatched over a certain time -+ * interval can vary greatly with respect to the rate at which the -+ * same requests are then served. But, since the size of any -+ * intermediate queue is limited, and the service scheme is lossless -+ * (no request is silently dropped), the following obvious convergence -+ * property holds: the number of requests dispatched MUST become -+ * closer and closer to the number of requests completed as the -+ * observation interval grows. This is the key property used in -+ * the next function to estimate the peak service rate as a function -+ * of the observed dispatch rate. The function assumes to be invoked -+ * on every request dispatch. - */ --static struct request *bfq_check_fifo(struct bfq_queue *bfqq) -+static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) - { -- struct request *rq = NULL; -+ u64 now_ns = ktime_get_ns(); -+ -+ if (bfqd->peak_rate_samples == 0) { /* first dispatch */ -+ bfq_log(bfqd, -+ "update_peak_rate: goto reset, samples %d", -+ bfqd->peak_rate_samples) ; -+ bfq_reset_rate_computation(bfqd, rq); -+ goto update_last_values; /* will add one sample */ -+ } - -- if (bfq_bfqq_fifo_expire(bfqq)) -- return NULL; -+ /* -+ * Device idle for very long: the observation interval lasting -+ * up to this dispatch cannot be a valid observation interval -+ * for computing a new peak rate (similarly to the late- -+ * completion event in bfq_completed_request()). Go to -+ * update_rate_and_reset to have the following three steps -+ * taken: -+ * - close the observation interval at the last (previous) -+ * request dispatch or completion -+ * - compute rate, if possible, for that observation interval -+ * - start a new observation interval with this dispatch -+ */ -+ if (now_ns - bfqd->last_dispatch > 100*NSEC_PER_MSEC && -+ bfqd->rq_in_driver == 0) { -+ bfq_log(bfqd, -+"update_peak_rate: jumping to updating&resetting delta_last %lluus samples %d", -+ (now_ns - bfqd->last_dispatch)>>10, -+ bfqd->peak_rate_samples) ; -+ goto update_rate_and_reset; -+ } - -- bfq_mark_bfqq_fifo_expire(bfqq); -+ /* Update sampling information */ -+ bfqd->peak_rate_samples++; - -- if (list_empty(&bfqq->fifo)) -- return NULL; -+ if ((bfqd->rq_in_driver > 0 || -+ now_ns - bfqd->last_completion < BFQ_MIN_TT) -+ && get_sdist(bfqd->last_position, rq) < BFQQ_SEEK_THR) -+ bfqd->sequential_samples++; - -- rq = rq_entry_fifo(bfqq->fifo.next); -+ bfqd->tot_sectors_dispatched += blk_rq_sectors(rq); - -- if (time_before(jiffies, rq->fifo_time)) -- return NULL; -+ /* Reset max observed rq size every 32 dispatches */ -+ if (likely(bfqd->peak_rate_samples % 32)) -+ bfqd->last_rq_max_size = -+ max_t(u32, blk_rq_sectors(rq), bfqd->last_rq_max_size); -+ else -+ bfqd->last_rq_max_size = blk_rq_sectors(rq); - -- return rq; -+ bfqd->delta_from_first = now_ns - bfqd->first_dispatch; -+ -+ bfq_log(bfqd, -+ "update_peak_rate: added samples %u/%u tot_sects %llu delta_first %lluus", -+ bfqd->peak_rate_samples, bfqd->sequential_samples, -+ bfqd->tot_sectors_dispatched, -+ bfqd->delta_from_first>>10); -+ -+ /* Target observation interval not yet reached, go on sampling */ -+ if (bfqd->delta_from_first < BFQ_RATE_REF_INTERVAL) -+ goto update_last_values; -+ -+update_rate_and_reset: -+ bfq_update_rate_reset(bfqd, rq); -+update_last_values: -+ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); -+ bfqd->last_dispatch = now_ns; -+ -+ bfq_log(bfqd, -+ "update_peak_rate: delta_first %lluus last_pos %llu peak_rate %llu", -+ (now_ns - bfqd->first_dispatch)>>10, -+ (unsigned long long) bfqd->last_position, -+ ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); -+ bfq_log(bfqd, -+ "update_peak_rate: samples at end %d", bfqd->peak_rate_samples); - } - --static int bfq_bfqq_budget_left(struct bfq_queue *bfqq) -+/* -+ * Move request from internal lists to the dispatch list of the request queue -+ */ -+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) - { -- struct bfq_entity *entity = &bfqq->entity; -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); - -- return entity->budget - entity->service; -+ /* -+ * For consistency, the next instruction should have been executed -+ * after removing the request from the queue and dispatching it. -+ * We execute instead this instruction before bfq_remove_request() -+ * (and hence introduce a temporary inconsistency), for efficiency. -+ * In fact, in a forced_dispatch, this prevents two counters related -+ * to bfqq->dispatched to risk to be uselessly decremented if bfqq -+ * is not in service, and then to be incremented again after -+ * incrementing bfqq->dispatched. -+ */ -+ bfqq->dispatched++; -+ bfq_update_peak_rate(q->elevator->elevator_data, rq); -+ -+ bfq_remove_request(rq); -+ elv_dispatch_sort(q, rq); - } - - static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) - { - BUG_ON(bfqq != bfqd->in_service_queue); - -- __bfq_bfqd_reset_in_service(bfqd); -- - /* - * If this bfqq is shared between multiple processes, check - * to make sure that those processes are still issuing I/Os -@@ -1851,20 +2671,30 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) - bfq_mark_bfqq_split_coop(bfqq); - - if (RB_EMPTY_ROOT(&bfqq->sort_list)) { -- /* -- * Overloading budget_timeout field to store the time -- * at which the queue remains with no backlog; used by -- * the weight-raising mechanism. -- */ -- bfqq->budget_timeout = jiffies; -- bfq_del_bfqq_busy(bfqd, bfqq, 1); -+ if (bfqq->dispatched == 0) -+ /* -+ * Overloading budget_timeout field to store -+ * the time at which the queue remains with no -+ * backlog and no outstanding request; used by -+ * the weight-raising mechanism. -+ */ -+ bfqq->budget_timeout = jiffies; -+ -+ bfq_del_bfqq_busy(bfqd, bfqq, true); - } else { -- bfq_activate_bfqq(bfqd, bfqq); -+ bfq_requeue_bfqq(bfqd, bfqq); - /* - * Resort priority tree of potential close cooperators. - */ - bfq_pos_tree_add_move(bfqd, bfqq); - } -+ -+ /* -+ * All in-service entities must have been properly deactivated -+ * or requeued before executing the next function, which -+ * resets all in-service entites as no more in service. -+ */ -+ __bfq_bfqd_reset_in_service(bfqd); - } - - /** -@@ -1883,10 +2713,19 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, - struct request *next_rq; - int budget, min_budget; - -- budget = bfqq->max_budget; -+ BUG_ON(bfqq != bfqd->in_service_queue); -+ - min_budget = bfq_min_budget(bfqd); - -- BUG_ON(bfqq != bfqd->in_service_queue); -+ if (bfqq->wr_coeff == 1) -+ budget = bfqq->max_budget; -+ else /* -+ * Use a constant, low budget for weight-raised queues, -+ * to help achieve a low latency. Keep it slightly higher -+ * than the minimum possible budget, to cause a little -+ * bit fewer expirations. -+ */ -+ budget = 2 * min_budget; - - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d", - bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); -@@ -1895,7 +2734,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", - bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); - -- if (bfq_bfqq_sync(bfqq)) { -+ if (bfq_bfqq_sync(bfqq) && bfqq->wr_coeff == 1) { - switch (reason) { - /* - * Caveat: in all the following cases we trade latency -@@ -1937,14 +2776,10 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, - break; - case BFQ_BFQQ_BUDGET_TIMEOUT: - /* -- * We double the budget here because: 1) it -- * gives the chance to boost the throughput if -- * this is not a seeky process (which may have -- * bumped into this timeout because of, e.g., -- * ZBR), 2) together with charge_full_budget -- * it helps give seeky processes higher -- * timestamps, and hence be served less -- * frequently. -+ * We double the budget here because it gives -+ * the chance to boost the throughput if this -+ * is not a seeky process (and has bumped into -+ * this timeout because of, e.g., ZBR). - */ - budget = min(budget * 2, bfqd->bfq_max_budget); - break; -@@ -1961,17 +2796,49 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, - budget = min(budget * 4, bfqd->bfq_max_budget); - break; - case BFQ_BFQQ_NO_MORE_REQUESTS: -- /* -- * Leave the budget unchanged. -- */ -+ /* -+ * For queues that expire for this reason, it -+ * is particularly important to keep the -+ * budget close to the actual service they -+ * need. Doing so reduces the timestamp -+ * misalignment problem described in the -+ * comments in the body of -+ * __bfq_activate_entity. In fact, suppose -+ * that a queue systematically expires for -+ * BFQ_BFQQ_NO_MORE_REQUESTS and presents a -+ * new request in time to enjoy timestamp -+ * back-shifting. The larger the budget of the -+ * queue is with respect to the service the -+ * queue actually requests in each service -+ * slot, the more times the queue can be -+ * reactivated with the same virtual finish -+ * time. It follows that, even if this finish -+ * time is pushed to the system virtual time -+ * to reduce the consequent timestamp -+ * misalignment, the queue unjustly enjoys for -+ * many re-activations a lower finish time -+ * than all newly activated queues. -+ * -+ * The service needed by bfqq is measured -+ * quite precisely by bfqq->entity.service. -+ * Since bfqq does not enjoy device idling, -+ * bfqq->entity.service is equal to the number -+ * of sectors that the process associated with -+ * bfqq requested to read/write before waiting -+ * for request completions, or blocking for -+ * other reasons. -+ */ -+ budget = max_t(int, bfqq->entity.service, min_budget); -+ break; - default: - return; - } -- } else -+ } else if (!bfq_bfqq_sync(bfqq)) - /* -- * Async queues get always the maximum possible budget -- * (their ability to dispatch is limited by -- * @bfqd->bfq_max_budget_async_rq). -+ * Async queues get always the maximum possible -+ * budget, as for them we do not care about latency -+ * (in addition, their ability to dispatch is limited -+ * by the charging factor). - */ - budget = bfqd->bfq_max_budget; - -@@ -1982,160 +2849,120 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, - bfqq->max_budget = min(bfqq->max_budget, bfqd->bfq_max_budget); - - /* -- * Make sure that we have enough budget for the next request. -- * Since the finish time of the bfqq must be kept in sync with -- * the budget, be sure to call __bfq_bfqq_expire() after the -+ * If there is still backlog, then assign a new budget, making -+ * sure that it is large enough for the next request. Since -+ * the finish time of bfqq must be kept in sync with the -+ * budget, be sure to call __bfq_bfqq_expire() *after* this - * update. -+ * -+ * If there is no backlog, then no need to update the budget; -+ * it will be updated on the arrival of a new request. - */ - next_rq = bfqq->next_rq; -- if (next_rq) -+ if (next_rq) { -+ BUG_ON(reason == BFQ_BFQQ_TOO_IDLE || -+ reason == BFQ_BFQQ_NO_MORE_REQUESTS); - bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget, - bfq_serv_to_charge(next_rq, bfqq)); -- else -- bfqq->entity.budget = bfqq->max_budget; -+ BUG_ON(!bfq_bfqq_busy(bfqq)); -+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); -+ } - - bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %d", - next_rq ? blk_rq_sectors(next_rq) : 0, - bfqq->entity.budget); - } - --static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout) --{ -- unsigned long max_budget; -- -- /* -- * The max_budget calculated when autotuning is equal to the -- * amount of sectors transfered in timeout_sync at the -- * estimated peak rate. -- */ -- max_budget = (unsigned long)(peak_rate * 1000 * -- timeout >> BFQ_RATE_SHIFT); -- -- return max_budget; --} -- - /* -- * In addition to updating the peak rate, checks whether the process -- * is "slow", and returns 1 if so. This slow flag is used, in addition -- * to the budget timeout, to reduce the amount of service provided to -- * seeky processes, and hence reduce their chances to lower the -- * throughput. See the code for more details. -+ * Return true if the process associated with bfqq is "slow". The slow -+ * flag is used, in addition to the budget timeout, to reduce the -+ * amount of service provided to seeky processes, and thus reduce -+ * their chances to lower the throughput. More details in the comments -+ * on the function bfq_bfqq_expire(). -+ * -+ * An important observation is in order: as discussed in the comments -+ * on the function bfq_update_peak_rate(), with devices with internal -+ * queues, it is hard if ever possible to know when and for how long -+ * an I/O request is processed by the device (apart from the trivial -+ * I/O pattern where a new request is dispatched only after the -+ * previous one has been completed). This makes it hard to evaluate -+ * the real rate at which the I/O requests of each bfq_queue are -+ * served. In fact, for an I/O scheduler like BFQ, serving a -+ * bfq_queue means just dispatching its requests during its service -+ * slot (i.e., until the budget of the queue is exhausted, or the -+ * queue remains idle, or, finally, a timeout fires). But, during the -+ * service slot of a bfq_queue, around 100 ms at most, the device may -+ * be even still processing requests of bfq_queues served in previous -+ * service slots. On the opposite end, the requests of the in-service -+ * bfq_queue may be completed after the service slot of the queue -+ * finishes. -+ * -+ * Anyway, unless more sophisticated solutions are used -+ * (where possible), the sum of the sizes of the requests dispatched -+ * during the service slot of a bfq_queue is probably the only -+ * approximation available for the service received by the bfq_queue -+ * during its service slot. And this sum is the quantity used in this -+ * function to evaluate the I/O speed of a process. - */ --static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, -- bool compensate, enum bfqq_expiration reason) -+static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ bool compensate, enum bfqq_expiration reason, -+ unsigned long *delta_ms) - { -- u64 bw, usecs, expected, timeout; -- ktime_t delta; -- int update = 0; -+ ktime_t delta_ktime; -+ u32 delta_usecs; -+ bool slow = BFQQ_SEEKY(bfqq); /* if delta too short, use seekyness */ - -- if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq)) -+ if (!bfq_bfqq_sync(bfqq)) - return false; - - if (compensate) -- delta = bfqd->last_idling_start; -+ delta_ktime = bfqd->last_idling_start; - else -- delta = ktime_get(); -- delta = ktime_sub(delta, bfqd->last_budget_start); -- usecs = ktime_to_us(delta); -- -- /* Don't trust short/unrealistic values. */ -- if (usecs < 100 || usecs >= LONG_MAX) -- return false; -- -- /* -- * Calculate the bandwidth for the last slice. We use a 64 bit -- * value to store the peak rate, in sectors per usec in fixed -- * point math. We do so to have enough precision in the estimate -- * and to avoid overflows. -- */ -- bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; -- do_div(bw, (unsigned long)usecs); -+ delta_ktime = ktime_get(); -+ delta_ktime = ktime_sub(delta_ktime, bfqd->last_budget_start); -+ delta_usecs = ktime_to_us(delta_ktime); -+ -+ /* don't use too short time intervals */ -+ if (delta_usecs < 1000) { -+ if (blk_queue_nonrot(bfqd->queue)) -+ /* -+ * give same worst-case guarantees as idling -+ * for seeky -+ */ -+ *delta_ms = BFQ_MIN_TT / NSEC_PER_MSEC; -+ else /* charge at least one seek */ -+ *delta_ms = bfq_slice_idle / NSEC_PER_MSEC; -+ -+ bfq_log(bfqd, "bfq_bfqq_is_slow: too short %u", delta_usecs); -+ -+ return slow; -+ } - -- timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); -+ *delta_ms = delta_usecs / USEC_PER_MSEC; - - /* -- * Use only long (> 20ms) intervals to filter out spikes for -- * the peak rate estimation. -+ * Use only long (> 20ms) intervals to filter out excessive -+ * spikes in service rate estimation. - */ -- if (usecs > 20000) { -- if (bw > bfqd->peak_rate || -- (!BFQQ_SEEKY(bfqq) && -- reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { -- bfq_log(bfqd, "measured bw =%llu", bw); -- /* -- * To smooth oscillations use a low-pass filter with -- * alpha=7/8, i.e., -- * new_rate = (7/8) * old_rate + (1/8) * bw -- */ -- do_div(bw, 8); -- if (bw == 0) -- return 0; -- bfqd->peak_rate *= 7; -- do_div(bfqd->peak_rate, 8); -- bfqd->peak_rate += bw; -- update = 1; -- bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); -- } -- -- update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; -- -- if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) -- bfqd->peak_rate_samples++; -- -- if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && -- update) { -- int dev_type = blk_queue_nonrot(bfqd->queue); -- -- if (bfqd->bfq_user_max_budget == 0) { -- bfqd->bfq_max_budget = -- bfq_calc_max_budget(bfqd->peak_rate, -- timeout); -- bfq_log(bfqd, "new max_budget=%d", -- bfqd->bfq_max_budget); -- } -- if (bfqd->device_speed == BFQ_BFQD_FAST && -- bfqd->peak_rate < device_speed_thresh[dev_type]) { -- bfqd->device_speed = BFQ_BFQD_SLOW; -- bfqd->RT_prod = R_slow[dev_type] * -- T_slow[dev_type]; -- } else if (bfqd->device_speed == BFQ_BFQD_SLOW && -- bfqd->peak_rate > device_speed_thresh[dev_type]) { -- bfqd->device_speed = BFQ_BFQD_FAST; -- bfqd->RT_prod = R_fast[dev_type] * -- T_fast[dev_type]; -- } -- } -+ if (delta_usecs > 20000) { -+ /* -+ * Caveat for rotational devices: processes doing I/O -+ * in the slower disk zones tend to be slow(er) even -+ * if not seeky. In this respect, the estimated peak -+ * rate is likely to be an average over the disk -+ * surface. Accordingly, to not be too harsh with -+ * unlucky processes, a process is deemed slow only if -+ * its rate has been lower than half of the estimated -+ * peak rate. -+ */ -+ slow = bfqq->entity.service < bfqd->bfq_max_budget / 2; -+ bfq_log(bfqd, "bfq_bfqq_is_slow: relative rate %d/%d", -+ bfqq->entity.service, bfqd->bfq_max_budget); - } - -- /* -- * If the process has been served for a too short time -- * interval to let its possible sequential accesses prevail on -- * the initial seek time needed to move the disk head on the -- * first sector it requested, then give the process a chance -- * and for the moment return false. -- */ -- if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) -- return false; -- -- /* -- * A process is considered ``slow'' (i.e., seeky, so that we -- * cannot treat it fairly in the service domain, as it would -- * slow down too much the other processes) if, when a slice -- * ends for whatever reason, it has received service at a -- * rate that would not be high enough to complete the budget -- * before the budget timeout expiration. -- */ -- expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; -+ bfq_log_bfqq(bfqd, bfqq, "bfq_bfqq_is_slow: slow %d", slow); - -- /* -- * Caveat: processes doing IO in the slower disk zones will -- * tend to be slow(er) even if not seeky. And the estimated -- * peak rate will actually be an average over the disk -- * surface. Hence, to not be too harsh with unlucky processes, -- * we keep a budget/3 margin of safety before declaring a -- * process slow. -- */ -- return expected > (4 * bfqq->entity.budget) / 3; -+ return slow; - } - - /* -@@ -2193,20 +3020,35 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, - static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, - struct bfq_queue *bfqq) - { -+ bfq_log_bfqq(bfqd, bfqq, -+"softrt_next_start: service_blkg %lu soft_rate %u sects/sec interval %u", -+ bfqq->service_from_backlogged, -+ bfqd->bfq_wr_max_softrt_rate, -+ jiffies_to_msecs(HZ * bfqq->service_from_backlogged / -+ bfqd->bfq_wr_max_softrt_rate)); -+ - return max(bfqq->last_idle_bklogged + - HZ * bfqq->service_from_backlogged / - bfqd->bfq_wr_max_softrt_rate, -- jiffies + bfqq->bfqd->bfq_slice_idle + 4); -+ jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); -+} -+ -+/* -+ * Return the farthest future time instant according to jiffies -+ * macros. -+ */ -+static unsigned long bfq_greatest_from_now(void) -+{ -+ return jiffies + MAX_JIFFY_OFFSET; - } - - /* -- * Return the largest-possible time instant such that, for as long as possible, -- * the current time will be lower than this time instant according to the macro -- * time_is_before_jiffies(). -+ * Return the farthest past time instant according to jiffies -+ * macros. - */ --static unsigned long bfq_infinity_from_now(unsigned long now) -+static unsigned long bfq_smallest_from_now(void) - { -- return now + ULONG_MAX / 2; -+ return jiffies - MAX_JIFFY_OFFSET; - } - - /** -@@ -2216,28 +3058,24 @@ static unsigned long bfq_infinity_from_now(unsigned long now) - * @compensate: if true, compensate for the time spent idling. - * @reason: the reason causing the expiration. - * -+ * If the process associated with bfqq does slow I/O (e.g., because it -+ * issues random requests), we charge bfqq with the time it has been -+ * in service instead of the service it has received (see -+ * bfq_bfqq_charge_time for details on how this goal is achieved). As -+ * a consequence, bfqq will typically get higher timestamps upon -+ * reactivation, and hence it will be rescheduled as if it had -+ * received more service than what it has actually received. In the -+ * end, bfqq receives less service in proportion to how slowly its -+ * associated process consumes its budgets (and hence how seriously it -+ * tends to lower the throughput). In addition, this time-charging -+ * strategy guarantees time fairness among slow processes. In -+ * contrast, if the process associated with bfqq is not slow, we -+ * charge bfqq exactly with the service it has received. - * -- * If the process associated to the queue is slow (i.e., seeky), or in -- * case of budget timeout, or, finally, if it is async, we -- * artificially charge it an entire budget (independently of the -- * actual service it received). As a consequence, the queue will get -- * higher timestamps than the correct ones upon reactivation, and -- * hence it will be rescheduled as if it had received more service -- * than what it actually received. In the end, this class of processes -- * will receive less service in proportion to how slowly they consume -- * their budgets (and hence how seriously they tend to lower the -- * throughput). -- * -- * In contrast, when a queue expires because it has been idling for -- * too much or because it exhausted its budget, we do not touch the -- * amount of service it has received. Hence when the queue will be -- * reactivated and its timestamps updated, the latter will be in sync -- * with the actual service received by the queue until expiration. -- * -- * Charging a full budget to the first type of queues and the exact -- * service to the others has the effect of using the WF2Q+ policy to -- * schedule the former on a timeslice basis, without violating the -- * service domain guarantees of the latter. -+ * Charging time to the first type of queues and the exact service to -+ * the other has the effect of using the WF2Q+ policy to schedule the -+ * former on a timeslice basis, without violating service domain -+ * guarantees among the latter. - */ - static void bfq_bfqq_expire(struct bfq_data *bfqd, - struct bfq_queue *bfqq, -@@ -2245,41 +3083,53 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, - enum bfqq_expiration reason) - { - bool slow; -+ unsigned long delta = 0; -+ struct bfq_entity *entity = &bfqq->entity; -+ int ref; - - BUG_ON(bfqq != bfqd->in_service_queue); - - /* -- * Update disk peak rate for autotuning and check whether the -- * process is slow (see bfq_update_peak_rate). -+ * Check whether the process is slow (see bfq_bfqq_is_slow). - */ -- slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); -+ slow = bfq_bfqq_is_slow(bfqd, bfqq, compensate, reason, &delta); - - /* -- * As above explained, 'punish' slow (i.e., seeky), timed-out -- * and async queues, to favor sequential sync workloads. -- * -- * Processes doing I/O in the slower disk zones will tend to be -- * slow(er) even if not seeky. Hence, since the estimated peak -- * rate is actually an average over the disk surface, these -- * processes may timeout just for bad luck. To avoid punishing -- * them we do not charge a full budget to a process that -- * succeeded in consuming at least 2/3 of its budget. -+ * Increase service_from_backlogged before next statement, -+ * because the possible next invocation of -+ * bfq_bfqq_charge_time would likely inflate -+ * entity->service. In contrast, service_from_backlogged must -+ * contain real service, to enable the soft real-time -+ * heuristic to correctly compute the bandwidth consumed by -+ * bfqq. - */ -- if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT && -- bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)) -- bfq_bfqq_charge_full_budget(bfqq); -+ bfqq->service_from_backlogged += entity->service; - -- bfqq->service_from_backlogged += bfqq->entity.service; -+ /* -+ * As above explained, charge slow (typically seeky) and -+ * timed-out queues with the time and not the service -+ * received, to favor sequential workloads. -+ * -+ * Processes doing I/O in the slower disk zones will tend to -+ * be slow(er) even if not seeky. Therefore, since the -+ * estimated peak rate is actually an average over the disk -+ * surface, these processes may timeout just for bad luck. To -+ * avoid punishing them, do not charge time to processes that -+ * succeeded in consuming at least 2/3 of their budget. This -+ * allows BFQ to preserve enough elasticity to still perform -+ * bandwidth, and not time, distribution with little unlucky -+ * or quasi-sequential processes. -+ */ -+ if (bfqq->wr_coeff == 1 && -+ (slow || -+ (reason == BFQ_BFQQ_BUDGET_TIMEOUT && -+ bfq_bfqq_budget_left(bfqq) >= entity->budget / 3))) -+ bfq_bfqq_charge_time(bfqd, bfqq, delta); - -- if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT && -- !bfq_bfqq_constantly_seeky(bfqq)) { -- bfq_mark_bfqq_constantly_seeky(bfqq); -- if (!blk_queue_nonrot(bfqd->queue)) -- bfqd->const_seeky_busy_in_flight_queues++; -- } -+ BUG_ON(bfqq->entity.budget < bfqq->entity.service); - - if (reason == BFQ_BFQQ_TOO_IDLE && -- bfqq->entity.service <= 2 * bfqq->entity.budget / 10) -+ entity->service <= 2 * entity->budget / 10) - bfq_clear_bfqq_IO_bound(bfqq); - - if (bfqd->low_latency && bfqq->wr_coeff == 1) -@@ -2288,19 +3138,23 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, - if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 && - RB_EMPTY_ROOT(&bfqq->sort_list)) { - /* -- * If we get here, and there are no outstanding requests, -- * then the request pattern is isochronous (see the comments -- * to the function bfq_bfqq_softrt_next_start()). Hence we -- * can compute soft_rt_next_start. If, instead, the queue -- * still has outstanding requests, then we have to wait -- * for the completion of all the outstanding requests to -+ * If we get here, and there are no outstanding -+ * requests, then the request pattern is isochronous -+ * (see the comments on the function -+ * bfq_bfqq_softrt_next_start()). Thus we can compute -+ * soft_rt_next_start. If, instead, the queue still -+ * has outstanding requests, then we have to wait for -+ * the completion of all the outstanding requests to - * discover whether the request pattern is actually - * isochronous. - */ -- if (bfqq->dispatched == 0) -+ BUG_ON(bfqd->busy_queues < 1); -+ if (bfqq->dispatched == 0) { - bfqq->soft_rt_next_start = - bfq_bfqq_softrt_next_start(bfqd, bfqq); -- else { -+ bfq_log_bfqq(bfqd, bfqq, "new soft_rt_next %lu", -+ bfqq->soft_rt_next_start); -+ } else { - /* - * The application is still waiting for the - * completion of one or more requests: -@@ -2317,7 +3171,7 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, - * happened to be in the past. - */ - bfqq->soft_rt_next_start = -- bfq_infinity_from_now(jiffies); -+ bfq_greatest_from_now(); - /* - * Schedule an update of soft_rt_next_start to when - * the task may be discovered to be isochronous. -@@ -2327,15 +3181,30 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, - } - - bfq_log_bfqq(bfqd, bfqq, -- "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, -- slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); -+ "expire (%d, slow %d, num_disp %d, idle_win %d, weight %d)", -+ reason, slow, bfqq->dispatched, -+ bfq_bfqq_idle_window(bfqq), entity->weight); - - /* - * Increase, decrease or leave budget unchanged according to - * reason. - */ -+ BUG_ON(bfqq->entity.budget < bfqq->entity.service); - __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); -+ BUG_ON(bfqq->next_rq == NULL && -+ bfqq->entity.budget < bfqq->entity.service); -+ ref = bfqq->ref; - __bfq_bfqq_expire(bfqd, bfqq); -+ -+ BUG_ON(ref > 1 && -+ !bfq_bfqq_busy(bfqq) && reason == BFQ_BFQQ_BUDGET_EXHAUSTED && -+ !bfq_class_idle(bfqq)); -+ -+ /* mark bfqq as waiting a request only if a bic still points to it */ -+ if (ref > 1 && !bfq_bfqq_busy(bfqq) && -+ reason != BFQ_BFQQ_BUDGET_TIMEOUT && -+ reason != BFQ_BFQQ_BUDGET_EXHAUSTED) -+ bfq_mark_bfqq_non_blocking_wait_rq(bfqq); - } - - /* -@@ -2345,20 +3214,17 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, - */ - static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) - { -- if (bfq_bfqq_budget_new(bfqq) || -- time_before(jiffies, bfqq->budget_timeout)) -- return false; -- return true; -+ return time_is_before_eq_jiffies(bfqq->budget_timeout); - } - - /* -- * If we expire a queue that is waiting for the arrival of a new -- * request, we may prevent the fictitious timestamp back-shifting that -- * allows the guarantees of the queue to be preserved (see [1] for -- * this tricky aspect). Hence we return true only if this condition -- * does not hold, or if the queue is slow enough to deserve only to be -- * kicked off for preserving a high throughput. --*/ -+ * If we expire a queue that is actively waiting (i.e., with the -+ * device idled) for the arrival of a new request, then we may incur -+ * the timestamp misalignment problem described in the body of the -+ * function __bfq_activate_entity. Hence we return true only if this -+ * condition does not hold, or if the queue is slow enough to deserve -+ * only to be kicked off for preserving a high throughput. -+ */ - static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) - { - bfq_log_bfqq(bfqq->bfqd, bfqq, -@@ -2400,10 +3266,12 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) - { - struct bfq_data *bfqd = bfqq->bfqd; - bool idling_boosts_thr, idling_boosts_thr_without_issues, -- all_queues_seeky, on_hdd_and_not_all_queues_seeky, - idling_needed_for_service_guarantees, - asymmetric_scenario; - -+ if (bfqd->strict_guarantees) -+ return true; -+ - /* - * The next variable takes into account the cases where idling - * boosts the throughput. -@@ -2466,74 +3334,27 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) - bfqd->wr_busy_queues == 0; - - /* -- * There are then two cases where idling must be performed not -+ * There is then a case where idling must be performed not - * for throughput concerns, but to preserve service -- * guarantees. In the description of these cases, we say, for -- * short, that a queue is sequential/random if the process -- * associated to the queue issues sequential/random requests -- * (in the second case the queue may be tagged as seeky or -- * even constantly_seeky). -+ * guarantees. - * -- * To introduce the first case, we note that, since -- * bfq_bfqq_idle_window(bfqq) is false if the device is -- * NCQ-capable and bfqq is random (see -- * bfq_update_idle_window()), then, from the above two -- * assignments it follows that -- * idling_boosts_thr_without_issues is false if the device is -- * NCQ-capable and bfqq is random. Therefore, for this case, -- * device idling would never be allowed if we used just -- * idling_boosts_thr_without_issues to decide whether to allow -- * it. And, beneficially, this would imply that throughput -- * would always be boosted also with random I/O on NCQ-capable -- * HDDs. -- * -- * But we must be careful on this point, to avoid an unfair -- * treatment for bfqq. In fact, because of the same above -- * assignments, idling_boosts_thr_without_issues is, on the -- * other hand, true if 1) the device is an HDD and bfqq is -- * sequential, and 2) there are no busy weight-raised -- * queues. As a consequence, if we used just -- * idling_boosts_thr_without_issues to decide whether to idle -- * the device, then with an HDD we might easily bump into a -- * scenario where queues that are sequential and I/O-bound -- * would enjoy idling, whereas random queues would not. The -- * latter might then get a low share of the device throughput, -- * simply because the former would get many requests served -- * after being set as in service, while the latter would not. -- * -- * To address this issue, we start by setting to true a -- * sentinel variable, on_hdd_and_not_all_queues_seeky, if the -- * device is rotational and not all queues with pending or -- * in-flight requests are constantly seeky (i.e., there are -- * active sequential queues, and bfqq might then be mistreated -- * if it does not enjoy idling because it is random). -- */ -- all_queues_seeky = bfq_bfqq_constantly_seeky(bfqq) && -- bfqd->busy_in_flight_queues == -- bfqd->const_seeky_busy_in_flight_queues; -- -- on_hdd_and_not_all_queues_seeky = -- !blk_queue_nonrot(bfqd->queue) && !all_queues_seeky; -- -- /* -- * To introduce the second case where idling needs to be -- * performed to preserve service guarantees, we can note that -- * allowing the drive to enqueue more than one request at a -- * time, and hence delegating de facto final scheduling -- * decisions to the drive's internal scheduler, causes loss of -- * control on the actual request service order. In particular, -- * the critical situation is when requests from different -- * processes happens to be present, at the same time, in the -- * internal queue(s) of the drive. In such a situation, the -- * drive, by deciding the service order of the -- * internally-queued requests, does determine also the actual -- * throughput distribution among these processes. But the -- * drive typically has no notion or concern about per-process -- * throughput distribution, and makes its decisions only on a -- * per-request basis. Therefore, the service distribution -- * enforced by the drive's internal scheduler is likely to -- * coincide with the desired device-throughput distribution -- * only in a completely symmetric scenario where: -+ * To introduce this case, we can note that allowing the drive -+ * to enqueue more than one request at a time, and hence -+ * delegating de facto final scheduling decisions to the -+ * drive's internal scheduler, entails loss of control on the -+ * actual request service order. In particular, the critical -+ * situation is when requests from different processes happen -+ * to be present, at the same time, in the internal queue(s) -+ * of the drive. In such a situation, the drive, by deciding -+ * the service order of the internally-queued requests, does -+ * determine also the actual throughput distribution among -+ * these processes. But the drive typically has no notion or -+ * concern about per-process throughput distribution, and -+ * makes its decisions only on a per-request basis. Therefore, -+ * the service distribution enforced by the drive's internal -+ * scheduler is likely to coincide with the desired -+ * device-throughput distribution only in a completely -+ * symmetric scenario where: - * (i) each of these processes must get the same throughput as - * the others; - * (ii) all these processes have the same I/O pattern -@@ -2555,26 +3376,53 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) - * words, only if sub-condition (i) holds, then idling is - * allowed, and the device tends to be prevented from queueing - * many requests, possibly of several processes. The reason -- * for not controlling also sub-condition (ii) is that, first, -- * in the case of an HDD, the asymmetry in terms of types of -- * I/O patterns is already taken in to account in the above -- * sentinel variable -- * on_hdd_and_not_all_queues_seeky. Secondly, in the case of a -- * flash-based device, we prefer however to privilege -- * throughput (and idling lowers throughput for this type of -- * devices), for the following reasons: -- * 1) differently from HDDs, the service time of random -- * requests is not orders of magnitudes lower than the service -- * time of sequential requests; thus, even if processes doing -- * sequential I/O get a preferential treatment with respect to -- * others doing random I/O, the consequences are not as -- * dramatic as with HDDs; -- * 2) if a process doing random I/O does need strong -- * throughput guarantees, it is hopefully already being -- * weight-raised, or the user is likely to have assigned it a -- * higher weight than the other processes (and thus -- * sub-condition (i) is likely to be false, which triggers -- * idling). -+ * for not controlling also sub-condition (ii) is that we -+ * exploit preemption to preserve guarantees in case of -+ * symmetric scenarios, even if (ii) does not hold, as -+ * explained in the next two paragraphs. -+ * -+ * Even if a queue, say Q, is expired when it remains idle, Q -+ * can still preempt the new in-service queue if the next -+ * request of Q arrives soon (see the comments on -+ * bfq_bfqq_update_budg_for_activation). If all queues and -+ * groups have the same weight, this form of preemption, -+ * combined with the hole-recovery heuristic described in the -+ * comments on function bfq_bfqq_update_budg_for_activation, -+ * are enough to preserve a correct bandwidth distribution in -+ * the mid term, even without idling. In fact, even if not -+ * idling allows the internal queues of the device to contain -+ * many requests, and thus to reorder requests, we can rather -+ * safely assume that the internal scheduler still preserves a -+ * minimum of mid-term fairness. The motivation for using -+ * preemption instead of idling is that, by not idling, -+ * service guarantees are preserved without minimally -+ * sacrificing throughput. In other words, both a high -+ * throughput and its desired distribution are obtained. -+ * -+ * More precisely, this preemption-based, idleless approach -+ * provides fairness in terms of IOPS, and not sectors per -+ * second. This can be seen with a simple example. Suppose -+ * that there are two queues with the same weight, but that -+ * the first queue receives requests of 8 sectors, while the -+ * second queue receives requests of 1024 sectors. In -+ * addition, suppose that each of the two queues contains at -+ * most one request at a time, which implies that each queue -+ * always remains idle after it is served. Finally, after -+ * remaining idle, each queue receives very quickly a new -+ * request. It follows that the two queues are served -+ * alternatively, preempting each other if needed. This -+ * implies that, although both queues have the same weight, -+ * the queue with large requests receives a service that is -+ * 1024/8 times as high as the service received by the other -+ * queue. -+ * -+ * On the other hand, device idling is performed, and thus -+ * pure sector-domain guarantees are provided, for the -+ * following queues, which are likely to need stronger -+ * throughput guarantees: weight-raised queues, and queues -+ * with a higher weight than other queues. When such queues -+ * are active, sub-condition (i) is false, which triggers -+ * device idling. - * - * According to the above considerations, the next variable is - * true (only) if sub-condition (i) holds. To compute the -@@ -2582,7 +3430,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) - * the function bfq_symmetric_scenario(), but also check - * whether bfqq is being weight-raised, because - * bfq_symmetric_scenario() does not take into account also -- * weight-raised queues (see comments to -+ * weight-raised queues (see comments on - * bfq_weights_tree_add()). - * - * As a side note, it is worth considering that the above -@@ -2604,17 +3452,16 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) - * bfqq. Such a case is when bfqq became active in a burst of - * queue activations. Queues that became active during a large - * burst benefit only from throughput, as discussed in the -- * comments to bfq_handle_burst. Thus, if bfqq became active -+ * comments on bfq_handle_burst. Thus, if bfqq became active - * in a burst and not idling the device maximizes throughput, - * then the device must no be idled, because not idling the - * device provides bfqq and all other queues in the burst with -- * maximum benefit. Combining this and the two cases above, we -- * can now establish when idling is actually needed to -- * preserve service guarantees. -+ * maximum benefit. Combining this and the above case, we can -+ * now establish when idling is actually needed to preserve -+ * service guarantees. - */ - idling_needed_for_service_guarantees = -- (on_hdd_and_not_all_queues_seeky || asymmetric_scenario) && -- !bfq_bfqq_in_large_burst(bfqq); -+ asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq); - - /* - * We have now all the components we need to compute the return -@@ -2624,6 +3471,16 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) - * 2) idling either boosts the throughput (without issues), or - * is necessary to preserve service guarantees. - */ -+ bfq_log_bfqq(bfqd, bfqq, "may_idle: sync %d idling_boosts_thr %d", -+ bfq_bfqq_sync(bfqq), idling_boosts_thr); -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "may_idle: wr_busy %d boosts %d IO-bound %d guar %d", -+ bfqd->wr_busy_queues, -+ idling_boosts_thr_without_issues, -+ bfq_bfqq_IO_bound(bfqq), -+ idling_needed_for_service_guarantees); -+ - return bfq_bfqq_sync(bfqq) && - (idling_boosts_thr_without_issues || - idling_needed_for_service_guarantees); -@@ -2635,7 +3492,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) - * 1) the queue must remain in service and cannot be expired, and - * 2) the device must be idled to wait for the possible arrival of a new - * request for the queue. -- * See the comments to the function bfq_bfqq_may_idle for the reasons -+ * See the comments on the function bfq_bfqq_may_idle for the reasons - * why performing device idling is the best choice to boost the throughput - * and preserve service guarantees when bfq_bfqq_may_idle itself - * returns true. -@@ -2665,18 +3522,33 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) - bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); - - if (bfq_may_expire_for_budg_timeout(bfqq) && -- !timer_pending(&bfqd->idle_slice_timer) && -+ !hrtimer_active(&bfqd->idle_slice_timer) && - !bfq_bfqq_must_idle(bfqq)) - goto expire; - -+check_queue: -+ /* -+ * This loop is rarely executed more than once. Even when it -+ * happens, it is much more convenient to re-execute this loop -+ * than to return NULL and trigger a new dispatch to get a -+ * request served. -+ */ - next_rq = bfqq->next_rq; - /* - * If bfqq has requests queued and it has enough budget left to - * serve them, keep the queue, otherwise expire it. - */ - if (next_rq) { -+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); -+ - if (bfq_serv_to_charge(next_rq, bfqq) > - bfq_bfqq_budget_left(bfqq)) { -+ /* -+ * Expire the queue for budget exhaustion, -+ * which makes sure that the next budget is -+ * enough to serve the next request, even if -+ * it comes from the fifo expired path. -+ */ - reason = BFQ_BFQQ_BUDGET_EXHAUSTED; - goto expire; - } else { -@@ -2685,7 +3557,8 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) - * not disable disk idling even when a new request - * arrives. - */ -- if (timer_pending(&bfqd->idle_slice_timer)) { -+ if (bfq_bfqq_wait_request(bfqq)) { -+ BUG_ON(!hrtimer_active(&bfqd->idle_slice_timer)); - /* - * If we get here: 1) at least a new request - * has arrived but we have not disabled the -@@ -2700,10 +3573,8 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) - * So we disable idling. - */ - bfq_clear_bfqq_wait_request(bfqq); -- del_timer(&bfqd->idle_slice_timer); --#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ hrtimer_try_to_cancel(&bfqd->idle_slice_timer); - bfqg_stats_update_idle_time(bfqq_group(bfqq)); --#endif - } - goto keep_queue; - } -@@ -2714,7 +3585,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) - * for a new request, or has requests waiting for a completion and - * may idle after their completion, then keep it anyway. - */ -- if (timer_pending(&bfqd->idle_slice_timer) || -+ if (hrtimer_active(&bfqd->idle_slice_timer) || - (bfqq->dispatched != 0 && bfq_bfqq_may_idle(bfqq))) { - bfqq = NULL; - goto keep_queue; -@@ -2725,9 +3596,16 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) - bfq_bfqq_expire(bfqd, bfqq, false, reason); - new_queue: - bfqq = bfq_set_in_service_queue(bfqd); -- bfq_log(bfqd, "select_queue: new queue %d returned", -- bfqq ? bfqq->pid : 0); -+ if (bfqq) { -+ bfq_log_bfqq(bfqd, bfqq, "select_queue: checking new queue"); -+ goto check_queue; -+ } - keep_queue: -+ if (bfqq) -+ bfq_log_bfqq(bfqd, bfqq, "select_queue: returned this queue"); -+ else -+ bfq_log(bfqd, "select_queue: no queue returned"); -+ - return bfqq; - } - -@@ -2736,6 +3614,9 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) - struct bfq_entity *entity = &bfqq->entity; - - if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */ -+ BUG_ON(bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time && -+ time_is_after_jiffies(bfqq->last_wr_start_finish)); -+ - bfq_log_bfqq(bfqd, bfqq, - "raising period dur %u/%u msec, old coeff %u, w %d(%d)", - jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), -@@ -2749,22 +3630,30 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) - bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); - - /* -- * If the queue was activated in a burst, or -- * too much time has elapsed from the beginning -- * of this weight-raising period, or the queue has -- * exceeded the acceptable number of cooperations, -- * then end weight raising. -+ * If the queue was activated in a burst, or too much -+ * time has elapsed from the beginning of this -+ * weight-raising period, then end weight raising. - */ -- if (bfq_bfqq_in_large_burst(bfqq) || -- bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh || -- time_is_before_jiffies(bfqq->last_wr_start_finish + -- bfqq->wr_cur_max_time)) { -- bfqq->last_wr_start_finish = jiffies; -- bfq_log_bfqq(bfqd, bfqq, -- "wrais ending at %lu, rais_max_time %u", -- bfqq->last_wr_start_finish, -- jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ if (bfq_bfqq_in_large_burst(bfqq)) - bfq_bfqq_end_wr(bfqq); -+ else if (time_is_before_jiffies(bfqq->last_wr_start_finish + -+ bfqq->wr_cur_max_time)) { -+ if (bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time || -+ time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt + -+ bfq_wr_duration(bfqd))) -+ bfq_bfqq_end_wr(bfqq); -+ else { -+ /* switch back to interactive wr */ -+ bfqq->wr_coeff = bfqd->bfq_wr_coeff; -+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -+ bfqq->last_wr_start_finish = -+ bfqq->wr_start_at_switch_to_srt; -+ BUG_ON(time_is_after_jiffies( -+ bfqq->last_wr_start_finish)); -+ bfqq->entity.prio_changed = 1; -+ bfq_log_bfqq(bfqd, bfqq, -+ "back to interactive wr"); -+ } - } - } - /* Update weight both if it must be raised and if it must be lowered */ -@@ -2782,46 +3671,34 @@ static int bfq_dispatch_request(struct bfq_data *bfqd, - struct bfq_queue *bfqq) - { - int dispatched = 0; -- struct request *rq; -+ struct request *rq = bfqq->next_rq; - unsigned long service_to_charge; - - BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); -- -- /* Follow expired path, else get first next available. */ -- rq = bfq_check_fifo(bfqq); -- if (!rq) -- rq = bfqq->next_rq; -+ BUG_ON(!rq); - service_to_charge = bfq_serv_to_charge(rq, bfqq); - -- if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { -- /* -- * This may happen if the next rq is chosen in fifo order -- * instead of sector order. The budget is properly -- * dimensioned to be always sufficient to serve the next -- * request only if it is chosen in sector order. The reason -- * is that it would be quite inefficient and little useful -- * to always make sure that the budget is large enough to -- * serve even the possible next rq in fifo order. -- * In fact, requests are seldom served in fifo order. -- * -- * Expire the queue for budget exhaustion, and make sure -- * that the next act_budget is enough to serve the next -- * request, even if it comes from the fifo expired path. -- */ -- bfqq->next_rq = rq; -- /* -- * Since this dispatch is failed, make sure that -- * a new one will be performed -- */ -- if (!bfqd->rq_in_driver) -- bfq_schedule_dispatch(bfqd); -- goto expire; -- } -+ BUG_ON(service_to_charge > bfq_bfqq_budget_left(bfqq)); -+ -+ BUG_ON(bfqq->entity.budget < bfqq->entity.service); - -- /* Finally, insert request into driver dispatch list. */ - bfq_bfqq_served(bfqq, service_to_charge); -+ -+ BUG_ON(bfqq->entity.budget < bfqq->entity.service); -+ - bfq_dispatch_insert(bfqd->queue, rq); - -+ /* -+ * If weight raising has to terminate for bfqq, then next -+ * function causes an immediate update of bfqq's weight, -+ * without waiting for next activation. As a consequence, on -+ * expiration, bfqq will be timestamped as if has never been -+ * weight-raised during this service slot, even if it has -+ * received part or even most of the service as a -+ * weight-raised queue. This inflates bfqq's timestamps, which -+ * is beneficial, as bfqq is then more willing to leave the -+ * device immediately to possible other weight-raised queues. -+ */ - bfq_update_wr_data(bfqd, bfqq); - - bfq_log_bfqq(bfqd, bfqq, -@@ -2837,9 +3714,7 @@ static int bfq_dispatch_request(struct bfq_data *bfqd, - bfqd->in_service_bic = RQ_BIC(rq); - } - -- if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) && -- dispatched >= bfqd->bfq_max_budget_async_rq) || -- bfq_class_idle(bfqq))) -+ if (bfqd->busy_queues > 1 && bfq_class_idle(bfqq)) - goto expire; - - return dispatched; -@@ -2885,8 +3760,8 @@ static int bfq_forced_dispatch(struct bfq_data *bfqd) - st = bfq_entity_service_tree(&bfqq->entity); - - dispatched += __bfq_forced_dispatch_bfqq(bfqq); -- bfqq->max_budget = bfq_max_budget(bfqd); - -+ bfqq->max_budget = bfq_max_budget(bfqd); - bfq_forget_idle(st); - } - -@@ -2899,37 +3774,37 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) - { - struct bfq_data *bfqd = q->elevator->elevator_data; - struct bfq_queue *bfqq; -- int max_dispatch; - - bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); -+ - if (bfqd->busy_queues == 0) - return 0; - - if (unlikely(force)) - return bfq_forced_dispatch(bfqd); - -+ /* -+ * Force device to serve one request at a time if -+ * strict_guarantees is true. Forcing this service scheme is -+ * currently the ONLY way to guarantee that the request -+ * service order enforced by the scheduler is respected by a -+ * queueing device. Otherwise the device is free even to make -+ * some unlucky request wait for as long as the device -+ * wishes. -+ * -+ * Of course, serving one request at at time may cause loss of -+ * throughput. -+ */ -+ if (bfqd->strict_guarantees && bfqd->rq_in_driver > 0) -+ return 0; -+ - bfqq = bfq_select_queue(bfqd); - if (!bfqq) - return 0; - -- if (bfq_class_idle(bfqq)) -- max_dispatch = 1; -- -- if (!bfq_bfqq_sync(bfqq)) -- max_dispatch = bfqd->bfq_max_budget_async_rq; -- -- if (!bfq_bfqq_sync(bfqq) && bfqq->dispatched >= max_dispatch) { -- if (bfqd->busy_queues > 1) -- return 0; -- if (bfqq->dispatched >= 4 * max_dispatch) -- return 0; -- } -- -- if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq)) -- return 0; -+ BUG_ON(bfqq->entity.budget < bfqq->entity.service); - -- bfq_clear_bfqq_wait_request(bfqq); -- BUG_ON(timer_pending(&bfqd->idle_slice_timer)); -+ BUG_ON(bfq_bfqq_wait_request(bfqq)); - - if (!bfq_dispatch_request(bfqd, bfqq)) - return 0; -@@ -2937,6 +3812,8 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) - bfq_log_bfqq(bfqd, bfqq, "dispatched %s request", - bfq_bfqq_sync(bfqq) ? "sync" : "async"); - -+ BUG_ON(bfqq->next_rq == NULL && -+ bfqq->entity.budget < bfqq->entity.service); - return 1; - } - -@@ -2944,27 +3821,26 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) - * Task holds one reference to the queue, dropped when task exits. Each rq - * in-flight on this queue also holds a reference, dropped when rq is freed. - * -- * Queue lock must be held here. -+ * Queue lock must be held here. Recall not to use bfqq after calling -+ * this function on it. - */ - static void bfq_put_queue(struct bfq_queue *bfqq) - { -- struct bfq_data *bfqd = bfqq->bfqd; - #ifdef CONFIG_BFQ_GROUP_IOSCHED - struct bfq_group *bfqg = bfqq_group(bfqq); - #endif - -- BUG_ON(atomic_read(&bfqq->ref) <= 0); -+ BUG_ON(bfqq->ref <= 0); - -- bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, -- atomic_read(&bfqq->ref)); -- if (!atomic_dec_and_test(&bfqq->ref)) -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p %d", bfqq, bfqq->ref); -+ bfqq->ref--; -+ if (bfqq->ref) - return; - - BUG_ON(rb_first(&bfqq->sort_list)); - BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); - BUG_ON(bfqq->entity.tree); - BUG_ON(bfq_bfqq_busy(bfqq)); -- BUG_ON(bfqd->in_service_queue == bfqq); - - if (bfq_bfqq_sync(bfqq)) - /* -@@ -2977,7 +3853,7 @@ static void bfq_put_queue(struct bfq_queue *bfqq) - */ - hlist_del_init(&bfqq->burst_list_node); - -- bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p freed", bfqq); - - kmem_cache_free(bfq_pool, bfqq); - #ifdef CONFIG_BFQ_GROUP_IOSCHED -@@ -3011,38 +3887,16 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) - bfq_schedule_dispatch(bfqd); - } - -- bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, -- atomic_read(&bfqq->ref)); -+ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref); - - bfq_put_cooperator(bfqq); - -- bfq_put_queue(bfqq); -+ bfq_put_queue(bfqq); /* release process reference */ - } - - static void bfq_init_icq(struct io_cq *icq) - { -- struct bfq_io_cq *bic = icq_to_bic(icq); -- -- bic->ttime.last_end_request = jiffies; -- /* -- * A newly created bic indicates that the process has just -- * started doing I/O, and is probably mapping into memory its -- * executable and libraries: it definitely needs weight raising. -- * There is however the possibility that the process performs, -- * for a while, I/O close to some other process. EQM intercepts -- * this behavior and may merge the queue corresponding to the -- * process with some other queue, BEFORE the weight of the queue -- * is raised. Merged queues are not weight-raised (they are assumed -- * to belong to processes that benefit only from high throughput). -- * If the merge is basically the consequence of an accident, then -- * the queue will be split soon and will get back its old weight. -- * It is then important to write down somewhere that this queue -- * does need weight raising, even if it did not make it to get its -- * weight raised before being merged. To this purpose, we overload -- * the field raising_time_left and assign 1 to it, to mark the queue -- * as needing weight raising. -- */ -- bic->wr_time_left = 1; -+ icq_to_bic(icq)->ttime.last_end_request = ktime_get_ns() - (1ULL<<32); - } - - static void bfq_exit_icq(struct io_cq *icq) -@@ -3050,21 +3904,21 @@ static void bfq_exit_icq(struct io_cq *icq) - struct bfq_io_cq *bic = icq_to_bic(icq); - struct bfq_data *bfqd = bic_to_bfqd(bic); - -- if (bic->bfqq[BLK_RW_ASYNC]) { -- bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]); -- bic->bfqq[BLK_RW_ASYNC] = NULL; -+ if (bic_to_bfqq(bic, false)) { -+ bfq_exit_bfqq(bfqd, bic_to_bfqq(bic, false)); -+ bic_set_bfqq(bic, NULL, false); - } - -- if (bic->bfqq[BLK_RW_SYNC]) { -+ if (bic_to_bfqq(bic, true)) { - /* - * If the bic is using a shared queue, put the reference - * taken on the io_context when the bic started using a - * shared bfq_queue. - */ -- if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC])) -+ if (bfq_bfqq_coop(bic_to_bfqq(bic, true))) - put_io_context(icq->ioc); -- bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]); -- bic->bfqq[BLK_RW_SYNC] = NULL; -+ bfq_exit_bfqq(bfqd, bic_to_bfqq(bic, true)); -+ bic_set_bfqq(bic, NULL, true); - } - } - -@@ -3072,8 +3926,8 @@ static void bfq_exit_icq(struct io_cq *icq) - * Update the entity prio values; note that the new values will not - * be used until the next (re)activation. - */ --static void --bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) -+static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, -+ struct bfq_io_cq *bic) - { - struct task_struct *tsk = current; - int ioprio_class; -@@ -3105,7 +3959,7 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) - break; - } - -- if (bfqq->new_ioprio < 0 || bfqq->new_ioprio >= IOPRIO_BE_NR) { -+ if (bfqq->new_ioprio >= IOPRIO_BE_NR) { - pr_crit("bfq_set_next_ioprio_data: new_ioprio %d\n", - bfqq->new_ioprio); - BUG(); -@@ -3113,45 +3967,41 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) - - bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio); - bfqq->entity.prio_changed = 1; -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "set_next_ioprio_data: bic_class %d prio %d class %d", -+ ioprio_class, bfqq->new_ioprio, bfqq->new_ioprio_class); - } - - static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio) - { -- struct bfq_data *bfqd; -- struct bfq_queue *bfqq, *new_bfqq; -+ struct bfq_data *bfqd = bic_to_bfqd(bic); -+ struct bfq_queue *bfqq; - unsigned long uninitialized_var(flags); - int ioprio = bic->icq.ioc->ioprio; - -- bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), -- &flags); - /* - * This condition may trigger on a newly created bic, be sure to - * drop the lock before returning. - */ - if (unlikely(!bfqd) || likely(bic->ioprio == ioprio)) -- goto out; -+ return; - - bic->ioprio = ioprio; - -- bfqq = bic->bfqq[BLK_RW_ASYNC]; -+ bfqq = bic_to_bfqq(bic, false); - if (bfqq) { -- new_bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic, -- GFP_ATOMIC); -- if (new_bfqq) { -- bic->bfqq[BLK_RW_ASYNC] = new_bfqq; -- bfq_log_bfqq(bfqd, bfqq, -- "check_ioprio_change: bfqq %p %d", -- bfqq, atomic_read(&bfqq->ref)); -- bfq_put_queue(bfqq); -- } -+ /* release process reference on this queue */ -+ bfq_put_queue(bfqq); -+ bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic); -+ bic_set_bfqq(bic, bfqq, false); -+ bfq_log_bfqq(bfqd, bfqq, -+ "check_ioprio_change: bfqq %p %d", -+ bfqq, bfqq->ref); - } - -- bfqq = bic->bfqq[BLK_RW_SYNC]; -+ bfqq = bic_to_bfqq(bic, true); - if (bfqq) - bfq_set_next_ioprio_data(bfqq, bic); -- --out: -- bfq_put_bfqd_unlock(bfqd, &flags); - } - - static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, -@@ -3160,8 +4010,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, - RB_CLEAR_NODE(&bfqq->entity.rb_node); - INIT_LIST_HEAD(&bfqq->fifo); - INIT_HLIST_NODE(&bfqq->burst_list_node); -+ BUG_ON(!hlist_unhashed(&bfqq->burst_list_node)); - -- atomic_set(&bfqq->ref, 0); -+ bfqq->ref = 0; - bfqq->bfqd = bfqd; - - if (bic) -@@ -3171,6 +4022,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, - if (!bfq_class_idle(bfqq)) - bfq_mark_bfqq_idle_window(bfqq); - bfq_mark_bfqq_sync(bfqq); -+ bfq_mark_bfqq_just_created(bfqq); - } else - bfq_clear_bfqq_sync(bfqq); - bfq_mark_bfqq_IO_bound(bfqq); -@@ -3180,72 +4032,19 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, - bfqq->pid = pid; - - bfqq->wr_coeff = 1; -- bfqq->last_wr_start_finish = 0; -+ bfqq->last_wr_start_finish = jiffies; -+ bfqq->wr_start_at_switch_to_srt = bfq_smallest_from_now(); -+ bfqq->budget_timeout = bfq_smallest_from_now(); -+ bfqq->split_time = bfq_smallest_from_now(); -+ - /* - * Set to the value for which bfqq will not be deemed as - * soft rt when it becomes backlogged. - */ -- bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies); --} -- --static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, -- struct bio *bio, int is_sync, -- struct bfq_io_cq *bic, -- gfp_t gfp_mask) --{ -- struct bfq_group *bfqg; -- struct bfq_queue *bfqq, *new_bfqq = NULL; -- struct blkcg *blkcg; -- --retry: -- rcu_read_lock(); -- -- blkcg = bio_blkcg(bio); -- bfqg = bfq_find_alloc_group(bfqd, blkcg); -- /* bic always exists here */ -- bfqq = bic_to_bfqq(bic, is_sync); -- -- /* -- * Always try a new alloc if we fall back to the OOM bfqq -- * originally, since it should just be a temporary situation. -- */ -- if (!bfqq || bfqq == &bfqd->oom_bfqq) { -- bfqq = NULL; -- if (new_bfqq) { -- bfqq = new_bfqq; -- new_bfqq = NULL; -- } else if (gfpflags_allow_blocking(gfp_mask)) { -- rcu_read_unlock(); -- spin_unlock_irq(bfqd->queue->queue_lock); -- new_bfqq = kmem_cache_alloc_node(bfq_pool, -- gfp_mask | __GFP_ZERO, -- bfqd->queue->node); -- spin_lock_irq(bfqd->queue->queue_lock); -- if (new_bfqq) -- goto retry; -- } else { -- bfqq = kmem_cache_alloc_node(bfq_pool, -- gfp_mask | __GFP_ZERO, -- bfqd->queue->node); -- } -+ bfqq->soft_rt_next_start = bfq_greatest_from_now(); - -- if (bfqq) { -- bfq_init_bfqq(bfqd, bfqq, bic, current->pid, -- is_sync); -- bfq_init_entity(&bfqq->entity, bfqg); -- bfq_log_bfqq(bfqd, bfqq, "allocated"); -- } else { -- bfqq = &bfqd->oom_bfqq; -- bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); -- } -- } -- -- if (new_bfqq) -- kmem_cache_free(bfq_pool, new_bfqq); -- -- rcu_read_unlock(); -- -- return bfqq; -+ /* first request is almost certainly seeky */ -+ bfqq->seek_history = 1; - } - - static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, -@@ -3268,90 +4067,93 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, - } - - static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, -- struct bio *bio, int is_sync, -- struct bfq_io_cq *bic, gfp_t gfp_mask) -+ struct bio *bio, bool is_sync, -+ struct bfq_io_cq *bic) - { - const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio); - const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); - struct bfq_queue **async_bfqq = NULL; -- struct bfq_queue *bfqq = NULL; -+ struct bfq_queue *bfqq; -+ struct bfq_group *bfqg; - -- if (!is_sync) { -- struct blkcg *blkcg; -- struct bfq_group *bfqg; -+ rcu_read_lock(); -+ -+ bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio)); -+ if (!bfqg) { -+ bfqq = &bfqd->oom_bfqq; -+ goto out; -+ } - -- rcu_read_lock(); -- blkcg = bio_blkcg(bio); -- rcu_read_unlock(); -- bfqg = bfq_find_alloc_group(bfqd, blkcg); -+ if (!is_sync) { - async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, - ioprio); - bfqq = *async_bfqq; -+ if (bfqq) -+ goto out; - } - -- if (!bfqq) -- bfqq = bfq_find_alloc_queue(bfqd, bio, is_sync, bic, gfp_mask); -+ bfqq = kmem_cache_alloc_node(bfq_pool, -+ GFP_NOWAIT | __GFP_ZERO | __GFP_NOWARN, -+ bfqd->queue->node); -+ -+ if (bfqq) { -+ bfq_init_bfqq(bfqd, bfqq, bic, current->pid, -+ is_sync); -+ bfq_init_entity(&bfqq->entity, bfqg); -+ bfq_log_bfqq(bfqd, bfqq, "allocated"); -+ } else { -+ bfqq = &bfqd->oom_bfqq; -+ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); -+ goto out; -+ } - - /* - * Pin the queue now that it's allocated, scheduler exit will - * prune it. - */ -- if (!is_sync && !(*async_bfqq)) { -- atomic_inc(&bfqq->ref); -+ if (async_bfqq) { -+ bfqq->ref++; /* -+ * Extra group reference, w.r.t. sync -+ * queue. This extra reference is removed -+ * only if bfqq->bfqg disappears, to -+ * guarantee that this queue is not freed -+ * until its group goes away. -+ */ - bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", -- bfqq, atomic_read(&bfqq->ref)); -+ bfqq, bfqq->ref); - *async_bfqq = bfqq; - } - -- atomic_inc(&bfqq->ref); -- bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, -- atomic_read(&bfqq->ref)); -+out: -+ bfqq->ref++; /* get a process reference to this queue */ -+ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref); -+ rcu_read_unlock(); - return bfqq; - } - - static void bfq_update_io_thinktime(struct bfq_data *bfqd, - struct bfq_io_cq *bic) - { -- unsigned long elapsed = jiffies - bic->ttime.last_end_request; -- unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); -+ struct bfq_ttime *ttime = &bic->ttime; -+ u64 elapsed = ktime_get_ns() - bic->ttime.last_end_request; - -- bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8; -- bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8; -- bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) / -- bic->ttime.ttime_samples; -+ elapsed = min_t(u64, elapsed, 2 * bfqd->bfq_slice_idle); -+ -+ ttime->ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8; -+ ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8); -+ ttime->ttime_mean = div64_ul(ttime->ttime_total + 128, -+ ttime->ttime_samples); - } - --static void bfq_update_io_seektime(struct bfq_data *bfqd, -- struct bfq_queue *bfqq, -- struct request *rq) -+static void -+bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ struct request *rq) - { -- sector_t sdist; -- u64 total; -- -- if (bfqq->last_request_pos < blk_rq_pos(rq)) -- sdist = blk_rq_pos(rq) - bfqq->last_request_pos; -- else -- sdist = bfqq->last_request_pos - blk_rq_pos(rq); -- -- /* -- * Don't allow the seek distance to get too large from the -- * odd fragment, pagein, etc. -- */ -- if (bfqq->seek_samples == 0) /* first request, not really a seek */ -- sdist = 0; -- else if (bfqq->seek_samples <= 60) /* second & third seek */ -- sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024); -- else -- sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64); -- -- bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8; -- bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8; -- total = bfqq->seek_total + (bfqq->seek_samples/2); -- do_div(total, bfqq->seek_samples); -- bfqq->seek_mean = (sector_t)total; -- -- bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist, -- (u64)bfqq->seek_mean); -+ bfqq->seek_history <<= 1; -+ bfqq->seek_history |= -+ get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR && -+ (!blk_queue_nonrot(bfqd->queue) || -+ blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT); - } - - /* -@@ -3369,7 +4171,8 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, - return; - - /* Idle window just restored, statistics are meaningless. */ -- if (bfq_bfqq_just_split(bfqq)) -+ if (time_is_after_eq_jiffies(bfqq->split_time + -+ bfqd->bfq_wr_min_idle_time)) - return; - - enable_idle = bfq_bfqq_idle_window(bfqq); -@@ -3409,22 +4212,13 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, - - bfq_update_io_thinktime(bfqd, bic); - bfq_update_io_seektime(bfqd, bfqq, rq); -- if (!BFQQ_SEEKY(bfqq) && bfq_bfqq_constantly_seeky(bfqq)) { -- bfq_clear_bfqq_constantly_seeky(bfqq); -- if (!blk_queue_nonrot(bfqd->queue)) { -- BUG_ON(!bfqd->const_seeky_busy_in_flight_queues); -- bfqd->const_seeky_busy_in_flight_queues--; -- } -- } - if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || - !BFQQ_SEEKY(bfqq)) - bfq_update_idle_window(bfqd, bfqq, bic); -- bfq_clear_bfqq_just_split(bfqq); - - bfq_log_bfqq(bfqd, bfqq, -- "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", -- bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), -- (unsigned long long) bfqq->seek_mean); -+ "rq_enqueued: idle_window=%d (seeky %d)", -+ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq)); - - bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); - -@@ -3438,14 +4232,15 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, - * is small and the queue is not to be expired, then - * just exit. - * -- * In this way, if the disk is being idled to wait for -- * a new request from the in-service queue, we avoid -- * unplugging the device and committing the disk to serve -- * just a small request. On the contrary, we wait for -- * the block layer to decide when to unplug the device: -- * hopefully, new requests will be merged to this one -- * quickly, then the device will be unplugged and -- * larger requests will be dispatched. -+ * In this way, if the device is being idled to wait -+ * for a new request from the in-service queue, we -+ * avoid unplugging the device and committing the -+ * device to serve just a small request. On the -+ * contrary, we wait for the block layer to decide -+ * when to unplug the device: hopefully, new requests -+ * will be merged to this one quickly, then the device -+ * will be unplugged and larger requests will be -+ * dispatched. - */ - if (small_req && !budget_timeout) - return; -@@ -3457,10 +4252,8 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, - * timer. - */ - bfq_clear_bfqq_wait_request(bfqq); -- del_timer(&bfqd->idle_slice_timer); --#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ hrtimer_try_to_cancel(&bfqd->idle_slice_timer); - bfqg_stats_update_idle_time(bfqq_group(bfqq)); --#endif - - /* - * The queue is not empty, because a new request just -@@ -3504,28 +4297,24 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq) - */ - new_bfqq->allocated[rq_data_dir(rq)]++; - bfqq->allocated[rq_data_dir(rq)]--; -- atomic_inc(&new_bfqq->ref); -- bfq_put_queue(bfqq); -+ new_bfqq->ref++; -+ bfq_clear_bfqq_just_created(bfqq); - if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq) - bfq_merge_bfqqs(bfqd, RQ_BIC(rq), - bfqq, new_bfqq); -+ /* -+ * rq is about to be enqueued into new_bfqq, -+ * release rq reference on bfqq -+ */ -+ bfq_put_queue(bfqq); - rq->elv.priv[1] = new_bfqq; - bfqq = new_bfqq; -- } else -- bfq_bfqq_increase_failed_cooperations(bfqq); -+ } - } - - bfq_add_request(rq); - -- /* -- * Here a newly-created bfq_queue has already started a weight-raising -- * period: clear raising_time_left to prevent bfq_bfqq_save_state() -- * from assigning it a full weight-raising period. See the detailed -- * comments about this field in bfq_init_icq(). -- */ -- if (bfqq->bic) -- bfqq->bic->wr_time_left = 0; -- rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; -+ rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; - list_add_tail(&rq->queuelist, &bfqq->fifo); - - bfq_rq_enqueued(bfqd, bfqq, rq); -@@ -3533,8 +4322,8 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq) - - static void bfq_update_hw_tag(struct bfq_data *bfqd) - { -- bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver, -- bfqd->rq_in_driver); -+ bfqd->max_rq_in_driver = max_t(int, bfqd->max_rq_in_driver, -+ bfqd->rq_in_driver); - - if (bfqd->hw_tag == 1) - return; -@@ -3560,48 +4349,85 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq) - { - struct bfq_queue *bfqq = RQ_BFQQ(rq); - struct bfq_data *bfqd = bfqq->bfqd; -- bool sync = bfq_bfqq_sync(bfqq); -+ u64 now_ns; -+ u32 delta_us; - -- bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)", -- blk_rq_sectors(rq), sync); -+ bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left", -+ blk_rq_sectors(rq)); - -+ assert_spin_locked(bfqd->queue->queue_lock); - bfq_update_hw_tag(bfqd); - - BUG_ON(!bfqd->rq_in_driver); - BUG_ON(!bfqq->dispatched); - bfqd->rq_in_driver--; - bfqq->dispatched--; --#ifdef CONFIG_BFQ_GROUP_IOSCHED - bfqg_stats_update_completion(bfqq_group(bfqq), - rq_start_time_ns(rq), -- rq_io_start_time_ns(rq), rq->cmd_flags); --#endif -+ rq_io_start_time_ns(rq), -+ rq->cmd_flags); - - if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) { -+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); -+ /* -+ * Set budget_timeout (which we overload to store the -+ * time at which the queue remains with no backlog and -+ * no outstanding request; used by the weight-raising -+ * mechanism). -+ */ -+ bfqq->budget_timeout = jiffies; -+ - bfq_weights_tree_remove(bfqd, &bfqq->entity, - &bfqd->queue_weights_tree); -- if (!blk_queue_nonrot(bfqd->queue)) { -- BUG_ON(!bfqd->busy_in_flight_queues); -- bfqd->busy_in_flight_queues--; -- if (bfq_bfqq_constantly_seeky(bfqq)) { -- BUG_ON(!bfqd-> -- const_seeky_busy_in_flight_queues); -- bfqd->const_seeky_busy_in_flight_queues--; -- } -- } - } - -- if (sync) { -- bfqd->sync_flight--; -- RQ_BIC(rq)->ttime.last_end_request = jiffies; -- } -+ now_ns = ktime_get_ns(); -+ -+ RQ_BIC(rq)->ttime.last_end_request = now_ns; -+ -+ /* -+ * Using us instead of ns, to get a reasonable precision in -+ * computing rate in next check. -+ */ -+ delta_us = div_u64(now_ns - bfqd->last_completion, NSEC_PER_USEC); -+ -+ bfq_log(bfqd, "rq_completed: delta %uus/%luus max_size %u rate %llu/%llu", -+ delta_us, BFQ_MIN_TT/NSEC_PER_USEC, bfqd->last_rq_max_size, -+ (USEC_PER_SEC* -+ (u64)((bfqd->last_rq_max_size<<BFQ_RATE_SHIFT)/delta_us)) -+ >>BFQ_RATE_SHIFT, -+ (USEC_PER_SEC*(u64)(1UL<<(BFQ_RATE_SHIFT-10)))>>BFQ_RATE_SHIFT); -+ -+ /* -+ * If the request took rather long to complete, and, according -+ * to the maximum request size recorded, this completion latency -+ * implies that the request was certainly served at a very low -+ * rate (less than 1M sectors/sec), then the whole observation -+ * interval that lasts up to this time instant cannot be a -+ * valid time interval for computing a new peak rate. Invoke -+ * bfq_update_rate_reset to have the following three steps -+ * taken: -+ * - close the observation interval at the last (previous) -+ * request dispatch or completion -+ * - compute rate, if possible, for that observation interval -+ * - reset to zero samples, which will trigger a proper -+ * re-initialization of the observation interval on next -+ * dispatch -+ */ -+ if (delta_us > BFQ_MIN_TT/NSEC_PER_USEC && -+ (bfqd->last_rq_max_size<<BFQ_RATE_SHIFT)/delta_us < -+ 1UL<<(BFQ_RATE_SHIFT - 10)) -+ bfq_update_rate_reset(bfqd, NULL); -+ bfqd->last_completion = now_ns; - - /* -- * If we are waiting to discover whether the request pattern of the -- * task associated with the queue is actually isochronous, and -- * both requisites for this condition to hold are satisfied, then -- * compute soft_rt_next_start (see the comments to the function -- * bfq_bfqq_softrt_next_start()). -+ * If we are waiting to discover whether the request pattern -+ * of the task associated with the queue is actually -+ * isochronous, and both requisites for this condition to hold -+ * are now satisfied, then compute soft_rt_next_start (see the -+ * comments on the function bfq_bfqq_softrt_next_start()). We -+ * schedule this delayed check when bfqq expires, if it still -+ * has in-flight requests. - */ - if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 && - RB_EMPTY_ROOT(&bfqq->sort_list)) -@@ -3613,10 +4439,7 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq) - * or if we want to idle in case it has no pending requests. - */ - if (bfqd->in_service_queue == bfqq) { -- if (bfq_bfqq_budget_new(bfqq)) -- bfq_set_budget_timeout(bfqd); -- -- if (bfq_bfqq_must_idle(bfqq)) { -+ if (bfqq->dispatched == 0 && bfq_bfqq_must_idle(bfqq)) { - bfq_arm_slice_timer(bfqd); - goto out; - } else if (bfq_may_expire_for_budg_timeout(bfqq)) -@@ -3646,7 +4469,7 @@ static int __bfq_may_queue(struct bfq_queue *bfqq) - return ELV_MQUEUE_MAY; - } - --static int bfq_may_queue(struct request_queue *q, int rw) -+static int bfq_may_queue(struct request_queue *q, unsigned int op) - { - struct bfq_data *bfqd = q->elevator->elevator_data; - struct task_struct *tsk = current; -@@ -3663,7 +4486,7 @@ static int bfq_may_queue(struct request_queue *q, int rw) - if (!bic) - return ELV_MQUEUE_MAY; - -- bfqq = bic_to_bfqq(bic, rw_is_sync(rw)); -+ bfqq = bic_to_bfqq(bic, op_is_sync(op)); - if (bfqq) - return __bfq_may_queue(bfqq); - -@@ -3687,14 +4510,14 @@ static void bfq_put_request(struct request *rq) - rq->elv.priv[1] = NULL; - - bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", -- bfqq, atomic_read(&bfqq->ref)); -+ bfqq, bfqq->ref); - bfq_put_queue(bfqq); - } - } - - /* - * Returns NULL if a new bfqq should be allocated, or the old bfqq if this -- * was the last process referring to said bfqq. -+ * was the last process referring to that bfqq. - */ - static struct bfq_queue * - bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) -@@ -3732,37 +4555,60 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, - unsigned long flags; - bool split = false; - -- might_sleep_if(gfpflags_allow_blocking(gfp_mask)); -- -- bfq_check_ioprio_change(bic, bio); -- - spin_lock_irqsave(q->queue_lock, flags); -+ bfq_check_ioprio_change(bic, bio); - - if (!bic) - goto queue_fail; - -+ bfq_check_ioprio_change(bic, bio); -+ - bfq_bic_update_cgroup(bic, bio); - - new_queue: - bfqq = bic_to_bfqq(bic, is_sync); - if (!bfqq || bfqq == &bfqd->oom_bfqq) { -- bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, gfp_mask); -+ if (bfqq) -+ bfq_put_queue(bfqq); -+ bfqq = bfq_get_queue(bfqd, bio, is_sync, bic); -+ BUG_ON(!hlist_unhashed(&bfqq->burst_list_node)); -+ - bic_set_bfqq(bic, bfqq, is_sync); - if (split && is_sync) { -+ bfq_log_bfqq(bfqd, bfqq, -+ "set_request: was_in_list %d " -+ "was_in_large_burst %d " -+ "large burst in progress %d", -+ bic->was_in_burst_list, -+ bic->saved_in_large_burst, -+ bfqd->large_burst); -+ - if ((bic->was_in_burst_list && bfqd->large_burst) || -- bic->saved_in_large_burst) -+ bic->saved_in_large_burst) { -+ bfq_log_bfqq(bfqd, bfqq, -+ "set_request: marking in " -+ "large burst"); - bfq_mark_bfqq_in_large_burst(bfqq); -- else { -+ } else { -+ bfq_log_bfqq(bfqd, bfqq, -+ "set_request: clearing in " -+ "large burst"); - bfq_clear_bfqq_in_large_burst(bfqq); - if (bic->was_in_burst_list) - hlist_add_head(&bfqq->burst_list_node, - &bfqd->burst_list); - } -+ bfqq->split_time = jiffies; - } - } else { - /* If the queue was seeky for too long, break it apart. */ - if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { - bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); -+ -+ /* Update bic before losing reference to bfqq */ -+ if (bfq_bfqq_in_large_burst(bfqq)) -+ bic->saved_in_large_burst = true; -+ - bfqq = bfq_split_bfqq(bic, bfqq); - split = true; - if (!bfqq) -@@ -3771,9 +4617,8 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, - } - - bfqq->allocated[rw]++; -- atomic_inc(&bfqq->ref); -- bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, -- atomic_read(&bfqq->ref)); -+ bfqq->ref++; -+ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, bfqq->ref); - - rq->elv.priv[0] = bic; - rq->elv.priv[1] = bfqq; -@@ -3788,7 +4633,6 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, - if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) { - bfqq->bic = bic; - if (split) { -- bfq_mark_bfqq_just_split(bfqq); - /* - * If the queue has just been split from a shared - * queue, restore the idle window and the possible -@@ -3798,6 +4642,9 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, - } - } - -+ if (unlikely(bfq_bfqq_just_created(bfqq))) -+ bfq_handle_burst(bfqd, bfqq); -+ - spin_unlock_irqrestore(q->queue_lock, flags); - - return 0; -@@ -3824,9 +4671,10 @@ static void bfq_kick_queue(struct work_struct *work) - * Handler of the expiration of the timer running if the in-service queue - * is idling inside its time slice. - */ --static void bfq_idle_slice_timer(unsigned long data) -+static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer) - { -- struct bfq_data *bfqd = (struct bfq_data *)data; -+ struct bfq_data *bfqd = container_of(timer, struct bfq_data, -+ idle_slice_timer); - struct bfq_queue *bfqq; - unsigned long flags; - enum bfqq_expiration reason; -@@ -3844,6 +4692,8 @@ static void bfq_idle_slice_timer(unsigned long data) - */ - if (bfqq) { - bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); -+ bfq_clear_bfqq_wait_request(bfqq); -+ - if (bfq_bfqq_budget_timeout(bfqq)) - /* - * Also here the queue can be safely expired -@@ -3869,25 +4719,26 @@ static void bfq_idle_slice_timer(unsigned long data) - bfq_schedule_dispatch(bfqd); - - spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); -+ return HRTIMER_NORESTART; - } - - static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) - { -- del_timer_sync(&bfqd->idle_slice_timer); -+ hrtimer_cancel(&bfqd->idle_slice_timer); - cancel_work_sync(&bfqd->unplug_work); - } - - static void __bfq_put_async_bfqq(struct bfq_data *bfqd, -- struct bfq_queue **bfqq_ptr) -+ struct bfq_queue **bfqq_ptr) - { - struct bfq_group *root_group = bfqd->root_group; - struct bfq_queue *bfqq = *bfqq_ptr; - - bfq_log(bfqd, "put_async_bfqq: %p", bfqq); - if (bfqq) { -- bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); -+ bfq_bfqq_move(bfqd, bfqq, root_group); - bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", -- bfqq, atomic_read(&bfqq->ref)); -+ bfqq, bfqq->ref); - bfq_put_queue(bfqq); - *bfqq_ptr = NULL; - } -@@ -3922,19 +4773,18 @@ static void bfq_exit_queue(struct elevator_queue *e) - - BUG_ON(bfqd->in_service_queue); - list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) -- bfq_deactivate_bfqq(bfqd, bfqq, 0); -+ bfq_deactivate_bfqq(bfqd, bfqq, false, false); - - spin_unlock_irq(q->queue_lock); - - bfq_shutdown_timer_wq(bfqd); - -- synchronize_rcu(); -- -- BUG_ON(timer_pending(&bfqd->idle_slice_timer)); -+ BUG_ON(hrtimer_active(&bfqd->idle_slice_timer)); - - #ifdef CONFIG_BFQ_GROUP_IOSCHED - blkcg_deactivate_policy(q, &blkcg_policy_bfq); - #else -+ bfq_put_async_queues(bfqd, bfqd->root_group); - kfree(bfqd->root_group); - #endif - -@@ -3954,6 +4804,7 @@ static void bfq_init_root_group(struct bfq_group *root_group, - root_group->rq_pos_tree = RB_ROOT; - for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) - root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; -+ root_group->sched_data.bfq_class_idle_last_service = jiffies; - } - - static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) -@@ -3978,11 +4829,14 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) - * will not attempt to free it. - */ - bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0); -- atomic_inc(&bfqd->oom_bfqq.ref); -+ bfqd->oom_bfqq.ref++; - bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO; - bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE; - bfqd->oom_bfqq.entity.new_weight = - bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio); -+ -+ /* oom_bfqq does not participate to bursts */ -+ bfq_clear_bfqq_just_created(&bfqd->oom_bfqq); - /* - * Trigger weight initialization, according to ioprio, at the - * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio -@@ -4001,13 +4855,10 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) - goto out_free; - bfq_init_root_group(bfqd->root_group, bfqd); - bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group); --#ifdef CONFIG_BFQ_GROUP_IOSCHED -- bfqd->active_numerous_groups = 0; --#endif - -- init_timer(&bfqd->idle_slice_timer); -+ hrtimer_init(&bfqd->idle_slice_timer, CLOCK_MONOTONIC, -+ HRTIMER_MODE_REL); - bfqd->idle_slice_timer.function = bfq_idle_slice_timer; -- bfqd->idle_slice_timer.data = (unsigned long)bfqd; - - bfqd->queue_weights_tree = RB_ROOT; - bfqd->group_weights_tree = RB_ROOT; -@@ -4027,21 +4878,19 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) - bfqd->bfq_back_max = bfq_back_max; - bfqd->bfq_back_penalty = bfq_back_penalty; - bfqd->bfq_slice_idle = bfq_slice_idle; -- bfqd->bfq_class_idle_last_service = 0; -- bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; -- bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; -- bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; -+ bfqd->bfq_timeout = bfq_timeout; - -- bfqd->bfq_coop_thresh = 2; -- bfqd->bfq_failed_cooperations = 7000; - bfqd->bfq_requests_within_timer = 120; - -- bfqd->bfq_large_burst_thresh = 11; -- bfqd->bfq_burst_interval = msecs_to_jiffies(500); -+ bfqd->bfq_large_burst_thresh = 8; -+ bfqd->bfq_burst_interval = msecs_to_jiffies(180); - - bfqd->low_latency = true; - -- bfqd->bfq_wr_coeff = 20; -+ /* -+ * Trade-off between responsiveness and fairness. -+ */ -+ bfqd->bfq_wr_coeff = 30; - bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300); - bfqd->bfq_wr_max_time = 0; - bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000); -@@ -4053,16 +4902,15 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) - * video. - */ - bfqd->wr_busy_queues = 0; -- bfqd->busy_in_flight_queues = 0; -- bfqd->const_seeky_busy_in_flight_queues = 0; - - /* -- * Begin by assuming, optimistically, that the device peak rate is -- * equal to the highest reference rate. -+ * Begin by assuming, optimistically, that the device is a -+ * high-speed one, and that its peak rate is equal to 2/3 of -+ * the highest reference rate. - */ - bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] * - T_fast[blk_queue_nonrot(bfqd->queue)]; -- bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)]; -+ bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)] * 2 / 3; - bfqd->device_speed = BFQ_BFQD_FAST; - - return 0; -@@ -4088,7 +4936,7 @@ static int __init bfq_slab_setup(void) - - static ssize_t bfq_var_show(unsigned int var, char *page) - { -- return sprintf(page, "%d\n", var); -+ return sprintf(page, "%u\n", var); - } - - static ssize_t bfq_var_store(unsigned long *var, const char *page, -@@ -4159,21 +5007,21 @@ static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) - static ssize_t __FUNC(struct elevator_queue *e, char *page) \ - { \ - struct bfq_data *bfqd = e->elevator_data; \ -- unsigned int __data = __VAR; \ -- if (__CONV) \ -+ u64 __data = __VAR; \ -+ if (__CONV == 1) \ - __data = jiffies_to_msecs(__data); \ -+ else if (__CONV == 2) \ -+ __data = div_u64(__data, NSEC_PER_MSEC); \ - return bfq_var_show(__data, (page)); \ - } --SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); --SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); -+SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 2); -+SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 2); - SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); - SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); --SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); -+SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 2); - SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); --SHOW_FUNCTION(bfq_max_budget_async_rq_show, -- bfqd->bfq_max_budget_async_rq, 0); --SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); --SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); -+SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout, 1); -+SHOW_FUNCTION(bfq_strict_guarantees_show, bfqd->strict_guarantees, 0); - SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); - SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0); - SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1); -@@ -4183,6 +5031,17 @@ SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async, - SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0); - #undef SHOW_FUNCTION - -+#define USEC_SHOW_FUNCTION(__FUNC, __VAR) \ -+static ssize_t __FUNC(struct elevator_queue *e, char *page) \ -+{ \ -+ struct bfq_data *bfqd = e->elevator_data; \ -+ u64 __data = __VAR; \ -+ __data = div_u64(__data, NSEC_PER_USEC); \ -+ return bfq_var_show(__data, (page)); \ -+} -+USEC_SHOW_FUNCTION(bfq_slice_idle_us_show, bfqd->bfq_slice_idle); -+#undef USEC_SHOW_FUNCTION -+ - #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ - static ssize_t \ - __FUNC(struct elevator_queue *e, const char *page, size_t count) \ -@@ -4194,24 +5053,22 @@ __FUNC(struct elevator_queue *e, const char *page, size_t count) \ - __data = (MIN); \ - else if (__data > (MAX)) \ - __data = (MAX); \ -- if (__CONV) \ -+ if (__CONV == 1) \ - *(__PTR) = msecs_to_jiffies(__data); \ -+ else if (__CONV == 2) \ -+ *(__PTR) = (u64)__data * NSEC_PER_MSEC; \ - else \ - *(__PTR) = __data; \ - return ret; \ - } - STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, -- INT_MAX, 1); -+ INT_MAX, 2); - STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, -- INT_MAX, 1); -+ INT_MAX, 2); - STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); - STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, - INT_MAX, 0); --STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); --STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, -- 1, INT_MAX, 0); --STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, -- INT_MAX, 1); -+STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 2); - STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0); - STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1); - STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX, -@@ -4224,6 +5081,23 @@ STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0, - INT_MAX, 0); - #undef STORE_FUNCTION - -+#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ -+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)\ -+{ \ -+ struct bfq_data *bfqd = e->elevator_data; \ -+ unsigned long uninitialized_var(__data); \ -+ int ret = bfq_var_store(&__data, (page), count); \ -+ if (__data < (MIN)) \ -+ __data = (MIN); \ -+ else if (__data > (MAX)) \ -+ __data = (MAX); \ -+ *(__PTR) = (u64)__data * NSEC_PER_USEC; \ -+ return ret; \ -+} -+USEC_STORE_FUNCTION(bfq_slice_idle_us_store, &bfqd->bfq_slice_idle, 0, -+ UINT_MAX); -+#undef USEC_STORE_FUNCTION -+ - /* do nothing for the moment */ - static ssize_t bfq_weights_store(struct elevator_queue *e, - const char *page, size_t count) -@@ -4231,16 +5105,6 @@ static ssize_t bfq_weights_store(struct elevator_queue *e, - return count; - } - --static unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) --{ -- u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); -- -- if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) -- return bfq_calc_max_budget(bfqd->peak_rate, timeout); -- else -- return bfq_default_max_budget; --} -- - static ssize_t bfq_max_budget_store(struct elevator_queue *e, - const char *page, size_t count) - { -@@ -4249,7 +5113,7 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e, - int ret = bfq_var_store(&__data, (page), count); - - if (__data == 0) -- bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); -+ bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd); - else { - if (__data > INT_MAX) - __data = INT_MAX; -@@ -4261,6 +5125,10 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e, - return ret; - } - -+/* -+ * Leaving this name to preserve name compatibility with cfq -+ * parameters, but this timeout is used for both sync and async. -+ */ - static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, - const char *page, size_t count) - { -@@ -4273,9 +5141,27 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, - else if (__data > INT_MAX) - __data = INT_MAX; - -- bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); -+ bfqd->bfq_timeout = msecs_to_jiffies(__data); - if (bfqd->bfq_user_max_budget == 0) -- bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); -+ bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd); -+ -+ return ret; -+} -+ -+static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e, -+ const char *page, size_t count) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ unsigned long uninitialized_var(__data); -+ int ret = bfq_var_store(&__data, (page), count); -+ -+ if (__data > 1) -+ __data = 1; -+ if (!bfqd->strict_guarantees && __data == 1 -+ && bfqd->bfq_slice_idle < 8 * NSEC_PER_MSEC) -+ bfqd->bfq_slice_idle = 8 * NSEC_PER_MSEC; -+ -+ bfqd->strict_guarantees = __data; - - return ret; - } -@@ -4305,10 +5191,10 @@ static struct elv_fs_entry bfq_attrs[] = { - BFQ_ATTR(back_seek_max), - BFQ_ATTR(back_seek_penalty), - BFQ_ATTR(slice_idle), -+ BFQ_ATTR(slice_idle_us), - BFQ_ATTR(max_budget), -- BFQ_ATTR(max_budget_async_rq), - BFQ_ATTR(timeout_sync), -- BFQ_ATTR(timeout_async), -+ BFQ_ATTR(strict_guarantees), - BFQ_ATTR(low_latency), - BFQ_ATTR(wr_coeff), - BFQ_ATTR(wr_max_time), -@@ -4328,7 +5214,8 @@ static struct elevator_type iosched_bfq = { - #ifdef CONFIG_BFQ_GROUP_IOSCHED - .elevator_bio_merged_fn = bfq_bio_merged, - #endif -- .elevator_allow_merge_fn = bfq_allow_merge, -+ .elevator_allow_bio_merge_fn = bfq_allow_bio_merge, -+ .elevator_allow_rq_merge_fn = bfq_allow_rq_merge, - .elevator_dispatch_fn = bfq_dispatch_requests, - .elevator_add_req_fn = bfq_insert_request, - .elevator_activate_req_fn = bfq_activate_request, -@@ -4351,18 +5238,28 @@ static struct elevator_type iosched_bfq = { - .elevator_owner = THIS_MODULE, - }; - -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+static struct blkcg_policy blkcg_policy_bfq = { -+ .dfl_cftypes = bfq_blkg_files, -+ .legacy_cftypes = bfq_blkcg_legacy_files, -+ -+ .cpd_alloc_fn = bfq_cpd_alloc, -+ .cpd_init_fn = bfq_cpd_init, -+ .cpd_bind_fn = bfq_cpd_init, -+ .cpd_free_fn = bfq_cpd_free, -+ -+ .pd_alloc_fn = bfq_pd_alloc, -+ .pd_init_fn = bfq_pd_init, -+ .pd_offline_fn = bfq_pd_offline, -+ .pd_free_fn = bfq_pd_free, -+ .pd_reset_stats_fn = bfq_pd_reset_stats, -+}; -+#endif -+ - static int __init bfq_init(void) - { - int ret; -- -- /* -- * Can be 0 on HZ < 1000 setups. -- */ -- if (bfq_slice_idle == 0) -- bfq_slice_idle = 1; -- -- if (bfq_timeout_async == 0) -- bfq_timeout_async = 1; -+ char msg[60] = "BFQ I/O-scheduler: v8r11"; - - #ifdef CONFIG_BFQ_GROUP_IOSCHED - ret = blkcg_policy_register(&blkcg_policy_bfq); -@@ -4375,27 +5272,46 @@ static int __init bfq_init(void) - goto err_pol_unreg; - - /* -- * Times to load large popular applications for the typical systems -- * installed on the reference devices (see the comments before the -- * definitions of the two arrays). -+ * Times to load large popular applications for the typical -+ * systems installed on the reference devices (see the -+ * comments before the definitions of the next two -+ * arrays). Actually, we use slightly slower values, as the -+ * estimated peak rate tends to be smaller than the actual -+ * peak rate. The reason for this last fact is that estimates -+ * are computed over much shorter time intervals than the long -+ * intervals typically used for benchmarking. Why? First, to -+ * adapt more quickly to variations. Second, because an I/O -+ * scheduler cannot rely on a peak-rate-evaluation workload to -+ * be run for a long time. - */ -- T_slow[0] = msecs_to_jiffies(2600); -- T_slow[1] = msecs_to_jiffies(1000); -- T_fast[0] = msecs_to_jiffies(5500); -- T_fast[1] = msecs_to_jiffies(2000); -+ T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */ -+ T_slow[1] = msecs_to_jiffies(6000); /* actually 6.5 sec */ -+ T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */ -+ T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */ - - /* -- * Thresholds that determine the switch between speed classes (see -- * the comments before the definition of the array). -+ * Thresholds that determine the switch between speed classes -+ * (see the comments before the definition of the array -+ * device_speed_thresh). These thresholds are biased towards -+ * transitions to the fast class. This is safer than the -+ * opposite bias. In fact, a wrong transition to the slow -+ * class results in short weight-raising periods, because the -+ * speed of the device then tends to be higher that the -+ * reference peak rate. On the opposite end, a wrong -+ * transition to the fast class tends to increase -+ * weight-raising periods, because of the opposite reason. - */ -- device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2; -- device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2; -+ device_speed_thresh[0] = (4 * R_slow[0]) / 3; -+ device_speed_thresh[1] = (4 * R_slow[1]) / 3; - - ret = elv_register(&iosched_bfq); - if (ret) - goto err_pol_unreg; - -- pr_info("BFQ I/O-scheduler: v7r11"); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ strcat(msg, " (with cgroups support)"); -+#endif -+ pr_info("%s", msg); - - return 0; - -diff --git a/block/bfq-sched.c b/block/bfq-sched.c -index a5ed694..8311bdb 100644 ---- a/block/bfq-sched.c -+++ b/block/bfq-sched.c -@@ -7,28 +7,172 @@ - * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> - * Paolo Valente <paolo.valente@unimore.it> - * -- * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it> -+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it> -+ * -+ * Copyright (C) 2016 Paolo Valente <paolo.valente@linaro.org> - */ - --#ifdef CONFIG_BFQ_GROUP_IOSCHED --#define for_each_entity(entity) \ -- for (; entity ; entity = entity->parent) -+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); - --#define for_each_entity_safe(entity, parent) \ -- for (; entity && ({ parent = entity->parent; 1; }); entity = parent) -+/** -+ * bfq_gt - compare two timestamps. -+ * @a: first ts. -+ * @b: second ts. -+ * -+ * Return @a > @b, dealing with wrapping correctly. -+ */ -+static int bfq_gt(u64 a, u64 b) -+{ -+ return (s64)(a - b) > 0; -+} - -+static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree) -+{ -+ struct rb_node *node = tree->rb_node; - --static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, -- int extract, -- struct bfq_data *bfqd); -+ return rb_entry(node, struct bfq_entity, rb_node); -+} - --static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); -+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd); -+ -+static bool bfq_update_parent_budget(struct bfq_entity *next_in_service); -+ -+/** -+ * bfq_update_next_in_service - update sd->next_in_service -+ * @sd: sched_data for which to perform the update. -+ * @new_entity: if not NULL, pointer to the entity whose activation, -+ * requeueing or repositionig triggered the invocation of -+ * this function. -+ * -+ * This function is called to update sd->next_in_service, which, in -+ * its turn, may change as a consequence of the insertion or -+ * extraction of an entity into/from one of the active trees of -+ * sd. These insertions/extractions occur as a consequence of -+ * activations/deactivations of entities, with some activations being -+ * 'true' activations, and other activations being requeueings (i.e., -+ * implementing the second, requeueing phase of the mechanism used to -+ * reposition an entity in its active tree; see comments on -+ * __bfq_activate_entity and __bfq_requeue_entity for details). In -+ * both the last two activation sub-cases, new_entity points to the -+ * just activated or requeued entity. -+ * -+ * Returns true if sd->next_in_service changes in such a way that -+ * entity->parent may become the next_in_service for its parent -+ * entity. -+ */ -+static bool bfq_update_next_in_service(struct bfq_sched_data *sd, -+ struct bfq_entity *new_entity) -+{ -+ struct bfq_entity *next_in_service = sd->next_in_service; -+ struct bfq_queue *bfqq; -+ bool parent_sched_may_change = false; -+ -+ /* -+ * If this update is triggered by the activation, requeueing -+ * or repositiong of an entity that does not coincide with -+ * sd->next_in_service, then a full lookup in the active tree -+ * can be avoided. In fact, it is enough to check whether the -+ * just-modified entity has a higher priority than -+ * sd->next_in_service, or, even if it has the same priority -+ * as sd->next_in_service, is eligible and has a lower virtual -+ * finish time than sd->next_in_service. If this compound -+ * condition holds, then the new entity becomes the new -+ * next_in_service. Otherwise no change is needed. -+ */ -+ if (new_entity && new_entity != sd->next_in_service) { -+ /* -+ * Flag used to decide whether to replace -+ * sd->next_in_service with new_entity. Tentatively -+ * set to true, and left as true if -+ * sd->next_in_service is NULL. -+ */ -+ bool replace_next = true; -+ -+ /* -+ * If there is already a next_in_service candidate -+ * entity, then compare class priorities or timestamps -+ * to decide whether to replace sd->service_tree with -+ * new_entity. -+ */ -+ if (next_in_service) { -+ unsigned int new_entity_class_idx = -+ bfq_class_idx(new_entity); -+ struct bfq_service_tree *st = -+ sd->service_tree + new_entity_class_idx; -+ -+ /* -+ * For efficiency, evaluate the most likely -+ * sub-condition first. -+ */ -+ replace_next = -+ (new_entity_class_idx == -+ bfq_class_idx(next_in_service) -+ && -+ !bfq_gt(new_entity->start, st->vtime) -+ && -+ bfq_gt(next_in_service->finish, -+ new_entity->finish)) -+ || -+ new_entity_class_idx < -+ bfq_class_idx(next_in_service); -+ } -+ -+ if (replace_next) -+ next_in_service = new_entity; -+ } else /* invoked because of a deactivation: lookup needed */ -+ next_in_service = bfq_lookup_next_entity(sd); -+ -+ if (next_in_service) { -+ parent_sched_may_change = !sd->next_in_service || -+ bfq_update_parent_budget(next_in_service); -+ } -+ -+ sd->next_in_service = next_in_service; -+ -+ if (!next_in_service) -+ return parent_sched_may_change; - --static void bfq_update_budget(struct bfq_entity *next_in_service) -+ bfqq = bfq_entity_to_bfqq(next_in_service); -+ if (bfqq) -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "update_next_in_service: chosen this queue"); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ else { -+ struct bfq_group *bfqg = -+ container_of(next_in_service, -+ struct bfq_group, entity); -+ -+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, -+ "update_next_in_service: chosen this entity"); -+ } -+#endif -+ return parent_sched_may_change; -+} -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+/* both next loops stop at one of the child entities of the root group */ -+#define for_each_entity(entity) \ -+ for (; entity ; entity = entity->parent) -+ -+/* -+ * For each iteration, compute parent in advance, so as to be safe if -+ * entity is deallocated during the iteration. Such a deallocation may -+ * happen as a consequence of a bfq_put_queue that frees the bfq_queue -+ * containing entity. -+ */ -+#define for_each_entity_safe(entity, parent) \ -+ for (; entity && ({ parent = entity->parent; 1; }); entity = parent) -+ -+/* -+ * Returns true if this budget changes may let next_in_service->parent -+ * become the next_in_service entity for its parent entity. -+ */ -+static bool bfq_update_parent_budget(struct bfq_entity *next_in_service) - { - struct bfq_entity *bfqg_entity; - struct bfq_group *bfqg; - struct bfq_sched_data *group_sd; -+ bool ret = false; - - BUG_ON(!next_in_service); - -@@ -41,60 +185,68 @@ static void bfq_update_budget(struct bfq_entity *next_in_service) - * as it must never become an in-service entity. - */ - bfqg_entity = bfqg->my_entity; -- if (bfqg_entity) -+ if (bfqg_entity) { -+ if (bfqg_entity->budget > next_in_service->budget) -+ ret = true; - bfqg_entity->budget = next_in_service->budget; -+ } -+ -+ return ret; - } - --static int bfq_update_next_in_service(struct bfq_sched_data *sd) -+/* -+ * This function tells whether entity stops being a candidate for next -+ * service, according to the following logic. -+ * -+ * This function is invoked for an entity that is about to be set in -+ * service. If such an entity is a queue, then the entity is no longer -+ * a candidate for next service (i.e, a candidate entity to serve -+ * after the in-service entity is expired). The function then returns -+ * true. -+ * -+ * In contrast, the entity could stil be a candidate for next service -+ * if it is not a queue, and has more than one child. In fact, even if -+ * one of its children is about to be set in service, other children -+ * may still be the next to serve. As a consequence, a non-queue -+ * entity is not a candidate for next-service only if it has only one -+ * child. And only if this condition holds, then the function returns -+ * true for a non-queue entity. -+ */ -+static bool bfq_no_longer_next_in_service(struct bfq_entity *entity) - { -- struct bfq_entity *next_in_service; -+ struct bfq_group *bfqg; - -- if (sd->in_service_entity) -- /* will update/requeue at the end of service */ -- return 0; -+ if (bfq_entity_to_bfqq(entity)) -+ return true; - -- /* -- * NOTE: this can be improved in many ways, such as returning -- * 1 (and thus propagating upwards the update) only when the -- * budget changes, or caching the bfqq that will be scheduled -- * next from this subtree. By now we worry more about -- * correctness than about performance... -- */ -- next_in_service = bfq_lookup_next_entity(sd, 0, NULL); -- sd->next_in_service = next_in_service; -+ bfqg = container_of(entity, struct bfq_group, entity); - -- if (next_in_service) -- bfq_update_budget(next_in_service); -+ BUG_ON(bfqg == ((struct bfq_data *)(bfqg->bfqd))->root_group); -+ BUG_ON(bfqg->active_entities == 0); -+ if (bfqg->active_entities == 1) -+ return true; - -- return 1; -+ return false; - } - --static void bfq_check_next_in_service(struct bfq_sched_data *sd, -- struct bfq_entity *entity) --{ -- BUG_ON(sd->next_in_service != entity); --} --#else -+#else /* CONFIG_BFQ_GROUP_IOSCHED */ - #define for_each_entity(entity) \ - for (; entity ; entity = NULL) - - #define for_each_entity_safe(entity, parent) \ - for (parent = NULL; entity ; entity = parent) - --static int bfq_update_next_in_service(struct bfq_sched_data *sd) -+static bool bfq_update_parent_budget(struct bfq_entity *next_in_service) - { -- return 0; -+ return false; - } - --static void bfq_check_next_in_service(struct bfq_sched_data *sd, -- struct bfq_entity *entity) -+static bool bfq_no_longer_next_in_service(struct bfq_entity *entity) - { -+ return true; - } - --static void bfq_update_budget(struct bfq_entity *next_in_service) --{ --} --#endif -+#endif /* CONFIG_BFQ_GROUP_IOSCHED */ - - /* - * Shift for timestamp calculations. This actually limits the maximum -@@ -105,18 +257,6 @@ static void bfq_update_budget(struct bfq_entity *next_in_service) - */ - #define WFQ_SERVICE_SHIFT 22 - --/** -- * bfq_gt - compare two timestamps. -- * @a: first ts. -- * @b: second ts. -- * -- * Return @a > @b, dealing with wrapping correctly. -- */ --static int bfq_gt(u64 a, u64 b) --{ -- return (s64)(a - b) > 0; --} -- - static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) - { - struct bfq_queue *bfqq = NULL; -@@ -151,20 +291,36 @@ static u64 bfq_delta(unsigned long service, unsigned long weight) - static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service) - { - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ unsigned long long start, finish, delta; - - BUG_ON(entity->weight == 0); - - entity->finish = entity->start + - bfq_delta(service, entity->weight); - -+ start = ((entity->start>>10)*1000)>>12; -+ finish = ((entity->finish>>10)*1000)>>12; -+ delta = ((bfq_delta(service, entity->weight)>>10)*1000)>>12; -+ - if (bfqq) { - bfq_log_bfqq(bfqq->bfqd, bfqq, - "calc_finish: serv %lu, w %d", - service, entity->weight); - bfq_log_bfqq(bfqq->bfqd, bfqq, - "calc_finish: start %llu, finish %llu, delta %llu", -- entity->start, entity->finish, -- bfq_delta(service, entity->weight)); -+ start, finish, delta); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ } else { -+ struct bfq_group *bfqg = -+ container_of(entity, struct bfq_group, entity); -+ -+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, -+ "calc_finish group: serv %lu, w %d", -+ service, entity->weight); -+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, -+ "calc_finish group: start %llu, finish %llu, delta %llu", -+ start, finish, delta); -+#endif - } - } - -@@ -293,10 +449,26 @@ static void bfq_update_min(struct bfq_entity *entity, struct rb_node *node) - static void bfq_update_active_node(struct rb_node *node) - { - struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - - entity->min_start = entity->start; - bfq_update_min(entity, node->rb_right); - bfq_update_min(entity, node->rb_left); -+ -+ if (bfqq) { -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "update_active_node: new min_start %llu", -+ ((entity->min_start>>10)*1000)>>12); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ } else { -+ struct bfq_group *bfqg = -+ container_of(entity, struct bfq_group, entity); -+ -+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, -+ "update_active_node: new min_start %llu", -+ ((entity->min_start>>10)*1000)>>12); -+#endif -+ } - } - - /** -@@ -386,8 +558,6 @@ static void bfq_active_insert(struct bfq_service_tree *st, - BUG_ON(!bfqg); - BUG_ON(!bfqd); - bfqg->active_entities++; -- if (bfqg->active_entities == 2) -- bfqd->active_numerous_groups++; - } - #endif - } -@@ -399,7 +569,7 @@ static void bfq_active_insert(struct bfq_service_tree *st, - static unsigned short bfq_ioprio_to_weight(int ioprio) - { - BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); -- return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - ioprio; -+ return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF; - } - - /** -@@ -422,9 +592,9 @@ static void bfq_get_entity(struct bfq_entity *entity) - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - - if (bfqq) { -- atomic_inc(&bfqq->ref); -+ bfqq->ref++; - bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", -- bfqq, atomic_read(&bfqq->ref)); -+ bfqq, bfqq->ref); - } - } - -@@ -499,10 +669,6 @@ static void bfq_active_extract(struct bfq_service_tree *st, - BUG_ON(!bfqd); - BUG_ON(!bfqg->active_entities); - bfqg->active_entities--; -- if (bfqg->active_entities == 1) { -- BUG_ON(!bfqd->active_numerous_groups); -- bfqd->active_numerous_groups--; -- } - } - #endif - } -@@ -531,28 +697,32 @@ static void bfq_idle_insert(struct bfq_service_tree *st, - } - - /** -- * bfq_forget_entity - remove an entity from the wfq trees. -+ * bfq_forget_entity - do not consider entity any longer for scheduling - * @st: the service tree. - * @entity: the entity being removed. -+ * @is_in_service: true if entity is currently the in-service entity. - * -- * Update the device status and forget everything about @entity, putting -- * the device reference to it, if it is a queue. Entities belonging to -- * groups are not refcounted. -+ * Forget everything about @entity. In addition, if entity represents -+ * a queue, and the latter is not in service, then release the service -+ * reference to the queue (the one taken through bfq_get_entity). In -+ * fact, in this case, there is really no more service reference to -+ * the queue, as the latter is also outside any service tree. If, -+ * instead, the queue is in service, then __bfq_bfqd_reset_in_service -+ * will take care of putting the reference when the queue finally -+ * stops being served. - */ - static void bfq_forget_entity(struct bfq_service_tree *st, -- struct bfq_entity *entity) -+ struct bfq_entity *entity, -+ bool is_in_service) - { - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -- struct bfq_sched_data *sd; -- - BUG_ON(!entity->on_st); - -- entity->on_st = 0; -+ entity->on_st = false; - st->wsum -= entity->weight; -- if (bfqq) { -- sd = entity->sched_data; -- bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", -- bfqq, atomic_read(&bfqq->ref)); -+ if (bfqq && !is_in_service) { -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity (before): %p %d", -+ bfqq, bfqq->ref); - bfq_put_queue(bfqq); - } - } -@@ -566,7 +736,8 @@ static void bfq_put_idle_entity(struct bfq_service_tree *st, - struct bfq_entity *entity) - { - bfq_idle_extract(st, entity); -- bfq_forget_entity(st, entity); -+ bfq_forget_entity(st, entity, -+ entity == entity->sched_data->in_service_entity); - } - - /** -@@ -602,7 +773,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, - - if (entity->prio_changed) { - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -- unsigned short prev_weight, new_weight; -+ unsigned int prev_weight, new_weight; - struct bfq_data *bfqd = NULL; - struct rb_root *root; - #ifdef CONFIG_BFQ_GROUP_IOSCHED -@@ -630,7 +801,10 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, - entity->new_weight > BFQ_MAX_WEIGHT) { - pr_crit("update_weight_prio: new_weight %d\n", - entity->new_weight); -- BUG(); -+ if (entity->new_weight < BFQ_MIN_WEIGHT) -+ entity->new_weight = BFQ_MIN_WEIGHT; -+ else -+ entity->new_weight = BFQ_MAX_WEIGHT; - } - entity->orig_weight = entity->new_weight; - if (bfqq) -@@ -661,6 +835,13 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, - * associated with its new weight. - */ - if (prev_weight != new_weight) { -+ if (bfqq) -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "weight changed %d %d(%d %d)", -+ prev_weight, new_weight, -+ entity->orig_weight, -+ bfqq->wr_coeff); -+ - root = bfqq ? &bfqd->queue_weights_tree : - &bfqd->group_weights_tree; - bfq_weights_tree_remove(bfqd, entity, root); -@@ -707,7 +888,7 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served) - st = bfq_entity_service_tree(entity); - - entity->service += served; -- BUG_ON(entity->service > entity->budget); -+ - BUG_ON(st->wsum == 0); - - st->vtime += bfq_delta(served, st->wsum); -@@ -716,234 +897,589 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served) - #ifdef CONFIG_BFQ_GROUP_IOSCHED - bfqg_stats_set_start_empty_time(bfqq_group(bfqq)); - #endif -- bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served); -+ st = bfq_entity_service_tree(&bfqq->entity); -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs, vtime %llu on %p", -+ served, ((st->vtime>>10)*1000)>>12, st); - } - - /** -- * bfq_bfqq_charge_full_budget - set the service to the entity budget. -+ * bfq_bfqq_charge_time - charge an amount of service equivalent to the length -+ * of the time interval during which bfqq has been in -+ * service. -+ * @bfqd: the device - * @bfqq: the queue that needs a service update. -+ * @time_ms: the amount of time during which the queue has received service - * -- * When it's not possible to be fair in the service domain, because -- * a queue is not consuming its budget fast enough (the meaning of -- * fast depends on the timeout parameter), we charge it a full -- * budget. In this way we should obtain a sort of time-domain -- * fairness among all the seeky/slow queues. -+ * If a queue does not consume its budget fast enough, then providing -+ * the queue with service fairness may impair throughput, more or less -+ * severely. For this reason, queues that consume their budget slowly -+ * are provided with time fairness instead of service fairness. This -+ * goal is achieved through the BFQ scheduling engine, even if such an -+ * engine works in the service, and not in the time domain. The trick -+ * is charging these queues with an inflated amount of service, equal -+ * to the amount of service that they would have received during their -+ * service slot if they had been fast, i.e., if their requests had -+ * been dispatched at a rate equal to the estimated peak rate. -+ * -+ * It is worth noting that time fairness can cause important -+ * distortions in terms of bandwidth distribution, on devices with -+ * internal queueing. The reason is that I/O requests dispatched -+ * during the service slot of a queue may be served after that service -+ * slot is finished, and may have a total processing time loosely -+ * correlated with the duration of the service slot. This is -+ * especially true for short service slots. - */ --static void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) -+static void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ unsigned long time_ms) - { - struct bfq_entity *entity = &bfqq->entity; -+ int tot_serv_to_charge = entity->service; -+ unsigned int timeout_ms = jiffies_to_msecs(bfq_timeout); -+ -+ if (time_ms > 0 && time_ms < timeout_ms) -+ tot_serv_to_charge = -+ (bfqd->bfq_max_budget * time_ms) / timeout_ms; -+ -+ if (tot_serv_to_charge < entity->service) -+ tot_serv_to_charge = entity->service; - -- bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "charge_time: %lu/%u ms, %d/%d/%d sectors", -+ time_ms, timeout_ms, entity->service, -+ tot_serv_to_charge, entity->budget); - -- bfq_bfqq_served(bfqq, entity->budget - entity->service); -+ /* Increase budget to avoid inconsistencies */ -+ if (tot_serv_to_charge > entity->budget) -+ entity->budget = tot_serv_to_charge; -+ -+ bfq_bfqq_served(bfqq, -+ max_t(int, 0, tot_serv_to_charge - entity->service)); -+} -+ -+static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, -+ struct bfq_service_tree *st, -+ bool backshifted) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct bfq_sched_data *sd = entity->sched_data; -+ -+ st = __bfq_entity_update_weight_prio(st, entity); -+ bfq_calc_finish(entity, entity->budget); -+ -+ /* -+ * If some queues enjoy backshifting for a while, then their -+ * (virtual) finish timestamps may happen to become lower and -+ * lower than the system virtual time. In particular, if -+ * these queues often happen to be idle for short time -+ * periods, and during such time periods other queues with -+ * higher timestamps happen to be busy, then the backshifted -+ * timestamps of the former queues can become much lower than -+ * the system virtual time. In fact, to serve the queues with -+ * higher timestamps while the ones with lower timestamps are -+ * idle, the system virtual time may be pushed-up to much -+ * higher values than the finish timestamps of the idle -+ * queues. As a consequence, the finish timestamps of all new -+ * or newly activated queues may end up being much larger than -+ * those of lucky queues with backshifted timestamps. The -+ * latter queues may then monopolize the device for a lot of -+ * time. This would simply break service guarantees. -+ * -+ * To reduce this problem, push up a little bit the -+ * backshifted timestamps of the queue associated with this -+ * entity (only a queue can happen to have the backshifted -+ * flag set): just enough to let the finish timestamp of the -+ * queue be equal to the current value of the system virtual -+ * time. This may introduce a little unfairness among queues -+ * with backshifted timestamps, but it does not break -+ * worst-case fairness guarantees. -+ * -+ * As a special case, if bfqq is weight-raised, push up -+ * timestamps much less, to keep very low the probability that -+ * this push up causes the backshifted finish timestamps of -+ * weight-raised queues to become higher than the backshifted -+ * finish timestamps of non weight-raised queues. -+ */ -+ if (backshifted && bfq_gt(st->vtime, entity->finish)) { -+ unsigned long delta = st->vtime - entity->finish; -+ -+ if (bfqq) -+ delta /= bfqq->wr_coeff; -+ -+ entity->start += delta; -+ entity->finish += delta; -+ -+ if (bfqq) { -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "__activate_entity: new queue finish %llu", -+ ((entity->finish>>10)*1000)>>12); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ } else { -+ struct bfq_group *bfqg = -+ container_of(entity, struct bfq_group, entity); -+ -+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, -+ "__activate_entity: new group finish %llu", -+ ((entity->finish>>10)*1000)>>12); -+#endif -+ } -+ } -+ -+ bfq_active_insert(st, entity); -+ -+ if (bfqq) { -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "__activate_entity: queue %seligible in st %p", -+ entity->start <= st->vtime ? "" : "non ", st); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ } else { -+ struct bfq_group *bfqg = -+ container_of(entity, struct bfq_group, entity); -+ -+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, -+ "__activate_entity: group %seligible in st %p", -+ entity->start <= st->vtime ? "" : "non ", st); -+#endif -+ } -+ BUG_ON(RB_EMPTY_ROOT(&st->active)); -+ BUG_ON(&st->active != &sd->service_tree->active && -+ &st->active != &(sd->service_tree+1)->active && -+ &st->active != &(sd->service_tree+2)->active); - } - - /** -- * __bfq_activate_entity - activate an entity. -+ * __bfq_activate_entity - handle activation of entity. - * @entity: the entity being activated. -+ * @non_blocking_wait_rq: true if entity was waiting for a request -+ * -+ * Called for a 'true' activation, i.e., if entity is not active and -+ * one of its children receives a new request. - * -- * Called whenever an entity is activated, i.e., it is not active and one -- * of its children receives a new request, or has to be reactivated due to -- * budget exhaustion. It uses the current budget of the entity (and the -- * service received if @entity is active) of the queue to calculate its -- * timestamps. -+ * Basically, this function updates the timestamps of entity and -+ * inserts entity into its active tree, ater possible extracting it -+ * from its idle tree. - */ --static void __bfq_activate_entity(struct bfq_entity *entity) -+static void __bfq_activate_entity(struct bfq_entity *entity, -+ bool non_blocking_wait_rq) - { - struct bfq_sched_data *sd = entity->sched_data; - struct bfq_service_tree *st = bfq_entity_service_tree(entity); -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ bool backshifted = false; -+ unsigned long long min_vstart; - -- if (entity == sd->in_service_entity) { -- BUG_ON(entity->tree); -- /* -- * If we are requeueing the current entity we have -- * to take care of not charging to it service it has -- * not received. -- */ -- bfq_calc_finish(entity, entity->service); -- entity->start = entity->finish; -- sd->in_service_entity = NULL; -- } else if (entity->tree == &st->active) { -- /* -- * Requeueing an entity due to a change of some -- * next_in_service entity below it. We reuse the -- * old start time. -- */ -- bfq_active_extract(st, entity); -- } else if (entity->tree == &st->idle) { -+ BUG_ON(!sd); -+ BUG_ON(!st); -+ -+ /* See comments on bfq_fqq_update_budg_for_activation */ -+ if (non_blocking_wait_rq && bfq_gt(st->vtime, entity->finish)) { -+ backshifted = true; -+ min_vstart = entity->finish; -+ } else -+ min_vstart = st->vtime; -+ -+ if (entity->tree == &st->idle) { - /* - * Must be on the idle tree, bfq_idle_extract() will - * check for that. - */ - bfq_idle_extract(st, entity); -- entity->start = bfq_gt(st->vtime, entity->finish) ? -- st->vtime : entity->finish; -+ entity->start = bfq_gt(min_vstart, entity->finish) ? -+ min_vstart : entity->finish; - } else { - /* - * The finish time of the entity may be invalid, and - * it is in the past for sure, otherwise the queue - * would have been on the idle tree. - */ -- entity->start = st->vtime; -+ entity->start = min_vstart; - st->wsum += entity->weight; -+ /* -+ * entity is about to be inserted into a service tree, -+ * and then set in service: get a reference to make -+ * sure entity does not disappear until it is no -+ * longer in service or scheduled for service. -+ */ - bfq_get_entity(entity); - -- BUG_ON(entity->on_st); -- entity->on_st = 1; -+ BUG_ON(entity->on_st && bfqq); -+ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ if (entity->on_st && !bfqq) { -+ struct bfq_group *bfqg = -+ container_of(entity, struct bfq_group, -+ entity); -+ -+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, -+ bfqg, -+ "activate bug, class %d in_service %p", -+ bfq_class_idx(entity), sd->in_service_entity); -+ } -+#endif -+ BUG_ON(entity->on_st && !bfqq); -+ entity->on_st = true; - } - -- st = __bfq_entity_update_weight_prio(st, entity); -- bfq_calc_finish(entity, entity->budget); -- bfq_active_insert(st, entity); -+ bfq_update_fin_time_enqueue(entity, st, backshifted); - } - - /** -- * bfq_activate_entity - activate an entity and its ancestors if necessary. -- * @entity: the entity to activate. -+ * __bfq_requeue_entity - handle requeueing or repositioning of an entity. -+ * @entity: the entity being requeued or repositioned. -+ * -+ * Requeueing is needed if this entity stops being served, which -+ * happens if a leaf descendant entity has expired. On the other hand, -+ * repositioning is needed if the next_inservice_entity for the child -+ * entity has changed. See the comments inside the function for -+ * details. - * -- * Activate @entity and all the entities on the path from it to the root. -+ * Basically, this function: 1) removes entity from its active tree if -+ * present there, 2) updates the timestamps of entity and 3) inserts -+ * entity back into its active tree (in the new, right position for -+ * the new values of the timestamps). -+ */ -+static void __bfq_requeue_entity(struct bfq_entity *entity) -+{ -+ struct bfq_sched_data *sd = entity->sched_data; -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); -+ -+ BUG_ON(!sd); -+ BUG_ON(!st); -+ -+ BUG_ON(entity != sd->in_service_entity && -+ entity->tree != &st->active); -+ -+ if (entity == sd->in_service_entity) { -+ /* -+ * We are requeueing the current in-service entity, -+ * which may have to be done for one of the following -+ * reasons: -+ * - entity represents the in-service queue, and the -+ * in-service queue is being requeued after an -+ * expiration; -+ * - entity represents a group, and its budget has -+ * changed because one of its child entities has -+ * just been either activated or requeued for some -+ * reason; the timestamps of the entity need then to -+ * be updated, and the entity needs to be enqueued -+ * or repositioned accordingly. -+ * -+ * In particular, before requeueing, the start time of -+ * the entity must be moved forward to account for the -+ * service that the entity has received while in -+ * service. This is done by the next instructions. The -+ * finish time will then be updated according to this -+ * new value of the start time, and to the budget of -+ * the entity. -+ */ -+ bfq_calc_finish(entity, entity->service); -+ entity->start = entity->finish; -+ BUG_ON(entity->tree && entity->tree != &st->active); -+ /* -+ * In addition, if the entity had more than one child -+ * when set in service, then was not extracted from -+ * the active tree. This implies that the position of -+ * the entity in the active tree may need to be -+ * changed now, because we have just updated the start -+ * time of the entity, and we will update its finish -+ * time in a moment (the requeueing is then, more -+ * precisely, a repositioning in this case). To -+ * implement this repositioning, we: 1) dequeue the -+ * entity here, 2) update the finish time and -+ * requeue the entity according to the new -+ * timestamps below. -+ */ -+ if (entity->tree) -+ bfq_active_extract(st, entity); -+ } else { /* The entity is already active, and not in service */ -+ /* -+ * In this case, this function gets called only if the -+ * next_in_service entity below this entity has -+ * changed, and this change has caused the budget of -+ * this entity to change, which, finally implies that -+ * the finish time of this entity must be -+ * updated. Such an update may cause the scheduling, -+ * i.e., the position in the active tree, of this -+ * entity to change. We handle this change by: 1) -+ * dequeueing the entity here, 2) updating the finish -+ * time and requeueing the entity according to the new -+ * timestamps below. This is the same approach as the -+ * non-extracted-entity sub-case above. -+ */ -+ bfq_active_extract(st, entity); -+ } -+ -+ bfq_update_fin_time_enqueue(entity, st, false); -+} -+ -+static void __bfq_activate_requeue_entity(struct bfq_entity *entity, -+ struct bfq_sched_data *sd, -+ bool non_blocking_wait_rq) -+{ -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); -+ -+ if (sd->in_service_entity == entity || entity->tree == &st->active) -+ /* -+ * in service or already queued on the active tree, -+ * requeue or reposition -+ */ -+ __bfq_requeue_entity(entity); -+ else -+ /* -+ * Not in service and not queued on its active tree: -+ * the activity is idle and this is a true activation. -+ */ -+ __bfq_activate_entity(entity, non_blocking_wait_rq); -+} -+ -+ -+/** -+ * bfq_activate_entity - activate or requeue an entity representing a bfq_queue, -+ * and activate, requeue or reposition all ancestors -+ * for which such an update becomes necessary. -+ * @entity: the entity to activate. -+ * @non_blocking_wait_rq: true if this entity was waiting for a request -+ * @requeue: true if this is a requeue, which implies that bfqq is -+ * being expired; thus ALL its ancestors stop being served and must -+ * therefore be requeued - */ --static void bfq_activate_entity(struct bfq_entity *entity) -+static void bfq_activate_requeue_entity(struct bfq_entity *entity, -+ bool non_blocking_wait_rq, -+ bool requeue) - { - struct bfq_sched_data *sd; - - for_each_entity(entity) { -- __bfq_activate_entity(entity); -- -+ BUG_ON(!entity); - sd = entity->sched_data; -- if (!bfq_update_next_in_service(sd)) -- /* -- * No need to propagate the activation to the -- * upper entities, as they will be updated when -- * the in-service entity is rescheduled. -- */ -+ __bfq_activate_requeue_entity(entity, sd, non_blocking_wait_rq); -+ -+ BUG_ON(RB_EMPTY_ROOT(&sd->service_tree->active) && -+ RB_EMPTY_ROOT(&(sd->service_tree+1)->active) && -+ RB_EMPTY_ROOT(&(sd->service_tree+2)->active)); -+ -+ if (!bfq_update_next_in_service(sd, entity) && !requeue) { -+ BUG_ON(!sd->next_in_service); - break; -+ } -+ BUG_ON(!sd->next_in_service); - } - } - - /** - * __bfq_deactivate_entity - deactivate an entity from its service tree. - * @entity: the entity to deactivate. -- * @requeue: if false, the entity will not be put into the idle tree. -+ * @ins_into_idle_tree: if false, the entity will not be put into the -+ * idle tree. - * -- * Deactivate an entity, independently from its previous state. If the -- * entity was not on a service tree just return, otherwise if it is on -- * any scheduler tree, extract it from that tree, and if necessary -- * and if the caller did not specify @requeue, put it on the idle tree. -- * -- * Return %1 if the caller should update the entity hierarchy, i.e., -- * if the entity was in service or if it was the next_in_service for -- * its sched_data; return %0 otherwise. -+ * Deactivates an entity, independently from its previous state. Must -+ * be invoked only if entity is on a service tree. Extracts the entity -+ * from that tree, and if necessary and allowed, puts it on the idle -+ * tree. - */ --static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) -+static bool __bfq_deactivate_entity(struct bfq_entity *entity, -+ bool ins_into_idle_tree) - { - struct bfq_sched_data *sd = entity->sched_data; - struct bfq_service_tree *st; -- int was_in_service; -- int ret = 0; -+ bool is_in_service; - -- if (sd == NULL || !entity->on_st) /* never activated, or inactive */ -- return 0; -+ if (!entity->on_st) { /* entity never activated, or already inactive */ -+ BUG_ON(sd && entity == sd->in_service_entity); -+ return false; -+ } - -+ /* -+ * If we get here, then entity is active, which implies that -+ * bfq_group_set_parent has already been invoked for the group -+ * represented by entity. Therefore, the field -+ * entity->sched_data has been set, and we can safely use it. -+ */ - st = bfq_entity_service_tree(entity); -- was_in_service = entity == sd->in_service_entity; -+ is_in_service = entity == sd->in_service_entity; - -- BUG_ON(was_in_service && entity->tree); -+ BUG_ON(is_in_service && entity->tree && entity->tree != &st->active); - -- if (was_in_service) { -+ if (is_in_service) - bfq_calc_finish(entity, entity->service); -- sd->in_service_entity = NULL; -- } else if (entity->tree == &st->active) -+ -+ if (entity->tree == &st->active) - bfq_active_extract(st, entity); -- else if (entity->tree == &st->idle) -+ else if (!is_in_service && entity->tree == &st->idle) - bfq_idle_extract(st, entity); - else if (entity->tree) - BUG(); - -- if (was_in_service || sd->next_in_service == entity) -- ret = bfq_update_next_in_service(sd); -- -- if (!requeue || !bfq_gt(entity->finish, st->vtime)) -- bfq_forget_entity(st, entity); -+ if (!ins_into_idle_tree || !bfq_gt(entity->finish, st->vtime)) -+ bfq_forget_entity(st, entity, is_in_service); - else - bfq_idle_insert(st, entity); - -- BUG_ON(sd->in_service_entity == entity); -- BUG_ON(sd->next_in_service == entity); -- -- return ret; -+ return true; - } - - /** -- * bfq_deactivate_entity - deactivate an entity. -+ * bfq_deactivate_entity - deactivate an entity representing a bfq_queue. - * @entity: the entity to deactivate. -- * @requeue: true if the entity can be put on the idle tree -+ * @ins_into_idle_tree: true if the entity can be put on the idle tree - */ --static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) -+static void bfq_deactivate_entity(struct bfq_entity *entity, -+ bool ins_into_idle_tree, -+ bool expiration) - { - struct bfq_sched_data *sd; -- struct bfq_entity *parent; -+ struct bfq_entity *parent = NULL; - - for_each_entity_safe(entity, parent) { - sd = entity->sched_data; - -- if (!__bfq_deactivate_entity(entity, requeue)) -+ BUG_ON(sd == NULL); /* -+ * It would mean that this is the -+ * root group. -+ */ -+ -+ BUG_ON(expiration && entity != sd->in_service_entity); -+ -+ BUG_ON(entity != sd->in_service_entity && -+ entity->tree == -+ &bfq_entity_service_tree(entity)->active && -+ !sd->next_in_service); -+ -+ if (!__bfq_deactivate_entity(entity, ins_into_idle_tree)) { - /* -- * The parent entity is still backlogged, and -- * we don't need to update it as it is still -- * in service. -+ * entity is not in any tree any more, so -+ * this deactivation is a no-op, and there is -+ * nothing to change for upper-level entities -+ * (in case of expiration, this can never -+ * happen). - */ -- break; -+ BUG_ON(expiration); /* -+ * entity cannot be already out of -+ * any tree -+ */ -+ return; -+ } -+ -+ if (sd->next_in_service == entity) -+ /* -+ * entity was the next_in_service entity, -+ * then, since entity has just been -+ * deactivated, a new one must be found. -+ */ -+ bfq_update_next_in_service(sd, NULL); - -- if (sd->next_in_service) -+ if (sd->next_in_service) { - /* -- * The parent entity is still backlogged and -- * the budgets on the path towards the root -- * need to be updated. -+ * The parent entity is still backlogged, -+ * because next_in_service is not NULL. So, no -+ * further upwards deactivation must be -+ * performed. Yet, next_in_service has -+ * changed. Then the schedule does need to be -+ * updated upwards. - */ -- goto update; -+ BUG_ON(sd->next_in_service == entity); -+ break; -+ } - - /* -- * If we reach there the parent is no more backlogged and -- * we want to propagate the dequeue upwards. -+ * If we get here, then the parent is no more -+ * backlogged and we need to propagate the -+ * deactivation upwards. Thus let the loop go on. - */ -- requeue = 1; -- } - -- return; -+ /* -+ * Also let parent be queued into the idle tree on -+ * deactivation, to preserve service guarantees, and -+ * assuming that who invoked this function does not -+ * need parent entities too to be removed completely. -+ */ -+ ins_into_idle_tree = true; -+ } - --update: -+ /* -+ * If the deactivation loop is fully executed, then there are -+ * no more entities to touch and next loop is not executed at -+ * all. Otherwise, requeue remaining entities if they are -+ * about to stop receiving service, or reposition them if this -+ * is not the case. -+ */ - entity = parent; - for_each_entity(entity) { -- __bfq_activate_entity(entity); -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ -+ /* -+ * Invoke __bfq_requeue_entity on entity, even if -+ * already active, to requeue/reposition it in the -+ * active tree (because sd->next_in_service has -+ * changed) -+ */ -+ __bfq_requeue_entity(entity); - - sd = entity->sched_data; -- if (!bfq_update_next_in_service(sd)) -+ BUG_ON(expiration && sd->in_service_entity != entity); -+ -+ if (bfqq) -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "invoking udpdate_next for this queue"); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ else { -+ struct bfq_group *bfqg = -+ container_of(entity, -+ struct bfq_group, entity); -+ -+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, -+ "invoking udpdate_next for this entity"); -+ } -+#endif -+ if (!bfq_update_next_in_service(sd, entity) && -+ !expiration) -+ /* -+ * next_in_service unchanged or not causing -+ * any change in entity->parent->sd, and no -+ * requeueing needed for expiration: stop -+ * here. -+ */ - break; - } - } - - /** -- * bfq_update_vtime - update vtime if necessary. -+ * bfq_calc_vtime_jump - compute the value to which the vtime should jump, -+ * if needed, to have at least one entity eligible. - * @st: the service tree to act upon. - * -- * If necessary update the service tree vtime to have at least one -- * eligible entity, skipping to its start time. Assumes that the -- * active tree of the device is not empty. -- * -- * NOTE: this hierarchical implementation updates vtimes quite often, -- * we may end up with reactivated processes getting timestamps after a -- * vtime skip done because we needed a ->first_active entity on some -- * intermediate node. -+ * Assumes that st is not empty. - */ --static void bfq_update_vtime(struct bfq_service_tree *st) -+static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st) - { -- struct bfq_entity *entry; -- struct rb_node *node = st->active.rb_node; -+ struct bfq_entity *root_entity = bfq_root_active_entity(&st->active); -+ -+ if (bfq_gt(root_entity->min_start, st->vtime)) { -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(root_entity); - -- entry = rb_entry(node, struct bfq_entity, rb_node); -- if (bfq_gt(entry->min_start, st->vtime)) { -- st->vtime = entry->min_start; -+ if (bfqq) -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "calc_vtime_jump: new value %llu", -+ root_entity->min_start); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ else { -+ struct bfq_group *bfqg = -+ container_of(root_entity, struct bfq_group, -+ entity); -+ -+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, -+ "calc_vtime_jump: new value %llu", -+ root_entity->min_start); -+ } -+#endif -+ return root_entity->min_start; -+ } -+ return st->vtime; -+} -+ -+static void bfq_update_vtime(struct bfq_service_tree *st, u64 new_value) -+{ -+ if (new_value > st->vtime) { -+ st->vtime = new_value; - bfq_forget_idle(st); - } - } -@@ -952,6 +1488,7 @@ static void bfq_update_vtime(struct bfq_service_tree *st) - * bfq_first_active_entity - find the eligible entity with - * the smallest finish time - * @st: the service tree to select from. -+ * @vtime: the system virtual to use as a reference for eligibility - * - * This function searches the first schedulable entity, starting from the - * root of the tree and going on the left every time on this side there is -@@ -959,7 +1496,8 @@ static void bfq_update_vtime(struct bfq_service_tree *st) - * the right is followed only if a) the left subtree contains no eligible - * entities and b) no eligible entity has been found yet. - */ --static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) -+static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st, -+ u64 vtime) - { - struct bfq_entity *entry, *first = NULL; - struct rb_node *node = st->active.rb_node; -@@ -967,15 +1505,15 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) - while (node) { - entry = rb_entry(node, struct bfq_entity, rb_node); - left: -- if (!bfq_gt(entry->start, st->vtime)) -+ if (!bfq_gt(entry->start, vtime)) - first = entry; - -- BUG_ON(bfq_gt(entry->min_start, st->vtime)); -+ BUG_ON(bfq_gt(entry->min_start, vtime)); - - if (node->rb_left) { - entry = rb_entry(node->rb_left, - struct bfq_entity, rb_node); -- if (!bfq_gt(entry->min_start, st->vtime)) { -+ if (!bfq_gt(entry->min_start, vtime)) { - node = node->rb_left; - goto left; - } -@@ -993,31 +1531,84 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) - * __bfq_lookup_next_entity - return the first eligible entity in @st. - * @st: the service tree. - * -- * Update the virtual time in @st and return the first eligible entity -- * it contains. -+ * If there is no in-service entity for the sched_data st belongs to, -+ * then return the entity that will be set in service if: -+ * 1) the parent entity this st belongs to is set in service; -+ * 2) no entity belonging to such parent entity undergoes a state change -+ * that would influence the timestamps of the entity (e.g., becomes idle, -+ * becomes backlogged, changes its budget, ...). -+ * -+ * In this first case, update the virtual time in @st too (see the -+ * comments on this update inside the function). -+ * -+ * In constrast, if there is an in-service entity, then return the -+ * entity that would be set in service if not only the above -+ * conditions, but also the next one held true: the currently -+ * in-service entity, on expiration, -+ * 1) gets a finish time equal to the current one, or -+ * 2) is not eligible any more, or -+ * 3) is idle. - */ --static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, -- bool force) -+static struct bfq_entity * -+__bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service -+#if 0 -+ , bool force -+#endif -+ ) - { -- struct bfq_entity *entity, *new_next_in_service = NULL; -+ struct bfq_entity *entity -+#if 0 -+ , *new_next_in_service = NULL -+#endif -+ ; -+ u64 new_vtime; -+ struct bfq_queue *bfqq; - - if (RB_EMPTY_ROOT(&st->active)) - return NULL; - -- bfq_update_vtime(st); -- entity = bfq_first_active_entity(st); -- BUG_ON(bfq_gt(entity->start, st->vtime)); -+ /* -+ * Get the value of the system virtual time for which at -+ * least one entity is eligible. -+ */ -+ new_vtime = bfq_calc_vtime_jump(st); - - /* -- * If the chosen entity does not match with the sched_data's -- * next_in_service and we are forcedly serving the IDLE priority -- * class tree, bubble up budget update. -+ * If there is no in-service entity for the sched_data this -+ * active tree belongs to, then push the system virtual time -+ * up to the value that guarantees that at least one entity is -+ * eligible. If, instead, there is an in-service entity, then -+ * do not make any such update, because there is already an -+ * eligible entity, namely the in-service one (even if the -+ * entity is not on st, because it was extracted when set in -+ * service). - */ -- if (unlikely(force && entity != entity->sched_data->next_in_service)) { -- new_next_in_service = entity; -- for_each_entity(new_next_in_service) -- bfq_update_budget(new_next_in_service); -+ if (!in_service) -+ bfq_update_vtime(st, new_vtime); -+ -+ entity = bfq_first_active_entity(st, new_vtime); -+ BUG_ON(bfq_gt(entity->start, new_vtime)); -+ -+ /* Log some information */ -+ bfqq = bfq_entity_to_bfqq(entity); -+ if (bfqq) -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "__lookup_next: start %llu vtime %llu st %p", -+ ((entity->start>>10)*1000)>>12, -+ ((new_vtime>>10)*1000)>>12, st); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ else { -+ struct bfq_group *bfqg = -+ container_of(entity, struct bfq_group, entity); -+ -+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, -+ "__lookup_next: start %llu vtime %llu st %p", -+ ((entity->start>>10)*1000)>>12, -+ ((new_vtime>>10)*1000)>>12, st); - } -+#endif -+ -+ BUG_ON(!entity); - - return entity; - } -@@ -1025,50 +1616,81 @@ static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, - /** - * bfq_lookup_next_entity - return the first eligible entity in @sd. - * @sd: the sched_data. -- * @extract: if true the returned entity will be also extracted from @sd. - * -- * NOTE: since we cache the next_in_service entity at each level of the -- * hierarchy, the complexity of the lookup can be decreased with -- * absolutely no effort just returning the cached next_in_service value; -- * we prefer to do full lookups to test the consistency of * the data -- * structures. -+ * This function is invoked when there has been a change in the trees -+ * for sd, and we need know what is the new next entity after this -+ * change. - */ --static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, -- int extract, -- struct bfq_data *bfqd) -+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd) - { - struct bfq_service_tree *st = sd->service_tree; -- struct bfq_entity *entity; -- int i = 0; -- -- BUG_ON(sd->in_service_entity); -+ struct bfq_service_tree *idle_class_st = st + (BFQ_IOPRIO_CLASSES - 1); -+ struct bfq_entity *entity = NULL; -+ struct bfq_queue *bfqq; -+ int class_idx = 0; - -- if (bfqd && -- jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { -- entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, -- true); -- if (entity) { -- i = BFQ_IOPRIO_CLASSES - 1; -- bfqd->bfq_class_idle_last_service = jiffies; -- sd->next_in_service = entity; -- } -+ BUG_ON(!sd); -+ BUG_ON(!st); -+ /* -+ * Choose from idle class, if needed to guarantee a minimum -+ * bandwidth to this class (and if there is some active entity -+ * in idle class). This should also mitigate -+ * priority-inversion problems in case a low priority task is -+ * holding file system resources. -+ */ -+ if (time_is_before_jiffies(sd->bfq_class_idle_last_service + -+ BFQ_CL_IDLE_TIMEOUT)) { -+ if (!RB_EMPTY_ROOT(&idle_class_st->active)) -+ class_idx = BFQ_IOPRIO_CLASSES - 1; -+ /* About to be served if backlogged, or not yet backlogged */ -+ sd->bfq_class_idle_last_service = jiffies; - } -- for (; i < BFQ_IOPRIO_CLASSES; i++) { -- entity = __bfq_lookup_next_entity(st + i, false); -- if (entity) { -- if (extract) { -- bfq_check_next_in_service(sd, entity); -- bfq_active_extract(st + i, entity); -- sd->in_service_entity = entity; -- sd->next_in_service = NULL; -- } -+ -+ /* -+ * Find the next entity to serve for the highest-priority -+ * class, unless the idle class needs to be served. -+ */ -+ for (; class_idx < BFQ_IOPRIO_CLASSES; class_idx++) { -+ entity = __bfq_lookup_next_entity(st + class_idx, -+ sd->in_service_entity); -+ -+ if (entity) - break; -- } - } - -+ BUG_ON(!entity && -+ (!RB_EMPTY_ROOT(&st->active) || !RB_EMPTY_ROOT(&(st+1)->active) || -+ !RB_EMPTY_ROOT(&(st+2)->active))); -+ -+ if (!entity) -+ return NULL; -+ -+ /* Log some information */ -+ bfqq = bfq_entity_to_bfqq(entity); -+ if (bfqq) -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "chosen from st %p %d", -+ st + class_idx, class_idx); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ else { -+ struct bfq_group *bfqg = -+ container_of(entity, struct bfq_group, entity); -+ -+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, -+ "chosen from st %p %d", -+ st + class_idx, class_idx); -+ } -+#endif -+ - return entity; - } - -+static bool next_queue_may_preempt(struct bfq_data *bfqd) -+{ -+ struct bfq_sched_data *sd = &bfqd->root_group->sched_data; -+ -+ return sd->next_in_service != sd->in_service_entity; -+} -+ - /* - * Get next queue for service. - */ -@@ -1083,58 +1705,218 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) - if (bfqd->busy_queues == 0) - return NULL; - -+ /* -+ * Traverse the path from the root to the leaf entity to -+ * serve. Set in service all the entities visited along the -+ * way. -+ */ - sd = &bfqd->root_group->sched_data; - for (; sd ; sd = entity->my_sched_data) { -- entity = bfq_lookup_next_entity(sd, 1, bfqd); -- BUG_ON(!entity); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ if (entity) { -+ struct bfq_group *bfqg = -+ container_of(entity, struct bfq_group, entity); -+ -+ bfq_log_bfqg(bfqd, bfqg, -+ "get_next_queue: lookup in this group"); -+ if (!sd->next_in_service) -+ pr_crit("get_next_queue: lookup in this group"); -+ } else { -+ bfq_log_bfqg(bfqd, bfqd->root_group, -+ "get_next_queue: lookup in root group"); -+ if (!sd->next_in_service) -+ pr_crit("get_next_queue: lookup in root group"); -+ } -+#endif -+ -+ BUG_ON(!sd->next_in_service); -+ -+ /* -+ * WARNING. We are about to set the in-service entity -+ * to sd->next_in_service, i.e., to the (cached) value -+ * returned by bfq_lookup_next_entity(sd) the last -+ * time it was invoked, i.e., the last time when the -+ * service order in sd changed as a consequence of the -+ * activation or deactivation of an entity. In this -+ * respect, if we execute bfq_lookup_next_entity(sd) -+ * in this very moment, it may, although with low -+ * probability, yield a different entity than that -+ * pointed to by sd->next_in_service. This rare event -+ * happens in case there was no CLASS_IDLE entity to -+ * serve for sd when bfq_lookup_next_entity(sd) was -+ * invoked for the last time, while there is now one -+ * such entity. -+ * -+ * If the above event happens, then the scheduling of -+ * such entity in CLASS_IDLE is postponed until the -+ * service of the sd->next_in_service entity -+ * finishes. In fact, when the latter is expired, -+ * bfq_lookup_next_entity(sd) gets called again, -+ * exactly to update sd->next_in_service. -+ */ -+ -+ /* Make next_in_service entity become in_service_entity */ -+ entity = sd->next_in_service; -+ sd->in_service_entity = entity; -+ -+ /* -+ * Reset the accumulator of the amount of service that -+ * the entity is about to receive. -+ */ - entity->service = 0; -+ -+ /* -+ * If entity is no longer a candidate for next -+ * service, then we extract it from its active tree, -+ * for the following reason. To further boost the -+ * throughput in some special case, BFQ needs to know -+ * which is the next candidate entity to serve, while -+ * there is already an entity in service. In this -+ * respect, to make it easy to compute/update the next -+ * candidate entity to serve after the current -+ * candidate has been set in service, there is a case -+ * where it is necessary to extract the current -+ * candidate from its service tree. Such a case is -+ * when the entity just set in service cannot be also -+ * a candidate for next service. Details about when -+ * this conditions holds are reported in the comments -+ * on the function bfq_no_longer_next_in_service() -+ * invoked below. -+ */ -+ if (bfq_no_longer_next_in_service(entity)) -+ bfq_active_extract(bfq_entity_service_tree(entity), -+ entity); -+ -+ /* -+ * For the same reason why we may have just extracted -+ * entity from its active tree, we may need to update -+ * next_in_service for the sched_data of entity too, -+ * regardless of whether entity has been extracted. -+ * In fact, even if entity has not been extracted, a -+ * descendant entity may get extracted. Such an event -+ * would cause a change in next_in_service for the -+ * level of the descendant entity, and thus possibly -+ * back to upper levels. -+ * -+ * We cannot perform the resulting needed update -+ * before the end of this loop, because, to know which -+ * is the correct next-to-serve candidate entity for -+ * each level, we need first to find the leaf entity -+ * to set in service. In fact, only after we know -+ * which is the next-to-serve leaf entity, we can -+ * discover whether the parent entity of the leaf -+ * entity becomes the next-to-serve, and so on. -+ */ -+ -+ /* Log some information */ -+ bfqq = bfq_entity_to_bfqq(entity); -+ if (bfqq) -+ bfq_log_bfqq(bfqd, bfqq, -+ "get_next_queue: this queue, finish %llu", -+ (((entity->finish>>10)*1000)>>10)>>2); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ else { -+ struct bfq_group *bfqg = -+ container_of(entity, struct bfq_group, entity); -+ -+ bfq_log_bfqg(bfqd, bfqg, -+ "get_next_queue: this entity, finish %llu", -+ (((entity->finish>>10)*1000)>>10)>>2); -+ } -+#endif -+ - } - -+ BUG_ON(!entity); - bfqq = bfq_entity_to_bfqq(entity); - BUG_ON(!bfqq); - -+ /* -+ * We can finally update all next-to-serve entities along the -+ * path from the leaf entity just set in service to the root. -+ */ -+ for_each_entity(entity) { -+ struct bfq_sched_data *sd = entity->sched_data; -+ -+ if(!bfq_update_next_in_service(sd, NULL)) -+ break; -+ } -+ - return bfqq; - } - - static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) - { -+ struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue; -+ struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity; -+ struct bfq_entity *entity = in_serv_entity; -+ - if (bfqd->in_service_bic) { - put_io_context(bfqd->in_service_bic->icq.ioc); - bfqd->in_service_bic = NULL; - } - -+ bfq_clear_bfqq_wait_request(in_serv_bfqq); -+ hrtimer_try_to_cancel(&bfqd->idle_slice_timer); - bfqd->in_service_queue = NULL; -- del_timer(&bfqd->idle_slice_timer); -+ -+ /* -+ * When this function is called, all in-service entities have -+ * been properly deactivated or requeued, so we can safely -+ * execute the final step: reset in_service_entity along the -+ * path from entity to the root. -+ */ -+ for_each_entity(entity) -+ entity->sched_data->in_service_entity = NULL; -+ -+ /* -+ * in_serv_entity is no longer in service, so, if it is in no -+ * service tree either, then release the service reference to -+ * the queue it represents (taken with bfq_get_entity). -+ */ -+ if (!in_serv_entity->on_st) -+ bfq_put_queue(in_serv_bfqq); - } - - static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, -- int requeue) -+ bool ins_into_idle_tree, bool expiration) - { - struct bfq_entity *entity = &bfqq->entity; - -- if (bfqq == bfqd->in_service_queue) -- __bfq_bfqd_reset_in_service(bfqd); -- -- bfq_deactivate_entity(entity, requeue); -+ bfq_deactivate_entity(entity, ins_into_idle_tree, expiration); - } - - static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) - { - struct bfq_entity *entity = &bfqq->entity; -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); -+ -+ BUG_ON(bfqq == bfqd->in_service_queue); -+ BUG_ON(entity->tree != &st->active && entity->tree != &st->idle && -+ entity->on_st); -+ -+ bfq_activate_requeue_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq), -+ false); -+ bfq_clear_bfqq_non_blocking_wait_rq(bfqq); -+} -+ -+static void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; - -- bfq_activate_entity(entity); -+ bfq_activate_requeue_entity(entity, false, -+ bfqq == bfqd->in_service_queue); - } - --#ifdef CONFIG_BFQ_GROUP_IOSCHED - static void bfqg_stats_update_dequeue(struct bfq_group *bfqg); --#endif - - /* - * Called when the bfqq no longer has requests pending, remove it from -- * the service tree. -+ * the service tree. As a special case, it can be invoked during an -+ * expiration. - */ - static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, -- int requeue) -+ bool expiration) - { - BUG_ON(!bfq_bfqq_busy(bfqq)); - BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); -@@ -1146,27 +1928,18 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, - BUG_ON(bfqd->busy_queues == 0); - bfqd->busy_queues--; - -- if (!bfqq->dispatched) { -+ if (!bfqq->dispatched) - bfq_weights_tree_remove(bfqd, &bfqq->entity, - &bfqd->queue_weights_tree); -- if (!blk_queue_nonrot(bfqd->queue)) { -- BUG_ON(!bfqd->busy_in_flight_queues); -- bfqd->busy_in_flight_queues--; -- if (bfq_bfqq_constantly_seeky(bfqq)) { -- BUG_ON(!bfqd-> -- const_seeky_busy_in_flight_queues); -- bfqd->const_seeky_busy_in_flight_queues--; -- } -- } -- } -+ - if (bfqq->wr_coeff > 1) - bfqd->wr_busy_queues--; - --#ifdef CONFIG_BFQ_GROUP_IOSCHED - bfqg_stats_update_dequeue(bfqq_group(bfqq)); --#endif - -- bfq_deactivate_bfqq(bfqd, bfqq, requeue); -+ BUG_ON(bfqq->entity.budget < 0); -+ -+ bfq_deactivate_bfqq(bfqd, bfqq, true, expiration); - } - - /* -@@ -1184,16 +1957,11 @@ static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) - bfq_mark_bfqq_busy(bfqq); - bfqd->busy_queues++; - -- if (!bfqq->dispatched) { -+ if (!bfqq->dispatched) - if (bfqq->wr_coeff == 1) - bfq_weights_tree_add(bfqd, &bfqq->entity, - &bfqd->queue_weights_tree); -- if (!blk_queue_nonrot(bfqd->queue)) { -- bfqd->busy_in_flight_queues++; -- if (bfq_bfqq_constantly_seeky(bfqq)) -- bfqd->const_seeky_busy_in_flight_queues++; -- } -- } -+ - if (bfqq->wr_coeff > 1) - bfqd->wr_busy_queues++; - } -diff --git a/block/bfq.h b/block/bfq.h -index fcce855..8cd2b6f 100644 ---- a/block/bfq.h -+++ b/block/bfq.h -@@ -1,5 +1,5 @@ - /* -- * BFQ-v7r11 for 4.5.0: data structures and common functions prototypes. -+ * BFQ v8r11 for 4.10.0: data structures and common functions prototypes. - * - * Based on ideas and code from CFQ: - * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> -@@ -7,7 +7,9 @@ - * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> - * Paolo Valente <paolo.valente@unimore.it> - * -- * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it> -+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it> -+ * -+ * Copyright (C) 2017 Paolo Valente <paolo.valente@linaro.org> - */ - - #ifndef _BFQ_H -@@ -28,20 +30,21 @@ - - #define BFQ_DEFAULT_QUEUE_IOPRIO 4 - --#define BFQ_DEFAULT_GRP_WEIGHT 10 -+#define BFQ_WEIGHT_LEGACY_DFL 100 - #define BFQ_DEFAULT_GRP_IOPRIO 0 - #define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE - -+/* -+ * Soft real-time applications are extremely more latency sensitive -+ * than interactive ones. Over-raise the weight of the former to -+ * privilege them against the latter. -+ */ -+#define BFQ_SOFTRT_WEIGHT_FACTOR 100 -+ - struct bfq_entity; - - /** - * struct bfq_service_tree - per ioprio_class service tree. -- * @active: tree for active entities (i.e., those backlogged). -- * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i). -- * @first_idle: idle entity with minimum F_i. -- * @last_idle: idle entity with maximum F_i. -- * @vtime: scheduler virtual time. -- * @wsum: scheduler weight sum; active and idle entities contribute to it. - * - * Each service tree represents a B-WF2Q+ scheduler on its own. Each - * ioprio_class has its own independent scheduler, and so its own -@@ -49,27 +52,28 @@ struct bfq_entity; - * of the containing bfqd. - */ - struct bfq_service_tree { -+ /* tree for active entities (i.e., those backlogged) */ - struct rb_root active; -+ /* tree for idle entities (i.e., not backlogged, with V <= F_i)*/ - struct rb_root idle; - -- struct bfq_entity *first_idle; -- struct bfq_entity *last_idle; -+ struct bfq_entity *first_idle; /* idle entity with minimum F_i */ -+ struct bfq_entity *last_idle; /* idle entity with maximum F_i */ - -- u64 vtime; -+ u64 vtime; /* scheduler virtual time */ -+ /* scheduler weight sum; active and idle entities contribute to it */ - unsigned long wsum; - }; - - /** - * struct bfq_sched_data - multi-class scheduler. -- * @in_service_entity: entity in service. -- * @next_in_service: head-of-the-line entity in the scheduler. -- * @service_tree: array of service trees, one per ioprio_class. - * - * bfq_sched_data is the basic scheduler queue. It supports three -- * ioprio_classes, and can be used either as a toplevel queue or as -- * an intermediate queue on a hierarchical setup. -- * @next_in_service points to the active entity of the sched_data -- * service trees that will be scheduled next. -+ * ioprio_classes, and can be used either as a toplevel queue or as an -+ * intermediate queue on a hierarchical setup. @next_in_service -+ * points to the active entity of the sched_data service trees that -+ * will be scheduled next. It is used to reduce the number of steps -+ * needed for each hierarchical-schedule update. - * - * The supported ioprio_classes are the same as in CFQ, in descending - * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. -@@ -79,48 +83,32 @@ struct bfq_service_tree { - * All the fields are protected by the queue lock of the containing bfqd. - */ - struct bfq_sched_data { -- struct bfq_entity *in_service_entity; -+ struct bfq_entity *in_service_entity; /* entity in service */ -+ /* head-of-the-line entity in the scheduler (see comments above) */ - struct bfq_entity *next_in_service; -+ /* array of service trees, one per ioprio_class */ - struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; -+ /* last time CLASS_IDLE was served */ -+ unsigned long bfq_class_idle_last_service; -+ - }; - - /** - * struct bfq_weight_counter - counter of the number of all active entities - * with a given weight. -- * @weight: weight of the entities that this counter refers to. -- * @num_active: number of active entities with this weight. -- * @weights_node: weights tree member (see bfq_data's @queue_weights_tree -- * and @group_weights_tree). - */ - struct bfq_weight_counter { -- short int weight; -- unsigned int num_active; -+ unsigned int weight; /* weight of the entities this counter refers to */ -+ unsigned int num_active; /* nr of active entities with this weight */ -+ /* -+ * Weights tree member (see bfq_data's @queue_weights_tree and -+ * @group_weights_tree) -+ */ - struct rb_node weights_node; - }; - - /** - * struct bfq_entity - schedulable entity. -- * @rb_node: service_tree member. -- * @weight_counter: pointer to the weight counter associated with this entity. -- * @on_st: flag, true if the entity is on a tree (either the active or -- * the idle one of its service_tree). -- * @finish: B-WF2Q+ finish timestamp (aka F_i). -- * @start: B-WF2Q+ start timestamp (aka S_i). -- * @tree: tree the entity is enqueued into; %NULL if not on a tree. -- * @min_start: minimum start time of the (active) subtree rooted at -- * this entity; used for O(log N) lookups into active trees. -- * @service: service received during the last round of service. -- * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. -- * @weight: weight of the queue -- * @parent: parent entity, for hierarchical scheduling. -- * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the -- * associated scheduler queue, %NULL on leaf nodes. -- * @sched_data: the scheduler queue this entity belongs to. -- * @ioprio: the ioprio in use. -- * @new_weight: when a weight change is requested, the new weight value. -- * @orig_weight: original weight, used to implement weight boosting -- * @prio_changed: flag, true when the user requested a weight, ioprio or -- * ioprio_class change. - * - * A bfq_entity is used to represent either a bfq_queue (leaf node in the - * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each -@@ -147,27 +135,52 @@ struct bfq_weight_counter { - * containing bfqd. - */ - struct bfq_entity { -- struct rb_node rb_node; -+ struct rb_node rb_node; /* service_tree member */ -+ /* pointer to the weight counter associated with this entity */ - struct bfq_weight_counter *weight_counter; - -- int on_st; -+ /* -+ * Flag, true if the entity is on a tree (either the active or -+ * the idle one of its service_tree) or is in service. -+ */ -+ bool on_st; - -- u64 finish; -- u64 start; -+ u64 finish; /* B-WF2Q+ finish timestamp (aka F_i) */ -+ u64 start; /* B-WF2Q+ start timestamp (aka S_i) */ - -+ /* tree the entity is enqueued into; %NULL if not on a tree */ - struct rb_root *tree; - -+ /* -+ * minimum start time of the (active) subtree rooted at this -+ * entity; used for O(log N) lookups into active trees -+ */ - u64 min_start; - -- int service, budget; -- unsigned short weight, new_weight; -- unsigned short orig_weight; -+ /* amount of service received during the last service slot */ -+ int service; -+ -+ /* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */ -+ int budget; -+ -+ unsigned int weight; /* weight of the queue */ -+ unsigned int new_weight; /* next weight if a change is in progress */ -+ -+ /* original weight, used to implement weight boosting */ -+ unsigned int orig_weight; - -+ /* parent entity, for hierarchical scheduling */ - struct bfq_entity *parent; - -+ /* -+ * For non-leaf nodes in the hierarchy, the associated -+ * scheduler queue, %NULL on leaf nodes. -+ */ - struct bfq_sched_data *my_sched_data; -+ /* the scheduler queue this entity belongs to */ - struct bfq_sched_data *sched_data; - -+ /* flag, set to request a weight, ioprio or ioprio_class change */ - int prio_changed; - }; - -@@ -175,56 +188,6 @@ struct bfq_group; - - /** - * struct bfq_queue - leaf schedulable entity. -- * @ref: reference counter. -- * @bfqd: parent bfq_data. -- * @new_ioprio: when an ioprio change is requested, the new ioprio value. -- * @ioprio_class: the ioprio_class in use. -- * @new_ioprio_class: when an ioprio_class change is requested, the new -- * ioprio_class value. -- * @new_bfqq: shared bfq_queue if queue is cooperating with -- * one or more other queues. -- * @pos_node: request-position tree member (see bfq_group's @rq_pos_tree). -- * @pos_root: request-position tree root (see bfq_group's @rq_pos_tree). -- * @sort_list: sorted list of pending requests. -- * @next_rq: if fifo isn't expired, next request to serve. -- * @queued: nr of requests queued in @sort_list. -- * @allocated: currently allocated requests. -- * @meta_pending: pending metadata requests. -- * @fifo: fifo list of requests in sort_list. -- * @entity: entity representing this queue in the scheduler. -- * @max_budget: maximum budget allowed from the feedback mechanism. -- * @budget_timeout: budget expiration (in jiffies). -- * @dispatched: number of requests on the dispatch list or inside driver. -- * @flags: status flags. -- * @bfqq_list: node for active/idle bfqq list inside our bfqd. -- * @burst_list_node: node for the device's burst list. -- * @seek_samples: number of seeks sampled -- * @seek_total: sum of the distances of the seeks sampled -- * @seek_mean: mean seek distance -- * @last_request_pos: position of the last request enqueued -- * @requests_within_timer: number of consecutive pairs of request completion -- * and arrival, such that the queue becomes idle -- * after the completion, but the next request arrives -- * within an idle time slice; used only if the queue's -- * IO_bound has been cleared. -- * @pid: pid of the process owning the queue, used for logging purposes. -- * @last_wr_start_finish: start time of the current weight-raising period if -- * the @bfq-queue is being weight-raised, otherwise -- * finish time of the last weight-raising period -- * @wr_cur_max_time: current max raising time for this queue -- * @soft_rt_next_start: minimum time instant such that, only if a new -- * request is enqueued after this time instant in an -- * idle @bfq_queue with no outstanding requests, then -- * the task associated with the queue it is deemed as -- * soft real-time (see the comments to the function -- * bfq_bfqq_softrt_next_start()) -- * @last_idle_bklogged: time of the last transition of the @bfq_queue from -- * idle to backlogged -- * @service_from_backlogged: cumulative service received from the @bfq_queue -- * since the last transition from idle to -- * backlogged -- * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the -- * queue is shared - * - * A bfq_queue is a leaf request queue; it can be associated with an - * io_context or more, if it is async or shared between cooperating -@@ -235,117 +198,175 @@ struct bfq_group; - * All the fields are protected by the queue lock of the containing bfqd. - */ - struct bfq_queue { -- atomic_t ref; -+ /* reference counter */ -+ int ref; -+ /* parent bfq_data */ - struct bfq_data *bfqd; - -- unsigned short ioprio, new_ioprio; -- unsigned short ioprio_class, new_ioprio_class; -+ /* current ioprio and ioprio class */ -+ unsigned short ioprio, ioprio_class; -+ /* next ioprio and ioprio class if a change is in progress */ -+ unsigned short new_ioprio, new_ioprio_class; - -- /* fields for cooperating queues handling */ -+ /* -+ * Shared bfq_queue if queue is cooperating with one or more -+ * other queues. -+ */ - struct bfq_queue *new_bfqq; -+ /* request-position tree member (see bfq_group's @rq_pos_tree) */ - struct rb_node pos_node; -+ /* request-position tree root (see bfq_group's @rq_pos_tree) */ - struct rb_root *pos_root; - -+ /* sorted list of pending requests */ - struct rb_root sort_list; -+ /* if fifo isn't expired, next request to serve */ - struct request *next_rq; -+ /* number of sync and async requests queued */ - int queued[2]; -+ /* number of sync and async requests currently allocated */ - int allocated[2]; -+ /* number of pending metadata requests */ - int meta_pending; -+ /* fifo list of requests in sort_list */ - struct list_head fifo; - -+ /* entity representing this queue in the scheduler */ - struct bfq_entity entity; - -+ /* maximum budget allowed from the feedback mechanism */ - int max_budget; -+ /* budget expiration (in jiffies) */ - unsigned long budget_timeout; - -+ /* number of requests on the dispatch list or inside driver */ - int dispatched; - -- unsigned int flags; -+ unsigned int flags; /* status flags.*/ - -+ /* node for active/idle bfqq list inside parent bfqd */ - struct list_head bfqq_list; - -+ /* bit vector: a 1 for each seeky requests in history */ -+ u32 seek_history; -+ -+ /* node for the device's burst list */ - struct hlist_node burst_list_node; - -- unsigned int seek_samples; -- u64 seek_total; -- sector_t seek_mean; -+ /* position of the last request enqueued */ - sector_t last_request_pos; - -+ /* Number of consecutive pairs of request completion and -+ * arrival, such that the queue becomes idle after the -+ * completion, but the next request arrives within an idle -+ * time slice; used only if the queue's IO_bound flag has been -+ * cleared. -+ */ - unsigned int requests_within_timer; - -+ /* pid of the process owning the queue, used for logging purposes */ - pid_t pid; -+ -+ /* -+ * Pointer to the bfq_io_cq owning the bfq_queue, set to %NULL -+ * if the queue is shared. -+ */ - struct bfq_io_cq *bic; - -- /* weight-raising fields */ -+ /* current maximum weight-raising time for this queue */ - unsigned long wr_cur_max_time; -+ /* -+ * Minimum time instant such that, only if a new request is -+ * enqueued after this time instant in an idle @bfq_queue with -+ * no outstanding requests, then the task associated with the -+ * queue it is deemed as soft real-time (see the comments on -+ * the function bfq_bfqq_softrt_next_start()) -+ */ - unsigned long soft_rt_next_start; -+ /* -+ * Start time of the current weight-raising period if -+ * the @bfq-queue is being weight-raised, otherwise -+ * finish time of the last weight-raising period. -+ */ - unsigned long last_wr_start_finish; -+ /* factor by which the weight of this queue is multiplied */ - unsigned int wr_coeff; -+ /* -+ * Time of the last transition of the @bfq_queue from idle to -+ * backlogged. -+ */ - unsigned long last_idle_bklogged; -+ /* -+ * Cumulative service received from the @bfq_queue since the -+ * last transition from idle to backlogged. -+ */ - unsigned long service_from_backlogged; -+ /* -+ * Value of wr start time when switching to soft rt -+ */ -+ unsigned long wr_start_at_switch_to_srt; -+ -+ unsigned long split_time; /* time of last split */ - }; - - /** - * struct bfq_ttime - per process thinktime stats. -- * @ttime_total: total process thinktime -- * @ttime_samples: number of thinktime samples -- * @ttime_mean: average process thinktime - */ - struct bfq_ttime { -- unsigned long last_end_request; -+ u64 last_end_request; /* completion time of last request */ -+ -+ u64 ttime_total; /* total process thinktime */ -+ unsigned long ttime_samples; /* number of thinktime samples */ -+ u64 ttime_mean; /* average process thinktime */ - -- unsigned long ttime_total; -- unsigned long ttime_samples; -- unsigned long ttime_mean; - }; - - /** - * struct bfq_io_cq - per (request_queue, io_context) structure. -- * @icq: associated io_cq structure -- * @bfqq: array of two process queues, the sync and the async -- * @ttime: associated @bfq_ttime struct -- * @ioprio: per (request_queue, blkcg) ioprio. -- * @blkcg_id: id of the blkcg the related io_cq belongs to. -- * @wr_time_left: snapshot of the time left before weight raising ends -- * for the sync queue associated to this process; this -- * snapshot is taken to remember this value while the weight -- * raising is suspended because the queue is merged with a -- * shared queue, and is used to set @raising_cur_max_time -- * when the queue is split from the shared queue and its -- * weight is raised again -- * @saved_idle_window: same purpose as the previous field for the idle -- * window -- * @saved_IO_bound: same purpose as the previous two fields for the I/O -- * bound classification of a queue -- * @saved_in_large_burst: same purpose as the previous fields for the -- * value of the field keeping the queue's belonging -- * to a large burst -- * @was_in_burst_list: true if the queue belonged to a burst list -- * before its merge with another cooperating queue -- * @cooperations: counter of consecutive successful queue merges underwent -- * by any of the process' @bfq_queues -- * @failed_cooperations: counter of consecutive failed queue merges of any -- * of the process' @bfq_queues - */ - struct bfq_io_cq { -+ /* associated io_cq structure */ - struct io_cq icq; /* must be the first member */ -+ /* array of two process queues, the sync and the async */ - struct bfq_queue *bfqq[2]; -+ /* associated @bfq_ttime struct */ - struct bfq_ttime ttime; -+ /* per (request_queue, blkcg) ioprio */ - int ioprio; -- - #ifdef CONFIG_BFQ_GROUP_IOSCHED -- uint64_t blkcg_id; /* the current blkcg ID */ -+ uint64_t blkcg_serial_nr; /* the current blkcg serial */ - #endif - -- unsigned int wr_time_left; -+ /* -+ * Snapshot of the idle window before merging; taken to -+ * remember this value while the queue is merged, so as to be -+ * able to restore it in case of split. -+ */ - bool saved_idle_window; -+ /* -+ * Same purpose as the previous two fields for the I/O bound -+ * classification of a queue. -+ */ - bool saved_IO_bound; - -+ /* -+ * Same purpose as the previous fields for the value of the -+ * field keeping the queue's belonging to a large burst -+ */ - bool saved_in_large_burst; -+ /* -+ * True if the queue belonged to a burst list before its merge -+ * with another cooperating queue. -+ */ - bool was_in_burst_list; - -- unsigned int cooperations; -- unsigned int failed_cooperations; -+ /* -+ * Similar to previous fields: save wr information. -+ */ -+ unsigned long saved_wr_coeff; -+ unsigned long saved_last_wr_start_finish; -+ unsigned long saved_wr_start_at_switch_to_srt; -+ unsigned int saved_wr_cur_max_time; - }; - - enum bfq_device_speed { -@@ -354,224 +375,232 @@ enum bfq_device_speed { - }; - - /** -- * struct bfq_data - per device data structure. -- * @queue: request queue for the managed device. -- * @root_group: root bfq_group for the device. -- * @active_numerous_groups: number of bfq_groups containing more than one -- * active @bfq_entity. -- * @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by -- * weight. Used to keep track of whether all @bfq_queues -- * have the same weight. The tree contains one counter -- * for each distinct weight associated to some active -- * and not weight-raised @bfq_queue (see the comments to -- * the functions bfq_weights_tree_[add|remove] for -- * further details). -- * @group_weights_tree: rbtree of non-queue @bfq_entity weight counters, sorted -- * by weight. Used to keep track of whether all -- * @bfq_groups have the same weight. The tree contains -- * one counter for each distinct weight associated to -- * some active @bfq_group (see the comments to the -- * functions bfq_weights_tree_[add|remove] for further -- * details). -- * @busy_queues: number of bfq_queues containing requests (including the -- * queue in service, even if it is idling). -- * @busy_in_flight_queues: number of @bfq_queues containing pending or -- * in-flight requests, plus the @bfq_queue in -- * service, even if idle but waiting for the -- * possible arrival of its next sync request. This -- * field is updated only if the device is rotational, -- * but used only if the device is also NCQ-capable. -- * The reason why the field is updated also for non- -- * NCQ-capable rotational devices is related to the -- * fact that the value of @hw_tag may be set also -- * later than when busy_in_flight_queues may need to -- * be incremented for the first time(s). Taking also -- * this possibility into account, to avoid unbalanced -- * increments/decrements, would imply more overhead -- * than just updating busy_in_flight_queues -- * regardless of the value of @hw_tag. -- * @const_seeky_busy_in_flight_queues: number of constantly-seeky @bfq_queues -- * (that is, seeky queues that expired -- * for budget timeout at least once) -- * containing pending or in-flight -- * requests, including the in-service -- * @bfq_queue if constantly seeky. This -- * field is updated only if the device -- * is rotational, but used only if the -- * device is also NCQ-capable (see the -- * comments to @busy_in_flight_queues). -- * @wr_busy_queues: number of weight-raised busy @bfq_queues. -- * @queued: number of queued requests. -- * @rq_in_driver: number of requests dispatched and waiting for completion. -- * @sync_flight: number of sync requests in the driver. -- * @max_rq_in_driver: max number of reqs in driver in the last -- * @hw_tag_samples completed requests. -- * @hw_tag_samples: nr of samples used to calculate hw_tag. -- * @hw_tag: flag set to one if the driver is showing a queueing behavior. -- * @budgets_assigned: number of budgets assigned. -- * @idle_slice_timer: timer set when idling for the next sequential request -- * from the queue in service. -- * @unplug_work: delayed work to restart dispatching on the request queue. -- * @in_service_queue: bfq_queue in service. -- * @in_service_bic: bfq_io_cq (bic) associated with the @in_service_queue. -- * @last_position: on-disk position of the last served request. -- * @last_budget_start: beginning of the last budget. -- * @last_idling_start: beginning of the last idle slice. -- * @peak_rate: peak transfer rate observed for a budget. -- * @peak_rate_samples: number of samples used to calculate @peak_rate. -- * @bfq_max_budget: maximum budget allotted to a bfq_queue before -- * rescheduling. -- * @active_list: list of all the bfq_queues active on the device. -- * @idle_list: list of all the bfq_queues idle on the device. -- * @bfq_fifo_expire: timeout for async/sync requests; when it expires -- * requests are served in fifo order. -- * @bfq_back_penalty: weight of backward seeks wrt forward ones. -- * @bfq_back_max: maximum allowed backward seek. -- * @bfq_slice_idle: maximum idling time. -- * @bfq_user_max_budget: user-configured max budget value -- * (0 for auto-tuning). -- * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to -- * async queues. -- * @bfq_timeout: timeout for bfq_queues to consume their budget; used to -- * to prevent seeky queues to impose long latencies to well -- * behaved ones (this also implies that seeky queues cannot -- * receive guarantees in the service domain; after a timeout -- * they are charged for the whole allocated budget, to try -- * to preserve a behavior reasonably fair among them, but -- * without service-domain guarantees). -- * @bfq_coop_thresh: number of queue merges after which a @bfq_queue is -- * no more granted any weight-raising. -- * @bfq_failed_cooperations: number of consecutive failed cooperation -- * chances after which weight-raising is restored -- * to a queue subject to more than bfq_coop_thresh -- * queue merges. -- * @bfq_requests_within_timer: number of consecutive requests that must be -- * issued within the idle time slice to set -- * again idling to a queue which was marked as -- * non-I/O-bound (see the definition of the -- * IO_bound flag for further details). -- * @last_ins_in_burst: last time at which a queue entered the current -- * burst of queues being activated shortly after -- * each other; for more details about this and the -- * following parameters related to a burst of -- * activations, see the comments to the function -- * @bfq_handle_burst. -- * @bfq_burst_interval: reference time interval used to decide whether a -- * queue has been activated shortly after -- * @last_ins_in_burst. -- * @burst_size: number of queues in the current burst of queue activations. -- * @bfq_large_burst_thresh: maximum burst size above which the current -- * queue-activation burst is deemed as 'large'. -- * @large_burst: true if a large queue-activation burst is in progress. -- * @burst_list: head of the burst list (as for the above fields, more details -- * in the comments to the function bfq_handle_burst). -- * @low_latency: if set to true, low-latency heuristics are enabled. -- * @bfq_wr_coeff: maximum factor by which the weight of a weight-raised -- * queue is multiplied. -- * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies). -- * @bfq_wr_rt_max_time: maximum duration for soft real-time processes. -- * @bfq_wr_min_idle_time: minimum idle period after which weight-raising -- * may be reactivated for a queue (in jiffies). -- * @bfq_wr_min_inter_arr_async: minimum period between request arrivals -- * after which weight-raising may be -- * reactivated for an already busy queue -- * (in jiffies). -- * @bfq_wr_max_softrt_rate: max service-rate for a soft real-time queue, -- * sectors per seconds. -- * @RT_prod: cached value of the product R*T used for computing the maximum -- * duration of the weight raising automatically. -- * @device_speed: device-speed class for the low-latency heuristic. -- * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions. -+ * struct bfq_data - per-device data structure. - * - * All the fields are protected by the @queue lock. - */ - struct bfq_data { -+ /* request queue for the device */ - struct request_queue *queue; - -+ /* root bfq_group for the device */ - struct bfq_group *root_group; - --#ifdef CONFIG_BFQ_GROUP_IOSCHED -- int active_numerous_groups; --#endif -- -+ /* -+ * rbtree of weight counters of @bfq_queues, sorted by -+ * weight. Used to keep track of whether all @bfq_queues have -+ * the same weight. The tree contains one counter for each -+ * distinct weight associated to some active and not -+ * weight-raised @bfq_queue (see the comments to the functions -+ * bfq_weights_tree_[add|remove] for further details). -+ */ - struct rb_root queue_weights_tree; -+ /* -+ * rbtree of non-queue @bfq_entity weight counters, sorted by -+ * weight. Used to keep track of whether all @bfq_groups have -+ * the same weight. The tree contains one counter for each -+ * distinct weight associated to some active @bfq_group (see -+ * the comments to the functions bfq_weights_tree_[add|remove] -+ * for further details). -+ */ - struct rb_root group_weights_tree; - -+ /* -+ * Number of bfq_queues containing requests (including the -+ * queue in service, even if it is idling). -+ */ - int busy_queues; -- int busy_in_flight_queues; -- int const_seeky_busy_in_flight_queues; -+ /* number of weight-raised busy @bfq_queues */ - int wr_busy_queues; -+ /* number of queued requests */ - int queued; -+ /* number of requests dispatched and waiting for completion */ - int rq_in_driver; -- int sync_flight; - -+ /* -+ * Maximum number of requests in driver in the last -+ * @hw_tag_samples completed requests. -+ */ - int max_rq_in_driver; -+ /* number of samples used to calculate hw_tag */ - int hw_tag_samples; -+ /* flag set to one if the driver is showing a queueing behavior */ - int hw_tag; - -+ /* number of budgets assigned */ - int budgets_assigned; - -- struct timer_list idle_slice_timer; -+ /* -+ * Timer set when idling (waiting) for the next request from -+ * the queue in service. -+ */ -+ struct hrtimer idle_slice_timer; -+ /* delayed work to restart dispatching on the request queue */ - struct work_struct unplug_work; - -+ /* bfq_queue in service */ - struct bfq_queue *in_service_queue; -+ /* bfq_io_cq (bic) associated with the @in_service_queue */ - struct bfq_io_cq *in_service_bic; - -+ /* on-disk position of the last served request */ - sector_t last_position; - -+ /* time of last request completion (ns) */ -+ u64 last_completion; -+ -+ /* time of first rq dispatch in current observation interval (ns) */ -+ u64 first_dispatch; -+ /* time of last rq dispatch in current observation interval (ns) */ -+ u64 last_dispatch; -+ -+ /* beginning of the last budget */ - ktime_t last_budget_start; -+ /* beginning of the last idle slice */ - ktime_t last_idling_start; -+ -+ /* number of samples in current observation interval */ - int peak_rate_samples; -- u64 peak_rate; -+ /* num of samples of seq dispatches in current observation interval */ -+ u32 sequential_samples; -+ /* total num of sectors transferred in current observation interval */ -+ u64 tot_sectors_dispatched; -+ /* max rq size seen during current observation interval (sectors) */ -+ u32 last_rq_max_size; -+ /* time elapsed from first dispatch in current observ. interval (us) */ -+ u64 delta_from_first; -+ /* current estimate of device peak rate */ -+ u32 peak_rate; -+ -+ /* maximum budget allotted to a bfq_queue before rescheduling */ - int bfq_max_budget; - -+ /* list of all the bfq_queues active on the device */ - struct list_head active_list; -+ /* list of all the bfq_queues idle on the device */ - struct list_head idle_list; - -- unsigned int bfq_fifo_expire[2]; -+ /* -+ * Timeout for async/sync requests; when it fires, requests -+ * are served in fifo order. -+ */ -+ u64 bfq_fifo_expire[2]; -+ /* weight of backward seeks wrt forward ones */ - unsigned int bfq_back_penalty; -+ /* maximum allowed backward seek */ - unsigned int bfq_back_max; -- unsigned int bfq_slice_idle; -- u64 bfq_class_idle_last_service; -+ /* maximum idling time */ -+ u32 bfq_slice_idle; - -+ /* user-configured max budget value (0 for auto-tuning) */ - int bfq_user_max_budget; -- int bfq_max_budget_async_rq; -- unsigned int bfq_timeout[2]; -- -- unsigned int bfq_coop_thresh; -- unsigned int bfq_failed_cooperations; -+ /* -+ * Timeout for bfq_queues to consume their budget; used to -+ * prevent seeky queues from imposing long latencies to -+ * sequential or quasi-sequential ones (this also implies that -+ * seeky queues cannot receive guarantees in the service -+ * domain; after a timeout they are charged for the time they -+ * have been in service, to preserve fairness among them, but -+ * without service-domain guarantees). -+ */ -+ unsigned int bfq_timeout; -+ -+ /* -+ * Number of consecutive requests that must be issued within -+ * the idle time slice to set again idling to a queue which -+ * was marked as non-I/O-bound (see the definition of the -+ * IO_bound flag for further details). -+ */ - unsigned int bfq_requests_within_timer; - -+ /* -+ * Force device idling whenever needed to provide accurate -+ * service guarantees, without caring about throughput -+ * issues. CAVEAT: this may even increase latencies, in case -+ * of useless idling for processes that did stop doing I/O. -+ */ -+ bool strict_guarantees; -+ -+ /* -+ * Last time at which a queue entered the current burst of -+ * queues being activated shortly after each other; for more -+ * details about this and the following parameters related to -+ * a burst of activations, see the comments on the function -+ * bfq_handle_burst. -+ */ - unsigned long last_ins_in_burst; -+ /* -+ * Reference time interval used to decide whether a queue has -+ * been activated shortly after @last_ins_in_burst. -+ */ - unsigned long bfq_burst_interval; -+ /* number of queues in the current burst of queue activations */ - int burst_size; -+ -+ /* common parent entity for the queues in the burst */ -+ struct bfq_entity *burst_parent_entity; -+ /* Maximum burst size above which the current queue-activation -+ * burst is deemed as 'large'. -+ */ - unsigned long bfq_large_burst_thresh; -+ /* true if a large queue-activation burst is in progress */ - bool large_burst; -+ /* -+ * Head of the burst list (as for the above fields, more -+ * details in the comments on the function bfq_handle_burst). -+ */ - struct hlist_head burst_list; - -+ /* if set to true, low-latency heuristics are enabled */ - bool low_latency; -- -- /* parameters of the low_latency heuristics */ -+ /* -+ * Maximum factor by which the weight of a weight-raised queue -+ * is multiplied. -+ */ - unsigned int bfq_wr_coeff; -+ /* maximum duration of a weight-raising period (jiffies) */ - unsigned int bfq_wr_max_time; -+ -+ /* Maximum weight-raising duration for soft real-time processes */ - unsigned int bfq_wr_rt_max_time; -+ /* -+ * Minimum idle period after which weight-raising may be -+ * reactivated for a queue (in jiffies). -+ */ - unsigned int bfq_wr_min_idle_time; -+ /* -+ * Minimum period between request arrivals after which -+ * weight-raising may be reactivated for an already busy async -+ * queue (in jiffies). -+ */ - unsigned long bfq_wr_min_inter_arr_async; -+ -+ /* Max service-rate for a soft real-time queue, in sectors/sec */ - unsigned int bfq_wr_max_softrt_rate; -+ /* -+ * Cached value of the product R*T, used for computing the -+ * maximum duration of weight raising automatically. -+ */ - u64 RT_prod; -+ /* device-speed class for the low-latency heuristic */ - enum bfq_device_speed device_speed; - -+ /* fallback dummy bfqq for extreme OOM conditions */ - struct bfq_queue oom_bfqq; - }; - - enum bfqq_state_flags { -- BFQ_BFQQ_FLAG_busy = 0, /* has requests or is in service */ -+ BFQ_BFQQ_FLAG_just_created = 0, /* queue just allocated */ -+ BFQ_BFQQ_FLAG_busy, /* has requests or is in service */ - BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ -+ BFQ_BFQQ_FLAG_non_blocking_wait_rq, /* -+ * waiting for a request -+ * without idling the device -+ */ - BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ - BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ - BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ - BFQ_BFQQ_FLAG_sync, /* synchronous queue */ -- BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ - BFQ_BFQQ_FLAG_IO_bound, /* - * bfqq has timed-out at least once - * having consumed at most 2/10 of -@@ -581,17 +610,12 @@ enum bfqq_state_flags { - * bfqq activated in a large burst, - * see comments to bfq_handle_burst. - */ -- BFQ_BFQQ_FLAG_constantly_seeky, /* -- * bfqq has proved to be slow and -- * seeky until budget timeout -- */ - BFQ_BFQQ_FLAG_softrt_update, /* - * may need softrt-next-start - * update - */ - BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ -- BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */ -- BFQ_BFQQ_FLAG_just_split, /* queue has just been split */ -+ BFQ_BFQQ_FLAG_split_coop /* shared bfqq will be split */ - }; - - #define BFQ_BFQQ_FNS(name) \ -@@ -608,28 +632,94 @@ static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ - return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ - } - -+BFQ_BFQQ_FNS(just_created); - BFQ_BFQQ_FNS(busy); - BFQ_BFQQ_FNS(wait_request); -+BFQ_BFQQ_FNS(non_blocking_wait_rq); - BFQ_BFQQ_FNS(must_alloc); - BFQ_BFQQ_FNS(fifo_expire); - BFQ_BFQQ_FNS(idle_window); - BFQ_BFQQ_FNS(sync); --BFQ_BFQQ_FNS(budget_new); - BFQ_BFQQ_FNS(IO_bound); - BFQ_BFQQ_FNS(in_large_burst); --BFQ_BFQQ_FNS(constantly_seeky); - BFQ_BFQQ_FNS(coop); - BFQ_BFQQ_FNS(split_coop); --BFQ_BFQQ_FNS(just_split); - BFQ_BFQQ_FNS(softrt_update); - #undef BFQ_BFQQ_FNS - - /* Logging facilities. */ --#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ -- blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) -+#ifdef CONFIG_BFQ_REDIRECT_TO_CONSOLE -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); -+static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); -+ -+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ -+ char __pbuf[128]; \ -+ \ -+ assert_spin_locked((bfqd)->queue->queue_lock); \ -+ blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \ -+ pr_crit("bfq%d%c %s " fmt "\n", \ -+ (bfqq)->pid, \ -+ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ -+ __pbuf, ##args); \ -+} while (0) -+ -+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ -+ char __pbuf[128]; \ -+ \ -+ blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf)); \ -+ pr_crit("%s " fmt "\n", __pbuf, ##args); \ -+} while (0) -+ -+#else /* CONFIG_BFQ_GROUP_IOSCHED */ -+ -+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ -+ pr_crit("bfq%d%c " fmt "\n", (bfqq)->pid, \ -+ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ -+ ##args) -+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) -+ -+#endif /* CONFIG_BFQ_GROUP_IOSCHED */ -+ -+#define bfq_log(bfqd, fmt, args...) \ -+ pr_crit("bfq " fmt "\n", ##args) -+ -+#else /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); -+static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); -+ -+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ -+ char __pbuf[128]; \ -+ \ -+ assert_spin_locked((bfqd)->queue->queue_lock); \ -+ blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \ -+ blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, \ -+ (bfqq)->pid, \ -+ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ -+ __pbuf, ##args); \ -+} while (0) -+ -+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ -+ char __pbuf[128]; \ -+ \ -+ blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf)); \ -+ blk_add_trace_msg((bfqd)->queue, "%s " fmt, __pbuf, ##args); \ -+} while (0) -+ -+#else /* CONFIG_BFQ_GROUP_IOSCHED */ -+ -+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ -+ blk_add_trace_msg((bfqd)->queue, "bfq%d%c " fmt, (bfqq)->pid, \ -+ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ -+ ##args) -+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) -+ -+#endif /* CONFIG_BFQ_GROUP_IOSCHED */ - - #define bfq_log(bfqd, fmt, args...) \ - blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) -+#endif /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ - - /* Expiration reasons. */ - enum bfqq_expiration { -@@ -640,15 +730,12 @@ enum bfqq_expiration { - BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ - BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ - BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ -+ BFQ_BFQQ_PREEMPTED /* preemption in progress */ - }; - --#ifdef CONFIG_BFQ_GROUP_IOSCHED - - struct bfqg_stats { -- /* total bytes transferred */ -- struct blkg_rwstat service_bytes; -- /* total IOs serviced, post merge */ -- struct blkg_rwstat serviced; -+#ifdef CONFIG_BFQ_GROUP_IOSCHED - /* number of ios merged */ - struct blkg_rwstat merged; - /* total time spent on device in ns, may not be accurate w/ queueing */ -@@ -657,12 +744,8 @@ struct bfqg_stats { - struct blkg_rwstat wait_time; - /* number of IOs queued up */ - struct blkg_rwstat queued; -- /* total sectors transferred */ -- struct blkg_stat sectors; - /* total disk time and nr sectors dispatched by this group */ - struct blkg_stat time; -- /* time not charged to this cgroup */ -- struct blkg_stat unaccounted_time; - /* sum of number of ios queued across all samples */ - struct blkg_stat avg_queue_size_sum; - /* count of samples taken for average */ -@@ -680,8 +763,10 @@ struct bfqg_stats { - uint64_t start_idle_time; - uint64_t start_empty_time; - uint16_t flags; -+#endif - }; - -+#ifdef CONFIG_BFQ_GROUP_IOSCHED - /* - * struct bfq_group_data - per-blkcg storage for the blkio subsystem. - * -@@ -692,7 +777,7 @@ struct bfq_group_data { - /* must be the first member */ - struct blkcg_policy_data pd; - -- unsigned short weight; -+ unsigned int weight; - }; - - /** -@@ -712,7 +797,7 @@ struct bfq_group_data { - * unused for the root group. Used to know whether there - * are groups with more than one active @bfq_entity - * (see the comments to the function -- * bfq_bfqq_must_not_expire()). -+ * bfq_bfqq_may_idle()). - * @rq_pos_tree: rbtree sorted by next_request position, used when - * determining if two or more queues have interleaving - * requests (see bfq_find_close_cooperator()). -@@ -745,7 +830,6 @@ struct bfq_group { - struct rb_root rq_pos_tree; - - struct bfqg_stats stats; -- struct bfqg_stats dead_stats; /* stats pushed from dead children */ - }; - - #else -@@ -761,17 +845,38 @@ struct bfq_group { - - static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity); - -+static unsigned int bfq_class_idx(struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ -+ return bfqq ? bfqq->ioprio_class - 1 : -+ BFQ_DEFAULT_GRP_CLASS - 1; -+} -+ - static struct bfq_service_tree * - bfq_entity_service_tree(struct bfq_entity *entity) - { - struct bfq_sched_data *sched_data = entity->sched_data; - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -- unsigned int idx = bfqq ? bfqq->ioprio_class - 1 : -- BFQ_DEFAULT_GRP_CLASS; -+ unsigned int idx = bfq_class_idx(entity); - - BUG_ON(idx >= BFQ_IOPRIO_CLASSES); - BUG_ON(sched_data == NULL); - -+ if (bfqq) -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "entity_service_tree %p %d", -+ sched_data->service_tree + idx, idx); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED -+ else { -+ struct bfq_group *bfqg = -+ container_of(entity, struct bfq_group, entity); -+ -+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, -+ "entity_service_tree %p %d", -+ sched_data->service_tree + idx, idx); -+ } -+#endif - return sched_data->service_tree + idx; - } - -@@ -791,47 +896,6 @@ static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic) - return bic->icq.q->elevator->elevator_data; - } - --/** -- * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. -- * @ptr: a pointer to a bfqd. -- * @flags: storage for the flags to be saved. -- * -- * This function allows bfqg->bfqd to be protected by the -- * queue lock of the bfqd they reference; the pointer is dereferenced -- * under RCU, so the storage for bfqd is assured to be safe as long -- * as the RCU read side critical section does not end. After the -- * bfqd->queue->queue_lock is taken the pointer is rechecked, to be -- * sure that no other writer accessed it. If we raced with a writer, -- * the function returns NULL, with the queue unlocked, otherwise it -- * returns the dereferenced pointer, with the queue locked. -- */ --static struct bfq_data *bfq_get_bfqd_locked(void **ptr, unsigned long *flags) --{ -- struct bfq_data *bfqd; -- -- rcu_read_lock(); -- bfqd = rcu_dereference(*(struct bfq_data **)ptr); -- -- if (bfqd != NULL) { -- spin_lock_irqsave(bfqd->queue->queue_lock, *flags); -- if (ptr == NULL) -- printk(KERN_CRIT "get_bfqd_locked pointer NULL\n"); -- else if (*ptr == bfqd) -- goto out; -- spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); -- } -- -- bfqd = NULL; --out: -- rcu_read_unlock(); -- return bfqd; --} -- --static void bfq_put_bfqd_unlock(struct bfq_data *bfqd, unsigned long *flags) --{ -- spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); --} -- - #ifdef CONFIG_BFQ_GROUP_IOSCHED - - static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq) -@@ -857,11 +921,13 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio); - static void bfq_put_queue(struct bfq_queue *bfqq); - static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); - static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, -- struct bio *bio, int is_sync, -- struct bfq_io_cq *bic, gfp_t gfp_mask); -+ struct bio *bio, bool is_sync, -+ struct bfq_io_cq *bic); - static void bfq_end_wr_async_queues(struct bfq_data *bfqd, - struct bfq_group *bfqg); -+#ifdef CONFIG_BFQ_GROUP_IOSCHED - static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); -+#endif - static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); - - #endif /* _BFQ_H */ --- -2.10.0 - diff --git a/helpers/DATA/linux-hwe/deblob-4.10 b/helpers/DATA/linux-hwe/deblob-4.13 similarity index 95% rename from helpers/DATA/linux-hwe/deblob-4.10 rename to helpers/DATA/linux-hwe/deblob-4.13 index e374b545..5119e1f6 100644 --- a/helpers/DATA/linux-hwe/deblob-4.10 +++ b/helpers/DATA/linux-hwe/deblob-4.13 @@ -48,7 +48,7 @@ # For each kver release, start extra with an empty string, then count # from 1 if changes are needed that require rebuilding the tarball. -kver=4.10 extra= +kver=4.13 extra= case $1 in --force) @@ -601,6 +601,24 @@ clean_mk CONFIG_ATM_SOLOS drivers/atm/Makefile # Crypto # ########## +announce CAVIUM_CPT - "Cavium Cryptographic Accelerator driver" +reject_firmware drivers/crypto/cavium/cpt/cptpf_main.c +clean_blob drivers/crypto/cavium/cpt/cptpf_main.c +clean_kconfig drivers/crypto/cavium/cpt/Kconfig CAVIUM_CPT +clean_mk CONFIG_CAVIUM_CPT drivers/crypto/cavium/cpt/Makefile + +announce CRYPTO_DEV_NITROX_CNN55XX - "Support for Cavium CNN55XX driver" +reject_firmware drivers/crypto/cavium/nitrox/nitrox_main.c +clean_blob drivers/crypto/cavium/nitrox/nitrox_main.c +clean_kconfig drivers/crypto/cavium/nitrox/Kconfig CRYPTO_DEV_NITROX_CNN55XX +clean_mk CONFIG_CRYPTO_DEV_NITROX_CNN55XX drivers/crypto/cavium/nitrox/Makefile + +announce CRYPTO_DEV_SAFEXCEL - "Inside Secure's SafeXcel cryptographic engine driver" +reject_firmware drivers/crypto/inside-secure/safexcel.c +clean_blob drivers/crypto/inside-secure/safexcel.c +clean_kconfig drivers/crypto/Kconfig CRYPTO_DEV_SAFEXCEL +clean_mk CONFIG_CRYPTO_DEV_SAFEXCEL drivers/crypto/inside-secure/Makefile + announce CRYPTO_DEV_QAT_DH895xCC - "Support for Intel(R) DH895xCC" clean_blob drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h clean_blob drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -651,19 +669,28 @@ reject_firmware drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c clean_blob drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c reject_firmware drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c clean_blob drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +reject_firmware drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +clean_blob drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c reject_firmware drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c clean_blob drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +reject_firmware drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +clean_blob drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c reject_firmware drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c clean_blob drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +reject_firmware drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +clean_blob drivers/gpu/drm/amd/amdgpu/psp_v3_1.c reject_firmware drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c clean_blob drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c reject_firmware drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c clean_blob drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +reject_firmware drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +clean_blob drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c reject_firmware drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c clean_blob drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +clean_blob drivers/gpu/drm/amd/amdgpu/soc15.c reject_firmware drivers/gpu/drm/amd/amdgpu/amdgpu_device.c clean_blob drivers/gpu/drm/amd/amdgpu/amdgpu_device.c -clean_blob drivers/gpu/drm/amd/amdgpu/vi.c +clean_blob drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c clean_kconfig drivers/gpu/drm/Kconfig DRM_AMDGPU clean_mk CONFIG_DRM_AMDGPU drivers/gpu/drm/amd/amdgpu/Makefile @@ -697,9 +724,10 @@ clean_mk CONFIG_DRM_AST drivers/gpu/drm/ast/Makefile announce DRM_I915 - "Intel 8xx/9xx/G3x/G4x/HD Graphics" reject_firmware drivers/gpu/drm/i915/intel_csr.c -reject_firmware drivers/gpu/drm/i915/intel_guc_loader.c clean_blob drivers/gpu/drm/i915/intel_csr.c +reject_firmware drivers/gpu/drm/i915/intel_uc.c clean_blob drivers/gpu/drm/i915/intel_guc_loader.c +clean_blob drivers/gpu/drm/i915/intel_huc.c clean_kconfig drivers/gpu/drm/i915/Kconfig DRM_I915 clean_mk CONFIG_DRM_I915 drivers/gpu/drm/i915/Makefile @@ -716,6 +744,8 @@ clean_blob drivers/gpu/drm/nouveau/nvkm/engine/falcon.c reject_firmware drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c clean_blob drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c clean_blob drivers/gpu/drm/nouveau/nouveau_platform.c +clean_blob drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c +clean_blob drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c clean_blob drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c clean_blob drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c clean_kconfig drivers/gpu/drm/nouveau/Kconfig DRM_NOUVEAU @@ -731,6 +761,7 @@ clean_mk CONFIG_DRM_MGA drivers/gpu/drm/Makefile announce DRM_MSM - "MSM DRM" reject_firmware drivers/gpu/drm/msm/adreno/adreno_gpu.c +reject_firmware drivers/gpu/drm/msm/adreno/a5xx_gpu.c reject_firmware drivers/gpu/drm/msm/adreno/a5xx_power.c clean_blob drivers/gpu/drm/msm/adreno/adreno_device.c clean_kconfig drivers/gpu/drm/msm/Kconfig DRM_MSM @@ -801,12 +832,24 @@ clean_blob drivers/gpu/drm/radeon/radeon_vce.c clean_kconfig drivers/gpu/drm/Kconfig DRM_RADEON clean_mk CONFIG_DRM_RADEON drivers/gpu/drm/Makefile +announce ROCKCHIP_CDN_DP - "Rockchip cdn DP" +reject_firmware drivers/gpu/drm/rockchip/cdn-dp-core.c +clean_blob drivers/gpu/drm/rockchip/cdn-dp-core.c +clean_kconfig drivers/gpu/drm/rockchip/Kconfig ROCKCHIP_CDN_DP +clean_mk CONFIG_ROCKCHIP_CDN_DP drivers/gpu/drm/rockchip/Makefile + announce DRM_STI - "DRM Support for STMicroelectronics SoC stiH41x Series" reject_firmware drivers/gpu/drm/sti/sti_hqvdp.c clean_blob drivers/gpu/drm/sti/sti_hqvdp.c clean_kconfig drivers/gpu/drm/sti/Kconfig DRM_STI clean_mk CONFIG_DRM_STI drivers/gpu/drm/sti/Makefile +announce DRM_TEGRA - "NVIDIA Tegra DRM" +reject_firmware drivers/gpu/drm/tegra/falcon.c +clean_blob drivers/gpu/drm/tegra/vic.c +clean_kconfig drivers/gpu/drm/tegra/Kconfig DRM_TEGRA +clean_mk CONFIG_DRM_TEGRA drivers/gpu/drm/tegra/Makefile + ####### # dma # ####### @@ -1259,6 +1302,24 @@ clean_mk CONFIG_DVB_TTUSB_DEC drivers/media/usb/ttusb-dec/Makefile # video +announce VIDEO_AP1302 - "AP1302 external ISP support" +reject_firmware drivers/staging/media/atomisp/i2c/ap1302.c +clean_blob drivers/staging/media/atomisp/i2c/ap1302.c +clean_kconfig drivers/staging/media/atomisp/i2c/Kconfig VIDEO_AP1302 +clean_mk CONFIG_VIDEO_AP1302 drivers/staging/media/atomisp/i2c/Makefile + +announce VIDEO_MSRLIST_HELPER - "Helper library to load, parse and apply large register lists." +reject_firmware drivers/staging/media/atomisp/i2c/libmsrlisthelper.c +clean_kconfig drivers/staging/media/atomisp/i2c/Kconfig VIDEO_MSRLIST_HELPER +clean_mk CONFIG_VIDEO_MSRLIST_HELPER drivers/staging/media/atomisp/i2c/Makefile + +announce VIDEO_ATOMISP - "Intel Atom Image Signal Processor Driver" +reject_firmware drivers/staging/media/atomisp/pci/atomisp2/atomisp_v4l2.c +clean_blob drivers/staging/media/atomisp/pci/atomisp2/atomisp_v4l2.c +clean_blob drivers/staging/media/atomisp/TODO +clean_kconfig drivers/staging/media/atomisp/pci/Kconfig VIDEO_ATOMISP +clean_mk CONFIG_VIDEO_ATOMISP drivers/staging/media/atomisp/pci/Makefile + announce VIDEO_BT848 - "BT848 Video For Linux" reject_firmware drivers/media/pci/bt8xx/bttv-cards.c clean_blob drivers/media/pci/bt8xx/bttv-cards.c @@ -1416,6 +1477,12 @@ clean_blob drivers/media/usb/gspca/vicam.c clean_kconfig drivers/media/usb/gspca/Kconfig USB_GSPCA_VICAM clean_mk CONFIG_USB_GSPCA_VICAM drivers/media/usb/gspca/Makefile +announce VIDEO_QCOM_VENUS - "Qualcomm Venus V4L2 encoder/decoder driver" +reject_firmware drivers/media/platform/qcom/venus/firmware.c +clean_blob drivers/media/platform/qcom/venus/core.c +clean_kconfig drivers/media/platform/Kconfig VIDEO_QCOM_VENUS +clean_mk CONFIG_VIDEO_QCOM_VENUS drivers/media/platform/qcom/venus/Makefile + announce VIDEO_TI_VPE - "TI VPE (Video Processing Engine) driver" reject_firmware drivers/media/platform/ti-vpe/vpdma.c clean_blob drivers/media/platform/ti-vpe/vpdma.c @@ -1553,6 +1620,14 @@ clean_blob drivers/net/ethernet/cavium/liquidio/lio_main.c clean_kconfig drivers/net/ethernet/cavium/Kconfig LIQUIDIO clean_mk CONFIG_LIQUIDIO drivers/net/ethernet/cavium/liquidio/Makefile +announce MLXSW_SPECTRUM - "Mellanox Technologies Spectrum support" +reject_firmware drivers/net/ethernet/mellanox/mlxsw/spectrum.c ' +/request_firmware_direct.*MLXSW_SP_FW_FILENAME/!{p;d;}; +' +clean_blob drivers/net/ethernet/mellanox/mlxsw/spectrum.c +clean_kconfig drivers/net/ethernet/mellanox/mlxsw/Kconfig MLXSW_SPECTRUM +clean_mk CONFIG_MLXSW_SPECTRUM drivers/net/ethernet/mellanox/mlxsw/Makefile + announce MYRI_SBUS - "MyriCOM Gigabit Ethernet" drop_fw_file firmware/myricom/lanai.bin.ihex firmware/myricom/lanai.bin @@ -1562,6 +1637,12 @@ clean_blob drivers/net/ethernet/myricom/myri10ge/myri10ge.c clean_kconfig drivers/net/ethernet/myricom/Kconfig MYRI10GE clean_mk CONFIG_MYRI10GE drivers/net/ethernet/myricom/myri10ge/Makefile +announce NFP - "Netronome(R) NFP4000/NFP6000 NIC driver" +reject_firmware drivers/net/ethernet/netronome/nfp/nfp_main.c +clean_blob drivers/net/ethernet/netronome/nfp/nfp_main.c +clean_kconfig drivers/net/ethernet/netronome/Kconfig NFP +clean_mk CONFIG_NFP drivers/net/ethernet/netronome/nfp/Makefile + announce NETXEN_NIC - "NetXen Multi port (1/10) Gigabit Ethernet NIC" reject_firmware drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c clean_blob drivers/net/ethernet/qlogic/netxen/netxen_nic.h @@ -1700,12 +1781,6 @@ reject_firmware drivers/nfc/nxp-nci/firmware.c clean_kconfig drivers/nfc/nxp-nci/Kconfig NFC_NXP_NCI clean_mk CONFIG_NFC_NXP_NCI drivers/nfc/nxp-nci/Makefile -announce NFC_WILINK - "Texas Instruments NFC WiLink driver" -reject_firmware drivers/nfc/nfcwilink.c -clean_blob drivers/nfc/nfcwilink.c -clean_kconfig drivers/nfc/Kconfig NFC_WILINK -clean_mk CONFIG_NFC_WILINK drivers/nfc/Makefile - announce NFC_PN544_I2C - "NFC PN544 i2c support" reject_firmware drivers/nfc/pn544/i2c.c clean_kconfig drivers/nfc/pn544/Kconfig NFC_PN544_I2C @@ -1880,19 +1955,19 @@ clean_kconfig drivers/net/wireless/intel/iwlwifi/Kconfig IWLWIFI clean_mk CONFIG_IWLWIFI drivers/net/wireless/intel/iwlwifi/Makefile announce IWLDVM - "Intel Wireless WiFi DVM Firmware support" -clean_blob drivers/net/wireless/intel/iwlwifi/iwl-1000.c -clean_blob drivers/net/wireless/intel/iwlwifi/iwl-2000.c -clean_blob drivers/net/wireless/intel/iwlwifi/iwl-5000.c -clean_blob drivers/net/wireless/intel/iwlwifi/iwl-6000.c +clean_blob drivers/net/wireless/intel/iwlwifi/cfg/1000.c +clean_blob drivers/net/wireless/intel/iwlwifi/cfg/2000.c +clean_blob drivers/net/wireless/intel/iwlwifi/cfg/5000.c +clean_blob drivers/net/wireless/intel/iwlwifi/cfg/6000.c clean_kconfig drivers/net/wireless/intel/iwlwifi/Kconfig IWLDVM clean_mk CONFIG_IWLMVM drivers/net/wireless/intel/iwlwifi/Makefile announce IWLMVM - "Intel Wireless WiFi MVM Firmware support" reject_firmware drivers/net/wireless/intel/iwlwifi/mvm/nvm.c -clean_blob drivers/net/wireless/intel/iwlwifi/iwl-7000.c -clean_blob drivers/net/wireless/intel/iwlwifi/iwl-8000.c -clean_blob drivers/net/wireless/intel/iwlwifi/iwl-9000.c -clean_blob drivers/net/wireless/intel/iwlwifi/iwl-a000.c +clean_blob drivers/net/wireless/intel/iwlwifi/cfg/7000.c +clean_blob drivers/net/wireless/intel/iwlwifi/cfg/8000.c +clean_blob drivers/net/wireless/intel/iwlwifi/cfg/9000.c +clean_blob drivers/net/wireless/intel/iwlwifi/cfg/a000.c clean_kconfig drivers/net/wireless/intel/iwlwifi/Kconfig IWLMVM clean_mk CONFIG_IWLMVM drivers/net/wireless/intel/iwlwifi/Makefile @@ -2073,21 +2148,27 @@ clean_blob drivers/net/wireless/intersil/prism54/islpci_dev.c clean_kconfig drivers/net/wireless/intersil/Kconfig PRISM54 clean_mk CONFIG_PRISM54 drivers/net/wireless/intersil/prism54/Makefile +announce QTNFMAC_PEARL_PCIE - "Quantenna QSR10g PCIe support" +reject_firmware drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c +clean_blob drivers/net/wireless/quantenna/qtnfmac/qtn_hw_ids.h +clean_kconfig drivers/net/wireless/quantenna/qtnfmac/Kconfig QTNFMAC_PEARL_PCIE +clean_mk CONFIG_QTNFMAC_PEARL_PCIE drivers/net/wireless/quantenna/qtnfmac/Makefile + announce RSI_91X - "Redpine Signals Inc 91x WLAN driver support" +reject_firmware drivers/net/wireless/rsi/rsi_91x_hal.c clean_blob drivers/net/wireless/rsi/rsi_common.h +clean_blob drivers/net/wireless/rsi/rsi_91x_hal.c clean_kconfig drivers/net/wireless/rsi/Kconfig RSI_91X clean_mk CONFIG_RSI_91X drivers/net/wireless/rsi/Makefile announce RSI_SDIO - "Redpine Signals SDIO bus support" -reject_firmware drivers/net/wireless/rsi/rsi_91x_sdio_ops.c clean_blob drivers/net/wireless/rsi/rsi_91x_sdio.c clean_kconfig drivers/net/wireless/rsi/Kconfig RSI_SDIO -clean_mk CONFIG_RSI_SDIO drivers/net/wireless/rsi/Makefile +clean_mk CONFIG_RSI_USB drivers/net/wireless/rsi/Makefile announce RSI_USB - "Redpine Signals USB bus support" -reject_firmware drivers/net/wireless/rsi/rsi_91x_usb_ops.c clean_blob drivers/net/wireless/rsi/rsi_91x_usb.c -clean_kconfig drivers/net/wireless/rsi/Kconfig RSI_USB +clean_kconfig drivers/net/wireless/rsi/Kconfig RSI_SDIO clean_mk CONFIG_RSI_USB drivers/net/wireless/rsi/Makefile announce RT2X00_LIB_FIRMWARE - "Ralink driver firmware support" @@ -2208,6 +2289,13 @@ clean_blob drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c clean_kconfig drivers/net/wireless/realtek/rtlwifi/Kconfig RTL8723BE clean_mk CONFIG_RTL8723BE drivers/net/wireless/realtek/rtlwifi/rtl8723be/Makefile +announce RTL8723BS - "Realtek RTL8723BS SDIO Wireless LAN NIC driver" +reject_firmware drivers/staging/rtl8723bs/hal/rtl8723b_hal_init.c +clean_blob drivers/staging/rtl8723bs/hal/rtl8723b_hal_init.c +clean_blob drivers/staging/rtl8723bs/include/rtl8723b_hal.h +clean_kconfig drivers/staging/rtl8723bs/Kconfig RTL8723BS +clean_mk CONFIG_RTL8723BS drivers/staging/rtl8723bs/Makefile + announce RTL8821AE - "Realtek RTL8821AE/RTL8812AE Wireless LAN NIC driver" reject_firmware drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c clean_blob drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c @@ -2319,12 +2407,24 @@ reject_firmware drivers/bluetooth/hci_bcm.c clean_kconfig drivers/bluetooth/Kconfig BT_HCIUART_BCM clean_mk CONFIG_BT_HCIUART_BCM drivers/bluetooth/Makefile +announce BT_HCIUART_LL - "HCILL protocol support" +reject_firmware drivers/bluetooth/hci_ll.c +clean_blob drivers/bluetooth/hci_ll.c +clean_kconfig drivers/bluetooth/Kconfig BT_HCIUART_LL +clean_mk CONFIG_BT_HCIUART_LL drivers/bluetooth/Makefile + announce BT_HCIUART_MRVL - "Marvell protocol support" reject_firmware drivers/bluetooth/hci_mrvl.c clean_blob drivers/bluetooth/hci_mrvl.c clean_kconfig drivers/bluetooth/Kconfig BT_HCIUART_MRVL clean_mk CONFIG_BT_HCIUART_MRVL drivers/bluetooth/Makefile +announce BT_HCIUART_NOKIA - "UART Nokia H4+ protocol support" +reject_firmware drivers/bluetooth/hci_nokia.c +clean_blob drivers/bluetooth/hci_nokia.c +clean_kconfig drivers/bluetooth/Kconfig BT_HCIUART_NOKIA +clean_mk CONFIG_BT_HCIUART_NOKIA drivers/bluetooth/Makefile + announce BT_HCIBFUSB - "HCI BlueFRITZ! USB driver" reject_firmware drivers/bluetooth/bfusb.c clean_blob drivers/bluetooth/bfusb.c @@ -2527,6 +2627,9 @@ clean_mk CONFIG_TOUCHSCREEN_ELAN drivers/input/touchscreen/Makefile announce TOUCHSCREEN_ATMEL_MXT - "Atmel mXT I2C Touchscreen" reject_firmware drivers/input/touchscreen/atmel_mxt_ts.c clean_blob drivers/input/touchscreen/atmel_mxt_ts.c +clean_sed ' +/^[/][*]$/,/^ [*][/]$/ s,/lib/firmware/[^\n]*\.fw,*(DEBLOBBED)*, +' arch/arm/boot/dts/omap4-droid4-xt894.dts 'removed blob name' clean_kconfig drivers/input/touchscreen/Kconfig TOUCHSCREEN_ATMEL_MXT clean_mk CONFIG_TOUCHSCREEN_ATMEL_MXT drivers/input/touchscreen/Makefile @@ -2560,6 +2663,11 @@ clean_blob drivers/input/touchscreen/silead.c clean_kconfig drivers/input/touchscreen/Kconfig TOUCHSCREEN_SILEAD clean_mk CONFIG_TOUCHSCREEN_SILEAD drivers/input/touchscreen/Makefile +announce SILEAD_DMI - "Tablets with Silead touchscreens" +clean_blob drivers/platform/x86/silead_dmi.c +clean_kconfig drivers/platform/x86/Kconfig SILEAD_DMI +clean_mk CONFIG_SILEAD_DMI drivers/platform/x86/Makefile + announce TOUCHSCREEN_WDT87XX_I2C - "Weida HiTech I2C touchscreen" reject_firmware drivers/input/touchscreen/wdt87xx_i2c.c clean_blob drivers/input/touchscreen/wdt87xx_i2c.c @@ -2913,16 +3021,21 @@ clean_blob arch/arm/boot/dts/am4372.dtsi clean_kconfig drivers/remoteproc/Kconfig WKUP_M3_RPROC clean_mk CONFIG_WKUP_M3_RPROC drivers/remoteproc/Makefile +announce QCOM_ADSP_PIL - "Qualcomm ADSP Peripherial Image Loader" +clean_blob drivers/remoteproc/qcom_adsp_pil.c +clean_kconfig drivers/remoteproc/Kconfig QCOM_ADSP_PIL +clean_mk CONFIG_QCOM_ADSP_PIL drivers/remoteproc/Makefile + announce QCOM_Q6V5_PIL - "Qualcomm Hexagon V5 Peripherial Image Loader" reject_firmware drivers/remoteproc/qcom_q6v5_pil.c clean_blob drivers/remoteproc/qcom_q6v5_pil.c clean_kconfig drivers/remoteproc/Kconfig QCOM_Q6V5_PIL clean_mk CONFIG_QCOM_Q6V5_PIL drivers/remoteproc/Makefile -announce QCOM_MDT_LOADER - "Qualcomm Peripheral Image Loader" -reject_firmware drivers/remoteproc/qcom_mdt_loader.c -clean_kconfig drivers/remoteproc/Kconfig QCOM_MDT_LOADER -clean_mk CONFIG_QCOM_MDT_LOADER drivers/remoteproc/Makefile +announce QCOM_WCNSS_PIL - "Qualcomm WCNSS Peripherial Image Loader" +clean_blob drivers/remoteproc/qcom_wcnss.c +clean_kconfig drivers/remoteproc/Kconfig QCOM_WCNSS_PIL +clean_mk CONFIG_QCOM_WCNSS_PIL drivers/remoteproc/Makefile ######### @@ -3203,6 +3316,7 @@ clean_mk CONFIG_SND_SOC_INTEL_HASWELL sound/soc/intel/haswell/Makefile announce SND_SOC_INTEL_SKYLAKE - undocumented reject_firmware sound/soc/intel/skylake/skl-sst.c +reject_firmware sound/soc/intel/skylake/skl-sst-utils.c reject_firmware sound/soc/intel/skylake/skl-topology.c reject_firmware sound/soc/intel/skylake/bxt-sst.c clean_blob sound/soc/intel/skylake/skl.c @@ -3334,6 +3448,11 @@ clean_mk CONFIG_GREYBUS_BOOTROM drivers/staging/greybus/Makefile # SOC # ####### +announce QCOM_MDT_LOADER - "Qualcomm Peripheral Image Loader" +reject_firmware drivers/soc/qcom/mdt_loader.c +clean_kconfig drivers/soc/qcom/Kconfig QCOM_MDT_LOADER +clean_mk CONFIG_QCOM_MDT_LOADER drivers/soc/qcom/Makefile + announce QCOM_WCNSS_CTRL - "Qualcomm WCNSS control driver" reject_firmware drivers/soc/qcom/wcnss_ctrl.c clean_blob drivers/soc/qcom/wcnss_ctrl.c diff --git a/helpers/DATA/linux-hwe/deblob-check b/helpers/DATA/linux-hwe/deblob-check index f95b9722..d553cbfe 100644 --- a/helpers/DATA/linux-hwe/deblob-check +++ b/helpers/DATA/linux-hwe/deblob-check @@ -1,6 +1,6 @@ #! /bin/sh -# deblob-check version 2017-02-06 +# deblob-check version 2017-08-26 + 2017-10-23's r14392 # Inspired in gNewSense's find-firmware script. # Written by Alexandre Oliva <lxoliva@fsfla.org> @@ -904,7 +904,7 @@ set_except () { blobna 'DEFAULT_FIRMWARE' blobna '\([.]\|->\)firmware[ \n]*=[^=]' blobna 'mod_firmware_load' # sound/ - blobname '[.]\(\(fw\|bin\)[0-9]*\|hex\|frm\|co[dx]\|dat\|elf\|xlx\|rfb\|ucode\|img\|sbcf\|ctx\(prog\|vals\)\|z77\|wfw\|inp\|dlmem\|cld\|tftf\)[\\]\?["]' + blobname '[.]\(\(fw\|bin\)[0-9]*\|hex\|frm\|co[dx]\|dat\|elf\|xlx\|rfb\|ucode\|img\|sbcf\|ctx\(prog\|vals\)\|z77\|wfw\|inp\|dlmem\|cld\|tftf\|out\|nffw\|mdt\|mfa2\?\)[\\]\?["]' # Catch misdeblobbed fw extension. blobname '["][^" \t\n]*[/][*][(]DEBLOBBED[)][*][/][^"\\]' # Ideally we'd whitelist URLs that don't recommend non-Free @@ -4446,7 +4446,7 @@ set_except () { blobname 'pre-cal-%s-%s\.bin' drivers/net/wireless/ath/ath10k/core.c accept '[\t]fw_file->firmware[ ]=[ ]ath10k_fetch_fw_file' drivers/net/wireless/ath/ath10k/core.c blobname 'brcmfmac4356-sdio\.bin' drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c - blobname 'iwlwifi-9000\(-pu-a0-\(jf\|lc\)-a0-\|-\|\)' drivers/net/wireless/intel/iwlwifi/iwl-9000.c + blobname 'iwlwifi-9000\(-pu-a0-\(jf\|lc\)-[ab]0-\|-\|\)' drivers/net/wireless/intel/iwlwifi/iwl-9000.c blobname 'iwlwifi-9260-th-a0-\(jf\|lc\)-a0-' drivers/net/wireless/intel/iwlwifi/iwl-9000.c blobname 'mrvl[/]pcie8897_uapsta_a0\.bin' drivers/net/wireless/marvell/mwifiex/pcie.h blobname 'mrvl[/]pcieuart8997_combo\(_v2\)\?\.bin' drivers/net/wireless/marvell/mwifiex/pcie.h @@ -4659,6 +4659,210 @@ set_except () { blobname 'iwlwifi-8265-' drivers/net/wireless/intel/iwlwifi/iwl-8000.c blobname 'iwlwifi-[0-9][^"\n\t ]*-' drivers/net/wireless/intel/iwlwifi/iwl-8000.c blobname 'a530v3_gpmu\.fw2' drivers/gpu/drm/msm/adreno/adreno_device.c + + # New in 4.11. + blobname 'amdgpu[/]polaris1[01]_k_smc\.bin' drivers/gpu/drm/amdgpu/amdgpu_cgs.c + blobname 'i915[/]\(glk\|kbl\)_dmc_ver1_01\.bin' drivers/gpu/drm/i915/intel_csr.c + blobname 'vpu[/]vpu_fw_imx\(27_TO2\|53\|6[qd]\)\.bin' drivers/media/platform/coda/coda-common.c + blobname '%s-%d\.bin' drivers/net/wireless/ath/ath10k/core.c + blobname 'wil6210_sparrow_plus\.fw' drivers/net/wireless/ath/wil6210/wil6210.h + blobname 'iwlwifi-Qu-a0-hr-a0-' drivers/net/wireless/intel/iwlwifi/iwl-a000.c + blobname 'intel[/]dsp_fw_glk\.bin' sound/soc/intel/skylake/skl.c + defsnc 'unsigned[ ]char[ ]__aligned[(]16[)][ ]bootlogo_bits\[\][ ]=' 'arch/m68k/68000/bootlogo\(-vz\)\?\.h' + defsnc 'static[ ]volatile[ ]const[ ]u8[ ]__cacheline_aligned[ ]__aesti_\(inv_\)\?sbox\[\][ ]=' crypto/aes_ti.c + defsc 'static[ ]const[ ]struct[ ]ast_vbios_stdtable[ ]vbios_stdtable\[\][ ]=' drivers/gpu/drm/ast/ast_tables.h + defsnc 'static[ ]const[ ]struct[ ]vadc_map_pt[ ]adcmap_100k_104ef_104fb\[\][ ]=' drivers/iio/adc/qcom-spmi-vadc.c + defsnc 'static[ ]const[ ]int[ ]srf08_sensitivity\[\][ ]=' drivers/iio/proximity/srf80.c + defsnc '[\t]static[ ]u8[ ]rss_key\[40\][ ]=' drivers/net/ethernet/aquantia/atlantic/aq_nic.c + defsnc '[\t]static[ ]u32[ ]\(itr_imr_\(rxr\|txt\)\(en\)\?\|rpo_lro_ldes_max\)_\(adr\|msk\|shift\)\[32\][ ]=' drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c + defsnc 'static[ ]const[ ]u32[ ]hw_atl_utils_hw_mac_regs\[\][ ]=' drivers/net/ethernet/aquantia/atlantic/hw_tl/hw_atl_utils.c + defsnc 'static[ ]const[ ]u8[ ]netvsc_hash_key\[NETVSC_HASH_KEYLEN\][ ]=' drivers/net/hyperv/rndis_filter.c + defsnc 'static[ ]const[ ]struct[ ]rf_channel[ ]rf_vals_3x_xtal20\[\][ ]=' drivers/net/wireless/ralink/rt2x00/rt2800lib.c + defsnc 'unsigned[ ]long[ ]long[ ]lpfc_enable_nvmet\[LPFC_NVMET_MAX_PORTS\][ ]=' drivers/scsi/lpfc/lpfc_attr.c + defsnc 'static[ ]const[ ]u64[ ]test_vectors_siphash\[64\][ ]=' lib/test_siphash.c + defsnc 'static[ ]const[ ]u32[ ]test_vectors_hsiphash\[64\][ ]=' lib/test_siphash.c + defsnc 'static[ ]yyconst[ ]YY_CHAR[ ]yy_ec\[256\][ ]=' scripts/dtc/dtc-lexer.lex.c_shipped + defsnc 'static[ ]yyconst[ ]YY_CHAR[ ]yy_meta\[48\][ ]=' scripts/dtc/dtc-lexer.lex.c_shipped + defsnc 'static[ ]yyconst[ ]flex_uint16_t[ ]yy_base\[180\][ ]=' scripts/dtc/dtc-lexer.lex.c_shipped + defsnc 'static[ ]yyconst[ ]flex_uint16_t[ ]yy_nxt\[449\][ ]=' scripts/dtc/dtc-lexer.lex.c_shipped + accept '0x1B[,][ ]0x5E[,][ ]0x78[,][ ]0x3D[,][ ]0x00[,][ ]0x00[,][ ]0x00[,][ ]0x18[,][ ][0x1-9A-F, ]*' security/apparmor/nulldfa.in + defsnc '[\t]struct[ ]sock_filter[ ]bpf_filter\[\][ ]=' tools/testing/selftests/net/psock_lib.h + blobname 'cpt8x-mc-[as]e\.out' drivers/crypto/cavium/cpt/cptpf_main.c + blobname 'i915[/]["][ ]__stringify[(]platform[)][ ]["]_huc_ver["][ ]__stringify[(]major[)][ ]["]_["][ \\\n\t]*__stringify[(]minor[)][ ]["]_["][ ]__stringify[(]bld_num[)][ ]["]\.bin' drivers/gpu/drm/i915/intel_huc.c + accept '[ ][ ]*gf100_gr_init_fw[(]gr->fecs[,][ ][&]gr->fuc409c[,][ ][&]gr->fuc409d[)][;]' drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c + accept '[ ][ ]*gf100_gr_init_fw[(]gr->gpccs[,][ ][&]gr->fuc41ac[,][ ][&]gr->fuc41ad[)][;]' drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c + blobname 'rockchip[/]dptx\.bin' drivers/gpu/drm/rockchip/cdn-dp-core.c + accept 'static[ ]int[ ]cdn_dp_request_firmware[(]' drivers/gpu/drm/rockchip/cdn-dp-core.c + accept '[\t]ret[ ]=[ ]cdn_dp_request_firmware[(]dp[)]' drivers/gpu/drm/rockchip/cdn-cp-core.c + blobname 'netronome[/]nic_%s' drivers/net/ethernet/netronome/nfp/nfp_main.c + blobname 'spc[ ]-=[ ]snprintf[(][&]fw_name\[ARRAY_SIZE[(]fw_name[)][ ]-[ ]spc\][,][^;]*[)]' drivers/net/ethernet/netronome/nfp/nfp_main.c + blobname 'netronome[/]nic_AMDA00\(81-0001_\(1x40\|4x10\)\|96-0001_2x10\|97-0001_\(2x40\|4x10_1x40\|8x10\)\|99-0001_2x\(10\|25\)\)\.nffw' drivers/net/ethernet/netronome/nfp/nfp_main.c + accept '#define[ ]NFP_RESOURCE_NFP_NFFW[ \t]*["]nfp\.nffw["]' drivers/net/ethernet/netronome/nfp/nfp.h + accept '[\t]*\(rc[ ]=[ ]\)\?wil_request_firmware[(]wil[,][ ]\(wil->wil_fw_name\|WIL_BOARD_FILE_NAME\)[,][ ]\(true\|false\)[)][;]' drivers/net/wireless/ath/wil6210/main.c + blobname 'gsl3670-cube-iwork8-air\.fw' drivers/platform/x86/silead_dmi.c + blobname 'gsl3676-jumper-ezpad-mini3\.fw' drivers/platform/x86/silead_dmi.c + accept '[\t]*\(rc[ ]=[ ]\)\?request_firmware_nowait[(]THIS_MODULE[,][ ]FW_ACTION_NOHOTPLUG[,][ ]name[,]' lib/test_firmware.c + accept '[ ]*mechanism[ ]is[ ]available[ ]and[ ]for[ ]the[ ]request_firmware_nowait[(][)][ ]call' Documentation/driver-api/firmware/fallback-mechanisms.rst + accept '[ ]*except[ ]request_firmware_direct[(][)]' Documentation/driver-api/firmware/fallback-mechanisms.rst + accept '[ ]*today[.][ ]The[ ]call[ ]request_firmware_nowait[(][)]' Documentation/driver-api/firmware/fallback-mechanisms.rst + accept '[ ]*argument[ ]to[ ]request_firmware_nowait[(][)]' Documentation/driver-api/firmware/fallback-mechanisms.rst + accept '[ ]*for[ ]request_firmware_nowait[(][)][ ]when[ ]uevent' Documentation/driver-api/firmware/fallback-mechanisms.rst + accept '[ ]*supported[ ]for[ ]request_firmware_into_buf[(][)]' Documentation/driver-api/firmware/fallback-mechanisms.rst + accept '[ ]*For[ ]details[ ]of[ ]implementation[ ]refer[ ]to[ ]_request_firmware_load[(][)]' Documentation/driver-api/firmware/fallback-mechanisms.rst + accept '[ ]*Users[ ]of[ ]the[ ]request_firmware_nowait[(][)][ ]call' Documentation/driver-api/firmware/fallback-mechanisms.rst + accept '[ ]*rely[ ]on[ ]the[ ]uevent[ ]flag[ ]which[ ]can[ ]be[ ]disabled[ ]by[ ]request_firmware_nowait[(][)]' Documentation/driver-api/firmware/fallback-mechanisms.rst + accept '[ ]*Although[ ]this[ ]can[ ]disable[ ]the[ ]firmware[ ]cache[ ]for[ ]request_firmware_nowait[(][)]' Documentation/driver-api/firmware/fallback-mechanisms.rst + accept '[ ]*uses[ ]all[ ]synchronous[ ]call[ ]except[ ]:c:func:[`]request_firmware_into_buf[`]' Documentation/driver-api/firmware/firmware_cache.rst + accept '[ \t]*if[(]request_firmware[(][&]fw_entry[,][ ][$]FIRMWARE[,]' Documentation/driver-api/firmware/firmware_cache.rst + accept '[ ]*device[ ]\(if[ ]\)*the[ ]second[ ]argument[ ][(]uevent[)][ ]to[ ]request_firmware_nowait[(][)]' Documentation/driver-api/firmware/firmware_cache.rst + accept '[ ]*introduction[\n][ ]*core[\n][ ]*request_firmware[\n]' Documentation/driver-api/firmware/index.rst + accept '\([ ]*:functions:[ ]\)\?request_firmware\([ ]API\|\|_direct\|_into_buf\|_nowait\|\)[\n]' Documentation/driver-api/firmware/request_firmware.rst + accept 'informed[ ]through[ ]the[ ]callback[.][ ]request_firmware_nowait[(][)][ ]cannot' Documentation/driver-api/firmware/request_firmware.rst + accept 'firmware[.][ ]For[ ]example[ ]if[ ]you[ ]used[ ]request_firmware[(][)][ ]and[ ]it[ ]returns' Documentation/driver-api/firmware/request_firmware.rst + accept 'If[ ]something[ ]went[ ]wrong[ ]request_firmware[(][)][ ]returns[ ]non-zero' Documentation/driver-api/firmware/request_firmware.rst + accept 'resume[/]restore[,][ ]but[ ]they[ ]cannot[ ]do[ ]it[ ]by[ ]calling[ ]:c:func:[`]request_firmware[(][)][`]' Documentation/driver-api/pm/notifiers.rst + accept 'In[ ]this[ ]example[,][ ]the[ ][^\n]*["]ts\.out["]' tools/power/x86/turbostat/turbostat.8 + accept '[\t]if[ ][(][!]snd_card_proc_new[(]hdspm->card[,][ ]["]ports\.\(in\|out\)["][,]' sound/pci/rme9652/hdspm.c + accept '[ \t]*print[ ]["]cat[ ]["][ ]rd[ ]cfr\[j\][ ][ ]["][/]kvm-test-1-run\.sh\.out["]' tools/testing/selftests/rcutorture/bin/kvm.sh + # If this actually requests any firmware, it will do so using + # disabled request_firmware calls elsewhere, but it seems to me + # that this would just pin and unpin firmware that might have + # already been requested before. + accept 'static[ ]int[ ]smu7_request_firmware[(]' drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c + accept '[\t]\.request_firmware[ ]=[ ]smu7_request_firmware[,]' drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c + + # New in 4.12. + accept '[ ]*Bit\([ ]*[0-7]\)*' Documentation/input/devices/sentelic.rst + defsnc 'static[ ]const[ ]struct[ ]hash_testvec[ ]\(ghash\|hmac_sha\(224\|256\)\|aes_xcbc128\|poly1305\|crc32\|crc32c\|bfin_crc\)_tv_template\[\][ ]=' crypto/testmgr.h + defsnc 'static[ ]const[ ]struct[ ]cipher_testvec[ ]\(\(des3_ede\|bf\)_cbc\|\(tf\|cast6\|aes\)_xts\|serpent\(_xts\)\?\|tnepres\|aes\(_cbc\|_ctr_rfc3686\)\?\|x\?tea\|anubis\(_cbc\)\?\|xeta\|camellia_\(cbc\|xts\)\|salsa20_stream\|chacha20\|cts_mode\)_\(enc\|dec\)_tv_template\[\][ ]=' crypto/testmgr.h + defsnc 'static[ ]const[ ]struct[ ]aead_testvec[ ]hmac_sha\(1\|256\|512\|224\|384\)_\(aes\|des3\?\(_ede\)\?\)_cbc_enc_tv_temp\[\][ ]=' crypto/testmgr.h + defsnc 'static[ ]const[ ]struct[ ]aead_testvec[ ]\(aes_\(gcm_rfc4106\|ccm_rfc4309\)\|rfc7539\(esp\)\?\)_\(enc\|dec\)_tv_template\[\][ \t]=' crypto/testmgr.h + defsnc 'static[ ]const[ ]struct[ ]comp_testvec[ ]\(lzo\|lz4\(hc\)\?\)_\(de\)\?comp_tv_template\[\][ ]=' crypto/testmgr.h + defsnc '[}][ ]segments\[MALIDP_COEFFTAB_NUM_COEFFS\][ ]=' drivers/gpu/drm/arm/malidp_crtc.c + defsnc 'static[ ]const[ ]u16[ ]dp500_se_scaling_coeffs\[\]\[SE_N_SCALING_COEFFS\][ ]=' drivers/gpu/drm/arm/malidp_hw.c + defsnc 'static[ ]const[ ]u8[ ]gamma_tbl\[S6E3HA2_NUM_GAMMA_STEPS\]\[S6E3HA2_GAMMA_CMD_CNT\][ ]=' drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c + defsnc 'static[ ]const[ ]struct[ ]rcar_hdmi_phy_params[ ]rcar_hdmi_phy_params\[\][ ]=' drivers/gpu/drm/rcar-du/rcar_dw_hdmi.c + defsnc 'static[ ]const[ ]int[ ]temp_map\[CPCAP_MAX_TEMP_LVL\]\[2\][ ]=' drivers/iio/adc/cpcap-adc.c + defsnc 'static[ ]const[ ]struct[ ]reg_value[ ]ov5645_\(global_init_setting\|setting_\(sxga\|1080p\|full\)\)\[\][ ]=' drivers/media/i2c/ov5645.c + defsnc 'static[ ]struct[ ]regval_list[ ]ov5647_640x480\[\][ ]=' drivers/media/i2c/ov5647.c + defsc 'static[ ]const[ ]u32[ ]isc_gamma_table\[GAMMA_MAX[ ][+][ ]1\]\[GAMMA_ENTRIES\][ ]=' drivers/media/platform/atmel/atmel-isc.c + defsnc 'static[ ]const[ ]struct[ ]rf_channel[ ]rf_vals_7620\[\][ ]=' drivers/net/wireless/ralink/rt2x00/rt2800lib.c + defsnc 'static[ ]const[ ]u8[ ]zero_buff\[\][ ]=' drivers/staging/ccree/ssi_cipher.c + oprepline '#define[ ]NIST_\(AES\([CG]CM\)\?\|SHA\|HMAC\)_\(SHA\)\?\(256\|512\|192\|128\|1\)_\(XTS_\|CMAC_\)\?\(KEY\|PLAIN\(_DATA\)\?\|CIPHER\|MD\|MSG\|ADATA\)' drivers/staging/ccree/ssi_fips_data.h + defsnc 'static[ ]struct[ ]atomisp_css_macc_table[ ]\(skin_\(low\|medium\|high\)\|blue\|green\)_macc_table[ ]=' drivers/staging/media/atomisp/pci/atomisp2/atomisp_tables.h + defsnc 'static[ ]struct[ ]atomisp_css_ctc_table[ ]vivid_ctc_table[ ]=' drivers/staging/media/atomisp/pci/atomisp2/atomisp_tables.h + defsnc '#define[ ]S_1W_\(6X6\|9X9\|7X7\)_MATRIX_DEFAULT[ ][(][(]s_1w_.x._matrix[)][\\]' drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/ref_vector_func_types.h + defsnc 'const[ ]struct[ ]ia_css_anr_config[ ]default_anr_config[ ]=' drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_1.0/ia_css_anr.host.c + defsnc 'const[ ]struct[ ]ia_css_anr_thres[ ]default_anr_thres[ ]=' drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_2/ia_css_anr2_table.host.c + defsnc 'default_ctc_table_data\[IA_CSS_VAMEM_[21]_CTC_TABLE_SIZE\][ ]=' drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ctc/ctc_1.0/ia_css_ctc_table.host.c + defsnc 'default_gamma_table_data\[IA_CSS_VAMEM_[21]_GAMMA_TABLE_SIZE\][ ]=' drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/gc/gc_1.0/ia_css_gc_table.host.c + defsnc 'default_gamma_table_data\[IA_CSS_VAMEM_[21]_RGB_GAMMA_TABLE_SIZE\][ ]=' drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/gc/gc_2/ia_css_gc2_table.host.c + defsnc 'const[ ]struct[ ]ia_css_macc1_5_table[ ]default_macc1_5_table[ ]=' drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/macc/macc1_5/ia_css_macc1_5_table.host.c + defsnc 'const[ ]struct[ ]ia_css_macc_table[ ]default_macc2\?_table[ ]=' drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/macc/macc_1.0/ia_css_macc_table.host.c + defsc '#define[ ]DEFAULT_DVS_GRID_INFO[ ]' drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sdis/common/ia_css_sdis_common_types.h + defsnc 'const[ ]int16_t[ ]g_pyramid\[8\]\[8\][ ]=' drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.c + defsnc 'static[ ]const[ ]int[ ]zoom_table\[4\]\[HRT_GDC_N\][ ]=' drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.c + defsnc '[\t]static[ ]\+u8[ ]sbox_table\[256\][ ]=' drivers/staging/rtl8723bs/core/rtw_security.c + defsnc 'static[ ]u32[ ]Array_MP_8723B_\(AGC_TAB\|PHY_REG\(_PG\)\?\)\[\][ ]=' drivers/staging/rtl8723bs/hal/HalHWImg8723B_BB.c + defsnc 'static[ ]u32[ ]Array_MP_8723B_MAC_REG\[\][ ]=' drivers/staging/rtl8723bs/hal/HalHWImg8723B_MAC.c + defsnc 'static[ ]u32[ ]Array_MP_8723B_RadioA\[\][ ]=' drivers/staging/rtl8723bs/hal/HalHWImg8723B_RF.c + defsnc 'static[ ]u8[ ]gDeltaSwingTableIdx_MP_5G[BA]_[NP]_TxPowerTrack_SDIO_8723B\[\]\[DELTA_SWINGIDX_SIZE\][ ]=' drivers/staging/rtl8723bs/hal/HalHWImg8723B_RF.c + defsnc '[\t]u8[ ]channel5G\[CHANNEL_MAX_NUMBER_5G\][ ]=' drivers/staging/rtl8723bs/hal/hal_com_phycfg.c + defsc 'static[ ]struct[ ]cs35l35_sysclk_config[ ]cs35l35_clk_ctl\[\][ ]=' sound/soc/codecs/cs35l35.c + blobname 'amdgpu[/]vega10_smc\.bin' drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c + blobname 'amdgpu[/]vega10_uvd\.bin' drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c + blobname 'amdgpu[/]vega10_vce\.bin' drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c + blobname 'amdgpu[/]vega10_\(ce\|pfp\|me\(c2\?\)\?\|rlc\)\.bin' drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c + blobname 'amdgpu[/]\(vega10\|%s\)_\(sos\|asd\)\.bin' drivers/gpu/drm/amd/amdgpu/psp_v3_1.c + blobname 'amdgpu[/]vega10_sdma1\?\.bin' drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c + blobname 'amdgpu[/]vega10_smc\.bin' drivers/gpu/drm/amd/amdgpu/soc15.c + blobname 'i915[/]glk_dmc_ver1_04\.bin' drivers/gpu/drm/i915/intel_csr.c + blobname 'https[:][/][/]01\.org[/]linuxgraphics[/]downloads[/][^"\n]*' drivers/gpu/drm/i915/intel_csr.c + blobname 'dvb-demod-si2168-d60-01\.fw' drivers/media/dvb-frontends/si2168_priv.h + blobname 'dvb-tuner-si2141-a10-01\.fw' drivers/media/tuners/si2157_priv.h + blobname 'firmware-6\.bin' drivers/net/wireless/ath/ath10k/hw.h + blobname '[/][*][ ]the[ ]firmware-6\.bin[ ]blob[ ][*][/]' drivers/net/wireless/ath/ath10k/hw.h + blobname 'iwlwifi-9260-th-b0-\(jf\|lc\)-b0-' drivers/net/wireless/intel/iwlwifi/iwl-9000.c + blobname 'iwlwifi-QuIcp-a0-hrcdb-a0-' drivers/net/wireless/intel/iwlwifi/iwl-a000.c + blobname 'git:[/][/]git\.kernel\.org[/][^"\n]*firmware\.git' drivers/net/wireless/intel/iwlwifi/iwl-drv.c + blobname 'rtlwifi[/]rtl8723befw_36\.bin' drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c + blobname 'rtlwifi[/]rtl8821aefw_29\.bin' drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c + blobname 'gsl1686-dexp-ursus-7w\.fw' drivers/platform/x86/silead_dmi.c + blobname 'gsl1686-surftab-wintron70-st70416-6\.fw' drivers/platform/x86/silead_dmi.c + blobname 'ti-connectivity[/]TIInit_\(\(%d\|[0-9]\+\)[.]\)\+bts' drivers/bluetooth/hci_ll.c + accept '[\t]*bt_dev_err[(]lldev->hu\.hdev[,][ ]["]request_firmware[ ]failed' drivers/bluetooth/hci_ll.c + blobname 'nokia[/]\(bcmfw\|ti1273\)\.bin' drivers/bluetooth/hci_nokia.c + accept '[\t ]*falcon->firmware\.\(bin_data\|firmware\)' drivers/gpu/drm/tegra/falcon.c + accept '[\t][/][*][ ]request_firmware[ ]prints' drivers/gpu/drm/tegra/falcon.c + accept 'static[ ]const[ ]struct[ ]vic_config[ ]vic_t\(124\|210\)_config[ ]=[ ][{][\n][ ]\.firmware[ ]*=[ ]' drivers/gpu/drm/tegra/vic.c + blobname 'nvidia[/]tegra124[/]vic03_ucode\.bin' drivers/gpu/drm/tegra/vic.c + blobname 'nvidia[/]tegra210[/]vic04_ucode\.bin' drivers/gpu/drm/tegra/vic.c + accept 'static[ ]int[ ]ap1302_request_firmware[(]' drivers/staging/media/atomisp/i2c/ap1302.c + accept '[\t ]*["]ap1302_request_firmware[ ]failed' drivers/staging/media/atomisp/i2c/ap1302.c + accept '[\t]ret[ ]=[ ]ap1302_request_firmware[(]' drivers/staging/media/atomisp/i2c/ap1302.c + blobname 'ap1302_fw\.bin' drivers/staging/media/atomisp/i2c/ap1302.c + blobname 'shisp_2401a0_\(legacy_\)\?v21\.bin' drivers/staging/media/atomisp/pci/atomisp2/atomisp_v4l2.c + blobname 'shisp_2400b0_v21\.bin' drivers/staging/media/atomisp/pci/atomisp2/atomisp_v4l2.c + accept '[\t]*isp->firmware[ ]=[ ]\(atomisp_load_firmware[(]\|NULL\)' 'drivers/staging/media/atomisp/pci/atomisp2/atomisp_\(fops\|v4l2\)\.c' + accept '[\t]stage_desc->firmware[ ]=' drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/pipe/src/pipe_stagedesc.c + accept '[\t]stage->firmware[ ]=' drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c + blobname 'rtlwifi[/]rtl8723bs_\(wowlan\|nic\)\.bin' drivers/staging/rtl8723bs/hal/rtl8723b_hal_init.c + blobname 'rtl8723b[/]FW_\(NIC\|WoWLAN\)\.bin' drivers/staging/rtl8723bs/include/rtl8723b_hal.h + + # New in 4.13 + accept '[ ]*This[ ]driver[ ]requires[ ]a[ ]patch[ ]for[ ]firmware_class\.c[^\n]*[\n][ ]*request_firmware_nowait[ ]function' Documentation/dell_rbu.txt + accept '[\t][ ]d=["]m[ ]0[,]0[ ]0[,]1895[ ]4118[,]0[ ][-0-9, LZm]*z["]' Documentation/media/uapi/v4l/crop.svg + defsnc 'static[ ]const[ ]struct[ ]akcipher_testvec[ ]pkcs1pad_rsa_tv_template\[\][ ]=' crypto/testmgr.h + accept '[\t]ret[ ]=[ ]_request_firmware_load[(]fw_priv[,][ ]opt_flags[,][ ]timeout[)][;]' drivers/base/firmware_class.c + defsnc 'static[ ]const[ ]struct[ ]cpg_pll_config[ ]cpg_pll_configs\[8\][ ]__initconst[ ]=' drivers/clk/renesas/clk-rcar-gen2.c + blobname 'cnn55xx_se.fw' drivers/crypto/cavium/nitrox/nitrox_main.c + defsnc 'unsigned[ ]int[ ]dsgl_ent_len\[\][ ]=' drivers/crypto/chelsio/chcr_algo.h + blobname '\(ifpp\|ipue\)\.bin' drivers/crypto/inside-secure/safexcel.c + blobname 'amdgpu[/]\(%s\|vega10\|raven\)_gpu_info\.bin' drivers/gpu/drm/amd/amdgpu/amdgpu_device.c + blobname 'amdgpu[/]raven_vcn\.bin' drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c + blobname 'amdgpu[/]raven_\(ce\|pfp\|me\(c2\?\)\?\|rlc\)\.bin' drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c + blobname 'amdgpu[/]\(vega10\|raven\)_sdma1\?\.bin' drivers/gpu/drm/amdgpu/sdma_v4_0.c + defsnc 'const[ ]struct[ ]pctl_data[ ]pctl[01]_data\[\][ ]=' drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c + blobname 'i915[/]cnl_dmc_ver1_04\.bin' drivers/gpu/drm/i915/intel_csr.c + defsnc '[\t]static[ ]const[ ]int[ ]dividers\[\][ ]=' drivers/gpu/drm/i915/intel_dpll_mgr.c + defsnc 'const[ ]struct[ ]stm32h7_adc_ck_spec[ ]stm32h7_adc_ckmodes_spec\[\][ ]=' drivers/iio/adc/stm32-adc-core.c + defsnc 'static[ ]const[ ]u8[ ]full_fm_\(eu\|na\)_1p0\[\][ ]=' drivers/media/i2c/max2175.c + defsnc 'static[ ]const[ ]struct[ ]max2175_reg_map[ ]\(dab12\|fmeu1p2\|fmna[12]p0\)_map\[\][ ]=' drivers/media/i2c/max2175.c + defsnc 'static[ ]const[ ]u8[ ]adc_presets\[2\]\[23\][ ]=' drivers/media/i2c/max2175.c + defsnc 'static[ ]const[ ]struct[ ]ov13858_reg[ ]mode_\(4224x3136\|2112x\(1568\|1188\)\|1056x784\)_regs\[\][ ]=' drivers/media/i2c/ov13858.c + defsnc 'static[ ]const[ ]struct[ ]reg_value[ ]ov5640_\(init_\)\?setting_\(30\|15\)fps_\(VGA\(_640_480\)\?\|XGA_1024_768\|QVGA_320_240\|QCIF_176_144\|NTSC_720_480\|PAL_720_576\|720P_1280_720\|1080P_1920_1080\|QSXGA_2592_1944\)\[\][ ]=' drivers/media/i2c/ov5640.c + blobname 'qcom[/]venus-\(1\.8\|4\.2\)[/]venus\.mdt' drivers/media/platform/qcom/venus/core.c + # The firmware file name is supplied by the user. + accept '[\t]err[ ]=[ ]request_firmware_direct[(][&]fw[,][ ]flash->data[,]\([^\n]*[\n]\+[^\n}]\)*err[ ]=[ ]mlx5_firmware_flash[(]mdev[,][ ]fw[)]' drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c + blobname 'mellanox[/]mlxsw_spectrum-\([0-9.]*\|["]\([ \t]*\|[\\][\n]\|__stringify[(]MLXSW_FWREV_\(MAJOR\|MINOR\|SUBMINOR\)[)]\|["].["]\)*["]\)\.mfa2' drivers/net/ethernet/mellanox/mlxsw/spectrum.c + # This firmware file name is supplied by the user, + # but there's another in the same source file (above) that is hardcoded. + accept '[\t]err[ ]=[ ]request_firmware_direct[(][&]firmware[,][ ]flash->data[,]\([^\n]*[\n]\+[^\n}]\)*err[ ]=[ ]mlxsw_sp_firmware_flash[(]mlxsw_sp[,][ ]firmware[)]' drivers/net/ethernet/mellanox/mlxsw/spectrum.c + defsnc 'static[ ]const[ ]struct[ ]iro[ ]iro_arr\[49\][ ]=' drivers/net/ethernet/qlogic/qed/qed_hsi.h + defsc 'static[ ]const[ ]u8[ ]iwl_ext_nvm_channels\[\][ ]=' drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c + blobname '\(rsi[/]\)\?rs9113_wlan_qspi\.rps' drivers/net/wireless/rsi/rsi_common.h + defsnc '[}][ ]hsfreq_map\[\][ ]=' drivers/staging/media/imx/imx6-mipi-csi2.c + defsnc 'static[ ]const[ ]u16[ ]avc_thr_db2reg\[97\][ ]=' sound/soc/codecs/sgtl5000.c + blob 'SD8688[ ]firmware[\n]=*[\n]*Images:[\n]*\(-[ ][/]lib[/]firmware[^\n]*[\n]*\)*The[ ]images[^:]*:[\n]*[^\n]*[/]linux-firmware[^\n]*' Documentation/btmrvl.txt + blobname '%s-%s-%d\.bin' drivers/net/wireless/ath/ath10k/core.c + blobname 'wil6210_\(sparrow_plus_\)\?ftm\.fw' drivers/net/wireless/ath/wil6210/wil6210.h + blobname 'brcmfmac43430a0-sdio\.bin' drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c + blobname 'iwlwifi-9000-pu-a0-jf-b0-' drivers/net/wireless/intel/iwlwifi/cfg/9000.c + blobname 'iwlwifi-QuIcp-z0-hrcdb-a0-' drivers/net/wireless/intel/iwlwifi/iwl-a000.c + blobname 'qtn[/]fmac_qsr10g\.img' drivers/net/wireless/quantenna/qtnfmac/qtn_hw_ids.h + blobname 'gsl1680-\(gp-electronic-t701\|pipo-w2s\)\.fw' drivers/platform/x86/silead_dmi.c + blobname 'gsl3692-pov-mobii-wintab-p800w\.fw' drivers/platform/x86/silead_dmi.c + blobname 'gsl3670-itworks-tw891\.fw' drivers/platform/x86/silead_dmi.c + defsnc 'struct[ ]phm_fuses_default[ ]vega10_fuses_default\[\][ ]=' drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.c + blobname 'a530_zap\.mdt' drivers/gpu/drm/msm/adreno/adreno_device.c + accept '[\t]\(complete\|init_completion\|[\t]wait_for_completion\)[(][&]bus->request_firmware_complete[)]' drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c + accept '[\t]ret[ ]=[ ]reject_firmware_nowait[(][^)]*[,][ ]qtnf_firmware_load[)][;][\n][\t]*if[ ][(]ret[ ][<][ ]0[)][\n][\t]*pr_err[(]["]request_firmware_nowait[ ]error' drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c + blobname '\(adsp\|slpi\)\.mdt' drivers/remoteproc/qcom_adsp_pil.c + blobname 'wcnss\.mdt' drivers/remoteproc/qcom_wcnss.c + + # Backported into 4.13.10. + # New in 4.14-rc6. + defsnc '[\t]static[ ]const[ ]struct[ ]nphy_txiqcal_ladder[ ]ladder_\(lo\|iq\)\[\][ ]=' drivers/net/wireless/brcm80211/brcmsmac/phy/phy_n.c ;; */*freedo*.patch | */*logo*.patch) diff --git a/helpers/DATA/linux-hwe/silent-accept-firmware.patch b/helpers/DATA/linux-hwe/silent-accept-firmware.patch index 20bab538..8bd34921 100644 --- a/helpers/DATA/linux-hwe/silent-accept-firmware.patch +++ b/helpers/DATA/linux-hwe/silent-accept-firmware.patch @@ -1,28 +1,18 @@ -diff -ru source/drivers/base/firmware_class.c source/drivers/base/firmware_class.c ---- source/drivers/base/firmware_class.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/base/firmware_class.c 2017-08-21 10:54:54.485544208 -0400 -@@ -99,7 +99,7 @@ - FW_STATUS_ABORTED, - }; - --static int loading_timeout = 60; /* In seconds */ -+static int loading_timeout = 5; /* In seconds */ - - static inline long firmware_loading_timeout(void) - { -@@ -419,14 +419,14 @@ +diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c +index bfbe1e1..5a2ee57 100644 +--- a/drivers/base/firmware_class.c ++++ b/drivers/base/firmware_class.c +@@ -447,14 +447,14 @@ fw_get_filesystem_firmware(struct device *device, struct firmware_buf *buf) id); if (rc) { if (rc == -ENOENT) - dev_dbg(device, "loading %s failed with error %d\n", -- path, rc); + dev_dbg(device, "loading failed with error %d\n", -+ rc); + path, rc); else - dev_warn(device, "loading %s failed with error %d\n", -- path, rc); -+ dev_warn(device, "loading failed with error %d\n", -+ rc); ++ dev_warn(device, "loadingfailed with error %d\n", + path, rc); continue; } - dev_dbg(device, "direct-loading %s\n", buf->fw_id); @@ -30,7 +20,7 @@ diff -ru source/drivers/base/firmware_class.c source/drivers/base/firmware_class buf->size = size; fw_state_done(&buf->fw_st); break; -@@ -1010,7 +1010,7 @@ +@@ -1072,7 +1072,7 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, if (opt_flags & FW_OPT_UEVENT) { buf->need_uevent = true; dev_set_uevent_suppress(f_dev, false); @@ -39,60 +29,52 @@ diff -ru source/drivers/base/firmware_class.c source/drivers/base/firmware_class kobject_uevent(&fw_priv->dev.kobj, KOBJ_ADD); } else { timeout = MAX_JIFFY_OFFSET; -@@ -1099,7 +1099,7 @@ - } - - if (fw_get_builtin_firmware(firmware, name, dbuf, size)) { -- dev_dbg(device, "using built-in %s\n", name); -+ dev_dbg(device, "using built-in\n", name); - return 0; /* assigned */ - } - -@@ -1186,11 +1186,11 @@ - goto out; - - ret = 0; -- timeout = firmware_loading_timeout(); -+ timeout = is_nonfree_firmware(name) ? 1 : firmware_loading_timeout(); +@@ -1108,14 +1108,14 @@ static int fw_load_from_user_helper(struct firmware *firmware, if (opt_flags & FW_OPT_NOWAIT) { timeout = usermodehelper_read_lock_wait(timeout); if (!timeout) { - dev_dbg(device, "firmware: %s loading timed out\n", + dev_dbg(device, "firmware: loading timed out\n", name); - ret = -EBUSY; - goto out; -@@ -1198,7 +1198,7 @@ + return -EBUSY; + } } else { ret = usermodehelper_read_trylock(); if (WARN_ON(ret)) { - dev_err(device, "firmware: %s will not be loaded\n", + dev_err(device, "firmware: will not be loaded\n", name); - goto out; + return ret; } -@@ -1208,12 +1208,13 @@ +@@ -1171,7 +1171,7 @@ _request_firmware_prepare(struct firmware **firmware_p, const char *name, + } + + if (fw_get_builtin_firmware(firmware, name, dbuf, size)) { +- dev_dbg(device, "using built-in %s\n", name); ++ dev_dbg(device, "using built-in\n", name); + return 0; /* assigned */ + } + +@@ -1249,12 +1249,13 @@ _request_firmware(const struct firmware **firmware_p, const char *name, if (ret) { if (!(opt_flags & FW_OPT_NO_WARN)) dev_warn(device, - "Direct firmware load for %s failed with error %d\n", -- name, ret); + "Direct firmware load failed with error %d\n", -+ ret); + name, ret); if (opt_flags & FW_OPT_USERHELPER) { dev_warn(device, "Falling back to user helper\n"); ret = fw_load_from_user_helper(fw, name, device, - opt_flags, timeout); -+ dev_warn(device, "Please read https://www.gnu.org/distros/free-system-distribution-guidelines.html#nonfree-firmware\n"); + opt_flags); ++ dev_warn(device, "Please read https://www.gnu.org/distros/free-system-distribution-guidelines.html#nonfree-firmware\n"); } - } - -Only in source/drivers/base: firmware_class.c.orig -Only in source/drivers/base: firmware_class.c.rej -diff -ru source/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c source/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c ---- source/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c 2017-08-21 10:54:54.485544208 -0400 -@@ -856,7 +856,7 @@ + } else + ret = assign_firmware_buf(fw, device, opt_flags); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +index c0a8062..65c1170 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +@@ -734,7 +734,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, err = amdgpu_ucode_validate(adev->pm.fw); if (err) { @@ -101,11 +83,11 @@ diff -ru source/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c source/drivers/gpu/drm/a release_firmware(adev->pm.fw); adev->pm.fw = NULL; return err; -Only in source/drivers/gpu/drm/amd/amdgpu: amdgpu_cgs.c.rej -diff -ru source/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c source/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c ---- source/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 2017-08-21 10:54:54.485544208 -0400 -@@ -160,7 +160,7 @@ +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +index 2ca09f1..15caf24 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +@@ -173,7 +173,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) r = request_firmware(&adev->uvd.fw, fw_name, adev->dev); if (r) { @@ -114,11 +96,11 @@ diff -ru source/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c source/drivers/gpu/drm/a fw_name); return r; } -Only in source/drivers/gpu/drm/amd/amdgpu: amdgpu_uvd.c.orig -diff -ru source/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c source/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c ---- source/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 2017-08-21 10:54:54.485544208 -0400 -@@ -133,7 +133,7 @@ +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +index b692ad4..1ca7f9f 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +@@ -140,7 +140,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) r = request_firmware(&adev->vce.fw, fw_name, adev->dev); if (r) { @@ -127,48 +109,50 @@ diff -ru source/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c source/drivers/gpu/drm/a fw_name); return r; } -Only in source/drivers/gpu/drm/amd/amdgpu: amdgpu_vce.c.orig -diff -ru source/drivers/gpu/drm/amd/amdgpu/ci_dpm.c source/drivers/gpu/drm/amd/amdgpu/ci_dpm.c ---- source/drivers/gpu/drm/amd/amdgpu/ci_dpm.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/amd/amdgpu/ci_dpm.c 2017-08-21 10:54:54.485544208 -0400 -@@ -5806,7 +5806,7 @@ +diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +index cb508a2..ca4453b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c ++++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +@@ -5848,7 +5848,7 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev) + out: if (err) { - printk(KERN_ERR -- "cik_smc: Failed to load firmware \"%s\"\n", -+ "cik_smc: Failed to load firmware\n", - fw_name); +- pr_err("cik_smc: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("cik_smc: Failed to load firmware\n", fw_name); release_firmware(adev->pm.fw); adev->pm.fw = NULL; -Only in source/drivers/gpu/drm/amd/amdgpu: ci_dpm.c.orig -diff -ru source/drivers/gpu/drm/amd/amdgpu/cik_sdma.c source/drivers/gpu/drm/amd/amdgpu/cik_sdma.c ---- source/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 2017-08-21 10:54:54.485544208 -0400 -@@ -143,7 +143,7 @@ + } +diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +index c216e16..4a399cc 100644 +--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c ++++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +@@ -142,7 +142,7 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev) + } out: if (err) { - printk(KERN_ERR -- "cik_sdma: Failed to load firmware \"%s\"\n", -+ "cik_sdma: Failed to load firmware\n", - fw_name); +- pr_err("cik_sdma: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("cik_sdma: Failed to load firmware\n", fw_name); for (i = 0; i < adev->sdma.num_instances; i++) { release_firmware(adev->sdma.instance[i].fw); -diff -ru source/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c source/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c ---- source/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 2017-08-21 10:54:54.489544208 -0400 -@@ -973,7 +973,7 @@ + adev->sdma.instance[i].fw = NULL; +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +index 37b45e4..b7fa33c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +@@ -973,7 +973,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) + out: if (err) { - printk(KERN_ERR -- "gfx7: Failed to load firmware \"%s\"\n", -+ "gfx7: Failed to load firmware\n", - fw_name); +- pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("gfx7: Failed to load firmware\n", fw_name); release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; -diff -ru source/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c source/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ---- source/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 2017-08-21 10:54:54.489544208 -0400 -@@ -1097,7 +1097,7 @@ + release_firmware(adev->gfx.me_fw); +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index aa5a50f..c7acffb 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -1106,7 +1106,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) out: if (err) { dev_err(adev->dev, @@ -177,61 +161,63 @@ diff -ru source/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c source/drivers/gpu/drm/amd fw_name); release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; -Only in source/drivers/gpu/drm/amd/amdgpu: gfx_v8_0.c.orig -diff -ru source/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c source/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c ---- source/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 2017-08-21 10:54:54.489544208 -0400 -@@ -162,7 +162,7 @@ +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +index 7e9ea53..6815fb5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +@@ -163,7 +163,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) + out: if (err) { - printk(KERN_ERR -- "cik_mc: Failed to load firmware \"%s\"\n", -+ "cik_mc: Failed to load firmware\n", - fw_name); +- pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("cik_mc: Failed to load firmware\n", fw_name); release_firmware(adev->mc.fw); adev->mc.fw = NULL; -diff -ru source/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c source/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c ---- source/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 2017-08-21 10:54:54.489544208 -0400 -@@ -246,7 +246,7 @@ + } +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +index cc9f880..67359b3 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +@@ -247,7 +247,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) + out: if (err) { - printk(KERN_ERR -- "mc: Failed to load firmware \"%s\"\n", -+ "mc: Failed to load firmware\n", - fw_name); +- pr_err("mc: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("mc: Failed to load firmware\n", fw_name); release_firmware(adev->mc.fw); adev->mc.fw = NULL; -Only in source/drivers/gpu/drm/amd/amdgpu: gmc_v8_0.c.orig -diff -ru source/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c source/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c ---- source/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 2017-08-21 10:54:54.489544208 -0400 -@@ -171,7 +171,7 @@ + } +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +index f2d0710..bf0edd5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +@@ -170,7 +170,7 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) + out: if (err) { - printk(KERN_ERR -- "sdma_v2_4: Failed to load firmware \"%s\"\n", -+ "sdma_v2_4: Failed to load firmware\n", - fw_name); +- pr_err("sdma_v2_4: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("sdma_v2_4: Failed to load firmware\n", fw_name); for (i = 0; i < adev->sdma.num_instances; i++) { release_firmware(adev->sdma.instance[i].fw); -diff -ru source/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c source/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c ---- source/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 2017-08-21 10:54:54.489544208 -0400 -@@ -322,7 +322,7 @@ + adev->sdma.instance[i].fw = NULL; +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +index 1d766ae..1103992 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +@@ -321,7 +321,7 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) + } out: if (err) { - printk(KERN_ERR -- "sdma_v3_0: Failed to load firmware \"%s\"\n", -+ "sdma_v3_0: Failed to load firmware\n", - fw_name); +- pr_err("sdma_v3_0: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("sdma_v3_0: Failed to load firmware\n", fw_name); for (i = 0; i < adev->sdma.num_instances; i++) { release_firmware(adev->sdma.instance[i].fw); -Only in source/drivers/gpu/drm/amd/amdgpu: sdma_v3_0.c.orig -diff -ru source/drivers/gpu/drm/drm_edid_load.c source/drivers/gpu/drm/drm_edid_load.c ---- source/drivers/gpu/drm/drm_edid_load.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/drm_edid_load.c 2017-08-21 10:54:54.489544208 -0400 -@@ -188,8 +188,8 @@ + adev->sdma.instance[i].fw = NULL; +diff --git a/drivers/gpu/drm/drm_edid_load.c b/drivers/gpu/drm/drm_edid_load.c +index 1c0495a..0085e8e 100644 +--- a/drivers/gpu/drm/drm_edid_load.c ++++ b/drivers/gpu/drm/drm_edid_load.c +@@ -188,8 +188,8 @@ static void *edid_load(struct drm_connector *connector, const char *name, err = request_firmware(&fw, name, &pdev->dev); platform_device_unregister(pdev); if (err) { @@ -242,10 +228,11 @@ diff -ru source/drivers/gpu/drm/drm_edid_load.c source/drivers/gpu/drm/drm_edid_ return ERR_PTR(err); } -diff -ru source/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c source/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c ---- source/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c 2017-08-21 10:54:54.489544208 -0400 -@@ -109,7 +109,7 @@ +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c b/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c +index 06bdb67..7eca739 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c +@@ -109,7 +109,7 @@ nvkm_xtensa_init(struct nvkm_engine *engine) ret = request_firmware(&fw, name, device->dev); if (ret) { @@ -254,333 +241,324 @@ diff -ru source/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c source/drivers/gpu/ return ret; } -diff -ru source/drivers/gpu/drm/r128/r128_cce.c source/drivers/gpu/drm/r128/r128_cce.c ---- source/drivers/gpu/drm/r128/r128_cce.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/r128/r128_cce.c 2017-08-21 10:54:54.489544208 -0400 -@@ -155,14 +155,14 @@ +diff --git a/drivers/gpu/drm/r128/r128_cce.c b/drivers/gpu/drm/r128/r128_cce.c +index c9890af..cfe11ce 100644 +--- a/drivers/gpu/drm/r128/r128_cce.c ++++ b/drivers/gpu/drm/r128/r128_cce.c +@@ -155,13 +155,13 @@ static int r128_cce_load_microcode(drm_r128_private_t *dev_priv) rc = request_firmware(&fw, FIRMWARE_NAME, &pdev->dev); platform_device_unregister(pdev); if (rc) { -- printk(KERN_ERR "r128_cce: Failed to load firmware \"%s\"\n", -+ printk(KERN_ERR "r128_cce: Failed to load firmware\n", +- pr_err("r128_cce: Failed to load firmware \"%s\"\n", ++ pr_err("r128_cce: Failed to load firmware\n", FIRMWARE_NAME); return rc; } if (fw->size != 256 * 8) { - printk(KERN_ERR -- "r128_cce: Bogus length %zu in firmware \"%s\"\n", -+ "r128_cce: Bogus length %zu in firmware\n", +- pr_err("r128_cce: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("r128_cce: Bogus length %zu in firmware\n", fw->size, FIRMWARE_NAME); rc = -EINVAL; goto out_release; -diff -ru source/drivers/gpu/drm/radeon/cik.c source/drivers/gpu/drm/radeon/cik.c ---- source/drivers/gpu/drm/radeon/cik.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/radeon/cik.c 2017-08-21 10:54:54.489544208 -0400 -@@ -2070,7 +2070,7 @@ +diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c +index 3cb6c55..5bf3ff6 100644 +--- a/drivers/gpu/drm/radeon/cik.c ++++ b/drivers/gpu/drm/radeon/cik.c +@@ -2072,7 +2072,7 @@ static int cik_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->pfp_fw->size != pfp_req_size) { - printk(KERN_ERR -- "cik_cp: Bogus length %zu in firmware \"%s\"\n", -+ "cik_cp: Bogus length %zu in firmware\n", +- pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_cp: Bogus length %zu in firmware\n", rdev->pfp_fw->size, fw_name); err = -EINVAL; goto out; -@@ -2079,7 +2079,7 @@ +@@ -2080,7 +2080,7 @@ static int cik_init_microcode(struct radeon_device *rdev) + } else { err = radeon_ucode_validate(rdev->pfp_fw); if (err) { - printk(KERN_ERR -- "cik_fw: validation failed for firmware \"%s\"\n", -+ "cik_fw: validation failed for firmware\n", +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", fw_name); goto out; } else { -@@ -2096,7 +2096,7 @@ +@@ -2096,14 +2096,14 @@ static int cik_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->me_fw->size != me_req_size) { - printk(KERN_ERR -- "cik_cp: Bogus length %zu in firmware \"%s\"\n", -+ "cik_cp: Bogus length %zu in firmware\n", +- pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_cp: Bogus length %zu in firmware\n", rdev->me_fw->size, fw_name); err = -EINVAL; } -@@ -2104,7 +2104,7 @@ + } else { err = radeon_ucode_validate(rdev->me_fw); if (err) { - printk(KERN_ERR -- "cik_fw: validation failed for firmware \"%s\"\n", -+ "cik_fw: validation failed for firmware\n", +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", fw_name); goto out; } else { -@@ -2121,7 +2121,7 @@ +@@ -2119,14 +2119,14 @@ static int cik_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->ce_fw->size != ce_req_size) { - printk(KERN_ERR -- "cik_cp: Bogus length %zu in firmware \"%s\"\n", -+ "cik_cp: Bogus length %zu in firmware\n", +- pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_cp: Bogus length %zu in firmware\n", rdev->ce_fw->size, fw_name); err = -EINVAL; } -@@ -2129,7 +2129,7 @@ + } else { err = radeon_ucode_validate(rdev->ce_fw); if (err) { - printk(KERN_ERR -- "cik_fw: validation failed for firmware \"%s\"\n", -+ "cik_fw: validation failed for firmware\n", +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", fw_name); goto out; } else { -@@ -2146,7 +2146,7 @@ +@@ -2142,14 +2142,14 @@ static int cik_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->mec_fw->size != mec_req_size) { - printk(KERN_ERR -- "cik_cp: Bogus length %zu in firmware \"%s\"\n", -+ "cik_cp: Bogus length %zu in firmware\n", +- pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_cp: Bogus length %zu in firmware\n", rdev->mec_fw->size, fw_name); err = -EINVAL; } -@@ -2154,7 +2154,7 @@ + } else { err = radeon_ucode_validate(rdev->mec_fw); if (err) { - printk(KERN_ERR -- "cik_fw: validation failed for firmware \"%s\"\n", -+ "cik_fw: validation failed for firmware\n", +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", fw_name); goto out; } else { -@@ -2186,7 +2186,7 @@ +@@ -2180,14 +2180,14 @@ static int cik_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->rlc_fw->size != rlc_req_size) { - printk(KERN_ERR -- "cik_rlc: Bogus length %zu in firmware \"%s\"\n", -+ "cik_rlc: Bogus length %zu in firmware\n", +- pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_rlc: Bogus length %zu in firmware\n", rdev->rlc_fw->size, fw_name); err = -EINVAL; } -@@ -2194,7 +2194,7 @@ + } else { err = radeon_ucode_validate(rdev->rlc_fw); if (err) { - printk(KERN_ERR -- "cik_fw: validation failed for firmware \"%s\"\n", -+ "cik_fw: validation failed for firmware\n", +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", fw_name); goto out; } else { -@@ -2211,7 +2211,7 @@ +@@ -2203,14 +2203,14 @@ static int cik_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->sdma_fw->size != sdma_req_size) { - printk(KERN_ERR -- "cik_sdma: Bogus length %zu in firmware \"%s\"\n", -+ "cik_sdma: Bogus length %zu in firmware\n", +- pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_sdma: Bogus length %zu in firmware\n", rdev->sdma_fw->size, fw_name); err = -EINVAL; } -@@ -2219,7 +2219,7 @@ + } else { err = radeon_ucode_validate(rdev->sdma_fw); if (err) { - printk(KERN_ERR -- "cik_fw: validation failed for firmware \"%s\"\n", -+ "cik_fw: validation failed for firmware\n", +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", fw_name); goto out; } else { -@@ -2243,7 +2243,7 @@ +@@ -2233,7 +2233,7 @@ static int cik_init_microcode(struct radeon_device *rdev) + } if ((rdev->mc_fw->size != mc_req_size) && (rdev->mc_fw->size != mc2_req_size)){ - printk(KERN_ERR -- "cik_mc: Bogus length %zu in firmware \"%s\"\n", -+ "cik_mc: Bogus length %zu in firmware\n", +- pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_mc: Bogus length %zu in firmware\n", rdev->mc_fw->size, fw_name); err = -EINVAL; } -@@ -2252,7 +2252,7 @@ +@@ -2241,7 +2241,7 @@ static int cik_init_microcode(struct radeon_device *rdev) + } else { err = radeon_ucode_validate(rdev->mc_fw); if (err) { - printk(KERN_ERR -- "cik_fw: validation failed for firmware \"%s\"\n", -+ "cik_fw: validation failed for firmware\n", +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", fw_name); goto out; } else { -@@ -2270,14 +2270,14 @@ +@@ -2258,20 +2258,20 @@ static int cik_init_microcode(struct radeon_device *rdev) + snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); if (err) { - printk(KERN_ERR -- "smc: error loading firmware \"%s\"\n", -+ "smc: error loading firmware\n", +- pr_err("smc: error loading firmware \"%s\"\n", ++ pr_err("smc: error loading firmware\n", fw_name); release_firmware(rdev->smc_fw); rdev->smc_fw = NULL; err = 0; } else if (rdev->smc_fw->size != smc_req_size) { - printk(KERN_ERR -- "cik_smc: Bogus length %zu in firmware \"%s\"\n", -+ "cik_smc: Bogus length %zu in firmware\n", +- pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_smc: Bogus length %zu in firmware\n", rdev->smc_fw->size, fw_name); err = -EINVAL; } -@@ -2285,7 +2285,7 @@ + } else { err = radeon_ucode_validate(rdev->smc_fw); if (err) { - printk(KERN_ERR -- "cik_fw: validation failed for firmware \"%s\"\n", -+ "cik_fw: validation failed for firmware\n", +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", fw_name); goto out; } else { -@@ -2307,7 +2307,7 @@ +@@ -2292,7 +2292,7 @@ static int cik_init_microcode(struct radeon_device *rdev) + out: if (err) { if (err != -EINVAL) - printk(KERN_ERR -- "cik_cp: Failed to load firmware \"%s\"\n", -+ "cik_cp: Failed to load firmware\n", +- pr_err("cik_cp: Failed to load firmware \"%s\"\n", ++ pr_err("cik_cp: Failed to load firmware\n", fw_name); release_firmware(rdev->pfp_fw); rdev->pfp_fw = NULL; -Only in source/drivers/gpu/drm/radeon: cik.c.orig -diff -ru source/drivers/gpu/drm/radeon/ni.c source/drivers/gpu/drm/radeon/ni.c ---- source/drivers/gpu/drm/radeon/ni.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/radeon/ni.c 2017-08-21 10:54:54.489544208 -0400 -@@ -775,7 +775,7 @@ +diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c +index 9eccd0c..3b9b7d6 100644 +--- a/drivers/gpu/drm/radeon/ni.c ++++ b/drivers/gpu/drm/radeon/ni.c +@@ -774,7 +774,7 @@ int ni_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->pfp_fw->size != pfp_req_size) { - printk(KERN_ERR -- "ni_cp: Bogus length %zu in firmware \"%s\"\n", -+ "ni_cp: Bogus length %zu in firmware\n", +- pr_err("ni_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("ni_cp: Bogus length %zu in firmware\n", rdev->pfp_fw->size, fw_name); err = -EINVAL; goto out; -@@ -787,7 +787,7 @@ +@@ -785,7 +785,7 @@ int ni_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->me_fw->size != me_req_size) { - printk(KERN_ERR -- "ni_cp: Bogus length %zu in firmware \"%s\"\n", -+ "ni_cp: Bogus length %zu in firmware\n", +- pr_err("ni_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("ni_cp: Bogus length %zu in firmware\n", rdev->me_fw->size, fw_name); err = -EINVAL; } -@@ -798,7 +798,7 @@ +@@ -795,7 +795,7 @@ int ni_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->rlc_fw->size != rlc_req_size) { - printk(KERN_ERR -- "ni_rlc: Bogus length %zu in firmware \"%s\"\n", -+ "ni_rlc: Bogus length %zu in firmware\n", +- pr_err("ni_rlc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("ni_rlc: Bogus length %zu in firmware\n", rdev->rlc_fw->size, fw_name); err = -EINVAL; } -@@ -811,7 +811,7 @@ +@@ -807,7 +807,7 @@ int ni_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->mc_fw->size != mc_req_size) { - printk(KERN_ERR -- "ni_mc: Bogus length %zu in firmware \"%s\"\n", -+ "ni_mc: Bogus length %zu in firmware\n", +- pr_err("ni_mc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("ni_mc: Bogus length %zu in firmware\n", rdev->mc_fw->size, fw_name); err = -EINVAL; } -@@ -822,14 +822,14 @@ +@@ -817,12 +817,12 @@ int ni_init_microcode(struct radeon_device *rdev) + snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); if (err) { - printk(KERN_ERR -- "smc: error loading firmware \"%s\"\n", -+ "smc: error loading firmware\n", - fw_name); +- pr_err("smc: error loading firmware \"%s\"\n", fw_name); ++ pr_err("smc: error loading firmware\n", fw_name); release_firmware(rdev->smc_fw); rdev->smc_fw = NULL; err = 0; } else if (rdev->smc_fw->size != smc_req_size) { - printk(KERN_ERR -- "ni_mc: Bogus length %zu in firmware \"%s\"\n", -+ "ni_mc: Bogus length %zu in firmware\n", +- pr_err("ni_mc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("ni_mc: Bogus length %zu in firmware\n", rdev->mc_fw->size, fw_name); err = -EINVAL; } -@@ -839,7 +839,7 @@ +@@ -831,7 +831,7 @@ int ni_init_microcode(struct radeon_device *rdev) + out: if (err) { if (err != -EINVAL) - printk(KERN_ERR -- "ni_cp: Failed to load firmware \"%s\"\n", -+ "ni_cp: Failed to load firmware\n", +- pr_err("ni_cp: Failed to load firmware \"%s\"\n", ++ pr_err("ni_cp: Failed to load firmware\n", fw_name); release_firmware(rdev->pfp_fw); rdev->pfp_fw = NULL; -diff -ru source/drivers/gpu/drm/radeon/r100.c source/drivers/gpu/drm/radeon/r100.c ---- source/drivers/gpu/drm/radeon/r100.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/radeon/r100.c 2017-08-21 10:54:54.493544208 -0400 -@@ -1042,11 +1042,11 @@ +diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c +index c31e660..658eaf8 100644 +--- a/drivers/gpu/drm/radeon/r100.c ++++ b/drivers/gpu/drm/radeon/r100.c +@@ -1042,9 +1042,9 @@ static int r100_cp_init_microcode(struct radeon_device *rdev) err = request_firmware(&rdev->me_fw, fw_name, rdev->dev); if (err) { -- printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n", -+ printk(KERN_ERR "radeon_cp: Failed to load firmware\n", - fw_name); +- pr_err("radeon_cp: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("radeon_cp: Failed to load firmware\n", fw_name); } else if (rdev->me_fw->size % 8) { - printk(KERN_ERR -- "radeon_cp: Bogus length %zu in firmware \"%s\"\n", -+ "radeon_cp: Bogus length %zu in firmware\n", +- pr_err("radeon_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("radeon_cp: Bogus length %zu in firmware\n", rdev->me_fw->size, fw_name); err = -EINVAL; release_firmware(rdev->me_fw); -diff -ru source/drivers/gpu/drm/radeon/r600.c source/drivers/gpu/drm/radeon/r600.c ---- source/drivers/gpu/drm/radeon/r600.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/radeon/r600.c 2017-08-21 10:54:54.493544208 -0400 -@@ -2551,7 +2551,7 @@ +diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c +index e06e2d8..9004945 100644 +--- a/drivers/gpu/drm/radeon/r600.c ++++ b/drivers/gpu/drm/radeon/r600.c +@@ -2550,7 +2550,7 @@ int r600_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->pfp_fw->size != pfp_req_size) { - printk(KERN_ERR -- "r600_cp: Bogus length %zu in firmware \"%s\"\n", -+ "r600_cp: Bogus length %zu in firmware \n", +- pr_err("r600_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("r600_cp: Bogus length %zu in firmware\n", rdev->pfp_fw->size, fw_name); err = -EINVAL; goto out; -@@ -2563,7 +2563,7 @@ +@@ -2561,7 +2561,7 @@ int r600_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->me_fw->size != me_req_size) { - printk(KERN_ERR -- "r600_cp: Bogus length %zu in firmware \"%s\"\n", -+ "r600_cp: Bogus length %zu in firmware \n", +- pr_err("r600_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("r600_cp: Bogus length %zu in firmware\n", rdev->me_fw->size, fw_name); err = -EINVAL; } -@@ -2574,7 +2574,7 @@ +@@ -2571,7 +2571,7 @@ int r600_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->rlc_fw->size != rlc_req_size) { - printk(KERN_ERR -- "r600_rlc: Bogus length %zu in firmware \"%s\"\n", -+ "r600_rlc: Bogus length %zu in firmware \n", +- pr_err("r600_rlc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("r600_rlc: Bogus length %zu in firmware\n", rdev->rlc_fw->size, fw_name); err = -EINVAL; } -@@ -2584,14 +2584,14 @@ +@@ -2580,12 +2580,12 @@ int r600_init_microcode(struct radeon_device *rdev) + snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", smc_chip_name); err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); if (err) { - printk(KERN_ERR -- "smc: error loading firmware \"%s\"\n", -+ "smc: error loading firmware \n", - fw_name); +- pr_err("smc: error loading firmware \"%s\"\n", fw_name); ++ pr_err("smc: error loading firmware\n", fw_name); release_firmware(rdev->smc_fw); rdev->smc_fw = NULL; err = 0; } else if (rdev->smc_fw->size != smc_req_size) { - printk(KERN_ERR -- "smc: Bogus length %zu in firmware \"%s\"\n", -+ "smc: Bogus length %zu in firmware \n", +- pr_err("smc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("smc: Bogus length %zu in firmware\n", rdev->smc_fw->size, fw_name); err = -EINVAL; } -@@ -2601,7 +2601,7 @@ +@@ -2594,7 +2594,7 @@ int r600_init_microcode(struct radeon_device *rdev) + out: if (err) { if (err != -EINVAL) - printk(KERN_ERR -- "r600_cp: Failed to load firmware \"%s\"\n", -+ "r600_cp: Failed to load firmware \n", +- pr_err("r600_cp: Failed to load firmware \"%s\"\n", ++ pr_err("r600_cp: Failed to load firmware\n", fw_name); release_firmware(rdev->pfp_fw); rdev->pfp_fw = NULL; -diff -ru source/drivers/gpu/drm/radeon/radeon_uvd.c source/drivers/gpu/drm/radeon/radeon_uvd.c ---- source/drivers/gpu/drm/radeon/radeon_uvd.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/radeon/radeon_uvd.c 2017-08-21 10:54:54.493544208 -0400 -@@ -140,7 +140,7 @@ +diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c +index d34d1cf..10e859c 100644 +--- a/drivers/gpu/drm/radeon/radeon_uvd.c ++++ b/drivers/gpu/drm/radeon/radeon_uvd.c +@@ -140,7 +140,7 @@ int radeon_uvd_init(struct radeon_device *rdev) /* Let's try to load the newer firmware first */ r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev); if (r) { @@ -589,7 +567,7 @@ diff -ru source/drivers/gpu/drm/radeon/radeon_uvd.c source/drivers/gpu/drm/radeo fw_name); } else { struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data; -@@ -175,7 +175,7 @@ +@@ -175,7 +175,7 @@ int radeon_uvd_init(struct radeon_device *rdev) if (!fw_name || r) { r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev); if (r) { @@ -598,10 +576,11 @@ diff -ru source/drivers/gpu/drm/radeon/radeon_uvd.c source/drivers/gpu/drm/radeo legacy_fw_name); return r; } -diff -ru source/drivers/gpu/drm/radeon/radeon_vce.c source/drivers/gpu/drm/radeon/radeon_vce.c ---- source/drivers/gpu/drm/radeon/radeon_vce.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/radeon/radeon_vce.c 2017-08-21 10:54:54.493544208 -0400 -@@ -87,7 +87,7 @@ +diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c +index c1c619f..744612d 100644 +--- a/drivers/gpu/drm/radeon/radeon_vce.c ++++ b/drivers/gpu/drm/radeon/radeon_vce.c +@@ -87,7 +87,7 @@ int radeon_vce_init(struct radeon_device *rdev) r = request_firmware(&rdev->vce_fw, fw_name, rdev->dev); if (r) { @@ -610,139 +589,134 @@ diff -ru source/drivers/gpu/drm/radeon/radeon_vce.c source/drivers/gpu/drm/radeo fw_name); return r; } -diff -ru source/drivers/gpu/drm/radeon/si.c source/drivers/gpu/drm/radeon/si.c ---- source/drivers/gpu/drm/radeon/si.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/gpu/drm/radeon/si.c 2017-08-21 10:54:54.493544208 -0400 -@@ -1763,7 +1763,7 @@ +diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c +index 1907c95..7ff65cc 100644 +--- a/drivers/gpu/drm/radeon/si.c ++++ b/drivers/gpu/drm/radeon/si.c +@@ -1786,7 +1786,7 @@ static int si_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->pfp_fw->size != pfp_req_size) { - printk(KERN_ERR -- "si_cp: Bogus length %zu in firmware \"%s\"\n", -+ "si_cp: Bogus length %zu in firmware\n", +- pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("si_cp: Bogus length %zu in firmware\n", rdev->pfp_fw->size, fw_name); err = -EINVAL; goto out; -@@ -1772,7 +1772,7 @@ +@@ -1794,7 +1794,7 @@ static int si_init_microcode(struct radeon_device *rdev) + } else { err = radeon_ucode_validate(rdev->pfp_fw); if (err) { - printk(KERN_ERR -- "si_cp: validation failed for firmware \"%s\"\n", -+ "si_cp: validation failed for firmware\n", +- pr_err("si_cp: validation failed for firmware \"%s\"\n", ++ pr_err("si_cp: validation failed for firmware\n", fw_name); goto out; } else { -@@ -1789,7 +1789,7 @@ +@@ -1810,14 +1810,14 @@ static int si_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->me_fw->size != me_req_size) { - printk(KERN_ERR -- "si_cp: Bogus length %zu in firmware \"%s\"\n", -+ "si_cp: Bogus length %zu in firmware\n", +- pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("si_cp: Bogus length %zu in firmware\n", rdev->me_fw->size, fw_name); err = -EINVAL; } -@@ -1797,7 +1797,7 @@ + } else { err = radeon_ucode_validate(rdev->me_fw); if (err) { - printk(KERN_ERR -- "si_cp: validation failed for firmware \"%s\"\n", -+ "si_cp: validation failed for firmware\n", +- pr_err("si_cp: validation failed for firmware \"%s\"\n", ++ pr_err("si_cp: validation failed for firmware\n", fw_name); goto out; } else { -@@ -1814,7 +1814,7 @@ +@@ -1833,14 +1833,14 @@ static int si_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->ce_fw->size != ce_req_size) { - printk(KERN_ERR -- "si_cp: Bogus length %zu in firmware \"%s\"\n", -+ "si_cp: Bogus length %zu in firmware\n", +- pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("si_cp: Bogus length %zu in firmware\n", rdev->ce_fw->size, fw_name); err = -EINVAL; } -@@ -1822,7 +1822,7 @@ + } else { err = radeon_ucode_validate(rdev->ce_fw); if (err) { - printk(KERN_ERR -- "si_cp: validation failed for firmware \"%s\"\n", -+ "si_cp: validation failed for firmware\n", +- pr_err("si_cp: validation failed for firmware \"%s\"\n", ++ pr_err("si_cp: validation failed for firmware\n", fw_name); goto out; } else { -@@ -1839,7 +1839,7 @@ +@@ -1856,14 +1856,14 @@ static int si_init_microcode(struct radeon_device *rdev) + if (err) goto out; if (rdev->rlc_fw->size != rlc_req_size) { - printk(KERN_ERR -- "si_rlc: Bogus length %zu in firmware \"%s\"\n", -+ "si_rlc: Bogus length %zu in firmware\n", +- pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("si_rlc: Bogus length %zu in firmware\n", rdev->rlc_fw->size, fw_name); err = -EINVAL; } -@@ -1847,7 +1847,7 @@ + } else { err = radeon_ucode_validate(rdev->rlc_fw); if (err) { - printk(KERN_ERR -- "si_cp: validation failed for firmware \"%s\"\n", -+ "si_cp: validation failed for firmware\n", +- pr_err("si_cp: validation failed for firmware \"%s\"\n", ++ pr_err("si_cp: validation failed for firmware\n", fw_name); goto out; } else { -@@ -1872,7 +1872,7 @@ +@@ -1887,7 +1887,7 @@ static int si_init_microcode(struct radeon_device *rdev) + } if ((rdev->mc_fw->size != mc_req_size) && (rdev->mc_fw->size != mc2_req_size)) { - printk(KERN_ERR -- "si_mc: Bogus length %zu in firmware \"%s\"\n", -+ "si_mc: Bogus length %zu in firmware\n", +- pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("si_mc: Bogus length %zu in firmware\n", rdev->mc_fw->size, fw_name); err = -EINVAL; } -@@ -1881,7 +1881,7 @@ +@@ -1895,7 +1895,7 @@ static int si_init_microcode(struct radeon_device *rdev) + } else { err = radeon_ucode_validate(rdev->mc_fw); if (err) { - printk(KERN_ERR -- "si_cp: validation failed for firmware \"%s\"\n", -+ "si_cp: validation failed for firmware\n", +- pr_err("si_cp: validation failed for firmware \"%s\"\n", ++ pr_err("si_cp: validation failed for firmware\n", fw_name); goto out; } else { -@@ -1901,14 +1901,14 @@ +@@ -1914,19 +1914,19 @@ static int si_init_microcode(struct radeon_device *rdev) + snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); if (err) { - printk(KERN_ERR -- "smc: error loading firmware \"%s\"\n", -+ "smc: error loading firmware\n", - fw_name); +- pr_err("smc: error loading firmware \"%s\"\n", fw_name); ++ pr_err("smc: error loading firmware\n", fw_name); release_firmware(rdev->smc_fw); rdev->smc_fw = NULL; err = 0; } else if (rdev->smc_fw->size != smc_req_size) { - printk(KERN_ERR -- "si_smc: Bogus length %zu in firmware \"%s\"\n", -+ "si_smc: Bogus length %zu in firmware\n", +- pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("si_smc: Bogus length %zu in firmware\n", rdev->smc_fw->size, fw_name); err = -EINVAL; } -@@ -1916,7 +1916,7 @@ + } else { err = radeon_ucode_validate(rdev->smc_fw); if (err) { - printk(KERN_ERR -- "si_cp: validation failed for firmware \"%s\"\n", -+ "si_cp: validation failed for firmware\n", +- pr_err("si_cp: validation failed for firmware \"%s\"\n", ++ pr_err("si_cp: validation failed for firmware\n", fw_name); goto out; } else { -@@ -1936,7 +1936,7 @@ +@@ -1945,7 +1945,7 @@ static int si_init_microcode(struct radeon_device *rdev) + out: if (err) { if (err != -EINVAL) - printk(KERN_ERR -- "si_cp: Failed to load firmware \"%s\"\n", -+ "si_cp: Failed to load firmware\n", +- pr_err("si_cp: Failed to load firmware \"%s\"\n", ++ pr_err("si_cp: Failed to load firmware\n", fw_name); release_firmware(rdev->pfp_fw); rdev->pfp_fw = NULL; -Only in source/drivers/gpu/drm/radeon: si.c.orig -diff -ru source/drivers/net/wireless/intel/ipw2x00/ipw2200.c source/drivers/net/wireless/intel/ipw2x00/ipw2200.c ---- source/drivers/net/wireless/intel/ipw2x00/ipw2200.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/intel/ipw2x00/ipw2200.c 2017-08-21 10:54:54.493544208 -0400 -@@ -3419,12 +3419,12 @@ +diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c +index 9368abd..fa4de6a 100644 +--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c ++++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c +@@ -3417,12 +3417,12 @@ static int ipw_get_fw(struct ipw_priv *priv, /* ask firmware_class module to get the boot firmware off disk */ rc = request_firmware(raw, name, &priv->pci_dev->dev); if (rc < 0) { @@ -757,7 +731,7 @@ diff -ru source/drivers/net/wireless/intel/ipw2x00/ipw2200.c source/drivers/net/ return -EINVAL; } -@@ -3432,13 +3432,12 @@ +@@ -3430,13 +3430,12 @@ static int ipw_get_fw(struct ipw_priv *priv, if ((*raw)->size < sizeof(*fw) + le32_to_cpu(fw->boot_size) + le32_to_cpu(fw->ucode_size) + le32_to_cpu(fw->fw_size)) { @@ -774,7 +748,7 @@ diff -ru source/drivers/net/wireless/intel/ipw2x00/ipw2200.c source/drivers/net/ le32_to_cpu(fw->ver) >> 16, le32_to_cpu(fw->ver) & 0xff, (*raw)->size - sizeof(*fw)); -@@ -3574,7 +3573,7 @@ +@@ -3569,7 +3568,7 @@ static int ipw_load(struct ipw_priv *priv) /* DMA the initial boot firmware into the device */ rc = ipw_load_firmware(priv, boot_img, le32_to_cpu(fw->boot_size)); if (rc < 0) { @@ -783,7 +757,7 @@ diff -ru source/drivers/net/wireless/intel/ipw2x00/ipw2200.c source/drivers/net/ goto error; } -@@ -3606,7 +3605,7 @@ +@@ -3601,7 +3600,7 @@ static int ipw_load(struct ipw_priv *priv) /* DMA bss firmware into the device */ rc = ipw_load_firmware(priv, fw_img, le32_to_cpu(fw->fw_size)); if (rc < 0) { @@ -792,7 +766,7 @@ diff -ru source/drivers/net/wireless/intel/ipw2x00/ipw2200.c source/drivers/net/ goto error; } #ifdef CONFIG_PM -@@ -11217,7 +11216,7 @@ +@@ -11213,7 +11212,7 @@ static int ipw_up(struct ipw_priv *priv) * Also start the clocks. */ rc = ipw_load(priv); if (rc) { @@ -801,10 +775,11 @@ diff -ru source/drivers/net/wireless/intel/ipw2x00/ipw2200.c source/drivers/net/ return rc; } -diff -ru source/drivers/net/wireless/intel/iwlegacy/3945-mac.c source/drivers/net/wireless/intel/iwlegacy/3945-mac.c ---- source/drivers/net/wireless/intel/iwlegacy/3945-mac.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/intel/iwlegacy/3945-mac.c 2017-08-21 10:54:54.493544208 -0400 -@@ -1861,7 +1861,7 @@ +diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c +index 38bf403..a7934a6 100644 +--- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c ++++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c +@@ -1861,7 +1861,7 @@ il3945_read_ucode(struct il_priv *il) sprintf(buf, "%s%u%s", name_pre, idx, ".ucode"); ret = request_firmware(&ucode_raw, buf, &il->pci_dev->dev); if (ret < 0) { @@ -813,7 +788,7 @@ diff -ru source/drivers/net/wireless/intel/iwlegacy/3945-mac.c source/drivers/ne if (ret == -ENOENT) continue; else -@@ -1870,7 +1870,7 @@ +@@ -1870,7 +1870,7 @@ il3945_read_ucode(struct il_priv *il) if (idx < api_max) IL_ERR("Loaded firmware %s, " "which is deprecated. " @@ -822,7 +797,7 @@ diff -ru source/drivers/net/wireless/intel/iwlegacy/3945-mac.c source/drivers/ne api_max); D_INFO("Got firmware '%s' file " "(%zd bytes) from disk\n", buf, ucode_raw->size); -@@ -1906,16 +1906,14 @@ +@@ -1906,16 +1906,14 @@ il3945_read_ucode(struct il_priv *il) if (api_ver < api_min || api_ver > api_max) { IL_ERR("Driver unable to support your firmware API. " @@ -841,10 +816,11 @@ diff -ru source/drivers/net/wireless/intel/iwlegacy/3945-mac.c source/drivers/ne api_ver); IL_INFO("loaded firmware version %u.%u.%u.%u\n", -diff -ru source/drivers/net/wireless/intel/iwlegacy/4965-mac.c source/drivers/net/wireless/intel/iwlegacy/4965-mac.c ---- source/drivers/net/wireless/intel/iwlegacy/4965-mac.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/intel/iwlegacy/4965-mac.c 2017-08-21 10:54:54.497544208 -0400 -@@ -4706,7 +4706,7 @@ +diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c +index 5b51fba..b16ab69 100644 +--- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c ++++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c +@@ -4708,7 +4708,7 @@ il4965_request_firmware(struct il_priv *il, bool first) sprintf(il->firmware_name, "%s%s%s", name_pre, tag, ".ucode"); @@ -853,7 +829,7 @@ diff -ru source/drivers/net/wireless/intel/iwlegacy/4965-mac.c source/drivers/ne return request_firmware_nowait(THIS_MODULE, 1, il->firmware_name, &il->pci_dev->dev, GFP_KERNEL, il, -@@ -4797,7 +4797,7 @@ +@@ -4799,7 +4799,7 @@ il4965_ucode_callback(const struct firmware *ucode_raw, void *context) if (!ucode_raw) { if (il->fw_idx <= il->cfg->ucode_api_max) @@ -862,7 +838,7 @@ diff -ru source/drivers/net/wireless/intel/iwlegacy/4965-mac.c source/drivers/ne il->firmware_name); goto try_again; } -@@ -4827,16 +4827,13 @@ +@@ -4829,16 +4829,13 @@ il4965_ucode_callback(const struct firmware *ucode_raw, void *context) * on the API version read from firmware header from here on forward */ if (api_ver < api_min || api_ver > api_max) { @@ -881,22 +857,24 @@ diff -ru source/drivers/net/wireless/intel/iwlegacy/4965-mac.c source/drivers/ne api_ver); IL_INFO("loaded firmware version %u.%u.%u.%u\n", -diff -ru source/drivers/net/wireless/intel/iwlwifi/iwl-drv.c source/drivers/net/wireless/intel/iwlwifi/iwl-drv.c ---- source/drivers/net/wireless/intel/iwlwifi/iwl-drv.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/intel/iwlwifi/iwl-drv.c 2017-08-21 10:54:54.497544208 -0400 -@@ -232,7 +232,7 @@ +diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +index 4e0f86f..05ac835 100644 +--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c ++++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +@@ -257,7 +257,7 @@ static int iwl_request_firmware(struct iwl_drv *drv, bool first) snprintf(drv->firmware_name, sizeof(drv->firmware_name), "%s%s.ucode", - name_pre, tag); + fw_pre_name, tag); - IWL_DEBUG_INFO(drv, "attempting to load firmware '%s'\n", + IWL_DEBUG_INFO(drv, "attempting to load firmware\n", drv->firmware_name); return request_firmware_nowait(THIS_MODULE, 1, drv->firmware_name, -diff -ru source/drivers/net/wireless/intel/iwlwifi/mvm/fw.c source/drivers/net/wireless/intel/iwlwifi/mvm/fw.c ---- source/drivers/net/wireless/intel/iwlwifi/mvm/fw.c 2017-08-21 12:06:22.000000000 -0400 -+++ source/drivers/net/wireless/intel/iwlwifi/mvm/fw.c 2017-08-21 10:54:54.497544208 -0400 -@@ -1307,7 +1307,7 @@ +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +index 82863e9..1f2f235 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +@@ -1677,7 +1677,7 @@ int iwl_mvm_load_d3_fw(struct iwl_mvm *mvm) ret = iwl_mvm_load_ucode_wait_alive(mvm, IWL_UCODE_WOWLAN); if (ret) { @@ -905,11 +883,11 @@ diff -ru source/drivers/net/wireless/intel/iwlwifi/mvm/fw.c source/drivers/net/w goto error; } -Only in source/drivers/net/wireless/intel/iwlwifi/mvm: fw.c.orig -diff -ru source/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c source/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c ---- source/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c 2017-08-21 10:54:54.497544208 -0400 -@@ -414,7 +414,7 @@ +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +index dac7e54..d2aefe6 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +@@ -410,7 +410,7 @@ int iwl_mvm_read_external_nvm(struct iwl_mvm *mvm) ret = request_firmware(&fw_entry, mvm->nvm_file_name, mvm->trans->dev); if (ret) { @@ -918,11 +896,11 @@ diff -ru source/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c source/drivers/net/ mvm->nvm_file_name, ret); return ret; } -Only in source/drivers/net/wireless/intel/iwlwifi/mvm: nvm.c.orig -diff -ru source/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c source/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c ---- source/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c 2017-08-21 10:54:54.497544208 -0400 -@@ -2074,9 +2074,9 @@ +diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +index 21e5ef0..520e63b 100644 +--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c ++++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +@@ -2074,9 +2074,9 @@ int rtl8xxxu_load_firmware(struct rtl8xxxu_priv *priv, char *fw_name) int ret = 0; u16 signature; @@ -934,10 +912,11 @@ diff -ru source/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c source/dri ret = -EAGAIN; goto exit; } -diff -ru source/drivers/net/wireless/realtek/rtlwifi/core.c source/drivers/net/wireless/realtek/rtlwifi/core.c ---- source/drivers/net/wireless/realtek/rtlwifi/core.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/realtek/rtlwifi/core.c 2017-08-21 11:09:25.201544104 -0400 -@@ -106,7 +106,7 @@ +diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c +index b0ad061..dab2248 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/core.c ++++ b/drivers/net/wireless/realtek/rtlwifi/core.c +@@ -106,7 +106,7 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context, err = request_firmware(&firmware, rtlpriv->cfg->alt_fw_name, rtlpriv->io.dev); @@ -946,10 +925,11 @@ diff -ru source/drivers/net/wireless/realtek/rtlwifi/core.c source/drivers/net/w rtlpriv->cfg->alt_fw_name); if (!err) goto found_alt; -diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c source/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c ---- source/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c 2017-08-21 11:07:03.713544121 -0400 -@@ -172,7 +172,7 @@ +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c +index 774e720..a364c1b 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c +@@ -169,7 +169,7 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw) fw_name = "rtlwifi/rtl8188efw.bin"; rtlpriv->max_fw_size = 0x8000; @@ -958,10 +938,11 @@ diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c source/drive err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); -diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c source/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c ---- source/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c 2017-08-21 11:07:03.737544121 -0400 -@@ -173,7 +173,7 @@ +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c +index bcbb0c6..a1a9734 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c +@@ -170,7 +170,7 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw) fw_name = "rtlwifi/rtl8192cfw.bin"; rtlpriv->max_fw_size = 0x4000; @@ -970,10 +951,11 @@ diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c source/drive err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); -diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c source/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c ---- source/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c 2017-08-21 11:07:03.737544121 -0400 -@@ -82,7 +82,7 @@ +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c +index 96c923b..26eece2 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c +@@ -80,7 +80,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw) } /* provide name of alternative file */ rtlpriv->cfg->alt_fw_name = "rtlwifi/rtl8192cufw.bin"; @@ -982,10 +964,11 @@ diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c source/drive rtlpriv->max_fw_size = 0x4000; err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, -diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c source/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c ---- source/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c 2017-08-21 11:09:58.965544100 -0400 -@@ -178,7 +178,7 @@ +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +index 16132c6..dc9db6d 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +@@ -175,7 +175,7 @@ static int rtl92d_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->max_fw_size = 0x8000; pr_info("Driver for Realtek RTL8192DE WLAN interface\n"); @@ -994,10 +977,11 @@ diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c source/drive /* request fw */ err = request_firmware_nowait(THIS_MODULE, 1, fw_name, -diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c source/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c ---- source/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c 2017-08-21 11:07:03.741544121 -0400 -@@ -174,7 +174,7 @@ +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c +index eaa503b..343f691 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c +@@ -171,7 +171,7 @@ int rtl92ee_init_sw_vars(struct ieee80211_hw *hw) fw_name = "rtlwifi/rtl8192eefw.bin"; rtlpriv->max_fw_size = 0x8000; @@ -1006,10 +990,11 @@ diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c source/drive err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); -diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c source/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c ---- source/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c 2017-08-21 11:08:21.829544111 -0400 -@@ -91,7 +91,7 @@ +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c +index 2006b09..475940e 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c +@@ -91,7 +91,7 @@ static void rtl92se_fw_cb(const struct firmware *firmware, void *context) "Firmware callback routine entered!\n"); complete(&rtlpriv->firmware_loading_complete); if (!firmware) { @@ -1018,7 +1003,7 @@ diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c source/drive rtlpriv->max_fw_size = 0; return; } -@@ -212,7 +212,7 @@ +@@ -209,7 +209,7 @@ static int rtl92s_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->max_fw_size = RTL8190_MAX_FIRMWARE_CODE_SIZE*2 + sizeof(struct fw_hdr); pr_info("Driver for Realtek RTL8192SE/RTL8191SE\n" @@ -1027,10 +1012,11 @@ diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c source/drive /* request fw */ err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, -diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c source/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c ---- source/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c 2017-08-21 11:07:03.741544121 -0400 -@@ -181,7 +181,7 @@ +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c +index 7bf9f25..a2acfac 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c +@@ -178,7 +178,7 @@ int rtl8723e_init_sw_vars(struct ieee80211_hw *hw) fw_name = "rtlwifi/rtl8723fw_B.bin"; rtlpriv->max_fw_size = 0x6000; @@ -1039,10 +1025,11 @@ diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c source/drive err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); -diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c source/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c ---- source/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c 2017-08-21 11:07:03.741544121 -0400 -@@ -185,7 +185,7 @@ +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c +index f9d10f1..9048c51 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c +@@ -182,7 +182,7 @@ int rtl8723be_init_sw_vars(struct ieee80211_hw *hw) } rtlpriv->max_fw_size = 0x8000; @@ -1051,10 +1038,11 @@ diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c source/drive err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); -diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c source/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c ---- source/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c 2017-02-19 17:34:00.000000000 -0500 -+++ source/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c 2017-08-21 11:07:03.741544121 -0400 -@@ -213,7 +213,7 @@ +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c +index d71d277..f2c70e1 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c +@@ -209,7 +209,7 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->max_fw_size = 0x8000; /*load normal firmware*/ @@ -1063,8 +1051,8 @@ diff -ru source/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c source/drive err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); -@@ -223,7 +223,7 @@ - return 1; +@@ -226,7 +226,7 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) + } } /*load wowlan firmware*/ - pr_info("Using firmware %s\n", wowlan_fw_name); diff --git a/helpers/DATA/linux-hwe/silent-accept-firmware.patch.1 b/helpers/DATA/linux-hwe/silent-accept-firmware.patch.1 new file mode 100644 index 00000000..c1369696 --- /dev/null +++ b/helpers/DATA/linux-hwe/silent-accept-firmware.patch.1 @@ -0,0 +1,990 @@ +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +index c0a8062..65c1170 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +@@ -734,7 +734,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, + + err = amdgpu_ucode_validate(adev->pm.fw); + if (err) { +- DRM_ERROR("Failed to load firmware \"%s\"", fw_name); ++ DRM_ERROR("Failed to load firmware", fw_name); + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return err; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +index 2ca09f1..15caf24 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +@@ -173,7 +173,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) + + r = request_firmware(&adev->uvd.fw, fw_name, adev->dev); + if (r) { +- dev_err(adev->dev, "amdgpu_uvd: Can't load firmware \"%s\"\n", ++ dev_err(adev->dev, "amdgpu_uvd: Can't load firmware\n", + fw_name); + return r; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +index b692ad4..1ca7f9f 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +@@ -140,7 +140,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) + + r = request_firmware(&adev->vce.fw, fw_name, adev->dev); + if (r) { +- dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n", ++ dev_err(adev->dev, "amdgpu_vce: Can't load firmware\n", + fw_name); + return r; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +index cb508a2..ca4453b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c ++++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +@@ -5848,7 +5848,7 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev) + + out: + if (err) { +- pr_err("cik_smc: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("cik_smc: Failed to load firmware\n", fw_name); + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +index c216e16..4a399cc 100644 +--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c ++++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +@@ -142,7 +142,7 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev) + } + out: + if (err) { +- pr_err("cik_sdma: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("cik_sdma: Failed to load firmware\n", fw_name); + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +index 37b45e4..b7fa33c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +@@ -973,7 +973,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) + + out: + if (err) { +- pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("gfx7: Failed to load firmware\n", fw_name); + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index aa5a50f..c7acffb 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -1106,7 +1106,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) + out: + if (err) { + dev_err(adev->dev, +- "gfx8: Failed to load firmware \"%s\"\n", ++ "gfx8: Failed to load firmware\n", + fw_name); + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +index 7e9ea53..6815fb5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +@@ -163,7 +163,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) + + out: + if (err) { +- pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("cik_mc: Failed to load firmware\n", fw_name); + release_firmware(adev->mc.fw); + adev->mc.fw = NULL; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +index cc9f880..67359b3 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +@@ -247,7 +247,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) + + out: + if (err) { +- pr_err("mc: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("mc: Failed to load firmware\n", fw_name); + release_firmware(adev->mc.fw); + adev->mc.fw = NULL; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +index f2d0710..bf0edd5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +@@ -170,7 +170,7 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) + + out: + if (err) { +- pr_err("sdma_v2_4: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("sdma_v2_4: Failed to load firmware\n", fw_name); + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +index 1d766ae..1103992 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +@@ -321,7 +321,7 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) + } + out: + if (err) { +- pr_err("sdma_v3_0: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("sdma_v3_0: Failed to load firmware\n", fw_name); + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; +diff --git a/drivers/gpu/drm/drm_edid_load.c b/drivers/gpu/drm/drm_edid_load.c +index 1c0495a..0085e8e 100644 +--- a/drivers/gpu/drm/drm_edid_load.c ++++ b/drivers/gpu/drm/drm_edid_load.c +@@ -188,8 +188,8 @@ static void *edid_load(struct drm_connector *connector, const char *name, + err = request_firmware(&fw, name, &pdev->dev); + platform_device_unregister(pdev); + if (err) { +- DRM_ERROR("Requesting EDID firmware \"%s\" failed (err=%d)\n", +- name, err); ++ DRM_ERROR("Requesting EDID firmware failed (err=%d)\n", ++ err); + return ERR_PTR(err); + } + +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c b/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c +index 06bdb67..7eca739 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c +@@ -109,7 +109,7 @@ nvkm_xtensa_init(struct nvkm_engine *engine) + + ret = request_firmware(&fw, name, device->dev); + if (ret) { +- nvkm_warn(subdev, "unable to load firmware %s\n", name); ++ nvkm_warn(subdev, "unable to load firmware\n", name); + return ret; + } + +diff --git a/drivers/gpu/drm/r128/r128_cce.c b/drivers/gpu/drm/r128/r128_cce.c +index c9890af..cfe11ce 100644 +--- a/drivers/gpu/drm/r128/r128_cce.c ++++ b/drivers/gpu/drm/r128/r128_cce.c +@@ -155,13 +155,13 @@ static int r128_cce_load_microcode(drm_r128_private_t *dev_priv) + rc = request_firmware(&fw, FIRMWARE_NAME, &pdev->dev); + platform_device_unregister(pdev); + if (rc) { +- pr_err("r128_cce: Failed to load firmware \"%s\"\n", ++ pr_err("r128_cce: Failed to load firmware\n", + FIRMWARE_NAME); + return rc; + } + + if (fw->size != 256 * 8) { +- pr_err("r128_cce: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("r128_cce: Bogus length %zu in firmware\n", + fw->size, FIRMWARE_NAME); + rc = -EINVAL; + goto out_release; +diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c +index 3cb6c55..5bf3ff6 100644 +--- a/drivers/gpu/drm/radeon/cik.c ++++ b/drivers/gpu/drm/radeon/cik.c +@@ -2072,7 +2072,7 @@ static int cik_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->pfp_fw->size != pfp_req_size) { +- pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_cp: Bogus length %zu in firmware\n", + rdev->pfp_fw->size, fw_name); + err = -EINVAL; + goto out; +@@ -2080,7 +2080,7 @@ static int cik_init_microcode(struct radeon_device *rdev) + } else { + err = radeon_ucode_validate(rdev->pfp_fw); + if (err) { +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -2096,14 +2096,14 @@ static int cik_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->me_fw->size != me_req_size) { +- pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_cp: Bogus length %zu in firmware\n", + rdev->me_fw->size, fw_name); + err = -EINVAL; + } + } else { + err = radeon_ucode_validate(rdev->me_fw); + if (err) { +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -2119,14 +2119,14 @@ static int cik_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->ce_fw->size != ce_req_size) { +- pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_cp: Bogus length %zu in firmware\n", + rdev->ce_fw->size, fw_name); + err = -EINVAL; + } + } else { + err = radeon_ucode_validate(rdev->ce_fw); + if (err) { +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -2142,14 +2142,14 @@ static int cik_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->mec_fw->size != mec_req_size) { +- pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_cp: Bogus length %zu in firmware\n", + rdev->mec_fw->size, fw_name); + err = -EINVAL; + } + } else { + err = radeon_ucode_validate(rdev->mec_fw); + if (err) { +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -2180,14 +2180,14 @@ static int cik_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->rlc_fw->size != rlc_req_size) { +- pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_rlc: Bogus length %zu in firmware\n", + rdev->rlc_fw->size, fw_name); + err = -EINVAL; + } + } else { + err = radeon_ucode_validate(rdev->rlc_fw); + if (err) { +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -2203,14 +2203,14 @@ static int cik_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->sdma_fw->size != sdma_req_size) { +- pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_sdma: Bogus length %zu in firmware\n", + rdev->sdma_fw->size, fw_name); + err = -EINVAL; + } + } else { + err = radeon_ucode_validate(rdev->sdma_fw); + if (err) { +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -2233,7 +2233,7 @@ static int cik_init_microcode(struct radeon_device *rdev) + } + if ((rdev->mc_fw->size != mc_req_size) && + (rdev->mc_fw->size != mc2_req_size)){ +- pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_mc: Bogus length %zu in firmware\n", + rdev->mc_fw->size, fw_name); + err = -EINVAL; + } +@@ -2241,7 +2241,7 @@ static int cik_init_microcode(struct radeon_device *rdev) + } else { + err = radeon_ucode_validate(rdev->mc_fw); + if (err) { +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -2258,20 +2258,20 @@ static int cik_init_microcode(struct radeon_device *rdev) + snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); + err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); + if (err) { +- pr_err("smc: error loading firmware \"%s\"\n", ++ pr_err("smc: error loading firmware\n", + fw_name); + release_firmware(rdev->smc_fw); + rdev->smc_fw = NULL; + err = 0; + } else if (rdev->smc_fw->size != smc_req_size) { +- pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("cik_smc: Bogus length %zu in firmware\n", + rdev->smc_fw->size, fw_name); + err = -EINVAL; + } + } else { + err = radeon_ucode_validate(rdev->smc_fw); + if (err) { +- pr_err("cik_fw: validation failed for firmware \"%s\"\n", ++ pr_err("cik_fw: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -2292,7 +2292,7 @@ static int cik_init_microcode(struct radeon_device *rdev) + out: + if (err) { + if (err != -EINVAL) +- pr_err("cik_cp: Failed to load firmware \"%s\"\n", ++ pr_err("cik_cp: Failed to load firmware\n", + fw_name); + release_firmware(rdev->pfp_fw); + rdev->pfp_fw = NULL; +diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c +index 9eccd0c..3b9b7d6 100644 +--- a/drivers/gpu/drm/radeon/ni.c ++++ b/drivers/gpu/drm/radeon/ni.c +@@ -774,7 +774,7 @@ int ni_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->pfp_fw->size != pfp_req_size) { +- pr_err("ni_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("ni_cp: Bogus length %zu in firmware\n", + rdev->pfp_fw->size, fw_name); + err = -EINVAL; + goto out; +@@ -785,7 +785,7 @@ int ni_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->me_fw->size != me_req_size) { +- pr_err("ni_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("ni_cp: Bogus length %zu in firmware\n", + rdev->me_fw->size, fw_name); + err = -EINVAL; + } +@@ -795,7 +795,7 @@ int ni_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->rlc_fw->size != rlc_req_size) { +- pr_err("ni_rlc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("ni_rlc: Bogus length %zu in firmware\n", + rdev->rlc_fw->size, fw_name); + err = -EINVAL; + } +@@ -807,7 +807,7 @@ int ni_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->mc_fw->size != mc_req_size) { +- pr_err("ni_mc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("ni_mc: Bogus length %zu in firmware\n", + rdev->mc_fw->size, fw_name); + err = -EINVAL; + } +@@ -817,12 +817,12 @@ int ni_init_microcode(struct radeon_device *rdev) + snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); + err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); + if (err) { +- pr_err("smc: error loading firmware \"%s\"\n", fw_name); ++ pr_err("smc: error loading firmware\n", fw_name); + release_firmware(rdev->smc_fw); + rdev->smc_fw = NULL; + err = 0; + } else if (rdev->smc_fw->size != smc_req_size) { +- pr_err("ni_mc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("ni_mc: Bogus length %zu in firmware\n", + rdev->mc_fw->size, fw_name); + err = -EINVAL; + } +@@ -831,7 +831,7 @@ int ni_init_microcode(struct radeon_device *rdev) + out: + if (err) { + if (err != -EINVAL) +- pr_err("ni_cp: Failed to load firmware \"%s\"\n", ++ pr_err("ni_cp: Failed to load firmware\n", + fw_name); + release_firmware(rdev->pfp_fw); + rdev->pfp_fw = NULL; +diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c +index c31e660..658eaf8 100644 +--- a/drivers/gpu/drm/radeon/r100.c ++++ b/drivers/gpu/drm/radeon/r100.c +@@ -1042,9 +1042,9 @@ static int r100_cp_init_microcode(struct radeon_device *rdev) + + err = request_firmware(&rdev->me_fw, fw_name, rdev->dev); + if (err) { +- pr_err("radeon_cp: Failed to load firmware \"%s\"\n", fw_name); ++ pr_err("radeon_cp: Failed to load firmware\n", fw_name); + } else if (rdev->me_fw->size % 8) { +- pr_err("radeon_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("radeon_cp: Bogus length %zu in firmware\n", + rdev->me_fw->size, fw_name); + err = -EINVAL; + release_firmware(rdev->me_fw); +diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c +index e06e2d8..9004945 100644 +--- a/drivers/gpu/drm/radeon/r600.c ++++ b/drivers/gpu/drm/radeon/r600.c +@@ -2550,7 +2550,7 @@ int r600_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->pfp_fw->size != pfp_req_size) { +- pr_err("r600_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("r600_cp: Bogus length %zu in firmware\n", + rdev->pfp_fw->size, fw_name); + err = -EINVAL; + goto out; +@@ -2561,7 +2561,7 @@ int r600_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->me_fw->size != me_req_size) { +- pr_err("r600_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("r600_cp: Bogus length %zu in firmware\n", + rdev->me_fw->size, fw_name); + err = -EINVAL; + } +@@ -2571,7 +2571,7 @@ int r600_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->rlc_fw->size != rlc_req_size) { +- pr_err("r600_rlc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("r600_rlc: Bogus length %zu in firmware\n", + rdev->rlc_fw->size, fw_name); + err = -EINVAL; + } +@@ -2580,12 +2580,12 @@ int r600_init_microcode(struct radeon_device *rdev) + snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", smc_chip_name); + err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); + if (err) { +- pr_err("smc: error loading firmware \"%s\"\n", fw_name); ++ pr_err("smc: error loading firmware\n", fw_name); + release_firmware(rdev->smc_fw); + rdev->smc_fw = NULL; + err = 0; + } else if (rdev->smc_fw->size != smc_req_size) { +- pr_err("smc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("smc: Bogus length %zu in firmware\n", + rdev->smc_fw->size, fw_name); + err = -EINVAL; + } +@@ -2594,7 +2594,7 @@ int r600_init_microcode(struct radeon_device *rdev) + out: + if (err) { + if (err != -EINVAL) +- pr_err("r600_cp: Failed to load firmware \"%s\"\n", ++ pr_err("r600_cp: Failed to load firmware\n", + fw_name); + release_firmware(rdev->pfp_fw); + rdev->pfp_fw = NULL; +diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c +index d34d1cf..10e859c 100644 +--- a/drivers/gpu/drm/radeon/radeon_uvd.c ++++ b/drivers/gpu/drm/radeon/radeon_uvd.c +@@ -140,7 +140,7 @@ int radeon_uvd_init(struct radeon_device *rdev) + /* Let's try to load the newer firmware first */ + r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev); + if (r) { +- dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", ++ dev_err(rdev->dev, "radeon_uvd: Can't load firmware\n", + fw_name); + } else { + struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data; +@@ -175,7 +175,7 @@ int radeon_uvd_init(struct radeon_device *rdev) + if (!fw_name || r) { + r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev); + if (r) { +- dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", ++ dev_err(rdev->dev, "radeon_uvd: Can't load firmware\n", + legacy_fw_name); + return r; + } +diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c +index c1c619f..744612d 100644 +--- a/drivers/gpu/drm/radeon/radeon_vce.c ++++ b/drivers/gpu/drm/radeon/radeon_vce.c +@@ -87,7 +87,7 @@ int radeon_vce_init(struct radeon_device *rdev) + + r = request_firmware(&rdev->vce_fw, fw_name, rdev->dev); + if (r) { +- dev_err(rdev->dev, "radeon_vce: Can't load firmware \"%s\"\n", ++ dev_err(rdev->dev, "radeon_vce: Can't load firmware\n", + fw_name); + return r; + } +diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c +index 1907c95..7ff65cc 100644 +--- a/drivers/gpu/drm/radeon/si.c ++++ b/drivers/gpu/drm/radeon/si.c +@@ -1786,7 +1786,7 @@ static int si_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->pfp_fw->size != pfp_req_size) { +- pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("si_cp: Bogus length %zu in firmware\n", + rdev->pfp_fw->size, fw_name); + err = -EINVAL; + goto out; +@@ -1794,7 +1794,7 @@ static int si_init_microcode(struct radeon_device *rdev) + } else { + err = radeon_ucode_validate(rdev->pfp_fw); + if (err) { +- pr_err("si_cp: validation failed for firmware \"%s\"\n", ++ pr_err("si_cp: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -1810,14 +1810,14 @@ static int si_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->me_fw->size != me_req_size) { +- pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("si_cp: Bogus length %zu in firmware\n", + rdev->me_fw->size, fw_name); + err = -EINVAL; + } + } else { + err = radeon_ucode_validate(rdev->me_fw); + if (err) { +- pr_err("si_cp: validation failed for firmware \"%s\"\n", ++ pr_err("si_cp: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -1833,14 +1833,14 @@ static int si_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->ce_fw->size != ce_req_size) { +- pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("si_cp: Bogus length %zu in firmware\n", + rdev->ce_fw->size, fw_name); + err = -EINVAL; + } + } else { + err = radeon_ucode_validate(rdev->ce_fw); + if (err) { +- pr_err("si_cp: validation failed for firmware \"%s\"\n", ++ pr_err("si_cp: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -1856,14 +1856,14 @@ static int si_init_microcode(struct radeon_device *rdev) + if (err) + goto out; + if (rdev->rlc_fw->size != rlc_req_size) { +- pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("si_rlc: Bogus length %zu in firmware\n", + rdev->rlc_fw->size, fw_name); + err = -EINVAL; + } + } else { + err = radeon_ucode_validate(rdev->rlc_fw); + if (err) { +- pr_err("si_cp: validation failed for firmware \"%s\"\n", ++ pr_err("si_cp: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -1887,7 +1887,7 @@ static int si_init_microcode(struct radeon_device *rdev) + } + if ((rdev->mc_fw->size != mc_req_size) && + (rdev->mc_fw->size != mc2_req_size)) { +- pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("si_mc: Bogus length %zu in firmware\n", + rdev->mc_fw->size, fw_name); + err = -EINVAL; + } +@@ -1895,7 +1895,7 @@ static int si_init_microcode(struct radeon_device *rdev) + } else { + err = radeon_ucode_validate(rdev->mc_fw); + if (err) { +- pr_err("si_cp: validation failed for firmware \"%s\"\n", ++ pr_err("si_cp: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -1914,19 +1914,19 @@ static int si_init_microcode(struct radeon_device *rdev) + snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); + err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); + if (err) { +- pr_err("smc: error loading firmware \"%s\"\n", fw_name); ++ pr_err("smc: error loading firmware\n", fw_name); + release_firmware(rdev->smc_fw); + rdev->smc_fw = NULL; + err = 0; + } else if (rdev->smc_fw->size != smc_req_size) { +- pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n", ++ pr_err("si_smc: Bogus length %zu in firmware\n", + rdev->smc_fw->size, fw_name); + err = -EINVAL; + } + } else { + err = radeon_ucode_validate(rdev->smc_fw); + if (err) { +- pr_err("si_cp: validation failed for firmware \"%s\"\n", ++ pr_err("si_cp: validation failed for firmware\n", + fw_name); + goto out; + } else { +@@ -1945,7 +1945,7 @@ static int si_init_microcode(struct radeon_device *rdev) + out: + if (err) { + if (err != -EINVAL) +- pr_err("si_cp: Failed to load firmware \"%s\"\n", ++ pr_err("si_cp: Failed to load firmware\n", + fw_name); + release_firmware(rdev->pfp_fw); + rdev->pfp_fw = NULL; +diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c +index 9368abd..fa4de6a 100644 +--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c ++++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c +@@ -3417,12 +3417,12 @@ static int ipw_get_fw(struct ipw_priv *priv, + /* ask firmware_class module to get the boot firmware off disk */ + rc = request_firmware(raw, name, &priv->pci_dev->dev); + if (rc < 0) { +- IPW_ERROR("%s request_firmware failed: Reason %d\n", name, rc); ++ IPW_ERROR("request_firmware failed: Reason %d\n", rc); + return rc; + } + + if ((*raw)->size < sizeof(*fw)) { +- IPW_ERROR("%s is too small (%zd)\n", name, (*raw)->size); ++ IPW_ERROR("Firmware file is too small (%zd)\n", (*raw)->size); + return -EINVAL; + } + +@@ -3430,13 +3430,12 @@ static int ipw_get_fw(struct ipw_priv *priv, + + if ((*raw)->size < sizeof(*fw) + le32_to_cpu(fw->boot_size) + + le32_to_cpu(fw->ucode_size) + le32_to_cpu(fw->fw_size)) { +- IPW_ERROR("%s is too small or corrupt (%zd)\n", +- name, (*raw)->size); ++ IPW_ERROR("Firmware file is too small or corrupt (%zd)\n", ++ (*raw)->size); + return -EINVAL; + } + +- IPW_DEBUG_INFO("Read firmware '%s' image v%d.%d (%zd bytes)\n", +- name, ++ IPW_DEBUG_INFO("Read firmware image v%d.%d (%zd bytes)\n", + le32_to_cpu(fw->ver) >> 16, + le32_to_cpu(fw->ver) & 0xff, + (*raw)->size - sizeof(*fw)); +@@ -3569,7 +3568,7 @@ static int ipw_load(struct ipw_priv *priv) + /* DMA the initial boot firmware into the device */ + rc = ipw_load_firmware(priv, boot_img, le32_to_cpu(fw->boot_size)); + if (rc < 0) { +- IPW_ERROR("Unable to load boot firmware: %d\n", rc); ++ IPW_ERROR("Unable to load boot firmware\n", rc); + goto error; + } + +@@ -3601,7 +3600,7 @@ static int ipw_load(struct ipw_priv *priv) + /* DMA bss firmware into the device */ + rc = ipw_load_firmware(priv, fw_img, le32_to_cpu(fw->fw_size)); + if (rc < 0) { +- IPW_ERROR("Unable to load firmware: %d\n", rc); ++ IPW_ERROR("Unable to load firmware\n", rc); + goto error; + } + #ifdef CONFIG_PM +@@ -11213,7 +11212,7 @@ static int ipw_up(struct ipw_priv *priv) + * Also start the clocks. */ + rc = ipw_load(priv); + if (rc) { +- IPW_ERROR("Unable to load firmware: %d\n", rc); ++ IPW_ERROR("Unable to load firmware\n", rc); + return rc; + } + +diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c +index 38bf403..a7934a6 100644 +--- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c ++++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c +@@ -1861,7 +1861,7 @@ il3945_read_ucode(struct il_priv *il) + sprintf(buf, "%s%u%s", name_pre, idx, ".ucode"); + ret = request_firmware(&ucode_raw, buf, &il->pci_dev->dev); + if (ret < 0) { +- IL_ERR("%s firmware file req failed: %d\n", buf, ret); ++ IL_ERR("Firmware file req failed\n", buf, ret); + if (ret == -ENOENT) + continue; + else +@@ -1870,7 +1870,7 @@ il3945_read_ucode(struct il_priv *il) + if (idx < api_max) + IL_ERR("Loaded firmware %s, " + "which is deprecated. " +- " Please use API v%u instead.\n", buf, ++ "\n", buf, + api_max); + D_INFO("Got firmware '%s' file " + "(%zd bytes) from disk\n", buf, ucode_raw->size); +@@ -1906,16 +1906,14 @@ il3945_read_ucode(struct il_priv *il) + + if (api_ver < api_min || api_ver > api_max) { + IL_ERR("Driver unable to support your firmware API. " +- "Driver supports v%u, firmware is v%u.\n", api_max, ++ "\n", api_max, + api_ver); + il->ucode_ver = 0; + ret = -EINVAL; + goto err_release; + } + if (api_ver != api_max) +- IL_ERR("Firmware has old API version. Expected %u, " +- "got %u. New firmware can be obtained " +- "from http://www.intellinuxwireless.org.\n", api_max, ++ IL_ERR("Firmware has old API version\n", api_max, + api_ver); + + IL_INFO("loaded firmware version %u.%u.%u.%u\n", +diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c +index 5b51fba..b16ab69 100644 +--- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c ++++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c +@@ -4708,7 +4708,7 @@ il4965_request_firmware(struct il_priv *il, bool first) + + sprintf(il->firmware_name, "%s%s%s", name_pre, tag, ".ucode"); + +- D_INFO("attempting to load firmware '%s'\n", il->firmware_name); ++ D_INFO("attempting to load firmware\n", il->firmware_name); + + return request_firmware_nowait(THIS_MODULE, 1, il->firmware_name, + &il->pci_dev->dev, GFP_KERNEL, il, +@@ -4799,7 +4799,7 @@ il4965_ucode_callback(const struct firmware *ucode_raw, void *context) + + if (!ucode_raw) { + if (il->fw_idx <= il->cfg->ucode_api_max) +- IL_ERR("request for firmware file '%s' failed.\n", ++ IL_ERR("request for firmware failed.\n", + il->firmware_name); + goto try_again; + } +@@ -4829,16 +4829,13 @@ il4965_ucode_callback(const struct firmware *ucode_raw, void *context) + * on the API version read from firmware header from here on forward + */ + if (api_ver < api_min || api_ver > api_max) { +- IL_ERR("Driver unable to support your firmware API. " +- "Driver supports v%u, firmware is v%u.\n", api_max, ++ IL_ERR("Driver unable to support your firmware API.\n", api_max, + api_ver); + goto try_again; + } + + if (api_ver != api_max) +- IL_ERR("Firmware has old API version. Expected v%u, " +- "got v%u. New firmware can be obtained " +- "from http://www.intellinuxwireless.org.\n", api_max, ++ IL_ERR("Firmware has old API version.\n", api_max, + api_ver); + + IL_INFO("loaded firmware version %u.%u.%u.%u\n", +diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +index 4e0f86f..05ac835 100644 +--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c ++++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +@@ -257,7 +257,7 @@ static int iwl_request_firmware(struct iwl_drv *drv, bool first) + snprintf(drv->firmware_name, sizeof(drv->firmware_name), "%s%s.ucode", + fw_pre_name, tag); + +- IWL_DEBUG_INFO(drv, "attempting to load firmware '%s'\n", ++ IWL_DEBUG_INFO(drv, "attempting to load firmware\n", + drv->firmware_name); + + return request_firmware_nowait(THIS_MODULE, 1, drv->firmware_name, +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +index 82863e9..1f2f235 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +@@ -1677,7 +1677,7 @@ int iwl_mvm_load_d3_fw(struct iwl_mvm *mvm) + + ret = iwl_mvm_load_ucode_wait_alive(mvm, IWL_UCODE_WOWLAN); + if (ret) { +- IWL_ERR(mvm, "Failed to start WoWLAN firmware: %d\n", ret); ++ IWL_ERR(mvm, "Failed to start WoWLAN firmware\n", ret); + goto error; + } + +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +index dac7e54..d2aefe6 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +@@ -410,7 +410,7 @@ int iwl_mvm_read_external_nvm(struct iwl_mvm *mvm) + ret = request_firmware(&fw_entry, mvm->nvm_file_name, + mvm->trans->dev); + if (ret) { +- IWL_ERR(mvm, "ERROR: %s isn't available %d\n", ++ IWL_ERR(mvm, "ERROR: firmware isn't available \n", + mvm->nvm_file_name, ret); + return ret; + } +diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +index 21e5ef0..520e63b 100644 +--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c ++++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +@@ -2074,9 +2074,9 @@ int rtl8xxxu_load_firmware(struct rtl8xxxu_priv *priv, char *fw_name) + int ret = 0; + u16 signature; + +- dev_info(dev, "%s: Loading firmware %s\n", DRIVER_NAME, fw_name); ++ dev_info(dev, "%s: Loading firmware\n", DRIVER_NAME, fw_name); + if (request_firmware(&fw, fw_name, &priv->udev->dev)) { +- dev_warn(dev, "request_firmware(%s) failed\n", fw_name); ++ dev_warn(dev, "request_firmware failed\n", fw_name); + ret = -EAGAIN; + goto exit; + } +diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c +index b0ad061..dab2248 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/core.c ++++ b/drivers/net/wireless/realtek/rtlwifi/core.c +@@ -106,7 +106,7 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context, + err = request_firmware(&firmware, + rtlpriv->cfg->alt_fw_name, + rtlpriv->io.dev); +- pr_info("Loading alternative firmware %s\n", ++ pr_info("Loading alternative firmware \n", + rtlpriv->cfg->alt_fw_name); + if (!err) + goto found_alt; +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c +index 774e720..a364c1b 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c +@@ -169,7 +169,7 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw) + + fw_name = "rtlwifi/rtl8188efw.bin"; + rtlpriv->max_fw_size = 0x8000; +- pr_info("Using firmware %s\n", fw_name); ++ pr_info("Using firmware \n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, + rtlpriv->io.dev, GFP_KERNEL, hw, + rtl_fw_cb); +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c +index bcbb0c6..a1a9734 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c +@@ -170,7 +170,7 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw) + fw_name = "rtlwifi/rtl8192cfw.bin"; + + rtlpriv->max_fw_size = 0x4000; +- pr_info("Using firmware %s\n", fw_name); ++ pr_info("Using firmware \n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, + rtlpriv->io.dev, GFP_KERNEL, hw, + rtl_fw_cb); +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c +index 96c923b..26eece2 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c +@@ -80,7 +80,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw) + } + /* provide name of alternative file */ + rtlpriv->cfg->alt_fw_name = "rtlwifi/rtl8192cufw.bin"; +- pr_info("Loading firmware %s\n", fw_name); ++ pr_info("Loading firmware \n", fw_name); + rtlpriv->max_fw_size = 0x4000; + err = request_firmware_nowait(THIS_MODULE, 1, + fw_name, rtlpriv->io.dev, +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +index 16132c6..dc9db6d 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +@@ -175,7 +175,7 @@ static int rtl92d_init_sw_vars(struct ieee80211_hw *hw) + + rtlpriv->max_fw_size = 0x8000; + pr_info("Driver for Realtek RTL8192DE WLAN interface\n"); +- pr_info("Loading firmware file %s\n", fw_name); ++ pr_info("Loading firmware file\n", fw_name); + + /* request fw */ + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c +index eaa503b..343f691 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c +@@ -171,7 +171,7 @@ int rtl92ee_init_sw_vars(struct ieee80211_hw *hw) + fw_name = "rtlwifi/rtl8192eefw.bin"; + + rtlpriv->max_fw_size = 0x8000; +- pr_info("Using firmware %s\n", fw_name); ++ pr_info("Using firmware \n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, + rtlpriv->io.dev, GFP_KERNEL, hw, + rtl_fw_cb); +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c +index 2006b09..475940e 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c +@@ -91,7 +91,7 @@ static void rtl92se_fw_cb(const struct firmware *firmware, void *context) + "Firmware callback routine entered!\n"); + complete(&rtlpriv->firmware_loading_complete); + if (!firmware) { +- pr_err("Firmware %s not available\n", fw_name); ++ pr_err("Firmware not available\n", fw_name); + rtlpriv->max_fw_size = 0; + return; + } +@@ -209,7 +209,7 @@ static int rtl92s_init_sw_vars(struct ieee80211_hw *hw) + rtlpriv->max_fw_size = RTL8190_MAX_FIRMWARE_CODE_SIZE*2 + + sizeof(struct fw_hdr); + pr_info("Driver for Realtek RTL8192SE/RTL8191SE\n" +- "Loading firmware %s\n", fw_name); ++ "Loading firmware \n", fw_name); + /* request fw */ + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, + rtlpriv->io.dev, GFP_KERNEL, hw, +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c +index 7bf9f25..a2acfac 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c +@@ -178,7 +178,7 @@ int rtl8723e_init_sw_vars(struct ieee80211_hw *hw) + fw_name = "rtlwifi/rtl8723fw_B.bin"; + + rtlpriv->max_fw_size = 0x6000; +- pr_info("Using firmware %s\n", fw_name); ++ pr_info("Using firmware \n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, + rtlpriv->io.dev, GFP_KERNEL, hw, + rtl_fw_cb); +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c +index f9d10f1..9048c51 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c +@@ -182,7 +182,7 @@ int rtl8723be_init_sw_vars(struct ieee80211_hw *hw) + } + + rtlpriv->max_fw_size = 0x8000; +- pr_info("Using firmware %s\n", fw_name); ++ pr_info("Using firmware \n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, + rtlpriv->io.dev, GFP_KERNEL, hw, + rtl_fw_cb); +diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c +index d71d277..f2c70e1 100644 +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c +@@ -209,7 +209,7 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) + + rtlpriv->max_fw_size = 0x8000; + /*load normal firmware*/ +- pr_info("Using firmware %s\n", fw_name); ++ pr_info("Using firmware \n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, + rtlpriv->io.dev, GFP_KERNEL, hw, + rtl_fw_cb); +@@ -226,7 +226,7 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) + } + } + /*load wowlan firmware*/ +- pr_info("Using firmware %s\n", wowlan_fw_name); ++ pr_info("Using firmware \n", wowlan_fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, + wowlan_fw_name, + rtlpriv->io.dev, GFP_KERNEL, hw, diff --git a/helpers/make-linux-hwe b/helpers/make-linux-hwe index 942480e8..4524a418 100644 --- a/helpers/make-linux-hwe +++ b/helpers/make-linux-hwe @@ -34,10 +34,11 @@ for FILE in $PRESERVE; do cp $FILE /tmp/preserve --parents -a done -sh $DATA/deblob-4.10 --force +sh $DATA/deblob-4.13 --force sed 's/bnx2.*fw/$(DEBLOBBED)/' -i firmware/Makefile -cp /tmp/preserve/* . -a +echo "Reverting deblobbing for files patched by silent-accept-firmware" +cp /tmp/preserve/* . -av # Remove ZFS rm zfs spl debian/scripts/misc/update-zfs.sh -rf -- GitLab