From ec8981e245dfe24bc6a80207e832ca9be18fd39d Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Tue, 17 May 2016 08:28:04 +0200
Subject: [PATCH 4/4] Turn BFQ-v7r11 into BFQ-v8r4 for 4.8.0

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/Kconfig.iosched |    2 +-
 block/bfq-cgroup.c    |  495 ++++----
 block/bfq-iosched.c   | 3230 +++++++++++++++++++++++++++++++------------------
 block/bfq-sched.c     |  480 ++++++--
 block/bfq.h           |  747 ++++++------
 5 files changed, 3073 insertions(+), 1881 deletions(-)

diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index f78cd1a..6d92579 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -53,7 +53,7 @@ config IOSCHED_BFQ
config BFQ_GROUP_IOSCHED
bool "BFQ hierarchical scheduling support"
- depends on CGROUPS && IOSCHED_BFQ=y
+ depends on IOSCHED_BFQ && BLK_CGROUP
default n
---help---
Enable hierarchical scheduling in BFQ, using the blkio controller.
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 0367996..b50ae8e 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -7,7 +7,9 @@
* Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
* Paolo Valente <paolo.valente@unimore.it>
*
- * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2016 Paolo Valente <paolo.valente@linaro.org>
*
* Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
* file.
@@ -163,8 +165,6 @@ static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
{
struct blkg_policy_data *pd = blkg_to_pd(blkg, &blkcg_policy_bfq);
- BUG_ON(!pd);
-
return pd_to_bfqg(pd);
}
@@ -208,59 +208,49 @@ static void bfqg_put(struct bfq_group *bfqg)
static void bfqg_stats_update_io_add(struct bfq_group *bfqg,
struct bfq_queue *bfqq,
- int rw)
+ int op, int op_flags)
{
- blkg_rwstat_add(&bfqg->stats.queued, rw, 1);
+ blkg_rwstat_add(&bfqg->stats.queued, op, op_flags, 1);
bfqg_stats_end_empty_time(&bfqg->stats);
if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
}
-static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw)
-{
- blkg_rwstat_add(&bfqg->stats.queued, rw, -1);
-}
-
-static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw)
+static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int op,
+ int op_flags)
{
- blkg_rwstat_add(&bfqg->stats.merged, rw, 1);
+ blkg_rwstat_add(&bfqg->stats.queued, op, op_flags, -1);
}
-static void bfqg_stats_update_dispatch(struct bfq_group *bfqg,
- uint64_t bytes, int rw)
+static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int op,
+ int op_flags)
{
- blkg_stat_add(&bfqg->stats.sectors, bytes >> 9);
- blkg_rwstat_add(&bfqg->stats.serviced, rw, 1);
- blkg_rwstat_add(&bfqg->stats.service_bytes, rw, bytes);
+ blkg_rwstat_add(&bfqg->stats.merged, op, op_flags, 1);
}
static void bfqg_stats_update_completion(struct bfq_group *bfqg,
- uint64_t start_time, uint64_t io_start_time, int rw)
+ uint64_t start_time, uint64_t io_start_time, int op,
+ int op_flags)
{
struct bfqg_stats *stats = &bfqg->stats;
unsigned long long now = sched_clock();
if (time_after64(now, io_start_time))
- blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
+ blkg_rwstat_add(&stats->service_time, op, op_flags,
+ now - io_start_time);
if (time_after64(io_start_time, start_time))
- blkg_rwstat_add(&stats->wait_time, rw,
+ blkg_rwstat_add(&stats->wait_time, op, op_flags,
io_start_time - start_time);
}
/* @stats = 0 */
static void bfqg_stats_reset(struct bfqg_stats *stats)
{
- if (!stats)
- return;
-
/* queued stats shouldn't be cleared */
- blkg_rwstat_reset(&stats->service_bytes);
- blkg_rwstat_reset(&stats->serviced);
blkg_rwstat_reset(&stats->merged);
blkg_rwstat_reset(&stats->service_time);
blkg_rwstat_reset(&stats->wait_time);
blkg_stat_reset(&stats->time);
- blkg_stat_reset(&stats->unaccounted_time);
blkg_stat_reset(&stats->avg_queue_size_sum);
blkg_stat_reset(&stats->avg_queue_size_samples);
blkg_stat_reset(&stats->dequeue);
@@ -270,19 +260,16 @@ static void bfqg_stats_reset(struct bfqg_stats *stats)
}
/* @to += @from */
-static void bfqg_stats_merge(struct bfqg_stats *to, struct bfqg_stats *from)
+static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
{
if (!to || !from)
return;
/* queued stats shouldn't be cleared */
- blkg_rwstat_add_aux(&to->service_bytes, &from->service_bytes);
- blkg_rwstat_add_aux(&to->serviced, &from->serviced);
blkg_rwstat_add_aux(&to->merged, &from->merged);
blkg_rwstat_add_aux(&to->service_time, &from->service_time);
blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
blkg_stat_add_aux(&from->time, &from->time);
- blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time);
blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
blkg_stat_add_aux(&to->avg_queue_size_samples,
&from->avg_queue_size_samples);
@@ -311,10 +298,8 @@ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
if (unlikely(!parent))
return;
- bfqg_stats_merge(&parent->dead_stats, &bfqg->stats);
- bfqg_stats_merge(&parent->dead_stats, &bfqg->dead_stats);
+ bfqg_stats_add_aux(&parent->stats, &bfqg->stats);
bfqg_stats_reset(&bfqg->stats);
- bfqg_stats_reset(&bfqg->dead_stats);
}
static void bfq_init_entity(struct bfq_entity *entity,
@@ -335,15 +320,11 @@ static void bfq_init_entity(struct bfq_entity *entity,
static void bfqg_stats_exit(struct bfqg_stats *stats)
{
- blkg_rwstat_exit(&stats->service_bytes);
- blkg_rwstat_exit(&stats->serviced);
blkg_rwstat_exit(&stats->merged);
blkg_rwstat_exit(&stats->service_time);
blkg_rwstat_exit(&stats->wait_time);
blkg_rwstat_exit(&stats->queued);
- blkg_stat_exit(&stats->sectors);
blkg_stat_exit(&stats->time);
- blkg_stat_exit(&stats->unaccounted_time);
blkg_stat_exit(&stats->avg_queue_size_sum);
blkg_stat_exit(&stats->avg_queue_size_samples);
blkg_stat_exit(&stats->dequeue);
@@ -354,15 +335,11 @@ static void bfqg_stats_exit(struct bfqg_stats *stats)
static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
{
- if (blkg_rwstat_init(&stats->service_bytes, gfp) ||
- blkg_rwstat_init(&stats->serviced, gfp) ||
- blkg_rwstat_init(&stats->merged, gfp) ||
+ if (blkg_rwstat_init(&stats->merged, gfp) ||
blkg_rwstat_init(&stats->service_time, gfp) ||
blkg_rwstat_init(&stats->wait_time, gfp) ||
blkg_rwstat_init(&stats->queued, gfp) ||
- blkg_stat_init(&stats->sectors, gfp) ||
blkg_stat_init(&stats->time, gfp) ||
- blkg_stat_init(&stats->unaccounted_time, gfp) ||
blkg_stat_init(&stats->avg_queue_size_sum, gfp) ||
blkg_stat_init(&stats->avg_queue_size_samples, gfp) ||
blkg_stat_init(&stats->dequeue, gfp) ||
@@ -386,11 +363,27 @@ static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
}
+static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
+{
+ struct bfq_group_data *bgd;
+
+ bgd = kzalloc(sizeof(*bgd), GFP_KERNEL);
+ if (!bgd)
+ return NULL;
+ return &bgd->pd;
+}
+
static void bfq_cpd_init(struct blkcg_policy_data *cpd)
{
struct bfq_group_data *d = cpd_to_bfqgd(cpd);
- d->weight = BFQ_DEFAULT_GRP_WEIGHT;
+ d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
+ CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
+}
+
+static void bfq_cpd_free(struct blkcg_policy_data *cpd)
+{
+ kfree(cpd_to_bfqgd(cpd));
}
static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
@@ -401,8 +394,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
if (!bfqg)
return NULL;
- if (bfqg_stats_init(&bfqg->stats, gfp) ||
- bfqg_stats_init(&bfqg->dead_stats, gfp)) {
+ if (bfqg_stats_init(&bfqg->stats, gfp)) {
kfree(bfqg);
return NULL;
}
@@ -410,27 +402,20 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
return &bfqg->pd;
}
-static void bfq_group_set_parent(struct bfq_group *bfqg,
- struct bfq_group *parent)
+static void bfq_pd_init(struct blkg_policy_data *pd)
{
+ struct blkcg_gq *blkg;
+ struct bfq_group *bfqg;
+ struct bfq_data *bfqd;
struct bfq_entity *entity;
+ struct bfq_group_data *d;
- BUG_ON(!parent);
- BUG_ON(!bfqg);
- BUG_ON(bfqg == parent);
-
+ blkg = pd_to_blkg(pd);
+ BUG_ON(!blkg);
+ bfqg = blkg_to_bfqg(blkg);
+ bfqd = blkg->q->elevator->elevator_data;
entity = &bfqg->entity;
- entity->parent = parent->my_entity;
- entity->sched_data = &parent->sched_data;
-}
-
-static void bfq_pd_init(struct blkg_policy_data *pd)
-{
- struct blkcg_gq *blkg = pd_to_blkg(pd);
- struct bfq_group *bfqg = blkg_to_bfqg(blkg);
- struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
- struct bfq_entity *entity = &bfqg->entity;
- struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);
+ d = blkcg_to_bfqgd(blkg->blkcg);
entity->orig_weight = entity->weight = entity->new_weight = d->weight;
entity->my_sched_data = &bfqg->sched_data;
@@ -448,70 +433,53 @@ static void bfq_pd_free(struct blkg_policy_data *pd)
struct bfq_group *bfqg = pd_to_bfqg(pd);
bfqg_stats_exit(&bfqg->stats);
- bfqg_stats_exit(&bfqg->dead_stats);
-
return kfree(bfqg);
}
-/* offset delta from bfqg->stats to bfqg->dead_stats */
-static const int dead_stats_off_delta = offsetof(struct bfq_group, dead_stats) -
- offsetof(struct bfq_group, stats);
-
-/* to be used by recursive prfill, sums live and dead stats recursively */
-static u64 bfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off)
+static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
{
- u64 sum = 0;
+ struct bfq_group *bfqg = pd_to_bfqg(pd);
- sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off);
- sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq,
- off + dead_stats_off_delta);
- return sum;
+ bfqg_stats_reset(&bfqg->stats);
}
-/* to be used by recursive prfill, sums live and dead rwstats recursively */
-static struct blkg_rwstat
-bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd, int off)
+static void bfq_group_set_parent(struct bfq_group *bfqg,
+ struct bfq_group *parent)
{
- struct blkg_rwstat a, b;
+ struct bfq_entity *entity;
- a = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off);
- b = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq,
- off + dead_stats_off_delta);
- blkg_rwstat_add_aux(&a, &b);
- return a;
+ BUG_ON(!parent);
+ BUG_ON(!bfqg);
+ BUG_ON(bfqg == parent);
+
+ entity = &bfqg->entity;
+ entity->parent = parent->my_entity;
+ entity->sched_data = &parent->sched_data;
}
-static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
+static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
+ struct blkcg *blkcg)
{
- struct bfq_group *bfqg = pd_to_bfqg(pd);
+ struct blkcg_gq *blkg;
- bfqg_stats_reset(&bfqg->stats);
- bfqg_stats_reset(&bfqg->dead_stats);
+ blkg = blkg_lookup(blkcg, bfqd->queue);
+ if (likely(blkg))
+ return blkg_to_bfqg(blkg);
+ return NULL;
}
-static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
- struct blkcg *blkcg)
+static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
+ struct blkcg *blkcg)
{
- struct request_queue *q = bfqd->queue;
- struct bfq_group *bfqg = NULL, *parent;
- struct bfq_entity *entity = NULL;
+ struct bfq_group *bfqg, *parent;
+ struct bfq_entity *entity;
assert_spin_locked(bfqd->queue->queue_lock);
- /* avoid lookup for the common case where there's no blkcg */
- if (blkcg == &blkcg_root) {
- bfqg = bfqd->root_group;
- } else {
- struct blkcg_gq *blkg;
-
- blkg = blkg_lookup_create(blkcg, q);
- if (!IS_ERR(blkg))
- bfqg = blkg_to_bfqg(blkg);
- else /* fallback to root_group */
- bfqg = bfqd->root_group;
- }
+ bfqg = bfq_lookup_bfqg(bfqd, blkcg);
- BUG_ON(!bfqg);
+ if (unlikely(!bfqg))
+ return NULL;
/*
* Update chain of bfq_groups as we might be handling a leaf group
@@ -537,11 +505,15 @@ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
static void bfq_pos_tree_add_move(struct bfq_data *bfqd,
struct bfq_queue *bfqq);
+static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq,
+ bool compensate,
+ enum bfqq_expiration reason);
+
/**
* bfq_bfqq_move - migrate @bfqq to @bfqg.
* @bfqd: queue descriptor.
* @bfqq: the queue to move.
- * @entity: @bfqq's entity.
* @bfqg: the group to move to.
*
* Move @bfqq to @bfqg, deactivating it from its old group and reactivating
@@ -552,26 +524,40 @@ static void bfq_pos_tree_add_move(struct bfq_data *bfqd,
* rcu_read_lock()).
*/
static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
- struct bfq_entity *entity, struct bfq_group *bfqg)
+ struct bfq_group *bfqg)
{
- int busy, resume;
-
- busy = bfq_bfqq_busy(bfqq);
- resume = !RB_EMPTY_ROOT(&bfqq->sort_list);
+ struct bfq_entity *entity = &bfqq->entity;
- BUG_ON(resume && !entity->on_st);
- BUG_ON(busy && !resume && entity->on_st &&
+ BUG_ON(!bfq_bfqq_busy(bfqq) && !RB_EMPTY_ROOT(&bfqq->sort_list));
+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list) && !entity->on_st);
+ BUG_ON(bfq_bfqq_busy(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list)
+ && entity->on_st &&
bfqq != bfqd->in_service_queue);
+ BUG_ON(!bfq_bfqq_busy(bfqq) && bfqq == bfqd->in_service_queue);
- if (busy) {
- BUG_ON(atomic_read(&bfqq->ref) < 2);
+ /* If bfqq is empty, then bfq_bfqq_expire also invokes
+ * bfq_del_bfqq_busy, thereby removing bfqq and its entity
+ * from data structures related to current group. Otherwise we
+ * need to remove bfqq explicitly with bfq_deactivate_bfqq, as
+ * we do below.
+ */
+ if (bfqq == bfqd->in_service_queue)
+ bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
+ false, BFQ_BFQQ_PREEMPTED);
+
+ BUG_ON(entity->on_st && !bfq_bfqq_busy(bfqq)
+ && &bfq_entity_service_tree(entity)->idle !=
+ entity->tree);
+
+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq));
- if (!resume)
- bfq_del_bfqq_busy(bfqd, bfqq, 0);
- else
- bfq_deactivate_bfqq(bfqd, bfqq, 0);
- } else if (entity->on_st)
+ if (bfq_bfqq_busy(bfqq))
+ bfq_deactivate_bfqq(bfqd, bfqq, 0);
+ else if (entity->on_st) {
+ BUG_ON(&bfq_entity_service_tree(entity)->idle !=
+ entity->tree);
bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
+ }
bfqg_put(bfqq_group(bfqq));
/*
@@ -583,14 +569,17 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
entity->sched_data = &bfqg->sched_data;
bfqg_get(bfqg);
- if (busy) {
+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq));
+ if (bfq_bfqq_busy(bfqq)) {
bfq_pos_tree_add_move(bfqd, bfqq);
- if (resume)
- bfq_activate_bfqq(bfqd, bfqq);
+ bfq_activate_bfqq(bfqd, bfqq);
}
if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
bfq_schedule_dispatch(bfqd);
+ BUG_ON(entity->on_st && !bfq_bfqq_busy(bfqq)
+ && &bfq_entity_service_tree(entity)->idle !=
+ entity->tree);
}
/**
@@ -617,7 +606,11 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
lockdep_assert_held(bfqd->queue->queue_lock);
- bfqg = bfq_find_alloc_group(bfqd, blkcg);
+ bfqg = bfq_find_set_group(bfqd, blkcg);
+
+ if (unlikely(!bfqg))
+ bfqg = bfqd->root_group;
+
if (async_bfqq) {
entity = &async_bfqq->entity;
@@ -625,7 +618,8 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
bic_set_bfqq(bic, NULL, 0);
bfq_log_bfqq(bfqd, async_bfqq,
"bic_change_group: %p %d",
- async_bfqq, atomic_read(&async_bfqq->ref));
+ async_bfqq,
+ async_bfqq->ref);
bfq_put_queue(async_bfqq);
}
}
@@ -633,7 +627,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
if (sync_bfqq) {
entity = &sync_bfqq->entity;
if (entity->sched_data != &bfqg->sched_data)
- bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
+ bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
}
return bfqg;
@@ -642,25 +636,23 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
struct bfq_data *bfqd = bic_to_bfqd(bic);
- struct blkcg *blkcg;
struct bfq_group *bfqg = NULL;
- uint64_t id;
+ uint64_t serial_nr;
rcu_read_lock();
- blkcg = bio_blkcg(bio);
- id = blkcg->css.serial_nr;
- rcu_read_unlock();
+ serial_nr = bio_blkcg(bio)->css.serial_nr;
/*
* Check whether blkcg has changed. The condition may trigger
* spuriously on a newly created cic but there's no harm.
*/
- if (unlikely(!bfqd) || likely(bic->blkcg_id == id))
- return;
+ if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
+ goto out;
- bfqg = __bfq_bic_change_cgroup(bfqd, bic, blkcg);
- BUG_ON(!bfqg);
- bic->blkcg_id = id;
+ bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
+ bic->blkcg_serial_nr = serial_nr;
+out:
+ rcu_read_unlock();
}
/**
@@ -686,7 +678,7 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
BUG_ON(!bfqq);
- bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
+ bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
}
/**
@@ -717,11 +709,12 @@ static void bfq_reparent_active_entities(struct bfq_data *bfqd,
}
/**
- * bfq_destroy_group - destroy @bfqg.
- * @bfqg: the group being destroyed.
+ * bfq_pd_offline - deactivate the entity associated with @pd,
+ * and reparent its children entities.
+ * @pd: descriptor of the policy going offline.
*
- * Destroy @bfqg, making sure that it is not referenced from its parent.
- * blkio already grabs the queue_lock for us, so no need to use RCU-based magic
+ * blkio already grabs the queue_lock for us, so no need to use
+ * RCU-based magic
*/
static void bfq_pd_offline(struct blkg_policy_data *pd)
{
@@ -780,6 +773,12 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
bfq_put_async_queues(bfqd, bfqg);
BUG_ON(entity->tree);
+ /*
+ * @blkg is going offline and will be ignored by
+ * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so
+ * that they don't get lost. If IOs complete after this point, the
+ * stats for them will be lost. Oh well...
+ */
bfqg_stats_xfer_dead(bfqg);
}
@@ -789,46 +788,35 @@ static void bfq_end_wr_async(struct bfq_data *bfqd)
list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+ BUG_ON(!bfqg);
bfq_end_wr_async_queues(bfqd, bfqg);
}
bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}
-static u64 bfqio_cgroup_weight_read(struct cgroup_subsys_state *css,
- struct cftype *cftype)
-{
- struct blkcg *blkcg = css_to_blkcg(css);
- struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
- int ret = -EINVAL;
-
- spin_lock_irq(&blkcg->lock);
- ret = bfqgd->weight;
- spin_unlock_irq(&blkcg->lock);
-
- return ret;
-}
-
-static int bfqio_cgroup_weight_read_dfl(struct seq_file *sf, void *v)
+static int bfq_io_show_weight(struct seq_file *sf, void *v)
{
struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
+ unsigned int val = 0;
- spin_lock_irq(&blkcg->lock);
- seq_printf(sf, "%u\n", bfqgd->weight);
- spin_unlock_irq(&blkcg->lock);
+ if (bfqgd)
+ val = bfqgd->weight;
+
+ seq_printf(sf, "%u\n", val);
return 0;
}
-static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css,
- struct cftype *cftype,
- u64 val)
+static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
+ struct cftype *cftype,
+ u64 val)
{
struct blkcg *blkcg = css_to_blkcg(css);
struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
struct blkcg_gq *blkg;
- int ret = -EINVAL;
+ int ret = -ERANGE;
if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
return ret;
@@ -873,13 +861,18 @@ static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css,
return ret;
}
-static ssize_t bfqio_cgroup_weight_write_dfl(struct kernfs_open_file *of,
- char *buf, size_t nbytes,
- loff_t off)
+static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
+ char *buf, size_t nbytes,
+ loff_t off)
{
+ u64 weight;
/* First unsigned long found in the file is used */
- return bfqio_cgroup_weight_write(of_css(of), NULL,
- simple_strtoull(strim(buf), NULL, 0));
+ int ret = kstrtoull(strim(buf), 0, &weight);
+
+ if (ret)
+ return ret;
+
+ return bfq_io_set_weight_legacy(of_css(of), NULL, weight);
}
static int bfqg_print_stat(struct seq_file *sf, void *v)
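The switch from simple_strtoull() to kstrtoull() in the hunk above makes the weight file reject malformed input instead of silently using the first number found in the buffer, and lets the error propagate back to the writer. A minimal, hypothetical userspace sketch of the same strict-parsing pattern (parse_u64_strict is an invented name, not part of the patch):

#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>

/*
 * Strict parse: succeed only if the whole string is a valid u64,
 * roughly what kstrtoull() enforces (the kernel version also
 * tolerates a single trailing newline).
 */
static bool parse_u64_strict(const char *s, unsigned long long *out)
{
	char *end;

	errno = 0;
	*out = strtoull(s, &end, 0);	/* base 0: auto-detect, as in the patch */
	return errno == 0 && end != s && *end == '\0';
}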
@@ -899,16 +892,17 @@ static int bfqg_print_rwstat(struct seq_file *sf, void *v)
static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
struct blkg_policy_data *pd, int off)
{
- u64 sum = bfqg_stat_pd_recursive_sum(pd, off);
-
+ u64 sum = blkg_stat_recursive_sum(pd_to_blkg(pd),
+ &blkcg_policy_bfq, off);
return __blkg_prfill_u64(sf, pd, sum);
}
static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
struct blkg_policy_data *pd, int off)
{
- struct blkg_rwstat sum = bfqg_rwstat_pd_recursive_sum(pd, off);
-
+ struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd_to_blkg(pd),
+ &blkcg_policy_bfq,
+ off);
return __blkg_prfill_rwstat(sf, pd, &sum);
}
@@ -928,6 +922,41 @@ static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
return 0;
}
+static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
+ int off)
+{
+ u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes);
+
+ return __blkg_prfill_u64(sf, pd, sum >> 9);
+}
+
+static int bfqg_print_stat_sectors(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false);
+ return 0;
+}
+
+static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
+{
+ struct blkg_rwstat tmp = blkg_rwstat_recursive_sum(pd->blkg, NULL,
+ offsetof(struct blkcg_gq, stat_bytes));
+ u64 sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
+ atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
+
+ return __blkg_prfill_u64(sf, pd, sum >> 9);
+}
+
+static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0,
+ false);
+ return 0;
+}
+
+
static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
struct blkg_policy_data *pd, int off)
{
@@ -964,38 +993,15 @@ bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
return blkg_to_bfqg(bfqd->queue->root_blkg);
}
-static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
-{
- struct bfq_group_data *bgd;
-
- bgd = kzalloc(sizeof(*bgd), GFP_KERNEL);
- if (!bgd)
- return NULL;
- return &bgd->pd;
-}
-
-static void bfq_cpd_free(struct blkcg_policy_data *cpd)
-{
- kfree(cpd_to_bfqgd(cpd));
-}
-
-static struct cftype bfqio_files_dfl[] = {
+static struct cftype bfq_blkcg_legacy_files[] = {
{
- .name = "weight",
+ .name = "bfq.weight",
.flags = CFTYPE_NOT_ON_ROOT,
- .seq_show = bfqio_cgroup_weight_read_dfl,
- .write = bfqio_cgroup_weight_write_dfl,
+ .seq_show = bfq_io_show_weight,
+ .write_u64 = bfq_io_set_weight_legacy,
},
- {} /* terminate */
-};
-static struct cftype bfqio_files[] = {
- {
- .name = "bfq.weight",
- .read_u64 = bfqio_cgroup_weight_read,
- .write_u64 = bfqio_cgroup_weight_write,
- },
- /* statistics, cover only the tasks in the bfqg */
+ /* statistics, covers only the tasks in the bfqg */
{
.name = "bfq.time",
.private = offsetof(struct bfq_group, stats.time),
@@ -1003,18 +1009,17 @@ static struct cftype bfqio_files[] = {
},
{
.name = "bfq.sectors",
- .private = offsetof(struct bfq_group, stats.sectors),
- .seq_show = bfqg_print_stat,
+ .seq_show = bfqg_print_stat_sectors,
},
{
.name = "bfq.io_service_bytes",
- .private = offsetof(struct bfq_group, stats.service_bytes),
- .seq_show = bfqg_print_rwstat,
+ .private = (unsigned long)&blkcg_policy_bfq,
+ .seq_show = blkg_print_stat_bytes,
},
{
.name = "bfq.io_serviced",
- .private = offsetof(struct bfq_group, stats.serviced),
- .seq_show = bfqg_print_rwstat,
+ .private = (unsigned long)&blkcg_policy_bfq,
+ .seq_show = blkg_print_stat_ios,
},
{
.name = "bfq.io_service_time",
@@ -1045,18 +1050,17 @@ static struct cftype bfqio_files[] = {
},
{
.name = "bfq.sectors_recursive",
- .private = offsetof(struct bfq_group, stats.sectors),
- .seq_show = bfqg_print_stat_recursive,
+ .seq_show = bfqg_print_stat_sectors_recursive,
},
{
.name = "bfq.io_service_bytes_recursive",
- .private = offsetof(struct bfq_group, stats.service_bytes),
- .seq_show = bfqg_print_rwstat_recursive,
+ .private = (unsigned long)&blkcg_policy_bfq,
+ .seq_show = blkg_print_stat_bytes_recursive,
},
{
.name = "bfq.io_serviced_recursive",
- .private = offsetof(struct bfq_group, stats.serviced),
- .seq_show = bfqg_print_rwstat_recursive,
+ .private = (unsigned long)&blkcg_policy_bfq,
+ .seq_show = blkg_print_stat_ios_recursive,
},
{
.name = "bfq.io_service_time_recursive",
@@ -1102,31 +1106,39 @@ static struct cftype bfqio_files[] = {
.private = offsetof(struct bfq_group, stats.dequeue),
.seq_show = bfqg_print_stat,
},
- {
- .name = "bfq.unaccounted_time",
- .private = offsetof(struct bfq_group, stats.unaccounted_time),
- .seq_show = bfqg_print_stat,
- },
{ } /* terminate */
};
-static struct blkcg_policy blkcg_policy_bfq = {
- .dfl_cftypes = bfqio_files_dfl,
- .legacy_cftypes = bfqio_files,
-
- .pd_alloc_fn = bfq_pd_alloc,
- .pd_init_fn = bfq_pd_init,
- .pd_offline_fn = bfq_pd_offline,
- .pd_free_fn = bfq_pd_free,
- .pd_reset_stats_fn = bfq_pd_reset_stats,
-
- .cpd_alloc_fn = bfq_cpd_alloc,
- .cpd_init_fn = bfq_cpd_init,
- .cpd_bind_fn = bfq_cpd_init,
- .cpd_free_fn = bfq_cpd_free,
+static struct cftype bfq_blkg_files[] = {
+ {
+ .name = "bfq.weight",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = bfq_io_show_weight,
+ .write = bfq_io_set_weight,
+ },
+ {} /* terminate */
};
-#else
+#else /* CONFIG_BFQ_GROUP_IOSCHED */
+
+static inline void bfqg_stats_update_io_add(struct bfq_group *bfqg,
+ struct bfq_queue *bfqq, int op, int op_flags) { }
+static inline void
+bfqg_stats_update_io_remove(struct bfq_group *bfqg, int op, int op_flags) { }
+static inline void
+bfqg_stats_update_io_merged(struct bfq_group *bfqg, int op, int op_flags) { }
+static inline void bfqg_stats_update_completion(struct bfq_group *bfqg,
+ uint64_t start_time, uint64_t io_start_time, int op,
+ int op_flags) { }
+static inline void
+bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
+ struct bfq_group *curr_bfqg) { }
+static inline void bfqg_stats_end_empty_time(struct bfqg_stats *stats) { }
+static inline void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
+static inline void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
+static inline void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
+static inline void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
+static inline void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }
static void bfq_init_entity(struct bfq_entity *entity,
struct bfq_group *bfqg)
@@ -1150,27 +1162,20 @@ bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
return bfqd->root_group;
}
-static void bfq_bfqq_move(struct bfq_data *bfqd,
- struct bfq_queue *bfqq,
- struct bfq_entity *entity,
- struct bfq_group *bfqg)
-{
-}
-
static void bfq_end_wr_async(struct bfq_data *bfqd)
{
bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}
-static void bfq_disconnect_groups(struct bfq_data *bfqd)
+static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
+ struct blkcg *blkcg)
{
- bfq_put_async_queues(bfqd, bfqd->root_group);
+ return bfqd->root_group;
}
-static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
- struct blkcg *blkcg)
+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
{
- return bfqd->root_group;
+ return bfqq->bfqd->root_group;
}
static struct bfq_group *
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index cf3e9b1..eef6ff4 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7,25 +7,28 @@
* Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
* Paolo Valente <paolo.valente@unimore.it>
*
- * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2016 Paolo Valente <paolo.valente@linaro.org>
*
* Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
* file.
*
- * BFQ is a proportional-share storage-I/O scheduling algorithm based on
- * the slice-by-slice service scheme of CFQ. But BFQ assigns budgets,
- * measured in number of sectors, to processes instead of time slices. The
- * device is not granted to the in-service process for a given time slice,
- * but until it has exhausted its assigned budget. This change from the time
- * to the service domain allows BFQ to distribute the device throughput
- * among processes as desired, without any distortion due to ZBR, workload
- * fluctuations or other factors. BFQ uses an ad hoc internal scheduler,
- * called B-WF2Q+, to schedule processes according to their budgets. More
- * precisely, BFQ schedules queues associated to processes. Thanks to the
- * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to
- * I/O-bound processes issuing sequential requests (to boost the
- * throughput), and yet guarantee a low latency to interactive and soft
- * real-time applications.
+ * BFQ is a proportional-share storage-I/O scheduling algorithm based
+ * on the slice-by-slice service scheme of CFQ. But BFQ assigns
+ * budgets, measured in number of sectors, to processes instead of
+ * time slices. The device is not granted to the in-service process
+ * for a given time slice, but until it has exhausted its assigned
+ * budget. This change from the time to the service domain enables BFQ
+ * to distribute the device throughput among processes as desired,
+ * without any distortion due to throughput fluctuations, or to device
+ * internal queueing. BFQ uses an ad hoc internal scheduler, called
+ * B-WF2Q+, to schedule processes according to their budgets. More
+ * precisely, BFQ schedules queues associated with processes. Thanks to
+ * the accurate policy of B-WF2Q+, BFQ can afford to assign high
+ * budgets to I/O-bound processes issuing sequential requests (to
+ * boost the throughput), and yet guarantee a low latency to
+ * interactive and soft real-time applications.
*
* BFQ is described in [1], where also a reference to the initial, more
* theoretical paper on BFQ can be found. The interested reader can find
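To make the budget-based scheme described in the header comment above concrete: a queue is not served for a fixed time slice, but until it has consumed a budget measured in sectors. A tiny, hypothetical C sketch of that dispatch condition (the type and function names are invented for illustration, not taken from the patch):

#include <stdbool.h>

struct toy_queue {
	int budget;	/* budget assigned by the scheduler, in sectors */
	int served;	/* sectors served since the queue was selected */
};

/*
 * Keep serving the in-service queue only while the next request still
 * fits in its remaining budget: service domain, not time domain.
 */
static bool toy_may_dispatch(const struct toy_queue *q, int rq_sectors)
{
	return q->served + rq_sectors <= q->budget;
}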
@@ -70,8 +73,8 @@
#include "bfq.h"
#include "blk.h"
-/* Expiration time of sync (0) and async (1) requests, in jiffies. */
-static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
+/* Expiration time of sync (0) and async (1) requests, in ns. */
+static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
/* Maximum backwards seek, in KiB. */
static const int bfq_back_max = 16 * 1024;
@@ -79,15 +82,14 @@ static const int bfq_back_max = 16 * 1024;
/* Penalty of a backwards seek, in number of sectors. */
static const int bfq_back_penalty = 2;
-/* Idling period duration, in jiffies. */
-static int bfq_slice_idle = HZ / 125;
+/* Idling period duration, in ns. */
+static u32 bfq_slice_idle = NSEC_PER_SEC / 125;
/* Minimum number of assigned budgets for which stats are safe to compute. */
static const int bfq_stats_min_budgets = 194;
/* Default maximum budget values, in sectors and number of requests. */
static const int bfq_default_max_budget = 16 * 1024;
-static const int bfq_max_budget_async_rq = 4;
/*
* Async to sync throughput distribution is controlled as follows:
@@ -97,23 +99,27 @@ static const int bfq_max_budget_async_rq = 4;
static const int bfq_async_charge_factor = 10;
/* Default timeout values, in jiffies, approximating CFQ defaults. */
-static const int bfq_timeout_sync = HZ / 8;
-static int bfq_timeout_async = HZ / 25;
+static const int bfq_timeout = HZ / 8;
struct kmem_cache *bfq_pool;
-/* Below this threshold (in ms), we consider thinktime immediate. */
-#define BFQ_MIN_TT 2
+/* Below this threshold (in ns), we consider thinktime immediate. */
+#define BFQ_MIN_TT (2 * NSEC_PER_MSEC)
/* hw_tag detection: parallel requests threshold and min samples needed. */
#define BFQ_HW_QUEUE_THRESHOLD 4
#define BFQ_HW_QUEUE_SAMPLES 32
-#define BFQQ_SEEK_THR (sector_t)(8 * 1024)
-#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR)
+#define BFQQ_SEEK_THR (sector_t)(8 * 100)
+#define BFQQ_CLOSE_THR (sector_t)(8 * 1024)
+#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8)
-/* Min samples used for peak rate estimation (for autotuning). */
-#define BFQ_PEAK_RATE_SAMPLES 32
+/* Min number of samples required to perform peak-rate update */
+#define BFQ_RATE_MIN_SAMPLES 32
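For intuition on the new BFQQ_SEEKY() definition above: seek_history is treated as a 32-bit register of per-request distance flags, and hweight32() counts its set bits, so a queue is classified as seeky once more than 32/8 = 4 of its last 32 requests were far from the preceding one. A small, hypothetical sketch of that pattern (helper names are invented; popcount32 stands in for the kernel's hweight32()):

#include <stdbool.h>
#include <stdint.h>

/* portable stand-in for the kernel's hweight32(): count set bits */
static int popcount32(uint32_t v)
{
	int n = 0;

	for (; v; v &= v - 1)
		n++;
	return n;
}

/* shift in one flag per request: 1 if it was far from the previous one */
static uint32_t seek_history_shift(uint32_t history, bool distant)
{
	return (history << 1) | (distant ? 1 : 0);
}

static bool toy_seeky(uint32_t history)
{
	return popcount32(history) > 32 / 8;	/* more than 4 of the last 32 */
}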