From dd4ee77b4ae3da38f03dba795a1c5de0195bcafd Mon Sep 17 00:00:00 2001
From: Li Nan
Date: Tue, 6 Jun 2023 20:18:00 +0800
Subject: [PATCH 1/4] blk-iocost: fix UAF in ioc_pd_free

hulk inclusion
category: bugfix
bugzilla: 188032, https://gitee.com/openeuler/kernel/issues/I64T3E
CVE: NA

--------------------------------

Our test found the following problem in kernel 5.10:

BUG: KASAN: use-after-free in _raw_spin_lock_irqsave+0x71/0xe0
Write of size 4 at addr ffff8881432000e0 by task swapper/4/0
...
Call Trace:
 dump_stack+0x9c/0xd3
 print_address_description.constprop.0+0x19/0x170
 __kasan_report.cold+0x6c/0x84
 kasan_report+0x3a/0x50
 check_memory_region+0xfd/0x1f0
 _raw_spin_lock_irqsave+0x71/0xe0
 ioc_pd_free+0x9d/0x250
 blkg_free.part.0+0x80/0x100
 __blkg_release+0xf3/0x1c0
 rcu_do_batch+0x292/0x700
 rcu_core+0x270/0x2d0
 __do_softirq+0xfd/0x402
 asm_call_irq_on_stack+0x12/0x20
 do_softirq_own_stack+0x37/0x50
 irq_exit_rcu+0x134/0x1a0
 sysvec_apic_timer_interrupt+0x36/0x80
 asm_sysvec_apic_timer_interrupt+0x12/0x20

Freed by task 57:
 kfree+0xba/0x680
 rq_qos_exit+0x5a/0x80
 blk_cleanup_queue+0xce/0x1a0
 virtblk_remove+0x77/0x130 [virtio_blk]
 virtio_dev_remove+0x56/0xe0
 __device_release_driver+0x2ba/0x450
 device_release_driver+0x29/0x40
 bus_remove_device+0x1d8/0x2c0
 device_del+0x333/0x7e0
 device_unregister+0x27/0x90
 unregister_virtio_device+0x22/0x40
 virtio_pci_remove+0x53/0xb0
 pci_device_remove+0x7a/0x130
 __device_release_driver+0x2ba/0x450
 device_release_driver+0x29/0x40
 pci_stop_bus_device+0xcf/0x100
 pci_stop_and_remove_bus_device+0x16/0x20
 disable_slot+0xa1/0x110
 acpiphp_disable_and_eject_slot+0x35/0xe0
 hotplug_event+0x1b8/0x3c0
 acpiphp_hotplug_notify+0x37/0x70
 acpi_device_hotplug+0xee/0x320
 acpi_hotplug_work_fn+0x69/0x80
 process_one_work+0x3c5/0x730
 worker_thread+0x93/0x650
 kthread+0x1ba/0x210
 ret_from_fork+0x22/0x30

It happened as follows:

T1                    T2                  T3                        T4
                                          //delete device
                                          del_gendisk
                                           bdi_unregister
                                            bdi_remove_from_list
                                             synchronize_rcu_expedited
//rmdir cgroup
blkcg_destroy_blkgs
 blkg_destroy
  percpu_ref_kill
                      blkg_release
                       call_rcu
                                                                    blk_cleanup_queue
                                                                     rq_qos_exit
                                                                      ioc_rqos_exit
                                                                       kfree(ioc)
                      __blkg_release
                       blkg_free
                        blkg_free_workfn
                         pd_free_fn
                          ioc_pd_free
                           spin_lock_irqsave
                           ->ioc is freed

Fix the problem by moving the operations on ioc in ioc_pd_free() to
ioc_pd_offline(), and only freeing resources in ioc_pd_free(), as
iolatency and throttle do.
Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost")
Signed-off-by: Li Nan
(cherry picked from commit 4c2d16fb7edf1db6f9f2e3dd1b7525f32b5bd027)
---
 block/blk-iocost.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 81dd3b02b36a..70f220174c0b 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2964,7 +2964,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
 	spin_unlock_irqrestore(&ioc->lock, flags);
 }
 
-static void ioc_pd_free(struct blkg_policy_data *pd)
+static void ioc_pd_offline(struct blkg_policy_data *pd)
 {
 	struct ioc_gq *iocg = pd_to_iocg(pd);
 	struct ioc *ioc = iocg->ioc;
@@ -2988,6 +2988,12 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
 
 		hrtimer_cancel(&iocg->waitq_timer);
 	}
+}
+
+static void ioc_pd_free(struct blkg_policy_data *pd)
+{
+	struct ioc_gq *iocg = pd_to_iocg(pd);
+
 	free_percpu(iocg->pcpu_stat);
 	kfree(iocg);
 }
@@ -3468,6 +3474,7 @@ static struct blkcg_policy blkcg_policy_iocost = {
 	.cpd_free_fn	= ioc_cpd_free,
 	.pd_alloc_fn	= ioc_pd_alloc,
 	.pd_init_fn	= ioc_pd_init,
+	.pd_offline_fn	= ioc_pd_offline,
 	.pd_free_fn	= ioc_pd_free,
 	.pd_stat_fn	= ioc_pd_stat,
 };
-- 
Gitee

From eaf7154d3d90f9f0a22991c0827b283e53ee9b43 Mon Sep 17 00:00:00 2001
From: Yu Kuai
Date: Tue, 6 Jun 2023 20:18:01 +0800
Subject: [PATCH 2/4] blk-iocost: track whether iocg is still online

hulk inclusion
category: bugfix
bugzilla: 188107, https://gitee.com/openeuler/kernel/issues/I665S2
CVE: NA

--------------------------------

blkcg_gq->online can't be used in iocost because it gets cleared only
after all policies are offlined. This patch adds a new field 'online'
to iocg.

Signed-off-by: Yu Kuai
Signed-off-by: Li Nan
(cherry picked from commit 3fcd5793488dcb0b7b29d9c8b55cb10c4e755eb9)
---
 block/blk-iocost.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 70f220174c0b..e1fda6d76690 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -486,6 +486,7 @@ struct ioc_gq {
 	u32				inuse;
 
 	u32				last_inuse;
+	bool				online;
 	s64				saved_margin;
 
 	sector_t			cursor;		/* to detect randio */
@@ -2939,6 +2940,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
 	ioc_now(ioc, &now);
 
 	iocg->ioc = ioc;
+	iocg->online = true;
 	atomic64_set(&iocg->vtime, now.vnow);
 	atomic64_set(&iocg->done_vtime, now.vnow);
 	atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period));
@@ -2973,6 +2975,8 @@ static void ioc_pd_offline(struct blkg_policy_data *pd)
 	if (ioc) {
 		spin_lock_irqsave(&ioc->lock, flags);
 
+		iocg->online = false;
+
 		if (!list_empty(&iocg->active_list)) {
 			struct ioc_now now;
 
-- 
Gitee

From a413dc6657400dc51c1ddd390aa2aa1904533063 Mon Sep 17 00:00:00 2001
From: Yu Kuai
Date: Tue, 6 Jun 2023 20:18:02 +0800
Subject: [PATCH 3/4] blk-iocost: don't throttle bio if iocg is offlined

hulk inclusion
category: bugfix
bugzilla: 188107, https://gitee.com/openeuler/kernel/issues/I665S2
CVE: NA

--------------------------------

A bio grabs a blkg reference; however, blkcg->online_pin is not
grabbed, hence the cgroup can be removed after the thread exits while
the bio is still in progress. Bypass IO in this situation, since it
doesn't make sense to throttle a bio while its cgroup is being removed.
Signed-off-by: Yu Kuai
Signed-off-by: Li Nan
(cherry picked from commit ebe0cb558fc49b22c445e212cadbf9c9f4dd126b)
---
 block/blk-iocost.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index e1fda6d76690..8a69e7207853 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -703,6 +703,20 @@ static struct ioc_cgrp *blkcg_to_iocc(struct blkcg *blkcg)
 			    struct ioc_cgrp, cpd);
 }
 
+static struct ioc_gq *ioc_bio_iocg(struct bio *bio)
+{
+	struct blkcg_gq *blkg = bio->bi_blkg;
+
+	if (blkg && blkg->online) {
+		struct ioc_gq *iocg = blkg_to_iocg(blkg);
+
+		if (iocg && iocg->online)
+			return iocg;
+	}
+
+	return NULL;
+}
+
 /*
  * Scale @abs_cost to the inverse of @hw_inuse. The lower the hierarchical
  * weight, the more expensive each IO. Must round up.
@@ -1219,6 +1233,9 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
 
 	spin_lock_irq(&ioc->lock);
 
+	if (!iocg->online)
+		goto fail_unlock;
+
 	ioc_now(ioc, now);
 
 	/* update period */
@@ -2543,9 +2560,8 @@ static u64 calc_size_vtime_cost(struct request *rq, struct ioc *ioc)
 
 static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
 {
-	struct blkcg_gq *blkg = bio->bi_blkg;
 	struct ioc *ioc = rqos_to_ioc(rqos);
-	struct ioc_gq *iocg = blkg_to_iocg(blkg);
+	struct ioc_gq *iocg = ioc_bio_iocg(bio);
 	struct ioc_now now;
 	struct iocg_wait wait;
 	u64 abs_cost, cost, vtime;
@@ -2679,7 +2695,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
 static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
 			   struct bio *bio)
 {
-	struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg);
+	struct ioc_gq *iocg = ioc_bio_iocg(bio);
 	struct ioc *ioc = rqos_to_ioc(rqos);
 	sector_t bio_end = bio_end_sector(bio);
 	struct ioc_now now;
@@ -2737,7 +2753,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
 
 static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
 {
-	struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg);
+	struct ioc_gq *iocg = ioc_bio_iocg(bio);
 
 	if (iocg && bio->bi_iocost_cost)
 		atomic64_add(bio->bi_iocost_cost, &iocg->done_vtime);
-- 
Gitee

From b00f5f9392b68259136bd7dabdf20b6f9cd2e684 Mon Sep 17 00:00:00 2001
From: Yu Kuai
Date: Tue, 6 Jun 2023 20:18:03 +0800
Subject: [PATCH 4/4] blk-iocost: dispatch all throttled bio in ioc_pd_offline

hulk inclusion
category: bugfix
bugzilla: 188107, https://gitee.com/openeuler/kernel/issues/I665S2
CVE: NA

--------------------------------

Otherwise IO will hang because the timer is canceled. Also update
iocg->online under both 'ioc->lock' and 'iocg->waitq.lock', so it is
guaranteed that iocg stays online while either lock is held.
Signed-off-by: Yu Kuai
Signed-off-by: Li Nan
(cherry picked from commit bc184a0be5f58682fd582d5545986e144f2c8ef7)
---
 block/blk-iocost.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 8a69e7207853..0003bf2f3b30 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1405,14 +1405,17 @@ static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
 {
 	struct iocg_wait *wait = container_of(wq_entry, struct iocg_wait, wait);
 	struct iocg_wake_ctx *ctx = (struct iocg_wake_ctx *)key;
-	u64 cost = abs_cost_to_cost(wait->abs_cost, ctx->hw_inuse);
 
-	ctx->vbudget -= cost;
+	if (ctx->iocg->online) {
+		u64 cost = abs_cost_to_cost(wait->abs_cost, ctx->hw_inuse);
 
-	if (ctx->vbudget < 0)
-		return -1;
+		ctx->vbudget -= cost;
+		if (ctx->vbudget < 0)
+			return -1;
+
+		iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost);
+	}
 
-	iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost);
 	wait->committed = true;
 
 	/*
@@ -2989,7 +2992,9 @@ static void ioc_pd_offline(struct blkg_policy_data *pd)
 	unsigned long flags;
 
 	if (ioc) {
-		spin_lock_irqsave(&ioc->lock, flags);
+		struct iocg_wake_ctx ctx = { .iocg = iocg };
+
+		iocg_lock(iocg, true, &flags);
 
 		iocg->online = false;
 
@@ -3004,9 +3009,10 @@ static void ioc_pd_offline(struct blkg_policy_data *pd)
 		WARN_ON_ONCE(!list_empty(&iocg->walk_list));
 		WARN_ON_ONCE(!list_empty(&iocg->surplus_list));
 
-		spin_unlock_irqrestore(&ioc->lock, flags);
+		iocg_unlock(iocg, true, &flags);
 
 		hrtimer_cancel(&iocg->waitq_timer);
+		__wake_up(&iocg->waitq, TASK_NORMAL, 0, &ctx);
 	}
 }
-- 
Gitee
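
Note for reviewers (not part of any patch above): the series boils down to one
pattern -- an 'online' flag flipped under the same lock the waiters take, hot
paths that bail out instead of charging once the flag is cleared, and an
offline path that wakes every waiter so nobody keeps sleeping after the waitq
timer is cancelled. Below is a minimal userspace sketch of that pattern in
plain C with pthreads; the names (struct fake_iocg, waiter_fn,
fake_iocg_offline) are invented for illustration and are not kernel APIs.

/*
 * Minimal userspace analogue of the offline-then-wake pattern used by
 * patches 2-4: the flag is flipped under the lock the waiters use, and
 * every waiter is woken so none keeps sleeping after teardown.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct fake_iocg {
	pthread_mutex_t lock;
	pthread_cond_t waitq;
	bool online;
	long budget;
};

/* Waiter: sleep until budget is granted, or bail out once offline. */
static void *waiter_fn(void *arg)
{
	struct fake_iocg *g = arg;

	pthread_mutex_lock(&g->lock);
	while (g->online && g->budget <= 0)
		pthread_cond_wait(&g->waitq, &g->lock);
	if (g->online) {
		g->budget--;	/* commit the cost, like iocg_commit_bio() */
		printf("waiter: charged, budget now %ld\n", g->budget);
	} else {
		printf("waiter: group offline, bypassing\n");
	}
	pthread_mutex_unlock(&g->lock);
	return NULL;
}

/* Offline: mark the group dead under the lock, then wake everyone. */
static void fake_iocg_offline(struct fake_iocg *g)
{
	pthread_mutex_lock(&g->lock);
	g->online = false;
	pthread_mutex_unlock(&g->lock);
	pthread_cond_broadcast(&g->waitq);	/* mirrors the final __wake_up() */
}

int main(void)
{
	struct fake_iocg g = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.waitq = PTHREAD_COND_INITIALIZER,
		.online = true,
		.budget = 0,
	};
	pthread_t t;

	pthread_create(&t, NULL, waiter_fn, &g);
	sleep(1);		/* let the waiter block on the empty budget */
	fake_iocg_offline(&g);	/* no budget ever arrives; waiter is released */
	pthread_join(t, NULL);
	return 0;
}

Compile with 'gcc -pthread sketch.c'. The waiter blocks on the empty budget
until fake_iocg_offline() clears the flag and broadcasts, at which point it
bypasses instead of hanging -- the behaviour ioc_pd_offline() achieves with
the final __wake_up() after hrtimer_cancel().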