From 8dafa01fe85f34a8b6c0c6acc29f7c5d4591a836 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 22 Jun 2022 17:02:23 +0800 Subject: [PATCH 1/6] ext4: avoid remove directory when directory is corrupted stable inclusion from stable-v4.19.262 commit 2925f9b78d81db307c6f9d91147be5cfa2613074 category: task bugzilla: https://gitee.com/openeuler/kernel/issues/I5YUDZ CVE: NA -------------------------------- [ Upstream commit b24e77ef1c6d4dbf42749ad4903c97539cc9755a ] Now if check directoy entry is corrupted, ext4_empty_dir may return true then directory will be removed when file system mounted with "errors=continue". In order not to make things worse just return false when directory is corrupted. Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220622090223.682234-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin Signed-off-by: Xibo.Wang --- fs/ext4/namei.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 972611b72195..e21d05e64bd1 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2841,11 +2841,8 @@ bool ext4_empty_dir(struct inode *inode) de = (struct ext4_dir_entry_2 *) (bh->b_data + (offset & (sb->s_blocksize - 1))); if (ext4_check_dir_entry(inode, NULL, de, bh, - bh->b_data, bh->b_size, offset)) { - offset = (offset | (sb->s_blocksize - 1)) + 1; - continue; - } - if (le32_to_cpu(de->inode)) { + bh->b_data, bh->b_size, offset) || + le32_to_cpu(de->inode)) { brelse(bh); return false; } -- Gitee From e114ec9e577d38d8d64f0560077be7aaafbf86a3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 8 Sep 2022 11:21:26 +0200 Subject: [PATCH 2/6] ext4: make directory inode spreading reflect flexbg size stable inclusion from stable-v4.19.262 commit 66b955d30f8cf4949a590bf6ce7b264b5ab64dea category: task bugzilla: https://gitee.com/openeuler/kernel/issues/I5YUDZ CVE: NA -------------------------------- commit 613c5a85898d1cd44e68f28d65eccf64a8ace9cf upstream. Currently the Orlov inode allocator searches for free inodes for a directory only in flex block groups with at most inodes_per_group/16 more directory inodes than average per flex block group. However with growing size of flex block group this becomes unnecessarily strict. Scale allowed difference from average directory count per flex block group with flex block group size as we do with other metrics. Tested-by: Stefan Wahren Tested-by: Ojaswin Mujoo Cc: stable@kernel.org Link: https://lore.kernel.org/all/0d81a7c2-46b7-6010-62a4-3e6cfc1628d6@i2se.com/ Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20220908092136.11770-3-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: Xibo.Wang --- fs/ext4/ialloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 835109f05493..631bd9af1971 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -502,7 +502,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, goto fallback; } - max_dirs = ndirs / ngroups + inodes_per_group / 16; + max_dirs = ndirs / ngroups + inodes_per_group*flex_size / 16; min_inodes = avefreei - inodes_per_group*flex_size / 4; if (min_inodes < 1) min_inodes = 1; -- Gitee From 15f15d72642d343a578324e6faf04d683c64c64b Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 5 Aug 2022 20:39:47 +0800 Subject: [PATCH 3/6] ext4: fix null-ptr-deref in ext4_write_info stable inclusion from stable-v4.19.262 commit 947264e00c46de19a016fd81218118c708fed2f3 category: task bugzilla: https://gitee.com/openeuler/kernel/issues/I5YUDZ CVE: NA -------------------------------- commit f9c1f248607d5546075d3f731e7607d5571f2b60 upstream. I caught a null-ptr-deref bug as follows: ================================================================== KASAN: null-ptr-deref in range [0x0000000000000068-0x000000000000006f] CPU: 1 PID: 1589 Comm: umount Not tainted 5.10.0-02219-dirty #339 RIP: 0010:ext4_write_info+0x53/0x1b0 [...] Call Trace: dquot_writeback_dquots+0x341/0x9a0 ext4_sync_fs+0x19e/0x800 __sync_filesystem+0x83/0x100 sync_filesystem+0x89/0xf0 generic_shutdown_super+0x79/0x3e0 kill_block_super+0xa1/0x110 deactivate_locked_super+0xac/0x130 deactivate_super+0xb6/0xd0 cleanup_mnt+0x289/0x400 __cleanup_mnt+0x16/0x20 task_work_run+0x11c/0x1c0 exit_to_user_mode_prepare+0x203/0x210 syscall_exit_to_user_mode+0x5b/0x3a0 do_syscall_64+0x59/0x70 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ================================================================== Above issue may happen as follows: ------------------------------------- exit_to_user_mode_prepare task_work_run __cleanup_mnt cleanup_mnt deactivate_super deactivate_locked_super kill_block_super generic_shutdown_super shrink_dcache_for_umount dentry = sb->s_root sb->s_root = NULL <--- Here set NULL sync_filesystem __sync_filesystem sb->s_op->sync_fs > ext4_sync_fs dquot_writeback_dquots sb->dq_op->write_info > ext4_write_info ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2) d_inode(sb->s_root) s_root->d_inode <--- Null pointer dereference To solve this problem, we use ext4_journal_start_sb directly to avoid s_root being used. Cc: stable@kernel.org Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220805123947.565152-1-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: Xibo.Wang --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e4abf3de254f..90efc17249f8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6023,7 +6023,7 @@ static int ext4_write_info(struct super_block *sb, int type) handle_t *handle; /* Data block + inode block */ - handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2); + handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit_info(sb, type); -- Gitee From cc0c22e4d5af2ddcb71a16c6824d8d858984c36e Mon Sep 17 00:00:00 2001 From: Lalith Rajendran Date: Thu, 18 Aug 2022 21:40:49 +0000 Subject: [PATCH 4/6] ext4: make ext4_lazyinit_thread freezable stable inclusion from stable-v4.19.262 commit 90e99fb5d2090a71d54410559675528f711632a0 category: task bugzilla: https://gitee.com/openeuler/kernel/issues/I5YUDZ CVE: NA -------------------------------- commit 3b575495ab8dbb4dbe85b4ac7f991693c3668ff5 upstream. ext4_lazyinit_thread is not set freezable. Hence when the thread calls try_to_freeze it doesn't freeze during suspend and continues to send requests to the storage during suspend, resulting in suspend failures. Cc: stable@kernel.org Signed-off-by: Lalith Rajendran Link: https://lore.kernel.org/r/20220818214049.1519544-1-lalithkraj@google.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: Xibo.Wang --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 90efc17249f8..31162458ce2c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3308,6 +3308,7 @@ static int ext4_lazyinit_thread(void *arg) unsigned long next_wakeup, cur; BUG_ON(NULL == eli); + set_freezable(); cont_thread: while (true) { -- Gitee From 9143cbad8ced2cddc7c9d86419167318dd222932 Mon Sep 17 00:00:00 2001 From: Jinke Han Date: Sat, 3 Sep 2022 09:24:29 +0800 Subject: [PATCH 5/6] ext4: place buffer head allocation before handle start stable inclusion from stable-v4.19.262 commit 5c8ebb8b32dbbcdc241dac732c82dcd7068661a7 category: task bugzilla: https://gitee.com/openeuler/kernel/issues/I5YUDZ CVE: NA -------------------------------- commit d1052d236eddf6aa851434db1897b942e8db9921 upstream. In our product environment, we encounter some jbd hung waiting handles to stop while several writters were doing memory reclaim for buffer head allocation in delay alloc write path. Ext4 do buffer head allocation with holding transaction handle which may be blocked too long if the reclaim works not so smooth. According to our bcc trace, the reclaim time in buffer head allocation can reach 258s and the jbd transaction commit also take almost the same time meanwhile. Except for these extreme cases, we often see several seconds delays for cgroup memory reclaim on our servers. This is more likely to happen considering docker environment. One thing to note, the allocation of buffer heads is as often as page allocation or more often when blocksize less than page size. Just like page cache allocation, we should also place the buffer head allocation before startting the handle. Cc: stable@kernel.org Signed-off-by: Jinke Han Link: https://lore.kernel.org/r/20220903012429.22555-1-hanjinke.666@bytedance.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: Xibo.Wang --- fs/ext4/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ce88148cdc4a..51ce7006afba 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1300,6 +1300,13 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; + /* + * The same as page allocation, we prealloc buffer heads before + * starting the handle. + */ + if (!page_has_buffers(page)) + create_empty_buffers(page, inode->i_sb->s_blocksize, 0); + unlock_page(page); retry_journal: -- Gitee From f1f5dd00c031a542764f8476e916c8a2a6652ae5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jerry=20Lee=20=E6=9D=8E=E4=BF=AE=E8=B3=A2?= Date: Mon, 18 Jul 2022 10:25:19 +0000 Subject: [PATCH 6/6] ext4: continue to expand file system when the target size doesn't reach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v4.19.262 commit f2180ad6a43501597d20eacad0c6f146c51d4bbd category: task bugzilla: https://gitee.com/openeuler/kernel/issues/I5YUDZ CVE: NA -------------------------------- commit df3cb754d13d2cd5490db9b8d536311f8413a92e upstream. When expanding a file system from (16TiB-2MiB) to 18TiB, the operation exits early which leads to result inconsistency between resize2fs and Ext4 kernel driver. === before === ○ → resize2fs /dev/mapper/thin resize2fs 1.45.5 (07-Jan-2020) Filesystem at /dev/mapper/thin is mounted on /mnt/test; on-line resizing required old_desc_blocks = 2048, new_desc_blocks = 2304 The filesystem on /dev/mapper/thin is now 4831837696 (4k) blocks long. [ 865.186308] EXT4-fs (dm-5): mounted filesystem with ordered data mode. Opts: (null). Quota mode: none. [ 912.091502] dm-4: detected capacity change from 34359738368 to 38654705664 [ 970.030550] dm-5: detected capacity change from 34359734272 to 38654701568 [ 1000.012751] EXT4-fs (dm-5): resizing filesystem from 4294966784 to 4831837696 blocks [ 1000.012878] EXT4-fs (dm-5): resized filesystem to 4294967296 === after === [ 129.104898] EXT4-fs (dm-5): mounted filesystem with ordered data mode. Opts: (null). Quota mode: none. [ 143.773630] dm-4: detected capacity change from 34359738368 to 38654705664 [ 198.203246] dm-5: detected capacity change from 34359734272 to 38654701568 [ 207.918603] EXT4-fs (dm-5): resizing filesystem from 4294966784 to 4831837696 blocks [ 207.918754] EXT4-fs (dm-5): resizing filesystem from 4294967296 to 4831837696 blocks [ 207.918758] EXT4-fs (dm-5): Converting file system to meta_bg [ 207.918790] EXT4-fs (dm-5): resizing filesystem from 4294967296 to 4831837696 blocks [ 221.454050] EXT4-fs (dm-5): resized to 4658298880 blocks [ 227.634613] EXT4-fs (dm-5): resized filesystem to 4831837696 Signed-off-by: Jerry Lee Link: https://lore.kernel.org/r/PU1PR04MB22635E739BD21150DC182AC6A18C9@PU1PR04MB2263.apcprd04.prod.outlook.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: Xibo.Wang --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index bc870561e394..6db398ef0a65 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -2084,7 +2084,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) goto out; } - if (ext4_blocks_count(es) == n_blocks_count) + if (ext4_blocks_count(es) == n_blocks_count && n_blocks_count_retry == 0) goto out; err = ext4_alloc_flex_bg_array(sb, n_group + 1); -- Gitee