From ea5eb2d2098e7fc7db6870ceaee38a7ef2814903 Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:46:41 +0800 Subject: [PATCH 01/74] mm/sharepool: Support read-only memory allocation Offering: HULK ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S -------------------------------- When the driver uses the shared pool memory to share the memory with the user space, the user space is not allowed to operate this area. This prevents users from damaging sensitive data. When the sp_alloc and k2u processes apply for private memory, read-only memory can be applied for. Signed-off-by: Zhou Guanghui --- include/linux/share_pool.h | 3 ++- mm/share_pool.c | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 6f294911c6af..5539a17da7a9 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -15,6 +15,7 @@ #define SP_HUGEPAGE_ONLY (1 << 1) #define SP_DVPP (1 << 2) #define SP_SPEC_NODE_ID (1 << 3) +#define SP_PROT_RO (1 << 16) #define DEVICE_ID_BITS 4UL #define DEVICE_ID_MASK ((1UL << DEVICE_ID_BITS) - 1UL) @@ -24,7 +25,7 @@ #define NODE_ID_SHIFT (DEVICE_ID_SHIFT + DEVICE_ID_BITS) #define SP_FLAG_MASK (SP_HUGEPAGE | SP_HUGEPAGE_ONLY | SP_DVPP | \ - SP_SPEC_NODE_ID | \ + SP_SPEC_NODE_ID | SP_PROT_RO | \ (DEVICE_ID_MASK << DEVICE_ID_SHIFT) | \ (NODE_ID_MASK << NODE_ID_SHIFT)) diff --git a/mm/share_pool.c b/mm/share_pool.c index 76088952d0a5..99f25a551afc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2331,6 +2331,9 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, if (spg_node) prot = spg_node->prot; + if (ac->sp_flags & SP_PROT_RO) + prot = PROT_READ; + /* when success, mmap_addr == spa->va_start */ mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); if (IS_ERR_VALUE(mmap_addr)) { @@ -2355,6 +2358,10 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, ret = -EINVAL; goto unmap; } + + if (ac->sp_flags & SP_PROT_RO) + vma->vm_flags &= ~VM_MAYWRITE; + /* clean PTE_RDONLY flags or trigger SMMU event */ if (prot & PROT_WRITE) vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); @@ -2650,6 +2657,9 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, goto put_mm; } + if (kc && kc->sp_flags & SP_PROT_RO) + prot = PROT_READ; + ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); if (IS_ERR_VALUE(ret_addr)) { pr_debug("k2u mmap failed %lx\n", ret_addr); @@ -2662,6 +2672,9 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, if (prot & PROT_WRITE) vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); + if (kc && kc->sp_flags & SP_PROT_RO) + vma->vm_flags &= ~VM_MAYWRITE; + if (is_vm_hugetlb_page(vma)) { ret = remap_vmalloc_hugepage_range(vma, (void *)kva, 0); if (ret) { @@ -2713,6 +2726,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un struct sp_area *spa; struct spg_proc_stat *stat; unsigned long prot = PROT_READ | PROT_WRITE; + struct sp_k2u_context kc; down_write(&sp_group_sem); stat = sp_init_process_stat(current, current->mm, spg_none); @@ -2731,8 +2745,8 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un } spa->kva = kva; - - uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, NULL); + kc.sp_flags = sp_flags; + uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, 
prot, &kc); __sp_area_drop(spa); if (IS_ERR(uva)) pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva)); -- Gitee From 3a676c85a551f92f78384a7ff8ce41109aaf7e75 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:46:42 +0800 Subject: [PATCH 02/74] mm/sharepool: Fix using uninitialized sp_flag Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- Add the missing initialization for kc.sp_flag in sp_make_share_kva_to_spg(). Or a random value would be used in sp_remap_kva_to_vma(). Signed-off-by: Wang Wensheng --- mm/share_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 99f25a551afc..db6ab098d403 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2787,7 +2787,7 @@ static void *sp_make_share_kva_to_spg(unsigned long kva, unsigned long size, } spa->kva = kva; - + kc.sp_flags = sp_flags; list_for_each_entry(spg_node, &spg->procs, proc_node) { mm = spg_node->master->mm; kc.state = K2U_NORMAL; -- Gitee From 9ad6ac2facbc73d94011f26e9b02053fe2952fdc Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:46:43 +0800 Subject: [PATCH 03/74] mm/sharepool: use rwsem to protect sp group exit Offering: HULK ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- Fix following situation: when the last process in a group exits, and a second process tries to add to this group. The second process may get a invalid spg. However the group's use_count is increased by 1, which caused the first process failed to free the group when it exits. And then second process called sp_group_drop --> free_sp_group and cause a double request of rwsem. 
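The problematic interleaving, roughly (an illustrative sketch; exact call
sites may vary):

    exiting task (last group member)     joining task
    ---------------------------------    ---------------------------------
    sp_group_post_exit()
      lockless traversal of node_list
                                         down_write(&sp_group_sem)
                                         finds the dying spg and takes a
                                         reference (use_count++)
      sp_group_drop()
        use_count still > 0, so the
        group is not freed here
                                         add fails, sp_group_drop()
                                           free_sp_group()
                                             down_write(&sp_group_sem)
                                             /* rwsem already held */

Take sp_group_sem around the exit-path traversal and free the group with a
_locked variant, so the semaphore is never requested twice.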
Signed-off-by: Guo Mengqi Signed-off-by: Yang Yingliang --- mm/share_pool.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index db6ab098d403..35edab122509 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -698,20 +698,25 @@ static void free_new_spg_id(bool new, int spg_id) free_sp_group_id(spg_id); } -static void free_sp_group(struct sp_group *spg) +static void free_sp_group_locked(struct sp_group *spg) { fput(spg->file); fput(spg->file_hugetlb); free_spg_stat(spg->id); - down_write(&sp_group_sem); idr_remove(&sp_group_idr, spg->id); - up_write(&sp_group_sem); free_sp_group_id((unsigned int)spg->id); kfree(spg); system_group_count--; WARN(system_group_count < 0, "unexpected group count\n"); } +static void free_sp_group(struct sp_group *spg) +{ + down_write(&sp_group_sem); + free_sp_group_locked(spg); + up_write(&sp_group_sem); +} + static void sp_group_drop(struct sp_group *spg) { if (atomic_dec_and_test(&spg->use_count)) @@ -4453,14 +4458,15 @@ void sp_group_post_exit(struct mm_struct *mm) sp_proc_stat_drop(stat); } - /* lockless traverse */ + down_write(&sp_group_sem); list_for_each_entry_safe(spg_node, tmp, &master->node_list, group_node) { spg = spg_node->spg; /* match with refcount inc in sp_group_add_task */ - sp_group_drop(spg); + if (atomic_dec_and_test(&spg->use_count)) + free_sp_group_locked(spg); kfree(spg_node); } - + up_write(&sp_group_sem); kfree(master); } -- Gitee From 66a528ba330290d1bca159e99fab53043ce9ee57 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:46:44 +0800 Subject: [PATCH 04/74] mm/sharepool: Allow share THP to kernel Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S -------------------------------------------------- This is not used for THP but the user page table is just like THP. The user alloc hugepages via a special driver and its vma is not marked with VM_HUGETLB. This commit allow to share those vma to kernel. Signed-off-by: Wang Wensheng --- include/linux/share_pool.h | 1 + mm/share_pool.c | 44 +++++++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 5539a17da7a9..022e61bb6ce4 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -179,6 +179,7 @@ struct sp_walk_data { unsigned long uva_aligned; unsigned long page_size; bool is_hugepage; + bool is_page_type_set; pmd_t *pmd; }; diff --git a/mm/share_pool.c b/mm/share_pool.c index 35edab122509..115200a1ee0d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3013,9 +3013,40 @@ EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u); static int sp_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { + struct page *page; struct sp_walk_data *sp_walk_data = walk->private; + /* + * There exist a scene in DVPP where the pagetable is huge page but its + * vma doesn't record it, something like THP. + * So we cannot make out whether it is a hugepage map until we access the + * pmd here. If mixed size of pages appear, just return an error. 
+ */ + if (pmd_huge(*pmd)) { + if (!sp_walk_data->is_page_type_set) { + sp_walk_data->is_page_type_set = true; + sp_walk_data->is_hugepage = true; + } else if (!sp_walk_data->is_hugepage) + return -EFAULT; + + /* To skip pte level walk */ + walk->action = ACTION_CONTINUE; + + page = pmd_page(*pmd); + get_page(page); + sp_walk_data->pages[sp_walk_data->page_count++] = page; + + return 0; + } + + if (!sp_walk_data->is_page_type_set) { + sp_walk_data->is_page_type_set = true; + sp_walk_data->is_hugepage = false; + } else if (sp_walk_data->is_hugepage) + return -EFAULT; + sp_walk_data->pmd = pmd; + return 0; } @@ -3159,6 +3190,8 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size, sp_walk.pmd_entry = sp_pmd_entry; } + sp_walk_data->is_page_type_set = false; + sp_walk_data->page_count = 0; sp_walk_data->page_size = page_size; uva_aligned = ALIGN_DOWN(uva, page_size); sp_walk_data->uva_aligned = uva_aligned; @@ -3183,8 +3216,12 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size, ret = walk_page_range(mm, uva_aligned, uva_aligned + size_aligned, &sp_walk, sp_walk_data); - if (ret) + if (ret) { + while (sp_walk_data->page_count--) + put_page(pages[sp_walk_data->page_count]); kvfree(pages); + sp_walk_data->pages = NULL; + } return ret; } @@ -3220,9 +3257,7 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) int ret = 0; struct mm_struct *mm = current->mm; void *p = ERR_PTR(-ESRCH); - struct sp_walk_data sp_walk_data = { - .page_count = 0, - }; + struct sp_walk_data sp_walk_data; struct vm_struct *area; check_interrupt_context(); @@ -3563,7 +3598,6 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, return -ESRCH; } - sp_walk_data->page_count = 0; down_write(&mm->mmap_lock); if (likely(!mm->core_state)) ret = __sp_walk_page_range(uva, size, mm, sp_walk_data); -- Gitee From 72dec9f627e66b3c3e7572ef4df1de2e0c249dab Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:46:45 +0800 Subject: [PATCH 05/74] mm/sharepool: Delete single-group mode Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- The single-group mode has no application scenario. Therefore, the related branch is deleted. The boot option "enable_sp_multi_group_mode" does not take effect. 
Signed-off-by: Zhou Guanghui --- mm/share_pool.c | 137 +++++++++--------------------------------------- 1 file changed, 25 insertions(+), 112 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 115200a1ee0d..06c699dd6d3d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -62,9 +62,6 @@ #define byte2mb(size) ((size) >> 20) #define page2kb(page_num) ((page_num) << (PAGE_SHIFT - 10)) -#define SINGLE_GROUP_MODE 1 -#define MULTI_GROUP_MODE 2 - #define MAX_GROUP_FOR_SYSTEM 50000 #define MAX_GROUP_FOR_TASK 3000 #define MAX_PROC_PER_GROUP 1024 @@ -93,8 +90,6 @@ int sysctl_share_pool_map_lock_enable; int sysctl_sp_perf_k2u; int sysctl_sp_perf_alloc; -static int share_pool_group_mode = SINGLE_GROUP_MODE; - static int system_group_count; static unsigned int sp_device_number; @@ -1079,12 +1074,6 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) struct sp_group_master *master = mm->sp_group_master; bool exist = false; - if (share_pool_group_mode == SINGLE_GROUP_MODE && master && - master->count == 1) { - pr_err_ratelimited("at most one sp group for a task is allowed in single mode\n"); - return -EEXIST; - } - master = sp_init_group_master_locked(mm, &exist); if (IS_ERR(master)) return PTR_ERR(master); @@ -2222,72 +2211,30 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, if (sp_flags & SP_HUGEPAGE_ONLY) sp_flags |= SP_HUGEPAGE; - if (share_pool_group_mode == SINGLE_GROUP_MODE) { - spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT); - if (spg) { - if (spg_id != SPG_ID_DEFAULT && spg->id != spg_id) { - sp_group_drop(spg); - return -ENODEV; - } - - /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, spg is dead\n"); - return -ENODEV; - } - } else { /* alocation pass through scene */ - if (enable_mdc_default_group) { - int ret = 0; - - ret = sp_group_add_task(current->tgid, spg_id); - if (ret < 0) { - pr_err_ratelimited("add group failed in pass through\n"); - return ret; - } - - spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT); - - /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("pass through allocation failed, spg is dead\n"); - return -ENODEV; - } - } else { - spg = spg_none; - } + if (spg_id != SPG_ID_DEFAULT) { + spg = __sp_find_spg(current->pid, spg_id); + if (!spg) { + pr_err_ratelimited("allocation failed, can't find group\n"); + return -ENODEV; } - } else { - if (spg_id != SPG_ID_DEFAULT) { - spg = __sp_find_spg(current->pid, spg_id); - if (!spg) { - pr_err_ratelimited("allocation failed, can't find group\n"); - return -ENODEV; - } - /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, spg is dead\n"); - return -ENODEV; - } + /* up_read will be at the end of sp_alloc */ + down_read(&spg->rw_lock); + if (!spg_valid(spg)) { + up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("allocation failed, spg is dead\n"); + return -ENODEV; + } - if (!is_process_in_group(spg, current->mm)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, task not in group\n"); - return -ENODEV; - } - } else { /* alocation pass through scene */ - spg = spg_none; + if (!is_process_in_group(spg, current->mm)) { + 
up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("allocation failed, task not in group\n"); + return -ENODEV; } + } else { /* alocation pass through scene */ + spg = spg_none; } if (sp_flags & SP_HUGEPAGE) { @@ -2902,33 +2849,12 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, kc->size_aligned = size_aligned; kc->sp_flags = sp_flags; kc->spg_id = spg_id; - kc->to_task = false; - return 0; -} - -static int sp_check_k2task(struct sp_k2u_context *kc) -{ - int ret = 0; - int spg_id = kc->spg_id; - - if (share_pool_group_mode == SINGLE_GROUP_MODE) { - struct sp_group *spg = get_first_group(current->mm); + if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE) + kc->to_task = true; + else + kc->to_task = false; - if (!spg) { - if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) - ret = -EINVAL; - else - kc->to_task = true; - } else { - if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id) - ret = -EINVAL; - sp_group_drop(spg); - } - } else { - if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE) - kc->to_task = true; - } - return ret; + return 0; } static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) @@ -2973,12 +2899,6 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, if (ret) return ERR_PTR(ret); - ret = sp_check_k2task(&kc); - if (ret) { - uva = ERR_PTR(ret); - goto out; - } - if (kc.to_task) uva = sp_make_share_kva_to_task(kc.kva_aligned, kc.size_aligned, kc.sp_flags); else { @@ -3745,13 +3665,6 @@ static int __init enable_share_k2u_to_group(char *s) } __setup("enable_sp_share_k2u_spg", enable_share_k2u_to_group); -static int __init enable_sp_multi_group_mode(char *s) -{ - share_pool_group_mode = MULTI_GROUP_MODE; - return 1; -} -__setup("enable_sp_multi_group_mode", enable_sp_multi_group_mode); - /*** Statistical and maintenance functions ***/ static void free_process_spg_proc_stat(struct sp_proc_stat *proc_stat) -- Gitee From 689fb60fff868cb068b2c050fba5a233aaec8df5 Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:46:46 +0800 Subject: [PATCH 06/74] mm/sharepool: Create global normal and dvpp mapping Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- struct sp_mapping is used to manage the address space of a shared pool. During the initialization of the shared pool, normal address spaces are created to allocate the memory of the current shared pool. Signed-off-by: Zhou Guanghui --- include/linux/share_pool.h | 18 +++++++++++++ mm/share_pool.c | 52 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 022e61bb6ce4..654dc8cc2922 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -101,6 +101,17 @@ struct sp_proc_stat { atomic64_t k2u_size; }; +/* + * address space management + */ +struct sp_mapping { + unsigned long flag; + atomic_t user; + unsigned long start[MAX_DEVID]; + unsigned long end[MAX_DEVID]; + struct rb_root area_root; +}; + /* Processes in the same sp_group can share memory. 
* Memory layout for share pool: * @@ -142,6 +153,8 @@ struct sp_group { atomic_t use_count; /* protect the group internal elements, except spa_list */ struct rw_semaphore rw_lock; + struct sp_mapping *dvpp; + struct sp_mapping *normal; }; /* a per-process(per mm) struct which manages a sp_group_node list */ @@ -155,6 +168,11 @@ struct sp_group_master { struct list_head node_list; struct mm_struct *mm; struct sp_proc_stat *stat; + /* + * Used to apply for the shared pool memory of the current process. + * For example, sp_alloc non-share memory or k2task. + */ + struct sp_group *local; }; /* diff --git a/mm/share_pool.c b/mm/share_pool.c index 06c699dd6d3d..2589dab17096 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -125,6 +125,48 @@ static DECLARE_RWSEM(sp_spg_stat_sem); /* for kthread buff_module_guard_work */ static struct sp_proc_stat kthread_stat; +#define SP_MAPPING_DVPP 0x1 +#define SP_MAPPING_NORMAL 0x2 +static struct sp_mapping *sp_mapping_normal; + +static void sp_mapping_range_init(struct sp_mapping *spm) +{ + int i; + + for (i = 0; i < MAX_DEVID; i++) { + if (spm->flag & SP_MAPPING_NORMAL) { + spm->start[i] = MMAP_SHARE_POOL_START; + spm->end[i] = MMAP_SHARE_POOL_16G_START; + continue; + } + + if (!is_sp_dev_addr_enabled(i)) { + spm->start[i] = MMAP_SHARE_POOL_16G_START + + i * MMAP_SHARE_POOL_16G_START; + spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_START; + } else { + spm->start[i] = sp_dev_va_start[i]; + spm->end[i] = spm->start[i] + sp_dev_va_size[i]; + } + } +} + +static struct sp_mapping *sp_mapping_create(unsigned long flag) +{ + struct sp_mapping *spm; + + spm = kzalloc(sizeof(struct sp_mapping), GFP_KERNEL); + if (!spm) + return ERR_PTR(-ENOMEM); + + spm->flag = flag; + sp_mapping_range_init(spm); + atomic_set(&spm->user, 0); + spm->area_root = RB_ROOT; + + return spm; +} + /* The caller must hold sp_group_sem */ static struct sp_group_master *sp_init_group_master_locked( struct mm_struct *mm, bool *exist) @@ -4442,12 +4484,22 @@ static void __init sp_device_number_detect(void) static int __init share_pool_init(void) { + if (!sp_is_enabled()) + return 0; + /* lockless, as init kthread has no sp operation else */ spg_none = create_spg(GROUP_NONE); /* without free spg_none, not a serious problem */ if (IS_ERR(spg_none) || !spg_none) goto fail; + sp_mapping_normal = sp_mapping_create(SP_MAPPING_NORMAL); + if (IS_ERR(sp_mapping_normal)) { + sp_group_drop(spg_none); + goto fail; + } + atomic_inc(&sp_mapping_normal->user); + sp_device_number_detect(); proc_sharepool_init(); -- Gitee From 5975901c66941a5b52a3f7c4d3b9ad2a4ea2eb5b Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:46:47 +0800 Subject: [PATCH 07/74] mm/sharepool: Address space management for sp_group Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- Separately manage the normal and dvpp address spaces of the sp_group and set the normal and dvpp address spaces of the corresponding groups when adding a group, sp_alloc, and k2task. 
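After this change the VA window searched by sp_alloc_area() comes from the
group's own sp_mapping rather than the old global constants, roughly:

    /* simplified view of the allocation path */
    mapping = (flags & SP_DVPP) ? spg->dvpp : spg->normal;
    vstart  = mapping->start[device_id];
    vend    = mapping->end[device_id];

so a local group (k2task / pass-through) and a shared group can carry
different DVPP windows while all groups share the single normal mapping.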
Signed-off-by: Zhou Guanghui --- include/linux/share_pool.h | 6 + mm/share_pool.c | 299 +++++++++++++++++++++++++++++-------- 2 files changed, 239 insertions(+), 66 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 654dc8cc2922..92cc1ffa3946 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -39,6 +39,8 @@ #define SPG_ID_AUTO_MIN 100000 #define SPG_ID_AUTO_MAX 199999 #define SPG_ID_AUTO 200000 /* generate group id automatically */ +#define SPG_ID_LOCAL_MIN 200001 +#define SPG_ID_LOCAL_MAX 299999 #define MAX_DEVID 8 /* the max num of Da-vinci devices */ @@ -110,6 +112,10 @@ struct sp_mapping { unsigned long start[MAX_DEVID]; unsigned long end[MAX_DEVID]; struct rb_root area_root; + + struct rb_node *free_area_cache; + unsigned long cached_hole_size; + unsigned long cached_vstart; }; /* Processes in the same sp_group can share memory. diff --git a/mm/share_pool.c b/mm/share_pool.c index 2589dab17096..bff066611ade 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -142,8 +142,8 @@ static void sp_mapping_range_init(struct sp_mapping *spm) if (!is_sp_dev_addr_enabled(i)) { spm->start[i] = MMAP_SHARE_POOL_16G_START + - i * MMAP_SHARE_POOL_16G_START; - spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_START; + i * MMAP_SHARE_POOL_16G_SIZE; + spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; } else { spm->start[i] = sp_dev_va_start[i]; spm->end[i] = spm->start[i] + sp_dev_va_size[i]; @@ -167,10 +167,91 @@ static struct sp_mapping *sp_mapping_create(unsigned long flag) return spm; } +static void sp_mapping_destroy(struct sp_mapping *spm) +{ + kfree(spm); +} + +static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) +{ + atomic_inc(&spm->user); + if (spm->flag & SP_MAPPING_DVPP) + spg->dvpp = spm; + else if (spm->flag & SP_MAPPING_NORMAL) + spg->normal = spm; +} + +static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) +{ + if (spm && atomic_dec_and_test(&spm->user)) + sp_mapping_destroy(spm); +} + +/* + * When you set the address space of a group, the normal address space + * is globally unified. When processing the DVPP address space, consider + * the following situations: + * 1. If a process is added to a non-new group, the DVPP address space + * must have been created. If the local group of the process also + * contains the DVPP address space and they are different, this + * scenario is not allowed to avoid address conflict. + * 2. If the DVPP address space does not exist in the local group of the + * process, attach the local group of the process to the DVPP address + * space of the group. + * 3. Add a new group. If the process has applied for the dvpp address + * space (sp_alloc or k2u), attach the new group to the dvpp address + * space of the current process. + * 4. If the process has not applied for the DVPP address space, attach + * the new group and the local group of the current process to the + * newly created DVPP address space. 
+ * + * the caller must hold sp_group_sem + */ +static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) +{ + struct sp_group_master *master = mm->sp_group_master; + struct sp_group *local = master->local; + struct sp_mapping *spm; + + if (!list_empty(&spg->procs)) { + /* 1 */ + if (local->dvpp && local->dvpp != spg->dvpp) { + pr_info_ratelimited("Duplicate address space, id=%d\n", + spg->id); + return 0; + } + + /* 2 */ + if (!local->dvpp) { + sp_mapping_attach(local, spg->dvpp); + sp_mapping_attach(local, spg->normal); + } + } else { + /* 4 */ + if (!local->dvpp) { + spm = sp_mapping_create(SP_MAPPING_DVPP); + if (IS_ERR(spm)) + return PTR_ERR(spm); + sp_mapping_attach(local, spm); + sp_mapping_attach(local, sp_mapping_normal); + } + + /* 3 */ + sp_mapping_attach(spg, local->dvpp); + sp_mapping_attach(spg, sp_mapping_normal); + } + + return 0; +} + +static struct sp_group *create_spg(int spg_id); +static void free_new_spg_id(bool new, int spg_id); /* The caller must hold sp_group_sem */ static struct sp_group_master *sp_init_group_master_locked( struct mm_struct *mm, bool *exist) { + int spg_id; + struct sp_group *spg; struct sp_group_master *master = mm->sp_group_master; if (master) { @@ -182,16 +263,92 @@ static struct sp_group_master *sp_init_group_master_locked( if (master == NULL) return ERR_PTR(-ENOMEM); + spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_LOCAL_MIN, + SPG_ID_LOCAL_MAX, GFP_ATOMIC); + if (spg_id < 0) { + kfree(master); + pr_err_ratelimited("generate local group id failed %d\n", spg_id); + return ERR_PTR(spg_id); + } + + spg = create_spg(spg_id); + if (IS_ERR(spg)) { + free_new_spg_id(true, spg_id); + kfree(master); + return (struct sp_group_master *)spg; + } + INIT_LIST_HEAD(&master->node_list); master->count = 0; master->stat = NULL; master->mm = mm; + master->local = spg; mm->sp_group_master = master; *exist = false; return master; } +static inline bool is_local_group(int spg_id) +{ + return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; +} + +/* + * If the process is added to a group first, the address space of the local + * group of the process must have been set. If the process is not added to + * a group, directly create or attach the process to the corresponding DVPP + * and normal address space. 
+ */ +static int sp_mapping_group_setup_local(struct mm_struct *mm) +{ + struct sp_group_master *master; + struct sp_mapping *spm; + bool exist = false; + + master = sp_init_group_master_locked(mm, &exist); + if (IS_ERR(master)) + return PTR_ERR(master); + + if (master->local->dvpp) + return 0; + + spm = sp_mapping_create(SP_MAPPING_DVPP); + if (IS_ERR(spm)) + return PTR_ERR(spm); + sp_mapping_attach(master->local, spm); + sp_mapping_attach(master->local, sp_mapping_normal); + + return 0; +} + +static struct sp_group *sp_get_local_group(struct mm_struct *mm) +{ + int ret; + struct sp_group_master *master; + + down_read(&sp_group_sem); + master = mm->sp_group_master; + if (master && master->local) { + atomic_inc(&master->local->use_count); + up_read(&sp_group_sem); + return master->local; + } + up_read(&sp_group_sem); + + down_write(&sp_group_sem); + ret = sp_mapping_group_setup_local(mm); + if (ret) { + up_write(&sp_group_sem); + return ERR_PTR(ret); + } + master = mm->sp_group_master; + atomic_inc(&master->local->use_count); + up_write(&sp_group_sem); + + return master->local; +} + static struct sp_proc_stat *sp_get_proc_stat(struct mm_struct *mm) { struct sp_proc_stat *stat; @@ -575,7 +732,7 @@ static void spa_inc_usage(struct sp_area *spa) case SPA_TYPE_K2TASK: spa_stat.k2u_task_num += 1; spa_stat.k2u_task_size += size; - update_spg_stat_k2u(size, true, spg_none->stat); + update_spg_stat_k2u(size, true, spa->spg->stat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num += 1; @@ -598,7 +755,7 @@ static void spa_inc_usage(struct sp_area *spa) spa_stat.total_num += 1; spa_stat.total_size += size; - if (spa->spg != spg_none) { + if (!is_local_group(spa->spg->id)) { atomic_inc(&sp_overall_stat.spa_total_num); atomic64_add(size, &sp_overall_stat.spa_total_size); } @@ -621,7 +778,7 @@ static void spa_dec_usage(struct sp_area *spa) case SPA_TYPE_K2TASK: spa_stat.k2u_task_num -= 1; spa_stat.k2u_task_size -= size; - update_spg_stat_k2u(size, false, spg_none->stat); + update_spg_stat_k2u(size, false, spa->spg->stat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num -= 1; @@ -640,7 +797,7 @@ static void spa_dec_usage(struct sp_area *spa) spa_stat.total_num -= 1; spa_stat.total_size -= size; - if (spa->spg != spg_none) { + if (!is_local_group(spa->spg->id)) { atomic_dec(&sp_overall_stat.spa_total_num); atomic64_sub(spa->real_size, &sp_overall_stat.spa_total_size); } @@ -725,7 +882,8 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, static void free_sp_group_id(int spg_id) { /* ida operation is protected by an internal spin_lock */ - if (spg_id >= SPG_ID_AUTO_MIN && spg_id <= SPG_ID_AUTO_MAX) + if ((spg_id >= SPG_ID_AUTO_MIN && spg_id <= SPG_ID_AUTO_MAX) || + (spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX)) ida_free(&sp_group_id_ida, spg_id); } @@ -742,8 +900,11 @@ static void free_sp_group_locked(struct sp_group *spg) free_spg_stat(spg->id); idr_remove(&sp_group_idr, spg->id); free_sp_group_id((unsigned int)spg->id); + sp_mapping_detach(spg, spg->dvpp); + sp_mapping_detach(spg, spg->normal); + if (!is_local_group(spg->id)) + system_group_count--; kfree(spg); - system_group_count--; WARN(system_group_count < 0, "unexpected group count\n"); } @@ -992,7 +1153,8 @@ static struct sp_group *create_spg(int spg_id) struct user_struct *user = NULL; int hsize_log = MAP_HUGE_2MB >> MAP_HUGE_SHIFT; - if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM)) { + if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM && + !is_local_group(spg_id))) { 
pr_err_ratelimited("reach system max group num\n"); return ERR_PTR(-ENOSPC); } @@ -1039,7 +1201,8 @@ static struct sp_group *create_spg(int spg_id) if (ret < 0) goto out_fput_all; - system_group_count++; + if (!is_local_group(spg_id)) + system_group_count++; return spg; out_fput_all: @@ -1322,6 +1485,10 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) if (ret) goto out_drop_group; + ret = sp_mapping_group_setup(mm, spg); + if (ret) + goto out_drop_group; + node = create_spg_node(mm, prot, spg); if (unlikely(IS_ERR(node))) { ret = PTR_ERR(node); @@ -1603,7 +1770,6 @@ static void __insert_sp_area(struct sp_area *spa) /* The sp_area cache globals are protected by sp_area_lock */ static struct rb_node *free_sp_area_cache; -static unsigned long cached_hole_size; static unsigned long cached_vstart; /* affected by SP_DVPP and sp_config_dvpp_range() */ /** @@ -1622,11 +1788,12 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, { struct sp_area *spa, *first, *err; struct rb_node *n; - unsigned long vstart = MMAP_SHARE_POOL_START; - unsigned long vend = MMAP_SHARE_POOL_16G_START; + unsigned long vstart; + unsigned long vend; unsigned long addr; unsigned long size_align = ALIGN(size, PMD_SIZE); /* va aligned to 2M */ int device_id, node_id; + struct sp_mapping *mapping; device_id = sp_flags_device_id(flags); node_id = flags & SP_SPEC_NODE_ID ? sp_flags_node_id(flags) : device_id; @@ -1636,17 +1803,13 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, return ERR_PTR(-EINVAL); } - if ((flags & SP_DVPP)) { - if (!is_sp_dev_addr_enabled(device_id)) { - vstart = MMAP_SHARE_POOL_16G_START + - device_id * MMAP_SHARE_POOL_16G_SIZE; - vend = vstart + MMAP_SHARE_POOL_16G_SIZE; - } else { - vstart = sp_dev_va_start[device_id]; - vend = vstart + sp_dev_va_size[device_id]; - } - } + if (flags & SP_DVPP) + mapping = spg->dvpp; + else + mapping = spg->normal; + vstart = mapping->start[device_id]; + vend = mapping->end[device_id]; spa = __kmalloc_node(sizeof(struct sp_area), GFP_KERNEL, node_id); if (unlikely(!spa)) return ERR_PTR(-ENOMEM); @@ -1662,18 +1825,18 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, * Note that sp_free_area may update free_sp_area_cache * without updating cached_hole_size. 
*/ - if (!free_sp_area_cache || size_align < cached_hole_size || - vstart != cached_vstart) { - cached_hole_size = 0; - free_sp_area_cache = NULL; + if (!mapping->free_area_cache || size_align < mapping->cached_hole_size || + vstart != mapping->cached_vstart) { + mapping->cached_hole_size = 0; + mapping->free_area_cache = NULL; } /* record if we encounter less permissive parameters */ - cached_vstart = vstart; + mapping->cached_vstart = vstart; /* find starting point for our search */ - if (free_sp_area_cache) { - first = rb_entry(free_sp_area_cache, struct sp_area, rb_node); + if (mapping->free_area_cache) { + first = rb_entry(mapping->free_area_cache, struct sp_area, rb_node); addr = first->va_end; if (addr + size_align < addr) { err = ERR_PTR(-EOVERFLOW); @@ -1686,7 +1849,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, goto error; } - n = sp_area_root.rb_node; + n = mapping->area_root.rb_node; first = NULL; while (n) { @@ -1708,8 +1871,8 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, /* from the starting point, traverse areas until a suitable hole is found */ while (addr + size_align > first->va_start && addr + size_align <= vend) { - if (addr + cached_hole_size < first->va_start) - cached_hole_size = first->va_start - addr; + if (addr + mapping->cached_hole_size < first->va_start) + mapping->cached_hole_size = first->va_start - addr; addr = first->va_end; if (addr + size_align < addr) { err = ERR_PTR(-EOVERFLOW); @@ -1747,9 +1910,8 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, spa_inc_usage(spa); __insert_sp_area(spa); - free_sp_area_cache = &spa->rb_node; - if (spa->spg != spg_none) - list_add_tail(&spa->link, &spg->spa_list); + mapping->free_area_cache = &spa->rb_node; + list_add_tail(&spa->link, &spg->spa_list); spin_unlock(&sp_area_lock); @@ -1840,8 +2002,7 @@ static void sp_free_area(struct sp_area *spa) pr_debug("clear spa->kva %ld is not valid\n", spa->kva); spa_dec_usage(spa); - if (spa->spg != spg_none) - list_del(&spa->link); + list_del(&spa->link); rb_erase(&spa->rb_node, &sp_area_root); RB_CLEAR_NODE(&spa->rb_node); @@ -2001,7 +2162,7 @@ static void sp_fallocate(struct sp_area *spa) static void sp_free_unmap_fallocate(struct sp_area *spa) { - if (spa->spg != spg_none) { + if (!is_local_group(spa->spg->id)) { down_read(&spa->spg->rw_lock); __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); sp_fallocate(spa); @@ -2206,7 +2367,6 @@ static void trace_sp_alloc_begin(struct sp_alloc_context *ac) static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va) { unsigned long cost; - bool is_pass_through = ac->spg == spg_none ? 
true : false; if (!sysctl_sp_perf_alloc) return; @@ -2218,7 +2378,8 @@ static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va) if (cost >= (unsigned long)sysctl_sp_perf_alloc) { pr_err("Task %s(%d/%d) sp_alloc returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, sp_flags is %lx, pass through is %d\n", current->comm, current->tgid, current->pid, - va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags, is_pass_through); + va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags, + is_local_group(ac->spg->id)); } } @@ -2276,7 +2437,9 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, return -ENODEV; } } else { /* alocation pass through scene */ - spg = spg_none; + spg = sp_get_local_group(current->mm); + if (IS_ERR(spg)) + return PTR_ERR(spg); } if (sp_flags & SP_HUGEPAGE) { @@ -2299,7 +2462,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, static void sp_alloc_unmap(struct mm_struct *mm, struct sp_area *spa, struct sp_group_node *spg_node) { - if (spa->spg != spg_none) + if (!is_local_group(spa->spg->id)) __sp_free(spa->spg, spa->va_start, spa->real_size, mm); } @@ -2364,7 +2527,7 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, return ret; unmap: - if (spa->spg != spg_none) + if (!is_local_group(spa->spg->id)) sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); else sp_munmap(mm, spa->va_start, spa->real_size); @@ -2467,8 +2630,9 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, ret = sp_alloc_populate(mm, spa, ac); if (ret) { err: - if (spa->spg != spg_none) - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); + if (!is_local_group(spa->spg->id)) + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, + spg_node); else sp_munmap(mm, spa->va_start, spa->real_size); @@ -2493,7 +2657,7 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct mm_struct *mm; struct sp_group_node *spg_node; - if (spa->spg == spg_none) { + if (is_local_group(spa->spg->id)) { ret = __sp_alloc_mmap_populate(current->mm, spa, NULL, ac); } else { /* create mapping for each process in the group */ @@ -2517,10 +2681,9 @@ static void sp_alloc_finish(int result, struct sp_area *spa, struct sp_alloc_context *ac) { struct sp_group *spg = ac->spg; - bool is_pass_through = spg == spg_none ? 
true : false; - /* match sp_alloc_check_prepare */ - if (!is_pass_through) + /* match sp_alloc_prepare */ + if (!is_local_group(spg->id)) up_read(&spg->rw_lock); if (!result) @@ -2532,9 +2695,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, trace_sp_alloc_finish(ac, spa->va_start); } - if (!is_pass_through) - sp_group_drop(spg); - + sp_group_drop(spg); sp_dump_stack(); sp_try_to_compact(); } @@ -2716,22 +2877,33 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, */ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, unsigned long sp_flags) { + int ret; void *uva; struct sp_area *spa; struct spg_proc_stat *stat; unsigned long prot = PROT_READ | PROT_WRITE; struct sp_k2u_context kc; + struct sp_group *spg; down_write(&sp_group_sem); - stat = sp_init_process_stat(current, current->mm, spg_none); - up_write(&sp_group_sem); + ret = sp_mapping_group_setup_local(current->mm); + if (ret) { + up_write(&sp_group_sem); + pr_err_ratelimited("k2u_task init local mapping failed %d\n", ret); + return ERR_PTR(ret); + } + + spg = current->mm->sp_group_master->local; + stat = sp_init_process_stat(current, current->mm, spg); if (IS_ERR(stat)) { + up_write(&sp_group_sem); pr_err_ratelimited("k2u_task init process stat failed %lx\n", PTR_ERR(stat)); return stat; } + up_write(&sp_group_sem); - spa = sp_alloc_area(size, sp_flags, spg_none, SPA_TYPE_K2TASK, current->tgid); + spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2TASK, current->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("alloc spa failed in k2u_task (potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); @@ -3926,7 +4098,7 @@ static void rb_spa_stat_show(struct seq_file *seq) atomic_inc(&spa->use_count); spin_unlock(&sp_area_lock); - if (spa->spg == spg_none) /* k2u to task */ + if (is_local_group(spa->spg->id)) /* k2u to task */ seq_printf(seq, "%-10s ", "None"); else { down_read(&spa->spg->rw_lock); @@ -4456,6 +4628,9 @@ void sp_group_post_exit(struct mm_struct *mm) kfree(spg_node); } up_write(&sp_group_sem); + + if (master->local) + sp_group_drop(master->local); kfree(master); } @@ -4487,17 +4662,9 @@ static int __init share_pool_init(void) if (!sp_is_enabled()) return 0; - /* lockless, as init kthread has no sp operation else */ - spg_none = create_spg(GROUP_NONE); - /* without free spg_none, not a serious problem */ - if (IS_ERR(spg_none) || !spg_none) - goto fail; - sp_mapping_normal = sp_mapping_create(SP_MAPPING_NORMAL); - if (IS_ERR(sp_mapping_normal)) { - sp_group_drop(spg_none); + if (IS_ERR(sp_mapping_normal)) goto fail; - } atomic_inc(&sp_mapping_normal->user); sp_device_number_detect(); -- Gitee From 673ecc617965f1d37f05eda7be07247b33f0b45a Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:46:48 +0800 Subject: [PATCH 08/74] mm/sharepool: Add an interface to obtain an id Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- The DVPP address space is per process or per sharing group. During sp_free and unshare, you need to know which address space the current address belongs to. 
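A typical caller is expected to look roughly like this (sketch only):

    int id = mg_sp_id_of_current();  /* id of the caller's local group */

    if (id < 0)
        return id;                   /* kthread or no mm */

    /* id can later be handed to the free/unshare interfaces to pick
     * the matching (per-process DVPP) address space */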
Signed-off-by: Zhou Guanghui --- include/linux/share_pool.h | 12 ++++++++++++ mm/share_pool.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 92cc1ffa3946..4e282b4122a3 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -283,6 +283,9 @@ extern bool mg_is_sharepool_addr(unsigned long addr); extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); extern int sp_group_add_task(int pid, int spg_id); +extern int sp_id_of_current(void); +extern int mg_sp_id_of_current(void); + extern void sp_area_drop(struct vm_area_struct *vma); extern int sp_group_exit(struct mm_struct *mm); extern void sp_group_post_exit(struct mm_struct *mm); @@ -430,6 +433,15 @@ static inline int mg_sp_unshare(unsigned long va, unsigned long size) return -EPERM; } +static inline int sp_id_of_current(void) +{ + return -EPERM; +} + +static inline int mg_sp_id_of_current(void) +{ + return -EPERM; +} static inline void sp_init_mm(struct mm_struct *mm) { diff --git a/mm/share_pool.c b/mm/share_pool.c index bff066611ade..403e86e29ecb 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1745,6 +1745,43 @@ int sp_group_del_task(int pid, int spg_id) } EXPORT_SYMBOL_GPL(sp_group_del_task); +int sp_id_of_current(void) +{ + int ret, spg_id; + struct sp_group_master *master; + + if (current->flags & PF_KTHREAD || !current->mm) + return -EINVAL; + + down_read(&sp_group_sem); + master = current->mm->sp_group_master; + if (master && master->local) { + spg_id = master->local->id; + up_read(&sp_group_sem); + return spg_id; + } + up_read(&sp_group_sem); + + down_write(&sp_group_sem); + ret = sp_mapping_group_setup_local(current->mm); + if (ret) { + up_write(&sp_group_sem); + return ret; + } + master = current->mm->sp_group_master; + spg_id = master->local->id; + up_write(&sp_group_sem); + + return spg_id; +} +EXPORT_SYMBOL_GPL(sp_id_of_current); + +int mg_sp_id_of_current(void) +{ + return sp_id_of_current(); +} +EXPORT_SYMBOL_GPL(mg_sp_id_of_current); + /* the caller must hold sp_area_lock */ static void __insert_sp_area(struct sp_area *spa) { -- Gitee From e03ab55a4b8dd907909821e616e464b246dfd3ab Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:46:49 +0800 Subject: [PATCH 09/74] mm/sharepool: Release the sp addr based on the id Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- The address space of the DVPP is managed by group. When releasing the shared pool memory, you need to find the corresponding address space based on the ID. 
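Callers now pass the group id together with the address, for example
(sketch of the updated signatures):

    addr = mg_sp_alloc(size, sp_flags, spg_id);
    ...
    mg_sp_free((unsigned long)addr, spg_id);    /* was mg_sp_free(addr) */
    mg_sp_unshare(va, size, spg_id);            /* was mg_sp_unshare(va, size) */

For a pass-through allocation, the local group id (e.g. the value returned
by mg_sp_id_of_current()) is the one that selects the right DVPP mapping.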
Signed-off-by: Zhou Guanghui --- include/linux/share_pool.h | 12 +- mm/share_pool.c | 235 ++++++++++++++++++------------------- 2 files changed, 122 insertions(+), 125 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 4e282b4122a3..7b536b30907d 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -249,8 +249,8 @@ extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, extern void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); -extern int sp_free(unsigned long addr); -extern int mg_sp_free(unsigned long addr); +extern int sp_free(unsigned long addr, int id); +extern int mg_sp_free(unsigned long addr, int id); extern void *sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id); @@ -261,7 +261,7 @@ extern void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); extern void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); extern int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id); -extern int mg_sp_unshare(unsigned long va, unsigned long size); +extern int mg_sp_unshare(unsigned long va, unsigned long size, int id); extern int sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data); @@ -391,12 +391,12 @@ static inline void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int return NULL; } -static inline int sp_free(unsigned long addr) +static inline int sp_free(unsigned long addr, int id) { return -EPERM; } -static inline int mg_sp_free(unsigned long addr) +static inline int mg_sp_free(unsigned long addr, int id) { return -EPERM; } @@ -428,7 +428,7 @@ static inline int sp_unshare(unsigned long va, unsigned long size, int pid, int return -EPERM; } -static inline int mg_sp_unshare(unsigned long va, unsigned long size) +static inline int mg_sp_unshare(unsigned long va, unsigned long size, int id) { return -EPERM; } diff --git a/mm/share_pool.c b/mm/share_pool.c index 403e86e29ecb..57a004674970 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -637,12 +637,6 @@ static void free_spg_stat(int spg_id) kfree(stat); } -/* - * Group '0' for k2u_task and pass through. No process will be actually - * added to. - */ -static struct sp_group *spg_none; - /* statistics of all sp area, protected by sp_area_lock */ struct sp_spa_stat { unsigned int total_num; @@ -939,26 +933,6 @@ static int get_task(int pid, struct task_struct **task) return 0; } -static struct sp_group *get_first_group(struct mm_struct *mm) -{ - struct sp_group *spg = NULL; - struct sp_group_master *master = mm->sp_group_master; - - if (master && master->count >= 1) { - struct sp_group_node *spg_node = NULL; - - spg_node = list_first_entry(&master->node_list, - struct sp_group_node, group_node); - spg = spg_node->spg; - - /* don't revive a dead group */ - if (!spg || !atomic_inc_not_zero(&spg->use_count)) - spg = NULL; - } - - return spg; -} - /* * the caller must: * 1. hold spg->rw_lock @@ -983,35 +957,27 @@ static struct sp_group *__sp_find_spg_locked(int pid, int spg_id) struct task_struct *tsk = NULL; int ret = 0; - ret = get_task(pid, &tsk); - if (ret) - return NULL; - if (spg_id == SPG_ID_DEFAULT) { - /* - * Once we encounter a concurrency problem here. - * To fix it, we believe get_task_mm() and mmput() is too - * heavy because we just get the pointer of sp_group. 
- */ + ret = get_task(pid, &tsk); + if (ret) + return NULL; + task_lock(tsk); if (tsk->mm == NULL) spg = NULL; - else - spg = get_first_group(tsk->mm); + else if (tsk->mm->sp_group_master) + spg = tsk->mm->sp_group_master->local; task_unlock(tsk); + + put_task_struct(tsk); } else { spg = idr_find(&sp_group_idr, spg_id); - /* don't revive a dead group */ - if (!spg || !atomic_inc_not_zero(&spg->use_count)) - goto fail; } - put_task_struct(tsk); - return spg; + if (!spg || !atomic_inc_not_zero(&spg->use_count)) + return NULL; -fail: - put_task_struct(tsk); - return NULL; + return spg; } static struct sp_group *__sp_find_spg(int pid, int spg_id) @@ -1783,9 +1749,9 @@ int mg_sp_id_of_current(void) EXPORT_SYMBOL_GPL(mg_sp_id_of_current); /* the caller must hold sp_area_lock */ -static void __insert_sp_area(struct sp_area *spa) +static void __insert_sp_area(struct sp_mapping *spm, struct sp_area *spa) { - struct rb_node **p = &sp_area_root.rb_node; + struct rb_node **p = &spm->area_root.rb_node; struct rb_node *parent = NULL; while (*p) { @@ -1802,13 +1768,9 @@ static void __insert_sp_area(struct sp_area *spa) } rb_link_node(&spa->rb_node, parent, p); - rb_insert_color(&spa->rb_node, &sp_area_root); + rb_insert_color(&spa->rb_node, &spm->area_root); } -/* The sp_area cache globals are protected by sp_area_lock */ -static struct rb_node *free_sp_area_cache; -static unsigned long cached_vstart; /* affected by SP_DVPP and sp_config_dvpp_range() */ - /** * sp_alloc_area() - Allocate a region of VA from the share pool. * @size: the size of VA to allocate. @@ -1856,10 +1818,10 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, /* * Invalidate cache if we have more permissive parameters. * cached_hole_size notes the largest hole noticed _below_ - * the sp_area cached in free_sp_area_cache: if size fits + * the sp_area cached in free_area_cache: if size fits * into that hole, we want to scan from vstart to reuse - * the hole instead of allocating above free_sp_area_cache. - * Note that sp_free_area may update free_sp_area_cache + * the hole instead of allocating above free_area_cache. + * Note that sp_free_area may update free_area_cache * without updating cached_hole_size. 
*/ if (!mapping->free_area_cache || size_align < mapping->cached_hole_size || @@ -1946,7 +1908,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, spa->device_id = device_id; spa_inc_usage(spa); - __insert_sp_area(spa); + __insert_sp_area(mapping, spa); mapping->free_area_cache = &spa->rb_node; list_add_tail(&spa->link, &spg->spa_list); @@ -1961,9 +1923,15 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, } /* the caller should hold sp_area_lock */ -static struct sp_area *__find_sp_area_locked(unsigned long addr) +static struct sp_area *__find_sp_area_locked(struct sp_group *spg, + unsigned long addr) { - struct rb_node *n = sp_area_root.rb_node; + struct rb_node *n; + + if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + n = spg->normal->area_root.rb_node; + else + n = spg->dvpp->area_root.rb_node; while (n) { struct sp_area *spa; @@ -1981,12 +1949,12 @@ static struct sp_area *__find_sp_area_locked(unsigned long addr) return NULL; } -static struct sp_area *__find_sp_area(unsigned long addr) +static struct sp_area *__find_sp_area(struct sp_group *spg, unsigned long addr) { struct sp_area *n; spin_lock(&sp_area_lock); - n = __find_sp_area_locked(addr); + n = __find_sp_area_locked(spg, addr); if (n) atomic_inc(&n->use_count); spin_unlock(&sp_area_lock); @@ -2011,22 +1979,30 @@ static bool vmalloc_area_clr_flag(unsigned long kva, unsigned long flags) */ static void sp_free_area(struct sp_area *spa) { + unsigned long addr = spa->va_start; + struct sp_mapping *spm; + lockdep_assert_held(&sp_area_lock); - if (free_sp_area_cache) { + if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + spm = spa->spg->normal; + else + spm = spa->spg->dvpp; + + if (spm->free_area_cache) { struct sp_area *cache; - cache = rb_entry(free_sp_area_cache, struct sp_area, rb_node); + cache = rb_entry(spm->free_area_cache, struct sp_area, rb_node); if (spa->va_start <= cache->va_start) { - free_sp_area_cache = rb_prev(&spa->rb_node); + spm->free_area_cache = rb_prev(&spa->rb_node); /* * the new cache node may be changed to another region, * i.e. from DVPP region to normal region */ - if (free_sp_area_cache) { - cache = rb_entry(free_sp_area_cache, + if (spm->free_area_cache) { + cache = rb_entry(spm->free_area_cache, struct sp_area, rb_node); - cached_vstart = cache->region_vstart; + spm->cached_vstart = cache->region_vstart; } /* * We don't try to update cached_hole_size, @@ -2041,7 +2017,7 @@ static void sp_free_area(struct sp_area *spa) spa_dec_usage(spa); list_del(&spa->link); - rb_erase(&spa->rb_node, &sp_area_root); + rb_erase(&spa->rb_node, &spm->area_root); RB_CLEAR_NODE(&spa->rb_node); kfree(spa); } @@ -2083,7 +2059,7 @@ void sp_area_drop(struct vm_area_struct *vma) * an atomic operation. 
*/ spin_lock(&sp_area_lock); - spa = __find_sp_area_locked(vma->vm_start); + spa = __find_sp_area_locked(vma->vm_mm->sp_group_master->local, vma->vm_start); __sp_area_drop_locked(spa); spin_unlock(&sp_area_lock); } @@ -2215,7 +2191,7 @@ static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm int ret = 0; down_read(&spg->rw_lock); - if (!is_process_in_group(spg, mm)) + if (!is_local_group(spg->id) && !is_process_in_group(spg, mm)) ret = -EPERM; up_read(&spg->rw_lock); return ret; @@ -2228,6 +2204,7 @@ struct sp_free_context { unsigned long addr; struct sp_area *spa; int state; + int spg_id; }; /* when success, __sp_area_drop(spa) should be used */ @@ -2236,10 +2213,18 @@ static int sp_free_get_spa(struct sp_free_context *fc) int ret = 0; unsigned long addr = fc->addr; struct sp_area *spa; + struct sp_group *spg; + + spg = __sp_find_spg(current->tgid, fc->spg_id); + if (!spg) { + pr_debug("sp free get group failed %d\n", fc->spg_id); + return -EINVAL; + } fc->state = FREE_CONT; - spa = __find_sp_area(addr); + spa = __find_sp_area(spg, addr); + sp_group_drop(spg); if (!spa) { pr_debug("sp free invalid input addr %lx\n", addr); return -EINVAL; @@ -2252,46 +2237,37 @@ static int sp_free_get_spa(struct sp_free_context *fc) } fc->spa = spa; - if (spa->spg != spg_none) { - /* - * Access control: an sp addr can only be freed by - * 1. another task in the same spg - * 2. a kthread - * - * a passthrough addr can only be freed by the applier process - */ - if (!current->mm) - goto check_spa; + if (!current->mm) + goto check_spa; - ret = sp_check_caller_permission(spa->spg, current->mm); - if (ret < 0) - goto drop_spa; + ret = sp_check_caller_permission(spa->spg, current->mm); + if (ret < 0) + goto drop_spa; check_spa: - down_write(&spa->spg->rw_lock); - if (!spg_valid(spa->spg)) { - fc->state = FREE_END; - up_write(&spa->spg->rw_lock); - goto drop_spa; - /* we must return success(0) in this situation */ - } - /* the life cycle of spa has a direct relation with sp group */ - if (unlikely(spa->is_dead)) { - up_write(&spa->spg->rw_lock); - pr_err_ratelimited("unexpected double sp free\n"); - dump_stack(); - ret = -EINVAL; - goto drop_spa; - } - spa->is_dead = true; - up_write(&spa->spg->rw_lock); + if (is_local_group(spa->spg->id) && (current->tgid != spa->applier)) { + ret = -EPERM; + goto drop_spa; + } - } else { - if (current->tgid != spa->applier) { - ret = -EPERM; - goto drop_spa; - } + down_write(&spa->spg->rw_lock); + if (!spg_valid(spa->spg)) { + fc->state = FREE_END; + up_write(&spa->spg->rw_lock); + goto drop_spa; + /* we must return success(0) in this situation */ + } + /* the life cycle of spa has a direct relation with sp group */ + if (unlikely(spa->is_dead)) { + up_write(&spa->spg->rw_lock); + pr_err_ratelimited("unexpected double sp free\n"); + dump_stack(); + ret = -EINVAL; + goto drop_spa; } + spa->is_dead = true; + up_write(&spa->spg->rw_lock); + return 0; drop_spa: @@ -2302,21 +2278,26 @@ static int sp_free_get_spa(struct sp_free_context *fc) /** * sp_free() - Free the memory allocated by sp_alloc(). * @addr: the starting VA of the memory. + * @id: Address space identifier, which is used to distinguish the addr. * * Return: * * 0 - success. * * -EINVAL - the memory can't be found or was not allocted by share pool. * * -EPERM - the caller has no permision to free the memory. 
*/ -int sp_free(unsigned long addr) +int sp_free(unsigned long addr, int id) { int ret = 0; struct sp_free_context fc = { .addr = addr, + .spg_id = id, }; check_interrupt_context(); + if (current->flags & PF_KTHREAD) + return -EINVAL; + ret = sp_free_get_spa(&fc); if (ret || fc.state == FREE_END) goto out; @@ -2337,9 +2318,9 @@ int sp_free(unsigned long addr) } EXPORT_SYMBOL_GPL(sp_free); -int mg_sp_free(unsigned long addr) +int mg_sp_free(unsigned long addr, int id) { - return sp_free(addr); + return sp_free(addr, id); } EXPORT_SYMBOL_GPL(mg_sp_free); @@ -2433,6 +2414,11 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, if (enable_mdc_default_group) spg_id = mdc_default_group_id; + if (current->flags & PF_KTHREAD) { + pr_err_ratelimited("allocation failed, task is kthread\n"); + return -EINVAL; + } + if (unlikely(!size || (size >> PAGE_SHIFT) > totalram_pages())) { pr_err_ratelimited("allocation failed, invalid size %lu\n", size); return -EINVAL; @@ -2473,7 +2459,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, pr_err_ratelimited("allocation failed, task not in group\n"); return -ENODEV; } - } else { /* alocation pass through scene */ + } else { /* allocation pass through scene */ spg = sp_get_local_group(current->mm); if (IS_ERR(spg)) return PTR_ERR(spg); @@ -3504,7 +3490,7 @@ EXPORT_SYMBOL_GPL(mg_sp_make_share_u2k); * * This also means we must trust DVPP channel destroy and guard worker code. */ -static int sp_unshare_uva(unsigned long uva, unsigned long size) +static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id) { int ret = 0; struct mm_struct *mm; @@ -3512,14 +3498,21 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) unsigned long uva_aligned; unsigned long size_aligned; unsigned int page_size; + struct sp_group *spg; + + spg = __sp_find_spg(current->tgid, group_id); + if (!spg) { + pr_debug("sp unshare find group failed %d\n", group_id); + return -EINVAL; + } /* * at first we guess it's a hugepage addr * we can tolerate at most PMD_SIZE or PAGE_SIZE which is matched in k2u */ - spa = __find_sp_area(ALIGN_DOWN(uva, PMD_SIZE)); + spa = __find_sp_area(spg, ALIGN_DOWN(uva, PMD_SIZE)); if (!spa) { - spa = __find_sp_area(ALIGN_DOWN(uva, PAGE_SIZE)); + spa = __find_sp_area(spg, ALIGN_DOWN(uva, PAGE_SIZE)); if (!spa) { ret = -EINVAL; pr_debug("invalid input uva %lx in unshare uva\n", (unsigned long)uva); @@ -3650,6 +3643,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) out_drop_area: __sp_area_drop(spa); out: + sp_group_drop(spg); return ret; } @@ -3713,9 +3707,12 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) check_interrupt_context(); + if (current->flags & PF_KTHREAD) + return -EINVAL; + if (va < TASK_SIZE) { /* user address */ - ret = sp_unshare_uva(va, size); + ret = sp_unshare_uva(va, size, spg_id); } else if (va >= PAGE_OFFSET) { /* kernel address */ ret = sp_unshare_kva(va, size); @@ -3729,9 +3726,9 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) } EXPORT_SYMBOL_GPL(sp_unshare); -int mg_sp_unshare(unsigned long va, unsigned long size) +int mg_sp_unshare(unsigned long va, unsigned long size, int id) { - return sp_unshare(va, size, 0, 0); + return sp_unshare(va, size, 0, id); } EXPORT_SYMBOL_GPL(mg_sp_unshare); @@ -3891,8 +3888,8 @@ int sp_node_id(struct vm_area_struct *vma) if (!sp_is_enabled()) return node_id; - if (vma) { - spa = __find_sp_area(vma->vm_start); + if (vma && vma->vm_flags & VM_SHARE_POOL) { + 
spa = __find_sp_area(vma->vm_mm->sp_group_master->local, vma->vm_start); if (spa) { node_id = spa->node_id; __sp_area_drop(spa); @@ -4058,7 +4055,7 @@ static void print_process_prot(struct seq_file *seq, unsigned long prot) seq_puts(seq, "R"); else if (prot == (PROT_READ | PROT_WRITE)) seq_puts(seq, "RW"); - else /* e.g. spg_none */ + else seq_puts(seq, "-"); } @@ -4459,7 +4456,7 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, int node_id; struct sp_area *spa; - spa = __find_sp_area(vma->vm_start); + spa = __find_sp_area(mm->sp_group_master->local, vma->vm_start); if (!spa) { pr_err("share pool: vma is invalid, not from sp mmap\n"); return ret; -- Gitee From 3eee3114d4e9cf459ec67c633ceff707a40877d7 Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:46:50 +0800 Subject: [PATCH 10/74] mm/sharepool: Share pool statistics adaption Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- The management of the address space is adjusted, and the statistical data processing of the shared pool needs to be adapted. Signed-off-by: Zhou Guanghui Signed-off-by: Zhang Jian --- mm/share_pool.c | 69 ++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 57a004674970..de9abb9ca97d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -694,7 +694,6 @@ struct sp_area { int device_id; }; static DEFINE_SPINLOCK(sp_area_lock); -static struct rb_root sp_area_root = RB_ROOT; static unsigned long spa_size(struct sp_area *spa) { @@ -4117,14 +4116,13 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, return 0; } -static void rb_spa_stat_show(struct seq_file *seq) +static void spa_stat_of_mapping_show(struct seq_file *seq, struct sp_mapping *spm) { struct rb_node *node; struct sp_area *spa, *prev = NULL; spin_lock(&sp_area_lock); - - for (node = rb_first(&sp_area_root); node; node = rb_next(node)) { + for (node = rb_first(&spm->area_root); node; node = rb_next(node)) { __sp_area_drop_locked(prev); spa = rb_entry(node, struct sp_area, rb_node); @@ -4132,16 +4130,12 @@ static void rb_spa_stat_show(struct seq_file *seq) atomic_inc(&spa->use_count); spin_unlock(&sp_area_lock); - if (is_local_group(spa->spg->id)) /* k2u to task */ - seq_printf(seq, "%-10s ", "None"); - else { - down_read(&spa->spg->rw_lock); - if (spg_valid(spa->spg)) /* k2u to group */ - seq_printf(seq, "%-10d ", spa->spg->id); - else /* spg is dead */ - seq_printf(seq, "%-10s ", "Dead"); - up_read(&spa->spg->rw_lock); - } + down_read(&spa->spg->rw_lock); + if (spg_valid(spa->spg)) /* k2u to group */ + seq_printf(seq, "%-10d ", spa->spg->id); + else /* spg is dead */ + seq_printf(seq, "%-10s ", "Dead"); + up_read(&spa->spg->rw_lock); seq_printf(seq, "%2s%-14lx %2s%-14lx %-10ld ", "0x", spa->va_start, @@ -4177,6 +4171,30 @@ static void rb_spa_stat_show(struct seq_file *seq) spin_unlock(&sp_area_lock); } +static void spa_normal_stat_show(struct seq_file *seq) +{ + spa_stat_of_mapping_show(seq, sp_mapping_normal); +} + +static int idr_spg_dvpp_stat_show_cb(int id, void *p, void *data) +{ + struct sp_group *spg = p; + struct seq_file *seq = data; + + if (!is_local_group(spg->id) || atomic_read(&spg->dvpp->user) == 1) + spa_stat_of_mapping_show(seq, spg->dvpp); + + return 0; +} + +static void spa_dvpp_stat_show(struct seq_file *seq) +{ + down_read(&sp_group_sem); + idr_for_each(&sp_group_idr, 
idr_spg_dvpp_stat_show_cb, seq); + up_read(&sp_group_sem); +} + + void spa_overview_show(struct seq_file *seq) { unsigned int total_num, alloc_num, k2u_task_num, k2u_spg_num; @@ -4230,12 +4248,11 @@ static int idr_spg_stat_cb(int id, void *p, void *data) struct sp_spg_stat *s = p; struct seq_file *seq = data; - if (seq != NULL) { - if (id == 0) - seq_puts(seq, "Non Group "); - else - seq_printf(seq, "Group %6d ", id); + if (is_local_group(id) && atomic64_read(&s->size) == 0) + return 0; + if (seq != NULL) { + seq_printf(seq, "Group %6d ", id); seq_printf(seq, "size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", byte2kb(atomic64_read(&s->size)), atomic_read(&s->spa_num), @@ -4243,11 +4260,7 @@ static int idr_spg_stat_cb(int id, void *p, void *data) byte2kb(atomic64_read(&s->alloc_nsize)), byte2kb(atomic64_read(&s->alloc_hsize))); } else { - if (id == 0) - pr_info("Non Group "); - else - pr_info("Group %6d ", id); - + pr_info("Group %6d ", id); pr_info("size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", byte2kb(atomic64_read(&s->size)), atomic_read(&s->spa_num), @@ -4291,7 +4304,8 @@ static int spa_stat_show(struct seq_file *seq, void *offset) /* print the file header */ seq_printf(seq, "%-10s %-16s %-16s %-10s %-7s %-5s %-8s %-8s\n", "Group ID", "va_start", "va_end", "Size(KB)", "Type", "Huge", "PID", "Ref"); - rb_spa_stat_show(seq); + spa_normal_stat_show(seq); + spa_dvpp_stat_show(seq); return 0; } @@ -4328,10 +4342,7 @@ static int idr_proc_stat_cb(int id, void *p, void *data) prot = get_process_prot_locked(id, mm); seq_printf(seq, "%-8d ", tgid); - if (id == 0) - seq_printf(seq, "%-8c ", '-'); - else - seq_printf(seq, "%-8d ", id); + seq_printf(seq, "%-8d ", id); seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-10ld %-8ld %-7ld %-7ld %-10ld ", get_spg_proc_alloc(spg_proc_stat), get_spg_proc_k2u(spg_proc_stat), -- Gitee From b6dc2f88bab23305713ddb0a1a3fe5ba8f7ce58d Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:46:51 +0800 Subject: [PATCH 11/74] mm/sharepool: Use vm_private_data to store the spa Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- When we destroy a vma, we first find the spa depending on the vma->vm_start, during which we should hold the sp_area_lock. While we store the spa in vma, we can get the spa directly. Don't worry if the spa exists or if it's to be freed soon, since we have increaced the refcount for the spa when it's mappend into a vma. 
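A rough sketch of the idea (the exact call sites are in the diff below):
sp_mmap() looks up the vma it has just created and stashes the spa in it,
and the teardown path reads it back instead of searching the rbtree:

	/* at map time, in sp_mmap() */
	vma = find_vma(mm, addr);
	vma->vm_private_data = spa;

	/* at unmap time, in sp_area_drop() */
	spin_lock(&sp_area_lock);
	__sp_area_drop_locked(vma->vm_private_data);
	spin_unlock(&sp_area_lock);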
Signed-off-by: Wang Wensheng --- mm/share_pool.c | 45 ++++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index de9abb9ca97d..45d967a7d142 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -850,7 +850,7 @@ static inline bool check_aoscore_process(struct task_struct *tsk) static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, struct sp_area *spa, unsigned long *populate, - unsigned long prot); + unsigned long prot, struct vm_area_struct **pvma); static void sp_munmap(struct mm_struct *mm, unsigned long addr, unsigned long size); #define K2U_NORMAL 0 @@ -1515,7 +1515,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) break; } - addr = sp_mmap(mm, file, spa, &populate, prot); + addr = sp_mmap(mm, file, spa, &populate, prot, NULL); if (IS_ERR_VALUE(addr)) { sp_munmap_task_areas(mm, spg, &spa->link); up_write(&mm->mmap_lock); @@ -2045,8 +2045,6 @@ static void __sp_area_drop(struct sp_area *spa) void sp_area_drop(struct vm_area_struct *vma) { - struct sp_area *spa; - if (!(vma->vm_flags & VM_SHARE_POOL)) return; @@ -2058,8 +2056,7 @@ void sp_area_drop(struct vm_area_struct *vma) * an atomic operation. */ spin_lock(&sp_area_lock); - spa = __find_sp_area_locked(vma->vm_mm->sp_group_master->local, vma->vm_start); - __sp_area_drop_locked(spa); + __sp_area_drop_locked(vma->vm_private_data); spin_unlock(&sp_area_lock); } @@ -2326,7 +2323,7 @@ EXPORT_SYMBOL_GPL(mg_sp_free); /* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_lock). */ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, struct sp_area *spa, unsigned long *populate, - unsigned long prot) + unsigned long prot, struct vm_area_struct **pvma) { unsigned long addr = spa->va_start; unsigned long size = spa_size(spa); @@ -2334,6 +2331,7 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, MAP_SHARE_POOL; unsigned long vm_flags = VM_NORESERVE | VM_SHARE_POOL | VM_DONTCOPY; unsigned long pgoff = addr_offset(spa) >> PAGE_SHIFT; + struct vm_area_struct *vma; /* Mark the mapped region to be locked. After the MAP_LOCKED is enable, * multiple tasks will preempt resources, causing performance loss. 
@@ -2349,8 +2347,13 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, pr_err("do_mmap fails %ld\n", addr); } else { BUG_ON(addr != spa->va_start); + vma = find_vma(mm, addr); + vma->vm_private_data = spa; + if (pvma) + *pvma = vma; } + return addr; } @@ -2495,7 +2498,6 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, unsigned long mmap_addr; /* pass through default permission */ unsigned long prot = PROT_READ | PROT_WRITE; - unsigned long sp_addr = spa->va_start; unsigned long populate = 0; struct vm_area_struct *vma; @@ -2514,7 +2516,7 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, prot = PROT_READ; /* when success, mmap_addr == spa->va_start */ - mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); + mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot, &vma); if (IS_ERR_VALUE(mmap_addr)) { up_write(&mm->mmap_lock); sp_alloc_unmap(mm, spa, spg_node); @@ -2530,14 +2532,6 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, } ac->populate = populate; - vma = find_vma(mm, sp_addr); - if (unlikely(!vma)) { - up_write(&mm->mmap_lock); - WARN(1, "allocation failed, can't find %lx vma\n", sp_addr); - ret = -EINVAL; - goto unmap; - } - if (ac->sp_flags & SP_PROT_RO) vma->vm_flags &= ~VM_MAYWRITE; @@ -2837,15 +2831,12 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, if (kc && kc->sp_flags & SP_PROT_RO) prot = PROT_READ; - ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); + ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot, &vma); if (IS_ERR_VALUE(ret_addr)) { pr_debug("k2u mmap failed %lx\n", ret_addr); goto put_mm; } - BUG_ON(ret_addr != spa->va_start); - vma = find_vma(mm, ret_addr); - BUG_ON(vma == NULL); if (prot & PROT_WRITE) vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); @@ -3887,12 +3878,9 @@ int sp_node_id(struct vm_area_struct *vma) if (!sp_is_enabled()) return node_id; - if (vma && vma->vm_flags & VM_SHARE_POOL) { - spa = __find_sp_area(vma->vm_mm->sp_group_master->local, vma->vm_start); - if (spa) { - node_id = spa->node_id; - __sp_area_drop(spa); - } + if (vma && vma->vm_flags & VM_SHARE_POOL && vma->vm_private_data) { + spa = vma->vm_private_data; + node_id = spa->node_id; } return node_id; @@ -4467,13 +4455,12 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, int node_id; struct sp_area *spa; - spa = __find_sp_area(mm->sp_group_master->local, vma->vm_start); + spa = vma->vm_private_data; if (!spa) { pr_err("share pool: vma is invalid, not from sp mmap\n"); return ret; } node_id = spa->node_id; - __sp_area_drop(spa); retry: page = find_lock_page(mapping, idx); -- Gitee From 081cc192db47cb4ee2f0db33b11f7e5ae24d24de Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:46:52 +0800 Subject: [PATCH 12/74] mm/sharepool: Unify the memory allocation process Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- There are two types of memory allocated from sharepool: passthrough memory for DVPP and shared memory. Currently, we branch to different routines depending on the memory type, both during the allocation and free process. Since we have already create a local group for passthrough memory, with just one step ahead, we could drop the redundant branches in allocation and free process and in all the fallback process when an error occurs. 
Here is the content of this patch: 1. Add erery process to its local group when initilizing its group_master. 2. Avoid to return the local group in find_sp_group_id_by_pid(). 3. Delete the redundant branches during allocation and free process. Signed-off-by: Wang Wensheng --- mm/share_pool.c | 140 ++++++++++++++++++++++++++++++------------------ 1 file changed, 87 insertions(+), 53 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 45d967a7d142..970fe76b4972 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -244,13 +244,15 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) return 0; } +static void free_sp_group_locked(struct sp_group *spg); +static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg); static struct sp_group *create_spg(int spg_id); static void free_new_spg_id(bool new, int spg_id); /* The caller must hold sp_group_sem */ static struct sp_group_master *sp_init_group_master_locked( struct mm_struct *mm, bool *exist) { - int spg_id; + int spg_id, ret; struct sp_group *spg; struct sp_group_master *master = mm->sp_group_master; @@ -266,16 +268,15 @@ static struct sp_group_master *sp_init_group_master_locked( spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_LOCAL_MIN, SPG_ID_LOCAL_MAX, GFP_ATOMIC); if (spg_id < 0) { - kfree(master); pr_err_ratelimited("generate local group id failed %d\n", spg_id); - return ERR_PTR(spg_id); + ret = spg_id; + goto free_master; } spg = create_spg(spg_id); if (IS_ERR(spg)) { - free_new_spg_id(true, spg_id); - kfree(master); - return (struct sp_group_master *)spg; + ret = PTR_ERR(spg); + goto free_spg_id; } INIT_LIST_HEAD(&master->node_list); @@ -285,8 +286,20 @@ static struct sp_group_master *sp_init_group_master_locked( master->local = spg; mm->sp_group_master = master; + ret = local_group_add_task(mm, spg); + if (ret < 0) + goto free_spg; + *exist = false; return master; + +free_spg: + free_sp_group_locked(spg); +free_spg_id: + free_new_spg_id(true, spg_id); +free_master: + kfree(master); + return ERR_PTR(ret); } static inline bool is_local_group(int spg_id) @@ -665,6 +678,8 @@ static struct sp_overall_stat sp_overall_stat; enum spa_type { SPA_TYPE_ALLOC = 1, + /* NOTE: reorganize after the statisical structure is reconstructed. */ + SPA_TYPE_ALLOC_PRIVATE = SPA_TYPE_ALLOC, SPA_TYPE_K2TASK, SPA_TYPE_K2SPG, }; @@ -1032,7 +1047,7 @@ EXPORT_SYMBOL_GPL(sp_group_id_by_pid); */ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) { - int ret = 0; + int ret = 0, real_count; struct sp_group_node *node; struct sp_group_master *master = NULL; struct task_struct *tsk; @@ -1057,18 +1072,28 @@ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) goto out_up_read; } - if (!master->count) { + /* + * There is a local group for each process which is used for + * passthrough allocation. The local group is a internal + * implementation for convenience and is not attempt to bother + * the user. 
+ */ + real_count = master->count - 1; + if (real_count <= 0) { ret = -ENODEV; goto out_up_read; } - if ((unsigned int)*num < master->count) { + if ((unsigned int)*num < real_count) { ret = -E2BIG; goto out_up_read; } - *num = master->count; + *num = real_count; - list_for_each_entry(node, &master->node_list, group_node) + list_for_each_entry(node, &master->node_list, group_node) { + if (is_local_group(node->spg->id)) + continue; *(spg_ids++) = node->spg->id; + } out_up_read: up_read(&sp_group_sem); @@ -1256,7 +1281,7 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) return -EEXIST; } - if (master->count + 1 == MAX_GROUP_FOR_TASK) { + if (master->count == MAX_GROUP_FOR_TASK) { pr_err("task reaches max group num\n"); return -ENOSPC; } @@ -1300,6 +1325,29 @@ static int insert_spg_node(struct sp_group *spg, struct sp_group_node *node) return 0; } +static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) +{ + struct sp_group_node *node; + struct spg_proc_stat *stat; + + node = create_spg_node(mm, PROT_READ | PROT_WRITE, spg); + if (IS_ERR(node)) + return PTR_ERR(node); + + /* use current just to avoid compile error, rebuild in following patch */ + stat = sp_init_process_stat(current, mm, spg); + if (IS_ERR(stat)) { + free_sp_group_locked(spg); + pr_err_ratelimited("init process stat failed %lx\n", PTR_ERR(stat)); + return PTR_ERR(stat); + } + + insert_spg_node(spg, node); + mmget(mm); + + return 0; +} + /* the caller must down_write(&spg->rw_lock) */ static void delete_spg_node(struct sp_group *spg, struct sp_group_node *node) { @@ -2171,15 +2219,10 @@ static void sp_fallocate(struct sp_area *spa) static void sp_free_unmap_fallocate(struct sp_area *spa) { - if (!is_local_group(spa->spg->id)) { - down_read(&spa->spg->rw_lock); - __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); - sp_fallocate(spa); - up_read(&spa->spg->rw_lock); - } else { - sp_munmap(current->mm, spa->va_start, spa_size(spa)); - sp_fallocate(spa); - } + down_read(&spa->spg->rw_lock); + __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); + sp_fallocate(spa); + up_read(&spa->spg->rw_lock); } static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm) @@ -2187,9 +2230,10 @@ static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm int ret = 0; down_read(&spg->rw_lock); - if (!is_local_group(spg->id) && !is_process_in_group(spg, mm)) + if (!is_process_in_group(spg, mm)) ret = -EPERM; up_read(&spg->rw_lock); + return ret; } @@ -2374,6 +2418,7 @@ struct sp_alloc_context { struct timespec64 start; struct timespec64 end; bool have_mbind; + enum spa_type type; }; static void trace_sp_alloc_begin(struct sp_alloc_context *ac) @@ -2461,10 +2506,13 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, pr_err_ratelimited("allocation failed, task not in group\n"); return -ENODEV; } + ac->type = SPA_TYPE_ALLOC; } else { /* allocation pass through scene */ spg = sp_get_local_group(current->mm); if (IS_ERR(spg)) return PTR_ERR(spg); + down_read(&spg->rw_lock); + ac->type = SPA_TYPE_ALLOC_PRIVATE; } if (sp_flags & SP_HUGEPAGE) { @@ -2487,8 +2535,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, static void sp_alloc_unmap(struct mm_struct *mm, struct sp_area *spa, struct sp_group_node *spg_node) { - if (!is_local_group(spa->spg->id)) - __sp_free(spa->spg, spa->va_start, spa->real_size, mm); + __sp_free(spa->spg, spa->va_start, spa->real_size, mm); } static int sp_alloc_mmap(struct mm_struct *mm, 
struct sp_area *spa, @@ -2543,10 +2590,7 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, return ret; unmap: - if (!is_local_group(spa->spg->id)) - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); - else - sp_munmap(mm, spa->va_start, spa->real_size); + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); return ret; } @@ -2646,11 +2690,7 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, ret = sp_alloc_populate(mm, spa, ac); if (ret) { err: - if (!is_local_group(spa->spg->id)) - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, - spg_node); - else - sp_munmap(mm, spa->va_start, spa->real_size); + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); if (unlikely(fatal_signal_pending(current))) pr_warn_ratelimited("allocation failed, current thread is killed\n"); @@ -2673,34 +2713,30 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct mm_struct *mm; struct sp_group_node *spg_node; - if (is_local_group(spa->spg->id)) { - ret = __sp_alloc_mmap_populate(current->mm, spa, NULL, ac); - } else { - /* create mapping for each process in the group */ - list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { - mm = spg_node->master->mm; - mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); - if (mmap_ret) { - if (ac->state != ALLOC_COREDUMP) - return mmap_ret; - ac->state = ALLOC_NORMAL; - continue; - } - ret = mmap_ret; + /* create mapping for each process in the group */ + list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { + mm = spg_node->master->mm; + mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); + if (mmap_ret) { + if (ac->state != ALLOC_COREDUMP) + return mmap_ret; + ac->state = ALLOC_NORMAL; + continue; } + ret = mmap_ret; } + return ret; } /* spa maybe an error pointer, so introduce variable spg */ static void sp_alloc_finish(int result, struct sp_area *spa, - struct sp_alloc_context *ac) + struct sp_alloc_context *ac) { struct sp_group *spg = ac->spg; /* match sp_alloc_prepare */ - if (!is_local_group(spg->id)) - up_read(&spg->rw_lock); + up_read(&spg->rw_lock); if (!result) sp_update_process_stat(current, true, spa); @@ -2740,7 +2776,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) try_again: spa = sp_alloc_area(ac.size_aligned, ac.sp_flags, ac.spg, - SPA_TYPE_ALLOC, current->tgid); + ac.type, current->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("alloc spa failed in allocation(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); @@ -4661,8 +4697,6 @@ void sp_group_post_exit(struct mm_struct *mm) } up_write(&sp_group_sem); - if (master->local) - sp_group_drop(master->local); kfree(master); } -- Gitee From 3415529d28b3609f2b7f7799e00341de9ae74d89 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:46:53 +0800 Subject: [PATCH 13/74] mm/sharepool: Clear the initialization of sp-associated structure for a process Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- A few structures must have been created when a process want to get into sharepool subsystem, including allocating sharepool memory, being added into a spg or doing k2u and so on. Currently we create those structures just before we actually need them. 
For example, we find or create a sp_spa_stat after a successful memory allocation and before updating the statistical structure. The creation of a new structure may fail due to oom and we should then reclaim the memory allocated and revert all the process before. Or we just forget to do that and a potential memory-leak occurs. This design makes it confused when we indeed create a structure and we always worry about potential memory-leak when we changes the code around it. A better solution is to initialize all that structures at the same time when a process join in sharepool subsystem. And in future, we will clear the unnecessary statistical structures. Signed-off-by: Wang Wensheng --- include/linux/share_pool.h | 4 - mm/share_pool.c | 279 ++++++++++++++++--------------------- 2 files changed, 117 insertions(+), 166 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 7b536b30907d..64ea7f688de9 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -506,10 +506,6 @@ static inline struct sp_proc_stat *sp_get_proc_stat_ref(struct mm_struct *mm) return NULL; } -static inline void sp_proc_stat_drop(struct sp_proc_stat *stat) -{ -} - static inline void spa_overview_show(struct seq_file *seq) { } diff --git a/mm/share_pool.c b/mm/share_pool.c index 970fe76b4972..8f5ecea44f08 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -244,33 +244,22 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) return 0; } -static void free_sp_group_locked(struct sp_group *spg); -static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg); static struct sp_group *create_spg(int spg_id); static void free_new_spg_id(bool new, int spg_id); -/* The caller must hold sp_group_sem */ -static struct sp_group_master *sp_init_group_master_locked( - struct mm_struct *mm, bool *exist) +static void free_sp_group_locked(struct sp_group *spg); +static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg); +static int init_local_group(struct mm_struct *mm) { int spg_id, ret; struct sp_group *spg; + struct sp_mapping *spm; struct sp_group_master *master = mm->sp_group_master; - if (master) { - *exist = true; - return master; - } - - master = kmalloc(sizeof(struct sp_group_master), GFP_KERNEL); - if (master == NULL) - return ERR_PTR(-ENOMEM); - spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_LOCAL_MIN, SPG_ID_LOCAL_MAX, GFP_ATOMIC); if (spg_id < 0) { pr_err_ratelimited("generate local group id failed %d\n", spg_id); - ret = spg_id; - goto free_master; + return spg_id; } spg = create_spg(spg_id); @@ -279,60 +268,73 @@ static struct sp_group_master *sp_init_group_master_locked( goto free_spg_id; } - INIT_LIST_HEAD(&master->node_list); - master->count = 0; - master->stat = NULL; - master->mm = mm; master->local = spg; - mm->sp_group_master = master; + spm = sp_mapping_create(SP_MAPPING_DVPP); + if (IS_ERR(spm)) { + ret = PTR_ERR(spm); + goto free_spg; + } + sp_mapping_attach(master->local, spm); + sp_mapping_attach(master->local, sp_mapping_normal); ret = local_group_add_task(mm, spg); if (ret < 0) + /* The spm would be released while destroying the spg*/ goto free_spg; - *exist = false; - return master; + return 0; free_spg: free_sp_group_locked(spg); + master->local = NULL; free_spg_id: free_new_spg_id(true, spg_id); -free_master: - kfree(master); - return ERR_PTR(ret); -} -static inline bool is_local_group(int spg_id) -{ - return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; + return ret; } -/* - * If 
the process is added to a group first, the address space of the local - * group of the process must have been set. If the process is not added to - * a group, directly create or attach the process to the corresponding DVPP - * and normal address space. - */ -static int sp_mapping_group_setup_local(struct mm_struct *mm) +static void sp_proc_stat_drop(struct sp_proc_stat *stat); +static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk); +/* The caller must hold sp_group_sem */ +static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct *mm) { + int ret; struct sp_group_master *master; - struct sp_mapping *spm; - bool exist = false; - - master = sp_init_group_master_locked(mm, &exist); - if (IS_ERR(master)) - return PTR_ERR(master); - if (master->local->dvpp) + if (mm->sp_group_master) return 0; - spm = sp_mapping_create(SP_MAPPING_DVPP); - if (IS_ERR(spm)) - return PTR_ERR(spm); - sp_mapping_attach(master->local, spm); - sp_mapping_attach(master->local, sp_mapping_normal); + master = kmalloc(sizeof(struct sp_group_master), GFP_KERNEL); + if (!master) + return -ENOMEM; + + INIT_LIST_HEAD(&master->node_list); + master->count = 0; + master->mm = mm; + mm->sp_group_master = master; + + ret = sp_init_proc_stat(mm, tsk); + if (ret) + goto free_master; + + ret = init_local_group(mm); + if (ret) + goto put_stat; return 0; + +put_stat: + sp_proc_stat_drop(master->stat); +free_master: + mm->sp_group_master = NULL; + kfree(master); + + return ret; +} + +static inline bool is_local_group(int spg_id) +{ + return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; } static struct sp_group *sp_get_local_group(struct mm_struct *mm) @@ -350,7 +352,7 @@ static struct sp_group *sp_get_local_group(struct mm_struct *mm) up_read(&sp_group_sem); down_write(&sp_group_sem); - ret = sp_mapping_group_setup_local(mm); + ret = sp_init_group_master_locked(current, mm); if (ret) { up_write(&sp_group_sem); return ERR_PTR(ret); @@ -398,37 +400,29 @@ static struct sp_proc_stat *create_proc_stat(struct mm_struct *mm, return stat; } -static struct sp_proc_stat *sp_init_proc_stat(struct sp_group_master *master, - struct mm_struct *mm, struct task_struct *tsk) +static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk) { struct sp_proc_stat *stat; int alloc_id, tgid = tsk->tgid; - - down_write(&sp_proc_stat_sem); - stat = master->stat; - if (stat) { - up_write(&sp_proc_stat_sem); - return stat; - } + struct sp_group_master *master = mm->sp_group_master; stat = create_proc_stat(mm, tsk); - if (IS_ERR(stat)) { - up_write(&sp_proc_stat_sem); - return stat; - } + if (IS_ERR(stat)) + return PTR_ERR(stat); + down_write(&sp_proc_stat_sem); alloc_id = idr_alloc(&sp_proc_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL); if (alloc_id < 0) { up_write(&sp_proc_stat_sem); pr_err_ratelimited("proc stat idr alloc failed %d\n", alloc_id); kfree(stat); - return ERR_PTR(alloc_id); + return alloc_id; } master->stat = stat; up_write(&sp_proc_stat_sem); - return stat; + return 0; } static void update_spg_stat_alloc(unsigned long size, bool inc, @@ -542,18 +536,14 @@ static struct spg_proc_stat *create_spg_proc_stat(int tgid, int spg_id) return stat; } -static struct spg_proc_stat *sp_init_spg_proc_stat( - struct sp_proc_stat *proc_stat, int tgid, struct sp_group *spg) +static struct spg_proc_stat *sp_init_spg_proc_stat(struct sp_proc_stat *proc_stat, + struct sp_group *spg) { struct spg_proc_stat *stat; int spg_id = spg->id; /* visit spg id locklessly */ struct sp_spg_stat *spg_stat = 
spg->stat; - stat = find_spg_proc_stat(proc_stat, tgid, spg_id); - if (stat) - return stat; - - stat = create_spg_proc_stat(tgid, spg_id); + stat = create_spg_proc_stat(proc_stat->tgid, spg_id); if (IS_ERR(stat)) return stat; @@ -570,31 +560,6 @@ static struct spg_proc_stat *sp_init_spg_proc_stat( return stat; } -/* - * The caller must - * 1. ensure no concurrency problem for task_struct and mm_struct. - * 2. hold sp_group_sem for sp_group_master (pay attention to ABBA deadlock) - */ -static struct spg_proc_stat *sp_init_process_stat(struct task_struct *tsk, - struct mm_struct *mm, struct sp_group *spg) -{ - struct sp_group_master *master; - bool exist; - struct sp_proc_stat *proc_stat; - struct spg_proc_stat *spg_proc_stat; - - master = sp_init_group_master_locked(mm, &exist); - if (IS_ERR(master)) - return (struct spg_proc_stat *)master; - - proc_stat = sp_init_proc_stat(master, mm, tsk); - if (IS_ERR(proc_stat)) - return (struct spg_proc_stat *)proc_stat; - - spg_proc_stat = sp_init_spg_proc_stat(proc_stat, tsk->tgid, spg); - return spg_proc_stat; -} - static struct sp_spg_stat *create_spg_stat(int spg_id) { struct sp_spg_stat *stat; @@ -841,9 +806,9 @@ static void sp_update_process_stat(struct task_struct *tsk, bool inc, enum spa_type type = spa->type; down_write(&sp_group_sem); - stat = sp_init_process_stat(tsk, tsk->mm, spa->spg); + stat = find_spg_proc_stat(tsk->mm->sp_group_master->stat, tsk->tgid, spa->spg->id); up_write(&sp_group_sem); - if (unlikely(IS_ERR(stat))) + if (!stat) return; update_spg_proc_stat(size, inc, stat, type); @@ -1264,26 +1229,27 @@ static void sp_munmap_task_areas(struct mm_struct *mm, struct sp_group *spg, str } /* the caller must hold sp_group_sem */ -static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) +static int mm_add_group_init(struct task_struct *tsk, struct mm_struct *mm, + struct sp_group *spg) { - struct sp_group_master *master = mm->sp_group_master; - bool exist = false; - - master = sp_init_group_master_locked(mm, &exist); - if (IS_ERR(master)) - return PTR_ERR(master); - - if (!exist) - return 0; + int ret; + struct sp_group_master *master; - if (is_process_in_group(spg, mm)) { - pr_err_ratelimited("task already in target group, id=%d\n", spg->id); - return -EEXIST; - } + if (!mm->sp_group_master) { + ret = sp_init_group_master_locked(tsk, mm); + if (ret) + return ret; + } else { + if (is_process_in_group(spg, mm)) { + pr_err_ratelimited("task already in target group, id=%d\n", spg->id); + return -EEXIST; + } - if (master->count == MAX_GROUP_FOR_TASK) { - pr_err("task reaches max group num\n"); - return -ENOSPC; + master = mm->sp_group_master; + if (master->count == MAX_GROUP_FOR_TASK) { + pr_err("task reaches max group num\n"); + return -ENOSPC; + } } return 0; @@ -1322,29 +1288,13 @@ static int insert_spg_node(struct sp_group *spg, struct sp_group_node *node) spg->proc_num++; list_add_tail(&node->proc_node, &spg->procs); - return 0; -} - -static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) -{ - struct sp_group_node *node; - struct spg_proc_stat *stat; - - node = create_spg_node(mm, PROT_READ | PROT_WRITE, spg); - if (IS_ERR(node)) - return PTR_ERR(node); - - /* use current just to avoid compile error, rebuild in following patch */ - stat = sp_init_process_stat(current, mm, spg); - if (IS_ERR(stat)) { - free_sp_group_locked(spg); - pr_err_ratelimited("init process stat failed %lx\n", PTR_ERR(stat)); - return PTR_ERR(stat); - } - - insert_spg_node(spg, node); - mmget(mm); + /* + * The only way where 
sp_init_spg_proc_stat got failed is that there is no + * memory for sp_spg_stat. We will avoid this failure when we put sp_spg_stat + * into sp_group_node later. + */ + sp_init_spg_proc_stat(node->master->stat, spg); return 0; } @@ -1367,6 +1317,20 @@ static void free_spg_node(struct mm_struct *mm, struct sp_group *spg, kfree(spg_node); } +static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) +{ + struct sp_group_node *node; + + node = create_spg_node(mm, PROT_READ | PROT_WRITE, spg); + if (IS_ERR(node)) + return PTR_ERR(node); + + insert_spg_node(spg, node); + mmget(mm); + + return 0; +} + /** * sp_group_add_task() - Add a process to an share group (sp_group). * @pid: the pid of the task to be added. @@ -1391,7 +1355,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) int ret = 0; bool id_newly_generated = false; struct sp_area *spa, *prev = NULL; - struct spg_proc_stat *stat; check_interrupt_context(); @@ -1494,29 +1457,27 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) } } - ret = mm_add_group_init(mm, spg); - if (ret) + + down_write(&spg->rw_lock); + ret = mm_add_group_init(tsk, mm, spg); + if (ret) { + up_write(&spg->rw_lock); goto out_drop_group; + } ret = sp_mapping_group_setup(mm, spg); - if (ret) + if (ret) { + up_write(&spg->rw_lock); goto out_drop_group; + } node = create_spg_node(mm, prot, spg); if (unlikely(IS_ERR(node))) { + up_write(&spg->rw_lock); ret = PTR_ERR(node); goto out_drop_spg_node; } - /* per process statistics initialization */ - stat = sp_init_process_stat(tsk, mm, spg); - if (IS_ERR(stat)) { - ret = PTR_ERR(stat); - pr_err_ratelimited("init process stat failed %lx\n", PTR_ERR(stat)); - goto out_drop_spg_node; - } - - down_write(&spg->rw_lock); ret = insert_spg_node(spg, node); if (unlikely(ret)) { up_write(&spg->rw_lock); @@ -1768,7 +1729,7 @@ int sp_id_of_current(void) down_read(&sp_group_sem); master = current->mm->sp_group_master; - if (master && master->local) { + if (master) { spg_id = master->local->id; up_read(&sp_group_sem); return spg_id; @@ -1776,7 +1737,7 @@ int sp_id_of_current(void) up_read(&sp_group_sem); down_write(&sp_group_sem); - ret = sp_mapping_group_setup_local(current->mm); + ret = sp_init_group_master_locked(current, current->mm); if (ret) { up_write(&sp_group_sem); return ret; @@ -2935,7 +2896,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un struct sp_group *spg; down_write(&sp_group_sem); - ret = sp_mapping_group_setup_local(current->mm); + ret = sp_init_group_master_locked(current, current->mm); if (ret) { up_write(&sp_group_sem); pr_err_ratelimited("k2u_task init local mapping failed %d\n", ret); @@ -2943,13 +2904,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un } spg = current->mm->sp_group_master->local; - stat = sp_init_process_stat(current, current->mm, spg); - if (IS_ERR(stat)) { - up_write(&sp_group_sem); - pr_err_ratelimited("k2u_task init process stat failed %lx\n", - PTR_ERR(stat)); - return stat; - } + stat = find_spg_proc_stat(current->mm->sp_group_master->stat, current->tgid, spg->id); up_write(&sp_group_sem); spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2TASK, current->tgid); @@ -3969,7 +3924,7 @@ static void free_sp_proc_stat(struct sp_proc_stat *stat) } /* the caller make sure stat is not NULL */ -void sp_proc_stat_drop(struct sp_proc_stat *stat) +static void sp_proc_stat_drop(struct sp_proc_stat *stat) { if (atomic_dec_and_test(&stat->use_count)) free_sp_proc_stat(stat); -- Gitee 
From 5c89966399873a9f6242b540f0b2c835a931ea59 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:46:54 +0800 Subject: [PATCH 14/74] mm/sharepool: Update sp_mapping structure Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- 1. Add a list for sp_mapping to record all the sp_groups attached to it. 2. Initialize the sp_mapping for local_group when it is created. So when we add a task to a group, we should merge the dvpp mapping of the local group. 3. Every two groups can be merged if and only if at least one of them is empty. Then the empty mapping would be dropped and another mapping would be attached to the two groups. This need to traverse all the groups attached to the mapping. 4. A mapping is considered empty when no spa is allocated from it and its address space is default. Signed-off-by: Wang Wensheng --- include/linux/share_pool.h | 9 ++++-- mm/share_pool.c | 65 +++++++++++++++++++++++--------------- 2 files changed, 47 insertions(+), 27 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 64ea7f688de9..2ae1b75e5aba 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -116,6 +116,9 @@ struct sp_mapping { struct rb_node *free_area_cache; unsigned long cached_hole_size; unsigned long cached_vstart; + + /* list head for all groups attached to this mapping, dvpp mapping only */ + struct list_head group_head; }; /* Processes in the same sp_group can share memory. @@ -159,8 +162,10 @@ struct sp_group { atomic_t use_count; /* protect the group internal elements, except spa_list */ struct rw_semaphore rw_lock; - struct sp_mapping *dvpp; - struct sp_mapping *normal; + /* list node for dvpp mapping */ + struct list_head mnode; + struct sp_mapping *dvpp; + struct sp_mapping *normal; }; /* a per-process(per mm) struct which manages a sp_group_node list */ diff --git a/mm/share_pool.c b/mm/share_pool.c index 8f5ecea44f08..778f2658b191 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -163,6 +163,7 @@ static struct sp_mapping *sp_mapping_create(unsigned long flag) sp_mapping_range_init(spm); atomic_set(&spm->user, 0); spm->area_root = RB_ROOT; + INIT_LIST_HEAD(&spm->group_head); return spm; } @@ -175,18 +176,45 @@ static void sp_mapping_destroy(struct sp_mapping *spm) static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) { atomic_inc(&spm->user); - if (spm->flag & SP_MAPPING_DVPP) + if (spm->flag & SP_MAPPING_DVPP) { spg->dvpp = spm; - else if (spm->flag & SP_MAPPING_NORMAL) + list_add_tail(&spg->mnode, &spm->group_head); + } else if (spm->flag & SP_MAPPING_NORMAL) spg->normal = spm; } static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) { - if (spm && atomic_dec_and_test(&spm->user)) + if (!spm) + return; + if (spm->flag & SP_MAPPING_DVPP) + list_del(&spg->mnode); + if (atomic_dec_and_test(&spm->user)) sp_mapping_destroy(spm); } +/* merge old mapping to new, and the old mapping would be destroyed */ +static void sp_mapping_merge(struct sp_mapping *new, struct sp_mapping *old) +{ + struct sp_group *spg, *tmp; + + if (new == old) + return; + + list_for_each_entry_safe(spg, tmp, &old->group_head, mnode) { + list_move_tail(&spg->mnode, &new->group_head); + spg->dvpp = new; + } + + atomic_add(atomic_read(&old->user), &new->user); + sp_mapping_destroy(old); +} + +static bool is_mapping_empty(struct sp_mapping *spm) +{ + return RB_EMPTY_ROOT(&spm->area_root); +} + /* * 
When you set the address space of a group, the normal address space * is globally unified. When processing the DVPP address space, consider @@ -211,32 +239,18 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) { struct sp_group_master *master = mm->sp_group_master; struct sp_group *local = master->local; - struct sp_mapping *spm; if (!list_empty(&spg->procs)) { - /* 1 */ - if (local->dvpp && local->dvpp != spg->dvpp) { - pr_info_ratelimited("Duplicate address space, id=%d\n", - spg->id); - return 0; - } - - /* 2 */ - if (!local->dvpp) { - sp_mapping_attach(local, spg->dvpp); - sp_mapping_attach(local, spg->normal); + if (is_mapping_empty(local->dvpp)) + sp_mapping_merge(spg->dvpp, local->dvpp); + else if (is_mapping_empty(spg->dvpp)) + sp_mapping_merge(local->dvpp, spg->dvpp); + else { + pr_info_ratelimited("Duplicate address space, id=%d\n", spg->id); + return -EINVAL; } } else { - /* 4 */ - if (!local->dvpp) { - spm = sp_mapping_create(SP_MAPPING_DVPP); - if (IS_ERR(spm)) - return PTR_ERR(spm); - sp_mapping_attach(local, spm); - sp_mapping_attach(local, sp_mapping_normal); - } - - /* 3 */ + /* the mapping of local group is always set */ sp_mapping_attach(spg, local->dvpp); sp_mapping_attach(spg, sp_mapping_normal); } @@ -1132,6 +1146,7 @@ static struct sp_group *create_spg(int spg_id) atomic_set(&spg->use_count, 1); INIT_LIST_HEAD(&spg->procs); INIT_LIST_HEAD(&spg->spa_list); + INIT_LIST_HEAD(&spg->mnode); init_rwsem(&spg->rw_lock); sprintf(name, "sp_group_%d", spg_id); -- Gitee From 756affa46bb0e8bea9d5eae6a4af94bfc2ca1424 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:46:55 +0800 Subject: [PATCH 15/74] mm/sharepool: Introduce SPG_NON_DVPP flag for sp_group_add_task Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- When SPG_NOD_DVPP is specified to sp_group_add_task, we don't create a DVPP mapping for the newly created sp_group. And the new group cannot support allocating DVPP memory. 
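With the flag set, the attach path in sp_mapping_group_setup() reduces to
the following (a sketch of the hunk below):

	if (!(spg->flag & SPG_FLAG_NON_DVPP))
		/* the mapping of local group is always set */
		sp_mapping_attach(spg, local->dvpp);
	if (!spg->normal)
		sp_mapping_attach(spg, sp_mapping_normal);

So a group created with SPG_FLAG_NON_DVPP never gets a dvpp mapping, and
sp_alloc_area() rejects SP_DVPP requests against it with -EINVAL.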
Signed-off-by: Wang Wensheng --- include/linux/share_pool.h | 7 +++-- mm/share_pool.c | 60 ++++++++++++++++++++------------------ 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 2ae1b75e5aba..25b84d995619 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -42,6 +42,9 @@ #define SPG_ID_LOCAL_MIN 200001 #define SPG_ID_LOCAL_MAX 299999 +#define SPG_FLAG_NON_DVPP (1 << 0) +#define SPG_FLAG_MASK (SPG_FLAG_NON_DVPP) + #define MAX_DEVID 8 /* the max num of Da-vinci devices */ extern int sysctl_share_pool_hugepage_enable; @@ -145,6 +148,7 @@ struct sp_mapping { */ struct sp_group { int id; + unsigned long flag; struct file *file; struct file *file_hugetlb; /* number of process in this group */ @@ -285,9 +289,6 @@ extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, in extern bool is_sharepool_addr(unsigned long addr); extern bool mg_is_sharepool_addr(unsigned long addr); -extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); -extern int sp_group_add_task(int pid, int spg_id); - extern int sp_id_of_current(void); extern int mg_sp_id_of_current(void); diff --git a/mm/share_pool.c b/mm/share_pool.c index 778f2658b191..3a1d99b8a515 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -216,31 +216,20 @@ static bool is_mapping_empty(struct sp_mapping *spm) } /* - * When you set the address space of a group, the normal address space - * is globally unified. When processing the DVPP address space, consider - * the following situations: - * 1. If a process is added to a non-new group, the DVPP address space - * must have been created. If the local group of the process also - * contains the DVPP address space and they are different, this - * scenario is not allowed to avoid address conflict. - * 2. If the DVPP address space does not exist in the local group of the - * process, attach the local group of the process to the DVPP address - * space of the group. - * 3. Add a new group. If the process has applied for the dvpp address - * space (sp_alloc or k2u), attach the new group to the dvpp address - * space of the current process. - * 4. If the process has not applied for the DVPP address space, attach - * the new group and the local group of the current process to the - * newly created DVPP address space. - * + * 1. The mappings of local group is set on creating. + * 2. This is used to setup the mapping for groups created during add_task. + * 3. The normal mapping exists for all groups. + * 4. The dvpp mappings for the new group and local group can merge _iff_ at + * least one of the mapping is empty. * the caller must hold sp_group_sem + * NOTE: undo the mergeing when the later process failed. 
*/ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) { struct sp_group_master *master = mm->sp_group_master; struct sp_group *local = master->local; - if (!list_empty(&spg->procs)) { + if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) { if (is_mapping_empty(local->dvpp)) sp_mapping_merge(spg->dvpp, local->dvpp); else if (is_mapping_empty(spg->dvpp)) @@ -250,15 +239,17 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) return -EINVAL; } } else { - /* the mapping of local group is always set */ - sp_mapping_attach(spg, local->dvpp); - sp_mapping_attach(spg, sp_mapping_normal); + if (!(spg->flag & SPG_FLAG_NON_DVPP)) + /* the mapping of local group is always set */ + sp_mapping_attach(spg, local->dvpp); + if (!spg->normal) + sp_mapping_attach(spg, sp_mapping_normal); } return 0; } -static struct sp_group *create_spg(int spg_id); +static struct sp_group *create_spg(int spg_id, unsigned long flag); static void free_new_spg_id(bool new, int spg_id); static void free_sp_group_locked(struct sp_group *spg); static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg); @@ -276,7 +267,7 @@ static int init_local_group(struct mm_struct *mm) return spg_id; } - spg = create_spg(spg_id); + spg = create_spg(spg_id, 0); if (IS_ERR(spg)) { ret = PTR_ERR(spg); goto free_spg_id; @@ -1114,7 +1105,7 @@ static loff_t addr_offset(struct sp_area *spa) return (loff_t)(addr - sp_dev_va_start[spa->device_id]); } -static struct sp_group *create_spg(int spg_id) +static struct sp_group *create_spg(int spg_id, unsigned long flag) { int ret; struct sp_group *spg; @@ -1128,6 +1119,11 @@ static struct sp_group *create_spg(int spg_id) return ERR_PTR(-ENOSPC); } + if (flag & ~SPG_FLAG_MASK) { + pr_err_ratelimited("invalid flag:%#lx\n", flag); + return ERR_PTR(-EINVAL); + } + spg = kzalloc(sizeof(*spg), GFP_KERNEL); if (spg == NULL) return ERR_PTR(-ENOMEM); @@ -1140,6 +1136,7 @@ static struct sp_group *create_spg(int spg_id) } spg->id = spg_id; + spg->flag = flag; spg->is_alive = true; spg->proc_num = 0; spg->owner = current->group_leader; @@ -1187,14 +1184,14 @@ static struct sp_group *create_spg(int spg_id) } /* the caller must hold sp_group_sem */ -static struct sp_group *find_or_alloc_sp_group(int spg_id) +static struct sp_group *find_or_alloc_sp_group(int spg_id, unsigned long flag) { struct sp_group *spg; spg = __sp_find_spg_locked(current->pid, spg_id); if (!spg) { - spg = create_spg(spg_id); + spg = create_spg(spg_id, flag); } else { down_read(&spg->rw_lock); if (!spg_valid(spg)) { @@ -1347,10 +1344,11 @@ static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) } /** - * sp_group_add_task() - Add a process to an share group (sp_group). + * mg_sp_group_add_task() - Add a process to an share group (sp_group). * @pid: the pid of the task to be added. * @prot: the prot of task for this spg. * @spg_id: the ID of the sp_group. + * @flag: to give some special message. * * A process can't be added to more than one sp_group in single group mode * and can in multiple group mode. 
@@ -1363,6 +1361,7 @@ static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) */ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) { + unsigned long flag = 0; struct task_struct *tsk; struct mm_struct *mm; struct sp_group *spg; @@ -1456,7 +1455,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) goto out_put_task; } - spg = find_or_alloc_sp_group(spg_id); + spg = find_or_alloc_sp_group(spg_id, flag); if (IS_ERR(spg)) { up_write(&sp_group_sem); ret = PTR_ERR(spg); @@ -1830,6 +1829,11 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, else mapping = spg->normal; + if (!mapping) { + pr_err_ratelimited("non DVPP spg, id %d\n", spg->id); + return ERR_PTR(-EINVAL); + } + vstart = mapping->start[device_id]; vend = mapping->end[device_id]; spa = __kmalloc_node(sizeof(struct sp_area), GFP_KERNEL, node_id); -- Gitee From 495a20ada2c6a421f053bda275d7adec62409279 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:46:56 +0800 Subject: [PATCH 16/74] mm/sharepool: Configure the DVPP range for process Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- Currently the dvpp range is global for each device. And it is unreasonable after the reconstruction that makes the DVPP mappings private to each process or group. This allows to configure the dvpp range for each process. The dvpp range for each dvpp mapping can only be configured once just as the old version. Signed-off-by: Wang Wensheng --- mm/share_pool.c | 66 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 3a1d99b8a515..66d02929741c 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -215,6 +215,17 @@ static bool is_mapping_empty(struct sp_mapping *spm) return RB_EMPTY_ROOT(&spm->area_root); } +static bool can_mappings_merge(struct sp_mapping *m1, struct sp_mapping *m2) +{ + int i; + + for (i = 0; i < sp_device_number; i++) + if (m1->start[i] != m2->start[i] || m1->end[i] != m2->end[i]) + return false; + + return true; +} + /* * 1. The mappings of local group is set on creating. * 2. This is used to setup the mapping for groups created during add_task. 
@@ -230,6 +241,11 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) struct sp_group *local = master->local; if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) { + if (!can_mappings_merge(local->dvpp, spg->dvpp)) { + pr_info_ratelimited("address space conflict, id=%d\n", spg->id); + return -EINVAL; + } + if (is_mapping_empty(local->dvpp)) sp_mapping_merge(spg->dvpp, local->dvpp); else if (is_mapping_empty(spg->dvpp)) @@ -3836,16 +3852,50 @@ EXPORT_SYMBOL_GPL(sp_unregister_notifier); */ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { - if (pid < 0 || - size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || - device_id < 0 || device_id >= sp_device_number || - !is_online_node_id(device_id) || - is_sp_dev_addr_enabled(device_id)) + int ret; + bool err = false; + struct task_struct *tsk; + struct mm_struct *mm; + struct sp_group *spg; + struct sp_mapping *spm; + unsigned long default_start; + + /* NOTE: check the start address */ + if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || + device_id < 0 || device_id >= sp_device_number || !is_online_node_id(device_id)) return false; - sp_dev_va_start[device_id] = start; - sp_dev_va_size[device_id] = size; - return true; + ret = get_task(pid, &tsk); + if (ret) + return false; + + mm = get_task_mm(tsk->group_leader); + if (!mm) + goto put_task; + + spg = sp_get_local_group(mm); + if (IS_ERR(spg)) + goto put_mm; + + spm = spg->dvpp; + default_start = MMAP_SHARE_POOL_16G_START + device_id * MMAP_SHARE_POOL_16G_SIZE; + /* The dvpp range of each group can be configured only once */ + if (spm->start[device_id] != default_start) + goto put_spg; + + spm->start[device_id] = start; + spm->end[device_id] = start + size; + + err = true; + +put_spg: + sp_group_drop(spg); +put_mm: + mmput(mm); +put_task: + put_task_struct(tsk); + + return err; } EXPORT_SYMBOL_GPL(sp_config_dvpp_range); -- Gitee From 0dd0ac138144e44e89605bc32d82ff1ab8cf2a25 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:46:57 +0800 Subject: [PATCH 17/74] mm/sharepool: Don't check the DVPP address space range before merging Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- The user doesn't care about the start address of the dvpp range, what is mattered is that the virtual space tagged DVPP located at in a 16G range. So we can safely drop the dvpp address space as long as it's empty during merging process. Signed-off-by: Wang Wensheng --- mm/share_pool.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 66d02929741c..e1d16143cdd9 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -241,16 +241,25 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) struct sp_group *local = master->local; if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) { - if (!can_mappings_merge(local->dvpp, spg->dvpp)) { - pr_info_ratelimited("address space conflict, id=%d\n", spg->id); - return -EINVAL; - } + /* + * Don't return an error when the mappings' address range conflict. + * As long as the mapping is unused, we can drop the empty mapping. + * This may change the address range for the task or group implicitly, + * give a warn for it. 
+ */ + bool is_conflict = !can_mappings_merge(local->dvpp, spg->dvpp); - if (is_mapping_empty(local->dvpp)) + if (is_mapping_empty(local->dvpp)) { sp_mapping_merge(spg->dvpp, local->dvpp); - else if (is_mapping_empty(spg->dvpp)) + if (is_conflict) + pr_warn_ratelimited("task address space conflict, spg_id=%d\n", + spg->id); + } else if (is_mapping_empty(spg->dvpp)) { sp_mapping_merge(local->dvpp, spg->dvpp); - else { + if (is_conflict) + pr_warn_ratelimited("group address space conflict, spg_id=%d\n", + spg->id); + } else { pr_info_ratelimited("Duplicate address space, id=%d\n", spg->id); return -EINVAL; } -- Gitee From 4216e84875c09b812a54f9642d6d43dd705ea9ad Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:46:58 +0800 Subject: [PATCH 18/74] mm/sharepool: Add a task_struct parameter for sp_get_local_group() Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- sp_get_local_group() could be invoked in kthread, where the current process isn't the process we want. Add a parameter and let the caller to avoid this problem. Signed-off-by: Wang Wensheng --- mm/share_pool.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index e1d16143cdd9..c1a00a5ac948 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -367,7 +367,7 @@ static inline bool is_local_group(int spg_id) return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; } -static struct sp_group *sp_get_local_group(struct mm_struct *mm) +static struct sp_group *sp_get_local_group(struct task_struct *tsk, struct mm_struct *mm) { int ret; struct sp_group_master *master; @@ -382,7 +382,7 @@ static struct sp_group *sp_get_local_group(struct mm_struct *mm) up_read(&sp_group_sem); down_write(&sp_group_sem); - ret = sp_init_group_master_locked(current, mm); + ret = sp_init_group_master_locked(tsk, mm); if (ret) { up_write(&sp_group_sem); return ERR_PTR(ret); @@ -2513,7 +2513,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, } ac->type = SPA_TYPE_ALLOC; } else { /* allocation pass through scene */ - spg = sp_get_local_group(current->mm); + spg = sp_get_local_group(current, current->mm); if (IS_ERR(spg)) return PTR_ERR(spg); down_read(&spg->rw_lock); @@ -3882,7 +3882,7 @@ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) if (!mm) goto put_task; - spg = sp_get_local_group(mm); + spg = sp_get_local_group(tsk, mm); if (IS_ERR(spg)) goto put_mm; -- Gitee From 7712a35bf933cdbe24922db5d2f2be2fec08f264 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:46:59 +0800 Subject: [PATCH 19/74] mm/sharepool: Check sp_is_enabled() in all exported interfaces Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S -------------------------------------------------- We should forbid the usage of sharepool interfaces if sharepool is not enabled. Or undefined behaviour would panic the kernel. 
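Each exported interface therefore gains a guard of the following form at
its entry (sketch; interfaces returning pointers use ERR_PTR(-EOPNOTSUPP),
boolean ones return false, and void ones simply return):

	if (!sp_is_enabled())
		return -EOPNOTSUPP;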
Signed-off-by: Wang Wensheng --- mm/share_pool.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index c1a00a5ac948..db6b8237c7f9 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1012,6 +1012,9 @@ int sp_group_id_by_pid(int pid) struct sp_group *spg; int spg_id = -ENODEV; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context(); spg = __sp_find_spg(pid, SPG_ID_DEFAULT); @@ -1047,6 +1050,9 @@ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) struct sp_group_master *master = NULL; struct task_struct *tsk; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context(); if (!spg_ids || num <= 0) @@ -1395,6 +1401,9 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) bool id_newly_generated = false; struct sp_area *spa, *prev = NULL; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context(); /* only allow READ, READ | WRITE */ @@ -1672,6 +1681,9 @@ int mg_sp_group_del_task(int pid, int spg_id) struct mm_struct *mm = NULL; bool is_alive = true; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + if (spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) { pr_err_ratelimited("del from group failed, invalid group id %d\n", spg_id); return -EINVAL; @@ -1763,6 +1775,9 @@ int sp_id_of_current(void) int ret, spg_id; struct sp_group_master *master; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + if (current->flags & PF_KTHREAD || !current->mm) return -EINVAL; @@ -2338,6 +2353,9 @@ int sp_free(unsigned long addr, int id) .spg_id = id, }; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context(); if (current->flags & PF_KTHREAD) @@ -2775,6 +2793,9 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) int ret = 0; struct sp_alloc_context ac; + if (!sp_is_enabled()) + return ERR_PTR(-EOPNOTSUPP); + ret = sp_alloc_prepare(size, sp_flags, spg_id, &ac); if (ret) return ERR_PTR(ret); @@ -3155,6 +3176,9 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, int ret; struct sp_k2u_context kc; + if (!sp_is_enabled()) + return ERR_PTR(-EOPNOTSUPP); + check_interrupt_context(); ret = sp_k2u_prepare(kva, size, sp_flags, spg_id, &kc); @@ -3442,6 +3466,9 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) struct sp_walk_data sp_walk_data; struct vm_struct *area; + if (!sp_is_enabled()) + return ERR_PTR(-EOPNOTSUPP); + check_interrupt_context(); if (mm == NULL) { @@ -3730,6 +3757,9 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) { int ret = 0; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context(); if (current->flags & PF_KTHREAD) @@ -3775,6 +3805,9 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, struct mm_struct *mm; int ret = 0; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context(); if (unlikely(!sp_walk_data)) { @@ -3820,6 +3853,9 @@ EXPORT_SYMBOL_GPL(mg_sp_walk_page_range); */ void sp_walk_page_free(struct sp_walk_data *sp_walk_data) { + if (!sp_is_enabled()) + return; + check_interrupt_context(); if (!sp_walk_data) @@ -3869,6 +3905,9 @@ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) struct sp_mapping *spm; unsigned long default_start; + if (!sp_is_enabled()) + return false; + /* NOTE: check the start address */ if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || device_id < 0 || device_id >= sp_device_number || !is_online_node_id(device_id)) @@ -3929,7 +3968,8 @@ 
static bool is_sp_normal_addr(unsigned long addr) */ bool is_sharepool_addr(unsigned long addr) { - return is_sp_normal_addr(addr) || is_device_addr(addr); + return sp_is_enabled() && + (is_sp_normal_addr(addr) || is_device_addr(addr)); } EXPORT_SYMBOL_GPL(is_sharepool_addr); @@ -4126,6 +4166,9 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, unsigned long anon, file, shmem, total_rss, prot; long sp_res, sp_res_nsize, non_sp_res, non_sp_shm; + if (!sp_is_enabled()) + return 0; + if (!mm) return 0; -- Gitee From 58cf89114870eb6b35c489e7ded88392b8613ed7 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:47:00 +0800 Subject: [PATCH 20/74] mm/sharepool: Accept device_id in k2u flags Offering: HULK ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- We use device_id to select the correct dvpp vspace range when SP_DVPP flag is specified. Signed-off-by: Wang Wensheng --- mm/share_pool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index db6b8237c7f9..9c5b8e7fd6b7 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3096,10 +3096,11 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, trace_sp_k2u_begin(kc); - if (sp_flags & ~SP_DVPP) { + if (sp_flags & ~SP_FLAG_MASK) { pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags); return -EINVAL; } + sp_flags &= ~SP_HUGEPAGE; if (!current->mm) { pr_err_ratelimited("k2u: kthread is not allowed\n"); -- Gitee From 7daa891209a1be008a5c2b86bca1d75a87305f1b Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Tue, 18 Oct 2022 19:47:01 +0800 Subject: [PATCH 21/74] mm/sharepool: Avoid NULL pointer dereference in mg_sp_group_add_task Offering: HULK ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------------ create_spg_node() may fail with NULL pointer returened, and in the out_drop_spg_node path, the NULL pointer will be dereferenced in free_spg_node(). Signed-off-by: Yuan Can --- mm/share_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 9c5b8e7fd6b7..e14753da004d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1523,7 +1523,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) if (unlikely(IS_ERR(node))) { up_write(&spg->rw_lock); ret = PTR_ERR(node); - goto out_drop_spg_node; + goto out_drop_group; } ret = insert_spg_node(spg, node); -- Gitee From b2e1e9a8191c830b230ab48ecf8188b9f9354080 Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:02 +0800 Subject: [PATCH 22/74] mm/sharepool: Modify sharepool sp_mmap() page_offset Offering: HULK ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ----------------------------------- In sp_mmap(), if use offset = va - MMAP_BASE/DVPP_BASE, then normal sp_alloc pgoff may have same value with DVPP pgoff, causing DVPP and sp_alloc mapped to overlapped part of file unexpectedly. To fix the problem, pass VA value as mmap offset, for in this scenario, VA value in one task address space will not be same. 
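A standalone arithmetic sketch of the collision is shown below; the base addresses are made up for illustration only. Subtracting a per-region base maps two different VAs onto the same file offset, while using the VA itself keeps them distinct within one address space:

    #include <stdio.h>

    int main(void)
    {
            unsigned long mmap_base = 0x100000000000UL;   /* pretend normal-region base */
            unsigned long dvpp_base = 0x180000000000UL;   /* pretend DVPP-region base */
            unsigned long alloc_va  = mmap_base + 0x2000; /* sp_alloc mapping */
            unsigned long dvpp_va   = dvpp_base + 0x2000; /* DVPP mapping */

            /* old scheme: offset = va - region base, both collapse to 0x2000 */
            printf("old offsets: %#lx vs %#lx\n",
                   alloc_va - mmap_base, dvpp_va - dvpp_base);

            /* new scheme: offset = va itself, always distinct within one mm */
            printf("new offsets: %#lx vs %#lx\n", alloc_va, dvpp_va);
            return 0;
    }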
Signed-off-by: Guo Mengqi --- mm/share_pool.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index e14753da004d..2c29edae135c 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -58,6 +58,11 @@ #define spg_valid(spg) ((spg)->is_alive == true) +/* Use spa va address as mmap offset. This can work because spa_file + * is setup with 64-bit address space. So va shall be well covered. + */ +#define addr_offset(spa) ((spa)->va_start) + #define byte2kb(size) ((size) >> 10) #define byte2mb(size) ((size) >> 20) #define page2kb(page_num) ((page_num) << (PAGE_SHIFT - 10)) @@ -1120,22 +1125,6 @@ static bool is_device_addr(unsigned long addr) return false; } -static loff_t addr_offset(struct sp_area *spa) -{ - unsigned long addr; - - if (unlikely(!spa)) { - WARN(1, "invalid spa when calculate addr offset\n"); - return 0; - } - addr = spa->va_start; - - if (!is_device_addr(addr)) - return (loff_t)(addr - MMAP_SHARE_POOL_START); - - return (loff_t)(addr - sp_dev_va_start[spa->device_id]); -} - static struct sp_group *create_spg(int spg_id, unsigned long flag) { int ret; -- Gitee From 3423aedb5e305e251b8be8418f38bd1986eb4a56 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:47:03 +0800 Subject: [PATCH 23/74] mm/sharepool: Fix kabi borken in sp_group_master Offering: HULK ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S ------------------------------------------------- The sp_group_master structure is used only in sharepool subsys and no other drivers use it. Signed-off-by: Wang Wensheng --- include/linux/share_pool.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 25b84d995619..1911cd35843b 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -10,6 +10,7 @@ #include #include #include +#include #define SP_HUGEPAGE (1 << 0) #define SP_HUGEPAGE_ONLY (1 << 1) @@ -187,7 +188,7 @@ struct sp_group_master { * Used to apply for the shared pool memory of the current process. * For example, sp_alloc non-share memory or k2task. */ - struct sp_group *local; + KABI_EXTEND(struct sp_group *local) }; /* -- Gitee From 37b1619ca59c4996c5d8a3045b6ac530890cb0d7 Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:04 +0800 Subject: [PATCH 24/74] mm: sharepool: use built-in-statistics Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5J0YW -------------------------------- Sharepool owns an statistics system which allow user to check the memory use easily. The statistics codes are quite independent from the major functions. However, the realization is very similar with the major functions, which doubles the lock use and cause nesting problems. Thus we remove the statistics system, and put all the statistics into raw data structures as built-in statistics. The user api did not change. This can greatly reduce the complexity of locks, as well as remove hundred lines of redundant codes. 
Signed-off-by: Guo Mengqi --- include/linux/share_pool.h | 7 +- mm/share_pool.c | 843 ++++++++++++++++--------------------- 2 files changed, 375 insertions(+), 475 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 1911cd35843b..c2ef26661a4f 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -65,6 +65,7 @@ extern int sysctl_sp_perf_alloc; extern int sysctl_sp_perf_k2u; +#ifdef __GENKSYMS__ /* we estimate an sp-group ususally contains at most 64 sp-group */ #define SP_SPG_HASH_BITS 6 @@ -206,6 +207,7 @@ struct sp_group_node { struct sp_group *spg; unsigned long prot; }; +#endif struct sp_walk_data { struct page **pages; @@ -508,11 +510,6 @@ static inline bool mg_is_sharepool_addr(unsigned long addr) return false; } -static inline struct sp_proc_stat *sp_get_proc_stat_ref(struct mm_struct *mm) -{ - return NULL; -} - static inline void spa_overview_show(struct seq_file *seq) { } diff --git a/mm/share_pool.c b/mm/share_pool.c index 2c29edae135c..aef3cec4eca8 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -117,19 +117,175 @@ static DEFINE_IDA(sp_group_id_ida); /*** Statistical and maintenance tools ***/ -/* idr of all sp_proc_stats */ -static DEFINE_IDR(sp_proc_stat_idr); -/* rw semaphore for sp_proc_stat_idr */ -static DECLARE_RWSEM(sp_proc_stat_sem); - -/* idr of all sp_spg_stats */ -static DEFINE_IDR(sp_spg_stat_idr); -/* rw semaphore for sp_spg_stat_idr */ -static DECLARE_RWSEM(sp_spg_stat_sem); +/* list of all sp_group_masters */ +static LIST_HEAD(master_list); +/* mutex to protect insert/delete ops from master_list */ +static DEFINE_MUTEX(master_list_lock); /* for kthread buff_module_guard_work */ static struct sp_proc_stat kthread_stat; +#ifndef __GENKSYMS__ +struct sp_spg_stat { + int spg_id; + /* record the number of hugepage allocation failures */ + atomic_t hugepage_failures; + /* number of sp_area */ + atomic_t spa_num; + /* total size of all sp_area from sp_alloc and k2u */ + atomic64_t size; + /* total size of all sp_area from sp_alloc 0-order page */ + atomic64_t alloc_nsize; + /* total size of all sp_area from sp_alloc hugepage */ + atomic64_t alloc_hsize; + /* total size of all sp_area from ap_alloc */ + atomic64_t alloc_size; + /* total size of all sp_area from sp_k2u */ + atomic64_t k2u_size; +}; + +/* per process memory usage statistics indexed by tgid */ +struct sp_proc_stat { + int tgid; + struct mm_struct *mm; + char comm[TASK_COMM_LEN]; + /* + * alloc amount minus free amount, may be negative when freed by + * another task in the same sp group. + */ + atomic64_t alloc_size; + atomic64_t alloc_nsize; + atomic64_t alloc_hsize; + atomic64_t k2u_size; +}; + +/* per process/sp-group memory usage statistics */ +struct spg_proc_stat { + int tgid; + int spg_id; /* 0 for non-group data, such as k2u_task */ + /* + * alloc amount minus free amount, may be negative when freed by + * another task in the same sp group. + */ + atomic64_t alloc_size; + atomic64_t alloc_nsize; + atomic64_t alloc_hsize; + atomic64_t k2u_size; +}; + +/* + * address space management + */ +struct sp_mapping { + unsigned long flag; + atomic_t user; + unsigned long start[MAX_DEVID]; + unsigned long end[MAX_DEVID]; + struct rb_root area_root; + + struct rb_node *free_area_cache; + unsigned long cached_hole_size; + unsigned long cached_vstart; + + /* list head for all groups attached to this mapping, dvpp mapping only */ + struct list_head group_head; +}; + +/* Processes in the same sp_group can share memory. 
+ * Memory layout for share pool: + * + * |-------------------- 8T -------------------|---|------ 8T ------------| + * | Device 0 | Device 1 |...| | + * |----------------------------------------------------------------------| + * |------------- 16G -------------| 16G | | | + * | DVPP GROUP0 | DVPP GROUP1 | ... | ... |...| sp normal memory | + * | sp | sp | | | | | + * |----------------------------------------------------------------------| + * + * The host SVM feature reserves 8T virtual memory by mmap, and due to the + * restriction of DVPP, while SVM and share pool will both allocate memory + * for DVPP, the memory have to be in the same 32G range. + * + * Share pool reserves 16T memory, with 8T for normal uses and 8T for DVPP. + * Within this 8T DVPP memory, SVM will call sp_config_dvpp_range() to + * tell us which 16G memory range is reserved for share pool . + * + * In some scenarios where there is no host SVM feature, share pool uses + * the default 8G memory setting for DVPP. + */ +struct sp_group { + int id; + unsigned long flag; + struct file *file; + struct file *file_hugetlb; + /* number of process in this group */ + int proc_num; + /* list head of processes (sp_group_node, each represents a process) */ + struct list_head procs; + /* list head of sp_area. it is protected by spin_lock sp_area_lock */ + struct list_head spa_list; + /* group statistics */ + struct sp_spg_stat instat; + /* we define the creator process of a sp_group as owner */ + struct task_struct *owner; + /* is_alive == false means it's being destroyed */ + bool is_alive; + atomic_t use_count; + /* protect the group internal elements, except spa_list */ + struct rw_semaphore rw_lock; + /* list node for dvpp mapping */ + struct list_head mnode; + struct sp_mapping *dvpp; + struct sp_mapping *normal; +}; + +/* a per-process(per mm) struct which manages a sp_group_node list */ +struct sp_group_master { + /* + * number of sp groups the process belongs to, + * a.k.a the number of sp_node in node_list + */ + unsigned int count; + /* list head of sp_node */ + struct list_head node_list; + struct mm_struct *mm; + /* + * Used to apply for the shared pool memory of the current process. + * For example, sp_alloc non-share memory or k2task. 
+ */ + struct sp_group *local; + struct sp_proc_stat instat; + struct list_head list_node; +}; + +/* + * each instance represents an sp group the process belongs to + * sp_group_master : sp_group_node = 1 : N + * sp_group_node->spg : sp_group = 1 : 1 + * sp_group_node : sp_group->procs = N : 1 + */ +struct sp_group_node { + /* list node in sp_group->procs */ + struct list_head proc_node; + /* list node in sp_group_maseter->node_list */ + struct list_head group_node; + struct sp_group_master *master; + struct sp_group *spg; + unsigned long prot; + struct spg_proc_stat instat; +}; +#endif + +/* The caller should hold mmap_sem to protect master (TBD) */ +static void sp_init_group_master_stat(struct mm_struct *mm, struct sp_proc_stat *stat) +{ + atomic64_set(&stat->alloc_nsize, 0); + atomic64_set(&stat->alloc_hsize, 0); + atomic64_set(&stat->k2u_size, 0); + stat->mm = mm; + get_task_comm(stat->comm, current); +} + #define SP_MAPPING_DVPP 0x1 #define SP_MAPPING_NORMAL 0x2 static struct sp_mapping *sp_mapping_normal; @@ -328,8 +484,6 @@ static int init_local_group(struct mm_struct *mm) return ret; } -static void sp_proc_stat_drop(struct sp_proc_stat *stat); -static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk); /* The caller must hold sp_group_sem */ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct *mm) { @@ -346,20 +500,19 @@ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct INIT_LIST_HEAD(&master->node_list); master->count = 0; master->mm = mm; + sp_init_group_master_stat(mm, &master->instat); mm->sp_group_master = master; - ret = sp_init_proc_stat(mm, tsk); - if (ret) - goto free_master; + mutex_lock(&master_list_lock); + list_add_tail(&master->list_node, &master_list); + mutex_unlock(&master_list_lock); ret = init_local_group(mm); if (ret) - goto put_stat; + goto free_master; return 0; -put_stat: - sp_proc_stat_drop(master->stat); free_master: mm->sp_group_master = NULL; kfree(master); @@ -399,67 +552,6 @@ static struct sp_group *sp_get_local_group(struct task_struct *tsk, struct mm_st return master->local; } -static struct sp_proc_stat *sp_get_proc_stat(struct mm_struct *mm) -{ - struct sp_proc_stat *stat; - - if (!mm->sp_group_master) - return NULL; - - down_read(&sp_proc_stat_sem); - stat = mm->sp_group_master->stat; - up_read(&sp_proc_stat_sem); - - /* maybe NULL or not, we always return it */ - return stat; -} - -static struct sp_proc_stat *create_proc_stat(struct mm_struct *mm, - struct task_struct *tsk) -{ - struct sp_proc_stat *stat; - - stat = kmalloc(sizeof(*stat), GFP_KERNEL); - if (stat == NULL) - return ERR_PTR(-ENOMEM); - - atomic_set(&stat->use_count, 1); - atomic64_set(&stat->alloc_size, 0); - atomic64_set(&stat->k2u_size, 0); - stat->tgid = tsk->tgid; - stat->mm = mm; - mutex_init(&stat->lock); - hash_init(stat->hash); - get_task_comm(stat->comm, tsk); - - return stat; -} - -static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk) -{ - struct sp_proc_stat *stat; - int alloc_id, tgid = tsk->tgid; - struct sp_group_master *master = mm->sp_group_master; - - stat = create_proc_stat(mm, tsk); - if (IS_ERR(stat)) - return PTR_ERR(stat); - - down_write(&sp_proc_stat_sem); - alloc_id = idr_alloc(&sp_proc_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL); - if (alloc_id < 0) { - up_write(&sp_proc_stat_sem); - pr_err_ratelimited("proc stat idr alloc failed %d\n", alloc_id); - kfree(stat); - return alloc_id; - } - - master->stat = stat; - up_write(&sp_proc_stat_sem); - - return 0; -} - 
static void update_spg_stat_alloc(unsigned long size, bool inc, bool huge, struct sp_spg_stat *stat) { @@ -496,158 +588,64 @@ static void update_spg_stat_k2u(unsigned long size, bool inc, } } -/* per process/sp-group memory usage statistics */ -struct spg_proc_stat { - int tgid; - int spg_id; /* 0 for non-group data, such as k2u_task */ - struct hlist_node pnode; /* hlist node in sp_proc_stat->hash */ - struct hlist_node gnode; /* hlist node in sp_spg_stat->hash */ - struct sp_proc_stat *proc_stat; - struct sp_spg_stat *spg_stat; - /* - * alloc amount minus free amount, may be negative when freed by - * another task in the same sp group. - */ - atomic64_t alloc_size; - atomic64_t k2u_size; -}; - -static void update_spg_proc_stat_alloc(unsigned long size, bool inc, - struct spg_proc_stat *stat) +static void update_mem_usage_alloc(unsigned long size, bool inc, + bool is_hugepage, struct sp_group_node *spg_node) { - struct sp_proc_stat *proc_stat = stat->proc_stat; + struct sp_proc_stat *proc_stat = &spg_node->master->instat; if (inc) { - atomic64_add(size, &stat->alloc_size); - atomic64_add(size, &proc_stat->alloc_size); - } else { - atomic64_sub(size, &stat->alloc_size); - atomic64_sub(size, &proc_stat->alloc_size); + if (is_hugepage) { + atomic64_add(size, &spg_node->instat.alloc_hsize); + atomic64_add(size, &proc_stat->alloc_hsize); + return; + } + atomic64_add(size, &spg_node->instat.alloc_nsize); + atomic64_add(size, &proc_stat->alloc_nsize); + return; } + + if (is_hugepage) { + atomic64_sub(size, &spg_node->instat.alloc_hsize); + atomic64_sub(size, &proc_stat->alloc_hsize); + return; + } + atomic64_sub(size, &spg_node->instat.alloc_nsize); + atomic64_sub(size, &proc_stat->alloc_nsize); + return; } -static void update_spg_proc_stat_k2u(unsigned long size, bool inc, - struct spg_proc_stat *stat) +static void update_mem_usage_k2u(unsigned long size, bool inc, + struct sp_group_node *spg_node) { - struct sp_proc_stat *proc_stat = stat->proc_stat; + struct sp_proc_stat *proc_stat = &spg_node->master->instat; if (inc) { - atomic64_add(size, &stat->k2u_size); + atomic64_add(size, &spg_node->instat.k2u_size); atomic64_add(size, &proc_stat->k2u_size); } else { - atomic64_sub(size, &stat->k2u_size); + atomic64_sub(size, &spg_node->instat.k2u_size); atomic64_sub(size, &proc_stat->k2u_size); } } -static struct spg_proc_stat *find_spg_proc_stat( - struct sp_proc_stat *proc_stat, int tgid, int spg_id) -{ - struct spg_proc_stat *stat = NULL; - - mutex_lock(&proc_stat->lock); - hash_for_each_possible(proc_stat->hash, stat, pnode, spg_id) { - if (stat->spg_id == spg_id) - break; - } - mutex_unlock(&proc_stat->lock); - - return stat; -} - -static struct spg_proc_stat *create_spg_proc_stat(int tgid, int spg_id) +static void sp_init_spg_proc_stat(struct spg_proc_stat *stat, int spg_id) { - struct spg_proc_stat *stat; - - stat = kmalloc(sizeof(struct spg_proc_stat), GFP_KERNEL); - if (stat == NULL) - return ERR_PTR(-ENOMEM); - - stat->tgid = tgid; + stat->tgid = current->tgid; stat->spg_id = spg_id; - atomic64_set(&stat->alloc_size, 0); + atomic64_set(&stat->alloc_nsize, 0); + atomic64_set(&stat->alloc_hsize, 0); atomic64_set(&stat->k2u_size, 0); - - return stat; -} - -static struct spg_proc_stat *sp_init_spg_proc_stat(struct sp_proc_stat *proc_stat, - struct sp_group *spg) -{ - struct spg_proc_stat *stat; - int spg_id = spg->id; /* visit spg id locklessly */ - struct sp_spg_stat *spg_stat = spg->stat; - - stat = create_spg_proc_stat(proc_stat->tgid, spg_id); - if (IS_ERR(stat)) - return stat; - - 
stat->proc_stat = proc_stat; - stat->spg_stat = spg_stat; - - mutex_lock(&proc_stat->lock); - hash_add(proc_stat->hash, &stat->pnode, stat->spg_id); - mutex_unlock(&proc_stat->lock); - - mutex_lock(&spg_stat->lock); - hash_add(spg_stat->hash, &stat->gnode, stat->tgid); - mutex_unlock(&spg_stat->lock); - return stat; } -static struct sp_spg_stat *create_spg_stat(int spg_id) +static void sp_init_group_stat(struct sp_spg_stat *stat) { - struct sp_spg_stat *stat; - - stat = kmalloc(sizeof(*stat), GFP_KERNEL); - if (stat == NULL) - return ERR_PTR(-ENOMEM); - - stat->spg_id = spg_id; atomic_set(&stat->hugepage_failures, 0); atomic_set(&stat->spa_num, 0); atomic64_set(&stat->size, 0); atomic64_set(&stat->alloc_nsize, 0); atomic64_set(&stat->alloc_hsize, 0); atomic64_set(&stat->alloc_size, 0); - mutex_init(&stat->lock); - hash_init(stat->hash); - - return stat; -} - -static int sp_init_spg_stat(struct sp_group *spg) -{ - struct sp_spg_stat *stat; - int ret, spg_id = spg->id; - - stat = create_spg_stat(spg_id); - if (IS_ERR(stat)) - return PTR_ERR(stat); - - down_write(&sp_spg_stat_sem); - ret = idr_alloc(&sp_spg_stat_idr, stat, spg_id, spg_id + 1, - GFP_KERNEL); - up_write(&sp_spg_stat_sem); - if (ret < 0) { - pr_err_ratelimited("group %d idr alloc failed, ret %d\n", - spg_id, ret); - kfree(stat); - } - - spg->stat = stat; - return ret; -} - -static void free_spg_stat(int spg_id) -{ - struct sp_spg_stat *stat; - - down_write(&sp_spg_stat_sem); - stat = idr_remove(&sp_spg_stat_idr, spg_id); - up_write(&sp_spg_stat_sem); - WARN_ON(!stat); - kfree(stat); + atomic64_set(&stat->k2u_size, 0); } /* statistics of all sp area, protected by sp_area_lock */ @@ -735,17 +733,17 @@ static void spa_inc_usage(struct sp_area *spa) case SPA_TYPE_ALLOC: spa_stat.alloc_num += 1; spa_stat.alloc_size += size; - update_spg_stat_alloc(size, true, is_huge, spa->spg->stat); + update_spg_stat_alloc(size, true, is_huge, &spa->spg->instat); break; case SPA_TYPE_K2TASK: spa_stat.k2u_task_num += 1; spa_stat.k2u_task_size += size; - update_spg_stat_k2u(size, true, spa->spg->stat); + update_spg_stat_k2u(size, true, &spa->spg->instat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num += 1; spa_stat.k2u_spg_size += size; - update_spg_stat_k2u(size, true, spa->spg->stat); + update_spg_stat_k2u(size, true, &spa->spg->instat); break; default: WARN(1, "invalid spa type"); @@ -781,17 +779,17 @@ static void spa_dec_usage(struct sp_area *spa) case SPA_TYPE_ALLOC: spa_stat.alloc_num -= 1; spa_stat.alloc_size -= size; - update_spg_stat_alloc(size, false, is_huge, spa->spg->stat); + update_spg_stat_alloc(size, false, is_huge, &spa->spg->instat); break; case SPA_TYPE_K2TASK: spa_stat.k2u_task_num -= 1; spa_stat.k2u_task_size -= size; - update_spg_stat_k2u(size, false, spa->spg->stat); + update_spg_stat_k2u(size, false, &spa->spg->instat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num -= 1; spa_stat.k2u_spg_size -= size; - update_spg_stat_k2u(size, false, spa->spg->stat); + update_spg_stat_k2u(size, false, &spa->spg->instat); break; default: WARN(1, "invalid spa type"); @@ -811,42 +809,52 @@ static void spa_dec_usage(struct sp_area *spa) } } -static void update_spg_proc_stat(unsigned long size, bool inc, - struct spg_proc_stat *stat, enum spa_type type) +static void update_mem_usage(unsigned long size, bool inc, bool is_hugepage, + struct sp_group_node *spg_node, enum spa_type type) { - if (unlikely(!stat)) { + if (unlikely(!spg_node)) { sp_dump_stack(); - WARN(1, "null process stat\n"); + WARN(1, "null sp group node\n"); return; } switch 
(type) { case SPA_TYPE_ALLOC: - update_spg_proc_stat_alloc(size, inc, stat); + update_mem_usage_alloc(size, inc, is_hugepage, spg_node); break; case SPA_TYPE_K2TASK: case SPA_TYPE_K2SPG: - update_spg_proc_stat_k2u(size, inc, stat); + update_mem_usage_k2u(size, inc, spg_node); break; default: WARN(1, "invalid stat type\n"); } } +struct sp_group_node *find_spg_node_by_spg(struct mm_struct *mm, + struct sp_group *spg) +{ + struct sp_group_node *spg_node; + + list_for_each_entry(spg_node, &mm->sp_group_master->node_list, group_node) { + if (spg_node->spg == spg) + return spg_node; + } + return NULL; +} + static void sp_update_process_stat(struct task_struct *tsk, bool inc, struct sp_area *spa) { - struct spg_proc_stat *stat; + struct sp_group_node *spg_node; unsigned long size = spa->real_size; enum spa_type type = spa->type; - down_write(&sp_group_sem); - stat = find_spg_proc_stat(tsk->mm->sp_group_master->stat, tsk->tgid, spa->spg->id); - up_write(&sp_group_sem); - if (!stat) - return; - - update_spg_proc_stat(size, inc, stat, type); + spg_node = find_spg_node_by_spg(tsk->mm, spa->spg); + if (!spg_node) + pr_err("share pool: spg node not found!\n"); + else + update_mem_usage(size, inc, spa->is_hugepage, spg_node, type); } static inline void check_interrupt_context(void) @@ -905,7 +913,6 @@ static void free_sp_group_locked(struct sp_group *spg) { fput(spg->file); fput(spg->file_hugetlb); - free_spg_stat(spg->id); idr_remove(&sp_group_idr, spg->id); free_sp_group_id((unsigned int)spg->id); sp_mapping_detach(spg, spg->dvpp); @@ -1165,6 +1172,7 @@ static struct sp_group *create_spg(int spg_id, unsigned long flag) INIT_LIST_HEAD(&spg->spa_list); INIT_LIST_HEAD(&spg->mnode); init_rwsem(&spg->rw_lock); + sp_init_group_stat(&spg->instat); sprintf(name, "sp_group_%d", spg_id); spg->file = shmem_kernel_file_setup(name, MAX_LFS_FILESIZE, @@ -1184,16 +1192,10 @@ static struct sp_group *create_spg(int spg_id, unsigned long flag) goto out_fput; } - ret = sp_init_spg_stat(spg); - if (ret < 0) - goto out_fput_all; - if (!is_local_group(spg_id)) system_group_count++; return spg; -out_fput_all: - fput(spg->file_hugetlb); out_fput: fput(spg->file); out_idr: @@ -1303,6 +1305,7 @@ static struct sp_group_node *create_spg_node(struct mm_struct *mm, spg_node->spg = spg; spg_node->master = master; spg_node->prot = prot; + sp_init_spg_proc_stat(&spg_node->instat, spg->id); list_add_tail(&spg_node->group_node, &master->node_list); master->count++; @@ -1321,12 +1324,6 @@ static int insert_spg_node(struct sp_group *spg, struct sp_group_node *node) spg->proc_num++; list_add_tail(&node->proc_node, &spg->procs); - /* - * The only way where sp_init_spg_proc_stat got failed is that there is no - * memory for sp_spg_stat. We will avoid this failure when we put sp_spg_stat - * into sp_group_node later. 
- */ - sp_init_spg_proc_stat(node->master->stat, spg); return 0; } @@ -1501,6 +1498,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) up_write(&spg->rw_lock); goto out_drop_group; } + mm->sp_group_master->instat.tgid = tsk->tgid; ret = sp_mapping_group_setup(mm, spg); if (ret) { @@ -1628,27 +1626,6 @@ int sp_group_add_task(int pid, int spg_id) } EXPORT_SYMBOL_GPL(sp_group_add_task); -static void free_spg_proc_stat(struct mm_struct *mm, int spg_id) -{ - int i; - struct sp_proc_stat *proc_stat = sp_get_proc_stat(mm); - struct spg_proc_stat *stat; - struct sp_spg_stat *spg_stat; - struct hlist_node *tmp; - - hash_for_each_safe(proc_stat->hash, i, tmp, stat, pnode) { - if (stat->spg_id == spg_id) { - spg_stat = stat->spg_stat; - mutex_lock(&spg_stat->lock); - hash_del(&stat->gnode); - mutex_unlock(&spg_stat->lock); - hash_del(&stat->pnode); - kfree(stat); - break; - } - } -} - /** * mg_sp_group_del_task() - delete a process from a sp group. * @pid: the pid of the task to be deleted @@ -1740,7 +1717,6 @@ int mg_sp_group_del_task(int pid, int spg_id) return -EINVAL; } - free_spg_proc_stat(mm, spg_id); up_write(&sp_group_sem); out_put_mm: @@ -2608,14 +2584,12 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, static void sp_alloc_fallback(struct sp_area *spa, struct sp_alloc_context *ac) { - struct sp_spg_stat *stat = ac->spg->stat; - if (ac->file == ac->spg->file) { ac->state = ALLOC_NOMEM; return; } - atomic_inc(&stat->hugepage_failures); + atomic_inc(&ac->spg->instat.hugepage_failures); if (!(ac->sp_flags & SP_HUGEPAGE_ONLY)) { ac->file = ac->spg->file; ac->size_aligned = ALIGN(ac->size, PAGE_SIZE); @@ -2944,7 +2918,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un int ret; void *uva; struct sp_area *spa; - struct spg_proc_stat *stat; + struct sp_group_node *spg_node; unsigned long prot = PROT_READ | PROT_WRITE; struct sp_k2u_context kc; struct sp_group *spg; @@ -2958,7 +2932,6 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un } spg = current->mm->sp_group_master->local; - stat = find_spg_proc_stat(current->mm->sp_group_master->stat, current->tgid, spg->id); up_write(&sp_group_sem); spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2TASK, current->tgid); @@ -2975,7 +2948,11 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un if (IS_ERR(uva)) pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva)); else { - update_spg_proc_stat(size, true, stat, SPA_TYPE_K2TASK); + spg_node = find_spg_node_by_spg(current->mm, spa->spg); + if (!spg_node) + pr_err("spg_node is null\n"); + else + update_mem_usage(size, true, spa->is_hugepage, spg_node, SPA_TYPE_K2TASK); spa->mm = current->mm; } @@ -4001,43 +3978,6 @@ __setup("enable_sp_share_k2u_spg", enable_share_k2u_to_group); /*** Statistical and maintenance functions ***/ -static void free_process_spg_proc_stat(struct sp_proc_stat *proc_stat) -{ - int i; - struct spg_proc_stat *stat; - struct hlist_node *tmp; - struct sp_spg_stat *spg_stat; - - /* traverse proc_stat->hash locklessly as process is exiting */ - hash_for_each_safe(proc_stat->hash, i, tmp, stat, pnode) { - spg_stat = stat->spg_stat; - mutex_lock(&spg_stat->lock); - hash_del(&stat->gnode); - mutex_unlock(&spg_stat->lock); - - hash_del(&stat->pnode); - kfree(stat); - } -} - -static void free_sp_proc_stat(struct sp_proc_stat *stat) -{ - free_process_spg_proc_stat(stat); - - down_write(&sp_proc_stat_sem); - stat->mm->sp_group_master->stat = NULL; - 
idr_remove(&sp_proc_stat_idr, stat->tgid); - up_write(&sp_proc_stat_sem); - kfree(stat); -} - -/* the caller make sure stat is not NULL */ -static void sp_proc_stat_drop(struct sp_proc_stat *stat) -{ - if (atomic_dec_and_test(&stat->use_count)) - free_sp_proc_stat(stat); -} - static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon, unsigned long *file, unsigned long *shmem, unsigned long *total_rss) { @@ -4047,54 +3987,29 @@ static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon, *total_rss = *anon + *file + *shmem; } -static long get_proc_alloc(struct sp_proc_stat *stat) -{ - return byte2kb(atomic64_read(&stat->alloc_size)); -} - static long get_proc_k2u(struct sp_proc_stat *stat) { return byte2kb(atomic64_read(&stat->k2u_size)); } -static long get_spg_alloc(struct sp_spg_stat *stat) -{ - return byte2kb(atomic64_read(&stat->alloc_size)); -} - -static long get_spg_alloc_nsize(struct sp_spg_stat *stat) +static long get_proc_alloc(struct sp_proc_stat *stat) { - return byte2kb(atomic64_read(&stat->alloc_nsize)); + return byte2kb(atomic64_read(&stat->alloc_nsize) + + atomic64_read(&stat->alloc_hsize)); } -static long get_spg_proc_alloc(struct spg_proc_stat *stat) +static void get_process_sp_res(struct sp_proc_stat *stat, + long *sp_res_out, long *sp_res_nsize_out) { - return byte2kb(atomic64_read(&stat->alloc_size)); + *sp_res_out = byte2kb(atomic64_read(&stat->alloc_nsize) + + atomic64_read(&stat->alloc_hsize)); + *sp_res_nsize_out = byte2kb(atomic64_read(&stat->alloc_nsize)); } -static long get_spg_proc_k2u(struct spg_proc_stat *stat) +static long get_sp_res_by_spg_proc(struct sp_group_node *spg_node) { - return byte2kb(atomic64_read(&stat->k2u_size)); -} - -static void get_process_sp_res(struct sp_proc_stat *stat, - long *sp_res_out, long *sp_res_nsize_out) -{ - int i; - struct spg_proc_stat *spg_proc_stat; - struct sp_spg_stat *spg_stat; - long sp_res = 0, sp_res_nsize = 0; - - mutex_lock(&stat->lock); - hash_for_each(stat->hash, i, spg_proc_stat, pnode) { - spg_stat = spg_proc_stat->spg_stat; - sp_res += get_spg_alloc(spg_stat); - sp_res_nsize += get_spg_alloc_nsize(spg_stat); - } - mutex_unlock(&stat->lock); - - *sp_res_out = sp_res; - *sp_res_nsize_out = sp_res_nsize; + return byte2kb(atomic64_read(&spg_node->instat.alloc_nsize) + + atomic64_read(&spg_node->instat.alloc_hsize)); } /* @@ -4115,24 +4030,15 @@ static void get_process_non_sp_res(unsigned long total_rss, unsigned long shmem, *non_sp_shm_out = non_sp_shm; } -static long get_sp_res_by_spg_proc(struct spg_proc_stat *stat) +static long get_spg_proc_alloc(struct sp_group_node *spg_node) { - return byte2kb(atomic64_read(&stat->spg_stat->alloc_size)); + return byte2kb(atomic64_read(&spg_node->instat.alloc_nsize) + + atomic64_read(&spg_node->instat.alloc_hsize)); } -static unsigned long get_process_prot_locked(int spg_id, struct mm_struct *mm) +static long get_spg_proc_k2u(struct sp_group_node *spg_node) { - unsigned long prot = 0; - struct sp_group_node *spg_node; - struct sp_group_master *master = mm->sp_group_master; - - list_for_each_entry(spg_node, &master->node_list, group_node) { - if (spg_node->spg->id == spg_id) { - prot = spg_node->prot; - break; - } - } - return prot; + return byte2kb(atomic64_read(&spg_node->instat.k2u_size)); } static void print_process_prot(struct seq_file *seq, unsigned long prot) @@ -4151,9 +4057,8 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct mm_struct *mm = task->mm; struct sp_group_master *master; struct sp_proc_stat *proc_stat; - 
struct spg_proc_stat *spg_proc_stat; - int i; - unsigned long anon, file, shmem, total_rss, prot; + struct sp_group_node *spg_node; + unsigned long anon, file, shmem, total_rss; long sp_res, sp_res_nsize, non_sp_res, non_sp_shm; if (!sp_is_enabled()) @@ -4162,12 +4067,13 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, if (!mm) return 0; + down_read(&mm->mmap_lock); master = mm->sp_group_master; if (!master) return 0; get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); - proc_stat = master->stat; + proc_stat = &master->instat; get_process_sp_res(proc_stat, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm); @@ -4185,24 +4091,18 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, seq_puts(m, "\n\nProcess in Each SP Group\n\n"); seq_printf(m, "%-8s %-9s %-9s %-9s %-4s\n", - "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "PROT"); + "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "PROT"); - /* to prevent ABBA deadlock, first hold sp_group_sem */ - down_read(&sp_group_sem); - mutex_lock(&proc_stat->lock); - hash_for_each(proc_stat->hash, i, spg_proc_stat, pnode) { - prot = get_process_prot_locked(spg_proc_stat->spg_id, mm); + list_for_each_entry(spg_node, &master->node_list, proc_node) { seq_printf(m, "%-8d %-9ld %-9ld %-9ld ", - spg_proc_stat->spg_id, - get_spg_proc_alloc(spg_proc_stat), - get_spg_proc_k2u(spg_proc_stat), - get_sp_res_by_spg_proc(spg_proc_stat)); - print_process_prot(m, prot); + spg_node->spg->id, + get_spg_proc_alloc(spg_node), + get_spg_proc_k2u(spg_node), + get_sp_res_by_spg_proc(spg_node)); + print_process_prot(m, spg_node->prot); seq_putc(m, '\n'); } - mutex_unlock(&proc_stat->lock); - up_read(&sp_group_sem); - + up_read(&mm->mmap_lock); return 0; } @@ -4332,31 +4232,42 @@ void spa_overview_show(struct seq_file *seq) } } -/* the caller must hold sp_group_sem */ -static int idr_spg_stat_cb(int id, void *p, void *data) +static int spg_info_show(int id, void *p, void *data) { - struct sp_spg_stat *s = p; + struct sp_group *spg = p; struct seq_file *seq = data; - if (is_local_group(id) && atomic64_read(&s->size) == 0) + if (id >= SPG_ID_LOCAL_MIN && id <= SPG_ID_LOCAL_MAX) return 0; if (seq != NULL) { - seq_printf(seq, "Group %6d ", id); + if (id == 0) + seq_puts(seq, "Non Group "); + else + seq_printf(seq, "Group %6d ", id); + + down_read(&spg->rw_lock); seq_printf(seq, "size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", - byte2kb(atomic64_read(&s->size)), - atomic_read(&s->spa_num), - byte2kb(atomic64_read(&s->alloc_size)), - byte2kb(atomic64_read(&s->alloc_nsize)), - byte2kb(atomic64_read(&s->alloc_hsize))); + byte2kb(atomic64_read(&spg->instat.size)), + atomic_read(&spg->instat.spa_num), + byte2kb(atomic64_read(&spg->instat.alloc_size)), + byte2kb(atomic64_read(&spg->instat.alloc_nsize)), + byte2kb(atomic64_read(&spg->instat.alloc_hsize))); + up_read(&spg->rw_lock); } else { - pr_info("Group %6d ", id); + if (id == 0) + pr_info("Non Group "); + else + pr_info("Group %6d ", id); + + down_read(&spg->rw_lock); pr_info("size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", - byte2kb(atomic64_read(&s->size)), - atomic_read(&s->spa_num), - byte2kb(atomic64_read(&s->alloc_size)), - byte2kb(atomic64_read(&s->alloc_nsize)), - byte2kb(atomic64_read(&s->alloc_hsize))); + byte2kb(atomic64_read(&spg->instat.size)), + atomic_read(&spg->instat.spa_num), + byte2kb(atomic64_read(&spg->instat.alloc_size)), + 
byte2kb(atomic64_read(&spg->instat.alloc_nsize)), + byte2kb(atomic64_read(&spg->instat.alloc_hsize))); + up_read(&spg->rw_lock); } return 0; @@ -4369,17 +4280,17 @@ void spg_overview_show(struct seq_file *seq) if (seq != NULL) { seq_printf(seq, "Share pool total size: %lld KB, spa total num: %d.\n", - byte2kb(atomic64_read(&sp_overall_stat.spa_total_size)), - atomic_read(&sp_overall_stat.spa_total_num)); + byte2kb(atomic64_read(&sp_overall_stat.spa_total_size)), + atomic_read(&sp_overall_stat.spa_total_num)); } else { pr_info("Share pool total size: %lld KB, spa total num: %d.\n", - byte2kb(atomic64_read(&sp_overall_stat.spa_total_size)), - atomic_read(&sp_overall_stat.spa_total_num)); + byte2kb(atomic64_read(&sp_overall_stat.spa_total_size)), + atomic_read(&sp_overall_stat.spa_total_num)); } - down_read(&sp_spg_stat_sem); - idr_for_each(&sp_spg_stat_idr, idr_spg_stat_cb, seq); - up_read(&sp_spg_stat_sem); + down_read(&sp_group_sem); + idr_for_each(&sp_group_idr, spg_info_show, seq); + up_read(&sp_group_sem); if (seq != NULL) seq_puts(seq, "\n"); @@ -4393,118 +4304,109 @@ static int spa_stat_show(struct seq_file *seq, void *offset) spa_overview_show(seq); /* print the file header */ seq_printf(seq, "%-10s %-16s %-16s %-10s %-7s %-5s %-8s %-8s\n", - "Group ID", "va_start", "va_end", "Size(KB)", "Type", "Huge", "PID", "Ref"); + "Group ID", "va_start", "va_end", "Size(KB)", "Type", "Huge", "PID", "Ref"); spa_normal_stat_show(seq); spa_dvpp_stat_show(seq); return 0; } -static int idr_proc_stat_cb(int id, void *p, void *data) +static int proc_usage_by_group(int id, void *p, void *data) { - struct sp_spg_stat *spg_stat = p; + struct sp_group *spg = p; struct seq_file *seq = data; - int i, tgid; - struct sp_proc_stat *proc_stat; - struct spg_proc_stat *spg_proc_stat; - + struct sp_group_node *spg_node; struct mm_struct *mm; - unsigned long anon, file, shmem, total_rss, prot; - /* - * non_sp_res: resident memory size excluding share pool memory - * sp_res: resident memory size of share pool, including normal - * page and hugepage memory - * non_sp_shm: resident shared memory size excluding share pool - * memory - */ + struct sp_group_master *master; + int tgid; + unsigned long anon, file, shmem, total_rss; long sp_res, sp_res_nsize, non_sp_res, non_sp_shm; - /* to prevent ABBA deadlock, first hold sp_group_sem */ - mutex_lock(&spg_stat->lock); - hash_for_each(spg_stat->hash, i, spg_proc_stat, gnode) { - proc_stat = spg_proc_stat->proc_stat; - tgid = proc_stat->tgid; - mm = proc_stat->mm; + down_read(&spg->rw_lock); + list_for_each_entry(spg_node, &spg->procs, proc_node) { + + master = spg_node->master; + if (!master) { + pr_info("master is NULL! 
process %d, group %d\n", + spg_node->instat.tgid, id); + continue; + } + mm = master->mm; + tgid = master->instat.tgid; get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); - get_process_sp_res(proc_stat, &sp_res, &sp_res_nsize); + get_process_sp_res(&master->instat, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, - &non_sp_res, &non_sp_shm); - prot = get_process_prot_locked(id, mm); + &non_sp_res, &non_sp_shm); seq_printf(seq, "%-8d ", tgid); - seq_printf(seq, "%-8d ", id); + if (id == 0) + seq_printf(seq, "%-8c ", '-'); + else + seq_printf(seq, "%-8d ", id); seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-10ld %-8ld %-7ld %-7ld %-10ld ", - get_spg_proc_alloc(spg_proc_stat), - get_spg_proc_k2u(spg_proc_stat), - get_sp_res_by_spg_proc(spg_proc_stat), - sp_res, non_sp_res, - page2kb(mm->total_vm), page2kb(total_rss), - page2kb(shmem), non_sp_shm); - print_process_prot(seq, prot); + get_spg_proc_alloc(spg_node), + get_spg_proc_k2u(spg_node), + get_sp_res_by_spg_proc(spg_node), + sp_res, non_sp_res, + page2kb(mm->total_vm), page2kb(total_rss), + page2kb(shmem), non_sp_shm); + print_process_prot(seq, spg_node->prot); seq_putc(seq, '\n'); } - mutex_unlock(&spg_stat->lock); + up_read(&spg->rw_lock); + return 0; } -static int proc_stat_show(struct seq_file *seq, void *offset) +static int proc_group_usage_show(struct seq_file *seq, void *offset) { spg_overview_show(seq); spa_overview_show(seq); + /* print the file header */ seq_printf(seq, "%-8s %-8s %-9s %-9s %-9s %-10s %-10s %-8s %-7s %-7s %-10s %-4s\n", - "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "SP_RES_T", - "Non-SP_RES", "VIRT", "RES", "Shm", "Non-SP_Shm", "PROT"); + "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "SP_RES_T", + "Non-SP_RES", "VIRT", "RES", "Shm", "Non-SP_Shm", "PROT"); /* print kthread buff_module_guard_work */ seq_printf(seq, "%-8s %-8s %-9lld %-9lld\n", - "guard", "-", - byte2kb(atomic64_read(&kthread_stat.alloc_size)), - byte2kb(atomic64_read(&kthread_stat.k2u_size))); + "guard", "-", + byte2kb(atomic64_read(&kthread_stat.alloc_size)), + byte2kb(atomic64_read(&kthread_stat.k2u_size))); - /* - * This ugly code is just for fixing the ABBA deadlock against - * sp_group_add_task. 
- */ down_read(&sp_group_sem); - down_read(&sp_spg_stat_sem); - idr_for_each(&sp_spg_stat_idr, idr_proc_stat_cb, seq); - up_read(&sp_spg_stat_sem); + idr_for_each(&sp_group_idr, proc_usage_by_group, seq); up_read(&sp_group_sem); return 0; } -static int idr_proc_overview_cb(int id, void *p, void *data) +static int proc_usage_show(struct seq_file *seq, void *offset) { - struct sp_proc_stat *proc_stat = p; - struct seq_file *seq = data; - struct mm_struct *mm = proc_stat->mm; + struct sp_group_master *master = NULL; unsigned long anon, file, shmem, total_rss; long sp_res, sp_res_nsize, non_sp_res, non_sp_shm; + struct sp_proc_stat *proc_stat; - get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); - get_process_sp_res(proc_stat, &sp_res, &sp_res_nsize); - get_process_non_sp_res(total_rss, shmem, sp_res_nsize, - &non_sp_res, &non_sp_shm); - - seq_printf(seq, "%-8d %-16s %-9ld %-9ld %-9ld %-10ld %-10ld %-8ld\n", - id, proc_stat->comm, - get_proc_alloc(proc_stat), - get_proc_k2u(proc_stat), - sp_res, non_sp_res, non_sp_shm, - page2kb(mm->total_vm)); - return 0; -} - -static int proc_overview_show(struct seq_file *seq, void *offset) -{ seq_printf(seq, "%-8s %-16s %-9s %-9s %-9s %-10s %-10s %-8s\n", - "PID", "COMM", "SP_ALLOC", "SP_K2U", "SP_RES", "Non-SP_RES", - "Non-SP_Shm", "VIRT"); + "PID", "COMM", "SP_ALLOC", "SP_K2U", "SP_RES", "Non-SP_RES", + "Non-SP_Shm", "VIRT"); + + mutex_lock(&master_list_lock); + list_for_each_entry(master, &master_list, list_node) { + proc_stat = &master->instat; + get_mm_rss_info(master->mm, &anon, &file, &shmem, &total_rss); + get_process_sp_res(&master->instat, &sp_res, &sp_res_nsize); + get_process_non_sp_res(total_rss, shmem, sp_res_nsize, + &non_sp_res, &non_sp_shm); + seq_printf(seq, "%-8d %-16s %-9ld %-9ld %-9ld %-10ld %-10ld %-8ld\n", + proc_stat->tgid, proc_stat->comm, + get_proc_alloc(proc_stat), + get_proc_k2u(proc_stat), + sp_res, non_sp_res, non_sp_shm, + page2kb(master->mm->total_vm)); + } + mutex_unlock(&master_list_lock); - down_read(&sp_proc_stat_sem); - idr_for_each(&sp_proc_stat_idr, idr_proc_overview_cb, seq); - up_read(&sp_proc_stat_sem); return 0; } @@ -4513,9 +4415,9 @@ static void __init proc_sharepool_init(void) if (!proc_mkdir("sharepool", NULL)) return; - proc_create_single_data("sharepool/proc_stat", 0400, NULL, proc_stat_show, NULL); proc_create_single_data("sharepool/spa_stat", 0400, NULL, spa_stat_show, NULL); - proc_create_single_data("sharepool/proc_overview", 0400, NULL, proc_overview_show, NULL); + proc_create_single_data("sharepool/proc_stat", 0400, NULL, proc_group_usage_show, NULL); + proc_create_single_data("sharepool/proc_overview", 0400, NULL, proc_usage_show, NULL); } /*** End of tatistical and maintenance functions ***/ @@ -4739,18 +4641,15 @@ void sp_group_post_exit(struct mm_struct *mm) * A process not in an sp group doesn't need to print because there * wont't be any memory which is not freed. */ - stat = sp_get_proc_stat(mm); + stat = &master->instat; if (stat) { - alloc_size = atomic64_read(&stat->alloc_size); + alloc_size = atomic64_read(&stat->alloc_nsize) + atomic64_read(&stat->alloc_hsize); k2u_size = atomic64_read(&stat->k2u_size); if (alloc_size != 0 || k2u_size != 0) pr_info("process %s(%d) exits. 
It applied %ld aligned KB, k2u shared %ld aligned KB\n", stat->comm, stat->tgid, byte2kb(alloc_size), byte2kb(k2u_size)); - - /* match with sp_init_proc_stat, we expect stat is released after this call */ - sp_proc_stat_drop(stat); } down_write(&sp_group_sem); @@ -4763,6 +4662,10 @@ void sp_group_post_exit(struct mm_struct *mm) } up_write(&sp_group_sem); + mutex_lock(&master_list_lock); + list_del(&master->list_node); + mutex_unlock(&master_list_lock); + kfree(master); } -- Gitee From 58903f41976b7978a538a005715203a20392419a Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:05 +0800 Subject: [PATCH 25/74] mm: sharepool: sp_alloc_mmap_populate bugfix Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5J0Z9 -------------------------------- when there is only one mm in a group allocating memory, if process is killed, the error path in sp_alloc_mmap_populate tries to access the next spg_node->master->mm in group's proc list. However, in this case the next spg_node in proc list is head and spg_node->master would be NULL, which leads to log below: [file:test_sp_alloc.c, func:alloc_large_repeat, line:437] start to alloc... [ 264.699086][ T1772] share pool: gonna sp_alloc_unmap... [ 264.699939][ T1772] share pool: list_next_entry(spg_node, proc_node) is ffff0004c4907028 [ 264.700380][ T1772] share pool: master is 0 [ 264.701240][ T1772] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000018 ... [ 264.704764][ T1772] Internal error: Oops: 96000006 [#1] SMP [ 264.705166][ T1772] Modules linked in: sharepool_dev(OE) [ 264.705823][ T1772] CPU: 3 PID: 1772 Comm: test_sp_alloc Tainted: G OE 5.10.0+ #23 ... [ 264.712513][ T1772] Call trace: [ 264.713057][ T1772] sp_alloc+0x528/0xa88 [ 264.713740][ T1772] dev_ioctl+0x6ec/0x1d00 [sharepool_dev] [ 264.714035][ T1772] __arm64_sys_ioctl+0xb0/0xe8 [ 264.714267][ T1772] el0_svc_common.constprop.0+0x88/0x268 [ 264.714539][ T1772] do_el0_svc+0x34/0xb8 [ 264.714743][ T1772] el0_svc+0x1c/0x28 [ 264.714938][ T1772] el0_sync_handler+0x8c/0xb0 [ 264.715163][ T1772] el0_sync+0x168/0x180 [ 264.715737][ T1772] Code: 9425adba f94012a1 d0006780 911c8000 (f9400c21) [ 264.716891][ T1772] ---[ end trace 1587677032f666c6 ]--- [ 264.717457][ T1772] Kernel panic - not syncing: Oops: Fatal exception [ 264.717961][ T1772] SMP: stopping secondary CPUs [ 264.718787][ T1772] Kernel Offset: disabled [ 264.719244][ T1772] CPU features: 0x0004,00200022,61006082 [ 264.719718][ T1772] Memory Limit: none [ 264.720333][ T1772] ---[ end Kernel panic - not syncing: Oops: Fatal exception ]--- Add a list_is_last check to avoid this null pointer access. Signed-off-by: Guo Mengqi --- mm/share_pool.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index aef3cec4eca8..2ea1c3a4149a 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2634,6 +2634,7 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa, if (ret) sp_add_work_compact(); } + return ret; } @@ -2654,14 +2655,8 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, int ret; ret = sp_alloc_mmap(mm, spa, spg_node, ac); - if (ret < 0) { - if (ac->need_fallocate) { - /* e.g. 
second sp_mmap fail */ - sp_fallocate(spa); - ac->need_fallocate = false; - } + if (ret < 0) return ret; - } if (!ac->have_mbind) { ret = sp_mbind(mm, spa->va_start, spa->real_size, spa->node_id); @@ -2676,18 +2671,13 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, ret = sp_alloc_populate(mm, spa, ac); if (ret) { err: - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); - if (unlikely(fatal_signal_pending(current))) pr_warn_ratelimited("allocation failed, current thread is killed\n"); else pr_warn_ratelimited("allocation failed due to mm populate failed(potential no enough memory when -12): %d\n", - ret); - sp_fallocate(spa); /* need this, otherwise memleak */ - sp_alloc_fallback(spa, ac); + ret); } else ac->need_fallocate = true; - return ret; } @@ -2696,7 +2686,7 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, { int ret = -EINVAL; int mmap_ret = 0; - struct mm_struct *mm; + struct mm_struct *mm, *end_mm = NULL; struct sp_group_node *spg_node; /* create mapping for each process in the group */ @@ -2705,7 +2695,7 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); if (mmap_ret) { if (ac->state != ALLOC_COREDUMP) - return mmap_ret; + goto unmap; ac->state = ALLOC_NORMAL; continue; } @@ -2713,6 +2703,24 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, } return ret; + +unmap: + /* use the next mm in proc list as end mark */ + if (!list_is_last(&spg_node->proc_node, &spa->spg->procs)) + end_mm = list_next_entry(spg_node, proc_node)->master->mm; + sp_alloc_unmap(end_mm, spa, spg_node); + + /* only fallocate spa if physical memory had been allocated */ + if (ac->need_fallocate) { + sp_fallocate(spa); + ac->need_fallocate = false; + } + + /* if hugepage allocation fails, this will transfer to normal page + * and try again. (only if SP_HUGEPAGE_ONLY is not flagged */ + sp_alloc_fallback(spa, ac); + + return mmap_ret; } /* spa maybe an error pointer, so introduce variable spg */ -- Gitee From f3d62b96252f6fbb4e8bbbfb21fb49250bd902ec Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:06 +0800 Subject: [PATCH 26/74] mm: sharepool: remove deprecated interfaces Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5KC7C -------------------------------- Most interfaces starting with "sp_" are deprecated, remove them. 
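A hypothetical call-site migration is sketched below; the caller is not part of this series, the mg_ signatures match those kept by this patch, and mg_sp_unshare() drops the unused pid argument. Assumes a kernel-module context with <linux/share_pool.h>, <linux/err.h> and <linux/sizes.h> available:

    static int demo_migrated_caller(int spg_id)
    {
            void *buf;

            /* was: buf = sp_alloc(SZ_2M, SP_HUGEPAGE, spg_id); */
            buf = mg_sp_alloc(SZ_2M, SP_HUGEPAGE, spg_id);
            if (IS_ERR(buf))
                    return PTR_ERR(buf);

            /* was: sp_free((unsigned long)buf, spg_id); */
            return mg_sp_free((unsigned long)buf, spg_id);
    }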
Signed-off-by: Guo Mengqi --- include/linux/share_pool.h | 90 +--------------------- mm/share_pool.c | 148 ++++++------------------------------- 2 files changed, 22 insertions(+), 216 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index c2ef26661a4f..e8bc9a368e34 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -246,53 +246,31 @@ static inline void sp_init_mm(struct mm_struct *mm) * Those interfaces are exported for modules */ extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); -extern int sp_group_add_task(int pid, int spg_id); - extern int mg_sp_group_del_task(int pid, int spg_id); -extern int sp_group_del_task(int pid, int spg_id); - extern int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num); -extern int sp_group_id_by_pid(int pid); - -extern int sp_group_walk(int spg_id, void *data, int (*func)(struct mm_struct *mm, void *)); extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); -extern void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); - -extern int sp_free(unsigned long addr, int id); extern int mg_sp_free(unsigned long addr, int id); -extern void *sp_make_share_k2u(unsigned long kva, unsigned long size, - unsigned long sp_flags, int pid, int spg_id); extern void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id); - -extern void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); extern void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); +extern int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id); -extern int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id); -extern int mg_sp_unshare(unsigned long va, unsigned long size, int id); - -extern int sp_walk_page_range(unsigned long uva, unsigned long size, - struct task_struct *tsk, struct sp_walk_data *sp_walk_data); extern int mg_sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data); -extern void sp_walk_page_free(struct sp_walk_data *sp_walk_data); extern void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data); extern int sp_register_notifier(struct notifier_block *nb); extern int sp_unregister_notifier(struct notifier_block *nb); -extern bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid); extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid); -extern bool is_sharepool_addr(unsigned long addr); extern bool mg_is_sharepool_addr(unsigned long addr); -extern int sp_id_of_current(void); extern int mg_sp_id_of_current(void); extern void sp_area_drop(struct vm_area_struct *vma); @@ -350,21 +328,11 @@ static inline int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) return -EPERM; } -static inline int sp_group_add_task(int pid, int spg_id) -{ - return -EPERM; -} - static inline int mg_sp_group_del_task(int pid, int spg_id) { return -EPERM; } -static inline int sp_group_del_task(int pid, int spg_id) -{ - return -EPERM; -} - static inline int sp_group_exit(struct mm_struct *mm) { return 0; @@ -379,74 +347,38 @@ static inline int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) return -EPERM; } -static inline int sp_group_id_by_pid(int pid) -{ - return -EPERM; -} - static inline int proc_sp_group_state(struct 
seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { return -EPERM; } -static inline void *sp_alloc(unsigned long size, unsigned long sp_flags, int sp_id) -{ - return NULL; -} - static inline void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) { return NULL; } -static inline int sp_free(unsigned long addr, int id) -{ - return -EPERM; -} - static inline int mg_sp_free(unsigned long addr, int id) { return -EPERM; } -static inline void *sp_make_share_k2u(unsigned long kva, unsigned long size, - unsigned long sp_flags, int pid, int spg_id) -{ - return NULL; -} - static inline void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id) { return NULL; } -static inline void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) -{ - return NULL; -} - static inline void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) { return NULL; } -static inline int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) -{ - return -EPERM; -} - static inline int mg_sp_unshare(unsigned long va, unsigned long size, int id) { return -EPERM; } -static inline int sp_id_of_current(void) -{ - return -EPERM; -} - static inline int mg_sp_id_of_current(void) { return -EPERM; @@ -460,22 +392,12 @@ static inline void sp_area_drop(struct vm_area_struct *vma) { } -static inline int sp_walk_page_range(unsigned long uva, unsigned long size, - struct task_struct *tsk, struct sp_walk_data *sp_walk_data) -{ - return 0; -} - static inline int mg_sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data) { return 0; } -static inline void sp_walk_page_free(struct sp_walk_data *sp_walk_data) -{ -} - static inline void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) { } @@ -490,21 +412,11 @@ static inline int sp_unregister_notifier(struct notifier_block *nb) return -EPERM; } -static inline bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) -{ - return false; -} - static inline bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { return false; } -static inline bool is_sharepool_addr(unsigned long addr) -{ - return false; -} - static inline bool mg_is_sharepool_addr(unsigned long addr) { return false; diff --git a/mm/share_pool.c b/mm/share_pool.c index 2ea1c3a4149a..65c78ef24bdc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1011,38 +1011,6 @@ static struct sp_group *__sp_find_spg(int pid, int spg_id) return spg; } -/** - * sp_group_id_by_pid() - Get the sp_group ID of a process. - * @pid: pid of target process. - * - * Return: - * 0 the sp_group ID. - * -ENODEV target process doesn't belong to any sp_group. - */ -int sp_group_id_by_pid(int pid) -{ - struct sp_group *spg; - int spg_id = -ENODEV; - - if (!sp_is_enabled()) - return -EOPNOTSUPP; - - check_interrupt_context(); - - spg = __sp_find_spg(pid, SPG_ID_DEFAULT); - if (!spg) - return -ENODEV; - - down_read(&spg->rw_lock); - if (spg_valid(spg)) - spg_id = spg->id; - up_read(&spg->rw_lock); - - sp_group_drop(spg); - return spg_id; -} -EXPORT_SYMBOL_GPL(sp_group_id_by_pid); - /** * mp_sp_group_id_by_pid() - Get the sp_group ID array of a process. * @pid: pid of target process. 
@@ -1620,12 +1588,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) } EXPORT_SYMBOL_GPL(mg_sp_group_add_task); -int sp_group_add_task(int pid, int spg_id) -{ - return mg_sp_group_add_task(pid, PROT_READ | PROT_WRITE, spg_id); -} -EXPORT_SYMBOL_GPL(sp_group_add_task); - /** * mg_sp_group_del_task() - delete a process from a sp group. * @pid: the pid of the task to be deleted @@ -1729,13 +1691,7 @@ int mg_sp_group_del_task(int pid, int spg_id) } EXPORT_SYMBOL_GPL(mg_sp_group_del_task); -int sp_group_del_task(int pid, int spg_id) -{ - return mg_sp_group_del_task(pid, spg_id); -} -EXPORT_SYMBOL_GPL(sp_group_del_task); - -int sp_id_of_current(void) +int mg_sp_id_of_current(void) { int ret, spg_id; struct sp_group_master *master; @@ -1767,12 +1723,6 @@ int sp_id_of_current(void) return spg_id; } -EXPORT_SYMBOL_GPL(sp_id_of_current); - -int mg_sp_id_of_current(void) -{ - return sp_id_of_current(); -} EXPORT_SYMBOL_GPL(mg_sp_id_of_current); /* the caller must hold sp_area_lock */ @@ -2301,7 +2251,7 @@ static int sp_free_get_spa(struct sp_free_context *fc) } /** - * sp_free() - Free the memory allocated by sp_alloc(). + * mg_sp_free() - Free the memory allocated by mg_sp_alloc(). * @addr: the starting VA of the memory. * @id: Address space identifier, which is used to distinguish the addr. * @@ -2310,7 +2260,7 @@ static int sp_free_get_spa(struct sp_free_context *fc) * * -EINVAL - the memory can't be found or was not allocted by share pool. * * -EPERM - the caller has no permision to free the memory. */ -int sp_free(unsigned long addr, int id) +int mg_sp_free(unsigned long addr, int id) { int ret = 0; struct sp_free_context fc = { @@ -2344,12 +2294,6 @@ int sp_free(unsigned long addr, int id) sp_try_to_compact(); return ret; } -EXPORT_SYMBOL_GPL(sp_free); - -int mg_sp_free(unsigned long addr, int id) -{ - return sp_free(addr, id); -} EXPORT_SYMBOL_GPL(mg_sp_free); /* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_lock). */ @@ -2747,7 +2691,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, } /** - * sp_alloc() - Allocate shared memory for all the processes in a sp_group. + * mg_sp_alloc() - Allocate shared memory for all the processes in a sp_group. * @size: the size of memory to allocate. * @sp_flags: how to allocate the memory. * @spg_id: the share group that the memory is allocated to. @@ -2758,7 +2702,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, * * if succeed, return the starting address of the shared memory. * * if fail, return the pointer of -errno. */ -void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) +void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) { struct sp_area *spa = NULL; int ret = 0; @@ -2792,12 +2736,6 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) else return (void *)(spa->va_start); } -EXPORT_SYMBOL_GPL(sp_alloc); - -void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) -{ - return sp_alloc(size, sp_flags, spg_id); -} EXPORT_SYMBOL_GPL(mg_sp_alloc); /** @@ -3128,7 +3066,7 @@ static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) } /** - * sp_make_share_k2u() - Share kernel memory to current process or an sp_group. + * mg_sp_make_share_k2u() - Share kernel memory to current process or an sp_group. * @kva: the VA of shared kernel memory. * @size: the size of shared kernel memory. * @sp_flags: how to allocate the memory. We only support SP_DVPP. 
@@ -3144,7 +3082,7 @@ static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) * * if succeed, return the shared user address to start at. * * if fail, return the pointer of -errno. */ -void *sp_make_share_k2u(unsigned long kva, unsigned long size, +void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id) { void *uva; @@ -3182,13 +3120,6 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, out: return sp_k2u_finish(uva, &kc); } -EXPORT_SYMBOL_GPL(sp_make_share_k2u); - -void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, - unsigned long sp_flags, int pid, int spg_id) -{ - return sp_make_share_k2u(kva, size, sp_flags, pid, spg_id); -} EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u); static int sp_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -3424,7 +3355,7 @@ static void __sp_walk_page_free(struct sp_walk_data *data) } /** - * sp_make_share_u2k() - Share user memory of a specified process to kernel. + * mg_sp_make_share_u2k() - Share user memory of a specified process to kernel. * @uva: the VA of shared user memory * @size: the size of shared user memory * @pid: the pid of the specified process(Not currently in use) @@ -3433,7 +3364,7 @@ static void __sp_walk_page_free(struct sp_walk_data *data) * * if success, return the starting kernel address of the shared memory. * * if failed, return the pointer of -errno. */ -void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) +void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) { int ret = 0; struct mm_struct *mm = current->mm; @@ -3492,12 +3423,6 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) kvfree(sp_walk_data.pages); return p; } -EXPORT_SYMBOL_GPL(sp_make_share_u2k); - -void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) -{ - return sp_make_share_u2k(uva, size, pid); -} EXPORT_SYMBOL_GPL(mg_sp_make_share_u2k); /* @@ -3719,7 +3644,7 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) } /** - * sp_unshare() - Unshare the kernel or user memory which shared by calling + * mg_sp_unshare() - Unshare the kernel or user memory which shared by calling * sp_make_share_{k2u,u2k}(). * @va: the specified virtual address of memory * @size: the size of unshared memory @@ -3728,7 +3653,7 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) * * Return: 0 for success, -errno on failure. */ -int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) +int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id) { int ret = 0; @@ -3754,16 +3679,10 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) return ret; } -EXPORT_SYMBOL_GPL(sp_unshare); - -int mg_sp_unshare(unsigned long va, unsigned long size, int id) -{ - return sp_unshare(va, size, 0, id); -} EXPORT_SYMBOL_GPL(mg_sp_unshare); /** - * sp_walk_page_range() - Walk page table with caller specific callbacks. + * mg_sp_walk_page_range() - Walk page table with caller specific callbacks. * @uva: the start VA of user memory. * @size: the size of user memory. * @tsk: task struct of the target task. @@ -3774,7 +3693,7 @@ EXPORT_SYMBOL_GPL(mg_sp_unshare); * When return 0, sp_walk_data describing [uva, uva+size) can be used. * When return -errno, information in sp_walk_data is useless. 
*/ -int sp_walk_page_range(unsigned long uva, unsigned long size, +int mg_sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data) { struct mm_struct *mm; @@ -3813,20 +3732,13 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, return ret; } -EXPORT_SYMBOL_GPL(sp_walk_page_range); - -int mg_sp_walk_page_range(unsigned long uva, unsigned long size, - struct task_struct *tsk, struct sp_walk_data *sp_walk_data) -{ - return sp_walk_page_range(uva, size, tsk, sp_walk_data); -} EXPORT_SYMBOL_GPL(mg_sp_walk_page_range); /** - * sp_walk_page_free() - Free the sp_walk_data structure. + * mg_sp_walk_page_free() - Free the sp_walk_data structure. * @sp_walk_data: a structure of a page pointer array to be freed. */ -void sp_walk_page_free(struct sp_walk_data *sp_walk_data) +void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) { if (!sp_is_enabled()) return; @@ -3838,12 +3750,6 @@ void sp_walk_page_free(struct sp_walk_data *sp_walk_data) __sp_walk_page_free(sp_walk_data); } -EXPORT_SYMBOL_GPL(sp_walk_page_free); - -void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) -{ - sp_walk_page_free(sp_walk_data); -} EXPORT_SYMBOL_GPL(mg_sp_walk_page_free); int sp_register_notifier(struct notifier_block *nb) @@ -3859,7 +3765,7 @@ int sp_unregister_notifier(struct notifier_block *nb) EXPORT_SYMBOL_GPL(sp_unregister_notifier); /** - * sp_config_dvpp_range() - User can config the share pool start address + * mg_sp_config_dvpp_range() - User can config the share pool start address * of each Da-vinci device. * @start: the value of share pool start * @size: the value of share pool @@ -3870,7 +3776,7 @@ EXPORT_SYMBOL_GPL(sp_unregister_notifier); * Return false if parameter invalid or has been set up. * This functuon has no concurrent problem. */ -bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) +bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { int ret; bool err = false; @@ -3920,12 +3826,6 @@ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) return err; } -EXPORT_SYMBOL_GPL(sp_config_dvpp_range); - -bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) -{ - return sp_config_dvpp_range(start, size, device_id, pid); -} EXPORT_SYMBOL_GPL(mg_sp_config_dvpp_range); static bool is_sp_normal_addr(unsigned long addr) @@ -3936,22 +3836,16 @@ static bool is_sp_normal_addr(unsigned long addr) } /** - * is_sharepool_addr() - Check if a user memory address belongs to share pool. + * mg_is_sharepool_addr() - Check if a user memory address belongs to share pool. * @addr: the userspace address to be checked. * * Return true if addr belongs to share pool, or false vice versa. 
*/ -bool is_sharepool_addr(unsigned long addr) +bool mg_is_sharepool_addr(unsigned long addr) { return sp_is_enabled() && (is_sp_normal_addr(addr) || is_device_addr(addr)); } -EXPORT_SYMBOL_GPL(is_sharepool_addr); - -bool mg_is_sharepool_addr(unsigned long addr) -{ - return is_sharepool_addr(addr); -} EXPORT_SYMBOL_GPL(mg_is_sharepool_addr); int sp_node_id(struct vm_area_struct *vma) @@ -4432,7 +4326,7 @@ static void __init proc_sharepool_init(void) bool sp_check_addr(unsigned long addr) { - if (sp_is_enabled() && is_sharepool_addr(addr) && + if (sp_is_enabled() && mg_is_sharepool_addr(addr) && !check_aoscore_process(current)) { sp_dump_stack(); return true; @@ -4442,7 +4336,7 @@ bool sp_check_addr(unsigned long addr) bool sp_check_mmap_addr(unsigned long addr, unsigned long flags) { - if (sp_is_enabled() && is_sharepool_addr(addr) && + if (sp_is_enabled() && mg_is_sharepool_addr(addr) && !check_aoscore_process(current) && !(flags & MAP_SHARE_POOL)) { sp_dump_stack(); return true; -- Gitee From 97e1f3df24eff864c20973977a39778ee0e44e11 Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:07 +0800 Subject: [PATCH 27/74] mm: sharepool: proc_sp_group_state bugfix Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5K3MH -------------------------------- After refactoring, cat /proc/pid_xx/sp_group will cause kernel panic. Fix this error. Signed-off-by: Guo Mengqi --- mm/share_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 65c78ef24bdc..73e941636ba7 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3995,7 +3995,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, seq_printf(m, "%-8s %-9s %-9s %-9s %-4s\n", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "PROT"); - list_for_each_entry(spg_node, &master->node_list, proc_node) { + list_for_each_entry(spg_node, &master->node_list, group_node) { seq_printf(m, "%-8d %-9ld %-9ld %-9ld ", spg_node->spg->id, get_spg_proc_alloc(spg_node), -- Gitee From 2c1376f2ff692027437b0137b798dbf95ba71cf3 Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:08 +0800 Subject: [PATCH 28/74] mm: sharepool: fix dvpp spm redundant print error Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5KSDH -------------------------------- Fix sharepool redundant /proc/sharepool/spa_stat prints when there are multiple groups which are all attached to same sp_mapping. Traverse all dvpp-mappings rather than all groups. Signed-off-by: Guo Mengqi --- mm/share_pool.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 73e941636ba7..94c2aa02472c 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -122,6 +122,11 @@ static LIST_HEAD(master_list); /* mutex to protect insert/delete ops from master_list */ static DEFINE_MUTEX(master_list_lock); +/* list of all spm-dvpp */ +static LIST_HEAD(spm_dvpp_list); +/* mutex to protect insert/delete ops from master_list */ +static DEFINE_MUTEX(spm_list_lock); + /* for kthread buff_module_guard_work */ static struct sp_proc_stat kthread_stat; @@ -189,6 +194,7 @@ struct sp_mapping { /* list head for all groups attached to this mapping, dvpp mapping only */ struct list_head group_head; + struct list_head spm_node; }; /* Processes in the same sp_group can share memory. 
@@ -290,6 +296,22 @@ static void sp_init_group_master_stat(struct mm_struct *mm, struct sp_proc_stat #define SP_MAPPING_NORMAL 0x2 static struct sp_mapping *sp_mapping_normal; +static void sp_mapping_add_to_list(struct sp_mapping *spm) +{ + mutex_lock(&spm_list_lock); + if (spm->flag & SP_MAPPING_DVPP) + list_add_tail(&spm->spm_node, &spm_dvpp_list); + mutex_unlock(&spm_list_lock); +} + +static void sp_mapping_remove_from_list(struct sp_mapping *spm) +{ + mutex_lock(&spm_list_lock); + if (spm->flag & SP_MAPPING_DVPP) + list_del(&spm->spm_node); + mutex_unlock(&spm_list_lock); +} + static void sp_mapping_range_init(struct sp_mapping *spm) { int i; @@ -325,12 +347,14 @@ static struct sp_mapping *sp_mapping_create(unsigned long flag) atomic_set(&spm->user, 0); spm->area_root = RB_ROOT; INIT_LIST_HEAD(&spm->group_head); + sp_mapping_add_to_list(spm); return spm; } static void sp_mapping_destroy(struct sp_mapping *spm) { + sp_mapping_remove_from_list(spm); kfree(spm); } @@ -4068,22 +4092,14 @@ static void spa_normal_stat_show(struct seq_file *seq) spa_stat_of_mapping_show(seq, sp_mapping_normal); } -static int idr_spg_dvpp_stat_show_cb(int id, void *p, void *data) -{ - struct sp_group *spg = p; - struct seq_file *seq = data; - - if (!is_local_group(spg->id) || atomic_read(&spg->dvpp->user) == 1) - spa_stat_of_mapping_show(seq, spg->dvpp); - - return 0; -} - static void spa_dvpp_stat_show(struct seq_file *seq) { - down_read(&sp_group_sem); - idr_for_each(&sp_group_idr, idr_spg_dvpp_stat_show_cb, seq); - up_read(&sp_group_sem); + struct sp_mapping *spm; + + mutex_lock(&spm_list_lock); + list_for_each_entry(spm, &spm_dvpp_list, spm_node) + spa_stat_of_mapping_show(seq, spm); + mutex_unlock(&spm_list_lock); } -- Gitee From 4370c5a3b78c5c0cd64b25c4949733b79360b94f Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:47:09 +0800 Subject: [PATCH 29/74] mm/sharepool: Delete unused sysctl interface Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5LHGZ -------------------------------- Delete unused sysctl interfaces in sharepool feature. 
Signed-off-by: Wang Wensheng --- include/linux/share_pool.h | 21 ---- kernel/sysctl.c | 67 ------------- mm/share_pool.c | 192 ++----------------------------------- 3 files changed, 6 insertions(+), 274 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index e8bc9a368e34..4860e4b00e57 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -48,23 +48,8 @@ #define MAX_DEVID 8 /* the max num of Da-vinci devices */ -extern int sysctl_share_pool_hugepage_enable; - -extern int sysctl_ac_mode; - -extern int sysctl_sp_debug_mode; - extern struct static_key_false share_pool_enabled_key; -extern int sysctl_share_pool_map_lock_enable; - -extern int sysctl_sp_compact_enable; -extern unsigned long sysctl_sp_compact_interval; -extern unsigned long sysctl_sp_compact_interval_max; -extern int sysctl_sp_perf_alloc; - -extern int sysctl_sp_perf_k2u; - #ifdef __GENKSYMS__ /* we estimate an sp-group ususally contains at most 64 sp-group */ #define SP_SPG_HASH_BITS 6 @@ -307,12 +292,6 @@ static inline bool sp_check_vm_share_pool(unsigned long vm_flags) return false; } -static inline void sp_dump_stack(void) -{ - if (sysctl_sp_debug_mode) - dump_stack(); -} - static inline bool is_vmalloc_sharepool(unsigned long vm_flags) { if (sp_is_enabled() && (vm_flags & VM_SHAREPOOL)) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 91812d673c6b..5fab117d7f52 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -71,7 +71,6 @@ #include #include #include -#include #include "../lib/kstrtox.h" @@ -3238,72 +3237,6 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, }, #endif -#ifdef CONFIG_ASCEND_SHARE_POOL - { - .procname = "sharepool_debug_mode", - .data = &sysctl_sp_debug_mode, - .maxlen = sizeof(sysctl_sp_debug_mode), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "sharepool_compact_enable", - .data = &sysctl_sp_compact_enable, - .maxlen = sizeof(sysctl_sp_compact_enable), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "sharepool_compact_interval", - .data = &sysctl_sp_compact_interval, - .maxlen = sizeof(sysctl_sp_compact_interval), - .mode = 0600, - .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero_ul, - .extra2 = &sysctl_sp_compact_interval_max, - }, - { - /* 0: map_unlock, 1: map_lock */ - .procname = "share_pool_map_lock_enable", - .data = &sysctl_share_pool_map_lock_enable, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "sharepool_perf_k2u", - .data = &sysctl_sp_perf_k2u, - .maxlen = sizeof(sysctl_sp_perf_k2u), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &ten_thousand, - }, - { - .procname = "sharepool_perf_alloc", - .data = &sysctl_sp_perf_alloc, - .maxlen = sizeof(sysctl_sp_perf_alloc), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &ten_thousand, - }, - { - .procname = "sharepool_ac_mode", - .data = &sysctl_ac_mode, - .maxlen = sizeof(sysctl_ac_mode), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif #ifdef CONFIG_HUGETLBFS { .procname = "hugepage_mig_noalloc", diff --git a/mm/share_pool.c b/mm/share_pool.c index 94c2aa02472c..80a5e01ca62d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -44,7 +44,6 @@ 
#include #include #include -#include #include #include #include @@ -52,10 +51,6 @@ #include #include -/* access control mode macros */ -#define AC_NONE 0 -#define AC_SINGLE_OWNER 1 - #define spg_valid(spg) ((spg)->is_alive == true) /* Use spa va address as mmap offset. This can work because spa_file @@ -82,19 +77,6 @@ static int __read_mostly enable_mdc_default_group; static const int mdc_default_group_id = 1; -/* share the uva to the whole group */ -static int __read_mostly enable_share_k2u_spg = 1; - -/* access control mode */ -int sysctl_ac_mode = AC_NONE; -/* debug mode */ -int sysctl_sp_debug_mode; - -int sysctl_share_pool_map_lock_enable; - -int sysctl_sp_perf_k2u; -int sysctl_sp_perf_alloc; - static int system_group_count; static unsigned int sp_device_number; @@ -232,8 +214,6 @@ struct sp_group { struct list_head spa_list; /* group statistics */ struct sp_spg_stat instat; - /* we define the creator process of a sp_group as owner */ - struct task_struct *owner; /* is_alive == false means it's being destroyed */ bool is_alive; atomic_t use_count; @@ -837,7 +817,6 @@ static void update_mem_usage(unsigned long size, bool inc, bool is_hugepage, struct sp_group_node *spg_node, enum spa_type type) { if (unlikely(!spg_node)) { - sp_dump_stack(); WARN(1, "null sp group node\n"); return; } @@ -912,8 +891,6 @@ struct sp_k2u_context { int state; int spg_id; bool to_task; - struct timespec64 start; - struct timespec64 end; }; static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, @@ -1158,7 +1135,6 @@ static struct sp_group *create_spg(int spg_id, unsigned long flag) spg->flag = flag; spg->is_alive = true; spg->proc_num = 0; - spg->owner = current->group_leader; atomic_set(&spg->use_count, 1); INIT_LIST_HEAD(&spg->procs); INIT_LIST_HEAD(&spg->spa_list); @@ -1443,7 +1419,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) up_write(&sp_group_sem); ret = -EACCES; free_new_spg_id(id_newly_generated, spg_id); - sp_dump_stack(); goto out_put_task; } @@ -1475,15 +1450,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) goto out_put_mm; } - /* access control permission check */ - if (sysctl_ac_mode == AC_SINGLE_OWNER) { - if (spg->owner != current->group_leader) { - ret = -EPERM; - goto out_drop_group; - } - } - - down_write(&spg->rw_lock); ret = mm_add_group_init(tsk, mm, spg); if (ret) { @@ -2067,63 +2033,6 @@ void sp_area_drop(struct vm_area_struct *vma) spin_unlock(&sp_area_lock); } -int sysctl_sp_compact_enable; -unsigned long sysctl_sp_compact_interval = 30UL; -unsigned long sysctl_sp_compact_interval_max = 1000UL; -static unsigned long compact_last_jiffies; -static unsigned long compact_daemon_status; -#define COMPACT_START 1 -#define COMPACT_STOP 0 - -static void sp_compact_nodes(struct work_struct *work) -{ - sysctl_compaction_handler(NULL, 1, NULL, NULL, NULL); - - kfree(work); - - compact_last_jiffies = jiffies; - cmpxchg(&compact_daemon_status, COMPACT_START, COMPACT_STOP); -} - -static void sp_add_work_compact(void) -{ - struct work_struct *compact_work; - - if (!sysctl_sp_compact_enable) - return; - - /* experimental compaction time: 4GB->1.7s, 8GB->3.4s */ - if (!time_after(jiffies, - compact_last_jiffies + sysctl_sp_compact_interval * HZ)) - return; - - if (cmpxchg(&compact_daemon_status, COMPACT_STOP, COMPACT_START) == - COMPACT_START) - return; - - compact_work = kzalloc(sizeof(*compact_work), GFP_KERNEL); - if (!compact_work) - return; - - INIT_WORK(compact_work, sp_compact_nodes); - schedule_work(compact_work); -} - 
-static void sp_try_to_compact(void) -{ - unsigned long totalram; - unsigned long freeram; - - totalram = totalram_pages(); - freeram = global_zone_page_state(NR_FREE_PAGES); - - /* free < total / 3 */ - if ((freeram + (freeram << 1)) > totalram) - return; - - sp_add_work_compact(); -} - /* * The function calls of do_munmap() won't change any non-atomic member * of struct sp_group. Please review the following chain: @@ -2314,8 +2223,6 @@ int mg_sp_free(unsigned long addr, int id) __sp_area_drop(fc.spa); /* match __find_sp_area in sp_free_get_spa */ out: - sp_dump_stack(); - sp_try_to_compact(); return ret; } EXPORT_SYMBOL_GPL(mg_sp_free); @@ -2333,12 +2240,6 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, unsigned long pgoff = addr_offset(spa) >> PAGE_SHIFT; struct vm_area_struct *vma; - /* Mark the mapped region to be locked. After the MAP_LOCKED is enable, - * multiple tasks will preempt resources, causing performance loss. - */ - if (sysctl_share_pool_map_lock_enable) - flags |= MAP_LOCKED; - atomic_inc(&spa->use_count); addr = __do_mmap_mm(mm, file, addr, size, prot, flags, vm_flags, pgoff, populate, NULL); @@ -2353,7 +2254,6 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, *pvma = vma; } - return addr; } @@ -2371,39 +2271,10 @@ struct sp_alloc_context { unsigned long populate; int state; bool need_fallocate; - struct timespec64 start; - struct timespec64 end; bool have_mbind; enum spa_type type; }; -static void trace_sp_alloc_begin(struct sp_alloc_context *ac) -{ - if (!sysctl_sp_perf_alloc) - return; - - ktime_get_ts64(&ac->start); -} - -static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va) -{ - unsigned long cost; - - if (!sysctl_sp_perf_alloc) - return; - - ktime_get_ts64(&ac->end); - - cost = SEC2US(ac->end.tv_sec - ac->start.tv_sec) + - NS2US(ac->end.tv_nsec - ac->start.tv_nsec); - if (cost >= (unsigned long)sysctl_sp_perf_alloc) { - pr_err("Task %s(%d/%d) sp_alloc returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, sp_flags is %lx, pass through is %d\n", - current->comm, current->tgid, current->pid, - va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags, - is_local_group(ac->spg->id)); - } -} - static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, int spg_id, struct sp_alloc_context *ac) { @@ -2411,8 +2282,6 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, check_interrupt_context(); - trace_sp_alloc_begin(ac); - /* mdc scene hack */ if (enable_mdc_default_group) spg_id = mdc_default_group_id; @@ -2597,11 +2466,8 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa, * depends on this feature (and MAP_LOCKED) to work correctly. 
*/ ret = do_mm_populate(mm, sp_addr, ac->populate, 0); - if (spa->is_hugepage) { + if (spa->is_hugepage) memalloc_noreclaim_restore(noreclaim_flag); - if (ret) - sp_add_work_compact(); - } return ret; } @@ -2704,14 +2570,10 @@ static void sp_alloc_finish(int result, struct sp_area *spa, sp_update_process_stat(current, true, spa); /* this will free spa if mmap failed */ - if (spa && !IS_ERR(spa)) { + if (spa && !IS_ERR(spa)) __sp_area_drop(spa); - trace_sp_alloc_finish(ac, spa->va_start); - } sp_group_drop(spg); - sp_dump_stack(); - sp_try_to_compact(); } /** @@ -2996,33 +2858,6 @@ static bool vmalloc_area_set_flag(unsigned long kva, unsigned long flags) return false; } -static void trace_sp_k2u_begin(struct sp_k2u_context *kc) -{ - if (!sysctl_sp_perf_k2u) - return; - - ktime_get_ts64(&kc->start); -} - -static void trace_sp_k2u_finish(struct sp_k2u_context *kc, void *uva) -{ - unsigned long cost; - - if (!sysctl_sp_perf_k2u) - return; - - ktime_get_ts64(&kc->end); - - cost = SEC2US(kc->end.tv_sec - kc->start.tv_sec) + - NS2US(kc->end.tv_nsec - kc->start.tv_nsec); - if (cost >= (unsigned long)sysctl_sp_perf_k2u) { - pr_err("Task %s(%d/%d) sp_k2u returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, sp_flags is %lx, to_task is %d\n", - current->comm, current->tgid, current->pid, - (unsigned long)uva, cost, byte2kb(kc->size), byte2kb(kc->size_aligned), - kc->sp_flags, kc->to_task); - } -} - static int sp_k2u_prepare(unsigned long kva, unsigned long size, unsigned long sp_flags, int spg_id, struct sp_k2u_context *kc) { @@ -3030,8 +2865,6 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, unsigned int page_size = PAGE_SIZE; unsigned long kva_aligned, size_aligned; - trace_sp_k2u_begin(kc); - if (sp_flags & ~SP_FLAG_MASK) { pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags); return -EINVAL; @@ -3084,8 +2917,6 @@ static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) else uva = uva + (kc->kva - kc->kva_aligned); - trace_sp_k2u_finish(kc, uva); - sp_dump_stack(); return uva; } @@ -3609,8 +3440,6 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id) WARN(1, "unshare uva invalid spa type"); } - sp_dump_stack(); - out_clr_flag: if (!vmalloc_area_clr_flag(spa->kva, VM_SHAREPOOL)) pr_debug("clear spa->kva %ld is not valid\n", spa->kva); @@ -3895,13 +3724,6 @@ static int __init mdc_default_group(char *s) } __setup("enable_mdc_default_group", mdc_default_group); -static int __init enable_share_k2u_to_group(char *s) -{ - enable_share_k2u_spg = 1; - return 1; -} -__setup("enable_sp_share_k2u_spg", enable_share_k2u_to_group); - /*** Statistical and maintenance functions ***/ static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon, @@ -4343,20 +4165,18 @@ static void __init proc_sharepool_init(void) bool sp_check_addr(unsigned long addr) { if (sp_is_enabled() && mg_is_sharepool_addr(addr) && - !check_aoscore_process(current)) { - sp_dump_stack(); + !check_aoscore_process(current)) return true; - } else + else return false; } bool sp_check_mmap_addr(unsigned long addr, unsigned long flags) { if (sp_is_enabled() && mg_is_sharepool_addr(addr) && - !check_aoscore_process(current) && !(flags & MAP_SHARE_POOL)) { - sp_dump_stack(); + !check_aoscore_process(current) && !(flags & MAP_SHARE_POOL)) return true; - } else + else return false; } -- Gitee From 53bcb54403435a1869c7a1359441168f5d8f83ea Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Tue, 18 Oct 2022 19:47:10 +0800 Subject: [PATCH 30/74] mm: sharepool: Remove unused 
sp_dev_va_start and sp_dev_va_size Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5LY5K ----------------------------------- Remove the unused sp_dev_va_start and sp_dev_va_size, the related code can be removed. Add the dvpp_addr checker in mg_is_sharepool_addr() for current proc. Signed-off-by: Zhang Zekun --- mm/share_pool.c | 62 +++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 80a5e01ca62d..4a485f706007 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -80,13 +80,6 @@ static const int mdc_default_group_id = 1; static int system_group_count; static unsigned int sp_device_number; -static unsigned long sp_dev_va_start[MAX_DEVID]; -static unsigned long sp_dev_va_size[MAX_DEVID]; - -static bool is_sp_dev_addr_enabled(int device_id) -{ - return sp_dev_va_size[device_id]; -} /* idr of all sp_groups */ static DEFINE_IDR(sp_group_idr); @@ -303,14 +296,9 @@ static void sp_mapping_range_init(struct sp_mapping *spm) continue; } - if (!is_sp_dev_addr_enabled(i)) { - spm->start[i] = MMAP_SHARE_POOL_16G_START + - i * MMAP_SHARE_POOL_16G_SIZE; - spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; - } else { - spm->start[i] = sp_dev_va_start[i]; - spm->end[i] = spm->start[i] + sp_dev_va_size[i]; - } + spm->start[i] = MMAP_SHARE_POOL_16G_START + + i * MMAP_SHARE_POOL_16G_SIZE; + spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; } } @@ -1089,18 +1077,6 @@ static bool is_online_node_id(int node_id) return node_id >= 0 && node_id < MAX_NUMNODES && node_online(node_id); } -static bool is_device_addr(unsigned long addr) -{ - int i; - - for (i = 0; i < sp_device_number; i++) { - if (addr >= sp_dev_va_start[i] && - addr < sp_dev_va_start[i] + sp_dev_va_size[i]) - return true; - } - return false; -} - static struct sp_group *create_spg(int spg_id, unsigned long flag) { int ret; @@ -3688,6 +3664,36 @@ static bool is_sp_normal_addr(unsigned long addr) sp_device_number * MMAP_SHARE_POOL_16G_SIZE; } +static bool is_sp_dvpp_addr(unsigned long addr) +{ + int i; + struct mm_struct *mm; + struct sp_group_master *master; + struct sp_mapping *spm_dvpp; + + mm = current->mm; + if (!mm) + return false; + + down_read(&sp_group_sem); + master = mm->sp_group_master; + if (!master) { + up_read(&sp_group_sem); + return false; + } + + /* master->local and master->local->dvpp won't be NULL*/ + spm_dvpp = master->local->dvpp; + for (i = 0; i < MAX_DEVID; i++) { + if (addr >= spm_dvpp->start[i] && addr < spm_dvpp->end[i]) { + up_read(&sp_group_sem); + return true; + } + } + up_read(&sp_group_sem); + return false; +} + /** * mg_is_sharepool_addr() - Check if a user memory address belongs to share pool. * @addr: the userspace address to be checked. 
@@ -3697,7 +3703,7 @@ static bool is_sp_normal_addr(unsigned long addr) bool mg_is_sharepool_addr(unsigned long addr) { return sp_is_enabled() && - (is_sp_normal_addr(addr) || is_device_addr(addr)); + ((is_sp_normal_addr(addr) || is_sp_dvpp_addr(addr))); } EXPORT_SYMBOL_GPL(mg_is_sharepool_addr); -- Gitee From 5999a388fbb377490ee91d4dc4641032e92453dc Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Tue, 18 Oct 2022 19:47:11 +0800 Subject: [PATCH 31/74] mm: sharepool: Remove sp_device_number_detect function Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5LY4H ----------------------------------------- Remove the sp_device_number, and we don't need 'sp_device_number' to detect the sp_device_number. Instead, we use maco 'MAX_DEVID' to take the place of sp_device_number. Signed-off-by: Zhang Zekun --- mm/share_pool.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 4a485f706007..a2ee3fe5eebc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -79,8 +79,6 @@ static const int mdc_default_group_id = 1; static int system_group_count; -static unsigned int sp_device_number; - /* idr of all sp_groups */ static DEFINE_IDR(sp_group_idr); /* rw semaphore for sp_group_idr and mm->sp_group_master */ @@ -372,7 +370,7 @@ static bool can_mappings_merge(struct sp_mapping *m1, struct sp_mapping *m2) { int i; - for (i = 0; i < sp_device_number; i++) + for (i = 0; i < MAX_DEVID; i++) if (m1->start[i] != m2->start[i] || m1->end[i] != m2->end[i]) return false; @@ -3620,7 +3618,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) /* NOTE: check the start address */ if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || - device_id < 0 || device_id >= sp_device_number || !is_online_node_id(device_id)) + device_id < 0 || device_id >= MAX_DEVID || !is_online_node_id(device_id)) return false; ret = get_task(pid, &tsk); @@ -3661,7 +3659,7 @@ static bool is_sp_normal_addr(unsigned long addr) { return addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START + - sp_device_number * MMAP_SHARE_POOL_16G_SIZE; + MAX_DEVID * MMAP_SHARE_POOL_16G_SIZE; } static bool is_sp_dvpp_addr(unsigned long addr) @@ -4424,18 +4422,6 @@ static int __init enable_share_pool(char *s) } __setup("enable_ascend_share_pool", enable_share_pool); -static void __init sp_device_number_detect(void) -{ - /* NOTE: TO BE COMPLETED */ - sp_device_number = 4; - - if (sp_device_number > MAX_DEVID) { - pr_warn("sp_device_number %d exceed, truncate it to %d\n", - sp_device_number, MAX_DEVID); - sp_device_number = MAX_DEVID; - } -} - static int __init share_pool_init(void) { if (!sp_is_enabled()) @@ -4446,7 +4432,6 @@ static int __init share_pool_init(void) goto fail; atomic_inc(&sp_mapping_normal->user); - sp_device_number_detect(); proc_sharepool_init(); return 0; -- Gitee From 8d1904609cc33aa3039be353e455b77cf9614fa7 Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Tue, 18 Oct 2022 19:47:12 +0800 Subject: [PATCH 32/74] mm: sharepool: Remove enable_mdc_default_group and change the definition of is_process_in_group() Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5LY51 ---------------------------------------------- The variable enable_mdc_default_group has been deprecated, thus remove it and the corresponding code. The definition of is_process_in_group() can be ambiguous, thus change the return value type. 
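[Editor's note] The intent of the return-type change is easier to see outside the diff: a predicate named is_process_in_group() should only answer yes/no, while callers that actually need the list node (such as mg_sp_group_del_task() in the hunks below) go through the separate lookup helper find_spg_node_by_spg(). The following is a minimal stand-alone C sketch of that split, using simplified hypothetical types rather than the kernel structures:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct node { struct node *next; int mm_id; };
    struct group { struct node *procs; };

    /* membership test: answers yes/no only, as the name implies */
    static bool is_process_in_group(struct group *g, int mm_id)
    {
        struct node *n;

        for (n = g->procs; n; n = n->next)
            if (n->mm_id == mm_id)
                return true;
        return false;
    }

    /* callers that need the node itself use a dedicated lookup helper */
    static struct node *find_node(struct group *g, int mm_id)
    {
        struct node *n;

        for (n = g->procs; n; n = n->next)
            if (n->mm_id == mm_id)
                return n;
        return NULL;
    }

    int main(void)
    {
        struct node n2 = { NULL, 2 };
        struct node n1 = { &n2, 1 };
        struct group g = { &n1 };

        printf("in group: %d, lookup miss: %p\n",
               is_process_in_group(&g, 2), (void *)find_node(&g, 3));
        return 0;
    }
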
Signed-off-by: Zhang Zekun --- mm/share_pool.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index a2ee3fe5eebc..826201284503 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -73,10 +73,6 @@ #define PF_DOMAIN_CORE 0x10000000 /* AOS CORE processes in sched.h */ -/* mdc scene hack */ -static int __read_mostly enable_mdc_default_group; -static const int mdc_default_group_id = 1; - static int system_group_count; /* idr of all sp_groups */ @@ -946,16 +942,16 @@ static int get_task(int pid, struct task_struct **task) * 1. hold spg->rw_lock * 2. ensure no concurrency problem for mm_struct */ -static struct sp_group_node *is_process_in_group(struct sp_group *spg, +static bool is_process_in_group(struct sp_group *spg, struct mm_struct *mm) { struct sp_group_node *spg_node; list_for_each_entry(spg_node, &spg->procs, proc_node) if (spg_node->master->mm == mm) - return spg_node; + return true; - return NULL; + return false; } /* user must call sp_group_drop() after use */ @@ -1341,10 +1337,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) return -EINVAL; } - /* mdc scene hack */ - if (enable_mdc_default_group) - spg_id = mdc_default_group_id; - if (spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) { pr_err_ratelimited("add group failed, invalid group id %d\n", spg_id); return -EINVAL; @@ -1616,7 +1608,7 @@ int mg_sp_group_del_task(int pid, int spg_id) goto out_put_task; } - spg_node = is_process_in_group(spg, mm); + spg_node = find_spg_node_by_spg(mm, spg); if (!spg_node) { up_write(&sp_group_sem); pr_err_ratelimited("process not in group"); @@ -2256,10 +2248,6 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, check_interrupt_context(); - /* mdc scene hack */ - if (enable_mdc_default_group) - spg_id = mdc_default_group_id; - if (current->flags & PF_KTHREAD) { pr_err_ratelimited("allocation failed, task is kthread\n"); return -EINVAL; @@ -3721,13 +3709,6 @@ int sp_node_id(struct vm_area_struct *vma) return node_id; } -static int __init mdc_default_group(char *s) -{ - enable_mdc_default_group = 1; - return 1; -} -__setup("enable_mdc_default_group", mdc_default_group); - /*** Statistical and maintenance functions ***/ static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon, -- Gitee From 439153b3ae600885586f2b75a34dea27132598cf Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Tue, 18 Oct 2022 19:47:13 +0800 Subject: [PATCH 33/74] mm: sharepool: Remove the comment and fix a bug in mg_sp_group_id_by_pid() Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5LY2R ------------------------------------------- Remove the meaningless comment in mg_sp_free() and the fix the bug in mg_sp_group_id_by_pid() parameter check path. 
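[Editor's note] The parameter-check bug is that num is an int pointer, so the old test "if (!spg_ids || num <= 0)" compared the pointer itself against zero and never looked at the count it points to; the hunk below changes it to "!spg_ids || !num || *num <= 0". A minimal stand-alone sketch of the corrected check (hypothetical helper, not the kernel function itself):

    #include <errno.h>
    #include <stdio.h>

    /* reject a NULL array, a NULL count pointer, or a non-positive count */
    static int check_args(int *spg_ids, int *num)
    {
        if (!spg_ids || !num || *num <= 0)
            return -EINVAL;
        return 0;
    }

    int main(void)
    {
        int ids[4];
        int count = 0;

        printf("%d\n", check_args(ids, &count)); /* rejected: count is 0 */
        count = 4;
        printf("%d\n", check_args(ids, &count)); /* accepted */
        return 0;
    }
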
Signed-off-by: Zhang Zekun --- mm/share_pool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 826201284503..cce68c468851 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1018,7 +1018,7 @@ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) check_interrupt_context(); - if (!spg_ids || num <= 0) + if (!spg_ids || !num || *num <= 0) return -EINVAL; ret = get_task(pid, &tsk); @@ -2181,7 +2181,6 @@ int mg_sp_free(unsigned long addr, int id) sp_free_unmap_fallocate(fc.spa); - /* current->mm == NULL: allow kthread */ if (current->mm == NULL) atomic64_sub(fc.spa->real_size, &kthread_stat.alloc_size); else -- Gitee From 7823b069da23cca5442304483a2a7668cf405d86 Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:14 +0800 Subject: [PATCH 34/74] mm: sharepool: fix statistics error Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5M3PS -------------------------------- - fix SP_RES value incorrect bug - fix SP_RES_T value incorrect bug - fix pid field uninitialized error in pass-through scenario Signed-off-by: Guo Mengqi --- mm/share_pool.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index cce68c468851..099c359d2640 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -250,12 +250,14 @@ struct sp_group_node { #endif /* The caller should hold mmap_sem to protect master (TBD) */ -static void sp_init_group_master_stat(struct mm_struct *mm, struct sp_proc_stat *stat) +static void sp_init_group_master_stat(int tgid, struct mm_struct *mm, + struct sp_proc_stat *stat) { atomic64_set(&stat->alloc_nsize, 0); atomic64_set(&stat->alloc_hsize, 0); atomic64_set(&stat->k2u_size, 0); stat->mm = mm; + stat->tgid = tgid; get_task_comm(stat->comm, current); } @@ -486,7 +488,7 @@ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct INIT_LIST_HEAD(&master->node_list); master->count = 0; master->mm = mm; - sp_init_group_master_stat(mm, &master->instat); + sp_init_group_master_stat(tsk->tgid, mm, &master->instat); mm->sp_group_master = master; mutex_lock(&master_list_lock); @@ -1422,7 +1424,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) up_write(&spg->rw_lock); goto out_drop_group; } - mm->sp_group_master->instat.tgid = tsk->tgid; ret = sp_mapping_group_setup(mm, spg); if (ret) { @@ -3730,18 +3731,27 @@ static long get_proc_alloc(struct sp_proc_stat *stat) atomic64_read(&stat->alloc_hsize)); } -static void get_process_sp_res(struct sp_proc_stat *stat, +static void get_process_sp_res(struct sp_group_master *master, long *sp_res_out, long *sp_res_nsize_out) { - *sp_res_out = byte2kb(atomic64_read(&stat->alloc_nsize) + - atomic64_read(&stat->alloc_hsize)); - *sp_res_nsize_out = byte2kb(atomic64_read(&stat->alloc_nsize)); + struct sp_group *spg; + struct sp_group_node *spg_node; + + *sp_res_out = 0; + *sp_res_nsize_out = 0; + + list_for_each_entry(spg_node, &master->node_list, group_node) { + spg = spg_node->spg; + *sp_res_out += byte2kb(atomic64_read(&spg->instat.alloc_nsize)); + *sp_res_out += byte2kb(atomic64_read(&spg->instat.alloc_hsize)); + *sp_res_nsize_out += byte2kb(atomic64_read(&spg->instat.alloc_nsize)); + } } static long get_sp_res_by_spg_proc(struct sp_group_node *spg_node) { - return byte2kb(atomic64_read(&spg_node->instat.alloc_nsize) + - atomic64_read(&spg_node->instat.alloc_hsize)); + return 
byte2kb(atomic64_read(&spg_node->spg->instat.alloc_nsize) + + atomic64_read(&spg_node->spg->instat.alloc_hsize)); } /* @@ -3806,7 +3816,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); proc_stat = &master->instat; - get_process_sp_res(proc_stat, &sp_res, &sp_res_nsize); + get_process_sp_res(master, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm); @@ -4058,7 +4068,7 @@ static int proc_usage_by_group(int id, void *p, void *data) tgid = master->instat.tgid; get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); - get_process_sp_res(&master->instat, &sp_res, &sp_res_nsize); + get_process_sp_res(master, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm); @@ -4119,7 +4129,7 @@ static int proc_usage_show(struct seq_file *seq, void *offset) list_for_each_entry(master, &master_list, list_node) { proc_stat = &master->instat; get_mm_rss_info(master->mm, &anon, &file, &shmem, &total_rss); - get_process_sp_res(&master->instat, &sp_res, &sp_res_nsize); + get_process_sp_res(master, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm); seq_printf(seq, "%-8d %-16s %-9ld %-9ld %-9ld %-10ld %-10ld %-8ld\n", -- Gitee From 54119095a03aa9e3d4474d8f1189a488fab57f2f Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:15 +0800 Subject: [PATCH 35/74] mm: sharepool: fix static code-check errors Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5MS48 -------------------------------- Fix two bugs revealed by static check: - Release the mm->mmap_lock when mm->sp_group_master had not been initialized. - Do not add mm to master list if there process add group failed. Signed-off-by: Guo Mengqi --- mm/share_pool.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 099c359d2640..ceed27360fd4 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -502,6 +502,7 @@ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct return 0; free_master: + list_del(&master->list_node); mm->sp_group_master = NULL; kfree(master); @@ -3811,8 +3812,10 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, down_read(&mm->mmap_lock); master = mm->sp_group_master; - if (!master) + if (!master) { + up_read(&mm->mmap_lock); return 0; + } get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); proc_stat = &master->instat; -- Gitee From c631b21db02213637cddc1b8b005e360d6fe7d60 Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Tue, 18 Oct 2022 19:47:16 +0800 Subject: [PATCH 36/74] mm: sharepool: Add a read lock in proc_usage_show() Offering: HULK hulk inclusion category: bugfix bugzilla: 187524 ----------------------------------------------- In function get_process_sp_res(), spg_node can be freed by other process, the access to spg_node->spg can cause kernel panic. Add a pair of read lock to fix this problem. Fix the same problem in proc_sp_group_state(). 
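[Editor's note] The shape of the fix is the usual reader/writer pattern: the statistics readers must hold, on the read side, the same semaphore that group add/delete already takes on the write side before walking master->node_list, otherwise an spg_node can be freed underneath them. The hunks below add down_read(&sp_group_sem)/up_read(&sp_group_sem) around both readers. A stand-alone sketch of the pattern, with a POSIX rwlock standing in for the kernel rw_semaphore and hypothetical names throughout:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t group_sem = PTHREAD_RWLOCK_INITIALIZER;

    struct entry { struct entry *next; long alloc_kb; };
    static struct entry *node_list; /* shared; writers hold group_sem for writing */

    /* reader side: analogous to down_read(&sp_group_sem) around the list walk */
    static long sum_group_usage(void)
    {
        struct entry *e;
        long sum = 0;

        pthread_rwlock_rdlock(&group_sem);
        for (e = node_list; e; e = e->next)
            sum += e->alloc_kb;
        pthread_rwlock_unlock(&group_sem);
        return sum;
    }

    int main(void)
    {
        struct entry e2 = { NULL, 8 };
        struct entry e1 = { &e2, 4 };

        node_list = &e1;
        printf("%ld KB\n", sum_group_usage());
        return 0;
    }
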
Fixes: 3d37f8717287 ("[Huawei] mm: sharepool: use built-in-statistics") Signed-off-by: Zhang Zekun --- mm/share_pool.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/share_pool.c b/mm/share_pool.c index ceed27360fd4..bd6b3fbf381e 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3810,10 +3810,12 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, if (!mm) return 0; + down_read(&sp_group_sem); down_read(&mm->mmap_lock); master = mm->sp_group_master; if (!master) { up_read(&mm->mmap_lock); + up_read(&sp_group_sem); return 0; } @@ -3848,6 +3850,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, seq_putc(m, '\n'); } up_read(&mm->mmap_lock); + up_read(&sp_group_sem); return 0; } @@ -4128,6 +4131,7 @@ static int proc_usage_show(struct seq_file *seq, void *offset) "PID", "COMM", "SP_ALLOC", "SP_K2U", "SP_RES", "Non-SP_RES", "Non-SP_Shm", "VIRT"); + down_read(&sp_group_sem); mutex_lock(&master_list_lock); list_for_each_entry(master, &master_list, list_node) { proc_stat = &master->instat; @@ -4143,6 +4147,7 @@ static int proc_usage_show(struct seq_file *seq, void *offset) page2kb(master->mm->total_vm)); } mutex_unlock(&master_list_lock); + up_read(&sp_group_sem); return 0; } -- Gitee From 669bba8567c6be372667882adfc5a93f6d694f52 Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:17 +0800 Subject: [PATCH 37/74] mm: sharepool: delete redundant codes Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5O5RQ -------------------------------- Notice that in sp_unshare_uva(), for authentication check, comparison between current->tgid and spa->applier is well enough. There is no need to check current->mm against spa->mm. Other redundant cases: - find_spg_node_by_spg() will never return NULL in current use context; - spg_info_show() will not come across a group with id 0. Therefore, delete these redundant paths. 
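[Editor's note] The authentication argument above reduces to a single comparison: spa->applier already records the tgid that created the share, so checking it against current->tgid rejects unshare requests from other processes, and the spa->mm comparisons removed in the diff below added no further protection. A minimal stand-alone sketch of the check that remains (hypothetical helper name, not the kernel code):

    #include <errno.h>
    #include <stdio.h>

    /* allow unshare only for the tgid that applied for the area */
    static int check_unshare_perm(int current_tgid, int applier_tgid)
    {
        return current_tgid == applier_tgid ? 0 : -EPERM;
    }

    int main(void)
    {
        printf("owner: %d, other: %d\n",
               check_unshare_perm(1234, 1234), check_unshare_perm(1234, 4321));
        return 0;
    }
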
Signed-off-by: Guo Mengqi --- include/linux/share_pool.h | 1 - mm/share_pool.c | 43 ++++---------------------------------- 2 files changed, 4 insertions(+), 40 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 4860e4b00e57..ebf4b10a0965 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -44,7 +44,6 @@ #define SPG_ID_LOCAL_MAX 299999 #define SPG_FLAG_NON_DVPP (1 << 0) -#define SPG_FLAG_MASK (SPG_FLAG_NON_DVPP) #define MAX_DEVID 8 /* the max num of Da-vinci devices */ diff --git a/mm/share_pool.c b/mm/share_pool.c index bd6b3fbf381e..f2db4101eb09 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -801,11 +801,6 @@ static void spa_dec_usage(struct sp_area *spa) static void update_mem_usage(unsigned long size, bool inc, bool is_hugepage, struct sp_group_node *spg_node, enum spa_type type) { - if (unlikely(!spg_node)) { - WARN(1, "null sp group node\n"); - return; - } - switch (type) { case SPA_TYPE_ALLOC: update_mem_usage_alloc(size, inc, is_hugepage, spg_node); @@ -839,10 +834,7 @@ static void sp_update_process_stat(struct task_struct *tsk, bool inc, enum spa_type type = spa->type; spg_node = find_spg_node_by_spg(tsk->mm, spa->spg); - if (!spg_node) - pr_err("share pool: spg node not found!\n"); - else - update_mem_usage(size, inc, spa->is_hugepage, spg_node, type); + update_mem_usage(size, inc, spa->is_hugepage, spg_node, type); } static inline void check_interrupt_context(void) @@ -1088,11 +1080,6 @@ static struct sp_group *create_spg(int spg_id, unsigned long flag) return ERR_PTR(-ENOSPC); } - if (flag & ~SPG_FLAG_MASK) { - pr_err_ratelimited("invalid flag:%#lx\n", flag); - return ERR_PTR(-EINVAL); - } - spg = kzalloc(sizeof(*spg), GFP_KERNEL); if (spg == NULL) return ERR_PTR(-ENOMEM); @@ -2744,10 +2731,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva)); else { spg_node = find_spg_node_by_spg(current->mm, spa->spg); - if (!spg_node) - pr_err("spg_node is null\n"); - else - update_mem_usage(size, true, spa->is_hugepage, spg_node, SPA_TYPE_K2TASK); + update_mem_usage(size, true, spa->is_hugepage, spg_node, SPA_TYPE_K2TASK); spa->mm = current->mm; } @@ -3318,12 +3302,6 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id) goto out_drop_area; } - if (!spa->mm) { - pr_err_ratelimited("unshare uva(to task) failed, none spa owner\n"); - ret = -EINVAL; - goto out_drop_area; - } - /* * current thread may be exiting in a multithread process * @@ -3337,13 +3315,6 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id) goto out_clr_flag; } - if (spa->mm != mm) { - pr_err_ratelimited("unshare uva(to task) failed, spa not belong to the task\n"); - ret = -EINVAL; - mmput(mm); - goto out_drop_area; - } - down_write(&mm->mmap_lock); if (unlikely(mm->core_state)) { ret = 0; @@ -3981,10 +3952,7 @@ static int spg_info_show(int id, void *p, void *data) return 0; if (seq != NULL) { - if (id == 0) - seq_puts(seq, "Non Group "); - else - seq_printf(seq, "Group %6d ", id); + seq_printf(seq, "Group %6d ", id); down_read(&spg->rw_lock); seq_printf(seq, "size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", @@ -3995,10 +3963,7 @@ static int spg_info_show(int id, void *p, void *data) byte2kb(atomic64_read(&spg->instat.alloc_hsize))); up_read(&spg->rw_lock); } else { - if (id == 0) - pr_info("Non Group "); - else - pr_info("Group %6d ", id); + pr_info("Group %6d ", id); 
down_read(&spg->rw_lock); pr_info("size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", -- Gitee From 9c7eb464709c7efa00f77aed91dd3cbbca42d058 Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:18 +0800 Subject: [PATCH 38/74] mm: sharepool: fix softlockup in high pressure use case. Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5ODCT -------------------------------- When there are a large number of groups in the system, or with a large number of processes in each group, "cat /proc/sharepool/proc_stat" will encounter softlockup before all prints finished. This is because there are too many loops in the callback function. Remove one of the loops to reduce time cost and add a cond_resched() to avoid this. root@buildroot:~/install# cat /proc/sharepool/proc_stat [ 1250.647469] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [cat:309] [ 1250.648610] Modules linked in: sharepool_dev(OE) [ 1250.650795] CPU: 0 PID: 309 Comm: cat Tainted: G OE 5.10.0+ #43 [ 1250.651216] Hardware name: linux,dummy-virt (DT) [ 1250.651721] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--) [ 1250.652426] pc : get_process_sp_res+0x40/0x90 [ 1250.652747] lr : proc_usage_by_group+0x158/0x218 ... [ 1250.657903] Call trace: [ 1250.658376] get_process_sp_res+0x40/0x90 [ 1250.658602] proc_usage_by_group+0x158/0x218 [ 1250.658838] idr_for_each+0x6c/0xf0 [ 1250.659027] proc_group_usage_show+0x104/0x120 [ 1250.659263] seq_read_iter+0xe0/0x498 [ 1250.659462] proc_reg_read_iter+0xa8/0xe0 [ 1250.659660] generic_file_splice_read+0xf0/0x1b0 [ 1250.659865] do_splice_to+0x7c/0xd0 [ 1250.660029] splice_direct_to_actor+0xe0/0x2a8 [ 1250.660353] do_splice_direct+0xa4/0xf8 [ 1250.660902] do_sendfile+0x1bc/0x420 [ 1250.661079] __arm64_sys_sendfile64+0x170/0x178 [ 1250.661298] el0_svc_common.constprop.0+0x88/0x268 [ 1250.661505] do_el0_svc+0x34/0xb8 [ 1250.661686] el0_svc+0x1c/0x28 [ 1250.661836] el0_sync_handler+0x8c/0xb0 [ 1250.662033] el0_sync+0x168/0x180 Signed-off-by: Guo Mengqi --- mm/share_pool.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index f2db4101eb09..8cc37699149f 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -4024,7 +4024,6 @@ static int proc_usage_by_group(int id, void *p, void *data) struct sp_group_master *master; int tgid; unsigned long anon, file, shmem, total_rss; - long sp_res, sp_res_nsize, non_sp_res, non_sp_shm; down_read(&spg->rw_lock); list_for_each_entry(spg_node, &spg->procs, proc_node) { @@ -4039,26 +4038,20 @@ static int proc_usage_by_group(int id, void *p, void *data) tgid = master->instat.tgid; get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); - get_process_sp_res(master, &sp_res, &sp_res_nsize); - get_process_non_sp_res(total_rss, shmem, sp_res_nsize, - &non_sp_res, &non_sp_shm); seq_printf(seq, "%-8d ", tgid); - if (id == 0) - seq_printf(seq, "%-8c ", '-'); - else - seq_printf(seq, "%-8d ", id); - seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-10ld %-8ld %-7ld %-7ld %-10ld ", + seq_printf(seq, "%-8d ", id); + seq_printf(seq, "%-9ld %-9ld %-9ld %-8ld %-7ld %-7ld ", get_spg_proc_alloc(spg_node), get_spg_proc_k2u(spg_node), get_sp_res_by_spg_proc(spg_node), - sp_res, non_sp_res, page2kb(mm->total_vm), page2kb(total_rss), - page2kb(shmem), non_sp_shm); + page2kb(shmem)); print_process_prot(seq, spg_node->prot); seq_putc(seq, '\n'); } up_read(&spg->rw_lock); + cond_resched(); return 0; } @@ -4069,9 
+4062,9 @@ static int proc_group_usage_show(struct seq_file *seq, void *offset) spa_overview_show(seq); /* print the file header */ - seq_printf(seq, "%-8s %-8s %-9s %-9s %-9s %-10s %-10s %-8s %-7s %-7s %-10s %-4s\n", - "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "SP_RES_T", - "Non-SP_RES", "VIRT", "RES", "Shm", "Non-SP_Shm", "PROT"); + seq_printf(seq, "%-8s %-8s %-9s %-9s %-9s %-8s %-7s %-7s %-4s\n", + "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", + "VIRT", "RES", "Shm", "PROT"); /* print kthread buff_module_guard_work */ seq_printf(seq, "%-8s %-8s %-9lld %-9lld\n", "guard", "-", -- Gitee From ef36692be4a2af55367442bf3a9677ff5bb6db76 Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:19 +0800 Subject: [PATCH 39/74] mm: sharepool: fix deadlock in spa_stat_of_mapping_show Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5OE1J -------------------------------- The mutex protecting spm_dvpp_list has an ABBA deadlock with spg->rw_lock. Try add a process to a sharepool group and cat /proc/sharepool/spa_stat at the same time will reproduce the problem. Remove spg->rw_lock to avoid this. [ 1101.013480]INFO: task test:3567 blocked for more than 30 seconds. [ 1101.014378] Tainted: G OE 5.10.0+ #45 [ 1101.015707]task:test state:D stack: 0 pid: 3567 [ 1101.016464]Call trace: [ 1101.016736] __switch_to+0xc0/0x128 [ 1101.017082] __schedule+0x3fc/0x898 [ 1101.017626] schedule+0x48/0xd8 [ 1101.017981] schedule_preempt_disabled+0x14/0x20 [ 1101.018519] __mutex_lock.isra.1+0x160/0x638 [ 1101.018899] __mutex_lock_slowpath+0x24/0x30 [ 1101.019291] mutex_lock+0x5c/0x68 [ 1101.019607] sp_mapping_create+0x118/0x1b0 [ 1101.019963] sp_init_group_master_locked.part.9+0x10c/0x288 [ 1101.020356] mg_sp_group_add_task.part.16+0x7dc/0xcd0 [ 1101.020750] mg_sp_group_add_task+0x54/0xd0 [ 1101.021120] dev_ioctl+0x360/0x1e20 [sharepool_dev] [ 1101.022171] __arm64_sys_ioctl+0xb0/0xe8 [ 1101.022695] el0_svc_common.constprop.0+0x88/0x268 [ 1101.023143] do_el0_svc+0x34/0xb8 [ 1101.023487] el0_svc+0x1c/0x28 [ 1101.023775] el0_sync_handler+0x8c/0xb0 [ 1101.024120] el0_sync+0x168/0x180 Signed-off-by: Guo Mengqi --- mm/share_pool.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 8cc37699149f..6fc24591e9b0 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3839,12 +3839,10 @@ static void spa_stat_of_mapping_show(struct seq_file *seq, struct sp_mapping *sp atomic_inc(&spa->use_count); spin_unlock(&sp_area_lock); - down_read(&spa->spg->rw_lock); if (spg_valid(spa->spg)) /* k2u to group */ seq_printf(seq, "%-10d ", spa->spg->id); else /* spg is dead */ seq_printf(seq, "%-10s ", "Dead"); - up_read(&spa->spg->rw_lock); seq_printf(seq, "%2s%-14lx %2s%-14lx %-10ld ", "0x", spa->va_start, -- Gitee From aba8504316ce81543aa8652186a4d399862da294 Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:20 +0800 Subject: [PATCH 40/74] mm: sharepool: fix deadlock in sp_check_mmap_addr Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5OE1J -------------------------------- Fix a deadlock indicated below: [ 171.669844] Chain exists of: [ 171.669844] &mm->mmap_lock --> sp_group_sem --> &spg->rw_lock [ 171.669844] [ 171.671469] Possible unsafe locking scenario: [ 171.671469] [ 171.672121] CPU0 CPU1 [ 171.672415] ---- ---- [ 171.672706] lock(&spg->rw_lock); [ 171.673114] lock(sp_group_sem); [ 171.673706] lock(&spg->rw_lock); [ 171.674208] lock(&mm->mmap_lock); 
[ 171.674863] [ 171.674863] *** DEADLOCK *** sharepool use lock in order: sp_group_sem --> &spg->rw_lock --> mm->mmap_lock However, in sp_check_mmap_addr(), when mm->mmap_lock is held, it requested sp_group_sem, which is: mm->mmap_lock --> sp_group_sem. This causes ABBA problem. This happens in: [ 171.642687] the existing dependency chain (in reverse order) is: [ 171.643745] [ 171.643745] -> #2 (&spg->rw_lock){++++}-{3:3}: [ 171.644639] __lock_acquire+0x6f4/0xc40 [ 171.645189] lock_acquire+0x2f0/0x3c8 [ 171.645631] down_read+0x64/0x2d8 [ 171.646075] proc_usage_by_group+0x50/0x258 (spg->rw_lock) [ 171.646542] idr_for_each+0x6c/0xf0 [ 171.647011] proc_group_usage_show+0x140/0x178 [ 171.647629] seq_read_iter+0xe4/0x498 [ 171.648217] proc_reg_read_iter+0xa8/0xe0 [ 171.648776] new_sync_read+0xfc/0x1a0 [ 171.649002] vfs_read+0x1ac/0x1c8 [ 171.649217] ksys_read+0x74/0xf8 [ 171.649596] __arm64_sys_read+0x24/0x30 [ 171.649934] el0_svc_common.constprop.0+0x8c/0x270 [ 171.650528] do_el0_svc+0x34/0xb8 [ 171.651069] el0_svc+0x1c/0x28 [ 171.651278] el0_sync_handler+0x8c/0xb0 [ 171.651636] el0_sync+0x168/0x180 [ 171.652118] [ 171.652118] -> #1 (sp_group_sem){++++}-{3:3}: [ 171.652692] __lock_acquire+0x6f4/0xc40 [ 171.653059] lock_acquire+0x2f0/0x3c8 [ 171.653303] down_read+0x64/0x2d8 [ 171.653704] mg_is_sharepool_addr+0x184/0x340 (&sp_group_sem) [ 171.654085] sp_check_mmap_addr+0x64/0x108 [ 171.654668] arch_get_unmapped_area_topdown+0x9c/0x528 [ 171.655370] thp_get_unmapped_area+0x54/0x68 [ 171.656170] get_unmapped_area+0x94/0x160 [ 171.656415] __do_mmap_mm+0xd4/0x540 [ 171.656629] do_mmap+0x98/0x648 [ 171.656838] vm_mmap_pgoff+0xc0/0x188 [ 171.657129] vm_mmap+0x6c/0x98 [ 171.657619] elf_map+0xe0/0x118 [ 171.657835] load_elf_binary+0x4ec/0xfd8 [ 171.658103] bprm_execve.part.9+0x3ec/0x840 [ 171.658448] bprm_execve+0x7c/0xb0 [ 171.658919] kernel_execve+0x18c/0x198 [ 171.659500] run_init_process+0xf0/0x108 [ 171.660073] try_to_run_init_process+0x20/0x58 [ 171.660558] kernel_init+0xcc/0x120 [ 171.660862] ret_from_fork+0x10/0x18 [ 171.661273] [ 171.661273] -> #0 (&mm->mmap_lock){++++}-{3:3}: [ 171.661885] check_prev_add+0xa4/0xbd8 [ 171.662229] validate_chain+0xf54/0x14b8 [ 171.662705] __lock_acquire+0x6f4/0xc40 [ 171.663310] lock_acquire+0x2f0/0x3c8 [ 171.663658] down_write+0x60/0x208 [ 171.664179] mg_sp_alloc+0x24c/0x1150 (mm->mmap_lock) [ 171.665245] dev_ioctl+0x1128/0x1fb8 [sharepool_dev] [ 171.665688] __arm64_sys_ioctl+0xb0/0xe8 [ 171.666250] el0_svc_common.constprop.0+0x8c/0x270 [ 171.667255] do_el0_svc+0x34/0xb8 [ 171.667806] el0_svc+0x1c/0x28 [ 171.668249] el0_sync_handler+0x8c/0xb0 [ 171.668661] el0_sync+0x168/0x180 Signed-off-by: Guo Mengqi --- mm/share_pool.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 6fc24591e9b0..28c18aef777d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -73,6 +73,9 @@ #define PF_DOMAIN_CORE 0x10000000 /* AOS CORE processes in sched.h */ +#define MMAP_SHARE_POOL_DVPP_BASE 0x100000000000ULL +#define MMAP_SHARE_POOL_DVPP_END (MMAP_SHARE_POOL_DVPP_BASE + MMAP_SHARE_POOL_16G_SIZE * 64) + static int system_group_count; /* idr of all sp_groups */ @@ -502,7 +505,9 @@ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct return 0; free_master: + mutex_lock(&master_list_lock); list_del(&master->list_node); + mutex_unlock(&master_list_lock); mm->sp_group_master = NULL; kfree(master); @@ -3551,6 +3556,7 @@ int sp_unregister_notifier(struct 
notifier_block *nb) } EXPORT_SYMBOL_GPL(sp_unregister_notifier); +static bool is_sp_dvpp_addr(unsigned long addr); /** * mg_sp_config_dvpp_range() - User can config the share pool start address * of each Da-vinci device. @@ -3578,7 +3584,8 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) /* NOTE: check the start address */ if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || - device_id < 0 || device_id >= MAX_DEVID || !is_online_node_id(device_id)) + device_id < 0 || device_id >= MAX_DEVID || !is_online_node_id(device_id) + || !is_sp_dvpp_addr(start) || !is_sp_dvpp_addr(start + size)) return false; ret = get_task(pid, &tsk); @@ -3622,34 +3629,19 @@ static bool is_sp_normal_addr(unsigned long addr) MAX_DEVID * MMAP_SHARE_POOL_16G_SIZE; } +/* + * | 16G host | 16G device | ... | | + * ^ + * | + * MMAP_SHARE_POOL_DVPP_BASE + 16G * 64 + * We only check the device regions. + */ static bool is_sp_dvpp_addr(unsigned long addr) { - int i; - struct mm_struct *mm; - struct sp_group_master *master; - struct sp_mapping *spm_dvpp; - - mm = current->mm; - if (!mm) + if (addr < MMAP_SHARE_POOL_DVPP_BASE || addr >= MMAP_SHARE_POOL_DVPP_END) return false; - down_read(&sp_group_sem); - master = mm->sp_group_master; - if (!master) { - up_read(&sp_group_sem); - return false; - } - - /* master->local and master->local->dvpp won't be NULL*/ - spm_dvpp = master->local->dvpp; - for (i = 0; i < MAX_DEVID; i++) { - if (addr >= spm_dvpp->start[i] && addr < spm_dvpp->end[i]) { - up_read(&sp_group_sem); - return true; - } - } - up_read(&sp_group_sem); - return false; + return (addr - MMAP_SHARE_POOL_DVPP_BASE) & MMAP_SHARE_POOL_16G_SIZE; } /** -- Gitee From 05d86fb2e9a2b1e4d85312ef8f4dcb4d65305a63 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:47:21 +0800 Subject: [PATCH 41/74] mm/sharepool: Fix UAF reported by KASAN Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PD4P -------------------------------- [ 2058.802818][ T290] BUG: KASAN: use-after-free in get_process_sp_res+0x70/0x134 [ 2058.810194][ T290] Read of size 8 at addr ffff00088dc6ab28 by task test_debug_loop/290 [ 2058.820520][ T290] CPU: 5 PID: 290 Comm: test_debug_loop Tainted: G W OE 5.10.0+ #2 [ 2058.829377][ T290] Hardware name: EVB(EP) (DT) [ 2058.833982][ T290] Call trace: [ 2058.837217][ T290] dump_backtrace+0x0/0x30c [ 2058.841660][ T290] show_stack+0x20/0x30 [ 2058.845758][ T290] dump_stack+0x120/0x1b0 [ 2058.850028][ T290] print_address_description.constprop.0+0x2c/0x1fc [ 2058.856555][ T290] __kasan_report+0xfc/0x160 [ 2058.861086][ T290] kasan_report+0x44/0xb0 [ 2058.865356][ T290] __asan_load8+0x94/0xd0 [ 2058.869623][ T290] get_process_sp_res+0x70/0x134 [ 2058.874501][ T290] proc_usage_show+0x1ac/0x304 [ 2058.879208][ T290] seq_read_iter+0x254/0x750 [ 2058.883728][ T290] proc_reg_read_iter+0x100/0x140 [ 2058.888689][ T290] new_sync_read+0x1cc/0x2c0 [ 2058.893215][ T290] vfs_read+0x1f4/0x250 [ 2058.897304][ T290] ksys_read+0xcc/0x170 [ 2058.901399][ T290] __arm64_sys_read+0x4c/0x60 [ 2058.906016][ T290] el0_svc_common.constprop.0+0xb4/0x2a0 [ 2058.911584][ T290] do_el0_svc+0x8c/0xb0 [ 2058.915677][ T290] el0_svc+0x20/0x30 [ 2058.919503][ T290] el0_sync_handler+0xb0/0xbc [ 2058.924114][ T290] el0_sync+0x180/0x1c0 [ 2058.928190][ T290] [ 2058.930444][ T290] Allocated by task 2176: [ 2058.934714][ T290] kasan_save_stack+0x28/0x60 [ 2058.939328][ T290] __kasan_kmalloc.constprop.0+0xc8/0xf0 [ 2058.944909][ T290] 
kasan_kmalloc+0x10/0x20 [ 2058.949268][ T290] kmem_cache_alloc_trace+0x128/0xabc [ 2058.954577][ T290] create_spg_node+0x58/0x214 [ 2058.959188][ T290] local_group_add_task+0x30/0x14c [ 2058.964231][ T290] init_local_group+0xd0/0x1a0 [ 2058.968936][ T290] sp_init_group_master_locked.part.0+0x19c/0x290 [ 2058.975298][ T290] mg_sp_group_add_task+0x73c/0xdb0 [ 2058.980456][ T290] dev_sp_add_group+0x124/0x2dc [sharepool_dev] [ 2058.986647][ T290] dev_ioctl+0x21c/0x2ec [sharepool_dev] [ 2058.992222][ T290] __arm64_sys_ioctl+0xd8/0x120 [ 2058.997010][ T290] el0_svc_common.constprop.0+0xb4/0x2a0 [ 2059.002572][ T290] do_el0_svc+0x8c/0xb0 [ 2059.006662][ T290] el0_svc+0x20/0x30 [ 2059.010489][ T290] el0_sync_handler+0xb0/0xbc [ 2059.015101][ T290] el0_sync+0x180/0x1c0 [ 2059.019176][ T290] [ 2059.021427][ T290] Freed by task 4125: [ 2059.025343][ T290] kasan_save_stack+0x28/0x60 [ 2059.029949][ T290] kasan_set_track+0x28/0x40 [ 2059.034476][ T290] kasan_set_free_info+0x24/0x50 [ 2059.039347][ T290] __kasan_slab_free+0x104/0x1ac [ 2059.044227][ T290] kasan_slab_free+0x14/0x20 [ 2059.048744][ T290] kfree+0x164/0xb94 [ 2059.052576][ T290] sp_group_post_exit+0xf0/0x980 [ 2059.057448][ T290] mmput.part.0+0xb4/0x220 [ 2059.061790][ T290] mmput+0x2c/0x40 [ 2059.065450][ T290] exit_mm+0x27c/0x3a0 [ 2059.069450][ T290] do_exit+0x2a0/0x790 [ 2059.073448][ T290] do_group_exit+0x64/0x100 [ 2059.077884][ T290] get_signal+0x1fc/0x9fc [ 2059.082144][ T290] do_signal+0x110/0x2cc [ 2059.086320][ T290] do_notify_resume+0x158/0x2b0 [ 2059.091108][ T290] work_pending+0xc/0x6d4 [ 2059.095358][ T290] Signed-off-by: Wang Wensheng --- mm/share_pool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/share_pool.c b/mm/share_pool.c index 28c18aef777d..23756fe9cbfc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -4346,6 +4346,7 @@ void sp_group_post_exit(struct mm_struct *mm) /* match with refcount inc in sp_group_add_task */ if (atomic_dec_and_test(&spg->use_count)) free_sp_group_locked(spg); + list_del(&spg_node->group_node); kfree(spg_node); } up_write(&sp_group_sem); -- Gitee From 5f6544eda81de951c8df1a73b1a4dc0c781626fc Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:47:22 +0800 Subject: [PATCH 42/74] mm/share pool: delete unnecessary judgment Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PIA2 -------------------------------- When a process is added to a group, mm->mm_users increases by one. When a process is deleted from a group, mm->mm_users decreases by one. It is not possible to reduce to 0 because this function is preceded by get_task_mm. 
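For reference, the refcount pairing that makes the removed check unreachable can be sketched as follows (simplified, not the exact code in this function):

    mm = get_task_mm(tsk);      /* mm_users: n -> n + 1 */
    /* ... detach the task from its sp group ... */
    atomic_dec(&mm->mm_users);  /* undo the increment done at group add */
    mmput(mm);                  /* drop the get_task_mm() reference last */

Because the get_task_mm() reference is still held when atomic_dec() runs, the counter stays at least 1 at that point, so the WARN branch removed by this patch could never fire.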
Signed-off-by: Zhou Guanghui --- mm/share_pool.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 23756fe9cbfc..0f87eb6782c3 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1623,11 +1623,7 @@ int mg_sp_group_del_task(int pid, int spg_id) list_del(&spg_node->group_node); mm->sp_group_master->count--; kfree(spg_node); - if (atomic_sub_and_test(1, &mm->mm_users)) { - up_write(&sp_group_sem); - WARN(1, "Invalid user counting\n"); - return -EINVAL; - } + atomic_dec(&mm->mm_users); up_write(&sp_group_sem); -- Gitee From 638585ffe756c4b389c2b101bac6ed0c33fb3863 Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:47:23 +0800 Subject: [PATCH 43/74] mm/share pool: Avoid UAF on spa Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PIA0 -------------------------------- The spa is used during the update_mem_usage. In this case, the spa has been released in the case of concurrency (mg_sp_unshare). Signed-off-by: Zhou Guanghui --- mm/share_pool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 0f87eb6782c3..de4e59e3fbbc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2727,7 +2727,6 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un spa->kva = kva; kc.sp_flags = sp_flags; uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, &kc); - __sp_area_drop(spa); if (IS_ERR(uva)) pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva)); else { @@ -2735,6 +2734,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un update_mem_usage(size, true, spa->is_hugepage, spg_node, SPA_TYPE_K2TASK); spa->mm = current->mm; } + __sp_area_drop(spa); return uva; } @@ -2786,9 +2786,9 @@ static void *sp_make_share_kva_to_spg(unsigned long kva, unsigned long size, out: up_read(&spg->rw_lock); - __sp_area_drop(spa); if (!IS_ERR(uva)) sp_update_process_stat(current, true, spa); + __sp_area_drop(spa); return uva; } -- Gitee From 1973168b9215de8ba9273dd7e893c27841bcb5e1 Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:47:24 +0800 Subject: [PATCH 44/74] mm/share pool: Check the maximum value of spg_id Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PIA4 -------------------------------- The maximum value of spg_id is checked to ensure that the value of spg_id is within the valid range: SPG_ID_DEFAULT or [SPG_ID_MIN SPG_ID_AUTO) Signed-off-by: Zhou Guanghui --- mm/share_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index de4e59e3fbbc..e490af418a33 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2247,7 +2247,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, return -EINVAL; } - if (spg_id != SPG_ID_DEFAULT && spg_id < SPG_ID_MIN) { + if (spg_id != SPG_ID_DEFAULT && (spg_id < SPG_ID_MIN || spg_id >= SPG_ID_AUTO)) { pr_err_ratelimited("allocation failed, invalid group id %d\n", spg_id); return -EINVAL; } -- Gitee From 386dd054273589d7307d3179141f06604bba9ad8 Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:47:25 +0800 Subject: [PATCH 45/74] mm/share pool: Avoid UAF on mm Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PIA6 -------------------------------- Use get_task_mm to avoid the mm being released when the information in 
mm_struct is used. Signed-off-by: Zhou Guanghui --- mm/share_pool.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index e490af418a33..1c862ca41736 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3756,7 +3756,7 @@ static void print_process_prot(struct seq_file *seq, unsigned long prot) int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - struct mm_struct *mm = task->mm; + struct mm_struct *mm; struct sp_group_master *master; struct sp_proc_stat *proc_stat; struct sp_group_node *spg_node; @@ -3766,17 +3766,15 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, if (!sp_is_enabled()) return 0; + mm = get_task_mm(task); if (!mm) return 0; down_read(&sp_group_sem); down_read(&mm->mmap_lock); master = mm->sp_group_master; - if (!master) { - up_read(&mm->mmap_lock); - up_read(&sp_group_sem); - return 0; - } + if (!master) + goto out; get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); proc_stat = &master->instat; @@ -3808,8 +3806,11 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, print_process_prot(m, spg_node->prot); seq_putc(m, '\n'); } + +out: up_read(&mm->mmap_lock); up_read(&sp_group_sem); + mmput(mm); return 0; } -- Gitee From 322af44adbcfac8fb10f6024f604532d8a4c3af1 Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Tue, 18 Oct 2022 19:47:26 +0800 Subject: [PATCH 46/74] sharepool: Make the definitions of MMAP_SHARE_POOL_{START|16G_START} more readable Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q -------------------------------- "TASK_SIZE - MMAP_SHARE_POOL_DVPP_SIZE" is puzzling. MMAP_SHARE_POOL_START = MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_SIZE and MMAP_SHARE_POOL_16G_START = MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_DVPP_SIZE make the memory layout not unintuitive. 
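As an example, assuming TASK_SIZE is 256T (a 48-bit user address space; the real value is architecture dependent) and with MMAP_SHARE_POOL_NORMAL_SIZE still 8T at this point in the series, the new top-down definitions work out to:

    MMAP_SHARE_POOL_END        = TASK_SIZE - MMAP_SHARE_POOL_SKIP   = 248T  /* skip 8T for stack */
    MMAP_SHARE_POOL_DVPP_START = 248T - MMAP_SHARE_POOL_DVPP_SIZE   = 240T  /* 8T DVPP region */
    MMAP_SHARE_POOL_NORMAL_END = MMAP_SHARE_POOL_DVPP_START         = 240T
    MMAP_SHARE_POOL_START      = 240T - MMAP_SHARE_POOL_NORMAL_SIZE = 232T

Each boundary is derived by subtracting downwards from the one above it, rather than mixing TASK_SIZE with alignment arithmetic.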
Signed-off-by: Chen Jun Signed-off-by: Wang Wensheng --- include/linux/share_pool.h | 18 +++++++++++++----- mm/share_pool.c | 33 ++++++++++++++------------------- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index ebf4b10a0965..b5fa0d4d59e0 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -213,11 +213,19 @@ struct sp_walk_data { #define MMAP_SHARE_POOL_DVPP_SIZE 0x80000000000UL /* 16G size */ #define MMAP_SHARE_POOL_16G_SIZE 0x400000000UL -#define MMAP_SHARE_POOL_SIZE (MMAP_SHARE_POOL_NORMAL_SIZE + MMAP_SHARE_POOL_DVPP_SIZE) -/* align to 2M hugepage size, and MMAP_SHARE_POOL_TOP_16G_START should be align to 16G */ -#define MMAP_SHARE_POOL_END ((TASK_SIZE - MMAP_SHARE_POOL_DVPP_SIZE) & ~((1 << 21) - 1)) -#define MMAP_SHARE_POOL_START (MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_SIZE) -#define MMAP_SHARE_POOL_16G_START (MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_DVPP_SIZE) +/* skip 8T for stack */ +#define MMAP_SHARE_POOL_SKIP 0x80000000000UL +#define MMAP_SHARE_POOL_END (TASK_SIZE - MMAP_SHARE_POOL_SKIP) +#define MMAP_SHARE_POLL_DVPP_END (MMAP_SHARE_POOL_END) +/* MMAP_SHARE_POOL_DVPP_START should be align to 16G */ +#define MMAP_SHARE_POOL_DVPP_START (MMAP_SHARE_POLL_DVPP_END - MMAP_SHARE_POOL_DVPP_SIZE) +#define MMAP_SHARE_POOL_NORMAL_END (MMAP_SHARE_POOL_DVPP_START) +#define MMAP_SHARE_POOL_NORMAL_START (MMAP_SHARE_POOL_NORMAL_END - MMAP_SHARE_POOL_NORMAL_SIZE) +#define MMAP_SHARE_POOL_START (MMAP_SHARE_POOL_NORMAL_START) + +#define MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE 0x100000000000ULL +#define MMAP_SHARE_POOL_DYNAMIC_DVPP_END (MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE + \ + MMAP_SHARE_POOL_16G_SIZE * 64) #ifdef CONFIG_ASCEND_SHARE_POOL diff --git a/mm/share_pool.c b/mm/share_pool.c index 1c862ca41736..93ce238c5ae8 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -73,9 +73,6 @@ #define PF_DOMAIN_CORE 0x10000000 /* AOS CORE processes in sched.h */ -#define MMAP_SHARE_POOL_DVPP_BASE 0x100000000000ULL -#define MMAP_SHARE_POOL_DVPP_END (MMAP_SHARE_POOL_DVPP_BASE + MMAP_SHARE_POOL_16G_SIZE * 64) - static int system_group_count; /* idr of all sp_groups */ @@ -290,12 +287,12 @@ static void sp_mapping_range_init(struct sp_mapping *spm) for (i = 0; i < MAX_DEVID; i++) { if (spm->flag & SP_MAPPING_NORMAL) { - spm->start[i] = MMAP_SHARE_POOL_START; - spm->end[i] = MMAP_SHARE_POOL_16G_START; + spm->start[i] = MMAP_SHARE_POOL_NORMAL_START; + spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; continue; } - spm->start[i] = MMAP_SHARE_POOL_16G_START + + spm->start[i] = MMAP_SHARE_POOL_DVPP_START + i * MMAP_SHARE_POOL_16G_SIZE; spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; } @@ -1856,7 +1853,7 @@ static struct sp_area *__find_sp_area_locked(struct sp_group *spg, { struct rb_node *n; - if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) n = spg->normal->area_root.rb_node; else n = spg->dvpp->area_root.rb_node; @@ -1912,7 +1909,7 @@ static void sp_free_area(struct sp_area *spa) lockdep_assert_held(&sp_area_lock); - if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) spm = spa->spg->normal; else spm = spa->spg->dvpp; @@ -3552,7 +3549,7 @@ int sp_unregister_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(sp_unregister_notifier); -static bool is_sp_dvpp_addr(unsigned long addr); +static bool 
is_sp_dynamic_dvpp_addr(unsigned long addr); /** * mg_sp_config_dvpp_range() - User can config the share pool start address * of each Da-vinci device. @@ -3581,7 +3578,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) /* NOTE: check the start address */ if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || device_id < 0 || device_id >= MAX_DEVID || !is_online_node_id(device_id) - || !is_sp_dvpp_addr(start) || !is_sp_dvpp_addr(start + size)) + || !is_sp_dynamic_dvpp_addr(start) || !is_sp_dynamic_dvpp_addr(start + size)) return false; ret = get_task(pid, &tsk); @@ -3597,7 +3594,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) goto put_mm; spm = spg->dvpp; - default_start = MMAP_SHARE_POOL_16G_START + device_id * MMAP_SHARE_POOL_16G_SIZE; + default_start = MMAP_SHARE_POOL_DVPP_START + device_id * MMAP_SHARE_POOL_16G_SIZE; /* The dvpp range of each group can be configured only once */ if (spm->start[device_id] != default_start) goto put_spg; @@ -3618,11 +3615,9 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) } EXPORT_SYMBOL_GPL(mg_sp_config_dvpp_range); -static bool is_sp_normal_addr(unsigned long addr) +static bool is_sp_reserve_addr(unsigned long addr) { - return addr >= MMAP_SHARE_POOL_START && - addr < MMAP_SHARE_POOL_16G_START + - MAX_DEVID * MMAP_SHARE_POOL_16G_SIZE; + return addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_END; } /* @@ -3632,12 +3627,12 @@ static bool is_sp_normal_addr(unsigned long addr) * MMAP_SHARE_POOL_DVPP_BASE + 16G * 64 * We only check the device regions. */ -static bool is_sp_dvpp_addr(unsigned long addr) +static bool is_sp_dynamic_dvpp_addr(unsigned long addr) { - if (addr < MMAP_SHARE_POOL_DVPP_BASE || addr >= MMAP_SHARE_POOL_DVPP_END) + if (addr < MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE || addr >= MMAP_SHARE_POOL_DYNAMIC_DVPP_END) return false; - return (addr - MMAP_SHARE_POOL_DVPP_BASE) & MMAP_SHARE_POOL_16G_SIZE; + return (addr - MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE) & MMAP_SHARE_POOL_16G_SIZE; } /** @@ -3649,7 +3644,7 @@ static bool is_sp_dvpp_addr(unsigned long addr) bool mg_is_sharepool_addr(unsigned long addr) { return sp_is_enabled() && - ((is_sp_normal_addr(addr) || is_sp_dvpp_addr(addr))); + ((is_sp_reserve_addr(addr) || is_sp_dynamic_dvpp_addr(addr))); } EXPORT_SYMBOL_GPL(mg_is_sharepool_addr); -- Gitee From a3f0b2b7b45cc22c1bb552306a075ae7384ca81c Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Tue, 18 Oct 2022 19:47:27 +0800 Subject: [PATCH 47/74] sharepool: Rename sp_mapping.flag to sp_mapping.type Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q -------------------------------- Now, sp_mapping.flag is only used to distinguish sp_mapping types. So, 'type' are more suitable. 
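In code terms the checks change from bitmask tests to exact comparisons, roughly:

    /* before */ if (spm->flag & SP_MAPPING_DVPP) ...
    /* after  */ if (sp_mapping_type(spm) == SP_MAPPING_DVPP) ...

which also allows sp_mapping_range_init() to switch over the type and report an invalid value explicitly.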
Signed-off-by: Chen Jun --- mm/share_pool.c | 53 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 93ce238c5ae8..08790abbd0c8 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -151,7 +151,7 @@ struct spg_proc_stat { * address space management */ struct sp_mapping { - unsigned long flag; + unsigned long type; atomic_t user; unsigned long start[MAX_DEVID]; unsigned long end[MAX_DEVID]; @@ -263,12 +263,23 @@ static void sp_init_group_master_stat(int tgid, struct mm_struct *mm, #define SP_MAPPING_DVPP 0x1 #define SP_MAPPING_NORMAL 0x2 + +static unsigned long sp_mapping_type(struct sp_mapping *spm) +{ + return spm->type; +} + +static void sp_mapping_set_type(struct sp_mapping *spm, unsigned long type) +{ + spm->type = type; +} + static struct sp_mapping *sp_mapping_normal; static void sp_mapping_add_to_list(struct sp_mapping *spm) { mutex_lock(&spm_list_lock); - if (spm->flag & SP_MAPPING_DVPP) + if (sp_mapping_type(spm) == SP_MAPPING_DVPP) list_add_tail(&spm->spm_node, &spm_dvpp_list); mutex_unlock(&spm_list_lock); } @@ -276,7 +287,7 @@ static void sp_mapping_add_to_list(struct sp_mapping *spm) static void sp_mapping_remove_from_list(struct sp_mapping *spm) { mutex_lock(&spm_list_lock); - if (spm->flag & SP_MAPPING_DVPP) + if (sp_mapping_type(spm) == SP_MAPPING_DVPP) list_del(&spm->spm_node); mutex_unlock(&spm_list_lock); } @@ -286,19 +297,23 @@ static void sp_mapping_range_init(struct sp_mapping *spm) int i; for (i = 0; i < MAX_DEVID; i++) { - if (spm->flag & SP_MAPPING_NORMAL) { + switch (sp_mapping_type(spm)) { + case SP_MAPPING_NORMAL: spm->start[i] = MMAP_SHARE_POOL_NORMAL_START; - spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; - continue; + spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; + break; + case SP_MAPPING_DVPP: + spm->start[i] = MMAP_SHARE_POOL_DVPP_START + i * MMAP_SHARE_POOL_16G_SIZE; + spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; + break; + default: + pr_err("Invalid sp_mapping type [%lu]\n", sp_mapping_type(spm)); + break; } - - spm->start[i] = MMAP_SHARE_POOL_DVPP_START + - i * MMAP_SHARE_POOL_16G_SIZE; - spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; } } -static struct sp_mapping *sp_mapping_create(unsigned long flag) +static struct sp_mapping *sp_mapping_create(unsigned long type) { struct sp_mapping *spm; @@ -306,7 +321,7 @@ static struct sp_mapping *sp_mapping_create(unsigned long flag) if (!spm) return ERR_PTR(-ENOMEM); - spm->flag = flag; + sp_mapping_set_type(spm, type); sp_mapping_range_init(spm); atomic_set(&spm->user, 0); spm->area_root = RB_ROOT; @@ -325,18 +340,26 @@ static void sp_mapping_destroy(struct sp_mapping *spm) static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) { atomic_inc(&spm->user); - if (spm->flag & SP_MAPPING_DVPP) { + + switch (sp_mapping_type(spm)) { + case SP_MAPPING_DVPP: spg->dvpp = spm; list_add_tail(&spg->mnode, &spm->group_head); - } else if (spm->flag & SP_MAPPING_NORMAL) + break; + case SP_MAPPING_NORMAL: spg->normal = spm; + break; + default: + break; + } } static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) { if (!spm) return; - if (spm->flag & SP_MAPPING_DVPP) + + if (sp_mapping_type(spm) == SP_MAPPING_DVPP) list_del(&spg->mnode); if (atomic_dec_and_test(&spm->user)) sp_mapping_destroy(spm); -- Gitee From 863257c39cd39f09f85ca1539530e3c692d6ba32 Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Tue, 18 Oct 2022 19:47:28 +0800 Subject: [PATCH 48/74] sharepool: replace 
spg->{dvpp|normal} with spg->mapping[SP_MAPPING_{DVPP|NORMAL}] Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q -------------------------------- spg->dvpp and spg->normal can be combined into one array. Signed-off-by: Chen Jun --- mm/share_pool.c | 79 +++++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 08790abbd0c8..3c970b090552 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -147,6 +147,13 @@ struct spg_proc_stat { atomic64_t k2u_size; }; +enum sp_mapping_type { + SP_MAPPING_START, + SP_MAPPING_DVPP = SP_MAPPING_START, + SP_MAPPING_NORMAL, + SP_MAPPING_END, +}; + /* * address space management */ @@ -208,8 +215,7 @@ struct sp_group { struct rw_semaphore rw_lock; /* list node for dvpp mapping */ struct list_head mnode; - struct sp_mapping *dvpp; - struct sp_mapping *normal; + struct sp_mapping *mapping[SP_MAPPING_END]; }; /* a per-process(per mm) struct which manages a sp_group_node list */ @@ -261,9 +267,6 @@ static void sp_init_group_master_stat(int tgid, struct mm_struct *mm, get_task_comm(stat->comm, current); } -#define SP_MAPPING_DVPP 0x1 -#define SP_MAPPING_NORMAL 0x2 - static unsigned long sp_mapping_type(struct sp_mapping *spm) { return spm->type; @@ -339,30 +342,29 @@ static void sp_mapping_destroy(struct sp_mapping *spm) static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) { + unsigned long type = sp_mapping_type(spm); atomic_inc(&spm->user); - switch (sp_mapping_type(spm)) { - case SP_MAPPING_DVPP: - spg->dvpp = spm; + spg->mapping[type] = spm; + if (type == SP_MAPPING_DVPP) list_add_tail(&spg->mnode, &spm->group_head); - break; - case SP_MAPPING_NORMAL: - spg->normal = spm; - break; - default: - break; - } } static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) { + unsigned long type; + if (!spm) return; - if (sp_mapping_type(spm) == SP_MAPPING_DVPP) + type = sp_mapping_type(spm); + + if (type == SP_MAPPING_DVPP) list_del(&spg->mnode); if (atomic_dec_and_test(&spm->user)) sp_mapping_destroy(spm); + + spg->mapping[type] = NULL; } /* merge old mapping to new, and the old mapping would be destroyed */ @@ -375,7 +377,7 @@ static void sp_mapping_merge(struct sp_mapping *new, struct sp_mapping *old) list_for_each_entry_safe(spg, tmp, &old->group_head, mnode) { list_move_tail(&spg->mnode, &new->group_head); - spg->dvpp = new; + spg->mapping[SP_MAPPING_DVPP] = new; } atomic_add(atomic_read(&old->user), &new->user); @@ -409,8 +411,10 @@ static bool can_mappings_merge(struct sp_mapping *m1, struct sp_mapping *m2) */ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) { - struct sp_group_master *master = mm->sp_group_master; - struct sp_group *local = master->local; + struct sp_mapping *local_dvpp_mapping, *spg_dvpp_mapping; + + local_dvpp_mapping = mm->sp_group_master->local->mapping[SP_MAPPING_DVPP]; + spg_dvpp_mapping = spg->mapping[SP_MAPPING_DVPP]; if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) { /* @@ -419,15 +423,15 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) * This may change the address range for the task or group implicitly, * give a warn for it. 
*/ - bool is_conflict = !can_mappings_merge(local->dvpp, spg->dvpp); + bool is_conflict = !can_mappings_merge(local_dvpp_mapping, spg_dvpp_mapping); - if (is_mapping_empty(local->dvpp)) { - sp_mapping_merge(spg->dvpp, local->dvpp); + if (is_mapping_empty(local_dvpp_mapping)) { + sp_mapping_merge(spg_dvpp_mapping, local_dvpp_mapping); if (is_conflict) pr_warn_ratelimited("task address space conflict, spg_id=%d\n", spg->id); - } else if (is_mapping_empty(spg->dvpp)) { - sp_mapping_merge(local->dvpp, spg->dvpp); + } else if (is_mapping_empty(spg_dvpp_mapping)) { + sp_mapping_merge(local_dvpp_mapping, spg_dvpp_mapping); if (is_conflict) pr_warn_ratelimited("group address space conflict, spg_id=%d\n", spg->id); @@ -438,8 +442,8 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) } else { if (!(spg->flag & SPG_FLAG_NON_DVPP)) /* the mapping of local group is always set */ - sp_mapping_attach(spg, local->dvpp); - if (!spg->normal) + sp_mapping_attach(spg, local_dvpp_mapping); + if (!spg->mapping[SP_MAPPING_NORMAL]) sp_mapping_attach(spg, sp_mapping_normal); } @@ -914,14 +918,19 @@ static void free_new_spg_id(bool new, int spg_id) static void free_sp_group_locked(struct sp_group *spg) { + int type; + fput(spg->file); fput(spg->file_hugetlb); idr_remove(&sp_group_idr, spg->id); free_sp_group_id((unsigned int)spg->id); - sp_mapping_detach(spg, spg->dvpp); - sp_mapping_detach(spg, spg->normal); + + for (type = SP_MAPPING_START; type < SP_MAPPING_END; type++) + sp_mapping_detach(spg, spg->mapping[type]); + if (!is_local_group(spg->id)) system_group_count--; + kfree(spg); WARN(system_group_count < 0, "unexpected group count\n"); } @@ -1746,9 +1755,9 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, } if (flags & SP_DVPP) - mapping = spg->dvpp; + mapping = spg->mapping[SP_MAPPING_DVPP]; else - mapping = spg->normal; + mapping = spg->mapping[SP_MAPPING_NORMAL]; if (!mapping) { pr_err_ratelimited("non DVPP spg, id %d\n", spg->id); @@ -1877,9 +1886,9 @@ static struct sp_area *__find_sp_area_locked(struct sp_group *spg, struct rb_node *n; if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) - n = spg->normal->area_root.rb_node; + n = spg->mapping[SP_MAPPING_NORMAL]->area_root.rb_node; else - n = spg->dvpp->area_root.rb_node; + n = spg->mapping[SP_MAPPING_DVPP]->area_root.rb_node; while (n) { struct sp_area *spa; @@ -1933,9 +1942,9 @@ static void sp_free_area(struct sp_area *spa) lockdep_assert_held(&sp_area_lock); if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) - spm = spa->spg->normal; + spm = spa->spg->mapping[SP_MAPPING_NORMAL]; else - spm = spa->spg->dvpp; + spm = spa->spg->mapping[SP_MAPPING_DVPP]; if (spm->free_area_cache) { struct sp_area *cache; @@ -3616,7 +3625,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) if (IS_ERR(spg)) goto put_mm; - spm = spg->dvpp; + spm = spg->mapping[SP_MAPPING_DVPP]; default_start = MMAP_SHARE_POOL_DVPP_START + device_id * MMAP_SHARE_POOL_16G_SIZE; /* The dvpp range of each group can be configured only once */ if (spm->start[device_id] != default_start) -- Gitee From 7f8d75ae0167bb54c7ce18aeb6d8daf6d17c6d13 Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Tue, 18 Oct 2022 19:47:29 +0800 Subject: [PATCH 49/74] sharepool: Extract sp_mapping_find Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q -------------------------------- Extract code logic of obtaining sp_mapping 
by address into a function sp_mapping_find. Signed-off-by: Chen Jun --- mm/share_pool.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 3c970b090552..031956fbd844 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -450,6 +450,15 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) return 0; } +static inline struct sp_mapping *sp_mapping_find(struct sp_group *spg, + unsigned long addr) +{ + if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) + return spg->mapping[SP_MAPPING_NORMAL]; + + return spg->mapping[SP_MAPPING_DVPP]; +} + static struct sp_group *create_spg(int spg_id, unsigned long flag); static void free_new_spg_id(bool new, int spg_id); static void free_sp_group_locked(struct sp_group *spg); @@ -1883,13 +1892,8 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, static struct sp_area *__find_sp_area_locked(struct sp_group *spg, unsigned long addr) { - struct rb_node *n; - - if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) - n = spg->mapping[SP_MAPPING_NORMAL]->area_root.rb_node; - else - n = spg->mapping[SP_MAPPING_DVPP]->area_root.rb_node; - + struct sp_mapping *spm = sp_mapping_find(spg, addr); + struct rb_node *n = spm->area_root.rb_node; while (n) { struct sp_area *spa; @@ -1941,11 +1945,7 @@ static void sp_free_area(struct sp_area *spa) lockdep_assert_held(&sp_area_lock); - if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) - spm = spa->spg->mapping[SP_MAPPING_NORMAL]; - else - spm = spa->spg->mapping[SP_MAPPING_DVPP]; - + spm = sp_mapping_find(spa->spg, addr); if (spm->free_area_cache) { struct sp_area *cache; -- Gitee From 1e0ac6650e71f3b3f25599b27ed32d2d619a7886 Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Tue, 18 Oct 2022 19:47:30 +0800 Subject: [PATCH 50/74] sharepool: Support alloc ro mapping Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q -------------------------------- 1. Split sharepool normal area(8T) to sharepool readonly area(64G) and sharepool normal area(8T - 64G) 2. User programs can not write to the address in sharepool readonly area. 3. Add SP_PROT_FOCUS for sp_alloc. 4. sp_alloc with SP_PROT_RO | SP_PROT_FOCUS returns the virtual address within sharepool readonly area. 5. Other user programs which add into task with write prot can not write the address in sharepool readonly area. Signed-off-by: Chen Jun --- include/linux/share_pool.h | 36 +++++++++++++++++++++----------- mm/share_pool.c | 42 +++++++++++++++++++++++++++++++++++--- 2 files changed, 63 insertions(+), 15 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index b5fa0d4d59e0..1432aaa08087 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -17,6 +17,11 @@ #define SP_DVPP (1 << 2) #define SP_SPEC_NODE_ID (1 << 3) #define SP_PROT_RO (1 << 16) +/* + * SP_PROT_FOCUS should used with SP_PROT_RO, + * to alloc a memory within sharepool ro memory. 
+ */ +#define SP_PROT_FOCUS (1 << 17) #define DEVICE_ID_BITS 4UL #define DEVICE_ID_MASK ((1UL << DEVICE_ID_BITS) - 1UL) @@ -26,7 +31,7 @@ #define NODE_ID_SHIFT (DEVICE_ID_SHIFT + DEVICE_ID_BITS) #define SP_FLAG_MASK (SP_HUGEPAGE | SP_HUGEPAGE_ONLY | SP_DVPP | \ - SP_SPEC_NODE_ID | SP_PROT_RO | \ + SP_SPEC_NODE_ID | SP_PROT_RO | SP_PROT_FOCUS | \ (DEVICE_ID_MASK << DEVICE_ID_SHIFT) | \ (NODE_ID_MASK << NODE_ID_SHIFT)) @@ -113,19 +118,22 @@ struct sp_mapping { /* Processes in the same sp_group can share memory. * Memory layout for share pool: * - * |-------------------- 8T -------------------|---|------ 8T ------------| - * | Device 0 | Device 1 |...| | - * |----------------------------------------------------------------------| - * |------------- 16G -------------| 16G | | | - * | DVPP GROUP0 | DVPP GROUP1 | ... | ... |...| sp normal memory | - * | sp | sp | | | | | - * |----------------------------------------------------------------------| + * |-------------------- 8T -------------------|---|---64G---|----- 8T-64G -----| + * | Device 0 | Device 1 |...| | | + * |-----------------------------------------------|---------|------------------| + * |------------- 16G -------------| 16G | | | | + * | DVPP GROUP0 | DVPP GROUP1 | ... | ... |...| sp ro | sp normal memory | + * | sp | sp | | | | | | + * |----------------------------------------------------------------------------| * * The host SVM feature reserves 8T virtual memory by mmap, and due to the * restriction of DVPP, while SVM and share pool will both allocate memory * for DVPP, the memory have to be in the same 32G range. * - * Share pool reserves 16T memory, with 8T for normal uses and 8T for DVPP. + * Share pool reserves 16T memory, 8T-64G for normal uses, 64G for ro memory + * and 8T for DVPP. + * Within this 64G ro memory, user application will never have write permission + * to this memory address. * Within this 8T DVPP memory, SVM will call sp_config_dvpp_range() to * tell us which 16G memory range is reserved for share pool . 
* @@ -207,8 +215,10 @@ struct sp_walk_data { #define MMAP_TOP_4G_SIZE 0x100000000UL -/* 8T size */ -#define MMAP_SHARE_POOL_NORMAL_SIZE 0x80000000000UL +/* 8T - 64G size */ +#define MMAP_SHARE_POOL_NORMAL_SIZE 0x7F000000000UL +/* 64G */ +#define MMAP_SHARE_POOL_RO_SIZE 0x1000000000UL /* 8T size*/ #define MMAP_SHARE_POOL_DVPP_SIZE 0x80000000000UL /* 16G size */ @@ -219,7 +229,9 @@ struct sp_walk_data { #define MMAP_SHARE_POLL_DVPP_END (MMAP_SHARE_POOL_END) /* MMAP_SHARE_POOL_DVPP_START should be align to 16G */ #define MMAP_SHARE_POOL_DVPP_START (MMAP_SHARE_POLL_DVPP_END - MMAP_SHARE_POOL_DVPP_SIZE) -#define MMAP_SHARE_POOL_NORMAL_END (MMAP_SHARE_POOL_DVPP_START) +#define MMAP_SHARE_POOL_RO_END (MMAP_SHARE_POOL_DVPP_START) +#define MMAP_SHARE_POOL_RO_START (MMAP_SHARE_POOL_RO_END - MMAP_SHARE_POOL_RO_SIZE) +#define MMAP_SHARE_POOL_NORMAL_END (MMAP_SHARE_POOL_RO_START) #define MMAP_SHARE_POOL_NORMAL_START (MMAP_SHARE_POOL_NORMAL_END - MMAP_SHARE_POOL_NORMAL_SIZE) #define MMAP_SHARE_POOL_START (MMAP_SHARE_POOL_NORMAL_START) diff --git a/mm/share_pool.c b/mm/share_pool.c index 031956fbd844..7b78a75f1bf7 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -151,6 +151,7 @@ enum sp_mapping_type { SP_MAPPING_START, SP_MAPPING_DVPP = SP_MAPPING_START, SP_MAPPING_NORMAL, + SP_MAPPING_RO, SP_MAPPING_END, }; @@ -278,6 +279,7 @@ static void sp_mapping_set_type(struct sp_mapping *spm, unsigned long type) } static struct sp_mapping *sp_mapping_normal; +static struct sp_mapping *sp_mapping_ro; static void sp_mapping_add_to_list(struct sp_mapping *spm) { @@ -301,6 +303,10 @@ static void sp_mapping_range_init(struct sp_mapping *spm) for (i = 0; i < MAX_DEVID; i++) { switch (sp_mapping_type(spm)) { + case SP_MAPPING_RO: + spm->start[i] = MMAP_SHARE_POOL_RO_START; + spm->end[i] = MMAP_SHARE_POOL_RO_END; + break; case SP_MAPPING_NORMAL: spm->start[i] = MMAP_SHARE_POOL_NORMAL_START; spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; @@ -445,6 +451,8 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) sp_mapping_attach(spg, local_dvpp_mapping); if (!spg->mapping[SP_MAPPING_NORMAL]) sp_mapping_attach(spg, sp_mapping_normal); + if (!spg->mapping[SP_MAPPING_RO]) + sp_mapping_attach(spg, sp_mapping_ro); } return 0; @@ -456,6 +464,9 @@ static inline struct sp_mapping *sp_mapping_find(struct sp_group *spg, if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) return spg->mapping[SP_MAPPING_NORMAL]; + if (addr >= MMAP_SHARE_POOL_RO_START && addr < MMAP_SHARE_POOL_RO_END) + return spg->mapping[SP_MAPPING_RO]; + return spg->mapping[SP_MAPPING_DVPP]; } @@ -491,6 +502,7 @@ static int init_local_group(struct mm_struct *mm) } sp_mapping_attach(master->local, spm); sp_mapping_attach(master->local, sp_mapping_normal); + sp_mapping_attach(master->local, sp_mapping_ro); ret = local_group_add_task(mm, spg); if (ret < 0) @@ -1485,6 +1497,10 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) unsigned long populate = 0; struct file *file = spa_file(spa); unsigned long addr; + unsigned long __prot = prot; + + if ((spa->flags & (SP_PROT_RO | SP_PROT_FOCUS)) == (SP_PROT_RO | SP_PROT_FOCUS)) + __prot &= ~PROT_WRITE; __sp_area_drop_locked(prev); prev = spa; @@ -1497,7 +1513,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) spin_unlock(&sp_area_lock); if (spa->type == SPA_TYPE_K2SPG && spa->kva) { - addr = sp_remap_kva_to_vma(spa->kva, spa, mm, prot, NULL); + addr = sp_remap_kva_to_vma(spa->kva, spa, mm, __prot, NULL); if (IS_ERR_VALUE(addr)) 
pr_warn("add group remap k2u failed %ld\n", addr); @@ -1515,7 +1531,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) break; } - addr = sp_mmap(mm, file, spa, &populate, prot, NULL); + addr = sp_mmap(mm, file, spa, &populate, __prot, NULL); if (IS_ERR_VALUE(addr)) { sp_munmap_task_areas(mm, spg, &spa->link); up_write(&mm->mmap_lock); @@ -1763,7 +1779,13 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, return ERR_PTR(-EINVAL); } - if (flags & SP_DVPP) + if (flags & SP_PROT_FOCUS) { + if ((flags & (SP_DVPP | SP_PROT_RO)) != SP_PROT_RO) { + pr_err("invalid sp_flags [%lx]\n", flags); + return ERR_PTR(-EINVAL); + } + mapping = spg->mapping[SP_MAPPING_RO]; + } else if (flags & SP_DVPP) mapping = spg->mapping[SP_MAPPING_DVPP]; else mapping = spg->mapping[SP_MAPPING_NORMAL]; @@ -3894,6 +3916,11 @@ static void spa_stat_of_mapping_show(struct seq_file *seq, struct sp_mapping *sp spin_unlock(&sp_area_lock); } +static void spa_ro_stat_show(struct seq_file *seq) +{ + spa_stat_of_mapping_show(seq, sp_mapping_ro); +} + static void spa_normal_stat_show(struct seq_file *seq) { spa_stat_of_mapping_show(seq, sp_mapping_normal); @@ -4024,6 +4051,7 @@ static int spa_stat_show(struct seq_file *seq, void *offset) /* print the file header */ seq_printf(seq, "%-10s %-16s %-16s %-10s %-7s %-5s %-8s %-8s\n", "Group ID", "va_start", "va_end", "Size(KB)", "Type", "Huge", "PID", "Ref"); + spa_ro_stat_show(seq); spa_normal_stat_show(seq); spa_dvpp_stat_show(seq); return 0; @@ -4403,9 +4431,17 @@ static int __init share_pool_init(void) goto fail; atomic_inc(&sp_mapping_normal->user); + sp_mapping_ro = sp_mapping_create(SP_MAPPING_RO); + if (IS_ERR(sp_mapping_ro)) + goto free_normal; + atomic_inc(&sp_mapping_ro->user); + proc_sharepool_init(); return 0; + +free_normal: + kfree(sp_mapping_normal); fail: pr_err("Ascend share pool initialization failed\n"); static_branch_disable(&share_pool_enabled_key); -- Gitee From 59c56626558bdef4ee997cb1d081aef8de7bda1c Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:47:31 +0800 Subject: [PATCH 51/74] mm/sharepool: bugfix for 2M U2K Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PZDX -------------------------------- We could determine if a userspace map is huge-mapped after walking its pagetable. So the uva_align should be calculated again after walking the pagetable if it is huge-mapped. Signed-off-by: Zhou Guanghui --- mm/share_pool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/share_pool.c b/mm/share_pool.c index 7b78a75f1bf7..1c0b2a0a6823 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3189,6 +3189,9 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size, sp_walk_data->pages = NULL; } + if (sp_walk_data->is_hugepage) + sp_walk_data->uva_aligned = ALIGN_DOWN(uva, PMD_SIZE); + return ret; } -- Gitee From bdf99bd074ec48ae06ba7bccabebb3baa15fb0e6 Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:32 +0800 Subject: [PATCH 52/74] mm: sharepool: delete unused codes Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5QETC -------------------------------- sp_make_share_k2u only supports vmalloc address now. Therefore, delete a backup handle case. Also master is guaranteed not be freed until master->node_list is emptied. 
Signed-off-by: Guo Mengqi --- mm/share_pool.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 1c0b2a0a6823..32678b3e2175 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2658,12 +2658,11 @@ static int is_vmap_hugepage(unsigned long addr) static unsigned long __sp_remap_get_pfn(unsigned long kva) { - unsigned long pfn; + unsigned long pfn = -EINVAL; + /* sp_make_share_k2u only support vmalloc address */ if (is_vmalloc_addr((void *)kva)) pfn = vmalloc_to_pfn((void *)kva); - else - pfn = virt_to_pfn(kva); return pfn; } @@ -4074,11 +4073,6 @@ static int proc_usage_by_group(int id, void *p, void *data) list_for_each_entry(spg_node, &spg->procs, proc_node) { master = spg_node->master; - if (!master) { - pr_info("master is NULL! process %d, group %d\n", - spg_node->instat.tgid, id); - continue; - } mm = master->mm; tgid = master->instat.tgid; -- Gitee From 006910984913dd0bbbab097488706ed6d565e450 Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:33 +0800 Subject: [PATCH 53/74] mm: sharepool: fix potential AA deadlock Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5R0X9 -------------------------------- Fix a AA deadlock caused by nested lock in mg_sp_group_add_task(). Deadlock path: mg_sp_group_add_task() down_write(sp_group_sem) find_or_alloc_sp_group() !spg_valid() sp_group_drop() free_sp_group() -> down_write(sp_group_sem) ---> AA deadlock Signed-off-by: Guo Mengqi --- mm/share_pool.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 32678b3e2175..55c3eb6ff9b0 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -963,6 +963,14 @@ static void free_sp_group(struct sp_group *spg) up_write(&sp_group_sem); } +static void sp_group_drop_locked(struct sp_group *spg) +{ + lockdep_assert_held_write(&sp_group_sem); + + if (atomic_dec_and_test(&spg->use_count)) + free_sp_group_locked(spg); +} + static void sp_group_drop(struct sp_group *spg) { if (atomic_dec_and_test(&spg->use_count)) @@ -1201,7 +1209,7 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id, unsigned long flag) down_read(&spg->rw_lock); if (!spg_valid(spg)) { up_read(&spg->rw_lock); - sp_group_drop(spg); + sp_group_drop_locked(spg); return ERR_PTR(-ENODEV); } up_read(&spg->rw_lock); -- Gitee From 0a8e01273b539e9f066fe5ce4d2d495e56c9437a Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:34 +0800 Subject: [PATCH 54/74] mm: sharepool: check size=0 in mg_sp_make_share_k2u() Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5QQPG -------------------------------- Add a size-0-check in mg_sp_make_share_k2u() to avoid passing 0-size spa to __insert_sp_area(). 
Signed-off-by: Guo Mengqi --- mm/share_pool.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/share_pool.c b/mm/share_pool.c index 55c3eb6ff9b0..f7258b27f056 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2871,6 +2871,11 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, unsigned int page_size = PAGE_SIZE; unsigned long kva_aligned, size_aligned; + if (!size) { + pr_err_ratelimited("k2u input size is 0.\n"); + return -EINVAL; + } + if (sp_flags & ~SP_FLAG_MASK) { pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags); return -EINVAL; -- Gitee From 122e580fa45df6b7fb665be53d7f36306fc92e86 Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:35 +0800 Subject: [PATCH 55/74] mm: sharepool: fix hugepage_rsvd count increase error Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5RO2H -------------------------------- When nr_hugepages is configured, sharepool allocates hugepages first from hugetlb pool, then from buddy system if the pool had been used up. Current page release function treat the buddy system hugepages as hugetlb pages, which caused HugePages_Rsvd to increase improperly. Add a check in page release function: if the page is temporary, do not call hugetlb_unreserve_pages. Signed-off-by: Guo Mengqi --- fs/hugetlbfs/inode.c | 19 +++++++++++++------ mm/share_pool.c | 3 +-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6f2943465bff..d5b9b267005d 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -596,11 +596,17 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, */ VM_BUG_ON(HPageRestoreReserve(page)); remove_huge_page(page); - freed++; - if (!truncate_op) { - if (unlikely(hugetlb_unreserve_pages(inode, - index, index + 1, 1))) - hugetlb_fix_reserve_counts(inode); + /* + * if the page is from buddy system, do not add to freed. + * because freed is used for hugetlbfs reservation accounting. + */ + if (!HPageTemporary(page)) { + freed++; + if (!truncate_op) { + if (unlikely(hugetlb_unreserve_pages(inode, + index, index + 1, 1))) + hugetlb_fix_reserve_counts(inode); + } } unlock_page(page); @@ -1053,7 +1059,8 @@ static int hugetlbfs_error_remove_page(struct address_space *mapping, pgoff_t index = page->index; remove_huge_page(page); - if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1))) + if (!HPageTemporary(page) && + unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1))) hugetlb_fix_reserve_counts(inode); return 0; diff --git a/mm/share_pool.c b/mm/share_pool.c index f7258b27f056..8021dc47c1f1 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -4225,8 +4225,7 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, page = alloc_huge_page(vma, haddr, 0); if (IS_ERR(page)) { - page = alloc_huge_page_nodemask(hstate_file(vma->vm_file), - node_id, NULL, GFP_KERNEL); + page = hugetlb_alloc_hugepage(node_id, HUGETLB_ALLOC_BUDDY); if (!page) page = ERR_PTR(-ENOMEM); } -- Gitee From dcaa36a827bd27e298390f88f78113ba701761b7 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:47:36 +0800 Subject: [PATCH 56/74] mm/sharepool: Rebind the numa node when fallback to normal pages When we allocate memory using SP_HUGEPAGE, we would try normal pages when there was no enough hugepages. The specified numa node information would get lost when we fallback to normal pages. The result is that we could allocate memory from other numa node than what we have specified. 
The soultion is to rebind the node before retrying. Signed-off-by: Wang Wensheng --- mm/share_pool.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 8021dc47c1f1..bfed3ab4fe7f 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2622,8 +2622,15 @@ void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) } ret = sp_alloc_mmap_populate(spa, &ac); - if (ret && ac.state == ALLOC_RETRY) + if (ret && ac.state == ALLOC_RETRY) { + /* + * The mempolicy for shared memory is located at backend file, which varies + * between normal pages and huge pages. So we should set the mbind policy again + * when we retry using normal pages. + */ + ac.have_mbind = false; goto try_again; + } out: sp_alloc_finish(ret, spa, &ac); -- Gitee From 4a9e9f161a7d799d538bca9f320a559721465be5 Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:37 +0800 Subject: [PATCH 57/74] Fix sharepool hugepage cgroup uncount error. If current->flag is set as PF_MEMALLOC, memcgroup will not check current's allocation against memory use limit, which cause system run out of memory. According to https://lkml.indiana.edu/hypermail/linux/kernel/0911.2/00576.html, PF_MEMALLOC shall only be used when more memory are sure to be freed as a result of this allocation. Do not use PF_MEMALLOC, rather, remove __GFP_RECLAIM from gfp_mask to ensure no reclaim. Signed-off-by: Guo Mengqi --- include/linux/hugetlb.h | 6 ++++-- mm/hugetlb.c | 3 +++ mm/share_pool.c | 27 +++------------------------ 3 files changed, 10 insertions(+), 26 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 634630ebc8a7..2537c1269a5b 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -621,9 +621,11 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping, #define HUGETLB_ALLOC_NONE 0x00 #define HUGETLB_ALLOC_NORMAL 0x01 /* normal hugepage */ #define HUGETLB_ALLOC_BUDDY 0x02 /* buddy hugepage */ -#define HUGETLB_ALLOC_MASK (HUGETLB_ALLOC_NONE | \ +#define HUGETLB_ALLOC_NORECLAIM 0x04 /* no reclaim */ +#define HUGETLB_ALLOC_MASK (HUGETLB_ALLOC_NONE | \ HUGETLB_ALLOC_NORMAL | \ - HUGETLB_ALLOC_BUDDY) + HUGETLB_ALLOC_BUDDY | \ + HUGETLB_ALLOC_NORECLAIM) const struct hstate *hugetlb_get_hstate(void); struct page *hugetlb_alloc_hugepage(int nid, int flag); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 34f3dfba5e82..acdc56e593af 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6176,6 +6176,9 @@ struct page *hugetlb_alloc_hugepage(int nid, int flag) if (enable_charge_mighp) gfp_mask |= __GFP_ACCOUNT; + if (flag & HUGETLB_ALLOC_NORECLAIM) + gfp_mask &= ~__GFP_RECLAIM; + if (flag & HUGETLB_ALLOC_NORMAL) page = hugetlb_alloc_hugepage_normal(h, gfp_mask, nid); else if (flag & HUGETLB_ALLOC_BUDDY) diff --git a/mm/share_pool.c b/mm/share_pool.c index bfed3ab4fe7f..ba3e32da6c0d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2451,35 +2451,13 @@ static void sp_alloc_fallback(struct sp_area *spa, struct sp_alloc_context *ac) static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa, struct sp_alloc_context *ac) { - int ret = 0; - unsigned long sp_addr = spa->va_start; - unsigned int noreclaim_flag = 0; - - /* - * The direct reclaim and compact may take a long - * time. As a result, sp mutex will be hold for too - * long time to casue the hung task problem. In this - * case, set the PF_MEMALLOC flag to prevent the - * direct reclaim and compact from being executed. 
- * Since direct reclaim and compact are not performed - * when the fragmentation is severe or the memory is - * insufficient, 2MB continuous physical pages fail - * to be allocated. This situation is allowed. - */ - if (spa->is_hugepage) - noreclaim_flag = memalloc_noreclaim_save(); - /* * We are not ignoring errors, so if we fail to allocate * physical memory we just return failure, so we won't encounter * page fault later on, and more importantly sp_make_share_u2k() * depends on this feature (and MAP_LOCKED) to work correctly. */ - ret = do_mm_populate(mm, sp_addr, ac->populate, 0); - if (spa->is_hugepage) - memalloc_noreclaim_restore(noreclaim_flag); - - return ret; + return do_mm_populate(mm, spa->va_start, ac->populate, 0); } static long sp_mbind(struct mm_struct *mm, unsigned long start, unsigned long len, @@ -4232,7 +4210,8 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, page = alloc_huge_page(vma, haddr, 0); if (IS_ERR(page)) { - page = hugetlb_alloc_hugepage(node_id, HUGETLB_ALLOC_BUDDY); + page = hugetlb_alloc_hugepage(node_id, + HUGETLB_ALLOC_BUDDY | HUGETLB_ALLOC_NORECLAIM); if (!page) page = ERR_PTR(-ENOMEM); } -- Gitee From 4defb550cc911906ba14fb60bc344ec217b6724b Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:47:38 +0800 Subject: [PATCH 58/74] Revert "[Huawei] mm: sharepool: remove deprecated interfaces" This reverts commit 730bfcab402f846d64906d3d07fcbac9e1a9239e. --- include/linux/share_pool.h | 90 ++++++++++++++++++++++- mm/share_pool.c | 144 ++++++++++++++++++++++++++++++++----- 2 files changed, 214 insertions(+), 20 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 1432aaa08087..406b59aa76ec 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -250,31 +250,53 @@ static inline void sp_init_mm(struct mm_struct *mm) * Those interfaces are exported for modules */ extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); +extern int sp_group_add_task(int pid, int spg_id); + extern int mg_sp_group_del_task(int pid, int spg_id); +extern int sp_group_del_task(int pid, int spg_id); + extern int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num); +extern int sp_group_id_by_pid(int pid); + +extern int sp_group_walk(int spg_id, void *data, int (*func)(struct mm_struct *mm, void *)); extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); +extern void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); + +extern int sp_free(unsigned long addr, int id); extern int mg_sp_free(unsigned long addr, int id); +extern void *sp_make_share_k2u(unsigned long kva, unsigned long size, + unsigned long sp_flags, int pid, int spg_id); extern void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id); + +extern void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); extern void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); -extern int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id); +extern int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id); +extern int mg_sp_unshare(unsigned long va, unsigned long size, int id); + +extern int sp_walk_page_range(unsigned long uva, unsigned long size, + struct task_struct *tsk, struct sp_walk_data *sp_walk_data); extern int mg_sp_walk_page_range(unsigned long uva, 
unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data); +extern void sp_walk_page_free(struct sp_walk_data *sp_walk_data); extern void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data); extern int sp_register_notifier(struct notifier_block *nb); extern int sp_unregister_notifier(struct notifier_block *nb); +extern bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid); extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid); +extern bool is_sharepool_addr(unsigned long addr); extern bool mg_is_sharepool_addr(unsigned long addr); +extern int sp_id_of_current(void); extern int mg_sp_id_of_current(void); extern void sp_area_drop(struct vm_area_struct *vma); @@ -326,11 +348,21 @@ static inline int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) return -EPERM; } +static inline int sp_group_add_task(int pid, int spg_id) +{ + return -EPERM; +} + static inline int mg_sp_group_del_task(int pid, int spg_id) { return -EPERM; } +static inline int sp_group_del_task(int pid, int spg_id) +{ + return -EPERM; +} + static inline int sp_group_exit(struct mm_struct *mm) { return 0; @@ -345,38 +377,74 @@ static inline int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) return -EPERM; } +static inline int sp_group_id_by_pid(int pid) +{ + return -EPERM; +} + static inline int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { return -EPERM; } +static inline void *sp_alloc(unsigned long size, unsigned long sp_flags, int sp_id) +{ + return NULL; +} + static inline void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) { return NULL; } +static inline int sp_free(unsigned long addr, int id) +{ + return -EPERM; +} + static inline int mg_sp_free(unsigned long addr, int id) { return -EPERM; } +static inline void *sp_make_share_k2u(unsigned long kva, unsigned long size, + unsigned long sp_flags, int pid, int spg_id) +{ + return NULL; +} + static inline void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id) { return NULL; } +static inline void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) +{ + return NULL; +} + static inline void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) { return NULL; } +static inline int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) +{ + return -EPERM; +} + static inline int mg_sp_unshare(unsigned long va, unsigned long size, int id) { return -EPERM; } +static inline int sp_id_of_current(void) +{ + return -EPERM; +} + static inline int mg_sp_id_of_current(void) { return -EPERM; @@ -390,12 +458,22 @@ static inline void sp_area_drop(struct vm_area_struct *vma) { } +static inline int sp_walk_page_range(unsigned long uva, unsigned long size, + struct task_struct *tsk, struct sp_walk_data *sp_walk_data) +{ + return 0; +} + static inline int mg_sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data) { return 0; } +static inline void sp_walk_page_free(struct sp_walk_data *sp_walk_data) +{ +} + static inline void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) { } @@ -410,11 +488,21 @@ static inline int sp_unregister_notifier(struct notifier_block *nb) return -EPERM; } +static inline bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) +{ + return false; +} + static inline bool mg_sp_config_dvpp_range(size_t 
start, size_t size, int device_id, int pid) { return false; } +static inline bool is_sharepool_addr(unsigned long addr) +{ + return false; +} + static inline bool mg_is_sharepool_addr(unsigned long addr) { return false; diff --git a/mm/share_pool.c b/mm/share_pool.c index ba3e32da6c0d..e787ececc355 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1052,6 +1052,38 @@ static struct sp_group *__sp_find_spg(int pid, int spg_id) return spg; } +/** + * sp_group_id_by_pid() - Get the sp_group ID of a process. + * @pid: pid of target process. + * + * Return: + * 0 the sp_group ID. + * -ENODEV target process doesn't belong to any sp_group. + */ +int sp_group_id_by_pid(int pid) +{ + struct sp_group *spg; + int spg_id = -ENODEV; + + if (!sp_is_enabled()) + return -EOPNOTSUPP; + + check_interrupt_context(); + + spg = __sp_find_spg(pid, SPG_ID_DEFAULT); + if (!spg) + return -ENODEV; + + down_read(&spg->rw_lock); + if (spg_valid(spg)) + spg_id = spg->id; + up_read(&spg->rw_lock); + + sp_group_drop(spg); + return spg_id; +} +EXPORT_SYMBOL_GPL(sp_group_id_by_pid); + /** * mp_sp_group_id_by_pid() - Get the sp_group ID array of a process. * @pid: pid of target process. @@ -1600,6 +1632,12 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) } EXPORT_SYMBOL_GPL(mg_sp_group_add_task); +int sp_group_add_task(int pid, int spg_id) +{ + return mg_sp_group_add_task(pid, PROT_READ | PROT_WRITE, spg_id); +} +EXPORT_SYMBOL_GPL(sp_group_add_task); + /** * mg_sp_group_del_task() - delete a process from a sp group. * @pid: the pid of the task to be deleted @@ -1699,7 +1737,13 @@ int mg_sp_group_del_task(int pid, int spg_id) } EXPORT_SYMBOL_GPL(mg_sp_group_del_task); -int mg_sp_id_of_current(void) +int sp_group_del_task(int pid, int spg_id) +{ + return mg_sp_group_del_task(pid, spg_id); +} +EXPORT_SYMBOL_GPL(sp_group_del_task); + +int sp_id_of_current(void) { int ret, spg_id; struct sp_group_master *master; @@ -1731,6 +1775,12 @@ int mg_sp_id_of_current(void) return spg_id; } +EXPORT_SYMBOL_GPL(sp_id_of_current); + +int mg_sp_id_of_current(void) +{ + return sp_id_of_current(); +} EXPORT_SYMBOL_GPL(mg_sp_id_of_current); /* the caller must hold sp_area_lock */ @@ -2199,7 +2249,7 @@ static int sp_free_get_spa(struct sp_free_context *fc) } /** - * mg_sp_free() - Free the memory allocated by mg_sp_alloc(). + * sp_free() - Free the memory allocated by sp_alloc(). * @addr: the starting VA of the memory. * @id: Address space identifier, which is used to distinguish the addr. * @@ -2208,7 +2258,7 @@ static int sp_free_get_spa(struct sp_free_context *fc) * * -EINVAL - the memory can't be found or was not allocted by share pool. * * -EPERM - the caller has no permision to free the memory. */ -int mg_sp_free(unsigned long addr, int id) +int sp_free(unsigned long addr, int id) { int ret = 0; struct sp_free_context fc = { @@ -2239,6 +2289,12 @@ int mg_sp_free(unsigned long addr, int id) out: return ret; } +EXPORT_SYMBOL_GPL(sp_free); + +int mg_sp_free(unsigned long addr, int id) +{ + return sp_free(addr, id); +} EXPORT_SYMBOL_GPL(mg_sp_free); /* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_lock). */ @@ -2565,7 +2621,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, } /** - * mg_sp_alloc() - Allocate shared memory for all the processes in a sp_group. + * sp_alloc() - Allocate shared memory for all the processes in a sp_group. * @size: the size of memory to allocate. * @sp_flags: how to allocate the memory. * @spg_id: the share group that the memory is allocated to. 
@@ -2576,7 +2632,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, * * if succeed, return the starting address of the shared memory. * * if fail, return the pointer of -errno. */ -void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) +void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) { struct sp_area *spa = NULL; int ret = 0; @@ -2617,6 +2673,12 @@ void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) else return (void *)(spa->va_start); } +EXPORT_SYMBOL_GPL(sp_alloc); + +void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) +{ + return sp_alloc(size, sp_flags, spg_id); +} EXPORT_SYMBOL_GPL(mg_sp_alloc); /** @@ -2917,7 +2979,7 @@ static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) } /** - * mg_sp_make_share_k2u() - Share kernel memory to current process or an sp_group. + * sp_make_share_k2u() - Share kernel memory to current process or an sp_group. * @kva: the VA of shared kernel memory. * @size: the size of shared kernel memory. * @sp_flags: how to allocate the memory. We only support SP_DVPP. @@ -2933,7 +2995,7 @@ static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) * * if succeed, return the shared user address to start at. * * if fail, return the pointer of -errno. */ -void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, +void *sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id) { void *uva; @@ -2971,6 +3033,13 @@ void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, out: return sp_k2u_finish(uva, &kc); } +EXPORT_SYMBOL_GPL(sp_make_share_k2u); + +void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, + unsigned long sp_flags, int pid, int spg_id) +{ + return sp_make_share_k2u(kva, size, sp_flags, pid, spg_id); +} EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u); static int sp_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -3209,7 +3278,7 @@ static void __sp_walk_page_free(struct sp_walk_data *data) } /** - * mg_sp_make_share_u2k() - Share user memory of a specified process to kernel. + * sp_make_share_u2k() - Share user memory of a specified process to kernel. * @uva: the VA of shared user memory * @size: the size of shared user memory * @pid: the pid of the specified process(Not currently in use) @@ -3218,7 +3287,7 @@ static void __sp_walk_page_free(struct sp_walk_data *data) * * if success, return the starting kernel address of the shared memory. * * if failed, return the pointer of -errno. */ -void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) +void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) { int ret = 0; struct mm_struct *mm = current->mm; @@ -3277,6 +3346,12 @@ void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) kvfree(sp_walk_data.pages); return p; } +EXPORT_SYMBOL_GPL(sp_make_share_u2k); + +void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) +{ + return sp_make_share_u2k(uva, size, pid); +} EXPORT_SYMBOL_GPL(mg_sp_make_share_u2k); /* @@ -3483,7 +3558,7 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) } /** - * mg_sp_unshare() - Unshare the kernel or user memory which shared by calling + * sp_unshare() - Unshare the kernel or user memory which shared by calling * sp_make_share_{k2u,u2k}(). 
* @va: the specified virtual address of memory * @size: the size of unshared memory @@ -3492,7 +3567,7 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) * * Return: 0 for success, -errno on failure. */ -int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id) +int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) { int ret = 0; @@ -3518,10 +3593,16 @@ int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id) return ret; } +EXPORT_SYMBOL_GPL(sp_unshare); + +int mg_sp_unshare(unsigned long va, unsigned long size, int id) +{ + return sp_unshare(va, size, 0, id); +} EXPORT_SYMBOL_GPL(mg_sp_unshare); /** - * mg_sp_walk_page_range() - Walk page table with caller specific callbacks. + * sp_walk_page_range() - Walk page table with caller specific callbacks. * @uva: the start VA of user memory. * @size: the size of user memory. * @tsk: task struct of the target task. @@ -3532,7 +3613,7 @@ EXPORT_SYMBOL_GPL(mg_sp_unshare); * When return 0, sp_walk_data describing [uva, uva+size) can be used. * When return -errno, information in sp_walk_data is useless. */ -int mg_sp_walk_page_range(unsigned long uva, unsigned long size, +int sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data) { struct mm_struct *mm; @@ -3571,13 +3652,20 @@ int mg_sp_walk_page_range(unsigned long uva, unsigned long size, return ret; } +EXPORT_SYMBOL_GPL(sp_walk_page_range); + +int mg_sp_walk_page_range(unsigned long uva, unsigned long size, + struct task_struct *tsk, struct sp_walk_data *sp_walk_data) +{ + return sp_walk_page_range(uva, size, tsk, sp_walk_data); +} EXPORT_SYMBOL_GPL(mg_sp_walk_page_range); /** - * mg_sp_walk_page_free() - Free the sp_walk_data structure. + * sp_walk_page_free() - Free the sp_walk_data structure. * @sp_walk_data: a structure of a page pointer array to be freed. */ -void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) +void sp_walk_page_free(struct sp_walk_data *sp_walk_data) { if (!sp_is_enabled()) return; @@ -3589,6 +3677,12 @@ void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) __sp_walk_page_free(sp_walk_data); } +EXPORT_SYMBOL_GPL(sp_walk_page_free); + +void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) +{ + sp_walk_page_free(sp_walk_data); +} EXPORT_SYMBOL_GPL(mg_sp_walk_page_free); int sp_register_notifier(struct notifier_block *nb) @@ -3605,7 +3699,7 @@ EXPORT_SYMBOL_GPL(sp_unregister_notifier); static bool is_sp_dynamic_dvpp_addr(unsigned long addr); /** - * mg_sp_config_dvpp_range() - User can config the share pool start address + * sp_config_dvpp_range() - User can config the share pool start address * of each Da-vinci device. * @start: the value of share pool start * @size: the value of share pool @@ -3616,7 +3710,7 @@ static bool is_sp_dynamic_dvpp_addr(unsigned long addr); * Return false if parameter invalid or has been set up. * This functuon has no concurrent problem. 
*/ -bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) +bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { int ret; bool err = false; @@ -3667,6 +3761,12 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) return err; } +EXPORT_SYMBOL_GPL(sp_config_dvpp_range); + +bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) +{ + return sp_config_dvpp_range(start, size, device_id, pid); +} EXPORT_SYMBOL_GPL(mg_sp_config_dvpp_range); static bool is_sp_reserve_addr(unsigned long addr) @@ -3690,16 +3790,22 @@ static bool is_sp_dynamic_dvpp_addr(unsigned long addr) } /** - * mg_is_sharepool_addr() - Check if a user memory address belongs to share pool. + * is_sharepool_addr() - Check if a user memory address belongs to share pool. * @addr: the userspace address to be checked. * * Return true if addr belongs to share pool, or false vice versa. */ -bool mg_is_sharepool_addr(unsigned long addr) +bool is_sharepool_addr(unsigned long addr) { return sp_is_enabled() && ((is_sp_reserve_addr(addr) || is_sp_dynamic_dvpp_addr(addr))); } +EXPORT_SYMBOL_GPL(is_sharepool_addr); + +bool mg_is_sharepool_addr(unsigned long addr) +{ + return is_sharepool_addr(addr); +} EXPORT_SYMBOL_GPL(mg_is_sharepool_addr); int sp_node_id(struct vm_area_struct *vma) -- Gitee From 0e21cd532d2f14bed42918e6a5c08e7d842cb05e Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Tue, 18 Oct 2022 19:47:39 +0800 Subject: [PATCH 59/74] Revert "iommu: handle page response timeout" hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5EOOG Reference: https://gitee.com/openeuler/kernel/commit/496cbac7e54d80026325e5648a78f93aa9b0d82f -------------------------------- This reverts commit da76349ca8776aa7f8b186010005fb563fb163bb. However, the iommu_fault_param and iommu_fault_event changes are reserved to avoid KABI change. Signed-off-by: Guo Mengqi Reviewed-by: Weilong Chen Signed-off-by: Zheng Zengkai --- drivers/iommu/iommu.c | 55 ------------------------------------------- include/linux/iommu.h | 4 ++-- 2 files changed, 2 insertions(+), 57 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9116c93945d0..97953fa27630 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1084,39 +1084,6 @@ int iommu_group_unregister_notifier(struct iommu_group *group, } EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); -static void iommu_dev_fault_timer_fn(struct timer_list *t) -{ - struct iommu_fault_param *fparam = from_timer(fparam, t, timer); - struct iommu_fault_event *evt; - struct iommu_fault_page_request *prm; - - u64 now; - - now = get_jiffies_64(); - - /* The goal is to ensure driver or guest page fault handler(via vfio) - * send page response on time. Otherwise, limited queue resources - * may be occupied by some irresponsive guests or drivers. - * When per device pending fault list is not empty, we periodically checks - * if any anticipated page response time has expired. - * - * TODO: - * We could do the following if response time expires: - * 1. send page response code FAILURE to all pending PRQ - * 2. inform device driver or vfio - * 3. drain in-flight page requests and responses for this device - * 4. clear pending fault list such that driver can unregister fault - * handler(otherwise blocked when pending faults are present). 
- */ - list_for_each_entry(evt, &fparam->faults, list) { - prm = &evt->fault.prm; - if (time_after64(now, evt->expire)) - pr_err("Page response time expired!, pasid %d gid %d exp %llu now %llu\n", - prm->pasid, prm->grpid, evt->expire, now); - } - mod_timer(t, now + prq_timeout); -} - /** * iommu_register_device_fault_handler() - Register a device fault handler * @dev: the device @@ -1164,9 +1131,6 @@ int iommu_register_device_fault_handler(struct device *dev, mutex_init(¶m->fault_param->lock); INIT_LIST_HEAD(¶m->fault_param->faults); - if (prq_timeout) - timer_setup(¶m->fault_param->timer, iommu_dev_fault_timer_fn, - TIMER_DEFERRABLE); done_unlock: mutex_unlock(¶m->lock); @@ -1306,9 +1270,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) struct dev_iommu *param = dev->iommu; struct iommu_fault_event *evt_pending = NULL; struct iommu_fault_param *fparam; - struct timer_list *tmr; int ret = 0; - u64 exp; if (!param || !evt || WARN_ON_ONCE(!iommu_fault_valid(&evt->fault))) return -EINVAL; @@ -1329,17 +1291,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) ret = -ENOMEM; goto done_unlock; } - /* Keep track of response expiration time */ - exp = get_jiffies_64() + prq_timeout; - evt_pending->expire = exp; mutex_lock(&fparam->lock); - if (list_empty(&fparam->faults)) { - /* First pending event, start timer */ - tmr = &fparam->timer; - WARN_ON(timer_pending(tmr)); - mod_timer(tmr, exp); - } - list_add_tail(&evt_pending->list, &fparam->faults); mutex_unlock(&fparam->lock); } @@ -1417,13 +1369,6 @@ int iommu_page_response(struct device *dev, break; } - /* stop response timer if no more pending request */ - if (list_empty(¶m->fault_param->faults) && - timer_pending(¶m->fault_param->timer)) { - pr_debug("no pending PRQ, stop timer\n"); - del_timer(¶m->fault_param->timer); - } - done_unlock: mutex_unlock(¶m->fault_param->lock); return ret; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 8baf5ed66a84..092384b71ab2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -393,7 +393,7 @@ struct iommu_device { struct iommu_fault_event { struct iommu_fault fault; struct list_head list; - u64 expire; + _KABI_DEPRECATE(u64, expire); }; /** @@ -408,7 +408,7 @@ struct iommu_fault_param { iommu_dev_fault_handler_t handler; void *data; struct list_head faults; - struct timer_list timer; + _KABI_DEPRECATE(struct timer_list, timer); struct mutex lock; }; -- Gitee From 3cc2bec7c24cde3224a550f82eff406f2180cb58 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Tue, 18 Oct 2022 19:47:40 +0800 Subject: [PATCH 60/74] ascend: export interfaces required by ascend drivers Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5PZDX -------------------------------- Export oom_type_notifier_call map_kernel_range and __get_vm_area_caller for ascend drivers. Export pm_autosleep_set_state and __vmalloc_node_range. Export alloc_workqueue_attrs, free_workqueue_attrs and apply_workqueue_attrs. 
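For reference only (an illustrative sketch, not part of this patch): once these symbols are exported, a GPL out-of-tree ascend driver could consume the unbound-workqueue attribute interfaces roughly as below, assuming it owns a WQ_UNBOUND workqueue and wants its workers pinned to the CPUs of one NUMA node. Note that apply_workqueue_attrs() expects the caller to hold cpus_read_lock().

    #include <linux/workqueue.h>
    #include <linux/cpumask.h>
    #include <linux/topology.h>
    #include <linux/cpu.h>

    static int ascend_wq_setup(int nid)
    {
            struct workqueue_struct *wq;
            struct workqueue_attrs *attrs;
            int ret;

            wq = alloc_workqueue("ascend_wq", WQ_UNBOUND, 0);
            if (!wq)
                    return -ENOMEM;

            attrs = alloc_workqueue_attrs();
            if (!attrs) {
                    destroy_workqueue(wq);
                    return -ENOMEM;
            }

            /* run the unbound workers on the CPUs of the target node */
            cpumask_copy(attrs->cpumask, cpumask_of_node(nid));

            cpus_read_lock();
            ret = apply_workqueue_attrs(wq, attrs);
            cpus_read_unlock();

            free_workqueue_attrs(attrs);
            if (ret)
                    destroy_workqueue(wq);
            return ret;
    }

The workqueue name, function name and node-pinning policy above are made up for illustration; only the exported interfaces themselves come from this patch.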
Signed-off-by: Yuan Can --- kernel/power/autosleep.c | 1 + kernel/workqueue.c | 3 +++ mm/oom_kill.c | 1 + mm/vmalloc.c | 3 +++ 4 files changed, 8 insertions(+) diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index 9af5a50d3489..6aee5077fbfa 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c @@ -113,6 +113,7 @@ int pm_autosleep_set_state(suspend_state_t state) mutex_unlock(&autosleep_lock); return 0; } +EXPORT_SYMBOL_GPL(pm_autosleep_set_state); int __init pm_autosleep_init(void) { diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6bef482a152b..0725f94c2439 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3352,6 +3352,7 @@ void free_workqueue_attrs(struct workqueue_attrs *attrs) kfree(attrs); } } +EXPORT_SYMBOL_GPL(free_workqueue_attrs); /** * alloc_workqueue_attrs - allocate a workqueue_attrs @@ -3377,6 +3378,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(void) free_workqueue_attrs(attrs); return NULL; } +EXPORT_SYMBOL_GPL(alloc_workqueue_attrs); static void copy_workqueue_attrs(struct workqueue_attrs *to, const struct workqueue_attrs *from) @@ -4091,6 +4093,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, return ret; } +EXPORT_SYMBOL_GPL(apply_workqueue_attrs); /** * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ffbe8fe2bbf6..dd2b4f890403 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -1119,6 +1119,7 @@ int oom_type_notifier_call(unsigned int type, struct oom_control *oc) return blocking_notifier_call_chain(&oom_type_notify_list, type, NULL); } +EXPORT_SYMBOL_GPL(oom_type_notifier_call); #endif /** diff --git a/mm/vmalloc.c b/mm/vmalloc.c index dadbea29241d..d7a68eb0db42 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -643,6 +643,7 @@ int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot, flush_cache_vmap(start, start + size); return ret; } +EXPORT_SYMBOL(map_kernel_range); int is_vmalloc_or_module_addr(const void *x) { @@ -2460,6 +2461,7 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, start, end, NUMA_NO_NODE, GFP_KERNEL, caller); } +EXPORT_SYMBOL(__get_vm_area_caller); /** * get_vm_area - reserve a contiguous kernel virtual area @@ -3058,6 +3060,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, return NULL; } +EXPORT_SYMBOL_GPL(__vmalloc_node_range); /** * __vmalloc_node - allocate virtually contiguous memory -- Gitee From 10129e73d8ad631d48ecca4406403a4a8467edef Mon Sep 17 00:00:00 2001 From: Xu Qiang Date: Tue, 18 Oct 2022 19:47:41 +0800 Subject: [PATCH 61/74] irq-gic-v3: Fix too large cpu_count Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PZDX -------------------------------- Fix that value of CPU is too large in its_inc_lpi_count. 
Signed-off-by: Xu Qiang --- drivers/irqchip/irq-gic-v3-its.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 6b46cfdcb402..45d75bccb7e6 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1535,6 +1535,11 @@ static __maybe_unused u32 its_read_lpi_count(struct irq_data *d, int cpu) static void its_inc_lpi_count(struct irq_data *d, int cpu) { +#ifdef CONFIG_ASCEND_INIT_ALL_GICR + if (cpu >= nr_cpu_ids) + return; +#endif + if (irqd_affinity_is_managed(d)) atomic_inc(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed); else @@ -1543,6 +1548,11 @@ static void its_inc_lpi_count(struct irq_data *d, int cpu) static void its_dec_lpi_count(struct irq_data *d, int cpu) { +#ifdef CONFIG_ASCEND_INIT_ALL_GICR + if (cpu >= nr_cpu_ids) + return; +#endif + if (irqd_affinity_is_managed(d)) atomic_dec(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed); else -- Gitee From eca5751979096a086d11a37f449117383ab5b6bd Mon Sep 17 00:00:00 2001 From: Jing-Ting Wu Date: Tue, 18 Oct 2022 19:47:42 +0800 Subject: [PATCH 62/74] cgroup: Fix race condition at rebind_subsystems() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: The rebind_subsystems() is no lock held when move css object from A list to B list,then let B's head be treated as css node at list_for_each_entry_rcu(). Solution: Add grace period before invalidating the removed rstat_css_node. Reported-by: Jing-Ting Wu Suggested-by: Michal Koutný Signed-off-by: Jing-Ting Wu Tested-by: Jing-Ting Wu Link: https://lore.kernel.org/linux-arm-kernel/d8f0bc5e2fb6ed259f9334c83279b4c011283c41.camel@mediatek.com/T/ Acked-by: Mukesh Ojha Fixes: a7df69b81aac ("cgroup: rstat: support cgroup1") Cc: stable@vger.kernel.org # v5.13+ Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 57f4e19df8c6..46d5c120c626 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1781,6 +1781,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) if (ss->css_rstat_flush) { list_del_rcu(&css->rstat_css_node); + synchronize_rcu(); list_add_rcu(&css->rstat_css_node, &dcgrp->rstat_css_list); } -- Gitee From c4a9cffae43bcb6d22ed45292dd1a68b05a39315 Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Tue, 18 Oct 2022 19:47:43 +0800 Subject: [PATCH 63/74] add config to control the export of collect_procs() Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I53VVE ------------------------------------------ Add a config to control the feature in bugzilla which export the function collect_procs(). 
Signed-off-by: Zhang Zekun --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + include/linux/mm.h | 3 +++ mm/Kconfig | 7 +++++++ mm/memory-failure.c | 7 +++++++ 5 files changed, 19 insertions(+) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 78a63cbc3db6..8dd8e7d0c5f3 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1110,6 +1110,7 @@ CONFIG_ARCH_HAS_PTE_SPECIAL=y CONFIG_PIN_MEMORY=y CONFIG_PID_RESERVE=y CONFIG_MEMORY_RELIABLE=y +CONFIG_EXPORT_COLLECT_PROCS=y # # Data Access Monitoring diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index f9c94b618ad4..9da862afc6b8 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1059,6 +1059,7 @@ CONFIG_ARCH_HAS_PKEYS=y # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_ARCH_HAS_PTE_SPECIAL=y CONFIG_MAPPING_DIRTY_HELPERS=y +CONFIG_EXPORT_COLLECT_PROCS=y # # Data Access Monitoring diff --git a/include/linux/mm.h b/include/linux/mm.h index a886f48b6a0e..03dbae9a3007 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3109,8 +3109,11 @@ extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p, int access); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); + +#ifdef CONFIG_EXPORT_COLLECT_PROCS extern void collect_procs(struct page *page, struct list_head *tokill, int force_early); +#endif /* * Error handlers for various types of pages. diff --git a/mm/Kconfig b/mm/Kconfig index 4475bd9f8762..a36deacdf480 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -969,6 +969,13 @@ config MEMORY_RELIABLE To enable this function, mirrored memory is needed and "kernelcore=reliable" need to be added in kernel parameters. +config EXPORT_COLLECT_PROCS + bool "Export the function collect_procs()" + default n + help + Export this function to collect the processes who have the page + mapped via collect_procs(). + source "mm/damon/Kconfig" endmenu diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 509fe34a0421..d2784bf013bb 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -541,8 +541,13 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, /* * Collect the processes who have the corrupted page mapped to kill. 
*/ +#ifdef CONFIG_EXPORT_COLLECT_PROCS void collect_procs(struct page *page, struct list_head *tokill, int force_early) +#else +static void collect_procs(struct page *page, struct list_head *tokill, + int force_early) +#endif { if (!page->mapping) return; @@ -552,7 +557,9 @@ void collect_procs(struct page *page, struct list_head *tokill, else collect_procs_file(page, tokill, force_early); } +#ifdef CONFIG_EXPORT_COLLECT_PROCS EXPORT_SYMBOL_GPL(collect_procs); +#endif static const char *action_name[] = { [MF_IGNORED] = "Ignored", -- Gitee From b3faaa410c7dfdd79bbc634632b4677a1f271ac2 Mon Sep 17 00:00:00 2001 From: Lijun Fang Date: Tue, 18 Oct 2022 19:47:44 +0800 Subject: [PATCH 64/74] modify ascend910 ARCH_NR_GPIOS --- drivers/gpio/gpiolib.c | 45 +++++++++++++++++++++++++++++++++----- include/asm-generic/gpio.h | 13 +++++++++++ include/linux/kernel.h | 6 +++++ 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 00526fdd7691..12b005e24ae8 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -176,11 +176,34 @@ struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) } EXPORT_SYMBOL_GPL(gpiod_to_chip); +#ifdef CONFIG_ACPI +int ascend_resize_ngpio __read_mostly; + +static int __init ascend_enable_resize_ngpio(char *s) +{ + ascend_resize_ngpio = 1; + + pr_info("Ascend enable resize ngpio features\n"); + + return 1; +} + +__setup("enable_resize_ngpio", ascend_enable_resize_ngpio); +#endif + +static int set_gpio_base(int ngpio) +{ + if (ascend_resize_ngpio) + return RESIZE_NR_GPIOS - ngpio; + + return ARCH_NR_GPIOS - ngpio; +} + /* dynamic allocation of GPIOs, e.g. on a hotplugged device */ static int gpiochip_find_base(int ngpio) { struct gpio_device *gdev; - int base = ARCH_NR_GPIOS - ngpio; + int base = set_gpio_base(ngpio); list_for_each_entry_reverse(gdev, &gpio_devices, list) { /* found a free space? */ @@ -191,12 +214,22 @@ static int gpiochip_find_base(int ngpio) base = gdev->base - ngpio; } - if (gpio_is_valid(base)) { - pr_debug("%s: found new base at %d\n", __func__, base); - return base; + if (ascend_resize_ngpio) { + if (resize_gpio_is_valid(base)) { + pr_debug("%s: found resize new base at %d\n", __func__, base); + return base; + } else { + pr_err("%s: cannot find resize free range\n", __func__); + return -ENOSPC; + } } else { - pr_err("%s: cannot find free range\n", __func__); - return -ENOSPC; + if (gpio_is_valid(base)) { + pr_debug("%s: found new base at %d\n", __func__, base); + return base; + } else { + pr_err("%s: cannot find free range\n", __func__); + return -ENOSPC; + } } } diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index aea9aee1f3e9..5a9319dfe917 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -32,6 +32,8 @@ #endif #endif +#define RESIZE_NR_GPIOS 1024 + /* * "valid" GPIO numbers are nonnegative and may be passed to * setup routines like gpio_request(). 
only some valid numbers @@ -46,6 +48,11 @@ static inline bool gpio_is_valid(int number) return number >= 0 && number < ARCH_NR_GPIOS; } +static inline bool resize_gpio_is_valid(int number) +{ + return number >= 0 && number < RESIZE_NR_GPIOS; +} + struct device; struct gpio; struct seq_file; @@ -146,6 +153,12 @@ static inline bool gpio_is_valid(int number) return number >= 0; } +static inline bool resize_gpio_is_valid(int number) +{ + /* only non-negative numbers are valid */ + return number >= 0; +} + /* platforms that don't directly support access to GPIOs through I2C, SPI, * or other blocking infrastructure can use these wrappers. */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 78a0907f0b04..d2c4a87c012a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -216,6 +216,12 @@ static __always_inline void might_resched(void) #endif /* CONFIG_PREEMPT_* */ +#ifdef CONFIG_ACPI +extern int ascend_resize_ngpio; +#else +#define ascend_resize_ngpio 0 +#endif + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP extern void ___might_sleep(const char *file, int line, int preempt_offset); extern void __might_sleep(const char *file, int line, int preempt_offset); -- Gitee From b0b080d0e6008020162bfaf52d17ae41066dd6fb Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:47:45 +0800 Subject: [PATCH 65/74] ascend/arm64: Add ascend_enable_all kernel paramater This kernel paramater is used for ascend scene and would open all the options needed at once. Signed-off-by: Wang Wensheng --- arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/mm/init.c | 38 ++++++++++++++++++++++++++++++++++ mm/hugetlb.c | 2 +- mm/memcontrol.c | 2 +- 4 files changed, 41 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f5ce1e3a532f..159481996630 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1729,7 +1729,7 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) #endif /* CONFIG_ARM64_E0PD */ #ifdef CONFIG_ARM64_PSEUDO_NMI -static bool enable_pseudo_nmi; +bool enable_pseudo_nmi; static int __init early_enable_pseudo_nmi(char *p) { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f5bd046f9e19..ddce006b1b22 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include @@ -696,3 +698,39 @@ void dump_mem_limit(void) pr_emerg("Memory Limit: none\n"); } } + +void ascend_enable_all_features(void) +{ + if (IS_ENABLED(CONFIG_ASCEND_DVPP_MMAP)) + enable_mmap_dvpp = 1; + +#ifdef CONFIG_ASCEND_CHARGE_MIGRATE_HUGEPAGES + extern int enable_charge_mighp; + enable_charge_mighp = 1; +#endif + + if (IS_ENABLED(CONFIG_SUSPEND)) + mem_sleep_current = PM_SUSPEND_ON; + + if (IS_ENABLED(CONFIG_MEMCG_KMEM)) { + extern bool cgroup_memory_nokmem; + cgroup_memory_nokmem = false; + } + +#ifdef CONFIG_ARM64_PSEUDO_NMI + extern bool enable_pseudo_nmi; + enable_pseudo_nmi = true; +#endif + +#ifdef CONFIG_CORELOCKUP_DETECTOR + enable_corelockup_detector = true; +#endif +} + +static int __init ascend_enable_setup(char *__unused) +{ + ascend_enable_all_features(); + + return 0; +} +early_param("ascend_enable_all", ascend_enable_setup); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index acdc56e593af..2b938cb7347f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6133,7 +6133,7 @@ void __init hugetlb_cma_check(void) #endif /* CONFIG_CMA */ #ifdef CONFIG_ASCEND_FEATURES -static int enable_charge_mighp __read_mostly; +int 
enable_charge_mighp __read_mostly; const struct hstate *hugetlb_get_hstate(void) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7061f9283a34..9509232a8dbd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -84,7 +84,7 @@ DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg); static bool cgroup_memory_nosocket; /* Kernel memory accounting disabled */ -static bool cgroup_memory_nokmem = true; +bool cgroup_memory_nokmem = true; /* Whether the swap controller is active */ #ifdef CONFIG_MEMCG_SWAP -- Gitee From 246bed8bde7d2ac436d51593bf38eff2b998fbe2 Mon Sep 17 00:00:00 2001 From: Fang Lijun Date: Tue, 18 Oct 2022 19:47:46 +0800 Subject: [PATCH 66/74] enable fdm in panic Signed-off-by: Fang Lijun --- arch/arm64/Kconfig | 8 ++++++++ arch/arm64/mm/init.c | 3 +++ include/linux/init.h | 1 + kernel/panic.c | 31 +++++++++++++++++++++++++++++++ 4 files changed, 43 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 28b4e754e856..c6aa794901e1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2052,6 +2052,14 @@ config ASCEND_CLEAN_CDM the management structures for hbm nodes to the ddr nodes of the same partion to reduce the probability of kernel crashes. +config ASCEND_FDM + bool "Enable support for fdm" + default n + help + Fdm write an interrupt to the register to activate HBM ECC check in panic + + This option only enabled in ascend910 now. + config ASCEND_OOM bool "Enable support for disable oom killer" default y diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ddce006b1b22..5c6da9ec499c 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -717,6 +717,9 @@ void ascend_enable_all_features(void) cgroup_memory_nokmem = false; } + if (IS_ENABLED(CONFIG_ASCEND_FDM)) + ascend_fdm_enable = true; + #ifdef CONFIG_ARM64_PSEUDO_NMI extern bool enable_pseudo_nmi; enable_pseudo_nmi = true; diff --git a/include/linux/init.h b/include/linux/init.h index 7b53cb3092ee..885e9b2e7951 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -162,6 +162,7 @@ extern void (*late_time_init)(void); extern bool initcall_debug; +extern int ascend_fdm_enable; #endif #ifndef MODULE diff --git a/kernel/panic.c b/kernel/panic.c index d991c3b1b559..e6c47ab9c4a6 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -58,6 +58,11 @@ bool panic_on_taint_nousertaint = false; int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); +int ascend_fdm_enable; +char *fdm_base_reg; +#define FDM_BASE_ADDR 0x202010000 +#define FDM_SIZE 0x1000 + #define PANIC_PRINT_TASK_INFO 0x00000001 #define PANIC_PRINT_MEM_INFO 0x00000002 #define PANIC_PRINT_TIMER_INFO 0x00000004 @@ -166,6 +171,28 @@ static void panic_print_sys_info(void) ftrace_dump(DUMP_ALL); } +static int remap_fdm_base(void) +{ + fdm_base_reg = ioremap(FDM_BASE_ADDR, FDM_SIZE); + if (!fdm_base_reg) + return -ENOMEM; + return 0; +} + +static void enable_fdm(void) +{ + u32 val; + + if (fdm_base_reg == NULL) + return; + val = readl(fdm_base_reg + 0x20); + writel(val, fdm_base_reg + 0x2C); + writel(0xFFFFFF00, fdm_base_reg + 0x04); + writel(0xFFFFFF00, fdm_base_reg + 0x24); + writel(0xFFFFFF00, fdm_base_reg + 0x14); + writel(0x1, fdm_base_reg + 0x18); +} + /** * panic - halt the system * @fmt: The text string to print @@ -183,6 +210,8 @@ void panic(const char *fmt, ...) int old_cpu, this_cpu; bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; + if (ascend_fdm_enable) + enable_fdm(); /* * Disable local interrupts. 
This will prevent panic_smp_self_stop * from deadlocking the first cpu that invokes the panic, since @@ -693,6 +722,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(clear_warn_once_fops, NULL, clear_warn_once_set, static __init int register_warn_debugfs(void) { + if (remap_fdm_base()) + pr_err("remap fdm base failed!\n"); /* Don't care about failure */ debugfs_create_file_unsafe("clear_warn_once", 0200, NULL, NULL, &clear_warn_once_fops); -- Gitee From 249dfe237bce8c572cbb0c984f26f375d5d3597f Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Tue, 18 Oct 2022 19:47:47 +0800 Subject: [PATCH 67/74] memcg/ascend: Support not account pages of cdm for memcg ascend inclusion category: feature bugzilla: NA CVE: NA ------------------------------------------------------------------ Normally, pages can not be allocated from CDM node(explicit allocation requests from kernel or user process MPOL_BIND policy which has CDM nodes). This situation may happen. Within the limit bytes range of the memcg, the CDM nodes have a large amount of free memory, and other nodes have no available free memory. Then, the kernel or user process can not get required memory resources normally. For example: size of CMD : A mbytes size of non CMD : B mbytes limit mbytes of memcg : C mbytes. A,B < C < (A + B). If app(user app and OS service app) used up non CMD memory, but a large amount of CDM memory is available. Since OS service app can't get pages from CDM node, the allocating of pages should be failed. This is not what we expect. We hope that the memcg can be used to restrict the memory used by some user apps to ensure that certain memory is available for system services. Therefore, the CDM memory does not need to be charged in the memcg. The total size of CDM is already a limit. Signed-off-by: Zhou Guanghui Signed-off-by: Wang Wensheng --- mm/memcontrol.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9509232a8dbd..93fa3cb7a269 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3053,6 +3053,11 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) objcg = get_obj_cgroup_from_current(); if (objcg) { + if (!mem_cgroup_is_root(objcg->memcg) && is_cdm_node(page_to_nid(page))) { + obj_cgroup_put(objcg); + return 0; + } + ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order); if (!ret) { page->memcg_data = (unsigned long)objcg | @@ -7009,6 +7014,9 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) if (!memcg) memcg = get_mem_cgroup_from_mm(mm); + if (!mem_cgroup_is_root(memcg) && is_cdm_node(page_to_nid(page))) + goto out; + ret = try_charge(memcg, gfp_mask, nr_pages); if (ret) goto out_put; -- Gitee From a7e883e44ab4b4f895aa967de0eb4502b2fcee11 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 18 Oct 2022 19:47:52 +0800 Subject: [PATCH 68/74] mm: cma: use pr_err_ratelimited for CMA warning If we did not reserve extra CMA memory, the log buffer can be easily filled up by CMA failure warning when the devices calling dmam_alloc_coherent() to alloc DMA memory. Thus we can use pr_err_ratelimited() instead to reduce the duplicate CMA warning. 
Link: https://lkml.kernel.org/r/ce2251ef49e1727a9a40531d1996660b05462bd2.1615279825.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: David Hildenbrand Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/cma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/cma.c b/mm/cma.c index 9361ecaf52be..09f3b1e264c0 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -486,8 +486,8 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, } if (ret && !no_warn) { - pr_err("%s: alloc failed, req-size: %zu pages, ret: %d\n", - __func__, count, ret); + pr_err_ratelimited("%s: alloc failed, req-size: %zu pages, ret: %d\n", + __func__, count, ret); cma_debug_show_areas(cma); } -- Gitee From efb43600c87fb475fa9f157762dbe867f666966f Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:47:53 +0800 Subject: [PATCH 69/74] bugfix for hugetlb remap Offering:hulk CVE: Reference: Type:feature DTS/AR:NA reason:bugfix for hugetbl remap --- mm/hugetlb.c | 56 ++++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2b938cb7347f..de47c996ee67 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6190,39 +6190,43 @@ struct page *hugetlb_alloc_hugepage(int nid, int flag) } EXPORT_SYMBOL_GPL(hugetlb_alloc_hugepage); +static pte_t *hugetlb_huge_pte_alloc(struct mm_struct *mm, unsigned long addr, + unsigned long size) +{ + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pte_t *ptep = NULL; + + pgdp = pgd_offset(mm, addr); + p4dp = p4d_offset(pgdp, addr); + pudp = pud_alloc(mm, p4dp, addr); + if (!pudp) + return NULL; + + ptep = (pte_t *)pmd_alloc(mm, pudp, addr); + + return ptep; +} + static int __hugetlb_insert_hugepage(struct mm_struct *mm, unsigned long addr, - pgprot_t prot, unsigned long pfn, bool special) + pgprot_t prot, unsigned long pfn) { int ret = 0; pte_t *ptep, entry; struct hstate *h; - struct vm_area_struct *vma; - struct address_space *mapping; spinlock_t *ptl; h = size_to_hstate(PMD_SIZE); if (!h) return -EINVAL; - if (!IS_ALIGNED(addr, PMD_SIZE)) - return -EINVAL; - - vma = find_vma(mm, addr); - if (!vma || !range_in_vma(vma, addr, addr + PMD_SIZE)) - return -EINVAL; - - mapping = vma->vm_file->f_mapping; - i_mmap_lock_read(mapping); - ptep = huge_pte_alloc(mm, addr, huge_page_size(h)); - if (!ptep) { - ret = -ENXIO; - goto out_unlock; - } + ptep = hugetlb_huge_pte_alloc(mm, addr, huge_page_size(h)); + if (!ptep) + return -ENXIO; - if (WARN_ON(ptep && !pte_none(*ptep) && !pmd_huge(*(pmd_t *)ptep))) { - ret = -ENXIO; - goto out_unlock; - } + if (WARN_ON(ptep && !pte_none(*ptep) && !pmd_huge(*(pmd_t *)ptep))) + return -ENXIO; entry = pfn_pte(pfn, prot); entry = huge_pte_mkdirty(entry); @@ -6230,31 +6234,27 @@ static int __hugetlb_insert_hugepage(struct mm_struct *mm, unsigned long addr, entry = huge_pte_mkwrite(entry); entry = pte_mkyoung(entry); entry = pte_mkhuge(entry); - if (special) - entry = pte_mkspecial(entry); + entry = pte_mkspecial(entry); ptl = huge_pte_lockptr(h, mm, ptep); spin_lock(ptl); set_huge_pte_at(mm, addr, ptep, entry); spin_unlock(ptl); -out_unlock: - i_mmap_unlock_read(mapping); - return ret; } int hugetlb_insert_hugepage_pte(struct mm_struct *mm, unsigned long addr, pgprot_t prot, struct page *hpage) { - return __hugetlb_insert_hugepage(mm, addr, prot, page_to_pfn(hpage), false); + return __hugetlb_insert_hugepage(mm, addr, prot, page_to_pfn(hpage)); } 
EXPORT_SYMBOL_GPL(hugetlb_insert_hugepage_pte); int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm, unsigned long addr, pgprot_t prot, unsigned long phy_addr) { - return __hugetlb_insert_hugepage(mm, addr, prot, phy_addr >> PAGE_SHIFT, true); + return __hugetlb_insert_hugepage(mm, addr, prot, phy_addr >> PAGE_SHIFT); } EXPORT_SYMBOL_GPL(hugetlb_insert_hugepage_pte_by_pa); -- Gitee From 1628c6cd24ac92ba749a3e2940eaf6027800a20e Mon Sep 17 00:00:00 2001 From: Jian Zhang Date: Tue, 18 Oct 2022 19:47:54 +0800 Subject: [PATCH 70/74] add oom notifier call for oom panic [IMPACT] : add oom notifier call for oom panic note: update glibc Signed-off-by: z00512904 --- mm/oom_kill.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index dd2b4f890403..c1f82c7623cc 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -1064,6 +1064,7 @@ static void check_panic_on_oom(struct oom_control *oc) if (is_sysrq_oom(oc)) return; dump_header(oc, NULL); + oom_type_notifier_call(0, oc); panic("Out of memory: %s panic_on_oom is enabled\n", sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); } -- Gitee From 9a01603b5ca349849eede83da2aa6aefc7347169 Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:47:55 +0800 Subject: [PATCH 71/74] iommu: support message based spi for smmu ----------------------------------------------------------------------- The current SMMU driver supports SPI WireInterrupt and Message Based SPI. However, some hisilicon chips use the Messaged Based SPI. Therefore, a special attribute is added to indentify this situation. Add an option "hisicion,message-based-spi" and addr of GICD_SETSPI to dts node of SMMU, like this: hisicion,message-based-spi iommu-spi-base=<0x10 0x9000040> //Addr of GICD_SETSPI: 0x1009000040 Signed-off-by: Zhou Guanghui --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 50 ++++++++++++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 ++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index e463fd31d268..303da4a2bc6e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -30,6 +30,7 @@ #include #include #include +#include #include @@ -125,6 +126,7 @@ struct arm_smmu_ctx_desc quiet_cd = { 0 }; static struct arm_smmu_option_prop arm_smmu_options[] = { { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" }, { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"}, + { ARM_SMMU_OPT_MESSAGE_BASED_SPI, "hisilicon,message-based-spi"}, { 0, NULL}, }; @@ -406,7 +408,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI * payload, so the write will zero the entire command on that platform. 
*/ - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) { + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && + !(smmu->options & ARM_SMMU_OPT_MESSAGE_BASED_SPI)) { ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) * q->ent_dwords * 8; } @@ -781,7 +784,8 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu, static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, struct arm_smmu_ll_queue *llq) { - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && + !(smmu->options & ARM_SMMU_OPT_MESSAGE_BASED_SPI)) return __arm_smmu_cmdq_poll_until_msi(smmu, llq); return __arm_smmu_cmdq_poll_until_consumed(smmu, llq); @@ -4872,6 +4876,37 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu, bool resume } } +static void arm_smmu_setup_message_based_spi(struct arm_smmu_device *smmu) +{ + struct irq_desc *desc; + u32 event_hwirq, gerror_hwirq, pri_hwirq; + + desc = irq_to_desc(smmu->gerr_irq); + gerror_hwirq = desc->irq_data.hwirq; + writeq_relaxed(smmu->spi_base, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); + writel_relaxed(gerror_hwirq, smmu->base + ARM_SMMU_GERROR_IRQ_CFG1); + writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, + smmu->base + ARM_SMMU_GERROR_IRQ_CFG2); + + desc = irq_to_desc(smmu->evtq.q.irq); + event_hwirq = desc->irq_data.hwirq; + writeq_relaxed(smmu->spi_base, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); + writel_relaxed(event_hwirq, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG1); + writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, + smmu->base + ARM_SMMU_EVTQ_IRQ_CFG2); + + if (smmu->features & ARM_SMMU_FEAT_PRI) { + desc = irq_to_desc(smmu->priq.q.irq); + pri_hwirq = desc->irq_data.hwirq; + + writeq_relaxed(smmu->spi_base, + smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); + writel_relaxed(pri_hwirq, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG1); + writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, + smmu->base + ARM_SMMU_PRIQ_IRQ_CFG2); + } +} + static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu, bool resume) { int ret, irq; @@ -4904,6 +4939,9 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu, bool resume) if (smmu->features & ARM_SMMU_FEAT_PRI) irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; + if (smmu->options & ARM_SMMU_OPT_MESSAGE_BASED_SPI) + arm_smmu_setup_message_based_spi(smmu); + /* Enable interrupt generation on the SMMU */ ret = arm_smmu_write_reg_sync(smmu, irqen_flags, ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK); @@ -5625,6 +5663,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, parse_driver_options(smmu); + if (smmu->options & ARM_SMMU_OPT_MESSAGE_BASED_SPI) { + if (of_property_read_u64(dev->of_node, "iommu-spi-base", + &smmu->spi_base)) { + dev_err(dev, "missing irq base address\n"); + ret = -EINVAL; + } + } + if (of_dma_is_coherent(dev->of_node)) smmu->features |= ARM_SMMU_FEAT_COHERENCY; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 1dd49bed58df..406e52a4d486 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -722,8 +722,11 @@ struct arm_smmu_device { #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) #define ARM_SMMU_OPT_MSIPOLL (1 << 2) +#define ARM_SMMU_OPT_MESSAGE_BASED_SPI (1 << 3) u32 options; + u64 spi_base; + union { u32 nr_ecmdq; u32 ecmdq_enabled; -- Gitee From 62d6e5780ad7459e7ddb833d14a41c23ff396f05 Mon Sep 17 00:00:00 2001 From: Jian Zhang Date: Tue, 18 Oct 2022 19:47:56 +0800 Subject: [PATCH 72/74] Add oom recover for kmemcg when release 
buddy hugepage Signed-off-by: Jian Zhang --- mm/memcontrol.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 93fa3cb7a269..12dd1cfeb2a5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3077,6 +3077,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) void __memcg_kmem_uncharge_page(struct page *page, int order) { struct obj_cgroup *objcg; + struct mem_cgroup *memcg; unsigned int nr_pages = 1 << order; if (!PageMemcgKmem(page)) @@ -3084,6 +3085,12 @@ void __memcg_kmem_uncharge_page(struct page *page, int order) objcg = __page_objcg(page); obj_cgroup_uncharge_pages(objcg, nr_pages); + + memcg = get_mem_cgroup_from_objcg(objcg); + if (!mem_cgroup_is_root(memcg)) + memcg_oom_recover(memcg); + css_put(&memcg->css); + page->memcg_data = 0; obj_cgroup_put(objcg); } -- Gitee From 3e9abd79b5b3822875d2236c6ef2c6e5e325fa15 Mon Sep 17 00:00:00 2001 From: Jian Zhang Date: Tue, 18 Oct 2022 19:47:57 +0800 Subject: [PATCH 73/74] kernel/sdei: enable SDEI for nmi ascend inclusion category: bugfix bugzilla: NA CVE: NA --- arch/arm64/kernel/sdei.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 2132bd953a87..37546f9c6f74 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -123,17 +123,6 @@ bool _on_sdei_stack(unsigned long sp, struct stack_info *info) unsigned long sdei_arch_get_entry_point(int conduit) { - /* - * SDEI works between adjacent exception levels. If we booted at EL1 we - * assume a hypervisor is marshalling events. If we booted at EL2 and - * dropped to EL1 because we don't support VHE, then we can't support - * SDEI. - */ - if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) { - pr_err("Not supported on this hardware/boot configuration\n"); - return 0; - } - if (IS_ENABLED(CONFIG_VMAP_STACK)) { if (init_sdei_stacks()) return 0; -- Gitee From ab846d7511d6539d757369343f249595328171ac Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Tue, 18 Oct 2022 19:47:58 +0800 Subject: [PATCH 74/74] mm/hugetlb: support disable clear hugepage Signed-off-by: Zhou Guanghui --- arch/arm64/Kconfig | 7 +++++++ mm/hugetlb.c | 2 ++ 2 files changed, 9 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c6aa794901e1..91175d4d2806 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2092,6 +2092,13 @@ config ASCEND_SHARE_POOL This feature allows multiple processes to share virtual memory both in kernel and user level, which is only enabled for ascend platform. +config ASCEND_CLEAR_HUGEPAGE_DISABLE + bool "Disable clear hugepage" + default n + help + Disable clear hugepage when alloc hugepages to improve the hugepage + application performance. + endif endmenu diff --git a/mm/hugetlb.c b/mm/hugetlb.c index de47c996ee67..9556bf2d9b45 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4763,7 +4763,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, ret = vmf_error(PTR_ERR(page)); goto out; } +#ifndef CONFIG_ASCEND_CLEAR_HUGEPAGE_DISABLE clear_huge_page(page, address, pages_per_huge_page(h)); +#endif __SetPageUptodate(page); new_page = true; -- Gitee