diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index ed28bb2c071078e1ae5c8da46a474176ce43a994..d6171a26b6c8592d79c32ab37335d255e2dbb61a 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -33,7 +33,7 @@
 #define MAP_UNINITIALIZED 0x4000000	/* For anonymous mmap, memory could be
 					 * uninitialized */
 
-#define MAP_PEER_SHARED 0x10000000	/* GMEM scene, for heterogeneous memory */
+#define MAP_PEER_SHARED 0x2000000	/* GMEM scene, for heterogeneous memory */
 
 /*
  * Flags for mlock
diff --git a/mm/gmem-internal.h b/mm/gmem-internal.h
index f30e64ee95fc15789f0818a7d7806eb70cc61c6a..882c4590465fdcc17f56dc3c35bd10fac9921d40 100644
--- a/mm/gmem-internal.h
+++ b/mm/gmem-internal.h
@@ -17,6 +17,7 @@ struct hnode {
 	struct list_head activelist;
 	spinlock_t freelist_lock;
 	spinlock_t activelist_lock;
+	spinlock_t lock;
 	atomic_t nr_free_pages;
 	atomic_t nr_active_pages;
 
@@ -94,15 +95,6 @@ static inline bool gm_mapping_nomap(struct gm_mapping *gm_mapping)
 	return !!(gm_mapping->flag & GM_MAPPING_NOMAP);
 }
 
-enum gmem_stats_item {
-	NR_PAGE_MIGRATING_H2D,
-	NR_PAGE_MIGRATING_D2H,
-	NR_GMEM_STAT_ITEMS
-};
-
-extern void gmem_stats_counter(enum gmem_stats_item item, int val);
-extern void gmem_stats_counter_show(void);
-
 void __init hnuma_init(void);
 bool is_hnode(int nid);
 unsigned int alloc_hnode_id(void);
diff --git a/mm/gmem.c b/mm/gmem.c
index 5c7eb3a24e255a2661490edb4756a575e2cfd224..c4eb3dab77ddaf81e3c6885e3f923bf20e601c36 100644
--- a/mm/gmem.c
+++ b/mm/gmem.c
@@ -40,55 +40,6 @@ static inline unsigned long pe_mask(unsigned int order)
 	return 0;
 }
 
-static struct percpu_counter g_gmem_stats[NR_GMEM_STAT_ITEMS];
-
-void gmem_stats_counter(enum gmem_stats_item item, int val)
-{
-	if (!gmem_is_enabled())
-		return;
-
-	if (WARN_ON_ONCE(unlikely(item >= NR_GMEM_STAT_ITEMS)))
-		return;
-
-	percpu_counter_add(&g_gmem_stats[item], val);
-}
-
-static int gmem_stats_init(void)
-{
-	int i, rc;
-
-	for (i = 0; i < NR_GMEM_STAT_ITEMS; i++) {
-		rc = percpu_counter_init(&g_gmem_stats[i], 0, GFP_KERNEL);
-		if (rc) {
-			int j;
-
-			for (j = i-1; j >= 0; j--)
-				percpu_counter_destroy(&g_gmem_stats[j]);
-
-			break; /* break the initialization process */
-		}
-	}
-
-	return rc;
-}
-
-#ifdef CONFIG_PROC_FS
-static int gmem_stats_show(struct seq_file *m, void *arg)
-{
-	if (!gmem_is_enabled())
-		return 0;
-
-	seq_printf(
-		m, "migrating H2D : %lld\n",
-		percpu_counter_read_positive(&g_gmem_stats[NR_PAGE_MIGRATING_H2D]));
-	seq_printf(
-		m, "migrating D2H : %lld\n",
-		percpu_counter_read_positive(&g_gmem_stats[NR_PAGE_MIGRATING_D2H]));
-
-	return 0;
-}
-#endif /* CONFIG_PROC_FS */
-
 static struct workqueue_struct *prefetch_wq;
 
 #define GM_WORK_CONCURRENCY 4
@@ -126,20 +77,12 @@ static int __init gmem_init(void)
 	if (err)
 		goto free_vm_object;
 
-	err = gmem_stats_init();
-	if (err)
-		goto free_gm_sysfs;
-
-#ifdef CONFIG_PROC_FS
-	proc_create_single("gmemstats", 0444, NULL, gmem_stats_show);
-#endif
-
 	prefetch_wq = alloc_workqueue("prefetch", __WQ_LEGACY | WQ_UNBOUND |
 			WQ_HIGHPRI | WQ_CPU_INTENSIVE, GM_WORK_CONCURRENCY);
 	if (!prefetch_wq) {
 		gmem_err("fail to alloc workqueue prefetch_wq\n");
 		err = -EFAULT;
-		goto free_ctx;
+		goto free_gm_sysfs;
 	}
 
 	static_branch_enable(&gmem_status);
@@ -477,8 +420,10 @@ int gm_as_attach(struct gm_as *as, struct gm_dev *dev,
 	 * gm_as_attach will be used to attach device to process address space.
 	 * Handle this case and add hnodes registered by device to process mems_allowed.
 	 */
+#ifdef CONFIG_CPUSETS
 	for_each_node_mask(nid, dev->registered_hnodes)
 		node_set(nid, current->mems_allowed);
+#endif
 	return 0;
 }
 EXPORT_SYMBOL_GPL(gm_as_attach);
@@ -832,34 +777,41 @@ static void do_hmemcpy(struct mm_struct *mm, int hnid, unsigned long dest,
 	mutex_lock(&gm_mapping_dest->lock);
 	mutex_lock(&gm_mapping_src->lock);
 
+	if (gm_mapping_nomap(gm_mapping_src)) {
+		gmem_err("hmemcpy: src address is not mapping to CPU or device");
+		goto unlock_gm_mapping;
+	}
+
 	// Use memcpy when there is no device address, otherwise use peer_memcpy
 	if (hnid == -1) {
 		if (gm_mapping_cpu(gm_mapping_src)) { // host to host
 			gmem_err("hmemcpy: host to host is unimplemented\n");
 			goto unlock_gm_mapping;
-		} else if (gm_mapping_device(gm_mapping_src)) { // device to host
+		} else { // device to host
 			dev = gm_mapping_src->dev;
-			gmc.dest = phys_to_dma(dev->dma_dev,
-				page_to_phys(gm_mapping_dest->page) + (dest & (page_size - 1)));
+			gmc.dest = dma_map_page(dev->dma_dev, gm_mapping_dest->page,
+				(dest & (page_size - 1)), page_size, DMA_BIDIRECTIONAL);
+			if (dma_mapping_error(dev->dma_dev, gmc.dest)) {
+				gmem_err("hmemcpy dma map failed");
+				goto unlock_gm_mapping;
+			}
 			gmc.src = gm_mapping_src->gm_page->dev_dma_addr + (src & (page_size - 1));
 			gmc.kind = GM_MEMCPY_D2H;
-		} else {
-			gmem_err("hmemcpy: src address is not mapping to CPU or device");
-			goto unlock_gm_mapping;
 		}
 	} else {
 		if (gm_mapping_cpu(gm_mapping_src)) { // host to device
 			gmc.dest = gm_mapping_dest->gm_page->dev_dma_addr + (dest & (page_size - 1));
-			gmc.src = phys_to_dma(dev->dma_dev,
-				page_to_phys(gm_mapping_src->page) + (src & (page_size - 1)));
+			gmc.src = dma_map_page(dev->dma_dev, gm_mapping_src->page,
+				(src & (page_size - 1)), page_size, DMA_BIDIRECTIONAL);
+			if (dma_mapping_error(dev->dma_dev, gmc.src)) {
+				gmem_err("hmemcpy dma map failed");
+				goto unlock_gm_mapping;
+			}
 			gmc.kind = GM_MEMCPY_H2D;
-		} else if (gm_mapping_device(gm_mapping_src)) { // device to device
+		} else { // device to device
 			gmem_err("hmemcpy: device to device is unimplemented\n");
 			goto unlock_gm_mapping;
-		} else {
-			gmem_err("hmemcpy: src address is not mapping to CPU or device");
-			goto unlock_gm_mapping;
 		}
 	}
 
 	gmc.mm = mm;
@@ -867,6 +819,11 @@
 	gmc.size = size;
 	dev->mmu->peer_hmemcpy(&gmc);
 
+	if (hnid == -1)
+		dma_unmap_page(dev->dma_dev, gmc.dest, page_size, DMA_BIDIRECTIONAL);
+	else
+		dma_unmap_page(dev->dma_dev, gmc.src, page_size, DMA_BIDIRECTIONAL);
+
 unlock_gm_mapping:
 	mutex_unlock(&gm_mapping_src->lock);
 	if (gm_mapping_dest && gm_mapping_dest != gm_mapping_src)
diff --git a/mm/gmem_phys.c b/mm/gmem_phys.c
index d51f030cb42ee7adac28ea8a9e57f6e6c142f5a2..186fb50d66b706a0cfb54218b423c719d073989d 100644
--- a/mm/gmem_phys.c
+++ b/mm/gmem_phys.c
@@ -22,7 +22,7 @@
 
 static struct kmem_cache *gm_page_cachep;
 
-DEFINE_SPINLOCK(hnode_lock);
+static DEFINE_SPINLOCK(hnode_lock);
 static nodemask_t hnode_map;
 struct hnode *hnodes[MAX_NUMNODES];
 
@@ -66,6 +66,7 @@ void hnode_init(struct hnode *hnode, unsigned int hnid, struct gm_dev *dev)
 	INIT_LIST_HEAD(&hnode->activelist);
 	spin_lock_init(&hnode->freelist_lock);
 	spin_lock_init(&hnode->activelist_lock);
+	spin_lock_init(&hnode->lock);
 	atomic_set(&hnode->nr_free_pages, 0);
 	atomic_set(&hnode->nr_active_pages, 0);
 	hnode->import_failed = false;
@@ -294,7 +295,7 @@ enum gm_evict_ret {
 	GM_EVICT_DEVERR,
 };
 
-enum gm_evict_ret gm_evict_page_locked(struct gm_page *gm_page)
+static enum gm_evict_ret gm_evict_page_locked(struct gm_page *gm_page)
 {
 	struct gm_dev *gm_dev;
 	struct gm_mapping *gm_mapping;
@@ -550,13 +551,27 @@ struct gm_page *gm_alloc_page(struct mm_struct *mm, struct hnode *hnode)
 
 retry:
 	gm_page = get_gm_page_from_freelist(hnode);
-	if (!gm_page && can_import(hnode) && !hnode->import_failed) {
-		/* Import pages from device. */
-		ret = gm_dev->mmu->import_phys_mem(mm, hnode->id, NUM_IMPORT_PAGES);
-		if (!ret)
+	spin_lock(&hnode->lock);
+	if (!gm_page) {
+		spin_lock(&hnode->freelist_lock);
+		if (atomic_read(&hnode->nr_free_pages)) {
+			spin_unlock(&hnode->freelist_lock);
+			spin_unlock(&hnode->lock);
 			goto retry;
-		hnode->import_failed = true;
+		}
+		spin_unlock(&hnode->freelist_lock);
+		if (can_import(hnode) && !hnode->import_failed) {
+			/* Import pages from device. */
+			ret = gm_dev->mmu->import_phys_mem(mm, hnode->id,
+					NUM_IMPORT_PAGES);
+			if (!ret) {
+				spin_unlock(&hnode->lock);
+				goto retry;
+			}
+			hnode->import_failed = true;
+		}
 	}
+	spin_unlock(&hnode->lock);
 
 	/* Try to swap pages. */
 	if (!gm_page) {
diff --git a/mm/gmem_stat.c b/mm/gmem_stat.c
index 8a7e7f7eadb8de2121fcf19d81dfeb06363d3a76..91e0acb58b1ae4aa0d3513c0fd8a9e8a2ecbf5d3 100644
--- a/mm/gmem_stat.c
+++ b/mm/gmem_stat.c
@@ -22,6 +22,88 @@ struct hnode_kobject {
 
 #define HNODE_NAME_LEN 32
 
+/* works like memparse, but uses kstrtoull to check for overflow */
+static unsigned long long safe_memparse(const char *ptr,
+					unsigned long *result)
+{
+	char *startptr = (char *)ptr;
+	char endchar = 0;
+	unsigned int max_chars = INT_MAX;
+	unsigned long long num;
+	unsigned long long ret;
+
+	if (!result)
+		return -EINVAL;
+	while (max_chars--) {
+		if (*startptr == '\0')
+			break;
+		/* only support decimal */
+		if (!('0' <= *startptr && *startptr <= '9') && (*startptr != '\n')) {
+			if (endchar)
+				return -EINVAL;
+			endchar = *startptr;
+			*startptr = '\0';
+		}
+		startptr++;
+	}
+
+	ret = kstrtoull(ptr, 0, &num);
+	if (ret != 0)
+		return ret;
+
+	switch (endchar) {
+	case 'E':
+	case 'e':
+		if (num >= (ULONG_MAX >> 10))
+			return -ERANGE;
+		num <<= 10;
+		fallthrough;
+	case 'P':
+	case 'p':
+		if (num >= (ULONG_MAX >> 10))
+			return -ERANGE;
+		num <<= 10;
+		fallthrough;
+	case 'T':
+	case 't':
+		if (num >= (ULONG_MAX >> 10))
+			return -ERANGE;
+		num <<= 10;
+		fallthrough;
+	case 'G':
+	case 'g':
+		if (num >= (ULONG_MAX >> 10))
+			return -ERANGE;
+		num <<= 10;
+		fallthrough;
+	case 'M':
+	case 'm':
+		if (num >= (ULONG_MAX >> 10))
+			return -ERANGE;
+		num <<= 10;
+		fallthrough;
+	case 'K':
+	case 'k':
+		if (num >= (ULONG_MAX >> 10))
+			return -ERANGE;
+		num <<= 10;
+		break;
+	case 'B':
+	case 'b':
+		break;
+	default:
+		if (endchar)
+			return -EINVAL;
+	}
+
+	if (num >= ULONG_MAX)
+		return -ERANGE;
+
+	*result = (unsigned long)num;
+
+	return 0;
+}
+
 static struct hnode *get_hnode_kobj(struct kobject *kobj)
 {
 	struct hnode *hnode;
@@ -48,15 +130,36 @@ static ssize_t max_memsize_show(struct kobject *kobj,
 }
 
 static ssize_t max_memsize_store(struct kobject *kobj,
-				 struct kobj_attribute *attr,
-				 const char *buf, size_t count)
+				 struct kobj_attribute *attr, const char *buf,
+				 size_t count)
 {
 	struct hnode *hnode = get_hnode_kobj(kobj);
+	unsigned long nr_pages;
+	unsigned long used_mem;
+	unsigned long max_memsize;
+	int ret = 0;
 
 	if (!hnode)
 		return -EINVAL;
 
-	hnode->max_memsize = memparse(buf, NULL) & (~(HPAGE_SIZE - 1));
+	nr_pages = atomic_read(&hnode->nr_free_pages) +
+		   atomic_read(&hnode->nr_active_pages);
+	used_mem = nr_pages * HPAGE_SIZE;
+	ret = safe_memparse(buf, &max_memsize);
+	if (ret != 0) {
+		if (ret == -ERANGE)
+			gmem_err("write to max_memsize overflow, value not changed");
+		else
+			gmem_err("write to max_memsize with invalid value, value not changed");
+		return ret;
+	}
+	max_memsize = max_memsize & (~(HPAGE_SIZE * NUM_IMPORT_PAGES - 1));
+	if (max_memsize < used_mem && max_memsize) {
+		gmem_err(
+			"new max_memsize should be larger than used mem, value not changed\n");
+		return -EINVAL;
+	}
+	hnode->max_memsize = max_memsize;
 	return count;
 }
 
@@ -89,56 +192,12 @@ static ssize_t nr_activepages_show(struct kobject *kobj,
 }
 
 static struct kobj_attribute nr_activepages_attr =
-	__ATTR(nr_activepages, 0444, nr_activepages_show, NULL);
-
-static ssize_t nr_freelist_show(struct kobject *kobj,
-				struct kobj_attribute *attr, char *buf)
-{
-	unsigned int nr_freelist = 0;
-	struct gm_page *gm_page;
-	struct hnode *hnode = get_hnode_kobj(kobj);
-
-	if (!hnode)
-		return -EINVAL;
-
-	spin_lock(&hnode->freelist_lock);
-	list_for_each_entry(gm_page, &hnode->freelist, gm_page_list) {
-		nr_freelist++;
-	}
-	spin_unlock(&hnode->freelist_lock);
-	return sprintf(buf, "%u\n", nr_freelist);
-}
-
-static struct kobj_attribute nr_freelist_attr =
-	__ATTR(nr_freelist, 0444, nr_freelist_show, NULL);
-
-static ssize_t nr_activelist_show(struct kobject *kobj,
-				  struct kobj_attribute *attr, char *buf)
-{
-	unsigned int nr_activelist = 0;
-	struct gm_page *gm_page;
-	struct hnode *hnode = get_hnode_kobj(kobj);
-
-	if (!hnode)
-		return -EINVAL;
-
-	spin_lock(&hnode->activelist_lock);
-	list_for_each_entry(gm_page, &hnode->activelist, gm_page_list) {
-		nr_activelist++;
-	}
-	spin_unlock(&hnode->activelist_lock);
-	return sprintf(buf, "%u\n", nr_activelist);
-}
-
-static struct kobj_attribute nr_activelist_attr =
-	__ATTR(nr_activelist, 0444, nr_activelist_show, NULL);
+	__ATTR(nr_activepages, 0440, nr_activepages_show, NULL);
 
 static struct attribute *hnode_attrs[] = {
 	&max_memsize_attr.attr,
 	&nr_freepages_attr.attr,
 	&nr_activepages_attr.attr,
-	&nr_freelist_attr.attr,
-	&nr_activelist_attr.attr,
 	NULL,
 };
 
diff --git a/mm/gmem_util.c b/mm/gmem_util.c
index 06c084e4fc0732bc69ef4daf4c656f18f2275a6c..b4fcb1a863c580b9e2cec2edb773ce23abbb8088 100644
--- a/mm/gmem_util.c
+++ b/mm/gmem_util.c
@@ -135,7 +135,8 @@ vm_fault_t do_peer_shared_anonymous_page(struct vm_fault *vmf)
 
 	/* map page in pgtable */
 	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
-	BUG_ON(!pmd_none(*vmf->pmd));
+	if (!pmd_none(*vmf->pmd))
+		goto unlock_release;
 	ret = check_stable_address_space(vma->vm_mm);
 	if (ret)
 		goto unlock_release;
@@ -242,7 +243,7 @@ static void gmem_reserve_vma(struct mm_struct *mm, unsigned long start,
 		kfree(node);
 		return;
 	}
-	vm_flags_set(vma, ~VM_PEER_SHARED);
+	vm_flags_clear(vma, VM_PEER_SHARED);
 
 	node->start = start;
 	node->len = round_up(len, SZ_2M);
diff --git a/mm/mmap.c b/mm/mmap.c
index 6d9e6ab2bb2771ef3941345a9e37ab77bc28ddf4..69246062256b7522de5e5e2912faed54bf739555 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1353,7 +1353,7 @@ unsigned long __do_mmap_mm(struct mm_struct *mm, struct file *file, unsigned lon
 	if (IS_ERR_VALUE(addr))
 		return addr;
 
-	if ((flags & MAP_FIXED_NOREPLACE) || (flags & MAP_PEER_SHARED)) {
+	if ((flags & MAP_FIXED_NOREPLACE) || (gmem_is_enabled() && (flags & MAP_PEER_SHARED))) {
 		if (find_vma_intersection(mm, addr, addr + len))
 			return -EEXIST;
 	}
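
A minimal userspace sketch of how the reworked MAP_PEER_SHARED path could be exercised. This is illustrative only: it assumes the patched uapi header is installed and that gmem is enabled on the running kernel; the fallback define merely mirrors the value chosen in mman-common.h above, and the program itself is not part of the patch.

/* map_peer_shared_test.c: illustrative test, not part of this patch */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MAP_PEER_SHARED
#define MAP_PEER_SHARED 0x2000000	/* assumed: value from the patched mman-common.h */
#endif

int main(void)
{
	size_t len = 2UL << 20;	/* one 2 MiB chunk, matching the SZ_2M rounding in gmem_reserve_vma() */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_PEER_SHARED, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap(MAP_PEER_SHARED)");
		return 1;
	}
	memset(p, 0, len);	/* first touch drives the peer-shared anonymous fault path */
	munmap(p, len);
	return 0;
}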