From d28654d893669da2e78562d5bb3255cb3b4cc3db Mon Sep 17 00:00:00 2001
From: fanrui
Date: Wed, 10 Aug 2022 21:26:53 +0800
Subject: [PATCH] HCK (High-performance Computing Kit) support

HCK (High-performance Computing Kit) is a feature focused on CPU
isolation to reduce OS noise. A newly added kernel boot parameter
separates all CPUs into two groups: one group is managed by the
conventional Linux kernel, where work such as link interrupts and
kernel threads runs; the other group is managed by HCK, where the user
can place HPC (High Performance Computing) tasks. The goal is to
reduce the performance impact of interrupts on tasks running on
HCK-managed cores.

This feature is disabled by default; enable CONFIG_PURPOSE_BUILT_KERNEL
to turn it on.
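Illustrative usage (the CPU list and domain id below are made-up
examples; the interfaces are the ones added by this patch):

    # reserve CPUs 4-7 for HCK at boot
    pbk_cpus=4-7

    # create a PBK domain from reserved CPUs and move the calling
    # process onto it (the domain id becomes the caller's pid)
    echo 4-7 > /sys/kernel/PBK/pbk_create_domain

    # another process joins an existing domain by id
    echo <domain_id> > /sys/kernel/PBK/pbk_join_domain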
---
 Kconfig                     |   2 +
 Makefile                    |   1 +
 PBK/Kconfig                 |   9 ++
 PBK/Makefile                |   5 +
 PBK/pbk_cpu.c               | 183 ++++++++++++++++++++++++++++++++++++
 PBK/pbk_cpu.h               |  18 ++++
 PBK/pbk_domain.c            | 172 +++++++++++++++++++++++++++++++++
 PBK/pbk_sysfs.c             | 158 +++++++++++++++++++++++++++++++
 arch/arm64/kernel/cpuinfo.c |   6 ++
 block/blk-mq.c              |   4 +
 drivers/base/cacheinfo.c    |  23 ++++-
 drivers/base/core.c         |  11 +++
 drivers/base/cpu.c          |   8 +-
 drivers/base/node.c         |   7 ++
 include/linux/pbk.h         | 114 ++++++++++++++++++++++
 include/linux/sched.h       |   6 ++
 init/init_task.c            |   3 +
 init/main.c                 |   4 +
 kernel/cgroup/cpuset.c      |  13 +++
 kernel/cpu.c                |  28 +++++-
 kernel/exit.c               |   5 +
 kernel/fork.c               |  12 +++
 kernel/sched/core.c         |  44 ++++++++-
 kernel/sched/sched.h        |  22 +++++
 24 files changed, 851 insertions(+), 7 deletions(-)
 create mode 100644 PBK/Kconfig
 create mode 100644 PBK/Makefile
 create mode 100644 PBK/pbk_cpu.c
 create mode 100644 PBK/pbk_cpu.h
 create mode 100644 PBK/pbk_domain.c
 create mode 100644 PBK/pbk_sysfs.c
 create mode 100644 include/linux/pbk.h

diff --git a/Kconfig b/Kconfig
index 745bc773f567..67fd53d81870 100644
--- a/Kconfig
+++ b/Kconfig
@@ -30,3 +30,5 @@
 source "lib/Kconfig"
 source "lib/Kconfig.debug"
 source "Documentation/Kconfig"
+
+source "PBK/Kconfig"
diff --git a/Makefile b/Makefile
index 3ebf74787e93..f7829be43ea4 100644
--- a/Makefile
+++ b/Makefile
@@ -1112,6 +1112,7 @@ export MODULES_NSDEPS := $(extmod-prefix)modules.nsdeps
 
 ifeq ($(KBUILD_EXTMOD),)
 core-y          += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
+core-y          += PBK/
 
 vmlinux-dirs    := $(patsubst %/,%,$(filter %/, \
                      $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
diff --git a/PBK/Kconfig b/PBK/Kconfig
new file mode 100644
index 000000000000..8f4592b9ec1e
--- /dev/null
+++ b/PBK/Kconfig
@@ -0,0 +1,9 @@
+# Purpose-Built Kernel (PBK)
+
+menuconfig PURPOSE_BUILT_KERNEL
+        bool "Purpose-Built Kernel"
+        default n
+        depends on SMP && NUMA && HOTPLUG_CPU
+        help
+          CPU isolation support for HCK: reserve a group of CPUs at
+          boot and dedicate them to HPC tasks via PBK domains.
diff --git a/PBK/Makefile b/PBK/Makefile
new file mode 100644
index 000000000000..e8c24f1b9b24
--- /dev/null
+++ b/PBK/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for Purpose-Built Kernel
+#
+
+obj-$(CONFIG_PURPOSE_BUILT_KERNEL) := pbk_cpu.o pbk_domain.o pbk_sysfs.o
diff --git a/PBK/pbk_cpu.c b/PBK/pbk_cpu.c
new file mode 100644
index 000000000000..5372d5389388
--- /dev/null
+++ b/PBK/pbk_cpu.c
@@ -0,0 +1,183 @@
+#define pr_fmt(fmt) "pbk_cpu: " fmt
+
+#include <linux/pbk.h>
+
+#include "pbk_cpu.h"
+
+cpumask_t __pbk_cpuset;
+cpumask_t __pbk_available_cpuset;
+DEFINE_SPINLOCK(pbk_acpuset_lock);
+
+/*
+ * Reserve and up/down pbk_cpus.
+ */
+
+int pbk_cpu_parse_args(const char *str, cpumask_t *pbk_cpus)
+{
+        int ret;
+
+        cpumask_clear(pbk_cpus);
+        ret = cpulist_parse(str, pbk_cpus);
+        if (ret < 0 || cpumask_last(pbk_cpus) >= nr_cpu_ids) {
+                pr_err("Invalid cmdline pbk_cpus\n");
+                return -EINVAL;
+        }
+        if (cpumask_test_cpu(0, pbk_cpus)) {
+                pr_err("Can not reserve CPU 0\n");
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+static int __init pbk_cpus(char *str)
+{
+        int ret;
+
+        ret = pbk_cpu_parse_args(str, pbk_cpuset);
+        if (ret) {
+                cpumask_clear(pbk_cpuset);
+                return -EINVAL;
+        }
+
+        cpumask_copy(pbk_available_cpuset, pbk_cpuset);
+        return 0;
+}
+early_param("pbk_cpus", pbk_cpus);
+
+static int pbk_cpu_up(unsigned int cpu)
+{
+        int ret;
+
+        ret = do_cpu_up(cpu, PBK_CPU_ONLINE_STATE);
+        if (ret)
+                pr_err("Failed to online CPU %u\n", cpu);
+
+        return ret;
+}
+
+static int pbk_cpu_down(unsigned int cpu)
+{
+        int ret;
+
+        ret = cpu_down(cpu, PBK_CPU_OFFLINE_STATE);
+        if (ret)
+                pr_err("Failed to offline CPU %u\n", cpu);
+
+        return ret;
+}
+
+int pbk_cpus_up(cpumask_var_t upset)
+{
+        unsigned int cpu;
+        int ret;
+
+        for_each_cpu(cpu, upset) {
+                ret = pbk_cpu_up(cpu);
+                if (ret)
+                        return ret;
+        }
+
+        return 0;
+}
+
+int pbk_cpus_down(cpumask_var_t downset)
+{
+        unsigned int cpu;
+        int ret;
+
+        for_each_cpu(cpu, downset) {
+                ret = pbk_cpu_down(cpu);
+                if (ret)
+                        return ret;
+        }
+
+        return 0;
+}
+
+/*
+ * Allocate CPUs in @request from pbk_available_cpuset.
+ */
+int pbk_alloc_cpus(cpumask_var_t request)
+{
+        unsigned int cpu;
+        cpumask_t hold;
+
+        if (cpumask_empty(request)) {
+                pr_err("Invalid request cpumask\n");
+                return -EINVAL;
+        }
+
+        cpumask_clear(&hold);
+        spin_lock(&pbk_acpuset_lock);
+        for_each_cpu(cpu, request) {
+                if (cpumask_test_and_clear_cpu(cpu, pbk_available_cpuset)) {
+                        cpumask_set_cpu(cpu, &hold);
+                } else {
+                        pr_err("Request CPU %u is not available\n", cpu);
+                        /* Invalid request, revert already-held CPUs under the lock. */
+                        for_each_cpu(cpu, &hold)
+                                cpumask_set_cpu(cpu, pbk_available_cpuset);
+                        spin_unlock(&pbk_acpuset_lock);
+                        return -EINVAL;
+                }
+        }
+        spin_unlock(&pbk_acpuset_lock);
+
+        return 0;
+}
+
+int pbk_alloc_nr_cpu(unsigned int nr_cpu, cpumask_var_t mask)
+{
+        unsigned int cpu;
+
+        if (!nr_cpu) {
+                pr_err("The value of nr_cpu must be greater than 0\n");
+                return -EINVAL;
+        }
+
+        spin_lock(&pbk_acpuset_lock);
+        if (cpumask_weight(pbk_available_cpuset) < nr_cpu) {
+                spin_unlock(&pbk_acpuset_lock);
+                pr_err("Available CPU is not enough\n");
+                return -EINVAL;
+        }
+
+        for_each_cpu(cpu, pbk_available_cpuset) {
+                cpumask_clear_cpu(cpu, pbk_available_cpuset);
+                cpumask_set_cpu(cpu, mask);
+                nr_cpu--;
+                if (!nr_cpu)
+                        break;
+        }
+        spin_unlock(&pbk_acpuset_lock);
+
+        if (nr_cpu) {
+                pr_err("CPU is not enough. May race with others\n");
+                BUG();
+        }
+
+        return 0;
+}
+
+/*
+ * Give back CPUs in @release to pbk_available_cpuset.
+ */
+void pbk_free_cpus(cpumask_var_t release)
+{
+        unsigned int cpu;
+
+        spin_lock(&pbk_acpuset_lock);
+        for_each_cpu(cpu, release)
+                cpumask_set_cpu(cpu, pbk_available_cpuset);
+        spin_unlock(&pbk_acpuset_lock);
+}
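+
+/*
+ * Illustrative pairing of the allocators above (hypothetical caller,
+ * error handling elided):
+ *
+ *        cpumask_t mask;
+ *
+ *        cpumask_clear(&mask);
+ *        if (!pbk_alloc_nr_cpu(2, &mask)) {
+ *                ...        use the two reserved CPUs
+ *                pbk_free_cpus(&mask);
+ *        }
+ */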
+
+/*
+ * Add/delete CPUs @mask to/from domain @pd.
+ */
+void pbk_set_cpus(struct pbk_domain *pd, cpumask_var_t mask, bool add)
+{
+        if (add)
+                cpumask_or(pbk_domain_cpu(pd), pbk_domain_cpu(pd), mask);
+        else
+                cpumask_andnot(pbk_domain_cpu(pd), pbk_domain_cpu(pd), mask);
+}
diff --git a/PBK/pbk_cpu.h b/PBK/pbk_cpu.h
new file mode 100644
index 000000000000..0c87a345086e
--- /dev/null
+++ b/PBK/pbk_cpu.h
@@ -0,0 +1,18 @@
+#ifndef _PBK_CPU_H
+#define _PBK_CPU_H
+
+#include <linux/cpuhotplug.h>
+
+#define PBK_CPU_ONLINE_STATE    (CPUHP_AP_ACTIVE - 1)
+#define PBK_CPU_OFFLINE_STATE   CPUHP_OFFLINE
+
+int pbk_cpu_parse_args(const char *str, cpumask_t *pbk_cpus);
+
+int pbk_cpus_up(cpumask_var_t upset);
+int pbk_cpus_down(cpumask_var_t downset);
+
+int pbk_alloc_cpus(cpumask_var_t request);
+int pbk_alloc_nr_cpu(unsigned int nr_cpu, cpumask_var_t mask);
+void pbk_free_cpus(cpumask_var_t release);
+
+#endif /* _PBK_CPU_H */
\ No newline at end of file
diff --git a/PBK/pbk_domain.c b/PBK/pbk_domain.c
new file mode 100644
index 000000000000..47a4fa483fe2
--- /dev/null
+++ b/PBK/pbk_domain.c
@@ -0,0 +1,172 @@
+#define pr_fmt(fmt) "pbk_domain: " fmt
+
+#include <linux/pbk.h>
+#include <linux/slab.h>
+#include <linux/hashtable.h>
+#include <linux/workqueue.h>
+
+#include "pbk_cpu.h"
+
+DEFINE_HASHTABLE(pbk_domains, NR_DOMAINS_MAX_BITS);
+DEFINE_SPINLOCK(pbk_domains_lock);
+
+/*
+ * Create the PBK root domain from pbk_cpuset.
+ */
+void pbk_create_root_domain(void)
+{
+        cpumask_t workqueue_unbound_mask;
+        int ret;
+
+        if (cpumask_empty(pbk_cpuset)) {
+                pr_info("No valid pbk_cpuset, skip creating PBK root domain\n");
+                return;
+        }
+
+        ret = pbk_cpus_up(pbk_cpuset);
+        if (ret)
+                pr_err("Failed to create PBK root domain\n");
+
+        cpumask_copy(pbk_available_cpuset, pbk_cpuset);
+        cpumask_andnot(&workqueue_unbound_mask, cpu_possible_mask, pbk_cpuset);
+        ret = workqueue_set_unbound_cpumask(&workqueue_unbound_mask);
+        if (!ret)
+                pr_info("Set workqueue unbound cpumask to %*pbl\n",
+                        cpumask_pr_args(&workqueue_unbound_mask));
+}
+
+static void pbk_add_domain(struct pbk_domain *pd)
+{
+        spin_lock(&pbk_domains_lock);
+        hash_add(pbk_domains, &pd->ht_node, pd->domain_id);
+        spin_unlock(&pbk_domains_lock);
+}
+
+struct pbk_domain *pbk_find_get_domain(pdid_t domain_id)
+{
+        struct pbk_domain *pd;
+
+        spin_lock(&pbk_domains_lock);
+        hash_for_each_possible(pbk_domains, pd, ht_node, domain_id) {
+                if (pd->domain_id == domain_id) {
+                        /* Take the reference before dropping the lock. */
+                        get_pbk_domain(pd);
+                        spin_unlock(&pbk_domains_lock);
+                        return pd;
+                }
+        }
+        spin_unlock(&pbk_domains_lock);
+
+        pr_err("PBK domain %d is not found\n", domain_id);
+        return NULL;
+}
+
+struct pbk_domain *pbk_find_matched_domain(cpumask_var_t request)
+{
+        struct pbk_domain *pd = NULL;
+        int bkt = 0;
+
+        spin_lock(&pbk_domains_lock);
+        hash_for_each(pbk_domains, bkt, pd, ht_node) {
+                if (cpumask_equal(pbk_domain_cpu(pd), request)) {
+                        /* Take the reference before dropping the lock. */
+                        get_pbk_domain(pd);
+                        spin_unlock(&pbk_domains_lock);
+                        return pd;
+                }
+        }
+        spin_unlock(&pbk_domains_lock);
+
+        pr_err("PBK can not find matched domain\n");
+        return NULL;
+}
+
+static void pbk_del_domain(struct pbk_domain *pd)
+{
+        spin_lock(&pbk_domains_lock);
+        hash_del(&pd->ht_node);
+        spin_unlock(&pbk_domains_lock);
+}
+
+static void pbk_add_process(struct task_struct *p, struct pbk_domain *pd)
+{
+        spin_lock(&pd->process_list_lock);
+        list_add(&p->pbk_process, &pd->process_list);
+        spin_unlock(&pd->process_list_lock);
+}
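+
+/*
+ * Domain lifecycle, as wired up below and in PBK/pbk_sysfs.c: a domain
+ * is created with one reference held by its creator; each joiner takes
+ * another reference via pbk_attach_domain(), and the final
+ * put_pbk_domain() returns the CPUs to pbk_available_cpuset and frees
+ * the domain.
+ */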
+void pbk_del_process(struct task_struct *p, struct pbk_domain *pd)
+{
+        spin_lock(&pd->process_list_lock);
+        list_del(&p->pbk_process);
+        spin_unlock(&pd->process_list_lock);
+}
+
+void pbk_attach_domain(struct task_struct *p, struct pbk_domain *pd)
+{
+        p->pbkd = pd;
+        pbk_add_process(p, pd);
+        get_pbk_domain(pd);
+}
+
+/*
+ * Allocate a PBK domain with @request CPUs.
+ */
+struct pbk_domain *pbk_alloc_domain(cpumask_var_t request)
+{
+        struct pbk_domain *pd;
+
+        pd = kmalloc(sizeof(struct pbk_domain), GFP_KERNEL);
+        if (!pd)
+                return ERR_PTR(-ENOMEM);
+
+        refcount_set(&pd->refcount, 1);
+        spin_lock_init(&pd->process_list_lock);
+        INIT_LIST_HEAD(&pd->process_list);
+        cpumask_copy(pbk_domain_cpu(pd), request);
+        /* The domain id is the pid of the creating process. */
+        pd->domain_id = current->pid;
+        pbk_add_process(current, pd);
+        pbk_add_domain(pd);
+
+        return pd;
+}
+
+void destroy_pbk_domain(struct pbk_domain *pd)
+{
+        pbk_free_cpus(pbk_domain_cpu(pd));
+        pbk_del_domain(pd);
+        kfree(pd);
+}
+
+int pbk_resched_threads(struct task_struct *p, cpumask_var_t new)
+{
+        struct task_struct *tsk;
+        int ret = 0;
+
+        for_each_thread(p, tsk) {
+                ret = sched_setaffinity(tsk->pid, new);
+                if (ret) {
+                        pr_err("Failed to set affinity for task %d\n", tsk->pid);
+                        return ret;
+                }
+        }
+
+        return ret;
+}
+
+int pbk_resched_domain_process(struct pbk_domain *pd)
+{
+        struct task_struct *p;
+        int ret = 0;
+
+        spin_lock(&pd->process_list_lock);
+        list_for_each_entry(p, &pd->process_list, pbk_process) {
+                ret = sched_setaffinity(p->pid, pbk_domain_cpu(pd));
+                if (ret)
+                        goto out;
+        }
+out:
+        spin_unlock(&pd->process_list_lock);
+        return ret;
+}
diff --git a/PBK/pbk_sysfs.c b/PBK/pbk_sysfs.c
new file mode 100644
index 000000000000..d3b85686efae
--- /dev/null
+++ b/PBK/pbk_sysfs.c
@@ -0,0 +1,158 @@
+#define pr_fmt(fmt) "pbk_sysfs: " fmt
+
+#include <linux/pbk.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include "pbk_cpu.h"
+
+static struct kobject *pbk_kobj;
+
+static ssize_t pbk_create_domain_store(struct kobject *kobj,
+                struct kobj_attribute *attr, const char *buf, size_t count)
+{
+        cpumask_t request;
+        struct pbk_domain *pd;
+        int ret;
+
+        ret = cpulist_parse(buf, &request);
+        if (ret || !cpumask_subset(&request, pbk_cpuset))
+                return -EINVAL;
+
+        pd = pbk_find_matched_domain(&request);
+        if (!pd) {
+                ret = pbk_alloc_cpus(&request);
+                if (ret)
+                        return ret;
+
+                pd = pbk_alloc_domain(&request);
+                if (IS_ERR(pd)) {
+                        pr_err("Failed to allocate pbk domain\n");
+                        return PTR_ERR(pd);
+                }
+        }
+
+        current->pbkd = pd;
+
+        ret = pbk_resched_domain_process(pd);
+        if (ret)
+                return ret;
+
+        return count;
+}
+
+static struct kobj_attribute pbk_create_domain_attr = __ATTR_WO(pbk_create_domain);
+
+static ssize_t pbk_join_domain_store(struct kobject *kobj,
+                struct kobj_attribute *attr, const char *buf, size_t count)
+{
+        pdid_t domain_id;
+        struct pbk_domain *pd;
+        int ret;
+
+        ret = kstrtoint(buf, 0, &domain_id);
+        if (ret)
+                return -EINVAL;
+
+        pd = pbk_find_get_domain(domain_id);
+        if (!pd)
+                return -EINVAL;
+
+        pbk_attach_domain(current, pd);
+        pbk_resched_threads(current, pbk_domain_cpu(pd));
+        put_pbk_domain(pd);
+
+        return count;
+}
+
+static struct kobj_attribute pbk_join_domain_attr = __ATTR_WO(pbk_join_domain);
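+
+/*
+ * Example (illustrative, the count is arbitrary): a process that does
+ * not care which reserved CPUs it gets can request a domain by size:
+ *
+ *        echo 2 > /sys/kernel/PBK/pbk_with_nr_cpu
+ */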
+
+static ssize_t pbk_with_nr_cpu_store(struct kobject *kobj,
+                struct kobj_attribute *attr, const char *buf, size_t count)
+{
+        cpumask_t request;
+        unsigned int nr_cpu;
+        struct pbk_domain *pd;
+        int ret;
+
+        ret = kstrtouint(buf, 0, &nr_cpu);
+        if (ret)
+                return -EINVAL;
+
+        cpumask_clear(&request);
+        ret = pbk_alloc_nr_cpu(nr_cpu, &request);
+        if (ret)
+                return ret;
+
+        pd = pbk_alloc_domain(&request);
+        if (IS_ERR(pd)) {
+                pr_err("Failed to allocate pbk domain\n");
+                return PTR_ERR(pd);
+        }
+
+        current->pbkd = pd;
+
+        ret = pbk_resched_domain_process(pd);
+        if (ret)
+                return ret;
+
+        return count;
+}
+
+static struct kobj_attribute pbk_with_nr_cpu_attr = __ATTR_WO(pbk_with_nr_cpu);
+
+static ssize_t pbk_view_store(struct kobject *kobj,
+                struct kobj_attribute *attr, const char *buf, size_t count)
+{
+        int pbk_view = 0;
+        int ret;
+
+        ret = kstrtoint(buf, 0, &pbk_view);
+        if (ret || (pbk_view != 0 && pbk_view != 1))
+                return -EINVAL;
+
+        current->pbk_view = pbk_view;
+
+        return count;
+}
+
+static struct kobj_attribute pbk_view_attr = __ATTR_WO(pbk_view);
+
+static struct attribute *pbk_attributes[] = {
+        &pbk_create_domain_attr.attr,
+        &pbk_join_domain_attr.attr,
+        &pbk_with_nr_cpu_attr.attr,
+        &pbk_view_attr.attr,
+        NULL
+};
+
+static struct attribute_group pbk_attr_group = {
+        .attrs = pbk_attributes,
+};
+
+static int __init pbk_sysfs_init(void)
+{
+        int ret;
+
+        pbk_kobj = kobject_create_and_add("PBK", kernel_kobj);
+        if (!pbk_kobj)
+                return -ENOMEM;
+
+        pbk_create_domain_attr.attr.mode |= S_IWGRP;
+        pbk_join_domain_attr.attr.mode |= S_IWGRP;
+        pbk_with_nr_cpu_attr.attr.mode |= S_IWGRP;
+        pbk_view_attr.attr.mode |= S_IWGRP;
+
+        ret = sysfs_create_group(pbk_kobj, &pbk_attr_group);
+        if (ret) {
+                pr_err("Failed to create sysfs entries for PBK\n");
+                return ret;
+        }
+
+        return 0;
+}
+
+subsys_initcall(pbk_sysfs_init);
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 3899083ff555..ed4c5b26e81d 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include <linux/pbk.h>
 
 /*
  * In case the boot CPU is hotpluggable, we record its initial state and
@@ -152,6 +153,11 @@ static int c_show(struct seq_file *m, void *v)
         for_each_online_cpu(i) {
                 struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
                 u32 midr = cpuinfo->reg_midr;
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+                if ((is_pbk_process(current) && !is_current_pbk_cpu(i)) ||
+                    (is_pbk_view(current) && !is_pbk_cpu(i)))
+                        continue;
+#endif
 
                 /*
                  * glibc reads /proc/cpuinfo to determine the number of
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1941ffc4db85..6860a599c908 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1581,6 +1581,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
          * triggered, and we depend on blk-mq timeout handler to
          * handle dispatched requests to this hctx
          */
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        /* Ignore this check for pbk cpus. */
+#else
         if (!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
                 cpu_online(hctx->next_cpu)) {
                 printk(KERN_WARNING "run queue from wrong CPU %d, hctx %s\n",
@@ -1588,6 +1591,7 @@
                         cpumask_empty(hctx->cpumask) ? "inactive": "active");
                 dump_stack();
         }
+#endif
 
         /*
          * We can't run the queue inline with ints disabled. Ensure that
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 8737ad3b412a..8efff8ce7532 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include <linux/pbk.h>
 
 /* pointer to per cpu cacheinfo */
 static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo);
@@ -415,7 +416,16 @@ static ssize_t shared_cpu_map_show(struct device *dev,
 {
         struct cacheinfo *this_leaf = dev_get_drvdata(dev);
         const struct cpumask *mask = &this_leaf->shared_cpu_map;
-
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if (is_pbk_process(current) || is_pbk_view(current)) {
+                struct cpumask pbk_mask;
+                if (is_pbk_process(current))
+                        cpumask_and(&pbk_mask, current_pbk_cpu(), mask);
+                else
+                        cpumask_and(&pbk_mask, pbk_cpuset, mask);
+                return sysfs_emit(buf, "%*pb\n", nr_cpu_ids, &pbk_mask);
+        }
+#endif
         return sysfs_emit(buf, "%*pb\n", nr_cpu_ids, mask);
 }
 
@@ -424,7 +434,16 @@ static ssize_t shared_cpu_list_show(struct device *dev,
 {
         struct cacheinfo *this_leaf = dev_get_drvdata(dev);
         const struct cpumask *mask = &this_leaf->shared_cpu_map;
-
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if (is_pbk_process(current) || is_pbk_view(current)) {
+                struct cpumask pbk_mask;
+                if (is_pbk_process(current))
+                        cpumask_and(&pbk_mask, current_pbk_cpu(), mask);
+                else
+                        cpumask_and(&pbk_mask, pbk_cpuset, mask);
+                return sysfs_emit(buf, "%*pbl\n", nr_cpu_ids, &pbk_mask);
+        }
+#endif
         return sysfs_emit(buf, "%*pbl\n", nr_cpu_ids, mask);
 }
diff --git a/drivers/base/core.c b/drivers/base/core.c
index c0566aff5355..ad207562a9d8 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include <linux/pbk.h>
 
 #include "base.h"
 #include "power/power.h"
@@ -2028,6 +2029,16 @@ static ssize_t online_show(struct device *dev, struct device_attribute *attr,
         device_lock(dev);
         val = !dev->offline;
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if ((is_pbk_process(current) || is_pbk_view(current)) &&
+            (dev->bus == &cpu_subsys)) {
+                if ((is_pbk_process(current) && is_current_pbk_cpu(dev->id)) ||
+                    (is_pbk_view(current) && is_pbk_cpu(dev->id)))
+                        val = true;
+                else
+                        val = false;
+        }
+#endif
         device_unlock(dev);
         return sysfs_emit(buf, "%u\n", val);
 }
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 8ecb9f90f467..903504d776e6 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include <linux/pbk.h>
 
 #include "base.h"
 
@@ -208,7 +209,12 @@ static ssize_t show_cpus_attr(struct device *dev,
                               char *buf)
 {
         struct cpu_attr *ca = container_of(attr, struct cpu_attr, attr);
-
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if (is_pbk_process(current))
+                return cpumap_print_to_pagebuf(true, buf, current_pbk_cpu());
+        if (is_pbk_view(current))
+                return cpumap_print_to_pagebuf(true, buf, pbk_cpuset);
+#endif
         return cpumap_print_to_pagebuf(true, buf, ca->map);
 }
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 35c61165292a..21de1a47af18 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include <linux/pbk.h>
 
 static struct bus_type node_subsys = {
         .name = "node",
@@ -40,6 +41,12 @@ static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf)
                 return 0;
 
         cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask);
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if (is_pbk_process(current))
+                cpumask_and(mask, mask, current_pbk_cpu());
+        else if (is_pbk_view(current))
+                cpumask_and(mask, mask, pbk_cpuset);
+#endif
         n = cpumap_print_to_pagebuf(list, buf, mask);
         free_cpumask_var(mask);
diff --git
a/include/linux/pbk.h b/include/linux/pbk.h
new file mode 100644
index 000000000000..e01e52c1fa66
--- /dev/null
+++ b/include/linux/pbk.h
@@ -0,0 +1,114 @@
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+
+#ifndef _LINUX_PBK_H
+#define _LINUX_PBK_H
+
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/refcount.h>
+#include <linux/cpuhotplug.h>
+
+typedef pid_t pdid_t;
+
+#define NR_DOMAINS_MAX          16
+#define NR_DOMAINS_MAX_BITS     4
+
+extern struct hlist_head pbk_domains[NR_DOMAINS_MAX];
+extern spinlock_t pbk_domains_lock;
+
+#define DOMAIN_NAME_LEN         64
+
+struct pbk_domain {
+        char name[DOMAIN_NAME_LEN];
+        /* Same as the pid of the process that creates this domain. */
+        pdid_t domain_id;
+        refcount_t refcount;
+        cpumask_t cpuset;
+
+        /* All processes that have joined this domain. */
+        struct list_head process_list;
+        spinlock_t process_list_lock;
+
+        /* Node of the hashtable that maps domain_id to domain. */
+        struct hlist_node ht_node;
+};
+
+extern void pbk_create_root_domain(void);
+extern struct pbk_domain *pbk_find_get_domain(pdid_t domain_id);
+extern struct pbk_domain *pbk_find_matched_domain(cpumask_var_t request);
+extern struct pbk_domain *pbk_alloc_domain(cpumask_var_t request);
+extern void pbk_attach_domain(struct task_struct *p, struct pbk_domain *pd);
+extern void destroy_pbk_domain(struct pbk_domain *pd);
+extern int pbk_resched_threads(struct task_struct *p, cpumask_var_t new);
+extern int pbk_resched_domain_process(struct pbk_domain *pd);
+extern void pbk_del_process(struct task_struct *p, struct pbk_domain *pd);
+
+static inline bool is_pbk_process(struct task_struct *p)
+{
+        return p->pbkd ? true : false;
+}
+
+static inline bool is_pbk_view(struct task_struct *p)
+{
+        return p->pbk_view ? true : false;
+}
+
+static inline bool is_pbk_allowed_kthread(struct task_struct *p)
+{
+        return !strncmp(p->comm, "cpuhp", 5) ||
+               !strncmp(p->comm, "ksoftirqd", 9) ||
+               !strncmp(p->comm, "migration", 9) ||
+               !strncmp(p->comm, "osnoise", 7);
+}
+
+static inline cpumask_t *pbk_domain_cpu(struct pbk_domain *pd)
+{
+        return &pd->cpuset;
+}
+
+static inline cpumask_t *current_pbk_cpu(void)
+{
+        return pbk_domain_cpu(current->pbkd);
+}
+
+static inline void get_pbk_domain(struct pbk_domain *pd)
+{
+        refcount_inc(&pd->refcount);
+}
+
+static inline void put_pbk_domain(struct pbk_domain *pd)
+{
+        if (refcount_dec_and_test(&pd->refcount))
+                destroy_pbk_domain(pd);
+}
+
+extern cpumask_t __pbk_cpuset;
+extern cpumask_t __pbk_available_cpuset;
+extern spinlock_t pbk_acpuset_lock;
+
+#define pbk_cpuset              (&__pbk_cpuset)
+#define pbk_available_cpuset    (&__pbk_available_cpuset)
+
+static inline bool is_pbk_cpu(unsigned int cpu)
+{
+        return cpumask_test_cpu(cpu, pbk_cpuset);
+}
+
+static inline bool is_current_pbk_cpu(unsigned int cpu)
+{
+        return cpumask_test_cpu(cpu, current_pbk_cpu());
+}
+
+static inline bool is_pbk_cpu_state(enum cpuhp_state state)
+{
+        return (state != CPUHP_AP_IRQ_AFFINITY_ONLINE) &&
+               (state != CPUHP_AP_WORKQUEUE_ONLINE) &&
+               (state != CPUHP_AP_RCUTREE_ONLINE);
+}
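+
+/*
+ * cpuhp_invoke_callback() skips the three states above on PBK CPUs,
+ * so the IRQ-affinity, unbound-workqueue and RCU-tree online callbacks
+ * never run on the isolated cores.
+ */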
+
+extern int do_cpu_up(unsigned int cpu, enum cpuhp_state target);
+extern int cpu_down(unsigned int cpu, enum cpuhp_state target);
+
+#endif /* _LINUX_PBK_H */
+
+#endif /* CONFIG_PURPOSE_BUILT_KERNEL */
\ No newline at end of file
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 47f462040f4d..4f26e5f6318e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -873,6 +873,12 @@ struct task_struct {
         pid_t                           pid;
         pid_t                           tgid;
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        struct list_head                pbk_process;
+        struct pbk_domain               *pbkd;
+        int                             pbk_view;
+#endif
+
 #ifdef CONFIG_STACKPROTECTOR
         /* Canary value for the -fstack-protector GCC feature: */
         unsigned long                   stack_canary;
diff --git a/init/init_task.c b/init/init_task.c
index 5fa18ed59d33..7f8ba4199012 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -213,6 +213,9 @@ struct task_struct init_task
 #ifdef CONFIG_SECCOMP_FILTER
         .seccomp        = { .filter_count = ATOMIC_INIT(0) },
 #endif
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        .pbkd           = NULL,
+#endif
 };
 EXPORT_SYMBOL(init_task);
diff --git a/init/main.c b/init/main.c
index 41a9ce782acc..6774e0b441d0 100644
--- a/init/main.c
+++ b/init/main.c
@@ -99,6 +99,7 @@
 #include
 #include
 #include
+#include <linux/pbk.h>
 
 #include
 #include
@@ -1438,6 +1439,9 @@ static int __ref kernel_init(void *unused)
         rcu_end_inkernel_boot();
 
         do_sysctl_args();
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        pbk_create_root_domain();
+#endif
 
         if (ramdisk_execute_command) {
                 ret = run_init_process(ramdisk_execute_command);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index b7a936e5d05b..eaec7da8feda 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -66,6 +66,7 @@
 #include
 #include
 #include
+#include <linux/pbk.h>
 
 DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
 DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
@@ -2459,6 +2460,18 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 
         spin_lock_irq(&callback_lock);
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if ((is_pbk_process(current) || is_pbk_view(current)) &&
+            (type == FILE_CPULIST || type == FILE_EFFECTIVE_CPULIST)) {
+                if (is_pbk_process(current))
+                        seq_printf(sf, "%*pbl\n", cpumask_pr_args(current_pbk_cpu()));
+                else
+                        seq_printf(sf, "%*pbl\n", cpumask_pr_args(pbk_cpuset));
+                spin_unlock_irq(&callback_lock);
+                return ret;
+        }
+#endif
+
         switch (type) {
         case FILE_CPULIST:
                 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
diff --git a/kernel/cpu.c b/kernel/cpu.c
index c06ced18f78a..347d95140322 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -34,6 +34,7 @@
 #include
 #include
 #include
+#include <linux/pbk.h>
 #include
 
 #define CREATE_TRACE_POINTS
@@ -157,6 +158,11 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
         int (*cb)(unsigned int cpu);
         int ret, cnt;
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if (is_pbk_cpu(cpu) && !is_pbk_cpu_state(state))
+                return 0;
+#endif
+
         if (st->fail == state) {
                 st->fail = CPUHP_INVALID;
@@ -1116,7 +1122,7 @@ static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
         return _cpu_down(cpu, 0, target);
 }
 
-static int cpu_down(unsigned int cpu, enum cpuhp_state target)
+int cpu_down(unsigned int cpu, enum cpuhp_state target)
 {
         int err;
 
@@ -1306,7 +1312,11 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
         return ret;
 }
 
-static int cpu_up(unsigned int cpu, enum cpuhp_state target)
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
+#else
+int cpu_up(unsigned int cpu, enum cpuhp_state target)
+#endif
 {
         int err = 0;
 
@@ -1340,6 +1350,16 @@ static int cpu_up(unsigned int cpu, enum cpuhp_state target)
         return err;
 }
 
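+/*
+ * With PBK enabled, the regular hotplug path treats reserved CPUs as
+ * a no-op; PBK onlines them itself through do_cpu_up() with
+ * PBK_CPU_ONLINE_STATE as the target.
+ */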
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+int cpu_up(unsigned int cpu, enum cpuhp_state target)
+{
+        if (is_pbk_cpu(cpu))
+                return 0;
+
+        return do_cpu_up(cpu, target);
+}
+#endif
+
 /**
  * cpu_device_up - Bring up a cpu device
  * @dev: Pointer to the cpu device to online
@@ -1393,6 +1413,10 @@ void bringup_nonboot_cpus(unsigned int setup_max_cpus)
         unsigned int cpu;
 
         for_each_present_cpu(cpu) {
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+                if (cpumask_test_cpu(cpu, pbk_cpuset))
+                        continue;
+#endif
                 if (num_online_cpus() >= setup_max_cpus)
                         break;
                 if (!cpu_online(cpu))
diff --git a/kernel/exit.c b/kernel/exit.c
index d13d67fc5f4e..3e7e3daa0862 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -64,6 +64,7 @@
 #include
 #include
 #include
+#include <linux/pbk.h>
 
 #include
 #include
@@ -857,6 +858,10 @@ void __noreturn do_exit(long code)
                 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
         exit_rcu();
         exit_tasks_rcu_finish();
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if (is_pbk_process(tsk))
+                pbk_del_process(tsk, tsk->pbkd);
+#endif
 
         lockdep_free_task(tsk);
         do_task_dead();
diff --git a/kernel/fork.c b/kernel/fork.c
index c8ec029e158a..8aa9284fac2d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -99,6 +99,7 @@
 #include
 
 #include
+#include <linux/pbk.h>
 #include
 #include
 #include
@@ -744,6 +745,10 @@ void __put_task_struct(struct task_struct *tsk)
         exit_creds(tsk);
         delayacct_tsk_free(tsk);
         put_signal_struct(tsk->signal);
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if (is_pbk_process(tsk))
+                put_pbk_domain(tsk->pbkd);
+#endif
 
         if (!profile_handoff_task(tsk))
                 free_task(tsk);
@@ -1981,6 +1986,13 @@ static __latent_entropy struct task_struct *copy_process(
         if (!p)
                 goto fork_out;
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if (is_pbk_process(current))
+                pbk_attach_domain(p, current->pbkd);
+        else
+                p->pbkd = NULL;
+#endif
+
         /*
          * This _must_ happen before we call free_task(), i.e. before we jump
          * to any of the bad_fork_* labels. This is to avoid freeing
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 51c707897c8d..192babfbf9c5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1712,7 +1712,11 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
         if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                 return false;
 
-        if (is_per_cpu_kthread(p))
+        if (is_per_cpu_kthread(p)
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+            || is_pbk_process(p)
+#endif
+           )
                 return cpu_online(cpu);
 
         return cpu_active(cpu);
@@ -1893,6 +1897,10 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
                  */
                 cpu_valid_mask = cpu_online_mask;
         }
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if (is_pbk_process(p))
+                cpu_valid_mask = cpu_online_mask;
+#endif
 
         /*
          * Must re-check here, to close a race against __kthread_bind(),
@@ -2309,6 +2317,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
         for (;;) {
                 /* Any allowed, online CPU? */
                 for_each_cpu(dest_cpu, p->cpus_ptr) {
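+                        /*
+                         * Ordinary tasks must never fall back onto
+                         * reserved PBK CPUs; only PBK tasks and the
+                         * whitelisted kthreads (see
+                         * is_pbk_allowed_kthread()) may run there.
+                         */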
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+                        if (!is_pbk_process(p) &&
+                            !is_pbk_allowed_kthread(p) &&
+                            is_pbk_cpu(dest_cpu))
+                                continue;
+#endif
                         if (!is_cpu_allowed(p, dest_cpu))
                                 continue;
 
@@ -2364,6 +2378,10 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
         else
                 cpu = cpumask_any(p->cpus_ptr);
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if (is_pbk_process(p))
+                cpu = pbk_reselect_cpu(p, cpu);
+#endif
         /*
          * In order not to call set_task_cpu() on a blocking task we need
          * to rely on ttwu() to place the task on a valid ->cpus_ptr
@@ -2376,6 +2394,10 @@
          */
         if (unlikely(!is_cpu_allowed(p, cpu)))
                 cpu = select_fallback_rq(task_cpu(p), p);
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        else if (!is_pbk_process(p) && is_pbk_cpu(cpu))
+                cpu = select_fallback_rq(task_cpu(p), p);
+#endif
 
         return cpu;
 }
@@ -6176,9 +6198,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
         if (retval)
                 goto out_free_new_mask;
 
-
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if (is_pbk_process(p)) {
+                cpumask_copy(cpus_allowed, pbk_domain_cpu(p->pbkd));
+                cpumask_and(new_mask, in_mask, cpus_allowed);
+        } else {
+                cpuset_cpus_allowed(p, cpus_allowed);
+                cpumask_and(new_mask, in_mask, cpus_allowed);
+        }
+#else
         cpuset_cpus_allowed(p, cpus_allowed);
         cpumask_and(new_mask, in_mask, cpus_allowed);
+#endif
 
         /*
          * Since bandwidth control happens on root_domain basis,
@@ -6200,7 +6231,11 @@
 again:
         retval = __set_cpus_allowed_ptr(p, new_mask, true);
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if (!retval && !is_pbk_process(p)) {
+#else
         if (!retval) {
+#endif
                 cpuset_cpus_allowed(p, cpus_allowed);
                 if (!cpumask_subset(new_mask, cpus_allowed)) {
                         /*
@@ -6274,6 +6309,11 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
                 goto out_unlock;
 
         raw_spin_lock_irqsave(&p->pi_lock, flags);
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+        if (is_pbk_process(p))
+                cpumask_and(mask, &p->cpus_mask, cpu_online_mask);
+        else
+#endif
         cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
         raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 42d5fb7d9464..425e9877b37f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -66,6 +66,7 @@
 #include
 #include
 #include
+#include <linux/pbk.h>
 
 #include
 #include
@@ -2778,5 +2779,26 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
 }
 #endif
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+/*
+ * Pick a CPU in the task's PBK domain, preferring an idle one; fall
+ * back to @prev_cpu when every domain CPU is busy.
+ */
+static inline int pbk_reselect_cpu(struct task_struct *p, int prev_cpu)
+{
+        int cpu;
+        struct rq *rq;
+
+        rq = cpu_rq(prev_cpu);
+        if (!rq->nr_running)
+                return prev_cpu;
+
+        for_each_cpu(cpu, pbk_domain_cpu(p->pbkd)) {
+                rq = cpu_rq(cpu);
+                if (!rq->nr_running) {
+                        pr_debug("pbk reselect cpu %d\n", cpu);
+                        return cpu;
+                }
+        }
+
+        /* No idle CPU in the domain; stay on @prev_cpu. */
+        return prev_cpu;
+}
+#endif
+
 void swake_up_all_locked(struct swait_queue_head *q);
 void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
-- 
Gitee