diff --git a/Kconfig b/Kconfig
index 745bc773f567067a85ce6574fb41ce80833247d9..67fd53d81870006c0bfb4815bcede437f87af068 100644
--- a/Kconfig
+++ b/Kconfig
@@ -30,3 +30,5 @@ source "lib/Kconfig"
 source "lib/Kconfig.debug"
 
 source "Documentation/Kconfig"
+
+source "PBK/Kconfig"
diff --git a/Makefile b/Makefile
index 3ebf74787e93fccfbecd2bb0217720bf97fb15c7..f7829be43ea445793f14a5a67f338592adb79ec3 100644
--- a/Makefile
+++ b/Makefile
@@ -1112,6 +1112,7 @@ export MODULES_NSDEPS := $(extmod-prefix)modules.nsdeps
 
 ifeq ($(KBUILD_EXTMOD),)
 core-y		+= kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
+core-y		+= PBK/
 
 vmlinux-dirs	:= $(patsubst %/,%,$(filter %/, \
 		     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
diff --git a/PBK/Kconfig b/PBK/Kconfig
new file mode 100644
index 0000000000000000000000000000000000000000..8f4592b9ec1e9b692aed8c63038f831908dd63d4
--- /dev/null
+++ b/PBK/Kconfig
@@ -0,0 +1,9 @@
+# Purpose-Built Kernel (PBK)
+
+menuconfig PURPOSE_BUILT_KERNEL
+	bool "Purpose-Built Kernel"
+	default n
+	depends on SMP && NUMA && HOTPLUG_CPU
+	help
+	  Reserve a set of CPUs at boot (via the "pbk_cpus=" parameter) for
+	  exclusive use by processes that join a PBK domain.
diff --git a/PBK/Makefile b/PBK/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..e8c24f1b9b246cc8bcba2117d01ac8fe5125fb89
--- /dev/null
+++ b/PBK/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Purpose-Built Kernel (PBK)
+#
+
+obj-$(CONFIG_PURPOSE_BUILT_KERNEL) += pbk_cpu.o pbk_domain.o pbk_sysfs.o
diff --git a/PBK/pbk_cpu.c b/PBK/pbk_cpu.c
new file mode 100644
index 0000000000000000000000000000000000000000..5372d53893884b430cbab99e60aafa083c7a89fc
--- /dev/null
+++ b/PBK/pbk_cpu.c
@@ -0,0 +1,183 @@
+#define pr_fmt(fmt) "pbk_cpu: " fmt
+
+#include <linux/pbk.h>
+
+#include "pbk_cpu.h"
+
+cpumask_t __pbk_cpuset;
+cpumask_t __pbk_available_cpuset;
+DEFINE_SPINLOCK(pbk_acpuset_lock);
+
+/*
+ * Reserve PBK CPUs at boot and bring them up/down.
+ */
+
+int pbk_cpu_parse_args(const char *str, cpumask_t *pbk_cpus)
+{
+	int ret;
+
+	cpumask_clear(pbk_cpus);
+	ret = cpulist_parse(str, pbk_cpus);
+	if (ret < 0 || cpumask_last(pbk_cpus) >= nr_cpu_ids) {
+		pr_err("Invalid cmdline pbk_cpus\n");
+		return -EINVAL;
+	}
+	if (cpumask_test_cpu(0, pbk_cpus)) {
+		pr_err("Cannot reserve CPU 0\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __init pbk_cpus(char *str)
+{
+	int ret;
+
+	ret = pbk_cpu_parse_args(str, pbk_cpuset);
+	if (ret) {
+		cpumask_clear(pbk_cpuset);
+		return ret;
+	}
+
+	cpumask_copy(pbk_available_cpuset, pbk_cpuset);
+	return 0;
+}
+early_param("pbk_cpus", pbk_cpus);
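+
+/*
+ * Example (illustrative, values assumed): booting with "pbk_cpus=2-5,8"
+ * reserves CPUs 2-5 and 8 for PBK domains; they are kept out of the
+ * normal boot-time bring-up and are onlined to a non-active hotplug
+ * state by pbk_create_root_domain() instead. CPU 0 can never be
+ * reserved, so e.g. "pbk_cpus=0-3" is rejected and no CPUs are set
+ * aside.
+ */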
+
+static int pbk_cpu_up(unsigned int cpu)
+{
+	int ret;
+
+	ret = do_cpu_up(cpu, PBK_CPU_ONLINE_STATE);
+	if (ret)
+		pr_err("Failed to online CPU %u\n", cpu);
+
+	return ret;
+}
+
+static int pbk_cpu_down(unsigned int cpu)
+{
+	int ret;
+
+	ret = cpu_down(cpu, PBK_CPU_OFFLINE_STATE);
+	if (ret)
+		pr_err("Failed to offline CPU %u\n", cpu);
+
+	return ret;
+}
+
+int pbk_cpus_up(cpumask_var_t upset)
+{
+	unsigned int cpu;
+	int ret;
+
+	for_each_cpu(cpu, upset) {
+		ret = pbk_cpu_up(cpu);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int pbk_cpus_down(cpumask_var_t downset)
+{
+	unsigned int cpu;
+	int ret;
+
+	for_each_cpu(cpu, downset) {
+		ret = pbk_cpu_down(cpu);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate the CPUs in @request from pbk_available_cpuset.
+ */
+int pbk_alloc_cpus(cpumask_var_t request)
+{
+	unsigned int cpu;
+	cpumask_t hold;
+
+	if (cpumask_empty(request)) {
+		pr_err("Invalid request cpumask\n");
+		return -EINVAL;
+	}
+
+	cpumask_clear(&hold);
+	spin_lock(&pbk_acpuset_lock);
+	for_each_cpu(cpu, request) {
+		if (cpumask_test_and_clear_cpu(cpu, pbk_available_cpuset)) {
+			cpumask_set_cpu(cpu, &hold);
+		} else {
+			pr_err("Requested CPU %u is not available\n", cpu);
+			/* Invalid request: return the CPUs taken so far,
+			 * before dropping the lock. */
+			for_each_cpu(cpu, &hold)
+				cpumask_set_cpu(cpu, pbk_available_cpuset);
+			spin_unlock(&pbk_acpuset_lock);
+			return -EINVAL;
+		}
+	}
+	spin_unlock(&pbk_acpuset_lock);
+
+	return 0;
+}
+
+int pbk_alloc_nr_cpu(unsigned int nr_cpu, cpumask_var_t mask)
+{
+	unsigned int cpu;
+
+	if (!nr_cpu) {
+		pr_err("The value of nr_cpu must be greater than 0\n");
+		return -EINVAL;
+	}
+
+	spin_lock(&pbk_acpuset_lock);
+	if (cpumask_weight(pbk_available_cpuset) < nr_cpu) {
+		spin_unlock(&pbk_acpuset_lock);
+		pr_err("Not enough available PBK CPUs\n");
+		return -EINVAL;
+	}
+
+	for_each_cpu(cpu, pbk_available_cpuset) {
+		cpumask_clear_cpu(cpu, pbk_available_cpuset);
+		cpumask_set_cpu(cpu, mask);
+		nr_cpu--;
+		if (!nr_cpu)
+			break;
+	}
+	spin_unlock(&pbk_acpuset_lock);
+
+	/* The weight was checked under the lock, so this cannot happen. */
+	WARN_ON_ONCE(nr_cpu);
+
+	return 0;
+}
+
+/*
+ * Give the CPUs in @release back to pbk_available_cpuset.
+ */
+void pbk_free_cpus(cpumask_var_t release)
+{
+	unsigned int cpu;
+
+	spin_lock(&pbk_acpuset_lock);
+	for_each_cpu(cpu, release)
+		cpumask_set_cpu(cpu, pbk_available_cpuset);
+	spin_unlock(&pbk_acpuset_lock);
+}
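+
+/*
+ * Allocator lifecycle (summary): a domain takes its CPUs from
+ * pbk_available_cpuset exactly once, via pbk_alloc_cpus() or
+ * pbk_alloc_nr_cpu(), and destroy_pbk_domain() hands them back
+ * through pbk_free_cpus() when the last reference is dropped.
+ */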
+
+/*
+ * Add/delete the CPUs in @mask to/from domain @pd.
+ */
+void pbk_set_cpus(struct pbk_domain *pd, cpumask_var_t mask, bool add)
+{
+	if (add)
+		cpumask_or(pbk_domain_cpu(pd), pbk_domain_cpu(pd), mask);
+	else
+		cpumask_andnot(pbk_domain_cpu(pd), pbk_domain_cpu(pd), mask);
+}
diff --git a/PBK/pbk_cpu.h b/PBK/pbk_cpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..0c87a345086ec7f4f98327ac83ef1e19bdf827c7
--- /dev/null
+++ b/PBK/pbk_cpu.h
@@ -0,0 +1,18 @@
+#ifndef _PBK_CPU_H
+#define _PBK_CPU_H
+
+#include <linux/cpu.h>
+
+#define PBK_CPU_ONLINE_STATE	(CPUHP_AP_ACTIVE - 1)
+#define PBK_CPU_OFFLINE_STATE	CPUHP_OFFLINE
+
+int pbk_cpu_parse_args(const char *str, cpumask_t *pbk_cpus);
+
+int pbk_cpus_up(cpumask_var_t upset);
+int pbk_cpus_down(cpumask_var_t downset);
+
+int pbk_alloc_cpus(cpumask_var_t request);
+int pbk_alloc_nr_cpu(unsigned int nr_cpu, cpumask_var_t mask);
+void pbk_free_cpus(cpumask_var_t release);
+
+#endif /* _PBK_CPU_H */
\ No newline at end of file
diff --git a/PBK/pbk_domain.c b/PBK/pbk_domain.c
new file mode 100644
index 0000000000000000000000000000000000000000..47a4fa483fe2b42b654e758bfaaef16c5f755ba7
--- /dev/null
+++ b/PBK/pbk_domain.c
@@ -0,0 +1,172 @@
+#define pr_fmt(fmt) "pbk_domain: " fmt
+
+#include <linux/pbk.h>
+#include <linux/slab.h>
+#include <linux/sched/signal.h>
+#include <linux/workqueue.h>
+
+#include "pbk_cpu.h"
+
+DEFINE_HASHTABLE(pbk_domains, NR_DOMAINS_MAX_BITS);
+DEFINE_SPINLOCK(pbk_domains_lock);
+
+/*
+ * Create the PBK root domain from pbk_cpuset.
+ */
+void pbk_create_root_domain(void)
+{
+	cpumask_t workqueue_unbound_mask;
+	int ret;
+
+	if (cpumask_empty(pbk_cpuset)) {
+		pr_info("No valid pbk_cpuset, skip creating PBK root domain\n");
+		return;
+	}
+
+	ret = pbk_cpus_up(pbk_cpuset);
+	if (ret) {
+		pr_err("Failed to create PBK root domain\n");
+		return;
+	}
+
+	cpumask_copy(pbk_available_cpuset, pbk_cpuset);
+	cpumask_andnot(&workqueue_unbound_mask, cpu_possible_mask, pbk_cpuset);
+	ret = workqueue_set_unbound_cpumask(&workqueue_unbound_mask);
+	if (!ret)
+		pr_info("Set workqueue unbound cpumask to %*pbl\n",
+			cpumask_pr_args(&workqueue_unbound_mask));
+}
+
+static void pbk_add_domain(struct pbk_domain *pd)
+{
+	spin_lock(&pbk_domains_lock);
+	hash_add(pbk_domains, &pd->ht_node, pd->domain_id);
+	spin_unlock(&pbk_domains_lock);
+}
+
+struct pbk_domain *pbk_find_get_domain(pdid_t domain_id)
+{
+	struct pbk_domain *pd;
+
+	spin_lock(&pbk_domains_lock);
+	hash_for_each_possible(pbk_domains, pd, ht_node, domain_id) {
+		if (pd->domain_id == domain_id) {
+			/* Take the reference before dropping the lock. */
+			get_pbk_domain(pd);
+			spin_unlock(&pbk_domains_lock);
+			return pd;
+		}
+	}
+	spin_unlock(&pbk_domains_lock);
+
+	pr_err("PBK domain %d not found\n", domain_id);
+	return NULL;
+}
+
+struct pbk_domain *pbk_find_matched_domain(cpumask_var_t request)
+{
+	struct pbk_domain *pd;
+	int bkt;
+
+	spin_lock(&pbk_domains_lock);
+	hash_for_each(pbk_domains, bkt, pd, ht_node) {
+		if (cpumask_equal(pbk_domain_cpu(pd), request)) {
+			get_pbk_domain(pd);
+			spin_unlock(&pbk_domains_lock);
+			return pd;
+		}
+	}
+	spin_unlock(&pbk_domains_lock);
+
+	pr_err("PBK cannot find a matching domain\n");
+	return NULL;
+}
+
+static void pbk_del_domain(struct pbk_domain *pd)
+{
+	spin_lock(&pbk_domains_lock);
+	hash_del(&pd->ht_node);
+	spin_unlock(&pbk_domains_lock);
+}
+
+static void pbk_add_process(struct task_struct *p, struct pbk_domain *pd)
+{
+	spin_lock(&pd->process_list_lock);
+	list_add(&p->pbk_process, &pd->process_list);
+	spin_unlock(&pd->process_list_lock);
+}
+
+void pbk_del_process(struct task_struct *p, struct pbk_domain *pd)
+{
+	spin_lock(&pd->process_list_lock);
+	list_del(&p->pbk_process);
+	spin_unlock(&pd->process_list_lock);
+}
+
+void pbk_attach_domain(struct task_struct *p, struct pbk_domain *pd)
+{
+	p->pbkd = pd;
+	pbk_add_process(p, pd);
+	get_pbk_domain(pd);
+}
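+
+/*
+ * Domain lifetime (summary): pbk_alloc_domain() starts the refcount at 1
+ * for the creating process; every task attached via pbk_attach_domain()
+ * takes another reference, which __put_task_struct() drops when the
+ * task's last reference goes away.
+ */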
+
+/*
+ * Allocate a PBK domain holding the @request CPUs.
+ */
+struct pbk_domain *pbk_alloc_domain(cpumask_var_t request)
+{
+	struct pbk_domain *pd;
+
+	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	refcount_set(&pd->refcount, 1);
+	spin_lock_init(&pd->process_list_lock);
+	INIT_LIST_HEAD(&pd->process_list);
+	cpumask_copy(pbk_domain_cpu(pd), request);
+	pd->domain_id = current->pid;
+	pbk_add_process(current, pd);
+	pbk_add_domain(pd);
+
+	return pd;
+}
+
+void destroy_pbk_domain(struct pbk_domain *pd)
+{
+	pbk_free_cpus(pbk_domain_cpu(pd));
+	pbk_del_domain(pd);
+	kfree(pd);
+}
+
+int pbk_resched_threads(struct task_struct *p, cpumask_var_t new)
+{
+	struct task_struct *tsk;
+	int ret;
+
+	for_each_thread(p, tsk) {
+		ret = sched_setaffinity(tsk->pid, new);
+		if (ret) {
+			pr_err("Failed to set affinity for task %d\n", tsk->pid);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+int pbk_resched_domain_process(struct pbk_domain *pd)
+{
+	struct task_struct *p;
+	int ret = 0;
+
+	spin_lock(&pd->process_list_lock);
+	list_for_each_entry(p, &pd->process_list, pbk_process) {
+		ret = sched_setaffinity(p->pid, pbk_domain_cpu(pd));
+		if (ret)
+			break;
+	}
+	spin_unlock(&pd->process_list_lock);
+
+	return ret;
+}
diff --git a/PBK/pbk_sysfs.c b/PBK/pbk_sysfs.c
new file mode 100644
index 0000000000000000000000000000000000000000..d3b85686efae6d9eaafe8f6cf9692ca9b2ef6578
--- /dev/null
+++ b/PBK/pbk_sysfs.c
@@ -0,0 +1,158 @@
+#define pr_fmt(fmt) "pbk_sysfs: " fmt
+
+#include <linux/kobject.h>
+#include <linux/init.h>
+#include <linux/pbk.h>
+
+#include "pbk_cpu.h"
+
+static struct kobject *pbk_kobj;
+
+static ssize_t pbk_create_domain_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	cpumask_t request;
+	struct pbk_domain *pd;
+	int ret;
+
+	ret = cpulist_parse(buf, &request);
+	if (ret || !cpumask_subset(&request, pbk_cpuset))
+		return -EINVAL;
+
+	pd = pbk_find_matched_domain(&request);
+	if (!pd) {
+		ret = pbk_alloc_cpus(&request);
+		if (ret)
+			return ret;
+
+		pd = pbk_alloc_domain(&request);
+		if (IS_ERR(pd)) {
+			pr_err("Failed to allocate pbk domain\n");
+			pbk_free_cpus(&request);
+			return PTR_ERR(pd);
+		}
+	}
+
+	current->pbkd = pd;
+
+	ret = pbk_resched_domain_process(pd);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static struct kobj_attribute pbk_create_domain_attr = __ATTR_WO(pbk_create_domain);
+
+static ssize_t pbk_join_domain_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	pdid_t domain_id;
+	struct pbk_domain *pd;
+	int ret;
+
+	ret = kstrtoint(buf, 0, &domain_id);
+	if (ret)
+		return -EINVAL;
+
+	pd = pbk_find_get_domain(domain_id);
+	if (!pd)
+		return -EINVAL;
+
+	pbk_attach_domain(current, pd);
+	pbk_resched_threads(current, pbk_domain_cpu(pd));
+	put_pbk_domain(pd);
+
+	return count;
+}
+
+static struct kobj_attribute pbk_join_domain_attr = __ATTR_WO(pbk_join_domain);
+
+static ssize_t pbk_with_nr_cpu_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	cpumask_t request;
+	unsigned int nr_cpu;
+	struct pbk_domain *pd;
+	int ret;
+
+	ret = kstrtouint(buf, 0, &nr_cpu);
+	if (ret)
+		return -EINVAL;
+
+	cpumask_clear(&request);
+	ret = pbk_alloc_nr_cpu(nr_cpu, &request);
+	if (ret)
+		return ret;
+
+	pd = pbk_alloc_domain(&request);
+	if (IS_ERR(pd)) {
+		pr_err("Failed to allocate pbk domain\n");
+		pbk_free_cpus(&request);
+		return PTR_ERR(pd);
+	}
+
+	current->pbkd = pd;
+
+	ret = pbk_resched_domain_process(pd);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static struct kobj_attribute pbk_with_nr_cpu_attr = __ATTR_WO(pbk_with_nr_cpu);
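+
+/*
+ * Illustrative usage of the ABI above (shell transcript assumed, not
+ * part of this patch):
+ *
+ *	# create a domain on PBK CPUs 2-3; the writer is moved onto it
+ *	echo 2-3 > /sys/kernel/PBK/pbk_create_domain
+ *
+ *	# another process joins an existing domain by the creator's pid
+ *	echo $CREATOR_PID > /sys/kernel/PBK/pbk_join_domain
+ *
+ *	# or take any four free PBK CPUs
+ *	echo 4 > /sys/kernel/PBK/pbk_with_nr_cpu
+ */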
+
+static ssize_t pbk_view_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int pbk_view;
+	int ret;
+
+	ret = kstrtoint(buf, 0, &pbk_view);
+	if (ret || (pbk_view != 0 && pbk_view != 1))
+		return -EINVAL;
+
+	current->pbk_view = pbk_view;
+
+	return count;
+}
+
+static struct kobj_attribute pbk_view_attr = __ATTR_WO(pbk_view);
+
+static struct attribute *pbk_attributes[] = {
+	&pbk_create_domain_attr.attr,
+	&pbk_join_domain_attr.attr,
+	&pbk_with_nr_cpu_attr.attr,
+	&pbk_view_attr.attr,
+	NULL
+};
+
+static struct attribute_group pbk_attr_group = {
+	.attrs = pbk_attributes,
+};
+
+static int __init pbk_sysfs_init(void)
+{
+	int ret;
+
+	pbk_kobj = kobject_create_and_add("PBK", kernel_kobj);
+	if (!pbk_kobj)
+		return -ENOMEM;
+
+	pbk_create_domain_attr.attr.mode |= S_IWGRP;
+	pbk_join_domain_attr.attr.mode |= S_IWGRP;
+	pbk_with_nr_cpu_attr.attr.mode |= S_IWGRP;
+	pbk_view_attr.attr.mode |= S_IWGRP;
+
+	ret = sysfs_create_group(pbk_kobj, &pbk_attr_group);
+	if (ret) {
+		pr_err("Failed to create sysfs entries for PBK\n");
+		kobject_put(pbk_kobj);
+		return ret;
+	}
+
+	return 0;
+}
+
+subsys_initcall(pbk_sysfs_init);
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 3899083ff555f8086de13d85c89446dd13002361..ed4c5b26e81dcc204149b98f49f02527b02e05cb 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -24,6 +24,7 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/delay.h>
+#include <linux/pbk.h>
 
 /*
  * In case the boot CPU is hotpluggable, we record its initial state and
@@ -152,6 +153,11 @@ static int c_show(struct seq_file *m, void *v)
 	for_each_online_cpu(i) {
 		struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
 		u32 midr = cpuinfo->reg_midr;
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+		if ((is_pbk_process(current) && !is_current_pbk_cpu(i)) ||
+		    (is_pbk_view(current) && !is_pbk_cpu(i)))
+			continue;
+#endif
 
 		/*
 		 * glibc reads /proc/cpuinfo to determine the number of
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1941ffc4db85c2819047fd95cb429878f9a38a5e..6860a599c908749cbdb44cf7f607f050a87e5f1f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1581,6 +1581,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	 * triggered, and we depend on blk-mq timeout handler to
 	 * handle dispatched requests to this hctx
 	 */
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	/* PBK CPUs may legitimately run this queue, so skip the check. */
+#else
 	if (!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
 		cpu_online(hctx->next_cpu)) {
 		printk(KERN_WARNING "run queue from wrong CPU %d, hctx %s\n",
@@ -1588,6 +1591,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 			cpumask_empty(hctx->cpumask) ? "inactive": "active");
 		dump_stack();
 	}
+#endif
 
 	/*
 	 * We can't run the queue inline with ints disabled. Ensure that
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 8737ad3b412af186554035e77f03399a207bcea1..8efff8ce753298c773e1a601f1d713c277a52677 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/sysfs.h>
+#include <linux/pbk.h>
 
 /* pointer to per cpu cacheinfo */
 static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo);
@@ -415,7 +416,16 @@ static ssize_t shared_cpu_map_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 	const struct cpumask *mask = &this_leaf->shared_cpu_map;
-
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if (is_pbk_process(current) || is_pbk_view(current)) {
+		struct cpumask pbk_mask;
+		if (is_pbk_process(current))
+			cpumask_and(&pbk_mask, current_pbk_cpu(), mask);
+		else
+			cpumask_and(&pbk_mask, pbk_cpuset, mask);
+		return sysfs_emit(buf, "%*pb\n", nr_cpu_ids, &pbk_mask);
+	}
+#endif
 	return sysfs_emit(buf, "%*pb\n", nr_cpu_ids, mask);
 }
 
@@ -424,7 +434,16 @@ static ssize_t shared_cpu_list_show(struct device *dev,
 				    struct device_attribute *attr, char *buf)
 {
 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 	const struct cpumask *mask = &this_leaf->shared_cpu_map;
-
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if (is_pbk_process(current) || is_pbk_view(current)) {
+		struct cpumask pbk_mask;
+		if (is_pbk_process(current))
+			cpumask_and(&pbk_mask, current_pbk_cpu(), mask);
+		else
+			cpumask_and(&pbk_mask, pbk_cpuset, mask);
+		return sysfs_emit(buf, "%*pbl\n", nr_cpu_ids, &pbk_mask);
+	}
+#endif
 	return sysfs_emit(buf, "%*pbl\n", nr_cpu_ids, mask);
 }
 
diff --git a/drivers/base/core.c b/drivers/base/core.c
index c0566aff535513601666083c93c3fafb69948d67..ad207562a9d86cb5aad0594e347ec824da02140e 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -28,6 +28,7 @@
 #include <linux/sched/signal.h>
 #include <linux/sched/mm.h>
 #include <linux/sysfs.h>
+#include <linux/pbk.h>
 
 #include "base.h"
 #include "power/power.h"
@@ -2028,6 +2029,16 @@ static ssize_t online_show(struct device *dev, struct device_attribute *attr,
 
 	device_lock(dev);
 	val = !dev->offline;
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if ((is_pbk_process(current) || is_pbk_view(current)) &&
+	    dev->bus == &cpu_subsys) {
+		if ((is_pbk_process(current) && is_current_pbk_cpu(dev->id)) ||
+		    (is_pbk_view(current) && is_pbk_cpu(dev->id)))
+			val = true;
+		else
+			val = false;
+	}
+#endif
 	device_unlock(dev);
 	return sysfs_emit(buf, "%u\n", val);
 }
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 8ecb9f90f467b0ebfe2bd471c9cce3b6a1ddb96e..903504d776e6d22e9fdd247daee9136452f50a79 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -20,6 +20,7 @@
 #include <linux/tick.h>
 #include <linux/pm_qos.h>
 #include <linux/sched/isolation.h>
+#include <linux/pbk.h>
 
 #include "base.h"
 
@@ -208,7 +209,12 @@ static ssize_t show_cpus_attr(struct device *dev,
 			      struct device_attribute *attr,
 			      char *buf)
 {
 	struct cpu_attr *ca = container_of(attr, struct cpu_attr, attr);
-
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if (is_pbk_process(current))
+		return cpumap_print_to_pagebuf(true, buf, current_pbk_cpu());
+	if (is_pbk_view(current))
+		return cpumap_print_to_pagebuf(true, buf, pbk_cpuset);
+#endif
 	return cpumap_print_to_pagebuf(true, buf, ca->map);
 }
 
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 35c61165292afd948b0e15db9a47a5a96cb51b6a..21de1a47af18aeaf830fc7134c8e93c5c2aec483 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -20,6 +20,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
+#include <linux/pbk.h>
 
 static struct bus_type node_subsys = {
 	.name = "node",
@@ -40,6 +41,12 @@ static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf)
 		return 0;
 
 	cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask);
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if (is_pbk_process(current))
+		cpumask_and(mask, mask, current_pbk_cpu());
+	else if (is_pbk_view(current))
+		cpumask_and(mask, mask, pbk_cpuset);
+#endif
 	n = cpumap_print_to_pagebuf(list, buf, mask);
 
 	free_cpumask_var(mask);
diff --git a/include/linux/pbk.h b/include/linux/pbk.h
new file mode 100644
index 0000000000000000000000000000000000000000..e01e52c1fa6624bee5d1d82ae5b3a77564b0edeb
--- /dev/null
+++ b/include/linux/pbk.h
@@ -0,0 +1,114 @@
+#ifndef _LINUX_PBK_H
+#define _LINUX_PBK_H
+
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/refcount.h>
+#include <linux/cpuhotplug.h>
+
+typedef pid_t pdid_t;
+
+#define NR_DOMAINS_MAX		16
+#define NR_DOMAINS_MAX_BITS	4
+
+extern struct hlist_head pbk_domains[NR_DOMAINS_MAX];
+extern spinlock_t pbk_domains_lock;
+
+#define DOMAIN_NAME_LEN	64
+
+struct pbk_domain {
+	char name[DOMAIN_NAME_LEN];
+	/* Same as the pid of the process that created this domain. */
+	pdid_t domain_id;
+	refcount_t refcount;
+	cpumask_t cpuset;
+
+	/* All processes that have joined this domain. */
+	struct list_head process_list;
+	spinlock_t process_list_lock;
+
+	/* Node of the hashtable that maps domain_id to domain. */
+	struct hlist_node ht_node;
+};
+
+extern void pbk_create_root_domain(void);
+extern struct pbk_domain *pbk_find_get_domain(pdid_t domain_id);
+extern struct pbk_domain *pbk_find_matched_domain(cpumask_var_t request);
+extern struct pbk_domain *pbk_alloc_domain(cpumask_var_t request);
+extern void pbk_attach_domain(struct task_struct *p, struct pbk_domain *pd);
+extern void destroy_pbk_domain(struct pbk_domain *pd);
+extern int pbk_resched_threads(struct task_struct *p, cpumask_var_t new);
+extern int pbk_resched_domain_process(struct pbk_domain *pd);
+extern void pbk_del_process(struct task_struct *p, struct pbk_domain *pd);
+
+static inline bool is_pbk_process(struct task_struct *p)
+{
+	return p->pbkd != NULL;
+}
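+
+/*
+ * Two visibility modes (summary): a task with ->pbkd set runs inside a
+ * domain and sees only that domain's CPUs in the /proc and sysfs views
+ * patched below; a task with ->pbk_view set is an observer that sees
+ * all reserved PBK CPUs (pbk_cpuset) instead.
+ */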
+
+static inline bool is_pbk_view(struct task_struct *p)
+{
+	return p->pbk_view != 0;
+}
+
+static inline bool is_pbk_allowed_kthread(struct task_struct *p)
+{
+	return !strncmp(p->comm, "cpuhp", 5) ||
+	       !strncmp(p->comm, "ksoftirqd", 9) ||
+	       !strncmp(p->comm, "migration", 9) ||
+	       !strncmp(p->comm, "osnoise", 7);
+}
+
+static inline cpumask_t *pbk_domain_cpu(struct pbk_domain *pd)
+{
+	return &pd->cpuset;
+}
+
+static inline cpumask_t *current_pbk_cpu(void)
+{
+	return pbk_domain_cpu(current->pbkd);
+}
+
+static inline void get_pbk_domain(struct pbk_domain *pd)
+{
+	refcount_inc(&pd->refcount);
+}
+
+static inline void put_pbk_domain(struct pbk_domain *pd)
+{
+	if (refcount_dec_and_test(&pd->refcount))
+		destroy_pbk_domain(pd);
+}
+
+extern cpumask_t __pbk_cpuset;
+extern cpumask_t __pbk_available_cpuset;
+extern spinlock_t pbk_acpuset_lock;
+
+#define pbk_cpuset		(&__pbk_cpuset)
+#define pbk_available_cpuset	(&__pbk_available_cpuset)
+
+static inline bool is_pbk_cpu(unsigned int cpu)
+{
+	return cpumask_test_cpu(cpu, pbk_cpuset);
+}
+
+static inline bool is_current_pbk_cpu(unsigned int cpu)
+{
+	return cpumask_test_cpu(cpu, current_pbk_cpu());
+}
+
+static inline bool is_pbk_cpu_state(enum cpuhp_state state)
+{
+	return (state != CPUHP_AP_IRQ_AFFINITY_ONLINE) &&
+	       (state != CPUHP_AP_WORKQUEUE_ONLINE) &&
+	       (state != CPUHP_AP_RCUTREE_ONLINE);
+}
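+
+/*
+ * PBK CPUs are brought up through do_cpu_up() but skip the
+ * CPUHP_AP_IRQ_AFFINITY_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE and
+ * CPUHP_AP_RCUTREE_ONLINE callbacks (see cpuhp_invoke_callback()),
+ * which keeps spread IRQs and unbound workqueues off them.
+ */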
+
+extern int do_cpu_up(unsigned int cpu, enum cpuhp_state target);
+extern int cpu_down(unsigned int cpu, enum cpuhp_state target);
+
+#endif /* CONFIG_PURPOSE_BUILT_KERNEL */
+
+#endif /* _LINUX_PBK_H */
\ No newline at end of file
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 47f462040f4dfc4c1baed9960ece7aa2ba8e8b4a..4f26e5f6318ef241d9811745917a59e688683a03 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -873,6 +873,12 @@ struct task_struct {
 	pid_t				pid;
 	pid_t				tgid;
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	struct list_head		pbk_process;
+	struct pbk_domain		*pbkd;
+	int				pbk_view;
+#endif
+
 #ifdef CONFIG_STACKPROTECTOR
 	/* Canary value for the -fstack-protector GCC feature: */
 	unsigned long			stack_canary;
diff --git a/init/init_task.c b/init/init_task.c
index 5fa18ed59d33e70edc516308306cb3bf8408a1a4..7f8ba4199012bfbacf5fe2da79ec03fd74202568 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -213,6 +213,9 @@ struct task_struct init_task
 #ifdef CONFIG_SECCOMP_FILTER
 	.seccomp	= { .filter_count = ATOMIC_INIT(0) },
 #endif
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	.pbkd		= NULL,
+#endif
 };
 EXPORT_SYMBOL(init_task);
 
diff --git a/init/main.c b/init/main.c
index 41a9ce782acc9319cbfd8cd736069f0646b3bf46..6774e0b441d08a108a58c1f86aac7eaeba7770e4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -99,6 +99,7 @@
 #include <linux/mem_encrypt.h>
 #include <linux/kcsan.h>
 #include <linux/init_syscalls.h>
+#include <linux/pbk.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -1438,6 +1439,9 @@ static int __ref kernel_init(void *unused)
 	rcu_end_inkernel_boot();
 
 	do_sysctl_args();
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	pbk_create_root_domain();
+#endif
 
 	if (ramdisk_execute_command) {
 		ret = run_init_process(ramdisk_execute_command);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index b7a936e5d05bab20ae09e414783ec3db2fdc9a0e..eaec7da8feda1026c77707ffb7c3b116cc96b6a0 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -66,6 +66,7 @@
 #include <linux/mutex.h>
 #include <linux/cgroup.h>
 #include <linux/wait.h>
+#include <linux/pbk.h>
 
 DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
 DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
@@ -2459,6 +2460,18 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 
 	spin_lock_irq(&callback_lock);
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if ((is_pbk_process(current) || is_pbk_view(current)) &&
+	    (type == FILE_CPULIST || type == FILE_EFFECTIVE_CPULIST)) {
+		if (is_pbk_process(current))
+			seq_printf(sf, "%*pbl\n", cpumask_pr_args(current_pbk_cpu()));
+		else
+			seq_printf(sf, "%*pbl\n", cpumask_pr_args(pbk_cpuset));
+		spin_unlock_irq(&callback_lock);
+		return ret;
+	}
+#endif
+
 	switch (type) {
 	case FILE_CPULIST:
 		seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
diff --git a/kernel/cpu.c b/kernel/cpu.c
index c06ced18f78ad37c7c93263de1ee729204c65606..347d95140322df8ad48e5fac9f0d9ca78c69e91f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -34,6 +34,7 @@
 #include <linux/scs.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/cpuset.h>
+#include <linux/pbk.h>
 
 #include <trace/events/power.h>
 #define CREATE_TRACE_POINTS
@@ -157,6 +158,11 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
 	int (*cb)(unsigned int cpu);
 	int ret, cnt;
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if (is_pbk_cpu(cpu) && !is_pbk_cpu_state(state))
+		return 0;
+#endif
+
 	if (st->fail == state) {
 		st->fail = CPUHP_INVALID;
 
@@ -1116,7 +1122,7 @@ static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
 	return _cpu_down(cpu, 0, target);
 }
 
-static int cpu_down(unsigned int cpu, enum cpuhp_state target)
+int cpu_down(unsigned int cpu, enum cpuhp_state target)
 {
 	int err;
 
@@ -1306,7 +1312,11 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 	return ret;
 }
 
-static int cpu_up(unsigned int cpu, enum cpuhp_state target)
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
+#else
+static int cpu_up(unsigned int cpu, enum cpuhp_state target)
+#endif
 {
 	int err = 0;
 
@@ -1340,6 +1350,16 @@ static int cpu_up(unsigned int cpu, enum cpuhp_state target)
 	return err;
 }
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
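+/*
+ * Generic callers (e.g. the CPU "online" sysfs node) must not touch
+ * PBK CPUs, so the public cpu_up() treats them as a silent no-op;
+ * PBK itself onlines them through do_cpu_up().
+ */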
+int cpu_up(unsigned int cpu, enum cpuhp_state target)
+{
+	if (is_pbk_cpu(cpu))
+		return 0;
+
+	return do_cpu_up(cpu, target);
+}
+#endif
+
 /**
  * cpu_device_up - Bring up a cpu device
  * @dev: Pointer to the cpu device to online
@@ -1393,6 +1413,10 @@ void bringup_nonboot_cpus(unsigned int setup_max_cpus)
 	unsigned int cpu;
 
 	for_each_present_cpu(cpu) {
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+		if (cpumask_test_cpu(cpu, pbk_cpuset))
+			continue;
+#endif
 		if (num_online_cpus() >= setup_max_cpus)
 			break;
 		if (!cpu_online(cpu))
diff --git a/kernel/exit.c b/kernel/exit.c
index d13d67fc5f4e2085f93c46b77440ca1eeb908833..3e7e3daa0862ff4926c57969049e73675d310725 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -64,6 +64,7 @@
 #include <linux/rcuwait.h>
 #include <linux/compat.h>
 #include <linux/io_uring.h>
+#include <linux/pbk.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -857,6 +858,10 @@ void __noreturn do_exit(long code)
 		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
 	exit_rcu();
 	exit_tasks_rcu_finish();
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if (is_pbk_process(tsk))
+		pbk_del_process(tsk, tsk->pbkd);
+#endif
 
 	lockdep_free_task(tsk);
 	do_task_dead();
diff --git a/kernel/fork.c b/kernel/fork.c
index c8ec029e158a645a5c8b4ca4a9f0cde184a294dd..8aa9284fac2d5b92f75018b3a74cd58205ff28f9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -99,6 +99,7 @@
 #include <linux/scs.h>
 #include <linux/io_uring.h>
+#include <linux/pbk.h>
 
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -744,6 +745,10 @@ void __put_task_struct(struct task_struct *tsk)
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
 	put_signal_struct(tsk->signal);
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if (is_pbk_process(tsk))
+		put_pbk_domain(tsk->pbkd);
+#endif
 
 	if (!profile_handoff_task(tsk))
 		free_task(tsk);
@@ -1981,6 +1986,13 @@ static __latent_entropy struct task_struct *copy_process(
 	if (!p)
 		goto fork_out;
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
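+	/*
+	 * A child created inside a PBK domain inherits it:
+	 * pbk_attach_domain() takes a reference on the new task's
+	 * behalf, dropped again in __put_task_struct().
+	 */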
+	if (is_pbk_process(current))
+		pbk_attach_domain(p, current->pbkd);
+	else
+		p->pbkd = NULL;
+#endif
+
 	/*
 	 * This _must_ happen before we call free_task(), i.e. before we jump
 	 * to any of the bad_fork_* labels. This is to avoid freeing
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 51c707897c8d696f315506cda91d227271615be9..192babfbf9c5a996180979b119e01c602b949435 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1712,7 +1712,11 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
 	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
 		return false;
 
-	if (is_per_cpu_kthread(p))
+	if (is_per_cpu_kthread(p)
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	    || is_pbk_process(p)
+#endif
+	   )
 		return cpu_online(cpu);
 
 	return cpu_active(cpu);
@@ -1893,6 +1897,10 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 		 */
 		cpu_valid_mask = cpu_online_mask;
 	}
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if (is_pbk_process(p))
+		cpu_valid_mask = cpu_online_mask;
+#endif
 
 	/*
 	 * Must re-check here, to close a race against __kthread_bind(),
@@ -2309,6 +2317,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, p->cpus_ptr) {
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+			if (!is_pbk_process(p) &&
+			    !is_pbk_allowed_kthread(p) &&
+			    is_pbk_cpu(dest_cpu))
+				continue;
+#endif
 			if (!is_cpu_allowed(p, dest_cpu))
 				continue;
 
@@ -2364,6 +2378,10 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 	else
 		cpu = cpumask_any(p->cpus_ptr);
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if (is_pbk_process(p))
+		cpu = pbk_reselect_cpu(p, cpu);
+#endif
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need
 	 * to rely on ttwu() to place the task on a valid ->cpus_ptr
@@ -2376,6 +2394,10 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 	 */
 	if (unlikely(!is_cpu_allowed(p, cpu)))
 		cpu = select_fallback_rq(task_cpu(p), p);
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	else if (!is_pbk_process(p) && is_pbk_cpu(cpu))
+		cpu = select_fallback_rq(task_cpu(p), p);
+#endif
 
 	return cpu;
 }
@@ -6176,9 +6198,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	if (retval)
 		goto out_free_new_mask;
 
-
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if (is_pbk_process(p))
+		cpumask_copy(cpus_allowed, pbk_domain_cpu(p->pbkd));
+	else
+		cpuset_cpus_allowed(p, cpus_allowed);
+	cpumask_and(new_mask, in_mask, cpus_allowed);
+#else
 	cpuset_cpus_allowed(p, cpus_allowed);
 	cpumask_and(new_mask, in_mask, cpus_allowed);
+#endif
 
 	/*
 	 * Since bandwidth control happens on root_domain basis,
@@ -6200,7 +6231,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 again:
 	retval = __set_cpus_allowed_ptr(p, new_mask, true);
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if (!retval && !is_pbk_process(p)) {
+#else
 	if (!retval) {
+#endif
 		cpuset_cpus_allowed(p, cpus_allowed);
 		if (!cpumask_subset(new_mask, cpus_allowed)) {
 			/*
@@ -6274,6 +6309,11 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 		goto out_unlock;
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
+	if (is_pbk_process(p))
+		cpumask_and(mask, &p->cpus_mask, cpu_online_mask);
+	else
+#endif
 	cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 42d5fb7d946437531a3511de026ad856722d14a1..425e9877b37f68eb36102c3a746f461be6ee012a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -66,6 +66,7 @@
 #include <linux/suspend.h>
 #include <linux/swait.h>
 #include <linux/syscalls.h>
+#include <linux/pbk.h>
 
 #include <linux/task_work.h>
 #include <linux/tsacct_kern.h>
@@ -2778,5 +2779,26 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
 }
 #endif
 
+#ifdef CONFIG_PURPOSE_BUILT_KERNEL
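+/*
+ * Pick an idle CPU inside @p's domain; fall back to @prev_cpu when its
+ * runqueue is already empty or every domain CPU is busy.
+ */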
+static inline int pbk_reselect_cpu(struct task_struct *p, int prev_cpu)
+{
+	int cpu;
+	struct rq *rq;
+
+	rq = cpu_rq(prev_cpu);
+	if (!rq->nr_running)
+		return prev_cpu;
+
+	for_each_cpu(cpu, pbk_domain_cpu(p->pbkd)) {
+		rq = cpu_rq(cpu);
+		if (!rq->nr_running)
+			return cpu;
+	}
+
+	/* Every CPU in the domain is busy; stay where we were. */
+	return prev_cpu;
+}
+#endif
+
 void swake_up_all_locked(struct swait_queue_head *q);
 void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
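
A minimal userspace sketch of the sysfs interface added above (illustrative
only; error handling is elided, and the CPU list "2-3" is an assumed example —
the paths and attribute names follow pbk_sysfs_init() in PBK/pbk_sysfs.c):

	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		/* Create a domain on PBK CPUs 2-3; on success the writing
		 * process is rescheduled onto exactly those CPUs. */
		int fd = open("/sys/kernel/PBK/pbk_create_domain", O_WRONLY);

		if (fd < 0)
			return 1;
		if (write(fd, "2-3", 3) != 3)
			return 1;
		close(fd);

		/* Children forked from here inherit the PBK domain. */
		return 0;
	}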