diff options
author | Maximilian Bosch <maximilian@mbosch.me> | 2022-11-10 23:34:56 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-10 23:34:56 +0100 |
commit | 2a63e4f90282e9080efba2e1a96440f9ab4b42b0 (patch) | |
tree | 00106f97c1e0820192df6be73b8d48d03c7eba35 /pkgs/os-specific/linux | |
parent | 7d616f2a326a70104f01655654c37ead1a542699 (diff) | |
parent | 36d5e2658ce7dceaee693a4f276846a75db7acc6 (diff) |
Merge pull request #200218 from Ma27/rm-kernel-4.9
linux_4_9: remove
Diffstat (limited to 'pkgs/os-specific/linux')
-rw-r--r-- | pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.9.patch | 784 | ||||
-rw-r--r-- | pkgs/os-specific/linux/kernel/linux-4.9.nix | 12 |
2 files changed, 0 insertions, 796 deletions
diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.9.patch b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.9.patch deleted file mode 100644 index 596718b83c432..0000000000000 --- a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.9.patch +++ /dev/null @@ -1,784 +0,0 @@ -commit 280858b0bb3384b9ec06b455e196b453888bd6b8 -Author: Tejun Heo <tj@kernel.org> -Date: Fri Mar 11 07:31:23 2016 -0500 - - sched: Misc preps for cgroup unified hierarchy interface - - Make the following changes in preparation for the cpu controller - interface implementation for the unified hierarchy. This patch - doesn't cause any functional differences. - - * s/cpu_stats_show()/cpu_cfs_stats_show()/ - - * s/cpu_files/cpu_legacy_files/ - - * Separate out cpuacct_stats_read() from cpuacct_stats_show(). While - at it, make the @val array u64 for consistency. - - Signed-off-by: Tejun Heo <tj@kernel.org> - Cc: Ingo Molnar <mingo@redhat.com> - Cc: Peter Zijlstra <peterz@infradead.org> - Cc: Li Zefan <lizefan@huawei.com> - Cc: Johannes Weiner <hannes@cmpxchg.org> - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 154fd689fe02..57472485b79c 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -8705,7 +8705,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) - return ret; - } - --static int cpu_stats_show(struct seq_file *sf, void *v) -+static int cpu_cfs_stats_show(struct seq_file *sf, void *v) - { - struct task_group *tg = css_tg(seq_css(sf)); - struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; -@@ -8745,7 +8745,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css, - } - #endif /* CONFIG_RT_GROUP_SCHED */ - --static struct cftype cpu_files[] = { -+static struct cftype cpu_legacy_files[] = { - #ifdef CONFIG_FAIR_GROUP_SCHED - { - .name = "shares", -@@ -8766,7 +8766,7 @@ static struct cftype cpu_files[] = { - }, - { - .name = "stat", -- .seq_show = cpu_stats_show, -+ .seq_show = cpu_cfs_stats_show, - }, - #endif - #ifdef CONFIG_RT_GROUP_SCHED -@@ -8791,7 +8791,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { - .fork = cpu_cgroup_fork, - .can_attach = cpu_cgroup_can_attach, - .attach = cpu_cgroup_attach, -- .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, - .early_init = true, - }; - -diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c -index bc0b309c3f19..d1e5dd0b3a64 100644 ---- a/kernel/sched/cpuacct.c -+++ b/kernel/sched/cpuacct.c -@@ -276,26 +276,33 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V) - return 0; - } - --static int cpuacct_stats_show(struct seq_file *sf, void *v) -+static void cpuacct_stats_read(struct cpuacct *ca, -+ u64 (*val)[CPUACCT_STAT_NSTATS]) - { -- struct cpuacct *ca = css_ca(seq_css(sf)); -- s64 val[CPUACCT_STAT_NSTATS]; - int cpu; -- int stat; - -- memset(val, 0, sizeof(val)); -+ memset(val, 0, sizeof(*val)); -+ - for_each_possible_cpu(cpu) { - u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; - -- val[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER]; -- val[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE]; -- val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM]; -- val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ]; -- val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ]; -+ (*val)[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER]; -+ (*val)[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE]; -+ (*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM]; -+ (*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ]; -+ (*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ]; - } -+} -+ -+static int cpuacct_stats_show(struct seq_file *sf, void *v) -+{ -+ u64 val[CPUACCT_STAT_NSTATS]; -+ int stat; -+ -+ cpuacct_stats_read(css_ca(seq_css(sf)), &val); - - for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) { -- seq_printf(sf, "%s %lld\n", -+ seq_printf(sf, "%s %llu\n", - cpuacct_stat_desc[stat], - cputime64_to_clock_t(val[stat])); - } - -commit 015cbdcb90034fd566d00de9d3d405613da3cd26 -Author: Tejun Heo <tj@kernel.org> -Date: Fri Mar 11 07:31:23 2016 -0500 - - sched: Implement interface for cgroup unified hierarchy - - While the cpu controller doesn't have any functional problems, there - are a couple interface issues which can be addressed in the v2 - interface. - - * cpuacct being a separate controller. This separation is artificial - and rather pointless as demonstrated by most use cases co-mounting - the two controllers. It also forces certain information to be - accounted twice. - - * Use of different time units. Writable control knobs use - microseconds, some stat fields use nanoseconds while other cpuacct - stat fields use centiseconds. - - * Control knobs which can't be used in the root cgroup still show up - in the root. - - * Control knob names and semantics aren't consistent with other - controllers. - - This patchset implements cpu controller's interface on the unified - hierarchy which adheres to the controller file conventions described - in Documentation/cgroups/unified-hierarchy.txt. Overall, the - following changes are made. - - * cpuacct is implictly enabled and disabled by cpu and its information - is reported through "cpu.stat" which now uses microseconds for all - time durations. All time duration fields now have "_usec" appended - to them for clarity. While this doesn't solve the double accounting - immediately, once majority of users switch to v2, cpu can directly - account and report the relevant stats and cpuacct can be disabled on - the unified hierarchy. - - Note that cpuacct.usage_percpu is currently not included in - "cpu.stat". If this information is actually called for, it can be - added later. - - * "cpu.shares" is replaced with "cpu.weight" and operates on the - standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000). - The weight is scaled to scheduler weight so that 100 maps to 1024 - and the ratio relationship is preserved - if weight is W and its - scaled value is S, W / 100 == S / 1024. While the mapped range is a - bit smaller than the orignal scheduler weight range, the dead zones - on both sides are relatively small and covers wider range than the - nice value mappings. This file doesn't make sense in the root - cgroup and isn't create on root. - - * "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max" - which contains both quota and period. - - * "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by - "cpu.rt.max" which contains both runtime and period. - - v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for - CFS bandwidth stats and also using raw division for u64. Use - CONFIG_CFS_BANDWITH and do_div() instead. - - The semantics of "cpu.rt.max" is not fully decided yet. Dropped - for now. - - Signed-off-by: Tejun Heo <tj@kernel.org> - Cc: Ingo Molnar <mingo@redhat.com> - Cc: Peter Zijlstra <peterz@infradead.org> - Cc: Li Zefan <lizefan@huawei.com> - Cc: Johannes Weiner <hannes@cmpxchg.org> - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 57472485b79c..c0ae869f51c4 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -8784,6 +8784,139 @@ static struct cftype cpu_legacy_files[] = { - { } /* terminate */ - }; - -+static int cpu_stats_show(struct seq_file *sf, void *v) -+{ -+ cpuacct_cpu_stats_show(sf); -+ -+#ifdef CONFIG_CFS_BANDWIDTH -+ { -+ struct task_group *tg = css_tg(seq_css(sf)); -+ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; -+ u64 throttled_usec; -+ -+ throttled_usec = cfs_b->throttled_time; -+ do_div(throttled_usec, NSEC_PER_USEC); -+ -+ seq_printf(sf, "nr_periods %d\n" -+ "nr_throttled %d\n" -+ "throttled_usec %llu\n", -+ cfs_b->nr_periods, cfs_b->nr_throttled, -+ throttled_usec); -+ } -+#endif -+ return 0; -+} -+ -+#ifdef CONFIG_FAIR_GROUP_SCHED -+static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css, -+ struct cftype *cft) -+{ -+ struct task_group *tg = css_tg(css); -+ u64 weight = scale_load_down(tg->shares); -+ -+ return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024); -+} -+ -+static int cpu_weight_write_u64(struct cgroup_subsys_state *css, -+ struct cftype *cftype, u64 weight) -+{ -+ /* -+ * cgroup weight knobs should use the common MIN, DFL and MAX -+ * values which are 1, 100 and 10000 respectively. While it loses -+ * a bit of range on both ends, it maps pretty well onto the shares -+ * value used by scheduler and the round-trip conversions preserve -+ * the original value over the entire range. -+ */ -+ if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX) -+ return -ERANGE; -+ -+ weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL); -+ -+ return sched_group_set_shares(css_tg(css), scale_load(weight)); -+} -+#endif -+ -+static void __maybe_unused cpu_period_quota_print(struct seq_file *sf, -+ long period, long quota) -+{ -+ if (quota < 0) -+ seq_puts(sf, "max"); -+ else -+ seq_printf(sf, "%ld", quota); -+ -+ seq_printf(sf, " %ld\n", period); -+} -+ -+/* caller should put the current value in *@periodp before calling */ -+static int __maybe_unused cpu_period_quota_parse(char *buf, -+ u64 *periodp, u64 *quotap) -+{ -+ char tok[21]; /* U64_MAX */ -+ -+ if (!sscanf(buf, "%s %llu", tok, periodp)) -+ return -EINVAL; -+ -+ *periodp *= NSEC_PER_USEC; -+ -+ if (sscanf(tok, "%llu", quotap)) -+ *quotap *= NSEC_PER_USEC; -+ else if (!strcmp(tok, "max")) -+ *quotap = RUNTIME_INF; -+ else -+ return -EINVAL; -+ -+ return 0; -+} -+ -+#ifdef CONFIG_CFS_BANDWIDTH -+static int cpu_max_show(struct seq_file *sf, void *v) -+{ -+ struct task_group *tg = css_tg(seq_css(sf)); -+ -+ cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg)); -+ return 0; -+} -+ -+static ssize_t cpu_max_write(struct kernfs_open_file *of, -+ char *buf, size_t nbytes, loff_t off) -+{ -+ struct task_group *tg = css_tg(of_css(of)); -+ u64 period = tg_get_cfs_period(tg); -+ u64 quota; -+ int ret; -+ -+ ret = cpu_period_quota_parse(buf, &period, "a); -+ if (!ret) -+ ret = tg_set_cfs_bandwidth(tg, period, quota); -+ return ret ?: nbytes; -+} -+#endif -+ -+static struct cftype cpu_files[] = { -+ { -+ .name = "stat", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .seq_show = cpu_stats_show, -+ }, -+#ifdef CONFIG_FAIR_GROUP_SCHED -+ { -+ .name = "weight", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .read_u64 = cpu_weight_read_u64, -+ .write_u64 = cpu_weight_write_u64, -+ }, -+#endif -+#ifdef CONFIG_CFS_BANDWIDTH -+ { -+ .name = "max", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .seq_show = cpu_max_show, -+ .write = cpu_max_write, -+ }, -+#endif -+ { } /* terminate */ -+}; -+ - struct cgroup_subsys cpu_cgrp_subsys = { - .css_alloc = cpu_cgroup_css_alloc, - .css_released = cpu_cgroup_css_released, -@@ -8792,7 +8925,15 @@ struct cgroup_subsys cpu_cgrp_subsys = { - .can_attach = cpu_cgroup_can_attach, - .attach = cpu_cgroup_attach, - .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, - .early_init = true, -+#ifdef CONFIG_CGROUP_CPUACCT -+ /* -+ * cpuacct is enabled together with cpu on the unified hierarchy -+ * and its stats are reported through "cpu.stat". -+ */ -+ .depends_on = 1 << cpuacct_cgrp_id, -+#endif - }; - - #endif /* CONFIG_CGROUP_SCHED */ -diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c -index d1e5dd0b3a64..57f390514c39 100644 ---- a/kernel/sched/cpuacct.c -+++ b/kernel/sched/cpuacct.c -@@ -347,6 +347,31 @@ static struct cftype files[] = { - { } /* terminate */ - }; - -+/* used to print cpuacct stats in cpu.stat on the unified hierarchy */ -+void cpuacct_cpu_stats_show(struct seq_file *sf) -+{ -+ struct cgroup_subsys_state *css; -+ u64 usage, val[CPUACCT_STAT_NSTATS]; -+ -+ css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys); -+ -+ usage = cpuusage_read(css, seq_cft(sf)); -+ cpuacct_stats_read(css_ca(css), &val); -+ -+ val[CPUACCT_STAT_USER] *= TICK_NSEC; -+ val[CPUACCT_STAT_SYSTEM] *= TICK_NSEC; -+ do_div(usage, NSEC_PER_USEC); -+ do_div(val[CPUACCT_STAT_USER], NSEC_PER_USEC); -+ do_div(val[CPUACCT_STAT_SYSTEM], NSEC_PER_USEC); -+ -+ seq_printf(sf, "usage_usec %llu\n" -+ "user_usec %llu\n" -+ "system_usec %llu\n", -+ usage, val[CPUACCT_STAT_USER], val[CPUACCT_STAT_SYSTEM]); -+ -+ css_put(css); -+} -+ - /* - * charge this task's execution time to its accounting group. - * -diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h -index ba72807c73d4..ddf7af466d35 100644 ---- a/kernel/sched/cpuacct.h -+++ b/kernel/sched/cpuacct.h -@@ -2,6 +2,7 @@ - - extern void cpuacct_charge(struct task_struct *tsk, u64 cputime); - extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val); -+extern void cpuacct_cpu_stats_show(struct seq_file *sf); - - #else - -@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *tsk, int index, u64 val) - { - } - -+static inline void cpuacct_cpu_stats_show(struct seq_file *sf) -+{ -+} -+ - #endif - -commit 5019fe3d7ec456b58d451ef06fe1f81d7d9f28a9 -Author: Tejun Heo <tj@kernel.org> -Date: Fri Aug 5 12:41:01 2016 -0400 - - cgroup: add documentation regarding CPU controller cgroup v2 support - - Signed-off-by: Tejun Heo <tj@kernel.org> - -diff --git a/Documentation/cgroup-v2-cpu.txt b/Documentation/cgroup-v2-cpu.txt -new file mode 100644 -index 000000000000..1ed7032d4472 ---- /dev/null -+++ b/Documentation/cgroup-v2-cpu.txt -@@ -0,0 +1,368 @@ -+ -+ -+CPU Controller on Control Group v2 -+ -+August, 2016 Tejun Heo <tj@kernel.org> -+ -+ -+While most controllers have support for cgroup v2 now, the CPU -+controller support is not upstream yet due to objections from the -+scheduler maintainers on the basic designs of cgroup v2. This -+document explains the current situation as well as an interim -+solution, and details the disagreements and arguments. The latest -+version of this document can be found at the following URL. -+ -+ https://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu -+ -+This document was posted to the linux-kernel and cgroup mailing lists. -+Unfortunately, no consensus was reached as of Oct, 2016. The thread -+can be found at the following URL. -+ -+ http://lkml.kernel.org/r/20160805170752.GK2542@mtj.duckdns.org -+ -+ -+CONTENTS -+ -+1. Current Situation and Interim Solution -+2. Disagreements and Arguments -+ 2-1. Contentious Restrictions -+ 2-1-1. Process Granularity -+ 2-1-2. No Internal Process Constraint -+ 2-2. Impact on CPU Controller -+ 2-2-1. Impact of Process Granularity -+ 2-2-2. Impact of No Internal Process Constraint -+ 2-3. Arguments for cgroup v2 -+3. Way Forward -+4. References -+ -+ -+1. Current Situation and Interim Solution -+ -+All objections from the scheduler maintainers apply to cgroup v2 core -+design, and there are no known objections to the specifics of the CPU -+controller cgroup v2 interface. The only blocked part is changes to -+expose the CPU controller interface on cgroup v2, which comprises the -+following two patches: -+ -+ [1] sched: Misc preps for cgroup unified hierarchy interface -+ [2] sched: Implement interface for cgroup unified hierarchy -+ -+The necessary changes are superficial and implement the interface -+files on cgroup v2. The combined diffstat is as follows. -+ -+ kernel/sched/core.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++-- -+ kernel/sched/cpuacct.c | 57 ++++++++++++------ -+ kernel/sched/cpuacct.h | 5 + -+ 3 files changed, 189 insertions(+), 22 deletions(-) -+ -+The patches are easy to apply and forward-port. The following git -+branch will always carry the two patches on top of the latest release -+of the upstream kernel. -+ -+ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu -+ -+There also are versioned branches going back to v4.4. -+ -+ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu-$KERNEL_VER -+ -+While it's difficult to tell whether the CPU controller support will -+be merged, there are crucial resource control features in cgroup v2 -+that are only possible due to the design choices that are being -+objected to, and every effort will be made to ease enabling the CPU -+controller cgroup v2 support out-of-tree for parties which choose to. -+ -+ -+2. Disagreements and Arguments -+ -+There have been several lengthy discussion threads [3][4] on LKML -+around the structural constraints of cgroup v2. The two that affect -+the CPU controller are process granularity and no internal process -+constraint. Both arise primarily from the need for common resource -+domain definition across different resources. -+ -+The common resource domain is a powerful concept in cgroup v2 that -+allows controllers to make basic assumptions about the structural -+organization of processes and controllers inside the cgroup hierarchy, -+and thus solve problems spanning multiple types of resources. The -+prime example for this is page cache writeback: dirty page cache is -+regulated through throttling buffered writers based on memory -+availability, and initiating batched write outs to the disk based on -+IO capacity. Tracking and controlling writeback inside a cgroup thus -+requires the direct cooperation of the memory and the IO controller. -+ -+This easily extends to other areas, such as CPU cycles consumed while -+performing memory reclaim or IO encryption. -+ -+ -+2-1. Contentious Restrictions -+ -+For controllers of different resources to work together, they must -+agree on a common organization. This uniform model across controllers -+imposes two contentious restrictions on the CPU controller: process -+granularity and the no-internal-process constraint. -+ -+ -+ 2-1-1. Process Granularity -+ -+ For memory, because an address space is shared between all threads -+ of a process, the terminal consumer is a process, not a thread. -+ Separating the threads of a single process into different memory -+ control domains doesn't make semantical sense. cgroup v2 ensures -+ that all controller can agree on the same organization by requiring -+ that threads of the same process belong to the same cgroup. -+ -+ There are other reasons to enforce process granularity. One -+ important one is isolating system-level management operations from -+ in-process application operations. The cgroup interface, being a -+ virtual filesystem, is very unfit for multiple independent -+ operations taking place at the same time as most operations have to -+ be multi-step and there is no way to synchronize multiple accessors. -+ See also [5] Documentation/cgroup-v2.txt, "R-2. Thread Granularity" -+ -+ -+ 2-1-2. No Internal Process Constraint -+ -+ cgroup v2 does not allow processes to belong to any cgroup which has -+ child cgroups when resource controllers are enabled on it (the -+ notable exception being the root cgroup itself). This is because, -+ for some resources, a resource domain (cgroup) is not directly -+ comparable to the terminal consumer (process/task) of said resource, -+ and so putting the two into a sibling relationship isn't meaningful. -+ -+ - Differing Control Parameters and Capabilities -+ -+ A cgroup controller has different resource control parameters and -+ capabilities from a terminal consumer, be that a task or process. -+ There are a couple cases where a cgroup control knob can be mapped -+ to a per-task or per-process API but they are exceptions and the -+ mappings aren't obvious even in those cases. -+ -+ For example, task priorities (also known as nice values) set -+ through setpriority(2) are mapped to the CPU controller -+ "cpu.shares" values. However, how exactly the two ranges map and -+ even the fact that they map to each other at all are not obvious. -+ -+ The situation gets further muddled when considering other resource -+ types and control knobs. IO priorities set through ioprio_set(2) -+ cannot be mapped to IO controller weights and most cgroup resource -+ control knobs including the bandwidth control knobs of the CPU -+ controller don't have counterparts in the terminal consumers. -+ -+ - Anonymous Resource Consumption -+ -+ For CPU, every time slice consumed from inside a cgroup, which -+ comprises most but not all of consumed CPU time for the cgroup, -+ can be clearly attributed to a specific task or process. Because -+ these two types of entities are directly comparable as consumers -+ of CPU time, it's theoretically possible to mix tasks and cgroups -+ on the same tree levels and let them directly compete for the time -+ quota available to their common ancestor. -+ -+ However, the same can't be said for resource types like memory or -+ IO: the memory consumed by the page cache, for example, can be -+ tracked on a per-cgroup level, but due to mismatches in lifetimes -+ of involved objects (page cache can persist long after processes -+ are gone), shared usages and the implementation overhead of -+ tracking persistent state, it can no longer be attributed to -+ individual processes after instantiation. Consequently, any IO -+ incurred by page cache writeback can be attributed to a cgroup, -+ but not to the individual consumers inside the cgroup. -+ -+ For memory and IO, this makes a resource domain (cgroup) an object -+ of a fundamentally different type than a terminal consumer -+ (process). A process can't be a first class object in the resource -+ distribution graph as its total resource consumption can't be -+ described without the containing resource domain. -+ -+ Disallowing processes in internal cgroups avoids competition between -+ cgroups and processes which cannot be meaningfully defined for these -+ resources. All resource control takes place among cgroups and a -+ terminal consumer interacts with the containing cgroup the same way -+ it would with the system without cgroup. -+ -+ Root cgroup is exempt from this constraint, which is in line with -+ how root cgroup is handled in general - it's excluded from cgroup -+ resource accounting and control. -+ -+ -+Enforcing process granularity and no internal process constraint -+allows all controllers to be on the same footing in terms of resource -+distribution hierarchy. -+ -+ -+2-2. Impact on CPU Controller -+ -+As indicated earlier, the CPU controller's resource distribution graph -+is the simplest. Every schedulable resource consumption can be -+attributed to a specific task. In addition, for weight based control, -+the per-task priority set through setpriority(2) can be translated to -+and from a per-cgroup weight. As such, the CPU controller can treat a -+task and a cgroup symmetrically, allowing support for any tree layout -+of cgroups and tasks. Both process granularity and the no internal -+process constraint restrict how the CPU controller can be used. -+ -+ -+ 2-2-1. Impact of Process Granularity -+ -+ Process granularity prevents tasks belonging to the same process to -+ be assigned to different cgroups. It was pointed out [6] that this -+ excludes the valid use case of hierarchical CPU distribution within -+ processes. -+ -+ To address this issue, the rgroup (resource group) [7][8][9] -+ interface, an extension of the existing setpriority(2) API, was -+ proposed, which is in line with other programmable priority -+ mechanisms and eliminates the risk of in-application configuration -+ and system configuration stepping on each other's toes. -+ Unfortunately, the proposal quickly turned into discussions around -+ cgroup v2 design decisions [4] and no consensus could be reached. -+ -+ -+ 2-2-2. Impact of No Internal Process Constraint -+ -+ The no internal process constraint disallows tasks from competing -+ directly against cgroups. Here is an excerpt from Peter Zijlstra -+ pointing out the issue [10] - R, L and A are cgroups; t1, t2, t3 and -+ t4 are tasks: -+ -+ -+ R -+ / | \ -+ t1 t2 A -+ / \ -+ t3 t4 -+ -+ -+ Is fundamentally different from: -+ -+ -+ R -+ / \ -+ L A -+ / \ / \ -+ t1 t2 t3 t4 -+ -+ -+ Because if in the first hierarchy you add a task (t5) to R, all of -+ its A will run at 1/4th of total bandwidth where before it had -+ 1/3rd, whereas with the second example, if you add our t5 to L, A -+ doesn't get any less bandwidth. -+ -+ -+ It is true that the trees are semantically different from each other -+ and the symmetric handling of tasks and cgroups is aesthetically -+ pleasing. However, it isn't clear what the practical usefulness of -+ a layout with direct competition between tasks and cgroups would be, -+ considering that number and behavior of tasks are controlled by each -+ application, and cgroups primarily deal with system level resource -+ distribution; changes in the number of active threads would directly -+ impact resource distribution. Real world use cases of such layouts -+ could not be established during the discussions. -+ -+ -+2-3. Arguments for cgroup v2 -+ -+There are strong demands for comprehensive hierarchical resource -+control across all major resources, and establishing a common resource -+hierarchy is an essential step. As with most engineering decisions, -+common resource hierarchy definition comes with its trade-offs. With -+cgroup v2, the trade-offs are in the form of structural constraints -+which, among others, restrict the CPU controller's space of possible -+configurations. -+ -+However, even with the restrictions, cgroup v2, in combination with -+rgroup, covers most of identified real world use cases while enabling -+new important use cases of resource control across multiple resource -+types that were fundamentally broken previously. -+ -+Furthermore, for resource control, treating resource domains as -+objects of a different type from terminal consumers has important -+advantages - it can account for resource consumptions which are not -+tied to any specific terminal consumer, be that a task or process, and -+allows decoupling resource distribution controls from in-application -+APIs. Even the CPU controller may benefit from it as the kernel can -+consume significant amount of CPU cycles in interrupt context or tasks -+shared across multiple resource domains (e.g. softirq). -+ -+Finally, it's important to note that enabling cgroup v2 support for -+the CPU controller doesn't block use cases which require the features -+which are not available on cgroup v2. Unlikely, but should anybody -+actually rely on the CPU controller's symmetric handling of tasks and -+cgroups, backward compatibility is and will be maintained by being -+able to disconnect the controller from the cgroup v2 hierarchy and use -+it standalone. This also holds for cpuset which is often used in -+highly customized configurations which might be a poor fit for common -+resource domains. -+ -+The required changes are minimal, the benefits for the target use -+cases are critical and obvious, and use cases which have to use v1 can -+continue to do so. -+ -+ -+3. Way Forward -+ -+cgroup v2 primarily aims to solve the problem of comprehensive -+hierarchical resource control across all major computing resources, -+which is one of the core problems of modern server infrastructure -+engineering. The trade-offs that cgroup v2 took are results of -+pursuing that goal and gaining a better understanding of the nature of -+resource control in the process. -+ -+I believe that real world usages will prove cgroup v2's model right, -+considering the crucial pieces of comprehensive resource control that -+cannot be implemented without common resource domains. This is not to -+say that cgroup v2 is fixed in stone and can't be updated; if there is -+an approach which better serves both comprehensive resource control -+and the CPU controller's flexibility, we will surely move towards -+that. It goes without saying that discussions around such approach -+should consider practical aspects of resource control as a whole -+rather than absolutely focusing on a particular controller. -+ -+Until such consensus can be reached, the CPU controller cgroup v2 -+support will be maintained out of the mainline kernel in an easily -+accessible form. If there is anything cgroup developers can do to -+ease the pain, please feel free to contact us on the cgroup mailing -+list at cgroups@vger.kernel.org. -+ -+ -+4. References -+ -+[1] http://lkml.kernel.org/r/20160105164834.GE5995@mtj.duckdns.org -+ [PATCH 1/2] sched: Misc preps for cgroup unified hierarchy interface -+ Tejun Heo <tj@kernel.org> -+ -+[2] http://lkml.kernel.org/r/20160105164852.GF5995@mtj.duckdns.org -+ [PATCH 2/2] sched: Implement interface for cgroup unified hierarchy -+ Tejun Heo <tj@kernel.org> -+ -+[3] http://lkml.kernel.org/r/1438641689-14655-4-git-send-email-tj@kernel.org -+ [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy -+ Tejun Heo <tj@kernel.org> -+ -+[4] http://lkml.kernel.org/r/20160407064549.GH3430@twins.programming.kicks-ass.net -+ Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP -+ Peter Zijlstra <peterz@infradead.org> -+ -+[5] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/cgroup-v2.txt -+ Control Group v2 -+ Tejun Heo <tj@kernel.org> -+ -+[6] http://lkml.kernel.org/r/CAPM31RJNy3jgG=DYe6GO=wyL4BPPxwUm1f2S6YXacQmo7viFZA@mail.gmail.com -+ Re: [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy -+ Paul Turner <pjt@google.com> -+ -+[7] http://lkml.kernel.org/r/20160105154503.GC5995@mtj.duckdns.org -+ [RFD] cgroup: thread granularity support for cpu controller -+ Tejun Heo <tj@kernel.org> -+ -+[8] http://lkml.kernel.org/r/1457710888-31182-1-git-send-email-tj@kernel.org -+ [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP -+ Tejun Heo <tj@kernel.org> -+ -+[9] http://lkml.kernel.org/r/20160311160522.GA24046@htj.duckdns.org -+ Example program for PRIO_RGRP -+ Tejun Heo <tj@kernel.org> -+ -+[10] http://lkml.kernel.org/r/20160407082810.GN3430@twins.programming.kicks-ass.net -+ Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource -+ Peter Zijlstra <peterz@infradead.org> diff --git a/pkgs/os-specific/linux/kernel/linux-4.9.nix b/pkgs/os-specific/linux/kernel/linux-4.9.nix deleted file mode 100644 index c58e05b485e5c..0000000000000 --- a/pkgs/os-specific/linux/kernel/linux-4.9.nix +++ /dev/null @@ -1,12 +0,0 @@ -{ buildPackages, fetchurl, perl, buildLinux, nixosTests, stdenv, ... } @ args: - -buildLinux (args // rec { - version = "4.9.332"; - extraMeta.branch = "4.9"; - extraMeta.broken = stdenv.isAarch64; - - src = fetchurl { - url = "mirror://kernel/linux/kernel/v4.x/linux-${version}.tar.xz"; - sha256 = "1kiqa9kw4932n5qglkyymsrak849wbbszw9rnq1aygmdinjz4c8i"; - }; -} // (args.argsOverride or {})) |