From: Anton Blanchard <anton@samba.org>

Below are the diffs between the current ppc64 sched init stuff and x86.

- Ignore the POWER5-specific stuff; I don't set up a sibling map yet.
- What should I set cache_hot_time to?

Large cpumask typechecking requirements (perhaps useful on x86 as well):
- cpu->cpumask = CPU_MASK_NONE becomes cpus_clear(cpu->cpumask);
- cpus_and(nodemask, node_to_cpumask(i), cpu_possible_map) doesn't work;
  you need to go through a temporary (see the sketch below).
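
For reference, a minimal sketch of the temporary idiom (build_nodemask is a
made-up helper name used only for illustration; it is not part of the patch):

	/* Hypothetical helper, illustration only. */
	static cpumask_t build_nodemask(int node)
	{
		cpumask_t node_cpumask = node_to_cpumask(node);
		cpumask_t nodemask;

		/* cpus_and() wants plain mask lvalues, so the value returned
		 * by node_to_cpumask() has to go through a temporary first. */
		cpus_and(nodemask, node_cpumask, cpu_possible_map);
		return nodemask;
	}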


---

 25-akpm/arch/ppc64/Kconfig            |    9 +
 25-akpm/arch/ppc64/kernel/smp.c       |  279 +++++++++++++++++++++++++++++++++-
 25-akpm/include/asm-ppc64/processor.h |    5 
 3 files changed, 288 insertions(+), 5 deletions(-)

diff -puN arch/ppc64/Kconfig~ppc64-sched-domain-support arch/ppc64/Kconfig
--- 25/arch/ppc64/Kconfig~ppc64-sched-domain-support	2004-04-01 02:02:10.593254528 -0800
+++ 25-akpm/arch/ppc64/Kconfig	2004-04-01 02:02:10.600253464 -0800
@@ -173,6 +173,15 @@ config NUMA
 	bool "NUMA support"
 	depends on DISCONTIGMEM
 
+config SCHED_SMT
+	bool "SMT (Hyperthreading) scheduler support"
+	depends on SMP
+	default n
+	help
+	  SMT scheduler support improves the CPU scheduler's decision making
+	  when dealing with POWER5 cpus at a cost of slightly increased
+	  overhead in some places. If unsure say N here.
+
 config PREEMPT
 	bool "Preemptible Kernel"
 	depends on BROKEN
diff -puN arch/ppc64/kernel/smp.c~ppc64-sched-domain-support arch/ppc64/kernel/smp.c
--- 25/arch/ppc64/kernel/smp.c~ppc64-sched-domain-support	2004-04-01 02:02:10.594254376 -0800
+++ 25-akpm/arch/ppc64/kernel/smp.c	2004-04-01 02:02:10.602253160 -0800
@@ -579,11 +579,6 @@ void __init smp_prepare_cpus(unsigned in
 	paca[boot_cpuid].prof_counter = 1;
 	paca[boot_cpuid].prof_multiplier = 1;
 
-	/*
-	 * XXX very rough. 
-	 */
-	cache_decay_ticks = HZ/100;
-
 #ifndef CONFIG_PPC_ISERIES
 	paca[boot_cpuid].next_jiffy_update_tb = tb_last_stamp = get_tb();
 
@@ -796,3 +791,277 @@ static int __init topology_init(void)
 	return 0;
 }
 __initcall(topology_init);
+
+#ifdef CONFIG_SCHED_SMT
+#ifdef CONFIG_NUMA
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static struct sched_group sched_group_nodes[MAX_NUMNODES];
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+static DEFINE_PER_CPU(struct sched_domain, node_domains);
+__init void arch_init_sched_domains(void)
+{
+	int i;
+	struct sched_group *first_cpu = NULL, *last_cpu = NULL;
+
+	/* Set up domains */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		struct sched_domain *node_domain = &per_cpu(node_domains, i);
+		int node = cpu_to_node(i);
+		cpumask_t nodemask = node_to_cpumask(node);
+		cpumask_t my_cpumask = cpumask_of_cpu(i);
+		cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
+
+		*cpu_domain = SD_SIBLING_INIT;
+		if (__is_processor(PV_POWER5))
+			cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
+		else
+			cpu_domain->span = my_cpumask;
+
+		*phys_domain = SD_CPU_INIT;
+		phys_domain->span = nodemask;
+		// phys_domain->cache_hot_time = XXX;
+
+		*node_domain = SD_NODE_INIT;
+		node_domain->span = cpu_possible_map;
+		// node_domain->cache_hot_time = XXX;
+	}
+
+	/* Set up CPU (sibling) groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		int j;
+		first_cpu = last_cpu = NULL;
+
+		if (i != first_cpu(cpu_domain->span)) {
+			cpu_sched_domain(i)->flags |= SD_SHARE_CPUPOWER;
+			cpu_sched_domain(first_cpu(cpu_domain->span))->flags |=
+				SD_SHARE_CPUPOWER;
+			continue;
+		}
+
+		for_each_cpu_mask(j, cpu_domain->span) {
+			struct sched_group *cpu = &sched_group_cpus[j];
+
+			cpus_clear(cpu->cpumask);
+			cpu_set(j, cpu->cpumask);
+			cpu->cpu_power = SCHED_LOAD_SCALE;
+
+			if (!first_cpu)
+				first_cpu = cpu;
+			if (last_cpu)
+				last_cpu->next = cpu;
+			last_cpu = cpu;
+		}
+		last_cpu->next = first_cpu;
+	}
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		int j;
+		cpumask_t nodemask;
+		struct sched_group *node = &sched_group_nodes[i];
+		cpumask_t node_cpumask = node_to_cpumask(i);
+		cpus_and(nodemask, node_cpumask, cpu_online_map);
+
+		if (cpus_empty(nodemask))
+			continue;
+
+		first_cpu = last_cpu = NULL;
+		/* Set up physical groups */
+		for_each_cpu_mask(j, nodemask) {
+			struct sched_domain *cpu_domain = cpu_sched_domain(j);
+			struct sched_group *cpu = &sched_group_phys[j];
+
+			if (j != first_cpu(cpu_domain->span))
+				continue;
+
+			cpu->cpumask = cpu_domain->span;
+			/*
+			 * Make each extra sibling increase power by 10% of
+			 * the basic CPU. This is very arbitrary.
+			 */
+			cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
+			node->cpu_power += cpu->cpu_power;
+
+			if (!first_cpu)
+				first_cpu = cpu;
+			if (last_cpu)
+				last_cpu->next = cpu;
+			last_cpu = cpu;
+		}
+		last_cpu->next = first_cpu;
+	}
+
+	/* Set up nodes */
+	first_cpu = last_cpu = NULL;
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		struct sched_group *cpu = &sched_group_nodes[i];
+		cpumask_t nodemask;
+		cpumask_t node_cpumask = node_to_cpumask(i);
+		cpus_and(nodemask, node_cpumask, cpu_possible_map);
+
+		if (cpus_empty(nodemask))
+			continue;
+
+		cpu->cpumask = nodemask;
+		/* ->cpu_power already setup */
+
+		if (!first_cpu)
+			first_cpu = cpu;
+		if (last_cpu)
+			last_cpu->next = cpu;
+		last_cpu = cpu;
+	}
+	last_cpu->next = first_cpu;
+
+	mb();
+	for_each_cpu(i) {
+		int node = cpu_to_node(i);
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		struct sched_domain *node_domain = &per_cpu(node_domains, i);
+		struct sched_group *cpu_group = &sched_group_cpus[i];
+		struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
+		struct sched_group *node_group = &sched_group_nodes[node];
+
+		cpu_domain->parent = phys_domain;
+		phys_domain->parent = node_domain;
+
+		node_domain->groups = node_group;
+		phys_domain->groups = phys_group;
+		cpu_domain->groups = cpu_group;
+	}
+}
+#else /* !CONFIG_NUMA */
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+__init void arch_init_sched_domains(void)
+{
+	int i;
+	struct sched_group *first_cpu = NULL, *last_cpu = NULL;
+
+	/* Set up domains */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		cpumask_t my_cpumask = cpumask_of_cpu(i);
+		cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
+
+		*cpu_domain = SD_SIBLING_INIT;
+		if (__is_processor(PV_POWER5))
+			cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
+		else
+			cpu_domain->span = my_cpumask;
+
+		*phys_domain = SD_CPU_INIT;
+		phys_domain->span = cpu_possible_map;
+		// phys_domain->cache_hot_time = XXX;
+	}
+
+	/* Set up CPU (sibling) groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		int j;
+		first_cpu = last_cpu = NULL;
+
+		if (i != first_cpu(cpu_domain->span)) {
+			cpu_sched_domain(i)->flags |= SD_SHARE_CPUPOWER;
+			cpu_sched_domain(first_cpu(cpu_domain->span))->flags |=
+				SD_SHARE_CPUPOWER;
+			continue;
+		}
+
+		for_each_cpu_mask(j, cpu_domain->span) {
+			struct sched_group *cpu = &sched_group_cpus[j];
+
+			cpus_clear(cpu->cpumask);
+			cpu_set(j, cpu->cpumask);
+			cpu->cpu_power = SCHED_LOAD_SCALE;
+
+			if (!first_cpu)
+				first_cpu = cpu;
+			if (last_cpu)
+				last_cpu->next = cpu;
+			last_cpu = cpu;
+		}
+		last_cpu->next = first_cpu;
+	}
+
+	first_cpu = last_cpu = NULL;
+	/* Set up physical groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_group *cpu = &sched_group_phys[i];
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		cpu->cpumask = cpu_domain->span;
+		/* See SMT+NUMA setup for comment */
+		cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
+
+		if (!first_cpu)
+			first_cpu = cpu;
+		if (last_cpu)
+			last_cpu->next = cpu;
+		last_cpu = cpu;
+	}
+	last_cpu->next = first_cpu;
+
+	mb();
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		struct sched_group *cpu_group = &sched_group_cpus[i];
+		struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
+		cpu_domain->parent = phys_domain;
+		phys_domain->groups = phys_group;
+		cpu_domain->groups = cpu_group;
+	}
+}
+#endif /* CONFIG_NUMA */
+#else /* !CONFIG_SCHED_SMT */
+
+static struct sched_group sched_group_cpus[NR_CPUS];
+
+__init void arch_init_sched_domains(void)
+{
+	int i;
+	struct sched_group *first_cpu = NULL, *last_cpu = NULL;
+
+	/* Set up domains */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_sd = cpu_sched_domain(i);
+
+		*cpu_sd = SD_CPU_INIT;
+		cpu_sd->span = cpu_possible_map;
+		// cpu_sd->cache_hot_time = XXX;
+	}
+
+	/* Set up CPU groups */
+	for_each_cpu_mask(i, cpu_possible_map) {
+		struct sched_group *cpu = &sched_group_cpus[i];
+
+		cpus_clear(cpu->cpumask);
+		cpu_set(i, cpu->cpumask);
+		cpu->cpu_power = SCHED_LOAD_SCALE;
+
+		if (!first_cpu)
+			first_cpu = cpu;
+		if (last_cpu)
+			last_cpu->next = cpu;
+		last_cpu = cpu;
+	}
+	last_cpu->next = first_cpu;
+
+	mb();
+	for_each_cpu(i) {
+		struct sched_domain *cpu_sd = cpu_sched_domain(i);
+		cpu_sd->groups = &sched_group_cpus[i];
+	}
+}
+
+#endif
diff -puN include/asm-ppc64/processor.h~ppc64-sched-domain-support include/asm-ppc64/processor.h
--- 25/include/asm-ppc64/processor.h~ppc64-sched-domain-support	2004-04-01 02:02:10.596254072 -0800
+++ 25-akpm/include/asm-ppc64/processor.h	2004-04-01 02:02:10.603253008 -0800
@@ -618,6 +618,11 @@ static inline void prefetchw(const void 
 
 #define spin_lock_prefetch(x)	prefetchw(x)
 
+#ifdef CONFIG_SCHED_SMT
+#define ARCH_HAS_SCHED_DOMAIN
+#define ARCH_HAS_SCHED_WAKE_BALANCE
+#endif
+
 #endif /* ASSEMBLY */
 
 #endif /* __ASM_PPC64_PROCESSOR_H */

_