diff mbox series

[v2,1/2] selftests/resctrl: Adjust effective L3 cache size with SNC enabled

Message ID fe9295c6be677d187b1607185e23993dbfe74761.1715769576.git.maciej.wieczor-retman@intel.com
State New
Headers show
Series selftests/resctrl: SNC kernel support discovery | expand

Commit Message

Maciej Wieczor-Retman May 15, 2024, 11:18 a.m. UTC
Sub-NUMA Cluster divides CPUs sharing an L3 cache into separate NUMA
nodes. Systems may support splitting into either two or four nodes.

When SNC mode is enabled the effective amount of L3 cache available
for allocation is divided by the number of nodes per L3.

Detect which SNC mode is active by comparing the number of CPUs
that share a cache with CPU0, with the number of CPUs on node0.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Co-developed-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
---
 tools/testing/selftests/resctrl/resctrl.h   |  3 ++
 tools/testing/selftests/resctrl/resctrlfs.c | 59 +++++++++++++++++++++
 2 files changed, 62 insertions(+)

Comments

Reinette Chatre May 30, 2024, 11:07 p.m. UTC | #1
Hi Maciej,

Regarding shortlog: L3 cache size should no longer be adjusted when
SNC is enabled. You mention that the tests are passing when running
with this adjustment ... I think that this may be because the test
now just runs on a smaller portion of the cache?

On 5/15/24 4:18 AM, Maciej Wieczor-Retman wrote:
> Sub-NUMA Cluster divides CPUs sharing an L3 cache into separate NUMA
> nodes. Systems may support splitting into either two or four nodes.

fyi ... from the most recent kernel submission 2, 3, or 4 nodes
are possible:
https://lore.kernel.org/lkml/20240528222006.58283-20-tony.luck@intel.com/

> 
> When SNC mode is enabled the effective amount of L3 cache available
> for allocation is divided by the number of nodes per L3.

This was a mistake in original implementation and no longer done.

> 
> Detect which SNC mode is active by comparing the number of CPUs
> that share a cache with CPU0, with the number of CPUs on node0.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> Co-developed-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
> Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
> ---
>   tools/testing/selftests/resctrl/resctrl.h   |  3 ++
>   tools/testing/selftests/resctrl/resctrlfs.c | 59 +++++++++++++++++++++
>   2 files changed, 62 insertions(+)
> 
> diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
> index 00d51fa7531c..3dd5d6779786 100644
> --- a/tools/testing/selftests/resctrl/resctrl.h
> +++ b/tools/testing/selftests/resctrl/resctrl.h
> @@ -11,6 +11,7 @@
>   #include <signal.h>
>   #include <dirent.h>
>   #include <stdbool.h>
> +#include <ctype.h>
>   #include <sys/stat.h>
>   #include <sys/ioctl.h>
>   #include <sys/mount.h>
> @@ -49,6 +50,7 @@
>   		umount_resctrlfs();		\
>   		exit(EXIT_FAILURE);		\
>   	} while (0)
> +#define MAX_SNC		4
>   
>   /*
>    * user_params:		User supplied parameters
> @@ -131,6 +133,7 @@ extern pid_t bm_pid, ppid;
>   
>   extern char llc_occup_path[1024];
>   
> +int snc_ways(void);
>   int get_vendor(void);
>   bool check_resctrlfs_support(void);
>   int filter_dmesg(void);
> diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
> index 1cade75176eb..e4d3624a8817 100644
> --- a/tools/testing/selftests/resctrl/resctrlfs.c
> +++ b/tools/testing/selftests/resctrl/resctrlfs.c
> @@ -156,6 +156,63 @@ int get_domain_id(const char *resource, int cpu_no, int *domain_id)
>   	return 0;
>   }
>   
> +/*
> + * Count number of CPUs in a /sys bit map
> + */
> +static unsigned int count_sys_bitmap_bits(char *name)
> +{
> +	FILE *fp = fopen(name, "r");
> +	int count = 0, c;
> +
> +	if (!fp)
> +		return 0;
> +
> +	while ((c = fgetc(fp)) != EOF) {
> +		if (!isxdigit(c))
> +			continue;
> +		switch (c) {
> +		case 'f':
> +			count++;
> +		case '7': case 'b': case 'd': case 'e':
> +			count++;
> +		case '3': case '5': case '6': case '9': case 'a': case 'c':
> +			count++;
> +		case '1': case '2': case '4': case '8':
> +			count++;
> +		}
> +	}
> +	fclose(fp);
> +
> +	return count;
> +}
> +
> +/*
> + * Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0.
> + * If some CPUs are offline the numbers may not be exact multiples of each
> + * other. Any offline CPUs on node0 will be also gone from shared_cpu_map of
> + * CPU0 but offline CPUs from other nodes will only make the cache_cpus value
> + * lower. Still try to get the ratio right by preventing the second possibility.
> + */
> +int snc_ways(void)

"ways" have a specific meaning in cache terminology. Perhaps rather something
like "snc_nodes_per_cache()" or even copy the kernel's (which is still WIP though)
snc_nodes_per_l3_cache()

> +{
> +	int node_cpus, cache_cpus, i;
> +
> +	node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap");
> +	cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map");
> +
> +	if (!node_cpus || !cache_cpus) {
> +		fprintf(stderr, "Warning could not determine Sub-NUMA Cluster mode\n");

The tests just use "ksft_print_msg()" for error messages. The "Warning could ..."
is somewhat unexpected, perhaps just "Could not determine ..." or "Warning: Could not ..."?

> +		return 1;
> +	}
> +
> +	for (i = 1; i <= MAX_SNC ; i++) {
> +		if (i * node_cpus >= cache_cpus)
> +			return i;
> +	}

This is not obvious to me. From the function comments this seems to address the
scenarios when CPUs from other nodes are offline. It is not clear to me how
this loop addresses this. For example, let's say there are four SNC nodes
associated with a cache and only the node0 CPUs are online. The above would
detect this as "1", not "4", if I read this right?

I wonder if it may not be easier to just follow what the kernel does
(in the new version).
User space can learn the number of online and present CPUs from
/sys/devices/system/cpu/online and /sys/devices/system/cpu/present
respectively. A simple string compare of the contents can be used to
determine if they are identical and a warning can be printed if they are not.
With a warning when accurate detection cannot be done the simple
check will do.

Could you please add an informational message indicating how many SNC nodes
were indeed detected?

> +
> +	return 1;
> +}
> +
>   /*
>    * get_cache_size - Get cache size for a specified CPU
>    * @cpu_no:	CPU number
> @@ -211,6 +268,8 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size
>   			break;
>   	}
>   
> +	if (cache_num == 3)
> +		*cache_size /= snc_ways();
>   	return 0;
>   }
>   

I think this can be dropped.

Reinette
Tony Luck May 30, 2024, 11:46 p.m. UTC | #2
>> When SNC mode is enabled the effective amount of L3 cache available
>> for allocation is divided by the number of nodes per L3.
>
> This was a mistake in original implementation and no longer done.

My original kernel code adjusted value reported in the "size" file in resctrl.
That's no longer done because the effective size depends on how applications
are allocating and using memory. Since the kernel can't know that, it
seemed best to just report the total size of the cache.

But I think the resctrl tests still need to take this into account when running
llc_occupancy tests.

E.g. on a 2-way SNC system with a 100MB L3 cache a test that allocates
memory from its local SNC node (default behavior without using libnuma)
will only see 50 MB llc_occupancy with a fully populated L3 mask in the
schemata file.

-Tony
diff mbox series

Patch

diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 00d51fa7531c..3dd5d6779786 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -11,6 +11,7 @@ 
 #include <signal.h>
 #include <dirent.h>
 #include <stdbool.h>
+#include <ctype.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
 #include <sys/mount.h>
@@ -49,6 +50,7 @@ 
 		umount_resctrlfs();		\
 		exit(EXIT_FAILURE);		\
 	} while (0)
+#define MAX_SNC		4
 
 /*
  * user_params:		User supplied parameters
@@ -131,6 +133,7 @@  extern pid_t bm_pid, ppid;
 
 extern char llc_occup_path[1024];
 
+int snc_ways(void);
 int get_vendor(void);
 bool check_resctrlfs_support(void);
 int filter_dmesg(void);
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 1cade75176eb..e4d3624a8817 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -156,6 +156,63 @@  int get_domain_id(const char *resource, int cpu_no, int *domain_id)
 	return 0;
 }
 
+/*
+ * Count number of CPUs in a /sys bit map
+ */
+static unsigned int count_sys_bitmap_bits(char *name)
+{
+	FILE *fp = fopen(name, "r");
+	int count = 0, c;
+
+	if (!fp)
+		return 0;
+
+	while ((c = fgetc(fp)) != EOF) {
+		if (!isxdigit(c))
+			continue;
+		switch (c) {
+		case 'f':
+			count++;
+		case '7': case 'b': case 'd': case 'e':
+			count++;
+		case '3': case '5': case '6': case '9': case 'a': case 'c':
+			count++;
+		case '1': case '2': case '4': case '8':
+			count++;
+		}
+	}
+	fclose(fp);
+
+	return count;
+}
+
+/*
+ * Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0.
+ * If some CPUs are offline the numbers may not be exact multiples of each
+ * other. Any offline CPUs on node0 will be also gone from shared_cpu_map of
+ * CPU0 but offline CPUs from other nodes will only make the cache_cpus value
+ * lower. Still try to get the ratio right by preventing the second possibility.
+ */
+int snc_ways(void)
+{
+	int node_cpus, cache_cpus, i;
+
+	node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap");
+	cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map");
+
+	if (!node_cpus || !cache_cpus) {
+		fprintf(stderr, "Warning could not determine Sub-NUMA Cluster mode\n");
+		return 1;
+	}
+
+	for (i = 1; i <= MAX_SNC ; i++) {
+		if (i * node_cpus >= cache_cpus)
+			return i;
+	}
+
+	return 1;
+}
+
 /*
  * get_cache_size - Get cache size for a specified CPU
  * @cpu_no:	CPU number
@@ -211,6 +268,8 @@  int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size
 			break;
 	}
 
+	if (cache_num == 3)
+		*cache_size /= snc_ways();
 	return 0;
 }