feat(bpf): use time window for bpf sampling to replace per call based…

… sampling Signed-off-by: Huamin Chen <[email protected]>
sustainable-computing-io · Sep 16, 2024 · d88e8b8 · d88e8b8
1 parent b73c6b1
commit d88e8b8
Show file tree

Hide file tree

Showing 13 changed files with 102 additions and 32 deletions.
diff --git a/bpf/kepler.bpf.h b/bpf/kepler.bpf.h
@@ -167,11 +167,30 @@ SEC(".rodata.config")
 __attribute__((btf_decl_tag(
 	"Hardware Events Enabled"))) static volatile const int HW = 1;
 
-// The sampling rate should be disabled by default because its impact on the
-// measurements is unknown.
+// Global parameters for tracking periods (in milli seconds)
 SEC(".rodata.config")
-__attribute__((
-	btf_decl_tag("Sample Rate"))) static volatile const int SAMPLE_RATE = 0;
+__attribute__((btf_decl_tag(
+	"Active Time"))) static volatile const int ACTIVE_TIME = 20;
+
+// Global parameters for non-tracking periods (in milli seconds)
+SEC(".rodata.config")
+__attribute__((btf_decl_tag("Idle Time"))) static volatile const int IDLE_TIME = 80;
+
+// BPF map to track whether we are in the tracking period or not
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, u32);
+	__type(value, u32);
+	__uint(max_entries, 1);
+} tracking_flag_map SEC(".maps");
+
+// BPF map to store the timestamp when the tracking started
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, u32);
+	__type(value, u64);
+	__uint(max_entries, 1);
+} start_time_map SEC(".maps");
 
 int counter_sched_switch = 0;
 
@@ -317,24 +336,31 @@ static inline int do_kepler_sched_switch_trace(
 
 	cpu_id = bpf_get_smp_processor_id();
 
-	// Skip some samples to minimize overhead
-	if (SAMPLE_RATE > 0) {
-		if (counter_sched_switch > 0) {
-			// update hardware counters to be used when sample is taken
-			if (counter_sched_switch == 1) {
-				collect_metrics_and_reset_counters(
-					&buf, prev_pid, curr_ts, cpu_id);
-				// Add task on-cpu running start time
-				bpf_map_update_elem(
-					&pid_time_map, &next_pid, &curr_ts,
-					BPF_ANY);
-				// create new process metrics
-				register_new_process_if_not_exist(next_tgid);
-			}
-			counter_sched_switch--;
+	// Retrieve tracking flag and start time
+	u32 key = 0;
+	u32 *tracking_flag = bpf_map_lookup_elem(&tracking_flag_map, &key);
+	u64 *start_time = bpf_map_lookup_elem(&start_time_map, &key);
+
+	if (tracking_flag && start_time) {
+		u64 elapsed_time = (curr_ts - *start_time) / 1000000ULL;
+
+		// Update the tracking flag based on elapsed time
+		if (*tracking_flag && elapsed_time >= ACTIVE_TIME) {
+			// Stop tracking
+			*tracking_flag = 0;
+			// Reset start time
+			*start_time = curr_ts;
+		} else if (!*tracking_flag && elapsed_time >= IDLE_TIME) {
+			// Start tracking
+			*tracking_flag = 1;
+			// Reset start time
+			*start_time = curr_ts;
+		}
+
+		// If we are not in the tracking period, return immediately
+		if (!*tracking_flag) {
 			return 0;
 		}
-		counter_sched_switch = SAMPLE_RATE;
 	}
 
 	collect_metrics_and_reset_counters(&buf, prev_pid, curr_ts, cpu_id);

diff --git a/pkg/bpf/exporter.go b/pkg/bpf/exporter.go
@@ -91,7 +91,13 @@ func (e *exporter) attach() error {
 
 	// Set program global variables
 	err = specs.RewriteConstants(map[string]interface{}{
-		"SAMPLE_RATE": int32(config.GetBPFSampleRate()),
+		"ACTIVE_TIME": int32(config.GetBPFActiveSampleWindowMS()),
+	})
+	if err != nil {
+		return fmt.Errorf("error rewriting program constants: %v", err)
+	}
+	err = specs.RewriteConstants(map[string]interface{}{
+		"IDLE_TIME": int32(config.GetBPFIdleSampleWindowMS()),
 	})
 	if err != nil {
 		return fmt.Errorf("error rewriting program constants: %v", err)

diff --git a/pkg/bpf/kepler_bpfeb.go b/pkg/bpf/kepler_bpfeb.go
diff --git a/pkg/bpf/kepler_bpfeb.o b/pkg/bpf/kepler_bpfeb.o
diff --git a/pkg/bpf/kepler_bpfel.go b/pkg/bpf/kepler_bpfel.go
diff --git a/pkg/bpf/kepler_bpfel.o b/pkg/bpf/kepler_bpfel.o
diff --git a/pkg/bpftest/bpf_suite_test.go b/pkg/bpftest/bpf_suite_test.go
@@ -330,7 +330,8 @@ var _ = Describe("BPF Exporter", func() {
 
 		err = specs.RewriteConstants(map[string]interface{}{
 			"TEST":        int32(1),
-			"SAMPLE_RATE": int32(1000),
+			"ACTIVE_TIME": int32(80),
+			"IDLE_TIME":   int32(20),
 		})
 		Expect(err).NotTo(HaveOccurred())
 

diff --git a/pkg/bpftest/test_bpfeb.go b/pkg/bpftest/test_bpfeb.go
diff --git a/pkg/bpftest/test_bpfeb.o b/pkg/bpftest/test_bpfeb.o
diff --git a/pkg/bpftest/test_bpfel.go b/pkg/bpftest/test_bpfel.go
diff --git a/pkg/bpftest/test_bpfel.o b/pkg/bpftest/test_bpfel.o
diff --git a/pkg/config/config.go b/pkg/config/config.go
@@ -59,11 +59,12 @@ type KeplerConfig struct {
 	MockACPIPowerPath            string
 	MaxLookupRetry               int
 	KubeConfig                   string
-	BPFSampleRate                int
 	EstimatorModel               string
 	EstimatorSelectFilter        string
 	CPUArchOverride              string
 	MachineSpecFilePath          string
+	BPFActiveSampleWindowMS      int
+	BPFIdleSampleWindowMS        int
 }
 type MetricsConfig struct {
 	CoreUsageMetric    string
@@ -154,10 +155,11 @@ func getKeplerConfig() KeplerConfig {
 		MockACPIPowerPath:            getConfig("MOCK_ACPI_POWER_PATH", ""),
 		MaxLookupRetry:               getIntConfig("MAX_LOOKUP_RETRY", defaultMaxLookupRetry),
 		KubeConfig:                   getConfig("KUBE_CONFIG", defaultKubeConfig),
-		BPFSampleRate:                getIntConfig("EXPERIMENTAL_BPF_SAMPLE_RATE", defaultBPFSampleRate),
 		EstimatorModel:               getConfig("ESTIMATOR_MODEL", defaultMetricValue),
 		EstimatorSelectFilter:        getConfig("ESTIMATOR_SELECT_FILTER", defaultMetricValue), // no filter
 		CPUArchOverride:              getConfig("CPU_ARCH_OVERRIDE", defaultCPUArchOverride),
+		BPFActiveSampleWindowMS:      getIntConfig("EXPERIMENTAL_BPF_ACTIVE_SAMPLE_WINDOW_MS", 1000),
+		BPFIdleSampleWindowMS:        getIntConfig("EXPERIMENTAL_BPF_IDLE_SAMPLE_WINDOW_MS", 0),
 	}
 }
 
@@ -261,7 +263,8 @@ func logBoolConfigs() {
 		klog.V(5).Infof("EXPOSE_BPF_METRICS: %t", instance.Kepler.ExposeBPFMetrics)
 		klog.V(5).Infof("EXPOSE_COMPONENT_POWER: %t", instance.Kepler.ExposeComponentPower)
 		klog.V(5).Infof("EXPOSE_ESTIMATED_IDLE_POWER_METRICS: %t. This only impacts when the power is estimated using pre-prained models. Estimated idle power is meaningful only when Kepler is running on bare-metal or with a single virtual machine (VM) on the node.", instance.Kepler.ExposeIdlePowerMetrics)
-		klog.V(5).Infof("EXPERIMENTAL_BPF_SAMPLE_RATE: %d", instance.Kepler.BPFSampleRate)
+		klog.V(5).Infof("EXPERIMENTAL_BPF_ACTIVE_SAMPLE_WINDOW_MS: %d", instance.Kepler.BPFActiveSampleWindowMS)
+		klog.V(5).Infof("EXPERIMENTAL_BPF_IDLE_SAMPLE_WINDOW_MS: %d", instance.Kepler.BPFIdleSampleWindowMS)
 	}
 }
 
@@ -395,6 +398,21 @@ func SetGPUUsageMetric(metric string) {
 	instance.Metrics.GPUUsageMetric = metric
 }
 
+func GetBPFActiveSampleWindowMS() int {
+	ensureConfigInitialized()
+	return instance.Kepler.BPFActiveSampleWindowMS
+}
+
+func GetBPFIdleSampleWindowMS() int {
+	ensureConfigInitialized()
+	return instance.Kepler.BPFIdleSampleWindowMS
+}
+
+func GetDCGMHostEngineEndpoint() string {
+	ensureConfigInitialized()
+	return instance.DCGMHostEngineEndpoint
+}
+
 func (c *Config) getUnixName() (unix.Utsname, error) {
 	var utsname unix.Utsname
 	err := unix.Uname(&utsname)
@@ -552,11 +570,6 @@ func ExposeIRQCounterMetrics() bool {
 	return instance.Kepler.ExposeIRQCounterMetrics
 }
 
-func GetBPFSampleRate() int {
-	ensureConfigInitialized()
-	return instance.Kepler.BPFSampleRate
-}
-
 func GetRedfishCredFilePath() string {
 	ensureConfigInitialized()
 	return instance.Redfish.CredFilePath

diff --git a/pkg/sensors/accelerator/device/sources/dcgm.go b/pkg/sensors/accelerator/device/sources/dcgm.go
@@ -62,7 +62,7 @@ type GPUDcgm struct {
 }
 
 func init() {
-	if _, err := dcgm.Init(dcgm.Standalone, config.DCGMHostEngineEndpoint, isSocket); err != nil {
+	if _, err := dcgm.Init(dcgm.Standalone, config.GetDCGMHostEngineEndpoint(), isSocket); err != nil {
 		klog.Errorf("Error initializing dcgm: %v", err)
 		return
 	}
@@ -131,7 +131,7 @@ func (d *GPUDcgm) InitLib() (err error) {
 			err = fmt.Errorf("could not init dcgm: %v", r)
 		}
 	}()
-	cleanup, err := dcgm.Init(dcgm.Standalone, config.DCGMHostEngineEndpoint, isSocket)
+	cleanup, err := dcgm.Init(dcgm.Standalone, config.GetDCGMHostEngineEndpoint(), isSocket)
 	if err != nil {
 		klog.Infof("There is no DCGM daemon running in the host: %s", err)
 		// embedded mode is not recommended for production per https://github.com/NVIDIA/dcgm-exporter/issues/22#issuecomment-1321521995