Skip to content

Commit 5c3f51e

Browse files
committed
feat(bpf): use time window for bpf sampling to replace per call based sampling
Signed-off-by: Huamin Chen <[email protected]>
1 parent b73c6b1 commit 5c3f51e

13 files changed

+102
-32
lines changed

bpf/kepler.bpf.h

+46-20
Original file line numberDiff line numberDiff line change
@@ -167,11 +167,30 @@ SEC(".rodata.config")
167167
__attribute__((btf_decl_tag(
168168
"Hardware Events Enabled"))) static volatile const int HW = 1;
169169

170-
// The sampling rate should be disabled by default because its impact on the
171-
// measurements is unknown.
170+
// Global parameter: length of the tracking (active) period, in milliseconds
172171
SEC(".rodata.config")
173-
__attribute__((
174-
btf_decl_tag("Sample Rate"))) static volatile const int SAMPLE_RATE = 0;
172+
__attribute__((btf_decl_tag(
173+
"Active Time"))) static volatile const int ACTIVE_TIME = 20;
174+
175+
// Global parameter: length of the non-tracking (idle) period, in milliseconds
176+
SEC(".rodata.config")
177+
__attribute__((btf_decl_tag("Idle Time"))) static volatile const int IDLE_TIME = 80;
178+
179+
// BPF map to track whether we are in the tracking period or not
180+
struct {
181+
__uint(type, BPF_MAP_TYPE_ARRAY);
182+
__type(key, u32);
183+
__type(value, u32);
184+
__uint(max_entries, 1);
185+
} tracking_flag_map SEC(".maps");
186+
187+
// Per-CPU BPF map to store the timestamp of the most recent switch between
// the tracking and non-tracking periods
188+
struct {
189+
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
190+
__type(key, u32);
191+
__type(value, u64);
192+
__uint(max_entries, 1);
193+
} start_time_map SEC(".maps");
175194

176195
int counter_sched_switch = 0;
177196

@@ -317,24 +336,31 @@ static inline int do_kepler_sched_switch_trace(
317336

318337
cpu_id = bpf_get_smp_processor_id();
319338

320-
// Skip some samples to minimize overhead
321-
if (SAMPLE_RATE > 0) {
322-
if (counter_sched_switch > 0) {
323-
// update hardware counters to be used when sample is taken
324-
if (counter_sched_switch == 1) {
325-
collect_metrics_and_reset_counters(
326-
&buf, prev_pid, curr_ts, cpu_id);
327-
// Add task on-cpu running start time
328-
bpf_map_update_elem(
329-
&pid_time_map, &next_pid, &curr_ts,
330-
BPF_ANY);
331-
// create new process metrics
332-
register_new_process_if_not_exist(next_tgid);
333-
}
334-
counter_sched_switch--;
339+
// Retrieve tracking flag and start time
340+
u32 key = 0;
341+
u32 *tracking_flag = bpf_map_lookup_elem(&tracking_flag_map, &key);
342+
u64 *start_time = bpf_map_lookup_elem(&start_time_map, &key);
343+
344+
if (tracking_flag && start_time) {
345+
u64 elapsed_time = (curr_ts - *start_time) / 1000000ULL;
346+
347+
// Update the tracking flag based on elapsed time
348+
if (*tracking_flag && elapsed_time >= ACTIVE_TIME) {
349+
// Stop tracking
350+
*tracking_flag = 0;
351+
// Reset start time
352+
*start_time = curr_ts;
353+
} else if (!*tracking_flag && elapsed_time >= IDLE_TIME) {
354+
// Start tracking
355+
*tracking_flag = 1;
356+
// Reset start time
357+
*start_time = curr_ts;
358+
}
359+
360+
// If we are not in the tracking period, return immediately
361+
if (!*tracking_flag) {
335362
return 0;
336363
}
337-
counter_sched_switch = SAMPLE_RATE;
338364
}
339365

340366
collect_metrics_and_reset_counters(&buf, prev_pid, curr_ts, cpu_id);

pkg/bpf/exporter.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,13 @@ func (e *exporter) attach() error {
9191

9292
// Set program global variables
9393
err = specs.RewriteConstants(map[string]interface{}{
94-
"SAMPLE_RATE": int32(config.GetBPFSampleRate()),
94+
"ACTIVE_TIME": int32(config.GetBPFActiveSampleWindowMS()),
95+
})
96+
if err != nil {
97+
return fmt.Errorf("error rewriting program constants: %v", err)
98+
}
99+
err = specs.RewriteConstants(map[string]interface{}{
100+
"IDLE_TIME": int32(config.GetBPFIdleSampleWindowMS()),
95101
})
96102
if err != nil {
97103
return fmt.Errorf("error rewriting program constants: %v", err)

pkg/bpf/kepler_bpfeb.go

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/bpf/kepler_bpfeb.o

1.48 KB
Binary file not shown.

pkg/bpf/kepler_bpfel.go

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/bpf/kepler_bpfel.o

1.48 KB
Binary file not shown.

pkg/bpftest/bpf_suite_test.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,8 @@ var _ = Describe("BPF Exporter", func() {
330330

331331
err = specs.RewriteConstants(map[string]interface{}{
332332
"TEST": int32(1),
333-
"SAMPLE_RATE": int32(1000),
333+
"ACTIVE_TIME": int32(1000),
334+
"IDLE_TIME": int32(0),
334335
})
335336
Expect(err).NotTo(HaveOccurred())
336337

pkg/bpftest/test_bpfeb.go

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/bpftest/test_bpfeb.o

1.55 KB
Binary file not shown.

pkg/bpftest/test_bpfel.go

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/bpftest/test_bpfel.o

1.55 KB
Binary file not shown.

pkg/config/config.go

+21-8
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,12 @@ type KeplerConfig struct {
5959
MockACPIPowerPath string
6060
MaxLookupRetry int
6161
KubeConfig string
62-
BPFSampleRate int
6362
EstimatorModel string
6463
EstimatorSelectFilter string
6564
CPUArchOverride string
6665
MachineSpecFilePath string
66+
BPFActiveSampleWindowMS int
67+
BPFIdleSampleWindowMS int
6768
}
6869
type MetricsConfig struct {
6970
CoreUsageMetric string
@@ -154,10 +155,11 @@ func getKeplerConfig() KeplerConfig {
154155
MockACPIPowerPath: getConfig("MOCK_ACPI_POWER_PATH", ""),
155156
MaxLookupRetry: getIntConfig("MAX_LOOKUP_RETRY", defaultMaxLookupRetry),
156157
KubeConfig: getConfig("KUBE_CONFIG", defaultKubeConfig),
157-
BPFSampleRate: getIntConfig("EXPERIMENTAL_BPF_SAMPLE_RATE", defaultBPFSampleRate),
158158
EstimatorModel: getConfig("ESTIMATOR_MODEL", defaultMetricValue),
159159
EstimatorSelectFilter: getConfig("ESTIMATOR_SELECT_FILTER", defaultMetricValue), // no filter
160160
CPUArchOverride: getConfig("CPU_ARCH_OVERRIDE", defaultCPUArchOverride),
161+
BPFActiveSampleWindowMS: getIntConfig("EXPERIMENTAL_BPF_ACTIVE_SAMPLE_WINDOW_MS", 1000),
162+
BPFIdleSampleWindowMS: getIntConfig("EXPERIMENTAL_BPF_IDLE_SAMPLE_WINDOW_MS", 0),
161163
}
162164
}
163165

@@ -261,7 +263,8 @@ func logBoolConfigs() {
261263
klog.V(5).Infof("EXPOSE_BPF_METRICS: %t", instance.Kepler.ExposeBPFMetrics)
262264
klog.V(5).Infof("EXPOSE_COMPONENT_POWER: %t", instance.Kepler.ExposeComponentPower)
263265
klog.V(5).Infof("EXPOSE_ESTIMATED_IDLE_POWER_METRICS: %t. This only impacts when the power is estimated using pre-prained models. Estimated idle power is meaningful only when Kepler is running on bare-metal or with a single virtual machine (VM) on the node.", instance.Kepler.ExposeIdlePowerMetrics)
264-
klog.V(5).Infof("EXPERIMENTAL_BPF_SAMPLE_RATE: %d", instance.Kepler.BPFSampleRate)
266+
klog.V(5).Infof("EXPERIMENTAL_BPF_ACTIVE_SAMPLE_WINDOW_MS: %d", instance.Kepler.BPFActiveSampleWindowMS)
267+
klog.V(5).Infof("EXPERIMENTAL_BPF_IDLE_SAMPLE_WINDOW_MS: %d", instance.Kepler.BPFIdleSampleWindowMS)
265268
}
266269
}
267270

@@ -395,6 +398,21 @@ func SetGPUUsageMetric(metric string) {
395398
instance.Metrics.GPUUsageMetric = metric
396399
}
397400

401+
func GetBPFActiveSampleWindowMS() int {
402+
ensureConfigInitialized()
403+
return instance.Kepler.BPFActiveSampleWindowMS
404+
}
405+
406+
func GetBPFIdleSampleWindowMS() int {
407+
ensureConfigInitialized()
408+
return instance.Kepler.BPFIdleSampleWindowMS
409+
}
410+
411+
func GetDCGMHostEngineEndpoint() string {
412+
ensureConfigInitialized()
413+
return instance.DCGMHostEngineEndpoint
414+
}
415+
398416
func (c *Config) getUnixName() (unix.Utsname, error) {
399417
var utsname unix.Utsname
400418
err := unix.Uname(&utsname)
@@ -552,11 +570,6 @@ func ExposeIRQCounterMetrics() bool {
552570
return instance.Kepler.ExposeIRQCounterMetrics
553571
}
554572

555-
func GetBPFSampleRate() int {
556-
ensureConfigInitialized()
557-
return instance.Kepler.BPFSampleRate
558-
}
559-
560573
func GetRedfishCredFilePath() string {
561574
ensureConfigInitialized()
562575
return instance.Redfish.CredFilePath

pkg/sensors/accelerator/device/sources/dcgm.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ type GPUDcgm struct {
6262
}
6363

6464
func init() {
65-
if _, err := dcgm.Init(dcgm.Standalone, config.DCGMHostEngineEndpoint, isSocket); err != nil {
65+
if _, err := dcgm.Init(dcgm.Standalone, config.GetDCGMHostEngineEndpoint(), isSocket); err != nil {
6666
klog.Errorf("Error initializing dcgm: %v", err)
6767
return
6868
}
@@ -131,7 +131,7 @@ func (d *GPUDcgm) InitLib() (err error) {
131131
err = fmt.Errorf("could not init dcgm: %v", r)
132132
}
133133
}()
134-
cleanup, err := dcgm.Init(dcgm.Standalone, config.DCGMHostEngineEndpoint, isSocket)
134+
cleanup, err := dcgm.Init(dcgm.Standalone, config.GetDCGMHostEngineEndpoint(), isSocket)
135135
if err != nil {
136136
klog.Infof("There is no DCGM daemon running in the host: %s", err)
137137
// embedded mode is not recommended for production per https://github.com/NVIDIA/dcgm-exporter/issues/22#issuecomment-1321521995

0 commit comments

Comments
 (0)