@@ -325,7 +325,7 @@ static inline void do_page_cache_hit_increment(u32 curr_pid)
325
325
process_metrics -> page_cache_hit ++ ;
326
326
}
327
327
328
- static inline int do_kepler_sched_switch_trace (
328
+ static inline int do_kepler_sched_switch_trace_old (
329
329
u32 prev_pid , u32 next_pid , u32 prev_tgid , u32 next_tgid )
330
330
{
331
331
u32 cpu_id ;
@@ -404,3 +404,195 @@ bpf_map_lookup_or_try_init(void *map, const void *key, const void *init)
404
404
405
405
return bpf_map_lookup_elem (map , key );
406
406
}
407
+
408
// Per-period deltas of one process's accounting counters. Used to keep the
// smallest/largest period observed per tgid, and to interpolate metrics for
// periods missed while sampling is idle.
typedef struct period_metrics_t {
	u64 run_time_delta;     // process_run_time advanced during the period
	u64 cycles_delta;       // cpu_cycles advanced during the period
	u64 instr_delta;        // cpu_instr advanced during the period
	u64 cache_miss_delta;   // cache_miss advanced during the period
	u64 period_duration_ns; // wall-clock length of the period, in ns
} period_metrics_t;

// Minimum per-period deltas seen, keyed by tgid.
// NOTE(review): PERCPU_ARRAY keeps one value slot per CPU for each key, so
// extrema observed on different CPUs are tracked independently — confirm
// this is intended rather than a plain ARRAY or HASH map.
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, period_metrics_t);
	__uint(max_entries, MAP_SIZE);
} min_period_metrics SEC(".maps");

// Maximum per-period deltas seen, keyed by tgid (same per-CPU caveat as above).
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, period_metrics_t);
	__uint(max_entries, MAP_SIZE);
} max_period_metrics SEC(".maps");

// Last cumulative process_metrics_t sample per tgid; baseline for deltas.
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, process_metrics_t);
	__uint(max_entries, MAP_SIZE);
} last_sample SEC(".maps");

// Timestamp (ns) of the last idle-time interpolation per tgid.
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u64);
	__uint(max_entries, MAP_SIZE);
} last_interpolation_ts SEC(".maps");

// Timestamp (ns) when the current measurement period started, per tgid.
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u64);
	__uint(max_entries, MAP_SIZE);
} period_start_ts SEC(".maps");
450
+
451
+ // retain the last sample of the process metrics
452
+ static inline void update_period_statistics (struct process_metrics_t * curr_metrics , u32 tgid , u64 curr_ts ) {
453
+ struct process_metrics_t * last = bpf_map_lookup_elem (& last_sample , & tgid );
454
+ struct period_metrics_t period = {0 };
455
+ struct period_metrics_t * min_val , * max_val ;
456
+ u64 * period_start = bpf_map_lookup_elem (& period_start_ts , & tgid );
457
+ u32 key = 0 ;
458
+
459
+ if (!period_start ) {
460
+ bpf_map_update_elem (& period_start_ts , & tgid , & curr_ts , BPF_ANY );
461
+ return ;
462
+ }
463
+
464
+ period .period_duration_ns = curr_ts - * period_start ;
465
+
466
+ if (last ) {
467
+ period .run_time_delta = curr_metrics -> process_run_time - last -> process_run_time ;
468
+ period .cycles_delta = curr_metrics -> cpu_cycles - last -> cpu_cycles ;
469
+ period .instr_delta = curr_metrics -> cpu_instr - last -> cpu_instr ;
470
+ period .cache_miss_delta = curr_metrics -> cache_miss - last -> cache_miss ;
471
+ } else {
472
+ period .run_time_delta = curr_metrics -> process_run_time ;
473
+ period .cycles_delta = curr_metrics -> cpu_cycles ;
474
+ period .instr_delta = curr_metrics -> cpu_instr ;
475
+ period .cache_miss_delta = curr_metrics -> cache_miss ;
476
+ }
477
+
478
+ bpf_map_update_elem (& last_sample , & tgid , curr_metrics , BPF_ANY );
479
+ bpf_map_update_elem (& period_start_ts , & tgid , & curr_ts , BPF_ANY );
480
+
481
+ min_val = bpf_map_lookup_elem (& min_period_metrics , & tgid );
482
+ max_val = bpf_map_lookup_elem (& max_period_metrics , & tgid );
483
+
484
+ if (!min_val || !max_val ) {
485
+ bpf_map_update_elem (& min_period_metrics , & tgid , & period , BPF_ANY );
486
+ bpf_map_update_elem (& max_period_metrics , & tgid , & period , BPF_ANY );
487
+ return ;
488
+ }
489
+
490
+ if (period .period_duration_ns > 1000000 ) { // threshold with 1ms period
491
+ if (period .run_time_delta < min_val -> run_time_delta )
492
+ min_val -> run_time_delta = period .run_time_delta ;
493
+ if (period .run_time_delta > max_val -> run_time_delta )
494
+ max_val -> run_time_delta = period .run_time_delta ;
495
+
496
+ if (period .cycles_delta < min_val -> cycles_delta )
497
+ min_val -> cycles_delta = period .cycles_delta ;
498
+ if (period .cycles_delta > max_val -> cycles_delta )
499
+ max_val -> cycles_delta = period .cycles_delta ;
500
+
501
+ if (period .instr_delta < min_val -> instr_delta )
502
+ min_val -> instr_delta = period .instr_delta ;
503
+ if (period .instr_delta > max_val -> instr_delta )
504
+ max_val -> instr_delta = period .instr_delta ;
505
+
506
+ if (period .cache_miss_delta < min_val -> cache_miss_delta )
507
+ min_val -> cache_miss_delta = period .cache_miss_delta ;
508
+ if (period .cache_miss_delta > max_val -> cache_miss_delta )
509
+ max_val -> cache_miss_delta = period .cache_miss_delta ;
510
+ }
511
+ }
512
+
513
+ // Interpolate the metrics during idle
514
+ static inline void interpolate_idle_metrics (u32 tgid , u64 curr_ts ) {
515
+ struct process_metrics_t * curr_metrics = bpf_map_lookup_elem (& processes , & tgid );
516
+ struct period_metrics_t * min_val = bpf_map_lookup_elem (& min_period_metrics , & tgid );
517
+ struct period_metrics_t * max_val = bpf_map_lookup_elem (& max_period_metrics , & tgid );
518
+ u64 * last_ts = bpf_map_lookup_elem (& last_interpolation_ts , & tgid );
519
+
520
+ if (!curr_metrics || !min_val || !max_val || !last_ts )
521
+ return ;
522
+
523
+ u64 time_since_last = curr_ts - * last_ts ;
524
+ if (time_since_last < (IDLE_TIME * 1000000ULL ))
525
+ return ;
526
+
527
+ u64 avg_period_duration = (min_val -> period_duration_ns + max_val -> period_duration_ns ) / 2 ;
528
+ if (avg_period_duration == 0 )
529
+ return ;
530
+
531
+ u64 missed_periods = time_since_last / avg_period_duration ;
532
+ if (missed_periods == 0 )
533
+ return ;
534
+
535
+ u64 avg_runtime_delta = (min_val -> run_time_delta + max_val -> run_time_delta ) / 2 ;
536
+ u64 avg_cycles_delta = (min_val -> cycles_delta + max_val -> cycles_delta ) / 2 ;
537
+ u64 avg_instr_delta = (min_val -> instr_delta + max_val -> instr_delta ) / 2 ;
538
+ u64 avg_cache_miss_delta = (min_val -> cache_miss_delta + max_val -> cache_miss_delta ) / 2 ;
539
+
540
+ curr_metrics -> process_run_time += (avg_runtime_delta * missed_periods );
541
+ curr_metrics -> cpu_cycles += (avg_cycles_delta * missed_periods );
542
+ curr_metrics -> cpu_instr += (avg_instr_delta * missed_periods );
543
+ curr_metrics -> cache_miss += (avg_cache_miss_delta * missed_periods );
544
+
545
+ * last_ts = curr_ts ;
546
+ }
547
+
548
+ static inline int do_kepler_sched_switch_trace (
549
+ u32 prev_pid , u32 next_pid , u32 prev_tgid , u32 next_tgid )
550
+ {
551
+ u32 cpu_id ;
552
+ u64 curr_ts = bpf_ktime_get_ns ();
553
+ struct process_metrics_t * curr_tgid_metrics , * prev_tgid_metrics ;
554
+ struct process_metrics_t buf = {};
555
+
556
+ cpu_id = bpf_get_smp_processor_id ();
557
+
558
+ u32 key = 0 ;
559
+ u32 * tracking_flag = bpf_map_lookup_elem (& tracking_flag_map , & key );
560
+ u64 * start_time = bpf_map_lookup_elem (& start_time_map , & key );
561
+
562
+ if (tracking_flag && start_time ) {
563
+ u64 elapsed_time = (curr_ts - * start_time ) / 1000000ULL ;
564
+
565
+ if (* tracking_flag && elapsed_time >= ACTIVE_TIME ) {
566
+ * tracking_flag = 0 ;
567
+ * start_time = curr_ts ;
568
+ bpf_map_update_elem (& last_interpolation_ts , & prev_tgid , & curr_ts , BPF_ANY );
569
+ } else if (!* tracking_flag && elapsed_time >= IDLE_TIME ) {
570
+ * tracking_flag = 1 ;
571
+ * start_time = curr_ts ;
572
+ }
573
+
574
+ if (!* tracking_flag ) {
575
+ interpolate_idle_metrics (prev_tgid , curr_ts );
576
+ return 0 ;
577
+ }
578
+ }
579
+
580
+ collect_metrics_and_reset_counters (& buf , prev_pid , curr_ts , cpu_id );
581
+
582
+ if (buf .process_run_time > 0 ) {
583
+ prev_tgid_metrics = bpf_map_lookup_elem (& processes , & prev_tgid );
584
+ if (prev_tgid_metrics ) {
585
+ prev_tgid_metrics -> process_run_time += buf .process_run_time ;
586
+ prev_tgid_metrics -> cpu_cycles += buf .cpu_cycles ;
587
+ prev_tgid_metrics -> cpu_instr += buf .cpu_instr ;
588
+ prev_tgid_metrics -> cache_miss += buf .cache_miss ;
589
+
590
+ update_period_statistics (prev_tgid_metrics , prev_tgid , curr_ts );
591
+ }
592
+ }
593
+
594
+ bpf_map_update_elem (& pid_time_map , & next_pid , & curr_ts , BPF_ANY );
595
+ register_new_process_if_not_exist (prev_tgid );
596
+
597
+ return 0 ;
598
+ }
0 commit comments