@@ -55,6 +55,7 @@ static struct prog_env {
5555 bool reset ;
5656 bool timestamp ;
5757 bool verbose ;
58+ bool per_thread ;
5859} env = {
5960 .nr_locks = 99999999 ,
6061 .nr_stack_entries = 1 ,
@@ -71,7 +72,7 @@ static const char args_doc[] = "FUNCTION";
7172static const char program_doc [] =
7273"Trace mutex/sem lock acquisition and hold times, in nsec\n"
7374"\n"
74- "Usage: klockstat [-hRTv ] [-p PID] [-t TID] [-c FUNC] [-L LOCK] [-n NR_LOCKS]\n"
75+ "Usage: klockstat [-hPRTv ] [-p PID] [-t TID] [-c FUNC] [-L LOCK] [-n NR_LOCKS]\n"
7576" [-s NR_STACKS] [-S SORT] [-d DURATION] [-i INTERVAL]\n"
7677"\v"
7778"Examples:\n"
@@ -86,8 +87,9 @@ static const char program_doc[] =
8687" klockstat -S acq_count # sort lock acquired results by acquire count\n"
8788" klockstat -S hld_total # sort lock held results by total held time\n"
8889" klockstat -S acq_count,hld_total # combination of above\n"
89- " klockstat -n 3 # display top 3 locks\n"
90+ " klockstat -n 3 # display top 3 locks/threads \n"
9091" klockstat -s 6 # display 6 stack entries per lock\n"
92+ " klockstat -P # print stats per thread\n"
9193;
9294
9395static const struct argp_option opts [] = {
@@ -97,7 +99,7 @@ static const struct argp_option opts[] = {
9799 { "caller" , 'c' , "FUNC" , 0 , "Filter by caller string prefix" },
98100 { "lock" , 'L' , "LOCK" , 0 , "Filter by specific ksym lock name" },
99101 { 0 , 0 , 0 , 0 , "" },
100- { "locks" , 'n' , "NR_LOCKS" , 0 , "Number of locks to print" },
102+ { "locks" , 'n' , "NR_LOCKS" , 0 , "Number of locks or threads to print" },
101103 { "stacks" , 's' , "NR_STACKS" , 0 , "Number of stack entries to print per lock" },
102104 { "sort" , 'S' , "SORT" , 0 , "Sort by field:\n acq_[max|total|count]\n hld_[max|total|count]" },
103105 { 0 , 0 , 0 , 0 , "" },
@@ -106,6 +108,7 @@ static const struct argp_option opts[] = {
106108 { "reset" , 'R' , NULL , 0 , "Reset stats each interval" },
107109 { "timestamp" , 'T' , NULL , 0 , "Print timestamp" },
108110 { "verbose" , 'v' , NULL , 0 , "Verbose debug output" },
111+ { "per-thread" , 'P' , NULL , 0 , "Print per-thread stats" },
109112
110113 { NULL , 'h' , NULL , OPTION_HIDDEN , "Show the full help" },
111114 {},
@@ -229,6 +232,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
229232 case 'T' :
230233 env -> timestamp = true;
231234 break ;
235+ case 'P' :
236+ env -> per_thread = true;
237+ break ;
232238 case 'h' :
233239 argp_state_help (state , stderr , ARGP_HELP_STD_HELP );
234240 break ;
@@ -241,6 +247,10 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
241247 env -> interval = env -> duration ;
242248 env -> iterations = env -> duration / env -> interval ;
243249 }
250+ if (env -> per_thread && env -> nr_stack_entries != 1 ) {
251+ warn ("--per-thread and --stacks cannot be used together\n" );
252+ argp_usage (state );
253+ }
244254 break ;
245255 default :
246256 return ARGP_ERR_UNKNOWN ;
@@ -327,6 +337,12 @@ static char *symname(struct ksyms *ksyms, uint64_t pc, char *buf, size_t n)
327337 return buf ;
328338}
329339
340+ static char * print_caller (char * buf , int size , struct stack_stat * ss )
341+ {
342+ snprintf (buf , size , "%u %16s" , ss -> stack_id , ss -> ls .acq_max_comm );
343+ return buf ;
344+ }
345+
330346static char * print_time (char * buf , int size , uint64_t nsec )
331347{
332348 struct {
@@ -355,7 +371,12 @@ static char *print_time(char *buf, int size, uint64_t nsec)
355371
356372static void print_acq_header (void )
357373{
358- printf ("\n Caller Avg Wait Count Max Wait Total Wait\n" );
374+ if (env .per_thread )
375+ printf ("\n Tid Comm" );
376+ else
377+ printf ("\n Caller" );
378+
379+ printf (" Avg Wait Count Max Wait Total Wait\n" );
359380}
360381
361382static void print_acq_stat (struct ksyms * ksyms , struct stack_stat * ss ,
@@ -374,19 +395,39 @@ static void print_acq_stat(struct ksyms *ksyms, struct stack_stat *ss,
374395 print_time (max , sizeof (max ), ss -> ls .acq_max_time ),
375396 print_time (tot , sizeof (tot ), ss -> ls .acq_total_time ));
376397 for (i = 1 ; i < nr_stack_entries ; i ++ ) {
377- if (!ss -> bt [i ])
398+ if (!ss -> bt [i ] || env . per_thread )
378399 break ;
379400 printf ("%37s\n" , symname (ksyms , ss -> bt [i ], buf , sizeof (buf )));
380401 }
381- if (nr_stack_entries > 1 )
402+ if (nr_stack_entries > 1 && ! env . per_thread )
382403 printf (" Max PID %llu, COMM %s\n" ,
383404 ss -> ls .acq_max_id >> 32 ,
384405 ss -> ls .acq_max_comm );
385406}
386407
408+ static void print_acq_task (struct stack_stat * ss )
409+ {
410+ char buf [40 ];
411+ char avg [40 ];
412+ char max [40 ];
413+ char tot [40 ];
414+
415+ printf ("%37s %9s %8llu %10s %12s\n" ,
416+ print_caller (buf , sizeof (buf ), ss ),
417+ print_time (avg , sizeof (avg ), ss -> ls .acq_total_time / ss -> ls .acq_count ),
418+ ss -> ls .acq_count ,
419+ print_time (max , sizeof (max ), ss -> ls .acq_max_time ),
420+ print_time (tot , sizeof (tot ), ss -> ls .acq_total_time ));
421+ }
422+
387423static void print_hld_header (void )
388424{
389- printf ("\n Caller Avg Hold Count Max Hold Total Hold\n" );
425+ if (env .per_thread )
426+ printf ("\n Tid Comm" );
427+ else
428+ printf ("\n Caller" );
429+
430+ printf (" Avg Hold Count Max Hold Total Hold\n" );
390431}
391432
392433static void print_hld_stat (struct ksyms * ksyms , struct stack_stat * ss ,
@@ -405,16 +446,31 @@ static void print_hld_stat(struct ksyms *ksyms, struct stack_stat *ss,
405446 print_time (max , sizeof (max ), ss -> ls .hld_max_time ),
406447 print_time (tot , sizeof (tot ), ss -> ls .hld_total_time ));
407448 for (i = 1 ; i < nr_stack_entries ; i ++ ) {
408- if (!ss -> bt [i ])
449+ if (!ss -> bt [i ] || env . per_thread )
409450 break ;
410451 printf ("%37s\n" , symname (ksyms , ss -> bt [i ], buf , sizeof (buf )));
411452 }
412- if (nr_stack_entries > 1 )
453+ if (nr_stack_entries > 1 && ! env . per_thread )
413454 printf (" Max PID %llu, COMM %s\n" ,
414455 ss -> ls .hld_max_id >> 32 ,
415456 ss -> ls .hld_max_comm );
416457}
417458
459+ static void print_hld_task (struct stack_stat * ss )
460+ {
461+ char buf [40 ];
462+ char avg [40 ];
463+ char max [40 ];
464+ char tot [40 ];
465+
466+ printf ("%37s %9s %8llu %10s %12s\n" ,
467+ print_caller (buf , sizeof (buf ), ss ),
468+ print_time (avg , sizeof (avg ), ss -> ls .hld_total_time / ss -> ls .hld_count ),
469+ ss -> ls .hld_count ,
470+ print_time (max , sizeof (max ), ss -> ls .hld_max_time ),
471+ print_time (tot , sizeof (tot ), ss -> ls .hld_total_time ));
472+ }
473+
418474static int print_stats (struct ksyms * ksyms , int stack_map , int stat_map )
419475{
420476 struct stack_stat * * stats , * ss ;
@@ -423,6 +479,7 @@ static int print_stats(struct ksyms *ksyms, int stack_map, int stat_map)
423479 uint32_t lookup_key = 0 ;
424480 uint32_t stack_id ;
425481 int ret , i ;
482+ int nr_stack_entries ;
426483
427484 stats = calloc (stats_sz , sizeof (void * ));
428485 if (!stats ) {
@@ -458,31 +515,39 @@ static int print_stats(struct ksyms *ksyms, int stack_map, int stat_map)
458515 free (ss );
459516 continue ;
460517 }
461- if (bpf_map_lookup_elem (stack_map , & stack_id , & ss -> bt )) {
518+ if (! env . per_thread && bpf_map_lookup_elem (stack_map , & stack_id , & ss -> bt )) {
462519 /* Can still report the results without a backtrace. */
463520 warn ("failed to lookup stack_id %u\n" , stack_id );
464521 }
465- if (!caller_is_traced (ksyms , ss -> bt [0 ])) {
522+ if (!env . per_thread && ! caller_is_traced (ksyms , ss -> bt [0 ])) {
466523 free (ss );
467524 continue ;
468525 }
469526 stats [stat_idx ++ ] = ss ;
470527 }
471528
529+ nr_stack_entries = MIN (env .nr_stack_entries , PERF_MAX_STACK_DEPTH );
530+
472531 qsort (stats , stat_idx , sizeof (void * ), sort_by_acq );
473532 for (i = 0 ; i < MIN (env .nr_locks , stat_idx ); i ++ ) {
474533 if (i == 0 || env .nr_stack_entries > 1 )
475534 print_acq_header ();
476- print_acq_stat (ksyms , stats [i ],
477- MIN (env .nr_stack_entries , PERF_MAX_STACK_DEPTH ));
535+
536+ if (env .per_thread )
537+ print_acq_task (stats [i ]);
538+ else
539+ print_acq_stat (ksyms , stats [i ], nr_stack_entries );
478540 }
479541
480542 qsort (stats , stat_idx , sizeof (void * ), sort_by_hld );
481543 for (i = 0 ; i < MIN (env .nr_locks , stat_idx ); i ++ ) {
482544 if (i == 0 || env .nr_stack_entries > 1 )
483545 print_hld_header ();
484- print_hld_stat (ksyms , stats [i ],
485- MIN (env .nr_stack_entries , PERF_MAX_STACK_DEPTH ));
546+
547+ if (env .per_thread )
548+ print_hld_task (stats [i ]);
549+ else
550+ print_hld_stat (ksyms , stats [i ], nr_stack_entries );
486551 }
487552
488553 for (i = 0 ; i < stat_idx ; i ++ )
@@ -565,6 +630,7 @@ int main(int argc, char **argv)
565630 obj -> rodata -> targ_tgid = env .pid ;
566631 obj -> rodata -> targ_pid = env .tid ;
567632 obj -> rodata -> targ_lock = lock_addr ;
633+ obj -> rodata -> per_thread = env .per_thread ;
568634
569635 if (fentry_can_attach ("mutex_lock_nested" , NULL )) {
570636 bpf_program__set_attach_target (obj -> progs .mutex_lock , 0 ,
0 commit comments