From 70d89c2f3306236f39b78ee33ef6c09d2674c6c7 Mon Sep 17 00:00:00 2001 From: Clark Gaebel Date: Mon, 25 Apr 2022 22:11:44 -0400 Subject: [PATCH] Add Go-specific hacks to make demo.go trace See the comment in trace_writer.ml for more details about what this entails. The gist of what I've done here is review every call to `gogo` within demo.go and make sure the stack returns to the right spot. Fixes #100 --- src/trace_writer.ml | 75 ++++++++++++++++++++++++++++++++++++++++++- test/simple_gogo.ml | 62 +++++++++++++++++++++++++++++++++++ test/simple_gogo.mli | 1 + test/simple_gogo.perf | 61 +++++++++++++++++++++++++++++++++++ 4 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 test/simple_gogo.ml create mode 100644 test/simple_gogo.mli create mode 100644 test/simple_gogo.perf diff --git a/src/trace_writer.ml b/src/trace_writer.ml index 3901715d4..25a5d3a1e 100644 --- a/src/trace_writer.ml +++ b/src/trace_writer.ml @@ -479,7 +479,7 @@ let call t thread_info ~time ~location = Callstack.push thread_info.callstack location.symbol ;; -let ret t (thread_info : _ Thread_info.t) ~time = +let ret_without_checking_for_go_hacks t (thread_info : _ Thread_info.t) ~time = match Callstack.pop thread_info.callstack with | Some symbol -> add_event t thread_info time { symbol; kind = Ret } | None -> @@ -495,6 +495,7 @@ let ret t (thread_info : _ Thread_info.t) ~time = ;; let rec clear_callstack t (thread_info : _ Thread_info.t) ~time = + let ret = ret_without_checking_for_go_hacks in match Callstack.top thread_info.callstack with | None -> () | Some _ -> @@ -524,6 +525,69 @@ let end_of_thread t (thread_info : _ Thread_info.t) ~time ~is_kernel_address : u Thread_info.set_callstack thread_info ~is_kernel_address ~time ;; +(* Go (the programming language) has coroutines known as goroutines. The function [gogo] jumps + from one goroutine to the next. Since [gogo] can jump anywhere, it's a shining example of what + magic-trace can't handle out of the box. So, we hack it. + + Most of the time, control flow returns parallel to (i.e. as if jumped from) the previous caller + of [runtime.mcall] or [runtime.morestack.abi0]. + + At startup (and maybe other situations?), gogo clears all callstacks and executes [main]. *) +module Go_hacks : sig + val ret_track_gogo + : 'a inner + -> 'a Thread_info.t + -> time:Mapped_time.t + -> returned_from:Symbol.t option + -> unit +end = struct + let is_gogo (symbol : Symbol.t) = + match symbol with + | From_perf "gogo" -> true + | _ -> false + ;; + + let is_known_gogo_destination (symbol : Symbol.t) = + match symbol with + | From_perf ("runtime.mcall" | "runtime.morestack.abi0") -> true + | _ -> false + ;; + + let current_stack_contains_known_gogo_destination (thread_info : _ Thread_info.t) = + Stack.find thread_info.callstack.stack ~f:is_known_gogo_destination |> Option.is_some + ;; + + let rec pop_until_gogo_destination t (thread_info : _ Thread_info.t) ~time = + let ret = ret_without_checking_for_go_hacks in + match Callstack.top thread_info.callstack with + | None -> () + | Some symbol -> + ret t thread_info ~time; + (* Return one past the known gogo destination. This hack is necessary because: + + - all gogo-destination functions are jumped into and out of + - magic-trace translates the jump returning from gogo-destination into a ret/call pair + - this runs on the ret, but the call is to gogo-destination's caller and we don't + want a second stack frame for that. + + This is a little janky because you see a stack frame momentarily end then start back + up again on every [gogo]. I think that's a small price to pay to keep all the Go hacks + in one place. *) + if is_known_gogo_destination symbol + then ret t thread_info ~time + else pop_until_gogo_destination t thread_info ~time + ;; + + let ret_track_gogo t thread_info ~time ~returned_from = + let is_ret_from_gogo = Option.value_map ~f:is_gogo returned_from ~default:false in + if is_ret_from_gogo + then + if current_stack_contains_known_gogo_destination thread_info + then pop_until_gogo_destination t thread_info ~time + else end_of_thread t thread_info ~time ~is_kernel_address:false + ;; +end + (* Handles ocaml exception unwinding, unless the application calls [raise_notrace] (in which case, magic-trace doesn't detect the irregular control flow). The way this works is that it counts the number of [caml_next_frame_descriptor] calls while an exception is unwinding, and knows to @@ -532,6 +596,9 @@ let end_of_thread t (thread_info : _ Thread_info.t) ~time ~is_kernel_address : u module Ocaml_hacks : sig val ret_track_exn_data : 'a inner -> 'a Thread_info.t -> time:Mapped_time.t -> unit end = struct + (* It's ocaml, not go. *) + let ret = ret_without_checking_for_go_hacks + let unwind_stack t (thread_info : _ Thread_info.t) ~time diff = let frames_to_unwind = thread_info.frames_to_unwind in for _ = 0 to !frames_to_unwind + diff do @@ -554,6 +621,12 @@ end = struct ;; end +let ret t (thread_info : _ Thread_info.t) ~time : unit = + let returned_from = Callstack.top thread_info.callstack in + ret_without_checking_for_go_hacks t thread_info ~time; + Go_hacks.ret_track_gogo t thread_info ~time ~returned_from +;; + let check_current_symbol t (thread_info : _ Thread_info.t) diff --git a/test/simple_gogo.ml b/test/simple_gogo.ml new file mode 100644 index 000000000..f49034192 --- /dev/null +++ b/test/simple_gogo.ml @@ -0,0 +1,62 @@ +open! Core + +let%expect_test "an mcall/gogo pair from a small go program" = + Perf_script.run ~trace_mode:Userspace "simple_gogo.perf"; + [%expect + {| + 3109264/3109264 3363099.849998148: tr strt 0 [unknown] => 404bf2 runtime.chanrecv+0x372 + 3109264/3109264 3363099.849998187: call 404de7 runtime.chanrecv+0x567 => 4307e0 runtime.gopark+0x0 + 3109264/3109264 3363099.849998187: call 4308b1 runtime.gopark+0xd1 => 455260 runtime.mcall+0x0 + 3109264/3109264 3363099.849998225: call 4552a0 runtime.mcall+0x40 => 436e60 runtime.park_m+0x0 + -> 39ns BEGIN runtime.gopark + -> 58ns BEGIN runtime.mcall + 3109264/3109264 3363099.849998235: call 436eb4 runtime.park_m+0x54 => 432360 runtime.casgstatus+0x0 + -> 77ns BEGIN runtime.park_m + 3109264/3109264 3363099.849998261: return 43262b runtime.casgstatus+0x2cb => 436eb9 runtime.park_m+0x59 + -> 87ns BEGIN runtime.casgstatus + 3109264/3109264 3363099.849998718: call 436f05 runtime.park_m+0xa5 => 405180 runtime.chanparkcommit+0x0 + -> 113ns END runtime.casgstatus + 3109264/3109264 3363099.849998831: call 4051ab runtime.chanparkcommit+0x2b => 4094e0 runtime.unlock2+0x0 + -> 570ns BEGIN runtime.chanparkcommit + 3109264/3109264 3363099.849998854: return 409548 runtime.unlock2+0x68 => 4051b0 runtime.chanparkcommit+0x30 + -> 683ns BEGIN runtime.unlock2 + 3109264/3109264 3363099.849998854: return 4051be runtime.chanparkcommit+0x3e => 436f07 runtime.park_m+0xa7 + 3109264/3109264 3363099.849998866: call 436fa8 runtime.park_m+0x148 => 436820 runtime.schedule+0x0 + -> 706ns END runtime.unlock2 + -> 706ns END runtime.chanparkcommit + 3109264/3109264 3363099.850068993: call 436872 runtime.schedule+0x52 => 435140 runtime.execute+0x0 + -> 718ns BEGIN runtime.schedule + 3109264/3109264 3363099.850069069: call 4351c0 runtime.execute+0x80 => 432360 runtime.casgstatus+0x0 + -> 70.845us BEGIN runtime.execute + 3109264/3109264 3363099.850069114: return 43262b runtime.casgstatus+0x2cb => 4351c5 runtime.execute+0x85 + -> 70.921us BEGIN runtime.casgstatus + 3109264/3109264 3363099.850069116: call 435269 runtime.execute+0x129 => 455240 runtime.gogo.abi0+0x0 + -> 70.966us END runtime.casgstatus + 3109264/3109264 3363099.850069116: jmp 45524c runtime.gogo.abi0+0xc => 454460 gogo+0x0 + 3109264/3109264 3363099.850069118: jmp 45449e gogo+0x3e => 4308b6 runtime.gopark+0xd6 + -> 70.968us BEGIN runtime.gogo.abi0 + -> 70.969us END runtime.gogo.abi0 + -> 70.969us BEGIN gogo + 3109264/3109264 3363099.850069130: return 4308c0 runtime.gopark+0xe0 => 404dec runtime.chanrecv+0x56c + -> 70.97us END gogo + -> 70.97us END runtime.execute + -> 70.97us END runtime.schedule + -> 70.97us END runtime.park_m + -> 70.97us END runtime.mcall + -> 70.97us END runtime.gopark + -> 70.97us BEGIN runtime.gopark + 3109264/3109264 3363099.850069207: call 404ea0 runtime.chanrecv+0x620 => 430ce0 runtime.releaseSudog+0x0 + -> 70.982us END runtime.gopark + 3109264/3109264 3363099.850069390: return 430e4e runtime.releaseSudog+0x16e => 404ea5 runtime.chanrecv+0x625 + -> 71.059us BEGIN runtime.releaseSudog + 3109264/3109264 3363099.850069411: return 404ebe runtime.chanrecv+0x63e => 404858 runtime.chanrecv1+0x18 + -> 71.242us END runtime.releaseSudog + 3109264/3109264 3363099.850069422: return 404861 runtime.chanrecv1+0x21 => 413f76 runtime.gcenable+0xb6 + -> 71.263us END runtime.chanrecv + END + -> 0ns BEGIN runtime.gcenable [inferred start time] + -> 0ns BEGIN runtime.chanrecv1 [inferred start time] + -> 0ns BEGIN runtime.chanrecv + -> 71.274us END runtime.chanrecv1 + -> 71.274us END runtime.gcenable |}] +;; diff --git a/test/simple_gogo.mli b/test/simple_gogo.mli new file mode 100644 index 000000000..6d41c4508 --- /dev/null +++ b/test/simple_gogo.mli @@ -0,0 +1 @@ +(* intentionally left blank *) diff --git a/test/simple_gogo.perf b/test/simple_gogo.perf new file mode 100644 index 000000000..727f1bbe8 --- /dev/null +++ b/test/simple_gogo.perf @@ -0,0 +1,61 @@ +3109264/3109264 3363099.849998148: tr strt 0 [unknown] => 404bf2 runtime.chanrecv+0x372 +3109264/3109264 3363099.849998166: jcc 404c03 runtime.chanrecv+0x383 => 404c0d runtime.chanrecv+0x38d +3109264/3109264 3363099.849998166: jmp 404c2a runtime.chanrecv+0x3aa => 404c48 runtime.chanrecv+0x3c8 +3109264/3109264 3363099.849998166: jmp 404c69 runtime.chanrecv+0x3e9 => 404c7e runtime.chanrecv+0x3fe +3109264/3109264 3363099.849998166: jmp 404cad runtime.chanrecv+0x42d => 404cdf runtime.chanrecv+0x45f +3109264/3109264 3363099.849998166: jmp 404cf1 runtime.chanrecv+0x471 => 404d0e runtime.chanrecv+0x48e +3109264/3109264 3363099.849998166: jcc 404d2d runtime.chanrecv+0x4ad => 404d75 runtime.chanrecv+0x4f5 +3109264/3109264 3363099.849998187: jmp 404d8e runtime.chanrecv+0x50e => 404dbc runtime.chanrecv+0x53c +3109264/3109264 3363099.849998187: call 404de7 runtime.chanrecv+0x567 => 4307e0 runtime.gopark+0x0 +3109264/3109264 3363099.849998187: jcc 430824 runtime.gopark+0x44 => 430833 runtime.gopark+0x53 +3109264/3109264 3363099.849998187: jmp 43084a runtime.gopark+0x6a => 43086d runtime.gopark+0x8d +3109264/3109264 3363099.849998187: jcc 430895 runtime.gopark+0xb5 => 4308aa runtime.gopark+0xca +3109264/3109264 3363099.849998187: call 4308b1 runtime.gopark+0xd1 => 455260 runtime.mcall+0x0 +3109264/3109264 3363099.849998214: jcc 455282 runtime.mcall+0x22 => 455289 runtime.mcall+0x29 +3109264/3109264 3363099.849998225: call 4552a0 runtime.mcall+0x40 => 436e60 runtime.park_m+0x0 +3109264/3109264 3363099.849998235: jcc 436e89 runtime.park_m+0x29 => 436eaa runtime.park_m+0x4a +3109264/3109264 3363099.849998235: call 436eb4 runtime.park_m+0x54 => 432360 runtime.casgstatus+0x0 +3109264/3109264 3363099.849998252: jcc 43238a runtime.casgstatus+0x2a => 4323ce runtime.casgstatus+0x6e +3109264/3109264 3363099.849998252: jmp 4323d2 runtime.casgstatus+0x72 => 4323ea runtime.casgstatus+0x8a +3109264/3109264 3363099.849998252: jcc 432403 runtime.casgstatus+0xa3 => 4324aa runtime.casgstatus+0x14a +3109264/3109264 3363099.849998252: jcc 4324b9 runtime.casgstatus+0x159 => 4324c2 runtime.casgstatus+0x162 +3109264/3109264 3363099.849998261: jcc 4324d8 runtime.casgstatus+0x178 => 432622 runtime.casgstatus+0x2c2 +3109264/3109264 3363099.849998261: return 43262b runtime.casgstatus+0x2cb => 436eb9 runtime.park_m+0x59 +3109264/3109264 3363099.849998718: call 436f05 runtime.park_m+0xa5 => 405180 runtime.chanparkcommit+0x0 +3109264/3109264 3363099.849998831: call 4051ab runtime.chanparkcommit+0x2b => 4094e0 runtime.unlock2+0x0 +3109264/3109264 3363099.849998838: jcc 409507 runtime.unlock2+0x27 => 409513 runtime.unlock2+0x33 +3109264/3109264 3363099.849998838: jcc 409535 runtime.unlock2+0x55 => 40953f runtime.unlock2+0x5f +3109264/3109264 3363099.849998854: return 409548 runtime.unlock2+0x68 => 4051b0 runtime.chanparkcommit+0x30 +3109264/3109264 3363099.849998854: return 4051be runtime.chanparkcommit+0x3e => 436f07 runtime.park_m+0xa7 +3109264/3109264 3363099.849998854: jmp 436f26 runtime.park_m+0xc6 => 436f36 runtime.park_m+0xd6 +3109264/3109264 3363099.849998866: jmp 436f50 runtime.park_m+0xf0 => 436f65 runtime.park_m+0x105 +3109264/3109264 3363099.849998866: jcc 436f67 runtime.park_m+0x107 => 436fa8 runtime.park_m+0x148 +3109264/3109264 3363099.849998866: call 436fa8 runtime.park_m+0x148 => 436820 runtime.schedule+0x0 +3109264/3109264 3363099.850068993: call 436872 runtime.schedule+0x52 => 435140 runtime.execute+0x0 +3109264/3109264 3363099.850069069: jmp 435173 runtime.execute+0x33 => 435185 runtime.execute+0x45 +3109264/3109264 3363099.850069069: jmp 4351a1 runtime.execute+0x61 => 4351ac runtime.execute+0x6c +3109264/3109264 3363099.850069069: call 4351c0 runtime.execute+0x80 => 432360 runtime.casgstatus+0x0 +3109264/3109264 3363099.850069072: jcc 43238a runtime.casgstatus+0x2a => 4323ce runtime.casgstatus+0x6e +3109264/3109264 3363099.850069072: jmp 4323d2 runtime.casgstatus+0x72 => 4323ea runtime.casgstatus+0x8a +3109264/3109264 3363099.850069114: jcc 432403 runtime.casgstatus+0xa3 => 4324aa runtime.casgstatus+0x14a +3109264/3109264 3363099.850069114: jcc 4324ad runtime.casgstatus+0x14d => 4324d1 runtime.casgstatus+0x171 +3109264/3109264 3363099.850069114: jcc 4324d8 runtime.casgstatus+0x178 => 432622 runtime.casgstatus+0x2c2 +3109264/3109264 3363099.850069114: return 43262b runtime.casgstatus+0x2cb => 4351c5 runtime.execute+0x85 +3109264/3109264 3363099.850069114: jcc 4351f2 runtime.execute+0xb2 => 4351fb runtime.execute+0xbb +3109264/3109264 3363099.850069114: jcc 435226 runtime.execute+0xe6 => 435232 runtime.execute+0xf2 +3109264/3109264 3363099.850069116: jcc 435239 runtime.execute+0xf9 => 435261 runtime.execute+0x121 +3109264/3109264 3363099.850069116: call 435269 runtime.execute+0x129 => 455240 runtime.gogo.abi0+0x0 +3109264/3109264 3363099.850069116: jmp 45524c runtime.gogo.abi0+0xc => 454460 gogo+0x0 +3109264/3109264 3363099.850069118: jmp 45449e gogo+0x3e => 4308b6 runtime.gopark+0xd6 +3109264/3109264 3363099.850069130: return 4308c0 runtime.gopark+0xe0 => 404dec runtime.chanrecv+0x56c +3109264/3109264 3363099.850069155: jmp 404e21 runtime.chanrecv+0x5a1 => 404e2f runtime.chanrecv+0x5af +3109264/3109264 3363099.850069200: jcc 404e3d runtime.chanrecv+0x5bd => 404e5e runtime.chanrecv+0x5de +3109264/3109264 3363099.850069207: jmp 404e82 runtime.chanrecv+0x602 => 404e9c runtime.chanrecv+0x61c +3109264/3109264 3363099.850069207: call 404ea0 runtime.chanrecv+0x620 => 430ce0 runtime.releaseSudog+0x0 +3109264/3109264 3363099.850069336: jcc 430d87 runtime.releaseSudog+0xa7 => 430d92 runtime.releaseSudog+0xb2 +3109264/3109264 3363099.850069382: jcc 430dae runtime.releaseSudog+0xce => 430df9 runtime.releaseSudog+0x119 +3109264/3109264 3363099.850069390: jmp 430e15 runtime.releaseSudog+0x135 => 430e1c runtime.releaseSudog+0x13c +3109264/3109264 3363099.850069390: jcc 430e3b runtime.releaseSudog+0x15b => 430e45 runtime.releaseSudog+0x165 +3109264/3109264 3363099.850069390: return 430e4e runtime.releaseSudog+0x16e => 404ea5 runtime.chanrecv+0x625 +3109264/3109264 3363099.850069411: return 404ebe runtime.chanrecv+0x63e => 404858 runtime.chanrecv1+0x18 +3109264/3109264 3363099.850069422: return 404861 runtime.chanrecv1+0x21 => 413f76 runtime.gcenable+0xb6