Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Print task backtraces before timeout #185

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions src/ReTestItems.jl
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ function _runtests_in_current_env(
run_number = 1
max_runs = 1 + max(retries, testitem.retries)
while run_number ≤ max_runs
res = runtestitem(testitem, ctx; test_end_expr, verbose_results, logs)
res = runtestitem(testitem, ctx; test_end_expr, verbose_results, logs, timeout=something(testitem.timeout, testitem_timeout))
ts = res.testset
print_errors_and_captured_logs(testitem, run_number; logs)
report_empty_testsets(testitem, ts)
Expand Down Expand Up @@ -414,7 +414,7 @@ end
# The provided `worker_num` is only for logging purposes, and not persisted as part of the worker.
function start_worker(proj_name, nworker_threads, worker_init_expr, ntestitems; worker_num=nothing)
w = Worker(; threads="$nworker_threads")
i = worker_num == nothing ? "" : " $worker_num"
i = worker_num === nothing ? "" : " $worker_num"
# remote_fetch here because we want to make sure the worker is all setup before starting to eval testitems
remote_fetch(w, quote
using ReTestItems, Test
Expand Down Expand Up @@ -518,7 +518,7 @@ function manage_worker(
end
testitem.workerid[] = worker.pid
timeout = something(testitem.timeout, default_timeout)
fut = remote_eval(worker, :(ReTestItems.runtestitem($testitem, GLOBAL_TEST_CONTEXT; test_end_expr=$(QuoteNode(test_end_expr)), verbose_results=$verbose_results, logs=$(QuoteNode(logs)))))
fut = remote_eval(worker, :(ReTestItems.runtestitem($testitem, GLOBAL_TEST_CONTEXT; test_end_expr=$(QuoteNode(test_end_expr)), verbose_results=$verbose_results, logs=$(QuoteNode(logs)), timeout=$timeout)))
max_runs = 1 + max(retries, testitem.retries)
try
timer = Timer(timeout) do tm
Expand Down Expand Up @@ -919,11 +919,22 @@ end
# when `runtestitem` called directly or `@testitem` called outside of `runtests`.
function runtestitem(
ti::TestItem, ctx::TestContext;
test_end_expr::Union{Nothing,Expr}=nothing, logs::Symbol=:eager, verbose_results::Bool=true, finish_test::Bool=true,
test_end_expr::Union{Nothing,Expr}=nothing, logs::Symbol=:eager, verbose_results::Bool=true, finish_test::Bool=true, timeout::Int=something(ti.timeout, DEFAULT_TESTITEM_TIMEOUT)
)
if should_skip(ti)::Bool
return skiptestitem(ti, ctx; verbose_results)
end

# TODO: How to control this? Should we reuse `timeout_profile_wait`?
timer = Timer(timeout - clamp(0.2 * timeout, 0.25, 5.0)) do _
if logs !== :eager
@loglock _redirect_logs(debugpath(ti)) do
ccall(:jl_print_task_backtraces, Cvoid, ())
end
else
ccall(:jl_print_task_backtraces, Cvoid, ())
end
end
if test_end_expr === nothing
has_test_end_expr = false
test_end_expr = Expr(:block)
Expand Down Expand Up @@ -1012,6 +1023,7 @@ function runtestitem(
(Test.Base).current_exceptions(),
LineNumberNode(ti.line, ti.file)))
finally
close(timer)
# Make sure all test setup logs are commited to file
foreach(ts->isassigned(ts.logstore) && flush(ts.logstore[]), ti.testsetups)
ts1 = Test.pop_testset()
Expand Down
19 changes: 15 additions & 4 deletions src/log_capture.jl
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ function logfile_name(ts::TestSetup)
end
logpath(ti::TestItem, i=nothing) = joinpath(RETESTITEMS_TEMP_FOLDER[], logfile_name(ti, i))
logpath(ts::TestSetup) = joinpath(RETESTITEMS_TEMP_FOLDER[], logfile_name(ts))
debugpath(ti::TestItem, i=nothing) = logpath(ti, i) * ".task_backtraces"

"""
_redirect_logs(f, target::Union{IO,String})
Expand Down Expand Up @@ -139,6 +140,7 @@ _has_logs(ts::TestSetup) = filesize(logpath(ts)) > 0
# The path might not exist if a testsetup always throws an error and we don't get to actually
# evaluate the test item.
_has_logs(ti::TestItem, i=nothing) = (path = logpath(ti, i); (isfile(path) && filesize(path) > 0))
_has_debug(ti::TestItem, i=nothing) = (path = debugpath(ti, i); isfile(path) && filesize(path) > 0)
# Stats to help diagnose OOM issues.
_mem_watermark() = string(
# Tracks the peak memory usage of a process / worker
Expand Down Expand Up @@ -172,7 +174,7 @@ function print_errors_and_captured_logs(
)
ts = ti.testsets[run_number]
has_errors = any_non_pass(ts)
has_logs = _has_logs(ti, run_number) || any(_has_logs, ti.testsetups)
has_logs = _has_logs(ti, run_number) || any(_has_logs, ti.testsetups) || _has_debug(ti, run_number)
if has_errors || logs == :batched
report_iob = IOContext(IOBuffer(), :color=>Base.get_have_color())
println(report_iob)
Expand All @@ -193,6 +195,7 @@ function print_errors_and_captured_logs(
end
# If we have errors, keep the tesitem log file for JUnit report.
!has_errors && rm(logpath(ti, run_number), force=true)
!has_errors && rm(debugpath(ti, run_number), force=true) # TODO: move the debug logs to the log file?
return nothing
end

Expand All @@ -217,14 +220,22 @@ function _print_captured_logs(io, ti::TestItem, run_number::Int)
for setup in ti.testsetups
_print_captured_logs(io, setup, ti)
end
has_debug = _has_debug(ti, run_number)
has_logs = _has_logs(ti, run_number)
bold_text = has_logs ? "Captured Logs" : "No Captured Logs"
has_printable = has_logs || has_debug

bold_text = has_printable ? "Captured Logs" : "No Captured Logs"
printstyled(io, bold_text; bold=true, color=Base.info_color())
print(io, " for test item $(repr(ti.name)) at ")
printstyled(io, _file_info(ti); bold=true, color=:default)
println(io, _on_worker(ti))
has_logs && open(logpath(ti, run_number), "r") do logstore
write(io, logstore)
if has_printable
has_logs && open(logpath(ti, run_number), "r") do logstore
write(io, logstore)
end
has_debug && open(debugpath(ti, run_number), "r") do logstore
write(io, logstore)
end
end
return nothing
end
Expand Down
Loading