11 changes: 5 additions & 6 deletions ci/githubstats/last.sql
@@ -1,6 +1,5 @@
 SELECT
-    -- Bazel's first_start_time is really the time the last attempt started.
-    bt.first_start_time AS "last_started_at",
+    bt.first_start_time,
 
     bt.total_run_duration * INTERVAL '1 second' AS "duration",
 
@@ -11,18 +10,18 @@ SELECT
         WHEN bt.overall_status = 4 THEN 'FAILED'
     END AS "status",
 
-    bi.build_id AS "invocation_id",
+    bi.build_id,
 
-    wr.head_branch AS "branch",
+    wr.head_branch,
 
     CASE
         -- This is to fix the weird reality that all master commits have pull_request_number 855
        -- and pull_request_url https://api.github.com/repos/bit-cook/ic/pulls/855.
         WHEN wr.event_type = 'pull_request' THEN CAST(wr.pull_request_number AS TEXT)
         ELSE ''
-    END AS "PR",
+    END AS "pull_request_number",
 
-    bi.head_sha AS "commit"
+    bi.head_sha
 
 FROM
     workflow_runs AS wr JOIN
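With the aliases dropped, last.sql now returns the raw schema column names (first_start_time, build_id, head_branch, pull_request_number, head_sha), and all display naming moves into query.py. A minimal sketch of that consuming side, assuming a pandas DataFrame df holding the rows returned by last.sql; the helper name is illustrative, not part of the PR:

import pandas as pd

def apply_display_columns(df: pd.DataFrame) -> pd.DataFrame:
    # Hypothetical helper mirroring what last() in query.py now does (minus hyperlinks).
    # Bazel's first_start_time is really the time the last attempt started.
    df["last_started_at"] = df["first_start_time"].apply(lambda t: t.strftime("%a %Y-%m-%d %X"))
    df["branch"] = df["head_branch"]
    df["PR"] = df["pull_request_number"].apply(lambda pr: f"#{pr}" if pr else "")
    df["commit"] = df["head_sha"].apply(lambda sha: sha[:7])
    return df
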
195 changes: 143 additions & 52 deletions ci/githubstats/query.py
@@ -224,6 +224,67 @@ def normalize_duration(td: pd.Timedelta):
     )
 
 
+def filter_columns(columns_metadata, columns_list):
+    """
+    Filter and reorder columns based on user specification.
+
+    Args:
+        columns_metadata: List of 3-tuples (column_name, header, alignment)
+        columns_list: List of column names like ["label", "total", "non_success"] or ["-owners", "-timeout"]
+
+    Returns:
+        Filtered/reordered list of 3-tuples (column_name, header, alignment)
+
+    """
+    if not columns_list:
+        # No columns specified: return all columns unchanged.
+        return [(column_name, header, align) for column_name, header, align in columns_metadata]
+
+    # Build a mapping from column name to its metadata triple.
+    available_columns = {column_name: (column_name, header, align) for column_name, header, align in columns_metadata}
+
+    # Check if all columns start with '-' (exclusion mode).
+    if all(col.startswith("-") for col in columns_list):
+        # Exclusion mode: start with all columns, remove the specified ones.
+        exclude_set = {col[1:] for col in columns_list}
+
+        # Validate that the excluded columns exist.
+        invalid_cols = exclude_set - set(available_columns.keys())
+        if invalid_cols:
+            die(
+                f"Invalid column names: {', '.join(sorted(invalid_cols))}\n"
+                f"Available columns: {', '.join(sorted(available_columns.keys()))}"
+            )
+
+        return [
+            (column_name, header, align)
+            for column_name, header, align in columns_metadata
+            if column_name not in exclude_set
+        ]
+
+    # Check if any columns start with '-' (mixed mode - not allowed).
+    if any(col.startswith("-") for col in columns_list):
+        die(
+            "Cannot mix inclusion and exclusion modes. Either specify columns to include, or prefix all with '-' to exclude."
+        )
+
+    # Inclusion mode: return only the specified columns, in the specified order.
+    # Validate that all columns exist.
+    invalid_cols = set(columns_list) - set(available_columns.keys())
+    if invalid_cols:
+        die(
+            f"Invalid column names: {', '.join(sorted(invalid_cols))}\n"
+            f"Available columns: {', '.join(sorted(available_columns.keys()))}"
+        )
+
+    result = [available_columns[col] for col in columns_list if col in available_columns]
+
+    if not result:
+        die("No valid columns to display after filtering.")
+
+    return result
+
+
 def download_and_process_logs(logs_base_dir, test_target: str, download_ic_logs: bool, df: pd.DataFrame):
     """
     Download the logs of all runs of test_target in the given DataFrame,
@@ -251,8 +312,8 @@ def download_and_process_logs(logs_base_dir, test_target: str, download_ic_logs: bool, df: pd.DataFrame):
         row["lock"] = threading.Lock()
 
         buildbuddy_url = row["buildbuddy_url"]
-        invocation_id = row["invocation_id"]
-        last_started_at = row["last_started_at"].strftime("%Y-%m-%dT%H:%M:%S")
+        invocation_id = row["build_id"]
+        last_started_at = row["first_start_time"].strftime("%Y-%m-%dT%H:%M:%S")
         invocation_dir = output_dir / f"{last_started_at}_{invocation_id}"
 
         # Parse the BuildBuddy URL to extract the cluster and its base URL for use with gRPC later.
@@ -818,24 +879,61 @@ def process_elasticsearch_hits_from_queue(
     log_file.close()
 
 
+# fmt: off
+TOP_COLUMNS = [
+    # (column_name, header, alignment)
+    ("label", "label", "left"),
+    ("total", "total", "decimal"),
+    ("non_success", "non_success", "decimal"),
+    ("flaky", "flaky", "decimal"),
+    ("timeout", "timeout", "decimal"),
+    ("fail", "fail", "decimal"),
+    ("non_success%", "non_success%", "decimal"),
+    ("flaky%", "flaky%", "decimal"),
+    ("timeout%", "timeout%", "decimal"),
+    ("fail%", "fail%", "decimal"),
+    ("impact", "impact", "right"),
+    ("total_duration", "total duration", "right"),
+    ("duration_p90", "duration_p90", "right"),
+    ("owners", "owners", "left"),
+]
+
+LAST_COLUMNS = [
+    # (column_name, header, alignment)
+    ("last_started_at", "last started at (UTC)", "right"),
+    ("duration", "duration", "right"),
+    ("status", "status", "left"),
+    ("branch", "branch", "left"),
+    ("PR", "PR", "left"),
+    ("commit", "commit", "left"),
+    ("buildbuddy", "buildbuddy", "left"),
+    ("errors", "errors per attempt", "left")
+]
+# fmt: on
 
 
 def write_log_dir_readme(readme_path: Path, test_target: str, df: pd.DataFrame, timestamp: datetime.timestamp):
     """
     Write a nice README.md in the log output directory describing the //ci/githubstats:query invocation
     that was used to generate the log output directory. This is useful when the invocation has to be redone or tweaked later.
     """
+    # fmt: off
     colalignments = [
-        ("last started at (UTC)", "right"),
-        ("duration", "right"),
-        ("status", "left"),
-        ("branch", "left"),
-        ("PR", "left"),
-        ("commit", "left"),
-        ("buildbuddy_url", "left"),
+        # (df_column, header, alignment)
+        ("last_started_at", "last started at (UTC)", "right"),
+        ("duration", "duration", "right"),
+        ("status", "status", "left"),
+        ("head_branch", "branch", "left"),
+        ("pull_request_number", "PR", "left"),
+        ("head_sha", "commit", "left"),
+        ("buildbuddy_url", "buildbuddy", "left"),
     ]
+    # fmt: on
 
     cmd = shlex.join(["bazel", "run", "//ci/githubstats:query", "--", *sys.argv[1:]])
-    columns, alignments = zip(*colalignments)
-    table_md = tabulate(df[list(columns)], headers="keys", tablefmt="github", colalign=["decimal"] + list(alignments))
+    columns, headers, alignments = zip(*colalignments)
+    kwargs = {} if df.empty else {"colalign": ["decimal"] + list(alignments)}
+    table_md = tabulate(df[list(columns)], headers=list(headers), tablefmt="github", **kwargs)
     readme = f"""Logs of `{test_target}`
 ===
 Generated at {timestamp} using:
@@ -926,26 +1024,11 @@ def top(args):
     # Turn the Bazel labels into terminal hyperlinks to a SourceGraph search for the test target:
     df["label"] = df["label"].apply(lambda label: terminal_hyperlink(label, sourcegraph_url(label)))
 
-    colalignments = [
-        # (column, alignment)
-        ("label", "left"),
-        ("total", "decimal"),
-        ("non_success", "decimal"),
-        ("flaky", "decimal"),
-        ("timeout", "decimal"),
-        ("fail", "decimal"),
-        ("non_success%", "decimal"),
-        ("flaky%", "decimal"),
-        ("timeout%", "decimal"),
-        ("fail%", "decimal"),
-        ("impact", "right"),
-        ("total_duration", "right"),
-        ("duration_p90", "right"),
-        ("owners", "left"),
-    ]
-
-    columns, alignments = zip(*colalignments)
-    print(tabulate(df[list(columns)], headers="keys", tablefmt=args.tablefmt, colalign=["decimal"] + list(alignments)))
+    # Apply column filtering if --columns is specified, otherwise use all columns
+    colalignments = filter_columns(TOP_COLUMNS, args.columns)
+    columns, headers, alignments = zip(*colalignments)
+    kwargs = {} if df.empty else {"colalign": ["decimal"] + list(alignments)}
+    print(tabulate(df[list(columns)], headers=list(headers), tablefmt=args.tablefmt, **kwargs))
 
 
 def last(args):
@@ -992,22 +1075,23 @@ def direct_url_to_buildbuddy(invocation_id):
         return f"{redirect}?target={args.test_target}" if redirect else url
 
     with ThreadPoolExecutor() as executor:
-        df["buildbuddy_url"] = list(executor.map(direct_url_to_buildbuddy, df["invocation_id"]))
+        df["buildbuddy_url"] = list(executor.map(direct_url_to_buildbuddy, df["build_id"]))
 
-    df["buildbuddy_links"] = df["buildbuddy_url"].apply(lambda url: terminal_hyperlink("logs", url))
+    df["buildbuddy"] = df["buildbuddy_url"].apply(lambda url: terminal_hyperlink("logs", url))
 
     # Turn the commit SHAs into terminal hyperlinks to the GitHub commit page
-    df["commit_link"] = df["commit"].apply(
+    df["commit"] = df["head_sha"].apply(
         lambda commit: terminal_hyperlink(commit[:7], f"https://github.com/{ORG}/{REPO}/commit/{commit}")
     )
 
-    df["last started at (UTC)"] = df["last_started_at"].apply(lambda t: t.strftime("%a %Y-%m-%d %X"))
+    # Bazel's first_start_time is really the time the last attempt started.
+    df["last_started_at"] = df["first_start_time"].apply(lambda t: t.strftime("%a %Y-%m-%d %X"))
 
-    df["branch_link"] = df["branch"].apply(
+    df["branch"] = df["head_branch"].apply(
         lambda branch: terminal_hyperlink(shorten(branch, 16), f"https://github.com/{ORG}/{REPO}/tree/{branch}")
     )
 
-    df["PR_link"] = df["PR"].apply(
+    df["PR"] = df["pull_request_number"].apply(
         lambda pr: terminal_hyperlink(f"#{pr}", f"https://github.com/{ORG}/{REPO}/pull/{pr}") if pr else ""
     )
@@ -1016,30 +1100,33 @@ def direct_url_to_buildbuddy(invocation_id):
     if not args.skip_download:
         download_and_process_logs(args.logs_base_dir, args.test_target, args.download_ic_logs, df)
 
-    colalignments = [
-        # (column, header, alignment)
-        ("last started at (UTC)", "last started at (UTC)", "right"),
-        ("duration", "duration", "right"),
-        ("status", "status", "left"),
-        ("branch_link", "branch", "left"),
-        ("PR_link", "PR", "left"),
-        ("commit_link", "commit", "left"),
-        ("buildbuddy_links", "buildbuddy", "left"),
-    ] + ([] if args.skip_download else [("errors", "errors per attempt", "left")])
+    columns_metadata = LAST_COLUMNS
+    # When downloads are skipped we don't have any error information, so skip the "errors" column.
+    if args.skip_download:
+        columns_metadata = [col for col in columns_metadata if col[0] != "errors"]
 
+    # Apply column filtering if --columns is specified, otherwise use all columns
+    colalignments = filter_columns(columns_metadata, args.columns)
     columns, headers, alignments = zip(*colalignments)
-    print(
-        tabulate(
-            df[list(columns)], headers=list(headers), tablefmt=args.tablefmt, colalign=["decimal"] + list(alignments)
-        )
-    )
+    kwargs = {} if df.empty else {"colalign": ["decimal"] + list(alignments)}
+    print(tabulate(df[list(columns)], headers=list(headers), tablefmt=args.tablefmt, **kwargs))
 
 
 # argparse formatter to allow newlines in --help.
 class RawDefaultsFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawTextHelpFormatter):
     pass
 
 
+def add_columns_argument(parser, columns_metadata):
+    parser.add_argument(
+        "--columns",
+        metavar="COLS",
+        type=lambda s: [col.strip() for col in s.split(",")],
+        help=f"""Comma-separated list of columns to display in order, or to hide if prefixed with '-'. Available columns:
+{",".join([column_name.replace("%", "%%") for column_name, _, _ in columns_metadata])}""",
+    )
+
+
 def main():
     parser = argparse.ArgumentParser(prog="bazel run //ci/githubstats:query --")
 
@@ -1174,6 +1261,8 @@ def main():
         help="Table format. See: https://pypi.org/project/tabulate/",
     )
 
+    add_columns_argument(top_parser, TOP_COLUMNS)
+
     ## last ###################################################################
 
     last_runs_parser = subparsers.add_parser(
@@ -1257,6 +1346,8 @@ def main():
         help="Table format. See: https://pypi.org/project/tabulate/",
     )
 
+    add_columns_argument(last_runs_parser, LAST_COLUMNS)
+
     ###########################################################################
 
     args = parser.parse_args()
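
Taken together, the new --columns flag accepts either an ordered include list or an all-exclude list, never a mix. A small sketch of the semantics, assuming query.py can be imported as a module (the import path is illustrative):

from query import TOP_COLUMNS, filter_columns

# Inclusion mode: only the named columns, in the given order.
cols = filter_columns(TOP_COLUMNS, ["total", "label"])
assert [column_name for column_name, _, _ in cols] == ["total", "label"]

# Exclusion mode: every name prefixed with '-' is dropped; the original order is kept.
cols = filter_columns(TOP_COLUMNS, ["-owners", "-timeout"])
assert "owners" not in [column_name for column_name, _, _ in cols]

# Mixing modes (e.g. ["label", "-owners"]) or naming an unknown column aborts via die().

On the command line this would look like `bazel run //ci/githubstats:query -- top --columns label,total,fail%` to pick columns, or `--columns=-owners,-timeout` to hide some (the '=' form keeps argparse from reading the leading '-' as a new option); the subcommand names here are inferred from top_parser/last_runs_parser and may differ.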