Skip to content

Commit

Permalink
Add pytest reporting
Browse files Browse the repository at this point in the history
  • Loading branch information
awtkns committed Nov 28, 2023
1 parent f627be9 commit e8959bd
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 5 deletions.
107 changes: 107 additions & 0 deletions bananalyzer/hooks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from typing import Any

import pytest
from _pytest.terminal import TerminalReporter
from tabulate import tabulate


def _format_percentage(numerator: int, denominator: int) -> str:
    """Return ``numerator / denominator`` as a two-decimal percentage string.

    Returns ``"N/A"`` when ``denominator`` is not positive, so callers never
    divide by zero.
    """
    if denominator > 0:
        return f"{numerator / denominator * 100:.2f}%"
    return "N/A"


def print_field_data(
    terminalreporter: TerminalReporter, results: dict[str, dict[bool, int]]
) -> None:
    """Write a per-group pass/fail table, followed by a totals row.

    Args:
        terminalreporter: Pytest terminal reporter the table is written to.
        results: Maps a group name to its outcome counts, keyed by ``True``
            (passed) and ``False`` (failed). A missing key counts as zero.
    """
    headers = "Group", "Passed", "Failed", "Correct %", "Perfect"

    table_data = []
    total_passed = 0
    total_failed = 0
    perfect_count = 0

    for field, values in results.items():
        passed = values.get(True, 0)
        failed = values.get(False, 0)
        total_passed += passed
        total_failed += failed

        # A group is "perfect" when it recorded no failures at all.
        is_perfect = failed == 0
        perfect_count += int(is_perfect)

        percentage = _format_percentage(passed, passed + failed)
        perfect = "✅" if is_perfect else "❌"
        table_data.append([field, passed, failed, percentage, perfect])

    # In the totals row, "Correct %" covers all counted outcomes, while
    # "Perfect" is the share of groups that had zero failures.
    total_percentage = _format_percentage(total_passed, total_passed + total_failed)
    perfect_percentage = _format_percentage(perfect_count, len(results))

    # Dashed separator row, then the totals row.
    table_data.append(["-" * len(header) for header in headers])
    table_data.append(
        ["Total", total_passed, total_failed, total_percentage, perfect_percentage]
    )

    table = tabulate(table_data, headers=headers, tablefmt="psql")

    terminalreporter.write_line(table)
    terminalreporter.write_line("\n")


class BananalyzerPytestPlugin:
    """Pytest plugin that prints a Bananalyzer results section after a run.

    An autouse fixture tags every test with ``field`` and ``class`` user
    properties; the terminal-summary hook then groups pass/fail counts by
    those properties and prints one table per property plus an overall
    summary.
    """

    @pytest.hookimpl(trylast=True)
    def pytest_terminal_summary(
        self, terminalreporter: TerminalReporter, *args: Any, **kwargs: Any
    ) -> None:
        """Write the "Bananalyzer Results" section to the terminal report.

        Args:
            terminalreporter: Reporter whose ``stats`` are read and to which
                the tables are written.
            *args: Ignored; accepted so the hook tolerates extra arguments.
            **kwargs: Ignored; accepted so the hook tolerates extra arguments.
        """
        terminalreporter.section("Bananalyzer Results")

        stats = terminalreporter.stats
        passed_reports = stats.get("passed", [])
        # Errors are counted together with failures.
        failed_reports = stats.get("failed", []) + stats.get("error", [])

        # property name -> property value -> {passed?: number of reports}
        results: dict[str, dict[str, dict[bool, int]]] = {}
        for test_result in passed_reports + failed_reports:
            # Not every report type is guaranteed to carry user properties
            # (collection-error reports appear not to), so default to none
            # instead of raising while printing the summary.
            for key, value in getattr(test_result, "user_properties", ()):
                counts = results.setdefault(key, {}).setdefault(value, {})
                counts[test_result.passed] = counts.get(test_result.passed, 0) + 1

        total_passed = len(passed_reports)
        total_failed = len(failed_reports)
        total_tests = total_passed + total_failed

        if "field" in results:
            terminalreporter.write_line("Field Results:")
            print_field_data(terminalreporter, results["field"])

        if "class" in results:
            terminalreporter.write_line("Class Results:")
            print_field_data(terminalreporter, results["class"])

        # Guard the division: with no passed/failed/error reports the
        # unguarded ratio would raise ZeroDivisionError.
        percent_passed = (
            f"{total_passed / total_tests * 100:.2f}%" if total_tests > 0 else "N/A"
        )
        table_data = {
            "Total Tests": total_tests,
            "Tests Passed": total_passed,
            "Tests Failed": total_failed,
            "Percent Passed": percent_passed,
        }

        terminalreporter.write_line("Summary:")
        terminalreporter.write_line(tabulate(table_data.items(), tablefmt="psql"))

    @pytest.fixture(autouse=True)
    def add_user_properties(self, record_property, request) -> None:  # type: ignore
        """Record ``field`` and ``class`` user properties for the current test.

        ``field`` is the ``key`` parametrization value and ``class`` is the
        name of the test's class. Either falls back to ``""`` when the test is
        not parametrized (no ``callspec``) or is not defined inside a class.
        """
        # ``callspec`` only exists on parametrized test items.
        callspec = getattr(request.node, "callspec", None)
        field = callspec.params.get("key", "") if callspec is not None else ""

        # ``request.cls`` is None for tests defined outside a class.
        class_name = request.cls.__name__ if request.cls is not None else ""

        record_property("field", field)
        record_property("class", class_name)
2 changes: 1 addition & 1 deletion bananalyzer/runner/evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def is_string_similar(actual: str, expected: str, tolerance: int = 2) -> bool:
return SequenceMatcher(None, actual, expected).ratio() >= 0.8


def native_count_differences(actual: str, expected: str):
def native_count_differences(actual: str, expected: str) -> int:
non_alnum_actual = "".join(char for char in actual if not char.isalnum())
non_alnum_expected = "".join(char for char in expected if not char.isalnum())
# Compare the sequence of non-alphanumeric characters with a tolerance for
Expand Down
3 changes: 2 additions & 1 deletion bananalyzer/runner/null_agent_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
from random import random

from playwright.async_api import Page

Expand All @@ -18,7 +19,7 @@ async def run(
) -> AgentResult:
print(f"Testing {example.get_static_url()}")
await page.goto(example.get_static_url())
await asyncio.sleep(0.5)
await asyncio.sleep(0.2)

print(f"Done testing {example.get_static_url()}")

Expand Down
3 changes: 2 additions & 1 deletion bananalyzer/runner/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pydantic import BaseModel

from bananalyzer.data.schemas import Example
from bananalyzer.hooks import BananalyzerPytestPlugin
from bananalyzer.schema import AgentRunnerClass, PytestArgs

TestType = Callable[[], Awaitable[None]]
Expand Down Expand Up @@ -132,4 +133,4 @@ def run_tests(
+ [f"--html={str(report_path)}"]
)

return pytest.main(args)
return pytest.main(args, plugins=[(BananalyzerPytestPlugin())])
27 changes: 26 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "bananalyzer"
version = "0.5.8"
version = "0.6.0"
description = "Open source AI Agent evaluation framework for web tasks 🐒🍌"
authors = ["asim-shrestha <[email protected]>"]
readme = "README.md"
Expand All @@ -17,6 +17,7 @@ deepdiff = "^6.7.0"
pytest-xdist = "^3.4.0"
black = { extras = ["jupyter"], version = "^23.11.0" }
pytest-html = "^4.1.1"
tabulate = "^0.9.0"

[tool.poetry.group.test.dependencies]
pytest = "^7.4.2"
Expand All @@ -30,6 +31,7 @@ black = { extras = ["jupyter"], version = "^23.11.0" }
types-requests = "^2.31.0.10"
pytest-asyncio = "^0.21.1"
isort = "^5.12.0"
types-tabulate = "^0.9.0.3"

[tool.isort]
profile = "black"
Expand Down

0 comments on commit e8959bd

Please sign in to comment.