Skip to content

Commit

Permalink
Merge branch 'augustin/11-13-live-tests_handle_multiple_connections' …
Browse files Browse the repository at this point in the history
…of github.com:airbytehq/airbyte into augustin/11-13-live-tests_handle_multiple_connections
  • Loading branch information
alafanechere committed Dec 16, 2024
2 parents c5b319a + 5ed7615 commit dfd6b07
Show file tree
Hide file tree
Showing 9 changed files with 257 additions and 122 deletions.
8 changes: 4 additions & 4 deletions airbyte-ci/connectors/live-tests/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions airbyte-ci/connectors/live-tests/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ pytest = "^8.1.1"
pydash = "~=7.0.7"
docker = ">=6,<7"
asyncclick = "^8.1.7.1"
# Pinning until we can manage multiple connections returned by the new connection-retriever version
connection-retriever = {git = "[email protected]:airbytehq/airbyte-platform-internal", subdirectory = "tools/connection-retriever", rev = "f7359106b28e5197e45b3c8524c4f72a314805a2"}
connection-retriever = {git = "[email protected]:airbytehq/airbyte-platform-internal", subdirectory = "tools/connection-retriever"}
duckdb = "<=0.10.1" # Pinned due to this issue https://github.com/duckdb/duckdb/issues/11152
pandas = "^2.2.1"
pytest-sugar = "^1.0.0"
Expand Down

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions airbyte-ci/connectors/live-tests/src/live_tests/commons/hacks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

import copy

import rich

console = rich.get_console()


def patch_configured_catalog(configured_catalog: dict) -> dict:
"""
The configured catalog extracted from the platform can be incompatible with the airbyte-protocol.
This leads to validation error when we serialize the configured catalog into a ConfiguredAirbyteCatalog object.
This functions is a best effort to patch the configured catalog to make it compatible with the airbyte-protocol.
"""
patched_catalog = copy.deepcopy(configured_catalog)
for stream in patched_catalog["streams"]:
if stream.get("destination_sync_mode") == "overwrite_dedup":
stream["destination_sync_mode"] = "overwrite"
console.log(
f"Stream {stream['stream']['name']} destination_sync_mode has been patched from 'overwrite_dedup' to 'overwrite' to guarantee compatibility with the airbyte-protocol."
)
return patched_catalog
44 changes: 28 additions & 16 deletions airbyte-ci/connectors/live-tests/src/live_tests/commons/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,29 +45,38 @@ def dump_cache_volume(self) -> dagger.CacheVolume:
def mitmproxy_dir_cache(self) -> dagger.CacheVolume:
return self.dagger_client.cache_volume(self.MITMPROXY_IMAGE)

async def get_container(
self,
) -> dagger.Container:
def generate_mitmconfig(self):
return (
self.dagger_client.container()
.from_(self.MITMPROXY_IMAGE)
# Mitmproxy generates its self signed certs at first run, we need to run it once to generate the certs
# They are stored in /root/.mitmproxy
.with_exec(["timeout", "--preserve-status", "1", "mitmdump"])
.directory("/root/.mitmproxy")
)

async def get_container(self, mitm_config: dagger.Directory) -> dagger.Container:
"""Get a container for the mitmproxy service.
If a stream for server replay is provided, it will be used to replay requests to the same URL.
Returns:
dagger.Container: The container for the mitmproxy service.
mitm_config (dagger.Directory): The directory containing the mitmproxy configuration.
"""
container_addons_path = "/addons.py"
proxy_container = (
self.dagger_client.container()
.from_(self.MITMPROXY_IMAGE)
.with_exec(["mkdir", "-p", "/home/mitmproxy/.mitmproxy"])
.with_exec(["mkdir", "/dumps"])
# This is caching the mitmproxy stream files, which can contain sensitive information
# We want to nuke this cache after test suite execution.
.with_mounted_cache("/dumps", self.dump_cache_volume)
# This is caching the mitmproxy self-signed certificate, no sensitive information is stored in it
.with_mounted_cache("/home/mitmproxy/.mitmproxy", self.mitmproxy_dir_cache)
.with_file(
container_addons_path,
self.dagger_client.host().file(self.MITM_ADDONS_PATH),
)
.with_directory("/root/.mitmproxy", mitm_config)
)

# If the proxy was instantiated with a stream for server replay from a previous run, we want to use it.
Expand Down Expand Up @@ -99,8 +108,8 @@ async def get_container(

return proxy_container.with_exec(command)

async def get_service(self) -> dagger.Service:
return (await self.get_container()).with_exposed_port(self.PROXY_PORT).as_service()
async def get_service(self, mitm_config: dagger.Directory) -> dagger.Service:
return (await self.get_container(mitm_config)).with_exposed_port(self.PROXY_PORT).as_service()

async def bind_container(self, container: dagger.Container) -> dagger.Container:
"""Bind a container to the proxy service and set environment variables to use the proxy for HTTP(S) traffic.
Expand All @@ -111,24 +120,27 @@ async def bind_container(self, container: dagger.Container) -> dagger.Container:
Returns:
dagger.Container: The container with the proxy service bound and environment variables set.
"""
cert_path_in_volume = "/mitmproxy_dir/mitmproxy-ca.pem"
ca_certificate_path = "/usr/local/share/ca-certificates/mitmproxy.crt"
mitmconfig_dir = self.generate_mitmconfig()
pem = mitmconfig_dir.file("mitmproxy-ca.pem")

# Find the python version in the container to get the correct path for the requests cert file
# We will overwrite this file with the mitmproxy self-signed certificate
# I could not find a less brutal way to make Requests trust the mitmproxy self-signed certificate
# I tried running update-ca-certificates + setting REQUESTS_CA_BUNDLE in the container but it did not work
python_version_output = (await container.with_exec(["python", "--version"]).stdout()).strip()
python_version = python_version_output.split(" ")[-1]
python_version_minor_only = ".".join(python_version.split(".")[:-1])
requests_cert_path = f"/usr/local/lib/python{python_version_minor_only}/site-packages/certifi/cacert.pem"
current_user = (await container.with_exec(["whoami"]).stdout()).strip()
try:
return await (
container.with_service_binding(self.hostname, await self.get_service())
.with_mounted_cache("/mitmproxy_dir", self.mitmproxy_dir_cache)
.with_exec(["cp", cert_path_in_volume, requests_cert_path])
.with_exec(["cp", cert_path_in_volume, ca_certificate_path])
# The following command make the container use the proxy for all outgoing HTTP requests
.with_env_variable("REQUESTS_CA_BUNDLE", requests_cert_path)
.with_exec(["update-ca-certificates"])
container.with_user("root")
# Overwrite the requests cert file with the mitmproxy self-signed certificate
.with_file(requests_cert_path, pem, owner=current_user)
.with_env_variable("http_proxy", f"{self.hostname}:{self.PROXY_PORT}")
.with_env_variable("https_proxy", f"{self.hostname}:{self.PROXY_PORT}")
.with_user(current_user)
.with_service_binding(self.hostname, await self.get_service(mitmconfig_dir))
)
except dagger.DaggerError as e:
# This is likely hapenning on Java connector images whose certificates location is different
Expand Down
35 changes: 28 additions & 7 deletions airbyte-ci/connectors/live-tests/src/live_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import pytest
from airbyte_protocol.models import AirbyteCatalog, AirbyteStateMessage, ConfiguredAirbyteCatalog, ConnectorSpecification # type: ignore
from connection_retriever.audit_logging import get_user_email # type: ignore
from connection_retriever.retrieval import ConnectionNotFoundError, NotPermittedError, get_current_docker_image_tag # type: ignore
from connection_retriever.retrieval import ConnectionNotFoundError, get_current_docker_image_tag # type: ignore
from live_tests import stash_keys
from live_tests.commons.connection_objects_retrieval import ConnectionObject, InvalidConnectionError, get_connection_objects
from live_tests.commons.connector_runner import ConnectorRunner, Proxy
Expand Down Expand Up @@ -99,6 +99,11 @@ def pytest_addoption(parser: Parser) -> None:
default=ConnectionSubset.SANDBOXES.value,
help="Whether to select from sandbox accounts only.",
)
parser.addoption(
"--max-connections",
default=None,
help="The maximum number of connections to retrieve and use for testing.",
)


def pytest_configure(config: Config) -> None:
Expand Down Expand Up @@ -130,6 +135,7 @@ def pytest_configure(config: Config) -> None:
private_details_path = test_artifacts_directory / "private_details.html"
config.stash[stash_keys.TEST_ARTIFACT_DIRECTORY] = test_artifacts_directory
dagger_log_path.touch()
LOGGER.info("Dagger log path: %s", dagger_log_path)
config.stash[stash_keys.DAGGER_LOG_PATH] = dagger_log_path
config.stash[stash_keys.PR_URL] = get_option_or_fail(config, "--pr-url")
_connection_id = config.getoption("--connection-id")
Expand All @@ -143,6 +149,10 @@ def pytest_configure(config: Config) -> None:
custom_state_path = config.getoption("--state-path")
config.stash[stash_keys.SELECTED_STREAMS] = set(config.getoption("--stream") or [])
config.stash[stash_keys.TEST_EVALUATION_MODE] = TestEvaluationMode(config.getoption("--test-evaluation-mode", "strict"))
config.stash[stash_keys.MAX_CONNECTIONS] = config.getoption("--max-connections")
config.stash[stash_keys.MAX_CONNECTIONS] = (
int(config.stash[stash_keys.MAX_CONNECTIONS]) if config.stash[stash_keys.MAX_CONNECTIONS] else None
)

if config.stash[stash_keys.RUN_IN_AIRBYTE_CI]:
config.stash[stash_keys.SHOULD_READ_WITH_STATE] = bool(config.getoption("--should-read-with-state"))
Expand Down Expand Up @@ -170,12 +180,12 @@ def pytest_configure(config: Config) -> None:
Path(custom_configured_catalog_path) if custom_configured_catalog_path else None,
Path(custom_state_path) if custom_state_path else None,
retrieval_reason,
fail_if_missing_objects=False,
connector_image=config.stash[stash_keys.CONNECTOR_IMAGE],
connector_version=config.stash[stash_keys.CONTROL_VERSION],
auto_select_connections=config.stash[stash_keys.AUTO_SELECT_CONNECTION],
selected_streams=config.stash[stash_keys.SELECTED_STREAMS],
connection_subset=config.stash[stash_keys.CONNECTION_SUBSET],
max_connections=config.stash[stash_keys.MAX_CONNECTIONS],
)
config.stash[stash_keys.IS_PERMITTED_BOOL] = True
except (ConnectionNotFoundError, InvalidConnectionError) as exc:
Expand All @@ -188,15 +198,17 @@ def pytest_configure(config: Config) -> None:
if config.stash[stash_keys.CONTROL_VERSION] == config.stash[stash_keys.TARGET_VERSION]:
pytest.exit(f"Control and target versions are the same: {control_version}. Please provide different versions.")

config.stash[stash_keys.TEST_REPORT] = TestReport(
report_path,
config.stash[stash_keys.PRIVATE_DETAILS_REPORT] = PrivateDetailsReport(
private_details_path,
config,
)

config.stash[stash_keys.PRIVATE_DETAILS_REPORT] = PrivateDetailsReport(
private_details_path,
config.stash[stash_keys.TEST_REPORT] = TestReport(
report_path,
config,
private_details_url=config.stash[stash_keys.PRIVATE_DETAILS_REPORT].path.resolve().as_uri(),
)

webbrowser.open_new_tab(config.stash[stash_keys.TEST_REPORT].path.resolve().as_uri())


Expand All @@ -215,6 +227,7 @@ def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item

def pytest_terminal_summary(terminalreporter: SugarTerminalReporter, exitstatus: int, config: Config) -> None:
config.stash[stash_keys.TEST_REPORT].update(ReportState.FINISHED)
config.stash[stash_keys.PRIVATE_DETAILS_REPORT].update(ReportState.FINISHED)
if not config.stash.get(stash_keys.IS_PERMITTED_BOOL, False):
# Don't display the prompt if the tests were not run due to inability to fetch config
clean_up_artifacts(MAIN_OUTPUT_DIRECTORY, LOGGER)
Expand Down Expand Up @@ -495,6 +508,7 @@ async def run_command_and_add_to_report(
duckdb_path: Path,
runs_in_ci,
test_report: TestReport,
private_details_report: PrivateDetailsReport,
) -> ExecutionResult:
"""Run the given command for the given connector and connection objects and add the results to the test report."""
execution_result, proxy = await run_command(
Expand All @@ -506,7 +520,12 @@ async def run_command_and_add_to_report(
duckdb_path,
runs_in_ci,
)
test_report.add_control_execution_result(execution_result)
if connector.target_or_control is TargetOrControl.CONTROL:
test_report.add_control_execution_result(execution_result)
private_details_report.add_control_execution_result(execution_result)
if connector.target_or_control is TargetOrControl.TARGET:
test_report.add_target_execution_result(execution_result)
private_details_report.add_target_execution_result(execution_result)
return execution_result, proxy


Expand Down Expand Up @@ -539,6 +558,7 @@ async def generated_fixture(
request.config.stash[stash_keys.DUCKDB_PATH],
request.config.stash[stash_keys.RUN_IN_AIRBYTE_CI],
request.config.stash[stash_keys.TEST_REPORT],
request.config.stash[stash_keys.PRIVATE_DETAILS_REPORT],
)

yield execution_results
Expand All @@ -561,6 +581,7 @@ async def generated_fixture(
request.config.stash[stash_keys.DUCKDB_PATH],
request.config.stash[stash_keys.RUN_IN_AIRBYTE_CI],
request.config.stash[stash_keys.TEST_REPORT],
request.config.stash[stash_keys.PRIVATE_DETAILS_REPORT],
)

yield execution_results
Expand Down
22 changes: 9 additions & 13 deletions airbyte-ci/connectors/live-tests/src/live_tests/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ def __init__(self, path: Path, pytest_config: Config) -> None:
self.path = path
self.pytest_config = pytest_config
self.created_at = datetime.datetime.utcnow()
self.updated_at = self.created_at

self.control_execution_results_per_command: dict[Command, List[ExecutionResult]] = {command: [] for command in Command}
self.target_execution_results_per_command: dict[Command, List[ExecutionResult]] = {command: [] for command in Command}
self._state = ReportState.INITIALIZING
self.update(ReportState.INITIALIZING)

@abstractmethod
def render(self) -> None:
Expand Down Expand Up @@ -72,9 +74,8 @@ class PrivateDetailsReport(BaseReport):
SPEC_SECRET_MASK_URL = "https://connectors.airbyte.com/files/registries/v0/specs_secrets_mask.yaml"

def __init__(self, path: Path, pytest_config: Config) -> None:
super().__init__(path, pytest_config)
self.secret_properties = self.get_secret_properties()
self.render()
super().__init__(path, pytest_config)

def get_secret_properties(self) -> list:
response = requests.get(self.SPEC_SECRET_MASK_URL)
Expand Down Expand Up @@ -162,16 +163,10 @@ def get_requested_urls_per_command(
class TestReport(BaseReport):
TEMPLATE_NAME = "report.html.j2"

def __init__(self, path: Path, pytest_config: Config) -> None:
super().__init__(path, pytest_config)
self.updated_at = self.created_at
def __init__(self, path: Path, pytest_config: Config, private_details_url: Optional[str] = None) -> None:
self.private_details_url = private_details_url
self.test_results: list[dict[str, Any]] = []
self.update(ReportState.INITIALIZING)

def update(self, state: ReportState = ReportState.RUNNING) -> None:
self._state = state
self.updated_at = datetime.datetime.utcnow()
self.render()
super().__init__(path, pytest_config)

def add_test_result(self, test_report: pytest.TestReport, test_documentation: Optional[str] = None) -> None:
cut_properties: list[tuple[str, str]] = []
Expand Down Expand Up @@ -219,6 +214,7 @@ def render(self) -> None:
record_count_per_command_and_stream=self.get_record_count_per_stream(),
test_results=self.test_results,
max_lines=MAX_LINES_IN_REPORT,
private_details_url=self.private_details_url,
)
self.path.write_text(rendered)

Expand All @@ -242,7 +238,7 @@ def get_stream_coverage_metrics(self) -> dict[str, str]:
return {
"Available in catalog": str(catalog_stream_count),
"In use (in configured catalog)": str(configured_catalog_stream_count),
"Coverage": f"{coverage:.2f}%",
"Coverage": f"{coverage * 100:.2f}%",
}

def get_record_count_per_stream(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@
USER = pytest.StashKey[str]()
WORKSPACE_ID = pytest.StashKey[str]()
TEST_EVALUATION_MODE = pytest.StashKey[TestEvaluationMode]
MAX_CONNECTIONS = pytest.StashKey[int | None]()
Loading

0 comments on commit dfd6b07

Please sign in to comment.