diff --git a/CHANGELOG.md b/CHANGELOG.md index 6777b005..22fd6b92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,10 +9,14 @@ and this project adheres to [Semantic Versioning][semver]. ### Added +- Check if metadata files at revision match those downloaded by TUF updater ([389]) + ### Changed ### Fixed +[389]: https://github.com/openlawlibrary/taf/pull/389 + ## [0.29.1] - 02/07/2024 ### Added diff --git a/taf/tests/test_updater/test_repo_update/test_updater.py b/taf/tests/test_updater/test_repo_update/test_updater.py index 3f855ac0..f2a4a140 100644 --- a/taf/tests/test_updater/test_repo_update/test_updater.py +++ b/taf/tests/test_updater/test_repo_update/test_updater.py @@ -85,6 +85,7 @@ TARGET_MISSING_COMMIT_PATTERN = r"Update of organization/auth_repo failed due to error: Failure to validate organization/auth_repo commit ([0-9a-f]{40}) committed on (\d{4}-\d{2}-\d{2}): data repository ([\w\/-]+) was supposed to be at commit ([0-9a-f]{40}) but commit not on branch (\w+)" NOT_CLEAN_PATTERN = r"^Update of ([\w/]+) failed due to error: Repository ([\w/-]+) should contain only committed changes\." INVALID_KEYS_PATTERN = r"^Update of organization/auth_repo failed due to error: Validation of authentication repository organization/auth_repo failed at revision ([0-9a-f]{40}) due to error: ([\w/-]+) was signed by (\d+)/(\d+) keys$" +INVALID_METADATA_PATTERN = r"^Update of organization/auth_repo failed due to error: Validation of authentication repository organization/auth_repo failed at revision ([0-9a-f]{40}) due to error: Invalid metadata file ([\w/]+\.\w+)$" NO_REPOSITORY_INFO_JSON = "Update of repository failed due to error: Error during info.json parse. If the authentication repository's path is not specified, info.json metadata is expected to be in targets/protected" @@ -126,8 +127,6 @@ def run_around_tests(client_dir): ("test-updater-multiple-branches", UpdateType.OFFICIAL, True), ("test-updater-delegated-roles", UpdateType.OFFICIAL, True), ("test-updater-updated-root", UpdateType.OFFICIAL, True), - ("test-updater-updated-root-old-snapshot", UpdateType.OFFICIAL, True), - ("test-updater-updated-root-version-skipped", UpdateType.OFFICIAL, True), ("test-updater-updated-root-version-skipped", UpdateType.OFFICIAL, True), ("test-updater-expired-metadata", UpdateType.OFFICIAL, True), ], @@ -272,6 +271,13 @@ def test_no_update_necessary( True, True, ), + ( + "test-updater-updated-root-old-snapshot", + INVALID_METADATA_PATTERN, + True, + False, + False, + ), ( "test-updater-missing-target-commit", TARGET_ADDITIONAL_COMMIT_PATTERN, diff --git a/taf/updater/handlers.py b/taf/updater/handlers.py index 0bc542eb..0bd04700 100644 --- a/taf/updater/handlers.py +++ b/taf/updater/handlers.py @@ -38,7 +38,7 @@ class GitUpdater(FetcherInterface): - A commit is considered to be a TUF Updater instance. We keep track of the current commit. - This class is designed in such a way that for each subsequent call of the updater's refresh method the metadata from next commit is used within TUF's updater. - - The updater's method for downloading and retrieving current metadata is overriden + - The updater's method for downloading and retrieving current metadata is overridden by our own '_fetch' call. We override TUF's FetcherInterface abstract class to fetch metadata from local git revisions, instead of by downloading data from another protocol, like http/https. So, what we want to do is to return the current commit, @@ -243,11 +243,24 @@ def get_current_targets(self): except GitError: return [] + def get_current_metadata(self): + try: + return self.validation_auth_repo.list_files_at_revision( + self.current_commit, "metadata" + ) + except GitError: + return [] + def get_current_target_data(self, filepath, raw=False): return self.validation_auth_repo.get_file( self.current_commit, f"targets/{filepath}", raw=raw ) + def get_current_metadata_data(self, filepath, raw=False): + return self.validation_auth_repo.get_file( + self.current_commit, f"metadata/{filepath}", raw=raw + ) + @revert_tuf_patch_on_last_commit # type: ignore def update_done(self): """Used to indicate whether updater has finished with update. diff --git a/taf/updater/updater.py b/taf/updater/updater.py index 46b058b5..ff93f871 100644 --- a/taf/updater/updater.py +++ b/taf/updater/updater.py @@ -644,8 +644,8 @@ def validate_repository( ) settings.overwrite_last_validated_commit = True settings.last_validated_commit = validate_from_commit + auth_repo_name = None try: - auth_repo_name, error = _update_named_repository( operation=OperationType.UPDATE, url=str(auth_path), @@ -661,7 +661,7 @@ def validate_repository( raise error except Exception as e: raise ValidationFailedError( - f"Validation or repository {auth_repo_name} failed due to error: {e}" + f"Validation or repository {auth_repo_name or ''} failed due to error: {e}" ) settings.overwrite_last_validated_commit = False settings.last_validated_commit = None diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py index 3ec6e594..7f5c2991 100644 --- a/taf/updater/updater_pipeline.py +++ b/taf/updater/updater_pipeline.py @@ -3,6 +3,7 @@ import functools from logging import DEBUG, INFO from pathlib import Path +import re import shutil import tempfile from typing import Any, Dict, List, Optional @@ -94,7 +95,8 @@ def wrapper(self, *args, **kwargs): return result finally: if ( - self.state.event == Event.FAILED + not self.only_validate + and self.state.event == Event.FAILED and not self.state.existing_repo and self.state.users_auth_repo is not None ): @@ -1168,68 +1170,27 @@ def _is_unauthenticated_allowed(repository): "Running TUF validation of the authentication repository...", logger=taf_logger, ) -def _run_tuf_updater(git_updater, auth_repo_name): +def _run_tuf_updater(git_fetcher, auth_repo_name): def _init_updater(): try: return Updater( - git_updater.metadata_dir, + git_fetcher.metadata_dir, "metadata/", - git_updater.targets_dir, + git_fetcher.targets_dir, "targets/", - fetcher=git_updater, + fetcher=git_fetcher, ) except Exception as e: taf_logger.error(f"Failed to instantiate TUF Updater due to error: {e}") raise e - def _update_tuf_current_revision(): - current_commit = git_updater.current_commit - try: - updater.refresh() - taf_logger.debug("Validated metadata files at revision {}", current_commit) - # using refresh, we have updated all main roles - # we still need to update the delegated roles (if there are any) - # and validate any target files - current_targets = git_updater.get_current_targets() - for target_path in current_targets: - target_filepath = target_path.replace("\\", "/") - - targetinfo = updater.get_targetinfo(target_filepath) - target_data = git_updater.get_current_target_data( - target_filepath, raw=True - ) - targetinfo.verify_length_and_hashes(target_data) - - taf_logger.debug( - "Successfully validated target file {} at {}", - target_filepath, - current_commit, - ) - return current_commit - except Exception as e: - metadata_expired = EXPIRED_METADATA_ERROR in type( - e - ).__name__ or EXPIRED_METADATA_ERROR in str(e) - if not metadata_expired or settings.strict: - taf_logger.error( - "Validation of authentication repository {} failed at revision {} due to error: {}", - auth_repo_name or "", - current_commit, - e, - ) - raise UpdateFailedError( - f"Validation of authentication repository {auth_repo_name or ''}" - f" failed at revision {current_commit} due to error: {e}" - ) - taf_logger.warning( - f"WARNING: Could not validate authentication repository at revision {current_commit} due to error: {e}" - ) - last_validated_commit = None try: - while not git_updater.update_done(): + while not git_fetcher.update_done(): updater = _init_updater() - current_commit = _update_tuf_current_revision() + current_commit = _update_tuf_current_revision( + git_fetcher, updater, auth_repo_name + ) if current_commit is not None: last_validated_commit = current_commit except UpdateFailedError as e: @@ -1238,6 +1199,86 @@ def _update_tuf_current_revision(): return last_validated_commit, None +def _update_tuf_current_revision(git_fetcher, updater, auth_repo_name): + current_commit = git_fetcher.current_commit + try: + updater.refresh() + taf_logger.debug("Validated metadata files at revision {}", current_commit) + # using refresh, we have updated all main roles + # we still need to update the delegated roles (if there are any) + # and validate any target files + current_targets = git_fetcher.get_current_targets() + for target_path in current_targets: + target_filepath = target_path.replace("\\", "/") + + targetinfo = updater.get_targetinfo(target_filepath) + target_data = git_fetcher.get_current_target_data(target_filepath, raw=True) + targetinfo.verify_length_and_hashes(target_data) + + taf_logger.debug( + "Successfully validated target file {} at {}", + target_filepath, + current_commit, + ) + + _validate_metadata_on_disk(git_fetcher) + return current_commit + except Exception as e: + metadata_expired = EXPIRED_METADATA_ERROR in type( + e + ).__name__ or EXPIRED_METADATA_ERROR in str(e) + if not metadata_expired or settings.strict: + taf_logger.error( + "Validation of authentication repository {} failed at revision {} due to error: {}", + auth_repo_name or "", + current_commit, + e, + ) + raise UpdateFailedError( + f"Validation of authentication repository {auth_repo_name or ''}" + f" failed at revision {current_commit} due to error: {e}" + ) + taf_logger.warning( + f"WARNING: Could not validate authentication repository at revision {current_commit} due to error: {e}" + ) + + +def _validate_metadata_on_disk(git_fetcher): + """ + TUF updater does not always check the validity of all metadata files + if timestamp is not updated, the updater will determine that a new version + of the snapshot file does not need to be downloaded and it will not be validated + during the update process, the metadata files that TUF updater downloads is stored + in a separate folder within the temp directory + For each commit, check if the metadata files inside that directory are the same + as the ones in the auth repository's metadata folder at that revision + """ + consistent_snaphost_pattern = r"\d+\.[^\.\s]+\.\w+" + for metadata_file_name in git_fetcher.get_current_metadata(): + # version (consistent snapshot files) are downloaded to remote + # by the TUF updater, but saved to the main metadata file + # so, 2.root.json is downloaded and saved to root.json + if re.search(consistent_snaphost_pattern, metadata_file_name): + continue + + current_tuf_metadata_file = Path(git_fetcher.metadata_dir, metadata_file_name) + if not current_tuf_metadata_file.is_file(): + # this validation causes an issue with one of the first + # commits of our production repositories and it should + # not be enabled until we specify a later commit of those + # repositories as the initial valid ones + # this error happens when a metadata file is added, but + # snapshot is not updated + # raise UpdateFailedError( + # f"Invalid metadata file {metadata_file_name}" + # ) + continue + metadata_content = git_fetcher.get_current_metadata_data(metadata_file_name) + tuf_metadata_content = current_tuf_metadata_file.read_text() + if metadata_content != tuf_metadata_content: + raise UpdateFailedError(f"Invalid metadata file {metadata_file_name}") + + def _find_next_value(value, values_list): """ Find the next value in the list after the given value.