Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check if metadata files at revision match those downloaded by TUF updater #389

Merged
merged 6 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,14 @@ and this project adheres to [Semantic Versioning][semver].

### Added

- Check if metadata files at revision match those downloaded by TUF updater ([389])

### Changed

### Fixed

[389]: https://github.com/openlawlibrary/taf/pull/389

## [0.29.1] - 02/07/2024

### Added
Expand Down
10 changes: 8 additions & 2 deletions taf/tests/test_updater/test_repo_update/test_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
TARGET_MISSING_COMMIT_PATTERN = r"Update of organization/auth_repo failed due to error: Failure to validate organization/auth_repo commit ([0-9a-f]{40}) committed on (\d{4}-\d{2}-\d{2}): data repository ([\w\/-]+) was supposed to be at commit ([0-9a-f]{40}) but commit not on branch (\w+)"
NOT_CLEAN_PATTERN = r"^Update of ([\w/]+) failed due to error: Repository ([\w/-]+) should contain only committed changes\."
INVALID_KEYS_PATTERN = r"^Update of organization/auth_repo failed due to error: Validation of authentication repository organization/auth_repo failed at revision ([0-9a-f]{40}) due to error: ([\w/-]+) was signed by (\d+)/(\d+) keys$"
INVALID_METADATA_PATTERN = r"^Update of organization/auth_repo failed due to error: Validation of authentication repository organization/auth_repo failed at revision ([0-9a-f]{40}) due to error: Invalid metadata file ([\w/]+\.\w+)$"


NO_REPOSITORY_INFO_JSON = "Update of repository failed due to error: Error during info.json parse. If the authentication repository's path is not specified, info.json metadata is expected to be in targets/protected"
Expand Down Expand Up @@ -126,8 +127,6 @@ def run_around_tests(client_dir):
("test-updater-multiple-branches", UpdateType.OFFICIAL, True),
("test-updater-delegated-roles", UpdateType.OFFICIAL, True),
("test-updater-updated-root", UpdateType.OFFICIAL, True),
("test-updater-updated-root-old-snapshot", UpdateType.OFFICIAL, True),
("test-updater-updated-root-version-skipped", UpdateType.OFFICIAL, True),
("test-updater-updated-root-version-skipped", UpdateType.OFFICIAL, True),
("test-updater-expired-metadata", UpdateType.OFFICIAL, True),
],
Expand Down Expand Up @@ -272,6 +271,13 @@ def test_no_update_necessary(
True,
True,
),
(
"test-updater-updated-root-old-snapshot",
INVALID_METADATA_PATTERN,
True,
False,
False,
),
(
"test-updater-missing-target-commit",
TARGET_ADDITIONAL_COMMIT_PATTERN,
Expand Down
15 changes: 14 additions & 1 deletion taf/updater/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class GitUpdater(FetcherInterface):
- A commit is considered to be a TUF Updater instance. We keep track of the current commit.
- This class is designed in such a way that for each subsequent call of the
updater's refresh method the metadata from next commit is used within TUF's updater.
- The updater's method for downloading and retrieving current metadata is overriden
- The updater's method for downloading and retrieving current metadata is overridden
by our own '_fetch' call. We override TUF's FetcherInterface abstract class to fetch
metadata from local git revisions, instead of by downloading data from another protocol,
like http/https. So, what we want to do is to return the current commit,
Expand Down Expand Up @@ -243,11 +243,24 @@ def get_current_targets(self):
except GitError:
return []

def get_current_metadata(self):
try:
return self.validation_auth_repo.list_files_at_revision(
self.current_commit, "metadata"
)
except GitError:
return []

def get_current_target_data(self, filepath, raw=False):
return self.validation_auth_repo.get_file(
self.current_commit, f"targets/{filepath}", raw=raw
)

def get_current_metadata_data(self, filepath, raw=False):
return self.validation_auth_repo.get_file(
self.current_commit, f"metadata/{filepath}", raw=raw
)

@revert_tuf_patch_on_last_commit # type: ignore
def update_done(self):
"""Used to indicate whether updater has finished with update.
Expand Down
4 changes: 2 additions & 2 deletions taf/updater/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,8 +644,8 @@ def validate_repository(
)
settings.overwrite_last_validated_commit = True
settings.last_validated_commit = validate_from_commit
auth_repo_name = None
try:

auth_repo_name, error = _update_named_repository(
operation=OperationType.UPDATE,
url=str(auth_path),
Expand All @@ -661,7 +661,7 @@ def validate_repository(
raise error
except Exception as e:
raise ValidationFailedError(
f"Validation or repository {auth_repo_name} failed due to error: {e}"
f"Validation or repository {auth_repo_name or ''} failed due to error: {e}"
)
settings.overwrite_last_validated_commit = False
settings.last_validated_commit = None
43 changes: 42 additions & 1 deletion taf/updater/updater_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import functools
from logging import DEBUG, INFO
from pathlib import Path
import re
import shutil
import tempfile
from typing import Any, Dict, List, Optional
Expand Down Expand Up @@ -94,7 +95,8 @@ def wrapper(self, *args, **kwargs):
return result
finally:
if (
self.state.event == Event.FAILED
not self.only_validate
and self.state.event == Event.FAILED
and not self.state.existing_repo
and self.state.users_auth_repo is not None
):
Expand Down Expand Up @@ -1205,6 +1207,45 @@ def _update_tuf_current_revision():
target_filepath,
current_commit,
)

# TUF updater does not always check the validity of all metadata files
# if timestamp is not updated, the updater will determine that a new version
# of the snapshot file does not need to be downloaded and it will not be validated
# during the update process, the metadata files that TUF updater downloads is stored
# in a separate folder within the temp directory
# For each commit, check if the metadata files inside that directory are the same
# as the ones in the auth repository's metadata folder at that revision
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This explanation looks like a good candidate to be a docstring. Could we move the code to a function?

pattern = r"\d+\.[^\.\s]+\.\w+"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might want to add explanation as to what this pattern is searching for

for metadata_file_name in git_updater.get_current_metadata():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we're reading all metadata files from disk for each commit, it's worth timing the updater to see how long this new validation takes, compared to the implementation we have on master.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ran SMC clone a couoek of times. The first was was slightly slower than on master, the second faster than on master, so I don't think that this impacts the performance. Other operations are much slower

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Awesome

# version (consistent snapshot files) are downloaded to remote
# by the TUF updater, but saved to the main metadata file
# so, 2.root.json is downloaded and saved to root.json
if re.search(pattern, metadata_file_name):
continue

current_tuf_metadata_file = Path(
git_updater.metadata_dir, metadata_file_name
)
if not current_tuf_metadata_file.is_file():
# this validation causes an issue with one of the first
# commits of our production repositories and it should
# not be enabled until we specify a later commit of those
# repositories as the initial valid ones
# this error happens when a metadata file is added, but
# snapshot is not updated
# raise UpdateFailedError(
# f"Invalid metadata file {metadata_file_name}"
# )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we turn this into an issue?

continue
metadata_content = git_updater.get_current_metadata_data(
metadata_file_name
)
tuf_metadata_content = current_tuf_metadata_file.read_text()
if metadata_content != tuf_metadata_content:
raise UpdateFailedError(
f"Invalid metadata file {metadata_file_name}"
)

return current_commit
except Exception as e:
metadata_expired = EXPIRED_METADATA_ERROR in type(
Expand Down
Loading