diff --git a/documentation/docs/getting-started/configuration.mdx b/documentation/docs/getting-started/configuration.mdx index 9a775d106..f8f39cef6 100644 --- a/documentation/docs/getting-started/configuration.mdx +++ b/documentation/docs/getting-started/configuration.mdx @@ -113,7 +113,8 @@ Please use this table as a reference. | OPAL_POLICY_REPO_URL | The repo url the policy repo is located at. Must be available from the machine running OPAL (opt for public internet addresses). Supported URI schemes: https:// and ssh{" "} (i.e: git@). | | | OPAL_POLICY_REPO_SSH_KEY | The content of the var is a private crypto key (i.e: SSH key). You will need to register the matching public key with your repo. For example, see the{" "} GitHub tutorial {" "} on the subject. The passed value must be the contents of the SSH key in one line (replace new-line with underscore, i.e: \n with{" "} \_). | | | OPAL_POLICY_REPO_CLONE_PATH | Where (i.e: base target path) to clone the repo in your docker filesystem (not important unless you mount a docker volume). | | -| OPAL_POLICY_REPO_MAIN_BRANCH | Name of the git branch to track for policy files (default: `master`). | | +| OPAL_POLICY_REPO_MAIN_BRANCH | Name of the git branch to track for policy files (default: `master`, unless `OPAL_POLICY_REPO_TAG` is set). | | +| OPAL_POLICY_REPO_TAG | Name of the git tag to track for policy files (default: None). | | | OPAL_BUNDLE_IGNORE | Paths to omit from policy bundle. List of glob style paths, or paths without wildcards but ending with "/\*\*" indicating a parent path (ignoring all under it). 
| `bundle_ignore: Optional[List[str]]` | ## OPAL Client Configuration Variables diff --git a/documentation/docs/getting-started/running-opal/as-python-package/opal-server-setup.mdx b/documentation/docs/getting-started/running-opal/as-python-package/opal-server-setup.mdx index 68b7d5e6c..3d7e341f2 100644 --- a/documentation/docs/getting-started/running-opal/as-python-package/opal-server-setup.mdx +++ b/documentation/docs/getting-started/running-opal/as-python-package/opal-server-setup.mdx @@ -90,9 +90,11 @@ a [Github SSH key here](https://docs.github.com/en/github/authenticating-to-gith The value you pass for the `POLICY_REPO_SSH_KEY` can either be a file path, or the contents of the SSH-key - with newlines replaced with `\_`. -#### `OPAL_POLICY_REPO_CLONE_PATH` & `OPAL_POLICY_REPO_MAIN_BRANCH` +#### `OPAL_POLICY_REPO_CLONE_PATH`, `OPAL_POLICY_REPO_MAIN_BRANCH` & `OPAL_POLICY_REPO_TAG` -These will allow you to control how the repo is cloned. +These will allow you to control how the repo is cloned. By default OPAL will track the `master` branch of the repo; you may optionally track another branch or a tag in the repo. + +You must choose between tracking a branch or a tag; OPAL will fail if you try to supply both `OPAL_POLICY_REPO_MAIN_BRANCH` and `OPAL_POLICY_REPO_TAG`. ### Simple run with Data source configuration diff --git a/documentation/docs/getting-started/running-opal/as-python-package/overview.mdx b/documentation/docs/getting-started/running-opal/as-python-package/overview.mdx index 9ddd784bb..f5f9c7457 100644 --- a/documentation/docs/getting-started/running-opal/as-python-package/overview.mdx +++ b/documentation/docs/getting-started/running-opal/as-python-package/overview.mdx @@ -185,7 +185,9 @@ The value you pass for the `POLICY_REPO_SSH_KEY` can either be a file path, or t ##### `OPAL_POLICY_REPO_CLONE_PATH` & `OPAL_POLICY_REPO_MAIN_BRANCH` -These will allow you to control how the repo is cloned. +These will allow you to control how the repo is cloned. 
By default OPAL will track the `master` branch of the repo; you may optionally track another branch or a tag in the repo. + +You must choose between tracking a branch or a tag; OPAL will fail if you try to supply both `OPAL_POLICY_REPO_MAIN_BRANCH` and `OPAL_POLICY_REPO_TAG`. #### Simple run with Data source configuration diff --git a/documentation/docs/getting-started/running-opal/run-opal-server/policy-repo-location.mdx b/documentation/docs/getting-started/running-opal/run-opal-server/policy-repo-location.mdx index 242226d77..807fd7b2c 100644 --- a/documentation/docs/getting-started/running-opal/run-opal-server/policy-repo-location.mdx +++ b/documentation/docs/getting-started/running-opal/run-opal-server/policy-repo-location.mdx @@ -87,7 +87,13 @@ For these config vars, in most cases you are good with the default values: OPAL_POLICY_REPO_MAIN_BRANCH - Name of the git branch to track for policy files (default: `master`) + Name of the git branch to track for policy files (default: `master`, unless `OPAL_POLICY_REPO_TAG` is set) + + + + OPAL_POLICY_REPO_TAG + + Name of the git tag to track for policy files (default: `None`). 
diff --git a/packages/opal-common/opal_common/git/branch_tracker.py b/packages/opal-common/opal_common/git/branch_tracker.py index 19bba8770..3c8b374dc 100644 --- a/packages/opal-common/opal_common/git/branch_tracker.py +++ b/packages/opal-common/opal_common/git/branch_tracker.py @@ -1,7 +1,7 @@ from functools import partial from typing import Optional, Tuple -from git import GitCommandError, Head, Remote, Repo +from git import GitCommandError, Head, Reference, Remote, Repo from git.objects.commit import Commit from opal_common.git.env import provide_git_ssh_environment from opal_common.git.exceptions import GitFailed @@ -135,6 +135,10 @@ def tracked_branch(self) -> Head: ) raise GitFailed(e) + @property + def tracked_reference(self) -> Reference: + return self.tracked_branch + @property def tracked_remote(self) -> Remote: """returns the tracked remote object (of type git.Remote) or throws if diff --git a/packages/opal-common/opal_common/git/tag_tracker.py b/packages/opal-common/opal_common/git/tag_tracker.py new file mode 100644 index 000000000..f68729eb5 --- /dev/null +++ b/packages/opal-common/opal_common/git/tag_tracker.py @@ -0,0 +1,113 @@ +from functools import partial +from typing import Optional, Tuple + +from git import GitCommandError, Reference, Repo, Tag +from git.objects.commit import Commit +from opal_common.git.branch_tracker import BranchTracker +from opal_common.git.env import provide_git_ssh_environment +from opal_common.git.exceptions import GitFailed +from opal_common.logger import logger +from tenacity import retry, stop_after_attempt, wait_fixed + + +class TagTracker(BranchTracker): + """Tracks the state of a git tag (hash the tag is pointing at). + + Can detect if the tag has been moved to point at a different commit. + """ + + def __init__( + self, + repo: Repo, + tag_name: str, + remote_name: str = "origin", + retry_config=None, + ssh_key: Optional[str] = None, + ): + """Initializes the TagTracker. 
+ + Args: + repo (Repo): a git repo in which we want to track the specific commit a tag is pointing to + tag_name (str): the tag we want to track + remote_name (str): the remote in which the tag is located + retry_config (dict): Tenacity.retry config + ssh_key (Optional[str]): SSH key for private repositories + """ + self._tag_name = tag_name + super().__init__( + repo, + branch_name=None, + remote_name=remote_name, + retry_config=retry_config, + ssh_key=ssh_key, + ) + + def checkout(self): + """Checkouts the repository at the current tag.""" + checkout_func = partial(self._repo.git.checkout, self._tag_name) + attempt_checkout = retry(**self._retry_config)(checkout_func) + try: + return attempt_checkout() + except GitCommandError as e: + tags = [tag.name for tag in self._repo.tags] + logger.error( + "did not find tag: {tag_name}, instead found: {tags_found}, got error: {error}", + tag_name=self._tag_name, + tags_found=tags, + error=str(e), + ) + raise GitFailed(e) + + def _fetch(self): + """Fetch updates including tags with force option.""" + + def _inner_fetch(*args, **kwargs): + env = provide_git_ssh_environment(self.tracked_remote.url, self._ssh_key) + with self.tracked_remote.repo.git.custom_environment(**env): + self.tracked_remote.repo.git.fetch("--tags", "--force", *args, **kwargs) + + attempt_fetch = retry(**self._retry_config)(_inner_fetch) + return attempt_fetch() + + @property + def latest_commit(self) -> Commit: + """the commit of the tracked tag.""" + return self.tracked_tag.commit + + @property + def tracked_tag(self) -> Tag: + """returns the tracked tag reference (of type git.Reference) or throws + if such tag does not exist on the repo.""" + try: + return getattr(self._repo.tags, self._tag_name) + except AttributeError as e: + tags = [{"path": tag.path} for tag in self._repo.tags] + logger.exception( + "did not find main branch: {error}, instead found: {tags_found}", + error=e, + tags_found=tags, + ) + raise GitFailed(e) + + @property + def 
tracked_reference(self) -> Reference: + return self.tracked_tag + + def pull(self) -> Tuple[bool, Commit, Commit]: + """Overrides the pull method to handle tag updates. + + Returns: + pull_result (bool, Commit, Commit): a tuple consisting of: + has_changes (bool): whether the tag has been moved to a different commit + prev (Commit): the previous commit the tag was pointing to + latest (Commit): the new commit the tag is currently pointing to + """ + self._fetch() + self.checkout() + + if self.prev_commit.hexsha == self.latest_commit.hexsha: + return False, self.prev_commit, self.prev_commit + else: + prev = self._prev_commit + self._save_latest_commit_as_prev_commit() + return True, prev, self.latest_commit diff --git a/packages/opal-common/opal_common/git/tests/conftest.py b/packages/opal-common/opal_common/git/tests/conftest.py index c60099a5c..cbe156cb8 100644 --- a/packages/opal-common/opal_common/git/tests/conftest.py +++ b/packages/opal-common/opal_common/git/tests/conftest.py @@ -73,6 +73,15 @@ def create_rename_file_commit( repo.index.move([filename, new_filename]) repo.index.commit(commit_msg, author=author) + @staticmethod + def create_new_tag(repo: Repo, tag_name: str): + repo.create_tag(tag_name) + + @staticmethod + def update_tag_to_head(repo: Repo, tag_name: str): + repo.delete_tag(tag_name) + repo.create_tag(tag_name) + @pytest.fixture def helpers() -> Helpers: @@ -140,6 +149,9 @@ def local_repo(tmp_path, helpers: Helpers) -> Repo: # create a "delete" commit helpers.create_delete_file_commit(repo, root / "deleted.rego") + + # create a test tag + helpers.create_new_tag(repo, "test_tag") return repo diff --git a/packages/opal-common/opal_common/git/tests/repo_watcher_test.py b/packages/opal-common/opal_common/git/tests/repo_watcher_test.py index d94eff2ee..215de4b04 100644 --- a/packages/opal-common/opal_common/git/tests/repo_watcher_test.py +++ b/packages/opal-common/opal_common/git/tests/repo_watcher_test.py @@ -46,6 +46,7 @@ async def 
failure_callback(e: Exception): # configure the watcher to watch an invalid repo watcher = GitPolicySource( remote_source_url=INVALID_REPO_REMOTE_URL, + branch_name="master", local_clone_path=target_path, request_timeout=3, ) @@ -86,7 +87,9 @@ async def new_commits_callback( # configure the watcher with a valid local repo (our test repo) # the returned repo will track the local remote repo watcher = GitPolicySource( - remote_source_url=remote_repo.working_tree_dir, local_clone_path=target_path + remote_source_url=remote_repo.working_tree_dir, + local_clone_path=target_path, + branch_name=remote_repo.active_branch.name, ) # configure the error callback watcher.add_on_new_policy_callback(partial(new_commits_callback, detected_commits)) @@ -157,6 +160,7 @@ async def new_commits_callback( watcher = GitPolicySource( remote_source_url=remote_repo.working_tree_dir, local_clone_path=target_path, + branch_name=remote_repo.active_branch.name, polling_interval=3, # every 3 seconds do a pull to try and detect changes ) # configure the error callback diff --git a/packages/opal-common/opal_common/git/tests/tag_tracker_test.py b/packages/opal-common/opal_common/git/tests/tag_tracker_test.py new file mode 100644 index 000000000..87347197c --- /dev/null +++ b/packages/opal-common/opal_common/git/tests/tag_tracker_test.py @@ -0,0 +1,81 @@ +import os +import sys + +import pytest + +# Add root opal dir to use local src as package for tests (i.e, no need for python -m pytest) +root_dir = os.path.abspath( + os.path.join( + os.path.dirname(__file__), + os.path.pardir, + os.path.pardir, + os.path.pardir, + ) +) +sys.path.append(root_dir) + +from pathlib import Path + +from git import Repo +from git.objects.commit import Commit +from opal_common.git.exceptions import GitFailed +from opal_common.git.tag_tracker import TagTracker + + +def test_pull_with_no_changes(local_repo_clone: Repo): + """Test pulling when there are no changes on the remote repo.""" + repo: Repo = local_repo_clone # 
local repo, cloned from another local repo + tracker = TagTracker(repo=repo, tag_name="test_tag") + latest_commit: Commit = repo.head.commit + assert latest_commit == tracker.latest_commit == tracker.prev_commit + has_changes, prev, latest = tracker.pull() # pulls from origin + assert has_changes == False + assert latest_commit == prev == latest + + +def test_pull_with_new_commits( + local_repo: Repo, + local_repo_clone: Repo, + helpers, +): + """Test pulling when there are changes (new commits) on the remote repo.""" + remote_repo: Repo = ( + local_repo # local repo, the 'origin' remote of 'local_repo_clone' + ) + repo: Repo = local_repo_clone # local repo, cloned from 'local_repo' + + tracker = TagTracker(repo=repo, tag_name="test_tag") + most_recent_commit_before_pull: Commit = repo.head.commit + + assert ( + most_recent_commit_before_pull == tracker.latest_commit == tracker.prev_commit + ) + + # create new file commit on the remote repo + helpers.create_new_file_commit( + remote_repo, Path(remote_repo.working_tree_dir) / "2.txt" + ) + + helpers.update_tag_to_head(remote_repo, "test_tag") + + # now the remote repo tag is pointing at a different commit + assert remote_repo.tags.__getattr__("test_tag").commit != repo.head.commit + # and our tag tracker does not know it yet + assert remote_repo.tags.__getattr__("test_tag").commit != tracker.latest_commit + + has_changes, prev, latest = tracker.pull() # pulls from origin + assert has_changes == True + assert prev != latest + assert most_recent_commit_before_pull == prev + assert ( + remote_repo.tags.__getattr__("test_tag").commit + == repo.tags.__getattr__("test_tag").commit + == latest + == tracker.latest_commit + ) + + +def test_tracked_branch_does_not_exist(local_repo: Repo): + """Test that tag tracker throws when tag does not exist.""" + with pytest.raises(GitFailed): + tracker = TagTracker(local_repo, tag_name="no_such_tag") diff --git a/packages/opal-common/opal_common/sources/git_policy_source.py 
b/packages/opal-common/opal_common/sources/git_policy_source.py index bffe8517d..88e0f1320 100644 --- a/packages/opal-common/opal_common/sources/git_policy_source.py +++ b/packages/opal-common/opal_common/sources/git_policy_source.py @@ -4,6 +4,7 @@ from opal_common.git.branch_tracker import BranchTracker from opal_common.git.exceptions import GitFailed from opal_common.git.repo_cloner import RepoCloner +from opal_common.git.tag_tracker import TagTracker from opal_common.logger import logger from opal_common.sources.base_policy_source import BasePolicySource @@ -30,7 +31,8 @@ def __init__( self, remote_source_url: str, local_clone_path: str, - branch_name: str = "master", + branch_name: Optional[str] = None, + tag_name: Optional[str] = None, ssh_key: Optional[str] = None, polling_interval: int = 0, request_timeout: int = 0, @@ -49,7 +51,16 @@ def __init__( ssh_key=self._ssh_key, clone_timeout=request_timeout, ) + + if branch_name is None and tag_name is None: + logger.exception("Must provide either branch_name or tag_name") + raise ValueError("Must provide either branch_name or tag_name") + if branch_name is not None and tag_name is not None: + logger.exception("Must provide either branch_name or tag_name, not both") + raise ValueError("Must provide either branch_name or tag_name, not both") + self._branch_name = branch_name + self._tag_name = tag_name self._tracker = None async def get_initial_policy_state_from_remote(self): @@ -82,9 +93,14 @@ async def get_initial_policy_state_from_remote(self): await self._on_git_failed(e) return - self._tracker = BranchTracker( - repo=repo, branch_name=self._branch_name, ssh_key=self._ssh_key - ) + if self._tag_name is not None: + self._tracker = TagTracker( + repo=repo, tag_name=self._tag_name, ssh_key=self._ssh_key + ) + else: + self._tracker = BranchTracker( + repo=repo, branch_name=self._branch_name, ssh_key=self._ssh_key + ) async def check_for_changes(self): """Calling this method will trigger a git pull from the tracked 
remote. @@ -98,7 +114,11 @@ async def check_for_changes(self): ) has_changes, prev, latest = self._tracker.pull() if not has_changes: - logger.info("No new commits: HEAD is at '{head}'", head=latest.hexsha) + logger.info( + "No new commits: {ref} is at '{head}'", + ref=self._tracker.tracked_reference.name, + head=latest.hexsha, + ) else: logger.info( "Found new commits: old HEAD was '{prev_head}', new HEAD is '{new_head}'", diff --git a/packages/opal-server/opal_server/config.py b/packages/opal-server/opal_server/config.py index 0f2a05a00..d209c6e1c 100644 --- a/packages/opal-server/opal_server/config.py +++ b/packages/opal-server/opal_server/config.py @@ -99,7 +99,8 @@ class OpalServerConfig(Confi): False, "Set if OPAL server should use a fixed clone path (and reuse if it already exists) instead of randomizing its suffix on each run", ) - POLICY_REPO_MAIN_BRANCH = confi.str("POLICY_REPO_MAIN_BRANCH", "master") + POLICY_REPO_MAIN_BRANCH = confi.str("POLICY_REPO_MAIN_BRANCH", None) + POLICY_REPO_TAG = confi.str("POLICY_REPO_TAG", None) POLICY_REPO_SSH_KEY = confi.str("POLICY_REPO_SSH_KEY", None) POLICY_REPO_MANIFEST_PATH = confi.str( "POLICY_REPO_MANIFEST_PATH", diff --git a/packages/opal-server/opal_server/policy/watcher/factory.py b/packages/opal-server/opal_server/policy/watcher/factory.py index dabf8cf73..0f738d1c5 100644 --- a/packages/opal-server/opal_server/policy/watcher/factory.py +++ b/packages/opal-server/opal_server/policy/watcher/factory.py @@ -21,6 +21,7 @@ def setup_watcher_task( remote_source_url: str = None, clone_path_finder: RepoClonePathFinder = None, branch_name: str = None, + tag_name: str = None, ssh_key: Optional[str] = None, polling_interval: int = None, request_timeout: int = None, @@ -39,6 +40,7 @@ def setup_watcher_task( remote_source_url(str): the base address to request the policy from clone_path_finder(RepoClonePathFinder): from which the local dir path for the repo clone would be retrieved branch_name(str): name of remote branch in 
git to pull + tag_name(str): name of remote tag in git to track ssh_key (str, optional): private ssh key used to gain access to the cloned repo polling_interval(int): how many seconds need to wait between polling request_timeout(int): how many seconds need to wait until timeout @@ -71,6 +73,11 @@ def setup_watcher_task( branch_name = load_conf_if_none( branch_name, opal_server_config.POLICY_REPO_MAIN_BRANCH ) + tag_name = load_conf_if_none(tag_name, opal_server_config.POLICY_REPO_TAG) + if branch_name is None and tag_name is None: + logger.info("No branch or tag specified, falling back to using branch 'master'") + branch_name = "master" + ssh_key = load_conf_if_none(ssh_key, opal_server_config.POLICY_REPO_SSH_KEY) polling_interval = load_conf_if_none( polling_interval, opal_server_config.POLICY_REPO_POLLING_INTERVAL @@ -97,6 +104,7 @@ def setup_watcher_task( remote_source_url=remote_source_url, local_clone_path=clone_path, branch_name=branch_name, + tag_name=tag_name, ssh_key=ssh_key, polling_interval=polling_interval, request_timeout=request_timeout,