Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ GITGUARDIAN_INSTANCE=https://dashboard.gitguardian.com/
# - and set TEST_GG_VALID_TOKEN_IGNORE_SHA to matching commit sha
# TEST_GG_VALID_TOKEN=
# TEST_GG_VALID_TOKEN_IGNORE_SHA=

# Fallback value for the repository remote URL in case it cannot be determined using `git remote -v`
# This variable is particularly relevant when running ggshield in a git pre-receive hook
#REPOSITORY_REMOTE_FALLBACK=
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<!--
A new scriv changelog fragment.

Uncomment the section that is right (remove the HTML comment wrapper).
For top level release notes, leave all the headers commented out.
-->

<!--
### Removed

- A bullet item for the Removed category.

-->

### Added

- Add the `REPOSITORY_REMOTE_FALLBACK` environment variable, which sets a fallback repository remote URL used when no remote URL can be detected from the git repository.

<!--
### Changed

- A bullet item for the Changed category.

-->
<!--
### Deprecated

- A bullet item for the Deprecated category.

-->
<!--
### Fixed

- A bullet item for the Fixed category.

-->
<!--
### Security

- A bullet item for the Security category.

-->
1 change: 1 addition & 0 deletions ggshield/core/env_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"GITGUARDIAN_INSTANCE",
"GITGUARDIAN_API_URL",
"GITGUARDIAN_API_KEY",
"REPOSITORY_REMOTE_FALLBACK",
}

logger = logging.getLogger(__name__)
Expand Down
24 changes: 21 additions & 3 deletions ggshield/utils/git_shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,21 +379,39 @@ def get_repository_url_from_path(wd: Path) -> Optional[str]:
Returns one of the repository remote urls. Returns None if no remote are found,
or the directory is not a repository or we don't have git so we can't know if the
directory is a repository.

If REPOSITORY_REMOTE_FALLBACK environment variable is set, it will be used as a
fallback value when no remote URL can be detected from the git repository.
"""
try:
if not is_git_available() or not is_git_dir(wd):
return None
return _get_repository_url_fallback()
remotes_raw = git(["remote", "-v"], cwd=wd).splitlines()
except (subprocess.CalledProcessError, OSError):
return None
return _get_repository_url_fallback()

url: Optional[str] = None
for line in remotes_raw:
if match := re.search(r"^(.*)\t(.*) \(fetch\)$", line):
name, url = match.groups()
if name == "origin":
break
return simplify_git_url(url) if url else None

if url:
return simplify_git_url(url)

return _get_repository_url_fallback()


def _get_repository_url_fallback() -> Optional[str]:
    """
    Returns the repository URL from the REPOSITORY_REMOTE_FALLBACK environment variable.

    Surrounding whitespace is ignored, so a blank value (for example a
    `REPOSITORY_REMOTE_FALLBACK= ` line) is handled like an unset variable instead
    of being passed on as if it were a URL.

    Returns None if the environment variable is not set, empty or blank. Otherwise
    returns the stripped value passed through `simplify_git_url`.
    """
    # Default to "" so that "unset" and "blank" share a single code path.
    url = os.getenv("REPOSITORY_REMOTE_FALLBACK", "").strip()
    if not url:
        return None
    return simplify_git_url(url)


def get_filepaths_from_ref(
Expand Down
44 changes: 44 additions & 0 deletions tests/unit/cmd/scan/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,3 +557,47 @@ def test_scan_context_repository(
and arg.get("GGShield-Repository-URL") == "github.com/owner/repository"
for arg in scan_mock.call_args[0]
)

@patch("pygitguardian.GGClient.multi_content_scan")
@my_vcr.use_cassette("test_scan_context_repository.yaml")
def test_scan_path_with_fallback_repository_url(
    self,
    scan_mock: Mock,
    tmp_path: Path,
    cli_fs_runner: CliRunner,
) -> None:
    """
    GIVEN a repository on which no remote url has been configured
    WHEN a path scan is executed while REPOSITORY_REMOTE_FALLBACK is set
    THEN the value of the environment variable is sent in the headers
    """
    # Build a one-commit repository; no remote is ever added to it.
    repo = Repository.create(tmp_path)
    secret_file = repo.path / "file_secret"
    write_text(secret_file, "Hello")
    repo.add(secret_file)
    repo.create_commit()

    # The mocked API call answers with an empty, successful scan result.
    mocked_result = MultiScanResult([])
    mocked_result.status_code = 200
    scan_mock.return_value = mocked_result

    env_overrides = {
        "REPOSITORY_REMOTE_FALLBACK": "https://github.com/fallback/repository.git"
    }
    command = ["secret", "scan", "path", str(secret_file)]
    with patch.dict(os.environ, env_overrides):
        result = cli_fs_runner.invoke(cli, command)
    assert result.exit_code == ExitCode.SUCCESS, result.output

    scan_mock.assert_called_once()
    # Only dict positional arguments can carry the headers; look for the
    # repository URL header among them.
    dict_args = [arg for arg in scan_mock.call_args[0] if isinstance(arg, dict)]
    assert any(
        headers.get("GGShield-Repository-URL") == "github.com/fallback/repository"
        for headers in dict_args
    )
75 changes: 75 additions & 0 deletions tests/unit/core/scan/test_scan_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,78 @@ def test_ci_no_env(env, fake_url_repo: Repository) -> None:
target_path=fake_url_repo.path,
)
_assert_repo_url_in_headers(context, EXPECTED_HEADER_REMOTE)


@pytest.mark.parametrize(
    ("setup_type", "env_var_value", "expected_url"),
    [
        # Repository with remote - remote takes precedence
        (
            "repo_with_remote",
            "https://github.com/fallback/repository.git",
            EXPECTED_HEADER_REMOTE,
        ),
        ("repo_with_remote", None, EXPECTED_HEADER_REMOTE),
        # Repository without remote - fallback to env var
        (
            "repo_without_remote",
            "https://github.com/fallback/repository.git",
            "github.com/fallback/repository",
        ),
        ("repo_without_remote", None, None),
        # Non-git directory - fallback to env var
        (
            "non_git_dir",
            "https://github.com/fallback/repository.git",
            "github.com/fallback/repository",
        ),
        ("non_git_dir", None, None),
    ],
    ids=[
        "repo_with_remote_with_env_var",
        "repo_with_remote_no_env_var",
        "repo_without_remote_with_env_var",
        "repo_without_remote_no_env_var",
        "non_git_dir_with_env_var",
        "non_git_dir_no_env_var",
    ],
)
def test_repository_url_fallback_in_scan_context(
    tmp_path: Path,
    fake_url_repo: Repository,
    setup_type: str,
    env_var_value: Union[str, None],
    expected_url: Union[str, None],
) -> None:
    """
    Test that REPOSITORY_REMOTE_FALLBACK environment variable is properly used in scan context.
    Covers all combinations of repo types (with remote, without remote, non-git) and env var presence.

    When `env_var_value` is None the variable is explicitly removed for the duration
    of the test, so the "no env var" cases do not depend on the environment the test
    suite happens to be started from.
    """
    # Setup the target path based on the test case
    if setup_type == "repo_with_remote":
        target_path = fake_url_repo.path
    elif setup_type == "repo_without_remote":
        repo = Repository.create(tmp_path / "repo")
        repo.create_commit()
        target_path = repo.path
    else:  # non_git_dir
        target_path = tmp_path

    # patch.dict restores os.environ on exit, including keys removed inside the
    # block, so the variable can be freely set or dropped here.
    with mock.patch.dict(os.environ, clear=False):
        if env_var_value is None:
            # An empty patch would leave an ambient REPOSITORY_REMOTE_FALLBACK
            # in place; drop it to really exercise the "not set" case.
            os.environ.pop("REPOSITORY_REMOTE_FALLBACK", None)
        else:
            os.environ["REPOSITORY_REMOTE_FALLBACK"] = env_var_value

        context = ScanContext(
            scan_mode=ScanMode.PATH,
            command_path="ggshield secret scan path",
            target_path=target_path,
        )

        # Assert the expected URL in headers. Kept inside the patched environment
        # so the check holds whether or not the headers are built lazily.
        if expected_url:
            _assert_repo_url_in_headers(context, expected_url)
        else:
            _assert_no_repo_url_in_headers(context)
Loading