Skip to content

Commit 00d20ca

Browse files
author
Kevin Westphal
committed
feat(git-hooks): set repo remote fallback with env variable
In some configurations ggshield runs in repositories without a configured remote, for example when running in a git pre-receive hook. This commit adds a REPOSITORY_REMOTE_FALLBACK environment variable for setting a fallback value for the remote URL. Issue #1158
1 parent 7218450 commit 00d20ca

File tree

7 files changed

+396
-3
lines changed

7 files changed

+396
-3
lines changed

.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,7 @@ GITGUARDIAN_INSTANCE=https://dashboard.gitguardian.com/
1616
# - and set TEST_GG_VALID_TOKEN_IGNORE_SHA to matching commit sha
1717
# TEST_GG_VALID_TOKEN=
1818
# TEST_GG_VALID_TOKEN_IGNORE_SHA=
19+
20+
# Fallback value for the repository remote URL in case it cannot be determined using `git remote -v`
21+
# This variable is particularly relevant when running ggshield in a git pre-receive hook
22+
#REPOSITORY_REMOTE_FALLBACK=
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<!--
2+
A new scriv changelog fragment.
3+
4+
Uncomment the section that is right (remove the HTML comment wrapper).
5+
For top level release notes, leave all the headers commented out.
6+
-->
7+
8+
<!--
9+
### Removed
10+
11+
- A bullet item for the Removed category.
12+
13+
-->
14+
15+
### Added
16+
17+
- Add the `REPOSITORY_REMOTE_FALLBACK` environment variable, which provides a fallback value for the repository remote URL when no remote can be detected (for example, when ggshield runs in a git pre-receive hook).
18+
19+
<!--
20+
### Changed
21+
22+
- A bullet item for the Changed category.
23+
24+
-->
25+
<!--
26+
### Deprecated
27+
28+
- A bullet item for the Deprecated category.
29+
30+
-->
31+
<!--
32+
### Fixed
33+
34+
- A bullet item for the Fixed category.
35+
36+
-->
37+
<!--
38+
### Security
39+
40+
- A bullet item for the Security category.
41+
42+
-->

ggshield/core/env_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"GITGUARDIAN_INSTANCE",
1515
"GITGUARDIAN_API_URL",
1616
"GITGUARDIAN_API_KEY",
17+
"REPOSITORY_REMOTE_FALLBACK",
1718
}
1819

1920
logger = logging.getLogger(__name__)

ggshield/utils/git_shell.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -379,21 +379,39 @@ def get_repository_url_from_path(wd: Path) -> Optional[str]:
379379
Returns one of the repository remote URLs. Returns None if no remotes are found,
380380
or the directory is not a repository or we don't have git so we can't know if the
381381
directory is a repository.
382+
383+
If REPOSITORY_REMOTE_FALLBACK environment variable is set, it will be used as a
384+
fallback value when no remote URL can be detected from the git repository.
382385
"""
383386
try:
384387
if not is_git_available() or not is_git_dir(wd):
385-
return None
388+
return _get_repository_url_fallback()
386389
remotes_raw = git(["remote", "-v"], cwd=wd).splitlines()
387390
except (subprocess.CalledProcessError, OSError):
388-
return None
391+
return _get_repository_url_fallback()
389392

390393
url: Optional[str] = None
391394
for line in remotes_raw:
392395
if match := re.search(r"^(.*)\t(.*) \(fetch\)$", line):
393396
name, url = match.groups()
394397
if name == "origin":
395398
break
396-
return simplify_git_url(url) if url else None
399+
400+
if url:
401+
return simplify_git_url(url)
402+
403+
return _get_repository_url_fallback()
404+
405+
406+
def _get_repository_url_fallback() -> Optional[str]:
    """
    Returns the repository URL from the REPOSITORY_REMOTE_FALLBACK environment variable.

    The value is stripped of surrounding whitespace and then passed through
    `simplify_git_url`, like a URL detected from `git remote -v`.

    Returns None if the environment variable is not set, empty, or contains only
    whitespace.
    """
    # A blank value (e.g. `REPOSITORY_REMOTE_FALLBACK=" "`) must behave like an
    # unset variable instead of being reported as a bogus remote URL.
    url = os.getenv("REPOSITORY_REMOTE_FALLBACK", "").strip()
    if not url:
        return None
    return simplify_git_url(url)
397415

398416

399417
def get_filepaths_from_ref(

tests/unit/cmd/scan/test_path.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,3 +557,47 @@ def test_scan_context_repository(
557557
and arg.get("GGShield-Repository-URL") == "github.com/owner/repository"
558558
for arg in scan_mock.call_args[0]
559559
)
560+
561+
@patch("pygitguardian.GGClient.multi_content_scan")
@my_vcr.use_cassette("test_scan_context_repository.yaml")
def test_scan_path_with_fallback_repository_url(
    self,
    scan_mock: Mock,
    tmp_path: Path,
    cli_fs_runner: CliRunner,
) -> None:
    """
    GIVEN a committed file in a repository that has no remote url
    WHEN `secret scan path` is executed with REPOSITORY_REMOTE_FALLBACK set
    THEN the fallback value is sent in the GGShield-Repository-URL header
    """
    # Fresh repository created in tmp_path; this test never adds a remote to it.
    repo = Repository.create(tmp_path)
    secret_file = repo.path / "file_secret"
    write_text(secret_file, "Hello")
    repo.add(secret_file)
    repo.create_commit()

    # Make the mocked API call return an empty, successful scan result.
    empty_result = MultiScanResult([])
    empty_result.status_code = 200
    scan_mock.return_value = empty_result

    command = ["secret", "scan", "path", str(secret_file)]
    env_override = {
        "REPOSITORY_REMOTE_FALLBACK": "https://github.com/fallback/repository.git"
    }
    with patch.dict(os.environ, env_override):
        result = cli_fs_runner.invoke(cli, command)
        assert result.exit_code == ExitCode.SUCCESS, result.output

    scan_mock.assert_called_once()
    # Collect the repository URL from every positional dict argument of the
    # scan call; the expected value is the fallback without scheme or `.git`.
    sent_urls = [
        arg.get("GGShield-Repository-URL")
        for arg in scan_mock.call_args[0]
        if isinstance(arg, dict)
    ]
    assert "github.com/fallback/repository" in sent_urls

tests/unit/core/scan/test_scan_context.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,78 @@ def test_ci_no_env(env, fake_url_repo: Repository) -> None:
123123
target_path=fake_url_repo.path,
124124
)
125125
_assert_repo_url_in_headers(context, EXPECTED_HEADER_REMOTE)
126+
127+
128+
@pytest.mark.parametrize(
    ("setup_type", "env_var_value", "expected_url"),
    [
        # Repository with remote - remote takes precedence
        (
            "repo_with_remote",
            "https://github.com/fallback/repository.git",
            EXPECTED_HEADER_REMOTE,
        ),
        ("repo_with_remote", None, EXPECTED_HEADER_REMOTE),
        # Repository without remote - fallback to env var
        (
            "repo_without_remote",
            "https://github.com/fallback/repository.git",
            "github.com/fallback/repository",
        ),
        ("repo_without_remote", None, None),
        # Non-git directory - fallback to env var
        (
            "non_git_dir",
            "https://github.com/fallback/repository.git",
            "github.com/fallback/repository",
        ),
        ("non_git_dir", None, None),
    ],
    ids=[
        "repo_with_remote_with_env_var",
        "repo_with_remote_no_env_var",
        "repo_without_remote_with_env_var",
        "repo_without_remote_no_env_var",
        "non_git_dir_with_env_var",
        "non_git_dir_no_env_var",
    ],
)
def test_repository_url_fallback_in_scan_context(
    tmp_path: Path,
    fake_url_repo: Repository,
    setup_type: str,
    env_var_value: Union[str, None],
    expected_url: Union[str, None],
) -> None:
    """
    Test that REPOSITORY_REMOTE_FALLBACK environment variable is properly used in scan context.
    Covers all combinations of repo types (with remote, without remote, non-git) and env var presence.

    `env_var_value` is the value given to REPOSITORY_REMOTE_FALLBACK, or None to
    run with the variable unset. `expected_url` is the repository URL expected in
    the headers, or None when no repository URL header is expected.
    """
    # Setup the target path based on the test case
    if setup_type == "repo_with_remote":
        target_path = fake_url_repo.path
    elif setup_type == "repo_without_remote":
        repo = Repository.create(tmp_path / "repo")
        repo.create_commit()
        target_path = repo.path
    else:  # non_git_dir
        target_path = tmp_path

    # patch.dict restores os.environ on exit, so the variable can be set or
    # removed freely inside the block.
    with mock.patch.dict(os.environ):
        if env_var_value is None:
            # Explicitly drop the variable: the "no env var" cases must not be
            # influenced by a value already set in the developer/CI environment.
            os.environ.pop("REPOSITORY_REMOTE_FALLBACK", None)
        else:
            os.environ["REPOSITORY_REMOTE_FALLBACK"] = env_var_value

        context = ScanContext(
            scan_mode=ScanMode.PATH,
            command_path="ggshield secret scan path",
            target_path=target_path,
        )

        # Assert inside the patched environment so the check is valid whether
        # the context resolves the URL at construction or when headers are read.
        if expected_url is not None:
            _assert_repo_url_in_headers(context, expected_url)
        else:
            _assert_no_repo_url_in_headers(context)

0 commit comments

Comments
 (0)