Skip to content

Commit 3c0a0fb

Browse files
authored
Merge pull request #68 from linux-kdevops/cel/terraform-fixes
Merge the first part of neocloud Mcgrof's patches from https://lore.kernel.org/kdevops/[email protected]/
2 parents 76d55dd + b96ceff commit 3c0a0fb

17 files changed

+445
-1349
lines changed

CLAUDE.md

Lines changed: 196 additions & 1303 deletions
Large diffs are not rendered by default.

playbooks/terraform.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
---
22
- name: Manage infrastructure lifecycle and SSH access with Terraform
33
hosts: localhost
4-
gather_facts: false
54
roles:
65
- role: terraform

scripts/check-ssh-key-migration.sh

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/bin/bash
2+
# SPDX-License-Identifier: copyleft-next-0.3.1
3+
#
4+
# Check if SSH keys need migration from old (unhashed) to new (hashed) paths.
5+
# This helps users upgrading from older kdevops versions that used a fixed
6+
# SSH key path to the new per-directory hashed paths.
7+
8+
# Abort on the first command that fails.
set -e
9+
10+
# The first argument is the top directory path; "." is used when it is omitted.
TOPDIR_PATH="${1:-.}"
11+
# Keep the first 8 hex characters of the SHA-256 of the path. echo appends a
# newline, so that newline is part of the hashed input.
HASH=$(echo "$TOPDIR_PATH" | sha256sum | cut -c1-8)
12+
13+
# Old fixed key paths and their new hash-suffixed counterparts.
OLD_KEY="$HOME/.ssh/kdevops_terraform"
14+
OLD_PUBKEY="$HOME/.ssh/kdevops_terraform.pub"
15+
NEW_KEY="$HOME/.ssh/kdevops_terraform_${HASH}"
16+
NEW_PUBKEY="$HOME/.ssh/kdevops_terraform_${HASH}.pub"
17+
18+
# Only show notice if old key exists but new key doesn't
# The test looks at the public key files only; the private key files are
# not checked. Nothing is moved or created here, the script only prints.
19+
if [ -f "$OLD_PUBKEY" ] && [ ! -f "$NEW_PUBKEY" ]; then
20+
# The heredoc delimiter is unquoted, so the variables in the text below are
# expanded before the notice is printed.
cat <<EOF
21+
--------------------------------------------------------------------------------
22+
NOTE: SSH key path has changed
23+
24+
kdevops now uses directory-specific SSH key paths. An old-style key exists:
25+
Old: $OLD_PUBKEY
26+
New: $NEW_PUBKEY
27+
28+
If you have RUNNING VMs that need the old key, migrate it:
29+
mv "$OLD_KEY" "$NEW_KEY"
30+
mv "$OLD_PUBKEY" "$NEW_PUBKEY"
31+
32+
Otherwise, a new key will be generated automatically.
33+
--------------------------------------------------------------------------------
34+
EOF
35+
fi

scripts/terraform.Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,14 @@ ifeq (y,$(CONFIG_TERRAFORM_SSH_CONFIG_GENKEY_OVERWRITE))
9292
DEFAULT_DEPS += remove-ssh-key
9393
endif
9494

95+
DEFAULT_DEPS += ssh-key-migration-check
9596
DEFAULT_DEPS += $(KDEVOPS_SSH_PRIVKEY)
9697
endif # CONFIG_TERRAFORM_SSH_CONFIG_GENKEY
9798

99+
PHONY += ssh-key-migration-check
100+
ssh-key-migration-check:
101+
$(Q)$(TOPDIR)/scripts/check-ssh-key-migration.sh $(TOPDIR_PATH)
102+
98103
ANSIBLE_EXTRA_ARGS += $(TERRAFORM_EXTRA_VARS)
99104

100105
# Lambda Labs SSH key management

terraform/Kconfig.ssh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,17 @@ config TERRAFORM_SSH_CONFIG_USER
2121

2222
config TERRAFORM_SSH_CONFIG_PUBKEY_FILE
2323
string "File containing Ansible's ssh public key"
24-
default "~/.ssh/kdevops_terraform_$(shell, echo $(TOPDIR_PATH) | sha256sum | cut -c1-8).pub" if TERRAFORM_LAMBDALABS
25-
default "~/.ssh/kdevops_terraform.pub"
24+
default "~/.ssh/kdevops_terraform_$(shell, echo $(TOPDIR_PATH) | sha256sum | cut -c1-8).pub"
2625
help
2726
The filename of the file containing an ssh public key
2827
Ansible is to use to manage its target nodes. The
2928
matching private key should be located in a file using
3029
the same basename (without the ".pub").
3130

31+
The filename includes an 8-character hash of the current
32+
directory path, allowing multiple kdevops installations to
33+
use separate SSH keys without conflicts.
34+
3235
config TERRAFORM_SSH_CONFIG_GENKEY
3336
bool "Should we create a new random key for you?"
3437
default y

terraform/aws/scripts/aws_common.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@
1818
from botocore.exceptions import ClientError, NoCredentialsError
1919

2020

21+
class AwsNotConfiguredError(Exception):
    """
    Raised when AWS credentials are not available.

    Scripts that treat AWS as optional can catch this exception and
    exit cleanly instead of failing.
    """
25+
26+
2127
def get_default_region():
2228
"""
2329
Get the default AWS region from the ~/.aws/config file.
@@ -128,6 +134,30 @@ def handle_aws_credentials_error(quiet=False):
128134
return False
129135

130136

137+
def require_aws_credentials():
    """
    Validate that AWS credentials are usable.

    Intended to run at the start of main(). Instead of probing for
    credential files by hand (which invites TOCTOU races), this makes an
    STS GetCallerIdentity call and reports missing credentials through
    AwsNotConfiguredError, leaving the reaction to the caller.

    Returns:
        dict: The response of the STS get_caller_identity() call

    Raises:
        AwsNotConfiguredError: If boto3 reports that no credentials
            were found
    """
    try:
        identity = boto3.client("sts").get_caller_identity()
    except NoCredentialsError as exc:
        # Translate the botocore error into this module's own exception,
        # keeping the original as the cause.
        raise AwsNotConfiguredError("AWS credentials not found") from exc
    return identity
159+
160+
131161
def get_all_regions(quiet=False):
132162
"""
133163
Retrieve the list of all AWS regions.

terraform/aws/scripts/gen_kconfig_ami

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@ from collections import defaultdict
1414
from datetime import datetime, timedelta
1515
from concurrent.futures import ThreadPoolExecutor, as_completed
1616

17-
import boto3
18-
from botocore.exceptions import ClientError, NoCredentialsError
17+
from botocore.exceptions import ClientError
1918

2019
from aws_common import (
20+
AwsNotConfiguredError,
2121
get_default_region,
2222
get_jinja2_environment,
2323
create_ec2_client,
2424
handle_aws_client_error,
25-
handle_aws_credentials_error,
25+
require_aws_credentials,
2626
)
2727

2828

@@ -257,9 +257,6 @@ def discover_ami_patterns(
257257

258258
return discovered_patterns
259259

260-
except NoCredentialsError:
261-
handle_aws_credentials_error(quiet)
262-
return {}
263260
except ClientError as e:
264261
handle_aws_client_error(e, f"discovering AMI patterns for {owner_name}", quiet)
265262
return {}
@@ -822,6 +819,14 @@ def main():
822819
output_owners_raw(owners, args.quiet)
823820
return
824821

822+
# Allow make dynconfig to succeed without AWS credentials
823+
try:
824+
require_aws_credentials()
825+
except AwsNotConfiguredError:
826+
if not args.quiet:
827+
print("AWS not configured - skipping (optional)", file=sys.stderr)
828+
sys.exit(0)
829+
825830
if args.region:
826831
region = args.region
827832
else:

terraform/aws/scripts/gen_kconfig_instance

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,12 @@ construct the "instance" Kconfig menu.
99
import sys
1010
import argparse
1111

12-
from botocore.exceptions import ClientError, NoCredentialsError
13-
1412
from aws_common import (
13+
AwsNotConfiguredError,
1514
get_default_region,
1615
get_all_instance_types,
1716
get_jinja2_environment,
18-
handle_aws_client_error,
19-
handle_aws_credentials_error,
17+
require_aws_credentials,
2018
)
2119

2220

@@ -316,6 +314,14 @@ def main():
316314
"""Main function to run the program."""
317315
args = parse_arguments()
318316

317+
# Allow make dynconfig to succeed without AWS credentials
318+
try:
319+
require_aws_credentials()
320+
except AwsNotConfiguredError:
321+
if not args.quiet:
322+
print("AWS not configured - skipping (optional)", file=sys.stderr)
323+
sys.exit(0)
324+
319325
if args.region:
320326
region = args.region
321327
else:

terraform/aws/scripts/gen_kconfig_location

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,15 @@ import sys
1010
import argparse
1111

1212
from concurrent.futures import ThreadPoolExecutor, as_completed
13-
from botocore.exceptions import ClientError, NoCredentialsError
1413

1514
from aws_common import (
15+
AwsNotConfiguredError,
1616
get_default_region,
1717
get_all_regions,
1818
get_region_availability_zones,
1919
get_jinja2_environment,
2020
get_region_kconfig_name,
21-
create_ec2_client,
22-
handle_aws_client_error,
23-
handle_aws_credentials_error,
21+
require_aws_credentials,
2422
)
2523

2624

@@ -214,6 +212,14 @@ def main():
214212
"""Main function to run the program."""
215213
args = parse_arguments()
216214

215+
# Allow make dynconfig to succeed without AWS credentials
216+
try:
217+
require_aws_credentials()
218+
except AwsNotConfiguredError:
219+
if not args.quiet:
220+
print("AWS not configured - skipping (optional)", file=sys.stderr)
221+
sys.exit(0)
222+
217223
if not args.quiet:
218224
print("Fetching list of all AWS regions...", file=sys.stderr)
219225
regions = get_all_regions()

terraform/azure/scripts/azure_common.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717
from jinja2 import Environment, FileSystemLoader
1818

1919

20+
class AzureNotConfiguredError(Exception):
    """
    Raised when Azure credentials are not available.

    Scripts that treat Azure as optional can catch this exception and
    exit cleanly instead of failing.
    """
24+
25+
2026
def get_default_region():
2127
"""
2228
Get the default Azure region from Azure configuration.
@@ -371,3 +377,45 @@ def exit_on_empty_result(result, context, quiet=False):
371377
)
372378
print("Run 'az login' to authenticate with Azure.", file=sys.stderr)
373379
sys.exit(1)
380+
381+
382+
def require_azure_credentials():
    """
    Validate that Azure CLI credentials are usable.

    Intended to run at the start of main(). Instead of probing for
    credential files by hand (which invites TOCTOU races), this asks the
    Azure CLI profile for login credentials and reports failure through
    AzureNotConfiguredError, leaving the reaction to the caller.

    Returns:
        str: The subscription ID returned with the login credentials

    Raises:
        AzureNotConfiguredError: If the Azure SDK import fails, or if the
            error message looks like a login/authentication problem
        Exception: Any other failure is re-raised unchanged
    """
    try:
        from azure.common.credentials import get_cli_profile

        cli_profile = get_cli_profile()
        login_info = cli_profile.get_login_credentials(
            resource="https://management.azure.com"
        )
        # Three values come back; only the subscription ID is used.
        _, subscription_id, _ = login_info
    except ImportError as exc:
        raise AzureNotConfiguredError("Azure SDK not installed") from exc
    except Exception as exc:
        # Heuristic: treat the failure as "not configured" only when the
        # message mentions logging in or credentials.
        message = str(exc).lower()
        auth_indicators = (
            "login",
            "logged in",
            "authenticate",
            "credential",
            "az login",
        )
        for indicator in auth_indicators:
            if indicator in message:
                raise AzureNotConfiguredError("Azure credentials not found") from exc
        # Not an authentication problem: let the original error propagate.
        raise
    return subscription_id

0 commit comments

Comments
 (0)