diff --git a/.gitignore b/.gitignore index ab3b7078..4c25f55f 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ coverage/ .nyc_output/ *.swp *.swo +*.so *~ .idea/ diff --git a/containers/agent/Dockerfile b/containers/agent/Dockerfile index 65ba8cc5..717c56cf 100644 --- a/containers/agent/Dockerfile +++ b/containers/agent/Dockerfile @@ -7,21 +7,6 @@ # NOTE: ARG declared before first FROM is global and available in all FROM statements ARG BASE_IMAGE=ubuntu:22.04 -# Multi-stage build: Use official Rust image to build one-shot-token library -# SECURITY: Using official rust:1.77-slim image prevents executing unverified -# scripts from the internet during build time (supply chain attack mitigation) -# NOTE: Rust 1.77+ required for C string literal syntax (c"...") used in src/lib.rs -FROM rust:1.77-slim AS rust-builder - -# Copy one-shot-token source files -COPY one-shot-token/Cargo.toml /tmp/one-shot-token/Cargo.toml -COPY one-shot-token/src/ /tmp/one-shot-token/src/ - -# Build the one-shot-token library -WORKDIR /tmp/one-shot-token -RUN cargo build --release - -# Main stage FROM ${BASE_IMAGE} # Install required packages and Node.js 22 @@ -85,9 +70,21 @@ RUN chmod +x /usr/local/bin/setup-iptables.sh /usr/local/bin/entrypoint.sh /usr/ # Copy pre-built one-shot-token library from rust-builder stage # This prevents tokens from being read multiple times (e.g., by malicious code) -# SECURITY: Using multi-stage build with official Rust image avoids executing -# unverified scripts from the internet during build time -COPY --from=rust-builder /tmp/one-shot-token/target/release/libone_shot_token.so /usr/local/lib/one-shot-token.so +# Build flags: -fvisibility=hidden hides internal symbols, -s strips at link time +COPY one-shot-token/one-shot-token.c /tmp/one-shot-token.c +RUN set -eux; \ + BUILD_PKGS="gcc libc6-dev binutils"; \ + apt-get update && \ + ( apt-get install -y --no-install-recommends $BUILD_PKGS || \ + (rm -rf /var/lib/apt/lists/* && apt-get update && \ + 
apt-get install -y --no-install-recommends $BUILD_PKGS) ) && \ + gcc -shared -fPIC -fvisibility=hidden -O2 -Wall -s \ + -o /usr/local/lib/one-shot-token.so /tmp/one-shot-token.c -ldl -lpthread && \ + strip --strip-unneeded /usr/local/lib/one-shot-token.so && \ + rm /tmp/one-shot-token.c && \ + apt-get remove -y $BUILD_PKGS && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* # Install Docker stub script that shows helpful error message # Docker-in-Docker support was removed in v0.9.1 diff --git a/containers/agent/one-shot-token/Cargo.toml b/containers/agent/one-shot-token/Cargo.toml deleted file mode 100644 index 9d8093bc..00000000 --- a/containers/agent/one-shot-token/Cargo.toml +++ /dev/null @@ -1,19 +0,0 @@ -[package] -name = "one-shot-token" -version = "0.1.0" -edition = "2021" -description = "LD_PRELOAD library for one-shot access to sensitive environment variables" -license = "MIT" - -[lib] -name = "one_shot_token" -crate-type = ["cdylib"] - -[dependencies] -libc = "0.2" -once_cell = "1.19" - -[profile.release] -opt-level = 2 -lto = true -strip = true diff --git a/containers/agent/one-shot-token/README.md b/containers/agent/one-shot-token/README.md index eb9cb642..2dff7c58 100644 --- a/containers/agent/one-shot-token/README.md +++ b/containers/agent/one-shot-token/README.md @@ -158,11 +158,14 @@ In chroot mode, the library must be accessible from within the chroot (host file ### In Docker (automatic) -The Dockerfile compiles the Rust library during image build: +The Dockerfile compiles the library during image build with hardened flags: ```dockerfile -RUN cargo build --release && \ - cp target/release/libone_shot_token.so /usr/local/lib/one-shot-token.so +RUN gcc -shared -fPIC -fvisibility=hidden -O2 -Wall -s \ + -o /usr/local/lib/one-shot-token.so \ + /tmp/one-shot-token.c \ + -ldl -lpthread && \ + strip --strip-unneeded /usr/local/lib/one-shot-token.so ``` ### Locally (for testing) @@ -175,6 +178,24 @@ Requires Rust toolchain (install via 
[rustup](https://rustup.rs/)): This builds `target/release/libone_shot_token.so` and creates a symlink `one-shot-token.so` for backwards compatibility. +### Binary Hardening + +The build applies several hardening measures to reduce reconnaissance value: + +- **XOR-obfuscated token names**: Default token names are stored as XOR-encoded byte arrays + and decoded at runtime. This prevents extraction via `strings` or `objdump -s -j .rodata`. +- **Hidden symbol visibility**: `-fvisibility=hidden` hides all internal symbols by default. + Only `getenv` and `secure_getenv` are exported (required for LD_PRELOAD interposition). +- **Stripped binary**: `-s` flag and `strip --strip-unneeded` remove the symbol table, + debug sections, and build metadata. + +To regenerate the obfuscated byte arrays after changing default token names: + +```bash +./encode-tokens.sh +# Paste the output into one-shot-token.c, replacing the OBFUSCATED_DEFAULTS section +``` + ## Testing ### Basic Test (Default Tokens) @@ -305,7 +326,7 @@ This library is one layer in AWF's security model: ## Files -- `src/lib.rs` - Library source code (Rust) -- `Cargo.toml` - Rust package configuration -- `build.sh` - Local build script +- `one-shot-token.c` - Library source code (token names are XOR-obfuscated) +- `build.sh` - Local build script (includes hardening flags and verification) +- `encode-tokens.sh` - Generates XOR-encoded byte arrays for default token names - `README.md` - This documentation diff --git a/containers/agent/one-shot-token/build.sh b/containers/agent/one-shot-token/build.sh index 6996725c..8d92c93a 100644 --- a/containers/agent/one-shot-token/build.sh +++ b/containers/agent/one-shot-token/build.sh @@ -9,26 +9,27 @@ LINK_FILE="${SCRIPT_DIR}/one-shot-token.so" echo "[build] Building one-shot-token with Cargo..." 
-cd "${SCRIPT_DIR}" - -# Build the release version -cargo build --release - -# Determine the output file based on platform -if [[ "$(uname)" == "Darwin" ]]; then - OUTPUT_FILE="${SCRIPT_DIR}/target/release/libone_shot_token.dylib" - echo "[build] Successfully built: ${OUTPUT_FILE} (macOS)" -else - OUTPUT_FILE="${SCRIPT_DIR}/target/release/libone_shot_token.so" - echo "[build] Successfully built: ${OUTPUT_FILE}" - - # Create symlink for backwards compatibility (Linux only) - if [[ -L "${LINK_FILE}" ]]; then - rm "${LINK_FILE}" - fi - ln -sf "target/release/libone_shot_token.so" "${LINK_FILE}" - echo "[build] Created symlink: ${LINK_FILE} -> target/release/libone_shot_token.so" -fi +# Compile as a shared library with hardened build flags: +# -shared: create a shared library +# -fPIC: position-independent code (required for shared libs) +# -fvisibility=hidden: hide all symbols by default (only getenv/secure_getenv +# are exported via __attribute__((visibility("default")))) +# -ldl: link with libdl for dlsym +# -lpthread: link with pthread for mutex +# -O2: optimize for performance +# -Wall -Wextra: enable warnings +# -s: strip symbol table and relocation info at link time +gcc -shared -fPIC \ + -fvisibility=hidden \ + -O2 -Wall -Wextra -s \ + -o "${OUTPUT_FILE}" \ + "${SOURCE_FILE}" \ + -ldl -lpthread + +# Remove remaining unneeded symbols (debug sections, build metadata) +strip --strip-unneeded "${OUTPUT_FILE}" + +echo "[build] Successfully built: ${OUTPUT_FILE}" # Verify it's a valid shared library if file "${OUTPUT_FILE}" | grep -qE "shared object|dynamically linked"; then @@ -37,3 +38,11 @@ else echo "[build] ERROR: Output is not a valid shared library" exit 1 fi + +# Verify hardening: token names should NOT appear in binary +if strings -a "${OUTPUT_FILE}" | grep -qE '(COPILOT_GITHUB_TOKEN|OPENAI_API_KEY|ANTHROPIC_API_KEY)'; then + echo "[build] ERROR: Cleartext token names still present in binary" + exit 1 +else + echo "[build] Verified: no cleartext token 
names in binary" +fi diff --git a/containers/agent/one-shot-token/encode-tokens.sh b/containers/agent/one-shot-token/encode-tokens.sh new file mode 100755 index 00000000..e59645d9 --- /dev/null +++ b/containers/agent/one-shot-token/encode-tokens.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Generate XOR-obfuscated byte arrays for default token names. +# Run this script whenever the default token list changes, then paste +# the output into one-shot-token.c (replacing the OBFUSCATED_DEFAULTS section). +# +# The obfuscation prevents token names from appearing as cleartext strings +# in the .rodata section of the compiled binary. This is NOT cryptographic +# security -- a determined attacker can reverse the XOR. The goal is to +# defeat casual reconnaissance via strings(1) / objdump. + +set -euo pipefail + +KEY=0x5A + +TOKENS=( + "COPILOT_GITHUB_TOKEN" + "GITHUB_TOKEN" + "GH_TOKEN" + "GITHUB_API_TOKEN" + "GITHUB_PAT" + "GH_ACCESS_TOKEN" + "OPENAI_API_KEY" + "OPENAI_KEY" + "ANTHROPIC_API_KEY" + "CLAUDE_API_KEY" + "CODEX_API_KEY" +) + +echo "/* --- BEGIN GENERATED OBFUSCATED DEFAULTS (key=0x$(printf '%02X' $KEY)) --- */" +echo "/* Re-generate with: containers/agent/one-shot-token/encode-tokens.sh */" +echo "#define NUM_DEFAULT_TOKENS ${#TOKENS[@]}" +echo "" + +for i in "${!TOKENS[@]}"; do + token="${TOKENS[$i]}" + printf "static const unsigned char OBF_%d[] = { " "$i" + for ((j=0; j<${#token}; j++)); do + byte=$(printf '%d' "'${token:$j:1}") + encoded=$((byte ^ KEY)) + if ((j > 0)); then + printf ", " + fi + printf "0x%02x" "$encoded" + done + printf " }; /* length=%d */\n" "${#token}" +done + +echo "" +echo "static const struct obf_entry OBFUSCATED_DEFAULTS[${#TOKENS[@]}] = {" +for i in "${!TOKENS[@]}"; do + echo " { OBF_${i}, sizeof(OBF_${i}) }," +done +echo "};" +echo "/* --- END GENERATED OBFUSCATED DEFAULTS --- */" diff --git a/containers/agent/one-shot-token/one-shot-token.c b/containers/agent/one-shot-token/one-shot-token.c index 8e452acb..48a1afd8 100644 --- 
a/containers/agent/one-shot-token/one-shot-token.c +++ b/containers/agent/one-shot-token/one-shot-token.c @@ -10,7 +10,12 @@ * AWF_ONE_SHOT_TOKENS - Comma-separated list of token names to protect * If not set, uses built-in defaults * - * Compile: gcc -shared -fPIC -o one-shot-token.so one-shot-token.c -ldl + * Build hardening: + * Default token names are XOR-obfuscated to prevent cleartext extraction + * via strings(1) or objdump. Internal symbols use hidden visibility. + * Binary should be stripped after compilation (see build.sh / Dockerfile). + * + * Compile: gcc -shared -fPIC -fvisibility=hidden -o one-shot-token.so one-shot-token.c -ldl * Usage: LD_PRELOAD=/path/to/one-shot-token.so ./your-program */ @@ -22,25 +27,69 @@ #include #include -/* Default sensitive token environment variable names */ -static const char *DEFAULT_SENSITIVE_TOKENS[] = { - /* GitHub tokens */ - "COPILOT_GITHUB_TOKEN", - "GITHUB_TOKEN", - "GH_TOKEN", - "GITHUB_API_TOKEN", - "GITHUB_PAT", - "GH_ACCESS_TOKEN", - /* OpenAI tokens */ - "OPENAI_API_KEY", - "OPENAI_KEY", - /* Anthropic/Claude tokens */ - "ANTHROPIC_API_KEY", - "CLAUDE_API_KEY", - /* Codex tokens */ - "CODEX_API_KEY", - NULL +/* -------------------------------------------------------------------------- + * Obfuscated default token names + * + * Token names are XOR-encoded so they do not appear as cleartext in the + * .rodata section. This is NOT cryptographic protection -- a determined + * attacker can reverse the XOR. The goal is to prevent trivial discovery + * via `strings`, `objdump -s -j .rodata`, or similar reconnaissance. 
+ * + * Re-generate with: containers/agent/one-shot-token/encode-tokens.sh + * -------------------------------------------------------------------------- */ + +#define OBF_KEY 0x5A + +/* Entry in the obfuscated defaults table */ +struct obf_entry { + const unsigned char *data; + size_t len; +}; + +/* --- BEGIN GENERATED OBFUSCATED DEFAULTS (key=0x5A) --- */ +/* Re-generate with: containers/agent/one-shot-token/encode-tokens.sh */ +#define NUM_DEFAULT_TOKENS 11 + +static const unsigned char OBF_0[] = { 0x19, 0x15, 0x0a, 0x13, 0x16, 0x15, 0x0e, 0x05, 0x1d, 0x13, 0x0e, 0x12, 0x0f, 0x18, 0x05, 0x0e, 0x15, 0x11, 0x1f, 0x14 }; /* length=20 */ +static const unsigned char OBF_1[] = { 0x1d, 0x13, 0x0e, 0x12, 0x0f, 0x18, 0x05, 0x0e, 0x15, 0x11, 0x1f, 0x14 }; /* length=12 */ +static const unsigned char OBF_2[] = { 0x1d, 0x12, 0x05, 0x0e, 0x15, 0x11, 0x1f, 0x14 }; /* length=8 */ +static const unsigned char OBF_3[] = { 0x1d, 0x13, 0x0e, 0x12, 0x0f, 0x18, 0x05, 0x1b, 0x0a, 0x13, 0x05, 0x0e, 0x15, 0x11, 0x1f, 0x14 }; /* length=16 */ +static const unsigned char OBF_4[] = { 0x1d, 0x13, 0x0e, 0x12, 0x0f, 0x18, 0x05, 0x0a, 0x1b, 0x0e }; /* length=10 */ +static const unsigned char OBF_5[] = { 0x1d, 0x12, 0x05, 0x1b, 0x19, 0x19, 0x1f, 0x09, 0x09, 0x05, 0x0e, 0x15, 0x11, 0x1f, 0x14 }; /* length=15 */ +static const unsigned char OBF_6[] = { 0x15, 0x0a, 0x1f, 0x14, 0x1b, 0x13, 0x05, 0x1b, 0x0a, 0x13, 0x05, 0x11, 0x1f, 0x03 }; /* length=14 */ +static const unsigned char OBF_7[] = { 0x15, 0x0a, 0x1f, 0x14, 0x1b, 0x13, 0x05, 0x11, 0x1f, 0x03 }; /* length=10 */ +static const unsigned char OBF_8[] = { 0x1b, 0x14, 0x0e, 0x12, 0x08, 0x15, 0x0a, 0x13, 0x19, 0x05, 0x1b, 0x0a, 0x13, 0x05, 0x11, 0x1f, 0x03 }; /* length=17 */ +static const unsigned char OBF_9[] = { 0x19, 0x16, 0x1b, 0x0f, 0x1e, 0x1f, 0x05, 0x1b, 0x0a, 0x13, 0x05, 0x11, 0x1f, 0x03 }; /* length=14 */ +static const unsigned char OBF_10[] = { 0x19, 0x15, 0x1e, 0x1f, 0x02, 0x05, 0x1b, 0x0a, 0x13, 0x05, 0x11, 0x1f, 0x03 }; /* length=13 */ 
+ +static const struct obf_entry OBFUSCATED_DEFAULTS[11] = { + { OBF_0, sizeof(OBF_0) }, + { OBF_1, sizeof(OBF_1) }, + { OBF_2, sizeof(OBF_2) }, + { OBF_3, sizeof(OBF_3) }, + { OBF_4, sizeof(OBF_4) }, + { OBF_5, sizeof(OBF_5) }, + { OBF_6, sizeof(OBF_6) }, + { OBF_7, sizeof(OBF_7) }, + { OBF_8, sizeof(OBF_8) }, + { OBF_9, sizeof(OBF_9) }, + { OBF_10, sizeof(OBF_10) }, }; +/* --- END GENERATED OBFUSCATED DEFAULTS --- */ + +/** + * Decode an obfuscated entry into a newly allocated string. + * Returns NULL on allocation failure. + */ +static char *decode_obf(const struct obf_entry *entry) { + char *decoded = malloc(entry->len + 1); + if (decoded == NULL) return NULL; + for (size_t i = 0; i < entry->len; i++) { + decoded[i] = (char)(entry->data[i] ^ OBF_KEY); + } + decoded[entry->len] = '\0'; + return decoded; +} /* Maximum number of tokens we can track (for static allocation). This limit * balances memory usage with practical needs - 100 tokens should be more than @@ -112,7 +161,7 @@ static void init_token_list(void) { /* Get the configuration from environment */ const char *config = real_getenv("AWF_ONE_SHOT_TOKENS"); - + if (config != NULL && config[0] != '\0') { /* Parse comma-separated token list using strtok_r for thread safety */ char *config_copy = strdup(config); @@ -126,7 +175,7 @@ static void init_token_list(void) { while (token != NULL && num_tokens < MAX_TOKENS) { /* Trim leading whitespace */ while (*token && isspace((unsigned char)*token)) token++; - + /* Trim trailing whitespace (only if string is non-empty) */ size_t token_len = strlen(token); if (token_len > 0) { @@ -169,11 +218,11 @@ static void init_token_list(void) { return; } } - + /* Use default token list (when AWF_ONE_SHOT_TOKENS is unset, empty, or parsed to zero tokens) */ - /* Note: num_tokens should be 0 when we reach here */ - for (int i = 0; DEFAULT_SENSITIVE_TOKENS[i] != NULL && num_tokens < MAX_TOKENS; i++) { - sensitive_tokens[num_tokens] = strdup(DEFAULT_SENSITIVE_TOKENS[i]); + /* 
Decode obfuscated defaults at runtime */ + for (int i = 0; i < NUM_DEFAULT_TOKENS && num_tokens < MAX_TOKENS; i++) { + sensitive_tokens[num_tokens] = decode_obf(&OBFUSCATED_DEFAULTS[i]); if (sensitive_tokens[num_tokens] == NULL) { fprintf(stderr, "[one-shot-token] ERROR: Failed to allocate memory for default token name\n"); /* Clean up previously allocated tokens */ @@ -224,16 +273,16 @@ static int get_token_index(const char *name) { */ static const char *format_token_value(const char *value) { static char formatted[8]; /* "abcd..." + null terminator */ - + if (value == NULL) { return "NULL"; } - + size_t len = strlen(value); if (len == 0) { return "(empty)"; } - + if (len <= 4) { /* If 4 chars or less, just show it all with ... */ snprintf(formatted, sizeof(formatted), "%s...", value); @@ -241,7 +290,7 @@ static const char *format_token_value(const char *value) { /* Show first 4 chars + ... */ snprintf(formatted, sizeof(formatted), "%.4s...", value); } - + return formatted; } @@ -257,6 +306,7 @@ static const char *format_token_value(const char *value) { * * For all other variables: passes through to real getenv */ +__attribute__((visibility("default"))) char *getenv(const char *name) { ensure_real_getenv(); @@ -329,13 +379,53 @@ char *getenv(const char *name) { * * For all other variables: passes through to real secure_getenv (or getenv if unavailable) */ +__attribute__((visibility("default"))) char *secure_getenv(const char *name) { ensure_real_secure_getenv(); ensure_real_getenv(); if (real_secure_getenv == NULL) { return getenv(name); } - /* Simple passthrough - no mutex, no token handling. - * Token protection is handled by getenv() which is also intercepted. 
*/ - return real_secure_getenv(name); + + int token_idx = get_token_index(name); + + /* Not a sensitive token - pass through to real secure_getenv */ + if (token_idx < 0) { + return real_secure_getenv(name); + } + + /* Sensitive token - handle cached access with secure_getenv semantics */ + pthread_mutex_lock(&token_mutex); + + char *result = NULL; + + if (!token_accessed[token_idx]) { + /* First access - get the real value using secure_getenv */ + result = real_secure_getenv(name); + + if (result != NULL) { + /* Cache the value so subsequent reads succeed after unsetenv */ + /* Note: This memory is intentionally never freed - it must persist + * for the lifetime of the process */ + token_cache[token_idx] = strdup(result); + + /* Unset the variable from the environment so /proc/self/environ is cleared */ + unsetenv(name); + + fprintf(stderr, "[one-shot-token] Token %s accessed and cached (value: %s) (via secure_getenv)\n", + name, format_token_value(token_cache[token_idx])); + + result = token_cache[token_idx]; + } + + /* Mark as accessed even if NULL (prevents repeated log messages) */ + token_accessed[token_idx] = 1; + } else { + /* Already accessed - return cached value */ + result = token_cache[token_idx]; + } + + pthread_mutex_unlock(&token_mutex); + + return result; } diff --git a/containers/agent/one-shot-token/src/lib.rs b/containers/agent/one-shot-token/src/lib.rs deleted file mode 100644 index 1472c5fb..00000000 --- a/containers/agent/one-shot-token/src/lib.rs +++ /dev/null @@ -1,403 +0,0 @@ -//! One-Shot Token LD_PRELOAD Library -//! -//! Intercepts getenv() calls for sensitive token environment variables. -//! On first access, caches the value in memory and unsets from environment. -//! Subsequent calls return the cached value, so the process can read tokens -//! multiple times while /proc/self/environ no longer exposes them. -//! -//! Configuration: -//! AWF_ONE_SHOT_TOKENS - Comma-separated list of token names to protect -//! 
If not set, uses built-in defaults -//! -//! Compile: cargo build --release -//! Usage: LD_PRELOAD=/path/to/libone_shot_token.so ./your-program - -use libc::{c_char, c_void}; -use once_cell::sync::Lazy; -use std::collections::HashMap; -use std::ffi::{CStr, CString}; -use std::ptr; -use std::sync::Mutex; - -// External declaration of the environ pointer -// This is a POSIX standard global that points to the process's environment -extern "C" { - static mut environ: *mut *mut c_char; -} - -/// Maximum number of tokens we can track -const MAX_TOKENS: usize = 100; - -/// Default sensitive token environment variable names -const DEFAULT_SENSITIVE_TOKENS: &[&str] = &[ - // GitHub tokens - "COPILOT_GITHUB_TOKEN", - "GITHUB_TOKEN", - "GH_TOKEN", - "GITHUB_API_TOKEN", - "GITHUB_PAT", - "GH_ACCESS_TOKEN", - // OpenAI tokens - "OPENAI_API_KEY", - "OPENAI_KEY", - // Anthropic/Claude tokens - "ANTHROPIC_API_KEY", - "CLAUDE_API_KEY", - // Codex tokens - "CODEX_API_KEY", -]; - -/// State for tracking tokens and their cached values -struct TokenState { - /// List of sensitive token names to protect - tokens: Vec, - /// Cached token values - stored on first access so subsequent reads succeed - /// even after the variable is unset from the environment. This allows - /// /proc/self/environ to be cleaned while the process can still read tokens. - /// Maps token name to cached C string pointer (or null if token was not set). 
- cache: HashMap, - /// Whether initialization has completed - initialized: bool, -} - -// SAFETY: TokenState is only accessed through a Mutex, ensuring thread safety -unsafe impl Send for TokenState {} -unsafe impl Sync for TokenState {} - -impl TokenState { - fn new() -> Self { - Self { - tokens: Vec::new(), - cache: HashMap::new(), - initialized: false, - } - } -} - -/// Global state protected by a mutex -static STATE: Lazy> = Lazy::new(|| Mutex::new(TokenState::new())); - -/// Type alias for the real getenv function -type GetenvFn = unsafe extern "C" fn(*const c_char) -> *mut c_char; - -/// Cached pointer to the real getenv function -static REAL_GETENV: Lazy = Lazy::new(|| { - // SAFETY: We're looking up a standard C library function - unsafe { - let symbol = libc::dlsym(libc::RTLD_NEXT, c"getenv".as_ptr()); - if symbol.is_null() { - eprintln!("[one-shot-token] FATAL: Could not find real getenv"); - std::process::abort(); - } - std::mem::transmute::<*mut c_void, GetenvFn>(symbol) - } -}); - -/// Cached pointer to the real secure_getenv function (may be null if unavailable) -static REAL_SECURE_GETENV: Lazy> = Lazy::new(|| { - // SAFETY: We're looking up a standard C library function - unsafe { - let symbol = libc::dlsym(libc::RTLD_NEXT, c"secure_getenv".as_ptr()); - if symbol.is_null() { - eprintln!("[one-shot-token] WARNING: secure_getenv not available, falling back to getenv"); - None - } else { - Some(std::mem::transmute::<*mut c_void, GetenvFn>(symbol)) - } - } -}); - -/// Call the real getenv function -/// -/// # Safety -/// The `name` parameter must be a valid null-terminated C string -unsafe fn call_real_getenv(name: *const c_char) -> *mut c_char { - (*REAL_GETENV)(name) -} - -/// Call the real secure_getenv function, falling back to getenv if unavailable -/// -/// # Safety -/// The `name` parameter must be a valid null-terminated C string -unsafe fn call_real_secure_getenv(name: *const c_char) -> *mut c_char { - match *REAL_SECURE_GETENV { - Some(func) 
=> func(name), - None => call_real_getenv(name), - } -} - -/// Initialize the token list from AWF_ONE_SHOT_TOKENS or defaults -/// -/// # Safety -/// Must be called with STATE lock held -fn init_token_list(state: &mut TokenState) { - if state.initialized { - return; - } - - // Get configuration from environment - let config_cstr = CString::new("AWF_ONE_SHOT_TOKENS").unwrap(); - // SAFETY: We're calling the real getenv with a valid C string - let config_ptr = unsafe { call_real_getenv(config_cstr.as_ptr()) }; - - if !config_ptr.is_null() { - // SAFETY: config_ptr is valid if not null - let config = unsafe { CStr::from_ptr(config_ptr) }; - if let Ok(config_str) = config.to_str() { - if !config_str.is_empty() { - // Parse comma-separated token list - for token in config_str.split(',') { - let token = token.trim(); - if !token.is_empty() && state.tokens.len() < MAX_TOKENS { - state.tokens.push(token.to_string()); - } - } - - if !state.tokens.is_empty() { - eprintln!( - "[one-shot-token] Initialized with {} custom token(s) from AWF_ONE_SHOT_TOKENS", - state.tokens.len() - ); - state.initialized = true; - return; - } - - // Config was set but parsed to zero tokens - fall back to defaults - eprintln!("[one-shot-token] WARNING: AWF_ONE_SHOT_TOKENS was set but parsed to zero tokens"); - eprintln!("[one-shot-token] WARNING: Falling back to default token list to maintain protection"); - } - } - } - - // Use default token list - for token in DEFAULT_SENSITIVE_TOKENS { - if state.tokens.len() >= MAX_TOKENS { - break; - } - state.tokens.push((*token).to_string()); - } - - eprintln!( - "[one-shot-token] Initialized with {} default token(s)", - state.tokens.len() - ); - state.initialized = true; -} - -/// Check if a token name is sensitive -fn is_sensitive_token(state: &TokenState, name: &str) -> bool { - state.tokens.iter().any(|t| t == name) -} - -/// Format token value for logging: show first 4 characters + "..." 
-fn format_token_value(value: &str) -> String { - if value.is_empty() { - return "(empty)".to_string(); - } - - if value.len() <= 4 { - format!("{}...", value) - } else { - format!("{}...", &value[..4]) - } -} - -/// Check if a token still exists in the process environment -/// -/// This function verifies whether unsetenv() successfully cleared the token -/// by directly checking the process's environ pointer. This works correctly -/// in both chroot and non-chroot modes (reading /proc/self/environ fails in -/// chroot because it shows the host's procfs, not the chrooted process's state). -fn check_task_environ_exposure(token_name: &str) { - // SAFETY: environ is a standard POSIX global that points to the process's environment. - // It's safe to read as long as we don't hold references across modifications. - // We're only reading it after unsetenv() has completed, so the pointer is stable. - unsafe { - let mut env_ptr = environ; - if env_ptr.is_null() { - eprintln!("[one-shot-token] INFO: Token {} cleared (environ is null)", token_name); - return; - } - - // Iterate through environment variables - let token_prefix = format!("{}=", token_name); - let token_prefix_bytes = token_prefix.as_bytes(); - - while !(*env_ptr).is_null() { - let env_cstr = CStr::from_ptr(*env_ptr); - let env_bytes = env_cstr.to_bytes(); - - // Check if this entry starts with our token name - if env_bytes.len() >= token_prefix_bytes.len() - && &env_bytes[..token_prefix_bytes.len()] == token_prefix_bytes { - eprintln!( - "[one-shot-token] WARNING: Token {} still exposed in process environment", - token_name - ); - return; - } - - env_ptr = env_ptr.add(1); - } - - // Token not found in environment - success! 
- eprintln!( - "[one-shot-token] INFO: Token {} cleared from process environment", - token_name - ); - } -} - -/// Core implementation for cached token access -/// -/// # Safety -/// - `name` must be a valid null-terminated C string -/// - `real_getenv_fn` must be a valid function to call for getting the real value -unsafe fn handle_getenv_impl( - name: *const c_char, - real_getenv_fn: unsafe fn(*const c_char) -> *mut c_char, - via_secure: bool, -) -> *mut c_char { - // Null name - pass through - if name.is_null() { - return real_getenv_fn(name); - } - - // Convert name to Rust string for comparison - let name_cstr = CStr::from_ptr(name); - let name_str = match name_cstr.to_str() { - Ok(s) => s, - Err(_) => return real_getenv_fn(name), - }; - - // Lock state and ensure initialization - let mut state = match STATE.lock() { - Ok(guard) => guard, - Err(poisoned) => poisoned.into_inner(), - }; - - if !state.initialized { - init_token_list(&mut state); - } - - // Check if this is a sensitive token - if !is_sensitive_token(&state, name_str) { - // Not sensitive - pass through (drop lock first for performance) - drop(state); - return real_getenv_fn(name); - } - - // Sensitive token - check if already cached - if let Some(&cached_ptr) = state.cache.get(name_str) { - // Already accessed - return cached value (may be null if token wasn't set) - return cached_ptr; - } - - // First access - get the real value and cache it - let result = real_getenv_fn(name); - - if result.is_null() { - // Token not set - cache null to prevent repeated log messages - state.cache.insert(name_str.to_string(), ptr::null_mut()); - return ptr::null_mut(); - } - - // Copy the value before unsetting - let value_cstr = CStr::from_ptr(result); - let value_str = value_cstr.to_str().unwrap_or(""); - let value_bytes = value_cstr.to_bytes_with_nul(); - - // Allocate memory that will never be freed (must persist for caller's use) - let cached = libc::malloc(value_bytes.len()) as *mut c_char; - if 
cached.is_null() { - eprintln!("[one-shot-token] ERROR: Failed to allocate memory for token value"); - std::process::abort(); - } - - // Copy the value - ptr::copy_nonoverlapping(value_bytes.as_ptr(), cached as *mut u8, value_bytes.len()); - - // Cache the pointer so subsequent reads return the same value - state.cache.insert(name_str.to_string(), cached); - - // Unset the environment variable so it's no longer accessible - libc::unsetenv(name); - - // Verify the token was cleared from the process environment - check_task_environ_exposure(name_str); - - let suffix = if via_secure { " (via secure_getenv)" } else { "" }; - eprintln!( - "[one-shot-token] Token {} accessed and cached (value: {}){}", - name_str, format_token_value(value_str), suffix - ); - - cached -} - -/// Intercepted getenv function -/// -/// For sensitive tokens: -/// - First call: caches the value, unsets from environment, returns cached value -/// - Subsequent calls: returns the cached value from memory -/// -/// This clears tokens from /proc/self/environ while allowing the process -/// to read them multiple times via getenv(). -/// -/// For all other variables: passes through to real getenv -/// -/// # Safety -/// This function is called from C code and must maintain C ABI compatibility. -/// The `name` parameter must be a valid null-terminated C string. -#[no_mangle] -pub unsafe extern "C" fn getenv(name: *const c_char) -> *mut c_char { - handle_getenv_impl(name, call_real_getenv, false) -} - -/// Intercepted secure_getenv function -/// -/// This function preserves secure_getenv semantics (returns NULL in privileged contexts) -/// while applying the same cached token protection as getenv. 
-/// -/// For sensitive tokens: -/// - First call: caches the value, unsets from environment, returns cached value -/// - Subsequent calls: returns the cached value from memory -/// -/// For all other variables: passes through to real secure_getenv (or getenv if unavailable) -/// -/// # Safety -/// This function is called from C code and must maintain C ABI compatibility. -/// The `name` parameter must be a valid null-terminated C string. -#[no_mangle] -pub unsafe extern "C" fn secure_getenv(name: *const c_char) -> *mut c_char { - handle_getenv_impl(name, call_real_secure_getenv, true) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_default_tokens_defined() { - assert!(!DEFAULT_SENSITIVE_TOKENS.is_empty()); - assert!(DEFAULT_SENSITIVE_TOKENS.contains(&"GITHUB_TOKEN")); - assert!(DEFAULT_SENSITIVE_TOKENS.contains(&"OPENAI_API_KEY")); - } - - #[test] - fn test_token_state_new() { - let state = TokenState::new(); - assert!(state.tokens.is_empty()); - assert!(state.cache.is_empty()); - assert!(!state.initialized); - } - - #[test] - fn test_format_token_value() { - assert_eq!(format_token_value(""), "(empty)"); - assert_eq!(format_token_value("ab"), "ab..."); - assert_eq!(format_token_value("abcd"), "abcd..."); - assert_eq!(format_token_value("abcde"), "abcd..."); - assert_eq!(format_token_value("ghp_1234567890"), "ghp_..."); - } -}