Skip to content

Enhancement: Add S3 Configuration Validation #707

@corylanou

Description

@corylanou

Enhancement: Add S3 Configuration Validation

Overview

With the AWS SDK v2 upgrade and new configuration options, we should implement comprehensive validation of S3 configurations at startup to provide clear, actionable error messages for misconfigured settings.

Problem

Currently, configuration errors may only surface during actual replication attempts, leading to:

  • Delayed error detection
  • Cryptic error messages from the AWS SDK
  • Wasted resources attempting invalid operations
  • Difficulty debugging configuration issues

Proposed Solution

Implement validation in the S3 replica client's Init() method to check configuration validity and provide helpful error messages.

Implementation Details

1. Validation Checks to Implement

// s3/replica_client.go - enhance Init() method

// Init validates the S3 replica configuration and returns a descriptive error
// for the first invalid setting it finds.
//
// Checks run in this order: bucket presence and naming, region availability,
// multipart settings (part size and concurrency), endpoint syntax, and
// credentials. Settings that are valid but potentially risky only produce log
// warnings and never fail initialization.
func (c *ReplicaClient) Init(ctx context.Context) error {
    const (
        mib = 1024 * 1024
        gib = 1024 * mib

        minPartSize = 5 * mib // smallest part size accepted below
        maxPartSize = 5 * gib // largest part size accepted below

        // Values substituted for unset (zero) settings when estimating
        // memory use. They mirror the defaults named in this package's
        // "using default multipart settings" log message.
        // NOTE(review): confirm these match the uploader's real defaults.
        assumedDefaultPartSize    = 5 * mib
        assumedDefaultConcurrency = 5

        memoryWarnThreshold = gib
    )

    // 1. Required fields validation
    if c.Bucket == "" {
        return fmt.Errorf("s3: bucket name is required")
    }

    // 2. Bucket name validation (S3 naming rules)
    if err := validateBucketName(c.Bucket); err != nil {
        return fmt.Errorf("s3: invalid bucket name %q: %w", c.Bucket, err)
    }

    // 3. Region validation: only enforced when no custom endpoint is set, and
    // satisfied by either the config value or the standard environment
    // variables.
    if c.Region == "" && c.Endpoint == "" &&
        os.Getenv("AWS_REGION") == "" && os.Getenv("AWS_DEFAULT_REGION") == "" {
        return fmt.Errorf("s3: region is required (set via config, AWS_REGION, or AWS_DEFAULT_REGION)")
    }

    // 4. Multipart upload settings validation. Zero means "use the default";
    // negative values are rejected explicitly because they would otherwise
    // slip past both range checks.
    if c.PartSize < 0 {
        return fmt.Errorf("s3: part-size cannot be negative (got %d)", c.PartSize)
    }

    if c.PartSize > 0 && c.PartSize < minPartSize {
        return fmt.Errorf("s3: part-size must be at least 5MB (got %s)",
            humanize.Bytes(uint64(c.PartSize)))
    }

    if c.PartSize > maxPartSize {
        return fmt.Errorf("s3: part-size cannot exceed 5GB (got %s)",
            humanize.Bytes(uint64(c.PartSize)))
    }

    if c.Concurrency < 0 {
        return fmt.Errorf("s3: concurrency must be positive (got %d)", c.Concurrency)
    }

    if c.Concurrency > 100 {
        log.Printf("s3: warning: high concurrency value %d may cause memory issues", c.Concurrency)
    }

    // 5. Endpoint validation. The URL is parsed once and the result reused
    // for the scheme warning.
    if c.Endpoint != "" {
        u, err := url.Parse(c.Endpoint)
        if err != nil {
            return fmt.Errorf("s3: invalid endpoint URL %q: %w", c.Endpoint, err)
        }

        // Warn about non-HTTPS endpoints
        if u.Scheme == "http" && !c.SkipVerify {
            log.Printf("s3: warning: using non-HTTPS endpoint %q without skip-verify", c.Endpoint)
        }
    }

    // 6. Authentication validation
    if err := c.validateAuth(ctx); err != nil {
        return fmt.Errorf("s3: authentication validation failed: %w", err)
    }

    // 7. Memory usage warning. Unset settings are replaced by their assumed
    // defaults so that, for example, a large part-size combined with the
    // default concurrency is still reported instead of estimating zero.
    partSize, concurrency := c.PartSize, c.Concurrency
    if partSize == 0 {
        partSize = assumedDefaultPartSize
    }
    if concurrency == 0 {
        concurrency = assumedDefaultConcurrency
    }
    estimatedMemory := partSize * int64(concurrency)
    if estimatedMemory > memoryWarnThreshold {
        log.Printf("s3: warning: configuration may use up to %s of memory during uploads",
            humanize.Bytes(uint64(estimatedMemory)))
    }

    // Continue with existing Init logic...
    return nil
}

2. Bucket Name Validation

// Patterns used by validateBucketName. They are compiled once at package
// initialization instead of on every call.
var (
    bucketNameStartRE       = regexp.MustCompile(`^[a-z0-9]`)
    bucketNameEndRE         = regexp.MustCompile(`[a-z0-9]$`)
    bucketNameInvalidCharRE = regexp.MustCompile(`[^a-z0-9.-]`)
    bucketNameDoubleDotRE   = regexp.MustCompile(`\.\.`)
    bucketNameDotHyphenRE   = regexp.MustCompile(`\.-|-\.`)
)

// validateBucketName checks name against the S3 bucket naming rules enforced
// here and returns an error describing the first rule that is violated, or
// nil when the name passes every check.
//
// Rules are evaluated in this order: length (3-63 bytes), first character,
// last character, allowed character set, consecutive periods, adjacent
// period/hyphen, and finally that the name does not parse as an IP address.
func validateBucketName(name string) error {
    if len(name) < 3 || len(name) > 63 {
        return fmt.Errorf("must be between 3 and 63 characters")
    }

    if !bucketNameStartRE.MatchString(name) {
        return fmt.Errorf("must start with lowercase letter or number")
    }

    if !bucketNameEndRE.MatchString(name) {
        return fmt.Errorf("must end with lowercase letter or number")
    }

    if bucketNameInvalidCharRE.MatchString(name) {
        return fmt.Errorf("can only contain lowercase letters, numbers, hyphens, and periods")
    }

    if bucketNameDoubleDotRE.MatchString(name) {
        return fmt.Errorf("cannot contain consecutive periods")
    }

    if bucketNameDotHyphenRE.MatchString(name) {
        return fmt.Errorf("periods and hyphens cannot be adjacent")
    }

    if net.ParseIP(name) != nil {
        return fmt.Errorf("cannot be formatted as an IP address")
    }

    return nil
}

3. Authentication Validation

// validateAuth loads the AWS configuration via c.config and attempts to
// retrieve credentials from it, returning an error when the configuration
// cannot be loaded, credentials cannot be retrieved, or the retrieved
// credentials report that they have no keys.
//
// When retrieval fails and neither explicit keys on the client nor the
// AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY environment variables are set, the
// error explains how to supply credentials. The underlying retrieval error is
// wrapped in both failure cases so the root cause is not lost.
func (c *ReplicaClient) validateAuth(ctx context.Context) error {
    // Try to load config to see if credentials are available
    cfg, err := c.config(ctx)
    if err != nil {
        return fmt.Errorf("failed to load AWS config: %w", err)
    }

    // Test credentials
    creds, err := cfg.Credentials.Retrieve(ctx)
    if err != nil {
        // These lookups only matter for choosing the error message, so they
        // are performed on the failure path only.
        hasExplicitCreds := c.AccessKeyID != "" && c.SecretAccessKey != ""
        hasEnvCreds := os.Getenv("AWS_ACCESS_KEY_ID") != "" &&
            os.Getenv("AWS_SECRET_ACCESS_KEY") != ""

        if !hasExplicitCreds && !hasEnvCreds {
            return fmt.Errorf("no credentials found (provide access-key-id/secret-access-key, set AWS_* environment variables, or use IAM role): %w", err)
        }
        return fmt.Errorf("failed to retrieve credentials: %w", err)
    }

    if !creds.HasKeys() {
        return fmt.Errorf("credentials are incomplete")
    }

    return nil
}

4. Configuration Warnings and Suggestions

// logConfigSuggestions writes advisory log messages derived from the client's
// settings. It never fails and does not modify the client.
//
// Messages are emitted, in order, for: both multipart settings left at zero,
// an endpoint containing "minio" without force-path-style, an endpoint
// containing "wasabi", and a part size below 10MB.
func (c *ReplicaClient) logConfigSuggestions() {
    const suggestedMinPartSize = 10 * 1024 * 1024

    // Both multipart knobs unset: tell the user which defaults apply.
    usingDefaults := c.PartSize == 0 && c.Concurrency == 0
    if usingDefaults {
        log.Printf("s3: using default multipart settings (part-size: 5MB, concurrency: 5)")
    }

    // Provider hints are keyed off substrings of the configured endpoint.
    if endpoint := c.Endpoint; endpoint != "" {
        looksLikeMinIO := strings.Contains(endpoint, "minio")
        if looksLikeMinIO && !c.ForcePathStyle {
            log.Printf("s3: detected MinIO endpoint, consider setting force-path-style: true")
        }

        looksLikeWasabi := strings.Contains(endpoint, "wasabi")
        if looksLikeWasabi {
            log.Printf("s3: detected Wasabi endpoint, ensure region matches endpoint")
        }
    }

    // Small parts mean more requests per upload.
    if c.PartSize < suggestedMinPartSize {
        log.Printf("s3: consider increasing part-size to reduce S3 request costs")
    }
}

5. Dry-Run Validation Mode

Add a validation command to test configuration without starting replication:

// cmd/litestream/validate.go

// ValidateCommand is the CLI command that checks a configuration without
// starting replication. It carries no state of its own.
type ValidateCommand struct{}

// Run reads the configuration file at configPath and initializes the replica
// client of every replica of every configured database, logging progress as
// it goes. It returns the first configuration, client-construction, or Init
// error encountered. When testConnectivity is set, each client's connection
// is also tested; a failed connectivity test is logged as a warning and does
// not fail the command.
func (c *ValidateCommand) Run(ctx context.Context, args []string) (err error) {
    config, err := ReadConfigFile(configPath)
    if err != nil {
        return fmt.Errorf("invalid configuration: %w", err)
    }

    for _, db := range config.DBs {
        log.Printf("Validating database: %s", db.Path)

        for idx, replica := range db.Replicas {
            // Replicas are reported to the user with 1-based numbering.
            num := idx + 1
            log.Printf("  Validating replica %d (type: %s)", num, replica.Type)

            client, clientErr := replica.ReplicaClient()
            if clientErr != nil {
                return fmt.Errorf("replica %d: %w", num, clientErr)
            }

            if initErr := client.Init(ctx); initErr != nil {
                return fmt.Errorf("replica %d validation failed: %w", num, initErr)
            }

            // Connectivity testing is opt-in and advisory only.
            if !testConnectivity {
                continue
            }
            if connErr := client.TestConnection(ctx); connErr != nil {
                log.Printf("  Warning: connectivity test failed: %v", connErr)
                continue
            }
            log.Printf("  Connectivity test passed")
        }
    }

    log.Printf("Configuration validation successful")
    return nil
}

6. Enhanced Error Messages

// configErrors maps S3 error codes for common configuration mistakes to a
// human-readable hint explaining how to resolve them.
var configErrors = map[string]string{
    "NoSuchBucket": "Bucket does not exist or you don't have access. Create the bucket or check permissions.",
    "InvalidAccessKeyId": "Access key is invalid. Check your access-key-id configuration.",
    "SignatureDoesNotMatch": "Secret key is invalid. Check your secret-access-key configuration.",
    "RequestTimeout": "Connection timed out. Check endpoint URL and network connectivity.",
    "AccessDenied": "Access denied. Check bucket permissions and IAM policies.",
}

// enhanceS3Error appends a "Hint: ..." line to err when its message contains
// one of the codes in configErrors. The original error is wrapped, so callers
// can still unwrap it. A nil err returns nil, and an error that matches no
// known code is returned unchanged.
//
// When the message contains more than one known code, the code that appears
// earliest in the message is used (the longer code wins a tie at the same
// position). Map iteration order is random in Go, so without this rule the
// chosen hint would vary from call to call.
//
// NOTE(review): matching is done on the error text; matching on a structured
// error code would be more robust if the error type exposes one.
func enhanceS3Error(err error) error {
    if err == nil {
        return nil
    }

    errStr := err.Error()
    bestCode, bestIdx := "", -1
    for code := range configErrors {
        idx := strings.Index(errStr, code)
        if idx < 0 {
            continue
        }
        earlier := bestIdx < 0 || idx < bestIdx
        longerAtSamePos := idx == bestIdx && len(code) > len(bestCode)
        if earlier || longerAtSamePos {
            bestCode, bestIdx = code, idx
        }
    }

    if bestIdx < 0 {
        return err
    }
    return fmt.Errorf("%w\nHint: %s", err, configErrors[bestCode])
}

Testing Requirements

  1. Unit Tests: Test all validation functions with valid and invalid inputs
  2. Integration Tests: Test with real S3 and S3-compatible services
  3. Error Message Tests: Verify helpful error messages for common misconfigurations
  4. Performance Tests: Ensure validation doesn't significantly impact startup time

User Experience Improvements

  1. Clear Error Messages: Every validation error should suggest how to fix it
  2. Warnings for Suboptimal Config: Log warnings for configurations that work but could be improved
  3. Validation Command: Allow users to test configuration without starting replication
  4. Progress Indication: Show what's being validated during startup

Example Error Messages

Error: s3: bucket name is required
Hint: Add 'bucket: your-bucket-name' to your S3 replica configuration

Error: s3: invalid bucket name "My-Bucket": must start with lowercase letter or number
Hint: S3 bucket names must be lowercase. Try "my-bucket" instead

Error: s3: part-size must be at least 5MB (got 1MB)
Hint: Set part-size to at least 5MB or remove it to use the default

Warning: s3: configuration may use up to 1GB of memory during uploads
Hint: Reduce part-size or concurrency if memory is limited

Error: s3: no credentials found
Hint: Provide access-key-id/secret-access-key in config, set AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY environment variables, or use an IAM role

Implementation Steps

  1. Add validation functions to s3/replica_client.go
  2. Enhance Init() method with validation checks
  3. Add validate subcommand to CLI
  4. Write comprehensive tests for validation logic
  5. Update documentation with validation command usage

Priority

Medium - Improves user experience and reduces support burden

Labels

  • enhancement
  • s3
  • validation
  • user-experience

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancement — New feature or request

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions