-
Notifications
You must be signed in to change notification settings - Fork 326
Open
Labels
enhancement: New feature or request
Description
Enhancement: Add S3 Configuration Validation
Overview
With the AWS SDK v2 upgrade and new configuration options, we should implement comprehensive validation of S3 configurations at startup to provide clear, actionable error messages for misconfigured settings.
Problem
Currently, configuration errors may only surface during actual replication attempts, leading to:
- Delayed error detection
- Cryptic error messages from the AWS SDK
- Wasted resources attempting invalid operations
- Difficulty debugging configuration issues
Proposed Solution
Implement validation in the S3 replica client's Init() method to check configuration validity and provide helpful error messages.
Implementation Details
1. Validation Checks to Implement
// s3/replica_client.go - enhance Init() method
func (c *ReplicaClient) Init(ctx context.Context) error {
// 1. Required fields validation
if c.Bucket == "" {
return fmt.Errorf("s3: bucket name is required")
}
// 2. Bucket name validation (S3 naming rules)
if err := validateBucketName(c.Bucket); err != nil {
return fmt.Errorf("s3: invalid bucket name %q: %w", c.Bucket, err)
}
// 3. Region validation
if c.Region == "" && c.Endpoint == "" {
// Try to detect from environment
if region := os.Getenv("AWS_REGION"); region == "" {
if region = os.Getenv("AWS_DEFAULT_REGION"); region == "" {
return fmt.Errorf("s3: region is required (set via config, AWS_REGION, or AWS_DEFAULT_REGION)")
}
}
}
// 4. Multipart upload settings validation
if c.PartSize > 0 && c.PartSize < 5*1024*1024 {
return fmt.Errorf("s3: part-size must be at least 5MB (got %s)",
humanize.Bytes(uint64(c.PartSize)))
}
if c.PartSize > 5*1024*1024*1024 {
return fmt.Errorf("s3: part-size cannot exceed 5GB (got %s)",
humanize.Bytes(uint64(c.PartSize)))
}
if c.Concurrency < 0 {
return fmt.Errorf("s3: concurrency must be positive (got %d)", c.Concurrency)
}
if c.Concurrency > 100 {
log.Printf("s3: warning: high concurrency value %d may cause memory issues", c.Concurrency)
}
// 5. Endpoint validation
if c.Endpoint != "" {
if _, err := url.Parse(c.Endpoint); err != nil {
return fmt.Errorf("s3: invalid endpoint URL %q: %w", c.Endpoint, err)
}
// Warn about non-HTTPS endpoints
if u, _ := url.Parse(c.Endpoint); u.Scheme == "http" && !c.SkipVerify {
log.Printf("s3: warning: using non-HTTPS endpoint %q without skip-verify", c.Endpoint)
}
}
// 6. Authentication validation
if err := c.validateAuth(ctx); err != nil {
return fmt.Errorf("s3: authentication validation failed: %w", err)
}
// 7. Memory usage warning
estimatedMemory := c.PartSize * int64(c.Concurrency)
if estimatedMemory > 1024*1024*1024 { // > 1GB
log.Printf("s3: warning: configuration may use up to %s of memory during uploads",
humanize.Bytes(uint64(estimatedMemory)))
}
// Continue with existing Init logic...
return nil
}2. Bucket Name Validation
// validateBucketName checks name against the S3 bucket naming rules used by
// this client and returns an error describing the first rule that is violated,
// or nil when the name is acceptable.
//
// Rules are evaluated in this order: length (3-63 bytes), first character,
// last character, allowed character set, consecutive periods, a period next
// to a hyphen, and finally IP-address formatting.
//
// The checks use plain byte and substring comparisons so that no regular
// expression has to be compiled on each call.
func validateBucketName(name string) error {
	if len(name) < 3 || len(name) > 63 {
		return fmt.Errorf("must be between 3 and 63 characters")
	}

	// isAlnum reports whether b is a lowercase ASCII letter or a digit.
	isAlnum := func(b byte) bool {
		return (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9')
	}

	// The length check above guarantees that both index expressions are valid.
	if !isAlnum(name[0]) {
		return fmt.Errorf("must start with lowercase letter or number")
	}
	if !isAlnum(name[len(name)-1]) {
		return fmt.Errorf("must end with lowercase letter or number")
	}

	// Every byte of a multi-byte UTF-8 sequence is >= 0x80, so non-ASCII
	// input is rejected here as well.
	for i := 0; i < len(name); i++ {
		if b := name[i]; !isAlnum(b) && b != '.' && b != '-' {
			return fmt.Errorf("can only contain lowercase letters, numbers, hyphens, and periods")
		}
	}

	if strings.Contains(name, "..") {
		return fmt.Errorf("cannot contain consecutive periods")
	}
	if strings.Contains(name, ".-") || strings.Contains(name, "-.") {
		return fmt.Errorf("periods and hyphens cannot be adjacent")
	}
	if net.ParseIP(name) != nil {
		return fmt.Errorf("cannot be formatted as an IP address")
	}
	return nil
}

// 3. Authentication Validation
func (c *ReplicaClient) validateAuth(ctx context.Context) error {
// Check for any authentication method
hasExplicitCreds := c.AccessKeyID != "" && c.SecretAccessKey != ""
hasEnvCreds := os.Getenv("AWS_ACCESS_KEY_ID") != "" &&
os.Getenv("AWS_SECRET_ACCESS_KEY") != ""
// Try to load config to see if credentials are available
cfg, err := c.config(ctx)
if err != nil {
return fmt.Errorf("failed to load AWS config: %w", err)
}
// Test credentials
creds, err := cfg.Credentials.Retrieve(ctx)
if err != nil {
if !hasExplicitCreds && !hasEnvCreds {
return fmt.Errorf("no credentials found (provide access-key-id/secret-access-key, set AWS_* environment variables, or use IAM role)")
}
return fmt.Errorf("failed to retrieve credentials: %w", err)
}
if !creds.HasKeys() {
return fmt.Errorf("credentials are incomplete")
}
return nil
}4. Configuration Warnings and Suggestions
func (c *ReplicaClient) logConfigSuggestions() {
// Performance suggestions based on configuration
if c.PartSize == 0 && c.Concurrency == 0 {
log.Printf("s3: using default multipart settings (part-size: 5MB, concurrency: 5)")
}
// S3-compatible service detection
if c.Endpoint != "" {
if strings.Contains(c.Endpoint, "minio") && !c.ForcePathStyle {
log.Printf("s3: detected MinIO endpoint, consider setting force-path-style: true")
}
if strings.Contains(c.Endpoint, "wasabi") {
log.Printf("s3: detected Wasabi endpoint, ensure region matches endpoint")
}
}
// Cost optimization suggestion
if c.PartSize < 10*1024*1024 {
log.Printf("s3: consider increasing part-size to reduce S3 request costs")
}
}5. Dry-Run Validation Mode
Add a validation command to test configuration without starting replication:
// cmd/litestream/validate.go
type ValidateCommand struct{}
func (c *ValidateCommand) Run(ctx context.Context, args []string) (err error) {
// Load configuration
config, err := ReadConfigFile(configPath)
if err != nil {
return fmt.Errorf("invalid configuration: %w", err)
}
// Validate each database and replica
for _, db := range config.DBs {
log.Printf("Validating database: %s", db.Path)
for i, replica := range db.Replicas {
log.Printf(" Validating replica %d (type: %s)", i+1, replica.Type)
client, err := replica.ReplicaClient()
if err != nil {
return fmt.Errorf("replica %d: %w", i+1, err)
}
if err := client.Init(ctx); err != nil {
return fmt.Errorf("replica %d validation failed: %w", i+1, err)
}
// Optionally test connectivity
if testConnectivity {
if err := client.TestConnection(ctx); err != nil {
log.Printf(" Warning: connectivity test failed: %v", err)
} else {
log.Printf(" Connectivity test passed")
}
}
}
}
log.Printf("Configuration validation successful")
return nil
}6. Enhanced Error Messages
// configErrors maps substrings of common S3 error messages (the service error
// codes) to a short hint explaining how to fix the underlying configuration
// problem.
var configErrors = map[string]string{
	"NoSuchBucket":          "Bucket does not exist or you don't have access. Create the bucket or check permissions.",
	"InvalidAccessKeyId":    "Access key is invalid. Check your access-key-id configuration.",
	"SignatureDoesNotMatch": "Secret key is invalid. Check your secret-access-key configuration.",
	"RequestTimeout":        "Connection timed out. Check endpoint URL and network connectivity.",
	"AccessDenied":          "Access denied. Check bucket permissions and IAM policies.",
}

// enhanceS3Error appends a "Hint:" line to err when its message contains one
// of the codes in configErrors. The original error is wrapped with %w, so it
// stays reachable through errors.Is and errors.As. A nil error yields nil, and
// an error that matches no known code is returned unchanged.
//
// Map iteration order is random in Go, so returning on the first match would
// make the hint nondeterministic when a message contains more than one code.
// Instead, the code that appears earliest in the message wins; if two codes
// start at the same position, the lexicographically smaller one wins.
func enhanceS3Error(err error) error {
	if err == nil {
		return nil
	}

	msg := err.Error()
	bestCode, bestPos := "", -1
	for code := range configErrors {
		pos := strings.Index(msg, code)
		if pos < 0 {
			continue
		}
		if bestPos < 0 || pos < bestPos || (pos == bestPos && code < bestCode) {
			bestCode, bestPos = code, pos
		}
	}
	if bestPos < 0 {
		return err
	}
	return fmt.Errorf("%w\nHint: %s", err, configErrors[bestCode])
}

// Testing Requirements
- Unit Tests: Test all validation functions with valid and invalid inputs
- Integration Tests: Test with real S3 and S3-compatible services
- Error Message Tests: Verify helpful error messages for common misconfigurations
- Performance Tests: Ensure validation doesn't significantly impact startup time
User Experience Improvements
- Clear Error Messages: Every validation error should suggest how to fix it
- Warnings for Suboptimal Config: Log warnings for configurations that work but could be improved
- Validation Command: Allow users to test configuration without starting replication
- Progress Indication: Show what's being validated during startup
Example Error Messages
Error: s3: bucket name is required
Hint: Add 'bucket: your-bucket-name' to your S3 replica configuration
Error: s3: invalid bucket name "My-Bucket": must start with lowercase letter or number
Hint: S3 bucket names must be lowercase. Try "my-bucket" instead
Error: s3: part-size must be at least 5MB (got 1MB)
Hint: Set part-size to at least 5MB or remove it to use the default
Warning: s3: configuration may use up to 1GB of memory during uploads
Hint: Reduce part-size or concurrency if memory is limited
Error: s3: no credentials found
Hint: Provide access-key-id/secret-access-key in config, set AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY environment variables, or use an IAM role
Implementation Steps
- Add validation functions to s3/replica_client.go
- Enhance Init() method with validation checks
- Add validate subcommand to CLI
- Write comprehensive tests for validation logic
- Update documentation with validation command usage
Priority
Medium - Improves user experience and reduces support burden
Labels
- enhancement
- s3
- validation
- user-experience
Metadata
Metadata
Assignees
Labels
enhancement: New feature or request