Skip to content

Commit

Permalink
[PLAT-13910] Improve IAM credentials fetch logging and add retries
Browse files Browse the repository at this point in the history
Summary:
- Improved logging in the IAM credentials fetch code, and added retries to ride out transient network blips.
- Fetching credentials is now attempted up to 10 times by default (configurable via `yb.aws.iam_credentials_num_retries`), with a 5-second sleep between attempts.

- The updated logs for the failure scenario look like the following:

```
2024-09-09T16:24:03.636Z  [debug] 6b3174ee-3990-4b02-b008-843e7b953793 IAMTemporaryCredentialsProvider.java:124 [application-pekko.actor.default-dispatcher-14] com.yugabyte.yw.common.IAMTemporaryCredentialsProvider Fetching IAM credentials failed, will retry after 5 seconds
2024-09-09T16:24:08.637Z  [info] 6b3174ee-3990-4b02-b008-843e7b953793 IAMTemporaryCredentialsProvider.java:113 [application-pekko.actor.default-dispatcher-14] com.yugabyte.yw.common.IAMTemporaryCredentialsProvider Loading credentials from: WebIdentityTokenCredentialsProvider
2024-09-09T16:24:08.638Z  [warn] 6b3174ee-3990-4b02-b008-843e7b953793 ApacheConnectionManagerFactory.java:142 [application-pekko.actor.default-dispatcher-14] com.amazonaws.http.AmazonHttpClient SSL Certificate checking for endpoints has been explicitly disabled.
2024-09-09T16:24:08.639Z  [info] 6b3174ee-3990-4b02-b008-843e7b953793 IAMTemporaryCredentialsProvider.java:113 [application-pekko.actor.default-dispatcher-14] com.yugabyte.yw.common.IAMTemporaryCredentialsProvider Loading credentials from: ProfileCredentialsProvider
2024-09-09T16:24:08.639Z  [warn] 6b3174ee-3990-4b02-b008-843e7b953793 CredentialsLegacyConfigLocationProvider.java:45 [application-pekko.actor.default-dispatcher-14] com.amazonaws.profile.path.cred.CredentialsLegacyConfigLocationProvider Found the legacy config profiles file at [/home/centos/.aws/config]. Please move it to the latest default location [~/.aws/credentials].
2024-09-09T16:24:08.640Z  [info] 6b3174ee-3990-4b02-b008-843e7b953793 IAMTemporaryCredentialsProvider.java:113 [application-pekko.actor.default-dispatcher-14] com.yugabyte.yw.common.IAMTemporaryCredentialsProvider Loading credentials from: AssumeInstanceRole
2024-09-09T16:24:08.641Z  [warn] 6b3174ee-3990-4b02-b008-843e7b953793 ApacheConnectionManagerFactory.java:142 [application-pekko.actor.default-dispatcher-14] com.amazonaws.http.AmazonHttpClient SSL Certificate checking for endpoints has been explicitly disabled.
2024-09-09T16:24:08.644Z  [debug] 6b3174ee-3990-4b02-b008-843e7b953793 IAMTemporaryCredentialsProvider.java:174 [application-pekko.actor.default-dispatcher-14] com.yugabyte.yw.common.IAMTemporaryCredentialsProvider Could not get maximum duration for role arn: null. Using default 1 hour instead.
2024-09-09T16:24:08.645Z  [info] 6b3174ee-3990-4b02-b008-843e7b953793 IAMTemporaryCredentialsProvider.java:113 [application-pekko.actor.default-dispatcher-14] com.yugabyte.yw.common.IAMTemporaryCredentialsProvider Loading credentials from: EC2ContainerCredentialsProvider
2024-09-09T16:24:08.647Z  [error] 6b3174ee-3990-4b02-b008-843e7b953793 AWSUtil.java:310 [application-pekko.actor.default-dispatcher-14] com.yugabyte.yw.common.AWSUtil Fetching IAM credentials failed: Unable to load AWS credentials: [Source 'WebIdentityTokenCredentialsProvider': AWS_ROLE_ARN: blank variable value., Source 'ProfileCredentialsProvider': Unable to load credentials into profile [default]: AWS Access Key ID is not specified., Source 'AssumeInstanceRole': Forbidden (Service: null; Status Code: 403; Error Code: null; Request ID: null; Proxy: null), Source 'EC2ContainerCredentialsProvider': Forbidden (Service: null; Status Code: 403; Error Code: null; Request ID: null; Proxy: null)]
```

Test Plan: Tested manually on my IAM-enabled dev server by disabling the EC2 metadata service.

Reviewers: sneelakantan

Reviewed By: sneelakantan

Subscribers: yugaware

Differential Revision: https://phorge.dev.yugabyte.com/D37908
  • Loading branch information
kv83821-yb committed Sep 10, 2024
1 parent aa41478 commit bcf7f47
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 67 deletions.
7 changes: 4 additions & 3 deletions managed/src/main/java/com/yugabyte/yw/common/AWSUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@
@Singleton
@Slf4j
public class AWSUtil implements CloudUtil {
@Inject IAMTemporaryCredentialsProvider iamCredsProvider;
@Inject CustomCAStoreManager customCAStoreManager;
@Inject RuntimeConfGetter runtimeConfGetter;
@Inject AWSCloudImpl awsCloudImpl;
Expand Down Expand Up @@ -305,7 +304,8 @@ public AmazonS3 createS3Client(CustomerConfigStorageS3Data s3Data, String region
try {
s3ClientBuilder.withCredentials(
new AWSStaticCredentialsProvider(
new IAMTemporaryCredentialsProvider().getTemporaryCredentials(s3Data)));
(new IAMTemporaryCredentialsProvider(runtimeConfGetter))
.getTemporaryCredentials(s3Data)));
} catch (Exception e) {
log.error("Fetching IAM credentials failed: {}", e.getMessage());
throw new PlatformServiceException(PRECONDITION_FAILED, e.getMessage());
Expand Down Expand Up @@ -671,7 +671,8 @@ private Map<String, String> createCredsMapYbc(
private void fillMapWithIAMCreds(
Map<String, String> s3CredsMap, CustomerConfigStorageS3Data s3Data) {
try {
AWSCredentials creds = iamCredsProvider.getTemporaryCredentials(s3Data);
AWSCredentials creds =
(new IAMTemporaryCredentialsProvider(runtimeConfGetter)).getTemporaryCredentials(s3Data);
s3CredsMap.put(YBC_AWS_ACCESS_KEY_ID_FIELDNAME, creds.getAWSAccessKeyId());
s3CredsMap.put(YBC_AWS_SECRET_ACCESS_KEY_FIELDNAME, creds.getAWSSecretKey());
if (creds instanceof AWSSessionCredentials) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// Copyright (c) YugaByte, Inc.

package com.yugabyte.yw.common;

import com.amazonaws.SdkClientException;
Expand All @@ -23,27 +25,39 @@
import com.amazonaws.services.securitytoken.model.Credentials;
import com.amazonaws.services.securitytoken.model.GetCallerIdentityRequest;
import com.amazonaws.services.securitytoken.model.GetCallerIdentityResult;
import com.google.inject.Singleton;
import com.yugabyte.yw.common.config.RuntimeConfGetter;
import com.yugabyte.yw.models.configs.data.CustomerConfigStorageS3Data;
import com.yugabyte.yw.models.configs.data.CustomerConfigStorageS3Data.IAMConfiguration;
import java.io.File;
import java.util.Optional;
import java.util.stream.Stream;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;

@Slf4j
@Singleton
public class IAMTemporaryCredentialsProvider {

public static final String STS_DEFAULT_ENDPOINT = "sts.amazonaws.com";
private static final String NUM_RETRIES_CONFIG_PATH = "yb.aws.iam_credentials_num_retries";
private static final int WAIT_EACH_RETRY_SECS = 5;

private final RuntimeConfGetter confGetter;

/**
 * Creates a provider that reads its settings through the given config getter.
 *
 * @param confGetter used by getTemporaryCredentials to look up the configured number of
 *     credential fetch attempts
 */
public IAMTemporaryCredentialsProvider(RuntimeConfGetter confGetter) {
this.confGetter = confGetter;
}

public enum IAMCredentialSource {
// Try all in order WEB_TOKEN > IAM_USER > ASSUME_INSTANCE_ROLE > EC2_INSTANCE
DEFAULT("default"),
// ProfileCredentialsProvider
IAM_USER("iam_user"),
// WebIdentityTokenCredentialsProvider
WEB_TOKEN("web_token"),
// AssumeInstanceRole
ASSUME_INSTANCE_ROLE("assume_instance_role"),
// EC2ContainerCredentialsProvider
EC2_INSTANCE("ec2_instance");

private final String enumVal;
Expand All @@ -57,64 +71,83 @@ public String getValue() {
}
}

public AWSCredentials getTemporaryCredentials(CustomerConfigStorageS3Data s3Data)
throws Exception {
return CustomAWSCredentials.getTemporaryCredentials(s3Data);
}

private interface CustomAWSCredentials {
AWSCredentials getCredentialsOrFail() throws Exception;

default Optional<AWSCredentials> optionalGetCredentials() {
try {
return Optional.of(getCredentialsOrFail());
} catch (Exception e) {
return Optional.empty();
}
public AWSCredentials getTemporaryCredentials(CustomerConfigStorageS3Data s3Data) {
List<CustomAWSCredentials> credentialsSource = new ArrayList<>();
switch (s3Data.iamConfig.credentialSource) {
case ASSUME_INSTANCE_ROLE:
credentialsSource.add(new AssumeInstanceRole(s3Data.iamConfig, s3Data.fallbackRegion));
break;
case EC2_INSTANCE:
credentialsSource.add(new InstanceProfileCredentials());
break;
case IAM_USER:
credentialsSource.add(new IAMUserCredentials(s3Data.iamConfig.iamUserProfile));
break;
case WEB_TOKEN:
credentialsSource.add(
new AssumeRoleWithWebIdentity(s3Data.iamConfig, s3Data.fallbackRegion));
break;
case DEFAULT:
log.debug(
"Trying chain of credential providers in order: WebIdentityTokenCredentialsProvider,"
+ " ProfileCredentialsProvider, AssumeInstanceRole,"
+ " EC2ContainerCredentialsProvider");
credentialsSource.add(
new AssumeRoleWithWebIdentity(s3Data.iamConfig, s3Data.fallbackRegion));
credentialsSource.add(new IAMUserCredentials(s3Data.iamConfig.iamUserProfile));
credentialsSource.add(new AssumeInstanceRole(s3Data.iamConfig, s3Data.fallbackRegion));
credentialsSource.add(new InstanceProfileCredentials());
break;
default:
throw new RuntimeException(
String.format(
"Invalid IAM credential source option %s", s3Data.iamConfig.credentialSource));
}

static AWSCredentials getTemporaryCredentials(CustomerConfigStorageS3Data s3Data) {
try {
switch (s3Data.iamConfig.credentialSource) {
case ASSUME_INSTANCE_ROLE:
return new AssumeInstanceRole(s3Data.iamConfig, s3Data.fallbackRegion)
.getCredentialsOrFail();
case EC2_INSTANCE:
return new InstanceProfileCredentials().getCredentialsOrFail();
case IAM_USER:
return new IAMUserCredentials(s3Data.iamConfig.iamUserProfile).getCredentialsOrFail();
case WEB_TOKEN:
return new AssumeRoleWithWebIdentity(s3Data.iamConfig, s3Data.fallbackRegion)
.getCredentialsOrFail();
case DEFAULT:
Optional<AWSCredentials> creds =
Stream.of(
new AssumeRoleWithWebIdentity(s3Data.iamConfig, s3Data.fallbackRegion),
new IAMUserCredentials(s3Data.iamConfig.iamUserProfile),
new AssumeInstanceRole(s3Data.iamConfig, s3Data.fallbackRegion),
new InstanceProfileCredentials())
.map(CustomAWSCredentials::optionalGetCredentials)
.filter(Optional::isPresent)
.findFirst()
.get();
if (creds.isPresent()) {
return creds.get();
} else {
throw new RuntimeException("No credential found in chain.");
}
default:
throw new RuntimeException("Invalid IAM credential source option");
int numRetries = confGetter.getStaticConf().getInt(NUM_RETRIES_CONFIG_PATH);
List<String> errorMessages = new ArrayList<>();
while (numRetries > 0) {
errorMessages.clear();
// Loop through credential sources and get working credentials
for (CustomAWSCredentials credentialSource : credentialsSource) {
try {
log.info("Loading IAM credentials from: {}", credentialSource.getSourceName());
AWSCredentials credentials = credentialSource.getCredentialsOrFail();
log.info("Found IAM credentials in: {}", credentialSource.getSourceName());
return credentials;
} catch (Exception e) {
String message = "Source '" + credentialSource.getSourceName() + "': " + e.getMessage();
errorMessages.add(message);
}
}
numRetries--;
if (numRetries > 0) {
log.debug("Fetching IAM credentials failed, will retry after 5 seconds");
try {
Thread.sleep(WAIT_EACH_RETRY_SECS * 1000);
} catch (InterruptedException e) {
throw new RuntimeException("Thread interrupted while sleeping!");
}
} catch (Exception e) {
throw new RuntimeException(e.getCause());
}
}
throw new RuntimeException("Unable to load AWS credentials: " + errorMessages);
}

/**
 * A single source that IAM credentials can be loaded from. getTemporaryCredentials tries the
 * selected sources in order and returns the first credentials it obtains.
 */
private interface CustomAWSCredentials {
// Returns credentials from this source, or throws when they cannot be obtained.
AWSCredentials getCredentialsOrFail() throws Exception;

// Name that identifies this source in log lines and in the aggregated failure message.
String getSourceName();
}

private static class AssumeInstanceRole implements CustomAWSCredentials {
IAMConfiguration iamConfig;
String signingRegion;

// Name under which this source appears in log lines and in the aggregated failure message.
@Override
public String getSourceName() {
return "AssumeInstanceRole";
}

public AssumeInstanceRole(IAMConfiguration iamConfig, String signingRegion) {
this.iamConfig = iamConfig;
this.signingRegion = signingRegion;
Expand Down Expand Up @@ -144,7 +177,7 @@ public AWSCredentials getCredentialsOrFail() throws SdkClientException {
roleArn = iamRole.getArn();
maxDuration = iamRole.getMaxSessionDuration();
} catch (Exception e) {
log.error(
log.debug(
"Could not get maximum duration for role arn: {}. Using default 1 hour instead.",
roleArn);
}
Expand Down Expand Up @@ -181,13 +214,18 @@ private static class AssumeRoleWithWebIdentity implements CustomAWSCredentials {
IAMConfiguration iamConfig;
String signingRegion;

// Name under which this source appears in log lines and in the aggregated failure message.
@Override
public String getSourceName() {
return "WebIdentityTokenCredentialsProvider";
}

public AssumeRoleWithWebIdentity(IAMConfiguration iamConfig, String signingRegion) {
this.iamConfig = iamConfig;
this.signingRegion = signingRegion;
}

@Override
public AWSCredentials getCredentialsOrFail() throws Exception {
public AWSCredentials getCredentialsOrFail() throws SdkClientException {
String webIdentityRoleArn = null;
String webToken = null;
int maxDuration = iamConfig.duration;
Expand All @@ -200,14 +238,9 @@ public AWSCredentials getCredentialsOrFail() throws Exception {
// Fetch AWS_ROLE_ARN from environment( Yugaware is required to have it if service account IAM
// set).
// This is how default chain fetches it.
try {
webIdentityRoleArn = System.getenv("AWS_ROLE_ARN");
if (StringUtils.isBlank(webIdentityRoleArn)) {
throw new RuntimeException("AWS_ROLE_ARN: blank variable value.");
}
} catch (Exception e) {
throw new RuntimeException(
"AWS_ROLE_ARN not found for Web Identity assume role.", e.getCause());
webIdentityRoleArn = System.getenv("AWS_ROLE_ARN");
if (StringUtils.isBlank(webIdentityRoleArn)) {
throw new RuntimeException("AWS_ROLE_ARN: blank variable value.");
}

// Fetch web-token for making the Assume role request.
Expand All @@ -217,7 +250,7 @@ public AWSCredentials getCredentialsOrFail() throws Exception {
new File(System.getenv("AWS_WEB_IDENTITY_TOKEN_FILE")), "UTF-8");
} catch (Exception e) {
throw new RuntimeException(
"Could not get web token for Assume role request.", e.getCause());
String.format("Fetching Web Identity token failed: %s", e.getMessage()));
}

try {
Expand All @@ -233,7 +266,7 @@ public AWSCredentials getCredentialsOrFail() throws Exception {
.getRole();
maxDuration = iamRole.getMaxSessionDuration();
} catch (Exception e) {
log.error(
log.debug(
"Could not get maximum duration for role arn: {}. Using default 1 hour instead.",
webIdentityRoleArn);
}
Expand Down Expand Up @@ -271,6 +304,11 @@ private static class InstanceProfileCredentials implements CustomAWSCredentials

public InstanceProfileCredentials() {}

// Name under which this source appears in log lines and in the aggregated failure message.
@Override
public String getSourceName() {
return "EC2ContainerCredentialsProvider";
}

@Override
public AWSCredentials getCredentialsOrFail() throws Exception {
AWSCredentialsProvider ec2CredentialsProvider = new EC2ContainerCredentialsProviderWrapper();
Expand All @@ -283,6 +321,11 @@ private static class IAMUserCredentials implements CustomAWSCredentials {

String profileName;

// Name under which this source appears in log lines and in the aggregated failure message.
@Override
public String getSourceName() {
return "ProfileCredentialsProvider";
}

public IAMUserCredentials(String profileName) {
this.profileName = profileName;
}
Expand All @@ -293,7 +336,7 @@ public AWSCredentials getCredentialsOrFail() throws Exception {
}
}

public static AWSSecurityTokenServiceClientBuilder getStandardSTSClientWithoutCredentials(
private static AWSSecurityTokenServiceClientBuilder getStandardSTSClientWithoutCredentials(
String fallbackRegion, boolean regionalSTS) {
fallbackRegion = AWSUtil.getClientRegion(fallbackRegion);
String stsEndpoint = STS_DEFAULT_ENDPOINT;
Expand Down
1 change: 1 addition & 0 deletions managed/src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,7 @@ yb {
}

enable_imdsv2_support = true
iam_credentials_num_retries = 10

disk_resize_cooldown_hours = 6
}
Expand Down

0 comments on commit bcf7f47

Please sign in to comment.