Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multiple Service Deployer enhancements #44

Merged
merged 1 commit into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 66 additions & 43 deletions lib/restate-constructs/register-service-handler/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,34 +18,53 @@ import { randomInt } from "crypto";
import * as https from "node:https";
import * as http from "node:http";

/**
* Custom Resource event shape for registering Restate Lambda service handlers with a Restate environment.
*/
export interface RegistrationProperties {
servicePath?: string;
/** Where to find the Restate admin endpoint. */
adminUrl?: string;

/**
* Optional service name to look for in the deployment. If more than one service is behind the same endpoint, any one
* should match. Leave unset to skip the check.
*/
servicePath?: string;

serviceLambdaArn?: string;

invokeRoleArn?: string;
removalPolicy?: cdk.RemovalPolicy;

/**
* Authentication token ARN to use with the admin endpoint. The secret value will be used as a bearer token, if set.
*/
authTokenSecretArn?: string;
/* Not used by the handler, purely used to trick CloudFormation to perform an update when it otherwise would not. */

/** Not used by the handler, purely used to trick CloudFormation to perform an update when it otherwise would not. */
configurationVersion?: string;
/* Whether to mark the service as private, and make it unavailable to be called via Restate ingress. */

/**
* Whether to mark the service as private, and make it unavailable to be called via Restate ingress. If there are
* multiple services provided by the endpoint, they will all be marked as specified.
*/
private?: "true" | "false";
/* Whether to trust any certificate from the admin endpoint. */

/** Whether to trust any certificate when connecting to the admin endpoint. */
insecure?: "true" | "false";

removalPolicy?: cdk.RemovalPolicy;
}

type RegisterDeploymentResponse = {
id?: string;
services?: { name?: string; revision?: number }[];
id: string;
services: { name: string; revision: number; public: boolean }[];
};

const MAX_HEALTH_CHECK_ATTEMPTS = 5; // This is intentionally quite long to allow some time for first-run EC2 and Docker boot up
const MAX_REGISTRATION_ATTEMPTS = 3;

// const INSECURE = true;

const DEPLOYMENTS_PATH = "deployments";
const SERVICES_PATH = "services";
const DEPLOYMENTS_PATH_LEGACY = "endpoints"; // temporarily fall back for legacy clusters

/**
* Custom Resource event handler for Restate service registration. This handler backs the custom resources created by
Expand Down Expand Up @@ -143,16 +162,17 @@ export const handler: Handler<CloudFormationCustomResourceEvent, void> = async f
await sleep(waitTimeMillis);
}

let deploymentsUrl = `${props.adminUrl}/${DEPLOYMENTS_PATH}`;
const deploymentsUrl = `${props.adminUrl}/${DEPLOYMENTS_PATH}`;
const registrationRequest = JSON.stringify({
arn: props.serviceLambdaArn,
assume_role_arn: props.invokeRoleArn,
});

let failureReason;
attempt = 1;
console.log(`Triggering registration at ${deploymentsUrl}: ${registrationRequest}`);
while (true) {
console.log(`Registering services at ${deploymentsUrl}: ${registrationRequest}`);

registration_retry_loop: while (true) {
try {
console.log(`Making registration request #${attempt}...`);
const controller = new AbortController();
Expand All @@ -168,48 +188,51 @@ export const handler: Handler<CloudFormationCustomResourceEvent, void> = async f
agent: agentSelector,
}).finally(() => clearTimeout(registerCallTimeout));

if (registerDeploymentResponse.status == 404 && attempt == 1) {
deploymentsUrl = `${props.adminUrl}/${DEPLOYMENTS_PATH_LEGACY}`;
console.log(`Got 404, falling back to <0.7.0 legacy endpoint registration at: ${deploymentsUrl}`);
}

if (registerDeploymentResponse.status >= 200 && registerDeploymentResponse.status < 300) {
const response = (await registerDeploymentResponse.json()) as RegisterDeploymentResponse;

// TODO: there may be more than one! support optional exact/partial matching
if (!response?.services?.find((s) => s.name === props.servicePath)) {
if (props.servicePath && !response.services.find((s) => s.name === props.servicePath)) {
failureReason =
"Restate service registration failed: service name indicated by service response" +
` ("${response?.services?.[0]?.name})) does not match the expected value ("${props.servicePath}")!`;
`"Registration succeeded, but none the services names in the deployment matched the specified name. " +
"Expected \"${props.servicePath}\"", got back: [` + response.services.map((svc) => svc?.name).join(", ");
`]`;

attempt = MAX_REGISTRATION_ATTEMPTS; // don't retry this
break;
}

console.log("Successful registration!");
console.log("Successful registration! Services: ", JSON.stringify(response.services));

const isPublic = (props.private ?? "false") === "false";
console.log(`Marking service ${props.servicePath} as ${isPublic ? "public" : "private"}...`);
const controller = new AbortController();
const privateCallTimeout = setTimeout(() => controller.abort("timeout"), 10_000);
const patchResponse = await fetch(`${props.adminUrl}/${SERVICES_PATH}/${props.servicePath}`, {
signal: controller.signal,
method: "PATCH",
headers: {
"Content-Type": "application/json",
...authHeader,
},
body: JSON.stringify({ public: isPublic }),
agent: agentSelector,
}).finally(() => clearTimeout(privateCallTimeout));

console.log(`Got patch response back: ${patchResponse.status}`);
if (patchResponse.status != 200) {
failureReason = `Marking service as ${props.private ? "private" : "public"} failed: ${patchResponse.statusText} (${patchResponse.status})`;
break; // don't throw immediately - let retry loop decide whether to abort s
}

console.log(`Successfully marked service as ${isPublic ? "public" : "private"}.`);
for (const service of response.services ?? []) {
if (service.public === isPublic) {
console.log(`Service ${service.name} is ${isPublic ? "public" : "private"}.`);
continue;
}

console.log(`Marking service ${service.name} as ${isPublic ? "public" : "private"}...`);
const controller = new AbortController();
const privateCallTimeout = setTimeout(() => controller.abort("timeout"), 10_000);
const patchResponse = await fetch(`${props.adminUrl}/${SERVICES_PATH}/${service.name}`, {
signal: controller.signal,
method: "PATCH",
headers: {
"Content-Type": "application/json",
...authHeader,
},
body: JSON.stringify({ public: isPublic }),
agent: agentSelector,
}).finally(() => clearTimeout(privateCallTimeout));

console.log(`Got patch response back: ${patchResponse.status}`);
if (patchResponse.status != 200) {
failureReason = `Marking service as ${props.private ? "private" : "public"} failed: ${patchResponse.statusText} (${patchResponse.status})`;
break registration_retry_loop; // don't throw immediately - let retry loop decide whether to abort s
}

console.log(`Successfully marked service as ${isPublic ? "public" : "private"}.`);
}

return; // Overall success!
} else {
Expand Down
136 changes: 91 additions & 45 deletions lib/restate-constructs/service-deployer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,61 @@ import { RegistrationProperties } from "./register-service-handler";

const DEFAULT_TIMEOUT = cdk.Duration.seconds(180);

/**
* Optional settings accepted by {@link ServiceDeployer.register} when registering the services exposed by a Lambda
* handler with a Restate environment. Every field is optional.
*/
export interface ServiceRegistrationProps {
/**
* Secrets Manager secret holding the authentication token to use when calling the admin API. Takes precedence
* over the environment's token. The deployer's event handler is granted read access to whichever secret is used.
*/
authToken?: secrets.ISecret;

/**
* Whether to skip granting the invoker role permission to invoke the service handler. The deployer by default
* will grant the invoker role permission to invoke the handler, but you can set this to `true` to handle this
* manually.
*/
skipInvokeFunctionGrant?: boolean;

/**
* Private services are only available to other Restate services in the same environment, and are not accessible for
* ingress-based invocation. If multiple services are exposed by the same handler, all of them will be updated.
* Default: `false`, i.e. services will be made public and reachable via ingress by default.
*
* @see https://docs.restate.dev/operate/registration#private-services
*/
private?: boolean;

/**
* A dummy parameter to force CloudFormation to update the deployment when the configuration changes. Useful if
* you want to target the "latest version" of a service handler and need to force a deployment in order to trigger
* discovery. Set this to a new value every time you want to force a service registration to happen, e.g. a timestamp.
*/
configurationVersion?: string;

/**
* Whether to trust any certificate presented by the admin endpoint, e.g. to accept self-signed certificates.
* Default: `false`.
*/
insecure?: boolean;

/**
* Specify a custom admin endpoint URL, overriding the one exposed by the target environment. You may need this if
* the `Environment` construct is reporting a different URL from the one that the deployer can reach, e.g. if your
* Restate service is behind a load balancer.
*/
adminUrl?: string;
}

/**
* This construct implements a custom CloudFormation resource provider that handles deploying Lambda-based service
* handlers with a Restate environment. It is used internally by the Cloud and self-hosted Restate environment
* constructs and not intended for direct use by end users of Restate.
*
* This functionality is implemented as a custom resource so that we are notified of any updates to service handler
* functions: by creating a CloudFormation component, we can model the dependency that any changes to the handlers need
* to be communicated to the registrar. Without this dependency, CloudFormation might perform an update deployment that
* is triggered by a Lambda handler code or configuration change, and the Restate environment would be unaware of it.
* Register Lambda-backed Restate services with Restate environments.
*
* You can share the same deployer across multiple service registries provided the configuration options are compatible
* (e.g. the Restate environments it needs to communicate with for deployment are all accessible via the same VPC and
* Security Groups, accept the same authentication token, and so on).
* You can reuse the same deployer to register the services exposed by multiple handlers. You can also reuse the
* deployer to target multiple Restate environments, provided the configuration options are compatible (e.g. the Restate
* environments it needs to communicate with are all accessible from the same VPC and Security Groups, accept the same
* authentication token, and so on). Conversely, you can create multiple deployers in cases when you need to deploy to
* multiple environments that require distinct configuration.
*
* Deployment logs are retained for 30 days by default.
*
* @see {@link register}
*/
export class ServiceDeployer extends Construct {
/** The custom resource provider for handling "deployment" resources. */
Expand Down Expand Up @@ -100,10 +140,33 @@ export class ServiceDeployer extends Construct {
}

/**
* Deploy a Lambda-backed Restate service to a given environment. This will register a deployment that will trigger
* a Restate registration whenever the handler resource changes.
* Deploy a Lambda-backed Restate handler to a given environment.
*
* Note that a change in the handler properties is necessary to trigger re-discovery due to how CloudFormation updates
* work. If you deploy a fixed Lambda alias such as `$LATEST` which isn't changing on every handler code or
* configuration update, you will want to set the `configurationVersion` property in `options` to a new value (e.g. a
* timestamp) to ensure an update to the Restate environment is triggered on stack deployment.
*
* @param handler service handler - must be a specific function version, use "latest" if you don't care about explicit versioning
* @param environment target Restate environment
* @param options additional options; see field documentation for details
* @see {ServiceRegistrationProps}
*/
register(handler: lambda.IVersion, environment: IRestateEnvironment, options?: ServiceRegistrationProps) {
// No expected service name is passed (`undefined`), so no particular name is required of the discovered services.
// NOTE(review): presumably this leaves `servicePath` unset on the custom resource - confirm in registerServiceInternal.
this.registerServiceInternal(undefined, handler, environment, options);
}

/**
* Deploy a Lambda-backed Restate handler to a given environment, ensuring that a particular service name exists.
*
* @param serviceName the service name within Restate - this must match the service's self-reported name during discovery
* Note that a change in the handler properties is necessary to trigger re-discovery due to how CloudFormation updates
* work. If you deploy a fixed Lambda alias such as `$LATEST` which isn't changing on every handler code or
* configuration update, you will want to set the `configurationVersion` property in `options` to a new value (e.g. a
* timestamp) to ensure an update to the Restate environment is triggered on stack deployment.
*
* @param serviceName the service name within Restate - as a safety mechanism, this must match the service's
* self-reported name during discovery; if there are multiple services, one of them must match or the
* deployment fails
* @param handler service handler - must be a specific function version, use "latest" if you don't care about explicit versioning
* @param environment target Restate environment
* @param options additional options; see field documentation for details
Expand All @@ -112,36 +175,16 @@ export class ServiceDeployer extends Construct {
serviceName: string,
handler: lambda.IVersion,
environment: IRestateEnvironment,
options?: {
/**
* Secrets Manager secret ARN for the authentication token to use when calling the admin API. Takes precedence
* over the environment's token.
*/
authToken?: secrets.ISecret;
/**
* Whether to skip granting the invoker role permission to invoke the service handler.
*/
skipInvokeFunctionGrant?: boolean;
/**
* Whether to mark the service as private, and make it unavailable to be called via Restate ingress.
* @see https://docs.restate.dev/operate/registration#private-services
*/
private?: boolean;
/**
* A dummy parameter to force CloudFormation to update the deployment when the configuration changes. Useful if
* you want to target the "latest version" of a service handler and need to force a deployment in order to trigger
* discovery.
*/
configurationVersion?: string;
/**
* Whether to accept self-signed certificates.
*/
insecure?: boolean;
/**
* Specify a custom admin endpoint URL, overriding the one exposed by the target environment.
*/
adminUrl?: string;
},
options?: ServiceRegistrationProps,
) {
this.registerServiceInternal(serviceName, handler, environment, options);
}

private registerServiceInternal(
serviceName: string | undefined,
handler: lambda.IVersion,
environment: IRestateEnvironment,
options?: ServiceRegistrationProps,
) {
const authToken = options?.authToken ?? environment.authToken;
authToken?.grantRead(this.deploymentResourceProvider.onEventHandler);
Expand All @@ -157,7 +200,10 @@ export class ServiceDeployer extends Construct {
invokeRoleArn: environment.invokerRole?.roleArn,
removalPolicy: cdk.RemovalPolicy.RETAIN,
private: (options?.private ?? false).toString() as "true" | "false",
configurationVersion: options?.configurationVersion,
configurationVersion:
options?.configurationVersion || handler.functionArn.endsWith(":$LATEST")
? new Date().toISOString()
: undefined,
insecure: (options?.insecure ?? false).toString() as "true" | "false",
} satisfies RegistrationProperties,
});
Expand Down
2 changes: 0 additions & 2 deletions test/__snapshots__/restate-constructs.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -1250,7 +1250,6 @@ exports[`Restate constructs Deploy a Lambda service handler to Restate Cloud env
'Fn::GetAtt':
- ServiceDeployerCustomResourceProviderframeworkonEvent528FE6C2
- Arn
servicePath: Service
adminUrl: 'https://test.env.us.restate.cloud:9070'
authTokenSecretArn:
'Fn::Join':
Expand Down Expand Up @@ -1513,7 +1512,6 @@ exports[`Restate constructs Deploy a Lambda service handler to existing Restate
'Fn::GetAtt':
- ServiceDeployerCustomResourceProviderframeworkonEvent528FE6C2
- Arn
servicePath: Service
adminUrl: 'https://restate.example.com:9070'
authTokenSecretArn:
Ref: RestateApiKey6463672F
Expand Down
4 changes: 2 additions & 2 deletions test/restate-constructs.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ describe("Restate constructs", () => {
// only needed in testing, where the relative path of the registration function is different from how customers would use it
entry: "dist/register-service-handler/index.js",
});
serviceDeployer.deployService("Service", handler.currentVersion, cloudEnvironment);
serviceDeployer.register(handler.currentVersion, cloudEnvironment);

expect(stack).toMatchCdkSnapshot({
ignoreAssets: true,
Expand Down Expand Up @@ -71,7 +71,7 @@ describe("Restate constructs", () => {
// only needed in testing, where the relative path of the registration function is different from how customers would use it
entry: "dist/register-service-handler/index.js",
});
serviceDeployer.deployService("Service", handler.currentVersion, restateEnvironment);
serviceDeployer.register(handler.currentVersion, restateEnvironment);

expect(stack).toMatchCdkSnapshot({
ignoreAssets: true,
Expand Down