Skip to content

Commit b285666

Browse files
authored
Adding Sagemaker pipeline sample (#272)
Issue #, if available: Description of changes: Adding a Sagemaker pipeline example for creating and executing a pipeline run. By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent de8e0f4 commit b285666

File tree

4 files changed

+212
-0
lines changed

4 files changed

+212
-0
lines changed

samples/pipeline/README.md

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Pipeline Sample
2+
3+
This sample demonstrates how to submit a pipeline to SageMaker for execution from your own JSON pipeline definition, using the AWS Controllers for Kubernetes (ACK) service controller for Amazon SageMaker.
4+
5+
## Prerequisites
6+
7+
This sample assumes that you have completed the [common prerequisites](/samples/README.md).
8+
9+
### Updating the Pipeline Specification
10+
11+
In the `pipeline.yaml` file, replace the placeholder values (`<YOUR_SAGEMAKER_ROLE_ARN>` and `<YOUR_S3_BUCKET>`) with the values associated with your account.
12+
13+
## Submitting your Pipeline Specification
14+
15+
### Modify/Create a JSON pipeline definition
16+
17+
Create the JSON pipeline definition using the JSON schema documented at https://aws-sagemaker-mlops.github.io/sagemaker-model-building-pipeline-definition-JSON-schema/. In this sample, you are provided a sample pipeline definition with one Training step.
18+
19+
There are two ways to modify the *.spec.pipelineDefinition* key in the Kubernetes YAML spec. Choose one:
20+
21+
Option 1: You can pass the JSON pipeline definition inline as a JSON object. An example of this option is included in the `pipeline.yaml` file.
22+
23+
Option 2: You can convert your JSON pipeline definition into a string and pass that string as the value. You may use online third-party tools to convert from JSON to string format.
24+
25+
### Submit pipeline to SageMaker and start an execution
26+
27+
To submit your prepared pipeline specification, apply it to your Kubernetes cluster as follows:
28+
```
29+
$ kubectl apply -f pipeline.yaml
30+
pipeline.sagemaker.services.k8s.aws/my-kubernetes-pipeline created
31+
```
32+
To start an execution run of the pipeline:
33+
```
34+
$ kubectl apply -f pipeline-execution.yaml
35+
pipelineexecution.sagemaker.services.k8s.aws/my-kubernetes-pipeline-execution created
36+
```
37+
38+
### List pipelines and pipeline executions
39+
40+
To list all pipelines created using the ACK controller, use the following command:
41+
```
42+
$ kubectl get pipeline
43+
```
44+
To list all pipeline executions created using the ACK controller, use the following command:
45+
```
46+
$ kubectl get pipelineexecution
47+
```
48+
49+
### Describe a pipeline and pipeline execution
50+
51+
To get more details about the pipeline once it's submitted, like checking the status, errors or parameters of the pipeline, use the following command:
52+
```
53+
$ kubectl describe pipeline my-kubernetes-pipeline
54+
```
55+
56+
To get more details about a pipeline execution, such as its status or errors, use the following command:
57+
```
58+
$ kubectl describe pipelineexecution my-kubernetes-pipeline-execution
59+
```
60+
61+
### Delete a pipeline and a pipeline execution
62+
63+
To delete the pipeline, use the following command:
64+
```
65+
$ kubectl delete pipeline my-kubernetes-pipeline
66+
```
67+
68+
To delete the pipeline execution, use the following command:
69+
```
70+
$ kubectl delete pipelineexecution my-kubernetes-pipeline-execution
71+
```

samples/pipeline/my-pipeline.json

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
{
2+
"Version": "2020-12-01",
3+
"Steps": [
4+
{
5+
"Name": "AbaloneTrain",
6+
"Type": "Training",
7+
"Arguments": {
8+
"RoleArn": "<YOUR_SAGEMAKER_ROLE_ARN>",
9+
"HyperParameters": {
10+
"max_depth": "5",
11+
"gamma": "4",
12+
"eta": "0.2",
13+
"min_child_weight": "6",
14+
"objective": "multi:softmax",
15+
"num_class": "10",
16+
"num_round": "10"
17+
},
18+
"AlgorithmSpecification": {
19+
"TrainingImage": "683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1",
20+
"TrainingInputMode": "File"
21+
},
22+
"OutputDataConfig": {
23+
"S3OutputPath": "s3://<YOUR_BUCKET_NAME>/sagemaker/"
24+
},
25+
"ResourceConfig": {
26+
"InstanceCount": 1,
27+
"InstanceType": "ml.m4.xlarge",
28+
"VolumeSizeInGB": 5
29+
},
30+
"StoppingCondition": {
31+
"MaxRuntimeInSeconds": 86400
32+
},
33+
"InputDataConfig": [
34+
{
35+
"ChannelName": "train",
36+
"DataSource": {
37+
"S3DataSource": {
38+
"S3DataType": "S3Prefix",
39+
"S3Uri": "s3://<YOUR_BUCKET_NAME>/sagemaker/xgboost/train/",
40+
"S3DataDistributionType": "FullyReplicated"
41+
}
42+
},
43+
"ContentType": "text/libsvm"
44+
},
45+
{
46+
"ChannelName": "validation",
47+
"DataSource": {
48+
"S3DataSource": {
49+
"S3DataType": "S3Prefix",
50+
"S3Uri": "s3://<YOUR_BUCKET_NAME>/sagemaker/xgboost/validation/",
51+
"S3DataDistributionType": "FullyReplicated"
52+
}
53+
},
54+
"ContentType": "text/libsvm"
55+
}
56+
]
57+
}
58+
}
59+
]
60+
}
+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
apiVersion: sagemaker.services.k8s.aws/v1alpha1
2+
kind: PipelineExecution
3+
metadata:
4+
name: my-kubernetes-pipeline-execution
5+
spec:
6+
parallelismConfiguration:
7+
maxParallelExecutionSteps: 2
8+
pipelineExecutionDescription: "My first pipeline execution via Amazon EKS cluster."
9+
pipelineName: my-kubernetes-pipeline
10+

samples/pipeline/pipeline.yaml

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
apiVersion: sagemaker.services.k8s.aws/v1alpha1
2+
kind: Pipeline
3+
metadata:
4+
name: my-kubernetes-pipeline
5+
spec:
6+
parallelismConfiguration:
7+
maxParallelExecutionSteps: 2
8+
pipelineName: my-kubernetes-pipeline
9+
pipelineDefinition: |
10+
{
11+
"Version": "2020-12-01",
12+
"Steps": [
13+
{
14+
"Name": "AbaloneTrain",
15+
"Type": "Training",
16+
"Arguments": {
17+
"RoleArn": "<YOUR_SAGEMAKER_ROLE_ARN>",
18+
"HyperParameters": {
19+
"max_depth": "5",
20+
"gamma": "4",
21+
"eta": "0.2",
22+
"min_child_weight": "6",
23+
"objective": "multi:softmax",
24+
"num_class": "10",
25+
"num_round": "30"
26+
},
27+
"AlgorithmSpecification": {
28+
"TrainingImage": "683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1",
29+
"TrainingInputMode": "File"
30+
},
31+
"OutputDataConfig": {
32+
"S3OutputPath": "s3://<YOUR_S3_BUCKET>/sagemaker/"
33+
},
34+
"ResourceConfig": {
35+
"InstanceCount": 1,
36+
"InstanceType": "ml.m4.xlarge",
37+
"VolumeSizeInGB": 5
38+
},
39+
"StoppingCondition": {
40+
"MaxRuntimeInSeconds": 86400
41+
},
42+
"InputDataConfig": [
43+
{
44+
"ChannelName": "train",
45+
"DataSource": {
46+
"S3DataSource": {
47+
"S3DataType": "S3Prefix",
48+
"S3Uri": "s3://<YOUR_S3_BUCKET>/sagemaker/xgboost/train/",
49+
"S3DataDistributionType": "FullyReplicated"
50+
}
51+
},
52+
"ContentType": "text/libsvm"
53+
},
54+
{
55+
"ChannelName": "validation",
56+
"DataSource": {
57+
"S3DataSource": {
58+
"S3DataType": "S3Prefix",
59+
"S3Uri": "s3://<YOUR_S3_BUCKET>/sagemaker/xgboost/validation/",
60+
"S3DataDistributionType": "FullyReplicated"
61+
}
62+
},
63+
"ContentType": "text/libsvm"
64+
}
65+
]
66+
}
67+
}
68+
]
69+
}
70+
pipelineDisplayName: my-kubernetes-pipeline
71+
roleARN: <YOUR_SAGEMAKER_ROLE_ARN>

0 commit comments

Comments
 (0)