From d3a75f0de6fb980eec1b755214363c0b71fd18cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=20Henrik=20Bj=C3=B8rnstad?= Date: Fri, 18 Jan 2019 15:40:12 +0100 Subject: [PATCH 01/12] Minimal ec2 provider --- backend/ec2.go | 418 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 418 insertions(+) create mode 100644 backend/ec2.go diff --git a/backend/ec2.go b/backend/ec2.go new file mode 100644 index 000000000..83d9c3121 --- /dev/null +++ b/backend/ec2.go @@ -0,0 +1,418 @@ +package backend + +import ( + "fmt" + "io" + "net" + "net/url" + "strings" + "time" + + gocontext "context" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/pkg/errors" + + "github.com/travis-ci/worker/config" + "github.com/travis-ci/worker/context" + "github.com/travis-ci/worker/image" + "github.com/travis-ci/worker/ssh" +) + +func init() { + Register("ec2", "EC2", map[string]string{ + "IMAGE_MAP": "Map of which image to use for which language", + "AWS_ACCESS_KEY_ID": "AWS Access Key ID", + "AWS_SECRET_ACCESS_KEY": "AWS Secret Access Key", + "REGION": "Which region to run workers in", // Should be autodetected when running on EC2 instances? + "INSTANCE_TYPE": "Instance type to use for builds", // t2 and t3 are burstable + "SUBNET_ID": "Subnet ID to launch instances into", + "EBS_OPTIMIZED": "Whether or not to use EBS-optimized instances (Default: false)", + "IAM_INSTANCE_PROFILE": "This is not a good idea... for security, builds should provice API keys", + "USER_DATA": "Why?", + "CPU_CREDIT_SPECIFICATION": "standard|unlimited (for faster boots)", + "TAGS": "Tags, how to deal with key value?", + //"SECURITY_GROUPS": "Security groups to assign ", + }, newEC2Provider) +} + +var ( + defaultEC2ScriptLocation = "/home/jonhenrik/travis-in-ec2" + defaultEC2SSHDialTimeout = 5 * time.Second + defaultEC2ImageSelectorType = "env" + defaultEC2SSHUserName = "ubuntu" + defaultEC2SSHPrivateKeyPath = "/home/jonhenrik/.ssh/devops-infra-del-sndbx.pem" + defaultEC2ExecCmd = "bash /home/ubuntu/build.sh" + defaultEC2SubnetID = "" + defaultEC2InstanceType = "t2.micro" + defaultEC2Image = "ami-02790d1ebf3b5181d" + defaultEC2SecurityGroupIDs = "default" +) + +/****** POC SECTION *******/ + +type ec2Provider struct { + cfg *config.ProviderConfig + sshDialer ssh.Dialer + sshDialTimeout time.Duration + execCmd []string + imageSelector image.Selector + awsSession *session.Session + instanceType string + defaultImage string + securityGroups []string +} + +func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { + + //sshDialer, err := ssh.NewDialerWithPassword("travis") + sshDialer, err := ssh.NewDialer(defaultEC2SSHPrivateKeyPath, "") + + if err != nil { + return nil, errors.Wrap(err, "couldn't create SSH dialer") + } + + sshDialTimeout := defaultEC2SSHDialTimeout + if cfg.IsSet("SSH_DIAL_TIMEOUT") { + sshDialTimeout, err = time.ParseDuration(cfg.Get("SSH_DIAL_TIMEOUT")) + if err != nil { + return nil, err + } + } + + execCmd := strings.Split(defaultEC2ExecCmd, " ") + if cfg.IsSet("EXEC_CMD") { + execCmd = strings.Split(cfg.Get("EXEC_CMD"), " ") + } + + imageSelectorType := defaultEC2ImageSelectorType + if cfg.IsSet("IMAGE_SELECTOR_TYPE") { + imageSelectorType = cfg.Get("IMAGE_SELECTOR_TYPE") + } + + if imageSelectorType != "env" && imageSelectorType != "api" { + return nil, fmt.Errorf("invalid image selector type %q", imageSelectorType) + } + + imageSelector, err := buildEC2ImageSelector(imageSelectorType, cfg) + if err != nil { + return nil, err + } + + awsSession := &session.Session{} + + if cfg.IsSet("AWS_ACCESS_KEY_ID") && cfg.IsSet("AWS_SECRET_ACCESS_KEY") { + config := aws.NewConfig().WithCredentialsChainVerboseErrors(true) + staticCreds := credentials.NewStaticCredentials(cfg.Get("AWS_ACCESS_KEY_ID"), cfg.Get("AWS_SECRET_ACCESS_KEY"), "") + if _, err = staticCreds.Get(); err != credentials.ErrStaticCredentialsEmpty { + config.WithCredentials(staticCreds) + } + + if err != nil { + return nil, err + } + + config = config.WithRegion("eu-west-1") + config = config.WithMaxRetries(8) + + opts := session.Options{ + SharedConfigState: session.SharedConfigEnable, + Config: *config, + } + awsSession, err = session.NewSessionWithOptions(opts) + + if err != nil { + return nil, err + } + } + + instanceType := defaultEC2InstanceType + + if cfg.IsSet("INSTANCE_TYPE") { + instanceType = cfg.Get("INSTANCE_TYPE") + } + + defaultImage := defaultEC2Image + if cfg.IsSet("DEFAULT_IMAGE") { + defaultImage = cfg.Get("DEFAULT_IMAGE") + } + + securityGroups := strings.Split(defaultEC2SecurityGroupIDs, ",") + if cfg.IsSet("SECURITY_GROUP_IDS") { + securityGroups = strings.Split(cfg.Get("SECURITY_GROUP_IDS"), ",") + } + + return &ec2Provider{ + cfg: cfg, + sshDialTimeout: sshDialTimeout, + sshDialer: sshDialer, + execCmd: execCmd, + imageSelector: imageSelector, + awsSession: awsSession, + instanceType: instanceType, + defaultImage: defaultImage, + securityGroups: securityGroups, + }, nil +} + +func buildEC2ImageSelector(selectorType string, cfg *config.ProviderConfig) (image.Selector, error) { + switch selectorType { + case "env": + return image.NewEnvSelector(cfg) + case "api": + baseURL, err := url.Parse(cfg.Get("IMAGE_SELECTOR_URL")) + if err != nil { + return nil, err + } + return image.NewAPISelector(baseURL), nil + default: + return nil, fmt.Errorf("invalid image selector type %q", selectorType) + } +} + +func (p *ec2Provider) StartWithProgress(ctx gocontext.Context, startAttributes *StartAttributes, progresser Progresser) (Instance, error) { + return nil, nil +} + +func (p *ec2Provider) SupportsProgress() bool { + return false +} + +func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttributes) (Instance, error) { + startBooting := time.Now() + logger := context.LoggerFromContext(ctx).WithField("self", "backend/ec2_provider") + hostName := hostnameFromContext(ctx) + + // Create EC2 service client + svc := ec2.New(p.awsSession) + + keyResp, err := svc.CreateKeyPair( + &ec2.CreateKeyPairInput{ + KeyName: aws.String(hostName), + }, + ) + + if err != nil { + logger.WithField("err", err).Error("ooooehhh noesss!!!") + return nil, err + } + + privateKey := *keyResp.KeyMaterial + sshDialer, err := ssh.NewDialerWithKeyWithoutPassPhrase([]byte(privateKey)) + + if err != nil { + return nil, err + } + + imageID, err := p.imageSelector.Select(ctx, &image.Params{ + Language: startAttributes.Language, + Infra: "ec2", + }) + + if imageID == "default" { + imageID = p.defaultImage + } + + if err != nil { + return nil, err + } + + securityGroups := []*string{} + for _, securityGroup := range p.securityGroups { + securityGroups = append(securityGroups, &securityGroup) + } + + runOpts := &ec2.RunInstancesInput{ + ImageId: aws.String(imageID), + InstanceType: aws.String(p.instanceType), + MaxCount: aws.Int64(1), + MinCount: aws.Int64(1), + KeyName: keyResp.KeyName, + SecurityGroupIds: securityGroups, + TagSpecifications: []*ec2.TagSpecification{ + &ec2.TagSpecification{ + ResourceType: aws.String("instance"), + Tags: []*ec2.Tag{ + &ec2.Tag{ + Key: aws.String("Name"), + Value: aws.String(hostName), + }, + }, + }, + }, + } + reservation, err := svc.RunInstances(runOpts) + + if err != nil { + return nil, err + } + + describeInstancesInput := &ec2.DescribeInstancesInput{ + InstanceIds: []*string{ + reservation.Instances[0].InstanceId, + }, + } + + instance := &ec2.Instance{} + for { + instances, err := svc.DescribeInstances(describeInstancesInput) + + if err != nil { + return nil, err + } + instance = instances.Reservations[0].Instances[0] + if instances != nil { + address := *instance.PublicDnsName + if address != "" { + break + } + } + time.Sleep(1 * time.Second) + } + + err = p.waitForSSH(*instance.PublicDnsName, 22, 120) + + if err != nil { + return nil, err + } + return &ec2Instance{ + provider: p, + sshDialer: sshDialer, + endBooting: time.Now(), + startBooting: startBooting, + instance: instance, + }, nil +} + +func (p *ec2Provider) Setup(ctx gocontext.Context) error { + return nil +} + +type ec2Instance struct { + provider *ec2Provider + startBooting time.Time + endBooting time.Time + sshDialer ssh.Dialer + instance *ec2.Instance +} + +func (i *ec2Instance) UploadScript(ctx gocontext.Context, script []byte) error { + return i.uploadScriptSCP(ctx, script) +} + +func (i *ec2Instance) uploadScriptSCP(ctx gocontext.Context, script []byte) error { + conn, err := i.sshConnection(ctx) + if err != nil { + return err + } + defer conn.Close() + + existed, err := conn.UploadFile("build.sh", script) + if existed { + return ErrStaleVM + } + if err != nil { + return errors.Wrap(err, "couldn't upload build script") + } + + return nil +} + +func (p *ec2Provider) waitForSSH(host string, port, timeout int) error { + // Wait for ssh to becom available + iter := 0 + for { + _, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", host, 22), 1*time.Second) + if err == nil { + break + } + iter = iter + 1 + if iter > timeout { + return err + } + time.Sleep(1 * time.Second) + } + return nil +} + +func (i *ec2Instance) sshConnection(ctx gocontext.Context) (ssh.Connection, error) { + return i.sshDialer.Dial(fmt.Sprintf("%s:22", *i.instance.PublicDnsName), defaultEC2SSHUserName, i.provider.sshDialTimeout) +} + +func (i *ec2Instance) RunScript(ctx gocontext.Context, output io.Writer) (*RunResult, error) { + return i.runScriptSSH(ctx, output) +} + +func (i *ec2Instance) runScriptSSH(ctx gocontext.Context, output io.Writer) (*RunResult, error) { + conn, err := i.sshConnection(ctx) + if err != nil { + return &RunResult{Completed: false}, errors.Wrap(err, "couldn't connect to SSH server") + } + defer conn.Close() + + exitStatus, err := conn.RunCommand(strings.Join(i.provider.execCmd, " "), output) + + return &RunResult{Completed: err != nil, ExitCode: exitStatus}, errors.Wrap(err, "error running script") +} + +func (i *ec2Instance) Stop(ctx gocontext.Context) error { + logger := context.LoggerFromContext(ctx).WithField("self", "backend/ec2_provider") + //hostName := hostnameFromContext(ctx) + + svc := ec2.New(i.provider.awsSession) + + instanceTerminationInput := &ec2.TerminateInstancesInput{ + InstanceIds: []*string{ + i.instance.InstanceId, + }, + } + + _, err := svc.TerminateInstances(instanceTerminationInput) + + if err != nil { + return err + } + + logger.Info(fmt.Sprintf("Terminated instance %s with hostname %s", *i.instance.InstanceId, *i.instance.PublicDnsName)) + + deleteKeyPairInput := &ec2.DeleteKeyPairInput{ + KeyName: i.instance.KeyName, + } + + _, err = svc.DeleteKeyPair(deleteKeyPairInput) + + if err != nil { + return err + } + + logger.Info(fmt.Sprintf("Deleted keypair %s", *i.instance.KeyName)) + + return nil +} + +func (i *ec2Instance) DownloadTrace(gocontext.Context) ([]byte, error) { + return nil, nil +} + +func (i *ec2Instance) SupportsProgress() bool { + return false +} + +func (i *ec2Instance) Warmed() bool { + return false +} + +func (i *ec2Instance) ID() string { + return *i.instance.PublicDnsName +} + +func (i *ec2Instance) ImageName() string { + return "ec2" +} + +func (i *ec2Instance) StartupDuration() time.Duration { + return i.endBooting.Sub(i.startBooting) +} From f397443e445b98821a383f041a95fb55af739644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=20Henrik=20Bj=C3=B8rnstad?= Date: Mon, 21 Jan 2019 12:42:22 +0100 Subject: [PATCH 02/12] Adding disksize, ebsoptimized --- backend/ec2.go | 70 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 18 deletions(-) diff --git a/backend/ec2.go b/backend/ec2.go index 83d9c3121..999fc431f 100644 --- a/backend/ec2.go +++ b/backend/ec2.go @@ -5,6 +5,7 @@ import ( "io" "net" "net/url" + "strconv" "strings" "time" @@ -22,6 +23,21 @@ import ( "github.com/travis-ci/worker/ssh" ) +var ( + defaultEC2ScriptLocation = "/home/jonhenrik/travis-in-ec2" + defaultEC2SSHDialTimeout = 5 * time.Second + defaultEC2ImageSelectorType = "env" + defaultEC2SSHUserName = "ubuntu" + defaultEC2SSHPrivateKeyPath = "/home/jonhenrik/.ssh/devops-infra-del-sndbx.pem" + defaultEC2ExecCmd = "bash /home/ubuntu/build.sh" + defaultEC2SubnetID = "" + defaultEC2InstanceType = "t2.micro" + defaultEC2Image = "ami-02790d1ebf3b5181d" + defaultEC2SecurityGroupIDs = "default" + defaultEC2EBSOptimized = false + defaultEC2DiskSize = int64(8) +) + func init() { Register("ec2", "EC2", map[string]string{ "IMAGE_MAP": "Map of which image to use for which language", @@ -34,26 +50,12 @@ func init() { "IAM_INSTANCE_PROFILE": "This is not a good idea... for security, builds should provice API keys", "USER_DATA": "Why?", "CPU_CREDIT_SPECIFICATION": "standard|unlimited (for faster boots)", - "TAGS": "Tags, how to deal with key value?", + "TAGS": "Tags, how to deal with key value?", + "DISK_SIZE": fmt.Sprintf("Disk size in GB (default %d)", defaultEC2DiskSize), //"SECURITY_GROUPS": "Security groups to assign ", }, newEC2Provider) } -var ( - defaultEC2ScriptLocation = "/home/jonhenrik/travis-in-ec2" - defaultEC2SSHDialTimeout = 5 * time.Second - defaultEC2ImageSelectorType = "env" - defaultEC2SSHUserName = "ubuntu" - defaultEC2SSHPrivateKeyPath = "/home/jonhenrik/.ssh/devops-infra-del-sndbx.pem" - defaultEC2ExecCmd = "bash /home/ubuntu/build.sh" - defaultEC2SubnetID = "" - defaultEC2InstanceType = "t2.micro" - defaultEC2Image = "ami-02790d1ebf3b5181d" - defaultEC2SecurityGroupIDs = "default" -) - -/****** POC SECTION *******/ - type ec2Provider struct { cfg *config.ProviderConfig sshDialer ssh.Dialer @@ -64,6 +66,8 @@ type ec2Provider struct { instanceType string defaultImage string securityGroups []string + ebsOptimized bool + diskSize int64 } func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { @@ -145,6 +149,19 @@ func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { securityGroups = strings.Split(cfg.Get("SECURITY_GROUP_IDS"), ",") } + ebsOptimized := defaultEC2EBSOptimized + if cfg.IsSet("EBS_OPTIMIZED") { + ebsOptimized = asBool(cfg.Get("EBS_OPTIMIZED")) + } + + diskSize := defaultEC2DiskSize + if cfg.IsSet("DISK_SIZE") { + diskSize, err = strconv.ParseInt(cfg.Get("DISK_SIZE"), 10, 64) + if err != nil { + return nil, err + } + } + return &ec2Provider{ cfg: cfg, sshDialTimeout: sshDialTimeout, @@ -155,6 +172,8 @@ func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { instanceType: instanceType, defaultImage: defaultImage, securityGroups: securityGroups, + ebsOptimized: ebsOptimized, + diskSize: diskSize, }, nil } @@ -225,6 +244,15 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu securityGroups = append(securityGroups, &securityGroup) } + blockDeviceMappings := []*ec2.BlockDeviceMapping{ + &ec2.BlockDeviceMapping{ + DeviceName: aws.String("/dev/sda1"), + Ebs: &ec2.EbsBlockDevice{ + VolumeSize: aws.Int64(p.diskSize), + }, + }, + } + runOpts := &ec2.RunInstancesInput{ ImageId: aws.String(imageID), InstanceType: aws.String(p.instanceType), @@ -232,6 +260,11 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu MinCount: aws.Int64(1), KeyName: keyResp.KeyName, SecurityGroupIds: securityGroups, + CreditSpecification: &ec2.CreditSpecificationRequest{ + CpuCredits: aws.String("unlimited"), // TODO: + }, + EbsOptimized: aws.Bool(p.ebsOptimized), + BlockDeviceMappings: blockDeviceMappings, TagSpecifications: []*ec2.TagSpecification{ &ec2.TagSpecification{ ResourceType: aws.String("instance"), @@ -273,11 +306,12 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu time.Sleep(1 * time.Second) } - err = p.waitForSSH(*instance.PublicDnsName, 22, 120) + err = p.waitForSSH(ctx, *instance.PublicDnsName, 22, 120) if err != nil { return nil, err } + return &ec2Instance{ provider: p, sshDialer: sshDialer, @@ -321,7 +355,7 @@ func (i *ec2Instance) uploadScriptSCP(ctx gocontext.Context, script []byte) erro return nil } -func (p *ec2Provider) waitForSSH(host string, port, timeout int) error { +func (p *ec2Provider) waitForSSH(ctx gocontext.Context, host string, port, timeout int) error { // Wait for ssh to becom available iter := 0 for { From fa1619b7ba097e7e26222d92e7d48acab2324697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=20Henrik=20Bj=C3=B8rnstad?= Date: Mon, 21 Jan 2019 15:38:02 +0100 Subject: [PATCH 03/12] Adding ssh wait similar to gce --- backend/ec2.go | 159 +++++++++++++++++++++++++++++++------------------ 1 file changed, 101 insertions(+), 58 deletions(-) diff --git a/backend/ec2.go b/backend/ec2.go index 999fc431f..5a20b0bd4 100644 --- a/backend/ec2.go +++ b/backend/ec2.go @@ -3,7 +3,6 @@ package backend import ( "fmt" "io" - "net" "net/url" "strconv" "strings" @@ -16,6 +15,7 @@ import ( "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/ec2" "github.com/pkg/errors" + "github.com/sirupsen/logrus" "github.com/travis-ci/worker/config" "github.com/travis-ci/worker/context" @@ -36,6 +36,8 @@ var ( defaultEC2SecurityGroupIDs = "default" defaultEC2EBSOptimized = false defaultEC2DiskSize = int64(8) + defaultEC2UploadRetries = uint64(120) + defaultEC2UploadRetrySleep = 1 * time.Second ) func init() { @@ -50,41 +52,39 @@ func init() { "IAM_INSTANCE_PROFILE": "This is not a good idea... for security, builds should provice API keys", "USER_DATA": "Why?", "CPU_CREDIT_SPECIFICATION": "standard|unlimited (for faster boots)", - "TAGS": "Tags, how to deal with key value?", - "DISK_SIZE": fmt.Sprintf("Disk size in GB (default %d)", defaultEC2DiskSize), - //"SECURITY_GROUPS": "Security groups to assign ", + "TAGS": "Tags, how to deal with key value?", + "DISK_SIZE": fmt.Sprintf("Disk size in GB (default %d)", defaultEC2DiskSize), + "SSH_DIAL_TIMEOUT": fmt.Sprintf("connection timeout for ssh connections (default %v)", defaultEC2SSHDialTimeout), + "UPLOAD_RETRIES": fmt.Sprintf("number of times to attempt to upload script before erroring (default %d)", defaultEC2UploadRetries), + "UPLOAD_RETRY_SLEEP": fmt.Sprintf("sleep interval between script upload attempts (default %v)", defaultEC2UploadRetrySleep), + "SECURITY_GROUPS": "Security groups to assign", }, newEC2Provider) } type ec2Provider struct { - cfg *config.ProviderConfig - sshDialer ssh.Dialer - sshDialTimeout time.Duration - execCmd []string - imageSelector image.Selector - awsSession *session.Session - instanceType string - defaultImage string - securityGroups []string - ebsOptimized bool - diskSize int64 + cfg *config.ProviderConfig + execCmd []string + imageSelector image.Selector + awsSession *session.Session + instanceType string + defaultImage string + securityGroups []string + ebsOptimized bool + diskSize int64 + uploadRetries uint64 + uploadRetrySleep time.Duration + sshDialTimeout time.Duration } func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { - //sshDialer, err := ssh.NewDialerWithPassword("travis") - sshDialer, err := ssh.NewDialer(defaultEC2SSHPrivateKeyPath, "") - - if err != nil { - return nil, errors.Wrap(err, "couldn't create SSH dialer") - } - sshDialTimeout := defaultEC2SSHDialTimeout if cfg.IsSet("SSH_DIAL_TIMEOUT") { - sshDialTimeout, err = time.ParseDuration(cfg.Get("SSH_DIAL_TIMEOUT")) + sd, err := time.ParseDuration(cfg.Get("SSH_DIAL_TIMEOUT")) if err != nil { return nil, err } + sshDialTimeout = sd } execCmd := strings.Split(defaultEC2ExecCmd, " ") @@ -162,18 +162,37 @@ func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { } } + uploadRetries := defaultEC2UploadRetries + if cfg.IsSet("UPLOAD_RETRIES") { + ur, err := strconv.ParseUint(cfg.Get("UPLOAD_RETRIES"), 10, 64) + if err != nil { + return nil, err + } + uploadRetries = ur + } + + uploadRetrySleep := defaultEC2UploadRetrySleep + if cfg.IsSet("UPLOAD_RETRY_SLEEP") { + si, err := time.ParseDuration(cfg.Get("UPLOAD_RETRY_SLEEP")) + if err != nil { + return nil, err + } + uploadRetrySleep = si + } + return &ec2Provider{ - cfg: cfg, - sshDialTimeout: sshDialTimeout, - sshDialer: sshDialer, - execCmd: execCmd, - imageSelector: imageSelector, - awsSession: awsSession, - instanceType: instanceType, - defaultImage: defaultImage, - securityGroups: securityGroups, - ebsOptimized: ebsOptimized, - diskSize: diskSize, + cfg: cfg, + sshDialTimeout: sshDialTimeout, + execCmd: execCmd, + imageSelector: imageSelector, + awsSession: awsSession, + instanceType: instanceType, + defaultImage: defaultImage, + securityGroups: securityGroups, + ebsOptimized: ebsOptimized, + diskSize: diskSize, + uploadRetries: uploadRetries, + uploadRetrySleep: uploadRetrySleep, }, nil } @@ -306,12 +325,6 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu time.Sleep(1 * time.Second) } - err = p.waitForSSH(ctx, *instance.PublicDnsName, 22, 120) - - if err != nil { - return nil, err - } - return &ec2Instance{ provider: p, sshDialer: sshDialer, @@ -334,6 +347,54 @@ type ec2Instance struct { } func (i *ec2Instance) UploadScript(ctx gocontext.Context, script []byte) error { + defer context.TimeSince(ctx, "boot_poll_ssh", time.Now()) + uploadedChan := make(chan error) + var lastErr error + + logger := context.LoggerFromContext(ctx).WithField("self", "backend/ec2_instance") + + // Wait for ssh to becom available + go func() { + var errCount uint64 + for { + if ctx.Err() != nil { + return + } + + err := i.uploadScriptAttempt(ctx, script) + if err != nil { + logger.WithError(err).Debug("upload script attempt errored") + } else { + uploadedChan <- nil + return + } + + lastErr = err + + errCount++ + if errCount > i.provider.uploadRetries { + uploadedChan <- err + return + } + time.Sleep(i.provider.uploadRetrySleep) + } + }() + + select { + case err := <-uploadedChan: + return err + case <-ctx.Done(): + context.LoggerFromContext(ctx).WithFields(logrus.Fields{ + "err": lastErr, + "self": "backend/gce_instance", + }).Info("stopping upload retries, error from last attempt") + return ctx.Err() + } + + //return i.uploadScriptAttempt(ctx, script) +} + +func (i *ec2Instance) uploadScriptAttempt(ctx gocontext.Context, script []byte) error { return i.uploadScriptSCP(ctx, script) } @@ -351,24 +412,6 @@ func (i *ec2Instance) uploadScriptSCP(ctx gocontext.Context, script []byte) erro if err != nil { return errors.Wrap(err, "couldn't upload build script") } - - return nil -} - -func (p *ec2Provider) waitForSSH(ctx gocontext.Context, host string, port, timeout int) error { - // Wait for ssh to becom available - iter := 0 - for { - _, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", host, 22), 1*time.Second) - if err == nil { - break - } - iter = iter + 1 - if iter > timeout { - return err - } - time.Sleep(1 * time.Second) - } return nil } From 54e0b7ef2a9b5f0b96e776579c846878baf486db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=20Henrik=20Bj=C3=B8rnstad?= Date: Wed, 23 Jan 2019 09:43:55 +0100 Subject: [PATCH 04/12] Adding network wait for ssh --- backend/ec2.go | 90 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 81 insertions(+), 9 deletions(-) diff --git a/backend/ec2.go b/backend/ec2.go index 5a20b0bd4..f6ce83e79 100644 --- a/backend/ec2.go +++ b/backend/ec2.go @@ -3,6 +3,7 @@ package backend import ( "fmt" "io" + "net" "net/url" "strconv" "strings" @@ -58,6 +59,8 @@ func init() { "UPLOAD_RETRIES": fmt.Sprintf("number of times to attempt to upload script before erroring (default %d)", defaultEC2UploadRetries), "UPLOAD_RETRY_SLEEP": fmt.Sprintf("sleep interval between script upload attempts (default %v)", defaultEC2UploadRetrySleep), "SECURITY_GROUPS": "Security groups to assign", + "PUBLIC_IP": "boot job instances with a public ip, disable this for NAT (default true)", + "PUBLIC_IP_CONNECT": "connect to the public ip of the instance instead of the internal, only takes effect if PUBLIC_IP is true (default true)", }, newEC2Provider) } @@ -74,6 +77,9 @@ type ec2Provider struct { uploadRetries uint64 uploadRetrySleep time.Duration sshDialTimeout time.Duration + publicIP bool + publicIPConnect bool + subnetID string } func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { @@ -139,6 +145,11 @@ func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { instanceType = cfg.Get("INSTANCE_TYPE") } + subnetID := "" + if cfg.IsSet("SUBNET_ID") { + subnetID = cfg.Get("SUBNET_ID") + } + defaultImage := defaultEC2Image if cfg.IsSet("DEFAULT_IMAGE") { defaultImage = cfg.Get("DEFAULT_IMAGE") @@ -180,6 +191,16 @@ func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { uploadRetrySleep = si } + publicIP := true + if cfg.IsSet("PUBLIC_IP") { + publicIP = asBool(cfg.Get("PUBLIC_IP")) + } + + publicIPConnect := true + if cfg.IsSet("PUBLIC_IP_CONNECT") { + publicIPConnect = asBool(cfg.Get("PUBLIC_IP_CONNECT")) + } + return &ec2Provider{ cfg: cfg, sshDialTimeout: sshDialTimeout, @@ -193,6 +214,9 @@ func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { diskSize: diskSize, uploadRetries: uploadRetries, uploadRetrySleep: uploadRetrySleep, + publicIP: publicIP, + publicIPConnect: publicIPConnect, + subnetID: subnetID, }, nil } @@ -273,12 +297,12 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu } runOpts := &ec2.RunInstancesInput{ - ImageId: aws.String(imageID), - InstanceType: aws.String(p.instanceType), - MaxCount: aws.Int64(1), - MinCount: aws.Int64(1), - KeyName: keyResp.KeyName, - SecurityGroupIds: securityGroups, + ImageId: aws.String(imageID), + InstanceType: aws.String(p.instanceType), + MaxCount: aws.Int64(1), + MinCount: aws.Int64(1), + KeyName: keyResp.KeyName, + CreditSpecification: &ec2.CreditSpecificationRequest{ CpuCredits: aws.String("unlimited"), // TODO: }, @@ -296,6 +320,22 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu }, }, } + + if p.subnetID != "" && p.publicIP { + runOpts.NetworkInterfaces = []*ec2.InstanceNetworkInterfaceSpecification{ + { + DeviceIndex: aws.Int64(0), + AssociatePublicIpAddress: &p.publicIP, + SubnetId: aws.String(p.subnetID), + Groups: securityGroups, + DeleteOnTermination: aws.Bool(true), + }, + } + } else { + runOpts.SubnetId = aws.String(p.subnetID) + runOpts.SecurityGroupIds = securityGroups + } + reservation, err := svc.RunInstances(runOpts) if err != nil { @@ -317,7 +357,10 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu } instance = instances.Reservations[0].Instances[0] if instances != nil { - address := *instance.PublicDnsName + address := *instance.PrivateDnsName + if p.publicIPConnect { + address = *instance.PublicDnsName + } if address != "" { break } @@ -394,6 +437,28 @@ func (i *ec2Instance) UploadScript(ctx gocontext.Context, script []byte) error { //return i.uploadScriptAttempt(ctx, script) } +func (i *ec2Instance) waitForSSH(port, timeout int) error { + + host := *i.instance.PrivateIpAddress + if i.provider.publicIPConnect { + host = *i.instance.PublicIpAddress + } + + iter := 0 + for { + _, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", host, port), 1*time.Second) + if err == nil { + break + } + iter = iter + 1 + if iter > timeout { + return err + } + time.Sleep(500 * time.Millisecond) + } + return nil +} + func (i *ec2Instance) uploadScriptAttempt(ctx gocontext.Context, script []byte) error { return i.uploadScriptSCP(ctx, script) } @@ -416,7 +481,11 @@ func (i *ec2Instance) uploadScriptSCP(ctx gocontext.Context, script []byte) erro } func (i *ec2Instance) sshConnection(ctx gocontext.Context) (ssh.Connection, error) { - return i.sshDialer.Dial(fmt.Sprintf("%s:22", *i.instance.PublicDnsName), defaultEC2SSHUserName, i.provider.sshDialTimeout) + ip := *i.instance.PrivateIpAddress + if i.provider.publicIPConnect { + ip = *i.instance.PublicIpAddress + } + return i.sshDialer.Dial(fmt.Sprintf("%s:22", ip), defaultEC2SSHUserName, i.provider.sshDialTimeout) } func (i *ec2Instance) RunScript(ctx gocontext.Context, output io.Writer) (*RunResult, error) { @@ -483,7 +552,10 @@ func (i *ec2Instance) Warmed() bool { } func (i *ec2Instance) ID() string { - return *i.instance.PublicDnsName + if i.provider.publicIP { + return *i.instance.PublicDnsName + } + return *i.instance.PrivateDnsName } func (i *ec2Instance) ImageName() string { From f500385b62a6c32009b7bb36128a2e6c978d01e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=20Henrik=20Bj=C3=B8rnstad?= Date: Fri, 25 Jan 2019 15:08:12 +0100 Subject: [PATCH 05/12] stuff --- backend/ec2.go | 139 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 111 insertions(+), 28 deletions(-) diff --git a/backend/ec2.go b/backend/ec2.go index f6ce83e79..4f99ed16d 100644 --- a/backend/ec2.go +++ b/backend/ec2.go @@ -1,12 +1,18 @@ package backend import ( + "bytes" + "crypto/x509" + "encoding/base64" + "encoding/pem" "fmt" "io" "net" "net/url" + "regexp" "strconv" "strings" + "text/template" "time" gocontext "context" @@ -25,22 +31,51 @@ import ( ) var ( - defaultEC2ScriptLocation = "/home/jonhenrik/travis-in-ec2" defaultEC2SSHDialTimeout = 5 * time.Second defaultEC2ImageSelectorType = "env" - defaultEC2SSHUserName = "ubuntu" - defaultEC2SSHPrivateKeyPath = "/home/jonhenrik/.ssh/devops-infra-del-sndbx.pem" - defaultEC2ExecCmd = "bash /home/ubuntu/build.sh" - defaultEC2SubnetID = "" - defaultEC2InstanceType = "t2.micro" - defaultEC2Image = "ami-02790d1ebf3b5181d" - defaultEC2SecurityGroupIDs = "default" - defaultEC2EBSOptimized = false - defaultEC2DiskSize = int64(8) - defaultEC2UploadRetries = uint64(120) - defaultEC2UploadRetrySleep = 1 * time.Second + defaultEC2SSHUserName = "travis" + //defaultEC2ExecCmd = "bash ~/build.sh" + defaultEC2ExecCmd = "bash -c 'lsblk -l && sudo cat /root/mounter.log'" + defaultEC2SubnetID = "" + defaultEC2InstanceType = "t2.micro" + defaultEC2Image = "ami-02790d1ebf3b5181d" + defaultEC2SecurityGroupIDs = "default" + defaultEC2EBSOptimized = false + defaultEC2DiskSize = int64(8) + defaultEC2UploadRetries = uint64(120) + defaultEC2UploadRetrySleep = 1 * time.Second ) +var ( + /* + #if [[ -b /dev/nvme1n1 ]]; then + # Are we able to run on instance/store disks + #fi + + gceStartupScript = template.Must(template.New("gce-startup").Parse(`#!/usr/bin/env bash + {{ if .AutoImplode }}echo poweroff | at now + {{ .HardTimeoutMinutes }} minutes{{ end }} + cat > ~travis/.ssh/authorized_keys < ~travis/.ssh/authorized_keys < Date: Wed, 3 Apr 2019 15:56:30 +0200 Subject: [PATCH 06/12] Adding tagging + go routine instance readyness --- backend/ec2.go | 131 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 90 insertions(+), 41 deletions(-) diff --git a/backend/ec2.go b/backend/ec2.go index 4f99ed16d..16075d117 100644 --- a/backend/ec2.go +++ b/backend/ec2.go @@ -34,8 +34,8 @@ var ( defaultEC2SSHDialTimeout = 5 * time.Second defaultEC2ImageSelectorType = "env" defaultEC2SSHUserName = "travis" - //defaultEC2ExecCmd = "bash ~/build.sh" - defaultEC2ExecCmd = "bash -c 'lsblk -l && sudo cat /root/mounter.log'" + defaultEC2ExecCmd = "bash ~/build.sh" + //wdefaultEC2ExecCmd = "bash -c 'lsblk -l && sudo cat /root/mounter.log'" defaultEC2SubnetID = "" defaultEC2InstanceType = "t2.micro" defaultEC2Image = "ami-02790d1ebf3b5181d" @@ -194,13 +194,11 @@ func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { userData := "" if cfg.IsSet("USER_DATA") { - userDataBytes, err := base64.RawURLEncoding.DecodeString(cfg.Get("USER_DATA")) + var userDataBytes []byte + userDataBytes, err = base64.RawURLEncoding.DecodeString(cfg.Get("USER_DATA")) if err != nil { - fmt.Println(cfg.Get("USER_DATA")) - fmt.Println(string(userDataBytes)) return nil, err } - userData = string(userDataBytes) } @@ -374,24 +372,53 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu }, } + tags := []*ec2.Tag{ + &ec2.Tag{ + Key: aws.String("Name"), + Value: aws.String(hostName), + }, + &ec2.Tag{ + Key: aws.String("travis.dist"), + Value: aws.String(startAttributes.Dist), + }, + } + + /* + for key, value := range p.containerLabels { + labels[key] = value + } + */ + + r, ok := context.RepositoryFromContext(ctx) + if ok { + tags = append(tags, &ec2.Tag{ + Key: aws.String("travis.repo"), + Value: aws.String(r), + }) + } + + jid, ok := context.JobIDFromContext(ctx) + if ok { + tags = append(tags, &ec2.Tag{ + Key: aws.String("travis.job_id"), + Value: aws.String(strconv.FormatUint(jid, 10)), + }) + } + runOpts := &ec2.RunInstancesInput{ - ImageId: aws.String(imageID), - InstanceType: aws.String(p.instanceType), - MaxCount: aws.Int64(1), - MinCount: aws.Int64(1), - KeyName: keyResp.KeyName, + ImageId: aws.String(imageID), + InstanceType: aws.String(p.instanceType), + MaxCount: aws.Int64(1), + MinCount: aws.Int64(1), + KeyName: keyResp.KeyName, + //KeyName: aws.String("devops"), EbsOptimized: aws.Bool(p.ebsOptimized), UserData: aws.String(userDataEncoded), BlockDeviceMappings: blockDeviceMappings, TagSpecifications: []*ec2.TagSpecification{ &ec2.TagSpecification{ ResourceType: aws.String("instance"), - Tags: []*ec2.Tag{ - &ec2.Tag{ - Key: aws.String("Name"), - Value: aws.String(hostName), - }, - }, + Tags: tags, }, }, } @@ -431,33 +458,55 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu }, } - instance := &ec2.Instance{} - for { - instances, err := svc.DescribeInstances(describeInstancesInput) - - if err != nil { - return nil, err - } - instance = instances.Reservations[0].Instances[0] - if instances != nil { - address := *instance.PrivateDnsName - if p.publicIPConnect { - address = *instance.PublicDnsName + instanceChan := make(chan *ec2.Instance) + var lastErr error + go func() { + for { + var errCount uint64 + var instances *ec2.DescribeInstancesOutput + if ctx.Err() != nil { + return } - if address != "" { - break + instances, lastErr = svc.DescribeInstances(describeInstancesInput) + if instances != nil { + instance := instances.Reservations[0].Instances[0] + address := *instance.PrivateDnsName + if p.publicIPConnect { + address = *instance.PublicDnsName + } + if address != "" { + instanceChan <- instance + return + } } + errCount++ + if errCount > p.uploadRetries { + instanceChan <- nil + return + } + time.Sleep(500 * time.Millisecond) } - time.Sleep(1 * time.Second) - } + }() - return &ec2Instance{ - provider: p, - sshDialer: sshDialer, - endBooting: time.Now(), - startBooting: startBooting, - instance: instance, - }, nil + select { + case instance := <-instanceChan: + if instance != nil { + return &ec2Instance{ + provider: p, + sshDialer: sshDialer, + endBooting: time.Now(), + startBooting: startBooting, + instance: instance, + }, nil + } + return nil, lastErr + case <-ctx.Done(): + context.LoggerFromContext(ctx).WithFields(logrus.Fields{ + "err": lastErr, + "self": "backend/ec2_instance", + }).Info("Stopping probing for up instance") + return nil, ctx.Err() + } } func (p *ec2Provider) Setup(ctx gocontext.Context) error { @@ -512,7 +561,7 @@ func (i *ec2Instance) UploadScript(ctx gocontext.Context, script []byte) error { case <-ctx.Done(): context.LoggerFromContext(ctx).WithFields(logrus.Fields{ "err": lastErr, - "self": "backend/gce_instance", + "self": "backend/ec2_instance", }).Info("stopping upload retries, error from last attempt") return ctx.Err() } From 69cc3c2c3381b4229a33423e58286a1855d00f59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=20Henrik=20Bj=C3=B8rnstad?= Date: Thu, 4 Apr 2019 15:19:00 +0200 Subject: [PATCH 07/12] Tcp probe of ssh, custom taggin etc. --- backend/ec2.go | 143 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 97 insertions(+), 46 deletions(-) diff --git a/backend/ec2.go b/backend/ec2.go index 16075d117..f67029cce 100644 --- a/backend/ec2.go +++ b/backend/ec2.go @@ -88,15 +88,21 @@ func init() { "IAM_INSTANCE_PROFILE": "This is not a good idea... for security, builds should provice API keys", "USER_DATA": "User data, needs to be URL safe base64 encoded format (RFC 4648)", "CPU_CREDIT_SPECIFICATION": "standard|unlimited (for faster boots)", - "TAGS": "Tags, how to deal with key value?", - "DISK_SIZE": fmt.Sprintf("Disk size in GB (default %d)", defaultEC2DiskSize), - "SSH_DIAL_TIMEOUT": fmt.Sprintf("connection timeout for ssh connections (default %v)", defaultEC2SSHDialTimeout), - "UPLOAD_RETRIES": fmt.Sprintf("number of times to attempt to upload script before erroring (default %d)", defaultEC2UploadRetries), - "UPLOAD_RETRY_SLEEP": fmt.Sprintf("sleep interval between script upload attempts (default %v)", defaultEC2UploadRetrySleep), - "SECURITY_GROUPS": "Security groups to assign", - "PUBLIC_IP": "boot job instances with a public ip, disable this for NAT (default true)", - "PUBLIC_IP_CONNECT": "connect to the public ip of the instance instead of the internal, only takes effect if PUBLIC_IP is true (default true)", - "USE_INSTANCE_STORE": "Use instance store for builds if available", + "TAGS": "Tags, how to deal with key value?", + "DISK_SIZE": fmt.Sprintf("Disk size in GB (default %d)", defaultEC2DiskSize), + "SSH_DIAL_TIMEOUT": fmt.Sprintf("connection timeout for ssh connections (default %v)", defaultEC2SSHDialTimeout), + "UPLOAD_RETRIES": fmt.Sprintf("number of times to attempt to upload script before erroring (default %d)", defaultEC2UploadRetries), + "UPLOAD_RETRY_SLEEP": fmt.Sprintf("sleep interval between script upload attempts (default %v)", defaultEC2UploadRetrySleep), + "SECURITY_GROUPS": "Security groups to assign", + "PUBLIC_IP": "boot job instances with a public ip, disable this for NAT (default true)", + "PUBLIC_IP_CONNECT": "connect to the public ip of the instance instead of the internal, only takes effect if PUBLIC_IP is true (default true)", + "KEY_NAME": "Key name to use for the admin user, this is in case you need login access to instances. The travis user has a auto generated key.", + "IMAGE_ALIASES": "comma-delimited strings used as stable names for images, used only when image selector type is \"env\"", + "IMAGE_DEFAULT": "default image name to use when none found", + "IMAGE_SELECTOR_TYPE": fmt.Sprintf("image selector type (\"env\" or \"api\", default %q)", defaultEC2ImageSelectorType), + "IMAGE_SELECTOR_URL": "URL for image selector API, used only when image selector is \"api\"", + "IMAGE_[ALIAS_]{ALIAS}": "full name for a given alias given via IMAGE_ALIASES, where the alias form in the key is uppercased and normalized by replacing non-alphanumerics with _", + "CUSTOM_TAGS": "Custom tags to set for the EC2 instance. Comma separated list with format key1=value1,key2=value2.....keyN=valueN", }, newEC2Provider) } @@ -116,7 +122,9 @@ type ec2Provider struct { publicIP bool publicIPConnect bool subnetID string + keyName string userData string + customTags map[string]string } func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { @@ -129,6 +137,14 @@ func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { } sshDialTimeout = sd } + customTags := make(map[string]string, 0) + if cfg.IsSet("CUSTOM_TAGS") { + items := strings.Split(cfg.Get("CUSTOM_TAGS"), ",") + for _, tag := range items { + item := strings.Split(tag, "=") + customTags[item[0]] = item[1] + } + } execCmd := strings.Split(defaultEC2ExecCmd, " ") if cfg.IsSet("EXEC_CMD") { @@ -149,31 +165,35 @@ func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { return nil, err } - awsSession := &session.Session{} + awsConfig := &aws.Config{ + Region: aws.String("eu-west-1"), + MaxRetries: aws.Int(8), + } if cfg.IsSet("AWS_ACCESS_KEY_ID") && cfg.IsSet("AWS_SECRET_ACCESS_KEY") { - config := aws.NewConfig().WithCredentialsChainVerboseErrors(true) - staticCreds := credentials.NewStaticCredentials(cfg.Get("AWS_ACCESS_KEY_ID"), cfg.Get("AWS_SECRET_ACCESS_KEY"), "") + staticCreds := credentials.NewStaticCredentials( + cfg.Get("AWS_ACCESS_KEY_ID"), + cfg.Get("AWS_SECRET_ACCESS_KEY"), + "", + ) if _, err = staticCreds.Get(); err != credentials.ErrStaticCredentialsEmpty { - config.WithCredentials(staticCreds) + awsConfig.WithCredentials(staticCreds) } if err != nil { return nil, err } + } - config = config.WithRegion("eu-west-1") - config = config.WithMaxRetries(8) + opts := session.Options{ + SharedConfigState: session.SharedConfigEnable, + Config: *awsConfig, + } - opts := session.Options{ - SharedConfigState: session.SharedConfigEnable, - Config: *config, - } - awsSession, err = session.NewSessionWithOptions(opts) + awsSession, err := session.NewSessionWithOptions(opts) - if err != nil { - return nil, err - } + if err != nil { + return nil, err } instanceType := defaultEC2InstanceType @@ -187,9 +207,9 @@ func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { subnetID = cfg.Get("SUBNET_ID") } - defaultImage := defaultEC2Image - if cfg.IsSet("DEFAULT_IMAGE") { - defaultImage = cfg.Get("DEFAULT_IMAGE") + defaultImage := "" + if cfg.IsSet("IMAGE_DEFAULT") { + defaultImage = cfg.Get("IMAGE_DEFAULT") } userData := "" @@ -248,6 +268,11 @@ func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { publicIPConnect = asBool(cfg.Get("PUBLIC_IP_CONNECT")) } + keyName := "" + if cfg.IsSet("KEY_NAME") { + keyName = cfg.Get("KEY_NAME") + } + return &ec2Provider{ cfg: cfg, sshDialTimeout: sshDialTimeout, @@ -265,6 +290,8 @@ func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { publicIPConnect: publicIPConnect, subnetID: subnetID, userData: userData, + keyName: keyName, + customTags: customTags, }, nil } @@ -350,14 +377,14 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu Infra: "ec2", }) - if imageID == "default" { - imageID = p.defaultImage - } - if err != nil { return nil, err } + if imageID == "default" { + imageID = p.defaultImage + } + securityGroups := []*string{} for _, securityGroup := range p.securityGroups { securityGroups = append(securityGroups, &securityGroup) @@ -383,11 +410,12 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu }, } - /* - for key, value := range p.containerLabels { - labels[key] = value - } - */ + for key, value := range p.customTags { + tags = append(tags, &ec2.Tag{ + Key: aws.String(key), + Value: aws.String(value), + }) + } r, ok := context.RepositoryFromContext(ctx) if ok { @@ -395,6 +423,17 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu Key: aws.String("travis.repo"), Value: aws.String(r), }) + + repo := strings.Split(r, "/") + tags = append(tags, + &ec2.Tag{ + Key: aws.String("travis.github_repo"), + Value: aws.String(repo[1]), + }, + &ec2.Tag{ + Key: aws.String("travis.github_org"), + Value: aws.String(repo[0]), + }) } jid, ok := context.JobIDFromContext(ctx) @@ -405,13 +444,18 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu }) } + keyName := keyResp.KeyName + if p.keyName != "" { + keyName = aws.String(p.keyName) + } + //RequestSpotInstances + runOpts := &ec2.RunInstancesInput{ - ImageId: aws.String(imageID), - InstanceType: aws.String(p.instanceType), - MaxCount: aws.Int64(1), - MinCount: aws.Int64(1), - KeyName: keyResp.KeyName, - //KeyName: aws.String("devops"), + ImageId: aws.String(imageID), + InstanceType: aws.String(p.instanceType), + MaxCount: aws.Int64(1), + MinCount: aws.Int64(1), + KeyName: keyName, EbsOptimized: aws.Bool(p.ebsOptimized), UserData: aws.String(userDataEncoded), BlockDeviceMappings: blockDeviceMappings, @@ -464,9 +508,11 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu for { var errCount uint64 var instances *ec2.DescribeInstancesOutput + if ctx.Err() != nil { return } + instances, lastErr = svc.DescribeInstances(describeInstancesInput) if instances != nil { instance := instances.Reservations[0].Instances[0] @@ -475,8 +521,11 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu address = *instance.PublicDnsName } if address != "" { - instanceChan <- instance - return + _, lastErr = net.DialTimeout("tcp", fmt.Sprintf("%s:%d", address, 22), 1*time.Second) + if lastErr == nil { + instanceChan <- instance + return + } } } errCount++ @@ -497,6 +546,7 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu endBooting: time.Now(), startBooting: startBooting, instance: instance, + tmpKeyName: keyResp.KeyName, }, nil } return nil, lastErr @@ -519,6 +569,7 @@ type ec2Instance struct { endBooting time.Time sshDialer ssh.Dialer instance *ec2.Instance + tmpKeyName *string } func (i *ec2Instance) UploadScript(ctx gocontext.Context, script []byte) error { @@ -654,10 +705,10 @@ func (i *ec2Instance) Stop(ctx gocontext.Context) error { return err } - logger.Info(fmt.Sprintf("Terminated instance %s with hostname %s", *i.instance.InstanceId, *i.instance.PublicDnsName)) + logger.Info(fmt.Sprintf("Terminated instance %s with hostname %s", *i.instance.InstanceId, *i.instance.PrivateDnsName)) deleteKeyPairInput := &ec2.DeleteKeyPairInput{ - KeyName: i.instance.KeyName, + KeyName: i.tmpKeyName, } _, err = svc.DeleteKeyPair(deleteKeyPairInput) @@ -666,7 +717,7 @@ func (i *ec2Instance) Stop(ctx gocontext.Context) error { return err } - logger.Info(fmt.Sprintf("Deleted keypair %s", *i.instance.KeyName)) + logger.Info(fmt.Sprintf("Deleted keypair %s", *i.tmpKeyName)) return nil } From ba3911da04dcd53d3d0972159eb9fc68a4a0ddbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=20Henrik=20Bj=C3=B8rnstad?= Date: Fri, 5 Apr 2019 13:33:22 +0200 Subject: [PATCH 08/12] Region --- backend/ec2.go | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/backend/ec2.go b/backend/ec2.go index f67029cce..2e2c2ce36 100644 --- a/backend/ec2.go +++ b/backend/ec2.go @@ -35,31 +35,18 @@ var ( defaultEC2ImageSelectorType = "env" defaultEC2SSHUserName = "travis" defaultEC2ExecCmd = "bash ~/build.sh" - //wdefaultEC2ExecCmd = "bash -c 'lsblk -l && sudo cat /root/mounter.log'" - defaultEC2SubnetID = "" - defaultEC2InstanceType = "t2.micro" - defaultEC2Image = "ami-02790d1ebf3b5181d" - defaultEC2SecurityGroupIDs = "default" - defaultEC2EBSOptimized = false - defaultEC2DiskSize = int64(8) - defaultEC2UploadRetries = uint64(120) - defaultEC2UploadRetrySleep = 1 * time.Second + defaultEC2SubnetID = "" + defaultEC2InstanceType = "t2.micro" + defaultEC2Image = "ami-02790d1ebf3b5181d" + defaultEC2SecurityGroupIDs = "default" + defaultEC2EBSOptimized = false + defaultEC2DiskSize = int64(8) + defaultEC2UploadRetries = uint64(120) + defaultEC2UploadRetrySleep = 1 * time.Second + defaultEC2Region = "eu-west-1" ) var ( - /* - #if [[ -b /dev/nvme1n1 ]]; then - # Are we able to run on instance/store disks - #fi - - gceStartupScript = template.Must(template.New("gce-startup").Parse(`#!/usr/bin/env bash - {{ if .AutoImplode }}echo poweroff | at now + {{ .HardTimeoutMinutes }} minutes{{ end }} - cat > ~travis/.ssh/authorized_keys < ~travis/.ssh/authorized_keys < Date: Mon, 8 Apr 2019 12:55:53 +0200 Subject: [PATCH 09/12] Change to ip address --- backend/ec2.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/ec2.go b/backend/ec2.go index 2e2c2ce36..5d14c6607 100644 --- a/backend/ec2.go +++ b/backend/ec2.go @@ -508,9 +508,9 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu instances, lastErr = svc.DescribeInstances(describeInstancesInput) if instances != nil { instance := instances.Reservations[0].Instances[0] - address := *instance.PrivateDnsName + address := *instance.PrivateIpAddress if p.publicIPConnect { - address = *instance.PublicDnsName + address = *instance.PublicIpAddress } if address != "" { _, lastErr = net.DialTimeout("tcp", fmt.Sprintf("%s:%d", address, 22), 1*time.Second) From d6612bfd12c666911296de526f9c164858762b42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=20Henrik=20Bj=C3=B8rnstad?= Date: Mon, 12 Aug 2019 14:22:29 +0200 Subject: [PATCH 10/12] change keyname to avoid name collisions --- backend/ec2.go | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/backend/ec2.go b/backend/ec2.go index 5d14c6607..49303a438 100644 --- a/backend/ec2.go +++ b/backend/ec2.go @@ -75,21 +75,21 @@ func init() { "IAM_INSTANCE_PROFILE": "This is not a good idea... for security, builds should provice API keys", "USER_DATA": "User data, needs to be URL safe base64 encoded format (RFC 4648)", "CPU_CREDIT_SPECIFICATION": "standard|unlimited (for faster boots)", - "TAGS": "Tags, how to deal with key value?", - "DISK_SIZE": fmt.Sprintf("Disk size in GB (default %d)", defaultEC2DiskSize), - "SSH_DIAL_TIMEOUT": fmt.Sprintf("connection timeout for ssh connections (default %v)", defaultEC2SSHDialTimeout), - "UPLOAD_RETRIES": fmt.Sprintf("number of times to attempt to upload script before erroring (default %d)", defaultEC2UploadRetries), - "UPLOAD_RETRY_SLEEP": fmt.Sprintf("sleep interval between script upload attempts (default %v)", defaultEC2UploadRetrySleep), - "SECURITY_GROUPS": "Security groups to assign", - "PUBLIC_IP": "boot job instances with a public ip, disable this for NAT (default true)", - "PUBLIC_IP_CONNECT": "connect to the public ip of the instance instead of the internal, only takes effect if PUBLIC_IP is true (default true)", - "KEY_NAME": "Key name to use for the admin user, this is in case you need login access to instances. The travis user has a auto generated key.", - "IMAGE_ALIASES": "comma-delimited strings used as stable names for images, used only when image selector type is \"env\"", - "IMAGE_DEFAULT": "default image name to use when none found", - "IMAGE_SELECTOR_TYPE": fmt.Sprintf("image selector type (\"env\" or \"api\", default %q)", defaultEC2ImageSelectorType), - "IMAGE_SELECTOR_URL": "URL for image selector API, used only when image selector is \"api\"", - "IMAGE_[ALIAS_]{ALIAS}": "full name for a given alias given via IMAGE_ALIASES, where the alias form in the key is uppercased and normalized by replacing non-alphanumerics with _", - "CUSTOM_TAGS": "Custom tags to set for the EC2 instance. Comma separated list with format key1=value1,key2=value2.....keyN=valueN", + "TAGS": "Tags, how to deal with key value?", + "DISK_SIZE": fmt.Sprintf("Disk size in GB (default %d)", defaultEC2DiskSize), + "SSH_DIAL_TIMEOUT": fmt.Sprintf("connection timeout for ssh connections (default %v)", defaultEC2SSHDialTimeout), + "UPLOAD_RETRIES": fmt.Sprintf("number of times to attempt to upload script before erroring (default %d)", defaultEC2UploadRetries), + "UPLOAD_RETRY_SLEEP": fmt.Sprintf("sleep interval between script upload attempts (default %v)", defaultEC2UploadRetrySleep), + "SECURITY_GROUPS": "Security groups to assign", + "PUBLIC_IP": "boot job instances with a public ip, disable this for NAT (default true)", + "PUBLIC_IP_CONNECT": "connect to the public ip of the instance instead of the internal, only takes effect if PUBLIC_IP is true (default true)", + "KEY_NAME": "Key name to use for the admin user, this is in case you need login access to instances. The travis user has a auto generated key.", + "IMAGE_ALIASES": "comma-delimited strings used as stable names for images, used only when image selector type is \"env\"", + "IMAGE_DEFAULT": "default image name to use when none found", + "IMAGE_SELECTOR_TYPE": fmt.Sprintf("image selector type (\"env\" or \"api\", default %q)", defaultEC2ImageSelectorType), + "IMAGE_SELECTOR_URL": "URL for image selector API, used only when image selector is \"api\"", + "IMAGE_[ALIAS_]{ALIAS}": "full name for a given alias given via IMAGE_ALIASES, where the alias form in the key is uppercased and normalized by replacing non-alphanumerics with _", + "CUSTOM_TAGS": "Custom tags to set for the EC2 instance. Comma separated list with format key1=value1,key2=value2.....keyN=valueN", }, newEC2Provider) } @@ -319,7 +319,7 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu svc := ec2.New(p.awsSession) keyPairInput := &ec2.CreateKeyPairInput{ - KeyName: aws.String(hostName), + KeyName: aws.String(fmt.Sprintf("%s-%d", hostName, time.Now().Unix())), } keyResp, err := svc.CreateKeyPair(keyPairInput) From 26d6b53ccf555f657aef58954e82cf9f1714168d Mon Sep 17 00:00:00 2001 From: Maciej Kempin Date: Thu, 9 Jul 2020 16:37:31 +0100 Subject: [PATCH 11/12] Sanity check after failed build --- backend/ec2.go | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/backend/ec2.go b/backend/ec2.go index 49303a438..9ca4dd6c6 100644 --- a/backend/ec2.go +++ b/backend/ec2.go @@ -35,9 +35,7 @@ var ( defaultEC2ImageSelectorType = "env" defaultEC2SSHUserName = "travis" defaultEC2ExecCmd = "bash ~/build.sh" - defaultEC2SubnetID = "" defaultEC2InstanceType = "t2.micro" - defaultEC2Image = "ami-02790d1ebf3b5181d" defaultEC2SecurityGroupIDs = "default" defaultEC2EBSOptimized = false defaultEC2DiskSize = int64(8) @@ -124,7 +122,7 @@ func newEC2Provider(cfg *config.ProviderConfig) (Provider, error) { } sshDialTimeout = sd } - customTags := make(map[string]string, 0) + customTags := make(map[string]string) if cfg.IsSet("CUSTOM_TAGS") { items := strings.Split(cfg.Get("CUSTOM_TAGS"), ",") for _, tag := range items { @@ -612,28 +610,6 @@ func (i *ec2Instance) UploadScript(ctx gocontext.Context, script []byte) error { //return i.uploadScriptAttempt(ctx, script) } -func (i *ec2Instance) waitForSSH(port, timeout int) error { - - host := *i.instance.PrivateIpAddress - if i.provider.publicIPConnect { - host = *i.instance.PublicIpAddress - } - - iter := 0 - for { - _, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", host, port), 1*time.Second) - if err == nil { - break - } - iter = iter + 1 - if iter > timeout { - return err - } - time.Sleep(500 * time.Millisecond) - } - return nil -} - func (i *ec2Instance) uploadScriptAttempt(ctx gocontext.Context, script []byte) error { return i.uploadScriptSCP(ctx, script) } From e877b495752faab8af5b301cf9cd00d0b77b69c0 Mon Sep 17 00:00:00 2001 From: Maciej Kempin Date: Wed, 15 Jul 2020 11:30:56 +0100 Subject: [PATCH 12/12] Fixing null pointer --- backend/ec2.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/ec2.go b/backend/ec2.go index 9ca4dd6c6..b2b6a7efc 100644 --- a/backend/ec2.go +++ b/backend/ec2.go @@ -507,7 +507,7 @@ func (p *ec2Provider) Start(ctx gocontext.Context, startAttributes *StartAttribu if instances != nil { instance := instances.Reservations[0].Instances[0] address := *instance.PrivateIpAddress - if p.publicIPConnect { + if instance.PublicIpAddress != nil && p.publicIPConnect { address = *instance.PublicIpAddress } if address != "" { @@ -633,7 +633,7 @@ func (i *ec2Instance) uploadScriptSCP(ctx gocontext.Context, script []byte) erro func (i *ec2Instance) sshConnection(ctx gocontext.Context) (ssh.Connection, error) { ip := *i.instance.PrivateIpAddress - if i.provider.publicIPConnect { + if i.instance.PublicIpAddress != nil && i.provider.publicIPConnect { ip = *i.instance.PublicIpAddress } return i.sshDialer.Dial(fmt.Sprintf("%s:22", ip), defaultEC2SSHUserName, i.provider.sshDialTimeout)