Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Show errors when BG Health Checks Fail #3828

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion internal/build/imgsrc/depot.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ func depotBuild(ctx context.Context, streams *iostreams.IOStreams, opts ImageOpt
}

func initBuilder(ctx context.Context, buildState *build, appName string, streams *iostreams.IOStreams) (*depotmachine.Machine, *depotbuild.Build, error) {
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
apiClient := flyutil.ClientFromContext(ctx)
region := os.Getenv("FLY_REMOTE_BUILDER_REGION")
if region != "" {
Expand All @@ -168,23 +170,30 @@ func initBuilder(ctx context.Context, buildState *build, appName string, streams
return nil, nil, err
}

fmt.Println("creating a build from an existing one")
build, err := depotbuild.FromExistingBuild(ctx, *buildInfo.EnsureDepotRemoteBuilder.BuildId, *buildInfo.EnsureDepotRemoteBuilder.BuildToken)
if err != nil {
fmt.Println("failed to create a build from an existing one")
streams.StopProgressIndicator()
return nil, nil, err
}
fmt.Println("created a build from an existing one")

// Set the buildErr to any error that represents the build failing.
fmt.Println("huh")
var buildErr error

fmt.Println("wut")
var buildkit *depotmachine.Machine
fmt.Println("get")
buildkit, buildErr = depotmachine.Acquire(ctx, build.ID, build.Token, "amd64")
if buildErr != nil {
build.Finish(buildErr)
streams.StopProgressIndicator()
return nil, nil, buildErr
}

return buildkit, &build, err
return buildkit, &build, nil
}

func buildImage(ctx context.Context, buildkitClient *client.Client, opts ImageOptions, dockerfilePath string) (*client.SolveResponse, error) {
Expand Down
58 changes: 48 additions & 10 deletions internal/command/deploy/strategy_bluegreen.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,9 @@ func (bg *blueGreen) changeDetected(a, b map[string]string) bool {
return false
}

func (bg *blueGreen) renderMachineHealthchecks(state map[string]*fly.HealthCheckStatus) func() {
func (bg *blueGreen) renderMachineHealthchecks(state map[string]healthCheckStatusResult) func() {
_, span := tracing.GetTracer().Start(context.Background(), "render_machine_healthchecks")
defer span.End()
firstRun := true

previousView := map[string]string{}
Expand All @@ -300,8 +302,11 @@ func (bg *blueGreen) renderMachineHealthchecks(state map[string]*fly.HealthCheck
bg.healthLock.RLock()
for id, value := range state {
status := "unchecked"
if value.Total != 0 {
status = fmt.Sprintf("%d/%d passing", value.Passing, value.Total)
if value.healthCheckStatus.Total != 0 {
status = fmt.Sprintf("%d/%d passing", value.healthCheckStatus.Passing, value.healthCheckStatus.Total)
}
if value.err != nil {
status = fmt.Sprintf("error: %v", value.err)
}

currentView[id] = status
Expand All @@ -324,18 +329,20 @@ func (bg *blueGreen) renderMachineHealthchecks(state map[string]*fly.HealthCheck
}
}

func (bg *blueGreen) allMachinesHealthy(stateMap map[string]*fly.HealthCheckStatus) bool {
func (bg *blueGreen) allMachinesHealthy(stateMap map[string]healthCheckStatusResult) bool {
_, span := tracing.GetTracer().Start(context.Background(), "all_machines_healthy")
defer span.End()
passed := 0

bg.healthLock.RLock()
for _, v := range stateMap {
// We initialize every machine ID with an empty status struct, so all fields are zeroed on init.
// Without the Total == 0 check below, the first call to this function would pass, since 0 == 0.
if v.Total == 0 {
if v.healthCheckStatus.Total == 0 {
continue
}

if v.Passing == v.Total {
if v.healthCheckStatus.Passing == v.healthCheckStatus.Total {
passed += 1
}
}
Expand All @@ -344,18 +351,25 @@ func (bg *blueGreen) allMachinesHealthy(stateMap map[string]*fly.HealthCheckStat
return passed == len(stateMap)
}

// healthCheckStatusResult is the per-machine value stored in the health map
// used while waiting for green machines: it pairs the latest health check
// counts for a machine with the error, if any, that ended polling for it.
type healthCheckStatusResult struct {
// healthCheckStatus holds the Total/Passing check counts for the machine.
// NOTE(review): entries recorded on the failure paths set only err and
// leave this pointer nil, so readers should nil-check before dereferencing.
healthCheckStatus *fly.HealthCheckStatus
// err is non-nil when polling the machine stopped because of an error;
// the renderer displays it as "error: <err>" in place of the passing count.
err error
}

func (bg *blueGreen) WaitForGreenMachinesToBeHealthy(ctx context.Context) error {
ctx, span := tracing.GetTracer().Start(ctx, "green_machines_health_wait")
defer span.End()

wait := time.NewTicker(bg.timeout)
machineIDToHealthStatus := map[string]*fly.HealthCheckStatus{}
machineIDToHealthStatus := map[string]healthCheckStatusResult{}
errChan := make(chan error)
render := bg.renderMachineHealthchecks(machineIDToHealthStatus)

for _, gm := range bg.greenMachines {
if gm.launchInput.SkipLaunch {
machineIDToHealthStatus[gm.leasableMachine.FormattedMachineId()] = &fly.HealthCheckStatus{Total: 1, Passing: 1}
machineIDToHealthStatus[gm.leasableMachine.FormattedMachineId()] = healthCheckStatusResult{
healthCheckStatus: &fly.HealthCheckStatus{Total: 1, Passing: 1},
}
continue
}

Expand All @@ -366,7 +380,9 @@ func (bg *blueGreen) WaitForGreenMachinesToBeHealthy(ctx context.Context) error
continue
}

machineIDToHealthStatus[gm.leasableMachine.FormattedMachineId()] = &fly.HealthCheckStatus{}
machineIDToHealthStatus[gm.leasableMachine.FormattedMachineId()] = healthCheckStatusResult{
healthCheckStatus: &fly.HealthCheckStatus{},
}
}

for _, gm := range bg.greenMachines {
Expand All @@ -382,34 +398,55 @@ func (bg *blueGreen) WaitForGreenMachinesToBeHealthy(ctx context.Context) error
}

go func(m machine.LeasableMachine) {
ctx, span := tracing.GetTracer().Start(ctx, "green_machine_health_check", trace.WithAttributes(
attribute.String("machine_id", m.FormattedMachineId()),
attribute.Int("bg_timeout_ms", int(bg.timeout.Milliseconds())),
))
defer span.End()

waitCtx, cancel := context.WithTimeout(ctx, bg.timeout)
defer cancel()

interval, gracePeriod := m.GetMinIntervalAndMinGracePeriod()

span.SetAttributes(attribute.Int("interval_ms", int(interval.Milliseconds())), attribute.Int("grace_period_ms", int(gracePeriod.Milliseconds())))
span.AddEvent("sleeping for grace period")
time.Sleep(gracePeriod)

for {
updateMachine, err := bg.flaps.Get(waitCtx, m.Machine().ID)

switch {
case waitCtx.Err() != nil:
span.RecordError(waitCtx.Err())
machineIDToHealthStatus[m.FormattedMachineId()] = healthCheckStatusResult{
err: waitCtx.Err(),
}
errChan <- waitCtx.Err()
return
case err != nil:
span.RecordError(err)
machineIDToHealthStatus[m.FormattedMachineId()] = healthCheckStatusResult{
err: waitCtx.Err(),
}
errChan <- err
return
}

status := updateMachine.TopLevelChecks()
bg.healthLock.Lock()
machineIDToHealthStatus[m.FormattedMachineId()] = status
machineIDToHealthStatus[m.FormattedMachineId()] = healthCheckStatusResult{
healthCheckStatus: status,
}
bg.healthLock.Unlock()
span.AddEvent(fmt.Sprintf("total health checks: %d", status.Total))
span.AddEvent(fmt.Sprintf("passing health checks: %d", status.Passing))

if (status.Total == status.Passing) && (status.Total != 0) {
return
}

span.AddEvent("sleeping for interval")
time.Sleep(interval)
}
}(gm.leasableMachine)
Expand All @@ -428,6 +465,7 @@ func (bg *blueGreen) WaitForGreenMachinesToBeHealthy(ctx context.Context) error

select {
case err := <-errChan:
span.RecordError(err)
return err
case <-wait.C:
return ErrWaitTimeout
Expand Down
Loading