Skip to content

Commit f58ce93

Browse files
authored
Deploy with BuildKit (#4703)
Switch preflight tests to remote BuildKit and make them more resilient
1 parent fbbede7 commit f58ce93

File tree

17 files changed

+750
-224
lines changed

17 files changed

+750
-224
lines changed

.github/workflows/preflight.yml

Lines changed: 19 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,25 @@ on:
1212

1313
jobs:
1414
preflight-tests:
15+
name: "preflight-tests (${{ matrix.group }})"
1516
if: ${{ github.repository == 'superfly/flyctl' }}
1617
runs-on: ubuntu-latest
1718
strategy:
1819
fail-fast: false
1920
matrix:
20-
parallelism: [20]
21-
index:
22-
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
21+
group:
22+
- apps
23+
- deploy
24+
- launch
25+
- scale
26+
- volume
27+
- console
28+
- logs
29+
- machine
30+
- postgres
31+
- tokens
32+
- wireguard
33+
- misc
2334
steps:
2435
- uses: actions/checkout@v6
2536
- uses: actions/setup-go@v6
@@ -32,14 +43,6 @@ jobs:
3243
- name: Set FLY_PREFLIGHT_TEST_APP_PREFIX
3344
run: |
3445
echo "FLY_PREFLIGHT_TEST_APP_PREFIX=gha-$GITHUB_RUN_ID-$GITHUB_RUN_ATTEMPT" >> "$GITHUB_ENV"
35-
- name: Generate go test slice
36-
id: test_split
37-
uses: hashicorp-forge/go-test-split-action@v1
38-
with:
39-
total: ${{ matrix.parallelism }}
40-
index: ${{ matrix.index }}
41-
packages: ./test/preflight/...
42-
flags: --tags=integration
4346
# If this workflow is triggered by code changes (eg PRs), download the binary to save time.
4447
- uses: actions/download-artifact@v7
4548
id: download-flyctl
@@ -53,37 +56,19 @@ jobs:
5356
- name: Run preflight tests
5457
id: preflight
5558
env:
56-
FLY_PREFLIGHT_TEST_ACCESS_TOKEN: ${{ secrets.FLYCTL_PREFLIGHT_CI_FLY_API_TOKEN }}
59+
# Use user token if available (required for deploy token tests), otherwise fall back to limited token
60+
FLY_PREFLIGHT_TEST_ACCESS_TOKEN: ${{ secrets.FLYCTL_PREFLIGHT_CI_USER_TOKEN || secrets.FLYCTL_PREFLIGHT_CI_FLY_API_TOKEN }}
5761
FLY_PREFLIGHT_TEST_FLY_ORG: flyctl-ci-preflight
5862
FLY_PREFLIGHT_TEST_FLY_REGIONS: ${{ inputs.region }}
5963
FLY_PREFLIGHT_TEST_NO_PRINT_HISTORY_ON_FAIL: 'true'
6064
FLY_FORCE_TRACE: 'true'
6165
run: |
6266
mkdir -p bin
63-
if [ -e master-build/flyctl ]; then
64-
mv master-build/flyctl bin/flyctl
65-
fi
66-
if [ -e bin/flyctl ]; then
67-
chmod +x bin/flyctl
68-
fi
67+
(test -e master-build/flyctl) && mv master-build/flyctl bin/flyctl
68+
chmod +x bin/flyctl
6969
export PATH=$PWD/bin:$PATH
70-
test_opts=""
71-
if [[ "${{ github.ref }}" != "refs/heads/master" ]]; then
72-
test_opts="-short"
73-
fi
74-
test_log="$(mktemp)"
75-
function finish {
76-
rm "$test_log"
77-
}
78-
trap finish EXIT
79-
set +e
80-
go test ./test/preflight/... --tags=integration -v -timeout=15m $test_opts -run "${{ steps.test_split.outputs.run }}" | tee "$test_log"
81-
test_status=$?
82-
set -e
8370
echo -n failed= >> $GITHUB_OUTPUT
84-
awk '/^--- FAIL:/{ printf("%s ", $3) }' "$test_log" >> $GITHUB_OUTPUT
85-
echo >> $GITHUB_OUTPUT
86-
exit $test_status
71+
./scripts/preflight.sh -r "${{ github.ref }}" -g "${{ matrix.group }}" -o $GITHUB_OUTPUT
8772
- name: Post failure to slack
8873
if: ${{ github.ref == 'refs/heads/master' && failure() }}
8974
uses: slackapi/slack-github-action@91efab103c0de0a537f72a35f6b8cda0ee76bf0a

internal/build/imgsrc/docker.go

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ func newRemoteDockerClient(ctx context.Context, apiClient flyutil.Client, flapsC
301301

302302
if !connectOverWireguard && !wglessCompatible {
303303
client := &http.Client{
304+
Timeout: 30 * time.Second, // Add timeout for each request
304305
Transport: &http.Transport{
305306
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
306307
return tls.Dial("tcp", fmt.Sprintf("%s.fly.dev:443", app.Name), &tls.Config{})
@@ -322,9 +323,29 @@ func newRemoteDockerClient(ctx context.Context, apiClient flyutil.Client, flapsC
322323
fmt.Fprintln(streams.Out, streams.ColorScheme().Yellow("👀 checking remote builder compatibility with wireguardless deploys ..."))
323324
span.AddEvent("checking remote builder compatibility with wireguardless deploys")
324325

325-
res, err := client.Do(req)
326+
// Retry with backoff to allow DNS propagation time
327+
var res *http.Response
328+
b := &backoff.Backoff{
329+
Min: 2 * time.Second,
330+
Max: 30 * time.Second,
331+
Factor: 2,
332+
Jitter: true,
333+
}
334+
maxRetries := 10 // Up to ~5 minutes total with backoff
335+
for attempt := 0; attempt < maxRetries; attempt++ {
336+
res, err = client.Do(req)
337+
if err == nil {
338+
break
339+
}
340+
341+
if attempt < maxRetries-1 {
342+
dur := b.Duration()
343+
terminal.Debugf("Remote builder compatibility check failed (attempt %d/%d), retrying in %s (err: %v)\n", attempt+1, maxRetries, dur, err)
344+
pause.For(ctx, dur)
345+
}
346+
}
326347
if err != nil {
327-
tracing.RecordError(span, err, "failed to get remote builder settings")
348+
tracing.RecordError(span, err, "failed to get remote builder settings after retries")
328349
return nil, err
329350
}
330351

@@ -594,7 +615,7 @@ func buildRemoteClientOpts(ctx context.Context, apiClient flyutil.Client, appNam
594615
}
595616

596617
func waitForDaemon(parent context.Context, client *dockerclient.Client) (up bool, err error) {
597-
ctx, cancel := context.WithTimeout(parent, 2*time.Minute)
618+
ctx, cancel := context.WithTimeout(parent, 5*time.Minute) // 5 minutes for daemon to become responsive (includes DNS propagation time)
598619
defer cancel()
599620

600621
b := &backoff.Backoff{

internal/build/imgsrc/ensure_builder.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,7 @@ func (p *Provisioner) createBuilder(ctx context.Context, region, builderName str
531531
return nil, nil, retErr
532532
}
533533

534-
retErr = flapsClient.Wait(ctx, builderName, mach, "started", 60*time.Second)
534+
retErr = flapsClient.Wait(ctx, builderName, mach, "started", 180*time.Second) // 3 minutes for machine start + DNS propagation
535535
if retErr != nil {
536536
tracing.RecordError(span, retErr, "error waiting for builder machine to start")
537537
return nil, nil, retErr
@@ -582,7 +582,7 @@ func restartBuilderMachine(ctx context.Context, appName string, builderMachine *
582582
return err
583583
}
584584

585-
if err := flapsClient.Wait(ctx, appName, builderMachine, "started", time.Second*60); err != nil {
585+
if err := flapsClient.Wait(ctx, appName, builderMachine, "started", time.Second*180); err != nil { // 3 minutes for restart + DNS propagation
586586
tracing.RecordError(span, err, "error waiting for builder machine to start")
587587
return err
588588
}

internal/command/console/console.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,11 @@ func runConsole(ctx context.Context) error {
231231
consoleCommand = flag.GetString(ctx, "command")
232232
}
233233

234-
return ssh.Console(ctx, sshClient, consoleCommand, true, params.Container)
234+
// Allocate PTY only when no command is specified or when explicitly requested
235+
// This matches the behavior of `fly ssh console`
236+
allocPTY := consoleCommand == "" || flag.GetBool(ctx, "pty")
237+
238+
return ssh.Console(ctx, sshClient, consoleCommand, allocPTY, params.Container)
235239
}
236240

237241
func selectMachine(ctx context.Context, app *fly.AppCompact, appConfig *appconfig.Config) (*fly.Machine, func(), error) {

internal/command/deploy/machines_deploymachinesapp.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,9 @@ func (md *machineDeployment) DeployMachinesApp(ctx context.Context) error {
107107

108108
if updateErr := md.updateReleaseInBackend(ctx, status, metadata); updateErr != nil {
109109
if err == nil {
110-
err = fmt.Errorf("failed to set final release status: %w", updateErr)
110+
// Deployment succeeded, but we couldn't update the release status
111+
// This is not critical enough to fail the entire deployment
112+
terminal.Warnf("failed to set final release status after successful deployment: %v\n", updateErr)
111113
} else {
112114
terminal.Warnf("failed to set final release status after deployment failure: %v\n", updateErr)
113115
}

internal/command/scale/count_machines.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -317,14 +317,15 @@ func computeActions(appName string, machines []*fly.Machine, expectedGroupCounts
317317
delete(mConfig.Env, "FLY_STANDBY_FOR")
318318

319319
for region, delta := range regionDiffs {
320+
existingMachinesInRegion := perRegionMachines[region]
320321
actions = append(actions, &planItem{
321322
GroupName: groupName,
322323
Region: region,
323324
Delta: delta,
324-
Machines: perRegionMachines[region],
325+
Machines: existingMachinesInRegion,
325326
LaunchMachineInput: &fly.LaunchMachineInput{Region: region, Config: mConfig, MinSecretsVersion: minvers},
326327
Volumes: defaults.PopAvailableVolumes(mConfig, region, delta),
327-
CreateVolumeRequest: defaults.CreateVolumeRequest(mConfig, region, delta),
328+
CreateVolumeRequest: defaults.CreateVolumeRequest(mConfig, region, delta, len(existingMachinesInRegion)),
328329
})
329330
}
330331
}
@@ -352,7 +353,7 @@ func computeActions(appName string, machines []*fly.Machine, expectedGroupCounts
352353
Delta: delta,
353354
LaunchMachineInput: &fly.LaunchMachineInput{Region: region, Config: mConfig, MinSecretsVersion: minvers},
354355
Volumes: defaults.PopAvailableVolumes(mConfig, region, delta),
355-
CreateVolumeRequest: defaults.CreateVolumeRequest(mConfig, region, delta),
356+
CreateVolumeRequest: defaults.CreateVolumeRequest(mConfig, region, delta, 0), // No existing machines for new groups
356357
})
357358
}
358359
}

internal/command/scale/machine_defaults.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,17 +118,23 @@ func (d *defaultValues) PopAvailableVolumes(mConfig *fly.MachineConfig, region s
118118
return availableVolumes
119119
}
120120

121-
func (d *defaultValues) CreateVolumeRequest(mConfig *fly.MachineConfig, region string, delta int) *fly.CreateVolumeRequest {
121+
func (d *defaultValues) CreateVolumeRequest(mConfig *fly.MachineConfig, region string, delta int, existingMachineCount int) *fly.CreateVolumeRequest {
122122
if len(mConfig.Mounts) == 0 || delta <= 0 {
123123
return nil
124124
}
125125
mount := mConfig.Mounts[0]
126+
127+
// Enable RequireUniqueZone for HA scenarios (when total machines in region > 1)
128+
// This ensures volumes (and their attached machines) are distributed across different hosts
129+
totalMachinesInRegion := existingMachineCount + delta
130+
requireUniqueZone := totalMachinesInRegion > 1
131+
126132
return &fly.CreateVolumeRequest{
127133
Name: mount.Name,
128134
Region: region,
129135
SizeGb: &mount.SizeGb,
130136
Encrypted: fly.Pointer(mount.Encrypted),
131-
RequireUniqueZone: fly.Pointer(false),
137+
RequireUniqueZone: fly.Pointer(requireUniqueZone),
132138
SnapshotID: d.snapshotID,
133139
ComputeRequirements: mConfig.Guest,
134140
ComputeImage: mConfig.Image,

scanner/rails_dockerfile_test.go

Lines changed: 5 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,8 @@ CMD ["rails", "server"]
4646
err = os.WriteFile(filepath.Join(dir, "Dockerfile"), []byte(customDockerfile), 0644)
4747
require.NoError(t, err)
4848

49-
// Change to test directory
50-
originalDir, _ := os.Getwd()
51-
defer os.Chdir(originalDir)
52-
err = os.Chdir(dir)
53-
require.NoError(t, err)
54-
5549
// Run the scanner - it should detect the Rails app
50+
// No need to change directories, configureRails accepts a directory path
5651
si, err := configureRails(dir, &ScannerConfig{SkipHealthcheck: true})
5752
drainHealthcheckChannel() // Wait for goroutine to complete before cleanup
5853

@@ -89,11 +84,7 @@ CMD ["rails", "server"]`
8984
err = os.WriteFile(filepath.Join(dir, "Dockerfile"), []byte(customDockerfile), 0644)
9085
require.NoError(t, err)
9186

92-
originalDir, _ := os.Getwd()
93-
defer os.Chdir(originalDir)
94-
err = os.Chdir(dir)
95-
require.NoError(t, err)
96-
87+
// No need to change directories, configureRails accepts a directory path
9788
si, err := configureRails(dir, &ScannerConfig{SkipHealthcheck: true})
9889
drainHealthcheckChannel() // Wait for goroutine to complete before cleanup
9990
require.NoError(t, err)
@@ -123,11 +114,7 @@ CMD ["rails", "server"]`
123114
err = os.WriteFile(filepath.Join(dir, "Dockerfile"), []byte(customDockerfile), 0644)
124115
require.NoError(t, err)
125116

126-
originalDir, _ := os.Getwd()
127-
defer os.Chdir(originalDir)
128-
err = os.Chdir(dir)
129-
require.NoError(t, err)
130-
117+
// No need to change directories, configureRails accepts a directory path
131118
si, err := configureRails(dir, &ScannerConfig{SkipHealthcheck: true})
132119
drainHealthcheckChannel() // Wait for goroutine to complete before cleanup
133120
require.NoError(t, err)
@@ -150,12 +137,8 @@ CMD ["rails", "server"]`
150137

151138
// Note: No Dockerfile created
152139

153-
originalDir, _ := os.Getwd()
154-
defer os.Chdir(originalDir)
155-
err = os.Chdir(dir)
156-
require.NoError(t, err)
157-
158140
// This test would need bundle to not be available, which is hard to simulate
141+
// (No need to change directories: configureRails accepts a directory path.)
159142
// The scanner will either find bundle (and try to use it) or not find it
160143
// If bundle is not found and no Dockerfile exists, it should fail
161144

@@ -199,11 +182,7 @@ EXPOSE 3000`
199182
err = os.WriteFile(filepath.Join(dir, "Dockerfile"), []byte(customDockerfile), 0644)
200183
require.NoError(t, err)
201184

202-
originalDir, _ := os.Getwd()
203-
defer os.Chdir(originalDir)
204-
err = os.Chdir(dir)
205-
require.NoError(t, err)
206-
185+
// No need to change directories, configureRails accepts a directory path
207186
si, err := configureRails(dir, &ScannerConfig{SkipHealthcheck: true})
208187
drainHealthcheckChannel() // Wait for goroutine to complete before cleanup
209188
require.NoError(t, err)

0 commit comments

Comments
 (0)