Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
3f93b85
Add downloaders for uv, go and jq
shreyas-goenka Aug 19, 2025
59c4b4d
-
shreyas-goenka Aug 19, 2025
807f060
lint
shreyas-goenka Aug 19, 2025
cd593cd
lint
shreyas-goenka Aug 19, 2025
7bc9fcb
Merge remote-tracking branch 'origin' into downloaders
shreyas-goenka Aug 21, 2025
5cbb42a
Add archive build for DBR testing
shreyas-goenka Aug 21, 2025
22114a5
typo
shreyas-goenka Aug 21, 2025
2fdd717
lint
shreyas-goenka Aug 21, 2025
4f353f0
runner commit
shreyas-goenka Aug 21, 2025
5d22804
run all tests
shreyas-goenka Aug 21, 2025
469da10
final fixes
shreyas-goenka Aug 21, 2025
730e6aa
fix build tag and lint
shreyas-goenka Aug 21, 2025
48a943d
undo timeout changes
shreyas-goenka Aug 21, 2025
1fbc0b6
-
shreyas-goenka Aug 21, 2025
1bbc2c8
-
shreyas-goenka Aug 21, 2025
2ea1d05
-
shreyas-goenka Aug 21, 2025
acee37a
-
shreyas-goenka Aug 21, 2025
de62b56
add support for short runs
shreyas-goenka Aug 21, 2025
9848a9f
run ruff
shreyas-goenka Aug 21, 2025
ef78430
lint
shreyas-goenka Aug 21, 2025
2c53909
fix flag and runner
shreyas-goenka Aug 22, 2025
da71e1c
cleanup test directory
shreyas-goenka Aug 25, 2025
b6ecaa9
add timestamp to tmpdir; doc comment to dbr_runner.py
shreyas-goenka Aug 25, 2025
00077ea
AMD -> amd64; reuse bin_dir
shreyas-goenka Aug 25, 2025
d789054
comment out arm downloads
shreyas-goenka Aug 25, 2025
c035ebe
typo and cleanup
shreyas-goenka Aug 25, 2025
d9579c3
-
shreyas-goenka Aug 25, 2025
3e4c5d2
add flag instead for dbr tests
shreyas-goenka Aug 25, 2025
921595c
common workspace dir method
shreyas-goenka Aug 25, 2025
7df2b68
pass build and bin directories as args
shreyas-goenka Aug 25, 2025
33fb6bd
-
shreyas-goenka Aug 25, 2025
5f1e04e
add url; dynamic timeout
shreyas-goenka Aug 25, 2025
616346f
parameterize archive name
shreyas-goenka Aug 25, 2025
bc87a45
create tmp dir instead of home
shreyas-goenka Aug 25, 2025
3197df2
pull latest changes
shreyas-goenka Aug 25, 2025
1829078
fix flag order
shreyas-goenka Aug 25, 2025
d9304ae
fix tmpdir
shreyas-goenka Aug 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 27 additions & 9 deletions acceptance/acceptance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,17 @@ import (
)

var (
KeepTmp bool
NoRepl bool
VerboseTest bool = os.Getenv("VERBOSE_TEST") != ""
Tail bool
Forcerun bool
LogRequests bool
LogConfig bool
SkipLocal bool
UseVersion string
KeepTmp bool
NoRepl bool
VerboseTest bool = os.Getenv("VERBOSE_TEST") != ""
Tail bool
Forcerun bool
LogRequests bool
LogConfig bool
SkipLocal bool
WorkspaceTmpDir bool
UseVersion string
Dbr bool
)

// In order to debug CLI running under acceptance test, search for TestInprocessMode and update
Expand All @@ -66,7 +68,9 @@ func init() {
flag.BoolVar(&LogRequests, "logrequests", false, "Log request and responses from testserver")
flag.BoolVar(&LogConfig, "logconfig", false, "Log merged for each test case")
flag.BoolVar(&SkipLocal, "skiplocal", false, "Skip tests that are enabled to run on Local")
flag.BoolVar(&WorkspaceTmpDir, "workspace-tmp-dir", false, "Run tests on the workspace file system (For DBR testing).")
flag.StringVar(&UseVersion, "useversion", "", "Download previously released version of CLI and use it to run the tests")
flag.BoolVar(&Dbr, "dbr", false, "Only run the tests on DBR via a Databricks job.")
}

const (
Expand Down Expand Up @@ -115,6 +119,11 @@ var Ignored = map[string]bool{
}

// TestAccept is the entry point for the acceptance suite. With the -dbr
// flag it delegates to the DBR job-based runner, which executes the tests
// remotely on a Databricks cluster; otherwise the tests run here.
func TestAccept(t *testing.T) {
	if !Dbr {
		testAccept(t, InprocessMode, "")
		return
	}

	testDbrAcceptance(t)
}

Expand Down Expand Up @@ -486,6 +495,15 @@ func runTest(t *testing.T,
tmpDir, err = os.MkdirTemp(tempDirBase, "")
require.NoError(t, err)
t.Logf("Created directory: %s", tmpDir)
} else if WorkspaceTmpDir {
// If the test is being run on DBR, auth is already configured
// by the dbr_runner notebook by reading a token from the notebook context and
// setting DATABRICKS_TOKEN and DATABRICKS_HOST environment variables.
_, _, tmpDir = workspaceTmpDir(t.Context(), t)

// Run DBR tests on the workspace file system to mimic usage from
// DABs in the workspace.
t.Logf("Running DBR tests on %s", tmpDir)
} else {
tmpDir = t.TempDir()
}
Expand Down
81 changes: 81 additions & 0 deletions acceptance/dbr_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Databricks notebook source
# This notebook is meant to be run from a job on a Databricks workspace.
# It is recommended to have the job cluster be a serverless cluster
# to match DABs in the workspace execution environment.
# The recommended flow to run this is:
# run: deco env run -i -n <env-name> -- go test -run TestAccept github.com/databricks/cli/acceptance -dbr
# where <env-name> is the name of the environment to run the tests in. This will automatically
# start a job to execute integration acceptance tests on a serverless cluster.

import os
import subprocess
import sys
import tarfile
import tempfile
from pathlib import Path
from dbruntime.databricks_repl_context import get_context


def extract_cli_archive():
    """Unpack the uploaded CLI test archive into a fresh temp directory.

    Reads the workspace path of the archive from the ``archive_path`` job
    widget, extracts it, and returns the extraction directory as a Path.
    Exits the process with status 1 if the widget is empty.
    """
    archive_path = dbutils.widgets.get("archive_path")
    if not archive_path:
        print("Error: archive_path is not set", file=sys.stderr)
        sys.exit(1)

    target_dir = Path(tempfile.mkdtemp(prefix="cli_archive"))

    # NOTE(review): extractall without a `filter` trusts the archive contents.
    # That is presumably fine here since we build the archive ourselves, but
    # Python 3.12+ warns about this and 3.14 changes the default — confirm.
    with tarfile.open(archive_path, "r:gz") as archive:
        archive.extractall(path=target_dir)

    print(f"Extracted {archive_path} to {target_dir}")
    return target_dir


def main():
    """Run the CLI acceptance test suite on this cluster.

    Extracts the uploaded CLI archive, configures the environment (PATH,
    required test environment variables, and workspace auth derived from the
    notebook context), then shells out to `go test` to run the acceptance
    suite with the -workspace-tmp-dir flag.
    """
    archive_dir = extract_cli_archive()
    env = os.environ.copy()

    # Today all serverless instances are amd64. There are plans to also
    # have ARM based instances in Q4 FY26 but for now we can keep using the amd64
    # binaries without checking for the architecture.
    bin_dir = archive_dir / "bin" / "amd64"
    go_bin_dir = bin_dir / "go" / "bin"
    env["PATH"] = os.pathsep.join([str(go_bin_dir), str(bin_dir), env.get("PATH", "")])

    # Env vars used by the acceptance tests. These need to
    # be provided by the job parameters to the test runner here.
    # Keep this list in sync with the `envvars` slice in dbr_test.go.
    envvars = [
        "CLOUD_ENV",
        "TEST_DEFAULT_CLUSTER_ID",
        "TEST_DEFAULT_WAREHOUSE_ID",
        "TEST_INSTANCE_POOL_ID",
        "TEST_METASTORE_ID",
    ]

    for envvar in envvars:
        env[envvar] = dbutils.widgets.get(envvar)
        assert env[envvar] is not None, f"Error: {envvar} is not set"

    ctx = get_context()
    workspace_url = spark.conf.get("spark.databricks.workspaceUrl")

    # Configure auth for the acceptance tests.
    env["DATABRICKS_TOKEN"] = ctx.apiToken
    env["DATABRICKS_HOST"] = workspace_url

    # Change working directory to the root of the CLI repo.
    os.chdir(archive_dir / "cli")
    cmd = ["go", "test", "-timeout", "14400s", "-test.v", "-run", r"^TestAccept", "github.com/databricks/cli/acceptance", "-workspace-tmp-dir"]

    if dbutils.widgets.get("short") == "true":
        cmd.append("-short")

    print("Running acceptance tests...")
    # Without capture_output, the child process inherits this process's
    # stdout/stderr, so test output streams directly to the notebook logs.
    # result.stdout and result.stderr are therefore None and must not be
    # printed (doing so would just emit the literal string "None").
    result = subprocess.run(cmd, env=env, check=False)
    assert result.returncode == 0, "Acceptance tests failed"


# Standard entry point guard so the notebook source also runs as a script.
if __name__ == "__main__":
    main()
155 changes: 155 additions & 0 deletions acceptance/dbr_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
package acceptance_test

import (
	"context"
	"fmt"
	"os"
	"path"
	"path/filepath"
	"strconv"
	"testing"
	"time"

	"github.com/databricks/cli/libs/filer"
	"github.com/databricks/databricks-sdk-go"
	"github.com/databricks/databricks-sdk-go/service/jobs"
	"github.com/databricks/databricks-sdk-go/service/workspace"
	"github.com/google/uuid"
	"github.com/stretchr/testify/require"
)

// workspaceTmpDir creates a unique, timestamped temporary directory under the
// current user's workspace home. It returns the workspace client, a filer
// rooted at the new directory, and the directory's absolute workspace path.
// The directory is deleted via t.Cleanup when the test finishes.
func workspaceTmpDir(ctx context.Context, t *testing.T) (*databricks.WorkspaceClient, filer.Filer, string) {
	w, err := databricks.NewWorkspaceClient()
	require.NoError(t, err)

	currentUser, err := w.CurrentUser.Me(ctx)
	require.NoError(t, err)

	// Use UTC so the literal "Z" suffix in the directory name is accurate
	// (a bare "Z" in a Go time layout is a literal, not a zone marker).
	timestamp := time.Now().UTC().Format("2006-01-02T15:04:05Z")
	tmpDir := fmt.Sprintf(
		"/Workspace/Users/%s/acceptance/%s/%s",
		currentUser.UserName,
		timestamp,
		uuid.New().String(),
	)

	t.Cleanup(func() {
		// Delete through the workspace API rather than os.RemoveAll:
		// the /Workspace path only exists on the local filesystem when the
		// workspace is FUSE-mounted (i.e. when running on DBR). On a
		// developer machine os.RemoveAll would be a silent no-op and leak
		// the directory in the workspace.
		err := w.Workspace.Delete(ctx, workspace.Delete{
			Path:      tmpDir,
			Recursive: true,
		})
		require.NoError(t, err)
	})

	err = w.Workspace.MkdirsByPath(ctx, tmpDir)
	require.NoError(t, err)

	f, err := filer.NewWorkspaceFilesClient(w, tmpDir)
	require.NoError(t, err)

	return w, f, tmpDir
}

// buildAndUploadArchive builds the CLI test archive (git-tracked files plus
// the downloaded tool binaries) via the internal/testarchive package and
// uploads it to the workspace through the given filer. It returns the name
// of the uploaded archive file.
func buildAndUploadArchive(ctx context.Context, t *testing.T, f filer.Filer) string {
	const (
		binDir      = "_bin"
		buildDir    = "_build"
		archiveName = "archive.tar.gz"
	)

	pkgDir := path.Join("..", "internal", "testarchive")

	// Build the CLI archives and upload to the workspace.
	RunCommand(t, []string{"go", "run", ".", buildDir, binDir, archiveName}, pkgDir)

	src, err := os.Open(filepath.Join(pkgDir, buildDir, archiveName))
	require.NoError(t, err)

	t.Logf("Uploading archive...")
	require.NoError(t, f.Write(ctx, archiveName, src))
	require.NoError(t, src.Close())

	return archiveName
}

// uploadRunner uploads the dbr_runner.py notebook source to the workspace
// via the given filer and returns the notebook's base name (without the
// extension), which is how the imported notebook is addressed.
func uploadRunner(ctx context.Context, t *testing.T, f filer.Filer) string {
	src, err := os.Open("dbr_runner.py")
	require.NoError(t, err)

	t.Logf("Uploading DBR runner...")
	require.NoError(t, f.Write(ctx, "dbr_runner.py", src))
	require.NoError(t, src.Close())

	return "dbr_runner"
}

func runDbrTests(ctx context.Context, t *testing.T, w *databricks.WorkspaceClient, runnerPath, archivePath string) {
t.Logf("Submitting test runner job...")

envvars := []string{
"CLOUD_ENV",
"TEST_DEFAULT_CLUSTER_ID",
"TEST_DEFAULT_WAREHOUSE_ID",
"TEST_INSTANCE_POOL_ID",
"TEST_METASTORE_ID",
}

baseParams := map[string]string{
"archive_path": archivePath,
"short": strconv.FormatBool(testing.Short()),
}
for _, envvar := range envvars {
baseParams[envvar] = os.Getenv(envvar)
}

waiter, err := w.Jobs.Submit(ctx, jobs.SubmitRun{
RunName: "DBR Acceptance Tests",
Tasks: []jobs.SubmitTask{
{
TaskKey: "dbr_runner",
NotebookTask: &jobs.NotebookTask{
NotebookPath: runnerPath,
BaseParameters: baseParams,
},
},
},
})
require.NoError(t, err)

t.Logf("Waiting for test runner job to finish. Run URL: %s", urlForRun(ctx, t, w, waiter.RunId))

var run *jobs.Run
deadline, ok := t.Deadline()
if ok {
// If -timeout is configured for the test, wait until that time for the job run results.
run, err = waiter.GetWithTimeout(time.Until(deadline))
Copy link
Contributor Author

@shreyas-goenka shreyas-goenka Aug 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intention here really is to increase the timeout since the default one is too short. I suspect eventually we might need a custom poller here since even with a bigger timeout this method often fails due to some opaque TCP error.

require.NoError(t, err)
} else {
// Use the default timeout from the SDK otherwise.
run, err = waiter.Get()
require.NoError(t, err)
}

t.Logf("The test runner job finished with status: %s", run.State.LifeCycleState)
}

// urlForRun fetches the run's metadata and returns its browsable run page
// URL, failing the test if the lookup errors.
func urlForRun(ctx context.Context, t *testing.T, w *databricks.WorkspaceClient, runId int64) string {
	resp, err := w.Jobs.GetRun(ctx, jobs.GetRunRequest{RunId: runId})
	require.NoError(t, err)
	return resp.RunPageUrl
}

// testDbrAcceptance drives the acceptance tests on DBR: it provisions a
// temporary workspace directory, uploads the CLI archive and the runner
// notebook, and submits a job that executes the tests remotely.
func testDbrAcceptance(t *testing.T) {
	ctx := context.Background()

	w, f, testDir := workspaceTmpDir(ctx, t)
	t.Logf("Test directory for the DBR runner: %s", testDir)

	// We compile and upload an archive of the entire repo to the workspace.
	// Only files tracked by git and binaries required by acceptance tests like
	// go, uv, jq, etc. are included.
	archive := buildAndUploadArchive(ctx, t, f)
	runner := uploadRunner(ctx, t, f)

	runDbrTests(ctx, t, w, path.Join(testDir, runner), path.Join(testDir, archive))
}
15 changes: 9 additions & 6 deletions internal/testarchive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func gitFiles(repoRoot string) ([]string, error) {
scanner := bufio.NewScanner(strings.NewReader(string(output)))
var gitFiles []string
for scanner.Scan() {
file := strings.TrimSpace(scanner.Text())
file := scanner.Text()
if file != "" {
gitFiles = append(gitFiles, file)
}
Expand Down Expand Up @@ -98,20 +98,23 @@ func addFileToArchive(tarWriter *tar.Writer, src, dst string) error {
}

// createArchive creates a tar.gz archive of all git-tracked files plus downloaded tools
func createArchive(archiveDir, binDir, repoRoot string) error {
archivePath := filepath.Join(archiveDir, "archive.tar.gz")
func createArchive(archiveDir, binDir, archiveName, repoRoot string) error {
archivePath := filepath.Join(archiveDir, archiveName)

// Only amd64 tools are downloaded today: serverless instances are currently
// amd64-only. Once ARM instances are available (see the TODO below), the
// runner will need runtime architecture detection to pick the right binaries.
downloaders := []downloader{
goDownloader{arch: "amd64", binDir: binDir},
goDownloader{arch: "arm64", binDir: binDir},
uvDownloader{arch: "amd64", binDir: binDir},
uvDownloader{arch: "arm64", binDir: binDir},
jqDownloader{arch: "amd64", binDir: binDir},
jqDownloader{arch: "arm64", binDir: binDir},

// TODO: Once ARM64 for serverless clusters is available, enable download for
// these and add runtime detection to the test runner to choose the right binaries.
// uvDownloader{arch: "arm64", binDir: binDir},
// goDownloader{arch: "arm64", binDir: binDir},
// jqDownloader{arch: "arm64", binDir: binDir},
}

for _, downloader := range downloaders {
Expand Down
8 changes: 0 additions & 8 deletions internal/testarchive/go.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,6 @@ import (
"strings"
)

// Initialize these to prevent linter from complaining about unused types.
// This can be removed once we actually use these downloaders.
var (
_ = goDownloader{}
_ = uvDownloader{}
_ = jqDownloader{}
)

type downloader interface {
Download() error
}
Expand Down
11 changes: 10 additions & 1 deletion internal/testarchive/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,19 @@ import (
)

func main() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recommend moving this to a cmd subpackage that calls into testarchive functions.

That keeps this a pure package that you can test independently.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems unnecessary. What's the value? We already have unit test coverage (behind a flag, but I plan to eventually enable it). See: downloaders_test.go for example.

if len(os.Args) != 4 {
fmt.Fprintf(os.Stderr, "Usage: %s <build-dir> <bin-dir> <archive-name>\n", os.Args[0])
os.Exit(1)
}

buildDir := os.Args[1]
binDir := os.Args[2]
archiveName := os.Args[3]

// Directories with the _ prefix are ignored by Go. That is important
// since the go installation in _bin would include stdlib go modules which would
// otherwise cause an error during a build of the CLI.
err := createArchive("_build", "_bin", "../..")
err := createArchive(buildDir, binDir, archiveName, "../..")
if err != nil {
fmt.Fprintf(os.Stderr, "Error creating archive: %v\n", err)
os.Exit(1)
Expand Down
2 changes: 0 additions & 2 deletions internal/testarchive/uv.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ type uvDownloader struct {
arch string
}

// uvDownloader creates a new UV downloader

// mapArchitecture maps our architecture names to UV's naming convention
func (u uvDownloader) mapArchitecture(arch string) (string, error) {
switch arch {
Expand Down
Loading