Add runner for DBR tests #3453
@@ -0,0 +1,81 @@
# Databricks notebook source
# This notebook is meant to be run from a job on a Databricks workspace.
# It is recommended to have the job cluster be a serverless cluster
# to match DABs in the workspace execution environment.
# The recommended flow to run this is:
# run: deco env run -i -n <env-name> -- go test -run TestAccept github.com/databricks/cli/acceptance -dbr
# where <env-name> is the name of the environment to run the tests in. This will automatically
# start a job to execute integration acceptance tests on a serverless cluster.

import os
import subprocess
import sys
import tarfile
import tempfile
from pathlib import Path
from dbruntime.databricks_repl_context import get_context


def extract_cli_archive():
    src = dbutils.widgets.get("archive_path")
    if not src:
        print("Error: archive_path is not set", file=sys.stderr)
        sys.exit(1)

    dst = Path(tempfile.mkdtemp(prefix="cli_archive"))

    with tarfile.open(src, "r:gz") as tar:
        tar.extractall(path=dst)

    print(f"Extracted {src} to {dst}")
    return dst


def main():
    archive_dir = extract_cli_archive()
    env = os.environ.copy()

    # Today all serverless instances are amd64. There are plans to also
    # have ARM based instances in Q4 FY26 but for now we can keep using the amd64
    # binaries without checking for the architecture.
    bin_dir = archive_dir / "bin" / "amd64"

    go_bin_dir = bin_dir / "go" / "bin"
    env["PATH"] = os.pathsep.join([str(go_bin_dir), str(bin_dir), env.get("PATH", "")])

    # Env vars used by the acceptance tests. These need to
    # be provided by the job parameters to the test runner here.
    envvars = [
        "CLOUD_ENV",
        "TEST_DEFAULT_CLUSTER_ID",
        "TEST_DEFAULT_WAREHOUSE_ID",
        "TEST_INSTANCE_POOL_ID",
        "TEST_METASTORE_ID",
    ]

    for envvar in envvars:
        env[envvar] = dbutils.widgets.get(envvar)
        assert env[envvar] is not None, f"Error: {envvar} is not set"

    ctx = get_context()
    workspace_url = spark.conf.get("spark.databricks.workspaceUrl")

    # Configure auth for the acceptance tests.
    env["DATABRICKS_TOKEN"] = ctx.apiToken
    env["DATABRICKS_HOST"] = workspace_url

    # Change working directory to the root of the CLI repo.
    os.chdir(archive_dir / "cli")
    cmd = ["go", "test", "-timeout", "14400s", "-test.v", "-run", r"^TestAccept", "github.com/databricks/cli/acceptance", "-workspace-tmp-dir"]

    if dbutils.widgets.get("short") == "true":
        cmd.append("-short")

    print("Running acceptance tests...")
    result = subprocess.run(cmd, env=env, check=False)
    print(result.stdout, flush=True)
Review thread on this line:
- AFAIK run() does not capture output by default.
- I did see logs from the runner in the output. Do you mean that stdout is not captured and only stderr is?
- It is just printed to the parent process stdout/stderr, no special handling needed.
- For parity with the other test runners, it should run in JSON mode and write output to a file. Notebook cell output is limited to a couple of MB, which verbose mode may exceed.
- I plan to do that in a follow-up PR. For now, the important thing for me is to actually run these in non-blocking CI and get some signal.
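As a rough illustration only (not part of this PR), that follow-up could switch the go test invocation to JSON mode and stream stdout to a file instead of the notebook cell. The sketch below assumes the env built in main() above, and the output location is an arbitrary choice:

# Hypothetical follow-up sketch: run `go test -json` and write the potentially large
# JSON event stream to a file rather than the size-limited notebook cell output.
output_path = "/tmp/acceptance_test_output.json"  # illustrative location only
cmd = ["go", "test", "-timeout", "14400s", "-json", "-run", r"^TestAccept", "github.com/databricks/cli/acceptance", "-workspace-tmp-dir"]

with open(output_path, "w") as out:
    # stdout (the JSON test events) goes to the file; stderr still surfaces in the cell output.
    result = subprocess.run(cmd, env=env, stdout=out, check=False)  # env as built in main() above

print(f"Wrote test output to {output_path}", flush=True)
assert result.returncode == 0, "Acceptance tests failed"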
    print(result.stderr, flush=True)
    assert result.returncode == 0, "Acceptance tests failed"


if __name__ == "__main__":
    main()
@@ -0,0 +1,155 @@
package acceptance_test

import (
	"context"
	"fmt"
	"os"
	"path"
	"path/filepath"
	"strconv"
	"testing"
	"time"

	"github.com/databricks/cli/libs/filer"
	"github.com/databricks/databricks-sdk-go"
	"github.com/databricks/databricks-sdk-go/service/jobs"
	"github.com/google/uuid"
	"github.com/stretchr/testify/require"
)

func workspaceTmpDir(ctx context.Context, t *testing.T) (*databricks.WorkspaceClient, filer.Filer, string) {
	w, err := databricks.NewWorkspaceClient()
	require.NoError(t, err)

	currentUser, err := w.CurrentUser.Me(ctx)
	require.NoError(t, err)

	timestamp := time.Now().Format("2006-01-02T15:04:05Z")
	tmpDir := fmt.Sprintf(
		"/Workspace/Users/%s/acceptance/%s/%s",
		currentUser.UserName,
		timestamp,
		uuid.New().String(),
	)

	t.Cleanup(func() {
		err := os.RemoveAll(tmpDir)
		require.NoError(t, err)
	})

	err = w.Workspace.MkdirsByPath(ctx, tmpDir)
	require.NoError(t, err)

	f, err := filer.NewWorkspaceFilesClient(w, tmpDir)
	require.NoError(t, err)

	return w, f, tmpDir
}

func buildAndUploadArchive(ctx context.Context, t *testing.T, f filer.Filer) string {
	pkgDir := path.Join("..", "internal", "testarchive")

	binDir := "_bin"
	buildDir := "_build"
	archiveName := "archive.tar.gz"

	// Build the CLI archives and upload to the workspace.
	RunCommand(t, []string{"go", "run", ".", buildDir, binDir, archiveName}, pkgDir)

	archiveReader, err := os.Open(filepath.Join(pkgDir, buildDir, archiveName))
	require.NoError(t, err)

	t.Logf("Uploading archive...")
	err = f.Write(ctx, archiveName, archiveReader)
	require.NoError(t, err)

	err = archiveReader.Close()
	require.NoError(t, err)

	return archiveName
}

func uploadRunner(ctx context.Context, t *testing.T, f filer.Filer) string {
	runnerReader, err := os.Open("dbr_runner.py")
	require.NoError(t, err)

	t.Logf("Uploading DBR runner...")
	err = f.Write(ctx, "dbr_runner.py", runnerReader)
	require.NoError(t, err)

	err = runnerReader.Close()
	require.NoError(t, err)

	return "dbr_runner"
}

func runDbrTests(ctx context.Context, t *testing.T, w *databricks.WorkspaceClient, runnerPath, archivePath string) {
	t.Logf("Submitting test runner job...")

	envvars := []string{
		"CLOUD_ENV",
		"TEST_DEFAULT_CLUSTER_ID",
		"TEST_DEFAULT_WAREHOUSE_ID",
		"TEST_INSTANCE_POOL_ID",
		"TEST_METASTORE_ID",
	}

	baseParams := map[string]string{
		"archive_path": archivePath,
		"short":        strconv.FormatBool(testing.Short()),
	}
	for _, envvar := range envvars {
		baseParams[envvar] = os.Getenv(envvar)
	}

	waiter, err := w.Jobs.Submit(ctx, jobs.SubmitRun{
		RunName: "DBR Acceptance Tests",
		Tasks: []jobs.SubmitTask{
			{
				TaskKey: "dbr_runner",
				NotebookTask: &jobs.NotebookTask{
					NotebookPath:   runnerPath,
					BaseParameters: baseParams,
				},
			},
		},
	})
	require.NoError(t, err)

	t.Logf("Waiting for test runner job to finish. Run URL: %s", urlForRun(ctx, t, w, waiter.RunId))

	var run *jobs.Run
	deadline, ok := t.Deadline()
	if ok {
		// If -timeout is configured for the test, wait until that time for the job run results.
		run, err = waiter.GetWithTimeout(time.Until(deadline))
Review comment on this line:
- The intention here really is to increase the timeout since the default one is too short. I suspect eventually we might need a custom poller here since even with a bigger timeout this method often fails due to some opaque TCP error.
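For illustration, such a custom poller could look roughly like the sketch below. pollRun is a hypothetical helper, not part of this PR; it reuses the GetRun call already used by urlForRun and treats transient errors as retryable instead of failing the wait:

// Hypothetical pollRun helper sketch: poll the run state directly and tolerate
// transient errors (e.g. opaque TCP failures) instead of aborting the wait.
func pollRun(ctx context.Context, t *testing.T, w *databricks.WorkspaceClient, runId int64, deadline time.Time) (*jobs.Run, error) {
	for time.Now().Before(deadline) {
		run, err := w.Jobs.GetRun(ctx, jobs.GetRunRequest{RunId: runId})
		if err != nil {
			// Log and retry on transient errors.
			t.Logf("Transient error while polling run %d: %v", runId, err)
		} else {
			switch run.State.LifeCycleState {
			case jobs.RunLifeCycleStateTerminated, jobs.RunLifeCycleStateSkipped, jobs.RunLifeCycleStateInternalError:
				// Terminal lifecycle states: stop polling and return the run.
				return run, nil
			}
		}
		time.Sleep(30 * time.Second)
	}
	return nil, fmt.Errorf("run %d did not reach a terminal state before the deadline", runId)
}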
		require.NoError(t, err)
	} else {
		// Use the default timeout from the SDK otherwise.
		run, err = waiter.Get()
		require.NoError(t, err)
	}

	t.Logf("The test runner job finished with status: %s", run.State.LifeCycleState)
}

func urlForRun(ctx context.Context, t *testing.T, w *databricks.WorkspaceClient, runId int64) string {
	run, err := w.Jobs.GetRun(ctx, jobs.GetRunRequest{RunId: runId})
	require.NoError(t, err)
	return run.RunPageUrl
}

func testDbrAcceptance(t *testing.T) {
	ctx := context.Background()

	w, f, testDir := workspaceTmpDir(ctx, t)
	t.Logf("Test directory for the DBR runner: %s", testDir)

	// We compile and upload an archive of the entire repo to the workspace.
	// Only files tracked by git and binaries required by acceptance tests like
	// go, uv, jq, etc. are included.
	archiveName := buildAndUploadArchive(ctx, t, f)
	runnerName := uploadRunner(ctx, t, f)

	runDbrTests(ctx, t, w, path.Join(testDir, runnerName), path.Join(testDir, archiveName))
}
@@ -6,10 +6,19 @@ import (
)

func main() {
Review thread on this function:
- I recommend moving this to a … That keeps this a pure package that you can test independently. (See the sketch after this hunk.)
- Seems unnecessary. What's the value? We already have unit test coverage (behind a flag, but I plan to eventually enable it). See downloaders_test.go for an example.
	if len(os.Args) != 4 {
		fmt.Fprintf(os.Stderr, "Usage: %s <build-dir> <bin-dir> <archive-name>\n", os.Args[0])
		os.Exit(1)
	}

	buildDir := os.Args[1]
	binDir := os.Args[2]
	archiveName := os.Args[3]

	// Directories with the _ prefix are ignored by Go. That is important
	// since the go installation in _bin would include stdlib go modules which would
	// otherwise cause an error during a build of the CLI.
-	err := createArchive("_build", "_bin", "../..")
+	err := createArchive(buildDir, binDir, archiveName, "../..")
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error creating archive: %v\n", err)
		os.Exit(1)
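The concrete suggestion in the first review comment above is elided, so the following is only a rough sketch of one possible reading: main stays a thin argument-parsing wrapper around an exported entry point. The Run name is illustrative and not taken from the thread, and createArchive is assumed to have the four-argument signature introduced in this hunk:

// Hypothetical shape of the suggested refactor (names are illustrative, not from the thread).
// A thin main delegates to an exported Run, which a unit test can call directly.
func Run(buildDir, binDir, archiveName string) error {
	// Directories with the _ prefix are ignored by Go, so the go installation in the
	// bin directory does not interfere with building the CLI.
	return createArchive(buildDir, binDir, archiveName, "../..")
}

func main() {
	if len(os.Args) != 4 {
		fmt.Fprintf(os.Stderr, "Usage: %s <build-dir> <bin-dir> <archive-name>\n", os.Args[0])
		os.Exit(1)
	}
	if err := Run(os.Args[1], os.Args[2], os.Args[3]); err != nil {
		fmt.Fprintf(os.Stderr, "Error creating archive: %v\n", err)
		os.Exit(1)
	}
}

Under the "pure package" reading, the wrapper could also live in a separate main package (for example under a cmd directory), leaving the archive code itself importable and testable on its own.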