From 98bdd8d0b1b469be302f9f379d079cb4f34b7146 Mon Sep 17 00:00:00 2001 From: hari-selvarajan_data Date: Fri, 27 Dec 2024 08:14:31 +0000 Subject: [PATCH 1/6] commit for offline install --- .vscode/settings.json | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index f8b04f1269..0000000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "[go]": { - "editor.insertSpaces": false, - "editor.formatOnSave": true - }, - "go.lintTool": "golangci-lint", - "go.lintFlags": [ - "--fast" - ], - "go.useLanguageServer": true, - "gopls": { - "formatting.gofumpt": true - }, - "files.trimTrailingWhitespace": true, - "files.insertFinalNewline": true, - "files.trimFinalNewlines": true, - "python.envFile": "${workspaceRoot}/.env", - "python.analysis.stubPath": ".vscode", - "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", - "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------" -} From c514cca92935b86a71ddcb059e5efd1a3034f2d3 Mon Sep 17 00:00:00 2001 From: hari-selvarajan_data Date: Fri, 27 Dec 2024 08:15:01 +0000 Subject: [PATCH 2/6] commit for offlineinstall --- cmd/labs/github/releases.go | 11 +++++--- cmd/labs/install.go | 26 ++++++++++-------- cmd/labs/localcache/jsonfile.go | 4 +-- cmd/labs/project/fetcher.go | 43 ++++++++++++++++++++++-------- cmd/labs/project/installer.go | 10 ++++--- cmd/labs/project/installer_test.go | 34 +++++++++++++++++++++++ 6 files changed, 97 insertions(+), 31 deletions(-) diff --git a/cmd/labs/github/releases.go b/cmd/labs/github/releases.go index 0dae0317d4..1737ffbf40 100644 --- a/cmd/labs/github/releases.go +++ b/cmd/labs/github/releases.go @@ -16,24 +16,29 @@ const cacheTTL = 1 * time.Hour func NewReleaseCache(org, repo, cacheDir string) *ReleaseCache { pattern := fmt.Sprintf("%s-%s-releases", org, repo) return &ReleaseCache{ - cache: localcache.NewLocalCache[Versions](cacheDir, pattern, cacheTTL), + Cache: localcache.NewLocalCache[Versions](cacheDir, pattern, cacheTTL), Org: org, Repo: repo, } } type ReleaseCache struct { - cache localcache.LocalCache[Versions] + Cache localcache.LocalCache[Versions] Org string Repo string } func (r *ReleaseCache) Load(ctx context.Context) (Versions, error) { - return r.cache.Load(ctx, func() (Versions, error) { + return r.Cache.Load(ctx, func() (Versions, error) { return getVersions(ctx, r.Org, r.Repo) }) } +func (r *ReleaseCache) LoadCache(ctx context.Context) (Versions, error) { + cached, err := r.Cache.LoadCache() + return cached.Data, err +} + // getVersions is considered to be a private API, as we want the usage go through a cache func getVersions(ctx context.Context, org, repo string) (Versions, error) { var releases Versions diff --git a/cmd/labs/install.go b/cmd/labs/install.go index 6ed6b2e918..978916bb8d 100644 --- a/cmd/labs/install.go +++ b/cmd/labs/install.go @@ -7,16 +7,20 @@ import ( ) func newInstallCommand() *cobra.Command { - return &cobra.Command{ - Use: "install NAME", - Args: root.ExactArgs(1), - Short: "Installs project", - RunE: func(cmd *cobra.Command, args []string) error { - inst, err := project.NewInstaller(cmd, args[0]) - if err != nil { - return err - } - return inst.Install(cmd.Context()) - }, + cmd := &cobra.Command{} + var offlineInstall bool + + cmd.Flags().BoolVar(&offlineInstall, "offline-install", offlineInstall, `If installing in offline mode, set this flag to true.`) + + cmd.Use = "install NAME" + cmd.Args = root.ExactArgs(1) + cmd.Short = "Installs project" + cmd.RunE = func(cmd *cobra.Command, args []string) error { + inst, err := project.NewInstaller(cmd, args[0], offlineInstall) + if err != nil { + return err + } + return inst.Install(cmd.Context(), offlineInstall) } + return cmd } diff --git a/cmd/labs/localcache/jsonfile.go b/cmd/labs/localcache/jsonfile.go index 6540e4ac27..5f6d353599 100644 --- a/cmd/labs/localcache/jsonfile.go +++ b/cmd/labs/localcache/jsonfile.go @@ -35,7 +35,7 @@ type LocalCache[T any] struct { } func (r *LocalCache[T]) Load(ctx context.Context, refresh func() (T, error)) (T, error) { - cached, err := r.loadCache() + cached, err := r.LoadCache() if errors.Is(err, fs.ErrNotExist) { return r.refreshCache(ctx, refresh, r.zero) } else if err != nil { @@ -96,7 +96,7 @@ func (r *LocalCache[T]) FileName() string { return filepath.Join(r.dir, fmt.Sprintf("%s.json", r.name)) } -func (r *LocalCache[T]) loadCache() (*cached[T], error) { +func (r *LocalCache[T]) LoadCache() (*cached[T], error) { jsonFile := r.FileName() raw, err := os.ReadFile(r.FileName()) if err != nil { diff --git a/cmd/labs/project/fetcher.go b/cmd/labs/project/fetcher.go index 8f4fafde69..efc654c8de 100644 --- a/cmd/labs/project/fetcher.go +++ b/cmd/labs/project/fetcher.go @@ -15,7 +15,7 @@ import ( ) type installable interface { - Install(ctx context.Context) error + Install(ctx context.Context, offlineInstall bool) error } type devInstallation struct { @@ -23,7 +23,7 @@ type devInstallation struct { *cobra.Command } -func (d *devInstallation) Install(ctx context.Context) error { +func (d *devInstallation) Install(ctx context.Context, offlineInstall bool) error { if d.Installer == nil { return nil } @@ -54,7 +54,7 @@ func (d *devInstallation) Install(ctx context.Context) error { return d.Installer.runHook(d.Command) } -func NewInstaller(cmd *cobra.Command, name string) (installable, error) { +func NewInstaller(cmd *cobra.Command, name string, offlineInstall bool) (installable, error) { if name == "." { wd, err := os.Getwd() if err != nil { @@ -75,14 +75,17 @@ func NewInstaller(cmd *cobra.Command, name string) (installable, error) { version = "latest" } f := &fetcher{name} - version, err := f.checkReleasedVersions(cmd, version) + + version, err := f.checkReleasedVersions(cmd, version, offlineInstall) if err != nil { return nil, fmt.Errorf("version: %w", err) } - prj, err := f.loadRemoteProjectDefinition(cmd, version) + + prj, err := f.loadRemoteProjectDefinition(cmd, version, offlineInstall) if err != nil { return nil, fmt.Errorf("remote: %w", err) } + return &installer{ Project: prj, version: version, @@ -92,11 +95,11 @@ func NewInstaller(cmd *cobra.Command, name string) (installable, error) { func NewUpgrader(cmd *cobra.Command, name string) (*installer, error) { f := &fetcher{name} - version, err := f.checkReleasedVersions(cmd, "latest") + version, err := f.checkReleasedVersions(cmd, "latest", false) if err != nil { return nil, fmt.Errorf("version: %w", err) } - prj, err := f.loadRemoteProjectDefinition(cmd, version) + prj, err := f.loadRemoteProjectDefinition(cmd, version, false) if err != nil { return nil, fmt.Errorf("remote: %w", err) } @@ -115,7 +118,7 @@ type fetcher struct { name string } -func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (string, error) { +func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string, offlineInstall bool) (string, error) { ctx := cmd.Context() cacheDir, err := PathInLabs(ctx, f.name, "cache") if err != nil { @@ -123,7 +126,13 @@ func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (str } // `databricks labs isntall X` doesn't know which exact version to fetch, so first // we fetch all versions and then pick the latest one dynamically. - versions, err := github.NewReleaseCache("databrickslabs", f.name, cacheDir).Load(ctx) + var versions github.Versions + if offlineInstall { + versions, err = github.NewReleaseCache("databrickslabs", f.name, cacheDir).LoadCache(ctx) + } else { + versions, err = github.NewReleaseCache("databrickslabs", f.name, cacheDir).Load(ctx) + } + if err != nil { return "", fmt.Errorf("versions: %w", err) } @@ -140,9 +149,21 @@ func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (str return version, nil } -func (i *fetcher) loadRemoteProjectDefinition(cmd *cobra.Command, version string) (*Project, error) { +func (i *fetcher) loadRemoteProjectDefinition(cmd *cobra.Command, version string, offlineInstall bool) (*Project, error) { ctx := cmd.Context() - raw, err := github.ReadFileFromRef(ctx, "databrickslabs", i.name, version, "labs.yml") + var raw []byte + var err error + if !offlineInstall { + raw, err = github.ReadFileFromRef(ctx, "databrickslabs", i.name, version, "labs.yml") + } else { + libDir, file_err := PathInLabs(ctx, i.name, "lib") + if file_err != nil { + return nil, file_err + } + fileName := filepath.Join(libDir, "labs.yml") + raw, err = os.ReadFile(fileName) + } + if err != nil { return nil, fmt.Errorf("read labs.yml from GitHub: %w", err) } diff --git a/cmd/labs/project/installer.go b/cmd/labs/project/installer.go index 041415964f..10540f9045 100644 --- a/cmd/labs/project/installer.go +++ b/cmd/labs/project/installer.go @@ -79,7 +79,7 @@ type installer struct { cmd *cobra.Command } -func (i *installer) Install(ctx context.Context) error { +func (i *installer) Install(ctx context.Context, offlineInstall bool) error { err := i.EnsureFoldersExist() if err != nil { return fmt.Errorf("folders: %w", err) @@ -101,9 +101,11 @@ func (i *installer) Install(ctx context.Context) error { } else if err != nil { return fmt.Errorf("login: %w", err) } - err = i.downloadLibrary(ctx) - if err != nil { - return fmt.Errorf("lib: %w", err) + if !offlineInstall { + err = i.downloadLibrary(ctx) + if err != nil { + return fmt.Errorf("lib: %w", err) + } } err = i.setupPythonVirtualEnvironment(ctx, w) if err != nil { diff --git a/cmd/labs/project/installer_test.go b/cmd/labs/project/installer_test.go index a69389b315..d8c8e00859 100644 --- a/cmd/labs/project/installer_test.go +++ b/cmd/labs/project/installer_test.go @@ -240,6 +240,40 @@ func TestInstallerWorksForReleases(t *testing.T) { r.RunAndExpectOutput("setting up important infrastructure") } +func TestOfflineInstallerWorksForReleases(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/2.1/clusters/get" { + respondWithJSON(t, w, &compute.ClusterDetails{ + State: compute.StateRunning, + }) + return + } + t.Logf("Requested: %s", r.URL.Path) + t.FailNow() + })) + defer server.Close() + + ctx := installerContext(t, server) + newHome := copyTestdata(t, "testdata/installed-in-home") + ctx = env.WithUserHomeDir(ctx, newHome) + + ctx, stub := process.WithStub(ctx) + stub.WithStdoutFor(`python[\S]+ --version`, "Python 3.10.5") + // on Unix, we call `python3`, but on Windows it is `python.exe` + stub.WithStderrFor(`python[\S]+ -m venv .*/.databricks/labs/blueprint/state/venv`, "[mock venv create]") + stub.WithStderrFor(`python[\S]+ -m pip install --upgrade --upgrade-strategy eager .`, "[mock pip install]") + stub.WithStdoutFor(`python[\S]+ install.py`, "setting up important infrastructure") + + // simulate the case of GitHub Actions + ctx = env.Set(ctx, "DATABRICKS_HOST", server.URL) + ctx = env.Set(ctx, "DATABRICKS_TOKEN", "...") + ctx = env.Set(ctx, "DATABRICKS_CLUSTER_ID", "installer-cluster") + ctx = env.Set(ctx, "DATABRICKS_WAREHOUSE_ID", "installer-warehouse") + + r := testcli.NewRunner(t, ctx, "labs", "install", "blueprint", "--offline-install=true", "--debug") + r.RunAndExpectOutput("setting up important infrastructure") +} + func TestInstallerWorksForDevelopment(t *testing.T) { defer func() { if !t.Failed() { From 41313db1b2eaa16063eafc702edf9815c8164747 Mon Sep 17 00:00:00 2001 From: hari-selvarajan_data Date: Mon, 27 Jan 2025 17:56:46 +0000 Subject: [PATCH 3/6] resolving review comments --- .vscode/settings.json | 21 +++++++++++++++++++++ cmd/labs/github/releases.go | 7 ++++--- cmd/labs/install.go | 2 +- cmd/labs/project/fetcher.go | 7 +------ 4 files changed, 27 insertions(+), 10 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000..2c4fbeb900 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,21 @@ +{ + "[go]": { + "editor.insertSpaces": false, + "editor.formatOnSave": true + }, + "go.lintTool": "golangci-lint", + "go.lintFlags": [ + "--fast" + ], + "go.useLanguageServer": true, + "gopls": { + "formatting.gofumpt": true + }, + "files.trimTrailingWhitespace": true, + "files.insertFinalNewline": true, + "files.trimFinalNewlines": true, + "python.envFile": "${workspaceRoot}/.env", + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------" +} \ No newline at end of file diff --git a/cmd/labs/github/releases.go b/cmd/labs/github/releases.go index 1737ffbf40..d2a26c7f87 100644 --- a/cmd/labs/github/releases.go +++ b/cmd/labs/github/releases.go @@ -23,9 +23,10 @@ func NewReleaseCache(org, repo, cacheDir string) *ReleaseCache { } type ReleaseCache struct { - Cache localcache.LocalCache[Versions] - Org string - Repo string + Cache localcache.LocalCache[Versions] + Org string + Repo string + Offline bool } func (r *ReleaseCache) Load(ctx context.Context) (Versions, error) { diff --git a/cmd/labs/install.go b/cmd/labs/install.go index 978916bb8d..5735ce30c0 100644 --- a/cmd/labs/install.go +++ b/cmd/labs/install.go @@ -10,7 +10,7 @@ func newInstallCommand() *cobra.Command { cmd := &cobra.Command{} var offlineInstall bool - cmd.Flags().BoolVar(&offlineInstall, "offline-install", offlineInstall, `If installing in offline mode, set this flag to true.`) + cmd.Flags().BoolVar(&offlineInstall, "offline", offlineInstall, `If installing in offline mode, set this flag to true.`) cmd.Use = "install NAME" cmd.Args = root.ExactArgs(1) diff --git a/cmd/labs/project/fetcher.go b/cmd/labs/project/fetcher.go index efc654c8de..97f83750be 100644 --- a/cmd/labs/project/fetcher.go +++ b/cmd/labs/project/fetcher.go @@ -127,12 +127,7 @@ func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string, offl // `databricks labs isntall X` doesn't know which exact version to fetch, so first // we fetch all versions and then pick the latest one dynamically. var versions github.Versions - if offlineInstall { - versions, err = github.NewReleaseCache("databrickslabs", f.name, cacheDir).LoadCache(ctx) - } else { - versions, err = github.NewReleaseCache("databrickslabs", f.name, cacheDir).Load(ctx) - } - + versions, err = github.NewReleaseCache("databrickslabs", f.name, cacheDir, offlineInstall).Load(ctx) if err != nil { return "", fmt.Errorf("versions: %w", err) } From 870003d2fc1a4aa8792f64fc4f00b505dda91f37 Mon Sep 17 00:00:00 2001 From: hari-selvarajan_data Date: Mon, 27 Jan 2025 17:58:28 +0000 Subject: [PATCH 4/6] review comments --- cmd/labs/github/releases.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cmd/labs/github/releases.go b/cmd/labs/github/releases.go index d2a26c7f87..d0e2cb796f 100644 --- a/cmd/labs/github/releases.go +++ b/cmd/labs/github/releases.go @@ -13,30 +13,30 @@ const cacheTTL = 1 * time.Hour // NewReleaseCache creates a release cache for a repository in the GitHub org. // Caller has to provide different cache directories for different repositories. -func NewReleaseCache(org, repo, cacheDir string) *ReleaseCache { +func NewReleaseCache(org, repo, cacheDir string, offlineInstall bool) *ReleaseCache { pattern := fmt.Sprintf("%s-%s-releases", org, repo) return &ReleaseCache{ - Cache: localcache.NewLocalCache[Versions](cacheDir, pattern, cacheTTL), - Org: org, - Repo: repo, + cache: localcache.NewLocalCache[Versions](cacheDir, pattern, cacheTTL), + Org: org, + Repo: repo, + Offline: offlineInstall, } } type ReleaseCache struct { - Cache localcache.LocalCache[Versions] + cache localcache.LocalCache[Versions] Org string Repo string Offline bool } func (r *ReleaseCache) Load(ctx context.Context) (Versions, error) { - return r.Cache.Load(ctx, func() (Versions, error) { - return getVersions(ctx, r.Org, r.Repo) - }) -} - -func (r *ReleaseCache) LoadCache(ctx context.Context) (Versions, error) { - cached, err := r.Cache.LoadCache() + if !r.Offline { + return r.cache.Load(ctx, func() (Versions, error) { + return getVersions(ctx, r.Org, r.Repo) + }) + } + cached, err := r.cache.LoadCache() return cached.Data, err } From 4ed82deb9a8275b413e684eeab9ad695dc1b9a10 Mon Sep 17 00:00:00 2001 From: hari-selvarajan_data Date: Mon, 27 Jan 2025 20:02:21 +0000 Subject: [PATCH 5/6] changing the logic for offline install --- cmd/labs/github/releases_test.go | 2 +- cmd/labs/project/installer_test.go | 2 +- cmd/labs/project/project.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/labs/github/releases_test.go b/cmd/labs/github/releases_test.go index 9c3d7a9598..eb8ec34bd3 100644 --- a/cmd/labs/github/releases_test.go +++ b/cmd/labs/github/releases_test.go @@ -25,7 +25,7 @@ func TestLoadsReleasesForCLI(t *testing.T) { ctx := context.Background() ctx = WithApiOverride(ctx, server.URL) - r := NewReleaseCache("databricks", "cli", t.TempDir()) + r := NewReleaseCache("databricks", "cli", t.TempDir(), false) all, err := r.Load(ctx) assert.NoError(t, err) assert.Len(t, all, 2) diff --git a/cmd/labs/project/installer_test.go b/cmd/labs/project/installer_test.go index d8c8e00859..06bd1a45e3 100644 --- a/cmd/labs/project/installer_test.go +++ b/cmd/labs/project/installer_test.go @@ -270,7 +270,7 @@ func TestOfflineInstallerWorksForReleases(t *testing.T) { ctx = env.Set(ctx, "DATABRICKS_CLUSTER_ID", "installer-cluster") ctx = env.Set(ctx, "DATABRICKS_WAREHOUSE_ID", "installer-warehouse") - r := testcli.NewRunner(t, ctx, "labs", "install", "blueprint", "--offline-install=true", "--debug") + r := testcli.NewRunner(t, ctx, "labs", "install", "blueprint", "--offline=true", "--debug") r.RunAndExpectOutput("setting up important infrastructure") } diff --git a/cmd/labs/project/project.go b/cmd/labs/project/project.go index 75f5e584fa..a9f5f45c26 100644 --- a/cmd/labs/project/project.go +++ b/cmd/labs/project/project.go @@ -307,7 +307,7 @@ func (p *Project) checkUpdates(cmd *cobra.Command) error { // might not be installed yet return nil } - r := github.NewReleaseCache("databrickslabs", p.Name, p.CacheDir()) + r := github.NewReleaseCache("databrickslabs", p.Name, p.CacheDir(), false) versions, err := r.Load(ctx) if err != nil { return err From d846c91f96bd43854cfc8344621416e2a33d924c Mon Sep 17 00:00:00 2001 From: hari-selvarajan_data Date: Mon, 27 Jan 2025 20:56:05 +0000 Subject: [PATCH 6/6] added check for local dir exists in offline mode --- .vscode/settings.json | 2 +- cmd/labs/project/installer.go | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 2c4fbeb900..002a1e60a7 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -18,4 +18,4 @@ "python.analysis.stubPath": ".vscode", "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------" -} \ No newline at end of file +} diff --git a/cmd/labs/project/installer.go b/cmd/labs/project/installer.go index 10540f9045..4902e614e5 100644 --- a/cmd/labs/project/installer.go +++ b/cmd/labs/project/installer.go @@ -107,6 +107,10 @@ func (i *installer) Install(ctx context.Context, offlineInstall bool) error { return fmt.Errorf("lib: %w", err) } } + + if _, err := os.Stat(i.LibDir()); os.IsNotExist(err) { + return fmt.Errorf("no local installation found: %w", err) + } err = i.setupPythonVirtualEnvironment(ctx, w) if err != nil { return fmt.Errorf("python: %w", err)