Update to MLflow 2.6.0 (#19)
joshburt authored Oct 18, 2023
1 parent 5863f74 commit 3545e1f
Showing 42 changed files with 13,413 additions and 259 deletions.
7 changes: 1 addition & 6 deletions .github/workflows/main-scan.yml
@@ -27,7 +27,7 @@ jobs:
# https://github.com/conda-incubator/setup-miniconda
uses: conda-incubator/setup-miniconda@3b0f2504dd76ef23b6d31f291f4913fb60ab5ff3 # v2
with:
channels: default,conda-forge,joshburt
channels: default,conda-forge
auto-activate-base: true
activate-environment: github.action
environment-file: environment.github.yml
@@ -46,8 +46,3 @@ jobs:
- name: Generate Coverage Report
run: |
anaconda-project run coverage
- name: SonarCloud Scan
uses: sonarsource/sonarcloud-github-action@master
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
7 changes: 1 addition & 6 deletions .github/workflows/pull-request.yml
@@ -34,7 +34,7 @@ jobs:
# https://github.com/conda-incubator/setup-miniconda
uses: conda-incubator/setup-miniconda@3b0f2504dd76ef23b6d31f291f4913fb60ab5ff3 # v2
with:
channels: default,conda-forge,joshburt
channels: default,conda-forge
auto-activate-base: true
activate-environment: github.action
environment-file: environment.github.yml
@@ -56,8 +56,3 @@ jobs:
- name: Integration Tests
run: |
anaconda-project run test:integration
- name: SonarCloud Scan
uses: sonarsource/sonarcloud-github-action@master
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
77 changes: 39 additions & 38 deletions anaconda-project.yml
@@ -113,8 +113,8 @@ commands:
sphinx-build -b html docs/source docs/build
channels:
- defaults
- https://conda.anaconda.org/conda-forge/
- https://conda.anaconda.org/joshburt/

platforms:
- linux-64
@@ -129,73 +129,74 @@ env_specs:
minimum:
description: Minimum Runtime Environment
packages:
- python=3.10.8
- mlflow=2.3.0
- ipykernel
- defaults:python=3.11
- defaults:mlflow=2.6.0
- defaults:ipykernel

# The `default` environment contains all external drivers.
default:
description: Runtime Environment
packages:
- python=3.10.8
- mlflow=2.3.0
- ipykernel
- defaults:python=3.11
- defaults:mlflow=2.6.0
- defaults:ipykernel

###### Backend SQL Drivers ######

# PostgreSQL | https://docs.sqlalchemy.org/en/20/core/engines.html#postgresql
- psycopg2=2.9.3
- pg8000=1.29.3
- asyncpg=0.27.0
- defaults:psycopg2
- conda-forge:pg8000
- defaults::asyncpg

# MySQL | https://docs.sqlalchemy.org/en/20/core/engines.html#mysql
- mysqlclient=2.0.3
- PyMySQL=1.0.2
- defaults::mysqlclient
- defaults::PyMySQL

# Oracle Drivers | https://docs.sqlalchemy.org/en/20/core/engines.html#oracle
- cx_oracle=8.3.0
- defaults::cx_oracle

# Microsoft SQL Server Drivers | https://docs.sqlalchemy.org/en/20/core/engines.html#microsoft-sql-server
- pyodbc=4.0.35
- conda-forge:pyodbc

###### Backend File System Drivers ######
- azure-storage-blob=12.14.1
- google-cloud-storage=2.7.0
- conda-forge:azure-storage-blob
- conda-forge:google-cloud-storage

# The `development` environment is used for development and maintenance of this solution.
development:
description: Development Environment
packages:
- python=3.10.8
- mlflow=2.3.0
- ipykernel
- isort
- black
- black-jupyter
- pylint
- coverage
- pyyaml
- sphinx
- sphinx-rtd-theme
- myst-parser
- defaults::python=3.11
- defaults::mlflow=2.6.0
- defaults::ipykernel
- defaults::isort
- defaults::black
- defaults::pylint
- defaults::coverage
- defaults::pyyaml

# Documentation
- defaults:sphinx
- defaults:sphinx-rtd-theme
- defaults:myst-parser

###### Backend SQL Drivers ######

# PostgreSQL | https://docs.sqlalchemy.org/en/20/core/engines.html#postgresql
- psycopg2=2.9.3
- pg8000=1.29.3
- asyncpg=0.27.0
- defaults::psycopg2
- conda-forge:pg8000
- defaults:asyncpg

# MySQL | https://docs.sqlalchemy.org/en/20/core/engines.html#mysql
- mysqlclient=2.0.3
- PyMySQL=1.0.2
- defaults:mysqlclient
- defaults:PyMySQL

# Oracle Drivers | https://docs.sqlalchemy.org/en/20/core/engines.html#oracle
- cx_oracle=8.3.0
- defaults:cx_oracle

# Microsoft SQL Server Drivers | https://docs.sqlalchemy.org/en/20/core/engines.html#microsoft-sql-server
- pyodbc=4.0.35
- conda-forge:pyodbc

###### Backend File System Drivers ######
- azure-storage-blob=12.14.1
- google-cloud-storage=2.7.0
####### Backend File System Drivers ######
- conda-forge:azure-storage-blob
- conda-forge:google-cloud-storage
2 changes: 1 addition & 1 deletion docs/build/.buildinfo
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 0adc5f2f5c46253ce1e3c94b39fc6f3d
config: a7479ecc6a404409f025bed59af7915c
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file modified docs/build/.doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/build/.doctrees/high_level_concepts.doctree
Binary file not shown.
Binary file modified docs/build/.doctrees/index.doctree
Binary file not shown.
Binary file modified docs/build/.doctrees/installation_guide.doctree
Binary file not shown.
Binary file modified docs/build/.doctrees/server_overview.doctree
Binary file not shown.
8 changes: 4 additions & 4 deletions docs/build/_sources/high_level_concepts.md.txt
@@ -2,8 +2,8 @@

Tracking Server, Workflows, and Endpoints can all run as deployments natively within Anaconda Enterprise.

Below we can see the deployment and access pattern for MLFlow Tracking Server within Anaconda Enterprise.
<img src="_static/AE5 MLFlow High Level.png" alt="AE5 MLFlow High Level Diagram">
Below we can see the deployment and access pattern for MLflow Tracking Server within Anaconda Enterprise.
<img src="_static/AE5 MLFlow High Level.png" alt="AE5 MLflow High Level Diagram">
In the following sections we will go into further detail on each tier within the diagram.

## Consumers
@@ -14,11 +14,11 @@ Consumers are internal or external to the platform. Both types of consumers mus

**External Consumer Examples**

* Users directly accessing the MLFlow Tracking Server, triggering project runs, or accessing model endpoints.
* Users directly accessing the MLflow Tracking Server, triggering project runs, or accessing model endpoints.

**Internal Consumer Examples**

* AE5 hosted notebook instances or workflows which interact with the MLFlow tracking server for tracking, or reporting.
* AE5 hosted notebook instances or workflows which interact with the MLflow tracking server for tracking, or reporting.

## AE5 Platform

2 changes: 1 addition & 1 deletion docs/build/_sources/index.rst.txt
@@ -1,4 +1,4 @@
MLFlow Tracking Server For Anaconda Enterprise
MLflow Tracking Server For Anaconda Enterprise
===========================================================

.. toctree::
24 changes: 12 additions & 12 deletions docs/build/_sources/installation_guide.md.txt
@@ -2,7 +2,7 @@

Overview
--------
Provides a hosted MLFlow Tracking Server meant for deployment into an Anaconda Enterprise environment.
Provides a hosted MLflow Tracking Server meant for deployment into an Anaconda Enterprise environment.

Configuration
--------
@@ -20,7 +20,7 @@ These should be defined as AE5 secrets within the service account running the tr

**Details**
* The backend store URI will most likely contain credentials for the connection and should not be exposed within anaconda-project.yml as plain text.
* For additional details see: [SQLAlchemy - Engine Configuration](https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls) and [MLFlow - Backend Stores](https://mlflow.org/docs/2.3.0/tracking.html#backend-stores).
* For additional details see: [SQLAlchemy - Engine Configuration](https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls) and [MLflow - Backend Stores](https://mlflow.org/docs/2.6.0/tracking.html#backend-stores).
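
A hedged illustration of the connection-string format (the environment variable name and all values below are placeholders, not taken from this repository); in practice this value belongs in an AE5 secret, never in anaconda-project.yml:

```python
# Hypothetical sketch: a SQLAlchemy-style backend store URI for PostgreSQL.
# The variable name, host, database, and credentials are placeholders.
import os

os.environ["MLFLOW_BACKEND_STORE_URI"] = (
    "postgresql+psycopg2://mlflow_user:REDACTED@db.internal.example.com:5432/mlflow"
)
```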

**Default**

@@ -35,7 +35,7 @@ These should be defined as AE5 secrets within the service account running the tr

**Details**
* The artifact destination may not be sensitive and can be set as an ae5 secret for ease of configuration, or directly within the anaconda-project.yml for the tracking server project.
* See [MLFlow Artifact Stores](https://mlflow.org/docs/2.3.0/tracking.html#artifact-stores) for supported stores (this can be as simple as a locally mounted volume).
* See [MLflow Artifact Stores](https://mlflow.org/docs/2.6.0/tracking.html#artifact-stores) for supported stores (this can be as simple as a locally mounted volume).
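
A short sketch (the variable name is hypothetical and the paths are placeholders): the destination can be a locally mounted path or a remote store URI.

```python
# Hypothetical variable name and placeholder values: a mounted volume works, as does
# a remote store such as S3, Azure Blob Storage, or GCS.
import os

os.environ["MLFLOW_ARTIFACTS_DESTINATION"] = "/data/mlflow-artifacts"
# e.g. "s3://my-bucket/mlflow-artifacts"
```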

**Default**

@@ -69,17 +69,17 @@ These should be defined as AE5 secrets within the service account running the tr
AE5 Private Endpoint Access Token

**Details**
* This is used for authorization to the MLFLow services.
* This is used for authorization to the MLflow services.


6. `MLFLOW_TRACKING_GC_TTL`

**Description**

MLFLow compliant string for the time limit
MLflow compliant string for the time limit

**Details**
* See [MLFlow Tracking Server Garbage Collection](https://mlflow.org/docs/2.3.0/cli.html?highlight=gc#mlflow-gc) and [GC Older Than](https://mlflow.org/docs/2.3.0/cli.html?highlight=gc#cmdoption-mlflow-gc-older-than) for additional details.
* See [MLflow Tracking Server Garbage Collection](https://mlflow.org/docs/2.6.0/cli.html?highlight=gc#mlflow-gc) and [GC Older Than](https://mlflow.org/docs/2.6.0/cli.html?highlight=gc#cmdoption-mlflow-gc-older-than) for additional details.
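
A minimal sketch, assuming the value follows MLflow's `--older-than` duration syntax (the example value is a placeholder):

```python
# Hypothetical example: a duration in the <days>d<hours>h<minutes>m<seconds>s form,
# here thirty days. Items deleted more recently than this are left alone by gc.
import os

os.environ["MLFLOW_TRACKING_GC_TTL"] = "30d0h0m0s"
```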

**Default**

@@ -177,8 +177,8 @@ Deployment
12. **Store Private Deployment Token**

* If using the stale artifact clean up process then this **MUST** be stored within an ae5 secret.
* The token will be **REQUIRED** by all clients and users which need to access the MLFlow Tracking Service API.
* The administrator of the MLFlow Tracking Server **MUST** generate and provide the access token **EVERY TIME** the server is restart.
* The token will be **REQUIRED** by all clients and users which need to access the MLflow Tracking Service API.
* The administrator of the MLflow Tracking Server **MUST** generate and provide the access token **EVERY TIME** the server is restarted.
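
A hedged client-side sketch (the URL and token values are placeholders) of how a consumer would supply that token through MLflow's standard environment variables:

```python
# Placeholder values: point clients at the private deployment and supply the AE5
# private endpoint token as a bearer token; redistribute it after each server restart.
import os

os.environ["MLFLOW_TRACKING_URI"] = "https://mlflow.example.anaconda.com"
os.environ["MLFLOW_TRACKING_TOKEN"] = "<ae5-private-endpoint-token>"
```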

**Store the value:**

@@ -195,7 +195,7 @@

What is garbage collection?

The MLFlow Tracking Server does not automatically purge resources that a client deletes. Instead, resources are set to the `deleted` lifecycle state and hidden from the UI and most API calls by default. A deleted resource will block the creation of a new resource of the same name until the garbage collection process has purged it. In order to purge deleted items a garbage collection process **MUST** be manually executed.
The MLflow Tracking Server does not automatically purge resources that a client deletes. Instead, resources are set to the `deleted` lifecycle state and hidden from the UI and most API calls by default. A deleted resource will block the creation of a new resource of the same name until the garbage collection process has purged it. In order to purge deleted items a garbage collection process **MUST** be manually executed.

* The project comes with a command for invoking garbage collection. A schedule for the process **SHOULD** be created so that this is occurring regularly.
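
A minimal sketch of the soft-delete behaviour that makes this necessary (hypothetical experiment name; assumes the tracking URI and token are already configured in the environment):

```python
# Deleting an experiment only moves it to the `deleted` lifecycle stage; the name stays
# reserved until `mlflow gc` purges it, so re-creating it fails in the meantime.
from mlflow.tracking import MlflowClient

client = MlflowClient()  # reads MLFLOW_TRACKING_URI / MLFLOW_TRACKING_TOKEN from the environment
exp_id = client.create_experiment("demo-experiment")
client.delete_experiment(exp_id)

# Raises "experiment already exists in deleted state" until garbage collection runs:
# client.create_experiment("demo-experiment")
```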

@@ -218,6 +218,6 @@ These commands are used to start the server and perform the various administrati

| Command | Environment | Description |
|-------------------|-------------|:---------------------------------------------------------------|
| TrackingServer | Runtime | Launches the MLFlow Tracking Server |
| GarbageCollection | Runtime | Launches the MLFlow tracking server garbage collection process |
| DatabaseUpgrade | Runtime | Launches the MLFlow tracking server database upgrade process |
| TrackingServer | Runtime | Launches the MLflow Tracking Server |
| GarbageCollection | Runtime | Launches the MLflow tracking server garbage collection process |
| DatabaseUpgrade | Runtime | Launches the MLflow tracking server database upgrade process |
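
As a hedged sketch (command names taken from the table above; assumes the project root as the working directory), a scheduled job could invoke these as ordinary anaconda-project commands:

```python
# Minimal illustration: each table entry is an anaconda-project command, so the
# scheduled garbage-collection run boils down to a single invocation.
import subprocess

subprocess.run(["anaconda-project", "run", "GarbageCollection"], check=True)
```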
18 changes: 9 additions & 9 deletions docs/build/_sources/server_overview.md.txt
@@ -1,16 +1,16 @@
# MLFlow Tracking Server Overview
# MLflow Tracking Server Overview

The recommended way to deploy an MLFlow Tracking Server directly within an AE5 instance is with proxied artifact mode.
The recommended way to deploy an MLflow Tracking Server directly within an AE5 instance is with proxied artifact mode.

* In practice this simplifies, unifies, and limits access to (metadata and models) allowing the tracking server to act as the source of truth for experimentation tracking and model storage.
* In practice, this simplifies, unifies, and limits access to metadata and models, allowing the tracking server to act as the source of truth for experimentation tracking and model storage.
* The alternatives require clients to have read/write access to the backend artifact storage system. Not putting an API in front of this can lead to catastrophic data loss (e.g. user error, etc.) and is not recommended. The tracking server supports “file system only access“ deployments and many other configurations to accommodate edge cases.
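
A hedged sketch of the client-side view under proxied artifact mode (the tracking URL is a placeholder): clients only ever talk to the tracking server, which writes to its own backing stores.

```python
# Placeholder URL: the client never needs credentials for the artifact store itself;
# in proxied mode artifact uploads go through the tracking server's REST API.
import mlflow

mlflow.set_tracking_uri("https://mlflow.example.anaconda.com")

with mlflow.start_run():
    mlflow.log_param("alpha", 0.5)
    mlflow.log_text("proxied artifact example", "notes.txt")  # stored by the server, not the client
```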


## Tracking Server

### Assumptions

* Operate in proxied artifact mode. See [MLflow Tracking — MLflow 2.3.0 documentation](https://www.mlflow.org/docs/2.3.0/tracking.html#scenario-5-mlflow-tracking-server-enabled-with-proxied-artifact-storage-access) for additional details.
* Operate in proxied artifact mode. See [MLflow Tracking — MLflow 2.6.0 documentation](https://www.mlflow.org/docs/2.6.0/tracking.html#scenario-5-mlflow-tracking-server-enabled-with-proxied-artifact-storage-access) for additional details.
* Run as a private deployment.
* Deployed with a static URL.
* Storage is on a persistent volume, or a location such as the project `data` directory.
@@ -23,21 +23,21 @@ The tracking server has two types of assets (metadata, and artifacts).
### Metadata

Metadata can be persisted to any backend supported by SQLAlchemy, and the server itself uses a SQLAlchemy compliant connection string for this configuration parameter.
* See [Where Runs Are Recorded](https://www.mlflow.org/docs/2.3.0/tracking.html#where-runs-are-recorded) for details on all supported configurations.
* See [Where Runs Are Recorded](https://www.mlflow.org/docs/2.6.0/tracking.html#where-runs-are-recorded) for details on all supported configurations.
* For additional details on SQLAlchemy see: [Database URLs](https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls).

### File Assets

* File assets are stored on a file system accessible to the server at runtime. See [Artifact Stores](https://www.mlflow.org/docs/2.3.0/tracking.html#artifact-stores) for details on all supported configurations.
* File assets are stored on a file system accessible to the server at runtime. See [Artifact Stores](https://www.mlflow.org/docs/2.6.0/tracking.html#artifact-stores) for details on all supported configurations.

### Backup Strategy

* Standard backup and verification practices **SHOULD** be followed that meet the business continuity requirements of the organization.
* Since the MLFlow Tracking Server uses two different storage systems, backups **MUST** be synchronized between the two systems to ensure recoverability.
* Since the MLflow Tracking Server uses two different storage systems, backups **MUST** be synchronized between the two systems to ensure recoverability.

### Upgrades

* MLFlow has a schema upgrade mechanism for its database. See [DB Upgrade](https://mlflow.org/docs/2.3.0/cli.html?highlight=schema#mlflow-db) for the documented process and its caveats. Specifically ensure that backups of the data tier exist and are usable as the process can be destructive and is not reversible.
* MLflow has a schema upgrade mechanism for its database. See [DB Upgrade](https://mlflow.org/docs/2.6.0/cli.html?highlight=schema#mlflow-db) for the documented process and its caveats. Specifically, ensure that backups of the data tier exist and are usable as the process can be destructive and is not reversible.
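
A hedged sketch of the documented flow (the connection string is a placeholder): take and verify a backup first, since the migration rewrites the schema in place and is not reversible.

```python
# Placeholder connection string: run the schema migration only after a verified backup
# of the backend store exists.
import subprocess

backend_uri = "postgresql+psycopg2://mlflow_user:REDACTED@db.internal.example.com:5432/mlflow"
subprocess.run(["mlflow", "db", "upgrade", backend_uri], check=True)
```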

### Disaster Recovery

@@ -47,5 +47,5 @@ Metadata can be persisted to any backend supported by SQLAlchemy, and the server

* UI must be `popped out` of iframe. (Lack of CORS permissions involving iframing the UI causes the UI to be unusable)
* [mlflow UI not working in iframe[BUG]--cross origin issue · Issue #3583 · mlflow/mlflow](https://github.com/mlflow/mlflow/issues/3583)
* MLFlow does NOT have any authentication/authorization mechanism. We leverage AE5’s authorization mechanism to secure it. (e.g. private deployment, with token access for API consumption)
* MLflow does NOT have any authentication/authorization mechanism. We leverage AE5’s authorization mechanism to secure it. (e.g. private deployment, with token access for API consumption)
* Possible performance issues with large numbers of experiments ([MLflow worker timeout when opening UI · Issue #925 · mlflow/mlflow](https://github.com/mlflow/mlflow/issues/925))