From 18936b6acf98898faf7997440e09997e10ca47da Mon Sep 17 00:00:00 2001 From: ryanSoley Date: Mon, 26 Feb 2024 10:10:28 +0000 Subject: [PATCH 1/3] [edgetest] automated change --- setup.cfg | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/setup.cfg b/setup.cfg index 24a71eae..07cc7354 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,20 +29,20 @@ include_package_data = True packages = find: install_requires = click<=8.1.7,>=7.1 - fsspec<=2023.12.2,>=2021.4.0 - intake[dataframe]<=0.7.0,>=0.5.2 + fsspec<=2024.2.0,>=2021.4.0 + intake[dataframe]<=2.0.1,>=0.5.2 jsonpath-ng<=1.6.1,>=1.5.3 - numpy<=1.26.3,>=1.22.0 - pandas<=2.2.0,>=1.0.0 + numpy<=1.26.4,>=1.22.0 + pandas<=2.2.1,>=1.0.0 pyarrow<=15.0.0,>=14.0.1 PyYAML<=6.0.1,>=5.4.0 - scikit-learn<=1.4.0,>=0.22.0 + scikit-learn<=1.4.1.post1,>=0.22.0 [options.extras_require] prefect = prefect<=1.2.4,>=0.12.0 s3 = - s3fs<=2023.12.2,>=0.4 + s3fs<=2024.2.0,>=0.4 ui = dash<=2.15.0,>=2.11.0 dash-bootstrap-components<=1.5.0,>=1.0.0 @@ -53,7 +53,7 @@ all = dash<=2.15.0,>=2.11.0 dash-bootstrap-components<=1.5.0,>=1.0.0 prefect<=1.2.4,>=0.12.0 - s3fs<=2023.12.2,>=0.4 + s3fs<=2024.2.0,>=0.4 [options.entry_points] console_scripts = From 238b2a4672afa353c824548209e6f5ae70981372 Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Mon, 26 Feb 2024 10:56:22 -0500 Subject: [PATCH 2/3] update environments --- docs/docs-environment.yml | 10 +++++----- environment.yml | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/docs-environment.yml b/docs/docs-environment.yml index a73346a0..8476c62a 100644 --- a/docs/docs-environment.yml +++ b/docs/docs-environment.yml @@ -19,12 +19,12 @@ dependencies: # so local pip install doesn't - dash<=2.15.0,>=2.11.0 - dash-bootstrap-components<=1.5.0,>=1.0.0 - - fsspec<=2023.12.2,>=2021.4.0 - - intake[dataframe]<=0.7.0,>=0.5.2 + - fsspec<=2024.2.0,>=2021.4.0 + - intake[dataframe]<=2.0.1,>=0.5.2 - jsonpath-ng<=1.6.1,>=1.5.3 - - numpy<=1.26.3,>=1.22.0 - - 
pandas<=2.2.0,>=1.0.0 + - numpy<=1.26.4,>=1.22.0 + - pandas<=2.2.1,>=1.0.0 - prefect<=1.2.4,>=0.12.0 - pyarrow<=15.0.0,>=14.0.1 - PyYAML<=6.0.1,>=5.4.0 - - scikit-learn<=1.4.0,>=0.22.0 + - scikit-learn<=1.4.1.post1,>=0.22.0 diff --git a/environment.yml b/environment.yml index 151af57f..826e3727 100644 --- a/environment.yml +++ b/environment.yml @@ -6,20 +6,20 @@ dependencies: - pip - click<=8.1.7,>=7.1 - - fsspec<=2023.12.2,>=2021.4.0 - - intake[dataframe]<=0.7.0,>=0.5.2 + - fsspec<=2024.2.0,>=2021.4.0 + - intake[dataframe]<=2.0.1,>=0.5.2 - jsonpath-ng<=1.6.1,>=1.5.3 - - numpy<=1.26.3,>=1.22.0 - - pandas<=2.2.0,>=1.0.0 + - numpy<=1.26.4,>=1.22.0 + - pandas<=2.2.1,>=1.0.0 - pyarrow<=15.0.0,>=14.0.1 - PyYAML<=6.0.1,>=5.4.0 - - scikit-learn<=1.4.0,>=0.22.0 + - scikit-learn<=1.4.1.post1,>=0.22.0 # for prefect extras - prefect<=1.2.4,>=0.12.0 # for s3fs extras - - s3fs<=2023.12.2,>=0.4 + - s3fs<=2024.2.0,>=0.4 # for viz extras - dash<=2.15.0,>=2.11.0 From 156446e4b1597939f18359b883ca57b564515f09 Mon Sep 17 00:00:00 2001 From: Ryan Soley Date: Mon, 26 Feb 2024 11:34:01 -0500 Subject: [PATCH 3/3] update failing notebook --- .../logging-examples/multiple-backend.ipynb | 332 ++++++------------ 1 file changed, 116 insertions(+), 216 deletions(-) diff --git a/notebooks/logging-examples/multiple-backend.ipynb b/notebooks/logging-examples/multiple-backend.ipynb index 2a86d785..9caaa537 100644 --- a/notebooks/logging-examples/multiple-backend.ipynb +++ b/notebooks/logging-examples/multiple-backend.ipynb @@ -6,7 +6,8 @@ "metadata": {}, "source": [ "# Log with Multiple Backends\n", - "`rubicon-ml` allows users to instantiate `Rubicon` objects with multiple backends to write to/read from. These backends include local, memory, and S3 repositories. Here's a walk through of how one might instantiate and use a `Rubicon` object with multiple backends." + "\n", + "rubicon-ml allows users to instantiate `Rubicon` objects with multiple backends to write to/read from at once. 
These backends include local, memory, and S3 repositories. Here's a walk through of how one might instantiate and use a `Rubicon` object with multiple backends." ] }, { @@ -19,37 +20,26 @@ "from rubicon_ml import Rubicon" ] }, - { - "cell_type": "code", - "execution_count": 2, - "id": "e38b0be9", - "metadata": {}, - "outputs": [], - "source": [ - "#rb = Rubicon(persistence=\"memory\")\n", - "#or\n", - "#rb = Rubicon(persistence=\"filesystem\")" - ] - }, { "cell_type": "markdown", - "id": "e1936248", + "id": "b0ad7b71-0efe-4c10-8abc-8b78c8dbd6b1", "metadata": {}, "source": [ - "However, when we want multiple backends we utilize the `composite_config` kwarg:" + "Let's say we want to log to two separate locations on our local filesystem. This example is a bit contrived,\n", + "but you could imagine writing to both a local filesystem for quick, ad-hoc exploration and an S3 bucket for\n", + "persistent storage." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "095655e5", "metadata": {}, "outputs": [], "source": [ - "#example multiple backend instantiaiton\n", - "rb = Rubicon(composite_config=[\n", - " {\"persistence\": \"filesystem\", \"root_dir\": \"./rubicon-root/rootA\"},\n", - " {\"persistence\": \"filesystem\", \"root_dir\": \"./rubicon-root/rootB\"},\n", + "rubicon_composite = Rubicon(composite_config=[\n", + " {\"persistence\": \"filesystem\", \"root_dir\": \"./rubicon-root/root_a\"},\n", + " {\"persistence\": \"filesystem\", \"root_dir\": \"./rubicon-root/root_b\"},\n", "])" ] }, @@ -58,27 +48,46 @@ "id": "66644d33", "metadata": {}, "source": [ - "### Write Commands\n", - "The following commands write to all insantiated backend repositories:" + "### Writing\n", + "\n", + "All of rubicon-ml's logging functions will now log to both locations in the filesystem with a single function call." 
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "b7ecf19d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'8abfbff9-a9a1-46de-b782-3bb4ad1c41a0'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "project = rb.create_project(\"example_project\")\n", + "import pandas as pd\n", "\n", - "experiment = project.log_experiment(\"example_experiment\")\n", + "project_composite = rubicon_composite.create_project(name=\"multiple backends\")\n", + "experiment_composite = project_composite.log_experiment()\n", "\n", - "artifact = experiment.log_artifact(data_bytes=b\"bytes\", name=\"example_artifact\")\n", - "import pandas as pd\n", - "dataframe = experiment.log_dataframe(pd.DataFrame([[5, 0, 0], [0, 5, 1], [0, 0, 4]], columns=[\"x\", \"y\", \"z\"]))\n", - "feature = experiment.log_feature(\"year\")\n", - "metric = experiment.log_metric(\"accuracy\", .8)\n", - "parameter = experiment.log_parameter(\"n_estimators\")" + "feature = experiment_composite.log_feature(name=\"year\")\n", + "metric = experiment_composite.log_metric(name=\"accuracy\", value=1.0)\n", + "parameter = experiment_composite.log_parameter(name=\"n_estimators\", value=100)\n", + "artifact = experiment_composite.log_artifact(\n", + " data_bytes=b\"bytes\", name=\"example artifact\"\n", + ")\n", + "dataframe = experiment_composite.log_dataframe(\n", + " pd.DataFrame([[5, 0, 0], [0, 5, 1], [0, 0, 4]], columns=[\"x\", \"y\", \"z\"]),\n", + " name=\"example dataframe\",\n", + ")\n", + "\n", + "experiment_composite.id" ] }, { @@ -86,235 +95,126 @@ "id": "10db7e8b", "metadata": {}, "source": [ - "Let's verify both of our backends have been written to:" + "Let's verify both of our backends have been written to by retrieving the data one location at a time." 
] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "c9e815cf", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1m\u001b[36mexampleproject\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36mexperiments\u001b[m\u001b[m metadata.json\n", - "\n", - "\n", - "\u001b[1m\u001b[36m8661b18a-afe3-4779-8ed4-e7d2a9aba244\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36martifacts\u001b[m\u001b[m \u001b[1m\u001b[36mfeatures\u001b[m\u001b[m \u001b[1m\u001b[36mmetrics\u001b[m\u001b[m\n", - "\u001b[1m\u001b[36mdataframes\u001b[m\u001b[m metadata.json \u001b[1m\u001b[36mparameters\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36me3b53858-ae06-4a11-996b-17b077821168\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36m8cd7f032-3206-480b-948b-44d878f6bb56\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36myear\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36maccuracy\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36mnestimators\u001b[m\u001b[m\n" - ] - } - ], - "source": [ - "!ls \"./rubicon-root/rootA\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments/{experiment.id}\" \n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments/{experiment.id}/artifacts\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments/{experiment.id}/dataframes\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments/{experiment.id}/features\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments/{experiment.id}/metrics\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments/{experiment.id}/parameters\"" - ] - }, - { - "cell_type": "code", - 
"execution_count": 6, - "id": "d95347c9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1m\u001b[36mexampleproject\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36mexperiments\u001b[m\u001b[m metadata.json\n", - "\n", - "\n", - "\u001b[1m\u001b[36m8661b18a-afe3-4779-8ed4-e7d2a9aba244\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36martifacts\u001b[m\u001b[m \u001b[1m\u001b[36mfeatures\u001b[m\u001b[m \u001b[1m\u001b[36mmetrics\u001b[m\u001b[m\n", - "\u001b[1m\u001b[36mdataframes\u001b[m\u001b[m metadata.json \u001b[1m\u001b[36mparameters\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36me3b53858-ae06-4a11-996b-17b077821168\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36m8cd7f032-3206-480b-948b-44d878f6bb56\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36myear\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36maccuracy\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36mnestimators\u001b[m\u001b[m\n" - ] + "data": { + "text/plain": [ + "'8abfbff9-a9a1-46de-b782-3bb4ad1c41a0'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "!ls \"./rubicon-root/rootB\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject/experiments\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject/experiments/{experiment.id}\" \n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject/experiments/{experiment.id}/artifacts\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject/experiments/{experiment.id}/dataframes\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject/experiments/{experiment.id}/features\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject/experiments/{experiment.id}/metrics\"\n", - "print(\"\\n\")\n", - "!ls 
\"./rubicon-root/rootB/exampleproject/experiments/{experiment.id}/parameters\"" + "rubicon_a = Rubicon(persistence=\"filesystem\", root_dir=\"./rubicon-root/root_a\")\n", + "project_a = rubicon_a.get_project(name=\"multiple backends\")\n", + "\n", + "project_a.experiments()[0].id" ] }, { "cell_type": "markdown", - "id": "12a5c1df", + "id": "baf58168-49ca-4659-b2c8-2315853cbad9", "metadata": {}, "source": [ - "### Read Commands\n", - "Now that we've seen both of our backends have been written to, let's see the read commands. Read commands will iterate over all backend repositories and return from the first one they are able to read from. A `RubiconException` will be raised if none of the backend repositories can be read the requested item(s)." + "Each experiments' IDs match, confirming they are the same." ] }, { "cell_type": "code", - "execution_count": 7, - "id": "0a2eda44", + "execution_count": 5, + "id": "d95347c9", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "projects: []\n", - "\n", - "\n", - "experiments: []\n", - "\n", - "\n", - "artifacts: []\n", - "\n", - "\n", - "dataframes: []\n", - "\n", - "\n", - "features: []\n", - "\n", - "\n", - "metrics: []\n", - "\n", - "\n", - "parameters: []\n", - "\n", - "\n" - ] + "data": { + "text/plain": [ + "'8abfbff9-a9a1-46de-b782-3bb4ad1c41a0'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "projects = rb.projects()\n", - "print(\"projects: \" + str(projects))\n", - "print(\"\\n\")\n", - "\n", - "experiments = project.experiments()\n", - "print(\"experiments: \" + str(experiments))\n", - "print(\"\\n\")\n", - "\n", - "artifacts = experiment.artifacts()\n", - "print(\"artifacts: \" + str(artifacts))\n", - "print(\"\\n\")\n", - "\n", - "dataframes = experiment.dataframes()\n", - "print(\"dataframes: \" + str(dataframes))\n", - "print(\"\\n\")\n", - "\n", - "features = experiment.features()\n", - "print(\"features: \" + 
str(features))\n", - "print(\"\\n\")\n", + "rubicon_b = Rubicon(persistence=\"filesystem\", root_dir=\"./rubicon-root/root_b\")\n", + "project_b = rubicon_b.get_project(name=\"multiple backends\")\n", "\n", - "metrics = experiment.metrics()\n", - "print(\"metrics: \" + str(metrics))\n", - "print(\"\\n\")\n", - "\n", - "parameters = experiment.parameters()\n", - "print(\"parameters: \" + str(parameters))\n", - "print(\"\\n\")" + "project_b.experiments()[0].id" ] }, { "cell_type": "markdown", - "id": "faf2bfcd", - "metadata": {}, - "source": [ - "#### Additional Read Commands\n", - "Along with the commands demonstrated above, all other \"read\" type rubicon commands work the same way in that they will iterate over backend repositories and return from the first one they are able to read from. These include commands which read a specific logged object like `get_project()`, `experiment()`, `artifact()`, `dataframe()`, `metric()`, and `parameter()`." - ] - }, - { - "cell_type": "markdown", - "id": "e4763de2", + "id": "12a5c1df", "metadata": {}, "source": [ - "Cleaning up local repository folders" + "### Reading\n", + "\n", + "rubicon-ml's reading functions will iterate over all backend repositories and return from the first one they are able to read from. A `RubiconException` will be raised if none of the backend repositories can read the requested item(s)." 
] }, { "cell_type": "code", - "execution_count": 8, - "id": "d0edd788", + "execution_count": 6, + "id": "d66157e0-77f5-47d7-994d-09598d878e24", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "rm -rf rubicon-root/rootA" + "project_read = rubicon_composite.get_project(name=\"multiple backends\")\n", + "project_read" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "9ff8be30", + "execution_count": 7, + "id": "1f9b622e-84d0-465f-9110-03a3c0289e74", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "features: ['year']\n", + "metrics: ['accuracy']\n", + "parameters: ['n_estimators']\n", + "artifact data: b'bytes'\n", + "dataframe data:\n", + " x y z\n", + "0 5 0 0\n", + "1 0 5 1\n", + "2 0 0 4\n" + ] + } + ], "source": [ - "rm -rf rubicon-root/rootB" + "for experiment in project_read.experiments():\n", + " print(f\"features: {[f.name for f in experiment.features()]}\")\n", + " print(f\"metrics: {[m.name for m in experiment.metrics()]}\")\n", + " print(f\"parameters: {[p.name for p in experiment.parameters()]}\")\n", + " print(f\"artifact data: {experiment.artifact(name='example artifact').get_data()}\")\n", + " print(f\"dataframe data:\\n{experiment.dataframe(name='example dataframe').get_data()}\")" ] } ], @@ -334,7 +234,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.12.1" } }, "nbformat": 4,