diff --git a/docs/docs-environment.yml b/docs/docs-environment.yml index a73346a0..8476c62a 100644 --- a/docs/docs-environment.yml +++ b/docs/docs-environment.yml @@ -19,12 +19,12 @@ dependencies: # so local pip install doesn't - dash<=2.15.0,>=2.11.0 - dash-bootstrap-components<=1.5.0,>=1.0.0 - - fsspec<=2023.12.2,>=2021.4.0 - - intake[dataframe]<=0.7.0,>=0.5.2 + - fsspec<=2024.2.0,>=2021.4.0 + - intake[dataframe]<=2.0.1,>=0.5.2 - jsonpath-ng<=1.6.1,>=1.5.3 - - numpy<=1.26.3,>=1.22.0 - - pandas<=2.2.0,>=1.0.0 + - numpy<=1.26.4,>=1.22.0 + - pandas<=2.2.1,>=1.0.0 - prefect<=1.2.4,>=0.12.0 - pyarrow<=15.0.0,>=14.0.1 - PyYAML<=6.0.1,>=5.4.0 - - scikit-learn<=1.4.0,>=0.22.0 + - scikit-learn<=1.4.1.post1,>=0.22.0 diff --git a/environment.yml b/environment.yml index 151af57f..826e3727 100644 --- a/environment.yml +++ b/environment.yml @@ -6,20 +6,20 @@ dependencies: - pip - click<=8.1.7,>=7.1 - - fsspec<=2023.12.2,>=2021.4.0 - - intake[dataframe]<=0.7.0,>=0.5.2 + - fsspec<=2024.2.0,>=2021.4.0 + - intake[dataframe]<=2.0.1,>=0.5.2 - jsonpath-ng<=1.6.1,>=1.5.3 - - numpy<=1.26.3,>=1.22.0 - - pandas<=2.2.0,>=1.0.0 + - numpy<=1.26.4,>=1.22.0 + - pandas<=2.2.1,>=1.0.0 - pyarrow<=15.0.0,>=14.0.1 - PyYAML<=6.0.1,>=5.4.0 - - scikit-learn<=1.4.0,>=0.22.0 + - scikit-learn<=1.4.1.post1,>=0.22.0 # for prefect extras - prefect<=1.2.4,>=0.12.0 # for s3fs extras - - s3fs<=2023.12.2,>=0.4 + - s3fs<=2024.2.0,>=0.4 # for viz extras - dash<=2.15.0,>=2.11.0 diff --git a/notebooks/logging-examples/multiple-backend.ipynb b/notebooks/logging-examples/multiple-backend.ipynb index 2a86d785..9caaa537 100644 --- a/notebooks/logging-examples/multiple-backend.ipynb +++ b/notebooks/logging-examples/multiple-backend.ipynb @@ -6,7 +6,8 @@ "metadata": {}, "source": [ "# Log with Multiple Backends\n", - "`rubicon-ml` allows users to instantiate `Rubicon` objects with multiple backends to write to/read from. These backends include local, memory, and S3 repositories. 
Here's a walk through of how one might instantiate and use a `Rubicon` object with multiple backends." + "\n", + "rubicon-ml allows users to instantiate `Rubicon` objects with multiple backends to write to/read from at once. These backends include local, memory, and S3 repositories. Here's a walkthrough of how one might instantiate and use a `Rubicon` object with multiple backends." ] }, { @@ -19,37 +20,26 @@ "from rubicon_ml import Rubicon" ] }, - { - "cell_type": "code", - "execution_count": 2, - "id": "e38b0be9", - "metadata": {}, - "outputs": [], - "source": [ - "#rb = Rubicon(persistence=\"memory\")\n", - "#or\n", - "#rb = Rubicon(persistence=\"filesystem\")" - ] - }, { "cell_type": "markdown", - "id": "e1936248", + "id": "b0ad7b71-0efe-4c10-8abc-8b78c8dbd6b1", "metadata": {}, "source": [ - "However, when we want multiple backends we utilize the `composite_config` kwarg:" + "Let's say we want to log to two separate locations on our local filesystem. This example is a bit contrived,\n", + "but you could imagine writing to both a local filesystem for quick, ad-hoc exploration and an S3 bucket for\n", + "persistent storage." 
] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "095655e5", "metadata": {}, "outputs": [], "source": [ - "#example multiple backend instantiaiton\n", - "rb = Rubicon(composite_config=[\n", - " {\"persistence\": \"filesystem\", \"root_dir\": \"./rubicon-root/rootA\"},\n", - " {\"persistence\": \"filesystem\", \"root_dir\": \"./rubicon-root/rootB\"},\n", + "rubicon_composite = Rubicon(composite_config=[\n", + " {\"persistence\": \"filesystem\", \"root_dir\": \"./rubicon-root/root_a\"},\n", + " {\"persistence\": \"filesystem\", \"root_dir\": \"./rubicon-root/root_b\"},\n", "])" ] }, @@ -58,27 +48,46 @@ "id": "66644d33", "metadata": {}, "source": [ - "### Write Commands\n", - "The following commands write to all insantiated backend repositories:" + "### Writing\n", + "\n", + "All of rubicon-ml's logging functions will now log to both locations in the filesystem with a single function call." ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "b7ecf19d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'8abfbff9-a9a1-46de-b782-3bb4ad1c41a0'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "project = rb.create_project(\"example_project\")\n", + "import pandas as pd\n", "\n", - "experiment = project.log_experiment(\"example_experiment\")\n", + "project_composite = rubicon_composite.create_project(name=\"multiple backends\")\n", + "experiment_composite = project_composite.log_experiment()\n", "\n", - "artifact = experiment.log_artifact(data_bytes=b\"bytes\", name=\"example_artifact\")\n", - "import pandas as pd\n", - "dataframe = experiment.log_dataframe(pd.DataFrame([[5, 0, 0], [0, 5, 1], [0, 0, 4]], columns=[\"x\", \"y\", \"z\"]))\n", - "feature = experiment.log_feature(\"year\")\n", - "metric = experiment.log_metric(\"accuracy\", .8)\n", - "parameter = experiment.log_parameter(\"n_estimators\")" + "feature = 
experiment_composite.log_feature(name=\"year\")\n", + "metric = experiment_composite.log_metric(name=\"accuracy\", value=1.0)\n", + "parameter = experiment_composite.log_parameter(name=\"n_estimators\", value=100)\n", + "artifact = experiment_composite.log_artifact(\n", + " data_bytes=b\"bytes\", name=\"example artifact\"\n", + ")\n", + "dataframe = experiment_composite.log_dataframe(\n", + " pd.DataFrame([[5, 0, 0], [0, 5, 1], [0, 0, 4]], columns=[\"x\", \"y\", \"z\"]),\n", + " name=\"example dataframe\",\n", + ")\n", + "\n", + "experiment_composite.id" ] }, { @@ -86,235 +95,126 @@ "id": "10db7e8b", "metadata": {}, "source": [ - "Let's verify both of our backends have been written to:" + "Let's verify both of our backends have been written to by retrieving the data one location at a time." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "c9e815cf", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1m\u001b[36mexampleproject\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36mexperiments\u001b[m\u001b[m metadata.json\n", - "\n", - "\n", - "\u001b[1m\u001b[36m8661b18a-afe3-4779-8ed4-e7d2a9aba244\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36martifacts\u001b[m\u001b[m \u001b[1m\u001b[36mfeatures\u001b[m\u001b[m \u001b[1m\u001b[36mmetrics\u001b[m\u001b[m\n", - "\u001b[1m\u001b[36mdataframes\u001b[m\u001b[m metadata.json \u001b[1m\u001b[36mparameters\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36me3b53858-ae06-4a11-996b-17b077821168\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36m8cd7f032-3206-480b-948b-44d878f6bb56\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36myear\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36maccuracy\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36mnestimators\u001b[m\u001b[m\n" - ] - } - ], - "source": [ - "!ls \"./rubicon-root/rootA\"\n", - "print(\"\\n\")\n", - "!ls 
\"./rubicon-root/rootA/exampleproject\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments/{experiment.id}\" \n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments/{experiment.id}/artifacts\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments/{experiment.id}/dataframes\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments/{experiment.id}/features\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments/{experiment.id}/metrics\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootA/exampleproject/experiments/{experiment.id}/parameters\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "d95347c9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1m\u001b[36mexampleproject\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36mexperiments\u001b[m\u001b[m metadata.json\n", - "\n", - "\n", - "\u001b[1m\u001b[36m8661b18a-afe3-4779-8ed4-e7d2a9aba244\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36martifacts\u001b[m\u001b[m \u001b[1m\u001b[36mfeatures\u001b[m\u001b[m \u001b[1m\u001b[36mmetrics\u001b[m\u001b[m\n", - "\u001b[1m\u001b[36mdataframes\u001b[m\u001b[m metadata.json \u001b[1m\u001b[36mparameters\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36me3b53858-ae06-4a11-996b-17b077821168\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36m8cd7f032-3206-480b-948b-44d878f6bb56\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36myear\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36maccuracy\u001b[m\u001b[m\n", - "\n", - "\n", - "\u001b[1m\u001b[36mnestimators\u001b[m\u001b[m\n" - ] + "data": { + "text/plain": [ + "'8abfbff9-a9a1-46de-b782-3bb4ad1c41a0'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": 
"execute_result" } ], "source": [ - "!ls \"./rubicon-root/rootB\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject/experiments\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject/experiments/{experiment.id}\" \n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject/experiments/{experiment.id}/artifacts\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject/experiments/{experiment.id}/dataframes\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject/experiments/{experiment.id}/features\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject/experiments/{experiment.id}/metrics\"\n", - "print(\"\\n\")\n", - "!ls \"./rubicon-root/rootB/exampleproject/experiments/{experiment.id}/parameters\"" + "rubicon_a = Rubicon(persistence=\"filesystem\", root_dir=\"./rubicon-root/root_a\")\n", + "project_a = rubicon_a.get_project(name=\"multiple backends\")\n", + "\n", + "project_a.experiments()[0].id" ] }, { "cell_type": "markdown", - "id": "12a5c1df", + "id": "baf58168-49ca-4659-b2c8-2315853cbad9", "metadata": {}, "source": [ - "### Read Commands\n", - "Now that we've seen both of our backends have been written to, let's see the read commands. Read commands will iterate over all backend repositories and return from the first one they are able to read from. A `RubiconException` will be raised if none of the backend repositories can be read the requested item(s)." + "Each experiments' IDs match, confirming they are the same." 
] }, { "cell_type": "code", - "execution_count": 7, - "id": "0a2eda44", + "execution_count": 5, + "id": "d95347c9", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "projects: []\n", - "\n", - "\n", - "experiments: []\n", - "\n", - "\n", - "artifacts: []\n", - "\n", - "\n", - "dataframes: []\n", - "\n", - "\n", - "features: []\n", - "\n", - "\n", - "metrics: []\n", - "\n", - "\n", - "parameters: []\n", - "\n", - "\n" - ] + "data": { + "text/plain": [ + "'8abfbff9-a9a1-46de-b782-3bb4ad1c41a0'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "projects = rb.projects()\n", - "print(\"projects: \" + str(projects))\n", - "print(\"\\n\")\n", - "\n", - "experiments = project.experiments()\n", - "print(\"experiments: \" + str(experiments))\n", - "print(\"\\n\")\n", - "\n", - "artifacts = experiment.artifacts()\n", - "print(\"artifacts: \" + str(artifacts))\n", - "print(\"\\n\")\n", - "\n", - "dataframes = experiment.dataframes()\n", - "print(\"dataframes: \" + str(dataframes))\n", - "print(\"\\n\")\n", - "\n", - "features = experiment.features()\n", - "print(\"features: \" + str(features))\n", - "print(\"\\n\")\n", + "rubicon_b = Rubicon(persistence=\"filesystem\", root_dir=\"./rubicon-root/root_b\")\n", + "project_b = rubicon_b.get_project(name=\"multiple backends\")\n", "\n", - "metrics = experiment.metrics()\n", - "print(\"metrics: \" + str(metrics))\n", - "print(\"\\n\")\n", - "\n", - "parameters = experiment.parameters()\n", - "print(\"parameters: \" + str(parameters))\n", - "print(\"\\n\")" + "project_b.experiments()[0].id" ] }, { "cell_type": "markdown", - "id": "faf2bfcd", - "metadata": {}, - "source": [ - "#### Additional Read Commands\n", - "Along with the commands demonstrated above, all other \"read\" type rubicon commands work the same way in that they will iterate over backend repositories and return from the first one they are able to read from. 
These include commands which read a specific logged object like `get_project()`, `experiment()`, `artifact()`, `dataframe()`, `metric()`, and `parameter()`." - ] - }, - { - "cell_type": "markdown", - "id": "e4763de2", + "id": "12a5c1df", "metadata": {}, "source": [ - "Cleaning up local repository folders" + "### Reading\n", + "\n", + "rubicon-ml's reading functions will iterate over all backend repositories and return from the first one they are able to read from. A `RubiconException` will be raised if the requested item(s) cannot be read from any of the backend repositories." ] }, { "cell_type": "code", - "execution_count": 8, - "id": "d0edd788", + "execution_count": 6, + "id": "d66157e0-77f5-47d7-994d-09598d878e24", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "rm -rf rubicon-root/rootA" + "project_read = rubicon_composite.get_project(name=\"multiple backends\")\n", + "project_read" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "9ff8be30", + "execution_count": 7, + "id": "1f9b622e-84d0-465f-9110-03a3c0289e74", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "features: ['year']\n", + "metrics: ['accuracy']\n", + "parameters: ['n_estimators']\n", + "artifact data: b'bytes'\n", + "dataframe data:\n", + " x y z\n", + "0 5 0 0\n", + "1 0 5 1\n", + "2 0 0 4\n" + ] + } + ], "source": [ - "rm -rf rubicon-root/rootB" + "for experiment in project_read.experiments():\n", + " print(f\"features: {[f.name for f in experiment.features()]}\")\n", + " print(f\"metrics: {[m.name for m in experiment.metrics()]}\")\n", + " print(f\"parameters: {[p.name for p in experiment.parameters()]}\")\n", + " print(f\"artifact data: {experiment.artifact(name='example artifact').get_data()}\")\n", + " print(f\"dataframe data:\\n{experiment.dataframe(name='example 
dataframe').get_data()}\")" ] } ], @@ -334,7 +234,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.12.1" } }, "nbformat": 4, diff --git a/setup.cfg b/setup.cfg index 24a71eae..07cc7354 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,20 +29,20 @@ include_package_data = True packages = find: install_requires = click<=8.1.7,>=7.1 - fsspec<=2023.12.2,>=2021.4.0 - intake[dataframe]<=0.7.0,>=0.5.2 + fsspec<=2024.2.0,>=2021.4.0 + intake[dataframe]<=2.0.1,>=0.5.2 jsonpath-ng<=1.6.1,>=1.5.3 - numpy<=1.26.3,>=1.22.0 - pandas<=2.2.0,>=1.0.0 + numpy<=1.26.4,>=1.22.0 + pandas<=2.2.1,>=1.0.0 pyarrow<=15.0.0,>=14.0.1 PyYAML<=6.0.1,>=5.4.0 - scikit-learn<=1.4.0,>=0.22.0 + scikit-learn<=1.4.1.post1,>=0.22.0 [options.extras_require] prefect = prefect<=1.2.4,>=0.12.0 s3 = - s3fs<=2023.12.2,>=0.4 + s3fs<=2024.2.0,>=0.4 ui = dash<=2.15.0,>=2.11.0 dash-bootstrap-components<=1.5.0,>=1.0.0 @@ -53,7 +53,7 @@ all = dash<=2.15.0,>=2.11.0 dash-bootstrap-components<=1.5.0,>=1.0.0 prefect<=1.2.4,>=0.12.0 - s3fs<=2023.12.2,>=0.4 + s3fs<=2024.2.0,>=0.4 [options.entry_points] console_scripts =