Merge branch 'doc'

stroblme · Mar 17, 2023 · 7b8449d · 7b8449d
2 parents 327baf4 + 594fb8d
commit 7b8449d
Show file tree

Hide file tree

Showing 56 changed files with 7,894 additions and 187 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 venv
 mlruns
+.mlruns_bckp
 *.pdf
 .ipython
 *.png

diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -85,6 +85,16 @@
             // Any other arguments should be passed as a comma-separated list
             // e.g "args": ["run", "--pipeline", "pipeline_name"]
         },
+        {
+            "name": "KR: - validation",
+            "type": "python",
+            "request": "launch",
+            "console": "integratedTerminal",
+            "module": "kedro",
+            "args": ["run", "--pipeline", "validation_qgnn_pipeline"]
+            // Any other arguments should be passed as a comma-separated list
+            // e.g "args": ["run", "--pipeline", "pipeline_name"]
+        },
         {
             "name": "KR: - training Optuna",
             "type": "python",
@@ -114,7 +124,8 @@
             "request": "launch",
             "console": "integratedTerminal",
             "module": "kedro",
-            "args": ["run"]
+            "args": ["run"],
+            // "justMyCode": false
             // Any other arguments should be passed as a comma-separated list
             // e.g "args": ["run", "--pipeline", "pipeline_name"]
         }

diff --git a/.vscode/mlflow_cleanup.py b/.vscode/mlflow_cleanup.py
@@ -0,0 +1,34 @@
+"""Archive or soft-delete the MLflow runs stored in one experiment folder.
+
+Every run directory below ``mlflow_path`` is judged by its ``meta.yaml``:
+
+* runs whose ``status`` is not 3 are moved into ``backup_dir``;
+* runs with ``status`` 3 whose ``end_time`` lies before ``cut_after`` get
+  ``lifecycle_stage: deleted`` written back into their ``meta.yaml``.
+
+Directories without a readable ``meta.yaml`` are skipped and left untouched.
+
+NOTE(review): 3 is presumably MLflow's ``RunStatus.FINISHED`` -- confirm.
+"""
+import glob
+import os
+import yaml
+import shutil
+
+mlflow_path = "./mlruns/1"
+backup_dir = "./.mlruns_bckp"
+
+# Finished runs that ended before this value are flagged as deleted.
+# Presumably epoch milliseconds (see the sample value) -- TODO confirm.
+# With the value 1 practically no real run is older than the cut-off.
+cut_after = 1  # 1670237437014
+
+runs = glob.glob(os.path.join(mlflow_path, "*"))
+
+for r in runs:
+    if not os.path.isdir(r):
+        continue
+
+    meta_file = os.path.join(r, "meta.yaml")
+    if not os.path.isfile(meta_file):
+        # No metadata to judge this directory by: leave it alone instead of
+        # aborting the whole clean-up with FileNotFoundError.
+        continue
+
+    with open(meta_file, "r") as f:
+        try:
+            content = yaml.safe_load(f)
+        except yaml.YAMLError as exc:
+            print(exc)
+            # Falling through would evaluate the previous run's metadata
+            # (or raise NameError on the first run), so skip this run.
+            continue
+
+    if not isinstance(content, dict):
+        # Empty or malformed meta.yaml: nothing to evaluate.
+        continue
+
+    if content["status"] != 3:
+        # Move the run folder aside rather than deleting it for good.
+        shutil.move(r, os.path.join(backup_dir, os.path.basename(r)))
+    elif int(content["end_time"]) < cut_after:
+        content["lifecycle_stage"] = "deleted"
+        with open(meta_file, "w") as f:
+            yaml.safe_dump(content, f)
diff --git a/.vscode/setup.sh b/.vscode/setup.sh
@@ -0,0 +1,2 @@
+poetry install --without dev
+poetry run kedro mlflow init
diff --git a/.vscode/setup_dev.sh b/.vscode/setup_dev.sh
@@ -0,0 +1,6 @@
+poetry install
+poetry run pre-commit autoupdate
+poetry run pre-commit install
+poetry run kedro mlflow init
+poetry run pytest
+poetry run mkdocs build
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
@@ -0,0 +1,95 @@
+{
+    "version": "2.0.0",
+    "tasks": [
+
+        {
+            "label": "Setup",
+            "type": "shell",
+            "command": ".vscode/setup.sh",
+            "group": "none",
+            "presentation": {
+                "reveal": "never",
+                "panel": "new"
+            },
+            "problemMatcher": []
+        },
+        {
+            "label": "Kedro Test",
+            "type": "shell",
+            "command": "poetry",
+            "args": ["run", "pytest"],
+            "group": "none",
+            "presentation": {
+                "reveal": "never",
+                "panel": "dedicated"
+            },
+            "problemMatcher": []
+        },
+        {
+            "label": "Kedro Viz",
+            "type": "shell",
+            "command": "poetry",
+            "args": ["run", "kedro", "viz", "--autoreload"],
+            "group": "none",
+            "presentation": {
+                "reveal": "never",
+                "panel": "new"
+            },
+            "runOptions": {
+                "runOn": "folderOpen"
+            },
+            "problemMatcher": []
+        },
+        {
+            "label": "MLFlow Dashboard",
+            "type": "shell",
+            "command": "poetry",
+            "args": ["run", "mlflow", "ui"],
+            "group": "none",
+            "presentation": {
+                "reveal": "never",
+                "panel": "new"
+            },
+            "runOptions": {
+                "runOn": "folderOpen"
+            },
+            "problemMatcher": []
+        },
+        {
+            "label": "MkDocs Build",
+            "type": "shell",
+            "command": "poetry",
+            "args": ["run", "mkdocs", "build"],
+            "group": "none",
+            "presentation": {
+                "reveal": "never",
+                "panel": "dedicated"
+            },
+            "problemMatcher": []
+        },
+        {
+            "label": "MkDocs Serve",
+            "type": "shell",
+            "command": "poetry",
+            "args": ["run", "mkdocs", "serve"],
+            "group": "none",
+            "presentation": {
+                "reveal": "never",
+                "panel": "dedicated"
+            },
+            "problemMatcher": []
+        },
+        {
+            "label": "MkDocs Deploy",
+            "type": "shell",
+            "command": "poetry",
+            "args": ["run", "mkdocs", "gh-deploy"],
+            "group": "none",
+            "presentation": {
+                "reveal": "never",
+                "panel": "dedicated"
+            },
+            "problemMatcher": []
+        },
+    ]
+  }
diff --git a/README.md b/README.md
@@ -18,6 +18,18 @@ pip compile
 
 torch_scatter needs gcc-c++ and python3-devel packages to build successfully.
 
+With poetry, you need to relax the tensorflow-probability version constraint that phasespace declares, so that the corresponding line in ```.venv/lib/python3.10/site-packages/phasespace-1.8.0.dist-info/METADATA``` becomes
+```
+Requires-Dist: tensorflow-probability (>=0.15)
+```
+To achieve this, you may want to run
+```
+poetry add phasespace
+```
+prior to the installation of the other packages using
+```
+poetry install
+```
+
 changed module dependencies in phasespace:
 
 before:
@@ -44,125 +56,3 @@ run kedro mlflow init
 
 
 
-
-
-## Overview
-
-This is your new Kedro project, which was generated using `Kedro 0.17.7`.
-
-Take a look at the [Kedro documentation](https://kedro.readthedocs.io) to get started.
-
-## Rules and guidelines
-
-In order to get the best out of the template:
-
-* Don't remove any lines from the `.gitignore` file we provide
-* Make sure your results can be reproduced by following a [data engineering convention](https://kedro.readthedocs.io/en/stable/12_faq/01_faq.html#what-is-data-engineering-convention)
-* Don't commit data to your repository
-* Don't commit any credentials or your local configuration to your repository. Keep all your credentials and local configuration in `conf/local/`
-
-## How to install dependencies
-
-Declare any dependencies in `src/requirements.txt` for `pip` installation and `src/environment.yml` for `conda` installation.
-
-To install them, run:
-
-```
-kedro install
-```
-
-## How to run your Kedro pipeline
-
-You can run your Kedro project with:
-
-```
-kedro run
-```
-
-## How to test your Kedro project
-
-Have a look at the file `src/tests/test_run.py` for instructions on how to write your tests. You can run your tests as follows:
-
-```
-kedro test
-```
-
-To configure the coverage threshold, go to the `.coveragerc` file.
-
-## Project dependencies
-
-To generate or update the dependency requirements for your project:
-
-```
-kedro build-reqs
-```
-
-This will copy the contents of `src/requirements.txt` into a new file `src/requirements.in` which will be used as the source for `pip-compile`. You can see the output of the resolution by opening `src/requirements.txt`.
-
-After this, if you'd like to update your project requirements, please update `src/requirements.in` and re-run `kedro build-reqs`.
-
-[Further information about project dependencies](https://kedro.readthedocs.io/en/stable/04_kedro_project_setup/01_dependencies.html#project-specific-dependencies)
-
-## How to work with Kedro and notebooks
-
-> Note: Using `kedro jupyter` or `kedro ipython` to run your notebook provides these variables in scope: `context`, `catalog`, and `startup_error`.
->
-> Jupyter, JupyterLab, and IPython are already included in the project requirements by default, so once you have run `kedro install` you will not need to take any extra steps before you use them.
-
-### Jupyter
-To use Jupyter notebooks in your Kedro project, you need to install Jupyter:
-
-```
-pip install jupyter
-```
-
-After installing Jupyter, you can start a local notebook server:
-
-```
-kedro jupyter notebook
-```
-
-### JupyterLab
-To use JupyterLab, you need to install it:
-
-```
-pip install jupyterlab
-```
-
-You can also start JupyterLab:
-
-```
-kedro jupyter lab
-```
-
-### IPython
-And if you want to run an IPython session:
-
-```
-kedro ipython
-```
-
-### How to convert notebook cells to nodes in a Kedro project
-You can move notebook code over into a Kedro project structure using a mixture of [cell tagging](https://jupyter-notebook.readthedocs.io/en/stable/changelog.html#release-5-0-0) and Kedro CLI commands.
-
-By adding the `node` tag to a cell and running the command below, the cell's source code will be copied over to a Python file within `src/<package_name>/nodes/`:
-
-```
-kedro jupyter convert <filepath_to_my_notebook>
-```
-> *Note:* The name of the Python file matches the name of the original notebook.
-
-Alternatively, you may want to transform all your notebooks in one go. Run the following command to convert all notebook files found in the project root directory and under any of its sub-folders:
-
-```
-kedro jupyter convert --all
-```
-
-### How to ignore notebook output cells in `git`
-To automatically strip out all output cell contents before committing to `git`, you can run `kedro activate-nbstripout`. This will add a hook in `.git/config` which will run `nbstripout` before anything is committed to `git`.
-
-> *Note:* Your output cells will be retained locally.
-
-## Package your Kedro project
-
-[Further information about building project documentation and packaging your project](https://kedro.readthedocs.io/en/stable/03_tutorial/08_package_a_project.html)
diff --git a/conf/base/parameters/data_generation.yml b/conf/base/parameters/data_generation.yml
@@ -7,8 +7,8 @@ max_depth: 3 # this will control the dimensionality of the classes
 max_children: 3 # careful, this only controls how many children a single particle can have
 min_children: 2
 isp_weight: 1.0
-train_events_per_top: 500
-val_events_per_top: 100
+train_events_per_top: 100
+val_events_per_top: 20
 test_events_per_top: 10
 seed: 1111
 iso_retries: 0