Skip to content

Commit

Permalink
Merge branch 'doc'
Browse files Browse the repository at this point in the history
  • Loading branch information
stroblme committed Mar 17, 2023
2 parents 327baf4 + 594fb8d commit 7b8449d
Show file tree
Hide file tree
Showing 56 changed files with 7,894 additions and 187 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
venv
mlruns
.mlruns_bckp
*.pdf
.ipython
*.png
Expand Down
13 changes: 12 additions & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,16 @@
// Any other arguments should be passed as a comma-separated list
// e.g "args": ["run", "--pipeline", "pipeline_name"]
},
{
"name": "KR: - validation",
"type": "python",
"request": "launch",
"console": "integratedTerminal",
"module": "kedro",
"args": ["run", "--pipeline", "validation_qgnn_pipeline"]
// Any other arguments should be passed as a comma-separated list
// e.g "args": ["run", "--pipeline", "pipeline_name"]
},
{
"name": "KR: - training Optuna",
"type": "python",
Expand Down Expand Up @@ -114,7 +124,8 @@
"request": "launch",
"console": "integratedTerminal",
"module": "kedro",
"args": ["run"]
"args": ["run"],
// "justMyCode": false
// Any other arguments should be passed as a comma-separated list
// e.g "args": ["run", "--pipeline", "pipeline_name"]
}
Expand Down
34 changes: 34 additions & 0 deletions .vscode/mlflow_cleanup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import glob
import os
import yaml
import shutil

# Directory of the MLflow experiment whose runs are cleaned up, and the
# directory that receives the runs that are moved out of it.
mlflow_path = "./mlruns/1"
backup_dir = "./.mlruns_bckp"

# Finished runs whose ``end_time`` is below this value are flagged as deleted.
# NOTE(review): presumably a millisecond epoch timestamp like the example in
# the trailing comment -- confirm against the values stored in meta.yaml.
cut_after = 1  # 1670237437014


def classify_run(content, cutoff):
    """Decide what should happen to a run given its parsed ``meta.yaml``.

    Args:
        content: Mapping loaded from the run's ``meta.yaml``; must contain
            a ``status`` entry.
        cutoff: Threshold compared against the run's ``end_time``.

    Returns:
        ``"delete"`` when ``status`` is not 3 (the run is moved to the
        backup directory), ``"deprecate"`` when the run has status 3 and an
        ``end_time`` below ``cutoff`` (the run is flagged as deleted), and
        ``"keep"`` otherwise.
    """
    # NOTE(review): 3 is expected to be MLflow's RunStatus.FINISHED -- confirm.
    if content["status"] != 3:
        return "delete"
    end_time = content.get("end_time")
    # A missing end_time cannot be compared with the cutoff; leave such runs.
    if end_time is not None and int(end_time) < cutoff:
        return "deprecate"
    return "keep"


runs = glob.glob(os.path.join(mlflow_path, "*"))

for r in runs:
    # Plain files (e.g. the experiment's own meta.yaml) are not runs.
    if not os.path.isdir(r):
        continue

    meta_file = os.path.join(r, "meta.yaml")
    try:
        with open(meta_file, "r") as f:
            content = yaml.safe_load(f)
    except FileNotFoundError:
        # Directories without metadata are left alone instead of aborting
        # the whole cleanup.
        print(f"Skipping {r}: no meta.yaml found")
        continue
    except yaml.YAMLError as exc:
        # Without parsed metadata no decision can be made for this run;
        # continuing would reuse the metadata of the previous run.
        print(exc)
        continue

    if not isinstance(content, dict) or "status" not in content:
        print(f"Skipping {r}: meta.yaml has no 'status' entry")
        continue

    action = classify_run(content, cut_after)
    if action == "delete":
        os.makedirs(backup_dir, exist_ok=True)
        shutil.move(r, os.path.join(backup_dir, os.path.basename(r)))
    elif action == "deprecate":
        content["lifecycle_stage"] = "deleted"
        with open(meta_file, "w") as f:
            yaml.safe_dump(content, f)
2 changes: 2 additions & 0 deletions .vscode/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Project setup without the "dev" dependency group: installs the poetry
# environment, then runs `kedro mlflow init` inside it.

# Abort on the first failing command so that the init step is not attempted
# on top of a broken installation.
set -euo pipefail

poetry install --without dev
poetry run kedro mlflow init
6 changes: 6 additions & 0 deletions .vscode/setup_dev.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Developer setup: installs all dependency groups, updates and installs the
# pre-commit hooks, runs `kedro mlflow init`, then runs the test suite and
# builds the documentation.

# Abort on the first failing command so that later steps do not run against
# an incomplete environment.
set -euo pipefail

poetry install
poetry run pre-commit autoupdate
poetry run pre-commit install
poetry run kedro mlflow init
poetry run pytest
poetry run mkdocs build
95 changes: 95 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
{
"version": "2.0.0",
"tasks": [

{
"label": "Setup",
"type": "shell",
"command": ".vscode/setup.sh",
"group": "none",
"presentation": {
"reveal": "never",
"panel": "new"
},
"problemMatcher": []
},
{
"label": "Kedro Test",
"type": "shell",
"command": "poetry",
"args": ["run", "pytest"],
"group": "none",
"presentation": {
"reveal": "never",
"panel": "dedicated"
},
"problemMatcher": []
},
{
"label": "Kedro Viz",
"type": "shell",
"command": "poetry",
"args": ["run", "kedro", "viz", "--autoreload"],
"group": "none",
"presentation": {
"reveal": "never",
"panel": "new"
},
"runOptions": {
"runOn": "folderOpen"
},
"problemMatcher": []
},
{
"label": "MLFlow Dashboard",
"type": "shell",
"command": "poetry",
"args": ["run", "mlflow", "ui"],
"group": "none",
"presentation": {
"reveal": "never",
"panel": "new"
},
"runOptions": {
"runOn": "folderOpen"
},
"problemMatcher": []
},
{
"label": "MkDocs Build",
"type": "shell",
"command": "poetry",
"args": ["run", "mkdocs", "build"],
"group": "none",
"presentation": {
"reveal": "never",
"panel": "dedicated"
},
"problemMatcher": []
},
{
"label": "MkDocs Serve",
"type": "shell",
"command": "poetry",
"args": ["run", "mkdocs", "serve"],
"group": "none",
"presentation": {
"reveal": "never",
"panel": "dedicated"
},
"problemMatcher": []
},
{
"label": "MkDocs Deploy",
"type": "shell",
"command": "poetry",
"args": ["run", "mkdocs", "gh-deploy"],
"group": "none",
"presentation": {
"reveal": "never",
"panel": "dedicated"
},
"problemMatcher": []
}
]
}
134 changes: 12 additions & 122 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@ pip compile

torch_scatter needs gcc-c++ and python3-devel packages to build successfully.

With poetry, you need to relax the tensorflow-probability dependency of phasespace so that the corresponding line in ```.venv/lib/python3.10/site-packages/phasespace-1.8.0.dist-info/METADATA``` becomes
```
Requires-Dist: tensorflow-probability (>=0.15)
```
To achieve this, you may want to run
```
poetry add phasespace
```
prior to the installation of the other packages using
```
poe
changed module dependencies in phasespace:
before:
Expand All @@ -44,125 +56,3 @@ run kedro mlflow init


## Overview

This is your new Kedro project, which was generated using `Kedro 0.17.7`.

Take a look at the [Kedro documentation](https://kedro.readthedocs.io) to get started.

## Rules and guidelines

In order to get the best out of the template:

* Don't remove any lines from the `.gitignore` file we provide
* Make sure your results can be reproduced by following a [data engineering convention](https://kedro.readthedocs.io/en/stable/12_faq/01_faq.html#what-is-data-engineering-convention)
* Don't commit data to your repository
* Don't commit any credentials or your local configuration to your repository. Keep all your credentials and local configuration in `conf/local/`

## How to install dependencies

Declare any dependencies in `src/requirements.txt` for `pip` installation and `src/environment.yml` for `conda` installation.

To install them, run:

```
kedro install
```

## How to run your Kedro pipeline

You can run your Kedro project with:

```
kedro run
```

## How to test your Kedro project

Have a look at the file `src/tests/test_run.py` for instructions on how to write your tests. You can run your tests as follows:

```
kedro test
```

To configure the coverage threshold, go to the `.coveragerc` file.

## Project dependencies

To generate or update the dependency requirements for your project:

```
kedro build-reqs
```

This will copy the contents of `src/requirements.txt` into a new file `src/requirements.in` which will be used as the source for `pip-compile`. You can see the output of the resolution by opening `src/requirements.txt`.

After this, if you'd like to update your project requirements, please update `src/requirements.in` and re-run `kedro build-reqs`.

[Further information about project dependencies](https://kedro.readthedocs.io/en/stable/04_kedro_project_setup/01_dependencies.html#project-specific-dependencies)

## How to work with Kedro and notebooks

> Note: Using `kedro jupyter` or `kedro ipython` to run your notebook provides these variables in scope: `context`, `catalog`, and `startup_error`.
>
> Jupyter, JupyterLab, and IPython are already included in the project requirements by default, so once you have run `kedro install` you will not need to take any extra steps before you use them.
### Jupyter
To use Jupyter notebooks in your Kedro project, you need to install Jupyter:

```
pip install jupyter
```

After installing Jupyter, you can start a local notebook server:

```
kedro jupyter notebook
```

### JupyterLab
To use JupyterLab, you need to install it:

```
pip install jupyterlab
```

You can also start JupyterLab:

```
kedro jupyter lab
```

### IPython
And if you want to run an IPython session:

```
kedro ipython
```

### How to convert notebook cells to nodes in a Kedro project
You can move notebook code over into a Kedro project structure using a mixture of [cell tagging](https://jupyter-notebook.readthedocs.io/en/stable/changelog.html#release-5-0-0) and Kedro CLI commands.

By adding the `node` tag to a cell and running the command below, the cell's source code will be copied over to a Python file within `src/<package_name>/nodes/`:

```
kedro jupyter convert <filepath_to_my_notebook>
```
> *Note:* The name of the Python file matches the name of the original notebook.
Alternatively, you may want to transform all your notebooks in one go. Run the following command to convert all notebook files found in the project root directory and under any of its sub-folders:

```
kedro jupyter convert --all
```

### How to ignore notebook output cells in `git`
To automatically strip out all output cell contents before committing to `git`, you can run `kedro activate-nbstripout`. This will add a hook in `.git/config` which will run `nbstripout` before anything is committed to `git`.

> *Note:* Your output cells will be retained locally.
## Package your Kedro project

[Further information about building project documentation and packaging your project](https://kedro.readthedocs.io/en/stable/03_tutorial/08_package_a_project.html)
4 changes: 2 additions & 2 deletions conf/base/parameters/data_generation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ max_depth: 3 # this will control the dimensionality of the classes
max_children: 3 # careful, this only controls how many children a single particle can have
min_children: 2
isp_weight: 1.0
train_events_per_top: 500
val_events_per_top: 100
train_events_per_top: 100
val_events_per_top: 20
test_events_per_top: 10
seed: 1111
iso_retries: 0
Expand Down
Loading

0 comments on commit 7b8449d

Please sign in to comment.