-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Move the canonical class to parsing and core modules #100
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,60 +1,79 @@ | ||
from __future__ import annotations | ||
|
||
from itertools import product | ||
from itertools import chain, product | ||
from typing import TYPE_CHECKING, Self | ||
|
||
from sirocco.core.graph_items import Cycle, Data, Store, Task | ||
from sirocco.parsing._yaml_data_models import ( | ||
CanonicalWorkflow, | ||
load_workflow_config, | ||
ConfigWorkflow, | ||
) | ||
|
||
if TYPE_CHECKING: | ||
from collections.abc import Iterator | ||
from datetime import datetime | ||
from pathlib import Path | ||
|
||
from sirocco.parsing._yaml_data_models import ConfigCycle | ||
from sirocco.parsing._yaml_data_models import ( | ||
ConfigAvailableData, | ||
ConfigCycle, | ||
ConfigData, | ||
ConfigGeneratedData, | ||
ConfigTask, | ||
) | ||
|
||
|
||
class Workflow: | ||
"""Internal representation of a workflow""" | ||
|
||
def __init__(self, workflow_config: CanonicalWorkflow) -> None: | ||
self.name: str = workflow_config.name | ||
self.config_rootdir: Path = workflow_config.rootdir | ||
def __init__( | ||
self, | ||
name: str, | ||
config_rootdir: Path, | ||
cycles: list[ConfigCycle], | ||
tasks: list[ConfigTask], | ||
data: ConfigData, | ||
parameters: dict[str, list], | ||
) -> None: | ||
self.name: str = name | ||
self.config_rootdir: Path = config_rootdir | ||
|
||
self.tasks: Store = Store() | ||
self.data: Store = Store() | ||
self.cycles: Store = Store() | ||
|
||
data_dict: dict[str, ConfigAvailableData | ConfigGeneratedData] = { | ||
data.name: data for data in chain(data.available, data.generated) | ||
} | ||
task_dict: dict[str, ConfigTask] = {task.name: task for task in tasks} | ||
|
||
# Function to iterate over date and parameter combinations | ||
def iter_coordinates(param_refs: list, date: datetime | None = None) -> Iterator[dict]: | ||
space = ({} if date is None else {"date": [date]}) | {k: workflow_config.parameters[k] for k in param_refs} | ||
space = ({} if date is None else {"date": [date]}) | {k: parameters[k] for k in param_refs} | ||
yield from (dict(zip(space.keys(), x, strict=False)) for x in product(*space.values())) | ||
|
||
# 1 - create available data nodes | ||
for data_config in workflow_config.data.available: | ||
for data_config in data.available: | ||
for coordinates in iter_coordinates(param_refs=data_config.parameters, date=None): | ||
self.data.add(Data.from_config(config=data_config, coordinates=coordinates)) | ||
|
||
# 2 - create output data nodes | ||
for cycle_config in workflow_config.cycles: | ||
for cycle_config in cycles: | ||
for date in self.cycle_dates(cycle_config): | ||
for task_ref in cycle_config.tasks: | ||
for data_ref in task_ref.outputs: | ||
data_name = data_ref.name | ||
data_config = workflow_config.data_dict[data_name] | ||
data_config = data_dict[data_name] | ||
for coordinates in iter_coordinates(param_refs=data_config.parameters, date=date): | ||
self.data.add(Data.from_config(config=data_config, coordinates=coordinates)) | ||
|
||
# 3 - create cycles and tasks | ||
for cycle_config in workflow_config.cycles: | ||
for cycle_config in cycles: | ||
cycle_name = cycle_config.name | ||
for date in self.cycle_dates(cycle_config): | ||
cycle_tasks = [] | ||
for task_graph_spec in cycle_config.tasks: | ||
task_name = task_graph_spec.name | ||
task_config = workflow_config.task_dict[task_name] | ||
task_config = task_dict[task_name] | ||
|
||
for coordinates in iter_coordinates(param_refs=task_config.parameters, date=date): | ||
task = Task.from_config( | ||
|
@@ -88,5 +107,21 @@ def cycle_dates(cycle_config: ConfigCycle) -> Iterator[datetime]: | |
yield date | ||
|
||
@classmethod | ||
def from_yaml(cls: type[Self], config_path: str) -> Self: | ||
return cls(load_workflow_config(config_path)) | ||
def from_config_file(cls: type[Self], config_path: str) -> Self: | ||
""" | ||
Loads a python representation of a workflow config file. | ||
|
||
:param config_path: the path to the config YAML file containing the workflow definition | ||
""" | ||
return cls.from_config_workflow(ConfigWorkflow.from_config_file(config_path)) | ||
|
||
@classmethod | ||
def from_config_workflow(cls: type[Self], config_workflow: ConfigWorkflow) -> Workflow: | ||
return cls( | ||
name=config_workflow.name, | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will fail mypy checks, because […]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This PR needs to catch that now. In general, please run […]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
|
||
config_rootdir=config_workflow.rootdir, | ||
cycles=config_workflow.cycles, | ||
tasks=config_workflow.tasks, | ||
data=config_workflow.data, | ||
parameters=config_workflow.parameters, | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
from ._yaml_data_models import ( | ||
load_workflow_config, | ||
ConfigWorkflow, | ||
) | ||
|
||
__all__ = [ | ||
"load_workflow_config", | ||
"ConfigWorkflow", | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,22 @@ | ||
import pathlib | ||
|
||
import pytest | ||
|
||
from sirocco.parsing import _yaml_data_models as models | ||
|
||
pytest_plugins = ["aiida.tools.pytest_fixtures"] | ||
|
||
|
||
@pytest.fixture(scope="session") | ||
def minimal_config() -> models.ConfigWorkflow: | ||
return models.ConfigWorkflow( | ||
name="minimal", | ||
rootdir=pathlib.Path("minimal"), | ||
cycles=[models.ConfigCycle(minimal={"tasks": [models.ConfigCycleTask(some_task={})]})], | ||
tasks=[models.ConfigShellTask(some_task={"plugin": "shell"})], | ||
data=models.ConfigData( | ||
available=[models.ConfigAvailableData(name="foo", type=models.DataType.FILE, src="foo.txt")], | ||
generated=[models.ConfigGeneratedData(name="bar", type=models.DataType.DIR, src="bar")], | ||
), | ||
parameters={}, | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These changes strike me as orthogonal to the goal of canonicalizing the YAML models. They are improvements along the same lines in that they make the `workflow.Workflow` class and everything that depends on it more testable. Nevertheless, they could be isolated and put into their own PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, we can merge it after #89, but it needs to be based on `test-config-workflow` to see a proper diff.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I meant isolated from other changes, like undoing the shift to properties instead of duplicating information, moving the validators to the ConfigWorkflow and replacing CanonicalWorkflow with some other way of storing ConfigWorkflow + rootdir.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The goal of this PR is to move all the logic that was implemented in `CanonicalWorkflow` to the classes `Workflow` and `ConfigWorkflow`; that has been done so far. I feel that splitting it up into several PRs would not fully take into account all the logic that is currently implemented.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe I can split it up into multiple commits?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I split the PR up into multiple commits, but a lot of the changes are not orthogonal improvements and need to be in the first commit. I have been very verbose in the commit message to explain the dependencies between the changes within that commit.