Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT: transient variables #379

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions damnit/ctxsupport/ctxrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,12 @@ def from_str(cls, code: str, path='<string>'):
log.debug("Loaded %d variables", len(vars))
return cls(vars, code)

def vars_to_dict(self):
"""Get a plain dict of variable metadata to store in the database"""
def vars_to_dict(self, inc_transient=False):
"""Get a plain dict of variable metadata to store in the database

args:
inc_transient (bool): include transient Variables in the dict
"""
return {
name: {
'title': v.title,
Expand All @@ -244,6 +248,7 @@ def vars_to_dict(self):
'type': None,
}
for (name, v) in self.vars.items()
if not v.transient or inc_transient
}

def filter(self, run_data=RunData.ALL, cluster=True, name_matches=(), variables=()):
Expand Down Expand Up @@ -349,6 +354,12 @@ def execute(self, run_data, run_number, proposal, input_vars) -> 'Results':
t1 = time.perf_counter()
log.info("Computed %s in %.03f s", name, t1 - t0)
res[name] = data

# remove transient results
for name, var in self.vars.items():
if var.transient and (name in res):
res.pop(name)

return Results(res, self)


Expand Down
4 changes: 3 additions & 1 deletion damnit/ctxsupport/damnit_ctx.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,15 @@ class Variable:
_name = None

def __init__(
self, title=None, description=None, summary=None, data=None, cluster=False, tags=None,
self, title=None, description=None, summary=None, data=None,
cluster=False, tags=None, transient=False
):
self.title = title
self.tags = (tags,) if isinstance(tags, str) else tags
self.description = description
self.summary = summary
self.cluster = cluster
self.transient = transient
self._data = data

# @Variable() is used as a decorator on a function that computes a value
Expand Down
3 changes: 3 additions & 0 deletions docs/backend.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ these arguments:
```
- `cluster` (bool): whether or not to execute this variable in a Slurm job. This
should always be used if the variable does any heavy processing.
- `transient` (bool): whether or not to skip saving the variable's result to the
database. This is useful for e.g. intermediate results. By default variables
Comment on lines +74 to +75
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"whether or not to save" implies that True means save.

I imagine most users will be familiar with the word transient anyway, so the flip will be obvious, but maybe we could either reword this ("whether or not to skip saving"), or change the sense of the parameter (saved=False).

save their results (transient=False).

Variable functions can return any of:

Expand Down
1 change: 1 addition & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Added:
- Add a `tags` attribute allowing categorizing `Variable`s (!354).
- Add support for `complex` numbers (!374)
- GUI: Add a Dark theme (!376)
- Add a `transient` attribute for variables whose data we don't want to save (!xxx)

Changed:

Expand Down
46 changes: 45 additions & 1 deletion tests/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from damnit.backend import backend_is_running, initialize_and_start_backend
from damnit.backend.db import DamnitDB
from damnit.backend.extract_data import Extractor, RunExtractor, add_to_db
from damnit.backend.extract_data import Extractor, RunExtractor, add_to_db, load_reduced_data
from damnit.backend.extraction_control import ExtractionJobTracker
from damnit.backend.listener import (MAX_CONCURRENT_THREADS, EventProcessor,
local_extraction_threads)
Expand Down Expand Up @@ -1002,3 +1002,47 @@ def test_job_tracker():

fake_squeue.assert_called()
assert set(tracker.jobs) == set()

def test_transient_variables(mock_run, mock_db, tmp_path):
    """Check that transient variables are computed but never persisted.

    ``var2`` is declared with ``transient=True``: it must still be available
    as an input to ``var3`` during execution, but must appear neither in the
    saved HDF5 file nor in the ``run_variables`` database table.
    """
    _, db = mock_db

    ctx_code = """
    from damnit_ctx import Variable, Cell
    import numpy as np

    @Variable()
    def var1(run):
        return 7

    @Variable(transient=True)
    def var2(run, data: 'var#var1'):
        return np.arange(data)

    @Variable(summary='max')
    def var3(run, data: 'var#var2'):
        return data.size * data
    """
    ctx = mkcontext(ctx_code)
    results = ctx.execute(mock_run, 1000, 123, {})
    results_hdf5_path = tmp_path / 'results.h5'
    results.save_hdf5(results_hdf5_path)

    with h5py.File(results_hdf5_path) as f:
        assert '.reduced/var1' in f
        assert 'var1' in f
        # transient variables are not saved
        assert '.reduced/var2' not in f
        assert 'var2' not in f
        assert '.reduced/var3' in f
        assert 'var3' in f

        # var3 depends on the transient var2, so these values prove var2 was
        # computed: 7 * arange(7) has a maximum of 42.
        assert f['.reduced/var3'][()] == 42
        assert np.allclose(f['var3/data'][()], np.arange(7) * 7)

    reduced_data = load_reduced_data(results_hdf5_path)
    add_to_db(reduced_data, db, 1000, 123)

    # Bind the variable names as parameters: double-quoted tokens are
    # identifiers in SQL, and SQLite only treats them as string literals
    # through a legacy compatibility quirk that may be disabled.
    rows = db.conn.execute(
        'SELECT value FROM run_variables WHERE name=?', ('var3',)
    ).fetchall()
    assert rows[0]['value'] == 42
    # also not saved in the db
    rows = db.conn.execute(
        'SELECT * FROM run_variables WHERE name=?', ('var2',)
    ).fetchall()
    assert rows == []