Adding incremental files.
GeigerJ2 committed Jan 23, 2025
1 parent d2c7287 commit 748336d
Showing 2 changed files with 166 additions and 144 deletions.
179 changes: 35 additions & 144 deletions src/aiida/tools/dumping/incremental.py
@@ -9,6 +9,14 @@
# TODO: flat list using UUIDs, or sorted by type?
# TODO: individual lists, ids, uuids, etc., or sorted: by data, or more fine-grained
# TODO: Dumping to json files with timestamp in filename, inside file, or none at all
# TODO: When dumping incrementally, just keep a list of all the UUIDs, and, before creating/updating the dictionary,
# TODO: drop everything apart from the set intersection
# TODO: How to take care of deleting previously dumped node data that doesn't exist in the profile anymore
# TODO: One could persist the list of all profile UUIDs throughout multiple instantiations of the class, and method
# TODO: calls, by writing to file, but not sure if this is something we want to do by default?
# TODO: Possibly, persist the history of up to N dumps -> Specifiable by the user
# TODO: Pass the `DumpNodeCollector` around, and `pop` UUIDs, as the dumping has actually been done?
# TODO: Make sure the dumping UUID list update functions are atomic
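
# Not part of this commit: a minimal sketch of one way the atomicity TODO above could be handled.
# The JSON is written to a temporary file in the same directory and then atomically moved over the
# target, so a crash mid-write never leaves a truncated UUID list behind. The helper name
# `atomic_json_dump` is hypothetical.
import json
import os
import tempfile
from pathlib import Path


def atomic_json_dump(data: list, target: Path) -> None:
    fd, tmp_name = tempfile.mkstemp(dir=target.parent, suffix='.tmp')
    try:
        with os.fdopen(fd, 'w') as handle:
            json.dump(data, handle, indent=4)
            handle.flush()
            os.fsync(handle.fileno())
        os.replace(tmp_name, target)  # atomic when source and target are on the same filesystem
    except BaseException:
        os.unlink(tmp_name)
        raise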


_node_dict_entries = [
@@ -20,188 +28,71 @@
'symlinked', # Should this be here?
]

_node_dict_template: dict[str,None] = {k: None for k in _node_dict_entries}

class DumpNodeCollector:

# TODO: When dumping incrementally, just keep a list of all the UUIDs, and, before creating/updating the dictionary,
# TODO: drop everything apart from the set intersection
# TODO: How to take care of deleting previously dumped node data that doesn't exist in the profile anymore
# TODO: One could persist the list of all profile UUIDs throughout multiple instantiations of the class, and method
# TODO: calls, by writing to file, but not sure if this is something we want to do by default?
# TODO: Possibly, persist the history of up to N dumps -> Specifyable by the user
# TODO: Pass the `DumpNodeCollector` around, and `pop` UUIDs, as the dumping has actually been done?
_node_dict_template: dict[str, None] = {k: None for k in _node_dict_entries}


class IncrementalDumper:
def __init__(self, dump_parent_path: Path):
self.dump_parent_path: Path = dump_parent_path

# self.profile_uuids: set[str] = set()
profile_uuids = orm.QueryBuilder().append(orm.Node, project=['uuid']).all(flat=True)
self.profile_uuids = set(profile_uuids)

self.prevdump_uuids: set[str] = set()
self.prevdel_uuids: set[str] = set()
self.todump_uuids: set[str] = set()
self.todel_uuids: set[str] = set()

self.profile_uuid_file: Path= self.dump_parent_path / 'profile-uuids.json'
self.todump_uuid_file: Path = self.dump_parent_path / 'profile-uuids.json'
self.profile_uuid_file: Path = self.dump_parent_path / 'profile-uuids.json'
self.todump_uuid_file: Path = self.dump_parent_path / 'todump-uuids.json'
self.prevdump_uuid_file: Path = self.dump_parent_path / 'prevdump-uuids.json'
self.todel_uuid_file: Path = self.dump_parent_path / 'todel-uuids.json'
self.prevdel_uuid_file: Path = self.dump_parent_path / 'prevdel-uuids.json'

self.profile_uuids: set[str] = set(orm.QueryBuilder().append(orm.Node, project=['uuid']).all(flat=True))


# TODO: Make sure the dumping UUID list update functions are atomic



def update_uuids_before_dump(self):
"""
abc
"""

# Could implement smarter logic here to check if anything actually changed in terms of profile UUIDs

if self.prevdump_uuid_file.exists():
with open(self.prevdump_uuid_file, 'r') as handle:
self.prevdump_uuids = json.load(handle)
self.prevdump_uuids = set(json.load(handle) or [])
else:
self.prevdump_uuids = set()

# ? Differentiate different types here somewhat?
if self.prevdel_uuid_file.exists():
with open(self.prevdel_uuid_file, 'r') as handle:
self.prevdel_uuids = set(json.load(handle) or [])
else:
self.prevdel_uuids = set()

# New UUIDs from the profile that weren't dumped in the past
self.todump_uuids = set(self.profile_uuids).difference(self.prevdump_uuids)
self.todump_uuids = {str(uuid) for uuid in self.profile_uuids.difference(self.prevdump_uuids)}
self.profile_uuids.update(self.todump_uuids)  # `update` merges elements; `add` would try to insert the (unhashable) set itself

# UUIDs of nodes that were deleted in the profile, but had already been dumped in the past
# Depending on `--delete-files` this should be deleted, or not
# Depending on `--delete-files`, these should either be deleted or kept
self.todel_uuids = set(self.prevdump_uuids).difference(self.profile_uuids)

with open(self.profile_uuid_file, 'w') as handle:
json.dump(list(self.profile_uuids), handle, indent=4)
json.dump(sorted(self.profile_uuids), handle, indent=4)  # sets are not JSON serializable, so write a sorted list

with open(self.todump_uuid_file, 'w') as handle:
json.dump(list(self.todump_uuids), handle, indent=4)
json.dump(sorted(self.todump_uuids), handle, indent=4)

with open(self.todel_uuid_file, 'w') as handle:
json.dump(list(self.todel_uuids), handle, indent=4)

# ? Maybe better to keep it as separate files, so that one doesn't have to re-write everything...
# full_uuid_dict = {
# 'profile': self.profile_uuids,
# 'prev-dump': self.prevdump_uuids,
# 'this-dump': self.todump_uuids,
# 'to-delete:': self.todel_uuids
# }

# self.full_uuid_dict = full_uuid_dict

# with open(self.dump_parent_path / 'full-uuid-dict.json', 'w') as f:
# json.dump(list(self.full_uuid_dict), f, indent=4)
json.dump(sorted(self.todel_uuids), handle, indent=4)
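
# Not part of this commit: a tiny worked example of the set bookkeeping above.
prevdump = {'a', 'b'}                  # UUIDs recorded by a previous dump
profile = {'b', 'c'}                   # UUIDs currently present in the profile
assert profile - prevdump == {'c'}     # todump: new since the last dump
assert prevdump - profile == {'a'}     # todel: dumped before, but since deleted from the profile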

def update_uuids_after_dump(self):

# Move `todump` to `prevdump` (`Path.replace` overwrites an existing target on all platforms,
# whereas `Path.rename` raises on Windows if the target already exists)
self.todump_uuid_file.replace(self.prevdump_uuid_file)

# Move `todel` to `prevdel`
self.todel_uuid_file.replace(self.prevdel_uuid_file)

# Re-create empty todump-file -> Don't really need that
# (self.dump_parent_path / 'todump-uuids.json').touch()
# with open(self.dump_parent_path / 'todump-uuids.json', 'w') as handle:
# json.dump([], handle, indent=4)
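
# Not part of this commit: after the two file moves above, the on-disk state for the next run is
#   prevdump-uuids.json  <- UUIDs scheduled (and presumably dumped) in this run
#   prevdel-uuids.json   <- UUIDs scheduled for deletion in this run
# so the next `update_uuids_before_dump` call only picks up nodes added or deleted since this point.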

# def create_organized_uuid_dicts(self):

# # ! This requires actually loading the `Node`s rather than just working with UUID strings

# for uuid in self.profile_uuids:
# node = orm.load_node(uuid)
# entry_point_name = node.entry_point.name
# if entry_point_name not in self.profile_organized_uuids:
# self.profile_organized_uuids[entry_point_name] = [uuid]
# else:
# self.profile_organized_uuids[entry_point_name].append(uuid)

# for uuid in self.todump_uuids:
# node = orm.load_node(uuid)
# entry_point_name = node.entry_point.name
# if entry_point_name not in self.todump_organized_uuids:
# self.todump_organized_uuids[entry_point_name] = [uuid]
# else:
# self.todump_organized_uuids[entry_point_name].append(uuid)

# for uuid in self.todel_uuids:
# node = orm.load_node(uuid)
# entry_point_name = node.entry_point.name
# if entry_point_name not in self.todel_organized_uuids:
# self.todel_organized_uuids[entry_point_name] = [uuid]
# else:
# self.todel_organized_uuids[entry_point_name].append(uuid)

# pprint(self.profile_organized_uuids)
# pprint(self.todump_organized_uuids)
# pprint(self.todel_organized_uuids)

# import json
# import os


# # Dump profile_organized_uuids to a JSON file
# with open(self.dump_parent_path / 'profile-organized-uuids.json', 'w') as f:
# json.dump(self.profile_organized_uuids, f, indent=4)

# # Dump todump_organized_uuids to a JSON file
# with open(self.dump_parent_path / 'todump-organized-uuids.json', 'w') as f:
# json.dump(self.todump_organized_uuids, f, indent=4)

# # Dump todel_organized_uuids to a JSON file
# with open(self.dump_parent_path / 'todel-organized-uuids.json', 'w') as f:
# json.dump(self.todel_organized_uuids, f, indent=4)
# # breakpoint()

# # List comprehension rather than dict.from_keys, otherwise always same reference is passed
# # self.todump_dict = {uuid: _node_dict_template.copy() for uuid in self.todump_uuids}

# def populate_uuid_dict(self):
# # Mainly copied from `def get_collection_nodes`
# # TODO: Possibly add 'dumped' True/False to dictionary and then json file
# # TODO: What if node in multiple groups -> Maybe just dump the last one?
# # Add group information
# groups = orm.QueryBuilder().append(orm.Group).all(flat=True)

# for group in groups:

# nodes_in_groups = [node.uuid for node in group.nodes]

# # Only resolve the sub-workflows for nodes in groups if nodes will be dumped
# nodes_in_groups = [node for node in nodes_in_groups if node in self.todump_uuids]

# # Need to expand here also with the called_descendants of `WorkflowNodes`, otherwise the called
# # `CalculationNode`s for `WorkflowNode`s that are part of a group are dumped twice
# sub_nodes_in_groups = list(it.chain(
# *[
# orm.load_node(node).called_descendants
# for node in nodes_in_groups
# if isinstance(orm.load_node(node), orm.WorkflowNode)
# ]
# ))
# sub_nodes_in_groups = [node.uuid for node in sub_nodes_in_groups]
# nodes_in_groups = nodes_in_groups + sub_nodes_in_groups

# dict_list = []
# for node_in_group in nodes_in_groups:
# self.todump_dict[node_in_group]['group'] = group.label


# pprint(self.todump_dict)
# print(self.todump_dict['df12d734-a99b-4bef-9868-4f96aef34405'] == self.todump_dict['f9eabbd5-97bd-456a-bd20-895b84942d38'])

# # for node_uuid in self.todump_uuids:

# # node = orm.load_node(node_uuid)
# # raise SystemExit

# pass


def obtain_node_difference():
pass


def resolve_path():
pass

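Aside (not part of the diff): the intended call sequence for the new class, inferred from the method names, would look roughly like the sketch below. It assumes an AiiDA profile is already loaded; the dumping step is only a placeholder, since this commit does not yet wire the bookkeeping into the actual writing of node data.

from pathlib import Path
from aiida.tools.dumping.incremental import IncrementalDumper

dump_parent = Path('/tmp/aiida-dump')
dump_parent.mkdir(parents=True, exist_ok=True)  # the class does not create the directory itself

dumper = IncrementalDumper(dump_parent_path=dump_parent)
dumper.update_uuids_before_dump()         # compute todump/todel sets and write the JSON bookkeeping files
for uuid in sorted(dumper.todump_uuids):  # placeholder: write each new node's data to disk here
    ...
dumper.update_uuids_after_dump()          # rotate todump/todel to prevdump/prevdel for the next run
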
131 changes: 131 additions & 0 deletions tests/tools/dumping/test_incremental.py
@@ -0,0 +1,131 @@
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
"""Tests for the dumping of ProcessNode data to disk."""

from __future__ import annotations

import io
import shutil
from pathlib import Path

import pytest

from aiida.tools.dumping.incremental import IncrementalDumper
from aiida import orm
from aiida.manage import get_manager

# @pytest.fixture(scope='class', autouse=True)
# def setup_test_dumper(test_dumper):


@pytest.mark.usefixtures("init_profile")
class TestIncrementalDumper:

@classmethod
def setup_class(cls):

cls.int: orm.Node = orm.Int(1).store()
cls.float: orm.Node = orm.Float(1.0).store()
cls.str: orm.Node = orm.Str('a').store()

cls.groups = {
'add': orm.Group(label='add').store(),
'add_multiply': orm.Group(label='add_multiply').store(),
}

kpoints = orm.KpointsData()
kpoints.set_kpoints_mesh(mesh=[1]*3)
cls.kpoints = kpoints
cls.structuredata = orm.StructureData(cell=((1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0))).store()


# other organizational entities
cls.manager = get_manager()
cls.profile = cls.manager.get_profile()
cls.storage = cls.manager.get_profile_storage()

# for dev
cls.storage_info: dict = cls.storage.get_info(detailed=True)

# Add one ArithmeticAddNode not in any group
# cls.add_node = generate_calculation_node_add()
# cls.multiply_add_node = generate_workchain_multiply_add()

@pytest.fixture(scope="session", autouse=True)
def init_profile(
self,
tmpdir_factory
):
"""Initialize the profile."""
# Cannot put this into `setup_class`, as then `tmpdir_factory` is expected as an actual argument
self.incremental_dumper: IncrementalDumper = IncrementalDumper(dump_parent_path=tmpdir_factory.mktemp('incr') / 'dump-parent')

def test_update_uuids_before_dump(self):
# print(wc_node)
from aiida.cmdline.utils import echo
print(self.float)
pass
# print(self.storage_info)
# self.incremental_dumper.update_uuids_before_dump()
# # echo.echo_dictionary(self.storage_info)
# assert False
# pass

def test_update_uuids_after_dump(self, tmp_path):
pass
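
# Not part of this commit: a sketch of assertions the placeholder tests above could grow into,
# based on the bookkeeping in `update_uuids_before_dump`. On a first run there is no previous
# dump, so every profile UUID should be scheduled for dumping and nothing for deletion:
#
#     Path(self.incremental_dumper.dump_parent_path).mkdir(parents=True, exist_ok=True)
#     self.incremental_dumper.update_uuids_before_dump()
#     assert self.incremental_dumper.todump_uuids == self.incremental_dumper.profile_uuids
#     assert self.incremental_dumper.todel_uuids == set()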


# cj_nodes = [
# generate_calculation_node_io(attach_outputs=False),
# generate_calculation_node_io(attach_outputs=False),
# ]
# wc_node = generate_workchain_node_io(cj_nodes=cj_nodes)



# @pytest.fixture(scope="session", autouse=True)
# def init_profile(
# self,
# # aiida_localhost,
# # tmp_path, # -> Function-scoped
# tmpdir_factory
# ):
# """Initialize the profile."""
# # Add one of each orm entity of interest to the profile
# # self.computer: orm.Computer = aiida_localhost
# # self.authinfo: orm.AuthInfo = self.computer.get_authinfo(user=orm.User.collection.get_default())
# # self.code: orm.Code = orm.InstalledCode(computer=self.computer, filepath_executable='/bin/true').store()
# self.int: orm.Node = orm.Int(1).store()
# # self.float: orm.Node = orm.Float(1.0).store()
# # self.str: orm.Node = orm.Str('a').store()

# # self.groups = {
# # 'add': orm.Group(label='add').store(),
# # 'add_multiply': orm.Group(label='add_multiply').store(),
# # }

# # kpoints = orm.KpointsData()
# # kpoints.set_kpoints_mesh(mesh=[1]*3)
# # self.kpoints = kpoints
# # self.structuredata = orm.StructureData(cell=((1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0))).store()

# # self.incremental_dumper: IncrementalDumper = IncrementalDumper(dump_parent_path=tmpdir_factory.mktemp('incr') / 'dump-parent')

# # # other organizational entities
# # self.manager = get_manager()
# # self.profile = self.manager.get_profile()
# # self.storage = self.manager.get_profile_storage()

# # # for dev
# # self.storage_info: dict = self.storage.get_info(detailed=True)

# # Add one ArithmeticAddNode not in any group
# # self.add_node = generate_calculation_node_add()
# # self.multiply_add_node = generate_workchain_multiply_add()
