Setting internal json indent to None, and splitting json string in hdf5
momchil-flex committed Nov 3, 2023
1 parent 30f3693 commit f58e161
Showing 9 changed files with 74 additions and 35 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -8,10 +8,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

### Changed
- Indent for the json string of Tidy3D models has been changed to `None` when used internally; kept as `indent=4` for writing to `json` and `yaml` files.

### Fixed
- Fixed the duplication of log messages in Jupyter when `set_logging_file` is used.
- If the input to circular filters in adjoint has a size smaller than the diameter, instead of erroring, warn the user and truncate the filter kernel accordingly.
- When writing the json string of a model to an `hdf5` file, the string is split into chunks if it has more than a set (very large) number of characters. This fixes a potential error if the string size is more than 4GB.


## [2.5.0rc2] - 2023-10-30

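For reference, the first changelog entry only changes how the json string is serialized internally; files written with `to_json` or `to_yaml` keep the 4-space indent. A minimal sketch of the difference, using plain `json` with a made-up dictionary rather than the Tidy3D API:

```python
# Sketch only: illustrates indent=None (compact, now used internally) versus
# indent=4 (pretty-printed, still used when writing .json/.yaml files).
import json

model_dict = {"type": "Medium", "permittivity": 2.0, "conductivity": 0.0}

compact = json.dumps(model_dict, indent=None)  # single line, no extra whitespace
pretty = json.dumps(model_dict, indent=4)      # multi-line, human-readable

print(len(compact) < len(pretty))  # True: the internal string is shorter
```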
Binary file modified tests/sims/simulation_2_5_0rc2.h5
18 changes: 13 additions & 5 deletions tests/test_components/test_IO.py
@@ -21,6 +21,14 @@
SIM_DIR = "tests/sims"


@pytest.fixture
def split_string(monkeypatch):
"""Lower the max string length in hdf5 read/write, in order to test the string splitting."""
from tidy3d.components import base

monkeypatch.setattr(base, "MAX_STRING_LENGTH", 100)


def set_datasets_to_none(sim):
sim_dict = sim.dict()
for src in sim_dict["sources"]:
@@ -47,7 +55,7 @@ def set_datasets_to_none(sim):
return td.Simulation.parse_obj(sim_dict)


def test_simulation_load_export():
def test_simulation_load_export(split_string):
major, minor, patch = __version__.split(".")
path = os.path.join(SIM_DIR, f"simulation_{major}_{minor}_{patch}.json")
# saving as .h5 since *.hdf5 is git ignored
@@ -81,28 +89,28 @@ def test_component_load_export_yaml(tmp_path):
assert td.Medium() == M2, "original and loaded medium are not the same"


def test_simulation_load_export_hdf5(tmp_path):
def test_simulation_load_export_hdf5(split_string, tmp_path):
path = str(tmp_path / "simulation.hdf5")
SIM.to_file(path)
SIM2 = td.Simulation.from_file(path)
assert SIM == SIM2, "original and loaded simulations are not the same"


def test_simulation_load_export_hdf5_gz(tmp_path):
def test_simulation_load_export_hdf5_gz(split_string, tmp_path):
path = str(tmp_path / "simulation.hdf5.gz")
SIM.to_file(path)
SIM2 = td.Simulation.from_file(path)
assert SIM == SIM2, "original and loaded simulations are not the same"


def test_simulation_load_export_hdf5_explicit(tmp_path):
def test_simulation_load_export_hdf5_explicit(split_string, tmp_path):
path = str(tmp_path / "simulation.hdf5")
SIM.to_hdf5(path)
SIM2 = td.Simulation.from_hdf5(path)
assert SIM == SIM2, "original and loaded simulations are not the same"


def test_simulation_load_export_hdf5_gz_explicit(tmp_path):
def test_simulation_load_export_hdf5_gz_explicit(split_string, tmp_path):
path = str(tmp_path / "simulation.hdf5.gz")
SIM.to_hdf5_gz(path)
SIM2 = td.Simulation.from_hdf5_gz(path)
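The `split_string` fixture above lowers `MAX_STRING_LENGTH` so that even small models exercise the chunked hdf5 path. A rough sketch of what that looks like on disk (not part of the test suite; the model and file name are arbitrary):

```python
# Sketch only: relies on pytest's monkeypatch and tmp_path fixtures, as in the
# tests above, and uses an arbitrary small model to keep the json string short.
import h5py
import tidy3d as td
from tidy3d.components import base


def test_json_string_is_chunked(monkeypatch, tmp_path):
    # lower the threshold so the model's json string is split into chunks
    monkeypatch.setattr(base, "MAX_STRING_LENGTH", 100)
    path = str(tmp_path / "medium.hdf5")
    medium = td.Medium(permittivity=2.0)
    medium.to_hdf5(path)
    with h5py.File(path, "r") as f_handle:
        chunk_keys = [key for key in f_handle.keys() if "JSON_STRING" in key]
    assert len(chunk_keys) > 1  # stored as JSON_STRING, JSON_STRING_1, ...
    assert td.Medium.from_hdf5(path) == medium  # and still round-trips
```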
2 changes: 1 addition & 1 deletion tests/test_web/test_webapi.py
@@ -357,7 +357,7 @@ def get_str(*args, **kwargs):
return sim.json().encode("utf-8")

monkeypatch.setattr(f"{task_core_path}.download_file", mock_download)
monkeypatch.setattr(f"{task_core_path}._read_simulation_from_hdf5", get_str)
monkeypatch.setattr(f"{task_core_path}.read_simulation_from_hdf5", get_str)

fname_tmp = str(tmp_path / "web_test_tmp.json")
download_json(TASK_ID, fname_tmp)
2 changes: 1 addition & 1 deletion tests/test_web/test_webapi_heat.py
@@ -257,7 +257,7 @@ def get_str(*args, **kwargs):
return sim.json().encode("utf-8")

monkeypatch.setattr(f"{task_core_path}.download_file", mock_download)
monkeypatch.setattr(f"{task_core_path}._read_simulation_from_hdf5", get_str)
monkeypatch.setattr(f"{task_core_path}.read_simulation_from_hdf5", get_str)

fname_tmp = str(tmp_path / "web_test_tmp.json")
download_json(TASK_ID, fname_tmp)
40 changes: 31 additions & 9 deletions tidy3d/components/base.py
@@ -7,6 +7,7 @@
import tempfile
from functools import wraps
from typing import List, Callable, Dict, Union, Tuple, Any
from math import ceil

import rich
import pydantic.v1 as pydantic
@@ -22,9 +23,12 @@
from ..exceptions import FileError
from ..log import log

# default indentation (# spaces) in files
INDENT = 4

INDENT_JSON_FILE = 4 # default indentation of json string in json files
INDENT = None # default indentation of json string used internally
JSON_TAG = "JSON_STRING"
# If json string is larger than ``MAX_STRING_LENGTH``, split the string when storing in hdf5
MAX_STRING_LENGTH = 1e9


def cache(prop):
@@ -309,7 +313,7 @@ def to_json(self, fname: str) -> None:
-------
>>> simulation.to_json(fname='folder/sim.json') # doctest: +SKIP
"""
json_string = self._json_string
json_string = self._json(indent=INDENT_JSON_FILE)
self._warn_if_contains_data(json_string)
with open(fname, "w", encoding="utf-8") as file_handle:
file_handle.write(json_string)
@@ -375,7 +379,7 @@ def to_yaml(self, fname: str) -> None:
self._warn_if_contains_data(json_string)
model_dict = json.loads(json_string)
with open(fname, "w+", encoding="utf-8") as file_handle:
yaml.dump(model_dict, file_handle, indent=INDENT)
yaml.dump(model_dict, file_handle, indent=INDENT_JSON_FILE)

@staticmethod
def _warn_if_contains_data(json_str: str) -> None:
@@ -430,6 +434,23 @@ def get_sub_model(cls, group_path: str, model_dict: dict | list) -> dict:
model_dict = model_dict[key]
return model_dict

@staticmethod
def _json_string_key(index: int) -> str:
"""Get json string key for string chunk number ``index``."""
if index:
return f"{JSON_TAG}_{index}"
return JSON_TAG

@classmethod
def _json_string_from_hdf5(cls, fname: str) -> str:
"""Load the model json string from an hdf5 file."""
with h5py.File(fname, "r") as f_handle:
num_string_parts = len([key for key in f_handle.keys() if JSON_TAG in key])
json_string = b""
for ind in range(num_string_parts):
json_string += f_handle[cls._json_string_key(ind)][()]
return json_string

@classmethod
def dict_from_hdf5(
cls, fname: str, group_path: str = "", custom_decoders: List[Callable] = None
@@ -501,10 +522,7 @@ def load_data_from_file(model_dict: dict, group_path: str = "") -> None:
elif isinstance(value, dict):
load_data_from_file(model_dict=value, group_path=subpath)

with h5py.File(fname, "r") as f_handle:
json_string = f_handle[JSON_TAG][()]
model_dict = json.loads(json_string)

model_dict = json.loads(cls._json_string_from_hdf5(fname=fname))
group_path = cls._construct_group_path(group_path)
model_dict = cls.get_sub_model(group_path=group_path, model_dict=model_dict)
load_data_from_file(model_dict=model_dict, group_path=group_path)
@@ -563,7 +581,11 @@ def to_hdf5(self, fname: str, custom_encoders: List[Callable] = None) -> None:

with h5py.File(fname, "w") as f_handle:

f_handle[JSON_TAG] = self._json_string
json_str = self._json_string
for ind in range(ceil(len(json_str) / MAX_STRING_LENGTH)):
ind_start = int(ind * MAX_STRING_LENGTH)
ind_stop = min(int(ind + 1) * MAX_STRING_LENGTH, len(json_str))
f_handle[self._json_string_key(ind)] = json_str[ind_start:ind_stop]

def add_data_to_file(data_dict: dict, group_path: str = "") -> None:
"""For every DataArray item in dictionary, write path of hdf5 group as value."""
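The write loop in `to_hdf5` and the reader `_json_string_from_hdf5` above define the on-disk layout: the first chunk is stored under `JSON_STRING`, later chunks under `JSON_STRING_1`, `JSON_STRING_2`, and so on. A standalone sketch of the same pattern with plain `h5py` (the tiny chunk size and file name are for illustration only, not the Tidy3D code itself):

```python
# Standalone sketch of the chunked json-string layout described above.
from math import ceil

import h5py

JSON_TAG = "JSON_STRING"


def chunk_key(index: int) -> str:
    """First chunk uses the bare tag; later chunks get a numeric suffix."""
    return f"{JSON_TAG}_{index}" if index else JSON_TAG


def write_chunked(fname: str, json_str: str, max_length: int) -> None:
    """Split ``json_str`` into datasets of at most ``max_length`` characters."""
    with h5py.File(fname, "w") as f_handle:
        for ind in range(ceil(len(json_str) / max_length)):
            f_handle[chunk_key(ind)] = json_str[ind * max_length : (ind + 1) * max_length]


def read_chunked(fname: str) -> bytes:
    """Reassemble the json string by concatenating the chunks in order."""
    with h5py.File(fname, "r") as f_handle:
        num_parts = len([key for key in f_handle.keys() if JSON_TAG in key])
        return b"".join(f_handle[chunk_key(ind)][()] for ind in range(num_parts))


write_chunked("chunks.h5", '{"type": "Medium", "permittivity": 2.0}', max_length=16)
assert read_chunked("chunks.h5") == b'{"type": "Medium", "permittivity": 2.0}'
```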
7 changes: 3 additions & 4 deletions tidy3d/plugins/adjoint/components/base.py
@@ -9,7 +9,7 @@
from jax.tree_util import tree_flatten as jax_tree_flatten
from jax.tree_util import tree_unflatten as jax_tree_unflatten

from ....components.base import Tidy3dBaseModel, cached_property
from ....components.base import Tidy3dBaseModel
from .data.data_array import JaxDataArray, JAX_DATA_ARRAY_TAG


@@ -93,11 +93,10 @@ def from_tidy3d(cls, tidy3d_obj: Tidy3dBaseModel) -> JaxObject:

""" IO """

@cached_property
def _json_string(self) -> str:
def _json(self, *args, **kwargs) -> str:
"""Overwritten method to get the json string to store in the files."""

json_string_og = super()._json_string
json_string_og = super()._json(*args, **kwargs)
json_dict = json.loads(json_string_og)

def strip_data_array(sub_dict: dict) -> None:
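In the adjoint plugin, the cached `_json_string` property is replaced by a `_json(*args, **kwargs)` override so that serialization options such as `indent` keep flowing through to the base class. A minimal sketch of that forwarding pattern with made-up classes (not the Tidy3D ones):

```python
# Sketch of the override pattern only; class and field names are invented.
import json


class Base:
    def _json(self, **kwargs) -> str:
        return json.dumps({"value": 1, "data": [0, 1, 2]}, **kwargs)


class Stripped(Base):
    def _json(self, **kwargs) -> str:
        # post-process the parent's json string, forwarding options like indent
        json_dict = json.loads(super()._json(**kwargs))
        json_dict.pop("data", None)  # strip the field that should not be stored
        return json.dumps(json_dict, **kwargs)


print(Stripped()._json(indent=4))  # the indent argument reaches both dumps calls
```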
22 changes: 19 additions & 3 deletions tidy3d/web/core/file_util.py
@@ -49,12 +49,28 @@ def read_simulation_from_hdf5_gz(file_name: str) -> str:
return json_str


"""TODO: _json_string_key and read_simulation_from_hdf5 are duplicated functions that also exist
as methods in Tidy3dBaseModel. For consistency it would be best if this duplication is avoided."""


def _json_string_key(index):
"""Get json string key for string chunk number ``index``."""
if index:
return f"{JSON_TAG}_{index}"
return JSON_TAG


def read_simulation_from_hdf5(file_name: str) -> str:
"""read simulation str from hdf5"""

with h5py.File(file_name, "r") as f_handle:
json_string = f_handle[JSON_TAG][()]
return json_string
num_string_parts = len([key for key in f_handle.keys() if JSON_TAG in key])
json_string = b""
for ind in range(num_string_parts):
json_string += f_handle[_json_string_key(ind)][()]
return json_string


"""End TODO"""


def read_simulation_from_json(file_name: str) -> str:
15 changes: 3 additions & 12 deletions tidy3d/web/core/task_core.py
@@ -8,7 +8,6 @@
from typing import List, Optional, Callable, Tuple
import pydantic.v1 as pd
from pydantic.v1 import Extra, Field, parse_obj_as
import h5py

from . import http_util
from .core_config import get_logger_console
@@ -22,16 +21,8 @@
from .types import Tidy3DResource


from .constants import SIM_FILE_HDF5_GZ, SIMULATION_DATA_HDF5, SIM_LOG_FILE, JSON_TAG
from .file_util import extract_gzip_file


def _read_simulation_from_hdf5(file_name: str):
"""read simulation str from hdf5"""

with h5py.File(file_name, "r") as f_handle:
json_string = f_handle[JSON_TAG][()]
return json_string
from .constants import SIM_FILE_HDF5_GZ, SIMULATION_DATA_HDF5, SIM_LOG_FILE
from .file_util import extract_gzip_file, read_simulation_from_hdf5


class Folder(Tidy3DResource, Queryable, extra=Extra.allow):
@@ -313,7 +304,7 @@ def get_simulation_json(self, to_file: str, verbose: bool = True) -> pathlib.Pat
try:
self.get_simulation_hdf5(hdf5_file_path)
if os.path.exists(hdf5_file_path):
json_string = _read_simulation_from_hdf5(hdf5_file_path)
json_string = read_simulation_from_hdf5(hdf5_file_path)
with open(to_file, "w") as file:
# Write the string to the file
file.write(json_string.decode("utf-8"))
