Skip to content

Commit

Permalink
increased code coverage of tests, primarily related to catching edge …
Browse files Browse the repository at this point in the history
…cases and failures
  • Loading branch information
chrisiacovella committed Feb 14, 2025
1 parent b516831 commit 1e8fdcf
Show file tree
Hide file tree
Showing 3 changed files with 242 additions and 3 deletions.
1 change: 1 addition & 0 deletions modelforge-curate/modelforge/curate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
__all__ = ["__version__"]

from .curate import Record, SourceDataset
from .properties import *
4 changes: 2 additions & 2 deletions modelforge-curate/modelforge/curate/curate.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ def __repr__(self):
output_string += f"* n_configs: {self.n_configs}\n"
output_string += "* atomic_numbers:\n"
output_string += f" - {self.atomic_numbers}\n"
output_string += f"* per-atom properties ({list(self.per_atom.keys())}):\n"
output_string += f"* per-atom properties: ({list(self.per_atom.keys())}):\n"
for key, value in self.per_atom.items():
output_string += f" - {value}\n"
output_string += f"* per-system properties ({list(self.per_system.keys())}):\n"
output_string += f"* per-system properties: ({list(self.per_system.keys())}):\n"
for key, value in self.per_system.items():
output_string += f" - {value}\n"
output_string += f"* meta_data: ({list(self.meta_data.keys())})\n"
Expand Down
240 changes: 239 additions & 1 deletion modelforge-curate/modelforge/curate/tests/test_curate.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,44 @@ def test_source_dataset_init():
assert len(new_dataset.records) == 2


def test_dataset_create_record():
    """Exercise creating, adding and removing records on a ``SourceDataset``.

    Covers:
    * ``create_record`` and ``add_record`` raising ``ValueError`` when the
      record name is already present,
    * ``remove_record`` for a present and for an absent record name,
    * ``add_properties`` creating the record when the name is not yet known.
    """
    new_dataset = SourceDataset("test_dataset")
    new_dataset.create_record("mol1")
    assert "mol1" in new_dataset.records

    # creating a record whose name already exists must fail
    with pytest.raises(ValueError):
        new_dataset.create_record("mol1")

    new_dataset.create_record("mol2")
    assert "mol2" in new_dataset.records
    assert "mol1" in new_dataset.records
    assert len(new_dataset.records) == 2

    # adding a pre-built Record with a duplicate name must fail as well
    record = Record(name="mol1")
    with pytest.raises(ValueError):
        new_dataset.add_record(record)

    record = Record(name="mol3")
    new_dataset.add_record(record)
    assert "mol3" in new_dataset.records
    assert len(new_dataset.records) == 3

    new_dataset.remove_record("mol3")
    assert "mol3" not in new_dataset.records

    # "mol4" was never added to the dataset; removing it is expected to do
    # nothing other than log a warning (it must not raise)
    new_dataset.remove_record("mol4")

    # adding properties under an unknown record name should create that record
    # (local renamed from ``property`` so the builtin is not shadowed)
    atomic_numbers = AtomicNumbers(value=np.array([[1], [6]]))
    assert "mol4" not in new_dataset.records
    new_dataset.add_properties("mol4", [atomic_numbers])
    assert "mol4" in new_dataset.records


def test_initialize_properties():
positions = Positions(value=[[[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]], units="nanometer")

Expand Down Expand Up @@ -192,6 +230,8 @@ def test_initialize_properties():
positions = Positions(
value=[[[1.0, 1.0, 1.0, 2.0], [2.0, 2.0, 2.0, 3.0]]], units="nanometer"
)
with pytest.raises(ValueError):
positions = Positions(value=[1.0, 1.0, 1.0, 2.0, 2.0, 2.0], units="meter")
# not units! we don't assume, must specify
with pytest.raises(ValueError):
positions = Positions(value=np.array([[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]))
Expand All @@ -203,7 +243,8 @@ def test_initialize_properties():
# wrong shape
with pytest.raises(ValueError):
energies = Energies(value=np.array([[0.1, 0.3]]), units=unit.hartree)

with pytest.raises(ValueError):
atomic_numbers = AtomicNumbers(value=np.array([1, 6]))
# wrong shape
with pytest.raises(ValueError):
atomic_numbers = AtomicNumbers(value=np.array([[1, 6]]))
Expand Down Expand Up @@ -266,6 +307,164 @@ def test_add_properties_to_records_directly():
assert "mol1" in new_dataset.records.keys()


def test_record_repr(capsys):
    """Check the printed representation of a ``Record`` before and after filling it.

    Parameters
    ----------
    capsys
        pytest fixture used to capture what ``print(record)`` writes to stdout.
    """
    record = Record(name="mol1")

    positions = Positions(value=[[[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]], units="nanometer")
    energies = Energies(value=np.array([[0.1]]), units=unit.hartree)
    atomic_numbers = AtomicNumbers(value=np.array([[1], [6]]))
    smiles = MetaData(name="smiles", value="[CH]")

    # an empty record cannot report atom or configuration counts
    print(record)
    out, _ = capsys.readouterr()

    assert "n_atoms: cannot be determined" in out
    assert "n_configs: cannot be determined" in out

    record.add_properties([positions, energies, atomic_numbers, smiles])

    print(record)
    out, _ = capsys.readouterr()

    # substrings that must all appear in the populated record's repr;
    # array values wrap over several lines, hence the split fragments
    expected_fragments = (
        "name: mol1",
        "n_atoms: 2",
        "n_configs: 1",
        " per-atom properties: (['positions'])",
        " per-system properties: (['energies'])",
        " meta_data: (['smiles'])",
        " atomic_numbers",
        " name='atomic_numbers' value=array([[1]",
        " [6]]) units=<Unit('dimensionless')> classification='atomic_numbers' property_type='atomic_numbers' n_configs=None n_atoms=2",
        "name='positions' value=array([[[1., 1., 1.]",
        "[2., 2., 2.]]]) units=<Unit('nanometer')> classification='per_atom' property_type='length' n_configs=1 n_atoms=2",
        "name='energies' value=array([[0.1]]) units=<Unit('hartree')> classification='per_system' property_type='energy' n_configs=1 n_atoms=None",
        " name='smiles' value='[CH]' units=<Unit('dimensionless')> classification='meta_data' property_type='meta_data' n_configs=None n_atoms=None",
    )
    for fragment in expected_fragments:
        assert fragment in out


def test_record_to_dict():
    """Verify that ``Record.to_dict`` exposes the name, counts and stored properties."""
    record = Record(name="mol1")

    coords = Positions(value=[[[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]], units="nanometer")
    total_energy = Energies(value=np.array([[0.1]]), units=unit.hartree)
    elements = AtomicNumbers(value=np.array([[1], [6]]))
    smiles = MetaData(name="smiles", value="[CH]")

    record.add_properties([coords, total_energy, elements, smiles])
    as_dict = record.to_dict()

    # scalar entries of the dictionary
    assert as_dict["name"] == "mol1"
    assert as_dict["n_atoms"] == 2
    assert as_dict["n_configs"] == 1

    # stored property objects must carry the same values that were added
    stored_vs_input = (
        (as_dict["atomic_numbers"], elements),
        (as_dict["per_atom"]["positions"], coords),
        (as_dict["per_system"]["energies"], total_energy),
    )
    for stored, original in stored_vs_input:
        assert np.all(stored.value == original.value)


def test_record_validation():
    """Check ``Record`` validation for one consistent and four inconsistent records.

    Validation failures are expected to return ``False`` rather than raise
    (the original author notes that a warning is logged instead).
    The ``== True`` / ``== False`` comparisons are kept as written because the
    exact return type of the validators is not visible from this test.
    """
    two_atom_coords = [[[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]]

    # --- consistent record: everything validates -------------------------
    good_record = Record(name="mol1")
    good_record.add_properties(
        [
            Positions(value=two_atom_coords, units="nanometer"),
            Energies(value=np.array([[0.1]]), units=unit.hartree),
            AtomicNumbers(value=np.array([[1], [6]])),
            MetaData(name="smiles", value="[CH]"),
        ]
    )
    assert good_record._validate_n_configs() == True
    assert good_record._validate_n_atoms() == True
    assert good_record.validate() == True

    # --- mismatched n_configs: one set of positions, two energies --------
    config_mismatch = Record(name="mol2")
    config_mismatch.add_properties(
        [
            Positions(value=two_atom_coords, units="nanometer"),
            Energies(value=np.array([[0.1], [0.2]]), units=unit.hartree),
            AtomicNumbers(value=np.array([[1], [6]])),
        ]
    )
    assert config_mismatch._validate_n_configs() == False
    assert config_mismatch.validate() == False

    # --- mismatched n_atoms: three positions, two atomic numbers ---------
    atom_mismatch = Record(name="mol3")
    atom_mismatch.add_properties(
        [
            Positions(
                value=[[[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]]],
                units="nanometer",
            ),
            Energies(value=np.array([[0.1]]), units=unit.hartree),
            AtomicNumbers(value=np.array([[1], [6]])),
        ]
    )
    assert atom_mismatch._validate_n_atoms() == False
    assert atom_mismatch.validate() == False

    # --- atomic numbers never set ----------------------------------------
    no_atomic_numbers = Record(name="mol4")
    no_atomic_numbers.add_properties(
        [
            Positions(value=two_atom_coords, units="nanometer"),
            Energies(value=np.array([[0.1]]), units=unit.hartree),
        ]
    )
    assert no_atomic_numbers._validate_n_atoms() == False
    assert no_atomic_numbers.validate() == False

    # --- only atomic numbers: nothing determines the number of configs ---
    only_atomic_numbers = Record(name="mol5")
    only_atomic_numbers.add_property(AtomicNumbers(value=np.array([[1], [6]])))
    assert only_atomic_numbers._validate_n_configs() == False
    assert only_atomic_numbers.validate() == False


def test_add_properties_failures():
    """Check that ``Record.add_property`` rejects duplicates and reserved names."""
    # Part 1: a property that is already on the record cannot be added again
    record = Record(name="mol1")
    already_added = [
        Positions(value=[[[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]], units="nanometer"),
        Energies(value=np.array([[0.1]]), units=unit.hartree),
        AtomicNumbers(value=np.array([[1], [6]])),
        MetaData(name="smiles", value="[CH]"),
    ]
    record.add_properties(already_added)
    for duplicate in already_added:
        with pytest.raises(ValueError):
            record.add_property(duplicate)

    # Part 2: the name "atomic_numbers" is reserved, so properties of any
    # other kind carrying that name must be rejected by a fresh record
    record = Record(name="mol1")
    reserved_meta = MetaData(name="atomic_numbers", value="[1,2]")
    reserved_positions = Positions(
        name="atomic_numbers",
        value=[[[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]],
        units="nanometer",
    )
    reserved_energies = Energies(
        name="atomic_numbers", value=np.array([[0.1]]), units=unit.hartree
    )
    # constructed exactly as in the original test, but never added to the record
    atomic_numbers = AtomicNumbers(name="atomic_numbers", value=np.array([[1], [6]]))

    for misnamed in (reserved_meta, reserved_positions, reserved_energies):
        with pytest.raises(ValueError):
            record.add_property(misnamed)


def test_add_properties():
new_dataset = SourceDataset("test_dataset")
new_dataset.create_record("mol1")
Expand Down Expand Up @@ -324,6 +523,19 @@ def test_slicing_properties():
assert sliced1.n_configs == 1
assert sliced1.per_system["energies"].value == [[0.1]]

# check dataset level slicing, that just calls the record level slicing
new_dataset = SourceDataset("test_dataset")
new_dataset.add_record(record)

sliced2 = new_dataset.slice_record("mol1", 0, 1)
assert sliced2.n_configs == 1
assert sliced2.per_system["energies"].value == [[0.1]]

# let us try to break this by passing the record, not record name

with pytest.raises(AssertionError):
new_dataset.slice_record(record, 0, 1)


def test_counting_records():
new_dataset = SourceDataset("test_dataset")
Expand Down Expand Up @@ -580,3 +792,29 @@ def test_unit_system():

GlobalUnitSystem.set_global_units("length", unit.nanometer)
assert GlobalUnitSystem.length == unit.nanometer


def test_dataset_validation():
    """Check dataset-level validation as a second record is filled in step by step.

    The ``== True`` / ``== False`` comparisons are kept as written because the
    return type of the validation methods is not visible from this test.
    """
    coords = Positions(value=[[[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]], units="nanometer")
    total_energy = Energies(value=np.array([[0.1]]), units=unit.hartree)
    elements = AtomicNumbers(value=np.array([[1], [6]]))
    smiles = MetaData(name="smiles", value="[CH]")

    dataset = SourceDataset("test_dataset")
    dataset.create_record("mol1")
    dataset.add_properties("mol1", [coords, total_energy, elements, smiles])

    # a single, fully populated record validates
    assert dataset.validate_records() == True

    # an empty second record makes the dataset as a whole invalid
    dataset.create_record("mol2")
    assert dataset.validate_records() == False
    assert dataset.validate_record("mol1") == True
    assert dataset.validate_record("mol2") == False

    # "mol2" stays invalid until positions, atomic numbers and energies are all present
    dataset.add_property("mol2", coords)
    assert dataset.validate_record("mol2") == False
    dataset.add_property("mol2", elements)
    assert dataset.validate_record("mol2") == False
    dataset.add_property("mol2", total_energy)
    assert dataset.validate_record("mol2") == True
    assert dataset.validate_records() == True

0 comments on commit 1e8fdcf

Please sign in to comment.