Predictive GFM 2025 #318

Open. Wants to merge 33 commits into base: Predictive_GFM_2025

Commits (33 total; changes shown from 18 commits)
ba4cedc
data attributes updated for consistency across datasets
allaffa Jan 11, 2025
a7ab975
non-normalized chemical composition added as data attribute
allaffa Jan 11, 2025
7292244
download_dataset.sh added for transition1x example
allaffa May 6, 2024
c220207
download dataset flag updated
allaffa Sep 17, 2024
7c7a5c3
scripts updated
Jan 11, 2025
9018d8a
development of transition1x scripts continues
allaffa Jan 11, 2025
a1b6361
transition1x scripts completed
allaffa Jan 11, 2025
224295c
black formatting fixed
allaffa Jan 11, 2025
9b4dae2
Merge pull request #21 from allaffa/transition1x
allaffa Jan 11, 2025
47cf3a8
printouts removed
allaffa Jan 11, 2025
650dc35
parallelization of data reading introduced
allaffa Jan 11, 2025
c193db0
black formatting fixed
allaffa Jan 11, 2025
08a8d34
Merge pull request #22 from allaffa/transition1x
allaffa Jan 11, 2025
4c128e6
detach().clone() used to define normalized energy per atom and black…
allaffa Jan 11, 2025
8f76423
add compute_grad_energy=False as explicit argument
allaffa Jan 11, 2025
4407d66
add data name as attribute to each data object
allaffa Jan 12, 2025
f33cb8c
compute_grad_energy is parsed as input argument with default value se…
allaffa Jan 12, 2025
78ea0cb
edge_index, edge_attr, and edge_shifts explicitly introduced in the d…
allaffa Jan 12, 2025
3c3c434
changed data.force into data.forces for ani1x and qm7x examples
allaffa Jan 13, 2025
4687734
smiles_string added as data attribute
allaffa Jan 16, 2025
8a235c0
remove redundant logic on energy normalization from omat24 example
allaffa Jan 16, 2025
28560e6
force threshold value increased to 1000 for ani-1x
allaffa Jan 22, 2025
743f020
Reverted smiles_utils.py to version from commit 3c3c434f544d1a042775a…
allaffa Jan 23, 2025
b07be51
xyz2mol functionalities put in a separate file
allaffa Jan 23, 2025
ae1e214
download dataset script added for qm7x
Jan 23, 2025
34397d4
renamed data.force as data.forces in ani1x
allaffa Jan 25, 2025
8e81d75
natoms converted into a tensor
allaffa Jan 25, 2025
baf6866
verbosity level introduced for ani1x
allaffa Jan 25, 2025
c185151
Z corrected into atomic_numbers for qm7x example
allaffa Jan 25, 2025
c1617db
bug fixed for data attributes in transition1x
allaffa Jan 27, 2025
e7ed2ce
try-except in transition1x rescoped
allaffa Jan 27, 2025
493ba9d
transform coordinates fixed in transition1x
allaffa Jan 27, 2025
e7cb7e3
iterate_tqdm used in utils.create_graph_data for transition1x example
allaffa Jan 27, 2025
85 changes: 71 additions & 14 deletions examples/alexandria/train.py
@@ -6,11 +6,13 @@
from mpi4py import MPI
import argparse

import random
import numpy as np

import random

import torch
from torch_geometric.data import Data
from torch_geometric.transforms import AddLaplacianEigenvectorPE

from torch_geometric.transforms import Distance, Spherical, LocalCartesian

@@ -67,12 +69,19 @@ def list_directories(path):
reversed_dict_periodic_table = {value: key for key, value in periodic_table.items()}

# transform_coordinates = Spherical(norm=False, cat=False)
# transform_coordinates = LocalCartesian(norm=False, cat=False)
transform_coordinates = Distance(norm=False, cat=False)
transform_coordinates = LocalCartesian(norm=False, cat=False)
# transform_coordinates = Distance(norm=False, cat=False)


class Alexandria(AbstractBaseDataset):
def __init__(self, dirpath, var_config, energy_per_atom=True, dist=False):
def __init__(
self,
dirpath,
var_config,
graphgps_transform=None,
energy_per_atom=True,
dist=False,
):
super().__init__()

self.dist = dist
@@ -86,6 +95,11 @@ def __init__(self, dirpath, var_config, energy_per_atom=True, dist=False):
self.radius_graph = RadiusGraph(5.0, loop=False, max_num_neighbors=50)
self.radius_graph_pbc = RadiusGraphPBC(5.0, loop=False, max_num_neighbors=50)

self.graphgps_transform = graphgps_transform

# Threshold for atomic forces in eV/angstrom
self.forces_norm_threshold = 1000.0

list_dirs = list_directories(
os.path.join(dirpath, "compressed_data", "alexandria.icams.rub.de")
)
@@ -241,16 +255,31 @@ def get_magmoms_array_from_structure(structure):
# print(f"Structure {entry_id} does not have e_above_hull")
# return data_object

x = torch.cat([atomic_numbers, pos, forces], dim=1)

# Calculate chemical composition
atomic_number_list = atomic_numbers.tolist()
assert len(atomic_number_list) == natoms
## 118: number of elements in the periodic table
hist, _ = np.histogram(atomic_number_list, bins=range(1, 118 + 2))
chemical_composition = torch.tensor(hist).unsqueeze(1).to(torch.float32)

data_object = Data(
dataset_name="alexandria",
natoms=natoms,
pos=pos,
cell=cell,
pbc=pbc,
edge_index=None,
edge_attr=None,
edge_shifts=None,
atomic_numbers=atomic_numbers,
forces=forces,
chemical_composition=chemical_composition,
# entry_id=entry_id,
natoms=natoms,
total_energy=total_energy_tensor,
total_energy_per_atom=total_energy_per_atom_tensor,
x=x,
energy=total_energy_tensor,
energy_per_atom=total_energy_per_atom_tensor,
forces=forces,
# formation_energy=torch.tensor(formation_energy).float(),
# formation_energy_per_atom=torch.tensor(formation_energy_per_atom).float(),
# energy_above_hull=energy_above_hull,
@@ -265,10 +294,6 @@ def get_magmoms_array_from_structure(structure):
else:
data_object.y = data_object.total_energy

data_object.x = torch.cat(
[data_object.atomic_numbers, data_object.pos, data_object.forces], dim=1
)

if data_object.pbc is not None and data_object.cell is not None:
try:
data_object = self.radius_graph_pbc(data_object)
@@ -281,9 +306,21 @@ def get_magmoms_array_from_structure(structure):
else:
data_object = self.radius_graph(data_object)

# Build edge attributes
data_object = transform_coordinates(data_object)

return data_object
# LPE
if self.graphgps_transform is not None:
data_object = self.graphgps_transform(data_object)

if self.check_forces_values(data_object.forces):
return data_object
else:
print(
f"L2-norm of force tensor exceeds threshold {self.forces_norm_threshold} - atomistic structure: {data}",
flush=True,
)
return None

def process_file_content(self, filepath):
"""
@@ -332,6 +369,14 @@ def process_file_content(self, filepath):
except Exception as e:
print("An error occurred:", e, flush=True)

def check_forces_values(self, forces):

# Calculate the L2 norm for each row
norms = torch.norm(forces, p=2, dim=1)
# Check if all norms are less than the threshold

return torch.all(norms < self.forces_norm_threshold).item()

def len(self):
return len(self.dataset)

@@ -356,7 +401,7 @@ def get(self, idx):
"--energy_per_atom",
help="option to normalize energy by number of atoms",
type=bool,
default=True,
default=False,
)
parser.add_argument("--ddstore", action="store_true", help="ddstore dataset")
parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None)
@@ -365,6 +410,9 @@ def get(self, idx):
parser.add_argument("--batch_size", type=int, help="batch_size", default=None)
parser.add_argument("--everyone", action="store_true", help="gptimer")
parser.add_argument("--modelname", help="model name")
parser.add_argument(
"--compute_grad_energy", type=bool, help="compute_grad_energy", default=False
)
group = parser.add_mutually_exclusive_group()
group.add_argument(
"--adios",
@@ -402,6 +450,13 @@ def get(self, idx):
var_config["node_feature_names"] = node_feature_names
var_config["node_feature_dims"] = node_feature_dims

# Transformation to create Laplacian positional and structural encodings
graphgps_transform = AddLaplacianEigenvectorPE(
k=config["NeuralNetwork"]["Architecture"]["pe_dim"],
attr_name="pe",
is_undirected=True,
)

if args.batch_size is not None:
config["NeuralNetwork"]["Training"]["batch_size"] = args.batch_size

@@ -431,6 +486,7 @@ def get(self, idx):
total = Alexandria(
os.path.join(datadir),
var_config,
graphgps_transform=graphgps_transform,
energy_per_atom=args.energy_per_atom,
dist=True,
)
@@ -597,6 +653,7 @@ def get(self, idx):
log_name,
verbosity,
create_plots=False,
compute_grad_energy=args.compute_grad_energy,
)

hydragnn.utils.model.save_model(model, optimizer, log_name)
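For reference, here is a minimal, self-contained sketch of the two per-structure additions in the Alexandria diff above: the 118-bin chemical-composition histogram and the L2 force-norm filter. The three-atom structure, the random forces, and the printed values below are illustrative placeholders, not data from the PR.

import numpy as np
import torch

# Illustrative inputs: an Nx1 atomic-number tensor (e.g. H2O) and per-atom forces
atomic_numbers = torch.tensor([[8.0], [1.0], [1.0]])
forces = torch.randn(3, 3)
forces_norm_threshold = 1000.0  # eV/angstrom, as set in the Alexandria example

# One histogram bin per element of the periodic table (atomic numbers 1..118)
hist, _ = np.histogram(atomic_numbers.flatten().tolist(), bins=range(1, 118 + 2))
chemical_composition = torch.tensor(hist).unsqueeze(1).to(torch.float32)  # shape [118, 1]

# Keep the structure only if every per-atom force norm is below the threshold
norms = torch.norm(forces, p=2, dim=1)
keep = torch.all(norms < forces_norm_threshold).item()
print(chemical_composition.sum().item(), keep)

The same histogram and threshold logic appears in the ani1_x example below, so filtered structures carry a consistent set of attributes across datasets.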
83 changes: 64 additions & 19 deletions examples/ani1_x/train.py
@@ -15,6 +15,7 @@

from torch_geometric.data import Data
from torch_geometric.transforms import Distance, Spherical, LocalCartesian
from torch_geometric.transforms import AddLaplacianEigenvectorPE

import hydragnn
from hydragnn.utils.profiling_and_tracing.time_utils import Timer
@@ -51,12 +52,19 @@ def info(*args, logtype="info", sep=" "):


# transform_coordinates = Spherical(norm=False, cat=False)
# transform_coordinates = LocalCartesian(norm=False, cat=False)
transform_coordinates = Distance(norm=False, cat=False)
transform_coordinates = LocalCartesian(norm=False, cat=False)
# transform_coordinates = Distance(norm=False, cat=False)


class ANI1xDataset(AbstractBaseDataset):
def __init__(self, dirpath, var_config, energy_per_atom=True, dist=False):
def __init__(
self,
dirpath,
var_config,
graphgps_transform=None,
energy_per_atom=True,
dist=False,
):
super().__init__()

self.var_config = var_config
@@ -66,6 +74,8 @@ def __init__(self, dirpath, var_config, energy_per_atom=True, dist=False):

self.radius_graph = RadiusGraph(5.0, loop=False, max_num_neighbors=50)

self.graphgps_transform = graphgps_transform

self.dist = dist
if self.dist:
assert torch.distributed.is_initialized()
@@ -103,35 +113,58 @@ def convert_trajectories_to_graphs(self):
# extract positions, energies, and forces for each step
for frame_id in local_trajectories_id:

positions = torch.from_numpy(X[frame_id]).to(torch.float32)
pos = torch.from_numpy(X[frame_id]).to(torch.float32)
energy = (
torch.tensor(E[frame_id])
.unsqueeze(0)
.unsqueeze(1)
.to(torch.float32)
)
if self.energy_per_atom:
energy /= natoms

energy_per_atom = energy.detach().clone() / natoms
forces = torch.from_numpy(F[frame_id]).to(torch.float32)
x = torch.cat([atomic_numbers, positions, forces], dim=1)
x = torch.cat([atomic_numbers, pos, forces], dim=1)

data = Data(
energy=energy,
force=forces,
# Calculate chemical composition
atomic_number_list = atomic_numbers.tolist()
assert len(atomic_number_list) == natoms
## 118: number of elements in the periodic table
hist, _ = np.histogram(atomic_number_list, bins=range(1, 118 + 2))
chemical_composition = torch.tensor(hist).unsqueeze(1).to(torch.float32)

data_object = Data(
dataset_name="ani1x",
natoms=natoms,
# stress=torch.tensor(stresses, dtype=torch.float32),
# magmom=torch.tensor(magmom, dtype=torch.float32),
pos=positions,
pos=pos,
cell=None, # even if not needed, cell needs to be defined because ADIOS requires consistency across datasets
pbc=None, # even if not needed, pbc needs to be defined because ADIOS requires consistency across datasets
edge_index=None,
edge_attr=None,
edge_shifts=None, # even if not needed, edge_shift needs to be defined because ADIOS requires consistency across datasets
atomic_numbers=atomic_numbers, # Reshaping atomic_numbers to Nx1 tensor
chemical_composition=chemical_composition,
x=x,
y=energy,
energy=energy,
energy_per_atom=energy_per_atom,
force=forces,
)

data = self.radius_graph(data)
data = transform_coordinates(data)
if self.energy_per_atom:
data_object.y = data_object.energy_per_atom
else:
data_object.y = data_object.energy

data_object = self.radius_graph(data_object)

# Build edge attributes
data_object = transform_coordinates(data_object)

# LPE
if self.graphgps_transform is not None:
data_object = self.graphgps_transform(data_object)

if self.check_forces_values(data.force):
self.dataset.append(data)
if self.check_forces_values(data_object.force):
self.dataset.append(data_object)
else:
print(
f"L2-norm of force tensor exceeds threshold {self.forces_norm_threshold} - atomistic structure: {data}",
@@ -195,7 +228,7 @@ def get(self, idx):
"--energy_per_atom",
help="option to normalize energy by number of atoms",
type=bool,
default=True,
default=False,
)
parser.add_argument("--ddstore", action="store_true", help="ddstore dataset")
parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None)
@@ -204,6 +237,9 @@ def get(self, idx):
parser.add_argument("--batch_size", type=int, help="batch_size", default=None)
parser.add_argument("--everyone", action="store_true", help="gptimer")
parser.add_argument("--modelname", help="model name")
parser.add_argument(
"--compute_grad_energy", type=bool, help="compute_grad_energy", default=False
)
group = parser.add_mutually_exclusive_group()
group.add_argument(
"--adios",
@@ -241,6 +277,13 @@ def get(self, idx):
var_config["node_feature_names"] = node_feature_names
var_config["node_feature_dims"] = node_feature_dims

# Transformation to create Laplacian positional and structural encodings
graphgps_transform = AddLaplacianEigenvectorPE(
k=config["NeuralNetwork"]["Architecture"]["pe_dim"],
attr_name="pe",
is_undirected=True,
)

if args.batch_size is not None:
config["NeuralNetwork"]["Training"]["batch_size"] = args.batch_size

@@ -270,6 +313,7 @@ def get(self, idx):
total = ANI1xDataset(
os.path.join(datadir),
var_config,
graphgps_transform=graphgps_transform,
energy_per_atom=args.energy_per_atom,
dist=True,
)
@@ -436,6 +480,7 @@ def get(self, idx):
log_name,
verbosity,
create_plots=False,
compute_grad_energy=args.compute_grad_energy,
)

hydragnn.utils.model.save_model(model, optimizer, log_name)
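For reference, here is a minimal sketch of the AddLaplacianEigenvectorPE transform that both train.py scripts above now build and pass to their datasets as graphgps_transform. The four-node toy graph and k=2 are illustrative assumptions; in the PR, k is read from config["NeuralNetwork"]["Architecture"]["pe_dim"] and the transform is applied after the radius graph and edge attributes have been built.

import torch
from torch_geometric.data import Data
from torch_geometric.transforms import AddLaplacianEigenvectorPE

# Toy undirected 4-node cycle graph, each edge listed in both directions
edge_index = torch.tensor(
    [[0, 1, 1, 2, 2, 3, 3, 0], [1, 0, 2, 1, 3, 2, 0, 3]], dtype=torch.long
)
data = Data(x=torch.randn(4, 3), edge_index=edge_index, num_nodes=4)

# Same arguments as in the examples, with an illustrative k
graphgps_transform = AddLaplacianEigenvectorPE(k=2, attr_name="pe", is_undirected=True)
data = graphgps_transform(data)
print(data.pe.shape)  # torch.Size([4, 2]): one k-dimensional encoding per node

The transform stores the k non-trivial Laplacian eigenvectors under data.pe, giving each node a k-dimensional positional encoding alongside its existing node features.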