Skip to content

Commit

Permalink
Read existing dataset to save time.
Browse files: browse the repository at this point in the history.
  • Loading branch information
knc6 committed May 4, 2024
1 parent 575216b commit e600009
Showing 1 changed file with 8 additions and 0 deletions.
8 changes: 8 additions & 0 deletions alignn/lmdb_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -108,6 +108,7 @@ def get_torch_dataset(
output_dir=".",
tmp_name="dataset",
map_size=1e12,
read_existing=True,
):
"""Get Torch Dataset with LMDB."""
vals = np.array([ii[target] for ii in dataset]) # df[target].values
Expand All @@ -119,6 +120,13 @@ def get_torch_dataset(
f.write(line)
f.close()
ids = []
if os.path.exists(tmp_name) and read_existing:
for idx, (d) in tqdm(enumerate(dataset), total=len(dataset)):
ids.append(d[id_tag])
dat = TorchLMDBDataset(lmdb_path=tmp_name, ids=ids)
print("Reading dataset", tmp_name)
return dat
ids = []
env = lmdb.open(tmp_name, map_size=int(map_size))
with env.begin(write=True) as txn:
for idx, (d) in tqdm(enumerate(dataset), total=len(dataset)):
Expand Down

0 comments on commit e600009

Please sign in to comment.