Skip to content

Commit

Permalink
updates for data conversion to binary (facebookresearch#165)
Browse files Browse the repository at this point in the history
updates to create binary data

updates to create binary data set using data_loader_terabyte.py
  • Loading branch information
dkorchevgithub authored Mar 8, 2021
1 parent d25bbc2 commit 382d46f
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
13 changes: 8 additions & 5 deletions data_loader_terabyte.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,12 @@ def _batch_generator(
def _test():
generator = _batch_generator(
data_filename='day',
-data_directory='/input',
+data_directory='./input',
days=range(23),
split="train",
-batch_size=2048
+batch_size=2048,
+drop_last=True,
+max_ind_range=-1
)
t1 = time.time()
for x_int, lS_o, x_cat, y in generator:
Expand Down Expand Up @@ -305,7 +307,7 @@ def _test_bin():
required=True)
args = parser.parse_args()

-# _preprocess(args)
+_preprocess(args)

binary_data_file = os.path.join(args.output_directory,
'{}_data.bin'.format(args.split))
Expand All @@ -314,7 +316,8 @@ def _test_bin():
dataset_binary = CriteoBinDataset(data_file=binary_data_file,
counts_file=counts_file,
batch_size=2048,)
-from dlrm_data_pytorch import CriteoDataset, collate_wrapper_criteo
+from dlrm_data_pytorch import CriteoDataset
+from dlrm_data_pytorch import collate_wrapper_criteo_offset as collate_wrapper_criteo

binary_loader = torch.utils.data.DataLoader(
dataset_binary,
Expand Down Expand Up @@ -362,4 +365,4 @@ def _test_bin():

if __name__ == '__main__':
_test()
-_test_bin
+_test_bin()
1 change: 0 additions & 1 deletion dlrm_data_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -1192,7 +1192,6 @@ def write_dist_to_file(file_path, unique_accesses, list_sd, cumm_sd):


if __name__ == "__main__":
-import sys
import operator
import argparse

Expand Down

0 comments on commit 382d46f

Please sign in to comment.