Skip to content

Commit

Permalink
Tgrel/minor mlperf fixes (facebookresearch#54)
Browse files Browse the repository at this point in the history
* Fix command-line flag typo

* Remove the end-of-epoch evaluation in MLPerf mode to avoid two evals close to each other
  • Loading branch information
tgrel authored Feb 15, 2020
1 parent 1768658 commit a5d707f
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion bench/run_and_time.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@ else
fi
#echo $dlrm_extra_option

-python dlrm_s_pytorch.py --arch-sparse-feature-size=128 --arch-mlp-bot="13-512-256-128" --arch-mlp-top="1024-1024-512-256-1" --max-ind-range=40000000 --data-generation=dataset --data-set=terabyte --raw-data-file=./input/day --processed-data-file=./input/terabyte_processed.npz --loss-function=bce --round-targets=True --learning-rate=1.0 --mini-batch-size=2048 --print-freq=2048 --print-time --test-freq=102400 --test-mini-batch-size=16384 --test-num-workers=16 --memory-map --mlperf-logging --mlperf-auc-threshold=0.8025 --mlperf-bin-file --mlperf-bin-shuffle $dlrm_extra_option 2>&1 | tee run_terabyte_mlperf_pt.log
+python dlrm_s_pytorch.py --arch-sparse-feature-size=128 --arch-mlp-bot="13-512-256-128" --arch-mlp-top="1024-1024-512-256-1" --max-ind-range=40000000 --data-generation=dataset --data-set=terabyte --raw-data-file=./input/day --processed-data-file=./input/terabyte_processed.npz --loss-function=bce --round-targets=True --learning-rate=1.0 --mini-batch-size=2048 --print-freq=2048 --print-time --test-freq=102400 --test-mini-batch-size=16384 --test-num-workers=16 --memory-map --mlperf-logging --mlperf-auc-threshold=0.8025 --mlperf-bin-loader --mlperf-bin-shuffle $dlrm_extra_option 2>&1 | tee run_terabyte_mlperf_pt.log

echo "done"
2 changes: 1 addition & 1 deletion dlrm_s_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,7 @@ def loss_fn_wrap(Z, T, use_gpu, device):
should_test = (
(args.test_freq > 0)
and (args.data_generation == "dataset")
-and (((j + 1) % args.test_freq == 0) or (j + 1 == nbatches))
+and (((j + 1) % args.test_freq == 0) or (j + 1 == nbatches and not args.mlperf_logging))
)

# print time, loss and accuracy
Expand Down

0 comments on commit a5d707f

Please sign in to comment.