Commit

some test cleanup
definitelynotmcarilli committed Mar 2, 2019
1 parent 2445031 commit 484292f
Showing 5 changed files with 20 additions and 6 deletions.
2 changes: 1 addition & 1 deletion apex/amp/handle.py
@@ -1,6 +1,7 @@
import contextlib
import logging
import warnings
import torch

from . import utils
from .opt import OptimWrapper
@@ -83,7 +84,6 @@ def skip_step():
"loss scale to {}".format(optimizer.loss_scaler.loss_scale()))
optimizer.step = optimizer_step
optimizer.step = skip_step

# Probably ok to skip this if not delay_unscale
if _amp_state.opt_properties.patch_torch_functions:
_amp_state.handle._clear_cache()
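For context, the hunk above belongs to amp's overflow handling: optimizer.step is temporarily monkey-patched with a skip_step closure that logs the reduced loss scale, restores the original step, and returns without applying an update. Below is a minimal sketch of that pattern only; the function name patch_step_for_overflow is hypothetical and this is not apex's actual implementation.

import logging

def patch_step_for_overflow(optimizer, new_loss_scale):
    # Keep a reference to the real step() so it can be restored later.
    optimizer_step = optimizer.step

    def skip_step(*args, **kwargs):
        logging.warning("Gradient overflow detected, skipping update; "
                        "reducing loss scale to {}".format(new_loss_scale))
        # Restore the real step() so the next iteration behaves normally.
        optimizer.step = optimizer_step

    # Replace step() for this iteration only.
    optimizer.step = skip_step
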
12 changes: 8 additions & 4 deletions apex/amp/scaler.py
@@ -81,9 +81,7 @@ def clear_overflow_state(self):
self._overflow_buf.zero_()

def unscale(self, model_params, master_params, scale):
# torch.cuda.nvtx.range_push("unscale")
if self._has_overflow:
# torch.cuda.nvtx.range_pop()
return

# Lots of defensive list processing going on here. Way more less efficient than
@@ -92,6 +90,12 @@ def unscale(self, model_params, master_params, scale):
in zip(model_params, master_params)] # some of these may be None

if LossScaler.has_fused_kernel:
# TODO: Make these lists permanent attributes of self, so they don't need to be created
# or garbage collected. Profiler shows that garbage collection overhead may be
# substantial (200-300 usec).
# This may be tricky because right now the lists need to be packed densely.
# Maybe this could be handled within the multi_tensor_apply wrapper
# (allow some Tensors to be None using at::optional).
src_dst_pairs = {torch.float16 : {torch.float16 : [[],[]], torch.float32 : [[],[]]},
torch.float32 : {torch.float16 : [[],[]], torch.float32 : [[],[]]}}

@@ -142,6 +146,8 @@ def unscale(self, model_params, master_params, scale):
if scale == 1.0 and all_same and not self.dynamic:
return

# TODO: Make these lists permanent attributes of self, so they don't need to be created
# or garbage collected?
model_grads = [mmp[0].grad.data for mmp in model_master_params if mmp[0].grad is not None]
master_grads = [mmp[1].grad.data for mmp in model_master_params if mmp[1].grad is not None]

@@ -151,8 +157,6 @@ def unscale(self, model_params, master_params, scale):
if LossScaler.has_fused_kernel and self.dynamic and not self._has_overflow:
self._has_overflow = self._overflow_buf.item()

# torch.cuda.nvtx.range_pop()

# Separate so unscale() can be called more that once before updating.
def update_scale(self):
if self._has_overflow and self.dynamic:
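The two TODO comments added above point at the same optimization: build the densely packed gradient lists once, keep them as attributes of the scaler, and reuse them instead of allocating fresh lists (and paying the 200-300 usec of garbage-collection overhead the profiler shows) on every unscale() call. A rough sketch of that idea follows; it is not part of this commit, and the class and method names are invented for illustration.

class ReusableGradBuffers:
    """Holds the packed grad lists across unscale() calls, avoiding per-call allocation."""

    def __init__(self):
        self.model_grads = []
        self.master_grads = []

    def repack(self, model_master_params):
        # Clear in place rather than rebinding, so the list objects themselves persist.
        del self.model_grads[:]
        del self.master_grads[:]
        for model, master in model_master_params:
            # The lists must stay densely packed: pairs with missing grads are skipped,
            # which is why at::optional support in multi_tensor_apply would simplify this.
            if model.grad is not None and master.grad is not None:
                self.model_grads.append(model.grad.data)
                self.master_grads.append(master.grad.data)
        return self.model_grads, self.master_grads
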
2 changes: 1 addition & 1 deletion apex/multi_tensor_apply/multi_tensor_apply.py
@@ -10,7 +10,7 @@ def __init__(self, chunk_size):
MultiTensorApply.available = True
self.chunk_size = chunk_size
except ImportError as err:
MultiTensorApply.availble = False
MultiTensorApply.available = False
MultiTensorApply.import_err = err

def check_avail(self):
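The one-character change above fixes a real bug: the misspelled assignment created a new availble attribute and left MultiTensorApply.available unset (or stale) when the fused extension failed to import, so check_avail() could not report the failure. A self-contained sketch of the same detect-and-defer pattern is below; the class name, extension name, and check_avail body are illustrative assumptions, not apex's exact code.

class FusedOpWrapper:
    available = False
    import_err = None

    def __init__(self, chunk_size):
        try:
            import amp_C  # fused-kernel extension; name assumed for illustration
            FusedOpWrapper.available = True
            self.chunk_size = chunk_size
        except ImportError as err:
            # A typo here (e.g. "availble") would silently create a new attribute
            # instead of updating the intended class-level flag.
            FusedOpWrapper.available = False
            FusedOpWrapper.import_err = err

    def check_avail(self):
        if not FusedOpWrapper.available:
            raise RuntimeError("Fused kernels unavailable: {}".format(FusedOpWrapper.import_err))
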
2 changes: 2 additions & 0 deletions tests/L1/common/main_amp.py
@@ -107,6 +107,8 @@ def fast_collate(batch):
print("loss_scale = {}".format(args.loss_scale), type(args.loss_scale))


print("\nCUDNN VERSION: {}\n".format(torch.backends.cudnn.version()))

if args.deterministic:
cudnn.benchmark = False
cudnn.deterministic = True
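The added print makes the cuDNN version visible in the test logs, alongside the existing --deterministic branch that controls run-to-run reproducibility. A small sketch of the knobs involved is below; the helper name and the else branch are assumptions, not shown in the diff.

import torch
import torch.backends.cudnn as cudnn

def configure_cudnn(deterministic):
    # Log the cuDNN version so test results can be matched to the library build.
    print("\nCUDNN VERSION: {}\n".format(torch.backends.cudnn.version()))
    if deterministic:
        cudnn.benchmark = False      # disable the autotuner, which may pick different kernels per run
        cudnn.deterministic = True   # restrict cuDNN to deterministic algorithms
    else:
        cudnn.benchmark = True       # assumption: typical non-deterministic setting for these tests
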
8 changes: 8 additions & 0 deletions tests/L1/common/run_test.sh
@@ -46,6 +46,8 @@ rm False*

set -e

print_banner "Installing Apex with --cuda_ext and --cpp_ext"

pushd ../../..
python setup.py install --cuda_ext --cpp_ext
popd
@@ -76,6 +78,8 @@ do
set +x
done

print_banner "Reinstalling apex without extensions"

pushd ../../..
python setup.py install
popd
@@ -102,13 +106,17 @@ do
do
for keep_batchnorm in "${keep_batchnorms[@]}"
do
echo ""
echo "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} [--has-ext] $DATADIR"
set -x
python compare.py --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm}
set +x
done
done
done

print_banner "Reinstalling Apex with --cuda_ext and --cpp_ext"

pushd ../../..
python setup.py install --cuda_ext --cpp_ext
popd
