Prioritize fp16 compute when using allow_fp16_accumulation
comfyanonymous committed Feb 23, 2025
1 parent aff1653 commit ace899e
Showing 1 changed file with 7 additions and 0 deletions.
comfy/model_management.py

@@ -256,9 +256,12 @@ def is_amd():
     torch.backends.cuda.enable_flash_sdp(True)
     torch.backends.cuda.enable_mem_efficient_sdp(True)

+
+PRIORITIZE_FP16 = False  # TODO: remove and replace with something that shows exactly which dtype is faster than the other
 try:
     if is_nvidia() and args.fast:
         torch.backends.cuda.matmul.allow_fp16_accumulation = True
+        PRIORITIZE_FP16 = True  # TODO: limit to cards where it actually boosts performance
 except:
     pass

@@ -681,6 +684,10 @@ def unet_dtype(device=None, model_params=0, supported_dtypes=[torch.float16, tor
         if model_params * 2 > free_model_memory:
             return fp8_dtype

+    if PRIORITIZE_FP16:
+        if torch.float16 in supported_dtypes and should_use_fp16(device=device, model_params=model_params):
+            return torch.float16
+
     for dt in supported_dtypes:
         if dt == torch.float16 and should_use_fp16(device=device, model_params=model_params):
             if torch.float16 in supported_dtypes:
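For reference, a minimal standalone sketch of the behavior this change enables. It assumes an NVIDIA GPU, ComfyUI launched with --fast, and a PyTorch build that exposes torch.backends.cuda.matmul.allow_fp16_accumulation; pick_dtype below is a hypothetical stand-in for the relevant branch of unet_dtype(), not code from the repository.

import torch

# Mirrors the new module-level toggle: fp16 accumulation is only requested when
# the installed PyTorch build exposes the setting, hence the broad try/except.
PRIORITIZE_FP16 = False
try:
    torch.backends.cuda.matmul.allow_fp16_accumulation = True
    PRIORITIZE_FP16 = True
except Exception:
    pass  # older PyTorch builds do not expose this toggle

def pick_dtype(supported_dtypes, fp16_ok):
    # Hypothetical stand-in for the new branch in unet_dtype(): when fp16
    # accumulation is active, fp16 is preferred before the usual per-dtype scan.
    if PRIORITIZE_FP16 and torch.float16 in supported_dtypes and fp16_ok:
        return torch.float16
    for dt in supported_dtypes:
        if dt == torch.float16 and fp16_ok:
            return torch.float16
        if dt == torch.bfloat16:
            return torch.bfloat16
    return torch.float32

print(pick_dtype([torch.float16, torch.bfloat16, torch.float32], fp16_ok=True))

Per the commit title and the TODO comments, the intent is to prefer fp16 model compute when fp16 accumulation has been enabled via --fast, with a follow-up planned to limit this to cards where it actually improves performance.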
