diff --git a/ring_attention_pytorch/ring_flash_attention_cuda.py b/ring_attention_pytorch/ring_flash_attention_cuda.py index ce71480..8dcf876 100644 --- a/ring_attention_pytorch/ring_flash_attention_cuda.py +++ b/ring_attention_pytorch/ring_flash_attention_cuda.py @@ -767,12 +767,6 @@ def backward(ctx, do): else: ring_dq, ring_dk, ring_dv = 0., 0., 0. - - q = q[:, :row_length] - o = o[:, :row_length] - do = do[:, :row_length] - lse = lse[..., :row_length] - else: ( diff --git a/setup.py b/setup.py index c347516..94bbe45 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name = 'ring-attention-pytorch', packages = find_packages(exclude=[]), - version = '0.3.6', + version = '0.3.7', license='MIT', description = 'Ring Attention - Pytorch', author = 'Phil Wang',